From b8e981df1ae9866cc02650205e18ee42835f5822 Mon Sep 17 00:00:00 2001 From: Liang Zhen Date: Thu, 2 Oct 2014 00:47:46 +0800 Subject: [PATCH] LU-5696 ptlrpc: missing wakeup for ptlrpc_check_set This patch changes a few things: - There is no guarantee that request_out_callback will happen before reply_in_callback, if a request got reply and unlinked reply buffer before request_out_callback is called, then the thread waiting on ptlrpc_request_set will miss wakeup event. This may seriously impact performance of some IO workloads or result in RPC timeout - To make code more easier to understand, this patch changes action-bits "rq_req_unlink" and "rq_reply_unlink" to status-bits "rq_req_unlinked" and "rq_reply_unlinked" Signed-off-by: Liang Zhen Change-Id: Ie6043534af3c9b48a52da30210d327f3de83b866 Reviewed-on: http://review.whamcloud.com/12158 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Johann Lombardi Reviewed-by: Li Wei Reviewed-by: Mike Pershin Reviewed-by: Oleg Drokin --- lustre/include/lustre_net.h | 33 +++++++++++++----------- lustre/ptlrpc/client.c | 14 +++++------ lustre/ptlrpc/events.c | 56 ++++++++++++++++++++++------------------- lustre/ptlrpc/niobuf.c | 20 +++++++-------- lustre/ptlrpc/ptlrpc_internal.h | 4 +++ 5 files changed, 70 insertions(+), 57 deletions(-) diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index f39d8d7..2bdde31 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -1957,7 +1957,7 @@ struct ptlrpc_request { * rq_list */ spinlock_t rq_lock; - /** client-side flags are serialized by rq_lock */ + /** client-side flags are serialized by rq_lock @{ */ unsigned int rq_intr:1, rq_replied:1, rq_err:1, rq_timedout:1, rq_resend:1, rq_restart:1, /** @@ -1973,24 +1973,29 @@ struct ptlrpc_request { rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1, rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1, rq_early:1, - rq_req_unlink:1, rq_reply_unlink:1, - rq_memalloc:1, /* req originated from "kswapd" */ - /* server-side flags */ - rq_packed_final:1, /* packed final reply */ - rq_hp:1, /* high priority RPC */ - rq_at_linked:1, /* link into service's srv_at_array */ - rq_reply_truncate:1, - rq_committed:1, - /* whether the "rq_set" is a valid one */ - rq_invalid_rqset:1, + rq_req_unlinked:1, /* unlinked request buffer from lnet */ + rq_reply_unlinked:1, /* unlinked reply buffer from lnet */ + rq_memalloc:1, /* req originated from "kswapd" */ + rq_committed:1, + rq_reply_truncated:1, + /** whether the "rq_set" is a valid one */ + rq_invalid_rqset:1, rq_generation_set:1, - /* do not resend request on -EINPROGRESS */ + /** do not resend request on -EINPROGRESS */ rq_no_retry_einprogress:1, /* allow the req to be sent if the import is in recovery * status */ rq_allow_replay:1, /* bulk request, sent to server, but uncommitted */ rq_unstable:1; + /** @} */ + + /** server-side flags @{ */ + unsigned int + rq_hp:1, /**< high priority RPC */ + rq_at_linked:1, /**< link into service's srv_at_array */ + rq_packed_final:1; /**< packed final reply */ + /** @} */ /** one of RQ_PHASE_* */ enum rq_phase rq_phase; @@ -3370,8 +3375,8 @@ ptlrpc_client_recv_or_unlink(struct ptlrpc_request *req) spin_unlock(&req->rq_lock); return 1; } - rc = req->rq_receiving_reply ; - rc = rc || req->rq_req_unlink || req->rq_reply_unlink; + rc = !req->rq_req_unlinked || !req->rq_reply_unlinked || + req->rq_receiving_reply; spin_unlock(&req->rq_lock); return rc; } diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 2e82e53..3182e3d 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -1227,9 +1227,9 @@ static int after_reply(struct ptlrpc_request *req) LASSERT(obd != NULL); /* repbuf must be unlinked */ - LASSERT(!req->rq_receiving_reply && !req->rq_reply_unlink); + LASSERT(!req->rq_receiving_reply && req->rq_reply_unlinked); - if (req->rq_reply_truncate) { + if (req->rq_reply_truncated) { if (ptlrpc_no_resend(req)) { DEBUG_REQ(D_ERROR, req, "reply buffer overflow," " expected: %d, actual size: %d", @@ -2446,9 +2446,11 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async) } LASSERT(rc == -ETIMEDOUT); - DEBUG_REQ(D_WARNING, request, "Unexpectedly long timeout " - "rvcng=%d unlnk=%d/%d", request->rq_receiving_reply, - request->rq_req_unlink, request->rq_reply_unlink); + DEBUG_REQ(D_WARNING, request, "Unexpectedly long timeout " + "receiving_reply=%d req_ulinked=%d reply_unlinked=%d", + request->rq_receiving_reply, + request->rq_req_unlinked, + request->rq_reply_unlinked); } RETURN(0); } @@ -3135,8 +3137,6 @@ void *ptlrpcd_alloc_work(struct obd_import *imp, req->rq_import = class_import_get(imp); req->rq_interpret_reply = work_interpreter; /* don't want reply */ - req->rq_receiving_reply = 0; - req->rq_req_unlink = req->rq_reply_unlink = 0; req->rq_no_delay = req->rq_no_resend = 1; req->rq_pill.rc_fmt = (void *)&worker_format; diff --git a/lustre/ptlrpc/events.c b/lustre/ptlrpc/events.c index ca4b0e6..915c7ea 100644 --- a/lustre/ptlrpc/events.c +++ b/lustre/ptlrpc/events.c @@ -50,35 +50,39 @@ lnet_handle_eq_t ptlrpc_eq_h; */ void request_out_callback(lnet_event_t *ev) { - struct ptlrpc_cb_id *cbid = ev->md.user_ptr; - struct ptlrpc_request *req = cbid->cbid_arg; - ENTRY; + struct ptlrpc_cb_id *cbid = ev->md.user_ptr; + struct ptlrpc_request *req = cbid->cbid_arg; + bool wakeup = false; + ENTRY; - LASSERT (ev->type == LNET_EVENT_SEND || - ev->type == LNET_EVENT_UNLINK); - LASSERT (ev->unlinked); + LASSERT(ev->type == LNET_EVENT_SEND || ev->type == LNET_EVENT_UNLINK); + LASSERT(ev->unlinked); - DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status); + DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status); - sptlrpc_request_out_callback(req); - spin_lock(&req->rq_lock); - req->rq_real_sent = cfs_time_current_sec(); - if (ev->unlinked) - req->rq_req_unlink = 0; + sptlrpc_request_out_callback(req); - if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) { + spin_lock(&req->rq_lock); + req->rq_real_sent = cfs_time_current_sec(); + req->rq_req_unlinked = 1; + /* reply_in_callback happened before request_out_callback? */ + if (req->rq_reply_unlinked) + wakeup = true; + + if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) { + /* Failed send: make it seem like the reply timed out, just + * like failing sends in client.c does currently... */ + req->rq_net_err = 1; + wakeup = true; + } - /* Failed send: make it seem like the reply timed out, just - * like failing sends in client.c does currently... */ + if (wakeup) + ptlrpc_client_wake_req(req); - req->rq_net_err = 1; - ptlrpc_client_wake_req(req); - } spin_unlock(&req->rq_lock); - ptlrpc_req_finished(req); - - EXIT; + ptlrpc_req_finished(req); + EXIT; } /* @@ -101,10 +105,10 @@ void reply_in_callback(lnet_event_t *ev) spin_lock(&req->rq_lock); - req->rq_receiving_reply = 0; - req->rq_early = 0; - if (ev->unlinked) - req->rq_reply_unlink = 0; + req->rq_receiving_reply = 0; + req->rq_early = 0; + if (ev->unlinked) + req->rq_reply_unlinked = 1; if (ev->status) goto out_wake; @@ -118,7 +122,7 @@ void reply_in_callback(lnet_event_t *ev) if (ev->mlength < ev->rlength ) { CDEBUG(D_RPCTRACE, "truncate req %p rpc %d - %d+%d\n", req, req->rq_replen, ev->rlength, ev->offset); - req->rq_reply_truncate = 1; + req->rq_reply_truncated = 1; req->rq_replied = 1; req->rq_status = -EOVERFLOW; req->rq_nob_received = ev->rlength + ev->offset; diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index d1ae6b8..53c4030 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -761,19 +761,18 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) } spin_lock(&request->rq_lock); - /* If the MD attach succeeds, there _will_ be a reply_in callback */ - request->rq_receiving_reply = !noreply; - request->rq_req_unlink = 1; /* We are responsible for unlinking the reply buffer */ - request->rq_reply_unlink = !noreply; - /* Clear any flags that may be present from previous sends. */ + request->rq_reply_unlinked = noreply; + request->rq_receiving_reply = !noreply; + /* Clear any flags that may be present from previous sends. */ + request->rq_req_unlinked = 0; request->rq_replied = 0; request->rq_err = 0; request->rq_timedout = 0; request->rq_net_err = 0; request->rq_resend = 0; request->rq_restart = 0; - request->rq_reply_truncate = 0; + request->rq_reply_truncated = 0; spin_unlock(&request->rq_lock); if (!noreply) { @@ -788,8 +787,8 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) reply_md.user_ptr = &request->rq_reply_cbid; reply_md.eq_handle = ptlrpc_eq_h; - /* We must see the unlink callback to unset rq_reply_unlink, - so we can't auto-unlink */ + /* We must see the unlink callback to set rq_reply_unlinked, + * so we can't auto-unlink */ rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN, &request->rq_reply_md_h); if (rc != 0) { @@ -833,9 +832,10 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply) connection, request->rq_request_portal, request->rq_xid, 0); - if (rc == 0) - GOTO(out, rc); + if (likely(rc == 0)) + GOTO(out, rc); + request->rq_req_unlinked = 1; ptlrpc_req_finished(request); if (noreply) GOTO(out, rc); diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h index 17f918b..2058309 100644 --- a/lustre/ptlrpc/ptlrpc_internal.h +++ b/lustre/ptlrpc/ptlrpc_internal.h @@ -343,6 +343,10 @@ static inline void ptlrpc_cli_req_init(struct ptlrpc_request *req) struct ptlrpc_cli_req *cr = &req->rq_cli; ptlrpc_req_comm_init(req); + + req->rq_receiving_reply = 0; + req->rq_req_unlinked = req->rq_reply_unlinked = 1; + INIT_LIST_HEAD(&cr->cr_set_chain); INIT_LIST_HEAD(&cr->cr_ctx_chain); init_waitqueue_head(&cr->cr_reply_waitq); -- 1.8.3.1