Whamcloud - gitweb
LU-5696 ptlrpc: missing wakeup for ptlrpc_check_set 58/12158/10
authorLiang Zhen <liang.zhen@intel.com>
Wed, 1 Oct 2014 16:47:46 +0000 (00:47 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Fri, 5 Dec 2014 13:40:44 +0000 (13:40 +0000)
This patch changes a few things:

- There is no guarantee that request_out_callback will happen
  before reply_in_callback, if a request got reply and unlinked
  reply buffer before request_out_callback is called, then the
  thread waiting on ptlrpc_request_set will miss wakeup event.

  This may seriously impact performance of some IO workloads or
  result in RPC timeout

- To make code more easier to understand, this patch changes
  action-bits "rq_req_unlink" and "rq_reply_unlink" to
  status-bits "rq_req_unlinked" and "rq_reply_unlinked"

Signed-off-by: Liang Zhen <liang.zhen@intel.com>
Change-Id: Ie6043534af3c9b48a52da30210d327f3de83b866
Reviewed-on: http://review.whamcloud.com/12158
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Johann Lombardi <johann.lombardi@intel.com>
Reviewed-by: Li Wei <wei.g.li@intel.com>
Reviewed-by: Mike Pershin <mike.pershin@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/lustre_net.h
lustre/ptlrpc/client.c
lustre/ptlrpc/events.c
lustre/ptlrpc/niobuf.c
lustre/ptlrpc/ptlrpc_internal.h

index f39d8d7..2bdde31 100644 (file)
@@ -1957,7 +1957,7 @@ struct ptlrpc_request {
         * rq_list
         */
        spinlock_t                       rq_lock;
-       /** client-side flags are serialized by rq_lock */
+       /** client-side flags are serialized by rq_lock @{ */
        unsigned int rq_intr:1, rq_replied:1, rq_err:1,
                 rq_timedout:1, rq_resend:1, rq_restart:1,
                 /**
@@ -1973,24 +1973,29 @@ struct ptlrpc_request {
                 rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1,
                 rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1,
                rq_early:1,
-               rq_req_unlink:1, rq_reply_unlink:1,
-                rq_memalloc:1,      /* req originated from "kswapd" */
-                /* server-side flags */
-                rq_packed_final:1,  /* packed final reply */
-                rq_hp:1,            /* high priority RPC */
-                rq_at_linked:1,     /* link into service's srv_at_array */
-                rq_reply_truncate:1,
-                rq_committed:1,
-                /* whether the "rq_set" is a valid one */
-                rq_invalid_rqset:1,
+               rq_req_unlinked:1,      /* unlinked request buffer from lnet */
+               rq_reply_unlinked:1,    /* unlinked reply buffer from lnet */
+               rq_memalloc:1,      /* req originated from "kswapd" */
+               rq_committed:1,
+               rq_reply_truncated:1,
+               /** whether the "rq_set" is a valid one */
+               rq_invalid_rqset:1,
                rq_generation_set:1,
-               /* do not resend request on -EINPROGRESS */
+               /** do not resend request on -EINPROGRESS */
                rq_no_retry_einprogress:1,
                /* allow the req to be sent if the import is in recovery
                 * status */
                rq_allow_replay:1,
                /* bulk request, sent to server, but uncommitted */
                rq_unstable:1;
+       /** @} */
+
+       /** server-side flags @{ */
+       unsigned int
+               rq_hp:1,                /**< high priority RPC */
+               rq_at_linked:1,         /**< link into service's srv_at_array */
+               rq_packed_final:1;      /**< packed final reply */
+       /** @} */
 
        /** one of RQ_PHASE_* */
        enum rq_phase                    rq_phase;
@@ -3370,8 +3375,8 @@ ptlrpc_client_recv_or_unlink(struct ptlrpc_request *req)
                spin_unlock(&req->rq_lock);
                return 1;
        }
-       rc = req->rq_receiving_reply ;
-       rc = rc || req->rq_req_unlink || req->rq_reply_unlink;
+       rc = !req->rq_req_unlinked || !req->rq_reply_unlinked ||
+            req->rq_receiving_reply;
        spin_unlock(&req->rq_lock);
        return rc;
 }
index 2e82e53..3182e3d 100644 (file)
@@ -1227,9 +1227,9 @@ static int after_reply(struct ptlrpc_request *req)
 
         LASSERT(obd != NULL);
         /* repbuf must be unlinked */
-       LASSERT(!req->rq_receiving_reply && !req->rq_reply_unlink);
+       LASSERT(!req->rq_receiving_reply && req->rq_reply_unlinked);
 
-        if (req->rq_reply_truncate) {
+       if (req->rq_reply_truncated) {
                 if (ptlrpc_no_resend(req)) {
                         DEBUG_REQ(D_ERROR, req, "reply buffer overflow,"
                                   " expected: %d, actual size: %d",
@@ -2446,9 +2446,11 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
                 }
 
                 LASSERT(rc == -ETIMEDOUT);
-                DEBUG_REQ(D_WARNING, request, "Unexpectedly long timeout "
-                         "rvcng=%d unlnk=%d/%d", request->rq_receiving_reply,
-                         request->rq_req_unlink, request->rq_reply_unlink);
+               DEBUG_REQ(D_WARNING, request, "Unexpectedly long timeout "
+                         "receiving_reply=%d req_ulinked=%d reply_unlinked=%d",
+                         request->rq_receiving_reply,
+                         request->rq_req_unlinked,
+                         request->rq_reply_unlinked);
         }
         RETURN(0);
 }
@@ -3135,8 +3137,6 @@ void *ptlrpcd_alloc_work(struct obd_import *imp,
        req->rq_import = class_import_get(imp);
        req->rq_interpret_reply = work_interpreter;
        /* don't want reply */
-       req->rq_receiving_reply = 0;
-       req->rq_req_unlink = req->rq_reply_unlink = 0;
        req->rq_no_delay = req->rq_no_resend = 1;
        req->rq_pill.rc_fmt = (void *)&worker_format;
 
index ca4b0e6..915c7ea 100644 (file)
@@ -50,35 +50,39 @@ lnet_handle_eq_t   ptlrpc_eq_h;
  */
 void request_out_callback(lnet_event_t *ev)
 {
-        struct ptlrpc_cb_id   *cbid = ev->md.user_ptr;
-        struct ptlrpc_request *req = cbid->cbid_arg;
-        ENTRY;
+       struct ptlrpc_cb_id   *cbid = ev->md.user_ptr;
+       struct ptlrpc_request *req = cbid->cbid_arg;
+       bool                   wakeup = false;
+       ENTRY;
 
-        LASSERT (ev->type == LNET_EVENT_SEND ||
-                 ev->type == LNET_EVENT_UNLINK);
-        LASSERT (ev->unlinked);
+       LASSERT(ev->type == LNET_EVENT_SEND || ev->type == LNET_EVENT_UNLINK);
+       LASSERT(ev->unlinked);
 
-        DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status);
+       DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status);
 
-        sptlrpc_request_out_callback(req);
-       spin_lock(&req->rq_lock);
-        req->rq_real_sent = cfs_time_current_sec();
-       if (ev->unlinked)
-               req->rq_req_unlink = 0;
+       sptlrpc_request_out_callback(req);
 
-        if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) {
+       spin_lock(&req->rq_lock);
+       req->rq_real_sent = cfs_time_current_sec();
+       req->rq_req_unlinked = 1;
+       /* reply_in_callback happened before request_out_callback? */
+       if (req->rq_reply_unlinked)
+               wakeup = true;
+
+       if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) {
+               /* Failed send: make it seem like the reply timed out, just
+                * like failing sends in client.c does currently...  */
+               req->rq_net_err = 1;
+               wakeup = true;
+       }
 
-                /* Failed send: make it seem like the reply timed out, just
-                 * like failing sends in client.c does currently...  */
+       if (wakeup)
+               ptlrpc_client_wake_req(req);
 
-               req->rq_net_err = 1;
-                ptlrpc_client_wake_req(req);
-        }
        spin_unlock(&req->rq_lock);
 
-        ptlrpc_req_finished(req);
-
-        EXIT;
+       ptlrpc_req_finished(req);
+       EXIT;
 }
 
 /*
@@ -101,10 +105,10 @@ void reply_in_callback(lnet_event_t *ev)
 
        spin_lock(&req->rq_lock);
 
-        req->rq_receiving_reply = 0;
-        req->rq_early = 0;
-        if (ev->unlinked)
-               req->rq_reply_unlink = 0;
+       req->rq_receiving_reply = 0;
+       req->rq_early = 0;
+       if (ev->unlinked)
+               req->rq_reply_unlinked = 1;
 
         if (ev->status)
                 goto out_wake;
@@ -118,7 +122,7 @@ void reply_in_callback(lnet_event_t *ev)
         if (ev->mlength < ev->rlength ) {
                 CDEBUG(D_RPCTRACE, "truncate req %p rpc %d - %d+%d\n", req,
                        req->rq_replen, ev->rlength, ev->offset);
-                req->rq_reply_truncate = 1;
+               req->rq_reply_truncated = 1;
                 req->rq_replied = 1;
                 req->rq_status = -EOVERFLOW;
                 req->rq_nob_received = ev->rlength + ev->offset;
index d1ae6b8..53c4030 100644 (file)
@@ -761,19 +761,18 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
         }
 
        spin_lock(&request->rq_lock);
-        /* If the MD attach succeeds, there _will_ be a reply_in callback */
-        request->rq_receiving_reply = !noreply;
-       request->rq_req_unlink = 1;
        /* We are responsible for unlinking the reply buffer */
-       request->rq_reply_unlink = !noreply;
-        /* Clear any flags that may be present from previous sends. */
+       request->rq_reply_unlinked = noreply;
+       request->rq_receiving_reply = !noreply;
+       /* Clear any flags that may be present from previous sends. */
+       request->rq_req_unlinked = 0;
         request->rq_replied = 0;
         request->rq_err = 0;
         request->rq_timedout = 0;
         request->rq_net_err = 0;
         request->rq_resend = 0;
         request->rq_restart = 0;
-        request->rq_reply_truncate = 0;
+       request->rq_reply_truncated = 0;
        spin_unlock(&request->rq_lock);
 
         if (!noreply) {
@@ -788,8 +787,8 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                 reply_md.user_ptr  = &request->rq_reply_cbid;
                 reply_md.eq_handle = ptlrpc_eq_h;
 
-               /* We must see the unlink callback to unset rq_reply_unlink,
-                   so we can't auto-unlink */
+               /* We must see the unlink callback to set rq_reply_unlinked,
+                * so we can't auto-unlink */
                 rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN,
                                   &request->rq_reply_md_h);
                 if (rc != 0) {
@@ -833,9 +832,10 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                           connection,
                           request->rq_request_portal,
                           request->rq_xid, 0);
-        if (rc == 0)
-                GOTO(out, rc);
+       if (likely(rc == 0))
+               GOTO(out, rc);
 
+       request->rq_req_unlinked = 1;
         ptlrpc_req_finished(request);
         if (noreply)
                 GOTO(out, rc);
index 17f918b..2058309 100644 (file)
@@ -343,6 +343,10 @@ static inline void ptlrpc_cli_req_init(struct ptlrpc_request *req)
        struct ptlrpc_cli_req *cr = &req->rq_cli;
 
        ptlrpc_req_comm_init(req);
+
+       req->rq_receiving_reply = 0;
+       req->rq_req_unlinked = req->rq_reply_unlinked = 1;
+
        INIT_LIST_HEAD(&cr->cr_set_chain);
        INIT_LIST_HEAD(&cr->cr_ctx_chain);
        init_waitqueue_head(&cr->cr_reply_waitq);