Whamcloud - gitweb
LU-5073 ptlrpc: unlink request buffer correctly 53/10353/3
author Alexey Lyashkov <alexey_lyashkov@xyratex.com>
Fri, 16 May 2014 17:10:05 +0000 (21:10 +0400)
committer Oleg Drokin <oleg.drokin@intel.com>
Mon, 16 Jun 2014 02:37:30 +0000 (02:37 +0000)
An outgoing request buffer may be held by LNet and not be unlinked
quickly. This breaks unloading of the Lustre modules, because the
request holds a reference to the export/obd.

Signed-off-by: Alexey Lyashkov <alexey_lyashkov@xyratex.com>
Change-Id: I74e59e35a1c4b0c02a3ffa2db8c788c84683b7d6
Xyratex-bug-id: MRP-1848
Reviewed-on: http://review.whamcloud.com/10353
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Mike Pershin <mike.pershin@intel.com>
Reviewed-by: Liang Zhen <liang.zhen@intel.com>
Reviewed-by: Isaac Huang <he.huang@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/lustre_net.h
lustre/ptlrpc/client.c
lustre/ptlrpc/events.c
lustre/ptlrpc/niobuf.c

index 8a55b6e..e10d40e 100644 (file)
@@ -1845,7 +1845,8 @@ struct ptlrpc_request {
                 rq_replay:1,
                 rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1,
                 rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1,
                 rq_replay:1,
                 rq_no_resend:1, rq_waiting:1, rq_receiving_reply:1,
                 rq_no_delay:1, rq_net_err:1, rq_wait_ctx:1,
-                rq_early:1, rq_must_unlink:1,
+               rq_early:1,
+               rq_req_unlink:1, rq_reply_unlink:1,
                 rq_memalloc:1,      /* req originated from "kswapd" */
                 /* server-side flags */
                 rq_packed_final:1,  /* packed final reply */
                 rq_memalloc:1,      /* req originated from "kswapd" */
                 /* server-side flags */
                 rq_packed_final:1,  /* packed final reply */
@@ -3343,7 +3344,8 @@ ptlrpc_client_recv_or_unlink(struct ptlrpc_request *req)
                spin_unlock(&req->rq_lock);
                return 1;
        }
                spin_unlock(&req->rq_lock);
                return 1;
        }
-       rc = req->rq_receiving_reply || req->rq_must_unlink;
+       rc = req->rq_receiving_reply ;
+       rc = rc || req->rq_req_unlink || req->rq_reply_unlink;
        spin_unlock(&req->rq_lock);
        return rc;
 }
        spin_unlock(&req->rq_lock);
        return rc;
 }
index 22534cd..8ec94a3 100644 (file)
@@ -1223,7 +1223,7 @@ static int after_reply(struct ptlrpc_request *req)
 
         LASSERT(obd != NULL);
         /* repbuf must be unlinked */
 
         LASSERT(obd != NULL);
         /* repbuf must be unlinked */
-        LASSERT(!req->rq_receiving_reply && !req->rq_must_unlink);
+       LASSERT(!req->rq_receiving_reply && !req->rq_reply_unlink);
 
         if (req->rq_reply_truncate) {
                 if (ptlrpc_no_resend(req)) {
 
         if (req->rq_reply_truncate) {
                 if (ptlrpc_no_resend(req)) {
@@ -2436,8 +2436,8 @@ int ptlrpc_unregister_reply(struct ptlrpc_request *request, int async)
 
                 LASSERT(rc == -ETIMEDOUT);
                 DEBUG_REQ(D_WARNING, request, "Unexpectedly long timeout "
 
                 LASSERT(rc == -ETIMEDOUT);
                 DEBUG_REQ(D_WARNING, request, "Unexpectedly long timeout "
-                          "rvcng=%d unlnk=%d", request->rq_receiving_reply,
-                          request->rq_must_unlink);
+                         "rvcng=%d unlnk=%d/%d", request->rq_receiving_reply,
+                         request->rq_req_unlink, request->rq_reply_unlink);
         }
         RETURN(0);
 }
         }
         RETURN(0);
 }
@@ -3124,7 +3124,7 @@ void *ptlrpcd_alloc_work(struct obd_import *imp,
        req->rq_interpret_reply = work_interpreter;
        /* don't want reply */
        req->rq_receiving_reply = 0;
        req->rq_interpret_reply = work_interpreter;
        /* don't want reply */
        req->rq_receiving_reply = 0;
-       req->rq_must_unlink = 0;
+       req->rq_req_unlink = req->rq_reply_unlink = 0;
        req->rq_no_delay = req->rq_no_resend = 1;
        req->rq_pill.rc_fmt = (void *)&worker_format;
 
        req->rq_no_delay = req->rq_no_resend = 1;
        req->rq_pill.rc_fmt = (void *)&worker_format;
 
index d7f9a11..e9d19f4 100644 (file)
@@ -68,19 +68,20 @@ void request_out_callback(lnet_event_t *ev)
         DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status);
 
         sptlrpc_request_out_callback(req);
         DEBUG_REQ(D_NET, req, "type %d, status %d", ev->type, ev->status);
 
         sptlrpc_request_out_callback(req);
+       spin_lock(&req->rq_lock);
         req->rq_real_sent = cfs_time_current_sec();
         req->rq_real_sent = cfs_time_current_sec();
+       if (ev->unlinked)
+               req->rq_req_unlink = 0;
 
         if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) {
 
                 /* Failed send: make it seem like the reply timed out, just
                  * like failing sends in client.c does currently...  */
 
 
         if (ev->type == LNET_EVENT_UNLINK || ev->status != 0) {
 
                 /* Failed send: make it seem like the reply timed out, just
                  * like failing sends in client.c does currently...  */
 
-               spin_lock(&req->rq_lock);
                req->rq_net_err = 1;
                req->rq_net_err = 1;
-               spin_unlock(&req->rq_lock);
-
                 ptlrpc_client_wake_req(req);
         }
                 ptlrpc_client_wake_req(req);
         }
+       spin_unlock(&req->rq_lock);
 
         ptlrpc_req_finished(req);
 
 
         ptlrpc_req_finished(req);
 
@@ -110,7 +111,7 @@ void reply_in_callback(lnet_event_t *ev)
         req->rq_receiving_reply = 0;
         req->rq_early = 0;
         if (ev->unlinked)
         req->rq_receiving_reply = 0;
         req->rq_early = 0;
         if (ev->unlinked)
-                req->rq_must_unlink = 0;
+               req->rq_reply_unlink = 0;
 
         if (ev->status)
                 goto out_wake;
 
         if (ev->status)
                 goto out_wake;
index f779054..c1b99c9 100644 (file)
@@ -765,8 +765,9 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
        spin_lock(&request->rq_lock);
         /* If the MD attach succeeds, there _will_ be a reply_in callback */
         request->rq_receiving_reply = !noreply;
        spin_lock(&request->rq_lock);
         /* If the MD attach succeeds, there _will_ be a reply_in callback */
         request->rq_receiving_reply = !noreply;
-        /* We are responsible for unlinking the reply buffer */
-        request->rq_must_unlink = !noreply;
+       request->rq_req_unlink = 1;
+       /* We are responsible for unlinking the reply buffer */
+       request->rq_reply_unlink = !noreply;
         /* Clear any flags that may be present from previous sends. */
         request->rq_replied = 0;
         request->rq_err = 0;
         /* Clear any flags that may be present from previous sends. */
         request->rq_replied = 0;
         request->rq_err = 0;
@@ -789,7 +790,7 @@ int ptl_send_rpc(struct ptlrpc_request *request, int noreply)
                 reply_md.user_ptr  = &request->rq_reply_cbid;
                 reply_md.eq_handle = ptlrpc_eq_h;
 
                 reply_md.user_ptr  = &request->rq_reply_cbid;
                 reply_md.eq_handle = ptlrpc_eq_h;
 
-                /* We must see the unlink callback to unset rq_must_unlink,
+               /* We must see the unlink callback to unset rq_reply_unlink,
                    so we can't auto-unlink */
                 rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN,
                                   &request->rq_reply_md_h);
                    so we can't auto-unlink */
                 rc = LNetMDAttach(reply_me_h, reply_md, LNET_RETAIN,
                                   &request->rq_reply_md_h);