Whamcloud - gitweb
b=21486 fix for truncated reply buffer
authorLiang Zhen <Zhen.Liang@Sun.COM>
Fri, 19 Mar 2010 08:26:09 +0000 (01:26 -0700)
committerJohann Lombardi <johann@sun.com>
Fri, 19 Mar 2010 10:54:52 +0000 (11:54 +0100)
i=eeb
i=ericm

reply buffer could be referred by reply_in_callback after released

lustre/include/lustre_net.h
lustre/ptlrpc/client.c
lustre/ptlrpc/events.c

index c50eaad..e4f845c 100644 (file)
@@ -339,8 +339,9 @@ struct ptlrpc_request {
         int rq_request_portal;  /* XXX FIXME bug 249 */
         int rq_reply_portal;    /* XXX FIXME bug 249 */
 
-        int rq_nob_received; /* client-side # reply bytes actually received  */
-
+        int rq_nob_received; /* client-side:
+                              * !rq_truncate : # reply bytes actually received,
+                              *  rq_truncate : required repbuf_len for resend */
         int rq_reqlen;
         struct lustre_msg *rq_reqmsg;
 
index 73f174a..7b02fb4 100644 (file)
@@ -1010,21 +1010,33 @@ static int after_reply(struct ptlrpc_request *req)
         long timediff;
         ENTRY;
 
-        LASSERT(!req->rq_receiving_reply);
+        /* repbuf must be unlinked */
+        LASSERT(!req->rq_receiving_reply && !req->rq_must_unlink);
         LASSERT(obd);
 
         /* NB Until this point, the whole of the incoming message,
          * including buflens, status etc is in the sender's byte order. */
 
-        if (req->rq_reply_truncate && !req->rq_no_resend) {
-                req->rq_resend = 1;
+        if (req->rq_reply_truncate) {
+                if (req->rq_no_resend) {
+                        DEBUG_REQ(D_ERROR, req, "reply buffer overflow,"
+                                  " expected: %d, actual size: %d",
+                                  req->rq_nob_received, req->rq_replen);
+                        RETURN(-EOVERFLOW);
+                }
+
                 OBD_FREE(req->rq_repbuf, req->rq_replen);
                 req->rq_repbuf = NULL;
-                req->rq_replen = req->rq_nob_received;
+                /* Pass the required reply buffer size (include
+                 * space for early reply) */
+                req->rq_replen       = size_round(req->rq_nob_received);
+                req->rq_nob_received = 0;
+                req->rq_resend       = 1;
                 RETURN(0);
         }
 
-        LASSERT (req->rq_nob_received <= req->rq_replen);
+        LASSERT ((char *)req->rq_repmsg + req->rq_nob_received <=
+                 (char *)req->rq_repbuf + req->rq_replen);
         rc = unpack_reply(req);
         if (rc)
                 RETURN(rc);
@@ -1181,6 +1193,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
                 struct ptlrpc_request *req =
                         list_entry(tmp, struct ptlrpc_request, rq_set_chain);
                 struct obd_import *imp = req->rq_import;
+                int unregistered = 0;
                 int rc = 0;
 
                 if (req->rq_phase == RQ_PHASE_NEW &&
@@ -1353,6 +1366,12 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
 
                         spin_unlock(&req->rq_lock);
 
+                        /* unlink from net because we are going to
+                         * swab in-place of reply buffer */
+                        unregistered = ptlrpc_unregister_reply(req, 1);
+                        if (!unregistered)
+                                continue;
+
                         req->rq_status = after_reply(req);
                         if (req->rq_resend)
                                 continue;
@@ -1386,7 +1405,7 @@ int ptlrpc_check_set(struct ptlrpc_request_set *set)
 
                 /* This moves to "unregistering" phase we need to wait for
                  * reply unlink. */
-                if (!ptlrpc_unregister_reply(req, 1))
+                if (!unregistered && !ptlrpc_unregister_reply(req, 1))
                         continue;
 
                 if (!ptlrpc_unregister_bulk(req, 1))
index e664265..e624950 100644 (file)
@@ -96,7 +96,7 @@ void reply_in_callback(lnet_event_t *ev)
 
         LASSERT(ev->type == LNET_EVENT_PUT || ev->type == LNET_EVENT_UNLINK);
         LASSERT(ev->md.start == req->rq_repbuf);
-        LASSERT(ev->mlength <= req->rq_replen);
+        LASSERT(ev->offset + ev->mlength <= req->rq_replen);
         /* We've set LNET_MD_MANAGE_REMOTE for all outgoing requests
            for adaptive timeouts' early reply. */
         LASSERT((ev->md.options & LNET_MD_MANAGE_REMOTE) != 0);