From: bobijam <bobijam>
Date: Fri, 15 Aug 2008 16:53:45 +0000 (+0000)
Subject: Branch b1_6
X-Git-Tag: GIT_EPOCH_B_RELEASE_1_6_7~2^3~346
X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=607d24926c2c618a7e1b8635a3742c35ede5c8ce;p=fs%2Flustre-release.git

Branch b1_6
b=16495
o=green
i=adilger, shadow

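Assert in __ptlrpc_free_req() that a request being freed is no longer
marked for replay (rq_replay must be clear). Also strip trailing
whitespace in lustre/ptlrpc/client.c.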
---

diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c
index 5c9e1226..05e9ad2 100644
--- a/lustre/ptlrpc/client.c
+++ b/lustre/ptlrpc/client.c
@@ -211,35 +211,35 @@ void ptlrpc_at_set_req_timeout(struct ptlrpc_request *req)
 
         if (AT_OFF) {
                 /* non-AT settings */
-                req->rq_timeout = req->rq_import->imp_server_timeout ? 
+                req->rq_timeout = req->rq_import->imp_server_timeout ?
                         obd_timeout / 2 : obd_timeout;
                 lustre_msg_set_timeout(req->rq_reqmsg, req->rq_timeout);
                 return;
         }
 
         at = &req->rq_import->imp_at;
-        idx = import_at_get_index(req->rq_import, 
+        idx = import_at_get_index(req->rq_import,
                                   req->rq_request_portal);
         serv_est = at_get(&at->iat_service_estimate[idx]);
         /* add an arbitrary minimum: 125% +5 sec */
         req->rq_timeout = serv_est + (serv_est >> 2) + 5;
         /* We could get even fancier here, using history to predict increased
            loading... */
-             
-        /* Let the server know what this RPC timeout is by putting it in the 
+
+        /* Let the server know what this RPC timeout is by putting it in the
            reqmsg*/
         lustre_msg_set_timeout(req->rq_reqmsg, req->rq_timeout);
 }
 
 /* Adjust max service estimate based on server value */
-static void ptlrpc_at_adj_service(struct ptlrpc_request *req) 
+static void ptlrpc_at_adj_service(struct ptlrpc_request *req)
 {
         int idx;
         unsigned int serv_est, oldse;
         struct imp_at *at = &req->rq_import->imp_at;
 
         LASSERT(req->rq_import);
-        
+
         /* service estimate is returned in the repmsg timeout field,
            may be 0 on err */
         serv_est = lustre_msg_get_timeout(req->rq_repmsg);
@@ -250,7 +250,7 @@ static void ptlrpc_at_adj_service(struct ptlrpc_request *req)
         oldse = at_add(&at->iat_service_estimate[idx], serv_est);
         if (oldse != 0)
                 CDEBUG(D_ADAPTTO, "The RPC service estimate for %s ptl %d "
-                       "has changed from %d to %d\n", 
+                       "has changed from %d to %d\n",
                        req->rq_import->imp_obd->obd_name,req->rq_request_portal,
                        oldse, at_get(&at->iat_service_estimate[idx]));
 }
@@ -281,7 +281,7 @@ static void ptlrpc_at_adj_net_latency(struct ptlrpc_request *req)
         oldnl = at_add(&at->iat_net_latency, nl);
         if (oldnl != 0)
                 CDEBUG(D_ADAPTTO, "The network latency for %s (nid %s) "
-                       "has changed from %d to %d\n", 
+                       "has changed from %d to %d\n",
                        req->rq_import->imp_obd->obd_name,
                        obd_uuid2str(
                                &req->rq_import->imp_connection->c_remote_uuid),
@@ -311,7 +311,7 @@ static int unpack_reply(struct ptlrpc_request *req)
 }
 
 /* Handle an early reply message.
-   We can't risk the real reply coming in and changing rq_repmsg, 
+   We can't risk the real reply coming in and changing rq_repmsg,
    so this fn must be called under the rq_lock */
 static int ptlrpc_at_recv_early_reply(struct ptlrpc_request *req) {
         struct lustre_msg *oldmsg, *msgcpy;
@@ -322,8 +322,8 @@ static int ptlrpc_at_recv_early_reply(struct ptlrpc_request *req) {
         req->rq_early = 0;
 
         rc = unpack_reply(req);
-        if (rc) 
-                /* Let's just ignore it - same as if it never got here */ 
+        if (rc)
+                /* Let's just ignore it - same as if it never got here */
                 RETURN(rc);
 
         /* We've got to make sure another early reply doesn't land on
@@ -336,20 +336,20 @@ static int ptlrpc_at_recv_early_reply(struct ptlrpc_request *req) {
                 RETURN(-ENOMEM);
         }
         spin_lock(&req->rq_lock);
-        /* Another reply might have changed the repmsg and replen while 
+        /* Another reply might have changed the repmsg and replen while
            we dropped the lock; doesn't really matter, just use the latest.
            If it doesn't fit in oldlen, checksum will be wrong. */
         oldmsg = req->rq_repmsg;
         memcpy(msgcpy, oldmsg, oldlen);
-        if (lustre_msg_get_cksum(msgcpy) != 
+        if (lustre_msg_get_cksum(msgcpy) !=
             lustre_msg_calc_cksum(msgcpy)) {
                 CDEBUG(D_ADAPTTO, "Early reply checksum mismatch, "
                        "discarding %x != %x\n", lustre_msg_get_cksum(msgcpy),
                        lustre_msg_calc_cksum(msgcpy));
-                GOTO(out, rc = -EINVAL); 
+                GOTO(out, rc = -EINVAL);
         }
 
-        /* Our copied msg is valid, now we can adjust the timeouts without 
+        /* Our copied msg is valid, now we can adjust the timeouts without
            worrying that a new reply will land on the copy. */
         req->rq_repmsg = msgcpy;
 
@@ -361,18 +361,18 @@ static int ptlrpc_at_recv_early_reply(struct ptlrpc_request *req) {
         ptlrpc_at_set_req_timeout(req);
 
         olddl = req->rq_deadline;
-        /* server assumes it now has rq_timeout from when it sent the 
+        /* server assumes it now has rq_timeout from when it sent the
            early reply, so client should give it at least that long. */
-        req->rq_deadline = cfs_time_current_sec() + req->rq_timeout + 
+        req->rq_deadline = cfs_time_current_sec() + req->rq_timeout +
                     ptlrpc_at_get_net_latency(req);
 
-        DEBUG_REQ(D_ADAPTTO, req, 
-                  "Early reply #%d, new deadline in %lds (%+lds)", 
+        DEBUG_REQ(D_ADAPTTO, req,
+                  "Early reply #%d, new deadline in %lds (%+lds)",
                   req->rq_early_count, req->rq_deadline -
                   cfs_time_current_sec(), req->rq_deadline - olddl);
-        
+
         req->rq_repmsg = oldmsg;
-        
+
 out:
         OBD_FREE(msgcpy, oldlen);
         RETURN(rc);
@@ -546,7 +546,7 @@ ptlrpc_prep_req_pool(struct obd_import *imp, __u32 version, int opcode,
 
         request->rq_request_portal = imp->imp_client->cli_request_portal;
         request->rq_reply_portal = imp->imp_client->cli_reply_portal;
-        
+
         ptlrpc_at_set_req_timeout(request);
 
         spin_lock_init(&request->rq_lock);
@@ -649,7 +649,7 @@ int ptlrpc_set_add_cb(struct ptlrpc_request_set *set,
 {
         struct ptlrpc_set_cbdata *cbdata;
 
-        OBD_SLAB_ALLOC(cbdata, ptlrpc_cbdata_slab, 
+        OBD_SLAB_ALLOC(cbdata, ptlrpc_cbdata_slab,
                         CFS_ALLOC_STD, sizeof(*cbdata));
         if (cbdata == NULL)
                 RETURN(-ENOMEM);
@@ -657,7 +657,7 @@ int ptlrpc_set_add_cb(struct ptlrpc_request_set *set,
         cbdata->psc_interpret = fn;
         cbdata->psc_data = data;
         list_add_tail(&cbdata->psc_item, &set->set_cblist);
-        
+
         RETURN(0);
 }
 
@@ -828,7 +828,7 @@ static int after_reply(struct ptlrpc_request *req)
 
         LASSERT (req->rq_nob_received <= req->rq_replen);
         rc = unpack_reply(req);
-        if (rc) 
+        if (rc)
                 RETURN(rc);
 
         do_gettimeofday(&work_start);
@@ -862,7 +862,7 @@ static int after_reply(struct ptlrpc_request *req)
                         RETURN(rc);
                 }
         } else {
-                /* Let's look if server sent slv. Do it only for RPC with 
+                /* Let's look if server sent slv. Do it only for RPC with
                  * rc == 0. */
                 ldlm_cli_update_pool(req);
         }
@@ -875,8 +875,8 @@ static int after_reply(struct ptlrpc_request *req)
                 spin_lock(&imp->imp_lock);
                 /* no point in adding already-committed requests to the replay
                  * list, we will just remove them immediately. b=9829 */
-                if (req->rq_transno != 0 && 
-                    (req->rq_transno > 
+                if (req->rq_transno != 0 &&
+                    (req->rq_transno >
                      lustre_msg_get_last_committed(req->rq_repmsg) ||
                      req->rq_replay))
                         ptlrpc_retain_replayable_request(req, imp);
@@ -906,7 +906,7 @@ static int ptlrpc_send_new_req(struct ptlrpc_request *req)
         LASSERT(req->rq_phase == RQ_PHASE_NEW);
         if (req->rq_sent && (req->rq_sent > CURRENT_SECONDS))
                 RETURN (0);
-        
+
         req->rq_phase = RQ_PHASE_RPC;
 
         imp = req->rq_import;
@@ -1251,7 +1251,7 @@ int ptlrpc_expire_one_request(struct ptlrpc_request *req)
                 RETURN(1);
         }
 
-        /* if a request can't be resent we can't wait for an answer after 
+        /* if a request can't be resent we can't wait for an answer after
            the timeout */
         if (req->rq_no_resend) {
                 DEBUG_REQ(D_RPCTRACE, req, "TIMEOUT-NORESEND:");
@@ -1341,7 +1341,7 @@ int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set)
 
                 /* request in-flight? */
                 if (!(((req->rq_phase == RQ_PHASE_RPC) && !req->rq_waiting) ||
-                      (req->rq_phase == RQ_PHASE_BULK) || 
+                      (req->rq_phase == RQ_PHASE_BULK) ||
                       (req->rq_phase == RQ_PHASE_NEW)))
                         continue;
 
@@ -1356,7 +1356,7 @@ int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set)
                 if (deadline <= now) {  /* actually expired already */
                         timeout = 1;    /* ASAP */
                         break;
-                } 
+                }
                 if ((timeout == 0) || (timeout > (deadline - now))) {
                         timeout = deadline - now;
                 }
@@ -1423,13 +1423,13 @@ int ptlrpc_set_wait(struct ptlrpc_request_set *set)
                 struct ptlrpc_set_cbdata *cbdata, *n;
                 int err;
 
-                list_for_each_entry_safe(cbdata, n, 
+                list_for_each_entry_safe(cbdata, n,
                                          &set->set_cblist, psc_item) {
                         list_del_init(&cbdata->psc_item);
                         err = cbdata->psc_interpret(set, cbdata->psc_data, rc);
                         if (err && !rc)
                                 rc = err;
-                        OBD_SLAB_FREE(cbdata, ptlrpc_cbdata_slab, 
+                        OBD_SLAB_FREE(cbdata, ptlrpc_cbdata_slab,
                                         sizeof(*cbdata));
                 }
         }
@@ -1458,6 +1458,7 @@ static void __ptlrpc_free_req(struct ptlrpc_request *request, int locked)
         LASSERTF(request->rq_rqbd == NULL, "req %p\n",request);/* client-side */
         LASSERTF(list_empty(&request->rq_list), "req %p\n", request);
         LASSERTF(list_empty(&request->rq_set_chain), "req %p\n", request);
+        LASSERTF(!request->rq_replay, "req %p\n", request);
 
         /* We must take it off the imp_replay_list first.  Otherwise, we'll set
          * request->rq_reqmsg to NULL while osc_close is dereferencing it. */
@@ -1571,7 +1572,7 @@ void ptlrpc_unregister_reply (struct ptlrpc_request *request)
         LNetMDUnlink (request->rq_reply_md_h);
 
         /* We have to l_wait_event() whatever the result, to give liblustre
-         * a chance to run reply_in_callback(), and to make sure we've 
+         * a chance to run reply_in_callback(), and to make sure we've
          * unlinked before returning a req to the pool */
 
         if (request->rq_set != NULL)
@@ -1868,19 +1869,19 @@ restart:
         spin_unlock(&imp->imp_lock);
 
         rc = ptl_send_rpc(req, 0);
-        if (rc) 
+        if (rc)
                 DEBUG_REQ(D_HA, req, "send failed (%d); recovering", rc);
         do {
                 timeoutl = req->rq_deadline - cfs_time_current_sec();
                 timeout = (timeoutl <= 0 || rc) ? CFS_TICK :
                         cfs_time_seconds(timeoutl);
-                DEBUG_REQ(D_NET, req, 
+                DEBUG_REQ(D_NET, req,
                           "-- sleeping for "CFS_DURATION_T" ticks", timeout);
                 lwi = LWI_TIMEOUT_INTR(timeout, NULL, interrupted_request, req);
                 brc = l_wait_event(req->rq_reply_waitq, ptlrpc_check_reply(req),
                                   &lwi);
                 /* Wait again if we changed deadline */
-        } while ((brc == -ETIMEDOUT) && 
+        } while ((brc == -ETIMEDOUT) &&
                  (req->rq_deadline > cfs_time_current_sec()));
 
         if ((brc == -ETIMEDOUT) && !ptlrpc_expire_one_request(req)) {
@@ -1908,7 +1909,7 @@ restart:
 
 
         if (req->rq_err) {
-                DEBUG_REQ(D_RPCTRACE, req, "err rc=%d status=%d", 
+                DEBUG_REQ(D_RPCTRACE, req, "err rc=%d status=%d",
                           rc, req->rq_status);
                 GOTO(out, rc = -EIO);
         }