Whamcloud - gitweb
Add some null-proofing in DEBUG_REQ, and supply arguments for all the format
author shaver <shaver>
Mon, 28 Oct 2002 20:23:09 +0000 (20:23 +0000)
committer shaver <shaver>
Mon, 28 Oct 2002 20:23:09 +0000 (20:23 +0000)
fields (!) while we're at it.

Bullet-proof expired_request to see if we can figure out why we sometimes see
NULL derefs during timeout on dev.

Make the cleanup-from-sending-list stuff use D_ERROR, so that we can see it even
if the client crashes afterwards.  (Just playing a hunch.)

lustre/include/linux/lustre_net.h
lustre/ptlrpc/client.c

index 81a750a..e82a3e1 100644 (file)
@@ -169,8 +169,10 @@ CDEBUG(level,                                                                  \
        "@@@ " fmt " req x"LPD64"/t"LPD64" o%d->%s:%d lens %d/%d fl %x\n",      \
        ## args, req->rq_xid, req->rq_transno,                                  \
        req->rq_reqmsg ? req->rq_reqmsg->opc : -1,                              \
-       req->rq_connection->c_remote_uuid,                                      \
-       req->rq_import->imp_client->cli_request_portal);                        \
+       req->rq_connection ? (char *)req->rq_connection->c_remote_uuid : "<?>", \
+       (req->rq_import && req->rq_import->imp_client) ?                        \
+           req->rq_import->imp_client->cli_request_portal : -1,                \
+       req->rq_reqlen, req->rq_replen, req->rq_flags);                         \
 } while (0)
 
 struct ptlrpc_bulk_page {
index ee6bd63..d87ea31 100644 (file)
@@ -412,7 +412,7 @@ restart1:
                 if (req->rq_import != imp)
                         continue;
                 /* XXX we should make sure that nobody's sleeping on these! */
-                CDEBUG(D_INFO, "Cleaning req %p from sending list.\n", req);
+                DEBUG_REQ(D_ERROR, req, "cleaning up from sending list");
                 list_del_init(&req->rq_list);
                 req->rq_import = NULL;
                 spin_unlock(&conn->c_lock);
@@ -424,7 +424,7 @@ restart2:
                 req = list_entry(tmp, struct ptlrpc_request, rq_list);
                 if (req->rq_import != imp)
                         continue;
-                CERROR("Request %p is on the dying list at cleanup!\n", req);
+                DEBUG_REQ(D_ERROR, req, "on dying list at cleanup");
                 list_del_init(&req->rq_list);
                 req->rq_import = NULL;
                 spin_unlock(&conn->c_lock);
@@ -481,11 +481,27 @@ static int expired_request(void *data)
         struct ptlrpc_request *req = data;
 
         ENTRY;
-        CERROR("req xid "LPD64" op %d: timeout on conn to %s:%d\n",
-               (unsigned long long)req->rq_xid, req->rq_reqmsg->opc,
-               req->rq_connection->c_remote_uuid,
-               req->rq_import->imp_client->cli_request_portal);
+        if (!req) {
+                CERROR("NULL req!");
+                LBUG();
+                RETURN(0);
+        }
+
+        DEBUG_REQ(D_ERROR, req, "timeout");
         req->rq_flags |= PTL_RPC_FL_TIMEOUT;
+
+        if (!req->rq_import) {
+                DEBUG_REQ(D_ERROR, req, "NULL import");
+                LBUG();
+                RETURN(0);
+        }
+
+        if (!req->rq_import->imp_connection) {
+                DEBUG_REQ(D_ERROR, req, "NULL connection");
+                LBUG();
+                RETURN(0);
+        }
+
         if (!req->rq_import->imp_connection->c_recovd_data.rd_recovd)
                 RETURN(1);
 
@@ -582,7 +598,7 @@ int ptlrpc_queue_wait(struct ptlrpc_request *req)
 
         DEBUG_REQ(D_NET, req, "-- sleeping");
         lwi = LWI_TIMEOUT_INTR(req->rq_timeout * HZ, expired_request,
-                               interrupted_request,req);
+                               interrupted_request, req);
         l_wait_event(req->rq_wait_for_rep, ptlrpc_check_reply(req), &lwi);
         DEBUG_REQ(D_NET, req, "-- done sleeping");