Whamcloud - gitweb
Additional debugging for MDS client eviction problem (from 1.0.4).
author adilger <adilger>
Tue, 10 Feb 2004 00:15:19 +0000 (00:15 +0000)
committer adilger <adilger>
Tue, 10 Feb 2004 00:15:19 +0000 (00:15 +0000)
b=2443

lustre/ChangeLog
lustre/include/linux/lustre_net.h
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_request.c

index f8df578..c89792f 100644 (file)
@@ -26,6 +26,8 @@ tbd         Cluster File Systems, Inc. <info@clusterfs.com>
        - fix timeouts when evicting a client with a single lock held (2642)
        - set deadline for the initial HELLO message to drain (2634)
        - print out dotted-quad IP addresses in the socknal (2302)
+       * miscellanea
+       - additional debugging for MDS client eviction problem (2443)
 
 2004-01-27  Cluster File Systems, Inc. <info@clusterfs.com>
        * version 1.0.3
index 7d6f9ee..bb8900e 100644 (file)
@@ -295,7 +295,7 @@ struct ptlrpc_request {
 do {                                                                           \
 CDEBUG(level, "@@@ " fmt                                                       \
        " req@%p x"LPD64"/t"LPD64" o%d->%s@%s:%d lens %d/%d ref %d fl "         \
-       REQ_FLAGS_FMT"/%x/%x rc %x\n" ,  ## args, req, req->rq_xid,             \
+       REQ_FLAGS_FMT"/%x/%x rc %d/%d\n" , ## args, req, req->rq_xid,           \
        req->rq_transno,                                                        \
        req->rq_reqmsg ? req->rq_reqmsg->opc : -1,                              \
        req->rq_import ? (char *)req->rq_import->imp_target_uuid.uuid : "<?>",  \
@@ -308,7 +308,7 @@ CDEBUG(level, "@@@ " fmt                                                       \
        DEBUG_REQ_FLAGS(req),                                                   \
        req->rq_reqmsg ? req->rq_reqmsg->flags : 0,                             \
        req->rq_repmsg ? req->rq_repmsg->flags : 0,                             \
-       req->rq_status);                                                        \
+       req->rq_status, req->rq_repmsg ? req->rq_repmsg->status : 0);           \
 } while (0)
 
 struct ptlrpc_bulk_page {
index 877018a..64dfb52 100644 (file)
@@ -49,7 +49,7 @@ extern int (*mds_getattr_name_p)(int offset, struct ptlrpc_request *req);
 static DECLARE_MUTEX(ldlm_ref_sem);
 static int ldlm_refcount = 0;
 
-/* LDLM state */ 
+/* LDLM state */
 
 static struct ldlm_state *ldlm ;
 
@@ -125,6 +125,7 @@ static int expired_lock_main(void *arg)
         wake_up(&expired_lock_thread.elt_waitq);
 
         while (1) {
+                struct list_head *tmp, *n, work_list;
                 l_wait_event(expired_lock_thread.elt_waitq,
                              have_expired_locks() ||
                              expired_lock_thread.elt_state == ELT_TERMINATE,
@@ -132,12 +133,32 @@ static int expired_lock_main(void *arg)
 
                 spin_lock_bh(&expired_lock_thread.elt_lock);
                 while (!list_empty(expired)) {
-                        struct ldlm_lock *lock = list_entry(expired->next,
-                                                            struct ldlm_lock,
-                                                            l_pending_chain);
+                        struct ldlm_lock *lock;
+
+                        list_add(&work_list, expired);
+                        list_del_init(expired);
+
+                        list_for_each_entry(lock, &work_list, l_pending_chain) {
+                                LDLM_DEBUG(lock, "moving to work list");
+                        }
+
                         spin_unlock_bh(&expired_lock_thread.elt_lock);
 
-                        ptlrpc_fail_export(lock->l_export);
+
+                        list_for_each_safe(tmp, n, &work_list) {
+                                 lock = list_entry(tmp, struct ldlm_lock,
+                                                   l_pending_chain);
+                                 ptlrpc_fail_export(lock->l_export);
+                        }
+
+
+                        if (!list_empty(&work_list)) {
+                                list_for_each_entry(lock, &work_list, l_pending_chain) {
+                                        LDLM_ERROR(lock, "still on work list!");
+                                }
+                        }
+                        LASSERTF (list_empty(&work_list),
+                                  "some exports not failed properly\n");
 
                         spin_lock_bh(&expired_lock_thread.elt_lock);
                 }
@@ -1125,7 +1146,7 @@ static int ldlm_setup(void)
                 ptlrpc_init_svc(LDLM_NEVENTS, LDLM_NBUFS, LDLM_BUFSIZE,
                                 LDLM_MAXREQSIZE, LDLM_CB_REQUEST_PORTAL,
                                 LDLM_CB_REPLY_PORTAL,
-                                ldlm_callback_handler, "ldlm_cbd", 
+                                ldlm_callback_handler, "ldlm_cbd",
                                 ldlm_svc_proc_dir);
 
         if (!ldlm->ldlm_cb_service) {
@@ -1137,7 +1158,7 @@ static int ldlm_setup(void)
                 ptlrpc_init_svc(LDLM_NEVENTS, LDLM_NBUFS, LDLM_BUFSIZE,
                                 LDLM_MAXREQSIZE, LDLM_CANCEL_REQUEST_PORTAL,
                                 LDLM_CANCEL_REPLY_PORTAL,
-                                ldlm_cancel_handler, "ldlm_canceld", 
+                                ldlm_cancel_handler, "ldlm_canceld",
                                 ldlm_svc_proc_dir);
 
         if (!ldlm->ldlm_cancel_service) {
index ae9b202..a8f006a 100644 (file)
@@ -53,10 +53,12 @@ int ldlm_expired_completion_wait(void *data)
                            "server code, just going back to sleep");
                 if (time_after(jiffies, next_dump)) {
                         unsigned int debug = portal_debug;
-                        next_dump = jiffies + 300 * HZ;
                         portal_debug |= D_OTHER;
                         ldlm_namespace_dump(lock->l_resource->lr_namespace);
                         portal_debug = debug;
+                        if (next_dump == 0)
+                                portals_debug_dumplog();
+                        next_dump = jiffies + 300 * HZ;
                 }
                 RETURN(0);
         }
@@ -440,7 +442,7 @@ int ldlm_cli_convert(struct lustre_handle *lockh, int new_mode, int *flags)
 
         LDLM_DEBUG(lock, "client-side convert");
 
-        req = ptlrpc_prep_req(class_exp2cliimp(lock->l_conn_export), 
+        req = ptlrpc_prep_req(class_exp2cliimp(lock->l_conn_export),
                               LDLM_CONVERT, 1, &size, NULL);
         if (!req)
                 GOTO(out, rc = -ENOMEM);
@@ -834,10 +836,8 @@ int ldlm_namespace_foreach_res(struct ldlm_namespace *ns,
 }
 
 /* non-blocking function to manipulate a lock whose cb_data is being put away.*/
-void ldlm_change_cbdata(struct ldlm_namespace *ns, 
-                       struct ldlm_res_id *res_id, 
-                       ldlm_iterator_t iter,
-                       void *data)
+void ldlm_change_cbdata(struct ldlm_namespace *ns, struct ldlm_res_id *res_id,
+                        ldlm_iterator_t iter, void *data)
 {
         struct ldlm_resource *res;
         ENTRY;