Whamcloud - gitweb
Branch HEAD
author zam <zam>
Wed, 4 Nov 2009 18:44:10 +0000 (18:44 +0000)
committer zam <zam>
Wed, 4 Nov 2009 18:44:10 +0000 (18:44 +0000)
b=20498
i=tappro
i=robert.read

More debugging output for hanging unmount: dump locks
for the exports with extra references.

lustre/include/lustre_dlm.h
lustre/include/lustre_export.h
lustre/include/obd_class.h
lustre/ldlm/ldlm_flock.c
lustre/ldlm/ldlm_lib.c
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/mdt/mdt_handler.c
lustre/obdclass/genops.c
lustre/obdclass/obd_config.c

index 5c6ea4b..c83a841 100644 (file)
@@ -51,7 +51,6 @@
 #include <lustre_net.h>
 #include <lustre_import.h>
 #include <lustre_handles.h>
-#include <lustre_export.h> /* for obd_export, for LDLM_DEBUG */
 #include <interval_tree.h> /* for interval_node{}, ldlm_extent */
 #include <lu_ref.h>
 
@@ -545,6 +544,8 @@ struct ldlm_interval_tree {
         struct interval_node *lit_root; /* actually ldlm_interval */
 };
 
+#define LUSTRE_TRACKS_LOCK_EXP_REFS (1)
+
 struct ldlm_lock {
         /**
          * Must be first in the structure.
@@ -715,6 +716,16 @@ struct ldlm_lock {
         struct list_head      l_sl_mode;
         struct list_head      l_sl_policy;
         struct lu_ref         l_reference;
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+        /* Debugging stuff for bug 20498, for tracking export
+           references. */
+        /** number of export references taken */
+        int                   l_exp_refs_nr;
+        /** link all locks referencing one export */
+        struct list_head      l_exp_refs_link;
+        /** referenced export object */
+        struct obd_export    *l_exp_refs_target;
+#endif
 };
 
 struct ldlm_resource {
@@ -918,6 +929,9 @@ static inline int ldlm_res_lvbo_update(struct ldlm_resource *res,
 int ldlm_error2errno(ldlm_error_t error);
 ldlm_error_t ldlm_errno2error(int err_no); /* don't call it `errno': this
                                             * confuses user-space. */
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+void ldlm_dump_export_locks(struct obd_export *exp);
+#endif
 
 /**
  * Release a temporary lock reference obtained by ldlm_handle2lock() or
index 72a83af..cf24fa6 100644 (file)
@@ -152,7 +152,10 @@ struct obd_export {
         atomic_t                  exp_rpc_count; /** RPC references */
         atomic_t                  exp_cb_count; /** Commit callback references */
         atomic_t                  exp_locks_count; /** Lock references */
-
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+        struct list_head          exp_locks_list;
+        spinlock_t                exp_locks_list_guard;
+#endif
         atomic_t                  exp_replay_count;
         struct obd_uuid           exp_client_uuid;
         struct list_head          exp_obd_chain;
index 0d00d44..9cadd6b 100644 (file)
@@ -115,7 +115,7 @@ struct obd_device *class_incref(struct obd_device *obd,
                                 const char *scope, const void *source);
 void class_decref(struct obd_device *obd,
                   const char *scope, const void *source);
-void dump_exports(struct obd_device *obd);
+void dump_exports(struct obd_device *obd, int locks);
 
 /*obdecho*/
 #ifdef LPROCFS
@@ -172,6 +172,19 @@ struct lustre_profile *class_get_profile(const char * prof);
 void class_del_profile(const char *prof);
 void class_del_profiles(void);
 
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+
+void __class_export_add_lock_ref(struct obd_export *, struct ldlm_lock *);
+void __class_export_del_lock_ref(struct obd_export *, struct ldlm_lock *);
+extern void (*class_export_dump_hook)(struct obd_export *);
+
+#else
+
+#define __class_export_add_lock_ref(exp, lock)             do {} while(0)
+#define __class_export_del_lock_ref(exp, lock)             do {} while(0)
+
+#endif
+
 #define class_export_rpc_get(exp)                                       \
 ({                                                                      \
         atomic_inc(&(exp)->exp_rpc_count);                              \
@@ -189,18 +202,20 @@ void class_del_profiles(void);
         class_export_put(exp);                                          \
 })
 
-#define class_export_lock_get(exp)                                      \
+#define class_export_lock_get(exp, lock)                                \
 ({                                                                      \
         atomic_inc(&(exp)->exp_locks_count);                            \
+        __class_export_add_lock_ref(exp, lock);                         \
         CDEBUG(D_INFO, "lock GETting export %p : new locks_count %d\n", \
                (exp), atomic_read(&(exp)->exp_locks_count));            \
         class_export_get(exp);                                          \
 })
 
-#define class_export_lock_put(exp)                                      \
+#define class_export_lock_put(exp, lock)                                \
 ({                                                                      \
         LASSERT(atomic_read(&exp->exp_locks_count) > 0);                \
         atomic_dec(&(exp)->exp_locks_count);                            \
+        __class_export_del_lock_ref(exp, lock);                         \
         CDEBUG(D_INFO, "lock PUTting export %p : new locks_count %d\n", \
                (exp), atomic_read(&(exp)->exp_locks_count));            \
         class_export_put(exp);                                          \
index b3b2cb1..c80c634 100644 (file)
@@ -400,7 +400,7 @@ reprocess:
                         new->l_policy_data.l_flock.end + 1;
                 new2->l_conn_export = lock->l_conn_export;
                 if (lock->l_export != NULL) {
-                        new2->l_export = class_export_lock_get(lock->l_export);
+                        new2->l_export = class_export_lock_get(lock->l_export, new2);
                         if (new2->l_export->exp_lock_hash &&
                             hlist_unhashed(&new2->l_exp_hash))
                                 lustre_hash_add(new2->l_export->exp_lock_hash,
index ec88bed..04dcd77 100644 (file)
@@ -2476,3 +2476,18 @@ ldlm_error_t ldlm_errno2error(int err_no)
 }
 EXPORT_SYMBOL(ldlm_errno2error);
 
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+/** Dump (D_ERROR) all locks on \a exp's exp_locks_list, under its guard. */
+void ldlm_dump_export_locks(struct obd_export *exp)
+{
+        spin_lock(&exp->exp_locks_list_guard);
+        if (!list_empty(&exp->exp_locks_list)) {
+                struct ldlm_lock *lock;
+                CERROR("dumping locks for export %p, "
+                       "ignore if the unmount doesn't hang\n", exp);
+                list_for_each_entry(lock, &exp->exp_locks_list, l_exp_refs_link)
+                        ldlm_lock_dump(D_ERROR, lock, 0);
+        }
+        spin_unlock(&exp->exp_locks_list_guard);
+}
+#endif
index 5d719d4..dd57661 100644 (file)
@@ -164,7 +164,7 @@ void ldlm_lock_put(struct ldlm_lock *lock)
                 ldlm_resource_putref(res);
                 lock->l_resource = NULL;
                 if (lock->l_export) {
-                        class_export_lock_put(lock->l_export);
+                        class_export_lock_put(lock->l_export, lock);
                         lock->l_export = NULL;
                 }
 
@@ -371,6 +371,12 @@ static struct ldlm_lock *ldlm_lock_new(struct ldlm_resource *resource)
         lu_ref_add(&lock->l_reference, "hash", lock);
         lock->l_callback_timeout = 0;
 
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+        CFS_INIT_LIST_HEAD(&lock->l_exp_refs_link);
+        lock->l_exp_refs_nr = 0;
+        lock->l_exp_refs_target = NULL;
+#endif
+
         RETURN(lock);
 }
 
index 9f3dc50..c59fd3e 100644 (file)
@@ -208,7 +208,7 @@ static int expired_lock_main(void *arg)
                                 LDLM_LOCK_RELEASE(lock);
                                 continue;
                         }
-                        export = class_export_lock_get(lock->l_export);
+                        export = class_export_lock_get(lock->l_export, lock);
                         spin_unlock_bh(&waiting_locks_spinlock);
 
                         /* release extra ref grabbed by ldlm_add_waiting_lock()
@@ -217,7 +217,7 @@ static int expired_lock_main(void *arg)
 
                         do_dump++;
                         class_fail_export(export);
-                        class_export_lock_put(export);
+                        class_export_lock_put(export, lock);
                         spin_lock_bh(&waiting_locks_spinlock);
                 }
                 spin_unlock_bh(&waiting_locks_spinlock);
@@ -1109,7 +1109,8 @@ int ldlm_handle_enqueue0(struct ldlm_namespace *ns,
                 LDLM_ERROR(lock, "lock on destroyed export %p", req->rq_export);
                 GOTO(out, rc = -ENOTCONN);
         }
-        lock->l_export = class_export_lock_get(req->rq_export);
+
+        lock->l_export = class_export_lock_get(req->rq_export, lock);
         if (lock->l_export->exp_lock_hash)
                 lustre_hash_add(lock->l_export->exp_lock_hash,
                                 &lock->l_remote_handle,
@@ -2492,7 +2493,9 @@ int __init ldlm_init(void)
                 cfs_mem_cache_destroy(ldlm_lock_slab);
                 return -ENOMEM;
         }
-
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+        class_export_dump_hook = ldlm_dump_export_locks;
+#endif
         return 0;
 }
 
index e6e39bd..d4f8a48 100644 (file)
@@ -3180,7 +3180,7 @@ int mdt_intent_lock_replace(struct mdt_thread_info *info,
                 new_lock->l_writers--;
         }
 
-        new_lock->l_export = class_export_lock_get(req->rq_export);
+        new_lock->l_export = class_export_lock_get(req->rq_export, new_lock);
         new_lock->l_blocking_ast = lock->l_blocking_ast;
         new_lock->l_completion_ast = lock->l_completion_ast;
         new_lock->l_remote_handle = lock->l_remote_handle;
index 1f9c9b1..9899ff3 100644 (file)
@@ -62,7 +62,8 @@ spinlock_t        obd_zombie_impexp_lock;
 static void obd_zombie_impexp_notify(void);
 static void obd_zombie_export_add(struct obd_export *exp);
 static void obd_zombie_import_add(struct obd_import *imp);
-static void print_export_data(struct obd_export *exp, const char *status);
+static void print_export_data(struct obd_export *exp,
+                              const char *status, int locks);
 
 int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c);
 
@@ -785,6 +786,10 @@ struct obd_export *class_new_export(struct obd_device *obd,
         atomic_set(&export->exp_rpc_count, 0);
         atomic_set(&export->exp_cb_count, 0);
         atomic_set(&export->exp_locks_count, 0);
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+        CFS_INIT_LIST_HEAD(&export->exp_locks_list);
+        spin_lock_init(&export->exp_locks_list_guard);
+#endif
         atomic_set(&export->exp_replay_count, 0);
         export->exp_obd = obd;
         CFS_INIT_LIST_HEAD(&export->exp_outstanding_replies);
@@ -985,6 +990,49 @@ void class_destroy_import(struct obd_import *import)
 }
 EXPORT_SYMBOL(class_destroy_import);
 
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+
+/** Take one tracked export ref for \a lock; first ref links it to \a exp. */
+void __class_export_add_lock_ref(struct obd_export *exp, struct ldlm_lock *lock)
+{
+        spin_lock(&exp->exp_locks_list_guard);
+        LASSERT(lock->l_exp_refs_nr >= 0);
+
+        if (lock->l_exp_refs_target != NULL &&
+            lock->l_exp_refs_target != exp) {
+                LCONSOLE_WARN("setting export %p for lock %p which already has export %p\n",
+                              exp, lock, lock->l_exp_refs_target);
+        }
+        if ((lock->l_exp_refs_nr++) == 0) {
+                list_add(&lock->l_exp_refs_link, &exp->exp_locks_list);
+                lock->l_exp_refs_target = exp;
+        }
+        CDEBUG(D_INFO, "lock = %p, export = %p, refs = %d\n",
+               lock, exp, lock->l_exp_refs_nr);
+        spin_unlock(&exp->exp_locks_list_guard);
+}
+EXPORT_SYMBOL(__class_export_add_lock_ref);
+
+/** Drop one tracked export ref of \a lock; the last ref unlinks the lock. */
+void __class_export_del_lock_ref(struct obd_export *exp, struct ldlm_lock *lock)
+{
+        spin_lock(&exp->exp_locks_list_guard);
+        LASSERT(lock->l_exp_refs_nr > 0);
+        if (lock->l_exp_refs_target != exp) {
+                LCONSOLE_WARN("lock %p, mismatching export pointers: %p, %p\n",
+                              lock, lock->l_exp_refs_target, exp);
+        }
+        if (--lock->l_exp_refs_nr == 0) {
+                list_del_init(&lock->l_exp_refs_link);
+                lock->l_exp_refs_target = NULL;
+        }
+        CDEBUG(D_INFO, "lock = %p, export = %p, refs = %d\n",
+               lock, exp, lock->l_exp_refs_nr);
+        spin_unlock(&exp->exp_locks_list_guard);
+}
+EXPORT_SYMBOL(__class_export_del_lock_ref);
+#endif
+
 /* A connection defines an export context in which preallocation can
    be managed. This releases the export pointer reference, and returns
    the export handle, so the export refcount is 1 when this function
@@ -1188,7 +1236,7 @@ void class_disconnect_stale_exports(struct obd_device *obd,
                        obd->obd_name, exp->exp_client_uuid.uuid,
                        exp->exp_connection == NULL ? "<unknown>" :
                        libcfs_nid2str(exp->exp_connection->c_peer.nid));
-                print_export_data(exp, "EVICTING");
+                print_export_data(exp, "EVICTING", 0);
         }
         spin_unlock(&obd->obd_dev_lock);
 
@@ -1308,7 +1356,13 @@ int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid)
 }
 EXPORT_SYMBOL(obd_export_evict_by_uuid);
 
-static void print_export_data(struct obd_export *exp, const char *status)
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+void (*class_export_dump_hook)(struct obd_export*) = NULL;
+EXPORT_SYMBOL(class_export_dump_hook);
+#endif
+
+static void print_export_data(struct obd_export *exp, const char *status,
+                              int locks)
 {
         struct ptlrpc_reply_state *rs;
         struct ptlrpc_reply_state *first_reply = NULL;
@@ -1331,23 +1385,27 @@ static void print_export_data(struct obd_export *exp, const char *status)
                exp->exp_disconnected, exp->exp_delayed, exp->exp_failed,
                nreplies, first_reply, nreplies > 3 ? "..." : "",
                exp->exp_last_committed);
+#if LUSTRE_TRACKS_LOCK_EXP_REFS
+        if (locks && class_export_dump_hook != NULL)
+                class_export_dump_hook(exp);
+#endif
 }
 
-void dump_exports(struct obd_device *obd)
+void dump_exports(struct obd_device *obd, int locks)
 {
         struct obd_export *exp;
 
         spin_lock(&obd->obd_dev_lock);
         list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain)
-                print_export_data(exp, "ACTIVE");
+                print_export_data(exp, "ACTIVE", locks);
         list_for_each_entry(exp, &obd->obd_unlinked_exports, exp_obd_chain)
-                print_export_data(exp, "UNLINKED");
+                print_export_data(exp, "UNLINKED", locks);
         list_for_each_entry(exp, &obd->obd_delayed_exports, exp_obd_chain)
-                print_export_data(exp, "DELAYED");
+                print_export_data(exp, "DELAYED", locks);
         spin_unlock(&obd->obd_dev_lock);
         spin_lock(&obd_zombie_impexp_lock);
         list_for_each_entry(exp, &obd_zombie_exports, exp_obd_chain)
-                print_export_data(exp, "ZOMBIE");
+                print_export_data(exp, "ZOMBIE", locks);
         spin_unlock(&obd_zombie_impexp_lock);
 }
 EXPORT_SYMBOL(dump_exports);
@@ -1366,7 +1424,7 @@ void obd_exports_barrier(struct obd_device *obd)
                                       "The obd refcount = %d. Is it stuck?\n",
                                       obd->obd_name, waited,
                                       atomic_read(&obd->obd_refcount));
-                        dump_exports(obd);
+                        dump_exports(obd, 0);
                 }
                 waited *= 2;
                 spin_lock(&obd->obd_dev_lock);
index 57d8761..2ee0fe3 100644 (file)
@@ -513,7 +513,7 @@ int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg)
                    by other things as well, so don't count on it. */
                 CDEBUG(D_IOCTL, "%s: forcing exports to disconnect: %d\n",
                        obd->obd_name, atomic_read(&obd->obd_refcount) - 3);
-                dump_exports(obd);
+                dump_exports(obd, 0);
                 class_disconnect_exports(obd);
         }