From: zam Date: Wed, 4 Nov 2009 18:44:10 +0000 (+0000) Subject: Branch HEAD X-Git-Tag: GIT_EPOCH_B_HD_KDMU~122 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=e184f98f8c3e4316996fff3f7c0cf69096be6684 Branch HEAD b=20498 i=tappro i=robert.read More debugging output for hanging unmount: dump locks for the exports with extra references. --- diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index 5c6ea4b..c83a841 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -51,7 +51,6 @@ #include #include #include -#include /* for obd_export, for LDLM_DEBUG */ #include /* for interval_node{}, ldlm_extent */ #include @@ -545,6 +544,8 @@ struct ldlm_interval_tree { struct interval_node *lit_root; /* actually ldlm_interval */ }; +#define LUSTRE_TRACKS_LOCK_EXP_REFS (1) + struct ldlm_lock { /** * Must be first in the structure. @@ -715,6 +716,16 @@ struct ldlm_lock { struct list_head l_sl_mode; struct list_head l_sl_policy; struct lu_ref l_reference; +#if LUSTRE_TRACKS_LOCK_EXP_REFS + /* Debugging stuff for bug 20498, for tracking export + references. */ + /** number of export references taken */ + int l_exp_refs_nr; + /** link all locks referencing one export */ + struct list_head l_exp_refs_link; + /** referenced export object */ + struct obd_export *l_exp_refs_target; +#endif }; struct ldlm_resource { @@ -918,6 +929,9 @@ static inline int ldlm_res_lvbo_update(struct ldlm_resource *res, int ldlm_error2errno(ldlm_error_t error); ldlm_error_t ldlm_errno2error(int err_no); /* don't call it `errno': this * confuses user-space. */ +#if LUSTRE_TRACKS_LOCK_EXP_REFS +void ldlm_dump_export_locks(struct obd_export *exp); +#endif /** * Release a temporary lock reference obtained by ldlm_handle2lock() or diff --git a/lustre/include/lustre_export.h b/lustre/include/lustre_export.h index 72a83af..cf24fa6 100644 --- a/lustre/include/lustre_export.h +++ b/lustre/include/lustre_export.h @@ -152,7 +152,10 @@ struct obd_export { atomic_t exp_rpc_count; /** RPC references */ atomic_t exp_cb_count; /** Commit callback references */ atomic_t exp_locks_count; /** Lock references */ - +#if LUSTRE_TRACKS_LOCK_EXP_REFS + struct list_head exp_locks_list; + spinlock_t exp_locks_list_guard; +#endif atomic_t exp_replay_count; struct obd_uuid exp_client_uuid; struct list_head exp_obd_chain; diff --git a/lustre/include/obd_class.h b/lustre/include/obd_class.h index 0d00d44..9cadd6b 100644 --- a/lustre/include/obd_class.h +++ b/lustre/include/obd_class.h @@ -115,7 +115,7 @@ struct obd_device *class_incref(struct obd_device *obd, const char *scope, const void *source); void class_decref(struct obd_device *obd, const char *scope, const void *source); -void dump_exports(struct obd_device *obd); +void dump_exports(struct obd_device *obd, int locks); /*obdecho*/ #ifdef LPROCFS @@ -172,6 +172,19 @@ struct lustre_profile *class_get_profile(const char * prof); void class_del_profile(const char *prof); void class_del_profiles(void); +#if LUSTRE_TRACKS_LOCK_EXP_REFS + +void __class_export_add_lock_ref(struct obd_export *, struct ldlm_lock *); +void __class_export_del_lock_ref(struct obd_export *, struct ldlm_lock *); +extern void (*class_export_dump_hook)(struct obd_export *); + +#else + +#define __class_export_add_lock_ref(exp, lock) do {} while(0) +#define __class_export_del_lock_ref(exp, lock) do {} while(0) + +#endif + #define class_export_rpc_get(exp) \ ({ \ atomic_inc(&(exp)->exp_rpc_count); \ @@ -189,18 +202,20 @@ void class_del_profiles(void); class_export_put(exp); \ }) -#define class_export_lock_get(exp) \ +#define class_export_lock_get(exp, lock) \ ({ \ atomic_inc(&(exp)->exp_locks_count); \ + __class_export_add_lock_ref(exp, lock); \ CDEBUG(D_INFO, "lock GETting export %p : new locks_count %d\n", \ (exp), atomic_read(&(exp)->exp_locks_count)); \ class_export_get(exp); \ }) -#define class_export_lock_put(exp) \ +#define class_export_lock_put(exp, lock) \ ({ \ LASSERT(atomic_read(&exp->exp_locks_count) > 0); \ atomic_dec(&(exp)->exp_locks_count); \ + __class_export_del_lock_ref(exp, lock); \ CDEBUG(D_INFO, "lock PUTting export %p : new locks_count %d\n", \ (exp), atomic_read(&(exp)->exp_locks_count)); \ class_export_put(exp); \ diff --git a/lustre/ldlm/ldlm_flock.c b/lustre/ldlm/ldlm_flock.c index b3b2cb1..c80c634 100644 --- a/lustre/ldlm/ldlm_flock.c +++ b/lustre/ldlm/ldlm_flock.c @@ -400,7 +400,7 @@ reprocess: new->l_policy_data.l_flock.end + 1; new2->l_conn_export = lock->l_conn_export; if (lock->l_export != NULL) { - new2->l_export = class_export_lock_get(lock->l_export); + new2->l_export = class_export_lock_get(lock->l_export, new2); if (new2->l_export->exp_lock_hash && hlist_unhashed(&new2->l_exp_hash)) lustre_hash_add(new2->l_export->exp_lock_hash, diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index ec88bed..04dcd77 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -2476,3 +2476,18 @@ ldlm_error_t ldlm_errno2error(int err_no) } EXPORT_SYMBOL(ldlm_errno2error); +#if LUSTRE_TRACKS_LOCK_EXP_REFS +void ldlm_dump_export_locks(struct obd_export *exp) +{ + spin_lock(&exp->exp_locks_list_guard); + if (!list_empty(&exp->exp_locks_list)) { + struct ldlm_lock *lock; + + CERROR("dumping locks for export %p," + "ignore if the unmount doesn't hang\n", exp); + list_for_each_entry(lock, &exp->exp_locks_list, l_exp_refs_link) + ldlm_lock_dump(D_ERROR, lock, 0); + } + spin_unlock(&exp->exp_locks_list_guard); +} +#endif diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 5d719d4..dd57661 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -164,7 +164,7 @@ void ldlm_lock_put(struct ldlm_lock *lock) ldlm_resource_putref(res); lock->l_resource = NULL; if (lock->l_export) { - class_export_lock_put(lock->l_export); + class_export_lock_put(lock->l_export, lock); lock->l_export = NULL; } @@ -371,6 +371,12 @@ static struct ldlm_lock *ldlm_lock_new(struct ldlm_resource *resource) lu_ref_add(&lock->l_reference, "hash", lock); lock->l_callback_timeout = 0; +#if LUSTRE_TRACKS_LOCK_EXP_REFS + CFS_INIT_LIST_HEAD(&lock->l_exp_refs_link); + lock->l_exp_refs_nr = 0; + lock->l_exp_refs_target = NULL; +#endif + RETURN(lock); } diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 9f3dc50..c59fd3e 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -208,7 +208,7 @@ static int expired_lock_main(void *arg) LDLM_LOCK_RELEASE(lock); continue; } - export = class_export_lock_get(lock->l_export); + export = class_export_lock_get(lock->l_export, lock); spin_unlock_bh(&waiting_locks_spinlock); /* release extra ref grabbed by ldlm_add_waiting_lock() @@ -217,7 +217,7 @@ static int expired_lock_main(void *arg) do_dump++; class_fail_export(export); - class_export_lock_put(export); + class_export_lock_put(export, lock); spin_lock_bh(&waiting_locks_spinlock); } spin_unlock_bh(&waiting_locks_spinlock); @@ -1109,7 +1109,8 @@ int ldlm_handle_enqueue0(struct ldlm_namespace *ns, LDLM_ERROR(lock, "lock on destroyed export %p", req->rq_export); GOTO(out, rc = -ENOTCONN); } - lock->l_export = class_export_lock_get(req->rq_export); + + lock->l_export = class_export_lock_get(req->rq_export, lock); if (lock->l_export->exp_lock_hash) lustre_hash_add(lock->l_export->exp_lock_hash, &lock->l_remote_handle, @@ -2492,7 +2493,9 @@ int __init ldlm_init(void) cfs_mem_cache_destroy(ldlm_lock_slab); return -ENOMEM; } - +#if LUSTRE_TRACKS_LOCK_EXP_REFS + class_export_dump_hook = ldlm_dump_export_locks; +#endif return 0; } diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index e6e39bd..d4f8a48 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -3180,7 +3180,7 @@ int mdt_intent_lock_replace(struct mdt_thread_info *info, new_lock->l_writers--; } - new_lock->l_export = class_export_lock_get(req->rq_export); + new_lock->l_export = class_export_lock_get(req->rq_export, new_lock); new_lock->l_blocking_ast = lock->l_blocking_ast; new_lock->l_completion_ast = lock->l_completion_ast; new_lock->l_remote_handle = lock->l_remote_handle; diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 1f9c9b1..9899ff3 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -62,7 +62,8 @@ spinlock_t obd_zombie_impexp_lock; static void obd_zombie_impexp_notify(void); static void obd_zombie_export_add(struct obd_export *exp); static void obd_zombie_import_add(struct obd_import *imp); -static void print_export_data(struct obd_export *exp, const char *status); +static void print_export_data(struct obd_export *exp, + const char *status, int locks); int (*ptlrpc_put_connection_superhack)(struct ptlrpc_connection *c); @@ -785,6 +786,10 @@ struct obd_export *class_new_export(struct obd_device *obd, atomic_set(&export->exp_rpc_count, 0); atomic_set(&export->exp_cb_count, 0); atomic_set(&export->exp_locks_count, 0); +#if LUSTRE_TRACKS_LOCK_EXP_REFS + CFS_INIT_LIST_HEAD(&export->exp_locks_list); + spin_lock_init(&export->exp_locks_list_guard); +#endif atomic_set(&export->exp_replay_count, 0); export->exp_obd = obd; CFS_INIT_LIST_HEAD(&export->exp_outstanding_replies); @@ -985,6 +990,49 @@ void class_destroy_import(struct obd_import *import) } EXPORT_SYMBOL(class_destroy_import); +#if LUSTRE_TRACKS_LOCK_EXP_REFS + +void __class_export_add_lock_ref(struct obd_export *exp, struct ldlm_lock *lock) +{ + spin_lock(&exp->exp_locks_list_guard); + + LASSERT(lock->l_exp_refs_nr >= 0); + + if (lock->l_exp_refs_target != NULL && + lock->l_exp_refs_target != exp) { + LCONSOLE_WARN("setting export %p for lock %p which already has export %p\n", + exp, lock, lock->l_exp_refs_target); + } + if ((lock->l_exp_refs_nr ++) == 0) { + list_add(&lock->l_exp_refs_link, &exp->exp_locks_list); + lock->l_exp_refs_target = exp; + } + CDEBUG(D_INFO, "lock = %p, export = %p, refs = %u\n", + lock, exp, lock->l_exp_refs_nr); + spin_unlock(&exp->exp_locks_list_guard); +} +EXPORT_SYMBOL(__class_export_add_lock_ref); + +void __class_export_del_lock_ref(struct obd_export *exp, struct ldlm_lock *lock) +{ + spin_lock(&exp->exp_locks_list_guard); + LASSERT(lock->l_exp_refs_nr > 0); + if (lock->l_exp_refs_target != exp) { + LCONSOLE_WARN("lock %p, " + "mismatching export pointers: %p, %p\n", + lock, lock->l_exp_refs_target, exp); + } + if (-- lock->l_exp_refs_nr == 0) { + list_del_init(&lock->l_exp_refs_link); + lock->l_exp_refs_target = NULL; + } + CDEBUG(D_INFO, "lock = %p, export = %p, refs = %u\n", + lock, exp, lock->l_exp_refs_nr); + spin_unlock(&exp->exp_locks_list_guard); +} +EXPORT_SYMBOL(__class_export_del_lock_ref); +#endif + /* A connection defines an export context in which preallocation can be managed. This releases the export pointer reference, and returns the export handle, so the export refcount is 1 when this function @@ -1188,7 +1236,7 @@ void class_disconnect_stale_exports(struct obd_device *obd, obd->obd_name, exp->exp_client_uuid.uuid, exp->exp_connection == NULL ? "" : libcfs_nid2str(exp->exp_connection->c_peer.nid)); - print_export_data(exp, "EVICTING"); + print_export_data(exp, "EVICTING", 0); } spin_unlock(&obd->obd_dev_lock); @@ -1308,7 +1356,13 @@ int obd_export_evict_by_uuid(struct obd_device *obd, const char *uuid) } EXPORT_SYMBOL(obd_export_evict_by_uuid); -static void print_export_data(struct obd_export *exp, const char *status) +#if LUSTRE_TRACKS_LOCK_EXP_REFS +void (*class_export_dump_hook)(struct obd_export*) = NULL; +EXPORT_SYMBOL(class_export_dump_hook); +#endif + +static void print_export_data(struct obd_export *exp, const char *status, + int locks) { struct ptlrpc_reply_state *rs; struct ptlrpc_reply_state *first_reply = NULL; @@ -1331,23 +1385,27 @@ static void print_export_data(struct obd_export *exp, const char *status) exp->exp_disconnected, exp->exp_delayed, exp->exp_failed, nreplies, first_reply, nreplies > 3 ? "..." : "", exp->exp_last_committed); +#if LUSTRE_TRACKS_LOCK_EXP_REFS + if (locks && class_export_dump_hook != NULL) + class_export_dump_hook(exp); +#endif } -void dump_exports(struct obd_device *obd) +void dump_exports(struct obd_device *obd, int locks) { struct obd_export *exp; spin_lock(&obd->obd_dev_lock); list_for_each_entry(exp, &obd->obd_exports, exp_obd_chain) - print_export_data(exp, "ACTIVE"); + print_export_data(exp, "ACTIVE", locks); list_for_each_entry(exp, &obd->obd_unlinked_exports, exp_obd_chain) - print_export_data(exp, "UNLINKED"); + print_export_data(exp, "UNLINKED", locks); list_for_each_entry(exp, &obd->obd_delayed_exports, exp_obd_chain) - print_export_data(exp, "DELAYED"); + print_export_data(exp, "DELAYED", locks); spin_unlock(&obd->obd_dev_lock); spin_lock(&obd_zombie_impexp_lock); list_for_each_entry(exp, &obd_zombie_exports, exp_obd_chain) - print_export_data(exp, "ZOMBIE"); + print_export_data(exp, "ZOMBIE", locks); spin_unlock(&obd_zombie_impexp_lock); } EXPORT_SYMBOL(dump_exports); @@ -1366,7 +1424,7 @@ void obd_exports_barrier(struct obd_device *obd) "The obd refcount = %d. Is it stuck?\n", obd->obd_name, waited, atomic_read(&obd->obd_refcount)); - dump_exports(obd); + dump_exports(obd, 0); } waited *= 2; spin_lock(&obd->obd_dev_lock); diff --git a/lustre/obdclass/obd_config.c b/lustre/obdclass/obd_config.c index 57d8761..2ee0fe3 100644 --- a/lustre/obdclass/obd_config.c +++ b/lustre/obdclass/obd_config.c @@ -513,7 +513,7 @@ int class_cleanup(struct obd_device *obd, struct lustre_cfg *lcfg) by other things as well, so don't count on it. */ CDEBUG(D_IOCTL, "%s: forcing exports to disconnect: %d\n", obd->obd_name, atomic_read(&obd->obd_refcount) - 3); - dump_exports(obd); + dump_exports(obd, 0); class_disconnect_exports(obd); }