From 2c7a41a5c5958d36fa839ea91f3e63375cbd90fc Mon Sep 17 00:00:00 2001 From: Vitaly Fertman Date: Tue, 26 Jun 2012 01:47:50 +0400 Subject: [PATCH] LU-1157 ldlm: replace waiting flock lists by hashes replace per-export list by per-export hash to locate a lock with blocking export & owner. Change-Id: I9c4089579bbf126781e232ea7021317fd10223e9 Xyratex-Bug-ID: MRP-385 Reviewed-by: Andriy Skulysh Signed-off-by: Vitaly Fertman Reviewed-on: http://review.whamcloud.com/2240 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Oleg Drokin --- libcfs/include/libcfs/libcfs_hash.h | 5 +- lustre/include/lustre_dlm.h | 8 ++ lustre/include/lustre_export.h | 8 +- lustre/ldlm/ldlm_flock.c | 205 ++++++++++++++++++++++++++++-------- lustre/ldlm/ldlm_internal.h | 2 + lustre/ldlm/ldlm_lock.c | 1 + lustre/ldlm/ldlm_lockd.c | 2 + lustre/mdt/mdt_handler.c | 17 ++- lustre/obdclass/genops.c | 4 +- 9 files changed, 193 insertions(+), 59 deletions(-) diff --git a/libcfs/include/libcfs/libcfs_hash.h b/libcfs/include/libcfs/libcfs_hash.h index e24ea60..ca785b8 100644 --- a/libcfs/include/libcfs/libcfs_hash.h +++ b/libcfs/include/libcfs/libcfs_hash.h @@ -354,7 +354,10 @@ typedef struct cfs_hash_ops { void * (*hs_key)(cfs_hlist_node_t *hnode); /** copy key from @hnode to @key */ void (*hs_keycpy)(cfs_hlist_node_t *hnode, void *key); - /** compare @key with key of @hnode */ + /** + * compare @key with key of @hnode + * returns 1 on a match + */ int (*hs_keycmp)(const void *key, cfs_hlist_node_t *hnode); /** return object address of @hnode, i.e: container_of(...hnode) */ void * (*hs_object)(cfs_hlist_node_t *hnode); diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index f7c8010..a2b9b9d 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -601,6 +601,8 @@ struct ldlm_flock { __u64 owner; __u64 blocking_owner; struct obd_export *blocking_export; + /* Protected by the hash lock */ + __u32 blocking_refs; __u32 pid; }; @@ -655,6 +657,12 @@ struct ldlm_lock { /** * Protected by lr_lock. Requested mode. */ + /** + * Protected by per-bucket exp->exp_flock_hash locks. Per export hash + * of locks. + */ + cfs_hlist_node_t l_exp_flock_hash; + ldlm_mode_t l_req_mode; /** * Granted mode, also protected by lr_lock. diff --git a/lustre/include/lustre_export.h b/lustre/include/lustre_export.h index 6bd4bdf..a0740c7 100644 --- a/lustre/include/lustre_export.h +++ b/lustre/include/lustre_export.h @@ -212,13 +212,11 @@ struct obd_export { __u32 exp_conn_cnt; /** Hash list of all ldlm locks granted on this export */ cfs_hash_t *exp_lock_hash; - /** Lock protecting access to exp_flock_wait_list */ - cfs_rwlock_t exp_flock_wait_lock; /** - * Wait queue for Posix lock deadlock detection, added with - * ldlm_lock::l_flock_waitq. + * Hash list for Posix lock deadlock detection, added with + * ldlm_lock::l_exp_flock_hash. 
*/ - cfs_list_t exp_flock_wait_list; + cfs_hash_t *exp_flock_hash; cfs_list_t exp_outstanding_replies; cfs_list_t exp_uncommitted_replies; cfs_spinlock_t exp_uncommitted_replies_lock; diff --git a/lustre/ldlm/ldlm_flock.c b/lustre/ldlm/ldlm_flock.c index 4090ae0..3d312f0 100644 --- a/lustre/ldlm/ldlm_flock.c +++ b/lustre/ldlm/ldlm_flock.c @@ -53,8 +53,6 @@ #include "ldlm_internal.h" -#define l_flock_waitq l_lru - int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, void *data, int flag); @@ -86,24 +84,34 @@ ldlm_flocks_overlap(struct ldlm_lock *lock, struct ldlm_lock *new) lock->l_policy_data.l_flock.start)); } -static inline void ldlm_flock_blocking_link(struct ldlm_lock *req, - struct ldlm_lock *lock) +static inline int ldlm_flock_blocking_link(struct ldlm_lock *req, + struct ldlm_lock *lock) { + int rc = 0; + /* For server only */ if (req->l_export == NULL) - return; + return 0; + + if (unlikely(req->l_export->exp_flock_hash == NULL)) { + rc = ldlm_init_flock_export(req->l_export); + if (rc) + goto error; + } - LASSERT(cfs_list_empty(&req->l_flock_waitq)); - cfs_write_lock(&req->l_export->exp_flock_wait_lock); + LASSERT(cfs_hlist_unhashed(&req->l_exp_flock_hash)); req->l_policy_data.l_flock.blocking_owner = lock->l_policy_data.l_flock.owner; req->l_policy_data.l_flock.blocking_export = - class_export_get(lock->l_export); - - cfs_list_add_tail(&req->l_flock_waitq, - &req->l_export->exp_flock_wait_list); - cfs_write_unlock(&req->l_export->exp_flock_wait_lock); + lock->l_export; + req->l_policy_data.l_flock.blocking_refs = 0; + + cfs_hash_add(req->l_export->exp_flock_hash, + &req->l_policy_data.l_flock.owner, + &req->l_exp_flock_hash); +error: + return rc; } static inline void ldlm_flock_blocking_unlink(struct ldlm_lock *req) @@ -112,15 +120,12 @@ static inline void ldlm_flock_blocking_unlink(struct ldlm_lock *req) if (req->l_export == NULL) return; - cfs_write_lock(&req->l_export->exp_flock_wait_lock); - if (!cfs_list_empty(&req->l_flock_waitq)) { - cfs_list_del_init(&req->l_flock_waitq); - - class_export_put(req->l_policy_data.l_flock.blocking_export); - req->l_policy_data.l_flock.blocking_owner = 0; - req->l_policy_data.l_flock.blocking_export = NULL; - } - cfs_write_unlock(&req->l_export->exp_flock_wait_lock); + check_res_locked(req->l_resource); + if (req->l_export->exp_flock_hash != NULL && + !cfs_hlist_unhashed(&req->l_exp_flock_hash)) + cfs_hash_del(req->l_export->exp_flock_hash, + &req->l_policy_data.l_flock.owner, + &req->l_exp_flock_hash); } static inline void @@ -132,7 +137,7 @@ ldlm_flock_destroy(struct ldlm_lock *lock, ldlm_mode_t mode, int flags) mode, flags); /* Safe to not lock here, since it should be empty anyway */ - LASSERT(cfs_list_empty(&lock->l_flock_waitq)); + LASSERT(cfs_hlist_unhashed(&lock->l_exp_flock_hash)); cfs_list_del_init(&lock->l_res_link); if (flags == LDLM_FL_WAIT_NOREPROC && @@ -154,40 +159,39 @@ ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *bl_lock) { struct obd_export *req_exp = req->l_export; struct obd_export *bl_exp = bl_lock->l_export; - struct obd_export *bl_exp_new; __u64 req_owner = req->l_policy_data.l_flock.owner; __u64 bl_owner = bl_lock->l_policy_data.l_flock.owner; - struct ldlm_lock *lock; /* For server only */ if (req_exp == NULL) return 0; class_export_get(bl_exp); -restart: - cfs_read_lock(&bl_exp->exp_flock_wait_lock); - cfs_list_for_each_entry(lock, &bl_exp->exp_flock_wait_list, - l_flock_waitq) { - struct ldlm_flock *flock = &lock->l_policy_data.l_flock; - - /* want to find something from 
same client and same process */ - if (flock->owner != bl_owner) - continue; - + while (1) { + struct obd_export *bl_exp_new; + struct ldlm_lock *lock = NULL; + struct ldlm_flock *flock; + + if (bl_exp->exp_flock_hash != NULL) + lock = cfs_hash_lookup(bl_exp->exp_flock_hash, + &bl_owner); + if (lock == NULL) + break; + + flock = &lock->l_policy_data.l_flock; + LASSERT(flock->owner == bl_owner); bl_owner = flock->blocking_owner; bl_exp_new = class_export_get(flock->blocking_export); - cfs_read_unlock(&bl_exp->exp_flock_wait_lock); class_export_put(bl_exp); + + cfs_hash_put(bl_exp->exp_flock_hash, &lock->l_exp_flock_hash); bl_exp = bl_exp_new; if (bl_owner == req_owner && bl_exp == req_exp) { class_export_put(bl_exp); return 1; } - - goto restart; } - cfs_read_unlock(&bl_exp->exp_flock_wait_lock); class_export_put(bl_exp); return 0; @@ -210,6 +214,7 @@ ldlm_process_flock_lock(struct ldlm_lock *req, int *flags, int first_enq, int overlaps = 0; int splitted = 0; const struct ldlm_callback_suite null_cbs = { NULL }; + int rc; ENTRY; CDEBUG(D_DLMTRACE, "flags %#x owner "LPU64" pid %u mode %u start "LPU64 @@ -291,8 +296,12 @@ reprocess: RETURN(LDLM_ITER_STOP); } - - ldlm_flock_blocking_link(req, lock); + rc = ldlm_flock_blocking_link(req, lock); + if (rc) { + ldlm_flock_destroy(req, mode, *flags); + *err = rc; + RETURN(LDLM_ITER_STOP); + } ldlm_resource_add_lock(res, &res->lr_waiting, req); *flags |= LDLM_FL_BLOCK_GRANTED; RETURN(LDLM_ITER_STOP); @@ -307,7 +316,7 @@ reprocess: } /* In case we had slept on this lock request take it off of the - * deadlock detection waitq. */ + * deadlock detection hash list. */ ldlm_flock_blocking_unlink(req); /* Scan the locks owned by this process that overlap this request. @@ -529,11 +538,11 @@ ldlm_flock_interrupted_wait(void *data) lock = ((struct ldlm_flock_wait_data *)data)->fwd_lock; - /* take lock off the deadlock detection waitq. */ + /* take lock off the deadlock detection hash list. */ + lock_res_and_lock(lock); ldlm_flock_blocking_unlink(lock); /* client side - set flag to prevent lock from being put on lru list */ - lock_res_and_lock(lock); lock->l_flags |= LDLM_FL_CBPENDING; unlock_res_and_lock(lock); @@ -640,10 +649,11 @@ granted: LDLM_DEBUG(lock, "client-side enqueue granted"); - /* take lock off the deadlock detection waitq. */ + lock_res_and_lock(lock); + + /* take lock off the deadlock detection hash list. */ ldlm_flock_blocking_unlink(lock); - lock_res_and_lock(lock); /* ldlm_lock_enqueue() has already placed lock on the granted list. */ cfs_list_del_init(&lock->l_res_link); @@ -689,8 +699,10 @@ int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, LASSERT(lock); LASSERT(flag == LDLM_CB_CANCELING); - /* take lock off the deadlock detection waitq. */ + /* take lock off the deadlock detection hash list. */ + lock_res_and_lock(lock); ldlm_flock_blocking_unlink(lock); + unlock_res_and_lock(lock); RETURN(0); } @@ -727,3 +739,104 @@ void ldlm_flock_policy_local_to_wire(const ldlm_policy_data_t *lpolicy, wpolicy->l_flock.lfw_pid = lpolicy->l_flock.pid; wpolicy->l_flock.lfw_owner = lpolicy->l_flock.owner; } + +/* + * Export handle<->flock hash operations. 
+ */ +static unsigned +ldlm_export_flock_hash(cfs_hash_t *hs, const void *key, unsigned mask) +{ + return cfs_hash_u64_hash(*(__u64 *)key, mask); +} + +static void * +ldlm_export_flock_key(cfs_hlist_node_t *hnode) +{ + struct ldlm_lock *lock; + + lock = cfs_hlist_entry(hnode, struct ldlm_lock, l_exp_flock_hash); + return &lock->l_policy_data.l_flock.owner; +} + +static int +ldlm_export_flock_keycmp(const void *key, cfs_hlist_node_t *hnode) +{ + return !memcmp(ldlm_export_flock_key(hnode), key, sizeof(__u64)); +} + +static void * +ldlm_export_flock_object(cfs_hlist_node_t *hnode) +{ + return cfs_hlist_entry(hnode, struct ldlm_lock, l_exp_flock_hash); +} + +static void +ldlm_export_flock_get(cfs_hash_t *hs, cfs_hlist_node_t *hnode) +{ + struct ldlm_lock *lock; + struct ldlm_flock *flock; + + lock = cfs_hlist_entry(hnode, struct ldlm_lock, l_exp_flock_hash); + LDLM_LOCK_GET(lock); + + flock = &lock->l_policy_data.l_flock; + LASSERT(flock->blocking_export != NULL); + class_export_get(flock->blocking_export); + flock->blocking_refs++; +} + +static void +ldlm_export_flock_put(cfs_hash_t *hs, cfs_hlist_node_t *hnode) +{ + struct ldlm_lock *lock; + struct ldlm_flock *flock; + + lock = cfs_hlist_entry(hnode, struct ldlm_lock, l_exp_flock_hash); + LDLM_LOCK_RELEASE(lock); + + flock = &lock->l_policy_data.l_flock; + LASSERT(flock->blocking_export != NULL); + class_export_put(flock->blocking_export); + if (--flock->blocking_refs == 0) { + flock->blocking_owner = 0; + flock->blocking_export = NULL; + } +} + +static cfs_hash_ops_t ldlm_export_flock_ops = { + .hs_hash = ldlm_export_flock_hash, + .hs_key = ldlm_export_flock_key, + .hs_keycmp = ldlm_export_flock_keycmp, + .hs_object = ldlm_export_flock_object, + .hs_get = ldlm_export_flock_get, + .hs_put = ldlm_export_flock_put, + .hs_put_locked = ldlm_export_flock_put, +}; + +int ldlm_init_flock_export(struct obd_export *exp) +{ + exp->exp_flock_hash = + cfs_hash_create(obd_uuid2str(&exp->exp_client_uuid), + HASH_EXP_LOCK_CUR_BITS, + HASH_EXP_LOCK_MAX_BITS, + HASH_EXP_LOCK_BKT_BITS, 0, + CFS_HASH_MIN_THETA, CFS_HASH_MAX_THETA, + &ldlm_export_flock_ops, + CFS_HASH_DEFAULT | CFS_HASH_NBLK_CHANGE); + if (!exp->exp_flock_hash) + RETURN(-ENOMEM); + + RETURN(0); +} +EXPORT_SYMBOL(ldlm_init_flock_export); + +void ldlm_destroy_flock_export(struct obd_export *exp) +{ + ENTRY; + if (exp->exp_flock_hash) { + cfs_hash_putref(exp->exp_flock_hash); + exp->exp_flock_hash = NULL; + } + EXIT; +} +EXPORT_SYMBOL(ldlm_destroy_flock_export); diff --git a/lustre/ldlm/ldlm_internal.h b/lustre/ldlm/ldlm_internal.h index 55d694b4..237152d 100644 --- a/lustre/ldlm/ldlm_internal.h +++ b/lustre/ldlm/ldlm_internal.h @@ -167,6 +167,8 @@ void ldlm_extent_unlink_lock(struct ldlm_lock *lock); /* ldlm_flock.c */ int ldlm_process_flock_lock(struct ldlm_lock *req, int *flags, int first_enq, ldlm_error_t *err, cfs_list_t *work_list); +int ldlm_init_flock_export(struct obd_export *exp); +void ldlm_destroy_flock_export(struct obd_export *exp); /* l_lock.c */ void l_check_ns_lock(struct ldlm_namespace *ns); diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index 09126de..cb4c3cb 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -438,6 +438,7 @@ static struct ldlm_lock *ldlm_lock_new(struct ldlm_resource *resource) CFS_INIT_LIST_HEAD(&lock->l_sl_mode); CFS_INIT_LIST_HEAD(&lock->l_sl_policy); CFS_INIT_HLIST_NODE(&lock->l_exp_hash); + CFS_INIT_HLIST_NODE(&lock->l_exp_flock_hash); lprocfs_counter_incr(ldlm_res_to_ns(resource)->ns_stats, LDLM_NSS_LOCKS); diff --git 
a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 0883897..a605621 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -2515,6 +2515,8 @@ void ldlm_destroy_export(struct obd_export *exp) ENTRY; cfs_hash_putref(exp->exp_lock_hash); exp->exp_lock_hash = NULL; + + ldlm_destroy_flock_export(exp); EXIT; } EXPORT_SYMBOL(ldlm_destroy_export); diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index ee01e1c..648ac70 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -5312,12 +5312,21 @@ static int mdt_init_export(struct obd_export *exp) RETURN(0); rc = lut_client_alloc(exp); - if (rc == 0) - rc = ldlm_init_export(exp); if (rc) - CERROR("%s: Error %d while initializing export\n", - exp->exp_obd->obd_name, rc); + GOTO(err, rc); + + rc = ldlm_init_export(exp); + if (rc) + GOTO(err_free, rc); + RETURN(rc); + +err_free: + lut_client_free(exp); +err: + CERROR("%s: Error %d while initializing export\n", + exp->exp_obd->obd_name, rc); + return rc; } static int mdt_destroy_export(struct obd_export *exp) diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 081b55e..b103fa5 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -781,7 +781,6 @@ static void class_export_destroy(struct obd_export *exp) if (exp->exp_connection) ptlrpc_put_connection_superhack(exp->exp_connection); - LASSERT(cfs_list_empty(&exp->exp_flock_wait_list)); LASSERT(cfs_list_empty(&exp->exp_outstanding_replies)); LASSERT(cfs_list_empty(&exp->exp_uncommitted_replies)); LASSERT(cfs_list_empty(&exp->exp_req_replay_queue)); @@ -850,6 +849,7 @@ struct obd_export *class_new_export(struct obd_device *obd, export->exp_conn_cnt = 0; export->exp_lock_hash = NULL; + export->exp_flock_hash = NULL; cfs_atomic_set(&export->exp_refcount, 2); cfs_atomic_set(&export->exp_rpc_count, 0); cfs_atomic_set(&export->exp_cb_count, 0); @@ -860,8 +860,6 @@ struct obd_export *class_new_export(struct obd_device *obd, #endif cfs_atomic_set(&export->exp_replay_count, 0); export->exp_obd = obd; - CFS_INIT_LIST_HEAD(&export->exp_flock_wait_list); - cfs_rwlock_init(&export->exp_flock_wait_lock); CFS_INIT_LIST_HEAD(&export->exp_outstanding_replies); cfs_spin_lock_init(&export->exp_uncommitted_replies_lock); CFS_INIT_LIST_HEAD(&export->exp_uncommitted_replies); -- 1.8.3.1
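
Illustration (not part of the patch): the core of this change is that flock deadlock detection no longer scans a per-export wait list under exp_flock_wait_lock; instead each export keeps a hash of waiting flock requests keyed by lock owner, and ldlm_flock_deadlock() follows the blocking chain with one keyed lookup per hop. The standalone sketch below only models that walk; toy_export, toy_lock, toy_blocking_link, toy_lookup and toy_deadlock are invented names, and the refcounting, per-bucket locking and table resizing of the real cfs_hash are deliberately left out.

/*
 * Standalone sketch (invented toy_* names): each export keeps a hash of
 * waiting flock requests keyed by lock owner; deadlock detection follows
 * the blocking chain through those hashes.
 */
#include <stdio.h>

#define OWNER_HASH_SIZE 64

struct toy_export;

struct toy_lock {
	unsigned long long  owner;           /* owner of the waiting request */
	unsigned long long  blocking_owner;  /* owner it is blocked by */
	struct toy_export  *blocking_export; /* export of the blocking owner */
	struct toy_lock    *next;            /* hash chain link */
};

struct toy_export {
	/* per-export hash of waiting requests, keyed by owner */
	struct toy_lock *flock_hash[OWNER_HASH_SIZE];
};

static unsigned int owner_hash(unsigned long long owner)
{
	return (unsigned int)(owner % OWNER_HASH_SIZE);
}

/* rough analogue of the cfs_hash_add() call in ldlm_flock_blocking_link() */
static void toy_blocking_link(struct toy_export *exp, struct toy_lock *req)
{
	unsigned int i = owner_hash(req->owner);

	req->next = exp->flock_hash[i];
	exp->flock_hash[i] = req;
}

/* rough analogue of cfs_hash_lookup() keyed by the lock owner */
static struct toy_lock *toy_lookup(struct toy_export *exp,
				   unsigned long long owner)
{
	struct toy_lock *lck;

	for (lck = exp->flock_hash[owner_hash(owner)]; lck != NULL; lck = lck->next)
		if (lck->owner == owner)
			return lck;
	return NULL;
}

/*
 * Walk the blocking chain: the requester waits on (bl_owner, bl_exp); if
 * that owner has a waiting request of its own recorded in its export's
 * hash, hop to whatever it is blocked by, and so on.  Deadlock if the
 * chain comes back to the original (req_owner, req_exp) pair.  Like the
 * real code, this assumes any cycle not involving the requester would
 * already have been rejected when it was about to form.
 */
static int toy_deadlock(struct toy_export *req_exp,
			unsigned long long req_owner,
			struct toy_export *bl_exp,
			unsigned long long bl_owner)
{
	while (1) {
		struct toy_lock *lck = toy_lookup(bl_exp, bl_owner);

		if (lck == NULL)
			return 0;	/* chain ends, no deadlock */

		bl_owner = lck->blocking_owner;
		bl_exp = lck->blocking_export;

		if (bl_owner == req_owner && bl_exp == req_exp)
			return 1;	/* cycle back to the requester */
	}
}

int main(void)
{
	struct toy_export e1 = { { NULL } };
	struct toy_export e2 = { { NULL } };
	/* owner 1 (on e1) waits on owner 2 (on e2), and vice versa */
	struct toy_lock l1 = { 1, 2, &e2, NULL };
	struct toy_lock l2 = { 2, 1, &e1, NULL };

	toy_blocking_link(&e1, &l1);
	toy_blocking_link(&e2, &l2);

	printf("deadlock: %d\n", toy_deadlock(&e1, 1, &e2, 2)); /* prints 1 */
	return 0;
}

Compared with the removed exp_flock_wait_list walk under the single exp_flock_wait_lock rwlock, each hop in the patched code is a lookup keyed by owner rather than a list scan, and contention moves to the per-bucket locks of exp_flock_hash.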
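
A smaller point the patch documents in libcfs_hash.h: hs_keycmp() returns 1 on a match, which is the opposite of memcmp()'s 0-on-equal convention, hence the !memcmp(...) in ldlm_export_flock_keycmp(). A minimal standalone illustration of that convention (key_match is an invented name, not a libcfs function):

#include <assert.h>
#include <string.h>

/* cfs_hash-style key compare: 1 when the keys match, 0 otherwise */
static int key_match(const void *key, const void *node_key)
{
	/* memcmp() returns 0 on equality, so negate it */
	return !memcmp(key, node_key, sizeof(unsigned long long));
}

int main(void)
{
	unsigned long long a = 42, b = 42, c = 7;

	assert(key_match(&a, &b) == 1);
	assert(key_match(&a, &c) == 0);
	return 0;
}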