From: Vitaly Fertman Date: Thu, 1 Mar 2012 16:50:05 +0000 (+0400) Subject: LU-1156 ldlm: per-export waiting flock lists for deadlock detection X-Git-Tag: 2.2.51~7 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=2179aa60137557a8a289ee3b105a5efa7db90ac8 LU-1156 ldlm: per-export waiting flock lists for deadlock detection Replace the single global list of waiting flocks, protected by one global spinlock, with a per-export list protected by a per-export lock. Change-Id: Ib59c1b4e0e3d08e07505bdfffc007ac2850e9258 Xyratex-Bug-ID: MRP-382 Reviewed-by: Alexey Lyashkov Reviewed-by: Andriy Skulysh Signed-off-by: Vitaly Fertman Reviewed-on: http://review.whamcloud.com/2239 Tested-by: Hudson Reviewed-by: Andriy Skulysh Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/lustre_export.h b/lustre/include/lustre_export.h index 0dbe3a6..8bac3f3 100644 --- a/lustre/include/lustre_export.h +++ b/lustre/include/lustre_export.h @@ -213,8 +213,13 @@ struct obd_export { __u32 exp_conn_cnt; /** Hash list of all ldlm locks granted on this export */ cfs_hash_t *exp_lock_hash; - /** lock to protect exp_lock_hash accesses */ - cfs_spinlock_t exp_lock_hash_lock; + /** Lock protecting access to exp_flock_wait_list */ + cfs_rwlock_t exp_flock_wait_lock; + /** + * Wait queue for Posix lock deadlock detection, added with + * ldlm_lock::l_flock_waitq. + */ + cfs_list_t exp_flock_wait_list; cfs_list_t exp_outstanding_replies; cfs_list_t exp_uncommitted_replies; cfs_spinlock_t exp_uncommitted_replies_lock; diff --git a/lustre/ldlm/ldlm_flock.c b/lustre/ldlm/ldlm_flock.c index 1303e8f..c988f28 100644 --- a/lustre/ldlm/ldlm_flock.c +++ b/lustre/ldlm/ldlm_flock.c @@ -57,16 +57,6 @@ #define l_flock_waitq l_lru -/** - * Wait queue for Posix lock deadlock detection, added with - * ldlm_lock::l_flock_waitq. - */ -static CFS_LIST_HEAD(ldlm_flock_waitq); -/** - * Lock protecting access to ldlm_flock_waitq. 
- */ -cfs_spinlock_t ldlm_flock_waitq_lock = CFS_SPIN_LOCK_UNLOCKED; - int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, void *data, int flag); @@ -98,6 +88,43 @@ ldlm_flocks_overlap(struct ldlm_lock *lock, struct ldlm_lock *new) lock->l_policy_data.l_flock.start)); } +static inline void ldlm_flock_blocking_link(struct ldlm_lock *req, + struct ldlm_lock *lock) +{ + /* For server only */ + if (req->l_export == NULL) + return; + + LASSERT(cfs_list_empty(&req->l_flock_waitq)); + cfs_write_lock(&req->l_export->exp_flock_wait_lock); + + req->l_policy_data.l_flock.blocking_owner = + lock->l_policy_data.l_flock.owner; + req->l_policy_data.l_flock.blocking_export = + class_export_get(lock->l_export); + + cfs_list_add_tail(&req->l_flock_waitq, + &req->l_export->exp_flock_wait_list); + cfs_write_unlock(&req->l_export->exp_flock_wait_lock); +} + +static inline void ldlm_flock_blocking_unlink(struct ldlm_lock *req) +{ + /* For server only */ + if (req->l_export == NULL) + return; + + cfs_write_lock(&req->l_export->exp_flock_wait_lock); + if (!cfs_list_empty(&req->l_flock_waitq)) { + cfs_list_del_init(&req->l_flock_waitq); + + class_export_put(req->l_policy_data.l_flock.blocking_export); + req->l_policy_data.l_flock.blocking_owner = 0; + req->l_policy_data.l_flock.blocking_export = NULL; + } + cfs_write_unlock(&req->l_export->exp_flock_wait_lock); +} + static inline void ldlm_flock_destroy(struct ldlm_lock *lock, ldlm_mode_t mode, int flags) { @@ -125,33 +152,45 @@ ldlm_flock_destroy(struct ldlm_lock *lock, ldlm_mode_t mode, int flags) } static int -ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *blocking_lock) +ldlm_flock_deadlock(struct ldlm_lock *req, struct ldlm_lock *bl_lock) { - struct obd_export *req_export = req->l_export; - struct obd_export *blocking_export = blocking_lock->l_export; + struct obd_export *req_exp = req->l_export; + struct obd_export *bl_exp = bl_lock->l_export; + struct obd_export *bl_exp_new; __u64 req_owner 
= req->l_policy_data.l_flock.owner; - __u64 blocking_owner = blocking_lock->l_policy_data.l_flock.owner; + __u64 bl_owner = bl_lock->l_policy_data.l_flock.owner; struct ldlm_lock *lock; - cfs_spin_lock(&ldlm_flock_waitq_lock); + /* For server only */ + if (req_exp == NULL) + return 0; + + class_export_get(bl_exp); restart: - cfs_list_for_each_entry(lock, &ldlm_flock_waitq, l_flock_waitq) { - if ((lock->l_policy_data.l_flock.owner != blocking_owner) || - (lock->l_export != blocking_export)) + cfs_read_lock(&bl_exp->exp_flock_wait_lock); + cfs_list_for_each_entry(lock, &bl_exp->exp_flock_wait_list, + l_flock_waitq) { + struct ldlm_flock *flock = &lock->l_policy_data.l_flock; + + /* want to find something from same client and same process */ + if (flock->owner != bl_owner) continue; - blocking_owner = lock->l_policy_data.l_flock.blocking_owner; - blocking_export = (struct obd_export *) - lock->l_policy_data.l_flock.blocking_export; - if (blocking_owner == req_owner && - blocking_export == req_export) { - cfs_spin_unlock(&ldlm_flock_waitq_lock); + bl_owner = flock->blocking_owner; + bl_exp_new = class_export_get(flock->blocking_export); + cfs_read_unlock(&bl_exp->exp_flock_wait_lock); + class_export_put(bl_exp); + bl_exp = bl_exp_new; + + if (bl_owner == req_owner && bl_exp == req_exp) { + class_export_put(bl_exp); return 1; } goto restart; } - cfs_spin_unlock(&ldlm_flock_waitq_lock); + cfs_read_unlock(&bl_exp->exp_flock_wait_lock); + class_export_put(bl_exp); return 0; } @@ -254,17 +293,8 @@ reprocess: RETURN(LDLM_ITER_STOP); } - req->l_policy_data.l_flock.blocking_owner = - lock->l_policy_data.l_flock.owner; - req->l_policy_data.l_flock.blocking_export = - lock->l_export; - - LASSERT(cfs_list_empty(&req->l_flock_waitq)); - cfs_spin_lock(&ldlm_flock_waitq_lock); - cfs_list_add_tail(&req->l_flock_waitq, - &ldlm_flock_waitq); - cfs_spin_unlock(&ldlm_flock_waitq_lock); + ldlm_flock_blocking_link(req, lock); ldlm_resource_add_lock(res, &res->lr_waiting, req); *flags |= 
LDLM_FL_BLOCK_GRANTED; RETURN(LDLM_ITER_STOP); @@ -280,9 +310,7 @@ reprocess: /* In case we had slept on this lock request take it off of the * deadlock detection waitq. */ - cfs_spin_lock(&ldlm_flock_waitq_lock); - cfs_list_del_init(&req->l_flock_waitq); - cfs_spin_unlock(&ldlm_flock_waitq_lock); + ldlm_flock_blocking_unlink(req); /* Scan the locks owned by this process that overlap this request. * We may have to merge or split existing locks. */ @@ -496,9 +524,7 @@ ldlm_flock_interrupted_wait(void *data) lock = ((struct ldlm_flock_wait_data *)data)->fwd_lock; /* take lock off the deadlock detection waitq. */ - cfs_spin_lock(&ldlm_flock_waitq_lock); - cfs_list_del_init(&lock->l_flock_waitq); - cfs_spin_unlock(&ldlm_flock_waitq_lock); + ldlm_flock_blocking_unlink(lock); /* client side - set flag to prevent lock from being put on lru list */ lock->l_flags |= LDLM_FL_CBPENDING; @@ -607,9 +633,7 @@ granted: LDLM_DEBUG(lock, "client-side enqueue granted"); /* take lock off the deadlock detection waitq. */ - cfs_spin_lock(&ldlm_flock_waitq_lock); - cfs_list_del_init(&lock->l_flock_waitq); - cfs_spin_unlock(&ldlm_flock_waitq_lock); + ldlm_flock_blocking_unlink(lock); lock_res_and_lock(lock); /* ldlm_lock_enqueue() has already placed lock on the granted list. */ @@ -658,9 +682,7 @@ int ldlm_flock_blocking_ast(struct ldlm_lock *lock, struct ldlm_lock_desc *desc, LASSERT(flag == LDLM_CB_CANCELING); /* take lock off the deadlock detection waitq. 
*/ - cfs_spin_lock(&ldlm_flock_waitq_lock); - cfs_list_del_init(&lock->l_flock_waitq); - cfs_spin_unlock(&ldlm_flock_waitq_lock); + ldlm_flock_blocking_unlink(lock); RETURN(0); } diff --git a/lustre/obdclass/genops.c b/lustre/obdclass/genops.c index 1584930..ad0330d 100644 --- a/lustre/obdclass/genops.c +++ b/lustre/obdclass/genops.c @@ -783,6 +783,7 @@ static void class_export_destroy(struct obd_export *exp) if (exp->exp_connection) ptlrpc_put_connection_superhack(exp->exp_connection); + LASSERT(cfs_list_empty(&exp->exp_flock_wait_list)); LASSERT(cfs_list_empty(&exp->exp_outstanding_replies)); LASSERT(cfs_list_empty(&exp->exp_uncommitted_replies)); LASSERT(cfs_list_empty(&exp->exp_req_replay_queue)); @@ -856,6 +857,8 @@ struct obd_export *class_new_export(struct obd_device *obd, #endif cfs_atomic_set(&export->exp_replay_count, 0); export->exp_obd = obd; + CFS_INIT_LIST_HEAD(&export->exp_flock_wait_list); + cfs_rwlock_init(&export->exp_flock_wait_lock); CFS_INIT_LIST_HEAD(&export->exp_outstanding_replies); cfs_spin_lock_init(&export->exp_uncommitted_replies_lock); CFS_INIT_LIST_HEAD(&export->exp_uncommitted_replies);