From 657bbc4969be581aca66549ca0427ccec89ea5a2 Mon Sep 17 00:00:00 2001 From: Liang Zhen Date: Mon, 16 Mar 2015 09:25:17 +0800 Subject: [PATCH] LU-6416 ldlm: no canceled lock on waiting list If a lock is not granted straight away on the server, but is granted with LDLM_FL_AST_SENT set before ldlm_handle_enqueue0 sends out the reply, the client will know it needs to cancel this lock. In the meantime, this lock can be added to a long granting list by another server thread. When the lock cancel request arrives at the server, the server calls into ldlm_lock_cancel()-> ldlm_cancel_callback()-> tgt_blocking_ast(...LDLM_CB_CANCELING)-> tgt_sync(). The other server thread eventually gets a chance to send the completion AST for this lock with LDLM_FL_AST_SENT set, and adds this lock to the waiting list again. However, tgt_sync may take an arbitrary amount of time, which is unrelated to the AT of lock revocation on the client, so the server could evict the client only because the server itself has slow IO. To resolve this race, this patch no longer puts a canceled lock on the waiting list. Signed-off-by: Liang Zhen Change-Id: I86c1097d3ccbaa614b8811c1d9f37b39f019c61e Reviewed-on: http://review.whamcloud.com/14085 Reviewed-by: Bobi Jam Tested-by: Maloo Reviewed-by: Fan Yong Tested-by: Jenkins Reviewed-by: Oleg Drokin --- lustre/ldlm/ldlm_lock.c | 5 +---- lustre/ldlm/ldlm_lockd.c | 10 ++++++++-- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c index d41c964..5f57dea 100644 --- a/lustre/ldlm/ldlm_lock.c +++ b/lustre/ldlm/ldlm_lock.c @@ -2139,10 +2139,7 @@ void ldlm_lock_cancel(struct ldlm_lock *lock) /* Releases cancel callback. 
*/ ldlm_cancel_callback(lock); - /* Yes, second time, just in case it was added again while we were - * running with no res lock in ldlm_cancel_callback */ - if (ldlm_is_waited(lock)) - ldlm_del_waiting_lock(lock); + LASSERT(!ldlm_is_waited(lock)); ldlm_resource_unlink_lock(lock); ldlm_lock_destroy_nolock(lock); diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c index 394e11f..9f541d0 100644 --- a/lustre/ldlm/ldlm_lockd.c +++ b/lustre/ldlm/ldlm_lockd.c @@ -419,13 +419,17 @@ static int ldlm_add_waiting_lock(struct ldlm_lock *lock) /* NB: must be called with hold of lock_res_and_lock() */ LASSERT(ldlm_is_res_locked(lock)); - ldlm_set_waited(lock); - LASSERT(!ldlm_is_cancel_on_block(lock)); spin_lock_bh(&waiting_locks_spinlock); + if (ldlm_is_cancel(lock)) { + spin_unlock_bh(&waiting_locks_spinlock); + return 0; + } + if (ldlm_is_destroyed(lock)) { static cfs_time_t next; + spin_unlock_bh(&waiting_locks_spinlock); LDLM_ERROR(lock, "not waiting on destroyed lock (bug 5653)"); if (cfs_time_after(cfs_time_current(), next)) { @@ -435,6 +439,7 @@ static int ldlm_add_waiting_lock(struct ldlm_lock *lock) return 0; } + ldlm_set_waited(lock); lock->l_last_activity = cfs_time_current_sec(); ret = __ldlm_add_waiting_lock(lock, timeout); if (ret) { @@ -505,6 +510,7 @@ int ldlm_del_waiting_lock(struct ldlm_lock *lock) spin_lock_bh(&waiting_locks_spinlock); ret = __ldlm_del_waiting_lock(lock); + ldlm_clear_waited(lock); spin_unlock_bh(&waiting_locks_spinlock); /* remove the lock out of export blocking list */ -- 1.8.3.1