From 158a5a2c08a0b5ff1652331915f1d3cd2f41384a Mon Sep 17 00:00:00 2001
From: Oleg Drokin
Date: Fri, 19 Mar 2010 11:19:21 -0700
Subject: [PATCH] b=21501 Properly clean up flock locks on disconnect

Properly wake up flock waiters on eviction.
A destroyed lock seen by the flock completion AST is not an error;
return success there to avoid a double lock decref.

i=rread
i=adilger
---
 lustre/ldlm/ldlm_flock.c   | 26 +++++++++++---------------
 lustre/ldlm/ldlm_request.c | 31 ++++++++++++++++++-------------
 2 files changed, 29 insertions(+), 28 deletions(-)

diff --git a/lustre/ldlm/ldlm_flock.c b/lustre/ldlm/ldlm_flock.c
index 752768f..914ed4e 100644
--- a/lustre/ldlm/ldlm_flock.c
+++ b/lustre/ldlm/ldlm_flock.c
@@ -485,8 +485,6 @@ static void
 ldlm_flock_interrupted_wait(void *data)
 {
         struct ldlm_lock *lock;
-        struct lustre_handle lockh;
-        int rc;
         ENTRY;
 
         lock = ((struct ldlm_flock_wait_data *)data)->fwd_lock;
@@ -499,12 +497,6 @@ ldlm_flock_interrupted_wait(void *data)
         /* client side - set flag to prevent lock from being put on lru list */
         lock->l_flags |= LDLM_FL_CBPENDING;
 
-        ldlm_lock_decref_internal(lock, lock->l_req_mode);
-        ldlm_lock2handle(lock, &lockh);
-        rc = ldlm_cli_cancel(&lockh);
-        if (rc != ELDLM_OK)
-                CERROR("ldlm_cli_cancel: %d\n", rc);
-
         EXIT;
 }
 
@@ -537,17 +529,17 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data)
          * references being held, so that it can go away. No point in
          * holding the lock even if app still believes it has it, since
          * server already dropped it anyway. Only for granted locks too. */
-        lock_res_and_lock(lock);
         if ((lock->l_flags & (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) ==
             (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) {
-                unlock_res_and_lock(lock);
                 if (lock->l_req_mode == lock->l_granted_mode &&
                     lock->l_granted_mode != LCK_NL &&
                     NULL == data)
                         ldlm_lock_decref_internal(lock, lock->l_req_mode);
+
+                /* Need to wake up the waiter if we were evicted */
+                cfs_waitq_signal(&lock->l_waitq);
                 RETURN(0);
         }
-        unlock_res_and_lock(lock);
 
         LASSERT(flags != LDLM_FL_WAIT_NOREPROC);
 
@@ -590,16 +582,19 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data)
 granted:
         OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT, 10);
 
-        lock_res_and_lock(lock);
-        if (lock->l_destroyed || lock->l_flags & LDLM_FL_FAILED) {
+        if (lock->l_destroyed) {
                 LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
-                unlock_res(lock->l_resource);
+                RETURN(0);
+        }
+
+        if (lock->l_flags & LDLM_FL_FAILED) {
+                LDLM_DEBUG(lock, "client-side enqueue waking up: failed");
                 RETURN(-EIO);
         }
+
         if (rc) {
                 LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)",
                            rc);
-                unlock_res_and_lock(lock);
                 RETURN(rc);
         }
 
@@ -613,6 +608,7 @@ granted:
         /* ldlm_lock_enqueue() has already placed lock on the granted list. */
         cfs_list_del_init(&lock->l_res_link);
 
+        lock_res_and_lock(lock);
         if (flags & LDLM_FL_TEST_LOCK) {
                 /* fcntl(F_GETLK) request */
                 /* The old mode was saved in getlk->fl_type so that if the mode
diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c
index 4c7c2f0..6e70bf6 100644
--- a/lustre/ldlm/ldlm_request.c
+++ b/lustre/ldlm/ldlm_request.c
@@ -433,8 +433,7 @@ int ldlm_cli_enqueue_local(struct ldlm_namespace *ns,
 }
 
 static void failed_lock_cleanup(struct ldlm_namespace *ns,
-                                struct ldlm_lock *lock,
-                                struct lustre_handle *lockh, int mode)
+                                struct ldlm_lock *lock, int mode)
 {
         int need_cancel = 0;
 
@@ -447,25 +446,31 @@ static void failed_lock_cleanup(struct ldlm_namespace *ns,
                 /* Make sure that this lock will not be found by raced
                  * bl_ast and -EINVAL reply is sent to server anyways.
                  * bug 17645 */
                 lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_FAILED |
-                                 LDLM_FL_ATOMIC_CB;
+                                 LDLM_FL_ATOMIC_CB | LDLM_FL_CBPENDING;
                 need_cancel = 1;
         }
         unlock_res_and_lock(lock);
 
-        if (need_cancel) {
+        if (need_cancel)
                 LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY | LDLM_FL_FAILED | "
-                           "LDLM_FL_ATOMIC_CB");
-                ldlm_lock_decref_and_cancel(lockh, mode);
-        } else {
+                           "LDLM_FL_ATOMIC_CB | LDLM_FL_CBPENDING");
+        else
                 LDLM_DEBUG(lock, "lock was granted or failed in race");
-                ldlm_lock_decref(lockh, mode);
-        }
+
+        ldlm_lock_decref_internal(lock, mode);
 
         /* XXX - HACK because we shouldn't call ldlm_lock_destroy()
          *       from llite/file.c/ll_file_flock(). */
+        /* This makes up for the fact that the client has no blocking handler
+         * for flock locks. As such, this is the place where we must
+         * completely kill failed locks (both interrupted ones and those that
+         * were still waiting to be granted when the server evicted us). */
         if (lock->l_resource->lr_type == LDLM_FLOCK) {
-                ldlm_lock_destroy(lock);
+                lock_res_and_lock(lock);
+                ldlm_resource_unlink_lock(lock);
+                ldlm_lock_destroy_nolock(lock);
+                unlock_res_and_lock(lock);
         }
 }
 
@@ -614,7 +619,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
                         int err = lock->l_completion_ast(lock, *flags, NULL);
                         if (!rc)
                                 rc = err;
-                        if (rc && type != LDLM_FLOCK) /* bug 9425, bug 10250 */
+                        if (rc)
                                 cleanup_phase = 1;
                 }
         }
@@ -629,7 +634,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
         EXIT;
 cleanup:
         if (cleanup_phase == 1 && rc)
-                failed_lock_cleanup(ns, lock, lockh, mode);
+                failed_lock_cleanup(ns, lock, mode);
         /* Put lock 2 times, the second reference is held by ldlm_cli_enqueue */
         LDLM_LOCK_PUT(lock);
         LDLM_LOCK_RELEASE(lock);
@@ -816,7 +821,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
                                         LUSTRE_DLM_VERSION,
                                         LDLM_ENQUEUE);
                 if (req == NULL) {
-                        failed_lock_cleanup(ns, lock, lockh, einfo->ei_mode);
+                        failed_lock_cleanup(ns, lock, einfo->ei_mode);
                         LDLM_LOCK_RELEASE(lock);
                         RETURN(-ENOMEM);
                 }
-- 
1.8.3.1
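
The wakeup contract this patch establishes is easier to see outside the
Lustre tree. Below is a minimal, self-contained sketch in plain C, with
pthreads standing in for the cfs_waitq primitives; every identifier in it
(flock_lock, FL_FAILED, flock_completion_ast, and so on) is a hypothetical
stand-in, not the real Lustre API. It shows the two behaviors the patch
adds: the completion AST signals the wait queue when the client has been
evicted (FAILED|LOCAL_ONLY), and the sleeping enqueue thread treats a
destroyed lock as success while only a failed lock maps to -EIO.

#include <errno.h>
#include <pthread.h>
#include <stdio.h>

enum {
        FL_FAILED     = 1 << 0,  /* stand-in for LDLM_FL_FAILED */
        FL_LOCAL_ONLY = 1 << 1,  /* stand-in for LDLM_FL_LOCAL_ONLY */
        FL_DESTROYED  = 1 << 2,  /* stand-in for lock->l_destroyed */
};

struct flock_lock {
        pthread_mutex_t mtx;
        pthread_cond_t  waitq;          /* stand-in for lock->l_waitq */
        unsigned int    flags;
        int             granted;
};

/* Completion AST: if the client was evicted (FAILED|LOCAL_ONLY), the waiter
 * must still be signalled, otherwise the enqueue thread sleeps forever. */
static int flock_completion_ast(struct flock_lock *lk)
{
        pthread_mutex_lock(&lk->mtx);
        if ((lk->flags & (FL_FAILED | FL_LOCAL_ONLY)) !=
            (FL_FAILED | FL_LOCAL_ONLY))
                lk->granted = 1;        /* normal grant path */
        /* Wake up the waiter in either case. */
        pthread_cond_signal(&lk->waitq);
        pthread_mutex_unlock(&lk->mtx);
        return 0;
}

/* Enqueue side: a destroyed lock is not an error, because cleanup already
 * dropped the reference; returning 0 avoids a double decref. Only a
 * failed lock maps to -EIO. */
static int flock_wait_granted(struct flock_lock *lk)
{
        int rc = 0;

        pthread_mutex_lock(&lk->mtx);
        while (!lk->granted && !(lk->flags & (FL_FAILED | FL_DESTROYED)))
                pthread_cond_wait(&lk->waitq, &lk->mtx);

        if (lk->flags & FL_DESTROYED)
                rc = 0;
        else if (lk->flags & FL_FAILED)
                rc = -EIO;
        pthread_mutex_unlock(&lk->mtx);
        return rc;
}

static void *evictor(void *arg)
{
        struct flock_lock *lk = arg;

        /* Simulate the server evicting us while we wait for the grant. */
        pthread_mutex_lock(&lk->mtx);
        lk->flags |= FL_FAILED | FL_LOCAL_ONLY;
        pthread_mutex_unlock(&lk->mtx);
        (void)flock_completion_ast(lk);
        return NULL;
}

int main(void)
{
        struct flock_lock lk = {
                .mtx   = PTHREAD_MUTEX_INITIALIZER,
                .waitq = PTHREAD_COND_INITIALIZER,
        };
        pthread_t tid;

        pthread_create(&tid, NULL, evictor, &lk);
        printf("wait rc = %d\n", flock_wait_granted(&lk));  /* -EIO = -5 */
        pthread_join(tid, NULL);
        return 0;
}

Build with "cc -pthread sketch.c". Flipping FL_DESTROYED instead of
FL_FAILED in evictor() makes the wait return 0, mirroring the new
"destroyed" branch above: failed_lock_cleanup() has already released the
reference and destroyed the lock, so reporting an error there would make
the caller decref a second time, which is exactly the double decref the
commit message refers to.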
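The other half of the fix is the teardown in failed_lock_cleanup(): since
the client has no blocking handler for flock locks, nothing would ever
remove a failed lock later, so it must be unlinked from its resource and
destroyed on the spot, under the resource lock. A compact stand-alone
sketch of that shape follows (again pthreads; "resource" and "lock_node"
are hypothetical stand-ins for ldlm_resource and ldlm_lock, and free()
loosely stands in for ldlm_lock_destroy_nolock()).

#include <pthread.h>
#include <stdlib.h>

struct lock_node;

struct resource {
        pthread_mutex_t   lr_lock;      /* stand-in for lock_res_and_lock() */
        struct lock_node *lr_granted;   /* singly linked list of locks */
};

struct lock_node {
        struct lock_node *next;
        struct resource  *res;
};

/* Unlink and destroy happen inside one critical section so a concurrent
 * completion AST can never observe a half-removed lock. */
static void failed_flock_teardown(struct lock_node *victim)
{
        struct resource   *res = victim->res;
        struct lock_node **pp;

        pthread_mutex_lock(&res->lr_lock);         /* lock_res_and_lock() */
        for (pp = &res->lr_granted; *pp != NULL; pp = &(*pp)->next) {
                if (*pp == victim) {
                        *pp = victim->next;  /* ldlm_resource_unlink_lock() */
                        break;
                }
        }
        free(victim);                        /* ldlm_lock_destroy_nolock() */
        pthread_mutex_unlock(&res->lr_lock);
}

int main(void)
{
        struct resource res = { .lr_lock = PTHREAD_MUTEX_INITIALIZER };
        struct lock_node *lk = calloc(1, sizeof(*lk));

        lk->res = &res;
        res.lr_granted = lk;
        failed_flock_teardown(lk);  /* list is empty again, lock is gone */
        return 0;
}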