From: Oleg Drokin
Date: Wed, 17 Feb 2010 07:01:03 +0000 (+0300)
Subject: b=21501 flock in process hangs on eviction, does not fail
X-Git-Tag: 1.10.0.37~4
X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=7e750a8d9f9b87b7a3a608454c194d2fe7b69704

b=21501 flock in process hangs on eviction, does not fail

Wake up waiters when a failed lock gets a completion AST due to eviction.

i=adilger
i=bzzz
---

diff --git a/lustre/ldlm/ldlm_flock.c b/lustre/ldlm/ldlm_flock.c
index 752768f..7173777 100644
--- a/lustre/ldlm/ldlm_flock.c
+++ b/lustre/ldlm/ldlm_flock.c
@@ -485,8 +485,6 @@ static void
 ldlm_flock_interrupted_wait(void *data)
 {
         struct ldlm_lock *lock;
-        struct lustre_handle lockh;
-        int rc;
         ENTRY;
 
         lock = ((struct ldlm_flock_wait_data *)data)->fwd_lock;
@@ -499,12 +497,6 @@ ldlm_flock_interrupted_wait(void *data)
         /* client side - set flag to prevent lock from being put on lru list */
         lock->l_flags |= LDLM_FL_CBPENDING;
 
-        ldlm_lock_decref_internal(lock, lock->l_req_mode);
-        ldlm_lock2handle(lock, &lockh);
-        rc = ldlm_cli_cancel(&lockh);
-        if (rc != ELDLM_OK)
-                CERROR("ldlm_cli_cancel: %d\n", rc);
-
         EXIT;
 }
 
@@ -537,17 +529,17 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data)
          * references being held, so that it can go away. No point in
          * holding the lock even if app still believes it has it, since
          * server already dropped it anyway. Only for granted locks too. */
-        lock_res_and_lock(lock);
         if ((lock->l_flags & (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) ==
             (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) {
-                unlock_res_and_lock(lock);
                 if (lock->l_req_mode == lock->l_granted_mode &&
                     lock->l_granted_mode != LCK_NL &&
                     NULL == data)
                         ldlm_lock_decref_internal(lock, lock->l_req_mode);
+
+                /* Need to wake up the waiter if we were evicted */
+                cfs_waitq_signal(&lock->l_waitq);
                 RETURN(0);
         }
-        unlock_res_and_lock(lock);
 
         LASSERT(flags != LDLM_FL_WAIT_NOREPROC);
 
@@ -590,16 +582,13 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data)
 granted:
         OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT, 10);
 
-        lock_res_and_lock(lock);
         if (lock->l_destroyed || lock->l_flags & LDLM_FL_FAILED) {
                 LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
-                unlock_res(lock->l_resource);
                 RETURN(-EIO);
         }
 
         if (rc) {
                 LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)",
                            rc);
-                unlock_res_and_lock(lock);
                 RETURN(rc);
         }
 
@@ -613,6 +602,7 @@ granted:
         /* ldlm_lock_enqueue() has already placed lock on the granted list. */
         cfs_list_del_init(&lock->l_res_link);
 
+        lock_res_and_lock(lock);
         if (flags & LDLM_FL_TEST_LOCK) {
                 /* fcntl(F_GETLK) request */
                 /* The old mode was saved in getlk->fl_type so that if the mode
diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c
index b89b699..642bc58 100644
--- a/lustre/ldlm/ldlm_lockd.c
+++ b/lustre/ldlm/ldlm_lockd.c
@@ -926,6 +926,7 @@ int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data)
         if (req == NULL)
                 RETURN(-ENOMEM);
 
+        req->rq_no_resend = 1;
         body = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
         body->lock_handle[0] = lock->l_remote_handle;
         ldlm_lock2desc(lock, &body->lock_desc);
diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c
index 4c7c2f0..6e70bf6 100644
--- a/lustre/ldlm/ldlm_request.c
+++ b/lustre/ldlm/ldlm_request.c
@@ -433,8 +433,7 @@ int ldlm_cli_enqueue_local(struct ldlm_namespace *ns,
 }
 
 static void failed_lock_cleanup(struct ldlm_namespace *ns,
-                                struct ldlm_lock *lock,
-                                struct lustre_handle *lockh, int mode)
+                                struct ldlm_lock *lock, int mode)
 {
         int need_cancel = 0;
 
@@ -447,25 +446,31 @@ static void failed_lock_cleanup(struct ldlm_namespace *ns,
                  * bl_ast and -EINVAL reply is sent to server anyways.
                  * bug 17645 */
                 lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_FAILED |
-                                 LDLM_FL_ATOMIC_CB;
+                                 LDLM_FL_ATOMIC_CB | LDLM_FL_CBPENDING;
                 need_cancel = 1;
         }
         unlock_res_and_lock(lock);
 
-        if (need_cancel) {
+        if (need_cancel)
                 LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY | LDLM_FL_FAILED | "
-                           "LDLM_FL_ATOMIC_CB");
-                ldlm_lock_decref_and_cancel(lockh, mode);
-        } else {
+                           "LDLM_FL_ATOMIC_CB | LDLM_FL_CBPENDING");
+        else
                 LDLM_DEBUG(lock, "lock was granted or failed in race");
-                ldlm_lock_decref(lockh, mode);
-        }
+
+        ldlm_lock_decref_internal(lock, mode);
 
         /* XXX - HACK because we shouldn't call ldlm_lock_destroy()
          *       from llite/file.c/ll_file_flock(). */
+        /* We do not have a blocking handler on the client for flock locks,
+         * so this is the place where we must completely kill failed locks:
+         * both interrupted ones and those that were still waiting to be
+         * granted when the server evicted us. */
         if (lock->l_resource->lr_type == LDLM_FLOCK) {
-                ldlm_lock_destroy(lock);
+                lock_res_and_lock(lock);
+                ldlm_resource_unlink_lock(lock);
+                ldlm_lock_destroy_nolock(lock);
+                unlock_res_and_lock(lock);
         }
 }
 
@@ -614,7 +619,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
                         int err = lock->l_completion_ast(lock, *flags, NULL);
                         if (!rc)
                                 rc = err;
-                        if (rc && type != LDLM_FLOCK) /* bug 9425, bug 10250 */
+                        if (rc)
                                 cleanup_phase = 1;
                 }
         }
@@ -629,7 +634,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
         EXIT;
 cleanup:
         if (cleanup_phase == 1 && rc)
-                failed_lock_cleanup(ns, lock, lockh, mode);
+                failed_lock_cleanup(ns, lock, mode);
         /* Put lock 2 times, the second reference is held by ldlm_cli_enqueue */
         LDLM_LOCK_PUT(lock);
         LDLM_LOCK_RELEASE(lock);
@@ -816,7 +821,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
                                                 LUSTRE_DLM_VERSION,
                                                 LDLM_ENQUEUE);
                 if (req == NULL) {
-                        failed_lock_cleanup(ns, lock, lockh, einfo->ei_mode);
+                        failed_lock_cleanup(ns, lock, einfo->ei_mode);
                         LDLM_LOCK_RELEASE(lock);
                         RETURN(-ENOMEM);
                 }
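
Note on the mechanics (illustration only, not part of the patch): before this
change a process sleeping in ldlm_flock_completion_ast() waiting for a flock
to be granted was never signalled after the client was evicted, so it hung
forever instead of failing. The sketch below shows the wait/wake-up pairing
the patch establishes. It reuses real names from the diff above
(lock->l_waitq, cfs_waitq_signal(), LDLM_FL_FAILED, LDLM_FL_LOCAL_ONLY), but
the signatures are abridged and granted_or_failed() is a hypothetical
predicate standing in for the real grant/failure check.

    /* Waiter side: the completion AST path blocks until the flock is
     * granted -- or, with this patch, until eviction marks it failed. */
    static int flock_wait(struct ldlm_lock *lock)
    {
            struct l_wait_info lwi = { 0 };    /* no timeout, for brevity */
            int rc;

            /* sleep on lock->l_waitq until the lock is granted or failed */
            rc = l_wait_event(lock->l_waitq, granted_or_failed(lock), &lwi);
            if (lock->l_flags & LDLM_FL_FAILED)
                    return -EIO;               /* fail the flock, don't hang */
            return rc;
    }

    /* Wake-up side: on eviction the lock is flagged as failed and the
     * sleeping waiter is signalled instead of being left asleep forever. */
    static void flock_evicted(struct ldlm_lock *lock)
    {
            lock->l_flags |= LDLM_FL_FAILED | LDLM_FL_LOCAL_ONLY;
            cfs_waitq_signal(&lock->l_waitq);  /* the crux of this fix */
    }

With the cfs_waitq_signal() call added in ldlm_flock_completion_ast(), the
waiter wakes, observes LDLM_FL_FAILED, and returns -EIO to the application.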