Whamcloud - gitweb
b=21501 flock in process hangs on eviction, does not fail
author Oleg Drokin <green@linuxhacker.ru>
Wed, 17 Feb 2010 07:01:03 +0000 (10:01 +0300)
committer Mikhail Pershin <tappro@sun.com>
Wed, 17 Feb 2010 07:58:17 +0000 (10:58 +0300)
Wake up waiters when failed lock gets completion AST due to eviction.

i=adilger
i=bzzz

lustre/ldlm/ldlm_flock.c
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_request.c

index 752768f..7173777 100644 (file)
@@ -485,8 +485,6 @@ static void
 ldlm_flock_interrupted_wait(void *data)
 {
         struct ldlm_lock *lock;
 ldlm_flock_interrupted_wait(void *data)
 {
         struct ldlm_lock *lock;
-        struct lustre_handle lockh;
-        int rc;
         ENTRY;
 
         lock = ((struct ldlm_flock_wait_data *)data)->fwd_lock;
         ENTRY;
 
         lock = ((struct ldlm_flock_wait_data *)data)->fwd_lock;
@@ -499,12 +497,6 @@ ldlm_flock_interrupted_wait(void *data)
         /* client side - set flag to prevent lock from being put on lru list */
         lock->l_flags |= LDLM_FL_CBPENDING;
 
         /* client side - set flag to prevent lock from being put on lru list */
         lock->l_flags |= LDLM_FL_CBPENDING;
 
-        ldlm_lock_decref_internal(lock, lock->l_req_mode);
-        ldlm_lock2handle(lock, &lockh);
-        rc = ldlm_cli_cancel(&lockh);
-        if (rc != ELDLM_OK)
-                CERROR("ldlm_cli_cancel: %d\n", rc);
-
         EXIT;
 }
 
         EXIT;
 }
 
@@ -537,17 +529,17 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data)
          * references being held, so that it can go away. No point in
          * holding the lock even if app still believes it has it, since
          * server already dropped it anyway. Only for granted locks too. */
          * references being held, so that it can go away. No point in
          * holding the lock even if app still believes it has it, since
          * server already dropped it anyway. Only for granted locks too. */
-        lock_res_and_lock(lock);
         if ((lock->l_flags & (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) ==
             (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) {
         if ((lock->l_flags & (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) ==
             (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) {
-                unlock_res_and_lock(lock);
                 if (lock->l_req_mode == lock->l_granted_mode &&
                     lock->l_granted_mode != LCK_NL &&
                     NULL == data)
                         ldlm_lock_decref_internal(lock, lock->l_req_mode);
                 if (lock->l_req_mode == lock->l_granted_mode &&
                     lock->l_granted_mode != LCK_NL &&
                     NULL == data)
                         ldlm_lock_decref_internal(lock, lock->l_req_mode);
+
+                /* Need to wake up the waiter if we were evicted */
+                cfs_waitq_signal(&lock->l_waitq);
                 RETURN(0);
         }
                 RETURN(0);
         }
-        unlock_res_and_lock(lock);
 
         LASSERT(flags != LDLM_FL_WAIT_NOREPROC);
 
 
         LASSERT(flags != LDLM_FL_WAIT_NOREPROC);
 
@@ -590,16 +582,13 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data)
 granted:
         OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT, 10);
 
 granted:
         OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT, 10);
 
-        lock_res_and_lock(lock);
         if (lock->l_destroyed || lock->l_flags & LDLM_FL_FAILED) {
                 LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
         if (lock->l_destroyed || lock->l_flags & LDLM_FL_FAILED) {
                 LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
-                unlock_res(lock->l_resource);
                 RETURN(-EIO);
         }
         if (rc) {
                 LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)",
                            rc);
                 RETURN(-EIO);
         }
         if (rc) {
                 LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)",
                            rc);
-                unlock_res_and_lock(lock);
                 RETURN(rc);
         }
 
                 RETURN(rc);
         }
 
@@ -613,6 +602,7 @@ granted:
         /* ldlm_lock_enqueue() has already placed lock on the granted list. */
         cfs_list_del_init(&lock->l_res_link);
 
         /* ldlm_lock_enqueue() has already placed lock on the granted list. */
         cfs_list_del_init(&lock->l_res_link);
 
+        lock_res_and_lock(lock);
         if (flags & LDLM_FL_TEST_LOCK) {
                 /* fcntl(F_GETLK) request */
                 /* The old mode was saved in getlk->fl_type so that if the mode
         if (flags & LDLM_FL_TEST_LOCK) {
                 /* fcntl(F_GETLK) request */
                 /* The old mode was saved in getlk->fl_type so that if the mode
index b89b699..642bc58 100644 (file)
@@ -926,6 +926,7 @@ int ldlm_server_glimpse_ast(struct ldlm_lock *lock, void *data)
         if (req == NULL)
                 RETURN(-ENOMEM);
 
         if (req == NULL)
                 RETURN(-ENOMEM);
 
+        req->rq_no_resend = 1;
         body = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
         body->lock_handle[0] = lock->l_remote_handle;
         ldlm_lock2desc(lock, &body->lock_desc);
         body = req_capsule_client_get(&req->rq_pill, &RMF_DLM_REQ);
         body->lock_handle[0] = lock->l_remote_handle;
         ldlm_lock2desc(lock, &body->lock_desc);
index 4c7c2f0..6e70bf6 100644 (file)
@@ -433,8 +433,7 @@ int ldlm_cli_enqueue_local(struct ldlm_namespace *ns,
 }
 
 static void failed_lock_cleanup(struct ldlm_namespace *ns,
 }
 
 static void failed_lock_cleanup(struct ldlm_namespace *ns,
-                                struct ldlm_lock *lock,
-                                struct lustre_handle *lockh, int mode)
+                                struct ldlm_lock *lock, int mode)
 {
         int need_cancel = 0;
 
 {
         int need_cancel = 0;
 
@@ -447,25 +446,31 @@ static void failed_lock_cleanup(struct ldlm_namespace *ns,
                  * bl_ast and -EINVAL reply is sent to server anyways.
                  * bug 17645 */
                 lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_FAILED |
                  * bl_ast and -EINVAL reply is sent to server anyways.
                  * bug 17645 */
                 lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_FAILED |
-                                 LDLM_FL_ATOMIC_CB;
+                                 LDLM_FL_ATOMIC_CB | LDLM_FL_CBPENDING;
                 need_cancel = 1;
         }
         unlock_res_and_lock(lock);
 
                 need_cancel = 1;
         }
         unlock_res_and_lock(lock);
 
-        if (need_cancel) {
+        if (need_cancel)
                 LDLM_DEBUG(lock,
                            "setting FL_LOCAL_ONLY | LDLM_FL_FAILED | "
                 LDLM_DEBUG(lock,
                            "setting FL_LOCAL_ONLY | LDLM_FL_FAILED | "
-                           "LDLM_FL_ATOMIC_CB");
-                ldlm_lock_decref_and_cancel(lockh, mode);
-        } else {
+                           "LDLM_FL_ATOMIC_CB | LDLM_FL_CBPENDING");
+        else
                 LDLM_DEBUG(lock, "lock was granted or failed in race");
                 LDLM_DEBUG(lock, "lock was granted or failed in race");
-                ldlm_lock_decref(lockh, mode);
-        }
+
+        ldlm_lock_decref_internal(lock, mode);
 
         /* XXX - HACK because we shouldn't call ldlm_lock_destroy()
          *       from llite/file.c/ll_file_flock(). */
 
         /* XXX - HACK because we shouldn't call ldlm_lock_destroy()
          *       from llite/file.c/ll_file_flock(). */
+        /* This code makes up for the fact that we do not have a blocking
+         * handler on a client for flock locks. As such, this is the place
+         * where we must completely kill failed locks (interrupted ones and
+         * those that were waiting to be granted when the server evicted us). */
         if (lock->l_resource->lr_type == LDLM_FLOCK) {
         if (lock->l_resource->lr_type == LDLM_FLOCK) {
-                ldlm_lock_destroy(lock);
+                lock_res_and_lock(lock);
+                ldlm_resource_unlink_lock(lock);
+                ldlm_lock_destroy_nolock(lock);
+                unlock_res_and_lock(lock);
         }
 }
 
         }
 }
 
@@ -614,7 +619,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
                         int err = lock->l_completion_ast(lock, *flags, NULL);
                         if (!rc)
                                 rc = err;
                         int err = lock->l_completion_ast(lock, *flags, NULL);
                         if (!rc)
                                 rc = err;
-                        if (rc && type != LDLM_FLOCK) /* bug 9425, bug 10250 */
+                        if (rc)
                                 cleanup_phase = 1;
                 }
         }
                                 cleanup_phase = 1;
                 }
         }
@@ -629,7 +634,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
         EXIT;
 cleanup:
         if (cleanup_phase == 1 && rc)
         EXIT;
 cleanup:
         if (cleanup_phase == 1 && rc)
-                failed_lock_cleanup(ns, lock, lockh, mode);
+                failed_lock_cleanup(ns, lock, mode);
         /* Put lock 2 times, the second reference is held by ldlm_cli_enqueue */
         LDLM_LOCK_PUT(lock);
         LDLM_LOCK_RELEASE(lock);
         /* Put lock 2 times, the second reference is held by ldlm_cli_enqueue */
         LDLM_LOCK_PUT(lock);
         LDLM_LOCK_RELEASE(lock);
@@ -816,7 +821,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
                                                 LUSTRE_DLM_VERSION,
                                                 LDLM_ENQUEUE);
                 if (req == NULL) {
                                                 LUSTRE_DLM_VERSION,
                                                 LDLM_ENQUEUE);
                 if (req == NULL) {
-                        failed_lock_cleanup(ns, lock, lockh, einfo->ei_mode);
+                        failed_lock_cleanup(ns, lock, einfo->ei_mode);
                         LDLM_LOCK_RELEASE(lock);
                         RETURN(-ENOMEM);
                 }
                         LDLM_LOCK_RELEASE(lock);
                         RETURN(-ENOMEM);
                 }