Whamcloud - gitweb
b=21501 Properly cleanup flock lock on disconnect
authorVitaly Fertman <vitaly.fertman@sun.com>
Wed, 30 Mar 2011 10:36:27 +0000 (14:36 +0400)
committerTerry Rutledge <terry.rutledge@oracle.com>
Wed, 30 Mar 2011 15:10:42 +0000 (08:10 -0700)
Properly wakeup flock waiters on eviction.
Destroyed lock for flock completion ast is not an error, return success
to avoid double lock decref.

i=rread
i=adilger

lustre/ldlm/ldlm_flock.c
lustre/ldlm/ldlm_request.c

index 10be102..648b7ba 100644 (file)
@@ -474,8 +474,6 @@ static void
 ldlm_flock_interrupted_wait(void *data)
 {
         struct ldlm_lock *lock;
-        struct lustre_handle lockh;
-        int rc;
         ENTRY;
 
         lock = ((struct ldlm_flock_wait_data *)data)->fwd_lock;
@@ -488,12 +486,6 @@ ldlm_flock_interrupted_wait(void *data)
         /* client side - set flag to prevent lock from being put on lru list */
         lock->l_flags |= LDLM_FL_CBPENDING;
 
-        ldlm_lock_decref_internal(lock, lock->l_req_mode);
-        ldlm_lock2handle(lock, &lockh);
-        rc = ldlm_cli_cancel(&lockh);
-        if (rc != ELDLM_OK)
-                CERROR("ldlm_cli_cancel: %d\n", rc);
-
         EXIT;
 }
 
@@ -526,17 +518,16 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data)
          * references being held, so that it can go away. No point in
          * holding the lock even if app still believes it has it, since
          * server already dropped it anyway. Only for granted locks too. */
-        lock_res_and_lock(lock);
         if ((lock->l_flags & (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) ==
             (LDLM_FL_FAILED|LDLM_FL_LOCAL_ONLY)) {
-                unlock_res_and_lock(lock);
                 if (lock->l_req_mode == lock->l_granted_mode &&
                     lock->l_granted_mode != LCK_NL &&
                     NULL == data)
                         ldlm_lock_decref_internal(lock, lock->l_req_mode);
+                /* Need to wake up the waiter if we were evicted */
+                cfs_waitq_signal(&lock->l_waitq);
                 RETURN(0);
         }
-        unlock_res_and_lock(lock);
 
         LASSERT(flags != LDLM_FL_WAIT_NOREPROC);
 
@@ -579,16 +570,19 @@ ldlm_flock_completion_ast(struct ldlm_lock *lock, int flags, void *data)
 granted:
         OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_CP_CB_WAIT, 10);
 
-        lock_res_and_lock(lock);
-        if (lock->l_destroyed || lock->l_flags & LDLM_FL_FAILED) {
+        if (lock->l_destroyed) {
                 LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
-                unlock_res_and_lock(lock);
+                RETURN(0);
+        }
+
+        if (lock->l_flags & LDLM_FL_FAILED) {
+                LDLM_DEBUG(lock, "client-side enqueue waking up: failed");
                 RETURN(-EIO);
         }
+
         if (rc) {
                 LDLM_DEBUG(lock, "client-side enqueue waking up: failed (%d)",
                            rc);
-                unlock_res_and_lock(lock);
                 RETURN(rc);
         }
 
@@ -599,6 +593,7 @@ granted:
         list_del_init(&lock->l_flock_waitq);
         spin_unlock(&ldlm_flock_waitq_lock);
 
+        lock_res_and_lock(lock);
         /* ldlm_lock_enqueue() has already placed lock on the granted list. */
         list_del_init(&lock->l_res_link);
 
index 7d692d0..44d5d97 100644 (file)
@@ -339,8 +339,7 @@ int ldlm_cli_enqueue_local(struct ldlm_namespace *ns,
 }
 
 static void failed_lock_cleanup(struct ldlm_namespace *ns,
-                                struct ldlm_lock *lock,
-                                struct lustre_handle *lockh, int mode)
+                                struct ldlm_lock *lock, int mode)
 {
         int need_cancel = 0;
 
@@ -353,25 +352,31 @@ static void failed_lock_cleanup(struct ldlm_namespace *ns,
                  * bl_ast and -EINVAL reply is sent to server anyways.
                  * bug 17645 */
                 lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_FAILED |
-                                 LDLM_FL_ATOMIC_CB;
+                                 LDLM_FL_ATOMIC_CB | LDLM_FL_CBPENDING;
                 need_cancel = 1;
         }
         unlock_res_and_lock(lock);
 
-        if (need_cancel) {
+        if (need_cancel)
                 LDLM_DEBUG(lock,
                            "setting FL_LOCAL_ONLY | LDLM_FL_FAILED | "
-                           "LDLM_FL_ATOMIC_CB");
-                ldlm_lock_decref_and_cancel(lockh, mode);
-        } else {
+                           "LDLM_FL_ATOMIC_CB | LDLM_FL_CBPENDING");
+        else
                 LDLM_DEBUG(lock, "lock was granted or failed in race");
-                ldlm_lock_decref(lockh, mode);
-        }
+
+        ldlm_lock_decref_internal(lock, mode);
 
         /* XXX - HACK because we shouldn't call ldlm_lock_destroy()
          *       from llite/file.c/ll_file_flock(). */
+        /* This code makes for the fact that we do not have blocking handler on
+         * a client for flock locks. As such this is the place where we must
+         * completely kill failed locks. (interrupted and those that
+         * were waiting to be granted when server evicted us. */
         if (lock->l_resource->lr_type == LDLM_FLOCK) {
-                ldlm_lock_destroy(lock);
+                lock_res_and_lock(lock);
+                ldlm_resource_unlink_lock(lock);
+                ldlm_lock_destroy_nolock(lock);
+                unlock_res_and_lock(lock);
         }
 }
 
@@ -530,7 +535,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
                         int err = lock->l_completion_ast(lock, *flags, NULL);
                         if (!rc)
                                 rc = err;
-                        if (rc && type != LDLM_FLOCK) /* bug 9425, bug 10250 */
+                        if (rc)
                                 cleanup_phase = 1;
                 }
         }
@@ -544,7 +549,7 @@ int ldlm_cli_enqueue_fini(struct obd_export *exp, struct ptlrpc_request *req,
         EXIT;
 cleanup:
         if (cleanup_phase == 1 && rc)
-                failed_lock_cleanup(ns, lock, lockh, mode);
+                failed_lock_cleanup(ns, lock, mode);
         /* Put lock 2 times, the second reference is held by ldlm_cli_enqueue */
         LDLM_LOCK_PUT(lock);
         LDLM_LOCK_PUT(lock);
@@ -718,7 +723,7 @@ int ldlm_cli_enqueue(struct obd_export *exp, struct ptlrpc_request **reqp,
         if (reqp == NULL || *reqp == NULL) {
                 req = ldlm_prep_enqueue_req(exp, 2, size, NULL, 0);
                 if (req == NULL) {
-                        failed_lock_cleanup(ns, lock, lockh, einfo->ei_mode);
+                        failed_lock_cleanup(ns, lock, einfo->ei_mode);
                         LDLM_LOCK_PUT(lock);
                         RETURN(-ENOMEM);
                 }