Whamcloud - gitweb
b=11300
author: jxiong <jxiong>
Thu, 6 Mar 2008 03:03:53 +0000 (03:03 +0000)
committer: jxiong <jxiong>
Thu, 6 Mar 2008 03:03:53 +0000 (03:03 +0000)
r=oleg,adilger

Fix interval tree issues at the customer's side.

lustre/include/obd_support.h
lustre/ldlm/ldlm_extent.c
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/tests/sanityN.sh

index a273555..27d7dff 100644 (file)
@@ -208,6 +208,7 @@ extern unsigned int obd_alloc_fail_rate;
 #define OBD_FAIL_LDLM_CANCEL_EVICT_RACE  0x311
 #define OBD_FAIL_LDLM_PAUSE_CANCEL       0x312
 #define OBD_FAIL_LDLM_CLOSE_THREAD       0x313
+#define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE  0x314
 
 #define OBD_FAIL_OSC                     0x400
 #define OBD_FAIL_OSC_BRW_READ_BULK       0x401
index aef74f2..cc0e3aa 100644 (file)
@@ -689,8 +689,24 @@ int ldlm_process_extent_lock(struct ldlm_lock *lock, int *flags, int first_enq,
                 unlock_res(res);
                 rc = ldlm_run_bl_ast_work(&rpc_list);
                 lock_res(res);
-                if (rc == -ERESTART)
+
+                if (rc == -ERESTART) {
+                        /* lock was granted while resource was unlocked. */
+                        if (lock->l_granted_mode == lock->l_req_mode) {
+                                /* bug 11300: if the lock has been granted,
+                                 * break earlier because otherwise, we will go
+                                 * to restart and ldlm_resource_unlink will be
+                                 * called and it causes the interval node to be
+                                 * freed. Then we will fail at 
+                                 * ldlm_extent_add_lock() */
+                                *flags &= ~(LDLM_FL_BLOCK_GRANTED | LDLM_FL_BLOCK_CONV |
+                                            LDLM_FL_BLOCK_WAIT);
+                                GOTO(out, rc = 0);
+                        }
+
                         GOTO(restart, -ERESTART);
+                }
+
                 *flags |= LDLM_FL_BLOCK_GRANTED;
                 /* this way we force client to wait for the lock
                  * endlessly once the lock is enqueued -bzzz */
index 48c5437..801ce27 100644 (file)
@@ -1166,6 +1166,7 @@ ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
         int local = ns_is_client(res->lr_namespace);
         ldlm_processing_policy policy;
         ldlm_error_t rc = ELDLM_OK;
+        struct ldlm_interval *node = NULL;
         ENTRY;
 
         do_gettimeofday(&lock->l_enqueued_time);
@@ -1192,16 +1193,35 @@ ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
                 }
         }
 
+        /* For a replaying lock, it might be already in granted list. So
+         * unlinking the lock will cause the interval node to be freed, we
+         * have to allocate the interval node early otherwise we can't regrant
+         * this lock in the future. - jay */
+        if (!local && (*flags & LDLM_FL_REPLAY) && res->lr_type == LDLM_EXTENT)
+                OBD_SLAB_ALLOC(node, ldlm_interval_slab, CFS_ALLOC_IO,
+                               sizeof(*node));
+
         lock_res_and_lock(lock);
         if (local && lock->l_req_mode == lock->l_granted_mode) {
-                /* The server returned a blocked lock, but it was granted before
-                 * we got a chance to actually enqueue it.  We don't need to do
-                 * anything else. */
+                /* The server returned a blocked lock, but it was granted
+                 * before we got a chance to actually enqueue it.  We don't
+                 * need to do anything else. */
                 *flags &= ~(LDLM_FL_BLOCK_GRANTED |
                             LDLM_FL_BLOCK_CONV | LDLM_FL_BLOCK_WAIT);
                 GOTO(out, ELDLM_OK);
         }
 
+        ldlm_resource_unlink_lock(lock);
+        if (res->lr_type == LDLM_EXTENT && lock->l_tree_node == NULL) {
+                if (node == NULL) {
+                        ldlm_lock_destroy_nolock(lock);
+                        GOTO(out, rc = -ENOMEM);
+                }
+
+                ldlm_interval_attach(node, lock);
+                node = NULL;
+        }
+
         /* Some flags from the enqueue want to make it into the AST, via the
          * lock's l_flags. */
         lock->l_flags |= *flags & LDLM_AST_DISCARD_DATA;
@@ -1217,7 +1237,6 @@ ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
          *
          * FIXME (bug 268): Detect obvious lies by checking compatibility in
          * granted/converting queues. */
-        ldlm_resource_unlink_lock(lock);
         if (local) {
                 if (*flags & LDLM_FL_BLOCK_CONV)
                         ldlm_resource_add_lock(res, &res->lr_converting, lock);
@@ -1245,6 +1264,8 @@ ldlm_error_t ldlm_lock_enqueue(struct ldlm_namespace *ns,
         GOTO(out, rc);
 out:
         unlock_res_and_lock(lock);
+        if (node)
+                OBD_SLAB_FREE(node, ldlm_interval_slab, sizeof(*node));
         return rc;
 }
 
index dd8845b..2764595 100644 (file)
@@ -1315,7 +1315,26 @@ static void ldlm_handle_cp_callback(struct ptlrpc_request *req,
 
         LDLM_DEBUG(lock, "client completion callback handler START");
 
+        if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE)) {
+                int to = cfs_time_seconds(1);
+                while (to > 0) {
+                        to = schedule_timeout(to);
+                        if (lock->l_granted_mode == lock->l_req_mode ||
+                            lock->l_destroyed)
+                                break;
+                }
+        }
+
         lock_res_and_lock(lock);
+        if (lock->l_destroyed ||
+            lock->l_granted_mode == lock->l_req_mode) {
+                /* bug 11300: the lock has already been granted */
+                unlock_res_and_lock(lock);
+                LDLM_DEBUG(lock, "Double grant race happened");
+                LDLM_LOCK_PUT(lock);
+                EXIT;
+                return;
+        }
 
         /* If we receive the completion AST before the actual enqueue returned,
          * then we might need to switch lock modes, resources, or extents. */
@@ -1581,6 +1600,15 @@ static int ldlm_callback_handler(struct ptlrpc_request *req)
                 RETURN (0);
         }
 
+        /* Force a known safe race, send a cancel to the server for a lock
+         * which the server has already started a blocking callback on. */
+        if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CANCEL_BL_CB_RACE) &&
+            lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) {
+                rc = ldlm_cli_cancel(&dlm_req->lock_handle[0]);
+                if (rc < 0)
+                        CERROR("ldlm_cli_cancel: %d\n", rc);
+        }
+
         lock = ldlm_handle2lock_ns(ns, &dlm_req->lock_handle[0]);
         if (!lock) {
                 CDEBUG(D_DLMTRACE, "callback on lock "LPX64" - lock "
index 61c9cdb..93f8448 100644 (file)
@@ -583,6 +583,18 @@ test_30() { #bug #11110
 
 run_test 30 "recreate file race ========="
 
+test_31() {
+        mkdir -p $DIR1/$tdir || error "Creating dir $DIR1/$tdir"
+        writes=`LANG=C dd if=/dev/zero of=$DIR/$tdir/$tfile count=1 2>&1 |
+                awk 'BEGIN { FS="+" } /out/ {print $1}'`
+        #define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE   0x314
+        sysctl -w lustre.fail_loc=0x314
+        reads=`LANG=C dd if=$DIR2/$tdir/$tfile of=/dev/null 2>&1 |
+               awk 'BEGIN { FS="+" } /in/ {print $1}'`
+        [ $reads -eq $writes ] || error "read" $reads "blocks, must be" $writes
+}
+run_test 31 "voluntary cancel / blocking ast race=============="
+
 log "cleanup: ======================================================"
 
 check_and_cleanup_lustre