Whamcloud - gitweb
b=17645
authoryury <yury>
Sat, 13 Dec 2008 11:51:15 +0000 (11:51 +0000)
committeryury <yury>
Sat, 13 Dec 2008 11:51:15 +0000 (11:51 +0000)
  r=johann,shadow

  - handles -EINTR while waiting for lock + race with bl_ast;
  - adds sanityN.sh test_34 for testing/reproducing this situation.

lustre/include/lustre_dlm.h
lustre/include/obd_support.h
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_request.c
lustre/tests/sanityN.sh

index 74a5324..8e9c18a 100644 (file)
@@ -183,6 +183,9 @@ typedef enum {
 /* measure lock contention and return -EUSERS if locking contention is high */
 #define LDLM_FL_DENY_ON_CONTENTION 0x40000000
 
+/* 0x80000000 is occupied by LDLM_AST_DISCARD_DATA */
+#define LDLM_FL_FAIL_LOC       0x100000000ULL
+
 /* The blocking callback is overloaded to perform two functions.  These flags
  * indicate which operation should be performed. */
 #define LDLM_CB_BLOCKING    1
@@ -517,7 +520,7 @@ struct ldlm_lock {
         ldlm_policy_data_t    l_policy_data;
 
         /* protected by lr_lock */
-        __u32                 l_flags;
+        __u64                 l_flags;
         __u32                 l_readers;
         __u32                 l_writers;
         __u8                  l_destroyed;
index bc4aeb3..e3530cc 100644 (file)
@@ -239,6 +239,8 @@ extern unsigned int obd_alloc_fail_rate;
 #define OBD_FAIL_LDLM_CANCEL_BL_CB_RACE  0x314
 #define OBD_FAIL_LDLM_CP_CB_WAIT         0x315
 #define OBD_FAIL_LDLM_OST_FAIL_RACE      0x316
+#define OBD_FAIL_LDLM_INTR_CP_AST        0x317
+#define OBD_FAIL_LDLM_CP_BL_RACE         0x318
 
 #define OBD_FAIL_OSC                     0x400
 #define OBD_FAIL_OSC_BRW_READ_BULK       0x401
index e5c49c1..48bb4d2 100644 (file)
@@ -638,6 +638,10 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, __u32 mode)
                 LDLM_LOCK_GET(lock); /* dropped by bl thread */
                 ldlm_lock_remove_from_lru(lock);
                 unlock_res_and_lock(lock);
+
+                if (lock->l_flags & LDLM_FL_FAIL_LOC)
+                        OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
+
                 if ((lock->l_flags & LDLM_FL_ATOMIC_CB) ||
                     ldlm_bl_to_thread_lock(ns, NULL, lock) != 0)
                         ldlm_handle_bl_callback(ns, NULL, lock);
@@ -1744,7 +1748,7 @@ void ldlm_lock_dump(int level, struct ldlm_lock *lock, int pos)
                lock->l_resource->lr_name.name[0],
                lock->l_resource->lr_name.name[1]);
         CDEBUG(level, "  Req mode: %s, grant mode: %s, rc: %u, read: %d, "
-               "write: %d flags: %#x\n", ldlm_lockname[lock->l_req_mode],
+               "write: %d flags: "LPX64"\n", ldlm_lockname[lock->l_req_mode],
                ldlm_lockname[lock->l_granted_mode],
                atomic_read(&lock->l_refc), lock->l_readers, lock->l_writers,
                lock->l_flags);
@@ -1792,7 +1796,7 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, __u32 level,
                 libcfs_debug_vmsg2(cdls, data->msg_subsys, level, data->msg_file,
                                    data->msg_fn, data->msg_line, fmt, args,
                                    " ns: \?\? lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
-                                   "res: \?\? rrc=\?\? type: \?\?\? flags: %x remote: "
+                                   "res: \?\? rrc=\?\? type: \?\?\? flags: "LPX64" remote: "
                                    LPX64" expref: %d pid: %u timeout: %lu\n", lock,
                                    lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
                                    lock->l_readers, lock->l_writers,
@@ -1812,7 +1816,7 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, __u32 level,
                                    data->msg_fn, data->msg_line, fmt, args,
                                    " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
                                    "res: "LPU64"/"LPU64" rrc: %d type: %s ["LPU64"->"LPU64
-                                   "] (req "LPU64"->"LPU64") flags: %x remote: "LPX64
+                                   "] (req "LPU64"->"LPU64") flags: "LPX64" remote: "LPX64
                                     " expref: %d pid: %u timeout %lu\n",
                                     lock->l_resource->lr_namespace->ns_name, lock,
                                     lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
@@ -1836,7 +1840,7 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, __u32 level,
                                    data->msg_fn, data->msg_line, fmt, args,
                                    " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
                                    "res: "LPU64"/"LPU64" rrc: %d type: %s pid: %d "
-                                   "["LPU64"->"LPU64"] flags: %x remote: "LPX64
+                                   "["LPU64"->"LPU64"] flags: "LPX64" remote: "LPX64
                                    " expref: %d pid: %u timeout: %lu\n",
                                    lock->l_resource->lr_namespace->ns_name, lock,
                                    lock->l_handle.h_cookie, atomic_read(&lock->l_refc),
@@ -1860,7 +1864,7 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, __u32 level,
                                    data->msg_fn, data->msg_line, fmt, args,
                                    " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
                                    "res: "LPU64"/"LPU64" bits "LPX64" rrc: %d type: %s "
-                                   "flags: %x remote: "LPX64" expref: %d "
+                                   "flags: "LPX64" remote: "LPX64" expref: %d "
                                    "pid: %u timeout: %lu\n",
                                    lock->l_resource->lr_namespace->ns_name,
                                    lock, lock->l_handle.h_cookie,
@@ -1882,7 +1886,7 @@ void _ldlm_lock_debug(struct ldlm_lock *lock, __u32 level,
                 libcfs_debug_vmsg2(cdls, data->msg_subsys, level, data->msg_file,
                                    data->msg_fn, data->msg_line, fmt, args,
                                    " ns: %s lock: %p/"LPX64" lrc: %d/%d,%d mode: %s/%s "
-                                   "res: "LPU64"/"LPU64" rrc: %d type: %s flags: %x "
+                                   "res: "LPU64"/"LPU64" rrc: %d type: %s flags: "LPX64" "
                                    "remote: "LPX64" expref: %d pid: %u timeout %lu\n",
                                    lock->l_resource->lr_namespace->ns_name,
                                    lock, lock->l_handle.h_cookie,
index 1386afe..dc0dea9 100644 (file)
@@ -1183,7 +1183,7 @@ existing_lock:
                 if (!(lock->l_flags & LDLM_FL_CANCEL_ON_BLOCK) ||
                     !(dlm_rep->lock_flags & LDLM_FL_CANCEL_ON_BLOCK)) {
                         CERROR("Granting sync lock to libclient. "
-                               "req fl %d, rep fl %d, lock fl %d\n",
+                               "req fl %d, rep fl %d, lock fl "LPX64"\n",
                                dlm_req->lock_flags, dlm_rep->lock_flags,
                                lock->l_flags);
                         LDLM_ERROR(lock, "sync lock");
@@ -1751,15 +1751,21 @@ static int ldlm_callback_handler(struct ptlrpc_request *req)
                 RETURN(0);
         }
 
+        if ((lock->l_flags & LDLM_FL_FAIL_LOC) && 
+            lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK)
+                OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
+
         /* Copy hints/flags (e.g. LDLM_FL_DISCARD_DATA) from AST. */
         lock_res_and_lock(lock);
         lock->l_flags |= (dlm_req->lock_flags & LDLM_AST_FLAGS);
         if (lustre_msg_get_opc(req->rq_reqmsg) == LDLM_BL_CALLBACK) {
-                /* If somebody cancels locks and cache is already droped,
+                /* If somebody cancels lock and cache is already droped,
+                 * or lock is failed before cp_ast received on client,
                  * we can tell the server we have no lock. Otherwise, we
                  * should send cancel after dropping the cache. */
-                if ((lock->l_flags & LDLM_FL_CANCELING) &&
-                    (lock->l_flags & LDLM_FL_BL_DONE)) {
+                if (((lock->l_flags & LDLM_FL_CANCELING) &&
+                    (lock->l_flags & LDLM_FL_BL_DONE)) ||
+                    (lock->l_flags & LDLM_FL_FAILED)) {
                         LDLM_DEBUG(lock, "callback on lock "
                                    LPX64" - lock disappeared\n",
                                    dlm_req->lock_handle[0].cookie);
index 672063a..e318e88 100644 (file)
@@ -188,8 +188,17 @@ noreproc:
                 spin_unlock(&imp->imp_lock);
         }
 
-        /* Go to sleep until the lock is granted or cancelled. */
-        rc = l_wait_event(lock->l_waitq, is_granted_or_cancelled(lock), &lwi);
+        if (ns_is_client(lock->l_resource->lr_namespace) && 
+            lock->l_resource->lr_type == LDLM_EXTENT &&
+            OBD_FAIL_CHECK(OBD_FAIL_LDLM_INTR_CP_AST | OBD_FAIL_ONCE)) {
+                obd_fail_loc = OBD_FAIL_LDLM_CP_BL_RACE | OBD_FAIL_ONCE;
+                lock->l_flags |= LDLM_FL_FAIL_LOC;
+                rc = -EINTR;
+        } else {
+                /* Go to sleep until the lock is granted or cancelled. */
+                rc = l_wait_event(lock->l_waitq, 
+                                  is_granted_or_cancelled(lock), &lwi);
+        }
 
         if (lock->l_destroyed || lock->l_flags & LDLM_FL_FAILED) {
                 LDLM_DEBUG(lock, "client-side enqueue waking up: destroyed");
@@ -350,13 +359,31 @@ static void failed_lock_cleanup(struct ldlm_namespace *ns,
                                 struct ldlm_lock *lock,
                                 struct lustre_handle *lockh, int mode)
 {
+        int need_cancel = 0;
+
         /* Set a flag to prevent us from sending a CANCEL (bug 407) */
         lock_res_and_lock(lock);
-        lock->l_flags |= LDLM_FL_LOCAL_ONLY;
+        /* Check that lock is not granted or failed, we might race. */
+        if ((lock->l_req_mode != lock->l_granted_mode) && 
+            !(lock->l_flags & LDLM_FL_FAILED)) {
+                /* Make sure that this lock will not be found by raced
+                 * bl_ast and -EINVAL reply is sent to server anyways. 
+                 * bug 17645 */
+                lock->l_flags |= LDLM_FL_LOCAL_ONLY | LDLM_FL_FAILED | 
+                                 LDLM_FL_ATOMIC_CB;
+                need_cancel = 1;
+        }
         unlock_res_and_lock(lock);
-        LDLM_DEBUG(lock, "setting FL_LOCAL_ONLY");
-
-        ldlm_lock_decref_and_cancel(lockh, mode);
+  
+        if (need_cancel) {
+                LDLM_DEBUG(lock, 
+                           "setting FL_LOCAL_ONLY | LDLM_FL_FAILED | " 
+                           "LDLM_FL_ATOMIC_CB");
+                ldlm_lock_decref_and_cancel(lockh, mode);
+        } else {
+                LDLM_DEBUG(lock, "lock was granted or failed in race");
+                ldlm_lock_decref(lockh, mode);
+        }
 
         /* XXX - HACK because we shouldn't call ldlm_lock_destroy()
          *       from llite/file.c/ll_file_flock(). */
index 2181fe1..f8844e6 100644 (file)
@@ -736,6 +736,48 @@ test_33() { #16129
 }
 run_test 33 "no lock timeout under IO"
 
+test_34() { # bug 17645
+        local generation=[]
+        local count=0
+        for imp in /proc/fs/lustre/osc/$FSNAME-OST*-osc-*; do
+            g=$(awk '/generation/{print $2}' $imp/import)
+            generation[count]=$g
+            let count=count+1
+        done
+
+        dd if=/dev/zero of=$MOUNT1/$tfile bs=1M count=10
+        sync
+        cancel_lru_locks osc
+
+        # Let's get some read locks so that later we have something to
+        # conflict with
+        dd if=$MOUNT1/$tfile of=$MOUNT1/${tfile}-1 bs=1k count=10000
+        
+        # Let's initiate -EINTR situation by setting fail_loc and take
+        # write lock on same file from same client. This will not cause
+        # bl_ast yet as lock is already in local cache.
+#define OBD_FAIL_LDLM_INTR_CP_AST        0x317
+        do_facet client "lctl set_param fail_loc=0x80000317"
+        dd if=$MOUNT1/${tfile}-1 of=$MOUNT1/$tfile bs=1k count=10000 &
+        sleep 1
+        
+        # Let's take write lock on same file from another mount. This
+        # should cause conflict and bl_ast
+        dd if=$MOUNT2/${tfile}-1 of=$MOUNT2/$tfile bs=1k count=10000 &
+        wait
+        do_facet client "lctl set_param fail_loc=0x0"
+        df -h $MOUNT1 $MOUNT2
+        count=0
+        for imp in /proc/fs/lustre/osc/$FSNAME-OST*-osc-*; do
+            g=$(awk '/generation/{print $2}' $imp/import)
+            if ! test "$g" -eq "${generation[count]}"; then
+                error "Eviction happened on import $(basename $imp)"
+            fi
+            let count=count+1
+        done
+}
+run_test 34 "-EINTR cp_ast vs. bl_ast race does not evict client"
+
 log "cleanup: ======================================================"
 
 check_and_cleanup_lustre