Whamcloud - gitweb
LU-8347 ldlm: granting conflicting locks 59/21059/5
author: Andriy Skulysh <andriy.skulysh@seagate.com>
Wed, 29 Jun 2016 11:07:23 +0000 (14:07 +0300)
committer: Oleg Drokin <oleg.drokin@intel.com>
Fri, 28 Oct 2016 23:49:48 +0000 (23:49 +0000)
Postpone lock reprocess during lock replay stage.
Reprocess is needed during request replay stage
because local locks are still in use until
client ACK.

Change-Id: I250d22fee471db643f12a900fdfc51eacfa94aa2
Seagate-bug-id: MRP-3516
Signed-off-by: Andriy Skulysh <andriy.skulysh@seagate.com>
Reviewed-on: http://review.whamcloud.com/21059
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Patrick Farrell <paf@cray.com>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/tests/replay-dual.sh

index 834b68f..03ba8c2 100644 (file)
@@ -2144,6 +2144,7 @@ void ldlm_reprocess_all(struct ldlm_resource *res)
 {
        struct list_head rpc_list;
 #ifdef HAVE_SERVER_SUPPORT
 {
        struct list_head rpc_list;
 #ifdef HAVE_SERVER_SUPPORT
+       struct obd_device *obd;
         int rc;
         ENTRY;
 
         int rc;
         ENTRY;
 
@@ -2154,6 +2155,13 @@ void ldlm_reprocess_all(struct ldlm_resource *res)
                 return;
         }
 
                 return;
         }
 
+       /* Disable reprocess during lock replay stage but allow during
+        * request replay stage.
+        */
+       obd = ldlm_res_to_ns(res)->ns_obd;
+       if (obd->obd_recovering &&
+           atomic_read(&obd->obd_req_replay_clients) == 0)
+               RETURN_EXIT;
 restart:
         lock_res(res);
         rc = ldlm_reprocess_queue(res, &res->lr_converting, &rpc_list);
 restart:
         lock_res(res);
         rc = ldlm_reprocess_queue(res, &res->lr_converting, &rpc_list);
index 89346ad..c2a420d 100644 (file)
@@ -850,6 +850,11 @@ int ldlm_server_blocking_ast(struct ldlm_lock *lock,
                 /* Don't need to do anything here. */
                 RETURN(0);
 
                 /* Don't need to do anything here. */
                 RETURN(0);
 
+       if (OBD_FAIL_PRECHECK(OBD_FAIL_LDLM_SRV_BL_AST)) {
+               LDLM_DEBUG(lock, "dropping BL AST");
+               RETURN(0);
+       }
+
         LASSERT(lock);
         LASSERT(data != NULL);
         if (lock->l_export->exp_obd->obd_recovering != 0)
         LASSERT(lock);
         LASSERT(data != NULL);
         if (lock->l_export->exp_obd->obd_recovering != 0)
index 65d3106..6197ff9 100755 (executable)
@@ -1002,6 +1002,28 @@ test_26() {
 }
 run_test 26 "dbench and tar with mds failover"
 
 }
 run_test 26 "dbench and tar with mds failover"
 
+test_28() {
+       $SETSTRIPE -i 0 -c 1 $DIR2/$tfile
+       dd if=/dev/zero of=$DIR2/$tfile bs=4096 count=1
+
+       #define OBD_FAIL_LDLM_SRV_BL_AST         0x324
+       do_facet ost1 $LCTL set_param fail_loc=0x80000324
+
+       dd if=/dev/zero of=$DIR/$tfile bs=4096 count=1 &
+       local pid=$!
+       sleep 2
+
+       #define OBD_FAIL_LDLM_GRANT_CHECK        0x32a
+       do_facet ost1 $LCTL set_param fail_loc=0x32a
+
+       fail ost1
+
+       sleep 2
+       cancel_lru_locks OST0000-osc
+       wait $pid || error "dd failed"
+}
+run_test 28 "lock replay should be ordered: waiting after granted"
+
 complete $SECONDS
 SLEEP=$((SECONDS - $NOW))
 [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP
 complete $SECONDS
 SLEEP=$((SECONDS - $NOW))
 [ $SLEEP -lt $TIMEOUT ] && sleep $SLEEP