Whamcloud - gitweb
LU-6655 ptlrpc: skip delayed replay requests 05/23205/15
author Hongchao Zhang <hongchao.zhang@intel.com>
Mon, 23 Apr 2018 06:35:11 +0000 (14:35 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Sun, 6 May 2018 03:40:01 +0000 (03:40 +0000)
During recovery, some delayed replay requests can still arrive
after the final recovery-completion ping request has been handled;
such requests should be skipped.

Change-Id: Ie0d5ff92c75f9d078b8ae28e899d4a821113194f
Signed-off-by: Hongchao Zhang <hongchao.zhang@intel.com>
Reviewed-on: https://review.whamcloud.com/23205
Reviewed-by: Fan Yong <fan.yong@intel.com>
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/obd_support.h
lustre/ldlm/ldlm_lib.c
lustre/target/tgt_handler.c
lustre/tests/replay-single.sh
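
Before the diff itself, a minimal userspace sketch of the check this patch adds in process_req_last_xid() may help: "last_xid" is only the smallest xid among unreplied requests, so a delayed replay request left over from a previous connection can still carry a larger xid; comparing connection counts catches such a request and rejects it with -ESTALE. The struct and function names below (fake_export, fake_request, check_delayed_replay) are hypothetical simplifications for illustration, not Lustre APIs.

#include <errno.h>
#include <stdio.h>

/* Hypothetical, simplified stand-ins for the Lustre export and request. */
struct fake_export {
	unsigned int exp_conn_cnt;	/* current connection generation */
};

struct fake_request {
	unsigned int rq_conn_cnt;	/* generation the request was sent under */
	unsigned long long rq_xid;
};

/*
 * The xid comparison alone cannot detect a delayed replay, because a
 * request from a previous connection may still carry an xid larger than
 * exp_last_xid; comparing connection counts catches such stale requests.
 */
static int check_delayed_replay(const struct fake_export *exp,
				const struct fake_request *req)
{
	if (exp->exp_conn_cnt > req->rq_conn_cnt)
		return -ESTALE;	/* request belongs to a previous connection */
	return 0;
}

int main(void)
{
	/* export is on its 3rd connection; a request from the 2nd connection
	 * is still in flight and even carries a larger xid */
	struct fake_export exp = { .exp_conn_cnt = 3 };
	struct fake_request fresh = { .rq_conn_cnt = 3, .rq_xid = 105 };
	struct fake_request stale = { .rq_conn_cnt = 2, .rq_xid = 110 };

	printf("fresh request (xid %llu): %d\n", fresh.rq_xid,
	       check_delayed_replay(&exp, &fresh));
	printf("stale request (xid %llu): %d\n", stale.rq_xid,
	       check_delayed_replay(&exp, &stale));
	return 0;
}

In the real handler the same comparison is made between req->rq_export->exp_conn_cnt and lustre_msg_get_conn_cnt(req->rq_reqmsg), as shown in the lustre/target/tgt_handler.c hunk below.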

lustre/include/obd_support.h
index 3844961..f2314c4 100644 (file)
@@ -462,6 +462,7 @@ extern char obd_jobid_var[];
 #define OBD_FAIL_TGT_CLIENT_DEL                 0x718
 #define OBD_FAIL_TGT_SLUGGISH_NET       0x719
 #define OBD_FAIL_TGT_RCVD_EIO           0x720
+#define OBD_FAIL_TGT_RECOVERY_REQ_RACE  0x721
 
 #define OBD_FAIL_MDC_REVALIDATE_PAUSE    0x800
 #define OBD_FAIL_MDC_ENQUEUE_PAUSE       0x801
lustre/ldlm/ldlm_lib.c
index b9d5dda..5c122fb 100644 (file)
@@ -2694,6 +2694,17 @@ int target_queue_recovery_request(struct ptlrpc_request *req,
         target_process_req_flags(obd, req);
 
         if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_LOCK_REPLAY_DONE) {
+               if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_TGT_RECOVERY_REQ_RACE))) {
+                       if (cfs_fail_val == 1) {
+                               cfs_race_state = 1;
+                               cfs_fail_val = 0;
+                               wake_up(&cfs_race_waitq);
+
+                               set_current_state(TASK_INTERRUPTIBLE);
+                               schedule_timeout(cfs_time_seconds(1));
+                       }
+               }
+
                 /* client declares he's ready to complete recovery
                  * so, we put the request on th final queue */
                target_request_copy_get(req);
lustre/target/tgt_handler.c
index 4a52875..24e6936 100644 (file)
@@ -661,6 +661,19 @@ static int process_req_last_xid(struct ptlrpc_request *req)
                        RETURN(-EPROTO);
        }
 
+       /* The "last_xid" is the minimum xid among unreplied requests,
+        * if the request is from the previous connection, its xid can
+        * still be larger than "exp_last_xid", then the above check of
+        * xid is not enough to determine whether the request is delayed.
+        *
+        * For example, if some replay request was delayed and caused
+        * timeout at client and the replay is restarted, the delayed
+        * replay request will have the larger xid than "exp_last_xid"
+        */
+       if (req->rq_export->exp_conn_cnt >
+           lustre_msg_get_conn_cnt(req->rq_reqmsg))
+               RETURN(-ESTALE);
+
        /* try to release in-memory reply data */
        if (tgt_is_multimodrpcs_client(req->rq_export)) {
                tgt_handle_received_xid(req->rq_export,
@@ -687,6 +700,19 @@ int tgt_request_handle(struct ptlrpc_request *req)
        bool                     is_connect = false;
        ENTRY;
 
+       if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_TGT_RECOVERY_REQ_RACE))) {
+               if (cfs_fail_val == 0 &&
+                   lustre_msg_get_opc(msg) != OBD_PING &&
+                   lustre_msg_get_flags(msg) & MSG_REQ_REPLAY_DONE) {
+                       struct l_wait_info lwi =  { 0 };
+
+                       cfs_fail_val = 1;
+                       cfs_race_state = 0;
+                       l_wait_event(cfs_race_waitq, (cfs_race_state == 1),
+                                    &lwi);
+               }
+       }
+
        /* Refill the context, to make sure all thread keys are allocated */
        lu_env_refill(req->rq_svc_thread->t_env);
 
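To make the fail-injection handshake above easier to follow, here is a rough pthread-based model of what OBD_FAIL_TGT_RECOVERY_REQ_RACE arranges between the two sites: the service thread handling a replay request that carries MSG_REQ_REPLAY_DONE parks on cfs_race_waitq, and the thread handling the MSG_LOCK_REPLAY_DONE (recovery-completion) request releases it and then pauses for a second, forcing the delayed replay request to race with the final stage of recovery. The thread and variable names in this sketch are illustrative userspace analogues, not the kernel implementation.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

/* Userspace analogues of cfs_race_waitq / cfs_race_state / cfs_fail_val. */
static pthread_mutex_t race_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t race_waitq = PTHREAD_COND_INITIALIZER;
static int race_state;	/* becomes 1 when the parked thread is released */
static int fail_val;	/* becomes 1 once a replay request is parked */

/* Models tgt_request_handle() parking a MSG_REQ_REPLAY_DONE replay request. */
static void *delayed_replay_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&race_lock);
	fail_val = 1;
	race_state = 0;
	while (race_state != 1)		/* l_wait_event() analogue */
		pthread_cond_wait(&race_waitq, &race_lock);
	pthread_mutex_unlock(&race_lock);
	printf("delayed replay request resumes during recovery completion\n");
	return NULL;
}

/* Models target_queue_recovery_request() handling MSG_LOCK_REPLAY_DONE. */
static void *recovery_done_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&race_lock);
	if (fail_val == 1) {
		race_state = 1;
		fail_val = 0;
		pthread_cond_broadcast(&race_waitq);	/* wake_up() analogue */
	}
	pthread_mutex_unlock(&race_lock);
	sleep(1);	/* schedule_timeout(cfs_time_seconds(1)) analogue */
	printf("recovery-completion request queued on the final queue\n");
	return NULL;
}

int main(void)
{
	pthread_t replay, completion;

	pthread_create(&replay, NULL, delayed_replay_thread, NULL);
	sleep(1);	/* let the replay request park first, as test_121 arranges */
	pthread_create(&completion, NULL, recovery_done_thread, NULL);
	pthread_join(replay, NULL);
	pthread_join(completion, NULL);
	return 0;
}

Built with -pthread, the program shows the parked replay request being released only once the recovery-completion handler signals it, which is the interleaving that test_121 below drives with fail_loc=0x721 and fail_val=0.
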
lustre/tests/replay-single.sh
index c9629b8..ada7584 100755 (executable)
@@ -4648,6 +4648,43 @@ test_120() {
 }
 run_test 120 "DNE fail abort should stop both normal and DNE replay"
 
+test_121() {
+       [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.90) ] &&
+               skip "Don't support it before 2.11" &&
+               return 0
+
+       local at_max_saved=$(at_max_get mds)
+
+       touch $DIR/$tfile || error "touch $DIR/$tfile failed"
+       cancel_lru_locks mdc
+
+       multiop_bg_pause $DIR/$tfile s_s || error "multiop $DIR/$tfile failed"
+       mpid=$!
+
+       lctl set_param -n ldlm.cancel_unused_locks_before_replay "0"
+
+       stop mds1
+       change_active mds1
+       wait_for_facet mds1
+
+       #define OBD_FAIL_TGT_RECOVERY_REQ_RACE  0x721
+       do_facet $SINGLEMDS "lctl set_param fail_loc=0x721 fail_val=0"
+       at_max_set 0 mds
+
+       mount_facet mds1
+       wait_clients_import_state "$clients" mds1 FULL
+       clients_up || clients_up || error "failover df: $?"
+
+       kill -USR1 $mpid
+       wait $mpid || error "multiop_bg_pause pid failed"
+
+       do_facet $SINGLEMDS "lctl set_param fail_loc=0x0"
+       lctl set_param -n ldlm.cancel_unused_locks_before_replay "1"
+       at_max_set $at_max_saved mds
+       rm -f $DIR/$tfile
+}
+run_test 121 "lock replay timed out and race"
+
 complete $SECONDS
 check_and_cleanup_lustre
 exit_status