From c1d465de13ccf0eda8020c88661c3cc4d78538ca Mon Sep 17 00:00:00 2001 From: Hongchao Zhang Date: Mon, 23 Apr 2018 14:35:11 +0800 Subject: [PATCH 1/1] LU-6655 ptlrpc: skip delayed replay requests During recovery, some delayed replay requests may arrive after the final recovery completion ping request has been handled; they should be skipped. Change-Id: Ie0d5ff92c75f9d078b8ae28e899d4a821113194f Signed-off-by: Hongchao Zhang Reviewed-on: https://review.whamcloud.com/23205 Reviewed-by: Fan Yong Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Lai Siyao Reviewed-by: Oleg Drokin --- lustre/include/obd_support.h | 1 + lustre/ldlm/ldlm_lib.c | 11 +++++++++++ lustre/target/tgt_handler.c | 26 ++++++++++++++++++++++++++ lustre/tests/replay-single.sh | 37 +++++++++++++++++++++++++++++++++++++ 4 files changed, 75 insertions(+) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 3844961..f2314c4 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -462,6 +462,7 @@ extern char obd_jobid_var[]; #define OBD_FAIL_TGT_CLIENT_DEL 0x718 #define OBD_FAIL_TGT_SLUGGISH_NET 0x719 #define OBD_FAIL_TGT_RCVD_EIO 0x720 +#define OBD_FAIL_TGT_RECOVERY_REQ_RACE 0x721 #define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800 #define OBD_FAIL_MDC_ENQUEUE_PAUSE 0x801 diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index b9d5dda..5c122fb 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -2694,6 +2694,17 @@ int target_queue_recovery_request(struct ptlrpc_request *req, target_process_req_flags(obd, req); if (lustre_msg_get_flags(req->rq_reqmsg) & MSG_LOCK_REPLAY_DONE) { + if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_TGT_RECOVERY_REQ_RACE))) { + if (cfs_fail_val == 1) { + cfs_race_state = 1; + cfs_fail_val = 0; + wake_up(&cfs_race_waitq); + + set_current_state(TASK_INTERRUPTIBLE); + schedule_timeout(cfs_time_seconds(1)); + } + } + /* client declares he's ready to complete recovery * so, we put the request on th final queue */ 
target_request_copy_get(req); diff --git a/lustre/target/tgt_handler.c b/lustre/target/tgt_handler.c index 4a52875..24e6936 100644 --- a/lustre/target/tgt_handler.c +++ b/lustre/target/tgt_handler.c @@ -661,6 +661,19 @@ static int process_req_last_xid(struct ptlrpc_request *req) RETURN(-EPROTO); } + /* The "last_xid" is the minimum xid among unreplied requests, + * if the request is from the previous connection, its xid can + * still be larger than "exp_last_xid", then the above check of + * xid is not enough to determine whether the request is delayed. + * + * For example, if some replay request was delayed and caused + * timeout at client and the replay is restarted, the delayed + * replay request will have the larger xid than "exp_last_xid" + */ + if (req->rq_export->exp_conn_cnt > + lustre_msg_get_conn_cnt(req->rq_reqmsg)) + RETURN(-ESTALE); + /* try to release in-memory reply data */ if (tgt_is_multimodrpcs_client(req->rq_export)) { tgt_handle_received_xid(req->rq_export, @@ -687,6 +700,19 @@ int tgt_request_handle(struct ptlrpc_request *req) bool is_connect = false; ENTRY; + if (unlikely(OBD_FAIL_CHECK(OBD_FAIL_TGT_RECOVERY_REQ_RACE))) { + if (cfs_fail_val == 0 && + lustre_msg_get_opc(msg) != OBD_PING && + lustre_msg_get_flags(msg) & MSG_REQ_REPLAY_DONE) { + struct l_wait_info lwi = { 0 }; + + cfs_fail_val = 1; + cfs_race_state = 0; + l_wait_event(cfs_race_waitq, (cfs_race_state == 1), + &lwi); + } + } + /* Refill the context, to make sure all thread keys are allocated */ lu_env_refill(req->rq_svc_thread->t_env); diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index c9629b8..ada7584 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -4648,6 +4648,43 @@ test_120() { } run_test 120 "DNE fail abort should stop both normal and DNE replay" +test_121() { + [ $(lustre_version_code $SINGLEMDS) -lt $(version_code 2.10.90) ] && + skip "Don't support it before 2.11" && + return 0 + + local 
at_max_saved=$(at_max_get mds) + + touch $DIR/$tfile || error "touch $DIR/$tfile failed" + cancel_lru_locks mdc + + multiop_bg_pause $DIR/$tfile s_s || error "multiop $DIR/$tfile failed" + mpid=$! + + lctl set_param -n ldlm.cancel_unused_locks_before_replay "0" + + stop mds1 + change_active mds1 + wait_for_facet mds1 + + #define OBD_FAIL_TGT_RECOVERY_REQ_RACE 0x721 + do_facet $SINGLEMDS "lctl set_param fail_loc=0x721 fail_val=0" + at_max_set 0 mds + + mount_facet mds1 + wait_clients_import_state "$clients" mds1 FULL + clients_up || clients_up || error "failover df: $?" + + kill -USR1 $mpid + wait $mpid || error "multiop_bg_pause pid failed" + + do_facet $SINGLEMDS "lctl set_param fail_loc=0x0" + lctl set_param -n ldlm.cancel_unused_locks_before_replay "1" + at_max_set $at_max_saved mds + rm -f $DIR/$tfile +} +run_test 121 "lock replay timed out and race" + complete $SECONDS check_and_cleanup_lustre exit_status -- 1.8.3.1