From: Alexander Boyko Date: Wed, 29 Oct 2014 21:59:12 +0000 (-0700) Subject: LU-5079 ptlrpc: fix early reply timeout for recovery X-Git-Tag: 2.5.3.90~70 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=refs%2Fchanges%2F65%2F12365%2F4;p=fs%2Flustre-release.git LU-5079 ptlrpc: fix early reply timeout for recovery Commit 8b2f9c0e408 http://review.whamcloud.com/9100 changed the deadline calculation from current time to request arrival. During recovery, the new deadline could be less than calculated at request arrival time. And even worse, the deadline may be in past. For the first case unneeded early reply would be sent to client. For the second, client requests would be dropped by timeout and client reconnect happend. Do at_measured() for recovery in the same way like general early reply base on the current time. And set new timeout to the end of recovery. Test to check recovery deadline bug. The patch also speeds up replay-vbr test 7* by decreasing the at_max value in test_7_cycle(). This patch is back-ported from the following one: Lustre-commit: 1d889090f2e2902d861d1fab0227c4343127cc42 Lustre-change: http://review.whamcloud.com/11213 Test-Parameters: alwaysuploadlogs \ envdefinitions=SLOW=yes,ENABLE_QUOTA=yes \ mdtfilesystemtype=ldiskfs mdsfilesystemtype=ldiskfs ostfilesystemtype=ldiskfs \ clientdistro=el6 ossdistro=el6 mdsdistro=el6 \ mdtcount=1 mdssizegb=2 ostcount=7 ostsizegb=2 \ testlist=replay-vbr,replay-vbr Xyratex-bug-id: MRP-1988 Change-Id: I7a5d60ca8e52a805546cc9934eb42bddeaf8ff4e Signed-off-by: Alexander Boyko Signed-off-by: Jian Yu Reviewed-on: http://review.whamcloud.com/12365 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index ace96fa..a43bfbd 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -419,6 +419,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_TGT_LAST_REPLAY 0x710 #define OBD_FAIL_TGT_CLIENT_ADD 0x711 #define OBD_FAIL_TGT_RCVG_FLAG 0x712 +#define OBD_FAIL_TGT_REPLAY_DELAY2 0x714 #define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800 #define OBD_FAIL_MDC_ENQUEUE_PAUSE 0x801 diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index eb8a58b..bfae4cc 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -1829,17 +1829,19 @@ repeat: static struct ptlrpc_request *target_next_replay_req(struct obd_device *obd) { - struct ptlrpc_request *req = NULL; - ENTRY; + struct ptlrpc_request *req = NULL; + ENTRY; - CDEBUG(D_HA, "Waiting for transno "LPD64"\n", - obd->obd_next_recovery_transno); + CDEBUG(D_HA, "Waiting for transno "LPD64"\n", + obd->obd_next_recovery_transno); - if (target_recovery_overseer(obd, check_for_next_transno, - exp_req_replay_healthy)) { - abort_req_replay_queue(obd); - abort_lock_replay_queue(obd); - } + CFS_FAIL_TIMEOUT(OBD_FAIL_TGT_REPLAY_DELAY2, cfs_fail_val); + + if (target_recovery_overseer(obd, check_for_next_transno, + exp_req_replay_healthy)) { + abort_req_replay_queue(obd); + abort_lock_replay_queue(obd); + } spin_lock(&obd->obd_recovery_task_lock); if (!cfs_list_empty(&obd->obd_req_replay_queue)) { diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index a74d51d..22bc111 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -1313,16 +1313,18 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req) RETURN(-ENOSYS); } - if (req->rq_export && - lustre_msg_get_flags(req->rq_reqmsg) & - (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) { - /* During recovery, we don't want to send too many early - * replies, but on the other hand we want to make sure the - * client has enough time to resend if the rpc is lost. So - * during the recovery period send at least 4 early replies, - * spacing them every at_extra if we can. at_estimate should - * always equal this fixed value during recovery. */ - at_measured(&svcpt->scp_at_estimate, min(at_extra, + if (req->rq_export && + lustre_msg_get_flags(req->rq_reqmsg) & + (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) { + /* During recovery, we don't want to send too many early + * replies, but on the other hand we want to make sure the + * client has enough time to resend if the rpc is lost. So + * during the recovery period send at least 4 early replies, + * spacing them every at_extra if we can. at_estimate should + * always equal this fixed value during recovery. */ + at_measured(&svcpt->scp_at_estimate, + cfs_time_current_sec() - + req->rq_arrival_time.tv_sec + min(at_extra, req->rq_export->exp_obd->obd_recovery_timeout / 4)); } else { /* We want to extend the request deadline by at_extra seconds, @@ -1336,17 +1338,17 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req) cfs_time_current_sec() - req->rq_arrival_time.tv_sec); - /* Check to see if we've actually increased the deadline - - * we may be past adaptive_max */ - if (req->rq_deadline >= req->rq_arrival_time.tv_sec + - at_get(&svcpt->scp_at_estimate)) { - DEBUG_REQ(D_WARNING, req, "Couldn't add any time " - "(%ld/%ld), not sending early reply\n", - olddl, req->rq_arrival_time.tv_sec + - at_get(&svcpt->scp_at_estimate) - - cfs_time_current_sec()); - RETURN(-ETIMEDOUT); - } + } + /* Check to see if we've actually increased the deadline - + * we may be past adaptive_max */ + if (req->rq_deadline >= req->rq_arrival_time.tv_sec + + at_get(&svcpt->scp_at_estimate)) { + DEBUG_REQ(D_WARNING, req, "Couldn't add any time " + "(%ld/%ld), not sending early reply\n", + olddl, req->rq_arrival_time.tv_sec + + at_get(&svcpt->scp_at_estimate) - + cfs_time_current_sec()); + RETURN(-ETIMEDOUT); } reqcopy = ptlrpc_request_cache_alloc(GFP_NOFS); diff --git a/lustre/tests/replay-ost-single.sh b/lustre/tests/replay-ost-single.sh index 42c39fd..52d894e 100755 --- a/lustre/tests/replay-ost-single.sh +++ b/lustre/tests/replay-ost-single.sh @@ -396,6 +396,34 @@ test_8e() { } run_test 8e "Verify that ptlrpc resends request on -EINPROGRESS" +test_9() { + local server_version=$(lustre_version_code ost1) + + [[ $server_version -ge $(version_code 2.6.55) ]] || + [[ $server_version -ge $(version_code 2.5.27) && + $server_version -lt $(version_code 2.5.50) ]] || + [[ $server_version -ge $(version_code 2.5.3) && + $server_version -lt $(version_code 2.5.11) ]] || + { skip "Need OST version 2.5.4+ or 2.5.27+ or 2.6.55+"; return; } + + $SETSTRIPE -i 0 -c 1 $DIR/$tfile + replay_barrier ost1 + # do IO + dd if=/dev/zero of=$DIR/$tfile count=1 bs=1M > /dev/null || + error "failed to write" + # failover, replay and resend replayed waiting request + #define OBD_FAIL_TGT_REPLAY_DELAY2 0x714 + do_facet ost1 $LCTL set_param fail_loc=0x00000714 + do_facet ost1 $LCTL set_param fail_val=$TIMEOUT + fail ost1 + do_facet ost1 $LCTL set_param fail_loc=0 + do_facet ost1 "dmesg | tail -n 100" |\ + sed -n '/no req deadline/,$ p' | grep -q 'Already past' && + return 1 + return 0 +} +run_test 9 "Verify that no req deadline happened during recovery" + complete $SECONDS check_and_cleanup_lustre exit_status diff --git a/lustre/tests/replay-vbr.sh b/lustre/tests/replay-vbr.sh index 4018a9b..f633a0d 100644 --- a/lustre/tests/replay-vbr.sh +++ b/lustre/tests/replay-vbr.sh @@ -711,6 +711,12 @@ test_7_cycle() { do_facet $SINGLEMDS "$LCTL set_param mdd.${!var}.sync_permission=0" do_facet $SINGLEMDS "$LCTL set_param mdt.${!var}.commit_on_sharing=0" + local at_max_saved=0 + if at_is_enabled; then + at_max_saved=$(at_max_get $SINGLEMDS) + at_max_set $TIMEOUT mds client + fi + do_node $CLIENT1 mkdir -p $DIR/$tdir replay_barrier $SINGLEMDS # first operation @@ -727,11 +733,12 @@ test_7_cycle() { # should fail as conflict expected client_evicted $CLIENT1 || rc=1 - wait_recovery_complete $SINGLEMDS + wait_recovery_complete $SINGLEMDS wait_mds_ost_sync || error "wait_mds_ost_sync failed" - rm -rf $DIR/$tdir - return $rc + [[ $at_max_saved -eq 0 ]] || at_max_set $at_max_saved mds client + rm -rf $DIR/$tdir + return $rc } test_7a() {