From 1d889090f2e2902d861d1fab0227c4343127cc42 Mon Sep 17 00:00:00 2001 From: "Alexander.Boyko" Date: Thu, 24 Jul 2014 18:35:44 +0400 Subject: [PATCH] LU-5079 ptlrpc: fix early reply timeout for recovery Commit 8b2f9c0e408 http://review.whamcloud.com/9100 changed the deadline calculation from current time to request arrival. During recovery, the new deadline could be less than the one calculated at request arrival time. And even worse, the deadline may be in the past. In the first case, an unneeded early reply would be sent to the client. In the second, client requests would be dropped by timeout and a client reconnect would happen. Call at_measured() for recovery in the same way as for a general early reply, based on the current time, and set the new timeout to the end of recovery. Add a test to check for the recovery deadline bug. Signed-off-by: Alexander Boyko Change-Id: I29327cb962d1c1a3cd8a6181d27a29593d1d8fc4 Xyratex-bug-id: MRP-1988 Reviewed-on: http://review.whamcloud.com/11213 Tested-by: Jenkins Reviewed-by: Niu Yawei Reviewed-by: James Simmons Reviewed-by: Chris Horn Tested-by: Maloo Reviewed-by: Andreas Dilger --- lustre/include/obd_support.h | 1 + lustre/ldlm/ldlm_lib.c | 20 ++++++++++-------- lustre/ptlrpc/service.c | 44 ++++++++++++++++++++------------------- lustre/tests/replay-ost-single.sh | 21 +++++++++++++++++++ 4 files changed, 56 insertions(+), 30 deletions(-) diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index ff61ede..3257997 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -417,6 +417,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_TGT_CLIENT_ADD 0x711 #define OBD_FAIL_TGT_RCVG_FLAG 0x712 #define OBD_FAIL_TGT_DELAY_CONDITIONAL 0x713 +#define OBD_FAIL_TGT_REPLAY_DELAY2 0x714 #define OBD_FAIL_MDC_REVALIDATE_PAUSE 0x800 #define OBD_FAIL_MDC_ENQUEUE_PAUSE 0x801 diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 46ee0e8..0c878dc 100644 --- a/lustre/ldlm/ldlm_lib.c +++ 
b/lustre/ldlm/ldlm_lib.c @@ -1806,17 +1806,19 @@ repeat: static struct ptlrpc_request *target_next_replay_req(struct obd_device *obd) { - struct ptlrpc_request *req = NULL; - ENTRY; + struct ptlrpc_request *req = NULL; + ENTRY; - CDEBUG(D_HA, "Waiting for transno "LPD64"\n", - obd->obd_next_recovery_transno); + CDEBUG(D_HA, "Waiting for transno "LPD64"\n", + obd->obd_next_recovery_transno); - if (target_recovery_overseer(obd, check_for_next_transno, - exp_req_replay_healthy)) { - abort_req_replay_queue(obd); - abort_lock_replay_queue(obd); - } + CFS_FAIL_TIMEOUT(OBD_FAIL_TGT_REPLAY_DELAY2, cfs_fail_val); + + if (target_recovery_overseer(obd, check_for_next_transno, + exp_req_replay_healthy)) { + abort_req_replay_queue(obd); + abort_lock_replay_queue(obd); + } spin_lock(&obd->obd_recovery_task_lock); if (!list_empty(&obd->obd_req_replay_queue)) { diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 71e4804..706b042 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -1293,16 +1293,18 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req) RETURN(-ENOSYS); } - if (req->rq_export && - lustre_msg_get_flags(req->rq_reqmsg) & - (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) { - /* During recovery, we don't want to send too many early - * replies, but on the other hand we want to make sure the - * client has enough time to resend if the rpc is lost. So - * during the recovery period send at least 4 early replies, - * spacing them every at_extra if we can. at_estimate should - * always equal this fixed value during recovery. */ - at_measured(&svcpt->scp_at_estimate, min(at_extra, + if (req->rq_export && + lustre_msg_get_flags(req->rq_reqmsg) & + (MSG_REPLAY | MSG_REQ_REPLAY_DONE | MSG_LOCK_REPLAY_DONE)) { + /* During recovery, we don't want to send too many early + * replies, but on the other hand we want to make sure the + * client has enough time to resend if the rpc is lost. 
So + * during the recovery period send at least 4 early replies, + * spacing them every at_extra if we can. at_estimate should + * always equal this fixed value during recovery. */ + at_measured(&svcpt->scp_at_estimate, + cfs_time_current_sec() - + req->rq_arrival_time.tv_sec + min(at_extra, req->rq_export->exp_obd->obd_recovery_timeout / 4)); } else { /* We want to extend the request deadline by at_extra seconds, @@ -1316,17 +1318,17 @@ static int ptlrpc_at_send_early_reply(struct ptlrpc_request *req) cfs_time_current_sec() - req->rq_arrival_time.tv_sec); - /* Check to see if we've actually increased the deadline - - * we may be past adaptive_max */ - if (req->rq_deadline >= req->rq_arrival_time.tv_sec + - at_get(&svcpt->scp_at_estimate)) { - DEBUG_REQ(D_WARNING, req, "Couldn't add any time " - "(%ld/%ld), not sending early reply\n", - olddl, req->rq_arrival_time.tv_sec + - at_get(&svcpt->scp_at_estimate) - - cfs_time_current_sec()); - RETURN(-ETIMEDOUT); - } + } + /* Check to see if we've actually increased the deadline - + * we may be past adaptive_max */ + if (req->rq_deadline >= req->rq_arrival_time.tv_sec + + at_get(&svcpt->scp_at_estimate)) { + DEBUG_REQ(D_WARNING, req, "Couldn't add any time " + "(%ld/%ld), not sending early reply\n", + olddl, req->rq_arrival_time.tv_sec + + at_get(&svcpt->scp_at_estimate) - + cfs_time_current_sec()); + RETURN(-ETIMEDOUT); } reqcopy = ptlrpc_request_cache_alloc(GFP_NOFS); diff --git a/lustre/tests/replay-ost-single.sh b/lustre/tests/replay-ost-single.sh index 42c39fd..74d0d24 100755 --- a/lustre/tests/replay-ost-single.sh +++ b/lustre/tests/replay-ost-single.sh @@ -396,6 +396,27 @@ test_8e() { } run_test 8e "Verify that ptlrpc resends request on -EINPROGRESS" +test_9() { + [ $(lustre_version_code ost1) -ge $(version_code 2.6.54) ] || + { skip "Need OST version at least 2.6.54"; return; } + $SETSTRIPE -i 0 -c 1 $DIR/$tfile + replay_barrier ost1 + # do IO + dd if=/dev/zero of=$DIR/$tfile count=1 bs=1M > /dev/null || + error 
"failed to write" + # failover, replay and resend replayed waiting request + #define OBD_FAIL_TGT_REPLAY_DELAY2 0x714 + do_facet ost1 $LCTL set_param fail_loc=0x00000714 + do_facet ost1 $LCTL set_param fail_val=$TIMEOUT + fail ost1 + do_facet ost1 $LCTL set_param fail_loc=0 + do_facet ost1 "dmesg | tail -n 100" |\ + sed -n '/no req deadline/,$ p' | grep -q 'Already past' && + return 1 + return 0 +} +run_test 9 "Verify that no req deadline happened during recovery" + complete $SECONDS check_and_cleanup_lustre exit_status -- 1.8.3.1