From 6ee050666504e6ca5966a9e452729b681e1b5143 Mon Sep 17 00:00:00 2001 From: Johann Lombardi Date: Mon, 27 Aug 2012 18:02:38 +0200 Subject: [PATCH] LU-1788 osc: don't print error msg for EINPROGRESS resend Now that -EINPROGRESS can be legitimately returned during normal operation (e.g. quota rebalancing in progress), we shouldn't print an error message on the client each time the BRW is resent because of -EINPROGRESS. This patch also caps the resend delay for BRW to the current request timeout. Signed-off-by: Johann Lombardi Change-Id: Ie7447602756b0721351c7c90cbfb40ad8e3bb720 Reviewed-on: http://review.whamcloud.com/3792 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Niu Yawei --- lustre/include/lustre/lustre_idl.h | 2 +- lustre/osc/osc_request.c | 17 +++++++++++------ lustre/ptlrpc/niobuf.c | 3 ++- lustre/tests/replay-ost-single.sh | 4 +++- 4 files changed, 17 insertions(+), 9 deletions(-) diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 8809569..f184b71 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -1152,7 +1152,7 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); #define OBD_CONNECT_JOBSTATS 0x20000000000ULL /* jobid in ptlrpc_body */ #define OBD_CONNECT_UMASK 0x40000000000ULL /* create uses client umask */ #define OBD_CONNECT_EINPROGRESS 0x80000000000ULL /* client handles -EINPROGRESS - * write RPC error properly */ + * RPC error properly */ #define OBD_CONNECT_GRANT_PARAM 0x100000000000ULL/* extra grant params used for * finer space reservation */ #define OBD_CONNECT_NANOSEC_TIME 0x200000000000ULL /* nanosecond timestamps */ diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index d693fb6..aa79f52 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -1677,16 +1677,16 @@ out: RETURN (rc); } -int osc_brw_redo_request(struct ptlrpc_request *request, - struct osc_brw_async_args *aa) +static int osc_brw_redo_request(struct ptlrpc_request *request, + struct osc_brw_async_args *aa, int rc) { struct ptlrpc_request *new_req; struct osc_brw_async_args *new_aa; struct osc_async_page *oap; - int rc = 0; ENTRY; - DEBUG_REQ(D_ERROR, request, "redo for recoverable error"); + DEBUG_REQ(rc == -EINPROGRESS ? D_RPCTRACE : D_ERROR, request, + "redo for recoverable error %d", rc); rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) == OST_WRITE ? OBD_BRW_WRITE :OBD_BRW_READ, @@ -1713,7 +1713,12 @@ int osc_brw_redo_request(struct ptlrpc_request *request, aa->aa_resends++; new_req->rq_interpret_reply = request->rq_interpret_reply; new_req->rq_async_args = request->rq_async_args; - new_req->rq_sent = cfs_time_current_sec() + aa->aa_resends; + /* cap resend delay to the current request timeout, this is similar to + * what ptlrpc does (see after_reply()) */ + if (aa->aa_resends > new_req->rq_timeout) + new_req->rq_sent = cfs_time_current_sec() + new_req->rq_timeout; + else + new_req->rq_sent = cfs_time_current_sec() + aa->aa_resends; new_req->rq_generation_set = 1; new_req->rq_import_generation = request->rq_import_generation; @@ -1918,7 +1923,7 @@ static int brw_interpret(const struct lu_env *env, aa->aa_oa->o_id, aa->aa_oa->o_seq, rc); } else if (rc == -EINPROGRESS || client_should_resend(aa->aa_resends, aa->aa_cli)) { - rc = osc_brw_redo_request(req, aa); + rc = osc_brw_redo_request(req, aa, rc); } else { CERROR("%s: too many resent retries for object: " ""LPU64":"LPU64", rc = %d.\n", diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index 671cee3..cefa639 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -564,7 +564,8 @@ int ptlrpc_send_error(struct ptlrpc_request *req, int may_be_difficult) } if (req->rq_status != -ENOSPC && req->rq_status != -EACCES && - req->rq_status != -EPERM && req->rq_status != -ENOENT) + req->rq_status != -EPERM && req->rq_status != -ENOENT && + req->rq_status != -EINPROGRESS) req->rq_type = PTL_RPC_MSG_ERR; rc = ptlrpc_send_reply(req, may_be_difficult); diff --git a/lustre/tests/replay-ost-single.sh b/lustre/tests/replay-ost-single.sh index fa62200..a58c4b3 100755 --- a/lustre/tests/replay-ost-single.sh +++ b/lustre/tests/replay-ost-single.sh @@ -261,10 +261,12 @@ test_8a() { return 1 fi do_facet ost1 "lctl set_param fail_loc=0" - wait $ddpid || return 1 + wait $ddpid || true cancel_lru_locks osc cmp $verify $TDIR/$tfile || return 2 rm -f $verify $TDIR/$tfile + message=`dmesg | grep "redo for recoverable error -115"` + [ -z "$message" ] || error "redo error messages found in dmesg" } run_test 8a "Verify redo io: redo io when get -EINPROGRESS error" -- 1.8.3.1