From 51206e8cd42134400fa0b6259a92d7138f3dc984 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Fri, 13 Jan 2012 00:33:22 -0800 Subject: [PATCH] LU-904 ptlrpc: redo io on -EINPROGRESS When the server returns -EINPROGRESS for a write RPC, the client should keep resending the RPC until the server returns a different error code or the client is evicted. This is required by the new quota design: when a write on an OST cannot acquire quota from the master because of a broken network, the OST should return -EINPROGRESS to tell the client to retry the write indefinitely. This patch also fixes, in a lightweight manner, the defect that a redo io RPC could not be aborted during eviction. Signed-off-by: Niu Yawei Change-Id: Iea393cb1ea55e9d006f52dbfc39a2b9a3670d682 Reviewed-on: http://review.whamcloud.com/1962 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Johann Lombardi Reviewed-by: Fan Yong Reviewed-by: Oleg Drokin --- lustre/include/lustre/lustre_idl.h | 3 +- lustre/include/lustre_net.h | 3 +- lustre/include/obd_support.h | 1 + lustre/liblustre/super.c | 2 +- lustre/llite/llite_lib.c | 3 +- lustre/obdfilter/filter_io_26.c | 3 ++ lustre/osc/osc_internal.h | 3 +- lustre/osc/osc_request.c | 61 ++++++++++++++++++++++++++++++-------- lustre/ptlrpc/client.c | 12 ++++---- lustre/tests/replay-ost-single.sh | 60 +++++++++++++++++++++++++++++++++++++ 10 files changed, 129 insertions(+), 22 deletions(-) diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 42426f3..e0e22db 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -1172,7 +1172,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); OBD_CONNECT_MDS | OBD_CONNECT_SKIP_ORPHAN | \ OBD_CONNECT_GRANT_SHRINK | OBD_CONNECT_FULL20 | \ OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES | \ - OBD_CONNECT_MAX_EASIZE) + OBD_CONNECT_MAX_EASIZE | \ + OBD_CONNECT_EINPROGRESS) #define ECHO_CONNECT_SUPPORTED (0) #define MGS_CONNECT_SUPPORTED (OBD_CONNECT_VERSION | OBD_CONNECT_AT | \ OBD_CONNECT_FULL20 | 
OBD_CONNECT_IMP_RECOV) diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 682a977..5c60f12 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -516,7 +516,8 @@ struct ptlrpc_request { rq_reply_truncate:1, rq_committed:1, /* whether the "rq_set" is a valid one */ - rq_invalid_rqset:1; + rq_invalid_rqset:1, + rq_generation_set:1; enum rq_phase rq_phase; /* one of RQ_PHASE_* */ enum rq_phase rq_next_phase; /* one of RQ_PHASE_* to be used next */ diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 4c96565..33cc91e 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -288,6 +288,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_OST_BRW_PAUSE_BULK2 0x227 #define OBD_FAIL_OST_MAPBLK_ENOSPC 0x228 #define OBD_FAIL_OST_ENOINO 0x229 +#define OBD_FAIL_OST_DQACQ_NET 0x230 #define OBD_FAIL_LDLM 0x300 #define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c index 9f474bc..312ce52 100644 --- a/lustre/liblustre/super.c +++ b/lustre/liblustre/super.c @@ -1967,7 +1967,7 @@ llu_fsswop_mount(const char *source, ocd.ocd_connect_flags = OBD_CONNECT_SRVLOCK | OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_FID | OBD_CONNECT_AT | - OBD_CONNECT_FULL20; + OBD_CONNECT_FULL20 | OBD_CONNECT_EINPROGRESS; ocd.ocd_version = LUSTRE_VERSION_CODE; err = obd_connect(NULL, &sbi->ll_dt_exp, obd, &sbi->ll_sb_uuid, &ocd, NULL); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 8bc0c5e..efaa76f 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -385,7 +385,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, OBD_CONNECT_AT | OBD_CONNECT_RMT_CLIENT | OBD_CONNECT_OSS_CAPA | OBD_CONNECT_VBR| OBD_CONNECT_FULL20 | OBD_CONNECT_64BITHASH | - OBD_CONNECT_MAXBYTES; + OBD_CONNECT_MAXBYTES | + 
OBD_CONNECT_EINPROGRESS; if (sbi->ll_flags & LL_SBI_SOM_PREVIEW) data->ocd_connect_flags |= OBD_CONNECT_SOM; diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c index 896fa0d..cafb080 100644 --- a/lustre/obdfilter/filter_io_26.c +++ b/lustre/obdfilter/filter_io_26.c @@ -693,6 +693,9 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, if (rc == -ENOTCONN) GOTO(cleanup, rc); + if (OBD_FAIL_CHECK(OBD_FAIL_OST_DQACQ_NET)) + GOTO(cleanup, rc = -EINPROGRESS); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); cleanup_phase = 2; diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h index 28b5ecb..177ebf1 100644 --- a/lustre/osc/osc_internal.h +++ b/lustre/osc/osc_internal.h @@ -191,7 +191,8 @@ extern struct lu_device_type osc_device_type; static inline int osc_recoverable_error(int rc) { - return (rc == -EIO || rc == -EROFS || rc == -ENOMEM || rc == -EAGAIN); + return (rc == -EIO || rc == -EROFS || rc == -ENOMEM || + rc == -EAGAIN || rc == -EINPROGRESS); } #ifndef min_t diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 6d87a0a..995391d 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -1696,12 +1696,13 @@ static int osc_brw_internal(int cmd, struct obd_export *exp, struct obdo *oa, struct ptlrpc_request *req; int rc; cfs_waitq_t waitq; - int resends = 0; + int generation, resends = 0; struct l_wait_info lwi; ENTRY; cfs_waitq_init(&waitq); + generation = exp->exp_obd->u.cli.cl_import->imp_generation; restart_bulk: rc = osc_brw_prep_request(cmd, &exp->exp_obd->u.cli, oa, lsm, @@ -1709,6 +1710,11 @@ restart_bulk: if (rc != 0) return (rc); + if (resends) { + req->rq_generation_set = 1; + req->rq_import_generation = generation; + } + rc = ptlrpc_queue_wait(req); if (rc == -ETIMEDOUT && req->rq_resend) { @@ -1720,19 +1726,34 @@ restart_bulk: rc = osc_brw_fini_request(req, rc); ptlrpc_req_finished(req); + /* When server return -EINPROGRESS, client should always retry + * regardless 
of the number of times the bulk was resent already.*/ if (osc_recoverable_error(rc)) { resends++; - if (!client_should_resend(resends, &exp->exp_obd->u.cli)) { - CERROR("too many resend retries, returning error\n"); - RETURN(-EIO); + if (rc != -EINPROGRESS && + !client_should_resend(resends, &exp->exp_obd->u.cli)) { + CERROR("%s: too many resend retries for object: " + ""LPU64":"LPU64", rc = %d.\n", + exp->exp_obd->obd_name, oa->o_id, oa->o_seq, rc); + goto out; + } + if (generation != + exp->exp_obd->u.cli.cl_import->imp_generation) { + CDEBUG(D_HA, "%s: resend cross eviction for object: " + ""LPU64":"LPU64", rc = %d.\n", + exp->exp_obd->obd_name, oa->o_id, oa->o_seq, rc); + goto out; } - lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends), NULL, NULL, NULL); + lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends), NULL, NULL, + NULL); l_wait_event(waitq, 0, &lwi); goto restart_bulk; } - +out: + if (rc == -EAGAIN || rc == -EINPROGRESS) + rc = -EIO; RETURN (rc); } @@ -1746,11 +1767,6 @@ int osc_brw_redo_request(struct ptlrpc_request *request, int rc = 0; ENTRY; - if (!client_should_resend(aa->aa_resends, aa->aa_cli)) { - CERROR("too many resent retries, returning error\n"); - RETURN(-EIO); - } - DEBUG_REQ(D_ERROR, request, "redo for recoverable error"); rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) == @@ -1782,6 +1798,8 @@ int osc_brw_redo_request(struct ptlrpc_request *request, new_req->rq_interpret_reply = request->rq_interpret_reply; new_req->rq_async_args = request->rq_async_args; new_req->rq_sent = cfs_time_current_sec() + aa->aa_resends; + new_req->rq_generation_set = 1; + new_req->rq_import_generation = request->rq_import_generation; new_aa = ptlrpc_req_async_args(new_req); @@ -2232,10 +2250,29 @@ static int brw_interpret(const struct lu_env *env, rc = osc_brw_fini_request(req, rc); CDEBUG(D_INODE, "request %p aa %p rc %d\n", req, aa, rc); + /* When server return -EINPROGRESS, client should always retry + * regardless of the number of times the 
bulk was resent already. */ if (osc_recoverable_error(rc)) { - rc = osc_brw_redo_request(req, aa); + if (req->rq_import_generation != + req->rq_import->imp_generation) { + CDEBUG(D_HA, "%s: resend cross eviction for object: " + ""LPU64":"LPU64", rc = %d.\n", + req->rq_import->imp_obd->obd_name, + aa->aa_oa->o_id, aa->aa_oa->o_seq, rc); + } else if (rc == -EINPROGRESS || + client_should_resend(aa->aa_resends, aa->aa_cli)) { + rc = osc_brw_redo_request(req, aa); + } else { + CERROR("%s: too many resent retries for object: " + ""LPU64":"LPU64", rc = %d.\n", + req->rq_import->imp_obd->obd_name, + aa->aa_oa->o_id, aa->aa_oa->o_seq, rc); + } + if (rc == 0) RETURN(0); + else if (rc == -EAGAIN || rc == -EINPROGRESS) + rc = -EIO; } if (aa->aa_ocapa) { diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index 2e1d161..6b50207 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -1317,23 +1317,25 @@ static int after_reply(struct ptlrpc_request *req) * Helper function to send request \a req over the network for the first time * Also adjusts request phase. * Returns 0 on success or error code. 
- */ + */ static int ptlrpc_send_new_req(struct ptlrpc_request *req) { - struct obd_import *imp; + struct obd_import *imp = req->rq_import; int rc; ENTRY; LASSERT(req->rq_phase == RQ_PHASE_NEW); - if (req->rq_sent && (req->rq_sent > cfs_time_current_sec())) + if (req->rq_sent && (req->rq_sent > cfs_time_current_sec()) && + (!req->rq_generation_set || + req->rq_import_generation == imp->imp_generation)) RETURN (0); ptlrpc_rqphase_move(req, RQ_PHASE_RPC); - imp = req->rq_import; cfs_spin_lock(&imp->imp_lock); - req->rq_import_generation = imp->imp_generation; + if (!req->rq_generation_set) + req->rq_import_generation = imp->imp_generation; if (ptlrpc_import_delay_req(imp, req, &rc)) { cfs_spin_lock(&req->rq_lock); diff --git a/lustre/tests/replay-ost-single.sh b/lustre/tests/replay-ost-single.sh index 7727f38..036750b 100755 --- a/lustre/tests/replay-ost-single.sh +++ b/lustre/tests/replay-ost-single.sh @@ -245,6 +245,66 @@ test_7() { } run_test 7 "Fail OST before obd_destroy" +test_8a() { + verify=$ROOT/tmp/verify-$$ + dd if=/dev/urandom of=$verify bs=4096 count=1280 || + error "Create verify file failed" +#define OBD_FAIL_OST_DQACQ_NET 0x230 + do_facet ost1 "lctl set_param fail_loc=0x230" + dd if=$verify of=$TDIR/$tfile bs=4096 count=1280 oflag=sync & + ddpid=$! + sleep $TIMEOUT # wait for the io to become redo io + if ! ps -p $ddpid > /dev/null 2>&1; then + error "redo io finished incorrectly" + return 1 + fi + do_facet ost1 "lctl set_param fail_loc=0" + wait $ddpid || return 1 + cancel_lru_locks osc + cmp $verify $TDIR/$tfile || return 2 + rm -f $verify $TDIR/$tfile +} +run_test 8a "Verify redo io: redo io when get -EINPROGRESS error" + +test_8b() { + verify=$ROOT/tmp/verify-$$ + dd if=/dev/urandom of=$verify bs=4096 count=1280 || + error "Create verify file failed" +#define OBD_FAIL_OST_DQACQ_NET 0x230 + do_facet ost1 "lctl set_param fail_loc=0x230" + dd if=$verify of=$TDIR/$tfile bs=4096 count=1280 oflag=sync & + ddpid=$! 
+ sleep $TIMEOUT # wait for the io to become redo io + fail ost1 + do_facet ost1 "lctl set_param fail_loc=0" + wait $ddpid || return 1 + cancel_lru_locks osc + cmp $verify $TDIR/$tfile || return 2 + rm -f $verify $TDIR/$tfile +} +run_test 8b "Verify redo io: redo io should success after recovery" + +test_8c() { + verify=$ROOT/tmp/verify-$$ + dd if=/dev/urandom of=$verify bs=4096 count=1280 || + error "Create verify file failed" +#define OBD_FAIL_OST_DQACQ_NET 0x230 + do_facet ost1 "lctl set_param fail_loc=0x230" + dd if=$verify of=$TDIR/$tfile bs=4096 count=1280 oflag=sync & + ddpid=$! + sleep $TIMEOUT # wait for the io to become redo io + ost_evict_client + # allow recovery to complete + sleep $((TIMEOUT + 2)) + do_facet ost1 "lctl set_param fail_loc=0" + wait $ddpid + cancel_lru_locks osc + cmp $verify $TDIR/$tfile && return 2 + rm -f $verify $TDIR/$tfile +} +run_test 8c "Verify redo io: redo io should fail after eviction" + + complete $(basename $0) $SECONDS check_and_cleanup_lustre exit_status -- 1.8.3.1