From 52304b7070af4fa902505c080b40a705fbbc3539 Mon Sep 17 00:00:00 2001 From: Niu Yawei Date: Fri, 13 Jan 2012 00:33:22 -0800 Subject: [PATCH] LU-2371 ptlrpc: add support for -EINPROGRESS Backport patches from LU-904, LU-1329 and LU-1788 to introduce support for -EINPROGRESS in lustre 2.1. This is needed for quota interoperability with 2.4 servers. Signed-off-by: Johann Lombardi Change-Id: I17689380c8d42a8daef915342f63d96f777dc6fa Signed-off-by: Niu Yawei Reviewed-on: http://review.whamcloud.com/4645 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/include/lustre/lustre_idl.h | 8 ++- lustre/include/lustre_net.h | 7 ++- lustre/include/obd_support.h | 3 + lustre/liblustre/super.c | 2 +- lustre/llite/llite_lib.c | 6 +- lustre/mdc/mdc_locks.c | 57 +++++++++++++---- lustre/mdc/mdc_reint.c | 31 +++++++++- lustre/mdd/mdd_dir.c | 8 +++ lustre/obdfilter/filter_io_26.c | 3 + lustre/osc/osc_internal.h | 3 +- lustre/osc/osc_request.c | 79 +++++++++++++++++------ lustre/osd-ldiskfs/osd_handler.c | 1 - lustre/ost/ost_handler.c | 3 + lustre/ptlrpc/client.c | 51 +++++++++++++-- lustre/ptlrpc/niobuf.c | 5 +- lustre/tests/replay-ost-single.sh | 124 +++++++++++++++++++++++++++++++++++++ 16 files changed, 345 insertions(+), 46 deletions(-) diff --git a/lustre/include/lustre/lustre_idl.h b/lustre/include/lustre/lustre_idl.h index 00b4a69..0e908ad 100644 --- a/lustre/include/lustre/lustre_idl.h +++ b/lustre/include/lustre/lustre_idl.h @@ -1106,7 +1106,7 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); #define OBD_CONNECT_JOBSTATS 0x20000000000ULL /* jobid in ptlrpc_body */ #define OBD_CONNECT_UMASK 0x40000000000ULL /* create uses client umask */ #define OBD_CONNECT_EINPROGRESS 0x80000000000ULL /* client handles -EINPROGRESS - * write RPC error properly */ + * RPC error properly */ #define OBD_CONNECT_GRANT_PARAM 0x100000000000ULL/* extra grant params used for * finer space reservation */ #define OBD_CONNECT_NANOSEC_TIME 0x200000000000ULL /* nanosecond timestamps */ @@ -1138,7 +1138,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); OBD_CONNECT_FID | LRU_RESIZE_CONNECT_FLAG | \ OBD_CONNECT_VBR | OBD_CONNECT_LOV_V3 | \ OBD_CONNECT_SOM | OBD_CONNECT_FULL20 | \ - OBD_CONNECT_64BITHASH | OBD_CONNECT_UMASK) + OBD_CONNECT_64BITHASH | OBD_CONNECT_UMASK | \ + OBD_CONNECT_EINPROGRESS) #define OST_CONNECT_SUPPORTED (OBD_CONNECT_SRVLOCK | OBD_CONNECT_GRANT | \ OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | \ OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_INDEX | \ @@ -1150,7 +1151,8 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb); OBD_CONNECT_RMT_CLIENT_FORCE | OBD_CONNECT_VBR | \ OBD_CONNECT_MDS | OBD_CONNECT_SKIP_ORPHAN | \ OBD_CONNECT_GRANT_SHRINK | OBD_CONNECT_FULL20 | \ - OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES) + OBD_CONNECT_64BITHASH | OBD_CONNECT_MAXBYTES | \ + OBD_CONNECT_EINPROGRESS) #define ECHO_CONNECT_SUPPORTED (0) #define MGS_CONNECT_SUPPORTED (OBD_CONNECT_VERSION | OBD_CONNECT_AT | \ OBD_CONNECT_FULL20) diff --git a/lustre/include/lustre_net.h b/lustre/include/lustre_net.h index 447714f..3a48694 100644 --- a/lustre/include/lustre_net.h +++ b/lustre/include/lustre_net.h @@ -509,7 +509,12 @@ struct ptlrpc_request { rq_reply_truncate:1, rq_committed:1, /* whether the "rq_set" is a valid one */ - rq_invalid_rqset:1; + rq_invalid_rqset:1, + rq_generation_set:1, + /* do not resend request on -EINPROGRESS */ + rq_no_retry_einprogress:1; + + unsigned int rq_nr_resend; enum rq_phase rq_phase; /* one of RQ_PHASE_* */ enum rq_phase rq_next_phase; /* one of RQ_PHASE_* to be used next */ diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index 4e0a45d..e4457f9 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -236,6 +236,7 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_MDS_WRITEPAGE_PACK 0x184 #define OBD_FAIL_MDS_RECOVERY_ACCEPTS_GAPS 0x185 #define OBD_FAIL_MDS_GET_INFO_NET 0x186 +#define OBD_FAIL_MDS_DQACQ_NET 0x187 #define OBD_FAIL_OST 0x200 #define OBD_FAIL_OST_CONNECT_NET 0x201 @@ -279,6 +280,8 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type, #define OBD_FAIL_OST_BRW_PAUSE_BULK2 0x227 #define OBD_FAIL_OST_MAPBLK_ENOSPC 0x228 #define OBD_FAIL_OST_ENOINO 0x229 +#define OBD_FAIL_OST_DQACQ_NET 0x230 +#define OBD_FAIL_OST_STATFS_EINPROGRESS 0x231 #define OBD_FAIL_LDLM 0x300 #define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c index 447481e..65adb39 100644 --- a/lustre/liblustre/super.c +++ b/lustre/liblustre/super.c @@ -1968,7 +1968,7 @@ llu_fsswop_mount(const char *source, ocd.ocd_connect_flags = OBD_CONNECT_SRVLOCK | OBD_CONNECT_REQPORTAL | OBD_CONNECT_VERSION | OBD_CONNECT_TRUNCLOCK | OBD_CONNECT_FID | OBD_CONNECT_AT | - OBD_CONNECT_FULL20; + OBD_CONNECT_FULL20 | OBD_CONNECT_EINPROGRESS; ocd.ocd_version = LUSTRE_VERSION_CODE; err = obd_connect(NULL, &sbi->ll_dt_exp, obd, &sbi->ll_sb_uuid, &ocd, NULL); diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index 14d4693..639e9f5 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -215,7 +215,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, OBD_CONNECT_CANCELSET | OBD_CONNECT_FID | OBD_CONNECT_AT | OBD_CONNECT_LOV_V3 | OBD_CONNECT_RMT_CLIENT | OBD_CONNECT_VBR | - OBD_CONNECT_FULL20 | OBD_CONNECT_64BITHASH; + OBD_CONNECT_FULL20 | OBD_CONNECT_64BITHASH| + OBD_CONNECT_EINPROGRESS; if (sbi->ll_flags & LL_SBI_SOM_PREVIEW) data->ocd_connect_flags |= OBD_CONNECT_SOM; @@ -398,7 +399,8 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt, OBD_CONNECT_AT | OBD_CONNECT_RMT_CLIENT | OBD_CONNECT_OSS_CAPA | OBD_CONNECT_VBR| OBD_CONNECT_FULL20 | OBD_CONNECT_64BITHASH | - OBD_CONNECT_MAXBYTES; + OBD_CONNECT_MAXBYTES | + OBD_CONNECT_EINPROGRESS; if (sbi->ll_flags & LL_SBI_SOM_PREVIEW) data->ocd_connect_flags |= OBD_CONNECT_SOM; diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index 976360a..0212b59 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -636,7 +636,7 @@ int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo, struct obd_device *obddev = class_exp2obd(exp); struct ptlrpc_request *req = NULL; struct req_capsule *pill; - int flags = extra_lock_flags; + int flags, saved_flags = extra_lock_flags; int rc; struct ldlm_res_id res_id; static const ldlm_policy_data_t lookup_policy = @@ -644,6 +644,8 @@ int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo, static const ldlm_policy_data_t update_policy = { .l_inodebits = { MDS_INODELOCK_UPDATE } }; ldlm_policy_data_t const *policy = &lookup_policy; + int generation, resends = 0; + struct ldlm_reply *lockrep; ENTRY; LASSERTF(!it || einfo->ei_type == LDLM_IBITS, "lock type %d\n", @@ -652,13 +654,15 @@ int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo, fid_build_reg_res_name(&op_data->op_fid1, &res_id); if (it) - flags |= LDLM_FL_HAS_INTENT; + saved_flags |= LDLM_FL_HAS_INTENT; if (it && it->it_op & (IT_UNLINK | IT_GETATTR | IT_READDIR)) policy = &update_policy; - if (reqp) - req = *reqp; + LASSERT(reqp == NULL); + generation = obddev->u.cli.cl_import->imp_generation; +resend: + flags = saved_flags; if (!it) { /* The only way right now is FLOCK, in this case we hide flock policy as lmm, but lmmsize is 0 */ @@ -688,6 +692,17 @@ int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo, RETURN(PTR_ERR(req)); pill = &req->rq_pill; + if (req != NULL && it && it->it_op & IT_CREAT) + /* ask ptlrpc not to resend on EINPROGRESS since we have our own + * retry logic */ + req->rq_no_retry_einprogress = 1; + + if (resends) { + req->rq_generation_set = 1; + req->rq_import_generation = generation; + req->rq_sent = cfs_time_current_sec() + resends; + } + /* It is important to obtain rpc_lock first (if applicable), so that * threads that are serialised with rpc_lock are not polluting our * rpcs in flight counter. We do not do flock request limiting, though*/ @@ -704,13 +719,6 @@ int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo, rc = ldlm_cli_enqueue(exp, &req, einfo, &res_id, policy, &flags, NULL, 0, lockh, 0); - if (reqp) - *reqp = req; - - if (it) { - mdc_exit_request(&obddev->u.cli); - mdc_put_rpc_lock(obddev->u.cli.cl_rpc_lock, it); - } if (!it) { /* For flock requests we immediatelly return without further delay and let caller deal with the rest, since rest of @@ -719,12 +727,39 @@ int mdc_enqueue(struct obd_export *exp, struct ldlm_enqueue_info *einfo, RETURN(rc); } + mdc_exit_request(&obddev->u.cli); + mdc_put_rpc_lock(obddev->u.cli.cl_rpc_lock, it); + if (rc < 0) { CERROR("ldlm_cli_enqueue: %d\n", rc); mdc_clear_replay_flag(req, rc); ptlrpc_req_finished(req); RETURN(rc); } + + lockrep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP); + LASSERT(lockrep != NULL); + + /* Retry the create infinitely when we get -EINPROGRESS from + * server. This is required by the new quota design. */ + if (it && it->it_op & IT_CREAT && + (int)lockrep->lock_policy_res2 == -EINPROGRESS) { + mdc_clear_replay_flag(req, rc); + ptlrpc_req_finished(req); + resends++; + + CDEBUG(D_HA, "%s: resend:%d op:%d "DFID"/"DFID"\n", + obddev->obd_name, resends, it->it_op, + PFID(&op_data->op_fid1), PFID(&op_data->op_fid2)); + + if (generation == obddev->u.cli.cl_import->imp_generation) { + goto resend; + } else { + CDEBUG(D_HA, "resend cross eviction\n"); + RETURN(-EIO); + } + } + rc = mdc_finish_enqueue(exp, req, einfo, it, lockh, rc); RETURN(rc); diff --git a/lustre/mdc/mdc_reint.c b/lustre/mdc/mdc_reint.c index 296f030b..da636d3 100644 --- a/lustre/mdc/mdc_reint.c +++ b/lustre/mdc/mdc_reint.c @@ -220,7 +220,9 @@ int mdc_create(struct obd_export *exp, struct md_op_data *op_data, { struct ptlrpc_request *req; int level, rc; - int count = 0; + int count, resends = 0; + struct obd_import *import = exp->exp_obd->u.cli.cl_import; + int generation = import->imp_generation; CFS_LIST_HEAD(cancels); ENTRY; @@ -237,6 +239,8 @@ int mdc_create(struct obd_export *exp, struct md_op_data *op_data, } } +rebuild: + count = 0; if ((op_data->op_flags & MF_MDC_CANCEL_FID1) && (fid_is_sane(&op_data->op_fid1))) count = mdc_resource_get_unused(exp, &op_data->op_fid1, @@ -270,6 +274,15 @@ int mdc_create(struct obd_export *exp, struct md_op_data *op_data, ptlrpc_request_set_replen(req); + /* ask ptlrpc not to resend on EINPROGRESS since we have our own retry + * logic here */ + req->rq_no_retry_einprogress = 1; + + if (resends) { + req->rq_generation_set = 1; + req->rq_import_generation = generation; + req->rq_sent = cfs_time_current_sec() + resends; + } level = LUSTRE_IMP_FULL; resend: rc = mdc_reint(req, exp->exp_obd->u.cli.cl_rpc_lock, level); @@ -278,6 +291,22 @@ int mdc_create(struct obd_export *exp, struct md_op_data *op_data, if (rc == -ERESTARTSYS) { level = LUSTRE_IMP_RECOVER; goto resend; + } else if (rc == -EINPROGRESS) { + /* Retry create infinitely until succeed or get other + * error code. */ + ptlrpc_req_finished(req); + resends++; + + CDEBUG(D_HA, "%s: resend:%d create on "DFID"/"DFID"\n", + exp->exp_obd->obd_name, resends, + PFID(&op_data->op_fid1), PFID(&op_data->op_fid2)); + + if (generation == import->imp_generation) { + goto rebuild; + } else { + CDEBUG(D_HA, "resend cross eviction\n"); + RETURN(-EIO); + } } else if (rc == 0) { struct mdt_body *body; struct lustre_capa *capa; diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index a2ba0e1..7ea11de 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -1718,6 +1718,9 @@ static int mdd_create(const struct lu_env *env, } #endif + if (OBD_FAIL_CHECK(OBD_FAIL_MDS_DQACQ_NET)) + GOTO(out_pending, rc = -EINPROGRESS); + /* * No RPC inside the transaction, so OST objects should be created at * first. @@ -1895,6 +1898,11 @@ out_pending: quota_opc); } #endif + + /* The child object shouldn't be cached anymore */ + if (rc) + cfs_set_bit(LU_OBJECT_HEARD_BANSHEE, + &child->mo_lu.lo_header->loh_flags); return rc; } diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c index e2e49ea..eb9a382 100644 --- a/lustre/obdfilter/filter_io_26.c +++ b/lustre/obdfilter/filter_io_26.c @@ -694,6 +694,9 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, if (rc == -ENOTCONN) GOTO(cleanup, rc); + if (OBD_FAIL_CHECK(OBD_FAIL_OST_DQACQ_NET)) + GOTO(cleanup, rc = -EINPROGRESS); + push_ctxt(&saved, &obd->obd_lvfs_ctxt, NULL); cleanup_phase = 2; diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h index 9b11a9d..09eff34 100644 --- a/lustre/osc/osc_internal.h +++ b/lustre/osc/osc_internal.h @@ -193,7 +193,8 @@ extern struct lu_device_type osc_device_type; static inline int osc_recoverable_error(int rc) { - return (rc == -EIO || rc == -EROFS || rc == -ENOMEM || rc == -EAGAIN); + return (rc == -EIO || rc == -EROFS || rc == -ENOMEM || + rc == -EAGAIN || rc == -EINPROGRESS); } #ifndef min_t diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 545dc4b..cda23c2 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -1325,6 +1325,9 @@ static int osc_brw_prep_request(int cmd, struct client_obd *cli,struct obdo *oa, } req->rq_request_portal = OST_IO_PORTAL; /* bug 7198 */ ptlrpc_at_set_req_timeout(req); + /* ask ptlrpc not to resend on EINPROGRESS since BRWs have their own + * retry logic */ + req->rq_no_retry_einprogress = 1; if (opc == OST_WRITE) desc = ptlrpc_prep_bulk_imp(req, page_count, @@ -1698,12 +1701,13 @@ static int osc_brw_internal(int cmd, struct obd_export *exp, struct obdo *oa, struct ptlrpc_request *req; int rc; cfs_waitq_t waitq; - int resends = 0; + int generation, resends = 0; struct l_wait_info lwi; ENTRY; cfs_waitq_init(&waitq); + generation = exp->exp_obd->u.cli.cl_import->imp_generation; restart_bulk: rc = osc_brw_prep_request(cmd, &exp->exp_obd->u.cli, oa, lsm, @@ -1711,6 +1715,12 @@ restart_bulk: if (rc != 0) return (rc); + if (resends) { + req->rq_generation_set = 1; + req->rq_import_generation = generation; + req->rq_sent = cfs_time_current_sec() + resends; + } + rc = ptlrpc_queue_wait(req); if (rc == -ETIMEDOUT && req->rq_resend) { @@ -1722,38 +1732,48 @@ restart_bulk: rc = osc_brw_fini_request(req, rc); ptlrpc_req_finished(req); + /* When server return -EINPROGRESS, client should always retry + * regardless of the number of times the bulk was resent already.*/ if (osc_recoverable_error(rc)) { resends++; - if (!client_should_resend(resends, &exp->exp_obd->u.cli)) { - CERROR("too many resend retries, returning error\n"); - RETURN(-EIO); + if (rc != -EINPROGRESS && + !client_should_resend(resends, &exp->exp_obd->u.cli)) { + CERROR("%s: too many resend retries for object: " + ""LPU64":"LPU64", rc = %d.\n", + exp->exp_obd->obd_name, oa->o_id, oa->o_seq, rc); + goto out; + } + if (generation != + exp->exp_obd->u.cli.cl_import->imp_generation) { + CDEBUG(D_HA, "%s: resend cross eviction for object: " + ""LPU64":"LPU64", rc = %d.\n", + exp->exp_obd->obd_name, oa->o_id, oa->o_seq, rc); + goto out; } - lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends), NULL, NULL, NULL); + lwi = LWI_TIMEOUT_INTR(cfs_time_seconds(resends), NULL, NULL, + NULL); l_wait_event(waitq, 0, &lwi); goto restart_bulk; } - +out: + if (rc == -EAGAIN || rc == -EINPROGRESS) + rc = -EIO; RETURN (rc); } -int osc_brw_redo_request(struct ptlrpc_request *request, - struct osc_brw_async_args *aa) +static int osc_brw_redo_request(struct ptlrpc_request *request, + struct osc_brw_async_args *aa, int rc) { struct ptlrpc_request *new_req; struct ptlrpc_request_set *set = request->rq_set; struct osc_brw_async_args *new_aa; struct osc_async_page *oap; - int rc = 0; ENTRY; - if (!client_should_resend(aa->aa_resends, aa->aa_cli)) { - CERROR("too many resent retries, returning error\n"); - RETURN(-EIO); - } - - DEBUG_REQ(D_ERROR, request, "redo for recoverable error"); + DEBUG_REQ(rc == -EINPROGRESS ? D_RPCTRACE : D_ERROR, request, + "redo for recoverable error %d", rc); rc = osc_brw_prep_request(lustre_msg_get_opc(request->rq_reqmsg) == OST_WRITE ? OBD_BRW_WRITE :OBD_BRW_READ, @@ -1783,7 +1803,14 @@ int osc_brw_redo_request(struct ptlrpc_request *request, aa->aa_resends++; new_req->rq_interpret_reply = request->rq_interpret_reply; new_req->rq_async_args = request->rq_async_args; - new_req->rq_sent = cfs_time_current_sec() + aa->aa_resends; + /* cap resend delay to the current request timeout, this is similar to + * what ptlrpc does (see after_reply()) */ + if (aa->aa_resends > new_req->rq_timeout) + new_req->rq_sent = cfs_time_current_sec() + new_req->rq_timeout; + else + new_req->rq_sent = cfs_time_current_sec() + aa->aa_resends; + new_req->rq_generation_set = 1; + new_req->rq_import_generation = request->rq_import_generation; new_aa = ptlrpc_req_async_args(new_req); @@ -2229,6 +2256,8 @@ static int brw_interpret(const struct lu_env *env, rc = osc_brw_fini_request(req, rc); CDEBUG(D_INODE, "request %p aa %p rc %d\n", req, aa, rc); + /* When server return -EINPROGRESS, client should always retry + * regardless of the number of times the bulk was resent already. */ if (osc_recoverable_error(rc)) { /* Only retry once for mmaped files since the mmaped page * might be modified at anytime. We have to retry at least @@ -2239,10 +2268,24 @@ static int brw_interpret(const struct lu_env *env, aa->aa_oa->o_valid & OBD_MD_FLFLAGS && aa->aa_oa->o_flags & OBD_FL_MMAP) { rc = 0; - } else { - rc = osc_brw_redo_request(req, aa); + } else if (req->rq_import_generation != + req->rq_import->imp_generation) { + CDEBUG(D_HA, "%s: resend cross eviction for object: " + ""LPU64":"LPU64", rc = %d.\n", + req->rq_import->imp_obd->obd_name, + aa->aa_oa->o_id, aa->aa_oa->o_seq, rc); + rc = -EIO; + } else if (rc == -EINPROGRESS || + client_should_resend(aa->aa_resends, aa->aa_cli)) { + rc = osc_brw_redo_request(req, aa, rc); if (rc == 0) RETURN(0); + } else { + CERROR("%s: too many resent retries for object: " + ""LPU64":"LPU64", rc = %d.\n", + req->rq_import->imp_obd->obd_name, + aa->aa_oa->o_id, aa->aa_oa->o_seq, rc); + rc = -EIO; } } diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index df1cf15..4d1725e 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -817,7 +817,6 @@ static void osd_object_release(const struct lu_env *env, { struct osd_object *o = osd_obj(l); - LASSERT(!lu_object_is_dying(l->lo_header)); if (o->oo_inode != NULL && osd_inode_unlinked(o->oo_inode)) cfs_set_bit(LU_OBJECT_HEARD_BANSHEE, &l->lo_header->loh_flags); } diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index f0d391b..c775e2b 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -318,6 +318,9 @@ static int ost_statfs(struct ptlrpc_request *req) if (req->rq_status != 0) CERROR("ost: statfs failed: rc %d\n", req->rq_status); + if (OBD_FAIL_CHECK(OBD_FAIL_OST_STATFS_EINPROGRESS)) + req->rq_status = -EINPROGRESS; + RETURN(0); } diff --git a/lustre/ptlrpc/client.c b/lustre/ptlrpc/client.c index c233cf4..09374c4 100644 --- a/lustre/ptlrpc/client.c +++ b/lustre/ptlrpc/client.c @@ -1230,6 +1230,36 @@ static int after_reply(struct ptlrpc_request *req) RETURN(rc); } + /* retry indefinitely on EINPROGRESS */ + if (lustre_msg_get_status(req->rq_repmsg) == -EINPROGRESS && + ptlrpc_no_resend(req) == 0 && !req->rq_no_retry_einprogress) { + time_t now = cfs_time_current_sec(); + + DEBUG_REQ(D_RPCTRACE, req, "Resending request on EINPROGRESS"); + req->rq_resend = 1; + req->rq_nr_resend++; + + /* allocate new xid to avoid reply reconstruction */ + if (!req->rq_bulk) { + /* new xid is already allocated for bulk in + * ptlrpc_check_set() */ + req->rq_xid = ptlrpc_next_xid(); + DEBUG_REQ(D_RPCTRACE, req, "Allocating new xid for " + "resend on EINPROGRESS"); + } + + /* Readjust the timeout for current conditions */ + ptlrpc_at_set_req_timeout(req); + /* delay resend to give a chance to the server to get ready. + * The delay is increased by 1s on every resend and is capped to + * the current request timeout (i.e. obd_timeout if AT is off, + * or AT service time x 125% + 5s, see at_est2timeout) */ + if (req->rq_nr_resend > req->rq_timeout) + req->rq_sent = now + req->rq_timeout; + else + req->rq_sent = now + req->rq_nr_resend; + } + /* * Security layer unwrap might ask resend this request. */ @@ -1335,23 +1365,25 @@ static int after_reply(struct ptlrpc_request *req) * Helper function to send request \a req over the network for the first time * Also adjusts request phase. * Returns 0 on success or error code. - */ + */ static int ptlrpc_send_new_req(struct ptlrpc_request *req) { - struct obd_import *imp; + struct obd_import *imp = req->rq_import; int rc; ENTRY; LASSERT(req->rq_phase == RQ_PHASE_NEW); - if (req->rq_sent && (req->rq_sent > cfs_time_current_sec())) + if (req->rq_sent && (req->rq_sent > cfs_time_current_sec()) && + (!req->rq_generation_set || + req->rq_import_generation == imp->imp_generation)) RETURN (0); ptlrpc_rqphase_move(req, RQ_PHASE_RPC); - imp = req->rq_import; cfs_spin_lock(&imp->imp_lock); - req->rq_import_generation = imp->imp_generation; + if (!req->rq_generation_set) + req->rq_import_generation = imp->imp_generation; if (ptlrpc_import_delay_req(imp, req, &rc)) { cfs_spin_lock(&req->rq_lock); @@ -1440,7 +1472,12 @@ int ptlrpc_check_set(const struct lu_env *env, struct ptlrpc_request_set *set) /* delayed send - skip */ if (req->rq_phase == RQ_PHASE_NEW && req->rq_sent) - continue; + continue; + + /* delayed resend - skip */ + if (req->rq_phase == RQ_PHASE_RPC && req->rq_resend && + req->rq_sent > cfs_time_current_sec()) + continue; if (!(req->rq_phase == RQ_PHASE_RPC || req->rq_phase == RQ_PHASE_BULK || @@ -1952,6 +1989,8 @@ int ptlrpc_set_next_timeout(struct ptlrpc_request_set *set) if (req->rq_phase == RQ_PHASE_NEW) deadline = req->rq_sent; + else if (req->rq_phase == RQ_PHASE_RPC && req->rq_resend) + deadline = req->rq_sent; else deadline = req->rq_sent + req->rq_timeout; diff --git a/lustre/ptlrpc/niobuf.c b/lustre/ptlrpc/niobuf.c index 240b226..8705446 100644 --- a/lustre/ptlrpc/niobuf.c +++ b/lustre/ptlrpc/niobuf.c @@ -509,7 +509,10 @@ int ptlrpc_send_error(struct ptlrpc_request *req, int may_be_difficult) RETURN(rc); } - req->rq_type = PTL_RPC_MSG_ERR; + if (req->rq_status != -ENOSPC && req->rq_status != -EACCES && + req->rq_status != -EPERM && req->rq_status != -ENOENT && + req->rq_status != -EINPROGRESS) + req->rq_type = PTL_RPC_MSG_ERR; rc = ptlrpc_send_reply(req, may_be_difficult); RETURN(rc); diff --git a/lustre/tests/replay-ost-single.sh b/lustre/tests/replay-ost-single.sh index 7a74166..6627e40 100755 --- a/lustre/tests/replay-ost-single.sh +++ b/lustre/tests/replay-ost-single.sh @@ -231,6 +231,130 @@ test_7() { } run_test 7 "Fail OST before obd_destroy" +test_8a() { + [ -z "$(lctl get_param -n osc.${FSNAME}-*.connect_flags|grep einprogress)" \ + ] && skip_env "OSTs don't support EINPROGRESS" && return + verify=$ROOT/tmp/verify-$$ + dd if=/dev/urandom of=$verify bs=4096 count=1280 || + error "Create verify file failed" +#define OBD_FAIL_OST_DQACQ_NET 0x230 + do_facet ost1 "lctl set_param fail_loc=0x230" + dd if=$verify of=$TDIR/$tfile bs=4096 count=1280 oflag=sync & + ddpid=$! + sleep $TIMEOUT # wait for the io to become redo io + if ! ps -p $ddpid > /dev/null 2>&1; then + error "redo io finished incorrectly" + return 1 + fi + do_facet ost1 "lctl set_param fail_loc=0" + wait $ddpid || true + cancel_lru_locks osc + cmp $verify $TDIR/$tfile || return 2 + rm -f $verify $TDIR/$tfile + message=`dmesg | grep "redo for recoverable error -115"` + [ -z "$message" ] || error "redo error messages found in dmesg" +} +run_test 8a "Verify redo io: redo io when get -EINPROGRESS error" + +test_8b() { + [ -z "$(lctl get_param -n osc.${FSNAME}-*.connect_flags|grep einprogress)" \ + ] && skip_env "OSTs don't support EINPROGRESS" && return + verify=$ROOT/tmp/verify-$$ + dd if=/dev/urandom of=$verify bs=4096 count=1280 || + error "Create verify file failed" +#define OBD_FAIL_OST_DQACQ_NET 0x230 + do_facet ost1 "lctl set_param fail_loc=0x230" + dd if=$verify of=$TDIR/$tfile bs=4096 count=1280 oflag=sync & + ddpid=$! + sleep $TIMEOUT # wait for the io to become redo io + fail ost1 + do_facet ost1 "lctl set_param fail_loc=0" + wait $ddpid || return 1 + cancel_lru_locks osc + cmp $verify $TDIR/$tfile || return 2 + rm -f $verify $TDIR/$tfile +} +run_test 8b "Verify redo io: redo io should success after recovery" + +test_8c() { + [ -z "$(lctl get_param -n osc.${FSNAME}-*.connect_flags|grep einprogress)" \ + ] && skip_env "OSTs don't support EINPROGRESS" && return + [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.3.0) ]] || + { skip "Need MDS version at least 2.3.0"; return; } + verify=$ROOT/tmp/verify-$$ + dd if=/dev/urandom of=$verify bs=4096 count=1280 || + error "Create verify file failed" +#define OBD_FAIL_OST_DQACQ_NET 0x230 + do_facet ost1 "lctl set_param fail_loc=0x230" + dd if=$verify of=$TDIR/$tfile bs=4096 count=1280 oflag=sync & + ddpid=$! + sleep $TIMEOUT # wait for the io to become redo io + ost_evict_client + # allow recovery to complete + sleep $((TIMEOUT + 2)) + do_facet ost1 "lctl set_param fail_loc=0" + wait $ddpid + cancel_lru_locks osc + cmp $verify $TDIR/$tfile && return 2 + rm -f $verify $TDIR/$tfile +} +run_test 8c "Verify redo io: redo io should fail after eviction" + +test_8d() { + [ -z "$(lctl get_param -n mdc.${FSNAME}-*.connect_flags|grep einprogress)" \ + ] && skip_env "MDT doesn't support EINPROGRESS" && return + [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.3.0) ]] || + { skip "Need MDS version at least 2.3.0"; return; } +#define OBD_FAIL_MDS_DQACQ_NET 0x187 + do_facet $SINGLEMDS "lctl set_param fail_loc=0x187" + # test the non-intent create path + mcreate $TDIR/$tfile & + cpid=$! + sleep $TIMEOUT + if ! ps -p $cpid > /dev/null 2>&1; then + error "mknod finished incorrectly" + return 1 + fi + do_facet $SINGLEMDS "lctl set_param fail_loc=0" + wait $cpid || return 2 + stat $TDIR/$tfile || error "mknod failed" + + rm $TDIR/$tfile + +#define OBD_FAIL_MDS_DQACQ_NET 0x187 + do_facet $SINGLEMDS "lctl set_param fail_loc=0x187" + # test the intent create path + openfile -f O_RDWR:O_CREAT $TDIR/$tfile & + cpid=$! + sleep $TIMEOUT + if ! ps -p $cpid > /dev/null 2>&1; then + error "open finished incorrectly" + return 3 + fi + do_facet $SINGLEMDS "lctl set_param fail_loc=0" + wait $cpid || return 4 + stat $TDIR/$tfile || error "open failed" +} +run_test 8d "Verify redo creation on -EINPROGRESS" + +test_8e() { + [ -z "$(lctl get_param -n osc.${FSNAME}-*.connect_flags|grep einprogress)" \ + ] && skip_env "OSTs don't support EINPROGRESS" && return + sleep 1 # ensure we have a fresh statfs +#define OBD_FAIL_OST_STATFS_EINPROGRESS 0x231 + do_facet ost1 "lctl set_param fail_loc=0x231" + df $MOUNT & + dfpid=$! + sleep $TIMEOUT + if ! ps -p $dfpid > /dev/null 2>&1; then + do_facet ost1 "lctl set_param fail_loc=0" + error "df shouldn't have completed!" + return 1 + fi + do_facet ost1 "lctl set_param fail_loc=0" +} +run_test 8e "Verify that ptlrpc resends request on -EINPROGRESS" + complete $(basename $0) $SECONDS check_and_cleanup_lustre exit_status -- 1.8.3.1