From: Andriy Skulysh Date: Sun, 3 Mar 2019 18:10:31 +0000 (+0200) Subject: LU-11952 mdt: fix reconstruct open X-Git-Tag: 2.14.56~22 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=cf6ce3329f92f146206be8b63846d2c6e45c92d6 LU-11952 mdt: fix reconstruct open We shouldn't start a new transaction on resend. Store fid of an opened object and use it during reconstruction of the resend. Change-Id: I8c21e9661903d3d4090ad29e43480e2ba7e35c39 Cray-bug-id: LUS-6957, LUS-7286 Signed-off-by: Andriy Skulysh Reviewed-by: Alexey Lyashkov Reviewed-by: Vitaly Fertman Reviewed-on: https://review.whamcloud.com/35112 Reviewed-by: Vitaly Fertman Tested-by: jenkins Reviewed-by: Alexey Lyashkov Tested-by: Maloo Reviewed-by: Mike Pershin Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/lu_target.h b/lustre/include/lu_target.h index 0711a2f..c86ba79 100644 --- a/lustre/include/lu_target.h +++ b/lustre/include/lu_target.h @@ -243,6 +243,8 @@ struct tg_reply_data { int trd_index; /** tag the client used */ __u16 trd_tag; + /** child fid to reconstruct open */ + struct lu_fid trd_object; }; extern struct lu_context_key tgt_session_key; @@ -274,6 +276,8 @@ struct tgt_session_info { /* object affected by VBR, for last_rcvd_update */ struct dt_object *tsi_vbr_obj; + /* open child object, for last_rcvd_update */ + struct dt_object *tsi_open_obj; /* opdata for mdt_reint_open(), has the same value as * ldlm_reply:lock_policy_res1. The tgt_update_last_rcvd() stores * this value onto disk for recovery when tgt_txn_stop_cb() is called. 
@@ -315,6 +319,17 @@ static inline void tgt_vbr_obj_set(const struct lu_env *env, } } +static inline void tgt_open_obj_set(const struct lu_env *env, + struct dt_object *obj) +{ + struct tgt_session_info *tsi; + + if (env->le_ses != NULL) { + tsi = tgt_ses_info(env); + tsi->tsi_open_obj = obj; + } +} + static inline void tgt_opdata_set(const struct lu_env *env, __u64 flags) { struct tgt_session_info *tsi; diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index e05d7ca..7ada2f8 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -423,6 +423,7 @@ struct mdt_reint_record { enum mdt_reint_flag { MRF_OPEN_TRUNC = BIT(0), + MRF_OPEN_RESEND = BIT(1), }; /* diff --git a/lustre/mdt/mdt_open.c b/lustre/mdt/mdt_open.c index d59e92d..cc1afe1 100644 --- a/lustre/mdt/mdt_open.c +++ b/lustre/mdt/mdt_open.c @@ -45,6 +45,9 @@ static const char mfd_open_handle_owner[] = "mdt"; +static int mdt_open_by_fid(struct mdt_thread_info *info, struct ldlm_reply *rep, + struct mdt_lock_handle *lhc); + /* Create a new mdt_file_data struct, initialize it, * and insert it to global hash table */ struct mdt_file_data *mdt_mfd_new(const struct mdt_export_data *med) @@ -509,7 +512,7 @@ err_out: static int mdt_finish_open(struct mdt_thread_info *info, struct mdt_object *p, struct mdt_object *o, - u64 open_flags, int created, + u64 open_flags, struct ldlm_reply *rep) { struct ptlrpc_request *req = mdt_info_req(info); @@ -519,12 +522,14 @@ static int mdt_finish_open(struct mdt_thread_info *info, struct lu_attr *la = &ma->ma_attr; struct mdt_file_data *mfd; struct mdt_body *repbody; + int created; int rc = 0; int isreg, isdir, islnk; struct list_head *t; ENTRY; LASSERT(ma->ma_valid & MA_INODE); + created = mdt_get_disposition(rep, DISP_OPEN_CREATE); repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY); @@ -613,19 +618,20 @@ static int mdt_finish_open(struct mdt_thread_info *info, RETURN(-EAGAIN); mfd = NULL; - if (lustre_msg_get_flags(req->rq_reqmsg) & 
MSG_RESENT) { + if (info->mti_rr.rr_flags & MRF_OPEN_RESEND) { spin_lock(&med->med_open_lock); list_for_each(t, &med->med_open_head) { mfd = list_entry(t, struct mdt_file_data, mfd_list); - if (mfd->mfd_xid == req->rq_xid) + if (mfd->mfd_xid == req->rq_xid) { + repbody->mbo_open_handle.cookie = + mfd->mfd_open_handle.h_cookie; break; + } mfd = NULL; } spin_unlock(&med->med_open_lock); if (mfd != NULL) { - repbody->mbo_open_handle.cookie = - mfd->mfd_open_handle.h_cookie; /* set repbody->ea_size for resent case */ if (ma->ma_valid & MA_LOV) { LASSERT(ma->ma_lmm_size != 0); @@ -638,6 +644,10 @@ static int mdt_finish_open(struct mdt_thread_info *info, mdt_set_disposition(info, rep, DISP_OPEN_OPEN); RETURN(0); } + /* if we have a real resend (not a resend after failover), it + * means close has already happened, so let's return an error + */ + RETURN(-ESTALE); } rc = mdt_mfd_open(info, p, o, open_flags, created, rep); @@ -650,100 +660,43 @@ static int mdt_finish_open(struct mdt_thread_info *info, void mdt_reconstruct_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) { - const struct lu_env *env = info->mti_env; - struct mdt_device *mdt = info->mti_mdt; struct req_capsule *pill = info->mti_pill; struct ptlrpc_request *req = mdt_info_req(info); - struct md_attr *ma = &info->mti_attr; struct mdt_reint_record *rr = &info->mti_rr; - u64 open_flags = info->mti_spec.sp_cr_flags; + struct md_attr *ma = &info->mti_attr; struct ldlm_reply *ldlm_rep; - struct mdt_object *parent; - struct mdt_object *child; - struct mdt_body *repbody; u64 opdata; int rc; ENTRY; LASSERT(pill->rc_fmt == &RQF_LDLM_INTENT_OPEN); ldlm_rep = req_capsule_server_get(pill, &RMF_DLM_REP); - repbody = req_capsule_server_get(pill, &RMF_MDT_BODY); ma->ma_need = MA_INODE | MA_HSM; ma->ma_valid = 0; - opdata = mdt_req_from_lrd(req, info->mti_reply_data); mdt_set_disposition(info, ldlm_rep, opdata); CDEBUG(D_INODE, "This is reconstruct open: disp=%#llx, result=%d\n", ldlm_rep->lock_policy_res1, 
req->rq_status); - - if (mdt_get_disposition(ldlm_rep, DISP_OPEN_CREATE) && - req->rq_status != 0) + if (req->rq_status) /* We did not create successfully, return error to client. */ GOTO(out, rc = req->rq_status); - if (mdt_get_disposition(ldlm_rep, DISP_OPEN_CREATE)) { - struct obd_export *exp = req->rq_export; - /* - * We failed after creation, but we do not know in which step - * we failed. So try to check the child object. - */ - parent = mdt_object_find(env, mdt, rr->rr_fid1); - if (IS_ERR(parent)) { - rc = PTR_ERR(parent); - LCONSOLE_WARN("Parent "DFID" lookup error %d." - " Evicting client %s with export %s.\n", - PFID(rr->rr_fid1), rc, - obd_uuid2str(&exp->exp_client_uuid), - obd_export_nid2str(exp)); - mdt_export_evict(exp); - RETURN_EXIT; - } - - child = mdt_object_find(env, mdt, rr->rr_fid2); - if (IS_ERR(child)) { - rc = PTR_ERR(child); - LCONSOLE_WARN("cannot lookup child "DFID": rc = %d; " - "evicting client %s with export %s\n", - PFID(rr->rr_fid2), rc, - obd_uuid2str(&exp->exp_client_uuid), - obd_export_nid2str(exp)); - mdt_object_put(env, parent); - mdt_export_evict(exp); - RETURN_EXIT; - } - - if (unlikely(mdt_object_remote(child))) { - mdt_object_put(env, parent); - mdt_object_put(env, child); - /* the child object was created on remote server */ - if (!mdt_is_dne_client(exp)) - /* Return -EIO for old client */ - GOTO(out, rc = -EIO); - repbody->mbo_fid1 = *rr->rr_fid2; - repbody->mbo_valid |= (OBD_MD_FLID | OBD_MD_MDS); - GOTO(out, rc = 0); - } - if (mdt_object_exists(child)) { - mdt_prep_ma_buf_from_rep(info, child, ma); - rc = mdt_attr_get_complex(info, child, ma); - if (!rc) - rc = mdt_finish_open(info, parent, child, - open_flags, 1, ldlm_rep); - mdt_object_put(env, parent); - mdt_object_put(env, child); - if (!rc) - mdt_pack_size2body(info, rr->rr_fid2, - &lhc->mlh_reg_lh); - GOTO(out, rc); - } - /* the child does not exist, we should do regular open */ - mdt_object_put(env, parent); - mdt_object_put(env, child); + /* tg_reply_data is just 
a memory-only structure, so any non-zero fid + * means a real resend, not a resend after recovery, which needs to be + * handled as a regular open + */ + if (likely(!fid_is_zero(&info->mti_reply_data->trd_object))) { + rr->rr_fid2 = &info->mti_reply_data->trd_object; + rr->rr_flags |= MRF_OPEN_RESEND; + rc = mdt_open_by_fid(info, ldlm_rep, lhc); + if (rc) + lustre_msg_set_transno(req->rq_repmsg, 0); + } else { + /* We did not try to create, so we are a pure open */ + rc = mdt_reint_open(info, lhc); } - /* We did not try to create, so we are a pure open */ - rc = mdt_reint_open(info, lhc); EXIT; out: req->rq_status = rc; @@ -751,7 +704,8 @@ out: LASSERT(ergo(rc < 0, lustre_msg_get_transno(req->rq_repmsg) == 0)); } -static int mdt_open_by_fid(struct mdt_thread_info *info, struct ldlm_reply *rep) +static int mdt_open_by_fid(struct mdt_thread_info *info, struct ldlm_reply *rep, + struct mdt_lock_handle *lhc) { u64 open_flags = info->mti_spec.sp_cr_flags; struct mdt_reint_record *rr = &info->mti_rr; @@ -777,6 +731,7 @@ static int mdt_open_by_fid(struct mdt_thread_info *info, struct ldlm_reply *rep) rc = 0; } else { if (mdt_object_exists(o)) { + tgt_open_obj_set(info->mti_env, mdt_obj2dt(o)); mdt_set_disposition(info, rep, (DISP_IT_EXECD | DISP_LOOKUP_EXECD | DISP_LOOKUP_POS)); @@ -787,14 +742,18 @@ static int mdt_open_by_fid(struct mdt_thread_info *info, struct ldlm_reply *rep) mdt_prep_ma_buf_from_rep(info, o, ma); rc = mdt_attr_get_complex(info, o, ma); - if (rc == 0) - rc = mdt_finish_open(info, NULL, o, open_flags, - 0, rep); + if (rc) + GOTO(out, rc); + rc = mdt_finish_open(info, NULL, o, open_flags, rep); + if (rc) + GOTO(out, rc); + mdt_pack_size2body(info, rr->rr_fid2, &lhc->mlh_reg_lh); } else { rc = -ENOENT; } } +out: mdt_object_put(info->mti_env, o); RETURN(rc); } @@ -1159,7 +1118,8 @@ static int mdt_open_by_fid_lock(struct mdt_thread_info *info, } } - rc = mdt_finish_open(info, parent, o, open_flags, 0, rep); + tgt_open_obj_set(info->mti_env, mdt_obj2dt(o)); + rc = 
mdt_finish_open(info, parent, o, open_flags, rep); if (!rc) { mdt_set_disposition(info, rep, DISP_LOOKUP_POS); if (open_flags & MDS_OPEN_LOCK) @@ -1232,7 +1192,7 @@ static int mdt_cross_open(struct mdt_thread_info *info, if (unlikely(rc)) GOTO(out, rc); - rc = mdt_finish_open(info, NULL, o, open_flags, 0, rep); + rc = mdt_finish_open(info, NULL, o, open_flags, rep); } else { /* * Something is wrong here. lookup was positive but @@ -1364,7 +1324,7 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc) ldlm_rep, open_flags); GOTO(out, result); } else if (req_is_replay(req)) { - result = mdt_open_by_fid(info, ldlm_rep); + result = mdt_open_by_fid(info, ldlm_rep, lhc); if (result != -ENOENT) GOTO(out, result); @@ -1480,6 +1440,8 @@ again_pw: if (rc) GOTO(out_child, result = rc); + tgt_open_obj_set(info->mti_env, mdt_obj2dt(child)); + if (result == -ENOENT) { /* Create under OBF and .lustre is not permitted */ if (!fid_is_md_operative(rr->rr_fid1) && @@ -1605,8 +1567,7 @@ again_pw: } } /* Try to open it now. 
*/ - rc = mdt_finish_open(info, parent, child, open_flags, - created, ldlm_rep); + rc = mdt_finish_open(info, parent, child, open_flags, ldlm_rep); if (rc) { result = rc; /* openlock will be released if mdt_finish_open() failed */ diff --git a/lustre/target/tgt_lastrcvd.c b/lustre/target/tgt_lastrcvd.c index 42d553d..7022ceb 100644 --- a/lustre/target/tgt_lastrcvd.c +++ b/lustre/target/tgt_lastrcvd.c @@ -1309,11 +1309,15 @@ int tgt_mk_reply_data(const struct lu_env *env, struct lsd_reply_data *lrd; __u64 *pre_versions = NULL; int rc; + struct tgt_session_info *tsi = NULL; OBD_ALLOC_PTR(trd); if (unlikely(trd == NULL)) RETURN(-ENOMEM); + if (env != NULL) + tsi = tgt_ses_info(env); + /* fill reply data information */ lrd = &trd->trd_reply; lrd->lrd_transno = transno; @@ -1326,10 +1330,7 @@ int tgt_mk_reply_data(const struct lu_env *env, lrd->lrd_result = th->th_result; } } else { - struct tgt_session_info *tsi; - LASSERT(env != NULL); - tsi = tgt_ses_info(env); LASSERT(tsi->tsi_xid != 0); lrd->lrd_xid = tsi->tsi_xid; @@ -1345,6 +1346,9 @@ int tgt_mk_reply_data(const struct lu_env *env, trd->trd_pre_versions[3] = pre_versions[3]; } + if (tsi && tsi->tsi_open_obj) + trd->trd_object = *lu_object_fid(&tsi->tsi_open_obj->do_lu); + rc = tgt_add_reply_data(env, tgt, ted, trd, req, th, write_update); if (rc < 0) { @@ -2108,6 +2112,7 @@ int tgt_reply_data_init(const struct lu_env *env, struct lu_target *tgt) trd->trd_pre_versions[3] = 0; trd->trd_index = idx; trd->trd_tag = 0; + fid_zero(&trd->trd_object); list_add(&trd->trd_list, &ted->ted_reply_list); ted->ted_reply_cnt++; if (ted->ted_reply_cnt > ted->ted_reply_max) diff --git a/lustre/tests/recovery-small.sh b/lustre/tests/recovery-small.sh index 4506cce..d8a3964 100755 --- a/lustre/tests/recovery-small.sh +++ b/lustre/tests/recovery-small.sh @@ -1280,12 +1280,28 @@ test_52() { run_test 52 "failover OST under load" # test of open reconstruct -test_53() { +test_53a() { touch $DIR/$tfile drop_mdt_ldlm_reply "openfile -f 
O_RDWR:O_CREAT -m 0755 $DIR/$tfile" ||\ return 2 } -run_test 53 "touch: drop rep" +run_test 53a "touch: drop rep" + +test_53b() { + touch $DIR/$tfile + sync + drop_mdt_ldlm_reply "openfile -f O_RDWR:O_CREAT -m 0755 $DIR/$tfile" || + return 2 +} +run_test 53b "touch: drop rep" + +test_53c() { + rm -rf $DIR/$tfile + sync + drop_mdt_ldlm_reply "openfile -f O_RDWR:O_CREAT -m 0755 $DIR/$tfile" || + return 2 +} +run_test 53c "touch: drop rep" test_54() { zconf_mount $(hostname) $MOUNT2