From 72d45e1d344c5559d7620102a86a83bbf095796b Mon Sep 17 00:00:00 2001 From: Alexander Zarochentsev Date: Thu, 18 Jun 2020 09:18:05 +0300 Subject: [PATCH] LU-13809 mdc: fix lovea for replay lmm->lmm_stripe_offset gets overwritten by the layout generation in the server reply, so the MDT does not recognize such a LOVEA as a valid striping at open request replay. This patch extends the LU-7008 fix by adding support for PFL layouts. HPE-bug-id: LUS-8820 Signed-off-by: Alexander Zarochentsev Change-Id: If28836c2fcb08620dd3dc869ddfe35147c69e711 Reviewed-on: https://review.whamcloud.com/39468 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Alexander Boyko Reviewed-by: Vladimir Saveliev Reviewed-by: Mike Pershin Reviewed-by: Oleg Drokin --- lustre/include/obd.h | 2 ++ lustre/lov/lov_ea.c | 31 +++++++++++++++++++++++++++++++ lustre/mdc/mdc_internal.h | 3 --- lustre/mdc/mdc_locks.c | 31 ++++++++++++++----------------- lustre/mdc/mdc_request.c | 6 ++++++ lustre/mdt/mdt_handler.c | 4 ++++ lustre/tests/replay-single.sh | 11 +++++++---- lustre/tests/test-framework.sh | 2 +- 8 files changed, 65 insertions(+), 25 deletions(-) diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 6ddd0b1..5940d33 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -75,6 +75,8 @@ struct lov_oinfo { /* per-stripe data structure */ struct osc_async_rc loi_ar; }; +void lov_fix_ea_for_replay(void *lovea); + static inline void loi_kms_set(struct lov_oinfo *oinfo, __u64 kms) { oinfo->loi_kms = kms; diff --git a/lustre/lov/lov_ea.c b/lustre/lov/lov_ea.c index 17a9b7e..1498c78 100644 --- a/lustre/lov/lov_ea.c +++ b/lustre/lov/lov_ea.c @@ -631,3 +631,34 @@ int lov_lsm_entry(const struct lov_stripe_md *lsm, __u64 offset) return -1; } + +/** + * lmm_layout_gen overlaps stripe_offset field, it needs to be reset back when + * sending to MDT for passing striping checks + */ +void lov_fix_ea_for_replay(void *lovea) +{ + struct lov_user_md *lmm = lovea; + struct lov_comp_md_v1 *c1; + int i; + + switch 
(le32_to_cpu(lmm->lmm_magic)) { + case LOV_USER_MAGIC_V1: + case LOV_USER_MAGIC_V3: + lmm->lmm_stripe_offset = LOV_OFFSET_DEFAULT; + break; + + case LOV_USER_MAGIC_COMP_V1: + c1 = (void *)lmm; + for (i = 0; i < le16_to_cpu(c1->lcm_entry_count); i++) { + struct lov_comp_md_entry_v1 *ent = &c1->lcm_entries[i]; + + if (le32_to_cpu(ent->lcme_flags) & LCME_FL_INIT) { + lmm = (void *)((char *)c1 + + le32_to_cpu(ent->lcme_offset)); + lmm->lmm_stripe_offset = LOV_OFFSET_DEFAULT; + } + } + } +} +EXPORT_SYMBOL(lov_fix_ea_for_replay); diff --git a/lustre/mdc/mdc_internal.h b/lustre/mdc/mdc_internal.h index 43b2065..6a57a4e 100644 --- a/lustre/mdc/mdc_internal.h +++ b/lustre/mdc/mdc_internal.h @@ -95,9 +95,6 @@ int mdc_resource_get_unused_res(struct obd_export *exp, int mdc_resource_get_unused(struct obd_export *exp, const struct lu_fid *fid, struct list_head *cancels, enum ldlm_mode mode, __u64 bits); -int mdc_save_lovea(struct ptlrpc_request *req, - const struct req_msg_field *field, - void *data, u32 size); /* mdc/mdc_request.c */ int mdc_fid_alloc(const struct lu_env *env, struct obd_export *exp, struct lu_fid *fid, struct md_op_data *op_data); diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c index 6699f1c..a1887ea 100644 --- a/lustre/mdc/mdc_locks.c +++ b/lustre/mdc/mdc_locks.c @@ -204,7 +204,8 @@ static inline void mdc_clear_replay_flag(struct ptlrpc_request *req, int rc) } } -/* Save a large LOV EA into the request buffer so that it is available +/** + * Save a large LOV EA into the request buffer so that it is available * for replay. We don't do this in the initial request because the * original request doesn't need this buffer (at most it sends just the * lov_mds_md) and it is a waste of RAM/bandwidth to send the empty @@ -216,15 +217,14 @@ static inline void mdc_clear_replay_flag(struct ptlrpc_request *req, int rc) * but this is incredibly unlikely, and questionable whether the client * could do MDS recovery under OOM anyways... 
*/ -int mdc_save_lovea(struct ptlrpc_request *req, - const struct req_msg_field *field, void *data, u32 size) +static int mdc_save_lovea(struct ptlrpc_request *req, void *data, u32 size) { struct req_capsule *pill = &req->rq_pill; - struct lov_user_md *lmm; + void *lovea; int rc = 0; - if (req_capsule_get_size(pill, field, RCL_CLIENT) < size) { - rc = sptlrpc_cli_enlarge_reqbuf(req, field, size); + if (req_capsule_get_size(pill, &RMF_EADATA, RCL_CLIENT) < size) { + rc = sptlrpc_cli_enlarge_reqbuf(req, &RMF_EADATA, size); if (rc) { CERROR("%s: Can't enlarge ea size to %d: rc = %d\n", req->rq_export->exp_obd->obd_name, @@ -232,16 +232,14 @@ int mdc_save_lovea(struct ptlrpc_request *req, return rc; } } else { - req_capsule_shrink(pill, field, size, RCL_CLIENT); + req_capsule_shrink(pill, &RMF_EADATA, size, RCL_CLIENT); } - req_capsule_set_size(pill, field, RCL_CLIENT, size); - lmm = req_capsule_client_get(pill, field); - if (lmm) { - memcpy(lmm, data, size); - /* overwrite layout generation returned from the MDS */ - lmm->lmm_stripe_offset = - (typeof(lmm->lmm_stripe_offset))LOV_OFFSET_DEFAULT; + req_capsule_set_size(pill, &RMF_EADATA, RCL_CLIENT, size); + lovea = req_capsule_client_get(pill, &RMF_EADATA); + if (lovea) { + memcpy(lovea, data, size); + lov_fix_ea_for_replay(lovea); } return rc; @@ -797,7 +795,7 @@ static int mdc_finish_enqueue(struct obd_export *exp, * (for example error one). */ if ((it->it_op & IT_OPEN) && req->rq_replay) { - rc = mdc_save_lovea(req, &RMF_EADATA, eadata, + rc = mdc_save_lovea(req, eadata, body->mbo_eadatasize); if (rc) { body->mbo_valid &= ~OBD_MD_FLEASIZE; @@ -825,8 +823,7 @@ static int mdc_finish_enqueue(struct obd_export *exp, * another set of OST objects). 
*/ if (req->rq_transno) - (void)mdc_save_lovea(req, &RMF_EADATA, lvb_data, - lvb_len); + (void)mdc_save_lovea(req, lvb_data, lvb_len); } } diff --git a/lustre/mdc/mdc_request.c b/lustre/mdc/mdc_request.c index 6387d98..52274be 100644 --- a/lustre/mdc/mdc_request.c +++ b/lustre/mdc/mdc_request.c @@ -637,6 +637,7 @@ void mdc_replay_open(struct ptlrpc_request *req) struct obd_client_handle *och; struct lustre_handle old_open_handle = { }; struct mdt_body *body; + struct ldlm_reply *rep; ENTRY; if (mod == NULL) { @@ -649,6 +650,11 @@ void mdc_replay_open(struct ptlrpc_request *req) body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY); LASSERT(body != NULL); + rep = req_capsule_server_get(&req->rq_pill, &RMF_DLM_REP); + if (rep != NULL && rep->lock_policy_res2 != 0) + DEBUG_REQ(D_ERROR, req, "Open request replay failed with %ld ", + (long int)rep->lock_policy_res2); + spin_lock(&req->rq_lock); och = mod->mod_och; if (och && och->och_open_handle.cookie) diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index a093a49..67ebfc4 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -4441,6 +4441,7 @@ static int mdt_intent_open(enum ldlm_intent_flags it_opc, struct ldlm_reply *rep = NULL; long opc; int rc; + struct ptlrpc_request *req = mdt_info_req(info); static const struct req_format *intent_fmts[REINT_MAX] = { [REINT_CREATE] = &RQF_LDLM_INTENT_CREATE, @@ -4458,6 +4459,9 @@ static int mdt_intent_open(enum ldlm_intent_flags it_opc, rc = mdt_reint_internal(info, lhc, opc); + if (rc < 0 && lustre_msg_get_flags(req->rq_reqmsg) & MSG_REPLAY) + DEBUG_REQ(D_ERROR, req, "Replay open failed with %d", rc); + /* Check whether the reply has been packed successfully. 
*/ if (mdt_info_req(info)->rq_repmsg != NULL) rep = req_capsule_server_get(info->mti_pill, &RMF_DLM_REP); diff --git a/lustre/tests/replay-single.sh b/lustre/tests/replay-single.sh index d588042..808873e 100755 --- a/lustre/tests/replay-single.sh +++ b/lustre/tests/replay-single.sh @@ -4894,16 +4894,19 @@ test_134() { pool_add pool_134 pool_add_targets pool_134 1 1 - mkdir $DIR/$tdir - $LFS setstripe -p pool_134 $DIR/$tdir + mkdir -p $DIR/$tdir/{A,B} + $LFS setstripe -p pool_134 $DIR/$tdir/A + $LFS setstripe -E EOF -p pool_134 $DIR/$tdir/B replay_barrier mds1 - touch $DIR/$tdir/$tfile + touch $DIR/$tdir/A/$tfile || error "touch non-pfl file failed" + touch $DIR/$tdir/B/$tfile || error "touch pfl failed" fail mds1 - [ -f $DIR/$tdir/$tfile ] || error "file does not exist" + [ -f $DIR/$tdir/A/$tfile ] || error "non-pfl file does not exist" + [ -f $DIR/$tdir/B/$tfile ] || error "pfl file does not exist" } run_test 134 "replay creation of a file created in a pool" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 60052be..9f7c655 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -738,8 +738,8 @@ load_modules_local() { load_module fid/fid load_module lmv/lmv load_module osc/osc - load_module mdc/mdc load_module lov/lov + load_module mdc/mdc load_module mgc/mgc load_module obdecho/obdecho if ! client_only; then -- 1.8.3.1