From 7bf63d0a9e8e171e76a36a04443e585ab0dacbaf Mon Sep 17 00:00:00 2001 From: Bobi Jam Date: Wed, 13 Apr 2022 23:15:22 +0800 Subject: [PATCH] LU-14642 flr: allow layout version update from client/MDS Client write/punch request always carries its layout version so that OFD can reject the request if the carried layout version is a stale one. This patch allows MDS as well as client to update new layout version to OST objects. And during resync write, all OST objects will get layout version updated. Lustre-change: https://review.whamcloud.com/45443 Lustre-commit: fa6574150b6f745a668fe69b2d6d970068 Fixes: 7d97777a5d ("LU-14642 flr: abolish MDS transfer layout version to OST") Signed-off-by: Bobi Jam Change-Id: I9f27af354875d48adda3361f6c8ea5a5f6def73b Reviewed-on: https://review.whamcloud.com/47097 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lustre/lod/lod_object.c | 90 +++++++++++++++++++--------------------------- lustre/ofd/ofd_io.c | 6 ++-- lustre/ofd/ofd_objects.c | 14 ++++++-- lustre/tests/sanity-flr.sh | 45 +++++++++++++++-------- 4 files changed, 80 insertions(+), 75 deletions(-) diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index f9dc630..2176617 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -1226,58 +1226,6 @@ unlock: RETURN(rc); } -static bool lod_obj_attr_set_comp_skip_cb(const struct lu_env *env, - struct lod_object *lo, int comp_idx, - struct lod_obj_stripe_cb_data *data) -{ - struct lod_layout_component *lod_comp = &lo->ldo_comp_entries[comp_idx]; - bool skipped = false; - - if (!(data->locd_attr->la_valid & LA_LAYOUT_VERSION)) - return skipped; - - switch (lo->ldo_flr_state) { - case LCM_FL_WRITE_PENDING: { - int i; - - /* skip stale components */ - if (lod_comp->llc_flags & LCME_FL_STALE) { - skipped = true; - break; - } - - /* skip valid and overlapping components, therefore any - * attempts to write overlapped components will never succeed - * because client will get EINPROGRESS. 
*/ - for (i = 0; i < lo->ldo_comp_cnt; i++) { - if (i == comp_idx) - continue; - - if (lo->ldo_comp_entries[i].llc_flags & LCME_FL_STALE) - continue; - - if (lu_extent_is_overlapped(&lod_comp->llc_extent, - &lo->ldo_comp_entries[i].llc_extent)) { - skipped = true; - break; - } - } - break; - } - default: - LASSERTF(0, "impossible: %d\n", lo->ldo_flr_state); - case LCM_FL_SYNC_PENDING: - break; - } - - CDEBUG(D_LAYOUT, DFID": %s to set component %x to version: %u\n", - PFID(lu_object_fid(&lo->ldo_obj.do_lu)), - skipped ? "skipped" : "chose", lod_comp->llc_id, - data->locd_attr->la_layout_version); - - return skipped; -} - static inline int lod_obj_stripe_attr_set_cb(const struct lu_env *env, struct lod_object *lo, struct dt_object *dt, struct thandle *th, @@ -1473,7 +1421,6 @@ static int lod_attr_set(const struct lu_env *env, data.locd_attr = attr; data.locd_declare = false; - data.locd_comp_skip_cb = lod_obj_attr_set_comp_skip_cb; data.locd_stripe_cb = lod_obj_stripe_attr_set_cb; rc = lod_obj_for_each_stripe(env, lo, th, &data); } @@ -7703,6 +7650,7 @@ static int lod_declare_update_rdonly(const struct lu_env *env, struct thandle *th) { struct lod_thread_info *info = lod_env_info(env); + struct lu_attr *layout_attr = &info->lti_layout_attr; struct lod_layout_component *lod_comp; struct lu_extent extent = { 0 }; int rc; @@ -7818,6 +7766,14 @@ static int lod_declare_update_rdonly(const struct lu_env *env, if (rc) GOTO(out, rc); + layout_attr->la_valid = LA_LAYOUT_VERSION; + layout_attr->la_layout_version = 0; + if (mlc->mlc_opc == MD_LAYOUT_RESYNC) + layout_attr->la_layout_version = LU_LAYOUT_RESYNC; + rc = lod_declare_attr_set(env, &lo->ldo_obj, layout_attr, th); + if (rc) + GOTO(out, rc); + out: if (rc) lod_striping_free(env, lo); @@ -7829,6 +7785,7 @@ static int lod_declare_update_write_pending(const struct lu_env *env, struct thandle *th) { struct lod_thread_info *info = lod_env_info(env); + struct lu_attr *layout_attr = &info->lti_layout_attr; struct 
lod_layout_component *lod_comp; struct lu_extent extent = { 0 }; int primary = -1; @@ -7963,6 +7920,19 @@ static int lod_declare_update_write_pending(const struct lu_env *env, GOTO(out, rc); lod_obj_inc_layout_gen(lo); + + /* 3. transfer layout version to OST objects. + * transfer new layout version to OST objects so that stale writes + * can be denied. It also ends an era of writing by setting + * LU_LAYOUT_RESYNC. Normal client can never use this bit to + * send write RPC; only resync RPCs could do it. */ + layout_attr->la_valid = LA_LAYOUT_VERSION; + layout_attr->la_layout_version = 0; + if (mlc->mlc_opc == MD_LAYOUT_RESYNC) + layout_attr->la_layout_version = LU_LAYOUT_RESYNC; + rc = lod_declare_attr_set(env, &lo->ldo_obj, layout_attr, th); + if (rc) + GOTO(out, rc); out: if (rc) lod_striping_free(env, lo); @@ -7974,6 +7944,7 @@ static int lod_declare_update_sync_pending(const struct lu_env *env, struct thandle *th) { struct lod_thread_info *info = lod_env_info(env); + struct lu_attr *layout_attr = &info->lti_layout_attr; unsigned sync_components = 0; unsigned resync_components = 0; int i; @@ -8046,6 +8017,12 @@ static int lod_declare_update_sync_pending(const struct lu_env *env, lo->ldo_flr_state = LCM_FL_RDONLY; lod_obj_inc_layout_gen(lo); + layout_attr->la_valid = LA_LAYOUT_VERSION; + layout_attr->la_layout_version = 0; + rc = lod_declare_attr_set(env, &lo->ldo_obj, layout_attr, th); + if (rc) + GOTO(out, rc); + info->lti_buf.lb_len = lod_comp_md_size(lo, false); rc = lod_sub_declare_xattr_set(env, lod_object_child(lo), &info->lti_buf, XATTR_NAME_LOV, 0, th); @@ -8732,8 +8709,9 @@ static int lod_layout_change(const struct lu_env *env, struct dt_object *dt, struct md_layout_change *mlc, struct thandle *th) { struct lu_attr *attr = &lod_env_info(env)->lti_attr; + struct lu_attr *layout_attr = &lod_env_info(env)->lti_layout_attr; + struct lod_object *lo = lod_dt_obj(dt); int rc; - ENTRY; if (S_ISDIR(dt->do_lu.lo_header->loh_attr)) { @@ -8743,6 +8721,10 @@ static 
int lod_layout_change(const struct lu_env *env, struct dt_object *dt, } rc = lod_striped_create(env, dt, attr, NULL, th); + if (!rc && layout_attr->la_valid & LA_LAYOUT_VERSION) { + layout_attr->la_layout_version |= lo->ldo_layout_gen; + rc = lod_attr_set(env, dt, layout_attr, th); + } RETURN(rc); } diff --git a/lustre/ofd/ofd_io.c b/lustre/ofd/ofd_io.c index 586082f..a61ae8b 100644 --- a/lustre/ofd/ofd_io.c +++ b/lustre/ofd/ofd_io.c @@ -452,8 +452,7 @@ int ofd_verify_layout_version(const struct lu_env *env, * that on the disk. */ if (ofd_layout_version_less(oa->o_layout_version, - fo->ofo_ff.ff_layout_version + - fo->ofo_ff.ff_range)) + fo->ofo_ff.ff_layout_version)) RETURN(-ESTALE); EXIT; @@ -463,8 +462,7 @@ out: PFID(lu_object_fid(&fo->ofo_obj.do_lu)), oa->o_layout_version, fo->ofo_ff.ff_layout_version, fo->ofo_ff.ff_range, rc); - return rc; - + RETURN(rc); } /* diff --git a/lustre/ofd/ofd_objects.c b/lustre/ofd/ofd_objects.c index a336eaa..f78566c 100644 --- a/lustre/ofd/ofd_objects.c +++ b/lustre/ofd/ofd_objects.c @@ -593,6 +593,17 @@ int ofd_object_ff_update(const struct lu_env *env, struct ofd_object *fo, PFID(lu_object_fid(&fo->ofo_obj.do_lu)), ff->ff_layout_version, oa->o_layout_version); + /** + * resync write from client on non-primary objects and + * resync start from MDS on primary objects will contain + * LU_LAYOUT_RESYNC flag in the @oa. + * + * The layout version checking for write/punch from client + * happens in ofd_verify_layout_version() before coming to + * here, so that resync with smaller layout version client + * will be rejected there, the biggest resync version will + * be recorded in the OFD objects. 
+ */ if (ff->ff_layout_version & LU_LAYOUT_RESYNC) { /* this opens a new era of writing */ ff->ff_layout_version = 0; @@ -612,8 +623,7 @@ int ofd_object_ff_update(const struct lu_env *env, struct ofd_object *fo, ff->ff_range = 0; } else if (oa->o_layout_version > ff->ff_layout_version) { ff->ff_range = max_t(__u32, ff->ff_range, - oa->o_layout_version - - ff->ff_layout_version); + oa->o_layout_version - ff->ff_layout_version); } } diff --git a/lustre/tests/sanity-flr.sh b/lustre/tests/sanity-flr.sh index 8204431..9cdb149 100644 --- a/lustre/tests/sanity-flr.sh +++ b/lustre/tests/sanity-flr.sh @@ -1313,17 +1313,11 @@ test_35() { run_test 35 "allow to write to mirrored files" get_file_layout_version() { - local tf=$1 - local flv=$($LFS getstripe $tf | awk '/lcm_layout_gen/{print $2}') - - echo -n $flv + $LFS getstripe $1 | awk '/lcm_layout_gen/{print $2}' } get_ost_layout_version() { - local tf=$1 - local olv=$($MULTIOP $tf oXc | awk '/ostlayoutversion/{print $2}') - - echo -n $flv + $MULTIOP $1 oXc | awk '/ostlayoutversion/{print $2}' } verify_ost_layout_version() { @@ -1393,7 +1387,12 @@ run_test 36a "write to mirrored files" test_36b() { local tf=$DIR/$tfile - (( OSTCOUNT < 2 )) && skip "need >= 2 OSTs" && return + (( OST1_VERSION >= $(version_code 2.15.51) || + OST1_VERSION < $(version_code 2.15) && + OST1_VERSION >= $(version_code 2.14.0.43) )) || + skip "Need OST version at least 2.14.0.43(es60) or 2.15.51(master)" + + (( OSTCOUNT >= 2 )) || skip "need >= 2 OSTs" # create 2 mirrors using different OSTs $LFS setstripe -N -c1 -i0 --flags=prefer -N -c1 -i1 $tf || @@ -1417,7 +1416,12 @@ run_test 36b "write should not hung when prefered mirror is stale" test_36c() { local tf=$DIR/$tfile - (( OSTCOUNT < 2 )) && skip "need >= 2 OSTs" && return + (( OST1_VERSION >= $(version_code 2.15.51) || + OST1_VERSION < $(version_code 2.15) && + OST1_VERSION >= $(version_code 2.14.0.43) )) || + skip "Need OST version at least 2.14.0.43(es60) or 2.15.51(master)" + + (( OSTCOUNT >= 
2 )) || skip "need >= 2 OSTs" # create 2 mirrors using different OSTs $LFS setstripe -N -c1 -i0 --flags=prefer -N -c1 -i1 $tf || @@ -1441,11 +1445,16 @@ run_test 36c "change prefer mirror during write shouldn't hung" test_36d() { local tf=$DIR/$tfile + (( OST1_VERSION >= $(version_code 2.15.51) || + OST1_VERSION < $(version_code 2.15) && + OST1_VERSION >= $(version_code 2.14.0.43) )) || + skip "Need OST version at least 2.14.0.43(es60) or 2.15.51(master)" + echo " ** create $tf" $LFS mirror create -N $tf || error "create $tf failed" for i in 1 2; do - echo " ** mirror extend $tf" + echo " ** mirror extend $tf ($i/2)" $LFS mirror extend -N $tf || error "mirror extend $tf failed" flv=$(get_file_layout_version $tf) olv=$(get_ost_layout_version $tf) @@ -1453,7 +1462,7 @@ test_36d() { done for i in 1 2; do - echo " ** write $tf" + echo " ** write $tf ($i/2)" dd if=/dev/zero of=$tf bs=1k count=1 || error "write $tf failed" flv=$(get_file_layout_version $tf) olv=$(get_ost_layout_version $tf) @@ -1469,8 +1478,8 @@ test_36d() { echo " flv=$flv olv=$olv" for i in 1 2; do - echo " ** truncate $tf" - $TRUNCATE $tf $((1024 * 1024)) || error "truncate $tf fails" + echo " ** truncate $tf ($i/2)" + $TRUNCATE $tf $((1024 * 1024)) || error "truncate $tf failed" flv=$(get_file_layout_version $tf) olv=$(get_ost_layout_version $tf) echo " flv=$flv olv=$olv" @@ -1478,8 +1487,14 @@ test_36d() { error "truncate update OST layout failed $flv/$olv" done + echo " ** resync $tf" + $LFS mirror resync $tf || error "mirror resync $tf failed" + flv=$(get_file_layout_version $tf) + olv=$(get_ost_layout_version $tf) + echo " flv=$flv olv=$olv" + for i in 1 2; do - echo " ** write $tf" + echo " ** write $tf ($i/2)" dd if=/dev/zero of=$tf bs=1k count=1 || error "write $tf failed" flv=$(get_file_layout_version $tf) olv=$(get_ost_layout_version $tf) -- 1.8.3.1