From: Mikhail Pershin Date: Mon, 25 Apr 2022 06:13:53 +0000 (+0300) Subject: LU-12031 mdt: explicit data version of DoM files X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=3bccd95ca25dc8a52b4961a14648c2fd93d2bc51;p=fs%2Flustre-release.git LU-12031 mdt: explicit data version of DoM files Use an EA to store 'data_version' for DoM files explicitly. Unlike OST objects, the 'inode_version' of a DoM file is changed by metadata operations as well, and that leads to problems during HSM operations, e.g. writing the HSM EA with the file data version inside causes a DoM object version update, making this HSM EA version obsolete; also, any metadata update on a restored file makes it dirty and prevents a second release. DoM files now have an explicitly updated 'data_version' in addition to the ordinary 'inode_version'. The 'data_version' is updated along with 'inode_version' upon write/truncate and fallocate operations and is stored as the 'trusted.dataver' EA. The layout swap procedure is updated to move the data version between the files being swapped along with the HSM attributes. If a DoM file is migrated to a RAID0 file then the 'dataver' EA is deleted. Corresponding test 1f is added to sanity-hsm.sh and 270j to sanity.sh. 
Lustre-change: https://review.whamcloud.com/47139 Lustre-commit: aae3289adb2bbc192870f195b78044484f717e16 Test-Parameters: clientversion=2.12.4 testlist=sanity-hsm Signed-off-by: Mikhail Pershin Change-Id: I4689c56394c7323d32cd6f7dd86f58beb6e53353 Reviewed-by: Andreas Dilger Reviewed-by: Sergey Cheremencev Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53214 Tested-by: jenkins Tested-by: Maloo --- diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h index dfb935a..8bb4bbb 100644 --- a/lustre/include/dt_object.h +++ b/lustre/include/dt_object.h @@ -2253,11 +2253,22 @@ static inline int dt_fid_alloc(const struct lu_env *env, } int dt_declare_version_set(const struct lu_env *env, struct dt_object *o, - struct thandle *th); + struct thandle *th); void dt_version_set(const struct lu_env *env, struct dt_object *o, - dt_obj_version_t version, struct thandle *th); + dt_obj_version_t version, struct thandle *th); +int dt_declare_data_version_set(const struct lu_env *env, struct dt_object *o, + struct thandle *th); +void dt_data_version_set(const struct lu_env *env, struct dt_object *o, + dt_obj_version_t version, struct thandle *th); +int dt_declare_data_version_del(const struct lu_env *env, struct dt_object *o, + struct thandle *th); +void dt_data_version_del(const struct lu_env *env, struct dt_object *o, + struct thandle *th); dt_obj_version_t dt_version_get(const struct lu_env *env, struct dt_object *o); - +dt_obj_version_t dt_data_version_get(const struct lu_env *env, + struct dt_object *o); +dt_obj_version_t dt_data_version_init(const struct lu_env *env, + struct dt_object *o); int dt_read(const struct lu_env *env, struct dt_object *dt, struct lu_buf *buf, loff_t *pos); diff --git a/lustre/include/lu_target.h b/lustre/include/lu_target.h index 273f115..104ce1c 100644 --- a/lustre/include/lu_target.h +++ b/lustre/include/lu_target.h @@ -279,6 +279,7 @@ struct tgt_session_info { * this value onto disk for recovery when tgt_txn_stop_cb() 
is called. */ __u64 tsi_opdata; + bool tsi_dv_update; /* * Additional fail id that can be set by handler. @@ -308,17 +309,24 @@ static inline struct tgt_session_info *tgt_ses_info(const struct lu_env *env) return tsi; } -static inline void tgt_vbr_obj_set(const struct lu_env *env, - struct dt_object *obj) +static inline void tgt_vbr_obj_data_set(const struct lu_env *env, + struct dt_object *obj, bool dv_update) { struct tgt_session_info *tsi; if (env->le_ses != NULL) { tsi = tgt_ses_info(env); tsi->tsi_vbr_obj = obj; + tsi->tsi_dv_update = dv_update; } } +static inline void tgt_vbr_obj_set(const struct lu_env *env, + struct dt_object *obj) +{ + return tgt_vbr_obj_data_set(env, obj, false); +} + static inline void tgt_opdata_set(const struct lu_env *env, __u64 flags) { struct tgt_session_info *tsi; diff --git a/lustre/include/uapi/linux/lustre/lustre_idl.h b/lustre/include/uapi/linux/lustre/lustre_idl.h index 8a9a760..3cef00e 100644 --- a/lustre/include/uapi/linux/lustre/lustre_idl.h +++ b/lustre/include/uapi/linux/lustre/lustre_idl.h @@ -1243,6 +1243,7 @@ struct lov_mds_md_v1 { /* LOV EA mds/wire data (little-endian) */ #define XATTR_NAME_DUMMY "trusted.dummy" #define XATTR_NAME_PROJID "trusted.projid" #define XATTR_NAME_PIN "trusted.pin" +#define XATTR_NAME_DATAVER "trusted.dataver" #define LL_XATTR_NAME_ENCRYPTION_CONTEXT_OLD XATTR_SECURITY_PREFIX"c" #define LL_XATTR_NAME_ENCRYPTION_CONTEXT XATTR_ENCRYPTION_PREFIX"c" diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 4970d22..78074b7 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -3253,7 +3253,6 @@ restart: ioc->idv_version = io->u.ci_data_version.dv_data_version; ioc->idv_layout_version = io->u.ci_data_version.dv_layout_version; - cl_io_fini(env, io); if (unlikely(io->ci_need_restart)) diff --git a/lustre/mdd/mdd_internal.h b/lustre/mdd/mdd_internal.h index 474a89e..416affe 100644 --- a/lustre/mdd/mdd_internal.h +++ b/lustre/mdd/mdd_internal.h @@ -388,6 +388,8 @@ int 
mdd_changelog_data_store_xattr(const struct lu_env *env, struct mdd_object *mdd_obj, const char *xattr_name, struct thandle *handle); +int mdd_dom_fixup(const struct lu_env *env, struct mdd_device *mdd, + struct mdd_object *mo, struct mdd_object *vo); /* mdd_trans.c */ void mdd_object_make_hint(const struct lu_env *env, struct mdd_object *parent, diff --git a/lustre/mdd/mdd_object.c b/lustre/mdd/mdd_object.c index e3c0b29..fa63c67 100644 --- a/lustre/mdd/mdd_object.c +++ b/lustre/mdd/mdd_object.c @@ -1858,9 +1858,6 @@ static int mdd_split_ea(struct lov_comp_md_v1 *comp_v1, __u16 mirror_id, return 0; } -static int mdd_dom_data_truncate(const struct lu_env *env, - struct mdd_device *mdd, struct mdd_object *mo); - static int mdd_xattr_split(const struct lu_env *env, struct md_object *md_obj, struct md_rejig_data *mrd) { @@ -1973,7 +1970,7 @@ stop: /* Truncate local DOM data if all went well */ if (!rc && dom_stripe) - mdd_dom_data_truncate(env, mdd, obj); + mdd_dom_fixup(env, mdd, obj, NULL); lu_buf_free(buf_save); lu_buf_free(buf); @@ -2404,36 +2401,61 @@ static inline int mdd_set_lmm_gen(struct lov_mds_md *lmm, __u32 *gen) return mdd_lmm_gen(lmm, gen, false); } -static int mdd_dom_data_truncate(const struct lu_env *env, - struct mdd_device *mdd, struct mdd_object *mo) +int mdd_dom_fixup(const struct lu_env *env, struct mdd_device *mdd, + struct mdd_object *mo, struct mdd_object *vo) { + struct dt_object *dom, *vlt; + dt_obj_version_t dv = 0; struct thandle *th; - struct dt_object *dom; int rc; + ENTRY; + + if (vo) { + vlt = dt_object_locate(mdd_object_child(vo), mdd->mdd_bottom); + if (!vlt) + GOTO(out, rc = -ENOENT); + dv = dt_data_version_get(env, vlt); + if (!dv) + GOTO(out, rc = -ENODATA); + } + dom = dt_object_locate(mdd_object_child(mo), mdd->mdd_bottom); - if (!dom) - GOTO(out, rc = -ENODATA); th = dt_trans_create(env, mdd->mdd_bottom); if (IS_ERR(th)) GOTO(out, rc = PTR_ERR(th)); - rc = dt_declare_punch(env, dom, 0, OBD_OBJECT_EOF, th); - if (rc) - 
GOTO(stop, rc); + if (vo) { + rc = dt_declare_data_version_set(env, dom, th); + if (rc) + GOTO(stop, rc); + } else { + rc = dt_declare_data_version_del(env, dom, th); + if (rc) + GOTO(stop, rc); + rc = dt_declare_punch(env, dom, 0, OBD_OBJECT_EOF, th); + if (rc) + GOTO(stop, rc); + } rc = dt_trans_start_local(env, mdd->mdd_bottom, th); if (rc != 0) GOTO(stop, rc); - rc = dt_punch(env, dom, 0, OBD_OBJECT_EOF, th); + if (vo) { + dt_data_version_set(env, dom, dv, th); + } else { + dt_data_version_del(env, dom, th); + rc = dt_punch(env, dom, 0, OBD_OBJECT_EOF, th); + } + stop: dt_trans_stop(env, mdd->mdd_bottom, th); out: /* Ignore failure but report the error */ if (rc) - CERROR("%s: can't truncate DOM inode "DFID" data: rc = %d\n", + CERROR("%s: can't manage DOM file "DFID" data: rc = %d\n", mdd_obj_dev_name(mo), PFID(mdd_object_fid(mo)), rc); return rc; } @@ -2457,7 +2479,7 @@ static int mdd_swap_layouts(const struct lu_env *env, struct md_object *obj1, struct lu_buf *snd_hsm_buf = &info->mdi_buf[3]; struct ost_id *saved_oi = NULL; struct thandle *handle; - struct mdd_object *dom_o = NULL; + struct mdd_object *dom_o = NULL, *vlt_o = NULL; __u64 domsize_dom, domsize_vlt; __u32 fst_gen, snd_gen, saved_gen; int fst_fl; @@ -2521,9 +2543,11 @@ static int mdd_swap_layouts(const struct lu_env *env, struct md_object *obj1, * target file must be volatile and orphan. */ if (fst_o->mod_flags & (ORPHAN_OBJ | VOLATILE_OBJ)) { + vlt_o = domsize_vlt ? fst_o : NULL; dom_o = domsize_dom ? snd_o : NULL; } else if (snd_o->mod_flags & (ORPHAN_OBJ | VOLATILE_OBJ)) { swap(domsize_dom, domsize_vlt); + vlt_o = domsize_vlt ? snd_o : NULL; dom_o = domsize_dom ? 
fst_o : NULL; } else if (domsize_dom > 0 || domsize_vlt > 0) { /* 'lfs swap_layouts' case, neither file should have DoM */ @@ -2547,10 +2571,6 @@ static int mdd_swap_layouts(const struct lu_env *env, struct md_object *obj1, mdd_obj_dev_name(fst_o), PFID(mdd_object_fid(fst_o)), rc); GOTO(stop, rc); - } else if (domsize_vlt > 0) { - /* Migration with the same DOM component size, no need to - * truncate local data, it is still being used */ - dom_o = NULL; } /* swapping 2 non existant layouts is a success */ @@ -2764,9 +2784,12 @@ out_restore: stop: rc = mdd_trans_stop(env, mdd, rc, handle); - /* Truncate local DOM data if all went well */ + /* Truncate local DOM data if all went well, except migration case + * with the same DOM component size. In that case a local data is + * still in use and shouldn't be deleted. + */ if (!rc && dom_o) - mdd_dom_data_truncate(env, mdd, dom_o); + mdd_dom_fixup(env, mdd, dom_o, vlt_o); mdd_write_unlock(env, snd_o); mdd_write_unlock(env, fst_o); diff --git a/lustre/mdt/mdt_io.c b/lustre/mdt/mdt_io.c index 29ad053..3de19b9 100644 --- a/lustre/mdt/mdt_io.c +++ b/lustre/mdt/mdt_io.c @@ -650,7 +650,7 @@ retry: GOTO(out_stop, rc); } - tgt_vbr_obj_set(env, dob); + tgt_vbr_obj_data_set(env, dob, true); rc = dt_trans_start(env, dt, th); if (rc) GOTO(out_stop, rc); @@ -903,7 +903,7 @@ int mdt_object_punch(const struct lu_env *env, struct dt_device *dt, if (rc) GOTO(stop, rc); - tgt_vbr_obj_set(env, dob); + tgt_vbr_obj_data_set(env, dob, true); rc = dt_trans_start(env, dt, th); if (rc) GOTO(stop, rc); @@ -1451,7 +1451,7 @@ int mdt_data_version_get(struct tgt_session_info *tsi) GOTO(out, rc = -EBADF); /* Get version first */ - version = dt_version_get(tsi->tsi_env, mdt_obj2dt(mo)); + version = dt_data_version_get(tsi->tsi_env, mdt_obj2dt(mo)); if (version && version != -EOPNOTSUPP) { repbody->mbo_valid |= OBD_MD_FLDATAVERSION; /* re-use mbo_ioepoch to transfer version */ diff --git a/lustre/obdclass/dt_object.c b/lustre/obdclass/dt_object.c index 
d79d00e..cec9952 100644 --- a/lustre/obdclass/dt_object.c +++ b/lustre/obdclass/dt_object.c @@ -550,7 +550,6 @@ int dt_declare_version_set(const struct lu_env *env, struct dt_object *o, vbuf.lb_buf = NULL; vbuf.lb_len = sizeof(dt_obj_version_t); return dt_declare_xattr_set(env, o, &vbuf, xname, 0, th); - } EXPORT_SYMBOL(dt_declare_version_set); @@ -564,7 +563,6 @@ void dt_version_set(const struct lu_env *env, struct dt_object *o, LASSERT(o); vbuf.lb_buf = &version; vbuf.lb_len = sizeof(version); - rc = dt_xattr_set(env, o, &vbuf, xname, 0, th); if (rc < 0) CDEBUG(D_INODE, "Can't set version, rc %d\n", rc); @@ -586,10 +584,124 @@ dt_obj_version_t dt_version_get(const struct lu_env *env, struct dt_object *o) CDEBUG(D_INODE, "Can't get version, rc %d\n", rc); version = 0; } + return version; } EXPORT_SYMBOL(dt_version_get); +int dt_declare_data_version_set(const struct lu_env *env, struct dt_object *o, + struct thandle *th) +{ + struct lu_buf vbuf; + + vbuf.lb_buf = NULL; + vbuf.lb_len = sizeof(dt_obj_version_t); + + return dt_declare_xattr_set(env, o, &vbuf, XATTR_NAME_DATAVER, 0, th); +} +EXPORT_SYMBOL(dt_declare_data_version_set); + +void dt_data_version_set(const struct lu_env *env, struct dt_object *o, + dt_obj_version_t version, struct thandle *th) +{ + struct lu_buf vbuf; + + CDEBUG(D_INODE, DFID": set new data version -> %llu\n", + PFID(lu_object_fid(&o->do_lu)), version); + + /* version should never be set to zero */ + LASSERT(version); + vbuf.lb_buf = &version; + vbuf.lb_len = sizeof(version); + dt_xattr_set(env, o, &vbuf, XATTR_NAME_DATAVER, 0, th); +} +EXPORT_SYMBOL(dt_data_version_set); + +int dt_declare_data_version_del(const struct lu_env *env, struct dt_object *o, + struct thandle *th) +{ + return dt_declare_xattr_del(env, o, XATTR_NAME_DATAVER, th); +} +EXPORT_SYMBOL(dt_declare_data_version_del); + +void dt_data_version_del(const struct lu_env *env, struct dt_object *o, + struct thandle *th) +{ + /* file doesn't need explicit data version anymore */ + 
CDEBUG(D_INODE, DFID": remove explicit data version\n", + PFID(lu_object_fid(&o->do_lu))); + dt_xattr_del(env, o, XATTR_NAME_DATAVER, th); +} +EXPORT_SYMBOL(dt_data_version_del); + +/* Initialize explicit data version, e.g. for DoM files. + * It uses inode version as initial value. + */ +dt_obj_version_t dt_data_version_init(const struct lu_env *env, + struct dt_object *o) +{ + struct dt_device *dt = lu2dt_dev(o->do_lu.lo_dev); + dt_obj_version_t dv; + struct thandle *th; + int rc; + + ENTRY; + + dv = dt_version_get(env, o); + if (!dv) + RETURN(1); + + th = dt_trans_create(env, dt); + if (IS_ERR(th)) + GOTO(out, rc = PTR_ERR(th)); + + rc = dt_declare_data_version_set(env, o, th); + if (rc) + GOTO(stop, rc); + + rc = dt_trans_start_local(env, dt, th); + if (rc) + GOTO(stop, rc); + + dt_data_version_set(env, o, dv, th); +stop: + dt_trans_stop(env, dt, th); +out: + /* Ignore failure but report the error */ + if (rc) + CDEBUG(D_INODE, "can't init data version for "DFID": rc = %d\n", + PFID(lu_object_fid(&o->do_lu)), rc); + + RETURN(dv); +} + +dt_obj_version_t dt_data_version_get(const struct lu_env *env, + struct dt_object *o) +{ + struct lu_buf vbuf; + dt_obj_version_t version; + int rc; + + vbuf.lb_buf = &version; + vbuf.lb_len = sizeof(version); + rc = dt_xattr_get(env, o, &vbuf, XATTR_NAME_DATAVER); + + CDEBUG(D_INODE, DFID": get data version %llu: rc = %d\n", + PFID(lu_object_fid(&o->do_lu)), version, rc); + + if (rc == sizeof(version)) + return version; + + /* data version EA wasn't set yet on the object, initialize it now */ + if (rc == -ENODATA) + return dt_data_version_init(env, o); + + CDEBUG(D_INODE, "Can't get data version: rc = %d\n", rc); + + return 0; +} +EXPORT_SYMBOL(dt_data_version_get); + /* list of all supported index types */ /* directories */ diff --git a/lustre/target/tgt_lastrcvd.c b/lustre/target/tgt_lastrcvd.c index 6de0511..1ab6065 100644 --- a/lustre/target/tgt_lastrcvd.c +++ b/lustre/target/tgt_lastrcvd.c @@ -1417,7 +1417,10 @@ static int 
tgt_last_rcvd_update(const struct lu_env *env, struct lu_target *tgt, /** VBR: set new versions */ if (th->th_result == 0 && obj != NULL) { struct dt_object *dto = dt_object_locate(obj, th->th_dev); + dt_version_set(env, dto, tti->tti_transno, th); + if (unlikely(tsi->tsi_dv_update)) + dt_data_version_set(env, dto, tti->tti_transno, th); } /* filling reply data */ @@ -1969,6 +1972,8 @@ int tgt_txn_start_cb(const struct lu_env *env, struct thandle *th, !lu_object_remote(&tsi->tsi_vbr_obj->do_lu)) { dto = dt_object_locate(tsi->tsi_vbr_obj, th->th_dev); rc = dt_declare_version_set(env, dto, th); + if (!rc && tsi->tsi_dv_update) + rc = dt_declare_data_version_set(env, dto, th); } return rc; diff --git a/lustre/tests/sanity-hsm.sh b/lustre/tests/sanity-hsm.sh index 98dfcef..2f05dbf 100755 --- a/lustre/tests/sanity-hsm.sh +++ b/lustre/tests/sanity-hsm.sh @@ -733,6 +733,31 @@ test_1e() { } run_test 1e "Archive, Release and Restore SEL file" +test_1f() { + (( $MDS1_VERSION >= $(version_code 2.14.0.116) )) || + skip "need MDS version at least 2.14.0.116" + local dom=$DIR/$tdir/$tfile + + mkdir_on_mdt0 $DIR/$tdir + $LFS setstripe -E 512K -L mdt -E -1 -c 2 $DIR/$tdir || + error "failed to set default stripe" + + test_1bde_base $dom + + [[ $($LFS getstripe --component-start=0 -L $dom) == 'mdt' ]] || + error "MDT stripe isn't set" + + # check that metadata change doesn't prevent second release + chmod 600 $dom || error "chmod failed" + + echo "release again $dom" + $LFS hsm_release $dom || error "second release failed" + $LFS hsm_state $dom + echo "verify released state: " + check_hsm_flags $dom "0x0000000d" && echo "pass" +} +run_test 1f "DoM file release after restore" + test_2() { local f=$DIR/$tdir/$tfile diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 58b06e2..fdaf1ef 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -23348,6 +23348,74 @@ test_270i() { } run_test 270i "DoM: setting invalid DoM striping should fail" +test_270j() { + (( 
$MDS1_VERSION >= $(version_code 2.14.0.116) )) || + skip "Need MDS version at least 2.14.0.116" + + local dom=$DIR/$tdir/$tfile + local odv + local ndv + + mkdir -p $DIR/$tdir + + $LFS setstripe -E 1M -L mdt -E -1 -c1 $dom + + odv=$($LFS data_version $dom) + chmod 666 $dom + mv $dom ${dom}_moved + link ${dom}_moved $dom + setfattr -n user.attrx -v "some_attr" $dom + ndv=$($LFS data_version $dom) + (( $ndv == $odv )) || + error "data version was changed by metadata operations" + + dd if=/dev/urandom of=$dom bs=1M count=1 || + error "failed to write data into $dom" + cancel_lru_locks mdc + ndv=$($LFS data_version $dom) + (( $ndv != $odv )) || + error "data version wasn't changed on write" + + odv=$ndv + $TRUNCATE $dom 1000 || error "failed to truncate $dom" + ndv=$($LFS data_version $dom) + (( $ndv != $odv )) || + error "data version wasn't changed on truncate down" + + odv=$ndv + $TRUNCATE $dom 25000 + ndv=$($LFS data_version $dom) + (( $ndv != $odv )) || + error "data version wasn't changed on truncate up" + + # check also fallocate for ldiskfs, skip because patch(#41418) is not backported + if false && [[ "$mds1_FSTYPE" == ldiskfs ]]; then + odv=$ndv + export LANG=C + local err=$(fallocate -l 1048576 $dom 2>&1) + rc=$? 
+ + if (( $rc != 0 )); then + if [[ "$err" =~ "Operation not supported" ]]; then + skip "fallocate not supported for DoM" + else + error "$err" + fi + fi + + ndv=$($LFS data_version $dom) + (( $ndv != $odv )) || + error "data version wasn't changed on fallocate" + + odv=$ndv + fallocate -p --offset 4096 -l 4096 $dom + ndv=$($LFS data_version $dom) + (( $ndv != $odv )) || + error "data version wasn't changed on fallocate punch" + fi +} +run_test 270j "DoM migration: DOM file to the OST-striped file (plain)" + test_271a() { [ $MDS1_VERSION -lt $(version_code 2.10.55) ] && skip "Need MDS version at least 2.10.55" @@ -23652,8 +23720,10 @@ test_272b() { $LFS migrate -c2 $dom || error "failed to migrate to the new composite layout" - [ $($LFS getstripe -L $dom) != 'mdt' ] || + [[ $($LFS getstripe --component-start=0 -L $dom) != 'mdt' ]] || error "MDT stripe was not removed" + ! getfattr -n trusted.dataver $dom &> /dev/null || + error "$dir1 shouldn't have DATAVER EA" cancel_lru_locks mdc local new_md5=$(md5sum $dom) @@ -23692,7 +23762,7 @@ test_272c() { $LFS migrate -E 2M -c1 -E -1 -c2 $dom || error "failed to migrate to the new composite layout" - [ $($LFS getstripe -L $dom) == 'mdt' ] && + [[ $($LFS getstripe --component-start=0 -L $dom) != 'mdt' ]] || error "MDT stripe was not removed" cancel_lru_locks mdc