From 8a5e155235cff82e592564c3b9502d8f2c6e5c32 Mon Sep 17 00:00:00 2001 From: Alexander Zarochentsev Date: Thu, 13 Mar 2025 18:17:29 +0000 Subject: [PATCH] LU-18806 dne: migrate to skip non-migratable files Allow migrate and dir restripe to skip open files and files with LinkEA overflow and continue the migrate operation. HPE-bug-id: LUS-12783 Signed-off-by: Alexander Zarochentsev Change-Id: I4fa4cbe49d7681cdda9ba1d043f3c0a8a5d0efe9 Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/58402 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andrew Perepechko Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/mdd/mdd_dir.c | 22 ++++++++++++++++------ lustre/mdt/mdt_reint.c | 17 +++++++++++------ lustre/tests/sanity-lfsck.sh | 29 +++++++++++++++++++++++++++-- lustre/tests/sanity.sh | 28 ++++++++++++++++++++++++++++ lustre/tests/sanityn.sh | 30 +++++++++++++++++++++++++----- 5 files changed, 107 insertions(+), 19 deletions(-) diff --git a/lustre/mdd/mdd_dir.c b/lustre/mdd/mdd_dir.c index f31619f..04394fd 100644 --- a/lustre/mdd/mdd_dir.c +++ b/lustre/mdd/mdd_dir.c @@ -3750,13 +3750,14 @@ static int mdd_migrate_sanity_check(const struct lu_env *env, struct mdd_object *tobj, const struct lu_attr *spattr, const struct lu_attr *tpattr, - const struct lu_attr *attr) + const struct lu_attr *attr, + bool nsonly) { int rc; ENTRY; - if (!mdd_object_remote(sobj)) { + if (!nsonly && !mdd_object_remote(sobj)) { mdd_read_lock(env, sobj, DT_SRC_CHILD); if (sobj->mod_count > 0) { CDEBUG(D_INFO, "%s: "DFID" is opened, count %d\n", @@ -4187,11 +4188,10 @@ static int mdd_migrate_linkea_prepare(const struct lu_env *env, LASSERT(ldata->ld_leh != NULL); /* - * If linkEA is overflow, it means there are some unknown name entries - * under unknown parents, which will prevent the migration. + * If linkEA is overflow, switch to ns-only migrate */ if (unlikely(ldata->ld_leh->leh_overflow_time)) - RETURN(-EOVERFLOW); + RETURN(+EOVERFLOW); rc = mdd_fld_lookup(env, mdd, mdd_object_fid(sobj), &source_mdt_index); if (rc) @@ -4692,7 +4692,17 @@ retry: RETURN(rc); rc = mdd_migrate_sanity_check(env, mdd, spobj, tpobj, sobj, tobj, - spattr, tpattr, attr); + spattr, tpattr, attr, + spec->sp_migrate_nsonly); + if (rc == -EBUSY && !spec->sp_migrate_nsonly) { + spec->sp_migrate_nsonly = 1; + CWARN("%s: "DFID"/%s is open, migrate only dentry\n", + mdd2obd_dev(mdd)->obd_name, PFID(mdd_object_fid(spobj)), + sname->ln_name); + rc = mdd_migrate_sanity_check(env, mdd, spobj, tpobj, sobj, + tobj, spattr, tpattr, attr, + spec->sp_migrate_nsonly); + } if (rc) RETURN(rc); diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index a92fd36..08814b0 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -2434,15 +2434,20 @@ lock_parent: if (info->mti_spec.sp_migrate_close) { /* try to hold open_sem so that nobody else can open the file */ if (!down_write_trylock(&sobj->mot_open_sem)) { - /* close anyway */ - mdd_migrate_close(info, sobj); - GOTO(unlock_source, rc = -EBUSY); + /* migrate only dentry */ + if (!info->mti_spec.sp_migrate_nsonly) + CWARN("%s: "DFID"/%s is open, migrate only dentry\n", + mdt2obd_dev(mdt)->obd_name, + PFID(mdt_object_fid(spobj)), + rr->rr_name.ln_name); + info->mti_spec.sp_migrate_nsonly = 1; + } else { open_sem_locked = true; - rc = mdd_migrate_close(info, sobj); - if (rc && rc != -ESTALE) - GOTO(unlock_open_sem, rc); } + rc = mdd_migrate_close(info, sobj); + if (rc && rc != -ESTALE) + GOTO(unlock_open_sem, rc); } tobj = mdt_object_find(env, mdt, rr->rr_fid2); diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index 51479fc..94fefb9 100755 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -4813,7 +4813,21 @@ test_29c() error "(2) Fail to hard link" cancel_lru_locks mdc - if [ $MDSCOUNT -ge 2 ]; then + + local linked_file_migrate=false + (( $MDS1_VERSION >= $(version_code 2.16.50) )) && + linked_file_migrate=true + + if (( $MDSCOUNT >= 2 )) && $linked_file_migrate; then + $LFS migrate -m 1 $DIR/$tdir/guard 2>/dev/null || + error "(3.1) Migrate should succeed" + + echo "The object with linkEA overflow should NOT be migrated" + local newfid=$($LFS path2fid $DIR/$tdir/guard/f0) + [ "$newfid" == "$oldfid" ] || + error "(3.2) The file with overflowed LinkEA should not migrate: $newfid != $oldfid" + fi + if (( $MDSCOUNT >= 2 )) && ! $linked_file_migrate; then $LFS migrate -m 1 $DIR/$tdir/guard 2>/dev/null && error "(3.1) Migrate should fail" @@ -4828,7 +4842,18 @@ test_29c() echo "Remove 100 hard links to save space for the missed linkEA entries" unlinkmany $DIR/$tdir/foo/ttttttttttt 100 || error "(4) Fail to unlink" - if [ $MDSCOUNT -ge 2 ]; then + if (( $MDSCOUNT >= 2 )) && $linked_file_migrate; then + $LFS migrate -m 1 $DIR/$tdir/guard 2>/dev/null || + error "(5.1) Migrate should succeed" + + # The overflow timestamp is still there, so migration + # should not migrate the file with LinkEA overflow timestamp + # but migrate only name + local newfid=$($LFS path2fid $DIR/$tdir/guard/f0) + [ "$newfid" == "$oldfid" ] || + error "(5.2) The file should not migrate: $newfid != $oldfid" + fi + if (( $MDSCOUNT >= 2 )) && ! $linked_file_migrate; then $LFS migrate -m 1 $DIR/$tdir/guard 2>/dev/null && error "(5.1) Migrate should fail" diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 03c0e6e..02decdd 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -23923,6 +23923,34 @@ test_230c() { error "$file is not on MDT${MDTIDX}" done + if (($MDS1_VERSION >= $(version_code 2.16.50) )); then + echo "Migrate a dir with an open file" + touch $migrate_dir/foo + local foo_fid=$($LFS path2fid $migrate_dir/foo) + $MULTIOP $migrate_dir/foo o_c & + local bg_pid=$! + sleep 1 + + $LFS migrate -m 0 $migrate_dir || { + kill -USR1 $bg_pid + error "migrate a dir with an open file fails" + } + + kill -USR1 $bg_pid + wait $bg_pid || error "multiop fails" + + local foo_fid_new=$($LFS path2fid $migrate_dir/foo) + [[ "$foo_fid" == "$foo_fid_new" ]] || + error "migrate should skip an open file $foo_fid != $foo_fid_new" + + for file in $(find $migrate_dir); do + [[ $file == "$migrate_dir/foo" ]] && continue + mdt_index=$($LFS getstripe -m $file) + [[ "$mdt_index" == "0" ]] || + error "$file is not on MDT0" + done + fi + rm -rf $DIR/$tdir || error "rm dir failed after migration" } run_test 230c "check directory accessiblity if migration failed" diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index 1453967..3770c08 100755 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -5231,22 +5231,42 @@ test_80a() { cp /etc/passwd $DIR1/$tdir/$tfile - #migrate open file should fails + # attempt to migrate an open file multiop_bg_pause $DIR2/$tdir/$tfile O_c || error "open $file failed" pid=$! # give multiop a chance to open sleep 1 - $LFS migrate -m $MDTIDX $DIR1/$tdir && - error "migrate open files should failed with open files" + local open_file_migrate=false + (($MDS1_VERSION >= $(version_code 2.16.50) )) && open_file_migrate=true - kill -USR1 $pid + if $open_file_migrate; then + local oldfid=$($LFS path2fid $DIR1/$tdir/$tfile) + + $LFS migrate -m $MDTIDX $DIR1/$tdir || + error "migrate open files should not fail" + + kill -USR1 $pid + + local newfid=$($LFS path2fid $DIR1/$tdir/$tfile) + + [[ "$oldfid" == "$newfid" ]] || + error "FID of the open file changed from $oldfid to $newfid" - $LFS migrate -m $MDTIDX $DIR1/$tdir || + else + $LFS migrate -m $MDTIDX $DIR1/$tdir && + error "migrate open files should failed with open files" + + kill -USR1 $pid + + $LFS migrate -m $MDTIDX $DIR1/$tdir || error "migrate remote dir error" + fi echo "Finish migration, then checking.." for file in $(find $DIR1/$tdir); do + $open_file_migrate && [[ "$file" == "$DIR1/$tdir/$tfile" ]] && + continue mdt_index=$($LFS getstripe -m $file) [ $mdt_index == $MDTIDX ] || error "$file is not on MDT${MDTIDX}" -- 1.8.3.1