From f2f516a70bb22fded4b7ce94cd69140e731db0db Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Thu, 14 Jan 2021 17:14:01 +0800 Subject: [PATCH] LU-14119 lfsck: check linkea if it's newly added In LFSCK phase one, if new linkea entry is added, and final linkea entry count is more than one, add file in trace file, so that the linkea sanity will be checked in phase two. And in phase two check, if link parent FID can't be mapped to valid inode, remove it from linkea. Add sanity-lfsck 1d, which changed parent directory FID in LMA, therefore the FID in LMA mismatches with parent FID in child linkea, verify LFSCK can fix such inconsistency. Lustre-change: https://review.whamcloud.com/41261 Lustre-commit: afd00cacd0b6ef87282887b4e965350a9c1a6821 Signed-off-by: Lai Siyao Change-Id: I315983d262110c1e36c3893fa2e51925d96c51a7 Reviewed-by: Andreas Dilger Reviewed-by: Hongchao Zhang Reviewed-on: https://review.whamcloud.com/44237 Tested-by: jenkins Tested-by: Maloo --- lustre/lfsck/lfsck_namespace.c | 45 +++++++++++++++++++++++++---- lustre/tests/sanity-lfsck.sh | 64 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 6 deletions(-) diff --git a/lustre/lfsck/lfsck_namespace.c b/lustre/lfsck/lfsck_namespace.c index 282ffe8..fc0c9e7 100644 --- a/lustre/lfsck/lfsck_namespace.c +++ b/lustre/lfsck/lfsck_namespace.c @@ -2882,8 +2882,20 @@ again: } parent = lfsck_object_find_bottom(env, lfsck, &tfid); - if (IS_ERR(parent)) - RETURN(PTR_ERR(parent)); + if (IS_ERR(parent)) { + rc = PTR_ERR(parent); + /* if @pfid doesn't have a valid OI mapping, it will + * trigger OI scrub, and -ENOENT is returned if it's + * remote, -EINPROGRESS if local. 
+ */ + if ((rc == -ENOENT || rc == -EINPROGRESS) && + ldata->ld_leh->leh_reccount > 1) { + lfsck_linkea_del_buf(ldata, cname); + continue; + } + + RETURN(rc); + } if (!dt_object_exists(parent)) { lfsck_object_put(env, parent); @@ -3770,8 +3782,18 @@ static int lfsck_namespace_double_scan_one(const struct lu_env *env, } parent = lfsck_object_find_bottom(env, lfsck, pfid); - if (IS_ERR(parent)) - GOTO(out, rc = PTR_ERR(parent)); + if (IS_ERR(parent)) { + rc = PTR_ERR(parent); + /* if @pfid doesn't have a valid OI mapping, it will + * trigger OI scrub, and -ENOENT is returned if it's + * remote, -EINPROGRESS if local. + */ + if ((rc == -ENOENT || rc == -EINPROGRESS) && + ldata.ld_leh->leh_reccount > 1) + rc = lfsck_namespace_shrink_linkea(env, com, + child, &ldata, cname, pfid, true); + GOTO(out, rc); + } if (!dt_object_exists(parent)) { @@ -5562,8 +5584,8 @@ static int lfsck_namespace_assistant_handler_p1(const struct lu_env *env, struct lustre_handle lh = { 0 }; bool repaired = false; bool dtlocked = false; - bool remove; - bool newdata; + bool remove = false; + bool newdata = false; bool log = false; bool bad_hash = false; bool bad_linkea = false; @@ -5948,6 +5970,17 @@ out: if (obj != NULL && count == 1 && S_ISREG(lfsck_object_type(obj))) dt_attr_get(env, obj, la); + + /* if new linkea entry is added, the old entry may be stale, + * check it in phase 2. Sigh, linkea check can only be done + * locally. 
+ */ + if (bad_linkea && !remove && !newdata && + !dt_object_remote(obj) && count > 1) + rc = lfsck_namespace_trace_update(env, com, + &lnr->lnr_fid, + LNTF_CHECK_LINKEA, + true); } trace: diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index c2bf325..36486c7 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -324,6 +324,70 @@ test_1c() { } run_test 1c "LFSCK can find out and repair lost FID-in-dirent" +test_1d() { + [ $MDS1_VERSION -lt $(version_code 2.13.57) ] && + skip "MDS older than 2.13.57" + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" + + check_mount_and_prep + + touch $DIR/$tdir/$tfile + mkdir $DIR/$tdir/subdir + $LFS mkdir -i 1 $DIR/$tdir/remotedir + $LFS path2fid $DIR/$tdir + ll_decode_linkea $DIR/$tdir/$tfile + ll_decode_linkea $DIR/$tdir/subdir + ll_decode_linkea $DIR/$tdir/remotedir + + local mntpt=$(facet_mntpt mds1) + + # unlink OI files to remove the stale entry + local saved_opts=$MDS_MOUNT_OPTS + + stopall + mount_fstype mds1 $mntpt + # increase $tdir FID oid in LMA + do_facet mds1 "getfattr -d -m trusted.lma -e hex \ + --absolute-names $mntpt/ROOT/$tdir | \ + sed -E 's/0(.{8})$/1\1/' | setfattr --restore=-" + unmount_fstype mds1 $mntpt + setupall + + # the FID oid in LMA was increased above, and it's not in OI table, + # run scrub first to generate mapping in OI, so the following namespace + # check can fix linkea correctly, this is not necessary normally. + do_facet mds1 $LCTL lfsck_start -M ${MDT_DEV} -t scrub || + error "failed to start LFSCK for scrub!" + wait_update_facet mds1 "$LCTL get_param -n \ + osd-*.$(facet_svc mds1).oi_scrub | + awk '/^status/ { print \\\$2 }'" "completed" 32 || + error "unexpected status" + + $START_NAMESPACE -r -A || error "fail to start LFSCK for namespace!" 
+ wait_update_facet mds1 "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 32 || { + $SHOW_NAMESPACE + error "unexpected status" + } + $LFS path2fid $DIR/$tdir + ll_decode_linkea $DIR/$tdir/$tfile + ll_decode_linkea $DIR/$tdir/subdir + ll_decode_linkea $DIR/$tdir/remotedir + + local pfid + local fid + + fid=$($LFS path2fid $DIR/$tdir) + for f in $tfile subdir remotedir; do + pfid=$(ll_decode_linkea $DIR/$tdir/$f | + awk '/pfid/ { print $3 }') + pfid=${pfid%,} + [ "$pfid" == "$fid" ] || error "$fid in LMA != $pfid in linkea" + done +} +run_test 1d "LFSCK can fix mismatch of FID in LMA and FID in child linkea" + test_2a() { lfsck_prep 1 1 -- 1.8.3.1