From: Lai Siyao Date: Thu, 14 Jan 2021 09:14:01 +0000 (+0800) Subject: LU-14119 lfsck: check linkea if it's newly added X-Git-Tag: 2.14.52~151 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=afd00cacd0b6ef87282887b4e965350a9c1a6821 LU-14119 lfsck: check linkea if it's newly added In LFSCK phase one, if new linkea entry is added, and final linkea entry count is more than one, add file in trace file, so that the linkea sanity will be checked in phase two. And in phase two check, if link parent FID can't be mapped to valid inode, remove it from linkea. Add sanity-lfsck 1d, which changed parent directory FID in LMA, therefore the FID in LMA mismatches with parent FID in child linkea, verify LFSCK can fix such inconsistency. Signed-off-by: Lai Siyao Change-Id: I315983d262110c1e36c3893fa2e51925d96c51a7 Reviewed-on: https://review.whamcloud.com/41261 Reviewed-by: Andreas Dilger Tested-by: jenkins Tested-by: Maloo Reviewed-by: Hongchao Zhang Reviewed-by: Oleg Drokin --- diff --git a/lustre/lfsck/lfsck_namespace.c b/lustre/lfsck/lfsck_namespace.c index ff660c4..9569d0c 100644 --- a/lustre/lfsck/lfsck_namespace.c +++ b/lustre/lfsck/lfsck_namespace.c @@ -2882,8 +2882,20 @@ again: } parent = lfsck_object_find_bottom(env, lfsck, &tfid); - if (IS_ERR(parent)) - RETURN(PTR_ERR(parent)); + if (IS_ERR(parent)) { + rc = PTR_ERR(parent); + /* if @pfid doesn't have a valid OI mapping, it will + * trigger OI scrub, and -ENOENT is returned if it's + * remote, -EINPROGRESS if local. 
+ */ + if ((rc == -ENOENT || rc == -EINPROGRESS) && + ldata->ld_leh->leh_reccount > 1) { + lfsck_linkea_del_buf(ldata, cname); + continue; + } + + RETURN(rc); + } if (!dt_object_exists(parent)) { lfsck_object_put(env, parent); @@ -3770,8 +3782,18 @@ static int lfsck_namespace_double_scan_one(const struct lu_env *env, } parent = lfsck_object_find_bottom(env, lfsck, pfid); - if (IS_ERR(parent)) - GOTO(out, rc = PTR_ERR(parent)); + if (IS_ERR(parent)) { + rc = PTR_ERR(parent); + /* if @pfid doesn't have a valid OI mapping, it will + * trigger OI scrub, and -ENOENT is returned if it's + * remote, -EINPROGRESS if local. + */ + if ((rc == -ENOENT || rc == -EINPROGRESS) && + ldata.ld_leh->leh_reccount > 1) + rc = lfsck_namespace_shrink_linkea(env, com, + child, &ldata, cname, pfid, true); + GOTO(out, rc); + } if (!dt_object_exists(parent)) { @@ -5562,8 +5584,8 @@ static int lfsck_namespace_assistant_handler_p1(const struct lu_env *env, struct lustre_handle lh = { 0 }; bool repaired = false; bool dtlocked = false; - bool remove; - bool newdata; + bool remove = false; + bool newdata = false; bool log = false; bool bad_hash = false; bool bad_linkea = false; @@ -5948,6 +5970,17 @@ out: if (obj != NULL && count == 1 && S_ISREG(lfsck_object_type(obj))) dt_attr_get(env, obj, la); + + /* if new linkea entry is added, the old entry may be stale, + * check it in phase 2. Sigh, linkea check can only be done + * locally. 
+ */ + if (bad_linkea && !remove && !newdata && + !dt_object_remote(obj) && count > 1) + rc = lfsck_namespace_trace_update(env, com, + &lnr->lnr_fid, + LNTF_CHECK_LINKEA, + true); } trace: diff --git a/lustre/tests/sanity-lfsck.sh b/lustre/tests/sanity-lfsck.sh index 4549a91..fc7d989 100644 --- a/lustre/tests/sanity-lfsck.sh +++ b/lustre/tests/sanity-lfsck.sh @@ -324,6 +324,70 @@ test_1c() { } run_test 1c "LFSCK can find out and repair lost FID-in-dirent" +test_1d() { + [ $MDS1_VERSION -lt $(version_code 2.13.57) ] && + skip "MDS older than 2.13.57" + [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" + + check_mount_and_prep + + touch $DIR/$tdir/$tfile + mkdir $DIR/$tdir/subdir + $LFS mkdir -i 1 $DIR/$tdir/remotedir + $LFS path2fid $DIR/$tdir + ll_decode_linkea $DIR/$tdir/$tfile + ll_decode_linkea $DIR/$tdir/subdir + ll_decode_linkea $DIR/$tdir/remotedir + + local mntpt=$(facet_mntpt mds1) + + # unlink OI files to remove the stale entry + local saved_opts=$MDS_MOUNT_OPTS + + stopall + mount_fstype mds1 $mntpt + # increase $tdir FID oid in LMA + do_facet mds1 "getfattr -d -m trusted.lma -e hex \ + --absolute-names $mntpt/ROOT/$tdir | \ + sed -E 's/0(.{8})$/1\1/' | setfattr --restore=-" + unmount_fstype mds1 $mntpt + setupall + + # the FID oid in LMA was increased above, and it's not in OI table, + # run scrub first to generate mapping in OI, so the following namespace + # check can fix linkea correctly, this is not necessary normally. + do_facet mds1 $LCTL lfsck_start -M ${MDT_DEV} -t scrub || + error "failed to start LFSCK for scrub!" + wait_update_facet mds1 "$LCTL get_param -n \ + osd-*.$(facet_svc mds1).oi_scrub | + awk '/^status/ { print \\\$2 }'" "completed" 32 || + error "unexpected status" + + $START_NAMESPACE -r -A || error "fail to start LFSCK for namespace!" 
+ wait_update_facet mds1 "$LCTL get_param -n \ + mdd.${MDT_DEV}.lfsck_namespace | + awk '/^status/ { print \\\$2 }'" "completed" 32 || { + $SHOW_NAMESPACE + error "unexpected status" + } + $LFS path2fid $DIR/$tdir + ll_decode_linkea $DIR/$tdir/$tfile + ll_decode_linkea $DIR/$tdir/subdir + ll_decode_linkea $DIR/$tdir/remotedir + + local pfid + local fid + + fid=$($LFS path2fid $DIR/$tdir) + for f in $tfile subdir remotedir; do + pfid=$(ll_decode_linkea $DIR/$tdir/$f | + awk '/pfid/ { print $3 }') + pfid=${pfid%,} + [ "$pfid" == "$fid" ] || error "$fid in LMA != $pfid in linkea" + done +} +run_test 1d "LFSCK can fix mismatch of FID in LMA and FID in child linkea" + test_2a() { lfsck_prep 1 1