Whamcloud - gitweb
LU-14119 lfsck: check linkea if it's newly added 70/43270/2
author: Lai Siyao <lai.siyao@whamcloud.com>
Thu, 14 Jan 2021 09:14:01 +0000 (17:14 +0800)
committer: Oleg Drokin <green@whamcloud.com>
Sun, 16 May 2021 22:07:07 +0000 (22:07 +0000)
In LFSCK phase one, if a new linkea entry is added and the final linkea
entry count is more than one, add the file to the trace file, so that
linkea sanity will be checked in phase two.

In the phase two check, if a link's parent FID can't be mapped to a valid
inode, remove that entry from the linkea.

Add sanity-lfsck test 1d, which changes the parent directory FID in LMA
so that it no longer matches the parent FID in the child's linkea, and
verifies that LFSCK can fix such an inconsistency.

Lustre-change: https://review.whamcloud.com/41261
Lustre-commit: afd00cacd0b6ef87282887b4e965350a9c1a6821

Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: I315983d262110c1e36c3893fa2e51925d96c51a7
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Signed-off-by: Minh Diep <mdiep@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/43270
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/lfsck/lfsck_namespace.c
lustre/tests/sanity-lfsck.sh

index 3da8c26..d20b8cf 100644 (file)
@@ -2730,8 +2730,20 @@ again:
                }
 
                parent = lfsck_object_find_bottom(env, lfsck, &tfid);
-               if (IS_ERR(parent))
-                       RETURN(PTR_ERR(parent));
+               if (IS_ERR(parent)) {
+                       rc = PTR_ERR(parent);
+                       /* if @tfid doesn't have a valid OI mapping, it will
+                        * trigger OI scrub, and -ENOENT is returned if it's
+                        * remote, -EINPROGRESS if local.
+                        */
+                       if ((rc == -ENOENT || rc == -EINPROGRESS) &&
+                           ldata->ld_leh->leh_reccount > 1) {
+                               lfsck_linkea_del_buf(ldata, cname);
+                               continue;
+                       }
+
+                       RETURN(rc);
+               }
 
                if (!dt_object_exists(parent)) {
                        lfsck_object_put(env, parent);
@@ -3623,8 +3635,18 @@ static int lfsck_namespace_double_scan_one(const struct lu_env *env,
                }
 
                parent = lfsck_object_find_bottom(env, lfsck, pfid);
-               if (IS_ERR(parent))
-                       GOTO(out, rc = PTR_ERR(parent));
+               if (IS_ERR(parent)) {
+                       rc = PTR_ERR(parent);
+                       /* if @pfid doesn't have a valid OI mapping, it will
+                        * trigger OI scrub, and -ENOENT is returned if it's
+                        * remote, -EINPROGRESS if local.
+                        */
+                       if ((rc == -ENOENT || rc == -EINPROGRESS) &&
+                           ldata.ld_leh->leh_reccount > 1)
+                               rc = lfsck_namespace_shrink_linkea(env, com,
+                                       child, &ldata, cname, pfid, true);
+                       GOTO(out, rc);
+               }
 
                if (!dt_object_exists(parent)) {
 
@@ -5343,8 +5365,8 @@ static int lfsck_namespace_assistant_handler_p1(const struct lu_env *env,
        struct lustre_handle        lh       = { 0 };
        bool                        repaired = false;
        bool                        dtlocked = false;
-       bool                        remove;
-       bool                        newdata;
+       bool                        remove = false;
+       bool                        newdata = false;
        bool                        log      = false;
        bool                        bad_hash = false;
        bool                        bad_linkea = false;
@@ -5731,6 +5753,17 @@ out:
                if (obj != NULL && count == 1 &&
                    S_ISREG(lfsck_object_type(obj)))
                        dt_attr_get(env, obj, la);
+
+               /* if new linkea entry is added, the old entry may be stale,
+                * check it in phase 2. Sigh, linkea check can only be done
+                * locally.
+                */
+               if (bad_linkea && !remove && !newdata &&
+                   !dt_object_remote(obj) && count > 1)
+                       rc = lfsck_namespace_trace_update(env, com,
+                                                         &lnr->lnr_fid,
+                                                         LNTF_CHECK_LINKEA,
+                                                         true);
        }
 
 trace:
index e4def73..da869df 100644 (file)
@@ -339,6 +339,70 @@ test_1c() {
 }
 run_test 1c "LFSCK can find out and repair lost FID-in-dirent"
 
+test_1d() {
+       [ $MDS1_VERSION -lt $(version_code 2.13.57) ] &&
+               skip "MDS older than 2.13.57"
+       [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs"
+
+       check_mount_and_prep
+
+       touch $DIR/$tdir/$tfile
+       mkdir $DIR/$tdir/subdir
+       $LFS mkdir -i 1 $DIR/$tdir/remotedir
+       $LFS path2fid $DIR/$tdir
+       ll_decode_linkea $DIR/$tdir/$tfile
+       ll_decode_linkea $DIR/$tdir/subdir
+       ll_decode_linkea $DIR/$tdir/remotedir
+
+       local mntpt=$(facet_mntpt mds1)
+
+       # mount mds1 via mount_fstype to edit LMA (NOTE: saved_opts is unused)
+       local saved_opts=$MDS_MOUNT_OPTS
+
+       stopall
+       mount_fstype mds1 $mntpt
+       # increase $tdir FID oid in LMA
+       do_facet mds1 "getfattr -d -m trusted.lma -e hex \
+               --absolute-names $mntpt/ROOT/$tdir | \
+               sed -E 's/0(.{8})$/1\1/' | setfattr --restore=-"
+       unmount_fstype mds1 $mntpt
+       setupall
+
+       # the FID oid in LMA was increased above, so it is not in the OI table;
+       # run scrub first to generate the OI mapping, so the following namespace
+       # check can fix linkea correctly. This is not normally necessary.
+       do_facet mds1 $LCTL lfsck_start -M ${MDT_DEV} -t scrub ||
+               error "failed to start LFSCK for scrub!"
+       wait_update_facet mds1 "$LCTL get_param -n \
+               osd-*.$(facet_svc mds1).oi_scrub |
+               awk '/^status/ { print \\\$2 }'" "completed" 32 ||
+               error "unexpected status"
+
+       $START_NAMESPACE -r -A || error "fail to start LFSCK for namespace!"
+       wait_update_facet mds1 "$LCTL get_param -n \
+               mdd.${MDT_DEV}.lfsck_namespace |
+               awk '/^status/ { print \\\$2 }'" "completed" 32 || {
+               $SHOW_NAMESPACE
+               error "unexpected status"
+       }
+       $LFS path2fid $DIR/$tdir
+       ll_decode_linkea $DIR/$tdir/$tfile
+       ll_decode_linkea $DIR/$tdir/subdir
+       ll_decode_linkea $DIR/$tdir/remotedir
+
+       local pfid
+       local fid
+
+       fid=$($LFS path2fid $DIR/$tdir)
+       for f in $tfile subdir remotedir; do
+               pfid=$(ll_decode_linkea $DIR/$tdir/$f |
+                       awk '/pfid/ { print $3 }')
+               pfid=${pfid%,}
+               [ "$pfid" == "$fid" ] || error "$fid in LMA != $pfid in linkea"
+       done
+}
+run_test 1d "LFSCK can fix mismatch of FID in LMA and FID in child linkea"
+
 test_2a() {
        lfsck_prep 1 1