Whamcloud - gitweb
LU-11446 e2fsck: check trusted.link when fixing nlink
authorLi Dongyang <dongyangli@ddn.com>
Tue, 30 Mar 2021 11:22:40 +0000 (22:22 +1100)
committerAndreas Dilger <adilger@whamcloud.com>
Thu, 15 Dec 2022 22:29:59 +0000 (15:29 -0700)
The inode link count could be higher than what is stored in the
local MDT inode because of remote file links from DNE MDTs.
If we find a mismatched link count, look up the "trusted.link"
xattr.  If it exists, do a sanity check on it, and use the
leh_reccount stored there if larger than the local link count.

If leh_overflow_time is set, then the "trusted.link" xattr may
not hold all of the links, so assume the maximum of available
link counts is valid until LFSCK clears leh_overflow_time.

If the inode is not linked into the namespace, link it into
lost+found before checking trusted.link for the DNE link count.

Update the f_trusted_link test case to include a locally
unreferenced inode that has a trusted.link xattr.  The
inode should be linked into lost+found because of the xattr,
even if it has no blocks/data, and the link count should be
extracted from the trusted.link xattr.

Signed-off-by: Li Dongyang <dongyangli@ddn.com>
Change-Id: I213d816a92043c348eb55374aaa98e98957ccf23
Reviewed-on: https://review.whamcloud.com/43169
Reviewed-by: Artem Blagodarenko <artem.blagodarenko@hpe.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
e2fsck/pass4.c
lib/ext2fs/ext2fs.h
lib/ext2fs/ext_attr.c
tests/f_trusted_link/expect.1 [new file with mode: 0644]
tests/f_trusted_link/expect.2 [new file with mode: 0644]
tests/f_trusted_link/image.gz [new file with mode: 0644]
tests/f_trusted_link/name [new file with mode: 0644]

index 8c2d2f1..f048fff 100644 (file)
@@ -17,6 +17,7 @@
 #include "config.h"
 #include "e2fsck.h"
 #include "problem.h"
+#include "ext2fs/lfsck.h"
 #include <ext2fs/ext2_ext_attr.h>
 
 /*
@@ -26,7 +27,7 @@
  * If this subroutine returns 1, then the caller shouldn't bother with the
  * rest of the pass 4 tests.
  */
-static int disconnect_inode(e2fsck_t ctx, ext2_ino_t i,
+static int disconnect_inode(e2fsck_t ctx, ext2_ino_t i, ext2_ino_t *last_ino,
                            struct ext2_inode_large *inode)
 {
        ext2_filsys fs = ctx->fs;
@@ -34,9 +35,12 @@ static int disconnect_inode(e2fsck_t ctx, ext2_ino_t i,
        __u32 eamagic = 0;
        int extra_size = 0;
 
-       e2fsck_read_inode_full(ctx, i, EXT2_INODE(inode),
-                              EXT2_INODE_SIZE(fs->super),
-                              "pass4: disconnect_inode");
+       if (*last_ino != i) {
+               e2fsck_read_inode_full(ctx, i, EXT2_INODE(inode),
+                                      EXT2_INODE_SIZE(fs->super),
+                                      "pass4: disconnect_inode");
+               *last_ino = i;
+       }
        if (EXT2_INODE_SIZE(fs->super) > EXT2_GOOD_OLD_INODE_SIZE)
                extra_size = inode->i_extra_isize;
 
@@ -75,6 +79,7 @@ static int disconnect_inode(e2fsck_t ctx, ext2_ino_t i,
        if (fix_problem(ctx, PR_4_UNATTACHED_INODE, &pctx)) {
                if (e2fsck_reconnect_file(ctx, i))
                        ext2fs_unmark_valid(fs);
+               *last_ino = 0;
        } else {
                /*
                 * If we don't attach the inode, then skip the
@@ -87,20 +92,23 @@ static int disconnect_inode(e2fsck_t ctx, ext2_ino_t i,
        return 0;
 }
 
-static void check_ea_inode(e2fsck_t ctx, ext2_ino_t i,
+/*
+ * This function is called when link_counted is zero. So this may not
+ * be an xattr inode at all. Return immediately if EA_INODE flag is not
+ * set.
+ */
+static void check_ea_inode(e2fsck_t ctx, ext2_ino_t i, ext2_ino_t *last_ino,
                           struct ext2_inode_large *inode, __u16 *link_counted)
 {
        __u64 actual_refs = 0;
        __u64 ref_count;
 
-       /*
-        * This function is called when link_counted is zero. So this may not
-        * be an xattr inode at all. Return immediately if EA_INODE flag is not
-        * set.
-        */
-       e2fsck_read_inode_full(ctx, i, EXT2_INODE(inode),
-                              EXT2_INODE_SIZE(ctx->fs->super),
-                              "pass4: check_ea_inode");
+       if (*last_ino != i) {
+               e2fsck_read_inode_full(ctx, i, EXT2_INODE(inode),
+                                      EXT2_INODE_SIZE(ctx->fs->super),
+                                      "pass4: check_ea_inode");
+               *last_ino = i;
+       }
        if (!(inode->i_flags & EXT4_EA_INODE_FL))
                return;
 
@@ -134,6 +142,73 @@ static void check_ea_inode(e2fsck_t ctx, ext2_ino_t i,
        }
 }
 
+/*
+ * Raise *link_counted for inode @ino from its Lustre "trusted.link" xattr.
+ *
+ * @ctx:          e2fsck context; ctx->fs is the filesystem being checked
+ * @ino:          inode number to examine
+ * @last_ino:     number of the inode currently held in @inode; the inode is
+ *                only re-read from disk when this differs from @ino, and it
+ *                is set to @ino after such a read
+ * @inode:        caller-owned scratch buffer holding the full on-disk inode
+ * @link_counted: in/out link count; only ever increased by this function
+ *
+ * Returns 0 when the xattr was found and accepted, the error returned by
+ * the libext2fs xattr helpers when it could not be opened, read or found,
+ * EINVAL when the xattr is too short, has a bad magic or claims to be
+ * longer than the stored value, and ENODATA when it records no links and
+ * no overflow.  *link_counted is left untouched on every error path.
+ */
+static errcode_t check_link_ea(e2fsck_t ctx, ext2_ino_t ino,
+                              ext2_ino_t *last_ino,
+                              struct ext2_inode_large *inode,
+                              __u16 *link_counted)
+{
+       struct ext2_xattr_handle *handle;
+       struct link_ea_header *leh;
+       void *buf = NULL;
+       size_t ea_len = 0;
+       errcode_t retval;
+
+       /* reuse the inode already loaded by an earlier pass 4 helper */
+       if (*last_ino != ino) {
+               e2fsck_read_inode_full(ctx, ino, EXT2_INODE(inode),
+                                      EXT2_INODE_SIZE(ctx->fs->super),
+                                      "pass4: get link ea count");
+               *last_ino = ino;
+       }
+
+       retval = ext2fs_xattrs_open(ctx->fs, ino, &handle);
+       if (retval)
+               return retval;
+
+       retval = ext2fs_xattrs_read_inode(handle, inode);
+       if (retval)
+               goto err;
+
+       retval = ext2fs_xattr_get(handle, EXT2_ATTR_INDEX_TRUSTED_PREFIX
+                                 LUSTRE_XATTR_MDT_LINK, &buf, &ea_len);
+       if (retval)
+               goto err;
+
+       /* the value comes from a possibly corrupted filesystem: never look
+        * at header fields unless the whole header is actually present */
+       if (ea_len < sizeof(*leh)) {
+               retval = EINVAL;
+               goto err_free;
+       }
+
+       leh = (struct link_ea_header *)buf;
+       /* header was written in the opposite byte order: swap the fields
+        * that are compared numerically below (leh_overflow_time is only
+        * tested for being non-zero, so it needs no swap) */
+       if (leh->leh_magic == ext2fs_swab32(LINK_EA_MAGIC)) {
+               leh->leh_magic = LINK_EA_MAGIC;
+               leh->leh_reccount = ext2fs_swab32(leh->leh_reccount);
+               leh->leh_len = ext2fs_swab64(leh->leh_len);
+       }
+       if (leh->leh_magic != LINK_EA_MAGIC) {
+               retval = EINVAL;
+               goto err_free;
+       }
+       if (leh->leh_reccount == 0 && !leh->leh_overflow_time) {
+               retval = ENODATA;
+               goto err_free;
+       }
+       if (leh->leh_len > ea_len) {
+               retval = EINVAL;
+               goto err_free;
+       }
+
+       /* if linkEA overflowed and does not hold all links, assume *some*
+        * links exist until LFSCK is next run and resets leh_overflow_time */
+       if (leh->leh_overflow_time) {
+               if (inode->i_links_count > *link_counted)
+                       *link_counted = inode->i_links_count;
+               else if (*link_counted == 0)
+                       /* NOTE(review): 1111 looks like an arbitrary
+                        * non-zero placeholder count -- confirm */
+                       *link_counted = 1111;
+       }
+       if (leh->leh_reccount > *link_counted) {
+               /* leh_reccount is handled as a 32-bit value above while
+                * *link_counted is 16 bits wide: saturate rather than
+                * silently wrapping to a small, wrong count */
+               if (leh->leh_reccount > (__u16)~0U)
+                       *link_counted = (__u16)~0U;
+               else
+                       *link_counted = leh->leh_reccount;
+       }
+err_free:
+       ext2fs_free_mem(&buf);
+err:
+       ext2fs_xattrs_close(&handle);
+       return retval;
+}
+
+
 void e2fsck_pass4(e2fsck_t ctx)
 {
        ext2_filsys fs = ctx->fs;
@@ -180,7 +255,8 @@ void e2fsck_pass4(e2fsck_t ctx)
        inode = e2fsck_allocate_memory(ctx, inode_size, "scratch inode");
 
        /* Protect loop from wrap-around if s_inodes_count maxed */
-       for (i=1; i <= fs->super->s_inodes_count && i > 0; i++) {
+       for (i = 1; i <= fs->super->s_inodes_count && i > 0; i++) {
+               ext2_ino_t last_ino = 0;
                int isdir;
 
                if (ctx->flags & E2F_FLAG_SIGNAL_MASK)
@@ -210,7 +286,7 @@ void e2fsck_pass4(e2fsck_t ctx)
                         * check_ea_inode() will update link_counted if
                         * necessary.
                         */
-                       check_ea_inode(ctx, i, inode, &link_counted);
+                       check_ea_inode(ctx, i, &last_ino, inode, &link_counted);
                }
 
                if (link_counted == 0) {
@@ -219,12 +295,13 @@ void e2fsck_pass4(e2fsck_t ctx)
                                     fs->blocksize, "bad_inode buffer");
                        if (e2fsck_process_bad_inode(ctx, 0, i, buf))
                                continue;
-                       if (disconnect_inode(ctx, i, inode))
+                       if (disconnect_inode(ctx, i, &last_ino, inode))
                                continue;
                        ext2fs_icount_fetch(ctx->inode_link_info, i,
                                            &link_count);
                        ext2fs_icount_fetch(ctx->inode_count, i,
                                            &link_counted);
+                       check_link_ea(ctx, i, &last_ino, inode, &link_counted);
                }
                isdir = ext2fs_test_inode_bitmap2(ctx->inode_dir_map, i);
                if (isdir && (link_counted > EXT2_LINK_MAX)) {
@@ -236,11 +313,18 @@ void e2fsck_pass4(e2fsck_t ctx)
                        }
                        link_counted = 1;
                }
+               if (link_counted != link_count)
+                       check_link_ea(ctx, i, &last_ino, inode, &link_counted);
+
                if (link_counted != link_count) {
                        int fix_nlink = 0;
 
-                       e2fsck_read_inode_full(ctx, i, EXT2_INODE(inode),
-                                              inode_size, "pass4");
+                       if (last_ino != i) {
+                               e2fsck_read_inode_full(ctx, i,
+                                                      EXT2_INODE(inode),
+                                                      inode_size, "pass4");
+                               last_ino = i;
+                       }
                        pctx.ino = i;
                        pctx.inode = EXT2_INODE(inode);
                        if ((link_count != inode->i_links_count) && !isdir &&
index 53fc1b1..b7fd692 100644 (file)
@@ -1318,6 +1318,8 @@ extern errcode_t ext2fs_adjust_ea_refcount3(ext2_filsys fs, blk64_t blk,
                                           ext2_ino_t inum);
 errcode_t ext2fs_xattrs_write(struct ext2_xattr_handle *handle);
 errcode_t ext2fs_xattrs_read(struct ext2_xattr_handle *handle);
+errcode_t ext2fs_xattrs_read_inode(struct ext2_xattr_handle *handle,
+                                  struct ext2_inode_large *inode);
 errcode_t ext2fs_xattrs_iterate(struct ext2_xattr_handle *h,
                                int (*func)(char *name, char *value,
                                            size_t value_len,
index c718ef3..00b3bbf 100644 (file)
@@ -1024,9 +1024,11 @@ static void xattrs_free_keys(struct ext2_xattr_handle *h)
        h->ibody_count = 0;
 }
 
-errcode_t ext2fs_xattrs_read(struct ext2_xattr_handle *handle)
+/* fetch xattrs from an already-loaded inode */
+errcode_t ext2fs_xattrs_read_inode(struct ext2_xattr_handle *handle,
+                                  struct ext2_inode_large *inode)
 {
-       struct ext2_inode_large *inode;
+
        struct ext2_ext_attr_header *header;
        __u32 ea_inode_magic;
        unsigned int storage_size;
@@ -1036,18 +1038,6 @@ errcode_t ext2fs_xattrs_read(struct ext2_xattr_handle *handle)
        errcode_t err;
 
        EXT2_CHECK_MAGIC(handle, EXT2_ET_MAGIC_EA_HANDLE);
-       i = EXT2_INODE_SIZE(handle->fs->super);
-       if (i < sizeof(*inode))
-               i = sizeof(*inode);
-       err = ext2fs_get_memzero(i, &inode);
-       if (err)
-               return err;
-
-       err = ext2fs_read_inode_full(handle->fs, handle->ino,
-                                    (struct ext2_inode *)inode,
-                                    EXT2_INODE_SIZE(handle->fs->super));
-       if (err)
-               goto out;
 
        xattrs_free_keys(handle);
 
@@ -1083,7 +1073,7 @@ errcode_t ext2fs_xattrs_read(struct ext2_xattr_handle *handle)
 
 read_ea_block:
        /* Look for EA in a separate EA block */
-       blk = ext2fs_file_acl_block(handle->fs, (struct ext2_inode *)inode);
+       blk = ext2fs_file_acl_block(handle->fs, EXT2_INODE(inode));
        if (blk != 0) {
                if ((blk < handle->fs->super->s_first_data_block) ||
                    (blk >= ext2fs_blocks_count(handle->fs->super))) {
@@ -1114,20 +1104,39 @@ read_ea_block:
                err = read_xattrs_from_buffer(handle, inode,
                                        (struct ext2_ext_attr_entry *) start,
                                        storage_size, block_buf);
-               if (err)
-                       goto out3;
+       }
 
+out3:
+       if (block_buf)
                ext2fs_free_mem(&block_buf);
-       }
+out:
+       return err;
+}
 
-       ext2fs_free_mem(&block_buf);
-       ext2fs_free_mem(&inode);
-       return 0;
+errcode_t ext2fs_xattrs_read(struct ext2_xattr_handle *handle)
+{
+       struct ext2_inode_large *inode;
+       size_t inode_size = EXT2_INODE_SIZE(handle->fs->super);
+       errcode_t err;
+
+       EXT2_CHECK_MAGIC(handle, EXT2_ET_MAGIC_EA_HANDLE);
+
+       if (inode_size < sizeof(*inode))
+               inode_size = sizeof(*inode);
+       err = ext2fs_get_memzero(inode_size, &inode);
+       if (err)
+               return err;
+
+       err = ext2fs_read_inode_full(handle->fs, handle->ino, EXT2_INODE(inode),
+                                    EXT2_INODE_SIZE(handle->fs->super));
+       if (err)
+               goto out;
+
+       err = ext2fs_xattrs_read_inode(handle, inode);
 
-out3:
-       ext2fs_free_mem(&block_buf);
 out:
        ext2fs_free_mem(&inode);
+
        return err;
 }
 
diff --git a/tests/f_trusted_link/expect.1 b/tests/f_trusted_link/expect.1
new file mode 100644 (file)
index 0000000..1063d22
--- /dev/null
@@ -0,0 +1,16 @@
+Pass 1: Checking inodes, blocks, and sizes
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 4: Checking reference counts
+Inode 15 ref count is 7, should be 6.  Fix? yes
+
+Inode 16 ref count is 3, should be 6.  Fix? yes
+
+Unattached inode 18
+Connect to /lost+found? yes
+
+Pass 5: Checking group summary information
+
+test_filesys: ***** FILE SYSTEM WAS MODIFIED *****
+test_filesys: 18/4096 files (0.0% non-contiguous), 6424/16384 blocks
+Exit status is 1
diff --git a/tests/f_trusted_link/expect.2 b/tests/f_trusted_link/expect.2
new file mode 100644 (file)
index 0000000..a8ff12a
--- /dev/null
@@ -0,0 +1,7 @@
+Pass 1: Checking inodes, blocks, and sizes
+Pass 2: Checking directory structure
+Pass 3: Checking directory connectivity
+Pass 4: Checking reference counts
+Pass 5: Checking group summary information
+test_filesys: 18/4096 files (0.0% non-contiguous), 6424/16384 blocks
+Exit status is 0
diff --git a/tests/f_trusted_link/image.gz b/tests/f_trusted_link/image.gz
new file mode 100644 (file)
index 0000000..8e25873
Binary files /dev/null and b/tests/f_trusted_link/image.gz differ
diff --git a/tests/f_trusted_link/name b/tests/f_trusted_link/name
new file mode 100644 (file)
index 0000000..a6e424b
--- /dev/null
@@ -0,0 +1 @@
+inode nlink according to trusted.link xattr