Whamcloud - gitweb
Merge branch 'maint' into next
authorTheodore Ts'o <tytso@mit.edu>
Thu, 30 Dec 2021 15:59:52 +0000 (10:59 -0500)
committerTheodore Ts'o <tytso@mit.edu>
Thu, 30 Dec 2021 15:59:52 +0000 (10:59 -0500)
26 files changed:
debugfs/set_fields.c
e2fsck/e2fsck.h
e2fsck/pass1.c
e2fsck/pass2.c
e2fsck/pass4.c
e2fsck/problem.c
e2fsck/problem.h
e2fsck/super.c
e2fsck/unix.c
lib/e2p/feature.c
lib/e2p/ls.c
lib/ext2fs/Makefile.in
lib/ext2fs/ext2_fs.h
lib/ext2fs/ext2fs.h
lib/ext2fs/ext_attr.c
lib/ext2fs/orphan.c [new file with mode: 0644]
lib/ext2fs/swapfs.c
lib/ext2fs/tst_super_size.c
lib/support/mkquota.c
misc/e2image.c
misc/mke2fs.8.in
misc/mke2fs.c
misc/tune2fs.8.in
misc/tune2fs.c
tests/m_assume_storage_prezeroed/expect [new file with mode: 0644]
tests/m_assume_storage_prezeroed/script [new file with mode: 0644]

index b001579..f916dea 100644 (file)
@@ -183,6 +183,7 @@ static struct field_set_info super_fields[] = {
        { "lpf_ino", &set_sb.s_lpf_ino, NULL, 4, parse_uint },
        { "checksum_seed", &set_sb.s_checksum_seed, NULL, 4, parse_uint },
        { "encoding", &set_sb.s_encoding, NULL, 2, parse_encoding },
+       { "orphan_file_inum", &set_sb.s_orphan_file_inum, NULL, 4, parse_uint },
        { 0, 0, 0, 0 }
 };
 
index 00b2091..2db216f 100644 (file)
@@ -656,6 +656,7 @@ void sigcatcher_setup(void);
 void check_super_block(e2fsck_t ctx);
 int check_backup_super_block(e2fsck_t ctx);
 void check_resize_inode(e2fsck_t ctx);
+int check_init_orphan_file(e2fsck_t ctx);
 
 /* util.c */
 extern void *e2fsck_allocate_memory(e2fsck_t ctx, unsigned long size,
index dde862a..26b9ab7 100644 (file)
@@ -911,6 +911,7 @@ static void reserve_block_for_lnf_repair(e2fsck_t ctx)
 }
 
 static errcode_t get_inline_data_ea_size(ext2_filsys fs, ext2_ino_t ino,
+                                        struct ext2_inode *inode,
                                         size_t *sz)
 {
        void *p;
@@ -921,7 +922,8 @@ static errcode_t get_inline_data_ea_size(ext2_filsys fs, ext2_ino_t ino,
        if (retval)
                return retval;
 
-       retval = ext2fs_xattrs_read(handle);
+       retval = ext2fs_xattrs_read_inode(handle,
+                                         (struct ext2_inode_large *)inode);
        if (retval)
                goto err;
 
@@ -1508,7 +1510,8 @@ void e2fsck_pass1(e2fsck_t ctx)
                    (ino >= EXT2_FIRST_INODE(fs->super))) {
                        size_t size = 0;
 
-                       pctx.errcode = get_inline_data_ea_size(fs, ino, &size);
+                       pctx.errcode = get_inline_data_ea_size(fs, ino, inode,
+                                                              &size);
                        if (!pctx.errcode &&
                            fix_problem(ctx, PR_1_INLINE_DATA_FEATURE, &pctx)) {
                                ext2fs_set_feature_inline_data(sb);
@@ -1531,7 +1534,7 @@ void e2fsck_pass1(e2fsck_t ctx)
                        flags = fs->flags;
                        if (failed_csum)
                                fs->flags |= EXT2_FLAG_IGNORE_CSUM_ERRORS;
-                       err = get_inline_data_ea_size(fs, ino, &size);
+                       err = get_inline_data_ea_size(fs, ino, inode, &size);
                        fs->flags = (flags & EXT2_FLAG_IGNORE_CSUM_ERRORS) |
                                    (fs->flags & ~EXT2_FLAG_IGNORE_CSUM_ERRORS);
 
@@ -1778,6 +1781,32 @@ void e2fsck_pass1(e2fsck_t ctx)
                                                        inode_size, "pass1");
                                failed_csum = 0;
                        }
+               } else if (ino == fs->super->s_orphan_file_inum) {
+                       ext2fs_mark_inode_bitmap2(ctx->inode_used_map, ino);
+                       if (ext2fs_has_feature_orphan_file(fs->super)) {
+                               if (!LINUX_S_ISREG(inode->i_mode) &&
+                                   fix_problem(ctx, PR_1_ORPHAN_FILE_BAD_MODE,
+                                               &pctx)) {
+                                       inode->i_mode = LINUX_S_IFREG;
+                                       e2fsck_write_inode(ctx, ino, inode,
+                                                          "pass1");
+                                       failed_csum = 0;
+                               }
+                               check_blocks(ctx, &pctx, block_buf, NULL);
+                               FINISH_INODE_LOOP(ctx, ino, &pctx, failed_csum);
+                               continue;
+                       }
+                       if ((inode->i_links_count ||
+                            inode->i_blocks || inode->i_block[0]) &&
+                           fix_problem(ctx, PR_1_ORPHAN_FILE_NOT_CLEAR,
+                                       &pctx)) {
+                               memset(inode, 0, inode_size);
+                               ext2fs_icount_store(ctx->inode_link_info, ino,
+                                                   0);
+                               e2fsck_write_inode_full(ctx, ino, inode,
+                                                       inode_size, "pass1");
+                               failed_csum = 0;
+                       }
                } else if (ino < EXT2_FIRST_INODE(fs->super)) {
                        problem_t problem = 0;
 
@@ -3482,6 +3511,7 @@ static void check_blocks(e2fsck_t ctx, struct problem_context *pctx,
        }
 
        if (ino != quota_type2inum(PRJQUOTA, fs->super) &&
+           ino != fs->super->s_orphan_file_inum &&
            (ino == EXT2_ROOT_INO || ino >= EXT2_FIRST_INODE(ctx->fs->super)) &&
            !(inode->i_flags & EXT4_EA_INODE_FL)) {
                quota_data_add(ctx->qctx, (struct ext2_inode_large *) inode,
index 3b473af..a3a8e1c 100644 (file)
@@ -1444,7 +1444,8 @@ skip_checksum:
                    (dirent->inode > fs->super->s_inodes_count) ||
                    (dirent->inode == fs->super->s_usr_quota_inum) ||
                    (dirent->inode == fs->super->s_grp_quota_inum) ||
-                   (dirent->inode == fs->super->s_prj_quota_inum)) {
+                   (dirent->inode == fs->super->s_prj_quota_inum) ||
+                   (dirent->inode == fs->super->s_orphan_file_inum)) {
                        problem = PR_2_BAD_INO;
                } else if (ctx->inode_bb_map &&
                           (ext2fs_test_inode_bitmap2(ctx->inode_bb_map,
index 8c2d2f1..d2dda02 100644 (file)
@@ -26,7 +26,7 @@
  * This subroutine returns 1 then the caller shouldn't bother with the
  * rest of the pass 4 tests.
  */
-static int disconnect_inode(e2fsck_t ctx, ext2_ino_t i,
+static int disconnect_inode(e2fsck_t ctx, ext2_ino_t i, ext2_ino_t *last_ino,
                            struct ext2_inode_large *inode)
 {
        ext2_filsys fs = ctx->fs;
@@ -34,9 +34,12 @@ static int disconnect_inode(e2fsck_t ctx, ext2_ino_t i,
        __u32 eamagic = 0;
        int extra_size = 0;
 
-       e2fsck_read_inode_full(ctx, i, EXT2_INODE(inode),
-                              EXT2_INODE_SIZE(fs->super),
-                              "pass4: disconnect_inode");
+       if (*last_ino != i) {
+               e2fsck_read_inode_full(ctx, i, EXT2_INODE(inode),
+                                      EXT2_INODE_SIZE(fs->super),
+                                      "pass4: disconnect_inode");
+               *last_ino = i;
+       }
        if (EXT2_INODE_SIZE(fs->super) > EXT2_GOOD_OLD_INODE_SIZE)
                extra_size = inode->i_extra_isize;
 
@@ -75,6 +78,7 @@ static int disconnect_inode(e2fsck_t ctx, ext2_ino_t i,
        if (fix_problem(ctx, PR_4_UNATTACHED_INODE, &pctx)) {
                if (e2fsck_reconnect_file(ctx, i))
                        ext2fs_unmark_valid(fs);
+               *last_ino = 0;
        } else {
                /*
                 * If we don't attach the inode, then skip the
@@ -87,20 +91,22 @@ static int disconnect_inode(e2fsck_t ctx, ext2_ino_t i,
        return 0;
 }
 
-static void check_ea_inode(e2fsck_t ctx, ext2_ino_t i,
+/*
+ * This function is called when link_counted is zero. So this may not be
+ * an xattr inode at all. Return immediately if EA_INODE flag is not set.
+ */
+static void check_ea_inode(e2fsck_t ctx, ext2_ino_t i, ext2_ino_t *last_ino,
                           struct ext2_inode_large *inode, __u16 *link_counted)
 {
        __u64 actual_refs = 0;
        __u64 ref_count;
 
-       /*
-        * This function is called when link_counted is zero. So this may not
-        * be an xattr inode at all. Return immediately if EA_INODE flag is not
-        * set.
-        */
-       e2fsck_read_inode_full(ctx, i, EXT2_INODE(inode),
-                              EXT2_INODE_SIZE(ctx->fs->super),
-                              "pass4: check_ea_inode");
+       if (*last_ino != i) {
+               e2fsck_read_inode_full(ctx, i, EXT2_INODE(inode),
+                                      EXT2_INODE_SIZE(ctx->fs->super),
+                                      "pass4: check_ea_inode");
+               *last_ino = i;
+       }
        if (!(inode->i_flags & EXT4_EA_INODE_FL))
                return;
 
@@ -180,7 +186,8 @@ void e2fsck_pass4(e2fsck_t ctx)
        inode = e2fsck_allocate_memory(ctx, inode_size, "scratch inode");
 
        /* Protect loop from wrap-around if s_inodes_count maxed */
-       for (i=1; i <= fs->super->s_inodes_count && i > 0; i++) {
+       for (i = 1; i <= fs->super->s_inodes_count && i > 0; i++) {
+               ext2_ino_t last_ino = 0;
                int isdir;
 
                if (ctx->flags & E2F_FLAG_SIGNAL_MASK)
@@ -192,7 +199,7 @@ void e2fsck_pass4(e2fsck_t ctx)
                                        goto errout;
                }
                if (i == quota_type2inum(PRJQUOTA, ctx->fs->super) ||
-                   i == EXT2_BAD_INO ||
+                   i == fs->super->s_orphan_file_inum || i == EXT2_BAD_INO ||
                    (i > EXT2_ROOT_INO && i < EXT2_FIRST_INODE(fs->super)))
                        continue;
                if (!(ext2fs_test_inode_bitmap2(ctx->inode_used_map, i)) ||
@@ -210,7 +217,7 @@ void e2fsck_pass4(e2fsck_t ctx)
                         * check_ea_inode() will update link_counted if
                         * necessary.
                         */
-                       check_ea_inode(ctx, i, inode, &link_counted);
+                       check_ea_inode(ctx, i, &last_ino, inode, &link_counted);
                }
 
                if (link_counted == 0) {
@@ -219,7 +226,7 @@ void e2fsck_pass4(e2fsck_t ctx)
                                     fs->blocksize, "bad_inode buffer");
                        if (e2fsck_process_bad_inode(ctx, 0, i, buf))
                                continue;
-                       if (disconnect_inode(ctx, i, inode))
+                       if (disconnect_inode(ctx, i, &last_ino, inode))
                                continue;
                        ext2fs_icount_fetch(ctx->inode_link_info, i,
                                            &link_count);
@@ -239,8 +246,12 @@ void e2fsck_pass4(e2fsck_t ctx)
                if (link_counted != link_count) {
                        int fix_nlink = 0;
 
-                       e2fsck_read_inode_full(ctx, i, EXT2_INODE(inode),
-                                              inode_size, "pass4");
+                       if (last_ino != i) {
+                               e2fsck_read_inode_full(ctx, i,
+                                                      EXT2_INODE(inode),
+                                                      inode_size, "pass4");
+                               last_ino = i;
+                       }
                        pctx.ino = i;
                        pctx.inode = EXT2_INODE(inode);
                        if ((link_count != inode->i_links_count) && !isdir &&
index 757b5d5..f454dcb 100644 (file)
@@ -526,6 +526,26 @@ static struct e2fsck_problem problem_table[] = {
             "not compatible. Resize @i should be disabled.  "),
          PROMPT_FIX, 0, 0, 0, 0 },
 
+       /* Orphan file contains holes */
+       { PR_0_ORPHAN_FILE_HOLE,
+         N_("Orphan file (@i %i) contains hole at @b %b. Terminating orphan file recovery.\n"),
+         PROMPT_NONE, 0 },
+
+       /* Orphan file block has wrong magic */
+       { PR_0_ORPHAN_FILE_BAD_MAGIC,
+         N_("Orphan file (@i %i) @b %b contains wrong magic. Terminating orphan file recovery.\n"),
+         PROMPT_NONE, 0 },
+
+       /* Orphan file block has wrong checksum */
+       { PR_0_ORPHAN_FILE_BAD_CHECKSUM,
+         N_("Orphan file (@i %i) @b %b contains wrong checksum. Terminating orphan file recovery.\n"),
+         PROMPT_NONE, 0 },
+
+       /* Orphan file size isn't a multiple of the block size */
+       { PR_0_ORPHAN_FILE_WRONG_SIZE,
+         N_("Orphan file (@i %i) size is not multiple of block size. Terminating orphan file recovery.\n"),
+         PROMPT_NONE, 0 },
+
        /* Pass 1 errors */
 
        /* Pass 1: Checking inodes, blocks, and sizes */
@@ -1279,6 +1299,15 @@ static struct e2fsck_problem problem_table[] = {
          N_("@h %i uses SipHash, but should not.  "),
          PROMPT_CLEAR_HTREE, PR_PREEN_OK, 0, 0, 0 },
 
+       /* Orphan file has bad mode */
+       { PR_1_ORPHAN_FILE_BAD_MODE,
+         N_("Orphan file @i %i is not regular file.  "),
+         PROMPT_CLEAR, PR_PREEN_OK },
+
+       /* Orphan file inode is not in use, but contains data */
+       { PR_1_ORPHAN_FILE_NOT_CLEAR,
+         N_("Orphan file @i %i is not in use, but contains data.  "),
+         PROMPT_CLEAR, PR_PREEN_OK },
 
        /* Pass 1b errors */
 
@@ -2259,6 +2288,56 @@ static struct e2fsck_problem problem_table[] = {
          N_("Error writing quota info for quota type %N: %m\n"),
          PROMPT_NULL, 0, 0, 0, 0 },
 
+       /* Orphan file without a journal */
+       { PR_6_ORPHAN_FILE_WITHOUT_JOURNAL,
+         N_("@S has orphan file without @j.\n"),
+         PROMPT_CLEAR, PR_PREEN_OK },
+
+       /* Orphan file truncation failed */
+       { PR_6_ORPHAN_FILE_TRUNC_FAILED,
+         N_("Failed to truncate orphan file.\n"),
+         PROMPT_NONE, 0 },
+
+       /* Failed to initialize orphan file */
+       { PR_6_ORPHAN_FILE_CORRUPTED,
+         N_("Failed to initialize orphan file.\n"),
+         PROMPT_RECREATE, PR_PREEN_OK },
+
+       /* Cannot fix corrupted orphan file with invalid bitmaps */
+       { PR_6_ORPHAN_FILE_BITMAP_INVALID,
+         N_("Cannot fix corrupted orphan file with invalid bitmaps.\n"),
+         PROMPT_NONE, 0 },
+
+       /* Orphan file creation failed */
+       { PR_6_ORPHAN_FILE_CREATE_FAILED,
+         N_("Failed to create orphan file (@i %i).\n"),
+         PROMPT_NONE, 0 },
+
+       /* Orphan file block contains data */
+       { PR_6_ORPHAN_BLOCK_DIRTY,
+         N_("Orphan file (@i %i) @b %b is not clean.\n"),
+         PROMPT_CLEAR, PR_PREEN_OK },
+
+       /* orphan_present set but orphan file is empty */
+       { PR_6_ORPHAN_PRESENT_CLEAN_FILE,
+         N_("Feature orphan_present is set but orphan file is clean.\n"),
+         PROMPT_CLEAR, PR_PREEN_OK },
+
+       /* orphan_present set but orphan_file is not */
+       { PR_6_ORPHAN_PRESENT_NO_FILE,
+         N_("Feature orphan_present is set but feature orphan_file is not.\n"),
+         PROMPT_CLEAR, PR_PREEN_OK },
+
+       /* Orphan file size isn't a multiple of the block size */
+       { PR_6_ORPHAN_FILE_WRONG_SIZE,
+         N_("Orphan file (@i %i) size is not multiple of block size.\n"),
+         PROMPT_NONE, 0 },
+
+       /* Orphan file contains holes */
+       { PR_6_ORPHAN_FILE_HOLE,
+         N_("Orphan file (@i %i) contains hole at @b %b.\n"),
+         PROMPT_NONE, 0 },
+
        { 0 }
 };
 
index 24cdcf9..530aea6 100644 (file)
@@ -288,6 +288,18 @@ struct problem_context {
 /* Meta_bg and resize_inode are not compatible, remove resize_inode*/
 #define PR_0_DISABLE_RESIZE_INODE              0x000051
 
+/* Orphan file contains holes */
+#define PR_0_ORPHAN_FILE_HOLE                  0x000052
+
+/* Orphan file block has wrong magic */
+#define PR_0_ORPHAN_FILE_BAD_MAGIC             0x000053
+
+/* Orphan file block has wrong checksum */
+#define PR_0_ORPHAN_FILE_BAD_CHECKSUM          0x000054
+
+/* Orphan file size isn't a multiple of the block size */
+#define PR_0_ORPHAN_FILE_WRONG_SIZE            0x000055
+
 /*
  * Pass 1 errors
  */
@@ -716,6 +728,11 @@ struct problem_context {
 /* Htree directory uses SipHash but should not */
 #define PR_1_HTREE_CANNOT_SIPHASH              0x01008E
 
+/* Orphan file inode is not a regular file */
+#define PR_1_ORPHAN_FILE_BAD_MODE              0x01008F
+
+/* Orphan file inode is not in use, but contains data */
+#define PR_1_ORPHAN_FILE_NOT_CLEAR             0x010090
 
 /*
  * Pass 1b errors
@@ -1293,6 +1310,35 @@ struct problem_context {
 /* Error updating quota information */
 #define PR_6_WRITE_QUOTAS              0x060006
 
+/* Orphan file without a journal */
+#define PR_6_ORPHAN_FILE_WITHOUT_JOURNAL       0x060007
+
+/* Orphan file truncation failed */
+#define PR_6_ORPHAN_FILE_TRUNC_FAILED  0x060008
+
+/* Failed to initialize orphan file */
+#define PR_6_ORPHAN_FILE_CORRUPTED     0x060009
+
+/* Cannot fix corrupted orphan file with invalid bitmaps */
+#define PR_6_ORPHAN_FILE_BITMAP_INVALID        0x06000A
+
+/* Orphan file creation failed */
+#define PR_6_ORPHAN_FILE_CREATE_FAILED 0x06000B
+
+/* Orphan file block contains data */
+#define PR_6_ORPHAN_BLOCK_DIRTY                0x06000C
+
+/* orphan_present set but orphan file is empty */
+#define PR_6_ORPHAN_PRESENT_CLEAN_FILE 0x06000D
+
+/* orphan_present set but orphan_file is not */
+#define PR_6_ORPHAN_PRESENT_NO_FILE    0x06000E
+
+/* Orphan file size isn't a multiple of the block size */
+#define PR_6_ORPHAN_FILE_WRONG_SIZE    0x06000F
+
+/* Orphan file contains holes */
+#define PR_6_ORPHAN_FILE_HOLE          0x060010
 
 /*
  * Function declarations
index 123813b..9495e02 100644 (file)
@@ -314,6 +314,180 @@ static errcode_t e2fsck_write_all_quotas(e2fsck_t ctx)
        return pctx.errcode;
 }
 
+/*
+ * Release a single orphan inode.
+ *
+ * On entry *ino is the inode to process.  The inode's blocks are handed
+ * to release_inode_blocks(); if the inode has no links left it is also
+ * removed from the quota and allocation accounting and stamped with a
+ * deletion time, otherwise only its i_dtime is reset to 0.
+ *
+ * On success 0 is returned and *ino is replaced by the value that was
+ * stored in the inode's i_dtime field (the next inode to process when
+ * walking a linked orphan list).  Returns 1, without writing the inode
+ * back, if that next inode number is out of range or if
+ * release_inode_blocks() reports failure.
+ */
+static int release_orphan_inode(e2fsck_t ctx, ext2_ino_t *ino, char *block_buf)
+{
+       ext2_filsys fs = ctx->fs;
+       struct problem_context pctx;
+       struct ext2_inode_large inode;
+       ext2_ino_t next_ino;
+
+       e2fsck_read_inode_full(ctx, *ino, EXT2_INODE(&inode),
+                               sizeof(inode), "release_orphan_inode");
+       clear_problem_context(&pctx);
+       pctx.ino = *ino;
+       pctx.inode = EXT2_INODE(&inode);
+       /* Inodes that still have links are reported as truncated, not cleared */
+       pctx.str = inode.i_links_count ? _("Truncating") : _("Clearing");
+
+       fix_problem(ctx, PR_0_ORPHAN_CLEAR_INODE, &pctx);
+
+       /* i_dtime holds the number of the next inode; 0 means there is none */
+       next_ino = inode.i_dtime;
+       if (next_ino &&
+           ((next_ino < EXT2_FIRST_INODE(fs->super)) ||
+            (next_ino > fs->super->s_inodes_count))) {
+               pctx.ino = next_ino;
+               fix_problem(ctx, PR_0_ORPHAN_ILLEGAL_INODE, &pctx);
+               return 1;
+       }
+
+       if (release_inode_blocks(ctx, *ino, &inode, block_buf, &pctx))
+               return 1;
+
+       if (!inode.i_links_count) {
+               /* Unlinked inode: drop it from quota and allocation counts */
+               if (ctx->qctx)
+                       quota_data_inodes(ctx->qctx, &inode, *ino, -1);
+               ext2fs_inode_alloc_stats2(fs, *ino, -1,
+                                         LINUX_S_ISDIR(inode.i_mode));
+               ctx->free_inodes++;
+               inode.i_dtime = ctx->now;
+       } else {
+               /* Inode stays in use; i_dtime no longer carries a link */
+               inode.i_dtime = 0;
+       }
+       e2fsck_write_inode_full(ctx, *ino, EXT2_INODE(&inode),
+                               sizeof(inode), "delete_file");
+       /* Hand the next inode number back to the caller */
+       *ino = next_ino;
+       return 0;
+}
+
+/*
+ * State shared between the orphan file walkers (process_orphan_file(),
+ * check_init_orphan_file()) and their ext2fs_block_iterate3() callbacks
+ * (process_orphan_block(), reinit_orphan_block()).
+ */
+struct process_orphan_block_data {
+       /* e2fsck context used for fix_problem() and inode release */
+       e2fsck_t        ctx;
+       /*
+        * One-block buffer: the orphan block just read (processing) or the
+        * clean template block that is compared/written (reinit).
+        */
+       char            *buf;
+       /*
+        * Scratch buffer: passed to release_orphan_inode() (processing) or
+        * filled with the on-disk block for comparison (reinit).
+        */
+       char            *block_buf;
+       /* Orphan file size in blocks (i_size / blocksize) */
+       e2_blkcnt_t     blocks;
+       /* Set to 1 by a callback that stopped the walk because of a problem */
+       int             abort;
+       /* Reinit only: set once a dirty block is being cleared */
+       int             clear;
+       /* Result of the callback's most recent block read or write */
+       errcode_t       errcode;
+       /* Reinit only: orphan file inode number, fed to the checksum */
+       ext2_ino_t      ino;
+       /* Reinit only: orphan inode i_generation, fed to the checksum */
+       __u32           generation;
+};
+
+/*
+ * ext2fs_block_iterate3() callback used by process_orphan_file().
+ *
+ * Reads one block of the orphan file into pd->buf, checks its tail magic
+ * and checksum, and calls release_orphan_inode() for every non-zero inode
+ * number stored in the block.
+ *
+ * priv_data points to a struct process_orphan_block_data.  Returns 0 to
+ * continue the iteration and BLOCK_ABORT to stop it; pd->abort is set to
+ * 1 whenever the stop is caused by a problem (hole, read error, bad
+ * magic, bad checksum, or a failed inode release).
+ */
+static int process_orphan_block(ext2_filsys fs,
+                              blk64_t  *block_nr,
+                              e2_blkcnt_t blockcnt,
+                              blk64_t  ref_blk EXT2FS_ATTR((unused)),
+                              int      ref_offset EXT2FS_ATTR((unused)),
+                              void *priv_data)
+{
+       struct process_orphan_block_data *pd;
+       e2fsck_t                ctx;
+       struct problem_context  pctx;
+       blk64_t                 blk = *block_nr;
+       struct ext4_orphan_block_tail *tail;
+       int                     j;
+       int                     inodes_per_ob;
+       __u32                   *bdata;
+       ext2_ino_t              ino;
+
+       pd = priv_data;
+       ctx = pd->ctx;
+       clear_problem_context(&pctx);
+       pctx.ino = fs->super->s_orphan_file_inum;
+       /* Problems are reported by logical block index within the file */
+       pctx.blk = blockcnt;
+
+       /* Orphan file must not have holes */
+       if (!blk) {
+               /* Unmapped block at index pd->blocks: end of file, not a hole */
+               if (blockcnt == pd->blocks)
+                       return BLOCK_ABORT;
+               fix_problem(ctx, PR_0_ORPHAN_FILE_HOLE, &pctx);
+               /* Shared error exit; later failures jump here with goto */
+return_abort:
+               pd->abort = 1;
+               return BLOCK_ABORT;
+       }
+       inodes_per_ob = ext2fs_inodes_per_orphan_block(fs);
+       pd->errcode = io_channel_read_blk64(fs->io, blk, 1, pd->buf);
+       if (pd->errcode)
+               goto return_abort;
+       tail = ext2fs_orphan_block_tail(fs, pd->buf);
+       if (ext2fs_le32_to_cpu(tail->ob_magic) !=
+           EXT4_ORPHAN_BLOCK_MAGIC) {
+               fix_problem(ctx, PR_0_ORPHAN_FILE_BAD_MAGIC, &pctx);
+               goto return_abort;
+       }
+       if (!ext2fs_orphan_file_block_csum_verify(fs,
+                       fs->super->s_orphan_file_inum, blk, pd->buf)) {
+               fix_problem(ctx, PR_0_ORPHAN_FILE_BAD_CHECKSUM, &pctx);
+               goto return_abort;
+       }
+       /* The block is treated as an array of little-endian inode numbers */
+       bdata = (__u32 *)pd->buf;
+       for (j = 0; j < inodes_per_ob; j++) {
+               /* Zero entries are unused slots */
+               if (!bdata[j])
+                       continue;
+               ino = ext2fs_le32_to_cpu(bdata[j]);
+               /*
+                * NOTE(review): ino comes straight from disk and is not
+                * range-checked here before release_orphan_inode() reads
+                * the inode -- confirm that is intended.
+                */
+               if (release_orphan_inode(ctx, &ino, pd->block_buf))
+                       goto return_abort;
+       }
+       return 0;
+}
+
+/*
+ * Process the orphan file: walk all of its blocks with
+ * process_orphan_block() and release every inode recorded in them.
+ *
+ * block_buf is handed on to release_orphan_inode() as scratch space.
+ *
+ * Returns 0 if the orphan_file feature is not set or the whole file was
+ * processed, and 1 if the orphan inode cannot be read, its size is not a
+ * multiple of the block size, the block iteration fails, or a callback
+ * aborted the walk.
+ */
+static int process_orphan_file(e2fsck_t ctx, char *block_buf)
+{
+       ext2_filsys fs = ctx->fs;
+       char *orphan_buf;
+       struct process_orphan_block_data pd;
+       int ret = 0;
+       ext2_ino_t orphan_inum = fs->super->s_orphan_file_inum;
+       struct ext2_inode orphan_inode;
+       struct problem_context  pctx;
+       errcode_t retval;
+
+       if (!ext2fs_has_feature_orphan_file(fs->super))
+               return 0;
+
+       clear_problem_context(&pctx);
+       pctx.ino = orphan_inum;
+
+       /*
+        * Four blocks: the first three are passed to
+        * ext2fs_block_iterate3() as its block buffer, the fourth holds
+        * the orphan block currently being examined (pd.buf).
+        */
+       orphan_buf = (char *) e2fsck_allocate_memory(ctx, fs->blocksize * 4,
+                                                   "orphan block buffer");
+       retval = ext2fs_read_inode(fs, orphan_inum, &orphan_inode);
+       /*
+        * Any non-zero errcode_t is a failure; testing for "< 0" would
+        * miss the positive error codes and go on to use an inode that
+        * was never read.
+        */
+       if (retval) {
+               com_err("process_orphan_file", retval,
+                       _("while reading inode %d"), orphan_inum);
+               ret = 1;
+               goto out;
+       }
+       /* Blocksize is a power of two, so this tests for a partial block */
+       if (EXT2_I_SIZE(&orphan_inode) & (fs->blocksize - 1)) {
+               fix_problem(ctx, PR_0_ORPHAN_FILE_WRONG_SIZE, &pctx);
+               ret = 1;
+               goto out;
+       }
+       pd.buf = orphan_buf + 3 * fs->blocksize;
+       pd.block_buf = block_buf;
+       pd.blocks = EXT2_I_SIZE(&orphan_inode) / fs->blocksize;
+       pd.ctx = ctx;
+       pd.abort = 0;
+       pd.errcode = 0;
+       /* BLOCK_FLAG_HOLE: the callback also sees unmapped blocks */
+       retval = ext2fs_block_iterate3(fs, orphan_inum,
+                                      BLOCK_FLAG_DATA_ONLY | BLOCK_FLAG_HOLE,
+                                      orphan_buf, process_orphan_block, &pd);
+       if (retval) {
+               com_err("process_orphan_block", retval,
+                       _("while calling ext2fs_block_iterate for inode %d"),
+                       orphan_inum);
+               ret = 1;
+               goto out;
+       }
+       if (pd.abort) {
+               /* Only I/O failures leave an error code behind */
+               if (pd.errcode) {
+                       com_err("process_orphan_block", pd.errcode,
+                               _("while reading blocks of inode %d"),
+                               orphan_inum);
+               }
+               ret = 1;
+       }
+out:
+       ext2fs_free_mem(&orphan_buf);
+       return ret;
+}
+
 /*
  * This function releases all of the orphan inodes.  It returns 1 if
  * it hit some error, and 0 on success.
@@ -321,15 +495,17 @@ static errcode_t e2fsck_write_all_quotas(e2fsck_t ctx)
 static int release_orphan_inodes(e2fsck_t ctx)
 {
        ext2_filsys fs = ctx->fs;
-       ext2_ino_t      ino, next_ino;
-       struct ext2_inode_large inode;
+       ext2_ino_t ino;
        struct problem_context pctx;
        char *block_buf;
 
-       if ((ino = fs->super->s_last_orphan) == 0)
+       if (fs->super->s_last_orphan == 0 &&
+           !ext2fs_has_feature_orphan_present(fs->super))
                return 0;
 
        clear_problem_context(&pctx);
+       ino = fs->super->s_last_orphan;
+       pctx.ino = ino;
        pctx.errcode = e2fsck_read_all_quotas(ctx);
        if (pctx.errcode) {
                fix_problem(ctx, PR_0_QUOTA_INIT_CTX, &pctx);
@@ -344,9 +520,10 @@ static int release_orphan_inodes(e2fsck_t ctx)
        ext2fs_mark_super_dirty(fs);
 
        /*
-        * If the filesystem contains errors, don't run the orphan
-        * list, since the orphan list can't be trusted; and we're
-        * going to be running a full e2fsck run anyway...
+        * If the filesystem contains errors, don't process the orphan list
+        * or orphan file, since neither can be trusted; and we're going to
+        * be running a full e2fsck run anyway... We clear orphan file contents
+        * after filesystem is checked to avoid clearing someone else's data.
         */
        if (fs->super->s_state & EXT2_ERROR_FS) {
                if (ctx->qctx)
@@ -354,10 +531,8 @@ static int release_orphan_inodes(e2fsck_t ctx)
                return 0;
        }
 
-       if ((ino < EXT2_FIRST_INODE(fs->super)) ||
-           (ino > fs->super->s_inodes_count)) {
-               clear_problem_context(&pctx);
-               pctx.ino = ino;
+       if (ino && ((ino < EXT2_FIRST_INODE(fs->super)) ||
+           (ino > fs->super->s_inodes_count))) {
                fix_problem(ctx, PR_0_ORPHAN_ILLEGAL_HEAD_INODE, &pctx);
                goto err_qctx;
        }
@@ -366,43 +541,19 @@ static int release_orphan_inodes(e2fsck_t ctx)
                                                    "block iterate buffer");
        e2fsck_read_bitmaps(ctx);
 
+       /* First process orphan list */
        while (ino) {
-               e2fsck_read_inode_full(ctx, ino, EXT2_INODE(&inode),
-                               sizeof(inode), "release_orphan_inodes");
-               clear_problem_context(&pctx);
-               pctx.ino = ino;
-               pctx.inode = EXT2_INODE(&inode);
-               pctx.str = inode.i_links_count ? _("Truncating") :
-                       _("Clearing");
-
-               fix_problem(ctx, PR_0_ORPHAN_CLEAR_INODE, &pctx);
-
-               next_ino = inode.i_dtime;
-               if (next_ino &&
-                   ((next_ino < EXT2_FIRST_INODE(fs->super)) ||
-                    (next_ino > fs->super->s_inodes_count))) {
-                       pctx.ino = next_ino;
-                       fix_problem(ctx, PR_0_ORPHAN_ILLEGAL_INODE, &pctx);
+               if (release_orphan_inode(ctx, &ino, block_buf))
                        goto err_buf;
-               }
+       }
 
-               if (release_inode_blocks(ctx, ino, &inode, block_buf, &pctx))
-                       goto err_buf;
+       /* Next process orphan file */
+       if (ext2fs_has_feature_orphan_present(fs->super) &&
+           !ext2fs_has_feature_orphan_file(fs->super))
+               goto err_buf;
+       if (process_orphan_file(ctx, block_buf))
+               goto err_buf;
 
-               if (!inode.i_links_count) {
-                       if (ctx->qctx)
-                               quota_data_inodes(ctx->qctx, &inode, ino, -1);
-                       ext2fs_inode_alloc_stats2(fs, ino, -1,
-                                                 LINUX_S_ISDIR(inode.i_mode));
-                       ctx->free_inodes++;
-                       inode.i_dtime = ctx->now;
-               } else {
-                       inode.i_dtime = 0;
-               }
-               e2fsck_write_inode_full(ctx, ino, EXT2_INODE(&inode),
-                               sizeof(inode), "delete_file");
-               ino = next_ino;
-       }
        ext2fs_free_mem(&block_buf);
        pctx.errcode = e2fsck_write_all_quotas(ctx);
        if (pctx.errcode)
@@ -417,6 +568,134 @@ err:
        return 1;
 }
 
+/*
+ * ext2fs_block_iterate3() callback used by check_init_orphan_file().
+ *
+ * pd->buf holds a clean orphan block (zeroed, magic set).  For each block
+ * of the orphan file the checksum in that template is refreshed for the
+ * block's physical number and the on-disk block is compared against it.
+ * The first mismatch is reported as PR_6_ORPHAN_BLOCK_DIRTY; once the fix
+ * is accepted pd->clear is set and this and all following blocks are
+ * overwritten with the template without being read first.
+ *
+ * priv_data points to a struct process_orphan_block_data.  Returns 0 to
+ * continue the iteration and BLOCK_ABORT to stop it; pd->abort is set to
+ * 1 when the stop is caused by a hole, an I/O error (left in pd->errcode)
+ * or a declined fix.
+ */
+static int reinit_orphan_block(ext2_filsys fs,
+                              blk64_t  *block_nr,
+                              e2_blkcnt_t blockcnt,
+                              blk64_t  ref_blk EXT2FS_ATTR((unused)),
+                              int      ref_offset EXT2FS_ATTR((unused)),
+                              void *priv_data)
+{
+       struct process_orphan_block_data *pd;
+       e2fsck_t                ctx;
+       blk64_t                 blk = *block_nr;
+       struct problem_context  pctx;
+
+       pd = priv_data;
+       ctx = pd->ctx;
+
+       /* Orphan file must not have holes */
+       if (!blk) {
+               /* Unmapped block at index pd->blocks: end of file, not a hole */
+               if (blockcnt == pd->blocks)
+                       return BLOCK_ABORT;
+
+               clear_problem_context(&pctx);
+               pctx.ino = fs->super->s_orphan_file_inum;
+               pctx.blk = blockcnt;
+               fix_problem(ctx, PR_6_ORPHAN_FILE_HOLE, &pctx);
+               /* Shared error exit; later failures jump here with goto */
+return_abort:
+               pd->abort = 1;
+               return BLOCK_ABORT;
+       }
+
+       if (ext2fs_has_feature_metadata_csum(fs->super)) {
+               struct ext4_orphan_block_tail *tail;
+
+               tail = ext2fs_orphan_block_tail(fs, pd->buf);
+               /*
+                * Update checksum to match expected buffer contents with
+                * appropriate block number.
+                */
+               tail->ob_checksum = ext2fs_do_orphan_file_block_csum(fs,
+                               pd->ino, pd->generation, blk, pd->buf);
+       }
+       if (!pd->clear) {
+               pd->errcode = io_channel_read_blk64(fs->io, blk, 1,
+                                                   pd->block_buf);
+               /*
+                * A failed read leaves pd->block_buf unfilled; report the
+                * error instead of comparing against stale contents.
+                */
+               if (pd->errcode)
+                       goto return_abort;
+               /* Block is already cleanly initialized? */
+               if (!memcmp(pd->block_buf, pd->buf, fs->blocksize))
+                       return 0;
+
+               clear_problem_context(&pctx);
+               pctx.ino = fs->super->s_orphan_file_inum;
+               pctx.blk = blockcnt;
+               if (!fix_problem(ctx, PR_6_ORPHAN_BLOCK_DIRTY, &pctx))
+                       goto return_abort;
+               /* From here on rewrite every block without asking again */
+               pd->clear = 1;
+       }
+       pd->errcode = io_channel_write_blk64(fs->io, blk, 1, pd->buf);
+       if (pd->errcode)
+               goto return_abort;
+       return 0;
+}
+
+/*
+ * Check and clear orphan file.
+ *
+ * Walks every block of the orphan file with reinit_orphan_block() and
+ * makes sure each one is a cleanly initialized orphan block.
+ *
+ * Returns:
+ *   0 - all blocks were already clean,
+ *   1 - we hit some inconsistency (bad size, hole, I/O error, declined
+ *       fix); the caller will truncate & recreate a new orphan file,
+ *   2 - the file is consistent but some blocks had to be cleared.
+ */
+int check_init_orphan_file(e2fsck_t ctx)
+{
+       ext2_filsys fs = ctx->fs;
+       char *orphan_buf;
+       struct process_orphan_block_data pd;
+       struct ext4_orphan_block_tail *tail;
+       ext2_ino_t orphan_inum = fs->super->s_orphan_file_inum;
+       struct ext2_inode orphan_inode;
+       int ret = 0;
+       errcode_t retval;
+
+       /*
+        * Five blocks: the first three are passed to
+        * ext2fs_block_iterate3() as its block buffer, the fourth is the
+        * clean template block (pd.buf) and the fifth receives the
+        * on-disk block for comparison (pd.block_buf).
+        */
+       orphan_buf = (char *) e2fsck_allocate_memory(ctx, fs->blocksize * 5,
+                                                   "orphan block buffer");
+       e2fsck_read_inode(ctx, orphan_inum, &orphan_inode, "orphan inode");
+       /* Blocksize is a power of two, so this tests for a partial block */
+       if (EXT2_I_SIZE(&orphan_inode) & (fs->blocksize - 1)) {
+               struct problem_context  pctx;
+
+               clear_problem_context(&pctx);
+               pctx.ino = orphan_inum;
+               fix_problem(ctx, PR_6_ORPHAN_FILE_WRONG_SIZE, &pctx);
+               ret = 1;
+               goto out;
+       }
+       pd.buf = orphan_buf + 3 * fs->blocksize;
+       pd.block_buf = orphan_buf + 4 * fs->blocksize;
+       pd.blocks = EXT2_I_SIZE(&orphan_inode) / fs->blocksize;
+       pd.ctx = ctx;
+       pd.abort = 0;
+       pd.clear = 0;
+       pd.errcode = 0;
+       pd.ino = orphan_inum;
+       pd.generation = orphan_inode.i_generation;
+       /* Initialize buffer to write */
+       memset(pd.buf, 0, fs->blocksize);
+       tail = ext2fs_orphan_block_tail(fs, pd.buf);
+       tail->ob_magic = ext2fs_cpu_to_le32(EXT4_ORPHAN_BLOCK_MAGIC);
+
+       /* BLOCK_FLAG_HOLE: the callback also sees unmapped blocks */
+       retval = ext2fs_block_iterate3(fs, orphan_inum,
+                                      BLOCK_FLAG_DATA_ONLY | BLOCK_FLAG_HOLE,
+                                      orphan_buf, reinit_orphan_block, &pd);
+       if (retval) {
+               com_err("reinit_orphan_block", retval,
+                       _("while calling ext2fs_block_iterate for inode %d"),
+                       orphan_inum);
+               ret = 1;
+               goto out;
+       }
+       if (pd.abort) {
+               /* Only I/O failures leave an error code behind */
+               if (pd.errcode) {
+                       /* Tag the error with the callback that produced it */
+                       com_err("reinit_orphan_block", pd.errcode,
+                               _("while reading blocks of inode %d"),
+                               orphan_inum);
+               }
+               ret = 1;
+       }
+
+       /* We had to clear some blocks. Report it up. */
+       if (ret == 0 && pd.clear)
+               ret = 2;
+out:
+       ext2fs_free_mem(&orphan_buf);
+       return ret;
+}
+
 /*
  * Check the resize inode to make sure it is sane.  We check both for
  * the case where on-line resizing is not enabled (in which case the
index f267bae..ae231f9 100644 (file)
@@ -1945,15 +1945,82 @@ print_unsupp_features:
                                _("\n*** journal has been regenerated ***\n"));
                }
        }
-no_journal:
 
+no_journal:
        if (run_result & E2F_FLAG_ABORT) {
                fatal_error(ctx, _("aborted"));
        } else if (run_result & E2F_FLAG_CANCEL) {
                log_out(ctx, _("%s: e2fsck canceled.\n"), ctx->device_name ?
                        ctx->device_name : ctx->filesystem_name);
                exit_value |= FSCK_CANCELED;
-       } else if (ctx->qctx && !ctx->invalid_bitmaps) {
+               goto cleanup;
+       }
+
+       if (ext2fs_has_feature_orphan_file(fs->super)) {
+               int ret;
+
+               /* No point in orphan file without a journal... */
+               if (!ext2fs_has_feature_journal(fs->super) &&
+                   fix_problem(ctx, PR_6_ORPHAN_FILE_WITHOUT_JOURNAL, &pctx)) {
+                       retval = ext2fs_truncate_orphan_file(fs);
+                       if (retval) {
+                               /* Huh, failed to delete file */
+                               fix_problem(ctx, PR_6_ORPHAN_FILE_TRUNC_FAILED,
+                                           &pctx);
+                               goto check_quotas;
+                       }
+                       ext2fs_clear_feature_orphan_file(fs->super);
+                       ext2fs_mark_super_dirty(fs);
+                       goto check_quotas;
+               }
+               ret = check_init_orphan_file(ctx);
+               if (ret == 2 ||
+                   (ret == 0 && ext2fs_has_feature_orphan_present(fs->super) &&
+                    fix_problem(ctx, PR_6_ORPHAN_PRESENT_CLEAN_FILE, &pctx))) {
+                       ext2fs_clear_feature_orphan_present(fs->super);
+                       ext2fs_mark_super_dirty(fs);
+               } else if (ret == 1 &&
+                   fix_problem(ctx, PR_6_ORPHAN_FILE_CORRUPTED, &pctx)) {
+                       int orphan_file_blocks;
+
+                       if (ctx->invalid_bitmaps) {
+                               fix_problem(ctx,
+                                           PR_6_ORPHAN_FILE_BITMAP_INVALID,
+                                           &pctx);
+                               goto check_quotas;
+                       }
+
+                       retval = ext2fs_truncate_orphan_file(fs);
+                       if (retval) {
+                               /* Huh, failed to truncate file */
+                               fix_problem(ctx, PR_6_ORPHAN_FILE_TRUNC_FAILED,
+                                           &pctx);
+                               goto check_quotas;
+                       }
+
+                       orphan_file_blocks =
+                               ext2fs_default_orphan_file_blocks(fs);
+                       log_out(ctx, _("Creating orphan file (%d blocks): "),
+                               orphan_file_blocks);
+                       fflush(stdout);
+                       retval = ext2fs_create_orphan_file(fs,
+                                                          orphan_file_blocks);
+                       if (retval) {
+                               log_out(ctx, "%s: while trying to create "
+                                       "orphan file\n", error_message(retval));
+                               fix_problem(ctx, PR_6_ORPHAN_FILE_CREATE_FAILED,
+                                           &pctx);
+                               goto check_quotas;
+                       }
+                       log_out(ctx, "%s", _(" Done.\n"));
+               }
+       } else if (ext2fs_has_feature_orphan_present(fs->super) &&
+                  fix_problem(ctx, PR_6_ORPHAN_PRESENT_NO_FILE, &pctx)) {
+                       ext2fs_clear_feature_orphan_present(fs->super);
+                       ext2fs_mark_super_dirty(fs);
+       }
+check_quotas:
+       if (ctx->qctx && !ctx->invalid_bitmaps) {
                int needs_writeout;
 
                for (qtype = 0; qtype < MAXQUOTAS; qtype++) {
@@ -1988,6 +2055,7 @@ no_journal:
                goto restart;
        }
 
+cleanup:
 #ifdef MTRACE
        mtrace_print("Cleanup");
 #endif
index 2291060..29b7b15 100644 (file)
@@ -49,6 +49,8 @@ static struct feature feature_list[] = {
                        "fast_commit" },
        {       E2P_FEATURE_COMPAT, EXT4_FEATURE_COMPAT_STABLE_INODES,
                        "stable_inodes" },
+       {       E2P_FEATURE_COMPAT, EXT4_FEATURE_COMPAT_ORPHAN_FILE,
+                       "orphan_file" },
 
        {       E2P_FEATURE_RO_INCOMPAT, EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER,
                        "sparse_super" },
@@ -80,6 +82,8 @@ static struct feature feature_list[] = {
                        "shared_blocks"},
        {       E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_VERITY,
                        "verity"},
+       {       E2P_FEATURE_RO_INCOMPAT, EXT4_FEATURE_RO_COMPAT_ORPHAN_PRESENT,
+                       "orphan_present" },
 
        {       E2P_FEATURE_INCOMPAT, EXT2_FEATURE_INCOMPAT_COMPRESSION,
                        "compression" },
index 176bee0..1762bc4 100644 (file)
@@ -482,6 +482,9 @@ void list_super2(struct ext2_super_block * sb, FILE *f)
        if (ext2fs_has_feature_casefold(sb))
                fprintf(f, "Character encoding:       %s\n",
                        e2p_encoding2str(sb->s_encoding));
+       if (ext2fs_has_feature_orphan_file(sb))
+               fprintf(f, "Orphan file inode:        %u\n",
+                       sb->s_orphan_file_inum);
 }
 
 void list_super (struct ext2_super_block * s)
index 2dca12e..f6a050a 100644 (file)
@@ -115,6 +115,7 @@ OBJS= $(DEBUGFS_LIB_OBJS) $(RESIZE_LIB_OBJS) $(E2IMAGE_LIB_OBJS) \
        newdir.o \
        nls_utf8.o \
        openfs.o \
+       orphan.o \
        progress.o \
        punch.o \
        qcow2.o \
@@ -198,6 +199,7 @@ SRCS= ext2_err.c \
        $(srcdir)/newdir.c \
        $(srcdir)/nls_utf8.c \
        $(srcdir)/openfs.c \
+       $(srcdir)/orphan.c \
        $(srcdir)/progress.c \
        $(srcdir)/punch.c \
        $(srcdir)/qcow2.c \
index 01d2573..0fc9c09 100644 (file)
@@ -773,7 +773,8 @@ struct ext2_super_block {
        __u8    s_last_error_errcode;
 /*27c*/ __le16 s_encoding;             /* Filename charset encoding */
        __le16  s_encoding_flags;       /* Filename charset encoding flags */
-       __le32  s_reserved[95];         /* Padding to the end of the block */
+       __le32  s_orphan_file_inum;     /* Inode for tracking orphan inodes */
+       __le32  s_reserved[94];         /* Padding to the end of the block */
 /*3fc*/        __u32   s_checksum;             /* crc32c(superblock) */
 };
 
@@ -828,7 +829,7 @@ struct ext2_super_block {
 #define EXT4_FEATURE_COMPAT_SPARSE_SUPER2      0x0200
 #define EXT4_FEATURE_COMPAT_FAST_COMMIT                0x0400
 #define EXT4_FEATURE_COMPAT_STABLE_INODES      0x0800
-
+#define EXT4_FEATURE_COMPAT_ORPHAN_FILE                0x1000
 
 #define EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER    0x0001
 #define EXT2_FEATURE_RO_COMPAT_LARGE_FILE      0x0002
@@ -851,6 +852,7 @@ struct ext2_super_block {
 #define EXT4_FEATURE_RO_COMPAT_PROJECT         0x2000 /* Project quota */
 #define EXT4_FEATURE_RO_COMPAT_SHARED_BLOCKS   0x4000
 #define EXT4_FEATURE_RO_COMPAT_VERITY          0x8000
+#define EXT4_FEATURE_RO_COMPAT_ORPHAN_PRESENT  0x10000
 
 #define EXT2_FEATURE_INCOMPAT_COMPRESSION      0x0001
 #define EXT2_FEATURE_INCOMPAT_FILETYPE         0x0002
@@ -931,6 +933,7 @@ EXT4_FEATURE_COMPAT_FUNCS(exclude_bitmap,   2, EXCLUDE_BITMAP)
 EXT4_FEATURE_COMPAT_FUNCS(sparse_super2,       4, SPARSE_SUPER2)
 EXT4_FEATURE_COMPAT_FUNCS(fast_commit,         4, FAST_COMMIT)
 EXT4_FEATURE_COMPAT_FUNCS(stable_inodes,       4, STABLE_INODES)
+EXT4_FEATURE_COMPAT_FUNCS(orphan_file,         4, ORPHAN_FILE)
 
 EXT4_FEATURE_RO_COMPAT_FUNCS(sparse_super,     2, SPARSE_SUPER)
 EXT4_FEATURE_RO_COMPAT_FUNCS(large_file,       2, LARGE_FILE)
@@ -947,6 +950,7 @@ EXT4_FEATURE_RO_COMPAT_FUNCS(readonly,              4, READONLY)
 EXT4_FEATURE_RO_COMPAT_FUNCS(project,          4, PROJECT)
 EXT4_FEATURE_RO_COMPAT_FUNCS(shared_blocks,    4, SHARED_BLOCKS)
 EXT4_FEATURE_RO_COMPAT_FUNCS(verity,           4, VERITY)
+EXT4_FEATURE_RO_COMPAT_FUNCS(orphan_present,   4, ORPHAN_PRESENT)
 
 EXT4_FEATURE_INCOMPAT_FUNCS(compression,       2, COMPRESSION)
 EXT4_FEATURE_INCOMPAT_FUNCS(filetype,          2, FILETYPE)
@@ -1114,6 +1118,14 @@ static inline unsigned int ext2fs_dir_rec_len(__u8 name_len,
        return rec_len;
 }
 
+#define EXT4_ORPHAN_BLOCK_MAGIC 0x0b10ca04
+
+/*
+ * Structure at the tail of orphan block.
+ *
+ * Both fields hold little-endian (on-disk) values: ob_magic is filled with
+ * ext2fs_cpu_to_le32(EXT4_ORPHAN_BLOCK_MAGIC) and ob_checksum with the value
+ * returned by ext2fs_do_orphan_file_block_csum().
+ */
+struct ext4_orphan_block_tail {
+       __u32 ob_magic;
+       __u32 ob_checksum;
+};
+
 /*
  * Constants for ext4's extended time encoding
  */
index 0ac3e45..0ee0e7d 100644 (file)
@@ -632,7 +632,8 @@ typedef struct ext2_icount *ext2_icount_t;
                                         EXT2_FEATURE_COMPAT_EXT_ATTR|\
                                         EXT4_FEATURE_COMPAT_SPARSE_SUPER2|\
                                         EXT4_FEATURE_COMPAT_FAST_COMMIT|\
-                                        EXT4_FEATURE_COMPAT_STABLE_INODES)
+                                        EXT4_FEATURE_COMPAT_STABLE_INODES|\
+                                        EXT4_FEATURE_COMPAT_ORPHAN_FILE)
 
 #ifdef CONFIG_MMP
 #define EXT4_LIB_INCOMPAT_MMP          EXT4_FEATURE_INCOMPAT_MMP
@@ -667,7 +668,8 @@ typedef struct ext2_icount *ext2_icount_t;
                                         EXT4_FEATURE_RO_COMPAT_READONLY |\
                                         EXT4_FEATURE_RO_COMPAT_PROJECT |\
                                         EXT4_FEATURE_RO_COMPAT_SHARED_BLOCKS |\
-                                        EXT4_FEATURE_RO_COMPAT_VERITY)
+                                        EXT4_FEATURE_RO_COMPAT_VERITY |\
+                                        EXT4_FEATURE_RO_COMPAT_ORPHAN_PRESENT)
 
 /*
  * These features are only allowed if EXT2_FLAG_SOFTSUPP_FEATURES is passed
@@ -1288,6 +1290,8 @@ extern errcode_t ext2fs_adjust_ea_refcount3(ext2_filsys fs, blk64_t blk,
                                           ext2_ino_t inum);
 errcode_t ext2fs_xattrs_write(struct ext2_xattr_handle *handle);
 errcode_t ext2fs_xattrs_read(struct ext2_xattr_handle *handle);
+errcode_t ext2fs_xattrs_read_inode(struct ext2_xattr_handle *handle,
+                                  struct ext2_inode_large *inode);
 errcode_t ext2fs_xattrs_iterate(struct ext2_xattr_handle *h,
                                int (*func)(char *name, char *value,
                                            size_t value_len, void *data),
@@ -1699,6 +1703,19 @@ errcode_t ext2fs_get_data_io(ext2_filsys fs, io_channel *old_io);
 errcode_t ext2fs_set_data_io(ext2_filsys fs, io_channel new_io);
 errcode_t ext2fs_rewrite_to_io(ext2_filsys fs, io_channel new_io);
 
+/* orphan.c */
+extern errcode_t ext2fs_create_orphan_file(ext2_filsys fs, blk_t num_blocks);
+extern errcode_t ext2fs_truncate_orphan_file(ext2_filsys fs);
+extern e2_blkcnt_t ext2fs_default_orphan_file_blocks(ext2_filsys fs);
+extern __u32 ext2fs_do_orphan_file_block_csum(ext2_filsys fs, ext2_ino_t ino,
+                                             __u32 gen, blk64_t blk,
+                                             char *buf);
+extern errcode_t ext2fs_orphan_file_block_csum_set(ext2_filsys fs,
+                                                  ext2_ino_t ino, blk64_t blk,
+                                                  char *buf);
+extern int ext2fs_orphan_file_block_csum_verify(ext2_filsys fs, ext2_ino_t ino,
+                                               blk64_t blk, char *buf);
+
 /* get_pathname.c */
 extern errcode_t ext2fs_get_pathname(ext2_filsys fs, ext2_ino_t dir, ext2_ino_t ino,
                               char **name);
@@ -1852,7 +1869,9 @@ extern int ext2fs_dirent_file_type(const struct ext2_dir_entry *entry);
 extern void ext2fs_dirent_set_file_type(struct ext2_dir_entry *entry, int type);
 extern struct ext2_inode *ext2fs_inode(struct ext2_inode_large * large_inode);
 extern const struct ext2_inode *ext2fs_const_inode(const struct ext2_inode_large * large_inode);
-
+extern int ext2fs_inodes_per_orphan_block(ext2_filsys fs);
+extern struct ext4_orphan_block_tail *ext2fs_orphan_block_tail(ext2_filsys fs,
+                                                              char *buf);
 #endif
 
 /*
@@ -2162,6 +2181,19 @@ ext2fs_const_inode(const struct ext2_inode_large * large_inode)
        return (const struct ext2_inode *) large_inode;
 }
 
+/*
+ * Number of 32-bit inode slots that fit in one orphan file block once the
+ * space for struct ext4_orphan_block_tail has been set aside.
+ */
+_INLINE_ int ext2fs_inodes_per_orphan_block(ext2_filsys fs)
+{
+       size_t payload = fs->blocksize - sizeof(struct ext4_orphan_block_tail);
+
+       return payload / sizeof(__u32);
+}
+
+/*
+ * Return a pointer to the tail structure occupying the last
+ * sizeof(struct ext4_orphan_block_tail) bytes of the block held in buf.
+ */
+_INLINE_ struct ext4_orphan_block_tail *
+ext2fs_orphan_block_tail(ext2_filsys fs, char *buf)
+{
+       char *tail_pos = buf + fs->blocksize;
+
+       tail_pos -= sizeof(struct ext4_orphan_block_tail);
+       return (struct ext4_orphan_block_tail *)tail_pos;
+}
+
 #undef _INLINE_
 #endif
 
index efe4d29..d36fe68 100644 (file)
@@ -987,30 +987,20 @@ static void xattrs_free_keys(struct ext2_xattr_handle *h)
        h->ibody_count = 0;
 }
 
-errcode_t ext2fs_xattrs_read(struct ext2_xattr_handle *handle)
+/* fetch xattrs from an already-loaded inode */
+errcode_t ext2fs_xattrs_read_inode(struct ext2_xattr_handle *handle,
+                                  struct ext2_inode_large *inode)
 {
-       struct ext2_inode_large *inode;
+
        struct ext2_ext_attr_header *header;
        __u32 ea_inode_magic;
        unsigned int storage_size;
        char *start, *block_buf = NULL;
        blk64_t blk;
        size_t i;
-       errcode_t err;
+       errcode_t err = 0;
 
        EXT2_CHECK_MAGIC(handle, EXT2_ET_MAGIC_EA_HANDLE);
-       i = EXT2_INODE_SIZE(handle->fs->super);
-       if (i < sizeof(*inode))
-               i = sizeof(*inode);
-       err = ext2fs_get_memzero(i, &inode);
-       if (err)
-               return err;
-
-       err = ext2fs_read_inode_full(handle->fs, handle->ino,
-                                    (struct ext2_inode *)inode,
-                                    EXT2_INODE_SIZE(handle->fs->super));
-       if (err)
-               goto out;
 
        xattrs_free_keys(handle);
 
@@ -1046,7 +1036,7 @@ errcode_t ext2fs_xattrs_read(struct ext2_xattr_handle *handle)
 
 read_ea_block:
        /* Look for EA in a separate EA block */
-       blk = ext2fs_file_acl_block(handle->fs, (struct ext2_inode *)inode);
+       blk = ext2fs_file_acl_block(handle->fs, EXT2_INODE(inode));
        if (blk != 0) {
                if ((blk < handle->fs->super->s_first_data_block) ||
                    (blk >= ext2fs_blocks_count(handle->fs->super))) {
@@ -1077,20 +1067,39 @@ read_ea_block:
                err = read_xattrs_from_buffer(handle, inode,
                                        (struct ext2_ext_attr_entry *) start,
                                        storage_size, block_buf);
-               if (err)
-                       goto out3;
+       }
 
+out3:
+       if (block_buf)
                ext2fs_free_mem(&block_buf);
-       }
+out:
+       return err;
+}
 
-       ext2fs_free_mem(&block_buf);
-       ext2fs_free_mem(&inode);
-       return 0;
+/*
+ * Read the inode handle->ino from disk and load its extended attributes
+ * into the handle by calling ext2fs_xattrs_read_inode().
+ *
+ * Returns 0 on success, or the error from the handle check, the buffer
+ * allocation, the inode read or ext2fs_xattrs_read_inode().
+ */
+errcode_t ext2fs_xattrs_read(struct ext2_xattr_handle *handle)
+{
+       struct ext2_inode_large *inode;
+       size_t inode_size;
+       errcode_t err;
+
+       /* Validate the handle before dereferencing handle->fs */
+       EXT2_CHECK_MAGIC(handle, EXT2_ET_MAGIC_EA_HANDLE);
+
+       /* The buffer must be able to hold at least a full ext2_inode_large */
+       inode_size = EXT2_INODE_SIZE(handle->fs->super);
+       if (inode_size < sizeof(*inode))
+               inode_size = sizeof(*inode);
+       err = ext2fs_get_memzero(inode_size, &inode);
+       if (err)
+               return err;
+
+       err = ext2fs_read_inode_full(handle->fs, handle->ino, EXT2_INODE(inode),
+                                    EXT2_INODE_SIZE(handle->fs->super));
+       if (err)
+               goto out;
+
+       err = ext2fs_xattrs_read_inode(handle, inode);
 
-out3:
-       ext2fs_free_mem(&block_buf);
 out:
        ext2fs_free_mem(&inode);
+
        return err;
 }
 
diff --git a/lib/ext2fs/orphan.c b/lib/ext2fs/orphan.c
new file mode 100644 (file)
index 0000000..649a043
--- /dev/null
@@ -0,0 +1,273 @@
+/*
+ * orphan.c --- utility functions to handle the orphan file
+ *
+ * Copyright (C) 2015 Jan Kara.
+ *
+ * %Begin-Header%
+ * This file may be redistributed under the terms of the GNU Library
+ * General Public License, version 2.
+ * %End-Header%
+ */
+
+#include "config.h"
+#include <string.h>
+
+#include "ext2_fs.h"
+#include "ext2fsP.h"
+
+/*
+ * Release the orphan file of the filesystem.
+ *
+ * The inode named by fs->super->s_orphan_file_inum has all of its blocks
+ * punched out and is then overwritten with zeros.  The orphan_file and
+ * orphan_present feature flags are cleared and the superblock is marked
+ * dirty.  s_orphan_file_inum itself is left untouched.
+ *
+ * Returns the error from reading the inode or punching its blocks (nothing
+ * else is changed in that case), otherwise the result of writing the zeroed
+ * inode.  Note that the feature flags are cleared even if that write fails.
+ */
+errcode_t ext2fs_truncate_orphan_file(ext2_filsys fs)
+{
+       struct ext2_inode inode;
+       errcode_t err;
+       ext2_ino_t ino = fs->super->s_orphan_file_inum;
+
+       err = ext2fs_read_inode(fs, ino, &inode);
+       if (err)
+               return err;
+
+       /* Free every block of the file: range [0, ~0ULL] */
+       err = ext2fs_punch(fs, ino, &inode, NULL, 0, ~0ULL);
+       if (err)
+               return err;
+
+       fs->flags &= ~EXT2_FLAG_SUPER_ONLY;
+       memset(&inode, 0, sizeof(struct ext2_inode));
+       err = ext2fs_write_inode(fs, ino, &inode);
+
+       /* Done regardless of the outcome of the inode write above */
+       ext2fs_clear_feature_orphan_file(fs->super);
+       ext2fs_clear_feature_orphan_present(fs->super);
+       ext2fs_mark_super_dirty(fs);
+       /* Need to update group descriptors as well */
+       fs->flags &= ~EXT2_FLAG_SUPER_ONLY;
+
+       return err;
+}
+
+/*
+ * Compute the checksum of one orphan file block.
+ *
+ * The crc32c is seeded with fs->csum_seed and run over the little-endian
+ * inode number, inode generation and block number, followed by the first
+ * ext2fs_inodes_per_orphan_block(fs) 32-bit entries of buf.
+ *
+ * The value returned has already been passed through ext2fs_cpu_to_le32(),
+ * i.e. it is in the form stored in ext4_orphan_block_tail.ob_checksum.
+ */
+__u32 ext2fs_do_orphan_file_block_csum(ext2_filsys fs, ext2_ino_t ino,
+                                      __u32 gen, blk64_t blk, char *buf)
+{
+       ext2_ino_t le_ino = ext2fs_cpu_to_le32(ino);
+       __u32 le_gen = ext2fs_cpu_to_le32(gen);
+       blk64_t le_blk = ext2fs_cpu_to_le64(blk);
+       int slots = ext2fs_inodes_per_orphan_block(fs);
+       __u32 sum;
+
+       sum = ext2fs_crc32c_le(fs->csum_seed, (unsigned char *)&le_ino,
+                              sizeof(le_ino));
+       sum = ext2fs_crc32c_le(sum, (unsigned char *)&le_gen, sizeof(le_gen));
+       sum = ext2fs_crc32c_le(sum, (unsigned char *)&le_blk, sizeof(le_blk));
+       sum = ext2fs_crc32c_le(sum, (unsigned char *)buf,
+                              slots * sizeof(__u32));
+
+       return ext2fs_cpu_to_le32(sum);
+}
+
+/* State shared between ext2fs_create_orphan_file() and mkorphan_proc() */
+struct mkorphan_info {
+       char *buf;              /* template written to every data block */
+       char *zerobuf;          /* zeroed block written to metadata blocks */
+       blk_t num_blocks;       /* data blocks still to be written */
+       blk_t alloc_blocks;     /* number of ext2fs_new_block2() allocations */
+       blk64_t last_blk;       /* most recently written block */
+       errcode_t err;          /* error that made the callback abort */
+       ext2_ino_t ino;         /* orphan file inode, fed to the checksum */
+       __u32 generation;       /* inode generation, fed to the checksum */
+};
+
+/*
+ * Block iterator callback used by ext2fs_create_orphan_file() to populate
+ * the orphan file.
+ *
+ * For each call a block is picked (either the next block of the cluster
+ * allocated last, or a freshly allocated one), written out and stored in
+ * *blocknr.  Calls with blockcnt >= 0 write oi->buf (with the tail checksum
+ * refreshed when metadata_csum is enabled) and count against
+ * oi->num_blocks; calls with a negative blockcnt write oi->zerobuf.
+ *
+ * Returns BLOCK_CHANGED to continue, BLOCK_CHANGED | BLOCK_ABORT once the
+ * last requested data block has been written, or BLOCK_ABORT with oi->err
+ * set when allocation or the write fails.
+ */
+static int mkorphan_proc(ext2_filsys   fs,
+                        blk64_t        *blocknr,
+                        e2_blkcnt_t    blockcnt,
+                        blk64_t        ref_block EXT2FS_ATTR((unused)),
+                        int            ref_offset EXT2FS_ATTR((unused)),
+                        void           *priv_data)
+{
+       struct mkorphan_info *oi = (struct mkorphan_info *)priv_data;
+       blk64_t new_blk;
+       errcode_t err;
+
+       /* Can we just continue in currently allocated cluster? */
+       if (blockcnt &&
+           EXT2FS_B2C(fs, oi->last_blk) == EXT2FS_B2C(fs, oi->last_blk + 1)) {
+               new_blk = oi->last_blk + 1;
+       } else {
+               /* Allocate a new block, using the previous one as the goal */
+               err = ext2fs_new_block2(fs, oi->last_blk, 0, &new_blk);
+               if (err) {
+                       oi->err = err;
+                       return BLOCK_ABORT;
+               }
+               ext2fs_block_alloc_stats2(fs, new_blk, +1);
+               oi->alloc_blocks++;
+       }
+       if (blockcnt >= 0) {
+               if (ext2fs_has_feature_metadata_csum(fs->super)) {
+                       struct ext4_orphan_block_tail *tail;
+
+                       /* The checksum covers the block number, so redo it */
+                       tail = ext2fs_orphan_block_tail(fs, oi->buf);
+                       tail->ob_checksum = ext2fs_do_orphan_file_block_csum(fs,
+                               oi->ino, oi->generation, new_blk, oi->buf);
+               }
+               err = io_channel_write_blk64(fs->io, new_blk, 1, oi->buf);
+       } else  /* zerobuf is used to initialize new indirect blocks... */
+               err = io_channel_write_blk64(fs->io, new_blk, 1, oi->zerobuf);
+       if (err) {
+               oi->err = err;
+               return BLOCK_ABORT;
+       }
+       oi->last_blk = new_blk;
+       *blocknr = new_blk;
+       /* Stop iterating once the requested number of data blocks exist */
+       if (blockcnt >= 0 && --oi->num_blocks == 0)
+               return BLOCK_CHANGED | BLOCK_ABORT;
+       return BLOCK_CHANGED;
+}
+
+/*
+ * Create an orphan file of num_blocks blocks.
+ *
+ * The inode recorded in fs->super->s_orphan_file_inum is used; when that
+ * field is 0 a new inode is allocated.  Every data block is initialized
+ * with an empty orphan block carrying EXT4_ORPHAN_BLOCK_MAGIC in its tail.
+ * On success the inode number is stored in the superblock, the orphan_file
+ * feature is set and the superblock is marked dirty.
+ *
+ * Returns 0 on success or the first error encountered.
+ */
+errcode_t ext2fs_create_orphan_file(ext2_filsys fs, blk_t num_blocks)
+{
+       struct ext2_inode inode;
+       ext2_ino_t ino = fs->super->s_orphan_file_inum;
+       errcode_t err;
+       char *buf = NULL, *zerobuf = NULL;
+       struct mkorphan_info oi;
+       struct ext4_orphan_block_tail *ob_tail;
+
+       /* No orphan inode recorded yet: allocate one */
+       if (!ino) {
+               err = ext2fs_new_inode(fs, EXT2_ROOT_INO, LINUX_S_IFREG | 0600,
+                                      0, &ino);
+               if (err)
+                       return err;
+               ext2fs_inode_alloc_stats2(fs, ino, +1, 0);
+               ext2fs_mark_ib_dirty(fs);
+       }
+
+       err = ext2fs_read_inode(fs, ino, &inode);
+       if (err)
+               return err;
+       /*
+        * Drop the contents of an existing file first.
+        * NOTE(review): ext2fs_truncate_orphan_file() works on
+        * fs->super->s_orphan_file_inum, which is still 0 here if the inode
+        * was allocated just above -- confirm this path cannot be reached
+        * with a non-zero size in that case.
+        */
+       if (EXT2_I_SIZE(&inode)) {
+               err = ext2fs_truncate_orphan_file(fs);
+               if (err)
+                       return err;
+       }
+
+       /*
+        * Start from a zeroed inode.  It is written back at this point only
+        * when the extents feature is enabled.
+        */
+       memset(&inode, 0, sizeof(struct ext2_inode));
+       if (ext2fs_has_feature_extents(fs->super)) {
+               inode.i_flags |= EXT4_EXTENTS_FL;
+               err = ext2fs_write_inode(fs, ino, &inode);
+               if (err)
+                       return err;
+       }
+
+       err = ext2fs_get_mem(fs->blocksize, &buf);
+       if (err)
+               return err;
+       err = ext2fs_get_mem(fs->blocksize, &zerobuf);
+       if (err)
+               goto out;
+       /* buf is the template for every data block: all zero plus the magic */
+       memset(buf, 0, fs->blocksize);
+       memset(zerobuf, 0, fs->blocksize);
+       ob_tail = ext2fs_orphan_block_tail(fs, buf);
+       ob_tail->ob_magic = ext2fs_cpu_to_le32(EXT4_ORPHAN_BLOCK_MAGIC);
+       oi.num_blocks = num_blocks;
+       oi.alloc_blocks = 0;
+       oi.last_blk = 0;
+       /* The inode was zeroed above, so the generation used here is 0 */
+       oi.generation = inode.i_generation;
+       oi.ino = ino;
+       oi.buf = buf;
+       oi.zerobuf = zerobuf;
+       oi.err = 0;
+       /* mkorphan_proc() allocates and writes the blocks */
+       err = ext2fs_block_iterate3(fs, ino, BLOCK_FLAG_APPEND,
+                                   0, mkorphan_proc, &oi);
+       if (err)
+               goto out;
+       if (oi.err) {
+               err = oi.err;
+               goto out;
+       }
+
+       /* Reread inode after blocks were allocated */
+       err = ext2fs_read_inode(fs, ino, &inode);
+       if (err)
+               goto out;
+       /* Recompute the block count from what mkorphan_proc() allocated */
+       ext2fs_iblk_set(fs, &inode, 0);
+       inode.i_atime = inode.i_mtime =
+               inode.i_ctime = fs->now ? fs->now : time(0);
+       inode.i_links_count = 1;
+       inode.i_mode = LINUX_S_IFREG | 0600;
+       ext2fs_iblk_add_blocks(fs, &inode, oi.alloc_blocks);
+       err = ext2fs_inode_size_set(fs, &inode,
+                       (unsigned long long)fs->blocksize * num_blocks);
+       if (err)
+               goto out;
+       err = ext2fs_write_new_inode(fs, ino, &inode);
+       if (err)
+               goto out;
+
+       /* Publish the new file in the superblock */
+       fs->super->s_orphan_file_inum = ino;
+       ext2fs_set_feature_orphan_file(fs->super);
+       ext2fs_mark_super_dirty(fs);
+       /* Need to update group descriptors as well */
+       fs->flags &= ~EXT2_FLAG_SUPER_ONLY;
+out:
+       if (buf)
+               ext2fs_free_mem(&buf);
+       if (zerobuf)
+               ext2fs_free_mem(&zerobuf);
+       return err;
+}
+
+/*
+ * Pick a default orphan file size, in filesystem blocks:
+ *   - fewer than 128K blocks in the filesystem: 32 blocks;
+ *   - fewer than 2M blocks: 1/4096 of the filesystem;
+ *   - otherwise: 512 blocks.
+ * The result is rounded up using EXT2FS_CLUSTER_MASK(fs).
+ */
+e2_blkcnt_t ext2fs_default_orphan_file_blocks(ext2_filsys fs)
+{
+       __u64 fs_blocks = ext2fs_blocks_count(fs->super);
+       e2_blkcnt_t want;
+
+       if (fs_blocks < 128 * 1024)
+               want = 32;
+       else if (fs_blocks < 2 * 1024 * 1024)
+               want = fs_blocks / 4096;
+       else
+               want = 512;
+
+       want += EXT2FS_CLUSTER_MASK(fs);
+       return want & ~EXT2FS_CLUSTER_MASK(fs);
+}
+
+/*
+ * Look up the generation of inode ino and store the checksum of the orphan
+ * block buf (located at block blk) in *crcp.  The stored value is whatever
+ * ext2fs_do_orphan_file_block_csum() returns.  *crcp is left untouched and
+ * the read error is returned if the inode cannot be read.
+ */
+static errcode_t ext2fs_orphan_file_block_csum(ext2_filsys fs, ext2_ino_t ino,
+                                              blk64_t blk, char *buf,
+                                              __u32 *crcp)
+{
+       struct ext2_inode inode;
+       errcode_t retval = ext2fs_read_inode(fs, ino, &inode);
+
+       if (!retval)
+               *crcp = ext2fs_do_orphan_file_block_csum(fs, ino,
+                                                        inode.i_generation,
+                                                        blk, buf);
+       return retval;
+}
+
+/*
+ * Store the checksum of orphan block buf in its tail.  Does nothing and
+ * returns 0 when the metadata_csum feature is not enabled; otherwise returns
+ * the result of computing the checksum.
+ */
+errcode_t ext2fs_orphan_file_block_csum_set(ext2_filsys fs, ext2_ino_t ino,
+                                           blk64_t blk, char *buf)
+{
+       if (ext2fs_has_feature_metadata_csum(fs->super)) {
+               struct ext4_orphan_block_tail *tail =
+                       ext2fs_orphan_block_tail(fs, buf);
+
+               return ext2fs_orphan_file_block_csum(fs, ino, blk, buf,
+                                                    &tail->ob_checksum);
+       }
+       return 0;
+}
+
+/*
+ * Check the checksum stored in the tail of orphan block buf.
+ *
+ * Returns 1 when the checksum matches or when metadata_csum is not enabled,
+ * and 0 when it does not match or the checksum could not be computed.
+ */
+int ext2fs_orphan_file_block_csum_verify(ext2_filsys fs, ext2_ino_t ino,
+                                        blk64_t blk, char *buf)
+{
+       struct ext4_orphan_block_tail *tail;
+       __u32 crc;
+       errcode_t retval;
+
+       if (!ext2fs_has_feature_metadata_csum(fs->super))
+               return 1;
+       retval = ext2fs_orphan_file_block_csum(fs, ino, blk, buf, &crc);
+       if (retval)
+               return 0;
+       tail = ext2fs_orphan_block_tail(fs, buf);
+       /*
+        * crc was already converted with ext2fs_cpu_to_le32() by
+        * ext2fs_do_orphan_file_block_csum(), the same form in which
+        * ob_checksum is stored, so the two are compared as-is.  Converting
+        * only one side would make every block fail on big-endian hosts.
+        */
+       return tail->ob_checksum == crc;
+}
index 1006b2d..b844e76 100644 (file)
@@ -131,8 +131,9 @@ void ext2fs_swap_super(struct ext2_super_block * sb)
        /* s_*_time_hi are __u8 and does not need swabbing */
        sb->s_encoding = ext2fs_swab16(sb->s_encoding);
        sb->s_encoding_flags = ext2fs_swab16(sb->s_encoding_flags);
+       sb->s_orphan_file_inum = ext2fs_swab32(sb->s_orphan_file_inum);
        /* catch when new fields are used from s_reserved */
-       EXT2FS_BUILD_BUG_ON(sizeof(sb->s_reserved) != 95 * sizeof(__le32));
+       EXT2FS_BUILD_BUG_ON(sizeof(sb->s_reserved) != 94 * sizeof(__le32));
        sb->s_checksum = ext2fs_swab32(sb->s_checksum);
 }
 
index 80a5269..ad452de 100644 (file)
@@ -152,7 +152,8 @@ int main(int argc, char **argv)
        check_field(s_last_error_errcode, 1);
        check_field(s_encoding, 2);
        check_field(s_encoding_flags, 2);
-       check_field(s_reserved, 95 * 4);
+       check_field(s_orphan_file_inum, 4);
+       check_field(s_reserved, 94 * 4);
        check_field(s_checksum, 4);
        do_field("Superblock end", 0, 0, cur_offset, 1024);
 #endif
index 5de7c48..6f4a0b9 100644 (file)
@@ -507,7 +507,8 @@ errcode_t quota_compute_usage(quota_ctx_t qctx)
                        continue;
                if (ino == EXT2_ROOT_INO ||
                    (ino >= EXT2_FIRST_INODE(fs->super) &&
-                    ino != quota_type2inum(PRJQUOTA, fs->super))) {
+                    ino != quota_type2inum(PRJQUOTA, fs->super) &&
+                    ino != fs->super->s_orphan_file_inum)) {
                        space = ext2fs_get_stat_i_blocks(fs,
                                                EXT2_INODE(inode)) << 9;
                        quota_data_add(qctx, inode, ino, space);
index 0053b51..2c1f3db 100644 (file)
@@ -1370,7 +1370,8 @@ static void write_raw_image_file(ext2_filsys fs, int fd, int type, int flags,
                    ino == fs->super->s_journal_inum ||
                    ino == quota_type2inum(USRQUOTA, fs->super) ||
                    ino == quota_type2inum(GRPQUOTA, fs->super) ||
-                   ino == quota_type2inum(PRJQUOTA, fs->super)) {
+                   ino == quota_type2inum(PRJQUOTA, fs->super) ||
+                   ino == fs->super->s_orphan_file_inum) {
                        retval = ext2fs_block_iterate3(fs, ino,
                                        BLOCK_FLAG_READ_ONLY, block_buf,
                                        process_dir_block, &pb);
index c0b5324..30f97bb 100644 (file)
@@ -365,6 +365,13 @@ small risk if the system crashes before the journal has been overwritten
 entirely one time.  If the option value is omitted, it defaults to 1 to
 enable lazy journal inode zeroing.
 .TP
+.B assume_storage_prezeroed\fR[\fB= \fI<0 to disable, 1 to enable>\fR]
+If enabled,
+.BR mke2fs
+assumes that the storage device has been prezeroed, skips zeroing the journal
+and inode tables, and annotates the block group flags to signal that the inode
+table has been zeroed.
+.TP
 .B no_copy_xattrs
 Normally
 .B mke2fs
@@ -403,6 +410,11 @@ file system to change based on the user running \fBmke2fs\fR.
 Set a flag in the file system superblock indicating that it may be
 mounted using experimental kernel code, such as the ext4dev file system.
 .TP
+.BI orphan_file_size= size
+Set the size of the file used for tracking unlinked but still open inodes and
+inodes with a truncate in progress. A larger file allows for better
+scalability; reserving a few blocks per CPU is ideal.
+.TP
 .B discard
 Attempt to discard blocks at mkfs time (discarding blocks initially is useful
 on solid state devices and sparse / thin-provisioned storage). When the device
index 04b2fbc..76b8b8c 100644 (file)
@@ -94,7 +94,9 @@ static gid_t  root_gid;
 int    journal_size;
 int    journal_flags;
 int    journal_fc_size;
+static e2_blkcnt_t     orphan_file_blocks;
 static int     lazy_itable_init;
+static int     assume_storage_prezeroed;
 static int     packed_meta_blocks;
 int            no_copy_xattrs;
 static char    *bad_blocks_filename = NULL;
@@ -1012,6 +1014,11 @@ static void parse_extended_opts(struct ext2_super_block *param,
                                lazy_itable_init = strtoul(arg, &p, 0);
                        else
                                lazy_itable_init = 1;
+               } else if (!strcmp(token, "assume_storage_prezeroed")) {
+                       if (arg)
+                               assume_storage_prezeroed = strtoul(arg, &p, 0);
+                       else
+                               assume_storage_prezeroed = 1;
                } else if (!strcmp(token, "lazy_journal_init")) {
                        if (arg)
                                journal_flags |= strtoul(arg, &p, 0) ?
@@ -1089,6 +1096,21 @@ static void parse_extended_opts(struct ext2_super_block *param,
                                continue;
                        }
                        encoding_flags = arg;
+               } else if (!strcmp(token, "orphan_file_size")) {
+                       if (!arg) {
+                               r_usage++;
+                               badopt = token;
+                               continue;
+                       }
+                       orphan_file_blocks = parse_num_blocks2(arg,
+                                               fs_param.s_log_block_size);
+                       if (orphan_file_blocks == 0) {
+                               fprintf(stderr,
+                                       _("Invalid size of orphan file %s\n"),
+                                       arg);
+                               r_usage++;
+                               continue;
+                       }
                } else {
                        r_usage++;
                        badopt = token;
@@ -1115,7 +1137,8 @@ static void parse_extended_opts(struct ext2_super_block *param,
                        "\tnodiscard\n"
                        "\tencoding=<encoding>\n"
                        "\tencoding_flags=<flags>\n"
-                       "\tquotatype=<quota type(s) to be enabled>\n\n"),
+                       "\tquotatype=<quota type(s) to be enabled>\n"
+                       "\tassume_storage_prezeroed=<0 to disable, 1 to enable>\n\n"),
                        badopt ? badopt : "");
                free(buf);
                exit(1);
@@ -1156,7 +1179,8 @@ static __u32 ok_features[3] = {
                EXT2_FEATURE_COMPAT_EXT_ATTR |
                EXT4_FEATURE_COMPAT_SPARSE_SUPER2 |
                EXT4_FEATURE_COMPAT_FAST_COMMIT |
-               EXT4_FEATURE_COMPAT_STABLE_INODES,
+               EXT4_FEATURE_COMPAT_STABLE_INODES |
+               EXT4_FEATURE_COMPAT_ORPHAN_FILE,
        /* Incompat */
        EXT2_FEATURE_INCOMPAT_FILETYPE|
                EXT3_FEATURE_INCOMPAT_EXTENTS|
@@ -1551,6 +1575,7 @@ static void PRS(int argc, char *argv[])
        int             lsector_size = 0, psector_size = 0;
        int             show_version_only = 0, is_device = 0;
        unsigned long long num_inodes = 0; /* unsigned long long to catch too-large input */
+       int             default_orphan_file = 0;
        errcode_t       retval;
        char *          oldpath = getenv("PATH");
        char *          extended_opts = 0;
@@ -2101,8 +2126,20 @@ profile_error:
                ext2fs_clear_feature_ea_inode(&fs_param);
                ext2fs_clear_feature_casefold(&fs_param);
        }
-       edit_feature(fs_features ? fs_features : tmp,
-                    &fs_param.s_feature_compat);
+       if (!fs_features && tmp)
+               edit_feature(tmp, &fs_param.s_feature_compat);
+       /*
+        * Now all the defaults are incorporated in fs_param. Check the state
+        * of the orphan_file feature so that we know whether we should
+        * silently disable it in case the journal gets disabled.
+        */
+       if (ext2fs_has_feature_orphan_file(&fs_param))
+               default_orphan_file = 1;
+       if (fs_features)
+               edit_feature(fs_features, &fs_param.s_feature_compat);
+       /* Silently disable orphan_file if user chose fs without journal */
+       if (default_orphan_file && !ext2fs_has_feature_journal(&fs_param))
+               ext2fs_clear_feature_orphan_file(&fs_param);
        if (tmp)
                free(tmp);
        (void) ext2fs_free_mem(&fs_features);
@@ -3095,6 +3132,18 @@ int main (int argc, char *argv[])
                io_channel_set_options(fs->io, opt_string);
        }
 
+       if (assume_storage_prezeroed) {
+               if (verbose)
+                       printf("%s",
+                              _("Assuming the storage device is prezeroed "
+                              "- skipping inode table and journal wipe\n"));
+
+               lazy_itable_init = 1;
+               itable_zeroed = 1;
+               zero_hugefile = 0;
+               journal_flags |= EXT2_MKJOURNAL_LAZYINIT;
+       }
+
        /* Can't undo discard ... */
        if (!noaction && discard && dev_size && (io_ptr != undo_io_manager)) {
                retval = mke2fs_discard_device(fs);
@@ -3471,6 +3520,23 @@ no_journal:
                fix_cluster_bg_counts(fs);
        if (ext2fs_has_feature_quota(&fs_param))
                create_quota_inodes(fs);
+       if (ext2fs_has_feature_orphan_file(&fs_param)) {
+               if (!ext2fs_has_feature_journal(&fs_param)) {
+                       com_err(program_name, 0, _("cannot set orphan_file "
+                               "feature without a journal."));
+                       exit(1);
+               }
+               if (!orphan_file_blocks) {
+                       orphan_file_blocks =
+                               ext2fs_default_orphan_file_blocks(fs);
+               }
+               retval = ext2fs_create_orphan_file(fs, orphan_file_blocks);
+               if (retval) {
+                       com_err(program_name, retval,
+                               _("while creating orphan file"));
+                       exit(1);
+               }
+       }
 
        retval = mk_hugefiles(fs, device_name);
        if (retval)
index 979f6c5..1e026e5 100644 (file)
@@ -257,6 +257,11 @@ program.
 This superblock setting is only honored in 2.6.35+ kernels;
 and not at all by the ext2 and ext3 file system drivers.
 .TP
+.BI orphan_file_size= size
+Set the size of the file used for tracking unlinked but still open inodes and
+inodes with a truncate in progress. A larger file allows for better
+scalability; reserving a few blocks per CPU is ideal.
+.TP
 .B force_fsck
 Set a flag in the file system superblock indicating that errors have been found.
 This will force fsck to run at the next mount.
index 7f023ad..71a8e99 100644 (file)
@@ -106,6 +106,7 @@ int enabling_casefold;
 int journal_size, journal_fc_size, journal_flags;
 char *journal_device;
 static blk64_t journal_location = ~0LL;
+static e2_blkcnt_t orphan_file_blocks;
 
 static struct list_head blk_move_list;
 
@@ -152,7 +153,8 @@ static __u32 ok_features[3] = {
        EXT3_FEATURE_COMPAT_HAS_JOURNAL |
                EXT2_FEATURE_COMPAT_DIR_INDEX |
                EXT4_FEATURE_COMPAT_FAST_COMMIT |
-               EXT4_FEATURE_COMPAT_STABLE_INODES,
+               EXT4_FEATURE_COMPAT_STABLE_INODES |
+               EXT4_FEATURE_COMPAT_ORPHAN_FILE,
        /* Incompat */
        EXT2_FEATURE_INCOMPAT_FILETYPE |
                EXT3_FEATURE_INCOMPAT_EXTENTS |
@@ -183,7 +185,8 @@ static __u32 clear_ok_features[3] = {
        EXT3_FEATURE_COMPAT_HAS_JOURNAL |
                EXT2_FEATURE_COMPAT_RESIZE_INODE |
                EXT2_FEATURE_COMPAT_DIR_INDEX |
-               EXT4_FEATURE_COMPAT_FAST_COMMIT,
+               EXT4_FEATURE_COMPAT_FAST_COMMIT |
+               EXT4_FEATURE_COMPAT_ORPHAN_FILE,
        /* Incompat */
        EXT2_FEATURE_INCOMPAT_FILETYPE |
                EXT4_FEATURE_INCOMPAT_FLEX_BG |
@@ -1145,6 +1148,56 @@ static int update_feature_set(ext2_filsys fs, char *features)
                }
        }
 
+       if (FEATURE_OFF(E2P_FEATURE_COMPAT, EXT4_FEATURE_COMPAT_ORPHAN_FILE)) {
+               ext2_ino_t ino;
+
+               if (mount_flags & EXT2_MF_MOUNTED) {
+                       fputs(_("The orphan_file feature may only be cleared "
+                               "when the filesystem is unmounted.\n"), stderr);
+                       return 1;
+               }
+               if (ext2fs_has_feature_orphan_present(sb) && f_flag < 2) {
+                       fputs(_("The orphan_present feature is set. Please "
+                               "run e2fsck before clearing orphan_file "
+                               "feature.\n"),
+                             stderr);
+                       return 1;
+               }
+               err = ext2fs_read_bitmaps(fs);
+               if (err) {
+                       com_err(program_name, err, "%s",
+                               _("while loading bitmaps"));
+                       return 1;
+               }
+               err = ext2fs_truncate_orphan_file(fs);
+               if (err) {
+                       com_err(program_name, err,
+                               _("\n\twhile trying to delete orphan file\n"));
+                       return 1;
+               }
+               ino = sb->s_orphan_file_inum;
+               sb->s_orphan_file_inum = 0;
+               ext2fs_inode_alloc_stats2(fs, ino, -1, 0);
+               ext2fs_clear_feature_orphan_file(sb);
+               ext2fs_clear_feature_orphan_present(sb);
+               ext2fs_mark_super_dirty(fs);
+       }
+
+       if (FEATURE_ON(E2P_FEATURE_COMPAT, EXT4_FEATURE_COMPAT_ORPHAN_FILE)) {
+               if (!ext2fs_has_feature_journal(sb)) {
+                       fputs(_("orphan_file feature can be set only for "
+                               "filesystems with journal.\n"), stderr);
+                       return 1;
+               }
+               /*
+                * If adding an orphan file, only record a default size and
+                * set the feature flag here; the orphan file itself is
+                * created later, once orphan_file_blocks is non-zero.
+                */
+               orphan_file_blocks = ext2fs_default_orphan_file_blocks(fs);
+               ext2fs_set_feature_orphan_file(sb);
+       }
+
        if (FEATURE_ON(E2P_FEATURE_RO_INCOMPAT,
                EXT2_FEATURE_RO_COMPAT_SPARSE_SUPER)) {
                if (ext2fs_has_feature_meta_bg(sb)) {
@@ -2269,6 +2322,21 @@ static int parse_extended_opts(ext2_filsys fs, const char *opts)
                                continue;
                        }
                        encoding_flags = arg;
+               } else if (!strcmp(token, "orphan_file_size")) {
+                       if (!arg) {
+                               r_usage++;
+                               continue;
+                       }
+                       orphan_file_blocks = parse_num_blocks2(arg,
+                                                fs->super->s_log_block_size);
+
+                       if (orphan_file_blocks < 1) {
+                               fprintf(stderr,
+                                       _("Invalid size of orphan file %s\n"),
+                                       arg);
+                               r_usage++;
+                               continue;
+                       }
                } else
                        r_usage++;
        }
@@ -3254,6 +3322,24 @@ _("Warning: The journal is dirty. You may wish to replay the journal like:\n\n"
                if (rc)
                        goto closefs;
        }
+       if (orphan_file_blocks) {
+               errcode_t err;
+
+               err = ext2fs_read_bitmaps(fs);
+               if (err) {
+                       com_err(program_name, err, "%s",
+                               _("while loading bitmaps"));
+                       rc = 1;
+                       goto closefs;
+               }
+               err = ext2fs_create_orphan_file(fs, orphan_file_blocks);
+               if (err) {
+                       com_err(program_name, err, "%s",
+                               _("while creating orphan file"));
+                       rc = 1;
+                       goto closefs;
+               }
+       }
 
        if (Q_flag) {
                if (mount_flags & EXT2_MF_MOUNTED) {
diff --git a/tests/m_assume_storage_prezeroed/expect b/tests/m_assume_storage_prezeroed/expect
new file mode 100644 (file)
index 0000000..b735e24
--- /dev/null
@@ -0,0 +1,2 @@
+> 10000
+224
diff --git a/tests/m_assume_storage_prezeroed/script b/tests/m_assume_storage_prezeroed/script
new file mode 100644 (file)
index 0000000..1a8d846
--- /dev/null
@@ -0,0 +1,72 @@
+# Check that "mke2fs -E assume_storage_prezeroed=1" allocates far fewer blocks
+# in the backing file than a default mke2fs run on an identical sparse image.
+# Needs root and losetup; the test is reported as skipped otherwise.
+test_description="test prezeroed storage metadata allocation"
+FILE_SIZE=16M
+
+LOG=$test_name.log
+OUT=$test_name.out
+EXP=$test_dir/expect
+
+if test "$(id -u)" -ne 0 ; then
+    echo "$test_name: $test_description: skipped (not root)"
+elif ! command -v losetup >/dev/null ; then
+    echo "$test_name: $test_description: skipped (no losetup)"
+else
+    # Create two sparse backing files (count=0 with seek only sets the size).
+    dd if=/dev/zero of=$TMPFILE.1 bs=1 count=0 seek=$FILE_SIZE >> $LOG 2>&1
+    dd if=/dev/zero of=$TMPFILE.2 bs=1 count=0 seek=$FILE_SIZE >> $LOG 2>&1
+
+    LOOP1=$(losetup --show --sector-size 4096 -f $TMPFILE.1)
+    if [ ! -b "$LOOP1" ]; then
+        echo "$test_name: $test_description: skipped (no loop devices)"
+        rm -f $TMPFILE.1 $TMPFILE.2
+        exit 0
+    fi
+    LOOP2=$(losetup --show --sector-size 4096 -f $TMPFILE.2)
+    if [ ! -b "$LOOP2" ]; then
+        echo "$test_name: $test_description: skipped (no loop devices)"
+        # Detach the first loop device before removing its backing file.
+        losetup -d $LOOP1
+        rm -f $TMPFILE.1 $TMPFILE.2
+        exit 0
+    fi
+
+    # Baseline: default mke2fs. Only whether more than 10000 blocks were
+    # allocated is recorded, not the exact count.
+    echo $MKE2FS -o Linux -t ext4 $LOOP1 >> $LOG 2>&1
+    $MKE2FS -o Linux -t ext4 $LOOP1 >> $LOG 2>&1
+    sync
+    stat $TMPFILE.1 >> $LOG 2>&1
+    SZ=$(stat -c "%b" $TMPFILE.1)
+    if test "$SZ" -gt 10000 ; then
+        echo "> 10000" > $OUT
+    else
+        echo "$SZ" > $OUT
+    fi
+
+    # With assume_storage_prezeroed=1 the allocated block count is appended
+    # verbatim, so it must match the expect file exactly.
+    echo $MKE2FS -o Linux -t ext4 -E assume_storage_prezeroed=1 $LOOP2 >> $LOG 2>&1
+    $MKE2FS -o Linux -t ext4 -E assume_storage_prezeroed=1 $LOOP2 >> $LOG 2>&1
+    sync
+    stat $TMPFILE.2 >> $LOG 2>&1
+    stat -c "%b" $TMPFILE.2 >> $OUT
+
+    losetup -d $LOOP1
+    losetup -d $LOOP2
+    rm -f $TMPFILE.1 $TMPFILE.2
+
+    cmp -s $OUT $EXP
+    status=$?
+
+    if [ "$status" = 0 ] ; then
+        echo "$test_name: $test_description: ok"
+        touch $test_name.ok
+    else
+        echo "$test_name: $test_description: failed"
+        cat $LOG > $test_name.failed
+        diff $EXP $OUT >> $test_name.failed
+    fi
+fi
+unset LOG OUT EXP FILE_SIZE LOOP1 LOOP2 SZ status