Whamcloud - gitweb
LU-15002 mke2fs: batch zeroing inode table 15/52215/7
author Li Dongyang <dongyangli@ddn.com>
Fri, 1 Sep 2023 03:59:57 +0000 (13:59 +1000)
committer Andreas Dilger <adilger@whamcloud.com>
Fri, 22 Sep 2023 02:59:02 +0000 (02:59 +0000)
For flex_bg-enabled filesystems, we can merge the
inode table blocks into contiguous ranges and zero
each range with a single call. This improves mke2fs
time on large devices when lazy_itable_init is disabled.

On a 977TB device, unpatched mke2fs ran for
449m10s before being terminated manually.
strace showed a huge number of fallocate calls. Judging
by the fallocate offsets, it had completed 41% of the inode
tables, so the estimated time needed would be 1082m.

      unpatched     patched
real  449m10.954s   4m20.531s
user  0m18.217s     0m16.147s
sys   0m20.311s     0m8.944s

Change-Id: I5257293ea35137cfc5321c15ed5dd8aa98c0612e
Signed-off-by: Li Dongyang <dongyangli@ddn.com>
Reviewed-on: https://review.whamcloud.com/c/tools/e2fsprogs/+/52215
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Emoly Liu <emoly@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
misc/mke2fs.c

index b62252a..c6e9784 100644 (file)
@@ -365,9 +365,9 @@ static errcode_t packed_allocate_tables(ext2_filsys fs)
 static void write_inode_tables(ext2_filsys fs, int lazy_flag, int itable_zeroed)
 {
        errcode_t       retval;
-       blk64_t         blk;
+       blk64_t         start = 0;
        dgrp_t          i;
-       int             num;
+       int             len = 0;
        struct ext2fs_numeric_progress_struct progress;
 
        ext2fs_numeric_progress_init(fs, &progress,
@@ -375,10 +375,10 @@ static void write_inode_tables(ext2_filsys fs, int lazy_flag, int itable_zeroed)
                                     fs->group_desc_count);
 
        for (i = 0; i < fs->group_desc_count; i++) {
-               ext2fs_numeric_progress_update(fs, &progress, i);
+               blk64_t blk = ext2fs_inode_table_loc(fs, i);
+               int num = fs->inode_blocks_per_group;
 
-               blk = ext2fs_inode_table_loc(fs, i);
-               num = fs->inode_blocks_per_group;
+               ext2fs_numeric_progress_update(fs, &progress, i);
 
                if (lazy_flag)
                        num = ext2fs_div_ceil((fs->super->s_inodes_per_group -
@@ -391,14 +391,26 @@ static void write_inode_tables(ext2_filsys fs, int lazy_flag, int itable_zeroed)
                        ext2fs_group_desc_csum_set(fs, i);
                }
                if (!itable_zeroed) {
-                       retval = ext2fs_zero_blocks2(fs, blk, num, &blk, &num);
+                       if (len == 0) {
+                               start = blk;
+                               len = num;
+                               continue;
+                       }
+                       /* 'len' must not overflow 2^31 blocks for ext2fs_zero_blocks2() */
+                       if (start + len == blk && len + num >= len) {
+                               len += num;
+                               continue;
+                       }
+                       retval = ext2fs_zero_blocks2(fs, start, len, &start, &len);
                        if (retval) {
                                fprintf(stderr, _("\nCould not write %d "
                                          "blocks in inode table starting at %llu: %s\n"),
-                                       num, (unsigned long long) blk,
+                                       len, (unsigned long long) start,
                                        error_message(retval));
                                exit(1);
                        }
+                       start = blk;
+                       len = num;
                }
                if (sync_kludge) {
                        if (sync_kludge == 1)
@@ -407,6 +419,18 @@ static void write_inode_tables(ext2_filsys fs, int lazy_flag, int itable_zeroed)
                                io_channel_flush(fs->io);
                }
        }
+       if (len) {
+               retval = ext2fs_zero_blocks2(fs, start, len, &start, &len);
+               if (retval) {
+                       fprintf(stderr, _("\nCould not write %d "
+                                 "blocks in inode table starting at %llu: %s\n"),
+                               len, (unsigned long long) start,
+                               error_message(retval));
+                       exit(1);
+               }
+               if (sync_kludge)
+                       io_channel_flush(fs->io);
+       }
        ext2fs_numeric_progress_close(fs, &progress,
                                      _("done                            \n"));