From 27d5daaad86a70a54e579131b55b637c7e952cf5 Mon Sep 17 00:00:00 2001 From: Li Dongyang Date: Tue, 13 Jun 2023 04:12:22 -0600 Subject: [PATCH] LU-15002 mke2fs: try to pack the GDT blocks together Once a 4KiB block filesystem is 256TiB+ in size, the GDT grows larger than the 128MiB size of the first block group, and would overlap the backup superblock+GDT normally placed in the next group. If this is the case, mke2fs will now automatically enable the sparse_super2 and flex_bg features to allow the primary GDT to spill into the second group, instead of enabling meta_bg, unless meta_bg is explicitly requested. Since sparse_super2 and flex_bg already allow flexible placement of the first and second backup superblock+GDT, no change is needed to ext4 or e2fsck to use such a filesystem. Using sparse_super2 and flex_bg is preferable to meta_bg because it packs the metadata close together at the start of the device. This avoids the millions of seeks at filesystem mount/open that are needed to read the GDT blocks that meta_bg spreads across a large filesystem. Currently with sparse_super2 the backup superblock and GDT are put in group #1 and the last group. To allow the primary GDT to spill into group #1, relocate the backups to the same group numbers 3^n, 5^n, 7^n that normal "sparse_super" backups are in. The first backup is close to the start of the device, while the second backup is in a "sparse_super" group near the end of the device. Remove the m_resize_inode_meta_bg test case, since mke2fs no longer enables the meta_bg feature automatically. 
Change-Id: I90a1d3b448fc17d4b11e8f52e41cf4ce87b89e08 Signed-off-by: Li Dongyang Reviewed-on: https://review.whamcloud.com/c/tools/e2fsprogs/+/51295 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger --- lib/ext2fs/initialize.c | 107 ++++++++++++++------- misc/mke2fs.c | 17 ++-- tests/m_resize_inode_meta_bg/expect.1 | 172 ---------------------------------- tests/m_resize_inode_meta_bg/script | 7 -- 4 files changed, 83 insertions(+), 220 deletions(-) delete mode 100644 tests/m_resize_inode_meta_bg/expect.1 delete mode 100644 tests/m_resize_inode_meta_bg/script diff --git a/lib/ext2fs/initialize.c b/lib/ext2fs/initialize.c index 643da5b..5bd537a 100644 --- a/lib/ext2fs/initialize.c +++ b/lib/ext2fs/initialize.c @@ -379,6 +379,21 @@ ipg_retry: super->s_free_inodes_count = super->s_inodes_count; + /* Set up the locations of the backup superblocks */ + if (ext2fs_has_feature_sparse_super2(super)) { + if (super->s_backup_bgs[0] >= fs->group_desc_count) + super->s_backup_bgs[0] = fs->group_desc_count - 1; + if (super->s_backup_bgs[1] >= fs->group_desc_count) + super->s_backup_bgs[1] = fs->group_desc_count - 1; + if (super->s_backup_bgs[0] == super->s_backup_bgs[1]) + super->s_backup_bgs[1] = 0; + if (super->s_backup_bgs[0] > super->s_backup_bgs[1]) { + __u32 t = super->s_backup_bgs[0]; + super->s_backup_bgs[0] = super->s_backup_bgs[1]; + super->s_backup_bgs[1] = t; + } + } + /* * check the number of reserved group descriptor table blocks */ @@ -391,12 +406,57 @@ ipg_retry: retval = EXT2_ET_RES_GDT_BLOCKS; goto cleanup; } - /* Enable meta_bg if we'd lose more than 3/4 of a BG to GDT blocks. 
*/ + + /* Try to pack the gdt blocks together */ if (super->s_reserved_gdt_blocks + fs->desc_blocks > super->s_blocks_per_group * 3 / 4) { - ext2fs_set_feature_meta_bg(fs->super); - ext2fs_clear_feature_resize_inode(fs->super); - set_field(s_reserved_gdt_blocks, 0); + if (!ext2fs_has_feature_meta_bg(fs->super)) { + unsigned int three = 1, five = 5, seven = 7; + unsigned int overhead_per_grp = 2 + fs->inode_blocks_per_group; + dgrp_t group, overhead_grps; + __u32 backup_bgs[2] = {0, 0}; + + if (!ext2fs_has_feature_sparse_super2(fs->super)) { + ext2fs_set_feature_sparse_super2(fs->super); + ext2fs_clear_feature_sparse_super(fs->super); + super->s_backup_bgs[0] = 1; + super->s_backup_bgs[1] = ~0; + } + if (!ext2fs_has_feature_flex_bg(fs->super)) { + ext2fs_set_feature_flex_bg(fs->super); + /* use 256 as flex_bg_size for 1MiB read size */ + super->s_log_groups_per_flex = 8; + } + + overhead = 1 + super->s_reserved_gdt_blocks + + fs->desc_blocks; + if (fs->blocksize == 1024) + overhead++; + overhead_grps = ext2fs_div_ceil(overhead, + super->s_blocks_per_group); + + while ((group = ext2fs_list_backups(NULL, &three, &five, &seven)) < + fs->group_desc_count) { + blk64_t blks = ext2fs_blocks_count(fs->super) - + ext2fs_group_first_block2(fs, group); + + if (group >= overhead_grps && backup_bgs[0] == 0) + backup_bgs[0] = group; + + if (blks >= (fs->group_desc_count - group) * + overhead_per_grp + overhead) + backup_bgs[1] = group; + } + + if (ext2fs_group_blocks_count(fs, fs->group_desc_count - 1) >= + overhead + overhead_per_grp + 50) + backup_bgs[1] = fs->group_desc_count - 1; + + if (super->s_backup_bgs[0]) + super->s_backup_bgs[0] = backup_bgs[0]; + if (super->s_backup_bgs[1]) + super->s_backup_bgs[1] = backup_bgs[1]; + } } /* @@ -414,7 +474,9 @@ ipg_retry: overhead += fs->desc_blocks; /* This can only happen if the user requested too many inodes */ - if (overhead > super->s_blocks_per_group) { + if (overhead > super->s_blocks_per_group && + 
!(ext2fs_has_feature_sparse_super2(fs->super) && + ext2fs_has_feature_flex_bg(fs->super))) { retval = EXT2_ET_TOO_MANY_INODES; goto cleanup; } @@ -427,20 +489,12 @@ ipg_retry: * backup. */ overhead = (int) (2 + fs->inode_blocks_per_group); - has_bg = 0; - if (ext2fs_has_feature_sparse_super2(super)) { - /* - * We have to do this manually since - * super->s_backup_bgs hasn't been set up yet. - */ - if (fs->group_desc_count == 2) - has_bg = param->s_backup_bgs[0] != 0; - else - has_bg = param->s_backup_bgs[1] != 0; - } else - has_bg = ext2fs_bg_has_super(fs, fs->group_desc_count - 1); - if (has_bg) - overhead += 1 + fs->desc_blocks + super->s_reserved_gdt_blocks; + if (ext2fs_bg_has_super(fs, fs->group_desc_count - 1)) { + overhead++; + if (!ext2fs_has_feature_meta_bg(fs->super)) + overhead += fs->desc_blocks + + super->s_reserved_gdt_blocks; + } rem = ((ext2fs_blocks_count(super) - super->s_first_data_block) % super->s_blocks_per_group); if ((fs->group_desc_count == 1) && rem && (rem < overhead)) { @@ -468,21 +522,6 @@ ipg_retry: * count. 
*/ - /* Set up the locations of the backup superblocks */ - if (ext2fs_has_feature_sparse_super2(super)) { - if (super->s_backup_bgs[0] >= fs->group_desc_count) - super->s_backup_bgs[0] = fs->group_desc_count - 1; - if (super->s_backup_bgs[1] >= fs->group_desc_count) - super->s_backup_bgs[1] = fs->group_desc_count - 1; - if (super->s_backup_bgs[0] == super->s_backup_bgs[1]) - super->s_backup_bgs[1] = 0; - if (super->s_backup_bgs[0] > super->s_backup_bgs[1]) { - __u32 t = super->s_backup_bgs[0]; - super->s_backup_bgs[0] = super->s_backup_bgs[1]; - super->s_backup_bgs[1] = t; - } - } - retval = ext2fs_get_mem(strlen(fs->device_name) + 80, &buf); if (retval) goto cleanup; diff --git a/misc/mke2fs.c b/misc/mke2fs.c index bcee4c5..f266cad 100644 --- a/misc/mke2fs.c +++ b/misc/mke2fs.c @@ -647,6 +647,7 @@ static void show_stats(ext2_filsys fs) blk64_t group_block; dgrp_t i; int need, col_left; + int is_first_backup = 1; if (!verbose) { printf(_("Creating filesystem with %llu %dk blocks and " @@ -717,8 +718,10 @@ skip_details: group_block += s->s_blocks_per_group; if (!ext2fs_bg_has_super(fs, i)) continue; - if (i != 1) + if (!is_first_backup) printf(", "); + else + is_first_backup = 0; need = int_log10(group_block) + 2; if (need > col_left) { printf("\n\t"); @@ -3287,12 +3290,12 @@ int main (int argc, char *argv[]) if (fs_param.s_flags & EXT2_FLAGS_TEST_FILESYS) fs->super->s_flags |= EXT2_FLAGS_TEST_FILESYS; - if (ext2fs_has_feature_flex_bg(&fs_param) || - ext2fs_has_feature_huge_file(&fs_param) || - ext2fs_has_feature_gdt_csum(&fs_param) || - ext2fs_has_feature_dir_nlink(&fs_param) || - ext2fs_has_feature_metadata_csum(&fs_param) || - ext2fs_has_feature_extra_isize(&fs_param)) + if (ext2fs_has_feature_flex_bg(fs->super) || + ext2fs_has_feature_huge_file(fs->super) || + ext2fs_has_feature_gdt_csum(fs->super) || + ext2fs_has_feature_dir_nlink(fs->super) || + ext2fs_has_feature_metadata_csum(fs->super) || + ext2fs_has_feature_extra_isize(fs->super)) 
fs->super->s_kbytes_written = 1; /* diff --git a/tests/m_resize_inode_meta_bg/expect.1 b/tests/m_resize_inode_meta_bg/expect.1 deleted file mode 100644 index 83c7bc5..0000000 --- a/tests/m_resize_inode_meta_bg/expect.1 +++ /dev/null @@ -1,172 +0,0 @@ -Creating filesystem with 3840 4k blocks and 960 inodes -Superblock backups stored on blocks: - 256, 768, 1280, 1792, 2304 - -Allocating group tables: done -Writing inode tables: done -Creating journal (1024 blocks): done -Writing superblocks and filesystem accounting information: done - -Filesystem features: has_journal ext_attr dir_index filetype meta_bg extent 64bit flex_bg sparse_super huge_file uninit_bg dir_nlink extra_isize -Pass 1: Checking inodes, blocks, and sizes -Pass 2: Checking directory structure -Pass 3: Checking directory connectivity -Pass 4: Checking reference counts -Pass 5: Checking group summary information -test_filesys: 11/960 files (0.0% non-contiguous), 1127/3840 blocks -Exit status is 0 -Filesystem volume name: -Last mounted on: -Filesystem magic number: 0xEF53 -Filesystem revision #: 1 (dynamic) -Filesystem features: has_journal ext_attr dir_index filetype meta_bg extent 64bit flex_bg sparse_super huge_file uninit_bg dir_nlink extra_isize -Default mount options: (none) -Filesystem state: clean -Errors behavior: Continue -Filesystem OS type: Linux -Inode count: 960 -Block count: 3840 -Reserved block count: 192 -Overhead clusters: 1122 -Free blocks: 2713 -Free inodes: 949 -First block: 0 -Block size: 4096 -Fragment size: 4096 -Group descriptor size: 64 -Blocks per group: 256 -Fragments per group: 256 -Inodes per group: 64 -Inode blocks per group: 4 -Flex block group size: 16 -Mount count: 0 -Check interval: 15552000 (6 months) -Reserved blocks uid: 0 -Reserved blocks gid: 0 -First inode: 11 -Inode size: 256 -Required extra isize: 32 -Desired extra isize: 32 -Journal inode: 8 -Default directory hash: half_md4 -Journal backup: inode blocks -Journal features: (none) -Total journal size: 4096k 
-Total journal blocks: 1024 -Max transaction length: 1024 -Fast commit length: 0 -Journal sequence: 0x00000001 -Journal start: 0 - - -Group 0: (Blocks 0-255) [ITABLE_ZEROED] - Primary superblock at 0, Group descriptor at 1 - Block bitmap at 2 (+2) - Inode bitmap at 17 (+17) - Inode table at 32-35 (+32) - 159 free blocks, 53 free inodes, 2 directories, 53 unused inodes - Free blocks: 97-255 - Free inodes: 12-64 -Group 1: (Blocks 256-511) [INODE_UNINIT, ITABLE_ZEROED] - Backup superblock at 256, Group descriptor at 257 - Block bitmap at 3 (bg #0 + 3) - Inode bitmap at 18 (bg #0 + 18) - Inode table at 36-39 (bg #0 + 36) - 254 free blocks, 64 free inodes, 0 directories, 64 unused inodes - Free blocks: 258-511 - Free inodes: 65-128 -Group 2: (Blocks 512-767) [INODE_UNINIT, BLOCK_UNINIT, ITABLE_ZEROED] - Block bitmap at 4 (bg #0 + 4) - Inode bitmap at 19 (bg #0 + 19) - Inode table at 40-43 (bg #0 + 40) - 256 free blocks, 64 free inodes, 0 directories, 64 unused inodes - Free blocks: 512-767 - Free inodes: 129-192 -Group 3: (Blocks 768-1023) [INODE_UNINIT, ITABLE_ZEROED] - Backup superblock at 768 - Block bitmap at 5 (bg #0 + 5) - Inode bitmap at 20 (bg #0 + 20) - Inode table at 44-47 (bg #0 + 44) - 255 free blocks, 64 free inodes, 0 directories, 64 unused inodes - Free blocks: 769-1023 - Free inodes: 193-256 -Group 4: (Blocks 1024-1279) [INODE_UNINIT, BLOCK_UNINIT, ITABLE_ZEROED] - Block bitmap at 6 (bg #0 + 6) - Inode bitmap at 21 (bg #0 + 21) - Inode table at 48-51 (bg #0 + 48) - 256 free blocks, 64 free inodes, 0 directories, 64 unused inodes - Free blocks: 1024-1279 - Free inodes: 257-320 -Group 5: (Blocks 1280-1535) [INODE_UNINIT, ITABLE_ZEROED] - Backup superblock at 1280 - Block bitmap at 7 (bg #0 + 7) - Inode bitmap at 22 (bg #0 + 22) - Inode table at 52-55 (bg #0 + 52) - 255 free blocks, 64 free inodes, 0 directories, 64 unused inodes - Free blocks: 1281-1535 - Free inodes: 321-384 -Group 6: (Blocks 1536-1791) [INODE_UNINIT, ITABLE_ZEROED] - Block bitmap at 8 
(bg #0 + 8) - Inode bitmap at 23 (bg #0 + 23) - Inode table at 56-59 (bg #0 + 56) - 0 free blocks, 64 free inodes, 0 directories, 64 unused inodes - Free blocks: - Free inodes: 385-448 -Group 7: (Blocks 1792-2047) [INODE_UNINIT, ITABLE_ZEROED] - Backup superblock at 1792 - Block bitmap at 9 (bg #0 + 9) - Inode bitmap at 24 (bg #0 + 24) - Inode table at 60-63 (bg #0 + 60) - 0 free blocks, 64 free inodes, 0 directories, 64 unused inodes - Free blocks: - Free inodes: 449-512 -Group 8: (Blocks 2048-2303) [INODE_UNINIT, ITABLE_ZEROED] - Block bitmap at 10 (bg #0 + 10) - Inode bitmap at 25 (bg #0 + 25) - Inode table at 64-67 (bg #0 + 64) - 0 free blocks, 64 free inodes, 0 directories, 64 unused inodes - Free blocks: - Free inodes: 513-576 -Group 9: (Blocks 2304-2559) [INODE_UNINIT, ITABLE_ZEROED] - Backup superblock at 2304 - Block bitmap at 11 (bg #0 + 11) - Inode bitmap at 26 (bg #0 + 26) - Inode table at 68-71 (bg #0 + 68) - 0 free blocks, 64 free inodes, 0 directories, 64 unused inodes - Free blocks: - Free inodes: 577-640 -Group 10: (Blocks 2560-2815) [INODE_UNINIT, ITABLE_ZEROED] - Block bitmap at 12 (bg #0 + 12) - Inode bitmap at 27 (bg #0 + 27) - Inode table at 72-75 (bg #0 + 72) - 254 free blocks, 64 free inodes, 0 directories, 64 unused inodes - Free blocks: 2562-2815 - Free inodes: 641-704 -Group 11: (Blocks 2816-3071) [INODE_UNINIT, BLOCK_UNINIT, ITABLE_ZEROED] - Block bitmap at 13 (bg #0 + 13) - Inode bitmap at 28 (bg #0 + 28) - Inode table at 76-79 (bg #0 + 76) - 256 free blocks, 64 free inodes, 0 directories, 64 unused inodes - Free blocks: 2816-3071 - Free inodes: 705-768 -Group 12: (Blocks 3072-3327) [INODE_UNINIT, BLOCK_UNINIT, ITABLE_ZEROED] - Block bitmap at 14 (bg #0 + 14) - Inode bitmap at 29 (bg #0 + 29) - Inode table at 80-83 (bg #0 + 80) - 256 free blocks, 64 free inodes, 0 directories, 64 unused inodes - Free blocks: 3072-3327 - Free inodes: 769-832 -Group 13: (Blocks 3328-3583) [INODE_UNINIT, BLOCK_UNINIT, ITABLE_ZEROED] - Block bitmap at 15 
(bg #0 + 15) - Inode bitmap at 30 (bg #0 + 30) - Inode table at 84-87 (bg #0 + 84) - 256 free blocks, 64 free inodes, 0 directories, 64 unused inodes - Free blocks: 3328-3583 - Free inodes: 833-896 -Group 14: (Blocks 3584-3839) [INODE_UNINIT, ITABLE_ZEROED] - Block bitmap at 16 (bg #0 + 16) - Inode bitmap at 31 (bg #0 + 31) - Inode table at 88-91 (bg #0 + 88) - 256 free blocks, 64 free inodes, 0 directories, 64 unused inodes - Free blocks: 3584-3839 - Free inodes: 897-960 diff --git a/tests/m_resize_inode_meta_bg/script b/tests/m_resize_inode_meta_bg/script deleted file mode 100644 index 41ffb32..0000000 --- a/tests/m_resize_inode_meta_bg/script +++ /dev/null @@ -1,7 +0,0 @@ -DESCRIPTION="resize_inode and meta_bg enabled" -FS_SIZE=15360 -MKE2FS_DEVICE_SECTSIZE=4096 -export MKE2FS_DEVICE_SECTSIZE -MKE2FS_OPTS="-T ext4 -g256 -O 64bit" -. $cmd_dir/run_mke2fs -unset MKE2FS_DEVICE_SECTSIZE -- 1.8.3.1