From ad938d1bdf1d0b613ec76ad89823717dacfce38b Mon Sep 17 00:00:00 2001 From: Fan Yong Date: Fri, 26 Jun 2015 23:29:59 +0800 Subject: [PATCH] LU-6722 jbd: double minimum journal size for RHEL7 In RHEL7 (kernel version >= 3.10.0), the maximum journal transaction size is reduced to half of what it is in RHEL6. This may cause Lustre to complain that the declared transaction credits are too large for a very small journal device. This patch increases the minimum journal size from 4MB to 8MB in the RHEL7 case, to counteract the above limitation on the journal transaction size. Signed-off-by: Fan Yong Change-Id: Iec8a2c561416cb7b5acce342c8ebcb845c8d7a19 Reviewed-on: http://review.whamcloud.com/15401 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Niu Yawei Reviewed-by: Andreas Dilger --- lib/ext2fs/ext2fsP.h | 3 ++ lib/ext2fs/mkjournal.c | 56 ++++++++++++++++++++++++++++++++++++- misc/mke2fs.c | 61 ++++------------------------------------- misc/util.c | 15 ++++++++-- tests/m_rootdir/expect | 16 +++++------ tests/m_rootdir_acl/expect | 16 +++++------ tests/r_fixup_lastbg/expect | 4 +-- tests/r_fixup_lastbg_big/expect | 4 +-- tests/run_e2fsck | 4 +++ tests/test_config | 10 +++++++ 10 files changed, 109 insertions(+), 80 deletions(-) diff --git a/lib/ext2fs/ext2fsP.h b/lib/ext2fs/ext2fsP.h index d2045af..9e4300c 100644 --- a/lib/ext2fs/ext2fsP.h +++ b/lib/ext2fs/ext2fsP.h @@ -207,3 +207,6 @@ errcode_t ext2fs_add_exit_fn(ext2_exit_fn fn, void *data); errcode_t ext2fs_remove_exit_fn(ext2_exit_fn fn, void *data); #define EXT2FS_BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2*!!(cond)])) + +extern int ext2fs_is_before_linux_ver(unsigned int major, unsigned int minor, + unsigned int rev); diff --git a/lib/ext2fs/mkjournal.c b/lib/ext2fs/mkjournal.c index bc8c57b..4a71155 100644 --- a/lib/ext2fs/mkjournal.c +++ b/lib/ext2fs/mkjournal.c @@ -33,6 +33,10 @@ #if HAVE_NETINET_IN_H #include <netinet/in.h> #endif +#ifdef __linux__ +#include <sys/utsname.h> +#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c)) +#endif 
#include "ext2_fs.h" #include "e2p/e2p.h" @@ -347,6 +351,54 @@ out2: return retval; } +#ifdef __linux__ +static int parse_version_number(const char *s) +{ + int major, minor, rev; + char *endptr; + const char *cp = s; + + if (!s) + return 0; + major = strtol(cp, &endptr, 10); + if (cp == endptr || *endptr != '.') + return 0; + cp = endptr + 1; + minor = strtol(cp, &endptr, 10); + if (cp == endptr || *endptr != '.') + return 0; + cp = endptr + 1; + rev = strtol(cp, &endptr, 10); + if (cp == endptr) + return 0; + return KERNEL_VERSION(major, minor, rev); +} + +int ext2fs_is_before_linux_ver(unsigned int major, unsigned int minor, + unsigned int rev) +{ + struct utsname ut; + static int linux_version_code = -1; + + if (uname(&ut)) { + perror("uname"); + exit(1); + } + if (linux_version_code < 0) + linux_version_code = parse_version_number(ut.release); + if (linux_version_code == 0) + return 0; + + return linux_version_code < KERNEL_VERSION(major, minor, rev); +} +#else +int ext2fs_is_before_linux_ver(unsigned int major, unsigned int minor, + unsigned int rev) +{ + return 0; +} +#endif + /* * Find a reasonable journal file size (in blocks) given the number of blocks * in the filesystem. 
For very small filesystems, it is not reasonable to @@ -358,8 +410,10 @@ int ext2fs_default_journal_size(__u64 num_blocks) { if (num_blocks < 2048) return -1; - if (num_blocks < 32768) /* 128 MB */ + if (num_blocks <= 8192) /* 32 MB */ return (1024); /* 4 MB */ + if (num_blocks < 32768) /* 128 MB */ + return (2048); /* 8 MB */ if (num_blocks < 256*1024) /* 1 GB */ return (4096); /* 16 MB */ if (num_blocks < 512*1024) /* 2 GB */ diff --git a/misc/mke2fs.c b/misc/mke2fs.c index 2553290..d8aed69 100644 --- a/misc/mke2fs.c +++ b/misc/mke2fs.c @@ -24,10 +24,6 @@ #include #include #include -#ifdef __linux__ -#include -#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c)) -#endif #ifdef HAVE_GETOPT_H #include #else @@ -165,54 +161,6 @@ int int_log10(unsigned long long arg) return l; } -#ifdef __linux__ -static int parse_version_number(const char *s) -{ - int major, minor, rev; - char *endptr; - const char *cp = s; - - if (!s) - return 0; - major = strtol(cp, &endptr, 10); - if (cp == endptr || *endptr != '.') - return 0; - cp = endptr + 1; - minor = strtol(cp, &endptr, 10); - if (cp == endptr || *endptr != '.') - return 0; - cp = endptr + 1; - rev = strtol(cp, &endptr, 10); - if (cp == endptr) - return 0; - return KERNEL_VERSION(major, minor, rev); -} - -static int is_before_linux_ver(unsigned int major, unsigned int minor, - unsigned int rev) -{ - struct utsname ut; - static int linux_version_code = -1; - - if (uname(&ut)) { - perror("uname"); - exit(1); - } - if (linux_version_code < 0) - linux_version_code = parse_version_number(ut.release); - if (linux_version_code == 0) - return 0; - - return linux_version_code < (int) KERNEL_VERSION(major, minor, rev); -} -#else -static int is_before_linux_ver(unsigned int major, unsigned int minor, - unsigned int rev) -{ - return 0; -} -#endif - /* * Helper function for read_bb_file and test_disk */ @@ -1634,7 +1582,7 @@ profile_error: memset(&fs_param, 0, sizeof(struct ext2_super_block)); fs_param.s_rev_level = 1; /* 
Create revision 1 filesystems now */ - if (is_before_linux_ver(2, 2, 0)) + if (ext2fs_is_before_linux_ver(2, 2, 0)) fs_param.s_rev_level = 0; if (argc && *argv) { @@ -2152,7 +2100,8 @@ profile_error: if (use_bsize == -1) { use_bsize = sys_page_size; - if (is_before_linux_ver(2, 6, 0) && use_bsize > 4096) + if (ext2fs_is_before_linux_ver(2, 6, 0) && + use_bsize > 4096) use_bsize = 4096; } if (lsector_size && use_bsize < lsector_size) @@ -2384,7 +2333,7 @@ profile_error: } /* Metadata checksumming wasn't totally stable before 3.18. */ - if (is_before_linux_ver(3, 18, 0) && + if (ext2fs_is_before_linux_ver(3, 18, 0) && ext2fs_has_feature_metadata_csum(&fs_param)) fprintf(stderr, _("Suggestion: Use Linux kernel >= 3.18 for " "improved stability of the metadata and journal " @@ -2394,7 +2343,7 @@ profile_error: * On newer kernels we do have lazy_itable_init support. So pick the * right default in case ext4 module is not loaded. */ - if (is_before_linux_ver(2, 6, 37)) + if (ext2fs_is_before_linux_ver(2, 6, 37)) lazy_itable_init = 0; else lazy_itable_init = 1; diff --git a/misc/util.c b/misc/util.c index 48e623d..d14305b 100644 --- a/misc/util.c +++ b/misc/util.c @@ -44,6 +44,7 @@ #include "e2p/e2p.h" #include "ext2fs/ext2_fs.h" #include "ext2fs/ext2fs.h" +#include "ext2fs/ext2fsP.h" #include "support/nls-enable.h" #include "blkid/blkid.h" #include "util.h" @@ -270,6 +271,14 @@ void figure_journal_size(struct ext2fs_journal_params *jparams, if (requested_j_size > 0 || (ext2fs_has_feature_fast_commit(fs->super) && requested_fc_size > 0)) { + int min_size; + + if (ext2fs_is_before_linux_ver(3, 10, 0) || + ext2fs_blocks_count(fs->super) <= 8192) + min_size = 1024; + else + min_size = 2048; + if (requested_j_size > 0) jparams->num_journal_blocks = jsize_to_blks(fs, requested_j_size); @@ -280,12 +289,12 @@ void figure_journal_size(struct ext2fs_journal_params *jparams, else if (!ext2fs_has_feature_fast_commit(fs->super)) jparams->num_fc_blocks = 0; total_blocks = 
jparams->num_journal_blocks + jparams->num_fc_blocks; - if (total_blocks < 1024 || total_blocks > 10240000) { + if (total_blocks < min_size || total_blocks > 10240000) { fprintf(stderr, _("\nThe total requested journal " "size is %d blocks; it must be\n" - "between 1024 and 10240000 blocks. " + "between %d and 10240000 blocks. " "Aborting.\n"), - total_blocks); + total_blocks, min_size); exit(1); } if ((unsigned int) total_blocks > ext2fs_free_blocks_count(fs->super) / 2) { diff --git a/tests/m_rootdir/expect b/tests/m_rootdir/expect index dbc7977..0a23d4c 100644 --- a/tests/m_rootdir/expect +++ b/tests/m_rootdir/expect @@ -10,8 +10,8 @@ Filesystem OS type: Linux Inode count: 1024 Block count: 16384 Reserved block count: 819 -Overhead clusters: 1543 -Free blocks: 14786 +Overhead clusters: 2567 +Free blocks: 13762 Free inodes: 1005 First block: 1 Block size: 1024 @@ -36,9 +36,9 @@ Default directory hash: half_md4 Journal backup: inode blocks Checksum type: crc32c Journal features: (none) -Total journal size: 1024k -Total journal blocks: 1024 -Max transaction length: 1024 +Total journal size: 2048k +Total journal blocks: 2048 +Max transaction length: 2048 Fast commit length: 0 Journal sequence: 0x00000001 Journal start: 0 @@ -59,8 +59,8 @@ Group 1: (Blocks 8193-16383) [INODE_UNINIT] Block bitmap at 131 (bg #0 + 130) Inode bitmap at 133 (bg #0 + 132) Inode table at 262-389 (bg #0 + 261) - 7038 free blocks, 512 free inodes, 0 directories, 512 unused inodes - Free blocks: 9346-16383 + 6014 free blocks, 512 free inodes, 0 directories, 512 unused inodes + Free blocks: 10370-16383 Free inodes: 513-1024 debugfs: stat /emptyfile Inode: III Type: regular @@ -117,4 +117,4 @@ Pass 2: Checking directory structure Pass 3: Checking directory connectivity Pass 4: Checking reference counts Pass 5: Checking group summary information -test.img: 19/1024 files (0.0% non-contiguous), 1598/16384 blocks +test.img: 19/1024 files (0.0% non-contiguous), 2622/16384 blocks diff --git 
a/tests/m_rootdir_acl/expect b/tests/m_rootdir_acl/expect index babd802..951e4bd 100644 --- a/tests/m_rootdir_acl/expect +++ b/tests/m_rootdir_acl/expect @@ -10,8 +10,8 @@ Filesystem OS type: Linux Inode count: 1024 Block count: 16384 Reserved block count: 819 -Overhead clusters: 1543 -Free blocks: 14788 +Overhead clusters: 2567 +Free blocks: 13764 Free inodes: 1003 First block: 1 Block size: 1024 @@ -36,9 +36,9 @@ Default directory hash: half_md4 Journal backup: inode blocks Checksum type: crc32c Journal features: (none) -Total journal size: 1024k -Total journal blocks: 1024 -Max transaction length: 1024 +Total journal size: 2048k +Total journal blocks: 2048 +Max transaction length: 2048 Fast commit length: 0 Journal sequence: 0x00000001 Journal start: 0 @@ -59,8 +59,8 @@ Group 1: (Blocks 8193-16383) [INODE_UNINIT] Block bitmap at 131 (bg #0 + 130) Inode bitmap at 133 (bg #0 + 132) Inode table at 262-389 (bg #0 + 261) - 7038 free blocks, 512 free inodes, 0 directories, 512 unused inodes - Free blocks: 9346-16383 + 6014 free blocks, 512 free inodes, 0 directories, 512 unused inodes + Free blocks: 10370-16383 Free inodes: 513-1024 debugfs: stat /emptyfile Inode: III Type: regular @@ -116,4 +116,4 @@ Pass 2: Checking directory structure Pass 3: Checking directory connectivity Pass 4: Checking reference counts Pass 5: Checking group summary information -test.img: 21/1024 files (0.0% non-contiguous), 1596/16384 blocks +test.img: 21/1024 files (0.0% non-contiguous), 2620/16384 blocks diff --git a/tests/r_fixup_lastbg/expect b/tests/r_fixup_lastbg/expect index 96b154a..24b7cc4 100644 --- a/tests/r_fixup_lastbg/expect +++ b/tests/r_fixup_lastbg/expect @@ -4,7 +4,7 @@ Superblock backups stored on blocks: Allocating group tables: done Writing inode tables: done -Creating journal (1024 blocks): done +Creating journal (2048 blocks): done Writing superblocks and filesystem accounting information: done Group 2: (Blocks 16385-19999) [INODE_UNINIT, ITABLE_ZEROED] @@ -36,4 +36,4 
@@ Pass 2: Checking directory structure Pass 3: Checking directory connectivity Pass 4: Checking reference counts Pass 5: Checking group summary information -test.img: 11/1248 files (0.0% non-contiguous), 1517/20004 blocks +test.img: 11/1248 files (0.0% non-contiguous), 2541/20004 blocks diff --git a/tests/r_fixup_lastbg_big/expect b/tests/r_fixup_lastbg_big/expect index edaabaf..53da4f0 100644 --- a/tests/r_fixup_lastbg_big/expect +++ b/tests/r_fixup_lastbg_big/expect @@ -4,7 +4,7 @@ Superblock backups stored on blocks: Allocating group tables: done Writing inode tables: done -Creating journal (1024 blocks): done +Creating journal (2048 blocks): done Writing superblocks and filesystem accounting information: done Group 2: (Blocks 16385-19999) [INODE_UNINIT, ITABLE_ZEROED] @@ -42,4 +42,4 @@ Pass 2: Checking directory structure Pass 3: Checking directory connectivity Pass 4: Checking reference counts Pass 5: Checking group summary information -test.img: 11/2080 files (0.0% non-contiguous), 1809/40000 blocks +test.img: 11/2080 files (0.0% non-contiguous), 2833/40000 blocks diff --git a/tests/run_e2fsck b/tests/run_e2fsck index e2c6596..1a39a89 100644 --- a/tests/run_e2fsck +++ b/tests/run_e2fsck @@ -27,6 +27,8 @@ if [ "$EXP1"x = x ]; then gunzip < $test_dir/expect.1.gz > $EXP1 else EXP1=$test_dir/expect.1 + [ "$CHECK_RHEL7" = "true" -a -f $test_dir/expect_rhel7.1 ] && + EXP1=$test_dir/expect_rhel7.1 || true fi fi @@ -36,6 +38,8 @@ if [ "$EXP2"x = x ]; then gunzip < $test_dir/expect.2.gz > $EXP2 else EXP2=$test_dir/expect.2 + [ "$CHECK_RHEL7" = "true" -a -f $test_dir/expect_rhel7.2 ] && + EXP2=$test_dir/expect_rhel7.2 || true fi fi diff --git a/tests/test_config b/tests/test_config index 9dc762c..79eba9d 100644 --- a/tests/test_config +++ b/tests/test_config @@ -46,3 +46,13 @@ RESIZE2FS_FORCE_LAZY_ITABLE_INIT=yes export RESIZE2FS_FORCE_LAZY_ITABLE_INIT E2FSPROGS_LIBMAGIC_SUPPRESS=yes export E2FSPROGS_LIBMAGIC_SUPPRESS + +if [ "$(uname -s)" = "Linux" ]; then # CHECK_RHEL7 must be true for every kernel >= 3.10 (e.g. 4.4), so compare major first and minor only for 3.x + 
LINUX_VERSION1=`uname -r | awk -F . '{ printf $1 }'` + LINUX_VERSION2=`uname -r | awk -F . '{ printf $2 }'` + { [ $LINUX_VERSION1 -gt 3 ] || [ $LINUX_VERSION1 -eq 3 -a $LINUX_VERSION2 -ge 10 ]; } && + CHECK_RHEL7=true || CHECK_RHEL7=false +else + CHECK_RHEL7=false +fi +export CHECK_RHEL7 -- 1.8.3.1