From 71b3cc3315114b2101bc32707fa66740279fff54 Mon Sep 17 00:00:00 2001 From: Emoly Liu Date: Wed, 29 Jul 2015 16:15:45 +0800 Subject: [PATCH] LU-6544 mkfs: Improve MDT inode size calculation This patch reduces mkfs.lustre "--stripe-count-hint" limits when calculating MDT inode size, so that there is a proper amount of space reserved for different kinds of EAs and ACL in the inode. This would allow files with N stripes to fit the lov, lma, link EAs into the inode rather than storing them in an external xattr block, which reduces performance and significantly increases the space used per file on the MDT. Also, this patch adds conf-sanity.sh test_87 to verify this calculation with different stripe counts. Signed-off-by: Emoly Liu Change-Id: Idfdd9a064f0ac07c383a3af79c61c1ff973fb3f7 Reviewed-on: http://review.whamcloud.com/15643 Tested-by: Jenkins Reviewed-by: Fan Yong Reviewed-by: Andreas Dilger Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/tests/conf-sanity.sh | 86 ++++++++++++++++++++++++++++++++++++++ lustre/utils/mount_utils_ldiskfs.c | 29 +++++++++---- 2 files changed, 108 insertions(+), 7 deletions(-) diff --git a/lustre/tests/conf-sanity.sh b/lustre/tests/conf-sanity.sh index 09b4884..d4ef8cb 100644 --- a/lustre/tests/conf-sanity.sh +++ b/lustre/tests/conf-sanity.sh @@ -5441,6 +5441,92 @@ test_86() { } run_test 86 "Replacing mkfs.lustre -G option" +test_87() { #LU-6544 + [[ $(lustre_version_code $SINGLEMDS1) -ge $(version_code 2.7.56) ]] || + { skip "Need MDS version at least 2.7.56" && return; } + [[ $(facet_fstype $SINGLEMDS) != ldiskfs ]] && + { skip "Only applicable to ldiskfs-based MDTs" && return; } + [[ $OSTCOUNT -gt 69 ]] && + { skip "Ignore wide striping situation" && return; } + + local mdsdev=$(mdsdevname 1) + local mdsvdev=$(mdsvdevname 1) + local file=$DIR/$tfile + local mntpt=$(facet_mntpt $SINGLEMDS) + local used_xattr_blk=0 + local inode_size=${1:-512} + local left_size=0 + local xtest="trusted.test" + local value + local orig + local i + + 
#Please see LU-6544 for MDT inode size calculation + if [ $OSTCOUNT -gt 26 ]; then + inode_size=2048 + elif [ $OSTCOUNT -gt 5 ]; then + inode_size=1024 + fi + left_size=$(expr $inode_size - \ + 156 - \ + 32 - \ + 32 - $OSTCOUNT \* 24 - 16 - 3 - \ + 24 - 16 - 3 - \ + 24 - 18 - $(expr length $tfile) - 16 - 4) + if [ $left_size -le 0 ]; then + echo "No space($left_size) is expected in inode." + echo "Try 1-byte xattr instead to verify this." + left_size=1 + else + echo "Estimate: at most $left_size-byte space left in inode." + fi + + unload_modules + reformat + + add mds1 $(mkfs_opts mds1 ${mdsdev}) --stripe-count-hint=$OSTCOUNT \ + --reformat $mdsdev $mdsvdev || error "add mds1 failed" + start_mdt 1 > /dev/null || error "start mdt1 failed" + for i in $(seq $OSTCOUNT); do + start ost$i $(ostdevname $i) $OST_MOUNT_OPTS > /dev/null || + error "start ost$i failed" + done + mount_client $MOUNT > /dev/null || error "mount client $MOUNT failed" + check_mount || error "check client $MOUNT failed" + + #set xattr + $SETSTRIPE -c -1 $file || error "$SETSTRIPE -c -1 $file failed" + $GETSTRIPE $file || error "$GETSTRIPE $file failed" + i=$($GETSTRIPE -c $file) + if [ $i -ne $OSTCOUNT ]; then + left_size=$(expr $left_size + $(expr $OSTCOUNT - $i) \* 24) + echo -n "Since only $i out $OSTCOUNT OSTs are used, " + echo -n "the expected left space is changed to " + echo "$left_size bytes at most." 
+ fi + value=$(generate_string $left_size) + setfattr -n $xtest -v $value $file + orig=$(get_xattr_value $xtest $file) + [[ "$orig" != "$value" ]] && error "$xtest changed" + + #Verify if inode has some expected space left + umount $MOUNT > /dev/null || error "umount $MOUNT failed" + stop_mdt 1 > /dev/null || error "stop mdt1 failed" + mount_ldiskfs $SINGLEMDS || error "mount -t ldiskfs $SINGLEMDS failed" + + do_facet $SINGLEMDS ls -sal $mntpt/ROOT/$tfile + used_xattr_blk=$(do_facet $SINGLEMDS ls -s $mntpt/ROOT/$tfile | + awk '{ print $1 }') + [[ $used_xattr_blk -eq 0 ]] && + error "Please check MDS inode size calculation: \ + more than $left_size-byte space left in inode." + echo "Verified: at most $left_size-byte space left in inode." + + stopall +} +run_test 87 "check if MDT inode can hold EAs with N stripes properly" + + if ! combined_mgs_mds ; then stop mgs fi diff --git a/lustre/utils/mount_utils_ldiskfs.c b/lustre/utils/mount_utils_ldiskfs.c index d07e996..584d6fd 100644 --- a/lustre/utils/mount_utils_ldiskfs.c +++ b/lustre/utils/mount_utils_ldiskfs.c @@ -741,17 +741,32 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop) } } - /* Inode size (for extended attributes). The LOV EA size is - * 32 (EA hdr) + 32 (lov_mds_md) + stripes * 24 (lov_ost_data), - * and we want some margin above that for ACLs, other EAs... */ + /* Inode size includes: + * ldiskfs inode size: 156 + * extended attributes size, including: + * ext4_xattr_header: 32 + * LOV EA size: 32(lov_mds_md) + + * stripes * 24(lov_ost_data) + + * 16(xattr_entry) + 3(lov) + * LMA EA size: 24(lustre_mdt_attrs) + + * 16(xattr_entry) + 3(lma) + * link EA size: 24(link_ea_header) + 18(link_ea_entry) + + * (filename) + 16(xattr_entry) + 4(link) + * and some margin for 4-byte alignment, ACLs and other EAs. 
+ * + * If we say the average filename length is about 32 bytes, + * the calculation looks like: + * 156 + 32 + (32+24*N+19) + (24+19) + (24+18+~32+20) + other <= + * 512*2^m, {m=0,1,2,3} + */ if (strstr(mop->mo_mkfsopts, "-I") == NULL) { if (IS_MDT(&mop->mo_ldd)) { - if (mop->mo_stripe_count > 72) + if (mop->mo_stripe_count > 69) inode_size = 512; /* bz 7241 */ /* see also "-i" below for EA blocks */ - else if (mop->mo_stripe_count > 32) + else if (mop->mo_stripe_count > 26) inode_size = 2048; - else if (mop->mo_stripe_count > 10) + else if (mop->mo_stripe_count > 5) inode_size = 1024; else inode_size = 512; @@ -781,7 +796,7 @@ int ldiskfs_make_lustre(struct mkfs_opts *mop) if (IS_MDT(&mop->mo_ldd)) { bytes_per_inode = inode_size + 1536; - if (mop->mo_stripe_count > 72) { + if (mop->mo_stripe_count > 69) { int extra = mop->mo_stripe_count * 24; extra = ((extra - 1) | 4095) + 1; bytes_per_inode += extra; -- 1.8.3.1