From 51d2a79ac93d31f969baaa5f10da707762acf3f2 Mon Sep 17 00:00:00 2001 From: Andreas Dilger Date: Mon, 25 Jan 2021 16:18:09 -0700 Subject: [PATCH] LU-14286 osd-ldiskfs: enable fallocate by default Enable fallocate on ldiskfs OSTs by default now that the known problems have been resolved. The default mode=0 is the standard "allocate unwritten extents" behavior used by ext4. This is by far the fastest for space allocation, but requires the unwritten extents to be split and/or zeroed when they are overwritten. The OST fallocate mode=1 can also be set to use "zeroed extents", which may be handled by "WRITE SAME", "TRIM zeroes data", or other low-level functionality in the underlying block device. This is somewhat slower at fallocate() time (especially for very large allocations), but still avoids sending any data over the network and avoids runtime overhead from managing the extents. There is not yet an FALLOC_FL_* flag to request this behavior from the client on a per-file basis. If problems are hit in the field, fallocate can also be disabled with mode=-1 at runtime or persistently: lctl set_param [-P] osd-ldiskfs.*.fallocate_zero_blocks=-1 Ensure that all of the tests which currently use fallocate() are enabling it for test runs, even if the default changes again. Fixes: 4f18e08099e5 ("LU-14286 osd-ldiskfs: fallocate with unwritten extents") Signed-off-by: Andreas Dilger Change-Id: Iefa71c525597d54fc82a3d6de27a50d4d2ce7057 Reviewed-on: https://review.whamcloud.com/41315 Tested-by: jenkins Reviewed-by: Arshad Hussain Tested-by: Maloo Reviewed-by: John L. 
Hammond Reviewed-by: Oleg Drokin --- lustre/osd-ldiskfs/osd_handler.c | 4 ++-- lustre/tests/parallel-scale-cifs.sh | 2 ++ lustre/tests/sanity-benchmark.sh | 6 ++++-- lustre/tests/sanity-dom.sh | 4 +++- lustre/tests/sanity-quota.sh | 2 +- lustre/tests/sanity.sh | 14 ++++++-------- lustre/tests/sanityn.sh | 20 ++++++++++++-------- lustre/tests/test-framework.sh | 23 +++++++++++++++++++---- 8 files changed, 49 insertions(+), 26 deletions(-) diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 90b5493..9a18ea8 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -7904,8 +7904,8 @@ static int osd_device_init0(const struct lu_env *env, o->od_readcache_max_iosize = OSD_READCACHE_MAX_IO_MB << 20; o->od_writethrough_max_iosize = OSD_WRITECACHE_MAX_IO_MB << 20; o->od_auto_scrub_interval = AS_DEFAULT; - /* disable fallocate until issues are fixed: LU-14326/LU-14333 */ - o->od_fallocate_zero_blocks = -1; + /* default fallocate to unwritten extents: LU-14326/LU-14333 */ + o->od_fallocate_zero_blocks = 0; cplen = strlcpy(o->od_svname, lustre_cfg_string(cfg, 4), sizeof(o->od_svname)); diff --git a/lustre/tests/parallel-scale-cifs.sh b/lustre/tests/parallel-scale-cifs.sh index a569908..cb1c40e 100644 --- a/lustre/tests/parallel-scale-cifs.sh +++ b/lustre/tests/parallel-scale-cifs.sh @@ -172,6 +172,8 @@ test_fsx() { local space=$(df -P $SMBCLIMNTPT | tail -n 1 | awk '{ print $4 }') [ $space -lt $((size * nclients)) ] && size=$((space * 3 / 4 / nclients)) + check_set_fallocate + local cmd="$FSX -c 50 -p 500 -S $seed -P $TMP -l $size -N $numop " echo "Using: $cmd" diff --git a/lustre/tests/sanity-benchmark.sh b/lustre/tests/sanity-benchmark.sh index 023f70d..b01dc45 100644 --- a/lustre/tests/sanity-benchmark.sh +++ b/lustre/tests/sanity-benchmark.sh @@ -183,13 +183,15 @@ test_fsx() { FSX_COUNT=1000 local SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'` + check_set_fallocate + [ $SPACE -lt $FSX_SIZE ] && 
FSX_SIZE=$((SPACE * 3 / 4)) $DEBUG_OFF FSX_SEED=${FSX_SEED:-$RANDOM} rm -f $testfile $LFS setstripe -c -1 $testfile - CMD="fsx -c 50 -p 1000 -S $FSX_SEED -P $TMP -l $FSX_SIZE \ - -N $((FSX_COUNT * 100)) $FSXOPT $testfile" + CMD="$FSX -c 50 -p 1000 -S $FSX_SEED -P $TMP -l $FSX_SIZE \ + -N $((FSX_COUNT * 100)) $FSXOPT $testfile" echo "Using: $CMD" $CMD || error "fsx failed" rm -f $testfile diff --git a/lustre/tests/sanity-dom.sh b/lustre/tests/sanity-dom.sh index 06dde47..963b1f7 100644 --- a/lustre/tests/sanity-dom.sh +++ b/lustre/tests/sanity-dom.sh @@ -172,8 +172,10 @@ test_fsx() { local file1=$DIR1/$tfile local file2=$DIR2/$tfile + check_set_fallocate + touch $file1 - fsx -c 50 -p 100 -N 1000 -l $((DOM_SIZE*2)) -S 0 -d -d $file1 $file2 + $FSX -c 50 -p 100 -N 1000 -l $((DOM_SIZE*2)) -S 0 -d -d $file1 $file2 } run_test fsx "Dual-mount fsx with DoM files" diff --git a/lustre/tests/sanity-quota.sh b/lustre/tests/sanity-quota.sh index 7785428..c265b0c 100755 --- a/lustre/tests/sanity-quota.sh +++ b/lustre/tests/sanity-quota.sh @@ -1088,7 +1088,7 @@ test_1h() { local limit=10 # 10M local testfile="$DIR/$tdir/$tfile-0" - check_for_fallocate + check_set_fallocate_or_skip setup_quota_test || error "setup quota failed with $?" 
trap cleanup_quota_test EXIT diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 878796c..21e447a 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -13433,7 +13433,7 @@ test_150a() { run_test 150a "truncate/append tests" test_150b() { - check_for_fallocate + check_set_fallocate_or_skip touch $DIR/$tfile stack_trap "rm -f $DIR/$tfile; wait_delete_completed" @@ -13442,7 +13442,7 @@ test_150b() { run_test 150b "Verify fallocate (prealloc) functionality" test_150bb() { - check_for_fallocate + check_set_fallocate_or_skip touch $DIR/$tfile stack_trap "rm -f $DIR/$tfile; wait_delete_completed" @@ -13455,9 +13455,7 @@ test_150bb() { [[ "${sum[0]}" == "$expect" ]] || error "fallocate unwritten is not zero" - do_nodes $(comma_list $(osts_nodes)) \ - "$LCTL set_param osd-ldiskfs.*.fallocate_zero_blocks=1" || - error "set osd-ldiskfs.*.fallocate_zero_blocks=1" + check_set_fallocate 1 > $DIR/$tfile fallocate -l $((1048576 * 20)) $DIR/$tfile || error "fallocate failed" @@ -13468,7 +13466,7 @@ test_150bb() { run_test 150bb "Verify fallocate modes both zero space" test_150c() { - check_for_fallocate + check_set_fallocate_or_skip stack_trap "rm -f $DIR/$tfile; wait_delete_completed" $LFS setstripe -c $OSTCOUNT -S1M $DIR/$tfile || error "setstripe failed" @@ -13502,7 +13500,7 @@ test_150c() { run_test 150c "Verify fallocate Size and Blocks" test_150d() { - check_for_fallocate + check_set_fallocate_or_skip stack_trap "rm -f $DIR/$tfile; wait_delete_completed" $LFS setstripe -c $OSTCOUNT -S1M $DIR/$tdir || error "setstripe failed" @@ -13520,7 +13518,7 @@ test_150d() { run_test 150d "Verify fallocate Size and Blocks - Non zero start" test_150e() { - check_for_fallocate + check_set_fallocate_or_skip echo "df before:" $LFS df diff --git a/lustre/tests/sanityn.sh b/lustre/tests/sanityn.sh index 55c9312..0b82cb9 100755 --- a/lustre/tests/sanityn.sh +++ b/lustre/tests/sanityn.sh @@ -389,6 +389,8 @@ test_16a() { local stripe_size=$(do_facet $SINGLEMDS \ 
"$LCTL get_param -n lod.$(facet_svc $SINGLEMDS)*.stripesize") + check_set_fallocate + # to allocate grant because it may run out due to test_15. $LFS setstripe -c -1 $file1 dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync @@ -396,12 +398,12 @@ test_16a() { rm -f $file1 $LFS setstripe -c -1 $file1 # b=10919 - fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 || + $FSX -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 || error "fsx failed" rm -f $file1 # O_DIRECT reads and writes must be aligned to the device block size. - fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 -Z -r 4096 \ + $FSX -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 -Z -r 4096 \ -w 4096 $file1 $file2 || error "fsx with O_DIRECT failed." } run_test 16a "$FSXNUM iterations of dual-mount fsx" @@ -412,6 +414,8 @@ test_16b() { local file2=$DIR2/$tfile local stripe_size=($($LFS getstripe -S $DIR)) + check_set_fallocate + # to allocate grant because it may run out due to test_15. lfs setstripe -c -1 $file1 dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync || @@ -423,7 +427,7 @@ test_16b() { lfs setstripe -c -1 $file1 # b=10919 # -o is set to 8192 because writes < 1 page and between 1 and 2 pages # create a mix of tiny writes & normal writes - fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -o 8192 -S 0 \ + $FSX -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -o 8192 -S 0 \ $file1 $file2 || error "fsx with tiny write failed." } run_test 16b "$FSXNUM iterations of dual-mount fsx at small size" @@ -436,6 +440,8 @@ test_16c() { [ "$ost1_FSTYPE" != ldiskfs ] && skip "dio on ldiskfs only" + check_set_fallocate + # to allocate grant because it may run out due to test_15. 
$LFS setstripe -c -1 $file1 dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync @@ -452,7 +458,7 @@ test_16c() { set_osd_param $list '' writethrough_cache_enable 0 $LFS setstripe -c -1 $file1 # b=10919 - fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 || + $FSX -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 || error "fsx failed" rm -f $file1 @@ -467,16 +473,17 @@ test_16d() { local file1=$DIR1/$tfile local file2=$DIR2/$tfile local file3=$DIR1/file + local tmpfile=$(mktemp) local stripe_size=$(do_facet $SINGLEMDS \ "$LCTL get_param -n lod.$(facet_svc $SINGLEMDS)*.stripesize") # to allocate grant because it may run out due to test_15. $LFS setstripe -c -1 $file1 + stack_trap "rm -f $file1 $file2 $file3 $tmpfile" dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync dd if=/dev/zero of=$file2 bs=$stripe_size count=$OSTCOUNT oflag=sync rm -f $file1 - local tmpfile=`mktemp` $LFS setstripe -c -1 $file1 # b=10919 $LCTL set_param ldlm.namespaces.*.lru_size=clear @@ -497,9 +504,6 @@ test_16d() { # buffer read from another client dd if=$file2 of=$file3 bs=1M count=100 diff $file3 $tmpfile || error "file different(3)" - - rm -f $file1 $file2 $file3 $tmpfile - } run_test 16d "Verify DIO and buffer IO with two clients" diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 6ba716c..fa873e0 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -10560,16 +10560,31 @@ function unlinkmany() { return $rc } -function check_for_fallocate() +# Check if fallocate supported on OSTs, enable if unset, default mode=0 +# Optionally pass the OST fallocate mode (0=unwritten extents, 1=zero extents) +function check_set_fallocate() { - [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend" + local new_mode="$1" local osts=$(comma_list $(osts_nodes)) local fa_mode="osd-ldiskfs.*.fallocate_zero_blocks" local old_mode=$(do_facet ost1 $LCTL get_param -n $fa_mode 
2>/dev/null| head -n 1) - [ -n "$old_mode" ] || skip "need at least 2.13.57 for fallocate" + [[ -n "$old_mode" ]] || { echo "fallocate not supported"; return 1; } + [[ -z "$new_mode" && "$old_mode" != "-1" ]] && + { echo "keep default fallocate mode: $old_mode"; return 0; } + [[ "$new_mode" && "$old_mode" == "$new_mode" ]] && + { echo "keep current fallocate mode: $old_mode"; return 0; } + stack_trap "do_nodes $osts $LCTL set_param $fa_mode=$old_mode" - do_nodes $osts $LCTL set_param $fa_mode=0 || error "set $fa_mode=0" + do_nodes $osts $LCTL set_param $fa_mode=${new_mode:-0} || + error "set $fa_mode=$new_mode" +} + +# Check if fallocate supported on OSTs, enable if unset, skip if unavailable +function check_set_fallocate_or_skip() +{ + [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend" + check_set_fallocate || skip "need at least 2.13.57 for fallocate" } -- 1.8.3.1