Enable fallocate on ldiskfs OSTs by default now that the known
problems have been resolved. The default mode=0 is the standard
"allocate unwritten extents" behavior used by ext4. This is by
far the fastest for space allocation, but requires the unwritten
extents to be split and/or zeroed when they are overwritten.
The OST fallocate mode=1 can also be set to use "zeroed extents",
which may be handled by "WRITE SAME", "TRIM zeroes data", or
other low-level functionality in the underlying block device.
This is somewhat slower at fallocate() time (especially for very
large allocations), but still avoids sending any data over the
network, and avoids runtime overhead from managing the extents. There
is not yet an FALLOC_FL_* flag to request this behavior from the
client on a per-file basis.
If problems are hit in the field, fallocate can also be disabled
with mode=-1, either at runtime or persistently (with -P):
lctl set_param [-P] osd-ldiskfs.*.fallocate_zero_blocks=<mode>
Ensure that all of the tests which currently use fallocate() are
enabling it for test runs, even if the default changes again.
Fixes: 4f18e08099e5 ("LU-14286 osd-ldiskfs: fallocate with unwritten extents")
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: Iefa71c525597d54fc82a3d6de27a50d4d2ce7057
Reviewed-on: https://review.whamcloud.com/41315
Tested-by: jenkins <devops@whamcloud.com>
Reviewed-by: Arshad Hussain <arshad.hussain@aeoncomputing.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: John L. Hammond <jhammond@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
o->od_readcache_max_iosize = OSD_READCACHE_MAX_IO_MB << 20;
o->od_writethrough_max_iosize = OSD_WRITECACHE_MAX_IO_MB << 20;
o->od_auto_scrub_interval = AS_DEFAULT;
- /* disable fallocate until issues are fixed: LU-14326/LU-14333 */
- o->od_fallocate_zero_blocks = -1;
+ /* default fallocate to unwritten extents: LU-14326/LU-14333 */
+ o->od_fallocate_zero_blocks = 0;
cplen = strlcpy(o->od_svname, lustre_cfg_string(cfg, 4),
sizeof(o->od_svname));
local space=$(df -P $SMBCLIMNTPT | tail -n 1 | awk '{ print $4 }')
[ $space -lt $((size * nclients)) ] && size=$((space * 3 / 4 / nclients))
+ check_set_fallocate
+
local cmd="$FSX -c 50 -p 500 -S $seed -P $TMP -l $size -N $numop "
echo "Using: $cmd"
FSX_COUNT=1000
local SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'`
+ check_set_fallocate
+
[ $SPACE -lt $FSX_SIZE ] && FSX_SIZE=$((SPACE * 3 / 4))
$DEBUG_OFF
FSX_SEED=${FSX_SEED:-$RANDOM}
rm -f $testfile
$LFS setstripe -c -1 $testfile
- CMD="fsx -c 50 -p 1000 -S $FSX_SEED -P $TMP -l $FSX_SIZE \
- -N $((FSX_COUNT * 100)) $FSXOPT $testfile"
+ CMD="$FSX -c 50 -p 1000 -S $FSX_SEED -P $TMP -l $FSX_SIZE \
+ -N $((FSX_COUNT * 100)) $FSXOPT $testfile"
echo "Using: $CMD"
$CMD || error "fsx failed"
rm -f $testfile
local file1=$DIR1/$tfile
local file2=$DIR2/$tfile
+ check_set_fallocate
+
touch $file1
- fsx -c 50 -p 100 -N 1000 -l $((DOM_SIZE*2)) -S 0 -d -d $file1 $file2
+ $FSX -c 50 -p 100 -N 1000 -l $((DOM_SIZE*2)) -S 0 -d -d $file1 $file2
}
run_test fsx "Dual-mount fsx with DoM files"
local limit=10 # 10M
local testfile="$DIR/$tdir/$tfile-0"
- check_for_fallocate
+ check_set_fallocate_or_skip
setup_quota_test || error "setup quota failed with $?"
trap cleanup_quota_test EXIT
run_test 150a "truncate/append tests"
test_150b() {
- check_for_fallocate
+ check_set_fallocate_or_skip
touch $DIR/$tfile
stack_trap "rm -f $DIR/$tfile; wait_delete_completed"
run_test 150b "Verify fallocate (prealloc) functionality"
test_150bb() {
- check_for_fallocate
+ check_set_fallocate_or_skip
touch $DIR/$tfile
stack_trap "rm -f $DIR/$tfile; wait_delete_completed"
[[ "${sum[0]}" == "$expect" ]] || error "fallocate unwritten is not zero"
- do_nodes $(comma_list $(osts_nodes)) \
- "$LCTL set_param osd-ldiskfs.*.fallocate_zero_blocks=1" ||
- error "set osd-ldiskfs.*.fallocate_zero_blocks=1"
+ check_set_fallocate 1
> $DIR/$tfile
fallocate -l $((1048576 * 20)) $DIR/$tfile || error "fallocate failed"
run_test 150bb "Verify fallocate modes both zero space"
test_150c() {
- check_for_fallocate
+ check_set_fallocate_or_skip
stack_trap "rm -f $DIR/$tfile; wait_delete_completed"
$LFS setstripe -c $OSTCOUNT -S1M $DIR/$tfile || error "setstripe failed"
run_test 150c "Verify fallocate Size and Blocks"
test_150d() {
- check_for_fallocate
+ check_set_fallocate_or_skip
stack_trap "rm -f $DIR/$tfile; wait_delete_completed"
$LFS setstripe -c $OSTCOUNT -S1M $DIR/$tdir || error "setstripe failed"
run_test 150d "Verify fallocate Size and Blocks - Non zero start"
test_150e() {
- check_for_fallocate
+ check_set_fallocate_or_skip
echo "df before:"
$LFS df
local stripe_size=$(do_facet $SINGLEMDS \
"$LCTL get_param -n lod.$(facet_svc $SINGLEMDS)*.stripesize")
+ check_set_fallocate
+
# to allocate grant because it may run out due to test_15.
$LFS setstripe -c -1 $file1
dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync
rm -f $file1
$LFS setstripe -c -1 $file1 # b=10919
- fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 ||
+ $FSX -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 ||
error "fsx failed"
rm -f $file1
# O_DIRECT reads and writes must be aligned to the device block size.
- fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 -Z -r 4096 \
+ $FSX -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 -Z -r 4096 \
-w 4096 $file1 $file2 || error "fsx with O_DIRECT failed."
}
run_test 16a "$FSXNUM iterations of dual-mount fsx"
local file2=$DIR2/$tfile
local stripe_size=($($LFS getstripe -S $DIR))
+ check_set_fallocate
+
# to allocate grant because it may run out due to test_15.
lfs setstripe -c -1 $file1
dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync ||
lfs setstripe -c -1 $file1 # b=10919
# -o is set to 8192 because writes < 1 page and between 1 and 2 pages
# create a mix of tiny writes & normal writes
- fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -o 8192 -S 0 \
+ $FSX -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -o 8192 -S 0 \
$file1 $file2 || error "fsx with tiny write failed."
}
run_test 16b "$FSXNUM iterations of dual-mount fsx at small size"
[ "$ost1_FSTYPE" != ldiskfs ] && skip "dio on ldiskfs only"
+ check_set_fallocate
+
# to allocate grant because it may run out due to test_15.
$LFS setstripe -c -1 $file1
dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync
set_osd_param $list '' writethrough_cache_enable 0
$LFS setstripe -c -1 $file1 # b=10919
- fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 ||
+ $FSX -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 ||
error "fsx failed"
rm -f $file1
local file1=$DIR1/$tfile
local file2=$DIR2/$tfile
local file3=$DIR1/file
+ local tmpfile=$(mktemp)
local stripe_size=$(do_facet $SINGLEMDS \
"$LCTL get_param -n lod.$(facet_svc $SINGLEMDS)*.stripesize")
# to allocate grant because it may run out due to test_15.
$LFS setstripe -c -1 $file1
+ stack_trap "rm -f $file1 $file2 $file3 $tmpfile"
dd if=/dev/zero of=$file1 bs=$stripe_size count=$OSTCOUNT oflag=sync
dd if=/dev/zero of=$file2 bs=$stripe_size count=$OSTCOUNT oflag=sync
rm -f $file1
- local tmpfile=`mktemp`
$LFS setstripe -c -1 $file1 # b=10919
$LCTL set_param ldlm.namespaces.*.lru_size=clear
# buffer read from another client
dd if=$file2 of=$file3 bs=1M count=100
diff $file3 $tmpfile || error "file different(3)"
-
- rm -f $file1 $file2 $file3 $tmpfile
-
}
run_test 16d "Verify DIO and buffer IO with two clients"
return $rc
}
-function check_for_fallocate()
+# Check if fallocate supported on OSTs, enable if unset, default mode=0
+# Optionally pass the OST fallocate mode (0=unwritten extents, 1=zero extents)
+# Returns 1 if the fallocate_zero_blocks parameter cannot be read from ost1,
+# and 0 if the current mode is kept unchanged. When the mode is changed, a
+# stack_trap is registered to set the previous mode again on all OST nodes.
+function check_set_fallocate()
{
- [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend"
+ local new_mode="$1"
local osts=$(comma_list $(osts_nodes))
local fa_mode="osd-ldiskfs.*.fallocate_zero_blocks"
local old_mode=$(do_facet ost1 $LCTL get_param -n $fa_mode 2>/dev/null|
head -n 1)
- [ -n "$old_mode" ] || skip "need at least 2.13.57 for fallocate"
+ # an empty value means the parameter does not exist on ost1
+ [[ -n "$old_mode" ]] || { echo "fallocate not supported"; return 1; }
+ # no mode requested and fallocate is not disabled (-1): leave it as is
+ [[ -z "$new_mode" && "$old_mode" != "-1" ]] &&
+ { echo "keep default fallocate mode: $old_mode"; return 0; }
+ # requested mode is already active: nothing to change or restore
+ [[ "$new_mode" && "$old_mode" == "$new_mode" ]] &&
+ { echo "keep current fallocate mode: $old_mode"; return 0; }
+
stack_trap "do_nodes $osts $LCTL set_param $fa_mode=$old_mode"
- do_nodes $osts $LCTL set_param $fa_mode=0 || error "set $fa_mode=0"
+ # report the mode that was really attempted, including the implicit 0
+ do_nodes $osts $LCTL set_param $fa_mode=${new_mode:-0} ||
+ error "set $fa_mode=${new_mode:-0}"
+}
+
+# Skip the calling test unless the OST backend is ldiskfs and fallocate is
+# available there; otherwise make sure fallocate is enabled on the OSTs.
+function check_set_fallocate_or_skip()
+{
+ [[ "$ost1_FSTYPE" == ldiskfs ]] || skip "non-ldiskfs backend"
+ check_set_fallocate || skip "need at least 2.13.57 for fallocate"
}