The osd_fallocate() code should typically be allocating unwritten
extents with LDISKFS_GET_BLOCKS_CREATE_UNWRIT_EXT instead of actually
zeroing the blocks on disk with LDISKFS_GET_BLOCKS_CREATE_ZERO.
Writing zeroes during fallocate() is typically slower initially, and
is causing timeouts in sanity test_150e, which is trying to fill up
all OSTs to 90%. In some cases, zeroing the underlying blocks can
use the underlying storage support for efficient zeroing (WRITE_SAME),
so it may be faster for later use than uninitialized extents that have
to be converted to initialized extents by (possibly) splitting them
into smaller extents and/or zero filling them when they are partially
being overwritten.
Add a tunable parameter osd-ldiskfs.*.fallocate_zero_blocks to allow
selecting this behavior at runtime. The default is -1, to disable
fallocate completely (return -EOPNOTSUPP) due to current bugs.
Test-Parameters: testlist=sanityn env=ONLY=16,ONLY_REPEAT=10
Fixes: 72617588ac8c ("LU-14286 osd-ldiskfs: fallocate() should zero new blocks")
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: Ida3692c487fdc8918863fc5c99459caaba17d92e
Reviewed-on: https://review.whamcloud.com/41204
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Arshad Hussain <arshad.hussain@aeoncomputing.com>
Reviewed-by: John L. Hammond <jhammond@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
o->od_readcache_max_iosize = OSD_READCACHE_MAX_IO_MB << 20;
o->od_writethrough_max_iosize = OSD_WRITECACHE_MAX_IO_MB << 20;
o->od_auto_scrub_interval = AS_DEFAULT;
+ /* disable fallocate until issues are fixed: LU-14326/LU-14333 */
+ o->od_fallocate_zero_blocks = -1;
cplen = strlcpy(o->od_svname, lustre_cfg_string(cfg, 4),
sizeof(o->od_svname));
od_in_init:1,
od_index_in_idif:1,
/* Other flags */
+ od_read_cache:1,
+ od_writethrough_cache:1,
od_nonrotational:1;
+
__s64 od_auto_scrub_interval;
__u32 od_dirent_journal;
int od_index;
spinlock_t od_osfs_lock;
+ int od_fallocate_zero_blocks;
int od_connects;
struct lu_site od_site;
* served bypassing pagecache unless already cached */
unsigned long od_writethrough_max_iosize;
- int od_read_cache;
- int od_writethrough_cache;
-
struct brw_stats od_brw_stats;
atomic_t od_r_in_flight;
atomic_t od_w_in_flight;
if (mode & ~FALLOC_FL_KEEP_SIZE)
RETURN(-EOPNOTSUPP);
+ /* disable fallocate completely */
+ if (osd_dev(dt->do_lu.lo_dev)->od_fallocate_zero_blocks < 0)
+ RETURN(-EOPNOTSUPP);
+
LASSERT(th);
LASSERT(inode);
boff = start >> inode->i_blkbits;
blen = (ALIGN(end, 1 << inode->i_blkbits) >> inode->i_blkbits) - boff;
- /* Create and Write zeros to new extents */
- flags = LDISKFS_GET_BLOCKS_CREATE_ZERO;
+ /* Create and mark new extents as either zero or unwritten */
+ flags = osd_dev(dt->do_lu.lo_dev)->od_fallocate_zero_blocks ?
+ LDISKFS_GET_BLOCKS_CREATE_ZERO :
+ LDISKFS_GET_BLOCKS_CREATE_UNWRIT_EXT;
if (mode & FALLOC_FL_KEEP_SIZE)
flags |= LDISKFS_GET_BLOCKS_KEEP_SIZE;
if (rc)
return rc;
- osd->od_read_cache = val;
+ osd->od_read_cache = !!val;
return count;
}
LUSTRE_RW_ATTR(read_cache_enable);
if (rc)
return rc;
- osd->od_writethrough_cache = val;
+ osd->od_writethrough_cache = !!val;
return count;
}
LUSTRE_RW_ATTR(writethrough_cache_enable);
+static ssize_t fallocate_zero_blocks_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{ /* attribute read handler: report the current fallocate_zero_blocks mode */
+ struct dt_device *dt = container_of(kobj, struct dt_device,
+ dd_kobj);
+ struct osd_device *osd = osd_dt_dev(dt);
+
+ LASSERT(osd);
+ if (unlikely(!osd->od_mnt)) /* od_mnt not set: nothing to report yet */
+ return -EINPROGRESS;
+ /* print the mode as a decimal line, bounded by PAGE_SIZE */
+ return scnprintf(buf, PAGE_SIZE, "%d\n", osd->od_fallocate_zero_blocks);
+}
+
+/*
+ * Set how fallocate() interacts with the backing filesystem:
+ * -1: fallocate is disabled and returns -EOPNOTSUPP
+ * 0: fallocate allocates unwritten extents (like ext4)
+ * 1: fallocate zeroes allocated extents on disk
+ *
+ * The written string is parsed by kstrtol(); a parse error is returned
+ * as-is and a value outside -1..1 yields -EINVAL. While osd->od_mnt is
+ * unset the write fails with -EINPROGRESS. On success the new mode is
+ * stored in od_fallocate_zero_blocks and \a count is returned.
+ */
+static ssize_t fallocate_zero_blocks_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buffer, size_t count)
+{
+ struct dt_device *dt = container_of(kobj, struct dt_device,
+ dd_kobj);
+ struct osd_device *osd = osd_dt_dev(dt);
+ long val;
+ int rc;
+
+ LASSERT(osd);
+ if (unlikely(!osd->od_mnt))
+ return -EINPROGRESS;
+ /* base 0: kstrtol() picks the radix from the string's prefix */
+ rc = kstrtol(buffer, 0, &val);
+ if (rc)
+ return rc;
+ /* only the three documented modes are accepted */
+ if (val < -1 || val > 1)
+ return -EINVAL;
+
+ osd->od_fallocate_zero_blocks = val; /* range-checked, fits in int */
+ return count;
+}
+LUSTRE_RW_ATTR(fallocate_zero_blocks);
+
ssize_t force_sync_store(struct kobject *kobj, struct attribute *attr,
const char *buffer, size_t count)
{
&lustre_attr_writethrough_cache_enable.attr,
&lustre_attr_fstype.attr,
&lustre_attr_mntdev.attr,
+ &lustre_attr_fallocate_zero_blocks.attr,
&lustre_attr_force_sync.attr,
&lustre_attr_nonrotational.attr,
&lustre_attr_index_backup.attr,
local limit=10 # 10M
local testfile="$DIR/$tdir/$tfile-0"
- [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend"
- [ $OST1_VERSION -lt $(version_code 2.13.50) ] &&
- skip "Need OST version at least 2.13.53"
+ check_for_fallocate
setup_quota_test || error "setup quota failed with $?"
trap cleanup_quota_test EXIT
run_test 150a "truncate/append tests"
test_150b() {
- [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend"
- [ $OST1_VERSION -lt $(version_code 2.13.50) ] &&
- skip "Need OST version at least 2.13.53"
+ check_for_fallocate
+
touch $DIR/$tfile
stack_trap "rm -f $DIR/$tfile; wait_delete_completed"
check_fallocate $DIR/$tfile || error "fallocate failed"
run_test 150b "Verify fallocate (prealloc) functionality"
test_150bb() {
- [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend"
- [ $OST1_VERSION -lt $(version_code 2.13.50) ] &&
- skip "Need OST version at least 2.13.53"
+ check_for_fallocate
+
touch $DIR/$tfile
stack_trap "rm -f $DIR/$tfile; wait_delete_completed"
dd if=/dev/urandom of=$DIR/$tfile bs=1M count=20 || error "dd failed"
> $DIR/$tfile
fallocate -l $((1048576 * 20)) $DIR/$tfile || error "fallocate failed"
- local sum=($(md5sum $DIR/$tfile))
+ # precomputed md5sum for 20MB of zeroes
local expect="8f4e33f3dc3e414ff94e5fb6905cba8c"
+ local sum=($(md5sum $DIR/$tfile))
+
+ [[ "${sum[0]}" == "$expect" ]] || error "fallocate unwritten is not zero"
+
+ do_nodes $(comma_list $(osts_nodes)) \
+ "$LCTL set_param osd-ldiskfs.*.fallocate_zero_blocks=1" ||
+ error "set osd-ldiskfs.*.fallocate_zero_blocks=1"
- [[ "${sum[0]}" == "$expect" ]] || error "fallocated file is not zero"
+ > $DIR/$tfile
+ fallocate -l $((1048576 * 20)) $DIR/$tfile || error "fallocate failed"
+ sum=($(md5sum $DIR/$tfile))
+
+ [[ "${sum[0]}" == "$expect" ]] || error "fallocate zero is not zero"
}
-run_test 150bb "Verify fallocate zeroes space"
+run_test 150bb "Verify fallocate modes both zero space"
test_150c() {
- local bytes
- local want
-
- [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend"
- [ $OST1_VERSION -lt $(version_code 2.13.50) ] &&
- skip "Need OST version at least 2.13.53"
+ check_for_fallocate
stack_trap "rm -f $DIR/$tfile; wait_delete_completed"
$LFS setstripe -c $OSTCOUNT -S1M $DIR/$tfile || error "setstripe failed"
sync; sync_all_data
cancel_lru_locks $OSC
sleep 5
- bytes=$(($(stat -c '%b * %B' $DIR/$tfile)))
- want=$((1024 * 1048576))
+ local bytes=$(($(stat -c '%b * %B' $DIR/$tfile)))
+ local want=$((1024 * 1048576))
# Must allocate all requested space, not more than 5% extra
(( $bytes >= $want && $bytes < $want * 105 / 100 )) ||
run_test 150c "Verify fallocate Size and Blocks"
test_150d() {
- local bytes
- local want
-
- [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend"
- [ $OST1_VERSION -lt $(version_code 2.13.50) ] &&
- skip "Need OST version at least 2.13.53"
+ check_for_fallocate
stack_trap "rm -f $DIR/$tfile; wait_delete_completed"
$LFS setstripe -c $OSTCOUNT -S1M $DIR/$tdir || error "setstripe failed"
sync; sync_all_data
cancel_lru_locks $OSC
sleep 5
- bytes=$(($(stat -c '%b * %B' $DIR/$tdir)))
- want=$((OSTCOUNT * 1048576))
+ local bytes=$(($(stat -c '%b * %B' $DIR/$tdir)))
+ local want=$((OSTCOUNT * 1048576))
# Must allocate all requested space, not more than 5% extra
(( $bytes >= $want && $bytes < $want * 105 / 100 )) ||
run_test 150d "Verify fallocate Size and Blocks - Non zero start"
test_150e() {
- [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend"
- [ $OST1_VERSION -ge $(version_code 2.13.55) ] ||
- skip "Need OST version at least 2.13.55"
+ check_for_fallocate
echo "df before:"
$LFS df
do_nodes $list "$LCTL set_param debug=\\\"$saved_debug\\\""
return $rc
}
+
+function check_for_fallocate()
+{ # skip unless OSTs expose fallocate_zero_blocks; run the test with mode 0
+ [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend" # tunable is under osd-ldiskfs only
+ local osts=$(comma_list $(osts_nodes))
+ local fa_mode="osd-ldiskfs.*.fallocate_zero_blocks"
+ local old_mode=$(do_facet ost1 $LCTL get_param -n $fa_mode 2>/dev/null|
+ head -n 1)
+
+ [ -n "$old_mode" ] || skip "need at least 2.13.57 for fallocate" # empty: ost1 returned no value for the tunable
+ stack_trap "do_nodes $osts $LCTL set_param $fa_mode=$old_mode" # put back the value read from ost1
+ do_nodes $osts $LCTL set_param $fa_mode=0 || error "set $fa_mode=0" # 0 = allocate unwritten extents
+}
+