From: Andreas Dilger Date: Tue, 12 Jan 2021 04:38:31 +0000 (-0700) Subject: LU-14286 osd-ldiskfs: fallocate with unwritten extents X-Git-Tag: 2.14.0-RC1~11 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=4f18e08099e51b682f6acb1cf9fea6d7d45f5fd7;p=fs%2Flustre-release.git LU-14286 osd-ldiskfs: fallocate with unwritten extents The osd_fallocate() code should typically be allocating unwritten extents with LDISKFS_GET_BLOCKS_CREATE_UNWRIT_EXT instead of actually zeroing the blocks on disk with LDISKFS_GET_BLOCKS_CREATE_ZERO. Writing zeroes during fallocate() is typically slower initially, and is causing timeouts in sanity test_150e, which is trying to fill up all OSTs to 90%. In some cases, zeroing the underlying blocks can use the underlying storage support for efficient zeroing (WRITE_SAME), so it may be faster for later use than uninitialized extents that have to be converted to initialized extents by (possibly) splitting them into smaller extents and/or zero filling them when they are being partially overwritten. Add a tunable parameter osd-ldiskfs.*.fallocate_zero_blocks to allow selecting this behavior at runtime. The default is -1, to disable fallocate completely (return -EOPNOTSUPP) due to current bugs. Test-Parameters: testlist=sanityn env=ONLY=16,ONLY_REPEAT=10 Fixes: 72617588ac8c ("LU-14286 osd-ldiskfs: fallocate() should zero new blocks") Signed-off-by: Andreas Dilger Change-Id: Ida3692c487fdc8918863fc5c99459caaba17d92e Reviewed-on: https://review.whamcloud.com/41204 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Arshad Hussain Reviewed-by: John L. 
Hammond Reviewed-by: Oleg Drokin --- diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index bd03e70..90b5493 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -7904,6 +7904,8 @@ static int osd_device_init0(const struct lu_env *env, o->od_readcache_max_iosize = OSD_READCACHE_MAX_IO_MB << 20; o->od_writethrough_max_iosize = OSD_WRITECACHE_MAX_IO_MB << 20; o->od_auto_scrub_interval = AS_DEFAULT; + /* disable fallocate until issues are fixed: LU-14326/LU-14333 */ + o->od_fallocate_zero_blocks = -1; cplen = strlcpy(o->od_svname, lustre_cfg_string(cfg, 4), sizeof(o->od_svname)); diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 7d2f970..fba02d0 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -273,8 +273,11 @@ struct osd_device { od_in_init:1, od_index_in_idif:1, /* Other flags */ + od_read_cache:1, + od_writethrough_cache:1, od_nonrotational:1; + __s64 od_auto_scrub_interval; __u32 od_dirent_journal; int od_index; @@ -283,6 +286,7 @@ struct osd_device { spinlock_t od_osfs_lock; + int od_fallocate_zero_blocks; int od_connects; struct lu_site od_site; @@ -301,9 +305,6 @@ struct osd_device { * served bypassing pagecache unless already cached */ unsigned long od_writethrough_max_iosize; - int od_read_cache; - int od_writethrough_cache; - struct brw_stats od_brw_stats; atomic_t od_r_in_flight; atomic_t od_w_in_flight; diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index 66db3f5..d2a9c88 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -2170,6 +2170,10 @@ static int osd_declare_fallocate(const struct lu_env *env, if (mode & ~FALLOC_FL_KEEP_SIZE) RETURN(-EOPNOTSUPP); + /* disable fallocate completely */ + if (osd_dev(dt->do_lu.lo_dev)->od_fallocate_zero_blocks < 0) + RETURN(-EOPNOTSUPP); + LASSERT(th); LASSERT(inode); @@ -2226,8 +2230,10 @@ static int osd_fallocate(const struct 
lu_env *env, struct dt_object *dt, boff = start >> inode->i_blkbits; blen = (ALIGN(end, 1 << inode->i_blkbits) >> inode->i_blkbits) - boff; - /* Create and Write zeros to new extents */ - flags = LDISKFS_GET_BLOCKS_CREATE_ZERO; + /* Create and mark new extents as either zero or unwritten */ + flags = osd_dev(dt->do_lu.lo_dev)->od_fallocate_zero_blocks ? + LDISKFS_GET_BLOCKS_CREATE_ZERO : + LDISKFS_GET_BLOCKS_CREATE_UNWRIT_EXT; if (mode & FALLOC_FL_KEEP_SIZE) flags |= LDISKFS_GET_BLOCKS_KEEP_SIZE; diff --git a/lustre/osd-ldiskfs/osd_lproc.c b/lustre/osd-ldiskfs/osd_lproc.c index b2d62cb..97efb36 100644 --- a/lustre/osd-ldiskfs/osd_lproc.c +++ b/lustre/osd-ldiskfs/osd_lproc.c @@ -285,7 +285,7 @@ static ssize_t read_cache_enable_store(struct kobject *kobj, if (rc) return rc; - osd->od_read_cache = val; + osd->od_read_cache = !!val; return count; } LUSTRE_RW_ATTR(read_cache_enable); @@ -324,11 +324,58 @@ static ssize_t writethrough_cache_enable_store(struct kobject *kobj, if (rc) return rc; - osd->od_writethrough_cache = val; + osd->od_writethrough_cache = !!val; return count; } LUSTRE_RW_ATTR(writethrough_cache_enable); +static ssize_t fallocate_zero_blocks_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct dt_device *dt = container_of(kobj, struct dt_device, + dd_kobj); + struct osd_device *osd = osd_dt_dev(dt); + + LASSERT(osd); + if (unlikely(!osd->od_mnt)) + return -EINPROGRESS; + + return scnprintf(buf, PAGE_SIZE, "%d\n", osd->od_fallocate_zero_blocks); +} + +/* + * Set how fallocate() interacts with the backing filesystem: + * -1: fallocate is disabled and returns -EOPNOTSUPP + * 0: fallocate allocates unwritten extents (like ext4) + * 1: fallocate zeroes allocated extents on disk + */ +static ssize_t fallocate_zero_blocks_store(struct kobject *kobj, + struct attribute *attr, + const char *buffer, size_t count) +{ + struct dt_device *dt = container_of(kobj, struct dt_device, + dd_kobj); + struct osd_device *osd = osd_dt_dev(dt); + long 
val; + int rc; + + LASSERT(osd); + if (unlikely(!osd->od_mnt)) + return -EINPROGRESS; + + rc = kstrtol(buffer, 0, &val); + if (rc) + return rc; + + if (val < -1 || val > 1) + return -EINVAL; + + osd->od_fallocate_zero_blocks = val; + return count; +} +LUSTRE_RW_ATTR(fallocate_zero_blocks); + ssize_t force_sync_store(struct kobject *kobj, struct attribute *attr, const char *buffer, size_t count) { @@ -809,6 +856,7 @@ static struct attribute *ldiskfs_attrs[] = { &lustre_attr_writethrough_cache_enable.attr, &lustre_attr_fstype.attr, &lustre_attr_mntdev.attr, + &lustre_attr_fallocate_zero_blocks.attr, &lustre_attr_force_sync.attr, &lustre_attr_nonrotational.attr, &lustre_attr_index_backup.attr, diff --git a/lustre/tests/sanity-quota.sh b/lustre/tests/sanity-quota.sh index 2347d59..7785428 100755 --- a/lustre/tests/sanity-quota.sh +++ b/lustre/tests/sanity-quota.sh @@ -1088,9 +1088,7 @@ test_1h() { local limit=10 # 10M local testfile="$DIR/$tdir/$tfile-0" - [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend" - [ $OST1_VERSION -lt $(version_code 2.13.50) ] && - skip "Need OST version at least 2.13.53" + check_for_fallocate setup_quota_test || error "setup quota failed with $?" 
trap cleanup_quota_test EXIT diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index fcde344..54ffef7 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -13433,9 +13433,8 @@ test_150a() { run_test 150a "truncate/append tests" test_150b() { - [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend" - [ $OST1_VERSION -lt $(version_code 2.13.50) ] && - skip "Need OST version at least 2.13.53" + check_for_fallocate + touch $DIR/$tfile stack_trap "rm -f $DIR/$tfile; wait_delete_completed" check_fallocate $DIR/$tfile || error "fallocate failed" @@ -13443,28 +13442,33 @@ test_150b() { run_test 150b "Verify fallocate (prealloc) functionality" test_150bb() { - [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend" - [ $OST1_VERSION -lt $(version_code 2.13.50) ] && - skip "Need OST version at least 2.13.53" + check_for_fallocate + touch $DIR/$tfile stack_trap "rm -f $DIR/$tfile; wait_delete_completed" dd if=/dev/urandom of=$DIR/$tfile bs=1M count=20 || error "dd failed" > $DIR/$tfile fallocate -l $((1048576 * 20)) $DIR/$tfile || error "fallocate failed" - local sum=($(md5sum $DIR/$tfile)) + # precomputed md5sum for 20MB of zeroes local expect="8f4e33f3dc3e414ff94e5fb6905cba8c" + local sum=($(md5sum $DIR/$tfile)) + + [[ "${sum[0]}" == "$expect" ]] || error "fallocate unwritten is not zero" + + do_nodes $(comma_list $(osts_nodes)) \ + "$LCTL set_param osd-ldiskfs.*.fallocate_zero_blocks=1" || + error "set osd-ldiskfs.*.fallocate_zero_blocks=1" - [[ "${sum[0]}" == "$expect" ]] || error "fallocated file is not zero" + > $DIR/$tfile + fallocate -l $((1048576 * 20)) $DIR/$tfile || error "fallocate failed" + sum=($(md5sum $DIR/$tfile)) + + [[ "${sum[0]}" == "$expect" ]] || error "fallocate zero is not zero" } -run_test 150bb "Verify fallocate zeroes space" +run_test 150bb "Verify fallocate modes both zero space" test_150c() { - local bytes - local want - - [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend" - [ $OST1_VERSION -lt 
$(version_code 2.13.50) ] && - skip "Need OST version at least 2.13.53" + check_for_fallocate stack_trap "rm -f $DIR/$tfile; wait_delete_completed" $LFS setstripe -c $OSTCOUNT -S1M $DIR/$tfile || error "setstripe failed" @@ -13488,8 +13492,8 @@ test_150c() { sync; sync_all_data cancel_lru_locks $OSC sleep 5 - bytes=$(($(stat -c '%b * %B' $DIR/$tfile))) - want=$((1024 * 1048576)) + local bytes=$(($(stat -c '%b * %B' $DIR/$tfile))) + local want=$((1024 * 1048576)) # Must allocate all requested space, not more than 5% extra (( $bytes >= $want && $bytes < $want * 105 / 100 )) || @@ -13498,12 +13502,7 @@ test_150c() { run_test 150c "Verify fallocate Size and Blocks" test_150d() { - local bytes - local want - - [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend" - [ $OST1_VERSION -lt $(version_code 2.13.50) ] && - skip "Need OST version at least 2.13.53" + check_for_fallocate stack_trap "rm -f $DIR/$tfile; wait_delete_completed" $LFS setstripe -c $OSTCOUNT -S1M $DIR/$tdir || error "setstripe failed" @@ -13511,8 +13510,8 @@ test_150d() { sync; sync_all_data cancel_lru_locks $OSC sleep 5 - bytes=$(($(stat -c '%b * %B' $DIR/$tdir))) - want=$((OSTCOUNT * 1048576)) + local bytes=$(($(stat -c '%b * %B' $DIR/$tdir))) + local want=$((OSTCOUNT * 1048576)) # Must allocate all requested space, not more than 5% extra (( $bytes >= $want && $bytes < $want * 105 / 100 )) || @@ -13521,9 +13520,7 @@ test_150d() { run_test 150d "Verify fallocate Size and Blocks - Non zero start" test_150e() { - [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend" - [ $OST1_VERSION -ge $(version_code 2.13.55) ] || - skip "Need OST version at least 2.13.55" + check_for_fallocate echo "df before:" $LFS df diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh index 9fbce2f..6ba716c 100755 --- a/lustre/tests/test-framework.sh +++ b/lustre/tests/test-framework.sh @@ -10559,3 +10559,17 @@ function unlinkmany() { do_nodes $list "$LCTL set_param debug=\\\"$saved_debug\\\"" 
return $rc } + +function check_for_fallocate() +{ + [ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend" + local osts=$(comma_list $(osts_nodes)) + local fa_mode="osd-ldiskfs.*.fallocate_zero_blocks" + local old_mode=$(do_facet ost1 $LCTL get_param -n $fa_mode 2>/dev/null| + head -n 1) + + [ -n "$old_mode" ] || skip "need at least 2.13.57 for fallocate" + stack_trap "do_nodes $osts $LCTL set_param $fa_mode=$old_mode" + do_nodes $osts $LCTL set_param $fa_mode=0 || error "set $fa_mode=0" +} +