From: Keguang Xu Date: Wed, 15 Jan 2025 05:57:30 +0000 (+0800) Subject: LU-17055 ldiskfs: add falloc(zero) for OST/ext X-Git-Tag: 2.16.54~15 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=f57455ffa2871ab31a3e8f3a78e0f5e3b9110586;p=fs%2Flustre-release.git LU-17055 ldiskfs: add falloc(zero) for OST/ext This patch implements falloc(FALLOC_FL_ZERO_RANGE) on the ldiskfs backend. Here, we follow the implementation of "LU-10048 osd: async truncate": zeroing is likewise executed outside of the main transaction handle, to avoid restarting zeroing transaction handles in the main transaction. NOTE: this is the 1st part of falloc(zero), which functions only on extent-mapped ldiskfs objects. Another patch will be added to address the DoM/indirect part. Signed-off-by: Keguang Xu Change-Id: I76310546715f96872bf4dd441beb453ee4ac0abe Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/57763 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Arshad Hussain Reviewed-by: Andreas Dilger Reviewed-by: Qian Yingjin Reviewed-by: Oleg Drokin --- diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 47e5a9a..29258e0 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -6548,7 +6548,8 @@ static long ll_fallocate(struct file *filp, int mode, loff_t offset, loff_t len) * mode == 0 (which is standard prealloc) and PUNCH is supported * Rest of mode options are not supported yet. 
*/ - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | + FALLOC_FL_ZERO_RANGE)) RETURN(-EOPNOTSUPP); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FALLOCATE, 1); diff --git a/lustre/mdc/mdc_dev.c b/lustre/mdc/mdc_dev.c index d4abd4a..5990487 100644 --- a/lustre/mdc/mdc_dev.c +++ b/lustre/mdc/mdc_dev.c @@ -1050,21 +1050,20 @@ static int mdc_io_setattr_start(const struct lu_env *env, __u64 size = io->u.ci_setattr.sa_attr.lvb_size; unsigned int ia_avalid = io->u.ci_setattr.sa_avalid; enum op_xvalid ia_xvalid = io->u.ci_setattr.sa_xvalid; - int rc; + int rc = 0; /* silently ignore non-truncate setattr for Data-on-MDT object */ if (cl_io_is_trunc(io)) { /* truncate cache dirty pages first */ rc = osc_cache_truncate_start(env, cl2osc(obj), size, &oio->oi_trunc); - if (rc < 0) - return rc; } else if (cl_io_is_fallocate(io) && - io->u.ci_setattr.sa_falloc_mode & FALLOC_FL_PUNCH_HOLE) { + (io->u.ci_setattr.sa_falloc_mode & + (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))) { rc = osc_punch_start(env, io, obj); - if (rc < 0) - return rc; } + if (rc < 0) + return rc; if (oio->oi_lockless == 0) { cl_object_attr_lock(obj); diff --git a/lustre/mdt/mdt_io.c b/lustre/mdt/mdt_io.c index 0fd543b..065935b 100644 --- a/lustre/mdt/mdt_io.c +++ b/lustre/mdt/mdt_io.c @@ -964,7 +964,8 @@ int mdt_fallocate_hdl(struct tgt_session_info *tsi) * mode == 0 (which is standard prealloc) and PUNCH is supported * Rest of mode options are not supported yet. 
*/ - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | + FALLOC_FL_ZERO_RANGE)) RETURN(-EOPNOTSUPP); if (mode & FALLOC_FL_PUNCH_HOLE && !(mode & FALLOC_FL_KEEP_SIZE)) { diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index 074f73fb..47c2d0a 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -1955,7 +1955,8 @@ static int ofd_fallocate_hdl(struct tgt_session_info *tsi) * mode == 0 (which is standard prealloc) and PUNCH is supported * Rest of mode options are not supported yet. */ - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | + FALLOC_FL_ZERO_RANGE)) RETURN(-EOPNOTSUPP); /* PUNCH_HOLE mode should always be accompanied with KEEP_SIZE flag diff --git a/lustre/osc/osc_io.c b/lustre/osc/osc_io.c index c026f35..40f76f0 100644 --- a/lustre/osc/osc_io.c +++ b/lustre/osc/osc_io.c @@ -580,9 +580,10 @@ static int osc_io_setattr_start(const struct lu_env *env, if (cl_io_is_trunc(io)) result = osc_cache_truncate_start(env, cl2osc(obj), size, &oio->oi_trunc); - /* flush local pages prior punching them on server */ + /* flush local pages prior punching/zero-range them on server */ if (io_is_falloc && - io->u.ci_setattr.sa_falloc_mode & FALLOC_FL_PUNCH_HOLE) + (io->u.ci_setattr.sa_falloc_mode & + (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE))) result = osc_punch_start(env, io, obj); if (result == 0 && oio->oi_lockless == 0) { diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index a925f17..c0b2797 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -398,7 +398,7 @@ struct osd_access_lock { int tl_mode; bool tl_shared; bool tl_truncate; - bool tl_punch; + bool tl_fallocate; }; struct osd_thandle { diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index b3de147..95dadf9 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ 
b/lustre/osd-ldiskfs/osd_io.c @@ -703,8 +703,8 @@ static int osd_bufs_put(const struct lu_env *env, struct dt_object *dt, * \param pos byte offset of IO start * \param len number of bytes of IO * \param lnb array of extents undergoing IO + * \param maxlnb max pages could be loaded * \param rw read or write operation, and other flags - * \param capa capabilities * * \retval pages (zero or more) loaded successfully * \retval -ENOMEM on memory/page allocation error @@ -2108,10 +2108,16 @@ static int osd_declare_fallocate(const struct lu_env *env, ENTRY; /* - * mode == 0 (which is standard prealloc) and PUNCH is supported + * mode == 0 (which is standard prealloc) and PUNCH/ZERO is supported * Rest of mode options is not supported yet. */ - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | + FALLOC_FL_ZERO_RANGE)) + RETURN(-EOPNOTSUPP); + + /* TODO: should fix this for DoM/Indirect in another patch */ + if ((mode & FALLOC_FL_ZERO_RANGE) && + !ldiskfs_test_inode_flag(inode, LDISKFS_INODE_EXTENTS)) RETURN(-EOPNOTSUPP); /* disable fallocate completely */ @@ -2292,9 +2298,9 @@ out: RETURN(rc); } -static int osd_fallocate_punch(const struct lu_env *env, struct dt_object *dt, - __u64 start, __u64 end, int mode, - struct thandle *th) +static int osd_fallocate_advance(const struct lu_env *env, struct dt_object *dt, + __u64 start, __u64 end, int mode, + struct thandle *th) { struct osd_object *obj = osd_dt_obj(dt); struct inode *inode = obj->oo_inode; @@ -2319,11 +2325,11 @@ static int osd_fallocate_punch(const struct lu_env *env, struct dt_object *dt, continue; LASSERT(al->tl_shared == 0); found = 1; - /* do actual punch in osd_trans_stop() */ + /* do actual punch/zero in osd_trans_stop() */ al->tl_start = start; al->tl_end = end; al->tl_mode = mode; - al->tl_punch = true; + al->tl_fallocate = true; break; } @@ -2337,9 +2343,9 @@ static int osd_fallocate(const struct lu_env *env, struct dt_object *dt, ENTRY; - 
if (mode & FALLOC_FL_PUNCH_HOLE) { - /* punch */ - rc = osd_fallocate_punch(env, dt, start, end, mode, th); + if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_ZERO_RANGE)) { + /* punch/zero-range */ + rc = osd_fallocate_advance(env, dt, start, end, mode, th); } else { /* standard preallocate */ rc = osd_fallocate_preallocate(env, dt, start, end, mode, th); @@ -2755,8 +2761,9 @@ void osd_execute_truncate(struct osd_object *obj) osd_partial_page_flush(d, inode, size); } -static int osd_execute_punch(const struct lu_env *env, struct osd_object *obj, - loff_t start, loff_t end, int mode) +static int osd_execute_fallocate(const struct lu_env *env, + struct osd_object *obj, loff_t start, + loff_t end, int mode) { struct osd_device *d = osd_obj2dev(obj); struct inode *inode = obj->oo_inode; @@ -2789,9 +2796,10 @@ int osd_process_truncates(const struct lu_env *env, struct list_head *list) continue; if (al->tl_truncate) osd_execute_truncate(al->tl_obj); - else if (al->tl_punch) - rc = osd_execute_punch(env, al->tl_obj, al->tl_start, - al->tl_end, al->tl_mode); + else if (al->tl_fallocate) + rc = osd_execute_fallocate(env, al->tl_obj, + al->tl_start, al->tl_end, + al->tl_mode); } return rc; diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 1b93b13..25a04292 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -18168,6 +18168,104 @@ test_150h() { } run_test 150h "Verify extend fallocate updates the file size" +test_150ia() { + (( $MDS1_VERSION >= $(version_code 2.16.50) )) || + skip "need MDS1 version >= 2.16.50 for falloc zero-range" + + if [[ "$ost1_FSTYPE" = "zfs" || "$mds1_FSTYPE" = "zfs" ]]; then + skip "zero-range mode is not implemented on OSD ZFS" + fi + + check_set_fallocate_or_skip + stack_trap "rm -f $DIR/$tfile; wait_delete_completed" + + echo "Verify fallocate(zero): range within the file" + yes 'A' | dd of=$DIR/$tfile bs=$PAGE_SIZE count=8 || + error "dd failed for bs 4096 and count 8" + + # zero range page aligned + local offset=$((2 
* PAGE_SIZE)) + local length=$((4 * PAGE_SIZE)) + out=$(fallocate -z --offset $offset -l $length $DIR/$tfile 2>&1) || + skip_eopnotsupp "$out|falloc(zero): off $offset, len $length" + + # precomputed md5sum + local expect="f6b2adb9a352ee2b9d1f54a629e7998c" + cksum=($(md5sum $DIR/$tfile)) + [[ "${cksum[0]}" == "$expect" ]] || + error "unexpected MD5SUM after zero: ${cksum[0]}" + + # zero range partial page + local offset=2000 + local length=1000 + out=$(fallocate -z --offset $offset -l $length $DIR/$tfile 2>&1) || + skip_eopnotsupp "$out|falloc(zero): off $offset, len $length" + + expect="19912462c2a304a225df656b80844ba5" + cksum=($(md5sum $DIR/$tfile)) + [[ "${cksum[0]}" == "$expect" ]] || + error "unexpected MD5SUM after zero(partial): ${cksum[0]}" +} +run_test 150ia "Verify fallocate zero-range ZERO functionality" + +test_150ib() { + (( $MDS1_VERSION >= $(version_code 2.16.50) )) || + skip "need MDS1 version >= 2.16.50 for falloc zero-range" + + if [[ "$ost1_FSTYPE" = "zfs" || "$mds1_FSTYPE" = "zfs" ]]; then + skip "zero-range mode is not implemented on OSD ZFS" + fi + + check_set_fallocate_or_skip + stack_trap "rm -f $DIR/$tfile; wait_delete_completed" + + local blocks_after_punch=$((4 * PAGE_SIZE / 512)) + local blocks_after_zero_fill=$((8 * PAGE_SIZE / 512)) + local blocks_after_extend=$((16 * PAGE_SIZE / 512)) + local expect_len=$((8 * PAGE_SIZE)) + + # file size [0, 32K) + echo "Verify fallocate(zero): range within the file" + yes 'A' | dd of=$DIR/$tfile bs=$PAGE_SIZE count=8 || + error "dd failed for bs 4096 and count 8" + + # punch across [8K,24K) + local offset=$((2 * PAGE_SIZE)) + local length=$((4 * PAGE_SIZE)) + out=$(fallocate -p --offset $offset -l $length $DIR/$tfile 2>&1) || + skip_eopnotsupp "$out|falloc(zero): off $offset, len $length" + + # Verify punch worked as expected + blocks=$(stat -c '%b' $DIR/$tfile) + (( blocks == blocks_after_punch )) || + error "punch failed:$blocks!=$blocks_after_punch" + + # zero prealloc fill the hole just punched 
+ out=$(fallocate -z --offset $offset -l $length $DIR/$tfile 2>&1) || + skip_eopnotsupp "$out|falloc(zero): off $offset, len $length" + + # Verify zero prealloc worked. + blocks=$(stat -c '%b' $DIR/$tfile) + (( blocks == blocks_after_zero_fill )) || + error "zero prealloc failed:$blocks!=$blocks_after_zero_fill" + + # zero prealloc with KEEP_SIZE on + offset=$((8 * PAGE_SIZE)) + length=$((8 * PAGE_SIZE)) + out=$(fallocate -z -n --offset $offset -l $length $DIR/$tfile 2>&1) || + skip_eopnotsupp "$out|falloc(zero): off $offset, len $length" + + # block allocate, size remains + blocks=$(stat -c '%b' $DIR/$tfile) + (( blocks == blocks_after_extend )) || + error "extend failed:$blocks!=$blocks_after_extend" + + lsz=$(stat -c '%s' $DIR/$tfile) + (( lsz == expect_len)) || + error "zero extend failed(len):$lsz!=$expect_len" +} +run_test 150ib "Verify fallocate zero-range PREALLOC functionality" + #LU-2902 roc_hit was not able to read all values from lproc function roc_hit_init() { local osts=${1:-$(osts_nodes)}