From cb037f305c64cd5121fa308afc1e6b7d3df3f61a Mon Sep 17 00:00:00 2001 From: Arshad Hussain Date: Mon, 14 Dec 2020 10:30:30 -0500 Subject: [PATCH] LU-14160 fallocate: Add punch mode to fallocate This patch adds fallocate(2) punch operation (FALLOC_FL_PUNCH_HOLE) mode support for ldiskfs backend OSD and for OSC/OST Test cases sanity/150{f,g} are added for verification. FSX test was modified: - add 'punch' operation to an output - fix 'No space' problem when fallocate length becomes negative - fix wrong bytes number in output Test-Parameters: testlist=sanity ostsizegb=12 env=ONLY="150f 150g" Signed-off-by: Arshad Hussain Change-Id: I0c180d413efdf995823e25d5c340013bec0c8611 Signed-off-by: Mikhail Pershin Reviewed-on: https://review.whamcloud.com/40877 Reviewed-by: Andreas Dilger Reviewed-by: Bobi Jam Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- lustre/include/cl_object.h | 1 - lustre/llite/file.c | 42 +++++----- lustre/mdc/mdc_dev.c | 3 + lustre/ofd/ofd_dev.c | 20 ++++- lustre/ofd/ofd_objects.c | 4 + lustre/osc/osc_io.c | 70 +++++++++------- lustre/osc/osc_request.c | 7 -- lustre/osd-ldiskfs/osd_handler.c | 2 +- lustre/osd-ldiskfs/osd_internal.h | 6 +- lustre/osd-ldiskfs/osd_io.c | 132 +++++++++++++++++++++++++------ lustre/tests/fsx.c | 17 ++-- lustre/tests/sanity-dom.sh | 2 +- lustre/tests/sanity.sh | 162 ++++++++++++++++++++++++++++++++++++++ 13 files changed, 372 insertions(+), 96 deletions(-) diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index 9a5b506..cb38515 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -1869,7 +1869,6 @@ struct cl_io { /* The following are used for fallocate(2) */ int sa_falloc_mode; loff_t sa_falloc_offset; - loff_t sa_falloc_len; loff_t sa_falloc_end; } ci_setattr; struct cl_data_version_io { diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 3871c89..fc093b6 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -5121,7 +5121,7 @@ int 
cl_falloc(struct inode *inode, int mode, loff_t offset, loff_t len) struct lu_env *env; struct cl_io *io; __u16 refcheck; - int rc; loff_t sa_falloc_end; + int rc; loff_t size = i_size_read(inode); ENTRY; @@ -5136,34 +5136,32 @@ int cl_falloc(struct inode *inode, int mode, loff_t offset, loff_t len) io->u.ci_setattr.sa_parent_fid = lu_object_fid(&io->ci_obj->co_lu); io->u.ci_setattr.sa_falloc_mode = mode; io->u.ci_setattr.sa_falloc_offset = offset; - io->u.ci_setattr.sa_falloc_len = len; - io->u.ci_setattr.sa_falloc_end = io->u.ci_setattr.sa_falloc_offset + - io->u.ci_setattr.sa_falloc_len; + io->u.ci_setattr.sa_falloc_end = offset + len; io->u.ci_setattr.sa_subtype = CL_SETATTR_FALLOCATE; - sa_falloc_end = io->u.ci_setattr.sa_falloc_end; - if (sa_falloc_end > size) { + if (io->u.ci_setattr.sa_falloc_end > size) { + loff_t newsize = io->u.ci_setattr.sa_falloc_end; + /* Check new size against VFS/VM file size limit and rlimit */ - rc = inode_newsize_ok(inode, sa_falloc_end); + rc = inode_newsize_ok(inode, newsize); if (rc) goto out; - if (sa_falloc_end > ll_file_maxbytes(inode)) { + if (newsize > ll_file_maxbytes(inode)) { CDEBUG(D_INODE, "file size too large %llu > %llu\n", - (unsigned long long)(sa_falloc_end), + (unsigned long long)newsize, ll_file_maxbytes(inode)); rc = -EFBIG; goto out; } } -again: - if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) - rc = cl_io_loop(env, io); - else - rc = io->ci_result; - - cl_io_fini(env, io); - if (unlikely(io->ci_need_restart)) - goto again; + do { + rc = cl_io_init(env, io, CIT_SETATTR, io->ci_obj); + if (!rc) + rc = cl_io_loop(env, io); + else + rc = io->ci_result; + cl_io_fini(env, io); + } while (unlikely(io->ci_need_restart)); out: cl_env_put(env, &refcheck); @@ -5175,6 +5173,8 @@ long ll_fallocate(struct file *filp, int mode, loff_t offset, loff_t len) struct inode *inode = filp->f_path.dentry->d_inode; int rc; + if (offset < 0 || len <= 0) + RETURN(-EINVAL); /* * Encrypted inodes can't handle collapse range or 
zero range or insert * range since we would need to re-encrypt blocks with a different IV or @@ -5187,10 +5187,10 @@ long ll_fallocate(struct file *filp, int mode, loff_t offset, loff_t len) RETURN(-EOPNOTSUPP); /* - * Only mode == 0 (which is standard prealloc) is supported now. - * Punch is not supported yet. + * mode == 0 (which is standard prealloc) and PUNCH is supported + * Rest of mode options are not supported yet. */ - if (mode & ~FALLOC_FL_KEEP_SIZE) + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) RETURN(-EOPNOTSUPP); ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FALLOCATE, 1); diff --git a/lustre/mdc/mdc_dev.c b/lustre/mdc/mdc_dev.c index db9e83b..02e562a 100644 --- a/lustre/mdc/mdc_dev.c +++ b/lustre/mdc/mdc_dev.c @@ -1067,6 +1067,9 @@ static int mdc_io_setattr_start(const struct lu_env *env, return rc; } + if (cl_io_is_fallocate(io)) + return -EOPNOTSUPP; + if (oio->oi_lockless == 0) { cl_object_attr_lock(obj); rc = cl_object_attr_get(env, obj, attr); diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index 644148cc..c4ebd34 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -1969,16 +1969,30 @@ static int ofd_fallocate_hdl(struct tgt_session_info *tsi) * fallocate start and end are passed in o_size, o_blocks * on the wire. */ + if ((oa->o_valid & (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS)) != + (OBD_MD_FLSIZE | OBD_MD_FLBLOCKS)) + RETURN(err_serious(-EPROTO)); + start = oa->o_size; end = oa->o_blocks; mode = oa->o_falloc_mode; /* - * Only mode == 0 (which is standard prealloc) is supported now. - * Punch is not supported yet. + * mode == 0 (which is standard prealloc) and PUNCH is supported + * Rest of mode options are not supported yet. */ - if (mode & ~FALLOC_FL_KEEP_SIZE) + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) RETURN(-EOPNOTSUPP); + /* PUNCH_HOLE mode should always be accompanied with KEEP_SIZE flag + * Check that and add the missing flag for such invalid call with + * warning. 
+ */ + if (mode & FALLOC_FL_PUNCH_HOLE && !(mode & FALLOC_FL_KEEP_SIZE)) { + CWARN("%s: PUNCH mode misses KEEP_SIZE flag, setting it\n", + tsi->tsi_tgt->lut_obd->obd_name); + mode |= FALLOC_FL_KEEP_SIZE; + } + repbody->oa.o_oi = oa->o_oi; repbody->oa.o_valid = OBD_MD_FLID; diff --git a/lustre/ofd/ofd_objects.c b/lustre/ofd/ofd_objects.c index 8c1b21a..37061e7 100644 --- a/lustre/ofd/ofd_objects.c +++ b/lustre/ofd/ofd_objects.c @@ -806,6 +806,10 @@ int ofd_object_fallocate(const struct lu_env *env, struct ofd_object *fo, if (!ofd_object_exists(fo)) GOTO(unlock, rc = -ENOENT); + if (la->la_valid & (LA_ATIME | LA_MTIME | LA_CTIME)) + tgt_fmd_update(info->fti_exp, &fo->ofo_header.loh_fid, + info->fti_xid); + rc = dt_falloc(env, dob, start, end, mode, th); if (rc) GOTO(unlock, rc); diff --git a/lustre/osc/osc_io.c b/lustre/osc/osc_io.c index b2ec8e7..ae8cc68 100644 --- a/lustre/osc/osc_io.c +++ b/lustre/osc/osc_io.c @@ -535,6 +535,30 @@ static void osc_trunc_check(const struct lu_env *env, struct cl_io *io, trunc_check_cb, (void *)&size); } +/** + * Flush affected pages prior to punch. 
+ * We shouldn't discard them locally first because that could be data loss + * if server doesn't support fallocate punch, we also need these data to be + * flushed first to prevent re-ordering with the punch + */ +static int osc_punch_start(const struct lu_env *env, struct cl_io *io, + struct cl_object *obj) +{ + struct osc_object *osc = cl2osc(obj); + pgoff_t pg_start = cl_index(obj, io->u.ci_setattr.sa_falloc_offset); + pgoff_t pg_end = cl_index(obj, io->u.ci_setattr.sa_falloc_end - 1); + int rc; + + ENTRY; + rc = osc_cache_writeback_range(env, osc, pg_start, pg_end, 1, 0); + if (rc < 0) + RETURN(rc); + + osc_page_gang_lookup(env, io, osc, pg_start, pg_end, osc_discard_cb, + osc); + RETURN(0); +} + static int osc_io_setattr_start(const struct lu_env *env, const struct cl_io_slice *slice) { @@ -549,19 +573,17 @@ static int osc_io_setattr_start(const struct lu_env *env, enum op_xvalid ia_xvalid = io->u.ci_setattr.sa_xvalid; int result = 0; __u64 size = io->u.ci_setattr.sa_attr.lvb_size; - __u64 end = OBD_OBJECT_EOF; - bool io_is_falloc = false; + bool io_is_falloc = cl_io_is_fallocate(io); ENTRY; /* truncate cache dirty pages first */ - if (cl_io_is_trunc(io)) { + if (cl_io_is_trunc(io)) result = osc_cache_truncate_start(env, cl2osc(obj), size, &oio->oi_trunc); - } else if (cl_io_is_fallocate(io)) { - io_is_falloc = true; - size = io->u.ci_setattr.sa_falloc_offset; - end = io->u.ci_setattr.sa_falloc_end; - } + /* flush local pages prior punching them on server */ + if (io_is_falloc && + io->u.ci_setattr.sa_falloc_mode & FALLOC_FL_PUNCH_HOLE) + result = osc_punch_start(env, io, obj); if (result == 0 && oio->oi_lockless == 0) { cl_object_attr_lock(obj); @@ -571,14 +593,8 @@ static int osc_io_setattr_start(const struct lu_env *env, unsigned int cl_valid = 0; if (ia_avalid & ATTR_SIZE) { - if (io_is_falloc) { - attr->cat_size = - io->u.ci_setattr.sa_attr.lvb_size; - attr->cat_kms = attr->cat_size; - } else { - attr->cat_size = size; - attr->cat_kms = size; - } + 
attr->cat_size = size; + attr->cat_kms = size; cl_valid = (CAT_SIZE | CAT_KMS); } if (ia_avalid & ATTR_MTIME_SET) { @@ -618,17 +634,8 @@ static int osc_io_setattr_start(const struct lu_env *env, oa->o_valid |= OBD_MD_FLMTIME; oa->o_mtime = attr->cat_mtime; } - if (ia_avalid & ATTR_SIZE) { - if (io_is_falloc) { - oa->o_size = size; - oa->o_blocks = end; - oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; - } else { - oa->o_size = size; - oa->o_blocks = OBD_OBJECT_EOF; - oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; - } + if (ia_avalid & ATTR_SIZE || io_is_falloc) { if (oio->oi_lockless) { oa->o_flags = OBD_FL_SRVLOCK; oa->o_valid |= OBD_MD_FLFLAGS; @@ -653,10 +660,16 @@ static int osc_io_setattr_start(const struct lu_env *env, if (io_is_falloc) { int falloc_mode = io->u.ci_setattr.sa_falloc_mode; + oa->o_size = io->u.ci_setattr.sa_falloc_offset; + oa->o_blocks = io->u.ci_setattr.sa_falloc_end; + oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; result = osc_fallocate_base(osc_export(cl2osc(obj)), oa, osc_async_upcall, cbargs, falloc_mode); } else if (ia_avalid & ATTR_SIZE) { + oa->o_size = size; + oa->o_blocks = OBD_OBJECT_EOF; + oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; result = osc_punch_send(osc_export(cl2osc(obj)), oa, osc_async_upcall, cbargs); } else { @@ -690,12 +703,11 @@ void osc_io_setattr_end(const struct lu_env *env, if (result == 0) { if (oio->oi_lockless) { /* lockless truncate */ - struct osc_device *osd = lu2osc_dev(obj->co_lu.lo_dev); + struct osc_device *osc = lu2osc_dev(obj->co_lu.lo_dev); - LASSERT(cl_io_is_trunc(io)); LASSERT(cl_io_is_trunc(io) || cl_io_is_fallocate(io)); /* XXX: Need a lock. 
*/ - osd->od_stats.os_lockless_truncates++; + osc->od_stats.os_lockless_truncates++; } } diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 97062a2..f5ea70a 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -449,14 +449,7 @@ int osc_fallocate_base(struct obd_export *exp, struct obdo *oa, int rc; ENTRY; - /* - * Only mode == 0 (which is standard prealloc) is supported now. - * Punch is not supported yet. - */ - if (mode & ~FALLOC_FL_KEEP_SIZE) - RETURN(-EOPNOTSUPP); oa->o_falloc_mode = mode; - req = ptlrpc_request_alloc(class_exp2cliimp(exp), &RQF_OST_FALLOCATE); if (req == NULL) diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index 04984f5..4725edd 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ b/lustre/osd-ldiskfs/osd_handler.c @@ -2087,7 +2087,7 @@ static int osd_trans_stop(const struct lu_env *env, struct dt_device *dt, if (!rc) rc = rc2; - osd_process_truncates(&truncates); + osd_process_truncates(env, &truncates); } else { osd_trans_stop_cb(oh, th->th_result); OBD_FREE_PTR(oh); diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index ad53ec6..c1c38fb 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -405,8 +405,12 @@ enum osd_op_type { struct osd_access_lock { struct list_head tl_list; struct osd_object *tl_obj; + loff_t tl_start; + loff_t tl_end; + int tl_mode; bool tl_shared; bool tl_truncate; + bool tl_punch; }; struct osd_thandle { @@ -1540,7 +1544,7 @@ osd_index_backup(const struct lu_env *env, struct osd_device *osd, bool backup) int osd_trunc_lock(struct osd_object *obj, struct osd_thandle *oh, bool shared); void osd_trunc_unlock_all(const struct lu_env *env, struct list_head *list); -void osd_process_truncates(struct list_head *list); +void osd_process_truncates(const struct lu_env *env, struct list_head *list); void osd_execute_truncate(struct osd_object *obj); #ifdef HAVE_BIO_ENDIO_USES_ONE_ARG diff 
--git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index d175fde..fcc7a5a 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -2170,10 +2170,10 @@ static int osd_declare_fallocate(const struct lu_env *env, ENTRY; /* - * Only mode == 0 (which is standard prealloc) is supported now. + * mode == 0 (which is standard prealloc) and PUNCH is supported * Rest of mode options is not supported yet. */ - if (mode & ~FALLOC_FL_KEEP_SIZE) + if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE)) RETURN(-EOPNOTSUPP); /* disable fallocate completely */ @@ -2183,6 +2183,16 @@ static int osd_declare_fallocate(const struct lu_env *env, LASSERT(th); LASSERT(inode); + if (mode & FALLOC_FL_PUNCH_HOLE) { + rc = osd_declare_inode_qid(env, i_uid_read(inode), + i_gid_read(inode), + i_projid_read(inode), 0, oh, + osd_dt_obj(dt), NULL, OSD_QID_BLK); + if (rc == 0) + rc = osd_trunc_lock(osd_dt_obj(dt), oh, false); + RETURN(rc); + } + /* quota space for metadata blocks * approximate metadata estimate should be good enough. 
*/ @@ -2203,8 +2213,10 @@ static int osd_declare_fallocate(const struct lu_env *env, RETURN(rc); } -static int osd_fallocate(const struct lu_env *env, struct dt_object *dt, - __u64 start, __u64 end, int mode, struct thandle *th) +static int osd_fallocate_preallocate(const struct lu_env *env, + struct dt_object *dt, + __u64 start, __u64 end, int mode, + struct thandle *th) { struct osd_thandle *oh = container_of(th, struct osd_thandle, ot_super); handle_t *handle = ldiskfs_journal_current_handle(); @@ -2329,6 +2341,61 @@ out: RETURN(rc); } +static int osd_fallocate_punch(const struct lu_env *env, struct dt_object *dt, + __u64 start, __u64 end, int mode, + struct thandle *th) +{ + struct osd_object *obj = osd_dt_obj(dt); + struct inode *inode = obj->oo_inode; + struct osd_access_lock *al; + struct osd_thandle *oh; + int rc = 0, found = 0; + + ENTRY; + + LASSERT(dt_object_exists(dt)); + LASSERT(osd_invariant(obj)); + LASSERT(inode != NULL); + + dquot_initialize(inode); + + LASSERT(th); + oh = container_of(th, struct osd_thandle, ot_super); + LASSERT(oh->ot_handle->h_transaction != NULL); + + list_for_each_entry(al, &oh->ot_trunc_locks, tl_list) { + if (obj != al->tl_obj) + continue; + LASSERT(al->tl_shared == 0); + found = 1; + /* do actual punch in osd_trans_stop() */ + al->tl_start = start; + al->tl_end = end; + al->tl_mode = mode; + al->tl_punch = true; + break; + } + + RETURN(rc); +} + +static int osd_fallocate(const struct lu_env *env, struct dt_object *dt, + __u64 start, __u64 end, int mode, struct thandle *th) +{ + int rc; + + ENTRY; + + if (mode & FALLOC_FL_PUNCH_HOLE) { + /* punch */ + rc = osd_fallocate_punch(env, dt, start, end, mode, th); + } else { + /* standard preallocate */ + rc = osd_fallocate_preallocate(env, dt, start, end, mode, th); + } + RETURN(rc); +} + static int osd_declare_punch(const struct lu_env *env, struct dt_object *dt, __u64 start, __u64 end, struct thandle *th) { @@ -2651,6 +2718,27 @@ void osd_trunc_unlock_all(const struct lu_env 
*env, struct list_head *list) } } +/* + * For a partial-page truncate, flush the page to disk immediately to + * avoid data corruption during direct disk write. b=17397 + */ +static void osd_partial_page_flush(struct osd_device *d, struct inode *inode, + loff_t offset) +{ + if (!(offset & ~PAGE_MASK)) + return; + + if (osd_use_page_cache(d)) { + filemap_fdatawrite_range(inode->i_mapping, offset, offset + 1); + } else { + /* Notice we use "wait" version to ensure I/O is complete */ + filemap_write_and_wait_range(inode->i_mapping, offset, + offset + 1); + invalidate_mapping_pages(inode->i_mapping, offset >> PAGE_SHIFT, + offset >> PAGE_SHIFT); + } +} + void osd_execute_truncate(struct osd_object *obj) { struct osd_device *d = osd_obj2dev(obj); @@ -2686,24 +2774,22 @@ void osd_execute_truncate(struct osd_object *obj) spin_unlock(&inode->i_lock); osd_dirty_inode(inode, I_DIRTY_DATASYNC); } + osd_partial_page_flush(d, inode, size); +} - /* - * For a partial-page truncate, flush the page to disk immediately to - * avoid data corruption during direct disk write. 
b=17397 - */ - if ((size & ~PAGE_MASK) == 0) - return; - if (osd_use_page_cache(d)) { - filemap_fdatawrite_range(inode->i_mapping, size, size + 1); - } else { - /* Notice we use "wait" version to ensure I/O is complete */ - filemap_write_and_wait_range(inode->i_mapping, size, size + 1); - invalidate_mapping_pages(inode->i_mapping, size >> PAGE_SHIFT, - size >> PAGE_SHIFT); - } +void osd_execute_punch(const struct lu_env *env, struct osd_object *obj, + loff_t start, loff_t end, int mode) +{ + struct osd_device *d = osd_obj2dev(obj); + struct inode *inode = obj->oo_inode; + struct file *file = osd_quasi_file(env, inode); + + file->f_op->fallocate(file, mode, start, end - start); + osd_partial_page_flush(d, inode, start); + osd_partial_page_flush(d, inode, end - 1); } -void osd_process_truncates(struct list_head *list) +void osd_process_truncates(const struct lu_env *env, struct list_head *list) { struct osd_access_lock *al; @@ -2712,8 +2798,10 @@ void osd_process_truncates(struct list_head *list) list_for_each_entry(al, list, tl_list) { if (al->tl_shared) continue; - if (!al->tl_truncate) - continue; - osd_execute_truncate(al->tl_obj); + if (al->tl_truncate) + osd_execute_truncate(al->tl_obj); + else if (al->tl_punch) + osd_execute_punch(env, al->tl_obj, al->tl_start, + al->tl_end, al->tl_mode); } } diff --git a/lustre/tests/fsx.c b/lustre/tests/fsx.c index 66e1de5..0d1b1d4 100644 --- a/lustre/tests/fsx.c +++ b/lustre/tests/fsx.c @@ -679,6 +679,7 @@ output_line(struct test_file *tf, int op, unsigned int offset, [OP_READ + OP_DIRECT] = "read_OD", [OP_WRITE + OP_DIRECT] = "write_OD", [OP_FALLOCATE] = "fallocate", + [OP_PUNCH_HOLE] = "punch from", }; /* W. 
*/ @@ -689,10 +690,11 @@ output_line(struct test_file *tf, int op, unsigned int offset, (monitorend == -1 || offset <= monitorend))))))) return; - prt("%06lu%s %lu.%06u %-10s %#08x %s %#08x\t(0x05%x bytes)\n", + prt("%06lu%s %lu.%06u %-10s %#08x %s %#08x\t(0x0%x bytes)\n", testcalls, fill_tf_buf(tf), tv.tv_sec, (int)tv.tv_usec, - ops[op], offset, op == OP_TRUNCATE ? " to " : "thru", - offset + size - 1, (int)size < 0 ? -(int)size : size); + ops[op], offset, op == OP_TRUNCATE || op == OP_PUNCH_HOLE ? + " to " : "thru", offset + size - 1, + (int)size < 0 ? -(int)size : size); } void output_debug(unsigned int offset, unsigned int size, const char *what) @@ -1167,7 +1169,6 @@ void do_preallocate(unsigned int offset, unsigned int length) { off_t end_offset; - off_t new_offset; int keep_size; int fd; struct stat statbufs; @@ -1183,8 +1184,7 @@ do_preallocate(unsigned int offset, unsigned int length) keep_size = fl_keep_size && (random() % 2); - end_offset = keep_size ? 0 : offset + length; - + end_offset = offset + length; if (end_offset > biggest) { biggest = end_offset; if (!quiet && testcalls > simulatedopcount) @@ -1200,12 +1200,9 @@ do_preallocate(unsigned int offset, unsigned int length) log4(OP_FALLOCATE, offset, length, (end_offset > file_size) ? (keep_size ? 
0 : 1) : 2); - if (end_offset > file_size) { + if (end_offset > file_size && !keep_size) { memset(good_buf + file_size, '\0', end_offset - file_size); file_size = end_offset; - } else { - new_offset = file_size - (offset + length); - length = length + new_offset; } if (testcalls <= simulatedopcount) diff --git a/lustre/tests/sanity-dom.sh b/lustre/tests/sanity-dom.sh index 963b1f7..4fb8fa4 100644 --- a/lustre/tests/sanity-dom.sh +++ b/lustre/tests/sanity-dom.sh @@ -182,7 +182,7 @@ run_test fsx "Dual-mount fsx with DoM files" test_sanity() { SANITY_ONLY=${SANITY_ONLY:-"36 39 40 41 42d 42e 43 46 56r 101e 119a \ - 131 150 155a 155b 155c 155d 207 241 251"} + 131 150a 155a 155b 155c 155d 207 241 251"} SANITY_REPEAT=${SANITY_REPEAT:-1} # XXX: to fix 45. Add 42a, c when LU-9693 fixed. # Add 42b when LU-6493 fixed diff --git a/lustre/tests/sanity.sh b/lustre/tests/sanity.sh index 817ed77..df4b8c8 100755 --- a/lustre/tests/sanity.sh +++ b/lustre/tests/sanity.sh @@ -13902,6 +13902,168 @@ test_150e() { } run_test 150e "Verify 60% of available OST space consumed by fallocate" +test_150f() { + local size + local blocks + local want_size_before=20480 # in bytes + local want_blocks_before=40 # 512 sized blocks + local want_blocks_after=24 # 512 sized blocks + local length=$(((want_blocks_before - want_blocks_after) * 512)) + + [[ $OST1_VERSION -ge $(version_code 2.14.0) ]] || + skip "need at least 2.14.0 for fallocate punch" + + if [ "$ost1_FSTYPE" = "zfs" ] || [ "$mds1_FSTYPE" = "zfs" ]; then + skip "LU-14160: punch mode is not implemented on OSD ZFS" + fi + + check_set_fallocate_or_skip + stack_trap "rm -f $DIR/$tfile; wait_delete_completed" + + echo "Verify fallocate punch: Range within the file range" + yes 'A' | dd of=$DIR/$tfile bs=4096 count=5 || + error "dd failed for bs 4096 and count 5" + + # Call fallocate with punch range which is within the file range + fallocate -p --offset 4096 -l $length $DIR/$tfile || + error "fallocate failed: offset 4096 and length $length" + # 
client must see changes immediately after fallocate + size=$(stat -c '%s' $DIR/$tfile) + blocks=$(stat -c '%b' $DIR/$tfile) + + # Verify punch worked. + (( blocks == want_blocks_after )) || + error "punch failed: blocks $blocks != $want_blocks_after" + + (( size == want_size_before )) || + error "punch failed: size $size != $want_size_before" + + # Verify there is hole in file + local data_off=$(lseek_test -d 4096 $DIR/$tfile) + # precomputed md5sum + local expect="4a9a834a2db02452929c0a348273b4aa" + + cksum=($(md5sum $DIR/$tfile)) + [[ "${cksum[0]}" == "$expect" ]] || + error "unexpected MD5SUM after punch: ${cksum[0]}" + + # Start second sub-case for fallocate punch. + echo "Verify fallocate punch: Range overlapping and less than blocksize" + yes 'A' | dd of=$DIR/$tfile bs=4096 count=5 || + error "dd failed for bs 4096 and count 5" + + # Punch range less than block size will have no change in block count + want_blocks_after=40 # 512 sized blocks + + # Punch overlaps two blocks and less than blocksize + fallocate -p --offset 4000 -l 3000 $DIR/$tfile || + error "fallocate failed: offset 4000 length 3000" + size=$(stat -c '%s' $DIR/$tfile) + blocks=$(stat -c '%b' $DIR/$tfile) + + # Verify punch worked. + (( blocks == want_blocks_after )) || + error "punch failed: blocks $blocks != $want_blocks_after" + + (( size == want_size_before )) || + error "punch failed: size $size != $want_size_before" + + # Verify if range is really zero'ed out. We expect Zeros. 
+ # precomputed md5sum + expect="c57ec5d769c3dbe3426edc3f7d7e11d3" + cksum=($(md5sum $DIR/$tfile)) + [[ "${cksum[0]}" == "$expect" ]] || + error "unexpected MD5SUM after punch: ${cksum[0]}" +} +run_test 150f "Verify fallocate punch functionality" + +test_150g() { + local space + local size + local blocks + local blocks_after + local size_after + local BS=4096 # Block size in bytes + + [[ $OST1_VERSION -ge $(version_code 2.14.0) ]] || + skip "need at least 2.14.0 for fallocate punch" + + if [ "$ost1_FSTYPE" = "zfs" ] || [ "$mds1_FSTYPE" = "zfs" ]; then + skip "LU-14160: punch mode is not implemented on OSD ZFS" + fi + + check_set_fallocate_or_skip + stack_trap "rm -f $DIR/$tfile; wait_delete_completed" + + $LFS setstripe -c${OSTCOUNT} $DIR/$tfile || + error "$LFS setstripe -c${OSTCOUNT} $DIR/$tfile failed" + + # Get 100MB per OST of the available space to reduce run time + # else 60% of the available space if we are running SLOW tests + if [ $SLOW == "no" ]; then + space=$((1024 * 100 * OSTCOUNT)) + else + # Find OST with Minimum Size + space=$($LFS df | awk "/$FSNAME-OST/ { print \$4 }" | + sort -un | head -1) + echo "min size OST: $space" + space=$(((space * 60)/100 * OSTCOUNT)) + fi + # space in 1k units, round to 4k blocks + local blkcount=$((space * 1024 / $BS)) + + echo "Verify fallocate punch: Very large Range" + fallocate -l${space}k $DIR/$tfile || + error "fallocate ${space}k $DIR/$tfile failed" + # write 1M at the end, start and in the middle + yes 'A' | dd of=$DIR/$tfile bs=$BS count=256 || + error "dd failed: bs $BS count 256" + yes 'A' | dd of=$DIR/$tfile bs=$BS seek=$((blkcount - 256)) count=256 || + error "dd failed: bs $BS count 256 seek $((blkcount - 256))" + yes 'A' | dd of=$DIR/$tfile bs=$BS seek=$((blkcount / 2)) count=1024 || + error "dd failed: bs $BS count 256 seek $((blkcount / 2))" + + # Gather stats. + size=$(stat -c '%s' $DIR/$tfile) + + # gather punch length. 
+ local punch_size=$((size - (BS * 2))) + + echo "punch_size = $punch_size" + echo "size - punch_size: $((size - punch_size))" + echo "size - punch_size in blocks: $(((size - punch_size)/BS))" + + # Call fallocate to punch all except 2 blocks. We leave the + # first and the last block + echo "fallocate -p --offset $BS -l $punch_size $DIR/$tfile" + fallocate -p --offset $BS -l $punch_size $DIR/$tfile || + error "fallocate failed: offset $BS length $punch_size" + + size_after=$(stat -c '%s' $DIR/$tfile) + blocks_after=$(stat -c '%b' $DIR/$tfile) + + # Verify punch worked. + # Size should be kept + (( size == size_after )) || + error "punch failed: size $size != $size_after" + + # two 4k data blocks to remain plus possible 1 extra extent block + (( blocks_after <= ((BS / 512) * 3) )) || + error "too many blocks remains: $blocks_after" + + # Verify that file has hole between the first and the last blocks + local hole_start=$(lseek_test -l 0 $DIR/$tfile) + local hole_end=$(lseek_test -d $BS $DIR/$tfile) + + echo "Hole at [$hole_start, $hole_end)" + (( hole_start == BS )) || + error "no hole at offset $BS after punch" + + (( hole_end == BS + punch_size )) || + error "data at offset $hole_end < $((BS + punch_size))" +} +run_test 150g "Verify fallocate punch on large range" + #LU-2902 roc_hit was not able to read all values from lproc function roc_hit_init() { local list=$(comma_list $(osts_nodes)) -- 1.8.3.1