From: Swapnil Pimpale Date: Sat, 20 May 2017 18:13:37 +0000 (+0530) Subject: LU-3606 fallocate: Implement fallocate preallocate operation X-Git-Tag: 2.13.54~83 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=48457868a02ae5a0407f304c9028f7e80e7fb574 LU-3606 fallocate: Implement fallocate preallocate operation This patch adds fallocate(2) preallocate operation support for Lustre. The fallocate(2) method of the inode_operations or file_operations is implemented and transported to the OSTs to interface with the underlying OSD's fallocate(2) code. In a separate patch, a new RPC, OST_FALLOCATE, has been added and reserved for space preallocation. The fallocate functionality (prealloc) in CLIO has been multiplexed with CIT_SETATTR. (https://review.whamcloud.com/37277) Lustre fsx (File system exerciser) is updated in a separate patch to handle fallocate calls. (https://review.whamcloud.com/37277) Only the fallocate preallocate operation is supported by this patch for now. Other operations, such as FALLOC_FL_PUNCH_HOLE (deallocate), FALLOC_FL_ZERO_RANGE, FALLOC_FL_COLLAPSE_RANGE and FALLOC_FL_INSERT_RANGE, are not supported by this patch and will be addressed by a separate patch. ZFS operation is not supported by this patch. ZFS fallocate(2) will be addressed by patch (https://review.whamcloud.com/36506/) A new test case under sanity is added to verify the fallocate call. 
Test-Parameters: fstype=ldiskfs testlist=sanity,sanityn,sanity-dom Signed-off-by: Swapnil Pimpale Signed-off-by: Li Xi Signed-off-by: Abrarahmed Momin Signed-off-by: Arshad Hussain Change-Id: I03f27d356616fbf3a3ab8e6309af26c00434d81b Reviewed-on: https://review.whamcloud.com/9275 Reviewed-by: Andreas Dilger Reviewed-by: Wang Shilong Tested-by: jenkins Tested-by: Maloo Reviewed-by: Oleg Drokin --- diff --git a/lustre/include/cl_object.h b/lustre/include/cl_object.h index 8795c82..fb93a94 100644 --- a/lustre/include/cl_object.h +++ b/lustre/include/cl_object.h @@ -1756,6 +1756,31 @@ struct cl_io_rw_common { size_t crw_count; int crw_nonblock; }; +enum cl_setattr_subtype { + /** regular setattr **/ + CL_SETATTR_REG = 1, + /** truncate(2) **/ + CL_SETATTR_TRUNC, + /** fallocate(2) - mode preallocate **/ + CL_SETATTR_FALLOCATE +}; + +struct cl_io_range { + loff_t cir_pos; + size_t cir_count; +}; + +struct cl_io_pt { + struct cl_io_pt *cip_next; + struct kiocb cip_iocb; + struct iov_iter cip_iter; + struct file *cip_file; + enum cl_io_type cip_iot; + unsigned int cip_need_restart:1; + loff_t cip_pos; + size_t cip_count; + ssize_t cip_result; +}; /** * State for io. 
@@ -1804,6 +1829,14 @@ struct cl_io { int sa_stripe_index; struct ost_layout sa_layout; const struct lu_fid *sa_parent_fid; + /* SETATTR interface is used for regular setattr, */ + /* truncate(2) and fallocate(2) subtypes */ + enum cl_setattr_subtype sa_subtype; + /* The following are used for fallocate(2) */ + int sa_falloc_mode; + loff_t sa_falloc_offset; + loff_t sa_falloc_len; + loff_t sa_falloc_end; } ci_setattr; struct cl_data_version_io { u64 dv_data_version; @@ -2379,7 +2412,14 @@ static inline int cl_io_is_mkwrite(const struct cl_io *io) static inline int cl_io_is_trunc(const struct cl_io *io) { return io->ci_type == CIT_SETATTR && - (io->u.ci_setattr.sa_avalid & ATTR_SIZE); + (io->u.ci_setattr.sa_avalid & ATTR_SIZE) && + (io->u.ci_setattr.sa_subtype != CL_SETATTR_FALLOCATE); +} + +static inline int cl_io_is_fallocate(const struct cl_io *io) +{ + return (io->ci_type == CIT_SETATTR) && + (io->u.ci_setattr.sa_subtype == CL_SETATTR_FALLOCATE); } struct cl_io *cl_io_top(struct cl_io *io); diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h index 47e2591..79f6ac3 100644 --- a/lustre/include/dt_object.h +++ b/lustre/include/dt_object.h @@ -1413,6 +1413,39 @@ struct dt_body_operations { __u64 start, __u64 end, enum lu_ladvise_type advice); + + /** + * Declare intention to preallocate space for an object + * + * \param[in] env execution environment for this thread + * \param[in] dt object + * \param[in] th transaction handle + * + * \retval 0 on success + * \retval negative negated errno on error + */ + int (*dbo_declare_fallocate)(const struct lu_env *env, + struct dt_object *dt, + struct thandle *th); + /** + * Allocate specified region for an object + * + * \param[in] env execution environment for this thread + * \param[in] dt object + * \param[in] start the start of the region to allocate + * \param[in] end the end of the region to allocate + * \param[in] mode fallocate mode + * \param[in] th transaction handle + * + * \retval 0 on success + * 
\retval negative negated errno on error + */ + int (*dbo_fallocate)(const struct lu_env *env, + struct dt_object *dt, + __u64 start, + __u64 end, + int mode, + struct thandle *th); }; /** @@ -2518,6 +2551,29 @@ static inline int dt_ladvise(const struct lu_env *env, struct dt_object *dt, return dt->do_body_ops->dbo_ladvise(env, dt, start, end, advice); } +static inline int dt_declare_falloc(const struct lu_env *env, + struct dt_object *dt, struct thandle *th) +{ + LASSERT(dt); + if (!dt->do_body_ops) + return -EOPNOTSUPP; + LASSERT(dt->do_body_ops); + LASSERT(dt->do_body_ops->dbo_declare_fallocate); + return dt->do_body_ops->dbo_declare_fallocate(env, dt, th); +} + +static inline int dt_falloc(const struct lu_env *env, struct dt_object *dt, + __u64 start, __u64 end, int mode, + struct thandle *th) +{ + LASSERT(dt); + if (!dt->do_body_ops) + return -EOPNOTSUPP; + LASSERT(dt->do_body_ops); + LASSERT(dt->do_body_ops->dbo_fallocate); + return dt->do_body_ops->dbo_fallocate(env, dt, start, end, mode, th); +} + static inline int dt_fiemap_get(const struct lu_env *env, struct dt_object *d, struct fiemap *fm) { diff --git a/lustre/include/lustre_req_layout.h b/lustre/include/lustre_req_layout.h index d634a7d..e311754 100644 --- a/lustre/include/lustre_req_layout.h +++ b/lustre/include/lustre_req_layout.h @@ -197,6 +197,7 @@ extern struct req_format RQF_OST_GETATTR; extern struct req_format RQF_OST_SETATTR; extern struct req_format RQF_OST_CREATE; extern struct req_format RQF_OST_PUNCH; +extern struct req_format RQF_OST_FALLOCATE; extern struct req_format RQF_OST_SYNC; extern struct req_format RQF_OST_DESTROY; extern struct req_format RQF_OST_BRW_READ; diff --git a/lustre/include/obd_support.h b/lustre/include/obd_support.h index b26f4d7..74842e2 100644 --- a/lustre/include/obd_support.h +++ b/lustre/include/obd_support.h @@ -344,15 +344,16 @@ extern char obd_jobid_var[]; #define OBD_FAIL_OST_DELAY_TRANS 0x246 #define OBD_FAIL_OST_PREPARE_DELAY 0x247 #define 
OBD_FAIL_OST_2BIG_NIOBUF 0x248 +#define OBD_FAIL_OST_FALLOCATE_NET 0x249 #define OBD_FAIL_LDLM 0x300 #define OBD_FAIL_LDLM_NAMESPACE_NEW 0x301 -#define OBD_FAIL_LDLM_ENQUEUE_NET 0x302 -#define OBD_FAIL_LDLM_CONVERT_NET 0x303 -#define OBD_FAIL_LDLM_CANCEL_NET 0x304 -#define OBD_FAIL_LDLM_BL_CALLBACK_NET 0x305 -#define OBD_FAIL_LDLM_CP_CALLBACK_NET 0x306 -#define OBD_FAIL_LDLM_GL_CALLBACK_NET 0x307 +#define OBD_FAIL_LDLM_ENQUEUE_NET 0x302 +#define OBD_FAIL_LDLM_CONVERT_NET 0x303 +#define OBD_FAIL_LDLM_CANCEL_NET 0x304 +#define OBD_FAIL_LDLM_BL_CALLBACK_NET 0x305 +#define OBD_FAIL_LDLM_CP_CALLBACK_NET 0x306 +#define OBD_FAIL_LDLM_GL_CALLBACK_NET 0x307 #define OBD_FAIL_LDLM_ENQUEUE_EXTENT_ERR 0x308 #define OBD_FAIL_LDLM_ENQUEUE_INTENT_ERR 0x309 #define OBD_FAIL_LDLM_CREATE_RESOURCE 0x30a diff --git a/lustre/llite/file.c b/lustre/llite/file.c index 4e76c47..86c5fc4 100644 --- a/lustre/llite/file.c +++ b/lustre/llite/file.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -4780,6 +4781,84 @@ int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat) return ll_getattr_dentry(de, stat); } +int cl_falloc(struct inode *inode, int mode, loff_t offset, loff_t len) +{ + struct lu_env *env; + struct cl_io *io; + __u16 refcheck; + int rc; loff_t sa_falloc_end; + loff_t size = i_size_read(inode); + + ENTRY; + + env = cl_env_get(&refcheck); + if (IS_ERR(env)) + RETURN(PTR_ERR(env)); + + io = vvp_env_thread_io(env); + io->ci_obj = ll_i2info(inode)->lli_clob; + io->ci_verify_layout = 1; + io->u.ci_setattr.sa_parent_fid = lu_object_fid(&io->ci_obj->co_lu); + io->u.ci_setattr.sa_falloc_mode = mode; + io->u.ci_setattr.sa_falloc_offset = offset; + io->u.ci_setattr.sa_falloc_len = len; + io->u.ci_setattr.sa_falloc_end = io->u.ci_setattr.sa_falloc_offset + + io->u.ci_setattr.sa_falloc_len; + io->u.ci_setattr.sa_subtype = CL_SETATTR_FALLOCATE; + sa_falloc_end = io->u.ci_setattr.sa_falloc_end; + if (sa_falloc_end > size) { + /* Check new size 
against VFS/VM file size limit and rlimit */ + rc = inode_newsize_ok(inode, sa_falloc_end); + if (rc) + goto out; + if (sa_falloc_end > ll_file_maxbytes(inode)) { + CDEBUG(D_INODE, "file size too large %llu > %llu\n", + (unsigned long long)(sa_falloc_end), + ll_file_maxbytes(inode)); + rc = -EFBIG; + goto out; + } + io->u.ci_setattr.sa_attr.lvb_size = sa_falloc_end; + if (!(mode & FALLOC_FL_KEEP_SIZE)) + io->u.ci_setattr.sa_avalid |= ATTR_SIZE; + } else { + io->u.ci_setattr.sa_attr.lvb_size = size; + } + +again: + if (cl_io_init(env, io, CIT_SETATTR, io->ci_obj) == 0) + rc = cl_io_loop(env, io); + else + rc = io->ci_result; + + cl_io_fini(env, io); + if (unlikely(io->ci_need_restart)) + goto again; + +out: + cl_env_put(env, &refcheck); + RETURN(rc); +} + +long ll_fallocate(struct file *filp, int mode, loff_t offset, loff_t len) +{ + struct inode *inode = filp->f_path.dentry->d_inode; + int rc; + + /* + * Only mode == 0 (which is standard prealloc) is supported now. + * Punch is not supported yet. + */ + if (mode & ~FALLOC_FL_KEEP_SIZE) + RETURN(-EOPNOTSUPP); + + ll_stats_ops_tally(ll_i2sbi(inode), LPROC_LL_FALLOCATE, 1); + + rc = cl_falloc(inode, mode, offset, len); + + RETURN(rc); +} + static int ll_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, __u64 start, __u64 len) { @@ -4902,19 +4981,22 @@ int ll_inode_permission(struct inode *inode, int mask) cfs_cap_t cap; bool squash_id = false; ktime_t kstart = ktime_get(); + ENTRY; if (mask & MAY_NOT_BLOCK) return -ECHILD; - /* as root inode are NOT getting validated in lookup operation, - * need to do it before permission check. */ + /* + * as root inode are NOT getting validated in lookup operation, + * need to do it before permission check. 
+ */ - if (inode == inode->i_sb->s_root->d_inode) { + if (inode == inode->i_sb->s_root->d_inode) { rc = ll_inode_revalidate(inode->i_sb->s_root, IT_LOOKUP); - if (rc) - RETURN(rc); - } + if (rc) + RETURN(rc); + } CDEBUG(D_VFSTRACE, "VFS Op:inode="DFID"(%p), inode mode %x mask %o\n", PFID(ll_inode2fid(inode)), inode, inode->i_mode, mask); @@ -4983,7 +5065,8 @@ struct file_operations ll_file_operations = { .llseek = ll_file_seek, .splice_read = ll_file_splice_read, .fsync = ll_fsync, - .flush = ll_flush + .flush = ll_flush, + .fallocate = ll_fallocate, }; struct file_operations ll_file_operations_flock = { @@ -5009,7 +5092,8 @@ struct file_operations ll_file_operations_flock = { .fsync = ll_fsync, .flush = ll_flush, .flock = ll_file_flock, - .lock = ll_file_flock + .lock = ll_file_flock, + .fallocate = ll_fallocate, }; /* These are for -o noflock - to return ENOSYS on flock calls */ @@ -5036,7 +5120,8 @@ struct file_operations ll_file_operations_noflock = { .fsync = ll_fsync, .flush = ll_flush, .flock = ll_file_noflock, - .lock = ll_file_noflock + .lock = ll_file_noflock, + .fallocate = ll_fallocate, }; struct inode_operations ll_file_inode_operations = { diff --git a/lustre/llite/lcommon_cl.c b/lustre/llite/lcommon_cl.c index 618de96..f5ba1a8 100644 --- a/lustre/llite/lcommon_cl.c +++ b/lustre/llite/lcommon_cl.c @@ -72,8 +72,8 @@ int cl_setattr_ost(struct cl_object *obj, const struct iattr *attr, { struct lu_env *env; struct cl_io *io; - int result; - __u16 refcheck; + int result; + __u16 refcheck; ENTRY; @@ -93,7 +93,8 @@ int cl_setattr_ost(struct cl_object *obj, const struct iattr *attr, io->u.ci_setattr.sa_avalid = attr->ia_valid; io->u.ci_setattr.sa_xvalid = xvalid; io->u.ci_setattr.sa_parent_fid = lu_object_fid(&obj->co_lu); - + if (attr->ia_valid & ATTR_SIZE) + io->u.ci_setattr.sa_subtype = CL_SETATTR_TRUNC; again: if (attr->ia_valid & ATTR_FILE) ll_io_set_mirror(io, attr->ia_file); @@ -102,8 +103,10 @@ again: struct vvp_io *vio = vvp_env_io(env); if 
(attr->ia_valid & ATTR_FILE) - /* populate the file descriptor for ftruncate to honor - * group lock - see LU-787 */ + /* + * populate the file descriptor for ftruncate to honor + * group lock - see LU-787 + */ vio->vui_fd = attr->ia_file->private_data; result = cl_io_loop(env, io); diff --git a/lustre/llite/llite_internal.h b/lustre/llite/llite_internal.h index 150e6a8..b6b0908 100644 --- a/lustre/llite/llite_internal.h +++ b/lustre/llite/llite_internal.h @@ -929,6 +929,7 @@ enum { LPROC_LL_LISTXATTR, LPROC_LL_REMOVEXATTR, LPROC_LL_INODE_PERM, + LPROC_LL_FALLOCATE, LPROC_LL_FILE_OPCODES }; diff --git a/lustre/llite/lproc_llite.c b/lustre/llite/lproc_llite.c index 6f90377..55b15b5 100644 --- a/lustre/llite/lproc_llite.c +++ b/lustre/llite/lproc_llite.c @@ -1565,6 +1565,7 @@ static const struct llite_file_opcode { { LPROC_LL_TRUNC, LPROCFS_TYPE_LATENCY, "truncate" }, { LPROC_LL_FLOCK, LPROCFS_TYPE_LATENCY, "flock" }, { LPROC_LL_GETATTR, LPROCFS_TYPE_LATENCY, "getattr" }, + { LPROC_LL_FALLOCATE, LPROCFS_TYPE_LATENCY, "fallocate"}, /* dir inode operation */ { LPROC_LL_CREATE, LPROCFS_TYPE_LATENCY, "create" }, { LPROC_LL_LINK, LPROCFS_TYPE_LATENCY, "link" }, diff --git a/lustre/llite/vvp_io.c b/lustre/llite/vvp_io.c index 652940f..6ae1463 100644 --- a/lustre/llite/vvp_io.c +++ b/lustre/llite/vvp_io.c @@ -648,13 +648,16 @@ static int vvp_io_setattr_lock(const struct lu_env *env, const struct cl_io_slice *ios) { struct cl_io *io = ios->cis_io; - __u64 new_size; + __u64 lock_start = 0; + __u64 lock_end = OBD_OBJECT_EOF; __u32 enqflags = 0; if (cl_io_is_trunc(io)) { - new_size = io->u.ci_setattr.sa_attr.lvb_size; - if (new_size == 0) + if (io->u.ci_setattr.sa_attr.lvb_size == 0) enqflags = CEF_DISCARD_DATA; + } else if (cl_io_is_fallocate(io)) { + lock_start = io->u.ci_setattr.sa_falloc_offset; + lock_end = lock_start + io->u.ci_setattr.sa_attr.lvb_size; } else { unsigned int valid = io->u.ci_setattr.sa_avalid; @@ -668,12 +671,10 @@ static int vvp_io_setattr_lock(const 
struct lu_env *env, io->u.ci_setattr.sa_attr.lvb_atime >= io->u.ci_setattr.sa_attr.lvb_ctime)) return 0; - - new_size = 0; } return vvp_io_one_lock(env, io, enqflags, CLM_WRITE, - new_size, OBD_OBJECT_EOF); + lock_start, lock_end); } static int vvp_do_vmtruncate(struct inode *inode, size_t size) @@ -732,6 +733,9 @@ static int vvp_io_setattr_start(const struct lu_env *env, trunc_sem_down_write(&lli->lli_trunc_sem); inode_lock(inode); inode_dio_wait(inode); + } else if (cl_io_is_fallocate(io)) { + inode_lock(inode); + inode_dio_wait(inode); } else { inode_lock(inode); } @@ -755,6 +759,8 @@ static void vvp_io_setattr_end(const struct lu_env *env, vvp_do_vmtruncate(inode, io->u.ci_setattr.sa_attr.lvb_size); inode_unlock(inode); trunc_sem_up_write(&lli->lli_trunc_sem); + } else if (cl_io_is_fallocate(io)) { + inode_unlock(inode); } else { inode_unlock(inode); } diff --git a/lustre/lov/lov_io.c b/lustre/lov/lov_io.c index 76ed900..747c72f 100644 --- a/lustre/lov/lov_io.c +++ b/lustre/lov/lov_io.c @@ -497,11 +497,16 @@ static int lov_io_slice_init(struct lov_io *lio, break; case CIT_SETATTR: - if (cl_io_is_trunc(io)) + if (cl_io_is_fallocate(io)) { + lio->lis_pos = io->u.ci_setattr.sa_falloc_offset; + lio->lis_endpos = io->u.ci_setattr.sa_falloc_end; + } else if (cl_io_is_trunc(io)) { lio->lis_pos = io->u.ci_setattr.sa_attr.lvb_size; - else + lio->lis_endpos = OBD_OBJECT_EOF; + } else { lio->lis_pos = 0; - lio->lis_endpos = OBD_OBJECT_EOF; + lio->lis_endpos = OBD_OBJECT_EOF; + } break; case CIT_DATA_VERSION: @@ -653,15 +658,24 @@ static void lov_io_sub_inherit(struct lov_io_sub *sub, struct lov_io *lio, parent->u.ci_setattr.sa_attr_flags; io->u.ci_setattr.sa_avalid = parent->u.ci_setattr.sa_avalid; io->u.ci_setattr.sa_xvalid = parent->u.ci_setattr.sa_xvalid; + io->u.ci_setattr.sa_falloc_mode = + parent->u.ci_setattr.sa_falloc_mode; io->u.ci_setattr.sa_stripe_index = stripe; io->u.ci_setattr.sa_parent_fid = parent->u.ci_setattr.sa_parent_fid; + /* For SETATTR(fallocate) 
pass the subtype to lower IO */ + io->u.ci_setattr.sa_subtype = parent->u.ci_setattr.sa_subtype; if (cl_io_is_trunc(io)) { loff_t new_size = parent->u.ci_setattr.sa_attr.lvb_size; new_size = lov_size_to_stripe(lsm, index, new_size, stripe); io->u.ci_setattr.sa_attr.lvb_size = new_size; + } else if (cl_io_is_fallocate(io)) { + io->u.ci_setattr.sa_falloc_offset = start; + io->u.ci_setattr.sa_falloc_end = end; + io->u.ci_setattr.sa_attr.lvb_size = + parent->u.ci_setattr.sa_attr.lvb_size; } lov_lsm2layout(lsm, lsm->lsm_entries[index], &io->u.ci_setattr.sa_layout); @@ -1534,8 +1548,11 @@ int lov_io_init_released(const struct lu_env *env, struct cl_object *obj, * - in open, for open O_TRUNC * - in setattr, for truncate */ - /* the truncate is for size > 0 so triggers a restore */ - if (cl_io_is_trunc(io)) { + /* + * the truncate is for size > 0 so triggers a restore, + * also trigger a restore for prealloc/punch + */ + if (cl_io_is_trunc(io) || cl_io_is_fallocate(io)) { io->ci_restore_needed = 1; result = -ENODATA; } else diff --git a/lustre/ofd/lproc_ofd.c b/lustre/ofd/lproc_ofd.c index 60c99eb..a36c316 100644 --- a/lustre/ofd/lproc_ofd.c +++ b/lustre/ofd/lproc_ofd.c @@ -1054,6 +1054,8 @@ void ofd_stats_counter_init(struct lprocfs_stats *stats) 0, "set_info", "reqs"); lprocfs_counter_init(stats, LPROC_OFD_STATS_QUOTACTL, 0, "quotactl", "reqs"); + lprocfs_counter_init(stats, LPROC_OFD_STATS_PREALLOC, + 0, "prealloc", "reqs"); } LPROC_SEQ_FOPS(lprocfs_nid_stats_clear); diff --git a/lustre/ofd/ofd_dev.c b/lustre/ofd/ofd_dev.c index 883d7ef..f13affd 100644 --- a/lustre/ofd/ofd_dev.c +++ b/lustre/ofd/ofd_dev.c @@ -77,6 +77,7 @@ #include #include #include +#include #include "ofd_internal.h" @@ -1929,6 +1930,114 @@ put: } /** + * OFD request handler for OST_FALLOCATE RPC. + * + * This is part of request processing. Validate request fields, + * preallocate the given OFD object and pack reply. 
+ * + * \param[in] tsi target session environment for this request + * + * \retval 0 if successful + * \retval negative value on error + */ +static int ofd_fallocate_hdl(struct tgt_session_info *tsi) +{ + struct obdo *oa = &tsi->tsi_ost_body->oa; + struct ost_body *repbody; + struct ofd_thread_info *info = tsi2ofd_info(tsi); + struct ldlm_namespace *ns = tsi->tsi_tgt->lut_obd->obd_namespace; + struct ldlm_resource *res; + struct ofd_object *fo; + __u64 flags = 0; + struct lustre_handle lh = { 0, }; + int rc, mode; + __u64 start, end; + bool srvlock; + + repbody = req_capsule_server_get(tsi->tsi_pill, &RMF_OST_BODY); + if (repbody == NULL) + RETURN(err_serious(-ENOMEM)); + + /* + * fallocate start and end are passed in o_size, o_blocks + * on the wire. + */ + start = oa->o_size; + end = oa->o_blocks; + mode = oa->o_falloc_mode; + /* + * Only mode == 0 (which is standard prealloc) is supported now. + * Punch is not supported yet. + */ + if (mode & ~FALLOC_FL_KEEP_SIZE) + RETURN(-EOPNOTSUPP); + + repbody->oa.o_oi = oa->o_oi; + repbody->oa.o_valid = OBD_MD_FLID; + + srvlock = oa->o_valid & OBD_MD_FLFLAGS && + oa->o_flags & OBD_FL_SRVLOCK; + + if (srvlock) { + rc = tgt_extent_lock(tsi->tsi_env, ns, &tsi->tsi_resid, + start, end, &lh, LCK_PW, &flags); + if (rc != 0) + RETURN(rc); + } + + fo = ofd_object_find_exists(tsi->tsi_env, ofd_exp(tsi->tsi_exp), + &tsi->tsi_fid); + if (IS_ERR(fo)) + GOTO(out, rc = PTR_ERR(fo)); + + la_from_obdo(&info->fti_attr, oa, + OBD_MD_FLMTIME | OBD_MD_FLATIME | OBD_MD_FLCTIME); + + rc = ofd_object_fallocate(tsi->tsi_env, fo, start, end, mode, + &info->fti_attr, oa); + if (rc) + GOTO(out_put, rc); + + rc = ofd_attr_get(tsi->tsi_env, fo, &info->fti_attr); + if (rc == 0) + obdo_from_la(&repbody->oa, &info->fti_attr, + OFD_VALID_FLAGS); + else + rc = 0; + + ofd_counter_incr(tsi->tsi_exp, LPROC_OFD_STATS_PREALLOC, + tsi->tsi_jobid, 1); + + EXIT; +out_put: + ofd_object_put(tsi->tsi_env, fo); +out: + if (srvlock) + tgt_extent_unlock(&lh, LCK_PW); + 
if (rc == 0) { + res = ldlm_resource_get(ns, NULL, &tsi->tsi_resid, + LDLM_EXTENT, 0); + if (!IS_ERR(res)) { + struct ost_lvb *res_lvb; + + ldlm_res_lvbo_update(res, NULL, 0); + res_lvb = res->lr_lvb_data; + /* Blocks */ + repbody->oa.o_valid |= OBD_MD_FLBLOCKS; + repbody->oa.o_blocks = res_lvb->lvb_blocks; + /* Size */ + repbody->oa.o_valid |= OBD_MD_FLSIZE; + repbody->oa.o_size = res_lvb->lvb_size; + + ldlm_resource_putref(res); + } + } + + RETURN(rc); +} + + +/** * OFD request handler for OST_PUNCH RPC. * * This is part of request processing. Validate request fields, @@ -2711,6 +2820,7 @@ TGT_OST_HDL_HP(HAS_BODY | HAS_REPLY | IS_MUTABLE, TGT_OST_HDL(HAS_BODY | HAS_REPLY, OST_SYNC, ofd_sync_hdl), TGT_OST_HDL(HAS_REPLY, OST_QUOTACTL, ofd_quotactl), TGT_OST_HDL(HAS_BODY | HAS_REPLY, OST_LADVISE, ofd_ladvise_hdl), +TGT_OST_HDL(HAS_BODY | HAS_REPLY | IS_MUTABLE, OST_FALLOCATE, ofd_fallocate_hdl) }; static struct tgt_opc_slice ofd_common_slice[] = { diff --git a/lustre/ofd/ofd_internal.h b/lustre/ofd/ofd_internal.h index da30314..ca2f8f7 100644 --- a/lustre/ofd/ofd_internal.h +++ b/lustre/ofd/ofd_internal.h @@ -73,6 +73,7 @@ enum { LPROC_OFD_STATS_GET_INFO, LPROC_OFD_STATS_SET_INFO, LPROC_OFD_STATS_QUOTACTL, + LPROC_OFD_STATS_PREALLOC, LPROC_OFD_STATS_LAST, }; @@ -392,6 +393,9 @@ int ofd_attr_set(const struct lu_env *env, struct ofd_object *fo, int ofd_object_punch(const struct lu_env *env, struct ofd_object *fo, __u64 start, __u64 end, struct lu_attr *la, struct obdo *oa); +int ofd_object_fallocate(const struct lu_env *env, struct ofd_object *fo, + __u64 start, __u64 end, int mode, struct lu_attr *la, + struct obdo *oa); int ofd_destroy(const struct lu_env *, struct ofd_object *, int); int ofd_attr_get(const struct lu_env *env, struct ofd_object *fo, struct lu_attr *la); diff --git a/lustre/ofd/ofd_objects.c b/lustre/ofd/ofd_objects.c index 6d55a99..bb92058 100644 --- a/lustre/ofd/ofd_objects.c +++ b/lustre/ofd/ofd_objects.c @@ -742,6 +742,91 @@ out: } /** + * 
Fallocate(Preallocate) space for OFD object. + * + * This function allocates space for the object from the \a start + * offset to the \a end offset. + * + * \param[in] env execution environment + * \param[in] fo OFD object + * \param[in] start start offset to allocate from + * \param[in] end end of allocate + * \param[in] mode fallocate mode + * \param[in] la object attributes + * \param[in] ff filter_fid structure + * + * \retval 0 if successful + * \retval negative value on error + */ +int ofd_object_fallocate(const struct lu_env *env, struct ofd_object *fo, + __u64 start, __u64 end, int mode, struct lu_attr *la, + struct obdo *oa) +{ + struct ofd_thread_info *info = ofd_info(env); + struct ofd_device *ofd = ofd_obj2dev(fo); + struct dt_object *dob = ofd_object_child(fo); + struct thandle *th; + struct filter_fid *ff = &info->fti_mds_fid; + bool ff_needed = false; + int rc; + + ENTRY; + + ofd_write_lock(env, fo); + if (!ofd_object_exists(fo)) + GOTO(unlock, rc = -ENOENT); + + /* VBR: version recovery check */ + rc = ofd_version_get_check(info, fo); + if (rc != 0) + GOTO(unlock, rc); + + if (ff != NULL) { + rc = ofd_object_ff_load(env, fo); + if (rc == -ENODATA) + ff_needed = true; + else if (rc < 0) + GOTO(unlock, rc); + } + + th = ofd_trans_create(env, ofd); + if (IS_ERR(th)) + GOTO(unlock, rc = PTR_ERR(th)); + + rc = dt_declare_attr_set(env, dob, la, th); + if (rc) + GOTO(stop, rc); + + rc = dt_declare_falloc(env, dob, th); + if (rc) + GOTO(stop, rc); + + rc = ofd_trans_start(env, ofd, fo, th); + if (rc) + GOTO(stop, rc); + + rc = dt_falloc(env, dob, start, end, mode, th); + if (rc) + GOTO(stop, rc); + + rc = dt_attr_set(env, dob, la, th); + if (rc) + GOTO(stop, rc); + + if (ff_needed) { + rc = dt_xattr_set(env, ofd_object_child(fo), &info->fti_buf, + XATTR_NAME_FID, 0, th); + if (!rc) + filter_fid_le_to_cpu(&fo->ofo_ff, ff, sizeof(*ff)); + } +stop: + ofd_trans_stop(env, ofd, th, rc); +unlock: + ofd_write_unlock(env, fo); + RETURN(rc); +} + +/** * 
Truncate/punch OFD object. * * This function frees all of the allocated object's space from the \a start diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h index 5a65dcd..4e501b8 100644 --- a/lustre/osc/osc_internal.h +++ b/lustre/osc/osc_internal.h @@ -73,6 +73,9 @@ int osc_match_base(const struct lu_env *env, struct obd_export *exp, int osc_setattr_async(struct obd_export *exp, struct obdo *oa, obd_enqueue_update_f upcall, void *cookie, struct ptlrpc_request_set *rqset); +int osc_fallocate_base(struct obd_export *exp, struct obdo *oa, + obd_enqueue_update_f upcall, void *cookie, + int mode); int osc_sync_base(struct osc_object *obj, struct obdo *oa, obd_enqueue_update_f upcall, void *cookie, struct ptlrpc_request_set *rqset); diff --git a/lustre/osc/osc_io.c b/lustre/osc/osc_io.c index 3ac6102..67c162f 100644 --- a/lustre/osc/osc_io.c +++ b/lustre/osc/osc_io.c @@ -40,6 +40,7 @@ #include #include #include +#include #include "osc_internal.h" @@ -540,23 +541,30 @@ static void osc_trunc_check(const struct lu_env *env, struct cl_io *io, static int osc_io_setattr_start(const struct lu_env *env, const struct cl_io_slice *slice) { - struct cl_io *io = slice->cis_io; - struct osc_io *oio = cl2osc_io(env, slice); - struct cl_object *obj = slice->cis_obj; - struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo; - struct cl_attr *attr = &osc_env_info(env)->oti_attr; - struct obdo *oa = &oio->oi_oa; + struct cl_io *io = slice->cis_io; + struct osc_io *oio = cl2osc_io(env, slice); + struct cl_object *obj = slice->cis_obj; + struct lov_oinfo *loi = cl2osc(obj)->oo_oinfo; + struct cl_attr *attr = &osc_env_info(env)->oti_attr; + struct obdo *oa = &oio->oi_oa; struct osc_async_cbargs *cbargs = &oio->oi_cbarg; - __u64 size = io->u.ci_setattr.sa_attr.lvb_size; unsigned int ia_avalid = io->u.ci_setattr.sa_avalid; enum op_xvalid ia_xvalid = io->u.ci_setattr.sa_xvalid; int result = 0; + __u64 size = io->u.ci_setattr.sa_attr.lvb_size; + __u64 end = OBD_OBJECT_EOF; + bool 
io_is_falloc = false; ENTRY; /* truncate cache dirty pages first */ - if (cl_io_is_trunc(io)) + if (cl_io_is_trunc(io)) { result = osc_cache_truncate_start(env, cl2osc(obj), size, &oio->oi_trunc); + } else if (cl_io_is_fallocate(io)) { + io_is_falloc = true; + size = io->u.ci_setattr.sa_falloc_offset; + end = io->u.ci_setattr.sa_falloc_end; + } if (result == 0 && oio->oi_lockless == 0) { cl_object_attr_lock(obj); @@ -608,9 +616,15 @@ static int osc_io_setattr_start(const struct lu_env *env, oa->o_mtime = attr->cat_mtime; } if (ia_avalid & ATTR_SIZE) { - oa->o_size = size; - oa->o_blocks = OBD_OBJECT_EOF; - oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; + if (io_is_falloc) { + oa->o_size = size; + oa->o_blocks = end; + oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; + } else { + oa->o_size = size; + oa->o_blocks = OBD_OBJECT_EOF; + oa->o_valid |= OBD_MD_FLSIZE | OBD_MD_FLBLOCKS; + } if (oio->oi_lockless) { oa->o_flags = OBD_FL_SRVLOCK; @@ -633,14 +647,20 @@ static int osc_io_setattr_start(const struct lu_env *env, init_completion(&cbargs->opc_sync); - if (ia_avalid & ATTR_SIZE) + if (io_is_falloc) { + int falloc_mode = io->u.ci_setattr.sa_falloc_mode; + + result = osc_fallocate_base(osc_export(cl2osc(obj)), + oa, osc_async_upcall, + cbargs, falloc_mode); + } else if (ia_avalid & ATTR_SIZE) { result = osc_punch_send(osc_export(cl2osc(obj)), oa, osc_async_upcall, cbargs); - else + } else { result = osc_setattr_async(osc_export(cl2osc(obj)), oa, osc_async_upcall, cbargs, PTLRPCD_SET); - + } cbargs->opc_rpc_sent = result == 0; } @@ -670,6 +690,7 @@ void osc_io_setattr_end(const struct lu_env *env, struct osc_device *osd = lu2osc_dev(obj->co_lu.lo_dev); LASSERT(cl_io_is_trunc(io)); + LASSERT(cl_io_is_trunc(io) || cl_io_is_fallocate(io)); /* XXX: Need a lock. 
*/ osd->od_stats.os_lockless_truncates++; } @@ -689,6 +710,25 @@ void osc_io_setattr_end(const struct lu_env *env, osc_cache_truncate_end(env, oio->oi_trunc); oio->oi_trunc = NULL; } + + if (cl_io_is_fallocate(io)) { + cl_object_attr_lock(obj); + + /* update blocks */ + if (oa->o_valid & OBD_MD_FLBLOCKS) { + attr->cat_blocks = oa->o_blocks; + cl_valid |= CAT_BLOCKS; + } + + /* update size */ + if (oa->o_valid & OBD_MD_FLSIZE) { + attr->cat_size = oa->o_size; + cl_valid |= CAT_SIZE; + } + + cl_object_attr_update(env, obj, attr, cl_valid); + cl_object_attr_unlock(obj); + } } EXPORT_SYMBOL(osc_io_setattr_end); diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index 0ea2ad6..1d9971f 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -33,6 +33,8 @@ #define DEBUG_SUBSYSTEM S_OSC #include +#include +#include #include #include #include @@ -45,6 +47,7 @@ #include #include #include +#include #include "osc_internal.h" @@ -420,6 +423,71 @@ int osc_punch_send(struct obd_export *exp, struct obdo *oa, } EXPORT_SYMBOL(osc_punch_send); +/** + * osc_fallocate_base() - Handles fallocate request. + * + * @exp: Export structure + * @oa: Attributes passed to OSS from client (obdo structure) + * @upcall: Primary & supplementary group information + * @cookie: Exclusive identifier + * @rqset: Request list. + * @mode: Operation done on given range. + * + * osc_fallocate_base() - Handles fallocate requests only. Only block + * allocation or standard preallocate operation is supported currently. + * Other mode flags is not supported yet. ftruncate(2) or truncate(2) + * is supported via SETATTR request. + * + * Return: Non-zero on failure and O on success. 
+ */ +int osc_fallocate_base(struct obd_export *exp, struct obdo *oa, + obd_enqueue_update_f upcall, void *cookie, int mode) +{ + struct ptlrpc_request *req; + struct osc_setattr_args *sa; + struct ost_body *body; + struct obd_import *imp = class_exp2cliimp(exp); + int rc; + ENTRY; + + /* + * Only mode == 0 (which is standard prealloc) is supported now. + * Punch is not supported yet. + */ + if (mode & ~FALLOC_FL_KEEP_SIZE) + RETURN(-EOPNOTSUPP); + oa->o_falloc_mode = mode; + + req = ptlrpc_request_alloc(class_exp2cliimp(exp), + &RQF_OST_FALLOCATE); + if (req == NULL) + RETURN(-ENOMEM); + + rc = ptlrpc_request_pack(req, LUSTRE_OST_VERSION, OST_FALLOCATE); + if (rc != 0) { + ptlrpc_request_free(req); + RETURN(rc); + } + + body = req_capsule_client_get(&req->rq_pill, &RMF_OST_BODY); + LASSERT(body); + + lustre_set_wire_obdo(&imp->imp_connect_data, &body->oa, oa); + + ptlrpc_request_set_replen(req); + + req->rq_interpret_reply = (ptlrpc_interpterer_t)osc_setattr_interpret; + BUILD_BUG_ON(sizeof(*sa) > sizeof(req->rq_async_args)); + sa = ptlrpc_req_async_args(sa, req); + sa->sa_oa = oa; + sa->sa_upcall = upcall; + sa->sa_cookie = cookie; + + ptlrpcd_add_req(req); + + RETURN(0); +} + static int osc_sync_interpret(const struct lu_env *env, struct ptlrpc_request *req, void *args, int rc) { diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 6c62ca4..1817ffa 100644 --- a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -380,17 +380,18 @@ enum osd_full_scrub_ratio { enum osd_op_type { OSD_OT_ATTR_SET = 0, - OSD_OT_PUNCH = 1, - OSD_OT_XATTR_SET = 2, - OSD_OT_CREATE = 3, - OSD_OT_DESTROY = 4, - OSD_OT_REF_ADD = 5, - OSD_OT_REF_DEL = 6, - OSD_OT_WRITE = 7, - OSD_OT_INSERT = 8, - OSD_OT_DELETE = 9, - OSD_OT_QUOTA = 10, - OSD_OT_MAX = 11 + OSD_OT_PUNCH, + OSD_OT_XATTR_SET, + OSD_OT_CREATE, + OSD_OT_DESTROY, + OSD_OT_REF_ADD, + OSD_OT_REF_DEL, + OSD_OT_WRITE, + OSD_OT_INSERT, + OSD_OT_DELETE, + OSD_OT_QUOTA, + 
OSD_OT_PREALLOC, + OSD_OT_MAX }; struct osd_access_lock { diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index c3df4cc..4623053 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -1864,6 +1864,69 @@ static ssize_t osd_write(const struct lu_env *env, struct dt_object *dt, return result; } +static int osd_declare_fallocate(const struct lu_env *env, + struct dt_object *dt, struct thandle *th) +{ + struct osd_thandle *oh; + struct inode *inode; + int rc; + ENTRY; + + LASSERT(th); + oh = container_of(th, struct osd_thandle, ot_super); + + osd_trans_declare_op(env, oh, OSD_OT_PREALLOC, + osd_dto_credits_noquota[DTO_WRITE_BLOCK]); + inode = osd_dt_obj(dt)->oo_inode; + LASSERT(inode); + + rc = osd_declare_inode_qid(env, i_uid_read(inode), i_gid_read(inode), + i_projid_read(inode), 0, oh, osd_dt_obj(dt), + NULL, OSD_QID_BLK); + RETURN(rc); +} + +static int osd_fallocate(const struct lu_env *env, struct dt_object *dt, + __u64 start, __u64 end, int mode, struct thandle *th) +{ + struct osd_object *obj = osd_dt_obj(dt); + struct inode *inode = obj->oo_inode; + int rc = 0; + struct osd_thread_info *info = osd_oti_get(env); + struct dentry *dentry = &info->oti_obj_dentry; + struct file *file = &info->oti_file; + + ENTRY; + /* + * Only mode == 0 (which is standard prealloc) is supported now. + * Rest of mode options is not supported yet. 
+ */ + if (mode & ~FALLOC_FL_KEEP_SIZE) + RETURN(-EOPNOTSUPP); + + LASSERT(dt_object_exists(dt)); + LASSERT(osd_invariant(obj)); + LASSERT(inode != NULL); + dquot_initialize(inode); + + LASSERT(th); + + osd_trans_exec_op(env, th, OSD_OT_PREALLOC); + + /* + * Because f_op->fallocate() does not have an inode arg + */ + dentry->d_inode = inode; + dentry->d_sb = inode->i_sb; + file->f_path.dentry = dentry; + file->f_mapping = inode->i_mapping; + file->f_op = inode->i_fop; + file->f_inode = inode; + rc = file->f_op->fallocate(file, mode, start, end - start); + + RETURN(rc); +} + static int osd_declare_punch(const struct lu_env *env, struct dt_object *dt, __u64 start, __u64 end, struct thandle *th) { @@ -1911,7 +1974,6 @@ static int osd_punch(const struct lu_env *env, struct dt_object *dt, bool grow = false; ENTRY; - LASSERT(end == OBD_OBJECT_EOF); LASSERT(dt_object_exists(dt)); LASSERT(osd_invariant(obj)); LASSERT(inode != NULL); @@ -2086,6 +2148,8 @@ const struct dt_body_operations osd_body_ops = { .dbo_punch = osd_punch, .dbo_fiemap_get = osd_fiemap_get, .dbo_ladvise = osd_ladvise, + .dbo_declare_fallocate = osd_declare_fallocate, + .dbo_fallocate = osd_fallocate, }; /** diff --git a/lustre/osd-zfs/osd_io.c b/lustre/osd-zfs/osd_io.c index baef188..59902bd 100644 --- a/lustre/osd-zfs/osd_io.c +++ b/lustre/osd-zfs/osd_io.c @@ -1119,6 +1119,32 @@ static int osd_ladvise(const struct lu_env *env, struct dt_object *dt, RETURN(rc); } +static int osd_fallocate(const struct lu_env *env, struct dt_object *dt, + __u64 start, __u64 end, int mode, struct thandle *th) +{ + int rc = -EOPNOTSUPP; + ENTRY; + + /* + * space preallocation is not supported for ZFS + * Returns -EOPNOTSUPP for now + */ + RETURN(rc); +} + +static int osd_declare_fallocate(const struct lu_env *env, + struct dt_object *dt, struct thandle *th) +{ + int rc = -EOPNOTSUPP; + ENTRY; + + /* + * space preallocation is not supported for ZFS + * Returns -EOPNOTSUPP for now + */ + RETURN(rc); +} + struct 
dt_body_operations osd_body_ops = { .dbo_read = osd_read, .dbo_declare_write = osd_declare_write, @@ -1132,6 +1158,8 @@ struct dt_body_operations osd_body_ops = { .dbo_declare_punch = osd_declare_punch, .dbo_punch = osd_punch, .dbo_ladvise = osd_ladvise, + .dbo_declare_fallocate = osd_declare_fallocate, + .dbo_fallocate = osd_fallocate, }; struct dt_body_operations osd_body_scrub_ops = { diff --git a/lustre/ptlrpc/layout.c b/lustre/ptlrpc/layout.c index cb8a2db..ff13d5f 100644 --- a/lustre/ptlrpc/layout.c +++ b/lustre/ptlrpc/layout.c @@ -817,6 +817,7 @@ static struct req_format *req_formats[] = { &RQF_OST_SETATTR, &RQF_OST_CREATE, &RQF_OST_PUNCH, + &RQF_OST_FALLOCATE, &RQF_OST_SYNC, &RQF_OST_DESTROY, &RQF_OST_BRW_READ, @@ -1724,6 +1725,10 @@ struct req_format RQF_OST_PUNCH = DEFINE_REQ_FMT0("OST_PUNCH", ost_body_capa, ost_body_only); EXPORT_SYMBOL(RQF_OST_PUNCH); +struct req_format RQF_OST_FALLOCATE = + DEFINE_REQ_FMT0("OST_FALLOCATE", ost_body_capa, ost_body_only); +EXPORT_SYMBOL(RQF_OST_FALLOCATE); + struct req_format RQF_OST_SYNC = DEFINE_REQ_FMT0("OST_SYNC", ost_body_capa, ost_body_only); EXPORT_SYMBOL(RQF_OST_SYNC); diff --git a/lustre/target/tgt_handler.c b/lustre/target/tgt_handler.c index 31ca748..67788f6 100644 --- a/lustre/target/tgt_handler.c +++ b/lustre/target/tgt_handler.c @@ -522,6 +522,7 @@ static int tgt_filter_recovery_request(struct ptlrpc_request *req, case MDS_HSM_PROGRESS: case MDS_HSM_STATE_SET: case MDS_HSM_REQUEST: + case OST_FALLOCATE: *process = target_queue_recovery_request(req, obd); RETURN(0); diff --git a/lustre/tests/Makefile.am b/lustre/tests/Makefile.am index d391ad2..8b7d8d4 100644 --- a/lustre/tests/Makefile.am +++ b/lustre/tests/Makefile.am @@ -77,6 +77,7 @@ THETESTS += group_lock_test llapi_fid_test sendfile_grouplock mmap_cat THETESTS += swap_lock_test lockahead_test mirror_io mmap_mknod_test THETESTS += create_foreign_file parse_foreign_file THETESTS += create_foreign_dir parse_foreign_dir +THETESTS += check_fallocate if 
TESTS if MPITESTS @@ -109,4 +110,5 @@ mirror_io_LDADD = $(LIBLUSTREAPI) ll_dirstripe_verify_LDADD = $(LIBLUSTREAPI) flocks_test_LDADD = $(LIBLUSTREAPI) $(PTHREAD_LIBS) create_foreign_dir_LDADD = $(LIBLUSTREAPI) +check_fallocate_LDADD = $(LIBLUSTREAPI) endif # TESTS diff --git a/lustre/tests/check_fallocate.c b/lustre/tests/check_fallocate.c new file mode 100644 index 0000000..4de7345 --- /dev/null +++ b/lustre/tests/check_fallocate.c @@ -0,0 +1,289 @@ +/* GPL HEADER START + * + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 only, + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License version 2 for more details (a copy is included + * in the LICENSE file that accompanied this code). + * + * You should have received a copy of the GNU General Public License + * version 2 along with this program; If not, see + * http://www.gnu.org/licenses/gpl-2.0.html + * + * GPL HEADER END + */ + +/* + * Copyright (C) 2014, DataDirect Networks, Inc. 
+ * Author: Swapnil Pimpale + */ + +/* + * This test case tests the following scenarios + * 1) Preallocate: try to fallocate memory blocks and write to it + * i) Non-sparse file + * - DEFAULT MODE + * ii) Sparse file + * - create a hole in a file and preallocate using both the + * modes + * Rest of mode flags is not supported currenlty + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define WRITE_BLOCKS 10 +#define HOLE_BLOCKS 10 + +/* global */ +loff_t blksize; + +void usage(char *prog) +{ + fprintf(stderr, "usage: %s \n", prog); + fprintf(stderr, "filepath: absolute pathname of Lustre file\n"); + exit(1); +} + +int write_data_to_file(int fd) +{ + char buf[blksize + 1]; + int rc, i, j; + + for (i = 0; i < WRITE_BLOCKS; i++) { + for (j = 0; j < blksize; j++) + buf[j] = 'X'; + buf[j] = '\0'; + rc = write(fd, buf, blksize); + if (rc < 0) { + fprintf(stderr, "write failed error %s\n", + strerror(errno)); + return errno; + } + } + return 0; +} + +int get_stat(int fd, struct stat *st) +{ + int rc = 0; + + bzero(st, sizeof(struct stat)); + if (fstat(fd, st)) { + fprintf(stderr, "stat file error: %s\n", strerror(errno)); + rc = errno; + } + return rc; +} + +int __do_fallocate(int fd, int mode, loff_t offset, loff_t len) +{ + int rc; + + rc = fallocate(fd, mode, offset, len); + if (rc != 0) { + fprintf(stderr, "fallocate failed, error %s, mode %d, " + "offset %llu, len %llu\n", strerror(errno), mode, + (unsigned long long)offset, (unsigned long long)len); + rc = errno; + } + + return rc; +} + +int post_fallocate_checks(int fd, int mode, loff_t offset, loff_t len, + loff_t expected_new_size) +{ + struct stat st; + int rc = 0; + + /* check the new size */ + rc = get_stat(fd, &st); + if (rc != 0) + goto out; + + if (st.st_size != expected_new_size) { + fprintf(stderr, "fallocate succeeded but size reported " + "is wrong\n"); + fprintf(stderr, "mode %d, offset %llu, len %llu, " + "new_size %llu, expected_new_size %llu\n", 
mode, + (unsigned long long)offset, (unsigned long long)len, + (unsigned long long)st.st_size, + (unsigned long long)expected_new_size); + rc = -1; + } +out: + return rc; +} + +int create_hole(int fd) +{ + int rc; + + rc = write_data_to_file(fd); + if (rc != 0) + goto out; + + lseek(fd, HOLE_BLOCKS * blksize, SEEK_CUR); + + rc = write_data_to_file(fd); + if (rc != 0) + return rc; +out: + return rc; +} + +int do_fallocate(int fd, int mode, loff_t offset, loff_t expected_new_size) +{ + int rc; + loff_t len; + + len = blksize; + rc = __do_fallocate(fd, mode, offset, len); + if (rc != 0) + goto out; + + rc = post_fallocate_checks(fd, mode, offset, len, expected_new_size); + if (rc != 0) { + fprintf(stderr, "post_fallocate_checks failed for mode %d\n", + mode); + goto out; + } +out: + return rc; + +} + +int test_prealloc_nonsparse(int fd) +{ + int rc, mode; + loff_t offset, expected_new_size; + struct stat st; + + lseek(fd, 0, SEEK_SET); + rc = write_data_to_file(fd); + if (rc != 0) + goto out; + + rc = get_stat(fd, &st); + if (rc != 0) + goto out; + + /* test default mode */ + mode = 0; + offset = lseek(fd, 0, SEEK_END); + expected_new_size = WRITE_BLOCKS * blksize + blksize; + rc = do_fallocate(fd, mode, offset, expected_new_size); +out: + return rc; +} + +int test_prealloc_sparse(int fd) +{ + int rc, mode; + loff_t offset, expected_new_size; + struct stat st; + + rc = ftruncate(fd, 0); + if (rc != 0) { + fprintf(stderr, "ftruncate error %s\n", strerror(errno)); + rc = errno; + goto out; + } + + lseek(fd, 0, SEEK_SET); + rc = create_hole(fd); + if (rc != 0) + goto out; + + rc = get_stat(fd, &st); + if (rc != 0) + goto out; + + /* test default mode */ + mode = 0; + offset = lseek(fd, (WRITE_BLOCKS + HOLE_BLOCKS / 2) * blksize, + SEEK_SET); + expected_new_size = (2 * WRITE_BLOCKS + HOLE_BLOCKS) * blksize; + rc = do_fallocate(fd, mode, offset, expected_new_size); +out: + return rc; +} + +int main(int argc, char *argv[]) +{ + char *fname, *mount_point = NULL; + int rc = 
-EINVAL, fd; + struct stat st; + struct mntent *ent; + FILE *mntpt; + + if (argc != 2) + usage(argv[0]); + + fname = argv[1]; + if (fname[0] != '/') { + fprintf(stderr, "Need absolute path of the file\n"); + goto out; + } + + fd = open(fname, O_RDWR | O_CREAT, 0700); + if (fd < 0) { + fprintf(stderr, "open file %s error: %s\n", + fname, strerror(errno)); + rc = errno; + goto out; + } + + mntpt = setmntent("/etc/mtab", "r"); + if (mntpt == NULL) { + fprintf(stderr, "setmntent error: %s\n", + strerror(errno)); + rc = errno; + goto out_open; + } + + while (NULL != (ent = getmntent(mntpt))) { + if (llapi_is_lustre_mnttype(ent->mnt_fsname) == 0) { + mount_point = ent->mnt_dir; + break; + } + } + endmntent(mntpt); + + if (mount_point == NULL) { + fprintf(stderr, "file not on lustre filesystem?\n"); + goto out_open; + } + + rc = get_stat(fd, &st); + if (rc != 0) + goto out_open; + blksize = st.st_blksize; + + rc = test_prealloc_nonsparse(fd); + if (rc != 0) + goto out_open; + + rc = test_prealloc_sparse(fd); + if (rc != 0) + goto out_open; + +out_open: + close(fd); +out: + return rc; +} diff --git a/lustre/tests/sanity-benchmark.sh b/lustre/tests/sanity-benchmark.sh index 370afae..ee57119 100644 --- a/lustre/tests/sanity-benchmark.sh +++ b/lustre/tests/sanity-benchmark.sh @@ -180,21 +180,22 @@ test_iozone() { run_test iozone "iozone" test_fsx() { - local testfile=$DIR/f0.fsxfile - FSX_SIZE=$SIZE - FSX_COUNT=1000 - local SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'` - [ $SPACE -lt $FSX_SIZE ] && FSX_SIZE=$((SPACE * 3 / 4)) - $DEBUG_OFF - FSX_SEED=${FSX_SEED:-$RANDOM} - rm -f $testfile - $LFS setstripe -c -1 $testfile - CMD="fsx -c 50 -p 1000 -S $FSX_SEED -P $TMP -l $FSX_SIZE \ - -N $((FSX_COUNT * 100)) $FSXOPT $testfile" - echo "Using: $CMD" - $CMD || error "fsx failed" - rm -f $testfile - $DEBUG_ON + local testfile=$DIR/f0.fsxfile + FSX_SIZE=$SIZE + FSX_COUNT=1000 + local SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'` + + [ $SPACE -lt $FSX_SIZE ] && 
# 150b: run the check_fallocate helper against a fresh file.
# Skipped on non-ldiskfs OSTs and on OSTs older than 2.13.53.
test_150b() {
	[ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend"
	[ $OST1_VERSION -lt $(version_code 2.13.53) ] &&
		skip "Need OST version at least 2.13.53"

	touch $DIR/$tfile
	check_fallocate $DIR/$tfile || error "fallocate failed"
}
run_test 150b "Verify fallocate (prealloc) functionality"

# 150c: preallocate 1MiB per OST from offset 0 on a fully striped file and
# check that the allocated blocks cover the request with at most 5% overhead.
test_150c() {
	local bytes
	local want

	[ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend"
	[ $OST1_VERSION -lt $(version_code 2.13.53) ] &&
		skip "Need OST version at least 2.13.53"

	$LFS setstripe -c $OSTCOUNT -S1M $DIR/$tfile || error "setstripe failed"
	fallocate -l ${OSTCOUNT}m $DIR/$tfile || error "fallocate failed"
	sync; sync_all_data
	cancel_lru_locks $OSC
	sleep 5
	bytes=$(($(stat -c '%b * %B' $DIR/$tfile)))
	want=$((OSTCOUNT * 1048576))

	# Must allocate all requested space, not more than 5% extra
	(( $bytes >= $want && $bytes < $want * 105 / 100 )) ||
		error "bytes $bytes is not $want"
}
run_test 150c "Verify fallocate Size and Blocks"

# 150d: same as 150c, but the preallocated range starts at a 1GiB offset.
test_150d() {
	local bytes
	local want

	[ "$ost1_FSTYPE" != ldiskfs ] && skip "non-ldiskfs backend"
	[ $OST1_VERSION -lt $(version_code 2.13.53) ] &&
		skip "Need OST version at least 2.13.53"

	$LFS setstripe -c $OSTCOUNT -S1M $DIR/$tfile || error "setstripe failed"
	fallocate -o 1G -l ${OSTCOUNT}m $DIR/$tfile || error "fallocate failed"
	sync; sync_all_data
	cancel_lru_locks $OSC
	sleep 5
	bytes=$(($(stat -c '%b * %B' $DIR/$tfile)))
	want=$((OSTCOUNT * 1048576))

	# Must allocate all requested space, not more than 5% extra
	(( $bytes >= $want && $bytes < $want * 105 / 100 )) ||
		error "bytes $bytes is not $want"
}
run_test 150d "Verify fallocate Size and Blocks - Non zero start"
} run_test 16b "$FSXNUM iterations of dual-mount fsx at small size" @@ -452,8 +452,8 @@ test_16c() { set_osd_param $list '' writethrough_cache_enable 0 $LFS setstripe -c -1 $file1 # b=10919 - fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 \ - || error "fsx failed" + fsx -c 50 -p $FSXP -N $FSXNUM -l $((SIZE * 256)) -S 0 $file1 $file2 || + error "fsx failed" rm -f $file1 set_osd_param $list '' read_cache_enable 1