From 947af94817f8eeb5e6108b0b3cde65419b13c8d3 Mon Sep 17 00:00:00 2001 From: Mikhail Pershin Date: Mon, 17 Aug 2020 19:36:37 +0300 Subject: [PATCH] LU-10810 osd: implement lseek method in OSD Introduce lseek support in dt_object methods and at OSD level This is server support for SEEK_HOLE and SEEK_DATA distributed request, it returns a resulting offset or error code ZFS support is added as well but it is not yet available due to missing export of dmu_offset_next() function. Patch adds also osd_quasi_file() common method to create fake file structure to use in inode i_fop calls Signed-off-by: Mikhail Pershin Change-Id: I3802743e31c94d2de219b72a540c5df9c72f1897 Reviewed-on: https://review.whamcloud.com/39706 Tested-by: jenkins Tested-by: Maloo Reviewed-by: Bobi Jam Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- config/lustre-build-zfs.m4 | 13 +++++++++++ lustre/include/dt_object.h | 24 +++++++++++++++++++++ lustre/osd-ldiskfs/osd_handler.c | 29 ++++--------------------- lustre/osd-ldiskfs/osd_internal.h | 37 ++++++++++++++++++++++++++++++++ lustre/osd-ldiskfs/osd_io.c | 40 ++++++++++++++++++++++++++-------- lustre/osd-ldiskfs/osd_scrub.c | 31 +++++++++++---------------- lustre/osd-zfs/osd_io.c | 45 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 166 insertions(+), 53 deletions(-) diff --git a/config/lustre-build-zfs.m4 b/config/lustre-build-zfs.m4 index 8390f92..873ad8a 100644 --- a/config/lustre-build-zfs.m4 +++ b/config/lustre-build-zfs.m4 @@ -736,6 +736,19 @@ your distribution. 
AC_DEFINE(HAVE_DMU_OBJSET_DISOWN_3ARG, 1, [Have dmu_objset_disown() with 3 args]) ]) + dnl # + dnl # ZFS exports dmu_offset_next + dnl # + AC_CACHE_CHECK([if ZFS exports 'dmu_offset_next'], + [lb_cv_dmu_offset_next], [ + lb_cv_dmu_offset_next="no" + AS_IF([grep -q -E "EXPORT_SYMBOL.*\(dmu_offset_next\)" "$zfssrc/module/zfs/dmu.c" 2>/dev/null], + [lb_cv_dmu_offset_next="yes"]) + ]) + AS_IF([test "x$lb_cv_dmu_offset_next" = "xyes"], [ + AC_DEFINE(HAVE_DMU_OFFSET_NEXT, 1, + [Have dmu_offset_next() exported]) + ]) ]) AS_IF([test "x$enable_zfs" = xyes], [ diff --git a/lustre/include/dt_object.h b/lustre/include/dt_object.h index fe778cf..f260afb 100644 --- a/lustre/include/dt_object.h +++ b/lustre/include/dt_object.h @@ -1457,6 +1457,19 @@ struct dt_body_operations { __u64 end, int mode, struct thandle *th); + /** + * Do SEEK_HOLE/SEEK_DATA request on object + * + * \param[in] env execution environment for this thread + * \param[in] dt object + * \param[in] offset the offset to start seek from + * \param[in] whence seek mode, SEEK_HOLE or SEEK_DATA + * + * \retval hole/data offset on success + * \retval negative negated errno on error + */ + loff_t (*dbo_lseek)(const struct lu_env *env, struct dt_object *dt, + loff_t offset, int whence); }; /** @@ -2607,6 +2620,17 @@ static inline int dt_fiemap_get(const struct lu_env *env, struct dt_object *d, return d->do_body_ops->dbo_fiemap_get(env, d, fm); } +static inline loff_t dt_lseek(const struct lu_env *env, struct dt_object *d, + loff_t offset, int whence) +{ + LASSERT(d); + if (d->do_body_ops == NULL) + return -EPROTO; + if (d->do_body_ops->dbo_lseek == NULL) + return -EOPNOTSUPP; + return d->do_body_ops->dbo_lseek(env, d, offset, whence); +} + static inline int dt_statfs_info(const struct lu_env *env, struct dt_device *dev, struct obd_statfs *osfs, diff --git a/lustre/osd-ldiskfs/osd_handler.c b/lustre/osd-ldiskfs/osd_handler.c index ce4834a..3696908 100644 --- a/lustre/osd-ldiskfs/osd_handler.c +++ 
b/lustre/osd-ldiskfs/osd_handler.c @@ -957,8 +957,7 @@ static int osd_check_lmv(struct osd_thread_info *oti, struct osd_device *dev, { struct lu_buf *buf = &oti->oti_big_buf; struct dentry *dentry = &oti->oti_obj_dentry; - struct file *filp = &oti->oti_file; - const struct file_operations *fops; + struct file *filp; struct lmv_mds_md_v1 *lmv1; struct osd_check_lmv_buf oclb = { .ctx.actor = osd_stripe_dir_filldir, @@ -1003,18 +1002,7 @@ again: if (le32_to_cpu(lmv1->lmv_magic) != LMV_MAGIC_V1) GOTO(out, rc = 0); - fops = inode->i_fop; - dentry->d_inode = inode; - dentry->d_sb = inode->i_sb; - filp->f_pos = 0; - filp->f_path.dentry = dentry; - filp->f_flags |= O_NOATIME; - filp->f_mode = FMODE_64BITHASH | FMODE_NONOTIFY; - filp->f_mapping = inode->i_mapping; - filp->f_op = fops; - filp->private_data = NULL; - filp->f_cred = current_cred(); - filp->f_inode = inode; + filp = osd_quasi_file(oti->oti_env, inode); rc = osd_security_file_alloc(filp); if (rc) goto out; @@ -1024,7 +1012,7 @@ again: rc = iterate_dir(filp, &oclb.ctx); } while (rc >= 0 && oclb.oclb_items > 0 && !oclb.oclb_found && filp->f_pos != LDISKFS_HTREE_EOF_64BIT); - fops->release(inode, filp); + inode->i_fop->release(inode, filp); out: if (rc < 0) @@ -4830,20 +4818,11 @@ static int osd_object_sync(const struct lu_env *env, struct dt_object *dt, { struct osd_object *obj = osd_dt_obj(dt); struct inode *inode = obj->oo_inode; - struct osd_thread_info *info = osd_oti_get(env); - struct dentry *dentry = &info->oti_obj_dentry; - struct file *file = &info->oti_file; + struct file *file = osd_quasi_file(env, inode); int rc; ENTRY; - dentry->d_inode = inode; - dentry->d_sb = inode->i_sb; - file->f_path.dentry = dentry; - file->f_mapping = inode->i_mapping; - file->f_op = inode->i_fop; - file->f_inode = inode; - rc = vfs_fsync_range(file, start, end, 0); RETURN(rc); diff --git a/lustre/osd-ldiskfs/osd_internal.h b/lustre/osd-ldiskfs/osd_internal.h index 7dc4bde..2ff5711 100644 --- 
a/lustre/osd-ldiskfs/osd_internal.h +++ b/lustre/osd-ldiskfs/osd_internal.h @@ -1153,6 +1153,43 @@ struct dentry *osd_child_dentry_by_inode(const struct lu_env *env, return child_dentry; } +/* build quasi file structure when it is needed to call an inode i_fop */ +static inline struct file *osd_quasi_file_init(const struct lu_env *env, + struct dentry *dentry, + struct inode *inode) +{ + struct osd_thread_info *info = osd_oti_get(env); + + info->oti_file.f_path.dentry = dentry; + info->oti_file.f_mapping = inode->i_mapping; + info->oti_file.f_op = inode->i_fop; + info->oti_file.f_inode = inode; + info->oti_file.f_pos = 0; + info->oti_file.private_data = NULL; + info->oti_file.f_cred = current_cred(); + info->oti_file.f_flags = O_NOATIME; + info->oti_file.f_mode = FMODE_64BITHASH | FMODE_NONOTIFY; + + return &info->oti_file; +} + +static inline struct file *osd_quasi_file(const struct lu_env *env, + struct inode *inode) +{ + struct osd_thread_info *info = osd_oti_get(env); + + info->oti_obj_dentry.d_inode = inode; + info->oti_obj_dentry.d_sb = inode->i_sb; + + return osd_quasi_file_init(env, &info->oti_obj_dentry, inode); +} + +static inline struct file *osd_quasi_file_by_dentry(const struct lu_env *env, + struct dentry *dentry) +{ + return osd_quasi_file_init(env, dentry, dentry->d_inode); +} + extern int osd_trans_declare_op2rb[]; extern int ldiskfs_track_declares_assert; void osd_trans_dump_creds(const struct lu_env *env, struct thandle *th); diff --git a/lustre/osd-ldiskfs/osd_io.c b/lustre/osd-ldiskfs/osd_io.c index ab48eb8..78f88e6 100644 --- a/lustre/osd-ldiskfs/osd_io.c +++ b/lustre/osd-ldiskfs/osd_io.c @@ -1909,10 +1909,8 @@ static int osd_fallocate(const struct lu_env *env, struct dt_object *dt, { struct osd_object *obj = osd_dt_obj(dt); struct inode *inode = obj->oo_inode; + struct file *file; int rc = 0; - struct osd_thread_info *info = osd_oti_get(env); - struct dentry *dentry = &info->oti_obj_dentry; - struct file *file = &info->oti_file; ENTRY; /* @@ 
-1934,12 +1932,7 @@ static int osd_fallocate(const struct lu_env *env, struct dt_object *dt, /* * Because f_op->fallocate() does not have an inode arg */ - dentry->d_inode = inode; - dentry->d_sb = inode->i_sb; - file->f_path.dentry = dentry; - file->f_mapping = inode->i_mapping; - file->f_op = inode->i_fop; - file->f_inode = inode; + file = osd_quasi_file(env, inode); rc = file->f_op->fallocate(file, mode, start, end - start); RETURN(rc); @@ -2154,6 +2147,34 @@ static int osd_ladvise(const struct lu_env *env, struct dt_object *dt, RETURN(rc); } +static loff_t osd_lseek(const struct lu_env *env, struct dt_object *dt, + loff_t offset, int whence) +{ + struct osd_object *obj = osd_dt_obj(dt); + struct inode *inode = obj->oo_inode; + struct file *file; + loff_t result; + + ENTRY; + + LASSERT(dt_object_exists(dt)); + LASSERT(osd_invariant(obj)); + LASSERT(inode); + + file = osd_quasi_file(env, inode); + result = file->f_op->llseek(file, offset, whence); + /* when result is out of file range then it must be virtual hole + * at the end of file, but this is not real file end, so return + * just -ENXIO and LOV will merge all results + */ + if (result == i_size_read(inode)) + result = -ENXIO; + + CDEBUG(D_INFO, "seek %s from %lld: %lld\n", whence == SEEK_HOLE ? 
+ "hole" : "data", offset, result); + RETURN(result); +} + /* * in some cases we may need declare methods for objects being created * e.g., when we create symlink @@ -2178,6 +2199,7 @@ const struct dt_body_operations osd_body_ops = { .dbo_ladvise = osd_ladvise, .dbo_declare_fallocate = osd_declare_fallocate, .dbo_fallocate = osd_fallocate, + .dbo_lseek = osd_lseek, }; /** diff --git a/lustre/osd-ldiskfs/osd_scrub.c b/lustre/osd-ldiskfs/osd_scrub.c index dd6f29f..8cc3d38 100644 --- a/lustre/osd-ldiskfs/osd_scrub.c +++ b/lustre/osd-ldiskfs/osd_scrub.c @@ -2249,28 +2249,21 @@ static int osd_ios_general_scan(struct osd_thread_info *info, struct osd_device *dev, struct dentry *dentry, filldir_t filldir) { - struct osd_ios_filldir_buf buf = { - .ctx.actor = filldir, - .oifb_info = info, - .oifb_dev = dev, - .oifb_dentry = dentry }; - struct file *filp = &info->oti_file; - struct inode *inode = dentry->d_inode; - const struct file_operations *fops = inode->i_fop; - int rc; + struct osd_ios_filldir_buf buf = { + .ctx.actor = filldir, + .oifb_info = info, + .oifb_dev = dev, + .oifb_dentry = dentry + }; + struct file *filp; + struct inode *inode = dentry->d_inode; + int rc; + ENTRY; LASSERT(filldir != NULL); - filp->f_pos = 0; - filp->f_path.dentry = dentry; - filp->f_flags |= O_NOATIME; - filp->f_mode = FMODE_64BITHASH | FMODE_NONOTIFY; - filp->f_mapping = inode->i_mapping; - filp->f_op = fops; - filp->private_data = NULL; - filp->f_cred = current_cred(); - filp->f_inode = inode; + filp = osd_quasi_file_by_dentry(info->oti_env, dentry); rc = osd_security_file_alloc(filp); if (rc) RETURN(rc); @@ -2280,7 +2273,7 @@ osd_ios_general_scan(struct osd_thread_info *info, struct osd_device *dev, rc = iterate_dir(filp, &buf.ctx); } while (rc >= 0 && buf.oifb_items > 0 && filp->f_pos != LDISKFS_HTREE_EOF_64BIT); - fops->release(inode, filp); + inode->i_fop->release(inode, filp); RETURN(rc); } diff --git a/lustre/osd-zfs/osd_io.c b/lustre/osd-zfs/osd_io.c index b270228..063607f 100644 
--- a/lustre/osd-zfs/osd_io.c +++ b/lustre/osd-zfs/osd_io.c @@ -1177,6 +1177,50 @@ static int osd_declare_fallocate(const struct lu_env *env, RETURN(rc); } +static loff_t osd_lseek(const struct lu_env *env, struct dt_object *dt, + loff_t offset, int whence) +{ + struct osd_object *obj = osd_dt_obj(dt); + uint64_t size = obj->oo_attr.la_size; + uint64_t result = offset; + int rc; + boolean_t hole = whence == SEEK_HOLE; + + ENTRY; + + LASSERT(dt_object_exists(dt)); + LASSERT(osd_invariant(obj)); + + if (offset < 0 || offset >= size) + RETURN(-ENXIO); + +#ifdef HAVE_DMU_OFFSET_NEXT + rc = dmu_offset_next(osd_obj2dev(obj)->od_os, obj->oo_dn->dn_object, + hole, &result); + if (rc == ESRCH) + RETURN(-ENXIO); +#else + /* + * In absence of dmu_offset_next() just do nothing but + * return EBUSY as does dmu_offset_next() and that means + * generic approach should be used. + */ + rc = EBUSY; +#endif + /* file was dirty, so fall back to using generic logic */ + if (rc == EBUSY && hole) + RETURN(-ENXIO); /* see comment below */ + + /* when result is out of file range then it must be virtual hole + * at the end of file, but this is not real file end, so return + * just -ENXIO and LOV will translate it properly. + */ + if (result >= size) + RETURN(-ENXIO); + + RETURN(result); +} + struct dt_body_operations osd_body_ops = { .dbo_read = osd_read, .dbo_declare_write = osd_declare_write, @@ -1192,6 +1236,7 @@ struct dt_body_operations osd_body_ops = { .dbo_ladvise = osd_ladvise, .dbo_declare_fallocate = osd_declare_fallocate, .dbo_fallocate = osd_fallocate, + .dbo_lseek = osd_lseek, }; struct dt_body_operations osd_body_scrub_ops = { -- 1.8.3.1