Whamcloud - gitweb
LU-10810 osd: implement lseek method in OSD 06/39706/4
author Mikhail Pershin <mpershin@whamcloud.com>
Mon, 17 Aug 2020 16:36:37 +0000 (19:36 +0300)
committer Oleg Drokin <green@whamcloud.com>
Sat, 12 Sep 2020 15:45:50 +0000 (15:45 +0000)
Introduce lseek support in dt_object methods and at OSD level

This is the server-side support for distributed SEEK_HOLE and SEEK_DATA
requests; it returns the resulting offset or an error code.

ZFS support is added as well, but it is not yet available because the
dmu_offset_next() function is not exported.

The patch also adds a common osd_quasi_file() helper that creates a fake
file structure for use in inode i_fop calls.

Signed-off-by: Mikhail Pershin <mpershin@whamcloud.com>
Change-Id: I3802743e31c94d2de219b72a540c5df9c72f1897
Reviewed-on: https://review.whamcloud.com/39706
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Bobi Jam <bobijam@hotmail.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
config/lustre-build-zfs.m4
lustre/include/dt_object.h
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-ldiskfs/osd_internal.h
lustre/osd-ldiskfs/osd_io.c
lustre/osd-ldiskfs/osd_scrub.c
lustre/osd-zfs/osd_io.c

index 8390f92..873ad8a 100644 (file)
@@ -736,6 +736,19 @@ your distribution.
                        AC_DEFINE(HAVE_DMU_OBJSET_DISOWN_3ARG, 1,
                                [Have dmu_objset_disown() with 3 args])
                ])
                        AC_DEFINE(HAVE_DMU_OBJSET_DISOWN_3ARG, 1,
                                [Have dmu_objset_disown() with 3 args])
                ])
+               dnl #
+               dnl # ZFS exports dmu_offset_next
+               dnl #
+               AC_CACHE_CHECK([if ZFS exports 'dmu_offset_next'],
+               [lb_cv_dmu_offset_next], [
+               lb_cv_dmu_offset_next="no"
+               AS_IF([grep -q -E "EXPORT_SYMBOL.*\(dmu_offset_next\)" "$zfssrc/module/zfs/dmu.c" 2>/dev/null],
+                       [lb_cv_dmu_offset_next="yes"])
+               ])
+               AS_IF([test "x$lb_cv_dmu_offset_next" = "xyes"], [
+                       AC_DEFINE(HAVE_DMU_OFFSET_NEXT, 1,
+                               [Have dmu_offset_next() exported])
+               ])
        ])
 
        AS_IF([test "x$enable_zfs" = xyes], [
        ])
 
        AS_IF([test "x$enable_zfs" = xyes], [
index fe778cf..f260afb 100644 (file)
@@ -1457,6 +1457,19 @@ struct dt_body_operations {
                            __u64 end,
                            int mode,
                            struct thandle *th);
                            __u64 end,
                            int mode,
                            struct thandle *th);
+       /**
+        * Do SEEK_HOLE/SEEK_DATA request on object
+        *
+        * \param[in] env       execution environment for this thread
+        * \param[in] dt        object
+        * \param[in] offset    the offset to start seek from
+        * \param[in] whence    seek mode, SEEK_HOLE or SEEK_DATA
+        *
+        * \retval hole/data offset     on success
+        * \retval negative             negated errno on error
+        */
+       loff_t (*dbo_lseek)(const struct lu_env *env, struct dt_object *dt,
+                           loff_t offset, int whence);
 };
 
 /**
 };
 
 /**
@@ -2607,6 +2620,17 @@ static inline int dt_fiemap_get(const struct lu_env *env, struct dt_object *d,
         return d->do_body_ops->dbo_fiemap_get(env, d, fm);
 }
 
         return d->do_body_ops->dbo_fiemap_get(env, d, fm);
 }
 
+/**
+ * Forward a SEEK_HOLE/SEEK_DATA request to the object's body operations.
+ *
+ * \param[in] env      execution environment for this thread
+ * \param[in] d        object to seek in, asserted to be non-NULL
+ * \param[in] offset   the offset to start seek from
+ * \param[in] whence   seek mode, handed to dbo_lseek() unchanged
+ *
+ * \retval             value returned by dbo_lseek() when it is provided
+ * \retval -EPROTO     the object has no body operations
+ * \retval -EOPNOTSUPP the body operations do not provide dbo_lseek()
+ */
+static inline loff_t dt_lseek(const struct lu_env *env, struct dt_object *d,
+                             loff_t offset, int whence)
+{
+       const struct dt_body_operations *ops;
+
+       LASSERT(d);
+
+       ops = d->do_body_ops;
+       if (!ops)
+               return -EPROTO;
+       if (!ops->dbo_lseek)
+               return -EOPNOTSUPP;
+
+       return ops->dbo_lseek(env, d, offset, whence);
+}
+
 static inline int dt_statfs_info(const struct lu_env *env,
                                 struct dt_device *dev,
                                struct obd_statfs *osfs,
 static inline int dt_statfs_info(const struct lu_env *env,
                                 struct dt_device *dev,
                                struct obd_statfs *osfs,
index ce4834a..3696908 100644 (file)
@@ -957,8 +957,7 @@ static int osd_check_lmv(struct osd_thread_info *oti, struct osd_device *dev,
 {
        struct lu_buf *buf = &oti->oti_big_buf;
        struct dentry *dentry = &oti->oti_obj_dentry;
 {
        struct lu_buf *buf = &oti->oti_big_buf;
        struct dentry *dentry = &oti->oti_obj_dentry;
-       struct file *filp = &oti->oti_file;
-       const struct file_operations *fops;
+       struct file *filp;
        struct lmv_mds_md_v1 *lmv1;
        struct osd_check_lmv_buf oclb = {
                .ctx.actor = osd_stripe_dir_filldir,
        struct lmv_mds_md_v1 *lmv1;
        struct osd_check_lmv_buf oclb = {
                .ctx.actor = osd_stripe_dir_filldir,
@@ -1003,18 +1002,7 @@ again:
        if (le32_to_cpu(lmv1->lmv_magic) != LMV_MAGIC_V1)
                GOTO(out, rc = 0);
 
        if (le32_to_cpu(lmv1->lmv_magic) != LMV_MAGIC_V1)
                GOTO(out, rc = 0);
 
-       fops = inode->i_fop;
-       dentry->d_inode = inode;
-       dentry->d_sb = inode->i_sb;
-       filp->f_pos = 0;
-       filp->f_path.dentry = dentry;
-       filp->f_flags |= O_NOATIME;
-       filp->f_mode = FMODE_64BITHASH | FMODE_NONOTIFY;
-       filp->f_mapping = inode->i_mapping;
-       filp->f_op = fops;
-       filp->private_data = NULL;
-       filp->f_cred = current_cred();
-       filp->f_inode = inode;
+       filp = osd_quasi_file(oti->oti_env, inode);
        rc = osd_security_file_alloc(filp);
        if (rc)
                goto out;
        rc = osd_security_file_alloc(filp);
        if (rc)
                goto out;
@@ -1024,7 +1012,7 @@ again:
                rc = iterate_dir(filp, &oclb.ctx);
        } while (rc >= 0 && oclb.oclb_items > 0 && !oclb.oclb_found &&
                 filp->f_pos != LDISKFS_HTREE_EOF_64BIT);
                rc = iterate_dir(filp, &oclb.ctx);
        } while (rc >= 0 && oclb.oclb_items > 0 && !oclb.oclb_found &&
                 filp->f_pos != LDISKFS_HTREE_EOF_64BIT);
-       fops->release(inode, filp);
+       inode->i_fop->release(inode, filp);
 
 out:
        if (rc < 0)
 
 out:
        if (rc < 0)
@@ -4830,20 +4818,11 @@ static int osd_object_sync(const struct lu_env *env, struct dt_object *dt,
 {
        struct osd_object *obj = osd_dt_obj(dt);
        struct inode *inode = obj->oo_inode;
 {
        struct osd_object *obj = osd_dt_obj(dt);
        struct inode *inode = obj->oo_inode;
-       struct osd_thread_info *info = osd_oti_get(env);
-       struct dentry *dentry = &info->oti_obj_dentry;
-       struct file *file = &info->oti_file;
+       struct file *file = osd_quasi_file(env, inode);
        int rc;
 
        ENTRY;
 
        int rc;
 
        ENTRY;
 
-       dentry->d_inode = inode;
-       dentry->d_sb = inode->i_sb;
-       file->f_path.dentry = dentry;
-       file->f_mapping = inode->i_mapping;
-       file->f_op = inode->i_fop;
-       file->f_inode = inode;
-
        rc = vfs_fsync_range(file, start, end, 0);
 
        RETURN(rc);
        rc = vfs_fsync_range(file, start, end, 0);
 
        RETURN(rc);
index 7dc4bde..2ff5711 100644 (file)
@@ -1153,6 +1153,43 @@ struct dentry *osd_child_dentry_by_inode(const struct lu_env *env,
         return child_dentry;
 }
 
         return child_dentry;
 }
 
+/*
+ * Build a quasi file structure for the cases where an inode i_fop method
+ * has to be called. The oti_file embedded in the thread info obtained from
+ * @env is filled in for @dentry/@inode and a pointer to it is returned.
+ */
+static inline struct file *osd_quasi_file_init(const struct lu_env *env,
+                                              struct dentry *dentry,
+                                              struct inode *inode)
+{
+       struct file *file = &osd_oti_get(env)->oti_file;
+
+       /* start from a clean position, flags and private state */
+       file->f_pos = 0;
+       file->f_flags = O_NOATIME;
+       file->f_mode = FMODE_64BITHASH | FMODE_NONOTIFY;
+       file->private_data = NULL;
+       file->f_cred = current_cred();
+
+       /* bind the file to the given dentry and inode */
+       file->f_path.dentry = dentry;
+       file->f_inode = inode;
+       file->f_mapping = inode->i_mapping;
+       file->f_op = inode->i_fop;
+
+       return file;
+}
+
+/*
+ * Quasi file for @inode: points the thread info's oti_obj_dentry at the
+ * inode and its superblock, then builds the quasi file on that dentry.
+ */
+static inline struct file *osd_quasi_file(const struct lu_env *env,
+                                         struct inode *inode)
+{
+       struct dentry *dentry = &osd_oti_get(env)->oti_obj_dentry;
+
+       dentry->d_sb = inode->i_sb;
+       dentry->d_inode = inode;
+
+       return osd_quasi_file_init(env, dentry, inode);
+}
+
+/* Quasi file for a caller-supplied @dentry; the inode is dentry->d_inode */
+static inline struct file *osd_quasi_file_by_dentry(const struct lu_env *env,
+                                                   struct dentry *dentry)
+{
+       struct inode *inode = dentry->d_inode;
+
+       return osd_quasi_file_init(env, dentry, inode);
+}
+
 extern int osd_trans_declare_op2rb[];
 extern int ldiskfs_track_declares_assert;
 void osd_trans_dump_creds(const struct lu_env *env, struct thandle *th);
 extern int osd_trans_declare_op2rb[];
 extern int ldiskfs_track_declares_assert;
 void osd_trans_dump_creds(const struct lu_env *env, struct thandle *th);
index ab48eb8..78f88e6 100644 (file)
@@ -1909,10 +1909,8 @@ static int osd_fallocate(const struct lu_env *env, struct dt_object *dt,
 {
        struct osd_object *obj = osd_dt_obj(dt);
        struct inode *inode = obj->oo_inode;
 {
        struct osd_object *obj = osd_dt_obj(dt);
        struct inode *inode = obj->oo_inode;
+       struct file *file;
        int rc = 0;
        int rc = 0;
-       struct osd_thread_info *info = osd_oti_get(env);
-       struct dentry *dentry = &info->oti_obj_dentry;
-       struct file *file = &info->oti_file;
 
        ENTRY;
        /*
 
        ENTRY;
        /*
@@ -1934,12 +1932,7 @@ static int osd_fallocate(const struct lu_env *env, struct dt_object *dt,
        /*
         * Because f_op->fallocate() does not have an inode arg
         */
        /*
         * Because f_op->fallocate() does not have an inode arg
         */
-       dentry->d_inode = inode;
-       dentry->d_sb = inode->i_sb;
-       file->f_path.dentry = dentry;
-       file->f_mapping = inode->i_mapping;
-       file->f_op = inode->i_fop;
-       file->f_inode = inode;
+       file = osd_quasi_file(env, inode);
        rc = file->f_op->fallocate(file, mode, start, end - start);
 
        RETURN(rc);
        rc = file->f_op->fallocate(file, mode, start, end - start);
 
        RETURN(rc);
@@ -2154,6 +2147,34 @@ static int osd_ladvise(const struct lu_env *env, struct dt_object *dt,
        RETURN(rc);
 }
 
        RETURN(rc);
 }
 
+/*
+ * SEEK_HOLE/SEEK_DATA on an ldiskfs object.
+ *
+ * Calls the inode's llseek file operation through a quasi file and returns
+ * its result, except that a result equal to the current inode size is
+ * replaced with -ENXIO.
+ */
+static loff_t osd_lseek(const struct lu_env *env, struct dt_object *dt,
+                       loff_t offset, int whence)
+{
+       struct osd_object *obj = osd_dt_obj(dt);
+       struct inode *inode = obj->oo_inode;
+       const char *what = whence == SEEK_HOLE ? "hole" : "data";
+       struct file *filp;
+       loff_t pos;
+
+       ENTRY;
+
+       LASSERT(dt_object_exists(dt));
+       LASSERT(osd_invariant(obj));
+       LASSERT(inode);
+
+       filp = osd_quasi_file(env, inode);
+       pos = filp->f_op->llseek(filp, offset, whence);
+
+       /* a result landing exactly on the object size is the virtual hole
+        * at the end of this object, which is not necessarily the real end
+        * of the file, so return just -ENXIO and LOV will merge all results
+        */
+       if (pos == i_size_read(inode))
+               pos = -ENXIO;
+
+       CDEBUG(D_INFO, "seek %s from %lld: %lld\n", what, offset, pos);
+       RETURN(pos);
+}
+
 /*
  * in some cases we may need declare methods for objects being created
  * e.g., when we create symlink
 /*
  * in some cases we may need declare methods for objects being created
  * e.g., when we create symlink
@@ -2178,6 +2199,7 @@ const struct dt_body_operations osd_body_ops = {
        .dbo_ladvise                    = osd_ladvise,
        .dbo_declare_fallocate          = osd_declare_fallocate,
        .dbo_fallocate                  = osd_fallocate,
        .dbo_ladvise                    = osd_ladvise,
        .dbo_declare_fallocate          = osd_declare_fallocate,
        .dbo_fallocate                  = osd_fallocate,
+       .dbo_lseek                      = osd_lseek,
 };
 
 /**
 };
 
 /**
index dd6f29f..8cc3d38 100644 (file)
@@ -2249,28 +2249,21 @@ static int
 osd_ios_general_scan(struct osd_thread_info *info, struct osd_device *dev,
                     struct dentry *dentry, filldir_t filldir)
 {
 osd_ios_general_scan(struct osd_thread_info *info, struct osd_device *dev,
                     struct dentry *dentry, filldir_t filldir)
 {
-       struct osd_ios_filldir_buf    buf   = {
-                                               .ctx.actor = filldir,
-                                               .oifb_info = info,
-                                               .oifb_dev = dev,
-                                               .oifb_dentry = dentry };
-       struct file                  *filp  = &info->oti_file;
-       struct inode                 *inode = dentry->d_inode;
-       const struct file_operations *fops  = inode->i_fop;
-       int                           rc;
+       struct osd_ios_filldir_buf buf = {
+               .ctx.actor = filldir,
+               .oifb_info = info,
+               .oifb_dev = dev,
+               .oifb_dentry = dentry
+       };
+       struct file *filp;
+       struct inode *inode = dentry->d_inode;
+       int rc;
+
        ENTRY;
 
        LASSERT(filldir != NULL);
 
        ENTRY;
 
        LASSERT(filldir != NULL);
 
-       filp->f_pos = 0;
-       filp->f_path.dentry = dentry;
-       filp->f_flags |= O_NOATIME;
-       filp->f_mode = FMODE_64BITHASH | FMODE_NONOTIFY;
-       filp->f_mapping = inode->i_mapping;
-       filp->f_op = fops;
-       filp->private_data = NULL;
-       filp->f_cred = current_cred();
-       filp->f_inode = inode;
+       filp = osd_quasi_file_by_dentry(info->oti_env, dentry);
        rc = osd_security_file_alloc(filp);
        if (rc)
                RETURN(rc);
        rc = osd_security_file_alloc(filp);
        if (rc)
                RETURN(rc);
@@ -2280,7 +2273,7 @@ osd_ios_general_scan(struct osd_thread_info *info, struct osd_device *dev,
                rc = iterate_dir(filp, &buf.ctx);
        } while (rc >= 0 && buf.oifb_items > 0 &&
                 filp->f_pos != LDISKFS_HTREE_EOF_64BIT);
                rc = iterate_dir(filp, &buf.ctx);
        } while (rc >= 0 && buf.oifb_items > 0 &&
                 filp->f_pos != LDISKFS_HTREE_EOF_64BIT);
-       fops->release(inode, filp);
+       inode->i_fop->release(inode, filp);
 
        RETURN(rc);
 }
 
        RETURN(rc);
 }
index b270228..063607f 100644 (file)
@@ -1177,6 +1177,50 @@ static int osd_declare_fallocate(const struct lu_env *env,
        RETURN(rc);
 }
 
        RETURN(rc);
 }
 
+/*
+ * SEEK_HOLE/SEEK_DATA on a ZFS object.
+ *
+ * \param[in] env      execution environment for this thread
+ * \param[in] dt       object, must exist
+ * \param[in] offset   the offset to start seek from
+ * \param[in] whence   SEEK_HOLE selects hole search, anything else is
+ *                     treated as a data search
+ *
+ * \retval offset of the hole/data found, always below the object size
+ * \retval -ENXIO      \a offset is negative or not below the object size,
+ *                     nothing was found, or the result is not below the
+ *                     object size
+ * \retval negative    negated errno for any other dmu_offset_next() failure
+ */
+static loff_t osd_lseek(const struct lu_env *env, struct dt_object *dt,
+                       loff_t offset, int whence)
+{
+       struct osd_object *obj = osd_dt_obj(dt);
+       uint64_t size = obj->oo_attr.la_size;
+       uint64_t result = offset;
+       int rc;
+       boolean_t hole = whence == SEEK_HOLE;
+
+       ENTRY;
+
+       LASSERT(dt_object_exists(dt));
+       LASSERT(osd_invariant(obj));
+
+       /* the sign is checked first, so the unsigned comparison is safe */
+       if (offset < 0 || (uint64_t)offset >= size)
+               RETURN(-ENXIO);
+
+#ifdef HAVE_DMU_OFFSET_NEXT
+       rc = dmu_offset_next(osd_obj2dev(obj)->od_os, obj->oo_dn->dn_object,
+                            hole, &result);
+       if (rc == ESRCH)
+               RETURN(-ENXIO);
+       /*
+        * EBUSY is handled by the generic logic below; every other failure
+        * is a real error and must not be reported as a valid offset.
+        */
+       if (rc != 0 && rc != EBUSY)
+               RETURN(-rc);
+#else
+       /*
+        * In absence of dmu_offset_next() just do nothing but
+        * return EBUSY as does dmu_offset_next() and that means
+        * generic approach should be used.
+        */
+       rc = EBUSY;
+#endif
+       /*
+        * Generic logic for the EBUSY case (the file was dirty, or
+        * dmu_offset_next() is not available): a hole search has nothing
+        * better to report than the virtual hole at the end of the object,
+        * which is returned as -ENXIO like any other out-of-range result.
+        * A data search falls through with 'result', which in the fallback
+        * branch above still holds the starting offset.
+        */
+       if (rc == EBUSY && hole)
+               RETURN(-ENXIO);
+
+       /* when result is out of file range then it must be virtual hole
+        * at the end of file, but this is not real file end, so return
+        * just -ENXIO and LOV will translate it properly.
+        */
+       if (result >= size)
+               RETURN(-ENXIO);
+
+       RETURN(result);
+}
+
 struct dt_body_operations osd_body_ops = {
        .dbo_read                       = osd_read,
        .dbo_declare_write              = osd_declare_write,
 struct dt_body_operations osd_body_ops = {
        .dbo_read                       = osd_read,
        .dbo_declare_write              = osd_declare_write,
@@ -1192,6 +1236,7 @@ struct dt_body_operations osd_body_ops = {
        .dbo_ladvise                    = osd_ladvise,
        .dbo_declare_fallocate          = osd_declare_fallocate,
        .dbo_fallocate                  = osd_fallocate,
        .dbo_ladvise                    = osd_ladvise,
        .dbo_declare_fallocate          = osd_declare_fallocate,
        .dbo_fallocate                  = osd_fallocate,
+       .dbo_lseek                      = osd_lseek,
 };
 
 struct dt_body_operations osd_body_scrub_ops = {
 };
 
 struct dt_body_operations osd_body_scrub_ops = {