Whamcloud - gitweb
LU-18595 osd-ldiskfs: use local fake file for iteration 74/57574/5
author: Alexander Boyko <alexander.boyko@hpe.com>
Mon, 23 Dec 2024 11:03:54 +0000 (12:03 +0100)
committer: Oleg Drokin <green@whamcloud.com>
Wed, 22 Jan 2025 18:41:03 +0000 (18:41 +0000)
LU-13783 introduced alloc_file_pseudo() for directory iteration.
This led to the kernel holding many file descriptors and required
special logic to drop them (LU-16973).

One of the reasons for alloc_file_pseudo() was a security_alloc() call.
However, we can initialize the iteration the way the kernel does and skip
security checks for pseudo files.

HPE-bug-id: LUS-12253
Fixes: b0f150eba4c2 ("LU-13783 osd-ldiskfs: use alloc_file_pseudo to create fake files")
Signed-off-by: Alexander Boyko <alexander.boyko@hpe.com>
Change-Id: I67d95d84913520b088578923841065e5b1d8b6df
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/57574
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andrew Perepechko <andrew.perepechko@hpe.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: James Simmons <jsimmons@infradead.org>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/autoconf/lustre-core.m4
lustre/osd-ldiskfs/osd_handler.c
lustre/osd-ldiskfs/osd_internal.h
lustre/osd-ldiskfs/osd_scrub.c

index 639835d..f09b100 100644 (file)
@@ -528,6 +528,27 @@ AC_DEFUN([LC_HAVE_LIBAIO], [
 ]) # LC_HAVE_LIBAIO
 
 #
+# LC_FOP_READDIR
+#
+# Kernel v3.10+ lost readdir
+#
+AC_DEFUN([LC_SRC_FOP_READDIR], [
+       LB2_LINUX_TEST_SRC([fop_readdir], [
+               #include <linux/fs.h>
+       ],[
+               struct file_operations fop;
+               fop.readdir = NULL;
+       ])
+])
+AC_DEFUN([LC_FOP_READDIR], [
+       LB2_MSG_LINUX_TEST_RESULT([if 'file_operations' has 'readdir'],
+       [fop_readdir], [
+               AC_DEFINE(HAVE_FOP_READDIR, 1,
+                       [file_operations has readdir])
+       ])
+]) # LC_FOP_READDIR
+
+#
 # LC_INVALIDATE_RANGE
 #
 # 3.11 invalidatepage requires the length of the range to invalidate
@@ -4764,6 +4785,7 @@ AC_DEFUN([LC_PROG_LINUX_SRC], [
        LC_SRC_D_COMPARE_5ARGS
        LC_SRC_HAVE_DCOUNT
        LC_SRC_PID_NS_FOR_CHILDREN
+       LC_SRC_FOP_READDIR
 
        # 3.12
        LC_SRC_OLDSIZE_TRUNCATE_PAGECACHE
@@ -5073,6 +5095,7 @@ AC_DEFUN([LC_PROG_LINUX_RESULTS], [
        LC_HAVE_DENTRY_D_U_D_ALIAS_HLIST
        LC_HAVE_DENTRY_D_CHILD
        LC_PID_NS_FOR_CHILDREN
+       LC_FOP_READDIR
 
        # 3.12
        LC_OLDSIZE_TRUNCATE_PAGECACHE
index 1ead9c0..d467c0b 100644 (file)
@@ -6989,27 +6989,20 @@ struct osd_it_ea *osd_it_dir_init(const struct lu_env *env,
        struct osd_thread_info *info = osd_oti_get(env);
        struct osd_it_ea *oie;
        struct file *file;
+       struct dentry *obj_dentry;
 
        ENTRY;
-       file = osd_alloc_file_pseudo(inode, dev->od_mnt, "/", O_NOATIME,
-                                    inode->i_fop);
-       if (IS_ERR(file))
-               RETURN(ERR_CAST(file));
-
-       /* Only FMODE_64BITHASH or FMODE_32BITHASH should be set, NOT both. */
-       if (attr & LUDA_64BITHASH)
-               file->f_mode |= FMODE_64BITHASH;
-       else
-               file->f_mode |= FMODE_32BITHASH;
-       ihold(inode);
-
-       OBD_SLAB_ALLOC_PTR(oie, osd_itea_cachep);
-       if (!oie)
-               goto out_fput;
-
-       oie->oie_rd_dirent       = 0;
-       oie->oie_it_dirent       = 0;
-       oie->oie_dirent          = NULL;
+       OBD_SLAB_ALLOC_PTR_GFP(oie, osd_itea_cachep, GFP_NOFS);
+       if (oie == NULL)
+               RETURN(ERR_PTR(-ENOMEM));
+       obj_dentry = &oie->oie_dentry;
+       obj_dentry->d_inode = inode;
+       obj_dentry->d_sb = inode->i_sb;
+       obj_dentry->d_name.hash = 0;
+
+       oie->oie_rd_dirent = 0;
+       oie->oie_it_dirent = 0;
+       oie->oie_dirent = NULL;
        if (unlikely(!info->oti_it_ea_buf_used)) {
                oie->oie_buf = info->oti_it_ea_buf;
                info->oti_it_ea_buf_used = 1;
@@ -7019,14 +7012,22 @@ struct osd_it_ea *osd_it_dir_init(const struct lu_env *env,
                        goto out_free;
        }
        oie->oie_obj = NULL;
-       oie->oie_file = file;
+       file = &oie->oie_file;
+       /* Only FMODE_64BITHASH or FMODE_32BITHASH should be set, NOT both. */
+       if (attr & LUDA_64BITHASH)
+               file->f_mode |= FMODE_64BITHASH;
+       else
+               file->f_mode |= FMODE_32BITHASH;
+       file->f_path.dentry = obj_dentry;
+       file->f_flags = O_NOATIME | __FMODE_NONOTIFY;
+       file->f_mapping = inode->i_mapping;
+       file->f_op = inode->i_fop;
+       file->f_inode = inode;
 
        RETURN(oie);
 
 out_free:
        OBD_SLAB_FREE_PTR(oie, osd_itea_cachep);
-out_fput:
-       fput(file);
 
        return ERR_PTR(-ENOMEM);
 }
@@ -7066,11 +7067,12 @@ void osd_it_dir_fini(const struct lu_env *env, struct osd_it_ea *oie,
        struct osd_thread_info *info = osd_oti_get(env);
 
        ENTRY;
-       fput(oie->oie_file);
+       oie->oie_file.f_op->release(inode, &oie->oie_file);
        if (unlikely(oie->oie_buf != info->oti_it_ea_buf))
                OBD_FREE(oie->oie_buf, OSD_IT_EA_BUFSIZE);
        else
                info->oti_it_ea_buf_used = 0;
+
        OBD_SLAB_FREE_PTR(oie, osd_itea_cachep);
        EXIT;
 }
@@ -7109,7 +7111,7 @@ static int osd_it_ea_get(const struct lu_env *env,
 
        ENTRY;
        LASSERT(((const char *)key)[0] == '\0');
-       it->oie_file->f_pos = 0;
+       it->oie_file.f_pos = 0;
        it->oie_rd_dirent = 0;
        it->oie_it_dirent = 0;
        it->oie_dirent = NULL;
@@ -7229,7 +7231,8 @@ int osd_ldiskfs_it_fill(const struct lu_env *env, const struct dt_it *di)
        struct osd_it_ea *it = (struct osd_it_ea *)di;
        struct osd_object *obj = it->oie_obj;
        struct htree_lock *hlock = NULL;
-       struct file *filp = it->oie_file;
+       struct file *filp = &it->oie_file;
+       struct inode *ino = file_inode(filp);
        int rc = 0;
        struct osd_filldir_cbs buf = {
                .ctx.actor = osd_ldiskfs_filldir,
@@ -7251,7 +7254,30 @@ int osd_ldiskfs_it_fill(const struct lu_env *env, const struct dt_it *di)
                }
        }
 
-       rc = iterate_dir(filp, &buf.ctx);
+#ifdef HAVE_FOP_ITERATE_SHARED
+       inode_lock_shared(ino);
+#else
+       inode_lock(ino);
+#endif
+       if (!IS_DEADDIR(ino)) {
+               if (filp->f_op->iterate_shared) {
+                       buf.ctx.pos = filp->f_pos;
+                       rc = filp->f_op->iterate_shared(filp, &buf.ctx);
+                       filp->f_pos = buf.ctx.pos;
+               } else {
+#ifdef HAVE_FOP_READDIR
+                       rc = filp->f_op->readdir(filp, &buf.ctx, buf.ctx.actor);
+                        buf.ctx.pos = filp->f_pos;
+#else
+                       rc = -ENOTDIR;
+#endif
+               }
+       }
+#ifdef HAVE_FOP_ITERATE_SHARED
+       inode_unlock_shared(ino);
+#else
+       inode_unlock(ino);
+#endif
        if (rc)
                GOTO(unlock, rc);
 
@@ -7260,7 +7286,7 @@ int osd_ldiskfs_it_fill(const struct lu_env *env, const struct dt_it *di)
                 * If it does not get any dirent, it means it has been reached
                 * to the end of the dir
                 */
-               it->oie_file->f_pos = ldiskfs_get_htree_eof(it->oie_file);
+               it->oie_file.f_pos = ldiskfs_get_htree_eof(&it->oie_file);
                if (rc == 0)
                        rc = 1;
        } else {
@@ -7304,7 +7330,7 @@ static int osd_it_ea_next(const struct lu_env *env, struct dt_it *di)
                it->oie_it_dirent++;
                rc = 0;
        } else {
-               if (it->oie_file->f_pos == ldiskfs_get_htree_eof(it->oie_file))
+               if (it->oie_file.f_pos == ldiskfs_get_htree_eof(&it->oie_file))
                        rc = 1;
                else
                        rc = osd_ldiskfs_it_fill(env, di);
@@ -7918,7 +7944,7 @@ static int osd_it_ea_load(const struct lu_env *env,
        int rc;
 
        ENTRY;
-       it->oie_file->f_pos = hash;
+       it->oie_file.f_pos = hash;
 
        rc =  osd_ldiskfs_it_fill(env, di);
        if (rc > 0)
index 4c99a4b..6b91eae 100644 (file)
@@ -520,7 +520,7 @@ struct osd_it_ea_dirent {
 struct osd_it_ea {
        struct osd_object       *oie_obj;
        /** used in ldiskfs iterator, to stored file pointer */
-       struct file             *oie_file;
+       struct file             oie_file;
        /** how many entries have been read-cached from storage */
        int                     oie_rd_dirent;
        /** current entry is being iterated by caller */
@@ -529,6 +529,7 @@ struct osd_it_ea {
        struct osd_it_ea_dirent *oie_dirent;
        /** buffer to hold entries, size == OSD_IT_EA_BUFSIZE */
        void                    *oie_buf;
+       struct dentry           oie_dentry;
 };
 
 /**
index 12609ab..0ff58e1 100644 (file)
@@ -3032,7 +3032,7 @@ static int osd_scan_dir(const struct lu_env *env, struct osd_device *dev,
        if (IS_ERR(oie))
                RETURN(PTR_ERR(oie));
 
-       oie->oie_file->f_pos = 0;
+       oie->oie_file.f_pos = 0;
        rc = osd_ldiskfs_it_fill(env, (struct dt_it *)oie);
        if (rc > 0)
                rc = -ENODATA;
@@ -3052,8 +3052,8 @@ static int osd_scan_dir(const struct lu_env *env, struct osd_device *dev,
                if (oie->oie_it_dirent <= oie->oie_rd_dirent)
                        continue;
 
-               if (oie->oie_file->f_pos ==
-                   ldiskfs_get_htree_eof(oie->oie_file))
+               if (oie->oie_file.f_pos ==
+                   ldiskfs_get_htree_eof(&oie->oie_file))
                        break;
 
                rc = osd_ldiskfs_it_fill(env, (struct dt_it *)oie);