Whamcloud - gitweb
LU-15913 mdt: disable parallel rename for striped dirs 93/47593/9
author Andreas Dilger <adilger@whamcloud.com>
Sat, 11 Jun 2022 01:47:00 +0000 (19:47 -0600)
committer Oleg Drokin <green@whamcloud.com>
Mon, 18 Jul 2022 05:33:48 +0000 (05:33 +0000)
Parallel rename should not be done within striped directories to
avoid remote updates.  These are like cross-directory renames.

Add tunables for parallel directory rename in case of problems.
These can be configured separately for files and directories.

    mdt.*.enable_parallel_rename_dir
    mdt.*.enable_parallel_rename_file

Fixes: 90979ab390 ("LU-12125 mds: allow parallel directory rename")
Fixes: d76cc65d5d ("LU-12125 mds: allow parallel regular file rename")
Signed-off-by: Andreas Dilger <adilger@whamcloud.com>
Change-Id: I384976cd1c9f401169336ee7a479ba0e3dd9f4ee
Reviewed-on: https://review.whamcloud.com/47593
Reviewed-by: Artem Blagodarenko <ablagodarenko@ddn.com>
Reviewed-by: Lai Siyao <lai.siyao@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
lustre/include/lprocfs_status.h
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_lproc.c
lustre/mdt/mdt_reint.c

index 85fab90..64c56e2 100644 (file)
@@ -107,18 +107,6 @@ struct obd_hist_pcpu {
        bool                    oh_initialized;
 };
 
-enum {
-        RENAME_SAMEDIR_SIZE = 0,
-        RENAME_CROSSDIR_SRC_SIZE,
-        RENAME_CROSSDIR_TGT_SIZE,
-        RENAME_LAST,
-};
-
-struct rename_stats {
-       ktime_t                 rs_init;
-       struct obd_histogram    rs_hist[RENAME_LAST];
-};
-
 /* An lprocfs counter can be configured using the enum bit masks below.
  *
  * LPROCFS_CNTR_EXTERNALLOCK indicates that an external lock already
index 8b62753..3c5ebd5 100644 (file)
@@ -5995,16 +5995,19 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
        INIT_LIST_HEAD(&m->mdt_squash.rsi_nosquash_nids);
        spin_lock_init(&m->mdt_squash.rsi_lock);
        spin_lock_init(&m->mdt_lock);
-       m->mdt_enable_remote_dir = 1;
-       m->mdt_enable_striped_dir = 1;
+       m->mdt_enable_chprojid_gid = 0;
        m->mdt_enable_dir_migration = 1;
        m->mdt_enable_dir_restripe = 0;
        m->mdt_enable_dir_auto_split = 0;
+       m->mdt_enable_parallel_rename_dir = 1;
+       m->mdt_enable_parallel_rename_file = 1;
+       m->mdt_enable_remote_dir = 1;
        m->mdt_enable_remote_dir_gid = 0;
-       m->mdt_enable_chprojid_gid = 0;
        m->mdt_enable_remote_rename = 1;
-       m->mdt_dir_restripe_nsonly = 1;
        m->mdt_enable_remote_subdir_mount = 1;
+       m->mdt_enable_striped_dir = 1;
+       m->mdt_dir_restripe_nsonly = 1;
+       m->mdt_rename_stats.rs_init = ktime_get();
 
        atomic_set(&m->mdt_mds_mds_conns, 0);
        atomic_set(&m->mdt_async_commit_count, 0);
index a5a9178..8715ec3 100644 (file)
@@ -205,6 +205,18 @@ struct mdt_statfs_cache {
        __u64 msf_age;
 };
 
+enum mdt_rename_type {
+       RENAME_SAMEDIR_SIZE = 0,
+       RENAME_CROSSDIR_SRC_SIZE,
+       RENAME_CROSSDIR_TGT_SIZE,
+       RENAME_LAST
+};
+
+struct rename_stats {
+       ktime_t                 rs_init;
+       struct obd_histogram    rs_hist[RENAME_LAST];
+};
+
 /* split directory automatically when sub file count exceeds 50k */
 #define DIR_SPLIT_COUNT_DEFAULT        50000
 
@@ -262,35 +274,37 @@ struct mdt_device {
                                   mo_dom_read_open:1,
                                   mo_migrate_hsm_allowed:1,
                                   mo_enable_strict_som:1;
-               unsigned int       mo_dom_lock;
+               unsigned int       mo_dom_lock;
        } mdt_opts;
-        /* mdt state flags */
-        unsigned long              mdt_state;
+       /* mdt state flags */
+       unsigned long              mdt_state;
 
-        /* transaction callbacks */
-        struct dt_txn_callback     mdt_txn_cb;
+       /* transaction callbacks */
+       struct dt_txn_callback     mdt_txn_cb;
 
-        /* these values should be updated from lov if necessary.
-         * or should be placed somewhere else. */
-        int                        mdt_max_mdsize;
+       /* these values should be updated from lov if necessary.
+        * or should be placed somewhere else. */
+       int                        mdt_max_mdsize;
 
        int                        mdt_max_ea_size;
 
        /* preferred BRW size, decided by storage type and capability */
        __u32                      mdt_brw_size;
 
-        struct upcall_cache        *mdt_identity_cache;
+       struct upcall_cache       *mdt_identity_cache;
 
-       unsigned int               mdt_capa_conf:1,
+       unsigned int               mdt_capa_conf:1,
                                   /* Enable remote dir on non-MDT0 */
-                                  mdt_enable_remote_dir:1,
-                                  mdt_enable_striped_dir:1,
                                   mdt_enable_dir_migration:1,
                                   mdt_enable_dir_restripe:1,
                                   mdt_enable_dir_auto_split:1,
+                                  mdt_enable_parallel_rename_dir:1,
+                                  mdt_enable_parallel_rename_file:1,
+                                  mdt_enable_remote_dir:1,
                                   mdt_enable_remote_rename:1,
-                                  mdt_skip_lfsck:1,
+                                  mdt_enable_striped_dir:1,
                                   mdt_readonly:1,
+                                  mdt_skip_lfsck:1,
                                   /* dir restripe migrate dirent only */
                                   mdt_dir_restripe_nsonly:1,
                                   /* subdirectory mount of remote dir */
@@ -310,9 +324,9 @@ struct mdt_device {
        struct mdt_statfs_cache    mdt_osfs;
 
         /* root squash */
-       struct root_squash_info    mdt_squash;
+       struct root_squash_info    mdt_squash;
 
-        struct rename_stats        mdt_rename_stats;
+       struct rename_stats        mdt_rename_stats;
        struct lu_fid              mdt_md_root_fid;
 
        /* connection to quota master */
@@ -1298,8 +1312,10 @@ enum mdt_stat_idx {
         LPROC_MDT_SETXATTR,
         LPROC_MDT_STATFS,
         LPROC_MDT_SYNC,
-       LPROC_MDT_SAMEDIR_RENAME,
-       LPROC_MDT_CROSSDIR_RENAME,
+       LPROC_MDT_RENAME_SAMEDIR,
+       LPROC_MDT_RENAME_PAR_FILE,
+       LPROC_MDT_RENAME_PAR_DIR,
+       LPROC_MDT_RENAME_CROSSDIR,
        LPROC_MDT_IO_READ,
        LPROC_MDT_IO_WRITE,
        LPROC_MDT_IO_READ_BYTES,
@@ -1322,7 +1338,7 @@ void mdt_rename_counter_tally(struct mdt_thread_info *info,
                              struct mdt_device *mdt,
                              struct ptlrpc_request *req,
                              struct mdt_object *src, struct mdt_object *tgt,
-                             long count);
+                             enum mdt_stat_idx msi, s64 count);
 
 static inline struct obd_device *mdt2obd_dev(const struct mdt_device *mdt)
 {
index 78fb699..6dfc117 100644 (file)
@@ -164,13 +164,15 @@ static int lproc_mdt_attach_rename_seqstat(struct mdt_device *mdt)
 void mdt_rename_counter_tally(struct mdt_thread_info *info,
                              struct mdt_device *mdt,
                              struct ptlrpc_request *req,
-                             struct mdt_object *src,
-                             struct mdt_object *tgt, long count)
+                             struct mdt_object *src, struct mdt_object *tgt,
+                             enum mdt_stat_idx msi, s64 ktime_delta)
 {
        struct md_attr *ma = &info->mti_attr;
        struct rename_stats *rstats = &mdt->mdt_rename_stats;
        int rc;
 
+       mdt_counter_incr(req, LPROC_MDT_RENAME, ktime_delta);
+
        ma->ma_need = MA_INODE;
        ma->ma_valid = 0;
        rc = mo_attr_get(info->mti_env, mdt_object_child(src), ma);
@@ -181,13 +183,15 @@ void mdt_rename_counter_tally(struct mdt_thread_info *info,
        }
 
        if (src == tgt) {
-               mdt_counter_incr(req, LPROC_MDT_SAMEDIR_RENAME, count);
+               mdt_counter_incr(req, LPROC_MDT_RENAME_SAMEDIR, ktime_delta);
+               if (msi) /* parallel rename type */
+                       mdt_counter_incr(req, msi, ktime_delta);
                lprocfs_oh_tally_log2(&rstats->rs_hist[RENAME_SAMEDIR_SIZE],
                                      (unsigned int)ma->ma_attr.la_size);
                return;
        }
 
-       mdt_counter_incr(req, LPROC_MDT_CROSSDIR_RENAME, count);
+       mdt_counter_incr(req, LPROC_MDT_RENAME_CROSSDIR, ktime_delta);
        lprocfs_oh_tally_log2(&rstats->rs_hist[RENAME_CROSSDIR_SRC_SIZE],
                              (unsigned int)ma->ma_attr.la_size);
 
@@ -708,6 +712,72 @@ static ssize_t enable_chprojid_gid_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(enable_chprojid_gid);
 
+static ssize_t enable_parallel_rename_dir_show(struct kobject *kobj,
+                                              struct attribute *attr,
+                                              char *buf)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+
+       return scnprintf(buf, PAGE_SIZE, "%u\n",
+                        mdt->mdt_enable_parallel_rename_dir);
+}
+
+static ssize_t enable_parallel_rename_dir_store(struct kobject *kobj,
+                                               struct attribute *attr,
+                                               const char *buffer,
+                                               size_t count)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+       bool val;
+       int rc;
+
+       rc = kstrtobool(buffer, &val);
+       if (rc)
+               return rc;
+
+       mdt->mdt_enable_parallel_rename_dir = val;
+
+       return count;
+}
+LUSTRE_RW_ATTR(enable_parallel_rename_dir);
+
+static ssize_t enable_parallel_rename_file_show(struct kobject *kobj,
+                                               struct attribute *attr,
+                                               char *buf)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+
+       return scnprintf(buf, PAGE_SIZE, "%u\n",
+                        mdt->mdt_enable_parallel_rename_file);
+}
+
+static ssize_t enable_parallel_rename_file_store(struct kobject *kobj,
+                                                struct attribute *attr,
+                                                const char *buffer,
+                                                size_t count)
+{
+       struct obd_device *obd = container_of(kobj, struct obd_device,
+                                             obd_kset.kobj);
+       struct mdt_device *mdt = mdt_dev(obd->obd_lu_dev);
+       bool val;
+       int rc;
+
+       rc = kstrtobool(buffer, &val);
+       if (rc)
+               return rc;
+
+       mdt->mdt_enable_parallel_rename_file = val;
+
+       return count;
+}
+LUSTRE_RW_ATTR(enable_parallel_rename_file);
+
 static ssize_t enable_striped_dir_show(struct kobject *kobj,
                                       struct attribute *attr, char *buf)
 {
@@ -1451,14 +1521,16 @@ static struct attribute *mdt_attrs[] = {
        &lustre_attr_identity_upcall.attr,
        &lustre_attr_identity_flush.attr,
        &lustre_attr_evict_tgt_nids.attr,
-       &lustre_attr_enable_remote_dir.attr,
-       &lustre_attr_enable_remote_dir_gid.attr,
        &lustre_attr_enable_chprojid_gid.attr,
-       &lustre_attr_enable_striped_dir.attr,
        &lustre_attr_enable_dir_migration.attr,
        &lustre_attr_enable_dir_restripe.attr,
        &lustre_attr_enable_dir_auto_split.attr,
+       &lustre_attr_enable_parallel_rename_dir.attr,
+       &lustre_attr_enable_parallel_rename_file.attr,
+       &lustre_attr_enable_remote_dir.attr,
+       &lustre_attr_enable_remote_dir_gid.attr,
        &lustre_attr_enable_remote_rename.attr,
+       &lustre_attr_enable_striped_dir.attr,
        &lustre_attr_commit_on_sharing.attr,
        &lustre_attr_local_recovery.attr,
        &lustre_attr_async_commit_count.attr,
@@ -1575,8 +1647,10 @@ static const char * const mdt_stats[] = {
        [LPROC_MDT_SETXATTR]            = "setxattr",
        [LPROC_MDT_STATFS]              = "statfs",
        [LPROC_MDT_SYNC]                = "sync",
-       [LPROC_MDT_SAMEDIR_RENAME]      = "samedir_rename",
-       [LPROC_MDT_CROSSDIR_RENAME]     = "crossdir_rename",
+       [LPROC_MDT_RENAME_SAMEDIR]      = "samedir_rename",
+       [LPROC_MDT_RENAME_PAR_FILE]     = "parallel_rename_file",
+       [LPROC_MDT_RENAME_PAR_DIR]      = "parallel_rename_dir",
+       [LPROC_MDT_RENAME_CROSSDIR]     = "crossdir_rename",
        [LPROC_MDT_IO_READ_BYTES]       = "read_bytes",
        [LPROC_MDT_IO_WRITE_BYTES]      = "write_bytes",
        [LPROC_MDT_IO_READ]             = "read",
index 3f1eca6..a60f1d1 100644 (file)
@@ -2694,6 +2694,7 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
        bool reverse = false, discard = false;
        bool cos_incompat;
        ktime_t kstart = ktime_get();
+       enum mdt_stat_idx msi = 0;
        int rc;
 
        ENTRY;
@@ -2739,6 +2740,8 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
         * get rename lock, which will cause deadlock.
         */
        if (!req_is_replay(req)) {
+               bool remote = mdt_object_remote(msrcdir);
+
                /*
                 * Normally rename RPC is handled on the MDT with the target
                 * directory (if target exists, it's on the MDT with the
@@ -2747,16 +2750,21 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
                 * cause any issue), return -EXDEV early to avoid taking
                 * rename_lock.
                 */
-               if (!mdt->mdt_enable_remote_rename &&
-                   mdt_object_remote(msrcdir))
+               if (!mdt->mdt_enable_remote_rename && remote)
                        GOTO(out_put_tgtdir, rc = -EXDEV);
 
                /* This might be further relaxed in the future for regular file
                 * renames in different source and target parents. Start with
                 * only same-directory renames for simplicity and because this
                 * is by far the most the common use case.
+                *
+                * Striped directories should be considered "remote".
                 */
-               if (msrcdir != mtgtdir) {
+               if (msrcdir != mtgtdir || remote ||
+                   (S_ISDIR(ma->ma_attr.la_mode) &&
+                    !mdt->mdt_enable_parallel_rename_dir) ||
+                   (!S_ISDIR(ma->ma_attr.la_mode) &&
+                    !mdt->mdt_enable_parallel_rename_file)) {
                        rc = mdt_rename_lock(info, &rename_lh);
                        if (rc != 0) {
                                CERROR("%s: cannot lock for rename: rc = %d\n",
@@ -2764,7 +2772,13 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
                                GOTO(out_put_tgtdir, rc);
                        }
                } else {
-                       CDEBUG(D_INFO, "%s: samedir rename "DFID"/"DNAME"\n",
+                       if (S_ISDIR(ma->ma_attr.la_mode))
+                               msi = LPROC_MDT_RENAME_PAR_DIR;
+                       else
+                               msi = LPROC_MDT_RENAME_PAR_FILE;
+
+                       CDEBUG(D_INFO,
+                              "%s: samedir parallel rename "DFID"/"DNAME"\n",
                               mdt_obd_name(mdt), PFID(rr->rr_fid1),
                               PNAME(&rr->rr_name));
                }
@@ -3004,14 +3018,12 @@ relock:
 
        /* handle last link of tgt object */
        if (rc == 0) {
-               mdt_counter_incr(req, LPROC_MDT_RENAME,
-                                ktime_us_delta(ktime_get(), kstart));
                if (mnew) {
                        mdt_handle_last_unlink(info, mnew, ma);
                        discard = mdt_dom_check_for_discard(info, mnew);
                }
                mdt_rename_counter_tally(info, info->mti_mdt, req,
-                                        msrcdir, mtgtdir,
+                                        msrcdir, mtgtdir, msi,
                                         ktime_us_delta(ktime_get(), kstart));
        }