Whamcloud - gitweb
LU-17426 mdt: relax same MDT file rename lock 26/53726/5
authorLai Siyao <lai.siyao@whamcloud.com>
Tue, 16 Jan 2024 01:33:22 +0000 (20:33 -0500)
committerOleg Drokin <green@whamcloud.com>
Sun, 4 Feb 2024 08:26:16 +0000 (08:26 +0000)
Allow cross-directory rename of regular files (strictly, any
non-directory) on the same MDT without holding the BigFilesystemLock
(BFL), as file renames cannot change the directory hierarchy.

This should improve the performance for these rename operations, and
reduce contention between local MDT file renames in different parts of
the directory tree.

Add "mdt.*.enable_parallel_rename_crossdir" parameter to disable
cross-directory file renames if there is an issue with this change.

Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: I511b392e46c46140cac6aa3ede02bfe793729f7f
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53726
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_lproc.c
lustre/mdt/mdt_reint.c

index 1d33c99..3a2a3a2 100644 (file)
@@ -6208,6 +6208,7 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
        m->mdt_enable_dir_auto_split = 0;
        m->mdt_enable_parallel_rename_dir = 1;
        m->mdt_enable_parallel_rename_file = 1;
+       m->mdt_enable_parallel_rename_crossdir = 1;
        m->mdt_enable_remote_dir = 1;
        m->mdt_enable_remote_dir_gid = 0;
        m->mdt_enable_remote_rename = 1;
index 22efed4..c92ae96 100644 (file)
@@ -303,6 +303,7 @@ struct mdt_device {
                                   mdt_enable_dir_auto_split:1,
                                   mdt_enable_parallel_rename_dir:1,
                                   mdt_enable_parallel_rename_file:1,
+                                  mdt_enable_parallel_rename_crossdir:1,
                                   mdt_enable_remote_dir:1,
                                   mdt_enable_remote_rename:1,
                                   mdt_enable_striped_dir:1,
index c6108eb..48b6ae8 100644 (file)
@@ -178,10 +178,11 @@ void mdt_rename_counter_tally(struct mdt_thread_info *info,
                return;
        }
 
+       if (msi) /* parallel rename type */
+               mdt_counter_incr(req, msi, ktime_delta);
+
        if (src == tgt) {
                mdt_counter_incr(req, LPROC_MDT_RENAME_SAMEDIR, ktime_delta);
-               if (msi) /* parallel rename type */
-                       mdt_counter_incr(req, msi, ktime_delta);
                lprocfs_oh_tally_log2(&rstats->rs_hist[RENAME_SAMEDIR_SIZE],
                                      (unsigned int)ma->ma_attr.la_size);
                return;
@@ -730,6 +731,7 @@ MDT_BOOL_RW_ATTR(enable_remote_dir);
 MDT_BOOL_RW_ATTR(enable_remote_rename);
 MDT_BOOL_RW_ATTR(enable_parallel_rename_dir);
 MDT_BOOL_RW_ATTR(enable_parallel_rename_file);
+MDT_BOOL_RW_ATTR(enable_parallel_rename_crossdir);
 MDT_BOOL_RW_ATTR(enable_striped_dir);
 MDT_BOOL_RW_ATTR(enable_dir_migration);
 MDT_BOOL_RW_ATTR(enable_dir_restripe);
@@ -1323,6 +1325,7 @@ static struct attribute *mdt_attrs[] = {
        &lustre_attr_enable_dir_auto_split.attr,
        &lustre_attr_enable_parallel_rename_dir.attr,
        &lustre_attr_enable_parallel_rename_file.attr,
+       &lustre_attr_enable_parallel_rename_crossdir.attr,
        &lustre_attr_enable_remote_dir.attr,
        &lustre_attr_enable_remote_dir_gid.attr,
        &lustre_attr_enable_remote_rename.attr,
index 81c531c..6b4e66a 100644 (file)
@@ -2609,6 +2609,8 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
        bool reverse = false, discard = false;
        ktime_t kstart = ktime_get();
        enum mdt_stat_idx msi = 0;
+       bool remote;
+       bool bfl = false;
        int rc;
 
        ENTRY;
@@ -2632,6 +2634,7 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
        if (rc)
                GOTO(out_put_srcdir, rc);
 
+       remote = mdt_object_remote(msrcdir);
        CFS_FAIL_TIMEOUT(OBD_FAIL_MDS_RENAME3, 5);
 
        if (lu_fid_eq(rr->rr_fid1, rr->rr_fid2)) {
@@ -2658,8 +2661,6 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
         * get rename lock, which will cause deadlock.
         */
        if (!req_is_replay(req)) {
-               bool remote = mdt_object_remote(msrcdir);
-
                /*
                 * Normally rename RPC is handled on the MDT with the target
                 * directory (if target exists, it's on the MDT with the
@@ -2671,24 +2672,21 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
                if (!mdt->mdt_enable_remote_rename && remote)
                        GOTO(out_put_tgtdir, rc = -EXDEV);
 
-               /* This might be further relaxed in the future for regular file
-                * renames in different source and target parents. Start with
-                * only same-directory renames for simplicity and because this
-                * is by far the most the common use case.
-                *
-                * Striped directories should be considered "remote".
-                */
-               if (msrcdir != mtgtdir || remote ||
+               if (remote ||
                    (S_ISDIR(ma->ma_attr.la_mode) &&
-                    !mdt->mdt_enable_parallel_rename_dir) ||
+                    (msrcdir != mtgtdir ||
+                     !mdt->mdt_enable_parallel_rename_dir)) ||
                    (!S_ISDIR(ma->ma_attr.la_mode) &&
-                    !mdt->mdt_enable_parallel_rename_file)) {
+                    (!mdt->mdt_enable_parallel_rename_file ||
+                     (msrcdir != mtgtdir &&
+                      !mdt->mdt_enable_parallel_rename_crossdir)))) {
                        rc = mdt_rename_lock(info, rename_lh);
                        if (rc != 0) {
                                CERROR("%s: cannot lock for rename: rc = %d\n",
                                       mdt_obd_name(mdt), rc);
                                GOTO(out_put_tgtdir, rc);
                        }
+                       bfl = true;
                } else {
                        if (S_ISDIR(ma->ma_attr.la_mode))
                                msi = LPROC_MDT_RENAME_PAR_DIR;
@@ -2696,12 +2694,15 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
                                msi = LPROC_MDT_RENAME_PAR_FILE;
 
                        CDEBUG(D_INFO,
-                              "%s: samedir parallel rename "DFID"/"DNAME"\n",
-                              mdt_obd_name(mdt), PFID(rr->rr_fid1),
-                              PNAME(&rr->rr_name));
+                              "%s: %s %s parallel rename "DFID"/"DNAME"\n",
+                              mdt_obd_name(mdt),
+                              msrcdir == mtgtdir ? "samedir" : "crossdir",
+                              S_ISDIR(ma->ma_attr.la_mode) ? "dir" : "file",
+                              PFID(rr->rr_fid1), PNAME(&rr->rr_name));
                }
        }
 
+lock_parents:
        rc = mdt_rename_determine_lock_order(info, msrcdir, mtgtdir);
        if (rc < 0)
                GOTO(out_unlock_rename, rc);
@@ -2766,6 +2767,26 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
        if (mdt_object_remote(mold) && !mdt->mdt_enable_remote_rename)
                GOTO(out_put_old, rc = -EXDEV);
 
+       /* we used msrcdir as a hint to take BFL, but it may be wrong */
+       if (unlikely(!bfl && !req_is_replay(req) &&
+                    !S_ISDIR(ma->ma_attr.la_mode) &&
+                    mdt_object_remote(mold))) {
+               LASSERT(!remote);
+               mdt_object_put(info->mti_env, mold);
+               mdt_object_unlock(info, mtgtdir, lh_tgtdirp, rc);
+               mdt_object_unlock(info, msrcdir, lh_srcdirp, rc);
+
+               rc = mdt_rename_lock(info, rename_lh);
+               if (rc != 0) {
+                       CERROR("%s: cannot re-lock for rename: rc = %d\n",
+                              mdt_obd_name(mdt), rc);
+                       GOTO(out_put_tgtdir, rc);
+               }
+               bfl = true;
+               msi = 0;
+               goto lock_parents;
+       }
+
        /* Check if @mtgtdir is subdir of @mold, before locking child
         * to avoid reverse locking.
         */