Whamcloud - gitweb
LU-17426 mdt: relax same MDT file rename lock
authorLai Siyao <lai.siyao@whamcloud.com>
Fri, 19 Jan 2024 19:13:19 +0000 (12:13 -0700)
committerAndreas Dilger <adilger@whamcloud.com>
Mon, 29 Jan 2024 08:51:58 +0000 (08:51 +0000)
Allow cross-directory rename of regular files (strictly, any
non-directory) on the same MDT without holding the BigFilesystemLock
(BFL), as file renames cannot change the directory hierarchy.

This should improve the performance for these rename operations, and
reduce contention between local MDT file renames in different parts of
the directory tree.

Add "mdt.*.enable_parallel_rename_crossdir" parameter to disable
cross-directory file renames if there is an issue with this change.

Lustre-change: https://review.whamcloud.com/53726
Lustre-commit: TBD (from d466465f9add30faec256ce7f725e0f36d4e8a66)

Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: I511b392e46c46140cac6aa3ede02bfe793729f7f
Reviewed-on: https://review.whamcloud.com/c/ex/lustre-release/+/53744
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_lproc.c
lustre/mdt/mdt_reint.c

index c5439c2..97a12e3 100644 (file)
@@ -6060,6 +6060,7 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m,
        m->mdt_enable_dir_auto_split = 0;
        m->mdt_enable_parallel_rename_dir = 1;
        m->mdt_enable_parallel_rename_file = 1;
+       m->mdt_enable_parallel_rename_crossdir = 1;
        m->mdt_enable_pin_gid = 0;
        m->mdt_enable_remote_dir = 1;
        m->mdt_enable_remote_dir_gid = 0;
index 5b4a4eb..0ccb593 100644 (file)
@@ -304,6 +304,7 @@ struct mdt_device {
                                   mdt_enable_dir_auto_split:1,
                                   mdt_enable_parallel_rename_dir:1,
                                   mdt_enable_parallel_rename_file:1,
+                                  mdt_enable_parallel_rename_crossdir:1,
                                   mdt_enable_remote_dir:1,
                                   mdt_enable_remote_rename:1,
                                   mdt_enable_striped_dir:1,
index 982038b..675abd7 100644 (file)
@@ -179,10 +179,10 @@ void mdt_rename_counter_tally(struct mdt_thread_info *info,
                return;
        }
 
+       if (msi) /* parallel rename type */
+               mdt_counter_incr(req, msi, ktime_delta);
        if (src == tgt) {
                mdt_counter_incr(req, LPROC_MDT_RENAME_SAMEDIR, ktime_delta);
-               if (msi) /* parallel rename type */
-                       mdt_counter_incr(req, msi, ktime_delta);
                lprocfs_oh_tally_log2(&rstats->rs_hist[RENAME_SAMEDIR_SIZE],
                                      (unsigned int)ma->ma_attr.la_size);
                return;
@@ -761,6 +761,7 @@ MDT_BOOL_RW_ATTR(enable_remote_dir);
 MDT_BOOL_RW_ATTR(enable_remote_rename);
 MDT_BOOL_RW_ATTR(enable_parallel_rename_dir);
 MDT_BOOL_RW_ATTR(enable_parallel_rename_file);
+MDT_BOOL_RW_ATTR(enable_parallel_rename_crossdir);
 MDT_BOOL_RW_ATTR(enable_striped_dir);
 MDT_BOOL_RW_ATTR(enable_dir_migration);
 MDT_BOOL_RW_ATTR(enable_dir_restripe);
@@ -1401,6 +1402,7 @@ static struct attribute *mdt_attrs[] = {
        &lustre_attr_enable_dir_auto_split.attr,
        &lustre_attr_enable_parallel_rename_dir.attr,
        &lustre_attr_enable_parallel_rename_file.attr,
+       &lustre_attr_enable_parallel_rename_crossdir.attr,
        &lustre_attr_enable_pin_gid.attr,
        &lustre_attr_enable_remote_dir.attr,
        &lustre_attr_enable_remote_dir_gid.attr,
index 1bc109f..e52e84f 100644 (file)
@@ -2790,6 +2790,8 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
        bool cos_incompat;
        ktime_t kstart = ktime_get();
        enum mdt_stat_idx msi = 0;
+       bool remote;
+       bool bfl = false;
        int rc;
 
        ENTRY;
@@ -2813,6 +2815,7 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
        if (rc)
                GOTO(out_put_srcdir, rc);
 
+       remote = mdt_object_remote(msrcdir);
        OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RENAME3, 5);
 
        if (lu_fid_eq(rr->rr_fid1, rr->rr_fid2)) {
@@ -2839,8 +2842,6 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
         * get rename lock, which will cause deadlock.
         */
        if (!req_is_replay(req)) {
-               bool remote = mdt_object_remote(msrcdir);
-
                /*
                 * Normally rename RPC is handled on the MDT with the target
                 * directory (if target exists, it's on the MDT with the
@@ -2852,24 +2853,21 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
                if (!mdt->mdt_enable_remote_rename && remote)
                        GOTO(out_put_tgtdir, rc = -EXDEV);
 
-               /* This might be further relaxed in the future for regular file
-                * renames in different source and target parents. Start with
-                * only same-directory renames for simplicity and because this
-                * is by far the most the common use case.
-                *
-                * Striped directories should be considered "remote".
-                */
-               if (msrcdir != mtgtdir || remote ||
+               if (remote ||
                    (S_ISDIR(ma->ma_attr.la_mode) &&
-                    !mdt->mdt_enable_parallel_rename_dir) ||
+                    (msrcdir != mtgtdir ||
+                     !mdt->mdt_enable_parallel_rename_dir)) ||
                    (!S_ISDIR(ma->ma_attr.la_mode) &&
-                    !mdt->mdt_enable_parallel_rename_file)) {
+                    (!mdt->mdt_enable_parallel_rename_file ||
+                     (msrcdir != mtgtdir ||
+                      !mdt->mdt_enable_parallel_rename_crossdir)))) {
                        rc = mdt_rename_lock(info, &rename_lh);
                        if (rc != 0) {
                                CERROR("%s: cannot lock for rename: rc = %d\n",
                                       mdt_obd_name(mdt), rc);
                                GOTO(out_put_tgtdir, rc);
                        }
+                       bfl = true;
                } else {
                        if (S_ISDIR(ma->ma_attr.la_mode))
                                msi = LPROC_MDT_RENAME_PAR_DIR;
@@ -2877,12 +2875,15 @@ static int mdt_reint_rename(struct mdt_thread_info *info,
                                msi = LPROC_MDT_RENAME_PAR_FILE;
 
                        CDEBUG(D_INFO,
-                              "%s: samedir parallel rename "DFID"/"DNAME"\n",
-                              mdt_obd_name(mdt), PFID(rr->rr_fid1),
-                              PNAME(&rr->rr_name));
+                              "%s: %s %s parallel rename "DFID"/"DNAME"\n",
+                              mdt_obd_name(mdt),
+                              msrcdir == mtgtdir ? "samedir" : "crossdir",
+                              S_ISDIR(ma->ma_attr.la_mode) ? "dir" : "file",
+                              PFID(rr->rr_fid1), PNAME(&rr->rr_name));
                }
        }
 
+lock_parents:
        rc = mdt_rename_determine_lock_order(info, msrcdir, mtgtdir);
        if (rc < 0)
                GOTO(out_unlock_rename, rc);
@@ -2956,6 +2957,26 @@ relock:
        if (mdt_object_remote(mold) && !mdt->mdt_enable_remote_rename)
                GOTO(out_put_old, rc = -EXDEV);
 
+       /* we used msrcdir as a hint to take BFL, but it may be wrong */
+       if (unlikely(!bfl && !req_is_replay(req) &&
+                    !S_ISDIR(ma->ma_attr.la_mode) &&
+                    mdt_object_remote(mold))) {
+               LASSERT(!remote);
+               mdt_object_put(info->mti_env, mold);
+               mdt_object_unlock(info, mtgtdir, lh_tgtdirp, rc);
+               mdt_object_unlock(info, msrcdir, lh_srcdirp, rc);
+
+               rc = mdt_rename_lock(info, &rename_lh);
+               if (rc != 0) {
+                       CERROR("%s: cannot re-lock for rename: rc = %d\n",
+                              mdt_obd_name(mdt), rc);
+                       GOTO(out_put_tgtdir, rc);
+               }
+               bfl = true;
+               msi = 0;
+               goto lock_parents;
+       }
+
        /* Check if @mtgtdir is subdir of @mold, before locking child
         * to avoid reverse locking.
         */