From e66dd92775a547d755846349bfed97d831194f70 Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Mon, 15 Jan 2024 20:33:22 -0500 Subject: [PATCH] LU-17426 mdt: relax same MDT file rename lock Allow cross-directory rename of regular files (strictly, any non-directory) on the same MDT without holding the BigFilesystemLock (BFL), as file renames cannot change the directory hierarchy. This should improve the performance for these rename operations, and reduce contention between local MDT file renames in different parts of the directory tree. Add "mdt.*.enable_parallel_rename_crossdir" parameter to disable cross-directory file renames if there is an issue with this change. Signed-off-by: Lai Siyao Change-Id: I511b392e46c46140cac6aa3ede02bfe793729f7f Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/53726 Reviewed-by: Andreas Dilger Reviewed-by: Hongchao Zhang Reviewed-by: Oleg Drokin Tested-by: jenkins Tested-by: Maloo --- lustre/mdt/mdt_handler.c | 1 + lustre/mdt/mdt_internal.h | 1 + lustre/mdt/mdt_lproc.c | 7 +++++-- lustre/mdt/mdt_reint.c | 51 +++++++++++++++++++++++++++++++++-------------- 4 files changed, 43 insertions(+), 17 deletions(-) diff --git a/lustre/mdt/mdt_handler.c b/lustre/mdt/mdt_handler.c index 1d33c99..3a2a3a2 100644 --- a/lustre/mdt/mdt_handler.c +++ b/lustre/mdt/mdt_handler.c @@ -6208,6 +6208,7 @@ static int mdt_init0(const struct lu_env *env, struct mdt_device *m, m->mdt_enable_dir_auto_split = 0; m->mdt_enable_parallel_rename_dir = 1; m->mdt_enable_parallel_rename_file = 1; + m->mdt_enable_parallel_rename_crossdir = 1; m->mdt_enable_remote_dir = 1; m->mdt_enable_remote_dir_gid = 0; m->mdt_enable_remote_rename = 1; diff --git a/lustre/mdt/mdt_internal.h b/lustre/mdt/mdt_internal.h index 22efed4..c92ae96 100644 --- a/lustre/mdt/mdt_internal.h +++ b/lustre/mdt/mdt_internal.h @@ -303,6 +303,7 @@ struct mdt_device { mdt_enable_dir_auto_split:1, mdt_enable_parallel_rename_dir:1, mdt_enable_parallel_rename_file:1, + mdt_enable_parallel_rename_crossdir:1, mdt_enable_remote_dir:1, mdt_enable_remote_rename:1, mdt_enable_striped_dir:1, diff --git a/lustre/mdt/mdt_lproc.c b/lustre/mdt/mdt_lproc.c index c6108eb..48b6ae8 100644 --- a/lustre/mdt/mdt_lproc.c +++ b/lustre/mdt/mdt_lproc.c @@ -178,10 +178,11 @@ void mdt_rename_counter_tally(struct mdt_thread_info *info, return; } + if (msi) /* parallel rename type */ + mdt_counter_incr(req, msi, ktime_delta); + if (src == tgt) { mdt_counter_incr(req, LPROC_MDT_RENAME_SAMEDIR, ktime_delta); - if (msi) /* parallel rename type */ - mdt_counter_incr(req, msi, ktime_delta); lprocfs_oh_tally_log2(&rstats->rs_hist[RENAME_SAMEDIR_SIZE], (unsigned int)ma->ma_attr.la_size); return; @@ -730,6 +731,7 @@ MDT_BOOL_RW_ATTR(enable_remote_dir); MDT_BOOL_RW_ATTR(enable_remote_rename); MDT_BOOL_RW_ATTR(enable_parallel_rename_dir); MDT_BOOL_RW_ATTR(enable_parallel_rename_file); +MDT_BOOL_RW_ATTR(enable_parallel_rename_crossdir); MDT_BOOL_RW_ATTR(enable_striped_dir); MDT_BOOL_RW_ATTR(enable_dir_migration); MDT_BOOL_RW_ATTR(enable_dir_restripe); @@ -1323,6 +1325,7 @@ static struct attribute *mdt_attrs[] = { &lustre_attr_enable_dir_auto_split.attr, &lustre_attr_enable_parallel_rename_dir.attr, &lustre_attr_enable_parallel_rename_file.attr, + &lustre_attr_enable_parallel_rename_crossdir.attr, &lustre_attr_enable_remote_dir.attr, &lustre_attr_enable_remote_dir_gid.attr, &lustre_attr_enable_remote_rename.attr, diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index 81c531c..6b4e66a 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -2609,6 +2609,8 @@ static int mdt_reint_rename(struct mdt_thread_info *info, bool reverse = false, discard = false; ktime_t kstart = ktime_get(); enum mdt_stat_idx msi = 0; + bool remote; + bool bfl = false; int rc; ENTRY; @@ -2632,6 +2634,7 @@ static int mdt_reint_rename(struct mdt_thread_info *info, if (rc) GOTO(out_put_srcdir, rc); + remote = mdt_object_remote(msrcdir); CFS_FAIL_TIMEOUT(OBD_FAIL_MDS_RENAME3, 5); if (lu_fid_eq(rr->rr_fid1, rr->rr_fid2)) { @@ -2658,8 +2661,6 @@ static int mdt_reint_rename(struct mdt_thread_info *info, * get rename lock, which will cause deadlock. */ if (!req_is_replay(req)) { - bool remote = mdt_object_remote(msrcdir); - /* * Normally rename RPC is handled on the MDT with the target * directory (if target exists, it's on the MDT with the @@ -2671,24 +2672,21 @@ static int mdt_reint_rename(struct mdt_thread_info *info, if (!mdt->mdt_enable_remote_rename && remote) GOTO(out_put_tgtdir, rc = -EXDEV); - /* This might be further relaxed in the future for regular file - * renames in different source and target parents. Start with - * only same-directory renames for simplicity and because this - * is by far the most the common use case. - * - * Striped directories should be considered "remote". - */ - if (msrcdir != mtgtdir || remote || + if (remote || (S_ISDIR(ma->ma_attr.la_mode) && - !mdt->mdt_enable_parallel_rename_dir) || + (msrcdir != mtgtdir || + !mdt->mdt_enable_parallel_rename_dir)) || (!S_ISDIR(ma->ma_attr.la_mode) && - !mdt->mdt_enable_parallel_rename_file)) { + (!mdt->mdt_enable_parallel_rename_file || + (msrcdir != mtgtdir && + !mdt->mdt_enable_parallel_rename_crossdir)))) { rc = mdt_rename_lock(info, rename_lh); if (rc != 0) { CERROR("%s: cannot lock for rename: rc = %d\n", mdt_obd_name(mdt), rc); GOTO(out_put_tgtdir, rc); } + bfl = true; } else { if (S_ISDIR(ma->ma_attr.la_mode)) msi = LPROC_MDT_RENAME_PAR_DIR; @@ -2696,12 +2694,15 @@ static int mdt_reint_rename(struct mdt_thread_info *info, msi = LPROC_MDT_RENAME_PAR_FILE; CDEBUG(D_INFO, - "%s: samedir parallel rename "DFID"/"DNAME"\n", - mdt_obd_name(mdt), PFID(rr->rr_fid1), - PNAME(&rr->rr_name)); + "%s: %s %s parallel rename "DFID"/"DNAME"\n", + mdt_obd_name(mdt), + msrcdir == mtgtdir ? "samedir" : "crossdir", + S_ISDIR(ma->ma_attr.la_mode) ? "dir" : "file", + PFID(rr->rr_fid1), PNAME(&rr->rr_name)); } } +lock_parents: rc = mdt_rename_determine_lock_order(info, msrcdir, mtgtdir); if (rc < 0) GOTO(out_unlock_rename, rc); @@ -2766,6 +2767,26 @@ static int mdt_reint_rename(struct mdt_thread_info *info, if (mdt_object_remote(mold) && !mdt->mdt_enable_remote_rename) GOTO(out_put_old, rc = -EXDEV); + /* we used msrcdir as a hint to take BFL, but it may be wrong */ + if (unlikely(!bfl && !req_is_replay(req) && + !S_ISDIR(ma->ma_attr.la_mode) && + mdt_object_remote(mold))) { + LASSERT(!remote); + mdt_object_put(info->mti_env, mold); + mdt_object_unlock(info, mtgtdir, lh_tgtdirp, rc); + mdt_object_unlock(info, msrcdir, lh_srcdirp, rc); + + rc = mdt_rename_lock(info, rename_lh); + if (rc != 0) { + CERROR("%s: cannot re-lock for rename: rc = %d\n", + mdt_obd_name(mdt), rc); + GOTO(out_put_tgtdir, rc); + } + bfl = true; + msi = 0; + goto lock_parents; + } + /* Check if @mtgtdir is subdir of @mold, before locking child * to avoid reverse locking. */ -- 1.8.3.1