From 82ec537d8b4cc9261828f4efe6b03d8d33f38432 Mon Sep 17 00:00:00 2001 From: Oleg Drokin Date: Mon, 29 Nov 2021 16:45:16 -0500 Subject: [PATCH] LU-15285 mdt: fix same-dir racing rename deadlock With LU-12125 lifting the BFL for same directory rename, a deadlock possibility opens up since we lock source and target of rename in the source-target order, if there are two renames racing to rename arguments in reverse order: mv a b & mv b a a lock inversion happens and a deadlock has been observed. To avert this - instill additional order requirement: lower PDO hash value is to be locked ahead of the higher one. Fixes: d76cc65d5d68 ("LU-12125 mds: allow parallel regular file rename") Fixes: b50bb830f92e ("LU-3538 dne: Commit-on-Sharing for DNE") Fixes: 9f1711f3d7d1 ("LU-12081 mdt: rename shouldn't PDO lock if parent is remote") Change-Id: I88dd3aebb394ea40e97e6029d6dcc161116f982e Signed-off-by: Oleg Drokin Reviewed-on: https://review.whamcloud.com/45676 Reviewed-by: Andreas Dilger Tested-by: jenkins Reviewed-by: Patrick Farrell Tested-by: Maloo Reviewed-by: Lai Siyao --- lustre/mdt/mdt_reint.c | 92 +++++++++++++++++++++++++++++--------------------- 1 file changed, 54 insertions(+), 38 deletions(-) diff --git a/lustre/mdt/mdt_reint.c b/lustre/mdt/mdt_reint.c index 07b8818..3d40151 100644 --- a/lustre/mdt/mdt_reint.c +++ b/lustre/mdt/mdt_reint.c @@ -2615,6 +2615,43 @@ static int mdt_rename_source_lock(struct mdt_thread_info *info, return rc; } +/* Helper function for mdt_reint_rename so we don't need to opencode + * two different order lockings + */ +static int mdt_lock_two_dirs(struct mdt_thread_info *info, + struct mdt_object *mfirstdir, + struct mdt_lock_handle *lh_firstdirp, + struct mdt_object *mseconddir, + struct mdt_lock_handle *lh_seconddirp, + bool cos_incompat) +{ + int rc; + + rc = mdt_object_lock_save(info, mfirstdir, lh_firstdirp, 0, + cos_incompat); + if (rc) + return rc; + + OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RENAME, 5); + + if (mfirstdir != mseconddir) { + rc = mdt_object_lock_save(info, mseconddir, lh_seconddirp, 1, + cos_incompat); + } else if (!mdt_object_remote(mseconddir) && + lh_firstdirp->mlh_pdo_hash != + lh_seconddirp->mlh_pdo_hash) { + rc = mdt_pdir_hash_lock(info, lh_seconddirp, mseconddir, + MDS_INODELOCK_UPDATE, + cos_incompat); + OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_PDO_LOCK2, 10); + } + + if (rc != 0) + mdt_object_unlock(info, mfirstdir, lh_firstdirp, rc); + + return rc; +} + /* * VBR: rename versions in reply: 0 - srcdir parent; 1 - tgtdir parent; * 2 - srcdir child; 3 - tgtdir child. @@ -2714,7 +2751,6 @@ static int mdt_reint_rename(struct mdt_thread_info *info, rc = mdt_rename_determine_lock_order(info, msrcdir, mtgtdir); if (rc < 0) GOTO(out_unlock_rename, rc); - reverse = rc; /* source needs to be looked up after locking source parent, otherwise @@ -2737,44 +2773,24 @@ relock: mdt_lock_pdo_init(lh_srcdirp, LCK_PW, &rr->rr_name); mdt_lock_pdo_init(lh_tgtdirp, LCK_PW, &rr->rr_tgt_name); - if (reverse) { - rc = mdt_object_lock_save(info, mtgtdir, lh_tgtdirp, 1, - cos_incompat); - if (rc) - GOTO(out_unlock_rename, rc); - - OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RENAME, 5); - - rc = mdt_object_lock_save(info, msrcdir, lh_srcdirp, 0, - cos_incompat); - if (rc != 0) { - mdt_object_unlock(info, mtgtdir, lh_tgtdirp, rc); - GOTO(out_unlock_rename, rc); - } - } else { - rc = mdt_object_lock_save(info, msrcdir, lh_srcdirp, 0, - cos_incompat); - if (rc) - GOTO(out_unlock_rename, rc); - - OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RENAME, 5); + /* In case of same dir local rename we must sort by the hash, + * otherwise a lock deadlock is possible when renaming + * a to b and b to a at the same time LU-15285 + */ + if (!mdt_object_remote(mtgtdir) && mtgtdir == msrcdir) + reverse = lh_srcdirp->mlh_pdo_hash > lh_tgtdirp->mlh_pdo_hash; + if (unlikely(OBD_FAIL_PRECHECK(OBD_FAIL_MDS_PDO_LOCK))) + reverse = 0; + + if (reverse) + rc = mdt_lock_two_dirs(info, mtgtdir, lh_tgtdirp, msrcdir, + lh_srcdirp, cos_incompat); + else + rc = mdt_lock_two_dirs(info, msrcdir, lh_srcdirp, mtgtdir, + lh_tgtdirp, cos_incompat); - if (mtgtdir != msrcdir) { - rc = mdt_object_lock_save(info, mtgtdir, lh_tgtdirp, 1, - cos_incompat); - } else if (!mdt_object_remote(mtgtdir) && - lh_srcdirp->mlh_pdo_hash != - lh_tgtdirp->mlh_pdo_hash) { - rc = mdt_pdir_hash_lock(info, lh_tgtdirp, mtgtdir, - MDS_INODELOCK_UPDATE, - cos_incompat); - OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_PDO_LOCK2, 10); - } - if (rc != 0) { - mdt_object_unlock(info, msrcdir, lh_srcdirp, rc); - GOTO(out_unlock_rename, rc); - } - } + if (rc != 0) + GOTO(out_unlock_rename, rc); OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RENAME4, 5); OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RENAME2, 5); -- 1.8.3.1