Whamcloud - gitweb
LU-15491 mdt: rename lock inversion deadlock 52/46352/8
authorOleg Drokin <green@whamcloud.com>
Fri, 21 Jun 2024 04:43:37 +0000 (12:43 +0800)
committerOleg Drokin <green@whamcloud.com>
Sat, 13 Jul 2024 20:50:25 +0000 (20:50 +0000)
MDT rewrite got rid of lock ordering by fid and replaced it
with parent/child ordering, but child to child lock inversion is
still possible with hardlinks as follows:

thread 1: mv dir1/10 dir2/10
thread 2: mv dir2/11 dir2/2
where dir1/10 is hardlink to dir4/2 and dir2/10 is hardlink to dir2/11

To solve this we enforce child ordering by fid in case of local rename

This should not create problems aynwhere else but rename since
the next closest candidate - link() does not delete the target
if it exists so it's not locked in that case.

Fixes: d76cc65d5d68 ("LU-12125 mds: allow parallel regular file rename")
Change-Id: Idd3525e8b1a0de411766bdcaa480c60d3b5e491b
Signed-off-by: Oleg Drokin <green@whamcloud.com>
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/46352
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
lustre/mdt/mdt_reint.c

index 620fecf..d674fa3 100644 (file)
@@ -2880,6 +2880,8 @@ lock_parents:
        rc = mdt_lookup_version_check(info, mtgtdir, &rr->rr_tgt_name, new_fid,
                                      3);
        if (rc == 0) {
+               bool child_reverse_lock = false;
+
                /* the new_fid should have been filled at this moment */
                if (lu_fid_eq(old_fid, new_fid))
                        GOTO(out_put_old, rc);
@@ -2923,13 +2925,30 @@ lock_parents:
                        GOTO(out_put_new, rc = -EISDIR);
 
                lh_oldp = &info->mti_lh[MDT_LH_OLD];
+               lh_newp = &info->mti_lh[MDT_LH_NEW];
+
+               /* We will lock in child fid order here to avoid a
+                * deadlock related to hardlinks thats only possible with
+                * regular files. LU-15491
+                */
+               if (!S_ISDIR(lu_object_attr(&mold->mot_obj)) &&
+                   lu_fid_cmp(old_fid, new_fid) > 0) {
+                       child_reverse_lock = true;
+                       rc = mdt_object_check_lock(info, mtgtdir, mnew, lh_newp,
+                                                  MDS_INODELOCK_LOOKUP |
+                                                  MDS_INODELOCK_UPDATE,
+                                                  LCK_EX);
+                       if (rc < 0)
+                               GOTO(out_unlock_new, rc);
+               }
+
                lh_lookup = &info->mti_lh[MDT_LH_LOOKUP];
                rc = mdt_rename_source_lock(info, msrcdir, mold, lh_oldp,
                                            lh_lookup,
                                            MDS_INODELOCK_LOOKUP |
                                            MDS_INODELOCK_XATTR);
                if (rc < 0)
-                       GOTO(out_put_new, rc);
+                       GOTO(out_unlock_new, rc);
 
                /* Check if @msrcdir is subdir of @mnew, before locking child
                 * to avoid reverse locking.
@@ -2940,7 +2959,7 @@ lock_parents:
                        if (rc) {
                                if (rc == 1)
                                        rc = -EINVAL;
-                               GOTO(out_unlock_old, rc);
+                               GOTO(out_unlock_new, rc);
                        }
                }
 
@@ -2950,12 +2969,14 @@ lock_parents:
                 * lock. See LU-4002.
                 */
 
-               lh_newp = &info->mti_lh[MDT_LH_NEW];
-               rc = mdt_object_check_lock(info, mtgtdir, mnew, lh_newp,
-                                          MDS_INODELOCK_LOOKUP |
-                                          MDS_INODELOCK_UPDATE, LCK_EX);
-               if (rc != 0)
-                       GOTO(out_unlock_new, rc);
+               if (!child_reverse_lock) {
+                       rc = mdt_object_check_lock(info, mtgtdir, mnew, lh_newp,
+                                                  MDS_INODELOCK_LOOKUP |
+                                                  MDS_INODELOCK_UPDATE,
+                                                  LCK_EX);
+                       if (rc != 0)
+                               GOTO(out_unlock_new, rc);
+               }
 
                /* get and save version after locking */
                mdt_version_get_save(info, mnew, 3);
@@ -3007,7 +3028,7 @@ out_unlock_new:
        if (mnew != NULL)
                /* mnew is gone, no need to keep lock */
                mdt_object_unlock(info, mnew, lh_newp, 1);
-out_unlock_old:
+
        mdt_object_unlock(info, NULL, lh_lookup, rc);
        mdt_object_unlock(info, mold, lh_oldp, rc);
 out_put_new: