Whamcloud - gitweb
LU-4725 mdt: child-parent lock ordering in rename 38/9538/7
authorHongchao Zhang <hongchao.zhang@intel.com>
Sat, 5 Apr 2014 04:41:03 +0000 (12:41 +0800)
committerOleg Drokin <oleg.drokin@intel.com>
Fri, 9 May 2014 14:32:38 +0000 (14:32 +0000)
change rename so that it always has parent-child lock ordering,
otherwise it may deadlock with other operations.

Signed-off-by: Vitaly Fertman <vitaly_fertman@xyratex.com>
Signed-off-by: Hongchao Zhang <hongchao.zhang@intel.com>
Change-Id: If676da82ca50a20a4bb3aadef0f81c9c5ed3cbcb
Xyratex-bug-id: MRP-1700
Reviewed-on: http://review.whamcloud.com/9538
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: wangdi <di.wang@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/obd_support.h
lustre/mdt/mdt_reint.c
lustre/tests/sanityn.sh

index 7365f60..f66a509 100644 (file)
@@ -243,6 +243,9 @@ int obd_alloc_fail(const void *ptr, const char *name, const char *type,
 #define OBD_FAIL_MDS_HSM_ACTION_NET            0x150
 #define OBD_FAIL_MDS_CHANGELOG_INIT            0x151
 #define OBD_FAIL_MDS_HSM_SWAP_LAYOUTS          0x152
+#define OBD_FAIL_MDS_RENAME              0x153
+#define OBD_FAIL_MDS_RENAME2             0x154
+#define OBD_FAIL_MDS_RENAME3             0x155
 
 /* layout lock */
 #define OBD_FAIL_MDS_NO_LL_GETATTR      0x170
index 948a7b3..1cdb175 100644 (file)
@@ -1248,17 +1248,18 @@ static void mdt_rename_unlock(struct lustre_handle *lh)
  * target. Source should not be ancestor of target dir. May be other rename
  * checks can be moved here later.
  */
-static int mdt_rename_sanity(struct mdt_thread_info *info, struct lu_fid *fid)
+static int mdt_rename_sanity(struct mdt_thread_info *info,
+                            const struct lu_fid *dir_fid,
+                            const struct lu_fid *fid)
 {
-        struct mdt_reint_record *rr = &info->mti_rr;
-        struct lu_fid dst_fid = *rr->rr_fid2;
         struct mdt_object *dst;
+       struct lu_fid dst_fid = *dir_fid;
         int rc = 0;
         ENTRY;
 
        /* If the source and target are in the same directory, they can not
         * be parent/child relationship, so subdir check is not needed */
-       if (lu_fid_eq(rr->rr_fid1, rr->rr_fid2))
+       if (lu_fid_eq(dir_fid, fid))
                return 0;
 
        do {
@@ -1270,7 +1271,13 @@ static int mdt_rename_sanity(struct mdt_thread_info *info, struct lu_fid *fid)
                                           &dst_fid);
                        mdt_object_put(info->mti_env, dst);
                        if (rc != -EREMOTE && rc < 0) {
-                               CERROR("Failed mdo_is_subdir(), rc %d\n", rc);
+                               CERROR("%s: failed subdir check in "DFID" for "
+                                      DFID": rc = %d\n",
+                                      mdt_obd_name(info->mti_mdt),
+                                      PFID(dir_fid), PFID(fid), rc);
+                               /* Return EINVAL only if a parent is the @fid */
+                               if (rc == -EINVAL)
+                                       rc = -EIO;
                        } else {
                                /* check the found fid */
                                if (lu_fid_eq(&dst_fid, fid))
@@ -1570,6 +1577,144 @@ out_put_parent:
        RETURN(rc);
 }
 
+static struct mdt_object *mdt_object_find_check(struct mdt_thread_info *info,
+                                               const struct lu_fid *fid,
+                                               int idx)
+{
+       struct mdt_object *dir;
+       int rc;
+       ENTRY;
+
+       dir = mdt_object_find(info->mti_env, info->mti_mdt, fid);
+       if (IS_ERR(dir))
+               RETURN(dir);
+
+       /* check early, the real version will be saved after locking */
+       rc = mdt_version_get_check(info, dir, idx);
+       if (rc)
+               GOTO(out_put, rc);
+
+       RETURN(dir);
+out_put:
+       mdt_object_put(info->mti_env, dir);
+       return ERR_PTR(rc);
+}
+
+static int mdt_object_lock_save(struct mdt_thread_info *info,
+                               struct mdt_object *dir,
+                               struct mdt_lock_handle *lh,
+                               int idx)
+{
+       int rc;
+
+       /* we lock the target dir if it is local */
+       rc = mdt_object_lock(info, dir, lh, MDS_INODELOCK_UPDATE,
+                            MDT_LOCAL_LOCK);
+       if (rc != 0)
+               return rc;
+
+       /* get and save correct version after locking */
+       mdt_version_get_save(info, dir, idx);
+       return 0;
+}
+
+
+static int mdt_rename_parents_lock(struct mdt_thread_info *info,
+                                  struct mdt_object **srcp,
+                                  struct mdt_object **tgtp)
+{
+       struct mdt_reint_record *rr = &info->mti_rr;
+       const struct lu_fid     *fid_src = rr->rr_fid1;
+       const struct lu_fid     *fid_tgt = rr->rr_fid2;
+       struct mdt_lock_handle  *lh_src = &info->mti_lh[MDT_LH_PARENT];
+       struct mdt_lock_handle  *lh_tgt = &info->mti_lh[MDT_LH_CHILD];
+       struct mdt_object       *src;
+       struct mdt_object       *tgt;
+       int                      reverse = 0;
+       int                      rc;
+       ENTRY;
+
+       /* Check if the @src is not a child of the @tgt, otherwise a
+        * reverse locking must take place.
+        *
+        * Note: cannot be called after object_find, because if the object
+        * is destroyed in between it gets stuck in lu_object_find_at(),
+        * waiting for the last ref. */
+       rc = mdt_rename_sanity(info, fid_src, fid_tgt);
+       if (rc == -EINVAL)
+               reverse = 1;
+       else if (rc)
+               RETURN(rc);
+
+       /* find both parents. */
+       src = mdt_object_find_check(info, fid_src, 0);
+       if (IS_ERR(src))
+               RETURN(PTR_ERR(src));
+
+       OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RENAME3, 5);
+
+       if (lu_fid_eq(fid_src, fid_tgt)) {
+               tgt = src;
+               mdt_object_get(info->mti_env, tgt);
+       } else {
+               tgt = mdt_object_find_check(info, fid_tgt, 1);
+               if (IS_ERR(tgt))
+                       GOTO(err_src_put, rc = PTR_ERR(tgt));
+
+               if (unlikely(mdt_object_remote(tgt))) {
+                       CDEBUG(D_INFO, "Source dir "DFID" target dir "DFID
+                              "on different MDTs\n", PFID(fid_src),
+                              PFID(fid_tgt));
+                       GOTO(err_tgt_put, rc = -EXDEV);
+               }
+       }
+
+       /* lock parents in the proper order. */
+       if (reverse) {
+               rc = mdt_object_lock_save(info, tgt, lh_tgt, 1);
+               if (rc)
+                       GOTO(err_tgt_put, rc);
+
+               OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RENAME, 5);
+
+               rc = mdt_object_lock_save(info, src, lh_src, 0);
+       } else {
+               rc = mdt_object_lock_save(info, src, lh_src, 0);
+               if (rc)
+                       GOTO(err_tgt_put, rc);
+
+               OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RENAME, 5);
+
+               if (tgt != src)
+                       rc = mdt_object_lock_save(info, tgt, lh_tgt, 1);
+               else if (lh_src->mlh_pdo_hash != lh_tgt->mlh_pdo_hash) {
+                       rc = mdt_pdir_hash_lock(info, lh_tgt, tgt,
+                                               MDS_INODELOCK_UPDATE);
+                       OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_PDO_LOCK2, 10);
+               }
+       }
+       if (rc)
+               GOTO(err_unlock, rc);
+
+       if (lu_object_is_dying(&tgt->mot_header))
+               GOTO(err_unlock, rc = -ENOENT);
+
+       *srcp = src;
+       *tgtp = tgt;
+       RETURN(0);
+
+err_unlock:
+       /* The order does not matter as the handle is checked inside,
+        * as well as not used handle. */
+       mdt_object_unlock(info, src, lh_src, rc);
+       mdt_object_unlock(info, tgt, lh_tgt, rc);
+err_tgt_put:
+       mdt_object_put(info->mti_env, tgt);
+err_src_put:
+       mdt_object_put(info->mti_env, src);
+       RETURN(rc);
+}
+
 /*
  * VBR: rename versions in reply: 0 - src parent; 1 - tgt parent;
  * 2 - src child; 3 - tgt child.
@@ -1592,8 +1737,8 @@ static int mdt_reint_rename_internal(struct mdt_thread_info *info,
        struct mdt_reint_record *rr = &info->mti_rr;
        struct md_attr          *ma = &info->mti_attr;
        struct ptlrpc_request   *req = mdt_info_req(info);
-       struct mdt_object       *msrcdir;
-       struct mdt_object       *mtgtdir;
+       struct mdt_object       *msrcdir = NULL;
+       struct mdt_object       *mtgtdir = NULL;
        struct mdt_object       *mold;
        struct mdt_object       *mnew = NULL;
        struct mdt_lock_handle  *lh_srcdirp;
@@ -1609,78 +1754,39 @@ static int mdt_reint_rename_internal(struct mdt_thread_info *info,
                  PFID(rr->rr_fid1), PNAME(&rr->rr_name),
                  PFID(rr->rr_fid2), PNAME(&rr->rr_tgt_name));
 
-       /* step 1: lock the source dir. */
        lh_srcdirp = &info->mti_lh[MDT_LH_PARENT];
        mdt_lock_pdo_init(lh_srcdirp, LCK_PW, &rr->rr_name);
-       msrcdir = mdt_object_find_lock(info, rr->rr_fid1, lh_srcdirp,
-                                      MDS_INODELOCK_UPDATE);
-       if (IS_ERR(msrcdir))
-               RETURN(PTR_ERR(msrcdir));
-
-       rc = mdt_version_get_check_save(info, msrcdir, 0);
-       if (rc)
-               GOTO(out_unlock_source, rc);
-
-       /* step 2: find & lock the target dir. */
        lh_tgtdirp = &info->mti_lh[MDT_LH_CHILD];
        mdt_lock_pdo_init(lh_tgtdirp, LCK_PW, &rr->rr_tgt_name);
-       if (lu_fid_eq(rr->rr_fid1, rr->rr_fid2)) {
-               mdt_object_get(info->mti_env, msrcdir);
-               mtgtdir = msrcdir;
-               if (lh_tgtdirp->mlh_pdo_hash != lh_srcdirp->mlh_pdo_hash) {
-                       rc = mdt_pdir_hash_lock(info, lh_tgtdirp, mtgtdir,
-                                        MDS_INODELOCK_UPDATE);
-                       if (rc != 0)
-                               GOTO(out_unlock_source, rc);
-                       OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_PDO_LOCK2, 10);
-               }
-       } else {
-               mtgtdir = mdt_object_find(info->mti_env, info->mti_mdt,
-                                         rr->rr_fid2);
-               if (IS_ERR(mtgtdir))
-                       GOTO(out_unlock_source, rc = PTR_ERR(mtgtdir));
 
-               /* check early, the real version will be saved after locking */
-               rc = mdt_version_get_check(info, mtgtdir, 1);
-               if (rc)
-                       GOTO(out_put_target, rc);
+       /* step 1&2: lock the source and target dirs. */
+       rc = mdt_rename_parents_lock(info, &msrcdir, &mtgtdir);
+       if (rc)
+               RETURN(rc);
 
-               if (unlikely(mdt_object_remote(mtgtdir))) {
-                       CDEBUG(D_INFO, "Source dir "DFID" target dir "DFID
-                              "on different MDTs\n", PFID(rr->rr_fid1),
-                              PFID(rr->rr_fid2));
-                       GOTO(out_put_target, rc = -EXDEV);
-               } else {
-                       if (likely(mdt_object_exists(mtgtdir))) {
-                               /* we lock the target dir if it is local */
-                               rc = mdt_object_lock(info, mtgtdir, lh_tgtdirp,
-                                                    MDS_INODELOCK_UPDATE,
-                                                    MDT_LOCAL_LOCK);
-                               if (rc != 0)
-                                       GOTO(out_put_target, rc);
-                               /* get and save correct version after locking */
-                               mdt_version_get_save(info, mtgtdir, 1);
-                       } else {
-                               GOTO(out_put_target, rc = -ESTALE);
-                       }
-               }
-       }
+       OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RENAME2, 5);
 
        /* step 3: find & lock the old object. */
        fid_zero(old_fid);
        rc = mdt_lookup_version_check(info, msrcdir, &rr->rr_name, old_fid, 2);
        if (rc != 0)
-               GOTO(out_unlock_target, rc);
+               GOTO(out_unlock_parents, rc);
 
        if (lu_fid_eq(old_fid, rr->rr_fid1) || lu_fid_eq(old_fid, rr->rr_fid2))
-               GOTO(out_unlock_target, rc = -EINVAL);
+               GOTO(out_unlock_parents, rc = -EINVAL);
 
        if (!fid_is_md_operative(old_fid))
-               GOTO(out_unlock_target, rc = -EPERM);
+               GOTO(out_unlock_parents, rc = -EPERM);
 
        mold = mdt_object_find(info->mti_env, info->mti_mdt, old_fid);
        if (IS_ERR(mold))
-               GOTO(out_unlock_target, rc = PTR_ERR(mold));
+               GOTO(out_unlock_parents, rc = PTR_ERR(mold));
+
+       /* Check if @mtgtdir is subdir of @mold, before locking child
+        * to avoid reverse locking. */
+       rc = mdt_rename_sanity(info, rr->rr_fid2, old_fid);
+       if (rc)
+               GOTO(out_put_old, rc);
 
        tgt_vbr_obj_set(info->mti_env, mdt_obj2dt(mold));
        /* save version after locking */
@@ -1728,6 +1834,12 @@ static int mdt_reint_rename_internal(struct mdt_thread_info *info,
                if (rc != 0)
                        GOTO(out_put_new, rc);
 
+               /* Check if @msrcdir is subdir of @mnew, before locking child
+                * to avoid reverse locking. */
+               rc = mdt_rename_sanity(info, rr->rr_fid1, new_fid);
+               if (rc)
+                       GOTO(out_unlock_old, rc);
+
                /* We used to acquire MDS_INODELOCK_FULL here but we
                 * can't do this now because a running HSM restore on
                 * the rename onto victim will hold the layout
@@ -1772,11 +1884,6 @@ static int mdt_reint_rename_internal(struct mdt_thread_info *info,
        mdt_fail_write(info->mti_env, info->mti_mdt->mdt_bottom,
                       OBD_FAIL_MDS_REINT_RENAME_WRITE);
 
-       /* Check if @dst is subdir of @src. */
-       rc = mdt_rename_sanity(info, old_fid);
-       if (rc)
-               GOTO(out_unlock_new, rc);
-
        if (mnew != NULL)
                mutex_lock(&mnew->mot_lov_mutex);
 
@@ -1799,7 +1906,6 @@ static int mdt_reint_rename_internal(struct mdt_thread_info *info,
        }
 
        EXIT;
-out_unlock_new:
        if (mnew != NULL)
                mdt_object_unlock(info, mnew, lh_newp, rc);
 out_unlock_old:
@@ -1809,11 +1915,8 @@ out_put_new:
                mdt_object_put(info->mti_env, mnew);
 out_put_old:
        mdt_object_put(info->mti_env, mold);
-out_unlock_target:
-       mdt_object_unlock(info, mtgtdir, lh_tgtdirp, rc);
-out_put_target:
-       mdt_object_put(info->mti_env, mtgtdir);
-out_unlock_source:
+out_unlock_parents:
+       mdt_object_unlock_put(info, mtgtdir, lh_tgtdirp, rc);
        mdt_object_unlock_put(info, msrcdir, lh_srcdirp, rc);
        return rc;
 }
index 8918f5d..2f5ab1b 100644 (file)
@@ -2370,6 +2370,109 @@ test_51d() {
 }
 run_test 51d "layout lock: losing layout lock should clean up memory map region"
 
+test_54_part1()
+{
+       echo "==> rename vs getattr vs setxattr should not deadlock"
+       mkdir -p $DIR/d1/d2/d3 || error "(1) mkdir failed"
+
+       do_facet mds $LCTL set_param fail_loc=$1
+
+       mv -T $DIR/d1/d2/d3 $DIR/d1/d3 &
+       PID1=$!
+       sleep 1
+
+       stat $DIR/d1/d2 &
+       PID2=$!
+       sleep 1
+
+       setfattr -n user.attr1 -v value1 $DIR2/d1 || error "(2) setfattr failed"
+       wait $PID1 || error "(3) mv failed"
+       wait $PID2 || error "(4) stat failed"
+       echo
+
+       rm -rf $DIR/d1
+}
+
+test_54_part2() {
+       echo "==> rename vs getattr vs open vs getattr should not deadlock"
+       mkdir -p $DIR/d1/d2/d3 || error "(1) mkdir failed"
+
+       do_facet mds $LCTL set_param fail_loc=$1
+
+       mv -T $DIR/d1/d2/d3 $DIR/d1/d3 &
+       PID1=$!
+       sleep 1
+
+       stat $DIR/d1/d2 &
+       PID2=$!
+       sleep 1
+
+       $MULTIOP $DIR2/d1/d2 Oc &
+       PID3=$!
+       sleep 1
+
+       stat $DIR/d1 || error "(2) stat failed"
+
+       wait $PID1 || error "(3) mv failed"
+       wait $PID2 || error "(4) stat failed"
+       wait $PID3 && error "(5) multiop failed"
+       echo
+       rm -rf $DIR/d1
+}
+
+test_54() {
+       local p="$TMP/$TESTSUITE-$TESTNAME.parameters"
+       save_lustre_params client "llite.*.xattr_cache" > $p
+       lctl set_param llite.*.xattr_cache 1 ||
+               { skip "xattr cache is not supported"; return 0; }
+
+#define OBD_FAIL_MDS_RENAME              0x153
+#define OBD_FAIL_MDS_RENAME2             0x154
+       test_54_part1 0x80000153 || error 10
+       test_54_part1 0x80000154 || error 11
+       test_54_part2 0x80000153 || error 12
+       test_54_part2 0x80000154 || error 13
+
+       restore_lustre_params < $p
+       rm -f $p
+}
+run_test 54 "rename locking"
+
+test_55a() {
+       mkdir -p $DIR/d1/d2 $DIR/d3 || error "(1) mkdir failed"
+
+#define OBD_FAIL_MDS_RENAME              0x153
+       do_facet mds $LCTL set_param fail_loc=0x80000153
+
+       mv -T $DIR/d1/d2 $DIR/d3/d2 &
+       PID1=$!
+       sleep 1
+
+       rm -r $DIR2/d3
+       wait $PID1 && error "(2) mv succeeded"
+
+       rm -rf $DIR/d1
+}
+run_test 55a "rename vs unlink target dir"
+
+test_55b()
+{
+       mkdir -p $DIR/d1/d2 $DIR/d3 || error "(1) mkdir failed"
+
+#define OBD_FAIL_MDS_RENAME              0x155
+       do_facet mds $LCTL set_param fail_loc=0x80000155
+
+       mv -T $DIR/d1/d2 $DIR/d3/d2 &
+       PID1=$!
+       sleep 1
+
+       rm -r $DIR2/d1
+       wait $PID1 && error "(2) mv succeeded"
+
+       rm -rf $DIR/d1
+}
+run_test 55b "rename vs unlink source dir"
+
 test_60() {
        [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.3.0) ]] ||
        { skip "Need MDS version at least 2.3.0"; return; }