Whamcloud - gitweb
LU-3537 mdt: allow cross-MDT rename and link 82/12282/48
author Wang Di <di.wang@intel.com>
Wed, 8 Oct 2014 03:42:47 +0000 (20:42 -0700)
committer Oleg Drokin <oleg.drokin@intel.com>
Thu, 11 Jun 2015 16:12:05 +0000 (16:12 +0000)
Remove the checks that rejected cross-MDT operations, so that
cross-MDT rename and link are allowed.

Remove the locality parameters from the MDT lock API; they are
obsolete now that all cross-MDT operations are allowed.

Change-Id: I29874d929f98593d00f5cbd836c7ce681d51add7
Signed-off-by: Wang Di <di.wang@intel.com>
Reviewed-on: http://review.whamcloud.com/12282
Tested-by: Jenkins
Reviewed-by: James Simmons <uja.ornl@yahoo.com>
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Lai Siyao <lai.siyao@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/lmv/lmv_obd.c
lustre/mdd/mdd_dir.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_hsm.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_open.c
lustre/mdt/mdt_reint.c
lustre/tests/recovery-small.sh
lustre/tests/sanity.sh
lustre/tests/test-framework.sh

index 300710d..a2c9e1f 100644 (file)
@@ -2040,6 +2040,9 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
        struct obd_device       *obd = exp->exp_obd;
        struct lmv_obd          *lmv = &obd->u.lmv;
        struct lmv_tgt_desc     *src_tgt;
+       struct lmv_tgt_desc     *tgt_tgt;
+       struct obd_export       *target_exp;
+       struct mdt_body         *body;
        int                     rc;
        ENTRY;
 
@@ -2080,6 +2083,10 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
                        RETURN(rc);
 
                src_tgt = lmv_find_target(lmv, &op_data->op_fid3);
+               if (IS_ERR(src_tgt))
+                       RETURN(PTR_ERR(src_tgt));
+
+               target_exp = src_tgt->ltd_exp;
        } else {
                if (op_data->op_mea1 != NULL) {
                        struct lmv_stripe_md    *lsm = op_data->op_mea1;
@@ -2088,29 +2095,29 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
                                                             oldlen,
                                                             &op_data->op_fid1,
                                                             &op_data->op_mds);
-                       if (IS_ERR(src_tgt))
-                               RETURN(PTR_ERR(src_tgt));
                } else {
                        src_tgt = lmv_find_target(lmv, &op_data->op_fid1);
-                       if (IS_ERR(src_tgt))
-                               RETURN(PTR_ERR(src_tgt));
-
-                       op_data->op_mds = src_tgt->ltd_idx;
                }
+               if (IS_ERR(src_tgt))
+                       RETURN(PTR_ERR(src_tgt));
+
 
-               if (op_data->op_mea2) {
+               if (op_data->op_mea2 != NULL) {
                        struct lmv_stripe_md    *lsm = op_data->op_mea2;
-                       const struct lmv_oinfo  *oinfo;
 
-                       oinfo = lsm_name_to_stripe_info(lsm, new, newlen);
-                       if (IS_ERR(oinfo))
-                               RETURN(PTR_ERR(oinfo));
+                       tgt_tgt = lmv_locate_target_for_name(lmv, lsm, new,
+                                                            newlen,
+                                                            &op_data->op_fid2,
+                                                            &op_data->op_mds);
+               } else {
+                       tgt_tgt = lmv_find_target(lmv, &op_data->op_fid2);
 
-                       op_data->op_fid2 = oinfo->lmo_fid;
                }
+               if (IS_ERR(tgt_tgt))
+                       RETURN(PTR_ERR(tgt_tgt));
+
+               target_exp = tgt_tgt->ltd_exp;
        }
-       if (IS_ERR(src_tgt))
-               RETURN(PTR_ERR(src_tgt));
 
        /*
         * LOOKUP lock on src child (fid3) should also be cancelled for
@@ -2152,21 +2159,52 @@ static int lmv_rename(struct obd_export *exp, struct md_op_data *op_data,
                        RETURN(rc);
        }
 
+retry_rename:
        /*
         * Cancel all the locks on tgt child (fid4).
         */
-       if (fid_is_sane(&op_data->op_fid4))
+       if (fid_is_sane(&op_data->op_fid4)) {
+               struct lmv_tgt_desc *tgt;
+
                rc = lmv_early_cancel(exp, NULL, op_data, src_tgt->ltd_idx,
                                      LCK_EX, MDS_INODELOCK_FULL,
                                      MF_MDC_CANCEL_FID4);
+               if (rc != 0)
+                       RETURN(rc);
+
+               tgt = lmv_find_target(lmv, &op_data->op_fid4);
+               if (IS_ERR(tgt))
+                       RETURN(PTR_ERR(tgt));
 
-       CDEBUG(D_INODE, DFID":m%d to "DFID"\n", PFID(&op_data->op_fid1),
-              op_data->op_mds, PFID(&op_data->op_fid2));
+               /* The target child might be destroyed and become an
+                * orphan, and orphans can currently only be checked on
+                * the local MDT, so send the rename request to the MDT
+                * where the target child is located. If the target child
+                * does not exist, the request goes to the target parent. */
+               target_exp = tgt->ltd_exp;
+       }
 
-       rc = md_rename(src_tgt->ltd_exp, op_data, old, oldlen, new, newlen,
+       rc = md_rename(target_exp, op_data, old, oldlen, new, newlen,
                       request);
 
-       RETURN(rc);
+       if (rc != 0 && rc != -EREMOTE)
+               RETURN(rc);
+
+       body = req_capsule_server_get(&(*request)->rq_pill, &RMF_MDT_BODY);
+       if (body == NULL)
+               RETURN(-EPROTO);
+
+       /* Not cross-ref case, just get out of here. */
+       if (likely(!(body->mbo_valid & OBD_MD_MDS)))
+               RETURN(rc);
+
+       CDEBUG(D_INODE, "%s: try rename to another MDT for "DFID"\n",
+              exp->exp_obd->obd_name, PFID(&body->mbo_fid1));
+
+       op_data->op_fid4 = body->mbo_fid1;
+       ptlrpc_req_finished(*request);
+       *request = NULL;
+       goto retry_rename;
 }
 
 static int lmv_setattr(struct obd_export *exp, struct md_op_data *op_data,
index 25f3e77..82f88e0 100644 (file)
@@ -225,18 +225,12 @@ static int mdd_is_parent(const struct lu_env *env,
                        GOTO(out, rc = 0);
                 if (lu_fid_eq(pfid, lf))
                         GOTO(out, rc = 1);
-                if (parent)
-                        mdd_object_put(env, parent);
+               if (parent != NULL)
+                       mdd_object_put(env, parent);
 
                parent = mdd_object_find(env, mdd, pfid);
-               if (IS_ERR(parent)) {
+               if (IS_ERR(parent))
                        GOTO(out, rc = PTR_ERR(parent));
-               } else if (mdd_object_remote(parent)) {
-                       /*FIXME: Because of the restriction of rename in Phase I.
-                        * If the parent is remote, we just assumed lf is not the
-                        * parent of P1 for now */
-                       GOTO(out, rc = 0);
-               }
                p1 = parent;
         }
         EXIT;
@@ -2660,15 +2654,13 @@ static int mdd_declare_rename(const struct lu_env *env,
        if (rc != 0)
                return rc;
 
-        /* name from target dir (old name), we declare it unconditionally
-         * as mdd_rename() calls delete unconditionally as well. so just
-         * to balance declarations vs calls to change ... */
-        rc = mdo_declare_index_delete(env, mdd_tpobj, tname->ln_name, handle);
-        if (rc)
-                return rc;
-
         if (mdd_tobj && mdd_object_exists(mdd_tobj)) {
                 /* delete target child in target parent directory */
+               rc = mdo_declare_index_delete(env, mdd_tpobj, tname->ln_name,
+                                             handle);
+               if (rc)
+                       return rc;
+
                 rc = mdo_declare_ref_del(env, mdd_tobj, handle);
                 if (rc)
                         return rc;
@@ -2805,19 +2797,12 @@ static int mdd_rename(const struct lu_env *env,
                         GOTO(fixup_spobj, rc);
         }
 
-        /* Remove target name from target directory
-         * Here tobj can be remote one, so we do index_delete unconditionally
-         * and -ENOENT is allowed.
-         */
-       rc = __mdd_index_delete(env, mdd_tpobj, tname, is_dir, handle);
-        if (rc != 0) {
-                if (mdd_tobj) {
-                        /* tname might been renamed to something else */
-                        GOTO(fixup_spobj, rc);
-                }
-                if (rc != -ENOENT)
-                        GOTO(fixup_spobj, rc);
-        }
+       if (mdd_tobj != NULL && mdd_object_exists(mdd_tobj)) {
+               rc = __mdd_index_delete(env, mdd_tpobj, tname, is_dir, handle);
+               if (rc != 0)
+                       /* tname might been renamed to something else */
+                       GOTO(fixup_spobj, rc);
+       }
 
         /* Insert new fid with target name into target dir */
        rc = __mdd_index_insert(env, mdd_tpobj, lf, cattr->la_mode,
index 381f056..1b50be9 100644 (file)
@@ -1202,12 +1202,12 @@ static int mdt_swap_layouts(struct tgt_session_info *tsi)
        mdt_lock_reg_init(lh2, LCK_EX);
 
        rc = mdt_object_lock(info, o1, lh1, MDS_INODELOCK_LAYOUT |
-                            MDS_INODELOCK_XATTR, MDT_LOCAL_LOCK);
+                            MDS_INODELOCK_XATTR);
        if (rc < 0)
                GOTO(put, rc);
 
        rc = mdt_object_lock(info, o2, lh2, MDS_INODELOCK_LAYOUT |
-                            MDS_INODELOCK_XATTR, MDT_LOCAL_LOCK);
+                            MDS_INODELOCK_XATTR);
        if (rc < 0)
                GOTO(unlock1, rc);
 
@@ -1314,8 +1314,7 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
                                        MDS_INODELOCK_LAYOUT);
                        child_bits |= MDS_INODELOCK_PERM | MDS_INODELOCK_UPDATE;
 
-                       rc = mdt_object_lock(info, child, lhc, child_bits,
-                                            MDT_LOCAL_LOCK);
+                       rc = mdt_object_lock(info, child, lhc, child_bits);
                        if (rc < 0)
                                RETURN(rc);
                }
@@ -1392,12 +1391,11 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
                if (S_ISDIR(lu_object_attr(&parent->mot_obj))) {
                        lhp = &info->mti_lh[MDT_LH_PARENT];
                        mdt_lock_pdo_init(lhp, LCK_PR, lname);
-                        rc = mdt_object_lock(info, parent, lhp,
-                                             MDS_INODELOCK_UPDATE,
-                                             MDT_LOCAL_LOCK);
-                        if (unlikely(rc != 0))
-                                RETURN(rc);
-                }
+                       rc = mdt_object_lock(info, parent, lhp,
+                                            MDS_INODELOCK_UPDATE);
+                       if (unlikely(rc != 0))
+                               RETURN(rc);
+               }
 
                 /* step 2: lookup child's fid by name */
                 fid_zero(child_fid);
@@ -1486,12 +1484,12 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
                        child_bits |= MDS_INODELOCK_LAYOUT;
                        /* try layout lock, it may fail to be granted due to
                         * contention at LOOKUP or UPDATE */
-                       if (!mdt_object_lock_try(info, child, lhc, child_bits,
-                                                MDT_CROSS_LOCK)) {
+                       if (!mdt_object_lock_try(info, child, lhc,
+                                                child_bits)) {
                                child_bits &= ~MDS_INODELOCK_LAYOUT;
                                LASSERT(child_bits != 0);
                                rc = mdt_object_lock(info, child, lhc,
-                                               child_bits, MDT_CROSS_LOCK);
+                                                    child_bits);
                        } else {
                                ma_need |= MA_LOV;
                        }
@@ -1500,8 +1498,7 @@ static int mdt_getattr_name_lock(struct mdt_thread_info *info,
                         * client will enqueue the lock to the remote MDT */
                        if (mdt_object_remote(child))
                                child_bits &= ~MDS_INODELOCK_UPDATE;
-                       rc = mdt_object_lock(info, child, lhc, child_bits,
-                                               MDT_CROSS_LOCK);
+                       rc = mdt_object_lock(info, child, lhc, child_bits);
                }
                 if (unlikely(rc != 0))
                         GOTO(out_child, rc);
@@ -2335,7 +2332,7 @@ int mdt_remote_object_lock(struct mdt_thread_info *mti,
 static int mdt_object_local_lock(struct mdt_thread_info *info,
                                 struct mdt_object *o,
                                 struct mdt_lock_handle *lh, __u64 ibits,
-                                bool nonblock, int locality)
+                                bool nonblock)
 {
         struct ldlm_namespace *ns = info->mti_mdt->mdt_namespace;
         ldlm_policy_data_t *policy = &info->mti_policy;
@@ -2426,21 +2423,13 @@ out_unlock:
 static int
 mdt_object_lock_internal(struct mdt_thread_info *info, struct mdt_object *o,
                         struct mdt_lock_handle *lh, __u64 ibits,
-                        bool nonblock, int locality)
+                        bool nonblock)
 {
        int rc;
        ENTRY;
 
        if (!mdt_object_remote(o))
-               return mdt_object_local_lock(info, o, lh, ibits, nonblock,
-                                            locality);
-
-       if (locality == MDT_LOCAL_LOCK) {
-               CERROR("%s: try to get local lock for remote object"
-                      DFID".\n", mdt_obd_name(info->mti_mdt),
-                      PFID(mdt_object_fid(o)));
-               RETURN(-EPROTO);
-       }
+               return mdt_object_local_lock(info, o, lh, ibits, nonblock);
 
        /* XXX do not support PERM/LAYOUT/XATTR lock for remote object yet */
        ibits &= ~(MDS_INODELOCK_PERM | MDS_INODELOCK_LAYOUT |
@@ -2471,7 +2460,7 @@ mdt_object_lock_internal(struct mdt_thread_info *info, struct mdt_object *o,
        if (ibits & MDS_INODELOCK_LOOKUP) {
                rc = mdt_object_local_lock(info, o, lh,
                                           MDS_INODELOCK_LOOKUP,
-                                          nonblock, locality);
+                                          nonblock);
                if (rc != ELDLM_OK)
                        RETURN(rc);
        }
@@ -2480,18 +2469,18 @@ mdt_object_lock_internal(struct mdt_thread_info *info, struct mdt_object *o,
 }
 
 int mdt_object_lock(struct mdt_thread_info *info, struct mdt_object *o,
-                   struct mdt_lock_handle *lh, __u64 ibits, int locality)
+                   struct mdt_lock_handle *lh, __u64 ibits)
 {
-       return mdt_object_lock_internal(info, o, lh, ibits, false, locality);
+       return mdt_object_lock_internal(info, o, lh, ibits, false);
 }
 
 int mdt_object_lock_try(struct mdt_thread_info *info, struct mdt_object *o,
-                       struct mdt_lock_handle *lh, __u64 ibits, int locality)
+                       struct mdt_lock_handle *lh, __u64 ibits)
 {
        struct mdt_lock_handle tmp = *lh;
        int rc;
 
-       rc = mdt_object_lock_internal(info, o, &tmp, ibits, true, locality);
+       rc = mdt_object_lock_internal(info, o, &tmp, ibits, true);
        if (rc == 0)
                *lh = tmp;
 
@@ -2595,8 +2584,7 @@ struct mdt_object *mdt_object_find_lock(struct mdt_thread_info *info,
         if (!IS_ERR(o)) {
                 int rc;
 
-                rc = mdt_object_lock(info, o, lh, ibits,
-                                     MDT_LOCAL_LOCK);
+               rc = mdt_object_lock(info, o, lh, ibits);
                 if (rc != 0) {
                         mdt_object_put(info->mti_env, o);
                         o = ERR_PTR(rc);
@@ -3067,8 +3055,7 @@ static int mdt_intent_getxattr(enum mdt_it_code opcode,
        if (!lustre_handle_is_used(&lhc->mlh_reg_lh)) {
                mdt_lock_reg_init(lhc, (*lockp)->l_req_mode);
                rc = mdt_object_lock(info, info->mti_object, lhc,
-                                       MDS_INODELOCK_XATTR,
-                                       MDT_LOCAL_LOCK);
+                                    MDS_INODELOCK_XATTR);
                if (rc)
                        return rc;
        }
index edf77ea..20502b9 100644 (file)
@@ -214,8 +214,7 @@ int mdt_hsm_state_get(struct tgt_session_info *tsi)
 
        lh = &info->mti_lh[MDT_LH_CHILD];
        mdt_lock_reg_init(lh, LCK_PR);
-       rc = mdt_object_lock(info, obj, lh, MDS_INODELOCK_LOOKUP,
-                            MDT_LOCAL_LOCK);
+       rc = mdt_object_lock(info, obj, lh, MDS_INODELOCK_LOOKUP);
        if (rc < 0)
                GOTO(out_ucred, rc);
 
@@ -276,7 +275,7 @@ int mdt_hsm_state_set(struct tgt_session_info *tsi)
        lh = &info->mti_lh[MDT_LH_CHILD];
        mdt_lock_reg_init(lh, LCK_PW);
        rc = mdt_object_lock(info, obj, lh, MDS_INODELOCK_LOOKUP |
-                            MDS_INODELOCK_XATTR, MDT_LOCAL_LOCK);
+                            MDS_INODELOCK_XATTR);
        if (rc < 0)
                GOTO(out_ucred, rc);
 
index 042bc50..639a9ef 100644 (file)
@@ -592,10 +592,10 @@ int mdt_check_resent_lock(struct mdt_thread_info *info, struct mdt_object *mo,
                          struct mdt_lock_handle *lhc);
 
 int mdt_object_lock(struct mdt_thread_info *info, struct mdt_object *mo,
-                   struct mdt_lock_handle *lh, __u64 ibits, int locality);
+                   struct mdt_lock_handle *lh, __u64 ibits);
 
 int mdt_object_lock_try(struct mdt_thread_info *info, struct mdt_object *mo,
-                       struct mdt_lock_handle *lh, __u64 ibits, int locality);
+                       struct mdt_lock_handle *lh, __u64 ibits);
 
 void mdt_object_unlock(struct mdt_thread_info *info, struct mdt_object *mo,
                       struct mdt_lock_handle *lh, int decref);
index 768c944..36d96d6 100644 (file)
@@ -857,15 +857,13 @@ static int mdt_object_open_lock(struct mdt_thread_info *info,
                 * However this is a double-edged sword because changing
                 * permission will revoke huge # of LOOKUP locks. */
                *ibits |= MDS_INODELOCK_LAYOUT | MDS_INODELOCK_LOOKUP;
-               if (!mdt_object_lock_try(info, obj, lhc, *ibits,
-                                        MDT_CROSS_LOCK)) {
+               if (!mdt_object_lock_try(info, obj, lhc, *ibits)) {
                        *ibits &= ~(MDS_INODELOCK_LAYOUT|MDS_INODELOCK_LOOKUP);
                        if (*ibits != 0)
-                               rc = mdt_object_lock(info, obj, lhc, *ibits,
-                                               MDT_CROSS_LOCK);
+                               rc = mdt_object_lock(info, obj, lhc, *ibits);
                }
        } else if (*ibits != 0) {
-               rc = mdt_object_lock(info, obj, lhc, *ibits, MDT_CROSS_LOCK);
+               rc = mdt_object_lock(info, obj, lhc, *ibits);
        }
 
        CDEBUG(D_INODE, "Requested bits lock:"DFID ", ibits = "LPX64
@@ -892,8 +890,7 @@ static int mdt_object_open_lock(struct mdt_thread_info *info,
                LASSERT(!try_layout);
                mdt_lock_handle_init(ll);
                mdt_lock_reg_init(ll, LCK_EX);
-               rc = mdt_object_lock(info, obj, ll, MDS_INODELOCK_LAYOUT,
-                                       MDT_LOCAL_LOCK);
+               rc = mdt_object_lock(info, obj, ll, MDS_INODELOCK_LAYOUT);
 
                OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_LL_BLOCK, 2);
        }
@@ -1269,10 +1266,15 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc)
                          (create_flags & MDS_OPEN_CREAT) ? LCK_PW : LCK_PR,
                          &rr->rr_name);
 
-        parent = mdt_object_find_lock(info, rr->rr_fid1, lh,
-                                      MDS_INODELOCK_UPDATE);
-        if (IS_ERR(parent))
-                GOTO(out, result = PTR_ERR(parent));
+       parent = mdt_object_find(info->mti_env, mdt, rr->rr_fid1);
+       if (IS_ERR(parent))
+               GOTO(out, result = PTR_ERR(parent));
+
+       result = mdt_object_lock(info, parent, lh, MDS_INODELOCK_UPDATE);
+       if (result != 0) {
+               mdt_object_put(info->mti_env, parent);
+               GOTO(out, result);
+       }
 
         /* get and check version of parent */
         result = mdt_version_get_check(info, parent, 0);
@@ -1390,13 +1392,12 @@ int mdt_reint_open(struct mdt_thread_info *info, struct mdt_lock_handle *lhc)
                        if (rc < 0) {
                                GOTO(out_child, result = rc);
                        } else if (rc > 0) {
-                                mdt_lock_handle_init(lhc);
-                                mdt_lock_reg_init(lhc, LCK_PR);
+                               mdt_lock_handle_init(lhc);
+                               mdt_lock_reg_init(lhc, LCK_PR);
 
-                                rc = mdt_object_lock(info, child, lhc,
-                                                     MDS_INODELOCK_LOOKUP,
-                                                     MDT_CROSS_LOCK);
-                        }
+                               rc = mdt_object_lock(info, child, lhc,
+                                                    MDS_INODELOCK_LOOKUP);
+                       }
                        repbody->mbo_fid1 = *mdt_object_fid(child);
                        repbody->mbo_valid |= (OBD_MD_FLID | OBD_MD_MDS);
                         if (rc != 0)
@@ -1696,7 +1697,7 @@ static int mdt_hsm_release(struct mdt_thread_info *info, struct mdt_object *o,
 
        mdt_lock_reg_init(lh, LCK_EX);
        rc = mdt_object_lock(info, o, lh, MDS_INODELOCK_LAYOUT |
-                            MDS_INODELOCK_XATTR, MDT_LOCAL_LOCK);
+                            MDS_INODELOCK_XATTR);
        if (rc != 0)
                GOTO(out_close, rc);
 
@@ -1830,13 +1831,13 @@ static int mdt_close_swap_layouts(struct mdt_thread_info *info,
 
        mdt_lock_reg_init(lh1, LCK_EX);
        rc = mdt_object_lock(info, o1, lh1, MDS_INODELOCK_LAYOUT |
-                            MDS_INODELOCK_XATTR, MDT_LOCAL_LOCK);
+                            MDS_INODELOCK_XATTR);
        if (rc < 0)
                GOTO(out_unlock_sem, rc);
 
        mdt_lock_reg_init(lh2, LCK_EX);
        rc = mdt_object_lock(info, o2, lh2, MDS_INODELOCK_LAYOUT |
-                            MDS_INODELOCK_XATTR, MDT_LOCAL_LOCK);
+                            MDS_INODELOCK_XATTR);
        if (rc < 0)
                GOTO(out_unlock1, rc);
 
index ff2ad6b..f206e6b 100644 (file)
@@ -268,21 +268,6 @@ static int mdt_remote_permission(struct mdt_thread_info *info,
                        return -EPERM;
        }
 
-       if (mdt->mdt_enable_remote_dir == 0) {
-               struct seq_server_site  *ss = mdt_seq_site(mdt);
-               struct lu_seq_range     range = { 0 };
-               int                     rc;
-
-               fld_range_set_type(&range, LU_SEQ_RANGE_MDT);
-               rc = fld_server_lookup(info->mti_env, ss->ss_server_fld,
-                                      fid_seq(mdt_object_fid(parent)), &range);
-               if (rc != 0)
-                       return rc;
-
-               if (range.lsr_index != 0)
-                       return -EPERM;
-       }
-
        if (!mdt_is_dne_client(exp))
                return -ENOTSUPP;
 
@@ -336,8 +321,7 @@ static int mdt_md_create(struct mdt_thread_info *info)
 
        lh = &info->mti_lh[MDT_LH_PARENT];
        mdt_lock_pdo_init(lh, LCK_PW, &rr->rr_name);
-       rc = mdt_object_lock(info, parent, lh, MDS_INODELOCK_UPDATE,
-                            MDT_CROSS_LOCK);
+       rc = mdt_object_lock(info, parent, lh, MDS_INODELOCK_UPDATE);
        if (rc)
                GOTO(put_parent, rc);
 
@@ -531,9 +515,9 @@ static int mdt_attr_set(struct mdt_thread_info *info, struct mdt_object *mo,
        if (ma->ma_attr.la_valid & (LA_MODE|LA_UID|LA_GID))
                lockpart |= MDS_INODELOCK_LOOKUP | MDS_INODELOCK_PERM;
 
-        rc = mdt_object_lock(info, mo, lh, lockpart, MDT_LOCAL_LOCK);
-        if (rc != 0)
-                RETURN(rc);
+       rc = mdt_object_lock(info, mo, lh, lockpart);
+       if (rc != 0)
+               RETURN(rc);
 
        s0_lh = &info->mti_lh[MDT_LH_LOCAL];
        mdt_lock_reg_init(s0_lh, LCK_PW);
@@ -607,8 +591,7 @@ int mdt_add_dirty_flag(struct mdt_thread_info *info, struct mdt_object *mo,
                ma->ma_hsm.mh_flags |= HS_DIRTY;
 
                mdt_lock_reg_init(lh, LCK_PW);
-               rc = mdt_object_lock(info, mo, lh, MDS_INODELOCK_XATTR,
-                                    MDT_LOCAL_LOCK);
+               rc = mdt_object_lock(info, mo, lh, MDS_INODELOCK_XATTR);
                if (rc != 0)
                        RETURN(rc);
 
@@ -687,8 +670,7 @@ static int mdt_reint_setattr(struct mdt_thread_info *info,
                mdt_lock_reg_init(lh, LCK_PW);
 
                rc = mdt_object_lock(info, mo, lh,
-                                    MDS_INODELOCK_XATTR,
-                                    MDT_LOCAL_LOCK);
+                                    MDS_INODELOCK_XATTR);
                if (rc != 0)
                        GOTO(out_put, rc);
 
@@ -786,10 +768,10 @@ static int mdt_reint_unlink(struct mdt_thread_info *info,
         struct mdt_object       *mc;
         struct mdt_lock_handle  *parent_lh;
         struct mdt_lock_handle  *child_lh;
-       __u64                   lock_ibits;
        struct ldlm_enqueue_info *einfo = &info->mti_einfo;
        struct mdt_lock_handle  *s0_lh = NULL;
        struct mdt_object       *s0_obj = NULL;
+       __u64                   lock_ibits;
        int                     rc;
        int                     no_name = 0;
        ENTRY;
@@ -817,8 +799,7 @@ static int mdt_reint_unlink(struct mdt_thread_info *info,
 
        parent_lh = &info->mti_lh[MDT_LH_PARENT];
        mdt_lock_pdo_init(parent_lh, LCK_PW, &rr->rr_name);
-       rc = mdt_object_lock(info, mp, parent_lh, MDS_INODELOCK_UPDATE,
-                            MDT_CROSS_LOCK);
+       rc = mdt_object_lock(info, mp, parent_lh, MDS_INODELOCK_UPDATE);
        if (rc != 0)
                GOTO(put_parent, rc);
 
@@ -906,8 +887,7 @@ static int mdt_reint_unlink(struct mdt_thread_info *info,
                 * it will release the LOOKUP lock right away. Then What
                 * would happen if another client try to grab the LOOKUP
                 * lock at the same time with unlink XXX */
-               mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_LOOKUP,
-                               MDT_CROSS_LOCK);
+               mdt_object_lock(info, mc, child_lh, MDS_INODELOCK_LOOKUP);
                repbody = req_capsule_server_get(info->mti_pill, &RMF_MDT_BODY);
                LASSERT(repbody != NULL);
                repbody->mbo_fid1 = *mdt_object_fid(mc);
@@ -937,7 +917,7 @@ static int mdt_reint_unlink(struct mdt_thread_info *info,
                lock_ibits &= ~MDS_INODELOCK_LOOKUP;
        }
 
-       rc = mdt_object_lock(info, mc, child_lh, lock_ibits, MDT_CROSS_LOCK);
+       rc = mdt_object_lock(info, mc, child_lh, lock_ibits);
        if (rc != 0)
                GOTO(put_child, rc);
        /*
@@ -1069,16 +1049,8 @@ static int mdt_reint_link(struct mdt_thread_info *info,
                GOTO(out_unlock_parent, rc = -ENOENT);
        }
 
-       if (mdt_object_remote(ms)) {
-               mdt_object_put(info->mti_env, ms);
-               CERROR("%s: source inode "DFID" on remote MDT from "DFID"\n",
-                      mdt_obd_name(info->mti_mdt), PFID(rr->rr_fid1),
-                      PFID(rr->rr_fid2));
-               GOTO(out_unlock_parent, rc = -EXDEV);
-       }
-
        rc = mdt_object_lock(info, ms, lhs, MDS_INODELOCK_UPDATE |
-                            MDS_INODELOCK_XATTR, MDT_LOCAL_LOCK);
+                            MDS_INODELOCK_XATTR);
         if (rc != 0) {
                 mdt_object_put(info->mti_env, ms);
                 GOTO(out_unlock_parent, rc);
@@ -1154,29 +1126,16 @@ static int mdt_pdir_hash_lock(struct mdt_thread_info *info,
        return rc;
 }
 
-enum mdt_rename_lock {
-       MRL_RENAME,
-       MRL_MIGRATE,
-};
-
 /**
- * Get BFL lock for rename or migrate process, right now, it does not support
- * cross-MDT rename, so we only need global rename lock during migration.
+ * Get BFL lock for rename or migrate process.
  **/
 static int mdt_rename_lock(struct mdt_thread_info *info,
-                          struct lustre_handle *lh,
-                          enum mdt_rename_lock rename_lock)
+                          struct lustre_handle *lh)
 {
-       struct ldlm_namespace   *ns = info->mti_mdt->mdt_namespace;
-       ldlm_policy_data_t      *policy = &info->mti_policy;
-       struct ldlm_res_id      *res_id = &info->mti_res_id;
-       __u64                   flags = 0;
-       int                     rc;
+       int     rc;
        ENTRY;
 
-       /* XXX only do global rename lock for migration */
-       if (mdt_seq_site(info->mti_mdt)->ss_node_id != 0 &&
-           rename_lock == MRL_MIGRATE) {
+       if (mdt_seq_site(info->mti_mdt)->ss_node_id != 0) {
                struct lu_fid *fid = &info->mti_tmp_fid1;
                struct mdt_object *obj;
 
@@ -1188,25 +1147,29 @@ static int mdt_rename_lock(struct mdt_thread_info *info,
                if (IS_ERR(obj))
                        RETURN(PTR_ERR(obj));
 
-               LASSERT(mdt_object_remote(obj));
                rc = mdt_remote_object_lock(info, obj,
                                            &LUSTRE_BFL_FID, lh,
                                            LCK_EX,
                                            MDS_INODELOCK_UPDATE);
                mdt_object_put(info->mti_env, obj);
        } else {
+               struct ldlm_namespace   *ns = info->mti_mdt->mdt_namespace;
+               ldlm_policy_data_t      *policy = &info->mti_policy;
+               struct ldlm_res_id      *res_id = &info->mti_res_id;
+               __u64                   flags = 0;
+
                fid_build_reg_res_name(&LUSTRE_BFL_FID, res_id);
                memset(policy, 0, sizeof *policy);
                policy->l_inodebits.bits = MDS_INODELOCK_UPDATE;
                flags = LDLM_FL_LOCAL_ONLY | LDLM_FL_ATOMIC_CB;
                rc = ldlm_cli_enqueue_local(ns, res_id, LDLM_IBITS, policy,
-                                           LCK_EX, &flags, ldlm_blocking_ast,
-                                           ldlm_completion_ast, NULL, NULL, 0,
-                                           LVB_T_NONE,
-                                           &info->mti_exp->exp_handle.h_cookie,
-                                           lh);
+                                          LCK_EX, &flags, ldlm_blocking_ast,
+                                          ldlm_completion_ast, NULL, NULL, 0,
+                                          LVB_T_NONE,
+                                          &info->mti_exp->exp_handle.h_cookie,
+                                          lh);
+               RETURN(rc);
        }
-
        RETURN(rc);
 }
 
@@ -1346,8 +1309,7 @@ static int mdt_lock_objects_in_linkea(struct mdt_thread_info *info,
 
                mdt_lock_pdo_init(&mll->mll_lh, LCK_PW, &name);
                rc = mdt_object_lock(info, mdt_pobj, &mll->mll_lh,
-                                    MDS_INODELOCK_UPDATE,
-                                    MDT_CROSS_LOCK);
+                                    MDS_INODELOCK_UPDATE);
                if (rc != 0) {
                        CERROR("%s: cannot lock "DFID": rc =%d\n",
                               mdt_obd_name(mdt), PFID(&fid), rc);
@@ -1399,8 +1361,7 @@ static int mdt_reint_migrate_internal(struct mdt_thread_info *info,
        lh_dirp = &info->mti_lh[MDT_LH_PARENT];
        mdt_lock_pdo_init(lh_dirp, LCK_PW, &rr->rr_name);
        rc = mdt_object_lock(info, msrcdir, lh_dirp,
-                            MDS_INODELOCK_UPDATE,
-                            MDT_CROSS_LOCK);
+                            MDS_INODELOCK_UPDATE);
        if (rc)
                GOTO(out_put_parent, rc);
 
@@ -1468,7 +1429,7 @@ static int mdt_reint_migrate_internal(struct mdt_thread_info *info,
                lock_ibits &= ~MDS_INODELOCK_LOOKUP;
        }
 
-       rc = mdt_object_lock(info, mold, lh_childp, lock_ibits, MDT_CROSS_LOCK);
+       rc = mdt_object_lock(info, mold, lh_childp, lock_ibits);
        if (rc != 0)
                GOTO(out_unlock_child, rc);
 
@@ -1593,8 +1554,7 @@ static int mdt_object_lock_save(struct mdt_thread_info *info,
        int rc;
 
        /* we lock the target dir if it is local */
-       rc = mdt_object_lock(info, dir, lh, MDS_INODELOCK_UPDATE,
-                            MDT_LOCAL_LOCK);
+       rc = mdt_object_lock(info, dir, lh, MDS_INODELOCK_UPDATE);
        if (rc != 0)
                return rc;
 
@@ -1641,13 +1601,6 @@ static int mdt_rename_parents_lock(struct mdt_thread_info *info,
                tgt = mdt_object_find_check(info, fid_tgt, 1);
                if (IS_ERR(tgt))
                        GOTO(err_src_put, rc = PTR_ERR(tgt));
-
-               if (unlikely(mdt_object_remote(tgt))) {
-                       CDEBUG(D_INFO, "Source dir "DFID" target dir "DFID
-                              "on different MDTs\n", PFID(fid_src),
-                              PFID(fid_tgt));
-                       GOTO(err_tgt_put, rc = -EXDEV);
-               }
        }
 
        OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_RENAME4, 5);
@@ -1729,6 +1682,7 @@ static int mdt_reint_rename_internal(struct mdt_thread_info *info,
        struct mdt_lock_handle  *lh_newp = NULL;
        struct lu_fid           *old_fid = &info->mti_tmp_fid1;
        struct lu_fid           *new_fid = &info->mti_tmp_fid2;
+       __u64                   lock_ibits;
        int                      rc;
        ENTRY;
 
@@ -1792,22 +1746,21 @@ static int mdt_reint_rename_internal(struct mdt_thread_info *info,
                if (!fid_is_md_operative(new_fid))
                        GOTO(out_put_old, rc = -EPERM);
 
-               if (mdt_object_remote(mold)) {
-                       CDEBUG(D_INFO, "Src child "DFID" is on another MDT\n",
-                              PFID(old_fid));
-                       GOTO(out_put_old, rc = -EXDEV);
-               }
-
                mnew = mdt_object_find(info->mti_env, info->mti_mdt, new_fid);
                if (IS_ERR(mnew))
                        GOTO(out_put_old, rc = PTR_ERR(mnew));
 
                if (mdt_object_remote(mnew)) {
-                       CDEBUG(D_INFO, "src child "DFID" is on another MDT\n",
-                              PFID(new_fid));
-                       GOTO(out_put_new, rc = -EXDEV);
+                       struct mdt_body  *repbody;
+
+                       /* Always send rename req to the target child MDT */
+                       repbody = req_capsule_server_get(info->mti_pill,
+                                                        &RMF_MDT_BODY);
+                       LASSERT(repbody != NULL);
+                       repbody->mbo_fid1 = *new_fid;
+                       repbody->mbo_valid |= (OBD_MD_FLID | OBD_MD_MDS);
+                       GOTO(out_put_old, rc = -EREMOTE);
                }
-
                /* Before locking the target dir, check we do not replace
                 * a dir with a non-dir, otherwise it may deadlock with
                 * link op which tries to create a link in this dir
@@ -1818,10 +1771,24 @@ static int mdt_reint_rename_internal(struct mdt_thread_info *info,
 
                lh_oldp = &info->mti_lh[MDT_LH_OLD];
                mdt_lock_reg_init(lh_oldp, LCK_EX);
-               rc = mdt_object_lock(info, mold, lh_oldp, MDS_INODELOCK_LOOKUP |
-                                    MDS_INODELOCK_XATTR, MDT_CROSS_LOCK);
+
+               lock_ibits = MDS_INODELOCK_LOOKUP | MDS_INODELOCK_XATTR;
+               if (mdt_object_remote(msrcdir)) {
+                       /* Enqueue lookup lock from the parent MDT */
+                       rc = mdt_remote_object_lock(info, msrcdir,
+                                                   mdt_object_fid(mold),
+                                                   &lh_oldp->mlh_rreg_lh,
+                                                   lh_oldp->mlh_rreg_mode,
+                                                   MDS_INODELOCK_LOOKUP);
+                       if (rc != ELDLM_OK)
+                               GOTO(out_put_new, rc);
+
+                       lock_ibits &= ~MDS_INODELOCK_LOOKUP;
+               }
+
+               rc = mdt_object_lock(info, mold, lh_oldp, lock_ibits);
                if (rc != 0)
-                       GOTO(out_put_new, rc);
+                       GOTO(out_unlock_old, rc);
 
                /* Check if @msrcdir is subdir of @mnew, before locking child
                 * to avoid reverse locking. */
@@ -1838,8 +1805,7 @@ static int mdt_reint_rename_internal(struct mdt_thread_info *info,
                mdt_lock_reg_init(lh_newp, LCK_EX);
                rc = mdt_object_lock(info, mnew, lh_newp,
                                     MDS_INODELOCK_LOOKUP |
-                                    MDS_INODELOCK_UPDATE,
-                                    MDT_LOCAL_LOCK);
+                                    MDS_INODELOCK_UPDATE);
                if (rc != 0)
                        GOTO(out_unlock_old, rc);
 
@@ -1848,18 +1814,24 @@ static int mdt_reint_rename_internal(struct mdt_thread_info *info,
        } else if (rc != -EREMOTE && rc != -ENOENT) {
                GOTO(out_put_old, rc);
        } else {
-               /* If mnew does not exist and mold are remote directory,
-                * it only allows rename if they are under same directory */
-               if (mtgtdir != msrcdir && mdt_object_remote(mold)) {
-                       CDEBUG(D_INFO, "Src child "DFID" is on another MDT\n",
-                              PFID(old_fid));
-                       GOTO(out_put_old, rc = -EXDEV);
-               }
-
                lh_oldp = &info->mti_lh[MDT_LH_OLD];
                mdt_lock_reg_init(lh_oldp, LCK_EX);
-               rc = mdt_object_lock(info, mold, lh_oldp, MDS_INODELOCK_LOOKUP |
-                                    MDS_INODELOCK_XATTR, MDT_CROSS_LOCK);
+
+               lock_ibits = MDS_INODELOCK_LOOKUP | MDS_INODELOCK_XATTR;
+               if (mdt_object_remote(msrcdir)) {
+                       /* Enqueue lookup lock from the parent MDT */
+                       rc = mdt_remote_object_lock(info, msrcdir,
+                                                   mdt_object_fid(mold),
+                                                   &lh_oldp->mlh_rreg_lh,
+                                                   lh_oldp->mlh_rreg_mode,
+                                                   MDS_INODELOCK_LOOKUP);
+                       if (rc != ELDLM_OK)
+                               GOTO(out_put_new, rc);
+
+                       lock_ibits &= ~MDS_INODELOCK_LOOKUP;
+               }
+
+               rc = mdt_object_lock(info, mold, lh_oldp, lock_ibits);
                if (rc != 0)
-                       GOTO(out_put_old, rc);
+                       GOTO(out_unlock_old, rc);
 
@@ -1910,8 +1882,7 @@ out_unlock_parents:
 }
 
 static int mdt_reint_rename_or_migrate(struct mdt_thread_info *info,
-                                      struct mdt_lock_handle *lhc,
-                                      enum mdt_rename_lock rename_lock)
+                                      struct mdt_lock_handle *lhc, bool rename)
 {
        struct mdt_reint_record *rr = &info->mti_rr;
        struct ptlrpc_request   *req = mdt_info_req(info);
@@ -1926,14 +1897,20 @@ static int mdt_reint_rename_or_migrate(struct mdt_thread_info *info,
            !fid_is_md_operative(rr->rr_fid2))
                RETURN(-EPERM);
 
-       rc = mdt_rename_lock(info, &rename_lh, rename_lock);
-       if (rc != 0) {
-               CERROR("%s: can't lock FS for rename: rc  = %d\n",
-                      mdt_obd_name(info->mti_mdt), rc);
-               RETURN(rc);
+       /* Note: do not enqueue the rename lock for a replay request: if
+        * another MDT holds the rename lock while blocked waiting for this
+        * MDT to finish recovery, this (failover) MDT could never get the
+        * rename lock, and the two would deadlock. */
+       if (!req_is_replay(req)) {
+               rc = mdt_rename_lock(info, &rename_lh);
+               if (rc != 0) {
+                       CERROR("%s: can't lock FS for rename: rc  = %d\n",
+                              mdt_obd_name(info->mti_mdt), rc);
+                       RETURN(rc);
+               }
        }
 
-       if (rename_lock == MRL_RENAME)
+       if (rename)
                rc = mdt_reint_rename_internal(info, lhc);
        else
                rc = mdt_reint_migrate_internal(info, lhc);
@@ -1947,13 +1924,13 @@ static int mdt_reint_rename_or_migrate(struct mdt_thread_info *info,
 static int mdt_reint_rename(struct mdt_thread_info *info,
                            struct mdt_lock_handle *lhc)
 {
-       return mdt_reint_rename_or_migrate(info, lhc, MRL_RENAME);
+       return mdt_reint_rename_or_migrate(info, lhc, true);
 }
 
 static int mdt_reint_migrate(struct mdt_thread_info *info,
                            struct mdt_lock_handle *lhc)
 {
-       return mdt_reint_rename_or_migrate(info, lhc, MRL_MIGRATE);
+       return mdt_reint_rename_or_migrate(info, lhc, false);
 }
 
 struct mdt_reinter {
index c486dd7..b58e276 100755 (executable)
@@ -1912,6 +1912,77 @@ test_110g () {
 }
 run_test 110g "drop reply during migration"
 
+test_110h () {
+       [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+       local src_dir=$DIR/$tdir/source_dir
+       local tgt_dir=$DIR/$tdir/target_dir
+       local MDTIDX=1
+
+       mkdir -p $src_dir
+       $LFS mkdir -i $MDTIDX $tgt_dir
+
+       dd if=/etc/hosts of=$src_dir/src_file
+       touch $tgt_dir/tgt_file
+       drop_update_reply $MDTIDX \
+               "mrename $src_dir/src_file $tgt_dir/tgt_file" ||
+               error "mrename failed"
+
+       $CHECKSTAT -t file $src_dir/src_file &&
+                               error "src_file present after rename"
+
+       diff /etc/hosts $tgt_dir/tgt_file ||
+                       error "file changed after rename"
+
+}
+run_test 110h "drop update reply during cross-MDT file rename"
+
+test_110i () {
+       [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+       local src_dir=$DIR/$tdir/source_dir
+       local tgt_dir=$DIR/$tdir/target_dir
+       local MDTIDX=1
+
+       mkdir -p $src_dir
+       $LFS mkdir -i $MDTIDX $tgt_dir
+
+       mkdir $src_dir/src_dir
+       touch $src_dir/src_dir/a
+       mkdir $tgt_dir/tgt_dir
+       drop_update_reply $MDTIDX \
+               "mrename $src_dir/src_dir $tgt_dir/tgt_dir" ||
+               error "mrename failed"
+
+       $CHECKSTAT -t dir $src_dir/src_dir &&
+                       error "src_dir present after rename"
+
+       $CHECKSTAT -t dir $tgt_dir/tgt_dir ||
+                               error "tgt_dir not present after rename"
+
+       $CHECKSTAT -t file $tgt_dir/tgt_dir/a ||
+                               error "a not present after rename"
+}
+run_test 110i "drop update reply during cross-MDT dir rename"
+
+test_110j () {
+       [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return 0
+       local remote_dir=$DIR/$tdir/remote_dir
+       local local_dir=$DIR/$tdir/local_dir
+       local MDTIDX=1
+
+       mkdir -p $DIR/$tdir
+       mkdir $DIR/$tdir/local_dir
+       $LFS mkdir -i $MDTIDX $remote_dir
+
+       touch $local_dir/local_file
+       drop_update_reply $MDTIDX \
+               "ln $local_dir/local_file $remote_dir/remote_file" ||
+               error "ln failed"
+
+       $CHECKSTAT -t file $remote_dir/remote_file ||
+                               error "remote not present after ln"
+}
+run_test 110j "drop update reply during cross-MDT ln"
+
 # LU-2844 mdt prepare fail should not cause umount oops
 test_111 ()
 {
index 3217c8f..4d53d89 100644 (file)
@@ -276,7 +276,7 @@ run_test 7a "mkdir .../d7; mcreate .../d7/f; chmod .../d7/f ===="
 
 test_7b() {
        if [ ! -d $DIR/$tdir ]; then
-               mkdir $DIR/$tdir
+               test_mkdir $DIR/$tdir
        fi
        $MCREATE $DIR/$tdir/$tfile
        echo -n foo > $DIR/$tdir/$tfile
@@ -354,6 +354,7 @@ test_15() {
        mv $DIR/$tdir/$tfile $DIR/$tdir/${tfile}_2
        $CHECKSTAT -t file $DIR/$tdir/${tfile}_2 ||
                error "$tdir/${tfile_2} not a file after rename"
+       rm $DIR/$tdir/${tfile}_2 || error "unlink failed after rename"
 }
 run_test 15 "touch .../d15/f; mv .../d15/f .../d15/f2 =========="
 
@@ -519,7 +520,7 @@ run_test 17k "symlinks: rsync with xattrs enabled ========================="
 test_17l() { # LU-279
        [[ -z "$(which getfattr 2>/dev/null)" ]] &&
                skip "no getfattr command" && return 0
-       mkdir -p $DIR/$tdir
+       test_mkdir -p $DIR/$tdir
        touch $DIR/$tdir/$tfile
        ln -s $DIR/$tdir/$tfile $DIR/$tdir/$tfile.lnk
        for path in "$DIR/$tdir" "$DIR/$tdir/$tfile" "$DIR/$tdir/$tfile.lnk"; do
@@ -554,7 +555,7 @@ test_17m() {
 
        [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
 
-       mkdir -p $WDIR
+       test_mkdir -p $WDIR
        long_sym=$short_sym
        # create a long symlink file
        for ((i = 0; i < 4; ++i)); do
@@ -637,7 +638,7 @@ test_17n() {
 
        [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
 
-       mkdir $DIR/$tdir
+       test_mkdir $DIR/$tdir
        for ((i=0; i<10; i++)); do
                $LFS mkdir -i1 -c2 $DIR/$tdir/remote_dir_${i} ||
                        error "create remote dir error $i"
@@ -686,7 +687,7 @@ test_17o() {
        local mdt_index
        local rc=0
 
-       mkdir -p $WDIR
+       test_mkdir -p $WDIR
        mdt_index=$($LFS getstripe -M $WDIR)
        mdt_index=$((mdt_index+1))
 
@@ -1040,7 +1041,7 @@ test_24v() {
        # Performance issue on ZFS see LU-4072 (c.f. LU-2887)
        [ $(facet_fstype $SINGLEMDS) = "zfs" ] && NRFILES=10000
 
-       mkdir -p $DIR/$tdir
+       test_mkdir -p $DIR/$tdir
        createmany -m $DIR/$tdir/$tfile $NRFILES
 
        cancel_lru_locks mdc
@@ -1083,30 +1084,28 @@ test_24x() {
        local MDTIDX=1
        local remote_dir=$DIR/$tdir/remote_dir
 
-       mkdir -p $DIR/$tdir
+       test_mkdir -p $DIR/$tdir
        $LFS mkdir -i $MDTIDX $remote_dir ||
                error "create remote directory failed"
 
-       mkdir -p $DIR/$tdir/src_dir
+       test_mkdir -p $DIR/$tdir/src_dir
        touch $DIR/$tdir/src_file
-       mkdir -p $remote_dir/tgt_dir
+       test_mkdir -p $remote_dir/tgt_dir
        touch $remote_dir/tgt_file
 
-       mrename $remote_dir $DIR/ &&
-               error "rename dir cross MDT works!"
-
-       mrename $DIR/$tdir/src_dir $remote_dir/tgt_dir &&
-               error "rename dir cross MDT works!"
+       mrename $DIR/$tdir/src_dir $remote_dir/tgt_dir ||
+               error "rename dir cross MDT failed!"
 
-       mrename $DIR/$tdir/src_file $remote_dir/tgt_file &&
-               error "rename file cross MDT works!"
+       mrename $DIR/$tdir/src_file $remote_dir/tgt_file ||
+               error "rename file cross MDT failed!"
 
-       ln $DIR/$tdir/src_file $remote_dir/tgt_file1 &&
-               error "ln file cross MDT should not work!"
+       touch $DIR/$tdir/ln_file
+       ln $DIR/$tdir/ln_file $remote_dir/ln_name ||
+               error "ln file cross MDT failed"
 
        rm -rf $DIR/$tdir || error "Can not delete directories"
 }
-run_test 24x "cross rename/link should be failed"
+run_test 24x "cross MDT rename/link"
 
 test_24y() {
        [[ $MDSCOUNT -lt 2 ]] && skip "needs >= 2 MDTs" && return
@@ -1114,13 +1113,13 @@ test_24y() {
        local MDTIDX=1
        local remote_dir=$DIR/$tdir/remote_dir
 
-       mkdir -p $DIR/$tdir
+       test_mkdir -p $DIR/$tdir
        $LFS mkdir -i $MDTIDX $remote_dir ||
                   error "create remote directory failed"
 
-       mkdir -p $remote_dir/src_dir
+       test_mkdir -p $remote_dir/src_dir
        touch $remote_dir/src_file
-       mkdir -p $remote_dir/tgt_dir
+       test_mkdir -p $remote_dir/tgt_dir
        touch $remote_dir/tgt_file
 
        mrename $remote_dir/src_dir $remote_dir/tgt_dir ||
@@ -1136,37 +1135,11 @@ test_24y() {
 }
 run_test 24y "rename/link on the same dir should succeed"
 
-test_24z() {
-       [[ $MDSCOUNT -lt 2 ]] && skip "needs >= 2 MDTs" && return
-       [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
-       local MDTIDX=1
-       local remote_src=$DIR/$tdir/remote_dir
-       local remote_tgt=$DIR/$tdir/remote_tgt
-
-       mkdir -p $DIR/$tdir
-       $LFS mkdir -i $MDTIDX $remote_src ||
-                  error "create remote directory failed"
-
-       $LFS mkdir -i $MDTIDX $remote_tgt ||
-                  error "create remote directory failed"
-
-       mrename $remote_src $remote_tgt &&
-               error "rename remote dirs should not work!"
-
-       # If target dir does not exists, it should succeed
-       rm -rf $remote_tgt
-       mrename $remote_src $remote_tgt ||
-               error "rename remote dirs(tgt dir does not exists) failed!"
-
-       rm -rf $DIR/$tdir || error "Can not delete directories"
-}
-run_test 24z "rename one remote dir to another remote dir should fail"
-
 test_24A() { # LU-3182
        local NFILES=5000
 
        rm -rf $DIR/$tdir
-       mkdir -p $DIR/$tdir
+       test_mkdir -p $DIR/$tdir
        createmany -m $DIR/$tdir/$tfile $NFILES
        local t=$(ls $DIR/$tdir | wc -l)
        local u=$(ls $DIR/$tdir | sort -u | wc -l)
@@ -1183,7 +1156,7 @@ test_24B() { # LU-4805
        [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
        local count
 
-       mkdir $DIR/$tdir
+       test_mkdir $DIR/$tdir
        $LFS setdirstripe -i0 -c$MDSCOUNT $DIR/$tdir/striped_dir ||
                error "create striped dir failed"
 
@@ -1250,6 +1223,38 @@ test_24D() { # LU-6101
 }
 run_test 24D "readdir() returns correct number of entries after cursor reload"
 
+test_24E() {
+       [[ $MDSCOUNT -lt 4 ]] && skip "needs >= 4 MDTs" && return
+       [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
+
+       mkdir -p $DIR/$tdir
+       mkdir $DIR/$tdir/src_dir
+       $LFS mkdir -i 1 $DIR/$tdir/src_dir/src_child ||
+               error "create remote source failed"
+
+       touch $DIR/$tdir/src_dir/src_child/a
+
+       $LFS mkdir -i 2 $DIR/$tdir/tgt_dir ||
+               error "create remote target dir failed"
+
+       $LFS mkdir -i 3 $DIR/$tdir/tgt_dir/tgt_child ||
+               error "create remote target child failed"
+
+       mrename $DIR/$tdir/src_dir/src_child $DIR/$tdir/tgt_dir/tgt_child ||
+               error "rename dir cross MDT failed!"
+
+       find $DIR/$tdir
+
+       $CHECKSTAT -t dir $DIR/$tdir/src_dir/src_child &&
+               error "src_child still exists after rename"
+
+       $CHECKSTAT -t file $DIR/$tdir/tgt_dir/tgt_child/a ||
+               error "missing file(a) after rename"
+
+       rm -rf $DIR/$tdir || error "Can not delete directories"
+}
+run_test 24E "cross MDT rename/link"
+
 test_25a() {
        echo '== symlink sanity ============================================='
 
@@ -1756,7 +1761,7 @@ test_27y() {
        done
 
        OSTIDX=$(index_from_ostuuid $OST)
-       mkdir -p $DIR/$tdir
+       test_mkdir -p $DIR/$tdir
        $SETSTRIPE -c 1 $DIR/$tdir      # 1 stripe / file
 
        for OSC in $MDS_OSCS; do
@@ -2010,7 +2015,7 @@ test_27D() {
        local ost_list=$(seq $first_ost $ost_step $last_ost)
        local ost_range="$first_ost $last_ost $ost_step"
 
-       mkdir -p $DIR/$tdir
+       test_mkdir -p $DIR/$tdir
        pool_add $POOL || error "pool_add failed"
        pool_add_targets $POOL $ost_range || error "pool_add_targets failed"
        llapi_layout_test -d$DIR/$tdir -p$POOL -o$OSTCOUNT ||
@@ -2304,7 +2309,7 @@ link_one() {
 }
 
 test_31o() { # LU-2901
-       mkdir -p $DIR/$tdir
+       test_mkdir -p $DIR/$tdir
        for LOOP in $(seq 100); do
                rm -f $DIR/$tdir/$tfile*
                for THREAD in $(seq 8); do
@@ -2322,7 +2327,7 @@ run_test 31o "duplicate hard links with same filename"
 test_31p() {
        [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
 
-       mkdir $DIR/$tdir
+       test_mkdir $DIR/$tdir
        $LFS setdirstripe -i0 -c2 $DIR/$tdir/striped_dir
        $LFS setdirstripe -D -c2 -t all_char $DIR/$tdir/striped_dir
 
@@ -2675,7 +2680,7 @@ test_33d() {
        local MDTIDX=1
        local remote_dir=$DIR/$tdir/remote_dir
 
-       mkdir -p $DIR/$tdir
+       test_mkdir -p $DIR/$tdir
        $LFS mkdir -i $MDTIDX $remote_dir ||
                error "create remote directory failed"
 
@@ -2994,7 +2999,7 @@ run_test 36h "utime on file racing with OST BRW write =========="
 test_36i() {
        [ $MDSCOUNT -lt 2 ] && skip "needs >= 2 MDTs" && return
 
-       mkdir $DIR/$tdir
+       test_mkdir $DIR/$tdir
        $LFS setdirstripe -i0 -c$MDSCOUNT $DIR/$tdir/striped_dir
 
        local mtime=$(stat -c%Y $DIR/$tdir/striped_dir)
@@ -3411,7 +3416,7 @@ run_test 39n "check that O_NOATIME is honored"
 test_39o() {
        TESTDIR=$DIR/$tdir/$tfile
        [ -e $TESTDIR ] && rm -rf $TESTDIR
-       test_mkdir -p $TESTDIR
+       mkdir -p $TESTDIR
        cd $TESTDIR
        links1=2
        ls
@@ -3433,7 +3438,7 @@ test_39p() {
        local MDTIDX=1
        TESTDIR=$DIR/$tdir/$tfile
        [ -e $TESTDIR ] && rm -rf $TESTDIR
-       mkdir -p $TESTDIR
+       test_mkdir -p $TESTDIR
        cd $TESTDIR
        links1=2
        ls
@@ -4925,7 +4930,7 @@ test_56x() {
                skip_env "need 2 OST, skipping test" && return
 
        local dir0=$DIR/$tdir/$testnum
-       mkdir -p $dir0 || error "creating dir $dir0"
+       test_mkdir -p $dir0 || error "creating dir $dir0"
 
        local ref1=/etc/passwd
        local file1=$dir0/file1
@@ -4949,7 +4954,7 @@ test_56y() {
 
        local res=""
        local dir0=$DIR/$tdir/$testnum
-       mkdir -p $dir0 || error "creating dir $dir0"
+       test_mkdir -p $dir0 || error "creating dir $dir0"
        local f1=$dir0/file1
        local f2=$dir0/file2
 
@@ -5567,8 +5572,9 @@ test_69() {
 run_test 69 "verify oa2dentry return -ENOENT doesn't LBUG ======"
 
 test_71() {
-    test_mkdir -p $DIR/$tdir
-    sh rundbench -C -D $DIR/$tdir 2 || error "dbench failed!"
+       test_mkdir -p $DIR/$tdir
+       $LFS setdirstripe -D -c$MDSCOUNT $DIR/$tdir
+       sh rundbench -C -D $DIR/$tdir 2 || error "dbench failed!"
 }
 run_test 71 "Running dbench on lustre (don't segment fault) ===="
 
@@ -13189,19 +13195,16 @@ test_300e() {
        $LFS setdirstripe -i 0 -c 2 -t all_char $DIR/$tdir/striped_dir/stp_c ||
                error "set striped dir under striped dir error"
 
-       mrename $DIR/$tdir/striped_dir/a $DIR/$tdir/striped_dir/b &&
-               error "rename file under striped dir should fail"
+       mrename $DIR/$tdir/striped_dir/dir_a $DIR/$tdir/striped_dir/dir_b ||
+               error "rename dir under striped dir fails"
 
-       mrename $DIR/$tdir/striped_dir/dir_a $DIR/$tdir/striped_dir/dir_b &&
-               error "rename dir under striped dir should fail"
-
-       mrename $DIR/$tdir/striped_dir/stp_a $DIR/$tdir/striped_dir/stp_b &&
-               error "rename dir under different stripes should fail"
+       mrename $DIR/$tdir/striped_dir/stp_a $DIR/$tdir/striped_dir/stp_b ||
+               error "rename dir under different stripes fails"
 
        mrename $DIR/$tdir/striped_dir/a $DIR/$tdir/striped_dir/c ||
                error "rename file under striped dir should succeed"
 
-       mrename $DIR/$tdir/striped_dir/dir_a $DIR/$tdir/striped_dir/dir_c ||
+       mrename $DIR/$tdir/striped_dir/dir_b $DIR/$tdir/striped_dir/dir_c ||
                error "rename dir under striped dir should succeed"
 
        rm -rf $DIR/$tdir
@@ -13233,21 +13236,15 @@ test_300f() {
        $LFS setdirstripe -i 0 -c 2 $DIR/$tdir/striped_dir/stp_b ||
                error "create striped dir under striped dir fails"
 
-       mrename $DIR/$tdir/striped_dir/a $DIR/$tdir/striped_dir1/b &&
-               error "rename file under different striped dir should fail"
-
-       mrename $DIR/$tdir/striped_dir/dir_a $DIR/$tdir/striped_dir1/dir_b &&
+       mrename $DIR/$tdir/striped_dir/dir_a $DIR/$tdir/striped_dir1/dir_b ||
-               error "rename dir under different striped dir should fail"
+               error "rename dir under different striped dir fails"
 
-       mrename $DIR/$tdir/striped_dir/stp_a $DIR/$tdir/striped_dir1/stp_b &&
+       mrename $DIR/$tdir/striped_dir/stp_a $DIR/$tdir/striped_dir1/stp_b ||
-               error "rename striped dir under diff striped dir should fail"
+               error "rename striped dir under diff striped dir fails"
 
        mrename $DIR/$tdir/striped_dir/a $DIR/$tdir/striped_dir1/a ||
                error "rename file under diff striped dirs fails"
 
-       mrename $DIR/$tdir/striped_dir/dir_a $DIR/$tdir/striped_dir1/dir_a ||
-               error "rename dir under diff striped dirs fails"
-
        rm -rf $DIR/$tdir
 }
 run_test 300f "check rename cross striped directory"
@@ -13399,6 +13396,80 @@ test_300i() {
 }
 run_test 300i "client handle unknown hash type striped directory"
 
+prepare_remote_file() {
+       mkdir $DIR/$tdir/src_dir ||
+               error "create source dir failed"
+
+       cp /etc/hosts $DIR/$tdir/src_dir/a || error "copy /etc/hosts failed"
+       touch $DIR/$tdir/src_dir/a
+
+       $LFS mkdir -i 1 $DIR/$tdir/tgt_dir ||
+               error "create remote target dir failed"
+
+       touch $DIR/$tdir/tgt_dir/b
+
+       mrename $DIR/$tdir/src_dir/a $DIR/$tdir/tgt_dir/b ||
+               error "rename file cross MDT failed!"
+
+       $CHECKSTAT -t file $DIR/$tdir/src_dir/a &&
+               error "source file(a) still exists after rename"
+
+       $CHECKSTAT -t file $DIR/$tdir/tgt_dir/b ||
+               error "missing file(b) after rename"
+
+       diff /etc/hosts $DIR/$tdir/tgt_dir/b ||
+               error "diff after rename"
+}
+
+test_310a() {
+       [[ $MDSCOUNT -lt 2 ]] && skip "needs >= 2 MDTs" && return
+       [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
+       local remote_file=$DIR/$tdir/tgt_dir/b
+
+       mkdir -p $DIR/$tdir
+
+       prepare_remote_file || error "prepare remote file failed"
+
+       #open-unlink file
+       $OPENUNLINK $remote_file $remote_file || error "openunlink failed"
+       $CHECKSTAT -a $remote_file || error "remote file still exists"
+}
+run_test 310a "open unlink remote file"
+
+test_310b() {
+       [[ $MDSCOUNT -lt 2 ]] && skip "needs >= 2 MDTs" && return
+       [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
+       local remote_file=$DIR/$tdir/tgt_dir/b
+
+       mkdir -p $DIR/$tdir
+
+       prepare_remote_file || error "prepare remote file failed"
+
+       ln $remote_file $DIR/$tfile || error "link failed for remote file"
+       $MULTIOP $DIR/$tfile Ouc || error "multiop failed"
+       $CHECKSTAT -t file $remote_file || error "check file failed"
+}
+run_test 310b "unlink remote file with multiple links while open"
+
+test_310c() {
+       [[ $MDSCOUNT -lt 4 ]] && skip "needs >= 4 MDTs" && return
+       [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
+       local remote_file=$DIR/$tdir/tgt_dir/b
+
+       mkdir -p $DIR/$tdir
+
+       prepare_remote_file || error "prepare remote file failed"
+
+       ln $remote_file $DIR/$tfile || error "link failed for remote file"
+       multiop_bg_pause $remote_file O_uc ||
+                       error "multiop failed for remote file"
+       MULTIPID=$!
+       $MULTIOP $DIR/$tfile Ouc
+       kill -USR1 $MULTIPID
+       wait $MULTIPID
+}
+run_test 310c "open-unlink remote file with multiple links"
+
 test_400a() { # LU-1606, was conf-sanity test_74
        local extra_flags=''
        local out=$TMP/$tfile
index fda3034..27d0606 100755 (executable)
@@ -4497,7 +4497,7 @@ drop_reint_reply() {
 }
 
 drop_update_reply() {
-# OBD_FAIL_UPDATE_OBJ_NET_REP
+# OBD_FAIL_OUT_UPDATE_NET_REP
        local index=$1
        shift 1
        RC=0