Whamcloud - gitweb
LU-14470 dne: striped mkdir replay by client request 85/47385/22
author Lai Siyao <lai.siyao@whamcloud.com>
Sun, 21 Nov 2021 09:53:09 +0000 (04:53 -0500)
committer Oleg Drokin <green@whamcloud.com>
Thu, 28 Sep 2023 07:59:30 +0000 (07:59 +0000)
If all MDTs involved in a striped mkdir were rebooted, or MDT recovery
was aborted, the mkdir is replayed by client request. To replay such a
mkdir correctly, pack the directory LMV in the mkdir reply and have the
client save it from the reply into the request; the MDS then uses this
layout to replay the mkdir.

In the MDT recovery abort case, the original mkdir may have been
partially executed, so mkdir replay should check for the cases below
and not treat them as errors:
* the name entry is found in the parent directory on a remote MDT.
* the stripe exists on a remote MDT.

For backward compatibility, add the MDS_MKDIR_LMV flag to indicate that
a client requires the directory LMV in the mkdir reply.

Updated replay-single 100c since striped mkdir can be replayed now.

Updated recovery-small 130 since create fetches the layout now.

Added replay-single 100e.

Test-Parameters: mdscount=2 mdtcount=4 testlist=racer,racer,racer,racer,racer
Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: If0cc8f4aebbe55cc28786d6b4198dbb57743feb3
Reviewed-on: https://review.whamcloud.com/c/fs/lustre-release/+/47385
Tested-by: jenkins <devops@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Reviewed-by: Qian Yingjin <qian@ddn.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
18 files changed:
lustre/include/lustre_lmv.h
lustre/include/md_object.h
lustre/include/uapi/linux/lustre/lustre_user.h
lustre/lod/lod_object.c
lustre/mdc/mdc_internal.h
lustre/mdc/mdc_lib.c
lustre/mdc/mdc_locks.c
lustre/mdc/mdc_reint.c
lustre/mdd/mdd_dir.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_open.c
lustre/mdt/mdt_recovery.c
lustre/mdt/mdt_reint.c
lustre/osd-ldiskfs/osd_handler.c
lustre/ptlrpc/layout.c
lustre/tests/racer.sh [changed mode: 0644->0755]
lustre/tests/recovery-small.sh
lustre/tests/replay-single.sh

index dd833b5..a323dd7 100644 (file)
@@ -163,7 +163,7 @@ lmv_stripe_object_dump(int mask, const struct lmv_stripe_object *lsmo)
        const struct lmv_stripe_md *lsm = &lsmo->lso_lsm;
        int i;
 
-       CDEBUG_LIMIT(mask,
+       CDEBUG(mask,
               "dump LMV: refs %u magic=%#x count=%u index=%u hash=%s:%#x max_inherit=%hhu max_inherit_rr=%hhu version=%u migrate_offset=%u migrate_hash=%s:%x pool=%.*s\n",
               lsm->lsm_md_magic, atomic_read(&lsmo->lso_refs),
               lsm->lsm_md_stripe_count, lsm->lsm_md_master_mdt_index,
@@ -181,8 +181,8 @@ lmv_stripe_object_dump(int mask, const struct lmv_stripe_object *lsmo)
                return;
 
        for (i = 0; i < lsm->lsm_md_stripe_count; i++)
-               CDEBUG(mask, "stripe[%d] "DFID"\n",
-                      i, PFID(&lsm->lsm_md_oinfo[i].lmo_fid));
+               CDEBUG_LIMIT(mask, "stripe[%d] "DFID"\n",
+                            i, PFID(&lsm->lsm_md_oinfo[i].lmo_fid));
 }
 
 static inline bool
index f6a039f..7deef4e 100644 (file)
@@ -173,7 +173,8 @@ struct md_op_spec {
                     sp_permitted:1, /* do not check permission */
                     sp_migrate_close:1, /* close the file during migrate */
                     sp_migrate_nsonly:1, /* migrate dirent only */
-                    sp_dmv_imp_inherit:1; /* implicit default LMV inherit */
+                    sp_dmv_imp_inherit:1, /* implicit default LMV inherit */
+                    sp_replay:1; /* replay, op may be partially executed */
 
        /** to create directory */
        const struct dt_index_features *sp_feat;
index 3619ae5..0c09c76 100644 (file)
@@ -1656,6 +1656,8 @@ enum la_valid {
                              MDS_OPEN_PCC | MDS_OP_WITH_FID |          \
                              MDS_OPEN_DEFAULT_LMV)
 
+/* mkdir fetches LMV, reuse bit of MDS_OPEN_RESYNC */
+#define MDS_MKDIR_LMV  MDS_OPEN_RESYNC
 
 /********* Changelogs **********/
 /** Changelog record types */
index 95f6984..a438d4f 100644 (file)
@@ -971,7 +971,8 @@ int lod_load_lmv_shards(const struct lu_env *env, struct lod_object *lo,
                        }
 
                        /* The slot has been occupied. */
-                       if (!fid_is_zero(&lmv1->lmv_stripe_fids[index])) {
+                       if (!fid_is_zero(&lmv1->lmv_stripe_fids[index]) &&
+                           !CFS_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_SLAVE_NAME)) {
                                struct lu_fid fid0;
 
                                fid_le_to_cpu(&fid0,
@@ -2303,6 +2304,30 @@ int lod_alloc_foreign_lmv(struct lod_object *lo, size_t size)
        return 0;
 }
 
+static int lod_prep_md_replayed_create(const struct lu_env *env,
+                                      struct dt_object *dt,
+                                      struct lu_attr *attr,
+                                      const struct lu_buf *lmv_buf,
+                                      struct dt_object_format *dof,
+                                      struct thandle *th)
+{
+       struct lod_object *lo = lod_dt_obj(dt);
+       int rc;
+
+       ENTRY;
+
+       mutex_lock(&lo->ldo_layout_mutex);
+       rc = lod_parse_dir_striping(env, lo, lmv_buf);
+       if (rc == 0) {
+               lo->ldo_dir_stripe_loaded = 1;
+               lo->ldo_dir_striped = 1;
+               rc = lod_dir_declare_create_stripes(env, dt, attr, dof, th);
+       }
+       mutex_unlock(&lo->ldo_layout_mutex);
+
+       RETURN(rc);
+}
+
 /**
  *
  * Free cached foreign LMV
@@ -2362,22 +2387,26 @@ static int lod_declare_xattr_set_lmv(const struct lu_env *env,
                if (lo->ldo_is_foreign) {
                        rc = lod_alloc_foreign_lmv(lo, lum_buf->lb_len);
                        if (rc != 0)
-                               GOTO(out, rc);
+                               RETURN(rc);
                        memcpy(lo->ldo_foreign_lmv, lum, lum_buf->lb_len);
                        lo->ldo_dir_stripe_loaded = 1;
                }
-               GOTO(out, rc = 0);
+               RETURN(0);
        }
 
-       /* prepare dir striped objects */
-       rc = lod_prep_md_striped_create(env, dt, attr, lum, dof, th);
-       if (rc != 0) {
+       /* client replay striped directory creation with LMV, this happens when
+        * all involved MDTs were rebooted, or MDT recovery was aborted.
+        */
+       if (le32_to_cpu(lum->lum_magic) == LMV_MAGIC_V1)
+               rc = lod_prep_md_replayed_create(env, dt, attr, lum_buf, dof,
+                                                th);
+       else
+               rc = lod_prep_md_striped_create(env, dt, attr, lum, dof, th);
+       if (rc != 0)
                /* failed to create striping, let's reset
                 * config so that others don't get confused */
                lod_striping_free(env, lo);
-               GOTO(out, rc);
-       }
-out:
+
        RETURN(rc);
 }
 
@@ -4236,7 +4265,7 @@ static int lod_xattr_set_default_lmv_on_dir(const struct lu_env *env,
  *
  * \param[in] env      execution environment
  * \param[in] dt       the striped object
- * \param[in] buf      not used currently
+ * \param[in] buf      buf lmv_user_md for create, or lmv_mds_md for replay
  * \param[in] name     not used currently
  * \param[in] fl       xattr flag (see OSD API description)
  * \param[in] th       transaction handle
@@ -4248,19 +4277,22 @@ static int lod_xattr_set_lmv(const struct lu_env *env, struct dt_object *dt,
                             const struct lu_buf *buf, const char *name,
                             int fl, struct thandle *th)
 {
-       struct lod_object       *lo = lod_dt_obj(dt);
-       struct lod_thread_info  *info = lod_env_info(env);
-       struct lu_attr          *attr = &info->lti_attr;
+       struct lod_object *lo = lod_dt_obj(dt);
+       struct lod_thread_info *info = lod_env_info(env);
+       struct lu_attr *attr = &info->lti_attr;
        struct dt_object_format *dof = &info->lti_format;
-       struct lu_buf           lmv_buf;
-       struct lu_buf           slave_lmv_buf;
-       struct lmv_mds_md_v1    *lmm;
-       struct lmv_mds_md_v1    *slave_lmm = NULL;
-       struct dt_insert_rec    *rec = &info->lti_dt_rec;
-       int                     i;
-       int                     rc;
-       ENTRY;
+       struct lu_buf lmv_buf;
+       struct lu_buf slave_lmv_buf;
+       struct lmv_user_md *lum = buf->lb_buf;
+       struct lmv_mds_md_v1 *lmm;
+       struct lmv_mds_md_v1 *slave_lmm = NULL;
+       struct dt_insert_rec *rec = &info->lti_dt_rec;
+       int i;
+       int rc;
 
+       ENTRY;
+       /* lum is used to know whether it's replay */
+       LASSERT(lum);
        if (!S_ISDIR(dt->do_lu.lo_header->loh_attr))
                RETURN(-ENOTDIR);
 
@@ -4304,6 +4336,7 @@ static int lod_xattr_set_lmv(const struct lu_env *env, struct dt_object *dt,
                struct lu_name *sname;
                struct linkea_data ldata = { NULL };
                struct lu_buf linkea_buf;
+               bool stripe_created = false;
 
                /* OBD_FAIL_MDS_STRIPE_FID may leave stripe uninitialized */
                if (!dto)
@@ -4313,6 +4346,15 @@ static int lod_xattr_set_lmv(const struct lu_env *env, struct dt_object *dt,
                if (i && CFS_FAIL_CHECK(OBD_FAIL_MDS_STRIPE_CREATE))
                        continue;
 
+               /* if it's replay by client request, and stripe exists on remote
+                * MDT, it means mkdir was partially executed: stripe was
+                * created on remote MDT successfully, but target not in last
+                * run.
+                */
+               if (unlikely((le32_to_cpu(lum->lum_magic) == LMV_MAGIC_V1) &&
+                            dt_object_exists(dto) && dt_object_remote(dto)))
+                       stripe_created = true;
+
                /* don't create stripe if:
                 * 1. it's source stripe of migrating directory
                 * 2. it's existed stripe of splitting directory
@@ -4321,7 +4363,7 @@ static int lod_xattr_set_lmv(const struct lu_env *env, struct dt_object *dt,
                    (lod_is_splitting(lo) && i < lo->ldo_dir_split_offset)) {
                        if (!dt_object_exists(dto))
                                GOTO(out, rc = -EINVAL);
-               } else {
+               } else if (!stripe_created) {
                        dt_write_lock(env, dto, DT_TGT_CHILD);
                        rc = lod_sub_create(env, dto, attr, NULL, dof, th);
                        if (rc != 0) {
@@ -4367,12 +4409,6 @@ static int lod_xattr_set_lmv(const struct lu_env *env, struct dt_object *dt,
                    lo->ldo_dir_split_offset > i)
                        continue;
 
-               rec->rec_fid = lu_object_fid(&dt->do_lu);
-               rc = lod_sub_insert(env, dto, (struct dt_rec *)rec,
-                                   (const struct dt_key *)dotdot, th);
-               if (rc != 0)
-                       GOTO(out, rc);
-
                if (CFS_FAIL_CHECK(OBD_FAIL_LFSCK_BAD_SLAVE_NAME) &&
                    cfs_fail_val == i)
                        snprintf(stripe_name, sizeof(info->lti_key), DFID":%d",
@@ -4381,18 +4417,27 @@ static int lod_xattr_set_lmv(const struct lu_env *env, struct dt_object *dt,
                        snprintf(stripe_name, sizeof(info->lti_key), DFID":%d",
                                 PFID(lu_object_fid(&dto->do_lu)), i);
 
-               sname = lod_name_get(env, stripe_name, strlen(stripe_name));
-               rc = linkea_links_new(&ldata, &info->lti_linkea_buf,
-                                     sname, lu_object_fid(&dt->do_lu));
-               if (rc != 0)
-                       GOTO(out, rc);
+               if (!stripe_created) {
+                       rec->rec_fid = lu_object_fid(&dt->do_lu);
+                       rc = lod_sub_insert(env, dto, (struct dt_rec *)rec,
+                                           (const struct dt_key *)dotdot, th);
+                       if (rc != 0)
+                               GOTO(out, rc);
 
-               linkea_buf.lb_buf = ldata.ld_buf->lb_buf;
-               linkea_buf.lb_len = ldata.ld_leh->leh_len;
-               rc = lod_sub_xattr_set(env, dto, &linkea_buf,
-                                      XATTR_NAME_LINK, 0, th);
-               if (rc != 0)
-                       GOTO(out, rc);
+                       sname = lod_name_get(env, stripe_name,
+                                            strlen(stripe_name));
+                       rc = linkea_links_new(&ldata, &info->lti_linkea_buf,
+                                             sname, lu_object_fid(&dt->do_lu));
+                       if (rc != 0)
+                               GOTO(out, rc);
+
+                       linkea_buf.lb_buf = ldata.ld_buf->lb_buf;
+                       linkea_buf.lb_len = ldata.ld_leh->leh_len;
+                       rc = lod_sub_xattr_set(env, dto, &linkea_buf,
+                                              XATTR_NAME_LINK, 0, th);
+                       if (rc != 0)
+                               GOTO(out, rc);
+               }
 
                rec->rec_fid = lu_object_fid(&dto->do_lu);
                rc = lod_sub_insert(env, dt_object_child(dt),
@@ -4456,10 +4501,12 @@ static int lod_dir_striping_create_internal(const struct lu_env *env,
        LASSERT(ergo(lds != NULL,
                     lds->lds_def_striping_set ||
                     lds->lds_dir_def_striping_set));
+       LASSERT(lmu);
 
        if (!LMVEA_DELETE_VALUES(lo->ldo_dir_stripe_count,
                                 lo->ldo_dir_stripe_offset)) {
-               if (!lmu) {
+               if (!lmu->lb_buf) {
+                       /* mkdir by default LMV */
                        struct lmv_user_md_v1 *v1 = info->lti_ea_store;
                        int stripe_count = lo->ldo_dir_stripe_count;
 
@@ -4489,25 +4536,22 @@ static int lod_dir_striping_create_internal(const struct lu_env *env,
                                               th);
                if (rc != 0)
                        RETURN(rc);
-       } else {
+       } else if (lmu->lb_buf) {
                /* foreign LMV EA case */
-               if (lmu) {
+               if (declare) {
                        struct lmv_foreign_md *lfm = lmu->lb_buf;
 
-                       if (lfm->lfm_magic == LMV_MAGIC_FOREIGN) {
+                       if (lfm->lfm_magic == LMV_MAGIC_FOREIGN)
                                rc = lod_declare_xattr_set_lmv(env, dt, attr,
                                                               lmu, dof, th);
-                       }
-               } else {
-                       if (lo->ldo_is_foreign) {
-                               LASSERT(lo->ldo_foreign_lmv != NULL &&
-                                       lo->ldo_foreign_lmv_size > 0);
-                               info->lti_buf.lb_buf = lo->ldo_foreign_lmv;
-                               info->lti_buf.lb_len = lo->ldo_foreign_lmv_size;
-                               lmu = &info->lti_buf;
-                               rc = lod_xattr_set_lmv(env, dt, lmu,
-                                                      XATTR_NAME_LMV, 0, th);
-                       }
+               } else if (lo->ldo_is_foreign) {
+                       LASSERT(lo->ldo_foreign_lmv != NULL &&
+                               lo->ldo_foreign_lmv_size > 0);
+                       info->lti_buf.lb_buf = lo->ldo_foreign_lmv;
+                       info->lti_buf.lb_len = lo->ldo_foreign_lmv_size;
+                       lmu = &info->lti_buf;
+                       rc = lod_xattr_set_lmv(env, dt, lmu, XATTR_NAME_LMV, 0,
+                                              th);
                }
        }
 
@@ -4609,10 +4653,11 @@ static int lod_declare_dir_striping_create(const struct lu_env *env,
 static int lod_dir_striping_create(const struct lu_env *env,
                                   struct dt_object *dt,
                                   struct lu_attr *attr,
+                                  const struct lu_buf *lmu,
                                   struct dt_object_format *dof,
                                   struct thandle *th)
 {
-       return lod_dir_striping_create_internal(env, dt, attr, NULL, dof, th,
+       return lod_dir_striping_create_internal(env, dt, attr, lmu, dof, th,
                                                false);
 }
 
@@ -5015,7 +5060,8 @@ static int lod_xattr_set(const struct lu_env *env,
            !strcmp(name, XATTR_NAME_LMV)) {
                switch (fl) {
                case LU_XATTR_CREATE:
-                       rc = lod_dir_striping_create(env, dt, NULL, NULL, th);
+                       rc = lod_dir_striping_create(env, dt, NULL, buf, NULL,
+                                                    th);
                        break;
                case 0:
                case LU_XATTR_REPLACE:
@@ -5716,7 +5762,7 @@ static void lod_ah_init(const struct lu_env *env,
                const struct lmv_user_md_v1 *lum1 = ah->dah_eadata;
 
                /* other default values are 0 */
-               lc->ldo_dir_stripe_offset = -1;
+               lc->ldo_dir_stripe_offset = LMV_OFFSET_DEFAULT;
 
                /* no default striping configuration is needed for
                 * foreign dirs
@@ -5733,14 +5779,11 @@ static void lod_ah_init(const struct lu_env *env,
                        lod_get_default_striping(env, lp, ah, lds);
 
                /* It should always honour the specified stripes */
-               /* Note: old client (< 2.7)might also do lfs mkdir, whose EA
-                * will have old magic. In this case, we should ignore the
-                * stripe count and try to create dir by default stripe.
-                */
                if (ah->dah_eadata && ah->dah_eadata_len &&
                    !ah->dah_eadata_is_dmv &&
                    (le32_to_cpu(lum1->lum_magic) == LMV_USER_MAGIC ||
-                    le32_to_cpu(lum1->lum_magic) == LMV_USER_MAGIC_SPECIFIC)) {
+                    le32_to_cpu(lum1->lum_magic) == LMV_USER_MAGIC_SPECIFIC ||
+                    le32_to_cpu(lum1->lum_magic) == LMV_MAGIC_V1)) {
                        lc->ldo_dir_stripe_count =
                                le32_to_cpu(lum1->lum_stripe_count);
                        lc->ldo_dir_stripe_offset =
@@ -6206,7 +6249,6 @@ static int lod_declare_create(const struct lu_env *env, struct dt_object *dt,
        } else if (dof->dof_type == DFT_DIR) {
                struct seq_server_site *ss;
                struct lu_buf buf = { NULL };
-               struct lu_buf *lmu = NULL;
 
                ss = lu_site2seq(dt->do_lu.lo_dev->ld_site);
 
@@ -6248,12 +6290,11 @@ static int lod_declare_create(const struct lu_env *env, struct dt_object *dt,
                                        GOTO(out, rc = -EINVAL);
                        }
                } else if (hint && hint->dah_eadata) {
-                       lmu = &buf;
-                       lmu->lb_buf = (void *)hint->dah_eadata;
-                       lmu->lb_len = hint->dah_eadata_len;
+                       buf.lb_buf = (void *)hint->dah_eadata;
+                       buf.lb_len = hint->dah_eadata_len;
                }
 
-               rc = lod_declare_dir_striping_create(env, dt, attr, lmu, dof,
+               rc = lod_declare_dir_striping_create(env, dt, attr, &buf, dof,
                                                     th);
        }
 out:
@@ -8689,6 +8730,7 @@ static int lod_dir_declare_layout_attach(const struct lu_env *env,
        lo->ldo_dir_migrate_offset = lo->ldo_dir_stripe_count;
        lo->ldo_dir_migrate_hash = le32_to_cpu(lmv->lmv_hash_type);
        lo->ldo_dir_stripe_count += stripe_count;
+       lo->ldo_dir_layout_version++;
        lo->ldo_dir_stripes_allocated += stripe_count;
 
        /* plain directory split creates target as a plain directory, while
index 8d71d1b..82da07a 100644 (file)
@@ -103,6 +103,7 @@ struct obd_client_handle;
 int mdc_set_open_replay_data(struct obd_export *exp,
                             struct obd_client_handle *och,
                             struct lookup_intent *it);
+int mdc_save_lmm(struct ptlrpc_request *req, void *data, u32 size);
 
 void mdc_commit_open(struct ptlrpc_request *req);
 void mdc_replay_open(struct ptlrpc_request *req);
index 78c81ff..d9bdeeb 100644 (file)
@@ -217,6 +217,9 @@ void mdc_create_pack(struct req_capsule *pill, struct md_op_data *op_data,
        rec->cr_suppgid1 = op_data->op_suppgids[0];
        rec->cr_suppgid2 = op_data->op_suppgids[1];
        flags = 0;
+
+       if (S_ISDIR(mode))
+               flags |= MDS_MKDIR_LMV;
        if (op_data->op_bias & MDS_CREATE_VOLATILE)
                flags |= MDS_OPEN_VOLATILE;
        if (op_data->op_bias & MDS_SETSTRIPE_CREATE)
index a3a6fbb..85bc771 100644 (file)
@@ -204,7 +204,7 @@ static inline void mdc_clear_replay_flag(struct ptlrpc_request *req, int rc)
 }
 
 /**
- * Save a large LOV EA into the request buffer so that it is available
+ * Save a large LOV/LMV EA into the request buffer so that it is available
  * for replay.  We don't do this in the initial request because the
  * original request doesn't need this buffer (at most it sends just the
  * lov_mds_md) and it is a waste of RAM/bandwidth to send the empty
@@ -216,10 +216,10 @@ static inline void mdc_clear_replay_flag(struct ptlrpc_request *req, int rc)
  * but this is incredibly unlikely, and questionable whether the client
  * could do MDS recovery under OOM anyways...
  */
-static int mdc_save_lovea(struct ptlrpc_request *req, void *data, u32 size)
+int mdc_save_lmm(struct ptlrpc_request *req, void *data, u32 size)
 {
        struct req_capsule *pill = &req->rq_pill;
-       void *lovea;
+       void *lmm;
        int rc = 0;
 
        if (req_capsule_get_size(pill, &RMF_EADATA, RCL_CLIENT) < size) {
@@ -235,10 +235,10 @@ static int mdc_save_lovea(struct ptlrpc_request *req, void *data, u32 size)
        }
 
        req_capsule_set_size(pill, &RMF_EADATA, RCL_CLIENT, size);
-       lovea = req_capsule_client_get(pill, &RMF_EADATA);
-       if (lovea) {
-               memcpy(lovea, data, size);
-               lov_fix_ea_for_replay(lovea);
+       lmm = req_capsule_client_get(pill, &RMF_EADATA);
+       if (lmm) {
+               memcpy(lmm, data, size);
+               lov_fix_ea_for_replay(lmm);
        }
 
        return rc;
@@ -799,8 +799,8 @@ int mdc_finish_enqueue(struct obd_export *exp,
                         * (for example error one).
                         */
                        if ((it->it_op & IT_OPEN) && req->rq_replay) {
-                               rc = mdc_save_lovea(req, eadata,
-                                                   body->mbo_eadatasize);
+                               rc = mdc_save_lmm(req, eadata,
+                                                 body->mbo_eadatasize);
                                if (rc) {
                                        body->mbo_valid &= ~OBD_MD_FLEASIZE;
                                        body->mbo_eadatasize = 0;
@@ -827,7 +827,7 @@ int mdc_finish_enqueue(struct obd_export *exp,
                         * another set of OST objects).
                         */
                        if (req->rq_transno)
-                               (void)mdc_save_lovea(req, lvb_data, lvb_len);
+                               mdc_save_lmm(req, lvb_data, lvb_len);
                }
        }
 
index b1d4ee4..5d61dbe 100644 (file)
@@ -163,13 +163,14 @@ int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
                kernel_cap_t cap_effective, __u64 rdev,
                struct ptlrpc_request **request)
 {
-        struct ptlrpc_request *req;
-        int level, rc;
-        int count, resends = 0;
-        struct obd_import *import = exp->exp_obd->u.cli.cl_import;
-        int generation = import->imp_generation;
+       struct ptlrpc_request *req;
+       int level, rc;
+       int count, resends = 0;
+       struct obd_import *import = exp->exp_obd->u.cli.cl_import;
+       int generation = import->imp_generation;
        LIST_HEAD(cancels);
-        ENTRY;
+
+       ENTRY;
 
        /* For case if upper layer did not alloc fid, do it now. */
        if (!fid_is_sane(&op_data->op_fid2)) {
@@ -183,24 +184,24 @@ int mdc_create(struct obd_export *exp, struct md_op_data *op_data,
        }
 
 rebuild:
-        count = 0;
-        if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
-            (fid_is_sane(&op_data->op_fid1)))
-                count = mdc_resource_get_unused(exp, &op_data->op_fid1,
-                                                &cancels, LCK_EX,
-                                                MDS_INODELOCK_UPDATE);
+       count = 0;
+       if ((op_data->op_flags & MF_MDC_CANCEL_FID1) &&
+           (fid_is_sane(&op_data->op_fid1)))
+               count = mdc_resource_get_unused(exp, &op_data->op_fid1,
+                                               &cancels, LCK_EX,
+                                               MDS_INODELOCK_UPDATE);
 
-        req = ptlrpc_request_alloc(class_exp2cliimp(exp),
+       req = ptlrpc_request_alloc(class_exp2cliimp(exp),
                                   &RQF_MDS_REINT_CREATE_ACL);
-        if (req == NULL) {
-                ldlm_lock_list_put(&cancels, l_bl_ast, count);
-                RETURN(-ENOMEM);
-        }
+       if (req == NULL) {
+               ldlm_lock_list_put(&cancels, l_bl_ast, count);
+               RETURN(-ENOMEM);
+       }
 
-        req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
-                             op_data->op_namelen + 1);
-        req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT,
-                             data && datalen ? datalen : 0);
+       req_capsule_set_size(&req->rq_pill, &RMF_NAME, RCL_CLIENT,
+                            op_data->op_namelen + 1);
+       req_capsule_set_size(&req->rq_pill, &RMF_EADATA, RCL_CLIENT,
+                            data && datalen ? datalen : 0);
 
        req_capsule_set_size(&req->rq_pill, &RMF_FILE_SECCTX_NAME,
                             RCL_CLIENT, op_data->op_file_secctx_name != NULL ?
@@ -228,33 +229,35 @@ rebuild:
                RETURN(rc);
        }
 
-        /*
-         * mdc_create_pack() fills msg->bufs[1] with name and msg->bufs[2] with
-         * tgt, for symlinks or lov MD data.
-         */
+       /*
+        * mdc_create_pack() fills msg->bufs[1] with name and msg->bufs[2] with
+        * tgt, for symlinks or lov MD data.
+        */
        mdc_create_pack(&req->rq_pill, op_data, data, datalen, mode, uid,
                        gid, cap_effective, rdev);
 
-        ptlrpc_request_set_replen(req);
+       req_capsule_set_size(&req->rq_pill, &RMF_MDT_MD, RCL_SERVER,
+                            exp->exp_obd->u.cli.cl_default_mds_easize);
+       ptlrpc_request_set_replen(req);
 
        /* ask ptlrpc not to resend on EINPROGRESS since we have our own retry
         * logic here */
        req->rq_no_retry_einprogress = 1;
 
-        if (resends) {
-                req->rq_generation_set = 1;
-                req->rq_import_generation = generation;
+       if (resends) {
+               req->rq_generation_set = 1;
+               req->rq_import_generation = generation;
                req->rq_sent = ktime_get_real_seconds() + resends;
-        }
-        level = LUSTRE_IMP_FULL;
+       }
+       level = LUSTRE_IMP_FULL;
  resend:
        rc = mdc_reint(req, level);
 
-        /* Resend if we were told to. */
-        if (rc == -ERESTARTSYS) {
-                level = LUSTRE_IMP_RECOVER;
-                goto resend;
-        } else if (rc == -EINPROGRESS) {
+       /* Resend if we were told to. */
+       if (rc == -ERESTARTSYS) {
+               level = LUSTRE_IMP_RECOVER;
+               goto resend;
+       } else if (rc == -EINPROGRESS) {
                /* Retry create infinitely until succeed or get other
                 * error code or interrupted. */
                ptlrpc_req_finished(req);
@@ -268,14 +271,48 @@ rebuild:
                               PFID(&op_data->op_fid1),
                               PFID(&op_data->op_fid2));
                        goto rebuild;
-                } else {
-                        CDEBUG(D_HA, "resend cross eviction\n");
-                        RETURN(-EIO);
-                }
-        }
+               } else {
+                       CDEBUG(D_HA, "resend cross eviction\n");
+                       RETURN(-EIO);
+               }
+       } else if (rc == 0 && S_ISDIR(mode)) {
+               struct mdt_body *body;
+
+               body = req_capsule_server_get(&req->rq_pill, &RMF_MDT_BODY);
+               if (body == NULL) {
+                       rc = -EPROTO;
+                       CERROR("%s: cannot swab mdt_body: rc = %d\n",
+                              exp->exp_obd->obd_name, rc);
+                       RETURN(rc);
+               }
+
+               if ((body->mbo_valid & (OBD_MD_FLDIREA | OBD_MD_MEA)) ==
+                   (OBD_MD_FLDIREA | OBD_MD_MEA)) {
+                       void *eadata;
+
+                       /* clear valid, because mkdir doesn't need to initialize
+                        * LMV, which will be delayed to lookup.
+                        */
+                       body->mbo_valid &= ~(OBD_MD_FLDIREA | OBD_MD_MEA);
+                       mdc_update_max_ea_from_body(exp, body);
+                       /* The eadata is opaque; just check that it is there.
+                        * Eventually, obd_unpackmd() will check the contents.
+                        */
+                       eadata = req_capsule_server_sized_get(&req->rq_pill,
+                                                         &RMF_MDT_MD,
+                                                         body->mbo_eadatasize);
+                       if (eadata == NULL)
+                               RETURN(-EPROTO);
+
+                       /* save the reply LMV EA in case we have to replay a
+                        * create for recovery.
+                        */
+                       rc = mdc_save_lmm(req, eadata, body->mbo_eadatasize);
+               }
+       }
 
-        *request = req;
-        RETURN(rc);
+       *request = req;
+       RETURN(rc);
 }
 
 int mdc_unlink(struct obd_export *exp, struct md_op_data *op_data,
index 09840db..71d1aa1 100644 (file)
@@ -2202,7 +2202,8 @@ static int mdd_create_sanity_check(const struct lu_env *env,
                const struct lmv_user_md *lum = spec->u.sp_ea.eadata;
 
                if (!lmv_user_magic_supported(le32_to_cpu(lum->lum_magic)) &&
-                   le32_to_cpu(lum->lum_magic) != LMV_USER_MAGIC_V0) {
+                   !(spec->sp_replay &&
+                     lum->lum_magic == cpu_to_le32(LMV_MAGIC_V1))) {
                        rc = -EINVAL;
                        CERROR("%s: invalid lmv_user_md: magic=%x hash=%x stripe_offset=%d stripe_count=%u: rc = %d\n",
                               mdd2obd_dev(m)->obd_name,
@@ -2210,7 +2211,6 @@ static int mdd_create_sanity_check(const struct lu_env *env,
                               le32_to_cpu(lum->lum_hash_type),
                               (int)le32_to_cpu(lum->lum_stripe_offset),
                               le32_to_cpu(lum->lum_stripe_count), rc);
-
                        RETURN(rc);
                }
        }
@@ -2625,9 +2625,8 @@ static int mdd_create_object(const struct lu_env *env, struct mdd_object *pobj,
                jobid_len = strnlen(jobid, LUSTRE_JOBID_SIZE);
                buf = mdd_buf_get_const(env, jobid, jobid_len);
 
-               rc = mdo_xattr_set(env, son, buf, spec->sp_cr_job_xattr,
-                                  LU_XATTR_CREATE, handle);
-
+               rc = mdo_xattr_set(env, son, buf, spec->sp_cr_job_xattr, 0,
+                                  handle);
                /* this xattr is nonessential, so ignore errors. */
                if (rc != 0) {
                        CDEBUG(D_INODE,
@@ -2761,6 +2760,7 @@ int mdd_create(const struct lu_env *env, struct md_object *pobj,
        const char *name = lname->ln_name;
        struct dt_allocation_hint *hint = &mdd_env_info(env)->mdi_hint;
        int acl_size = LUSTRE_POSIX_ACL_MAX_SIZE_OLD;
+       bool name_inserted = false;
        int rc, rc2;
 
        ENTRY;
@@ -2771,7 +2771,14 @@ int mdd_create(const struct lu_env *env, struct md_object *pobj,
 
        /* Sanity checks before big job. */
        rc = mdd_create_sanity_check(env, pobj, pattr, lname, attr, spec);
-       if (rc)
+       if (unlikely(rc == -EEXIST && S_ISDIR(attr->la_mode) &&
+                    spec->sp_replay && mdd_object_remote(mdd_pobj)))
+               /* if this is a replay by client request, and the name is found
+                * in the parent directory on a remote MDT, mkdir was partially
+                * executed: the name was added, but the target was not created.
+                */
+               name_inserted = true;
+       else if (rc)
                RETURN(rc);
 
        if (CFS_FAIL_CHECK(OBD_FAIL_MDS_DQACQ_NET))
@@ -2811,7 +2818,8 @@ use_bigger_buffer:
                /* migrate may create 1-stripe directory, adjust stripe count
                 * before lod_ah_init().
                 */
-               if (lmu && lmu->lum_stripe_count == cpu_to_le32(1))
+               if (lmu && lmu->lum_magic == cpu_to_le32(LMV_USER_MAGIC) &&
+                   lmu->lum_stripe_count == cpu_to_le32(1))
                        lmu->lum_stripe_count = 0;
        }
 
@@ -2860,10 +2868,13 @@ use_bigger_buffer:
                rc = mdd_orphan_insert(env, son, handle);
                GOTO(out_volatile, rc);
        } else {
-               rc = __mdd_index_insert(env, mdd_pobj, mdd_object_fid(son),
-                                       attr->la_mode, name, handle);
-               if (rc != 0)
-                       GOTO(err_created, rc);
+               if (likely(!name_inserted)) {
+                       rc = __mdd_index_insert(env, mdd_pobj,
+                                               mdd_object_fid(son),
+                                               attr->la_mode, name, handle);
+                       if (rc != 0)
+                               GOTO(err_created, rc);
+               }
 
                mdd_links_add(env, son, mdd_object_fid(mdd_pobj), lname,
                              handle, ldata, 1);
@@ -5122,8 +5133,13 @@ int mdd_dir_layout_split(const struct lu_env *env, struct md_object *o,
                rc = mdd_dir_split_plain(env, mdd, pobj, obj, tobj, &xattrs,
                                         mlc, hint, handle);
        } else {
+               struct lu_buf *buf = &info->mdi_buf[0];
+
+               buf->lb_buf = mlc->mlc_spec->u.sp_ea.eadata;
+               buf->lb_len = mlc->mlc_spec->u.sp_ea.eadatalen;
+
                mdd_write_lock(env, obj, DT_TGT_CHILD);
-               rc = mdo_xattr_set(env, obj, NULL, XATTR_NAME_LMV,
+               rc = mdo_xattr_set(env, obj, buf, XATTR_NAME_LMV,
                                   LU_XATTR_CREATE, handle);
                mdd_write_unlock(env, obj);
                if (rc)
index 30c6f2a..33cfa1a 100644 (file)
@@ -1014,6 +1014,9 @@ int mdt_pack_secctx_in_reply(struct mdt_thread_info *info,
                             struct mdt_object *child);
 int mdt_pack_encctx_in_reply(struct mdt_thread_info *info,
                             struct mdt_object *child);
+void mdt_prep_ma_buf_from_rep(struct mdt_thread_info *info,
+                             struct mdt_object *obj, struct md_attr *ma,
+                             __u64 open_flags);
 
 static inline struct mdt_device *mdt_dev(struct lu_device *d)
 {
index cd67d54..93f4e23 100644 (file)
@@ -309,9 +309,9 @@ void mdt_mfd_set_mode(struct mdt_file_data *mfd, u64 open_flags)
 /**
  * prep ma_lmm/ma_lmv for md_attr from reply
  */
-static void mdt_prep_ma_buf_from_rep(struct mdt_thread_info *info,
-                                    struct mdt_object *obj,
-                                    struct md_attr *ma, __u64 open_flags)
+void mdt_prep_ma_buf_from_rep(struct mdt_thread_info *info,
+                             struct mdt_object *obj, struct md_attr *ma,
+                             __u64 open_flags)
 {
        struct req_capsule *pill;
 
index 2c1e2ac..5c955ca 100644 (file)
@@ -187,6 +187,7 @@ static void mdt_reconstruct_create(struct mdt_thread_info *mti,
        struct ptlrpc_request  *req = mdt_info_req(mti);
        struct obd_export *exp = req->rq_export;
        struct mdt_device *mdt = mti->mti_mdt;
+       struct md_attr *ma = &mti->mti_attr;
        struct mdt_object *child;
        struct mdt_body *body;
        int rc;
@@ -209,11 +210,14 @@ static void mdt_reconstruct_create(struct mdt_thread_info *mti,
        }
 
        body = req_capsule_server_get(mti->mti_pill, &RMF_MDT_BODY);
-       mti->mti_attr.ma_need = MA_INODE;
-       mti->mti_attr.ma_valid = 0;
-       rc = mdt_attr_get_complex(mti, child, &mti->mti_attr);
+       ma->ma_need = MA_INODE;
+       if (S_ISDIR(ma->ma_attr.la_mode) &&
+           (mti->mti_spec.sp_cr_flags & MDS_MKDIR_LMV))
+               mdt_prep_ma_buf_from_rep(mti, child, ma, 0);
+       ma->ma_valid = 0;
+       rc = mdt_attr_get_complex(mti, child, ma);
        if (rc == -ENOENT) {
-               mdt_fake_ma(&mti->mti_attr);
+               mdt_fake_ma(ma);
        } else if (rc == -EREMOTE) {
                /* object was created on remote server */
                if (!mdt_is_dne_client(exp))
@@ -223,8 +227,11 @@ static void mdt_reconstruct_create(struct mdt_thread_info *mti,
                req->rq_status = rc;
                body->mbo_valid |= OBD_MD_MDS;
        }
-       mdt_pack_attr2body(mti, body, &mti->mti_attr.ma_attr,
-                          mdt_object_fid(child));
+       if (ma->ma_valid & MA_LMV) {
+               body->mbo_eadatasize = ma->ma_lmv_size;
+               body->mbo_valid |= (OBD_MD_FLDIREA|OBD_MD_MEA);
+       }
+       mdt_pack_attr2body(mti, body, &ma->ma_attr, mdt_object_fid(child));
        mdt_object_put(mti->mti_env, child);
 }
 
index 701863e..9579e9b 100644 (file)
@@ -498,6 +498,7 @@ static int mdt_create(struct mdt_thread_info *info)
        struct md_op_spec *spec = &info->mti_spec;
        struct lu_ucred *uc = mdt_ucred(info);
        bool restripe = false;
+       bool recreate_obj = false;
        int rc;
 
        ENTRY;
@@ -573,28 +574,56 @@ static int mdt_create(struct mdt_thread_info *info)
            parent->mot_obj.lo_header->loh_attr & LOHA_FSCRYPT_MD)
                GOTO(put_parent, rc = -EPERM);
 
+       info->mti_spec.sp_replay = req_is_replay(mdt_info_req(info));
+
        /*
         * LU-10235: check if name exists locklessly first to avoid massive
         * lock recalls on existing directories.
         */
-       rc = mdt_lookup_version_check(info, parent, &rr->rr_name,
-                                     &info->mti_tmp_fid1, 1);
+       rc = mdo_lookup(info->mti_env, mdt_object_child(parent), &rr->rr_name,
+                       &info->mti_tmp_fid1, &info->mti_spec);
        if (rc == 0) {
-               if (!restripe)
+               /* mkdir may be partially executed: name entry was successfully
+                * inserted into parent directory on remote MDT, while target
+                * not created on local MDT. This happens when update log
+                * recovery is aborted, and mkdir is replayed by client request.
+                */
+               if (unlikely(!(info->mti_spec.sp_replay &&
+                              mdt_object_remote(parent)) &&
+                            !restripe))
                        GOTO(put_parent, rc = -EEXIST);
 
-               rc = mdt_restripe(info, parent, &rr->rr_name, rr->rr_fid2, spec,
-                                 ma);
-       }
+               child = mdt_object_find(info->mti_env, info->mti_mdt,
+                                       &info->mti_tmp_fid1);
+               if (unlikely(IS_ERR(child)))
+                       GOTO(put_parent, rc = PTR_ERR(child));
 
-       /* -ENOENT is expected here */
-       if (rc != -ENOENT)
+               if (mdt_object_exists(child)) {
+                       mdt_object_put(info->mti_env, child);
+                       rc = -EEXIST;
+                       if (restripe)
+                               rc = mdt_restripe(info, parent, &rr->rr_name,
+                                                 rr->rr_fid2, spec, ma);
+                       GOTO(put_parent, rc);
+               }
+               mdt_object_put(info->mti_env, child);
+               recreate_obj = true;
+       } else if (rc != -ENOENT) {
                GOTO(put_parent, rc);
+       }
 
-       OBD_FAIL_TIMEOUT(OBD_FAIL_MDS_PAUSE_CREATE_AFTER_LOOKUP, cfs_fail_val);
+       if (unlikely(info->mti_spec.sp_replay)) {
+               /* check version only during replay */
+               rc = mdt_version_check(mdt_info_req(info), ENOENT_VERSION, 1);
+               if (rc)
+                       GOTO(put_parent, rc);
+       } else {
+               CFS_FAIL_TIMEOUT(OBD_FAIL_MDS_PAUSE_CREATE_AFTER_LOOKUP,
+                                cfs_fail_val);
 
-       /* save version of file name for replay, it must be ENOENT here */
-       mdt_enoent_version_save(info, 1);
+               /* save version of file name for replay, must be ENOENT here */
+               mdt_enoent_version_save(info, 1);
+       }
 
        CFS_RACE(OBD_FAIL_MDS_CREATE_RACE);
 
@@ -617,7 +646,7 @@ static int mdt_create(struct mdt_thread_info *info)
         */
        rc = mdo_lookup(info->mti_env, mdt_object_child(parent), &rr->rr_name,
                        &info->mti_tmp_fid1, &info->mti_spec);
-       if (unlikely(rc == 0))
+       if (unlikely(rc == 0 && !recreate_obj))
                GOTO(unlock_parent, rc = -EEXIST);
 
        child = mdt_object_new(info->mti_env, mdt, rr->rr_fid2);
@@ -657,12 +686,23 @@ static int mdt_create(struct mdt_thread_info *info)
 
        rc = mdo_create(info->mti_env, mdt_object_child(parent), &rr->rr_name,
                        mdt_object_child(child), &info->mti_spec, ma);
-       if (rc == 0)
-               rc = mdt_attr_get_complex(info, child, ma);
+       if (rc < 0)
+               GOTO(put_child, rc);
 
+       if (S_ISDIR(ma->ma_attr.la_mode) &&
+           (info->mti_spec.sp_cr_flags & MDS_MKDIR_LMV))
+               mdt_prep_ma_buf_from_rep(info, child, ma, 0);
+
+       rc = mdt_attr_get_complex(info, child, ma);
        if (rc < 0)
                GOTO(put_child, rc);
 
+       if (ma->ma_valid & MA_LMV) {
+               mdt_dump_lmv(D_INFO, ma->ma_lmv);
+               repbody->mbo_eadatasize = ma->ma_lmv_size;
+               repbody->mbo_valid |= (OBD_MD_FLDIREA|OBD_MD_MEA);
+       }
+
        /* save child locks to eliminate dependency between 'mkdir a' and
         * 'mkdir a/b' if b is a remote directory
         */
index cd2e93f..3a38bff 100644 (file)
@@ -4985,8 +4985,10 @@ static int osd_xattr_set(const struct lu_env *env, struct dt_object *dt,
                 */
                LASSERT(buf->lb_len == sizeof(dt_obj_version_t));
 
-               CDEBUG(D_INODE, "Set version %#llx (old %#llx) for inode %lu\n",
-                      *version, LDISKFS_I(inode)->i_fs_version, inode->i_ino);
+               CDEBUG(D_INODE,
+                      DFID" set version %#llx (old %#llx) for inode %lu\n",
+                      PFID(lu_object_fid(&dt->do_lu)), *version,
+                      LDISKFS_I(inode)->i_fs_version, inode->i_ino);
 
                LDISKFS_I(inode)->i_fs_version = *version;
                /*
index 288b3ec..493dce8 100644 (file)
@@ -230,6 +230,13 @@ static const struct req_msg_field *mds_reint_create_sym_client[] = {
        &RMF_FILE_ENCCTX,
 };
 
+static const struct req_msg_field *mds_reint_create_acl_server[] = {
+       &RMF_PTLRPC_BODY,
+       &RMF_MDT_BODY,
+       &RMF_CAPA1,
+       &RMF_MDT_MD
+};
+
 static const struct req_msg_field *mds_reint_open_client[] = {
        &RMF_PTLRPC_BODY,
        &RMF_REC_REINT,
@@ -1497,7 +1504,8 @@ EXPORT_SYMBOL(RQF_MDS_REINT_CREATE);
 
 struct req_format RQF_MDS_REINT_CREATE_ACL =
        DEFINE_REQ_FMT0("MDS_REINT_CREATE_ACL",
-                       mds_reint_create_acl_client, mdt_body_capa);
+                       mds_reint_create_acl_client,
+                       mds_reint_create_acl_server);
 EXPORT_SYMBOL(RQF_MDS_REINT_CREATE_ACL);
 
 struct req_format RQF_MDS_REINT_CREATE_SLAVE =
old mode 100644 (file)
new mode 100755 (executable)
index 266a54a..edfa244 100755 (executable)
@@ -2603,7 +2603,7 @@ run_test 113 "ldlm enqueue dropped reply should not cause deadlocks"
 
 T130_PID=0
 test_130_base() {
-       test_mkdir -p $DIR/$tdir
+       test_mkdir -p -c1 $DIR/$tdir
 
        # Prevent interference from layout intent RPCs due to
        # asynchronous writeback. These will be tested in 130c below.
index 943079a..58fd571 100755 (executable)
@@ -3507,18 +3507,23 @@ test_100c() {
        replay_barrier mds2
        $LFS mkdir -i1 -c2 $striped_dir
 
-       stack_trap fail_abort_cleanup RETURN
        fail_abort mds2 abort_recov_mdt
 
-       createmany -o $striped_dir/f-%d 20 &&
-               error "createmany -o $DIR/$tfile should fail"
+       if (( $MDS1_VERSION >= $(version_code 2.15.54.138) )); then
+               # after 2.15.54.138 striped mkdir can replay by client request
+               createmany -o $striped_dir/f-%d 20 ||
+                       error "createmany -o $DIR/$tfile failed"
+       fi
 
        fail mds2
 
        # LU-16159 abort_recovery will cancel update logs, the second recovery
        # won't replay $striped_dir creation
-       (( $MDS1_VERSION >= $(version_code 2.15.52) )) ||
-               striped_dir_check_100 || error "striped dir check failed"
+       (( $MDS1_VERSION >= $(version_code 2.15.52) &&
+          $MDS1_VERSION < $(version_code 2.15.54.138) )) &&
+               fail_abort_cleanup && return 0
+
+       striped_dir_check_100 || error "striped dir check failed"
 }
 run_test 100c "DNE: create striped dir, abort_recov_mdt mds2"
 
@@ -3554,6 +3559,37 @@ test_100d() {
 }
 run_test 100d "DNE: cancel update logs upon recovery abort"
 
+test_100e() {
+       (( MDSCOUNT > 1 )) || skip "needs >= 2 MDTs"
+       (( MDS1_VERSION >= $(version_code 2.15.54.79) )) ||
+               skip "Need MDS version 2.15.54.79+"
+       [[ $FAILURE_MODE != "HARD" ||
+          "$(facet_host mds1)" != "$(facet_host mds2)" ]] ||
+               skip "MDTs needs to be on diff hosts for HARD fail mode"
+
+       local old
+       local new
+       local striped_dir=$DIR/$tdir/striped_dir
+
+       mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+
+       replay_barrier mds1
+       replay_barrier mds2
+
+       $LFS mkdir -i 0,1 $striped_dir
+       old=$($LFS getdirstripe $striped_dir)
+       echo $old
+
+       fail mds1,mds2
+
+       new=$($LFS getdirstripe $striped_dir)
+       echo $new
+       [ "$old" == "$new" ] ||
+               error "$striped_dir layout mismatch"
+       rm -rf $DIR/$tdir || error "rmdir failed"
+}
+run_test 100e "DNE: create striped dir on MDT0 and MDT1, fail MDT0, MDT1"
+
 test_101() { #LU-5648
        mkdir -p $DIR/$tdir/d1
        mkdir -p $DIR/$tdir/d2