Whamcloud - gitweb
LU-14448 lod: verify LOV before set/inherit
[fs/lustre-release.git] / lustre / lod / lod_object.c
index f14680c..95319e5 100644 (file)
@@ -344,7 +344,7 @@ static int lod_it_key_rec(const struct lu_env *env, const struct dt_it *di,
                                                         key_rec);
 }
 
-static struct dt_index_operations lod_index_ops = {
+static const struct dt_index_operations lod_index_ops = {
        .dio_lookup             = lod_lookup,
        .dio_declare_insert     = lod_declare_insert,
        .dio_insert             = lod_insert,
@@ -787,7 +787,7 @@ static int lod_striped_it_load(const struct lu_env *env,
        return next->do_index_ops->dio_it.load(env, it->lit_it, hash);
 }
 
-static struct dt_index_operations lod_striped_index_ops = {
+static const struct dt_index_operations lod_striped_index_ops = {
        .dio_lookup             = lod_striped_lookup,
        .dio_declare_insert     = lod_declare_insert,
        .dio_insert             = lod_insert,
@@ -920,13 +920,12 @@ int lod_load_lmv_shards(const struct lu_env *env, struct lod_object *lo,
                /* The ent->lde_name is composed of ${FID}:${index} */
                if (ent->lde_namelen < len + 1 ||
                    memcmp(ent->lde_name, name, len) != 0) {
-                       CDEBUG(lod->lod_lmv_failout ? D_ERROR : D_INFO,
-                              "%s: invalid shard name %.*s with the FID "DFID
-                              " for the striped directory "DFID", %s\n",
-                              lod2obd(lod)->obd_name, ent->lde_namelen,
-                              ent->lde_name, PFID(&fid),
-                              PFID(lu_object_fid(&obj->do_lu)),
-                              lod->lod_lmv_failout ? "failout" : "skip");
+                       CDEBUG_LIMIT(lod->lod_lmv_failout ? D_ERROR : D_INFO,
+                                    "%s: invalid shard name %.*s with the FID "DFID" for the striped directory "DFID", %s\n",
+                                    lod2obd(lod)->obd_name, ent->lde_namelen,
+                                    ent->lde_name, PFID(&fid),
+                                    PFID(lu_object_fid(&obj->do_lu)),
+                                    lod->lod_lmv_failout ? "failout" : "skip");
 
                        if (lod->lod_lmv_failout)
                                break;
@@ -938,15 +937,15 @@ int lod_load_lmv_shards(const struct lu_env *env, struct lod_object *lo,
                do {
                        if (ent->lde_name[len] < '0' ||
                            ent->lde_name[len] > '9') {
-                               CDEBUG(lod->lod_lmv_failout ? D_ERROR : D_INFO,
-                                      "%s: invalid shard name %.*s with the "
-                                      "FID "DFID" for the striped directory "
-                                      DFID", %s\n",
-                                      lod2obd(lod)->obd_name, ent->lde_namelen,
-                                      ent->lde_name, PFID(&fid),
-                                      PFID(lu_object_fid(&obj->do_lu)),
-                                      lod->lod_lmv_failout ?
-                                      "failout" : "skip");
+                               CDEBUG_LIMIT(lod->lod_lmv_failout ?
+                                            D_ERROR : D_INFO,
+                                            "%s: invalid shard name %.*s with the FID "DFID" for the striped directory "DFID", %s\n",
+                                            lod2obd(lod)->obd_name,
+                                            ent->lde_namelen,
+                                            ent->lde_name, PFID(&fid),
+                                            PFID(lu_object_fid(&obj->do_lu)),
+                                            lod->lod_lmv_failout ?
+                                            "failout" : "skip");
 
                                if (lod->lod_lmv_failout)
                                        break;
@@ -1136,15 +1135,21 @@ void lod_adjust_stripe_size(struct lod_layout_component *comp,
                else
                        comp->llc_stripe_size = comp_end & ~(comp_end - 1);
        } else {
+               if (comp_end != LUSTRE_EOF &&
+                   comp_end & (LOV_MIN_STRIPE_SIZE - 1)) {
+                       CWARN("Component end %llu is not a multiple of min size %u\n",
+                             comp_end, LOV_MIN_STRIPE_SIZE);
+                       comp_end = round_up(comp_end, LOV_MIN_STRIPE_SIZE);
+               }
                /* check stripe size is multiplier of comp_end */
                if (comp_end != LUSTRE_EOF &&
+                   comp_end != comp->llc_extent.e_start &&
                    comp_end % comp->llc_stripe_size) {
                        /* fix that even for defined stripe size but warn
                         * about the problem, that must not happen
                         */
                        CWARN("Component end %llu is not aligned by the stripe size %u\n",
                              comp_end, comp->llc_stripe_size);
-                       dump_stack();
                        comp->llc_stripe_size = comp_end & ~(comp_end - 1);
                }
        }
@@ -1171,10 +1176,10 @@ int lod_obj_for_each_stripe(const struct lu_env *env, struct lod_object *lo,
                            struct lod_obj_stripe_cb_data *data)
 {
        struct lod_layout_component *lod_comp;
-       int i, j, rc;
+       int i, j, rc = 0;
        ENTRY;
 
-       LASSERT(lo->ldo_comp_cnt != 0 && lo->ldo_comp_entries != NULL);
+       mutex_lock(&lo->ldo_layout_mutex);
        for (i = 0; i < lo->ldo_comp_cnt; i++) {
                lod_comp = &lo->ldo_comp_entries[i];
 
@@ -1194,7 +1199,7 @@ int lod_obj_for_each_stripe(const struct lu_env *env, struct lod_object *lo,
                if (data->locd_comp_cb) {
                        rc = data->locd_comp_cb(env, lo, i, data);
                        if (rc)
-                               RETURN(rc);
+                               GOTO(unlock, rc);
                }
 
                /* could used just to do sth about component, not each
@@ -1211,10 +1216,12 @@ int lod_obj_for_each_stripe(const struct lu_env *env, struct lod_object *lo,
                                continue;
                        rc = data->locd_stripe_cb(env, lo, dt, th, i, j, data);
                        if (rc != 0)
-                               RETURN(rc);
+                               GOTO(unlock, rc);
                }
        }
-       RETURN(0);
+unlock:
+       mutex_unlock(&lo->ldo_layout_mutex);
+       RETURN(rc);
 }
 
 static bool lod_obj_attr_set_comp_skip_cb(const struct lu_env *env,
@@ -1255,9 +1262,11 @@ static bool lod_obj_attr_set_comp_skip_cb(const struct lu_env *env,
                }
                break;
        }
+       case LCM_FL_RDONLY:
+       case LCM_FL_SYNC_PENDING:
+               break;
        default:
                LASSERTF(0, "impossible: %d\n", lo->ldo_flr_state);
-       case LCM_FL_SYNC_PENDING:
                break;
        }
 
@@ -1606,8 +1615,8 @@ static int lod_xattr_get(const struct lu_env *env, struct dt_object *dt,
                         * the FIDs of all shards of the striped directory. */
                        if (le32_to_cpu(lmv1->lmv_magic) == LMV_MAGIC_V1)
                                rc = lmv_mds_md_size(
-                                               le32_to_cpu(lmv1->lmv_stripe_count),
-                                               le32_to_cpu(lmv1->lmv_magic));
+                                       le32_to_cpu(lmv1->lmv_stripe_count),
+                                       le32_to_cpu(lmv1->lmv_magic));
                } else {
                        lmv1 = buf->lb_buf;
                        if (le32_to_cpu(lmv1->lmv_magic) != LMV_MAGIC_V1)
@@ -2075,7 +2084,7 @@ static int lod_mdt_alloc_specific(const struct lu_env *env,
        int rc;
 
        master_index = lu_site2seq(lod2lu_dev(lod)->ld_site)->ss_node_id;
-       if (stripe_count > 1)
+       if (!is_specific && stripe_count > 1)
                /* Set the start index for the 2nd stripe allocation */
                mdt_indices[1] = (mdt_indices[0] + 1) %
                                        (lod->lod_remote_mdt_count + 1);
@@ -2108,7 +2117,7 @@ static int lod_mdt_alloc_specific(const struct lu_env *env,
 
                        /* Sigh, this index is not in the bitmap, let's check
                         * next available target */
-                       if (!cfs_bitmap_check(ltd->ltd_tgt_bitmap, idx) &&
+                       if (!test_bit(idx, ltd->ltd_tgt_bitmap) &&
                            idx != master_index)
                                continue;
 
@@ -2404,9 +2413,11 @@ static int lod_dir_layout_set(const struct lu_env *env,
                RETURN(-EINVAL);
 
        /* adjust hash for dir merge, which may not be set in user command */
-       if (lmv_is_merging(lmv) && !lmv->lmv_migrate_hash)
-               lmv->lmv_merge_hash =
-                       lod->lod_mdt_descs.ltd_lmv_desc.ld_pattern;
+       if (lmv_is_merging(lmv) &&
+           !(lmv->lmv_migrate_hash & LMV_HASH_TYPE_MASK))
+               lmv->lmv_merge_hash |=
+                       lod->lod_mdt_descs.ltd_lmv_desc.ld_pattern &
+                       LMV_HASH_TYPE_MASK;
 
        LMV_DEBUG(D_INFO, lmv, "set");
 
@@ -2631,21 +2642,25 @@ static int lod_replace_parent_fid(const struct lu_env *env,
        RETURN(rc);
 }
 
-inline __u16 lod_comp_entry_stripe_count(struct lod_object *lo,
-                                        struct lod_layout_component *entry,
-                                        bool is_dir)
+__u16 lod_comp_entry_stripe_count(struct lod_object *lo,
+                                 int comp_idx, bool is_dir)
 {
        struct lod_device *lod = lu2lod_dev(lod2lu_obj(lo)->lo_dev);
+       struct lod_layout_component *entry;
 
        if (is_dir)
                return  0;
-       else if (lod_comp_inited(entry))
+
+       entry = &lo->ldo_comp_entries[comp_idx];
+       if (lod_comp_inited(entry))
                return entry->llc_stripe_count;
        else if ((__u16)-1 == entry->llc_stripe_count)
                return lod->lod_ost_count;
        else
-               return lod_get_stripe_count(lod, lo,
-                                           entry->llc_stripe_count, false);
+               return lod_get_stripe_count(lod, lo, comp_idx,
+                                           entry->llc_stripe_count,
+                                           entry->llc_pattern &
+                                           LOV_PATTERN_OVERSTRIPING);
 }
 
 static int lod_comp_md_size(struct lod_object *lo, bool is_dir)
@@ -2681,8 +2696,7 @@ static int lod_comp_md_size(struct lod_object *lo, bool is_dir)
                __u16 stripe_count;
 
                magic = comp_entries[i].llc_pool ? LOV_MAGIC_V3 : LOV_MAGIC_V1;
-               stripe_count = lod_comp_entry_stripe_count(lo, &comp_entries[i],
-                                                          is_dir);
+               stripe_count = lod_comp_entry_stripe_count(lo, i, is_dir);
                if (!is_dir && is_composite)
                        lod_comp_shrink_stripe_count(&comp_entries[i],
                                                     &stripe_count);
@@ -2741,10 +2755,15 @@ static int lod_declare_layout_add(const struct lu_env *env,
        if (magic != LOV_USER_MAGIC_COMP_V1)
                RETURN(-EINVAL);
 
+       mutex_lock(&lo->ldo_layout_mutex);
+
        array_cnt = lo->ldo_comp_cnt + comp_v1->lcm_entry_count;
        OBD_ALLOC_PTR_ARRAY(comp_array, array_cnt);
-       if (comp_array == NULL)
+       if (comp_array == NULL) {
+               mutex_unlock(&lo->ldo_layout_mutex);
                RETURN(-ENOMEM);
+       }
+
 
        memcpy(comp_array, lo->ldo_comp_entries,
               sizeof(*comp_array) * lo->ldo_comp_cnt);
@@ -2801,6 +2820,8 @@ static int lod_declare_layout_add(const struct lu_env *env,
        LASSERT(lo->ldo_mirror_count == 1);
        lo->ldo_mirrors[0].lme_end = array_cnt - 1;
 
+       mutex_unlock(&lo->ldo_layout_mutex);
+
        RETURN(0);
 
 error:
@@ -2813,6 +2834,8 @@ error:
                }
        }
        OBD_FREE_PTR_ARRAY(comp_array, array_cnt);
+       mutex_unlock(&lo->ldo_layout_mutex);
+
        RETURN(rc);
 }
 
@@ -2908,6 +2931,7 @@ static int lod_declare_layout_set(const struct lu_env *env,
                RETURN(-EINVAL);
        }
 
+       mutex_lock(&lo->ldo_layout_mutex);
        for (i = 0; i < comp_v1->lcm_entry_count; i++) {
                __u32 id = comp_v1->lcm_entries[i].lcme_id;
                __u32 flags = comp_v1->lcm_entries[i].lcme_flags;
@@ -2917,7 +2941,8 @@ static int lod_declare_layout_set(const struct lu_env *env,
 
                if (flags & LCME_FL_INIT) {
                        if (changed)
-                               lod_striping_free(env, lo);
+                               lod_striping_free_nolock(env, lo);
+                       mutex_unlock(&lo->ldo_layout_mutex);
                        RETURN(-EINVAL);
                }
 
@@ -2940,8 +2965,11 @@ static int lod_declare_layout_set(const struct lu_env *env,
                                if (flags) {
                                        if ((flags & LCME_FL_STALE) &&
                                            lod_last_non_stale_mirror(mirror_id,
-                                                                     lo))
+                                                                     lo)) {
+                                               mutex_unlock(
+                                                       &lo->ldo_layout_mutex);
                                                RETURN(-EUCLEAN);
+                                       }
                                        lod_comp->llc_flags |= flags;
                                }
                                if (mirror_flag) {
@@ -2954,6 +2982,7 @@ static int lod_declare_layout_set(const struct lu_env *env,
                        changed = true;
                }
        }
+       mutex_unlock(&lo->ldo_layout_mutex);
 
        if (!changed) {
                CDEBUG(D_LAYOUT, "%s: requested component(s) not found.\n",
@@ -3036,9 +3065,13 @@ static int lod_declare_layout_del(const struct lu_env *env,
                flags = 0;
        }
 
+       mutex_lock(&lo->ldo_layout_mutex);
+
        left = lo->ldo_comp_cnt;
-       if (left <= 0)
+       if (left <= 0) {
+               mutex_unlock(&lo->ldo_layout_mutex);
                RETURN(-EINVAL);
+       }
 
        for (i = (lo->ldo_comp_cnt - 1); i >= 0; i--) {
                struct lod_layout_component *lod_comp;
@@ -3055,6 +3088,7 @@ static int lod_declare_layout_del(const struct lu_env *env,
                if (left != (i + 1)) {
                        CDEBUG(D_LAYOUT, "%s: this deletion will create "
                               "a hole.\n", lod2obd(d)->obd_name);
+                       mutex_unlock(&lo->ldo_layout_mutex);
                        RETURN(-EINVAL);
                }
                left--;
@@ -3073,8 +3107,10 @@ static int lod_declare_layout_del(const struct lu_env *env,
                        if (obj == NULL)
                                continue;
                        rc = lod_sub_declare_destroy(env, obj, th);
-                       if (rc)
+                       if (rc) {
+                               mutex_unlock(&lo->ldo_layout_mutex);
                                RETURN(rc);
+                       }
                }
        }
 
@@ -3082,9 +3118,12 @@ static int lod_declare_layout_del(const struct lu_env *env,
        if (left == lo->ldo_comp_cnt) {
                CDEBUG(D_LAYOUT, "%s: requested component id:%#x not found\n",
                       lod2obd(d)->obd_name, id);
+               mutex_unlock(&lo->ldo_layout_mutex);
                RETURN(-EINVAL);
        }
 
+       mutex_unlock(&lo->ldo_layout_mutex);
+
        memset(attr, 0, sizeof(*attr));
        attr->la_valid = LA_SIZE;
        rc = lod_sub_declare_attr_set(env, next, attr, th);
@@ -3206,13 +3245,13 @@ static int lod_layout_convert(struct lod_thread_info *info)
        }
 
        lcm = info->lti_ea_store;
+       memset(lcm, 0, sizeof(*lcm) + sizeof(*lcme));
        lcm->lcm_magic = cpu_to_le32(LOV_MAGIC_COMP_V1);
        lcm->lcm_size = cpu_to_le32(size);
        lcm->lcm_layout_gen = cpu_to_le32(le16_to_cpu(
                                                lmm_save->lmm_layout_gen));
        lcm->lcm_flags = cpu_to_le16(LCM_FL_NONE);
        lcm->lcm_entry_count = cpu_to_le16(1);
-       lcm->lcm_mirror_count = 0;
 
        lcme = &lcm->lcm_entries[0];
        lcme->lcme_flags = cpu_to_le32(LCME_FL_INIT);
@@ -3237,13 +3276,14 @@ static int lod_declare_layout_merge(const struct lu_env *env,
                struct dt_object *dt, const struct lu_buf *mbuf,
                struct thandle *th)
 {
-       struct lod_thread_info  *info = lod_env_info(env);
-       struct lu_buf           *buf = &info->lti_buf;
-       struct lod_object       *lo = lod_dt_obj(dt);
-       struct lov_comp_md_v1   *lcm;
-       struct lov_comp_md_v1   *cur_lcm;
-       struct lov_comp_md_v1   *merge_lcm;
-       struct lov_comp_md_entry_v1     *lcme;
+       struct lod_thread_info *info = lod_env_info(env);
+       struct lu_attr *layout_attr = &info->lti_layout_attr;
+       struct lu_buf *buf = &info->lti_buf;
+       struct lod_object *lo = lod_dt_obj(dt);
+       struct lov_comp_md_v1 *lcm;
+       struct lov_comp_md_v1 *cur_lcm;
+       struct lov_comp_md_v1 *merge_lcm;
+       struct lov_comp_md_entry_v1 *lcme;
        struct lov_mds_md_v1 *lmm;
        size_t size = 0;
        size_t offset;
@@ -3252,7 +3292,7 @@ static int lod_declare_layout_merge(const struct lu_env *env,
        __u32 id = 0;
        __u16 mirror_id = 0;
        __u32 mirror_count;
-       int     rc, i;
+       int rc, i;
        bool merge_has_dom;
 
        ENTRY;
@@ -3371,8 +3411,6 @@ static int lod_declare_layout_merge(const struct lu_env *env,
        }
 
        /* fixup layout information */
-       lod_obj_inc_layout_gen(lo);
-       lcm->lcm_layout_gen = cpu_to_le32(lo->ldo_layout_gen);
        lcm->lcm_size = cpu_to_le32(size);
        lcm->lcm_entry_count = cpu_to_le16(cur_entry_count + merge_entry_count);
        lcm->lcm_mirror_count = cpu_to_le16(mirror_count);
@@ -3383,6 +3421,23 @@ static int lod_declare_layout_merge(const struct lu_env *env,
        if (rc)
                GOTO(out, rc);
 
+       lod_obj_inc_layout_gen(lo);
+       lcm->lcm_layout_gen = cpu_to_le32(lo->ldo_layout_gen);
+
+       /* transfer layout version to OST objects. */
+       if (lo->ldo_mirror_count > 1) {
+               struct lod_obj_stripe_cb_data data = { {0} };
+
+               layout_attr->la_valid = LA_LAYOUT_VERSION;
+               layout_attr->la_layout_version = 0;
+               data.locd_attr = layout_attr;
+               data.locd_declare = true;
+               data.locd_stripe_cb = lod_obj_stripe_attr_set_cb;
+               rc = lod_obj_for_each_stripe(env, lo, th, &data);
+               if (rc)
+                       GOTO(out, rc);
+       }
+
        rc = lod_sub_declare_xattr_set(env, dt_object_child(dt), buf,
                                        XATTR_NAME_LOV, LU_XATTR_REPLACE, th);
 
@@ -3398,23 +3453,231 @@ static int lod_declare_layout_split(const struct lu_env *env,
                struct dt_object *dt, const struct lu_buf *mbuf,
                struct thandle *th)
 {
+       struct lod_thread_info *info = lod_env_info(env);
+       struct lu_attr *layout_attr = &info->lti_layout_attr;
        struct lod_object *lo = lod_dt_obj(dt);
        struct lov_comp_md_v1 *lcm = mbuf->lb_buf;
        int rc;
        ENTRY;
 
-       lod_obj_inc_layout_gen(lo);
-       lcm->lcm_layout_gen = cpu_to_le32(lo->ldo_layout_gen);
-
        rc = lod_striping_reload(env, lo, mbuf);
        if (rc)
                RETURN(rc);
 
+       lod_obj_inc_layout_gen(lo);
+       /* fix on-disk layout gen */
+       lcm->lcm_layout_gen = cpu_to_le32(lo->ldo_layout_gen);
+
+
+       /* transfer layout version to OST objects. */
+       if (lo->ldo_mirror_count > 1) {
+               struct lod_obj_stripe_cb_data data = { {0} };
+
+               layout_attr->la_valid = LA_LAYOUT_VERSION;
+               layout_attr->la_layout_version = 0;
+               data.locd_attr = layout_attr;
+               data.locd_declare = true;
+               data.locd_stripe_cb = lod_obj_stripe_attr_set_cb;
+               rc = lod_obj_for_each_stripe(env, lo, th, &data);
+               if (rc)
+                       RETURN(rc);
+       }
+
        rc = lod_sub_declare_xattr_set(env, dt_object_child(dt), mbuf,
                                       XATTR_NAME_LOV, LU_XATTR_REPLACE, th);
        RETURN(rc);
 }
 
+/**
+ * Declare or carry out destruction of the sub-objects listed in a layout.
+ *
+ * Walks every component entry of the composite layout in \a buf that has
+ * LCME_FL_INIT set, converts each stripe's OST id/index into a FID and looks
+ * the sub-object up on the corresponding OST target device.
+ *
+ * With \a declare true, destruction of each sub-object is declared in \a th
+ * and the object reference is dropped right away.  With \a declare false,
+ * the sub-objects are first collected into a temporary array and, only if
+ * the whole walk succeeded, destroyed one by one; every collected reference
+ * is released before returning, whether or not destruction succeeded.
+ *
+ * The object's ldo_layout_mutex is held for the whole call.
+ *
+ * Fields of \a buf are read without byte-order conversion, so the buffer
+ * must already be in CPU endianness (lod_declare_layout_purge() swabs it in
+ * place before calling here).
+ *
+ * \param[in] env      execution environment
+ * \param[in] dt       object whose mirror sub-objects are to be destroyed
+ * \param[in] buf      buffer holding a struct lov_comp_md_v1 layout
+ * \param[in] th       transaction handle
+ * \param[in] declare  true to only declare the destroys, false to do them
+ *
+ * \retval 0           on success
+ * \retval negative    errno on failure (-ENOMEM, -EINVAL, -ENOENT, or the
+ *                     error returned by a callee)
+ */
+static int lod_layout_declare_or_purge_mirror(const struct lu_env *env,
+                       struct dt_object *dt, const struct lu_buf *buf,
+                       struct thandle *th, bool declare)
+{
+       struct lod_thread_info *info = lod_env_info(env);
+       struct lod_device *d = lu2lod_dev(dt->do_lu.lo_dev);
+       struct lod_object *lo = lod_dt_obj(dt);
+       struct lov_comp_md_v1 *comp_v1 = buf->lb_buf;
+       struct lov_comp_md_entry_v1 *entry;
+       struct lov_mds_md_v1 *lmm;
+       struct dt_object **sub_objs = NULL;
+       int rc = 0, i, k, array_count = 0;
+
+       ENTRY;
+
+       /**
+        * other ops (like lod_declare_destroy) could be destroying sub
+        * objects as well, so serialize on the layout mutex.
+        */
+       mutex_lock(&lo->ldo_layout_mutex);
+
+       if (!declare) {
+               /* prepare sub-objects array: one slot per instantiated stripe */
+               for (i = 0; i < comp_v1->lcm_entry_count; i++) {
+                       entry = &comp_v1->lcm_entries[i];
+
+                       if (!(entry->lcme_flags & LCME_FL_INIT))
+                               continue;
+
+                       lmm = (struct lov_mds_md_v1 *)
+                                       ((char *)comp_v1 + entry->lcme_offset);
+                       array_count += lmm->lmm_stripe_count;
+               }
+               OBD_ALLOC_PTR_ARRAY(sub_objs, array_count);
+               if (sub_objs == NULL) {
+                       mutex_unlock(&lo->ldo_layout_mutex);
+                       RETURN(-ENOMEM);
+               }
+       }
+
+       k = 0;  /* sub_objs index */
+       for (i = 0; i < comp_v1->lcm_entry_count; i++) {
+               struct lov_ost_data_v1 *objs;
+               struct lu_object *o, *n;
+               struct dt_object *dto;
+               struct lu_device *nd;
+               struct lov_mds_md_v3 *v3;
+               __u32 idx;
+               int j;
+
+               entry = &comp_v1->lcm_entries[i];
+
+               if (!(entry->lcme_flags & LCME_FL_INIT))
+                       continue;
+
+               lmm = (struct lov_mds_md_v1 *)
+                               ((char *)comp_v1 + entry->lcme_offset);
+               /* the stripe array sits at a different offset in V3 layouts */
+               v3 = (struct lov_mds_md_v3 *)lmm;
+               if (lmm->lmm_magic == LOV_MAGIC_V3)
+                       objs = &v3->lmm_objects[0];
+               else
+                       objs = &lmm->lmm_objects[0];
+
+               for (j = 0; j < lmm->lmm_stripe_count; j++) {
+                       idx = objs[j].l_ost_idx;
+                       rc = ostid_to_fid(&info->lti_fid, &objs[j].l_ost_oi,
+                                         idx);
+                       if (rc)
+                               GOTO(out, rc);
+
+                       if (!fid_is_sane(&info->lti_fid)) {
+                               CERROR("%s: sub-object insane fid "DFID"\n",
+                                      lod2obd(d)->obd_name,
+                                      PFID(&info->lti_fid));
+                               GOTO(out, rc = -EINVAL);
+                       }
+
+                       /* hold the OST table while validating/using idx */
+                       lod_getref(&d->lod_ost_descs);
+
+                       rc = validate_lod_and_idx(d, idx);
+                       if (unlikely(rc)) {
+                               lod_putref(d, &d->lod_ost_descs);
+                               GOTO(out, rc);
+                       }
+
+                       nd = &OST_TGT(d, idx)->ltd_tgt->dd_lu_dev;
+                       lod_putref(d, &d->lod_ost_descs);
+
+                       o = lu_object_find_at(env, nd, &info->lti_fid, NULL);
+                       if (IS_ERR(o))
+                               GOTO(out, rc = PTR_ERR(o));
+
+                       n = lu_object_locate(o->lo_header, nd->ld_type);
+                       if (unlikely(!n)) {
+                               /*
+                                * No slice for this device type: release the
+                                * reference obtained on @o above; @n is NULL
+                                * here and must not be passed to the put.
+                                */
+                               lu_object_put(env, o);
+                               GOTO(out, rc = -ENOENT);
+                       }
+
+                       dto = container_of(n, struct dt_object, do_lu);
+
+                       if (declare) {
+                               rc = lod_sub_declare_destroy(env, dto, th);
+                               dt_object_put(env, dto);
+                               if (rc)
+                                       GOTO(out, rc);
+                       } else {
+                               /**
+                                * collect to-be-destroyed sub objects, the
+                                * reference would be released after actual
+                                * deletion.
+                                */
+                               sub_objs[k] = dto;
+                               k++;
+                       }
+               } /* for each stripe */
+       } /* for each component in the mirror */
+out:
+       if (!declare) {
+               i = 0;
+               if (!rc) {
+                       /* destroy the sub objects */
+                       for (; i < k; i++) {
+                               rc = lod_sub_destroy(env, sub_objs[i], th);
+                               if (rc)
+                                       break;
+                               dt_object_put(env, sub_objs[i]);
+                       }
+               }
+               /**
+                * if the walk or a sub object destroy failed, release the
+                * references still held from the sub_objs collection above
+                * (entries [i, k) have not been put yet).
+                */
+               for (; i < k; i++)
+                       dt_object_put(env, sub_objs[i]);
+
+               OBD_FREE_PTR_ARRAY(sub_objs, array_count);
+       }
+       mutex_unlock(&lo->ldo_layout_mutex);
+
+       RETURN(rc);
+}
+
+/**
+ * Declare the purge of one mirror: validate the composite layout carried in
+ * \a buf and declare destruction of the sub-objects it references.
+ *
+ * The layout must carry the LOV_MAGIC_COMP_V1 magic (stored little-endian)
+ * and, once converted to CPU byte order, a zero lcm_mirror_count; otherwise
+ * -EINVAL is returned.  On hosts where little-endian differs from CPU byte
+ * order the layout in \a buf is byte-swapped in place.
+ *
+ * \param[in] env      execution environment
+ * \param[in] dt       object the mirror is purged from
+ * \param[in,out] buf  buffer holding a struct lov_comp_md_v1 layout
+ * \param[in] th       transaction handle
+ *
+ * \retval 0           on success
+ * \retval negative    errno on failure
+ */
+static int lod_declare_layout_purge(const struct lu_env *env,
+               struct dt_object *dt, const struct lu_buf *buf,
+               struct thandle *th)
+{
+       struct lov_comp_md_v1 *lcm = buf->lb_buf;
+       struct lod_device *lod = lu2lod_dev(dt->do_lu.lo_dev);
+       __u32 magic = le32_to_cpu(lcm->lcm_magic);
+
+       ENTRY;
+
+       if (magic != LOV_MAGIC_COMP_V1) {
+               CERROR("%s: invalid layout magic %#x != %#x\n",
+                      lod2obd(lod)->obd_name, magic, LOV_MAGIC_COMP_V1);
+               RETURN(-EINVAL);
+       }
+
+       /* swab in place when CPU byte order is not little-endian */
+       if (cpu_to_le32(LOV_MAGIC_COMP_V1) != LOV_MAGIC_COMP_V1)
+               lustre_swab_lov_comp_md_v1(lcm);
+
+       /* from now on, @buf contains cpu endian data */
+
+       if (lcm->lcm_mirror_count) {
+               CERROR("%s: can only purge one mirror from "DFID"\n",
+                      lod2obd(lod)->obd_name, PFID(lu_object_fid(&dt->do_lu)));
+               RETURN(-EINVAL);
+       }
+
+       /* declare sub objects deletion in the mirror stored in @buf */
+       RETURN(lod_layout_declare_or_purge_mirror(env, dt, buf, th, true));
+}
+
+/*
+ * Execution-phase counterpart of the purge declaration: destroy the sub
+ * objects of the mirror stored in @buf by running the shared helper with
+ * declare == false, and hand its result back to the caller.
+ */
+static int lod_layout_purge(const struct lu_env *env, struct dt_object *dt,
+                           const struct lu_buf *buf, struct thandle *th)
+{
+       ENTRY;
+
+       RETURN(lod_layout_declare_or_purge_mirror(env, dt, buf, th, false));
+}
+
 /**
  * Implementation of dt_object_operations::do_declare_xattr_set.
  *
@@ -3439,7 +3702,8 @@ static int lod_declare_xattr_set(const struct lu_env *env,
 
        mode = dt->do_lu.lo_header->loh_attr & S_IFMT;
        if ((S_ISREG(mode) || mode == 0) &&
-           !(fl & (LU_XATTR_REPLACE | LU_XATTR_MERGE | LU_XATTR_SPLIT)) &&
+           !(fl & (LU_XATTR_REPLACE | LU_XATTR_MERGE | LU_XATTR_SPLIT |
+                   LU_XATTR_PURGE)) &&
            (strcmp(name, XATTR_NAME_LOV) == 0 ||
             strcmp(name, XATTR_LUSTRE_LOV) == 0)) {
                /*
@@ -3469,6 +3733,10 @@ static int lod_declare_xattr_set(const struct lu_env *env,
                LASSERT(strcmp(name, XATTR_NAME_LOV) == 0 ||
                        strcmp(name, XATTR_LUSTRE_LOV) == 0);
                rc = lod_declare_layout_split(env, dt, buf, th);
+       } else if (fl & LU_XATTR_PURGE) {
+               LASSERT(strcmp(name, XATTR_NAME_LOV) == 0 ||
+                       strcmp(name, XATTR_LUSTRE_LOV) == 0);
+               rc = lod_declare_layout_purge(env, dt, buf, th);
        } else if (S_ISREG(mode) &&
                   strlen(name) >= sizeof(XATTR_LUSTRE_LOV) + 3 &&
                   allowed_lustre_lov(name)) {
@@ -3562,10 +3830,11 @@ static int lod_xattr_del_internal(const struct lu_env *env,
                                  struct dt_object *dt,
                                  const char *name, struct thandle *th)
 {
-       struct dt_object        *next = dt_object_child(dt);
-       struct lod_object       *lo = lod_dt_obj(dt);
-       int                     rc;
-       int                     i;
+       struct dt_object *next = dt_object_child(dt);
+       struct lod_object *lo = lod_dt_obj(dt);
+       int i;
+       int rc;
+
        ENTRY;
 
        rc = lod_sub_xattr_del(env, next, name, th);
@@ -3576,7 +3845,11 @@ static int lod_xattr_del_internal(const struct lu_env *env,
                RETURN(rc);
 
        for (i = 0; i < lo->ldo_dir_stripe_count; i++) {
-               LASSERT(lo->ldo_stripe[i]);
+               if (!lo->ldo_stripe[i])
+                       continue;
+
+               if (!dt_object_exists(lo->ldo_stripe[i]))
+                       continue;
 
                rc = lod_sub_xattr_del(env, lo->ldo_stripe[i], name, th);
                if (rc != 0)
@@ -3785,7 +4058,7 @@ static int lod_xattr_set_lmv(const struct lu_env *env, struct dt_object *dt,
        if (rc != 0)
                RETURN(rc);
 
-       attr->la_valid = LA_ATIME | LA_MTIME | LA_CTIME |
+       attr->la_valid = LA_ATIME | LA_MTIME | LA_CTIME | LA_FLAGS |
                         LA_MODE | LA_UID | LA_GID | LA_TYPE | LA_PROJID;
        dof->dof_type = DFT_DIR;
 
@@ -4018,6 +4291,8 @@ static int lod_dir_striping_create_internal(const struct lu_env *env,
 
        /* Transfer default LMV striping from the parent */
        if (lds != NULL && lds->lds_dir_def_striping_set &&
+           lds->lds_dir_def_max_inherit != LMV_INHERIT_END &&
+           lds->lds_dir_def_max_inherit != LMV_INHERIT_NONE &&
            !(LMVEA_DELETE_VALUES(lds->lds_dir_def_stripe_count,
                                 lds->lds_dir_def_stripe_offset) &&
              le32_to_cpu(lds->lds_dir_def_hash_type) !=
@@ -4039,6 +4314,10 @@ static int lod_dir_striping_create_internal(const struct lu_env *env,
                        cpu_to_le32(lds->lds_dir_def_stripe_offset);
                v1->lum_hash_type =
                        cpu_to_le32(lds->lds_dir_def_hash_type);
+               v1->lum_max_inherit =
+                       lmv_inherit_next(lds->lds_dir_def_max_inherit);
+               v1->lum_max_inherit_rr =
+                       lmv_inherit_rr_next(lds->lds_dir_def_max_inherit_rr);
 
                info->lti_buf.lb_buf = v1;
                info->lti_buf.lb_len = sizeof(*v1);
@@ -4138,7 +4417,7 @@ static int lod_generate_and_set_lovea(const struct lu_env *env,
        LASSERT(lo);
 
        if (lo->ldo_comp_cnt == 0 && !lo->ldo_is_foreign) {
-               lod_striping_free(env, lo);
+               lod_striping_free_nolock(env, lo);
                rc = lod_sub_xattr_del(env, next, XATTR_NAME_LOV, th);
                RETURN(rc);
        }
@@ -4223,6 +4502,7 @@ static int lod_layout_repeat_comp(const struct lu_env *env,
        new_comp->llc_flags &= ~LCME_FL_INIT;
        new_comp->llc_stripe = NULL;
        new_comp->llc_stripes_allocated = 0;
+       new_comp->llc_ost_indices = NULL;
        new_comp->llc_stripe_offset = LOV_OFFSET_DEFAULT;
        /* for uninstantiated components, layout gen stores default stripe
         * offset */
@@ -4433,6 +4713,8 @@ static int lod_layout_del(const struct lu_env *env, struct dt_object *dt,
 
        LASSERT(lo->ldo_mirror_count == 1);
 
+       mutex_lock(&lo->ldo_layout_mutex);
+
        rc = lod_layout_del_prep_layout(env, lo, th);
        if (rc < 0)
                GOTO(out, rc);
@@ -4460,7 +4742,10 @@ static int lod_layout_del(const struct lu_env *env, struct dt_object *dt,
        EXIT;
 out:
        if (rc)
-               lod_striping_free(env, lo);
+               lod_striping_free_nolock(env, lo);
+
+       mutex_unlock(&lo->ldo_layout_mutex);
+
        return rc;
 }
 
@@ -4492,7 +4777,10 @@ static int lod_xattr_set(const struct lu_env *env,
                         const char *name, int fl, struct thandle *th)
 {
        struct dt_object *next = dt_object_child(dt);
-       int rc;
+       struct lu_attr *layout_attr = &lod_env_info(env)->lti_layout_attr;
+       struct lod_object *lo = lod_dt_obj(dt);
+       struct lod_obj_stripe_cb_data data = { {0} };
+       int rc = 0;
 
        ENTRY;
 
@@ -4586,6 +4874,31 @@ static int lod_xattr_set(const struct lu_env *env,
                        lod_striping_free(env, lod_dt_obj(dt));
 
                        rc = lod_sub_xattr_set(env, next, buf, name, fl, th);
+               } else if (fl & LU_XATTR_SPLIT) {
+                       rc = lod_sub_xattr_set(env, next, buf, name, fl, th);
+                       if (rc)
+                               RETURN(rc);
+
+                       rc = lod_striping_reload(env, lo, buf);
+                       if (rc)
+                               RETURN(rc);
+
+                       if (lo->ldo_mirror_count > 1 &&
+                           layout_attr->la_valid & LA_LAYOUT_VERSION) {
+                               /* mirror split */
+                               layout_attr->la_layout_version =
+                                               lo->ldo_layout_gen;
+                               data.locd_attr = layout_attr;
+                               data.locd_declare = false;
+                               data.locd_stripe_cb =
+                                               lod_obj_stripe_attr_set_cb;
+                               rc = lod_obj_for_each_stripe(env, lo, th,
+                                                            &data);
+                               if (rc)
+                                       RETURN(rc);
+                       }
+               } else if (fl & LU_XATTR_PURGE) {
+                       rc = lod_layout_purge(env, dt, buf, th);
                } else if (dt_object_remote(dt)) {
                        /* This only happens during migration, see
                         * mdd_migrate_create(), in which Master MDT will
@@ -4614,6 +4927,23 @@ static int lod_xattr_set(const struct lu_env *env,
                                !lod_dt_obj(dt)->ldo_comp_cached));
 
                        rc = lod_striped_create(env, dt, NULL, NULL, th);
+                       if (rc)
+                               RETURN(rc);
+
+                       if (fl & LU_XATTR_MERGE && lo->ldo_mirror_count > 1 &&
+                           layout_attr->la_valid & LA_LAYOUT_VERSION) {
+                               /* mirror merge exec phase */
+                               layout_attr->la_layout_version =
+                                               lo->ldo_layout_gen;
+                               data.locd_attr = layout_attr;
+                               data.locd_declare = false;
+                               data.locd_stripe_cb =
+                                               lod_obj_stripe_attr_set_cb;
+                               rc = lod_obj_for_each_stripe(env, lo, th,
+                                                            &data);
+                               if (rc)
+                                       RETURN(rc);
+                       }
                }
                RETURN(rc);
        } else if (strcmp(name, XATTR_NAME_FID) == 0) {
@@ -4671,6 +5001,9 @@ static int lod_declare_xattr_del(const struct lu_env *env,
                if (!dto)
                        continue;
 
+               if (!dt_object_exists(dto))
+                       continue;
+
                rc = lod_sub_declare_xattr_del(env, dto, name, th);
                if (rc != 0)
                        break;
@@ -4690,35 +5023,14 @@ static int lod_declare_xattr_del(const struct lu_env *env,
 static int lod_xattr_del(const struct lu_env *env, struct dt_object *dt,
                         const char *name, struct thandle *th)
 {
-       struct dt_object        *next = dt_object_child(dt);
-       struct lod_object       *lo = lod_dt_obj(dt);
-       int                     rc;
-       int                     i;
+       int rc;
+
        ENTRY;
 
        if (!strcmp(name, XATTR_NAME_LOV) || !strcmp(name, XATTR_NAME_LMV))
                lod_striping_free(env, lod_dt_obj(dt));
 
-       rc = lod_sub_xattr_del(env, next, name, th);
-       if (rc != 0 || !S_ISDIR(dt->do_lu.lo_header->loh_attr))
-               RETURN(rc);
-
-       if (!strcmp(name, XATTR_NAME_LMV))
-               RETURN(0);
-
-       if (lo->ldo_dir_stripe_count == 0)
-               RETURN(0);
-
-       for (i = 0; i < lo->ldo_dir_stripe_count; i++) {
-               struct dt_object *dto = lo->ldo_stripe[i];
-
-               if (!dto)
-                       continue;
-
-               rc = lod_sub_xattr_del(env, dto, name, th);
-               if (rc != 0)
-                       break;
-       }
+       rc = lod_xattr_del_internal(env, dt, name, th);
 
        RETURN(rc);
 }
@@ -4813,6 +5125,8 @@ static int lod_get_default_lov_striping(const struct lu_env *env,
 
        ENTRY;
 
+       lds->lds_def_striping_set = 0;
+
        rc = lod_get_lov_ea(env, lo);
        if (rc < 0)
                RETURN(rc);
@@ -4971,6 +5285,8 @@ static int lod_get_default_lmv_striping(const struct lu_env *env,
                                le32_to_cpu(lmu->lum_stripe_offset);
                lds->lds_dir_def_hash_type =
                                le32_to_cpu(lmu->lum_hash_type);
+               lds->lds_dir_def_max_inherit = lmu->lum_max_inherit;
+               lds->lds_dir_def_max_inherit_rr = lmu->lum_max_inherit_rr;
                lds->lds_dir_def_striping_set = 1;
        }
 
@@ -5200,7 +5516,12 @@ static void lod_ah_init(const struct lu_env *env,
                 */
                if (likely(lp != NULL)) {
                        lod_get_default_striping(env, lp, lds);
-
+                       if (lds->lds_def_striping_set) {
+                               rc = lod_verify_striping(env, d, lp,
+                                                        &info->lti_buf, false);
+                               if (rc)
+                                       lds->lds_def_striping_set = 0;
+                       }
                        /* inherit default striping except ROOT */
                        if ((lds->lds_def_striping_set ||
                             lds->lds_dir_def_striping_set) &&
@@ -5269,8 +5590,12 @@ static void lod_ah_init(const struct lu_env *env,
         */
        if (likely(lp != NULL)) {
                rc = lod_get_default_lov_striping(env, lp, lds, ah);
-               if (rc == 0)
-                       lod_striping_from_default(lc, lds, child_mode);
+               if (rc == 0 && lds->lds_def_striping_set) {
+                       rc = lod_verify_striping(env, d, lp, &info->lti_buf,
+                                                false);
+                       if (rc == 0)
+                               lod_striping_from_default(lc, lds, child_mode);
+               }
        }
 
        /* Initialize lod_device::lod_md_root object reference */
@@ -5300,8 +5625,14 @@ static void lod_ah_init(const struct lu_env *env,
            lod_need_inherit_more(lc, true, ah->dah_append_pool)) {
                rc = lod_get_default_lov_striping(env, d->lod_md_root, lds,
                                                  ah);
+               if (rc || !lds->lds_def_striping_set)
+                       goto out;
+
+               rc = lod_verify_striping(env, d, d->lod_md_root, &info->lti_buf,
+                                        false);
                if (rc)
                        goto out;
+
                if (lc->ldo_comp_cnt == 0) {
                        lod_striping_from_default(lc, lds, child_mode);
                } else if (!lds->lds_def_striping_is_composite) {
@@ -5613,24 +5944,8 @@ static int lod_declare_create(const struct lu_env *env, struct dt_object *dt,
                        if (OBD_FAIL_CHECK(OBD_FAIL_MDS_STALE_DIR_LAYOUT))
                                GOTO(out, rc = -EREMOTE);
 
-                       if (lo->ldo_dir_stripe_offset == LMV_OFFSET_DEFAULT) {
-                               struct lod_default_striping *lds;
-
-                               lds = lo->ldo_def_striping;
-                               /*
-                                * child and parent should be on the same MDT,
-                                * but if parent has default LMV, and the start
-                                * MDT offset is -1, it's allowed. This check
-                                * is not necessary after 2.12.22 because client
-                                * follows this already, but old client may not.
-                                */
-                               if (hint->dah_parent &&
-                                   dt_object_remote(hint->dah_parent) && lds &&
-                                   lds->lds_dir_def_stripe_offset !=
-                                   LMV_OFFSET_DEFAULT)
-                                       GOTO(out, rc = -EREMOTE);
-                       } else if (lo->ldo_dir_stripe_offset !=
-                                  ss->ss_node_id) {
+                       if (lo->ldo_dir_stripe_offset != LMV_OFFSET_DEFAULT &&
+                           lo->ldo_dir_stripe_offset != ss->ss_node_id) {
                                struct lod_device *lod;
                                struct lu_tgt_desc *mdt = NULL;
                                bool found_mdt = false;
@@ -5746,6 +6061,8 @@ int lod_striped_create(const struct lu_env *env, struct dt_object *dt,
        int     rc = 0, i, j;
        ENTRY;
 
+       mutex_lock(&lo->ldo_layout_mutex);
+
        LASSERT((lo->ldo_comp_cnt != 0 && lo->ldo_comp_entries != NULL) ||
                lo->ldo_is_foreign);
 
@@ -5804,15 +6121,20 @@ int lod_striped_create(const struct lu_env *env, struct dt_object *dt,
        if (rc)
                GOTO(out, rc);
 
+       lo->ldo_comp_cached = 1;
+
        rc = lod_generate_and_set_lovea(env, lo, th);
        if (rc)
                GOTO(out, rc);
 
-       lo->ldo_comp_cached = 1;
+       mutex_unlock(&lo->ldo_layout_mutex);
+
        RETURN(0);
 
 out:
-       lod_striping_free(env, lo);
+       lod_striping_free_nolock(env, lo);
+       mutex_unlock(&lo->ldo_layout_mutex);
+
        RETURN(rc);
 }
 
@@ -5872,11 +6194,12 @@ lod_obj_stripe_destroy_cb(const struct lu_env *env, struct lod_object *lo,
 {
        if (data->locd_declare)
                return lod_sub_declare_destroy(env, dt, th);
-       else if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LOST_SPEOBJ) ||
-                stripe_idx == cfs_fail_val)
+
+       if (!OBD_FAIL_CHECK(OBD_FAIL_LFSCK_LOST_SPEOBJ) ||
+           stripe_idx == cfs_fail_val)
                return lod_sub_destroy(env, dt, th);
-       else
-               return 0;
+
+       return 0;
 }
 
 /**
@@ -6290,7 +6613,9 @@ static int lod_invalidate(const struct lu_env *env, struct dt_object *dt)
 }
 
 static int lod_declare_instantiate_components(const struct lu_env *env,
-               struct lod_object *lo, struct thandle *th)
+                                             struct lod_object *lo,
+                                             struct thandle *th,
+                                             __u64 reserve)
 {
        struct lod_thread_info *info = lod_env_info(env);
        int i;
@@ -6301,7 +6626,7 @@ static int lod_declare_instantiate_components(const struct lu_env *env,
 
        for (i = 0; i < info->lti_count; i++) {
                rc = lod_qos_prep_create(env, lo, NULL, th,
-                                        info->lti_comp_idx[i]);
+                                        info->lti_comp_idx[i], reserve);
                if (rc)
                        break;
        }
@@ -6337,15 +6662,15 @@ static int lod_declare_instantiate_components(const struct lu_env *env,
  */
 static bool lod_sel_osts_allowed(const struct lu_env *env,
                                 struct lod_object *lo,
-                                int index, __u64 extension_size,
+                                int index, __u64 reserve,
                                 struct lu_extent *extent,
                                 struct lu_extent *comp_extent, int write)
 {
        struct lod_layout_component *lod_comp = &lo->ldo_comp_entries[index];
        struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
-       struct obd_statfs *sfs = &lod_env_info(env)->lti_osfs;
+       struct lod_thread_info *tinfo = lod_env_info(env);
+       struct obd_statfs *sfs = &tinfo->lti_osfs;
        __u64 available = 0;
-       __u64 size;
        bool ret = true;
        int i, rc;
 
@@ -6353,21 +6678,6 @@ static bool lod_sel_osts_allowed(const struct lu_env *env,
 
        LASSERT(lod_comp->llc_stripe_count != 0);
 
-       if (write == 0 ||
-           (extent->e_start == 0 && extent->e_end == OBD_OBJECT_EOF)) {
-               /* truncate or append */
-               size = extension_size;
-       } else {
-               /* In case of write op, check the real write extent,
-                * it may be larger than the extension_size */
-               size = roundup(min(extent->e_end, comp_extent->e_end) -
-                              max(extent->e_start, comp_extent->e_start),
-                              extension_size);
-       }
-       /* extension_size is file level, so we must divide by stripe count to
-        * compare it to available space on a single OST */
-       size /= lod_comp->llc_stripe_count;
-
        lod_getref(&lod->lod_ost_descs);
        for (i = 0; i < lod_comp->llc_stripe_count; i++) {
                int index = lod_comp->llc_ost_indices[i];
@@ -6394,7 +6704,7 @@ static bool lod_sel_osts_allowed(const struct lu_env *env,
                if (j < lod_comp->llc_stripe_count)
                        continue;
 
-               if (!cfs_bitmap_check(lod->lod_ost_bitmap, index)) {
+               if (!test_bit(index, lod->lod_ost_bitmap)) {
                        CDEBUG(D_LAYOUT, "ost %d no longer present\n", index);
                        ret = false;
                        break;
@@ -6408,9 +6718,9 @@ static bool lod_sel_osts_allowed(const struct lu_env *env,
                        break;
                }
 
-               if (sfs->os_state & OS_STATE_ENOSPC ||
-                   sfs->os_state & OS_STATE_READONLY ||
-                   sfs->os_state & OS_STATE_DEGRADED) {
+               if (sfs->os_state & OS_STATFS_ENOSPC ||
+                   sfs->os_state & OS_STATFS_READONLY ||
+                   sfs->os_state & OS_STATFS_DEGRADED) {
                        CDEBUG(D_LAYOUT, "ost %d is not availble for SEL "
                               "extension, state %u\n", index, sfs->os_state);
                        ret = false;
@@ -6428,11 +6738,11 @@ static bool lod_sel_osts_allowed(const struct lu_env *env,
                       (100ull * sfs->os_bavail) / sfs->os_blocks,
                       (100ull * sfs->os_bfree) / sfs->os_blocks);
 
-               if (size * repeated > available) {
+               if (reserve * repeated > available) {
                        ret = false;
                        CDEBUG(D_LAYOUT, "low space on ost %d, available %llu "
-                              "< extension size %llu\n", index, available,
-                              extension_size);
+                              "< extension size %llu repeated %d\n", index,
+                              available, reserve, repeated);
                        break;
                }
        }
@@ -6538,6 +6848,26 @@ static __u64 lod_extension_new_end(__u64 extension_size, __u64 extent_end,
        return new_end;
 }
 
+/**
+ * Calculate the exact reservation (per-OST extension_size) on the OSTs being
+ * instantiated. It needs to be calculated in advance and taken into account at
+ * the instantiation time, because otherwise lod_statfs_and_check() may consider
+ * an OST as OK, but SEL needs its extension_size to fit the free space and the
+ * OST may turn out to be low-on-space, so an inappropriate OST may be used
+ * and ENOSPC occurs.
+ *
+ * \param[in] lod_comp         lod component we are checking
+ *
+ * \retval     size to be reserved on each OST of lod_comp's stripe.
+ */
+static __u64 lod_sel_stripe_reserved(struct lod_layout_component *lod_comp)
+{
+       /* extension_size is file level, so we must divide by stripe count to
+        * compare it to available space on a single OST */
+       return  lod_comp->llc_stripe_size * SEL_UNIT_SIZE /
+               lod_comp->llc_stripe_count;
+}
+
 /* As lod_sel_handler() could be re-entered for the same component several
  * times, this is the data for the next call. Fields could be changed to
  * component indexes when needed, (e.g. if there is no need to instantiate
@@ -6619,7 +6949,7 @@ static int lod_sel_handler(const struct lu_env *env,
        struct lod_layout_component *lod_comp;
        struct lod_layout_component *prev;
        struct lod_layout_component *next = NULL;
-       __u64 extension_size;
+       __u64 extension_size, reserve;
        __u64 new_end = 0;
        bool repeated;
        int change = 0;
@@ -6656,11 +6986,13 @@ static int lod_sel_handler(const struct lu_env *env,
                RETURN(-EINVAL);
        }
 
+       reserve = lod_sel_stripe_reserved(lod_comp);
+
        if (!prev->llc_stripe) {
                CDEBUG(D_LAYOUT, "Previous component not inited\n");
                info->lti_count = 1;
                info->lti_comp_idx[0] = index - 1;
-               rc = lod_declare_instantiate_components(env, lo, th);
+               rc = lod_declare_instantiate_components(env, lo, th, reserve);
                /* ENOSPC tells us we can't use this component.  If there is
                 * a next or we are repeating, we either spill over (next) or
                 * extend the original comp (repeat).  Otherwise, return the
@@ -6672,8 +7004,7 @@ static int lod_sel_handler(const struct lu_env *env,
        }
 
        if (sd->sd_force == 0 && rc == 0)
-               rc = !lod_sel_osts_allowed(env, lo, index - 1,
-                                          extension_size, extent,
+               rc = !lod_sel_osts_allowed(env, lo, index - 1, reserve, extent,
                                           &lod_comp->llc_extent, write);
 
        repeated = !!(sd->sd_repeat);
@@ -6987,7 +7318,7 @@ static int lod_declare_update_plain(const struct lu_env *env,
                RETURN(-EALREADY);
 
        lod_obj_inc_layout_gen(lo);
-       rc = lod_declare_instantiate_components(env, lo, th);
+       rc = lod_declare_instantiate_components(env, lo, th, 0);
        EXIT;
 out:
        if (rc)
@@ -7097,8 +7428,8 @@ static inline int lod_check_ost_avail(const struct lu_env *env,
 
        ost = OST_TGT(lod, idx);
        if (ost->ltd_statfs.os_state &
-               (OS_STATE_READONLY | OS_STATE_ENOSPC | OS_STATE_ENOINO |
-                OS_STATE_NOPRECREATE) ||
+               (OS_STATFS_READONLY | OS_STATFS_ENOSPC | OS_STATFS_ENOINO |
+                OS_STATFS_NOPRECREATE) ||
            ost->ltd_active == 0) {
                CDEBUG(D_LAYOUT, DFID ": mirror %d OST%d unavail, rc = %d\n",
                       PFID(lod_object_fid(lo)), index, idx, rc);
@@ -7136,6 +7467,11 @@ static int lod_primary_pick(const struct lu_env *env, struct lod_object *lo,
         * cluster.
         */
        lod_qos_statfs_update(env, lod, &lod->lod_ost_descs);
+
+       rc = lod_fill_mirrors(lo);
+       if (rc)
+               RETURN(rc);
+
        for (i = 0; i < lo->ldo_mirror_count; i++) {
                bool ost_avail = true;
                int index = (i + seq) % lo->ldo_mirror_count;
@@ -7147,7 +7483,7 @@ static int lod_primary_pick(const struct lu_env *env, struct lod_object *lo,
                }
 
                /* 2nd pick is for the primary mirror containing unavail OST */
-               if (lo->ldo_mirrors[index].lme_primary && second_pick < 0)
+               if (lo->ldo_mirrors[index].lme_prefer && second_pick < 0)
                        second_pick = index;
 
                /* 3rd pick is for non-primary mirror containing unavail OST */
@@ -7158,7 +7494,7 @@ static int lod_primary_pick(const struct lu_env *env, struct lod_object *lo,
                 * we found a non-primary 1st pick, we'd like to find a
                 * potential pirmary mirror.
                 */
-               if (picked >= 0 && !lo->ldo_mirrors[index].lme_primary)
+               if (picked >= 0 && !lo->ldo_mirrors[index].lme_prefer)
                        continue;
 
                /* check the availability of OSTs */
@@ -7195,7 +7531,7 @@ static int lod_primary_pick(const struct lu_env *env, struct lod_object *lo,
                 * primary with all OSTs are available, this is the perfect
                 * 1st pick.
                 */
-               if (lo->ldo_mirrors[index].lme_primary)
+               if (lo->ldo_mirrors[index].lme_prefer)
                        break;
        } /* for all mirrors */
 
@@ -7404,7 +7740,7 @@ static int lod_declare_update_rdonly(const struct lu_env *env,
                lo->ldo_layout_gen = layout_version & 0xffff;
        }
 
-       rc = lod_declare_instantiate_components(env, lo, th);
+       rc = lod_declare_instantiate_components(env, lo, th, 0);
        if (rc)
                GOTO(out, rc);
 
@@ -7439,22 +7775,29 @@ static int lod_declare_update_write_pending(const struct lu_env *env,
        LASSERT(mlc->mlc_opc == MD_LAYOUT_WRITE ||
                mlc->mlc_opc == MD_LAYOUT_RESYNC);
 
-       /* look for the primary mirror */
+       /* look for the first preferred mirror */
        for (i = 0; i < lo->ldo_mirror_count; i++) {
                if (lo->ldo_mirrors[i].lme_stale)
                        continue;
-
-               LASSERTF(primary < 0, DFID " has multiple primary: %u / %u",
-                        PFID(lod_object_fid(lo)),
-                        lo->ldo_mirrors[i].lme_id,
-                        lo->ldo_mirrors[primary].lme_id);
+               if (lo->ldo_mirrors[i].lme_prefer == 0)
+                       continue;
 
                primary = i;
+               break;
        }
        if (primary < 0) {
-               CERROR(DFID ": doesn't have a primary mirror\n",
-                      PFID(lod_object_fid(lo)));
-               GOTO(out, rc = -ENODATA);
+               /* no primary, use any in-sync */
+               for (i = 0; i < lo->ldo_mirror_count; i++) {
+                       if (lo->ldo_mirrors[i].lme_stale)
+                               continue;
+                       primary = i;
+                       break;
+               }
+               if (primary < 0) {
+                       CERROR(DFID ": doesn't have a primary mirror\n",
+                              PFID(lod_object_fid(lo)));
+                       GOTO(out, rc = -ENODATA);
+               }
        }
 
        CDEBUG(D_LAYOUT, DFID": found primary %u\n",
@@ -7548,7 +7891,7 @@ static int lod_declare_update_write_pending(const struct lu_env *env,
                lo->ldo_flr_state = LCM_FL_SYNC_PENDING;
        }
 
-       rc = lod_declare_instantiate_components(env, lo, th);
+       rc = lod_declare_instantiate_components(env, lo, th, 0);
        if (rc)
                GOTO(out, rc);
 
@@ -7577,6 +7920,7 @@ static int lod_declare_update_sync_pending(const struct lu_env *env,
                struct thandle *th)
 {
        struct lod_thread_info  *info = lod_env_info(env);
+       struct lu_attr *layout_attr = &info->lti_layout_attr;
        unsigned sync_components = 0;
        unsigned resync_components = 0;
        int i;
@@ -7649,6 +7993,12 @@ static int lod_declare_update_sync_pending(const struct lu_env *env,
        lo->ldo_flr_state = LCM_FL_RDONLY;
        lod_obj_inc_layout_gen(lo);
 
+       layout_attr->la_valid = LA_LAYOUT_VERSION;
+       layout_attr->la_layout_version = 0; /* set current version */
+       rc = lod_declare_attr_set(env, &lo->ldo_obj, layout_attr, th);
+       if (rc)
+               GOTO(out, rc);
+
        info->lti_buf.lb_len = lod_comp_md_size(lo, false);
        rc = lod_sub_declare_xattr_set(env, lod_object_child(lo),
                                       &info->lti_buf, XATTR_NAME_LOV, 0, th);
@@ -7926,9 +8276,9 @@ static int lod_dir_declare_layout_shrink(const struct lu_env *env,
        struct lod_object *lo = lod_dt_obj(dt);
        struct dt_object *next = dt_object_child(dt);
        struct lmv_user_md *lmu = mlc->mlc_buf.lb_buf;
-       __u32 final_stripe_count;
        char *stripe_name = info->lti_key;
        struct lu_buf *lmv_buf = &info->lti_buf;
+       __u32 final_stripe_count;
        struct dt_object *dto;
        int i;
        int rc;
@@ -7954,9 +8304,6 @@ static int lod_dir_declare_layout_shrink(const struct lu_env *env,
                        continue;
 
                if (i < final_stripe_count) {
-                       if (final_stripe_count == 1)
-                               continue;
-
                        rc = lod_sub_declare_xattr_set(env, dto, lmv_buf,
                                                       XATTR_NAME_LMV,
                                                       LU_XATTR_REPLACE, th);
@@ -8146,6 +8493,7 @@ static int lod_dir_layout_detach(const struct lu_env *env,
        lo->ldo_stripe = NULL;
        lo->ldo_dir_stripes_allocated = 0;
        lo->ldo_dir_stripe_count = 0;
+       dt->do_index_ops = &lod_index_ops;
 
        RETURN(rc);
 }
@@ -8179,9 +8527,12 @@ static int lod_dir_layout_shrink(const struct lu_env *env,
        lmv->lmv_magic = cpu_to_le32(LMV_MAGIC_STRIPE);
        lmv->lmv_stripe_count = cpu_to_le32(final_stripe_count);
        lmv->lmv_hash_type = cpu_to_le32(lo->ldo_dir_hash_type) &
-                            cpu_to_le32(LMV_HASH_TYPE_MASK);
+                            cpu_to_le32(LMV_HASH_TYPE_MASK |
+                                        LMV_HASH_FLAG_FIXED);
        lmv->lmv_layout_version =
                        cpu_to_le32(lo->ldo_dir_layout_version + 1);
+       lmv->lmv_migrate_offset = 0;
+       lmv->lmv_migrate_hash = 0;
 
        for (i = 0; i < lo->ldo_dir_stripe_count; i++) {
                dto = lo->ldo_stripe[i];
@@ -8189,14 +8540,6 @@ static int lod_dir_layout_shrink(const struct lu_env *env,
                        continue;
 
                if (i < final_stripe_count) {
-                       /* if only one stripe left, no need to update
-                        * LMV because this stripe will replace master
-                        * object and act as a plain directory.
-                        */
-                       if (final_stripe_count == 1)
-                               continue;
-
-
                        rc = lod_fld_lookup(env, lod,
                                            lu_object_fid(&dto->do_lu),
                                            &mdtidx, &type);
@@ -8350,7 +8693,7 @@ static int lod_layout_change(const struct lu_env *env, struct dt_object *dt,
        RETURN(rc);
 }
 
-struct dt_object_operations lod_obj_ops = {
+const struct dt_object_operations lod_obj_ops = {
        .do_read_lock           = lod_read_lock,
        .do_write_lock          = lod_write_lock,
        .do_read_unlock         = lod_read_unlock,
@@ -8451,7 +8794,7 @@ static int lod_punch(const struct lu_env *env, struct dt_object *dt,
  * body_ops themselves will check file type inside, see lod_read/write/punch for
  * details.
  */
-const struct dt_body_operations lod_body_ops = {
+static const struct dt_body_operations lod_body_ops = {
        .dbo_read               = lod_read,
        .dbo_declare_write      = lod_declare_write,
        .dbo_write              = lod_write,
@@ -8502,7 +8845,7 @@ static int lod_object_init(const struct lu_env *env, struct lu_object *lo,
 
        if (ltd != NULL) {
                if (ltd->ltd_tgts_size > idx &&
-                   cfs_bitmap_check(ltd->ltd_tgt_bitmap, idx)) {
+                   test_bit(idx, ltd->ltd_tgt_bitmap)) {
                        tgt = LTD_TGT(ltd, idx);
 
                        LASSERT(tgt != NULL);
@@ -8689,7 +9032,7 @@ static int lod_object_print(const struct lu_env *env, void *cookie,
        return (*p)(env, cookie, LUSTRE_LOD_NAME"-object@%p", o);
 }
 
-struct lu_object_operations lod_lu_obj_ops = {
+const struct lu_object_operations lod_lu_obj_ops = {
        .loo_object_init        = lod_object_init,
        .loo_object_free        = lod_object_free,
        .loo_object_release     = lod_object_release,