Whamcloud - gitweb
LU-15727 lod: honor append_pool with default composite layouts
[fs/lustre-release.git] / lustre / lod / lod_object.c
index 5729d2f..a2d36fa 100644 (file)
@@ -1868,6 +1868,7 @@ int lod_parse_dir_striping(const struct lu_env *env, struct lod_object *lo,
        }
 out:
        lo->ldo_stripe = stripe;
+       lo->ldo_is_foreign = 0;
        lo->ldo_dir_stripe_count = le32_to_cpu(lmv1->lmv_stripe_count);
        lo->ldo_dir_stripes_allocated = le32_to_cpu(lmv1->lmv_stripe_count);
        lo->ldo_dir_layout_version = le32_to_cpu(lmv1->lmv_layout_version);
@@ -2300,6 +2301,41 @@ out:
 
 /**
  *
+ * Alloc cached foreign LOV
+ *
+ * \param[in] lo        object
+ * \param[in] size      size of foreign LOV
+ *
+ * \retval             0 on success
+ * \retval             negative if failed
+ */
+int lod_alloc_foreign_lov(struct lod_object *lo, size_t size)
+{
+       OBD_ALLOC_LARGE(lo->ldo_foreign_lov, size);
+       if (lo->ldo_foreign_lov == NULL)
+               return -ENOMEM;
+       lo->ldo_foreign_lov_size = size;
+       lo->ldo_is_foreign = 1;
+       return 0;
+}
+
+/**
+ *
+ * Release the cached foreign LOV of an object.
+ *
+ * Frees the buffer if one is attached, then clears the pointer, the
+ * recorded size and the foreign flag. Calling it on an object without a
+ * cached buffer only resets the size and the flag.
+ *
+ * \param[in] lo        object
+ */
+void lod_free_foreign_lov(struct lod_object *lo)
+{
+       if (lo->ldo_foreign_lov) {
+               OBD_FREE_LARGE(lo->ldo_foreign_lov, lo->ldo_foreign_lov_size);
+               lo->ldo_foreign_lov = NULL;
+       }
+
+       lo->ldo_is_foreign = 0;
+       lo->ldo_foreign_lov_size = 0;
+}
+
+/**
+ *
  * Alloc cached foreign LMV
  *
  * \param[in] lo        object
@@ -2314,12 +2350,27 @@ int lod_alloc_foreign_lmv(struct lod_object *lo, size_t size)
        if (lo->ldo_foreign_lmv == NULL)
                return -ENOMEM;
        lo->ldo_foreign_lmv_size = size;
-       lo->ldo_dir_is_foreign = 1;
+       lo->ldo_is_foreign = 1;
 
        return 0;
 }
 
 /**
+ *
+ * Free cached foreign LMV
+ *
+ * \param[in] lo        object
+ */
+void lod_free_foreign_lmv(struct lod_object *lo)
+{
+       if (lo->ldo_foreign_lmv != NULL)
+               OBD_FREE_LARGE(lo->ldo_foreign_lmv, lo->ldo_foreign_lmv_size);
+       lo->ldo_foreign_lmv = NULL;
+       lo->ldo_foreign_lmv_size = 0;
+       lo->ldo_is_foreign = 0;
+}
+
+/**
  * Declare create striped md object.
  *
  * The function declares intention to create a striped directory. This is a
@@ -2345,19 +2396,22 @@ static int lod_declare_xattr_set_lmv(const struct lu_env *env,
                                     struct dt_object_format *dof,
                                     struct thandle *th)
 {
-       struct lod_object       *lo = lod_dt_obj(dt);
-       struct lmv_user_md_v1   *lum = lum_buf->lb_buf;
-       int                     rc;
-       ENTRY;
+       struct lod_object *lo = lod_dt_obj(dt);
+       struct lmv_user_md_v1 *lum = lum_buf->lb_buf;
+       int rc;
 
+       ENTRY;
        LASSERT(lum != NULL);
 
-       CDEBUG(D_INFO, "lum magic = %x count = %u offset = %d\n",
-              le32_to_cpu(lum->lum_magic), le32_to_cpu(lum->lum_stripe_count),
-              (int)le32_to_cpu(lum->lum_stripe_offset));
+       CDEBUG(D_INFO,
+              "lum magic=%x hash=%x count=%u offset=%d inherit=%u rr=%u\n",
+              le32_to_cpu(lum->lum_magic), le32_to_cpu(lum->lum_hash_type),
+              le32_to_cpu(lum->lum_stripe_count),
+              (int)le32_to_cpu(lum->lum_stripe_offset),
+              lum->lum_max_inherit, lum->lum_max_inherit_rr);
 
        if (lo->ldo_dir_stripe_count == 0) {
-               if (lo->ldo_dir_is_foreign) {
+               if (lo->ldo_is_foreign) {
                        rc = lod_alloc_foreign_lmv(lo, lum_buf->lb_len);
                        if (rc != 0)
                                GOTO(out, rc);
@@ -2385,7 +2439,7 @@ out:
  *
  * \param[in] env      execution environment
  * \param[in] dt       target object
- * \param[in] buf      LMV buf which contains source stripe fids
+ * \param[in] lmv_buf  LMV buf which contains source stripe FIDs
  * \param[in] fl       set or replace
  * \param[in] th       transaction handle
  *
@@ -2394,14 +2448,14 @@ out:
  */
 static int lod_dir_layout_set(const struct lu_env *env,
                              struct dt_object *dt,
-                             const struct lu_buf *buf,
+                             const struct lu_buf *lmv_buf,
                              int fl,
                              struct thandle *th)
 {
        struct dt_object *next = dt_object_child(dt);
        struct lod_object *lo = lod_dt_obj(dt);
        struct lod_device *lod = lu2lod_dev(lod2lu_obj(lo)->lo_dev);
-       struct lmv_mds_md_v1 *lmv = buf->lb_buf;
+       struct lmv_mds_md_v1 *lmv = lmv_buf->lb_buf;
        struct lmv_mds_md_v1 *slave_lmv;
        struct lu_buf slave_buf;
        int i;
@@ -2421,7 +2475,7 @@ static int lod_dir_layout_set(const struct lu_env *env,
 
        LMV_DEBUG(D_INFO, lmv, "set");
 
-       rc = lod_sub_xattr_set(env, next, buf, XATTR_NAME_LMV, fl, th);
+       rc = lod_sub_xattr_set(env, next, lmv_buf, XATTR_NAME_LMV, fl, th);
        if (rc)
                RETURN(rc);
 
@@ -2658,7 +2712,9 @@ __u16 lod_comp_entry_stripe_count(struct lod_object *lo,
                return lod->lod_ost_count;
        else
                return lod_get_stripe_count(lod, lo, comp_idx,
-                                           entry->llc_stripe_count, false);
+                                           entry->llc_stripe_count,
+                                           entry->llc_pattern &
+                                           LOV_PATTERN_OVERSTRIPING);
 }
 
 static int lod_comp_md_size(struct lod_object *lo, bool is_dir)
@@ -3415,7 +3471,7 @@ static int lod_declare_layout_merge(const struct lu_env *env,
        if ((le16_to_cpu(lcm->lcm_flags) & LCM_FL_FLR_MASK) == LCM_FL_NONE)
                lcm->lcm_flags = cpu_to_le32(LCM_FL_RDONLY);
 
-       rc = lod_striping_reload(env, lo, buf);
+       rc = lod_striping_reload(env, lo, buf, 0);
        if (rc)
                GOTO(out, rc);
 
@@ -3458,7 +3514,7 @@ static int lod_declare_layout_split(const struct lu_env *env,
        int rc;
        ENTRY;
 
-       rc = lod_striping_reload(env, lo, mbuf);
+       rc = lod_striping_reload(env, lo, mbuf, LVF_ALL_STALE);
        if (rc)
                RETURN(rc);
 
@@ -3897,7 +3953,7 @@ static int lod_xattr_set_lov_on_dir(const struct lu_env *env,
                v3 = buf->lb_buf;
                if (v3->lmm_pool_name[0] != '\0')
                        pool_name = v3->lmm_pool_name;
-               /* fall through */
+               fallthrough;
        case LOV_USER_MAGIC_V1:
                /* if { size, offset, count } = { 0, -1, 0 } and no pool
                 * (i.e. all default values specified) then delete default
@@ -3948,6 +4004,192 @@ static int lod_xattr_set_lov_on_dir(const struct lu_env *env,
        RETURN(rc);
 }
 
+static int lod_get_default_lov_striping(const struct lu_env *env,
+                                      struct lod_object *lo,
+                                      struct lod_default_striping *lds,
+                                      struct dt_allocation_hint *ah);
+
+/**
+ * Build a pool-carrying copy of a composite layout.
+ *
+ * Walks the lcm_entries array of \a src and writes the matching entry and
+ * component blob into \a tgt. Every component is read as a lov_user_md_v1
+ * and re-emitted as a lov_user_md_v3 naming \a pool, except components whose
+ * pattern is LOV_PATTERN_MDT, which stay lov_user_md_v1. Entry offsets in
+ * \a tgt are moved up by the space added by the conversions before them.
+ *
+ * Only the entry array and the component blobs of \a tgt are written here;
+ * the lov_comp_md_v1 header fields of \a tgt are not touched.
+ *
+ * \param[in] src      source layout
+ * \param[in] pool     pool to use in \a tgt
+ * \param[out] tgt     target layout
+ */
+static void embed_pool_to_comp_v1(const struct lov_comp_md_v1 *src,
+                                 const char *pool,
+                                 struct lov_comp_md_v1 *tgt)
+{
+       struct lov_comp_md_entry_v1 *tgt_ent = tgt->lcm_entries;
+       size_t grown = 0;
+       int idx = 0;
+
+       while (idx < le16_to_cpu(src->lcm_entry_count)) {
+               struct lov_user_md_v1 *src_lum;
+               struct lov_user_md_v3 *tgt_lum;
+               __u32 src_off;
+
+               /* start from the source entry, then relocate its blob */
+               *tgt_ent = src->lcm_entries[idx];
+               src_off = le32_to_cpu(src->lcm_entries[idx].lcme_offset);
+               tgt_ent->lcme_offset = cpu_to_le32(src_off + grown);
+
+               src_lum = (struct lov_user_md_v1 *)((char *)src + src_off);
+               tgt_lum = (struct lov_user_md_v3 *)((char *)tgt + src_off +
+                                                   grown);
+               /* the v1 part is common to both formats */
+               *(struct lov_user_md_v1 *)tgt_lum = *src_lum;
+
+               if (src_lum->lmm_pattern != cpu_to_le32(LOV_PATTERN_MDT)) {
+                       tgt_lum->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V3);
+                       tgt_ent->lcme_size = cpu_to_le32(sizeof(*tgt_lum));
+                       strlcpy(tgt_lum->lmm_pool_name, pool,
+                               sizeof(tgt_lum->lmm_pool_name));
+                       /* later blobs land this much further out */
+                       grown += sizeof(*tgt_lum) - sizeof(*src_lum);
+               } else {
+                       tgt_lum->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V1);
+               }
+
+               idx++;
+               tgt_ent++;
+       }
+}
+
+/**
+ * Set default striping on a directory.
+ *
+ * Sets specified striping on a directory object unless it matches the default
+ * striping (LOVEA_DELETE_VALUES() macro). In the latter case remove existing
+ * EA. This striping will be used when regular file is being created in this
+ * directory.
+ * If the current default striping includes a pool but the specified striping
+ * does not - retain the pool if it exists.
+ *
+ * The pool of the current default striping is looked up first. Then:
+ * - a LOV_USER_MAGIC_V1 request is rewritten as a LOV_USER_MAGIC_V3 one
+ *   carrying that pool;
+ * - a LOV_USER_MAGIC_COMP_V1 request whose components all have the
+ *   LOV_USER_MAGIC_V1 magic is rebuilt with the pool embedded in every
+ *   component (see embed_pool_to_comp_v1());
+ * - anything else, a delete request, or the absence of a current pool
+ *   passes \a buf through unchanged.
+ * In every case the actual set is done by lod_xattr_set_lov_on_dir().
+ * The rewritten layouts are built in the per-thread scratch
+ * (lod_thread_info::lti_ea_store and lti_buf), overwriting what it held.
+ *
+ * \param[in] env      execution environment
+ * \param[in] dt       the striped object
+ * \param[in] buf      buffer with the striping
+ * \param[in] name     name of EA
+ * \param[in] fl       xattr flag (see OSD API description)
+ * \param[in] th       transaction handle
+ *
+ * \retval             0 on success
+ * \retval             negative if failed
+ */
+static int lod_xattr_set_default_lov_on_dir(const struct lu_env *env,
+                                           struct dt_object *dt,
+                                           const struct lu_buf *buf,
+                                           const char *name, int fl,
+                                           struct thandle *th)
+{
+       struct lod_default_striping     *lds = lod_lds_buf_get(env);
+       struct lov_user_md_v1           *v1 = buf->lb_buf;
+       char                             pool[LOV_MAXPOOLNAME + 1];
+       bool                             is_del;
+       int                              rc;
+
+       ENTRY;
+
+       /* get existing striping config; a failure here is returned to the
+        * caller as-is, so rc is 0 for everything that follows
+        */
+       rc = lod_get_default_lov_striping(env, lod_dt_obj(dt), lds, NULL);
+       if (rc)
+               RETURN(rc);
+
+       memset(pool, 0, sizeof(pool));
+       if (lds->lds_def_striping_set == 1)
+               lod_layout_get_pool(lds->lds_def_comp_entries,
+                                   lds->lds_def_comp_cnt, pool,
+                                   sizeof(pool));
+
+       is_del = LOVEA_DELETE_VALUES(v1->lmm_stripe_size,
+                                    v1->lmm_stripe_count,
+                                    v1->lmm_stripe_offset,
+                                    NULL);
+
+       /* Retain the pool name if it is not given.
+        *
+        * NOTE(review): is_del above is evaluated through the
+        * lov_user_md_v1 view of the buffer whatever its magic is,
+        * including LOV_USER_MAGIC_COMP_V1 - confirm this is intended.
+        */
+       if (v1->lmm_magic == LOV_USER_MAGIC_V1 && pool[0] != '\0' &&
+           !is_del) {
+               struct lod_thread_info *info = lod_env_info(env);
+               struct lov_user_md_v3 *v3  = info->lti_ea_store;
+
+               memset(v3, 0, sizeof(*v3));
+               v3->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V3);
+               v3->lmm_pattern = cpu_to_le32(v1->lmm_pattern);
+               v3->lmm_stripe_count = cpu_to_le32(v1->lmm_stripe_count);
+               v3->lmm_stripe_offset = cpu_to_le32(v1->lmm_stripe_offset);
+               v3->lmm_stripe_size = cpu_to_le32(v1->lmm_stripe_size);
+
+               strlcpy(v3->lmm_pool_name, pool, sizeof(v3->lmm_pool_name));
+
+               info->lti_buf.lb_buf = v3;
+               info->lti_buf.lb_len = sizeof(*v3);
+               rc = lod_xattr_set_lov_on_dir(env, dt, &info->lti_buf,
+                                             name, fl, th);
+       } else if (v1->lmm_magic == LOV_USER_MAGIC_COMP_V1 &&
+                  pool[0] != '\0' && !is_del) {
+               /*
+                * try to retain the pool from default layout if the
+                * specified component layout does not provide pool
+                * info explicitly
+                *
+                * The scan below stops at the first component whose magic
+                * is not LOV_USER_MAGIC_V1; in that case the layout is
+                * passed through unchanged. While scanning, the size of
+                * the rebuilt layout is accumulated: header, entry array,
+                * then one lov_user_md_v1 per LOV_PATTERN_MDT component
+                * and one lov_user_md_v3 per other component.
+                */
+               struct lod_thread_info *info = lod_env_info(env);
+               struct lov_comp_md_v1 *comp_v1 = buf->lb_buf;
+               struct lov_comp_md_v1 *comp_v1p;
+               struct lov_user_md_v1 *lum;
+               int entry_count;
+               int i;
+               __u32 offset;
+               struct lov_comp_md_entry_v1 *entry;
+               int size;
+
+               entry_count = le16_to_cpu(comp_v1->lcm_entry_count);
+               size = sizeof(*comp_v1) +
+                       entry_count * sizeof(comp_v1->lcm_entries[0]);
+               entry = comp_v1->lcm_entries;
+               for (i = 0; i < entry_count; i++, entry++) {
+                       offset = le32_to_cpu(entry->lcme_offset);
+                       lum = (struct lov_user_md_v1 *)((char *)comp_v1 +
+                                                       offset);
+                       if (le32_to_cpu(lum->lmm_magic) != LOV_USER_MAGIC_V1)
+                               /* the i-th component includes pool info */
+                               break;
+                       if (lum->lmm_pattern == cpu_to_le32(LOV_PATTERN_MDT))
+                               size += sizeof(struct lov_user_md_v1);
+                       else
+                               size += sizeof(struct lov_user_md_v3);
+               }
+
+               if (i == entry_count) {
+                       /*
+                        * re-compose the layout to include the pool for
+                        * each component
+                        *
+                        * The scratch buffer is grown first when it is
+                        * smaller than the computed size; if that fails
+                        * nothing is set and the error is returned.
+                        *
+                        * NOTE(review): size is a sum of header, entries
+                        * and component blobs, while
+                        * embed_pool_to_comp_v1() places each blob at its
+                        * source offset plus the accumulated growth. This
+                        * presumably relies on the incoming components
+                        * being packed back to back - verify against the
+                        * layout validation done by the callers.
+                        */
+                       if (info->lti_ea_store_size < size)
+                               rc = lod_ea_store_resize(info, size);
+
+                       if (rc == 0) {
+                               comp_v1p = info->lti_ea_store;
+                               *comp_v1p = *comp_v1;
+                               comp_v1p->lcm_size = cpu_to_le32(size);
+                               embed_pool_to_comp_v1(comp_v1, pool, comp_v1p);
+
+                               info->lti_buf.lb_buf = comp_v1p;
+                               info->lti_buf.lb_len = size;
+                               rc = lod_xattr_set_lov_on_dir(env, dt,
+                                                             &info->lti_buf,
+                                                             name, fl, th);
+                       }
+               } else {
+                       rc = lod_xattr_set_lov_on_dir(env, dt, buf, name, fl,
+                                                     th);
+               }
+       } else {
+               rc = lod_xattr_set_lov_on_dir(env, dt, buf, name, fl, th);
+       }
+
+       if (lds->lds_def_striping_set == 1 && lds->lds_def_comp_entries != NULL)
+               lod_free_def_comp_entries(lds);
+
+       RETURN(rc);
+}
+
 /**
  * Set default striping on a directory object.
  *
@@ -4043,7 +4285,7 @@ static int lod_xattr_set_lmv(const struct lu_env *env, struct dt_object *dt,
        /* The stripes are supposed to be allocated in declare phase,
         * if there are no stripes being allocated, it will skip */
        if (lo->ldo_dir_stripe_count == 0) {
-               if (lo->ldo_dir_is_foreign) {
+               if (lo->ldo_is_foreign) {
                        rc = lod_sub_xattr_set(env, dt_object_child(dt), buf,
                                               XATTR_NAME_LMV, fl, th);
                        if (rc != 0)
@@ -4275,7 +4517,7 @@ static int lod_dir_striping_create_internal(const struct lu_env *env,
                                                               lmu, dof, th);
                        }
                } else {
-                       if (lo->ldo_dir_is_foreign) {
+                       if (lo->ldo_is_foreign) {
                                LASSERT(lo->ldo_foreign_lmv != NULL &&
                                        lo->ldo_foreign_lmv_size > 0);
                                info->lti_buf.lb_buf = lo->ldo_foreign_lmv;
@@ -4362,6 +4604,12 @@ static int lod_dir_striping_create_internal(const struct lu_env *env,
                        RETURN(rc);
        }
 
+       /* ldo_def_striping is not allocated, clear after use, in case directory
+        * layout is changed later.
+        */
+       if (!declare)
+               lo->ldo_def_striping = NULL;
+
        RETURN(0);
 }
 
@@ -4748,10 +4996,6 @@ out:
 }
 
 
-static int lod_get_default_lov_striping(const struct lu_env *env,
-                                       struct lod_object *lo,
-                                       struct lod_default_striping *lds,
-                                       struct dt_allocation_hint *ah);
 /**
  * Implementation of dt_object_operations::do_xattr_set.
  *
@@ -4799,59 +5043,8 @@ static int lod_xattr_set(const struct lu_env *env,
                RETURN(rc);
        } else if (S_ISDIR(dt->do_lu.lo_header->loh_attr) &&
                   strcmp(name, XATTR_NAME_LOV) == 0) {
-               struct lod_default_striping *lds = lod_lds_buf_get(env);
-               struct lov_user_md_v1 *v1 = buf->lb_buf;
-               char pool[LOV_MAXPOOLNAME + 1];
-               bool is_del;
-
-               /* get existing striping config */
-               rc = lod_get_default_lov_striping(env, lod_dt_obj(dt), lds,
-                                                 NULL);
-               if (rc)
-                       RETURN(rc);
-
-               memset(pool, 0, sizeof(pool));
-               if (lds->lds_def_striping_set == 1)
-                       lod_layout_get_pool(lds->lds_def_comp_entries,
-                                           lds->lds_def_comp_cnt, pool,
-                                           sizeof(pool));
-
-               is_del = LOVEA_DELETE_VALUES(v1->lmm_stripe_size,
-                                            v1->lmm_stripe_count,
-                                            v1->lmm_stripe_offset,
-                                            NULL);
-
-               /* Retain the pool name if it is not given */
-               if (v1->lmm_magic == LOV_USER_MAGIC_V1 && pool[0] != '\0' &&
-                       !is_del) {
-                       struct lod_thread_info *info = lod_env_info(env);
-                       struct lov_user_md_v3 *v3  = info->lti_ea_store;
-
-                       memset(v3, 0, sizeof(*v3));
-                       v3->lmm_magic = cpu_to_le32(LOV_USER_MAGIC_V3);
-                       v3->lmm_pattern = cpu_to_le32(v1->lmm_pattern);
-                       v3->lmm_stripe_count =
-                                       cpu_to_le32(v1->lmm_stripe_count);
-                       v3->lmm_stripe_offset =
-                                       cpu_to_le32(v1->lmm_stripe_offset);
-                       v3->lmm_stripe_size = cpu_to_le32(v1->lmm_stripe_size);
-
-                       strlcpy(v3->lmm_pool_name, pool,
-                               sizeof(v3->lmm_pool_name));
-
-                       info->lti_buf.lb_buf = v3;
-                       info->lti_buf.lb_len = sizeof(*v3);
-                       rc = lod_xattr_set_lov_on_dir(env, dt, &info->lti_buf,
-                                                     name, fl, th);
-               } else {
-                       rc = lod_xattr_set_lov_on_dir(env, dt, buf, name,
-                                                     fl, th);
-               }
-
-               if (lds->lds_def_striping_set == 1 &&
-                   lds->lds_def_comp_entries != NULL)
-                       lod_free_def_comp_entries(lds);
-
+               rc = lod_xattr_set_default_lov_on_dir(env, dt, buf, name, fl,
+                                                     th);
                RETURN(rc);
        } else if (S_ISDIR(dt->do_lu.lo_header->loh_attr) &&
                   strcmp(name, XATTR_NAME_DEFAULT_LMV) == 0) {
@@ -4877,7 +5070,7 @@ static int lod_xattr_set(const struct lu_env *env,
                        if (rc)
                                RETURN(rc);
 
-                       rc = lod_striping_reload(env, lo, buf);
+                       rc = lod_striping_reload(env, lo, buf, LVF_ALL_STALE);
                        if (rc)
                                RETURN(rc);
 
@@ -5110,19 +5303,27 @@ skip:
 static int lod_get_default_lov_striping(const struct lu_env *env,
                                        struct lod_object *lo,
                                        struct lod_default_striping *lds,
-                                       struct dt_allocation_hint *ah)
+                                       struct dt_allocation_hint *dah)
 {
        struct lod_thread_info *info = lod_env_info(env);
        struct lov_user_md_v1 *v1 = NULL;
        struct lov_user_md_v3 *v3 = NULL;
-       struct lov_comp_md_v1 *comp_v1 = NULL;
-       __u16 comp_cnt;
-       __u16 mirror_cnt;
-       bool composite;
+       struct lov_comp_md_v1 *lcm = NULL;
+       __u32 magic;
+       int append_stripe_count = dah != NULL ? dah->dah_append_stripe_count : 0;
+       const char *append_pool = (dah != NULL &&
+                                  dah->dah_append_pool != NULL &&
+                                  dah->dah_append_pool[0] != '\0') ?
+                                 dah->dah_append_pool : NULL;
+       __u16 entry_count = 1;
+       __u16 mirror_count = 0;
+       bool want_composite = false;
        int rc, i, j;
 
        ENTRY;
 
+       lds->lds_def_striping_set = 0;
+
        rc = lod_get_lov_ea(env, lo);
        if (rc < 0)
                RETURN(rc);
@@ -5130,116 +5331,133 @@ static int lod_get_default_lov_striping(const struct lu_env *env,
        if (rc < (typeof(rc))sizeof(struct lov_user_md))
                RETURN(0);
 
-       v1 = info->lti_ea_store;
-       if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_V1)) {
-               lustre_swab_lov_user_md_v1(v1);
-       } else if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_V3)) {
-               v3 = (struct lov_user_md_v3 *)v1;
-               lustre_swab_lov_user_md_v3(v3);
-       } else if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_SPECIFIC)) {
-               v3 = (struct lov_user_md_v3 *)v1;
+       magic = *(__u32 *)info->lti_ea_store;
+       if (magic == __swab32(LOV_USER_MAGIC_V1)) {
+               lustre_swab_lov_user_md_v1(info->lti_ea_store);
+       } else if (magic == __swab32(LOV_USER_MAGIC_V3)) {
+               lustre_swab_lov_user_md_v3(info->lti_ea_store);
+       } else if (magic == __swab32(LOV_USER_MAGIC_SPECIFIC)) {
+               v3 = (struct lov_user_md_v3 *)info->lti_ea_store;
                lustre_swab_lov_user_md_v3(v3);
                lustre_swab_lov_user_md_objects(v3->lmm_objects,
                                                v3->lmm_stripe_count);
-       } else if (v1->lmm_magic == __swab32(LOV_USER_MAGIC_COMP_V1) ||
-                  v1->lmm_magic == __swab32(LOV_USER_MAGIC_SEL)) {
-               comp_v1 = (struct lov_comp_md_v1 *)v1;
-               lustre_swab_lov_comp_md_v1(comp_v1);
+       } else if (magic == __swab32(LOV_USER_MAGIC_COMP_V1) ||
+                  magic == __swab32(LOV_USER_MAGIC_SEL)) {
+               lustre_swab_lov_comp_md_v1(info->lti_ea_store);
        }
 
-       if (v1->lmm_magic != LOV_MAGIC_V3 && v1->lmm_magic != LOV_MAGIC_V1 &&
-           v1->lmm_magic != LOV_MAGIC_COMP_V1 &&
-           v1->lmm_magic != LOV_MAGIC_SEL &&
-           v1->lmm_magic != LOV_USER_MAGIC_SPECIFIC)
+       switch (magic) {
+       case LOV_MAGIC_V1:
+       case LOV_MAGIC_V3:
+       case LOV_USER_MAGIC_SPECIFIC:
+               v1 = info->lti_ea_store;
+               break;
+       case LOV_MAGIC_COMP_V1:
+       case LOV_MAGIC_SEL:
+               lcm = info->lti_ea_store;
+               entry_count = lcm->lcm_entry_count;
+               if (entry_count == 0)
+                       RETURN(-EINVAL);
+
+               mirror_count = lcm->lcm_mirror_count + 1;
+               want_composite = true;
+               break;
+       default:
                RETURN(-ENOTSUPP);
+       }
 
-       if ((v1->lmm_magic == LOV_MAGIC_COMP_V1 ||
-           v1->lmm_magic == LOV_MAGIC_SEL) &&
-            !(ah && ah->dah_append_stripes)) {
-               comp_v1 = (struct lov_comp_md_v1 *)v1;
-               comp_cnt = comp_v1->lcm_entry_count;
-               if (comp_cnt == 0)
-                       RETURN(-EINVAL);
-               mirror_cnt = comp_v1->lcm_mirror_count + 1;
-               composite = true;
-       } else {
-               comp_cnt = 1;
-               mirror_cnt = 0;
-               composite = false;
+       if (append_stripe_count != 0 || append_pool != NULL) {
+               entry_count = 1;
+               mirror_count = 0;
+               want_composite = false;
        }
 
        /* realloc default comp entries if necessary */
-       rc = lod_def_striping_comp_resize(lds, comp_cnt);
+       rc = lod_def_striping_comp_resize(lds, entry_count);
        if (rc < 0)
                RETURN(rc);
 
-       lds->lds_def_comp_cnt = comp_cnt;
-       lds->lds_def_striping_is_composite = composite;
-       lds->lds_def_mirror_cnt = mirror_cnt;
+       lds->lds_def_comp_cnt = entry_count;
+       lds->lds_def_striping_is_composite = want_composite;
+       lds->lds_def_mirror_cnt = mirror_count;
 
-       for (i = 0; i < comp_cnt; i++) {
-               struct lod_layout_component *lod_comp;
-               char *pool;
+       for (i = 0; i < entry_count; i++) {
+               struct lod_layout_component *llc = &lds->lds_def_comp_entries[i];
+               const char *pool;
 
-               lod_comp = &lds->lds_def_comp_entries[i];
                /*
-                * reset lod_comp values, llc_stripes is always NULL in
-                * the default striping template, llc_pool will be reset
-                * later below.
+                * reset llc values, llc_stripes is always NULL in the
+                * default striping template, llc_pool will be reset
+                * later below using lod_set_pool().
+                *
+                * XXX At this point llc_pool may point to valid (!)
+                * kmalloced strings from previous RPCs.
                 */
-               memset(lod_comp, 0, offsetof(typeof(*lod_comp), llc_pool));
-
-               if (composite) {
-                       v1 = (struct lov_user_md *)((char *)comp_v1 +
-                                       comp_v1->lcm_entries[i].lcme_offset);
-                       lod_comp->llc_extent =
-                                       comp_v1->lcm_entries[i].lcme_extent;
-                       /* We only inherit certain flags from the layout */
-                       lod_comp->llc_flags =
-                                       comp_v1->lcm_entries[i].lcme_flags &
+               memset(llc, 0, offsetof(typeof(*llc), llc_pool));
+
+               if (lcm != NULL) {
+                       v1 = (struct lov_user_md *)((char *)lcm +
+                                                   lcm->lcm_entries[i].lcme_offset);
+
+                       if (want_composite) {
+                               llc->llc_extent = lcm->lcm_entries[i].lcme_extent;
+                               /* We only inherit certain flags from the layout */
+                               llc->llc_flags = lcm->lcm_entries[i].lcme_flags &
                                        LCME_TEMPLATE_FLAGS;
+                       }
                }
 
+               CDEBUG(D_LAYOUT, DFID" magic = %#08x, pattern = %#x, stripe_count = %hu, stripe_size = %u, stripe_offset = %hu, append_pool = '%s', append_stripe_count = %d\n",
+                      PFID(lu_object_fid(&lo->ldo_obj.do_lu)),
+                      v1->lmm_magic,
+                      v1->lmm_pattern,
+                      v1->lmm_stripe_count,
+                      v1->lmm_stripe_size,
+                      v1->lmm_stripe_offset,
+                      append_pool ?: "",
+                      append_stripe_count);
+
                if (!lov_pattern_supported(v1->lmm_pattern) &&
                    !(v1->lmm_pattern & LOV_PATTERN_F_RELEASED)) {
                        lod_free_def_comp_entries(lds);
                        RETURN(-EINVAL);
                }
 
-               CDEBUG(D_LAYOUT, DFID" stripe_count=%d stripe_size=%d stripe_offset=%d append_stripes=%d\n",
-                      PFID(lu_object_fid(&lo->ldo_obj.do_lu)),
-                      (int)v1->lmm_stripe_count, (int)v1->lmm_stripe_size,
-                      (int)v1->lmm_stripe_offset,
-                      ah ? ah->dah_append_stripes : 0);
+               llc->llc_stripe_count = v1->lmm_stripe_count;
+               llc->llc_stripe_size = v1->lmm_stripe_size;
+               llc->llc_stripe_offset = v1->lmm_stripe_offset;
+               llc->llc_pattern = v1->lmm_pattern;
 
-               if (ah && ah->dah_append_stripes)
-                       lod_comp->llc_stripe_count = ah->dah_append_stripes;
-               else
-                       lod_comp->llc_stripe_count = v1->lmm_stripe_count;
-               lod_comp->llc_stripe_size = v1->lmm_stripe_size;
-               lod_comp->llc_stripe_offset = v1->lmm_stripe_offset;
-               lod_comp->llc_pattern = v1->lmm_pattern;
+               if (append_stripe_count != 0 || append_pool != NULL)
+                       llc->llc_pattern = LOV_PATTERN_RAID0;
+
+               if (append_stripe_count != 0)
+                       llc->llc_stripe_count = append_stripe_count;
 
                pool = NULL;
-               if (ah && ah->dah_append_pool && ah->dah_append_pool[0]) {
-                       pool = ah->dah_append_pool;
+               if (append_pool != NULL) {
+                       pool = append_pool;
                } else if (v1->lmm_magic == LOV_USER_MAGIC_V3) {
                        /* XXX: sanity check here */
-                       v3 = (struct lov_user_md_v3 *) v1;
+                       v3 = (struct lov_user_md_v3 *)v1;
                        if (v3->lmm_pool_name[0] != '\0')
                                pool = v3->lmm_pool_name;
                }
-               lod_set_def_pool(lds, i, pool);
-               if (v1->lmm_magic == LOV_USER_MAGIC_SPECIFIC) {
+
+               lod_set_pool(&llc->llc_pool, pool);
+
+               if (append_stripe_count != 0 || append_pool != NULL) {
+                       /* Ignore specific striping for append. */
+               } else if (v1->lmm_magic == LOV_USER_MAGIC_SPECIFIC) {
                        v3 = (struct lov_user_md_v3 *)v1;
-                       rc = lod_comp_copy_ost_lists(lod_comp, v3);
+                       rc = lod_comp_copy_ost_lists(llc, v3);
                        if (rc)
                                RETURN(rc);
-               } else if (lod_comp->llc_ostlist.op_array &&
-                          lod_comp->llc_ostlist.op_count) {
-                       for (j = 0; j < lod_comp->llc_ostlist.op_count; j++)
-                               lod_comp->llc_ostlist.op_array[j] = -1;
-                       lod_comp->llc_ostlist.op_count = 0;
+               } else if (llc->llc_ostlist.op_array &&
+                          llc->llc_ostlist.op_count) {
+                       for (j = 0; j < llc->llc_ostlist.op_count; j++)
+                               llc->llc_ostlist.op_array[j] = -1;
+                       llc->llc_ostlist.op_count = 0;
                }
        }
 
@@ -5308,6 +5526,15 @@ static int lod_get_default_striping(const struct lu_env *env,
        int rc, rc1;
 
        rc = lod_get_default_lov_striping(env, lo, lds, NULL);
+       if (lds->lds_def_striping_set) {
+               struct lod_thread_info *info = lod_env_info(env);
+               struct lod_device *d = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
+
+               rc = lod_verify_striping(env, d, lo, &info->lti_buf, false);
+               if (rc)
+                       lds->lds_def_striping_set = 0;
+       }
+
        rc1 = lod_get_default_lmv_striping(env, lo, lds);
        if (rc == 0 && rc1 < 0)
                rc = rc1;
@@ -5350,8 +5577,9 @@ static void lod_striping_from_default(struct lod_object *lo,
                        struct lod_layout_component *def_comp =
                                                &lds->lds_def_comp_entries[i];
 
-                       CDEBUG(D_LAYOUT, "Inherit from default: flags=%#x "
-                              "size=%hu nr=%u offset=%u pattern=%#x pool=%s\n",
+                       CDEBUG(D_LAYOUT,
+                              "inherit "DFID" file layout from default: flags=%#x size=%hu nr=%u offset=%u pattern=%#x pool=%s\n",
+                              PFID(lu_object_fid(&lo->ldo_obj.do_lu)),
                               def_comp->llc_flags,
                               def_comp->llc_stripe_size,
                               def_comp->llc_stripe_count,
@@ -5400,18 +5628,19 @@ static void lod_striping_from_default(struct lod_object *lo,
                if (lo->ldo_dir_stripe_offset == -1)
                        lo->ldo_dir_stripe_offset =
                                lds->lds_dir_def_stripe_offset;
-               if (lo->ldo_dir_hash_type == 0)
+               if (lo->ldo_dir_hash_type == LMV_HASH_TYPE_UNKNOWN)
                        lo->ldo_dir_hash_type = lds->lds_dir_def_hash_type;
 
-               CDEBUG(D_LAYOUT, "striping from default dir: count:%hu, "
-                      "offset:%u, hash_type:%u\n",
+               CDEBUG(D_LAYOUT,
+                      "inherit "DFID" dir layout from default: count=%hu offset=%u hash_type=%x\n",
+                      PFID(lu_object_fid(&lo->ldo_obj.do_lu)),
                       lo->ldo_dir_stripe_count, lo->ldo_dir_stripe_offset,
                       lo->ldo_dir_hash_type);
        }
 }
 
 static inline bool lod_need_inherit_more(struct lod_object *lo, bool from_root,
-                                        char *append_pool)
+                                        const char *append_pool)
 {
        struct lod_layout_component *lod_comp;
 
@@ -5443,8 +5672,8 @@ static inline bool lod_need_inherit_more(struct lod_object *lo, bool from_root,
  * This method is used to make a decision on the striping configuration for the
  * object being created. It can be taken from the \a parent object if it exists,
  * or filesystem's default. The resulting configuration (number of stripes,
- * stripe size/offset, pool name, etc) is stored in the object itself and will
- * be used by the methods like ->doo_declare_create().
+ * stripe size/offset, pool name, hash_type, etc.) is stored in the object
+ * itself and will be used by the methods like ->doo_declare_create().
  *
  * \see dt_object_operations::do_ah_init() in the API description for details.
  */
@@ -5468,8 +5697,8 @@ static void lod_ah_init(const struct lu_env *env,
 
        LASSERT(child);
 
-       if (ah->dah_append_stripes == -1)
-               ah->dah_append_stripes =
+       if (ah->dah_append_stripe_count == -1)
+               ah->dah_append_stripe_count =
                        d->lod_ost_descs.ltd_lov_desc.ld_tgt_count;
 
        if (likely(parent)) {
@@ -5500,26 +5729,15 @@ static void lod_ah_init(const struct lu_env *env,
                 */
                if (ah->dah_eadata != NULL && ah->dah_eadata_len != 0 &&
                    le32_to_cpu(lum1->lum_magic) == LMV_MAGIC_FOREIGN) {
-                       lc->ldo_dir_is_foreign = true;
+                       lc->ldo_is_foreign = true;
                        /* keep stripe_count 0 and stripe_offset -1 */
                        CDEBUG(D_INFO, "no default striping for foreign dir\n");
                        RETURN_EXIT;
                }
 
-               /*
-                * If parent object is not root directory,
-                * then get default striping from parent object.
-                */
-               if (likely(lp != NULL)) {
+               if (likely(lp != NULL))
                        lod_get_default_striping(env, lp, lds);
 
-                       /* inherit default striping except ROOT */
-                       if ((lds->lds_def_striping_set ||
-                            lds->lds_dir_def_striping_set) &&
-                           !fid_is_root(lod_object_fid(lp)))
-                               lc->ldo_def_striping = lds;
-               }
-
                /* It should always honour the specified stripes */
                /* Note: old client (< 2.7) might also do lfs mkdir, whose EA
                 * will have old magic. In this case, we should ignore the
@@ -5535,11 +5753,61 @@ static void lod_ah_init(const struct lu_env *env,
                        lc->ldo_dir_hash_type =
                                le32_to_cpu(lum1->lum_hash_type);
                        CDEBUG(D_INFO,
-                              "set dirstripe: count %hu, offset %d, hash %u\n",
+                              "set dirstripe: count %hu, offset %d, hash %x\n",
                                lc->ldo_dir_stripe_count,
                                (int)lc->ldo_dir_stripe_offset,
                                lc->ldo_dir_hash_type);
+
+                       if (d->lod_mdt_descs.ltd_lmv_desc.ld_active_tgt_count &&
+                           lc->ldo_dir_stripe_count < 2 &&
+                           lum1->lum_max_inherit != LMV_INHERIT_NONE) {
+                               /* when filesystem-wide default LMV is set, dirs
+                                * will be created on MDT by space usage, but if
+                                * dir is created with "lfs mkdir -c 1 ...", its
+                                * subdirs should be kept on the same MDT. To
+                                * guarantee this, set default LMV for such dir.
+                                */
+                               lds->lds_dir_def_stripe_count =
+                                       le32_to_cpu(lum1->lum_stripe_count);
+                               /* if "-1" stripe offset is set, save current
+                                * MDT index in default LMV.
+                                */
+                               if (le32_to_cpu(lum1->lum_stripe_offset) ==
+                                   LMV_OFFSET_DEFAULT)
+                                       lds->lds_dir_def_stripe_offset =
+                                               lod2lu_dev(d)->ld_site->ld_seq_site->ss_node_id;
+                               else
+                                       lds->lds_dir_def_stripe_offset =
+                                               le32_to_cpu(lum1->lum_stripe_offset);
+                               lds->lds_dir_def_hash_type =
+                                       le32_to_cpu(lum1->lum_hash_type);
+                               lds->lds_dir_def_max_inherit =
+                                       lum1->lum_max_inherit;
+                               /* it will be decreased by 1 later in setting */
+                               if (lum1->lum_max_inherit >= LMV_INHERIT_END &&
+                                   lum1->lum_max_inherit < LMV_INHERIT_MAX)
+                                       lds->lds_dir_def_max_inherit++;
+                               lds->lds_dir_def_max_inherit_rr =
+                                       lum1->lum_max_inherit_rr;
+                               lds->lds_dir_def_striping_set = 1;
+                               /* don't inherit LOV from ROOT */
+                               if (lds->lds_def_striping_set &&
+                                   fid_is_root(lod_object_fid(lp)))
+                                       lds->lds_def_striping_set = 0;
+                               lc->ldo_def_striping = lds;
+                       } else if (lds->lds_def_striping_set &&
+                                  !fid_is_root(lod_object_fid(lp))) {
+                               /* don't inherit default LMV for "lfs mkdir" */
+                               lds->lds_dir_def_striping_set = 0;
+                               lc->ldo_def_striping = lds;
+                       }
                } else {
+                       /* inherit default striping except ROOT */
+                       if ((lds->lds_def_striping_set ||
+                            lds->lds_dir_def_striping_set) &&
+                           !fid_is_root(lod_object_fid(lp)))
+                               lc->ldo_def_striping = lds;
+
                        /* transfer defaults LMV to new directory */
                        lod_striping_from_default(lc, lds, child_mode);
 
@@ -5548,6 +5816,13 @@ static void lod_ah_init(const struct lu_env *env,
                                lc->ldo_dir_stripe_count = 0;
                }
 
+               /* shrink the stripe count to max_mdt_stripecount if it is -1
+                * and max_mdt_stripecount is not 0
+                */
+               if (lc->ldo_dir_stripe_count == (__u16)(-1) &&
+                   d->lod_max_mdt_stripecount)
+                       lc->ldo_dir_stripe_count = d->lod_max_mdt_stripecount;
+
                /* shrink the stripe_count to the available MDT count */
                if (lc->ldo_dir_stripe_count > d->lod_remote_mdt_count + 1 &&
                    !OBD_FAIL_CHECK(OBD_FAIL_LARGE_STRIPE)) {
@@ -5556,11 +5831,12 @@ static void lod_ah_init(const struct lu_env *env,
                                lc->ldo_dir_stripe_count = 0;
                }
 
-               if (!(lc->ldo_dir_hash_type & LMV_HASH_TYPE_MASK))
-                       lc->ldo_dir_hash_type |=
+               if (!lmv_is_known_hash_type(lc->ldo_dir_hash_type))
+                       lc->ldo_dir_hash_type =
+                               (lc->ldo_dir_hash_type & LMV_HASH_FLAG_KNOWN) |
                                d->lod_mdt_descs.ltd_lmv_desc.ld_pattern;
 
-               CDEBUG(D_INFO, "final dir stripe [%hu %d %u]\n",
+               CDEBUG(D_INFO, "final dir stripe_count=%hu offset=%d hash=%u\n",
                       lc->ldo_dir_stripe_count,
                       (int)lc->ldo_dir_stripe_offset, lc->ldo_dir_hash_type);
 
@@ -5581,8 +5857,12 @@ static void lod_ah_init(const struct lu_env *env,
         */
        if (likely(lp != NULL)) {
                rc = lod_get_default_lov_striping(env, lp, lds, ah);
-               if (rc == 0)
-                       lod_striping_from_default(lc, lds, child_mode);
+               if (rc == 0 && lds->lds_def_striping_set) {
+                       rc = lod_verify_striping(env, d, lp, &info->lti_buf,
+                                                false);
+                       if (rc == 0)
+                               lod_striping_from_default(lc, lds, child_mode);
+               }
        }
 
        /* Initialize lod_device::lod_md_root object reference */
@@ -5612,8 +5892,14 @@ static void lod_ah_init(const struct lu_env *env,
            lod_need_inherit_more(lc, true, ah->dah_append_pool)) {
                rc = lod_get_default_lov_striping(env, d->lod_md_root, lds,
                                                  ah);
+               if (rc || !lds->lds_def_striping_set)
+                       goto out;
+
+               rc = lod_verify_striping(env, d, d->lod_md_root, &info->lti_buf,
+                                        false);
                if (rc)
                        goto out;
+
                if (lc->ldo_comp_cnt == 0) {
                        lod_striping_from_default(lc, lds, child_mode);
                } else if (!lds->lds_def_striping_is_composite) {
@@ -5656,7 +5942,8 @@ out:
                LASSERT(!lc->ldo_is_composite);
                lod_comp = &lc->ldo_comp_entries[0];
                desc = &d->lod_ost_descs.ltd_lov_desc;
-               lod_adjust_stripe_info(lod_comp, desc, ah->dah_append_stripes);
+               lod_adjust_stripe_info(lod_comp, desc,
+                                      ah->dah_append_stripe_count);
                if (ah->dah_append_pool && ah->dah_append_pool[0])
                        lod_obj_set_pool(lc, 0, ah->dah_append_pool);
        }
@@ -8140,6 +8427,7 @@ static int lod_dir_declare_layout_attach(const struct lu_env *env,
                OBD_FREE_PTR_ARRAY(lo->ldo_stripe,
                                   lo->ldo_dir_stripes_allocated);
        lo->ldo_stripe = stripes;
+       lo->ldo_is_foreign = 0;
        lo->ldo_dir_migrate_offset = lo->ldo_dir_stripe_count;
        lo->ldo_dir_migrate_hash = le32_to_cpu(lmv->lmv_hash_type);
        lo->ldo_dir_stripe_count += stripe_count;
@@ -8384,6 +8672,7 @@ static int lod_dir_declare_layout_split(const struct lu_env *env,
        OBD_FREE(lo->ldo_stripe,
                 sizeof(*stripes) * lo->ldo_dir_stripes_allocated);
        lo->ldo_stripe = stripes;
+       lo->ldo_is_foreign = 0;
        lo->ldo_dir_striped = 1;
        lo->ldo_dir_stripe_count = rc;
        lo->ldo_dir_stripes_allocated = stripe_count;
@@ -8853,56 +9142,6 @@ static int lod_object_init(const struct lu_env *env, struct lu_object *lo,
 
 /**
  *
- * Alloc cached foreign LOV
- *
- * \param[in] lo        object
- * \param[in] size      size of foreign LOV
- *
- * \retval             0 on success
- * \retval             negative if failed
- */
-int lod_alloc_foreign_lov(struct lod_object *lo, size_t size)
-{
-       OBD_ALLOC_LARGE(lo->ldo_foreign_lov, size);
-       if (lo->ldo_foreign_lov == NULL)
-               return -ENOMEM;
-       lo->ldo_foreign_lov_size = size;
-       lo->ldo_is_foreign = 1;
-       return 0;
-}
-
-/**
- *
- * Free cached foreign LOV
- *
- * \param[in] lo        object
- */
-void lod_free_foreign_lov(struct lod_object *lo)
-{
-       if (lo->ldo_foreign_lov != NULL)
-               OBD_FREE_LARGE(lo->ldo_foreign_lov, lo->ldo_foreign_lov_size);
-       lo->ldo_foreign_lov = NULL;
-       lo->ldo_foreign_lov_size = 0;
-       lo->ldo_is_foreign = 0;
-}
-
-/**
- *
- * Free cached foreign LMV
- *
- * \param[in] lo        object
- */
-void lod_free_foreign_lmv(struct lod_object *lo)
-{
-       if (lo->ldo_foreign_lmv != NULL)
-               OBD_FREE_LARGE(lo->ldo_foreign_lmv, lo->ldo_foreign_lmv_size);
-       lo->ldo_foreign_lmv = NULL;
-       lo->ldo_foreign_lmv_size = 0;
-       lo->ldo_dir_is_foreign = 0;
-}
-
-/**
- *
  * Release resources associated with striping.
  *
  * If the object is striped (regular or directory), then release
@@ -8914,14 +9153,17 @@ void lod_free_foreign_lmv(struct lod_object *lo)
 void lod_striping_free_nolock(const struct lu_env *env, struct lod_object *lo)
 {
        struct lod_layout_component *lod_comp;
+       __u32 obj_attr = lo->ldo_obj.do_lu.lo_header->loh_attr;
        int i, j;
 
        if (unlikely(lo->ldo_is_foreign)) {
-               lod_free_foreign_lov(lo);
-               lo->ldo_comp_cached = 0;
-       } else if (unlikely(lo->ldo_dir_is_foreign)) {
-               lod_free_foreign_lmv(lo);
-               lo->ldo_dir_stripe_loaded = 0;
+               if (S_ISREG(obj_attr)) {
+                       lod_free_foreign_lov(lo);
+                       lo->ldo_comp_cached = 0;
+               } else if (S_ISDIR(obj_attr)) {
+                       lod_free_foreign_lmv(lo);
+                       lo->ldo_dir_stripe_loaded = 0;
+               }
        } else if (lo->ldo_stripe != NULL) {
                LASSERT(lo->ldo_comp_entries == NULL);
                LASSERT(lo->ldo_dir_stripes_allocated > 0);
@@ -8937,6 +9179,7 @@ void lod_striping_free_nolock(const struct lu_env *env, struct lod_object *lo)
                lo->ldo_dir_stripes_allocated = 0;
                lo->ldo_dir_stripe_loaded = 0;
                lo->ldo_dir_stripe_count = 0;
+               lo->ldo_obj.do_index_ops = NULL;
        } else if (lo->ldo_comp_entries != NULL) {
                for (i = 0; i < lo->ldo_comp_cnt; i++) {
                        /* free lod_layout_component::llc_stripe array */