Whamcloud - gitweb
LU-9846 lod: Add overstriping support 25/28425/43
author Patrick Farrell <pfarrell@whamcloud.com>
Wed, 29 May 2019 14:42:59 +0000 (10:42 -0400)
committer Oleg Drokin <green@whamcloud.com>
Sat, 1 Jun 2019 03:54:51 +0000 (03:54 +0000)
Each stripe in a shared file in Lustre corresponds to a
single LDLM extent locking domain and also to a single
object on disk (and in the OSS page cache).  LDLM locks are
extent locks, but there are still significant issues with
false sharing with multiple writers.  On-disk file systems
also have per-object performance limitations for both read
and write.

The LDLM limitation means it is best to have a single
writer per stripe, but modern OSTs can be faster than a
single client, so this restricts maximum performance unless
special methods are used (e.g., Lustre lockahead).

The on-disk file system limitations mean that even if LDLM
locking is not an issue (read and not write, or lockahead),
OST performance in a shared file is still limited by having
only one object per OST.

These limitations make it impossible to get the full
performance of a modern Lustre FS with a single shared
file.

This patch makes it possible to have >1 stripe on a given
OST in each layout component.  This is known as
overstriping.  It works exactly like a normally striped
file, and is largely transparent to users.

By raising the object count per OST, this avoids the single
object limits, and by creating more stripes, also avoids
the "single effective writer per stripe" LDLM limitation.

However, it is only desirable in some situations, so users
must request it with a special setstripe command:

lfs setstripe -C [count] [file]

Users can also access overstriping using the standard '-o'
option to manually select OSTs:

lfs setstripe -o [ost_indices] [file]

Overstriping also makes it easy to test layout size limits, so
we add a test for that.

Signed-off-by: Patrick Farrell <pfarrell@whamcloud.com>
Change-Id: I14bb94b05642b3542a965e84fda4615b997a4dea
Reviewed-on: https://review.whamcloud.com/28425
Tested-by: Jenkins
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Bobi Jam <bobijam@hotmail.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
28 files changed:
lustre/doc/lfs-setstripe.1
lustre/include/lustre/lustreapi.h
lustre/include/uapi/linux/lustre/lustre_idl.h
lustre/include/uapi/linux/lustre/lustre_user.h
lustre/lfsck/lfsck_layout.c
lustre/llite/llite_lib.c
lustre/lod/lod_internal.h
lustre/lod/lod_lov.c
lustre/lod/lod_object.c
lustre/lod/lod_qos.c
lustre/lov/lov_cl_internal.h
lustre/lov/lov_ea.c
lustre/lov/lov_obd.c
lustre/mdt/mdt_handler.c
lustre/mdt/mdt_internal.h
lustre/mdt/mdt_open.c
lustre/mdt/mdt_reint.c
lustre/ptlrpc/wiretest.c
lustre/tests/conf-sanity.sh
lustre/tests/llapi_layout_test.c
lustre/tests/sanity-pfl.sh
lustre/tests/sanity.sh
lustre/tests/test-framework.sh
lustre/utils/lfs.c
lustre/utils/liblustreapi.c
lustre/utils/liblustreapi_layout.c
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index 5bca972..ddd474d 100644 (file)
@@ -146,6 +146,12 @@ The number of OSTs to stripe a file over. \fB0 \fRmeans to use the
 filesystem-wide default stripe count (default 1), and \fB-1 \fRmeans to stripe
 over all available OSTs.
 .TP
+.B -C\fR, \fB--overstripe-count \fR<\fIstripe_count\fR>
+The number of stripes to create, creating > 1 stripe per OST if count exceeds
+the number of OSTs in the file system. \fB0 \fRmeans to use the filesystem-wide
+default stripe count (default 1), and \fB-1 \fRmeans to stripe over all
+available OSTs.
+.TP
 .B -S\fR, \fB--stripe-size \fR<\fIstripe_size\fR>
 The number of bytes to store on each OST before moving to the next OST. A
 stripe size of
index 074dd3d..2ae985c 100644 (file)
@@ -126,6 +126,8 @@ struct llapi_stripe_param {
 
 #define lsp_tgts       lsp_osts
 
+__u32 llapi_pattern_to_lov(uint64_t pattern);
+
 int llapi_file_open_param(const char *name, int flags, mode_t mode,
                          const struct llapi_stripe_param *param);
 int llapi_file_create_foreign(const char *name, mode_t mode, __u32 type,
@@ -656,8 +658,9 @@ int llapi_layout_merge(struct llapi_layout **dst_layout,
  * stored using RAID0.  That is, data will be split evenly and without
  * redundancy across all OSTs in the layout.
  */
-#define LLAPI_LAYOUT_RAID0     0ULL
-#define LLAPI_LAYOUT_MDT       2ULL
+#define LLAPI_LAYOUT_RAID0             0ULL
+#define LLAPI_LAYOUT_MDT               2ULL
+#define LLAPI_LAYOUT_OVERSTRIPING      4ULL
 
 /**
 * The layout includes a specific set of OSTs on which to allocate.
index 6d7f66c..9557039 100644 (file)
@@ -892,6 +892,7 @@ struct ptlrpc_body_v2 {
 #define MDT_CONNECT_SUPPORTED2 (OBD_CONNECT2_FILE_SECCTX | \
                                OBD_CONNECT2_DIR_MIGRATE | \
                                OBD_CONNECT2_SUM_STATFS | \
+                               OBD_CONNECT2_OVERSTRIPING | \
                                OBD_CONNECT2_FLR |\
                                OBD_CONNECT2_LOCK_CONVERT | \
                                OBD_CONNECT2_ARCHIVE_ID_ARRAY | \
index 31a6d4e..0edb929 100644 (file)
@@ -547,11 +547,11 @@ struct fsxattr {
 #define LMV_USER_MAGIC_V0      0x0CD20CD0    /* old default lmv magic*/
 #define LMV_USER_MAGIC_SPECIFIC        0x0CD40CD0
 
-#define LOV_PATTERN_NONE       0x000
-#define LOV_PATTERN_RAID0      0x001
-#define LOV_PATTERN_RAID1      0x002
-#define LOV_PATTERN_MDT                0x100
-#define LOV_PATTERN_CMOBD      0x200
+#define LOV_PATTERN_NONE               0x000
+#define LOV_PATTERN_RAID0              0x001
+#define LOV_PATTERN_RAID1              0x002
+#define LOV_PATTERN_MDT                        0x100
+#define LOV_PATTERN_OVERSTRIPING       0x200
 
 #define LOV_PATTERN_F_MASK     0xffff0000
 #define LOV_PATTERN_F_HOLE     0x40000000 /* there is hole in LOV EA */
@@ -563,9 +563,22 @@ struct fsxattr {
 static inline bool lov_pattern_supported(__u32 pattern)
 {
        return (pattern & ~LOV_PATTERN_F_RELEASED) == LOV_PATTERN_RAID0 ||
+              (pattern & ~LOV_PATTERN_F_RELEASED) ==
+                       (LOV_PATTERN_RAID0 | LOV_PATTERN_OVERSTRIPING) ||
               (pattern & ~LOV_PATTERN_F_RELEASED) == LOV_PATTERN_MDT;
 }
 
+/* RELEASED and MDT patterns are not valid in many places, so rather than
+ * having many extra checks on lov_pattern_supported, we have this separate
+ * check for non-released, non-DOM components
+ */
+static inline bool lov_pattern_supported_normal_comp(__u32 pattern)
+{
+       /* masking off the optional overstriping bit must leave plain RAID0,
+        * so RELEASED, MDT and any other extra bits are all rejected */
+       return (pattern & ~LOV_PATTERN_OVERSTRIPING) == LOV_PATTERN_RAID0;
+}
+
 #define LOV_MAXPOOLNAME 15
 #define LOV_POOLNAMEF "%.15s"
 
@@ -581,7 +594,7 @@ static inline bool lov_pattern_supported(__u32 pattern)
  * allocation that is sufficient for the current generation of systems.
  *
  * (max buffer size - lov+rpc header) / sizeof(struct lov_ost_data_v1) */
-#define LOV_MAX_STRIPE_COUNT 2000  /* ((12 * 4096 - 256) / 24) */
+#define LOV_MAX_STRIPE_COUNT 2000  /* ~((12 * 4096 - 256) / 24) */
 #define LOV_ALL_STRIPES       0xffff /* only valid for directories */
 #define LOV_V1_INSANE_STRIPE_COUNT 65532 /* maximum stripe count bz13933 */
 
index 6da073c..aad367c 100644 (file)
@@ -368,7 +368,7 @@ static int lfsck_layout_verify_header_v1v3(struct dt_object *obj,
        }
 #endif
 
-       if (lov_pattern(pattern) != LOV_PATTERN_RAID0) {
+       if (!lov_pattern_supported_normal_comp(lov_pattern(pattern))) {
                CDEBUG(D_LFSCK, "Unsupported LOV EA pattern %u for the file "
                       DFID" in the component %x\n",
                       pattern, PFID(lfsck_dto2fid(obj)), comp_id);
index 69bbc79..6139bf3 100644 (file)
@@ -223,6 +223,7 @@ static int client_common_fill_super(struct super_block *sb, char *md, char *dt,
 
        data->ocd_connect_flags2 = OBD_CONNECT2_DIR_MIGRATE |
                                   OBD_CONNECT2_SUM_STATFS |
+                                  OBD_CONNECT2_OVERSTRIPING |
                                   OBD_CONNECT2_FLR |
                                   OBD_CONNECT2_LOCK_CONVERT |
                                   OBD_CONNECT2_ARCHIVE_ID_ARRAY |
index 45df47d..46fc4c0 100644 (file)
@@ -764,7 +764,7 @@ __u16 lod_comp_entry_stripe_count(struct lod_object *lo,
                                  struct lod_layout_component *entry,
                                  bool is_dir);
 __u16 lod_get_stripe_count(struct lod_device *lod, struct lod_object *lo,
-                          __u16 stripe_count);
+                          __u16 stripe_count, bool overstriping);
 void lod_qos_statfs_update(const struct lu_env *env, struct lod_device *lod);
 
 /* lproc_lod.c */
index 673ca5b..c139e49 100644 (file)
@@ -1346,8 +1346,7 @@ int lod_parse_striping(const struct lu_env *env, struct lod_object *lo,
                }
 
                pattern = le32_to_cpu(lmm->lmm_pattern);
-               if (lov_pattern(pattern) != LOV_PATTERN_RAID0 &&
-                   lov_pattern(pattern) != LOV_PATTERN_MDT)
+               if (!lov_pattern_supported(lov_pattern(pattern)))
                        GOTO(out, rc = -EINVAL);
 
                lod_comp->llc_pattern = pattern;
@@ -2114,9 +2113,8 @@ void lod_fix_desc_stripe_count(__u32 *val)
 void lod_fix_desc_pattern(__u32 *val)
 {
        /* from lov_setstripe */
-       if ((*val != 0) && (*val != LOV_PATTERN_RAID0) &&
-           (*val != LOV_PATTERN_MDT)) {
-               LCONSOLE_WARN("Unknown stripe pattern: %#x\n", *val);
+       if ((*val != 0) && !lov_pattern_supported_normal_comp(*val)) {
+               LCONSOLE_WARN("lod: Unknown stripe pattern: %#x\n", *val);
                *val = 0;
        }
 }
index 43437f9..445130e 100644 (file)
@@ -2675,7 +2675,8 @@ inline __u16 lod_comp_entry_stripe_count(struct lod_object *lo,
        else if ((__u16)-1 == entry->llc_stripe_count)
                return lod->lod_desc.ld_tgt_count;
        else
-               return lod_get_stripe_count(lod, lo, entry->llc_stripe_count);
+               return lod_get_stripe_count(lod, lo,
+                                           entry->llc_stripe_count, false);
 }
 
 static int lod_comp_md_size(struct lod_object *lo, bool is_dir)
@@ -4635,9 +4636,8 @@ static int lod_get_default_lov_striping(const struct lu_env *env,
                                        LCME_TEMPLATE_FLAGS;
                }
 
-               if (v1->lmm_pattern != LOV_PATTERN_RAID0 &&
-                   v1->lmm_pattern != LOV_PATTERN_MDT &&
-                   v1->lmm_pattern != 0) {
+               if (!lov_pattern_supported(v1->lmm_pattern) &&
+                   !(v1->lmm_pattern & LOV_PATTERN_F_RELEASED)) {
                        lod_free_def_comp_entries(lds);
                        RETURN(-EINVAL);
                }
index 934d6bc..5433232 100644 (file)
@@ -936,11 +936,13 @@ static inline bool lod_should_avoid_ost(struct lod_object *lo,
 
 static int lod_check_and_reserve_ost(const struct lu_env *env,
                                     struct lod_object *lo,
+                                    struct lod_layout_component *lod_comp,
                                     struct obd_statfs *sfs, __u32 ost_idx,
                                     __u32 speed, __u32 *s_idx,
                                     struct dt_object **stripe,
                                     __u32 *ost_indices,
-                                    struct thandle *th)
+                                    struct thandle *th,
+                                    bool *overstriped)
 {
        struct lod_device *lod = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
        struct lod_avoid_guide *lag = &lod_env_info(env)->lti_avoid;
@@ -989,11 +991,14 @@ static int lod_check_and_reserve_ost(const struct lu_env *env,
                          "component\n", speed, ost_idx);
                RETURN(rc);
        }
-       /*
-        * do not put >1 objects on a single OST
-        */
-       if (lod_qos_is_ost_used(env, ost_idx, stripe_idx))
-               RETURN(rc);
+
+       /* do not put >1 objects on a single OST, except for overstriping */
+       if (lod_qos_is_ost_used(env, ost_idx, stripe_idx)) {
+               if (lod_comp->llc_pattern & LOV_PATTERN_OVERSTRIPING)
+                       *overstriped = true;
+               else
+                       RETURN(rc);
+       }
 
        o = lod_qos_declare_object_on(env, lod, ost_idx, th);
        if (IS_ERR(o)) {
@@ -1058,6 +1063,8 @@ static int lod_alloc_rr(const struct lu_env *env, struct lod_object *lo,
        __u32 stripe_idx = 0;
        __u32 stripe_count, stripe_count_min, ost_idx;
        int rc, speed = 0, ost_connecting = 0;
+       int stripes_per_ost = 1;
+       bool overstriped = false;
        ENTRY;
 
        LASSERT(lo->ldo_comp_cnt > comp_idx && lo->ldo_comp_entries != NULL);
@@ -1111,7 +1118,12 @@ repeat_find:
                  stripe_count, lqr->lqr_start_idx, lqr->lqr_start_count,
                  lqr->lqr_offset_idx, osts->op_count, osts->op_count);
 
-       for (i = 0; i < osts->op_count && stripe_idx < stripe_count; i++) {
+       if (lod_comp->llc_pattern & LOV_PATTERN_OVERSTRIPING)
+               stripes_per_ost =
+                       (lod_comp->llc_stripe_count - 1)/osts->op_count + 1;
+
+       for (i = 0; i < osts->op_count * stripes_per_ost
+            && stripe_idx < stripe_count; i++) {
                array_idx = (lqr->lqr_start_idx + lqr->lqr_offset_idx) %
                                osts->op_count;
                ++lqr->lqr_start_idx;
@@ -1131,9 +1143,9 @@ repeat_find:
                        continue;
 
                spin_unlock(&lqr->lqr_alloc);
-               rc = lod_check_and_reserve_ost(env, lo, sfs, ost_idx, speed,
-                                              &stripe_idx, stripe, ost_indices,
-                                              th);
+               rc = lod_check_and_reserve_ost(env, lo, lod_comp, sfs, ost_idx,
+                                              speed, &stripe_idx, stripe,
+                                              ost_indices, th, &overstriped);
                spin_lock(&lqr->lqr_alloc);
 
                if (rc != 0 && OST_TGT(m, ost_idx)->ltd_connecting)
@@ -1151,6 +1163,12 @@ repeat_find:
        spin_unlock(&lqr->lqr_alloc);
        up_read(&m->lod_qos.lq_rw_sem);
 
+       /* If there are enough OSTs, a component with overstriping requested
+        * will not actually end up overstriped.  The comp should reflect this.
+        */
+       if (!overstriped)
+               lod_comp->llc_pattern &= ~LOV_PATTERN_OVERSTRIPING;
+
        if (stripe_idx) {
                lod_comp->llc_stripe_count = stripe_idx;
                /* at least one stripe is allocated */
@@ -1249,10 +1267,11 @@ static int lod_alloc_ost_list(const struct lu_env *env, struct lod_object *lo,
                        break;
                }
 
-               /*
-                * do not put >1 objects on a single OST
+               /* do not put >1 objects on a single OST, except for
+                * overstriping
                 */
-               if (lod_qos_is_ost_used(env, ost_idx, stripe_count)) {
+               if (lod_qos_is_ost_used(env, ost_idx, stripe_count) &&
+                   !(lod_comp->llc_pattern & LOV_PATTERN_OVERSTRIPING)) {
                        rc = -EINVAL;
                        break;
                }
@@ -1315,13 +1334,15 @@ static int lod_alloc_specific(const struct lu_env *env, struct lod_object *lo,
        struct lod_layout_component *lod_comp;
        struct lod_device *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
        struct obd_statfs *sfs = &lod_env_info(env)->lti_osfs;
-       struct dt_object  *o;
-       __u32              ost_idx;
-       unsigned int       i, array_idx, ost_count;
-       int                rc, stripe_num = 0;
-       int                speed = 0;
+       struct dt_object *o;
+       __u32 ost_idx;
+       unsigned int i, array_idx, ost_count;
+       int rc, stripe_num = 0;
+       int speed = 0;
        struct pool_desc  *pool = NULL;
        struct ost_pool   *osts;
+       int stripes_per_ost = 1;
+       bool overstriped = false;
        ENTRY;
 
        LASSERT(lo->ldo_comp_cnt > comp_idx && lo->ldo_comp_entries != NULL);
@@ -1359,7 +1380,11 @@ repeat_find:
                GOTO(out, rc = -EINVAL);
        }
 
-       for (i = 0; i < ost_count;
+       if (lod_comp->llc_pattern & LOV_PATTERN_OVERSTRIPING)
+               stripes_per_ost =
+                       (lod_comp->llc_stripe_count - 1)/ost_count + 1;
+
+       for (i = 0; i < ost_count * stripes_per_ost;
                        i++, array_idx = (array_idx + 1) % ost_count) {
                ost_idx = osts->op_array[array_idx];
 
@@ -1372,10 +1397,15 @@ repeat_find:
                        continue;
 
                /*
-                * do not put >1 objects on a single OST
+                * do not put >1 objects on a single OST, except for
+                * overstriping, where it is intended
                 */
-               if (lod_qos_is_ost_used(env, ost_idx, stripe_num))
-                       continue;
+               if (lod_qos_is_ost_used(env, ost_idx, stripe_num)) {
+                       if (lod_comp->llc_pattern & LOV_PATTERN_OVERSTRIPING)
+                               overstriped = true;
+                       else
+                               continue;
+               }
 
                /*
                 * try not allocate on the OST used by other component
@@ -1437,6 +1467,13 @@ repeat_find:
               PFID(lu_object_fid(lod2lu_obj(lo))), stripe_num,
               lod_comp->llc_stripe_count);
        rc = stripe_num == 0 ? -ENOSPC : -EFBIG;
+
+       /* If there are enough OSTs, a component with overstriping requested
+        * will not actually end up overstriped.  The comp should reflect this.
+        */
+       if (rc == 0 && !overstriped)
+               lod_comp->llc_pattern &= ~LOV_PATTERN_OVERSTRIPING;
+
 out:
        if (pool != NULL) {
                up_read(&pool_tgt_rw_sem(pool));
@@ -1527,6 +1564,8 @@ static int lod_alloc_qos(const struct lu_env *env, struct lod_object *lo,
        struct ost_pool *osts;
        unsigned int i;
        __u32 nfound, good_osts, stripe_count, stripe_count_min;
+       bool overstriped = false;
+       int stripes_per_ost = 1;
        int rc = 0;
        ENTRY;
 
@@ -1551,6 +1590,10 @@ static int lod_alloc_qos(const struct lu_env *env, struct lod_object *lo,
        if (!lod_qos_is_usable(lod))
                GOTO(out_nolock, rc = -EAGAIN);
 
+       if (lod_comp->llc_pattern & LOV_PATTERN_OVERSTRIPING)
+               stripes_per_ost =
+                       (lod_comp->llc_stripe_count - 1)/osts->op_count + 1;
+
        /* Do actual allocation, use write lock here. */
        down_write(&lod->lod_qos.lq_rw_sem);
 
@@ -1605,9 +1648,11 @@ static int lod_alloc_qos(const struct lu_env *env, struct lod_object *lo,
        if (good_osts < stripe_count_min)
                GOTO(out, rc = -EAGAIN);
 
-       /* We have enough osts */
-       if (good_osts < stripe_count)
-               stripe_count = good_osts;
+       /* If we do not have enough OSTs for the requested stripe count, do not
+        * put more stripes per OST than requested.
+        */
+       if (stripe_count / stripes_per_ost > good_osts)
+               stripe_count = good_osts * stripes_per_ost;
 
        /* Find enough OSTs with weighted random allocation. */
        nfound = 0;
@@ -1666,12 +1711,21 @@ static int lod_alloc_qos(const struct lu_env *env, struct lod_object *lo,
 
                        QOS_DEBUG("stripe=%d to idx=%d\n", nfound, idx);
                        /*
-                        * do not put >1 objects on a single OST
+                        * do not put >1 objects on a single OST, except for
+                        * overstriping
                         */
-                       if (lod_qos_is_ost_used(env, idx, nfound) ||
-                           lod_comp_is_ost_used(env, lo, idx))
+                       if ((lod_comp_is_ost_used(env, lo, idx)) &&
+                           !(lod_comp->llc_pattern & LOV_PATTERN_OVERSTRIPING))
                                continue;
 
+                       if (lod_qos_is_ost_used(env, idx, nfound)) {
+                               if (lod_comp->llc_pattern &
+                                   LOV_PATTERN_OVERSTRIPING)
+                                       overstriped = true;
+                               else
+                                       continue;
+                       }
+
                        o = lod_qos_declare_object_on(env, lod, idx, th);
                        if (IS_ERR(o)) {
                                QOS_DEBUG("can't declare object on #%u: %d\n",
@@ -1718,6 +1772,12 @@ static int lod_alloc_qos(const struct lu_env *env, struct lod_object *lo,
                rc = -EAGAIN;
        }
 
+       /* If there are enough OSTs, a component with overstriping requested
+        * will not actually end up overstriped.  The comp should reflect this.
+        */
+       if (rc == 0 && !overstriped)
+               lod_comp->llc_pattern &= ~LOV_PATTERN_OVERSTRIPING;
+
 out:
        up_write(&lod->lod_qos.lq_rw_sem);
 
@@ -1749,7 +1809,7 @@ out_nolock:
  * \retval             the maximum usable stripe count
  */
 __u16 lod_get_stripe_count(struct lod_device *lod, struct lod_object *lo,
-                          __u16 stripe_count)
+                          __u16 stripe_count, bool overstriping)
 {
        __u32 max_stripes = LOV_MAX_STRIPE_COUNT_OLD;
        /* max stripe count is based on OSD ea size */
@@ -1759,10 +1819,11 @@ __u16 lod_get_stripe_count(struct lod_device *lod, struct lod_object *lo,
 
        if (!stripe_count)
                stripe_count = lod->lod_desc.ld_default_stripe_count;
-       if (stripe_count > lod->lod_desc.ld_active_tgt_count)
-               stripe_count = lod->lod_desc.ld_active_tgt_count;
        if (!stripe_count)
                stripe_count = 1;
+       /* Overstriping allows more stripes than targets */
+       if (stripe_count > lod->lod_desc.ld_active_tgt_count && !overstriping)
+               stripe_count = lod->lod_desc.ld_active_tgt_count;
 
        if (lo->ldo_is_composite) {
                struct lod_layout_component *lod_comp;
@@ -2128,7 +2189,9 @@ int lod_qos_parse_config(const struct lu_env *env, struct lod_object *lo,
                if (v1->lmm_pattern == 0)
                        v1->lmm_pattern = LOV_PATTERN_RAID0;
                if (lov_pattern(v1->lmm_pattern) != LOV_PATTERN_RAID0 &&
-                   lov_pattern(v1->lmm_pattern) != LOV_PATTERN_MDT) {
+                   lov_pattern(v1->lmm_pattern) != LOV_PATTERN_MDT &&
+                   lov_pattern(v1->lmm_pattern) !=
+                       (LOV_PATTERN_RAID0 | LOV_PATTERN_OVERSTRIPING)) {
                        CDEBUG(D_LAYOUT, "%s: invalid pattern: %x\n",
                               lod2obd(d)->obd_name, v1->lmm_pattern);
                        GOTO(free_comp, rc = -EINVAL);
@@ -2172,7 +2235,8 @@ int lod_qos_parse_config(const struct lu_env *env, struct lod_object *lo,
                        }
                }
 
-               if (lod_comp->llc_stripe_count > pool_tgt_count(pool))
+               if (lod_comp->llc_stripe_count > pool_tgt_count(pool) &&
+                   !(lod_comp->llc_pattern & LOV_PATTERN_OVERSTRIPING))
                        lod_comp->llc_stripe_count = pool_tgt_count(pool);
 
                lod_pool_putref(pool);
@@ -2381,7 +2445,10 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
                 */
                lod_qos_statfs_update(env, d);
                stripe_len = lod_get_stripe_count(d, lo,
-                                                 lod_comp->llc_stripe_count);
+                                                 lod_comp->llc_stripe_count,
+                                                 lod_comp->llc_pattern &
+                                                 LOV_PATTERN_OVERSTRIPING);
+
                if (stripe_len == 0)
                        GOTO(out, rc = -ERANGE);
                lod_comp->llc_stripe_count = stripe_len;
index 179e450..a72d455 100644 (file)
@@ -150,9 +150,10 @@ static inline char *llt2str(enum lov_layout_type llt)
  */
 static inline __u32 lov_entry_type(struct lov_stripe_md_entry *lsme)
 {
-       if ((lov_pattern(lsme->lsme_pattern) == LOV_PATTERN_RAID0) ||
+       if ((lov_pattern(lsme->lsme_pattern) & LOV_PATTERN_RAID0) ||
            (lov_pattern(lsme->lsme_pattern) == LOV_PATTERN_MDT))
-               return lov_pattern(lsme->lsme_pattern);
+               return lov_pattern(lsme->lsme_pattern &
+                                  ~LOV_PATTERN_OVERSTRIPING);
        return 0;
 }
 
index 49673c5..54f7936 100644 (file)
@@ -81,33 +81,44 @@ static loff_t lov_tgt_maxbytes(struct lov_tgt_desc *tgt)
 static int lsm_lmm_verify_v1v3(struct lov_mds_md *lmm, size_t lmm_size,
                               u16 stripe_count)
 {
+       int rc = 0;
+
        if (stripe_count > LOV_V1_INSANE_STRIPE_COUNT) {
-               CERROR("bad stripe count %d\n", stripe_count);
+               rc = -EINVAL;
+               CERROR("lov: bad stripe count %d: rc = %d\n",
+                      stripe_count, rc);
                lov_dump_lmm_common(D_WARNING, lmm);
-               return -EINVAL;
+               goto out;
        }
 
        if (lmm_oi_id(&lmm->lmm_oi) == 0) {
-               CERROR("zero object id\n");
+               rc = -EINVAL;
+               CERROR("lov: zero object id: rc = %d\n", rc);
                lov_dump_lmm_common(D_WARNING, lmm);
-               return -EINVAL;
+               goto out;
        }
 
        if (lov_pattern(le32_to_cpu(lmm->lmm_pattern)) != LOV_PATTERN_MDT &&
-           lov_pattern(le32_to_cpu(lmm->lmm_pattern)) != LOV_PATTERN_RAID0) {
-               CERROR("bad striping pattern\n");
+           lov_pattern(le32_to_cpu(lmm->lmm_pattern)) != LOV_PATTERN_RAID0 &&
+           lov_pattern(le32_to_cpu(lmm->lmm_pattern)) !=
+                       (LOV_PATTERN_RAID0 | LOV_PATTERN_OVERSTRIPING)) {
+               rc = -EINVAL;
+               CERROR("lov: unrecognized striping pattern: rc = %d\n", rc);
                lov_dump_lmm_common(D_WARNING, lmm);
-               return -EINVAL;
+               goto out;
        }
 
        if (lmm->lmm_stripe_size == 0 ||
            (le32_to_cpu(lmm->lmm_stripe_size)&(LOV_MIN_STRIPE_SIZE-1)) != 0) {
-               CERROR("bad stripe size %u\n",
-                      le32_to_cpu(lmm->lmm_stripe_size));
+               rc = -EINVAL;
+               CERROR("lov: bad stripe size %u: rc = %d\n",
+                      le32_to_cpu(lmm->lmm_stripe_size), rc);
                lov_dump_lmm_common(D_WARNING, lmm);
-               return -EINVAL;
+               goto out;
        }
-       return 0;
+
+out:
+       return rc;
 }
 
 static void lsme_free(struct lov_stripe_md_entry *lsme)
index be9e51e..6ca463e 100644 (file)
@@ -696,10 +696,10 @@ void lov_fix_desc_stripe_count(__u32 *val)
 void lov_fix_desc_pattern(__u32 *val)
 {
         /* from lov_setstripe */
-        if ((*val != 0) && (*val != LOV_PATTERN_RAID0)) {
-                LCONSOLE_WARN("Unknown stripe pattern: %#x\n", *val);
-                *val = 0;
-        }
+       if ((*val != 0) && !lov_pattern_supported_normal_comp(*val)) {
+               LCONSOLE_WARN("lov: Unknown stripe pattern: %#x\n", *val);
+               *val = 0;
+       }
 }
 
 void lov_fix_desc_qos_maxage(__u32 *val)
index 33d9847..e6855bf 100644 (file)
@@ -1205,8 +1205,15 @@ static int mdt_getattr_internal(struct mdt_thread_info *info,
                ma->ma_lmm = buffer->lb_buf;
                ma->ma_lmm_size = buffer->lb_len;
                ma->ma_need = MA_INODE | MA_HSM;
-               if (ma->ma_lmm_size > 0)
+               if (ma->ma_lmm_size > 0) {
                        ma->ma_need |= MA_LOV;
+                       /* Older clients may crash if they getattr overstriped
+                        * files
+                        */
+                       if (!exp_connect_overstriping(exp) &&
+                           mdt_lmm_is_overstriping(ma->ma_lmm))
+                               RETURN(-EOPNOTSUPP);
+               }
        }
 
         if (S_ISDIR(lu_object_attr(&next->mo_lu)) &&
index 546ad8a..f6a04f1 100644 (file)
@@ -709,6 +709,43 @@ static inline bool mdt_lmm_is_flr(struct lov_mds_md *lmm)
               le16_to_cpu(lcm->lcm_mirror_count) > 0;
 }
 
+/* True if a V1/V3 LOV EA has OVERSTRIPING set in its le32 lmm_pattern */
+static inline bool lmm_is_overstriping(struct lov_mds_md *lmm)
+{
+       __u32 magic = le32_to_cpu(lmm->lmm_magic);
+
+       return (magic == LOV_MAGIC_V1 || magic == LOV_MAGIC_V3) &&
+              (le32_to_cpu(lmm->lmm_pattern) & LOV_PATTERN_OVERSTRIPING);
+}
+
+/* Return true if any component of a composite layout is overstriped */
+static inline bool mdt_lmm_comp_overstriping(struct lov_mds_md *lmm)
+{
+       struct lov_comp_md_v1 *comp_v1 = (struct lov_comp_md_v1 *)lmm;
+       struct lov_mds_md *v1;
+       __u32 off;
+       int i;
+
+       /* start at entry 0: the first component may itself be overstriped */
+       for (i = 0; i < le16_to_cpu(comp_v1->lcm_entry_count); i++) {
+               off = le32_to_cpu(comp_v1->lcm_entries[i].lcme_offset);
+               v1 = (struct lov_mds_md *)((char *)comp_v1 + off);
+
+               if (lmm_is_overstriping(v1))
+                       return true;
+       }
+
+       return false;
+}
+
+/* Composite (LOV_MAGIC_COMP_V1) EAs are scanned component by component;
+ * anything else goes through the plain V1/V3 check. */
+static inline bool mdt_lmm_is_overstriping(struct lov_mds_md *lmm)
+{
+       return le32_to_cpu(lmm->lmm_magic) == LOV_MAGIC_COMP_V1 ?
+              mdt_lmm_comp_overstriping(lmm) : lmm_is_overstriping(lmm);
+}
+
 static inline bool mdt_is_sum_statfs_client(struct obd_export *exp)
 {
        return exp_connect_flags(exp) & OBD_CONNECT_FLAGS2 &&
index 9211ee2..43d891c 100644 (file)
@@ -542,6 +542,11 @@ static int mdt_finish_open(struct mdt_thread_info *info,
                RETURN(-EOPNOTSUPP);
        }
 
+       /* Overstriped files can crash older clients */
+       if (isreg && !exp_connect_overstriping(exp) &&
+           mdt_lmm_is_overstriping(ma->ma_lmm))
+               RETURN(-EOPNOTSUPP);
+
        /* LU-2275, simulate broken behaviour (esp. prevalent in
         * pre-2.4 servers where a very strange reply is sent on error
         * that looks like it was actually almost successful and a
index fdb9798..2971d6e 100644 (file)
@@ -681,13 +681,23 @@ static int mdt_reint_setattr(struct mdt_thread_info *info,
 
                /* LU-10286: compatibility check for FLR.
                 * Please check the comment in mdt_finish_open() for details */
-               if (!exp_connect_flr(info->mti_exp)) {
+               if (!exp_connect_flr(info->mti_exp) ||
+                   !exp_connect_overstriping(info->mti_exp)) {
                        rc = mdt_big_xattr_get(info, mo, XATTR_NAME_LOV);
                        if (rc < 0 && rc != -ENODATA)
                                GOTO(out_put, rc);
 
-                       if (rc > 0 && mdt_lmm_is_flr(info->mti_big_lmm))
-                               GOTO(out_put, rc = -EOPNOTSUPP);
+                       if (!exp_connect_flr(info->mti_exp)) {
+                               if (rc > 0 &&
+                                   mdt_lmm_is_flr(info->mti_big_lmm))
+                                       GOTO(out_put, rc = -EOPNOTSUPP);
+                       }
+
+                       if (!exp_connect_overstriping(info->mti_exp)) {
+                               if (rc > 0 &&
+                                   mdt_lmm_is_overstriping(info->mti_big_lmm))
+                                       GOTO(out_put, rc = -EOPNOTSUPP);
+                       }
                }
 
                /* For truncate, the file size sent from client
index b0f4d81..d4ef2fe 100644 (file)
@@ -1715,8 +1715,8 @@ void lustre_assert_wire_constants(void)
                (unsigned)LOV_PATTERN_RAID1);
        LASSERTF(LOV_PATTERN_MDT == 0x00000100UL, "found 0x%.8xUL\n",
                (unsigned)LOV_PATTERN_MDT);
-       LASSERTF(LOV_PATTERN_CMOBD == 0x00000200UL, "found 0x%.8xUL\n",
-               (unsigned)LOV_PATTERN_CMOBD);
+       LASSERTF(LOV_PATTERN_OVERSTRIPING == 0x00000200UL, "found 0x%.8xUL\n",
+               (unsigned)LOV_PATTERN_OVERSTRIPING);
 
        /* Checks for struct lov_comp_md_entry_v1 */
        LASSERTF((int)sizeof(struct lov_comp_md_entry_v1) == 48, "found %lld\n",
index 7219536..1714bdb 100644 (file)
@@ -4542,7 +4542,7 @@ test_60() { # LU-471
 run_test 60 "check mkfs.lustre --mkfsoptions -E -O options setting"
 
 test_61() { # LU-80
-       local lxattr=false
+       local lxattr=$(large_xattr_enabled)
 
        [ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.1.53) ] ||
                skip "Need MDS version at least 2.1.53"
@@ -4601,7 +4601,7 @@ test_61() { # LU-80
        log "remove large xattr $name from $file"
        setfattr -x $name $file || error "removing $name from $file failed"
 
-       if $lxattr; then
+       if $lxattr && [ $(facet_fstype $SINGLEMDS) == ldiskfs ]; then
                stopall || error "stopping for e2fsck run"
                for num in $(seq $MDSCOUNT); do
                        run_e2fsck $(facet_active_host mds$num) \
index 3002929..7ff1340 100644 (file)
@@ -49,6 +49,7 @@
 #include <sys/stat.h>
 #include <getopt.h>
 #include <inttypes.h>
+#include <sys/ioctl.h>
 
 #define ERROR(fmt, ...)                                                        \
        fprintf(stderr, "%s: %s:%d: %s: " fmt "\n",                     \
@@ -1529,7 +1530,99 @@ void test31(void)
                "s: %"PRIu64", e: %"PRIu64"", s, e);
 }
 
-#define TEST_DESC_LEN  50
+#define T32FILE                        "t32"
+#define T32_STRIPE_COUNT       (num_osts*2)
+#define T32_DESC               "Test overstriping with layout_file_create"
+/*
+ * Create T32FILE through the llapi_layout API with the overstriping pattern
+ * and twice as many stripes as there are OSTs, then read the layout back
+ * with the LL_IOC_LOV_GETSTRIPE_NEW ioctl and verify the stripe count.
+ */
+void test32(void)
+{
+       int rc;
+       int fd;
+       uint64_t count;
+       struct llapi_layout *layout = llapi_layout_alloc();
+       void *lmdbuf = NULL;
+       struct lov_user_md *lmd;
+       char path[PATH_MAX];
+
+       ASSERTF(layout != NULL, "errno %d", errno);
+
+       /* Maximum possible, to be on the safe side - num_osts could be large */
+       lmdbuf = malloc(XATTR_SIZE_MAX);
+       ASSERTF(lmdbuf != NULL, "errno %d", errno);
+       lmd = lmdbuf;
+
+       snprintf(path, sizeof(path), "%s/%s", lustre_dir, T32FILE);
+
+       /* start from a clean slate; a missing file is not an error */
+       rc = unlink(path);
+       ASSERTF(rc >= 0 || errno == ENOENT, "errno = %d", errno);
+
+       /* set the stripe count and check that the layout reports it back */
+       rc = llapi_layout_stripe_count_set(layout, T32_STRIPE_COUNT);
+       ASSERTF(rc == 0, "errno = %d", errno);
+       rc = llapi_layout_stripe_count_get(layout, &count);
+       ASSERTF(rc == 0 && count == T32_STRIPE_COUNT, "%"PRIu64" != %d", count,
+               T32_STRIPE_COUNT);
+
+       /* mark the layout as overstriped */
+       rc = llapi_layout_pattern_set(layout, LLAPI_LAYOUT_OVERSTRIPING);
+       ASSERTF(rc == 0, "errno = %d", errno);
+
+       /* create the file with this layout */
+       fd = llapi_layout_file_create(path, 0, 0660, layout);
+       ASSERTF(fd >= 0, "path = %s, errno = %d", path, errno);
+
+       /* fetch the layout of the new file into lmdbuf */
+       rc = ioctl(fd, LL_IOC_LOV_GETSTRIPE_NEW, lmdbuf);
+       ASSERTF(rc == 0, "errno = %d", errno);
+
+       /* the file must carry exactly the requested number of stripes */
+       count = lmd->lmm_stripe_count;
+       ASSERTF(count == T32_STRIPE_COUNT,
+               "stripe count (%"PRIu64") not equal to expected (%d)",
+               count, T32_STRIPE_COUNT);
+
+       rc = close(fd);
+       ASSERTF(rc == 0, "errno = %d", errno);
+       llapi_layout_free(layout);
+       free(lmdbuf);
+}
+
+#define T33FILE                        "t33"
+#define T33_STRIPE_COUNT       (num_osts*2)
+#define T33_DESC               "Test overstriping with llapi_file_open"
+/*
+ * Create T33FILE with llapi_file_open() using the RAID0 + overstriping
+ * pattern and T33_STRIPE_COUNT stripes, then read the layout back with the
+ * LL_IOC_LOV_GETSTRIPE_NEW ioctl and verify the stripe count.
+ */
+void test33(void)
+{
+       int rc;
+       int fd;
+       uint64_t count;
+       void *lmdbuf = NULL;
+       struct lov_user_md *lmd;
+       char path[PATH_MAX];
+
+       /* Maximum possible, to be on the safe side - num_osts could be large */
+       lmdbuf = malloc(XATTR_SIZE_MAX);
+       ASSERTF(lmdbuf != NULL, "errno %d", errno);
+       lmd = lmdbuf;
+
+       snprintf(path, sizeof(path), "%s/%s", lustre_dir, T33FILE);
+
+       /* start from a clean slate; a missing file is not an error */
+       rc = unlink(path);
+       ASSERTF(rc >= 0 || errno == ENOENT, "errno = %d", errno);
+
+       /*
+        * Request the same T33_STRIPE_COUNT that is verified below, so the
+        * requested and the expected stripe count cannot drift apart.
+        */
+       fd = llapi_file_open(path, O_CREAT | O_RDWR, 0660, 0, -1,
+                            T33_STRIPE_COUNT,
+                            LOV_PATTERN_RAID0 | LOV_PATTERN_OVERSTRIPING);
+       ASSERTF(fd >= 0, "path = %s, errno = %d", path, errno);
+
+       /* fetch the layout of the new file into lmdbuf */
+       rc = ioctl(fd, LL_IOC_LOV_GETSTRIPE_NEW, lmdbuf);
+       ASSERTF(rc == 0, "errno = %d", errno);
+
+       /* the file must carry exactly the requested number of stripes */
+       count = lmd->lmm_stripe_count;
+       ASSERTF(count == T33_STRIPE_COUNT,
+               "stripe count (%"PRIu64") not equal to expected (%d)",
+               count, T33_STRIPE_COUNT);
+
+       rc = close(fd);
+       ASSERTF(rc == 0, "errno = %d", errno);
+       free(lmdbuf);
+}
+
+#define TEST_DESC_LEN  80
 struct test_tbl_entry {
        void (*tte_fn)(void);
        char tte_desc[TEST_DESC_LEN];
@@ -1569,6 +1662,8 @@ static struct test_tbl_entry test_tbl[] = {
        { .tte_fn = &test29, .tte_desc = T29_DESC, .tte_skip = false },
        { .tte_fn = &test30, .tte_desc = T30_DESC, .tte_skip = false },
        { .tte_fn = &test31, .tte_desc = T31_DESC, .tte_skip = false },
+       /* overstriping tests: each description is paired with its own test */
+       { .tte_fn = &test32, .tte_desc = T32_DESC, .tte_skip = false },
+       { .tte_fn = &test33, .tte_desc = T33_DESC, .tte_skip = false },
 };
 
 #define NUM_TESTS      (sizeof(test_tbl) / sizeof(struct test_tbl_entry))
@@ -1713,5 +1808,6 @@ int main(int argc, char *argv[])
                if (test(tst->tte_fn, tst->tte_desc, tst->tte_skip, i) != 0)
                        rc++;
        }
+
        return rc;
 }
index 30f2492..12d6abf 100644 (file)
@@ -39,7 +39,7 @@ check_runas_id $RUNAS_ID $RUNAS_GID $RUNAS
 assert_DIR
 rm -rf $DIR/[Rdfs][0-9]*
 
-test_0() {
+test_0a() {
        [ $OSTCOUNT -lt 2 ] && skip "needs >= 2 OSTs"
 
        local comp_file=$DIR/$tdir/$tfile
@@ -63,9 +63,34 @@ test_0() {
 
        rm -f $comp_file || error "Delete $comp_file failed"
 }
-run_test 0 "Create full components file, no reused OSTs"
+run_test 0a "Create full components file, no reused OSTs"
+
+# Verify the stripe count limit of a composite layout: components adding up
+# to 1.1 * LOV_MAX_STRIPE_COUNT stripes must be accepted, components adding
+# up to 2 * LOV_MAX_STRIPE_COUNT stripes must be rejected.
+# Skipped without overstriping support or without large xattr support.
+test_0b() {
+       [[ $($LCTL get_param mdc.*.import |
+               grep "connect_flags:.*overstriping") ]] ||
+               skip "server does not support overstriping"
+       large_xattr_enabled || skip_env "no large xattr support"
 
-test_1() {
+       local comp_file=$DIR/$tdir/$tfile
+
+       test_mkdir $DIR/$tdir
+
+       # Create file with 1.1*LOV_MAX_STRIPE_COUNT stripes should succeed
+       $LFS setstripe -E 1m -C $((LOV_MAX_STRIPE_COUNT / 10)) -E -1 \
+               -C $LOV_MAX_STRIPE_COUNT $comp_file ||
+       error "Create $comp_file failed"
+
+       rm -f $comp_file || error "Delete $comp_file failed"
+
+       # Create file with 2*LOV_MAX_STRIPE_COUNT stripes should fail
+       $LFS setstripe -E 1m -C $LOV_MAX_STRIPE_COUNT -E -1 -C $LOV_MAX_STRIPE_COUNT \
+               $comp_file && error "Create $comp_file succeeded"
+
+       # rm -f succeeds even if the failed setstripe left no file behind
+       rm -f $comp_file || error "Delete $comp_file failed"
+}
+run_test 0b "Verify comp stripe count limits"
+
+test_1a() {
        local comp_file=$DIR/$tdir/$tfile
        local rw_len=$((3 * 1024 * 1024))       # 3M
 
@@ -87,7 +112,76 @@ test_1() {
 
        rm -f $comp_file || error "Delete $comp_file failed"
 }
-run_test 1 "Create full components file, reused OSTs"
+run_test 1a "Create full components file, reused OSTs"
+
+# test overstriping (>1 stripe/OST within a component)
+# Builds a two-component file with explicit OST lists that repeat OSTs
+# ("0,0" and "1,1,0,0"), instantiates both components and checks that each
+# component reports exactly the requested OST indices, in order.
+test_1b() {
+       [ $OSTCOUNT -lt 2 ] && skip "needs >= 2 OSTs" && return
+       [[ $($LCTL get_param mdc.*.import |
+               grep "connect_flags:.*overstriping") ]] ||
+               skip "server does not support overstriping"
+
+       local comp_file=$DIR/$tdir/$tfile
+       local rw_len=$((3 * 1024 * 1024))       # 3M
+
+       test_mkdir $DIR/$tdir
+
+       $LFS setstripe -E 1m -S 1m -o 0,0 -E -1 -o 1,1,0,0 $comp_file ||
+               error "Create $comp_file failed"
+
+       # instantiate all components, so that objects are allocated
+       dd if=/dev/zero of=$comp_file bs=1k count=1 seek=1M
+
+       $LFS getstripe $comp_file
+       # collect the l_ost_idx value of every stripe of a component into
+       # one string; each value keeps its trailing comma (e.g. "0,0,")
+       local OSTS_1=$($LFS getstripe -I1 $comp_file | grep -o 'l_ost_idx.*' |
+                     awk -e '{print $2}' | tr "\n" "\0")
+       local OSTS_2=$($LFS getstripe -I2 $comp_file | grep -o 'l_ost_idx.*' |
+                     awk -e '{print $2}' | tr "\n" "\0")
+
+       echo ":"$OSTS_1":"
+       echo ":"$OSTS_2":"
+       [ "$OSTS_1" = "0,0," ] || error "incorrect OSTs($OSTS_1) in component 1"
+       [ "$OSTS_2" = "1,1,0,0," ] ||
+               error "incorrect OSTs($OSTS_2) in component 2"
+
+       small_write $comp_file $rw_len || error "Verify RW failed"
+
+       rm -f $comp_file || error "Delete $comp_file failed"
+}
+run_test 1b "Create full components file, overstriping in components"
+
+# test overstriping with max stripe count
+# Builds a three-component file with 10, 100 and LOV_MAX_STRIPE_COUNT
+# stripes, instantiates all components and checks each stripe count.
+# Skipped without overstriping support or without large xattr support.
+test_1c() {
+       [[ $($LCTL get_param mdc.*.import |
+               grep "connect_flags:.*overstriping") ]] ||
+               skip "server does not support overstriping"
+       large_xattr_enabled || skip_env "no large xattr support"
+
+       local comp_file=$DIR/$tdir/$tfile
+       local rw_len=$((3 * 1024 * 1024))       # 3M
+
+       test_mkdir $DIR/$tdir
+
+       $LFS setstripe -E 1m -C 10 -E 10M -C 100 -E -1 \
+           -C $LOV_MAX_STRIPE_COUNT $comp_file ||
+               error "Create $comp_file failed"
+
+       # Seek & write in to last component so all objects are allocated
+       dd if=/dev/zero of=$comp_file bs=1k count=1 seek=20000
+
+       local count=$($LFS getstripe -c -I1 $DIR/$tdir/$tfile)
+       [ $count -eq 10 ] || error "comp1 stripe count $count, should be 10"
+       count=$($LFS getstripe -c -I2 $DIR/$tdir/$tfile)
+       [ $count -eq 100 ] || error "comp2 stripe count $count, should be 100"
+       # the third (last) component is the one queried with -I3
+       count=$($LFS getstripe -c -I3 $DIR/$tdir/$tfile)
+       [ $count -eq $LOV_MAX_STRIPE_COUNT ] ||
+               error "comp3 stripe count $count != $LOV_MAX_STRIPE_COUNT"
+
+       small_write $comp_file $rw_len || error "Verify RW failed"
+
+       rm -f $comp_file || error "Delete $comp_file failed"
+}
+run_test 1c "Test overstriping w/max stripe count"
 
 test_2() {
        local comp_file=$DIR/$tdir/$tfile
@@ -653,8 +747,9 @@ test_15() {
 }
 run_test 15 "Verify component options for lfs find"
 
-test_16() {
+test_16a() {
        [ $OSTCOUNT -lt 2 ] && skip "needs >= 2 OSTs"
+       large_xattr_enabled || skip_env "ea_inode feature disabled"
 
        local file=$DIR/$tdir/$tfile
        local dir=$DIR/$tdir/dir
@@ -700,7 +795,65 @@ test_16() {
        echo "4. plain dir"
        verify_yaml_layout $dir $dir.copy $temp.dir "4. plain dir"
 }
-run_test 16 "Verify setstripe/getstripe with YAML config file"
+run_test 16a "Verify setstripe/getstripe with YAML config file"
+
+# Run verify_yaml_layout on overstriped layouts, for four cases: PFL file,
+# plain file, PFL directory and plain directory.
+# Skipped without overstriping support, with fewer than 2 OSTs, with
+# LOV_MAX_STRIPE_COUNT / 2 or more OSTs, or without large xattr support.
+test_16b() {
+       [[ $($LCTL get_param mdc.*.import |
+               grep "connect_flags:.*overstriping") ]] ||
+               skip "server does not support overstriping"
+       [ $OSTCOUNT -lt 2 ] && skip "needs >= 2 OSTs"
+       [[ $OSTCOUNT -ge $(($LOV_MAX_STRIPE_COUNT / 2)) ]] &&
+               skip_env "too many osts, skipping"
+       large_xattr_enabled || skip_env "ea_inode feature disabled"
+
+       local file=$DIR/$tdir/$tfile
+       local dir=$DIR/$tdir/dir
+       local temp=$DIR/$tdir/template
+       # We know OSTCOUNT < (LOV_MAX_STRIPE_COUNT / 2), so this is overstriping
+       local large_count=$((LOV_MAX_STRIPE_COUNT / 2 + 10))
+
+       rm -rf $DIR/$tdir
+       test_mkdir $DIR/$tdir
+
+       #####################################################################
+       #                           1. PFL file, overstriping in first comps
+       # set stripe for source file
+       $LFS setstripe -E1m -S 1M -o0,0 -E2m -o1,1 -E3m -C $large_count -E-1 \
+               $file || error "Create $file failed"
+
+       echo "1. PFL file"
+       verify_yaml_layout $file $file.copy $temp "1. PFL file"
+
+       #####################################################################
+       #                           2. plain file + overstriping
+       # set stripe for source file
+       rm -f $file
+       $LFS setstripe -C $large_count -i1 $file || error "Create $file failed"
+
+       rm -f $file.copy
+       echo "2. plain file"
+       verify_yaml_layout $file $file.copy $temp "2. plain file"
+
+       #####################################################################
+       #                           3. PFL dir + overstriping
+       # set stripe for source dir
+       test_mkdir $dir
+       $LFS setstripe -E1m -S 1M -o 0,0 -E2m -C $large_count -E-1 $dir ||
+               error "setstripe $dir failed"
+
+       test_mkdir $dir.copy
+       echo "3. PFL dir"
+       verify_yaml_layout $dir $dir.copy $temp.dir "3. PFL dir"
+
+       #####################################################################
+       #                           4. plain dir + overstriping
+       # set stripe for source dir
+       $LFS setstripe -C $large_count $dir || error "setstripe $dir failed"
+
+       echo "4. plain dir"
+       verify_yaml_layout $dir $dir.copy $temp.dir "4. plain dir"
+}
+run_test 16b "Verify setstripe/getstripe with YAML config file + overstriping"
 
 test_17() {
        [ $OSTCOUNT -lt 2 ] && skip "needs >= 2 OSTs"
index a82727d..6522197 100755 (executable)
@@ -1533,6 +1533,112 @@ test_27b() {
 }
 run_test 27b "create and write to two stripe file"
 
+# 27c family tests specific striping, setstripe -o
+# 27ca: create a file on OST 1 with "setstripe -o", check that getstripe
+# reports that starting OST index, then write 4MB to it.
+test_27ca() {
+       [[ $OSTCOUNT -lt 2 ]] && skip_env "skipping 2-stripe test"
+       test_mkdir -p $DIR/$tdir
+       local osts="1"
+
+       $LFS setstripe -o $osts $DIR/$tdir/$tfile  || error "setstripe failed"
+       $LFS getstripe -i $DIR/$tdir/$tfile
+       [ $($LFS getstripe -i $DIR/$tdir/$tfile ) -eq $osts ] ||
+               error "stripe not on specified OST"
+
+       dd if=/dev/zero of=$DIR/$tdir/$tfile  bs=1M count=4 || error "dd failed"
+}
+run_test 27ca "one stripe on specified OST"
+
+# 27cb: create a file with the OST list "1,0", check that getstripe lists
+# the stripes on those OSTs in that order, then write 4MB to it.
+test_27cb() {
+       [[ $OSTCOUNT -lt 2 ]] && skip_env "skipping 2-stripe test"
+       test_mkdir -p $DIR/$tdir
+       local osts="1,0"
+       $LFS setstripe -o $osts $DIR/$tdir/$tfile || error "setstripe failed"
+       local getstripe=$($LFS getstripe $DIR/$tdir/$tfile)
+       echo "$getstripe"
+
+       # Strip getstripe output to a space separated list of OSTs
+       # (drop everything through the "obdidx" header line, keep column 1)
+       local getstripe_osts=$(echo "$getstripe" | sed -e '1,/obdidx/d' |\
+               awk '{print $1}' | tr '\n' '\ ' | sed -e 's/[[:space:]]*$//')
+       # compare against the requested list with commas turned into spaces
+       [ "$getstripe_osts" = "${osts//,/ }" ] ||
+               error "stripes not on specified OSTs"
+
+       dd if=/dev/zero of=$DIR/$tdir/$tfile bs=1M count=4 || error "dd failed"
+}
+run_test 27cb "two stripes on specified OSTs"
+
+# 27cc: create a file with the OST list "0,0" (both stripes on OST 0),
+# check that getstripe lists exactly those OSTs, then write 4MB to it.
+# Skipped when the import connect flags do not list overstriping.
+test_27cc() {
+       [[ $OSTCOUNT -lt 2 ]] && skip_env "skipping 2-stripe test"
+       [[ $($LCTL get_param mdc.*.import) =~ connect_flags.*overstriping ]] ||
+               skip "server does not support overstriping"
+
+       test_mkdir -p $DIR/$tdir
+       local osts="0,0"
+       $LFS setstripe -o $osts $DIR/$tdir/$tfile || error "setstripe failed"
+       local getstripe=$($LFS getstripe $DIR/$tdir/$tfile)
+       echo "$getstripe"
+
+       # Strip getstripe output to a space separated list of OSTs
+       # (drop everything through the "obdidx" header line, keep column 1)
+       local getstripe_osts=$(echo "$getstripe" | sed -e '1,/obdidx/d' |\
+               awk '{print $1}' | tr '\n' '\ ' | sed -e 's/[[:space:]]*$//')
+       # compare against the requested list with commas turned into spaces
+       [ "$getstripe_osts" = "${osts//,/ }" ] ||
+               error "stripes not on specified OSTs"
+
+       dd if=/dev/zero of=$DIR/$tdir/$tfile bs=1M count=4 || error "dd failed"
+}
+run_test 27cc "two stripes on the same OST"
+
+# 27cd: create a file with the OST list "0,1,1,0" (four stripes over two
+# OSTs), check that getstripe lists exactly those OSTs in order, then write
+# 4MB to it. Skipped when the import connect flags do not list overstriping.
+test_27cd() {
+       [[ $OSTCOUNT -lt 2 ]] && skip_env "skipping 2-stripe test"
+       [[ $($LCTL get_param mdc.*.import) =~ connect_flags.*overstriping ]] ||
+               skip "server does not support overstriping"
+       test_mkdir -p $DIR/$tdir
+       local osts="0,1,1,0"
+       $LFS setstripe -o $osts $DIR/$tdir/$tfile || error "setstripe failed"
+       local getstripe=$($LFS getstripe $DIR/$tdir/$tfile)
+       echo "$getstripe"
+
+       # Strip getstripe output to a space separated list of OSTs
+       # (drop everything through the "obdidx" header line, keep column 1)
+       local getstripe_osts=$(echo "$getstripe" | sed -e '1,/obdidx/d' |\
+               awk '{print $1}' | tr '\n' '\ ' | sed -e 's/[[:space:]]*$//')
+       # compare against the requested list with commas turned into spaces
+       [ "$getstripe_osts" = "${osts//,/ }" ] ||
+               error "stripes not on specified OSTs"
+
+       dd if=/dev/zero of=$DIR/$tdir/$tfile bs=1M count=4 || error "dd failed"
+}
+run_test 27cd "four stripes on two OSTs"
+
+# 27ce: create a file with OSTCOUNT + 1 stripes, all on OST 0, using
+# "setstripe -o", check that getstripe lists exactly those OSTs, then write
+# 4MB to it. Skipped with LOV_MAX_STRIPE_COUNT / 2 or more OSTs, or when the
+# import connect flags do not list overstriping.
+test_27ce() {
+       [[ $OSTCOUNT -ge $(($LOV_MAX_STRIPE_COUNT / 2)) ]] &&
+               skip_env "too many osts, skipping"
+       [[ $($LCTL get_param mdc.*.import) =~ connect_flags.*overstriping ]] ||
+               skip "server does not support overstriping"
+       # We do one more stripe than we have OSTs, so from 159 OSTs on the
+       # layout has 160 or more stripes; only such wide layouts are taken
+       # to need large xattr (ea_inode) support, smaller ones always run
+       [ $OSTCOUNT -lt 159 ] || large_xattr_enabled ||
+               skip_env "ea_inode feature disabled"
+
+       test_mkdir -p $DIR/$tdir
+       local osts=""
+       # build a comma separated list of OSTCOUNT + 1 zeros
+       for i in $(seq 0 $OSTCOUNT);
+       do
+               osts=$osts"0"
+               if [ $i -ne $OSTCOUNT ]; then
+                       osts=$osts","
+               fi
+       done
+       $LFS setstripe -o $osts $DIR/$tdir/$tfile || error "setstripe failed"
+       local getstripe=$($LFS getstripe $DIR/$tdir/$tfile)
+       echo "$getstripe"
+
+       # Strip getstripe output to a space separated list of OSTs
+       local getstripe_osts=$(echo "$getstripe" | sed -e '1,/obdidx/d' |\
+               awk '{print $1}' | tr '\n' '\ ' | sed -e 's/[[:space:]]*$//')
+       [ "$getstripe_osts" = "${osts//,/ }" ] ||
+               error "stripes not on specified OSTs"
+
+       dd if=/dev/zero of=$DIR/$tdir/$tfile bs=1M count=4 || error "dd failed"
+}
+run_test 27ce "more stripes than OSTs with -o"
+
 test_27d() {
        test_mkdir $DIR/$tdir
        $LFS setstripe -c 0 -i -1 -S 0 $DIR/$tdir/$tfile ||
@@ -2188,7 +2294,8 @@ test_27B() { # LU-2523
 }
 run_test 27B "call setstripe on open unlinked file/rename victim"
 
-test_27C() { #LU-2871
+# 27C family tests full striping and overstriping
+test_27Ca() { #LU-2871
        [[ $OSTCOUNT -lt 2 ]] && skip_env "needs >= 2 OSTs"
 
        declare -a ost_idx
@@ -2224,7 +2331,143 @@ test_27C() { #LU-2871
                done
        done
 }
-run_test 27C "check full striping across all OSTs"
+run_test 27Ca "check full striping across all OSTs"
+
+# 27Cb: create a file with "setstripe -C" and twice as many stripes as
+# OSTs, check the stripe count and that getstripe reports "overstriped",
+# then write 4MB to it.
+test_27Cb() {
+       [[ $($LCTL get_param mdc.*.import) =~ connect_flags.*overstriping ]] ||
+               skip "server does not support overstriping"
+       [[ $OSTCOUNT -ge $(($LOV_MAX_STRIPE_COUNT / 2)) ]] &&
+               skip_env "too many osts, skipping"
+
+       test_mkdir -p $DIR/$tdir
+       local setcount=$(($OSTCOUNT * 2))
+       # only layouts of 160 or more stripes are taken to need large xattr
+       # (ea_inode) support; smaller ones always run
+       [ $setcount -lt 160 ] || large_xattr_enabled ||
+               skip_env "ea_inode feature disabled"
+
+       $LFS setstripe -C $setcount $DIR/$tdir/$tfile ||
+               error "setstripe failed"
+
+       local count=$($LFS getstripe -c $DIR/$tdir/$tfile)
+       [ $count -eq $setcount ] ||
+               error "stripe count $count, should be $setcount"
+
+       $LFS getstripe $DIR/$tdir/$tfile 2>&1 | grep "overstriped" ||
+               error "overstriped should be set in pattern"
+
+       dd if=/dev/zero of=$DIR/$tdir/$tfile bs=1M count=4 conv=notrunc ||
+               error "dd failed"
+}
+run_test 27Cb "more stripes than OSTs with -C"
+
+# 27Cc: create a file with "setstripe -C" but one stripe fewer than there
+# are OSTs, check the stripe count and that getstripe does NOT report
+# "overstriped", then write 4MB to it.
+test_27Cc() {
+       [[ $($LCTL get_param mdc.*.import) =~ connect_flags.*overstriping ]] ||
+               skip "server does not support overstriping"
+       [[ $OSTCOUNT -lt 2 ]] && skip_env "need > 1 OST"
+
+       test_mkdir -p $DIR/$tdir
+       local setcount=$(($OSTCOUNT - 1))
+
+       # only layouts of 160 or more stripes are taken to need large xattr
+       # (ea_inode) support; smaller ones always run
+       [ $setcount -lt 160 ] || large_xattr_enabled ||
+               skip_env "ea_inode feature disabled"
+
+       $LFS setstripe -C $setcount $DIR/$tdir/$tfile ||
+               error "setstripe failed"
+
+       local count=$($LFS getstripe -c $DIR/$tdir/$tfile)
+       [ $count -eq $setcount ] ||
+               error "stripe count $count, should be $setcount"
+
+       $LFS getstripe $DIR/$tdir/$tfile 2>&1 | grep "overstriped" &&
+               error "overstriped should not be set in pattern"
+
+       dd if=/dev/zero of=$DIR/$tdir/$tfile bs=1M count=4 conv=notrunc ||
+               error "dd failed"
+}
+run_test 27Cc "fewer stripes than OSTs does not set overstriping"
+
+# 27Cd: create a file with "setstripe -C $LOV_MAX_STRIPE_COUNT", check the
+# stripe count and that getstripe reports "overstriped", write 4MB to it and
+# delete it. Skipped without overstriping or large xattr support.
+test_27Cd() {
+       [[ $($LCTL get_param mdc.*.import) =~ connect_flags.*overstriping ]] ||
+               skip "server does not support overstriping"
+       [[ $OSTCOUNT -lt 2 ]] && skip_env "need > 1 OST"
+       large_xattr_enabled || skip_env "ea_inode feature disabled"
+
+       test_mkdir -p $DIR/$tdir
+       local setcount=$LOV_MAX_STRIPE_COUNT
+
+       $LFS setstripe -C $setcount $DIR/$tdir/$tfile ||
+               error "setstripe failed"
+
+       local count=$($LFS getstripe -c $DIR/$tdir/$tfile)
+       [ $count -eq $setcount ] ||
+               error "stripe count $count, should be $setcount"
+
+       $LFS getstripe $DIR/$tdir/$tfile 2>&1 | grep "overstriped" ||
+               error "overstriped should be set in pattern"
+
+       dd if=/dev/zero of=$DIR/$tdir/$tfile bs=1M count=4 conv=notrunc ||
+               error "dd failed"
+
+       rm -f $DIR/$tdir/$tfile || error "Delete $tfile failed"
+}
+run_test 27Cd "test maximum stripe count"
+
+# 27Ce: create a pool named $TESTNAME holding only OST index 0, create a
+# file in that pool with "setstripe -C 8", check the stripe count and that
+# getstripe reports "overstriped", write 4MB to it and delete it.
+test_27Ce() {
+       [[ $($LCTL get_param mdc.*.import) =~ connect_flags.*overstriping ]] ||
+               skip "server does not support overstriping"
+       test_mkdir -p $DIR/$tdir
+
+       pool_add $TESTNAME || error "Pool creation failed"
+       # only a first index is given, so just that one OST is added
+       pool_add_targets $TESTNAME 0 || error "pool_add_targets failed"
+
+       local setcount=8
+
+       $LFS setstripe  -C $setcount -p "$TESTNAME" $DIR/$tdir/$tfile ||
+               error "setstripe failed"
+
+       local count=$($LFS getstripe -c $DIR/$tdir/$tfile)
+       [ $count -eq $setcount ] ||
+               error "stripe count $count, should be $setcount"
+
+       $LFS getstripe $DIR/$tdir/$tfile 2>&1 | grep "overstriped" ||
+               error "overstriped should be set in pattern"
+
+       dd if=/dev/zero of=$DIR/$tdir/$tfile bs=1M count=4 conv=notrunc ||
+               error "dd failed"
+
+       rm -f $DIR/$tdir/$tfile || error "Delete $tfile failed"
+}
+run_test 27Ce "test pool with overstriping"
+
+# 27Cf: set an overstriped default layout (twice as many stripes as OSTs)
+# on a directory, create a file in it and check that the file has that
+# stripe count and reports "overstriped"; then write 4MB to it and delete it.
+test_27Cf() {
+       [[ $($LCTL get_param mdc.*.import) =~ connect_flags.*overstriping ]] ||
+               skip "server does not support overstriping"
+       [[ $OSTCOUNT -ge $(($LOV_MAX_STRIPE_COUNT / 2)) ]] &&
+               skip_env "too many osts, skipping"
+
+       test_mkdir -p $DIR/$tdir
+
+       local setcount=$(($OSTCOUNT * 2))
+       # only layouts of 160 or more stripes are taken to need large xattr
+       # (ea_inode) support; smaller ones always run
+       [ $setcount -lt 160 ] || large_xattr_enabled ||
+               skip_env "ea_inode feature disabled"
+
+       $LFS setstripe  -C $setcount $DIR/$tdir/ ||
+               error "setstripe failed"
+
+       # the new file gets its layout from the directory default
+       echo 1 > $DIR/$tdir/$tfile
+
+       local count=$($LFS getstripe -c $DIR/$tdir/$tfile)
+       [ $count -eq $setcount ] ||
+               error "stripe count $count, should be $setcount"
+
+       $LFS getstripe $DIR/$tdir/$tfile 2>&1 | grep "overstriped" ||
+               error "overstriped should be set in pattern"
+
+       dd if=/dev/zero of=$DIR/$tdir/$tfile bs=1M count=4 conv=notrunc ||
+               error "dd failed"
+
+       rm -f $DIR/$tdir/$tfile || error "Delete $tfile failed"
+}
+run_test 27Cf "test default inheritance with overstriping"
 
 test_27D() {
        [ $OSTCOUNT -lt 2 ] && skip_env "needs >= 2 OSTs"
@@ -2252,6 +2495,9 @@ test_27D() {
        [ $MDS1_VERSION -lt $(version_code 2.9.55) ] ||
                [ $CLIENT_VERSION -lt $(version_code 2.9.55) ] &&
                        skip27D+=" -s 30,31"
+       [[ ! $($LCTL get_param mdc.*.import) =~ connect_flags.*overstriping ||
+         $OSTCOUNT -ge $(($LOV_MAX_STRIPE_COUNT / 2)) ]] &&
+               skip27D+=" -s 32,33"
        llapi_layout_test -d$DIR/$tdir -p$POOL -o$OSTCOUNT $skip27D ||
                error "llapi_layout_test failed"
 
@@ -6965,9 +7211,9 @@ test_65d() {
 
        if [[ $STRIPECOUNT -le 0 ]]; then
                sc=1
-       elif [[ $STRIPECOUNT -gt 2000 ]]; then
-#LOV_MAX_STRIPE_COUNT is 2000
-               [[ $OSTCOUNT -gt 2000 ]] && sc=2000 || sc=$(($OSTCOUNT - 1))
+       elif [[ $STRIPECOUNT -gt $LOV_MAX_STRIPE_COUNT ]]; then
+               [[ $OSTCOUNT -gt $LOV_MAX_STRIPE_COUNT ]] &&
+                       sc=$LOV_MAX_STRIPE_COUNT || sc=$(($OSTCOUNT - 1))
        else
                sc=$(($STRIPECOUNT - 1))
        fi
index 8be83a6..a0b18eb 100755 (executable)
@@ -418,6 +418,9 @@ init_test_env() {
        fi
 
        export TF_FAIL=${TF_FAIL:-$TMP/tf.fail}
+
+       # Constants used in more than one test script
+       export LOV_MAX_STRIPE_COUNT=2000
 }
 
 check_cpt_number() {
@@ -4304,11 +4307,7 @@ mkfs_opts() {
                opts+=${L_GETIDENTITY:+" --param=mdt.identity_upcall=$L_GETIDENTITY"}
 
                if [ $fstype == ldiskfs ]; then
-                       # Check for wide striping
-                       if [ $OSTCOUNT -gt 160 ]; then
-                               MDSJOURNALSIZE=${MDSJOURNALSIZE:-4096}
-                               fs_mkfs_opts+="-O ea_inode"
-                       fi
+                       fs_mkfs_opts+="-O ea_inode"
 
                        var=${facet}_JRN
                        if [ -n "${!var}" ]; then
@@ -8524,7 +8523,11 @@ pool_add_targets() {
        local last=$3
        local step=${4:-1}
 
-       local list=$(seq $first $step $last)
+       if [ -z $last ]; then
+               local list=$first
+       else
+               local list=$(seq $first $step $last)
+       fi
 
        local t=$(for i in $list; do printf "$FSNAME-OST%04x_UUID " $i; done)
        do_facet mgs $LCTL pool_add \
@@ -9464,4 +9467,3 @@ verify_yaml_layout() {
        [ "$layout1" == "$layout2" ] ||
                error "$msg_prefix $src/$dst layouts are not equal"
 }
-
index b3df1d2..e229763 100644 (file)
@@ -164,6 +164,7 @@ static inline int lfs_mirror_split(int argc, char **argv)
 #define SSM_CMD_COMMON(cmd) \
        "usage: "cmd" [--component-end|-E <comp_end>]\n"                \
        "                 [--stripe-count|-c <stripe_count>]\n"         \
+       "                 [--overstripe-count|-C <stripe_count>]\n"     \
        "                 [--stripe-index|-i <start_ost_idx>]\n"        \
        "                 [--stripe-size|-S <stripe_size>]\n"           \
        "                 [--layout|-L <pattern>]\n"                    \
@@ -174,6 +175,9 @@ static inline int lfs_mirror_split(int argc, char **argv)
 
 #define SSM_HELP_COMMON \
        "\tstripe_count: Number of OSTs to stripe over (0=fs default, -1 all)\n" \
+       "\t              Using -C instead of -c allows overstriping, which\n" \
+        "\t             will place more than one stripe per OST if\n" \
+        "\t             stripe_count is greater than the number of OSTs\n" \
        "\tstart_ost_idx: OST index of first stripe (-1=default round robin)\n"\
        "\tstripe_size:  Number of bytes on each OST (0=fs default)\n" \
        "\t              Can be specified with K, M or G (for KB, MB, GB\n" \
@@ -569,6 +573,7 @@ command_t cmdlist[] = {
         "layout\nto another (may be not safe with concurent writes).\n"
         "usage: migrate  "
         "[--stripe-count|-c] <stripe_count>\n"
+        "[--overstripe-count|-C] <stripe_count>\n"
         "              [--stripe-index|-i] <start_ost_index>\n"
         "              [--stripe-size|-S] <stripe_size>\n"
         "              [--pool|-p] <pool_name>\n"
@@ -578,6 +583,9 @@ command_t cmdlist[] = {
         "              [--non-direct|-D]\n"
         "              <file|directory>\n"
         "\tstripe_count:     number of OSTs to stripe a file over\n"
+        "\t              Using -C instead of -c allows overstriping, which\n"
+        "\t              will place more than one stripe per OST if\n"
+        "\t              stripe_count is greater than the number of OSTs\n"
         "\tstripe_ost_index: index of the first OST to stripe a file over\n"
         "\tstripe_size:      number of bytes to store before moving to the next OST\n"
         "\tpool_name:        name of the predefined pool of OSTs\n"
@@ -1951,20 +1959,23 @@ free_layout:
  * indices and ranges, for example "1,2-4,7". Add the indices into the
  * \a tgts array and remove duplicates.
  *
- * \param[out] tgts    array to store indices in
- * \param[in] size     size of \a tgts array
- * \param[in] offset   starting index in \a tgts
- * \param[in] arg      string containing OST index list
+ * \param[out] tgts            array to store indices in
+ * \param[in] size             size of \a tgts array
+ * \param[in] offset           starting index in \a tgts
+ * \param[in] arg              string containing OST index list
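+ * \param[in,out] pattern      layout pattern, may be NULL; an index already
+ *                             present in \a tgts is only accepted when this
+ *                             is LLAPI_LAYOUT_OVERSTRIPING (otherwise
+ *                             -EINVAL), and it is reset to
+ *                             LLAPI_LAYOUT_DEFAULT if no such duplicate
+ *                             was found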
  *
  * \retval positive    number of indices in \a tgts
  * \retval -EINVAL     unable to parse \a arg
  */
-static int parse_targets(__u32 *tgts, int size, int offset, char *arg)
+static int parse_targets(__u32 *tgts, int size, int offset, char *arg,
+                        unsigned long long *pattern)
 {
        int rc;
        int nr = offset;
        int slots = size - offset;
        char *ptr = NULL;
+       bool overstriped = false;
        bool end_of_loop;
 
        if (arg == NULL)
@@ -1972,8 +1983,8 @@ static int parse_targets(__u32 *tgts, int size, int offset, char *arg)
 
        end_of_loop = false;
        while (!end_of_loop) {
-               int start_index;
-               int end_index;
+               int start_index = 0;
+               int end_index = 0;
                int i;
                char *endptr = NULL;
 
@@ -2004,14 +2015,21 @@ static int parse_targets(__u32 *tgts, int size, int offset, char *arg)
 
                        /* remove duplicate */
                        for (j = 0; j < offset; j++) {
-                               if (tgts[j] == i)
-                                       break;
+                               if (tgts[j] == i && pattern &&
+                                   *pattern == LLAPI_LAYOUT_OVERSTRIPING)
+                                       overstriped = true;
+                               else if (tgts[j] == i)
+                                       return -EINVAL;
                        }
-                       if (j == offset) { /* no duplicate */
+
+                       j = offset;
+
+                       if (j == offset) { /* check complete */
                                tgts[nr++] = i;
                                --slots;
                        }
                }
+
                if (slots == 0 && i < end_index)
                        break;
 
@@ -2023,6 +2041,9 @@ static int parse_targets(__u32 *tgts, int size, int offset, char *arg)
        if (!end_of_loop && ptr != NULL)
                *ptr = ',';
 
+       if (!overstriped && pattern)
+               *pattern = LLAPI_LAYOUT_DEFAULT;
+
        return rc < 0 ? rc : nr;
 }
 
@@ -2201,6 +2222,13 @@ static int comp_args_to_layout(struct llapi_layout **composite,
                }
                /* Data-on-MDT component has always single stripe up to end */
                lsa->lsa_stripe_size = lsa->lsa_comp_end;
+       } else if (lsa->lsa_pattern == LLAPI_LAYOUT_OVERSTRIPING) {
+               rc = llapi_layout_pattern_set(layout, lsa->lsa_pattern);
+               if (rc) {
+                       fprintf(stderr, "Set stripe pattern %#llx failed. %s\n",
+                               lsa->lsa_pattern, strerror(errno));
+                       return rc;
+               }
        }
 
        rc = llapi_layout_stripe_size_set(layout, lsa->lsa_stripe_size);
@@ -2326,6 +2354,10 @@ static int build_layout_from_yaml_node(struct cYAML *node,
                                } else if (!strcmp(string, "pattern")) {
                                        if (!strcmp(node->cy_valuestring, "mdt"))
                                                lsa->lsa_pattern = LLAPI_LAYOUT_MDT;
+                                       if (!strcmp(node->cy_valuestring,
+                                                   "raid0,overstriped"))
+                                               lsa->lsa_pattern =
+                                                       LLAPI_LAYOUT_OVERSTRIPING;
                                } else if (!strcmp(string, "lcme_flags")) {
                                        rc = comp_str2flags(node->cy_valuestring,
                                                            &lsa->lsa_comp_flags,
@@ -2679,7 +2711,8 @@ static int lfs_setstripe_internal(int argc, char **argv,
        { .val = 'c',   .name = "stripe-count", .has_arg = required_argument},
        { .val = 'c',   .name = "stripe_count", .has_arg = required_argument},
        { .val = 'c',   .name = "mdt-count",    .has_arg = required_argument},
-/* find        { .val = 'C',   .name = "ctime",        .has_arg = required_argument }*/
+       { .val = 'C',   .name = "overstripe-count",
+                                               .has_arg = required_argument},
        { .val = 'd',   .name = "delete",       .has_arg = no_argument},
        { .val = 'd',   .name = "destroy",      .has_arg = no_argument},
        /* --non-direct is only valid in migrate mode */
@@ -2735,7 +2768,7 @@ static int lfs_setstripe_internal(int argc, char **argv,
        snprintf(cmd, sizeof(cmd), "%s %s", progname, argv[0]);
        progname = cmd;
        while ((c = getopt_long(argc, argv,
-                               "bc:dDE:f:H:i:I:m:N::no:p:L:s:S:vx:y:",
+                               "bc:C:dDE:f:H:i:I:m:N::no:p:L:s:S:vx:y:",
                                long_opts, NULL)) >= 0) {
                switch (c) {
                case 0:
@@ -2866,6 +2899,9 @@ static int lfs_setstripe_internal(int argc, char **argv,
                        }
                        migration_block = true;
                        break;
+               case 'C':
+                       lsa.lsa_pattern = LLAPI_LAYOUT_OVERSTRIPING;
+                       /* fall through */
                case 'c':
                        lsa.lsa_stripe_count = strtoul(optarg, &end, 0);
                        if (*end != '\0') {
@@ -3021,7 +3057,7 @@ static int lfs_setstripe_internal(int argc, char **argv,
                        migrate_mdt_mode = true;
                        lsa.lsa_nr_tgts = parse_targets(tgts,
                                                sizeof(tgts) / sizeof(__u32),
-                                               lsa.lsa_nr_tgts, optarg);
+                                               lsa.lsa_nr_tgts, optarg, NULL);
                        if (lsa.lsa_nr_tgts < 0) {
                                fprintf(stderr,
                                        "%s %s: invalid MDT target(s) '%s'\n",
@@ -3090,9 +3126,15 @@ static int lfs_setstripe_internal(int argc, char **argv,
                                fprintf(stderr, "warning: '--ost-list' is "
                                        "deprecated, use '--ost' instead\n");
 #endif
+                       /* -o allows overstriping, and must note it because
+                        * parse_targets is shared with MDT striping, which
+                        * does not allow duplicates
+                        */
+                       lsa.lsa_pattern = LLAPI_LAYOUT_OVERSTRIPING;
                        lsa.lsa_nr_tgts = parse_targets(tgts,
                                                sizeof(tgts) / sizeof(__u32),
-                                               lsa.lsa_nr_tgts, optarg);
+                                               lsa.lsa_nr_tgts, optarg,
+                                               &lsa.lsa_pattern);
                        if (lsa.lsa_nr_tgts < 0) {
                                fprintf(stderr,
                                        "%s %s: invalid OST target(s) '%s'\n",
@@ -3395,6 +3437,14 @@ static int lfs_setstripe_internal(int argc, char **argv,
                        param->lsp_stripe_offset = -1;
                else
                        param->lsp_stripe_offset = lsa.lsa_stripe_off;
+               param->lsp_stripe_pattern =
+                               llapi_pattern_to_lov(lsa.lsa_pattern);
+               if (param->lsp_stripe_pattern == EINVAL) {
+                       fprintf(stderr, "error: %s: invalid stripe pattern\n",
+                               argv[0]);
+                       free(param);
+                       goto usage_error;
+               }
                param->lsp_pool = lsa.lsa_pool_name;
                param->lsp_is_specific = false;
                if (lsa.lsa_nr_tgts > 0) {
@@ -3656,6 +3706,8 @@ static int name2layout(__u32 *layout, char *name)
                        *layout |= LOV_PATTERN_RAID0;
                else if (strcmp(layout_name, "mdt") == 0)
                        *layout |= LOV_PATTERN_MDT;
+               else if (strcmp(layout_name, "overstriping") == 0)
+                       *layout |= LOV_PATTERN_OVERSTRIPING;
                else
                        return -1;
        }
@@ -5253,15 +5305,17 @@ static int lfs_setdirstripe(int argc, char **argv)
                                        "%s %s: warning: '--index' deprecated, use '--mdt-index' instead\n",
                                        progname, argv[0]);
 #endif
+                       lsa.lsa_pattern = LLAPI_LAYOUT_OVERSTRIPING;
                        lsa.lsa_nr_tgts = parse_targets(mdts,
                                                sizeof(mdts) / sizeof(__u32),
-                                               lsa.lsa_nr_tgts, optarg);
+                                               lsa.lsa_nr_tgts, optarg, NULL);
                        if (lsa.lsa_nr_tgts < 0) {
                                fprintf(stderr,
                                        "%s %s: invalid MDT target(s) '%s'\n",
                                        progname, argv[0], optarg);
                                return CMD_HELP;
                        }
+                       lsa.lsa_pattern = 0;
 
                        lsa.lsa_tgts = mdts;
                        if (lsa.lsa_stripe_off == LLAPI_LAYOUT_DEFAULT)
index a738211..e58a668 100644 (file)
@@ -2683,12 +2683,15 @@ int sattr_cache_get_defaults(const char *const fsname,
 
 static char *layout2name(__u32 layout_pattern)
 {
-       if (layout_pattern == LOV_PATTERN_MDT)
+       if (layout_pattern & LOV_PATTERN_F_RELEASED)
+               return "released"; /* flag bit set, whatever else is set */
+       else if (layout_pattern == LOV_PATTERN_MDT)
                return "mdt";
        else if (layout_pattern == LOV_PATTERN_RAID0)
                return "raid0";
-       else if (layout_pattern == (LOV_PATTERN_RAID0 | LOV_PATTERN_F_RELEASED))
-               return "released";
+       else if (layout_pattern ==
+                       (LOV_PATTERN_RAID0 | LOV_PATTERN_OVERSTRIPING))
+               return "raid0,overstriped"; /* RAID0 with overstriping bit */
        else
                return "unknown";
 }
index bb1b5fc..8bdb276 100644 (file)
@@ -545,6 +545,9 @@ struct llapi_layout *llapi_layout_get_by_xattr(void *lov_xattr,
 
                if (v1->lmm_pattern == LOV_PATTERN_RAID0)
                        comp->llc_pattern = LLAPI_LAYOUT_RAID0;
+               else if (v1->lmm_pattern == (LOV_PATTERN_RAID0 |
+                                        LOV_PATTERN_OVERSTRIPING))
+                       comp->llc_pattern = LLAPI_LAYOUT_OVERSTRIPING;
                else
                        /* Lustre only supports RAID0 for now. */
                        comp->llc_pattern = v1->lmm_pattern;
@@ -600,6 +603,30 @@ out_layout:
        goto out;
 }
 
+__u32 llapi_pattern_to_lov(uint64_t pattern)
+{      /* Map an LLAPI_LAYOUT_* pattern to its LOV_PATTERN_* value. */
+       __u32 lov_pattern;      /* LOV_PATTERN_* result, or EINVAL */
+
+       switch (pattern) {
+       case LLAPI_LAYOUT_DEFAULT:      /* default is mapped to RAID0 */
+               lov_pattern = LOV_PATTERN_RAID0;
+               break;
+       case LLAPI_LAYOUT_RAID0:
+               lov_pattern = LOV_PATTERN_RAID0;
+               break;
+       case LLAPI_LAYOUT_MDT:
+               lov_pattern = LOV_PATTERN_MDT;
+               break;
+       case LLAPI_LAYOUT_OVERSTRIPING: /* overstriping bit ORed with RAID0 */
+               lov_pattern = LOV_PATTERN_OVERSTRIPING | LOV_PATTERN_RAID0;
+               break;
+       default:        /* any pattern value not listed above */
+               lov_pattern = EINVAL; /* positive errno as in-band marker */
+       }
+
+       return lov_pattern;     /* callers must compare against EINVAL */
+}
+
 /**
  * Convert the data from a llapi_layout to a newly allocated lov_user_md.
  * The caller is responsible for freeing the returned pointer.
@@ -694,12 +721,11 @@ llapi_layout_to_lum(const struct llapi_layout *layout)
                }
 
                blob->lmm_magic = magic;
-               if (pattern == LLAPI_LAYOUT_DEFAULT)
-                       blob->lmm_pattern = LOV_PATTERN_RAID0;
-               else if (pattern == LLAPI_LAYOUT_MDT)
-                       blob->lmm_pattern = LOV_PATTERN_MDT;
-               else
-                       blob->lmm_pattern = pattern;
+               blob->lmm_pattern = llapi_pattern_to_lov(pattern);
+               if (blob->lmm_pattern == EINVAL) {
+                       errno = EINVAL;
+                       goto error;
+               }
 
                if (comp->llc_stripe_size == LLAPI_LAYOUT_DEFAULT)
                        blob->lmm_stripe_size = 0;
@@ -1272,7 +1298,8 @@ int llapi_layout_pattern_set(struct llapi_layout *layout, uint64_t pattern)
                return -1;
 
        if (pattern != LLAPI_LAYOUT_DEFAULT &&
-           pattern != LLAPI_LAYOUT_RAID0 && pattern != LLAPI_LAYOUT_MDT) {
+           pattern != LLAPI_LAYOUT_RAID0 && pattern != LLAPI_LAYOUT_MDT
+           && pattern != LLAPI_LAYOUT_OVERSTRIPING) {
                errno = EOPNOTSUPP;
                return -1;
        }
index 349db93..a166338 100644 (file)
@@ -786,7 +786,7 @@ check_lov_mds_md_v3(void)
        CHECK_VALUE_X(LOV_PATTERN_RAID0);
        CHECK_VALUE_X(LOV_PATTERN_RAID1);
        CHECK_VALUE_X(LOV_PATTERN_MDT);
-       CHECK_VALUE_X(LOV_PATTERN_CMOBD);
+       CHECK_VALUE_X(LOV_PATTERN_OVERSTRIPING);
 }
 
 static void
index 6c75192..9597231 100644 (file)
@@ -1736,8 +1736,8 @@ void lustre_assert_wire_constants(void)
                (unsigned)LOV_PATTERN_RAID1);
        LASSERTF(LOV_PATTERN_MDT == 0x00000100UL, "found 0x%.8xUL\n",
                (unsigned)LOV_PATTERN_MDT);
-       LASSERTF(LOV_PATTERN_CMOBD == 0x00000200UL, "found 0x%.8xUL\n",
-               (unsigned)LOV_PATTERN_CMOBD);
+       LASSERTF(LOV_PATTERN_OVERSTRIPING == 0x00000200UL, "found 0x%.8xUL\n",
+               (unsigned)LOV_PATTERN_OVERSTRIPING);
 
        /* Checks for struct lov_comp_md_entry_v1 */
        LASSERTF((int)sizeof(struct lov_comp_md_entry_v1) == 48, "found %lld\n",