Whamcloud - gitweb
LU-4665 utils: lfs setstripe to specify OSTs 83/9383/29
authorJinshan Xiong <jinshan.xiong@intel.com>
Sat, 18 Oct 2014 03:29:33 +0000 (20:29 -0700)
committerOleg Drokin <oleg.drokin@intel.com>
Sat, 1 Nov 2014 04:55:54 +0000 (04:55 +0000)
Extend the lfs setstripe command to support specifying individual
OSTs. The [--ost-list|-o] option is introduced for this purpose.
Specifying OSTs is supported for regular files only. Directory
support will be implemented later in a separate project.

OSTs can be specified with lfs setstripe [--ost-list|-o] as follows:
  lfs setstripe [[--ost-list|-o] <ost_1>,<ost_i>-<ost_j>,<ost_n>]

For example, -o 1,2,4-6,8 will be expanded as OSTs:
  1,2,4,5,6,8

In addition, duplicate indices will be eliminated automatically.

Calculate the max easize by ld_active_tgt_count instead of
ld_tgt_count. However, this may introduce problems when the OSTs
are in recovery, because an insufficient buffer may be allocated
to store the EA.

This patch adds the following test cases into conf-sanity.sh:
test 81: sparse OST indexing
test 82a: specify OSTs for file (succeed) or directory (fail)
test 82b: specify OSTs for file with --pool and --ost-list options

Signed-off-by: Jian Yu <jian.yu@intel.com>
Signed-off-by: Jinshan Xiong <jinshan.xiong@intel.com>
Signed-off-by: James Simmons <uja.ornl@gmail.com>
Change-Id: I055ea492e909d1073e08d46e00cfb5389e958ad2
Reviewed-on: http://review.whamcloud.com/9383
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: John L. Hammond <john.hammond@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
24 files changed:
lustre/doc/lfs.1
lustre/include/lustre/lustre_idl.h
lustre/include/lustre/lustre_user.h
lustre/include/lustre/lustreapi.h
lustre/llite/file.c
lustre/llite/llite_internal.h
lustre/llite/llite_lib.c
lustre/llite/xattr.c
lustre/lod/lod_internal.h
lustre/lod/lod_lov.c
lustre/lod/lod_object.c
lustre/lod/lod_qos.c
lustre/lov/lov_pack.c
lustre/mgs/mgs_llog.c
lustre/obdclass/llog_osd.c
lustre/osp/osp_dev.c
lustre/osp/osp_sync.c
lustre/ptlrpc/pack_generic.c
lustre/tests/conf-sanity.sh
lustre/tests/sanity.sh
lustre/tests/test-framework.sh
lustre/utils/lfs.c
lustre/utils/liblustreapi.c
lustre/utils/mkfs_lustre.c

index b2ffcd2..6c1e5a3 100644 (file)
@@ -32,12 +32,12 @@ lfs \- Lustre utility to create a file with specific striping pattern, find the
 .B lfs getstripe [--obd|-O <uuid>] [--quiet|-q] [--verbose|-v] 
         \fB[--stripe-count|-c ] [--stripe-index|-i] [--mdt-index|-M]
         \fB[--stripe-size|-S] [--directory|-d]
-       \fB[--layout|-L]
+        \fB[--layout|-L]
         \fB[--pool|-p] [--recursive|-r] [--raw|-R] <dirname|filename> ...\fR
 .br
 .B lfs setstripe [--stripe-size|-S stripe_size] [--stripe-count|-c stripe_count]
-        \fB[--stripe-index|-i start_ost_index ] [--pool|-p <poolname>]
-        \fB<directory|filename>\fR
+        \fB[--stripe-index|-i start_ost_index] [--pool|-p <poolname>]
+        \fB[--ost-list|-o <ost_indices>] <directory|filename>\fR
 .br
 .B lfs setstripe -d <dir>
 .br
@@ -134,7 +134,7 @@ to that filesystem are displayed.
 .B getstripe [--obd|-O <uuid>] [--quiet|-q] [--verbose|-v] 
         \fB[--count | -c ] [--index | -i | --offset | -o  ]
         \fB[--pool | -p ] [--size | -s ] [--directory | -d ]
-       \fB[--layout | -L]
+        \fB[--layout | -L]
         \fB[--recursive | -r ] [--raw | -R ] <dirname|filename>\fR
 .br
 List the striping information for a given filename or directory tree.
@@ -176,9 +176,10 @@ You can limit the returned files to those with objects on a specific OST with
 .TP
 .B setstripe [--stripe-count|-c stripe_count] [--stripe-size|-S stripe_size]
         \fB[--stripe-index|-i start_ost_index] [--pool <poolname>]
-        \fB<dirname|filename>\fR
+        \fB[--ost-list|-o <ost_indices>] <dirname|filename>\fR
 .br
-To create a new file, or set the directory default, with the specified striping parameters.  The
+To create a new file, or set the directory default, with the specified striping
+parameters.  The
 .I stripe_count
 is the number of OSTs to stripe a file over. A
 .I stripe_count
@@ -192,18 +193,48 @@ of 0 means to use the filesystem-wide default stripe_size (default 1MB).  The
 .I start_ost_index
 is the OST index (starting at 0) on which to start striping for this file.  A
 .I start_ost_index
-of -1 allows the MDS to choose the starting index and it is strongly recommended, as this allows space and load balancing to be done by the MDS as needed.  The
+of -1 allows the MDS to choose the starting index and it is strongly
+recommended, as this allows space and load balancing to be done by the MDS as
+needed. The
+.B -o
+option is used to specify the exact stripe layout on the file system.
+.I ost_indices
+is a list of OSTs referenced by their indices, which are specified in decimal
+or hex form and can be obtained using the
+.B lfs osts
+command. The list format consists of individual OST indices and index ranges
+separated by commas, e.g. 1,2-4,7. The
+.B -o
+option may be specified multiple times to stripe across the union of all listed
+OSTs. If the
+.B -c
+option is combined with
+.B -o
+the
+.I stripe_count
+must agree with the number of OSTs in
+.IR ost_indices .
+If the
+.B -i
+option is combined with
+.B -o
+the
+.I start_ost_index
+must be in the OST list, and it will be used as the index on which to start
+striping the file. Otherwise the striping will occur in the order specified in
+.IR ost_indices .
+The
 .I poolname
-is the name of a predefined pool of OSTs (see 
-.B lctl
-) that will be used for striping. The 
+is the name of a predefined pool of OSTs (see
+.BR lctl (8))
+that will be used for striping. The
 .IR stripe_count ,
 .IR stripe_size ,
 and
 .I start_ost_index
-will be used as well; the 
+will be used as well; the
 .I start_ost_index
-must be part of the pool or an error will be returned. 
+must be part of the pool or an error will be returned.
 .TP
 .B setstripe -d
 Delete the default striping on the specified directory.
index 90ab5eb..dfc7933 100644 (file)
@@ -1579,6 +1579,8 @@ enum obdo_flags {
 #define LOV_MAGIC_JOIN_V1      (0x0BD20000 | LOV_MAGIC_MAGIC)
 #define LOV_MAGIC_V3           (0x0BD30000 | LOV_MAGIC_MAGIC)
 #define LOV_MAGIC_MIGRATE      (0x0BD40000 | LOV_MAGIC_MAGIC)
+/* reserved for specifying OSTs */
+#define LOV_MAGIC_SPECIFIC     (0x0BD50000 | LOV_MAGIC_MAGIC)
 #define LOV_MAGIC              LOV_MAGIC_V1
 
 /*
@@ -3680,6 +3682,8 @@ extern void lustre_swab_lov_user_md_v3(struct lov_user_md_v3 *lum);
 extern void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
                                             int stripe_count);
 extern void lustre_swab_lov_mds_md(struct lov_mds_md *lmm);
+void lustre_print_user_md(unsigned int level, struct lov_user_md *lum,
+                         const char *msg);
 
 /* llog_swab.c */
 extern void lustre_swab_llogd_body (struct llogd_body *d);
index 9f4063f..6beed4e 100644 (file)
@@ -299,10 +299,12 @@ enum ll_lease_type {
 #define LL_FILE_LOCKLESS_IO     0x00000010 /* server-side locks with cio */
 #define LL_FILE_RMTACL          0x00000020
 
-#define LOV_USER_MAGIC_V1 0x0BD10BD0
-#define LOV_USER_MAGIC    LOV_USER_MAGIC_V1
-#define LOV_USER_MAGIC_JOIN_V1 0x0BD20BD0
-#define LOV_USER_MAGIC_V3 0x0BD30BD0
+#define LOV_USER_MAGIC_V1      0x0BD10BD0
+#define LOV_USER_MAGIC         LOV_USER_MAGIC_V1
+#define LOV_USER_MAGIC_JOIN_V1 0x0BD20BD0
+#define LOV_USER_MAGIC_V3      0x0BD30BD0
+/* 0x0BD40BD0 is occupied by LOV_MAGIC_MIGRATE */
+#define LOV_USER_MAGIC_SPECIFIC 0x0BD50BD0     /* for specific OSTs */
 
 #define LMV_USER_MAGIC    0x0CD30CD0    /*default lmv magic*/
 
@@ -378,11 +380,10 @@ struct lov_user_md_v3 {           /* LOV EA user data (host-endian) */
 
 static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic)
 {
-       if (lmm_magic == LOV_USER_MAGIC_V3)
-               return sizeof(struct lov_user_md_v3) +
-                               stripes * sizeof(struct lov_user_ost_data_v1);
-       else
+       if (lmm_magic == LOV_USER_MAGIC_V1)
                return sizeof(struct lov_user_md_v1) +
+                             stripes * sizeof(struct lov_user_ost_data_v1);
+       return sizeof(struct lov_user_md_v3) +
                                stripes * sizeof(struct lov_user_ost_data_v1);
 }
 
index b30f9e2..52b352d 100644 (file)
@@ -92,6 +92,19 @@ void llapi_error(enum llapi_message_level level, int err, const char *fmt, ...)
 void llapi_printf(enum llapi_message_level level, const char *fmt, ...)
        __attribute__((__format__(__printf__, 2, 3)));
 
+struct llapi_stripe_param {
+       unsigned long long      lsp_stripe_size;
+       char                    *lsp_pool;
+       int                     lsp_stripe_offset;
+       int                     lsp_stripe_pattern;
+       /* Number of stripes. Size of lsp_osts[] if lsp_is_specific is true.*/
+       int                     lsp_stripe_count;
+       bool                    lsp_is_specific;
+       __u32                   lsp_osts[0];
+};
+
+extern int llapi_file_open_param(const char *name, int flags, mode_t mode,
+                                const struct llapi_stripe_param *param);
 extern int llapi_file_create(const char *name, unsigned long long stripe_size,
                              int stripe_offset, int stripe_count,
                              int stripe_pattern);
index 72a8caa..59a828f 100644 (file)
@@ -1670,40 +1670,32 @@ static int ll_lov_setea(struct inode *inode, struct file *file,
 static int ll_lov_setstripe(struct inode *inode, struct file *file,
                            unsigned long arg)
 {
-       struct lov_user_md_v3    lumv3;
-       struct lov_user_md_v1   *lumv1 = (struct lov_user_md_v1 *)&lumv3;
-       struct lov_user_md_v1 __user *lumv1p =
-               (struct lov_user_md_v1 __user *)arg;
-       struct lov_user_md_v3 __user *lumv3p =
-               (struct lov_user_md_v3 __user *)arg;
-       int                      lum_size, rc;
-       __u64                    flags = FMODE_WRITE;
+       struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
+       struct lov_user_md        *klum;
+       int                        lum_size, rc;
+       __u64                      flags = FMODE_WRITE;
        ENTRY;
 
-       /* first try with v1 which is smaller than v3 */
-       lum_size = sizeof(struct lov_user_md_v1);
-       if (copy_from_user(lumv1, lumv1p, lum_size))
-               RETURN(-EFAULT);
-
-       if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
-               lum_size = sizeof(struct lov_user_md_v3);
-               if (copy_from_user(&lumv3, lumv3p, lum_size))
-                       RETURN(-EFAULT);
-       }
+       rc = ll_copy_user_md(lum, &klum);
+       if (rc < 0)
+               RETURN(rc);
 
-       rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
+       lum_size = rc;
+       rc = ll_lov_setstripe_ea_info(inode, file, flags, klum, lum_size);
        if (rc == 0) {
                struct lov_stripe_md *lsm;
                __u32 gen;
 
-               put_user(0, &lumv1p->lmm_stripe_count);
+               put_user(0, &lum->lmm_stripe_count);
 
                ll_layout_refresh(inode, &gen);
                lsm = ccc_inode_lsm_get(inode);
                rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
-                                  0, lsm, (void __user *)arg);
+                                  0, lsm, lum);
                ccc_inode_lsm_put(inode, lsm);
        }
+
+       OBD_FREE(klum, lum_size);
        RETURN(rc);
 }
 
index 4f9b890..fbc51f4 100644 (file)
@@ -966,6 +966,27 @@ void ll_finish_md_op_data(struct md_op_data *op_data);
 int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg);
 char *ll_get_fsname(struct super_block *sb, char *buf, int buflen);
 void ll_compute_rootsquash_state(struct ll_sb_info *sbi);
+ssize_t ll_copy_user_md(const struct lov_user_md __user *md,
+                       struct lov_user_md **kbuf);
+
+/* Compute expected user md size when passing in a md from user space */
+static inline ssize_t ll_lov_user_md_size(const struct lov_user_md *lum)
+{
+       switch (lum->lmm_magic) {
+       case LOV_USER_MAGIC_V1:
+               return sizeof(struct lov_user_md_v1);
+       case LOV_USER_MAGIC_V3:
+               return sizeof(struct lov_user_md_v3);
+       case LOV_USER_MAGIC_SPECIFIC:
+               if (lum->lmm_stripe_count > LOV_MAX_STRIPE_COUNT)
+                       return -EINVAL;
+
+               return lov_user_md_size(lum->lmm_stripe_count,
+                                       LOV_USER_MAGIC_SPECIFIC);
+       }
+
+       return -EINVAL;
+}
 
 /* llite/llite_nfs.c */
 extern struct export_operations lustre_export_operations;
index 17e388f..bfc14a9 100644 (file)
@@ -2742,6 +2742,32 @@ void ll_dirty_page_discard_warn(struct page *page, int ioret)
                free_page((unsigned long)buf);
 }
 
+ssize_t ll_copy_user_md(const struct lov_user_md __user *md,
+                       struct lov_user_md **kbuf)
+{
+       struct lov_user_md      lum;
+       ssize_t                 lum_size;
+       ENTRY;
+
+       if (copy_from_user(&lum, md, sizeof(lum)))
+               RETURN(-EFAULT);
+
+       lum_size = ll_lov_user_md_size(&lum);
+       if (lum_size < 0)
+               RETURN(lum_size);
+
+       OBD_ALLOC(*kbuf, lum_size);
+       if (*kbuf == NULL)
+               RETURN(-ENOMEM);
+
+       if (copy_from_user(*kbuf, md, lum_size) != 0) {
+               OBD_FREE(*kbuf, lum_size);
+               RETURN(-EFAULT);
+       }
+
+       RETURN(lum_size);
+}
+
 /*
  * Compute llite root squash state after a change of root squash
  * configuration setting or add/remove of a lnet nid
index f0d48f4..95c2fe2 100644 (file)
@@ -234,8 +234,8 @@ int ll_setxattr(struct dentry *dentry, const char *name,
             (strncmp(name, XATTR_LUSTRE_PREFIX,
                      sizeof(XATTR_LUSTRE_PREFIX) - 1) == 0 &&
              strcmp(name + sizeof(XATTR_LUSTRE_PREFIX) - 1, "lov") == 0)) {
-                struct lov_user_md *lump = (struct lov_user_md *)value;
-                int rc = 0;
+               struct lov_user_md *lump = (struct lov_user_md *)value;
+               int                 rc = 0;
 
                 /* Attributes that are saved via getxattr will always have
                  * the stripe_offset as 0.  Instead, the MDS should be
@@ -246,14 +246,17 @@ int ll_setxattr(struct dentry *dentry, const char *name,
                if (lump != NULL && S_ISREG(inode->i_mode)) {
                        struct file     f;
                        __u64           it_flags = FMODE_WRITE;
-                       int lum_size = (lump->lmm_magic == LOV_USER_MAGIC_V1) ?
-                               sizeof(*lump) : sizeof(struct lov_user_md_v3);
+                       int             lum_size;
+
+                       lum_size = ll_lov_user_md_size(lump);
+                       if (lum_size < 0 || size < lum_size)
+                               return 0; /* b=10667: ignore error */
 
                        memset(&f, 0, sizeof(f)); /* f.f_flags is used below */
                        f.f_dentry = dentry;
                        rc = ll_lov_setstripe_ea_info(inode, &f, it_flags, lump,
                                                      lum_size);
-                       /* b10667: rc always be 0 here for now */
+                       /* b=10667: rc always be 0 here for now */
                        rc = 0;
                 } else if (S_ISDIR(inode->i_mode)) {
                         rc = ll_dir_setstripe(inode, lump, 0);
index b1f828c..36dfee2 100644 (file)
@@ -52,6 +52,8 @@
 #define LMVEA_DELETE_VALUES(count, offset)                             \
        ((count) == 0 && (offset) == (typeof(offset))(-1))
 
+#define LOV_OFFSET_DEFAULT             ((__u16)-1)
+
 struct lod_qos_rr {
        __u32                    lqr_start_idx; /* start index of new inode */
        __u32                    lqr_offset_idx; /* aliasing for start_idx */
index a65eec6..6b352e8 100644 (file)
@@ -1160,9 +1160,8 @@ int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf,
                GOTO(out, rc = -EINVAL);
        }
 
-       /* an offset of -1 is treated as a "special" valid offset */
        stripe_offset = le16_to_cpu(lum->lmm_stripe_offset);
-       if (stripe_offset != (typeof(stripe_offset))-1) {
+       if (stripe_offset != LOV_OFFSET_DEFAULT) {
                /* if offset is not within valid range [0, osts_size) */
                if (stripe_offset >= d->lod_osts_size) {
                        CDEBUG(D_IOCTL, "stripe offset %u >= bitmap size %u\n",
@@ -1212,7 +1211,7 @@ int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf,
        if (pool == NULL)
                goto out;
 
-       if (stripe_offset != (typeof(stripe_offset))-1) {
+       if (stripe_offset != LOV_OFFSET_DEFAULT) {
                rc = lod_check_index_in_pool(stripe_offset, pool);
                if (rc < 0)
                        GOTO(out, rc = -EINVAL);
index 1a393c0..9417ba3 100644 (file)
@@ -3192,7 +3192,7 @@ static void lod_ah_init(const struct lu_env *env,
        if (likely(parent)) {
                lod_cache_parent_striping(env, lp, child_mode);
 
-               lc->ldo_def_stripe_offset = (__u16) -1;
+               lc->ldo_def_stripe_offset = LOV_OFFSET_DEFAULT;
 
                if (lp->ldo_def_striping_set) {
                        if (lp->ldo_pool)
index 1ac8b28..065ade0 100644 (file)
@@ -1022,6 +1022,107 @@ out:
 }
 
 /**
+ * Allocate a specific striping layout on a user defined set of OSTs.
+ *
+ * Allocates new striping using the OST index range provided by the data from
+ * the lmm_objects contained in the lov_user_md passed to this method. Full
+ * OSTs are not considered. The exact order of OSTs requested by the user
+ * is respected as much as possible depending on OST status. The number of
+ * stripes needed and stripe offset are taken from the object. If that number
+ * can not be met, then the function returns a failure and then it's the
+ * caller's responsibility to release the stripes allocated. All the internal
+ * structures are protected, but no concurrent allocation is allowed on the
+ * same objects.
+ *
+ * \param[in] env      execution environment for this thread
+ * \param[in] lo       LOD object
+ * \param[out] stripe  striping created
+ * \param[in] lum      stripe md to specify list of OSTs
+ * \param[in] th       transaction handle
+ *
+ * \retval 0           on success
+ * \retval -ENODEV     OST index does not exist on file system
+ * \retval -EINVAL     requested OST index is invalid
+ * \retval negative    negated errno on error
+ */
+static int lod_alloc_ost_list(const struct lu_env *env,
+                             struct lod_object *lo, struct dt_object **stripe,
+                             struct lov_user_md *lum, struct thandle *th)
+{
+       struct lod_device       *m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
+       struct obd_statfs       *sfs = &lod_env_info(env)->lti_osfs;
+       struct dt_object        *o;
+       struct lov_user_md_v3   *v3;
+       unsigned int            array_idx = 0;
+       int                     stripe_count = 0;
+       int                     i;
+       int                     rc;
+       ENTRY;
+
+       /* for specific OSTs layout */
+       LASSERT(lum != NULL && lum->lmm_magic == LOV_USER_MAGIC_SPECIFIC);
+       lustre_print_user_md(D_OTHER, lum, __func__);
+
+       rc = lod_qos_ost_in_use_clear(env, lo->ldo_stripenr);
+       if (rc < 0)
+               RETURN(rc);
+
+       v3 = (struct lov_user_md_v3 *)lum;
+       for (i = 0; i < lo->ldo_stripenr; i++) {
+               if (v3->lmm_objects[i].l_ost_idx == lo->ldo_def_stripe_offset) {
+                       array_idx = i;
+                       break;
+               }
+       }
+       if (i == lo->ldo_stripenr) {
+               CDEBUG(D_OTHER,
+                      "%s: start index %d not in the specified list of OSTs\n",
+                      lod2obd(m)->obd_name, lo->ldo_def_stripe_offset);
+               RETURN(-EINVAL);
+       }
+
+       for (i = 0; i < lo->ldo_stripenr;
+            i++, array_idx = (array_idx + 1) % lo->ldo_stripenr) {
+               __u32 ost_idx = v3->lmm_objects[array_idx].l_ost_idx;
+
+               if (!cfs_bitmap_check(m->lod_ost_bitmap, ost_idx)) {
+                       rc = -ENODEV;
+                       break;
+               }
+
+               /*
+                * do not put >1 objects on a single OST
+                */
+               if (lod_qos_is_ost_used(env, ost_idx, stripe_count)) {
+                       rc = -EINVAL;
+                       break;
+               }
+
+               rc = lod_statfs_and_check(env, m, ost_idx, sfs);
+               if (rc < 0) /* this OSP doesn't feel well */
+                       break;
+
+               o = lod_qos_declare_object_on(env, m, ost_idx, th);
+               if (IS_ERR(o)) {
+                       rc = PTR_ERR(o);
+                       CDEBUG(D_OTHER,
+                              "%s: can't declare new object on #%u: %d\n",
+                              lod2obd(m)->obd_name, ost_idx, rc);
+                       break;
+               }
+
+               /*
+                * We've successfully declared (reserved) an object
+                */
+               lod_qos_ost_in_use(env, stripe_count, ost_idx);
+               stripe[stripe_count] = o;
+               stripe_count++;
+       }
+
+       RETURN(rc);
+}
+
+/**
  * Allocate a striping on a predefined set of OSTs.
  *
  * Allocates new striping starting from OST provided lo->ldo_def_stripe_offset.
@@ -1562,97 +1663,122 @@ static int lod_qos_parse_config(const struct lu_env *env,
        struct lod_device     *d = lu2lod_dev(lod2lu_obj(lo)->lo_dev);
        struct lov_user_md_v1 *v1 = NULL;
        struct lov_user_md_v3 *v3 = NULL;
-       struct pool_desc      *pool;
+       char                  *pool_name = NULL;
        __u32                  magic;
        int                    rc;
+       unsigned int           size;
        ENTRY;
 
        if (buf == NULL || buf->lb_buf == NULL || buf->lb_len == 0)
                RETURN(0);
 
+       v3 = buf->lb_buf;
        v1 = buf->lb_buf;
        magic = v1->lmm_magic;
 
-       if (magic == __swab32(LOV_USER_MAGIC_V1)) {
+       if (unlikely(magic == LOV_MAGIC_V1_DEF || magic == LOV_MAGIC_V3_DEF)) {
+               /* try to use as fully defined striping */
+               rc = lod_use_defined_striping(env, lo, buf);
+               RETURN(rc);
+       }
+
+       switch (magic) {
+       case __swab32(LOV_USER_MAGIC_V1):
                lustre_swab_lov_user_md_v1(v1);
                magic = v1->lmm_magic;
-       } else if (magic == __swab32(LOV_USER_MAGIC_V3)) {
-               v3 = buf->lb_buf;
+               /* fall through */
+       case LOV_USER_MAGIC_V1:
+               size = sizeof(*v1);
+               break;
+
+       case __swab32(LOV_USER_MAGIC_V3):
                lustre_swab_lov_user_md_v3(v3);
                magic = v3->lmm_magic;
-       }
+               /* fall through */
+       case LOV_USER_MAGIC_V3:
+               size = sizeof(*v3);
+               pool_name = v3->lmm_pool_name;
+               break;
 
-       if (unlikely(magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3)) {
-               /* try to use as fully defined striping */
-               rc = lod_use_defined_striping(env, lo, buf);
-               RETURN(rc);
+       case __swab32(LOV_USER_MAGIC_SPECIFIC):
+               lustre_swab_lov_user_md_v3(v3);
+               lustre_swab_lov_user_md_objects(v3->lmm_objects,
+                                               v3->lmm_stripe_count);
+               magic = v3->lmm_magic;
+               /* fall through */
+       case LOV_USER_MAGIC_SPECIFIC:
+               if (v3->lmm_stripe_offset == LOV_OFFSET_DEFAULT)
+                       v3->lmm_stripe_offset = v3->lmm_objects[0].l_ost_idx;
+               if (v3->lmm_pool_name[0] != '\0')
+                       pool_name = v3->lmm_pool_name;
+               size = lov_user_md_size(v3->lmm_stripe_count,
+                                       LOV_USER_MAGIC_SPECIFIC);
+               break;
+
+       default:
+               CERROR("%s: unrecognized magic %X\n",
+                      lod2obd(d)->obd_name, magic);
+               RETURN(-EINVAL);
        }
 
-       if (unlikely(buf->lb_len < sizeof(*v1))) {
-               CERROR("wrong size: %u\n", (unsigned) buf->lb_len);
+       if (unlikely(buf->lb_len < size)) {
+               CERROR("%s: wrong size: %zd, expect: %u\n",
+                      lod2obd(d)->obd_name, buf->lb_len, size);
                RETURN(-EINVAL);
        }
 
+       lustre_print_user_md(D_OTHER, v1, "parse config");
+
        v1->lmm_magic = magic;
        if (v1->lmm_pattern == 0)
                v1->lmm_pattern = LOV_PATTERN_RAID0;
        if (lov_pattern(v1->lmm_pattern) != LOV_PATTERN_RAID0) {
-               CERROR("invalid pattern: %x\n", v1->lmm_pattern);
+               CERROR("%s: invalid pattern: %x\n",
+                      lod2obd(d)->obd_name, v1->lmm_pattern);
                RETURN(-EINVAL);
        }
        lo->ldo_pattern = v1->lmm_pattern;
 
-       if (v1->lmm_stripe_size)
+       if (v1->lmm_stripe_size > 0)
                lo->ldo_stripe_size = v1->lmm_stripe_size;
+
        if (lo->ldo_stripe_size & (LOV_MIN_STRIPE_SIZE - 1))
                lo->ldo_stripe_size = LOV_MIN_STRIPE_SIZE;
 
-       if (v1->lmm_stripe_count)
+       if (v1->lmm_stripe_count > 0)
                lo->ldo_stripenr = v1->lmm_stripe_count;
 
-       if ((v1->lmm_stripe_offset >= d->lod_desc.ld_tgt_count) &&
-           (v1->lmm_stripe_offset != (typeof(v1->lmm_stripe_offset))(-1))) {
-               CERROR("invalid offset: %x\n", v1->lmm_stripe_offset);
-               RETURN(-EINVAL);
-       }
        lo->ldo_def_stripe_offset = v1->lmm_stripe_offset;
 
-       CDEBUG(D_OTHER, "lsm: %u size, %u stripes, %u offset\n",
-              v1->lmm_stripe_size, v1->lmm_stripe_count,
-              v1->lmm_stripe_offset);
-
-       if (v1->lmm_magic == LOV_MAGIC_V3) {
-               if (buf->lb_len < sizeof(*v3)) {
-                       CERROR("wrong size: %u\n", (unsigned) buf->lb_len);
-                       RETURN(-EINVAL);
-               }
-
-               v3 = buf->lb_buf;
-               lod_object_set_pool(lo, v3->lmm_pool_name);
+       lod_object_set_pool(lo, NULL);
+       if (pool_name != NULL) {
+               struct pool_desc *pool;
 
                /* In the function below, .hs_keycmp resolves to
                 * pool_hashkey_keycmp() */
                /* coverity[overrun-buffer-val] */
-               pool = lod_find_pool(d, v3->lmm_pool_name);
+               pool = lod_find_pool(d, pool_name);
                if (pool != NULL) {
-                       if (lo->ldo_def_stripe_offset !=
-                           (typeof(v1->lmm_stripe_offset))(-1)) {
-                               rc = lo->ldo_def_stripe_offset;
-                               rc = lod_check_index_in_pool(rc, pool);
+                       if (lo->ldo_def_stripe_offset != LOV_OFFSET_DEFAULT) {
+                               rc = lod_check_index_in_pool(
+                                              lo->ldo_def_stripe_offset, pool);
                                if (rc < 0) {
                                        lod_pool_putref(pool);
-                                       CERROR("invalid offset\n");
+                                       CERROR("%s: invalid offset, %u\n",
+                                              lod2obd(d)->obd_name,
+                                              lo->ldo_def_stripe_offset);
                                        RETURN(-EINVAL);
                                }
                        }
 
                        if (lo->ldo_stripenr > pool_tgt_count(pool))
-                               lo->ldo_stripenr= pool_tgt_count(pool);
+                               lo->ldo_stripenr = pool_tgt_count(pool);
 
                        lod_pool_putref(pool);
                }
-       } else
-               lod_object_set_pool(lo, NULL);
+
+               lod_object_set_pool(lo, pool_name);
+       }
 
        /* fixup for released file */
        if (lo->ldo_pattern & LOV_PATTERN_F_RELEASED) {
@@ -1718,6 +1844,8 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
                GOTO(out, rc = 0);
 
        if (likely(lo->ldo_stripe == NULL)) {
+               struct lov_user_md *lum = NULL;
+
                /*
                 * no striping has been created so far
                 */
@@ -1728,7 +1856,7 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
                 */
                lod_qos_statfs_update(env, d);
                lo->ldo_stripenr = lod_get_stripecnt(d, LOV_MAGIC,
-                               lo->ldo_stripenr);
+                                                    lo->ldo_stripenr);
 
                stripe_len = lo->ldo_stripenr;
                OBD_ALLOC(stripe, sizeof(stripe[0]) * stripe_len);
@@ -1739,7 +1867,13 @@ int lod_qos_prep_create(const struct lu_env *env, struct lod_object *lo,
                /* XXX: support for non-0 files w/o objects */
                CDEBUG(D_OTHER, "tgt_count %d stripenr %d\n",
                                d->lod_desc.ld_tgt_count, stripe_len);
-               if (lo->ldo_def_stripe_offset >= d->lod_desc.ld_tgt_count) {
+
+               if (buf != NULL && buf->lb_buf != NULL)
+                       lum = buf->lb_buf;
+
+               if (lum != NULL && lum->lmm_magic == LOV_USER_MAGIC_SPECIFIC) {
+                       rc = lod_alloc_ost_list(env, lo, stripe, lum, th);
+               } else if (lo->ldo_def_stripe_offset == LOV_OFFSET_DEFAULT) {
                        rc = lod_alloc_qos(env, lo, stripe, flag, th);
                        if (rc == -EAGAIN)
                                rc = lod_alloc_rr(env, lo, stripe, flag, th);
index 0ede0a3..3c6f7c9 100644 (file)
@@ -172,17 +172,11 @@ int lov_packmd(struct obd_export *exp, struct lov_mds_md **lmmp,
                } else {
                        stripe_count = 0;
                }
-        } else {
-                /* No need to allocate more than maximum supported stripes.
-                 * Anyway, this is pretty inaccurate since ld_tgt_count now
-                 * represents max index and we should rely on the actual number
-                 * of OSTs instead */
-               stripe_count = lov_mds_md_max_stripe_count(
-                       lov->lov_ocd.ocd_max_easize, lmm_magic);
-
-                if (stripe_count > lov->desc.ld_tgt_count)
-                        stripe_count = lov->desc.ld_tgt_count;
-        }
+       } else {
+               /* To calculate maximum easize by active targets at present,
+                * which is exactly the maximum easize to be seen by LOV */
+               stripe_count = lov->desc.ld_active_tgt_count;
+       }
 
         /* XXX LOV STACKING call into osc for sizes */
         lmm_size = lov_mds_md_size(stripe_count, lmm_magic);
@@ -434,9 +428,11 @@ int lov_getstripe(struct obd_export *exp, struct lov_stripe_md *lsm,
         lum_size = sizeof(struct lov_user_md_v1);
        if (copy_from_user(&lum, lump, lum_size))
                 GOTO(out_set, rc = -EFAULT);
-        else if ((lum.lmm_magic != LOV_USER_MAGIC) &&
-                 (lum.lmm_magic != LOV_USER_MAGIC_V3))
-                GOTO(out_set, rc = -EINVAL);
+
+       if (lum.lmm_magic != LOV_USER_MAGIC_V1 &&
+           lum.lmm_magic != LOV_USER_MAGIC_V3 &&
+           lum.lmm_magic != LOV_USER_MAGIC_SPECIFIC)
+               GOTO(out_set, rc = -EINVAL);
 
         if (lum.lmm_stripe_count &&
             (lum.lmm_stripe_count < lsm->lsm_stripe_count)) {
index 1fbc87d..64be40c 100644 (file)
@@ -559,13 +559,14 @@ static int mgs_set_index(const struct lu_env *env,
                         fsdb->fsdb_mdt_count ++;
         }
 
-        if (mti->mti_stripe_index >= INDEX_MAP_SIZE * 8) {
-                LCONSOLE_ERROR_MSG(0x13f, "Server %s requested index %d, "
-                                   "but the max index is %d.\n",
-                                   mti->mti_svname, mti->mti_stripe_index,
-                                   INDEX_MAP_SIZE * 8);
+       /* the last index(0xffff) is reserved for default value. */
+       if (mti->mti_stripe_index >= INDEX_MAP_SIZE * 8 - 1) {
+               LCONSOLE_ERROR_MSG(0x13f, "Server %s requested index %u, "
+                                  "but index must be less than %u.\n",
+                                  mti->mti_svname, mti->mti_stripe_index,
+                                  INDEX_MAP_SIZE * 8 - 1);
                GOTO(out_up, rc = -ERANGE);
-        }
+       }
 
        if (test_bit(mti->mti_stripe_index, imap)) {
                 if ((mti->mti_flags & LDD_F_VIRGIN) &&
index e74f552..4934d08 100644 (file)
@@ -715,7 +715,7 @@ static int llog_osd_next_block(const struct lu_env *env,
                                                sizeof(struct llog_rec_tail));
                /* get the last record in block */
                last_rec = (struct llog_rec_hdr *)((char *)buf + rc -
-                                                  le32_to_cpu(tail->lrt_len));
+                                                  tail->lrt_len);
 
                if (LLOG_REC_HDR_NEEDS_SWABBING(last_rec))
                        lustre_swab_llog_rec(last_rec);
@@ -1503,7 +1503,9 @@ out_trans:
        lgi->lgi_buf.lb_buf = idarray;
        lgi->lgi_buf.lb_len = size;
        rc = dt_record_read(env, o, &lgi->lgi_buf, &lgi->lgi_off);
-       if (rc) {
+       /* -EFAULT means the llog is a sparse file. This is not an error
+        * now that arbitrary OST indices are supported. */
+       if (rc < 0 && rc != -EFAULT) {
                CERROR("%s: error reading CATALOGS: rc = %d\n",
                       o->do_lu.lo_dev->ld_obd->obd_name,  rc);
                GOTO(out, rc);
index 5208b9f..41c7513 100644 (file)
@@ -249,7 +249,7 @@ static int osp_init_last_objid(const struct lu_env *env, struct osp_device *osp)
        struct osp_thread_info  *osi = osp_env_info(env);
        struct lu_fid           *fid = &osp->opd_last_used_fid;
        struct dt_object        *dto;
-       int                     rc;
+       int                     rc = -EFAULT;
        ENTRY;
 
        dto = osp_find_or_create_local_file(env, osp, &osi->osi_attr,
@@ -263,9 +263,11 @@ static int osp_init_last_objid(const struct lu_env *env, struct osp_device *osp)
                osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, &fid->f_oid,
                                   osp->opd_index);
                rc = dt_record_read(env, dto, &osi->osi_lb, &osi->osi_off);
-               if (rc != 0)
+               if (rc != 0 && rc != -EFAULT)
                        GOTO(out, rc);
-       } else {
+       }
+
+       if (rc == -EFAULT) { /* fresh LAST_ID */
                fid->f_oid = 0;
                osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, &fid->f_oid,
                                   osp->opd_index);
@@ -304,7 +306,7 @@ static int osp_init_last_seq(const struct lu_env *env, struct osp_device *osp)
        struct osp_thread_info  *osi = osp_env_info(env);
        struct lu_fid           *fid = &osp->opd_last_used_fid;
        struct dt_object        *dto;
-       int                     rc;
+       int                     rc = -EFAULT;
        ENTRY;
 
        dto = osp_find_or_create_local_file(env, osp, &osi->osi_attr,
@@ -318,14 +320,18 @@ static int osp_init_last_seq(const struct lu_env *env, struct osp_device *osp)
                osp_objseq_buf_prep(&osi->osi_lb, &osi->osi_off, &fid->f_seq,
                                   osp->opd_index);
                rc = dt_record_read(env, dto, &osi->osi_lb, &osi->osi_off);
-               if (rc != 0)
+               if (rc != 0 && rc != -EFAULT)
                        GOTO(out, rc);
-       } else {
+       }
+
+       if (rc == -EFAULT) { /* fresh OSP */
                fid->f_seq = 0;
                osp_objseq_buf_prep(&osi->osi_lb, &osi->osi_off, &fid->f_seq,
                                    osp->opd_index);
                rc = osp_write_local_file(env, osp, dto, &osi->osi_lb,
                                          osi->osi_off);
+               if (rc != 0)
+                       GOTO(out, rc);
        }
        osp->opd_last_used_seq_file = dto;
        RETURN(0);
index 4d266fc..083a751 100644 (file)
@@ -1301,10 +1301,19 @@ static int osp_sync_llog_init(const struct lu_env *env, struct osp_device *d)
 
        rc = llog_osd_get_cat_list(env, d->opd_storage, d->opd_index, 1,
                                   &osi->osi_cid, fid);
-       if (rc) {
-               CERROR("%s: can't get id from catalogs: rc = %d\n",
-                      obd->obd_name, rc);
-               RETURN(rc);
+       if (rc < 0) {
+               if (rc != -EFAULT) {
+                       CERROR("%s: can't get id from catalogs: rc = %d\n",
+                              obd->obd_name, rc);
+                       RETURN(rc);
+               }
+
+               /* After sparse OST indices are supported, the CATALOG file
+                * may become a sparse file that results in failure on
+                * reading. Skip this error as the llog will be created
+                * later */
+               memset(&osi->osi_cid, 0, sizeof(osi->osi_cid));
+               rc = 0;
        }
 
        CDEBUG(D_INFO, "%s: Init llog for %d - catid "DOSTID":%x\n",
index 6ce2fe7..5a817bb 100644 (file)
@@ -2149,18 +2149,38 @@ void lustre_swab_lmv_user_md(struct lmv_user_md *lum)
 }
 EXPORT_SYMBOL(lustre_swab_lmv_user_md);
 
-static void print_lum (struct lov_user_md *lum)
+void lustre_print_user_md(unsigned int lvl, struct lov_user_md *lum,
+                         const char *msg)
 {
-       CDEBUG(D_OTHER, "lov_user_md %p:\n", lum);
-       CDEBUG(D_OTHER, "\tlmm_magic: %#x\n", lum->lmm_magic);
-       CDEBUG(D_OTHER, "\tlmm_pattern: %#x\n", lum->lmm_pattern);
-       CDEBUG(D_OTHER, "\tlmm_object_id: "LPU64"\n", lmm_oi_id(&lum->lmm_oi));
-       CDEBUG(D_OTHER, "\tlmm_object_gr: "LPU64"\n", lmm_oi_seq(&lum->lmm_oi));
-       CDEBUG(D_OTHER, "\tlmm_stripe_size: %#x\n", lum->lmm_stripe_size);
-       CDEBUG(D_OTHER, "\tlmm_stripe_count: %#x\n", lum->lmm_stripe_count);
-       CDEBUG(D_OTHER, "\tlmm_stripe_offset/lmm_layout_gen: %#x\n",
-                       lum->lmm_stripe_offset);
+       if (likely(!cfs_cdebug_show(lvl, DEBUG_SUBSYSTEM)))
+               return;
+
+       CDEBUG(lvl, "%s lov_user_md %p:\n", msg, lum);
+       CDEBUG(lvl, "\tlmm_magic: %#x\n", lum->lmm_magic);
+       CDEBUG(lvl, "\tlmm_pattern: %#x\n", lum->lmm_pattern);
+       CDEBUG(lvl, "\tlmm_object_id: "LPU64"\n", lmm_oi_id(&lum->lmm_oi));
+       CDEBUG(lvl, "\tlmm_object_gr: "LPU64"\n", lmm_oi_seq(&lum->lmm_oi));
+       CDEBUG(lvl, "\tlmm_stripe_size: %#x\n", lum->lmm_stripe_size);
+       CDEBUG(lvl, "\tlmm_stripe_count: %#x\n", lum->lmm_stripe_count);
+       CDEBUG(lvl, "\tlmm_stripe_offset/lmm_layout_gen: %#x\n",
+              lum->lmm_stripe_offset);
+       if (lum->lmm_magic == LOV_USER_MAGIC_V3) {
+               struct lov_user_md_v3 *v3 = (void *)lum;
+               CDEBUG(lvl, "\tlmm_pool_name: %s\n", v3->lmm_pool_name);
+       }
+       if (lum->lmm_magic == LOV_USER_MAGIC_SPECIFIC) {
+               struct lov_user_md_v3 *v3 = (void *)lum;
+               int i;
+
+               if (v3->lmm_pool_name[0] != '\0')
+                       CDEBUG(lvl, "\tlmm_pool_name: %s\n", v3->lmm_pool_name);
+
+               CDEBUG(lvl, "\ttarget list:\n");
+               for (i = 0; i < v3->lmm_stripe_count; i++)
+                       CDEBUG(lvl, "\t\t%u\n", v3->lmm_objects[i].l_ost_idx);
+       }
 }
+EXPORT_SYMBOL(lustre_print_user_md);
 
 static void lustre_swab_lmm_oi(struct ost_id *oi)
 {
@@ -2177,7 +2197,6 @@ static void lustre_swab_lov_user_md_common(struct lov_user_md_v1 *lum)
        __swab32s(&lum->lmm_stripe_size);
        __swab16s(&lum->lmm_stripe_count);
        __swab16s(&lum->lmm_stripe_offset);
-       print_lum(lum);
        EXIT;
 }
 
index 1cd8c21..e0e8337 100644 (file)
@@ -4857,6 +4857,309 @@ test_80() {
 }
 run_test 80 "mgc import reconnect race"
 
+# Save the original values of $OSTCOUNT and $OSTINDEX$i.
+save_ostindex() {
+       local new_ostcount=$1
+       saved_ostcount=$OSTCOUNT
+       OSTCOUNT=$new_ostcount
+
+       local i
+       local index
+       for i in $(seq $OSTCOUNT); do
+               index=OSTINDEX$i
+               eval saved_ostindex$i=${!index}
+               eval OSTINDEX$i=""
+       done
+}
+
+# Restore the original values of $OSTCOUNT and $OSTINDEX$i.
+restore_ostindex() {
+       trap 0
+
+       local i
+       local index
+       for i in $(seq $OSTCOUNT); do
+               index=saved_ostindex$i
+               eval OSTINDEX$i=${!index}
+       done
+       OSTCOUNT=$saved_ostcount
+
+       formatall
+}
+
+# The main purpose of this test is to ensure the OST_INDEX_LIST functions as
+# expected. This test uses OST_INDEX_LIST to format OSTs with a randomly
+# assigned index and ensures we can mount such a formatted file system
+test_81() { # LU-4665
+       [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.6.54) ]] ||
+               { skip "Need MDS version at least 2.6.54" && return; }
+       [[ $OSTCOUNT -ge 3 ]] || { skip_env "Need at least 3 OSTs" && return; }
+
+       stopall
+
+       # Each time RANDOM is referenced, a random integer between 0 and 32767
+       # is generated.
+       local i
+       local saved_ostindex1=$OSTINDEX1
+       for i in 65535 $((RANDOM + 65536)); do
+               echo -e "\nFormat ost1 with --index=$i, should fail"
+               OSTINDEX1=$i
+               if add ost1 $(mkfs_opts ost1 $(ostdevname 1)) --reformat \
+                  $(ostdevname 1) $(ostvdevname 1); then
+                       OSTINDEX1=$saved_ostindex1
+                       error "format ost1 with --index=$i should fail"
+               fi
+       done
+       OSTINDEX1=$saved_ostindex1
+
+       save_ostindex 3
+
+       # Format OSTs with random sparse indices.
+       trap "restore_ostindex" EXIT
+       echo -e "\nFormat $OSTCOUNT OSTs with sparse indices"
+       OST_INDEX_LIST=[0,$((RANDOM * 2 % 65533 + 1)),65534] formatall
+
+       # Setup and check Lustre filesystem.
+       start_mgsmds || error "start_mgsmds failed"
+       for i in $(seq $OSTCOUNT); do
+               start ost$i $(ostdevname $i) $OST_MOUNT_OPTS ||
+                       error "start ost$i failed"
+       done
+
+       mount_client $MOUNT || error "mount client $MOUNT failed"
+       check_mount || error "check client $MOUNT failed"
+
+       # Check max_easize.
+       local max_easize=$($LCTL get_param -n llite.*.max_easize)
+       [[ $max_easize -eq 128 ]] ||
+               error "max_easize is $max_easize, should be 128 bytes"
+
+       restore_ostindex
+}
+run_test 81 "sparse OST indexing"
+
+# Wait for OSTs to be active on both the client and MDT sides.
+wait_osts_up() {
+       local cmd="$LCTL get_param -n lov.$FSNAME-clilov-*.target_obd |
+               awk 'BEGIN {c = 0} /ACTIVE/{c += 1} END {printf \\\"%d\\\", c}'"
+       wait_update $HOSTNAME "eval $cmd" $OSTCOUNT ||
+               error "wait_update OSTs up on client failed"
+
+       cmd="$LCTL get_param -n lod.$FSNAME-MDT*-*.target_obd | sort -u |
+            awk 'BEGIN {c = 0} /ACTIVE/{c += 1} END {printf \\\"%d\\\", c}'"
+       wait_update_facet $SINGLEMDS "eval $cmd" $OSTCOUNT ||
+               error "wait_update OSTs up on MDT failed"
+}
+
+# Here we exercise the stripe placement functionality on a file system that
+# has formatted the OST with a random index. With the file system the following
+# functionality is tested:
+#
+# 1. Creating a new file with a specific stripe layout.
+#
+# 2. Modify an existing empty file with a specific stripe layout.
+#
+# 3. Ensure we fail to set the stripe layout of a file that already has one.
+#
+# 4. If ost-index is defined we need to ensure it is the first entry in the
+#    ost index list returned by lfs getstripe.
+#
+# 5. Lastly ensure this functionality fails with directories.
+test_82a() { # LU-4665
+       [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.6.54) ]] ||
+               { skip "Need MDS version at least 2.6.54" && return; }
+       [[ $OSTCOUNT -ge 3 ]] || { skip_env "Need at least 3 OSTs" && return; }
+
+       stopall
+
+       save_ostindex 3
+
+       # Format OSTs with random sparse indices.
+       local i
+       local index
+       local ost_indices
+       for i in $(seq $OSTCOUNT); do
+               index=$((RANDOM * 2))
+               ost_indices+=" $index"
+       done
+       ost_indices=$(comma_list $ost_indices)
+
+       trap "restore_ostindex" EXIT
+       echo -e "\nFormat $OSTCOUNT OSTs with sparse indices $ost_indices"
+       OST_INDEX_LIST=[$ost_indices] formatall
+
+       # Setup Lustre filesystem.
+       start_mgsmds || error "start_mgsmds failed"
+       for i in $(seq $OSTCOUNT); do
+               start ost$i $(ostdevname $i) $OST_MOUNT_OPTS ||
+                       error "start ost$i failed"
+       done
+
+       mount_client $MOUNT || error "mount client $MOUNT failed"
+       wait_osts_up
+
+       $LFS df $MOUNT || error "$LFS df $MOUNT failed"
+       mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+
+       # 1. If the file does not exist, new file will be created
+       #    with specified OSTs.
+       local file=$DIR/$tdir/$tfile-1
+       local cmd="$SETSTRIPE -o $ost_indices $file"
+       echo -e "\n$cmd"
+       eval $cmd || error "$cmd failed"
+       check_stripe_count $file $OSTCOUNT
+       check_obdidx $file $ost_indices
+       dd if=/dev/urandom of=$file count=1 bs=1M > /dev/null 2>&1 ||
+               error "write $file failed"
+
+       # 2. If the file already exists and is an empty file, the file
+       #    will be attached with specified layout.
+       file=$DIR/$tdir/$tfile-2
+       mcreate $file || error "mcreate $file failed"
+       cmd="$SETSTRIPE -o $ost_indices $file"
+       echo -e "\n$cmd"
+       eval $cmd || error "$cmd failed"
+       dd if=/dev/urandom of=$file count=1 bs=1M > /dev/null 2>&1 ||
+               error "write $file failed"
+       check_stripe_count $file $OSTCOUNT
+       check_obdidx $file $ost_indices
+
+       # 3. If the file already has a valid layout attached, the command
+       #    should fail with EBUSY.
+       echo -e "\n$cmd"
+       eval $cmd && error "stripe is already set on $file, $cmd should fail"
+
+       # 4. If [--stripe-index|-i <start_ost_idx>] is used, the index must
+       #    be in the OST indices list.
+       local start_ost_idx=${ost_indices##*,}
+       file=$DIR/$tdir/$tfile-3
+       cmd="$SETSTRIPE -o $ost_indices -i $start_ost_idx $file"
+       echo -e "\n$cmd"
+       eval $cmd || error "$cmd failed"
+       check_stripe_count $file $OSTCOUNT
+       check_obdidx $file $ost_indices
+       check_start_ost_idx $file $start_ost_idx
+
+       file=$DIR/$tdir/$tfile-4
+       cmd="$SETSTRIPE"
+       cmd+=" -o $(exclude_items_from_list $ost_indices $start_ost_idx)"
+       cmd+=" -i $start_ost_idx $file"
+       echo -e "\n$cmd"
+       eval $cmd && error "index $start_ost_idx should be in $ost_indices"
+
+       # 5. Specifying OST indices for directory should fail with ENOSUPP.
+       local dir=$DIR/$tdir/$tdir
+       mkdir $dir || error "mkdir $dir failed"
+       cmd="$SETSTRIPE -o $ost_indices $dir"
+       echo -e "\n$cmd"
+       eval $cmd && error "$cmd should fail, specifying OST indices" \
+                          "for directory is not supported"
+
+       restore_ostindex
+}
+run_test 82a "specify OSTs for file (succeed) or directory (fail)"
+
+cleanup_82b() {
+       trap 0
+
+       # Remove OSTs from a pool and destroy the pool.
+       destroy_pool $ost_pool || true
+
+       restore_ostindex
+}
+
+# Test 82b is run to ensure that if the user supplies a pool with a specific
+# stripe layout, it behaves properly. It should fail in the case that
+# the supplied OST index list points to OSTs not contained in the user
+# supplied pool.
+test_82b() { # LU-4665
+       [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.6.54) ]] ||
+               { skip "Need MDS version at least 2.6.54" && return; }
+       [[ $OSTCOUNT -ge 4 ]] || { skip_env "Need at least 4 OSTs" && return; }
+
+       stopall
+
+       save_ostindex 4
+
+       # Format OSTs with random sparse indices.
+       local i
+       local index
+       local ost_indices
+       for i in $(seq $OSTCOUNT); do
+               index=$((RANDOM * 2))
+               ost_indices+=" $index"
+       done
+       ost_indices=$(comma_list $ost_indices)
+
+       trap "restore_ostindex" EXIT
+       echo -e "\nFormat $OSTCOUNT OSTs with sparse indices $ost_indices"
+       OST_INDEX_LIST=[$ost_indices] formatall
+
+       # Setup Lustre filesystem.
+       start_mgsmds || error "start_mgsmds failed"
+       for i in $(seq $OSTCOUNT); do
+               start ost$i $(ostdevname $i) $OST_MOUNT_OPTS ||
+                       error "start ost$i failed"
+       done
+
+       mount_client $MOUNT || error "mount client $MOUNT failed"
+       wait_osts_up
+       $LFS df $MOUNT || error "$LFS df $MOUNT failed"
+       mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+
+       # Create a new pool and add OSTs into it.
+       local ost_pool=$FSNAME.$TESTNAME
+       create_pool $ost_pool || error "create OST pool $ost_pool failed"
+
+       trap - EXIT
+       trap "cleanup_82b" EXIT
+
+       local ost_idx_in_list=${ost_indices##*,}
+       local ost_idx_in_pool=$(exclude_items_from_list $ost_indices \
+                               $ost_idx_in_list)
+
+       local ost_targets="$FSNAME-OST["
+       for i in ${ost_idx_in_pool//,/ }; do
+               ost_targets=$ost_targets$(printf "%04x," $i)
+       done
+       ost_targets="${ost_targets%,}]"
+
+       local ost_targets_uuid=$(for i in ${ost_idx_in_pool//,/ }; \
+                                do printf "$FSNAME-OST%04x_UUID\n" $i; done |
+                                sort -u | tr '\n' ' ')
+
+       local cmd="$LCTL pool_add $ost_pool $ost_targets"
+       do_facet mgs $cmd || error "$cmd failed"
+       wait_update $HOSTNAME "$LCTL get_param -n lov.$FSNAME-*.pools.$TESTNAME|
+                              sort -u | tr '\n' ' ' " "$ost_targets_uuid" ||
+                                       error "wait_update $ost_pool failed"
+       pool_list $ost_pool || error "list OST pool $ost_pool failed"
+
+       # If [--pool|-p <pool_name>] is set with [--ost-list|-o <ost_indices>],
+       # then the OSTs must be the members of the pool.
+       local file=$DIR/$tdir/$tfile
+       cmd="$SETSTRIPE -p $ost_pool -o $ost_idx_in_list $file"
+       echo -e "\n$cmd"
+       eval $cmd && error "OST with index $ost_idx_in_list should be" \
+                          "in OST pool $ost_pool"
+
+       # Only select OST $ost_idx_in_list from $ost_pool for file.
+       ost_idx_in_list=${ost_idx_in_pool#*,}
+       cmd="$SETSTRIPE -p $ost_pool -o $ost_idx_in_list $file"
+       echo -e "\n$cmd"
+       eval $cmd || error "$cmd failed"
+       cmd="$GETSTRIPE $file"
+       echo -e "\n$cmd"
+       eval $cmd || error "$cmd failed"
+       check_stripe_count $file 2
+       check_obdidx $file $ost_idx_in_list
+       dd if=/dev/urandom of=$file count=1 bs=1M > /dev/null 2>&1 ||
+               error "write $file failed"
+
+       cleanup_82b
+}
+run_test 82b "specify OSTs for file with --pool and --ost-list options"
+
 if ! combined_mgs_mds ; then
        stop mgs
 fi
index 2850a99..2920c3d 100644 (file)
@@ -4765,28 +4765,6 @@ test_56v() {
 }
 run_test 56v "check 'lfs find -mdt match with lfs getstripe -M' ======="
 
-# Get and check the actual stripe count of one file.
-# Usage: check_stripe_count <file> <expected_stripe_count>
-check_stripe_count() {
-    local file=$1
-    local expected=$2
-    local actual
-
-    [[ -z "$file" || -z "$expected" ]] &&
-        error "check_stripe_count: invalid argument!"
-
-    local cmd="$GETSTRIPE -c $file"
-    actual=$($cmd) || error "$cmd failed"
-    actual=${actual%% *}
-
-    if [[ $actual -ne $expected ]]; then
-        [[ $expected -eq -1 ]] ||
-            error "$cmd wrong: found $actual, expected $expected"
-        [[ $actual -eq $OSTCOUNT ]] ||
-            error "$cmd wrong: found $actual, expected $OSTCOUNT"
-    fi
-}
-
 test_56w() {
        [ $PARALLEL == "yes" ] && skip "skip parallel run" && return
        TDIR=$DIR/${tdir}w
index 9290abb..40239de 100755 (executable)
@@ -7245,3 +7245,61 @@ pool_remove() {
        error_noexit "Pool $FSNAME.$pool is not destroyed"
        return 3
 }
+
+# Get and check the actual stripe count of one file.
+# Usage: check_stripe_count <file> <expected_stripe_count>
+check_stripe_count() {
+       local file=$1
+       local expected=$2
+       local actual
+
+       [[ -z "$file" || -z "$expected" ]] &&
+               error "check_stripe_count: invalid argument"
+
+       local cmd="$GETSTRIPE -c $file"
+       actual=$($cmd) || error "$cmd failed"
+       actual=${actual%% *}
+
+       if [[ $actual -ne $expected ]]; then
+               [[ $expected -eq -1 ]] ||
+                       error "$cmd wrong: found $actual, expected $expected"
+               [[ $actual -eq $OSTCOUNT ]] ||
+                       error "$cmd wrong: found $actual, expected $OSTCOUNT"
+       fi
+}
+
+# Get and check the actual list of OST indices on one file.
+# Usage: check_obdidx <file> <expected_comma_separated_list_of_ost_indices>
+check_obdidx() {
+       local file=$1
+       local expected=$2
+       local obdidx
+
+       [[ -z "$file" || -z "$expected" ]] &&
+               error "check_obdidx: invalid argument!"
+
+       obdidx=$(comma_list $($GETSTRIPE $file | grep -A $OSTCOUNT obdidx |
+                             grep -v obdidx | awk '{print $1}' | xargs))
+
+       [[ $obdidx = $expected ]] ||
+               error "list of OST indices on $file is $obdidx," \
+                     "should be $expected"
+}
+
+# Get and check the actual OST index of the first stripe on one file.
+# Usage: check_start_ost_idx <file> <expected_start_ost_idx>
+check_start_ost_idx() {
+       local file=$1
+       local expected=$2
+       local start_ost_idx
+
+       [[ -z "$file" || -z "$expected" ]] &&
+               error "check_start_ost_idx: invalid argument!"
+
+       start_ost_idx=$($GETSTRIPE $file | grep -A 1 obdidx | grep -v obdidx |
+                       awk '{print $1}')
+
+       [[ $start_ost_idx = $expected ]] ||
+               error "OST index of the first stripe on $file is" \
+                     "$start_ost_idx, should be $expected"
+}
index 5fba86f..e1e5a62 100644 (file)
@@ -121,13 +121,21 @@ static int lfs_mv(int argc, char **argv);
        "                 [--stripe-size|-S <stripe_size>]\n"\
        "                 [--pool|-p <pool_name>]\n"\
        "                 [--block|-b] "_tgt"\n"\
+       "                 [--ost-list|-o <ost_indices>]\n"\
        "\tstripe_size:  Number of bytes on each OST (0 filesystem default)\n"\
        "\t              Can be specified with k, m or g (in KB, MB and GB\n"\
        "\t              respectively)\n"\
        "\tstart_ost_idx: OST index of first stripe (-1 default)\n"\
        "\tstripe_count: Number of OSTs to stripe over (0 default, -1 all)\n"\
        "\tpool_name:    Name of OST pool to use (default none)\n"\
-       "\tblock:        Block file access during data migration"
+       "\tblock:        Block file access during data migration\n"\
+       "\tost_indices:  List of OST indices, can be repeated multiple times\n"\
+       "\t              Indices be specified in a format of:\n"\
+       "\t                -o <ost_1>,<ost_i>-<ost_j>,<ost_n>\n"\
+       "\t              Or:\n"\
+       "\t                -o <ost_1> -o <ost_i>-<ost_j> -o <ost_n>\n"\
+       "\t              If --pool is set with --ost-list, then the OSTs\n"\
+       "\t              must be the members of the pool."
 
 /* all avaialable commands */
 command_t cmdlist[] = {
@@ -377,10 +385,8 @@ out:
 
 #define MIGRATION_BLOCKS 1
 
-static int lfs_migrate(char *name, unsigned long long stripe_size,
-                      int stripe_offset, int stripe_count,
-                      int stripe_pattern, char *pool_name,
-                      __u64 migration_flags)
+static int lfs_migrate(char *name, __u64 migration_flags,
+                      struct llapi_stripe_param *param)
 {
        int                      fd, fdv;
        char                     volatile_file[PATH_MAX +
@@ -459,9 +465,8 @@ static int lfs_migrate(char *name, unsigned long long stripe_size,
        /* create, open a volatile file, use caching (ie no directio) */
        /* exclusive create is not needed because volatile files cannot
         * conflict on name by construction */
-       fdv = llapi_file_open_pool(volatile_file, O_CREAT | O_WRONLY,
-                                  0644, stripe_size, stripe_offset,
-                                  stripe_count, stripe_pattern, pool_name);
+       fdv = llapi_file_open_param(volatile_file, O_CREAT | O_WRONLY, 0644,
+                                   param);
        if (fdv < 0) {
                rc = fdv;
                fprintf(stderr, "cannot create volatile file in %s (%s)\n",
@@ -618,23 +623,110 @@ free:
        return rc;
 }
 
+/**
+ * Parse a string containing an OST index list into an array of integers.
+ *
+ * The input string contains a comma delimited list of individual
+ * indices and ranges, for example "1,2-4,7". Add the indices into the
+ * \a osts array and remove duplicates.
+ *
+ * The list is parsed in place without modifying \a arg. Parsing fails
+ * rather than silently dropping indices when \a osts has no free slot
+ * left for a new index.
+ *
+ * \param[out] osts    array to store indices in
+ * \param[in] size     size of \a osts array
+ * \param[in] offset   starting index in \a osts
+ * \param[in] arg      string containing OST index list
+ *
+ * \retval positive    number of indices in \a osts
+ * \retval -EINVAL     unable to parse \a arg, or too many indices
+ */
+static int parse_targets(__u32 *osts, int size, int offset, char *arg)
+{
+       int nr = offset;
+       int slots = size - offset;
+
+       if (arg == NULL)
+               return -EINVAL;
+
+       while (true) {
+               int start_index;
+               int end_index;
+               int i;
+               char *endptr = NULL;
+
+               start_index = strtol(arg, &endptr, 0);
+               if (endptr == arg) /* no data at all */
+                       return -EINVAL;
+               if (start_index < 0)
+                       return -EINVAL;
+
+               end_index = start_index;
+               if (*endptr == '-') {
+                       arg = endptr + 1;
+                       end_index = strtol(arg, &endptr, 0);
+                       if (endptr == arg) /* range has no end index */
+                               return -EINVAL;
+                       if (end_index < start_index)
+                               return -EINVAL;
+               }
+
+               /* an entry must be followed by ',' or the end of string */
+               if (*endptr != ',' && *endptr != '\0')
+                       return -EINVAL;
+
+               for (i = start_index; i <= end_index; i++) {
+                       int j;
+
+                       /* remove duplicate */
+                       for (j = 0; j < offset; j++) {
+                               if (osts[j] == (__u32)i)
+                                       break;
+                       }
+                       if (j < offset) /* already in the list */
+                               continue;
+                       /* a new index needs a free slot in osts[] */
+                       if (slots <= 0)
+                               return -EINVAL;
+
+                       osts[nr++] = i;
+                       --slots;
+               }
+
+               /* later entries are also checked against these indices */
+               offset = nr;
+
+               if (*endptr == '\0') /* last entry has been handled */
+                       break;
+
+               /* step over ',' to the next entry */
+               arg = endptr + 1;
+       }
+
+       return nr;
+}
+
 /* functions */
 static int lfs_setstripe(int argc, char **argv)
 {
-       char                    *fname;
-       int                      result;
-       unsigned long long       st_size;
-       int                      st_offset, st_count;
-       char                    *end;
-       int                      c;
-       int                      delete = 0;
-       char                    *stripe_size_arg = NULL;
-       char                    *stripe_off_arg = NULL;
-       char                    *stripe_count_arg = NULL;
-       char                    *pool_name_arg = NULL;
-       unsigned long long       size_units = 1;
-       int                      migrate_mode = 0;
-       __u64                    migration_flags = 0;
+       struct llapi_stripe_param       *param;
+       char                            *fname;
+       int                              result;
+       unsigned long long               st_size;
+       int                              st_offset, st_count;
+       char                            *end;
+       int                              c;
+       int                              delete = 0;
+       char                            *stripe_size_arg = NULL;
+       char                            *stripe_off_arg = NULL;
+       char                            *stripe_count_arg = NULL;
+       char                            *pool_name_arg = NULL;
+       unsigned long long               size_units = 1;
+       bool                             migrate_mode = false;
+       __u64                            migration_flags = 0;
+       __u32                            osts[LOV_MAX_STRIPE_COUNT] = { 0 };
+       int                              nr_osts = 0;
 
        struct option            long_opts[] = {
                /* valid only in migrate mode */
@@ -656,12 +748,8 @@ static int lfs_setstripe(int argc, char **argv)
 #endif
                {"stripe-index", required_argument, 0, 'i'},
                {"stripe_index", required_argument, 0, 'i'},
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 9, 53, 0)
-               /* This formerly implied "stripe-index", but was confusing
-                * with "file offset" (which will eventually be needed for
-                * with different layouts by offset), so deprecate it. */
-               {"offset",       required_argument, 0, 'o'},
-#endif
+               {"ost-list",     required_argument, 0, 'o'},
+               {"ost_list",     required_argument, 0, 'o'},
                {"pool",         required_argument, 0, 'p'},
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 9, 53, 0)
                /* This formerly implied "--stripe-size", but was confusing
@@ -674,12 +762,12 @@ static int lfs_setstripe(int argc, char **argv)
                {0, 0, 0, 0}
        };
 
-        st_size = 0;
-        st_offset = -1;
-        st_count = 0;
+       st_size = 0;
+       st_offset = -1;
+       st_count = 0;
 
        if (strcmp(argv[0], "migrate") == 0)
-               migrate_mode = 1;
+               migrate_mode = true;
 
        optind = 0;
        while ((c = getopt_long(argc, argv, "c:di:o:p:s:S:",
@@ -689,7 +777,7 @@ static int lfs_setstripe(int argc, char **argv)
                        /* Long options. */
                        break;
                case 'b':
-                       if (migrate_mode == 0) {
+                       if (!migrate_mode) {
                                fprintf(stderr, "--block is valid only for"
                                                " migrate mode");
                                return CMD_HELP;
@@ -708,11 +796,19 @@ static int lfs_setstripe(int argc, char **argv)
                        /* delete the default striping pattern */
                        delete = 1;
                        break;
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 9, 53, 0)
                case 'o':
-                       fprintf(stderr, "warning: '--offset|-o' deprecated, "
-                               "use '--stripe-index|-i' instead\n");
-#endif
+                       nr_osts = parse_targets(osts, ARRAY_SIZE(osts), nr_osts,
+                                               optarg);
+                       if (nr_osts < 0) {
+                               fprintf(stderr,
+                                       "error: %s: bad OST indices '%s'\n",
+                                       argv[0], optarg);
+                               return CMD_HELP;
+                       }
+
+                       if (st_offset == -1) /* first in the command line */
+                               st_offset = osts[0];
+                       break;
                case 'i':
 #if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(2, 6, 53, 0)
                        if (strcmp(argv[optind - 1], "--index") == 0)
@@ -785,15 +881,44 @@ static int lfs_setstripe(int argc, char **argv)
                 }
         }
 
+       /* initialize stripe parameters */
+       param = calloc(1, offsetof(typeof(*param), lsp_osts[nr_osts]));
+       if (param == NULL) {
+               fprintf(stderr, "error: %s: run out of memory\n", argv[0]);
+               return CMD_HELP;
+       }
+
+       param->lsp_stripe_size = st_size;
+       param->lsp_stripe_offset = st_offset;
+       param->lsp_stripe_count = st_count;
+       param->lsp_stripe_pattern = 0;
+       param->lsp_pool = pool_name_arg;
+       param->lsp_is_specific = false;
+       if (nr_osts > 0) {
+               if (st_count > 0 && nr_osts != st_count) {
+                       fprintf(stderr, "error: %s: stripe count '%d' doesn't "
+                               "match the number of OSTs: %d\n",
+                               argv[0], st_count, nr_osts);
+                       return CMD_HELP;
+               }
+
+               param->lsp_is_specific = true;
+               param->lsp_stripe_count = nr_osts;
+               memcpy(param->lsp_osts, osts, sizeof(*osts) * nr_osts);
+       }
+
        do {
-               if (migrate_mode)
-                       result = lfs_migrate(fname, st_size, st_offset,
-                                            st_count, 0, pool_name_arg,
-                                            migration_flags);
-               else
-                       result = llapi_file_create_pool(fname, st_size,
-                                                       st_offset, st_count,
-                                                       0, pool_name_arg);
+               if (!migrate_mode) {
+                       result = llapi_file_open_param(fname,
+                                                      O_CREAT | O_WRONLY,
+                                                      0644, param);
+                       if (result >= 0) {
+                               close(result);
+                               result = 0;
+                       }
+               } else {
+                       result = lfs_migrate(fname, migration_flags, param);
+               }
                if (result) {
                        fprintf(stderr,
                                "error: %s: %s stripe file '%s' failed\n",
@@ -804,6 +929,7 @@ static int lfs_setstripe(int argc, char **argv)
                fname = argv[++optind];
        } while (fname != NULL);
 
+       free(param);
        return result;
 }
 
@@ -1901,6 +2027,9 @@ static int mntdf(char *mntdir, char *fsname, char *pool, int ishow,
                         if (rc == -ENODEV)
                                 break;
 
+                       if (rc == -EAGAIN)
+                               continue;
+
                         if (poolname && tp->st_op == LL_STATFS_LOV &&
                             llapi_search_ost(fsname, poolname,
                                              obd_uuid2str(&uuid_buf)) != 1)
index 735c00e..4a81941 100644 (file)
@@ -660,55 +660,114 @@ int llapi_search_ost(char *fsname, char *poolname, char *ostname)
         return 0;
 }
 
-int llapi_file_open_pool(const char *name, int flags, int mode,
-                        unsigned long long stripe_size, int stripe_offset,
-                        int stripe_count, int stripe_pattern, char *pool_name)
+/**
+ * Open a Lustre file.
+ *
+ * \param name the name of the file to be opened
+ * \param flags access mode, see flags in open(2)
+ * \param mode permission of the file if it is created, see mode in open(2)
+ * \param param stripe pattern of the newly created file
+ *
+ * \return file descriptor of opened file
+ * \return -error failure
+ */
+int llapi_file_open_param(const char *name, int flags, mode_t mode,
+                         const struct llapi_stripe_param *param)
 {
-       struct lov_user_md_v3 lum = { 0 };
-       int fd = -1;
-       int rc = 0;
+       char fsname[MAX_OBD_NAME + 1] = { 0 };
+       char *pool_name = param->lsp_pool;
+       struct lov_user_md *lum = NULL;
+       size_t lum_size = sizeof(*lum);
+       int fd, rc;
 
-        /* Make sure we have a good pool */
-        if (pool_name != NULL) {
-                char fsname[MAX_OBD_NAME + 1], *ptr;
+       /* Make sure we are on a Lustre file system */
+       rc = llapi_search_fsname(name, fsname);
+       if (rc) {
+               llapi_error(LLAPI_MSG_ERROR, rc,
+                           "'%s' is not on a Lustre filesystem",
+                           name);
+               return rc;
+       }
 
-                rc = llapi_search_fsname(name, fsname);
-                if (rc) {
-                        llapi_error(LLAPI_MSG_ERROR, rc,
-                                    "'%s' is not on a Lustre filesystem",
-                                    name);
-                        return rc;
-                }
+       /* Check if the stripe pattern is sane. */
+       rc = llapi_stripe_limit_check(param->lsp_stripe_size,
+                                     param->lsp_stripe_offset,
+                                     param->lsp_stripe_count,
+                                     param->lsp_stripe_pattern);
+       if (rc != 0)
+               return rc;
 
-                /* in case user gives the full pool name <fsname>.<poolname>,
-                 * strip the fsname */
-                ptr = strchr(pool_name, '.');
-                if (ptr != NULL) {
-                        *ptr = '\0';
-                        if (strcmp(pool_name, fsname) != 0) {
-                                *ptr = '.';
-                                llapi_err_noerrno(LLAPI_MSG_ERROR,
-                                          "Pool '%s' is not on filesystem '%s'",
-                                          pool_name, fsname);
-                                return -EINVAL;
-                        }
-                        pool_name = ptr + 1;
-                }
+       /* Make sure we have a good pool */
+       if (pool_name != NULL) {
+               /* in case user gives the full pool name <fsname>.<poolname>,
+                * strip the fsname */
+               char *ptr = strchr(pool_name, '.');
+               if (ptr != NULL) {
+                       *ptr = '\0';
+                       if (strcmp(pool_name, fsname) != 0) {
+                               *ptr = '.';
+                               llapi_err_noerrno(LLAPI_MSG_ERROR,
+                                       "Pool '%s' is not on filesystem '%s'",
+                                       pool_name, fsname);
+                               return -EINVAL;
+                       }
+                       pool_name = ptr + 1;
+               }
 
-                /* Make sure the pool exists and is non-empty */
-                rc = llapi_search_ost(fsname, pool_name, NULL);
-                if (rc < 1) {
-                        llapi_err_noerrno(LLAPI_MSG_ERROR,
-                                          "pool '%s.%s' %s", fsname, pool_name,
-                                          rc == 0 ? "has no OSTs" : "does not exist");
-                        return -EINVAL;
-                }
-        }
+               /* Make sure the pool exists and is non-empty */
+               rc = llapi_search_ost(fsname, pool_name, NULL);
+               if (rc < 1) {
+                       char *err = rc == 0 ? "has no OSTs" : "does not exist";
 
-       rc = llapi_stripe_limit_check(stripe_size, stripe_offset, stripe_count,
-                                     stripe_pattern);
-       if (rc != 0)
-               return rc;
+                       llapi_err_noerrno(LLAPI_MSG_ERROR, "pool '%s.%s' %s",
+                                         fsname, pool_name, err);
+                       return -EINVAL;
+               }
+
+               lum_size = sizeof(struct lov_user_md_v3);
+       }
+
+       /* sanity check of target list */
+       if (param->lsp_is_specific) {
+               char ostname[MAX_OBD_NAME + 1];
+               bool found = false;
+               int i;
+
+               for (i = 0; i < param->lsp_stripe_count; i++) {
+                       snprintf(ostname, sizeof(ostname), "%s-OST%04x_UUID",
+                                fsname, param->lsp_osts[i]);
+                       rc = llapi_search_ost(fsname, pool_name, ostname);
+                       if (rc <= 0) {
+                               if (rc == 0)
+                                       rc = -ENODEV;
+
+                               llapi_error(LLAPI_MSG_ERROR, rc,
+                                           "%s: cannot find OST %s in %s",
+                                           __func__, ostname,
+                                           pool_name != NULL ?
+                                           "pool" : "system");
+                               return rc;
+                       }
+
+                       /* Make sure stripe offset is in OST list. */
+                       if (param->lsp_osts[i] == param->lsp_stripe_offset)
+                               found = true;
+               }
+               if (!found) {
+                       llapi_error(LLAPI_MSG_ERROR, -EINVAL,
+                                   "%s: stripe offset '%d' is not in the "
+                                   "target list",
+                                   __func__, param->lsp_stripe_offset);
+                       return -EINVAL;
+               }
+
+               lum_size = lov_user_md_size(param->lsp_stripe_count,
+                                           LOV_USER_MAGIC_SPECIFIC);
+       }
+
+       lum = calloc(1, lum_size);
+       if (lum == NULL)
+               return -ENOMEM;
 
 retry_open:
        fd = open(name, flags | O_LOV_DELAY_CREATE, mode);
@@ -719,45 +778,77 @@ retry_open:
                }
        }
 
-        if (fd < 0) {
-                rc = -errno;
-                llapi_error(LLAPI_MSG_ERROR, rc, "unable to open '%s'", name);
-                return rc;
-        }
+       if (fd < 0) {
+               rc = -errno;
+               llapi_error(LLAPI_MSG_ERROR, rc, "unable to open '%s'", name);
+               free(lum);
+               return rc;
+       }
 
-        /*  Initialize IOCTL striping pattern structure */
-        lum.lmm_magic = LOV_USER_MAGIC_V3;
-        lum.lmm_pattern = stripe_pattern;
-        lum.lmm_stripe_size = stripe_size;
-        lum.lmm_stripe_count = stripe_count;
-        lum.lmm_stripe_offset = stripe_offset;
-        if (pool_name != NULL) {
-               strlcpy(lum.lmm_pool_name, pool_name,
-                       sizeof(lum.lmm_pool_name));
-        } else {
-                /* If no pool is specified at all, use V1 request */
-                lum.lmm_magic = LOV_USER_MAGIC_V1;
-        }
+       /*  Initialize IOCTL striping pattern structure */
+       lum->lmm_magic = LOV_USER_MAGIC_V1;
+       lum->lmm_pattern = param->lsp_stripe_pattern;
+       lum->lmm_stripe_size = param->lsp_stripe_size;
+       lum->lmm_stripe_count = param->lsp_stripe_count;
+       lum->lmm_stripe_offset = param->lsp_stripe_offset;
+       if (pool_name != NULL) {
+               struct lov_user_md_v3 *lumv3 = (void *)lum;
 
-        if (ioctl(fd, LL_IOC_LOV_SETSTRIPE, &lum)) {
-                char *errmsg = "stripe already set";
-                rc = -errno;
-                if (errno != EEXIST && errno != EALREADY)
-                        errmsg = strerror(errno);
+               lumv3->lmm_magic = LOV_USER_MAGIC_V3;
+               strncpy(lumv3->lmm_pool_name, pool_name, LOV_MAXPOOLNAME);
+       }
+       if (param->lsp_is_specific) {
+               struct lov_user_md_v3 *lumv3 = (void *)lum;
+               int i;
+
+               lumv3->lmm_magic = LOV_USER_MAGIC_SPECIFIC;
+               if (pool_name == NULL) {
+                       /* LOV_USER_MAGIC_SPECIFIC uses v3 format plus specified
+                        * OST list, therefore if pool is not specified we have
+                        * to pack a null pool name for placeholder. */
+                       memset(lumv3->lmm_pool_name, 0, LOV_MAXPOOLNAME);
+               }
 
-                llapi_err_noerrno(LLAPI_MSG_ERROR,
-                                  "error on ioctl "LPX64" for '%s' (%d): %s",
-                                  (__u64)LL_IOC_LOV_SETSTRIPE, name, fd,errmsg);
-        }
+               for (i = 0; i < param->lsp_stripe_count; i++)
+                       lumv3->lmm_objects[i].l_ost_idx = param->lsp_osts[i];
+       }
+
+       if (ioctl(fd, LL_IOC_LOV_SETSTRIPE, lum) != 0) {
+               char *errmsg = "stripe already set";
+
+               rc = -errno;
+               if (errno != EEXIST && errno != EALREADY)
+                       errmsg = strerror(errno);
+
+                       llapi_err_noerrno(LLAPI_MSG_ERROR,
+                                 "error on ioctl "LPX64" for '%s' (%d): %s",
+                                 (__u64)LL_IOC_LOV_SETSTRIPE, name, fd,
+                                 errmsg);
+       }
 
        if (rc) {
                close(fd);
                fd = rc;
        }
-
+       if (lum != NULL)
+               free(lum);
        return fd;
 }
 
+int llapi_file_open_pool(const char *name, int flags, int mode,
+                        unsigned long long stripe_size, int stripe_offset,
+                        int stripe_count, int stripe_pattern, char *pool_name)
+{
+       const struct llapi_stripe_param param = {
+               .lsp_stripe_size = stripe_size,
+               .lsp_stripe_count = stripe_count,
+               .lsp_stripe_pattern = stripe_pattern,
+               .lsp_stripe_offset = stripe_offset,
+               .lsp_pool = pool_name
+       };
+       return llapi_file_open_param(name, flags, mode, &param);
+}
+
 int llapi_file_open(const char *name, int flags, int mode,
                     unsigned long long stripe_size, int stripe_offset,
                     int stripe_count, int stripe_pattern)
index 5bca28c..d984ed6 100644 (file)
@@ -408,6 +408,13 @@ int parse_opts(int argc, char *const argv[], struct mkfs_opts *mop,
                         }
                        /* LU-2374: check whether it is OST/MDT later */
                        mop->mo_ldd.ldd_svindex = atol(optarg);
+                       if (mop->mo_ldd.ldd_svindex >= INDEX_UNASSIGNED) {
+                               fprintf(stderr, "%s: wrong index %u. "
+                                       "Target index must be less than %u.\n",
+                                       progname, mop->mo_ldd.ldd_svindex,
+                                       INDEX_UNASSIGNED);
+                               return 1;
+                       }
                        mop->mo_ldd.ldd_flags &= ~LDD_F_NEED_INDEX;
                         break;
                 case 'k':