.B lfs getstripe [--obd|-O <uuid>] [--quiet|-q] [--verbose|-v]
\fB[--stripe-count|-c ] [--stripe-index|-i] [--mdt-index|-M]
\fB[--stripe-size|-S] [--directory|-d]
- \fB[--layout|-L]
+ \fB[--layout|-L]
\fB[--pool|-p] [--recursive|-r] [--raw|-R] <dirname|filename> ...\fR
.br
.B lfs setstripe [--stripe-size|-S stripe_size] [--stripe-count|-c stripe_count]
- \fB[--stripe-index|-i start_ost_index ] [--pool|-p <poolname>]
- \fB<directory|filename>\fR
+ \fB[--stripe-index|-i start_ost_index] [--pool|-p <poolname>]
+ \fB[--ost-list|-o <ost_indices>] <directory|filename>\fR
.br
.B lfs setstripe -d <dir>
.br
.B getstripe [--obd|-O <uuid>] [--quiet|-q] [--verbose|-v]
\fB[--count | -c ] [--index | -i | --offset | -o ]
\fB[--pool | -p ] [--size | -s ] [--directory | -d ]
- \fB[--layout | -L]
+ \fB[--layout | -L]
\fB[--recursive | -r ] [--raw | -R ] <dirname|filename>\fR
.br
List the striping information for a given filename or directory tree.
.TP
.B setstripe [--stripe-count|-c stripe_count] [--stripe-size|-S stripe_size]
\fB[--stripe-index|-i start_ost_index] [--pool <poolname>]
- \fB<dirname|filename>\fR
+ \fB[--ost-list|-o <ost_indices>] <dirname|filename>\fR
.br
-To create a new file, or set the directory default, with the specified striping parameters. The
+To create a new file, or set the directory default, with the specified striping
+parameters. The
.I stripe_count
is the number of OSTs to stripe a file over. A
.I stripe_count
.I start_ost_index
is the OST index (starting at 0) on which to start striping for this file. A
.I start_ost_index
-of -1 allows the MDS to choose the starting index and it is strongly recommended, as this allows space and load balancing to be done by the MDS as needed. The
+of -1 allows the MDS to choose the starting index and it is strongly
+recommended, as this allows space and load balancing to be done by the MDS as
+needed. The
+.B -o
+option is used to specify the exact stripe layout on the file system.
+.I ost_indices
+is a list of OSTs referenced by their indices, which are specified in decimal
+or hex form and can be obtained using the
+.B lfs osts
+command. The list format consists of individual OST indices and index ranges
+separated by commas, e.g. 1,2-4,7. The
+.B -o
+option may be specified multiple times to stripe across the union of all listed
+OSTs. If the
+.B -c
+option is combined with
+.B -o
+the
+.I stripe_count
+must agree with the number of OSTs in
+.IR ost_indices .
+If the
+.B -i
+option is combined with
+.B -o
+the
+.I start_ost_index
+must be in the OST list, and it will be used as the index on which to start
+striping the file. Otherwise the striping will occur in the order specified in
+.IR ost_indices .
+The
.I poolname
-is the name of a predefined pool of OSTs (see
-.B lctl
-) that will be used for striping. The
+is the name of a predefined pool of OSTs (see
+.BR lctl (8))
+that will be used for striping. The
.IR stripe_count ,
.IR stripe_size ,
and
.I start_ost_index
-will be used as well; the
+will be used as well; the
.I start_ost_index
-must be part of the pool or an error will be returned.
+must be part of the pool or an error will be returned.
.TP
.B setstripe -d
Delete the default striping on the specified directory.
#define LOV_MAGIC_JOIN_V1 (0x0BD20000 | LOV_MAGIC_MAGIC)
#define LOV_MAGIC_V3 (0x0BD30000 | LOV_MAGIC_MAGIC)
#define LOV_MAGIC_MIGRATE (0x0BD40000 | LOV_MAGIC_MAGIC)
+/* reserved for specifying OSTs */
+#define LOV_MAGIC_SPECIFIC (0x0BD50000 | LOV_MAGIC_MAGIC)
#define LOV_MAGIC LOV_MAGIC_V1
/*
extern void lustre_swab_lov_user_md_objects(struct lov_user_ost_data *lod,
int stripe_count);
extern void lustre_swab_lov_mds_md(struct lov_mds_md *lmm);
+void lustre_print_user_md(unsigned int level, struct lov_user_md *lum,
+ const char *msg);
/* llog_swab.c */
extern void lustre_swab_llogd_body (struct llogd_body *d);
#define LL_FILE_LOCKLESS_IO 0x00000010 /* server-side locks with cio */
#define LL_FILE_RMTACL 0x00000020
-#define LOV_USER_MAGIC_V1 0x0BD10BD0
-#define LOV_USER_MAGIC LOV_USER_MAGIC_V1
-#define LOV_USER_MAGIC_JOIN_V1 0x0BD20BD0
-#define LOV_USER_MAGIC_V3 0x0BD30BD0
+#define LOV_USER_MAGIC_V1 0x0BD10BD0
+#define LOV_USER_MAGIC LOV_USER_MAGIC_V1
+#define LOV_USER_MAGIC_JOIN_V1 0x0BD20BD0
+#define LOV_USER_MAGIC_V3 0x0BD30BD0
+/* 0x0BD40BD0 is occupied by LOV_MAGIC_MIGRATE */
+#define LOV_USER_MAGIC_SPECIFIC 0x0BD50BD0 /* for specific OSTs */
#define LMV_USER_MAGIC 0x0CD30CD0 /*default lmv magic*/
static inline __u32 lov_user_md_size(__u16 stripes, __u32 lmm_magic)
{
- if (lmm_magic == LOV_USER_MAGIC_V3)
- return sizeof(struct lov_user_md_v3) +
- stripes * sizeof(struct lov_user_ost_data_v1);
- else
+ if (lmm_magic == LOV_USER_MAGIC_V1)
return sizeof(struct lov_user_md_v1) +
+ stripes * sizeof(struct lov_user_ost_data_v1);
+ return sizeof(struct lov_user_md_v3) +
stripes * sizeof(struct lov_user_ost_data_v1);
}
void llapi_printf(enum llapi_message_level level, const char *fmt, ...)
__attribute__((__format__(__printf__, 2, 3)));
+struct llapi_stripe_param {
+ unsigned long long lsp_stripe_size;
+ char *lsp_pool;
+ int lsp_stripe_offset;
+ int lsp_stripe_pattern;
+ /* Number of stripes. Size of lsp_osts[] if lsp_is_specific is true. */
+ int lsp_stripe_count;
+ bool lsp_is_specific;
+ __u32 lsp_osts[0];
+};
+
+extern int llapi_file_open_param(const char *name, int flags, mode_t mode,
+ const struct llapi_stripe_param *param);
extern int llapi_file_create(const char *name, unsigned long long stripe_size,
int stripe_offset, int stripe_count,
int stripe_pattern);
static int ll_lov_setstripe(struct inode *inode, struct file *file,
unsigned long arg)
{
- struct lov_user_md_v3 lumv3;
- struct lov_user_md_v1 *lumv1 = (struct lov_user_md_v1 *)&lumv3;
- struct lov_user_md_v1 __user *lumv1p =
- (struct lov_user_md_v1 __user *)arg;
- struct lov_user_md_v3 __user *lumv3p =
- (struct lov_user_md_v3 __user *)arg;
- int lum_size, rc;
- __u64 flags = FMODE_WRITE;
+ struct lov_user_md __user *lum = (struct lov_user_md __user *)arg;
+ struct lov_user_md *klum;
+ int lum_size, rc;
+ __u64 flags = FMODE_WRITE;
ENTRY;
- /* first try with v1 which is smaller than v3 */
- lum_size = sizeof(struct lov_user_md_v1);
- if (copy_from_user(lumv1, lumv1p, lum_size))
- RETURN(-EFAULT);
-
- if (lumv1->lmm_magic == LOV_USER_MAGIC_V3) {
- lum_size = sizeof(struct lov_user_md_v3);
- if (copy_from_user(&lumv3, lumv3p, lum_size))
- RETURN(-EFAULT);
- }
+ rc = ll_copy_user_md(lum, &klum);
+ if (rc < 0)
+ RETURN(rc);
- rc = ll_lov_setstripe_ea_info(inode, file, flags, lumv1, lum_size);
+ lum_size = rc;
+ rc = ll_lov_setstripe_ea_info(inode, file, flags, klum, lum_size);
if (rc == 0) {
struct lov_stripe_md *lsm;
__u32 gen;
- put_user(0, &lumv1p->lmm_stripe_count);
+ put_user(0, &lum->lmm_stripe_count);
ll_layout_refresh(inode, &gen);
lsm = ccc_inode_lsm_get(inode);
rc = obd_iocontrol(LL_IOC_LOV_GETSTRIPE, ll_i2dtexp(inode),
- 0, lsm, (void __user *)arg);
+ 0, lsm, lum);
ccc_inode_lsm_put(inode, lsm);
}
+
+ OBD_FREE(klum, lum_size);
RETURN(rc);
}
int ll_get_obd_name(struct inode *inode, unsigned int cmd, unsigned long arg);
char *ll_get_fsname(struct super_block *sb, char *buf, int buflen);
void ll_compute_rootsquash_state(struct ll_sb_info *sbi);
+ssize_t ll_copy_user_md(const struct lov_user_md __user *md,
+ struct lov_user_md **kbuf);
+
+/*
+ * Number of bytes a lov_user_md handed in from user space is expected to
+ * occupy, derived from its magic and, for LOV_USER_MAGIC_SPECIFIC, from its
+ * stripe count.  Returns -EINVAL for an unrecognized magic or for a stripe
+ * count above LOV_MAX_STRIPE_COUNT.
+ */
+static inline ssize_t ll_lov_user_md_size(const struct lov_user_md *lum)
+{
+	if (lum->lmm_magic == LOV_USER_MAGIC_V1)
+		return sizeof(struct lov_user_md_v1);
+
+	if (lum->lmm_magic == LOV_USER_MAGIC_V3)
+		return sizeof(struct lov_user_md_v3);
+
+	/* only the OST-list layout is sized by its stripe count */
+	if (lum->lmm_magic != LOV_USER_MAGIC_SPECIFIC ||
+	    lum->lmm_stripe_count > LOV_MAX_STRIPE_COUNT)
+		return -EINVAL;
+
+	return lov_user_md_size(lum->lmm_stripe_count,
+				LOV_USER_MAGIC_SPECIFIC);
+}
/* llite/llite_nfs.c */
extern struct export_operations lustre_export_operations;
free_page((unsigned long)buf);
}
+/**
+ * Copy a striping descriptor from user space into a newly allocated kernel
+ * buffer.
+ *
+ * The fixed-size header is fetched first so that ll_lov_user_md_size() can
+ * work out how many bytes the whole descriptor occupies; the descriptor is
+ * then fetched again in full.  User space may modify the descriptor between
+ * the two fetches, so the size is recomputed from the kernel copy and the
+ * request is rejected if it no longer matches the allocation.
+ *
+ * \param[in]  md	user-space descriptor
+ * \param[out] kbuf	kernel copy of \a md on success, NULL on failure; the
+ *			caller releases it with OBD_FREE() using the returned
+ *			size
+ *
+ * \retval positive	size in bytes of the buffer stored in \a kbuf
+ * \retval -EFAULT	\a md could not be read
+ * \retval -EINVAL	unsupported magic or stripe count, or \a md changed
+ *			while it was being copied
+ * \retval -ENOMEM	allocation failure
+ */
+ssize_t ll_copy_user_md(const struct lov_user_md __user *md,
+			struct lov_user_md **kbuf)
+{
+	struct lov_user_md	lum;
+	ssize_t			lum_size;
+	ssize_t			rc;
+	ENTRY;
+
+	/* never hand a stale pointer back to the caller on failure */
+	*kbuf = NULL;
+
+	if (copy_from_user(&lum, md, sizeof(lum)))
+		RETURN(-EFAULT);
+
+	lum_size = ll_lov_user_md_size(&lum);
+	if (lum_size < 0)
+		RETURN(lum_size);
+
+	OBD_ALLOC(*kbuf, lum_size);
+	if (*kbuf == NULL)
+		RETURN(-ENOMEM);
+
+	if (copy_from_user(*kbuf, md, lum_size) != 0)
+		GOTO(out_free, rc = -EFAULT);
+
+	/*
+	 * Second fetch: the magic or stripe count may have been changed
+	 * behind our back.  The kernel copy must describe exactly the number
+	 * of bytes that were allocated for it.
+	 */
+	if (ll_lov_user_md_size(*kbuf) != lum_size)
+		GOTO(out_free, rc = -EINVAL);
+
+	RETURN(lum_size);
+
+out_free:
+	OBD_FREE(*kbuf, lum_size);
+	*kbuf = NULL;
+	RETURN(rc);
+}
+
/*
* Compute llite root squash state after a change of root squash
* configuration setting or add/remove of a lnet nid
(strncmp(name, XATTR_LUSTRE_PREFIX,
sizeof(XATTR_LUSTRE_PREFIX) - 1) == 0 &&
strcmp(name + sizeof(XATTR_LUSTRE_PREFIX) - 1, "lov") == 0)) {
- struct lov_user_md *lump = (struct lov_user_md *)value;
- int rc = 0;
+ struct lov_user_md *lump = (struct lov_user_md *)value;
+ int rc = 0;
/* Attributes that are saved via getxattr will always have
* the stripe_offset as 0. Instead, the MDS should be
if (lump != NULL && S_ISREG(inode->i_mode)) {
struct file f;
__u64 it_flags = FMODE_WRITE;
- int lum_size = (lump->lmm_magic == LOV_USER_MAGIC_V1) ?
- sizeof(*lump) : sizeof(struct lov_user_md_v3);
+ int lum_size;
+
+ lum_size = ll_lov_user_md_size(lump);
+ if (lum_size < 0 || size < lum_size)
+ return 0; /* b=10667: ignore error */
memset(&f, 0, sizeof(f)); /* f.f_flags is used below */
f.f_dentry = dentry;
rc = ll_lov_setstripe_ea_info(inode, &f, it_flags, lump,
lum_size);
- /* b10667: rc always be 0 here for now */
+ /* b=10667: rc always be 0 here for now */
rc = 0;
} else if (S_ISDIR(inode->i_mode)) {
rc = ll_dir_setstripe(inode, lump, 0);
#define LMVEA_DELETE_VALUES(count, offset) \
((count) == 0 && (offset) == (typeof(offset))(-1))
+#define LOV_OFFSET_DEFAULT ((__u16)-1)
+
struct lod_qos_rr {
__u32 lqr_start_idx; /* start index of new inode */
__u32 lqr_offset_idx; /* aliasing for start_idx */
GOTO(out, rc = -EINVAL);
}
- /* an offset of -1 is treated as a "special" valid offset */
stripe_offset = le16_to_cpu(lum->lmm_stripe_offset);
- if (stripe_offset != (typeof(stripe_offset))-1) {
+ if (stripe_offset != LOV_OFFSET_DEFAULT) {
/* if offset is not within valid range [0, osts_size) */
if (stripe_offset >= d->lod_osts_size) {
CDEBUG(D_IOCTL, "stripe offset %u >= bitmap size %u\n",
if (pool == NULL)
goto out;
- if (stripe_offset != (typeof(stripe_offset))-1) {
+ if (stripe_offset != LOV_OFFSET_DEFAULT) {
rc = lod_check_index_in_pool(stripe_offset, pool);
if (rc < 0)
GOTO(out, rc = -EINVAL);
if (likely(parent)) {
lod_cache_parent_striping(env, lp, child_mode);
- lc->ldo_def_stripe_offset = (__u16) -1;
+ lc->ldo_def_stripe_offset = LOV_OFFSET_DEFAULT;
if (lp->ldo_def_striping_set) {
if (lp->ldo_pool)
}
/**
+ * Allocate a specific striping layout on a user defined set of OSTs.
+ *
+ * Allocates new striping using the OST index range provided by the data from
+ * the lmm_objects contained in the lov_user_md passed to this method. Full
+ * OSTs are not considered. The exact order of OSTs requested by the user
+ * is respected as much as possible depending on OST status. The number of
+ * stripes needed and stripe offset are taken from the object. If that number
+ * can not be met, then the function returns a failure and then it's the
+ * caller's responsibility to release the stripes allocated. All the internal
+ * structures are protected, but no concurrent allocation is allowed on the
+ * same objects.
+ *
+ * \param[in] env execution environment for this thread
+ * \param[in] lo LOD object
+ * \param[out] stripe striping created
+ * \param[in] lum stripe md to specify list of OSTs
+ * \param[in] th transaction handle
+ *
+ * \retval 0 on success
+ * \retval -ENODEV OST index does not exist on file system
+ * \retval -EINVAL requested OST index is invalid
+ * \retval negative negated errno on error
+ */
+static int lod_alloc_ost_list(const struct lu_env *env,
+			      struct lod_object *lo, struct dt_object **stripe,
+			      struct lov_user_md *lum, struct thandle *th)
+{
+	struct lod_device	*m = lu2lod_dev(lo->ldo_obj.do_lu.lo_dev);
+	struct obd_statfs	*sfs = &lod_env_info(env)->lti_osfs;
+	struct dt_object	*o;
+	struct lov_user_md_v3	*v3;
+	unsigned int		 array_idx = 0;
+	int			 stripe_count = 0;
+	int			 i;
+	int			 rc;
+	ENTRY;
+
+	/* for specific OSTs layout */
+	LASSERT(lum != NULL && lum->lmm_magic == LOV_USER_MAGIC_SPECIFIC);
+	lustre_print_user_md(D_OTHER, lum, __func__);
+
+	v3 = (struct lov_user_md_v3 *)lum;
+
+	/*
+	 * lmm_objects[] holds lmm_stripe_count entries, while the loops below
+	 * walk ldo_stripenr of them.  The two can differ (e.g. a zero
+	 * lmm_stripe_count leaves ldo_stripenr at its previous value), so
+	 * refuse to read past the end of the user-supplied list.
+	 */
+	if (lo->ldo_stripenr > v3->lmm_stripe_count) {
+		CDEBUG(D_OTHER,
+		       "%s: %u stripes wanted but only %u OSTs listed\n",
+		       lod2obd(m)->obd_name, (unsigned int)lo->ldo_stripenr,
+		       (unsigned int)v3->lmm_stripe_count);
+		RETURN(-EINVAL);
+	}
+
+	rc = lod_qos_ost_in_use_clear(env, lo->ldo_stripenr);
+	if (rc < 0)
+		RETURN(rc);
+
+	/* locate the requested starting OST inside the user's list */
+	for (i = 0; i < lo->ldo_stripenr; i++) {
+		if (v3->lmm_objects[i].l_ost_idx == lo->ldo_def_stripe_offset) {
+			array_idx = i;
+			break;
+		}
+	}
+	if (i == lo->ldo_stripenr) {
+		CDEBUG(D_OTHER,
+		       "%s: start index %d not in the specified list of OSTs\n",
+		       lod2obd(m)->obd_name, lo->ldo_def_stripe_offset);
+		RETURN(-EINVAL);
+	}
+
+	/*
+	 * Walk the list once, starting at the requested OST and wrapping
+	 * around, so that the user's order is kept relative to the start.
+	 */
+	for (i = 0; i < lo->ldo_stripenr;
+	     i++, array_idx = (array_idx + 1) % lo->ldo_stripenr) {
+		__u32 ost_idx = v3->lmm_objects[array_idx].l_ost_idx;
+
+		if (!cfs_bitmap_check(m->lod_ost_bitmap, ost_idx)) {
+			rc = -ENODEV;
+			break;
+		}
+
+		/*
+		 * do not put >1 objects on a single OST
+		 */
+		if (lod_qos_is_ost_used(env, ost_idx, stripe_count)) {
+			rc = -EINVAL;
+			break;
+		}
+
+		rc = lod_statfs_and_check(env, m, ost_idx, sfs);
+		if (rc < 0) /* this OSP doesn't feel well */
+			break;
+
+		o = lod_qos_declare_object_on(env, m, ost_idx, th);
+		if (IS_ERR(o)) {
+			rc = PTR_ERR(o);
+			CDEBUG(D_OTHER,
+			       "%s: can't declare new object on #%u: %d\n",
+			       lod2obd(m)->obd_name, ost_idx, rc);
+			break;
+		}
+
+		/*
+		 * We've successfully declared (reserved) an object
+		 */
+		lod_qos_ost_in_use(env, stripe_count, ost_idx);
+		stripe[stripe_count] = o;
+		stripe_count++;
+	}
+
+	/* on failure the stripes declared so far are left in stripe[] */
+	RETURN(rc);
+}
+
+/**
* Allocate a striping on a predefined set of OSTs.
*
* Allocates new striping starting from OST provided lo->ldo_def_stripe_offset.
struct lod_device *d = lu2lod_dev(lod2lu_obj(lo)->lo_dev);
struct lov_user_md_v1 *v1 = NULL;
struct lov_user_md_v3 *v3 = NULL;
- struct pool_desc *pool;
+ char *pool_name = NULL;
__u32 magic;
int rc;
+ unsigned int size;
ENTRY;
if (buf == NULL || buf->lb_buf == NULL || buf->lb_len == 0)
RETURN(0);
+ v3 = buf->lb_buf;
v1 = buf->lb_buf;
magic = v1->lmm_magic;
- if (magic == __swab32(LOV_USER_MAGIC_V1)) {
+ if (unlikely(magic == LOV_MAGIC_V1_DEF || magic == LOV_MAGIC_V3_DEF)) {
+ /* try to use as fully defined striping */
+ rc = lod_use_defined_striping(env, lo, buf);
+ RETURN(rc);
+ }
+
+ switch (magic) {
+ case __swab32(LOV_USER_MAGIC_V1):
lustre_swab_lov_user_md_v1(v1);
magic = v1->lmm_magic;
- } else if (magic == __swab32(LOV_USER_MAGIC_V3)) {
- v3 = buf->lb_buf;
+ /* fall through */
+ case LOV_USER_MAGIC_V1:
+ size = sizeof(*v1);
+ break;
+
+ case __swab32(LOV_USER_MAGIC_V3):
lustre_swab_lov_user_md_v3(v3);
magic = v3->lmm_magic;
- }
+ /* fall through */
+ case LOV_USER_MAGIC_V3:
+ size = sizeof(*v3);
+ pool_name = v3->lmm_pool_name;
+ break;
- if (unlikely(magic != LOV_MAGIC_V1 && magic != LOV_MAGIC_V3)) {
- /* try to use as fully defined striping */
- rc = lod_use_defined_striping(env, lo, buf);
- RETURN(rc);
+ case __swab32(LOV_USER_MAGIC_SPECIFIC):
+ lustre_swab_lov_user_md_v3(v3);
+ lustre_swab_lov_user_md_objects(v3->lmm_objects,
+ v3->lmm_stripe_count);
+ magic = v3->lmm_magic;
+ /* fall through */
+ case LOV_USER_MAGIC_SPECIFIC:
+ if (v3->lmm_stripe_offset == LOV_OFFSET_DEFAULT)
+ v3->lmm_stripe_offset = v3->lmm_objects[0].l_ost_idx;
+ if (v3->lmm_pool_name[0] != '\0')
+ pool_name = v3->lmm_pool_name;
+ size = lov_user_md_size(v3->lmm_stripe_count,
+ LOV_USER_MAGIC_SPECIFIC);
+ break;
+
+ default:
+ CERROR("%s: unrecognized magic %X\n",
+ lod2obd(d)->obd_name, magic);
+ RETURN(-EINVAL);
}
- if (unlikely(buf->lb_len < sizeof(*v1))) {
- CERROR("wrong size: %u\n", (unsigned) buf->lb_len);
+ if (unlikely(buf->lb_len < size)) {
+ CERROR("%s: wrong size: %zd, expect: %u\n",
+ lod2obd(d)->obd_name, buf->lb_len, size);
RETURN(-EINVAL);
}
+ lustre_print_user_md(D_OTHER, v1, "parse config");
+
v1->lmm_magic = magic;
if (v1->lmm_pattern == 0)
v1->lmm_pattern = LOV_PATTERN_RAID0;
if (lov_pattern(v1->lmm_pattern) != LOV_PATTERN_RAID0) {
- CERROR("invalid pattern: %x\n", v1->lmm_pattern);
+ CERROR("%s: invalid pattern: %x\n",
+ lod2obd(d)->obd_name, v1->lmm_pattern);
RETURN(-EINVAL);
}
lo->ldo_pattern = v1->lmm_pattern;
- if (v1->lmm_stripe_size)
+ if (v1->lmm_stripe_size > 0)
lo->ldo_stripe_size = v1->lmm_stripe_size;
+
if (lo->ldo_stripe_size & (LOV_MIN_STRIPE_SIZE - 1))
lo->ldo_stripe_size = LOV_MIN_STRIPE_SIZE;
- if (v1->lmm_stripe_count)
+ if (v1->lmm_stripe_count > 0)
lo->ldo_stripenr = v1->lmm_stripe_count;
- if ((v1->lmm_stripe_offset >= d->lod_desc.ld_tgt_count) &&
- (v1->lmm_stripe_offset != (typeof(v1->lmm_stripe_offset))(-1))) {
- CERROR("invalid offset: %x\n", v1->lmm_stripe_offset);
- RETURN(-EINVAL);
- }
lo->ldo_def_stripe_offset = v1->lmm_stripe_offset;
- CDEBUG(D_OTHER, "lsm: %u size, %u stripes, %u offset\n",
- v1->lmm_stripe_size, v1->lmm_stripe_count,
- v1->lmm_stripe_offset);
-
- if (v1->lmm_magic == LOV_MAGIC_V3) {
- if (buf->lb_len < sizeof(*v3)) {
- CERROR("wrong size: %u\n", (unsigned) buf->lb_len);
- RETURN(-EINVAL);
- }
-
- v3 = buf->lb_buf;
- lod_object_set_pool(lo, v3->lmm_pool_name);
+ lod_object_set_pool(lo, NULL);
+ if (pool_name != NULL) {
+ struct pool_desc *pool;
/* In the function below, .hs_keycmp resolves to
* pool_hashkey_keycmp() */
/* coverity[overrun-buffer-val] */
- pool = lod_find_pool(d, v3->lmm_pool_name);
+ pool = lod_find_pool(d, pool_name);
if (pool != NULL) {
- if (lo->ldo_def_stripe_offset !=
- (typeof(v1->lmm_stripe_offset))(-1)) {
- rc = lo->ldo_def_stripe_offset;
- rc = lod_check_index_in_pool(rc, pool);
+ if (lo->ldo_def_stripe_offset != LOV_OFFSET_DEFAULT) {
+ rc = lod_check_index_in_pool(
+ lo->ldo_def_stripe_offset, pool);
if (rc < 0) {
lod_pool_putref(pool);
- CERROR("invalid offset\n");
+ CERROR("%s: invalid offset, %u\n",
+ lod2obd(d)->obd_name,
+ lo->ldo_def_stripe_offset);
RETURN(-EINVAL);
}
}
if (lo->ldo_stripenr > pool_tgt_count(pool))
- lo->ldo_stripenr= pool_tgt_count(pool);
+ lo->ldo_stripenr = pool_tgt_count(pool);
lod_pool_putref(pool);
}
- } else
- lod_object_set_pool(lo, NULL);
+
+ lod_object_set_pool(lo, pool_name);
+ }
/* fixup for released file */
if (lo->ldo_pattern & LOV_PATTERN_F_RELEASED) {
GOTO(out, rc = 0);
if (likely(lo->ldo_stripe == NULL)) {
+ struct lov_user_md *lum = NULL;
+
/*
* no striping has been created so far
*/
*/
lod_qos_statfs_update(env, d);
lo->ldo_stripenr = lod_get_stripecnt(d, LOV_MAGIC,
- lo->ldo_stripenr);
+ lo->ldo_stripenr);
stripe_len = lo->ldo_stripenr;
OBD_ALLOC(stripe, sizeof(stripe[0]) * stripe_len);
/* XXX: support for non-0 files w/o objects */
CDEBUG(D_OTHER, "tgt_count %d stripenr %d\n",
d->lod_desc.ld_tgt_count, stripe_len);
- if (lo->ldo_def_stripe_offset >= d->lod_desc.ld_tgt_count) {
+
+ if (buf != NULL && buf->lb_buf != NULL)
+ lum = buf->lb_buf;
+
+ if (lum != NULL && lum->lmm_magic == LOV_USER_MAGIC_SPECIFIC) {
+ rc = lod_alloc_ost_list(env, lo, stripe, lum, th);
+ } else if (lo->ldo_def_stripe_offset == LOV_OFFSET_DEFAULT) {
rc = lod_alloc_qos(env, lo, stripe, flag, th);
if (rc == -EAGAIN)
rc = lod_alloc_rr(env, lo, stripe, flag, th);
} else {
stripe_count = 0;
}
- } else {
- /* No need to allocate more than maximum supported stripes.
- * Anyway, this is pretty inaccurate since ld_tgt_count now
- * represents max index and we should rely on the actual number
- * of OSTs instead */
- stripe_count = lov_mds_md_max_stripe_count(
- lov->lov_ocd.ocd_max_easize, lmm_magic);
-
- if (stripe_count > lov->desc.ld_tgt_count)
- stripe_count = lov->desc.ld_tgt_count;
- }
+ } else {
+ /* To calculate maximum easize by active targets at present,
+ * which is exactly the maximum easize to be seen by LOV */
+ stripe_count = lov->desc.ld_active_tgt_count;
+ }
/* XXX LOV STACKING call into osc for sizes */
lmm_size = lov_mds_md_size(stripe_count, lmm_magic);
lum_size = sizeof(struct lov_user_md_v1);
if (copy_from_user(&lum, lump, lum_size))
GOTO(out_set, rc = -EFAULT);
- else if ((lum.lmm_magic != LOV_USER_MAGIC) &&
- (lum.lmm_magic != LOV_USER_MAGIC_V3))
- GOTO(out_set, rc = -EINVAL);
+
+ if (lum.lmm_magic != LOV_USER_MAGIC_V1 &&
+ lum.lmm_magic != LOV_USER_MAGIC_V3 &&
+ lum.lmm_magic != LOV_USER_MAGIC_SPECIFIC)
+ GOTO(out_set, rc = -EINVAL);
if (lum.lmm_stripe_count &&
(lum.lmm_stripe_count < lsm->lsm_stripe_count)) {
fsdb->fsdb_mdt_count ++;
}
- if (mti->mti_stripe_index >= INDEX_MAP_SIZE * 8) {
- LCONSOLE_ERROR_MSG(0x13f, "Server %s requested index %d, "
- "but the max index is %d.\n",
- mti->mti_svname, mti->mti_stripe_index,
- INDEX_MAP_SIZE * 8);
+ /* the last index(0xffff) is reserved for default value. */
+ if (mti->mti_stripe_index >= INDEX_MAP_SIZE * 8 - 1) {
+ LCONSOLE_ERROR_MSG(0x13f, "Server %s requested index %u, "
+ "but index must be less than %u.\n",
+ mti->mti_svname, mti->mti_stripe_index,
+ INDEX_MAP_SIZE * 8 - 1);
GOTO(out_up, rc = -ERANGE);
- }
+ }
if (test_bit(mti->mti_stripe_index, imap)) {
if ((mti->mti_flags & LDD_F_VIRGIN) &&
sizeof(struct llog_rec_tail));
/* get the last record in block */
last_rec = (struct llog_rec_hdr *)((char *)buf + rc -
- le32_to_cpu(tail->lrt_len));
+ tail->lrt_len);
if (LLOG_REC_HDR_NEEDS_SWABBING(last_rec))
lustre_swab_llog_rec(last_rec);
lgi->lgi_buf.lb_buf = idarray;
lgi->lgi_buf.lb_len = size;
rc = dt_record_read(env, o, &lgi->lgi_buf, &lgi->lgi_off);
- if (rc) {
+ /* -EFAULT means the llog is a sparse file. This is not an error
+ * now that arbitrary OST indices are supported. */
+ if (rc < 0 && rc != -EFAULT) {
CERROR("%s: error reading CATALOGS: rc = %d\n",
o->do_lu.lo_dev->ld_obd->obd_name, rc);
GOTO(out, rc);
struct osp_thread_info *osi = osp_env_info(env);
struct lu_fid *fid = &osp->opd_last_used_fid;
struct dt_object *dto;
- int rc;
+ int rc = -EFAULT;
ENTRY;
dto = osp_find_or_create_local_file(env, osp, &osi->osi_attr,
osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, &fid->f_oid,
osp->opd_index);
rc = dt_record_read(env, dto, &osi->osi_lb, &osi->osi_off);
- if (rc != 0)
+ if (rc != 0 && rc != -EFAULT)
GOTO(out, rc);
- } else {
+ }
+
+ if (rc == -EFAULT) { /* fresh LAST_ID */
fid->f_oid = 0;
osp_objid_buf_prep(&osi->osi_lb, &osi->osi_off, &fid->f_oid,
osp->opd_index);
struct osp_thread_info *osi = osp_env_info(env);
struct lu_fid *fid = &osp->opd_last_used_fid;
struct dt_object *dto;
- int rc;
+ int rc = -EFAULT;
ENTRY;
dto = osp_find_or_create_local_file(env, osp, &osi->osi_attr,
osp_objseq_buf_prep(&osi->osi_lb, &osi->osi_off, &fid->f_seq,
osp->opd_index);
rc = dt_record_read(env, dto, &osi->osi_lb, &osi->osi_off);
- if (rc != 0)
+ if (rc != 0 && rc != -EFAULT)
GOTO(out, rc);
- } else {
+ }
+
+ if (rc == -EFAULT) { /* fresh OSP */
fid->f_seq = 0;
osp_objseq_buf_prep(&osi->osi_lb, &osi->osi_off, &fid->f_seq,
osp->opd_index);
rc = osp_write_local_file(env, osp, dto, &osi->osi_lb,
osi->osi_off);
+ if (rc != 0)
+ GOTO(out, rc);
}
osp->opd_last_used_seq_file = dto;
RETURN(0);
rc = llog_osd_get_cat_list(env, d->opd_storage, d->opd_index, 1,
&osi->osi_cid, fid);
- if (rc) {
- CERROR("%s: can't get id from catalogs: rc = %d\n",
- obd->obd_name, rc);
- RETURN(rc);
+ if (rc < 0) {
+ if (rc != -EFAULT) {
+ CERROR("%s: can't get id from catalogs: rc = %d\n",
+ obd->obd_name, rc);
+ RETURN(rc);
+ }
+
+ /* Now that sparse OST indices are supported, the CATALOG
+ * file may itself be sparse, which makes reading it fail.
+ * Skip this error as the llog will be created later. */
+ memset(&osi->osi_cid, 0, sizeof(osi->osi_cid));
+ rc = 0;
}
CDEBUG(D_INFO, "%s: Init llog for %d - catid "DOSTID":%x\n",
}
EXPORT_SYMBOL(lustre_swab_lmv_user_md);
-static void print_lum (struct lov_user_md *lum)
+void lustre_print_user_md(unsigned int lvl, struct lov_user_md *lum,
+ const char *msg)
{
- CDEBUG(D_OTHER, "lov_user_md %p:\n", lum);
- CDEBUG(D_OTHER, "\tlmm_magic: %#x\n", lum->lmm_magic);
- CDEBUG(D_OTHER, "\tlmm_pattern: %#x\n", lum->lmm_pattern);
- CDEBUG(D_OTHER, "\tlmm_object_id: "LPU64"\n", lmm_oi_id(&lum->lmm_oi));
- CDEBUG(D_OTHER, "\tlmm_object_gr: "LPU64"\n", lmm_oi_seq(&lum->lmm_oi));
- CDEBUG(D_OTHER, "\tlmm_stripe_size: %#x\n", lum->lmm_stripe_size);
- CDEBUG(D_OTHER, "\tlmm_stripe_count: %#x\n", lum->lmm_stripe_count);
- CDEBUG(D_OTHER, "\tlmm_stripe_offset/lmm_layout_gen: %#x\n",
- lum->lmm_stripe_offset);
+ if (likely(!cfs_cdebug_show(lvl, DEBUG_SUBSYSTEM)))
+ return;
+
+ CDEBUG(lvl, "%s lov_user_md %p:\n", msg, lum);
+ CDEBUG(lvl, "\tlmm_magic: %#x\n", lum->lmm_magic);
+ CDEBUG(lvl, "\tlmm_pattern: %#x\n", lum->lmm_pattern);
+ CDEBUG(lvl, "\tlmm_object_id: "LPU64"\n", lmm_oi_id(&lum->lmm_oi));
+ CDEBUG(lvl, "\tlmm_object_gr: "LPU64"\n", lmm_oi_seq(&lum->lmm_oi));
+ CDEBUG(lvl, "\tlmm_stripe_size: %#x\n", lum->lmm_stripe_size);
+ CDEBUG(lvl, "\tlmm_stripe_count: %#x\n", lum->lmm_stripe_count);
+ CDEBUG(lvl, "\tlmm_stripe_offset/lmm_layout_gen: %#x\n",
+ lum->lmm_stripe_offset);
+ if (lum->lmm_magic == LOV_USER_MAGIC_V3) {
+ struct lov_user_md_v3 *v3 = (void *)lum;
+ CDEBUG(lvl, "\tlmm_pool_name: %s\n", v3->lmm_pool_name);
+ }
+ if (lum->lmm_magic == LOV_USER_MAGIC_SPECIFIC) {
+ struct lov_user_md_v3 *v3 = (void *)lum;
+ int i;
+
+ if (v3->lmm_pool_name[0] != '\0')
+ CDEBUG(lvl, "\tlmm_pool_name: %s\n", v3->lmm_pool_name);
+
+ CDEBUG(lvl, "\ttarget list:\n");
+ for (i = 0; i < v3->lmm_stripe_count; i++)
+ CDEBUG(lvl, "\t\t%u\n", v3->lmm_objects[i].l_ost_idx);
+ }
}
+EXPORT_SYMBOL(lustre_print_user_md);
static void lustre_swab_lmm_oi(struct ost_id *oi)
{
__swab32s(&lum->lmm_stripe_size);
__swab16s(&lum->lmm_stripe_count);
__swab16s(&lum->lmm_stripe_offset);
- print_lum(lum);
EXIT;
}
}
run_test 80 "mgc import reconnect race"
+# Save the original values of $OSTCOUNT and $OSTINDEX$i.
+save_ostindex() {
+ local new_ostcount=$1
+ saved_ostcount=$OSTCOUNT
+ OSTCOUNT=$new_ostcount
+
+ local i
+ local index
+ for i in $(seq $OSTCOUNT); do
+ index=OSTINDEX$i
+ eval saved_ostindex$i=${!index}
+ eval OSTINDEX$i=""
+ done
+}
+
+# Restore the original values of $OSTCOUNT and $OSTINDEX$i.
+restore_ostindex() {
+ trap 0
+
+ local i
+ local index
+ for i in $(seq $OSTCOUNT); do
+ index=saved_ostindex$i
+ eval OSTINDEX$i=${!index}
+ done
+ OSTCOUNT=$saved_ostcount
+
+ formatall
+}
+
+# The main purpose of this test is to ensure the OST_INDEX_LIST functions as
+# expected. This test uses OST_INDEX_LIST to format OSTs with a randomly
+# assigned index and ensures we can mount such a formatted file system.
+# Before that, it checks that formatting ost1 with an index of 65535 or more
+# is refused.
+test_81() { # LU-4665
+ [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.6.54) ]] ||
+ { skip "Need MDS version at least 2.6.54" && return; }
+ [[ $OSTCOUNT -ge 3 ]] || { skip_env "Need at least 3 OSTs" && return; }
+
+ stopall
+
+ # Each time RANDOM is referenced, a random integer between 0 and 32767
+ # is generated.
+ # The loop below therefore tries index 65535 and one random index in
+ # the range 65536..98303; both formats are expected to fail.
+ local i
+ local saved_ostindex1=$OSTINDEX1
+ for i in 65535 $((RANDOM + 65536)); do
+ echo -e "\nFormat ost1 with --index=$i, should fail"
+ OSTINDEX1=$i
+ if add ost1 $(mkfs_opts ost1 $(ostdevname 1)) --reformat \
+ $(ostdevname 1) $(ostvdevname 1); then
+ # Put the real index back before reporting the failure.
+ OSTINDEX1=$saved_ostindex1
+ error "format ost1 with --index=$i should fail"
+ fi
+ done
+ OSTINDEX1=$saved_ostindex1
+
+ # From here on only 3 OSTs are used; the EXIT trap restores the
+ # original settings if the test aborts.
+ save_ostindex 3
+
+ # Format OSTs with random sparse indices.
+ # The middle index evaluates to a value in 1..65533, so it never
+ # collides with the fixed first (0) and last (65534) indices.
+ trap "restore_ostindex" EXIT
+ echo -e "\nFormat $OSTCOUNT OSTs with sparse indices"
+ OST_INDEX_LIST=[0,$((RANDOM * 2 % 65533 + 1)),65534] formatall
+
+ # Setup and check Lustre filesystem.
+ start_mgsmds || error "start_mgsmds failed"
+ for i in $(seq $OSTCOUNT); do
+ start ost$i $(ostdevname $i) $OST_MOUNT_OPTS ||
+ error "start ost$i failed"
+ done
+
+ mount_client $MOUNT || error "mount client $MOUNT failed"
+ check_mount || error "check client $MOUNT failed"
+
+ # Check max_easize.
+ # NOTE(review): the expected value of 128 bytes is taken as given
+ # here; confirm how it is derived for a 3-OST file system.
+ local max_easize=$($LCTL get_param -n llite.*.max_easize)
+ [[ $max_easize -eq 128 ]] ||
+ error "max_easize is $max_easize, should be 128 bytes"
+
+ restore_ostindex
+}
+run_test 81 "sparse OST indexing"
+
+# Wait for the OSTs to become active on both the client and the MDT side.
+# Each check counts the lines containing "ACTIVE" in the target_obd
+# parameter and passes $OSTCOUNT as the value to wait for; error() is
+# called if either wait fails.
+wait_osts_up() {
+ # Client side: lov.$FSNAME-clilov-*.target_obd on this host.
+ local cmd="$LCTL get_param -n lov.$FSNAME-clilov-*.target_obd |
+ awk 'BEGIN {c = 0} /ACTIVE/{c += 1} END {printf \\\"%d\\\", c}'"
+ wait_update $HOSTNAME "eval $cmd" $OSTCOUNT ||
+ error "wait_update OSTs up on client failed"
+
+ # MDT side: lod.$FSNAME-MDT*-*.target_obd on $SINGLEMDS, with
+ # duplicate lines removed before counting.
+ cmd="$LCTL get_param -n lod.$FSNAME-MDT*-*.target_obd | sort -u |
+ awk 'BEGIN {c = 0} /ACTIVE/{c += 1} END {printf \\\"%d\\\", c}'"
+ wait_update_facet $SINGLEMDS "eval $cmd" $OSTCOUNT ||
+ error "wait_update OSTs up on MDT failed"
+}
+
+# Here we exercise the stripe placement functionality on a file system that
+# has formatted the OST with a random index. With the file system the following
+# functionality is tested:
+#
+# 1. Creating a new file with a specific stripe layout.
+#
+# 2. Modify an existing empty file with a specific stripe layout.
+#
+# 3. Ensure we fail to set the stripe layout of a file that already has one.
+#
+# 4. If ost-index is defined we need to ensure it is the first entry in the
+# ost index list returned by lfs getstripe.
+#
+# 5. Lastly ensure this functionality fails with directories.
+test_82a() { # LU-4665
+ [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.6.54) ]] ||
+ { skip "Need MDS version at least 2.6.54" && return; }
+ [[ $OSTCOUNT -ge 3 ]] || { skip_env "Need at least 3 OSTs" && return; }
+
+ stopall
+
+ # Run with exactly 3 OSTs; restore_ostindex puts the original
+ # settings back at the end (or from the EXIT trap set below).
+ save_ostindex 3
+
+ # Format OSTs with random sparse indices.
+ # Each index is an even number in 0..65534.
+ # NOTE(review): nothing here prevents two OSTs from drawing the same
+ # random index; confirm how comma_list/formatall behave in that case.
+ local i
+ local index
+ local ost_indices
+ for i in $(seq $OSTCOUNT); do
+ index=$((RANDOM * 2))
+ ost_indices+=" $index"
+ done
+ ost_indices=$(comma_list $ost_indices)
+
+ trap "restore_ostindex" EXIT
+ echo -e "\nFormat $OSTCOUNT OSTs with sparse indices $ost_indices"
+ OST_INDEX_LIST=[$ost_indices] formatall
+
+ # Setup Lustre filesystem.
+ start_mgsmds || error "start_mgsmds failed"
+ for i in $(seq $OSTCOUNT); do
+ start ost$i $(ostdevname $i) $OST_MOUNT_OPTS ||
+ error "start ost$i failed"
+ done
+
+ mount_client $MOUNT || error "mount client $MOUNT failed"
+ wait_osts_up
+
+ $LFS df $MOUNT || error "$LFS df $MOUNT failed"
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+
+ # 1. If the file does not exist, new file will be created
+ # with specified OSTs.
+ local file=$DIR/$tdir/$tfile-1
+ local cmd="$SETSTRIPE -o $ost_indices $file"
+ echo -e "\n$cmd"
+ eval $cmd || error "$cmd failed"
+ check_stripe_count $file $OSTCOUNT
+ check_obdidx $file $ost_indices
+ dd if=/dev/urandom of=$file count=1 bs=1M > /dev/null 2>&1 ||
+ error "write $file failed"
+
+ # 2. If the file already exists and is an empty file, the file
+ # will be attached with specified layout.
+ file=$DIR/$tdir/$tfile-2
+ mcreate $file || error "mcreate $file failed"
+ cmd="$SETSTRIPE -o $ost_indices $file"
+ echo -e "\n$cmd"
+ eval $cmd || error "$cmd failed"
+ dd if=/dev/urandom of=$file count=1 bs=1M > /dev/null 2>&1 ||
+ error "write $file failed"
+ check_stripe_count $file $OSTCOUNT
+ check_obdidx $file $ost_indices
+
+ # 3. If the file already has a valid layout attached, the command
+ # should fail with EBUSY.
+ # $cmd still targets $tfile-2, which received its layout in step 2.
+ echo -e "\n$cmd"
+ eval $cmd && error "stripe is already set on $file, $cmd should fail"
+
+ # 4. If [--stripe-index|-i <start_ost_idx>] is used, the index must
+ # be in the OST indices list.
+ # The last entry of the comma-separated list is used as start index.
+ local start_ost_idx=${ost_indices##*,}
+ file=$DIR/$tdir/$tfile-3
+ cmd="$SETSTRIPE -o $ost_indices -i $start_ost_idx $file"
+ echo -e "\n$cmd"
+ eval $cmd || error "$cmd failed"
+ check_stripe_count $file $OSTCOUNT
+ check_obdidx $file $ost_indices
+ check_start_ost_idx $file $start_ost_idx
+
+ # Negative case: the start index is removed from the -o list, so the
+ # command is expected to fail.
+ file=$DIR/$tdir/$tfile-4
+ cmd="$SETSTRIPE"
+ cmd+=" -o $(exclude_items_from_list $ost_indices $start_ost_idx)"
+ cmd+=" -i $start_ost_idx $file"
+ echo -e "\n$cmd"
+ eval $cmd && error "index $start_ost_idx should be in $ost_indices"
+
+ # 5. Specifying OST indices for directory should fail with ENOSUPP.
+ local dir=$DIR/$tdir/$tdir
+ mkdir $dir || error "mkdir $dir failed"
+ cmd="$SETSTRIPE -o $ost_indices $dir"
+ echo -e "\n$cmd"
+ eval $cmd && error "$cmd should fail, specifying OST indices" \
+ "for directory is not supported"
+
+ restore_ostindex
+}
+run_test 82a "specify OSTs for file (succeed) or directory (fail)"
+
+# Cleanup for test_82b: reset the EXIT trap, destroy the pool named by
+# $ost_pool (failures are ignored) and restore the saved OST settings.
+# $ost_pool is not defined here; it is the variable set by test_82b.
+cleanup_82b() {
+ trap 0
+
+ # Remove OSTs from a pool and destroy the pool.
+ destroy_pool $ost_pool || true
+
+ restore_ostindex
+}
+
+# Test 82b is run to ensure that if the user supplies a pool with a specific
+# stripe layout that it behaves properly. It should fail in the case that
+# the supplied OST index list points to OSTs not contained in the user
+# supplied pool.
+test_82b() { # LU-4665
+ [[ $(lustre_version_code $SINGLEMDS) -ge $(version_code 2.6.54) ]] ||
+ { skip "Need MDS version at least 2.6.54" && return; }
+ [[ $OSTCOUNT -ge 4 ]] || { skip_env "Need at least 4 OSTs" && return; }
+
+ stopall
+
+ # Run with exactly 4 OSTs: three go into the pool, one stays outside.
+ save_ostindex 4
+
+ # Format OSTs with random sparse indices.
+ # NOTE(review): nothing here prevents two OSTs from drawing the same
+ # random index; confirm how comma_list/formatall behave in that case.
+ local i
+ local index
+ local ost_indices
+ for i in $(seq $OSTCOUNT); do
+ index=$((RANDOM * 2))
+ ost_indices+=" $index"
+ done
+ ost_indices=$(comma_list $ost_indices)
+
+ trap "restore_ostindex" EXIT
+ echo -e "\nFormat $OSTCOUNT OSTs with sparse indices $ost_indices"
+ OST_INDEX_LIST=[$ost_indices] formatall
+
+ # Setup Lustre filesystem.
+ start_mgsmds || error "start_mgsmds failed"
+ for i in $(seq $OSTCOUNT); do
+ start ost$i $(ostdevname $i) $OST_MOUNT_OPTS ||
+ error "start ost$i failed"
+ done
+
+ mount_client $MOUNT || error "mount client $MOUNT failed"
+ wait_osts_up
+ $LFS df $MOUNT || error "$LFS df $MOUNT failed"
+ mkdir $DIR/$tdir || error "mkdir $DIR/$tdir failed"
+
+ # Create a new pool and add OSTs into it.
+ local ost_pool=$FSNAME.$TESTNAME
+ create_pool $ost_pool || error "create OST pool $ost_pool failed"
+
+ # Now that the pool exists, switch the EXIT handler to the cleanup
+ # that also destroys the pool.
+ trap - EXIT
+ trap "cleanup_82b" EXIT
+
+ # The last index of the list is kept out of the pool; every other
+ # index becomes a pool member.
+ local ost_idx_in_list=${ost_indices##*,}
+ local ost_idx_in_pool=$(exclude_items_from_list $ost_indices \
+ $ost_idx_in_list)
+
+ # Build "$FSNAME-OST[xxxx,yyyy,...]" with 4-digit hex indices.
+ local ost_targets="$FSNAME-OST["
+ for i in ${ost_idx_in_pool//,/ }; do
+ ost_targets=$ost_targets$(printf "%04x," $i)
+ done
+ ost_targets="${ost_targets%,}]"
+
+ # Sorted, space-separated UUID list used to detect when the pool
+ # membership reported by the client matches.
+ local ost_targets_uuid=$(for i in ${ost_idx_in_pool//,/ }; \
+ do printf "$FSNAME-OST%04x_UUID\n" $i; done |
+ sort -u | tr '\n' ' ')
+
+ local cmd="$LCTL pool_add $ost_pool $ost_targets"
+ do_facet mgs $cmd || error "$cmd failed"
+ wait_update $HOSTNAME "$LCTL get_param -n lov.$FSNAME-*.pools.$TESTNAME|
+ sort -u | tr '\n' ' ' " "$ost_targets_uuid" ||
+ error "wait_update $ost_pool failed"
+ pool_list $ost_pool || error "list OST pool $ost_pool failed"
+
+ # If [--pool|-p <pool_name>] is set with [--ost-list|-o <ost_indices>],
+ # then the OSTs must be the members of the pool.
+ # $ost_idx_in_list is the one OST left out of the pool, so this
+ # setstripe is expected to fail.
+ local file=$DIR/$tdir/$tfile
+ cmd="$SETSTRIPE -p $ost_pool -o $ost_idx_in_list $file"
+ echo -e "\n$cmd"
+ eval $cmd && error "OST with index $ost_idx_in_list should be" \
+ "in OST pool $ost_pool"
+
+ # Only select OST $ost_idx_in_list from $ost_pool for file.
+ # Dropping the first pool member leaves the remaining two indices,
+ # hence the expected stripe count of 2 below.
+ ost_idx_in_list=${ost_idx_in_pool#*,}
+ cmd="$SETSTRIPE -p $ost_pool -o $ost_idx_in_list $file"
+ echo -e "\n$cmd"
+ eval $cmd || error "$cmd failed"
+ cmd="$GETSTRIPE $file"
+ echo -e "\n$cmd"
+ eval $cmd || error "$cmd failed"
+ check_stripe_count $file 2
+ check_obdidx $file $ost_idx_in_list
+ dd if=/dev/urandom of=$file count=1 bs=1M > /dev/null 2>&1 ||
+ error "write $file failed"
+
+ cleanup_82b
+}
+run_test 82b "specify OSTs for file with --pool and --ost-list options"
+
if ! combined_mgs_mds ; then
stop mgs
fi
}
run_test 56v "check 'lfs find -mdt match with lfs getstripe -M' ======="
-# Get and check the actual stripe count of one file.
-# Usage: check_stripe_count <file> <expected_stripe_count>
-check_stripe_count() {
- local file=$1
- local expected=$2
- local actual
-
- [[ -z "$file" || -z "$expected" ]] &&
- error "check_stripe_count: invalid argument!"
-
- local cmd="$GETSTRIPE -c $file"
- actual=$($cmd) || error "$cmd failed"
- actual=${actual%% *}
-
- if [[ $actual -ne $expected ]]; then
- [[ $expected -eq -1 ]] ||
- error "$cmd wrong: found $actual, expected $expected"
- [[ $actual -eq $OSTCOUNT ]] ||
- error "$cmd wrong: found $actual, expected $OSTCOUNT"
- fi
-}
-
test_56w() {
[ $PARALLEL == "yes" ] && skip "skip parallel run" && return
TDIR=$DIR/${tdir}w
error_noexit "Pool $FSNAME.$pool is not destroyed"
return 3
}
+
+# Compare the stripe count reported by "$GETSTRIPE -c" with the expected one.
+# An expected count of -1 is accepted when the file has $OSTCOUNT stripes.
+# Usage: check_stripe_count <file> <expected_stripe_count>
+check_stripe_count() {
+	local file=$1
+	local expected=$2
+	local cmd
+	local found
+
+	if [[ -z "$file" || -z "$expected" ]]; then
+		error "check_stripe_count: invalid argument"
+	fi
+
+	cmd="$GETSTRIPE -c $file"
+	found=$($cmd) || error "$cmd failed"
+	# Keep only the text before the first space of the output.
+	found=${found%% *}
+
+	# Exact match: nothing more to verify.
+	[[ $found -ne $expected ]] || return 0
+
+	[[ $expected -eq -1 ]] ||
+		error "$cmd wrong: found $found, expected $expected"
+	[[ $found -eq $OSTCOUNT ]] ||
+		error "$cmd wrong: found $found, expected $OSTCOUNT"
+}
+
+# Compare the OST indices reported by $GETSTRIPE for a file with the
+# expected comma-separated list.
+# Usage: check_obdidx <file> <expected_comma_separated_list_of_ost_indices>
+check_obdidx() {
+	local file=$1
+	local expected=$2
+	local rows
+	local obdidx
+
+	if [[ -z "$file" || -z "$expected" ]]; then
+		error "check_obdidx: invalid argument!"
+	fi
+
+	# First column of the (up to $OSTCOUNT) lines that follow the
+	# "obdidx" header line in the getstripe output.
+	rows=$($GETSTRIPE $file | grep -A $OSTCOUNT obdidx |
+		grep -v obdidx | awk '{print $1}' | xargs)
+	obdidx=$(comma_list $rows)
+
+	if [[ $obdidx != $expected ]]; then
+		error "list of OST indices on $file is $obdidx," \
+		      "should be $expected"
+	fi
+}
+
+# Get and check the actual OST index of the first stripe on one file.
+# Usage: check_start_ost_idx <file> <expected_start_ost_idx>
+check_start_ost_idx() {
+ local file=$1
+ local expected=$2
+ local start_ost_idx
+
+ [[ -z "$file" || -z "$expected" ]] &&
+ error "check_start_ost_idx: invalid argument!"
+
+ start_ost_idx=$($GETSTRIPE $file | grep -A 1 obdidx | grep -v obdidx |
+ awk '{print $1}')
+
+ [[ $start_ost_idx = $expected ]] ||
+ error "OST index of the first stripe on $file is" \
+ "$start_ost_idx, should be $expected"
+}
" [--stripe-size|-S <stripe_size>]\n"\
" [--pool|-p <pool_name>]\n"\
" [--block|-b] "_tgt"\n"\
+ " [--ost-list|-o <ost_indices>]\n"\
"\tstripe_size: Number of bytes on each OST (0 filesystem default)\n"\
"\t Can be specified with k, m or g (in KB, MB and GB\n"\
"\t respectively)\n"\
"\tstart_ost_idx: OST index of first stripe (-1 default)\n"\
"\tstripe_count: Number of OSTs to stripe over (0 default, -1 all)\n"\
"\tpool_name: Name of OST pool to use (default none)\n"\
- "\tblock: Block file access during data migration"
+ "\tblock: Block file access during data migration\n"\
+ "\tost_indices: List of OST indices, can be repeated multiple times\n"\
+ "\t Indices be specified in a format of:\n"\
+ "\t -o <ost_1>,<ost_i>-<ost_j>,<ost_n>\n"\
+ "\t Or:\n"\
+ "\t -o <ost_1> -o <ost_i>-<ost_j> -o <ost_n>\n"\
+ "\t If --pool is set with --ost-list, then the OSTs\n"\
+ "\t must be the members of the pool."
/* all available commands */
command_t cmdlist[] = {
#define MIGRATION_BLOCKS 1
-static int lfs_migrate(char *name, unsigned long long stripe_size,
- int stripe_offset, int stripe_count,
- int stripe_pattern, char *pool_name,
- __u64 migration_flags)
+static int lfs_migrate(char *name, __u64 migration_flags,
+ struct llapi_stripe_param *param)
{
int fd, fdv;
char volatile_file[PATH_MAX +
/* create, open a volatile file, use caching (ie no directio) */
/* exclusive create is not needed because volatile files cannot
* conflict on name by construction */
- fdv = llapi_file_open_pool(volatile_file, O_CREAT | O_WRONLY,
- 0644, stripe_size, stripe_offset,
- stripe_count, stripe_pattern, pool_name);
+ fdv = llapi_file_open_param(volatile_file, O_CREAT | O_WRONLY, 0644,
+ param);
if (fdv < 0) {
rc = fdv;
fprintf(stderr, "cannot create volatile file in %s (%s)\n",
return rc;
}
+/**
+ * Parse a string containing an OST index list into an array of integers.
+ *
+ * The input string contains a comma delimited list of individual
+ * indices and ranges, for example "1,2-4,7". Add the indices into the
+ * \a osts array and remove duplicates.
+ *
+ * \a arg is tokenized in place: each ',' is temporarily replaced by a NUL
+ * and put back afterwards, so \a arg must be writable. Its contents,
+ * including the terminating NUL, are unchanged on return.
+ *
+ * \param[out] osts	array to store indices in
+ * \param[in] size	size of \a osts array
+ * \param[in] offset	starting index in \a osts
+ * \param[in] arg	string containing OST index list
+ *
+ * \retval positive	number of indices in \a osts
+ * \retval -EINVAL	unable to parse \a arg, or \a osts has no room
+ *			left for a new index
+ */
+static int parse_targets(__u32 *osts, int size, int offset, char *arg)
+{
+	int rc;
+	int nr = offset;
+	int slots = size - offset;
+	char *ptr = NULL;
+	bool end_of_loop;
+
+	if (arg == NULL)
+		return -EINVAL;
+
+	end_of_loop = false;
+	while (!end_of_loop) {
+		int start_index;
+		int end_index;
+		int i;
+		bool overflow = false;
+		char *endptr = NULL;
+
+		rc = -EINVAL;
+
+		ptr = strchrnul(arg, ',');
+
+		end_of_loop = *ptr == '\0';
+		*ptr = '\0';
+
+		start_index = strtol(arg, &endptr, 0);
+		if (endptr == arg) /* no data at all */
+			break;
+		if (*endptr != '-' && *endptr != '\0') /* has invalid data */
+			break;
+		if (start_index < 0)
+			break;
+
+		end_index = start_index;
+		if (*endptr == '-') {
+			char *range_end = endptr + 1;
+
+			end_index = strtol(range_end, &endptr, 0);
+			if (endptr == range_end) /* "N-" with no end value */
+				break;
+			if (*endptr != '\0')
+				break;
+			if (end_index < start_index)
+				break;
+		}
+
+		for (i = start_index; i <= end_index; i++) {
+			int j;
+
+			/* remove duplicate */
+			for (j = 0; j < offset; j++) {
+				if (osts[j] == (__u32)i)
+					break;
+			}
+			if (j < offset) /* already in the list */
+				continue;
+
+			/* a new index that does not fit is an error, it
+			 * must not be dropped silently */
+			if (slots <= 0) {
+				overflow = true;
+				break;
+			}
+			osts[nr++] = i;
+			--slots;
+		}
+		if (overflow)
+			break;
+
+		/* Put the separator back only if one was replaced above;
+		 * for the last token \a ptr is the string terminator and
+		 * must stay a NUL. */
+		if (!end_of_loop)
+			*ptr++ = ',';
+		arg = ptr;
+		offset = nr;
+		rc = 0;
+	}
+	/* a token in the middle of the list failed: restore its separator */
+	if (!end_of_loop && ptr != NULL)
+		*ptr = ',';
+
+	return rc < 0 ? rc : nr;
+}
+
+
/* functions */
static int lfs_setstripe(int argc, char **argv)
{
- char *fname;
- int result;
- unsigned long long st_size;
- int st_offset, st_count;
- char *end;
- int c;
- int delete = 0;
- char *stripe_size_arg = NULL;
- char *stripe_off_arg = NULL;
- char *stripe_count_arg = NULL;
- char *pool_name_arg = NULL;
- unsigned long long size_units = 1;
- int migrate_mode = 0;
- __u64 migration_flags = 0;
+ struct llapi_stripe_param *param;
+ char *fname;
+ int result;
+ unsigned long long st_size;
+ int st_offset, st_count;
+ char *end;
+ int c;
+ int delete = 0;
+ char *stripe_size_arg = NULL;
+ char *stripe_off_arg = NULL;
+ char *stripe_count_arg = NULL;
+ char *pool_name_arg = NULL;
+ unsigned long long size_units = 1;
+ bool migrate_mode = false;
+ __u64 migration_flags = 0;
+ __u32 osts[LOV_MAX_STRIPE_COUNT] = { 0 };
+ int nr_osts = 0;
struct option long_opts[] = {
/* valid only in migrate mode */
#endif
{"stripe-index", required_argument, 0, 'i'},
{"stripe_index", required_argument, 0, 'i'},
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 9, 53, 0)
- /* This formerly implied "stripe-index", but was confusing
- * with "file offset" (which will eventually be needed for
- * with different layouts by offset), so deprecate it. */
- {"offset", required_argument, 0, 'o'},
-#endif
+ {"ost-list", required_argument, 0, 'o'},
+ {"ost_list", required_argument, 0, 'o'},
{"pool", required_argument, 0, 'p'},
#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 9, 53, 0)
/* This formerly implied "--stripe-size", but was confusing
{0, 0, 0, 0}
};
- st_size = 0;
- st_offset = -1;
- st_count = 0;
+ st_size = 0;
+ st_offset = -1;
+ st_count = 0;
if (strcmp(argv[0], "migrate") == 0)
- migrate_mode = 1;
+ migrate_mode = true;
optind = 0;
while ((c = getopt_long(argc, argv, "c:di:o:p:s:S:",
/* Long options. */
break;
case 'b':
- if (migrate_mode == 0) {
+ if (!migrate_mode) {
fprintf(stderr, "--block is valid only for"
" migrate mode");
return CMD_HELP;
/* delete the default striping pattern */
delete = 1;
break;
-#if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 9, 53, 0)
case 'o':
- fprintf(stderr, "warning: '--offset|-o' deprecated, "
- "use '--stripe-index|-i' instead\n");
-#endif
+ nr_osts = parse_targets(osts, ARRAY_SIZE(osts), nr_osts,
+ optarg);
+ if (nr_osts < 0) {
+ fprintf(stderr,
+ "error: %s: bad OST indices '%s'\n",
+ argv[0], optarg);
+ return CMD_HELP;
+ }
+
+ if (st_offset == -1) /* first in the command line */
+ st_offset = osts[0];
+ break;
case 'i':
#if LUSTRE_VERSION_CODE >= OBD_OCD_VERSION(2, 6, 53, 0)
if (strcmp(argv[optind - 1], "--index") == 0)
}
}
+ /* initialize stripe parameters */
+ param = calloc(1, offsetof(typeof(*param), lsp_osts[nr_osts]));
+ if (param == NULL) {
+ fprintf(stderr, "error: %s: run out of memory\n", argv[0]);
+ return CMD_HELP;
+ }
+
+ param->lsp_stripe_size = st_size;
+ param->lsp_stripe_offset = st_offset;
+ param->lsp_stripe_count = st_count;
+ param->lsp_stripe_pattern = 0;
+ param->lsp_pool = pool_name_arg;
+ param->lsp_is_specific = false;
+ if (nr_osts > 0) {
+ if (st_count > 0 && nr_osts != st_count) {
+ fprintf(stderr, "error: %s: stripe count '%d' doesn't "
+ "match the number of OSTs: %d\n",
+ argv[0], st_count, nr_osts);
+ return CMD_HELP;
+ }
+
+ param->lsp_is_specific = true;
+ param->lsp_stripe_count = nr_osts;
+ memcpy(param->lsp_osts, osts, sizeof(*osts) * nr_osts);
+ }
+
do {
- if (migrate_mode)
- result = lfs_migrate(fname, st_size, st_offset,
- st_count, 0, pool_name_arg,
- migration_flags);
- else
- result = llapi_file_create_pool(fname, st_size,
- st_offset, st_count,
- 0, pool_name_arg);
+ if (!migrate_mode) {
+ result = llapi_file_open_param(fname,
+ O_CREAT | O_WRONLY,
+ 0644, param);
+ if (result >= 0) {
+ close(result);
+ result = 0;
+ }
+ } else {
+ result = lfs_migrate(fname, migration_flags, param);
+ }
if (result) {
fprintf(stderr,
"error: %s: %s stripe file '%s' failed\n",
fname = argv[++optind];
} while (fname != NULL);
+ free(param);
return result;
}
if (rc == -ENODEV)
break;
+ if (rc == -EAGAIN)
+ continue;
+
if (poolname && tp->st_op == LL_STATFS_LOV &&
llapi_search_ost(fsname, poolname,
obd_uuid2str(&uuid_buf)) != 1)
return 0;
}
-int llapi_file_open_pool(const char *name, int flags, int mode,
- unsigned long long stripe_size, int stripe_offset,
- int stripe_count, int stripe_pattern, char *pool_name)
+/**
+ * Open a Lustre file.
+ *
+ * \param name the name of the file to be opened
+ * \param flags access mode, see flags in open(2)
+ * \param mode permission of the file if it is created, see mode in open(2)
+ * \param param stripe pattern of the newly created file
+ *
+ * \return file descriptor of opened file
+ * \return -error failure
+ */
+int llapi_file_open_param(const char *name, int flags, mode_t mode,
+ const struct llapi_stripe_param *param)
{
- struct lov_user_md_v3 lum = { 0 };
- int fd = -1;
- int rc = 0;
+ char fsname[MAX_OBD_NAME + 1] = { 0 };
+ char *pool_name = param->lsp_pool;
+ struct lov_user_md *lum = NULL;
+ size_t lum_size = sizeof(*lum);
+ int fd, rc;
- /* Make sure we have a good pool */
- if (pool_name != NULL) {
- char fsname[MAX_OBD_NAME + 1], *ptr;
+ /* Make sure we are on a Lustre file system */
+ rc = llapi_search_fsname(name, fsname);
+ if (rc) {
+ llapi_error(LLAPI_MSG_ERROR, rc,
+ "'%s' is not on a Lustre filesystem",
+ name);
+ return rc;
+ }
- rc = llapi_search_fsname(name, fsname);
- if (rc) {
- llapi_error(LLAPI_MSG_ERROR, rc,
- "'%s' is not on a Lustre filesystem",
- name);
- return rc;
- }
+ /* Check if the stripe pattern is sane. */
+ rc = llapi_stripe_limit_check(param->lsp_stripe_size,
+ param->lsp_stripe_offset,
+ param->lsp_stripe_count,
+ param->lsp_stripe_pattern);
+ if (rc != 0)
+ return rc;
- /* in case user gives the full pool name <fsname>.<poolname>,
- * strip the fsname */
- ptr = strchr(pool_name, '.');
- if (ptr != NULL) {
- *ptr = '\0';
- if (strcmp(pool_name, fsname) != 0) {
- *ptr = '.';
- llapi_err_noerrno(LLAPI_MSG_ERROR,
- "Pool '%s' is not on filesystem '%s'",
- pool_name, fsname);
- return -EINVAL;
- }
- pool_name = ptr + 1;
- }
+ /* Make sure we have a good pool */
+ if (pool_name != NULL) {
+ /* in case user gives the full pool name <fsname>.<poolname>,
+ * strip the fsname */
+ char *ptr = strchr(pool_name, '.');
+ if (ptr != NULL) {
+ *ptr = '\0';
+ if (strcmp(pool_name, fsname) != 0) {
+ *ptr = '.';
+ llapi_err_noerrno(LLAPI_MSG_ERROR,
+ "Pool '%s' is not on filesystem '%s'",
+ pool_name, fsname);
+ return -EINVAL;
+ }
+ pool_name = ptr + 1;
+ }
- /* Make sure the pool exists and is non-empty */
- rc = llapi_search_ost(fsname, pool_name, NULL);
- if (rc < 1) {
- llapi_err_noerrno(LLAPI_MSG_ERROR,
- "pool '%s.%s' %s", fsname, pool_name,
- rc == 0 ? "has no OSTs" : "does not exist");
- return -EINVAL;
- }
- }
+ /* Make sure the pool exists and is non-empty */
+ rc = llapi_search_ost(fsname, pool_name, NULL);
+ if (rc < 1) {
+ char *err = rc == 0 ? "has no OSTs" : "does not exist";
- rc = llapi_stripe_limit_check(stripe_size, stripe_offset, stripe_count,
- stripe_pattern);
- if (rc != 0)
- return rc;
+ llapi_err_noerrno(LLAPI_MSG_ERROR, "pool '%s.%s' %s",
+ fsname, pool_name, err);
+ return -EINVAL;
+ }
+
+ lum_size = sizeof(struct lov_user_md_v3);
+ }
+
+ /* sanity check of target list */
+ if (param->lsp_is_specific) {
+ char ostname[MAX_OBD_NAME + 1];
+ bool found = false;
+ int i;
+
+ for (i = 0; i < param->lsp_stripe_count; i++) {
+ snprintf(ostname, sizeof(ostname), "%s-OST%04x_UUID",
+ fsname, param->lsp_osts[i]);
+ rc = llapi_search_ost(fsname, pool_name, ostname);
+ if (rc <= 0) {
+ if (rc == 0)
+ rc = -ENODEV;
+
+ llapi_error(LLAPI_MSG_ERROR, rc,
+ "%s: cannot find OST %s in %s",
+ __func__, ostname,
+ pool_name != NULL ?
+ "pool" : "system");
+ return rc;
+ }
+
+ /* Make sure stripe offset is in OST list. */
+ if (param->lsp_osts[i] == param->lsp_stripe_offset)
+ found = true;
+ }
+ if (!found) {
+ llapi_error(LLAPI_MSG_ERROR, -EINVAL,
+ "%s: stripe offset '%d' is not in the "
+ "target list",
+ __func__, param->lsp_stripe_offset);
+ return -EINVAL;
+ }
+
+ lum_size = lov_user_md_size(param->lsp_stripe_count,
+ LOV_USER_MAGIC_SPECIFIC);
+ }
+
+ lum = calloc(1, lum_size);
+ if (lum == NULL)
+ return -ENOMEM;
retry_open:
fd = open(name, flags | O_LOV_DELAY_CREATE, mode);
}
}
- if (fd < 0) {
- rc = -errno;
- llapi_error(LLAPI_MSG_ERROR, rc, "unable to open '%s'", name);
- return rc;
- }
+ if (fd < 0) {
+ rc = -errno;
+ llapi_error(LLAPI_MSG_ERROR, rc, "unable to open '%s'", name);
+ free(lum);
+ return rc;
+ }
- /* Initialize IOCTL striping pattern structure */
- lum.lmm_magic = LOV_USER_MAGIC_V3;
- lum.lmm_pattern = stripe_pattern;
- lum.lmm_stripe_size = stripe_size;
- lum.lmm_stripe_count = stripe_count;
- lum.lmm_stripe_offset = stripe_offset;
- if (pool_name != NULL) {
- strlcpy(lum.lmm_pool_name, pool_name,
- sizeof(lum.lmm_pool_name));
- } else {
- /* If no pool is specified at all, use V1 request */
- lum.lmm_magic = LOV_USER_MAGIC_V1;
- }
+ /* Initialize IOCTL striping pattern structure */
+ lum->lmm_magic = LOV_USER_MAGIC_V1;
+ lum->lmm_pattern = param->lsp_stripe_pattern;
+ lum->lmm_stripe_size = param->lsp_stripe_size;
+ lum->lmm_stripe_count = param->lsp_stripe_count;
+ lum->lmm_stripe_offset = param->lsp_stripe_offset;
+ if (pool_name != NULL) {
+ struct lov_user_md_v3 *lumv3 = (void *)lum;
- if (ioctl(fd, LL_IOC_LOV_SETSTRIPE, &lum)) {
- char *errmsg = "stripe already set";
- rc = -errno;
- if (errno != EEXIST && errno != EALREADY)
- errmsg = strerror(errno);
+ lumv3->lmm_magic = LOV_USER_MAGIC_V3;
+ strncpy(lumv3->lmm_pool_name, pool_name, LOV_MAXPOOLNAME);
+ }
+ if (param->lsp_is_specific) {
+ struct lov_user_md_v3 *lumv3 = (void *)lum;
+ int i;
+
+ lumv3->lmm_magic = LOV_USER_MAGIC_SPECIFIC;
+ if (pool_name == NULL) {
+ /* LOV_USER_MAGIC_SPECIFIC uses v3 format plus specified
+ * OST list, therefore if pool is not specified we have
+ * to pack a null pool name for placeholder. */
+ memset(lumv3->lmm_pool_name, 0, LOV_MAXPOOLNAME);
+ }
- llapi_err_noerrno(LLAPI_MSG_ERROR,
- "error on ioctl "LPX64" for '%s' (%d): %s",
- (__u64)LL_IOC_LOV_SETSTRIPE, name, fd,errmsg);
- }
+ for (i = 0; i < param->lsp_stripe_count; i++)
+ lumv3->lmm_objects[i].l_ost_idx = param->lsp_osts[i];
+ }
+
+ if (ioctl(fd, LL_IOC_LOV_SETSTRIPE, lum) != 0) {
+ char *errmsg = "stripe already set";
+
+ rc = -errno;
+ if (errno != EEXIST && errno != EALREADY)
+ errmsg = strerror(errno);
+
+ llapi_err_noerrno(LLAPI_MSG_ERROR,
+ "error on ioctl "LPX64" for '%s' (%d): %s",
+ (__u64)LL_IOC_LOV_SETSTRIPE, name, fd,
+ errmsg);
+ }
if (rc) {
close(fd);
fd = rc;
}
-
+ if (lum != NULL)
+ free(lum);
return fd;
}
+/**
+ * Open a Lustre file with the given striping, optionally in an OST pool.
+ *
+ * Wrapper that packs its arguments into a struct llapi_stripe_param and
+ * hands them to llapi_file_open_param(). No explicit OST list is
+ * requested (lsp_is_specific is false).
+ *
+ * \param name			the name of the file to be opened
+ * \param flags			access mode, see flags in open(2)
+ * \param mode			permission of the file if it is created
+ * \param stripe_size		stored in lsp_stripe_size
+ * \param stripe_offset		stored in lsp_stripe_offset
+ * \param stripe_count		stored in lsp_stripe_count
+ * \param stripe_pattern	stored in lsp_stripe_pattern
+ * \param pool_name		stored in lsp_pool, may be NULL
+ *
+ * \return the return value of llapi_file_open_param()
+ */
+int llapi_file_open_pool(const char *name, int flags, int mode,
+			 unsigned long long stripe_size, int stripe_offset,
+			 int stripe_count, int stripe_pattern, char *pool_name)
+{
+	const struct llapi_stripe_param param = {
+		.lsp_stripe_size = stripe_size,
+		.lsp_stripe_count = stripe_count,
+		.lsp_stripe_pattern = stripe_pattern,
+		.lsp_stripe_offset = stripe_offset,
+		.lsp_pool = pool_name,
+		.lsp_is_specific = false
+	};
+
+	return llapi_file_open_param(name, flags, mode, &param);
+}
+
int llapi_file_open(const char *name, int flags, int mode,
unsigned long long stripe_size, int stripe_offset,
int stripe_count, int stripe_pattern)
}
/* LU-2374: check whether it is OST/MDT later */
mop->mo_ldd.ldd_svindex = atol(optarg);
+ if (mop->mo_ldd.ldd_svindex >= INDEX_UNASSIGNED) {
+ fprintf(stderr, "%s: wrong index %u. "
+ "Target index must be less than %u.\n",
+ progname, mop->mo_ldd.ldd_svindex,
+ INDEX_UNASSIGNED);
+ return 1;
+ }
mop->mo_ldd.ldd_flags &= ~LDD_F_NEED_INDEX;
break;
case 'k':