.I start_ost_index
must be part of the pool or an error will be returned.
.TP
+.B -L, --layout <\fIlayout type\fB>\fR
+The type of stripe layout, can be
+.BR raid0 " or " mdt .
+It is
+.BR raid0
+by default. The
+.BR mdt
+type allows placing the first component of the file on the MDT where the inode
+is located. This is used with composite file layouts and can be defined as
+the first component only. The
+.IR stripe_size
+of the MDT part is always equal to the component size. There is also a per-MDT
+parameter
+.IR lod.dom_stripesize
+to limit the maximum size of a DoM stripe, which can be changed with the
+.BR lctl\ set_param
+command (e.g.
+.IR lctl\ set_param\ lod.*.dom_stripesize=0 ,
+see
+.BR lctl (8)).
+.TP
There are two options available only for \fBlfs migrate\fR:
.TP
.B -b, --block
.TP
.B $ lfs setstripe --component-del -I 1 /mnt/lustre/file1
This deletes the component with ID equal to 1 from an existing file.
+.TP
+.B $ lfs setstripe -E 1M -L mdt -E -1 /mnt/lustre/file1
+This creates a file with a Data-on-MDT layout. The first 1M is placed on the MDT and \
+the rest of the file is placed on OSTs with default striping.
.SH SEE ALSO
.BR lfs (1),
.BR lfs-migrate (1),
* stored using RAID0. That is, data will be split evenly and without
* redundancy across all OSTs in the layout.
*/
-#define LLAPI_LAYOUT_RAID0 0
+#define LLAPI_LAYOUT_RAID0 0ULL
+#define LLAPI_LAYOUT_MDT 2ULL /* Data-on-MDT pattern; translated to LOV_PATTERN_MDT */
/**
* The layout includes a specific set of OSTs on which to allocate.
GOTO(out, rc);
}
+
obd = lod2obd(lod);
- rc = class_process_proc_param(PARAM_LOV, obd->obd_vars,
+ if (strstr(param, PARAM_LOD) != NULL)
+ rc = class_process_proc_param(PARAM_LOD, obd->obd_vars,
+ lcfg, obd);
+ else
+ rc = class_process_proc_param(PARAM_LOV, obd->obd_vars,
lcfg, obd);
if (rc > 0)
rc = 0;
+
GOTO(out, rc);
}
case LCFG_PRE_CLEANUP: {
dt_conf_get(env, &lod->lod_dt_dev, &ddp);
lod->lod_osd_max_easize = ddp.ddp_max_ea_size;
+ lod->lod_dom_max_stripesize = (1ULL << 20); /* 1Mb as default value */
/* setup obd to be used with old lov code */
rc = lod_pools_init(lod, cfg);
/* maximum EA size the underlying OSD may have */
unsigned int lod_osd_max_easize;
+ /* maximum size of MDT stripe for Data-on-MDT files. */
+ unsigned int lod_dom_max_stripesize;
/*FIXME: When QOS and pool is implemented for MDT, probably these
* structure should be moved to lod_tgt_descs as well.
le32_to_cpu(ent->lcme_offset);
tmp.lb_len = le32_to_cpu(ent->lcme_size);
- /* Check DoM entry is always the first one */
+ /* Checks for DoM entry in composite layout. */
lum = tmp.lb_buf;
if (lov_pattern(le32_to_cpu(lum->lmm_pattern)) ==
- LOV_PATTERN_MDT && i > 0) {
- CDEBUG(D_LAYOUT, "invalid DoM layout entry "
- "found at %i index\n", i);
- RETURN(-EINVAL);
-
+ LOV_PATTERN_MDT) {
+ /* DoM component can be only the first entry */
+ if (i > 0) {
+ CDEBUG(D_LAYOUT, "invalid DoM layout "
+ "entry found at %i index\n", i);
+ RETURN(-EINVAL);
+ }
+ stripe_size = le32_to_cpu(lum->lmm_stripe_size);
+ /* There is just one stripe on MDT and it must
+ * cover whole component size. */
+ if (stripe_size != prev_end) {
+ CDEBUG(D_LAYOUT, "invalid DoM layout "
+ "stripe size %u != %llu "
+ "(component size)\n",
+ stripe_size, prev_end);
+ RETURN(-EINVAL);
+ }
+ /* Check stripe size against the per-MDT limit. The message names
+ * the tunable as it is registered in proc: "dom_stripesize". */
+ if (stripe_size > d->lod_dom_max_stripesize) {
+ CDEBUG(D_LAYOUT, "DoM component size "
+ "%u is bigger than MDT limit "
+ "%u, check dom_stripesize"
+ " parameter\n",
+ stripe_size,
+ d->lod_dom_max_stripesize);
+ RETURN(-EINVAL);
+ }
}
rc = lod_verify_v1v3(d, &tmp, is_from_disk);
if (rc)
* \retval 0 on success
* \retval negative error code if failed
*/
+static int lod_dom_stripesize_seq_show(struct seq_file *m, void *v)
+{ /* Print the maximum Data-on-MDT stripe size of this LOD device; always returns 0. */
+ struct obd_device *dev = m->private;
+ struct lod_device *lod;
+
+ LASSERT(dev != NULL);
+ lod = lu2lod_dev(dev->obd_lu_dev);
+ seq_printf(m, "%u\n", lod->lod_dom_max_stripesize); /* unsigned decimal, one line */
+ return 0;
+}
+
+/**
+ * Set the maximum size of the MDT stripe for Data-on-MDT files.
+ *
+ * The value is parsed from \a buffer with lprocfs_str_with_units_to_s64()
+ * and stored in lod_dom_max_stripesize. Negative values and values above
+ * 1GiB (1ULL << 30) are rejected. A non-zero value is passed through
+ * lod_fix_desc_stripe_size() before it is stored; zero is stored as is.
+ *
+ * \param[in] file proc file
+ * \param[in] buffer string containing the new maximum DoM stripe size
+ * \param[in] count @buffer length
+ * \param[in] off unused for single entry
+ *
+ * \retval @count on success
+ * \retval -ERANGE if the value is negative or larger than 1GiB
+ * \retval negative error code if parsing failed
+ */
+static ssize_t
+lod_dom_stripesize_seq_write(struct file *file, const char __user *buffer,
+ size_t count, loff_t *off)
+{
+ struct seq_file *m = file->private_data;
+ struct obd_device *dev = m->private;
+ struct lod_device *lod;
+ __s64 val;
+ int rc;
+
+ LASSERT(dev != NULL);
+ lod = lu2lod_dev(dev->obd_lu_dev);
+ rc = lprocfs_str_with_units_to_s64(buffer, count, &val, '1');
+ if (rc)
+ return rc;
+ if (val < 0)
+ return -ERANGE;
+
+ /* the limit itself is capped at 1GiB (1ULL << 30) */
+ if (val > (1ULL << 30))
+ return -ERANGE;
+ else if (val > 0)
+ lod_fix_desc_stripe_size(&val); /* NOTE(review): val is __s64; confirm the helper's pointer type */
+
+ lod->lod_dom_max_stripesize = val;
+
+ return count;
+}
+LPROC_SEQ_FOPS(lod_dom_stripesize);
+
+/**
+ * Show default stripe size.
+ *
+ * \param[in] m seq file
+ * \param[in] v unused for single entry
+ *
+ * \retval 0 on success
+ * \retval negative error code if failed
+ */
static int lod_stripesize_seq_show(struct seq_file *m, void *v)
{
struct obd_device *dev = m->private;
.fops = &lod_qos_maxage_fops },
{ .name = "lmv_failout",
.fops = &lod_lmv_failout_fops },
+ {
+ .name = "dom_stripesize",
+ .fops = &lod_dom_stripesize_fops
+ },
{ NULL }
};
/* All mdd., ost. and osd. params in proc */
if ((class_match_param(ptr, PARAM_MDD, NULL) == 0) ||
+ (class_match_param(ptr, PARAM_LOD, NULL) == 0) ||
(class_match_param(ptr, PARAM_OST, NULL) == 0) ||
(class_match_param(ptr, PARAM_OSD, NULL) == 0)) {
CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
"usage: "cmd" [--stripe-count|-c <stripe_count>]\n" \
" [--stripe-index|-i <start_ost_idx>]\n" \
" [--stripe-size|-S <stripe_size>]\n" \
+ " [--layout|-L <pattern>]\n" \
" [--pool|-p <pool_name>]\n" \
" [--ost|-o <ost_indices>]\n" \
" [--component-end|-E <comp_end>]\n"
"\t respectively)\n" \
"\tstart_ost_idx: OST index of first stripe (-1 default)\n" \
"\tstripe_count: Number of OSTs to stripe over (0 default, -1 all)\n" \
+ "\tlayout: stripe pattern type: raid0, mdt (default raid0)\n"\
"\tpool_name: Name of OST pool to use (default none)\n" \
"\tost_indices: List of OST indices, can be repeated multiple times\n"\
"\t Indices be specified in a format of:\n" \
" [[!] --gid|-g|--group|-G <gid>|<gname>]\n"
" [[!] --uid|-u|--user|-U <uid>|<uname>] [[!] --pool <pool>]\n"
" [[!] --projid <projid>]\n"
- " [[!] --layout|-L released,raid0]\n"
+ " [[!] --layout|-L released,raid0,mdt]\n"
" [[!] --component-count [+-]<comp_cnt>]\n"
" [[!] --component-start [+-]N[kMGTPE]]\n"
" [[!] --component-end|-E [+-]N[kMGTPE]]\n"
int lsa_stripe_off;
__u32 lsa_comp_flags;
int lsa_nr_osts;
+ int lsa_pattern;
__u32 *lsa_osts;
char *lsa_pool_name;
};
{
return (lsa->lsa_stripe_size != 0 || lsa->lsa_stripe_count != 0 ||
lsa->lsa_stripe_off != -1 || lsa->lsa_pool_name != NULL ||
- lsa->lsa_comp_end != 0);
+ lsa->lsa_comp_end != 0 || lsa->lsa_pattern != 0);
}
static int comp_args_to_layout(struct llapi_layout **composite,
return rc;
}
+ /* Data-on-MDT component setting */
+ if (lsa->lsa_pattern == LLAPI_LAYOUT_MDT) {
+ /* A Data-on-MDT component takes no other striping options:
+ * its stripe size is derived from the component end below. */
+ if (lsa->lsa_stripe_count) {
+ fprintf(stderr, "Option 'stripe-count' can't be "
+ "specified with Data-on-MDT component: %i\n",
+ lsa->lsa_stripe_count);
+ return -EINVAL;
+ }
+ if (lsa->lsa_stripe_size) {
+ fprintf(stderr, "Option 'stripe-size' can't be "
+ "specified with Data-on-MDT component: %llu\n",
+ lsa->lsa_stripe_size);
+ return -EINVAL;
+ }
+ if (lsa->lsa_nr_osts != 0) {
+ fprintf(stderr, "Option 'ost-list' can't be specified "
+ "with Data-on-MDT component: '%i'\n",
+ lsa->lsa_nr_osts);
+ return -EINVAL;
+ }
+ if (lsa->lsa_stripe_off != -1) {
+ fprintf(stderr, "Option 'stripe-offset' can't be "
+ "specified with Data-on-MDT component: %i\n",
+ lsa->lsa_stripe_off);
+ return -EINVAL;
+ }
+ if (lsa->lsa_pool_name != 0) {
+ fprintf(stderr, "Option 'pool' can't be specified "
+ "with Data-on-MDT component: '%s'\n",
+ lsa->lsa_pool_name);
+ return -EINVAL;
+ }
+
+ rc = llapi_layout_pattern_set(layout, lsa->lsa_pattern);
+ if (rc) {
+ fprintf(stderr, "Set stripe pattern %#x failed. %s\n",
+ lsa->lsa_pattern, strerror(errno));
+ return rc;
+ }
+ /* Data-on-MDT component always has a single stripe up to its end */
+ lsa->lsa_stripe_size = lsa->lsa_comp_end;
+ }
+
if (lsa->lsa_stripe_size != 0) {
rc = llapi_layout_stripe_size_set(layout,
lsa->lsa_stripe_size);
* the consistent "--stripe-size|-S" for all commands. */
{ .val = 's', .name = "size", .has_arg = required_argument },
#endif
+ { .val = 'L', .name = "layout", .has_arg = required_argument },
{ .val = 'S', .name = "stripe-size", .has_arg = required_argument },
{ .val = 'S', .name = "stripe_size", .has_arg = required_argument },
/* dirstripe {"mdt-count", required_argument, 0, 'T'}, */
if (strcmp(argv[0], "migrate") == 0)
migrate_mode = true;
- while ((c = getopt_long(argc, argv, "bc:dE:i:I:m:no:p:s:S:v",
+ while ((c = getopt_long(argc, argv, "bc:dE:i:I:m:no:p:L:s:S:v",
long_opts, NULL)) >= 0) {
switch (c) {
case 0:
}
}
break;
+ case 'L':
+ /* Parse the layout type from optarg rather than from
+ * argv[optind - 1], so that "-Lmdt" and "--layout=mdt"
+ * are handled the same way as "-L mdt". */
+ if (strcmp(optarg, "mdt") == 0) {
+ /* Can be only the first component */
+ if (layout != NULL) {
+ fprintf(stderr, "error: 'mdt' layout "
+ "can be only the first one\n");
+ goto error;
+ }
+ if (lsa.lsa_comp_end > (1ULL << 30)) { /* 1Gb */
+ fprintf(stderr, "error: 'mdt' layout "
+ "size is too big\n");
+ goto error;
+ }
+ lsa.lsa_pattern = LLAPI_LAYOUT_MDT;
+ } else if (strcmp(optarg, "raid0") != 0) {
+ /* Report the rejected value itself; argv[optind] is
+ * the following argument and may be NULL. */
+ fprintf(stderr, "error: layout '%s' is "
+ "unknown, supported layouts are: "
+ "'mdt', 'raid0'\n", optarg);
+ goto error;
+ }
+ break;
case 'i':
if (strcmp(argv[optind - 1], "--index") == 0)
fprintf(stderr, "warning: '--index' deprecated"
param->lsp_stripe_size = lsa.lsa_stripe_size;
param->lsp_stripe_offset = lsa.lsa_stripe_off;
param->lsp_stripe_count = lsa.lsa_stripe_count;
- param->lsp_stripe_pattern = 0;
param->lsp_pool = lsa.lsa_pool_name;
param->lsp_is_specific = false;
if (lsa.lsa_nr_osts > 0) {
static int name2layout(__u32 *layout, char *name)
{
- char *ptr, *lyt;
+ char *ptr, *layout_name;
*layout = 0;
for (ptr = name; ; ptr = NULL) {
- lyt = strtok(ptr, ",");
- if (lyt == NULL)
+ layout_name = strtok(ptr, ",");
+ if (layout_name == NULL)
break;
- if (strcmp(lyt, "released") == 0)
+ if (strcmp(layout_name, "released") == 0)
*layout |= LOV_PATTERN_F_RELEASED;
- else if (strcmp(lyt, "raid0") == 0)
+ else if (strcmp(layout_name, "raid0") == 0)
*layout |= LOV_PATTERN_RAID0;
+ else if (strcmp(layout_name, "mdt") == 0)
+ *layout |= LOV_PATTERN_MDT;
else
return -1;
}
llapi_printf(LLAPI_MSG_NORMAL, "%s%sstripe_count: ",
space, prefix);
if (is_dir) {
- if (!is_raw && lum->lmm_stripe_count == 0) {
+ if (!is_raw && lum->lmm_stripe_count == 0 &&
+ lov_pattern(lum->lmm_pattern) != LOV_PATTERN_MDT) {
unsigned int scount;
rc = sattr_cache_get_defaults(NULL, path,
&scount, NULL,
separator = "\n";
}
- if ((verbose & VERBOSE_LAYOUT) && !is_dir) {
+ if ((verbose & VERBOSE_LAYOUT)) {
llapi_printf(LLAPI_MSG_NORMAL, "%s", separator);
if (verbose & ~VERBOSE_LAYOUT)
llapi_printf(LLAPI_MSG_NORMAL, "%s%spattern: ",
space, prefix);
llapi_printf(LLAPI_MSG_NORMAL, "%.x", lum->lmm_pattern);
- separator = "\n";
+ separator = is_dir ? " " : "\n";
}
if ((verbose & VERBOSE_GENERATION) && !is_dir) {
blob->lmm_pattern = 0;
else if (pattern == LLAPI_LAYOUT_RAID0)
blob->lmm_pattern = LOV_PATTERN_RAID0;
+ else if (pattern == LLAPI_LAYOUT_MDT)
+ blob->lmm_pattern = LOV_PATTERN_MDT;
else
blob->lmm_pattern = pattern;
}
/**
- * Set the RAID pattern of \a layout.
+ * Set the pattern of \a layout.
*
* \param[in] layout layout to set pattern in
* \param[in] pattern value to be set
return -1;
if (pattern != LLAPI_LAYOUT_DEFAULT &&
- pattern != LLAPI_LAYOUT_RAID0) {
+ pattern != LLAPI_LAYOUT_RAID0 &&
+ pattern != LLAPI_LAYOUT_MDT) {
errno = EOPNOTSUPP;
return -1;
}