From: Mikhail Pershin Date: Mon, 23 Jun 2014 18:05:45 +0000 (+0400) Subject: LU-3285 lfs: add parameter for Data-on-MDT file X-Git-Tag: 2.10.56~64^2~18 X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=commitdiff_plain;h=6744eb8eeb9e0a7a745a9a42e5fe09b376e16a82 LU-3285 lfs: add parameter for Data-on-MDT file Add --layout|-L parameter to lfs setstripe to create file with data on MDT layout. Add DoM size limit at LOD level - lod.dom_stripesize parameter. Signed-off-by: Mikhail Pershin Change-Id: Ib97142cc23c75e890dc619c14e5ffb3f3c47cda5 Reviewed-on: https://review.whamcloud.com/28012 Reviewed-by: Jinshan Xiong Reviewed-by: Andreas Dilger Tested-by: Jenkins Tested-by: Maloo --- diff --git a/lustre/doc/lfs-setstripe.1 b/lustre/doc/lfs-setstripe.1 index 52dc756..332f1b6 100644 --- a/lustre/doc/lfs-setstripe.1 +++ b/lustre/doc/lfs-setstripe.1 @@ -115,6 +115,27 @@ will be used as well; the .I start_ost_index must be part of the pool or an error will be returned. .TP +.B -L, --layout <\fIlayout type\fB>\fR +The type of stripe layout, can be +.BR raid0 ", " released " or " mdt ". +It is +.BR raid0 +by default. The +.BR mdt +type allows placing the first component of the file on the MDT where the inode +is located. This is used with composite file layouts and can be defined as +the first component only. The +.IR stripe_size +of the MDT part is always equal to the component size. There is also a per-MDT +parameter +.IR lod.dom_stripesize +to limit the maximum size of the DoM stripe, which can be changed with +.BR lctl\ set_param +command, (e.g. +.IR lctl\ set_param\ lod.*.dom_stripesize=0 +, see +.BR lctl (8)) +.TP There are two options available only for \fBlfs migrate\fR: .TP .B -b, --block @@ -164,6 +185,10 @@ the end of file. .TP .B $ lfs setstripe --component-del -I 1 /mnt/lustre/file1 This deletes the component with ID equals 1 from an existing file. +.TP +.B $ lfs setstripe -E 1M -L mdt -E -1 /mnt/lustre/file1 +This creates a file with a Data-on-MDT layout. 
The first 1M is placed on MDT and \ +rest of file is placed on OST with default striping. .SH SEE ALSO .BR lfs (1), .BR lfs-migrate (1), diff --git a/lustre/include/lustre/lustreapi.h b/lustre/include/lustre/lustreapi.h index 6956768..79f2f17 100644 --- a/lustre/include/lustre/lustreapi.h +++ b/lustre/include/lustre/lustreapi.h @@ -520,7 +520,8 @@ void llapi_layout_free(struct llapi_layout *layout); * stored using RAID0. That is, data will be split evenly and without * redundancy across all OSTs in the layout. */ -#define LLAPI_LAYOUT_RAID0 0 +#define LLAPI_LAYOUT_RAID0 0ULL +#define LLAPI_LAYOUT_MDT 2ULL /** * The layout includes a specific set of OSTs on which to allocate. diff --git a/lustre/lod/lod_dev.c b/lustre/lod/lod_dev.c index d6d77c6..5f80430 100644 --- a/lustre/lod/lod_dev.c +++ b/lustre/lod/lod_dev.c @@ -1031,11 +1031,17 @@ static int lod_process_config(const struct lu_env *env, GOTO(out, rc); } + obd = lod2obd(lod); - rc = class_process_proc_param(PARAM_LOV, obd->obd_vars, + if (strstr(param, PARAM_LOD) != NULL) + rc = class_process_proc_param(PARAM_LOD, obd->obd_vars, + lcfg, obd); + else + rc = class_process_proc_param(PARAM_LOV, obd->obd_vars, lcfg, obd); if (rc > 0) rc = 0; + GOTO(out, rc); } case LCFG_PRE_CLEANUP: { @@ -1639,6 +1645,7 @@ static int lod_init0(const struct lu_env *env, struct lod_device *lod, dt_conf_get(env, &lod->lod_dt_dev, &ddp); lod->lod_osd_max_easize = ddp.ddp_max_ea_size; + lod->lod_dom_max_stripesize = (1ULL << 20); /* 1Mb as default value */ /* setup obd to be used with old lov code */ rc = lod_pools_init(lod, cfg); diff --git a/lustre/lod/lod_internal.h b/lustre/lod/lod_internal.h index bfb3660..b0dfdbf 100644 --- a/lustre/lod/lod_internal.h +++ b/lustre/lod/lod_internal.h @@ -192,6 +192,8 @@ struct lod_device { /* maximum EA size underlied OSD may have */ unsigned int lod_osd_max_easize; + /* maximum size of MDT stripe for Data-on-MDT files. 
*/ + unsigned int lod_dom_max_stripesize; /*FIXME: When QOS and pool is implemented for MDT, probably these * structure should be moved to lod_tgt_descs as well. diff --git a/lustre/lod/lod_lov.c b/lustre/lod/lod_lov.c index a7e6129..663cbb2 100644 --- a/lustre/lod/lod_lov.c +++ b/lustre/lod/lod_lov.c @@ -1705,14 +1705,36 @@ int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf, le32_to_cpu(ent->lcme_offset); tmp.lb_len = le32_to_cpu(ent->lcme_size); - /* Check DoM entry is always the first one */ + /* Checks for DoM entry in composite layout. */ lum = tmp.lb_buf; if (lov_pattern(le32_to_cpu(lum->lmm_pattern)) == - LOV_PATTERN_MDT && i > 0) { - CDEBUG(D_LAYOUT, "invalid DoM layout entry " - "found at %i index\n", i); - RETURN(-EINVAL); - + LOV_PATTERN_MDT) { + /* DoM component can be only the first entry */ + if (i > 0) { + CDEBUG(D_LAYOUT, "invalid DoM layout " + "entry found at %i index\n", i); + RETURN(-EINVAL); + } + stripe_size = le32_to_cpu(lum->lmm_stripe_size); + /* There is just one stripe on MDT and it must + * cover whole component size. 
*/ + if (stripe_size != prev_end) { + CDEBUG(D_LAYOUT, "invalid DoM layout " + "stripe size %u != %llu " + "(component size)\n", + stripe_size, prev_end); + RETURN(-EINVAL); + } + /* Check stripe size against per-MDT limit */ + if (stripe_size > d->lod_dom_max_stripesize) { + CDEBUG(D_LAYOUT, "DoM component size " + "%u is bigger than MDT limit " + "%u, check dom_max_stripesize" + " parameter\n", + stripe_size, + d->lod_dom_max_stripesize); + RETURN(-EINVAL); + } } rc = lod_verify_v1v3(d, &tmp, is_from_disk); if (rc) diff --git a/lustre/lod/lproc_lod.c b/lustre/lod/lproc_lod.c index 8003fb6..45ab539 100644 --- a/lustre/lod/lproc_lod.c +++ b/lustre/lod/lproc_lod.c @@ -54,6 +54,69 @@ * \retval 0 on success * \retval negative error code if failed */ +static int lod_dom_stripesize_seq_show(struct seq_file *m, void *v) +{ + struct obd_device *dev = m->private; + struct lod_device *lod; + + LASSERT(dev != NULL); + lod = lu2lod_dev(dev->obd_lu_dev); + seq_printf(m, "%u\n", lod->lod_dom_max_stripesize); + return 0; +} + +/** + * Set the maximum DoM stripe size. 
+ * + * \param[in] file proc file + * \param[in] buffer string containing the maximum size in bytes of + * the MDT stripe for Data-on-MDT files; negative + * values and values above 1GiB are rejected + * \param[in] count @buffer length + * \param[in] off unused for single entry + * + * \retval @count on success + * \retval negative error code if failed + */ +static ssize_t +lod_dom_stripesize_seq_write(struct file *file, const char __user *buffer, + size_t count, loff_t *off) +{ + struct seq_file *m = file->private_data; + struct obd_device *dev = m->private; + struct lod_device *lod; + __s64 val; + int rc; + + LASSERT(dev != NULL); + lod = lu2lod_dev(dev->obd_lu_dev); + rc = lprocfs_str_with_units_to_s64(buffer, count, &val, '1'); + if (rc) + return rc; + if (val < 0) + return -ERANGE; + + /* Reject limits above 1GiB */ + if (val > (1ULL << 30)) + return -ERANGE; + else if (val > 0) + lod_fix_desc_stripe_size(&val); + + lod->lod_dom_max_stripesize = val; + + return count; +} +LPROC_SEQ_FOPS(lod_dom_stripesize); + +/** + * Show default stripe size. + * + * \param[in] m seq file + * \param[in] v unused for single entry + * + * \retval 0 on success + * \retval negative error code if failed + */ static int lod_stripesize_seq_show(struct seq_file *m, void *v) { struct obd_device *dev = m->private; @@ -758,6 +821,10 @@ static struct lprocfs_vars lprocfs_lod_obd_vars[] = { .fops = &lod_qos_maxage_fops }, { .name = "lmv_failout", .fops = &lod_lmv_failout_fops }, + { + .name = "dom_stripesize", + .fops = &lod_dom_stripesize_fops + }, { NULL } }; diff --git a/lustre/mgs/mgs_llog.c b/lustre/mgs/mgs_llog.c index 135e78b..22617cd 100644 --- a/lustre/mgs/mgs_llog.c +++ b/lustre/mgs/mgs_llog.c @@ -3925,6 +3925,7 @@ active_err: /* All mdd., ost. and osd. 
params in proc */ if ((class_match_param(ptr, PARAM_MDD, NULL) == 0) || + (class_match_param(ptr, PARAM_LOD, NULL) == 0) || (class_match_param(ptr, PARAM_OST, NULL) == 0) || (class_match_param(ptr, PARAM_OSD, NULL) == 0)) { CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4); diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index dc07fb9..00ac0b1 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -116,6 +116,7 @@ static int lfs_list_commands(int argc, char **argv); "usage: "cmd" [--stripe-count|-c ]\n" \ " [--stripe-index|-i ]\n" \ " [--stripe-size|-S ]\n" \ + " [--layout|-L ]\n" \ " [--pool|-p ]\n" \ " [--ost|-o ]\n" \ " [--component-end|-E ]\n" @@ -126,6 +127,7 @@ static int lfs_list_commands(int argc, char **argv); "\t respectively)\n" \ "\tstart_ost_idx: OST index of first stripe (-1 default)\n" \ "\tstripe_count: Number of OSTs to stripe over (0 default, -1 all)\n" \ + "\tlayout: stripe pattern type: raid0, mdt (default raid0)\n"\ "\tpool_name: Name of OST pool to use (default none)\n" \ "\tost_indices: List of OST indices, can be repeated multiple times\n"\ "\t Indices be specified in a format of:\n" \ @@ -249,7 +251,7 @@ command_t cmdlist[] = { " [[!] --gid|-g|--group|-G |]\n" " [[!] --uid|-u|--user|-U |] [[!] --pool ]\n" " [[!] --projid ]\n" - " [[!] --layout|-L released,raid0]\n" + " [[!] --layout|-L released,raid0,mdt]\n" " [[!] --component-count [+-]]\n" " [[!] --component-start [+-]N[kMGTPE]]\n" " [[!] 
--component-end|-E [+-]N[kMGTPE]]\n" @@ -1017,6 +1019,7 @@ struct lfs_setstripe_args { int lsa_stripe_off; __u32 lsa_comp_flags; int lsa_nr_osts; + int lsa_pattern; __u32 *lsa_osts; char *lsa_pool_name; }; @@ -1031,7 +1034,7 @@ static inline bool setstripe_args_specified(struct lfs_setstripe_args *lsa) { return (lsa->lsa_stripe_size != 0 || lsa->lsa_stripe_count != 0 || lsa->lsa_stripe_off != -1 || lsa->lsa_pool_name != NULL || - lsa->lsa_comp_end != 0); + lsa->lsa_comp_end != 0 || lsa->lsa_pattern != 0); } static int comp_args_to_layout(struct llapi_layout **composite, @@ -1076,6 +1079,51 @@ static int comp_args_to_layout(struct llapi_layout **composite, return rc; } + /* Data-on-MDT component setting */ + if (lsa->lsa_pattern == LLAPI_LAYOUT_MDT) { + /* In case of Data-on-MDT patterns the only extra option + * applicable is stripe size option. */ + if (lsa->lsa_stripe_count) { + fprintf(stderr, "Option 'stripe-count' can't be " + "specified with Data-on-MDT component: %i\n", + lsa->lsa_stripe_count); + return -EINVAL; + } + if (lsa->lsa_stripe_size) { + fprintf(stderr, "Option 'stripe-size' can't be " + "specified with Data-on-MDT component: %llu\n", + lsa->lsa_stripe_size); + return -EINVAL; + } + if (lsa->lsa_nr_osts != 0) { + fprintf(stderr, "Option 'ost-list' can't be specified " + "with Data-on-MDT component: '%i'\n", + lsa->lsa_nr_osts); + return -EINVAL; + } + if (lsa->lsa_stripe_off != -1) { + fprintf(stderr, "Option 'stripe-offset' can't be " + "specified with Data-on-MDT component: %i\n", + lsa->lsa_stripe_off); + return -EINVAL; + } + if (lsa->lsa_pool_name != 0) { + fprintf(stderr, "Option 'pool' can't be specified " + "with Data-on-MDT component: '%s'\n", + lsa->lsa_pool_name); + return -EINVAL; + } + + rc = llapi_layout_pattern_set(layout, lsa->lsa_pattern); + if (rc) { + fprintf(stderr, "Set stripe pattern %#x failed. 
%s\n", + lsa->lsa_pattern, strerror(errno)); + return rc; + } + /* Data-on-MDT component has always single stripe up to end */ + lsa->lsa_stripe_size = lsa->lsa_comp_end; + } + if (lsa->lsa_stripe_size != 0) { rc = llapi_layout_stripe_size_set(layout, lsa->lsa_stripe_size); @@ -1386,6 +1434,7 @@ static int lfs_setstripe(int argc, char **argv) * the consistent "--stripe-size|-S" for all commands. */ { .val = 's', .name = "size", .has_arg = required_argument }, #endif + { .val = 'L', .name = "layout", .has_arg = required_argument }, { .val = 'S', .name = "stripe-size", .has_arg = required_argument }, { .val = 'S', .name = "stripe_size", .has_arg = required_argument }, /* dirstripe {"mdt-count", required_argument, 0, 'T'}, */ @@ -1410,7 +1459,7 @@ static int lfs_setstripe(int argc, char **argv) if (strcmp(argv[0], "migrate") == 0) migrate_mode = true; - while ((c = getopt_long(argc, argv, "bc:dE:i:I:m:no:p:s:S:v", + while ((c = getopt_long(argc, argv, "bc:dE:i:I:m:no:p:L:s:S:v", long_opts, NULL)) >= 0) { switch (c) { case 0: @@ -1481,6 +1530,27 @@ static int lfs_setstripe(int argc, char **argv) } } break; + case 'L': + if (strcmp(argv[optind - 1], "mdt") == 0) { + /* Can be only the first component */ + if (layout != NULL) { + fprintf(stderr, "error: 'mdt' layout " + "can be only the first one\n"); + goto error; + } + if (lsa.lsa_comp_end > (1ULL << 30)) { /* 1Gb */ + fprintf(stderr, "error: 'mdt' layout " + "size is too big\n"); + goto error; + } + lsa.lsa_pattern = LLAPI_LAYOUT_MDT; + } else if (strcmp(argv[optind - 1], "raid0") != 0) { + fprintf(stderr, "error: layout '%s' is " + "unknown, supported layouts are: " + "'mdt', 'raid0'\n", argv[optind]); + goto error; + } + break; case 'i': if (strcmp(argv[optind - 1], "--index") == 0) fprintf(stderr, "warning: '--index' deprecated" @@ -1684,7 +1754,6 @@ static int lfs_setstripe(int argc, char **argv) param->lsp_stripe_size = lsa.lsa_stripe_size; param->lsp_stripe_offset = lsa.lsa_stripe_off; param->lsp_stripe_count = 
lsa.lsa_stripe_count; - param->lsp_stripe_pattern = 0; param->lsp_pool = lsa.lsa_pool_name; param->lsp_is_specific = false; if (lsa.lsa_nr_osts > 0) { @@ -1850,17 +1919,19 @@ static inline int gid2name(char **name, unsigned int id) static int name2layout(__u32 *layout, char *name) { - char *ptr, *lyt; + char *ptr, *layout_name; *layout = 0; for (ptr = name; ; ptr = NULL) { - lyt = strtok(ptr, ","); - if (lyt == NULL) + layout_name = strtok(ptr, ","); + if (layout_name == NULL) break; - if (strcmp(lyt, "released") == 0) + if (strcmp(layout_name, "released") == 0) *layout |= LOV_PATTERN_F_RELEASED; - else if (strcmp(lyt, "raid0") == 0) + else if (strcmp(layout_name, "raid0") == 0) *layout |= LOV_PATTERN_RAID0; + else if (strcmp(layout_name, "mdt") == 0) + *layout |= LOV_PATTERN_MDT; else return -1; } diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index d237cab..e6008a9 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -2276,7 +2276,8 @@ static void lov_dump_user_lmm_header(struct lov_user_md *lum, char *path, llapi_printf(LLAPI_MSG_NORMAL, "%s%sstripe_count: ", space, prefix); if (is_dir) { - if (!is_raw && lum->lmm_stripe_count == 0) { + if (!is_raw && lum->lmm_stripe_count == 0 && + lov_pattern(lum->lmm_pattern) != LOV_PATTERN_MDT) { unsigned int scount; rc = sattr_cache_get_defaults(NULL, path, &scount, NULL, @@ -2329,13 +2330,13 @@ static void lov_dump_user_lmm_header(struct lov_user_md *lum, char *path, separator = "\n"; } - if ((verbose & VERBOSE_LAYOUT) && !is_dir) { + if ((verbose & VERBOSE_LAYOUT)) { llapi_printf(LLAPI_MSG_NORMAL, "%s", separator); if (verbose & ~VERBOSE_LAYOUT) llapi_printf(LLAPI_MSG_NORMAL, "%s%spattern: ", space, prefix); llapi_printf(LLAPI_MSG_NORMAL, "%.x", lum->lmm_pattern); - separator = "\n"; + separator = is_dir ? 
" " : "\n"; } if ((verbose & VERBOSE_GENERATION) && !is_dir) { diff --git a/lustre/utils/liblustreapi_layout.c b/lustre/utils/liblustreapi_layout.c index f02bf44..f6e477c 100644 --- a/lustre/utils/liblustreapi_layout.c +++ b/lustre/utils/liblustreapi_layout.c @@ -569,6 +569,8 @@ llapi_layout_to_lum(const struct llapi_layout *layout) blob->lmm_pattern = 0; else if (pattern == LLAPI_LAYOUT_RAID0) blob->lmm_pattern = LOV_PATTERN_RAID0; + else if (pattern == LLAPI_LAYOUT_MDT) + blob->lmm_pattern = LOV_PATTERN_MDT; else blob->lmm_pattern = pattern; @@ -1206,7 +1208,7 @@ int llapi_layout_pattern_get(const struct llapi_layout *layout, } /** - * Set the RAID pattern of \a layout. + * Set the pattern of \a layout. * * \param[in] layout layout to set pattern in * \param[in] pattern value to be set @@ -1224,7 +1226,8 @@ int llapi_layout_pattern_set(struct llapi_layout *layout, uint64_t pattern) return -1; if (pattern != LLAPI_LAYOUT_DEFAULT && - pattern != LLAPI_LAYOUT_RAID0) { + pattern != LLAPI_LAYOUT_RAID0 && + pattern != LLAPI_LAYOUT_MDT) { errno = EOPNOTSUPP; return -1; }