Whamcloud - gitweb
LU-3285 lfs: add parameter for Data-on-MDT file 12/28012/16
authorMikhail Pershin <mike.pershin@intel.com>
Mon, 23 Jun 2014 18:05:45 +0000 (22:05 +0400)
committerMike Pershin <mike.pershin@intel.com>
Tue, 17 Oct 2017 19:07:57 +0000 (19:07 +0000)
Add --layout|-L parameter to lfs setstripe to create
file with data on MDT layout.
Add DoM size limit at LOD level - lod.dom_stripesize
parameter.

Signed-off-by: Mikhail Pershin <mike.pershin@intel.com>
Change-Id: Ib97142cc23c75e890dc619c14e5ffb3f3c47cda5
Reviewed-on: https://review.whamcloud.com/28012
Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
lustre/doc/lfs-setstripe.1
lustre/include/lustre/lustreapi.h
lustre/lod/lod_dev.c
lustre/lod/lod_internal.h
lustre/lod/lod_lov.c
lustre/lod/lproc_lod.c
lustre/mgs/mgs_llog.c
lustre/utils/lfs.c
lustre/utils/liblustreapi.c
lustre/utils/liblustreapi_layout.c

index 52dc756..332f1b6 100644 (file)
@@ -115,6 +115,27 @@ will be used as well; the
 .I start_ost_index
 must be part of the pool or an error will be returned.
 .TP
+.B -L, --layout <\fIlayout type\fB>\fR
+The type of stripe layout, can be
+.BR raid0 ", " released " or " mdt ".
+It is
+.BR raid0
+by default. The
+.BR mdt
+type allows placing the first component of the file on the MDT where the inode
+is located. This is used with composite file layouts and can be defined as
+the first component only. The
+.IR stripe_size
+of the MDT part is always equal to the component size. There is also a per-MDT
+parameter
+.IR lod.dom_stripesize
+to limit the maximum size of a DoM stripe, which can be changed with the
+.BR lctl\ set_param
+command (e.g.
+.IR lctl\ set_param\ lod.*.dom_stripesize=0 ,
+see
+.BR lctl (8)).
+.TP
 There are two options available only for \fBlfs migrate\fR:
 .TP
 .B -b, --block
@@ -164,6 +185,10 @@ the end of file.
 .TP
 .B $ lfs setstripe --component-del -I 1 /mnt/lustre/file1
 This deletes the component with ID equals 1 from an existing file.
+.TP
+.B $ lfs setstripe -E 1M -L mdt -E -1 /mnt/lustre/file1
+This creates a file with Data-on-MDT layout. The first 1M is placed on the MDT and \
+the rest of the file is placed on OSTs with default striping.
 .SH SEE ALSO
 .BR lfs (1),
 .BR lfs-migrate (1),
index 6956768..79f2f17 100644 (file)
@@ -520,7 +520,8 @@ void llapi_layout_free(struct llapi_layout *layout);
  * stored using RAID0.  That is, data will be split evenly and without
  * redundancy across all OSTs in the layout.
  */
-#define LLAPI_LAYOUT_RAID0     0
+#define LLAPI_LAYOUT_RAID0     0ULL
+#define LLAPI_LAYOUT_MDT       2ULL
 
 /**
 * The layout includes a specific set of OSTs on which to allocate.
index d6d77c6..5f80430 100644 (file)
@@ -1031,11 +1031,17 @@ static int lod_process_config(const struct lu_env *env,
                        GOTO(out, rc);
                }
 
+
                obd = lod2obd(lod);
-               rc = class_process_proc_param(PARAM_LOV, obd->obd_vars,
+               if (strstr(param, PARAM_LOD) != NULL)
+                       rc = class_process_proc_param(PARAM_LOD, obd->obd_vars,
+                                             lcfg, obd);
+               else
+                       rc = class_process_proc_param(PARAM_LOV, obd->obd_vars,
                                              lcfg, obd);
                if (rc > 0)
                        rc = 0;
+
                GOTO(out, rc);
        }
        case LCFG_PRE_CLEANUP: {
@@ -1639,6 +1645,7 @@ static int lod_init0(const struct lu_env *env, struct lod_device *lod,
 
        dt_conf_get(env, &lod->lod_dt_dev, &ddp);
        lod->lod_osd_max_easize = ddp.ddp_max_ea_size;
+       lod->lod_dom_max_stripesize = (1ULL << 20); /* 1Mb as default value */
 
        /* setup obd to be used with old lov code */
        rc = lod_pools_init(lod, cfg);
index bfb3660..b0dfdbf 100644 (file)
@@ -192,6 +192,8 @@ struct lod_device {
 
        /* maximum EA size underlied OSD may have */
        unsigned int          lod_osd_max_easize;
+       /* maximum size of MDT stripe for Data-on-MDT files. */
+       unsigned int          lod_dom_max_stripesize;
 
        /*FIXME: When QOS and pool is implemented for MDT, probably these
         * structure should be moved to lod_tgt_descs as well.
index a7e6129..663cbb2 100644 (file)
@@ -1705,14 +1705,36 @@ int lod_verify_striping(struct lod_device *d, const struct lu_buf *buf,
                                     le32_to_cpu(ent->lcme_offset);
                        tmp.lb_len = le32_to_cpu(ent->lcme_size);
 
-                       /* Check DoM entry is always the first one */
+                       /* Checks for DoM entry in composite layout. */
                        lum = tmp.lb_buf;
                        if (lov_pattern(le32_to_cpu(lum->lmm_pattern)) ==
-                           LOV_PATTERN_MDT && i > 0) {
-                               CDEBUG(D_LAYOUT, "invalid DoM layout entry "
-                                      "found at %i index\n", i);
-                               RETURN(-EINVAL);
-
+                           LOV_PATTERN_MDT) {
+                               /* DoM component can be only the first entry */
+                               if (i > 0) {
+                                       CDEBUG(D_LAYOUT, "invalid DoM layout "
+                                              "entry found at %i index\n", i);
+                                       RETURN(-EINVAL);
+                               }
+                               stripe_size = le32_to_cpu(lum->lmm_stripe_size);
+                               /* There is just one stripe on MDT and it must
+                                * cover whole component size. */
+                               if (stripe_size != prev_end) {
+                                       CDEBUG(D_LAYOUT, "invalid DoM layout "
+                                              "stripe size %u != %llu "
+                                              "(component size)\n",
+                                              stripe_size, prev_end);
+                                       RETURN(-EINVAL);
+                               }
+                               /* Check stripe size against per-MDT limit */
+                               if (stripe_size > d->lod_dom_max_stripesize) {
+                                       CDEBUG(D_LAYOUT, "DoM component size "
+                                              "%u is bigger than MDT limit "
+                                              "%u, check dom_max_stripesize"
+                                              " parameter\n",
+                                              stripe_size,
+                                              d->lod_dom_max_stripesize);
+                                       RETURN(-EINVAL);
+                               }
                        }
                        rc = lod_verify_v1v3(d, &tmp, is_from_disk);
                        if (rc)
index 8003fb6..45ab539 100644 (file)
  * \retval 0           on success
  * \retval negative    error code if failed
  */
+static int lod_dom_stripesize_seq_show(struct seq_file *m, void *v)
+{      /* Show the maximum DoM stripe size of this LOD device. */
+       struct obd_device *dev = m->private; /* seq_file private data */
+       struct lod_device *lod;
+
+       LASSERT(dev != NULL);
+       lod  = lu2lod_dev(dev->obd_lu_dev); /* LOD device of this obd */
+       seq_printf(m, "%u\n", lod->lod_dom_max_stripesize); /* DoM limit */
+       return 0; /* always succeeds */
+}
+
+/**
+ * Set the maximum stripe size of Data-on-MDT (DoM) components.
+ *
+ * The parsed value is stored in lod_device::lod_dom_max_stripesize.
+ * Negative values and values above 1GiB (1ULL << 30) are rejected with
+ * -ERANGE. A positive value is handed to lod_fix_desc_stripe_size() by
+ * pointer before it is stored, so the stored limit may differ from the
+ * value written (helper not visible here - confirm what it adjusts).
+ * Zero is stored unchanged.
+ *
+ * \param[in] file     proc file
+ * \param[in] buffer   user-space string holding the new limit, parsed by
+ *                     lprocfs_str_with_units_to_s64() with '1' passed as
+ *                     its unit argument
+ * \param[in] count    @buffer length
+ * \param[in] off      unused for single entry
+ *
+ * \retval @count      on success
+ * \retval -ERANGE     if the value is negative or larger than 1GiB
+ * \retval negative    error code returned by the parser
+ */
+static ssize_t
+lod_dom_stripesize_seq_write(struct file *file, const char __user *buffer,
+                             size_t count, loff_t *off)
+{
+       struct seq_file *m = file->private_data;
+       struct obd_device *dev = m->private;
+       struct lod_device *lod;
+       __s64 val;
+       int rc;
+
+       LASSERT(dev != NULL);
+       lod  = lu2lod_dev(dev->obd_lu_dev);
+       rc = lprocfs_str_with_units_to_s64(buffer, count, &val, '1');
+       if (rc)
+               return rc;
+       if (val < 0)
+               return -ERANGE;
+
+       /* limits above 1GiB are rejected; positive ones go to the helper */
+       if (val > (1ULL << 30))
+               return -ERANGE;
+       else if (val > 0)
+               lod_fix_desc_stripe_size(&val);
+
+       lod->lod_dom_max_stripesize = val;
+
+       return count;
+}
+LPROC_SEQ_FOPS(lod_dom_stripesize);
+
+/**
+ * Show default stripe size.
+ *
+ * \param[in] m                seq file
+ * \param[in] v                unused for single entry
+ *
+ * \retval 0           on success
+ * \retval negative    error code if failed
+ */
 static int lod_stripesize_seq_show(struct seq_file *m, void *v)
 {
        struct obd_device *dev = m->private;
@@ -758,6 +821,10 @@ static struct lprocfs_vars lprocfs_lod_obd_vars[] = {
          .fops =       &lod_qos_maxage_fops    },
        { .name =       "lmv_failout",
          .fops =       &lod_lmv_failout_fops   },
+       {
+         .name = "dom_stripesize",
+         .fops = &lod_dom_stripesize_fops
+       },
        { NULL }
 };
 
index 135e78b..22617cd 100644 (file)
@@ -3925,6 +3925,7 @@ active_err:
 
        /* All mdd., ost. and osd. params in proc */
        if ((class_match_param(ptr, PARAM_MDD, NULL) == 0) ||
+           (class_match_param(ptr, PARAM_LOD, NULL) == 0) ||
            (class_match_param(ptr, PARAM_OST, NULL) == 0) ||
            (class_match_param(ptr, PARAM_OSD, NULL) == 0)) {
                CDEBUG(D_MGS, "%.3s param %s\n", ptr, ptr + 4);
index dc07fb9..00ac0b1 100644 (file)
@@ -116,6 +116,7 @@ static int lfs_list_commands(int argc, char **argv);
        "usage: "cmd" [--stripe-count|-c <stripe_count>]\n"             \
        "                 [--stripe-index|-i <start_ost_idx>]\n"        \
        "                 [--stripe-size|-S <stripe_size>]\n"           \
+       "                 [--layout|-L <pattern>]\n"            \
        "                 [--pool|-p <pool_name>]\n"                    \
        "                 [--ost|-o <ost_indices>]\n"                   \
        "                 [--component-end|-E <comp_end>]\n"
@@ -126,6 +127,7 @@ static int lfs_list_commands(int argc, char **argv);
        "\t              respectively)\n"                               \
        "\tstart_ost_idx: OST index of first stripe (-1 default)\n"     \
        "\tstripe_count: Number of OSTs to stripe over (0 default, -1 all)\n" \
+       "\tlayout:       stripe pattern type: raid0, mdt (default raid0)\n"\
        "\tpool_name:    Name of OST pool to use (default none)\n"      \
        "\tost_indices:  List of OST indices, can be repeated multiple times\n"\
        "\t              Indices be specified in a format of:\n"        \
@@ -249,7 +251,7 @@ command_t cmdlist[] = {
          "     [[!] --gid|-g|--group|-G <gid>|<gname>]\n"
          "     [[!] --uid|-u|--user|-U <uid>|<uname>] [[!] --pool <pool>]\n"
         "     [[!] --projid <projid>]\n"
-        "     [[!] --layout|-L released,raid0]\n"
+        "     [[!] --layout|-L released,raid0,mdt]\n"
         "     [[!] --component-count [+-]<comp_cnt>]\n"
         "     [[!] --component-start [+-]N[kMGTPE]]\n"
         "     [[!] --component-end|-E [+-]N[kMGTPE]]\n"
@@ -1017,6 +1019,7 @@ struct lfs_setstripe_args {
        int                      lsa_stripe_off;
        __u32                    lsa_comp_flags;
        int                      lsa_nr_osts;
+       int                      lsa_pattern;
        __u32                   *lsa_osts;
        char                    *lsa_pool_name;
 };
@@ -1031,7 +1034,7 @@ static inline bool setstripe_args_specified(struct lfs_setstripe_args *lsa)
 {
        return (lsa->lsa_stripe_size != 0 || lsa->lsa_stripe_count != 0 ||
                lsa->lsa_stripe_off != -1 || lsa->lsa_pool_name != NULL ||
-               lsa->lsa_comp_end != 0);
+               lsa->lsa_comp_end != 0 || lsa->lsa_pattern != 0);
 }
 
 static int comp_args_to_layout(struct llapi_layout **composite,
@@ -1076,6 +1079,51 @@ static int comp_args_to_layout(struct llapi_layout **composite,
                return rc;
        }
 
+       /* Data-on-MDT component setting */
+       if (lsa->lsa_pattern == LLAPI_LAYOUT_MDT) {
+               /* In case of Data-on-MDT patterns the only extra option
+                * applicable is stripe size option. */
+               if (lsa->lsa_stripe_count) {
+                       fprintf(stderr, "Option 'stripe-count' can't be "
+                               "specified with Data-on-MDT component: %i\n",
+                               lsa->lsa_stripe_count);
+                       return -EINVAL;
+               }
+               if (lsa->lsa_stripe_size) {
+                       fprintf(stderr, "Option 'stripe-size' can't be "
+                               "specified with Data-on-MDT component: %llu\n",
+                               lsa->lsa_stripe_size);
+                       return -EINVAL;
+               }
+               if (lsa->lsa_nr_osts != 0) {
+                       fprintf(stderr, "Option 'ost-list' can't be specified "
+                               "with Data-on-MDT component: '%i'\n",
+                               lsa->lsa_nr_osts);
+                       return -EINVAL;
+               }
+               if (lsa->lsa_stripe_off != -1) {
+                       fprintf(stderr, "Option 'stripe-offset' can't be "
+                               "specified with Data-on-MDT component: %i\n",
+                               lsa->lsa_stripe_off);
+                       return -EINVAL;
+               }
+               if (lsa->lsa_pool_name != 0) {
+                       fprintf(stderr, "Option 'pool' can't be specified "
+                               "with Data-on-MDT component: '%s'\n",
+                               lsa->lsa_pool_name);
+                       return -EINVAL;
+               }
+
+               rc = llapi_layout_pattern_set(layout, lsa->lsa_pattern);
+               if (rc) {
+                       fprintf(stderr, "Set stripe pattern %#x failed. %s\n",
+                               lsa->lsa_pattern, strerror(errno));
+                       return rc;
+               }
+               /* Data-on-MDT component has always single stripe up to end */
+               lsa->lsa_stripe_size = lsa->lsa_comp_end;
+       }
+
        if (lsa->lsa_stripe_size != 0) {
                rc = llapi_layout_stripe_size_set(layout,
                                                  lsa->lsa_stripe_size);
@@ -1386,6 +1434,7 @@ static int lfs_setstripe(int argc, char **argv)
         * the consistent "--stripe-size|-S" for all commands. */
        { .val = 's',   .name = "size",         .has_arg = required_argument },
 #endif
+       { .val = 'L',   .name = "layout",       .has_arg = required_argument },
        { .val = 'S',   .name = "stripe-size",  .has_arg = required_argument },
        { .val = 'S',   .name = "stripe_size",  .has_arg = required_argument },
        /* dirstripe {"mdt-count",    required_argument, 0, 'T'}, */
@@ -1410,7 +1459,7 @@ static int lfs_setstripe(int argc, char **argv)
        if (strcmp(argv[0], "migrate") == 0)
                migrate_mode = true;
 
-       while ((c = getopt_long(argc, argv, "bc:dE:i:I:m:no:p:s:S:v",
+       while ((c = getopt_long(argc, argv, "bc:dE:i:I:m:no:p:L:s:S:v",
                                long_opts, NULL)) >= 0) {
                switch (c) {
                case 0:
@@ -1481,6 +1530,27 @@ static int lfs_setstripe(int argc, char **argv)
                                }
                        }
                        break;
+               case 'L':
+                       if (strcmp(argv[optind - 1], "mdt") == 0) {
+                               /* Can be only the first component */
+                               if (layout != NULL) {
+                                       fprintf(stderr, "error: 'mdt' layout "
+                                               "can be only the first one\n");
+                                       goto error;
+                               }
+                               if (lsa.lsa_comp_end > (1ULL << 30)) { /* 1Gb */
+                                       fprintf(stderr, "error: 'mdt' layout "
+                                               "size is too big\n");
+                                       goto error;
+                               }
+                               lsa.lsa_pattern = LLAPI_LAYOUT_MDT;
+                       } else if (strcmp(argv[optind - 1], "raid0") != 0) {
+                               fprintf(stderr, "error: layout '%s' is "
+                                       "unknown, supported layouts are: "
+                                       "'mdt', 'raid0'\n", argv[optind]);
+                               goto error;
+                       }
+                       break;
                case 'i':
                        if (strcmp(argv[optind - 1], "--index") == 0)
                                fprintf(stderr, "warning: '--index' deprecated"
@@ -1684,7 +1754,6 @@ static int lfs_setstripe(int argc, char **argv)
                param->lsp_stripe_size = lsa.lsa_stripe_size;
                param->lsp_stripe_offset = lsa.lsa_stripe_off;
                param->lsp_stripe_count = lsa.lsa_stripe_count;
-               param->lsp_stripe_pattern = 0;
                param->lsp_pool = lsa.lsa_pool_name;
                param->lsp_is_specific = false;
                if (lsa.lsa_nr_osts > 0) {
@@ -1850,17 +1919,19 @@ static inline int gid2name(char **name, unsigned int id)
 
 static int name2layout(__u32 *layout, char *name)
 {
-       char *ptr, *lyt;
+       char *ptr, *layout_name;
 
        *layout = 0;
        for (ptr = name; ; ptr = NULL) {
-               lyt = strtok(ptr, ",");
-               if (lyt == NULL)
+               layout_name = strtok(ptr, ",");
+               if (layout_name == NULL)
                        break;
-               if (strcmp(lyt, "released") == 0)
+               if (strcmp(layout_name, "released") == 0)
                        *layout |= LOV_PATTERN_F_RELEASED;
-               else if (strcmp(lyt, "raid0") == 0)
+               else if (strcmp(layout_name, "raid0") == 0)
                        *layout |= LOV_PATTERN_RAID0;
+               else if (strcmp(layout_name, "mdt") == 0)
+                       *layout |= LOV_PATTERN_MDT;
                else
                        return -1;
        }
index d237cab..e6008a9 100644 (file)
@@ -2276,7 +2276,8 @@ static void lov_dump_user_lmm_header(struct lov_user_md *lum, char *path,
                        llapi_printf(LLAPI_MSG_NORMAL, "%s%sstripe_count:  ",
                                     space, prefix);
                if (is_dir) {
-                       if (!is_raw && lum->lmm_stripe_count == 0) {
+                       if (!is_raw && lum->lmm_stripe_count == 0 &&
+                           lov_pattern(lum->lmm_pattern) != LOV_PATTERN_MDT) {
                                unsigned int scount;
                                rc = sattr_cache_get_defaults(NULL, path,
                                                              &scount, NULL,
@@ -2329,13 +2330,13 @@ static void lov_dump_user_lmm_header(struct lov_user_md *lum, char *path,
                        separator = "\n";
        }
 
-       if ((verbose & VERBOSE_LAYOUT) && !is_dir) {
+       if ((verbose & VERBOSE_LAYOUT)) {
                llapi_printf(LLAPI_MSG_NORMAL, "%s", separator);
                if (verbose & ~VERBOSE_LAYOUT)
                        llapi_printf(LLAPI_MSG_NORMAL, "%s%spattern:       ",
                                     space, prefix);
                llapi_printf(LLAPI_MSG_NORMAL, "%.x", lum->lmm_pattern);
-               separator = "\n";
+               separator = is_dir ? " " : "\n";
        }
 
        if ((verbose & VERBOSE_GENERATION) && !is_dir) {
index f02bf44..f6e477c 100644 (file)
@@ -569,6 +569,8 @@ llapi_layout_to_lum(const struct llapi_layout *layout)
                        blob->lmm_pattern = 0;
                else if (pattern == LLAPI_LAYOUT_RAID0)
                        blob->lmm_pattern = LOV_PATTERN_RAID0;
+               else if (pattern == LLAPI_LAYOUT_MDT)
+                       blob->lmm_pattern = LOV_PATTERN_MDT;
                else
                        blob->lmm_pattern = pattern;
 
@@ -1206,7 +1208,7 @@ int llapi_layout_pattern_get(const struct llapi_layout *layout,
 }
 
 /**
- * Set the RAID pattern of \a layout.
+ * Set the pattern of \a layout.
  *
  * \param[in] layout   layout to set pattern in
  * \param[in] pattern  value to be set
@@ -1224,7 +1226,8 @@ int llapi_layout_pattern_set(struct llapi_layout *layout, uint64_t pattern)
                return -1;
 
        if (pattern != LLAPI_LAYOUT_DEFAULT &&
-           pattern != LLAPI_LAYOUT_RAID0) {
+           pattern != LLAPI_LAYOUT_RAID0 &&
+           pattern != LLAPI_LAYOUT_MDT) {
                errno = EOPNOTSUPP;
                return -1;
        }