Whamcloud - gitweb
LU-11963 osd: Add nonrotational flag to statfs
[fs/lustre-release.git] / lustre / utils / lfs.c
index b897626..701868a 100644 (file)
@@ -164,6 +164,7 @@ static inline int lfs_mirror_split(int argc, char **argv)
 #define SSM_CMD_COMMON(cmd) \
        "usage: "cmd" [--component-end|-E <comp_end>]\n"                \
        "                 [--stripe-count|-c <stripe_count>]\n"         \
+       "                 [--overstripe-count|-C <stripe_count>]\n"     \
        "                 [--stripe-index|-i <start_ost_idx>]\n"        \
        "                 [--stripe-size|-S <stripe_size>]\n"           \
        "                 [--layout|-L <pattern>]\n"                    \
@@ -174,6 +175,9 @@ static inline int lfs_mirror_split(int argc, char **argv)
 
 #define SSM_HELP_COMMON \
        "\tstripe_count: Number of OSTs to stripe over (0=fs default, -1 all)\n" \
+       "\t              Using -C instead of -c allows overstriping, which\n" \
+        "\t             will place more than one stripe per OST if\n" \
+        "\t             stripe_count is greater than the number of OSTs\n" \
        "\tstart_ost_idx: OST index of first stripe (-1=default round robin)\n"\
        "\tstripe_size:  Number of bytes on each OST (0=fs default)\n" \
        "\t              Can be specified with K, M or G (for KB, MB, GB\n" \
@@ -255,8 +259,14 @@ static inline int lfs_mirror_split(int argc, char **argv)
        "\tmdt_hash:  hash type of the striped directory. mdt types:\n" \
        "       fnv_1a_64 FNV-1a hash algorithm (default)\n"            \
        "       all_char  sum of characters % MDT_COUNT (not recommended)\n" \
+       "       space     create subdirectories with balanced space usage\n" \
        "\tdefault_stripe: set default dirstripe of the directory\n"    \
-       "\tmode: the file access permission of the directory (octal)\n"
+       "\tmode: the file access permission of the directory (octal)\n" \
+       "To create dir with a foreign (free format) layout :\n" \
+       "setdirstripe|mkdir --foreign[=<foreign_type>] -x|-xattr <string> " \
+               "[--mode|-m mode] [--flags <hex>] <dir>\n" \
+       "\tmode: the mode of the directory\n" \
+       "\tforeign_type: none or daos\n"
 
 /**
  * command_t mirror_cmdlist - lfs mirror commands.
@@ -373,7 +383,7 @@ command_t cmdlist[] = {
         "usage: getdirstripe [--mdt-count|-c] [--mdt-index|-m|-i]\n"
         "                    [--mdt-hash|-H] [--obd|-O <uuid>]\n"
         "                    [--recursive|-r] [--yaml|-y]\n"
-        "                    [--default|-D] <dir> ..."},
+        "                    [--verbose|-v] [--default|-D] <dir> ..."},
        {"mkdir", lfs_setdirstripe, 0,
         "To create a striped directory on a specified MDT. This can only\n"
         "be done on MDT0 with the right of administrator.\n"
@@ -402,6 +412,7 @@ command_t cmdlist[] = {
         "     [[!] --gid|-g|--group|-G <gid>|<gname>]\n"
         "     [[!] --uid|-u|--user|-U <uid>|<uname>] [[!] --pool <pool>]\n"
         "     [[!] --projid <projid>]\n"
+        "     [[!] --foreign[=<foreign_type>]]\n"
         "     [[!] --layout|-L released,raid0,mdt]\n"
         "     [[!] --foreign[=<foreign_type>]]\n"
         "     [[!] --component-count [+-]<comp_cnt>]\n"
@@ -563,6 +574,7 @@ command_t cmdlist[] = {
         "layout\nto another (may be not safe with concurent writes).\n"
         "usage: migrate  "
         "[--stripe-count|-c] <stripe_count>\n"
+        "[--overstripe-count|-C] <stripe_count>\n"
         "              [--stripe-index|-i] <start_ost_index>\n"
         "              [--stripe-size|-S] <stripe_size>\n"
         "              [--pool|-p] <pool_name>\n"
@@ -572,6 +584,9 @@ command_t cmdlist[] = {
         "              [--non-direct|-D]\n"
         "              <file|directory>\n"
         "\tstripe_count:     number of OSTs to stripe a file over\n"
+        "\t              Using -C instead of -c allows overstriping, which\n"
+        "\t              will place more than one stripe per OST if\n"
+        "\t              stripe_count is greater than the number of OSTs\n"
         "\tstripe_ost_index: index of the first OST to stripe a file over\n"
         "\tstripe_size:      number of bytes to store before moving to the next OST\n"
         "\tpool_name:        name of the predefined pool of OSTs\n"
@@ -637,19 +652,19 @@ static int check_hashtype(const char *hashtype)
        return 0;
 }
 
-static uint32_t lov_check_foreign_type_name(const char *foreign_type_name)
+static uint32_t check_foreign_type_name(const char *foreign_type_name)
 {
        uint32_t i;
 
-       for (i = 0; i < LOV_FOREIGN_TYPE_UNKNOWN; i++) {
-               if (lov_foreign_type[i].lft_name == NULL)
+       for (i = 0; i < LU_FOREIGN_TYPE_UNKNOWN; i++) {
+               if (lu_foreign_types[i].lft_name == NULL)
                        break;
                if (strcmp(foreign_type_name,
-                          lov_foreign_type[i].lft_name) == 0)
-                       return lov_foreign_type[i].lft_type;
+                          lu_foreign_types[i].lft_name) == 0)
+                       return lu_foreign_types[i].lft_type;
        }
 
-       return LOV_FOREIGN_TYPE_UNKNOWN;
+       return LU_FOREIGN_TYPE_UNKNOWN;
 }
 
 static const char *error_loc = "syserror";
@@ -1945,20 +1960,23 @@ free_layout:
  * indices and ranges, for example "1,2-4,7". Add the indices into the
  * \a tgts array and remove duplicates.
  *
- * \param[out] tgts    array to store indices in
- * \param[in] size     size of \a tgts array
- * \param[in] offset   starting index in \a tgts
- * \param[in] arg      string containing OST index list
+ * \param[out] tgts            array to store indices in
+ * \param[in] size             size of \a tgts array
+ * \param[in] offset           starting index in \a tgts
+ * \param[in] arg              string containing OST index list
+ * \param[in,out] pattern      layout pattern; overstriping permits duplicates
  *
  * \retval positive    number of indices in \a tgts
  * \retval -EINVAL     unable to parse \a arg
  */
-static int parse_targets(__u32 *tgts, int size, int offset, char *arg)
+static int parse_targets(__u32 *tgts, int size, int offset, char *arg,
+                        unsigned long long *pattern)
 {
        int rc;
        int nr = offset;
        int slots = size - offset;
        char *ptr = NULL;
+       bool overstriped = false;
        bool end_of_loop;
 
        if (arg == NULL)
@@ -1966,8 +1984,8 @@ static int parse_targets(__u32 *tgts, int size, int offset, char *arg)
 
        end_of_loop = false;
        while (!end_of_loop) {
-               int start_index;
-               int end_index;
+               int start_index = 0;
+               int end_index = 0;
                int i;
                char *endptr = NULL;
 
@@ -1998,14 +2016,21 @@ static int parse_targets(__u32 *tgts, int size, int offset, char *arg)
 
                        /* remove duplicate */
                        for (j = 0; j < offset; j++) {
-                               if (tgts[j] == i)
-                                       break;
+                               if (tgts[j] == i && pattern &&
+                                   *pattern == LLAPI_LAYOUT_OVERSTRIPING)
+                                       overstriped = true;
+                               else if (tgts[j] == i)
+                                       return -EINVAL;
                        }
-                       if (j == offset) { /* no duplicate */
+
+                       j = offset;
+
+                       if (j == offset) { /* check complete */
                                tgts[nr++] = i;
                                --slots;
                        }
                }
+
                if (slots == 0 && i < end_index)
                        break;
 
@@ -2017,6 +2042,9 @@ static int parse_targets(__u32 *tgts, int size, int offset, char *arg)
        if (!end_of_loop && ptr != NULL)
                *ptr = ',';
 
+       if (!overstriped && pattern)
+               *pattern = LLAPI_LAYOUT_DEFAULT;
+
        return rc < 0 ? rc : nr;
 }
 
@@ -2195,6 +2223,13 @@ static int comp_args_to_layout(struct llapi_layout **composite,
                }
                /* Data-on-MDT component has always single stripe up to end */
                lsa->lsa_stripe_size = lsa->lsa_comp_end;
+       } else if (lsa->lsa_pattern == LLAPI_LAYOUT_OVERSTRIPING) {
+               rc = llapi_layout_pattern_set(layout, lsa->lsa_pattern);
+               if (rc) {
+                       fprintf(stderr, "Set stripe pattern %#llx failed. %s\n",
+                               lsa->lsa_pattern, strerror(errno));
+                       return rc;
+               }
        }
 
        rc = llapi_layout_stripe_size_set(layout, lsa->lsa_stripe_size);
@@ -2320,6 +2355,10 @@ static int build_layout_from_yaml_node(struct cYAML *node,
                                } else if (!strcmp(string, "pattern")) {
                                        if (!strcmp(node->cy_valuestring, "mdt"))
                                                lsa->lsa_pattern = LLAPI_LAYOUT_MDT;
+                                       if (!strcmp(node->cy_valuestring,
+                                                   "raid0,overstriped"))
+                                               lsa->lsa_pattern =
+                                                       LLAPI_LAYOUT_OVERSTRIPING;
                                } else if (!strcmp(string, "lcme_flags")) {
                                        rc = comp_str2flags(node->cy_valuestring,
                                                            &lsa->lsa_comp_flags,
@@ -2630,7 +2669,7 @@ static int lfs_setstripe_internal(int argc, char **argv,
        char *template = NULL;
        bool foreign_mode = false;
        char *xattr = NULL;
-       uint32_t type = LOV_FOREIGN_TYPE_NONE, flags = 0;
+       uint32_t type = LU_FOREIGN_TYPE_NONE, flags = 0;
        char *mode_opt = NULL;
        mode_t previous_umask = 0;
        mode_t mode = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH | S_IWOTH;
@@ -2673,7 +2712,8 @@ static int lfs_setstripe_internal(int argc, char **argv,
        { .val = 'c',   .name = "stripe-count", .has_arg = required_argument},
        { .val = 'c',   .name = "stripe_count", .has_arg = required_argument},
        { .val = 'c',   .name = "mdt-count",    .has_arg = required_argument},
-/* find        { .val = 'C',   .name = "ctime",        .has_arg = required_argument }*/
+       { .val = 'C',   .name = "overstripe-count",
+                                               .has_arg = required_argument},
        { .val = 'd',   .name = "delete",       .has_arg = no_argument},
        { .val = 'd',   .name = "destroy",      .has_arg = no_argument},
        /* --non-direct is only valid in migrate mode */
@@ -2729,7 +2769,7 @@ static int lfs_setstripe_internal(int argc, char **argv,
        snprintf(cmd, sizeof(cmd), "%s %s", progname, argv[0]);
        progname = cmd;
        while ((c = getopt_long(argc, argv,
-                               "bc:dDE:f:H:i:I:m:N::no:p:L:s:S:vx:y:",
+                               "bc:C:dDE:f:H:i:I:m:N::no:p:L:s:S:vx:y:",
                                long_opts, NULL)) >= 0) {
                switch (c) {
                case 0:
@@ -2822,8 +2862,8 @@ static int lfs_setstripe_internal(int argc, char **argv,
                                type = strtoul(optarg, &end, 0);
                                if (*end) {
                                        /* check name */
-                                       type = lov_check_foreign_type_name(optarg);
-                                       if (type == LOV_FOREIGN_TYPE_UNKNOWN) {
+                                       type = check_foreign_type_name(optarg);
+                                       if (type == LU_FOREIGN_TYPE_UNKNOWN) {
                                                fprintf(stderr,
                                                        "%s %s: unrecognized foreign type '%s'\n",
                                                        progname, argv[0],
@@ -2840,7 +2880,7 @@ static int lfs_setstripe_internal(int argc, char **argv,
                                mode = strtoul(mode_opt, &end, 8);
                                if (*end != '\0') {
                                        fprintf(stderr,
-                                               "%s %s: bad MODE '%s'\n",
+                                               "%s %s: bad mode '%s'\n",
                                                progname, argv[0], mode_opt);
                                        return CMD_HELP;
                                }
@@ -2860,6 +2900,9 @@ static int lfs_setstripe_internal(int argc, char **argv,
                        }
                        migration_block = true;
                        break;
+               case 'C':
+                       lsa.lsa_pattern = LLAPI_LAYOUT_OVERSTRIPING;
+                       /* fall through */
                case 'c':
                        lsa.lsa_stripe_count = strtoul(optarg, &end, 0);
                        if (*end != '\0') {
@@ -3015,7 +3058,7 @@ static int lfs_setstripe_internal(int argc, char **argv,
                        migrate_mdt_mode = true;
                        lsa.lsa_nr_tgts = parse_targets(tgts,
                                                sizeof(tgts) / sizeof(__u32),
-                                               lsa.lsa_nr_tgts, optarg);
+                                               lsa.lsa_nr_tgts, optarg, NULL);
                        if (lsa.lsa_nr_tgts < 0) {
                                fprintf(stderr,
                                        "%s %s: invalid MDT target(s) '%s'\n",
@@ -3084,9 +3127,15 @@ static int lfs_setstripe_internal(int argc, char **argv,
                                fprintf(stderr, "warning: '--ost-list' is "
                                        "deprecated, use '--ost' instead\n");
 #endif
+                       /* -o allows overstriping, and must note it because
+                        * parse_targets is shared with MDT striping, which
+                        * does not allow duplicates
+                        */
+                       lsa.lsa_pattern = LLAPI_LAYOUT_OVERSTRIPING;
                        lsa.lsa_nr_tgts = parse_targets(tgts,
                                                sizeof(tgts) / sizeof(__u32),
-                                               lsa.lsa_nr_tgts, optarg);
+                                               lsa.lsa_nr_tgts, optarg,
+                                               &lsa.lsa_pattern);
                        if (lsa.lsa_nr_tgts < 0) {
                                fprintf(stderr,
                                        "%s %s: invalid OST target(s) '%s'\n",
@@ -3340,7 +3389,7 @@ static int lfs_setstripe_internal(int argc, char **argv,
                        lmu->lum_hash_type = LMV_HASH_TYPE_FNV_1A_64;
                if (lsa.lsa_pool_name)
                        strncpy(lmu->lum_pool_name, lsa.lsa_pool_name,
-                               sizeof(lmu->lum_pool_name));
+                               sizeof(lmu->lum_pool_name) - 1);
                if (lsa.lsa_nr_tgts > 1) {
                        int i;
 
@@ -3389,6 +3438,14 @@ static int lfs_setstripe_internal(int argc, char **argv,
                        param->lsp_stripe_offset = -1;
                else
                        param->lsp_stripe_offset = lsa.lsa_stripe_off;
+               param->lsp_stripe_pattern =
+                               llapi_pattern_to_lov(lsa.lsa_pattern);
+               if (param->lsp_stripe_pattern == EINVAL) {
+                       fprintf(stderr, "error: %s: invalid stripe pattern\n",
+                               argv[0]);
+                       free(param);
+                       goto usage_error;
+               }
                param->lsp_pool = lsa.lsa_pool_name;
                param->lsp_is_specific = false;
                if (lsa.lsa_nr_tgts > 0) {
@@ -3497,7 +3554,7 @@ static int lfs_setstripe_internal(int argc, char **argv,
                }
        }
 
-       if (mode_opt != NULL && previous_umask != 0)
+       if (mode_opt != NULL)
                umask(previous_umask);
 
        free(param);
@@ -3650,6 +3707,8 @@ static int name2layout(__u32 *layout, char *name)
                        *layout |= LOV_PATTERN_RAID0;
                else if (strcmp(layout_name, "mdt") == 0)
                        *layout |= LOV_PATTERN_MDT;
+               else if (strcmp(layout_name, "overstriping") == 0)
+                       *layout |= LOV_PATTERN_OVERSTRIPING;
                else
                        return -1;
        }
@@ -3697,6 +3756,8 @@ static int lfs_find(int argc, char **argv)
        { .val = 'E',   .name = "component-end",
                                                .has_arg = required_argument },
 /* find        { .val = 'F',   .name = "fid",          .has_arg = no_argument }, */
+       { .val = LFS_LAYOUT_FOREIGN_OPT,
+                       .name = "foreign",      .has_arg = optional_argument},
        { .val = 'g',   .name = "gid",          .has_arg = required_argument },
        { .val = 'G',   .name = "group",        .has_arg = required_argument },
        { .val = 'H',   .name = "mdt-hash",     .has_arg = required_argument },
@@ -3896,30 +3957,6 @@ static int lfs_find(int argc, char **argv)
                                param.fp_mirror_state = state;
                        }
                        break;
-               case LFS_LAYOUT_FOREIGN_OPT: {
-                       /* all types by default */
-                       uint32_t type = LOV_FOREIGN_TYPE_UNKNOWN;
-
-                       if (optarg != NULL) {
-                               /* check pure numeric */
-                               type = strtoul(optarg, &endptr, 0);
-                               if (*endptr) {
-                                       /* check name */
-                                       type = lov_check_foreign_type_name(optarg);
-                                       if (type == LOV_FOREIGN_TYPE_UNKNOWN) {
-                                               fprintf(stderr,
-                                                       "%s %s: unrecognized foreign type '%s'\n",
-                                                       progname, argv[0],
-                                                       optarg);
-                                               return CMD_HELP;
-                                       }
-                               }
-                       }
-                       param.fp_foreign_type = type;
-                       param.fp_check_foreign = 1;
-                       param.fp_exclude_foreign = !!neg_opt;
-                       break;
-               }
                 case 'c':
                         if (optarg[0] == '+') {
                                param.fp_stripe_count_sign = -1;
@@ -3968,6 +4005,30 @@ static int lfs_find(int argc, char **argv)
                        param.fp_check_comp_end = 1;
                        param.fp_exclude_comp_end = !!neg_opt;
                        break;
+               case LFS_LAYOUT_FOREIGN_OPT: {
+                       /* all types by default */
+                       uint32_t type = LU_FOREIGN_TYPE_UNKNOWN;
+
+                       if (optarg != NULL) {
+                               /* check pure numeric */
+                               type = strtoul(optarg, &endptr, 0);
+                               if (*endptr) {
+                                       /* check name */
+                                       type = check_foreign_type_name(optarg);
+                                       if (type == LU_FOREIGN_TYPE_UNKNOWN) {
+                                               fprintf(stderr,
+                                                       "%s %s: unknown foreign type '%s'\n",
+                                                       progname, argv[0],
+                                                       optarg);
+                                               return CMD_HELP;
+                                       }
+                               }
+                       }
+                       param.fp_foreign_type = type;
+                       param.fp_check_foreign = 1;
+                       param.fp_exclude_foreign = !!neg_opt;
+                       break;
+               }
                case 'g':
                case 'G':
                        rc = name2gid(&param.fp_gid, optarg);
@@ -4707,6 +4768,7 @@ static int lfs_getdirstripe(int argc, char **argv)
        { .val = 'O',   .name = "obd",          .has_arg = required_argument },
        { .val = 'r',   .name = "recursive",    .has_arg = no_argument },
        { .val = 'T',   .name = "mdt-count",    .has_arg = no_argument },
+       { .val = 'v',   .name = "verbose",      .has_arg = no_argument },
        { .val = 'y',   .name = "yaml",         .has_arg = no_argument },
        { .name = NULL } };
        int c, rc;
@@ -4714,7 +4776,7 @@ static int lfs_getdirstripe(int argc, char **argv)
        param.fp_get_lmv = 1;
 
        while ((c = getopt_long(argc, argv,
-                               "cDHimO:rtTy", long_opts, NULL)) != -1)
+                               "cDHimO:rtTvy", long_opts, NULL)) != -1)
        {
                switch (c) {
                case 'c':
@@ -4748,10 +4810,15 @@ static int lfs_getdirstripe(int argc, char **argv)
                case 'r':
                        param.fp_recursive = 1;
                        break;
+               case 'v':
+                       param.fp_verbose |= VERBOSE_DETAIL;
+                       break;
                case 'y':
                        param.fp_yaml = 1;
                        break;
                default:
+                       fprintf(stderr, "%s %s: unrecognized option '%s'\n",
+                               progname, argv[0], argv[optind - 1]);
                        return CMD_HELP;
                }
        }
@@ -4817,6 +4884,30 @@ static inline int obd_statfs_ratio(const struct obd_statfs *st, bool inodes)
        return (ratio - (int)ratio) > 0 ? (int)(ratio + 1) : (int)ratio;
 }
 
+/* Table indexed by OS_STATE_* flag value giving the letter lfs df prints for
+ * it. Only problem-state flags are named; all other slots are 0 (no name).
+ */
+static char obd_statfs_state_names[] = {
+       [OS_STATE_DEGRADED]     = 'D',
+       [OS_STATE_READONLY]     = 'R',
+       [OS_STATE_NOPRECREATE]  = 'N',
+       [OS_STATE_ENOSPC]       = 'S',
+       [OS_STATE_ENOINO]       = 'I',
+};
+
+static char obd_statfs_state2char(int s)
+{
+       /* Not an error state: no flag letter, NUL is returned instead */
+       if (s == OS_STATE_NONROT)
+               return '\0';
+       /* '?' if s is not a valid table index (must be < size) or is unnamed */
+       if (s < 0 || s >= (int)ARRAY_SIZE(obd_statfs_state_names) ||
+           obd_statfs_state_names[s] == 0)
+               return '?';
+
+       return obd_statfs_state_names[s];
+}
+
 static int showdf(char *mntdir, struct obd_statfs *stat,
                  char *uuid, enum mntdf_flags flags,
                  char *type, int index, int rc)
@@ -4890,21 +4981,16 @@ static int showdf(char *mntdir, struct obd_statfs *stat,
                        printf("[%s:%d]", type, index);
 
                if (stat->os_state) {
-                       /*
-                        * Each character represents the matching
-                        * OS_STATE_* bit.
-                        */
-                       const char state_names[] = "DRSI";
-                       __u32      state;
-                       __u32      i;
+                       uint32_t state;
+                       uint32_t i;
 
                        printf(" ");
-                       for (i = 0, state = stat->os_state;
-                            state && i < sizeof(state_names); i++) {
-                               if (!(state & (1 << i)))
+                       for (i = 0, state = stat->os_state; state != 0; i++) {
+                               uint32_t mask = 1 << i;
+                               if (!(state & mask))
                                        continue;
-                               printf("%c", state_names[i]);
-                               state ^= 1 << i;
+                               printf("%c", obd_statfs_state2char(mask));
+                               state &= ~mask;
                        }
                }
 
@@ -4949,6 +5035,7 @@ static int mntdf(char *mntdir, char *fsname, char *pool, enum mntdf_flags flags,
                { .st_op = LL_STATFS_LOV,       .st_name = "OST" },
                { .st_name = NULL } };
        struct ll_stat_type *tp;
+       __u64 ost_files = 0;
        __u64 ost_ffree = 0;
        __u32 index;
        __u32 type;
@@ -5056,6 +5143,7 @@ static int mntdf(char *mntdir, char *fsname, char *pool, enum mntdf_flags flags,
                                sum.os_ffree += stat_buf.os_ffree;
                                sum.os_files += stat_buf.os_files;
                        } else /* if (tp->st_op == LL_STATFS_LOV) */ {
+                               ost_files += stat_buf.os_files;
                                ost_ffree += stat_buf.os_ffree;
                        }
                        sum.os_blocks += stat_buf.os_blocks *
@@ -5069,11 +5157,12 @@ static int mntdf(char *mntdir, char *fsname, char *pool, enum mntdf_flags flags,
 
        close(fd);
 
-       /* If we don't have as many objects free on the OST as inodes
-        * on the MDS, we reduce the total number of inodes to
-        * compensate, so that the "inodes in use" number is correct.
-        * Matches ll_statfs_internal() so the results are consistent. */
-       if (ost_ffree < sum.os_ffree) {
+       /* If we have _some_ OSTs, but don't have as many free objects on the
+        * OST as inodes on the MDTs, reduce the reported number of inodes
+        * to compensate, so that the "inodes in use" number is correct.
+        * This should be kept in sync with ll_statfs_internal().
+        */
+       if (ost_files && ost_ffree < sum.os_ffree) {
                sum.os_files = (sum.os_files - sum.os_ffree) + ost_ffree;
                sum.os_ffree = ost_ffree;
        }
@@ -5111,28 +5200,33 @@ static int ll_statfs_data_comp(const void *sd1, const void *sd2)
 /* functions */
 static int lfs_setdirstripe(int argc, char **argv)
 {
-       char                    *dname;
-       int                     result;
-       struct lfs_setstripe_args        lsa = { 0 };
-       struct llapi_stripe_param       *param = NULL;
-       __u32                   mdts[LMV_MAX_STRIPE_COUNT] = { 0 };
-       char                    *end;
-       int                     c;
-       char                    *mode_opt = NULL;
-       bool                    default_stripe = false;
-       mode_t                  mode = S_IRWXU | S_IRWXG | S_IRWXO;
-       mode_t                  previous_mode = 0;
-       bool                    delete = false;
-       struct ll_statfs_buf    *lsb = NULL;
-       char                    mntdir[PATH_MAX] = "";
-       bool                    auto_distributed = false;
-
+       char *dname;
+       struct lfs_setstripe_args lsa = { 0 };
+       struct llapi_stripe_param *param = NULL;
+       __u32 mdts[LMV_MAX_STRIPE_COUNT] = { 0 };
+       char *end;
+       int c;
+       char *mode_opt = NULL;
+       bool default_stripe = false;
+       bool delete = false;
+       bool auto_distributed = false;
+       bool foreign_mode = false;
+       mode_t mode = S_IRWXU | S_IRWXG | S_IRWXO;
+       mode_t previous_mode = 0;
+       struct ll_statfs_buf *lsb = NULL;
+       char mntdir[PATH_MAX] = "";
+       char *xattr = NULL;
+       __u32 type = LU_FOREIGN_TYPE_DAOS, flags = 0;
        struct option long_opts[] = {
        { .val = 'c',   .name = "count",        .has_arg = required_argument },
        { .val = 'c',   .name = "mdt-count",    .has_arg = required_argument },
        { .val = 'd',   .name = "delete",       .has_arg = no_argument },
        { .val = 'D',   .name = "default",      .has_arg = no_argument },
        { .val = 'D',   .name = "default_stripe", .has_arg = no_argument },
+       { .val = LFS_LAYOUT_FLAGS_OPT,
+                       .name = "flags",        .has_arg = required_argument },
+       { .val = LFS_LAYOUT_FOREIGN_OPT,
+                       .name = "foreign",      .has_arg = optional_argument},
        { .val = 'H',   .name = "mdt-hash",     .has_arg = required_argument },
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(2, 17, 53, 0)
        { .val = 'i',   .name = "mdt-index",    .has_arg = required_argument },
@@ -5150,12 +5244,14 @@ static int lfs_setdirstripe(int argc, char **argv)
 #endif
        { .val = 'T',   .name = "mdt-count",    .has_arg = required_argument },
 /* setstripe { .val = 'y', .name = "yaml",     .has_arg = no_argument }, */
+       { .val = 'x',   .name = "xattr",        .has_arg = required_argument },
        { .name = NULL } };
+       int result = 0;
 
        setstripe_args_init(&lsa);
 
-       while ((c = getopt_long(argc, argv, "c:dDi:H:m:o:t:T:", long_opts,
-                               NULL)) >= 0) {
+       while ((c = getopt_long(argc, argv, "c:dDi:H:m:o:t:T:x:",
+                               long_opts, NULL)) >= 0) {
                switch (c) {
                case 0:
                        /* Long options. */
@@ -5177,6 +5273,33 @@ static int lfs_setdirstripe(int argc, char **argv)
                case 'D':
                        default_stripe = true;
                        break;
+               case LFS_LAYOUT_FOREIGN_OPT:
+                       if (optarg != NULL) {
+                               /* check pure numeric */
+                               type = strtoul(optarg, &end, 0);
+                               if (*end) {
+                                       /* check name */
+                                       type = check_foreign_type_name(optarg);
+                                       if (type == LU_FOREIGN_TYPE_UNKNOWN) {
+                                               fprintf(stderr,
+                                                       "%s %s: unknown foreign type '%s'\n",
+                                                       progname, argv[0],
+                                                       optarg);
+                                               return CMD_HELP;
+                                       }
+                               }
+                       }
+                       foreign_mode = true;
+                       break;
+               case LFS_LAYOUT_FLAGS_OPT:
+                       flags = strtoul(optarg, &end, 16);
+                       if (*end != '\0') {
+                               fprintf(stderr,
+                                       "%s %s: bad flags '%s'\n",
+                                       progname, argv[0], optarg);
+                               return CMD_HELP;
+                       }
+                       break;
 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 0, 53, 0)
                case 't':
                        fprintf(stderr, "warning: '--hash-type' and '-t' "
@@ -5201,15 +5324,17 @@ static int lfs_setdirstripe(int argc, char **argv)
                                        "%s %s: warning: '--index' deprecated, use '--mdt-index' instead\n",
                                        progname, argv[0]);
 #endif
+                       lsa.lsa_pattern = LLAPI_LAYOUT_OVERSTRIPING;
                        lsa.lsa_nr_tgts = parse_targets(mdts,
                                                sizeof(mdts) / sizeof(__u32),
-                                               lsa.lsa_nr_tgts, optarg);
+                                               lsa.lsa_nr_tgts, optarg, NULL);
                        if (lsa.lsa_nr_tgts < 0) {
                                fprintf(stderr,
                                        "%s %s: invalid MDT target(s) '%s'\n",
                                        progname, argv[0], optarg);
                                return CMD_HELP;
                        }
+                       lsa.lsa_pattern = 0;
 
                        lsa.lsa_tgts = mdts;
                        if (lsa.lsa_stripe_off == LLAPI_LAYOUT_DEFAULT)
@@ -5223,6 +5348,9 @@ static int lfs_setdirstripe(int argc, char **argv)
                case 'o':
                        mode_opt = optarg;
                        break;
+               case 'x':
+                       xattr = optarg;
+                       break;
                default:
                        fprintf(stderr, "%s %s: unrecognized option '%s'\n",
                                progname, argv[0], argv[optind - 1]);
@@ -5236,8 +5364,30 @@ static int lfs_setdirstripe(int argc, char **argv)
                return CMD_HELP;
        }
 
+       if (xattr && !foreign_mode) {
+               /* only print a warning as this is harmless and will be
+                * ignored
+                */
+               fprintf(stderr,
+                       "%s %s: xattr has been specified for non-foreign layout\n",
+                       progname, argv[0]);
+       } else if (foreign_mode && !xattr) {
+               fprintf(stderr,
+                       "%s %s: xattr must be provided in foreign mode\n",
+                       progname, argv[0]);
+               return CMD_HELP;
+       }
+
+       if (foreign_mode && (delete || default_stripe || lsa.lsa_nr_tgts ||
+           lsa.lsa_tgts || setstripe_args_specified(&lsa))) {
+               fprintf(stderr,
+                       "%s %s: only --xattr/--flags/--mode options are valid with --foreign\n",
+                       progname, argv[0]);
+               return CMD_HELP;
+       }
+
        if (!delete && lsa.lsa_stripe_off == LLAPI_LAYOUT_DEFAULT &&
-           lsa.lsa_stripe_count == LLAPI_LAYOUT_DEFAULT) {
+           lsa.lsa_stripe_count == LLAPI_LAYOUT_DEFAULT && !foreign_mode) {
                fprintf(stderr,
                        "%s %s: stripe offset and count must be specified\n",
                        progname, argv[0]);
@@ -5264,6 +5414,25 @@ static int lfs_setdirstripe(int argc, char **argv)
                previous_mode = umask(0);
        }
 
+       /* foreign LMV/dir case */
+       if (foreign_mode) {
+               if (argc > optind + 1) {
+                       fprintf(stderr,
+                               "%s %s: cannot specify multiple foreign dirs\n",
+                               progname, argv[0]);
+                       return CMD_HELP;
+               }
+
+               dname = argv[optind];
+               result = llapi_dir_create_foreign(dname, mode, type, flags,
+                                                 xattr);
+               if (result != 0)
+                       fprintf(stderr,
+                               "%s mkdir: can't create foreign dir '%s': %s\n",
+                               progname, dname, strerror(-result));
+               return result;
+       }
+
        /*
         * initialize stripe parameters, in case param is converted to specific,
         * i.e, 'lfs mkdir -i -1 -c N', always allocate space for lsp_tgts.
@@ -5306,113 +5475,124 @@ static int lfs_setdirstripe(int argc, char **argv)
                memcpy(param->lsp_tgts, mdts, sizeof(*mdts) * lsa.lsa_nr_tgts);
        }
 
+       if (!default_stripe && lsa.lsa_pattern == LMV_HASH_TYPE_SPACE) {
+               fprintf(stderr, "%s %s: can only specify -H space with -D\n",
+                       progname, argv[0]);
+               free(param);
+               return CMD_HELP;
+       }
+
        dname = argv[optind];
        do {
                if (default_stripe) {
                        result = llapi_dir_set_default_lmv(dname, param);
-               } else {
-                       /* if current \a dname isn't under the same \a mntdir
-                        * as the last one, and the last one was
-                        * auto-distributed, restore \a param.
-                        */
-                       if (mntdir[0] != '\0' &&
-                           strncmp(dname, mntdir, strlen(mntdir)) &&
-                           auto_distributed) {
-                               param->lsp_is_specific = false;
-                               param->lsp_stripe_offset = -1;
-                               auto_distributed = false;
-                       }
-
-                       if (!param->lsp_is_specific &&
-                           param->lsp_stripe_offset == -1) {
-                               char path[PATH_MAX] = "";
+                       if (result)
+                               fprintf(stderr,
+                                       "%s setdirstripe: cannot set default stripe on dir '%s': %s\n",
+                                       progname, dname, strerror(-result));
+                       continue;
+               }
 
-                               if (!lsb) {
-                                       lsb = malloc(sizeof(*lsb));
-                                       if (!lsb) {
-                                               result = -ENOMEM;
-                                               break;
-                                       }
-                               }
-                               lsb->sb_count = 0;
+               /*
+                * if current \a dname isn't under the same \a mntdir as the
+                * last one, and the last one was auto-distributed, restore
+                * \a param.
+                */
+               if (mntdir[0] != '\0' &&
+                   strncmp(dname, mntdir, strlen(mntdir)) &&
+                   auto_distributed) {
+                       param->lsp_is_specific = false;
+                       param->lsp_stripe_offset = -1;
+                       auto_distributed = false;
+               }
 
-                               /* use mntdir for dirname() temporarily */
-                               strncpy(mntdir, dname, sizeof(mntdir));
-                               if (!realpath(dirname(mntdir), path)) {
-                                       result = -errno;
-                                       fprintf(stderr,
-                                               "error: invalid path '%s': %s\n",
-                                               argv[optind], strerror(errno));
-                                       break;
-                               }
-                               mntdir[0] = '\0';
+               /*
+                * TODO: once MDTs can allocate objects with QoS (LU-9435), the
+                * code below should be removed; instead, let LMV allocate the
+                * starting MDT object, and then let LOD allocate the other MDT
+                * objects.
+                */
+               if (!param->lsp_is_specific && param->lsp_stripe_offset == -1) {
+                       char path[PATH_MAX] = "";
 
-                               result = llapi_search_mounts(path, 0, mntdir,
-                                                            NULL);
-                               if (result < 0 || mntdir[0] == '\0') {
-                                       fprintf(stderr,
-                                               "No suitable Lustre mount found\n");
+                       if (!lsb) {
+                               lsb = malloc(sizeof(*lsb));
+                               if (!lsb) {
+                                       result = -ENOMEM;
                                        break;
                                }
+                       }
+                       lsb->sb_count = 0;
 
-                               result = mntdf(mntdir, NULL, NULL, 0,
-                                              LL_STATFS_LMV, lsb);
-                               if (result < 0)
-                                       break;
+                       /* use mntdir for dirname() temporarily */
+                       strncpy(mntdir, dname, sizeof(mntdir));
+                       if (!realpath(dirname(mntdir), path)) {
+                               result = -errno;
+                               fprintf(stderr,
+                                       "error: invalid path '%s': %s\n",
+                                       argv[optind], strerror(errno));
+                               break;
+                       }
+                       mntdir[0] = '\0';
 
-                               if (param->lsp_stripe_count > lsb->sb_count) {
-                                       fprintf(stderr,
-                                               "error: stripe count %d is too big\n",
-                                               param->lsp_stripe_count);
-                                       result = -ERANGE;
-                                       break;
-                               }
+                       result = llapi_search_mounts(path, 0, mntdir, NULL);
+                       if (result < 0 || mntdir[0] == '\0') {
+                               fprintf(stderr,
+                                       "No suitable Lustre mount found\n");
+                               break;
+                       }
 
-                               qsort(lsb->sb_buf, lsb->sb_count,
-                                     sizeof(struct ll_statfs_data),
-                                     ll_statfs_data_comp);
+                       result = mntdf(mntdir, NULL, NULL, 0, LL_STATFS_LMV,
+                                      lsb);
+                       if (result < 0)
+                               break;
 
-                               auto_distributed = true;
+                       if (param->lsp_stripe_count > lsb->sb_count) {
+                               fprintf(stderr,
+                                       "error: stripe count %d is too big\n",
+                                       param->lsp_stripe_count);
+                               result = -ERANGE;
+                               break;
                        }
 
-                       if (auto_distributed) {
-                               int r;
-                               int nr = MAX(param->lsp_stripe_count,
-                                            lsb->sb_count / 2);
+                       qsort(lsb->sb_buf, lsb->sb_count,
+                             sizeof(struct ll_statfs_data),
+                             ll_statfs_data_comp);
 
-                               /* don't use server whose usage is above 90% */
-                               while (nr != param->lsp_stripe_count &&
-                                      obd_statfs_ratio(&lsb->sb_buf[nr].sd_st,
-                                                       false) > 90)
-                                       nr = MAX(param->lsp_stripe_count,
-                                                nr / 2);
+                       auto_distributed = true;
+               }
 
-                               /* get \a r between [0, nr) */
-                               r = rand() % nr;
+               if (auto_distributed) {
+                       int r;
+                       int nr = MAX(param->lsp_stripe_count,
+                                    lsb->sb_count / 2);
 
-                               param->lsp_stripe_offset =
-                                       lsb->sb_buf[r].sd_index;
-                               if (param->lsp_stripe_count > 1) {
-                                       int i = 0;
+                       /* don't use server whose usage is above 90% */
+                       while (nr != param->lsp_stripe_count &&
+                              obd_statfs_ratio(&lsb->sb_buf[nr].sd_st, false) >
+                              90)
+                               nr = MAX(param->lsp_stripe_count, nr / 2);
 
-                                       param->lsp_is_specific = true;
-                                       for (; i < param->lsp_stripe_count; i++)
-                                               param->lsp_tgts[(i + r) % nr] =
-                                                       lsb->sb_buf[i].sd_index;
-                               }
-                       }
+                       /* get \a r between [0, nr) */
+                       r = rand() % nr;
 
-                       result = llapi_dir_create(dname, mode, param);
+                       param->lsp_stripe_offset = lsb->sb_buf[r].sd_index;
+                       if (param->lsp_stripe_count > 1) {
+                               int i = 0;
+
+                               param->lsp_is_specific = true;
+                               for (; i < param->lsp_stripe_count; i++)
+                                       param->lsp_tgts[(i + r) % nr] =
+                                               lsb->sb_buf[i].sd_index;
+                       }
                }
 
-               if (result) {
+               result = llapi_dir_create(dname, mode, param);
+               if (result)
                        fprintf(stderr,
-                               "%s setdirstripe: cannot create stripe dir '%s': %s\n",
+                               "%s setdirstripe: cannot create dir '%s': %s\n",
                                progname, dname, strerror(-result));
-                       break;
-               }
-               dname = argv[++optind];
-       } while (dname != NULL);
+       } while (!result && (dname = argv[++optind]));
 
        if (mode_opt != NULL)
                umask(previous_mode);
@@ -8116,16 +8296,15 @@ next:
        return rc;
 }
 
-
 static const char *const heat_names[] = LU_HEAT_NAMES;
 
 static int lfs_heat_get(int argc, char **argv)
 {
-       struct lu_heat  *heat;
-       int              rc = 0, rc2;
-       char            *path;
-       int              fd;
-       int              i;
+       struct lu_heat *heat;
+       int rc = 0, rc2;
+       char *path;
+       int fd;
+       int i;
 
        if (argc <= 1)
                return CMD_HELP;
@@ -8171,25 +8350,22 @@ next:
 
 static int lfs_heat_set(int argc, char **argv)
 {
-       struct option    long_opts[] = {
-               {"clear", no_argument, 0, 'c'},
-               {"off", no_argument, 0, 'o'},
-               {"on", no_argument, 0, 'O'},
-               {0, 0, 0, 0}
-       };
-       char             short_opts[] = "coO";
-       int              rc = 0, rc2;
-       char            *path;
-       int              fd;
-       __u64            flags = 0;
-       int              c;
+       struct option long_opts[] = {
+       { .val = 'c',   .name = "clear",        .has_arg = no_argument },
+       { .val = 'o',   .name = "off",          .has_arg = no_argument },
+       { .val = 'O',   .name = "on",           .has_arg = no_argument },
+       { .name = NULL } };
+       enum lu_heat_flag flags = 0;
+       int rc = 0, rc2;
+       char *path;
+       int fd;
+       int c;
 
        if (argc <= 1)
                return CMD_HELP;
 
        optind = 0;
-       while ((c = getopt_long(argc, argv, short_opts,
-                               long_opts, NULL)) != -1) {
+       while ((c = getopt_long(argc, argv, "coO", long_opts, NULL)) != -1) {
                switch (c) {
                case 'c':
                        flags |= LU_HEAT_FLAG_CLEAR;