Whamcloud - gitweb
LU-11213 dne: add new dir hash type "space" 58/34358/12
author Lai Siyao <lai.siyao@whamcloud.com>
Thu, 14 Feb 2019 21:16:33 +0000 (05:16 +0800)
committer Oleg Drokin <green@whamcloud.com>
Tue, 4 Jun 2019 05:26:47 +0000 (05:26 +0000)
Add a new hash type "space". If this is set on the default LMV of
a directory, its subdirectories will be created across all MDTs
with balanced space usage.

* new hash type LMV_HASH_TYPE_SPACE.

Signed-off-by: Lai Siyao <lai.siyao@whamcloud.com>
Change-Id: I8edf38f94e24965b1cffb21253c3be0eef68c707
Reviewed-on: https://review.whamcloud.com/34358
Reviewed-by: Andreas Dilger <adilger@whamcloud.com>
Tested-by: Jenkins
Tested-by: Maloo <maloo@whamcloud.com>
Reviewed-by: Hongchao Zhang <hongchao@whamcloud.com>
lustre/include/uapi/linux/lustre/lustre_user.h
lustre/lmv/lmv_obd.c
lustre/lod/lod_object.c
lustre/utils/lfs.c
lustre/utils/liblustreapi.c

index 0edb929..9ead0fc 100644 (file)
@@ -804,11 +804,16 @@ enum lmv_hash_type {
        LMV_HASH_TYPE_UNKNOWN   = 0,    /* 0 is reserved for testing purpose */
        LMV_HASH_TYPE_ALL_CHARS = 1,
        LMV_HASH_TYPE_FNV_1A_64 = 2,
        LMV_HASH_TYPE_UNKNOWN   = 0,    /* 0 is reserved for testing purpose */
        LMV_HASH_TYPE_ALL_CHARS = 1,
        LMV_HASH_TYPE_FNV_1A_64 = 2,
+       LMV_HASH_TYPE_SPACE     = 3,    /*
+                                        * distribute subdirs among all MDTs
+                                        * with balanced space usage.
+                                        */
        LMV_HASH_TYPE_MAX,
 };
 
 #define LMV_HASH_NAME_ALL_CHARS        "all_char"
 #define LMV_HASH_NAME_FNV_1A_64        "fnv_1a_64"
        LMV_HASH_TYPE_MAX,
 };
 
 #define LMV_HASH_NAME_ALL_CHARS        "all_char"
 #define LMV_HASH_NAME_FNV_1A_64        "fnv_1a_64"
+#define LMV_HASH_NAME_SPACE    "space"
 
 extern char *mdt_hash_name[LMV_HASH_TYPE_MAX];
 
 
 extern char *mdt_hash_name[LMV_HASH_TYPE_MAX];
 
@@ -838,7 +843,7 @@ struct lmv_user_md_v1 {
        __u32   lum_stripe_count;  /* dirstripe count */
        __u32   lum_stripe_offset; /* MDT idx for default dirstripe */
        __u32   lum_hash_type;     /* Dir stripe policy */
        __u32   lum_stripe_count;  /* dirstripe count */
        __u32   lum_stripe_offset; /* MDT idx for default dirstripe */
        __u32   lum_hash_type;     /* Dir stripe policy */
-       __u32   lum_type;         /* LMV type: default or normal */
+       __u32   lum_type;         /* LMV type: default */
        __u32   lum_padding1;
        __u32   lum_padding2;
        __u32   lum_padding3;
        __u32   lum_padding1;
        __u32   lum_padding2;
        __u32   lum_padding3;
@@ -856,6 +861,15 @@ static inline __u32 lmv_foreign_to_md_stripes(__u32 size)
               sizeof(struct lmv_user_mds_data);
 }
 
               sizeof(struct lmv_user_mds_data);
 }
 
+/*
+ * NB: historically the default layout did not set a type; it was distinguished
+ * from the normal layout by its XATTR name. For backward compatibility, define
+ * LMV_TYPE_DEFAULT as 0x0 and keep using that same method.
+ */
+enum lmv_type {
+       LMV_TYPE_DEFAULT = 0x0000,
+};
+
 static inline int lmv_user_md_size(int stripes, int lmm_magic)
 {
        int size = sizeof(struct lmv_user_md);
 static inline int lmv_user_md_size(int stripes, int lmm_magic)
 {
        int size = sizeof(struct lmv_user_md);
index bd54773..8278969 100644 (file)
@@ -3105,7 +3105,6 @@ static int lmv_unpackmd(struct obd_export *exp, struct lmv_stripe_md **lsmp,
                 */
                lsm_size = lmv_stripe_md_size(0);
 
                 */
                lsm_size = lmv_stripe_md_size(0);
 
-       lsm_size = lmv_stripe_md_size(lmv_mds_md_stripe_count_get(lmm));
        if (lsm == NULL) {
                OBD_ALLOC(lsm, lsm_size);
                if (lsm == NULL)
        if (lsm == NULL) {
                OBD_ALLOC(lsm, lsm_size);
                if (lsm == NULL)
index 445130e..c781ed5 100644 (file)
@@ -3685,7 +3685,8 @@ static int lod_xattr_set_default_lmv_on_dir(const struct lu_env *env,
 
        if (LMVEA_DELETE_VALUES((le32_to_cpu(lum->lum_stripe_count)),
                                 le32_to_cpu(lum->lum_stripe_offset)) &&
 
        if (LMVEA_DELETE_VALUES((le32_to_cpu(lum->lum_stripe_count)),
                                 le32_to_cpu(lum->lum_stripe_offset)) &&
-                               le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC) {
+           le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC &&
+           le32_to_cpu(lum->lum_hash_type) == LMV_HASH_TYPE_UNKNOWN) {
                rc = lod_xattr_del_internal(env, dt, name, th);
                if (rc == -ENODATA)
                        rc = 0;
                rc = lod_xattr_del_internal(env, dt, name, th);
                if (rc == -ENODATA)
                        rc = 0;
@@ -3971,8 +3972,10 @@ static int lod_dir_striping_create_internal(const struct lu_env *env,
 
        /* Transfer default LMV striping from the parent */
        if (lds != NULL && lds->lds_dir_def_striping_set &&
 
        /* Transfer default LMV striping from the parent */
        if (lds != NULL && lds->lds_dir_def_striping_set &&
-           !LMVEA_DELETE_VALUES(lds->lds_dir_def_stripe_count,
-                                lds->lds_dir_def_stripe_offset)) {
+           !(LMVEA_DELETE_VALUES(lds->lds_dir_def_stripe_count,
+                                lds->lds_dir_def_stripe_offset) &&
+             le32_to_cpu(lds->lds_dir_def_hash_type) !=
+             LMV_HASH_TYPE_UNKNOWN)) {
                struct lmv_user_md_v1 *v1 = info->lti_ea_store;
 
                if (info->lti_ea_store_size < sizeof(*v1)) {
                struct lmv_user_md_v1 *v1 = info->lti_ea_store;
 
                if (info->lti_ea_store_size < sizeof(*v1)) {
index e229763..8b68b44 100644 (file)
@@ -259,6 +259,7 @@ static inline int lfs_mirror_split(int argc, char **argv)
        "\tmdt_hash:  hash type of the striped directory. mdt types:\n" \
        "       fnv_1a_64 FNV-1a hash algorithm (default)\n"            \
        "       all_char  sum of characters % MDT_COUNT (not recommended)\n" \
        "\tmdt_hash:  hash type of the striped directory. mdt types:\n" \
        "       fnv_1a_64 FNV-1a hash algorithm (default)\n"            \
        "       all_char  sum of characters % MDT_COUNT (not recommended)\n" \
+       "       space     create subdirectories with balanced space usage\n" \
        "\tdefault_stripe: set default dirstripe of the directory\n"    \
        "\tmode: the file access permission of the directory (octal)\n" \
        "To create dir with a foreign (free format) layout :\n" \
        "\tdefault_stripe: set default dirstripe of the directory\n"    \
        "\tmode: the file access permission of the directory (octal)\n" \
        "To create dir with a foreign (free format) layout :\n" \
@@ -5180,25 +5181,23 @@ static int ll_statfs_data_comp(const void *sd1, const void *sd2)
 /* functions */
 static int lfs_setdirstripe(int argc, char **argv)
 {
 /* functions */
 static int lfs_setdirstripe(int argc, char **argv)
 {
-       char                    *dname;
-       int                     result;
-       struct lfs_setstripe_args        lsa = { 0 };
-       struct llapi_stripe_param       *param = NULL;
-       __u32                   mdts[LMV_MAX_STRIPE_COUNT] = { 0 };
-       char                    *end;
-       int                     c;
-       char                    *mode_opt = NULL;
-       bool                    default_stripe = false;
-       mode_t                  mode = S_IRWXU | S_IRWXG | S_IRWXO;
-       mode_t                  previous_mode = 0;
-       bool                    delete = false;
-       struct ll_statfs_buf    *lsb = NULL;
-       char                    mntdir[PATH_MAX] = "";
-       bool                    auto_distributed = false;
-       bool                    foreign_mode = false;
-       char                    *xattr = NULL;
-       __u32                   type = LU_FOREIGN_TYPE_DAOS, flags = 0;
-
+       char *dname;
+       struct lfs_setstripe_args lsa = { 0 };
+       struct llapi_stripe_param *param = NULL;
+       __u32 mdts[LMV_MAX_STRIPE_COUNT] = { 0 };
+       char *end;
+       int c;
+       char *mode_opt = NULL;
+       bool default_stripe = false;
+       bool delete = false;
+       bool auto_distributed = false;
+       bool foreign_mode = false;
+       mode_t mode = S_IRWXU | S_IRWXG | S_IRWXO;
+       mode_t previous_mode = 0;
+       struct ll_statfs_buf *lsb = NULL;
+       char mntdir[PATH_MAX] = "";
+       char *xattr = NULL;
+       __u32 type = LU_FOREIGN_TYPE_DAOS, flags = 0;
        struct option long_opts[] = {
        { .val = 'c',   .name = "count",        .has_arg = required_argument },
        { .val = 'c',   .name = "mdt-count",    .has_arg = required_argument },
        struct option long_opts[] = {
        { .val = 'c',   .name = "count",        .has_arg = required_argument },
        { .val = 'c',   .name = "mdt-count",    .has_arg = required_argument },
@@ -5228,6 +5227,7 @@ static int lfs_setdirstripe(int argc, char **argv)
 /* setstripe { .val = 'y', .name = "yaml",     .has_arg = no_argument }, */
        { .val = 'x',   .name = "xattr",        .has_arg = required_argument },
        { .name = NULL } };
 /* setstripe { .val = 'y', .name = "yaml",     .has_arg = no_argument }, */
        { .val = 'x',   .name = "xattr",        .has_arg = required_argument },
        { .name = NULL } };
+       int result = 0;
 
        setstripe_args_init(&lsa);
 
 
        setstripe_args_init(&lsa);
 
@@ -5456,113 +5456,124 @@ static int lfs_setdirstripe(int argc, char **argv)
                memcpy(param->lsp_tgts, mdts, sizeof(*mdts) * lsa.lsa_nr_tgts);
        }
 
                memcpy(param->lsp_tgts, mdts, sizeof(*mdts) * lsa.lsa_nr_tgts);
        }
 
+       if (!default_stripe && lsa.lsa_pattern == LMV_HASH_TYPE_SPACE) {
+               fprintf(stderr, "%s %s: can only specify -H space with -D\n",
+                       progname, argv[0]);
+               free(param);
+               return CMD_HELP;
+       }
+
        dname = argv[optind];
        do {
                if (default_stripe) {
                        result = llapi_dir_set_default_lmv(dname, param);
        dname = argv[optind];
        do {
                if (default_stripe) {
                        result = llapi_dir_set_default_lmv(dname, param);
-               } else {
-                       /* if current \a dname isn't under the same \a mntdir
-                        * as the last one, and the last one was
-                        * auto-distributed, restore \a param.
-                        */
-                       if (mntdir[0] != '\0' &&
-                           strncmp(dname, mntdir, strlen(mntdir)) &&
-                           auto_distributed) {
-                               param->lsp_is_specific = false;
-                               param->lsp_stripe_offset = -1;
-                               auto_distributed = false;
-                       }
-
-                       if (!param->lsp_is_specific &&
-                           param->lsp_stripe_offset == -1) {
-                               char path[PATH_MAX] = "";
+                       if (result)
+                               fprintf(stderr,
+                                       "%s setdirstripe: cannot set default stripe on dir '%s': %s\n",
+                                       progname, dname, strerror(-result));
+                       continue;
+               }
 
 
-                               if (!lsb) {
-                                       lsb = malloc(sizeof(*lsb));
-                                       if (!lsb) {
-                                               result = -ENOMEM;
-                                               break;
-                                       }
-                               }
-                               lsb->sb_count = 0;
+               /*
+                * if current \a dname isn't under the same \a mntdir as the
+                * last one, and the last one was auto-distributed, restore
+                * \a param.
+                */
+               if (mntdir[0] != '\0' &&
+                   strncmp(dname, mntdir, strlen(mntdir)) &&
+                   auto_distributed) {
+                       param->lsp_is_specific = false;
+                       param->lsp_stripe_offset = -1;
+                       auto_distributed = false;
+               }
 
 
-                               /* use mntdir for dirname() temporarily */
-                               strncpy(mntdir, dname, sizeof(mntdir) - 1);
-                               if (!realpath(dirname(mntdir), path)) {
-                                       result = -errno;
-                                       fprintf(stderr,
-                                               "error: invalid path '%s': %s\n",
-                                               argv[optind], strerror(errno));
-                                       break;
-                               }
-                               mntdir[0] = '\0';
+               /*
+                * TODO: when MDT can allocate object with QoS (LU-9435), below
+                * code should be removed, instead we should let LMV to allocate
+                * the starting MDT object, and then let LOD allocate other MDT
+                * objects.
+                */
+               if (!param->lsp_is_specific && param->lsp_stripe_offset == -1) {
+                       char path[PATH_MAX] = "";
 
 
-                               result = llapi_search_mounts(path, 0, mntdir,
-                                                            NULL);
-                               if (result < 0 || mntdir[0] == '\0') {
-                                       fprintf(stderr,
-                                               "No suitable Lustre mount found\n");
+                       if (!lsb) {
+                               lsb = malloc(sizeof(*lsb));
+                               if (!lsb) {
+                                       result = -ENOMEM;
                                        break;
                                }
                                        break;
                                }
+                       }
+                       lsb->sb_count = 0;
 
 
-                               result = mntdf(mntdir, NULL, NULL, 0,
-                                              LL_STATFS_LMV, lsb);
-                               if (result < 0)
-                                       break;
+                       /* use mntdir for dirname() temporarily */
+                       strncpy(mntdir, dname, sizeof(mntdir));
+                       if (!realpath(dirname(mntdir), path)) {
+                               result = -errno;
+                               fprintf(stderr,
+                                       "error: invalid path '%s': %s\n",
+                                       argv[optind], strerror(errno));
+                               break;
+                       }
+                       mntdir[0] = '\0';
 
 
-                               if (param->lsp_stripe_count > lsb->sb_count) {
-                                       fprintf(stderr,
-                                               "error: stripe count %d is too big\n",
-                                               param->lsp_stripe_count);
-                                       result = -ERANGE;
-                                       break;
-                               }
+                       result = llapi_search_mounts(path, 0, mntdir, NULL);
+                       if (result < 0 || mntdir[0] == '\0') {
+                               fprintf(stderr,
+                                       "No suitable Lustre mount found\n");
+                               break;
+                       }
 
 
-                               qsort(lsb->sb_buf, lsb->sb_count,
-                                     sizeof(struct ll_statfs_data),
-                                     ll_statfs_data_comp);
+                       result = mntdf(mntdir, NULL, NULL, 0, LL_STATFS_LMV,
+                                      lsb);
+                       if (result < 0)
+                               break;
 
 
-                               auto_distributed = true;
+                       if (param->lsp_stripe_count > lsb->sb_count) {
+                               fprintf(stderr,
+                                       "error: stripe count %d is too big\n",
+                                       param->lsp_stripe_count);
+                               result = -ERANGE;
+                               break;
                        }
 
                        }
 
-                       if (auto_distributed) {
-                               int r;
-                               int nr = MAX(param->lsp_stripe_count,
-                                            lsb->sb_count / 2);
+                       qsort(lsb->sb_buf, lsb->sb_count,
+                             sizeof(struct ll_statfs_data),
+                             ll_statfs_data_comp);
 
 
-                               /* don't use server whose usage is above 90% */
-                               while (nr != param->lsp_stripe_count &&
-                                      obd_statfs_ratio(&lsb->sb_buf[nr].sd_st,
-                                                       false) > 90)
-                                       nr = MAX(param->lsp_stripe_count,
-                                                nr / 2);
+                       auto_distributed = true;
+               }
 
 
-                               /* get \a r between [0, nr) */
-                               r = rand() % nr;
+               if (auto_distributed) {
+                       int r;
+                       int nr = MAX(param->lsp_stripe_count,
+                                    lsb->sb_count / 2);
 
 
-                               param->lsp_stripe_offset =
-                                       lsb->sb_buf[r].sd_index;
-                               if (param->lsp_stripe_count > 1) {
-                                       int i = 0;
+                       /* don't use server whose usage is above 90% */
+                       while (nr != param->lsp_stripe_count &&
+                              obd_statfs_ratio(&lsb->sb_buf[nr].sd_st, false) >
+                              90)
+                               nr = MAX(param->lsp_stripe_count, nr / 2);
 
 
-                                       param->lsp_is_specific = true;
-                                       for (; i < param->lsp_stripe_count; i++)
-                                               param->lsp_tgts[(i + r) % nr] =
-                                                       lsb->sb_buf[i].sd_index;
-                               }
-                       }
+                       /* get \a r between [0, nr) */
+                       r = rand() % nr;
 
 
-                       result = llapi_dir_create(dname, mode, param);
+                       param->lsp_stripe_offset = lsb->sb_buf[r].sd_index;
+                       if (param->lsp_stripe_count > 1) {
+                               int i = 0;
+
+                               param->lsp_is_specific = true;
+                               for (; i < param->lsp_stripe_count; i++)
+                                       param->lsp_tgts[(i + r) % nr] =
+                                               lsb->sb_buf[i].sd_index;
+                       }
                }
 
                }
 
-               if (result) {
+               result = llapi_dir_create(dname, mode, param);
+               if (result)
                        fprintf(stderr,
                        fprintf(stderr,
-                               "%s setdirstripe: cannot create stripe dir '%s': %s\n",
+                               "%s setdirstripe: cannot create dir '%s': %s\n",
                                progname, dname, strerror(-result));
                                progname, dname, strerror(-result));
-                       break;
-               }
-               dname = argv[++optind];
-       } while (dname != NULL);
+       } while (!result && (dname = argv[++optind]));
 
        if (mode_opt != NULL)
                umask(previous_mode);
 
        if (mode_opt != NULL)
                umask(previous_mode);
index e58a668..a81fb92 100644 (file)
@@ -84,7 +84,9 @@ const char *liblustreapi_cmd;
 
 char *mdt_hash_name[] = { "none",
                          LMV_HASH_NAME_ALL_CHARS,
 
 char *mdt_hash_name[] = { "none",
                          LMV_HASH_NAME_ALL_CHARS,
-                         LMV_HASH_NAME_FNV_1A_64 };
+                         LMV_HASH_NAME_FNV_1A_64,
+                         LMV_HASH_NAME_SPACE,
+};
 
 struct lustre_foreign_type lu_foreign_types[] = {
        {.lft_type = LU_FOREIGN_TYPE_NONE, .lft_name = "none"},
 
 struct lustre_foreign_type lu_foreign_types[] = {
        {.lft_type = LU_FOREIGN_TYPE_NONE, .lft_name = "none"},