From a24f6153292753bf6e40f5638930d6cffa78e1ac Mon Sep 17 00:00:00 2001 From: Lai Siyao Date: Fri, 15 Feb 2019 05:16:33 +0800 Subject: [PATCH] LU-11213 dne: add new dir hash type "space" Add a new hash type "space", if this is set on default LMV of a directory, its subdirs will be created on all MDTs with balanced space usage. * new hash type LMV_HASH_TYPE_SPACE. Signed-off-by: Lai Siyao Change-Id: I8edf38f94e24965b1cffb21253c3be0eef68c707 Reviewed-on: https://review.whamcloud.com/34358 Reviewed-by: Andreas Dilger Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Hongchao Zhang --- lustre/include/uapi/linux/lustre/lustre_user.h | 16 +- lustre/lmv/lmv_obd.c | 1 - lustre/lod/lod_object.c | 9 +- lustre/utils/lfs.c | 215 +++++++++++++------------ lustre/utils/liblustreapi.c | 4 +- 5 files changed, 137 insertions(+), 108 deletions(-) diff --git a/lustre/include/uapi/linux/lustre/lustre_user.h b/lustre/include/uapi/linux/lustre/lustre_user.h index 0edb929..9ead0fc 100644 --- a/lustre/include/uapi/linux/lustre/lustre_user.h +++ b/lustre/include/uapi/linux/lustre/lustre_user.h @@ -804,11 +804,16 @@ enum lmv_hash_type { LMV_HASH_TYPE_UNKNOWN = 0, /* 0 is reserved for testing purpose */ LMV_HASH_TYPE_ALL_CHARS = 1, LMV_HASH_TYPE_FNV_1A_64 = 2, + LMV_HASH_TYPE_SPACE = 3, /* + * distribute subdirs among all MDTs + * with balanced space usage. + */ LMV_HASH_TYPE_MAX, }; #define LMV_HASH_NAME_ALL_CHARS "all_char" #define LMV_HASH_NAME_FNV_1A_64 "fnv_1a_64" +#define LMV_HASH_NAME_SPACE "space" extern char *mdt_hash_name[LMV_HASH_TYPE_MAX]; @@ -838,7 +843,7 @@ struct lmv_user_md_v1 { __u32 lum_stripe_count; /* dirstripe count */ __u32 lum_stripe_offset; /* MDT idx for default dirstripe */ __u32 lum_hash_type; /* Dir stripe policy */ - __u32 lum_type; /* LMV type: default or normal */ + __u32 lum_type; /* LMV type: default */ __u32 lum_padding1; __u32 lum_padding2; __u32 lum_padding3; @@ -856,6 +861,15 @@ static inline __u32 lmv_foreign_to_md_stripes(__u32 size) sizeof(struct lmv_user_mds_data); } +/* + * NB, historically default layout didn't set type, but use XATTR name to differ + * from normal layout, for backward compatibility, define LMV_TYPE_DEFAULT 0x0, + * and still use the same method. + */ +enum lmv_type { + LMV_TYPE_DEFAULT = 0x0000, +}; + static inline int lmv_user_md_size(int stripes, int lmm_magic) { int size = sizeof(struct lmv_user_md); diff --git a/lustre/lmv/lmv_obd.c b/lustre/lmv/lmv_obd.c index bd54773..8278969 100644 --- a/lustre/lmv/lmv_obd.c +++ b/lustre/lmv/lmv_obd.c @@ -3105,7 +3105,6 @@ static int lmv_unpackmd(struct obd_export *exp, struct lmv_stripe_md **lsmp, */ lsm_size = lmv_stripe_md_size(0); - lsm_size = lmv_stripe_md_size(lmv_mds_md_stripe_count_get(lmm)); if (lsm == NULL) { OBD_ALLOC(lsm, lsm_size); if (lsm == NULL) diff --git a/lustre/lod/lod_object.c b/lustre/lod/lod_object.c index 445130e..c781ed5 100644 --- a/lustre/lod/lod_object.c +++ b/lustre/lod/lod_object.c @@ -3685,7 +3685,8 @@ static int lod_xattr_set_default_lmv_on_dir(const struct lu_env *env, if (LMVEA_DELETE_VALUES((le32_to_cpu(lum->lum_stripe_count)), le32_to_cpu(lum->lum_stripe_offset)) && - le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC) { + le32_to_cpu(lum->lum_magic) == LMV_USER_MAGIC && + le32_to_cpu(lum->lum_hash_type) == LMV_HASH_TYPE_UNKNOWN) { rc = lod_xattr_del_internal(env, dt, name, th); if (rc == -ENODATA) rc = 0; @@ -3971,8 +3972,10 @@ static int lod_dir_striping_create_internal(const struct lu_env *env, /* Transfer default LMV striping from the parent */ if (lds != NULL && lds->lds_dir_def_striping_set && - !LMVEA_DELETE_VALUES(lds->lds_dir_def_stripe_count, - lds->lds_dir_def_stripe_offset)) { + !(LMVEA_DELETE_VALUES(lds->lds_dir_def_stripe_count, + lds->lds_dir_def_stripe_offset) && + le32_to_cpu(lds->lds_dir_def_hash_type) != + LMV_HASH_TYPE_UNKNOWN)) { struct lmv_user_md_v1 *v1 = info->lti_ea_store; if (info->lti_ea_store_size < sizeof(*v1)) { diff --git a/lustre/utils/lfs.c b/lustre/utils/lfs.c index e229763..8b68b44 100644 --- a/lustre/utils/lfs.c +++ b/lustre/utils/lfs.c @@ -259,6 +259,7 @@ static inline int lfs_mirror_split(int argc, char **argv) "\tmdt_hash: hash type of the striped directory. mdt types:\n" \ " fnv_1a_64 FNV-1a hash algorithm (default)\n" \ " all_char sum of characters % MDT_COUNT (not recommended)\n" \ + " space create subdirectories with balanced space usage\n" \ "\tdefault_stripe: set default dirstripe of the directory\n" \ "\tmode: the file access permission of the directory (octal)\n" \ "To create dir with a foreign (free format) layout :\n" \ @@ -5180,25 +5181,23 @@ static int ll_statfs_data_comp(const void *sd1, const void *sd2) /* functions */ static int lfs_setdirstripe(int argc, char **argv) { - char *dname; - int result; - struct lfs_setstripe_args lsa = { 0 }; - struct llapi_stripe_param *param = NULL; - __u32 mdts[LMV_MAX_STRIPE_COUNT] = { 0 }; - char *end; - int c; - char *mode_opt = NULL; - bool default_stripe = false; - mode_t mode = S_IRWXU | S_IRWXG | S_IRWXO; - mode_t previous_mode = 0; - bool delete = false; - struct ll_statfs_buf *lsb = NULL; - char mntdir[PATH_MAX] = ""; - bool auto_distributed = false; - bool foreign_mode = false; - char *xattr = NULL; - __u32 type = LU_FOREIGN_TYPE_DAOS, flags = 0; - + char *dname; + struct lfs_setstripe_args lsa = { 0 }; + struct llapi_stripe_param *param = NULL; + __u32 mdts[LMV_MAX_STRIPE_COUNT] = { 0 }; + char *end; + int c; + char *mode_opt = NULL; + bool default_stripe = false; + bool delete = false; + bool auto_distributed = false; + bool foreign_mode = false; + mode_t mode = S_IRWXU | S_IRWXG | S_IRWXO; + mode_t previous_mode = 0; + struct ll_statfs_buf *lsb = NULL; + char mntdir[PATH_MAX] = ""; + char *xattr = NULL; + __u32 type = LU_FOREIGN_TYPE_DAOS, flags = 0; struct option long_opts[] = { { .val = 'c', .name = "count", .has_arg = required_argument }, { .val = 'c', .name = "mdt-count", .has_arg = required_argument }, @@ -5228,6 +5227,7 @@ static int lfs_setdirstripe(int argc, char **argv) /* setstripe { .val = 'y', .name = "yaml", .has_arg = no_argument }, */ { .val = 'x', .name = "xattr", .has_arg = required_argument }, { .name = NULL } }; + int result = 0; setstripe_args_init(&lsa); @@ -5456,113 +5456,124 @@ static int lfs_setdirstripe(int argc, char **argv) memcpy(param->lsp_tgts, mdts, sizeof(*mdts) * lsa.lsa_nr_tgts); } + if (!default_stripe && lsa.lsa_pattern == LMV_HASH_TYPE_SPACE) { + fprintf(stderr, "%s %s: can only specify -H space with -D\n", + progname, argv[0]); + free(param); + return CMD_HELP; + } + dname = argv[optind]; do { if (default_stripe) { result = llapi_dir_set_default_lmv(dname, param); - } else { - /* if current \a dname isn't under the same \a mntdir - * as the last one, and the last one was - * auto-distributed, restore \a param. - */ - if (mntdir[0] != '\0' && - strncmp(dname, mntdir, strlen(mntdir)) && - auto_distributed) { - param->lsp_is_specific = false; - param->lsp_stripe_offset = -1; - auto_distributed = false; - } - - if (!param->lsp_is_specific && - param->lsp_stripe_offset == -1) { - char path[PATH_MAX] = ""; + if (result) + fprintf(stderr, + "%s setdirstripe: cannot set default stripe on dir '%s': %s\n", + progname, dname, strerror(-result)); + continue; + } - if (!lsb) { - lsb = malloc(sizeof(*lsb)); - if (!lsb) { - result = -ENOMEM; - break; - } - } - lsb->sb_count = 0; + /* + * if current \a dname isn't under the same \a mntdir as the + * last one, and the last one was auto-distributed, restore + * \a param. + */ + if (mntdir[0] != '\0' && + strncmp(dname, mntdir, strlen(mntdir)) && + auto_distributed) { + param->lsp_is_specific = false; + param->lsp_stripe_offset = -1; + auto_distributed = false; + } - /* use mntdir for dirname() temporarily */ - strncpy(mntdir, dname, sizeof(mntdir) - 1); - if (!realpath(dirname(mntdir), path)) { - result = -errno; - fprintf(stderr, - "error: invalid path '%s': %s\n", - argv[optind], strerror(errno)); - break; - } - mntdir[0] = '\0'; + /* + * TODO: when MDT can allocate object with QoS (LU-9435), below + * code should be removed, instead we should let LMV to allocate + * the starting MDT object, and then let LOD allocate other MDT + * objects. + */ + if (!param->lsp_is_specific && param->lsp_stripe_offset == -1) { + char path[PATH_MAX] = ""; - result = llapi_search_mounts(path, 0, mntdir, - NULL); - if (result < 0 || mntdir[0] == '\0') { - fprintf(stderr, - "No suitable Lustre mount found\n"); + if (!lsb) { + lsb = malloc(sizeof(*lsb)); + if (!lsb) { + result = -ENOMEM; break; } + } + lsb->sb_count = 0; - result = mntdf(mntdir, NULL, NULL, 0, - LL_STATFS_LMV, lsb); - if (result < 0) - break; + /* use mntdir for dirname() temporarily */ + strncpy(mntdir, dname, sizeof(mntdir)); + if (!realpath(dirname(mntdir), path)) { + result = -errno; + fprintf(stderr, + "error: invalid path '%s': %s\n", + argv[optind], strerror(errno)); + break; + } + mntdir[0] = '\0'; - if (param->lsp_stripe_count > lsb->sb_count) { - fprintf(stderr, - "error: stripe count %d is too big\n", - param->lsp_stripe_count); - result = -ERANGE; - break; - } + result = llapi_search_mounts(path, 0, mntdir, NULL); + if (result < 0 || mntdir[0] == '\0') { + fprintf(stderr, + "No suitable Lustre mount found\n"); + break; + } - qsort(lsb->sb_buf, lsb->sb_count, - sizeof(struct ll_statfs_data), - ll_statfs_data_comp); + result = mntdf(mntdir, NULL, NULL, 0, LL_STATFS_LMV, + lsb); + if (result < 0) + break; - auto_distributed = true; + if (param->lsp_stripe_count > lsb->sb_count) { + fprintf(stderr, + "error: stripe count %d is too big\n", + param->lsp_stripe_count); + result = -ERANGE; + break; } - if (auto_distributed) { - int r; - int nr = MAX(param->lsp_stripe_count, - lsb->sb_count / 2); + qsort(lsb->sb_buf, lsb->sb_count, + sizeof(struct ll_statfs_data), + ll_statfs_data_comp); - /* don't use server whose usage is above 90% */ - while (nr != param->lsp_stripe_count && - obd_statfs_ratio(&lsb->sb_buf[nr].sd_st, - false) > 90) - nr = MAX(param->lsp_stripe_count, - nr / 2); + auto_distributed = true; + } - /* get \a r between [0, nr) */ - r = rand() % nr; + if (auto_distributed) { + int r; + int nr = MAX(param->lsp_stripe_count, + lsb->sb_count / 2); - param->lsp_stripe_offset = - lsb->sb_buf[r].sd_index; - if (param->lsp_stripe_count > 1) { - int i = 0; + /* don't use server whose usage is above 90% */ + while (nr != param->lsp_stripe_count && + obd_statfs_ratio(&lsb->sb_buf[nr].sd_st, false) > + 90) + nr = MAX(param->lsp_stripe_count, nr / 2); - param->lsp_is_specific = true; - for (; i < param->lsp_stripe_count; i++) - param->lsp_tgts[(i + r) % nr] = - lsb->sb_buf[i].sd_index; - } - } + /* get \a r between [0, nr) */ + r = rand() % nr; - result = llapi_dir_create(dname, mode, param); + param->lsp_stripe_offset = lsb->sb_buf[r].sd_index; + if (param->lsp_stripe_count > 1) { + int i = 0; + + param->lsp_is_specific = true; + for (; i < param->lsp_stripe_count; i++) + param->lsp_tgts[(i + r) % nr] = + lsb->sb_buf[i].sd_index; + } } - if (result) { + result = llapi_dir_create(dname, mode, param); + if (result) fprintf(stderr, - "%s setdirstripe: cannot create stripe dir '%s': %s\n", + "%s setdirstripe: cannot create dir '%s': %s\n", progname, dname, strerror(-result)); - break; - } - dname = argv[++optind]; - } while (dname != NULL); + } while (!result && (dname = argv[++optind])); if (mode_opt != NULL) umask(previous_mode); diff --git a/lustre/utils/liblustreapi.c b/lustre/utils/liblustreapi.c index e58a668..a81fb92 100644 --- a/lustre/utils/liblustreapi.c +++ b/lustre/utils/liblustreapi.c @@ -84,7 +84,9 @@ const char *liblustreapi_cmd; char *mdt_hash_name[] = { "none", LMV_HASH_NAME_ALL_CHARS, - LMV_HASH_NAME_FNV_1A_64 }; + LMV_HASH_NAME_FNV_1A_64, + LMV_HASH_NAME_SPACE, +}; struct lustre_foreign_type lu_foreign_types[] = { {.lft_type = LU_FOREIGN_TYPE_NONE, .lft_name = "none"}, -- 1.8.3.1