X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Finclude%2Flustre_lmv.h;h=363dfb0fd04868bbde8c087dad269a6e30285ca7;hb=ca754ec8b43416d41bbd401bad7d9f93746fb867;hp=e797e7790a286f81260c5f26e5603268cc00d3bc;hpb=169738e30a7e0b57f27a517d78d2c928b3bb0f5c;p=fs%2Flustre-release.git

diff --git a/lustre/include/lustre_lmv.h b/lustre/include/lustre_lmv.h
index e797e77..363dfb0 100644
--- a/lustre/include/lustre_lmv.h
+++ b/lustre/include/lustre_lmv.h
@@ -54,6 +54,39 @@ struct lmv_stripe_md {
 	struct lmv_oinfo lsm_md_oinfo[0];
 };
 
+/* true if \a lsm is non-NULL and carries the LMV_MAGIC magic */
+static inline bool lmv_dir_striped(const struct lmv_stripe_md *lsm)
+{
+	return lsm && lsm->lsm_md_magic == LMV_MAGIC;
+}
+
+/* true if \a lsm is non-NULL and carries the LMV_MAGIC_FOREIGN magic */
+static inline bool lmv_dir_foreign(const struct lmv_stripe_md *lsm)
+{
+	return lsm && lsm->lsm_md_magic == LMV_MAGIC_FOREIGN;
+}
+
+/* striped directory whose hash type passes lmv_hash_is_layout_changing() */
+static inline bool lmv_dir_layout_changing(const struct lmv_stripe_md *lsm)
+{
+	return lmv_dir_striped(lsm) &&
+	       lmv_hash_is_layout_changing(lsm->lsm_md_hash_type);
+}
+
+/* striped directory whose hash type has LMV_HASH_FLAG_BAD_TYPE set or is not
+ * accepted by lmv_is_known_hash_type()
+ */
+static inline bool lmv_dir_bad_hash(const struct lmv_stripe_md *lsm)
+{
+	if (!lmv_dir_striped(lsm))
+		return false;
+
+	if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_BAD_TYPE)
+		return true;
+
+	return !lmv_is_known_hash_type(lsm->lsm_md_hash_type);
+}
+
 static inline bool
 lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
 {
@@ -70,26 +103,51 @@ lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
 	      lsm2->lsm_md_migrate_offset ||
 	    lsm1->lsm_md_migrate_hash !=
 	      lsm2->lsm_md_migrate_hash ||
-	    strcmp(lsm1->lsm_md_pool_name,
-		      lsm2->lsm_md_pool_name) != 0)
+	    strncmp(lsm1->lsm_md_pool_name, lsm2->lsm_md_pool_name,
+		    sizeof(lsm1->lsm_md_pool_name)) != 0)
 		return false;
 
-	for (idx = 0; idx < lsm1->lsm_md_stripe_count; idx++) {
-		if (!lu_fid_eq(&lsm1->lsm_md_oinfo[idx].lmo_fid,
-			       &lsm2->lsm_md_oinfo[idx].lmo_fid))
-			return false;
+	if (lmv_dir_striped(lsm1)) {
+		for (idx = 0; idx < lsm1->lsm_md_stripe_count; idx++) {
+			if (!lu_fid_eq(&lsm1->lsm_md_oinfo[idx].lmo_fid,
+				       &lsm2->lsm_md_oinfo[idx].lmo_fid))
+				return false;
+		}
 	}
 
 	return true;
 }
+
+/* log the header fields of \a lsm (which must not be NULL) at debug \a mask,
+ * followed by one line per stripe FID when it is a striped directory
+ */
+static inline void lsm_md_dump(int mask, const struct lmv_stripe_md *lsm)
+{
+	int i;
+
+	/* If lsm_md_magic == LMV_MAGIC_FOREIGN pool_name may not be a null
+	 * terminated string so only print LOV_MAXPOOLNAME bytes.
+	 */
+	CDEBUG(mask,
+	       "magic %#x stripe count %d master mdt %d hash type %#x version %d migrate offset %d migrate hash %#x pool %.*s\n",
+	       lsm->lsm_md_magic, lsm->lsm_md_stripe_count,
+	       lsm->lsm_md_master_mdt_index, lsm->lsm_md_hash_type,
+	       lsm->lsm_md_layout_version, lsm->lsm_md_migrate_offset,
+	       lsm->lsm_md_migrate_hash,
+	       LOV_MAXPOOLNAME, lsm->lsm_md_pool_name);
+
+	if (!lmv_dir_striped(lsm))
+		return;
+
+	for (i = 0; i < lsm->lsm_md_stripe_count; i++)
+		CDEBUG(mask, "stripe[%d] "DFID"\n",
+		       i, PFID(&lsm->lsm_md_oinfo[i].lmo_fid));
+}
+
 union lmv_mds_md;
 
 void lmv_free_memmd(struct lmv_stripe_md *lsm);
 
-int lmvea_load_shards(const struct lu_env *env, struct dt_object *obj,
-		      struct lu_dirent *ent, struct lu_buf *buf,
-		      bool resize);
-
 static inline void lmv1_le_to_cpu(struct lmv_mds_md_v1 *lmv_dst,
 				  const struct lmv_mds_md_v1 *lmv_src)
 {
@@ -143,39 +201,313 @@ lmv_hash_fnv1a(unsigned int count, const char *name, int namelen)
 	return do_div(hash, count);
 }
 
-static inline int lmv_name_to_stripe_index(__u32 lmv_hash_type,
-					   unsigned int stripe_count,
-					   const char *name, int namelen)
+/*
+ * Robert Jenkins' function for mixing 32-bit values
+ * http://burtleburtle.net/bob/hash/evahash.html
+ * a, b = random bits, c = input and output
+ *
+ * Mixing inputs to generate an evenly distributed hash.
+ */
+#define crush_hashmix(a, b, c)				\
+do {							\
+	a = a - b; a = a - c; a = a ^ (c >> 13);	\
+	b = b - c; b = b - a; b = b ^ (a << 8);		\
+	c = c - a; c = c - b; c = c ^ (b >> 13);	\
+	a = a - b; a = a - c; a = a ^ (c >> 12);	\
+	b = b - c; b = b - a; b = b ^ (a << 16);	\
+	c = c - a; c = c - b; c = c ^ (b >> 5);		\
+	a = a - b; a = a - c; a = a ^ (c >> 3);		\
+	b = b - c; b = b - a; b = b ^ (a << 10);	\
+	c = c - a; c = c - b; c = c ^ (b >> 15);	\
+} while (0)
+
+#define crush_hash_seed 1315423911
+
+/* mix \a a and \a b into one 32-bit hash with three crush_hashmix() rounds */
+static inline __u32 crush_hash(__u32 a, __u32 b)
 {
-	int idx;
+	__u32 hash = crush_hash_seed ^ a ^ b;
+	__u32 x = 231232;
+	__u32 y = 1232;
 
-	LASSERT(namelen > 0);
+	crush_hashmix(a, b, hash);
+	crush_hashmix(x, a, hash);
+	crush_hashmix(b, y, hash);
 
-	if (stripe_count <= 1)
-		return 0;
+	return hash;
+}
 
-	switch (lmv_hash_type & LMV_HASH_TYPE_MASK) {
-	case LMV_HASH_TYPE_ALL_CHARS:
-		idx = lmv_hash_all_chars(stripe_count, name, namelen);
-		break;
-	case LMV_HASH_TYPE_FNV_1A_64:
-		idx = lmv_hash_fnv1a(stripe_count, name, namelen);
-		break;
-	default:
-		idx = -EBADFD;
-		break;
+/* refer to https://github.com/ceph/ceph/blob/master/src/crush/hash.c and
+ * https://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf for details of CRUSH
+ * algorithm.
+ */
+static inline unsigned int
+lmv_hash_crush(unsigned int count, const char *name, int namelen)
+{
+	unsigned long long straw;
+	unsigned long long highest_straw = 0;
+	unsigned int pg_id;
+	unsigned int idx = 0;
+	int i;
+
+	/* put temp and backup file on the same MDT where target is located.
+	 * temporary file naming rule:
+	 * 1. rsync: .<target>.XXXXXX
+	 * 2. dstripe: <target>.XXXXXXXX
+	 */
+	if (lu_name_is_temp_file(name, namelen, true, 6)) {
+		name++;
+		namelen -= 8;
+	} else if (lu_name_is_temp_file(name, namelen, false, 8)) {
+		namelen -= 9;
+	} else if (lu_name_is_backup_file(name, namelen, &i)) {
+		LASSERT(i < namelen);
+		namelen -= i;
 	}
 
-	CDEBUG(D_INFO, "name %.*s hash_type %#x idx %d/%u\n", namelen, name,
-	       lmv_hash_type, idx, stripe_count);
+	pg_id = lmv_hash_fnv1a(LMV_CRUSH_PG_COUNT, name, namelen);
+
+	/* distribute PG among all stripes pseudo-randomly, so they are almost
+	 * evenly distributed, and when stripe count changes, only (delta /
+	 * total) sub files need to be moved, herein 'delta' is added or removed
+	 * stripe count, 'total' is total stripe count before change for
+	 * removal, or count after change for addition.
+	 */
+	for (i = 0; i < count; i++) {
+		straw = crush_hash(pg_id, i);
+		if (straw > highest_straw) {
+			highest_straw = straw;
+			idx = i;
+		}
+	}
+	LASSERT(idx < count);
 
 	return idx;
 }
 
-static inline bool lmv_is_known_hash_type(__u32 type)
+/* directory layout may change in three ways:
+ * 1. directory migration, in its LMV source stripes are appended after
+ *    target stripes, \a migrate_hash is source hash type, \a migrate_offset is
+ *    target stripe count,
+ * 2. directory split, \a migrate_hash is hash type before split,
+ *    \a migrate_offset is stripe count before split.
+ * 3. directory merge, \a migrate_hash is hash type after merge,
+ *    \a migrate_offset is stripe count after merge.
+ *
+ * \a new_layout selects whether \a name is hashed under the new or the old
+ * layout.  Return the stripe index, or -EBADFD when more than one stripe is
+ * in play and the hash type is not handled.
+ *
+ * NOTE(review): \a migrate_offset is used as a stripe count unvalidated; a
+ * zero or out-of-range value can trip the LASSERT()s below.
+ */
+static inline int
+__lmv_name_to_stripe_index(__u32 hash_type, __u32 stripe_count,
+			   __u32 migrate_hash, __u32 migrate_offset,
+			   const char *name, int namelen, bool new_layout)
+{
+	__u32 saved_hash = hash_type;
+	__u32 saved_count = stripe_count;
+	int stripe_index = 0;
+
+	LASSERT(namelen > 0);
+	LASSERT(stripe_count > 0);
+
+	if (lmv_hash_is_splitting(hash_type)) {
+		if (!new_layout) {
+			hash_type = migrate_hash;
+			stripe_count = migrate_offset;
+		}
+	} else if (lmv_hash_is_merging(hash_type)) {
+		if (new_layout) {
+			hash_type = migrate_hash;
+			stripe_count = migrate_offset;
+		}
+	} else if (lmv_hash_is_migrating(hash_type)) {
+		if (new_layout) {
+			stripe_count = migrate_offset;
+		} else {
+			hash_type = migrate_hash;
+			stripe_count -= migrate_offset;
+		}
+	}
+
+	if (stripe_count > 1) {
+		switch (hash_type & LMV_HASH_TYPE_MASK) {
+		case LMV_HASH_TYPE_ALL_CHARS:
+			stripe_index = lmv_hash_all_chars(stripe_count, name,
+							  namelen);
+			break;
+		case LMV_HASH_TYPE_FNV_1A_64:
+			stripe_index = lmv_hash_fnv1a(stripe_count, name,
+						      namelen);
+			break;
+		case LMV_HASH_TYPE_CRUSH:
+			stripe_index = lmv_hash_crush(stripe_count, name,
+						      namelen);
+			break;
+		default:
+			return -EBADFD;
+		}
+	}
+
+	LASSERT(stripe_index < stripe_count);
+
+	if (!new_layout && lmv_hash_is_migrating(saved_hash))
+		stripe_index += migrate_offset;
+
+	LASSERT(stripe_index < saved_count);
+
+	CDEBUG(D_INFO, "name %.*s hash=%#x/%#x idx=%d/%u/%u under %s layout\n",
+	       namelen, name, saved_hash, migrate_hash, stripe_index,
+	       saved_count, migrate_offset, new_layout ? "new" : "old");
+
+	return stripe_index;
+}
+
+/* stripe index of \a name under the new layout; \a lmv may be in CPU or
+ * little-endian byte order, any other magic returns -EINVAL
+ */
+static inline int lmv_name_to_stripe_index(struct lmv_mds_md_v1 *lmv,
+					   const char *name, int namelen)
+{
+	if (lmv->lmv_magic == LMV_MAGIC_V1)
+		return __lmv_name_to_stripe_index(lmv->lmv_hash_type,
+						  lmv->lmv_stripe_count,
+						  lmv->lmv_migrate_hash,
+						  lmv->lmv_migrate_offset,
+						  name, namelen, true);
+
+	if (lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_V1))
+		return __lmv_name_to_stripe_index(
+					le32_to_cpu(lmv->lmv_hash_type),
+					le32_to_cpu(lmv->lmv_stripe_count),
+					le32_to_cpu(lmv->lmv_migrate_hash),
+					le32_to_cpu(lmv->lmv_migrate_offset),
+					name, namelen, true);
+
+	return -EINVAL;
+}
+
+/* as lmv_name_to_stripe_index() but under the old layout, and a stripe LMV
+ * (LMV_MAGIC_STRIPE) is accepted as well
+ */
+static inline int lmv_name_to_stripe_index_old(struct lmv_mds_md_v1 *lmv,
+					       const char *name, int namelen)
+{
+	if (lmv->lmv_magic == LMV_MAGIC_V1 ||
+	    lmv->lmv_magic == LMV_MAGIC_STRIPE)
+		return __lmv_name_to_stripe_index(lmv->lmv_hash_type,
+						  lmv->lmv_stripe_count,
+						  lmv->lmv_migrate_hash,
+						  lmv->lmv_migrate_offset,
+						  name, namelen, false);
+
+	if (lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_V1) ||
+	    lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_STRIPE))
+		return __lmv_name_to_stripe_index(
+					le32_to_cpu(lmv->lmv_hash_type),
+					le32_to_cpu(lmv->lmv_stripe_count),
+					le32_to_cpu(lmv->lmv_migrate_hash),
+					le32_to_cpu(lmv->lmv_migrate_offset),
+					name, namelen, false);
+
+	return -EINVAL;
+}
+
+/* true for LMV_USER_MAGIC, LMV_USER_MAGIC_SPECIFIC and LMV_MAGIC_FOREIGN */
+static inline bool lmv_user_magic_supported(__u32 lum_magic)
+{
+	return lum_magic == LMV_USER_MAGIC ||
+	       lum_magic == LMV_USER_MAGIC_SPECIFIC ||
+	       lum_magic == LMV_MAGIC_FOREIGN;
+}
+
+/* master LMV is sane */
+static inline bool lmv_is_sane(const struct lmv_mds_md_v1 *lmv)
+{
+	if (!lmv)
+		return false;
+
+	if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1)
+		goto insane;
+
+	if (le32_to_cpu(lmv->lmv_stripe_count) == 0)
+		goto insane;
+
+	if (!lmv_is_known_hash_type(le32_to_cpu(lmv->lmv_hash_type)))
+		goto insane;
+
+	return true;
+insane:
+	LMV_DEBUG(D_ERROR, lmv, "insane");
+	return false;
+}
+
+/* LMV can be either master or stripe LMV */
+static inline bool lmv_is_sane2(const struct lmv_mds_md_v1 *lmv)
 {
-	return (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_FNV_1A_64 ||
-	       (type & LMV_HASH_TYPE_MASK) == LMV_HASH_TYPE_ALL_CHARS;
+	if (!lmv)
+		return false;
+
+	if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1 &&
+	    le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_STRIPE)
+		goto insane;
+
+	if (le32_to_cpu(lmv->lmv_stripe_count) == 0)
+		goto insane;
+
+	if (!lmv_is_known_hash_type(le32_to_cpu(lmv->lmv_hash_type)))
+		goto insane;
+
+	return true;
+insane:
+	LMV_DEBUG(D_ERROR, lmv, "insane");
+	return false;
+}
+
+/* the lmv_is_*() checks below take a little-endian LMV, like lmv_is_sane() */
+static inline bool lmv_is_splitting(const struct lmv_mds_md_v1 *lmv)
+{
+	if (!lmv_is_sane2(lmv))
+		return false;
+
+	return lmv_hash_is_splitting(le32_to_cpu(lmv->lmv_hash_type));
+}
+
+static inline bool lmv_is_merging(const struct lmv_mds_md_v1 *lmv)
+{
+	if (!lmv_is_sane2(lmv))
+		return false;
+
+	return lmv_hash_is_merging(le32_to_cpu(lmv->lmv_hash_type));
+}
+
+static inline bool lmv_is_migrating(const struct lmv_mds_md_v1 *lmv)
+{
+	if (!lmv_is_sane(lmv))
+		return false;
+
+	return lmv_hash_is_migrating(le32_to_cpu(lmv->lmv_hash_type));
+}
+
+static inline bool lmv_is_restriping(const struct lmv_mds_md_v1 *lmv)
+{
+	if (!lmv_is_sane2(lmv))
+		return false;
+
+	return lmv_hash_is_splitting(le32_to_cpu(lmv->lmv_hash_type)) ||
+	       lmv_hash_is_merging(le32_to_cpu(lmv->lmv_hash_type));
+}
+
+static inline bool lmv_is_layout_changing(const struct lmv_mds_md_v1 *lmv)
+{
+	if (!lmv_is_sane2(lmv))
+		return false;
+
+	return lmv_hash_is_splitting(le32_to_cpu(lmv->lmv_hash_type)) ||
+	       lmv_hash_is_merging(le32_to_cpu(lmv->lmv_hash_type)) ||
+	       lmv_hash_is_migrating(le32_to_cpu(lmv->lmv_hash_type));
 }
 
 #endif