__u32 lsm_md_stripe_count;
__u32 lsm_md_master_mdt_index;
__u32 lsm_md_hash_type;
+ __u8 lsm_md_max_inherit;
+ __u8 lsm_md_max_inherit_rr;
__u32 lsm_md_layout_version;
__u32 lsm_md_migrate_offset;
__u32 lsm_md_migrate_hash;
return lsm && lsm->lsm_md_magic == LMV_MAGIC_FOREIGN;
}
-static inline bool lmv_dir_migrating(const struct lmv_stripe_md *lsm)
+static inline bool lmv_dir_layout_changing(const struct lmv_stripe_md *lsm)
{
return lmv_dir_striped(lsm) &&
- lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION;
+ lmv_hash_is_layout_changing(lsm->lsm_md_hash_type);
}
static inline bool lmv_dir_bad_hash(const struct lmv_stripe_md *lsm)
if (!lmv_dir_striped(lsm))
return false;
- if (lmv_dir_migrating(lsm) &&
- lsm->lsm_md_stripe_count - lsm->lsm_md_migrate_offset <= 1)
- return false;
+ if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_BAD_TYPE)
+ return true;
return !lmv_is_known_hash_type(lsm->lsm_md_hash_type);
}
static inline void lsm_md_dump(int mask, const struct lmv_stripe_md *lsm)
{
+ bool valid_hash = lmv_dir_bad_hash(lsm);
int i;
/* If lsm_md_magic == LMV_MAGIC_FOREIGN pool_name may not be a null
* terminated string so only print LOV_MAXPOOLNAME bytes.
*/
CDEBUG(mask,
- "magic %#x stripe count %d master mdt %d hash type %#x version %d migrate offset %d migrate hash %#x pool %.*s\n",
+ "magic %#x stripe count %d master mdt %d hash type %s:%#x max inherit %hhu version %d migrate offset %d migrate hash %#x pool %.*s\n",
lsm->lsm_md_magic, lsm->lsm_md_stripe_count,
- lsm->lsm_md_master_mdt_index, lsm->lsm_md_hash_type,
- lsm->lsm_md_layout_version, lsm->lsm_md_migrate_offset,
- lsm->lsm_md_migrate_hash,
+ lsm->lsm_md_master_mdt_index,
+ valid_hash ? "invalid hash" :
+ mdt_hash_name[lsm->lsm_md_hash_type & (LMV_HASH_TYPE_MAX - 1)],
+ lsm->lsm_md_hash_type, lsm->lsm_md_max_inherit,
+ lsm->lsm_md_layout_version,
+ lsm->lsm_md_migrate_offset, lsm->lsm_md_migrate_hash,
LOV_MAXPOOLNAME, lsm->lsm_md_pool_name);
if (!lmv_dir_striped(lsm))
le32_to_cpu(lmv_src->lmv_master_mdt_index);
lmv_dst->lmv_hash_type = le32_to_cpu(lmv_src->lmv_hash_type);
lmv_dst->lmv_layout_version = le32_to_cpu(lmv_src->lmv_layout_version);
+ if (lmv_src->lmv_stripe_count > LMV_MAX_STRIPE_COUNT)
+ return;
for (i = 0; i < lmv_src->lmv_stripe_count; i++)
fid_le_to_cpu(&lmv_dst->lmv_stripe_fids[i],
&lmv_src->lmv_stripe_fids[i]);
return do_div(hash, count);
}
-static inline int lmv_name_to_stripe_index(__u32 lmv_hash_type,
- unsigned int stripe_count,
- const char *name, int namelen)
+/*
+ * Robert Jenkins' function for mixing 32-bit values
+ * http://burtleburtle.net/bob/hash/evahash.html
+ * a, b = random bits, c = input and output
+ *
+ * Mixing inputs to generate an evenly distributed hash.
+ */
+#define crush_hashmix(a, b, c) \
+do { \
+ a = a - b; a = a - c; a = a ^ (c >> 13); \
+ b = b - c; b = b - a; b = b ^ (a << 8); \
+ c = c - a; c = c - b; c = c ^ (b >> 13); \
+ a = a - b; a = a - c; a = a ^ (c >> 12); \
+ b = b - c; b = b - a; b = b ^ (a << 16); \
+ c = c - a; c = c - b; c = c ^ (b >> 5); \
+ a = a - b; a = a - c; a = a ^ (c >> 3); \
+ b = b - c; b = b - a; b = b ^ (a << 10); \
+ c = c - a; c = c - b; c = c ^ (b >> 15); \
+} while (0)
+
+#define crush_hash_seed 1315423911
+
+static inline __u32 crush_hash(__u32 a, __u32 b)
{
- int idx;
+ __u32 hash = crush_hash_seed ^ a ^ b;
+ __u32 x = 231232;
+ __u32 y = 1232;
- LASSERT(namelen > 0);
+ crush_hashmix(a, b, hash);
+ crush_hashmix(x, a, hash);
+ crush_hashmix(b, y, hash);
- if (stripe_count <= 1)
- return 0;
+ return hash;
+}
- switch (lmv_hash_type & LMV_HASH_TYPE_MASK) {
- case LMV_HASH_TYPE_ALL_CHARS:
- idx = lmv_hash_all_chars(stripe_count, name, namelen);
- break;
- case LMV_HASH_TYPE_FNV_1A_64:
- idx = lmv_hash_fnv1a(stripe_count, name, namelen);
- break;
- default:
- idx = -EBADFD;
- break;
+/* refer to https://github.com/ceph/ceph/blob/master/src/crush/hash.c and
+ * https://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf for details of CRUSH
+ * algorithm.
+ */
+static inline unsigned int
+lmv_hash_crush(unsigned int count, const char *name, int namelen)
+{
+ unsigned long long straw;
+ unsigned long long highest_straw = 0;
+ unsigned int pg_id;
+ unsigned int idx = 0;
+ int i;
+
+ /* put temp and backup file on the same MDT where target is located.
+ * temporary file naming rule:
+ * 1. rsync: .<target>.XXXXXX
+ * 2. dstripe: <target>.XXXXXXXX
+ */
+ if (lu_name_is_temp_file(name, namelen, true, 6)) {
+ name++;
+ namelen -= 8;
+ } else if (lu_name_is_temp_file(name, namelen, false, 8)) {
+ namelen -= 9;
+ } else if (lu_name_is_backup_file(name, namelen, &i)) {
+ LASSERT(i < namelen);
+ namelen -= i;
}
- CDEBUG(D_INFO, "name %.*s hash_type %#x idx %d/%u\n", namelen, name,
- lmv_hash_type, idx, stripe_count);
+ pg_id = lmv_hash_fnv1a(LMV_CRUSH_PG_COUNT, name, namelen);
+
+ /* distribute PG among all stripes pseudo-randomly, so they are almost
+ * evenly distributed, and when stripe count changes, only (delta /
+ * total) sub files need to be moved, herein 'delta' is added or removed
+ * stripe count, 'total' is total stripe count before change for
+ * removal, or count after change for addition.
+ */
+ for (i = 0; i < count; i++) {
+ straw = crush_hash(pg_id, i);
+ if (straw > highest_straw) {
+ highest_straw = straw;
+ idx = i;
+ }
+ }
+ LASSERT(idx < count);
return idx;
}
-static inline bool lmv_magic_supported(__u32 lum_magic)
+/* directory layout may change in three ways:
+ * 1. directory migration, in its LMV source stripes are appended after
+ * target stripes, \a migrate_hash is source hash type, \a migrate_offset is
+ * target stripe count,
+ * 2. directory split, \a migrate_hash is hash type before split,
+ * \a migrate_offset is stripe count before split.
+ * 3. directory merge, \a migrate_hash is hash type after merge,
+ * \a migrate_offset is stripe count after merge.
+ */
+static inline int
+__lmv_name_to_stripe_index(__u32 hash_type, __u32 stripe_count,
+ __u32 migrate_hash, __u32 migrate_offset,
+ const char *name, int namelen, bool new_layout)
+{
+ __u32 saved_hash = hash_type;
+ __u32 saved_count = stripe_count;
+ int stripe_index = 0;
+
+ LASSERT(namelen > 0);
+ LASSERT(stripe_count > 0);
+
+ if (lmv_hash_is_splitting(hash_type)) {
+ if (!new_layout) {
+ hash_type = migrate_hash;
+ stripe_count = migrate_offset;
+ }
+ } else if (lmv_hash_is_merging(hash_type)) {
+ if (new_layout) {
+ hash_type = migrate_hash;
+ stripe_count = migrate_offset;
+ }
+ } else if (lmv_hash_is_migrating(hash_type)) {
+ if (new_layout) {
+ stripe_count = migrate_offset;
+ } else {
+ hash_type = migrate_hash;
+ stripe_count -= migrate_offset;
+ }
+ }
+
+ if (stripe_count > 1) {
+ switch (hash_type & LMV_HASH_TYPE_MASK) {
+ case LMV_HASH_TYPE_ALL_CHARS:
+ stripe_index = lmv_hash_all_chars(stripe_count, name,
+ namelen);
+ break;
+ case LMV_HASH_TYPE_FNV_1A_64:
+ stripe_index = lmv_hash_fnv1a(stripe_count, name,
+ namelen);
+ break;
+ case LMV_HASH_TYPE_CRUSH:
+ stripe_index = lmv_hash_crush(stripe_count, name,
+ namelen);
+ break;
+ default:
+ return -EBADFD;
+ }
+ }
+
+ LASSERT(stripe_index < stripe_count);
+
+ if (!new_layout && lmv_hash_is_migrating(saved_hash))
+ stripe_index += migrate_offset;
+
+ LASSERT(stripe_index < saved_count);
+
+ CDEBUG(D_INFO, "name %.*s hash=%#x/%#x idx=%d/%u/%u under %s layout\n",
+ namelen, name, saved_hash, migrate_hash, stripe_index,
+ saved_count, migrate_offset, new_layout ? "new" : "old");
+
+ return stripe_index;
+}
+
+static inline int lmv_name_to_stripe_index(struct lmv_mds_md_v1 *lmv,
+ const char *name, int namelen)
+{
+ if (lmv->lmv_magic == LMV_MAGIC_V1)
+ return __lmv_name_to_stripe_index(lmv->lmv_hash_type,
+ lmv->lmv_stripe_count,
+ lmv->lmv_migrate_hash,
+ lmv->lmv_migrate_offset,
+ name, namelen, true);
+
+ if (lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_V1))
+ return __lmv_name_to_stripe_index(
+ le32_to_cpu(lmv->lmv_hash_type),
+ le32_to_cpu(lmv->lmv_stripe_count),
+ le32_to_cpu(lmv->lmv_migrate_hash),
+ le32_to_cpu(lmv->lmv_migrate_offset),
+ name, namelen, true);
+
+ return -EINVAL;
+}
+
+static inline int lmv_name_to_stripe_index_old(struct lmv_mds_md_v1 *lmv,
+ const char *name, int namelen)
+{
+ if (lmv->lmv_magic == LMV_MAGIC_V1 ||
+ lmv->lmv_magic == LMV_MAGIC_STRIPE)
+ return __lmv_name_to_stripe_index(lmv->lmv_hash_type,
+ lmv->lmv_stripe_count,
+ lmv->lmv_migrate_hash,
+ lmv->lmv_migrate_offset,
+ name, namelen, false);
+
+ if (lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_V1) ||
+ lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_STRIPE))
+ return __lmv_name_to_stripe_index(
+ le32_to_cpu(lmv->lmv_hash_type),
+ le32_to_cpu(lmv->lmv_stripe_count),
+ le32_to_cpu(lmv->lmv_migrate_hash),
+ le32_to_cpu(lmv->lmv_migrate_offset),
+ name, namelen, false);
+
+ return -EINVAL;
+}
+
+static inline bool lmv_user_magic_supported(__u32 lum_magic)
{
return lum_magic == LMV_USER_MAGIC ||
lum_magic == LMV_USER_MAGIC_SPECIFIC ||
lum_magic == LMV_MAGIC_FOREIGN;
}
+#define LMV_DEBUG(mask, lmv, msg) \
+ CDEBUG(mask, \
+ "%s LMV: magic=%#x count=%u index=%u hash=%s:%#x version=%u migrate offset=%u migrate hash=%s:%u.\n",\
+ msg, (lmv)->lmv_magic, (lmv)->lmv_stripe_count, \
+ (lmv)->lmv_master_mdt_index, \
+ mdt_hash_name[(lmv)->lmv_hash_type & (LMV_HASH_TYPE_MAX - 1)],\
+ (lmv)->lmv_hash_type, (lmv)->lmv_layout_version, \
+ (lmv)->lmv_migrate_offset, \
+ mdt_hash_name[(lmv)->lmv_migrate_hash & (LMV_HASH_TYPE_MAX - 1)],\
+ (lmv)->lmv_migrate_hash)
+
+/* master LMV is sane */
+static inline bool lmv_is_sane(const struct lmv_mds_md_v1 *lmv)
+{
+ if (!lmv)
+ return false;
+
+ if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1)
+ goto insane;
+
+ if (le32_to_cpu(lmv->lmv_stripe_count) == 0)
+ goto insane;
+
+ if (!lmv_is_known_hash_type(le32_to_cpu(lmv->lmv_hash_type)))
+ goto insane;
+
+ return true;
+insane:
+ LMV_DEBUG(D_ERROR, lmv, "insane");
+ return false;
+}
+
+/* LMV can be either master or stripe LMV */
+static inline bool lmv_is_sane2(const struct lmv_mds_md_v1 *lmv)
+{
+ if (!lmv)
+ return false;
+
+ if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1 &&
+ le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_STRIPE)
+ goto insane;
+
+ if (le32_to_cpu(lmv->lmv_stripe_count) == 0)
+ goto insane;
+
+ if (!lmv_is_known_hash_type(le32_to_cpu(lmv->lmv_hash_type)))
+ goto insane;
+
+ return true;
+insane:
+ LMV_DEBUG(D_ERROR, lmv, "insane");
+ return false;
+}
+
+static inline bool lmv_is_splitting(const struct lmv_mds_md_v1 *lmv)
+{
+ if (!lmv_is_sane2(lmv))
+ return false;
+
+ return lmv_hash_is_splitting(cpu_to_le32(lmv->lmv_hash_type));
+}
+
+static inline bool lmv_is_merging(const struct lmv_mds_md_v1 *lmv)
+{
+ if (!lmv_is_sane2(lmv))
+ return false;
+
+ return lmv_hash_is_merging(cpu_to_le32(lmv->lmv_hash_type));
+}
+
+static inline bool lmv_is_migrating(const struct lmv_mds_md_v1 *lmv)
+{
+ if (!lmv_is_sane(lmv))
+ return false;
+
+ return lmv_hash_is_migrating(cpu_to_le32(lmv->lmv_hash_type));
+}
+
+static inline bool lmv_is_restriping(const struct lmv_mds_md_v1 *lmv)
+{
+ if (!lmv_is_sane2(lmv))
+ return false;
+
+ return lmv_hash_is_splitting(cpu_to_le32(lmv->lmv_hash_type)) ||
+ lmv_hash_is_merging(cpu_to_le32(lmv->lmv_hash_type));
+}
+
+static inline bool lmv_is_layout_changing(const struct lmv_mds_md_v1 *lmv)
+{
+ if (!lmv_is_sane2(lmv))
+ return false;
+
+ return lmv_hash_is_splitting(cpu_to_le32(lmv->lmv_hash_type)) ||
+ lmv_hash_is_merging(cpu_to_le32(lmv->lmv_hash_type)) ||
+ lmv_hash_is_migrating(cpu_to_le32(lmv->lmv_hash_type));
+}
+
+static inline bool lmv_is_fixed(const struct lmv_mds_md_v1 *lmv)
+{
+ return cpu_to_le32(lmv->lmv_hash_type) & LMV_HASH_FLAG_FIXED;
+}
+
#endif