4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright (c) 2014, 2016, Intel Corporation.
26 * lustre/include/lustre_lmv.h
28 * Lustre LMV structures and functions.
30 * Author: Di Wang <di.wang@intel.com>
35 #include <uapi/linux/lustre/lustre_idl.h>
/*
 * Members of the per-stripe descriptor (struct lmv_oinfo, the element type of
 * lsm_md_oinfo[] below).  lmo_fid is the value lsm_md_eq() compares and
 * lsm_md_dump() prints for every stripe.
 * NOTE(review): lmo_root is presumably the stripe's root inode - confirm.
 */
38 struct lu_fid lmo_fid;
40 struct inode *lmo_root;
/*
 * Stripe metadata of a directory.  The helpers below read these fields
 * directly (no byte-order conversion), test lsm_md_magic to tell striped
 * (LMV_MAGIC) from foreign (LMV_MAGIC_FOREIGN) layouts, and walk
 * lsm_md_oinfo[] up to lsm_md_stripe_count.
 */
43 struct lmv_stripe_md {
45 __u32 lsm_md_stripe_count;
46 __u32 lsm_md_master_mdt_index;
47 __u32 lsm_md_hash_type;
48 __u8 lsm_md_max_inherit;
49 __u8 lsm_md_max_inherit_rr;
50 __u32 lsm_md_layout_version;
51 __u32 lsm_md_migrate_offset;
52 __u32 lsm_md_migrate_hash;
/* lsm_md_default_count/lsm_md_default_index are not compared by lsm_md_eq() */
53 __u32 lsm_md_default_count;
54 __u32 lsm_md_default_index;
/* lsm_md_dump() notes this may lack a NUL terminator for foreign layouts */
55 char lsm_md_pool_name[LOV_MAXPOOLNAME + 1];
/* variable-length tail: one entry per stripe */
56 struct lmv_oinfo lsm_md_oinfo[0];
59 static inline bool lmv_dir_striped(const struct lmv_stripe_md *lsm)
61 return lsm && lsm->lsm_md_magic == LMV_MAGIC;
64 static inline bool lmv_dir_foreign(const struct lmv_stripe_md *lsm)
66 return lsm && lsm->lsm_md_magic == LMV_MAGIC_FOREIGN;
69 static inline bool lmv_dir_layout_changing(const struct lmv_stripe_md *lsm)
71 return lmv_dir_striped(lsm) &&
72 lmv_hash_is_layout_changing(lsm->lsm_md_hash_type);
/*
 * Report whether a directory carries an unusable hash type.
 *
 * Checks, in order: whether \a lsm is striped at all, whether
 * LMV_HASH_FLAG_BAD_TYPE is set in lsm_md_hash_type, and finally returns the
 * negation of lmv_is_known_hash_type().
 * NOTE(review): the statements executed by the two early "if" branches are
 * not visible in this excerpt; presumably "return false" for a non-striped
 * directory and "return true" when the bad-type flag is set - confirm.
 */
75 static inline bool lmv_dir_bad_hash(const struct lmv_stripe_md *lsm)
77 if (!lmv_dir_striped(lsm))
80 if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_BAD_TYPE)
83 return !lmv_is_known_hash_type(lsm->lsm_md_hash_type);
/*
 * Compare two stripe descriptors.
 *
 * The fixed part is compared field by field: magic, stripe count, master MDT
 * index, hash type, both max-inherit values, layout version, migrate offset,
 * migrate hash and pool name.  lsm_md_default_count and lsm_md_default_index
 * do not take part in the comparison.  The per-stripe tail is then compared
 * according to the magic of \a lsm1.
 *
 * Neither pointer is NULL-checked in the visible code.
 * NOTE(review): the return type and the return statements are not visible in
 * this excerpt; presumably any mismatch yields false - confirm.
 */
87 lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
91 if (lsm1->lsm_md_magic != lsm2->lsm_md_magic ||
92 lsm1->lsm_md_stripe_count != lsm2->lsm_md_stripe_count ||
93 lsm1->lsm_md_master_mdt_index !=
94 lsm2->lsm_md_master_mdt_index ||
95 lsm1->lsm_md_hash_type != lsm2->lsm_md_hash_type ||
96 lsm1->lsm_md_max_inherit != lsm2->lsm_md_max_inherit ||
97 lsm1->lsm_md_max_inherit_rr != lsm2->lsm_md_max_inherit_rr ||
98 lsm1->lsm_md_layout_version !=
99 lsm2->lsm_md_layout_version ||
100 lsm1->lsm_md_migrate_offset !=
101 lsm2->lsm_md_migrate_offset ||
102 lsm1->lsm_md_migrate_hash !=
103 lsm2->lsm_md_migrate_hash ||
/* bounded by the size of the fixed pool name array */
104 strncmp(lsm1->lsm_md_pool_name, lsm2->lsm_md_pool_name,
105 sizeof(lsm1->lsm_md_pool_name)) != 0)
/* striped directory: compare the FID of every stripe */
108 if (lmv_dir_striped(lsm1)) {
109 for (idx = 0; idx < lsm1->lsm_md_stripe_count; idx++) {
110 if (!lu_fid_eq(&lsm1->lsm_md_oinfo[idx].lmo_fid,
111 &lsm2->lsm_md_oinfo[idx].lmo_fid))
/* LMV_USER_MAGIC_SPECIFIC: compare the per-stripe lmo_mds instead */
114 } else if (lsm1->lsm_md_magic == LMV_USER_MAGIC_SPECIFIC) {
115 for (idx = 0; idx < lsm1->lsm_md_stripe_count; idx++) {
116 if (lsm1->lsm_md_oinfo[idx].lmo_mds !=
117 lsm2->lsm_md_oinfo[idx].lmo_mds)
/*
 * Log the contents of \a lsm at debug level \a mask.
 *
 * One message carries the fixed fields (the pool name is printed with an
 * explicit LOV_MAXPOOLNAME precision); for striped directories one further
 * CDEBUG() line per stripe prints the stripe FID.
 * NOTE(review): the head of the first logging call and the statement taken
 * for non-striped directories are not visible in this excerpt.
 */
125 static inline void lsm_md_dump(int mask, const struct lmv_stripe_md *lsm)
/* NOTE: despite its name, valid_hash is true when the hash is BAD; it holds
 * the result of lmv_dir_bad_hash() and selects the "invalid hash" string.
 */
127 bool valid_hash = lmv_dir_bad_hash(lsm);
130 /* If lsm_md_magic == LMV_MAGIC_FOREIGN pool_name may not be a null
131 * terminated string so only print LOV_MAXPOOLNAME bytes.
134 "magic %#x stripe count %d master mdt %d hash type %s:%#x max-inherit %hhu max-inherit-rr %hhu version %d migrate offset %d migrate hash %#x pool %.*s\n",
135 lsm->lsm_md_magic, lsm->lsm_md_stripe_count,
136 lsm->lsm_md_master_mdt_index,
137 valid_hash ? "invalid hash" :
138 mdt_hash_name[lsm->lsm_md_hash_type & (LMV_HASH_TYPE_MAX - 1)],
139 lsm->lsm_md_hash_type, lsm->lsm_md_max_inherit,
140 lsm->lsm_md_max_inherit_rr, lsm->lsm_md_layout_version,
141 lsm->lsm_md_migrate_offset, lsm->lsm_md_migrate_hash,
142 LOV_MAXPOOLNAME, lsm->lsm_md_pool_name);
144 if (!lmv_dir_striped(lsm))
147 for (i = 0; i < lsm->lsm_md_stripe_count; i++)
148 CDEBUG(mask, "stripe[%d] "DFID"\n",
149 i, PFID(&lsm->lsm_md_oinfo[i].lmo_fid));
/*
 * Declared here, defined elsewhere.
 * NOTE(review): by its name this presumably releases an lmv_stripe_md -
 * confirm ownership rules against the definition.
 */
154 void lmv_free_memmd(struct lmv_stripe_md *lsm);
156 static inline void lmv1_le_to_cpu(struct lmv_mds_md_v1 *lmv_dst,
157 const struct lmv_mds_md_v1 *lmv_src)
161 lmv_dst->lmv_magic = le32_to_cpu(lmv_src->lmv_magic);
162 lmv_dst->lmv_stripe_count = le32_to_cpu(lmv_src->lmv_stripe_count);
163 lmv_dst->lmv_master_mdt_index =
164 le32_to_cpu(lmv_src->lmv_master_mdt_index);
165 lmv_dst->lmv_hash_type = le32_to_cpu(lmv_src->lmv_hash_type);
166 lmv_dst->lmv_layout_version = le32_to_cpu(lmv_src->lmv_layout_version);
167 if (lmv_src->lmv_stripe_count > LMV_MAX_STRIPE_COUNT)
169 for (i = 0; i < lmv_src->lmv_stripe_count; i++)
170 fid_le_to_cpu(&lmv_dst->lmv_stripe_fids[i],
171 &lmv_src->lmv_stripe_fids[i]);
/*
 * Convert a little-endian LMV to CPU byte order, dispatching on the
 * (converted) magic of \a lmv_src.  The visible branch hands the v1 member of
 * the union to lmv1_le_to_cpu().
 * NOTE(review): the case labels of the switch are not visible in this
 * excerpt, so which magics are handled cannot be stated from here.
 */
174 static inline void lmv_le_to_cpu(union lmv_mds_md *lmv_dst,
175 const union lmv_mds_md *lmv_src)
177 switch (le32_to_cpu(lmv_src->lmv_magic)) {
179 lmv1_le_to_cpu(&lmv_dst->lmv_md_v1, &lmv_src->lmv_md_v1);
186 /* This hash is only for testing purpose */
/*
 * Visits \a name as unsigned bytes, once per byte of \a namelen (the loop
 * pre-decrements namelen and stops when it goes negative).
 * NOTE(review): the loop body and the final reduction by \a count are not
 * visible in this excerpt.
 */
187 static inline unsigned int
188 lmv_hash_all_chars(unsigned int count, const char *name, int namelen)
191 const unsigned char *p = (const unsigned char *)name;
193 while (--namelen >= 0)
/*
 * Hash \a name with lustre_hash_fnv_1a_64() and reduce the result to a value
 * below \a count.
 *
 * do_div() is the kernel 64-by-32 division helper: it leaves the quotient in
 * "hash" and evaluates to the remainder, which is what gets returned.
 * \a count is used as a divisor; the callers visible in this file pass
 * LMV_CRUSH_PG_COUNT or a stripe count greater than 1.
 */
201 static inline unsigned int
202 lmv_hash_fnv1a(unsigned int count, const char *name, int namelen)
206 hash = lustre_hash_fnv_1a_64(name, namelen);
208 return do_div(hash, count);
/*
 * crush_hashmix(a, b, c) rewrites all three arguments in place through nine
 * subtract/xor-shift rounds, so every argument must be a modifiable lvalue
 * and is evaluated many times.
 * NOTE(review): the lines wrapping the macro body are not visible in this
 * excerpt.
 */
212 * Robert Jenkins' function for mixing 32-bit values
213 * http://burtleburtle.net/bob/hash/evahash.html
214 * a, b = random bits, c = input and output
216 * Mixing inputs to generate an evenly distributed hash.
218 #define crush_hashmix(a, b, c) \
220 a = a - b; a = a - c; a = a ^ (c >> 13); \
221 b = b - c; b = b - a; b = b ^ (a << 8); \
222 c = c - a; c = c - b; c = c ^ (b >> 13); \
223 a = a - b; a = a - c; a = a ^ (c >> 12); \
224 b = b - c; b = b - a; b = b ^ (a << 16); \
225 c = c - a; c = c - b; c = c ^ (b >> 5); \
226 a = a - b; a = a - c; a = a ^ (c >> 3); \
227 b = b - c; b = b - a; b = b ^ (a << 10); \
228 c = c - a; c = c - b; c = c ^ (b >> 15); \
/*
 * crush_hash() mixes two 32-bit inputs into one 32-bit value: "hash" starts
 * as crush_hash_seed ^ a ^ b and is then passed three times through
 * crush_hashmix() together with a, b and the locals x and y.
 * NOTE(review): the declarations of x and y and the return statement are not
 * visible in this excerpt.
 */
231 #define crush_hash_seed 1315423911
233 static inline __u32 crush_hash(__u32 a, __u32 b)
235 __u32 hash = crush_hash_seed ^ a ^ b;
239 crush_hashmix(a, b, hash);
240 crush_hashmix(x, a, hash);
241 crush_hashmix(b, y, hash);
/*
 * lmv_hash_crush() picks a stripe for \a name in two steps: the name is
 * hashed to a placement group with lmv_hash_fnv1a(LMV_CRUSH_PG_COUNT, ...),
 * then every stripe index i in [0, count) draws straw = crush_hash(pg_id, i)
 * and the highest straw seen so far is tracked.  Before hashing, the name is
 * tested with lu_name_is_temp_file() (two patterns) and
 * lu_name_is_backup_file().
 * NOTE(review): what those three branches do to name/namelen, the assignment
 * of idx inside the loop and the return statement are not visible in this
 * excerpt.
 */
246 /* refer to https://github.com/ceph/ceph/blob/master/src/crush/hash.c and
247 * https://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf for details of CRUSH
250 static inline unsigned int
251 lmv_hash_crush(unsigned int count, const char *name, int namelen)
253 unsigned long long straw;
254 unsigned long long highest_straw = 0;
256 unsigned int idx = 0;
259 /* put temp and backup file on the same MDT where target is located.
260 * temporary file naming rule:
261 * 1. rsync: .<target>.XXXXXX
262 * 2. dstripe: <target>.XXXXXXXX
264 if (lu_name_is_temp_file(name, namelen, true, 6)) {
267 } else if (lu_name_is_temp_file(name, namelen, false, 8)) {
269 } else if (lu_name_is_backup_file(name, namelen, &i)) {
270 LASSERT(i < namelen);
274 pg_id = lmv_hash_fnv1a(LMV_CRUSH_PG_COUNT, name, namelen);
276 /* distribute PG among all stripes pseudo-randomly, so they are almost
277 * evenly distributed, and when stripe count changes, only (delta /
278 * total) sub files need to be moved, herein 'delta' is added or removed
279 * stripe count, 'total' is total stripe count before change for
280 * removal, or count after change for addition.
282 for (i = 0; i < count; i++) {
283 straw = crush_hash(pg_id, i);
284 if (straw > highest_straw) {
285 highest_straw = straw;
289 LASSERT(idx < count);
/*
 * __lmv_name_to_stripe_index() maps \a name to a stripe index.
 *
 * It asserts namelen > 0 and stripe_count > 0 and remembers the incoming
 * hash type and stripe count.  For splitting, merging or migrating hash
 * types it substitutes migrate_hash / migrate_offset.  When more than one
 * stripe remains it dispatches on hash_type & LMV_HASH_TYPE_MASK to
 * lmv_hash_all_chars(), lmv_hash_fnv1a() or lmv_hash_crush().
 * For the old layout of a migrating directory the result is shifted by
 * migrate_offset.  The index is asserted to lie below the effective and then
 * the original stripe count, and the outcome is logged with CDEBUG(D_INFO).
 * NOTE(review): the conditions guarding the substitutions inside each layout
 * branch, the switch default and the return statement are not visible in
 * this excerpt.
 */
294 /* directory layout may change in three ways:
295 * 1. directory migration, in its LMV source stripes are appended after
296 * target stripes, \a migrate_hash is source hash type, \a migrate_offset is
297 * target stripe count,
298 * 2. directory split, \a migrate_hash is hash type before split,
299 * \a migrate_offset is stripe count before split.
300 * 3. directory merge, \a migrate_hash is hash type after merge,
301 * \a migrate_offset is stripe count after merge.
304 __lmv_name_to_stripe_index(__u32 hash_type, __u32 stripe_count,
305 __u32 migrate_hash, __u32 migrate_offset,
306 const char *name, int namelen, bool new_layout)
308 __u32 saved_hash = hash_type;
309 __u32 saved_count = stripe_count;
310 int stripe_index = 0;
312 LASSERT(namelen > 0);
313 LASSERT(stripe_count > 0);
315 if (lmv_hash_is_splitting(hash_type)) {
317 hash_type = migrate_hash;
318 stripe_count = migrate_offset;
320 } else if (lmv_hash_is_merging(hash_type)) {
322 hash_type = migrate_hash;
323 stripe_count = migrate_offset;
325 } else if (lmv_hash_is_migrating(hash_type)) {
327 stripe_count = migrate_offset;
329 hash_type = migrate_hash;
330 stripe_count -= migrate_offset;
334 if (stripe_count > 1) {
335 switch (hash_type & LMV_HASH_TYPE_MASK) {
336 case LMV_HASH_TYPE_ALL_CHARS:
337 stripe_index = lmv_hash_all_chars(stripe_count, name,
340 case LMV_HASH_TYPE_FNV_1A_64:
341 stripe_index = lmv_hash_fnv1a(stripe_count, name,
344 case LMV_HASH_TYPE_CRUSH:
345 stripe_index = lmv_hash_crush(stripe_count, name,
353 LASSERT(stripe_index < stripe_count);
355 if (!new_layout && lmv_hash_is_migrating(saved_hash))
356 stripe_index += migrate_offset;
358 LASSERT(stripe_index < saved_count);
360 CDEBUG(D_INFO, "name %.*s hash=%#x/%#x idx=%d/%u/%u under %s layout\n",
361 namelen, name, saved_hash, migrate_hash, stripe_index,
362 saved_count, migrate_offset, new_layout ? "new" : "old");
/*
 * Map \a name to its stripe index under the NEW layout (new_layout = true is
 * passed to __lmv_name_to_stripe_index()).
 *
 * \a lmv may be in CPU byte order (magic equals LMV_MAGIC_V1) or in
 * little-endian order (magic equals cpu_to_le32(LMV_MAGIC_V1)); in the
 * latter case every field is converted with le32_to_cpu() before use.
 * NOTE(review): the value returned for any other magic is not visible in
 * this excerpt.
 */
367 static inline int lmv_name_to_stripe_index(struct lmv_mds_md_v1 *lmv,
368 const char *name, int namelen)
370 if (lmv->lmv_magic == LMV_MAGIC_V1)
371 return __lmv_name_to_stripe_index(lmv->lmv_hash_type,
372 lmv->lmv_stripe_count,
373 lmv->lmv_migrate_hash,
374 lmv->lmv_migrate_offset,
375 name, namelen, true);
/* magic matches only after byte swap: fields are little-endian */
377 if (lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_V1))
378 return __lmv_name_to_stripe_index(
379 le32_to_cpu(lmv->lmv_hash_type),
380 le32_to_cpu(lmv->lmv_stripe_count),
381 le32_to_cpu(lmv->lmv_migrate_hash),
382 le32_to_cpu(lmv->lmv_migrate_offset),
383 name, namelen, true);
/*
 * Map \a name to its stripe index under the OLD layout (new_layout = false
 * is passed to __lmv_name_to_stripe_index()).
 *
 * Unlike lmv_name_to_stripe_index() this also accepts LMV_MAGIC_STRIPE.  As
 * there, \a lmv may be in CPU byte order or little-endian; little-endian
 * fields are converted with le32_to_cpu() before use.
 * NOTE(review): the value returned for any other magic is not visible in
 * this excerpt.
 */
388 static inline int lmv_name_to_stripe_index_old(struct lmv_mds_md_v1 *lmv,
389 const char *name, int namelen)
391 if (lmv->lmv_magic == LMV_MAGIC_V1 ||
392 lmv->lmv_magic == LMV_MAGIC_STRIPE)
393 return __lmv_name_to_stripe_index(lmv->lmv_hash_type,
394 lmv->lmv_stripe_count,
395 lmv->lmv_migrate_hash,
396 lmv->lmv_migrate_offset,
397 name, namelen, false);
/* magic matches only after byte swap: fields are little-endian */
399 if (lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_V1) ||
400 lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_STRIPE))
401 return __lmv_name_to_stripe_index(
402 le32_to_cpu(lmv->lmv_hash_type),
403 le32_to_cpu(lmv->lmv_stripe_count),
404 le32_to_cpu(lmv->lmv_migrate_hash),
405 le32_to_cpu(lmv->lmv_migrate_offset),
406 name, namelen, false);
411 static inline bool lmv_user_magic_supported(__u32 lum_magic)
413 return lum_magic == LMV_USER_MAGIC ||
414 lum_magic == LMV_USER_MAGIC_SPECIFIC ||
415 lum_magic == LMV_MAGIC_FOREIGN;
/*
 * LMV_DEBUG(mask, lmv, msg): log the header fields of \a lmv prefixed by
 * \a msg.
 *
 * The fields are printed as stored, with no byte-order conversion, and
 * \a lmv is evaluated several times, so it must be free of side effects.
 * Hash names come from mdt_hash_name[], indexed by the hash value masked
 * with (LMV_HASH_TYPE_MAX - 1).
 * NOTE(review): the line that opens the logging call is not visible in this
 * excerpt.
 */
418 #define LMV_DEBUG(mask, lmv, msg) \
420 "%s LMV: magic=%#x count=%u index=%u hash=%s:%#x version=%u migrate offset=%u migrate hash=%s:%u.\n",\
421 msg, (lmv)->lmv_magic, (lmv)->lmv_stripe_count, \
422 (lmv)->lmv_master_mdt_index, \
423 mdt_hash_name[(lmv)->lmv_hash_type & (LMV_HASH_TYPE_MAX - 1)],\
424 (lmv)->lmv_hash_type, (lmv)->lmv_layout_version, \
425 (lmv)->lmv_migrate_offset, \
426 mdt_hash_name[(lmv)->lmv_migrate_hash & (LMV_HASH_TYPE_MAX - 1)],\
427 (lmv)->lmv_migrate_hash)
429 /* master LMV is sane */
/*
 * Checks a little-endian LMV: the magic must be LMV_MAGIC_V1, the stripe
 * count non-zero and the hash type known to lmv_is_known_hash_type().  An
 * "insane" LMV is logged through LMV_DEBUG(D_ERROR, ...).
 * NOTE(review): the statements taken when a check fails and the return
 * statements are not visible in this excerpt; presumably true is returned
 * only when every check passes - confirm.
 */
430 static inline bool lmv_is_sane(const struct lmv_mds_md_v1 *lmv)
435 if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1)
438 if (le32_to_cpu(lmv->lmv_stripe_count) == 0)
441 if (!lmv_is_known_hash_type(le32_to_cpu(lmv->lmv_hash_type)))
446 LMV_DEBUG(D_ERROR, lmv, "insane");
450 /* LMV can be either master or stripe LMV */
/*
 * Same checks as lmv_is_sane() on a little-endian LMV, except that the magic
 * may be either LMV_MAGIC_V1 or LMV_MAGIC_STRIPE.  An "insane" LMV is logged
 * through LMV_DEBUG(D_ERROR, ...).
 * NOTE(review): the statements taken when a check fails and the return
 * statements are not visible in this excerpt; presumably true is returned
 * only when every check passes - confirm.
 */
451 static inline bool lmv_is_sane2(const struct lmv_mds_md_v1 *lmv)
456 if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1 &&
457 le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_STRIPE)
460 if (le32_to_cpu(lmv->lmv_stripe_count) == 0)
463 if (!lmv_is_known_hash_type(le32_to_cpu(lmv->lmv_hash_type)))
468 LMV_DEBUG(D_ERROR, lmv, "insane");
472 static inline bool lmv_is_splitting(const struct lmv_mds_md_v1 *lmv)
474 if (!lmv_is_sane2(lmv))
477 return lmv_hash_is_splitting(cpu_to_le32(lmv->lmv_hash_type));
480 static inline bool lmv_is_merging(const struct lmv_mds_md_v1 *lmv)
482 if (!lmv_is_sane2(lmv))
485 return lmv_hash_is_merging(cpu_to_le32(lmv->lmv_hash_type));
488 static inline bool lmv_is_migrating(const struct lmv_mds_md_v1 *lmv)
490 if (!lmv_is_sane(lmv))
493 return lmv_hash_is_migrating(cpu_to_le32(lmv->lmv_hash_type));
496 static inline bool lmv_is_restriping(const struct lmv_mds_md_v1 *lmv)
498 if (!lmv_is_sane2(lmv))
501 return lmv_hash_is_splitting(cpu_to_le32(lmv->lmv_hash_type)) ||
502 lmv_hash_is_merging(cpu_to_le32(lmv->lmv_hash_type));
505 static inline bool lmv_is_layout_changing(const struct lmv_mds_md_v1 *lmv)
507 if (!lmv_is_sane2(lmv))
510 return lmv_hash_is_splitting(cpu_to_le32(lmv->lmv_hash_type)) ||
511 lmv_hash_is_merging(cpu_to_le32(lmv->lmv_hash_type)) ||
512 lmv_hash_is_migrating(cpu_to_le32(lmv->lmv_hash_type));
/*
 * Tell whether LMV_HASH_FLAG_FIXED is set in the hash type of \a lmv.  No
 * sanity check is performed on \a lmv here.
 * NOTE(review): the field is passed through cpu_to_le32() although the
 * sanity helpers in this file read the same structure with le32_to_cpu();
 * both perform the same byte swap, but le32_to_cpu() looks like the intended
 * direction - confirm.
 */
515 static inline bool lmv_is_fixed(const struct lmv_mds_md_v1 *lmv)
517 return cpu_to_le32(lmv->lmv_hash_type) & LMV_HASH_FLAG_FIXED;