Whamcloud - gitweb
LU-15850 llite: pass dmv inherit depth instead of dir depth
[fs/lustre-release.git] / lustre / include / lustre_lmv.h
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2014, 2016, Intel Corporation.
24  */
25 /*
26  * lustre/include/lustre_lmv.h
27  *
28  * Lustre LMV structures and functions.
29  *
30  * Author: Di Wang <di.wang@intel.com>
31  */
32
33 #ifndef _LUSTRE_LMV_H
34 #define _LUSTRE_LMV_H
35 #include <uapi/linux/lustre/lustre_idl.h>
36
/*
 * Per-stripe entry stored in the trailing lsm_md_oinfo[] array of
 * struct lmv_stripe_md.
 */
struct lmv_oinfo {
        struct lu_fid   lmo_fid;        /* FID of the stripe; compared by lsm_md_eq() for striped dirs */
        u32             lmo_mds;        /* compared by lsm_md_eq() for LMV_USER_MAGIC_SPECIFIC layouts;
                                         * presumably the MDT index of the stripe - TODO confirm */
        struct inode    *lmo_root;      /* inode associated with the stripe; not used in this header */
};
42
/*
 * In-memory description of a directory layout.  The fixed part is followed
 * by lsm_md_stripe_count entries of struct lmv_oinfo.
 *
 * NOTE(review): lsm_md_oinfo is declared as a zero-length array; a C99
 * flexible array member would be the modern spelling - confirm that no user
 * embeds this struct before changing it.
 */
struct lmv_stripe_md {
        __u32   lsm_md_magic;           /* LMV_MAGIC for striped, LMV_MAGIC_FOREIGN for foreign */
        __u32   lsm_md_stripe_count;    /* number of valid lsm_md_oinfo[] entries */
        __u32   lsm_md_master_mdt_index;
        __u32   lsm_md_hash_type;       /* hash type (LMV_HASH_TYPE_MASK) plus LMV_HASH_FLAG_* bits */
        __u8    lsm_md_max_inherit;
        __u8    lsm_md_max_inherit_rr;
        __u32   lsm_md_layout_version;
        __u32   lsm_md_migrate_offset;
        __u32   lsm_md_migrate_hash;
        char    lsm_md_pool_name[LOV_MAXPOOLNAME + 1];  /* compared with strncmp() over the full buffer */
        struct lmv_oinfo lsm_md_oinfo[0];       /* per-stripe entries, lsm_md_stripe_count of them */
};
56
57 static inline bool lmv_dir_striped(const struct lmv_stripe_md *lsm)
58 {
59         return lsm && lsm->lsm_md_magic == LMV_MAGIC;
60 }
61
62 static inline bool lmv_dir_foreign(const struct lmv_stripe_md *lsm)
63 {
64         return lsm && lsm->lsm_md_magic == LMV_MAGIC_FOREIGN;
65 }
66
67 static inline bool lmv_dir_layout_changing(const struct lmv_stripe_md *lsm)
68 {
69         return lmv_dir_striped(lsm) &&
70                lmv_hash_is_layout_changing(lsm->lsm_md_hash_type);
71 }
72
73 static inline bool lmv_dir_bad_hash(const struct lmv_stripe_md *lsm)
74 {
75         if (!lmv_dir_striped(lsm))
76                 return false;
77
78         if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_BAD_TYPE)
79                 return true;
80
81         return !lmv_is_known_hash_type(lsm->lsm_md_hash_type);
82 }
83
84 static inline bool
85 lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
86 {
87         __u32 idx;
88
89         if (lsm1->lsm_md_magic != lsm2->lsm_md_magic ||
90             lsm1->lsm_md_stripe_count != lsm2->lsm_md_stripe_count ||
91             lsm1->lsm_md_master_mdt_index !=
92                                 lsm2->lsm_md_master_mdt_index ||
93             lsm1->lsm_md_hash_type != lsm2->lsm_md_hash_type ||
94             lsm1->lsm_md_max_inherit != lsm2->lsm_md_max_inherit ||
95             lsm1->lsm_md_max_inherit_rr != lsm2->lsm_md_max_inherit_rr ||
96             lsm1->lsm_md_layout_version !=
97                                 lsm2->lsm_md_layout_version ||
98             lsm1->lsm_md_migrate_offset !=
99                                 lsm2->lsm_md_migrate_offset ||
100             lsm1->lsm_md_migrate_hash !=
101                                 lsm2->lsm_md_migrate_hash ||
102             strncmp(lsm1->lsm_md_pool_name, lsm2->lsm_md_pool_name,
103                     sizeof(lsm1->lsm_md_pool_name)) != 0)
104                 return false;
105
106         if (lmv_dir_striped(lsm1)) {
107                 for (idx = 0; idx < lsm1->lsm_md_stripe_count; idx++) {
108                         if (!lu_fid_eq(&lsm1->lsm_md_oinfo[idx].lmo_fid,
109                                        &lsm2->lsm_md_oinfo[idx].lmo_fid))
110                                 return false;
111                 }
112         } else if (lsm1->lsm_md_magic == LMV_USER_MAGIC_SPECIFIC) {
113                 for (idx = 0; idx < lsm1->lsm_md_stripe_count; idx++) {
114                         if (lsm1->lsm_md_oinfo[idx].lmo_mds !=
115                             lsm2->lsm_md_oinfo[idx].lmo_mds)
116                                 return false;
117                 }
118         }
119
120         return true;
121 }
122
123 static inline void lsm_md_dump(int mask, const struct lmv_stripe_md *lsm)
124 {
125         int i;
126
127         CDEBUG_LIMIT(mask,
128                "dump LMV: magic=%#x count=%u index=%u hash=%s:%#x max_inherit=%hhu max_inherit_rr=%hhu version=%u migrate_offset=%u migrate_hash=%s:%x pool=%.*s\n",
129                lsm->lsm_md_magic, lsm->lsm_md_stripe_count,
130                lsm->lsm_md_master_mdt_index,
131                lmv_is_known_hash_type(lsm->lsm_md_hash_type) ?
132                 mdt_hash_name[lsm->lsm_md_hash_type & LMV_HASH_TYPE_MASK] :
133                 "invalid", lsm->lsm_md_hash_type,
134                lsm->lsm_md_max_inherit, lsm->lsm_md_max_inherit_rr,
135                lsm->lsm_md_layout_version, lsm->lsm_md_migrate_offset,
136                lmv_is_known_hash_type(lsm->lsm_md_migrate_hash) ?
137                 mdt_hash_name[lsm->lsm_md_migrate_hash & LMV_HASH_TYPE_MASK] :
138                 "invalid", lsm->lsm_md_migrate_hash,
139                LOV_MAXPOOLNAME, lsm->lsm_md_pool_name);
140
141         if (!lmv_dir_striped(lsm))
142                 return;
143
144         for (i = 0; i < lsm->lsm_md_stripe_count; i++)
145                 CDEBUG(mask, "stripe[%d] "DFID"\n",
146                        i, PFID(&lsm->lsm_md_oinfo[i].lmo_fid));
147 }
148
/* Forward declaration; the full union is defined outside this header. */
union lmv_mds_md;

/*
 * Implemented outside this header.
 * NOTE(review): presumably releases an lsm and its stripe entries - confirm
 * against the definition.
 */
void lmv_free_memmd(struct lmv_stripe_md *lsm);
152
153 static inline void lmv1_le_to_cpu(struct lmv_mds_md_v1 *lmv_dst,
154                                   const struct lmv_mds_md_v1 *lmv_src)
155 {
156         __u32 i;
157
158         lmv_dst->lmv_magic = le32_to_cpu(lmv_src->lmv_magic);
159         lmv_dst->lmv_stripe_count = le32_to_cpu(lmv_src->lmv_stripe_count);
160         lmv_dst->lmv_master_mdt_index =
161                                 le32_to_cpu(lmv_src->lmv_master_mdt_index);
162         lmv_dst->lmv_hash_type = le32_to_cpu(lmv_src->lmv_hash_type);
163         lmv_dst->lmv_layout_version = le32_to_cpu(lmv_src->lmv_layout_version);
164         if (lmv_src->lmv_stripe_count > LMV_MAX_STRIPE_COUNT)
165                 return;
166         for (i = 0; i < lmv_src->lmv_stripe_count; i++)
167                 fid_le_to_cpu(&lmv_dst->lmv_stripe_fids[i],
168                               &lmv_src->lmv_stripe_fids[i]);
169 }
170
171 static inline void lmv_le_to_cpu(union lmv_mds_md *lmv_dst,
172                                  const union lmv_mds_md *lmv_src)
173 {
174         switch (le32_to_cpu(lmv_src->lmv_magic)) {
175         case LMV_MAGIC_V1:
176                 lmv1_le_to_cpu(&lmv_dst->lmv_md_v1, &lmv_src->lmv_md_v1);
177                 break;
178         default:
179                 break;
180         }
181 }
182
/*
 * Trivial hash intended for testing only: add up every byte of \a name
 * (treated as unsigned) and reduce the sum modulo \a count.
 *
 * \param[in] count    number of buckets, must be non-zero
 * \param[in] name     buffer to hash
 * \param[in] namelen  number of bytes of \a name to use; values <= 0 hash to 0
 *
 * \retval bucket index in [0, count)
 */
static inline unsigned int
lmv_hash_all_chars(unsigned int count, const char *name, int namelen)
{
        const unsigned char *bytes = (const unsigned char *)name;
        unsigned int sum = 0;
        int pos;

        for (pos = 0; pos < namelen; pos++)
                sum += bytes[pos];

        return sum % count;
}
197
198 static inline unsigned int
199 lmv_hash_fnv1a(unsigned int count, const char *name, int namelen)
200 {
201         __u64 hash;
202
203         hash = lustre_hash_fnv_1a_64(name, namelen);
204
205         return do_div(hash, count);
206 }
207
/*
 * Robert Jenkins' function for mixing 32-bit values
 * http://burtleburtle.net/bob/hash/evahash.html
 * a, b = random bits, c = input and output
 *
 * Mixes the three values in place to generate an evenly distributed hash.
 * All three arguments are assigned to, so each must be a side-effect-free
 * lvalue.  The statement order defines the hash value and must not change.
 */
#define crush_hashmix(a, b, c)                                  \
do {                                                            \
        (a) -= (b);  (a) -= (c);  (a) ^= ((c) >> 13);           \
        (b) -= (c);  (b) -= (a);  (b) ^= ((a) << 8);            \
        (c) -= (a);  (c) -= (b);  (c) ^= ((b) >> 13);           \
        (a) -= (b);  (a) -= (c);  (a) ^= ((c) >> 12);           \
        (b) -= (c);  (b) -= (a);  (b) ^= ((a) << 16);           \
        (c) -= (a);  (c) -= (b);  (c) ^= ((b) >> 5);            \
        (a) -= (b);  (a) -= (c);  (a) ^= ((c) >> 3);            \
        (b) -= (c);  (b) -= (a);  (b) ^= ((a) << 10);           \
        (c) -= (a);  (c) -= (b);  (c) ^= ((b) >> 15);           \
} while (0)
227
228 #define crush_hash_seed 1315423911
229
230 static inline __u32 crush_hash(__u32 a, __u32 b)
231 {
232         __u32 hash = crush_hash_seed ^ a ^ b;
233         __u32 x = 231232;
234         __u32 y = 1232;
235
236         crush_hashmix(a, b, hash);
237         crush_hashmix(x, a, hash);
238         crush_hashmix(b, y, hash);
239
240         return hash;
241 }
242
243 /* refer to https://github.com/ceph/ceph/blob/master/src/crush/hash.c and
244  * https://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf for details of CRUSH
245  * algorithm.
246  */
247 static inline unsigned int
248 lmv_hash_crush(unsigned int count, const char *name, int namelen, bool crush2)
249 {
250         unsigned long long straw;
251         unsigned long long highest_straw = 0;
252         unsigned int pg_id;
253         unsigned int idx = 0;
254         int i;
255
256         /* put temp and backup file on the same MDT where target is located.
257          * temporary file naming rule:
258          * 1. rsync: .<target>.XXXXXX
259          * 2. dstripe: <target>.XXXXXXXX
260          */
261         if (lu_name_is_temp_file(name, namelen, true, 6, crush2)) {
262                 name++;
263                 namelen -= 8;
264         } else if (lu_name_is_temp_file(name, namelen, false, 8, crush2)) {
265                 namelen -= 9;
266         } else if (lu_name_is_backup_file(name, namelen, &i)) {
267                 LASSERT(i < namelen);
268                 namelen -= i;
269         }
270
271         pg_id = lmv_hash_fnv1a(LMV_CRUSH_PG_COUNT, name, namelen);
272
273         /* distribute PG among all stripes pseudo-randomly, so they are almost
274          * evenly distributed, and when stripe count changes, only (delta /
275          * total) sub files need to be moved, herein 'delta' is added or removed
276          * stripe count, 'total' is total stripe count before change for
277          * removal, or count after change for addition.
278          */
279         for (i = 0; i < count; i++) {
280                 straw = crush_hash(pg_id, i);
281                 if (straw > highest_straw) {
282                         highest_straw = straw;
283                         idx = i;
284                 }
285         }
286         LASSERT(idx < count);
287
288         return idx;
289 }
290
291 /* directory layout may change in three ways:
292  * 1. directory migration, in its LMV source stripes are appended after
293  *    target stripes, \a migrate_hash is source hash type, \a migrate_offset is
294  *    target stripe count,
295  * 2. directory split, \a migrate_hash is hash type before split,
296  *    \a migrate_offset is stripe count before split.
297  * 3. directory merge, \a migrate_hash is hash type after merge,
298  *    \a migrate_offset is stripe count after merge.
299  */
300 static inline int
301 __lmv_name_to_stripe_index(__u32 hash_type, __u32 stripe_count,
302                            __u32 migrate_hash, __u32 migrate_offset,
303                            const char *name, int namelen, bool new_layout)
304 {
305         __u32 saved_hash = hash_type;
306         __u32 saved_count = stripe_count;
307         int stripe_index = 0;
308
309         LASSERT(namelen > 0);
310         LASSERT(stripe_count > 0);
311
312         if (lmv_hash_is_splitting(hash_type)) {
313                 if (!new_layout) {
314                         hash_type = migrate_hash;
315                         stripe_count = migrate_offset;
316                 }
317         } else if (lmv_hash_is_merging(hash_type)) {
318                 if (new_layout) {
319                         hash_type = migrate_hash;
320                         stripe_count = migrate_offset;
321                 }
322         } else if (lmv_hash_is_migrating(hash_type)) {
323                 if (new_layout) {
324                         stripe_count = migrate_offset;
325                 } else {
326                         hash_type = migrate_hash;
327                         stripe_count -= migrate_offset;
328                 }
329         }
330
331         if (stripe_count > 1) {
332                 switch (hash_type & LMV_HASH_TYPE_MASK) {
333                 case LMV_HASH_TYPE_ALL_CHARS:
334                         stripe_index = lmv_hash_all_chars(stripe_count, name,
335                                                           namelen);
336                         break;
337                 case LMV_HASH_TYPE_FNV_1A_64:
338                         stripe_index = lmv_hash_fnv1a(stripe_count, name,
339                                                       namelen);
340                         break;
341                 case LMV_HASH_TYPE_CRUSH:
342                         stripe_index = lmv_hash_crush(stripe_count, name,
343                                                       namelen, false);
344                         break;
345                 case LMV_HASH_TYPE_CRUSH2:
346                         stripe_index = lmv_hash_crush(stripe_count, name,
347                                                       namelen, true);
348                         break;
349                 default:
350                         return -EBADFD;
351                 }
352         }
353
354         LASSERT(stripe_index < stripe_count);
355
356         if (!new_layout && lmv_hash_is_migrating(saved_hash))
357                 stripe_index += migrate_offset;
358
359         LASSERT(stripe_index < saved_count);
360
361         CDEBUG(D_INFO, "name %.*s hash=%#x/%#x idx=%d/%u/%u under %s layout\n",
362                namelen, name, saved_hash, migrate_hash, stripe_index,
363                saved_count, migrate_offset, new_layout ? "new" : "old");
364
365         return stripe_index;
366 }
367
368 static inline int lmv_name_to_stripe_index(struct lmv_mds_md_v1 *lmv,
369                                            const char *name, int namelen)
370 {
371         if (lmv->lmv_magic == LMV_MAGIC_V1 ||
372             lmv->lmv_magic == LMV_MAGIC_STRIPE)
373                 return __lmv_name_to_stripe_index(lmv->lmv_hash_type,
374                                                   lmv->lmv_stripe_count,
375                                                   lmv->lmv_migrate_hash,
376                                                   lmv->lmv_migrate_offset,
377                                                   name, namelen, true);
378
379         if (lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_V1) ||
380             lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_STRIPE))
381                 return __lmv_name_to_stripe_index(
382                                         le32_to_cpu(lmv->lmv_hash_type),
383                                         le32_to_cpu(lmv->lmv_stripe_count),
384                                         le32_to_cpu(lmv->lmv_migrate_hash),
385                                         le32_to_cpu(lmv->lmv_migrate_offset),
386                                         name, namelen, true);
387
388         return -EINVAL;
389 }
390
391 static inline int lmv_name_to_stripe_index_old(struct lmv_mds_md_v1 *lmv,
392                                                const char *name, int namelen)
393 {
394         if (lmv->lmv_magic == LMV_MAGIC_V1 ||
395             lmv->lmv_magic == LMV_MAGIC_STRIPE)
396                 return __lmv_name_to_stripe_index(lmv->lmv_hash_type,
397                                                   lmv->lmv_stripe_count,
398                                                   lmv->lmv_migrate_hash,
399                                                   lmv->lmv_migrate_offset,
400                                                   name, namelen, false);
401
402         if (lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_V1) ||
403             lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_STRIPE))
404                 return __lmv_name_to_stripe_index(
405                                         le32_to_cpu(lmv->lmv_hash_type),
406                                         le32_to_cpu(lmv->lmv_stripe_count),
407                                         le32_to_cpu(lmv->lmv_migrate_hash),
408                                         le32_to_cpu(lmv->lmv_migrate_offset),
409                                         name, namelen, false);
410
411         return -EINVAL;
412 }
413
414 static inline bool lmv_user_magic_supported(__u32 lum_magic)
415 {
416         return lum_magic == LMV_USER_MAGIC ||
417                lum_magic == LMV_USER_MAGIC_SPECIFIC ||
418                lum_magic == LMV_MAGIC_FOREIGN;
419 }
420
/*
 * Log the header fields of \a lmv under debug \a mask, prefixed by \a msg.
 *
 * \a lmv is a pointer expression to a structure with the lmv_* fields used
 * below; the fields are printed as stored, without byte-order conversion.
 * Unknown hash types are printed with the name "invalid".
 *
 * \a lmv is evaluated several times, so it must be free of side effects.
 * Every use is parenthesized so that any pointer expression can be passed.
 */
#define LMV_DEBUG(mask, lmv, msg)                                             \
        CDEBUG_LIMIT(mask,                                                    \
               "%s LMV: magic=%#x count=%u index=%u hash=%s:%#x version=%u migrate_offset=%u migrate_hash=%s:%x pool=%.*s\n",\
               msg, (lmv)->lmv_magic, (lmv)->lmv_stripe_count,                \
               (lmv)->lmv_master_mdt_index,                                   \
               lmv_is_known_hash_type((lmv)->lmv_hash_type) ?                 \
                mdt_hash_name[(lmv)->lmv_hash_type & LMV_HASH_TYPE_MASK] :    \
                "invalid", (lmv)->lmv_hash_type,                              \
               (lmv)->lmv_layout_version, (lmv)->lmv_migrate_offset,          \
               lmv_is_known_hash_type((lmv)->lmv_migrate_hash) ?              \
                mdt_hash_name[(lmv)->lmv_migrate_hash & LMV_HASH_TYPE_MASK] : \
                "invalid", (lmv)->lmv_migrate_hash,                           \
               LOV_MAXPOOLNAME, (lmv)->lmv_pool_name)
434
435 /* master LMV is sane */
436 static inline bool lmv_is_sane(const struct lmv_mds_md_v1 *lmv)
437 {
438         if (!lmv)
439                 return false;
440
441         if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1)
442                 goto insane;
443
444         if (le32_to_cpu(lmv->lmv_stripe_count) == 0)
445                 goto insane;
446
447         if (!lmv_is_known_hash_type(le32_to_cpu(lmv->lmv_hash_type)))
448                 goto insane;
449
450         return true;
451 insane:
452         LMV_DEBUG(D_ERROR, lmv, "unknown layout");
453         return false;
454 }
455
456 /* LMV can be either master or stripe LMV */
457 static inline bool lmv_is_sane2(const struct lmv_mds_md_v1 *lmv)
458 {
459         if (!lmv)
460                 return false;
461
462         if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1 &&
463             le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_STRIPE)
464                 goto insane;
465
466         if (le32_to_cpu(lmv->lmv_stripe_count) == 0)
467                 goto insane;
468
469         if (!lmv_is_known_hash_type(le32_to_cpu(lmv->lmv_hash_type)))
470                 goto insane;
471
472         return true;
473 insane:
474         LMV_DEBUG(D_ERROR, lmv, "unknown layout");
475         return false;
476 }
477
478 static inline bool lmv_is_splitting(const struct lmv_mds_md_v1 *lmv)
479 {
480         if (!lmv_is_sane2(lmv))
481                 return false;
482
483         return lmv_hash_is_splitting(cpu_to_le32(lmv->lmv_hash_type));
484 }
485
486 static inline bool lmv_is_merging(const struct lmv_mds_md_v1 *lmv)
487 {
488         if (!lmv_is_sane2(lmv))
489                 return false;
490
491         return lmv_hash_is_merging(cpu_to_le32(lmv->lmv_hash_type));
492 }
493
494 static inline bool lmv_is_migrating(const struct lmv_mds_md_v1 *lmv)
495 {
496         if (!lmv_is_sane(lmv))
497                 return false;
498
499         return lmv_hash_is_migrating(cpu_to_le32(lmv->lmv_hash_type));
500 }
501
502 static inline bool lmv_is_restriping(const struct lmv_mds_md_v1 *lmv)
503 {
504         if (!lmv_is_sane2(lmv))
505                 return false;
506
507         return lmv_hash_is_splitting(cpu_to_le32(lmv->lmv_hash_type)) ||
508                lmv_hash_is_merging(cpu_to_le32(lmv->lmv_hash_type));
509 }
510
511 static inline bool lmv_is_layout_changing(const struct lmv_mds_md_v1 *lmv)
512 {
513         if (!lmv_is_sane2(lmv))
514                 return false;
515
516         return lmv_hash_is_splitting(cpu_to_le32(lmv->lmv_hash_type)) ||
517                lmv_hash_is_merging(cpu_to_le32(lmv->lmv_hash_type)) ||
518                lmv_hash_is_migrating(cpu_to_le32(lmv->lmv_hash_type));
519 }
520
521 static inline bool lmv_is_fixed(const struct lmv_mds_md_v1 *lmv)
522 {
523         return cpu_to_le32(lmv->lmv_hash_type) & LMV_HASH_FLAG_FIXED;
524 }
525
526 static inline __u8 lmv_inherit_next(__u8 inherit)
527 {
528         if (inherit == LMV_INHERIT_END || inherit == LMV_INHERIT_NONE)
529                 return LMV_INHERIT_NONE;
530
531         if (inherit == LMV_INHERIT_UNLIMITED || inherit > LMV_INHERIT_MAX)
532                 return inherit;
533
534         return inherit - 1;
535 }
536
537 static inline __u8 lmv_inherit_rr_next(__u8 inherit_rr)
538 {
539         if (inherit_rr == LMV_INHERIT_RR_NONE ||
540             inherit_rr == LMV_INHERIT_RR_UNLIMITED ||
541             inherit_rr > LMV_INHERIT_RR_MAX)
542                 return inherit_rr;
543
544         return inherit_rr - 1;
545 }
546
547 #endif