/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License version 2 for more details.  A copy is
 * included in the COPYING file that accompanied this code.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2014, 2016, Intel Corporation.
 */
/*
 * lustre/include/lustre_lmv.h
 *
 * Lustre LMV structures and functions.
 *
 * Author: Di Wang <di.wang@intel.com>
 */

#ifndef _LUSTRE_LMV_H
#define _LUSTRE_LMV_H
#include <uapi/linux/lustre/lustre_idl.h>

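/* Per-stripe object info: the stripe's FID, the index of the MDT it lives
 * on, and its in-memory root inode.
 */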
struct lmv_oinfo {
        struct lu_fid   lmo_fid;
        u32             lmo_mds;
        struct inode    *lmo_root;
};

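/* In-memory (unpacked) striping descriptor of a directory: magic, stripe
 * count, master MDT index, hash type and inheritance limits, plus the
 * migration/split/merge state and one lmv_oinfo per stripe.
 */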
struct lmv_stripe_md {
        __u32   lsm_md_magic;
        __u32   lsm_md_stripe_count;
        __u32   lsm_md_master_mdt_index;
        __u32   lsm_md_hash_type;
        __u8    lsm_md_max_inherit;
        __u8    lsm_md_max_inherit_rr;
        __u32   lsm_md_layout_version;
        __u32   lsm_md_migrate_offset;
        __u32   lsm_md_migrate_hash;
        __u32   lsm_md_default_count;
        __u32   lsm_md_default_index;
        char    lsm_md_pool_name[LOV_MAXPOOLNAME + 1];
        struct lmv_oinfo lsm_md_oinfo[0];
};

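/* true if @lsm describes a striped directory (LMV_MAGIC) */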
static inline bool lmv_dir_striped(const struct lmv_stripe_md *lsm)
{
        return lsm && lsm->lsm_md_magic == LMV_MAGIC;
}

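/* true if @lsm carries a foreign layout (LMV_MAGIC_FOREIGN) */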
static inline bool lmv_dir_foreign(const struct lmv_stripe_md *lsm)
{
        return lsm && lsm->lsm_md_magic == LMV_MAGIC_FOREIGN;
}

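/* true if the directory layout is being changed (split, merge or migration) */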
static inline bool lmv_dir_layout_changing(const struct lmv_stripe_md *lsm)
{
        return lmv_dir_striped(lsm) &&
               lmv_hash_is_layout_changing(lsm->lsm_md_hash_type);
}

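/* true if a striped directory uses an unknown or explicitly bad hash type */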
static inline bool lmv_dir_bad_hash(const struct lmv_stripe_md *lsm)
{
        if (!lmv_dir_striped(lsm))
                return false;

        if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_BAD_TYPE)
                return true;

        return !lmv_is_known_hash_type(lsm->lsm_md_hash_type);
}

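/* compare two stripe descriptors field by field, including per-stripe FIDs */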
static inline bool
lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
{
        __u32 idx;

        if (lsm1->lsm_md_magic != lsm2->lsm_md_magic ||
            lsm1->lsm_md_stripe_count != lsm2->lsm_md_stripe_count ||
            lsm1->lsm_md_master_mdt_index !=
                                lsm2->lsm_md_master_mdt_index ||
            lsm1->lsm_md_hash_type != lsm2->lsm_md_hash_type ||
            lsm1->lsm_md_layout_version !=
                                lsm2->lsm_md_layout_version ||
            lsm1->lsm_md_migrate_offset !=
                                lsm2->lsm_md_migrate_offset ||
            lsm1->lsm_md_migrate_hash !=
                                lsm2->lsm_md_migrate_hash ||
            strncmp(lsm1->lsm_md_pool_name, lsm2->lsm_md_pool_name,
                    sizeof(lsm1->lsm_md_pool_name)) != 0)
                return false;

        if (lmv_dir_striped(lsm1)) {
                for (idx = 0; idx < lsm1->lsm_md_stripe_count; idx++) {
                        if (!lu_fid_eq(&lsm1->lsm_md_oinfo[idx].lmo_fid,
                                       &lsm2->lsm_md_oinfo[idx].lmo_fid))
                                return false;
                }
        }

        return true;
}

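/* dump a stripe descriptor to the debug log at the given mask */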
static inline void lsm_md_dump(int mask, const struct lmv_stripe_md *lsm)
{
        bool bad_hash = lmv_dir_bad_hash(lsm);
        int i;

        /* If lsm_md_magic == LMV_MAGIC_FOREIGN, pool_name may not be a
         * null-terminated string, so only print LOV_MAXPOOLNAME bytes.
         */
        CDEBUG(mask,
               "magic %#x stripe count %d master mdt %d hash type %s:%#x max inherit %hhu version %d migrate offset %d migrate hash %#x pool %.*s\n",
               lsm->lsm_md_magic, lsm->lsm_md_stripe_count,
               lsm->lsm_md_master_mdt_index,
               bad_hash ? "invalid hash" :
                          mdt_hash_name[lsm->lsm_md_hash_type & (LMV_HASH_TYPE_MAX - 1)],
               lsm->lsm_md_hash_type, lsm->lsm_md_max_inherit,
               lsm->lsm_md_layout_version,
               lsm->lsm_md_migrate_offset, lsm->lsm_md_migrate_hash,
               LOV_MAXPOOLNAME, lsm->lsm_md_pool_name);

        if (!lmv_dir_striped(lsm))
                return;

        for (i = 0; i < lsm->lsm_md_stripe_count; i++)
                CDEBUG(mask, "stripe[%d] "DFID"\n",
                       i, PFID(&lsm->lsm_md_oinfo[i].lmo_fid));
}

union lmv_mds_md;

void lmv_free_memmd(struct lmv_stripe_md *lsm);

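/* convert an on-disk (little-endian) LMV v1 to CPU byte order; stripe FIDs
 * are only converted when the stripe count is within LMV_MAX_STRIPE_COUNT
 */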
static inline void lmv1_le_to_cpu(struct lmv_mds_md_v1 *lmv_dst,
                                  const struct lmv_mds_md_v1 *lmv_src)
{
        __u32 i;

        lmv_dst->lmv_magic = le32_to_cpu(lmv_src->lmv_magic);
        lmv_dst->lmv_stripe_count = le32_to_cpu(lmv_src->lmv_stripe_count);
        lmv_dst->lmv_master_mdt_index =
                                le32_to_cpu(lmv_src->lmv_master_mdt_index);
        lmv_dst->lmv_hash_type = le32_to_cpu(lmv_src->lmv_hash_type);
        lmv_dst->lmv_layout_version = le32_to_cpu(lmv_src->lmv_layout_version);
        /* use the already-converted count so the check is correct regardless
         * of host byte order and of whether dst aliases src
         */
        if (lmv_dst->lmv_stripe_count > LMV_MAX_STRIPE_COUNT)
                return;
        for (i = 0; i < lmv_dst->lmv_stripe_count; i++)
                fid_le_to_cpu(&lmv_dst->lmv_stripe_fids[i],
                              &lmv_src->lmv_stripe_fids[i]);
}

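/* byte-swap any supported LMV union member; unknown magics are ignored */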
static inline void lmv_le_to_cpu(union lmv_mds_md *lmv_dst,
                                 const union lmv_mds_md *lmv_src)
{
        switch (le32_to_cpu(lmv_src->lmv_magic)) {
        case LMV_MAGIC_V1:
                lmv1_le_to_cpu(&lmv_dst->lmv_md_v1, &lmv_src->lmv_md_v1);
                break;
        default:
                break;
        }
}

/* This hash is only for testing purposes */
static inline unsigned int
lmv_hash_all_chars(unsigned int count, const char *name, int namelen)
{
        unsigned int c = 0;
        const unsigned char *p = (const unsigned char *)name;

        while (--namelen >= 0)
                c += p[namelen];

        c = c % count;

        return c;
}

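/* FNV-1a based name hash, reduced modulo the stripe count */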
static inline unsigned int
lmv_hash_fnv1a(unsigned int count, const char *name, int namelen)
{
        __u64 hash;

        hash = lustre_hash_fnv_1a_64(name, namelen);

        return do_div(hash, count);
}

/*
 * Robert Jenkins' function for mixing 32-bit values
 * http://burtleburtle.net/bob/hash/evahash.html
 * a, b = random bits, c = input and output
 *
 * Mixing inputs to generate an evenly distributed hash.
 */
#define crush_hashmix(a, b, c)                          \
do {                                                    \
        a = a - b;  a = a - c;  a = a ^ (c >> 13);      \
        b = b - c;  b = b - a;  b = b ^ (a << 8);       \
        c = c - a;  c = c - b;  c = c ^ (b >> 13);      \
        a = a - b;  a = a - c;  a = a ^ (c >> 12);      \
        b = b - c;  b = b - a;  b = b ^ (a << 16);      \
        c = c - a;  c = c - b;  c = c ^ (b >> 5);       \
        a = a - b;  a = a - c;  a = a ^ (c >> 3);       \
        b = b - c;  b = b - a;  b = b ^ (a << 10);      \
        c = c - a;  c = c - b;  c = c ^ (b >> 15);      \
} while (0)

#define crush_hash_seed 1315423911

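/* mix two 32-bit values (below: a PG id and a stripe index) into a
 * pseudo-random straw value
 */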
static inline __u32 crush_hash(__u32 a, __u32 b)
{
        __u32 hash = crush_hash_seed ^ a ^ b;
        __u32 x = 231232;
        __u32 y = 1232;

        crush_hashmix(a, b, hash);
        crush_hashmix(x, a, hash);
        crush_hashmix(b, y, hash);

        return hash;
}

/* refer to https://github.com/ceph/ceph/blob/master/src/crush/hash.c and
 * https://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf for details of the CRUSH
 * algorithm.
 */
static inline unsigned int
lmv_hash_crush(unsigned int count, const char *name, int namelen)
{
        unsigned long long straw;
        unsigned long long highest_straw = 0;
        unsigned int pg_id;
        unsigned int idx = 0;
        int i;

        /* Place temporary and backup files on the same MDT as the target
         * file.  Temporary file naming rules:
         * 1. rsync: .<target>.XXXXXX
         * 2. dstripe: <target>.XXXXXXXX
         */
        if (lu_name_is_temp_file(name, namelen, true, 6)) {
                name++;
                namelen -= 8;
        } else if (lu_name_is_temp_file(name, namelen, false, 8)) {
                namelen -= 9;
        } else if (lu_name_is_backup_file(name, namelen, &i)) {
                LASSERT(i < namelen);
                namelen -= i;
        }

        pg_id = lmv_hash_fnv1a(LMV_CRUSH_PG_COUNT, name, namelen);

        /* Distribute PGs among all stripes pseudo-randomly, so they are
         * almost evenly distributed, and when the stripe count changes only
         * (delta / total) of the sub-files need to be moved.  Here 'delta' is
         * the number of stripes added or removed, and 'total' is the stripe
         * count before the change for a removal, or after the change for an
         * addition.
         */
        for (i = 0; i < count; i++) {
                straw = crush_hash(pg_id, i);
                if (straw > highest_straw) {
                        highest_straw = straw;
                        idx = i;
                }
        }
        LASSERT(idx < count);

        return idx;
}

/* A directory layout may change in three ways:
 * 1. directory migration: in its LMV the source stripes are appended after
 *    the target stripes; \a migrate_hash is the source hash type and
 *    \a migrate_offset is the target stripe count.
 * 2. directory split: \a migrate_hash is the hash type before the split and
 *    \a migrate_offset is the stripe count before the split.
 * 3. directory merge: \a migrate_hash is the hash type after the merge and
 *    \a migrate_offset is the stripe count after the merge.
 */
static inline int
__lmv_name_to_stripe_index(__u32 hash_type, __u32 stripe_count,
                           __u32 migrate_hash, __u32 migrate_offset,
                           const char *name, int namelen, bool new_layout)
{
        __u32 saved_hash = hash_type;
        __u32 saved_count = stripe_count;
        int stripe_index = 0;

        LASSERT(namelen > 0);
        LASSERT(stripe_count > 0);

        if (lmv_hash_is_splitting(hash_type)) {
                if (!new_layout) {
                        hash_type = migrate_hash;
                        stripe_count = migrate_offset;
                }
        } else if (lmv_hash_is_merging(hash_type)) {
                if (new_layout) {
                        hash_type = migrate_hash;
                        stripe_count = migrate_offset;
                }
        } else if (lmv_hash_is_migrating(hash_type)) {
                if (new_layout) {
                        stripe_count = migrate_offset;
                } else {
                        hash_type = migrate_hash;
                        stripe_count -= migrate_offset;
                }
        }

        if (stripe_count > 1) {
                switch (hash_type & LMV_HASH_TYPE_MASK) {
                case LMV_HASH_TYPE_ALL_CHARS:
                        stripe_index = lmv_hash_all_chars(stripe_count, name,
                                                          namelen);
                        break;
                case LMV_HASH_TYPE_FNV_1A_64:
                        stripe_index = lmv_hash_fnv1a(stripe_count, name,
                                                      namelen);
                        break;
                case LMV_HASH_TYPE_CRUSH:
                        stripe_index = lmv_hash_crush(stripe_count, name,
                                                      namelen);
                        break;
                default:
                        return -EBADFD;
                }
        }

        LASSERT(stripe_index < stripe_count);

        if (!new_layout && lmv_hash_is_migrating(saved_hash))
                stripe_index += migrate_offset;

        LASSERT(stripe_index < saved_count);

        CDEBUG(D_INFO, "name %.*s hash=%#x/%#x idx=%d/%u/%u under %s layout\n",
               namelen, name, saved_hash, migrate_hash, stripe_index,
               saved_count, migrate_offset, new_layout ? "new" : "old");

        return stripe_index;
}

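/*
 * Example (illustrative): for a directory migrating from 2 source stripes to
 * 4 target stripes, lmv_stripe_count is 6, lmv_migrate_offset is 4 and
 * lmv_migrate_hash is the source hash type.  Under the new layout a name is
 * hashed over the first 4 stripes; under the old layout it is hashed over the
 * trailing 2 stripes and the result is shifted up by the offset of 4.
 */

/* pick the stripe for @name under the up-to-date (new) layout; the LMV may
 * be in either CPU or little-endian byte order (LMV_MAGIC_V1)
 */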
static inline int lmv_name_to_stripe_index(struct lmv_mds_md_v1 *lmv,
                                           const char *name, int namelen)
{
        if (lmv->lmv_magic == LMV_MAGIC_V1)
                return __lmv_name_to_stripe_index(lmv->lmv_hash_type,
                                                  lmv->lmv_stripe_count,
                                                  lmv->lmv_migrate_hash,
                                                  lmv->lmv_migrate_offset,
                                                  name, namelen, true);

        if (lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_V1))
                return __lmv_name_to_stripe_index(
                                        le32_to_cpu(lmv->lmv_hash_type),
                                        le32_to_cpu(lmv->lmv_stripe_count),
                                        le32_to_cpu(lmv->lmv_migrate_hash),
                                        le32_to_cpu(lmv->lmv_migrate_offset),
                                        name, namelen, true);

        return -EINVAL;
}

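/* pick the stripe for @name under the previous (old) layout during a layout
 * change; stripe LMVs (LMV_MAGIC_STRIPE) are also accepted
 */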
static inline int lmv_name_to_stripe_index_old(struct lmv_mds_md_v1 *lmv,
                                               const char *name, int namelen)
{
        if (lmv->lmv_magic == LMV_MAGIC_V1 ||
            lmv->lmv_magic == LMV_MAGIC_STRIPE)
                return __lmv_name_to_stripe_index(lmv->lmv_hash_type,
                                                  lmv->lmv_stripe_count,
                                                  lmv->lmv_migrate_hash,
                                                  lmv->lmv_migrate_offset,
                                                  name, namelen, false);

        if (lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_V1) ||
            lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_STRIPE))
                return __lmv_name_to_stripe_index(
                                        le32_to_cpu(lmv->lmv_hash_type),
                                        le32_to_cpu(lmv->lmv_stripe_count),
                                        le32_to_cpu(lmv->lmv_migrate_hash),
                                        le32_to_cpu(lmv->lmv_migrate_offset),
                                        name, namelen, false);

        return -EINVAL;
}

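/* user-visible LMV magics that are accepted from userspace */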
static inline bool lmv_user_magic_supported(__u32 lum_magic)
{
        return lum_magic == LMV_USER_MAGIC ||
               lum_magic == LMV_USER_MAGIC_SPECIFIC ||
               lum_magic == LMV_MAGIC_FOREIGN;
}

#define LMV_DEBUG(mask, lmv, msg)                                       \
        CDEBUG(mask,                                                    \
               "%s LMV: magic=%#x count=%u index=%u hash=%s:%#x version=%u migrate offset=%u migrate hash=%s:%u.\n",\
               msg, (lmv)->lmv_magic, (lmv)->lmv_stripe_count,          \
               (lmv)->lmv_master_mdt_index,                             \
               mdt_hash_name[(lmv)->lmv_hash_type & (LMV_HASH_TYPE_MAX - 1)],\
               (lmv)->lmv_hash_type, (lmv)->lmv_layout_version,         \
               (lmv)->lmv_migrate_offset,                               \
               mdt_hash_name[(lmv)->lmv_migrate_hash & (LMV_HASH_TYPE_MAX - 1)],\
               (lmv)->lmv_migrate_hash)

/* check that a master LMV is sane */
static inline bool lmv_is_sane(const struct lmv_mds_md_v1 *lmv)
{
        if (!lmv)
                return false;

        if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1)
                goto insane;

        if (le32_to_cpu(lmv->lmv_stripe_count) == 0)
                goto insane;

        if (!lmv_is_known_hash_type(le32_to_cpu(lmv->lmv_hash_type)))
                goto insane;

        return true;
insane:
        LMV_DEBUG(D_ERROR, lmv, "insane");
        return false;
}

/* check that an LMV is sane; it can be either a master or a stripe LMV */
static inline bool lmv_is_sane2(const struct lmv_mds_md_v1 *lmv)
{
        if (!lmv)
                return false;

        if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1 &&
            le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_STRIPE)
                goto insane;

        if (le32_to_cpu(lmv->lmv_stripe_count) == 0)
                goto insane;

        if (!lmv_is_known_hash_type(le32_to_cpu(lmv->lmv_hash_type)))
                goto insane;

        return true;
insane:
        LMV_DEBUG(D_ERROR, lmv, "insane");
        return false;
}

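/* The layout-state predicates below take an on-disk (little-endian) LMV and
 * test its hash-type flags after conversion to CPU byte order.
 */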
static inline bool lmv_is_splitting(const struct lmv_mds_md_v1 *lmv)
{
        if (!lmv_is_sane2(lmv))
                return false;

        return lmv_hash_is_splitting(le32_to_cpu(lmv->lmv_hash_type));
}

static inline bool lmv_is_merging(const struct lmv_mds_md_v1 *lmv)
{
        if (!lmv_is_sane2(lmv))
                return false;

        return lmv_hash_is_merging(le32_to_cpu(lmv->lmv_hash_type));
}

static inline bool lmv_is_migrating(const struct lmv_mds_md_v1 *lmv)
{
        if (!lmv_is_sane(lmv))
                return false;

        return lmv_hash_is_migrating(le32_to_cpu(lmv->lmv_hash_type));
}

static inline bool lmv_is_restriping(const struct lmv_mds_md_v1 *lmv)
{
        if (!lmv_is_sane2(lmv))
                return false;

        return lmv_hash_is_splitting(le32_to_cpu(lmv->lmv_hash_type)) ||
               lmv_hash_is_merging(le32_to_cpu(lmv->lmv_hash_type));
}

static inline bool lmv_is_layout_changing(const struct lmv_mds_md_v1 *lmv)
{
        if (!lmv_is_sane2(lmv))
                return false;

        return lmv_hash_is_splitting(le32_to_cpu(lmv->lmv_hash_type)) ||
               lmv_hash_is_merging(le32_to_cpu(lmv->lmv_hash_type)) ||
               lmv_hash_is_migrating(le32_to_cpu(lmv->lmv_hash_type));
}

static inline bool lmv_is_fixed(const struct lmv_mds_md_v1 *lmv)
{
        return le32_to_cpu(lmv->lmv_hash_type) & LMV_HASH_FLAG_FIXED;
}

#endif