Whamcloud - gitweb
LU-14507 mdt: handle default stripe_count=-1 properly
[fs/lustre-release.git] / lustre / include / lustre_lmv.h
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13  * GNU General Public License version 2 for more details.  A copy is
14  * included in the COPYING file that accompanied this code.
15
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2014, 2016, Intel Corporation.
24  */
25 /*
26  * lustre/include/lustre_lmv.h
27  *
28  * Lustre LMV structures and functions.
29  *
30  * Author: Di Wang <di.wang@intel.com>
31  */
32
33 #ifndef _LUSTRE_LMV_H
34 #define _LUSTRE_LMV_H
35 #include <uapi/linux/lustre/lustre_idl.h>
36
/* Per-stripe information of a striped directory. */
struct lmv_oinfo {
	struct lu_fid	lmo_fid;	/* FID of this stripe's object */
	u32		lmo_mds;	/* MDS/MDT number holding this stripe */
	struct inode	*lmo_root;	/* inode of the stripe directory */
};
42
43 struct lmv_stripe_md {
44         __u32   lsm_md_magic;
45         __u32   lsm_md_stripe_count;
46         __u32   lsm_md_master_mdt_index;
47         __u32   lsm_md_hash_type;
48         __u32   lsm_md_layout_version;
49         __u32   lsm_md_migrate_offset;
50         __u32   lsm_md_migrate_hash;
51         __u32   lsm_md_default_count;
52         __u32   lsm_md_default_index;
53         char    lsm_md_pool_name[LOV_MAXPOOLNAME + 1];
54         struct lmv_oinfo lsm_md_oinfo[0];
55 };
56
57 static inline bool lmv_dir_striped(const struct lmv_stripe_md *lsm)
58 {
59         return lsm && lsm->lsm_md_magic == LMV_MAGIC;
60 }
61
62 static inline bool lmv_dir_foreign(const struct lmv_stripe_md *lsm)
63 {
64         return lsm && lsm->lsm_md_magic == LMV_MAGIC_FOREIGN;
65 }
66
67 static inline bool lmv_dir_layout_changing(const struct lmv_stripe_md *lsm)
68 {
69         return lmv_dir_striped(lsm) &&
70                lmv_hash_is_layout_changing(lsm->lsm_md_hash_type);
71 }
72
73 static inline bool lmv_dir_bad_hash(const struct lmv_stripe_md *lsm)
74 {
75         if (!lmv_dir_striped(lsm))
76                 return false;
77
78         if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_BAD_TYPE)
79                 return true;
80
81         return !lmv_is_known_hash_type(lsm->lsm_md_hash_type);
82 }
83
84 static inline bool
85 lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
86 {
87         __u32 idx;
88
89         if (lsm1->lsm_md_magic != lsm2->lsm_md_magic ||
90             lsm1->lsm_md_stripe_count != lsm2->lsm_md_stripe_count ||
91             lsm1->lsm_md_master_mdt_index !=
92                                 lsm2->lsm_md_master_mdt_index ||
93             lsm1->lsm_md_hash_type != lsm2->lsm_md_hash_type ||
94             lsm1->lsm_md_layout_version !=
95                                 lsm2->lsm_md_layout_version ||
96             lsm1->lsm_md_migrate_offset !=
97                                 lsm2->lsm_md_migrate_offset ||
98             lsm1->lsm_md_migrate_hash !=
99                                 lsm2->lsm_md_migrate_hash ||
100             strncmp(lsm1->lsm_md_pool_name, lsm2->lsm_md_pool_name,
101                     sizeof(lsm1->lsm_md_pool_name)) != 0)
102                 return false;
103
104         if (lmv_dir_striped(lsm1)) {
105                 for (idx = 0; idx < lsm1->lsm_md_stripe_count; idx++) {
106                         if (!lu_fid_eq(&lsm1->lsm_md_oinfo[idx].lmo_fid,
107                                        &lsm2->lsm_md_oinfo[idx].lmo_fid))
108                                 return false;
109                 }
110         }
111
112         return true;
113 }
114
/* Dump an unpacked LMV layout to the debug log under debug level \a mask:
 * first the summary fields, then (for striped directories only) one line
 * per stripe FID. */
static inline void lsm_md_dump(int mask, const struct lmv_stripe_md *lsm)
{
	int i;

	/* If lsm_md_magic == LMV_MAGIC_FOREIGN pool_name may not be a null
	 * terminated string so only print LOV_MAXPOOLNAME bytes.
	 */
	CDEBUG(mask,
	       "magic %#x stripe count %d master mdt %d hash type %#x version %d migrate offset %d migrate hash %#x pool %.*s\n",
	       lsm->lsm_md_magic, lsm->lsm_md_stripe_count,
	       lsm->lsm_md_master_mdt_index, lsm->lsm_md_hash_type,
	       lsm->lsm_md_layout_version, lsm->lsm_md_migrate_offset,
	       lsm->lsm_md_migrate_hash,
	       LOV_MAXPOOLNAME, lsm->lsm_md_pool_name);

	/* foreign/non-striped layouts have no lsm_md_oinfo[] to walk */
	if (!lmv_dir_striped(lsm))
		return;

	for (i = 0; i < lsm->lsm_md_stripe_count; i++)
		CDEBUG(mask, "stripe[%d] "DFID"\n",
		       i, PFID(&lsm->lsm_md_oinfo[i].lmo_fid));
}
137
138 union lmv_mds_md;
139
140 void lmv_free_memmd(struct lmv_stripe_md *lsm);
141
142 static inline void lmv1_le_to_cpu(struct lmv_mds_md_v1 *lmv_dst,
143                                   const struct lmv_mds_md_v1 *lmv_src)
144 {
145         __u32 i;
146
147         lmv_dst->lmv_magic = le32_to_cpu(lmv_src->lmv_magic);
148         lmv_dst->lmv_stripe_count = le32_to_cpu(lmv_src->lmv_stripe_count);
149         lmv_dst->lmv_master_mdt_index =
150                                 le32_to_cpu(lmv_src->lmv_master_mdt_index);
151         lmv_dst->lmv_hash_type = le32_to_cpu(lmv_src->lmv_hash_type);
152         lmv_dst->lmv_layout_version = le32_to_cpu(lmv_src->lmv_layout_version);
153         if (lmv_src->lmv_stripe_count > LMV_MAX_STRIPE_COUNT)
154                 return;
155         for (i = 0; i < lmv_src->lmv_stripe_count; i++)
156                 fid_le_to_cpu(&lmv_dst->lmv_stripe_fids[i],
157                               &lmv_src->lmv_stripe_fids[i]);
158 }
159
160 static inline void lmv_le_to_cpu(union lmv_mds_md *lmv_dst,
161                                  const union lmv_mds_md *lmv_src)
162 {
163         switch (le32_to_cpu(lmv_src->lmv_magic)) {
164         case LMV_MAGIC_V1:
165                 lmv1_le_to_cpu(&lmv_dst->lmv_md_v1, &lmv_src->lmv_md_v1);
166                 break;
167         default:
168                 break;
169         }
170 }
171
172 /* This hash is only for testing purpose */
/* This hash is only for testing purpose: sum of all name bytes
 * modulo the stripe count. */
static inline unsigned int
lmv_hash_all_chars(unsigned int count, const char *name, int namelen)
{
	const unsigned char *s = (const unsigned char *)name;
	unsigned int sum = 0;
	int i;

	for (i = 0; i < namelen; i++)
		sum += s[i];

	return sum % count;
}
186
/* Map \a name to a stripe index in [0, count) using the 64-bit FNV-1a hash.
 * Kernel do_div() divides \a hash by \a count in place and returns the
 * remainder, which is the selected index. */
static inline unsigned int
lmv_hash_fnv1a(unsigned int count, const char *name, int namelen)
{
	__u64 hash;

	hash = lustre_hash_fnv_1a_64(name, namelen);

	return do_div(hash, count);
}
196
197 /*
198  * Robert Jenkins' function for mixing 32-bit values
199  * http://burtleburtle.net/bob/hash/evahash.html
200  * a, b = random bits, c = input and output
201  *
202  * Mixing inputs to generate an evenly distributed hash.
203  */
/* Mix three 32-bit values in place (see Jenkins reference above);
 * all operands are unsigned so the shifts/wraps are well defined. */
#define crush_hashmix(a, b, c)                          \
do {                                                    \
	a = a - b;  a = a - c;  a = a ^ (c >> 13);      \
	b = b - c;  b = b - a;  b = b ^ (a << 8);       \
	c = c - a;  c = c - b;  c = c ^ (b >> 13);      \
	a = a - b;  a = a - c;  a = a ^ (c >> 12);      \
	b = b - c;  b = b - a;  b = b ^ (a << 16);      \
	c = c - a;  c = c - b;  c = c ^ (b >> 5);       \
	a = a - b;  a = a - c;  a = a ^ (c >> 3);       \
	b = b - c;  b = b - a;  b = b ^ (a << 10);      \
	c = c - a;  c = c - b;  c = c ^ (b >> 15);      \
} while (0)

/* fixed seed folded into every crush_hash() computation */
#define crush_hash_seed 1315423911
218
219 static inline __u32 crush_hash(__u32 a, __u32 b)
220 {
221         __u32 hash = crush_hash_seed ^ a ^ b;
222         __u32 x = 231232;
223         __u32 y = 1232;
224
225         crush_hashmix(a, b, hash);
226         crush_hashmix(x, a, hash);
227         crush_hashmix(b, y, hash);
228
229         return hash;
230 }
231
232 /* refer to https://github.com/ceph/ceph/blob/master/src/crush/hash.c and
233  * https://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf for details of CRUSH
234  * algorithm.
235  */
/* Map \a name to a stripe index in [0, count) with a CRUSH-style
 * "straw" selection: the name is first hashed into one of
 * LMV_CRUSH_PG_COUNT placement groups, then each stripe draws a
 * pseudo-random straw from the PG id and the highest straw wins. */
static inline unsigned int
lmv_hash_crush(unsigned int count, const char *name, int namelen)
{
	unsigned long long straw;
	unsigned long long highest_straw = 0;
	unsigned int pg_id;
	unsigned int idx = 0;
	int i;

	/* put temp and backup file on the same MDT where target is located.
	 * temporary file naming rule:
	 * 1. rsync: .<target>.XXXXXX
	 * 2. dstripe: <target>.XXXXXXXX
	 */
	if (lu_name_is_temp_file(name, namelen, true, 6)) {
		/* skip leading '.' and drop trailing ".XXXXXX" (1 + 7 = 8) */
		name++;
		namelen -= 8;
	} else if (lu_name_is_temp_file(name, namelen, false, 8)) {
		/* drop trailing ".XXXXXXXX" */
		namelen -= 9;
	} else if (lu_name_is_backup_file(name, namelen, &i)) {
		/* i is set to the backup-suffix length by the helper */
		LASSERT(i < namelen);
		namelen -= i;
	}

	pg_id = lmv_hash_fnv1a(LMV_CRUSH_PG_COUNT, name, namelen);

	/* distribute PG among all stripes pseudo-randomly, so they are almost
	 * evenly distributed, and when stripe count changes, only (delta /
	 * total) sub files need to be moved, herein 'delta' is added or removed
	 * stripe count, 'total' is total stripe count before change for
	 * removal, or count after change for addition.
	 */
	for (i = 0; i < count; i++) {
		straw = crush_hash(pg_id, i);
		if (straw > highest_straw) {
			highest_straw = straw;
			idx = i;
		}
	}
	LASSERT(idx < count);

	return idx;
}
279
280 /* directory layout may change in three ways:
281  * 1. directory migration, in its LMV source stripes are appended after
282  *    target stripes, \a migrate_hash is source hash type, \a migrate_offset is
283  *    target stripe count,
284  * 2. directory split, \a migrate_hash is hash type before split,
285  *    \a migrate_offset is stripe count before split.
286  * 3. directory merge, \a migrate_hash is hash type after merge,
287  *    \a migrate_offset is stripe count after merge.
288  */
/* Map \a name to a stripe index under either the new or the old layout of a
 * directory whose layout may be changing (see the three cases documented
 * above).  \a new_layout selects which side of the change to resolve
 * against.  Returns the stripe index, or -EBADFD for an unknown hash type. */
static inline int
__lmv_name_to_stripe_index(__u32 hash_type, __u32 stripe_count,
			   __u32 migrate_hash, __u32 migrate_offset,
			   const char *name, int namelen, bool new_layout)
{
	__u32 saved_hash = hash_type;
	__u32 saved_count = stripe_count;
	int stripe_index = 0;

	LASSERT(namelen > 0);
	LASSERT(stripe_count > 0);

	/* select the hash type and stripe count of the effective layout:
	 * for split/merge/migration the "other" layout lives in
	 * migrate_hash/migrate_offset */
	if (lmv_hash_is_splitting(hash_type)) {
		if (!new_layout) {
			/* old layout = pre-split hash and count */
			hash_type = migrate_hash;
			stripe_count = migrate_offset;
		}
	} else if (lmv_hash_is_merging(hash_type)) {
		if (new_layout) {
			/* new layout = post-merge hash and count */
			hash_type = migrate_hash;
			stripe_count = migrate_offset;
		}
	} else if (lmv_hash_is_migrating(hash_type)) {
		if (new_layout) {
			/* target stripes are the first migrate_offset ones */
			stripe_count = migrate_offset;
		} else {
			/* source stripes are appended after the target ones */
			hash_type = migrate_hash;
			stripe_count -= migrate_offset;
		}
	}

	if (stripe_count > 1) {
		switch (hash_type & LMV_HASH_TYPE_MASK) {
		case LMV_HASH_TYPE_ALL_CHARS:
			stripe_index = lmv_hash_all_chars(stripe_count, name,
							  namelen);
			break;
		case LMV_HASH_TYPE_FNV_1A_64:
			stripe_index = lmv_hash_fnv1a(stripe_count, name,
						      namelen);
			break;
		case LMV_HASH_TYPE_CRUSH:
			stripe_index = lmv_hash_crush(stripe_count, name,
						      namelen);
			break;
		default:
			return -EBADFD;
		}
	}

	LASSERT(stripe_index < stripe_count);

	/* an old-layout lookup during migration lands in the source stripes,
	 * which start at index migrate_offset */
	if (!new_layout && lmv_hash_is_migrating(saved_hash))
		stripe_index += migrate_offset;

	LASSERT(stripe_index < saved_count);

	CDEBUG(D_INFO, "name %.*s hash=%#x/%#x idx=%d/%u/%u under %s layout\n",
	       namelen, name, saved_hash, migrate_hash, stripe_index,
	       saved_count, migrate_offset, new_layout ? "new" : "old");

	return stripe_index;
}
352
353 static inline int lmv_name_to_stripe_index(struct lmv_mds_md_v1 *lmv,
354                                            const char *name, int namelen)
355 {
356         if (lmv->lmv_magic == LMV_MAGIC_V1)
357                 return __lmv_name_to_stripe_index(lmv->lmv_hash_type,
358                                                   lmv->lmv_stripe_count,
359                                                   lmv->lmv_migrate_hash,
360                                                   lmv->lmv_migrate_offset,
361                                                   name, namelen, true);
362
363         if (lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_V1))
364                 return __lmv_name_to_stripe_index(
365                                         le32_to_cpu(lmv->lmv_hash_type),
366                                         le32_to_cpu(lmv->lmv_stripe_count),
367                                         le32_to_cpu(lmv->lmv_migrate_hash),
368                                         le32_to_cpu(lmv->lmv_migrate_offset),
369                                         name, namelen, true);
370
371         return -EINVAL;
372 }
373
374 static inline int lmv_name_to_stripe_index_old(struct lmv_mds_md_v1 *lmv,
375                                                const char *name, int namelen)
376 {
377         if (lmv->lmv_magic == LMV_MAGIC_V1 ||
378             lmv->lmv_magic == LMV_MAGIC_STRIPE)
379                 return __lmv_name_to_stripe_index(lmv->lmv_hash_type,
380                                                   lmv->lmv_stripe_count,
381                                                   lmv->lmv_migrate_hash,
382                                                   lmv->lmv_migrate_offset,
383                                                   name, namelen, false);
384
385         if (lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_V1) ||
386             lmv->lmv_magic == cpu_to_le32(LMV_MAGIC_STRIPE))
387                 return __lmv_name_to_stripe_index(
388                                         le32_to_cpu(lmv->lmv_hash_type),
389                                         le32_to_cpu(lmv->lmv_stripe_count),
390                                         le32_to_cpu(lmv->lmv_migrate_hash),
391                                         le32_to_cpu(lmv->lmv_migrate_offset),
392                                         name, namelen, false);
393
394         return -EINVAL;
395 }
396
397 static inline bool lmv_user_magic_supported(__u32 lum_magic)
398 {
399         return lum_magic == LMV_USER_MAGIC ||
400                lum_magic == LMV_USER_MAGIC_SPECIFIC ||
401                lum_magic == LMV_MAGIC_FOREIGN;
402 }
403
404 /* master LMV is sane */
405 static inline bool lmv_is_sane(const struct lmv_mds_md_v1 *lmv)
406 {
407         if (!lmv)
408                 return false;
409
410         if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1)
411                 goto insane;
412
413         if (le32_to_cpu(lmv->lmv_stripe_count) == 0)
414                 goto insane;
415
416         if (!lmv_is_known_hash_type(le32_to_cpu(lmv->lmv_hash_type)))
417                 goto insane;
418
419         return true;
420 insane:
421         LMV_DEBUG(D_ERROR, lmv, "insane");
422         return false;
423 }
424
425 /* LMV can be either master or stripe LMV */
426 static inline bool lmv_is_sane2(const struct lmv_mds_md_v1 *lmv)
427 {
428         if (!lmv)
429                 return false;
430
431         if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1 &&
432             le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_STRIPE)
433                 goto insane;
434
435         if (le32_to_cpu(lmv->lmv_stripe_count) == 0)
436                 goto insane;
437
438         if (!lmv_is_known_hash_type(le32_to_cpu(lmv->lmv_hash_type)))
439                 goto insane;
440
441         return true;
442 insane:
443         LMV_DEBUG(D_ERROR, lmv, "insane");
444         return false;
445 }
446
447 static inline bool lmv_is_splitting(const struct lmv_mds_md_v1 *lmv)
448 {
449         if (!lmv_is_sane2(lmv))
450                 return false;
451
452         return lmv_hash_is_splitting(cpu_to_le32(lmv->lmv_hash_type));
453 }
454
455 static inline bool lmv_is_merging(const struct lmv_mds_md_v1 *lmv)
456 {
457         if (!lmv_is_sane2(lmv))
458                 return false;
459
460         return lmv_hash_is_merging(cpu_to_le32(lmv->lmv_hash_type));
461 }
462
463 static inline bool lmv_is_migrating(const struct lmv_mds_md_v1 *lmv)
464 {
465         if (!lmv_is_sane(lmv))
466                 return false;
467
468         return lmv_hash_is_migrating(cpu_to_le32(lmv->lmv_hash_type));
469 }
470
471 static inline bool lmv_is_restriping(const struct lmv_mds_md_v1 *lmv)
472 {
473         if (!lmv_is_sane2(lmv))
474                 return false;
475
476         return lmv_hash_is_splitting(cpu_to_le32(lmv->lmv_hash_type)) ||
477                lmv_hash_is_merging(cpu_to_le32(lmv->lmv_hash_type));
478 }
479
480 static inline bool lmv_is_layout_changing(const struct lmv_mds_md_v1 *lmv)
481 {
482         if (!lmv_is_sane2(lmv))
483                 return false;
484
485         return lmv_hash_is_splitting(cpu_to_le32(lmv->lmv_hash_type)) ||
486                lmv_hash_is_merging(cpu_to_le32(lmv->lmv_hash_type)) ||
487                lmv_hash_is_migrating(cpu_to_le32(lmv->lmv_hash_type));
488 }
489
490 #endif