4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License version 2 for more details. A copy is
14 * included in the COPYING file that accompanied this code.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23 * Copyright (c) 2014, 2016, Intel Corporation.
26 * lustre/include/lustre_lmv.h
28 * Lustre LMV structures and functions.
30 * Author: Di Wang <di.wang@intel.com>
35 #include <uapi/linux/lustre/lustre_idl.h>
38 struct lu_fid lmo_fid;
40 struct inode *lmo_root;
43 struct lmv_stripe_md {
45 __u32 lsm_md_stripe_count;
46 __u32 lsm_md_master_mdt_index;
47 __u32 lsm_md_hash_type;
48 __u32 lsm_md_layout_version;
49 __u32 lsm_md_migrate_offset;
50 __u32 lsm_md_migrate_hash;
51 __u32 lsm_md_default_count;
52 __u32 lsm_md_default_index;
53 char lsm_md_pool_name[LOV_MAXPOOLNAME + 1];
54 struct lmv_oinfo lsm_md_oinfo[0];
57 static inline bool lmv_dir_striped(const struct lmv_stripe_md *lsm)
59 return lsm && lsm->lsm_md_magic == LMV_MAGIC;
62 static inline bool lmv_dir_foreign(const struct lmv_stripe_md *lsm)
64 return lsm && lsm->lsm_md_magic == LMV_MAGIC_FOREIGN;
67 static inline bool lmv_dir_migrating(const struct lmv_stripe_md *lsm)
69 return lmv_dir_striped(lsm) &&
70 lsm->lsm_md_hash_type & LMV_HASH_FLAG_MIGRATION;
73 static inline bool lmv_dir_bad_hash(const struct lmv_stripe_md *lsm)
75 if (!lmv_dir_striped(lsm))
78 if (lmv_dir_migrating(lsm) &&
79 lsm->lsm_md_stripe_count - lsm->lsm_md_migrate_offset <= 1)
82 if (lsm->lsm_md_hash_type & LMV_HASH_FLAG_BAD_TYPE)
85 return !lmv_is_known_hash_type(lsm->lsm_md_hash_type);
89 lsm_md_eq(const struct lmv_stripe_md *lsm1, const struct lmv_stripe_md *lsm2)
93 if (lsm1->lsm_md_magic != lsm2->lsm_md_magic ||
94 lsm1->lsm_md_stripe_count != lsm2->lsm_md_stripe_count ||
95 lsm1->lsm_md_master_mdt_index !=
96 lsm2->lsm_md_master_mdt_index ||
97 lsm1->lsm_md_hash_type != lsm2->lsm_md_hash_type ||
98 lsm1->lsm_md_layout_version !=
99 lsm2->lsm_md_layout_version ||
100 lsm1->lsm_md_migrate_offset !=
101 lsm2->lsm_md_migrate_offset ||
102 lsm1->lsm_md_migrate_hash !=
103 lsm2->lsm_md_migrate_hash ||
104 strncmp(lsm1->lsm_md_pool_name, lsm2->lsm_md_pool_name,
105 sizeof(lsm1->lsm_md_pool_name)) != 0)
108 if (lmv_dir_striped(lsm1)) {
109 for (idx = 0; idx < lsm1->lsm_md_stripe_count; idx++) {
110 if (!lu_fid_eq(&lsm1->lsm_md_oinfo[idx].lmo_fid,
111 &lsm2->lsm_md_oinfo[idx].lmo_fid))
119 static inline void lsm_md_dump(int mask, const struct lmv_stripe_md *lsm)
123 /* If lsm_md_magic == LMV_MAGIC_FOREIGN pool_name may not be a null
124 * terminated string so only print LOV_MAXPOOLNAME bytes.
127 "magic %#x stripe count %d master mdt %d hash type %#x version %d migrate offset %d migrate hash %#x pool %.*s\n",
128 lsm->lsm_md_magic, lsm->lsm_md_stripe_count,
129 lsm->lsm_md_master_mdt_index, lsm->lsm_md_hash_type,
130 lsm->lsm_md_layout_version, lsm->lsm_md_migrate_offset,
131 lsm->lsm_md_migrate_hash,
132 LOV_MAXPOOLNAME, lsm->lsm_md_pool_name);
134 if (!lmv_dir_striped(lsm))
137 for (i = 0; i < lsm->lsm_md_stripe_count; i++)
138 CDEBUG(mask, "stripe[%d] "DFID"\n",
139 i, PFID(&lsm->lsm_md_oinfo[i].lmo_fid));
144 void lmv_free_memmd(struct lmv_stripe_md *lsm);
146 static inline void lmv1_le_to_cpu(struct lmv_mds_md_v1 *lmv_dst,
147 const struct lmv_mds_md_v1 *lmv_src)
151 lmv_dst->lmv_magic = le32_to_cpu(lmv_src->lmv_magic);
152 lmv_dst->lmv_stripe_count = le32_to_cpu(lmv_src->lmv_stripe_count);
153 lmv_dst->lmv_master_mdt_index =
154 le32_to_cpu(lmv_src->lmv_master_mdt_index);
155 lmv_dst->lmv_hash_type = le32_to_cpu(lmv_src->lmv_hash_type);
156 lmv_dst->lmv_layout_version = le32_to_cpu(lmv_src->lmv_layout_version);
157 for (i = 0; i < lmv_src->lmv_stripe_count; i++)
158 fid_le_to_cpu(&lmv_dst->lmv_stripe_fids[i],
159 &lmv_src->lmv_stripe_fids[i]);
162 static inline void lmv_le_to_cpu(union lmv_mds_md *lmv_dst,
163 const union lmv_mds_md *lmv_src)
165 switch (le32_to_cpu(lmv_src->lmv_magic)) {
167 lmv1_le_to_cpu(&lmv_dst->lmv_md_v1, &lmv_src->lmv_md_v1);
174 /* This hash is only for testing purpose */
175 static inline unsigned int
176 lmv_hash_all_chars(unsigned int count, const char *name, int namelen)
179 const unsigned char *p = (const unsigned char *)name;
181 while (--namelen >= 0)
189 static inline unsigned int
190 lmv_hash_fnv1a(unsigned int count, const char *name, int namelen)
194 hash = lustre_hash_fnv_1a_64(name, namelen);
196 return do_div(hash, count);
200 * Robert Jenkins' function for mixing 32-bit values
201 * http://burtleburtle.net/bob/hash/evahash.html
202 * a, b = random bits, c = input and output
204 * Mixing inputs to generate an evenly distributed hash.
206 #define crush_hashmix(a, b, c) \
208 a = a - b; a = a - c; a = a ^ (c >> 13); \
209 b = b - c; b = b - a; b = b ^ (a << 8); \
210 c = c - a; c = c - b; c = c ^ (b >> 13); \
211 a = a - b; a = a - c; a = a ^ (c >> 12); \
212 b = b - c; b = b - a; b = b ^ (a << 16); \
213 c = c - a; c = c - b; c = c ^ (b >> 5); \
214 a = a - b; a = a - c; a = a ^ (c >> 3); \
215 b = b - c; b = b - a; b = b ^ (a << 10); \
216 c = c - a; c = c - b; c = c ^ (b >> 15); \
219 #define crush_hash_seed 1315423911
221 static inline __u32 crush_hash(__u32 a, __u32 b)
223 __u32 hash = crush_hash_seed ^ a ^ b;
227 crush_hashmix(a, b, hash);
228 crush_hashmix(x, a, hash);
229 crush_hashmix(b, y, hash);
234 /* refer to https://github.com/ceph/ceph/blob/master/src/crush/hash.c and
235 * https://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf for details of CRUSH
238 static inline unsigned int
239 lmv_hash_crush(unsigned int count, const char *name, int namelen)
241 unsigned long long straw;
242 unsigned long long highest_straw = 0;
244 unsigned int idx = 0;
247 /* put temp and backup file on the same MDT where target is located.
248 * temporary file naming rule:
249 * 1. rsync: .<target>.XXXXXX
250 * 2. dstripe: <target>.XXXXXXXX
252 if (lu_name_is_temp_file(name, namelen, true, 6)) {
255 } else if (lu_name_is_temp_file(name, namelen, false, 8)) {
257 } else if (lu_name_is_backup_file(name, namelen, &i)) {
258 LASSERT(i < namelen);
262 pg_id = lmv_hash_fnv1a(LMV_CRUSH_PG_COUNT, name, namelen);
264 /* distribute PG among all stripes pseudo-randomly, so they are almost
265 * evenly distributed, and when stripe count changes, only (delta /
266 * total) sub files need to be moved, herein 'delta' is added or removed
267 * stripe count, 'total' is total stripe count before change for
268 * removal, or count after change for addition.
270 for (i = 0; i < count; i++) {
271 straw = crush_hash(pg_id, i);
272 if (straw > highest_straw) {
273 highest_straw = straw;
277 LASSERT(idx < count);
282 static inline int lmv_name_to_stripe_index(__u32 hash_type,
283 unsigned int stripe_count,
284 const char *name, int namelen)
288 LASSERT(namelen > 0);
289 LASSERT(stripe_count > 0);
291 if (stripe_count == 1)
294 switch (hash_type & LMV_HASH_TYPE_MASK) {
295 case LMV_HASH_TYPE_ALL_CHARS:
296 idx = lmv_hash_all_chars(stripe_count, name, namelen);
298 case LMV_HASH_TYPE_FNV_1A_64:
299 idx = lmv_hash_fnv1a(stripe_count, name, namelen);
301 case LMV_HASH_TYPE_CRUSH:
302 idx = lmv_hash_crush(stripe_count, name, namelen);
308 CDEBUG(D_INFO, "name %.*s hash_type %#x idx %d/%u\n", namelen, name,
309 hash_type, idx, stripe_count);
314 static inline bool lmv_user_magic_supported(__u32 lum_magic)
316 return lum_magic == LMV_USER_MAGIC ||
317 lum_magic == LMV_USER_MAGIC_SPECIFIC ||
318 lum_magic == LMV_MAGIC_FOREIGN;
321 static inline bool lmv_is_sane(const struct lmv_mds_md_v1 *lmv)
323 if (le32_to_cpu(lmv->lmv_magic) != LMV_MAGIC_V1)
326 if (le32_to_cpu(lmv->lmv_stripe_count) == 0)
329 if (!lmv_is_known_hash_type(lmv->lmv_hash_type))