Whamcloud - gitweb
LU-10391 obd: Update lmd_parse to handle IPv6 NIDs
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_iam_lvar.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, 2015, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  *
31  * iam_lvar.c
32  *
33  * implementation of iam format for fixed size records, variable sized keys.
34  *
35  * Author: Nikita Danilov <nikita@clusterfs.com>
36  */
37
38 #include <linux/types.h>
39 #include "osd_internal.h"
40
41 /*
42  * Leaf operations.
43  */
44
45 enum {
46         /* This is duplicated in lustre/utils/create_iam.c */
47         IAM_LVAR_LEAF_MAGIC = 0x1973
48 };
49
50 /* This is duplicated in lustre/utils/create_iam.c */
51 struct lvar_leaf_header {
52         __le16 vlh_magic; /* magic number IAM_LVAR_LEAF_MAGIC */
53         __le16 vlh_used;  /* used bytes, including header */
54 };
55
56 /*
57  * Format of leaf entry:
58  *
59  * __le16 keysize
60  *     u8 key[keysize]
61  *     u8 record[rec_size]
62  *
63  * Entries are ordered in key order.
64  */
65
66 /* This is duplicated in lustre/utils/create_iam.c */
67 typedef u32 lvar_hash_t;
68
69 /* This is duplicated in lustre/utils/create_iam.c */
70 struct lvar_leaf_entry {
71         __le32 vle_hash;
72         __le16 vle_keysize;
73         u8 vle_key[0];
74 };
75
76 #define PDIFF(ptr0, ptr1) (((char *)(ptr0)) - ((char *)(ptr1)))
77
78
79 static inline int blocksize(const struct iam_leaf *leaf)
80 {
81         return iam_leaf_container(leaf)->ic_object->i_sb->s_blocksize;
82 }
83
84 static inline const char *kchar(const struct iam_key *key)
85 {
86         return (void *)key;
87 }
88
89 static inline struct iam_lentry *lvar_lentry(const struct lvar_leaf_entry *ent)
90 {
91         return (struct iam_lentry *)ent;
92 }
93
94 static inline struct lvar_leaf_entry *lentry_lvar(const struct iam_lentry *lent)
95 {
96         return (struct lvar_leaf_entry *)lent;
97 }
98
99
100 static inline int e_keysize(const struct lvar_leaf_entry *ent)
101 {
102         return le16_to_cpu(ent->vle_keysize);
103 }
104
105 /* This is duplicated in lustre/utils/create_iam.c */
106 enum {
107         LVAR_PAD   = 4,
108         LVAR_ROUND = LVAR_PAD - 1
109 };
110
111 static inline int getsize(const struct iam_leaf *leaf, int namelen, int recsize)
112 {
113         BUILD_BUG_ON((LVAR_PAD & (LVAR_PAD - 1)));
114
115         return (offsetof(struct lvar_leaf_entry, vle_key) +
116                         namelen + recsize + LVAR_ROUND) & ~LVAR_ROUND;
117 }
118
119 static inline int rec_size(const struct iam_rec *rec)
120 {
121         return *(const char *)rec;
122 }
123
124 static inline struct iam_rec *e_rec(const struct lvar_leaf_entry *ent)
125 {
126         return ((void *)ent) +
127                 offsetof(struct lvar_leaf_entry, vle_key) + e_keysize(ent);
128 }
129
130 static inline int e_size(const struct iam_leaf *leaf,
131                          const struct lvar_leaf_entry *ent)
132 {
133         return getsize(leaf, e_keysize(ent), rec_size(e_rec(ent)));
134 }
135
136 static inline char *e_char(const struct lvar_leaf_entry *ent)
137 {
138         return (char *)&ent->vle_key;
139 }
140
141 static inline struct iam_key *e_key(const struct lvar_leaf_entry *ent)
142 {
143         return (struct iam_key *)e_char(ent);
144 }
145
146 static inline lvar_hash_t e_hash(const struct lvar_leaf_entry *ent)
147 {
148         return le32_to_cpu(ent->vle_hash);
149 }
150
151 static void e_print(const struct lvar_leaf_entry *ent)
152 {
153         CERROR("        %p %8.8x \"%*.*s\"\n", ent, e_hash(ent),
154                         e_keysize(ent), e_keysize(ent), e_char(ent));
155 }
156
157 static inline struct lvar_leaf_entry *e_next(const struct iam_leaf *leaf,
158                                              const struct lvar_leaf_entry *ent)
159 {
160         return ((void *)ent) + e_size(leaf, ent);
161 }
162
163 #define LVAR_HASH_SANDWICH  (0)
164 #define LVAR_HASH_TEA       (1)
165 #define LVAR_HASH_R5        (0)
166 #define LVAR_HASH_PREFIX    (0)
167
168 #ifdef HAVE_LDISKFSFS_DIRHASH_WITH_DIR
169 #define e_ldiskfsfs_dirhash(dir, name, len, info) \
170                 ldiskfsfs_dirhash((dir), (name), (len), (info))
171 #else
172 #define e_ldiskfsfs_dirhash(dir, name, len, info) \
173                 ldiskfsfs_dirhash((name), (len), (info))
174 #endif
175
176 static u32 hash_build0(const struct inode *dir, const char *name, int namelen)
177 {
178         u32 result;
179
180         if (namelen == 0)
181                 return 0;
182         if (strncmp(name, ".", 1) == 0 && namelen == 1)
183                 return 1;
184         if (strncmp(name, "..", 2) == 0 && namelen == 2)
185                 return 2;
186
187         if (LVAR_HASH_PREFIX) {
188                 result = 0;
189                 strncpy((void *)&result,
190                         name, min_t(int, namelen, sizeof(result)));
191         } else {
192                 struct ldiskfs_dx_hash_info hinfo;
193
194                 hinfo.hash_version = LDISKFS_DX_HASH_TEA;
195                 hinfo.seed = NULL;
196                 e_ldiskfsfs_dirhash(dir, name, namelen, &hinfo);
197                 result = hinfo.hash;
198                 if (LVAR_HASH_SANDWICH) {
199                         u32 result2;
200
201                         hinfo.hash_version = LDISKFS_DX_HASH_TEA;
202                         hinfo.seed = NULL;
203                         e_ldiskfsfs_dirhash(dir, name, namelen, &hinfo);
204                         result2 = hinfo.hash;
205                         result = (0xfc000000 & result2) | (0x03ffffff & result);
206                 }
207         }
208         return result;
209 }
210
211 enum {
212         HASH_GRAY_AREA = 1024,
213         HASH_MAX_SIZE  = 0x7fffffffUL
214 };
215
216 static u32 hash_build(const struct inode *dir, const char *name, int namelen)
217 {
218         u32 hash;
219
220         hash = (hash_build0(dir, name, namelen) << 1) & HASH_MAX_SIZE;
221         if (hash > HASH_MAX_SIZE - HASH_GRAY_AREA)
222                 hash &= HASH_GRAY_AREA - 1;
223         return hash;
224 }
225
226 static inline lvar_hash_t get_hash(const struct inode *dir,
227                                    const char *name, int namelen)
228 {
229         return hash_build(dir, name, namelen);
230 }
231
232 static inline lvar_hash_t iam_get_hash(const struct iam_leaf *leaf,
233                                        const char *name, int namelen)
234 {
235         struct iam_path *iam_path = iam_leaf_path(leaf);
236
237         return get_hash(iam_path_obj(iam_path), name, namelen);
238 }
239
240 static inline int e_eq(const struct lvar_leaf_entry *ent,
241                        const char *name, int namelen)
242 {
243         return namelen == e_keysize(ent) && !memcmp(e_char(ent), name, namelen);
244 }
245
246 static inline int e_cmp(const struct iam_leaf *leaf,
247                         const struct lvar_leaf_entry *ent, lvar_hash_t hash)
248 {
249         lvar_hash_t ehash;
250
251         ehash = e_hash(ent);
252         return ehash == hash ? 0 : (ehash < hash ? -1 : 1);
253 }
254
255 static struct lvar_leaf_header *n_head(const struct iam_leaf *l)
256 {
257         return (struct lvar_leaf_header *)l->il_bh->b_data;
258 }
259
260 static int h_used(const struct lvar_leaf_header *hdr)
261 {
262         return le16_to_cpu(hdr->vlh_used);
263 }
264
265 static void h_used_adj(const struct iam_leaf *leaf,
266                        struct lvar_leaf_header *hdr, int adj)
267 {
268         int used;
269
270         used = h_used(hdr) + adj;
271         assert_corr(sizeof(*hdr) <= used && used <= blocksize(leaf));
272         hdr->vlh_used = cpu_to_le16(used);
273 }
274
275 static struct lvar_leaf_entry *n_start(const struct iam_leaf *leaf)
276 {
277         return (void *)leaf->il_bh->b_data + sizeof(struct lvar_leaf_header);
278 }
279
280 static struct lvar_leaf_entry *n_end(const struct iam_leaf *l)
281 {
282         return (void *)l->il_bh->b_data + h_used(n_head(l));
283 }
284
285 static struct lvar_leaf_entry *n_cur(const struct iam_leaf *l)
286 {
287         return lentry_lvar(l->il_at);
288 }
289
290 void n_print(const struct iam_leaf *l)
291 {
292         struct lvar_leaf_entry *scan;
293
294         CERROR("used: %d\n", h_used(n_head(l)));
295         for (scan = n_start(l); scan < n_end(l); scan = e_next(l, scan))
296                 e_print(scan);
297 }
298
299 #if LDISKFS_CORRECTNESS_ON
300 static int n_at_rec(const struct iam_leaf *folio)
301 {
302         return n_start(folio) <= lentry_lvar(folio->il_at) &&
303                 lentry_lvar(folio->il_at) < n_end(folio);
304 }
305
306 #if LDISKFS_INVARIANT_ON
307 static int n_invariant(const struct iam_leaf *leaf)
308 {
309         struct iam_path *path;
310         struct inode *dir;
311         struct lvar_leaf_entry *scan;
312         struct lvar_leaf_entry *end;
313         lvar_hash_t hash;
314         lvar_hash_t nexthash;
315         lvar_hash_t starthash;
316
317         end  = n_end(leaf);
318         hash = 0;
319         path = leaf->il_path;
320
321         if (h_used(n_head(leaf)) > blocksize(leaf))
322                 return 0;
323
324         dir = iam_path_obj(iam_path);
325         /*
326          * Delimiting key in the parent index node. Clear least bit to account
327          * for hash collision marker.
328          */
329         starthash = *(lvar_hash_t *)iam_ikey_at(path, path->ip_frame->at) & ~1;
330         for (scan = n_start(leaf); scan < end; scan = e_next(leaf, scan)) {
331                 nexthash = e_hash(scan);
332                 if (nexthash != get_hash(dir, e_char(scan), e_keysize(scan))) {
333                         BREAKPOINT();
334                         return 0;
335                 }
336                 if (0 && nexthash < starthash) {
337                         /*
338                          * Unfortunately this useful invariant cannot be
339                          * reliably checked as parent node is not necessarily
340                          * locked.
341                          */
342                         n_print(leaf);
343                         CERROR("invalid hash value less than start hash: %#x < %#x\n",
344                                 nexthash, starthash);
345                         dump_stack();
346                         return 0;
347                 }
348                 if (nexthash < hash) {
349                         BREAKPOINT();
350                         return 0;
351                 }
352                 hash = nexthash;
353         }
354         if (scan != end) {
355                 BREAKPOINT();
356                 return 0;
357         }
358         return 1;
359 }
360 /* LDISKFS_INVARIANT_ON */
361 #endif
362
363 /* LDISKFS_CORRECTNESS_ON */
364 #endif
365
366 static struct iam_ikey *lvar_ikey(const struct iam_leaf *l,
367                                   struct iam_ikey *key)
368 {
369         lvar_hash_t *hash;
370
371         assert_corr(n_at_rec(l));
372
373         hash = (void *)key;
374         *hash = e_hash(n_cur(l));
375         return key;
376 }
377
378 static struct iam_key *lvar_key(const struct iam_leaf *l)
379 {
380         return e_key(n_cur(l));
381 }
382
383 static int lvar_key_size(const struct iam_leaf *l)
384 {
385         return e_keysize(n_cur(l));
386 }
387
388 static void lvar_start(struct iam_leaf *l)
389 {
390         l->il_at = lvar_lentry(n_start(l));
391 }
392
393 static int lvar_init(struct iam_leaf *l)
394 {
395         int result;
396         int used;
397         struct lvar_leaf_header *head;
398
399         assert_corr(l->il_bh != NULL);
400
401         head = n_head(l);
402         used = h_used(head);
403         if (le16_to_cpu(head->vlh_magic) == IAM_LVAR_LEAF_MAGIC &&
404                         used <= blocksize(l)) {
405                 l->il_at = l->il_entries = lvar_lentry(n_start(l));
406                 result = 0;
407         } else {
408                 struct inode *obj;
409
410                 obj = iam_leaf_container(l)->ic_object;
411                 CERROR(
412                 "Bad magic in node %llu (#%lu): %#x != %#x or wrong used: %d\n",
413                 (unsigned long long)l->il_bh->b_blocknr, obj->i_ino,
414                 le16_to_cpu(head->vlh_magic), IAM_LVAR_LEAF_MAGIC,
415                 used);
416                 result = -EIO;
417         }
418         return result;
419 }
420
421 static void lvar_fini(struct iam_leaf *l)
422 {
423         l->il_entries = l->il_at = NULL;
424 }
425
426 static struct iam_rec *lvar_rec(const struct iam_leaf *l)
427 {
428         assert_corr(n_at_rec(l));
429         return e_rec(n_cur(l));
430 }
431
432 static void lvar_next(struct iam_leaf *l)
433 {
434         assert_corr(n_at_rec(l));
435         assert_corr(iam_leaf_is_locked(l));
436         l->il_at = lvar_lentry(e_next(l, n_cur(l)));
437 }
438
439 static int lvar_lookup(struct iam_leaf *leaf, const struct iam_key *k)
440 {
441         struct lvar_leaf_entry *found;
442         struct lvar_leaf_entry *scan;
443         struct lvar_leaf_entry *end;
444         int result;
445         const char *name;
446         int namelen;
447         int found_equal;
448         lvar_hash_t hash;
449         int last;
450
451         assert_inv(n_invariant(leaf));
452         end = n_end(leaf);
453
454         name = kchar(k);
455         namelen = strlen(name);
456         hash = iam_get_hash(leaf, name, namelen);
457         found = NULL;
458         found_equal = 0;
459         last = 1;
460
461         for (scan = n_start(leaf); scan < end; scan = e_next(leaf, scan)) {
462                 lvar_hash_t scan_hash;
463
464                 scan_hash = e_hash(scan);
465                 if (scan_hash < hash)
466                         found = scan;
467                 else if (scan_hash == hash) {
468                         if (e_eq(scan, name, namelen)) {
469                                 /*
470                                  * perfect match
471                                  */
472                                 leaf->il_at = lvar_lentry(scan);
473                                 return IAM_LOOKUP_EXACT;
474                         } else if (!found_equal) {
475                                 found = scan;
476                                 found_equal = 1;
477                         }
478                 } else {
479                         last = 0;
480                         break;
481                 }
482         }
483         if (found == NULL) {
484                 /*
485                  * @k is less than all hashes in the leaf.
486                  */
487                 lvar_start(leaf);
488                 result = IAM_LOOKUP_BEFORE;
489         } else {
490                 leaf->il_at = lvar_lentry(found);
491                 result = IAM_LOOKUP_OK;
492                 assert_corr(n_at_rec(leaf));
493         }
494         if (last)
495                 result |= IAM_LOOKUP_LAST;
496         assert_inv(n_invariant(leaf));
497
498         return result;
499 }
500
501 static int lvar_ilookup(struct iam_leaf *leaf, const struct iam_ikey *ik)
502 {
503         struct lvar_leaf_entry *scan;
504         struct lvar_leaf_entry *end;
505         lvar_hash_t hash;
506
507         assert_inv(n_invariant(leaf));
508         end  = n_end(leaf);
509         hash = *(const lvar_hash_t *)ik;
510
511         lvar_start(leaf);
512         for (scan = n_start(leaf); scan < end; scan = e_next(leaf, scan)) {
513                 lvar_hash_t scan_hash;
514
515                 scan_hash = e_hash(scan);
516                 if (scan_hash > hash)
517                         return scan == n_start(leaf) ?
518                                 IAM_LOOKUP_BEFORE : IAM_LOOKUP_OK;
519                 leaf->il_at = lvar_lentry(scan);
520                 if (scan_hash == hash)
521                         return IAM_LOOKUP_EXACT;
522         }
523         assert_inv(n_invariant(leaf));
524         /*
525          * @ik is greater than any key in the node. Return last record in the
526          * node.
527          */
528         return IAM_LOOKUP_OK;
529 }
530
531 static void __lvar_key_set(struct iam_leaf *l, const struct iam_key *k)
532 {
533         memcpy(e_key(n_cur(l)), k, e_keysize(n_cur(l)));
534 }
535
536 static void lvar_key_set(struct iam_leaf *l, const struct iam_key *k)
537 {
538         assert_corr(n_at_rec(l));
539         assert_corr(strlen(kchar(k)) == e_keysize(n_cur(l)));
540         assert_corr(iam_leaf_is_locked(l));
541         __lvar_key_set(l, k);
542         assert_inv(n_invariant(l));
543 }
544
545 static int lvar_key_cmp(const struct iam_leaf *l, const struct iam_key *k)
546 {
547         lvar_hash_t hash;
548         const char *name;
549
550         name = kchar(k);
551
552         hash = iam_get_hash(l, name, strlen(name));
553         return e_cmp(l, n_cur(l), hash);
554 }
555
556 static int lvar_key_eq(const struct iam_leaf *l, const struct iam_key *k)
557 {
558         const char *name;
559
560         name = kchar(k);
561         return e_eq(n_cur(l), name, strlen(name));
562 }
563
564 static void __lvar_rec_set(struct iam_leaf *l, const struct iam_rec *r)
565 {
566         memcpy(e_rec(n_cur(l)), r, rec_size(r));
567 }
568
569 static void lvar_rec_set(struct iam_leaf *l, const struct iam_rec *r)
570 {
571         assert_corr(n_at_rec(l));
572         assert_corr(iam_leaf_is_locked(l));
573         __lvar_rec_set(l, r);
574         assert_inv(n_invariant(l));
575 }
576
577 static int lvar_rec_eq(const struct iam_leaf *l, const struct iam_rec *r)
578 {
579         struct iam_rec *rec = e_rec(n_cur(l));
580
581         if (rec_size(rec) != rec_size(r))
582                 return 0;
583         return !memcmp(rec, r, rec_size(r));
584 }
585
586 static void lvar_rec_get(const struct iam_leaf *l, struct iam_rec *r)
587 {
588         struct iam_rec *rec;
589
590         rec = e_rec(n_cur(l));
591         assert_corr(n_at_rec(l));
592         assert_corr(iam_leaf_is_locked(l));
593         memcpy(r, rec, rec_size(rec));
594         assert_inv(n_invariant(l));
595 }
596
597 static int lvar_can_add(const struct iam_leaf *l,
598                         const struct iam_key *k, const struct iam_rec *r)
599 {
600         assert_corr(iam_leaf_is_locked(l));
601         return h_used(n_head(l)) +
602                 getsize(l, strlen(kchar(k)), rec_size(r)) <= blocksize(l);
603 }
604
605 static int lvar_at_end(const struct iam_leaf *folio)
606 {
607         assert_corr(iam_leaf_is_locked(folio));
608         return n_cur(folio) == n_end(folio);
609 }
610
611 static void lvar_rec_add(struct iam_leaf *leaf,
612                          const struct iam_key *k, const struct iam_rec *r)
613 {
614         const char *key;
615         int ksize;
616         int shift;
617         void *end;
618         void *start;
619         ptrdiff_t diff;
620
621         assert_corr(lvar_can_add(leaf, k, r));
622         assert_inv(n_invariant(leaf));
623         assert_corr(iam_leaf_is_locked(leaf));
624
625         key   = kchar(k);
626         ksize = strlen(key);
627         shift = getsize(leaf, ksize, rec_size(r));
628
629         if (!lvar_at_end(leaf)) {
630                 assert_corr(n_cur(leaf) < n_end(leaf));
631                 end = n_end(leaf);
632                 if (lvar_key_cmp(leaf, k) <= 0)
633                         lvar_next(leaf);
634                 else
635                         /*
636                          * Another exceptional case: insertion with the key
637                          * less than least key in the leaf.
638                          */
639                         assert_corr(leaf->il_at == leaf->il_entries);
640
641                 start = leaf->il_at;
642                 diff  = PDIFF(end, start);
643                 assert_corr(diff >= 0);
644                 memmove(start + shift, start, diff);
645         }
646         h_used_adj(leaf, n_head(leaf), shift);
647         n_cur(leaf)->vle_keysize = cpu_to_le16(ksize);
648         n_cur(leaf)->vle_hash = cpu_to_le32(iam_get_hash(leaf, key, ksize));
649         __lvar_key_set(leaf, k);
650         __lvar_rec_set(leaf, r);
651         assert_corr(n_at_rec(leaf));
652         assert_inv(n_invariant(leaf));
653 }
654
655 static void lvar_rec_del(struct iam_leaf *leaf, int shift)
656 {
657         void *next;
658         void *end;
659         int nob;
660
661         assert_corr(n_at_rec(leaf));
662         assert_inv(n_invariant(leaf));
663         assert_corr(iam_leaf_is_locked(leaf));
664
665         end  = n_end(leaf);
666         next = e_next(leaf, n_cur(leaf));
667         nob  = e_size(leaf, n_cur(leaf));
668         memmove(leaf->il_at, next, end - next);
669         h_used_adj(leaf, n_head(leaf), -nob);
670         assert_inv(n_invariant(leaf));
671 }
672
673 static void lvar_init_new(struct iam_container *c, struct buffer_head *bh)
674 {
675         struct lvar_leaf_header *hdr;
676
677         hdr = (struct lvar_leaf_header *)bh->b_data;
678         hdr->vlh_magic = cpu_to_le16(IAM_LVAR_LEAF_MAGIC);
679         hdr->vlh_used  = sizeof(*hdr);
680 }
681
682 static struct lvar_leaf_entry *find_pivot(const struct iam_leaf *leaf,
683                                           struct lvar_leaf_entry **prev)
684 {
685         void *scan;
686         void *start;
687         int threshold;
688
689         *prev = NULL;
690         threshold = blocksize(leaf) / 2;
691         for (scan = start = n_start(leaf); scan - start <= threshold;
692                         *prev = scan, scan = e_next(leaf, scan)) {
693                 ;
694         }
695         return scan;
696 }
697
698 static void lvar_split(struct iam_leaf *leaf, struct buffer_head **bh,
699                        iam_ptr_t new_blknr)
700 {
701         struct lvar_leaf_entry *first_to_move;
702         struct lvar_leaf_entry *last_to_stay;
703         struct iam_path *path;
704         struct lvar_leaf_header *hdr;
705         struct buffer_head *new_leaf;
706         ptrdiff_t tomove;
707         lvar_hash_t hash;
708
709         assert_inv(n_invariant(leaf));
710         assert_corr(iam_leaf_is_locked(leaf));
711
712         new_leaf = *bh;
713         path = iam_leaf_path(leaf);
714
715         hdr = (void *)new_leaf->b_data;
716
717         first_to_move = find_pivot(leaf, &last_to_stay);
718         assert_corr(last_to_stay != NULL);
719         assert_corr(e_next(leaf, last_to_stay) == first_to_move);
720
721         hash = e_hash(first_to_move);
722         if (hash == e_hash(last_to_stay))
723                 /*
724                  * Duplicate hash.
725                  */
726                 hash |= 1;
727
728         tomove = PDIFF(n_end(leaf), first_to_move);
729         memmove(hdr + 1, first_to_move, tomove);
730
731         h_used_adj(leaf, hdr, tomove);
732         h_used_adj(leaf, n_head(leaf), -tomove);
733
734         assert_corr(n_end(leaf) == first_to_move);
735
736         if (n_cur(leaf) >= first_to_move) {
737                 /*
738                  * insertion point moves into new leaf.
739                  */
740                 ptrdiff_t shift;
741
742                 shift = PDIFF(leaf->il_at, first_to_move);
743                 *bh = leaf->il_bh;
744                 leaf->il_bh = new_leaf;
745                 leaf->il_curidx = new_blknr;
746
747                 assert_corr(iam_leaf_is_locked(leaf));
748                 lvar_init(leaf);
749                 /*
750                  * init cannot fail, as node was just initialized.
751                  */
752                 assert_corr(result == 0);
753                 leaf->il_at = ((void *)leaf->il_at) + shift;
754         }
755         /*
756          * Insert pointer to the new node (together with the least key in
757          * the node) into index node.
758          */
759         iam_insert_key_lock(path, path->ip_frame, (struct iam_ikey *)&hash,
760                             new_blknr);
761         assert_corr(n_cur(leaf) < n_end(leaf));
762         assert_inv(n_invariant(leaf));
763 }
764
765 static int lvar_leaf_empty(struct iam_leaf *leaf)
766 {
767         return h_used(n_head(leaf)) == sizeof(struct lvar_leaf_header);
768 }
769
770 static const struct iam_leaf_operations lvar_leaf_ops = {
771         .init           = lvar_init,
772         .init_new       = lvar_init_new,
773         .fini           = lvar_fini,
774         .start          = lvar_start,
775         .next           = lvar_next,
776         .key            = lvar_key,
777         .ikey           = lvar_ikey,
778         .rec            = lvar_rec,
779         .key_set        = lvar_key_set,
780         .key_cmp        = lvar_key_cmp,
781         .key_eq         = lvar_key_eq,
782         .key_size       = lvar_key_size,
783         .rec_set        = lvar_rec_set,
784         .rec_eq         = lvar_rec_eq,
785         .rec_get        = lvar_rec_get,
786         .lookup         = lvar_lookup,
787         .ilookup        = lvar_ilookup,
788         .at_end         = lvar_at_end,
789         .rec_add        = lvar_rec_add,
790         .rec_del        = lvar_rec_del,
791         .can_add        = lvar_can_add,
792         .split          = lvar_split,
793         .leaf_empty     = lvar_leaf_empty,
794 };
795
796 /*
797  * Index operations.
798  */
799
800 enum {
801         /* This is duplicated in lustre/utils/create_iam.c */
802         /* egrep -i '^o?x?[olabcdef]*$' /usr/share/dict/words */
803         IAM_LVAR_ROOT_MAGIC = 0xb01dface
804 };
805
806 /* This is duplicated in lustre/utils/create_iam.c */
807 struct lvar_root {
808         __le32 vr_magic;
809         __le16 vr_recsize;
810         __le16 vr_ptrsize;
811         u8 vr_indirect_levels;
812         u8 vr_padding0;
813         __le16 vr_padding1;
814 };
815
816 static u32 lvar_root_ptr(struct iam_container *c)
817 {
818         return 0;
819 }
820
821 static int lvar_node_init(struct iam_container *c, struct buffer_head *bh,
822                           int root)
823 {
824         return 0;
825 }
826
827 static struct iam_entry *lvar_root_inc(struct iam_container *c,
828                                        struct iam_path *path,
829                                        struct iam_frame *frame)
830 {
831         struct lvar_root *root;
832         struct iam_entry *entries;
833
834         assert_corr(iam_frame_is_locked(path, frame));
835         entries = frame->entries;
836
837         dx_set_count(entries, 2);
838         assert_corr(dx_get_limit(entries) == dx_root_limit(path));
839
840         root = (void *)frame->bh->b_data;
841         assert_corr(le64_to_cpu(root->vr_magic) == IAM_LVAR_ROOT_MAGIC);
842         root->vr_indirect_levels++;
843         frame->at = entries = iam_entry_shift(path, entries, 1);
844         memset(iam_ikey_at(path, entries), 0,
845                iam_path_descr(path)->id_ikey_size);
846         return entries;
847 }
848
849 static int lvar_node_check(struct iam_path *path, struct iam_frame *frame)
850 {
851         unsigned int count;
852         unsigned int limit;
853         unsigned int limit_correct;
854         struct iam_entry *entries;
855
856         entries = dx_node_get_entries(path, frame);
857
858         if (frame == path->ip_frames) {
859                 struct lvar_root *root;
860
861                 root = (void *)frame->bh->b_data;
862                 if (le32_to_cpu(root->vr_magic) != IAM_LVAR_ROOT_MAGIC)
863                         return -EIO;
864                 limit_correct = dx_root_limit(path);
865         } else
866                 limit_correct = dx_node_limit(path);
867         count = dx_get_count(entries);
868         limit = dx_get_limit(entries);
869         if (count > limit)
870                 return -EIO;
871         if (limit != limit_correct)
872                 return -EIO;
873         return 0;
874 }
875
876 static int lvar_node_load(struct iam_path *path, struct iam_frame *frame)
877 {
878         struct iam_entry *entries;
879         void *data;
880
881         entries = dx_node_get_entries(path, frame);
882         data = frame->bh->b_data;
883
884         if (frame == path->ip_frames) {
885                 struct lvar_root *root;
886                 const char *name;
887
888                 root = data;
889                 name = kchar(path->ip_key_target);
890                 path->ip_indirect = root->vr_indirect_levels;
891                 if (path->ip_ikey_target == NULL) {
892                         path->ip_ikey_target = iam_path_ikey(path, 4);
893                         *(lvar_hash_t *)path->ip_ikey_target =
894                                 get_hash(iam_path_obj(path), name,
895                                          strlen(name));
896                 }
897         }
898         frame->entries = frame->at = entries;
899         return 0;
900 }
901
902 static int lvar_ikeycmp(const struct iam_container *c,
903                         const struct iam_ikey *k1, const struct iam_ikey *k2)
904 {
905         lvar_hash_t p1 = le32_to_cpu(*(lvar_hash_t *)k1);
906         lvar_hash_t p2 = le32_to_cpu(*(lvar_hash_t *)k2);
907
908         return p1 > p2 ? 1 : (p1 < p2 ? -1 : 0);
909 }
910
911 static struct iam_path_descr *lvar_ipd_alloc(const struct iam_container *c,
912                                              void *area)
913 {
914         return iam_ipd_alloc(area, c->ic_descr->id_ikey_size);
915 }
916
917 static void lvar_root(void *buf,
918                       int blocksize, int keysize, int ptrsize, int recsize)
919 {
920         struct lvar_root *root;
921         struct dx_countlimit *limit;
922         void *entry;
923         int isize;
924
925         isize = sizeof(lvar_hash_t) + ptrsize;
926         root = buf;
927         *root = (typeof(*root)) {
928                 .vr_magic            = cpu_to_le32(IAM_LVAR_ROOT_MAGIC),
929                 .vr_recsize          = cpu_to_le16(recsize),
930                 .vr_ptrsize          = cpu_to_le16(ptrsize),
931                 .vr_indirect_levels  = 0
932         };
933
934         limit = (void *)(root + 1);
935         *limit = (typeof(*limit)){
936                 /*
937                  * limit itself + one pointer to the leaf.
938                  */
939                 .count = cpu_to_le16(2),
940                 .limit = iam_root_limit(sizeof(struct lvar_root), blocksize,
941                                         sizeof(lvar_hash_t) + ptrsize)
942         };
943
944         /* To guarantee that the padding "keysize + ptrsize"
945          * covers the "dx_countlimit" and the "idle_blocks". */
946         LASSERT((keysize + ptrsize) >=
947                 (sizeof(struct dx_countlimit) + sizeof(u32)));
948
949         entry = (void *)(limit + 1);
950         /* Put "idle_blocks" just after the limit. There was padding after
951          * the limit, the "idle_blocks" re-uses part of the padding, so no
952          * compatibility issues with old layout.
953          */
954         *(u32 *)entry = 0;
955
956         /*
957          * Skip over @limit.
958          */
959         entry = (void *)(root + 1) + isize;
960
961         /*
962          * Entry format is <key> followed by <ptr>. In the minimal tree
963          * consisting of a root and single node, <key> is a minimal possible
964          * key.
965          */
966         *(lvar_hash_t *)entry = 0;
967         entry += sizeof(lvar_hash_t);
968         /* now @entry points to <ptr> */
969         if (ptrsize == 4)
970                 *(u_int32_t *)entry = cpu_to_le32(1);
971         else
972                 *(u_int64_t *)entry = cpu_to_le64(1);
973 }
974
975 static int lvar_esize(int namelen, int recsize)
976 {
977         return (offsetof(struct lvar_leaf_entry, vle_key) +
978                         namelen + recsize + LVAR_ROUND) & ~LVAR_ROUND;
979 }
980
981 static void lvar_leaf(void *buf,
982                       int blocksize, int keysize, int ptrsize, int recsize)
983 {
984         struct lvar_leaf_header *head;
985         struct lvar_leaf_entry *entry;
986
987         /* form leaf */
988         head = buf;
989         *head = (typeof(*head)) {
990                 .vlh_magic = cpu_to_le16(IAM_LVAR_LEAF_MAGIC),
991                 .vlh_used  = cpu_to_le16(sizeof(*head) + lvar_esize(0, recsize))
992         };
993         entry = (void *)(head + 1);
994         *entry = (typeof(*entry)) {
995                 .vle_hash    = 0,
996                 .vle_keysize = 0
997         };
998         memset(e_rec(entry), 0, recsize);
999         *(char *)e_rec(entry) = recsize;
1000 }
1001
1002 int iam_lvar_create(struct inode *obj,
1003                     int keysize, int ptrsize, int recsize, handle_t *handle)
1004 {
1005         struct buffer_head *root_node;
1006         struct buffer_head *leaf_node;
1007         struct super_block *sb;
1008
1009         u32 blknr;
1010         int result = 0;
1011         unsigned long bsize;
1012
1013         assert_corr(obj->i_size == 0);
1014
1015         sb = obj->i_sb;
1016         bsize = sb->s_blocksize;
1017         root_node = osd_ldiskfs_append(handle, obj, &blknr);
1018         if (IS_ERR(root_node))
1019                 GOTO(out, result = PTR_ERR(root_node));
1020
1021         leaf_node = osd_ldiskfs_append(handle, obj, &blknr);
1022         if (IS_ERR(leaf_node))
1023                 GOTO(out_root, result = PTR_ERR(leaf_node));
1024
1025         lvar_root(root_node->b_data, bsize, keysize, ptrsize, recsize);
1026         lvar_leaf(leaf_node->b_data, bsize, keysize, ptrsize, recsize);
1027         ldiskfs_mark_inode_dirty(handle, obj);
1028         result = ldiskfs_handle_dirty_metadata(handle, NULL, root_node);
1029         if (result == 0)
1030                 result = ldiskfs_handle_dirty_metadata(handle, NULL, leaf_node);
1031         if (result != 0)
1032                 ldiskfs_std_error(sb, result);
1033
1034         brelse(leaf_node);
1035
1036         GOTO(out_root, result);
1037
1038 out_root:
1039         brelse(root_node);
1040 out:
1041         return result;
1042 }
1043
1044 static const struct iam_operations lvar_ops = {
1045         .id_root_ptr    = lvar_root_ptr,
1046         .id_node_read   = iam_node_read,
1047         .id_node_init   = lvar_node_init,
1048         .id_node_check  = lvar_node_check,
1049         .id_node_load   = lvar_node_load,
1050         .id_ikeycmp     = lvar_ikeycmp,
1051         .id_root_inc    = lvar_root_inc,
1052         .id_ipd_alloc   = lvar_ipd_alloc,
1053         .id_ipd_free    = iam_ipd_free,
1054         .id_name        = "lvar"
1055 };
1056
1057 int iam_lvar_guess(struct iam_container *c)
1058 {
1059         int result;
1060         struct buffer_head *bh;
1061         const struct lvar_root *root;
1062
1063         assert_corr(c->ic_object != NULL);
1064
1065         result = iam_node_read(c, lvar_root_ptr(c), NULL, &bh);
1066         if (result == 0) {
1067                 root = (void *)bh->b_data;
1068
1069                 if (le32_to_cpu(root->vr_magic) == IAM_LVAR_ROOT_MAGIC) {
1070                         struct iam_descr *descr;
1071
1072                         descr = c->ic_descr;
1073                         descr->id_key_size  = LDISKFS_NAME_LEN;
1074                         descr->id_ikey_size = sizeof(lvar_hash_t);
1075                         descr->id_rec_size  = le16_to_cpu(root->vr_recsize);
1076                         descr->id_ptr_size  = le16_to_cpu(root->vr_ptrsize);
1077                         descr->id_root_gap  = sizeof(*root);
1078                         descr->id_node_gap  = 0;
1079                         descr->id_ops       = &lvar_ops;
1080                         descr->id_leaf_ops  = &lvar_leaf_ops;
1081                         c->ic_root_bh = bh;
1082                 } else {
1083                         result = -EBADF;
1084                         brelse(bh);
1085                 }
1086         }
1087         return result;
1088 }