Whamcloud - gitweb
LU-1347 build: remove the vim/emacs modelines
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_iam_lvar.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see [sun.com URL with a
18  * copy of GPLv2].
19  *
20  * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21  * CA 95054 USA or visit www.sun.com if you need additional information or
22  * have any questions.
23  *
24  * GPL HEADER END
25  */
26 /*
27  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
28  * Use is subject to license terms.
29  */
30 /*
31  * This file is part of Lustre, http://www.lustre.org/
32  * Lustre is a trademark of Sun Microsystems, Inc.
33  *
34  * iam_lvar.c
35  *
36  * implementation of iam format for fixed size records, variable sized keys.
37  *
38  * Author: Nikita Danilov <nikita@clusterfs.com>
39  */
40
41 #include <linux/types.h>
42 #include "osd_internal.h"
43
44 /*
45  * Leaf operations.
46  */
47
48 enum {
49         IAM_LVAR_LEAF_MAGIC = 0x1973 /* This is duplicated in
50                                       * lustre/utils/create_iam.c */
51 };
52
53 /* This is duplicated in lustre/utils/create_iam.c */
54 struct lvar_leaf_header {
55         __le16 vlh_magic; /* magic number IAM_LVAR_LEAF_MAGIC */
56         __le16 vlh_used;  /* used bytes, including header */
57 };
58
59 /*
60  * Format of leaf entry:
61  *
62  * __le16 keysize
63  *     u8 key[keysize]
64  *     u8 record[rec_size]
65  *
66  * Entries are ordered in key order.
67  */
68
69 /* This is duplicated in lustre/utils/create_iam.c */
70 typedef __u32 lvar_hash_t;
71
72 /* This is duplicated in lustre/utils/create_iam.c */
73 struct lvar_leaf_entry {
74         __le32 vle_hash;
75         __le16 vle_keysize;
76         u8     vle_key[0];
77 };
78
79 #define PDIFF(ptr0, ptr1) (((char *)(ptr0)) - ((char *)(ptr1)))
80
81
82 static inline int blocksize(const struct iam_leaf *leaf)
83 {
84         return iam_leaf_container(leaf)->ic_object->i_sb->s_blocksize;
85 }
86
87 static inline const char *kchar(const struct iam_key *key)
88 {
89         return (void *)key;
90 }
91
92 static inline struct iam_lentry *lvar_lentry(const struct lvar_leaf_entry *ent)
93 {
94         return (struct iam_lentry *)ent;
95 }
96
97 static inline struct lvar_leaf_entry *lentry_lvar(const struct iam_lentry *lent)
98 {
99         return (struct lvar_leaf_entry *)lent;
100 }
101
102
103 static inline int e_keysize(const struct lvar_leaf_entry *ent)
104 {
105         return le16_to_cpu(ent->vle_keysize);
106 }
107
108 /* This is duplicated in lustre/utils/create_iam.c */
109 enum {
110         LVAR_PAD   = 4,
111         LVAR_ROUND = LVAR_PAD - 1
112 };
113
114 static inline int getsize(const struct iam_leaf *leaf, int namelen, int recsize)
115 {
116         CLASSERT(!(LVAR_PAD & (LVAR_PAD - 1)));
117
118         return (offsetof(struct lvar_leaf_entry, vle_key) +
119                 namelen + recsize + LVAR_ROUND) & ~LVAR_ROUND;
120 }
121
122 static inline int rec_size(const struct iam_rec *rec)
123 {
124         return *(const char *)rec;
125 }
126
127 static inline struct iam_rec *e_rec(const struct lvar_leaf_entry *ent)
128 {
129         return ((void *)ent) +
130                 offsetof(struct lvar_leaf_entry, vle_key) + e_keysize(ent);
131 }
132
133 static inline int e_size(const struct iam_leaf *leaf,
134                          const struct lvar_leaf_entry *ent)
135 {
136         return getsize(leaf, e_keysize(ent), rec_size(e_rec(ent)));
137 }
138
139 static inline char *e_char(const struct lvar_leaf_entry *ent)
140 {
141         return (char *)&ent->vle_key;
142 }
143
144 static inline struct iam_key *e_key(const struct lvar_leaf_entry *ent)
145 {
146         return (struct iam_key *)e_char(ent);
147 }
148
149 static inline lvar_hash_t e_hash(const struct lvar_leaf_entry *ent)
150 {
151         return le32_to_cpu(ent->vle_hash);
152 }
153
154 static void e_print(const struct lvar_leaf_entry *ent)
155 {
156         printk("        %p %8.8x \"%*.*s\"\n", ent, e_hash(ent),
157                e_keysize(ent), e_keysize(ent), e_char(ent));
158 }
159 #if 0
160 static int e_check(const struct iam_leaf *leaf,
161                    const struct lvar_leaf_entry *ent)
162 {
163         const void *point = ent;
164         const void *start = leaf->il_bh->b_data;
165         return
166                 start + sizeof(struct lvar_leaf_header) <= point &&
167                 point + e_size(leaf, ent) < start + blocksize(leaf);
168 }
169 #endif
170
171 static inline struct lvar_leaf_entry *e_next(const struct iam_leaf *leaf,
172                                              const struct lvar_leaf_entry *ent)
173 {
174         return ((void *)ent) + e_size(leaf, ent);
175 }
176
177 #define LVAR_HASH_SANDWICH  (0)
178 #define LVAR_HASH_TEA       (1)
179 #define LVAR_HASH_R5        (0)
180 #define LVAR_HASH_PREFIX    (0)
181
182 static __u32 hash_build0(const char *name, int namelen)
183 {
184         __u32 result;
185
186         if (namelen == 0)
187                 return 0;
188         if (strncmp(name, ".", 1) == 0 && namelen == 1)
189                 return 1;
190         if (strncmp(name, "..", 2) == 0 && namelen == 2)
191                 return 2;
192
193         if (LVAR_HASH_PREFIX) {
194                 result = 0;
195                 strncpy((void *)&result,
196                         name, min(namelen, (int)sizeof result));
197         } else {
198                 struct ldiskfs_dx_hash_info hinfo;
199
200                 hinfo.hash_version = LDISKFS_DX_HASH_TEA;
201                 hinfo.seed = 0;
202                 ldiskfsfs_dirhash(name, namelen, &hinfo);
203                 result = hinfo.hash;
204                 if (LVAR_HASH_SANDWICH) {
205                         __u32 result2;
206
207                         hinfo.hash_version = LDISKFS_DX_HASH_TEA;
208                         hinfo.seed = 0;
209                         ldiskfsfs_dirhash(name, namelen, &hinfo);
210                         result2 = hinfo.hash;
211                         result = (0xfc000000 & result2) | (0x03ffffff & result);
212                 }
213         }
214         return result;
215 }
216
217 enum {
218         HASH_GRAY_AREA = 1024,
219         HASH_MAX_SIZE  = 0x7fffffffUL
220 };
221
222 static __u32 hash_build(const char *name, int namelen)
223 {
224         __u32 hash;
225
226         hash = (hash_build0(name, namelen) << 1) & HASH_MAX_SIZE;
227         if (hash > HASH_MAX_SIZE - HASH_GRAY_AREA)
228                 hash &= HASH_GRAY_AREA - 1;
229         return hash;
230 }
231
232 static inline lvar_hash_t get_hash(const struct iam_container *bag,
233                                    const char *name, int namelen)
234 {
235         return hash_build(name, namelen);
236 }
237
238 static inline int e_eq(const struct lvar_leaf_entry *ent,
239                        const char *name, int namelen)
240 {
241         return namelen == e_keysize(ent) && !memcmp(e_char(ent), name, namelen);
242 }
243
244 static inline int e_cmp(const struct iam_leaf *leaf,
245                         const struct lvar_leaf_entry *ent, lvar_hash_t hash)
246 {
247         lvar_hash_t ehash;
248
249         ehash = e_hash(ent);
250         return ehash == hash ? 0 : (ehash < hash ? -1 : +1);
251 }
252
253 static struct lvar_leaf_header *n_head(const struct iam_leaf *l)
254 {
255         return (struct lvar_leaf_header *)l->il_bh->b_data;
256 }
257
258 static int h_used(const struct lvar_leaf_header *hdr)
259 {
260         return le16_to_cpu(hdr->vlh_used);
261 }
262
263 static void h_used_adj(const struct iam_leaf *leaf,
264                        struct lvar_leaf_header *hdr, int adj)
265 {
266         int used;
267
268         used = h_used(hdr) + adj;
269         assert_corr(sizeof *hdr <= used && used <= blocksize(leaf));
270         hdr->vlh_used = cpu_to_le16(used);
271 }
272
273 static struct lvar_leaf_entry *n_start(const struct iam_leaf *leaf)
274 {
275         return (void *)leaf->il_bh->b_data + sizeof(struct lvar_leaf_header);
276 }
277
278 static struct lvar_leaf_entry *n_end(const struct iam_leaf *l)
279 {
280         return (void *)l->il_bh->b_data + h_used(n_head(l));
281 }
282
283 static struct lvar_leaf_entry *n_cur(const struct iam_leaf *l)
284 {
285         return lentry_lvar(l->il_at);
286 }
287
288 void n_print(const struct iam_leaf *l)
289 {
290         struct lvar_leaf_entry *scan;
291
292         printk(CFS_KERN_EMERG "used: %d\n", h_used(n_head(l)));
293         for (scan = n_start(l); scan < n_end(l); scan = e_next(l, scan))
294                 e_print(scan);
295 }
296
297 #if LDISKFS_CORRECTNESS_ON
298 static int n_at_rec(const struct iam_leaf *folio)
299 {
300         return
301                 n_start(folio) <= lentry_lvar(folio->il_at) &&
302                 lentry_lvar(folio->il_at) < n_end(folio);
303 }
304
305 #if LDISKFS_INVARIANT_ON
306 static int n_invariant(const struct iam_leaf *leaf)
307 {
308         struct iam_path        *path;
309         struct lvar_leaf_entry *scan;
310         struct lvar_leaf_entry *end;
311         lvar_hash_t             hash;
312         lvar_hash_t             nexthash;
313         lvar_hash_t             starthash;
314
315         end  = n_end(leaf);
316         hash = 0;
317         path = leaf->il_path;
318
319         if (h_used(n_head(leaf)) > blocksize(leaf))
320                 return 0;
321
322         /*
323          * Delimiting key in the parent index node. Clear least bit to account
324          * for hash collision marker.
325          */
326         starthash = *(lvar_hash_t *)iam_ikey_at(path, path->ip_frame->at) & ~1;
327         for (scan = n_start(leaf); scan < end; scan = e_next(leaf, scan)) {
328                 nexthash = e_hash(scan);
329                 if (nexthash != get_hash(iam_leaf_container(leaf),
330                                          e_char(scan), e_keysize(scan))) {
331                         BREAKPOINT();
332                         return 0;
333                 }
334                 if (0 && nexthash < starthash) {
335                         /*
336                          * Unfortunately this useful invariant cannot be
337                          * reliably checked as parent node is nor necessarily
338                          * locked.
339                          */
340                         n_print(leaf);
341                         printk("%#x < %#x\n", nexthash, starthash);
342                         dump_stack();
343                         return 0;
344                 }
345                 if (nexthash < hash) {
346                         BREAKPOINT();
347                         return 0;
348                 }
349                 hash = nexthash;
350         }
351         if (scan != end) {
352                 BREAKPOINT();
353                 return 0;
354         }
355         return 1;
356 }
357 /* LDISKFS_INVARIANT_ON */
358 #endif
359
360 /* LDISKFS_CORRECTNESS_ON */
361 #endif
362
363 static struct iam_ikey *lvar_ikey(const struct iam_leaf *l,
364                                   struct iam_ikey *key)
365 {
366         lvar_hash_t *hash;
367
368         assert_corr(n_at_rec(l));
369
370         hash = (void *)key;
371         *hash = e_hash(n_cur(l));
372         return key;
373 }
374
375 static struct iam_key *lvar_key(const struct iam_leaf *l)
376 {
377         return e_key(n_cur(l));
378 }
379
380 static int lvar_key_size(const struct iam_leaf *l)
381 {
382         return e_keysize(n_cur(l));
383 }
384
385 static void lvar_start(struct iam_leaf *l)
386 {
387         l->il_at = lvar_lentry(n_start(l));
388 }
389
390 static int lvar_init(struct iam_leaf *l)
391 {
392         int result;
393         int used;
394         struct lvar_leaf_header *head;
395
396         assert_corr(l->il_bh != NULL);
397
398         head = n_head(l);
399         used = h_used(head);
400         if (head->vlh_magic == le16_to_cpu(IAM_LVAR_LEAF_MAGIC) &&
401             used <= blocksize(l)) {
402                 l->il_at = l->il_entries = lvar_lentry(n_start(l));
403                 result = 0;
404         } else {
405                 struct inode *obj;
406
407                 obj = iam_leaf_container(l)->ic_object;
408                 CERROR("Wrong magic in node %llu (#%lu): %#x != %#x or "
409                        "wrong used: %d",
410                        (unsigned long long)l->il_bh->b_blocknr, obj->i_ino,
411                        head->vlh_magic, le16_to_cpu(IAM_LVAR_LEAF_MAGIC),
412                        used);
413                 result = -EIO;
414         }
415         return result;
416 }
417
418 static void lvar_fini(struct iam_leaf *l)
419 {
420         l->il_entries = l->il_at = NULL;
421 }
422
423 struct iam_rec *lvar_rec(const struct iam_leaf *l)
424 {
425         assert_corr(n_at_rec(l));
426         return e_rec(n_cur(l));
427 }
428
429 static void lvar_next(struct iam_leaf *l)
430 {
431         assert_corr(n_at_rec(l));
432         assert_corr(iam_leaf_is_locked(l));
433         l->il_at = lvar_lentry(e_next(l, n_cur(l)));
434 }
435
436 static int lvar_lookup(struct iam_leaf *leaf, const struct iam_key *k)
437 {
438         struct lvar_leaf_entry *found;
439         struct lvar_leaf_entry *scan;
440         struct lvar_leaf_entry *end;
441         int                     result;
442         const char             *name;
443         int                     namelen;
444         int                     found_equal;
445         lvar_hash_t             hash;
446         int                     last;
447
448         assert_inv(n_invariant(leaf));
449         end = n_end(leaf);
450
451         name = kchar(k);
452         namelen = strlen(name);
453         hash = get_hash(iam_leaf_container(leaf), name, namelen);
454         found = NULL;
455         found_equal = 0;
456         last = 1;
457
458         for (scan = n_start(leaf); scan < end; scan = e_next(leaf, scan)) {
459                 lvar_hash_t scan_hash;
460
461                 scan_hash = e_hash(scan);
462                 if (scan_hash < hash)
463                         found = scan;
464                 else if (scan_hash == hash) {
465                         if (e_eq(scan, name, namelen)) {
466                                 /*
467                                  * perfect match
468                                  */
469                                 leaf->il_at = lvar_lentry(scan);
470                                 return IAM_LOOKUP_EXACT;
471                         } else if (!found_equal) {
472                                         found = scan;
473                                         found_equal = 1;
474                         }
475                 } else {
476                         last = 0;
477                         break;
478                 }
479         }
480         if (found == NULL) {
481                 /*
482                  * @k is less than all hashes in the leaf.
483                  */
484                 lvar_start(leaf);
485                 result = IAM_LOOKUP_BEFORE;
486         } else {
487                 leaf->il_at = lvar_lentry(found);
488                 result = IAM_LOOKUP_OK;
489                 assert_corr(n_at_rec(leaf));
490         }
491         if (last)
492                 result |= IAM_LOOKUP_LAST;
493         assert_inv(n_invariant(leaf));
494
495         return result;
496 }
497
498 static int lvar_ilookup(struct iam_leaf *leaf, const struct iam_ikey *ik)
499 {
500         struct lvar_leaf_entry *scan;
501         struct lvar_leaf_entry *end;
502         lvar_hash_t             hash;
503
504         assert_inv(n_invariant(leaf));
505         end  = n_end(leaf);
506         hash = *(const lvar_hash_t *)ik;
507
508         lvar_start(leaf);
509         for (scan = n_start(leaf); scan < end; scan = e_next(leaf, scan)) {
510                 lvar_hash_t scan_hash;
511
512                 scan_hash = e_hash(scan);
513                 if (scan_hash > hash)
514                         return scan == n_start(leaf) ?
515                                 IAM_LOOKUP_BEFORE : IAM_LOOKUP_OK;
516                 leaf->il_at = lvar_lentry(scan);
517                 if (scan_hash == hash)
518                         return IAM_LOOKUP_EXACT;
519         }
520         assert_inv(n_invariant(leaf));
521         /*
522          * @ik is greater than any key in the node. Return last record in the
523          * node.
524          */
525         return IAM_LOOKUP_OK;
526 }
527
528 static void __lvar_key_set(struct iam_leaf *l, const struct iam_key *k)
529 {
530         memcpy(e_key(n_cur(l)), k, e_keysize(n_cur(l)));
531 }
532
533 static void lvar_key_set(struct iam_leaf *l, const struct iam_key *k)
534 {
535         assert_corr(n_at_rec(l));
536         assert_corr(strlen(kchar(k)) == e_keysize(n_cur(l)));
537         assert_corr(iam_leaf_is_locked(l));
538         __lvar_key_set(l, k);
539         assert_inv(n_invariant(l));
540 }
541
542 static int lvar_key_cmp(const struct iam_leaf *l, const struct iam_key *k)
543 {
544         lvar_hash_t hash;
545         const char *name;
546
547         name = kchar(k);
548
549         hash = get_hash(iam_leaf_container(l), name, strlen(name));
550         return e_cmp(l, n_cur(l), hash);
551 }
552
553 static int lvar_key_eq(const struct iam_leaf *l, const struct iam_key *k)
554 {
555         const char *name;
556
557         name = kchar(k);
558         return e_eq(n_cur(l), name, strlen(name));
559 }
560
561 static void __lvar_rec_set(struct iam_leaf *l, const struct iam_rec *r)
562 {
563         memcpy(e_rec(n_cur(l)), r, rec_size(r));
564 }
565
566 static void lvar_rec_set(struct iam_leaf *l, const struct iam_rec *r)
567 {
568         assert_corr(n_at_rec(l));
569         assert_corr(iam_leaf_is_locked(l));
570         __lvar_rec_set(l, r);
571         assert_inv(n_invariant(l));
572 }
573
574 static void lvar_rec_get(const struct iam_leaf *l, struct iam_rec *r)
575 {
576         struct iam_rec *rec;
577
578         rec = e_rec(n_cur(l));
579         assert_corr(n_at_rec(l));
580         assert_corr(iam_leaf_is_locked(l));
581         memcpy(r, rec, rec_size(rec));
582         assert_inv(n_invariant(l));
583 }
584
585 static int lvar_can_add(const struct iam_leaf *l,
586                         const struct iam_key *k, const struct iam_rec *r)
587 {
588         assert_corr(iam_leaf_is_locked(l));
589         return
590                 h_used(n_head(l)) +
591                 getsize(l, strlen(kchar(k)), rec_size(r)) <= blocksize(l);
592 }
593
594 static int lvar_at_end(const struct iam_leaf *folio)
595 {
596         assert_corr(iam_leaf_is_locked(folio));
597         return n_cur(folio) == n_end(folio);
598 }
599
600 static void lvar_rec_add(struct iam_leaf *leaf,
601                          const struct iam_key *k, const struct iam_rec *r)
602 {
603         const char *key;
604         int   ksize;
605         int   shift;
606         void *end;
607         void *start;
608         ptrdiff_t diff;
609
610         assert_corr(lvar_can_add(leaf, k, r));
611         assert_inv(n_invariant(leaf));
612         assert_corr(iam_leaf_is_locked(leaf));
613
614         key   = kchar(k);
615         ksize = strlen(key);
616         shift = getsize(leaf, ksize, rec_size(r));
617
618         if (!lvar_at_end(leaf)) {
619                 assert_corr(n_cur(leaf) < n_end(leaf));
620                 end = n_end(leaf);
621                 if (lvar_key_cmp(leaf, k) <= 0)
622                         lvar_next(leaf);
623                 else
624                         /*
625                          * Another exceptional case: insertion with the key
626                          * less than least key in the leaf.
627                          */
628                         assert_corr(leaf->il_at == leaf->il_entries);
629
630                 start = leaf->il_at;
631                 diff  = PDIFF(end, start);
632                 assert_corr(diff >= 0);
633                 memmove(start + shift, start, diff);
634         }
635         h_used_adj(leaf, n_head(leaf), shift);
636         n_cur(leaf)->vle_keysize = cpu_to_le16(ksize);
637         n_cur(leaf)->vle_hash = cpu_to_le32(get_hash(iam_leaf_container(leaf),
638                                                      key, ksize));
639         __lvar_key_set(leaf, k);
640         __lvar_rec_set(leaf, r);
641         assert_corr(n_at_rec(leaf));
642         assert_inv(n_invariant(leaf));
643 }
644
645 static void lvar_rec_del(struct iam_leaf *leaf, int shift)
646 {
647         void *next;
648         void *end;
649         int nob;
650
651         assert_corr(n_at_rec(leaf));
652         assert_inv(n_invariant(leaf));
653         assert_corr(iam_leaf_is_locked(leaf));
654
655         end  = n_end(leaf);
656         next = e_next(leaf, n_cur(leaf));
657         nob  = e_size(leaf, n_cur(leaf));
658         memmove(leaf->il_at, next, end - next);
659         h_used_adj(leaf, n_head(leaf), -nob);
660         assert_inv(n_invariant(leaf));
661 }
662
663 static void lvar_init_new(struct iam_container *c, struct buffer_head *bh)
664 {
665         struct lvar_leaf_header *hdr;
666
667         hdr = (struct lvar_leaf_header *)bh->b_data;
668         hdr->vlh_magic = cpu_to_le16(IAM_LVAR_LEAF_MAGIC);
669         hdr->vlh_used  = sizeof *hdr;
670 }
671
672 static struct lvar_leaf_entry *find_pivot(const struct iam_leaf *leaf,
673                                           struct lvar_leaf_entry **prev)
674 {
675         void *scan;
676         void *start;
677         int threshold;
678
679         *prev = NULL;
680         threshold = blocksize(leaf) / 2;
681         for (scan = start = n_start(leaf); scan - start <= threshold;
682              *prev = scan, scan = e_next(leaf, scan)) {
683                 ;
684         }
685         return scan;
686 }
687
688 static void lvar_split(struct iam_leaf *leaf, struct buffer_head **bh,
689                        iam_ptr_t new_blknr)
690 {
691         struct lvar_leaf_entry  *first_to_move;
692         struct lvar_leaf_entry  *last_to_stay;
693         struct iam_path         *path;
694         struct lvar_leaf_header *hdr;
695         struct buffer_head      *new_leaf;
696
697         ptrdiff_t   tomove;
698         lvar_hash_t hash;
699
700         assert_inv(n_invariant(leaf));
701         assert_corr(iam_leaf_is_locked(leaf));
702
703         new_leaf = *bh;
704         path = iam_leaf_path(leaf);
705
706         hdr = (void *)new_leaf->b_data;
707
708         first_to_move = find_pivot(leaf, &last_to_stay);
709         assert_corr(last_to_stay != NULL);
710         assert_corr(e_next(leaf, last_to_stay) == first_to_move);
711
712         hash = e_hash(first_to_move);
713         if (hash == e_hash(last_to_stay))
714                 /*
715                  * Duplicate hash.
716                  */
717                 hash |= 1;
718
719         tomove = PDIFF(n_end(leaf), first_to_move);
720         memmove(hdr + 1, first_to_move, tomove);
721
722         h_used_adj(leaf, hdr, tomove);
723         h_used_adj(leaf, n_head(leaf), -tomove);
724
725         assert_corr(n_end(leaf) == first_to_move);
726
727         if (n_cur(leaf) >= first_to_move) {
728                 /*
729                  * insertion point moves into new leaf.
730                  */
731                 ptrdiff_t shift;
732                 int result;
733
734                 shift = PDIFF(leaf->il_at, first_to_move);
735                 *bh = leaf->il_bh;
736                 leaf->il_bh = new_leaf;
737                 leaf->il_curidx = new_blknr;
738
739                 assert_corr(iam_leaf_is_locked(leaf));
740                 result = lvar_init(leaf);
741                 /*
742                  * init cannot fail, as node was just initialized.
743                  */
744                 assert_corr(result == 0);
745                 leaf->il_at = ((void *)leaf->il_at) + shift;
746         }
747         /*
748          * Insert pointer to the new node (together with the least key in
749          * the node) into index node.
750          */
751         iam_insert_key_lock(path, path->ip_frame, (struct iam_ikey *)&hash,
752                             new_blknr);
753         assert_corr(n_cur(leaf) < n_end(leaf));
754         assert_inv(n_invariant(leaf));
755 }
756
757 static struct iam_leaf_operations lvar_leaf_ops = {
758         .init           = lvar_init,
759         .init_new       = lvar_init_new,
760         .fini           = lvar_fini,
761         .start          = lvar_start,
762         .next           = lvar_next,
763         .key            = lvar_key,
764         .ikey           = lvar_ikey,
765         .rec            = lvar_rec,
766         .key_set        = lvar_key_set,
767         .key_cmp        = lvar_key_cmp,
768         .key_eq         = lvar_key_eq,
769         .key_size       = lvar_key_size,
770         .rec_set        = lvar_rec_set,
771         .rec_get        = lvar_rec_get,
772         .lookup         = lvar_lookup,
773         .ilookup        = lvar_ilookup,
774         .at_end         = lvar_at_end,
775         .rec_add        = lvar_rec_add,
776         .rec_del        = lvar_rec_del,
777         .can_add        = lvar_can_add,
778         .split          = lvar_split
779 };
780
781 /*
782  * Index operations.
783  */
784
785 enum {
786         /* This is duplicated in lustre/utils/create_iam.c */
787         /* egrep -i '^o?x?[olabcdef]*$' /usr/share/dict/words */
788         IAM_LVAR_ROOT_MAGIC = 0xb01dface
789 };
790
791 /* This is duplicated in lustre/utils/create_iam.c */
792 struct lvar_root {
793         __le32 vr_magic;
794         __le16 vr_recsize;
795         __le16 vr_ptrsize;
796         u8     vr_indirect_levels;
797         u8     vr_padding0;
798         __le16 vr_padding1;
799 };
800
801 static __u32 lvar_root_ptr(struct iam_container *c)
802 {
803         return 0;
804 }
805
806 static int lvar_node_init(struct iam_container *c, struct buffer_head *bh,
807                           int root)
808 {
809         return 0;
810 }
811
812 static struct iam_entry *lvar_root_inc(struct iam_container *c,
813                                        struct iam_path *path,
814                                        struct iam_frame *frame)
815 {
816         struct lvar_root *root;
817         struct iam_entry *entries;
818
819         assert_corr(iam_frame_is_locked(path, frame));
820         entries = frame->entries;
821
822         dx_set_count(entries, 2);
823         assert_corr(dx_get_limit(entries) == dx_root_limit(path));
824
825         root = (void *)frame->bh->b_data;
826         assert_corr(le64_to_cpu(root->vr_magic) == IAM_LVAR_ROOT_MAGIC);
827         root->vr_indirect_levels ++;
828         frame->at = entries = iam_entry_shift(path, entries, 1);
829         memset(iam_ikey_at(path, entries), 0,
830                iam_path_descr(path)->id_ikey_size);
831         return entries;
832 }
833
834 static int lvar_node_check(struct iam_path *path, struct iam_frame *frame)
835 {
836         unsigned count;
837         unsigned limit;
838         unsigned limit_correct;
839         struct iam_entry *entries;
840
841         entries = dx_node_get_entries(path, frame);
842
843         if (frame == path->ip_frames) {
844                 struct lvar_root *root;
845
846                 root = (void *)frame->bh->b_data;
847                 if (le64_to_cpu(root->vr_magic) != IAM_LVAR_ROOT_MAGIC)
848                         return -EIO;
849                 limit_correct = dx_root_limit(path);
850         } else
851                 limit_correct = dx_node_limit(path);
852         count = dx_get_count(entries);
853         limit = dx_get_limit(entries);
854         if (count > limit)
855                 return -EIO;
856         if (limit != limit_correct)
857                 return -EIO;
858         return 0;
859 }
860
861 static int lvar_node_load(struct iam_path *path, struct iam_frame *frame)
862 {
863         struct iam_entry *entries;
864         void *data;
865         entries = dx_node_get_entries(path, frame);
866
867         data = frame->bh->b_data;
868
869         if (frame == path->ip_frames) {
870                 struct lvar_root *root;
871                 const char *name;
872
873                 root = data;
874                 name = kchar(path->ip_key_target);
875                 path->ip_indirect = root->vr_indirect_levels;
876                 if (path->ip_ikey_target == NULL) {
877                         path->ip_ikey_target = iam_path_ikey(path, 4);
878                         *(lvar_hash_t *)path->ip_ikey_target =
879                                 get_hash(path->ip_container, name,
880                                          strlen(name));
881                 }
882         }
883         frame->entries = frame->at = entries;
884         return 0;
885 }
886
887 static int lvar_ikeycmp(const struct iam_container *c,
888                         const struct iam_ikey *k1, const struct iam_ikey *k2)
889 {
890         lvar_hash_t p1 = le32_to_cpu(*(lvar_hash_t *)k1);
891         lvar_hash_t p2 = le32_to_cpu(*(lvar_hash_t *)k2);
892
893         return p1 > p2 ? +1 : (p1 < p2 ? -1 : 0);
894 }
895
896 static struct iam_path_descr *lvar_ipd_alloc(const struct iam_container *c,
897                                              void *area)
898 {
899         return iam_ipd_alloc(area, c->ic_descr->id_ikey_size);
900 }
901
902 static void lvar_root(void *buf,
903                       int blocksize, int keysize, int ptrsize, int recsize)
904 {
905         struct lvar_root *root;
906         struct dx_countlimit *limit;
907         void                 *entry;
908         int isize;
909
910         isize = sizeof(lvar_hash_t) + ptrsize;
911         root = buf;
912         *root = (typeof(*root)) {
913                 .vr_magic            = cpu_to_le32(IAM_LVAR_ROOT_MAGIC),
914                 .vr_recsize          = cpu_to_le16(recsize),
915                 .vr_ptrsize          = cpu_to_le16(ptrsize),
916                 .vr_indirect_levels  = 0
917         };
918
919         limit = (void *)(root + 1);
920         *limit = (typeof(*limit)){
921                 /*
922                  * limit itself + one pointer to the leaf.
923                  */
924                 .count = cpu_to_le16(2),
925                 .limit = iam_root_limit(sizeof(struct lvar_root), blocksize,
926                                         sizeof (lvar_hash_t) + ptrsize)
927         };
928
929         entry = root + 1;
930         /*
931          * Skip over @limit.
932          */
933         entry += isize;
934
935         /*
936          * Entry format is <key> followed by <ptr>. In the minimal tree
937          * consisting of a root and single node, <key> is a minimal possible
938          * key.
939          */
940         *(lvar_hash_t *)entry = 0;
941         entry += sizeof(lvar_hash_t);
942         /* now @entry points to <ptr> */
943         if (ptrsize == 4)
944                 *(u_int32_t *)entry = cpu_to_le32(1);
945         else
946                 *(u_int64_t *)entry = cpu_to_le64(1);
947 }
948
949 static int lvar_esize(int namelen, int recsize)
950 {
951         return (offsetof(struct lvar_leaf_entry, vle_key) +
952                 namelen + recsize + LVAR_ROUND) & ~LVAR_ROUND;
953 }
954
955 static void lvar_leaf(void *buf,
956                       int blocksize, int keysize, int ptrsize, int recsize)
957 {
958         struct lvar_leaf_header *head;
959         struct lvar_leaf_entry  *entry;
960
961         /* form leaf */
962         head = buf;
963         *head = (typeof(*head)) {
964                 .vlh_magic = cpu_to_le16(IAM_LVAR_LEAF_MAGIC),
965                 .vlh_used  = cpu_to_le16(sizeof *head + lvar_esize(0, recsize))
966         };
967         entry = (void *)(head + 1);
968         *entry = (typeof(*entry)) {
969                 .vle_hash    = 0,
970                 .vle_keysize = 0
971         };
972         memset(e_rec(entry), 0, recsize);
973         *(char *)e_rec(entry) = recsize;
974 }
975
976 int iam_lvar_create(struct inode *obj,
977                     int keysize, int ptrsize, int recsize, handle_t *handle)
978 {
979         struct buffer_head *root_node;
980         struct buffer_head *leaf_node;
981         struct super_block *sb;
982
983         u32 blknr;
984         int result;
985         unsigned long bsize;
986
987         assert_corr(obj->i_size == 0);
988
989         sb = obj->i_sb;
990         bsize = sb->s_blocksize;
991         root_node = ldiskfs_append(handle, obj, &blknr, &result);
992         leaf_node = ldiskfs_append(handle, obj, &blknr, &result);
993         if (root_node != NULL && leaf_node != NULL) {
994                 lvar_root(root_node->b_data, bsize, keysize, ptrsize, recsize);
995                 lvar_leaf(leaf_node->b_data, bsize, keysize, ptrsize, recsize);
996                 ldiskfs_mark_inode_dirty(handle, obj);
997                 result = ldiskfs_journal_dirty_metadata(handle, root_node);
998                 if (result == 0)
999                         result = ldiskfs_journal_dirty_metadata(handle, leaf_node);
1000                 if (result != 0)
1001                         ldiskfs_std_error(sb, result);
1002         }
1003         brelse(leaf_node);
1004         brelse(root_node);
1005         return result;
1006 }
1007 EXPORT_SYMBOL(iam_lvar_create);
1008
1009 static struct iam_operations lvar_ops = {
1010         .id_root_ptr    = lvar_root_ptr,
1011         .id_node_read   = iam_node_read,
1012         .id_node_init   = lvar_node_init,
1013         .id_node_check  = lvar_node_check,
1014         .id_node_load   = lvar_node_load,
1015         .id_ikeycmp     = lvar_ikeycmp,
1016         .id_root_inc    = lvar_root_inc,
1017         .id_ipd_alloc   = lvar_ipd_alloc,
1018         .id_ipd_free    = iam_ipd_free,
1019         .id_name        = "lvar"
1020 };
1021
1022 static int lvar_guess(struct iam_container *c)
1023 {
1024         int result;
1025         struct buffer_head *bh;
1026         const struct lvar_root *root;
1027
1028         assert_corr(c->ic_object != NULL);
1029
1030         result = iam_node_read(c, lvar_root_ptr(c), NULL, &bh);
1031         if (result == 0) {
1032                 root = (void *)bh->b_data;
1033                 if (le64_to_cpu(root->vr_magic) == IAM_LVAR_ROOT_MAGIC) {
1034                         struct iam_descr *descr;
1035
1036                         descr = c->ic_descr;
1037                         descr->id_key_size  = LDISKFS_NAME_LEN;
1038                         descr->id_ikey_size = sizeof (lvar_hash_t);
1039                         descr->id_rec_size  = le16_to_cpu(root->vr_recsize);
1040                         descr->id_ptr_size  = le16_to_cpu(root->vr_ptrsize);
1041                         descr->id_root_gap  = sizeof *root;
1042                         descr->id_node_gap  = 0;
1043                         descr->id_ops       = &lvar_ops;
1044                         descr->id_leaf_ops  = &lvar_leaf_ops;
1045                 } else
1046                         result = -EBADF;
1047                 brelse(bh);
1048         }
1049         return result;
1050 }
1051
1052 static struct iam_format lvar_format = {
1053         .if_guess = lvar_guess
1054 };
1055
1056 void iam_lvar_format_init(void)
1057 {
1058         iam_format_register(&lvar_format);
1059 }
1060