Whamcloud - gitweb
cab85754326cba39e2fc263cf255f0136d89f6e3
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_iam_lfix.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, 2015, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * iam_lfix.c
33  * implementation of iam format for fixed size records.
34  *
35  * Author: Wang Di <wangdi@clusterfs.com>
36  * Author: Nikita Danilov <nikita@clusterfs.com>
37  */
38
39 #include <linux/types.h>
40 #include "osd_internal.h"
41
42 /*
43  * Leaf operations.
44  */
45
/* Magic value stored in iam_leaf_head::ill_magic to identify lfix leaves. */
enum {
        IAM_LEAF_HEADER_MAGIC = 0x1976 /* This is duplicated in
                                        * lustre/utils/create_iam.c */
};
50
/* This is duplicated in lustre/utils/create_iam.c */
/*
 * On-disk header at the start of every lfix leaf block; the fixed-size
 * (key, record) entries follow immediately after it.
 */
struct iam_leaf_head {
        __le16 ill_magic;       /* IAM_LEAF_HEADER_MAGIC */
        __le16 ill_count;       /* number of entries used in this leaf */
};
56
57 static inline int iam_lfix_entry_size(const struct iam_leaf *l)
58 {
59         return iam_leaf_descr(l)->id_key_size + iam_leaf_descr(l)->id_rec_size;
60 }
61
/* Move @entry by @shift whole entries (negative @shift moves backwards). */
static inline struct iam_lentry *
iam_lfix_shift(const struct iam_leaf *l, struct iam_lentry *entry, int shift)
{
        void *base = entry;

        return base + shift * iam_lfix_entry_size(l);
}
67
/* An entry starts with its key, so the entry address is the key address. */
static inline struct iam_key *iam_leaf_key_at(struct iam_lentry *entry)
{
        void *key = entry;

        return key;
}
72
73 static inline int lfix_keycmp(const struct iam_container *c,
74                               const struct iam_key *k1,
75                               const struct iam_key *k2)
76 {
77         return memcmp(k1, k2, c->ic_descr->id_key_size);
78 }
79
80 static struct iam_leaf_head *iam_get_head(const struct iam_leaf *l)
81 {
82         return (struct iam_leaf_head *)l->il_bh->b_data;
83 }
84
85 static struct iam_lentry *iam_entries(const struct buffer_head *bh)
86 {
87         return (void *)bh->b_data + sizeof(struct iam_leaf_head);
88 }
89
/* Entry array of leaf @l (convenience wrapper around iam_entries()). */
static struct iam_lentry *iam_get_lentries(const struct iam_leaf *l)
{
        return iam_entries(l->il_bh);
}
94
95 static int leaf_count_limit(const struct iam_leaf *leaf)
96 {
97         int free_space;
98
99         free_space = iam_leaf_container(leaf)->ic_object->i_sb->s_blocksize;
100         free_space -= sizeof(struct iam_leaf_head);
101         return free_space / iam_lfix_entry_size(leaf);
102 }
103
104 static int lentry_count_get(const struct iam_leaf *leaf)
105 {
106         return le16_to_cpu(iam_get_head(leaf)->ill_count);
107 }
108
109 static void lentry_count_set(struct iam_leaf *leaf, unsigned count)
110 {
111         assert_corr(0 <= count && count <= leaf_count_limit(leaf));
112         iam_get_head(leaf)->ill_count = cpu_to_le16(count);
113 }
114
115 static struct iam_lentry *iam_lfix_get_end(const struct iam_leaf *l);
116
#if LDISKFS_CORRECTNESS_ON || LDISKFS_INVARIANT_ON
/*
 * Invariant helper (debug builds only): true iff the leaf cursor (il_at)
 * points at a valid entry, i.e. within [first entry, end of used entries).
 */
static int iam_leaf_at_rec(const struct iam_leaf *folio)
{
        return
                iam_get_lentries(folio) <= folio->il_at &&
                folio->il_at < iam_lfix_get_end(folio);
}
#endif
125
126 static struct iam_ikey *iam_lfix_ikey(const struct iam_leaf *l,
127                                       struct iam_ikey *key)
128 {
129         void *ie = l->il_at;
130         assert_corr(iam_leaf_at_rec(l));
131         return (struct iam_ikey*)ie;
132 }
133
134 static struct iam_key *iam_lfix_key(const struct iam_leaf *l)
135 {
136         void *ie = l->il_at;
137         assert_corr(iam_leaf_at_rec(l));
138         return (struct iam_key*)ie;
139 }
140
141 static int iam_lfix_key_size(const struct iam_leaf *l)
142 {
143         return iam_leaf_descr(l)->id_key_size;
144 }
145
/* Position the leaf cursor at the first entry. */
static void iam_lfix_start(struct iam_leaf *l)
{
        l->il_at = iam_get_lentries(l);
}
150
/*
 * Distance between two entries of the same leaf, measured in whole
 * entries: (e1 - e2) / entry_size.
 */
static inline ptrdiff_t iam_lfix_diff(const struct iam_leaf *l,
                                      const struct iam_lentry *e1,
                                      const struct iam_lentry *e2)
{
        ptrdiff_t diff;
        int esize;

        esize = iam_lfix_entry_size(l);
        diff = (void *)e1 - (void *)e2;
        /* the byte distance must be an exact multiple of the entry size */
        assert_corr(diff / esize * esize == diff);
        return diff / esize;
}
163
/*
 * Attach the in-memory leaf cursor to the buffer in l->il_bh: validate the
 * on-disk header (magic and entry count within the per-block limit) and
 * point il_at/il_entries at the first entry.
 *
 * Returns 0 on success, -EIO if the block looks corrupted.
 */
static int iam_lfix_init(struct iam_leaf *l)
{
        int result;
        struct iam_leaf_head *ill;
        int count;

        assert_corr(l->il_bh != NULL);

        ill = iam_get_head(l);
        count = le16_to_cpu(ill->ill_count);
        if (le16_to_cpu(ill->ill_magic) == IAM_LEAF_HEADER_MAGIC &&
            0 <= count && count <= leaf_count_limit(l)) {
                l->il_at = l->il_entries = iam_get_lentries(l);
                result = 0;
        } else {
                struct inode *obj;

                obj = iam_leaf_container(l)->ic_object;
                CERROR("Wrong magic in node %llu (#%lu): %#x != %#x or "
                       "wrong count: %d (%d)\n",
                       (unsigned long long)l->il_bh->b_blocknr, obj->i_ino,
                       le16_to_cpu(ill->ill_magic), IAM_LEAF_HEADER_MAGIC,
                       count, leaf_count_limit(l));
                result = -EIO;
        }
        return result;
}
191
/* Detach the cursor/entry pointers when the leaf handle is released. */
static void iam_lfix_fini(struct iam_leaf *l)
{
        l->il_entries = l->il_at = NULL;
}
196
197 static struct iam_lentry *iam_lfix_get_end(const struct iam_leaf *l)
198 {
199         int count = lentry_count_get(l);
200         struct iam_lentry *ile = iam_lfix_shift(l, l->il_entries, count);
201
202         return ile;
203 }
204
205 static struct iam_rec *iam_lfix_rec(const struct iam_leaf *l)
206 {
207         void *e = l->il_at;
208         assert_corr(iam_leaf_at_rec(l));
209         return e + iam_leaf_descr(l)->id_key_size;
210 }
211
/* Advance the cursor by one entry; must currently point at a valid one. */
static void iam_lfix_next(struct iam_leaf *l)
{
        assert_corr(iam_leaf_at_rec(l));
        l->il_at = iam_lfix_shift(l, l->il_at, 1);
}
217
/*
 * Bug chasing.
 */
/* Debugging knob: when non-zero, dump leaf contents after each lookup. */
int lfix_dump = 0;
222
/* Map the low nibble of @ch to its lowercase hexadecimal digit. */
static char hdigit(char ch)
{
        static char digits[] = "0123456789abcdef";

        return digits[ch & 0xf];
}

/*
 * Render byte @ch as two lowercase hex characters into @area (which must
 * hold at least 3 bytes); returns the NUL-terminated @area.
 */
static char *hex(char ch, char *area)
{
        area[0] = hdigit(ch >> 4);
        area[1] = hdigit(ch & 0xf);
        area[2] = '\0';
        return area;
}
236
/* Dump one entry to the console as "[key-hex]-(record-hex)". */
static void l_print(struct iam_leaf *leaf, struct iam_lentry *entry)
{
        int i;
        char *area;
        char h[3];

        area = (char *)entry;
        printk(KERN_EMERG "[");
        for (i = iam_lfix_key_size(leaf); i > 0; --i, ++area)
                printk("%s", hex(*area, h));
        printk("]-(");
        for (i = iam_leaf_descr(leaf)->id_rec_size; i > 0; --i, ++area)
                printk("%s", hex(*area, h));
        printk(")\n");
}
252
/* Dump every entry of @leaf via l_print(); triggered when lfix_dump != 0. */
static void lfix_print(struct iam_leaf *leaf)
{
        struct iam_lentry *entry;
        int count;
        int i;

        entry = leaf->il_entries;
        count = lentry_count_get(leaf);
        printk(KERN_EMERG "lfix: %p %p %d\n", leaf, leaf->il_at, count);
        for (i = 0; i < count; ++i, entry = iam_lfix_shift(leaf, entry, 1))
                l_print(leaf, entry);
}
265
/*
 * Binary-search leaf @l for key @k and position the cursor (il_at).
 *
 * Returns:
 *   IAM_LOOKUP_EMPTY:  the leaf has no entries;
 *   IAM_LOOKUP_BEFORE: @k is smaller than every key; cursor at first entry;
 *   IAM_LOOKUP_EXACT:  cursor at the first entry whose key equals @k;
 *   IAM_LOOKUP_OK:     cursor at the last entry with key <= @k.
 */
static int iam_lfix_lookup(struct iam_leaf *l, const struct iam_key *k)
{
        struct iam_lentry *p, *q, *m, *t;
        struct iam_container *c;
        int count;
        int result;

        count = lentry_count_get(l);
        if (count == 0)
                return IAM_LOOKUP_EMPTY;

        result = IAM_LOOKUP_OK;
        c = iam_leaf_container(l);

        p = l->il_entries;
        q = iam_lfix_shift(l, p, count - 1);
        if (lfix_keycmp(c, k, iam_leaf_key_at(p)) < 0) {
                /*
                 * @k is less than the least key in the leaf
                 */
                l->il_at = p;
                result = IAM_LOOKUP_BEFORE;
        } else if (lfix_keycmp(c, iam_leaf_key_at(q), k) <= 0) {
                l->il_at = q;
        } else {
                /*
                 * Binary search over (p, q), maintaining the invariant
                 * key(p) <= @k < key(q)  (Dijkstra's EWD1293).
                 */
                while (iam_lfix_shift(l, p, 1) != q) {
                        m = iam_lfix_shift(l, p, iam_lfix_diff(l, q, p) / 2);
                        assert_corr(p < m && m < q);
                        if (lfix_keycmp(c, iam_leaf_key_at(m), k) <= 0)
                                p = m;
                        else
                                q = m;
                }
                assert_corr(lfix_keycmp(c, iam_leaf_key_at(p), k) <= 0 &&
                            lfix_keycmp(c, k, iam_leaf_key_at(q)) < 0);
                /*
                 * skip over records with duplicate keys, so that the
                 * cursor lands on the first entry matching @k.
                 */
                while (p > l->il_entries) {
                        t = iam_lfix_shift(l, p, -1);
                        if (lfix_keycmp(c, iam_leaf_key_at(t), k) == 0)
                                p = t;
                        else
                                break;
                }
                l->il_at = p;
        }
        assert_corr(iam_leaf_at_rec(l));

        if (lfix_keycmp(c, iam_leaf_key_at(l->il_at), k) == 0)
                result = IAM_LOOKUP_EXACT;

        if (lfix_dump)
                lfix_print(l);

        return result;
}
326
/* For lfix, index keys and leaf keys coincide, so ilookup is lookup. */
static int iam_lfix_ilookup(struct iam_leaf *l, const struct iam_ikey *ik)
{
        return iam_lfix_lookup(l, (const struct iam_key *)ik);
}
331
332 static void iam_lfix_key_set(struct iam_leaf *l, const struct iam_key *k)
333 {
334         assert_corr(iam_leaf_at_rec(l));
335         memcpy(iam_leaf_key_at(l->il_at), k, iam_leaf_descr(l)->id_key_size);
336 }
337
338 static int iam_lfix_key_cmp(const struct iam_leaf *l, const struct iam_key *k)
339 {
340         return lfix_keycmp(iam_leaf_container(l), iam_leaf_key_at(l->il_at), k);
341 }
342
343 static int iam_lfix_key_eq(const struct iam_leaf *l, const struct iam_key *k)
344 {
345         return !lfix_keycmp(iam_leaf_container(l),
346                             iam_leaf_key_at(l->il_at), k);
347 }
348
349 static void iam_lfix_rec_set(struct iam_leaf *l, const struct iam_rec *r)
350 {
351         assert_corr(iam_leaf_at_rec(l));
352         memcpy(iam_lfix_rec(l), r, iam_leaf_descr(l)->id_rec_size);
353 }
354
355 static inline int lfix_reccmp(const struct iam_container *c,
356                               const struct iam_rec *r1,
357                               const struct iam_rec *r2)
358 {
359         return memcmp(r1, r2, c->ic_descr->id_rec_size);
360 }
361
/* True iff the record under the cursor equals @r byte-for-byte. */
static int iam_lfix_rec_eq(const struct iam_leaf *l, const struct iam_rec *r)
{
        return lfix_reccmp(iam_leaf_container(l), iam_lfix_rec(l), r) == 0;
}
366
367 static void iam_lfix_rec_get(const struct iam_leaf *l, struct iam_rec *r)
368 {
369         assert_corr(iam_leaf_at_rec(l));
370         memcpy(r, iam_lfix_rec(l), iam_leaf_descr(l)->id_rec_size);
371 }
372
/*
 * Insert a (key, record) pair at the cursor position, shifting subsequent
 * entries right by one slot.  The caller must have checked there is room
 * (iam_leaf_can_add()) and positioned the cursor with a prior lookup.
 */
static void iam_lfix_rec_add(struct iam_leaf *leaf,
                             const struct iam_key *k, const struct iam_rec *r)
{
        struct iam_lentry *end;
        struct iam_lentry *cur;
        struct iam_lentry *start;
        ptrdiff_t diff;
        int count;

        assert_corr(iam_leaf_can_add(leaf, k, r));

        count = lentry_count_get(leaf);
        /*
         * This branch handles two exceptional cases:
         *
         *   - leaf positioned beyond last record, and
         *
         *   - empty leaf.
         */
        if (!iam_leaf_at_end(leaf)) {
                end   = iam_lfix_get_end(leaf);
                cur   = leaf->il_at;
                if (lfix_keycmp(iam_leaf_container(leaf),
                               k, iam_leaf_key_at(cur)) >= 0)
                        iam_lfix_next(leaf);
                else
                        /*
                         * Another exceptional case: insertion with the key
                         * less than least key in the leaf.
                         */
                        assert_corr(cur == leaf->il_entries);

                start = leaf->il_at;
                diff  = (void *)end - (void *)start;
                assert_corr(diff >= 0);
                /* open a one-entry gap at the insertion point */
                memmove(iam_lfix_shift(leaf, start, 1), start, diff);
        }
        /* bump the count first, then fill the slot under the cursor */
        lentry_count_set(leaf, count + 1);
        iam_lfix_key_set(leaf, k);
        iam_lfix_rec_set(leaf, r);
        assert_corr(iam_leaf_at_rec(leaf));
}
415
/*
 * Delete the entry under the cursor by sliding all following entries one
 * slot left.  @shift is part of the generic leaf-operations interface and
 * is not used by the fixed-size format.
 */
static void iam_lfix_rec_del(struct iam_leaf *leaf, int shift)
{
        struct iam_lentry *next, *end;
        int count;
        ptrdiff_t diff;

        assert_corr(iam_leaf_at_rec(leaf));

        count = lentry_count_get(leaf);
        end = iam_lfix_get_end(leaf);
        next = iam_lfix_shift(leaf, leaf->il_at, 1);
        diff = (void *)end - (void *)next;
        /* overlapping copy: entries after the cursor move down by one */
        memmove(leaf->il_at, next, diff);

        lentry_count_set(leaf, count - 1);
}
432
/*
 * True iff the leaf still has room for one more entry.  @k and @r are part
 * of the generic interface; the fixed-size format needs only the count.
 */
static int iam_lfix_can_add(const struct iam_leaf *l,
                            const struct iam_key *k, const struct iam_rec *r)
{
        int used = lentry_count_get(l);
        int limit = leaf_count_limit(l);

        return used < limit;
}
438
439 static int iam_lfix_at_end(const struct iam_leaf *folio)
440 {
441         return folio->il_at == iam_lfix_get_end(folio);
442 }
443
444 static void iam_lfix_init_new(struct iam_container *c, struct buffer_head *bh)
445 {
446         struct iam_leaf_head *hdr;
447
448         hdr = (struct iam_leaf_head*)bh->b_data;
449         hdr->ill_magic = cpu_to_le16(IAM_LEAF_HEADER_MAGIC);
450         hdr->ill_count = cpu_to_le16(0);
451 }
452
/*
 * Split a full leaf: move the upper half of its entries into the freshly
 * allocated block @*bh (at block number @new_blknr) and insert the pivot
 * key (least key of the moved half) into the parent index node.  If the
 * cursor (il_at) falls into the moved half, the leaf handle is switched
 * to the new block and *bh is set to the old buffer instead.
 */
static void iam_lfix_split(struct iam_leaf *l, struct buffer_head **bh,
                           iam_ptr_t new_blknr)
{
        struct iam_path       *path;
        struct iam_leaf_head  *hdr;
        const struct iam_ikey *pivot;
        struct buffer_head    *new_leaf;

        unsigned count;
        unsigned split;

        void *start;
        void *finis;

        new_leaf = *bh;
        path = iam_leaf_path(l);

        hdr = (void *)new_leaf->b_data;

        count = lentry_count_get(l);
        split = count / 2;

        /* entries [split, count) migrate to the new leaf */
        start = iam_lfix_shift(l, iam_get_lentries(l), split);
        finis = iam_lfix_shift(l, iam_get_lentries(l), count);

        pivot = (const struct iam_ikey *)iam_leaf_key_at(start);

        memmove(iam_entries(new_leaf), start, finis - start);
        hdr->ill_count = cpu_to_le16(count - split);
        lentry_count_set(l, split);
        if ((void *)l->il_at >= start) {
                /*
                 * insertion point moves into new leaf.
                 */
                int shift;
                int result;

                shift = iam_lfix_diff(l, l->il_at, start);
                *bh = l->il_bh;
                l->il_bh = new_leaf;
                l->il_curidx = new_blknr;
                result = iam_lfix_init(l);
                /*
                 * init cannot fail, as node was just initialized.
                 */
                assert_corr(result == 0);
                l->il_at = iam_lfix_shift(l, iam_get_lentries(l), shift);
        }
        /*
         * Insert pointer to the new node (together with the least key in
         * the node) into index node.
         */
        iam_insert_key_lock(path, path->ip_frame, pivot, new_blknr);
}
507
/* True iff the leaf holds no entries. */
static int iam_lfix_leaf_empty(struct iam_leaf *leaf)
{
        return lentry_count_get(leaf) == 0;
}
512
/* Leaf-operations vector wiring the lfix implementation into generic iam. */
static struct iam_leaf_operations iam_lfix_leaf_ops = {
        .init           = iam_lfix_init,
        .init_new       = iam_lfix_init_new,
        .fini           = iam_lfix_fini,
        .start          = iam_lfix_start,
        .next           = iam_lfix_next,
        .key            = iam_lfix_key,
        .ikey           = iam_lfix_ikey,
        .rec            = iam_lfix_rec,
        .key_set        = iam_lfix_key_set,
        .key_cmp        = iam_lfix_key_cmp,
        .key_eq         = iam_lfix_key_eq,
        .key_size       = iam_lfix_key_size,
        .rec_set        = iam_lfix_rec_set,
        .rec_eq         = iam_lfix_rec_eq,
        .rec_get        = iam_lfix_rec_get,
        .lookup         = iam_lfix_lookup,
        .ilookup        = iam_lfix_ilookup,
        .at_end         = iam_lfix_at_end,
        .rec_add        = iam_lfix_rec_add,
        .rec_del        = iam_lfix_rec_del,
        .can_add        = iam_lfix_can_add,
        .split          = iam_lfix_split,
        .leaf_empty     = iam_lfix_leaf_empty,
};
538
539 /*
540  * Index operations.
541  */
542
/* Magic stored in iam_lfix_root::ilr_magic identifying an lfix root block. */
enum {
        /* This is duplicated in lustre/utils/create_iam.c */
        /*
         * Then shalt thou see the dew-BEDABBLED wretch
         * Turn, and return, indenting with the way;
         * Each envious brier his weary legs doth scratch,
         * Each shadow makes him stop, each murmur stay:
         * For misery is trodden on by many,
         * And being low never relieved by any.
         */
        IAM_LFIX_ROOT_MAGIC = 0xbedabb1edULL // d01efull
};
555
/* This is duplicated in lustre/utils/create_iam.c */
/* On-disk root-block header of an lfix container. */
struct iam_lfix_root {
        __le64  ilr_magic;              /* IAM_LFIX_ROOT_MAGIC */
        __le16  ilr_keysize;            /* key size in bytes */
        __le16  ilr_recsize;            /* record size in bytes */
        __le16  ilr_ptrsize;            /* block-pointer size in bytes */
        u8      ilr_indirect_levels;    /* tree depth below the root */
        u8      ilr_padding;
};
565
/* The root of an lfix container is always block 0 of the object. */
static __u32 iam_lfix_root_ptr(struct iam_container *c)
{
        return 0;
}
570
/* Per-node init hook: lfix index nodes need no extra initialization. */
static int iam_lfix_node_init(struct iam_container *c, struct buffer_head *bh,
                              int root)
{
        return 0;
}
576
/*
 * Grow the tree by one level: set the root's entry count to 2 (the limit
 * entry plus one downward pointer), bump ilr_indirect_levels, and zero the
 * index key of the new second slot (the smallest possible key).  Returns
 * the new slot, which also becomes frame->at.
 */
static struct iam_entry *iam_lfix_root_inc(struct iam_container *c,
                                           struct iam_path *path,
                                           struct iam_frame *frame)
{
        struct iam_lfix_root *root;
        struct iam_entry     *entries;

        entries = frame->entries;

        dx_set_count(entries, 2);
        assert_corr(dx_get_limit(entries) == dx_root_limit(path));

        root = (void *)frame->bh->b_data;
        assert_corr(le64_to_cpu(root->ilr_magic) == IAM_LFIX_ROOT_MAGIC);
        root->ilr_indirect_levels ++;
        frame->at = entries = iam_entry_shift(path, entries, 1);
        /* new slot gets the smallest possible (all-zero) index key */
        memset(iam_ikey_at(path, entries), 0,
               iam_path_descr(path)->id_ikey_size);
        return entries;
}
597
598 static int iam_lfix_node_check(struct iam_path *path, struct iam_frame *frame)
599 {
600         unsigned count;
601         unsigned limit;
602         unsigned limit_correct;
603         struct iam_entry *entries;
604
605         entries = dx_node_get_entries(path, frame);
606
607         if (frame == path->ip_frames) {
608                 struct iam_lfix_root *root;
609
610                 root = (void *)frame->bh->b_data;
611                 if (le64_to_cpu(root->ilr_magic) != IAM_LFIX_ROOT_MAGIC) {
612                         return -EIO;
613                 }
614                 limit_correct = dx_root_limit(path);
615         } else
616                 limit_correct = dx_node_limit(path);
617         count = dx_get_count(entries);
618         limit = dx_get_limit(entries);
619         if (count > limit) {
620                 return -EIO;
621         }
622         if (limit != limit_correct) {
623                 return -EIO;
624         }
625         return 0;
626 }
627
/*
 * Per-frame load hook, called after an index block has been read.  For
 * the root frame it picks up the indirection depth from the on-disk root
 * and defaults ip_ikey_target to the search key; it then points
 * frame->entries and frame->at at the block's entry array.  Returns 0.
 */
static int iam_lfix_node_load(struct iam_path *path, struct iam_frame *frame)
{
        struct iam_entry *entries;
        void *data;
        entries = dx_node_get_entries(path, frame);

        data = frame->bh->b_data;

        if (frame == path->ip_frames) {
                struct iam_lfix_root *root;

                root = data;
                path->ip_indirect = root->ilr_indirect_levels;
                if (path->ip_ikey_target == NULL)
                        path->ip_ikey_target =
                                (struct iam_ikey *)path->ip_key_target;
        }
        frame->entries = frame->at = entries;
        return 0;
}
648
649 static int iam_lfix_ikeycmp(const struct iam_container *c,
650                             const struct iam_ikey *k1,
651                             const struct iam_ikey *k2)
652 {
653         return memcmp(k1, k2, c->ic_descr->id_ikey_size);
654 }
655
/* Allocate a path descriptor in @area, sized for this container's ikeys. */
static struct iam_path_descr *iam_lfix_ipd_alloc(const struct iam_container *c,
                                                 void *area)
{
        return iam_ipd_alloc(area, c->ic_descr->id_ikey_size);
}
661
/* Index (non-leaf) operations vector for the lfix format. */
static struct iam_operations iam_lfix_ops = {
        .id_root_ptr    = iam_lfix_root_ptr,
        .id_node_read   = iam_node_read,
        .id_node_init   = iam_lfix_node_init,
        .id_node_check  = iam_lfix_node_check,
        .id_node_load   = iam_lfix_node_load,
        .id_ikeycmp     = iam_lfix_ikeycmp,
        .id_root_inc    = iam_lfix_root_inc,
        .id_ipd_alloc   = iam_lfix_ipd_alloc,
        .id_ipd_free    = iam_ipd_free,
        .id_name        = "lfix"
};
674
/*
 * Format-detection hook: read the root block of @c and, if it carries the
 * lfix magic, fill the container descriptor (key/ikey/record/pointer sizes,
 * gaps, and operation vectors) from the on-disk root and keep a reference
 * to the root buffer in ic_root_bh.
 *
 * Returns 0 on success, -EBADF if the magic does not match (the buffer is
 * released), or the error from reading the root block.
 */
static int iam_lfix_guess(struct iam_container *c)
{
        int result;
        struct buffer_head *bh;
        const struct iam_lfix_root *root;

        assert_corr(c->ic_object != NULL);

        result = iam_node_read(c, iam_lfix_root_ptr(c), NULL, &bh);
        if (result == 0) {
                root = (void *)bh->b_data;
                if (le64_to_cpu(root->ilr_magic) == IAM_LFIX_ROOT_MAGIC) {
                        struct iam_descr *descr;

                        descr = c->ic_descr;
                        descr->id_key_size  = le16_to_cpu(root->ilr_keysize);
                        /* lfix index keys are the keys themselves */
                        descr->id_ikey_size = le16_to_cpu(root->ilr_keysize);
                        descr->id_rec_size  = le16_to_cpu(root->ilr_recsize);
                        descr->id_ptr_size  = le16_to_cpu(root->ilr_ptrsize);
                        descr->id_root_gap  = sizeof(struct iam_lfix_root);
                        descr->id_node_gap  = 0;
                        descr->id_ops       = &iam_lfix_ops;
                        descr->id_leaf_ops  = &iam_lfix_leaf_ops;

                        c->ic_root_bh = bh;
                } else {
                        result = -EBADF;
                        brelse(bh);
                }
        }
        return result;
}
707
/* Format descriptor registered with the generic iam format-guessing code. */
static struct iam_format iam_lfix_format = {
        .if_guess = iam_lfix_guess
};
711
/* Register the lfix format with the iam core; called at module setup. */
void iam_lfix_format_init(void)
{
        iam_format_register(&iam_lfix_format);
}
716
717 /*
718  * Debugging aid.
719  */
720
/* Sizes used by the lfix_* layout structs below (4096-byte blocks assumed). */
#define KEYSIZE (8)
#define RECSIZE (8)
#define PTRSIZE (4)

/* (key, ptr) pairs that fit in a root block after the root header. */
#define LFIX_ROOT_RECNO \
        ((4096 - sizeof(struct iam_lfix_root)) / (KEYSIZE + PTRSIZE))

/* (key, ptr) pairs that fit in an interior index block. */
#define LFIX_INDEX_RECNO (4096 / (KEYSIZE + PTRSIZE))

/* (key, rec) pairs that fit in a leaf block after the leaf header. */
#define LFIX_LEAF_RECNO \
        ((4096 - sizeof(struct iam_leaf_head)) / (KEYSIZE + RECSIZE))
732
/* Debugging aid: layout of an lfix root block with the sizes above. */
struct lfix_root {
        struct iam_lfix_root lr_root;
        struct {
                char key[KEYSIZE];
                char ptr[PTRSIZE];
        } lr_entry[LFIX_ROOT_RECNO];
};
740
/* Debugging aid: layout of an interior index block (count/limit first). */
struct lfix_index {
        struct dx_countlimit li_cl;
        char   li_padding[KEYSIZE + PTRSIZE - sizeof(struct dx_countlimit)];
        struct {
                char key[KEYSIZE];
                char ptr[PTRSIZE];
        } li_entry[LFIX_INDEX_RECNO - 1];
};
749
/* Debugging aid: layout of a leaf block with the sizes above. */
struct lfix_leaf {
        struct iam_leaf_head ll_head;
        struct {
                char key[KEYSIZE];
                char rec[RECSIZE];
        } ll_entry[LFIX_LEAF_RECNO];
};
757
/*
 * Store @val at the possibly-unaligned destination @dst via memcpy; the
 * sizes of @val and *@dst must match (checked at compile time).
 */
#define STORE_UNALIGNED(val, dst)                       \
({                                                      \
        typeof(val) __val = (val);                      \
        CLASSERT(sizeof(val) == sizeof(*(dst)));        \
        memcpy(dst, &__val, sizeof(*(dst)));            \
})
764
/*
 * Format @buf as the root block of a minimal lfix tree: the on-disk root
 * header, a dx_countlimit, the "idle blocks" slot, and a single
 * (zero key, block 1) entry pointing at the first leaf.
 */
static void lfix_root(void *buf,
                      int blocksize, int keysize, int ptrsize, int recsize)
{
        struct iam_lfix_root *root;
        struct dx_countlimit *limit;
        void                 *entry;

        root = buf;
        *root = (typeof(*root)) {
                .ilr_magic           = cpu_to_le64(IAM_LFIX_ROOT_MAGIC),
                .ilr_keysize         = cpu_to_le16(keysize),
                .ilr_recsize         = cpu_to_le16(recsize),
                .ilr_ptrsize         = cpu_to_le16(ptrsize),
                .ilr_indirect_levels = 0
        };

        limit = (void *)(root + 1);
        *limit = (typeof(*limit)){
                /*
                 * limit itself + one pointer to the leaf.
                 */
                .count = cpu_to_le16(2),
                .limit = iam_root_limit(sizeof(struct iam_lfix_root),
                                        blocksize, keysize + ptrsize)
        };

        /* To guarantee that the padding "keysize + ptrsize"
         * covers the "dx_countlimit" and the "idle_blocks". */
        LASSERT((keysize + ptrsize) >=
                (sizeof(struct dx_countlimit) + sizeof(__u32)));

        entry = (void *)(limit + 1);
        /* Put "idle_blocks" just after the limit. There was padding after
         * the limit, the "idle_blocks" re-uses part of the padding, so no
         * compatibility issues with old layout.
         */
        *(__u32 *)entry = 0;

        /*
         * Skip over @limit.
         */
        entry = (void *)(root + 1) + keysize + ptrsize;

        /*
         * Entry format is <key> followed by <ptr>. In the minimal tree
         * consisting of a root and single node, <key> is a minimal possible
         * key.
         *
         * XXX: this key is hard-coded to be a sequence of 0's.
         */

        memset(entry, 0, keysize);
        entry += keysize;
        /* now @entry points to <ptr>; the leaf lives in block 1 */
        if (ptrsize == 4)
                STORE_UNALIGNED(cpu_to_le32(1), (u_int32_t *)entry);
        else
                STORE_UNALIGNED(cpu_to_le64(1), (u_int64_t *)entry);
}
824
825 static void lfix_leaf(void *buf,
826                       int blocksize, int keysize, int ptrsize, int recsize)
827 {
828         struct iam_leaf_head *head;
829         void *entry;
830
831         /* form leaf */
832         head = buf;
833         *head = (struct iam_leaf_head) {
834                 .ill_magic = cpu_to_le16(IAM_LEAF_HEADER_MAGIC),
835                 /*
836                  * Leaf contains an entry with the smallest possible key
837                  * (created by zeroing).
838                  */
839                 .ill_count = cpu_to_le16(1),
840         };
841
842         entry = (void *)(head + 1);
843         memset(entry, 0, keysize + recsize);
844 }
845
/*
 * Create a minimal lfix tree in the (empty) inode @obj: block 0 becomes
 * the root with a single downward pointer, block 1 a leaf with a single
 * all-zero entry.  Both blocks are allocated, formatted, and marked dirty
 * under transaction @handle.
 *
 * Returns 0 on success or a negative errno.
 */
int iam_lfix_create(struct inode *obj,
                    int keysize, int ptrsize, int recsize, handle_t *handle)
{
        struct buffer_head *root_node;
        struct buffer_head *leaf_node;
        struct super_block *sb;

        u32 blknr;
        int result = 0;
        unsigned long bsize;

        assert_corr(obj->i_size == 0);

        sb = obj->i_sb;
        bsize = sb->s_blocksize;
        root_node = osd_ldiskfs_append(handle, obj, &blknr);
        if (IS_ERR(root_node))
                GOTO(out, result = PTR_ERR(root_node));

        leaf_node = osd_ldiskfs_append(handle, obj, &blknr);
        if (IS_ERR(leaf_node))
                GOTO(out_root, result = PTR_ERR(leaf_node));

        lfix_root(root_node->b_data, bsize, keysize, ptrsize, recsize);
        lfix_leaf(leaf_node->b_data, bsize, keysize, ptrsize, recsize);
        ldiskfs_mark_inode_dirty(handle, obj);
        result = ldiskfs_handle_dirty_metadata(handle, NULL, root_node);
        if (result == 0)
                result = ldiskfs_handle_dirty_metadata(handle, NULL, leaf_node);
        if (result != 0)
                ldiskfs_std_error(sb, result);

        brelse(leaf_node);

        GOTO(out_root, result);

out_root:
        brelse(root_node);
out:
        return result;
}