lustre/osd-ldiskfs/osd_iam_lfix.c
/*
 * GPL HEADER START
 *
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 only,
 * as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License version 2 for more details (a copy is included
 * in the LICENSE file that accompanied this code).
 *
 * You should have received a copy of the GNU General Public License
 * version 2 along with this program; If not, see [sun.com URL with a
 * copy of GPLv2].
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 *
 * GPL HEADER END
 */
/*
 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
 * Use is subject to license terms.
 *
 * Copyright (c) 2012, 2014, Intel Corporation.
 */
/*
 * This file is part of Lustre, http://www.lustre.org/
 * Lustre is a trademark of Sun Microsystems, Inc.
 *
 * iam_lfix.c
 * implementation of iam format for fixed size records.
 *
 * Author: Wang Di <wangdi@clusterfs.com>
 * Author: Nikita Danilov <nikita@clusterfs.com>
 */

#include <linux/types.h>
#include "osd_internal.h"

/*
 * Leaf operations.
 */

enum {
        IAM_LEAF_HEADER_MAGIC = 0x1976 /* This is duplicated in
                                        * lustre/utils/create_iam.c */
};

/* This is duplicated in lustre/utils/create_iam.c */
struct iam_leaf_head {
        __le16 ill_magic;
        __le16 ill_count;
};
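
/*
 * A leaf block, as read and written below, is just this header followed
 * by ill_count fixed-size entries stored back to back in key order.
 * Illustrative layout (entry sizes come from the container descriptor;
 * 8-byte keys and records are only an example):
 *
 *      +-----------+---------------+---------------+-----
 *      | leaf_head | key[0] rec[0] | key[1] rec[1] | ...
 *      +-----------+---------------+---------------+-----
 *
 * Each entry occupies id_key_size + id_rec_size bytes.
 */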

static inline int iam_lfix_entry_size(const struct iam_leaf *l)
{
        return iam_leaf_descr(l)->id_key_size + iam_leaf_descr(l)->id_rec_size;
}

static inline struct iam_lentry *
iam_lfix_shift(const struct iam_leaf *l, struct iam_lentry *entry, int shift)
{
        return (void *)entry + shift * iam_lfix_entry_size(l);
}

static inline struct iam_key *iam_leaf_key_at(struct iam_lentry *entry)
{
        return (struct iam_key *)entry;
}

static inline int lfix_keycmp(const struct iam_container *c,
                              const struct iam_key *k1,
                              const struct iam_key *k2)
{
        return memcmp(k1, k2, c->ic_descr->id_key_size);
}

static struct iam_leaf_head *iam_get_head(const struct iam_leaf *l)
{
        return (struct iam_leaf_head *)l->il_bh->b_data;
}

static struct iam_lentry *iam_entries(const struct buffer_head *bh)
{
        return (void *)bh->b_data + sizeof(struct iam_leaf_head);
}

static struct iam_lentry *iam_get_lentries(const struct iam_leaf *l)
{
        return iam_entries(l->il_bh);
}

static int leaf_count_limit(const struct iam_leaf *leaf)
{
        int free_space;

        free_space = iam_leaf_container(leaf)->ic_object->i_sb->s_blocksize;
        free_space -= sizeof(struct iam_leaf_head);
        return free_space / iam_lfix_entry_size(leaf);
}
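
/*
 * Worked example (illustration only): with a 4096-byte block and the
 * 8-byte key / 8-byte record used later in this file as a sample
 * geometry, the limit is (4096 - sizeof(struct iam_leaf_head)) / 16 =
 * (4096 - 4) / 16 = 255 entries per leaf.
 */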

static int lentry_count_get(const struct iam_leaf *leaf)
{
        return le16_to_cpu(iam_get_head(leaf)->ill_count);
}

static void lentry_count_set(struct iam_leaf *leaf, unsigned count)
{
        assert_corr(0 <= count && count <= leaf_count_limit(leaf));
        iam_get_head(leaf)->ill_count = cpu_to_le16(count);
}

static struct iam_lentry *iam_lfix_get_end(const struct iam_leaf *l);

#if LDISKFS_CORRECTNESS_ON || LDISKFS_INVARIANT_ON
static int iam_leaf_at_rec(const struct iam_leaf *folio)
{
        return
                iam_get_lentries(folio) <= folio->il_at &&
                folio->il_at < iam_lfix_get_end(folio);
}
#endif

static struct iam_ikey *iam_lfix_ikey(const struct iam_leaf *l,
                                      struct iam_ikey *key)
{
        void *ie = l->il_at;
        assert_corr(iam_leaf_at_rec(l));
        return (struct iam_ikey*)ie;
}

static struct iam_key *iam_lfix_key(const struct iam_leaf *l)
{
        void *ie = l->il_at;
        assert_corr(iam_leaf_at_rec(l));
        return (struct iam_key*)ie;
}

static int iam_lfix_key_size(const struct iam_leaf *l)
{
        return iam_leaf_descr(l)->id_key_size;
}

static void iam_lfix_start(struct iam_leaf *l)
{
        l->il_at = iam_get_lentries(l);
}

static inline ptrdiff_t iam_lfix_diff(const struct iam_leaf *l,
                                      const struct iam_lentry *e1,
                                      const struct iam_lentry *e2)
{
        ptrdiff_t diff;
        int esize;

        esize = iam_lfix_entry_size(l);
        diff = (void *)e1 - (void *)e2;
        assert_corr(diff / esize * esize == diff);
        return diff / esize;
}

static int iam_lfix_init(struct iam_leaf *l)
{
        int result;
        struct iam_leaf_head *ill;
        int count;

        assert_corr(l->il_bh != NULL);

        ill = iam_get_head(l);
        count = le16_to_cpu(ill->ill_count);
        if (le16_to_cpu(ill->ill_magic) == IAM_LEAF_HEADER_MAGIC &&
            0 <= count && count <= leaf_count_limit(l)) {
                l->il_at = l->il_entries = iam_get_lentries(l);
                result = 0;
        } else {
                struct inode *obj;

                obj = iam_leaf_container(l)->ic_object;
                CERROR("Wrong magic in node %llu (#%lu): %#x != %#x or "
                       "wrong count: %d (%d)\n",
                       (unsigned long long)l->il_bh->b_blocknr, obj->i_ino,
                       le16_to_cpu(ill->ill_magic), IAM_LEAF_HEADER_MAGIC,
                       count, leaf_count_limit(l));
                result = -EIO;
        }
        return result;
}

static void iam_lfix_fini(struct iam_leaf *l)
{
        l->il_entries = l->il_at = NULL;
}

static struct iam_lentry *iam_lfix_get_end(const struct iam_leaf *l)
{
        int count = lentry_count_get(l);
        struct iam_lentry *ile = iam_lfix_shift(l, l->il_entries, count);

        return ile;
}

static struct iam_rec *iam_lfix_rec(const struct iam_leaf *l)
{
        void *e = l->il_at;
        assert_corr(iam_leaf_at_rec(l));
        return e + iam_leaf_descr(l)->id_key_size;
}

static void iam_lfix_next(struct iam_leaf *l)
{
        assert_corr(iam_leaf_at_rec(l));
        l->il_at = iam_lfix_shift(l, l->il_at, 1);
}

/*
 * Bug chasing.
 */
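/*
 * When lfix_dump is non-zero, iam_lfix_lookup() dumps the whole leaf via
 * lfix_print() after every search; presumably it is meant to be flipped
 * by hand (from a debugger or a one-off patch) while chasing corruption.
 */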
int lfix_dump = 0;

static char hdigit(char ch)
{
        static char d[] = "0123456789abcdef";
        return d[ch & 0xf];
}

static char *hex(char ch, char *area)
{
        area[0] = hdigit(ch >> 4);
        area[1] = hdigit(ch);
        area[2] = 0;
        return area;
}

static void l_print(struct iam_leaf *leaf, struct iam_lentry *entry)
{
        int i;
        char *area;
        char h[3];

        area = (char *)entry;
        printk(KERN_EMERG "[");
        for (i = iam_lfix_key_size(leaf); i > 0; --i, ++area)
                printk("%s", hex(*area, h));
        printk("]-(");
        for (i = iam_leaf_descr(leaf)->id_rec_size; i > 0; --i, ++area)
                printk("%s", hex(*area, h));
        printk(")\n");
}

static void lfix_print(struct iam_leaf *leaf)
{
        struct iam_lentry *entry;
        int count;
        int i;

        entry = leaf->il_entries;
        count = lentry_count_get(leaf);
        printk(KERN_EMERG "lfix: %p %p %d\n", leaf, leaf->il_at, count);
        for (i = 0; i < count; ++i, entry = iam_lfix_shift(leaf, entry, 1))
                l_print(leaf, entry);
}

static int iam_lfix_lookup(struct iam_leaf *l, const struct iam_key *k)
{
        struct iam_lentry *p, *q, *m, *t;
        struct iam_container *c;
        int count;
        int result;

        count = lentry_count_get(l);
        if (count == 0)
                return IAM_LOOKUP_EMPTY;

        result = IAM_LOOKUP_OK;
        c = iam_leaf_container(l);

        p = l->il_entries;
        q = iam_lfix_shift(l, p, count - 1);
        if (lfix_keycmp(c, k, iam_leaf_key_at(p)) < 0) {
                /*
                 * @k is less than the least key in the leaf
                 */
                l->il_at = p;
                result = IAM_LOOKUP_BEFORE;
        } else if (lfix_keycmp(c, iam_leaf_key_at(q), k) <= 0) {
                l->il_at = q;
        } else {
                /*
                 * EWD1293
                 */
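                /*
                 * Invariant-driven binary search (hence the EWD reference
                 * above): key(p) <= @k < key(q) holds when the loop is
                 * entered and is preserved by every step, so the loop ends
                 * with q == p + 1 and p at the last entry whose key is <= @k.
                 */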
                while (iam_lfix_shift(l, p, 1) != q) {
                        m = iam_lfix_shift(l, p, iam_lfix_diff(l, q, p) / 2);
                        assert_corr(p < m && m < q);
                        if (lfix_keycmp(c, iam_leaf_key_at(m), k) <= 0)
                                p = m;
                        else
                                q = m;
                }
                assert_corr(lfix_keycmp(c, iam_leaf_key_at(p), k) <= 0 &&
                            lfix_keycmp(c, k, iam_leaf_key_at(q)) < 0);
                /*
                 * skip over records with duplicate keys.
                 */
                while (p > l->il_entries) {
                        t = iam_lfix_shift(l, p, -1);
                        if (lfix_keycmp(c, iam_leaf_key_at(t), k) == 0)
                                p = t;
                        else
                                break;
                }
                l->il_at = p;
        }
        assert_corr(iam_leaf_at_rec(l));

        if (lfix_keycmp(c, iam_leaf_key_at(l->il_at), k) == 0)
                result = IAM_LOOKUP_EXACT;

        if (lfix_dump)
                lfix_print(l);

        return result;
}

static int iam_lfix_ilookup(struct iam_leaf *l, const struct iam_ikey *ik)
{
        return iam_lfix_lookup(l, (const struct iam_key *)ik);
}

static void iam_lfix_key_set(struct iam_leaf *l, const struct iam_key *k)
{
        assert_corr(iam_leaf_at_rec(l));
        memcpy(iam_leaf_key_at(l->il_at), k, iam_leaf_descr(l)->id_key_size);
}

static int iam_lfix_key_cmp(const struct iam_leaf *l, const struct iam_key *k)
{
        return lfix_keycmp(iam_leaf_container(l), iam_leaf_key_at(l->il_at), k);
}

static int iam_lfix_key_eq(const struct iam_leaf *l, const struct iam_key *k)
{
        return !lfix_keycmp(iam_leaf_container(l),
                            iam_leaf_key_at(l->il_at), k);
}

static void iam_lfix_rec_set(struct iam_leaf *l, const struct iam_rec *r)
{
        assert_corr(iam_leaf_at_rec(l));
        memcpy(iam_lfix_rec(l), r, iam_leaf_descr(l)->id_rec_size);
}

static inline int lfix_reccmp(const struct iam_container *c,
                              const struct iam_rec *r1,
                              const struct iam_rec *r2)
{
        return memcmp(r1, r2, c->ic_descr->id_rec_size);
}

static int iam_lfix_rec_eq(const struct iam_leaf *l, const struct iam_rec *r)
{
        return !lfix_reccmp(iam_leaf_container(l), iam_lfix_rec(l), r);
}

static void iam_lfix_rec_get(const struct iam_leaf *l, struct iam_rec *r)
{
        assert_corr(iam_leaf_at_rec(l));
        memcpy(r, iam_lfix_rec(l), iam_leaf_descr(l)->id_rec_size);
}

static void iam_lfix_rec_add(struct iam_leaf *leaf,
                             const struct iam_key *k, const struct iam_rec *r)
{
        struct iam_lentry *end;
        struct iam_lentry *cur;
        struct iam_lentry *start;
        ptrdiff_t diff;
        int count;

        assert_corr(iam_leaf_can_add(leaf, k, r));

        count = lentry_count_get(leaf);
        /*
         * This branch handles two exceptional cases:
         *
         *   - leaf positioned beyond last record, and
         *
         *   - empty leaf.
         */
        if (!iam_leaf_at_end(leaf)) {
                end   = iam_lfix_get_end(leaf);
                cur   = leaf->il_at;
                if (lfix_keycmp(iam_leaf_container(leaf),
                               k, iam_leaf_key_at(cur)) >= 0)
                        iam_lfix_next(leaf);
                else
                        /*
                         * Another exceptional case: insertion with a key
                         * less than the least key in the leaf.
                         */
                        assert_corr(cur == leaf->il_entries);

                start = leaf->il_at;
                diff  = (void *)end - (void *)start;
                assert_corr(diff >= 0);
                memmove(iam_lfix_shift(leaf, start, 1), start, diff);
        }
        lentry_count_set(leaf, count + 1);
        iam_lfix_key_set(leaf, k);
        iam_lfix_rec_set(leaf, r);
        assert_corr(iam_leaf_at_rec(leaf));
}
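
/*
 * Rough picture of the insertion above (an illustration, assuming lookup
 * left il_at on the last entry whose key is <= the new key @k):
 *
 *      before:   | e0 | e1 | e2 | e3 |        il_at -> e1
 *      memmove:  | e0 | e1 | ?? | e2 | e3 |   a slot opened after e1
 *      after:    | e0 | e1 | new| e2 | e3 |   new key/record written at il_at
 */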

static void iam_lfix_rec_del(struct iam_leaf *leaf, int shift)
{
        struct iam_lentry *next, *end;
        int count;
        ptrdiff_t diff;

        assert_corr(iam_leaf_at_rec(leaf));

        count = lentry_count_get(leaf);
        end = iam_lfix_get_end(leaf);
        next = iam_lfix_shift(leaf, leaf->il_at, 1);
        diff = (void *)end - (void *)next;
        memmove(leaf->il_at, next, diff);

        lentry_count_set(leaf, count - 1);
}

static int iam_lfix_can_add(const struct iam_leaf *l,
                            const struct iam_key *k, const struct iam_rec *r)
{
        return lentry_count_get(l) < leaf_count_limit(l);
}

static int iam_lfix_at_end(const struct iam_leaf *folio)
{
        return folio->il_at == iam_lfix_get_end(folio);
}

static void iam_lfix_init_new(struct iam_container *c, struct buffer_head *bh)
{
        struct iam_leaf_head *hdr;

        hdr = (struct iam_leaf_head*)bh->b_data;
        hdr->ill_magic = cpu_to_le16(IAM_LEAF_HEADER_MAGIC);
        hdr->ill_count = cpu_to_le16(0);
}

static void iam_lfix_split(struct iam_leaf *l, struct buffer_head **bh,
                           iam_ptr_t new_blknr)
{
        struct iam_path       *path;
        struct iam_leaf_head  *hdr;
        const struct iam_ikey *pivot;
        struct buffer_head    *new_leaf;

        unsigned count;
        unsigned split;

        void *start;
        void *finis;

        new_leaf = *bh;
        path = iam_leaf_path(l);

        hdr = (void *)new_leaf->b_data;

        count = lentry_count_get(l);
        split = count / 2;

        start = iam_lfix_shift(l, iam_get_lentries(l), split);
        finis = iam_lfix_shift(l, iam_get_lentries(l), count);

        pivot = (const struct iam_ikey *)iam_leaf_key_at(start);

        memmove(iam_entries(new_leaf), start, finis - start);
        hdr->ill_count = cpu_to_le16(count - split);
        lentry_count_set(l, split);
        if ((void *)l->il_at >= start) {
                /*
                 * insertion point moves into new leaf.
                 */
                int shift;
                int result;

                shift = iam_lfix_diff(l, l->il_at, start);
                *bh = l->il_bh;
                l->il_bh = new_leaf;
                l->il_curidx = new_blknr;
                result = iam_lfix_init(l);
                /*
                 * init cannot fail, as node was just initialized.
                 */
                assert_corr(result == 0);
                l->il_at = iam_lfix_shift(l, iam_get_lentries(l), shift);
        }
        /*
         * Insert pointer to the new node (together with the least key in
         * the node) into index node.
         */
        iam_insert_key_lock(path, path->ip_frame, pivot, new_blknr);
}
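
/*
 * Split illustration (example values, not a normative description): with
 * count == 7, split == 3, so the original leaf keeps entries [0..2],
 * entries [3..6] are copied into the new leaf (whose ill_count becomes 4),
 * and the key of entry 3 is promoted as the pivot into the parent index
 * node by iam_insert_key_lock().
 */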

static int iam_lfix_leaf_empty(struct iam_leaf *leaf)
{
        return lentry_count_get(leaf) == 0;
}

static struct iam_leaf_operations iam_lfix_leaf_ops = {
        .init           = iam_lfix_init,
        .init_new       = iam_lfix_init_new,
        .fini           = iam_lfix_fini,
        .start          = iam_lfix_start,
        .next           = iam_lfix_next,
        .key            = iam_lfix_key,
        .ikey           = iam_lfix_ikey,
        .rec            = iam_lfix_rec,
        .key_set        = iam_lfix_key_set,
        .key_cmp        = iam_lfix_key_cmp,
        .key_eq         = iam_lfix_key_eq,
        .key_size       = iam_lfix_key_size,
        .rec_set        = iam_lfix_rec_set,
        .rec_eq         = iam_lfix_rec_eq,
        .rec_get        = iam_lfix_rec_get,
        .lookup         = iam_lfix_lookup,
        .ilookup        = iam_lfix_ilookup,
        .at_end         = iam_lfix_at_end,
        .rec_add        = iam_lfix_rec_add,
        .rec_del        = iam_lfix_rec_del,
        .can_add        = iam_lfix_can_add,
        .split          = iam_lfix_split,
        .leaf_empty     = iam_lfix_leaf_empty,
};

/*
 * Index operations.
 */

enum {
        /* This is duplicated in lustre/utils/create_iam.c */
        /*
         * Then shalt thou see the dew-BEDABBLED wretch
         * Turn, and return, indenting with the way;
         * Each envious brier his weary legs doth scratch,
         * Each shadow makes him stop, each murmur stay:
         * For misery is trodden on by many,
         * And being low never relieved by any.
         */
        IAM_LFIX_ROOT_MAGIC = 0xbedabb1edULL // d01efull
};

/* This is duplicated in lustre/utils/create_iam.c */
struct iam_lfix_root {
        __le64  ilr_magic;
        __le16  ilr_keysize;
        __le16  ilr_recsize;
        __le16  ilr_ptrsize;
        u8      ilr_indirect_levels;
        u8      ilr_padding;
};
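
/*
 * As set up by lfix_root() below and consumed by iam_lfix_guess(), the
 * root block starts with this header and is followed by an array of
 * (key, ptr) index entries; the first entry slot is overlaid with a
 * struct dx_countlimit (plus the "idle_blocks" word).  Illustrative
 * layout:
 *
 *      +---------------+----------------------+-----------+-----------+----
 *      | iam_lfix_root | countlimit, idle ... | key | ptr | key | ptr | ...
 *      +---------------+----------------------+-----------+-----------+----
 */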

static __u32 iam_lfix_root_ptr(struct iam_container *c)
{
        return 0;
}

static int iam_lfix_node_init(struct iam_container *c, struct buffer_head *bh,
                              int root)
{
        return 0;
}

static struct iam_entry *iam_lfix_root_inc(struct iam_container *c,
                                           struct iam_path *path,
                                           struct iam_frame *frame)
{
        struct iam_lfix_root *root;
        struct iam_entry     *entries;

        entries = frame->entries;

        dx_set_count(entries, 2);
        assert_corr(dx_get_limit(entries) == dx_root_limit(path));

        root = (void *)frame->bh->b_data;
        assert_corr(le64_to_cpu(root->ilr_magic) == IAM_LFIX_ROOT_MAGIC);
        root->ilr_indirect_levels ++;
        frame->at = entries = iam_entry_shift(path, entries, 1);
        memset(iam_ikey_at(path, entries), 0,
               iam_path_descr(path)->id_ikey_size);
        return entries;
}

static int iam_lfix_node_check(struct iam_path *path, struct iam_frame *frame)
{
        unsigned count;
        unsigned limit;
        unsigned limit_correct;
        struct iam_entry *entries;

        entries = dx_node_get_entries(path, frame);

        if (frame == path->ip_frames) {
                struct iam_lfix_root *root;

                root = (void *)frame->bh->b_data;
                if (le64_to_cpu(root->ilr_magic) != IAM_LFIX_ROOT_MAGIC) {
                        return -EIO;
                }
                limit_correct = dx_root_limit(path);
        } else
                limit_correct = dx_node_limit(path);
        count = dx_get_count(entries);
        limit = dx_get_limit(entries);
        if (count > limit) {
                return -EIO;
        }
        if (limit != limit_correct) {
                return -EIO;
        }
        return 0;
}

static int iam_lfix_node_load(struct iam_path *path, struct iam_frame *frame)
{
        struct iam_entry *entries;
        void *data;
        entries = dx_node_get_entries(path, frame);

        data = frame->bh->b_data;

        if (frame == path->ip_frames) {
                struct iam_lfix_root *root;

                root = data;
                path->ip_indirect = root->ilr_indirect_levels;
                if (path->ip_ikey_target == NULL)
                        path->ip_ikey_target =
                                (struct iam_ikey *)path->ip_key_target;
        }
        frame->entries = frame->at = entries;
        return 0;
}

static int iam_lfix_ikeycmp(const struct iam_container *c,
                            const struct iam_ikey *k1,
                            const struct iam_ikey *k2)
{
        return memcmp(k1, k2, c->ic_descr->id_ikey_size);
}

static struct iam_path_descr *iam_lfix_ipd_alloc(const struct iam_container *c,
                                                 void *area)
{
        return iam_ipd_alloc(area, c->ic_descr->id_ikey_size);
}

static struct iam_operations iam_lfix_ops = {
        .id_root_ptr    = iam_lfix_root_ptr,
        .id_node_read   = iam_node_read,
        .id_node_init   = iam_lfix_node_init,
        .id_node_check  = iam_lfix_node_check,
        .id_node_load   = iam_lfix_node_load,
        .id_ikeycmp     = iam_lfix_ikeycmp,
        .id_root_inc    = iam_lfix_root_inc,
        .id_ipd_alloc   = iam_lfix_ipd_alloc,
        .id_ipd_free    = iam_ipd_free,
        .id_name        = "lfix"
};

static int iam_lfix_guess(struct iam_container *c)
{
        int result;
        struct buffer_head *bh;
        const struct iam_lfix_root *root;

        assert_corr(c->ic_object != NULL);

        result = iam_node_read(c, iam_lfix_root_ptr(c), NULL, &bh);
        if (result == 0) {
                root = (void *)bh->b_data;
                if (le64_to_cpu(root->ilr_magic) == IAM_LFIX_ROOT_MAGIC) {
                        struct iam_descr *descr;

                        descr = c->ic_descr;
                        descr->id_key_size  = le16_to_cpu(root->ilr_keysize);
                        descr->id_ikey_size = le16_to_cpu(root->ilr_keysize);
                        descr->id_rec_size  = le16_to_cpu(root->ilr_recsize);
                        descr->id_ptr_size  = le16_to_cpu(root->ilr_ptrsize);
                        descr->id_root_gap  = sizeof(struct iam_lfix_root);
                        descr->id_node_gap  = 0;
                        descr->id_ops       = &iam_lfix_ops;
                        descr->id_leaf_ops  = &iam_lfix_leaf_ops;

                        c->ic_root_bh = bh;
                } else {
                        result = -EBADF;
                        brelse(bh);
                }
        }
        return result;
}

static struct iam_format iam_lfix_format = {
        .if_guess = iam_lfix_guess
};

void iam_lfix_format_init(void)
{
        iam_format_register(&iam_lfix_format);
}

/*
 * Debugging aid.
 */

#define KEYSIZE (8)
#define RECSIZE (8)
#define PTRSIZE (4)

#define LFIX_ROOT_RECNO \
        ((4096 - sizeof(struct iam_lfix_root)) / (KEYSIZE + PTRSIZE))

#define LFIX_INDEX_RECNO (4096 / (KEYSIZE + PTRSIZE))

#define LFIX_LEAF_RECNO \
        ((4096 - sizeof(struct iam_leaf_head)) / (KEYSIZE + RECSIZE))
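
/*
 * With the 4096-byte block size hard-coded above (and assuming no padding
 * in the on-disk structures) these evaluate to roughly:
 *
 *      LFIX_ROOT_RECNO  = (4096 - 16) / (8 + 4) = 340
 *      LFIX_INDEX_RECNO =  4096 / (8 + 4)       = 341
 *      LFIX_LEAF_RECNO  = (4096 - 4) / (8 + 8)  = 255
 */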

struct lfix_root {
        struct iam_lfix_root lr_root;
        struct {
                char key[KEYSIZE];
                char ptr[PTRSIZE];
        } lr_entry[LFIX_ROOT_RECNO];
};

struct lfix_index {
        struct dx_countlimit li_cl;
        char   li_padding[KEYSIZE + PTRSIZE - sizeof(struct dx_countlimit)];
        struct {
                char key[KEYSIZE];
                char ptr[PTRSIZE];
        } li_entry[LFIX_INDEX_RECNO - 1];
};

struct lfix_leaf {
        struct iam_leaf_head ll_head;
        struct {
                char key[KEYSIZE];
                char rec[RECSIZE];
        } ll_entry[LFIX_LEAF_RECNO];
};

#define STORE_UNALIGNED(val, dst)                       \
({                                                      \
        typeof(val) __val = (val);                      \
        CLASSERT(sizeof(val) == sizeof(*(dst)));        \
        memcpy(dst, &__val, sizeof(*(dst)));            \
})
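
/*
 * memcpy() rather than a plain assignment: the <ptr> slots written by
 * lfix_root() below sit at key-size offsets inside the block and need not
 * be naturally aligned for 32- or 64-bit stores.
 */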

static void lfix_root(void *buf,
                      int blocksize, int keysize, int ptrsize, int recsize)
{
        struct iam_lfix_root *root;
        struct dx_countlimit *limit;
        void                 *entry;

        root = buf;
        *root = (typeof(*root)) {
                .ilr_magic           = cpu_to_le64(IAM_LFIX_ROOT_MAGIC),
                .ilr_keysize         = cpu_to_le16(keysize),
                .ilr_recsize         = cpu_to_le16(recsize),
                .ilr_ptrsize         = cpu_to_le16(ptrsize),
                .ilr_indirect_levels = 0
        };

        limit = (void *)(root + 1);
        *limit = (typeof(*limit)){
                /*
                 * limit itself + one pointer to the leaf.
                 */
                .count = cpu_to_le16(2),
                .limit = iam_root_limit(sizeof(struct iam_lfix_root),
                                        blocksize, keysize + ptrsize)
        };

        /* To guarantee that the padding "keysize + ptrsize"
         * covers the "dx_countlimit" and the "idle_blocks". */
        LASSERT((keysize + ptrsize) >=
                (sizeof(struct dx_countlimit) + sizeof(__u32)));

        entry = (void *)(limit + 1);
        /* Put "idle_blocks" just after the limit.  There used to be padding
         * after the limit, and "idle_blocks" re-uses part of that padding,
         * so there are no compatibility issues with the old layout.
         */
        *(__u32 *)entry = 0;

        /*
         * Skip over @limit.
         */
        entry = (void *)(root + 1) + keysize + ptrsize;

        /*
         * Entry format is <key> followed by <ptr>. In the minimal tree
         * consisting of a root and single node, <key> is a minimal possible
         * key.
         *
         * XXX: this key is hard-coded to be a sequence of 0's.
         */

        memset(entry, 0, keysize);
        entry += keysize;
        /* now @entry points to <ptr> */
        if (ptrsize == 4)
                STORE_UNALIGNED(cpu_to_le32(1), (u_int32_t *)entry);
        else
                STORE_UNALIGNED(cpu_to_le64(1), (u_int64_t *)entry);
}
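
/*
 * After lfix_root() a fresh root block therefore looks roughly like this
 * (offsets shown for the illustrative 8-byte key / 4-byte pointer case,
 * assuming no structure padding):
 *
 *      offset  0: struct iam_lfix_root (magic, sizes, indirect_levels = 0)
 *      offset 16: struct dx_countlimit (count = 2, limit = ...)
 *      offset 20: idle_blocks = 0
 *      offset 28: key = 0..0, ptr = 1   <- points at the first leaf block
 */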

static void lfix_leaf(void *buf,
                      int blocksize, int keysize, int ptrsize, int recsize)
{
        struct iam_leaf_head *head;
        void *entry;

        /* form leaf */
        head = buf;
        *head = (struct iam_leaf_head) {
                .ill_magic = cpu_to_le16(IAM_LEAF_HEADER_MAGIC),
                /*
                 * Leaf contains an entry with the smallest possible key
                 * (created by zeroing).
                 */
                .ill_count = cpu_to_le16(1),
        };

        entry = (void *)(head + 1);
        memset(entry, 0, keysize + recsize);
}

int iam_lfix_create(struct inode *obj,
                    int keysize, int ptrsize, int recsize, handle_t *handle)
{
        struct buffer_head *root_node;
        struct buffer_head *leaf_node;
        struct super_block *sb;

        u32 blknr;
        int result = 0;
        unsigned long bsize;

        assert_corr(obj->i_size == 0);

        sb = obj->i_sb;
        bsize = sb->s_blocksize;
        root_node = osd_ldiskfs_append(handle, obj, &blknr, &result);
        leaf_node = osd_ldiskfs_append(handle, obj, &blknr, &result);
        if (root_node != NULL && leaf_node != NULL) {
                lfix_root(root_node->b_data, bsize, keysize, ptrsize, recsize);
                lfix_leaf(leaf_node->b_data, bsize, keysize, ptrsize, recsize);
                ldiskfs_mark_inode_dirty(handle, obj);
                result = ldiskfs_handle_dirty_metadata(handle, NULL, root_node);
                if (result == 0)
                        result = ldiskfs_handle_dirty_metadata(handle, NULL,
                                                               leaf_node);
                if (result != 0)
                        ldiskfs_std_error(sb, result);
        }
        brelse(leaf_node);
        brelse(root_node);
        return result;
}
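
/*
 * Hypothetical caller's view (parameter values are only an example): an
 * index with 8-byte keys, 4-byte block pointers and 8-byte records could
 * be created inside an ldiskfs transaction with
 *
 *      rc = iam_lfix_create(inode, 8, 4, 8, handle);
 *
 * after which generic iam code recognizes the container through
 * iam_lfix_guess(), registered via iam_lfix_format_init() above.
 */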