Whamcloud - gitweb
LU-13004 ptlrpc: Allow BULK_BUF_KIOV to accept a kvec
[fs/lustre-release.git] / lustre / osd-ldiskfs / osd_iam_lfix.c
1 /*
2  * GPL HEADER START
3  *
4  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 only,
8  * as published by the Free Software Foundation.
9  *
10  * This program is distributed in the hope that it will be useful, but
11  * WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  * General Public License version 2 for more details (a copy is included
14  * in the LICENSE file that accompanied this code).
15  *
16  * You should have received a copy of the GNU General Public License
17  * version 2 along with this program; If not, see
18  * http://www.gnu.org/licenses/gpl-2.0.html
19  *
20  * GPL HEADER END
21  */
22 /*
23  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
24  * Use is subject to license terms.
25  *
26  * Copyright (c) 2012, 2015, Intel Corporation.
27  */
28 /*
29  * This file is part of Lustre, http://www.lustre.org/
30  * Lustre is a trademark of Sun Microsystems, Inc.
31  *
32  * iam_lfix.c
33  * implementation of iam format for fixed size records.
34  *
35  * Author: Wang Di <wangdi@clusterfs.com>
36  * Author: Nikita Danilov <nikita@clusterfs.com>
37  */
38
39 #include <linux/types.h>
40 #include "osd_internal.h"
41
42 /*
43  * Leaf operations.
44  */
45
46 enum {
47         IAM_LEAF_HEADER_MAGIC = 0x1976 /* This is duplicated in
48                                         * lustre/utils/create_iam.c */
49 };
50
51 /* This is duplicated in lustre/utils/create_iam.c */
52 struct iam_leaf_head {
53         __le16 ill_magic;
54         __le16 ill_count;
55 };
56
57 static inline int iam_lfix_entry_size(const struct iam_leaf *l)
58 {
59         return iam_leaf_descr(l)->id_key_size + iam_leaf_descr(l)->id_rec_size;
60 }
61
62 static inline struct iam_lentry *
63 iam_lfix_shift(const struct iam_leaf *l, struct iam_lentry *entry, int shift)
64 {
65         return (void *)entry + shift * iam_lfix_entry_size(l);
66 }
67
68 static inline struct iam_key *iam_leaf_key_at(struct iam_lentry *entry)
69 {
70         return (struct iam_key *)entry;
71 }
72
73 static inline int lfix_keycmp(const struct iam_container *c,
74                               const struct iam_key *k1,
75                               const struct iam_key *k2)
76 {
77         return memcmp(k1, k2, c->ic_descr->id_key_size);
78 }
79
80 static struct iam_leaf_head *iam_get_head(const struct iam_leaf *l)
81 {
82         return (struct iam_leaf_head *)l->il_bh->b_data;
83 }
84
85 static struct iam_lentry *iam_entries(const struct buffer_head *bh)
86 {
87         return (void *)bh->b_data + sizeof(struct iam_leaf_head);
88 }
89
90 static struct iam_lentry *iam_get_lentries(const struct iam_leaf *l)
91 {
92         return iam_entries(l->il_bh);
93 }
94
95 static int leaf_count_limit(const struct iam_leaf *leaf)
96 {
97         int free_space;
98
99         free_space = iam_leaf_container(leaf)->ic_object->i_sb->s_blocksize;
100         free_space -= sizeof(struct iam_leaf_head);
101         return free_space / iam_lfix_entry_size(leaf);
102 }
103
104 static int lentry_count_get(const struct iam_leaf *leaf)
105 {
106         return le16_to_cpu(iam_get_head(leaf)->ill_count);
107 }
108
109 static void lentry_count_set(struct iam_leaf *leaf, unsigned count)
110 {
111         assert_corr(0 <= count && count <= leaf_count_limit(leaf));
112         iam_get_head(leaf)->ill_count = cpu_to_le16(count);
113 }
114
115 static struct iam_lentry *iam_lfix_get_end(const struct iam_leaf *l);
116
117 #if LDISKFS_CORRECTNESS_ON || LDISKFS_INVARIANT_ON
118 static int iam_leaf_at_rec(const struct iam_leaf *folio)
119 {
120         return iam_get_lentries(folio) <= folio->il_at &&
121                 folio->il_at < iam_lfix_get_end(folio);
122 }
123 #endif
124
125 static struct iam_ikey *iam_lfix_ikey(const struct iam_leaf *l,
126                                       struct iam_ikey *key)
127 {
128         void *ie = l->il_at;
129
130         assert_corr(iam_leaf_at_rec(l));
131         return (struct iam_ikey *)ie;
132 }
133
134 static struct iam_key *iam_lfix_key(const struct iam_leaf *l)
135 {
136         void *ie = l->il_at;
137
138         assert_corr(iam_leaf_at_rec(l));
139         return (struct iam_key *)ie;
140 }
141
142 static int iam_lfix_key_size(const struct iam_leaf *l)
143 {
144         return iam_leaf_descr(l)->id_key_size;
145 }
146
147 static void iam_lfix_start(struct iam_leaf *l)
148 {
149         l->il_at = iam_get_lentries(l);
150 }
151
152 static inline ptrdiff_t iam_lfix_diff(const struct iam_leaf *l,
153                                       const struct iam_lentry *e1,
154                                       const struct iam_lentry *e2)
155 {
156         ptrdiff_t diff;
157         int esize;
158
159         esize = iam_lfix_entry_size(l);
160         diff = (void *)e1 - (void *)e2;
161         assert_corr(diff / esize * esize == diff);
162         return diff / esize;
163 }
164
165 static int iam_lfix_init(struct iam_leaf *l)
166 {
167         int result;
168         struct iam_leaf_head *ill;
169         int count;
170
171         assert_corr(l->il_bh != NULL);
172
173         ill = iam_get_head(l);
174         count = le16_to_cpu(ill->ill_count);
175         if (le16_to_cpu(ill->ill_magic) == IAM_LEAF_HEADER_MAGIC &&
176                         0 <= count && count <= leaf_count_limit(l)) {
177                 l->il_at = l->il_entries = iam_get_lentries(l);
178                 result = 0;
179         } else {
180                 struct inode *obj;
181
182                 result = -EIO;
183                 obj = iam_leaf_container(l)->ic_object;
184                 CERROR(
185                 "Bad magic in node %llu #%lu: %#x != %#x or bad cnt: %d %d: rc = %d\n",
186                         (unsigned long long)l->il_bh->b_blocknr, obj->i_ino,
187                         le16_to_cpu(ill->ill_magic), IAM_LEAF_HEADER_MAGIC,
188                         count, leaf_count_limit(l), result);
189         }
190         return result;
191 }
192
193 static void iam_lfix_fini(struct iam_leaf *l)
194 {
195         l->il_entries = l->il_at = NULL;
196 }
197
198 static struct iam_lentry *iam_lfix_get_end(const struct iam_leaf *l)
199 {
200         int count = lentry_count_get(l);
201         struct iam_lentry *ile = iam_lfix_shift(l, l->il_entries, count);
202
203         return ile;
204 }
205
206 static struct iam_rec *iam_lfix_rec(const struct iam_leaf *l)
207 {
208         void *e = l->il_at;
209
210         assert_corr(iam_leaf_at_rec(l));
211         return e + iam_leaf_descr(l)->id_key_size;
212 }
213
214 static void iam_lfix_next(struct iam_leaf *l)
215 {
216         assert_corr(iam_leaf_at_rec(l));
217         l->il_at = iam_lfix_shift(l, l->il_at, 1);
218 }
219
220 /*
221  * Bug chasing.
222  */
223 int lfix_dump = 0;
224
225 static char hdigit(char ch)
226 {
227         static const char d[] = "0123456789abcdef";
228
229         return d[ch & 0xf];
230 }
231
232 static char *hex(char ch, char *area)
233 {
234         area[0] = hdigit(ch >> 4);
235         area[1] = hdigit(ch);
236         area[2] = 0;
237         return area;
238 }
239
240 static void l_print(struct iam_leaf *leaf, struct iam_lentry *entry)
241 {
242         int i;
243         char *area;
244         char h[3];
245
246         area = (char *)entry;
247         printk(KERN_EMERG "[");
248         for (i = iam_lfix_key_size(leaf); i > 0; --i, ++area)
249                 printk("%s", hex(*area, h));
250         printk("]-(");
251         for (i = iam_leaf_descr(leaf)->id_rec_size; i > 0; --i, ++area)
252                 printk("%s", hex(*area, h));
253         printk(")\n");
254 }
255
256 static void lfix_print(struct iam_leaf *leaf)
257 {
258         struct iam_lentry *entry;
259         int count;
260         int i;
261
262         entry = leaf->il_entries;
263         count = lentry_count_get(leaf);
264         printk(KERN_EMERG "lfix: %p %p %d\n", leaf, leaf->il_at, count);
265         for (i = 0; i < count; ++i, entry = iam_lfix_shift(leaf, entry, 1))
266                 l_print(leaf, entry);
267 }
268
269 static int iam_lfix_lookup(struct iam_leaf *l, const struct iam_key *k)
270 {
271         struct iam_lentry *p, *q, *m, *t;
272         struct iam_container *c;
273         int count;
274         int result;
275
276         count = lentry_count_get(l);
277         if (count == 0)
278                 return IAM_LOOKUP_EMPTY;
279
280         result = IAM_LOOKUP_OK;
281         c = iam_leaf_container(l);
282
283         p = l->il_entries;
284         q = iam_lfix_shift(l, p, count - 1);
285         if (lfix_keycmp(c, k, iam_leaf_key_at(p)) < 0) {
286                 /*
287                  * @k is less than the least key in the leaf
288                  */
289                 l->il_at = p;
290                 result = IAM_LOOKUP_BEFORE;
291         } else if (lfix_keycmp(c, iam_leaf_key_at(q), k) <= 0) {
292                 l->il_at = q;
293         } else {
294                 /*
295                  * EWD1293
296                  */
297                 while (iam_lfix_shift(l, p, 1) != q) {
298                         m = iam_lfix_shift(l, p, iam_lfix_diff(l, q, p) / 2);
299                         assert_corr(p < m && m < q);
300                         if (lfix_keycmp(c, iam_leaf_key_at(m), k) <= 0)
301                                 p = m;
302                         else
303                                 q = m;
304                 }
305                 assert_corr(lfix_keycmp(c, iam_leaf_key_at(p), k) <= 0 &&
306                             lfix_keycmp(c, k, iam_leaf_key_at(q)) < 0);
307                 /*
308                  * skip over records with duplicate keys.
309                  */
310                 while (p > l->il_entries) {
311                         t = iam_lfix_shift(l, p, -1);
312                         if (lfix_keycmp(c, iam_leaf_key_at(t), k) == 0)
313                                 p = t;
314                         else
315                                 break;
316                 }
317                 l->il_at = p;
318         }
319         assert_corr(iam_leaf_at_rec(l));
320
321         if (lfix_keycmp(c, iam_leaf_key_at(l->il_at), k) == 0)
322                 result = IAM_LOOKUP_EXACT;
323
324         if (lfix_dump)
325                 lfix_print(l);
326
327         return result;
328 }
329
330 static int iam_lfix_ilookup(struct iam_leaf *l, const struct iam_ikey *ik)
331 {
332         return iam_lfix_lookup(l, (const struct iam_key *)ik);
333 }
334
335 static void iam_lfix_key_set(struct iam_leaf *l, const struct iam_key *k)
336 {
337         assert_corr(iam_leaf_at_rec(l));
338         memcpy(iam_leaf_key_at(l->il_at), k, iam_leaf_descr(l)->id_key_size);
339 }
340
341 static int iam_lfix_key_cmp(const struct iam_leaf *l, const struct iam_key *k)
342 {
343         return lfix_keycmp(iam_leaf_container(l), iam_leaf_key_at(l->il_at), k);
344 }
345
346 static int iam_lfix_key_eq(const struct iam_leaf *l, const struct iam_key *k)
347 {
348         return !lfix_keycmp(iam_leaf_container(l),
349                             iam_leaf_key_at(l->il_at), k);
350 }
351
352 static void iam_lfix_rec_set(struct iam_leaf *l, const struct iam_rec *r)
353 {
354         assert_corr(iam_leaf_at_rec(l));
355         memcpy(iam_lfix_rec(l), r, iam_leaf_descr(l)->id_rec_size);
356 }
357
358 static inline int lfix_reccmp(const struct iam_container *c,
359                               const struct iam_rec *r1,
360                               const struct iam_rec *r2)
361 {
362         return memcmp(r1, r2, c->ic_descr->id_rec_size);
363 }
364
365 static int iam_lfix_rec_eq(const struct iam_leaf *l, const struct iam_rec *r)
366 {
367         return !lfix_reccmp(iam_leaf_container(l), iam_lfix_rec(l), r);
368 }
369
370 static void iam_lfix_rec_get(const struct iam_leaf *l, struct iam_rec *r)
371 {
372         assert_corr(iam_leaf_at_rec(l));
373         memcpy(r, iam_lfix_rec(l), iam_leaf_descr(l)->id_rec_size);
374 }
375
376 static void iam_lfix_rec_add(struct iam_leaf *leaf,
377                              const struct iam_key *k, const struct iam_rec *r)
378 {
379         struct iam_lentry *end;
380         struct iam_lentry *cur;
381         struct iam_lentry *start;
382         ptrdiff_t diff;
383         int count;
384
385         assert_corr(iam_leaf_can_add(leaf, k, r));
386
387         count = lentry_count_get(leaf);
388         /*
389          * This branch handles two exceptional cases:
390          *
391          *   - leaf positioned beyond last record, and
392          *
393          *   - empty leaf.
394          */
395         if (!iam_leaf_at_end(leaf)) {
396                 end   = iam_lfix_get_end(leaf);
397                 cur   = leaf->il_at;
398                 if (lfix_keycmp(iam_leaf_container(leaf),
399                                 k, iam_leaf_key_at(cur)) >= 0)
400                         iam_lfix_next(leaf);
401                 else
402                         /*
403                          * Another exceptional case: insertion with the key
404                          * less than least key in the leaf.
405                          */
406                         assert_corr(cur == leaf->il_entries);
407
408                 start = leaf->il_at;
409                 diff  = (void *)end - (void *)start;
410                 assert_corr(diff >= 0);
411                 memmove(iam_lfix_shift(leaf, start, 1), start, diff);
412         }
413         lentry_count_set(leaf, count + 1);
414         iam_lfix_key_set(leaf, k);
415         iam_lfix_rec_set(leaf, r);
416         assert_corr(iam_leaf_at_rec(leaf));
417 }
418
419 static void iam_lfix_rec_del(struct iam_leaf *leaf, int shift)
420 {
421         struct iam_lentry *next, *end;
422         int count;
423         ptrdiff_t diff;
424
425         assert_corr(iam_leaf_at_rec(leaf));
426
427         count = lentry_count_get(leaf);
428         end = iam_lfix_get_end(leaf);
429         next = iam_lfix_shift(leaf, leaf->il_at, 1);
430         diff = (void *)end - (void *)next;
431         memmove(leaf->il_at, next, diff);
432
433         lentry_count_set(leaf, count - 1);
434 }
435
436 static int iam_lfix_can_add(const struct iam_leaf *l,
437                             const struct iam_key *k, const struct iam_rec *r)
438 {
439         return lentry_count_get(l) < leaf_count_limit(l);
440 }
441
442 static int iam_lfix_at_end(const struct iam_leaf *folio)
443 {
444         return folio->il_at == iam_lfix_get_end(folio);
445 }
446
447 static void iam_lfix_init_new(struct iam_container *c, struct buffer_head *bh)
448 {
449         struct iam_leaf_head *hdr;
450
451         hdr = (struct iam_leaf_head *)bh->b_data;
452         hdr->ill_magic = cpu_to_le16(IAM_LEAF_HEADER_MAGIC);
453         hdr->ill_count = cpu_to_le16(0);
454 }
455
456 static void iam_lfix_split(struct iam_leaf *l, struct buffer_head **bh,
457                            iam_ptr_t new_blknr)
458 {
459         struct iam_path *path;
460         struct iam_leaf_head *hdr;
461         const struct iam_ikey *pivot;
462         struct buffer_head *new_leaf;
463
464         unsigned int count;
465         unsigned int split;
466
467         void *start;
468         void *finis;
469
470         new_leaf = *bh;
471         path = iam_leaf_path(l);
472
473         hdr = (void *)new_leaf->b_data;
474
475         count = lentry_count_get(l);
476         split = count / 2;
477
478         start = iam_lfix_shift(l, iam_get_lentries(l), split);
479         finis = iam_lfix_shift(l, iam_get_lentries(l), count);
480
481         pivot = (const struct iam_ikey *)iam_leaf_key_at(start);
482
483         memmove(iam_entries(new_leaf), start, finis - start);
484         hdr->ill_count = cpu_to_le16(count - split);
485         lentry_count_set(l, split);
486         if ((void *)l->il_at >= start) {
487                 /*
488                  * insertion point moves into new leaf.
489                  */
490                 int shift;
491
492                 shift = iam_lfix_diff(l, l->il_at, start);
493                 *bh = l->il_bh;
494                 l->il_bh = new_leaf;
495                 l->il_curidx = new_blknr;
496                 iam_lfix_init(l);
497                 /*
498                  * init cannot fail, as node was just initialized.
499                  */
500                 assert_corr(result == 0);
501                 l->il_at = iam_lfix_shift(l, iam_get_lentries(l), shift);
502         }
503         /*
504          * Insert pointer to the new node (together with the least key in
505          * the node) into index node.
506          */
507         iam_insert_key_lock(path, path->ip_frame, pivot, new_blknr);
508 }
509
510 static int iam_lfix_leaf_empty(struct iam_leaf *leaf)
511 {
512         return lentry_count_get(leaf) == 0;
513 }
514
515 static struct iam_leaf_operations iam_lfix_leaf_ops = {
516         .init           = iam_lfix_init,
517         .init_new       = iam_lfix_init_new,
518         .fini           = iam_lfix_fini,
519         .start          = iam_lfix_start,
520         .next           = iam_lfix_next,
521         .key            = iam_lfix_key,
522         .ikey           = iam_lfix_ikey,
523         .rec            = iam_lfix_rec,
524         .key_set        = iam_lfix_key_set,
525         .key_cmp        = iam_lfix_key_cmp,
526         .key_eq         = iam_lfix_key_eq,
527         .key_size       = iam_lfix_key_size,
528         .rec_set        = iam_lfix_rec_set,
529         .rec_eq         = iam_lfix_rec_eq,
530         .rec_get        = iam_lfix_rec_get,
531         .lookup         = iam_lfix_lookup,
532         .ilookup        = iam_lfix_ilookup,
533         .at_end         = iam_lfix_at_end,
534         .rec_add        = iam_lfix_rec_add,
535         .rec_del        = iam_lfix_rec_del,
536         .can_add        = iam_lfix_can_add,
537         .split          = iam_lfix_split,
538         .leaf_empty     = iam_lfix_leaf_empty,
539 };
540
541 /*
542  * Index operations.
543  */
544
545 enum {
546         /* This is duplicated in lustre/utils/create_iam.c */
547         /*
548          * Then shalt thou see the dew-BEDABBLED wretch
549          * Turn, and return, indenting with the way;
550          * Each envious brier his weary legs doth scratch,
551          * Each shadow makes him stop, each murmur stay:
552          * For misery is trodden on by many,
553          * And being low never relieved by any.
554          */
555         IAM_LFIX_ROOT_MAGIC = 0xbedabb1edULL /* d01efull */
556 };
557
558 /* This is duplicated in lustre/utils/create_iam.c */
559 struct iam_lfix_root {
560         __le64  ilr_magic;
561         __le16  ilr_keysize;
562         __le16  ilr_recsize;
563         __le16  ilr_ptrsize;
564         u8      ilr_indirect_levels;
565         u8      ilr_padding;
566 };
567
568 static __u32 iam_lfix_root_ptr(struct iam_container *c)
569 {
570         return 0;
571 }
572
573 static int iam_lfix_node_init(struct iam_container *c, struct buffer_head *bh,
574                               int root)
575 {
576         return 0;
577 }
578
579 static struct iam_entry *iam_lfix_root_inc(struct iam_container *c,
580                                            struct iam_path *path,
581                                            struct iam_frame *frame)
582 {
583         struct iam_lfix_root *root;
584         struct iam_entry *entries;
585
586         entries = frame->entries;
587
588         dx_set_count(entries, 2);
589         assert_corr(dx_get_limit(entries) == dx_root_limit(path));
590
591         root = (void *)frame->bh->b_data;
592         assert_corr(le64_to_cpu(root->ilr_magic) == IAM_LFIX_ROOT_MAGIC);
593         root->ilr_indirect_levels++;
594         frame->at = entries = iam_entry_shift(path, entries, 1);
595         memset(iam_ikey_at(path, entries), 0,
596                iam_path_descr(path)->id_ikey_size);
597         return entries;
598 }
599
600 static int iam_lfix_node_check(struct iam_path *path, struct iam_frame *frame)
601 {
602         unsigned int count;
603         unsigned int limit;
604         unsigned int limit_correct;
605         struct iam_entry *entries;
606
607         entries = dx_node_get_entries(path, frame);
608
609         if (frame == path->ip_frames) {
610                 struct iam_lfix_root *root;
611
612                 root = (void *)frame->bh->b_data;
613                 if (le64_to_cpu(root->ilr_magic) != IAM_LFIX_ROOT_MAGIC)
614                         return -EIO;
615                 limit_correct = dx_root_limit(path);
616         } else {
617                 limit_correct = dx_node_limit(path);
618         }
619         count = dx_get_count(entries);
620         limit = dx_get_limit(entries);
621         if (count > limit)
622                 return -EIO;
623         if (limit != limit_correct)
624                 return -EIO;
625         return 0;
626 }
627
628 static int iam_lfix_node_load(struct iam_path *path, struct iam_frame *frame)
629 {
630         struct iam_entry *entries;
631         void *data;
632
633         entries = dx_node_get_entries(path, frame);
634         data = frame->bh->b_data;
635
636         if (frame == path->ip_frames) {
637                 struct iam_lfix_root *root;
638
639                 root = data;
640                 path->ip_indirect = root->ilr_indirect_levels;
641                 if (path->ip_ikey_target == NULL)
642                         path->ip_ikey_target =
643                                 (struct iam_ikey *)path->ip_key_target;
644         }
645         frame->entries = frame->at = entries;
646         return 0;
647 }
648
649 static int iam_lfix_ikeycmp(const struct iam_container *c,
650                             const struct iam_ikey *k1,
651                             const struct iam_ikey *k2)
652 {
653         return memcmp(k1, k2, c->ic_descr->id_ikey_size);
654 }
655
656 static struct iam_path_descr *iam_lfix_ipd_alloc(const struct iam_container *c,
657                                                  void *area)
658 {
659         return iam_ipd_alloc(area, c->ic_descr->id_ikey_size);
660 }
661
662 static struct iam_operations iam_lfix_ops = {
663         .id_root_ptr    = iam_lfix_root_ptr,
664         .id_node_read   = iam_node_read,
665         .id_node_init   = iam_lfix_node_init,
666         .id_node_check  = iam_lfix_node_check,
667         .id_node_load   = iam_lfix_node_load,
668         .id_ikeycmp     = iam_lfix_ikeycmp,
669         .id_root_inc    = iam_lfix_root_inc,
670         .id_ipd_alloc   = iam_lfix_ipd_alloc,
671         .id_ipd_free    = iam_ipd_free,
672         .id_name        = "lfix"
673 };
674
675 static int iam_lfix_guess(struct iam_container *c)
676 {
677         int result;
678         struct buffer_head *bh;
679         const struct iam_lfix_root *root;
680
681         assert_corr(c->ic_object != NULL);
682
683         result = iam_node_read(c, iam_lfix_root_ptr(c), NULL, &bh);
684         if (result == 0) {
685                 root = (void *)bh->b_data;
686                 if (le64_to_cpu(root->ilr_magic) == IAM_LFIX_ROOT_MAGIC) {
687                         struct iam_descr *descr;
688
689                         descr = c->ic_descr;
690                         descr->id_key_size  = le16_to_cpu(root->ilr_keysize);
691                         descr->id_ikey_size = le16_to_cpu(root->ilr_keysize);
692                         descr->id_rec_size  = le16_to_cpu(root->ilr_recsize);
693                         descr->id_ptr_size  = le16_to_cpu(root->ilr_ptrsize);
694                         descr->id_root_gap  = sizeof(struct iam_lfix_root);
695                         descr->id_node_gap  = 0;
696                         descr->id_ops       = &iam_lfix_ops;
697                         descr->id_leaf_ops  = &iam_lfix_leaf_ops;
698                         c->ic_root_bh = bh;
699                 } else {
700                         result = -EBADF;
701                         brelse(bh);
702                 }
703         }
704         return result;
705 }
706
707 static struct iam_format iam_lfix_format = {
708         .if_guess = iam_lfix_guess
709 };
710
711 void iam_lfix_format_init(void)
712 {
713         iam_format_register(&iam_lfix_format);
714 }
715
716 /*
717  * Debugging aid.
718  */
719
720 #define KEYSIZE (8)
721 #define RECSIZE (8)
722 #define PTRSIZE (4)
723
724 #define LFIX_ROOT_RECNO \
725         ((4096 - sizeof(struct iam_lfix_root)) / (KEYSIZE + PTRSIZE))
726
727 #define LFIX_INDEX_RECNO (4096 / (KEYSIZE + PTRSIZE))
728
729 #define LFIX_LEAF_RECNO \
730         ((4096 - sizeof(struct iam_leaf_head)) / (KEYSIZE + RECSIZE))
731
732         struct lfix_root {
733         struct iam_lfix_root lr_root;
734         struct {
735                 char key[KEYSIZE];
736                 char ptr[PTRSIZE];
737         } lr_entry[LFIX_ROOT_RECNO];
738 };
739
740         struct lfix_index {
741         struct dx_countlimit li_cl;
742         char   li_padding[KEYSIZE + PTRSIZE - sizeof(struct dx_countlimit)];
743         struct {
744                 char key[KEYSIZE];
745                 char ptr[PTRSIZE];
746         } li_entry[LFIX_INDEX_RECNO - 1];
747 };
748
749         struct lfix_leaf {
750                 struct iam_leaf_head ll_head;
751                 struct {
752                         char key[KEYSIZE];
753                         char rec[RECSIZE];
754                 } ll_entry[LFIX_LEAF_RECNO];
755         };
756
757 #define STORE_UNALIGNED(val, dst)                       \
758 ({                                                      \
759         typeof(val) __val = (val);                      \
760         BUILD_BUG_ON(sizeof(val) != sizeof(*(dst)));    \
761         memcpy(dst, &__val, sizeof(*(dst)));            \
762 })
763
764 static void lfix_root(void *buf,
765                       int blocksize, int keysize, int ptrsize, int recsize)
766 {
767         struct iam_lfix_root *root;
768         struct dx_countlimit *limit;
769         void *entry;
770
771         root = buf;
772         *root = (typeof(*root)) {
773                 .ilr_magic           = cpu_to_le64(IAM_LFIX_ROOT_MAGIC),
774                 .ilr_keysize         = cpu_to_le16(keysize),
775                 .ilr_recsize         = cpu_to_le16(recsize),
776                 .ilr_ptrsize         = cpu_to_le16(ptrsize),
777                 .ilr_indirect_levels = 0
778         };
779
780         limit = (void *)(root + 1);
781         *limit = (typeof(*limit)){
782                 /*
783                  * limit itself + one pointer to the leaf.
784                  */
785                 .count = cpu_to_le16(2),
786                 .limit = iam_root_limit(sizeof(struct iam_lfix_root),
787                                         blocksize, keysize + ptrsize)
788         };
789
790         /* To guarantee that the padding "keysize + ptrsize"
791          * covers the "dx_countlimit" and the "idle_blocks". */
792         LASSERT((keysize + ptrsize) >=
793                 (sizeof(struct dx_countlimit) + sizeof(__u32)));
794
795         entry = (void *)(limit + 1);
796         /* Put "idle_blocks" just after the limit. There was padding after
797          * the limit, the "idle_blocks" re-uses part of the padding, so no
798          * compatibility issues with old layout.
799          */
800         *(__u32 *)entry = 0;
801
802         /*
803          * Skip over @limit.
804          */
805         entry = (void *)(root + 1) + keysize + ptrsize;
806
807         /*
808          * Entry format is <key> followed by <ptr>. In the minimal tree
809          * consisting of a root and single node, <key> is a minimal possible
810          * key.
811          *
812          * XXX: this key is hard-coded to be a sequence of 0's.
813          */
814
815         memset(entry, 0, keysize);
816         entry += keysize;
817         /* now @entry points to <ptr> */
818         if (ptrsize == 4)
819                 STORE_UNALIGNED(cpu_to_le32(1), (u_int32_t *)entry);
820         else
821                 STORE_UNALIGNED(cpu_to_le64(1), (u_int64_t *)entry);
822 }
823
824 static void lfix_leaf(void *buf,
825                       int blocksize, int keysize, int ptrsize, int recsize)
826 {
827         struct iam_leaf_head *head;
828         void *entry;
829
830         /* form leaf */
831         head = buf;
832         *head = (struct iam_leaf_head) {
833                 .ill_magic = cpu_to_le16(IAM_LEAF_HEADER_MAGIC),
834                 /*
835                  * Leaf contains an entry with the smallest possible key
836                  * (created by zeroing).
837                  */
838                 .ill_count = cpu_to_le16(1),
839         };
840
841         entry = (void *)(head + 1);
842         memset(entry, 0, keysize + recsize);
843 }
844
845 int iam_lfix_create(struct inode *obj,
846                     int keysize, int ptrsize, int recsize, handle_t *handle)
847 {
848         struct buffer_head *root_node;
849         struct buffer_head *leaf_node;
850         struct super_block *sb;
851         u32 blknr;
852         int result = 0;
853         unsigned long bsize;
854
855         assert_corr(obj->i_size == 0);
856
857         sb = obj->i_sb;
858         bsize = sb->s_blocksize;
859         root_node = osd_ldiskfs_append(handle, obj, &blknr);
860         if (IS_ERR(root_node))
861                 GOTO(out, result = PTR_ERR(root_node));
862
863         leaf_node = osd_ldiskfs_append(handle, obj, &blknr);
864         if (IS_ERR(leaf_node))
865                 GOTO(out_root, result = PTR_ERR(leaf_node));
866
867         lfix_root(root_node->b_data, bsize, keysize, ptrsize, recsize);
868         lfix_leaf(leaf_node->b_data, bsize, keysize, ptrsize, recsize);
869         ldiskfs_mark_inode_dirty(handle, obj);
870         result = ldiskfs_handle_dirty_metadata(handle, NULL, root_node);
871         if (result == 0)
872                 result = ldiskfs_handle_dirty_metadata(handle, NULL, leaf_node);
873         if (result != 0)
874                 ldiskfs_std_error(sb, result);
875
876         brelse(leaf_node);
877
878         GOTO(out_root, result);
879
880 out_root:
881         brelse(root_node);
882 out:
883         return result;
884 }