iam: revert changes to IAM_LOOKUP_LAST that might have caused spurious -ENOENT.
lustre/kernel_patches/patches/ext3-iam-separate.patch (fs/lustre-release.git)
1 Index: iam/fs/ext3/Makefile
2 ===================================================================
3 --- iam.orig/fs/ext3/Makefile
4 +++ iam/fs/ext3/Makefile
5 @@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
6  
7  ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
8            ioctl.o namei.o super.o symlink.o hash.o resize.o \
9 -          extents.o mballoc.o
10 +          extents.o mballoc.o iam.o iam_lfix.o
11  
12  ext3-$(CONFIG_EXT3_FS_XATTR)    += xattr.o xattr_user.o xattr_trusted.o
13  ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
14 Index: iam/fs/ext3/iam.c
15 ===================================================================
16 --- iam.orig/fs/ext3/iam.c
17 +++ iam/fs/ext3/iam.c
18 @@ -0,0 +1,1445 @@
19 +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
20 + * vim:expandtab:shiftwidth=8:tabstop=8:
21 + *
22 + *  iam.c
23 + *  Top-level entry points into iam module
24 + *
25 + *  Copyright (c) 2006 Cluster File Systems, Inc.
26 + *   Author: Wang Di <wangdi@clusterfs.com>
27 + *   Author: Nikita Danilov <nikita@clusterfs.com>
28 + *
29 + *   This file is part of the Lustre file system, http://www.lustre.org
30 + *   Lustre is a trademark of Cluster File Systems, Inc.
31 + *
32 + *   You may have signed or agreed to another license before downloading
33 + *   this software.  If so, you are bound by the terms and conditions
34 + *   of that agreement, and the following does not apply to you.  See the
35 + *   LICENSE file included with this distribution for more information.
36 + *
37 + *   If you did not agree to a different license, then this copy of Lustre
38 + *   is open source software; you can redistribute it and/or modify it
39 + *   under the terms of version 2 of the GNU General Public License as
40 + *   published by the Free Software Foundation.
41 + *
42 + *   In either case, Lustre is distributed in the hope that it will be
43 + *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
44 + *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
45 + *   license text for more details.
46 + */
47 +
48 +/*
49 + * iam: big theory statement.
50 + *
51 + * iam (Index Access Module) is a module providing an abstraction of a
52 + * persistent transactional container on top of the generalized ext3 htree.
53 + *
54 + * iam supports:
55 + *
56 + *     - key, pointer, and record size specifiable per container.
57 + *
58 + *     - trees taller than 2 index levels.
59 + *
60 + *     - read/write to existing ext3 htree directories as iam containers.
61 + *
62 + * iam container is a tree, consisting of leaf nodes containing keys and
63 + * records stored in this container, and index nodes, containing keys and
64 + * pointers to leaf or index nodes.
65 + *
66 + * iam does not work with keys directly, instead it calls user-supplied key
67 + * comparison function (->dpo_keycmp()).
68 + *
69 + * Pointers are (currently) interpreted as logical offsets (measured in
70 + * blocks) within the underlying flat file on top of which the iam tree lives.
71 + *
72 + * On-disk format:
73 + *
74 + * iam mostly tries to reuse existing htree formats.
75 + *
76 + * Format of index node:
77 + *
78 + * +-----+-------+-------+-------+------+-------+------------+
79 + * |     | count |       |       |      |       |            |
80 + * | gap |   /   | entry | entry | .... | entry | free space |
81 + * |     | limit |       |       |      |       |            |
82 + * +-----+-------+-------+-------+------+-------+------------+
83 + *
84 + *       gap           this part of node is never accessed by iam code. It
85 + *                     exists for binary compatibility with ext3 htree (that,
86 + *                     in turn, stores fake struct ext2_dirent for ext2
87 + *                     compatibility), and to keep some unspecified per-node
88 + *                     data. Gap can be different for root and non-root index
89 + *                     nodes. Gap size can be specified for each container
90 + *                     (gap of 0 is allowed).
91 + *
92 + *       count/limit   current number of entries in this node, and the maximal
93 + *                     number of entries that can fit into node. count/limit
94 + *                     has the same size as entry, and is itself counted in
95 + *                     count.
96 + *
97 + *       entry         index entry: consists of a key immediately followed
98 + *                     by a pointer to a child node. The sizes of the key and
99 + *                     the pointer depend on the container. An entry has
100 + *                     neither alignment nor padding.
101 + *
102 + *       free space    portion of the node to which new entries are added
103 + *
104 + * Entries in index node are sorted by their key value.
105 + *
106 + * Format of a leaf node is not specified. Generic iam code accesses leaf
107 + * nodes through ->id_leaf methods in struct iam_descr.
108 + *
109 + */
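/*
 * Editorial sketch, not part of the original patch: since index entries have
 * neither alignment nor padding, and the count/limit pair occupies the first
 * entry-sized slot after the gap, the i-th slot of an index node is reachable
 * by plain byte arithmetic. All names below are hypothetical.
 */
static inline void *iam_example_entry_at(void *node, size_t gap,
                                         size_t keysize, size_t ptrsize, int i)
{
        /* an entry is a key immediately followed by a pointer */
        size_t esize = keysize + ptrsize;

        /* slot 0 holds the count/limit pair, itself counted in count */
        return node + gap + i * esize;
}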
110 +
111 +#include <linux/module.h>
112 +#include <linux/fs.h>
113 +#include <linux/pagemap.h>
114 +#include <linux/jbd.h>
115 +#include <linux/time.h>
116 +#include <linux/ext3_fs.h>
117 +#include <linux/ext3_jbd.h>
118 +#include <linux/fcntl.h>
119 +#include <linux/stat.h>
120 +#include <linux/string.h>
121 +#include <linux/quotaops.h>
122 +#include <linux/buffer_head.h>
123 +#include <linux/smp_lock.h>
124 +#include <linux/lustre_iam.h>
125 +
126 +#include <libcfs/libcfs.h>
127 +#include <libcfs/kp30.h>
128 +
129 +#include "xattr.h"
130 +#include "iopen.h"
131 +#include "acl.h"
132 +
133 +/*
134 + * List of all registered formats.
135 + *
136 + * No locking. Callers synchronize.
137 + */
138 +static LIST_HEAD(iam_formats);
139 +
140 +void iam_format_register(struct iam_format *fmt)
141 +{
142 +        list_add(&fmt->if_linkage, &iam_formats);
143 +}
144 +EXPORT_SYMBOL(iam_format_register);
145 +
146 +/*
147 + * Determine format of given container. This is done by scanning list of
148 + * registered formats and calling ->if_guess() method of each in turn.
149 + */
150 +static int iam_format_guess(struct iam_container *c)
151 +{
152 +        int result;
153 +        struct iam_format *fmt;
154 +
155 +        /*
156 +         * XXX temporary initialization hook.
157 +         */
158 +        {
159 +                static int initialized = 0;
160 +
161 +                if (!initialized) {
162 +                        /*
163 +                         * Keep that order: htree should be registered first,
164 +                         * so that iam_htree_guess() runs last.
165 +                         */
166 +                        iam_htree_format_init();
167 +                        iam_lvar_format_init();
168 +                        iam_lfix_format_init();
169 +                        initialized = 1;
170 +                }
171 +        }
172 +
173 +        result = -ENOENT;
174 +        list_for_each_entry(fmt, &iam_formats, if_linkage) {
175 +                result = fmt->if_guess(c);
176 +                if (result == 0)
177 +                        break;
178 +        }
179 +        return result;
180 +}
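/*
 * Editorial sketch, not part of the original patch: a minimal format driver
 * for the registry above. Only ->if_guess() and ->if_linkage are exercised by
 * iam_format_guess(); a real driver (lfix, lvar, htree) would also set up the
 * container's descriptor when it recognizes the format. Names are
 * hypothetical.
 */
static int iam_example_guess(struct iam_container *c)
{
        /* inspect c->ic_object here; 0 means "this is my format" */
        return -ENOENT;
}

static struct iam_format iam_example_format = {
        .if_guess = iam_example_guess
};

static void iam_example_format_init(void)
{
        /*
         * list_add() pushes to the head of iam_formats, so formats
         * registered later are guessed first -- hence htree registers
         * first above, making iam_htree_guess() the fallback.
         */
        iam_format_register(&iam_example_format);
}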
181 +
182 +/*
183 + * Initialize container @c.
184 + */
185 +int iam_container_init(struct iam_container *c,
186 +                      struct iam_descr *descr, struct inode *inode)
187 +{
188 +       memset(c, 0, sizeof *c);
189 +       c->ic_descr  = descr;
190 +       c->ic_object = inode;
191 +        init_rwsem(&c->ic_sem);
192 +        return 0;
193 +}
194 +EXPORT_SYMBOL(iam_container_init);
195 +
196 +/*
197 + * Determine container format.
198 + */
199 +int iam_container_setup(struct iam_container *c)
200 +{
201 +        return iam_format_guess(c);
202 +}
203 +EXPORT_SYMBOL(iam_container_setup);
204 +
205 +/*
206 + * Finalize container @c, release all resources.
207 + */
208 +void iam_container_fini(struct iam_container *c)
209 +{
210 +}
211 +EXPORT_SYMBOL(iam_container_fini);
212 +
213 +void iam_path_init(struct iam_path *path, struct iam_container *c,
214 +                   struct iam_path_descr *pd)
215 +{
216 +       memset(path, 0, sizeof *path);
217 +       path->ip_container = c;
218 +       path->ip_frame = path->ip_frames;
219 +       path->ip_data = pd;
220 +        path->ip_leaf.il_path = path;
221 +}
222 +
223 +static void iam_leaf_fini(struct iam_leaf *leaf);
224 +
225 +void iam_path_release(struct iam_path *path)
226 +{
227 +       int i;
228 +
229 +       for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) {
230 +               if (path->ip_frames[i].bh != NULL) {
231 +                       brelse(path->ip_frames[i].bh);
232 +                       path->ip_frames[i].bh = NULL;
233 +               }
234 +       }
235 +}
236 +
237 +void iam_path_fini(struct iam_path *path)
238 +{
239 +       iam_leaf_fini(&path->ip_leaf);
240 +        iam_path_release(path);
241 +}
242 +
243 +void iam_path_compat_init(struct iam_path_compat *path, struct inode *inode)
244 +{
245 +       int i;
246 +
247 +        path->ipc_hinfo = &path->ipc_hinfo_area;
248 +       for (i = 0; i < ARRAY_SIZE(path->ipc_scratch); ++i)
249 +               path->ipc_descr.ipd_key_scratch[i] =
250 +                       (struct iam_ikey *)&path->ipc_scratch[i];
251 +
252 +       iam_container_init(&path->ipc_container,
253 +                           &iam_htree_compat_param, inode);
254 +       iam_path_init(&path->ipc_path, &path->ipc_container, &path->ipc_descr);
255 +}
256 +
257 +void iam_path_compat_fini(struct iam_path_compat *path)
258 +{
259 +       iam_path_fini(&path->ipc_path);
260 +       iam_container_fini(&path->ipc_container);
261 +}
262 +
263 +/*
264 + * Helper function allocating iam_path_descr and initializing its key scratch
265 + * area.
266 + */
267 +struct iam_path_descr *iam_ipd_alloc(int keysize)
268 +{
269 +        struct iam_path_descr *ipd;
270 +        void *karea;
271 +        int i;
272 +
273 +        ipd = kmalloc(ARRAY_SIZE(ipd->ipd_key_scratch) * keysize +
274 +                      sizeof *ipd, GFP_KERNEL);
275 +        if (ipd != NULL) {
276 +                karea = ipd + 1;
277 +                for (i = 0; i < ARRAY_SIZE(ipd->ipd_key_scratch);
278 +                     ++i, karea += keysize)
279 +                        ipd->ipd_key_scratch[i] = karea;
280 +        }
281 +        return ipd;
282 +}
283 +EXPORT_SYMBOL(iam_ipd_alloc);
284 +
285 +void iam_ipd_free(struct iam_path_descr *ipd)
286 +{
287 +        kfree(ipd);
288 +}
289 +EXPORT_SYMBOL(iam_ipd_free);
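/*
 * Editorial sketch, not part of the original patch: how the two helpers above
 * pair up in a caller. The scratch area is sized by the index-key size from
 * the container's descriptor. Names are hypothetical.
 */
static int iam_example_with_ipd(struct iam_container *c)
{
        struct iam_path_descr *ipd;

        ipd = iam_ipd_alloc(c->ic_descr->id_ikey_size);
        if (ipd == NULL)
                return -ENOMEM;
        /* ... pass ipd to iam_lookup()/iam_insert()/iam_delete() ... */
        iam_ipd_free(ipd);
        return 0;
}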
290 +
291 +int iam_node_read(struct iam_container *c, iam_ptr_t ptr,
292 +                  handle_t *h, struct buffer_head **bh)
293 +{
294 +       int result = 0;
295 +
296 +       *bh = ext3_bread(h, c->ic_object, (int)ptr, 0, &result);
297 +       if (*bh == NULL)
298 +               result = -EIO;
299 +       return result;
300 +}
301 +
302 +/*
303 + * Return pointer to current leaf record. Pointer is valid while corresponding
304 + * leaf node is locked and pinned.
305 + */
306 +static struct iam_rec *iam_leaf_rec(const struct iam_leaf *leaf)
307 +{
308 +       return iam_leaf_ops(leaf)->rec(leaf);
309 +}
310 +
311 +/*
312 + * Return pointer to the current leaf key. This function returns pointer to
313 + * the key stored in node.
314 + *
315 + * Caller should assume that returned pointer is only valid while leaf node is
316 + * pinned and locked.
317 + */
318 +static struct iam_key *iam_leaf_key(const struct iam_leaf *leaf)
319 +{
320 +       return iam_leaf_ops(leaf)->key(leaf);
321 +}
322 +
323 +static int iam_leaf_key_size(const struct iam_leaf *leaf)
324 +{
325 +       return iam_leaf_ops(leaf)->key_size(leaf);
326 +}
327 +
328 +static struct iam_ikey *iam_leaf_ikey(const struct iam_leaf *leaf,
329 +                                      struct iam_ikey *key)
330 +{
331 +       return iam_leaf_ops(leaf)->ikey(leaf, key);
332 +}
333 +
334 +static int iam_leaf_keycmp(const struct iam_leaf *leaf,
335 +                           const struct iam_key *key)
336 +{
337 +       return iam_leaf_ops(leaf)->key_cmp(leaf, key);
338 +}
339 +
340 +static int iam_leaf_keyeq(const struct iam_leaf *leaf,
341 +                          const struct iam_key *key)
342 +{
343 +       return iam_leaf_ops(leaf)->key_eq(leaf, key);
344 +}
345 +
346 +#if EXT3_INVARIANT_ON
347 +static int iam_leaf_check(struct iam_leaf *leaf);
348 +extern int dx_node_check(struct iam_path *p, struct iam_frame *f);
349 +
350 +static int iam_path_check(struct iam_path *p)
351 +{
352 +        int i;
353 +        int result;
354 +        struct iam_frame *f;
355 +        struct iam_descr *param;
356 +
357 +        result = 1;
358 +        param = iam_path_descr(p);
359 +        for (i = 0; result && i < ARRAY_SIZE(p->ip_frames); ++i) {
360 +                f = &p->ip_frames[i];
361 +                if (f->bh != NULL) {
362 +                        result = dx_node_check(p, f);
363 +                        if (result)
364 +                                result = !param->id_ops->id_node_check(p, f);
365 +                }
366 +        }
367 +        if (result && p->ip_leaf.il_bh != NULL)
368 +                result = iam_leaf_check(&p->ip_leaf);
369 +        if (result == 0) {
370 +                BREAKPOINT();
371 +                ext3_std_error(iam_path_obj(p)->i_sb, result);
372 +        }
373 +        return result;
374 +}
375 +#endif
376 +
377 +static int iam_leaf_load(struct iam_path *path)
378 +{
379 +       iam_ptr_t block;
380 +       int err;
381 +       struct iam_container *c;
382 +       struct buffer_head   *bh;
383 +       struct iam_leaf      *leaf;
384 +       struct iam_descr     *descr;
385 +       
386 +       c     = path->ip_container;
387 +       leaf  = &path->ip_leaf;
388 +       descr = iam_path_descr(path);
389 +       block = path->ip_frame->leaf;
390 +        if (block == 0) {
391 +                /* XXX bug 11027 */
392 +                printk(KERN_EMERG "wrong leaf: %lu %d [%p %p %p]\n",
393 +                       (long unsigned)path->ip_frame->leaf,
394 +                       dx_get_count(dx_node_get_entries(path, path->ip_frame)),
395 +                       path->ip_frames[0].bh, path->ip_frames[1].bh,
396 +                       path->ip_frames[2].bh);
397 +        }
398 +       err   = descr->id_ops->id_node_read(c, block, NULL, &bh);
399 +       if (err == 0) {
400 +               leaf->il_bh = bh;
401 +                leaf->il_curidx = block;
402 +               err = iam_leaf_ops(leaf)->init(leaf);
403 +                assert_inv(ergo(err == 0, iam_leaf_check(leaf)));
404 +       }
405 +       return err;
406 +}
407 +
408 +static void iam_leaf_unlock(struct iam_leaf *leaf)
409 +{
410 +        if (leaf->il_lock != NULL) {
411 +                dx_unlock_htree(iam_leaf_container(leaf)->ic_object,
412 +                                leaf->il_lock);
413 +               do_corr(schedule());
414 +                leaf->il_lock = NULL;
415 +        }
416 +}
417 +
418 +static void iam_leaf_fini(struct iam_leaf *leaf)
419 +{
420 +        if (leaf->il_path != NULL) {
421 +                iam_leaf_unlock(leaf);
422 +                assert_inv(ergo(leaf->il_bh != NULL, iam_leaf_check(leaf)));
423 +                iam_leaf_ops(leaf)->fini(leaf);
424 +                if (leaf->il_bh) {
425 +                        brelse(leaf->il_bh);
426 +                        leaf->il_bh = NULL;
427 +                        leaf->il_curidx = 0;
428 +                }
429 +        }
430 +}
431 +
432 +static void iam_leaf_start(struct iam_leaf *folio)
433 +{
434 +       iam_leaf_ops(folio)->start(folio);
435 +}
436 +
437 +void iam_leaf_next(struct iam_leaf *folio)
438 +{
439 +       iam_leaf_ops(folio)->next(folio);
440 +}
441 +
442 +static void iam_leaf_rec_add(struct iam_leaf *leaf, const struct iam_key *key,
443 +                             const struct iam_rec *rec)
444 +{
445 +        iam_leaf_ops(leaf)->rec_add(leaf, key, rec);
446 +}
447 +
448 +static void iam_rec_del(struct iam_leaf *leaf, int shift)
449 +{
450 +        iam_leaf_ops(leaf)->rec_del(leaf, shift);
451 +}
452 +
453 +int iam_leaf_at_end(const struct iam_leaf *leaf)
454 +{
455 +        return iam_leaf_ops(leaf)->at_end(leaf);
456 +}
457 +
458 +void iam_leaf_split(struct iam_leaf *l, struct buffer_head **bh, iam_ptr_t nr)
459 +{
460 +        iam_leaf_ops(l)->split(l, bh, nr);
461 +}
462 +
463 +int iam_leaf_can_add(const struct iam_leaf *l,
464 +                     const struct iam_key *k, const struct iam_rec *r)
465 +{
466 +        return iam_leaf_ops(l)->can_add(l, k, r);
467 +}
468 +
469 +#if EXT3_INVARIANT_ON
470 +static int iam_leaf_check(struct iam_leaf *leaf)
471 +{
472 +        return 1;
473 +#if 0
474 +        struct iam_lentry    *orig;
475 +        struct iam_path      *path;
476 +        struct iam_container *bag;
477 +        struct iam_ikey       *k0;
478 +        struct iam_ikey       *k1;
479 +        int result;
480 +        int first;
481 +
482 +        orig = leaf->il_at;
483 +        path = iam_leaf_path(leaf);
484 +        bag  = iam_leaf_container(leaf);
485 +
486 +        result = iam_leaf_ops(leaf)->init(leaf);
487 +        if (result != 0)
488 +                return result;
489 +
490 +        first = 1;
491 +        iam_leaf_start(leaf);
492 +        k0 = iam_path_ikey(path, 0);
493 +        k1 = iam_path_ikey(path, 1);
494 +        while (!iam_leaf_at_end(leaf)) {
495 +               iam_ikeycpy(bag, k0, k1);
496 +               iam_ikeycpy(bag, k1, iam_leaf_ikey(leaf, k1));
497 +                if (!first && iam_ikeycmp(bag, k0, k1) > 0) {
498 +                        BREAKPOINT();
499 +                        return 0;
500 +                }
501 +                first = 0;
502 +                iam_leaf_next(leaf);
503 +        }
504 +        leaf->il_at = orig;
505 +        return 1;
506 +#endif
507 +}
508 +#endif
509 +
510 +static int iam_txn_dirty(handle_t *handle,
511 +                         struct iam_path *path, struct buffer_head *bh)
512 +{
513 +        int result;
514 +
515 +        result = ext3_journal_dirty_metadata(handle, bh);
516 +        if (result != 0)
517 +                ext3_std_error(iam_path_obj(path)->i_sb, result);
518 +        return result;
519 +}
520 +
521 +static int iam_txn_add(handle_t *handle,
522 +                       struct iam_path *path, struct buffer_head *bh)
523 +{
524 +        int result;
525 +
526 +        result = ext3_journal_get_write_access(handle, bh);
527 +        if (result != 0)
528 +                ext3_std_error(iam_path_obj(path)->i_sb, result);
529 +        return result;
530 +}
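/*
 * Editorial sketch, not part of the original patch: the jbd discipline that
 * the two helpers above encapsulate -- declare write access to a buffer
 * before modifying it, mark it dirty afterwards. Names are hypothetical.
 */
static int iam_example_txn_update(handle_t *h, struct iam_path *path,
                                  struct buffer_head *bh)
{
        int result;

        result = iam_txn_add(h, path, bh);           /* get write access */
        if (result == 0) {
                /* ... modify bh->b_data under journal protection ... */
                result = iam_txn_dirty(h, path, bh); /* mark dirty */
        }
        return result;
}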
531 +
532 +/***********************************************************************/
533 +/* iterator interface                                                  */
534 +/***********************************************************************/
535 +
536 +static enum iam_it_state it_state(const struct iam_iterator *it)
537 +{
538 +        return it->ii_state;
539 +}
540 +
541 +/*
542 + * Helper function returning the container of an iterator.
543 + */
544 +static struct iam_container *iam_it_container(const struct iam_iterator *it)
545 +{
546 +       return it->ii_path.ip_container;
547 +}
548 +
549 +static inline int it_keycmp(const struct iam_iterator *it,
550 +                            const struct iam_key *k)
551 +{
552 +       return iam_leaf_keycmp(&it->ii_path.ip_leaf, k);
553 +}
554 +
555 +static inline int it_keyeq(const struct iam_iterator *it,
556 +                           const struct iam_key *k)
557 +{
558 +       return iam_leaf_keyeq(&it->ii_path.ip_leaf, k);
559 +}
560 +
561 +static int it_ikeycmp(const struct iam_iterator *it, const struct iam_ikey *ik)
562 +{
563 +        return iam_ikeycmp(it->ii_path.ip_container,
564 +                           iam_leaf_ikey(&it->ii_path.ip_leaf,
565 +                                         iam_path_ikey(&it->ii_path, 0)), ik);
566 +}
567 +
568 +static inline int it_at_rec(const struct iam_iterator *it)
569 +{
570 +        return !iam_leaf_at_end(&it->ii_path.ip_leaf);
571 +}
572 +
573 +static inline int it_before(const struct iam_iterator *it)
574 +{
575 +        return it_state(it) == IAM_IT_SKEWED && it_at_rec(it);
576 +}
577 +
578 +/*
579 + * Helper wrapper around iam_it_get(): returns 0 (success) only when a record
580 + * with exactly the requested key is found.
581 + */
582 +static int iam_it_get_exact(struct iam_iterator *it, const struct iam_key *k)
583 +{
584 +        int result;
585 +
586 +        result = iam_it_get(it, k);
587 +        if (result > 0)
588 +                result = 0;
589 +        else if (result == 0)
590 +                /*
591 +                 * Return -ENOENT if cursor is located above record with a key
592 +                 * different from one specified, or in the empty leaf.
593 +                 *
594 +                 * XXX returning -ENOENT only works if iam_it_get() never
595 +                 * returns -ENOENT as a legitimate error.
596 +                 */
597 +                result = -ENOENT;
598 +        return result;
599 +}
600 +
601 +void iam_container_write_lock(struct iam_container *ic)
602 +{
603 +        down_write(&ic->ic_sem);
604 +}
605 +
606 +void iam_container_write_unlock(struct iam_container *ic)
607 +{
608 +        up_write(&ic->ic_sem);
609 +}
610 +
611 +void iam_container_read_lock(struct iam_container *ic)
612 +{
613 +        down_read(&ic->ic_sem);
614 +}
615 +
616 +void iam_container_read_unlock(struct iam_container *ic)
617 +{
618 +        up_read(&ic->ic_sem);
619 +}
620 +
621 +/*
622 + * Initialize iterator to IAM_IT_DETACHED state.
623 + *
624 + * postcondition: it_state(it) == IAM_IT_DETACHED
625 + */
626 +int  iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags,
627 +                struct iam_path_descr *pd)
628 +{
629 +       memset(it, 0, sizeof *it);
630 +       it->ii_flags  = flags;
631 +       it->ii_state  = IAM_IT_DETACHED;
632 +       iam_path_init(&it->ii_path, c, pd);
633 +       return 0;
634 +}
635 +EXPORT_SYMBOL(iam_it_init);
636 +
637 +/*
638 + * Finalize iterator and release all resources.
639 + *
640 + * precondition: it_state(it) == IAM_IT_DETACHED
641 + */
642 +void iam_it_fini(struct iam_iterator *it)
643 +{
644 +       assert_corr(it_state(it) == IAM_IT_DETACHED);
645 +       iam_path_fini(&it->ii_path);
646 +}
647 +EXPORT_SYMBOL(iam_it_fini);
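/*
 * Editorial sketch, not part of the original patch: the full iterator
 * life-cycle -- init, attach, walk, detach, finalize -- composed from the
 * functions in this file (compare iam_lookup() below). Names are
 * hypothetical; error handling is abbreviated.
 */
static int iam_example_walk(struct iam_container *c, const struct iam_key *k,
                            struct iam_path_descr *pd)
{
        struct iam_iterator it;
        int result;

        iam_it_init(&it, c, IAM_IT_MOVE, pd);
        result = iam_it_get_at(&it, k);  /* 0 or +1: positioned on a record */
        while (result >= 0) {
                /*
                 * iam_it_key_get(&it) and iam_it_rec_get(&it) are valid
                 * while the iterator stays attached.
                 */
                result = iam_it_next(&it);
                if (result > 0) {        /* end of container reached */
                        result = 0;
                        break;
                }
        }
        iam_it_put(&it);
        iam_it_fini(&it);
        return result;
}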
648 +
649 +/*
650 + * Performs tree top-to-bottom traversal starting from root, and loads leaf
651 + * node.
652 + */
653 +static int iam_path_lookup(struct iam_path *path, int index)
654 +{
655 +       struct iam_container *c;
656 +       struct iam_descr *descr;
657 +       struct iam_leaf  *leaf;
658 +       int result;
659 +       
660 +       c = path->ip_container;
661 +       leaf = &path->ip_leaf;
662 +       descr = iam_path_descr(path);
663 +       result = dx_lookup_lock(path, &leaf->il_lock, DLT_WRITE);
664 +        assert_inv(iam_path_check(path));
665 +        do_corr(schedule());
666 +       if (result == 0) {
667 +               result = iam_leaf_load(path);
668 +                assert_inv(ergo(result == 0, iam_leaf_check(leaf)));
669 +               if (result == 0) {
670 +                        do_corr(schedule());
671 +                        if (index)
672 +                                result = iam_leaf_ops(leaf)->
673 +                                        ilookup(leaf, path->ip_ikey_target);
674 +                        else
675 +                                result = iam_leaf_ops(leaf)->
676 +                                        lookup(leaf, path->ip_key_target);
677 +                        do_corr(schedule());
678 +                }
679 +                if (result < 0)
680 +                        iam_leaf_unlock(leaf);
681 +       }
682 +       return result;
683 +}
684 +
685 +/*
686 + * Common part of iam_it_{i,}get().
687 + */
688 +static int __iam_it_get(struct iam_iterator *it, int index)
689 +{
690 +        int result;
691 +        assert_corr(it_state(it) == IAM_IT_DETACHED);
692 +
693 +        result = iam_path_lookup(&it->ii_path, index);
694 +        if (result >= 0) {
695 +                int collision;
696 +
697 +                collision = result & IAM_LOOKUP_LAST;
698 +                switch (result & ~IAM_LOOKUP_LAST) {
699 +                case IAM_LOOKUP_EXACT:
700 +                        result = +1;
701 +                        it->ii_state = IAM_IT_ATTACHED;
702 +                        break;
703 +                case IAM_LOOKUP_OK:
704 +                        result = 0;
705 +                        it->ii_state = IAM_IT_ATTACHED;
706 +                        break;
707 +                case IAM_LOOKUP_BEFORE:
708 +                case IAM_LOOKUP_EMPTY:
709 +                        result = 0;
710 +                        it->ii_state = IAM_IT_SKEWED;
711 +                        break;
712 +                default:
713 +                        assert(0);
714 +                }
715 +                result |= collision;
716 +        }
717 +        /*
718 +         * See iam_it_get_exact() for explanation.
719 +         */
720 +        assert_corr(result != -ENOENT);
721 +        return result;
722 +}
723 +
724 +/*
725 + * A record with the correct hash, but not the same key, was found: iterate
726 + * through the hash collision chain, looking for the correct record.
727 + */
728 +static int iam_it_collision(struct iam_iterator *it)
729 +{
730 +        int result;
731 +
732 +        assert(ergo(it_at_rec(it), !it_keyeq(it, it->ii_path.ip_key_target)));
733 +
734 +        while ((result = iam_it_next(it)) == 0) {
735 +               do_corr(schedule());
736 +                if (it_ikeycmp(it, it->ii_path.ip_ikey_target) != 0)
737 +                        return -ENOENT;
738 +                if (it_keyeq(it, it->ii_path.ip_key_target))
739 +                        return 0;
740 +        }
741 +        return result;
742 +}
743 +
744 +/*
745 + * Attach iterator. After successful completion, @it points to record with
746 + * least key not larger than @k.
747 + *
748 + * Return value: 0: positioned on existing record,
749 + *             +ve: exact position found,
750 + *             -ve: error.
751 + *
752 + * precondition:  it_state(it) == IAM_IT_DETACHED
753 + * postcondition: ergo(result == 0 && it_state(it) == IAM_IT_ATTACHED,
754 + *                     it_keycmp(it, k) <= 0)
755 + */
756 +int iam_it_get(struct iam_iterator *it, const struct iam_key *k)
757 +{
758 +        int result;
759 +        assert_corr(it_state(it) == IAM_IT_DETACHED);
760 +
761 +        it->ii_path.ip_ikey_target = NULL;
762 +        it->ii_path.ip_key_target  = k;
763 +
764 +        result = __iam_it_get(it, 0);
765 +
766 +        if (result == IAM_LOOKUP_LAST) {
767 +                result = iam_it_collision(it);
768 +                if (result != 0) {
769 +                        iam_it_put(it);
770 +                        iam_it_fini(it);
771 +                        result = __iam_it_get(it, 0);
772 +                } else
773 +                        result = +1;
774 +        }
775 +        if (result > 0)
776 +                result &= ~IAM_LOOKUP_LAST;
777 +
778 +        assert_corr(ergo(result > 0, it_keycmp(it, k) == 0));
779 +       assert_corr(ergo(result == 0 && it_state(it) == IAM_IT_ATTACHED,
780 +                         it_keycmp(it, k) <= 0));
781 +        return result;
782 +}
783 +EXPORT_SYMBOL(iam_it_get);
784 +
785 +/*
786 + * Attach iterator by index key.
787 + */
788 +static int iam_it_iget(struct iam_iterator *it, const struct iam_ikey *k)
789 +{
790 +        assert_corr(it_state(it) == IAM_IT_DETACHED);
791 +
792 +        it->ii_path.ip_ikey_target = k;
793 +        return __iam_it_get(it, 1) & ~IAM_LOOKUP_LAST;
794 +}
795 +
796 +/*
797 + * Attach iterator, and ensure it points to a record (not skewed).
798 + *
799 + * Return value: 0: positioned on existing record,
800 + *             +ve: exact position found,
801 + *             -ve: error.
802 + *
803 + * precondition:  it_state(it) == IAM_IT_DETACHED &&
804 + *                !(it->ii_flags&IAM_IT_WRITE)
805 + * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED)
806 + */
807 +int iam_it_get_at(struct iam_iterator *it, const struct iam_key *k)
808 +{
809 +        int result;
810 +        assert_corr(it_state(it) == IAM_IT_DETACHED &&
811 +                    !(it->ii_flags&IAM_IT_WRITE));
812 +        result = iam_it_get(it, k);
813 +        if (result == 0) {
814 +                if (it_state(it) != IAM_IT_ATTACHED) {
815 +                        assert_corr(it_state(it) == IAM_IT_SKEWED);
816 +                        result = iam_it_next(it);
817 +                }
818 +        }
819 +        assert_corr(ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED));
820 +        return result;
821 +}
822 +EXPORT_SYMBOL(iam_it_get_at);
823 +
824 +/*
825 + * Duplicates iterator.
826 + *
827 + * postcondition: it_state(dst) == it_state(src) &&
828 + *                iam_it_container(dst) == iam_it_container(src) &&
829 + *                dst->ii_flags == src->ii_flags &&
830 + *                ergo(it_state(src) == IAM_IT_ATTACHED,
831 + *                     iam_it_rec_get(dst) == iam_it_rec_get(src) &&
832 + *                     iam_it_key_get(dst) == iam_it_key_get(src))
833 + */
834 +void iam_it_dup(struct iam_iterator *dst, const struct iam_iterator *src)
835 +{
836 +        dst->ii_flags     = src->ii_flags;
837 +        dst->ii_state     = src->ii_state;
838 +        /* XXX not yet. iam_path_dup(&dst->ii_path, &src->ii_path); */
839 +        /*
840 +         * XXX: duplicate lock.
841 +         */
842 +       assert_corr(it_state(dst) == it_state(src));
843 +       assert_corr(iam_it_container(dst) == iam_it_container(src));
844 +       assert_corr(dst->ii_flags == src->ii_flags);
845 +       assert_corr(ergo(it_state(src) == IAM_IT_ATTACHED,
846 +                   iam_it_rec_get(dst) == iam_it_rec_get(src) &&
847 +                   iam_it_key_get(dst) == iam_it_key_get(src)));
848 +
849 +}
850 +
851 +/*
852 + * Detach iterator. Does nothing in detached state.
853 + *
854 + * postcondition: it_state(it) == IAM_IT_DETACHED
855 + */
856 +void iam_it_put(struct iam_iterator *it)
857 +{
858 +        if (it->ii_state != IAM_IT_DETACHED) {
859 +                it->ii_state = IAM_IT_DETACHED;
860 +               iam_leaf_fini(&it->ii_path.ip_leaf);
861 +        }
862 +}
863 +EXPORT_SYMBOL(iam_it_put);
864 +
865 +static struct iam_ikey *iam_it_ikey_get(const struct iam_iterator *it,
866 +                                        struct iam_ikey *ikey);
867 +/*
868 + * Move iterator one record right.
869 + *
870 + * Return value: 0: success,
871 + *              +1: end of container reached
872 + *             -ve: error
873 + *
874 + * precondition:  (it_state(it) == IAM_IT_ATTACHED ||
875 + *                 it_state(it) == IAM_IT_SKEWED) && it->ii_flags&IAM_IT_MOVE
876 + * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED) &&
877 + *                ergo(result >  0, it_state(it) == IAM_IT_DETACHED)
878 + */
879 +int iam_it_next(struct iam_iterator *it)
880 +{
881 +        int result;
882 +        struct iam_path      *path;
883 +        struct iam_leaf      *leaf;
884 +        struct inode         *obj;
885 +        do_corr(struct iam_ikey *ik_orig);
886 +
887 +        /* assert_corr(it->ii_flags&IAM_IT_MOVE); */
888 +        assert_corr(it_state(it) == IAM_IT_ATTACHED ||
889 +                    it_state(it) == IAM_IT_SKEWED);
890 +
891 +        path = &it->ii_path;
892 +        leaf = &path->ip_leaf;
893 +        obj  = iam_path_obj(path);
894 +
895 +        assert_corr(iam_leaf_is_locked(leaf));
896 +
897 +        result = 0;
898 +        do_corr(ik_orig = iam_it_ikey_get(it, iam_path_ikey(path, 2)));
899 +        if (it_before(it)) {
900 +                assert_corr(!iam_leaf_at_end(leaf));
901 +                it->ii_state = IAM_IT_ATTACHED;
902 +        } else {
903 +                if (!iam_leaf_at_end(leaf))
904 +                        /* advance within leaf node */
905 +                        iam_leaf_next(leaf);
906 +                /*
907 +                 * multiple iterations may be necessary due to empty leaves.
908 +                 */
909 +                while (result == 0 && iam_leaf_at_end(leaf)) {
910 +                        do_corr(schedule());
911 +                        /* advance index portion of the path */
912 +                        result = iam_index_next(iam_it_container(it), path);
913 +                        assert_corr(iam_leaf_is_locked(leaf));
914 +                        if (result == 1) {
915 +                                struct dynlock_handle *lh;
916 +                                lh = dx_lock_htree(obj, path->ip_frame->leaf,
917 +                                                   DLT_WRITE);
918 +                                if (lh != NULL) {
919 +                                        iam_leaf_fini(leaf);
920 +                                        leaf->il_lock = lh;
921 +                                        result = iam_leaf_load(path);
922 +                                        if (result == 0)
923 +                                                iam_leaf_start(leaf);
924 +                                } else
925 +                                        result = -ENOMEM;
926 +                        } else if (result == 0)
927 +                                /* end of container reached */
928 +                                result = +1;
929 +                        if (result != 0)
930 +                                iam_it_put(it);
931 +                }
932 +                if (result == 0)
933 +                        it->ii_state = IAM_IT_ATTACHED;
934 +        }
935 +        assert_corr(ergo(result == 0, it_state(it) == IAM_IT_ATTACHED));
936 +        assert_corr(ergo(result >  0, it_state(it) == IAM_IT_DETACHED));
937 +        assert_corr(ergo(result == 0, it_ikeycmp(it, ik_orig) >= 0));
938 +        return result;
939 +}
940 +EXPORT_SYMBOL(iam_it_next);
941 +
942 +/*
943 + * Return pointer to the record under iterator.
944 + *
945 + * precondition:  it_state(it) == IAM_IT_ATTACHED && it_at_rec(it)
946 + * postcondition: it_state(it) == IAM_IT_ATTACHED
947 + */
948 +struct iam_rec *iam_it_rec_get(const struct iam_iterator *it)
949 +{
950 +        assert_corr(it_state(it) == IAM_IT_ATTACHED);
951 +        assert_corr(it_at_rec(it));
952 +        return iam_leaf_rec(&it->ii_path.ip_leaf);
953 +}
954 +EXPORT_SYMBOL(iam_it_rec_get);
955 +
956 +static void iam_it_reccpy(struct iam_iterator *it, const struct iam_rec *r)
957 +{
958 +        struct iam_leaf *folio;
959 +
960 +        folio = &it->ii_path.ip_leaf;
961 +        iam_leaf_ops(folio)->rec_set(folio, r);
962 +}
963 +
964 +/*
965 + * Replace contents of record under iterator.
966 + *
967 + * precondition:  it_state(it) == IAM_IT_ATTACHED &&
968 + *                it->ii_flags&IAM_IT_WRITE
969 + * postcondition: it_state(it) == IAM_IT_ATTACHED &&
970 + *                ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
971 + */
972 +int iam_it_rec_set(handle_t *h,
973 +                   struct iam_iterator *it, const struct iam_rec *r)
974 +{
975 +        int result;
976 +        struct iam_path *path;
977 +        struct buffer_head *bh;
978 +
979 +        assert_corr(it_state(it) == IAM_IT_ATTACHED &&
980 +                    it->ii_flags&IAM_IT_WRITE);
981 +        assert_corr(it_at_rec(it));
982 +
983 +        path = &it->ii_path;
984 +        bh   = path->ip_leaf.il_bh;
985 +        result = iam_txn_add(h, path, bh);
986 +        if (result == 0) {
987 +                iam_it_reccpy(it, r);
988 +                result = iam_txn_dirty(h, path, bh);
989 +        }
990 +        return result;
991 +}
992 +EXPORT_SYMBOL(iam_it_rec_set);
993 +
994 +/*
995 + * Return pointer to the index key under iterator.
996 + *
997 + * precondition:  it_state(it) == IAM_IT_ATTACHED ||
998 + *                it_state(it) == IAM_IT_SKEWED
999 + */
1000 +static struct iam_ikey *iam_it_ikey_get(const struct iam_iterator *it,
1001 +                                        struct iam_ikey *ikey)
1002 +{
1003 +        assert_corr(it_state(it) == IAM_IT_ATTACHED ||
1004 +                    it_state(it) == IAM_IT_SKEWED);
1005 +        assert_corr(it_at_rec(it));
1006 +        return iam_leaf_ikey(&it->ii_path.ip_leaf, ikey);
1007 +}
1008 +
1009 +/*
1010 + * Return pointer to the key under iterator.
1011 + *
1012 + * precondition:  it_state(it) == IAM_IT_ATTACHED ||
1013 + *                it_state(it) == IAM_IT_SKEWED
1014 + */
1015 +struct iam_key *iam_it_key_get(const struct iam_iterator *it)
1016 +{
1017 +        assert_corr(it_state(it) == IAM_IT_ATTACHED ||
1018 +                    it_state(it) == IAM_IT_SKEWED);
1019 +        assert_corr(it_at_rec(it));
1020 +        return iam_leaf_key(&it->ii_path.ip_leaf);
1021 +}
1022 +EXPORT_SYMBOL(iam_it_key_get);
1023 +
1024 +/*
1025 + * Return size of key under iterator (in bytes)
1026 + *
1027 + * precondition:  it_state(it) == IAM_IT_ATTACHED ||
1028 + *                it_state(it) == IAM_IT_SKEWED
1029 + */
1030 +int iam_it_key_size(const struct iam_iterator *it)
1031 +{
1032 +        assert_corr(it_state(it) == IAM_IT_ATTACHED ||
1033 +                    it_state(it) == IAM_IT_SKEWED);
1034 +        assert_corr(it_at_rec(it));
1035 +        return iam_leaf_key_size(&it->ii_path.ip_leaf);
1036 +}
1037 +EXPORT_SYMBOL(iam_it_key_size);
1038 +
1039 +/*
1040 + * Insertion of new record. Interaction with jbd during non-trivial case (when
1041 + * split happens) is as follows:
1042 + *
1043 + *  - the new leaf node is involved in the transaction by ext3_append();
1044 + *
1045 + *  - the old leaf node is involved in the transaction by iam_add_rec();
1046 + *
1047 + *  - the leaf where the insertion point ends up is marked dirty by
1048 + *  iam_add_rec();
1049 + *
1050 + *  - the leaf without the insertion point is marked dirty (as @new_leaf) by
1051 + *  iam_new_leaf();
1052 + *
1053 + *  - split index nodes are involved in the transaction and marked dirty by
1054 + *  split_index_node().
1055 + *
1056 + *  - the "safe" index node, which is not split, but where the new pointer is
1057 + *  inserted, is involved in the transaction and marked dirty by
1058 + *  split_index_node().
1059 + *
1060 + *  - the index node where the pointer to the new leaf is inserted is involved
1061 + *  in the transaction by split_index_node() and marked dirty by iam_add_rec().
1060 + *
1061 + *  - inode is marked dirty by iam_add_rec().
1062 + *
1063 + */
1064 +
1065 +static int iam_new_leaf(handle_t *handle, struct iam_leaf *leaf)
1066 +{
1067 +        int err;
1068 +        iam_ptr_t blknr;
1069 +        struct buffer_head   *new_leaf;
1070 +        struct buffer_head   *old_leaf;
1071 +        struct iam_container *c;
1072 +        struct inode         *obj;
1073 +        struct iam_path      *path;
1074 +
1075 +        assert_inv(iam_leaf_check(leaf));
1076 +
1077 +        c = iam_leaf_container(leaf);
1078 +        path = leaf->il_path;
1079 +
1080 +        obj = c->ic_object;
1081 +        new_leaf = ext3_append(handle, obj, (__u32 *)&blknr, &err);
1082 +        do_corr(schedule());
1083 +        if (new_leaf != NULL) {
1084 +                struct dynlock_handle *lh;
1085 +
1086 +                lh = dx_lock_htree(obj, blknr, DLT_WRITE);
1087 +                do_corr(schedule());
1088 +                if (lh != NULL) {
1089 +                        iam_leaf_ops(leaf)->init_new(c, new_leaf);
1090 +                        do_corr(schedule());
1091 +                        old_leaf = leaf->il_bh;
1092 +                        iam_leaf_split(leaf, &new_leaf, blknr);
1093 +                        if (old_leaf != leaf->il_bh) {
1094 +                                /*
1095 +                                 * Switched to the new leaf.
1096 +                                 */
1097 +                                iam_leaf_unlock(leaf);
1098 +                                leaf->il_lock = lh;
1099 +                                path->ip_frame->leaf = blknr;
1100 +                        } else
1101 +                                dx_unlock_htree(obj, lh);
1102 +                        do_corr(schedule());
1103 +                        err = iam_txn_dirty(handle, path, new_leaf);
1104 +                        brelse(new_leaf);
1105 +                        if (err == 0)
1106 +                                err = ext3_mark_inode_dirty(handle, obj);
1107 +                        do_corr(schedule());
1108 +                } else
1109 +                        err = -ENOMEM;
1110 +        }
1111 +        assert_inv(iam_leaf_check(leaf));
1112 +        assert_inv(iam_leaf_check(&iam_leaf_path(leaf)->ip_leaf));
1113 +        assert_inv(iam_path_check(iam_leaf_path(leaf)));
1114 +        return err;
1115 +}
1116 +
1117 +static int iam_add_rec(handle_t *handle, struct iam_iterator *it,
1118 +                       struct iam_path *path,
1119 +                       const struct iam_key *k, const struct iam_rec *r)
1120 +{
1121 +       int err;
1122 +        struct iam_leaf *leaf;
1123 +
1124 +        leaf = &path->ip_leaf;
1125 +        assert_inv(iam_leaf_check(leaf));
1126 +        assert_inv(iam_path_check(path));
1127 +        err = iam_txn_add(handle, path, leaf->il_bh);
1128 +        if (err == 0) {
1129 +               do_corr(schedule());
1130 +                if (!iam_leaf_can_add(leaf, k, r)) {
1131 +                        struct dynlock_handle *lh = NULL;
1132 +
1133 +                        do {
1134 +                                assert_corr(lh == NULL);
1135 +                                do_corr(schedule());
1136 +                                err = split_index_node(handle, path, &lh);
1137 +                                if (err == -EAGAIN) {
1138 +                                        assert_corr(lh == NULL);
1139 +
1140 +                                        iam_path_fini(path);
1141 +                                        it->ii_state = IAM_IT_DETACHED;
1142 +
1143 +                                        do_corr(schedule());
1144 +                                        err = iam_it_get_exact(it, k);
1145 +                                        if (err == -ENOENT)
1146 +                                                err = +1; /* repeat split */
1147 +                                        else if (err == 0)
1148 +                                                err = -EEXIST;
1149 +                                }
1150 +                        } while (err > 0);
1151 +                        assert_inv(iam_path_check(path));
1152 +                        if (err == 0) {
1153 +                                assert_corr(lh != NULL);
1154 +                                do_corr(schedule());
1155 +                                err = iam_new_leaf(handle, leaf);
1156 +                                if (err == 0)
1157 +                                        err = iam_txn_dirty(handle, path,
1158 +                                                            path->ip_frame->bh);
1159 +                        }
1160 +                        dx_unlock_htree(iam_path_obj(path), lh);
1161 +                        do_corr(schedule());
1162 +                }
1163 +                if (err == 0) {
1164 +                        iam_leaf_rec_add(leaf, k, r);
1165 +                        err = iam_txn_dirty(handle, path, leaf->il_bh);
1166 +                }
1167 +        }
1168 +        assert_inv(iam_leaf_check(leaf));
1169 +        assert_inv(iam_leaf_check(&path->ip_leaf));
1170 +        assert_inv(iam_path_check(path));
1171 +       return err;
1172 +}
1173 +
1174 +/*
1175 + * Insert new record with key @k and contents from @r, shifting records to the
1176 + * right. On success, iterator is positioned on the newly inserted record.
1177 + *
1178 + * precondition: it->ii_flags&IAM_IT_WRITE &&
1179 + *               (it_state(it) == IAM_IT_ATTACHED ||
1180 + *                it_state(it) == IAM_IT_SKEWED) &&
1181 + *               ergo(it_state(it) == IAM_IT_ATTACHED,
1182 + *                    it_keycmp(it, k) <= 0) &&
1183 + *               ergo(it_before(it), it_keycmp(it, k) > 0));
1184 + * postcondition: ergo(result == 0,
1185 + *                     it_state(it) == IAM_IT_ATTACHED &&
1186 + *                     it_keycmp(it, k) == 0 &&
1187 + *                     !memcmp(iam_it_rec_get(it), r, ...))
1188 + */
1189 +int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
1190 +                      const struct iam_key *k, const struct iam_rec *r)
1191 +{
1192 +        int result;
1193 +        struct iam_path *path;
1194 +
1195 +        path = &it->ii_path;
1196 +
1197 +        assert_corr(it->ii_flags&IAM_IT_WRITE);
1198 +        assert_corr(it_state(it) == IAM_IT_ATTACHED ||
1199 +                    it_state(it) == IAM_IT_SKEWED);
1200 +        assert_corr(ergo(it_state(it) == IAM_IT_ATTACHED,
1201 +                         it_keycmp(it, k) <= 0));
1202 +        assert_corr(ergo(it_before(it), it_keycmp(it, k) > 0));
1203 +       result = iam_add_rec(h, it, path, k, r);
1204 +        if (result == 0)
1205 +                it->ii_state = IAM_IT_ATTACHED;
1206 +        assert_corr(ergo(result == 0,
1207 +                         it_state(it) == IAM_IT_ATTACHED &&
1208 +                         it_keycmp(it, k) == 0 &&
1209 +                         !memcmp(iam_it_rec_get(it), r,
1210 +                                 iam_it_container(it)->ic_descr->id_rec_size)));
1211 +        return result;
1212 +}
1213 +EXPORT_SYMBOL(iam_it_rec_insert);
1214 +
1215 +/*
1216 + * Delete record under iterator.
1217 + *
1218 + * precondition:  it_state(it) == IAM_IT_ATTACHED &&
1219 + *                it->ii_flags&IAM_IT_WRITE &&
1220 + *                it_at_rec(it)
1221 + * postcondition: it_state(it) == IAM_IT_ATTACHED ||
1222 + *                it_state(it) == IAM_IT_DETACHED
1223 + */
1224 +int iam_it_rec_delete(handle_t *h, struct iam_iterator *it)
1225 +{
1226 +        int result;
1227 +        struct iam_leaf *leaf;
1228 +        struct iam_path *path;
1229 +
1230 +        assert_corr(it_state(it) == IAM_IT_ATTACHED &&
1231 +                    it->ii_flags&IAM_IT_WRITE);
1232 +        assert_corr(it_at_rec(it));
1233 +
1234 +        path = &it->ii_path;
1235 +        leaf = &path->ip_leaf;
1236 +
1237 +        assert_inv(iam_leaf_check(leaf));
1238 +        assert_inv(iam_path_check(path));
1239 +
1240 +        result = iam_txn_add(h, path, leaf->il_bh);
1241 +        /*
1242 +         * no compaction for now.
1243 +         */
1244 +        if (result == 0) {
1245 +                iam_rec_del(leaf, it->ii_flags&IAM_IT_MOVE);
1246 +                result = iam_txn_dirty(h, path, leaf->il_bh);
1247 +                if (result == 0 && iam_leaf_at_end(leaf) &&
1248 +                    it->ii_flags&IAM_IT_MOVE) {
1249 +                        result = iam_it_next(it);
1250 +                        if (result > 0)
1251 +                                result = 0;
1252 +                }
1253 +        }
1254 +        assert_inv(iam_leaf_check(leaf));
1255 +        assert_inv(iam_path_check(path));
1256 +        assert_corr(it_state(it) == IAM_IT_ATTACHED ||
1257 +                    it_state(it) == IAM_IT_DETACHED);
1258 +       return result;
1259 +}
1260 +EXPORT_SYMBOL(iam_it_rec_delete);
1261 +
1262 +/*
1263 + * Convert iterator to cookie.
1264 + *
1265 + * precondition:  it_state(it) == IAM_IT_ATTACHED &&
1266 + *                iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
1267 + * postcondition: it_state(it) == IAM_IT_ATTACHED
1268 + */
1269 +iam_pos_t iam_it_store(const struct iam_iterator *it)
1270 +{
1271 +        iam_pos_t result;
1272 +
1273 +        assert_corr(it_state(it) == IAM_IT_ATTACHED);
1274 +        assert_corr(it_at_rec(it));
1275 +        assert_corr(iam_it_container(it)->ic_descr->id_ikey_size <=
1276 +                    sizeof result);
1277 +
1278 +        result = 0;
1279 +        return *(iam_pos_t *)iam_it_ikey_get(it, (void *)&result);
1280 +}
1281 +EXPORT_SYMBOL(iam_it_store);
1282 +
1283 +/*
1284 + * Restore iterator from cookie.
1285 + *
1286 + * precondition:  it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE &&
1287 + *                iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
1288 + * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED &&
1289 + *                                  iam_it_store(it) == pos)
1290 + */
1291 +int iam_it_load(struct iam_iterator *it, iam_pos_t pos)
1292 +{
1293 +        assert_corr(it_state(it) == IAM_IT_DETACHED &&
1294 +                    it->ii_flags&IAM_IT_MOVE);
1295 +        assert_corr(iam_it_container(it)->ic_descr->id_ikey_size <= sizeof pos);
1296 +        return iam_it_iget(it, (struct iam_ikey *)&pos);
1297 +}
1298 +EXPORT_SYMBOL(iam_it_load);
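/*
 * Editorial sketch, not part of the original patch: suspending and resuming a
 * walk through the cookie interface above, the way directory iteration keeps
 * a position across system calls. Assumes an IAM_IT_MOVE iterator positioned
 * on a record; the function name is hypothetical.
 */
static int iam_example_suspend_resume(struct iam_iterator *it)
{
        iam_pos_t pos;

        pos = iam_it_store(it);      /* precondition: IAM_IT_ATTACHED */
        iam_it_put(it);              /* drop the leaf pin and lock */
        /* ... later, possibly from another system call ... */
        return iam_it_load(it, pos); /* reattach at the stored position */
}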
1299 +
1300 +/***********************************************************************/
1301 +/* invariants                                                          */
1302 +/***********************************************************************/
1303 +
1304 +static inline int ptr_inside(void *base, size_t size, void *ptr)
1305 +{
1306 +        return (base <= ptr) && (ptr < base + size);
1307 +}
1308 +
1309 +int iam_frame_invariant(struct iam_frame *f)
1310 +{
1311 +        return
1312 +                (f->bh != NULL &&
1313 +                f->bh->b_data != NULL &&
1314 +                ptr_inside(f->bh->b_data, f->bh->b_size, f->entries) &&
1315 +                ptr_inside(f->bh->b_data, f->bh->b_size, f->at) &&
1316 +                f->entries <= f->at);
1317 +}
1318 +int iam_leaf_invariant(struct iam_leaf *l)
1319 +{
1320 +        return
1321 +                l->il_bh != NULL &&
1322 +                l->il_bh->b_data != NULL &&
1323 +                ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_entries) &&
1324 +                ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_at) &&
1325 +                l->il_entries <= l->il_at;
1326 +}
1327 +
1328 +int iam_path_invariant(struct iam_path *p)
1329 +{
1330 +        int i;
1331 +
1332 +        if (p->ip_container == NULL ||
1333 +            p->ip_indirect < 0 || p->ip_indirect > DX_MAX_TREE_HEIGHT - 1 ||
1334 +            p->ip_frame != p->ip_frames + p->ip_indirect ||
1335 +            !iam_leaf_invariant(&p->ip_leaf))
1336 +                return 0;
1337 +        for (i = 0; i < ARRAY_SIZE(p->ip_frames); ++i) {
1338 +                if (i <= p->ip_indirect) {
1339 +                        if (!iam_frame_invariant(&p->ip_frames[i]))
1340 +                                return 0;
1341 +                }
1342 +        }
1343 +        return 1;
1344 +}
1345 +
1346 +int iam_it_invariant(struct iam_iterator *it)
1347 +{
1348 +        return
1349 +                (it->ii_state == IAM_IT_DETACHED ||
1350 +                 it->ii_state == IAM_IT_ATTACHED ||
1351 +                 it->ii_state == IAM_IT_SKEWED) &&
1352 +                !(it->ii_flags & ~(IAM_IT_MOVE | IAM_IT_WRITE)) &&
1353 +                ergo(it->ii_state == IAM_IT_ATTACHED ||
1354 +                     it->ii_state == IAM_IT_SKEWED,
1355 +                     iam_path_invariant(&it->ii_path) &&
1356 +                     equi(it_at_rec(it), it->ii_state == IAM_IT_SKEWED));
1357 +}
1358 +
1359 +/*
1360 + * Search container @c for a record with key @k. If the record is found,
1361 + * its data are copied into @r.
1362 + *
1363 + * Return values: 0: found, -ENOENT: not-found, -ve: error
1364 + */
1365 +int iam_lookup(struct iam_container *c, const struct iam_key *k,
1366 +               struct iam_rec *r, struct iam_path_descr *pd)
1367 +{
1368 +        struct iam_iterator it;
1369 +        int result;
1370 +
1371 +        iam_it_init(&it, c, 0, pd);
1372 +
1373 +        result = iam_it_get_exact(&it, k);
1374 +        if (result == 0)
1375 +                /*
1376 +                 * record with required key found, copy it into user buffer
1377 +                 */
1378 +                iam_reccpy(&it.ii_path, r, iam_it_rec_get(&it));
1379 +        iam_it_put(&it);
1380 +        iam_it_fini(&it);
1381 +        return result;
1382 +}
1383 +EXPORT_SYMBOL(iam_lookup);
1384 +
1385 +/*
1386 + * Insert new record @r with key @k into container @c (within context of
1387 + * transaction @h).
1388 + *
1389 + * Return values: 0: success, -ve: error, including -EEXIST when record with
1390 + * given key is already present.
1391 + *
1392 + * postcondition: ergo(result == 0 || result == -EEXIST,
1393 + *                                  iam_lookup(c, k, r2) > 0 &&
1394 + *                                  !memcmp(r, r2, c->ic_descr->id_rec_size));
1395 + */
1396 +int iam_insert(handle_t *h, struct iam_container *c, const struct iam_key *k,
1397 +               const struct iam_rec *r, struct iam_path_descr *pd)
1398 +{
1399 +        struct iam_iterator it;
1400 +        int result;
1401 +
1402 +        iam_it_init(&it, c, IAM_IT_WRITE, pd);
1403 +
1404 +        result = iam_it_get_exact(&it, k);
1405 +        if (result == -ENOENT)
1406 +                result = iam_it_rec_insert(h, &it, k, r);
1407 +        else if (result == 0)
1408 +                result = -EEXIST;
1409 +        iam_it_put(&it);
1410 +        iam_it_fini(&it);
1411 +        return result;
1412 +}
1413 +EXPORT_SYMBOL(iam_insert);
1414 +
1415 +/*
1416 + * Update record with the key @k in container @c (within context of
1417 + * transaction @h), new record is given by @r.
1418 + *
1419 + * Return values: 0: success, -ve: error, including -ENOENT if no record with
1420 + * the given key found.
1421 + */
1422 +int iam_update(handle_t *h, struct iam_container *c, const struct iam_key *k,
1423 +               const struct iam_rec *r, struct iam_path_descr *pd)
1424 +{
1425 +        struct iam_iterator it;
1426 +        int result;
1427 +
1428 +        iam_it_init(&it, c, IAM_IT_WRITE, pd);
1429 +
1430 +        result = iam_it_get_exact(&it, k);
1431 +        if (result == 0)
1432 +                iam_it_rec_set(h, &it, r);
1433 +        iam_it_put(&it);
1434 +        iam_it_fini(&it);
1435 +        return result;
1436 +}
1437 +EXPORT_SYMBOL(iam_update);
1438 +
1439 +/*
1440 + * Delete existing record with key @k.
1441 + *
1442 + * Return values: 0: success, -ENOENT: not-found, -ve: other error.
1443 + *
1444 + * postcondition: ergo(result == 0 || result == -ENOENT,
1445 + *                                 !iam_lookup(c, k, *));
1446 + */
1447 +int iam_delete(handle_t *h, struct iam_container *c, const struct iam_key *k,
1448 +              struct iam_path_descr *pd)
1449 +{
1450 +        struct iam_iterator it;
1451 +        int result;
1452 +
1453 +        iam_it_init(&it, c, IAM_IT_WRITE, pd);
1454 +
1455 +        result = iam_it_get_exact(&it, k);
1456 +        if (result == 0)
1457 +                iam_it_rec_delete(h, &it);
1458 +        iam_it_put(&it);
1459 +        iam_it_fini(&it);
1460 +        return result;
1461 +}
1462 +EXPORT_SYMBOL(iam_delete);
1463 +
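/*
 * Editorial sketch, not part of the original patch: the four high-level entry
 * points of iam.c exercised together. @h is a running journal transaction
 * supplied by the caller; names are hypothetical and error handling is
 * abbreviated.
 */
static int iam_example_roundtrip(handle_t *h, struct iam_container *c,
                                 const struct iam_key *k, struct iam_rec *r)
{
        struct iam_path_descr *ipd;
        int result;

        ipd = iam_ipd_alloc(c->ic_descr->id_ikey_size);
        if (ipd == NULL)
                return -ENOMEM;

        result = iam_insert(h, c, k, r, ipd);      /* -EEXIST if present */
        if (result == 0)
                result = iam_lookup(c, k, r, ipd); /* 0: found */
        if (result == 0)
                result = iam_update(h, c, k, r, ipd);
        if (result == 0)
                result = iam_delete(h, c, k, ipd);

        iam_ipd_free(ipd);
        return result;
}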
1464 Index: iam/fs/ext3/iam_htree.c
1465 ===================================================================
1466 --- iam.orig/fs/ext3/iam_htree.c
1467 +++ iam/fs/ext3/iam_htree.c
1468 @@ -0,0 +1,684 @@
1469 +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
1470 + * vim:expandtab:shiftwidth=8:tabstop=8:
1471 + *
1472 + *  iam_htree.c
1473 + *  implementation of iam format for ext3/htree.
1474 + *
1475 + *  Copyright (c) 2006 Cluster File Systems, Inc.
1476 + *   Author: Nikita Danilov <nikita@clusterfs.com>
1477 + *
1478 + *   This file is part of the Lustre file system, http://www.lustre.org
1479 + *   Lustre is a trademark of Cluster File Systems, Inc.
1480 + *
1481 + *   You may have signed or agreed to another license before downloading
1482 + *   this software.  If so, you are bound by the terms and conditions
1483 + *   of that agreement, and the following does not apply to you.  See the
1484 + *   LICENSE file included with this distribution for more information.
1485 + *
1486 + *   If you did not agree to a different license, then this copy of Lustre
1487 + *   is open source software; you can redistribute it and/or modify it
1488 + *   under the terms of version 2 of the GNU General Public License as
1489 + *   published by the Free Software Foundation.
1490 + *
1491 + *   In either case, Lustre is distributed in the hope that it will be
1492 + *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
1493 + *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
1494 + *   license text for more details.
1495 + */
1496 +
1497 +#include <linux/types.h>
1498 +#include <linux/jbd.h>
1499 +/* ext3_error(), EXT3_DIR_ROUND() */
1500 +#include <linux/ext3_fs.h>
1501 +
1502 +#include <linux/lustre_iam.h>
1503 +
1504 +#include <libcfs/libcfs.h>
1505 +#include <libcfs/kp30.h>
1506 +
1507 +static inline struct ext3_dir_entry_2 *dent(struct iam_lentry *ent)
1508 +{
1509 +        return (struct ext3_dir_entry_2 *)ent;
1510 +}
1511 +
1512 +static inline struct iam_path_compat *getipc(const struct iam_leaf *folio)
1513 +{
1514 +        struct iam_path *path;
1515 +
1516 +        path = iam_leaf_path(folio);
1517 +        assert_corr(dx_index_is_compat(path));
1518 +        assert_corr(path->ip_data != NULL);
1519 +        return container_of(path->ip_data, struct iam_path_compat, ipc_descr);
1520 +}
1521 +
1522 +static inline struct ext3_dir_entry_2 *getent(const struct iam_leaf *folio)
1523 +{
1524 +        return dent(folio->il_at);
1525 +}
1526 +
1527 +static __u32 hashname(const struct iam_leaf *folio,
1528 +                      const char *name, int namelen)
1529 +{
1530 +        int result;
1531 +        struct dx_hash_info *hinfo;
1532 +
1533 +        hinfo = getipc(folio)->ipc_hinfo;
1534 +        assert_corr(hinfo != NULL);
1535 +        result = ext3fs_dirhash(name, namelen, hinfo);
1536 +        assert_corr(result == 0);
1537 +        return hinfo->hash;
1538 +}
1539 +
1540 +static __u32 gethash(const struct iam_leaf *folio,
1541 +                     const struct ext3_dir_entry_2 *ent)
1542 +{
1543 +        return hashname(folio, ent->name, ent->name_len);
1544 +}
1545 +
1546 +static inline size_t recsize(size_t namelen)
1547 +{
1548 +        return EXT3_DIR_REC_LEN(namelen);
1549 +}
1550 +
1551 +static struct ext3_dir_entry_2 *getlast(const struct iam_leaf *folio, int namelen)
1552 +{
1553 +        return
1554 +                (void *)folio->il_bh->b_data +
1555 +                iam_leaf_container(folio)->ic_object->i_sb->s_blocksize -
1556 +                recsize(namelen);
1557 +}
1558 +
1559 +static struct ext3_dir_entry_2 *gettop(const struct iam_leaf *folio)
1560 +{
1561 +        return getlast(folio, 0);
1562 +}
1563 +
1564 +static inline int ent_is_live(const struct ext3_dir_entry_2 *ent)
1565 +{
1566 +        return ent->inode != 0;
1567 +}
1568 +
1569 +static struct ext3_dir_entry_2 *entnext(const struct ext3_dir_entry_2 *ent)
1570 +{
1571 +       return (void *)ent + le16_to_cpu(ent->rec_len);
1572 +}
1573 +
1574 +static struct ext3_dir_entry_2 *skipdead(struct ext3_dir_entry_2 *ent)
1575 +{
1576 +        if (!ent_is_live(ent))
1577 +                ent = entnext(ent);
1578 +        /*
1579 +         * There can be no more than one dead entry in a row.
1580 +         */
1581 +        return ent;
1582 +}
1583 +
1584 +static struct ext3_dir_entry_2 *getstart(const struct iam_leaf *folio)
1585 +{
1586 +        return (void *)folio->il_bh->b_data;
1587 +}
1588 +
1589 +static int getfreespace(const struct ext3_dir_entry_2 *ent)
1590 +{
1591 +        int free;
1592 +
1593 +        free = le16_to_cpu(ent->rec_len);
1594 +        if (ent_is_live(ent))
1595 +                free -= recsize(ent->name_len);
1596 +        assert_corr(free >= 0);
1597 +        return free;
1598 +}
1599 +
1600 +static int entcmp(const struct iam_leaf *folio,
1601 +                  const struct ext3_dir_entry_2 *e0, const struct ext3_dir_entry_2 *e1)
1602 +{
1603 +        __u32 hash0;
1604 +        __u32 hash1;
1605 +
1606 +        assert_corr(ent_is_live(e0));
1607 +        assert_corr(ent_is_live(e1));
1608 +
1609 +        hash0 = gethash(folio, e0);
1610 +        hash1 = gethash(folio, e1);
1611 +        if (hash0 < hash1)
1612 +                return -1;
1613 +        else if (hash0 > hash1)
1614 +                return +1;
1615 +        else if (e0 < e1)
1616 +                return -1;
1617 +        else if (e0 > e1)
1618 +                return +1;
1619 +        else
1620 +                return 0;
1621 +}
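+
+/*
+ * Editorial note: entcmp() orders live entries by hash first and by
+ * in-block offset second, so even entries whose names collide in the hash
+ * have a stable total order; this is what lets iam_htree_next() below
+ * enumerate a leaf in hash order without visiting any entry twice.
+ */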
1622 +
1623 +#if EXT3_CORRECTNESS_ON || EXT3_INVARIANT_ON
1624 +static int iam_leaf_at_rec(const struct iam_leaf *folio)
1625 +{
1626 +        struct ext3_dir_entry_2 *ent;
1627 +
1628 +        ent = getent(folio);
1629 +        return getstart(folio) <= ent &&
1630 +                ent < gettop(folio) && ent_is_live(ent);
1631 +}
1632 +#endif
1633 +
1634 +/*
1635 + * Leaf operations.
1636 + */
1637 +
1638 +static struct iam_ikey *iam_htree_ikey(const struct iam_leaf *l,
1639 +                                       struct iam_ikey *key)
1640 +{
1641 +        __u32 *hash;
1642 +        assert_corr(iam_leaf_at_rec(l));
1643 +
1644 +        hash = (void *)key;
1645 +        *hash = gethash(l, getent(l));
1646 +        return key;
1647 +}
1648 +
1649 +static struct iam_key *iam_htree_key(const struct iam_leaf *l)
1650 +{
1651 +        assert_corr(iam_leaf_at_rec(l));
1652 +
1653 +        return (struct iam_key *)&getent(l)->name;
1654 +}
1655 +
1656 +static int iam_htree_key_size(const struct iam_leaf *l)
1657 +{
1658 +        assert_corr(iam_leaf_at_rec(l));
1659 +
1660 +        return getent(l)->name_len;
1661 +}
1662 +
1663 +static void iam_htree_start(struct iam_leaf *l)
1664 +{
1665 +        l->il_at = (void *)skipdead(getstart(l));
1666 +}
1667 +
1668 +static int iam_htree_init(struct iam_leaf *l)
1669 +{
1670 +        assert_corr(l->il_bh != NULL);
1671 +
1672 +        l->il_at = l->il_entries = (void *)getstart(l);
1673 +        return 0;
1674 +}
1675 +
1676 +static void iam_htree_fini(struct iam_leaf *l)
1677 +{
1678 +        l->il_entries = l->il_at = NULL;
1679 +}
1680 +
1681 +struct iam_rec *iam_htree_rec(const struct iam_leaf *l)
1682 +{
1683 +        assert_corr(iam_leaf_at_rec(l));
1684 +        return (void *)&getent(l)->inode;
1685 +}
1686 +
1687 +static void iam_htree_next(struct iam_leaf *l)
1688 +{
1689 +        struct ext3_dir_entry_2 *scan;
1690 +        struct ext3_dir_entry_2 *found;
1691 +
1692 +        assert_corr(iam_leaf_at_rec(l));
1693 +        found = NULL;
1694 +        for (scan = getstart(l); scan < gettop(l); scan = entnext(scan)) {
1695 +                if (scan != getent(l) && ent_is_live(scan) &&
1696 +                    entcmp(l, getent(l), scan) < 0 &&
1697 +                    (found == NULL || entcmp(l, scan, found) < 0))
1698 +                        found = scan;
1699 +        }
1700 +        assert_corr(ergo(found != NULL,
1701 +                         gethash(l, getent(l)) <= gethash(l, found)));
1702 +        l->il_at = (void *)(found ? : gettop(l));
1703 +}
1704 +
1705 +static int iam_htree_at_end(const struct iam_leaf *folio)
1706 +{
1707 +        return getent(folio) >= gettop(folio);
1708 +}
1709 +
1710 +
1711 +static inline int match(int len, const char *const name,
1712 +                        struct ext3_dir_entry_2 *de)
1713 +{
1714 +       if (len != de->name_len)
1715 +               return 0;
1716 +       if (!de->inode)
1717 +               return 0;
1718 +       return !memcmp(name, de->name, len);
1719 +}
1720 +
1721 +static int iam_htree_lookup(struct iam_leaf *l, const struct iam_key *k)
1722 +{
1723 +        struct iam_container *c;
1724 +        struct ext3_dir_entry_2 *scan;
1725 +        struct ext3_dir_entry_2 *found;
1726 +        __u32 hash;
1727 +        int result;
1728 +        int namelen;
1729 +        int last = 1;
1730 +        const char *name;
1731 +
1732 +        c = iam_leaf_container(l);
1733 +        name = (const char *)k;
1734 +        namelen = strlen(name);
1735 +        hash = hashname(l, name, namelen);
1736 +        found = NULL;
1737 +        result = IAM_LOOKUP_OK;
1738 +        for (scan = getstart(l); scan < getlast(l, namelen);
1739 +             scan = entnext(scan)) {
1740 +                if (match(namelen, name, scan)) {
1741 +                        found = scan;
1742 +                        result = IAM_LOOKUP_EXACT;
1743 +                        break;
1744 +                } else if (ent_is_live(scan)) {
1745 +                        if (gethash(l, scan) <= hash)
1746 +                                found = scan;
1747 +                        else
1748 +                                last = 0;
1749 +                }
1750 +        }
1751 +        if (found == NULL) {
1752 +                /*
1753 +                 * @k is less than all hashes in the leaf.
1754 +                 */
1755 +                iam_htree_start(l);
1756 +                result = IAM_LOOKUP_BEFORE;
1757 +        } else {
1758 +                l->il_at = (void *)found;
1759 +                assert_corr(iam_leaf_at_rec(l));
1760 +        }
1761 +        if (last)
1762 +                result |= IAM_LOOKUP_LAST;
1763 +        return result;
1764 +}
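+
+/*
+ * Editorial note: IAM_LOOKUP_LAST is or-ed into the return value above,
+ * so, assuming it is a separate flag bit as the |= suggests, callers
+ * should mask it off before comparing with the base codes:
+ *
+ *         result = iam_htree_lookup(l, k);
+ *         if ((result & ~IAM_LOOKUP_LAST) == IAM_LOOKUP_EXACT)
+ *                 ... l->il_at is positioned at the match ...
+ */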
1765 +
1766 +static int iam_htree_ilookup(struct iam_leaf *l, const struct iam_ikey *ik)
1767 +{
1768 +        assert(0);
1769 +        return IAM_LOOKUP_OK;
1770 +}
1771 +
1772 +static void iam_htree_key_set(struct iam_leaf *l, const struct iam_key *k)
1773 +{
1774 +        assert_corr(iam_leaf_at_rec(l));
1775 +        assert(0);
1776 +}
1777 +
1778 +static int iam_htree_key_cmp(const struct iam_leaf *l, const struct iam_key *k)
1779 +{
1780 +        const char *name;
1781 +        __u32 h0;
1782 +        __u32 h1;
1783 +
1784 +        name = (const char *)k;
1785 +
1786 +        assert_corr(ent_is_live(getent(l)));
1787 +
1788 +        h0 = gethash(l, getent(l));
1789 +        h1 = hashname(l, name, strlen(name));
1790 +
1791 +        return h0 < h1 ? -1 : (h0 == h1 ? 0 : +1);
1792 +}
1793 +
1794 +static int iam_htree_key_eq(const struct iam_leaf *l, const struct iam_key *k)
1795 +{
1796 +        const char *name;
1797 +
1798 +        name = (const char *)k;
1799 +        return match(strlen(name), name, getent(l));
1800 +}
1801 +
1802 +static void iam_htree_rec_set(struct iam_leaf *l, const struct iam_rec *r)
1803 +{
1804 +        __u32 *ino;
1805 +
1806 +        ino = (void *)r;
1807 +        getent(l)->inode = cpu_to_le32(*ino);
1808 +}
1809 +
1810 +static void iam_htree_rec_add(struct iam_leaf *leaf, const struct iam_key *k,
1811 +                              const struct iam_rec *r)
1812 +{
1813 +        struct ext3_dir_entry_2 *scan;
1814 +        struct inode        *dir;
1815 +        const  char         *name;
1816 +
1817 +        __u32 *ino;
1818 +        int    namelen;
1819 +
1820 +        assert_corr(iam_leaf_can_add(leaf, k, r));
1821 +
1822 +        dir = iam_leaf_container(leaf)->ic_object;
1823 +        ino = (void *)r;
1824 +        name = (const char *)k;
1825 +        namelen = strlen(name);
1826 +
1827 +        scan = find_insertion_point(dir, leaf->il_bh, name, namelen);
1828 +        assert_corr(!IS_ERR(scan));
1829 +        scan = split_entry(dir, scan, *ino, EXT3_FT_UNKNOWN, name, namelen);
1830 +        leaf->il_at = (void *)scan;
1831 +}
1832 +
1833 +static void iam_htree_rec_del(struct iam_leaf *leaf, int shift)
1834 +{
1835 +        struct ext3_dir_entry_2 *orig;
1836 +        struct ext3_dir_entry_2 *scan;
1837 +        struct ext3_dir_entry_2 *prev;
1838 +
1839 +        assert_corr(iam_leaf_at_rec(leaf));
1840 +
1841 +        orig = getent(leaf);
1842 +
1843 +        if (shift)
1844 +                iam_htree_next(leaf);
1845 +
1846 +        for (prev = NULL, scan = getstart(leaf); scan < orig;
1847 +             prev = scan, scan = entnext(scan))
1848 +                ;
1849 +
1850 +        assert_corr(scan == orig);
1851 +        if (prev != NULL) {
1852 +                prev->rec_len = cpu_to_le16(le16_to_cpu(prev->rec_len) +
1853 +                                              le16_to_cpu(scan->rec_len));
1854 +        } else {
1855 +                assert_corr(scan == getstart(leaf));
1856 +                scan->inode = 0;
1857 +        }
1858 +        iam_leaf_container(leaf)->ic_object->i_version++;
1859 +}
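+
+/*
+ * Editorial note: the deletion above never leaves two dead entries in a
+ * row, which is the invariant skipdead() relies on: an entry with a live
+ * predecessor is absorbed into that predecessor's rec_len, and only the
+ * very first entry of a block, which has no predecessor, is marked dead
+ * by zeroing its inode number.
+ */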
1860 +
1861 +static int iam_htree_can_add(const struct iam_leaf *leaf,
1862 +                             const struct iam_key *k, const struct iam_rec *r)
1863 +{
1864 +        struct ext3_dir_entry_2 *scan;
1865 +        int size;
1866 +
1867 +        size = recsize(strlen((const char *)k));
1868 +        for (scan = getstart(leaf);
1869 +             scan < gettop(leaf); scan = entnext(scan)) {
1870 +                if (getfreespace(scan) >= size)
1871 +                        return 1;
1872 +        }
1873 +        return 0;
1874 +}
1875 +
1876 +static void iam_htree_init_new(struct iam_container *c, struct buffer_head *bh)
1877 +{
1878 +        /*
1879 +         * Do nothing, all work is done by iam_htree_split().
1880 +         */
1881 +}
1882 +
1883 +static void iam_htree_split(struct iam_leaf *l, struct buffer_head **bh,
1884 +                           iam_ptr_t new_blknr)
1885 +{
1886 +        __u32 delim_hash;
1887 +        __u32 old_hash;
1888 +        struct buffer_head *newbh = *bh;
1889 +        struct iam_path *path;
1890 +
1891 +        old_hash = gethash(l, getent(l));
1892 +        move_entries(iam_leaf_container(l)->ic_object,
1893 +                     getipc(l)->ipc_hinfo, &l->il_bh, bh, &delim_hash);
1894 +        /*
1895 +         * Insert pointer to the new node (together with the least key in
1896 +         * the node) into index node.
1897 +         */
1898 +        path = iam_leaf_path(l);
1899 +        if (l->il_bh == newbh) {
1900 +                /*
1901 +                 * insertion point moves into new leaf.
1902 +                 */
1903 +                assert_corr(delim_hash >= old_hash);
1904 +                l->il_curidx = new_blknr;
1905 +                iam_htree_lookup(l, (void *)&old_hash);
1906 +        }
1907 +        iam_insert_key_lock(path,
1908 +                            path->ip_frame, (void *)&delim_hash, new_blknr);
1909 +}
1910 +
1911 +static struct iam_leaf_operations iam_htree_leaf_ops = {
1912 +        .init           = iam_htree_init,
1913 +        .init_new       = iam_htree_init_new,
1914 +        .fini           = iam_htree_fini,
1915 +        .start          = iam_htree_start,
1916 +        .next           = iam_htree_next,
1917 +        .key            = iam_htree_key,
1918 +        .ikey           = iam_htree_ikey,
1919 +        .rec            = iam_htree_rec,
1920 +        .key_set        = iam_htree_key_set,
1921 +        .key_cmp        = iam_htree_key_cmp,
1922 +        .key_eq         = iam_htree_key_eq,
1923 +        .key_size       = iam_htree_key_size,
1924 +        .rec_set        = iam_htree_rec_set,
1925 +        .lookup         = iam_htree_lookup,
1926 +        .ilookup        = iam_htree_ilookup,
1927 +        .at_end         = iam_htree_at_end,
1928 +        .rec_add        = iam_htree_rec_add,
1929 +        .rec_del        = iam_htree_rec_del,
1930 +        .can_add        = iam_htree_can_add,
1931 +        .split          = iam_htree_split
1932 +};
1933 +
1934 +/*
1935 + * Index operations.
1936 + */
1937 +
1938 +static __u32 iam_htree_root_ptr(struct iam_container *c)
1939 +{
1940 +       return 0;
1941 +}
1942 +
1943 +static int iam_htree_node_check(struct iam_path *path, struct iam_frame *frame)
1944 +{
1945 +       /* XXX no checks yet */
1946 +       return 0;
1947 +}
1948 +
1949 +static int is_htree(struct super_block *sb,
1950 +                    const struct dx_root *root, int silent)
1951 +{
1952 +        if (root->info.hash_version > DX_HASH_MAX) {
1953 +                if (!silent)
1954 +                        ext3_warning(sb, __FUNCTION__,
1955 +                                     "Unrecognised inode hash code %d",
1956 +                                     root->info.hash_version);
1957 +                return -EIO;
1958 +        }
1959 +
1960 +        if (root->info.unused_flags & 1) {
1961 +                if (!silent)
1962 +                        ext3_warning(sb, __FUNCTION__,
1963 +                                     "Unimplemented inode hash flags: %#06x",
1964 +                                     root->info.unused_flags);
1965 +                return -EIO;
1966 +        }
1967 +
1968 +        if (root->info.indirect_levels > DX_MAX_TREE_HEIGHT - 1) {
1969 +                if (!silent)
1970 +                        ext3_warning(sb, __FUNCTION__,
1971 +                                     "Unimplemented inode hash depth: %#06x",
1972 +                                     root->info.indirect_levels);
1973 +                return -EIO;
1974 +        }
1975 +        return 0;
1976 +}
1977 +
1978 +static int iam_htree_node_load(struct iam_path *path, struct iam_frame *frame)
1979 +{
1980 +       void *data;
1981 +       struct iam_entry *entries;
1982 +       struct super_block *sb;
1983 +
1984 +       data = frame->bh->b_data;
1985 +       entries = dx_node_get_entries(path, frame);
1986 +       sb = iam_path_obj(path)->i_sb;
1987 +       if (frame == path->ip_frames) {
1988 +               /* root node */
1989 +               struct dx_root *root;
1990 +               struct iam_path_compat *ipc;
1991 +                int check;
1992 +                const char *name;
1993 +                int namelen;
1994 +
1995 +               root = data;
1996 +               assert_corr(path->ip_data != NULL);
1997 +               ipc = container_of(path->ip_data, struct iam_path_compat,
1998 +                                  ipc_descr);
1999 +
2000 +                check = is_htree(sb, root, 0);
2001 +                if (check != 0)
2002 +                        return check;
2003 +               path->ip_indirect = root->info.indirect_levels;
2004 +
2005 +               assert_corr((char *)entries == (((char *)&root->info) +
2006 +                                                root->info.info_length));
2007 +               assert_corr(dx_get_limit(entries) == dx_root_limit(path));
2008 +
2009 +               ipc->ipc_hinfo->hash_version = root->info.hash_version;
2010 +               ipc->ipc_hinfo->seed = EXT3_SB(sb)->s_hash_seed;
2011 +                name = NULL;
2012 +               if (ipc->ipc_qstr) {
2013 +                        name = ipc->ipc_qstr->name;
2014 +                        namelen = ipc->ipc_qstr->len;
2015 +                } else if (ipc->ipc_hinfo == &ipc->ipc_hinfo_area) {
2016 +                        name = (const char *)path->ip_key_target;
2017 +                        namelen = strlen(name);
2018 +                }
2019 +                if (name != NULL)
2020 +                        ext3fs_dirhash(name, namelen, ipc->ipc_hinfo);
2021 +                if (path->ip_ikey_target == NULL) {
2022 +                        path->ip_ikey_target = iam_path_ikey(path, 4);
2023 +                        *(__u32 *)path->ip_ikey_target = ipc->ipc_hinfo->hash;
2024 +                }
2025 +       } else {
2026 +               /* non-root index */
2027 +               assert_corr(entries ==
2028 +                            data + iam_path_descr(path)->id_node_gap);
2029 +               assert_corr(dx_get_limit(entries) == dx_node_limit(path));
2030 +       }
2031 +       frame->entries = frame->at = entries;
2032 +       return 0;
2033 +}
2034 +
2035 +static int iam_htree_node_init(struct iam_container *c,
2036 +                               struct buffer_head *bh, int root)
2037 +{
2038 +       struct dx_node *node;
2039 +
2040 +       assert_corr(!root);
2041 +
2042 +       node = (void *)bh->b_data;
2043 +       node->fake.rec_len = cpu_to_le16(c->ic_object->i_sb->s_blocksize);
2044 +       node->fake.inode = 0;
2045 +       return 0;
2046 +}
2047 +
2048 +static struct iam_entry *iam_htree_root_inc(struct iam_container *c,
2049 +                                            struct iam_path *path,
2050 +                                            struct iam_frame *frame)
2051 +{
2052 +        struct dx_root   *root;
2053 +        struct iam_entry *entries;
2054 +
2055 +        entries = frame->entries;
2056 +
2057 +        dx_set_count(entries, 1);
2058 +        root = (struct dx_root *) frame->bh->b_data;
2059 +        root->info.indirect_levels++;
2060 +
2061 +        return entries;
2062 +}
2063 +
2064 +static int iam_htree_ikeycmp(const struct iam_container *c,
2065 +                             const struct iam_ikey *k1,
2066 +                             const struct iam_ikey *k2)
2067 +{
2068 +       __u32 p1 = le32_to_cpu(*(__u32 *)k1);
2069 +       __u32 p2 = le32_to_cpu(*(__u32 *)k2);
2070 +
2071 +       return p1 > p2 ? +1 : (p1 < p2 ? -1 : 0);
2072 +}
2073 +
2074 +static struct iam_path_descr *iam_htree_ipd_alloc(const struct iam_container *c)
2075 +{
2076 +       struct iam_path_compat *ipc;
2077 +
2078 +       ipc = kmalloc(sizeof *ipc, GFP_KERNEL);
2079 +       if (ipc != NULL) {
2080 +                memset(ipc, 0, sizeof *ipc);
2081 +               iam_path_compat_init(ipc, c->ic_object);
2082 +               return &ipc->ipc_descr;
2083 +       } else
2084 +               return NULL;
2085 +}
2086 +
2087 +static void iam_htree_ipd_free(struct iam_path_descr *ipd)
2088 +{
2089 +       struct iam_path_compat *ipc;
2090 +
2091 +       ipc = container_of(ipd, struct iam_path_compat, ipc_descr);
2092 +       kfree(ipc);
2093 +}
2094 +
2095 +static struct iam_operations iam_htree_ops = {
2096 +        .id_root_ptr    = iam_htree_root_ptr,
2097 +        .id_node_read   = iam_node_read,
2098 +        .id_node_init   = iam_htree_node_init,
2099 +        .id_node_check  = iam_htree_node_check,
2100 +        .id_node_load   = iam_htree_node_load,
2101 +        .id_ikeycmp     = iam_htree_ikeycmp,
2102 +        .id_root_inc    = iam_htree_root_inc,
2103 +        .id_ipd_alloc   = iam_htree_ipd_alloc,
2104 +        .id_ipd_free    = iam_htree_ipd_free,
2105 +        .id_name        = "htree"
2106 +};
2107 +
2108 +/*
2109 + * Parameters describing iam compatibility mode in which existing ext3 htrees
2110 + * can be manipulated.
2111 + */
2112 +struct iam_descr iam_htree_compat_param = {
2113 +       .id_key_size  = EXT3_NAME_LEN,
2114 +        .id_rec_size  = sizeof ((struct ext3_dir_entry_2 *)NULL)->inode,
2115 +       .id_ikey_size = sizeof ((struct dx_map_entry *)NULL)->hash,
2116 +       .id_ptr_size  = sizeof ((struct dx_map_entry *)NULL)->offs,
2117 +       .id_node_gap  = offsetof(struct dx_node, entries),
2118 +       .id_root_gap  = offsetof(struct dx_root, entries),
2119 +       .id_ops       = &iam_htree_ops,
2120 +       .id_leaf_ops  = &iam_htree_leaf_ops
2121 +};
2122 +EXPORT_SYMBOL(iam_htree_compat_param);
2123 +
2124 +static int iam_htree_guess(struct iam_container *c)
2125 +{
2126 +        int result;
2127 +        struct buffer_head *bh;
2128 +        const struct dx_root *root;
2129 +
2130 +        assert_corr(c->ic_object != NULL);
2131 +
2132 +        result = iam_node_read(c, iam_htree_root_ptr(c), NULL, &bh);
2133 +        if (result == 0) {
2134 +                root = (void *)bh->b_data;
2135 +                result = is_htree(c->ic_object->i_sb, root, 1);
2136 +                if (result == 0)
2137 +                        c->ic_descr = &iam_htree_compat_param;
2138 +                else
2139 +                        result = -EBADF;
2140 +                brelse(bh);
2141 +        }
2142 +        return result;
2143 +}
2144 +
2145 +static struct iam_format iam_htree_format = {
2146 +        .if_guess = iam_htree_guess
2147 +};
2148 +
2149 +void iam_htree_format_init(void)
2150 +{
2151 +        iam_format_register(&iam_htree_format);
2152 +}
2153 Index: iam/fs/ext3/iam_lfix.c
2154 ===================================================================
2155 --- iam.orig/fs/ext3/iam_lfix.c
2156 +++ iam/fs/ext3/iam_lfix.c
2157 @@ -0,0 +1,728 @@
2158 +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2159 + * vim:expandtab:shiftwidth=8:tabstop=8:
2160 + *
2161 + *  iam_lfix.c
2162 + *  implementation of iam format for fixed-size records.
2163 + *
2164 + *  Copyright (c) 2006 Cluster File Systems, Inc.
2165 + *   Author: Wang Di <wangdi@clusterfs.com>
2166 + *   Author: Nikita Danilov <nikita@clusterfs.com>
2167 + *
2168 + *   This file is part of the Lustre file system, http://www.lustre.org
2169 + *   Lustre is a trademark of Cluster File Systems, Inc.
2170 + *
2171 + *   You may have signed or agreed to another license before downloading
2172 + *   this software.  If so, you are bound by the terms and conditions
2173 + *   of that agreement, and the following does not apply to you.  See the
2174 + *   LICENSE file included with this distribution for more information.
2175 + *
2176 + *   If you did not agree to a different license, then this copy of Lustre
2177 + *   is open source software; you can redistribute it and/or modify it
2178 + *   under the terms of version 2 of the GNU General Public License as
2179 + *   published by the Free Software Foundation.
2180 + *
2181 + *   In either case, Lustre is distributed in the hope that it will be
2182 + *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
2183 + *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
2184 + *   license text for more details.
2185 + */
2186 +
2187 +#include <linux/types.h>
2188 +#include <linux/jbd.h>
2189 +/* ext3_error() */
2190 +#include <linux/ext3_fs.h>
2191 +
2192 +#include <linux/lustre_iam.h>
2193 +
2194 +#include <libcfs/libcfs.h>
2195 +#include <libcfs/kp30.h>
2196 +
2197 +/*
2198 + * Leaf operations.
2199 + */
2200 +
2201 +enum {
2202 +        IAM_LEAF_HEADER_MAGIC = 0x1976 /* This is duplicated in
2203 +                                        * lustre/utils/create_iam.c */
2204 +};
2205 +
2206 +/* This is duplicated in lustre/utils/create_iam.c */
2207 +struct iam_leaf_head {
2208 +        __le16 ill_magic;
2209 +        __le16 ill_count;
2210 +};
2211 +
2212 +static inline int iam_lfix_entry_size(const struct iam_leaf *l)
2213 +{
2214 +        return iam_leaf_descr(l)->id_key_size + iam_leaf_descr(l)->id_rec_size;
2215 +}
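+
+/*
+ * Editorial sketch of the lfix leaf layout implied by the helpers below:
+ *
+ * +---------------+-----------+-----------+------+-----------+------------+
+ * | iam_leaf_head | key | rec | key | rec | .... | key | rec | free space |
+ * +---------------+-----------+-----------+------+-----------+------------+
+ *
+ * ill_count entries of id_key_size + id_rec_size bytes each follow the
+ * header, kept sorted by key.
+ */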
2216 +
2217 +static inline struct iam_lentry *
2218 +iam_lfix_shift(const struct iam_leaf *l, struct iam_lentry *entry, int shift)
2219 +{
2220 +        return (void *)entry + shift * iam_lfix_entry_size(l);
2221 +}
2222 +
2223 +static inline struct iam_key *iam_leaf_key_at(struct iam_lentry *entry)
2224 +{
2225 +        return (struct iam_key *)entry;
2226 +}
2227 +
2228 +static inline int lfix_keycmp(const struct iam_container *c,
2229 +                              const struct iam_key *k1,
2230 +                              const struct iam_key *k2)
2231 +{
2232 +       return memcmp(k1, k2, c->ic_descr->id_key_size);
2233 +}
2234 +
2235 +static struct iam_leaf_head *iam_get_head(const struct iam_leaf *l)
2236 +{
2237 +        return (struct iam_leaf_head *)l->il_bh->b_data;
2238 +}
2239 +
2240 +static struct iam_lentry *iam_entries(const struct buffer_head *bh)
2241 +{
2242 +        return (void *)bh->b_data + sizeof(struct iam_leaf_head);
2243 +}
2244 +
2245 +static struct iam_lentry *iam_get_lentries(const struct iam_leaf *l)
2246 +{
2247 +        return iam_entries(l->il_bh);
2248 +}
2249 +
2250 +static int leaf_count_limit(const struct iam_leaf *leaf)
2251 +{
2252 +        int free_space;
2253 +
2254 +        free_space = iam_leaf_container(leaf)->ic_object->i_sb->s_blocksize;
2255 +        free_space -= sizeof(struct iam_leaf_head);
2256 +        return free_space / iam_lfix_entry_size(leaf);
2257 +}
2258 +
2259 +static int lentry_count_get(const struct iam_leaf *leaf)
2260 +{
2261 +        return le16_to_cpu(iam_get_head(leaf)->ill_count);
2262 +}
2263 +
2264 +static void lentry_count_set(struct iam_leaf *leaf, unsigned count)
2265 +{
2266 +        assert_corr(0 <= count && count <= leaf_count_limit(leaf));
2267 +        iam_get_head(leaf)->ill_count = cpu_to_le16(count);
2268 +}
2269 +
2270 +static struct iam_lentry *iam_lfix_get_end(const struct iam_leaf *l);
2271 +
2272 +#if EXT3_CORRECTNESS_ON || EXT3_INVARIANT_ON
2273 +static int iam_leaf_at_rec(const struct iam_leaf *folio)
2274 +{
2275 +        return
2276 +                iam_get_lentries(folio) <= folio->il_at &&
2277 +                folio->il_at < iam_lfix_get_end(folio);
2278 +}
2279 +#endif
2280 +
2281 +static struct iam_ikey *iam_lfix_ikey(const struct iam_leaf *l,
2282 +                                      struct iam_ikey *key)
2283 +{
2284 +        void *ie = l->il_at;
2285 +        assert_corr(iam_leaf_at_rec(l));
2286 +        return (struct iam_ikey*)ie;
2287 +}
2288 +
2289 +static struct iam_key *iam_lfix_key(const struct iam_leaf *l)
2290 +{
2291 +        void *ie = l->il_at;
2292 +        assert_corr(iam_leaf_at_rec(l));
2293 +        return (struct iam_key*)ie;
2294 +}
2295 +
2296 +static int iam_lfix_key_size(const struct iam_leaf *l)
2297 +{
2298 +        return iam_leaf_descr(l)->id_key_size;
2299 +}
2300 +
2301 +static void iam_lfix_start(struct iam_leaf *l)
2302 +{
2303 +        l->il_at = iam_get_lentries(l);
2304 +}
2305 +
2306 +static inline ptrdiff_t iam_lfix_diff(const struct iam_leaf *l,
2307 +                                      const struct iam_lentry *e1,
2308 +                                      const struct iam_lentry *e2)
2309 +{
2310 +        ptrdiff_t diff;
2311 +        int esize;
2312 +
2313 +        esize = iam_lfix_entry_size(l);
2314 +        diff = (void *)e1 - (void *)e2;
2315 +        assert_corr(diff / esize * esize == diff);
2316 +        return diff / esize;
2317 +}
2318 +
2319 +static int iam_lfix_init(struct iam_leaf *l)
2320 +{
2321 +        int result;
2322 +        struct iam_leaf_head *ill;
2323 +        int count;
2324 +
2325 +        assert_corr(l->il_bh != NULL);
2326 +
2327 +        ill = iam_get_head(l);
2328 +        count = le16_to_cpu(ill->ill_count);
2329 +        if (ill->ill_magic == cpu_to_le16(IAM_LEAF_HEADER_MAGIC) &&
2330 +            0 <= count && count <= leaf_count_limit(l)) {
2331 +                l->il_at = l->il_entries = iam_get_lentries(l);
2332 +                result = 0;
2333 +        } else {
2334 +                struct inode *obj;
2335 +
2336 +                obj = iam_leaf_container(l)->ic_object;
2337 +                ext3_error(obj->i_sb, __FUNCTION__,
2338 +                           "Wrong magic in node %llu (#%lu): %#x != %#x or "
2339 +                           "wrong count: %i (%i)",
2340 +                           (unsigned long long)l->il_bh->b_blocknr, obj->i_ino,
2341 +                           le16_to_cpu(ill->ill_magic), IAM_LEAF_HEADER_MAGIC,
2342 +                           count, leaf_count_limit(l));
2343 +                result = -EIO;
2344 +                BREAKPOINT();
2345 +        }
2346 +        return result;
2347 +}
2348 +
2349 +static void iam_lfix_fini(struct iam_leaf *l)
2350 +{
2351 +        l->il_entries = l->il_at = NULL;
2352 +}
2353 +
2354 +static struct iam_lentry *iam_lfix_get_end(const struct iam_leaf *l)
2355 +{
2356 +        int count = lentry_count_get(l);
2357 +        struct iam_lentry *ile = iam_lfix_shift(l, l->il_entries, count);
2358 +
2359 +        return ile;
2360 +}
2361 +
2362 +struct iam_rec *iam_lfix_rec(const struct iam_leaf *l)
2363 +{
2364 +        void *e = l->il_at;
2365 +        assert_corr(iam_leaf_at_rec(l));
2366 +        return e + iam_leaf_descr(l)->id_key_size;
2367 +}
2368 +
2369 +static void iam_lfix_next(struct iam_leaf *l)
2370 +{
2371 +        assert_corr(iam_leaf_at_rec(l));
2372 +        l->il_at = iam_lfix_shift(l, l->il_at, 1);
2373 +}
2374 +
2375 +/*
2376 + * Bug chasing.
2377 + */
2378 +int lfix_dump = 0;
2379 +EXPORT_SYMBOL(lfix_dump);
2380 +
2381 +static char hdigit(char ch)
2382 +{
2383 +        static char d[] = "0123456789abcdef";
2384 +        return d[ch & 0xf];
2385 +}
2386 +
2387 +static char *hex(char ch, char *area)
2388 +{
2389 +        area[0] = hdigit(ch >> 4);
2390 +        area[1] = hdigit(ch);
2391 +        area[2] = 0;
2392 +        return area;
2393 +}
2394 +
2395 +static void l_print(struct iam_leaf *leaf, struct iam_lentry *entry)
2396 +{
2397 +        int i;
2398 +        char *area;
2399 +        char h[3];
2400 +
2401 +        area = (char *)entry;
2402 +        printk(KERN_EMERG "[");
2403 +        for (i = iam_lfix_key_size(leaf); i > 0; --i, ++area)
2404 +                printk("%s", hex(*area, h));
2405 +        printk("]-(");
2406 +        for (i = iam_leaf_descr(leaf)->id_rec_size; i > 0; --i, ++area)
2407 +                printk("%s", hex(*area, h));
2408 +        printk(")\n");
2409 +}
2410 +
2411 +static void lfix_print(struct iam_leaf *leaf)
2412 +{
2413 +        struct iam_lentry *entry;
2414 +        int count;
2415 +        int i;
2416 +
2417 +        entry = leaf->il_entries;
2418 +        count = lentry_count_get(leaf);
2419 +        printk(KERN_EMERG "lfix: %p %p %d\n", leaf, leaf->il_at, count);
2420 +        for (i = 0; i < count; ++i, entry = iam_lfix_shift(leaf, entry, 1))
2421 +                l_print(leaf, entry);
2422 +}
2423 +
2424 +static int iam_lfix_lookup(struct iam_leaf *l, const struct iam_key *k)
2425 +{
2426 +        struct iam_lentry *p, *q, *m, *t;
2427 +        struct iam_container *c;
2428 +        int count;
2429 +        int result;
2430 +
2431 +        count = lentry_count_get(l);
2432 +        if (count == 0)
2433 +                return IAM_LOOKUP_EMPTY;
2434 +
2435 +        result = IAM_LOOKUP_OK;
2436 +        c = iam_leaf_container(l);
2437 +
2438 +        p = l->il_entries;
2439 +        q = iam_lfix_shift(l, p, count - 1);
2440 +        if (lfix_keycmp(c, k, iam_leaf_key_at(p)) < 0) {
2441 +                /*
2442 +                 * @k is less than the least key in the leaf
2443 +                 */
2444 +                l->il_at = p;
2445 +                result = IAM_LOOKUP_BEFORE;
2446 +        } else if (lfix_keycmp(c, iam_leaf_key_at(q), k) <= 0) {
2447 +                l->il_at = q;
2448 +        } else {
2449 +                /*
2450 +                 * EWD1293
2451 +                 */
2452 +                while (iam_lfix_shift(l, p, 1) != q) {
2453 +                        m = iam_lfix_shift(l, p, iam_lfix_diff(l, q, p) / 2);
2454 +                        assert_corr(p < m && m < q);
2455 +                        *(lfix_keycmp(c, iam_leaf_key_at(m), k) <= 0 ? &p : &q) = m;
2456 +                }
2457 +                assert_corr(lfix_keycmp(c, iam_leaf_key_at(p), k) <= 0 &&
2458 +                            lfix_keycmp(c, k, iam_leaf_key_at(q)) < 0);
2459 +                /*
2460 +                 * skip over records with duplicate keys.
2461 +                 */
2462 +                while (p > l->il_entries) {
2463 +                        t = iam_lfix_shift(l, p, -1);
2464 +                        if (lfix_keycmp(c, iam_leaf_key_at(t), k) == 0)
2465 +                                p = t;
2466 +                        else
2467 +                                break;
2468 +                }
2469 +                l->il_at = p;
2470 +        }
2471 +        assert_corr(iam_leaf_at_rec(l));
2472 +
2473 +        if (lfix_keycmp(c, iam_leaf_key_at(l->il_at), k) == 0)
2474 +                result = IAM_LOOKUP_EXACT;
2475 +
2476 +        if (lfix_dump)
2477 +                lfix_print(l);
2478 +
2479 +        return result;
2480 +}
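+
+/*
+ * Editorial note: the middle branch above is the binary search of
+ * Dijkstra's EWD1293, maintaining the invariant
+ *
+ *         key(p) <= k < key(q)
+ *
+ * while halving [p, q]; once p and q are adjacent, the trailing loop
+ * backs p up to the first record in a run of equal keys, so the iterator
+ * always lands on the leftmost match.
+ */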
2481 +
2482 +static int iam_lfix_ilookup(struct iam_leaf *l, const struct iam_ikey *ik)
2483 +{
2484 +        assert(0);
2485 +        return IAM_LOOKUP_OK;
2486 +}
2487 +
2488 +static void iam_lfix_key_set(struct iam_leaf *l, const struct iam_key *k)
2489 +{
2490 +        assert_corr(iam_leaf_at_rec(l));
2491 +        memcpy(iam_leaf_key_at(l->il_at), k, iam_leaf_descr(l)->id_key_size);
2492 +}
2493 +
2494 +static int iam_lfix_key_cmp(const struct iam_leaf *l, const struct iam_key *k)
2495 +{
2496 +        return lfix_keycmp(iam_leaf_container(l), iam_leaf_key_at(l->il_at), k);
2497 +}
2498 +
2499 +static int iam_lfix_key_eq(const struct iam_leaf *l, const struct iam_key *k)
2500 +{
2501 +        return !lfix_keycmp(iam_leaf_container(l),
2502 +                            iam_leaf_key_at(l->il_at), k);
2503 +}
2504 +
2505 +static void iam_lfix_rec_set(struct iam_leaf *l, const struct iam_rec *r)
2506 +{
2507 +        assert_corr(iam_leaf_at_rec(l));
2508 +        iam_reccpy(iam_leaf_path(l), iam_lfix_rec(l), r);
2509 +}
2510 +
2511 +static void iam_lfix_rec_add(struct iam_leaf *leaf,
2512 +                             const struct iam_key *k, const struct iam_rec *r)
2513 +{
2514 +        struct iam_lentry *end;
2515 +        struct iam_lentry *cur;
2516 +        struct iam_lentry *start;
2517 +        ptrdiff_t diff;
2518 +        int count;
2519 +
2520 +        assert_corr(iam_leaf_can_add(leaf, k, r));
2521 +
2522 +        count = lentry_count_get(leaf);
2523 +        /*
2524 +         * The branch below is skipped in two exceptional cases:
2525 +         *
2526 +         *   - leaf positioned beyond the last record, and
2527 +         *
2528 +         *   - empty leaf; in both the new entry goes at the current end.
2529 +         */
2530 +        if (!iam_leaf_at_end(leaf)) {
2531 +                end   = iam_lfix_get_end(leaf);
2532 +                cur   = leaf->il_at;
2533 +                if (lfix_keycmp(iam_leaf_container(leaf),
2534 +                               k, iam_leaf_key_at(cur)) >= 0)
2535 +                        iam_lfix_next(leaf);
2536 +                else
2537 +                        /*
2538 +                         * Another exceptional case: insertion with the key
2539 +                         * less than least key in the leaf.
2540 +                         */
2541 +                        assert_corr(cur == leaf->il_entries);
2542 +
2543 +                start = leaf->il_at;
2544 +                diff  = (void *)end - (void *)start;
2545 +                assert_corr(diff >= 0);
2546 +                memmove(iam_lfix_shift(leaf, start, 1), start, diff);
2547 +        }
2548 +        lentry_count_set(leaf, count + 1);
2549 +        iam_lfix_key_set(leaf, k);
2550 +        iam_lfix_rec_set(leaf, r);
2551 +        assert_corr(iam_leaf_at_rec(leaf));
2552 +}
2553 +
2554 +static void iam_lfix_rec_del(struct iam_leaf *leaf, int shift)
2555 +{
2556 +        struct iam_lentry *next, *end;
2557 +        int count;
2558 +        ptrdiff_t diff;
2559 +
2560 +        assert_corr(iam_leaf_at_rec(leaf));
2561 +
2562 +        count = lentry_count_get(leaf);
2563 +        end = iam_lfix_get_end(leaf);
2564 +        next = iam_lfix_shift(leaf, leaf->il_at, 1);
2565 +        diff = (void *)end - (void *)next;
2566 +        memmove(leaf->il_at, next, diff);
2567 +
2568 +        lentry_count_set(leaf, count - 1);
2569 +}
2570 +
2571 +static int iam_lfix_can_add(const struct iam_leaf *l,
2572 +                            const struct iam_key *k, const struct iam_rec *r)
2573 +{
2574 +        return lentry_count_get(l) < leaf_count_limit(l);
2575 +}
2576 +
2577 +static int iam_lfix_at_end(const struct iam_leaf *folio)
2578 +{
2579 +        return folio->il_at == iam_lfix_get_end(folio);
2580 +}
2581 +
2582 +static void iam_lfix_init_new(struct iam_container *c, struct buffer_head *bh)
2583 +{
2584 +        struct iam_leaf_head *hdr;
2585 +
2586 +        hdr = (struct iam_leaf_head*)bh->b_data;
2587 +        hdr->ill_magic = cpu_to_le16(IAM_LEAF_HEADER_MAGIC);
2588 +        hdr->ill_count = cpu_to_le16(0);
2589 +}
2590 +
2591 +static void iam_lfix_split(struct iam_leaf *l, struct buffer_head **bh,
2592 +                           iam_ptr_t new_blknr)
2593 +{
2594 +        struct iam_path       *path;
2595 +        struct iam_leaf_head  *hdr;
2596 +        const struct iam_ikey *pivot;
2597 +        struct buffer_head    *new_leaf;
2598 +
2599 +        unsigned count;
2600 +        unsigned split;
2601 +
2602 +        void *start;
2603 +        void *finis;
2604 +
2605 +        new_leaf = *bh;
2606 +        path = iam_leaf_path(l);
2607 +
2608 +        hdr = (void *)new_leaf->b_data;
2609 +
2610 +        count = lentry_count_get(l);
2611 +        split = count / 2;
2612 +
2613 +        start = iam_lfix_shift(l, iam_get_lentries(l), split);
2614 +        finis = iam_lfix_shift(l, iam_get_lentries(l), count);
2615 +
2616 +        pivot = (const struct iam_ikey *)iam_leaf_key_at(start);
2617 +
2618 +        memmove(iam_entries(new_leaf), start, finis - start);
2619 +        hdr->ill_count = cpu_to_le16(count - split);
2620 +        lentry_count_set(l, split);
2621 +        if ((void *)l->il_at >= start) {
2622 +                /*
2623 +                 * insertion point moves into new leaf.
2624 +                 */
2625 +                int shift;
2626 +                int result;
2627 +
2628 +                shift = iam_lfix_diff(l, l->il_at, start);
2629 +                *bh = l->il_bh;
2630 +                l->il_bh = new_leaf;
2631 +                l->il_curidx = new_blknr;
2632 +                result = iam_lfix_init(l);
2633 +                /*
2634 +                 * init cannot fail, as node was just initialized.
2635 +                 */
2636 +                assert_corr(result == 0);
2637 +                l->il_at = iam_lfix_shift(l, iam_get_lentries(l), shift);
2638 +        }
2639 +        /*
2640 +         * Insert pointer to the new node (together with the least key in
2641 +         * the node) into index node.
2642 +         */
2643 +        iam_insert_key_lock(path, path->ip_frame, pivot, new_blknr);
2644 +}
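+
+/*
+ * Editorial example: for count == 6 the split point is 3, so entries
+ * [3, 6) move to the new block and key(entry[3]) becomes the pivot
+ * inserted into the parent next to new_blknr:
+ *
+ *         before: [k0 k1 k2 k3 k4 k5]
+ *         after:  [k0 k1 k2]  [k3 k4 k5]   parent gains (k3, new_blknr)
+ */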
2645 +
2646 +static struct iam_leaf_operations iam_lfix_leaf_ops = {
2647 +        .init           = iam_lfix_init,
2648 +        .init_new       = iam_lfix_init_new,
2649 +        .fini           = iam_lfix_fini,
2650 +        .start          = iam_lfix_start,
2651 +        .next           = iam_lfix_next,
2652 +        .key            = iam_lfix_key,
2653 +        .ikey           = iam_lfix_ikey,
2654 +        .rec            = iam_lfix_rec,
2655 +        .key_set        = iam_lfix_key_set,
2656 +        .key_cmp        = iam_lfix_key_cmp,
2657 +        .key_eq         = iam_lfix_key_eq,
2658 +        .key_size       = iam_lfix_key_size,
2659 +        .rec_set        = iam_lfix_rec_set,
2660 +        .lookup         = iam_lfix_lookup,
2661 +        .ilookup        = iam_lfix_ilookup,
2662 +        .at_end         = iam_lfix_at_end,
2663 +        .rec_add        = iam_lfix_rec_add,
2664 +        .rec_del        = iam_lfix_rec_del,
2665 +        .can_add        = iam_lfix_can_add,
2666 +        .split          = iam_lfix_split
2667 +};
2668 +
2669 +/*
2670 + * Index operations.
2671 + */
2672 +
2673 +enum {
2674 +        /* This is duplicated in lustre/utils/create_iam.c */
2675 +        /*
2676 +         * Then shalt thou see the dew-BEDABBLED wretch
2677 +         * Turn, and return, indenting with the way;
2678 +         * Each envious brier his weary legs doth scratch,
2679 +         * Each shadow makes him stop, each murmur stay:
2680 +         * For misery is trodden on by many,
2681 +         * And being low never relieved by any.
2682 +         */
2683 +        IAM_LFIX_ROOT_MAGIC = 0xbedabb1edULL /* d01efull */
2684 +};
2685 +
2686 +/* This is duplicated in lustre/utils/create_iam.c */
2687 +struct iam_lfix_root {
2688 +        __le64  ilr_magic;
2689 +        __le16  ilr_keysize;
2690 +        __le16  ilr_recsize;
2691 +        __le16  ilr_ptrsize;
2692 +        u8      ilr_indirect_levels;
2693 +        u8      ilr_padding;
2694 +};
2695 +
2696 +static __u32 iam_lfix_root_ptr(struct iam_container *c)
2697 +{
2698 +        return 0;
2699 +}
2700 +
2701 +static int iam_lfix_node_init(struct iam_container *c, struct buffer_head *bh,
2702 +                              int root)
2703 +{
2704 +        return 0;
2705 +}
2706 +
2707 +static struct iam_entry *iam_lfix_root_inc(struct iam_container *c,
2708 +                                           struct iam_path *path,
2709 +                                           struct iam_frame *frame)
2710 +{
2711 +        struct iam_lfix_root *root;
2712 +        struct iam_entry     *entries;
2713 +
2714 +        entries = frame->entries;
2715 +
2716 +        dx_set_count(entries, 2);
2717 +        assert_corr(dx_get_limit(entries) == dx_root_limit(path));
2718 +
2719 +        root = (void *)frame->bh->b_data;
2720 +        assert_corr(le64_to_cpu(root->ilr_magic) == IAM_LFIX_ROOT_MAGIC);
2721 +        root->ilr_indirect_levels++;
2722 +        frame->at = entries = iam_entry_shift(path, entries, 1);
2723 +        memset(iam_ikey_at(path, entries), 0,
2724 +               iam_path_descr(path)->id_ikey_size);
2725 +        return entries;
2726 +}
2727 +
2728 +static int iam_lfix_node_check(struct iam_path *path, struct iam_frame *frame)
2729 +{
2730 +        unsigned count;
2731 +        unsigned limit;
2732 +        unsigned limit_correct;
2733 +        struct iam_entry *entries;
2734 +
2735 +        entries = dx_node_get_entries(path, frame);
2736 +
2737 +        if (frame == path->ip_frames) {
2738 +                struct iam_lfix_root *root;
2739 +
2740 +                root = (void *)frame->bh->b_data;
2741 +                if (le64_to_cpu(root->ilr_magic) != IAM_LFIX_ROOT_MAGIC) {
2742 +                        BREAKPOINT();
2743 +                        return -EIO;
2744 +                }
2745 +                limit_correct = dx_root_limit(path);
2746 +        } else
2747 +                limit_correct = dx_node_limit(path);
2748 +        count = dx_get_count(entries);
2749 +        limit = dx_get_limit(entries);
2750 +        if (count > limit) {
2751 +                BREAKPOINT();
2752 +                return -EIO;
2753 +        }
2754 +        if (limit != limit_correct) {
2755 +                BREAKPOINT();
2756 +                return -EIO;
2757 +        }
2758 +        return 0;
2759 +}
2760 +
2761 +static int iam_lfix_node_load(struct iam_path *path, struct iam_frame *frame)
2762 +{
2763 +        struct iam_entry *entries;
2764 +        void *data;
2765 +        entries = dx_node_get_entries(path, frame);
2766 +
2767 +        data = frame->bh->b_data;
2768 +
2769 +        if (frame == path->ip_frames) {
2770 +                struct iam_lfix_root *root;
2771 +
2772 +                root = data;
2773 +                path->ip_indirect = root->ilr_indirect_levels;
2774 +                if (path->ip_ikey_target == NULL)
2775 +                        path->ip_ikey_target =
2776 +                                (struct iam_ikey *)path->ip_key_target;
2777 +        }
2778 +        frame->entries = frame->at = entries;
2779 +        return 0;
2780 +}
2781 +
2782 +static int iam_lfix_ikeycmp(const struct iam_container *c,
2783 +                            const struct iam_ikey *k1,
2784 +                            const struct iam_ikey *k2)
2785 +{
2786 +        return memcmp(k1, k2, c->ic_descr->id_ikey_size);
2787 +}
2788 +
2789 +static struct iam_path_descr *iam_lfix_ipd_alloc(const struct iam_container *c)
2790 +{
2791 +        return iam_ipd_alloc(c->ic_descr->id_ikey_size);
2792 +}
2793 +
2794 +static struct iam_operations iam_lfix_ops = {
2795 +        .id_root_ptr    = iam_lfix_root_ptr,
2796 +        .id_node_read   = iam_node_read,
2797 +        .id_node_init   = iam_lfix_node_init,
2798 +        .id_node_check  = iam_lfix_node_check,
2799 +        .id_node_load   = iam_lfix_node_load,
2800 +        .id_ikeycmp     = iam_lfix_ikeycmp,
2801 +        .id_root_inc    = iam_lfix_root_inc,
2802 +        .id_ipd_alloc   = iam_lfix_ipd_alloc,
2803 +        .id_ipd_free    = iam_ipd_free,
2804 +        .id_name        = "lfix"
2805 +};
2806 +
2807 +static int iam_lfix_guess(struct iam_container *c)
2808 +{
2809 +        int result;
2810 +        struct buffer_head *bh;
2811 +        const struct iam_lfix_root *root;
2812 +
2813 +        assert_corr(c->ic_object != NULL);
2814 +
2815 +        result = iam_node_read(c, iam_lfix_root_ptr(c), NULL, &bh);
2816 +        if (result == 0) {
2817 +                root = (void *)bh->b_data;
2818 +                if (le64_to_cpu(root->ilr_magic) == IAM_LFIX_ROOT_MAGIC) {
2819 +                        struct iam_descr *descr;
2820 +
2821 +                        descr = c->ic_descr;
2822 +                        descr->id_key_size  = le16_to_cpu(root->ilr_keysize);
2823 +                        descr->id_ikey_size = le16_to_cpu(root->ilr_keysize);
2824 +                        descr->id_rec_size  = le16_to_cpu(root->ilr_recsize);
2825 +                        descr->id_ptr_size  = le16_to_cpu(root->ilr_ptrsize);
2826 +                        descr->id_root_gap  = sizeof(struct iam_lfix_root);
2827 +                        descr->id_node_gap  = 0;
2828 +                        descr->id_ops       = &iam_lfix_ops;
2829 +                        descr->id_leaf_ops  = &iam_lfix_leaf_ops;
2830 +                } else
2831 +                        result = -EBADF;
2832 +                brelse(bh);
2833 +        }
2834 +        return result;
2835 +}
2836 +
2837 +static struct iam_format iam_lfix_format = {
2838 +        .if_guess = iam_lfix_guess
2839 +};
2840 +
2841 +void iam_lfix_format_init(void)
2842 +{
2843 +        iam_format_register(&iam_lfix_format);
2844 +}
2845 +
2846 +/*
2847 + * Debugging aid.
2848 + */
2849 +
2850 +#define KEYSIZE (8)
2851 +#define RECSIZE (8)
2852 +#define PTRSIZE (4)
2853 +
2854 +#define LFIX_ROOT_RECNO \
2855 +        ((4096 - sizeof(struct iam_lfix_root)) / (KEYSIZE + PTRSIZE))
2856 +
2857 +#define LFIX_INDEX_RECNO (4096 / (KEYSIZE + PTRSIZE))
2858 +
2859 +#define LFIX_LEAF_RECNO \
2860 +        ((4096 - sizeof(struct iam_leaf_head)) / (KEYSIZE + RECSIZE))
2861 +
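+/*
+ * Editorial check, assuming a 4096-byte block: with
+ * sizeof(struct iam_lfix_root) == 16 and sizeof(struct iam_leaf_head) == 4,
+ * the macros above evaluate to
+ *
+ *         LFIX_ROOT_RECNO  == (4096 - 16) / 12 == 340
+ *         LFIX_INDEX_RECNO ==  4096       / 12 == 341
+ *         LFIX_LEAF_RECNO  == (4096 -  4) / 16 == 255
+ */
+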
2862 +struct lfix_root {
2863 +        struct iam_lfix_root lr_root;
2864 +        struct {
2865 +                char key[KEYSIZE];
2866 +                char ptr[PTRSIZE];
2867 +        } lr_entry[LFIX_ROOT_RECNO];
2868 +};
2869 +
2870 +struct lfix_index {
2871 +        struct dx_countlimit li_cl;
2872 +        char   li_padding[KEYSIZE + PTRSIZE - sizeof(struct dx_countlimit)];
2873 +        struct {
2874 +                char key[KEYSIZE];
2875 +                char ptr[PTRSIZE];
2876 +        } li_entry[LFIX_INDEX_RECNO - 1];
2877 +};
2878 +
2879 +struct lfix_leaf {
2880 +        struct iam_leaf_head ll_head;
2881 +        struct {
2882 +                char key[KEYSIZE];
2883 +                char rec[RECSIZE];
2884 +        } ll_entry[LFIX_LEAF_RECNO];
2885 +};
2886 Index: iam/fs/ext3/iam_lvar.c
2887 ===================================================================
2888 --- iam.orig/fs/ext3/iam_lvar.c
2889 +++ iam/fs/ext3/iam_lvar.c
2890 @@ -0,0 +1,1042 @@
2891 +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2892 + * vim:expandtab:shiftwidth=8:tabstop=8:
2893 + *
2894 + *  iam_lvar.c
2895 + *  implementation of iam format for fixed-size records, variable-size keys.
2896 + *
2897 + *  Copyright (c) 2006 Cluster File Systems, Inc.
2898 + *   Author: Nikita Danilov <nikita@clusterfs.com>
2899 + *
2900 + *   This file is part of the Lustre file system, http://www.lustre.org
2901 + *   Lustre is a trademark of Cluster File Systems, Inc.
2902 + *
2903 + *   You may have signed or agreed to another license before downloading
2904 + *   this software.  If so, you are bound by the terms and conditions
2905 + *   of that agreement, and the following does not apply to you.  See the
2906 + *   LICENSE file included with this distribution for more information.
2907 + *
2908 + *   If you did not agree to a different license, then this copy of Lustre
2909 + *   is open source software; you can redistribute it and/or modify it
2910 + *   under the terms of version 2 of the GNU General Public License as
2911 + *   published by the Free Software Foundation.
2912 + *
2913 + *   In either case, Lustre is distributed in the hope that it will be
2914 + *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
2915 + *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
2916 + *   license text for more details.
2917 + */
2918 +
2919 +#include <linux/types.h>
2920 +#include <linux/jbd.h>
2921 +/* ext3_error() */
2922 +#include <linux/ext3_fs.h>
2923 +
2924 +#include <linux/lustre_iam.h>
2925 +
2926 +#include <libcfs/libcfs.h>
2927 +#include <libcfs/kp30.h>
2928 +
2929 +/*
2930 + * Leaf operations.
2931 + */
2932 +
2933 +enum {
2934 +        IAM_LVAR_LEAF_MAGIC = 0x1973 /* This is duplicated in
2935 +                                      * lustre/utils/create_iam.c */
2936 +};
2937 +
2938 +/* This is duplicated in lustre/utils/create_iam.c */
2939 +struct lvar_leaf_header {
2940 +        __le16 vlh_magic; /* magic number IAM_LVAR_LEAF_MAGIC */
2941 +        __le16 vlh_used;  /* used bytes, including header */
2942 +};
2943 +
2944 +/*
2945 + * Format of leaf entry:
2946 + *
2947 + * __le16 keysize
2948 + *     u8 key[keysize]
2949 + *     u8 record[rec_size]
2950 + *
2951 + * Entries are ordered in key order.
2952 + */
2953 +
2954 +/* This is duplicated in lustre/utils/create_iam.c */
2955 +typedef __u32 lvar_hash_t;
2956 +
2957 +/* This is duplicated in lustre/utils/create_iam.c */
2958 +struct lvar_leaf_entry {
2959 +        __le32 vle_hash;
2960 +        __le16 vle_keysize;
2961 +        u8     vle_key[0];
2962 +};
2963 +
2964 +#define PDIFF(ptr0, ptr1) (((char *)(ptr0)) - ((char *)(ptr1)))
2965 +
2966 +
2967 +static inline int blocksize(const struct iam_leaf *leaf)
2968 +{
2969 +        return iam_leaf_container(leaf)->ic_object->i_sb->s_blocksize;
2970 +}
2971 +
2972 +static inline const char *kchar(const struct iam_key *key)
2973 +{
2974 +        return (void *)key;
2975 +}
2976 +
2977 +static inline struct iam_lentry *lvar_lentry(const struct lvar_leaf_entry *ent)
2978 +{
2979 +        return (struct iam_lentry *)ent;
2980 +}
2981 +
2982 +static inline struct lvar_leaf_entry *lentry_lvar(const struct iam_lentry *lent)
2983 +{
2984 +        return (struct lvar_leaf_entry *)lent;
2985 +}
2986 +
2987 +
2988 +static inline int recsize(const struct iam_leaf *leaf)
2989 +{
2990 +        return iam_leaf_descr(leaf)->id_rec_size;
2991 +}
2992 +
2993 +static inline int e_keysize(const struct lvar_leaf_entry *ent)
2994 +{
2995 +        return le16_to_cpu(ent->vle_keysize);
2996 +}
2997 +
2998 +/* This is duplicated in lustre/utils/create_iam.c */
2999 +enum {
3000 +        LVAR_PAD   = 4,
3001 +        LVAR_ROUND = LVAR_PAD - 1
3002 +};
3003 +
3004 +static inline int getsize(const struct iam_leaf *leaf, int namelen)
3005 +{
3006 +        CLASSERT(!(LVAR_PAD & (LVAR_PAD - 1)));
3007 +
3008 +        return (offsetof(struct lvar_leaf_entry, vle_key) +
3009 +                namelen + recsize(leaf) + LVAR_ROUND) & ~LVAR_ROUND;
3010 +}
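+
+/*
+ * Editorial example: with an 8-byte record, a 5-byte key yields
+ * offsetof(vle_key) + 5 + 8 == 6 + 5 + 8 == 19 bytes, which the mask
+ * arithmetic above rounds up to 20, the next multiple of LVAR_PAD.
+ */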
3011 +
3012 +static inline int e_size(const struct iam_leaf *leaf,
3013 +                         const struct lvar_leaf_entry *ent)
3014 +{
3015 +        return getsize(leaf, e_keysize(ent));
3016 +}
3017 +
3018 +static inline char *e_char(const struct lvar_leaf_entry *ent)
3019 +{
3020 +        return (char *)&ent->vle_key;
3021 +}
3022 +
3023 +static inline struct iam_key *e_key(const struct lvar_leaf_entry *ent)
3024 +{
3025 +        return (struct iam_key *)e_char(ent);
3026 +}
3027 +
3028 +static inline lvar_hash_t e_hash(const struct lvar_leaf_entry *ent)
3029 +{
3030 +        return le32_to_cpu(ent->vle_hash);
3031 +}
3032 +
3033 +static inline struct iam_rec *e_rec(const struct lvar_leaf_entry *ent)
3034 +{
3035 +        return ((void *)ent) +
3036 +                offsetof(struct lvar_leaf_entry, vle_key) + e_keysize(ent);
3037 +}
3038 +
3039 +static void e_print(const struct lvar_leaf_entry *ent)
3040 +{
3041 +        printk(KERN_EMERG "        %p %8.8x \"%*.*s\"\n", ent, e_hash(ent),
3042 +               e_keysize(ent), e_keysize(ent), e_char(ent));
3043 +}
3044 +#if 0
3045 +static int e_check(const struct iam_leaf *leaf,
3046 +                   const struct lvar_leaf_entry *ent)
3047 +{
3048 +        const void *point = ent;
3049 +        const void *start = leaf->il_bh->b_data;
3050 +        return
3051 +                start + sizeof(struct lvar_leaf_header) <= point &&
3052 +                point + e_size(leaf, ent) < start + blocksize(leaf);
3053 +}
3054 +#endif
3055 +
3056 +static struct lvar_leaf_entry *e_next(const struct iam_leaf *leaf,
3057 +                                      const struct lvar_leaf_entry *ent)
3058 +{
3059 +        return ((void *)ent) + e_size(leaf, ent);
3060 +}
3061 +
3062 +#define LVAR_HASH_TEA    (1)
3063 +#define LVAR_HASH_R5     (0)
3064 +#define LVAR_HASH_PREFIX (0)
3065 +
3066 +static inline lvar_hash_t get_hash(const struct iam_container *bag,
3067 +                                   const char *name, int namelen)
3068 +{
3069 +        lvar_hash_t result;
3070 +
3071 +        if (namelen == 0)
3072 +                return 0;
3073 +        if (strncmp(name, ".", 1) == 0 && namelen == 1)
3074 +                return 2;
3075 +        if (strncmp(name, "..", 2) == 0 && namelen == 2)
3076 +                return 4;
3077 +
3078 +        if (LVAR_HASH_PREFIX) {
3079 +                result = 0;
3080 +                strncpy((void *)&result,
3081 +                        name, min(namelen, (int)sizeof result));
3082 +        } else {
3083 +                struct dx_hash_info hinfo;
3084 +
3085 +                if (LVAR_HASH_TEA)
3086 +                        hinfo.hash_version = DX_HASH_TEA;
3087 +                else if (LVAR_HASH_R5)
3088 +                        hinfo.hash_version = DX_HASH_R5;
3089 +                hinfo.seed = 0;
3090 +                ext3fs_dirhash(name, namelen, &hinfo);
3091 +                result = hinfo.hash;
3092 +        }
3093 +
3094 +        return (result << 1) & 0x7fffffff;
3095 +}
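+
+/*
+ * Note that get_hash() shifts the raw hash left by one, so the least
+ * significant bit of a stored hash is normally 0. That bit is reserved
+ * as a hash collision marker: lvar_split() (below) sets it when the
+ * first entry moved to a new leaf collides with the last entry staying
+ * behind. "." and ".." get fixed sentinel hashes 2 and 4 so that they
+ * always sort first.
+ */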
3096 +
3097 +static inline int e_eq(const struct lvar_leaf_entry *ent,
3098 +                       const char *name, int namelen)
3099 +{
3100 +        return namelen == e_keysize(ent) && !memcmp(e_char(ent), name, namelen);
3101 +}
3102 +
3103 +static inline int e_cmp(const struct iam_leaf *leaf,
3104 +                        const struct lvar_leaf_entry *ent, lvar_hash_t hash)
3105 +{
3106 +        lvar_hash_t ehash;
3107 +
3108 +        ehash = e_hash(ent);
3109 +        return ehash == hash ? 0 : (ehash < hash ? -1 : +1);
3110 +}
3111 +
3112 +static struct lvar_leaf_header *n_head(const struct iam_leaf *l)
3113 +{
3114 +        return (struct lvar_leaf_header *)l->il_bh->b_data;
3115 +}
3116 +
3117 +static int h_used(const struct lvar_leaf_header *hdr)
3118 +{
3119 +        return le16_to_cpu(hdr->vlh_used);
3120 +}
3121 +
3122 +static void h_used_adj(const struct iam_leaf *leaf,
3123 +                       struct lvar_leaf_header *hdr, int adj)
3124 +{
3125 +        int used;
3126 +
3127 +        used = h_used(hdr) + adj;
3128 +        assert_corr(sizeof *hdr <= used && used <= blocksize(leaf));
3129 +        hdr->vlh_used = cpu_to_le16(used);
3130 +}
3131 +
3132 +static struct lvar_leaf_entry *n_start(const struct iam_leaf *leaf)
3133 +{
3134 +        return (void *)leaf->il_bh->b_data + sizeof(struct lvar_leaf_header);
3135 +}
3136 +
3137 +static struct lvar_leaf_entry *n_end(const struct iam_leaf *l)
3138 +{
3139 +        return (void *)l->il_bh->b_data + h_used(n_head(l));
3140 +}
3141 +
3142 +static struct lvar_leaf_entry *n_cur(const struct iam_leaf *l)
3143 +{
3144 +        return lentry_lvar(l->il_at);
3145 +}
3146 +
3147 +void n_print(const struct iam_leaf *l)
3148 +{
3149 +        struct lvar_leaf_entry *scan;
3150 +
3151 +        printk(KERN_EMERG "used: %d\n", h_used(n_head(l)));
3152 +        for (scan = n_start(l); scan < n_end(l); scan = e_next(l, scan))
3153 +                e_print(scan);
3154 +}
3155 +
3156 +#if EXT3_CORRECTNESS_ON
3157 +static int n_at_rec(const struct iam_leaf *folio)
3158 +{
3159 +        return
3160 +                n_start(folio) <= lentry_lvar(folio->il_at) &&
3161 +                lentry_lvar(folio->il_at) < n_end(folio);
3162 +}
3163 +
3164 +#if EXT3_INVARIANT_ON
3165 +static int n_invariant(const struct iam_leaf *leaf)
3166 +{
3167 +        struct iam_path        *path;
3168 +        struct lvar_leaf_entry *scan;
3169 +        struct lvar_leaf_entry *end;
3170 +        lvar_hash_t             hash;
3171 +        lvar_hash_t             nexthash;
3172 +        lvar_hash_t             starthash;
3173 +
3174 +        end  = n_end(leaf);
3175 +        hash = 0;
3176 +        path = leaf->il_path;
3177 +
3178 +        if (h_used(n_head(leaf)) > blocksize(leaf))
3179 +                return 0;
3180 +
3181 +        /*
3182 +         * Delimiting key in the parent index node. Clear least bit to account
3183 +         * for hash collision marker.
3184 +         */
3185 +        starthash = *(lvar_hash_t *)iam_ikey_at(path, path->ip_frame->at) & ~1;
3186 +        for (scan = n_start(leaf); scan < end; scan = e_next(leaf, scan)) {
3187 +                nexthash = e_hash(scan);
3188 +                if (nexthash != get_hash(iam_leaf_container(leaf),
3189 +                                         e_char(scan), e_keysize(scan))) {
3190 +                        BREAKPOINT();
3191 +                        return 0;
3192 +                }
3193 +                if (0 && nexthash < starthash) {
3194 +                        /*
3195 +                         * Unfortunately this useful invariant cannot be
3196 +         * reliably checked, as the parent node is not necessarily
3197 +                         * locked.
3198 +                         */
3199 +                        n_print(leaf);
3200 +                        printk("%#x < %#x\n", nexthash, starthash);
3201 +                        dump_stack();
3202 +                        BREAKPOINT();
3203 +                        return 0;
3204 +                }
3205 +                if (nexthash < hash) {
3206 +                        BREAKPOINT();
3207 +                        return 0;
3208 +                }
3209 +                hash = nexthash;
3210 +        }
3211 +        if (scan != end) {
3212 +                BREAKPOINT();
3213 +                return 0;
3214 +        }
3215 +        return 1;
3216 +}
3217 +/* EXT3_INVARIANT_ON */
3218 +#endif
3219 +
3220 +/* EXT3_CORRECTNESS_ON */
3221 +#endif
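+
+/*
+ * To summarize the invariant: ->vlh_used fits within the block, every
+ * entry's stored hash matches the hash recomputed from its key, and
+ * entries are sorted by hash in ascending order. The comparison against
+ * the delimiting key in the parent is compiled out, because the parent
+ * node is not necessarily locked here.
+ */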
3222 +
3223 +static struct iam_ikey *lvar_ikey(const struct iam_leaf *l,
3224 +                                  struct iam_ikey *key)
3225 +{
3226 +        lvar_hash_t *hash;
3227 +
3228 +        assert_corr(n_at_rec(l));
3229 +
3230 +        hash = (void *)key;
3231 +        *hash = e_hash(n_cur(l));
3232 +        return key;
3233 +}
3234 +
3235 +static struct iam_key *lvar_key(const struct iam_leaf *l)
3236 +{
3237 +        return e_key(n_cur(l));
3238 +}
3239 +
3240 +static int lvar_key_size(const struct iam_leaf *l)
3241 +{
3242 +        return e_keysize(n_cur(l));
3243 +}
3244 +
3245 +static void lvar_start(struct iam_leaf *l)
3246 +{
3247 +        l->il_at = lvar_lentry(n_start(l));
3248 +}
3249 +
3250 +static int lvar_init(struct iam_leaf *l)
3251 +{
3252 +        int result;
3253 +        int used;
3254 +        struct lvar_leaf_header *head;
3255 +
3256 +        assert_corr(l->il_bh != NULL);
3257 +
3258 +        head = n_head(l);
3259 +        used = h_used(head);
3260 +        if (head->vlh_magic == cpu_to_le16(IAM_LVAR_LEAF_MAGIC) &&
3261 +            used <= blocksize(l)) {
3262 +                l->il_at = l->il_entries = lvar_lentry(n_start(l));
3263 +                result = 0;
3264 +        } else {
3265 +                struct inode *obj;
3266 +
3267 +                obj = iam_leaf_container(l)->ic_object;
3268 +                ext3_error(obj->i_sb, __FUNCTION__,
3269 +                           "Wrong magic in node %llu (#%lu): %#x != %#x or "
3270 +                           "wrong used: %i",
3271 +                           (unsigned long long)l->il_bh->b_blocknr, obj->i_ino,
3272 +                           head->vlh_magic, cpu_to_le16(IAM_LVAR_LEAF_MAGIC),
3273 +                           used);
3274 +                result = -EIO;
3275 +                BREAKPOINT();
3276 +        }
3277 +        return result;
3278 +}
3279 +
3280 +static void lvar_fini(struct iam_leaf *l)
3281 +{
3282 +        l->il_entries = l->il_at = NULL;
3283 +}
3284 +
3285 +struct iam_rec *lvar_rec(const struct iam_leaf *l)
3286 +{
3287 +        assert_corr(n_at_rec(l));
3288 +        return e_rec(n_cur(l));
3289 +}
3290 +
3291 +static void lvar_next(struct iam_leaf *l)
3292 +{
3293 +        assert_corr(n_at_rec(l));
3294 +        assert_corr(iam_leaf_is_locked(l));
3295 +        l->il_at = lvar_lentry(e_next(l, n_cur(l)));
3296 +}
3297 +
3298 +static int lvar_lookup(struct iam_leaf *leaf, const struct iam_key *k)
3299 +{
3300 +        struct lvar_leaf_entry *found;
3301 +        struct lvar_leaf_entry *scan;
3302 +        struct lvar_leaf_entry *end;
3303 +        int                     result;
3304 +        const char             *name;
3305 +        int                     namelen;
3306 +        int                     found_equal;
3307 +        lvar_hash_t             hash;
3308 +        int                     last;
3309 +
3310 +        assert_inv(n_invariant(leaf));
3311 +        end = n_end(leaf);
3312 +
3313 +        name = kchar(k);
3314 +        namelen = strlen(name);
3315 +        hash = get_hash(iam_leaf_container(leaf), name, namelen);
3316 +        found = NULL;
3317 +        found_equal = 0;
3318 +        last = 1;
3319 +
3320 +        for (scan = n_start(leaf); scan < end; scan = e_next(leaf, scan)) {
3321 +                lvar_hash_t scan_hash;
3322 +
3323 +                scan_hash = e_hash(scan);
3324 +                if (scan_hash < hash)
3325 +                        found = scan;
3326 +                else if (scan_hash == hash) {
3327 +                        if (e_eq(scan, name, namelen)) {
3328 +                                /*
3329 +                                 * perfect match
3330 +                                 */
3331 +                                leaf->il_at = lvar_lentry(scan);
3332 +                                return IAM_LOOKUP_EXACT;
3333 +                        } else if (!found_equal) {
3334 +                                found = scan;
3335 +                                found_equal = 1;
3336 +                        }
3337 +                } else {
3338 +                        last = 0;
3339 +                        break;
3340 +                }
3341 +        }
3342 +        if (found == NULL) {
3343 +                /*
3344 +                 * @k is less than all hashes in the leaf.
3345 +                 */
3346 +                lvar_start(leaf);
3347 +                result = IAM_LOOKUP_BEFORE;
3348 +        } else {
3349 +                leaf->il_at = lvar_lentry(found);
3350 +                result = IAM_LOOKUP_OK;
3351 +                assert_corr(n_at_rec(leaf));
3352 +        }
3353 +        if (last)
3354 +                result |= IAM_LOOKUP_LAST;
3355 +        assert_inv(n_invariant(leaf));
3356 +
3357 +        if (unlikely(current->debugging1 & 0x1)) {
3358 +                struct iam_path *path;
3359 +
3360 +                path = leaf->il_path;
3361 +                printk(KERN_EMERG "lvar noent: `%s'/%#x/%#x\n", name, hash,
3362 +                       *(lvar_hash_t *)iam_ikey_at(path, path->ip_frame->at));
3363 +                n_print(leaf);
3364 +        }
3365 +
3366 +        return result;
3367 +}
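+
+/*
+ * Lookup contract, as used above: IAM_LOOKUP_EXACT means an entry fully
+ * matching @k was found; IAM_LOOKUP_BEFORE means @k hashes below every
+ * entry in the leaf (->il_at reset to the first entry); IAM_LOOKUP_OK
+ * positions ->il_at at the closest preceding entry, or at the first
+ * entry with a colliding hash. IAM_LOOKUP_LAST is or-ed in when no
+ * entry with a larger hash exists in this leaf.
+ */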
3368 +
3369 +static int lvar_ilookup(struct iam_leaf *leaf, const struct iam_ikey *ik)
3370 +{
3371 +        struct lvar_leaf_entry *scan;
3372 +        struct lvar_leaf_entry *end;
3373 +        lvar_hash_t             hash;
3374 +
3375 +        assert_inv(n_invariant(leaf));
3376 +        end  = n_end(leaf);
3377 +        hash = *(const lvar_hash_t *)ik;
3378 +
3379 +        lvar_start(leaf);
3380 +        for (scan = n_start(leaf); scan < end; scan = e_next(leaf, scan)) {
3381 +                lvar_hash_t scan_hash;
3382 +
3383 +                scan_hash = e_hash(scan);
3384 +                if (scan_hash > hash)
3385 +                        return scan == n_start(leaf) ?
3386 +                                IAM_LOOKUP_BEFORE : IAM_LOOKUP_OK;
3387 +                leaf->il_at = lvar_lentry(scan);
3388 +                if (scan_hash == hash)
3389 +                        return IAM_LOOKUP_EXACT;
3390 +        }
3391 +        assert_inv(n_invariant(leaf));
3392 +        /*
3393 +         * @ik is greater than any key in the node. Return last record in the
3394 +         * node.
3395 +         */
3396 +        return IAM_LOOKUP_OK;
3397 +}
3398 +
3399 +static void lvar_key_set(struct iam_leaf *l, const struct iam_key *k)
3400 +{
3401 +        assert_corr(n_at_rec(l));
3402 +        assert_corr(strlen(kchar(k)) == e_keysize(n_cur(l)));
3403 +        assert_corr(iam_leaf_is_locked(l));
3404 +        memcpy(e_key(n_cur(l)), k, e_keysize(n_cur(l)));
3405 +        assert_inv(n_invariant(l));
3406 +}
3407 +
3408 +static int lvar_key_cmp(const struct iam_leaf *l, const struct iam_key *k)
3409 +{
3410 +        lvar_hash_t hash;
3411 +        const char *name;
3412 +
3413 +        name = kchar(k);
3414 +
3415 +        hash = get_hash(iam_leaf_container(l), name, strlen(name));
3416 +        return e_cmp(l, n_cur(l), hash);
3417 +}
3418 +
3419 +static int lvar_key_eq(const struct iam_leaf *l, const struct iam_key *k)
3420 +{
3421 +        const char *name;
3422 +
3423 +        name = kchar(k);
3424 +        return e_eq(n_cur(l), name, strlen(name));
3425 +}
3426 +
3427 +static void lvar_rec_set(struct iam_leaf *l, const struct iam_rec *r)
3428 +{
3429 +        assert_corr(n_at_rec(l));
3430 +        assert_corr(iam_leaf_is_locked(l));
3431 +        iam_reccpy(iam_leaf_path(l), e_rec(n_cur(l)), r);
3432 +        assert_inv(n_invariant(l));
3433 +}
3434 +
3435 +static int lvar_can_add(const struct iam_leaf *l,
3436 +                        const struct iam_key *k, const struct iam_rec *r)
3437 +{
3438 +        assert_corr(iam_leaf_is_locked(l));
3439 +        return h_used(n_head(l)) + getsize(l, strlen(kchar(k))) <= blocksize(l);
3440 +}
3441 +
3442 +static int lvar_at_end(const struct iam_leaf *folio)
3443 +{
3444 +        assert_corr(iam_leaf_is_locked(folio));
3445 +        return n_cur(folio) == n_end(folio);
3446 +}
3447 +
3448 +static void lvar_rec_add(struct iam_leaf *leaf,
3449 +                         const struct iam_key *k, const struct iam_rec *r)
3450 +{
3451 +        const char *key;
3452 +        int   ksize;
3453 +        int   shift;
3454 +        void *end;
3455 +        void *start;
3456 +        ptrdiff_t diff;
3457 +
3458 +        assert_corr(lvar_can_add(leaf, k, r));
3459 +        assert_inv(n_invariant(leaf));
3460 +        assert_corr(iam_leaf_is_locked(leaf));
3461 +
3462 +        key   = kchar(k);
3463 +        ksize = strlen(key);
3464 +        shift = getsize(leaf, ksize);
3465 +
3466 +        if (!lvar_at_end(leaf)) {
3467 +                assert_corr(n_cur(leaf) < n_end(leaf));
3468 +                end = n_end(leaf);
3469 +                if (lvar_key_cmp(leaf, k) <= 0)
3470 +                        lvar_next(leaf);
3471 +                else
3472 +                        /*
3473 +                         * Another exceptional case: insertion with a key
3474 +                         * less than the least key in the leaf.
3475 +                         */
3476 +                        assert_corr(leaf->il_at == leaf->il_entries);
3477 +
3478 +                start = leaf->il_at;
3479 +                diff  = PDIFF(end, start);
3480 +                assert_corr(diff >= 0);
3481 +                memmove(start + shift, start, diff);
3482 +        }
3483 +        h_used_adj(leaf, n_head(leaf), shift);
3484 +        n_cur(leaf)->vle_keysize = cpu_to_le16(ksize);
3485 +        n_cur(leaf)->vle_hash = cpu_to_le32(get_hash(iam_leaf_container(leaf),
3486 +                                                     key, ksize));
3487 +        lvar_key_set(leaf, k);
3488 +        lvar_rec_set(leaf, r);
3489 +        assert_corr(n_at_rec(leaf));
3490 +        assert_inv(n_invariant(leaf));
3491 +}
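+
+/*
+ * Insertion works in place: unless appending at the very end, entries
+ * from the insertion point onwards are first shifted right by the padded
+ * size of the new entry, ->vlh_used is grown by the same amount, and the
+ * hash, key size, key and record are then written into the opened gap.
+ * The caller must have checked lvar_can_add() beforehand.
+ */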
3492 +
3493 +static void lvar_rec_del(struct iam_leaf *leaf, int shift)
3494 +{
3495 +        void *next;
3496 +        void *end;
3497 +        int nob;
3498 +
3499 +        assert_corr(n_at_rec(leaf));
3500 +        assert_inv(n_invariant(leaf));
3501 +        assert_corr(iam_leaf_is_locked(leaf));
3502 +
3503 +        end  = n_end(leaf);
3504 +        next = e_next(leaf, n_cur(leaf));
3505 +        nob  = e_size(leaf, n_cur(leaf));
3506 +        memmove(leaf->il_at, next, end - next);
3507 +        h_used_adj(leaf, n_head(leaf), -nob);
3508 +        assert_inv(n_invariant(leaf));
3509 +}
3510 +
3511 +static void lvar_init_new(struct iam_container *c, struct buffer_head *bh)
3512 +{
3513 +        struct lvar_leaf_header *hdr;
3514 +
3515 +        hdr = (struct lvar_leaf_header *)bh->b_data;
3516 +        hdr->vlh_magic = cpu_to_le16(IAM_LVAR_LEAF_MAGIC);
3517 +        hdr->vlh_used  = cpu_to_le16(sizeof *hdr);
3518 +}
3519 +
3520 +static struct lvar_leaf_entry *find_pivot(const struct iam_leaf *leaf,
3521 +                                          struct lvar_leaf_entry **prev)
3522 +{
3523 +        void *scan;
3524 +        void *start;
3525 +        int threshold;
3526 +
3527 +        *prev = NULL;
3528 +        threshold = blocksize(leaf) / 2;
3529 +        for (scan = start = n_start(leaf); scan - start <= threshold;
3530 +             *prev = scan, scan = e_next(leaf, scan)) {
3531 +                ;
3532 +        }
3533 +        return scan;
3534 +}
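+
+/*
+ * find_pivot() returns the first entry past the middle of the block,
+ * i.e., the first entry to be moved into the new leaf on a split;
+ * *prev is set to the last entry that stays behind.
+ */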
3535 +
3536 +static void lvar_split(struct iam_leaf *leaf, struct buffer_head **bh,
3537 +                       iam_ptr_t new_blknr)
3538 +{
3539 +        struct lvar_leaf_entry  *first_to_move;
3540 +        struct lvar_leaf_entry  *last_to_stay;
3541 +        struct iam_path         *path;
3542 +        struct lvar_leaf_header *hdr;
3543 +        struct buffer_head      *new_leaf;
3544 +
3545 +        ptrdiff_t   tomove;
3546 +        lvar_hash_t hash;
3547 +
3548 +        assert_inv(n_invariant(leaf));
3549 +        assert_corr(iam_leaf_is_locked(leaf));
3550 +
3551 +        new_leaf = *bh;
3552 +        path = iam_leaf_path(leaf);
3553 +
3554 +        hdr = (void *)new_leaf->b_data;
3555 +
3556 +        first_to_move = find_pivot(leaf, &last_to_stay);
3557 +        assert_corr(last_to_stay != NULL);
3558 +        assert_corr(e_next(leaf, last_to_stay) == first_to_move);
3559 +
3560 +        hash = e_hash(first_to_move);
3561 +        if (hash == e_hash(last_to_stay))
3562 +                /*
3563 +                 * Duplicate hash.
3564 +                 */
3565 +                hash |= 1;
3566 +
3567 +        tomove = PDIFF(n_end(leaf), first_to_move);
3568 +        memmove(hdr + 1, first_to_move, tomove);
3569 +
3570 +        h_used_adj(leaf, hdr, tomove);
3571 +        h_used_adj(leaf, n_head(leaf), -tomove);
3572 +
3573 +        assert_corr(n_end(leaf) == first_to_move);
3574 +
3575 +        if (n_cur(leaf) >= first_to_move) {
3576 +                /*
3577 +                 * insertion point moves into new leaf.
3578 +                 */
3579 +                ptrdiff_t shift;
3580 +                int result;
3581 +
3582 +                shift = PDIFF(leaf->il_at, first_to_move);
3583 +                *bh = leaf->il_bh;
3584 +                leaf->il_bh = new_leaf;
3585 +                leaf->il_curidx = new_blknr;
3586 +
3587 +                assert_corr(iam_leaf_is_locked(leaf));
3588 +                result = lvar_init(leaf);
3589 +                /*
3590 +                 * init cannot fail, as node was just initialized.
3591 +                 */
3592 +                assert_corr(result == 0);
3593 +                leaf->il_at = ((void *)leaf->il_at) + shift;
3594 +        }
3595 +        /*
3596 +         * Insert pointer to the new node (together with the least key in
3597 +         * the node) into index node.
3598 +         */
3599 +        iam_insert_key_lock(path, path->ip_frame, (struct iam_ikey *)&hash,
3600 +                            new_blknr);
3601 +        assert_corr(n_cur(leaf) < n_end(leaf));
3602 +        assert_inv(n_invariant(leaf));
3603 +}
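+
+/*
+ * Split sketch: the tail of the leaf, starting at the pivot, is copied
+ * into the new block and ->vlh_used of both halves is adjusted. The hash
+ * of the first moved entry (with the collision bit set if it duplicates
+ * the last remaining hash) becomes the delimiting key of the new leaf in
+ * the parent index node. If the current position migrated, ->il_bh and
+ * ->il_at are switched to the new block and the old buffer is handed
+ * back through @bh.
+ */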
3604 +
3605 +static struct iam_leaf_operations lvar_leaf_ops = {
3606 +        .init           = lvar_init,
3607 +        .init_new       = lvar_init_new,
3608 +        .fini           = lvar_fini,
3609 +        .start          = lvar_start,
3610 +        .next           = lvar_next,
3611 +        .key            = lvar_key,
3612 +        .ikey           = lvar_ikey,
3613 +        .rec            = lvar_rec,
3614 +        .key_set        = lvar_key_set,
3615 +        .key_cmp        = lvar_key_cmp,
3616 +        .key_eq         = lvar_key_eq,
3617 +        .key_size       = lvar_key_size,
3618 +        .rec_set        = lvar_rec_set,
3619 +        .lookup         = lvar_lookup,
3620 +        .ilookup        = lvar_ilookup,
3621 +        .at_end         = lvar_at_end,
3622 +        .rec_add        = lvar_rec_add,
3623 +        .rec_del        = lvar_rec_del,
3624 +        .can_add        = lvar_can_add,
3625 +        .split          = lvar_split
3626 +};
3627 +
3628 +/*
3629 + * Index operations.
3630 + */
3631 +
3632 +enum {
3633 +        /* This is duplicated in lustre/utils/create_iam.c */
3634 +        /* egrep -i '^o?x?[olabcdef]*$' /usr/share/dict/words */
3635 +        IAM_LVAR_ROOT_MAGIC = 0xb01dface
3636 +};
3637 +
3638 +/* This is duplicated in lustre/utils/create_iam.c */
3639 +struct lvar_root {
3640 +        __le32 vr_magic;
3641 +        __le16 vr_recsize;
3642 +        __le16 vr_ptrsize;
3643 +        u8     vr_indirect_levels;
3644 +        u8     vr_padding0;
3645 +        __le16 vr_padding1;
3646 +};
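+
+/*
+ * Layout of the root block, as formatted by lvar_root() below:
+ *
+ * +-----------+-------------+------------+------------+------------+
+ * | lvar_root | count/limit | key0, ptr0 | key1, ptr1 | free space |
+ * +-----------+-------------+------------+------------+------------+
+ *
+ * where each key is an lvar_hash_t and each pointer is vr_ptrsize bytes
+ * wide. The count/limit pair occupies one entry slot and is itself
+ * counted in count.
+ */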
3647 +
3648 +static __u32 lvar_root_ptr(struct iam_container *c)
3649 +{
3650 +        return 0;
3651 +}
3652 +
3653 +static int lvar_node_init(struct iam_container *c, struct buffer_head *bh,
3654 +                          int root)
3655 +{
3656 +        return 0;
3657 +}
3658 +
3659 +static struct iam_entry *lvar_root_inc(struct iam_container *c,
3660 +                                       struct iam_path *path,
3661 +                                       struct iam_frame *frame)
3662 +{
3663 +        struct lvar_root *root;
3664 +        struct iam_entry *entries;
3665 +
3666 +        assert_corr(iam_frame_is_locked(path, frame));
3667 +        entries = frame->entries;
3668 +
3669 +        dx_set_count(entries, 2);
3670 +        assert_corr(dx_get_limit(entries) == dx_root_limit(path));
3671 +
3672 +        root = (void *)frame->bh->b_data;
3673 +        assert_corr(le32_to_cpu(root->vr_magic) == IAM_LVAR_ROOT_MAGIC);
3674 +        root->vr_indirect_levels++;
3675 +        frame->at = entries = iam_entry_shift(path, entries, 1);
3676 +        memset(iam_ikey_at(path, entries), 0,
3677 +               iam_path_descr(path)->id_ikey_size);
3678 +        return entries;
3679 +}
3680 +
3681 +static int lvar_node_check(struct iam_path *path, struct iam_frame *frame)
3682 +{
3683 +        unsigned count;
3684 +        unsigned limit;
3685 +        unsigned limit_correct;
3686 +        struct iam_entry *entries;
3687 +
3688 +        entries = dx_node_get_entries(path, frame);
3689 +
3690 +        if (frame == path->ip_frames) {
3691 +                struct lvar_root *root;
3692 +
3693 +                root = (void *)frame->bh->b_data;
3694 +                if (le32_to_cpu(root->vr_magic) != IAM_LVAR_ROOT_MAGIC) {
3695 +                        BREAKPOINT();
3696 +                        return -EIO;
3697 +                }
3698 +                limit_correct = dx_root_limit(path);
3699 +        } else
3700 +                limit_correct = dx_node_limit(path);
3701 +        count = dx_get_count(entries);
3702 +        limit = dx_get_limit(entries);
3703 +        if (count > limit) {
3704 +                BREAKPOINT();
3705 +                return -EIO;
3706 +        }
3707 +        if (limit != limit_correct) {
3708 +                BREAKPOINT();
3709 +                return -EIO;
3710 +        }
3711 +        return 0;
3712 +}
3713 +
3714 +static int lvar_node_load(struct iam_path *path, struct iam_frame *frame)
3715 +{
3716 +        struct iam_entry *entries;
3717 +        void *data;
3718 +        entries = dx_node_get_entries(path, frame);
3719 +
3720 +        data = frame->bh->b_data;
3721 +
3722 +        if (frame == path->ip_frames) {
3723 +                struct lvar_root *root;
3724 +                const char *name;
3725 +
3726 +                root = data;
3727 +                name = kchar(path->ip_key_target);
3728 +                path->ip_indirect = root->vr_indirect_levels;
3729 +                if (path->ip_ikey_target == NULL) {
3730 +                        path->ip_ikey_target = iam_path_ikey(path, 4);
3731 +                        *(lvar_hash_t *)path->ip_ikey_target =
3732 +                                get_hash(path->ip_container, name,
3733 +                                         strlen(name));
3734 +                }
3735 +        }
3736 +        frame->entries = frame->at = entries;
3737 +        return 0;
3738 +}
3739 +
3740 +static int lvar_ikeycmp(const struct iam_container *c,
3741 +                        const struct iam_ikey *k1, const struct iam_ikey *k2)
3742 +{
3743 +        lvar_hash_t p1 = le32_to_cpu(*(lvar_hash_t *)k1);
3744 +        lvar_hash_t p2 = le32_to_cpu(*(lvar_hash_t *)k2);
3745 +
3746 +        return p1 > p2 ? +1 : (p1 < p2 ? -1 : 0);
3747 +}
3748 +
3749 +static struct iam_path_descr *lvar_ipd_alloc(const struct iam_container *c)
3750 +{
3751 +        return iam_ipd_alloc(c->ic_descr->id_ikey_size);
3752 +}
3753 +
3754 +static int root_limit(int rootgap, int blocksize, int size)
3755 +{
3756 +        int limit;
3757 +        int nlimit;
3758 +
3759 +        limit = (blocksize - rootgap) / size;
3760 +        nlimit = blocksize / size;
3761 +        if (limit == nlimit)
3762 +                limit--;
3763 +        return limit;
3764 +}
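+
+/*
+ * Worked example, assuming a 4096-byte block, a 12-byte root gap and
+ * 8-byte entries (4-byte hash plus 4-byte pointer): limit is
+ * (4096 - 12) / 8 = 510 while nlimit is 4096 / 8 = 512, so 510 is used
+ * as is. limit is only decremented when the gap is too small to consume
+ * an entire entry slot of its own.
+ */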
3765 +
3766 +static int lvar_root_limit(int blocksize, int size)
3767 +{
3768 +        return root_limit(sizeof(struct lvar_root), blocksize, size);
3769 +}
3770 +
3771 +static void lvar_root(void *buf,
3772 +                      int blocksize, int keysize, int ptrsize, int recsize)
3773 +{
3774 +        struct lvar_root *root;
3775 +        struct dx_countlimit *limit;
3776 +        void                 *entry;
3777 +        int isize;
3778 +
3779 +        isize = sizeof(lvar_hash_t) + ptrsize;
3780 +        root = buf;
3781 +        *root = (typeof(*root)) {
3782 +                .vr_magic            = cpu_to_le32(IAM_LVAR_ROOT_MAGIC),
3783 +                .vr_recsize          = cpu_to_le16(recsize),
3784 +                .vr_ptrsize          = cpu_to_le16(ptrsize),
3785 +                .vr_indirect_levels  = 0
3786 +        };
3787 +
3788 +        limit = (void *)(root + 1);
3789 +        *limit = (typeof(*limit)){
3790 +                /*
3791 +                 * limit itself + one pointer to the leaf.
3792 +                 */
3793 +                .count = cpu_to_le16(2),
3794 +                .limit = cpu_to_le16(lvar_root_limit(blocksize, isize))
3796 +        };
3797 +
3798 +        entry = root + 1;
3799 +        /*
3800 +         * Skip over @limit.
3801 +         */
3802 +        entry += isize;
3803 +
3804 +        /*
3805 +         * Entry format is <key> followed by <ptr>. In the minimal tree
3806 +         * consisting of a root and single node, <key> is a minimal possible
3807 +         * key.
3808 +         */
3809 +        *(lvar_hash_t *)entry = 0;
3810 +        entry += sizeof(lvar_hash_t);
3811 +        /* now @entry points to <ptr> */
3812 +        if (ptrsize == 4)
3813 +                *(u_int32_t *)entry = cpu_to_le32(1);
3814 +        else
3815 +                *(u_int64_t *)entry = cpu_to_le64(1);
3816 +}
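+
+/*
+ * The freshly formatted root thus contains two "entries": the
+ * count/limit slot itself (hence .count == 2) and a single (key == 0,
+ * ptr == 1) entry pointing at the leaf in block 1.
+ */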
3817 +
3818 +static int lvar_esize(int namelen, int recsize)
3819 +{
3820 +        return (offsetof(struct lvar_leaf_entry, vle_key) +
3821 +                namelen + recsize + LVAR_ROUND) & ~LVAR_ROUND;
3822 +}
3823 +
3824 +static void lvar_leaf(void *buf,
3825 +                      int blocksize, int keysize, int ptrsize, int recsize)
3826 +{
3827 +        struct lvar_leaf_header *head;
3828 +        struct lvar_leaf_entry  *entry;
3829 +
3830 +        /* form leaf */
3831 +        head = buf;
3832 +        *head = (typeof(*head)) {
3833 +                .vlh_magic = cpu_to_le16(IAM_LVAR_LEAF_MAGIC),
3834 +                .vlh_used  = cpu_to_le16(sizeof *head + lvar_esize(0, recsize))
3835 +        };
3836 +        entry = (void *)(head + 1);
3837 +        *entry = (typeof(*entry)) {
3838 +                .vle_hash    = 0,
3839 +                .vle_keysize = 0
3840 +        };
3841 +        memset(e_rec(entry), 0, recsize);
3842 +}
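+
+/*
+ * A new leaf is seeded with one entry carrying an empty key, hash 0 and
+ * a zeroed record, so that even an otherwise empty container has a
+ * smallest record for lookups to position on.
+ */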
3843 +
3844 +#include <linux/jbd.h>
3845 +#include <linux/ext3_fs.h>
3846 +#include <linux/ext3_jbd.h>
3847 +
3848 +int iam_lvar_create(struct inode *obj,
3849 +                    int keysize, int ptrsize, int recsize, handle_t *handle)
3850 +{
3851 +        struct buffer_head *root_node;
3852 +        struct buffer_head *leaf_node;
3853 +        struct super_block *sb;
3854 +
3855 +        u32 blknr;
3856 +        int result;
3857 +        unsigned long bsize;
3858 +
3859 +        assert_corr(obj->i_size == 0);
3860 +
3861 +        sb = obj->i_sb;
3862 +        bsize = sb->s_blocksize;
3863 +        root_node = ext3_append(handle, obj, &blknr, &result);
+        /* do not let a second append overwrite the error of a failed first */
3864 +        leaf_node = root_node == NULL ? NULL :
+                    ext3_append(handle, obj, &blknr, &result);
3865 +        if (root_node != NULL && leaf_node != NULL) {
3866 +                lvar_root(root_node->b_data, bsize, keysize, ptrsize, recsize);
3867 +                lvar_leaf(leaf_node->b_data, bsize, keysize, ptrsize, recsize);
3868 +                ext3_mark_inode_dirty(handle, obj);
3869 +                result = ext3_journal_dirty_metadata(handle, root_node);
3870 +                if (result == 0)
3871 +                        result = ext3_journal_dirty_metadata(handle, leaf_node);
3872 +                if (result != 0)
3873 +                        ext3_std_error(sb, result);
3874 +        }
3875 +        brelse(leaf_node);
3876 +        brelse(root_node);
3877 +        return result;
3878 +}
3879 +EXPORT_SYMBOL(iam_lvar_create);
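+
+/*
+ * iam_lvar_create() builds the minimal two-block tree described above:
+ * block 0 holds the root pointing at block 1, which holds the pre-seeded
+ * leaf. The journal handle supplied by the caller is assumed to carry
+ * enough credits for two block allocations and the inode update. Note
+ * that @keysize is currently unused: lvar keys are variable-length.
+ */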
3880 +
3881 +static struct iam_operations lvar_ops = {
3882 +        .id_root_ptr    = lvar_root_ptr,
3883 +        .id_node_read   = iam_node_read,
3884 +        .id_node_init   = lvar_node_init,
3885 +        .id_node_check  = lvar_node_check,
3886 +        .id_node_load   = lvar_node_load,
3887 +        .id_ikeycmp     = lvar_ikeycmp,
3888 +        .id_root_inc    = lvar_root_inc,
3889 +        .id_ipd_alloc   = lvar_ipd_alloc,
3890 +        .id_ipd_free    = iam_ipd_free,
3891 +        .id_name        = "lvar"
3892 +};
3893 +
3894 +static int lvar_guess(struct iam_container *c)
3895 +{
3896 +        int result;
3897 +        struct buffer_head *bh;
3898 +        const struct lvar_root *root;
3899 +
3900 +        assert_corr(c->ic_object != NULL);
3901 +
3902 +        result = iam_node_read(c, lvar_root_ptr(c), NULL, &bh);
3903 +        if (result == 0) {
3904 +                root = (void *)bh->b_data;
3905 +                if (le32_to_cpu(root->vr_magic) == IAM_LVAR_ROOT_MAGIC) {
3906 +                        struct iam_descr *descr;
3907 +
3908 +                        descr = c->ic_descr;
3909 +                        descr->id_key_size  = EXT3_NAME_LEN;
3910 +                        descr->id_ikey_size = sizeof (lvar_hash_t);
3911 +                        descr->id_rec_size  = le16_to_cpu(root->vr_recsize);
3912 +                        descr->id_ptr_size  = le16_to_cpu(root->vr_ptrsize);
3913 +                        descr->id_root_gap  = sizeof *root;
3914 +                        descr->id_node_gap  = 0;
3915 +                        descr->id_ops       = &lvar_ops;
3916 +                        descr->id_leaf_ops  = &lvar_leaf_ops;
3917 +                } else
3918 +                        result = -EBADF;
3919 +                brelse(bh);
3920 +        }
3921 +        return result;
3922 +}
3923 +
3924 +static struct iam_format lvar_format = {
3925 +        .if_guess = lvar_guess
3926 +};
3927 +
3928 +void iam_lvar_format_init(void)
3929 +{
3930 +        iam_format_register(&lvar_format);
3931 +}
3932 +
3933 Index: iam/fs/ext3/namei.c
3934 ===================================================================
3935 --- iam.orig/fs/ext3/namei.c
3936 +++ iam/fs/ext3/namei.c
3937 @@ -24,81 +24,6 @@
3938   *     Theodore Ts'o, 2002
3939   */
3940  
3941 -/*
3942 - * iam: big theory statement.
3943 - *
3944 - * iam (Index Access Module) is a module providing abstraction of persistent
3945 - * transactional container on top of generalized ext3 htree.
3946 - *
3947 - * iam supports:
3948 - *
3949 - *     - key, pointer, and record size specifiable per container.
3950 - *
3951 - *     - trees taller than 2 index levels.
3952 - *
3953 - *     - read/write to existing ext3 htree directories as iam containers.
3954 - *
3955 - * iam container is a tree, consisting of leaf nodes containing keys and
3956 - * records stored in this container, and index nodes, containing keys and
3957 - * pointers to leaf or index nodes.
3958 - *
3959 - * iam does not work with keys directly, instead it calls user-supplied key
3960 - * comparison function (->dpo_keycmp()).
3961 - *
3962 - * Pointers are (currently) interpreted as logical offsets (measured in
3963 - * blocksful) within underlying flat file on top of which iam tree lives.
3964 - *
3965 - * On-disk format:
3966 - *
3967 - * iam mostly tries to reuse existing htree formats.
3968 - *
3969 - * Format of index node:
3970 - *
3971 - * +-----+-------+-------+-------+------+-------+------------+
3972 - * |     | count |       |       |      |       |            |
3973 - * | gap |   /   | entry | entry | .... | entry | free space |
3974 - * |     | limit |       |       |      |       |            |
3975 - * +-----+-------+-------+-------+------+-------+------------+
3976 - *
3977 - *       gap           this part of node is never accessed by iam code. It
3978 - *                     exists for binary compatibility with ext3 htree (that,
3979 - *                     in turn, stores fake struct ext2_dirent for ext2
3980 - *                     compatibility), and to keep some unspecified per-node
3981 - *                     data. Gap can be different for root and non-root index
3982 - *                     nodes. Gap size can be specified for each container
3983 - *                     (gap of 0 is allowed).
3984 - *
3985 - *       count/limit   current number of entries in this node, and the maximal
3986 - *                     number of entries that can fit into node. count/limit
3987 - *                     has the same size as entry, and is itself counted in
3988 - *                     count.
3989 - *
3990 - *       entry         index entry: consists of a key immediately followed by
3991 - *                     a pointer to a child node. Size of a key and size of a
3992 - *                     pointer depends on container. Entry has neither
3993 - *                     alignment nor padding.
3994 - *
3995 - *       free space    portion of node new entries are added to
3996 - *
3997 - * Entries in index node are sorted by their key value.
3998 - *
3999 - * Format of leaf node:
4000 - *
4001 - * +-----+-------+-------+-------+------+-------+------------+
4002 - * |     | count |       |       |      |       |            |
4003 - * | gap |   /   | leaf  | leaf  | .... | leaf  | free space |
4004 - * |     | limit |       |       |      |       |            |
4005 - * +-----+-------+-------+-------+------+-------+------------+
4006 -
4007 - *       leaf          For leaf entry: consists of a rec immediately followd by 
4008 - *                     a key. size of a key and size of a rec depends on container.  
4009 - *
4010 - *
4011 - *
4012 - *
4013 - *
4014 - */
4015 -
4016  #include <linux/module.h>
4017  #include <linux/fs.h>
4018  #include <linux/pagemap.h>
4019 @@ -112,10 +37,10 @@
4020  #include <linux/quotaops.h>
4021  #include <linux/buffer_head.h>
4022  #include <linux/smp_lock.h>
4023 +#include <linux/lustre_iam.h>
4024  #include "xattr.h"
4025  #include "iopen.h"
4026  #include "acl.h"
4027 -#include <linux/lustre_iam.h>
4028  /*
4029   * define how far ahead to read directories while searching them.
4030   */
4031 @@ -125,7 +50,7 @@
4032  #define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
4033  
4034  
4035 -static struct buffer_head *ext3_append(handle_t *handle,
4036 +struct buffer_head *ext3_append(handle_t *handle,
4037                                         struct inode *inode,
4038                                         u32 *block, int *err)
4039  {
4040 @@ -136,14 +61,15 @@ static struct buffer_head *ext3_append(h
4041         if ((bh = ext3_bread(handle, inode, *block, 1, err))) {
4042                 inode->i_size += inode->i_sb->s_blocksize;
4043                 EXT3_I(inode)->i_disksize = inode->i_size;
4044 -               ext3_journal_get_write_access(handle,bh);
4045 +               *err = ext3_journal_get_write_access(handle, bh);
4046 +               if (*err != 0) {
4047 +                       brelse(bh);
4048 +                       bh = NULL;
4049 +               }
4050         }
4051         return bh;
4052  }
4053  
4054 -#ifndef assert
4055 -#define assert(test) J_ASSERT(test)
4056 -#endif
4057  
4058  #ifndef swap
4059  #define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
4060 @@ -155,293 +81,10 @@ static struct buffer_head *ext3_append(h
4061  #define dxtrace(command)
4062  #endif
4063  
4064 -struct fake_dirent {
4065 -       __le32 inode;
4066 -       __le16 rec_len;
4067 -       u8 name_len;
4068 -       u8 file_type;
4069 -};
4070 -
4071 -struct dx_countlimit {
4072 -       __le16 limit;
4073 -       __le16 count;
4074 -};
4075 -
4076 -/*
4077 - * dx_root_info is laid out so that if it should somehow get overlaid by a
4078 - * dirent the two low bits of the hash version will be zero.  Therefore, the
4079 - * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
4080 - */
4081 -
4082 -struct dx_root {
4083 -       struct fake_dirent dot;
4084 -       char dot_name[4];
4085 -       struct fake_dirent dotdot;
4086 -       char dotdot_name[4];
4087 -       struct dx_root_info
4088 -       {
4089 -               __le32 reserved_zero;
4090 -               u8 hash_version;
4091 -               u8 info_length; /* 8 */
4092 -               u8 indirect_levels;
4093 -               u8 unused_flags;
4094 -       }
4095 -       info;
4096 -       struct {} entries[0];
4097 -};
4098 -
4099 -struct dx_node
4100 -{
4101 -       struct fake_dirent fake;
4102 -       struct {} entries[0];
4103 -};
4104 -
4105 -struct dx_map_entry
4106 -{
4107 -       u32 hash;
4108 -       u32 offs;
4109 -};
4110 -
4111 -
4112 -static u32 htree_root_ptr(struct iam_container *c);
4113 -static int htree_node_check(struct iam_path *path, struct iam_frame *frame);
4114 -static int htree_node_init(struct iam_container *c,
4115 -                          struct buffer_head *bh, int root);
4116 -static int htree_keycmp(struct iam_container *c,
4117 -                       struct iam_key *k1, struct iam_key *k2);
4118 -static int htree_node_read(struct iam_container *c, iam_ptr_t ptr,
4119 -                          handle_t *h, struct buffer_head **bh);
4120 -
4121 -/*
4122 - * Parameters describing iam compatibility mode in which existing ext3 htrees
4123 - * can be manipulated.
4124 - */
4125 -static struct iam_descr htree_compat_param = {
4126 -       .id_key_size = sizeof ((struct dx_map_entry *)NULL)->hash,
4127 -       .id_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs,
4128 -       .id_node_gap = offsetof(struct dx_node, entries),
4129 -       .id_root_gap = offsetof(struct dx_root, entries),
4130 -
4131 -       .id_root_ptr   = htree_root_ptr,
4132 -       .id_node_check = htree_node_check,
4133 -       .id_node_init  = htree_node_init,
4134 -       .id_node_read  = htree_node_read,
4135 -       .id_keycmp     = htree_keycmp
4136 -};
4137 -
4138 -
4139 -struct iam_key;
4140 -struct iam_rec;
4141 -struct iam_descr;
4142 -struct iam_container;
4143 -struct iam_path;
4144 -
4145 -
4146 -
4147 -/*
4148 - * iam cursor (iterator) api.
4149 - */
4150 -
4151 -/*
4152 - * Flags controlling iterator functionality.
4153 - */
4154 -enum iam_it_flags {
4155 -       /*
4156 -        * this iterator will move (iam_it_{prev,next}() will be called on it)
4157 -        */
4158 -       IAM_IT_MOVE  = (1 << 0),
4159 -       /*
4160 -        * tree can be updated through this iterator.
4161 -        */
4162 -       IAM_IT_WRITE = (1 << 1)
4163 -};
4164 -
4165 -/*
4166 - * States of iterator state machine.
4167 - */
4168 -enum iam_it_state {
4169 -       /* initial state */
4170 -       IAM_IT_DETACHED,
4171 -       /* iterator is above particular record in the container */
4172 -       IAM_IT_ATTACHED
4173 -};
4174 -
4175 -struct htree_cookie {
4176 -       struct dx_hash_info *hinfo;
4177 -       struct dentry       *dentry;
4178 -};
4179 -
4180 -/*
4181 - * Iterator.
4182 - *
4183 - * Immediately after call to iam_it_init() iterator is in "detached"
4184 - * (IAM_IT_DETACHED) state: it is associated with given parent container, but
4185 - * doesn't point to any particular record in this container.
4186 - *
4187 - * After successful call to iam_it_get() and until corresponding call to
4188 - * iam_it_put() iterator is in "attached" state (IAM_IT_ATTACHED).
4189 - *
4190 - * Attached iterator can move through records in a container (provided
4191 - * IAM_IT_MOVE permission) in a key order, can get record and key values as it
4192 - * passes over them, and can modify container (provided IAM_IT_WRITE
4193 - * permission).
4194 - *
4195 - * Concurrency: iterators are supposed to be local to thread. Interfaces below
4196 - * do no internal serialization.
4197 - *
4198 - */
4199 -struct iam_iterator {
4200 -       /*
4201 -        * iterator flags, taken from enum iam_it_flags.
4202 -        */
4203 -       __u32                 ii_flags;
4204 -       enum iam_it_state     ii_state;
4205 -       /*
4206 -        * path to the record. Valid in IAM_IT_ATTACHED state.
4207 -        */
4208 -       struct iam_path       ii_path;
4209 -};
4210 -
4211 -static inline struct iam_key *keycpy(struct iam_container *c,
4212 -                                    struct iam_key *k1, struct iam_key *k2)
4213 -{
4214 -       return memcpy(k1, k2, c->ic_descr->id_key_size);
4215 -}
4216 -
4217 -static inline int keycmp(struct iam_container *c,
4218 -                        struct iam_key *k1, struct iam_key *k2)
4219 -{
4220 -       return c->ic_descr->id_keycmp(c, k1, k2);
4221 -}
4222 -
4223 -static struct iam_container *iam_it_container(struct iam_iterator *it)
4224 -{
4225 -       return it->ii_path.ip_container;
4226 -}
4227 -
4228 -static inline int it_keycmp(struct iam_iterator *it,
4229 -                           struct iam_key *k1, struct iam_key *k2)
4230 -{
4231 -       return keycmp(iam_it_container(it), k1, k2);
4232 -}
4233 -
4234 -/*
4235 - * Initialize iterator to IAM_IT_DETACHED state.
4236 - *
4237 - * postcondition: it_state(it) == IAM_IT_DETACHED
4238 - */
4239 -int  iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags);
4240 -/*
4241 - * Finalize iterator and release all resources.
4242 - *
4243 - * precondition: it_state(it) == IAM_IT_DETACHED
4244 - */
4245 -void iam_it_fini(struct iam_iterator *it);
4246 -
4247 -/*
4248 - * Attach iterator. After successful completion, @it points to record with the
4249 - * largest key not larger than @k. Semantics of ->id_create() method guarantee
4250 - * that such record will always be found.
4251 - *
4252 - * Return value: 0: positioned on existing record,
4253 - *             -ve: error.
4254 - *
4255 - * precondition:  it_state(it) == IAM_IT_DETACHED
4256 - * postcondition: ergo(result == 0,
4257 - *                     (it_state(it) == IAM_IT_ATTACHED &&
4258 - *                      it_keycmp(it, iam_it_key_get(it, *), k) < 0))
4259 - */
4260 -int iam_it_get(struct iam_iterator *it, struct iam_key *k);
4261 -
4262 -/*
4263 - * Duplicates iterator.
4264 - *
4265 - * postcondition: it_state(dst) == it_state(src) &&
4266 - *                iam_it_container(dst) == iam_it_container(src) &&
4267 - *                dst->ii_flags = src->ii_flags &&
4268 - *                ergo(it_state(it) == IAM_IT_ATTACHED,
4269 - *                     iam_it_rec_get(dst) == iam_it_rec_get(src) &&
4270 - *                     iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
4271 - */
4272 -void iam_it_dup(struct iam_iterator *dst, struct iam_iterator *src);
4273 -
4274 -/*
4275 - * Detach iterator. Does nothing it detached state.
4276 - *
4277 - * postcondition: it_state(it) == IAM_IT_DETACHED
4278 - */
4279 -void iam_it_put(struct iam_iterator *it);
4280 -
4281 -/*
4282 - * Move iterator one record right.
4283 - *
4284 - * Return value: 0: success,
4285 - *              +1: end of container reached
4286 - *             -ve: error
4287 - *
4288 - * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
4289 - * postcondition: ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED)
4290 - */
4291 -int iam_it_next(struct iam_iterator *it);
4292 -
4293 -/*
4294 - * Return pointer to the record under iterator.
4295 - *
4296 - * precondition:  it_state(it) == IAM_IT_ATTACHED
4297 - * postcondition: it_state(it) == IAM_IT_ATTACHED
4298 - */
4299 -const struct iam_rec *iam_it_rec_get(struct iam_iterator *it);
4300 -
4301 -/*
4302 - * Replace contents of record under iterator.
4303 - *
4304 - * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
4305 - * postcondition: it_state(it) == IAM_IT_ATTACHED &&
4306 - *                ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
4307 - */
4308 -int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r);
4309 -
4310 -/*
4311 - * Place key under iterator in @k, return @k
4312 - *
4313 - * precondition:  it_state(it) == IAM_IT_ATTACHED
4314 - * postcondition: it_state(it) == IAM_IT_ATTACHED
4315 - */
4316 -const struct iam_key *iam_it_key_get(struct iam_iterator *it,
4317 -                                    struct iam_key *k);
4318 -
4319 -/*
4320 - * Insert new record with key @k and contents from @r, shifting records to the
4321 - * right.
4322 - *
4323 - * precondition:  it_state(it) == IAM_IT_ATTACHED &&
4324 - *                it->ii_flags&IAM_IT_WRITE &&
4325 - *                it_keycmp(it, iam_it_key_get(it, *), k) < 0
4326 - * postcondition: it_state(it) == IAM_IT_ATTACHED &&
4327 - *                ergo(result == 0,
4328 - *                     it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
4329 - *                     !memcmp(iam_it_rec_get(it), r, ...))
4330 - */
4331 -int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
4332 -                     struct iam_key *k, struct iam_rec *r);
4333 -/*
4334 - * Delete record under iterator.
4335 - *
4336 - * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
4337 - * postcondition: it_state(it) == IAM_IT_ATTACHED
4338 - */
4339 -int iam_it_rec_delete(handle_t *h, struct iam_iterator *it);
4340 -
4341  #ifdef CONFIG_EXT3_INDEX
4342  static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry);
4343  static void dx_set_block(struct iam_path *p,
4344                          struct iam_entry *entry, unsigned value);
4345 -static inline struct iam_key *dx_get_key(struct iam_path *p,
4346 -                                       struct iam_entry *entry,
4347 -                                       struct iam_key *key);
4348 -static void dx_set_key(struct iam_path *p, struct iam_entry *entry,
4349 -                      struct iam_key *key);
4350 -static unsigned dx_get_count(struct iam_entry *entries);
4351  static unsigned dx_get_limit(struct iam_entry *entries);
4352  static void dx_set_count(struct iam_entry *entries, unsigned value);
4353  static void dx_set_limit(struct iam_entry *entries, unsigned value);
4354 @@ -457,264 +100,62 @@ static void dx_sort_map(struct dx_map_en
4355  static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
4356                 struct dx_map_entry *offsets, int count);
4357  static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
4358 -static void dx_insert_block (struct iam_path *path,
4359 -                            struct iam_frame *frame, u32 hash, u32 block);
4360 -static int ext3_htree_next_block(struct inode *dir, __u32 hash,
4361 -                                struct iam_path *path, __u32 *start_hash);
4362  static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
4363                        struct ext3_dir_entry_2 **res_dir, int *err);
4364  static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
4365                              struct inode *inode);
4366  
4367 -static inline void iam_path_init(struct iam_path *path,
4368 -                                struct iam_container *c, struct htree_cookie *hc);
4369 -static inline void iam_path_fini(struct iam_path *path);
4370 -
4371 -
4372 -/*
4373 - * Future: use high four bits of block for coalesce-on-delete flags
4374 - * Mask them off for now.
4375 - */
4376 -
4377 -static inline void *entry_off(struct iam_entry *entry, ptrdiff_t off)
4378 -{
4379 -       return (void *)((char *)entry + off);
4380 -}
4381 -
4382 -static inline struct iam_descr *path_descr(struct iam_path *p)
4383 -{
4384 -       return p->ip_container->ic_descr;
4385 -}
4386 -
4387 -static inline struct inode *path_obj(struct iam_path *p)
4388 -{
4389 -       return p->ip_container->ic_object;
4390 -}
4391 -
4392 -static inline size_t iam_entry_size(struct iam_path *p)
4393 -{
4394 -       return path_descr(p)->id_key_size + path_descr(p)->id_ptr_size;
4395 -}
4396 -
4397 -static inline struct iam_entry *iam_entry_shift(struct iam_path *p,
4398 -                                             struct iam_entry *entry, int shift)
4399 -{
4400 -       void *e = entry;
4401 -       return e + shift * iam_entry_size(p);
4402 -}
4403 -
4404 -static inline ptrdiff_t iam_entry_diff(struct iam_path *p,
4405 -                                     struct iam_entry *e1, struct iam_entry *e2)
4406 -{
4407 -       ptrdiff_t diff;
4408 -
4409 -       diff = (void *)e1 - (void *)e2;
4410 -       assert(diff / iam_entry_size(p) * iam_entry_size(p) == diff);
4411 -       return diff / iam_entry_size(p);
4412 -}
4413 -
4414 -static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry)
4415 -{
4416 -       return le32_to_cpu(*(u32 *)entry_off(entry, path_descr(p)->id_key_size))
4417 -               & 0x00ffffff;
4418 -}
4419 -
4420 -static inline void dx_set_block(struct iam_path *p,
4421 -                               struct iam_entry *entry, unsigned value)
4422 -{
4423 -       *(u32*)entry_off(entry,
4424 -                        path_descr(p)->id_key_size) = cpu_to_le32(value);
4425 -}
4426 -
4427 -static inline struct iam_key *dx_get_key(struct iam_path *p,
4428 -                                       struct iam_entry *entry,
4429 -                                       struct iam_key *key)
4430 -{
4431 -       memcpy(key, entry, path_descr(p)->id_key_size);
4432 -       return key;
4433 -}
4434 -
4435 -static inline struct iam_key *iam_key_at(struct iam_path *p,
4436 -                                      struct iam_entry *entry)
4437 -{
4438 -       return (struct iam_key *)entry;
4439 -}
4440 -
4441 -static inline void dx_set_key(struct iam_path *p,
4442 -                             struct iam_entry *entry, struct iam_key *key)
4443 -{
4444 -       memcpy(entry, key, path_descr(p)->id_key_size);
4445 -}
4446 -
4447 -static inline unsigned dx_get_count (struct iam_entry *entries)
4448 -{
4449 -       return le16_to_cpu(((struct dx_countlimit *) entries)->count);
4450 -}
4451 -
4452 -static inline unsigned dx_get_limit (struct iam_entry *entries)
4453 -{
4454 -       return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
4455 -}
4456 -
4457 -static inline void dx_set_count (struct iam_entry *entries, unsigned value)
4458 -{
4459 -       ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
4460 -}
4461 -
4462 -static inline void dx_set_limit (struct iam_entry *entries, unsigned value)
4463 +static inline void dx_set_limit(struct iam_entry *entries, unsigned value)
4464  {
4465         ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
4466  }
4467  
4468 -static inline unsigned dx_root_limit(struct iam_path *p)
4469 -{
4470 -       struct iam_descr *param = path_descr(p);
4471 -       unsigned entry_space = path_obj(p)->i_sb->s_blocksize -
4472 -               param->id_root_gap;
4473 -       return entry_space / (param->id_key_size + param->id_ptr_size);
4474 -}
4475 -
4476 -static inline unsigned dx_node_limit(struct iam_path *p)
4477 -{
4478 -       struct iam_descr *param = path_descr(p);
4479 -       unsigned entry_space   = path_obj(p)->i_sb->s_blocksize -
4480 -               param->id_node_gap;
4481 -       return entry_space / (param->id_key_size + param->id_ptr_size);
4482 -}
4483 -
4484 -static inline int dx_index_is_compat(struct iam_path *path)
4485 -{
4486 -       return path_descr(path) == &htree_compat_param;
4487 -}
4488 -
4489 -static struct iam_entry *dx_get_entries(struct iam_path *path, void *data,
4490 -                                      int root)
4491 +int dx_index_is_compat(struct iam_path *path)
4492  {
4493 -       return data +
4494 -               (root ?
4495 -                path_descr(path)->id_root_gap : path_descr(path)->id_node_gap);
4496 +       return iam_path_descr(path) == &iam_htree_compat_param;
4497  }
4498  
4499 -static struct iam_entry *dx_node_get_entries(struct iam_path *path,
4500 -                                           struct iam_frame *frame)
4501 -{
4502 -       return dx_get_entries(path,
4503 -                             frame->bh->b_data, frame == path->ip_frames);
4504 -}
4505  
4506 -static int dx_node_check(struct iam_path *p, struct iam_frame *f)
4507 +int dx_node_check(struct iam_path *p, struct iam_frame *f)
4508  {
4509         struct iam_entry     *e;
4510         struct iam_container *c;
4511         unsigned count;
4512         unsigned  i;
4513 +       iam_ptr_t  blk;
4514 +       iam_ptr_t  root;
4515 +       struct inode *inode;
4516  
4517         c = p->ip_container;
4518         e = dx_node_get_entries(p, f);
4519         count = dx_get_count(e);
4520         e = iam_entry_shift(p, e, 1);
4521 +       root = iam_path_descr(p)->id_ops->id_root_ptr(c);
4522 +
4523 +       inode = iam_path_obj(p);
4524         for (i = 0; i < count - 1; ++i, e = iam_entry_shift(p, e, 1)) {
4525 -               keycpy(c, p->ip_key_scratch[0], p->ip_key_scratch[1]);
4526 -               dx_get_key(p, e, p->ip_key_scratch[1]);
4527 +               iam_ikeycpy(c, iam_path_ikey(p, 0), iam_path_ikey(p, 1));
4528 +               iam_get_ikey(p, e, iam_path_ikey(p, 1));
4529                 if (i > 0 &&
4530 -                   keycmp(c, p->ip_key_scratch[0], p->ip_key_scratch[1]) > 0)
4531 +                   iam_ikeycmp(c, iam_path_ikey(p, 0),
4532 +                               iam_path_ikey(p, 1)) > 0) {
4533 +                       BREAKPOINT();
4534                         return 0;
4535         }
4536 -       return 1;
4537 -}
4538 -
4539 -static u32 htree_root_ptr(struct iam_container *c)
4540 -{
4541 +               blk = dx_get_block(p, e);
4542 +               if (inode->i_size < (blk + 1) * inode->i_sb->s_blocksize) {
4543 +                       BREAKPOINT();
4544         return 0;
4545 -}
4546 -
4547 -static int htree_node_check(struct iam_path *path, struct iam_frame *frame)
4548 -{
4549 -       void *data;
4550 -       struct iam_entry *entries;
4551 -       struct super_block *sb;
4552 -
4553 -       data = frame->bh->b_data;
4554 -       entries = dx_node_get_entries(path, frame);
4555 -       sb = path_obj(path)->i_sb;
4556 -       if (frame == path->ip_frames) {
4557 -               /* root node */
4558 -               struct dx_root *root;
4559 -               struct htree_cookie *hc = path->ip_descr_data;
4560 -
4561 -               root = data;
4562 -               if (root->info.hash_version > DX_HASH_MAX) {
4563 -                       ext3_warning(sb, __FUNCTION__,
4564 -                                    "Unrecognised inode hash code %d",
4565 -                                    root->info.hash_version);
4566 -                       return ERR_BAD_DX_DIR;
4567 -               }
4568 -
4569 -               if (root->info.unused_flags & 1) {
4570 -                       ext3_warning(sb, __FUNCTION__,
4571 -                                    "Unimplemented inode hash flags: %#06x",
4572 -                                    root->info.unused_flags);
4573 -                       return ERR_BAD_DX_DIR;
4574                 }
4575 -
4576 -               path->ip_indirect = root->info.indirect_levels;
4577 -               if (path->ip_indirect > DX_MAX_TREE_HEIGHT - 1) {
4578 -                       ext3_warning(sb, __FUNCTION__,
4579 -                                    "Unimplemented inode hash depth: %#06x",
4580 -                                    root->info.indirect_levels);
4581 -                       return ERR_BAD_DX_DIR;
4582 +               /*
4583 +                * By definition of a tree, no node points to the root.
4584 +                */
4585 +               if (blk == root) {
4586 +                       BREAKPOINT();
4587 +                       return 0;
4588                 }
4589 -
4590 -               assert((char *)entries == (((char *)&root->info) +
4591 -                                          root->info.info_length));
4592 -               assert(dx_get_limit(entries) == dx_root_limit(path));
4593 -
4594 -               hc->hinfo->hash_version = root->info.hash_version;
4595 -               hc->hinfo->seed = EXT3_SB(sb)->s_hash_seed;
4596 -               if (hc->dentry)
4597 -                       ext3fs_dirhash(hc->dentry->d_name.name,
4598 -                                      hc->dentry->d_name.len, hc->hinfo);
4599 -               path->ip_key_target = (struct iam_key *)&hc->hinfo->hash;
4600 -       } else {
4601 -               /* non-root index */
4602 -               assert(entries == data + path_descr(path)->id_node_gap);
4603 -               assert(dx_get_limit(entries) == dx_node_limit(path));
4604         }
4605 -       frame->entries = frame->at = entries;
4606 -       return 0;
4607 -}
4608 -
4609 -static int htree_node_init(struct iam_container *c,
4610 -                          struct buffer_head *bh, int root)
4611 -{
4612 -       struct dx_node *node;
4613 -
4614 -       assert(!root);
4615 -
4616 -       node = (void *)bh->b_data;
4617 -       node->fake.rec_len = cpu_to_le16(c->ic_object->i_sb->s_blocksize);
4618 -       node->fake.inode = 0;
4619 -       return 0;
4620 -}
4621 -
4622 -static int htree_node_read(struct iam_container *c, iam_ptr_t ptr,
4623 -                          handle_t *handle, struct buffer_head **bh)
4624 -{
4625 -       int result = 0;
4626 -
4627 -       *bh = ext3_bread(handle, c->ic_object, (int)ptr, 0, &result);
4628 -       if (*bh == NULL)
4629 -               result = -EIO;
4630 -       return result;
4631 -}
4632 -
4633 -static int htree_keycmp(struct iam_container *c,
4634 -                       struct iam_key *k1, struct iam_key *k2)
4635 -{
4636 -       __u32 p1 = le32_to_cpu(*(__u32 *)k1);
4637 -       __u32 p2 = le32_to_cpu(*(__u32 *)k2);
4638 -
4639 -       return p1 > p2 ? +1 : (p1 < p2 ? -1 : 0);
4640 +       return 1;
4641  }
4642  
4643  /*
4644 @@ -800,598 +241,121 @@ struct stats dx_show_entries(struct dx_h
4645  }
4646  #endif /* DX_DEBUG */
4647  
4648 -static int dx_lookup(struct iam_path *path)
4649 +int dx_lookup(struct iam_path *path)
4650  {
4651         u32 ptr;
4652         int err = 0;
4653         int i;
4654 +       int delta;
4655  
4656         struct iam_descr *param;
4657         struct iam_frame *frame;
4658         struct iam_container *c;
4659  
4660 -       param = path_descr(path);
4661 +       param = iam_path_descr(path);
4662         c = path->ip_container;
4663         
4664 +       delta = dx_index_is_compat(path) ? 1 : 2;
4665 +
4666         for (frame = path->ip_frames, i = 0,
4667 -                    ptr = param->id_root_ptr(path->ip_container);
4668 +                    ptr = param->id_ops->id_root_ptr(c);
4669              i <= path->ip_indirect;
4670              ptr = dx_get_block(path, frame->at), ++frame, ++i) {
4671                 struct iam_entry *entries;
4672 -               struct iam_entry *p;
4673 -               struct iam_entry *q;
4674 -               struct iam_entry *m;
4675 -               unsigned count;
4676 -
4677 -               err = param->id_node_read(c, (iam_ptr_t)ptr, NULL, &frame->bh);
4678 -               if (err != 0)
4679 -                       break;
4680 -               err = param->id_node_check(path, frame);
4681 -               if (err != 0)
4682 -                       break;
4683 -
4684 -               assert(dx_node_check(path, frame));
4685 -
4686 -               entries = frame->entries;
4687 -               count = dx_get_count(entries);
4688 -               assert(count && count <= dx_get_limit(entries));
4689 -               p = iam_entry_shift(path, entries, 1);
4690 -               q = iam_entry_shift(path, entries, count - 1);
4691 -               while (p <= q) {
4692 -                       m = iam_entry_shift(path,
4693 -                                          p, iam_entry_diff(path, q, p) / 2);
4694 -                       dxtrace(printk("."));
4695 -                       if (keycmp(c, iam_key_at(path, m),
4696 -                                  path->ip_key_target) > 0)
4697 -                               q = iam_entry_shift(path, m, -1);
4698 -                       else
4699 -                               p = iam_entry_shift(path, m, +1);
4700 -               }
4701 -
4702 -               frame->at = iam_entry_shift(path, p, -1);
4703 -               if (1) { // linear search cross check
4704 -                       unsigned n = count - 1;
4705 -                       struct iam_entry *at;
4706 -
4707 -                       at = entries;
4708 -                       while (n--) {
4709 -                               dxtrace(printk(","));
4710 -                               at = iam_entry_shift(path, at, +1);
4711 -                               if (keycmp(c, iam_key_at(path, at),
4712 -                                          path->ip_key_target) > 0) {
4713 -                                       if (at != iam_entry_shift(path, frame->at, 1)) {
4714 -                                               BREAKPOINT;
4715 -                                               printk(KERN_EMERG "%i\n",
4716 -                                                      keycmp(c, iam_key_at(path, at),
4717 -                                                             path->ip_key_target));
4718 -                                       }
4719 -                                       at = iam_entry_shift(path, at, -1);
4720 -                                       break;
4721 -                               }
4722 -                       }
4723 -                       assert(at == frame->at);
4724 -               }
4725 -       }
4726 -       if (err != 0)
4727 -               iam_path_fini(path);
4728 -       path->ip_frame = --frame;
4729 -       return err;
4730 -}
4731 -
4732 -/*
4733 - * Probe for a directory leaf block to search.
4734 - *
4735 - * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
4736 - * error in the directory index, and the caller should fall back to
4737 - * searching the directory normally.  The callers of dx_probe **MUST**
4738 - * check for this error code, and make sure it never gets reflected
4739 - * back to userspace.
4740 - */
4741 -static int dx_probe(struct dentry *dentry, struct inode *dir,
4742 -                   struct dx_hash_info *hinfo, struct iam_path *path)
4743 -{
4744 -       int err;
4745 -       struct htree_cookie hc = {
4746 -               .dentry = dentry,
4747 -               .hinfo  = hinfo
4748 -       };
4749 -
4750 -       assert(dx_index_is_compat(path));
4751 -       path->ip_descr_data = &hc;
4752 -       err = dx_lookup(path);
4753 -       assert(err != 0 || path->ip_frames[path->ip_indirect].bh != NULL);
4754 -       return err;
4755 -}
4756 -
4757 -/*
4758 - * Initialize container @c, acquires additional reference on @inode.
4759 - */
4760 -int iam_container_init(struct iam_container *c,
4761 -                      struct iam_descr *descr, struct inode *inode)
4762 -{
4763 -       memset(c, 0, sizeof *c);
4764 -       c->ic_descr  = descr;
4765 -       c->ic_object = igrab(inode);
4766 -       if (c->ic_object != NULL)
4767 -               return 0;
4768 -       else
4769 -               return -ENOENT;
4770 -}
4771 -
4772 -/*
4773 - * Finalize container @c, release all resources.
4774 - */
4775 -void iam_container_fini(struct iam_container *c)
4776 -{
4777 -       if (c->ic_object != NULL) {
4778 -               iput(c->ic_object);
4779 -               c->ic_object = NULL;
4780 -       }
4781 -}
4782 -
4783 -static inline void iam_path_init(struct iam_path *path, struct iam_container *c, 
4784 -                                struct htree_cookie *hc)
4785 -{
4786 -       memset(path, 0, sizeof *path);
4787 -       path->ip_container = c;
4788 -       path->ip_frame = path->ip_frames;
4789 -       path->ip_descr_data = hc;
4790 -}
4791 -
4792 -static inline void iam_path_fini(struct iam_path *path)
4793 -{
4794 -       int i;
4795 -
4796 -       for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) {
4797 -               if (path->ip_frames[i].bh != NULL) {
4798 -                       brelse(path->ip_frames[i].bh);
4799 -                       path->ip_frames[i].bh = NULL;
4800 -               }
4801 -       }
4802 -}
4803 -
4804 -static void iam_path_compat_init(struct iam_path_compat *path,
4805 -                                struct inode *inode)
4806 -{
4807 -       int i;
4808 -
4809 -       iam_container_init(&path->ipc_container, &htree_compat_param, inode);
4810 -       /*
4811 -        * XXX hack allowing finalization of iam_path_compat with
4812 -        * iam_path_fini().
4813 -        */
4814 -       iput(inode);
4815 -       iam_path_init(&path->ipc_path, &path->ipc_container, NULL);
4816 -       for (i = 0; i < ARRAY_SIZE(path->ipc_path.ip_key_scratch); ++i)
4817 -               path->ipc_path.ip_key_scratch[i] =
4818 -                       (struct iam_key *)&path->ipc_scrach[i];
4819 -}
4820 -
4821 -static void iam_path_compat_fini(struct iam_path_compat *path)
4822 -{
4823 -       iam_path_fini(&path->ipc_path);
4824 -       iam_container_fini(&path->ipc_container);
4825 -}
4826 -
4827 -static int iam_leaf_init(struct iam_path *path, struct iam_leaf *leaf)
4828 -{
4829 -       int block, err;
4830 -       struct buffer_head *bh;
4831 -       
4832 -       block = dx_get_block(path, path->ip_frame->at);
4833 -       err = path_descr(path)->id_node_read(path->ip_container, block, 
4834 -                                            NULL, &bh);
4835 -       if (err)
4836 -               return err;
4837 -
4838 -       leaf->bh = bh;
4839 -       leaf->entries = (struct iam_leaf_entry *)bh->b_data;
4840 -       return 0;
4841 -}
4842 -
4843 -static void iam_leaf_fini(struct iam_leaf *leaf)
4844 -{
4845 -       if (leaf->bh)
4846 -               brelse(leaf->bh);
4847 -}
4848 -
4849 -/*
4850 - * Search container @c for record with key @k. If record is found, its data
4851 - * are moved into @r.
4852 - *
4853 - *
4854 - *
4855 - * Return values: +ve: found, 0: not-found, -ve: error
4856 - */
4857 -
4858 -int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r)
4859 -{
4860 -       struct dx_hash_info     hinfo;
4861 -       struct iam_path_compat cpath;
4862 -       struct iam_path *path = &cpath.ipc_path;
4863 -       struct htree_cookie hc = {
4864 -               .hinfo  = &hinfo
4865 -       };
4866 -       int err, i;
4867 -
4868 -       iam_path_init(path, c, &hc);
4869 -       for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
4870 -               path->ip_key_scratch[i] =
4871 -                       (struct iam_key *)&cpath.ipc_scrach[i];
4872 -       err = dx_lookup(path);
4873 -       do {
4874 -               struct iam_leaf leaf;
4875 -               err = iam_leaf_init(path, &leaf);
4876 -               if (err)
4877 -                       goto errout;
4878 -
4879 -               for (path_descr(path)->id_leaf.start(c, &leaf);
4880 -                    !path_descr(path)->id_leaf.at_end(c, &leaf);
4881 -                    path_descr(path)->id_leaf.next(c, &leaf)) {
4882 -                       struct iam_key *key;
4883 -
4884 -                       key = kmalloc(path_descr(path)->id_key_size, GFP_KERNEL);
4885 -                       path_descr(path)->id_leaf.key(c, &leaf, key);
4886 -                       if (keycmp(c, k, key) == 0) {
4887 -                               memcpy(r, path_descr(path)->id_leaf.rec(c, &leaf),
4888 -                                      path_descr(path)->id_rec_size);
4889 -                               iam_path_fini(path);
4890 -                               iam_leaf_fini(&leaf);
4891 -                               return 0;
4892 -                       }
4893 -               }
4894 -
4895 -               iam_leaf_fini(&leaf);
4896 -               /* Check to see if we should continue to search */
4897 -               err = ext3_htree_next_block(c->ic_object, hinfo.hash, path, NULL);
4898 -               if (err < 0)
4899 -                       goto errout;
4900 -       } while (err == 1);
4901 -errout:
4902 -       iam_path_fini(path);
4903 -       return(err);
4904 -}
4905 -EXPORT_SYMBOL(iam_lookup);
4906 -
4907 -static inline size_t iam_leaf_entry_size(struct iam_path *p)
4908 -{
4909 -       return path_descr(p)->id_rec_size + path_descr(p)->id_key_size;
4910 -}
4911 -
4912 -static inline ptrdiff_t iam_leaf_entry_diff(struct iam_path *p,
4913 -                                     struct iam_leaf_entry *e1, struct iam_leaf_entry *e2)
4914 -{
4915 -       ptrdiff_t diff;
4916 -
4917 -       diff = (void *)e1 - (void *)e2;
4918 -       assert(diff / iam_leaf_entry_size(p) * iam_leaf_entry_size(p) == diff);
4919 -       return diff / iam_leaf_entry_size(p);
4920 -}
4921 -
4922 -static inline struct iam_leaf_entry* 
4923 -iam_leaf_entry_shift(struct iam_path *p, struct iam_leaf_entry *entry, int shift)
4924 -{
4925 -       void *e = entry;
4926 -       return e + shift * iam_leaf_entry_size(p);
4927 -}
4928 -
4929 -static inline struct iam_key *
4930 -dx_leaf_get_key(struct iam_path *p, struct iam_leaf_entry *e, struct iam_key *key)
4931 -{
4932 -       memcpy(key, e, path_descr(p)->id_key_size);
4933 -       return key;
4934 -}
4935 -
4936 -static inline struct iam_key *
4937 -iam_leaf_key_at(struct iam_path *p, struct iam_leaf_entry *entry)
4938 -{
4939 -       void *e = entry;
4940 -       return e + path_descr(p)->id_rec_size;
4941 -}
4942 -static inline struct iam_leaf_entry *
4943 -iam_leaf_entry_at(struct iam_path *p, struct iam_leaf_entry *entry)
4944 -{
4945 -       return entry; 
4946 -}
4947 -
4948 -static int iam_leaf_lookup(struct iam_path *path, struct iam_leaf *leaf, 
4949 -                          struct iam_key *k)
4950 -{
4951 -       struct iam_leaf_entry *p, *q, *m;
4952 -       struct iam_leaf_entry *entries = leaf->entries;
4953 -       int count = dx_get_count((struct iam_entry *)entries);
4954 -       
4955 -       p = iam_leaf_entry_shift(path, entries, 1);
4956 -       q = iam_leaf_entry_shift(path, entries, count - 1);
4957 -       while (p <= q) {
4958 -               m = iam_leaf_entry_shift(path,
4959 -                                  p, iam_leaf_entry_diff(path, q, p) / 2);
4960 -               dxtrace(printk("."));
4961 -               if (keycmp(path->ip_container, iam_leaf_key_at(path, m),
4962 -                          path->ip_key_target) > 0)
4963 -                       q = iam_leaf_entry_shift(path, m, -1);
4964 -               else
4965 -                       p = iam_leaf_entry_shift(path, m, +1);
4966 -       }
4967 -       leaf->at = q; 
4968 -       return 0;
4969 -}
4970 -
4971 -/*XXX what kind of lock should this entry be locked: WangDi */
4972 -static int iam_leaf_insert(handle_t *handle, struct iam_path *path, 
4973 -                          struct iam_key *k, struct iam_rec *r)
4974 -{
4975 -       struct iam_leaf leaf;
4976 -       struct iam_leaf_entry *p, *q;
4977 -       int err, count;
4978 -
4979 -       err = iam_leaf_init(path, &leaf);
4980 -       if (err)
4981 -               goto errout;
4982 -       path_descr(path)->id_leaf.start(path->ip_container, &leaf);
4983 -       count = dx_get_count((struct iam_entry *)leaf.entries);
4984 -       if (dx_get_count((struct iam_entry *)leaf.entries) >= 
4985 -           dx_get_limit((struct iam_entry *)leaf.entries)){
4986 -               err = -ENOSPC;
4987 -               goto errout;
4988 -       }
4989 -
4990 -       err = iam_leaf_lookup(path, &leaf, k);
4991 -       if (err)
4992 -               goto errout;
4993 -       
4994 -       /*insert the k/r to leaf entries*/
4995 -       p = iam_leaf_entry_shift(path, leaf.at, 1);
4996 -       q = iam_leaf_entry_shift(path, leaf.entries, count - 1);
4997 -       while (q < p) {
4998 -               memcpy(iam_leaf_entry_shift(path, q, 1), q, iam_leaf_entry_size(path));
4999 -               q = iam_leaf_entry_shift(path, q, -1);  
5000 -       }
5001 -       memcpy(iam_leaf_entry_at(path, p), r, path_descr(path)->id_rec_size);
5002 -       memcpy(iam_leaf_key_at(path, p), k, path_descr(path)->id_key_size);
5003 -
5004 -       dx_set_count((struct iam_entry*)leaf.entries, count + 1);
5005 -       err = ext3_journal_dirty_metadata(handle, leaf.bh);
5006 -       if (err)
5007 -               ext3_std_error(path->ip_container->ic_object->i_sb, err);
5008 -errout:        
5009 -       iam_leaf_fini(&leaf);
5010 -       return err;
5011 -} 
5012 -
5013 -static int split_leaf_node(handle_t *handle, struct iam_path *path)
5014 -{
5015 -       struct inode *dir = path_obj(path);
5016 -       unsigned continued = 0;
5017 -       struct buffer_head *bh2;
5018 -       u32 newblock, hash_split;
5019 -       char *data2;
5020 -       struct iam_leaf leaf;
5021 -       unsigned split;
5022 -       int     err;
5023 -
5024 -       bh2 = ext3_append (handle, dir, &newblock, &err);
5025 -       if (!(bh2)) {
5026 -               err = -ENOSPC;
5027 -               goto errout;
5028 -       }
5029 -       err = iam_leaf_init(path, &leaf);
5030 -       if (err)
5031 -               goto errout;
5032 -
5033 -       BUFFER_TRACE(leaf.bh, "get_write_access");
5034 -       err = ext3_journal_get_write_access(handle, leaf.bh);
5035 -       if (err) {
5036 -       journal_error:
5037 -               iam_leaf_fini(&leaf);
5038 -               brelse(bh2);
5039 -               ext3_std_error(dir->i_sb, err);
5040 -               err = -EIO;
5041 -               goto errout;
5042 -       }
5043 -       data2 = bh2->b_data;
5044 -       split = dx_get_count((struct iam_entry*)leaf.entries)/2;
5045 -       hash_split = *(__u32*)iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split));
5046 -       if (keycmp(path->ip_container, iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split)),
5047 -                  iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split -1))) == 0)
5048 -               continued = 1;
5049 -
5050 -       memcpy(iam_leaf_entry_shift(path, (struct iam_leaf_entry *)data2, 1),
5051 -              iam_leaf_entry_shift(path, leaf.entries, split),
5052 -              split * iam_leaf_entry_size(path));
5053 -
5054 -       /* Which block gets the new entry? */
5055 -       dx_insert_block(path, path->ip_frame, hash_split + continued, newblock);
5056 -       err = ext3_journal_dirty_metadata (handle, bh2);
5057 -       if (err)
5058 -               goto journal_error;
5059 -       err = ext3_journal_dirty_metadata (handle, leaf.bh);
5060 -       if (err)
5061 -               goto journal_error;
5062 -       brelse (bh2);
5063 -       iam_leaf_fini(&leaf);
5064 -errout:
5065 -       return err;
5066 -}
5067 -
5068 -static int split_index_node(handle_t *handle, struct iam_path *path);
5069 -/*
5070 - * Insert new record @r with key @k into container @c (within context of
5071 - * transaction @h.
5072 - *
5073 - * Return values: 0: success, -ve: error, including -EEXIST when record with
5074 - * given key is already present.
5075 - *
5076 - * postcondition: ergo(result == 0 || result == -EEXIST,
5077 - *                                  iam_lookup(c, k, r2) > 0 &&
5078 - *                                  !memcmp(r, r2, c->ic_descr->id_rec_size));
5079 - */
5080 -int iam_insert(handle_t *handle, struct iam_container *c, struct iam_key *k, 
5081 -              struct iam_rec *r)
5082 -{
5083 -       struct dx_hash_info     hinfo;
5084 -       struct iam_path_compat cpath;
5085 -       struct iam_path *path = &cpath.ipc_path;
5086 -       struct htree_cookie hc = {
5087 -               .hinfo  = &hinfo
5088 -       };
5089 -       int err, i;
5090 -
5091 -       iam_path_init(path, c, &hc);
5092 -       for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
5093 -               path->ip_key_scratch[i] =
5094 -                       (struct iam_key *)&cpath.ipc_scrach[i];
5095 -       err = dx_lookup(path);
5096 -       if (err)
5097 -               goto errout; 
5098 -
5099 -       err = iam_leaf_insert(handle, path, k, r);
5100 -       
5101 -       if (err != -ENOSPC) 
5102 -               goto errout;    
5103 -
5104 -       err = split_index_node(handle, path);
5105 -       if (err)
5106 -               goto errout;    
5107 -
5108 -       err = split_leaf_node(handle, path);
5109 -       if (err)
5110 -               goto errout;
5111 -       
5112 -       err = iam_leaf_insert(handle, path, k, r);
5113 -errout:
5114 -       iam_path_fini(path);
5115 -       return(err);
5116 -}
5117 -
5118 -EXPORT_SYMBOL(iam_insert);
5119 -static int iam_leaf_delete(handle_t *handle, struct iam_path *path, 
5120 -                          struct iam_key *k)
5121 -{
5122 -       struct iam_leaf leaf;
5123 -       struct iam_leaf_entry *p, *q;
5124 -       int err, count;
5125 -
5126 -       err = iam_leaf_init(path, &leaf);
5127 -       if (err)
5128 -               goto errout;
5129 -       
5130 -       err = iam_leaf_lookup(path, &leaf, k);
5131 -       if (err)
5132 -               goto errout;
5133 -
5134 -       count = dx_get_count((struct iam_entry*)leaf.entries);
5135 -       /*delete the k to leaf entries*/
5136 -       p = iam_leaf_entry_shift(path, leaf.at, 1);
5137 -       q = iam_leaf_entry_shift(path, leaf.entries, count - 1);
5138 -       while (p < q) {
5139 -               memcpy(p, iam_leaf_entry_shift(path, p, 1), iam_leaf_entry_size(path));
5140 -               p = iam_leaf_entry_shift(path, p, 1);
5141 -       }
5142 -       dx_set_count((struct iam_entry*)leaf.entries, count - 1);
5143 -
5144 -       err = ext3_journal_dirty_metadata(handle, leaf.bh);
5145 -       if (err)
5146 -               ext3_std_error(path_obj(path)->i_sb, err);
5147 -errout:        
5148 -       iam_leaf_fini(&leaf);
5149 -       return err;
5150 -}
5151 -
5152 -/*
5153 - * Delete existing record with key @k.
5154 - *
5155 - * Return values: 0: success, -ENOENT: not-found, -ve: other error.
5156 - *
5157 - * postcondition: ergo(result == 0 || result == -ENOENT,
5158 - *                                 !iam_lookup(c, k, *));
5159 - */
5160 -int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k)
5161 -{
5162 -       struct dx_hash_info     hinfo;
5163 -       struct iam_path_compat cpath;
5164 -       struct iam_path *path = &cpath.ipc_path;
5165 -       struct htree_cookie hc = {
5166 -               .hinfo  = &hinfo
5167 -       };
5168 -       int err, i;
5169 -
5170 -       iam_path_init(path, c, &hc);
5171 -       for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
5172 -               path->ip_key_scratch[i] =
5173 -                       (struct iam_key *)&cpath.ipc_scrach[i];
5174 -       err = dx_lookup(path);
5175 -       if (err)
5176 -               goto errout; 
5177 +               struct iam_entry *p;
5178 +               struct iam_entry *q;
5179 +               struct iam_entry *m;
5180 +               unsigned count;
5181  
5182 -       err = iam_leaf_delete(h, path, k);
5183 -errout:
5184 -       iam_path_fini(path);
5185 -       return err;
5186 -}
5187 +               err = param->id_ops->id_node_read(c, (iam_ptr_t)ptr, NULL,
5188 +                                                 &frame->bh);
5189 +               if (err != 0)
5190 +                       break;
5191  
5192 -EXPORT_SYMBOL(iam_delete);
5193 +               if (EXT3_INVARIANT_ON) {
5194 +                       err = param->id_ops->id_node_check(path, frame);
5195 +                       if (err != 0)
5196 +                               break;
5197 +               }
5198  
5199 -static int iam_leaf_update(handle_t *handle, struct iam_path *path, 
5200 -                          struct iam_key *k, struct iam_rec *r)
5201 -{
5202 -       struct iam_leaf leaf;
5203 -       int err;
5204 +               err = param->id_ops->id_node_load(path, frame);
5205 +               if (err != 0)
5206 +                       break;
5207  
5208 -       err = iam_leaf_init(path, &leaf);
5209 -       if (err)
5210 -               goto errout;
5211 +               assert_inv(dx_node_check(path, frame));
5212         
5213 -       err = iam_leaf_lookup(path, &leaf, k);
5214 -       if (err)
5215 -               goto errout;
5216 +               entries = frame->entries;
5217 +               count = dx_get_count(entries);
5218 +               assert_corr(count && count <= dx_get_limit(entries));
5219 +               p = iam_entry_shift(path, entries, delta);
5220 +               q = iam_entry_shift(path, entries, count - 1);
5221 +               while (p <= q) {
5222 +                       m = iam_entry_shift(path,
5223 +                                          p, iam_entry_diff(path, q, p) / 2);
5224 +                       dxtrace(printk("."));
5225 +                       if (iam_ikeycmp(c, iam_ikey_at(path, m),
5226 +                                       path->ip_ikey_target) > 0)
5227 +                               q = iam_entry_shift(path, m, -1);
5228 +                       else
5229 +                               p = iam_entry_shift(path, m, +1);
5230 +               }
5231  
5232 -       memcpy(iam_leaf_entry_at(path, leaf.at), r, path_descr(path)->id_rec_size);
5233 -       memcpy(iam_leaf_key_at(path, leaf.at), k, path_descr(path)->id_key_size);
5234 +               frame->at = iam_entry_shift(path, p, -1);
5235 +               if (EXT3_INVARIANT_ON) { // linear search cross check
5236 +                       unsigned n = count - 1;
5237 +                       struct iam_entry *at;
5238  
5239 -       err = ext3_journal_dirty_metadata(handle, leaf.bh);
5240 -       if (err)
5241 -               ext3_std_error(path_obj(path)->i_sb, err);
5242 -errout:        
5243 -       iam_leaf_fini(&leaf);
5244 +                       at = entries;
5245 +                       while (n--) {
5246 +                               dxtrace(printk(","));
5247 +                               at = iam_entry_shift(path, at, +1);
5248 +                               if (iam_ikeycmp(c, iam_ikey_at(path, at),
5249 +                                              path->ip_ikey_target) > 0) {
5250 +                                       if (at != iam_entry_shift(path, frame->at, 1)) {
5251 +                                               BREAKPOINT();
5252 +                                               printk(KERN_EMERG "%i\n",
5253 +                                                      iam_ikeycmp(c, iam_ikey_at(path, at),
5254 +                                                             path->ip_ikey_target));
5255 +                                       }
5256 +                                       at = iam_entry_shift(path, at, -1);
5257 +                                       break;
5258 +                               }
5259 +                       }
5260 +                       assert_corr(at == frame->at);
5261 +               }
5262 +       }
5263 +       if (err != 0)
5264 +               iam_path_fini(path);
5265 +       path->ip_frame = --frame;
5266         return err;
5267  }
5268 +
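dx_lookup() descends one frame per level and binary-searches each index node for the last entry whose key does not exceed the target, skipping the first delta slots (one in compat htree nodes, two in iam nodes, where the extra slots apparently hold the count/limit header and a leftmost, keyless entry). A minimal standalone sketch of that search convention, using hypothetical fixed-width entries rather than the iam accessors:

	struct demo_entry { unsigned key; unsigned blk; };

	/*
	 * Return the index of the last entry with key <= target,
	 * treating slots [0, delta) as headers with implicit -inf
	 * keys, as dx_lookup() does via iam_entry_shift().
	 */
	static int demo_descend(const struct demo_entry *e, int count,
	                        unsigned target, int delta)
	{
	        int p = delta, q = count - 1;

	        while (p <= q) {
	                int m = p + (q - p) / 2;

	                if (e[m].key > target)
	                        q = m - 1;
	                else
	                        p = m + 1;
	        }
	        return p - 1;   /* becomes frame->at */
	}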
5269  /*
5270 - * Replace existing record with key @k, or insert new one. New record data are
5271 - * in @r.
5272 - *
5273 - * Return values: 0: success, -ve: error.
5274 + * Probe for a directory leaf block to search.
5275   *
5276 - * postcondition: ergo(result == 0, iam_lookup(c, k, r2) > 0 &&
5277 - *                                  !memcmp(r, r2, c->ic_descr->id_rec_size));
5278 + * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
5279 + * error in the directory index, and the caller should fall back to
5280 + * searching the directory normally.  The callers of dx_probe **MUST**
5281 + * check for this error code, and make sure it never gets reflected
5282 + * back to userspace.
5283   */
5284 -int iam_update(handle_t *h, struct iam_container *c,
5285 -              struct iam_key *k, struct iam_rec *r)
5286 +static int dx_probe(struct dentry *dentry, struct inode *dir,
5287 +                   struct dx_hash_info *hinfo, struct iam_path *path)
5288  {
5289 -       struct dx_hash_info     hinfo;
5290 -       struct iam_path_compat cpath;
5291 -       struct iam_path *path = &cpath.ipc_path;
5292 -       struct htree_cookie hc = {
5293 -               .hinfo  = &hinfo
5294 -       };
5295 -       int err, i;
5296 +       int err;
5297 +       struct iam_path_compat *ipc;
5298         
5299 -       iam_path_init(path, c, &hc);
5300 -       for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
5301 -               path->ip_key_scratch[i] =
5302 -                       (struct iam_key *)&cpath.ipc_scrach[i];
5303 -       err = dx_lookup(path);
5304 -       if (err)
5305 -               goto errout; 
5306 +       assert_corr(path->ip_data != NULL);
5307 +       ipc = container_of(path->ip_data, struct iam_path_compat, ipc_descr);
5308 +       ipc->ipc_dentry = dentry;
5309 +       ipc->ipc_hinfo = hinfo;
5310  
5311 -       err = iam_leaf_update(h, path, k, r);
5312 -errout:
5313 -       iam_path_fini(path);
5314 +       assert_corr(dx_index_is_compat(path));
5315 +       err = dx_lookup(path);
5316 +       assert_corr(err != 0 || path->ip_frames[path->ip_indirect].bh != NULL);
5317         return err;
5318  }
5319  
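The contract spelled out in the comment is easy to violate; a hedged sketch of a conforming caller (demo_probe_caller and demo_linear_scan are hypothetical):

	static int demo_probe_caller(struct dentry *dentry, struct inode *dir,
	                             struct dx_hash_info *hinfo,
	                             struct iam_path *path)
	{
	        int err;

	        err = dx_probe(dentry, dir, hinfo, path);
	        if (err == ERR_BAD_DX_DIR)
	                /* broken index: fall back, never leak the code */
	                return demo_linear_scan(dir, dentry);
	        return err;
	}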
5320 -EXPORT_SYMBOL(iam_update);
5321 -
5322  /*
5323   * This function increments the frame pointer to search the next leaf
5324   * block, and reads in the necessary intervening nodes if the search
5325 @@ -1409,16 +373,15 @@ EXPORT_SYMBOL(iam_update);
5326   * If start_hash is non-null, it will be filled in with the starting
5327   * hash of the next page.
5328   */
5329 -static int ext3_htree_next_block(struct inode *dir, __u32 hash,
5330 -                                struct iam_path *path, __u32 *start_hash)
5331 +static int ext3_htree_advance(struct inode *dir, __u32 hash,
5332 +                             struct iam_path *path, __u32 *start_hash,
5333 +                             int compat)
5334  {
5335         struct iam_frame *p;
5336         struct buffer_head *bh;
5337         int err, num_frames = 0;
5338         __u32 bhash;
5339  
5340 -       assert(dx_index_is_compat(path));
5341 -
5342         p = path->ip_frame;
5343         /*
5344          * Find the next leaf page by incrementing the frame pointer.
5345 @@ -1438,6 +401,10 @@ static int ext3_htree_next_block(struct 
5346                 --p;
5347         }
5348  
5349 +       if (compat) {
5350 +               /*
5351 +                * Htree hash magic.
5352 +                */
5353         /*
5354          * If the hash is 1, then continue only if the next page has a
5355          * continuation hash of any value.  This is used for readdir
5356 @@ -1445,19 +412,21 @@ static int ext3_htree_next_block(struct 
5357          * desired continuation hash.  If it doesn't, return since
5358          * there's no point to read in the successive index pages.
5359          */
5360 -       dx_get_key(path, p->at, (struct iam_key *)&bhash);
5361 +               iam_get_ikey(path, p->at, (struct iam_ikey *)&bhash);
5362         if (start_hash)
5363                 *start_hash = bhash;
5364         if ((hash & 1) == 0) {
5365                 if ((bhash & ~1) != hash)
5366                         return 0;
5367         }
5368 +       }
5369         /*
5370          * If the hash is HASH_NB_ALWAYS, we always go to the next
5371          * block so no check is necessary
5372          */
5373         while (num_frames--) {
5374 -               err = path_descr(path)->id_node_read(path->ip_container,
5375 +               err = iam_path_descr(path)->id_ops->
5376 +                       id_node_read(path->ip_container,
5377                                                      (iam_ptr_t)dx_get_block(path, p->at),
5378                                                      NULL, &bh);
5379                 if (err != 0)
5380 @@ -1465,12 +434,23 @@ static int ext3_htree_next_block(struct 
5381                 ++p;
5382                 brelse (p->bh);
5383                 p->bh = bh;
5384 -               p->at = p->entries = dx_node_get_entries(path, p);
5385 -               assert(dx_node_check(path, p));
5386 +               p->entries = dx_node_get_entries(path, p);
5387 +               p->at = iam_entry_shift(path, p->entries, !compat);
5388 +               assert_inv(dx_node_check(path, p));
5389         }
5390         return 1;
5391  }
5392  
5393 +int iam_index_next(struct iam_container *c, struct iam_path *path)
5394 +{
5395 +       return ext3_htree_advance(c->ic_object, 0, path, NULL, 0);
5396 +}
5397 +
5398 +int ext3_htree_next_block(struct inode *dir, __u32 hash,
5399 +                         struct iam_path *path, __u32 *start_hash)
5400 +{
5401 +       return ext3_htree_advance(dir, hash, path, start_hash, 1);
5402 +}
5403  
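ext3_htree_advance() returns 1 after moving to the next leaf, 0 when the traversal is exhausted (or the continuation-hash rules say to stop), and a negative errno on a failed node read. Callers therefore loop on 1, as the deleted iam_lookup() above did; a hedged sketch:

	do {
	        /* ... process the current leaf block ... */
	        err = ext3_htree_next_block(dir, hash, path, NULL);
	        if (err < 0)
	                goto errout;    /* index node read failed */
	} while (err == 1);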
5404  /*
5405   * p is at least 6 bytes before the end of page
5406 @@ -1662,21 +642,30 @@ static void dx_sort_map (struct dx_map_e
5407         } while(more);
5408  }
5409  
5410 -static void dx_insert_block(struct iam_path *path,
5411 -                           struct iam_frame *frame, u32 hash, u32 block)
5412 +void iam_insert_key(struct iam_path *path, struct iam_frame *frame,
5413 +                   const struct iam_ikey *key, iam_ptr_t ptr)
5414  {
5415         struct iam_entry *entries = frame->entries;
5416 -       struct iam_entry *old = frame->at, *new = iam_entry_shift(path, old, +1);
5417 +       struct iam_entry *new = iam_entry_shift(path, frame->at, +1);
5418         int count = dx_get_count(entries);
5419  
5420 -       assert(count < dx_get_limit(entries));
5421 -       assert(old < iam_entry_shift(path, entries, count));
5422 +       assert_corr(count < dx_get_limit(entries));
5423 +       assert_corr(frame->at < iam_entry_shift(path, entries, count));
5424 +
5425         memmove(iam_entry_shift(path, new, 1), new,
5426                 (char *)iam_entry_shift(path, entries, count) - (char *)new);
5427 -       dx_set_key(path, new, (struct iam_key *)&hash);
5428 -       dx_set_block(path, new, block);
5429 +       dx_set_ikey(path, new, key);
5430 +       dx_set_block(path, new, ptr);
5431         dx_set_count(entries, count + 1);
5432  }
5433 +
5434 +void dx_insert_block(struct iam_path *path, struct iam_frame *frame,
5435 +                    u32 hash, u32 block)
5436 +{
5437 +       assert_corr(dx_index_is_compat(path));
5438 +       iam_insert_key(path, frame, (struct iam_ikey *)&hash, block);
5439 +}
5440 +
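iam_insert_key() opens a slot by shifting the tail of the entry array one position up, then writes the new (key, ptr) pair just after frame->at. The same pattern over hypothetical fixed-width entries (the real code sizes entries via iam_entry_size()):

	#include <string.h>

	struct demo_entry { unsigned key; unsigned blk; };

	/* Insert key/blk after slot @at; the array must have room for
	 * count + 1 entries, which the assert_corr() above guarantees. */
	static void demo_insert_after(struct demo_entry *e, unsigned count,
	                              unsigned at, unsigned key, unsigned blk)
	{
	        unsigned slot = at + 1;

	        memmove(e + slot + 1, e + slot, (count - slot) * sizeof *e);
	        e[slot].key = key;
	        e[slot].blk = blk;
	}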
5441  #endif
5442  
5443  
5444 @@ -1903,7 +892,8 @@ static struct buffer_head * ext3_dx_find
5445         hash = hinfo.hash;
5446         do {
5447                 block = dx_get_block(path, path->ip_frame->at);
5448 -               *err = path_descr(path)->id_node_read(path->ip_container, (iam_ptr_t)block,
5449 +               *err = iam_path_descr(path)->id_ops->id_node_read(path->ip_container,
5450 +                                                         (iam_ptr_t)block,
5451                                                      NULL, &bh);
5452                 if (*err != 0)
5453                         goto errout;
5454 @@ -2093,22 +1083,69 @@ static struct ext3_dir_entry_2* dx_pack_
5455         return prev;
5456  }
5457  
5458 +struct ext3_dir_entry_2 *move_entries(struct inode *dir,
5459 +                                     struct dx_hash_info *hinfo,
5460 +                                     struct buffer_head **bh1,
5461 +                                     struct buffer_head **bh2,
5462 +                                     __u32 *delim_hash)
5463 +{
5464 +       char *data1;
5465 +       char *data2;
5466 +       unsigned blocksize = dir->i_sb->s_blocksize;
5467 +       unsigned count;
5468 +       unsigned continued;
5469 +       unsigned split;
5470 +       u32 hash2;
5471 +
5472 +       struct dx_map_entry     *map;
5473 +       struct ext3_dir_entry_2 *de1;
5474 +       struct ext3_dir_entry_2 *de2;
5475 +
5476 +       data1 = (*bh1)->b_data;
5477 +       data2 = (*bh2)->b_data;
5478 +
5479 +       /* create the map at the end of the data2 block */
5480 +       map = (struct dx_map_entry *) (data2 + blocksize);
5481 +       count = dx_make_map((struct ext3_dir_entry_2 *) data1,
5482 +                           blocksize, hinfo, map);
5483 +       map -= count;
5484 +       split = count/2; // need to adjust to actual middle
5485 +       dx_sort_map(map, count);
5486 +       hash2 = map[split].hash;
5487 +       continued = hash2 == map[split - 1].hash;
5488 +       dxtrace(printk("Split at %x, %i/%i\n",
5489 +               hash2, split, count - split));
5490 +
5491 +       /* Fancy dance to stay within two buffers */
5492 +       de2 = dx_move_dirents(data1, data2, map + split, count - split);
5493 +       de1 = dx_pack_dirents(data1, blocksize);
5494 +       de1->rec_len = cpu_to_le16(data1 + blocksize - (char *) de1);
5495 +       de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
5496 +       dxtrace(dx_show_leaf(hinfo,
5497 +                            (struct ext3_dir_entry_2 *) data1, blocksize, 1));
5498 +       dxtrace(dx_show_leaf(hinfo,
5499 +                            (struct ext3_dir_entry_2 *) data2, blocksize, 1));
5500 +
5501 +       /* Which block gets the new entry? */
5502 +       if (hinfo->hash >= hash2) {
5503 +               swap(*bh1, *bh2);
5504 +               de1 = de2;
5505 +       }
5506 +       *delim_hash = hash2 + continued;
5507 +       return de1;
5508 +}
5509 +
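move_entries() reports the boundary between the two blocks as hash2 plus a "continued" bit: htree hashes keep their low bit clear, so setting it flags a run of equal hashes straddling the split point (a lookup must then visit both blocks). The computation in isolation, over the sorted hash array that dx_make_map() produced (demo_delim_hash is a hypothetical helper):

	static unsigned demo_delim_hash(const unsigned *hash, unsigned split)
	{
	        unsigned continued = hash[split] == hash[split - 1];

	        return hash[split] + continued;
	}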
5510  /* Allocate new node, and split leaf node @bh into it, inserting new pointer
5511   * into parent node identified by @frame */
5512  static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct iam_path *path,
5513                         struct buffer_head **bh,struct iam_frame *frame,
5514                         struct dx_hash_info *hinfo, int *error)
5515  {
5516 -       struct inode *dir = path_obj(path);
5517 -       unsigned blocksize = dir->i_sb->s_blocksize;
5518 -       unsigned count, continued;
5519 +       struct inode *dir = iam_path_obj(path);
5520         struct buffer_head *bh2;
5521         u32 newblock;
5522         u32 hash2;
5523 -       struct dx_map_entry *map;
5524 -       char *data1 = (*bh)->b_data, *data2;
5525 -       unsigned split;
5526 -       struct ext3_dir_entry_2 *de = NULL, *de2;
5527 +       struct ext3_dir_entry_2 *de = NULL;
5528         int     err;
5529  
5530         bh2 = ext3_append (handle, dir, &newblock, error);
5531 @@ -2133,35 +1170,9 @@ static struct ext3_dir_entry_2 *do_split
5532         if (err)
5533                 goto journal_error;
5534  
5535 -       data2 = bh2->b_data;
5536 -
5537 -       /* create map in the end of data2 block */
5538 -       map = (struct dx_map_entry *) (data2 + blocksize);
5539 -       count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
5540 -                            blocksize, hinfo, map);
5541 -       map -= count;
5542 -       split = count/2; // need to adjust to actual middle
5543 -       dx_sort_map (map, count);
5544 -       hash2 = map[split].hash;
5545 -       continued = hash2 == map[split - 1].hash;
5546 -       dxtrace(printk("Split block %i at %x, %i/%i\n",
5547 -               dx_get_block(frame->at), hash2, split, count-split));
5548 -
5549 -       /* Fancy dance to stay within two buffers */
5550 -       de2 = dx_move_dirents(data1, data2, map + split, count - split);
5551 -       de = dx_pack_dirents(data1,blocksize);
5552 -       de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
5553 -       de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
5554 -       dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1));
5555 -       dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1));
5556 +       de = move_entries(dir, hinfo, bh, &bh2, &hash2);
5557  
5558 -       /* Which block gets the new entry? */
5559 -       if (hinfo->hash >= hash2)
5560 -       {
5561 -               swap(*bh, bh2);
5562 -               de = de2;
5563 -       }
5564 -       dx_insert_block(path, frame, hash2 + continued, newblock);
5565 +       dx_insert_block(path, frame, hash2, newblock);
5566         err = ext3_journal_dirty_metadata (handle, bh2);
5567         if (err)
5568                 goto journal_error;
5569 @@ -2175,6 +1186,63 @@ errout:
5570  }
5571  #endif
5572  
5573 +struct ext3_dir_entry_2 *find_insertion_point(struct inode *dir,
5574 +                                             struct buffer_head *bh,
5575 +                                             const char *name, int namelen)
5576 +{
5577 +       struct ext3_dir_entry_2 *de;
5578 +       char *top;
5579 +       unsigned long offset;
5580 +       int nlen;
5581 +       int rlen;
5582 +       int reclen;
5583 +
5584 +       reclen = EXT3_DIR_REC_LEN(namelen);
5585 +       de = (struct ext3_dir_entry_2 *)bh->b_data;
5586 +       top = bh->b_data + dir->i_sb->s_blocksize - reclen;
5587 +       offset = 0;
5588 +       while ((char *) de <= top) {
5589 +               if (!ext3_check_dir_entry("ext3_add_entry",
5590 +                                         dir, de, bh, offset))
5591 +                       return ERR_PTR(-EIO);
5592 +               if (ext3_match(namelen, name, de))
5593 +                       return ERR_PTR(-EEXIST);
5594 +               nlen = EXT3_DIR_REC_LEN(de->name_len);
5595 +               rlen = le16_to_cpu(de->rec_len);
5596 +               if ((de->inode? rlen - nlen: rlen) >= reclen)
5597 +                       return de;
5598 +               de = (struct ext3_dir_entry_2 *)((char *)de + rlen);
5599 +               offset += rlen;
5600 +       }
5601 +       return ERR_PTR(-ENOSPC);
5602 +}
5603 +
5604 +struct ext3_dir_entry_2 *split_entry(struct inode *dir,
5605 +                                    struct ext3_dir_entry_2 *de,
5606 +                                    unsigned long ino, mode_t mode,
5607 +                                    const char *name, int namelen)
5608 +{
5609 +       int nlen;
5610 +       int rlen;
5611 +
5612 +       nlen = EXT3_DIR_REC_LEN(de->name_len);
5613 +       rlen = le16_to_cpu(de->rec_len);
5614 +       if (de->inode) {
5615 +               struct ext3_dir_entry_2 *de1;
5616 +
5617 +               de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
5618 +               de1->rec_len = cpu_to_le16(rlen - nlen);
5619 +               de->rec_len = cpu_to_le16(nlen);
5620 +               de = de1;
5621 +       }
5622 +       de->file_type = EXT3_FT_UNKNOWN;
5623 +       de->inode = cpu_to_le32(ino);
5624 +       if (ino != 0)
5625 +               ext3_set_de_type(dir->i_sb, de, mode);
5626 +       de->name_len = namelen;
5627 +       memcpy(de->name, name, namelen);
5628 +       return de;
5629 +}
5630  
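As a worked example of split_entry(): if @de is a live entry with name_len 3 inside a 40-byte record, then nlen = EXT3_DIR_REC_LEN(3) = 12, so the live entry keeps rec_len 12 and the new entry is built at (char *)de + 12 with the remaining rec_len 40 - 12 = 28; a record with de->inode == 0 is instead overwritten in place.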
5631  /*
5632   * Add a new entry into a directory (leaf) block.  If de is non-NULL,
5633 @@ -2194,34 +1262,16 @@ static int add_dirent_to_buf(handle_t *h
5634         struct inode    *dir = dentry->d_parent->d_inode;
5635         const char      *name = dentry->d_name.name;
5636         int             namelen = dentry->d_name.len;
5637 -       unsigned long   offset = 0;
5638 -       unsigned short  reclen;
5639 -       int             nlen, rlen, err;
5640 -       char            *top;
5641 +       int             err;
5642  
5643 -       reclen = EXT3_DIR_REC_LEN(namelen);
5644         if (!de) {
5645 -               de = (struct ext3_dir_entry_2 *)bh->b_data;
5646 -               top = bh->b_data + dir->i_sb->s_blocksize - reclen;
5647 -               while ((char *) de <= top) {
5648 -                       if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
5649 -                                                 bh, offset)) {
5650 -                               brelse (bh);
5651 -                               return -EIO;
5652 -                       }
5653 -                       if (ext3_match (namelen, name, de)) {
5654 -                               brelse (bh);
5655 -                               return -EEXIST;
5656 -                       }
5657 -                       nlen = EXT3_DIR_REC_LEN(de->name_len);
5658 -                       rlen = le16_to_cpu(de->rec_len);
5659 -                       if ((de->inode? rlen - nlen: rlen) >= reclen)
5660 -                               break;
5661 -                       de = (struct ext3_dir_entry_2 *)((char *)de + rlen);
5662 -                       offset += rlen;
5663 +               de = find_insertion_point(dir, bh, name, namelen);
5664 +               if (IS_ERR(de)) {
5665 +                       err = PTR_ERR(de);
5666 +                       if (err != -ENOSPC)
5667 +                               brelse(bh);
5668 +                       return err;
5669                 }
5670 -               if ((char *) de > top)
5671 -                       return -ENOSPC;
5672         }
5673         BUFFER_TRACE(bh, "get_write_access");
5674         err = ext3_journal_get_write_access(handle, bh);
5675 @@ -2232,22 +1282,9 @@ static int add_dirent_to_buf(handle_t *h
5676         }
5677  
5678         /* By now the buffer is marked for journaling */
5679 -       nlen = EXT3_DIR_REC_LEN(de->name_len);
5680 -       rlen = le16_to_cpu(de->rec_len);
5681 -       if (de->inode) {
5682 -               struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
5683 -               de1->rec_len = cpu_to_le16(rlen - nlen);
5684 -               de->rec_len = cpu_to_le16(nlen);
5685 -               de = de1;
5686 -       }
5687 -       de->file_type = EXT3_FT_UNKNOWN;
5688 -       if (inode) {
5689 -               de->inode = cpu_to_le32(inode->i_ino);
5690 -               ext3_set_de_type(dir->i_sb, de, inode->i_mode);
5691 -       } else
5692 -               de->inode = 0;
5693 -       de->name_len = namelen;
5694 -       memcpy (de->name, name, namelen);
5695 +
5696 +       split_entry(dir, de, inode ? inode->i_ino : 0,
5697 +                   inode ? inode->i_mode : 0, name, namelen);
5698         /*
5699          * XXX shouldn't update any times until successful
5700          * completion of syscall, but too many callers depend
5701 @@ -2423,8 +1460,40 @@ static int ext3_add_entry (handle_t *han
5702         return add_dirent_to_buf(handle, dentry, inode, de, bh);
5703  }
5704  
5705 +static int shift_entries(struct iam_path *path,
5706 +                        struct iam_frame *frame, unsigned count,
5707 +                        struct iam_entry *entries, struct iam_entry *entries2,
5708 +                        u32 newblock)
5709 +{
5710 +       unsigned count1;
5711 +       unsigned count2;
5712 +       int delta;
5713 +
5714 +       struct iam_frame *parent = frame - 1;
5715 +       struct iam_ikey *pivot = iam_path_ikey(path, 3);
5716 +
5717 +       delta = dx_index_is_compat(path) ? 0 : +1;
5718 +
5719 +       count1 = count/2 + delta;
5720 +       count2 = count - count1;
5721 +       iam_get_ikey(path, iam_entry_shift(path, entries, count1), pivot);
5722 +
5723 +       dxtrace(printk("Split index %i/%i\n", count1, count2));
5724 +
5725 +       memcpy((char *) iam_entry_shift(path, entries2, delta),
5726 +              (char *) iam_entry_shift(path, entries, count1),
5727 +              count2 * iam_entry_size(path));
5728 +
5729 +       dx_set_count(entries, count1);
5730 +       dx_set_count(entries2, count2 + delta);
5731 +       dx_set_limit(entries2, dx_node_limit(path));
5732 +
5733 +       iam_insert_key(path, parent, pivot, newblock);
5734 +       return count1;
5735 +}
5736 +
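A worked example of the arithmetic above: with count == 100 in compat mode, delta == 0, so count1 == 50 entries stay, count2 == 50 move, and the pivot key at index 50 is inserted into the parent. In iam (non-compat) mode delta == +1, so count1 == 51 and count2 == 49; the copy starts at slot delta of the new node, leaving slot 0 free for the count/limit header, and the new node's count is set to count2 + delta == 50 accordingly.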
5737  #ifdef CONFIG_EXT3_INDEX
5738 -static int split_index_node(handle_t *handle, struct iam_path *path)
5739 +int split_index_node(handle_t *handle, struct iam_path *path)
5740  {
5741  
5742         struct iam_entry *entries;   /* old block contents */
5743 @@ -2432,10 +1501,17 @@ static int split_index_node(handle_t *ha
5744         struct iam_frame *frame, *safe;
5745         struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {0};
5746         u32 newblock[DX_MAX_TREE_HEIGHT] = {0};
5747 -       struct inode *dir = path_obj(path);
5748 +       struct inode *dir = iam_path_obj(path);
5749 +       struct iam_descr *descr;
5750         int nr_splet;
5751         int i, err;
5752  
5753 +       descr = iam_path_descr(path);
5754 +       /*
5755 +        * Algorithm below depends on this.
5756 +        */
5757 +       assert_corr(dx_root_limit(path) < dx_node_limit(path));
5758 +
5759         frame = path->ip_frame;
5760         entries = frame->entries;
5761  
5762 @@ -2474,7 +1550,8 @@ static int split_index_node(handle_t *ha
5763         for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
5764                 bh_new[i] = ext3_append (handle, dir, &newblock[i], &err);
5765                 if (!bh_new[i] ||
5766 -                   path_descr(path)->id_node_init(path->ip_container, bh_new[i], 0) != 0)
5767 +                   descr->id_ops->id_node_init(path->ip_container,
5768 +                                               bh_new[i], 0) != 0)
5769                         goto cleanup;
5770                 BUFFER_TRACE(frame->bh, "get_write_access");
5771                 err = ext3_journal_get_write_access(handle, frame->bh);
5772 @@ -2493,6 +1570,7 @@ static int split_index_node(handle_t *ha
5773                 unsigned count;
5774                 int idx;
5775                 struct buffer_head *bh2;
5776 +               struct buffer_head *bh;
5777  
5778                 entries = frame->entries;
5779                 count = dx_get_count(entries);
5780 @@ -2501,6 +1579,7 @@ static int split_index_node(handle_t *ha
5781                 bh2 = bh_new[i];
5782                 entries2 = dx_get_entries(path, bh2->b_data, 0);
5783  
5784 +               bh = frame->bh;
5785                 if (frame == path->ip_frames) {
5786                         /* splitting root node. Tricky point:
5787                          *
5788 @@ -2512,22 +1591,20 @@ static int split_index_node(handle_t *ha
5789                          * capacity of the root node is smaller than that of
5790                          * non-root one.
5791                          */
5792 -                       struct dx_root *root;
5793 -                       u8 indirects;
5794                         struct iam_frame *frames;
5795 +                       struct iam_entry *next;
5796 +
5797 +                       assert_corr(i == 0);
5798  
5799                         frames = path->ip_frames;
5800 -                       root = (struct dx_root *) frames->bh->b_data;
5801 -                       indirects = root->info.indirect_levels;
5802 -                       dxtrace(printk("Creating new root %d\n", indirects));
5803                         memcpy((char *) entries2, (char *) entries,
5804                                count * iam_entry_size(path));
5805                         dx_set_limit(entries2, dx_node_limit(path));
5806  
5807                         /* Set up root */
5808 -                       dx_set_count(entries, 1);
5809 -                       dx_set_block(path, entries, newblock[i]);
5810 -                       root->info.indirect_levels = indirects + 1;
5811 +                       next = descr->id_ops->id_root_inc(path->ip_container,
5812 +                                                         path, frame);
5813 +                       dx_set_block(path, next, newblock[0]);
5814  
5815                         /* Shift frames in the path */
5816                         memmove(frames + 2, frames + 1,
5817 @@ -2536,49 +1613,61 @@ static int split_index_node(handle_t *ha
5818                         frames[1].at = iam_entry_shift(path, entries2, idx);
5819                         frames[1].entries = entries = entries2;
5820                         frames[1].bh = bh2;
5821 -                       assert(dx_node_check(path, frame));
5822 +                       assert_inv(dx_node_check(path, frame));
5823 +                       ++ path->ip_frame;
5824                         ++ frame;
5825 -                       assert(dx_node_check(path, frame));
5826 -                       bh_new[i] = NULL; /* buffer head is "consumed" */
5827 +                       assert_inv(dx_node_check(path, frame));
5828 +                       bh_new[0] = NULL; /* buffer head is "consumed" */
5829                         err = ext3_journal_get_write_access(handle, bh2);
5830                         if (err)
5831                                 goto journal_error;
5832                 } else {
5833                         /* splitting non-root index node. */
5834 -                       unsigned count1 = count/2, count2 = count - count1;
5835 -                       unsigned hash2;
5836 -
5837 -                       dx_get_key(path,
5838 -                                  iam_entry_shift(path, entries, count1),
5839 -                                  (struct iam_key *)&hash2);
5840 -
5841 -                       dxtrace(printk("Split index %i/%i\n", count1, count2));
5842 -
5843 -                       memcpy ((char *) entries2,
5844 -                               (char *) iam_entry_shift(path, entries, count1),
5845 -                               count2 * iam_entry_size(path));
5846 -                       dx_set_count (entries, count1);
5847 -                       dx_set_count (entries2, count2);
5848 -                       dx_set_limit (entries2, dx_node_limit(path));
5849 +                       struct iam_frame *parent = frame - 1;
5850  
5851 +                       count = shift_entries(path, frame, count,
5852 +                                             entries, entries2, newblock[i]);
5853                         /* Which index block gets the new entry? */
5854 -                       if (idx >= count1) {
5855 +                       if (idx >= count) {
5856 +                               int d = dx_index_is_compat(path) ? 0 : +1;
5857 +
5858                                 frame->at = iam_entry_shift(path, entries2,
5859 -                                                           idx - count1);
5860 +                                                           idx - count + d);
5861                                 frame->entries = entries = entries2;
5862                                 swap(frame->bh, bh2);
5863                                 bh_new[i] = bh2;
5864 +                               parent->at = iam_entry_shift(path,
5865 +                                                            parent->at, +1);
5866                         }
5867 -                       dx_insert_block(path, frame - 1, hash2, newblock[i]);
5868 -                       assert(dx_node_check(path, frame));
5869 -                       assert(dx_node_check(path, frame - 1));
5870 +                       assert_inv(dx_node_check(path, frame));
5871 +                       assert_inv(dx_node_check(path, parent));
5872                         dxtrace(dx_show_index ("node", frame->entries));
5873                         dxtrace(dx_show_index ("node",
5874                                ((struct dx_node *) bh2->b_data)->entries));
5875                         err = ext3_journal_dirty_metadata(handle, bh2);
5876                         if (err)
5877                                 goto journal_error;
5878 +                       err = ext3_journal_dirty_metadata(handle, parent->bh);
5879 +                       if (err)
5880 +                               goto journal_error;
5881                 }
5882 +               err = ext3_journal_dirty_metadata(handle, bh);
5883 +               if (err)
5884 +                       goto journal_error;
5885 +                * This function was called to make insertion of a new leaf
5886 +                * This function was called to make insertion of new leaf
5887 +                * possible. Check that it fulfilled its obligations.
5888 +                */
5889 +               assert_corr(dx_get_count(path->ip_frame->entries) <
5890 +                           dx_get_limit(path->ip_frame->entries));
5891 +       }
5892 +       if (nr_splet > 0) {
5893 +               /*
5894 +                * Log ->i_size modification.
5895 +                */
5896 +               err = ext3_mark_inode_dirty(handle, dir);
5897 +               if (err)
5898 +                       goto journal_error;
5899         }
5900         goto cleanup;
5901  journal_error:
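Worked example of the splitting pass: for a path where the leaf's parent and grandparent are full but the root is not, nr_splet == 2 and safe is the root frame; two fresh blocks are appended, the grandparent is halved into its new sibling (the pivot going into the root via shift_entries() -> iam_insert_key()), then the parent likewise, after which the assert_corr() above holds: path->ip_frame has count < limit and the caller can finally insert its leaf pointer.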
5902 @@ -2610,7 +1699,7 @@ static int ext3_dx_add_entry(handle_t *h
5903         size_t isize;
5904  
5905         iam_path_compat_init(&cpath, dir);
5906 -       param = path_descr(path);
5907 +       param = iam_path_descr(path);
5908  
5909         err = dx_probe(dentry, NULL, &hinfo, path);
5910         if (err != 0)
5911 @@ -2620,7 +1709,8 @@ static int ext3_dx_add_entry(handle_t *h
5912         /* XXX nikita: global serialization! */
5913         isize = dir->i_size;
5914  
5915 -       err = param->id_node_read(path->ip_container, (iam_ptr_t)dx_get_block(path, frame->at), 
5916 +       err = param->id_ops->id_node_read(path->ip_container,
5917 +                       (iam_ptr_t)dx_get_block(path, frame->at),
5918                                   handle, &bh);
5919         if (err != 0)
5920                 goto cleanup;
5921 @@ -2641,11 +1731,11 @@ static int ext3_dx_add_entry(handle_t *h
5922                 goto cleanup;   
5923  
5924         /*copy split inode too*/
5925 -       de = do_split(handle, path, &bh, --frame, &hinfo, &err);
5926 +       de = do_split(handle, path, &bh, path->ip_frame, &hinfo, &err);
5927         if (!de)
5928                 goto cleanup;
5929  
5930 -       assert(dx_node_check(path, frame));
5931 +       assert_inv(dx_node_check(path, frame));
5932         err = add_dirent_to_buf(handle, dentry, inode, de, bh);
5933         goto cleanup2;
5934  
5935 @@ -2752,6 +1842,26 @@ static struct inode * ext3_new_inode_wan
5936         return ext3_new_inode(handle, dir, mode, inum);
5937  }
5938  
5939 +struct inode *ext3_create_inode(handle_t *handle, struct inode * dir, int mode)
5940 +{
5941 +       struct inode *inode;
5942 +
5943 +       inode = ext3_new_inode(handle, dir, mode, 0);
5944 +       if (!IS_ERR(inode)) {
5945 +               if (S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode)) {
5946 +#ifdef CONFIG_LDISKFS_FS_XATTR
5947 +                       inode->i_op = &ext3_special_inode_operations;
5948 +#endif
5949 +               } else {
5950 +                       inode->i_op = &ext3_file_inode_operations;
5951 +                       inode->i_fop = &ext3_file_operations;
5952 +                       ext3_set_aops(inode);
5953 +               }
5954 +       }
5955 +       return inode;
5956 +}
5957 +EXPORT_SYMBOL(ext3_create_inode);
5958 +
5959  /*
5960   * By the time this is called, we already have created
5961   * the directory cache entry for the new file, but it
5962 Index: iam/include/linux/ext3_fs.h
5963 ===================================================================
5964 --- iam.orig/include/linux/ext3_fs.h
5965 +++ iam/include/linux/ext3_fs.h
5966 @@ -758,9 +758,7 @@ extern int ext3_should_retry_alloc(struc
5967  extern void rsv_window_add(struct super_block *sb, struct reserve_window_node *rsv);
5968  
5969  /* dir.c */
5970 -extern int ext3_check_dir_entry(const char *, struct inode *,
5971 -                               struct ext3_dir_entry_2 *,
5972 -                               struct buffer_head *, unsigned long);
5973 +
5974  extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
5975                                     __u32 minor_hash,
5976                                     struct ext3_dir_entry_2 *dirent);
5977 Index: iam/include/linux/lustre_iam.h
5978 ===================================================================
5979 --- iam.orig/include/linux/lustre_iam.h
5980 +++ iam/include/linux/lustre_iam.h
5981 @@ -1,9 +1,68 @@
5982 +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
5983 + * vim:expandtab:shiftwidth=8:tabstop=8:
5984 + *
5985 + *  lustre_iam.h
5986 + *  Top-level entry points into osd module
5987 + *
5988 + *  Copyright (c) 2006 Cluster File Systems, Inc.
5989 + *   Author: Wang Di <wangdi@clusterfs.com>
5990 + *   Author: Nikita Danilov <nikita@clusterfs.com>
5991 + *
5992 + *   This file is part of the Lustre file system, http://www.lustre.org
5993 + *   Lustre is a trademark of Cluster File Systems, Inc.
5994 + *
5995 + *   You may have signed or agreed to another license before downloading
5996 + *   this software.  If so, you are bound by the terms and conditions
5997 + *   of that agreement, and the following does not apply to you.  See the
5998 + *   LICENSE file included with this distribution for more information.
5999 + *
6000 + *   If you did not agree to a different license, then this copy of Lustre
6001 + *   is open source software; you can redistribute it and/or modify it
6002 + *   under the terms of version 2 of the GNU General Public License as
6003 + *   published by the Free Software Foundation.
6004 + *
6005 + *   In either case, Lustre is distributed in the hope that it will be
6006 + *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
6007 + *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
6008 + *   license text for more details.
6009 + */
6010 +
6011 +#ifndef __LINUX_LUSTRE_IAM_H__
6012 +#define __LINUX_LUSTRE_IAM_H__
6013 +
6014 +/* handle_t, journal_start(), journal_stop() */
6015 +#include <linux/jbd.h>
6016 +
6017  /*
6018 - * Maximal number of non-leaf levels in htree. In the stock ext3 this is 2.
6019 + *  linux/include/linux/lustre_iam.h
6020   */
6021 +
6022  enum {
6023 +        /*
6024 +         * Maximal number of non-leaf levels in htree. In the stock ext3 this
6025 +         * is 2.
6026 +         */
6027         DX_MAX_TREE_HEIGHT = 5,
6028 -       DX_SCRATCH_KEYS    = 2
6029 +        /*
6030 +         * Scratch keys used by generic code for temporaries.
6031 +         *
6032 +         * Allocation:
6033 +         *
6034 +         *         [0] reserved for assertions and as a staging area for
6035 +         *         record keys immediately used for key comparisons.
6036 +         *
6037 +         *         [1] reserved for record key, stored during iteration over
6038 +         *         node records (see dx_node_check()).
6039 +         *
6040 +         *         [2] reserved for leaf node operations.
6041 +         *
6042 +         *         [3] reserved for index operations.
6043 +         */
6044 +       DX_SCRATCH_KEYS    = 4,
6045 +        /*
6046 +         * Maximal format name length.
6047 +         */
6048 +        DX_FMT_NAME_LEN    = 16
6049  };
6050  
6051  /*
6052 @@ -30,6 +89,11 @@ struct iam_key;
6053  /* Incomplete type use to refer to the records stored in iam containers. */
6054  struct iam_rec;
6055  
6056 +struct iam_cookie {
6057 +       struct iam_key *ic_key;
6058 +       struct iam_rec *ic_rec;
6059 +};
6060 +
6061  typedef __u64 iam_ptr_t;
6062  
6063  /*
6064 @@ -41,45 +105,25 @@ struct iam_frame {
6065         struct iam_entry *at;      /* target entry, found by binary search */
6066  };
6067  
6068 -/* leaf node reached by tree lookup */
6069 -#define iam_leaf_entry iam_rec
6070 -struct iam_leaf {
6071 -       struct buffer_head *bh;
6072 -       struct iam_leaf_entry *entries;
6073 -       struct iam_leaf_entry *at;
6074 -};
6075 +/*
6076 + * Opaque entry in the leaf node.
6077 + */
6078 +struct iam_lentry;
6079  
6080  struct iam_path;
6081  struct iam_container;
6082  
6083 -/*
6084 - * Parameters, describing a flavor of iam container.
6085 - */
6086 -struct iam_descr {
6087 -       /*
6088 -        * Size of a key in this container, in bytes.
6089 -        */
6090 -       size_t       id_key_size;
6091 -       /*
6092 -        * Size of a pointer to the next level (stored in index nodes), in
6093 -        * bytes.
6094 -        */
6095 -       size_t       id_ptr_size;
6096 -       /*
6097 -        * Size of a record (stored in leaf nodes), in bytes.
6098 -        */
6099 -       size_t       id_rec_size;
6100 -       /*
6101 -        * Size of unused (by iam) space at the beginning of every non-root
6102 -        * node, in bytes. Used for compatibility with ext3.
6103 -        */
6104 -       size_t       id_node_gap;
6105 -       /*
6106 -        * Size of unused (by iam) space at the beginning of root node, in
6107 -        * bytes. Used for compatibility with ext3.
6108 -        */
6109 -       size_t       id_root_gap;
6110  
6111 +/* leaf node reached by tree lookup */
6112 +struct iam_leaf {
6113 +        struct iam_path    *il_path;
6114 +       struct buffer_head *il_bh;
6115 +       struct iam_lentry  *il_entries;
6116 +       struct iam_lentry  *il_at;
6117 +       void               *il_descr_data;
6118 +};
6119 +
6120 +struct iam_operations {
6121         /*
6122          * Returns pointer (in the same sense as pointer in index entry) to
6123          * the root node.
6124 @@ -102,8 +146,8 @@ struct iam_descr {
6125         /*
6126          * Key comparison function. Returns -1, 0, +1.
6127          */
6128 -       int (*id_keycmp)(struct iam_container *c,
6129 -                        struct iam_key *k1, struct iam_key *k2);
6130 +       int (*id_keycmp)(const struct iam_container *c,
6131 +                        const struct iam_key *k1, const struct iam_key *k2);
6132         /*
6133          * Create new container.
6134          *
6135 @@ -111,25 +155,113 @@ struct iam_descr {
6136          * contains single record with the smallest possible key.
6137          */
6138         int (*id_create)(struct iam_container *c);
6139 -       struct {
6140 +        /*
6141 +         * Format name.
6142 +         */
6143 +        char id_name[DX_FMT_NAME_LEN];
6144 +};
6145 +
6146 +struct iam_leaf_operations {
6147                 /*
6148                  * leaf operations.
6149                  */
6150 +
6151 +        /*
6152 +         * Initialize a just-loaded leaf node.
6153 +         */
6154 +        int (*init)(struct iam_leaf *p);
6155 +        /*
6156 +         * Format new node.
6157 +         */
6158 +        void (*init_new)(struct iam_container *c, struct buffer_head *bh);
6159 +        /*
6160 +         * Release resources.
6161 +         */
6162 +        void (*fini)(struct iam_leaf *l);
6163                 /*
6164                  * returns true iff leaf is positioned at the last entry.
6165                  */
6166 -               int (*at_end)(struct iam_container *c, struct iam_leaf *l);
6167 +        int (*at_end)(const struct iam_leaf *l);
6168                 /* position leaf at the first entry */
6169 -               void (*start)(struct iam_container *c, struct iam_leaf *l);
6170 +        void (*start)(struct iam_leaf *l);
6171                 /* more leaf to the next entry. */
6172                 /* move leaf to the next entry. */
6173 -               /* return key of current leaf record in @k */
6174 -               void (*key)(struct iam_container *c, struct iam_leaf *l,
6175 -                           struct iam_key *k);
6176 -               /* return pointer to entry body */
6177 -               struct iam_rec *(*rec)(struct iam_container *c,
6178 -                                      struct iam_leaf *l);
6179 -       } id_leaf;
6180 +        void (*next)(struct iam_leaf *l);
6181 +        /* Return the key of the current leaf record. This method may
6182 +         * return either a pointer to the key stored in the node, or copy
6183 +         * the key into the @k buffer supplied by the caller and return a
6184 +         * pointer to that buffer. The latter approach is used when keys
6185 +         * in nodes are not stored in plain form (e.g., htree doesn't
6186 +         * store keys at all).
6187 +         *
6188 +         * Caller should assume that the returned pointer is only valid
6189 +         * while the leaf node is pinned and locked. */
6190 +        struct iam_key *(*key)(const struct iam_leaf *l, struct iam_key *k);
6191 +        /* return pointer to entry body. Pointer is valid while
6192 +           corresponding leaf node is locked and pinned. */
6193 +        struct iam_rec *(*rec)(const struct iam_leaf *l);
6194 +
6195 +        void (*key_set)(struct iam_leaf *l, const struct iam_key *k);
6196 +        void (*rec_set)(struct iam_leaf *l, const struct iam_rec *r);
6197 +
6198 +        /*
6199 +         * Search leaf @l for a record with key @k or for a place
6200 +         * where such record is to be inserted.
6201 +         *
6202 +         * Scratch keys from @path can be used.
6203 +         */
6204 +        int (*lookup)(struct iam_leaf *l, const struct iam_key *k);
6205 +
6206 +        int (*can_add)(const struct iam_leaf *l,
6207 +                       const struct iam_key *k, const struct iam_rec *r);
6208 +        /*
6209 +         * Add a record to a leaf.
6210 +         */
6211 +        void (*rec_add)(struct iam_leaf *l,
6212 +                        const struct iam_key *k, const struct iam_rec *r);
6213 +        /*
6214 +         * Remove the current record from a leaf.
6215 +         */
6216 +        void (*rec_del)(struct iam_leaf *l);
6217 +        /*
6218 +         * split leaf node, moving some entries into @bh (the latter currently
6219 +         * is assumed to be empty).
6220 +         */
6221 +        void (*split)(struct iam_leaf *l, struct buffer_head *bh);
6222 +};
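
The two conventions that the ->key() comment above describes can be made concrete with a pair of sketches. These are illustrative only: the plain_key/derived_key names, the assumption that a format stores fixed-size keys at the start of each leaf entry, and the derive_hash() helper are all hypothetical, not part of the patch.

    /* Sketch: ->key() for a hypothetical format storing fixed-size keys in
     * plain form at the start of each leaf entry; an in-node pointer is
     * returned directly and the scratch buffer @k is left unused. */
    static struct iam_key *plain_key(const struct iam_leaf *l, struct iam_key *k)
    {
            return (struct iam_key *)l->il_at;
    }

    /* Sketch: ->key() for a hypothetical format that does not store keys in
     * plain form; the key is reconstructed into the caller's buffer @k. */
    static struct iam_key *derived_key(const struct iam_leaf *l, struct iam_key *k)
    {
            __u32 hash = derive_hash(l->il_at);     /* assumed helper */

            memcpy(k, &hash, sizeof hash);
            return k;
    }
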
6223 +
6224 +struct iam_path *iam_leaf_path(const struct iam_leaf *leaf);
6225 +struct iam_container *iam_leaf_container(const struct iam_leaf *leaf);
6226 +
6227 +/*
6228 + * Parameters, describing a flavor of iam container.
6229 + */
6230 +struct iam_descr {
6231 +       /*
6232 +        * Size of a key in this container, in bytes.
6233 +        */
6234 +       size_t       id_key_size;
6235 +       /*
6236 +        * Size of a pointer to the next level (stored in index nodes), in
6237 +        * bytes.
6238 +        */
6239 +       size_t       id_ptr_size;
6240 +       /*
6241 +        * Size of a record (stored in leaf nodes), in bytes.
6242 +        */
6243 +       size_t       id_rec_size;
6244 +       /*
6245 +        * Size of unused (by iam) space at the beginning of every non-root
6246 +        * node, in bytes. Used for compatibility with ext3.
6247 +        */
6248 +       size_t       id_node_gap;
6249 +       /*
6250 +        * Size of unused (by iam) space at the beginning of root node, in
6251 +        * bytes. Used for compatibility with ext3.
6252 +        */
6253 +       size_t       id_root_gap;
6254 +
6255 +        struct iam_operations           *id_ops;
6256 +        struct iam_leaf_operations      *id_leaf_ops;
6257  };
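
A format ties the size parameters and both ops tables together in a single iam_descr. A minimal sketch of what a descriptor for a hypothetical flavor could look like; all my_* names and the concrete sizes are illustrative assumptions:

    static struct iam_operations      my_ops;       /* format callbacks */
    static struct iam_leaf_operations my_leaf_ops;  /* leaf callbacks */

    static struct iam_descr my_descr = {
            .id_key_size  = 8,          /* bytes per key */
            .id_ptr_size  = 4,          /* bytes per child pointer */
            .id_rec_size  = 16,         /* bytes per leaf record */
            .id_node_gap  = 0,          /* no unused prefix in non-root nodes */
            .id_root_gap  = 32,         /* assumed root header size */
            .id_ops       = &my_ops,
            .id_leaf_ops  = &my_leaf_ops,
    };
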
6258  
6259  struct iam_container {
6260 @@ -142,10 +274,17 @@ struct iam_container {
6261          * container flavor.
6262          */
6263         struct iam_descr *ic_descr;
6264 +};
6265 +
6266 +/*
6267 + * Description-specific part of iam_path. This is usually embedded into a
6268 + * larger structure.
6269 + */
6270 +struct iam_path_descr {
6271         /*
6272 -        * pointer to flavor-specific per-container data.
6273 +        * Scratch-pad area for temporary keys.
6274          */
6275 -       void             *ic_descr_data;
6276 +       struct iam_key        *ipd_key_scratch[DX_SCRATCH_KEYS];
6277  };
6278  
6279  /*
6280 @@ -172,36 +311,240 @@ struct iam_path {
6281         /*
6282          * Leaf node: a child of ->ip_frame.
6283          */
6284 -       struct iam_leaf       *ip_leaf;
6285 +       struct iam_leaf        ip_leaf;
6286         /*
6287          * Key searched for.
6288          */
6289 -       struct iam_key        *ip_key_target;
6290 +       const struct iam_key  *ip_key_target;
6291         /*
6292 -        * Scratch-pad area for temporary keys.
6293 +        * Description-specific data.
6294          */
6295 -       struct iam_key        *ip_key_scratch[DX_SCRATCH_KEYS];
6296 -       /*
6297 -        * pointer to flavor-specific per-container data.
6298 -        */
6299 -       void                  *ip_descr_data;
6300 +       struct iam_path_descr *ip_data;
6301  };
6302  
6303 +struct dx_hash_info;
6304 +
6305  /*
6306   * Helper structure for legacy htrees.
6307   */
6308  struct iam_path_compat {
6309         struct iam_path      ipc_path;
6310         struct iam_container ipc_container;
6311 -       __u32                ipc_scrach[DX_SCRATCH_KEYS];
6312 +       __u32                 ipc_scratch[DX_SCRATCH_KEYS];
6313 +       struct dx_hash_info  *ipc_hinfo;
6314 +       struct dentry        *ipc_dentry;
6315 +       struct iam_path_descr ipc_descr;
6316  };
6317  
6318 -int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r);
6319 -int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k);
6320 -int iam_update(handle_t *h, struct iam_container *c, struct iam_key *k, struct iam_rec *r);
6321 -int iam_insert(handle_t *handle, struct iam_container *c, struct iam_key *k, struct iam_rec *r);
6322  /*
6323 - * Initialize container @c, acquires additional reference on @inode.
6324 + * iam cursor (iterator) api.
6325 + */
6326 +
6327 +/*
6328 + * States of iterator state machine.
6329 + */
6330 +enum iam_it_state {
6331 +       /* initial state */
6332 +       IAM_IT_DETACHED,
6333 +       /* iterator is above particular record in the container */
6334 +       IAM_IT_ATTACHED
6335 +};
6336 +
6337 +/*
6338 + * Flags controlling iterator functionality.
6339 + */
6340 +enum iam_it_flags {
6341 +       /*
6342 +        * this iterator will move (iam_it_{prev,next}() will be called on it)
6343 +        */
6344 +       IAM_IT_MOVE  = (1 << 0),
6345 +       /*
6346 +        * tree can be updated through this iterator.
6347 +        */
6348 +       IAM_IT_WRITE = (1 << 1)
6349 +};
6350 +
6351 +/*
6352 + * Iterator.
6353 + *
6354 + * Immediately after call to iam_it_init() iterator is in "detached"
6355 + * (IAM_IT_DETACHED) state: it is associated with given parent container, but
6356 + * doesn't point to any particular record in this container.
6357 + *
6358 + * After successful call to iam_it_get() and until corresponding call to
6359 + * iam_it_put() iterator is in "attached" state (IAM_IT_ATTACHED).
6360 + *
6361 + * Attached iterator can move through records in a container (provided
6362 + * IAM_IT_MOVE permission) in key order, can get record and key values as it
6363 + * passes over them, and can modify container (provided IAM_IT_WRITE
6364 + * permission).
6365 + *
6366 + * Concurrency: iterators are supposed to be local to a thread. Interfaces
6367 + * below do no internal serialization.
6368 + *
6369 + */
6370 +struct iam_iterator {
6371 +       /*
6372 +        * iterator flags, taken from enum iam_it_flags.
6373 +        */
6374 +       __u32                 ii_flags;
6375 +       enum iam_it_state     ii_state;
6376 +       /*
6377 +        * path to the record. Valid in IAM_IT_ATTACHED state.
6378 +        */
6379 +       struct iam_path       ii_path;
6380 +};
6381 +
6382 +void iam_path_init(struct iam_path *path, struct iam_container *c,
6383 +                  struct iam_path_descr *pd);
6384 +void iam_path_fini(struct iam_path *path);
6385 +
6386 +void iam_path_compat_init(struct iam_path_compat *path, struct inode *inode);
6387 +void iam_path_compat_fini(struct iam_path_compat *path);
6388 +
6389 +struct iam_path_descr *iam_ipd_alloc(int keysize);
6390 +void iam_ipd_free(struct iam_path_descr *ipd);
6391 +
6392 +/*
6393 + * Initialize iterator to IAM_IT_DETACHED state.
6394 + *
6395 + * postcondition: it_state(it) == IAM_IT_DETACHED
6396 + */
6397 +int  iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags,
6398 +                struct iam_path_descr *pd);
6399 +/*
6400 + * Finalize iterator and release all resources.
6401 + *
6402 + * precondition: it_state(it) == IAM_IT_DETACHED
6403 + */
6404 +void iam_it_fini(struct iam_iterator *it);
6405 +
6406 +/*
6407 + * Attach iterator. After successful completion, @it points to record with the
6408 + * largest key not larger than @k. Semantics of ->id_create() method guarantee
6409 + * that such record will always be found.
6410 + *
6411 + * Return value: 0: positioned on existing record,
6412 + *             -ve: error.
6413 + *
6414 + * precondition:  it_state(it) == IAM_IT_DETACHED
6415 + * postcondition: ergo(result == 0,
6416 + *                     (it_state(it) == IAM_IT_ATTACHED &&
6417 + *                      it_keycmp(it, iam_it_key_get(it, *), k) <= 0))
6418 + */
6419 +int iam_it_get(struct iam_iterator *it, const struct iam_key *k);
6420 +
6421 +/*
6422 + * Duplicates iterator.
6423 + *
6424 + * postcondition: it_state(dst) == it_state(src) &&
6425 + *                iam_it_container(dst) == iam_it_container(src) &&
6426 + *                dst->ii_flags == src->ii_flags &&
6427 + *                ergo(it_state(it) == IAM_IT_ATTACHED,
6428 + *                     iam_it_rec_get(dst) == iam_it_rec_get(src) &&
6429 + *                     iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
6430 + */
6431 +void iam_it_dup(struct iam_iterator *dst, const struct iam_iterator *src);
6432 +
6433 +/*
6434 + * Detach iterator. Does nothing in detached state.
6435 + *
6436 + * postcondition: it_state(it) == IAM_IT_DETACHED
6437 + */
6438 +void iam_it_put(struct iam_iterator *it);
6439 +
6440 +/*
6441 + * Move iterator one record right.
6442 + *
6443 + * Return value: 0: success,
6444 + *              +1: end of container reached
6445 + *             -ve: error
6446 + *
6447 + * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
6448 + * postcondition: ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED)
6449 + */
6450 +int iam_it_next(struct iam_iterator *it);
6451 +
6452 +/*
6453 + * Return pointer to the record under iterator.
6454 + *
6455 + * precondition:  it_state(it) == IAM_IT_ATTACHED
6456 + * postcondition: it_state(it) == IAM_IT_ATTACHED
6457 + */
6458 +struct iam_rec *iam_it_rec_get(const struct iam_iterator *it);
6459 +
6460 +/*
6461 + * Replace contents of record under iterator.
6462 + *
6463 + * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
6464 + * postcondition: it_state(it) == IAM_IT_ATTACHED &&
6465 + *                ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
6466 + */
6467 +int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r);
6468 +
6469 +/*
6470 + * Place key under iterator in @k, return @k
6471 + *
6472 + * precondition:  it_state(it) == IAM_IT_ATTACHED
6473 + * postcondition: it_state(it) == IAM_IT_ATTACHED
6474 + */
6475 +struct iam_key *iam_it_key_get(const struct iam_iterator *it,
6476 +                               struct iam_key *k);
6477 +
6478 +/*
6479 + * Insert new record with key @k and contents from @r, shifting records to the
6480 + * right.
6481 + *
6482 + * precondition:  it_state(it) == IAM_IT_ATTACHED &&
6483 + *                it->ii_flags&IAM_IT_WRITE &&
6484 + *                it_keycmp(it, iam_it_key_get(it, *), k) < 0
6485 + * postcondition: it_state(it) == IAM_IT_ATTACHED &&
6486 + *                ergo(result == 0,
6487 + *                     it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
6488 + *                     !memcmp(iam_it_rec_get(it), r, ...))
6489 + */
6490 +int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
6491 +                     const struct iam_key *k, const struct iam_rec *r);
6492 +/*
6493 + * Delete record under iterator.
6494 + *
6495 + * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
6496 + * postcondition: it_state(it) == IAM_IT_ATTACHED
6497 + */
6498 +int iam_it_rec_delete(handle_t *h, struct iam_iterator *it);
6499 +
6500 +typedef __u64 iam_pos_t;
6501 +
6502 +/*
6503 + * Convert iterator to cookie.
6504 + *
6505 + * precondition:  it_state(it) == IAM_IT_ATTACHED &&
6506 + *                iam_path_descr(&it->ii_path)->id_key_size <= sizeof(iam_pos_t)
6507 + * postcondition: it_state(it) == IAM_IT_ATTACHED
6508 + */
6509 +iam_pos_t iam_it_store(const struct iam_iterator *it);
6510 +
6511 +/*
6512 + * Restore iterator from cookie.
6513 + *
6514 + * precondition:  it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE &&
6515 + *                iam_path_descr(&it->ii_path)->id_key_size <= sizeof(iam_pos_t)
6516 + * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED &&
6517 + *                                  iam_it_store(it) == pos)
6518 + */
6519 +int iam_it_load(struct iam_iterator *it, iam_pos_t pos);
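
Taken together, the iterator entry points above compose into a simple scan loop. A minimal sketch, assuming the container @c and a matching iam_path_descr were set up elsewhere (the iam_scan name is hypothetical):

    /* Sketch: read-only scan of container @c starting at key @k. */
    static int iam_scan(struct iam_container *c, const struct iam_key *k,
                        struct iam_path_descr *pd)
    {
            struct iam_iterator it;
            int result;

            result = iam_it_init(&it, c, IAM_IT_MOVE, pd);
            if (result != 0)
                    return result;

            result = iam_it_get(&it, k);            /* attach at @k */
            while (result == 0) {
                    struct iam_rec *rec;

                    rec = iam_it_rec_get(&it);      /* valid while attached */
                    /* ... consume @rec here ... */
                    result = iam_it_next(&it);      /* 0, +1 at end, -ve error */
            }
            iam_it_put(&it);                        /* detach (no-op if detached) */
            iam_it_fini(&it);                       /* requires detached state */
            return result > 0 ? 0 : result;
    }
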
6520 +
6521 +int iam_lookup(struct iam_container *c, const struct iam_key *k,
6522 +               struct iam_rec *r, struct iam_path_descr *pd);
6523 +int iam_delete(handle_t *h, struct iam_container *c, const struct iam_key *k,
6524 +              struct iam_path_descr *pd);
6525 +int iam_update(handle_t *h, struct iam_container *c, const struct iam_key *k,
6526 +              struct iam_rec *r, struct iam_path_descr *pd);
6527 +int iam_insert(handle_t *handle, struct iam_container *c,
6528 +               const struct iam_key *k,
6529 +              struct iam_rec *r, struct iam_path_descr *pd);
6530 +/*
6531 + * Initialize container @c.
6532   */
6533  int iam_container_init(struct iam_container *c,
6534                        struct iam_descr *descr, struct inode *inode);
6535 @@ -210,3 +553,170 @@ int iam_container_init(struct iam_contai
6536   */
6537  void iam_container_fini(struct iam_container *c);
6538  
6539 +/*
6540 + * Determine container format.
6541 + */
6542 +int iam_container_setup(struct iam_container *c);
6543 +
6544 +#ifndef assert
6545 +#define assert(test) J_ASSERT(test)
6546 +#endif
6547 +
6548 +static inline struct iam_descr *iam_container_descr(struct iam_container *c)
6549 +{
6550 +        return c->ic_descr;
6551 +}
6552 +
6553 +static inline struct iam_descr *iam_path_descr(const struct iam_path *p)
6554 +{
6555 +       return p->ip_container->ic_descr;
6556 +}
6557 +
6558 +static inline struct inode *iam_path_obj(struct iam_path *p)
6559 +{
6560 +       return p->ip_container->ic_object;
6561 +}
6562 +
6563 +static inline void iam_keycpy(const struct iam_container *c,
6564 +                              struct iam_key *k1, const struct iam_key *k2)
6565 +{
6566 +       memcpy(k1, k2, c->ic_descr->id_key_size);
6567 +}
6568 +
6569 +static inline int iam_keycmp(const struct iam_container *c,
6570 +                            const struct iam_key *k1, const struct iam_key *k2)
6571 +{
6572 +       return c->ic_descr->id_ops->id_keycmp(c, k1, k2);
6573 +}
6574 +
6575 +static inline void iam_reccpy(const struct iam_path *p, struct iam_rec *rec_dst,
6576 +                             const struct iam_rec *rec_src)
6577 +{
6578 +       memcpy(rec_dst, rec_src, iam_path_descr(p)->id_rec_size);
6579 +}
6580 +
6581 +static inline void *iam_entry_off(struct iam_entry *entry, size_t off)
6582 +{
6583 +       return (void *)((char *)entry + off);
6584 +}
6585 +
6586 +/* XXX This is put here just because it is used by both iam.c and namei.c. */
6587 +static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry)
6588 +{
6589 +       return le32_to_cpu(*(u32*)iam_entry_off(entry,
6590 +                                               iam_path_descr(p)->id_key_size))
6591 +               & 0x00ffffff;
6592 +}
6593 +
6594 +static inline void dx_set_block(struct iam_path *p,
6595 +                               struct iam_entry *entry, unsigned value)
6596 +{
6597 +       *(u32*)iam_entry_off(entry,
6598 +                            iam_path_descr(p)->id_key_size) =
6599 +               cpu_to_le32(value);
6600 +}
6601 +
6602 +static inline void dx_set_key(struct iam_path *p, struct iam_entry *entry,
6603 +                              const struct iam_key *key)
6604 +{
6605 +        iam_keycpy(p->ip_container, iam_entry_off(entry, 0), key);
6606 +}
6607 +
6608 +struct dx_countlimit {
6609 +       __le16 limit;
6610 +       __le16 count;
6611 +};
6612 +
6613 +static inline unsigned dx_get_count(struct iam_entry *entries)
6614 +{
6615 +       return le16_to_cpu(((struct dx_countlimit *) entries)->count);
6616 +}
6617 +
6618 +static inline unsigned dx_get_limit(struct iam_entry *entries)
6619 +{
6620 +       return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
6621 +}
6622 +
6623 +static inline void dx_set_count(struct iam_entry *entries, unsigned value)
6624 +{
6625 +       ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
6626 +}
6627 +
6628 +static inline unsigned dx_node_limit(struct iam_path *p)
6629 +{
6630 +       struct iam_descr *param = iam_path_descr(p);
6631 +       unsigned entry_space   = iam_path_obj(p)->i_sb->s_blocksize -
6632 +               param->id_node_gap;
6633 +       return entry_space / (param->id_key_size + param->id_ptr_size);
6634 +}
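
For a feel of the numbers: assuming a hypothetical container with 4096-byte blocks, 8-byte keys, 4-byte pointers and a 16-byte node gap, dx_node_limit() yields (4096 - 16) / (8 + 4) = 340 entries per index node.
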
6635 +
6636 +static inline struct iam_entry *dx_get_entries(struct iam_path *path,
6637 +                                              void *data, int root)
6638 +{
6639 +       struct iam_descr *param = iam_path_descr(path);
6640 +       return data + (root ? param->id_root_gap : param->id_node_gap);
6641 +}
6642 +
6643 +
6644 +static inline struct iam_entry *dx_node_get_entries(struct iam_path *path,
6645 +                                                   struct iam_frame *frame)
6646 +{
6647 +       return dx_get_entries(path,
6648 +                             frame->bh->b_data, frame == path->ip_frames);
6649 +}
6650 +
6651 +static inline struct iam_key *iam_path_key(const struct iam_path *path, int nr)
6652 +{
6653 +       assert(0 <= nr && nr < ARRAY_SIZE(path->ip_data->ipd_key_scratch));
6654 +       return path->ip_data->ipd_key_scratch[nr];
6655 +}
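
As an example of the scratch-key discipline laid out in the enum at the top of this header, generic code that needs the current record's key for a comparison can stage it in slot [0]. A sketch using only accessors declared in this header (iam_keycmp_current itself is a hypothetical name; iam_leaf_ops() is declared further below):

    /* Sketch: compare the key of the record under @path's leaf with @k,
     * staging the record key in scratch slot [0] as documented above. */
    static int iam_keycmp_current(struct iam_path *path, const struct iam_key *k)
    {
            struct iam_leaf *leaf  = &path->ip_leaf;
            struct iam_key  *stage = iam_path_key(path, 0);

            /* ->key() may return an in-node pointer or fill @stage */
            stage = iam_leaf_ops(leaf)->key(leaf, stage);
            return iam_keycmp(path->ip_container, stage, k);
    }
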
6656 +
6657 +int dx_lookup(struct iam_path *path);
6658 +void dx_insert_block(struct iam_path *path, struct iam_frame *frame,
6659 +                    u32 hash, u32 block);
6660 +
6661 +int ext3_htree_next_block(struct inode *dir, __u32 hash,
6662 +                         struct iam_path *path, __u32 *start_hash);
6663 +
6664 +struct buffer_head *ext3_append(handle_t *handle, struct inode *inode,
6665 +                               u32 *block, int *err);
6666 +int split_index_node(handle_t *handle, struct iam_path *path);
6667 +
6668 +/*
6669 + * external
6670 + */
6671 +void iam_container_write_lock(struct iam_container *c);
6672 +void iam_container_write_unlock(struct iam_container *c);
6673 +
6674 +void iam_container_read_lock(struct iam_container *c);
6675 +void iam_container_read_unlock(struct iam_container *c);
6676 +
6677 +int iam_index_next(struct iam_container *c, struct iam_path *p);
6678 +int iam_read_leaf(struct iam_path *p);
6679 +
6680 +int iam_node_read(struct iam_container *c, iam_ptr_t ptr,
6681 +                 handle_t *handle, struct buffer_head **bh);
6682 +
6683 +void iam_insert_key(struct iam_path *path, struct iam_frame *frame,
6684 +                   const struct iam_key *key, iam_ptr_t ptr);
6685 +
6686 +int  iam_leaf_at_end(const struct iam_leaf *l);
6687 +void iam_leaf_next(struct iam_leaf *leaf);
6688 +
6689 +struct iam_path *iam_leaf_path(const struct iam_leaf *leaf);
6690 +struct iam_container *iam_leaf_container(const struct iam_leaf *leaf);
6691 +struct iam_descr *iam_leaf_descr(const struct iam_leaf *leaf);
6692 +struct iam_leaf_operations *iam_leaf_ops(const struct iam_leaf *leaf);
6693 +
6694 +
6695 +struct iam_format {
6696 +        int (*if_guess)(struct iam_container *c);
6697 +        struct list_head if_linkage;
6698 +};
6699 +
6700 +void iam_format_register(struct iam_format *fmt);
6701 +
6702 +void iam_lfix_format_init(void);
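
A new on-disk format plugs in by registering an iam_format whose ->if_guess() recognizes its root block; iam_container_setup() presumably walks the registered list until one guess succeeds. A minimal sketch of such a registration; the my_* names and the return convention of ->if_guess() are assumptions:

    static int my_format_guess(struct iam_container *c)
    {
            /* Inspect the root block of c->ic_object and return 0 if this
             * format recognizes it, a negative errno otherwise (assumed). */
            return -EINVAL;
    }

    static struct iam_format my_format = {
            .if_guess = my_format_guess,
    };

    static void my_format_init(void)
    {
            iam_format_register(&my_format);
    }
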
6703 +
6704 +/* __LINUX_LUSTRE_IAM_H__ */
6705 +#endif