Whamcloud - gitweb
iam: fixes
authornikita <nikita>
Wed, 10 May 2006 23:42:46 +0000 (23:42 +0000)
committernikita <nikita>
Wed, 10 May 2006 23:42:46 +0000 (23:42 +0000)
lustre/kernel_patches/patches/ext3-iam-separate.patch

index 5307270..1be2b38 100644 (file)
-Index: linux-2.6.9/fs/ext3/namei.c
+Index: iam/fs/ext3/Makefile
 ===================================================================
---- linux-2.6.9.orig/fs/ext3/namei.c   2006-05-09 13:37:46.000000000 +0800
-+++ linux-2.6.9/fs/ext3/namei.c        2006-05-09 13:37:46.000000000 +0800
-@@ -24,81 +24,6 @@
-  *    Theodore Ts'o, 2002
-  */
--/*
-- * iam: big theory statement.
-- *
-- * iam (Index Access Module) is a module providing abstraction of persistent
-- * transactional container on top of generalized ext3 htree.
-- *
-- * iam supports:
-- *
-- *     - key, pointer, and record size specifiable per container.
-- *
-- *     - trees taller than 2 index levels.
-- *
-- *     - read/write to existing ext3 htree directories as iam containers.
-- *
-- * iam container is a tree, consisting of leaf nodes containing keys and
-- * records stored in this container, and index nodes, containing keys and
-- * pointers to leaf or index nodes.
-- *
-- * iam does not work with keys directly, instead it calls user-supplied key
-- * comparison function (->dpo_keycmp()).
-- *
-- * Pointers are (currently) interpreted as logical offsets (measured in
-- * blocksful) within underlying flat file on top of which iam tree lives.
-- *
-- * On-disk format:
-- *
-- * iam mostly tries to reuse existing htree formats.
-- *
-- * Format of index node:
-- *
-- * +-----+-------+-------+-------+------+-------+------------+
-- * |     | count |       |       |      |       |            |
-- * | gap |   /   | entry | entry | .... | entry | free space |
-- * |     | limit |       |       |      |       |            |
-- * +-----+-------+-------+-------+------+-------+------------+
-- *
-- *       gap           this part of node is never accessed by iam code. It
-- *                     exists for binary compatibility with ext3 htree (that,
-- *                     in turn, stores fake struct ext2_dirent for ext2
-- *                     compatibility), and to keep some unspecified per-node
-- *                     data. Gap can be different for root and non-root index
-- *                     nodes. Gap size can be specified for each container
-- *                     (gap of 0 is allowed).
-- *
-- *       count/limit   current number of entries in this node, and the maximal
-- *                     number of entries that can fit into node. count/limit
-- *                     has the same size as entry, and is itself counted in
-- *                     count.
-- *
-- *       entry         index entry: consists of a key immediately followed by
-- *                     a pointer to a child node. Size of a key and size of a
-- *                     pointer depends on container. Entry has neither
-- *                     alignment nor padding.
-- *
-- *       free space    portion of node new entries are added to
-- *
-- * Entries in index node are sorted by their key value.
-- *
-- * Format of leaf node:
-- *
-- * +-----+-------+-------+-------+------+-------+------------+
-- * |     | count |       |       |      |       |            |
-- * | gap |   /   | leaf  | leaf  | .... | leaf  | free space |
-- * |     | limit |       |       |      |       |            |
-- * +-----+-------+-------+-------+------+-------+------------+
--
-- *       leaf          For leaf entry: consists of a rec immediately followd by 
-- *                     a key. size of a key and size of a rec depends on container.  
-- *
-- *
-- *
-- *
-- *
-- */
--
- #include <linux/module.h>
- #include <linux/fs.h>
- #include <linux/pagemap.h>
-@@ -112,10 +37,10 @@
- #include <linux/quotaops.h>
- #include <linux/buffer_head.h>
- #include <linux/smp_lock.h>
-+#include <linux/lustre_iam.h>
- #include "xattr.h"
- #include "iopen.h"
- #include "acl.h"
--#include <linux/lustre_iam.h>
- /*
-  * define how far ahead to read directories while searching them.
-  */
-@@ -125,9 +50,9 @@
- #define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
--static struct buffer_head *ext3_append(handle_t *handle,
--                                      struct inode *inode,
--                                      u32 *block, int *err)
-+struct buffer_head *ext3_append(handle_t *handle,
-+                              struct inode *inode,
-+                              u32 *block, int *err)
- {
-       struct buffer_head *bh;
-@@ -141,9 +66,6 @@
-       return bh;
- }
--#ifndef assert
--#define assert(test) J_ASSERT(test)
--#endif
- #ifndef swap
- #define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
-@@ -162,10 +84,6 @@
-       u8 file_type;
- };
--struct dx_countlimit {
--      __le16 limit;
--      __le16 count;
--};
- /*
-  * dx_root_info is laid out so that if it should somehow get overlaid by a
-@@ -203,235 +121,6 @@
- };
+--- iam.orig/fs/ext3/Makefile  2006-05-10 18:21:01.000000000 +0400
++++ iam/fs/ext3/Makefile       2006-05-10 18:21:01.000000000 +0400
+@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
  
+ ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+          ioctl.o namei.o super.o symlink.o hash.o resize.o \
+-         extents.o mballoc.o
++         extents.o mballoc.o iam.o
  
--static u32 htree_root_ptr(struct iam_container *c);
--static int htree_node_check(struct iam_path *path, struct iam_frame *frame);
--static int htree_node_init(struct iam_container *c,
--                         struct buffer_head *bh, int root);
--static int htree_keycmp(struct iam_container *c,
--                      struct iam_key *k1, struct iam_key *k2);
--static int htree_node_read(struct iam_container *c, iam_ptr_t ptr,
--                         handle_t *h, struct buffer_head **bh);
--
--/*
-- * Parameters describing iam compatibility mode in which existing ext3 htrees
-- * can be manipulated.
-- */
--static struct iam_descr htree_compat_param = {
--      .id_key_size = sizeof ((struct dx_map_entry *)NULL)->hash,
--      .id_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs,
--      .id_node_gap = offsetof(struct dx_node, entries),
--      .id_root_gap = offsetof(struct dx_root, entries),
--
--      .id_root_ptr   = htree_root_ptr,
--      .id_node_check = htree_node_check,
--      .id_node_init  = htree_node_init,
--      .id_node_read  = htree_node_read,
--      .id_keycmp     = htree_keycmp
--};
--
--
--struct iam_key;
--struct iam_rec;
--struct iam_descr;
--struct iam_container;
--struct iam_path;
--
--
--
--/*
-- * iam cursor (iterator) api.
-- */
--
--/*
-- * Flags controlling iterator functionality.
-- */
--enum iam_it_flags {
--      /*
--       * this iterator will move (iam_it_{prev,next}() will be called on it)
--       */
--      IAM_IT_MOVE  = (1 << 0),
--      /*
--       * tree can be updated through this iterator.
--       */
--      IAM_IT_WRITE = (1 << 1)
--};
--
--/*
-- * States of iterator state machine.
-- */
--enum iam_it_state {
--      /* initial state */
--      IAM_IT_DETACHED,
--      /* iterator is above particular record in the container */
--      IAM_IT_ATTACHED
--};
--
--struct htree_cookie {
--      struct dx_hash_info *hinfo;
--      struct dentry       *dentry;
--};
--
--/*
-- * Iterator.
-- *
-- * Immediately after call to iam_it_init() iterator is in "detached"
-- * (IAM_IT_DETACHED) state: it is associated with given parent container, but
-- * doesn't point to any particular record in this container.
-- *
-- * After successful call to iam_it_get() and until corresponding call to
-- * iam_it_put() iterator is in "attached" state (IAM_IT_ATTACHED).
-- *
-- * Attached iterator can move through records in a container (provided
-- * IAM_IT_MOVE permission) in a key order, can get record and key values as it
-- * passes over them, and can modify container (provided IAM_IT_WRITE
-- * permission).
-- *
-- * Concurrency: iterators are supposed to be local to thread. Interfaces below
-- * do no internal serialization.
-- *
-- */
--struct iam_iterator {
--      /*
--       * iterator flags, taken from enum iam_it_flags.
--       */
--      __u32                 ii_flags;
--      enum iam_it_state     ii_state;
--      /*
--       * path to the record. Valid in IAM_IT_ATTACHED state.
--       */
--      struct iam_path       ii_path;
--};
--
--static inline struct iam_key *keycpy(struct iam_container *c,
--                                   struct iam_key *k1, struct iam_key *k2)
--{
--      return memcpy(k1, k2, c->ic_descr->id_key_size);
--}
--
--static inline int keycmp(struct iam_container *c,
--                       struct iam_key *k1, struct iam_key *k2)
--{
--      return c->ic_descr->id_keycmp(c, k1, k2);
--}
--
--static struct iam_container *iam_it_container(struct iam_iterator *it)
--{
--      return it->ii_path.ip_container;
--}
--
--static inline int it_keycmp(struct iam_iterator *it,
--                          struct iam_key *k1, struct iam_key *k2)
--{
--      return keycmp(iam_it_container(it), k1, k2);
--}
--
--/*
-- * Initialize iterator to IAM_IT_DETACHED state.
-- *
-- * postcondition: it_state(it) == IAM_IT_DETACHED
-- */
--int  iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags);
--/*
-- * Finalize iterator and release all resources.
-- *
-- * precondition: it_state(it) == IAM_IT_DETACHED
-- */
--void iam_it_fini(struct iam_iterator *it);
--
--/*
-- * Attach iterator. After successful completion, @it points to record with the
-- * largest key not larger than @k. Semantics of ->id_create() method guarantee
-- * that such record will always be found.
-- *
-- * Return value: 0: positioned on existing record,
-- *             -ve: error.
-- *
-- * precondition:  it_state(it) == IAM_IT_DETACHED
-- * postcondition: ergo(result == 0,
-- *                     (it_state(it) == IAM_IT_ATTACHED &&
-- *                      it_keycmp(it, iam_it_key_get(it, *), k) < 0))
-- */
--int iam_it_get(struct iam_iterator *it, struct iam_key *k);
--
--/*
-- * Duplicates iterator.
-- *
-- * postcondition: it_state(dst) == it_state(src) &&
-- *                iam_it_container(dst) == iam_it_container(src) &&
-- *                dst->ii_flags = src->ii_flags &&
-- *                ergo(it_state(it) == IAM_IT_ATTACHED,
-- *                     iam_it_rec_get(dst) == iam_it_rec_get(src) &&
-- *                     iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
-- */
--void iam_it_dup(struct iam_iterator *dst, struct iam_iterator *src);
--
--/*
-- * Detach iterator. Does nothing it detached state.
-- *
-- * postcondition: it_state(it) == IAM_IT_DETACHED
-- */
--void iam_it_put(struct iam_iterator *it);
--
--/*
-- * Move iterator one record right.
-- *
-- * Return value: 0: success,
-- *              +1: end of container reached
-- *             -ve: error
-- *
-- * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
-- * postcondition: ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED)
-- */
--int iam_it_next(struct iam_iterator *it);
--
--/*
-- * Return pointer to the record under iterator.
-- *
-- * precondition:  it_state(it) == IAM_IT_ATTACHED
-- * postcondition: it_state(it) == IAM_IT_ATTACHED
-- */
--const struct iam_rec *iam_it_rec_get(struct iam_iterator *it);
--
--/*
-- * Replace contents of record under iterator.
-- *
-- * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
-- * postcondition: it_state(it) == IAM_IT_ATTACHED &&
-- *                ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
-- */
--int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r);
--
--/*
-- * Place key under iterator in @k, return @k
-- *
-- * precondition:  it_state(it) == IAM_IT_ATTACHED
-- * postcondition: it_state(it) == IAM_IT_ATTACHED
-- */
--const struct iam_key *iam_it_key_get(struct iam_iterator *it,
--                                   struct iam_key *k);
--
--/*
-- * Insert new record with key @k and contents from @r, shifting records to the
-- * right.
-- *
-- * precondition:  it_state(it) == IAM_IT_ATTACHED &&
-- *                it->ii_flags&IAM_IT_WRITE &&
-- *                it_keycmp(it, iam_it_key_get(it, *), k) < 0
-- * postcondition: it_state(it) == IAM_IT_ATTACHED &&
-- *                ergo(result == 0,
-- *                     it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
-- *                     !memcmp(iam_it_rec_get(it), r, ...))
-- */
--int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
--                    struct iam_key *k, struct iam_rec *r);
--/*
-- * Delete record under iterator.
-- *
-- * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
-- * postcondition: it_state(it) == IAM_IT_ATTACHED
-- */
--int iam_it_rec_delete(handle_t *h, struct iam_iterator *it);
--
- #ifdef CONFIG_EXT3_INDEX
- static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry);
- static void dx_set_block(struct iam_path *p,
-@@ -457,150 +146,41 @@
- static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
-               struct dx_map_entry *offsets, int count);
- static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
--static void dx_insert_block (struct iam_path *path,
--                           struct iam_frame *frame, u32 hash, u32 block);
--static int ext3_htree_next_block(struct inode *dir, __u32 hash,
--                               struct iam_path *path, __u32 *start_hash);
- static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
-                      struct ext3_dir_entry_2 **res_dir, int *err);
- static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
-                            struct inode *inode);
--static inline void iam_path_init(struct iam_path *path,
--                               struct iam_container *c, struct htree_cookie *hc);
--static inline void iam_path_fini(struct iam_path *path);
--
--
-+static u32 htree_root_ptr(struct iam_container *c);
-+static int htree_node_check(struct iam_path *path, struct iam_frame *frame);
-+static int htree_node_init(struct iam_container *c, struct buffer_head *bh, int root);
-+static int htree_node_read(struct iam_container *c, iam_ptr_t ptr,
-+                         handle_t *handle, struct buffer_head **bh);
-+static int htree_keycmp(struct iam_container *c,
-+                      struct iam_key *k1, struct iam_key *k2);
- /*
-- * Future: use high four bits of block for coalesce-on-delete flags
-- * Mask them off for now.
-+ * Parameters describing iam compatibility mode in which existing ext3 htrees
-+ * can be manipulated.
-  */
-+struct iam_descr htree_compat_param = {
-+      .id_key_size = sizeof ((struct dx_map_entry *)NULL)->hash,
-+      .id_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs,
-+      .id_node_gap = offsetof(struct dx_node, entries),
-+      .id_root_gap = offsetof(struct dx_root, entries),
--static inline void *entry_off(struct iam_entry *entry, ptrdiff_t off)
--{
--      return (void *)((char *)entry + off);
--}
--
--static inline struct iam_descr *path_descr(struct iam_path *p)
--{
--      return p->ip_container->ic_descr;
--}
--
--static inline struct inode *path_obj(struct iam_path *p)
--{
--      return p->ip_container->ic_object;
--}
--
--static inline size_t iam_entry_size(struct iam_path *p)
--{
--      return path_descr(p)->id_key_size + path_descr(p)->id_ptr_size;
--}
--
--static inline struct iam_entry *iam_entry_shift(struct iam_path *p,
--                                            struct iam_entry *entry, int shift)
--{
--      void *e = entry;
--      return e + shift * iam_entry_size(p);
--}
--
--static inline ptrdiff_t iam_entry_diff(struct iam_path *p,
--                                    struct iam_entry *e1, struct iam_entry *e2)
--{
--      ptrdiff_t diff;
--
--      diff = (void *)e1 - (void *)e2;
--      assert(diff / iam_entry_size(p) * iam_entry_size(p) == diff);
--      return diff / iam_entry_size(p);
--}
--
--static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry)
--{
--      return le32_to_cpu(*(u32 *)entry_off(entry, path_descr(p)->id_key_size))
--              & 0x00ffffff;
--}
--
--static inline void dx_set_block(struct iam_path *p,
--                              struct iam_entry *entry, unsigned value)
--{
--      *(u32*)entry_off(entry,
--                       path_descr(p)->id_key_size) = cpu_to_le32(value);
--}
--
--static inline struct iam_key *dx_get_key(struct iam_path *p,
--                                      struct iam_entry *entry,
--                                      struct iam_key *key)
--{
--      memcpy(key, entry, path_descr(p)->id_key_size);
--      return key;
--}
--
--static inline struct iam_key *iam_key_at(struct iam_path *p,
--                                     struct iam_entry *entry)
--{
--      return (struct iam_key *)entry;
--}
--
--static inline void dx_set_key(struct iam_path *p,
--                            struct iam_entry *entry, struct iam_key *key)
--{
--      memcpy(entry, key, path_descr(p)->id_key_size);
--}
--
--static inline unsigned dx_get_count (struct iam_entry *entries)
--{
--      return le16_to_cpu(((struct dx_countlimit *) entries)->count);
--}
--
--static inline unsigned dx_get_limit (struct iam_entry *entries)
--{
--      return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
--}
--
--static inline void dx_set_count (struct iam_entry *entries, unsigned value)
--{
--      ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
--}
--
--static inline void dx_set_limit (struct iam_entry *entries, unsigned value)
--{
--      ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
--}
--
--static inline unsigned dx_root_limit(struct iam_path *p)
--{
--      struct iam_descr *param = path_descr(p);
--      unsigned entry_space = path_obj(p)->i_sb->s_blocksize -
--              param->id_root_gap;
--      return entry_space / (param->id_key_size + param->id_ptr_size);
--}
-+      .id_root_ptr   = htree_root_ptr,
-+      .id_node_check = htree_node_check,
-+      .id_node_init  = htree_node_init,
-+      .id_node_read  = htree_node_read,
-+      .id_keycmp     = htree_keycmp
-+};
--static inline unsigned dx_node_limit(struct iam_path *p)
--{
--      struct iam_descr *param = path_descr(p);
--      unsigned entry_space   = path_obj(p)->i_sb->s_blocksize -
--              param->id_node_gap;
--      return entry_space / (param->id_key_size + param->id_ptr_size);
--}
- static inline int dx_index_is_compat(struct iam_path *path)
- {
-       return path_descr(path) == &htree_compat_param;
- }
--static struct iam_entry *dx_get_entries(struct iam_path *path, void *data,
--                                     int root)
--{
--      return data +
--              (root ?
--               path_descr(path)->id_root_gap : path_descr(path)->id_node_gap);
--}
--
--static struct iam_entry *dx_node_get_entries(struct iam_path *path,
--                                          struct iam_frame *frame)
--{
--      return dx_get_entries(path,
--                            frame->bh->b_data, frame == path->ip_frames);
--}
- static int dx_node_check(struct iam_path *p, struct iam_frame *f)
- {
-@@ -623,6 +203,15 @@
-       return 1;
- }
-+/*
-+ * States of iterator state machine.
-+ */
-+
-+struct htree_cookie {
-+      struct dx_hash_info *hinfo;
-+      struct dentry       *dentry;
-+};
-+
- static u32 htree_root_ptr(struct iam_container *c)
- {
-       return 0;
-@@ -800,7 +389,7 @@
- }
- #endif /* DX_DEBUG */
--static int dx_lookup(struct iam_path *path)
-+int dx_lookup(struct iam_path *path)
- {
-       u32 ptr;
-       int err = 0;
-@@ -904,495 +493,6 @@
- }
- /*
-- * Initialize container @c, acquires additional reference on @inode.
-- */
--int iam_container_init(struct iam_container *c,
--                     struct iam_descr *descr, struct inode *inode)
--{
--      memset(c, 0, sizeof *c);
--      c->ic_descr  = descr;
--      c->ic_object = igrab(inode);
--      if (c->ic_object != NULL)
--              return 0;
--      else
--              return -ENOENT;
--}
--
--/*
-- * Finalize container @c, release all resources.
-- */
--void iam_container_fini(struct iam_container *c)
--{
--      if (c->ic_object != NULL) {
--              iput(c->ic_object);
--              c->ic_object = NULL;
--      }
--}
--
--static inline void iam_path_init(struct iam_path *path, struct iam_container *c, 
--                               struct htree_cookie *hc)
--{
--      memset(path, 0, sizeof *path);
--      path->ip_container = c;
--      path->ip_frame = path->ip_frames;
--      path->ip_descr_data = hc;
--}
--
--static inline void iam_path_fini(struct iam_path *path)
--{
--      int i;
--
--      for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) {
--              if (path->ip_frames[i].bh != NULL) {
--                      brelse(path->ip_frames[i].bh);
--                      path->ip_frames[i].bh = NULL;
--              }
--      }
--}
--
--static void iam_path_compat_init(struct iam_path_compat *path,
--                               struct inode *inode)
--{
--      int i;
--
--      iam_container_init(&path->ipc_container, &htree_compat_param, inode);
--      /*
--       * XXX hack allowing finalization of iam_path_compat with
--       * iam_path_fini().
--       */
--      iput(inode);
--      iam_path_init(&path->ipc_path, &path->ipc_container, NULL);
--      for (i = 0; i < ARRAY_SIZE(path->ipc_path.ip_key_scratch); ++i)
--              path->ipc_path.ip_key_scratch[i] =
--                      (struct iam_key *)&path->ipc_scrach[i];
--}
--
--static void iam_path_compat_fini(struct iam_path_compat *path)
--{
--      iam_path_fini(&path->ipc_path);
--      iam_container_fini(&path->ipc_container);
--}
--
--static int iam_leaf_init(struct iam_path *path, struct iam_leaf *leaf)
--{
--      int block, err;
--      struct buffer_head *bh;
--      
--      block = dx_get_block(path, path->ip_frame->at);
--      err = path_descr(path)->id_node_read(path->ip_container, block, 
--                                           NULL, &bh);
--      if (err)
--              return err;
--
--      leaf->bh = bh;
--      leaf->entries = (struct iam_leaf_entry *)bh->b_data;
--      return 0;
--}
--
--static void iam_leaf_fini(struct iam_leaf *leaf)
--{
--      if (leaf->bh)
--              brelse(leaf->bh);
--}
--
--/*
-- * Search container @c for record with key @k. If record is found, its data
-- * are moved into @r.
-- *
-- *
-- *
-- * Return values: +ve: found, 0: not-found, -ve: error
-- */
--
--int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r)
--{
--      struct dx_hash_info     hinfo;
--      struct iam_path_compat cpath;
--      struct iam_path *path = &cpath.ipc_path;
--      struct htree_cookie hc = {
--              .hinfo  = &hinfo
--      };
--      int err, i;
--
--      iam_path_init(path, c, &hc);
--      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
--              path->ip_key_scratch[i] =
--                      (struct iam_key *)&cpath.ipc_scrach[i];
--      err = dx_lookup(path);
--      do {
--              struct iam_leaf leaf;
--              err = iam_leaf_init(path, &leaf);
--              if (err)
--                      goto errout;
--
--              for (path_descr(path)->id_leaf.start(c, &leaf);
--                   !path_descr(path)->id_leaf.at_end(c, &leaf);
--                   path_descr(path)->id_leaf.next(c, &leaf)) {
--                      struct iam_key *key;
--
--                      key = kmalloc(path_descr(path)->id_key_size, GFP_KERNEL);
--                      path_descr(path)->id_leaf.key(c, &leaf, key);
--                      if (keycmp(c, k, key) == 0) {
--                              memcpy(r, path_descr(path)->id_leaf.rec(c, &leaf),
--                                     path_descr(path)->id_rec_size);
--                              iam_path_fini(path);
--                              iam_leaf_fini(&leaf);
--                              return 0;
--                      }
--              }
--
--              iam_leaf_fini(&leaf);
--              /* Check to see if we should continue to search */
--              err = ext3_htree_next_block(c->ic_object, hinfo.hash, path, NULL);
--              if (err < 0)
--                      goto errout;
--      } while (err == 1);
--errout:
--      iam_path_fini(path);
--      return(err);
--}
--EXPORT_SYMBOL(iam_lookup);
--
--static inline size_t iam_leaf_entry_size(struct iam_path *p)
--{
--      return path_descr(p)->id_rec_size + path_descr(p)->id_key_size;
--}
--
--static inline ptrdiff_t iam_leaf_entry_diff(struct iam_path *p,
--                                    struct iam_leaf_entry *e1, struct iam_leaf_entry *e2)
--{
--      ptrdiff_t diff;
--
--      diff = (void *)e1 - (void *)e2;
--      assert(diff / iam_leaf_entry_size(p) * iam_leaf_entry_size(p) == diff);
--      return diff / iam_leaf_entry_size(p);
--}
--
--static inline struct iam_leaf_entry* 
--iam_leaf_entry_shift(struct iam_path *p, struct iam_leaf_entry *entry, int shift)
--{
--      void *e = entry;
--      return e + shift * iam_leaf_entry_size(p);
--}
--
--static inline struct iam_key *
--dx_leaf_get_key(struct iam_path *p, struct iam_leaf_entry *e, struct iam_key *key)
--{
--      memcpy(key, e, path_descr(p)->id_key_size);
--      return key;
--}
--
--static inline struct iam_key *
--iam_leaf_key_at(struct iam_path *p, struct iam_leaf_entry *entry)
--{
--      void *e = entry;
--      return e + path_descr(p)->id_rec_size;
--}
--static inline struct iam_leaf_entry *
--iam_leaf_entry_at(struct iam_path *p, struct iam_leaf_entry *entry)
--{
--      return entry; 
--}
--
--static int iam_leaf_lookup(struct iam_path *path, struct iam_leaf *leaf, 
--                         struct iam_key *k)
--{
--      struct iam_leaf_entry *p, *q, *m;
--      struct iam_leaf_entry *entries = leaf->entries;
--      int count = dx_get_count((struct iam_entry *)entries);
--      
--      p = iam_leaf_entry_shift(path, entries, 1);
--      q = iam_leaf_entry_shift(path, entries, count - 1);
--      while (p <= q) {
--              m = iam_leaf_entry_shift(path,
--                                 p, iam_leaf_entry_diff(path, q, p) / 2);
--              dxtrace(printk("."));
--              if (keycmp(path->ip_container, iam_leaf_key_at(path, m),
--                         path->ip_key_target) > 0)
--                      q = iam_leaf_entry_shift(path, m, -1);
--              else
--                      p = iam_leaf_entry_shift(path, m, +1);
--      }
--      leaf->at = q; 
--      return 0;
--}
--
--/*XXX what kind of lock should this entry be locked: WangDi */
--static int iam_leaf_insert(handle_t *handle, struct iam_path *path, 
--                         struct iam_key *k, struct iam_rec *r)
--{
--      struct iam_leaf leaf;
--      struct iam_leaf_entry *p, *q;
--      int err, count;
--
--      err = iam_leaf_init(path, &leaf);
--      if (err)
--              goto errout;
--      path_descr(path)->id_leaf.start(path->ip_container, &leaf);
--      count = dx_get_count((struct iam_entry *)leaf.entries);
--      if (dx_get_count((struct iam_entry *)leaf.entries) >= 
--          dx_get_limit((struct iam_entry *)leaf.entries)){
--              err = -ENOSPC;
--              goto errout;
--      }
--
--      err = iam_leaf_lookup(path, &leaf, k);
--      if (err)
--              goto errout;
--      
--      /*insert the k/r to leaf entries*/
--      p = iam_leaf_entry_shift(path, leaf.at, 1);
--      q = iam_leaf_entry_shift(path, leaf.entries, count - 1);
--      while (q < p) {
--              memcpy(iam_leaf_entry_shift(path, q, 1), q, iam_leaf_entry_size(path));
--              q = iam_leaf_entry_shift(path, q, -1);  
--      }
--      memcpy(iam_leaf_entry_at(path, p), r, path_descr(path)->id_rec_size);
--      memcpy(iam_leaf_key_at(path, p), k, path_descr(path)->id_key_size);
--
--      dx_set_count((struct iam_entry*)leaf.entries, count + 1);
--      err = ext3_journal_dirty_metadata(handle, leaf.bh);
--      if (err)
--              ext3_std_error(path->ip_container->ic_object->i_sb, err);
--errout:       
--      iam_leaf_fini(&leaf);
--      return err;
--} 
--
--static int split_leaf_node(handle_t *handle, struct iam_path *path)
--{
--      struct inode *dir = path_obj(path);
--      unsigned continued = 0;
--      struct buffer_head *bh2;
--      u32 newblock, hash_split;
--      char *data2;
--      struct iam_leaf leaf;
--      unsigned split;
--      int     err;
--
--      bh2 = ext3_append (handle, dir, &newblock, &err);
--      if (!(bh2)) {
--              err = -ENOSPC;
--              goto errout;
--      }
--      err = iam_leaf_init(path, &leaf);
--      if (err)
--              goto errout;
--
--      BUFFER_TRACE(leaf.bh, "get_write_access");
--      err = ext3_journal_get_write_access(handle, leaf.bh);
--      if (err) {
--      journal_error:
--              iam_leaf_fini(&leaf);
--              brelse(bh2);
--              ext3_std_error(dir->i_sb, err);
--              err = -EIO;
--              goto errout;
--      }
--      data2 = bh2->b_data;
--      split = dx_get_count((struct iam_entry*)leaf.entries)/2;
--      hash_split = *(__u32*)iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split));
--      if (keycmp(path->ip_container, iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split)),
--                 iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split -1))) == 0)
--              continued = 1;
--
--      memcpy(iam_leaf_entry_shift(path, (struct iam_leaf_entry *)data2, 1),
--             iam_leaf_entry_shift(path, leaf.entries, split),
--             split * iam_leaf_entry_size(path));
-- 
--      /* Which block gets the new entry? */
--      dx_insert_block(path, path->ip_frame, hash_split + continued, newblock);
--      err = ext3_journal_dirty_metadata (handle, bh2);
--      if (err)
--              goto journal_error;
--      err = ext3_journal_dirty_metadata (handle, leaf.bh);
--      if (err)
--              goto journal_error;
--      brelse (bh2);
--      iam_leaf_fini(&leaf);
--errout:
--      return err;
--}
--
--static int split_index_node(handle_t *handle, struct iam_path *path);
--/*
-- * Insert new record @r with key @k into container @c (within context of
-- * transaction @h.
-- *
-- * Return values: 0: success, -ve: error, including -EEXIST when record with
-- * given key is already present.
-- *
-- * postcondition: ergo(result == 0 || result == -EEXIST,
-- *                                  iam_lookup(c, k, r2) > 0 &&
-- *                                  !memcmp(r, r2, c->ic_descr->id_rec_size));
-- */
--int iam_insert(handle_t *handle, struct iam_container *c, struct iam_key *k, 
--             struct iam_rec *r)
--{
--      struct dx_hash_info     hinfo;
--      struct iam_path_compat cpath;
--      struct iam_path *path = &cpath.ipc_path;
--      struct htree_cookie hc = {
--              .hinfo  = &hinfo
--      };
--      int err, i;
--
--      iam_path_init(path, c, &hc);
--      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
--              path->ip_key_scratch[i] =
--                      (struct iam_key *)&cpath.ipc_scrach[i];
--      err = dx_lookup(path);
--      if (err)
--              goto errout; 
--
--      err = iam_leaf_insert(handle, path, k, r);
--      
--      if (err != -ENOSPC) 
--              goto errout;    
--
--      err = split_index_node(handle, path);
--      if (err)
--              goto errout;    
--
--      err = split_leaf_node(handle, path);
--      if (err)
--              goto errout;
--      
--      err = iam_leaf_insert(handle, path, k, r);
--errout:
--      iam_path_fini(path);
--      return(err);
--}
--
--EXPORT_SYMBOL(iam_insert);
--static int iam_leaf_delete(handle_t *handle, struct iam_path *path, 
--                         struct iam_key *k)
--{
--      struct iam_leaf leaf;
--      struct iam_leaf_entry *p, *q;
--      int err, count;
--
--      err = iam_leaf_init(path, &leaf);
--      if (err)
--              goto errout;
--      
--      err = iam_leaf_lookup(path, &leaf, k);
--      if (err)
--              goto errout;
--
--      count = dx_get_count((struct iam_entry*)leaf.entries);
--      /*delete the k to leaf entries*/
--      p = iam_leaf_entry_shift(path, leaf.at, 1);
--      q = iam_leaf_entry_shift(path, leaf.entries, count - 1);
--      while (p < q) {
--              memcpy(p, iam_leaf_entry_shift(path, p, 1), iam_leaf_entry_size(path));
--              p = iam_leaf_entry_shift(path, p, 1);
--      }
--      dx_set_count((struct iam_entry*)leaf.entries, count - 1);
--
--      err = ext3_journal_dirty_metadata(handle, leaf.bh);
--      if (err)
--              ext3_std_error(path_obj(path)->i_sb, err);
--errout:       
--      iam_leaf_fini(&leaf);
--      return err;
--}
--
--/*
-- * Delete existing record with key @k.
-- *
-- * Return values: 0: success, -ENOENT: not-found, -ve: other error.
-- *
-- * postcondition: ergo(result == 0 || result == -ENOENT,
-- *                                 !iam_lookup(c, k, *));
-- */
--int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k)
--{
--      struct dx_hash_info     hinfo;
--      struct iam_path_compat cpath;
--      struct iam_path *path = &cpath.ipc_path;
--      struct htree_cookie hc = {
--              .hinfo  = &hinfo
--      };
--      int err, i;
--
--      iam_path_init(path, c, &hc);
--      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
--              path->ip_key_scratch[i] =
--                      (struct iam_key *)&cpath.ipc_scrach[i];
--      err = dx_lookup(path);
--      if (err)
--              goto errout; 
--
--      err = iam_leaf_delete(h, path, k);
--errout:
--      iam_path_fini(path);
--      return err;
--}
--
--EXPORT_SYMBOL(iam_delete);
--
--static int iam_leaf_update(handle_t *handle, struct iam_path *path, 
--                         struct iam_key *k, struct iam_rec *r)
--{
--      struct iam_leaf leaf;
--      int err;
--
--      err = iam_leaf_init(path, &leaf);
--      if (err)
--              goto errout;
--      
--      err = iam_leaf_lookup(path, &leaf, k);
--      if (err)
--              goto errout;
--
--      memcpy(iam_leaf_entry_at(path, leaf.at), r, path_descr(path)->id_rec_size);
--      memcpy(iam_leaf_key_at(path, leaf.at), k, path_descr(path)->id_key_size);
--
--      err = ext3_journal_dirty_metadata(handle, leaf.bh);
--      if (err)
--              ext3_std_error(path_obj(path)->i_sb, err);
--errout:       
--      iam_leaf_fini(&leaf);
--      return err;
--}
--/*
-- * Replace existing record with key @k, or insert new one. New record data are
-- * in @r.
-- *
-- * Return values: 0: success, -ve: error.
-- *
-- * postcondition: ergo(result == 0, iam_lookup(c, k, r2) > 0 &&
-- *                                  !memcmp(r, r2, c->ic_descr->id_rec_size));
-- */
--int iam_update(handle_t *h, struct iam_container *c,
--             struct iam_key *k, struct iam_rec *r)
--{
--      struct dx_hash_info     hinfo;
--      struct iam_path_compat cpath;
--      struct iam_path *path = &cpath.ipc_path;
--      struct htree_cookie hc = {
--              .hinfo  = &hinfo
--      };
--      int err, i;
--      
--      iam_path_init(path, c, &hc);
--      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
--              path->ip_key_scratch[i] =
--                      (struct iam_key *)&cpath.ipc_scrach[i];
--      err = dx_lookup(path);
--      if (err)
--              goto errout; 
--
--      err = iam_leaf_update(h, path, k, r);
--errout:
--      iam_path_fini(path);
--      return err;
--}
--
--EXPORT_SYMBOL(iam_update);
--
--/*
-  * This function increments the frame pointer to search the next leaf
-  * block, and reads in the necessary intervening nodes if the search
-  * should be necessary.  Whether or not the search is necessary is
-@@ -1409,8 +509,8 @@
-  * If start_hash is non-null, it will be filled in with the starting
-  * hash of the next page.
-  */
--static int ext3_htree_next_block(struct inode *dir, __u32 hash,
--                               struct iam_path *path, __u32 *start_hash)
-+int ext3_htree_next_block(struct inode *dir, __u32 hash,
-+                        struct iam_path *path, __u32 *start_hash)
- {
-       struct iam_frame *p;
-       struct buffer_head *bh;
-@@ -1662,8 +762,8 @@
-       } while(more);
- }
--static void dx_insert_block(struct iam_path *path,
--                          struct iam_frame *frame, u32 hash, u32 block)
-+void dx_insert_block(struct iam_path *path, struct iam_frame *frame, 
-+                   u32 hash, u32 block)
- {
-       struct iam_entry *entries = frame->entries;
-       struct iam_entry *old = frame->at, *new = iam_entry_shift(path, old, +1);
-@@ -2392,7 +1492,7 @@
- }
- #ifdef CONFIG_EXT3_INDEX
--static int split_index_node(handle_t *handle, struct iam_path *path)
-+int split_index_node(handle_t *handle, struct iam_path *path)
- { 
-       struct iam_entry *entries;   /* old block contents */
-Index: linux-2.6.9/fs/ext3/iam.c
+ ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+Index: iam/fs/ext3/iam.c
 ===================================================================
---- linux-2.6.9.orig/fs/ext3/iam.c     2006-05-09 19:54:43.573589592 +0800
-+++ linux-2.6.9/fs/ext3/iam.c  2006-05-09 13:50:57.000000000 +0800
-@@ -0,0 +1,1025 @@
+--- iam.orig/fs/ext3/iam.c     2004-04-06 17:27:52.000000000 +0400
++++ iam/fs/ext3/iam.c  2006-05-11 01:24:29.000000000 +0400
+@@ -0,0 +1,945 @@
++/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
++ * vim:expandtab:shiftwidth=8:tabstop=8:
++ *
++ *  iam.c
++ *  Top-level entry points into osd module
++ *
++ *  Copyright (c) 2006 Cluster File Systems, Inc.
++ *   Author: Wang Di <wangdi@clusterfs.com>
++ *   Author: Nikita Danilov <nikita@clusterfs.com>
++ *
++ *   This file is part of the Lustre file system, http://www.lustre.org
++ *   Lustre is a trademark of Cluster File Systems, Inc.
++ *
++ *   You may have signed or agreed to another license before downloading
++ *   this software.  If so, you are bound by the terms and conditions
++ *   of that agreement, and the following does not apply to you.  See the
++ *   LICENSE file included with this distribution for more information.
++ *
++ *   If you did not agree to a different license, then this copy of Lustre
++ *   is open source software; you can redistribute it and/or modify it
++ *   under the terms of version 2 of the GNU General Public License as
++ *   published by the Free Software Foundation.
++ *
++ *   In either case, Lustre is distributed in the hope that it will be
++ *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
++ *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ *   license text for more details.
++ */
++
 +/*
 + * iam: big theory statement.
 + *
@@ -1152,20 +103,8 @@ Index: linux-2.6.9/fs/ext3/iam.c
 + *
 + * Entries in index node are sorted by their key value.
 + *
-+ * Format of leaf node:
-+ *
-+ * +-----+-------+-------+-------+------+-------+------------+
-+ * |     | count |       |       |      |       |            |
-+ * | gap |   /   | leaf  | leaf  | .... | leaf  | free space |
-+ * |     | limit |       |       |      |       |            |
-+ * +-----+-------+-------+-------+------+-------+------------+
-+
-+ *       leaf          For leaf entry: consists of a rec immediately followd by 
-+ *                     a key. size of a key and size of a rec depends on container.  
-+ *
-+ *
-+ *
-+ *
++ * Format of a leaf node is not specified. Generic iam code accesses leaf
++ * nodes through ->id_leaf methods in struct iam_descr.
 + *
 + */
 +
@@ -1183,93 +122,45 @@ Index: linux-2.6.9/fs/ext3/iam.c
 +#include <linux/buffer_head.h>
 +#include <linux/smp_lock.h>
 +#include <linux/lustre_iam.h>
++
++#include <libcfs/libcfs.h>
++#include <libcfs/kp30.h>
++
 +#include "xattr.h"
 +#include "iopen.h"
 +#include "acl.h"
 +
-+struct iam_key;
-+struct iam_rec;
-+struct iam_descr;
-+struct iam_container;
-+struct iam_path;
-+
-+
-+#define key_cmp(e1, e2) ({                              \
-+        typeof(e1) __e1 = (e1);                         \
-+        typeof(e2) __e2 = (e2);                         \
-+        __e1 > __e2 ? +1 : (__e1 < __e2 ? -1 : 0);      \
-+})
-+
-+/*
-+ * iam cursor (iterator) api.
-+ */
-+
-+/*
-+ * Flags controlling iterator functionality.
-+ */
-+enum iam_it_flags {
-+      /*
-+       * this iterator will move (iam_it_{prev,next}() will be called on it)
-+       */
-+      IAM_IT_MOVE  = (1 << 0),
-+      /*
-+       * tree can be updated through this iterator.
-+       */
-+      IAM_IT_WRITE = (1 << 1)
-+};
-+
-+static struct iam_leaf_entry *iam_leaf_entries(struct iam_path *p)
-+{
-+      return p->ip_leaf->entries;     
-+}
-+
-+static inline size_t iam_leaf_entry_size(struct iam_path *p)
-+{
-+      return path_descr(p)->id_rec_size + path_descr(p)->id_key_size;
-+}
-+
-+static inline ptrdiff_t iam_leaf_entry_diff(struct iam_path *p,
-+                                    struct iam_leaf_entry *e1, struct iam_leaf_entry *e2)
-+{
-+      ptrdiff_t diff;
-+
-+      diff = (void *)e1 - (void *)e2;
-+      assert(diff / iam_leaf_entry_size(p) * iam_leaf_entry_size(p) == diff);
-+      return diff / iam_leaf_entry_size(p);
-+}
-+
-+static inline struct iam_leaf_entry* 
-+iam_leaf_entry_shift(struct iam_path *path, struct iam_leaf_entry *entry, 
-+                   int shift)
-+{
-+      void *e = entry;
-+      return e + shift * iam_leaf_entry_size(path);
-+}
-+
-+static inline struct iam_key *
-+iam_leaf_key_at(struct iam_path *p, struct iam_leaf_entry *entry)
++static inline void iam_reccpy(struct iam_path *p, struct iam_rec *rec_dst,
++                            struct iam_rec *rec_src)
 +{
-+      void *e = entry;
-+      return e + path_descr(p)->id_rec_size;
++      memcpy(rec_dst, rec_src, iam_path_descr(p)->id_rec_size);
 +}
 +
-+static inline struct iam_leaf_entry *
-+iam_leaf_entry_at(struct iam_path *p, struct iam_leaf_entry *entry)
++/*
++ * Return pointer to current leaf record. Pointer is valid while corresponding
++ * leaf node is locked and pinned.
++ */
++struct iam_rec *iam_leaf_rec(struct iam_container *c, struct iam_leaf *leaf)
 +{
-+      return entry; 
++      return c->ic_descr->id_leaf.rec(c, leaf);
 +}
 +
-+struct iam_rec *
-+iam_leaf_rec(struct iam_container *c, struct iam_leaf_entry *entry)
++/*
++ * Return pointer to the current leaf key. This function may return either
++ * pointer to the key stored in node, or copy key into @key buffer supplied by
++ * caller and return pointer to this buffer. The latter approach is used when
++ * keys in nodes are not stored in plain form (e.g., htree doesn't store keys
++ * at all).
++ *
++ * Caller should assume that returned pointer is only valid while leaf node is
++ * pinned and locked.
++ */
++struct iam_key *iam_leaf_key(struct iam_container *c, struct iam_leaf *leaf,
++                           struct iam_key *key)
 +{
-+      return (struct iam_rec *)entry;
++      return c->ic_descr->id_leaf.key(c, leaf, key);
 +}
 +
-+struct iam_key *
-+iam_leaf_key(struct iam_container *c, struct iam_key *key)
-+{
-+      return key;
-+}
 +/*
 + * Initialize container @c, acquires additional reference on @inode.
 + */
@@ -1298,28 +189,22 @@ Index: linux-2.6.9/fs/ext3/iam.c
 +}
 +EXPORT_SYMBOL(iam_container_fini);
 +
-+void __iam_path_init(struct iam_path *path, struct iam_container *c)
++void iam_path_init(struct iam_path *path, struct iam_container *c,
++                struct iam_path_descr *pd)
 +{
 +      memset(path, 0, sizeof *path);
 +      path->ip_container = c;
 +      path->ip_frame = path->ip_frames;
++      path->ip_data = pd;
 +}
 +
-+void iam_path_init(struct iam_path *path, struct iam_container *c)
-+{
-+      int i;
-+      __iam_path_init(path, c);
-+
-+      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); i++)
-+              path->ip_key_scratch[i] = kmalloc(path_descr(path)->id_key_size,
-+                                                GFP_KERNEL);
-+      path->ip_leaf = kmalloc(sizeof(struct iam_leaf), GFP_KERNEL);
-+}
++static void iam_leaf_fini(struct iam_path *path);
 +
-+void __iam_path_fini(struct iam_path *path)
++void iam_path_fini(struct iam_path *path)
 +{
 +      int i;
 +
++      iam_leaf_fini(path);
 +      for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) {
 +              if (path->ip_frames[i].bh != NULL) {
 +                      brelse(path->ip_frames[i].bh);
@@ -1328,61 +213,74 @@ Index: linux-2.6.9/fs/ext3/iam.c
 +      }
 +}
 +
-+void iam_path_fini(struct iam_path *path)
-+{
-+      int i;
-+      __iam_path_fini(path);
-+
-+      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); i++)
-+              kfree(path->ip_key_scratch);
-+
-+      kfree(path->ip_leaf);
-+}
-+
 +extern struct iam_descr htree_compat_param;
++
 +void iam_path_compat_init(struct iam_path_compat *path, struct inode *inode)
 +{
 +      int i;
 +
++      for (i = 0; i < ARRAY_SIZE(path->ipc_scratch); ++i)
++              path->ipc_descr.ipd_key_scratch[i] =
++                      (struct iam_key *)&path->ipc_scratch[i];
++
 +      iam_container_init(&path->ipc_container, &htree_compat_param, inode);
 +      /*
 +       * XXX hack allowing finalization of iam_path_compat with
 +       * iam_path_fini().
 +       */
 +      iput(inode);
-+      __iam_path_init(&path->ipc_path, &path->ipc_container);
-+      for (i = 0; i < ARRAY_SIZE(path->ipc_path.ip_key_scratch); ++i)
-+              path->ipc_path.ip_key_scratch[i] =
-+                      (struct iam_key *)&path->ipc_scrach[i];
++      iam_path_init(&path->ipc_path, &path->ipc_container, &path->ipc_descr);
 +}
 +
 +void iam_path_compat_fini(struct iam_path_compat *path)
 +{
-+      __iam_path_fini(&path->ipc_path);
++      iam_path_fini(&path->ipc_path);
 +      iam_container_fini(&path->ipc_container);
 +}
 +
-+static int iam_leaf_init(struct iam_path *path)
++static int iam_leaf_load(struct iam_path *path)
 +{
-+      int block, err;
-+      struct buffer_head *bh;
-+      struct iam_leaf *leaf = path->ip_leaf;
++      int block;
++      int err;
++      struct iam_container *c;
++      struct buffer_head   *bh;
++      struct iam_leaf      *leaf;
++      struct iam_descr     *descr;
 +      
++      c     = path->ip_container;
++      leaf  = &path->ip_leaf;
++      descr = iam_path_descr(path);
 +      block = dx_get_block(path, path->ip_frame->at);
-+      err = path_descr(path)->id_node_read(path->ip_container, block,
-+                                           NULL, &bh);
-+      if (err)
-+              return err;
-+
-+      leaf->bh = bh;
-+      leaf->entries = (struct iam_leaf_entry *)bh->b_data;
-+      return 0;
++      err   = descr->id_node_read(c, block, NULL, &bh);
++      if (err == 0) {
++              leaf->il_bh = bh;
++              err = descr->id_leaf.init(c, leaf);
++      }
++      return err;
 +}
 +
 +static void iam_leaf_fini(struct iam_path *path)
 +{
-+      if (path && path->ip_leaf && path->ip_leaf->bh)
-+              brelse(path->ip_leaf->bh);
++      iam_path_descr(path)->id_leaf.fini(path->ip_container, &path->ip_leaf);
++      if (path->ip_leaf.il_bh) {
++              brelse(path->ip_leaf.il_bh);
++              path->ip_leaf.il_bh = NULL;
++      }
++}
++
++static void iam_leaf_start(struct iam_container *c, struct iam_leaf *folio)
++{
++      c->ic_descr->id_leaf.start(c, folio);
++}
++
++static void iam_leaf_next(struct iam_container *c, struct iam_leaf *folio)
++{
++      c->ic_descr->id_leaf.next(c, folio);
++}
++
++static int iam_leaf_at_end(struct iam_container *c, struct iam_leaf *folio)
++{
++      return c->ic_descr->id_leaf.at_end(c, folio);
 +}
 +
 +/*
@@ -1390,8 +288,18 @@ Index: linux-2.6.9/fs/ext3/iam.c
 + */
 +static struct iam_key *it_scratch_key(struct iam_iterator *it, int n)
 +{
-+        assert(0 <= n && n < ARRAY_SIZE(it->ii_path.ip_key_scratch));
-+        return it->ii_path.ip_key_scratch[n];
++        return iam_path_key(&it->ii_path, n);
++}
++
++static struct iam_container *iam_it_container(struct iam_iterator *it)
++{
++      return it->ii_path.ip_container;
++}
++
++static inline int it_keycmp(struct iam_iterator *it,
++                          struct iam_key *k1, struct iam_key *k2)
++{
++      return iam_keycmp(iam_it_container(it), k1, k2);
 +}
 +
 +/*
@@ -1456,9 +364,9 @@ Index: linux-2.6.9/fs/ext3/iam.c
 +static void iam_it_unlock(struct iam_iterator *it)
 +{
 +      if (it->ii_flags&IAM_IT_WRITE)
-+                iam_container_write_unlock(iam_it_container(it));
-+        else
-+                iam_container_read_unlock(iam_it_container(it));
++              iam_container_write_unlock(iam_it_container(it));
++      else
++              iam_container_read_unlock(iam_it_container(it));
 +}
 +
 +/*
@@ -1466,12 +374,13 @@ Index: linux-2.6.9/fs/ext3/iam.c
 + *
 + * postcondition: it_state(it) == IAM_IT_DETACHED
 + */
-+int  iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags)
++int  iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags,
++               struct iam_path_descr *pd)
 +{
-+        memset(it, 0, sizeof *it);
-+        it->ii_flags  = flags;
-+        it->ii_state  = IAM_IT_DETACHED;
-+        iam_path_init(&it->ii_path, c);
++      memset(it, 0, sizeof *it);
++      it->ii_flags  = flags;
++      it->ii_state  = IAM_IT_DETACHED;
++      iam_path_init(&it->ii_path, c, pd);
 +      return 0;
 +}
 +
@@ -1482,41 +391,28 @@ Index: linux-2.6.9/fs/ext3/iam.c
 + */
 +void iam_it_fini(struct iam_iterator *it)
 +{
-+        assert(it_state(it) == IAM_IT_DETACHED);
-+        iam_path_fini(&it->ii_path);
++      assert(it_state(it) == IAM_IT_DETACHED);
++      iam_path_fini(&it->ii_path);
 +}
 +
 +int iam_path_lookup(struct iam_path *path)
 +{
-+      struct iam_leaf_entry *entries, *e;
-+      struct iam_container *c;        
-+      int err, count, i;
++      struct iam_container *c;
++      struct iam_descr *descr;
++      struct iam_leaf  *leaf;
++      int result;
 +      
-+      err = dx_lookup(path);
-+      if (err)
-+              goto errout;
-+
-+      err = iam_leaf_init(path);
-+      if (err)
-+              goto errout;
-+
-+      c = path->ip_container;
-+      entries = iam_leaf_entries(path);
-+
-+      count = dx_get_count((struct iam_entry*)entries);
-+      e = iam_leaf_entry_shift(path, iam_leaf_entries(path), 1);
-+      for (i = 0; i < count - 1; ++i,
-+                  e = iam_leaf_entry_shift(path, entries, 1)) {
-+              dx_get_key(path, (struct iam_entry*)e, path->ip_key_scratch[0]);
-+              if(keycmp(c, path->ip_key_scratch[0], path->ip_key_target)
-+                        >= 0) {
-+                      path->ip_leaf->at = e;
-+                      return 0;
-+              }
-+      }
-+errout:
-+      iam_leaf_fini(path);
-+      return err;
++      c = path->ip_container;
++      leaf = &path->ip_leaf;
++      descr = iam_path_descr(path);
++      result = dx_lookup(path);
++      if (result == 0) {
++              result = iam_leaf_load(path);
++              if (result == 0)
++                      result = descr->id_leaf.lookup(c, path, leaf,
++                                                     path->ip_key_target);
++      }
++      return result;
 +}
 +
 +/*
@@ -1543,23 +439,20 @@ Index: linux-2.6.9/fs/ext3/iam.c
 +                it->ii_state = IAM_IT_ATTACHED;
 +        else
 +                iam_it_unlock(it);
-+#if 0        
 +      assert(ergo(result == 0,
 +                    it_keycmp(it,
 +                              iam_it_key_get(it, it_scratch_key(it, 0)),
 +                            k) < 0));
-+#endif
 +        return result;
 +}
 +
-+#if 0
 +/*
 + * Duplicates iterator.
 + *
 + * postcondition: it_state(dst) == it_state(src) &&
-+ *                iam_it_container(it) == iam_it_container(it) &&
++ *                iam_it_container(dst) == iam_it_container(src) &&
 + *                dst->ii_flags = src->ii_flags &&
-+ *                ergo(it_state(it) == IAM_IT_ATTACHED,
++ *                ergo(it_state(src) == IAM_IT_ATTACHED,
 + *                     iam_it_rec_get(dst) == iam_it_rec_get(src) &&
 + *                     iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
 + */
@@ -1567,12 +460,19 @@ Index: linux-2.6.9/fs/ext3/iam.c
 +{
 +        dst->ii_flags     = src->ii_flags;
 +        dst->ii_state     = src->ii_state;
-+        iam_path_dup(&dst->ii_path, &src->ii_path);
++        /* XXX not yet. iam_path_dup(&dst->ii_path, &src->ii_path); */
 +        /*
 +         * XXX: duplicate lock.
 +         */
++      assert(it_state(dst) == it_state(src));
++      assert(iam_it_container(dst) == iam_it_container(src));
++      assert(dst->ii_flags == src->ii_flags);
++      assert(ergo(it_state(src) == IAM_IT_ATTACHED,
++                  iam_it_rec_get(dst) == iam_it_rec_get(src) &&
++                  iam_it_key_get(dst, it_scratch_key(dst, 0)) ==
++                  iam_it_key_get(src, it_scratch_key(src, 0))));
++
 +}
-+#endif
 +/*
 + * Detach iterator. Does nothing in detached state.
 + *
@@ -1587,7 +487,6 @@ Index: linux-2.6.9/fs/ext3/iam.c
 +        }
 +}
 +
-+#if 0
 +/*
 + * Move iterator one record right.
 + *
@@ -1606,13 +505,13 @@ Index: linux-2.6.9/fs/ext3/iam.c
 +        assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE);
 +
 +        c = iam_it_container(it);
-+        if (iam_leaf_at_end(c, it->ii_path.ip_leaf)) {
++        if (iam_leaf_at_end(c, &it->ii_path.ip_leaf)) {
 +                /* advance index portion of the path */
-+                result = iam_index_next(&it->ii_path);
++                result = 0; /* XXX not yet iam_index_next(&it->ii_path); */
 +                if (result == 0) {
-+                        result = iam_read_leaf(&it->ii_path);
++                        result = 0; /* XXX not yet iam_read_leaf(&it->ii_path); */
 +                        if (result == 0)
-+                                iam_leaf_start(c, it->ii_path.ip_leaf);
++                                iam_leaf_start(c, &it->ii_path.ip_leaf);
 +                } else if (result > 0)
 +                        /* end of container reached */
 +                        result = +1;
@@ -1620,13 +519,13 @@ Index: linux-2.6.9/fs/ext3/iam.c
 +                        iam_it_put(it);
 +        } else {
 +                /* advance within leaf node */
-+                iam_leaf_next(c, it->ii_path.ip_leaf);
++                iam_leaf_next(c, &it->ii_path.ip_leaf);
 +                result = 0;
 +        }
 +        assert(ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED));
 +        return result;
 +}
-+#endif
++
 +/*
 + * Return pointer to the record under iterator.
 + *
@@ -1636,12 +535,12 @@ Index: linux-2.6.9/fs/ext3/iam.c
 +struct iam_rec *iam_it_rec_get(struct iam_iterator *it)
 +{
 +        assert(it_state(it) == IAM_IT_ATTACHED);
-+        return iam_leaf_rec(iam_it_container(it), it->ii_path.ip_leaf->at);
++        return iam_leaf_rec(iam_it_container(it), &it->ii_path.ip_leaf);
 +}
 +
 +static void iam_it_reccpy(struct iam_iterator *it, struct iam_rec *r)
 +{
-+        memcpy(iam_leaf_rec(iam_it_container(it), it->ii_path.ip_leaf->at), r,
++        memcpy(iam_leaf_rec(iam_it_container(it), &it->ii_path.ip_leaf), r,
 +               iam_it_container(it)->ic_descr->id_rec_size);
 +}
 +
@@ -1658,7 +557,7 @@ Index: linux-2.6.9/fs/ext3/iam.c
 +
 +        assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
 +
-+        result = ext3_journal_get_write_access(h, it->ii_path.ip_leaf->bh);
++        result = ext3_journal_get_write_access(h, it->ii_path.ip_leaf.il_bh);
 +        if (result == 0)
 +                iam_it_reccpy(it, r);
 +        return result;
@@ -1673,479 +572,1781 @@ Index: linux-2.6.9/fs/ext3/iam.c
 +struct iam_key *iam_it_key_get(struct iam_iterator *it, struct iam_key *k)
 +{
 +        assert(it_state(it) == IAM_IT_ATTACHED);
-+        return iam_leaf_key(iam_it_container(it), k);
++        return iam_leaf_key(iam_it_container(it), &it->ii_path.ip_leaf, k);
 +}
 +
 +static int iam_leaf_rec_add(handle_t *handle, struct iam_path *path)
 +{
-+      struct iam_leaf_entry *p, *q;
++#if 0
++      struct iam_lentry *p, *q;
++      int count, err;
++
++      count = dx_get_count((struct iam_entry *)path->ip_leaf.il_entries);
++      p = iam_lentry_shift(path, path->ip_leaf.il_entries, count - 1);
++      while (p > path->ip_leaf.il_at) {
++              q = iam_lentry_shift(path, p, 1);
++              iam_reccpy(path, iam_leaf_rec(path->ip_container, p),
++                         iam_leaf_rec(path->ip_container, q));
++              iam_keycpy(path->ip_container, iam_leaf_key_at(path, p),
++                         iam_leaf_key_at(path, q));
++              p = iam_lentry_shift(path, p, -1);
++      }
++      dx_set_count((struct iam_entry*)path->ip_leaf.il_entries, count + 1);
++      err = ext3_journal_dirty_metadata(handle, path->ip_leaf.il_bh);
++      if (err)
++              ext3_std_error(iam_path_obj(path)->i_sb, err);
++      return err;
++#endif /* XXX no value returned while body is compiled out (#if 0) */
++}
++
++static int iam_leaf_is_full(struct iam_path *path)
++{
++      int count, limit;
++
++      count = dx_get_count((struct iam_entry *)path->ip_leaf.il_entries);
++      limit = dx_get_limit((struct iam_entry *)path->ip_leaf.il_entries);
++
++      return (count >= limit);
++}
++
++static int split_leaf_node(handle_t *handle, struct iam_path *path)
++{
++#if 0
++      struct inode *dir = iam_path_obj(path);
++      unsigned continued = 0;
++      struct buffer_head *bh2;
++      u32 newblock, hash_split;
++      char *data2;
++      unsigned split;
++      int     err;
++
++      bh2 = ext3_append (handle, dir, &newblock, &err);
++      if (!(bh2))
++              return -ENOSPC;
++      
++      err = iam_leaf_load(path);
++      if (err)
++              goto errout;
++
++      BUFFER_TRACE(path->ip_leaf.il_bh, "get_write_access");
++      err = ext3_journal_get_write_access(handle, path->ip_leaf.il_bh);
++      if (err) {
++      journal_error:
++              iam_leaf_fini(path);
++              brelse(bh2);
++              ext3_std_error(dir->i_sb, err);
++              err = -EIO;
++              goto errout;
++      }
++      data2 = bh2->b_data;
++      split = dx_get_count((struct iam_entry*)iam_leaf_entries(path))/2;
++      hash_split = *(__u32*)iam_leaf_key_at(path,
++                     iam_lentry_shift(path, iam_leaf_entries(path),
++                     split));
++      if (iam_keycmp(path->ip_container, iam_leaf_key_at(path,
++                 iam_lentry_shift(path, iam_leaf_entries(path), split)),
++                 iam_leaf_key_at(path,
++                 iam_lentry_shift(path, iam_leaf_entries(path), split -1))) == 0)
++              continued = 1;
++
++      memcpy(iam_lentry_shift(path, (struct iam_lentry *)data2, 1),
++             iam_lentry_shift(path, iam_leaf_entries(path), split),
++             split * iam_lentry_size(path));
++
++      /* Which block gets the new entry? */
++      dx_insert_block(path, path->ip_frame, hash_split + continued, newblock);
++      err = ext3_journal_dirty_metadata (handle, bh2);
++      if (err)
++              goto journal_error;
++      err = ext3_journal_dirty_metadata (handle, path->ip_leaf.il_bh);
++      if (err)
++              goto journal_error;
++errout:
++      brelse (bh2);
++      return err;
++#endif /* XXX no value returned while body is compiled out (#if 0) */
++}
++
++int iam_add_rec(handle_t *handle, struct iam_path *path)
++{
++      int err;
++
++      if (!iam_leaf_is_full(path)) {
++              err = iam_leaf_rec_add(handle, path);
++      } else {
++              err = split_index_node(handle, path);
++              if (err == 0) {
++                      err = split_leaf_node(handle, path);
++                      if (err == 0)
++                              err = iam_leaf_rec_add(handle, path);
++              }
++      }
++      return err;
++}
++
++/*
++ * Insert new record with key @k and contents from @r, shifting records to the
++ * right.
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED &&
++ *                it->ii_flags&IAM_IT_WRITE &&
++ *                it_keycmp(it, iam_it_key_get(it, *), k) < 0
++ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
++ *                ergo(result == 0,
++ *                     it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
++ *                     !memcmp(iam_it_rec_get(it), r, ...))
++ */
++int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
++                      struct iam_key *k, struct iam_rec *r)
++{
++        int result;
++
++        assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
++        assert(it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)), k) < 0);
++
++      result = iam_add_rec(h, &it->ii_path);
++      if (result == 0) {
++              /* place record and key into freed space. Leaf node is already
++               * in transaction. */
++              iam_it_reccpy(it, r);
++              /*
++               * XXX TBD.
++               */
++        }
++        assert(it_state(it) == IAM_IT_ATTACHED);
++        assert(ergo(result == 0,
++                    it_keycmp(it,
++                              iam_it_key_get(it,
++                                             it_scratch_key(it, 0)), k) == 0 &&
++                    !memcmp(iam_it_rec_get(it), r,
++                            iam_it_container(it)->ic_descr->id_rec_size)));
++        return result;
++}
++
++static int iam_leaf_rec_remove(handle_t *handle, struct iam_container *c,
++                             struct iam_path *path)
++{
++#if 0
++      struct iam_lentry *p, *q, *end;
 +      int count, err;
 +
-+      count = dx_get_count((struct iam_entry *)path->ip_leaf->entries);
-+      p = iam_leaf_entry_shift(path, path->ip_leaf->entries, count - 1);
-+      while (p > path->ip_leaf->at) {
-+              q = iam_leaf_entry_shift(path, p, 1);
-+              reccpy(path, iam_leaf_rec(path->ip_container, p),
-+                     iam_leaf_rec(path->ip_container, q));
-+              keycpy(path->ip_container, iam_leaf_key_at(path, p),
-+                     iam_leaf_key_at(path, q));
-+              p = iam_leaf_entry_shift(path, p, -1);
++      count = dx_get_count((struct iam_entry *)path->ip_leaf.il_entries);
++      end = iam_lentry_shift(path, path->ip_leaf.il_entries, count - 1);
++      p = iam_lentry_at(path, path->ip_leaf.il_at);
++      while (p <= end) {
++              q = iam_lentry_shift(path, p, 1);
++              iam_reccpy(path, iam_leaf_rec(path->ip_container, p),
++                         iam_leaf_rec(path->ip_container, q));
++              iam_keycpy(c, iam_leaf_key_at(path, p),
++                         iam_leaf_key_at(path, q));
++              p = iam_lentry_shift(path, p, 1);
 +      }
-+      dx_set_count((struct iam_entry*)path->ip_leaf->entries, count + 1);
-+      err = ext3_journal_dirty_metadata(handle, path->ip_leaf->bh);
++      dx_set_count((struct iam_entry*)path->ip_leaf.il_entries, count - 1);
++      err = ext3_journal_dirty_metadata(handle, path->ip_leaf.il_bh);
 +      if (err)
-+              ext3_std_error(path_obj(path)->i_sb, err);
-+      return err;     
++              ext3_std_error(iam_path_obj(path)->i_sb, err);
++      return err;
++#endif /* XXX no value returned while body is compiled out (#if 0) */
++}
++
++/*
++ * Delete record under iterator.
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++int iam_it_rec_delete(handle_t *h, struct iam_iterator *it)
++{
++        int result;
++
++        assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
++
++        result = ext3_journal_get_write_access(h, it->ii_path.ip_leaf.il_bh);
++        /*
++         * no compaction for now.
++         */
++        if (result == 0)
++                iam_leaf_rec_remove(h, iam_it_container(it), &it->ii_path);
++
++      return result;
++}
++
++/*
++ * Convert iterator to cookie.
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED &&
++ *                iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++iam_pos_t iam_it_store(struct iam_iterator *it)
++{
++        iam_pos_t result;
++
++        assert(it_state(it) == IAM_IT_ATTACHED);
++        assert(iam_it_container(it)->ic_descr->id_key_size <= sizeof result);
++
++        result = 0;
++        iam_it_key_get(it, (struct iam_key *)&result);
++        return result;
++}
++
++/*
++ * Restore iterator from cookie.
++ *
++ * precondition:  it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE &&
++ *                iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
++ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED &&
++ *                                  iam_it_store(it) == pos)
++ */
++int iam_it_load(struct iam_iterator *it, iam_pos_t pos)
++{
++        assert(it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE);
++        assert(iam_it_container(it)->ic_descr->id_key_size <= sizeof pos);
++        return iam_it_get(it, (struct iam_key *)&pos);
++}
++
++/***********************************************************************/
++/* invariants                                                          */
++/***********************************************************************/
++
++static inline int ptr_inside(void *base, size_t size, void *ptr)
++{
++        return (base <= ptr) && (ptr < base + size);
++}
++
++int iam_frame_invariant(struct iam_frame *f)
++{
++        return
++                (f->bh != NULL &&
++                f->bh->b_data != NULL &&
++                ptr_inside(f->bh->b_data, f->bh->b_size, f->entries) &&
++                ptr_inside(f->bh->b_data, f->bh->b_size, f->at) &&
++                f->entries <= f->at);
++}
++int iam_leaf_invariant(struct iam_leaf *l)
++{
++        return
++                l->il_bh != NULL &&
++                l->il_bh->b_data != NULL &&
++                ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_entries) &&
++                ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_at) &&
++                l->il_entries <= l->il_at;
++}
++
++int iam_path_invariant(struct iam_path *p)
++{
++        int i;
++
++        if (p->ip_container == NULL ||
++            p->ip_indirect < 0 || p->ip_indirect > DX_MAX_TREE_HEIGHT - 1 ||
++            p->ip_frame != p->ip_frames + p->ip_indirect ||
++            !iam_leaf_invariant(&p->ip_leaf))
++                return 0;
++        for (i = 0; i < ARRAY_SIZE(p->ip_frames); ++i) {
++                if (i <= p->ip_indirect) {
++                        if (!iam_frame_invariant(&p->ip_frames[i]))
++                                return 0;
++                }
++        }
++        return 1;
++}
++
++int iam_it_invariant(struct iam_iterator *it)
++{
++        return
++                (it->ii_state == IAM_IT_DETACHED ||
++                 it->ii_state == IAM_IT_ATTACHED) &&
++                !(it->ii_flags & ~(IAM_IT_MOVE | IAM_IT_WRITE)) &&
++                ergo(it->ii_state == IAM_IT_ATTACHED,
++                     iam_path_invariant(&it->ii_path));
++}
++
++/*
++ * Search container @c for record with key @k. If record is found, its data
++ * are moved into @r.
++ *
++ *
++ *
++ * Return values: +ve: found, 0: not-found, -ve: error
++ */
++int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r,
++             struct iam_path_descr *pd)
++{
++        struct iam_iterator it;
++        int result;
++
++        iam_it_init(&it, c, 0, pd);
++
++        result = iam_it_get_exact(&it, k);
++        if (result == 0)
++                /*
++                 * record with required key found, copy it into user buffer
++                 */
++                iam_reccpy(&it.ii_path, r, iam_it_rec_get(&it));
++        iam_it_put(&it);
++        iam_it_fini(&it);
++        return result;
 +}
++EXPORT_SYMBOL(iam_lookup);
 +
-+static int iam_leaf_full(struct iam_path *path)
++/*
++ * Insert new record @r with key @k into container @c (within context of
++ * transaction @h).
++ *
++ * Return values: 0: success, -ve: error, including -EEXIST when record with
++ * given key is already present.
++ *
++ * postcondition: ergo(result == 0 || result == -EEXIST,
++ *                                  iam_lookup(c, k, r2) > 0 &&
++ *                                  !memcmp(r, r2, c->ic_descr->id_rec_size));
++ */
++int iam_insert(handle_t *h, struct iam_container *c,
++               struct iam_key *k, struct iam_rec *r, struct iam_path_descr *pd)
 +{
-+      int count, limit;
++        struct iam_iterator it;
++        int result;
 +
-+      count = dx_get_count((struct iam_entry *)path->ip_leaf->entries);
-+      limit = dx_get_limit((struct iam_entry *)path->ip_leaf->entries);
++        iam_it_init(&it, c, IAM_IT_WRITE, pd);
 +
-+      return (count >= limit);
++        result = iam_it_get_exact(&it, k);
++        if (result == -ENOENT)
++                result = iam_it_rec_insert(h, &it, k, r);
++        else if (result == 0)
++                result = -EEXIST;
++        iam_it_put(&it);
++        iam_it_fini(&it);
++        return result;
 +}
++EXPORT_SYMBOL(iam_insert);
 +
-+static int split_leaf_node(handle_t *handle, struct iam_path *path)
++int iam_update(handle_t *h, struct iam_container *c,
++               struct iam_key *k, struct iam_rec *r, struct iam_path_descr *pd)
 +{
-+      struct inode *dir = path_obj(path);
-+      unsigned continued = 0;
-+      struct buffer_head *bh2;
-+      u32 newblock, hash_split;
-+      char *data2;
-+      unsigned split;
-+      int     err;
-+
-+      bh2 = ext3_append (handle, dir, &newblock, &err);
-+      if (!(bh2)) 
-+              return -ENOSPC;
-+      
-+      err = iam_leaf_init(path);
-+      if (err)
-+              goto errout;
++        struct iam_iterator it;
++        int result;
 +
-+      BUFFER_TRACE(path->ip_leaf->bh, "get_write_access");
-+      err = ext3_journal_get_write_access(handle, path->ip_leaf->bh);
-+      if (err) {
-+      journal_error:
-+              iam_leaf_fini(path);
-+              brelse(bh2);
-+              ext3_std_error(dir->i_sb, err);
-+              err = -EIO;
-+              goto errout;
-+      }
-+      data2 = bh2->b_data;
-+      split = dx_get_count((struct iam_entry*)iam_leaf_entries(path))/2;
-+      hash_split = *(__u32*)iam_leaf_key_at(path, 
-+                     iam_leaf_entry_shift(path, iam_leaf_entries(path), 
-+                     split));
-+      if (keycmp(path->ip_container, iam_leaf_key_at(path, 
-+                 iam_leaf_entry_shift(path, iam_leaf_entries(path), split)),
-+                 iam_leaf_key_at(path, 
-+                 iam_leaf_entry_shift(path, iam_leaf_entries(path), split -1))) == 0)
-+              continued = 1;
++        iam_it_init(&it, c, IAM_IT_WRITE, pd);
 +
-+      memcpy(iam_leaf_entry_shift(path, (struct iam_leaf_entry *)data2, 1),
-+             iam_leaf_entry_shift(path, iam_leaf_entries(path), split),
-+             split * iam_leaf_entry_size(path));
-+ 
-+      /* Which block gets the new entry? */
-+      dx_insert_block(path, path->ip_frame, hash_split + continued, newblock);
-+      err = ext3_journal_dirty_metadata (handle, bh2);
-+      if (err)
-+              goto journal_error;
-+      err = ext3_journal_dirty_metadata (handle, path->ip_leaf->bh);
-+      if (err)
-+              goto journal_error;
-+errout:
-+      brelse (bh2);
-+      return err;
++        result = iam_it_get_exact(&it, k);
++        if (result == 0)
++                iam_it_rec_set(h, &it, r);
++        iam_it_put(&it);
++        iam_it_fini(&it);
++        return result;
 +}
++EXPORT_SYMBOL(iam_update);
 +
-+int iam_add_rec(handle_t *handle, struct iam_path *path)
-+{
-+      int err;
-+
-+      if (!iam_leaf_full(path)) {
-+              err = iam_leaf_rec_add(handle, path);
-+              return 0;
-+      }               
-+
-+      err = split_index_node(handle, path);
-+      if (err)
-+              goto errout;    
-+
-+      err = split_leaf_node(handle, path);
-+      if (err)
-+              goto errout;
-+
-+      err = iam_leaf_rec_add(handle, path);
-+errout:
-+      return err;
-+}
 +/*
-+ * Insert new record with key @k and contents from @r, shifting records to the
-+ * right.
++ * Delete existing record with key @k.
 + *
-+ * precondition:  it_state(it) == IAM_IT_ATTACHED &&
-+ *                it->ii_flags&IAM_IT_WRITE &&
-+ *                it_keycmp(it, iam_it_key_get(it, *), k) < 0
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
-+ *                ergo(result == 0,
-+ *                     it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
-+ *                     !memcmp(iam_it_rec_get(it), r, ...))
++ * Return values: 0: success, -ENOENT: not-found, -ve: other error.
++ *
++ * postcondition: ergo(result == 0 || result == -ENOENT,
++ *                                 !iam_lookup(c, k, *));
 + */
-+int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
-+                      struct iam_key *k, struct iam_rec *r)
++int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k,
++             struct iam_path_descr *pd)
 +{
++        struct iam_iterator it;
 +        int result;
 +
-+        assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
-+        assert(it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)), k) < 0);
++        iam_it_init(&it, c, IAM_IT_WRITE, pd);
 +
-+        result = iam_add_rec(h, &it->ii_path);
-+        if (result == 0) {
-+                /* place record and key info freed space. Leaf node is already
-+                 * in transaction. */
-+                iam_it_reccpy(it, r);
-+                keycpy(iam_it_container(it),
-+                       iam_leaf_key_at(&it->ii_path, it->ii_path.ip_leaf->at),
-+                     k);
-+        }
-+        assert(it_state(it) == IAM_IT_ATTACHED);
-+#if 0
-+        assert(ergo(result == 0,
-+                    it_keycmp(it,
-+                              iam_it_key_get(it,
-+                                             it_scratch_key(it, 0)), k) == 0 &&
-+                    !memcmp(iam_it_rec_get(it), r,
-+                            iam_it_container(it)->ic_descr->id_rec_size)));
-+#endif
++        result = iam_it_get_exact(&it, k);
++        if (result == 0)
++                iam_it_rec_delete(h, &it);
++        iam_it_put(&it);
++        iam_it_fini(&it);
 +        return result;
 +}
++EXPORT_SYMBOL(iam_delete);
 +
-+static int iam_leaf_rec_remove(handle_t *handle, struct iam_container *c, 
-+                             struct iam_path *path)
-+{
-+      struct iam_leaf_entry *p, *q, *end;
-+      int count, err;
-+
-+      count = dx_get_count((struct iam_entry *)path->ip_leaf->entries);
-+      end = iam_leaf_entry_shift(path, path->ip_leaf->entries, count - 1);
-+      p = iam_leaf_entry_at(path, path->ip_leaf->at);
-+      while (p <= end) {
-+              q = iam_leaf_entry_shift(path, p, 1);
-+              reccpy(path, iam_leaf_rec(path->ip_container, p),
-+                     iam_leaf_rec(path->ip_container, q));
-+              keycpy(c, iam_leaf_key_at(path, p),
-+                     iam_leaf_key_at(path, q));
-+              p = iam_leaf_entry_shift(path, p, 1);
-+      }
-+      dx_set_count((struct iam_entry*)path->ip_leaf->entries, count - 1);
-+      err = ext3_journal_dirty_metadata(handle, path->ip_leaf->bh);
-+      if (err)
-+              ext3_std_error(path_obj(path)->i_sb, err);
-+      return err;
-+}
+Index: iam/fs/ext3/namei.c
+===================================================================
+--- iam.orig/fs/ext3/namei.c   2006-05-10 18:21:01.000000000 +0400
++++ iam/fs/ext3/namei.c        2006-05-10 20:56:22.000000000 +0400
+@@ -24,81 +24,6 @@
+  *    Theodore Ts'o, 2002
+  */
+-/*
+- * iam: big theory statement.
+- *
+- * iam (Index Access Module) is a module providing abstraction of persistent
+- * transactional container on top of generalized ext3 htree.
+- *
+- * iam supports:
+- *
+- *     - key, pointer, and record size specifiable per container.
+- *
+- *     - trees taller than 2 index levels.
+- *
+- *     - read/write to existing ext3 htree directories as iam containers.
+- *
+- * iam container is a tree, consisting of leaf nodes containing keys and
+- * records stored in this container, and index nodes, containing keys and
+- * pointers to leaf or index nodes.
+- *
+- * iam does not work with keys directly, instead it calls user-supplied key
+- * comparison function (->dpo_keycmp()).
+- *
+- * Pointers are (currently) interpreted as logical offsets (measured in
+- * blocksful) within underlying flat file on top of which iam tree lives.
+- *
+- * On-disk format:
+- *
+- * iam mostly tries to reuse existing htree formats.
+- *
+- * Format of index node:
+- *
+- * +-----+-------+-------+-------+------+-------+------------+
+- * |     | count |       |       |      |       |            |
+- * | gap |   /   | entry | entry | .... | entry | free space |
+- * |     | limit |       |       |      |       |            |
+- * +-----+-------+-------+-------+------+-------+------------+
+- *
+- *       gap           this part of node is never accessed by iam code. It
+- *                     exists for binary compatibility with ext3 htree (that,
+- *                     in turn, stores fake struct ext2_dirent for ext2
+- *                     compatibility), and to keep some unspecified per-node
+- *                     data. Gap can be different for root and non-root index
+- *                     nodes. Gap size can be specified for each container
+- *                     (gap of 0 is allowed).
+- *
+- *       count/limit   current number of entries in this node, and the maximal
+- *                     number of entries that can fit into node. count/limit
+- *                     has the same size as entry, and is itself counted in
+- *                     count.
+- *
+- *       entry         index entry: consists of a key immediately followed by
+- *                     a pointer to a child node. Size of a key and size of a
+- *                     pointer depends on container. Entry has neither
+- *                     alignment nor padding.
+- *
+- *       free space    portion of node new entries are added to
+- *
+- * Entries in index node are sorted by their key value.
+- *
+- * Format of leaf node:
+- *
+- * +-----+-------+-------+-------+------+-------+------------+
+- * |     | count |       |       |      |       |            |
+- * | gap |   /   | leaf  | leaf  | .... | leaf  | free space |
+- * |     | limit |       |       |      |       |            |
+- * +-----+-------+-------+-------+------+-------+------------+
+-
+- *       leaf          For leaf entry: consists of a rec immediately followd by 
+- *                     a key. size of a key and size of a rec depends on container.  
+- *
+- *
+- *
+- *
+- *
+- */
+-
+ #include <linux/module.h>
+ #include <linux/fs.h>
+ #include <linux/pagemap.h>
+@@ -112,10 +37,10 @@
+ #include <linux/quotaops.h>
+ #include <linux/buffer_head.h>
+ #include <linux/smp_lock.h>
++#include <linux/lustre_iam.h>
+ #include "xattr.h"
+ #include "iopen.h"
+ #include "acl.h"
+-#include <linux/lustre_iam.h>
+ /*
+  * define how far ahead to read directories while searching them.
+  */
+@@ -125,9 +50,9 @@
+ #define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
+-static struct buffer_head *ext3_append(handle_t *handle,
+-                                      struct inode *inode,
+-                                      u32 *block, int *err)
++struct buffer_head *ext3_append(handle_t *handle,
++                              struct inode *inode,
++                              u32 *block, int *err)
+ {
+       struct buffer_head *bh;
+@@ -141,9 +66,6 @@ static struct buffer_head *ext3_append(h
+       return bh;
+ }
+-#ifndef assert
+-#define assert(test) J_ASSERT(test)
+-#endif
+ #ifndef swap
+ #define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
+@@ -162,10 +84,6 @@ struct fake_dirent {
+       u8 file_type;
+ };
+-struct dx_countlimit {
+-      __le16 limit;
+-      __le16 count;
+-};
+ /*
+  * dx_root_info is laid out so that if it should somehow get overlaid by a
+@@ -203,242 +121,10 @@ struct dx_map_entry
+ };
+-static u32 htree_root_ptr(struct iam_container *c);
+-static int htree_node_check(struct iam_path *path, struct iam_frame *frame);
+-static int htree_node_init(struct iam_container *c,
+-                         struct buffer_head *bh, int root);
+-static int htree_keycmp(struct iam_container *c,
+-                      struct iam_key *k1, struct iam_key *k2);
+-static int htree_node_read(struct iam_container *c, iam_ptr_t ptr,
+-                         handle_t *h, struct buffer_head **bh);
+-
+-/*
+- * Parameters describing iam compatibility mode in which existing ext3 htrees
+- * can be manipulated.
+- */
+-static struct iam_descr htree_compat_param = {
+-      .id_key_size = sizeof ((struct dx_map_entry *)NULL)->hash,
+-      .id_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs,
+-      .id_node_gap = offsetof(struct dx_node, entries),
+-      .id_root_gap = offsetof(struct dx_root, entries),
+-
+-      .id_root_ptr   = htree_root_ptr,
+-      .id_node_check = htree_node_check,
+-      .id_node_init  = htree_node_init,
+-      .id_node_read  = htree_node_read,
+-      .id_keycmp     = htree_keycmp
+-};
+-
+-
+-struct iam_key;
+-struct iam_rec;
+-struct iam_descr;
+-struct iam_container;
+-struct iam_path;
+-
+-
+-
+-/*
+- * iam cursor (iterator) api.
+- */
+-
+-/*
+- * Flags controlling iterator functionality.
+- */
+-enum iam_it_flags {
+-      /*
+-       * this iterator will move (iam_it_{prev,next}() will be called on it)
+-       */
+-      IAM_IT_MOVE  = (1 << 0),
+-      /*
+-       * tree can be updated through this iterator.
+-       */
+-      IAM_IT_WRITE = (1 << 1)
+-};
+-
+-/*
+- * States of iterator state machine.
+- */
+-enum iam_it_state {
+-      /* initial state */
+-      IAM_IT_DETACHED,
+-      /* iterator is above particular record in the container */
+-      IAM_IT_ATTACHED
+-};
+-
+-struct htree_cookie {
+-      struct dx_hash_info *hinfo;
+-      struct dentry       *dentry;
+-};
+-
+-/*
+- * Iterator.
+- *
+- * Immediately after call to iam_it_init() iterator is in "detached"
+- * (IAM_IT_DETACHED) state: it is associated with given parent container, but
+- * doesn't point to any particular record in this container.
+- *
+- * After successful call to iam_it_get() and until corresponding call to
+- * iam_it_put() iterator is in "attached" state (IAM_IT_ATTACHED).
+- *
+- * Attached iterator can move through records in a container (provided
+- * IAM_IT_MOVE permission) in a key order, can get record and key values as it
+- * passes over them, and can modify container (provided IAM_IT_WRITE
+- * permission).
+- *
+- * Concurrency: iterators are supposed to be local to thread. Interfaces below
+- * do no internal serialization.
+- *
+- */
+-struct iam_iterator {
+-      /*
+-       * iterator flags, taken from enum iam_it_flags.
+-       */
+-      __u32                 ii_flags;
+-      enum iam_it_state     ii_state;
+-      /*
+-       * path to the record. Valid in IAM_IT_ATTACHED state.
+-       */
+-      struct iam_path       ii_path;
+-};
+-
+-static inline struct iam_key *keycpy(struct iam_container *c,
+-                                   struct iam_key *k1, struct iam_key *k2)
+-{
+-      return memcpy(k1, k2, c->ic_descr->id_key_size);
+-}
+-
+-static inline int keycmp(struct iam_container *c,
+-                       struct iam_key *k1, struct iam_key *k2)
+-{
+-      return c->ic_descr->id_keycmp(c, k1, k2);
+-}
+-
+-static struct iam_container *iam_it_container(struct iam_iterator *it)
+-{
+-      return it->ii_path.ip_container;
+-}
+-
+-static inline int it_keycmp(struct iam_iterator *it,
+-                          struct iam_key *k1, struct iam_key *k2)
+-{
+-      return keycmp(iam_it_container(it), k1, k2);
+-}
+-
+-/*
+- * Initialize iterator to IAM_IT_DETACHED state.
+- *
+- * postcondition: it_state(it) == IAM_IT_DETACHED
+- */
+-int  iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags);
+-/*
+- * Finalize iterator and release all resources.
+- *
+- * precondition: it_state(it) == IAM_IT_DETACHED
+- */
+-void iam_it_fini(struct iam_iterator *it);
+-
+-/*
+- * Attach iterator. After successful completion, @it points to record with the
+- * largest key not larger than @k. Semantics of ->id_create() method guarantee
+- * that such record will always be found.
+- *
+- * Return value: 0: positioned on existing record,
+- *             -ve: error.
+- *
+- * precondition:  it_state(it) == IAM_IT_DETACHED
+- * postcondition: ergo(result == 0,
+- *                     (it_state(it) == IAM_IT_ATTACHED &&
+- *                      it_keycmp(it, iam_it_key_get(it, *), k) < 0))
+- */
+-int iam_it_get(struct iam_iterator *it, struct iam_key *k);
+-
+-/*
+- * Duplicates iterator.
+- *
+- * postcondition: it_state(dst) == it_state(src) &&
+- *                iam_it_container(dst) == iam_it_container(src) &&
+- *                dst->ii_flags = src->ii_flags &&
+- *                ergo(it_state(it) == IAM_IT_ATTACHED,
+- *                     iam_it_rec_get(dst) == iam_it_rec_get(src) &&
+- *                     iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
+- */
+-void iam_it_dup(struct iam_iterator *dst, struct iam_iterator *src);
+-
+-/*
+- * Detach iterator. Does nothing it detached state.
+- *
+- * postcondition: it_state(it) == IAM_IT_DETACHED
+- */
+-void iam_it_put(struct iam_iterator *it);
+-
+-/*
+- * Move iterator one record right.
+- *
+- * Return value: 0: success,
+- *              +1: end of container reached
+- *             -ve: error
+- *
+- * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
+- * postcondition: ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED)
+- */
+-int iam_it_next(struct iam_iterator *it);
+-
+-/*
+- * Return pointer to the record under iterator.
+- *
+- * precondition:  it_state(it) == IAM_IT_ATTACHED
+- * postcondition: it_state(it) == IAM_IT_ATTACHED
+- */
+-const struct iam_rec *iam_it_rec_get(struct iam_iterator *it);
+-
+-/*
+- * Replace contents of record under iterator.
+- *
+- * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
+- * postcondition: it_state(it) == IAM_IT_ATTACHED &&
+- *                ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
+- */
+-int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r);
+-
+-/*
+- * Place key under iterator in @k, return @k
+- *
+- * precondition:  it_state(it) == IAM_IT_ATTACHED
+- * postcondition: it_state(it) == IAM_IT_ATTACHED
+- */
+-const struct iam_key *iam_it_key_get(struct iam_iterator *it,
+-                                   struct iam_key *k);
+-
+-/*
+- * Insert new record with key @k and contents from @r, shifting records to the
+- * right.
+- *
+- * precondition:  it_state(it) == IAM_IT_ATTACHED &&
+- *                it->ii_flags&IAM_IT_WRITE &&
+- *                it_keycmp(it, iam_it_key_get(it, *), k) < 0
+- * postcondition: it_state(it) == IAM_IT_ATTACHED &&
+- *                ergo(result == 0,
+- *                     it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
+- *                     !memcmp(iam_it_rec_get(it), r, ...))
+- */
+-int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
+-                    struct iam_key *k, struct iam_rec *r);
+-/*
+- * Delete record under iterator.
+- *
+- * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
+- * postcondition: it_state(it) == IAM_IT_ATTACHED
+- */
+-int iam_it_rec_delete(handle_t *h, struct iam_iterator *it);
+-
+ #ifdef CONFIG_EXT3_INDEX
+ static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry);
+ static void dx_set_block(struct iam_path *p,
+                        struct iam_entry *entry, unsigned value);
+-static inline struct iam_key *dx_get_key(struct iam_path *p,
+-                                      struct iam_entry *entry,
+-                                      struct iam_key *key);
+ static void dx_set_key(struct iam_path *p, struct iam_entry *entry,
+                      struct iam_key *key);
+ static unsigned dx_get_count(struct iam_entry *entries);
+@@ -457,80 +143,29 @@ static void dx_sort_map(struct dx_map_en
+ static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
+               struct dx_map_entry *offsets, int count);
+ static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
+-static void dx_insert_block (struct iam_path *path,
+-                           struct iam_frame *frame, u32 hash, u32 block);
+-static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+-                               struct iam_path *path, __u32 *start_hash);
+ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
+                      struct ext3_dir_entry_2 **res_dir, int *err);
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+                            struct inode *inode);
+-static inline void iam_path_init(struct iam_path *path,
+-                               struct iam_container *c, struct htree_cookie *hc);
+-static inline void iam_path_fini(struct iam_path *path);
+-
+-
+-/*
+- * Future: use high four bits of block for coalesce-on-delete flags
+- * Mask them off for now.
+- */
+-
+-static inline void *entry_off(struct iam_entry *entry, ptrdiff_t off)
+-{
+-      return (void *)((char *)entry + off);
+-}
+-
+-static inline struct iam_descr *path_descr(struct iam_path *p)
+-{
+-      return p->ip_container->ic_descr;
+-}
+-
+-static inline struct inode *path_obj(struct iam_path *p)
+-{
+-      return p->ip_container->ic_object;
+-}
+-
+ static inline size_t iam_entry_size(struct iam_path *p)
+ {
+-      return path_descr(p)->id_key_size + path_descr(p)->id_ptr_size;
++      return iam_path_descr(p)->id_key_size + iam_path_descr(p)->id_ptr_size;
+ }
+ static inline struct iam_entry *iam_entry_shift(struct iam_path *p,
+-                                            struct iam_entry *entry, int shift)
++                                              struct iam_entry *entry,
++                                              int shift)
+ {
+       void *e = entry;
+       return e + shift * iam_entry_size(p);
+ }
+-static inline ptrdiff_t iam_entry_diff(struct iam_path *p,
+-                                    struct iam_entry *e1, struct iam_entry *e2)
+-{
+-      ptrdiff_t diff;
+-
+-      diff = (void *)e1 - (void *)e2;
+-      assert(diff / iam_entry_size(p) * iam_entry_size(p) == diff);
+-      return diff / iam_entry_size(p);
+-}
+-
+-static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry)
+-{
+-      return le32_to_cpu(*(u32 *)entry_off(entry, path_descr(p)->id_key_size))
+-              & 0x00ffffff;
+-}
+-
+-static inline void dx_set_block(struct iam_path *p,
+-                              struct iam_entry *entry, unsigned value)
+-{
+-      *(u32*)entry_off(entry,
+-                       path_descr(p)->id_key_size) = cpu_to_le32(value);
+-}
+-
+-static inline struct iam_key *dx_get_key(struct iam_path *p,
+-                                      struct iam_entry *entry,
+-                                      struct iam_key *key)
++static inline struct iam_key *iam_get_key(struct iam_path *p,
++                                        struct iam_entry *entry,
++                                        struct iam_key *key)
+ {
+-      memcpy(key, entry, path_descr(p)->id_key_size);
++      memcpy(key, entry, iam_path_descr(p)->id_key_size);
+       return key;
+ }
+@@ -540,68 +175,69 @@ static inline struct iam_key *iam_key_at
+       return (struct iam_key *)entry;
+ }
+-static inline void dx_set_key(struct iam_path *p,
+-                            struct iam_entry *entry, struct iam_key *key)
+-{
+-      memcpy(entry, key, path_descr(p)->id_key_size);
+-}
+-
+-static inline unsigned dx_get_count (struct iam_entry *entries)
+-{
+-      return le16_to_cpu(((struct dx_countlimit *) entries)->count);
+-}
+-
+-static inline unsigned dx_get_limit (struct iam_entry *entries)
++static inline ptrdiff_t iam_entry_diff(struct iam_path *p,
++                                     struct iam_entry *e1,
++                                     struct iam_entry *e2)
+ {
+-      return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
+-}
++      ptrdiff_t diff;
+-static inline void dx_set_count (struct iam_entry *entries, unsigned value)
+-{
+-      ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
++      diff = (void *)e1 - (void *)e2;
++      assert(diff / iam_entry_size(p) * iam_entry_size(p) == diff);
++      return diff / iam_entry_size(p);
+ }
+-static inline void dx_set_limit (struct iam_entry *entries, unsigned value)
++static inline void dx_set_limit(struct iam_entry *entries, unsigned value)
+ {
+       ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
+ }
+ static inline unsigned dx_root_limit(struct iam_path *p)
+ {
+-      struct iam_descr *param = path_descr(p);
+-      unsigned entry_space = path_obj(p)->i_sb->s_blocksize -
++      struct iam_descr *param = iam_path_descr(p);
++      unsigned entry_space = iam_path_obj(p)->i_sb->s_blocksize -
+               param->id_root_gap;
+       return entry_space / (param->id_key_size + param->id_ptr_size);
+ }
+-static inline unsigned dx_node_limit(struct iam_path *p)
+-{
+-      struct iam_descr *param = path_descr(p);
+-      unsigned entry_space   = path_obj(p)->i_sb->s_blocksize -
+-              param->id_node_gap;
+-      return entry_space / (param->id_key_size + param->id_ptr_size);
+-}
 +/*
-+ * Delete record under iterator.
++ * Two iam_descr's are provided:
 + *
-+ * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED
-+ */
-+int iam_it_rec_delete(handle_t *h, struct iam_iterator *it)
-+{
-+        int result;
-+
-+        assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
-+
-+        result = ext3_journal_get_write_access(h, it->ii_path.ip_leaf->bh);
-+        /*
-+         * no compaction for now.
-+         */
-+        if (result == 0)
-+                iam_leaf_rec_remove(h, iam_it_container(it), &it->ii_path);
-+        
-+      return result;
-+}
-+/*
-+ * Convert iterator to cookie.
++ *    - htree_compat_param that supports legacy ext3-htree indices;
++ *    - fixed_rec_param that supports containers with records of fixed size.
 + *
-+ * precondition:  it_state(it) == IAM_IT_ATTACHED &&
-+ *                path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED
 + */
-+#define iam_pos_t int
-+
-+iam_pos_t iam_it_store(struct iam_iterator *it)
-+{
-+        iam_pos_t result;
-+
-+        assert(it_state(it) == IAM_IT_ATTACHED);
-+        assert(iam_it_container(it)->ic_descr->id_key_size <= sizeof result);
-+
-+        result = 0;
-+        iam_it_key_get(it, (struct iam_key *)&result);
-+        return result;
-+}
-+
+-static inline int dx_index_is_compat(struct iam_path *path)
+-{
+-      return path_descr(path) == &htree_compat_param;
+-}
++static u32 htree_root_ptr(struct iam_container *c);
++static int htree_node_check(struct iam_path *path, struct iam_frame *frame);
++static int htree_node_init(struct iam_container *c, struct buffer_head *bh, int root);
++static int htree_node_read(struct iam_container *c, iam_ptr_t ptr,
++                         handle_t *handle, struct buffer_head **bh);
++static int htree_keycmp(struct iam_container *c,
++                      struct iam_key *k1, struct iam_key *k2);
 +/*
-+ * Restore iterator from cookie.
-+ *
-+ * precondition:  it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE &&
-+ *                path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
-+ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED &&
-+ *                                  iam_it_store(it) == pos)
++ * Parameters describing iam compatibility mode in which existing ext3 htrees
++ * can be manipulated.
 + */
-+int iam_it_load(struct iam_iterator *it, iam_pos_t pos)
-+{
-+        assert(it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE);
-+        assert(iam_it_container(it)->ic_descr->id_key_size <= sizeof pos);
-+        return iam_it_get(it, (struct iam_key *)&pos);
-+}
-+
-+/***********************************************************************/
-+/* invariants                                                          */
-+/***********************************************************************/
-+
-+static inline int ptr_inside(void *base, size_t size, void *ptr)
-+{
-+        return (base <= ptr) && (ptr < base + size);
-+}
-+
-+int iam_frame_invariant(struct iam_frame *f)
-+{
-+        return
-+                (f->bh != NULL &&
-+                f->bh->b_data != NULL &&
-+                ptr_inside(f->bh->b_data, f->bh->b_size, f->entries) &&
-+                ptr_inside(f->bh->b_data, f->bh->b_size, f->at) &&
-+                f->entries <= f->at);
-+}
-+int iam_leaf_invariant(struct iam_leaf *l)
-+{
-+        return
-+                l->bh != NULL &&
-+                l->bh->b_data != NULL &&
-+                ptr_inside(l->bh->b_data, l->bh->b_size, l->entries) &&
-+                ptr_inside(l->bh->b_data, l->bh->b_size, l->at) &&
-+                l->entries <= l->at;
-+}
-+
-+int iam_path_invariant(struct iam_path *p)
-+{
-+        int i;
-+
-+        if (p->ip_container == NULL ||
-+            p->ip_indirect < 0 || p->ip_indirect > DX_MAX_TREE_HEIGHT - 1 ||
-+            p->ip_frame != p->ip_frames + p->ip_indirect ||
-+            p->ip_leaf == NULL || !iam_leaf_invariant(p->ip_leaf))
-+                return 0;
-+        for (i = 0; i < ARRAY_SIZE(p->ip_frames); ++i) {
-+                if (i <= p->ip_indirect) {
-+                        if (!iam_frame_invariant(&p->ip_frames[i]))
-+                                return 0;
-+                }
-+        }
-+        return 1;
-+}
-+
-+__u32 iam_root_ptr(struct iam_container *c)
-+{
-+        return 0;
-+}
-+EXPORT_SYMBOL(iam_root_ptr);
-+
-+int iam_node_check(struct iam_path *path, struct iam_frame *frame)
-+{
-+        void *data;
-+        struct iam_entry *entries;
-+        struct super_block *sb;
-+
-+        data = frame->bh->b_data;
-+        entries = dx_node_get_entries(path, frame);
-+        sb = path_obj(path)->i_sb;
-+        if (frame == path->ip_frames) {
-+                struct iam_cookie *ic = path->ip_descr_data;
-+               /* root node */
-+                path->ip_key_target = ic->ic_key;
-+        } else {
-+                /* non-root index */
-+                assert(entries == data + path_descr(path)->id_node_gap);
-+                assert(dx_get_limit(entries) == dx_node_limit(path));
-+        }
-+        frame->entries = frame->at = entries;
-+        return 0;
-+}
-+EXPORT_SYMBOL(iam_node_check);
-+
-+int iam_node_init(struct iam_container *c, struct buffer_head *bh, int root)
-+{
-+        return 0;
-+}
-+EXPORT_SYMBOL(iam_node_init);
-+
-+int iam_keycmp(struct iam_container *c, struct iam_key *k1, struct iam_key *k2)
-+{
-+        return key_cmp(le64_to_cpu(*(__u64 *)k1), le64_to_cpu(*(__u64 *)k2));
-+}
-+EXPORT_SYMBOL(iam_keycmp);
-+
-+int iam_node_read(struct iam_container *c, iam_ptr_t ptr,
-+                         handle_t *h, struct buffer_head **bh)
-+{
-+        int result = 0;
-+
-+        *bh = ext3_bread(h, c->ic_object, (int)ptr, 0, &result);
-+        if (*bh == NULL)
-+                result = -EIO;
-+        return result;
-+}
-+EXPORT_SYMBOL(iam_node_read);
++struct iam_descr htree_compat_param = {
++      .id_key_size = sizeof ((struct dx_map_entry *)NULL)->hash,
++      .id_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs,
++      .id_node_gap = offsetof(struct dx_node, entries),
++      .id_root_gap = offsetof(struct dx_root, entries),
 +
-+#if 0
-+int iam_it_invariant(struct iam_iterator *it)
-+{
-+        return
-+                (it->ii_state == IAM_IT_DETACHED ||
-+                 it->ii_state == IAM_IT_ATTACHED) &&
-+                !(it->ii_flags & ~(IAM_IT_MOVE | IAM_IT_WRITE)) &&
-+                ergo(it->ii_state == IAM_IT_ATTACHED,
-+                     iam_path_invariant(&it->ii_path));
-+}
-+#endif
-+/*external function*/
-+/*
-+ * Search container @c for record with key @k. If record is found, its data
-+ * are moved into @r.
-+ *
-+ *
-+ *
-+ * Return values: +ve: found, 0: not-found, -ve: error
-+ */
++      .id_root_ptr   = htree_root_ptr,
++      .id_node_check = htree_node_check,
++      .id_node_init  = htree_node_init,
++      .id_node_read  = htree_node_read,
++      .id_keycmp     = htree_keycmp
++};
+-static struct iam_entry *dx_get_entries(struct iam_path *path, void *data,
+-                                     int root)
+-{
+-      return data +
+-              (root ?
+-               path_descr(path)->id_root_gap : path_descr(path)->id_node_gap);
+-}
+-static struct iam_entry *dx_node_get_entries(struct iam_path *path,
+-                                          struct iam_frame *frame)
++static inline int dx_index_is_compat(struct iam_path *path)
+ {
+-      return dx_get_entries(path,
+-                            frame->bh->b_data, frame == path->ip_frames);
++      return iam_path_descr(path) == &htree_compat_param;
+ }
 +
-+int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r)
+ static int dx_node_check(struct iam_path *p, struct iam_frame *f)
+ {
+       struct iam_entry     *e;
+@@ -614,10 +250,10 @@ static int dx_node_check(struct iam_path
+       count = dx_get_count(e);
+       e = iam_entry_shift(p, e, 1);
+       for (i = 0; i < count - 1; ++i, e = iam_entry_shift(p, e, 1)) {
+-              keycpy(c, p->ip_key_scratch[0], p->ip_key_scratch[1]);
+-              dx_get_key(p, e, p->ip_key_scratch[1]);
++              iam_keycpy(c, iam_path_key(p, 0), iam_path_key(p, 1));
++              iam_get_key(p, e, iam_path_key(p, 1));
+               if (i > 0 &&
+-                  keycmp(c, p->ip_key_scratch[0], p->ip_key_scratch[1]) > 0)
++                  iam_keycmp(c, iam_path_key(p, 0), iam_path_key(p, 1)) > 0)
+                       return 0;
+       }
+       return 1;
+@@ -636,13 +272,17 @@ static int htree_node_check(struct iam_p
+       data = frame->bh->b_data;
+       entries = dx_node_get_entries(path, frame);
+-      sb = path_obj(path)->i_sb;
++      sb = iam_path_obj(path)->i_sb;
+       if (frame == path->ip_frames) {
+               /* root node */
+               struct dx_root *root;
+-              struct htree_cookie *hc = path->ip_descr_data;
++              struct iam_path_compat *ipc;
+               root = data;
++              assert(path->ip_data != NULL);
++              ipc = container_of(path->ip_data, struct iam_path_compat,
++                                 ipc_descr);
++
+               if (root->info.hash_version > DX_HASH_MAX) {
+                       ext3_warning(sb, __FUNCTION__,
+                                    "Unrecognised inode hash code %d",
+@@ -669,15 +309,16 @@ static int htree_node_check(struct iam_p
+                                          root->info.info_length));
+               assert(dx_get_limit(entries) == dx_root_limit(path));
+-              hc->hinfo->hash_version = root->info.hash_version;
+-              hc->hinfo->seed = EXT3_SB(sb)->s_hash_seed;
+-              if (hc->dentry)
+-                      ext3fs_dirhash(hc->dentry->d_name.name,
+-                                     hc->dentry->d_name.len, hc->hinfo);
+-              path->ip_key_target = (struct iam_key *)&hc->hinfo->hash;
++              ipc->ipc_hinfo->hash_version = root->info.hash_version;
++              ipc->ipc_hinfo->seed = EXT3_SB(sb)->s_hash_seed;
++              if (ipc->ipc_dentry)
++                      ext3fs_dirhash(ipc->ipc_dentry->d_name.name,
++                                     ipc->ipc_dentry->d_name.len,
++                                     ipc->ipc_hinfo);
++              path->ip_key_target = (struct iam_key *)&ipc->ipc_hinfo->hash;
+       } else {
+               /* non-root index */
+-              assert(entries == data + path_descr(path)->id_node_gap);
++              assert(entries == data + iam_path_descr(path)->id_node_gap);
+               assert(dx_get_limit(entries) == dx_node_limit(path));
+       }
+       frame->entries = frame->at = entries;
+@@ -800,7 +441,7 @@ struct stats dx_show_entries(struct dx_h
+ }
+ #endif /* DX_DEBUG */
+-static int dx_lookup(struct iam_path *path)
++int dx_lookup(struct iam_path *path)
+ {
+       u32 ptr;
+       int err = 0;
+@@ -810,7 +451,7 @@ static int dx_lookup(struct iam_path *pa
+       struct iam_frame *frame;
+       struct iam_container *c;
+-      param = path_descr(path);
++      param = iam_path_descr(path);
+       c = path->ip_container;
+       
+       for (frame = path->ip_frames, i = 0,
+@@ -841,8 +482,8 @@ static int dx_lookup(struct iam_path *pa
+                       m = iam_entry_shift(path,
+                                          p, iam_entry_diff(path, q, p) / 2);
+                       dxtrace(printk("."));
+-                      if (keycmp(c, iam_key_at(path, m),
+-                                 path->ip_key_target) > 0)
++                      if (iam_keycmp(c, iam_key_at(path, m),
++                                     path->ip_key_target) > 0)
+                               q = iam_entry_shift(path, m, -1);
+                       else
+                               p = iam_entry_shift(path, m, +1);
+@@ -857,12 +498,12 @@ static int dx_lookup(struct iam_path *pa
+                       while (n--) {
+                               dxtrace(printk(","));
+                               at = iam_entry_shift(path, at, +1);
+-                              if (keycmp(c, iam_key_at(path, at),
+-                                         path->ip_key_target) > 0) {
++                              if (iam_keycmp(c, iam_key_at(path, at),
++                                             path->ip_key_target) > 0) {
+                                       if (at != iam_entry_shift(path, frame->at, 1)) {
+                                               BREAKPOINT;
+                                               printk(KERN_EMERG "%i\n",
+-                                                     keycmp(c, iam_key_at(path, at),
++                                                     iam_keycmp(c, iam_key_at(path, at),
+                                                             path->ip_key_target));
+                                       }
+                                       at = iam_entry_shift(path, at, -1);
+@@ -891,508 +532,20 @@ static int dx_probe(struct dentry *dentr
+                   struct dx_hash_info *hinfo, struct iam_path *path)
+ {
+       int err;
+-      struct htree_cookie hc = {
+-              .dentry = dentry,
+-              .hinfo  = hinfo
+-      };
++      struct iam_path_compat *ipc;
++
++      assert(path->ip_data != NULL);
++      ipc = container_of(path->ip_data, struct iam_path_compat, ipc_descr);
++      ipc->ipc_dentry = dentry;
++      ipc->ipc_hinfo = hinfo;
+       assert(dx_index_is_compat(path));
+-      path->ip_descr_data = &hc;
+       err = dx_lookup(path);
+       assert(err != 0 || path->ip_frames[path->ip_indirect].bh != NULL);
+       return err;
+ }
+ /*
+- * Initialize container @c, acquires additional reference on @inode.
+- */
+-int iam_container_init(struct iam_container *c,
+-                     struct iam_descr *descr, struct inode *inode)
+-{
+-      memset(c, 0, sizeof *c);
+-      c->ic_descr  = descr;
+-      c->ic_object = igrab(inode);
+-      if (c->ic_object != NULL)
+-              return 0;
+-      else
+-              return -ENOENT;
+-}
+-
+-/*
+- * Finalize container @c, release all resources.
+- */
+-void iam_container_fini(struct iam_container *c)
+-{
+-      if (c->ic_object != NULL) {
+-              iput(c->ic_object);
+-              c->ic_object = NULL;
+-      }
+-}
+-
+-static inline void iam_path_init(struct iam_path *path, struct iam_container *c, 
+-                               struct htree_cookie *hc)
+-{
+-      memset(path, 0, sizeof *path);
+-      path->ip_container = c;
+-      path->ip_frame = path->ip_frames;
+-      path->ip_descr_data = hc;
+-}
+-
+-static inline void iam_path_fini(struct iam_path *path)
+-{
+-      int i;
+-
+-      for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) {
+-              if (path->ip_frames[i].bh != NULL) {
+-                      brelse(path->ip_frames[i].bh);
+-                      path->ip_frames[i].bh = NULL;
+-              }
+-      }
+-}
+-
+-static void iam_path_compat_init(struct iam_path_compat *path,
+-                               struct inode *inode)
+-{
+-      int i;
+-
+-      iam_container_init(&path->ipc_container, &htree_compat_param, inode);
+-      /*
+-       * XXX hack allowing finalization of iam_path_compat with
+-       * iam_path_fini().
+-       */
+-      iput(inode);
+-      iam_path_init(&path->ipc_path, &path->ipc_container, NULL);
+-      for (i = 0; i < ARRAY_SIZE(path->ipc_path.ip_key_scratch); ++i)
+-              path->ipc_path.ip_key_scratch[i] =
+-                      (struct iam_key *)&path->ipc_scrach[i];
+-}
+-
+-static void iam_path_compat_fini(struct iam_path_compat *path)
+-{
+-      iam_path_fini(&path->ipc_path);
+-      iam_container_fini(&path->ipc_container);
+-}
+-
+-static int iam_leaf_init(struct iam_path *path, struct iam_leaf *leaf)
+-{
+-      int block, err;
+-      struct buffer_head *bh;
+-      
+-      block = dx_get_block(path, path->ip_frame->at);
+-      err = path_descr(path)->id_node_read(path->ip_container, block, 
+-                                           NULL, &bh);
+-      if (err)
+-              return err;
+-
+-      leaf->bh = bh;
+-      leaf->entries = (struct iam_leaf_entry *)bh->b_data;
+-      return 0;
+-}
+-
+-static void iam_leaf_fini(struct iam_leaf *leaf)
+-{
+-      if (leaf->bh)
+-              brelse(leaf->bh);
+-}
+-
+-/*
+- * Search container @c for record with key @k. If record is found, its data
+- * are moved into @r.
+- *
+- *
+- *
+- * Return values: +ve: found, 0: not-found, -ve: error
+- */
+-
+-int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r)
+-{
+-      struct dx_hash_info     hinfo;
+-      struct iam_path_compat cpath;
+-      struct iam_path *path = &cpath.ipc_path;
+-      struct htree_cookie hc = {
+-              .hinfo  = &hinfo
+-      };
+-      int err, i;
+-
+-      iam_path_init(path, c, &hc);
+-      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
+-              path->ip_key_scratch[i] =
+-                      (struct iam_key *)&cpath.ipc_scrach[i];
+-      err = dx_lookup(path);
+-      do {
+-              struct iam_leaf leaf;
+-              err = iam_leaf_init(path, &leaf);
+-              if (err)
+-                      goto errout;
+-
+-              for (path_descr(path)->id_leaf.start(c, &leaf);
+-                   !path_descr(path)->id_leaf.at_end(c, &leaf);
+-                   path_descr(path)->id_leaf.next(c, &leaf)) {
+-                      struct iam_key *key;
+-
+-                      key = kmalloc(path_descr(path)->id_key_size, GFP_KERNEL);
+-                      path_descr(path)->id_leaf.key(c, &leaf, key);
+-                      if (keycmp(c, k, key) == 0) {
+-                              memcpy(r, path_descr(path)->id_leaf.rec(c, &leaf),
+-                                     path_descr(path)->id_rec_size);
+-                              iam_path_fini(path);
+-                              iam_leaf_fini(&leaf);
+-                              return 0;
+-                      }
+-              }
+-
+-              iam_leaf_fini(&leaf);
+-              /* Check to see if we should continue to search */
+-              err = ext3_htree_next_block(c->ic_object, hinfo.hash, path, NULL);
+-              if (err < 0)
+-                      goto errout;
+-      } while (err == 1);
+-errout:
+-      iam_path_fini(path);
+-      return(err);
+-}
+-EXPORT_SYMBOL(iam_lookup);
+-
+-static inline size_t iam_leaf_entry_size(struct iam_path *p)
+-{
+-      return path_descr(p)->id_rec_size + path_descr(p)->id_key_size;
+-}
+-
+-static inline ptrdiff_t iam_leaf_entry_diff(struct iam_path *p,
+-                                    struct iam_leaf_entry *e1, struct iam_leaf_entry *e2)
+-{
+-      ptrdiff_t diff;
+-
+-      diff = (void *)e1 - (void *)e2;
+-      assert(diff / iam_leaf_entry_size(p) * iam_leaf_entry_size(p) == diff);
+-      return diff / iam_leaf_entry_size(p);
+-}
+-
+-static inline struct iam_leaf_entry* 
+-iam_leaf_entry_shift(struct iam_path *p, struct iam_leaf_entry *entry, int shift)
+-{
+-      void *e = entry;
+-      return e + shift * iam_leaf_entry_size(p);
+-}
+-
+-static inline struct iam_key *
+-dx_leaf_get_key(struct iam_path *p, struct iam_leaf_entry *e, struct iam_key *key)
+-{
+-      memcpy(key, e, path_descr(p)->id_key_size);
+-      return key;
+-}
+-
+-static inline struct iam_key *
+-iam_leaf_key_at(struct iam_path *p, struct iam_leaf_entry *entry)
+-{
+-      void *e = entry;
+-      return e + path_descr(p)->id_rec_size;
+-}
+-static inline struct iam_leaf_entry *
+-iam_leaf_entry_at(struct iam_path *p, struct iam_leaf_entry *entry)
+-{
+-      return entry; 
+-}
+-
+-static int iam_leaf_lookup(struct iam_path *path, struct iam_leaf *leaf, 
+-                         struct iam_key *k)
+-{
+-      struct iam_leaf_entry *p, *q, *m;
+-      struct iam_leaf_entry *entries = leaf->entries;
+-      int count = dx_get_count((struct iam_entry *)entries);
+-      
+-      p = iam_leaf_entry_shift(path, entries, 1);
+-      q = iam_leaf_entry_shift(path, entries, count - 1);
+-      while (p <= q) {
+-              m = iam_leaf_entry_shift(path,
+-                                 p, iam_leaf_entry_diff(path, q, p) / 2);
+-              dxtrace(printk("."));
+-              if (keycmp(path->ip_container, iam_leaf_key_at(path, m),
+-                         path->ip_key_target) > 0)
+-                      q = iam_leaf_entry_shift(path, m, -1);
+-              else
+-                      p = iam_leaf_entry_shift(path, m, +1);
+-      }
+-      leaf->at = q; 
+-      return 0;
+-}
+-
+-/*XXX what kind of lock should this entry be locked: WangDi */
+-static int iam_leaf_insert(handle_t *handle, struct iam_path *path, 
+-                         struct iam_key *k, struct iam_rec *r)
+-{
+-      struct iam_leaf leaf;
+-      struct iam_leaf_entry *p, *q;
+-      int err, count;
+-
+-      err = iam_leaf_init(path, &leaf);
+-      if (err)
+-              goto errout;
+-      path_descr(path)->id_leaf.start(path->ip_container, &leaf);
+-      count = dx_get_count((struct iam_entry *)leaf.entries);
+-      if (dx_get_count((struct iam_entry *)leaf.entries) >= 
+-          dx_get_limit((struct iam_entry *)leaf.entries)){
+-              err = -ENOSPC;
+-              goto errout;
+-      }
+-
+-      err = iam_leaf_lookup(path, &leaf, k);
+-      if (err)
+-              goto errout;
+-      
+-      /*insert the k/r to leaf entries*/
+-      p = iam_leaf_entry_shift(path, leaf.at, 1);
+-      q = iam_leaf_entry_shift(path, leaf.entries, count - 1);
+-      while (q < p) {
+-              memcpy(iam_leaf_entry_shift(path, q, 1), q, iam_leaf_entry_size(path));
+-              q = iam_leaf_entry_shift(path, q, -1);  
+-      }
+-      memcpy(iam_leaf_entry_at(path, p), r, path_descr(path)->id_rec_size);
+-      memcpy(iam_leaf_key_at(path, p), k, path_descr(path)->id_key_size);
+-
+-      dx_set_count((struct iam_entry*)leaf.entries, count + 1);
+-      err = ext3_journal_dirty_metadata(handle, leaf.bh);
+-      if (err)
+-              ext3_std_error(path->ip_container->ic_object->i_sb, err);
+-errout:       
+-      iam_leaf_fini(&leaf);
+-      return err;
+-} 
+-
+-static int split_leaf_node(handle_t *handle, struct iam_path *path)
+-{
+-      struct inode *dir = path_obj(path);
+-      unsigned continued = 0;
+-      struct buffer_head *bh2;
+-      u32 newblock, hash_split;
+-      char *data2;
+-      struct iam_leaf leaf;
+-      unsigned split;
+-      int     err;
+-
+-      bh2 = ext3_append (handle, dir, &newblock, &err);
+-      if (!(bh2)) {
+-              err = -ENOSPC;
+-              goto errout;
+-      }
+-      err = iam_leaf_init(path, &leaf);
+-      if (err)
+-              goto errout;
+-
+-      BUFFER_TRACE(leaf.bh, "get_write_access");
+-      err = ext3_journal_get_write_access(handle, leaf.bh);
+-      if (err) {
+-      journal_error:
+-              iam_leaf_fini(&leaf);
+-              brelse(bh2);
+-              ext3_std_error(dir->i_sb, err);
+-              err = -EIO;
+-              goto errout;
+-      }
+-      data2 = bh2->b_data;
+-      split = dx_get_count((struct iam_entry*)leaf.entries)/2;
+-      hash_split = *(__u32*)iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split));
+-      if (keycmp(path->ip_container, iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split)),
+-                 iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split -1))) == 0)
+-              continued = 1;
+-
+-      memcpy(iam_leaf_entry_shift(path, (struct iam_leaf_entry *)data2, 1),
+-             iam_leaf_entry_shift(path, leaf.entries, split),
+-             split * iam_leaf_entry_size(path));
+- 
+-      /* Which block gets the new entry? */
+-      dx_insert_block(path, path->ip_frame, hash_split + continued, newblock);
+-      err = ext3_journal_dirty_metadata (handle, bh2);
+-      if (err)
+-              goto journal_error;
+-      err = ext3_journal_dirty_metadata (handle, leaf.bh);
+-      if (err)
+-              goto journal_error;
+-      brelse (bh2);
+-      iam_leaf_fini(&leaf);
+-errout:
+-      return err;
+-}
+-
+-static int split_index_node(handle_t *handle, struct iam_path *path);
+-/*
+- * Insert new record @r with key @k into container @c (within context of
+- * transaction @h.
+- *
+- * Return values: 0: success, -ve: error, including -EEXIST when record with
+- * given key is already present.
+- *
+- * postcondition: ergo(result == 0 || result == -EEXIST,
+- *                                  iam_lookup(c, k, r2) > 0 &&
+- *                                  !memcmp(r, r2, c->ic_descr->id_rec_size));
+- */
+-int iam_insert(handle_t *handle, struct iam_container *c, struct iam_key *k, 
+-             struct iam_rec *r)
+-{
+-      struct dx_hash_info     hinfo;
+-      struct iam_path_compat cpath;
+-      struct iam_path *path = &cpath.ipc_path;
+-      struct htree_cookie hc = {
+-              .hinfo  = &hinfo
+-      };
+-      int err, i;
+-
+-      iam_path_init(path, c, &hc);
+-      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
+-              path->ip_key_scratch[i] =
+-                      (struct iam_key *)&cpath.ipc_scrach[i];
+-      err = dx_lookup(path);
+-      if (err)
+-              goto errout; 
+-
+-      err = iam_leaf_insert(handle, path, k, r);
+-      
+-      if (err != -ENOSPC) 
+-              goto errout;    
+-
+-      err = split_index_node(handle, path);
+-      if (err)
+-              goto errout;    
+-
+-      err = split_leaf_node(handle, path);
+-      if (err)
+-              goto errout;
+-      
+-      err = iam_leaf_insert(handle, path, k, r);
+-errout:
+-      iam_path_fini(path);
+-      return(err);
+-}
+-
+-EXPORT_SYMBOL(iam_insert);
+-static int iam_leaf_delete(handle_t *handle, struct iam_path *path, 
+-                         struct iam_key *k)
+-{
+-      struct iam_leaf leaf;
+-      struct iam_leaf_entry *p, *q;
+-      int err, count;
+-
+-      err = iam_leaf_init(path, &leaf);
+-      if (err)
+-              goto errout;
+-      
+-      err = iam_leaf_lookup(path, &leaf, k);
+-      if (err)
+-              goto errout;
+-
+-      count = dx_get_count((struct iam_entry*)leaf.entries);
+-      /*delete the k to leaf entries*/
+-      p = iam_leaf_entry_shift(path, leaf.at, 1);
+-      q = iam_leaf_entry_shift(path, leaf.entries, count - 1);
+-      while (p < q) {
+-              memcpy(p, iam_leaf_entry_shift(path, p, 1), iam_leaf_entry_size(path));
+-              p = iam_leaf_entry_shift(path, p, 1);
+-      }
+-      dx_set_count((struct iam_entry*)leaf.entries, count - 1);
+-
+-      err = ext3_journal_dirty_metadata(handle, leaf.bh);
+-      if (err)
+-              ext3_std_error(path_obj(path)->i_sb, err);
+-errout:       
+-      iam_leaf_fini(&leaf);
+-      return err;
+-}
+-
+-/*
+- * Delete existing record with key @k.
+- *
+- * Return values: 0: success, -ENOENT: not-found, -ve: other error.
+- *
+- * postcondition: ergo(result == 0 || result == -ENOENT,
+- *                                 !iam_lookup(c, k, *));
+- */
+-int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k)
+-{
+-      struct dx_hash_info     hinfo;
+-      struct iam_path_compat cpath;
+-      struct iam_path *path = &cpath.ipc_path;
+-      struct htree_cookie hc = {
+-              .hinfo  = &hinfo
+-      };
+-      int err, i;
+-
+-      iam_path_init(path, c, &hc);
+-      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
+-              path->ip_key_scratch[i] =
+-                      (struct iam_key *)&cpath.ipc_scrach[i];
+-      err = dx_lookup(path);
+-      if (err)
+-              goto errout; 
+-
+-      err = iam_leaf_delete(h, path, k);
+-errout:
+-      iam_path_fini(path);
+-      return err;
+-}
+-
+-EXPORT_SYMBOL(iam_delete);
+-
+-static int iam_leaf_update(handle_t *handle, struct iam_path *path, 
+-                         struct iam_key *k, struct iam_rec *r)
+-{
+-      struct iam_leaf leaf;
+-      int err;
+-
+-      err = iam_leaf_init(path, &leaf);
+-      if (err)
+-              goto errout;
+-      
+-      err = iam_leaf_lookup(path, &leaf, k);
+-      if (err)
+-              goto errout;
+-
+-      memcpy(iam_leaf_entry_at(path, leaf.at), r, path_descr(path)->id_rec_size);
+-      memcpy(iam_leaf_key_at(path, leaf.at), k, path_descr(path)->id_key_size);
+-
+-      err = ext3_journal_dirty_metadata(handle, leaf.bh);
+-      if (err)
+-              ext3_std_error(path_obj(path)->i_sb, err);
+-errout:       
+-      iam_leaf_fini(&leaf);
+-      return err;
+-}
+-/*
+- * Replace existing record with key @k, or insert new one. New record data are
+- * in @r.
+- *
+- * Return values: 0: success, -ve: error.
+- *
+- * postcondition: ergo(result == 0, iam_lookup(c, k, r2) > 0 &&
+- *                                  !memcmp(r, r2, c->ic_descr->id_rec_size));
+- */
+-int iam_update(handle_t *h, struct iam_container *c,
+-             struct iam_key *k, struct iam_rec *r)
+-{
+-      struct dx_hash_info     hinfo;
+-      struct iam_path_compat cpath;
+-      struct iam_path *path = &cpath.ipc_path;
+-      struct htree_cookie hc = {
+-              .hinfo  = &hinfo
+-      };
+-      int err, i;
+-      
+-      iam_path_init(path, c, &hc);
+-      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
+-              path->ip_key_scratch[i] =
+-                      (struct iam_key *)&cpath.ipc_scrach[i];
+-      err = dx_lookup(path);
+-      if (err)
+-              goto errout; 
+-
+-      err = iam_leaf_update(h, path, k, r);
+-errout:
+-      iam_path_fini(path);
+-      return err;
+-}
+-
+-EXPORT_SYMBOL(iam_update);
+-
+-/*
+  * This function increments the frame pointer to search the next leaf
+  * block, and reads in the necessary intervening nodes if the search
+  * should be necessary.  Whether or not the search is necessary is
+@@ -1409,8 +562,8 @@ EXPORT_SYMBOL(iam_update);
+  * If start_hash is non-null, it will be filled in with the starting
+  * hash of the next page.
+  */
+-static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+-                               struct iam_path *path, __u32 *start_hash)
++int ext3_htree_next_block(struct inode *dir, __u32 hash,
++                        struct iam_path *path, __u32 *start_hash)
+ {
+       struct iam_frame *p;
+       struct buffer_head *bh;
+@@ -1445,7 +598,7 @@ static int ext3_htree_next_block(struct 
+        * desired contiuation hash.  If it doesn't, return since
+        * there's no point to read in the successive index pages.
+        */
+-      dx_get_key(path, p->at, (struct iam_key *)&bhash);
++      iam_get_key(path, p->at, (struct iam_key *)&bhash);
+       if (start_hash)
+               *start_hash = bhash;
+       if ((hash & 1) == 0) {
+@@ -1457,9 +610,10 @@ static int ext3_htree_next_block(struct 
+        * block so no check is necessary
+        */
+       while (num_frames--) {
+-              err = path_descr(path)->id_node_read(path->ip_container,
+-                                                   (iam_ptr_t)dx_get_block(path, p->at),
+-                                                   NULL, &bh);
++              err = iam_path_descr(path)->
++                      id_node_read(path->ip_container,
++                                   (iam_ptr_t)dx_get_block(path, p->at),
++                                   NULL, &bh);
+               if (err != 0)
+                       return err; /* Failure */
+               ++p;
+@@ -1662,8 +816,8 @@ static void dx_sort_map (struct dx_map_e
+       } while(more);
+ }
+-static void dx_insert_block(struct iam_path *path,
+-                          struct iam_frame *frame, u32 hash, u32 block)
++void dx_insert_block(struct iam_path *path, struct iam_frame *frame,
++                   u32 hash, u32 block)
+ {
+       struct iam_entry *entries = frame->entries;
+       struct iam_entry *old = frame->at, *new = iam_entry_shift(path, old, +1);
+@@ -1897,14 +1051,15 @@ static struct buffer_head * ext3_dx_find
+               if (*err != 0)
+                       return NULL;
+       } else {
+-              path->ip_frame->bh = NULL;              /* for iam_path_fini() */
++              path->ip_frame->bh = NULL;      /* for iam_path_fini() */
+               path->ip_frame->at = (void *)&dummy_dot;/* hack for zero entry*/
+       }
+       hash = hinfo.hash;
+       do {
+               block = dx_get_block(path, path->ip_frame->at);
+-              *err = path_descr(path)->id_node_read(path->ip_container, (iam_ptr_t)block,
+-                                                   NULL, &bh);
++              *err = iam_path_descr(path)->id_node_read(path->ip_container,
++                                                        (iam_ptr_t)block,
++                                                        NULL, &bh);
+               if (*err != 0)
+                       goto errout;
+               de = (struct ext3_dir_entry_2 *) bh->b_data;
+@@ -2067,7 +1222,7 @@ static struct ext3_dir_entry_2 *do_split
+                       struct buffer_head **bh,struct iam_frame *frame,
+                       struct dx_hash_info *hinfo, int *error)
+ {
+-      struct inode *dir = path_obj(path);
++      struct inode *dir = iam_path_obj(path);
+       unsigned blocksize = dir->i_sb->s_blocksize;
+       unsigned count, continued;
+       struct buffer_head *bh2;
+@@ -2392,15 +1547,15 @@ static int ext3_add_entry (handle_t *han
+ }
+ #ifdef CONFIG_EXT3_INDEX
+-static int split_index_node(handle_t *handle, struct iam_path *path)
+-{ 
++int split_index_node(handle_t *handle, struct iam_path *path)
 +{
-+        struct iam_iterator it;
-+        int result;
-+
-+        iam_it_init(&it, c, 0);
-+
-+        result = iam_it_get_exact(&it, k);
-+        if (result == 0)
-+                /*
-+                 * record with required key found, copy it into user buffer
-+                 */
-+                reccpy(&it.ii_path, r, iam_it_rec_get(&it));
-+        iam_it_put(&it);
-+        iam_it_fini(&it);
-+        return result;
-+}
-+
-+EXPORT_SYMBOL(iam_lookup);
-+/*
-+ * Insert new record @r with key @k into container @c (within context of
-+ * transaction @h.
-+ *
-+ * Return values: 0: success, -ve: error, including -EEXIST when record with
-+ * given key is already present.
-+ *
-+ * postcondition: ergo(result == 0 || result == -EEXIST,
-+ *                                  iam_lookup(c, k, r2) > 0 &&
-+ *                                  !memcmp(r, r2, c->ic_descr->id_rec_size));
+       struct iam_entry *entries;   /* old block contents */
+       struct iam_entry *entries2;  /* new block contents */
+       struct iam_frame *frame, *safe;
+       struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {0};
+       u32 newblock[DX_MAX_TREE_HEIGHT] = {0};
+-      struct inode *dir = path_obj(path);
++      struct inode *dir = iam_path_obj(path);
+       int nr_splet;
+       int i, err;
+@@ -2442,7 +1597,8 @@ static int split_index_node(handle_t *ha
+       for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
+               bh_new[i] = ext3_append (handle, dir, &newblock[i], &err);
+               if (!bh_new[i] ||
+-                  path_descr(path)->id_node_init(path->ip_container, bh_new[i], 0) != 0)
++                  iam_path_descr(path)->id_node_init(path->ip_container,
++                                                     bh_new[i], 0) != 0)
+                       goto cleanup;
+               BUFFER_TRACE(frame->bh, "get_write_access");
+               err = ext3_journal_get_write_access(handle, frame->bh);
+@@ -2516,9 +1672,9 @@ static int split_index_node(handle_t *ha
+                       unsigned count1 = count/2, count2 = count - count1;
+                       unsigned hash2;
+-                      dx_get_key(path,
+-                                 iam_entry_shift(path, entries, count1),
+-                                 (struct iam_key *)&hash2);
++                      iam_get_key(path,
++                                  iam_entry_shift(path, entries, count1),
++                                  (struct iam_key *)&hash2);
+                       dxtrace(printk("Split index %i/%i\n", count1, count2));
+@@ -2578,7 +1734,7 @@ static int ext3_dx_add_entry(handle_t *h
+       size_t isize;
+       iam_path_compat_init(&cpath, dir);
+-      param = path_descr(path);
++      param = iam_path_descr(path);
+       err = dx_probe(dentry, NULL, &hinfo, path);
+       if (err != 0)
+@@ -2588,7 +1744,7 @@ static int ext3_dx_add_entry(handle_t *h
+       /* XXX nikita: global serialization! */
+       isize = dir->i_size;
+-      err = param->id_node_read(path->ip_container, (iam_ptr_t)dx_get_block(path, frame->at), 
++      err = param->id_node_read(path->ip_container, (iam_ptr_t)dx_get_block(path, frame->at),
+                                 handle, &bh);
+       if (err != 0)
+               goto cleanup;
+@@ -2724,12 +1880,12 @@ static struct inode * ext3_new_inode_wan
+  * is so far negative - it has no inode.
+  *
+  * If the create succeeds, we fill in the inode information
+- * with d_instantiate(). 
++ * with d_instantiate().
+  */
+ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
+               struct nameidata *nd)
+ {
+-      handle_t *handle; 
++      handle_t *handle;
+       struct inode * inode;
+       int err, retries = 0;
+Index: iam/include/linux/lustre_iam.h
+===================================================================
+--- iam.orig/include/linux/lustre_iam.h        2006-05-10 18:21:01.000000000 +0400
++++ iam/include/linux/lustre_iam.h     2006-05-10 21:22:41.000000000 +0400
+@@ -1,3 +1,39 @@
++/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
++ * vim:expandtab:shiftwidth=8:tabstop=8:
++ *
++ *  lustre_iam.h
++ *  Top-level entry points into osd module
++ *
++ *  Copyright (c) 2006 Cluster File Systems, Inc.
++ *   Author: Wang Di <wangdi@clusterfs.com>
++ *   Author: Nikita Danilov <nikita@clusterfs.com>
++ *
++ *   This file is part of the Lustre file system, http://www.lustre.org
++ *   Lustre is a trademark of Cluster File Systems, Inc.
++ *
++ *   You may have signed or agreed to another license before downloading
++ *   this software.  If so, you are bound by the terms and conditions
++ *   of that agreement, and the following does not apply to you.  See the
++ *   LICENSE file included with this distribution for more information.
++ *
++ *   If you did not agree to a different license, then this copy of Lustre
++ *   is open source software; you can redistribute it and/or modify it
++ *   under the terms of version 2 of the GNU General Public License as
++ *   published by the Free Software Foundation.
++ *
++ *   In either case, Lustre is distributed in the hope that it will be
++ *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
++ *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ *   license text for more details.
 + */
 +
-+int iam_insert(handle_t *h, struct iam_container *c,
-+               struct iam_key *k, struct iam_rec *r)
-+{
-+        struct iam_iterator it;
-+        int result;
-+
-+        iam_it_init(&it, c, IAM_IT_WRITE);
-+
-+        result = iam_it_get_exact(&it, k);
-+        if (result == -ENOENT)
-+                result = iam_it_rec_insert(h, &it, k, r);
-+        else if (result == 0)
-+                result = -EEXIST;
-+        iam_it_put(&it);
-+        iam_it_fini(&it);
-+        return result;
-+}
-+
-+EXPORT_SYMBOL(iam_insert);
-+
-+int iam_update(handle_t *h, struct iam_container *c,
-+               struct iam_key *k, struct iam_rec *r)
-+{
-+        struct iam_iterator it;
-+        int result;
-+
-+        iam_it_init(&it, c, IAM_IT_WRITE);
-+
-+        result = iam_it_get_exact(&it, k);
-+        if (result == 0)
-+                iam_it_rec_set(h, &it, r);
-+        iam_it_put(&it);
-+        iam_it_fini(&it);
-+        return result;
-+}
++#ifndef __LINUX_LUSTRE_IAM_H__
++#define __LINUX_LUSTRE_IAM_H__
 +
-+EXPORT_SYMBOL(iam_update);
 +/*
-+ * Delete existing record with key @k.
-+ *
-+ * Return values: 0: success, -ENOENT: not-found, -ve: other error.
-+ *
-+ * postcondition: ergo(result == 0 || result == -ENOENT,
-+ *                                 !iam_lookup(c, k, *));
-+ */
-+
-+int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k)
-+{
-+        struct iam_iterator it;
-+        int result;
-+
-+        iam_it_init(&it, c, IAM_IT_WRITE);
-+
-+        result = iam_it_get_exact(&it, k);
-+        if (result == 0)
-+                iam_it_rec_delete(h, &it);
-+        iam_it_put(&it);
-+        iam_it_fini(&it);
-+        return result;
-+}
-+EXPORT_SYMBOL(iam_delete);
-+
-Index: linux-2.6.9/fs/ext3/Makefile
-===================================================================
---- linux-2.6.9.orig/fs/ext3/Makefile  2006-05-09 13:37:44.000000000 +0800
-+++ linux-2.6.9/fs/ext3/Makefile       2006-05-09 13:37:46.000000000 +0800
-@@ -6,7 +6,7 @@
- ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
-          ioctl.o namei.o super.o symlink.o hash.o resize.o \
--         extents.o mballoc.o
-+         extents.o mballoc.o iam.o
- ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
- ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
-Index: linux-2.6.9/include/linux/lustre_iam.h
-===================================================================
---- linux-2.6.9.orig/include/linux/lustre_iam.h        2006-05-09 13:37:46.000000000 +0800
-+++ linux-2.6.9/include/linux/lustre_iam.h     2006-05-09 13:51:43.000000000 +0800
-@@ -1,4 +1,8 @@
- /*
 + *  linux/include/linux/lustre_iam.h
 + */
 +
-+/*
+ /*
   * Maximal number of non-leaf levels in htree. In the stock ext3 this is 2.
   */
- enum {
-@@ -30,6 +34,11 @@
+@@ -30,6 +66,11 @@ struct iam_key;
  /* Incomplete type use to refer to the records stored in iam containers. */
  struct iam_rec;
  
@@ -2157,20 +2358,147 @@ Index: linux-2.6.9/include/linux/lustre_iam.h
  typedef __u64 iam_ptr_t;
  
  /*
-@@ -42,7 +51,8 @@
+@@ -41,12 +82,17 @@ struct iam_frame {
+       struct iam_entry *at;      /* target entry, found by binary search */
  };
  
++/*
++ * Opaque entry in the leaf node.
++ */
++struct iam_lentry;
++
  /* leaf node reached by tree lookup */
 -#define iam_leaf_entry iam_rec
-+struct iam_leaf_entry;
-+
  struct iam_leaf {
-       struct buffer_head *bh;
-       struct iam_leaf_entry *entries;
-@@ -196,6 +206,161 @@
-       __u32                ipc_scrach[DX_SCRATCH_KEYS];
+-      struct buffer_head *bh;
+-      struct iam_leaf_entry *entries;
+-      struct iam_leaf_entry *at;
++      struct buffer_head *il_bh;
++      struct iam_lentry  *il_entries;
++      struct iam_lentry  *il_at;
++      void               *il_descr_data;
+ };
+ struct iam_path;
+@@ -115,6 +161,15 @@ struct iam_descr {
+               /*
+                * leaf operations.
+                */
++
++              /*
++               * initialize just loaded leaf node.
++               */
++              int (*init)(struct iam_container *c, struct iam_leaf *l);
++              /*
++               * Release resources.
++               */
++              void (*fini)(struct iam_container *c, struct iam_leaf *l);
+               /*
+                * returns true iff leaf is positioned at the last entry.
+                */
+@@ -123,12 +178,30 @@ struct iam_descr {
+               void (*start)(struct iam_container *c, struct iam_leaf *l);
+               /* more leaf to the next entry. */
+               void (*next)(struct iam_container *c, struct iam_leaf *l);
+-              /* return key of current leaf record in @k */
+-              void (*key)(struct iam_container *c, struct iam_leaf *l,
+-                          struct iam_key *k);
+-              /* return pointer to entry body */
++              /* return key of current leaf record. This method may return
++               * either a pointer to the key stored in the node, or copy the
++               * key into the @k buffer supplied by the caller and return a
++               * pointer to this buffer. The latter approach is used when
++               * keys in nodes are not stored in plain form (e.g., htree
++               * doesn't store keys at all).
++               *
++               * Caller should assume that the returned pointer is only
++               * valid while the leaf node is pinned and locked. */
++              struct iam_key *(*key)(struct iam_container *c,
++                                     struct iam_leaf *l, struct iam_key *k);
++              /* return pointer to the entry body. The pointer is valid
++               * while the corresponding leaf node is locked and pinned. */
+               struct iam_rec *(*rec)(struct iam_container *c,
+                                      struct iam_leaf *l);
++
++              /*
++               * Search leaf @l for a record with key @k or for a place
++               * where such a record is to be inserted.
++               *
++               * Scratch keys from @path can be used.
++               */
++              int (*lookup)(struct iam_container *c, struct iam_path *path,
++                            struct iam_leaf *l, struct iam_key *k);
+       } id_leaf;
+ };
+@@ -149,6 +222,17 @@ struct iam_container {
+ };
+ /*
++ * Description-specific part of iam_path. This is usually embedded into a
++ * larger structure.
++ */
++struct iam_path_descr {
++      /*
++       * Scratch-pad area for temporary keys.
++       */
++      struct iam_key        *ipd_key_scratch[DX_SCRATCH_KEYS];
++};
++
++/*
+  * Structure to keep track of a path drilled through htree.
+  */
+ struct iam_path {
+@@ -172,34 +256,232 @@ struct iam_path {
+       /*
+        * Leaf node: a child of ->ip_frame.
+        */
+-      struct iam_leaf       *ip_leaf;
++      struct iam_leaf        ip_leaf;
+       /*
+        * Key searched for.
+        */
+       struct iam_key        *ip_key_target;
+       /*
+-       * Scratch-pad area for temporary keys.
+-       */
+-      struct iam_key        *ip_key_scratch[DX_SCRATCH_KEYS];
+-      /*
+-       * pointer to flavor-specific per-container data.
++       * Description-specific data.
+        */
+-      void                  *ip_descr_data;
++      struct iam_path_descr *ip_data;
+ };
++struct dx_hash_info;
++
+ /*
+  * Helper structure for legacy htrees.
+  */
+ struct iam_path_compat {
+-      struct iam_path      ipc_path;
+-      struct iam_container ipc_container;
+-      __u32                ipc_scrach[DX_SCRATCH_KEYS];
++      struct iam_path       ipc_path;
++      struct iam_container  ipc_container;
++      __u32                 ipc_scratch[DX_SCRATCH_KEYS];
++      struct dx_hash_info  *ipc_hinfo;
++      struct dentry        *ipc_dentry;
++      struct iam_path_descr ipc_descr;
  };
  
+-int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r);
+-int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k);
+-int iam_update(handle_t *h, struct iam_container *c, struct iam_key *k, struct iam_rec *r);
+-int iam_insert(handle_t *handle, struct iam_container *c, struct iam_key *k, struct iam_rec *r);
++/*
++ * iam cursor (iterator) api.
++ */
++
++/*
++ * States of iterator state machine.
++ */
 +enum iam_it_state {
 +      /* initial state */
 +      IAM_IT_DETACHED,
@@ -2179,6 +2507,20 @@ Index: linux-2.6.9/include/linux/lustre_iam.h
 +};
 +
 +/*
++ * Flags controlling iterator functionality.
++ */
++enum iam_it_flags {
++      /*
++       * this iterator will move (iam_it_{prev,next}() will be called on it)
++       */
++      IAM_IT_MOVE  = (1 << 0),
++      /*
++       * tree can be updated through this iterator.
++       */
++      IAM_IT_WRITE = (1 << 1)
++};
++
++/*
 + * Iterator.
 + *
 + * Immediately after call to iam_it_init() iterator is in "detached"
@@ -2209,13 +2551,8 @@ Index: linux-2.6.9/include/linux/lustre_iam.h
 +      struct iam_path       ii_path;
 +};
 +
-+static struct iam_container *iam_it_container(struct iam_iterator *it)
-+{
-+      return it->ii_path.ip_container;
-+}
-+
-+void iam_path_init(struct iam_path *path, struct iam_container *c);
-+
++void iam_path_init(struct iam_path *path, struct iam_container *c,
++                 struct iam_path_descr *pd);
 +void iam_path_fini(struct iam_path *path);
 +
 +void iam_path_compat_init(struct iam_path_compat *path, struct inode *inode);
@@ -2225,7 +2562,8 @@ Index: linux-2.6.9/include/linux/lustre_iam.h
 + *
 + * postcondition: it_state(it) == IAM_IT_DETACHED
 + */
-+int  iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags);
++int  iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags,
++               struct iam_path_descr *pd);
 +/*
 + * Finalize iterator and release all resources.
 + *
@@ -2326,114 +2664,93 @@ Index: linux-2.6.9/include/linux/lustre_iam.h
 + */
 +int iam_it_rec_delete(handle_t *h, struct iam_iterator *it);
 +
- int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r);
- int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k);
- int iam_update(handle_t *h, struct iam_container *c, struct iam_key *k, struct iam_rec *r);
-@@ -209,4 +374,202 @@
-  * Finalize container @c, release all resources.
-  */
- void iam_container_fini(struct iam_container *c);
++typedef __u64 iam_pos_t;
++
 +/*
-+ * Future: use high four bits of block for coalesce-on-delete flags
-+ * Mask them off for now.
++ * Convert iterator to cookie.
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED &&
++ *                iam_path_descr(&it->ii_path)->id_key_size <= sizeof(iam_pos_t)
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++iam_pos_t iam_it_store(struct iam_iterator *it);
++
++/*
++ * Restore iterator from cookie.
++ *
++ * precondition:  it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE &&
++ *                iam_path_descr(&it->ii_path)->id_key_size <= sizeof(iam_pos_t)
++ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED &&
++ *                                  iam_it_store(it) == pos)
 + */
++int iam_it_load(struct iam_iterator *it, iam_pos_t pos);
++
++int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r,
++             struct iam_path_descr *pd);
++int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k,
++             struct iam_path_descr *pd);
++int iam_update(handle_t *h, struct iam_container *c, struct iam_key *k,
++             struct iam_rec *r, struct iam_path_descr *pd);
++int iam_insert(handle_t *handle, struct iam_container *c, struct iam_key *k,
++             struct iam_rec *r, struct iam_path_descr *pd);
+ /*
+  * Initialize container @c, acquires additional reference on @inode.
+  */
+@@ -210,3 +492,149 @@ int iam_container_init(struct iam_contai
+  */
+ void iam_container_fini(struct iam_container *c);
 +#ifndef assert
 +#define assert(test) J_ASSERT(test)
 +#endif
 +
-+static inline void *entry_off(struct iam_entry *entry, ptrdiff_t off)
-+{
-+      return (void *)((char *)entry + off);
-+}
-+
-+static inline struct iam_descr *path_descr(struct iam_path *p)
++static inline struct iam_descr *iam_path_descr(struct iam_path *p)
 +{
 +      return p->ip_container->ic_descr;
 +}
 +
-+static inline struct inode *path_obj(struct iam_path *p)
++static inline struct inode *iam_path_obj(struct iam_path *p)
 +{
 +      return p->ip_container->ic_object;
 +}
 +
-+static inline size_t iam_entry_size(struct iam_path *p)
-+{
-+      return path_descr(p)->id_key_size + path_descr(p)->id_ptr_size;
-+}
-+
-+static inline struct iam_entry *iam_entry_shift(struct iam_path *p,
-+                                            struct iam_entry *entry, int shift)
-+{
-+      void *e = entry;
-+      return e + shift * iam_entry_size(p);
-+}
-+
-+static inline ptrdiff_t iam_entry_diff(struct iam_path *p,
-+                                    struct iam_entry *e1, struct iam_entry *e2)
-+{
-+      ptrdiff_t diff;
-+
-+      diff = (void *)e1 - (void *)e2;
-+      assert(diff / iam_entry_size(p) * iam_entry_size(p) == diff);
-+      return diff / iam_entry_size(p);
-+}
-+
-+static inline struct iam_key *dx_get_key(struct iam_path *p,
-+                                      struct iam_entry *entry,
-+                                      struct iam_key *key)
-+{
-+      memcpy(key, entry, path_descr(p)->id_key_size);
-+      return key;
-+}
-+
-+static inline struct iam_key *iam_key_at(struct iam_path *p,
-+                                     struct iam_entry *entry)
-+{
-+      return (struct iam_key *)entry;
-+}
-+
-+static inline void reccpy(struct iam_path *p, struct iam_rec *rec_dst,
-+                        struct iam_rec *rec_src)
-+{
-+      memcpy(rec_dst, rec_src, path_descr(p)->id_rec_size);
-+}   
-+
-+static inline void keycpy(struct iam_container *c, struct iam_key *k1, 
-+                        struct iam_key *k2)
++static inline void iam_keycpy(struct iam_container *c, struct iam_key *k1,
++                            struct iam_key *k2)
 +{
 +      memcpy(k1, k2, c->ic_descr->id_key_size);
 +}
 +
-+static inline int keycmp(struct iam_container *c,
-+                       struct iam_key *k1, struct iam_key *k2)
++static inline int iam_keycmp(struct iam_container *c,
++                           struct iam_key *k1, struct iam_key *k2)
 +{
 +      return c->ic_descr->id_keycmp(c, k1, k2);
 +}
 +
-+static inline int it_keycmp(struct iam_iterator *it,
-+                          struct iam_key *k1, struct iam_key *k2)
++static inline void *iam_entry_off(struct iam_entry *entry, size_t off)
 +{
-+      return keycmp(iam_it_container(it), k1, k2);
++      return (void *)((char *)entry + off);
 +}
 +
 +/*XXX These stuff put here, just because they are used by iam.c and namei.c*/
 +static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry)
 +{
-+      return le32_to_cpu(*(u32 *)entry_off(entry, path_descr(p)->id_key_size))
++      return le32_to_cpu(*(u32*)iam_entry_off(entry,
++                                              iam_path_descr(p)->id_key_size))
 +              & 0x00ffffff;
 +}
 +
 +static inline void dx_set_block(struct iam_path *p,
 +                              struct iam_entry *entry, unsigned value)
 +{
-+      *(u32*)entry_off(entry,
-+                       path_descr(p)->id_key_size) = cpu_to_le32(value);
++      *(u32*)iam_entry_off(entry,
++                           iam_path_descr(p)->id_key_size) =
++              cpu_to_le32(value);
 +}
 +
 +static inline void dx_set_key(struct iam_path *p,
 +                            struct iam_entry *entry, struct iam_key *key)
 +{
-+      memcpy(entry, key, path_descr(p)->id_key_size);
++      memcpy(entry, key, iam_path_descr(p)->id_key_size);
 +}
 +
 +struct dx_countlimit {
@@ -2441,59 +2758,52 @@ Index: linux-2.6.9/include/linux/lustre_iam.h
 +      __le16 count;
 +};
 +
-+static inline unsigned dx_get_count (struct iam_entry *entries)
++static inline unsigned dx_get_count(struct iam_entry *entries)
 +{
 +      return le16_to_cpu(((struct dx_countlimit *) entries)->count);
 +}
 +
-+static inline unsigned dx_get_limit (struct iam_entry *entries)
++static inline unsigned dx_get_limit(struct iam_entry *entries)
 +{
 +      return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
 +}
 +
-+static inline void dx_set_count (struct iam_entry *entries, unsigned value)
++static inline void dx_set_count(struct iam_entry *entries, unsigned value)
 +{
 +      ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
 +}
 +
-+static inline void dx_set_limit (struct iam_entry *entries, unsigned value)
-+{
-+      ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
-+}
-+
-+static inline unsigned dx_root_limit(struct iam_path *p)
-+{
-+      struct iam_descr *param = path_descr(p);
-+      unsigned entry_space = path_obj(p)->i_sb->s_blocksize -
-+              param->id_root_gap;
-+      return entry_space / (param->id_key_size + param->id_ptr_size);
-+}
-+
 +static inline unsigned dx_node_limit(struct iam_path *p)
 +{
-+      struct iam_descr *param = path_descr(p);
-+      unsigned entry_space   = path_obj(p)->i_sb->s_blocksize -
++      struct iam_descr *param = iam_path_descr(p);
++      unsigned entry_space   = iam_path_obj(p)->i_sb->s_blocksize -
 +              param->id_node_gap;
 +      return entry_space / (param->id_key_size + param->id_ptr_size);
 +}
 +
-+static inline struct iam_entry *dx_get_entries(struct iam_path *path, 
++static inline struct iam_entry *dx_get_entries(struct iam_path *path,
 +                                             void *data, int root)
 +{
-+      return data +
-+              (root ?
-+               path_descr(path)->id_root_gap : path_descr(path)->id_node_gap);
++      struct iam_descr *param = iam_path_descr(path);
++      return data + (root ? param->id_root_gap : param->id_node_gap);
 +}
 +
++
 +static inline struct iam_entry *dx_node_get_entries(struct iam_path *path,
-+                                          struct iam_frame *frame)
++                                                  struct iam_frame *frame)
 +{
 +      return dx_get_entries(path,
 +                            frame->bh->b_data, frame == path->ip_frames);
 +}
 +
++static inline struct iam_key *iam_path_key(struct iam_path *path, int nr)
++{
++      assert(0 <= nr && nr < ARRAY_SIZE(path->ip_data->ipd_key_scratch));
++      return path->ip_data->ipd_key_scratch[nr];
++}
++
 +int dx_lookup(struct iam_path *path);
-+void dx_insert_block(struct iam_path *path, struct iam_frame *frame, 
++void dx_insert_block(struct iam_path *path, struct iam_frame *frame,
 +                   u32 hash, u32 block);
 +
 +int ext3_htree_next_block(struct inode *dir, __u32 hash,
@@ -2517,8 +2827,9 @@ Index: linux-2.6.9/include/linux/lustre_iam.h
 +
 +int  iam_leaf_at_end(struct iam_container *c, struct iam_leaf *leaf);
 +void iam_leaf_start(struct iam_container *c, struct iam_leaf *leaf);
-+struct iam_rec *iam_leaf_rec(struct iam_container *c, struct iam_leaf_entry *leaf);
-+struct iam_key *iam_leaf_key(struct iam_container *c, struct iam_key *key);
++struct iam_rec *iam_leaf_rec(struct iam_container *c, struct iam_leaf *leaf);
++struct iam_key *iam_leaf_key(struct iam_container *c, struct iam_leaf *leaf,
++                           struct iam_key *key);
 +
 +int iam_index_next(struct iam_path *p);
 +int iam_read_leaf(struct iam_path *p);
@@ -2531,4 +2842,7 @@ Index: linux-2.6.9/include/linux/lustre_iam.h
 +int iam_keycmp(struct iam_container *c, struct iam_key *k1, struct iam_key *k2);
 +int iam_node_read(struct iam_container *c, iam_ptr_t ptr,
 +                  handle_t *h, struct buffer_head **bh);
++
++
++
++#endif /* __LINUX_LUSTRE_IAM_H__ */