Whamcloud - gitweb
Branch: b_new_cmd
authorwangdi <wangdi>
Fri, 28 Apr 2006 09:37:04 +0000 (09:37 +0000)
committerwangdi <wangdi>
Fri, 28 Apr 2006 09:37:04 +0000 (09:37 +0000)
update lustre fld and iam prototype

ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel4.series
lustre/fld/Makefile.in
lustre/fld/fld_handle.c [moved from lustre/fld/fld_seq.c with 99% similarity]
lustre/fld/fld_iam.c
lustre/fld/fld_internal.h
lustre/kernel_patches/patches/ext3-iam-separate.patch [new file with mode: 0644]
lustre/kernel_patches/series/ldiskfs-2.6-rhel4.series

index b90ed7a..9f7b49b 100644 (file)
@@ -17,3 +17,4 @@ ext3-htree-path-ops.patch
 ext3-hash-selection.patch
 ext3-htree-comments.patch
 ext3-iam-ops.patch
+ext3-iam-separate.patch
index 11ca873..ff381b3 100644 (file)
@@ -1,5 +1,5 @@
 MODULES := fld 
-fld-objs := fld_seq.o fld_iam.o
+fld-objs := fld_handle.o fld_iam.o
 
 EXTRA_PRE_CFLAGS := -I@LUSTRE@/ldiskfs/
 @INCLUDE_RULES@
similarity index 99%
rename from lustre/fld/fld_seq.c
rename to lustre/fld/fld_handle.c
index a8ca007..adca60e 100644 (file)
@@ -1,7 +1,7 @@
 /* -*- MODE: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
  * vim:expandtab:shiftwidth=8:tabstop=8:
  *
- *  fld/fld.c
+ *  fld/fld_handle.c
  *
  *  Copyright (C) 2006 Cluster File Systems, Inc.
  *   Author: WangDi <wangdi@clusterfs.com>
index 2cdf5a2..36283d5 100644 (file)
@@ -53,14 +53,14 @@ struct fld_info fld_info;
 int fld_handle_insert(struct fld_info *fld_info, fidseq_t seq_num, mdsno_t mdsno)
 {
         handle_t *handle = NULL;
-        return iam_insert(handle, &fld_info->fi_container,
+        return iam_insert(handle, fld_info->fi_container,
                           (struct iam_key *)&seq_num, (struct iam_rec *)&mdsno);
 }
 
 int fld_handle_delete(struct fld_info *fld_info, fidseq_t seq_num, mdsno_t mds_num)
 {
         handle_t *handle = NULL;
-        return iam_delete(handle, &fld_info->fi_container,
+        return iam_delete(handle, fld_info->fi_container,
                           (struct iam_key *)&seq_num);
 }
 
@@ -69,7 +69,7 @@ int fld_handle_lookup(struct fld_info *fld_info, fidseq_t seq_num, mdsno_t *mds_
         mdsno_t mdsno;
         int result;
 
-        result = iam_lookup(&fld_info->fi_container, (struct iam_key *)&seq_num,
+        result = iam_lookup(fld_info->fi_container, (struct iam_key *)&seq_num,
                             (struct iam_rec *)&mdsno);
         if (result == 0)
                 return -ENOENT;
@@ -79,32 +79,58 @@ int fld_handle_lookup(struct fld_info *fld_info, fidseq_t seq_num, mdsno_t *mds_
                 return result;
 }
 
-static u32 fld_root_ptr(struct iam_container *c)
+static __u32 fld_root_ptr(struct iam_container *c)
 {
         return 0;
 }
+
 static int fld_node_check(struct iam_path *path, struct iam_frame *frame)
 {
+        void *data;
+        struct iam_entry *entries;
+        struct super_block *sb;
+
+        data = frame->bh->b_data;
+        entries = dx_node_get_entries(path, frame);
+        sb = path_obj(path)->i_sb;
+        if (frame == path->ip_frames) {
+                struct iam_cookie *ic = path->ip_descr_data;
+                /* root node */
+                path->ip_key_target = ic->ic_key;
+        } else {
+                /* non-root index */
+                assert(entries == data + path_descr(path)->id_node_gap);
+                assert(dx_get_limit(entries) == dx_node_limit(path));
+        }
+        frame->entries = frame->at = entries;
         return 0;
 }
+
 static int fld_node_init(struct iam_container *c, struct buffer_head *bh,
                            int root)
 {
         return 0;
 }
+
 static int fld_keycmp(struct iam_container *c,
-                        struct iam_key *k1, struct iam_key *k2)
+                      struct iam_key *k1, struct iam_key *k2)
 {
         return key_cmp(le64_to_cpu(*(__u64 *)k1), le64_to_cpu(*(__u64 *)k2));
 }
+
 static int fld_node_read(struct iam_container *c, iam_ptr_t ptr,
-                           handle_t *h, struct buffer_head **bh)
+                         handle_t *h, struct buffer_head **bh)
 {
-        return 0;
+        int result = 0;
+#if 0
+        *bh = ext3_bread(h, c->ic_object, (int)ptr, 0, &result);
+        if (*bh == NULL)
+                result = -EIO;
+#endif
+        return result;
 }
 
-
-static struct iam_descr fld_param = {
+struct iam_descr fld_param = {
         .id_key_size = sizeof ((struct lu_fid *)0)->f_seq,
         .id_ptr_size = 4, /* 32 bit block numbers for now */
         .id_rec_size = sizeof(mdsno_t),
@@ -117,22 +143,30 @@ static struct iam_descr fld_param = {
         .id_node_read  = fld_node_read,
         .id_node_check = fld_node_check,
         .id_node_init  = fld_node_init,
-        .id_keycmp     = fld_keycmp
+        .id_keycmp     = fld_keycmp,
 };
 
 int fld_info_init(struct fld_info *fld_info)
 {
         struct file *fld_file;
+        int rc;
+        ENTRY;
 
         fld_file = filp_open("/fld", O_RDWR, S_IRWXU);
         /* sanity and security checks... */
-        return iam_container_init(&fld_info->fi_container, &fld_param,
-                                  fld_file->f_dentry->d_inode);
+        OBD_ALLOC(fld_info->fi_container, sizeof(struct iam_container));
+        if (!fld_info->fi_container)
+                RETURN(-ENOMEM);
+
+        rc = iam_container_init(fld_info->fi_container, &fld_param,
+                               fld_file->f_dentry->d_inode);
+        RETURN(rc);
 }
 
 void fld_info_fini(struct fld_info *fld_info)
 {
-        iam_container_fini(&fld_info->fi_container);
+        iam_container_fini(fld_info->fi_container);
+        OBD_FREE(fld_info->fi_container, sizeof(struct iam_container));
         OBD_FREE_PTR(fld_info);
 }
 
index 11d54d8..efb0814 100644 (file)
@@ -26,8 +26,6 @@
 #ifndef _FLD_INTERNAL_H
 #define _FLD_INTERNAL_H
 
-#include <linux/lustre_iam.h>
-
 #define mdsno_t  __u64
 #define fidseq_t __u64
 
@@ -68,7 +66,7 @@ enum fld_op {
 
 
 struct fld_info {
-        struct iam_container fi_container;
+        void *fi_container;
 };
 
 int fld_handle_insert(struct fld_info *fld, fidseq_t seq_num, mdsno_t mdsno);
diff --git a/lustre/kernel_patches/patches/ext3-iam-separate.patch b/lustre/kernel_patches/patches/ext3-iam-separate.patch
new file mode 100644 (file)
index 0000000..8dd618a
--- /dev/null
@@ -0,0 +1,2084 @@
+Index: linux-2.6.9/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.9.orig/fs/ext3/namei.c   2006-04-28 16:54:18.000000000 +0800
++++ linux-2.6.9/fs/ext3/namei.c        2006-04-28 16:54:18.000000000 +0800
+@@ -24,81 +24,6 @@
+  *    Theodore Ts'o, 2002
+  */
+-/*
+- * iam: big theory statement.
+- *
+- * iam (Index Access Module) is a module providing abstraction of persistent
+- * transactional container on top of generalized ext3 htree.
+- *
+- * iam supports:
+- *
+- *     - key, pointer, and record size specifiable per container.
+- *
+- *     - trees taller than 2 index levels.
+- *
+- *     - read/write to existing ext3 htree directories as iam containers.
+- *
+- * iam container is a tree, consisting of leaf nodes containing keys and
+- * records stored in this container, and index nodes, containing keys and
+- * pointers to leaf or index nodes.
+- *
+- * iam does not work with keys directly, instead it calls user-supplied key
+- * comparison function (->dpo_keycmp()).
+- *
+- * Pointers are (currently) interpreted as logical offsets (measured in
+- * blocksful) within underlying flat file on top of which iam tree lives.
+- *
+- * On-disk format:
+- *
+- * iam mostly tries to reuse existing htree formats.
+- *
+- * Format of index node:
+- *
+- * +-----+-------+-------+-------+------+-------+------------+
+- * |     | count |       |       |      |       |            |
+- * | gap |   /   | entry | entry | .... | entry | free space |
+- * |     | limit |       |       |      |       |            |
+- * +-----+-------+-------+-------+------+-------+------------+
+- *
+- *       gap           this part of node is never accessed by iam code. It
+- *                     exists for binary compatibility with ext3 htree (that,
+- *                     in turn, stores fake struct ext2_dirent for ext2
+- *                     compatibility), and to keep some unspecified per-node
+- *                     data. Gap can be different for root and non-root index
+- *                     nodes. Gap size can be specified for each container
+- *                     (gap of 0 is allowed).
+- *
+- *       count/limit   current number of entries in this node, and the maximal
+- *                     number of entries that can fit into node. count/limit
+- *                     has the same size as entry, and is itself counted in
+- *                     count.
+- *
+- *       entry         index entry: consists of a key immediately followed by
+- *                     a pointer to a child node. Size of a key and size of a
+- *                     pointer depends on container. Entry has neither
+- *                     alignment nor padding.
+- *
+- *       free space    portion of node new entries are added to
+- *
+- * Entries in index node are sorted by their key value.
+- *
+- * Format of leaf node:
+- *
+- * +-----+-------+-------+-------+------+-------+------------+
+- * |     | count |       |       |      |       |            |
+- * | gap |   /   | leaf  | leaf  | .... | leaf  | free space |
+- * |     | limit |       |       |      |       |            |
+- * +-----+-------+-------+-------+------+-------+------------+
+-
+- *       leaf          For leaf entry: consists of a rec immediately followd by 
+- *                     a key. size of a key and size of a rec depends on container.  
+- *
+- *
+- *
+- *
+- *
+- */
+-
+ #include <linux/module.h>
+ #include <linux/fs.h>
+ #include <linux/pagemap.h>
+@@ -112,10 +37,10 @@
+ #include <linux/quotaops.h>
+ #include <linux/buffer_head.h>
+ #include <linux/smp_lock.h>
++#include <linux/lustre_iam.h>
+ #include "xattr.h"
+ #include "iopen.h"
+ #include "acl.h"
+-#include <linux/lustre_iam.h>
+ /*
+  * define how far ahead to read directories while searching them.
+  */
+@@ -125,9 +50,9 @@
+ #define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
+-static struct buffer_head *ext3_append(handle_t *handle,
+-                                      struct inode *inode,
+-                                      u32 *block, int *err)
++struct buffer_head *ext3_append(handle_t *handle,
++                              struct inode *inode,
++                              u32 *block, int *err)
+ {
+       struct buffer_head *bh;
+@@ -141,9 +66,6 @@
+       return bh;
+ }
+-#ifndef assert
+-#define assert(test) J_ASSERT(test)
+-#endif
+ #ifndef swap
+ #define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
+@@ -162,10 +84,6 @@
+       u8 file_type;
+ };
+-struct dx_countlimit {
+-      __le16 limit;
+-      __le16 count;
+-};
+ /*
+  * dx_root_info is laid out so that if it should somehow get overlaid by a
+@@ -203,235 +121,6 @@
+ };
+-static u32 htree_root_ptr(struct iam_container *c);
+-static int htree_node_check(struct iam_path *path, struct iam_frame *frame);
+-static int htree_node_init(struct iam_container *c,
+-                         struct buffer_head *bh, int root);
+-static int htree_keycmp(struct iam_container *c,
+-                      struct iam_key *k1, struct iam_key *k2);
+-static int htree_node_read(struct iam_container *c, iam_ptr_t ptr,
+-                         handle_t *h, struct buffer_head **bh);
+-
+-/*
+- * Parameters describing iam compatibility mode in which existing ext3 htrees
+- * can be manipulated.
+- */
+-static struct iam_descr htree_compat_param = {
+-      .id_key_size = sizeof ((struct dx_map_entry *)NULL)->hash,
+-      .id_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs,
+-      .id_node_gap = offsetof(struct dx_node, entries),
+-      .id_root_gap = offsetof(struct dx_root, entries),
+-
+-      .id_root_ptr   = htree_root_ptr,
+-      .id_node_check = htree_node_check,
+-      .id_node_init  = htree_node_init,
+-      .id_node_read  = htree_node_read,
+-      .id_keycmp     = htree_keycmp
+-};
+-
+-
+-struct iam_key;
+-struct iam_rec;
+-struct iam_descr;
+-struct iam_container;
+-struct iam_path;
+-
+-
+-
+-/*
+- * iam cursor (iterator) api.
+- */
+-
+-/*
+- * Flags controlling iterator functionality.
+- */
+-enum iam_it_flags {
+-      /*
+-       * this iterator will move (iam_it_{prev,next}() will be called on it)
+-       */
+-      IAM_IT_MOVE  = (1 << 0),
+-      /*
+-       * tree can be updated through this iterator.
+-       */
+-      IAM_IT_WRITE = (1 << 1)
+-};
+-
+-/*
+- * States of iterator state machine.
+- */
+-enum iam_it_state {
+-      /* initial state */
+-      IAM_IT_DETACHED,
+-      /* iterator is above particular record in the container */
+-      IAM_IT_ATTACHED
+-};
+-
+-struct htree_cookie {
+-      struct dx_hash_info *hinfo;
+-      struct dentry       *dentry;
+-};
+-
+-/*
+- * Iterator.
+- *
+- * Immediately after call to iam_it_init() iterator is in "detached"
+- * (IAM_IT_DETACHED) state: it is associated with given parent container, but
+- * doesn't point to any particular record in this container.
+- *
+- * After successful call to iam_it_get() and until corresponding call to
+- * iam_it_put() iterator is in "attached" state (IAM_IT_ATTACHED).
+- *
+- * Attached iterator can move through records in a container (provided
+- * IAM_IT_MOVE permission) in a key order, can get record and key values as it
+- * passes over them, and can modify container (provided IAM_IT_WRITE
+- * permission).
+- *
+- * Concurrency: iterators are supposed to be local to thread. Interfaces below
+- * do no internal serialization.
+- *
+- */
+-struct iam_iterator {
+-      /*
+-       * iterator flags, taken from enum iam_it_flags.
+-       */
+-      __u32                 ii_flags;
+-      enum iam_it_state     ii_state;
+-      /*
+-       * path to the record. Valid in IAM_IT_ATTACHED state.
+-       */
+-      struct iam_path       ii_path;
+-};
+-
+-static inline struct iam_key *keycpy(struct iam_container *c,
+-                                   struct iam_key *k1, struct iam_key *k2)
+-{
+-      return memcpy(k1, k2, c->ic_descr->id_key_size);
+-}
+-
+-static inline int keycmp(struct iam_container *c,
+-                       struct iam_key *k1, struct iam_key *k2)
+-{
+-      return c->ic_descr->id_keycmp(c, k1, k2);
+-}
+-
+-static struct iam_container *iam_it_container(struct iam_iterator *it)
+-{
+-      return it->ii_path.ip_container;
+-}
+-
+-static inline int it_keycmp(struct iam_iterator *it,
+-                          struct iam_key *k1, struct iam_key *k2)
+-{
+-      return keycmp(iam_it_container(it), k1, k2);
+-}
+-
+-/*
+- * Initialize iterator to IAM_IT_DETACHED state.
+- *
+- * postcondition: it_state(it) == IAM_IT_DETACHED
+- */
+-int  iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags);
+-/*
+- * Finalize iterator and release all resources.
+- *
+- * precondition: it_state(it) == IAM_IT_DETACHED
+- */
+-void iam_it_fini(struct iam_iterator *it);
+-
+-/*
+- * Attach iterator. After successful completion, @it points to record with the
+- * largest key not larger than @k. Semantics of ->id_create() method guarantee
+- * that such record will always be found.
+- *
+- * Return value: 0: positioned on existing record,
+- *             -ve: error.
+- *
+- * precondition:  it_state(it) == IAM_IT_DETACHED
+- * postcondition: ergo(result == 0,
+- *                     (it_state(it) == IAM_IT_ATTACHED &&
+- *                      it_keycmp(it, iam_it_key_get(it, *), k) < 0))
+- */
+-int iam_it_get(struct iam_iterator *it, struct iam_key *k);
+-
+-/*
+- * Duplicates iterator.
+- *
+- * postcondition: it_state(dst) == it_state(src) &&
+- *                iam_it_container(dst) == iam_it_container(src) &&
+- *                dst->ii_flags = src->ii_flags &&
+- *                ergo(it_state(it) == IAM_IT_ATTACHED,
+- *                     iam_it_rec_get(dst) == iam_it_rec_get(src) &&
+- *                     iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
+- */
+-void iam_it_dup(struct iam_iterator *dst, struct iam_iterator *src);
+-
+-/*
+- * Detach iterator. Does nothing it detached state.
+- *
+- * postcondition: it_state(it) == IAM_IT_DETACHED
+- */
+-void iam_it_put(struct iam_iterator *it);
+-
+-/*
+- * Move iterator one record right.
+- *
+- * Return value: 0: success,
+- *              +1: end of container reached
+- *             -ve: error
+- *
+- * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
+- * postcondition: ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED)
+- */
+-int iam_it_next(struct iam_iterator *it);
+-
+-/*
+- * Return pointer to the record under iterator.
+- *
+- * precondition:  it_state(it) == IAM_IT_ATTACHED
+- * postcondition: it_state(it) == IAM_IT_ATTACHED
+- */
+-const struct iam_rec *iam_it_rec_get(struct iam_iterator *it);
+-
+-/*
+- * Replace contents of record under iterator.
+- *
+- * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
+- * postcondition: it_state(it) == IAM_IT_ATTACHED &&
+- *                ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
+- */
+-int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r);
+-
+-/*
+- * Place key under iterator in @k, return @k
+- *
+- * precondition:  it_state(it) == IAM_IT_ATTACHED
+- * postcondition: it_state(it) == IAM_IT_ATTACHED
+- */
+-const struct iam_key *iam_it_key_get(struct iam_iterator *it,
+-                                   struct iam_key *k);
+-
+-/*
+- * Insert new record with key @k and contents from @r, shifting records to the
+- * right.
+- *
+- * precondition:  it_state(it) == IAM_IT_ATTACHED &&
+- *                it->ii_flags&IAM_IT_WRITE &&
+- *                it_keycmp(it, iam_it_key_get(it, *), k) < 0
+- * postcondition: it_state(it) == IAM_IT_ATTACHED &&
+- *                ergo(result == 0,
+- *                     it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
+- *                     !memcmp(iam_it_rec_get(it), r, ...))
+- */
+-int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
+-                    struct iam_key *k, struct iam_rec *r);
+-/*
+- * Delete record under iterator.
+- *
+- * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
+- * postcondition: it_state(it) == IAM_IT_ATTACHED
+- */
+-int iam_it_rec_delete(handle_t *h, struct iam_iterator *it);
+-
+ #ifdef CONFIG_EXT3_INDEX
+ static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry);
+ static void dx_set_block(struct iam_path *p,
+@@ -457,150 +146,41 @@
+ static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
+               struct dx_map_entry *offsets, int count);
+ static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
+-static void dx_insert_block (struct iam_path *path,
+-                           struct iam_frame *frame, u32 hash, u32 block);
+-static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+-                               struct iam_path *path, __u32 *start_hash);
+ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
+                      struct ext3_dir_entry_2 **res_dir, int *err);
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+                            struct inode *inode);
+-static inline void iam_path_init(struct iam_path *path,
+-                               struct iam_container *c, struct htree_cookie *hc);
+-static inline void iam_path_fini(struct iam_path *path);
+-
+-
++static u32 htree_root_ptr(struct iam_container *c);
++static int htree_node_check(struct iam_path *path, struct iam_frame *frame);
++static int htree_node_init(struct iam_container *c, struct buffer_head *bh, int root);
++static int htree_node_read(struct iam_container *c, iam_ptr_t ptr,
++                         handle_t *handle, struct buffer_head **bh);
++static int htree_keycmp(struct iam_container *c,
++                      struct iam_key *k1, struct iam_key *k2);
+ /*
+- * Future: use high four bits of block for coalesce-on-delete flags
+- * Mask them off for now.
++ * Parameters describing iam compatibility mode in which existing ext3 htrees
++ * can be manipulated.
+  */
++struct iam_descr htree_compat_param = {
++      .id_key_size = sizeof ((struct dx_map_entry *)NULL)->hash,
++      .id_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs,
++      .id_node_gap = offsetof(struct dx_node, entries),
++      .id_root_gap = offsetof(struct dx_root, entries),
+-static inline void *entry_off(struct iam_entry *entry, ptrdiff_t off)
+-{
+-      return (void *)((char *)entry + off);
+-}
+-
+-static inline struct iam_descr *path_descr(struct iam_path *p)
+-{
+-      return p->ip_container->ic_descr;
+-}
+-
+-static inline struct inode *path_obj(struct iam_path *p)
+-{
+-      return p->ip_container->ic_object;
+-}
+-
+-static inline size_t iam_entry_size(struct iam_path *p)
+-{
+-      return path_descr(p)->id_key_size + path_descr(p)->id_ptr_size;
+-}
+-
+-static inline struct iam_entry *iam_entry_shift(struct iam_path *p,
+-                                            struct iam_entry *entry, int shift)
+-{
+-      void *e = entry;
+-      return e + shift * iam_entry_size(p);
+-}
+-
+-static inline ptrdiff_t iam_entry_diff(struct iam_path *p,
+-                                    struct iam_entry *e1, struct iam_entry *e2)
+-{
+-      ptrdiff_t diff;
+-
+-      diff = (void *)e1 - (void *)e2;
+-      assert(diff / iam_entry_size(p) * iam_entry_size(p) == diff);
+-      return diff / iam_entry_size(p);
+-}
+-
+-static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry)
+-{
+-      return le32_to_cpu(*(u32 *)entry_off(entry, path_descr(p)->id_key_size))
+-              & 0x00ffffff;
+-}
+-
+-static inline void dx_set_block(struct iam_path *p,
+-                              struct iam_entry *entry, unsigned value)
+-{
+-      *(u32*)entry_off(entry,
+-                       path_descr(p)->id_key_size) = cpu_to_le32(value);
+-}
+-
+-static inline struct iam_key *dx_get_key(struct iam_path *p,
+-                                      struct iam_entry *entry,
+-                                      struct iam_key *key)
+-{
+-      memcpy(key, entry, path_descr(p)->id_key_size);
+-      return key;
+-}
+-
+-static inline struct iam_key *iam_key_at(struct iam_path *p,
+-                                     struct iam_entry *entry)
+-{
+-      return (struct iam_key *)entry;
+-}
+-
+-static inline void dx_set_key(struct iam_path *p,
+-                            struct iam_entry *entry, struct iam_key *key)
+-{
+-      memcpy(entry, key, path_descr(p)->id_key_size);
+-}
+-
+-static inline unsigned dx_get_count (struct iam_entry *entries)
+-{
+-      return le16_to_cpu(((struct dx_countlimit *) entries)->count);
+-}
+-
+-static inline unsigned dx_get_limit (struct iam_entry *entries)
+-{
+-      return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
+-}
+-
+-static inline void dx_set_count (struct iam_entry *entries, unsigned value)
+-{
+-      ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
+-}
+-
+-static inline void dx_set_limit (struct iam_entry *entries, unsigned value)
+-{
+-      ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
+-}
+-
+-static inline unsigned dx_root_limit(struct iam_path *p)
+-{
+-      struct iam_descr *param = path_descr(p);
+-      unsigned entry_space = path_obj(p)->i_sb->s_blocksize -
+-              param->id_root_gap;
+-      return entry_space / (param->id_key_size + param->id_ptr_size);
+-}
++      .id_root_ptr   = htree_root_ptr,
++      .id_node_check = htree_node_check,
++      .id_node_init  = htree_node_init,
++      .id_node_read  = htree_node_read,
++      .id_keycmp     = htree_keycmp
++};
+-static inline unsigned dx_node_limit(struct iam_path *p)
+-{
+-      struct iam_descr *param = path_descr(p);
+-      unsigned entry_space   = path_obj(p)->i_sb->s_blocksize -
+-              param->id_node_gap;
+-      return entry_space / (param->id_key_size + param->id_ptr_size);
+-}
+ static inline int dx_index_is_compat(struct iam_path *path)
+ {
+       return path_descr(path) == &htree_compat_param;
+ }
+-static struct iam_entry *dx_get_entries(struct iam_path *path, void *data,
+-                                     int root)
+-{
+-      return data +
+-              (root ?
+-               path_descr(path)->id_root_gap : path_descr(path)->id_node_gap);
+-}
+-
+-static struct iam_entry *dx_node_get_entries(struct iam_path *path,
+-                                          struct iam_frame *frame)
+-{
+-      return dx_get_entries(path,
+-                            frame->bh->b_data, frame == path->ip_frames);
+-}
+ static int dx_node_check(struct iam_path *p, struct iam_frame *f)
+ {
+@@ -623,6 +203,15 @@
+       return 1;
+ }
++/*
++ * Cookie carrying per-lookup state for htree compatibility mode
++ * (hash info and the dentry being looked up).
++ */
++
++struct htree_cookie {
++      struct dx_hash_info *hinfo;
++      struct dentry       *dentry;
++};
++
+ static u32 htree_root_ptr(struct iam_container *c)
+ {
+       return 0;
+@@ -800,7 +389,7 @@
+ }
+ #endif /* DX_DEBUG */
+-static int dx_lookup(struct iam_path *path)
++int dx_lookup(struct iam_path *path)
+ {
+       u32 ptr;
+       int err = 0;
+@@ -904,495 +493,6 @@
+ }
+ /*
+- * Initialize container @c, acquires additional reference on @inode.
+- */
+-int iam_container_init(struct iam_container *c,
+-                     struct iam_descr *descr, struct inode *inode)
+-{
+-      memset(c, 0, sizeof *c);
+-      c->ic_descr  = descr;
+-      c->ic_object = igrab(inode);
+-      if (c->ic_object != NULL)
+-              return 0;
+-      else
+-              return -ENOENT;
+-}
+-
+-/*
+- * Finalize container @c, release all resources.
+- */
+-void iam_container_fini(struct iam_container *c)
+-{
+-      if (c->ic_object != NULL) {
+-              iput(c->ic_object);
+-              c->ic_object = NULL;
+-      }
+-}
+-
+-static inline void iam_path_init(struct iam_path *path, struct iam_container *c, 
+-                               struct htree_cookie *hc)
+-{
+-      memset(path, 0, sizeof *path);
+-      path->ip_container = c;
+-      path->ip_frame = path->ip_frames;
+-      path->ip_descr_data = hc;
+-}
+-
+-static inline void iam_path_fini(struct iam_path *path)
+-{
+-      int i;
+-
+-      for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) {
+-              if (path->ip_frames[i].bh != NULL) {
+-                      brelse(path->ip_frames[i].bh);
+-                      path->ip_frames[i].bh = NULL;
+-              }
+-      }
+-}
+-
+-static void iam_path_compat_init(struct iam_path_compat *path,
+-                               struct inode *inode)
+-{
+-      int i;
+-
+-      iam_container_init(&path->ipc_container, &htree_compat_param, inode);
+-      /*
+-       * XXX hack allowing finalization of iam_path_compat with
+-       * iam_path_fini().
+-       */
+-      iput(inode);
+-      iam_path_init(&path->ipc_path, &path->ipc_container, NULL);
+-      for (i = 0; i < ARRAY_SIZE(path->ipc_path.ip_key_scratch); ++i)
+-              path->ipc_path.ip_key_scratch[i] =
+-                      (struct iam_key *)&path->ipc_scrach[i];
+-}
+-
+-static void iam_path_compat_fini(struct iam_path_compat *path)
+-{
+-      iam_path_fini(&path->ipc_path);
+-      iam_container_fini(&path->ipc_container);
+-}
+-
+-static int iam_leaf_init(struct iam_path *path, struct iam_leaf *leaf)
+-{
+-      int block, err;
+-      struct buffer_head *bh;
+-      
+-      block = dx_get_block(path, path->ip_frame->at);
+-      err = path_descr(path)->id_node_read(path->ip_container, block, 
+-                                           NULL, &bh);
+-      if (err)
+-              return err;
+-
+-      leaf->bh = bh;
+-      leaf->entries = (struct iam_leaf_entry *)bh->b_data;
+-      return 0;
+-}
+-
+-static void iam_leaf_fini(struct iam_leaf *leaf)
+-{
+-      if (leaf->bh)
+-              brelse(leaf->bh);
+-}
+-
+-/*
+- * Search container @c for record with key @k. If record is found, its data
+- * are moved into @r.
+- *
+- *
+- *
+- * Return values: +ve: found, 0: not-found, -ve: error
+- */
+-
+-int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r)
+-{
+-      struct dx_hash_info     hinfo;
+-      struct iam_path_compat cpath;
+-      struct iam_path *path = &cpath.ipc_path;
+-      struct htree_cookie hc = {
+-              .hinfo  = &hinfo
+-      };
+-      int err, i;
+-
+-      iam_path_init(path, c, &hc);
+-      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
+-              path->ip_key_scratch[i] =
+-                      (struct iam_key *)&cpath.ipc_scrach[i];
+-      err = dx_lookup(path);
+-      do {
+-              struct iam_leaf leaf;
+-              err = iam_leaf_init(path, &leaf);
+-              if (err)
+-                      goto errout;
+-
+-              for (path_descr(path)->id_leaf.start(c, &leaf);
+-                   !path_descr(path)->id_leaf.at_end(c, &leaf);
+-                   path_descr(path)->id_leaf.next(c, &leaf)) {
+-                      struct iam_key *key;
+-
+-                      key = kmalloc(path_descr(path)->id_key_size, GFP_KERNEL);
+-                      path_descr(path)->id_leaf.key(c, &leaf, key);
+-                      if (keycmp(c, k, key) == 0) {
+-                              memcpy(r, path_descr(path)->id_leaf.rec(c, &leaf),
+-                                     path_descr(path)->id_rec_size);
+-                              iam_path_fini(path);
+-                              iam_leaf_fini(&leaf);
+-                              return 0;
+-                      }
+-              }
+-
+-              iam_leaf_fini(&leaf);
+-              /* Check to see if we should continue to search */
+-              err = ext3_htree_next_block(c->ic_object, hinfo.hash, path, NULL);
+-              if (err < 0)
+-                      goto errout;
+-      } while (err == 1);
+-errout:
+-      iam_path_fini(path);
+-      return(err);
+-}
+-EXPORT_SYMBOL(iam_lookup);
+-
+-static inline size_t iam_leaf_entry_size(struct iam_path *p)
+-{
+-      return path_descr(p)->id_rec_size + path_descr(p)->id_key_size;
+-}
+-
+-static inline ptrdiff_t iam_leaf_entry_diff(struct iam_path *p,
+-                                    struct iam_leaf_entry *e1, struct iam_leaf_entry *e2)
+-{
+-      ptrdiff_t diff;
+-
+-      diff = (void *)e1 - (void *)e2;
+-      assert(diff / iam_leaf_entry_size(p) * iam_leaf_entry_size(p) == diff);
+-      return diff / iam_leaf_entry_size(p);
+-}
+-
+-static inline struct iam_leaf_entry* 
+-iam_leaf_entry_shift(struct iam_path *p, struct iam_leaf_entry *entry, int shift)
+-{
+-      void *e = entry;
+-      return e + shift * iam_leaf_entry_size(p);
+-}
+-
+-static inline struct iam_key *
+-dx_leaf_get_key(struct iam_path *p, struct iam_leaf_entry *e, struct iam_key *key)
+-{
+-      memcpy(key, e, path_descr(p)->id_key_size);
+-      return key;
+-}
+-
+-static inline struct iam_key *
+-iam_leaf_key_at(struct iam_path *p, struct iam_leaf_entry *entry)
+-{
+-      void *e = entry;
+-      return e + path_descr(p)->id_rec_size;
+-}
+-static inline struct iam_leaf_entry *
+-iam_leaf_entry_at(struct iam_path *p, struct iam_leaf_entry *entry)
+-{
+-      return entry; 
+-}
+-
+-static int iam_leaf_lookup(struct iam_path *path, struct iam_leaf *leaf, 
+-                         struct iam_key *k)
+-{
+-      struct iam_leaf_entry *p, *q, *m;
+-      struct iam_leaf_entry *entries = leaf->entries;
+-      int count = dx_get_count((struct iam_entry *)entries);
+-      
+-      p = iam_leaf_entry_shift(path, entries, 1);
+-      q = iam_leaf_entry_shift(path, entries, count - 1);
+-      while (p <= q) {
+-              m = iam_leaf_entry_shift(path,
+-                                 p, iam_leaf_entry_diff(path, q, p) / 2);
+-              dxtrace(printk("."));
+-              if (keycmp(path->ip_container, iam_leaf_key_at(path, m),
+-                         path->ip_key_target) > 0)
+-                      q = iam_leaf_entry_shift(path, m, -1);
+-              else
+-                      p = iam_leaf_entry_shift(path, m, +1);
+-      }
+-      leaf->at = q; 
+-      return 0;
+-}
+-
+-/*XXX what kind of lock should this entry be locked: WangDi */
+-static int iam_leaf_insert(handle_t *handle, struct iam_path *path, 
+-                         struct iam_key *k, struct iam_rec *r)
+-{
+-      struct iam_leaf leaf;
+-      struct iam_leaf_entry *p, *q;
+-      int err, count;
+-
+-      err = iam_leaf_init(path, &leaf);
+-      if (err)
+-              goto errout;
+-      path_descr(path)->id_leaf.start(path->ip_container, &leaf);
+-      count = dx_get_count((struct iam_entry *)leaf.entries);
+-      if (dx_get_count((struct iam_entry *)leaf.entries) >= 
+-          dx_get_limit((struct iam_entry *)leaf.entries)){
+-              err = -ENOSPC;
+-              goto errout;
+-      }
+-
+-      err = iam_leaf_lookup(path, &leaf, k);
+-      if (err)
+-              goto errout;
+-      
+-      /*insert the k/r to leaf entries*/
+-      p = iam_leaf_entry_shift(path, leaf.at, 1);
+-      q = iam_leaf_entry_shift(path, leaf.entries, count - 1);
+-      while (q < p) {
+-              memcpy(iam_leaf_entry_shift(path, q, 1), q, iam_leaf_entry_size(path));
+-              q = iam_leaf_entry_shift(path, q, -1);  
+-      }
+-      memcpy(iam_leaf_entry_at(path, p), r, path_descr(path)->id_rec_size);
+-      memcpy(iam_leaf_key_at(path, p), k, path_descr(path)->id_key_size);
+-
+-      dx_set_count((struct iam_entry*)leaf.entries, count + 1);
+-      err = ext3_journal_dirty_metadata(handle, leaf.bh);
+-      if (err)
+-              ext3_std_error(path->ip_container->ic_object->i_sb, err);
+-errout:       
+-      iam_leaf_fini(&leaf);
+-      return err;
+-} 
+-
+-static int split_leaf_node(handle_t *handle, struct iam_path *path)
+-{
+-      struct inode *dir = path_obj(path);
+-      unsigned continued = 0;
+-      struct buffer_head *bh2;
+-      u32 newblock, hash_split;
+-      char *data2;
+-      struct iam_leaf leaf;
+-      unsigned split;
+-      int     err;
+-
+-      bh2 = ext3_append (handle, dir, &newblock, &err);
+-      if (!(bh2)) {
+-              err = -ENOSPC;
+-              goto errout;
+-      }
+-      err = iam_leaf_init(path, &leaf);
+-      if (err)
+-              goto errout;
+-
+-      BUFFER_TRACE(leaf.bh, "get_write_access");
+-      err = ext3_journal_get_write_access(handle, leaf.bh);
+-      if (err) {
+-      journal_error:
+-              iam_leaf_fini(&leaf);
+-              brelse(bh2);
+-              ext3_std_error(dir->i_sb, err);
+-              err = -EIO;
+-              goto errout;
+-      }
+-      data2 = bh2->b_data;
+-      split = dx_get_count((struct iam_entry*)leaf.entries)/2;
+-      hash_split = *(__u32*)iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split));
+-      if (keycmp(path->ip_container, iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split)),
+-                 iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split -1))) == 0)
+-              continued = 1;
+-
+-      memcpy(iam_leaf_entry_shift(path, (struct iam_leaf_entry *)data2, 1),
+-             iam_leaf_entry_shift(path, leaf.entries, split),
+-             split * iam_leaf_entry_size(path));
+- 
+-      /* Which block gets the new entry? */
+-      dx_insert_block(path, path->ip_frame, hash_split + continued, newblock);
+-      err = ext3_journal_dirty_metadata (handle, bh2);
+-      if (err)
+-              goto journal_error;
+-      err = ext3_journal_dirty_metadata (handle, leaf.bh);
+-      if (err)
+-              goto journal_error;
+-      brelse (bh2);
+-      iam_leaf_fini(&leaf);
+-errout:
+-      return err;
+-}
+-
+-static int split_index_node(handle_t *handle, struct iam_path *path);
+-/*
+- * Insert new record @r with key @k into container @c (within context of
+- * transaction @h.
+- *
+- * Return values: 0: success, -ve: error, including -EEXIST when record with
+- * given key is already present.
+- *
+- * postcondition: ergo(result == 0 || result == -EEXIST,
+- *                                  iam_lookup(c, k, r2) > 0 &&
+- *                                  !memcmp(r, r2, c->ic_descr->id_rec_size));
+- */
+-int iam_insert(handle_t *handle, struct iam_container *c, struct iam_key *k, 
+-             struct iam_rec *r)
+-{
+-      struct dx_hash_info     hinfo;
+-      struct iam_path_compat cpath;
+-      struct iam_path *path = &cpath.ipc_path;
+-      struct htree_cookie hc = {
+-              .hinfo  = &hinfo
+-      };
+-      int err, i;
+-
+-      iam_path_init(path, c, &hc);
+-      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
+-              path->ip_key_scratch[i] =
+-                      (struct iam_key *)&cpath.ipc_scrach[i];
+-      err = dx_lookup(path);
+-      if (err)
+-              goto errout; 
+-
+-      err = iam_leaf_insert(handle, path, k, r);
+-      
+-      if (err != -ENOSPC) 
+-              goto errout;    
+-
+-      err = split_index_node(handle, path);
+-      if (err)
+-              goto errout;    
+-
+-      err = split_leaf_node(handle, path);
+-      if (err)
+-              goto errout;
+-      
+-      err = iam_leaf_insert(handle, path, k, r);
+-errout:
+-      iam_path_fini(path);
+-      return(err);
+-}
+-
+-EXPORT_SYMBOL(iam_insert);
+-static int iam_leaf_delete(handle_t *handle, struct iam_path *path, 
+-                         struct iam_key *k)
+-{
+-      struct iam_leaf leaf;
+-      struct iam_leaf_entry *p, *q;
+-      int err, count;
+-
+-      err = iam_leaf_init(path, &leaf);
+-      if (err)
+-              goto errout;
+-      
+-      err = iam_leaf_lookup(path, &leaf, k);
+-      if (err)
+-              goto errout;
+-
+-      count = dx_get_count((struct iam_entry*)leaf.entries);
+-      /*delete the k to leaf entries*/
+-      p = iam_leaf_entry_shift(path, leaf.at, 1);
+-      q = iam_leaf_entry_shift(path, leaf.entries, count - 1);
+-      while (p < q) {
+-              memcpy(p, iam_leaf_entry_shift(path, p, 1), iam_leaf_entry_size(path));
+-              p = iam_leaf_entry_shift(path, p, 1);
+-      }
+-      dx_set_count((struct iam_entry*)leaf.entries, count - 1);
+-
+-      err = ext3_journal_dirty_metadata(handle, leaf.bh);
+-      if (err)
+-              ext3_std_error(path_obj(path)->i_sb, err);
+-errout:       
+-      iam_leaf_fini(&leaf);
+-      return err;
+-}
+-
+-/*
+- * Delete existing record with key @k.
+- *
+- * Return values: 0: success, -ENOENT: not-found, -ve: other error.
+- *
+- * postcondition: ergo(result == 0 || result == -ENOENT,
+- *                                 !iam_lookup(c, k, *));
+- */
+-int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k)
+-{
+-      struct dx_hash_info     hinfo;
+-      struct iam_path_compat cpath;
+-      struct iam_path *path = &cpath.ipc_path;
+-      struct htree_cookie hc = {
+-              .hinfo  = &hinfo
+-      };
+-      int err, i;
+-
+-      iam_path_init(path, c, &hc);
+-      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
+-              path->ip_key_scratch[i] =
+-                      (struct iam_key *)&cpath.ipc_scrach[i];
+-      err = dx_lookup(path);
+-      if (err)
+-              goto errout; 
+-
+-      err = iam_leaf_delete(h, path, k);
+-errout:
+-      iam_path_fini(path);
+-      return err;
+-}
+-
+-EXPORT_SYMBOL(iam_delete);
+-
+-static int iam_leaf_update(handle_t *handle, struct iam_path *path, 
+-                         struct iam_key *k, struct iam_rec *r)
+-{
+-      struct iam_leaf leaf;
+-      int err;
+-
+-      err = iam_leaf_init(path, &leaf);
+-      if (err)
+-              goto errout;
+-      
+-      err = iam_leaf_lookup(path, &leaf, k);
+-      if (err)
+-              goto errout;
+-
+-      memcpy(iam_leaf_entry_at(path, leaf.at), r, path_descr(path)->id_rec_size);
+-      memcpy(iam_leaf_key_at(path, leaf.at), k, path_descr(path)->id_key_size);
+-
+-      err = ext3_journal_dirty_metadata(handle, leaf.bh);
+-      if (err)
+-              ext3_std_error(path_obj(path)->i_sb, err);
+-errout:       
+-      iam_leaf_fini(&leaf);
+-      return err;
+-}
+-/*
+- * Replace existing record with key @k, or insert new one. New record data are
+- * in @r.
+- *
+- * Return values: 0: success, -ve: error.
+- *
+- * postcondition: ergo(result == 0, iam_lookup(c, k, r2) > 0 &&
+- *                                  !memcmp(r, r2, c->ic_descr->id_rec_size));
+- */
+-int iam_update(handle_t *h, struct iam_container *c,
+-             struct iam_key *k, struct iam_rec *r)
+-{
+-      struct dx_hash_info     hinfo;
+-      struct iam_path_compat cpath;
+-      struct iam_path *path = &cpath.ipc_path;
+-      struct htree_cookie hc = {
+-              .hinfo  = &hinfo
+-      };
+-      int err, i;
+-      
+-      iam_path_init(path, c, &hc);
+-      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
+-              path->ip_key_scratch[i] =
+-                      (struct iam_key *)&cpath.ipc_scrach[i];
+-      err = dx_lookup(path);
+-      if (err)
+-              goto errout; 
+-
+-      err = iam_leaf_update(h, path, k, r);
+-errout:
+-      iam_path_fini(path);
+-      return err;
+-}
+-
+-EXPORT_SYMBOL(iam_update);
+-
+-/*
+  * This function increments the frame pointer to search the next leaf
+  * block, and reads in the necessary intervening nodes if the search
+  * should be necessary.  Whether or not the search is necessary is
+@@ -1409,8 +509,8 @@
+  * If start_hash is non-null, it will be filled in with the starting
+  * hash of the next page.
+  */
+-static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+-                               struct iam_path *path, __u32 *start_hash)
++int ext3_htree_next_block(struct inode *dir, __u32 hash,
++                        struct iam_path *path, __u32 *start_hash)
+ {
+       struct iam_frame *p;
+       struct buffer_head *bh;
+@@ -1662,8 +762,8 @@
+       } while(more);
+ }
+-static void dx_insert_block(struct iam_path *path,
+-                          struct iam_frame *frame, u32 hash, u32 block)
++void dx_insert_block(struct iam_path *path, struct iam_frame *frame, 
++                   u32 hash, u32 block)
+ {
+       struct iam_entry *entries = frame->entries;
+       struct iam_entry *old = frame->at, *new = iam_entry_shift(path, old, +1);
+@@ -2392,7 +1492,7 @@
+ }
+ #ifdef CONFIG_EXT3_INDEX
+-static int split_index_node(handle_t *handle, struct iam_path *path)
++int split_index_node(handle_t *handle, struct iam_path *path)
+ { 
+       struct iam_entry *entries;   /* old block contents */
+Index: linux-2.6.9/fs/ext3/iam.c
+===================================================================
+--- linux-2.6.9.orig/fs/ext3/iam.c     2006-04-28 19:25:01.957835224 +0800
++++ linux-2.6.9/fs/ext3/iam.c  2006-04-28 16:54:18.000000000 +0800
+@@ -0,0 +1,610 @@
++/*
++ * iam: big theory statement.
++ *
++ * iam (Index Access Module) is a module providing abstraction of persistent
++ * transactional container on top of generalized ext3 htree.
++ *
++ * iam supports:
++ *
++ *     - key, pointer, and record size specifiable per container.
++ *
++ *     - trees taller than 2 index levels.
++ *
++ *     - read/write to existing ext3 htree directories as iam containers.
++ *
++ * iam container is a tree, consisting of leaf nodes containing keys and
++ * records stored in this container, and index nodes, containing keys and
++ * pointers to leaf or index nodes.
++ *
++ * iam does not work with keys directly, instead it calls user-supplied key
++ * comparison function (->dpo_keycmp()).
++ *
++ * Pointers are (currently) interpreted as logical offsets (measured in
++ * blocksful) within underlying flat file on top of which iam tree lives.
++ *
++ * On-disk format:
++ *
++ * iam mostly tries to reuse existing htree formats.
++ *
++ * Format of index node:
++ *
++ * +-----+-------+-------+-------+------+-------+------------+
++ * |     | count |       |       |      |       |            |
++ * | gap |   /   | entry | entry | .... | entry | free space |
++ * |     | limit |       |       |      |       |            |
++ * +-----+-------+-------+-------+------+-------+------------+
++ *
++ *       gap           this part of node is never accessed by iam code. It
++ *                     exists for binary compatibility with ext3 htree (that,
++ *                     in turn, stores fake struct ext2_dirent for ext2
++ *                     compatibility), and to keep some unspecified per-node
++ *                     data. Gap can be different for root and non-root index
++ *                     nodes. Gap size can be specified for each container
++ *                     (gap of 0 is allowed).
++ *
++ *       count/limit   current number of entries in this node, and the maximal
++ *                     number of entries that can fit into node. count/limit
++ *                     has the same size as entry, and is itself counted in
++ *                     count.
++ *
++ *       entry         index entry: consists of a key immediately followed by
++ *                     a pointer to a child node. Size of a key and size of a
++ *                     pointer depends on container. Entry has neither
++ *                     alignment nor padding.
++ *
++ *       free space    portion of node new entries are added to
++ *
++ * Entries in index node are sorted by their key value.
++ *
++ * Format of leaf node:
++ *
++ * +-----+-------+-------+-------+------+-------+------------+
++ * |     | count |       |       |      |       |            |
++ * | gap |   /   | leaf  | leaf  | .... | leaf  | free space |
++ * |     | limit |       |       |      |       |            |
++ * +-----+-------+-------+-------+------+-------+------------+
++
++ *       leaf          For leaf entry: consists of a rec immediately followed by
++ *                     a key. Size of a key and size of a rec depend on the container.
++ *
++ *
++ *
++ *
++ *
++ */
++
++#include <linux/module.h>
++#include <linux/fs.h>
++#include <linux/pagemap.h>
++#include <linux/jbd.h>
++#include <linux/time.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/fcntl.h>
++#include <linux/stat.h>
++#include <linux/string.h>
++#include <linux/quotaops.h>
++#include <linux/buffer_head.h>
++#include <linux/smp_lock.h>
++#include <linux/lustre_iam.h>
++#include "xattr.h"
++#include "iopen.h"
++#include "acl.h"
++
++struct iam_key;
++struct iam_rec;
++struct iam_descr;
++struct iam_container;
++struct iam_path;
++
++
++#define key_cmp(e1, e2) ({                              \
++        typeof(e1) __e1 = (e1);                         \
++        typeof(e2) __e2 = (e2);                         \
++        __e1 > __e2 ? +1 : (__e1 < __e2 ? -1 : 0);      \
++})
++
++/*
++ * iam cursor (iterator) api.
++ */
++
++/*
++ * Flags controlling iterator functionality.
++ */
++enum iam_it_flags {
++      /*
++       * this iterator will move (iam_it_{prev,next}() will be called on it)
++       */
++      IAM_IT_MOVE  = (1 << 0),
++      /*
++       * tree can be updated through this iterator.
++       */
++      IAM_IT_WRITE = (1 << 1)
++};
++
++/*
++ * Initialize container @c, acquires additional reference on @inode.
++ */
++int iam_container_init(struct iam_container *c,
++                     struct iam_descr *descr, struct inode *inode)
++{
++      memset(c, 0, sizeof *c);
++      c->ic_descr  = descr;
++      c->ic_object = igrab(inode);
++      if (c->ic_object != NULL)
++              return 0;
++      else
++              return -ENOENT;
++}
++
++/*
++ * Finalize container @c, release all resources.
++ */
++void iam_container_fini(struct iam_container *c)
++{
++      if (c->ic_object != NULL) {
++              iput(c->ic_object);
++              c->ic_object = NULL;
++      }
++}
++
++void iam_path_init(struct iam_path *path, struct iam_container *c, void *cookie)
++{
++      memset(path, 0, sizeof *path);
++      path->ip_container = c;
++      path->ip_frame = path->ip_frames;
++      path->ip_descr_data = cookie;
++}
++
++void iam_path_fini(struct iam_path *path)
++{
++      int i;
++
++      for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) {
++              if (path->ip_frames[i].bh != NULL) {
++                      brelse(path->ip_frames[i].bh);
++                      path->ip_frames[i].bh = NULL;
++              }
++      }
++}
++
++extern struct iam_descr htree_compat_param;
++void iam_path_compat_init(struct iam_path_compat *path,
++                               struct inode *inode)
++{
++      int i;
++
++      iam_container_init(&path->ipc_container, &htree_compat_param, inode);
++      /*
++       * XXX hack allowing finalization of iam_path_compat with
++       * iam_path_fini().
++       */
++      iput(inode);
++      iam_path_init(&path->ipc_path, &path->ipc_container, NULL);
++      for (i = 0; i < ARRAY_SIZE(path->ipc_path.ip_key_scratch); ++i)
++              path->ipc_path.ip_key_scratch[i] =
++                      (struct iam_key *)&path->ipc_scrach[i];
++}
++
++void iam_path_compat_fini(struct iam_path_compat *path)
++{
++      iam_path_fini(&path->ipc_path);
++      iam_container_fini(&path->ipc_container);
++}
++
++static int iam_leaf_init(struct iam_path *path, struct iam_leaf *leaf)
++{
++      int block, err;
++      struct buffer_head *bh;
++      
++      block = dx_get_block(path, path->ip_frame->at);
++      err = path_descr(path)->id_node_read(path->ip_container, block, 
++                                           NULL, &bh);
++      if (err)
++              return err;
++
++      leaf->bh = bh;
++      leaf->entries = (struct iam_leaf_entry *)bh->b_data;
++      return 0;
++}
++
++static void iam_leaf_fini(struct iam_leaf *leaf)
++{
++      if (leaf->bh)
++              brelse(leaf->bh);
++}
++
++/*
++ * Search container @c for record with key @k. If record is found, its data
++ * are moved into @r.
++ *
++ *
++ *
++ * Return values: +ve: found, 0: not-found, -ve: error
++ */
++
++int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r)
++{
++      struct iam_path_compat cpath;
++      struct iam_path *path = &cpath.ipc_path;
++      struct iam_cookie ic = {
++              .ic_key = k,
++              .ic_rec = r 
++      };
++      int err, i;
++
++      iam_path_init(path, c, &ic);
++      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
++              path->ip_key_scratch[i] =
++                      (struct iam_key *)&cpath.ipc_scrach[i];
++
++      err = dx_lookup(path);
++      do {
++              struct iam_leaf leaf;
++              err = iam_leaf_init(path, &leaf);
++              if (err)
++                      goto errout;
++
++              for (path_descr(path)->id_leaf.start(c, &leaf);
++                   !path_descr(path)->id_leaf.at_end(c, &leaf);
++                   path_descr(path)->id_leaf.next(c, &leaf)) {
++                      struct iam_key *key;
++
++                      key = kmalloc(path_descr(path)->id_key_size, GFP_KERNEL);
++                      path_descr(path)->id_leaf.key(c, &leaf, key);
++                      if (keycmp(c, k, key) == 0) {
++                              memcpy(r, path_descr(path)->id_leaf.rec(c, &leaf),
++                                     path_descr(path)->id_rec_size);
++                              iam_path_fini(path);
++                              iam_leaf_fini(&leaf);
++                              return 0;
++                      }
++              }
++
++              iam_leaf_fini(&leaf);
++              /* Check to see if we should continue to search */
++              if (err < 0)
++                      goto errout;
++      } while (err == 1);
++errout:
++      iam_path_fini(path);
++      return(err);
++}
++EXPORT_SYMBOL(iam_lookup);
++
++static inline size_t iam_leaf_entry_size(struct iam_path *p)
++{
++      return path_descr(p)->id_rec_size + path_descr(p)->id_key_size;
++}
++
++static inline ptrdiff_t iam_leaf_entry_diff(struct iam_path *p,
++                                    struct iam_leaf_entry *e1, struct iam_leaf_entry *e2)
++{
++      ptrdiff_t diff;
++
++      diff = (void *)e1 - (void *)e2;
++      assert(diff / iam_leaf_entry_size(p) * iam_leaf_entry_size(p) == diff);
++      return diff / iam_leaf_entry_size(p);
++}
++
++static inline struct iam_leaf_entry* 
++iam_leaf_entry_shift(struct iam_path *p, struct iam_leaf_entry *entry, int shift)
++{
++      void *e = entry;
++      return e + shift * iam_leaf_entry_size(p);
++}
++
++static inline struct iam_key *
++dx_leaf_get_key(struct iam_path *p, struct iam_leaf_entry *e, struct iam_key *key)
++{
++      memcpy(key, e, path_descr(p)->id_key_size);
++      return key;
++}
++
++static inline struct iam_key *
++iam_leaf_key_at(struct iam_path *p, struct iam_leaf_entry *entry)
++{
++      void *e = entry;
++      return e + path_descr(p)->id_rec_size;
++}
++static inline struct iam_leaf_entry *
++iam_leaf_entry_at(struct iam_path *p, struct iam_leaf_entry *entry)
++{
++      return entry; 
++}
++
++static int iam_leaf_lookup(struct iam_path *path, struct iam_leaf *leaf, 
++                         struct iam_key *k)
++{
++      struct iam_leaf_entry *p, *q, *m;
++      struct iam_leaf_entry *entries = leaf->entries;
++      int count = dx_get_count((struct iam_entry *)entries);
++      
++      p = iam_leaf_entry_shift(path, entries, 1);
++      q = iam_leaf_entry_shift(path, entries, count - 1);
++      while (p <= q) {
++              m = iam_leaf_entry_shift(path,
++                                 p, iam_leaf_entry_diff(path, q, p) / 2);
++              if (keycmp(path->ip_container, iam_leaf_key_at(path, m),
++                         path->ip_key_target) > 0)
++                      q = iam_leaf_entry_shift(path, m, -1);
++              else
++                      p = iam_leaf_entry_shift(path, m, +1);
++      }
++      leaf->at = q; 
++      return 0;
++}
++
++/*XXX what kind of lock should this entry be locked: WangDi */
++static int iam_leaf_insert(handle_t *handle, struct iam_path *path, 
++                         struct iam_key *k, struct iam_rec *r)
++{
++      struct iam_leaf leaf;
++      struct iam_leaf_entry *p, *q;
++      int err, count;
++
++      err = iam_leaf_init(path, &leaf);
++      if (err)
++              goto errout;
++      path_descr(path)->id_leaf.start(path->ip_container, &leaf);
++      count = dx_get_count((struct iam_entry *)leaf.entries);
++      if (dx_get_count((struct iam_entry *)leaf.entries) >= 
++          dx_get_limit((struct iam_entry *)leaf.entries)){
++              err = -ENOSPC;
++              goto errout;
++      }
++
++      err = iam_leaf_lookup(path, &leaf, k);
++      if (err)
++              goto errout;
++      
++      /*insert the k/r to leaf entries*/
++      p = iam_leaf_entry_shift(path, leaf.at, 1);
++      q = iam_leaf_entry_shift(path, leaf.entries, count - 1);
++      while (q < p) {
++              memcpy(iam_leaf_entry_shift(path, q, 1), q, iam_leaf_entry_size(path));
++              q = iam_leaf_entry_shift(path, q, -1);  
++      }
++      memcpy(iam_leaf_entry_at(path, p), r, path_descr(path)->id_rec_size);
++      memcpy(iam_leaf_key_at(path, p), k, path_descr(path)->id_key_size);
++
++      dx_set_count((struct iam_entry*)leaf.entries, count + 1);
++      err = ext3_journal_dirty_metadata(handle, leaf.bh);
++      if (err)
++              ext3_std_error(path->ip_container->ic_object->i_sb, err);
++errout:       
++      iam_leaf_fini(&leaf);
++      return err;
++} 
++
++static int split_leaf_node(handle_t *handle, struct iam_path *path)
++{
++      struct inode *dir = path_obj(path);
++      unsigned continued = 0;
++      struct buffer_head *bh2;
++      u32 newblock, hash_split;
++      char *data2;
++      struct iam_leaf leaf;
++      unsigned split;
++      int     err;
++
++      bh2 = ext3_append (handle, dir, &newblock, &err);
++      if (!(bh2)) {
++              err = -ENOSPC;
++              goto errout;
++      }
++      err = iam_leaf_init(path, &leaf);
++      if (err)
++              goto errout;
++
++      BUFFER_TRACE(leaf.bh, "get_write_access");
++      err = ext3_journal_get_write_access(handle, leaf.bh);
++      if (err) {
++      journal_error:
++              iam_leaf_fini(&leaf);
++              brelse(bh2);
++              ext3_std_error(dir->i_sb, err);
++              err = -EIO;
++              goto errout;
++      }
++      data2 = bh2->b_data;
++      split = dx_get_count((struct iam_entry*)leaf.entries)/2;
++      hash_split = *(__u32*)iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split));
++      if (keycmp(path->ip_container, iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split)),
++                 iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split -1))) == 0)
++              continued = 1;
++
++      memcpy(iam_leaf_entry_shift(path, (struct iam_leaf_entry *)data2, 1),
++             iam_leaf_entry_shift(path, leaf.entries, split),
++             split * iam_leaf_entry_size(path));
++ 
++      /* Which block gets the new entry? */
++      dx_insert_block(path, path->ip_frame, hash_split + continued, newblock);
++      err = ext3_journal_dirty_metadata (handle, bh2);
++      if (err)
++              goto journal_error;
++      err = ext3_journal_dirty_metadata (handle, leaf.bh);
++      if (err)
++              goto journal_error;
++      brelse (bh2);
++      iam_leaf_fini(&leaf);
++errout:
++      return err;
++}
++
++/*
++ * Insert new record @r with key @k into container @c (within context of
++ * transaction @h).
++ *
++ * Return values: 0: success, -ve: error, including -EEXIST when record with
++ * given key is already present.
++ *
++ * postcondition: ergo(result == 0 || result == -EEXIST,
++ *                                  iam_lookup(c, k, r2) > 0 &&
++ *                                  !memcmp(r, r2, c->ic_descr->id_rec_size));
++ */
++int iam_insert(handle_t *handle, struct iam_container *c, struct iam_key *k, 
++             struct iam_rec *r)
++{
++      struct iam_path_compat cpath;
++      struct iam_path *path = &cpath.ipc_path;
++      struct iam_cookie hc = {
++              .ic_key = k,
++              .ic_rec = r
++      };
++      int err, i;
++
++      iam_path_init(path, c, &hc);
++      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
++              path->ip_key_scratch[i] =
++                      (struct iam_key *)&cpath.ipc_scrach[i];
++      err = dx_lookup(path);
++      if (err)
++              goto errout; 
++
++      err = iam_leaf_insert(handle, path, k, r);
++      
++      if (err != -ENOSPC) 
++              goto errout;    
++
++      err = split_index_node(handle, path);
++      if (err)
++              goto errout;    
++
++      err = split_leaf_node(handle, path);
++      if (err)
++              goto errout;
++      
++      err = iam_leaf_insert(handle, path, k, r);
++errout:
++      iam_path_fini(path);
++      return(err);
++}
++
++EXPORT_SYMBOL(iam_insert);
++static int iam_leaf_delete(handle_t *handle, struct iam_path *path, 
++                         struct iam_key *k)
++{
++      struct iam_leaf leaf;
++      struct iam_leaf_entry *p, *q;
++      int err, count;
++
++      err = iam_leaf_init(path, &leaf);
++      if (err)
++              goto errout;
++      
++      err = iam_leaf_lookup(path, &leaf, k);
++      if (err)
++              goto errout;
++
++      count = dx_get_count((struct iam_entry*)leaf.entries);
++      /*delete the k to leaf entries*/
++      p = iam_leaf_entry_shift(path, leaf.at, 1);
++      q = iam_leaf_entry_shift(path, leaf.entries, count - 1);
++      while (p < q) {
++              memcpy(p, iam_leaf_entry_shift(path, p, 1), iam_leaf_entry_size(path));
++              p = iam_leaf_entry_shift(path, p, 1);
++      }
++      dx_set_count((struct iam_entry*)leaf.entries, count - 1);
++
++      err = ext3_journal_dirty_metadata(handle, leaf.bh);
++      if (err)
++              ext3_std_error(path_obj(path)->i_sb, err);
++errout:       
++      iam_leaf_fini(&leaf);
++      return err;
++}
++
++/*
++ * Delete existing record with key @k.
++ *
++ * Return values: 0: success, -ENOENT: not-found, -ve: other error.
++ *
++ * postcondition: ergo(result == 0 || result == -ENOENT,
++ *                                 !iam_lookup(c, k, *));
++ */
++int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k)
++{
++      struct iam_path_compat cpath;
++      struct iam_path *path = &cpath.ipc_path;
++      struct iam_cookie hc = {
++              .ic_key = k
++      };
++      int err, i;
++
++      iam_path_init(path, c, &hc);
++      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
++              path->ip_key_scratch[i] =
++                      (struct iam_key *)&cpath.ipc_scrach[i];
++      err = dx_lookup(path);
++      if (err)
++              goto errout; 
++
++      err = iam_leaf_delete(h, path, k);
++errout:
++      iam_path_fini(path);
++      return err;
++}
++
++EXPORT_SYMBOL(iam_delete);
++
++static int iam_leaf_update(handle_t *handle, struct iam_path *path, 
++                         struct iam_key *k, struct iam_rec *r)
++{
++      struct iam_leaf leaf;
++      int err;
++
++      err = iam_leaf_init(path, &leaf);
++      if (err)
++              goto errout;
++      
++      err = iam_leaf_lookup(path, &leaf, k);
++      if (err)
++              goto errout;
++
++      memcpy(iam_leaf_entry_at(path, leaf.at), r, path_descr(path)->id_rec_size);
++      memcpy(iam_leaf_key_at(path, leaf.at), k, path_descr(path)->id_key_size);
++
++      err = ext3_journal_dirty_metadata(handle, leaf.bh);
++      if (err)
++              ext3_std_error(path_obj(path)->i_sb, err);
++errout:       
++      iam_leaf_fini(&leaf);
++      return err;
++}
++/*
++ * Replace existing record with key @k, or insert new one. New record data are
++ * in @r.
++ *
++ * Return values: 0: success, -ve: error.
++ *
++ * postcondition: ergo(result == 0, iam_lookup(c, k, r2) > 0 &&
++ *                                  !memcmp(r, r2, c->ic_descr->id_rec_size));
++ */
++int iam_update(handle_t *h, struct iam_container *c,
++             struct iam_key *k, struct iam_rec *r)
++{
++      struct iam_path_compat cpath;
++      struct iam_path *path = &cpath.ipc_path;
++      struct iam_cookie hc = {
++              .ic_key  = k,
++              .ic_rec  = r 
++      };
++      int err, i;
++
++      iam_path_init(path, c, &hc);
++      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
++              path->ip_key_scratch[i] =
++                      (struct iam_key *)&cpath.ipc_scrach[i];
++      err = dx_lookup(path);
++      if (err)
++              goto errout; 
++
++      err = iam_leaf_update(h, path, k, r);
++errout:
++      iam_path_fini(path);
++      return err;
++}
++
++EXPORT_SYMBOL(iam_update);
++
+Index: linux-2.6.9/fs/ext3/Makefile
+===================================================================
+--- linux-2.6.9.orig/fs/ext3/Makefile  2006-04-28 16:54:16.000000000 +0800
++++ linux-2.6.9/fs/ext3/Makefile       2006-04-28 16:54:18.000000000 +0800
+@@ -6,7 +6,7 @@
+ ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+          ioctl.o namei.o super.o symlink.o hash.o resize.o \
+-         extents.o mballoc.o
++         extents.o mballoc.o iam.o
+ ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+Index: linux-2.6.9/include/linux/lustre_iam.h
+===================================================================
+--- linux-2.6.9.orig/include/linux/lustre_iam.h        2006-04-28 16:54:18.000000000 +0800
++++ linux-2.6.9/include/linux/lustre_iam.h     2006-04-28 16:59:18.000000000 +0800
+@@ -1,4 +1,8 @@
+ /*
++ *  linux/include/linux/lustre_iam.h
++ */
++
++/*
+  * Maximal number of non-leaf levels in htree. In the stock ext3 this is 2.
+  */
+ enum {
+@@ -30,6 +34,11 @@
+ /* Incomplete type use to refer to the records stored in iam containers. */
+ struct iam_rec;
++struct iam_cookie {
++      struct iam_key *ic_key;
++      struct iam_rec *ic_rec;
++};
++
+ typedef __u64 iam_ptr_t;
+ /*
+@@ -42,7 +51,8 @@
+ };
+ /* leaf node reached by tree lookup */
+-#define iam_leaf_entry iam_rec
++struct iam_leaf_entry;
++
+ struct iam_leaf {
+       struct buffer_head *bh;
+       struct iam_leaf_entry *entries;
+@@ -196,6 +206,162 @@
+       __u32                ipc_scrach[DX_SCRATCH_KEYS];
+ };
++enum iam_it_state {
++      /* initial state */
++      IAM_IT_DETACHED,
++      /* iterator is above particular record in the container */
++      IAM_IT_ATTACHED
++};
++
++/*
++ * Iterator.
++ *
++ * Immediately after call to iam_it_init() iterator is in "detached"
++ * (IAM_IT_DETACHED) state: it is associated with given parent container, but
++ * doesn't point to any particular record in this container.
++ *
++ * After successful call to iam_it_get() and until corresponding call to
++ * iam_it_put() iterator is in "attached" state (IAM_IT_ATTACHED).
++ *
++ * Attached iterator can move through records in a container (provided
++ * IAM_IT_MOVE permission) in a key order, can get record and key values as it
++ * passes over them, and can modify container (provided IAM_IT_WRITE
++ * permission).
++ *
++ * Concurrency: iterators are supposed to be local to thread. Interfaces below
++ * do no internal serialization.
++ *
++ */
++struct iam_iterator {
++      /*
++       * iterator flags, taken from enum iam_it_flags.
++       */
++      __u32                 ii_flags;
++      enum iam_it_state     ii_state;
++      /*
++       * path to the record. Valid in IAM_IT_ATTACHED state.
++       */
++      struct iam_path       ii_path;
++};
++
++static struct iam_container *iam_it_container(struct iam_iterator *it)
++{
++      return it->ii_path.ip_container;
++}
++
++void iam_path_init(struct iam_path *path, struct iam_container *c, void* cookie);
++
++void iam_path_fini(struct iam_path *path);
++
++void iam_path_compat_init(struct iam_path_compat *path, struct inode *inode);
++void iam_path_compat_fini(struct iam_path_compat *path);
++/*
++ * Initialize iterator to IAM_IT_DETACHED state.
++ *
++ * postcondition: it_state(it) == IAM_IT_DETACHED
++ */
++int  iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags);
++/*
++ * Finalize iterator and release all resources.
++ *
++ * precondition: it_state(it) == IAM_IT_DETACHED
++ */
++void iam_it_fini(struct iam_iterator *it);
++
++/*
++ * Attach iterator. After successful completion, @it points to record with the
++ * largest key not larger than @k. Semantics of ->id_create() method guarantee
++ * that such record will always be found.
++ *
++ * Return value: 0: positioned on existing record,
++ *             -ve: error.
++ *
++ * precondition:  it_state(it) == IAM_IT_DETACHED
++ * postcondition: ergo(result == 0,
++ *                     (it_state(it) == IAM_IT_ATTACHED &&
++ *                      it_keycmp(it, iam_it_key_get(it, *), k) < 0))
++ */
++int iam_it_get(struct iam_iterator *it, struct iam_key *k);
++
++/*
++ * Duplicates iterator.
++ *
++ * postcondition: it_state(dst) == it_state(src) &&
++ *                iam_it_container(dst) == iam_it_container(src) &&
++ *                dst->ii_flags == src->ii_flags &&
++ *                ergo(it_state(it) == IAM_IT_ATTACHED,
++ *                     iam_it_rec_get(dst) == iam_it_rec_get(src) &&
++ *                     iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
++ */
++void iam_it_dup(struct iam_iterator *dst, struct iam_iterator *src);
++
++/*
++ * Detach iterator. Does nothing in detached state.
++ *
++ * postcondition: it_state(it) == IAM_IT_DETACHED
++ */
++void iam_it_put(struct iam_iterator *it);
++
++/*
++ * Move iterator one record right.
++ *
++ * Return value: 0: success,
++ *              +1: end of container reached
++ *             -ve: error
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
++ * postcondition: ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED)
++ */
++int iam_it_next(struct iam_iterator *it);
++
++/*
++ * Return pointer to the record under iterator.
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++const struct iam_rec *iam_it_rec_get(struct iam_iterator *it);
++
++/*
++ * Replace contents of record under iterator.
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
++ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
++ *                ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
++ */
++int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r);
++
++/*
++ * Place key under iterator in @k, return @k
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++const struct iam_key *iam_it_key_get(struct iam_iterator *it,
++                                   struct iam_key *k);
++
++/*
++ * Insert new record with key @k and contents from @r, shifting records to the
++ * right.
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED &&
++ *                it->ii_flags&IAM_IT_WRITE &&
++ *                it_keycmp(it, iam_it_key_get(it, *), k) < 0
++ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
++ *                ergo(result == 0,
++ *                     it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
++ *                     !memcmp(iam_it_rec_get(it), r, ...))
++ */
++int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
++                    struct iam_key *k, struct iam_rec *r);
++/*
++ * Delete record under iterator.
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++int iam_it_rec_delete(handle_t *h, struct iam_iterator *it);
++
+ int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r);
+ int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k);
+ int iam_update(handle_t *h, struct iam_container *c, struct iam_key *k, struct iam_rec *r);
+@@ -209,4 +375,166 @@
+  * Finalize container @c, release all resources.
+  */
+ void iam_container_fini(struct iam_container *c);
++/*
++ * Future: use high four bits of block for coalesce-on-delete flags
++ * Mask them off for now.
++ */
++#ifndef assert
++#define assert(test) J_ASSERT(test)
++#endif
++
++static inline void *entry_off(struct iam_entry *entry, ptrdiff_t off)
++{
++      return (void *)((char *)entry + off);
++}
++
++static inline struct iam_descr *path_descr(struct iam_path *p)
++{
++      return p->ip_container->ic_descr;
++}
++
++static inline struct inode *path_obj(struct iam_path *p)
++{
++      return p->ip_container->ic_object;
++}
++
++static inline size_t iam_entry_size(struct iam_path *p)
++{
++      return path_descr(p)->id_key_size + path_descr(p)->id_ptr_size;
++}
++
++static inline struct iam_entry *iam_entry_shift(struct iam_path *p,
++                                            struct iam_entry *entry, int shift)
++{
++      void *e = entry;
++      return e + shift * iam_entry_size(p);
++}
++
++static inline ptrdiff_t iam_entry_diff(struct iam_path *p,
++                                    struct iam_entry *e1, struct iam_entry *e2)
++{
++      ptrdiff_t diff;
++
++      diff = (void *)e1 - (void *)e2;
++      assert(diff / iam_entry_size(p) * iam_entry_size(p) == diff);
++      return diff / iam_entry_size(p);
++}
++
++static inline struct iam_key *dx_get_key(struct iam_path *p,
++                                      struct iam_entry *entry,
++                                      struct iam_key *key)
++{
++      memcpy(key, entry, path_descr(p)->id_key_size);
++      return key;
++}
++
++static inline struct iam_key *iam_key_at(struct iam_path *p,
++                                     struct iam_entry *entry)
++{
++      return (struct iam_key *)entry;
++}
++
++static inline struct iam_key *keycpy(struct iam_container *c,
++                                   struct iam_key *k1, struct iam_key *k2)
++{
++      return memcpy(k1, k2, c->ic_descr->id_key_size);
++}
++
++static inline int keycmp(struct iam_container *c,
++                       struct iam_key *k1, struct iam_key *k2)
++{
++      return c->ic_descr->id_keycmp(c, k1, k2);
++}
++
++static inline int it_keycmp(struct iam_iterator *it,
++                          struct iam_key *k1, struct iam_key *k2)
++{
++      return keycmp(iam_it_container(it), k1, k2);
++}
++
++/* XXX This stuff is put here only because it is used by both iam.c and namei.c */
++static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry)
++{
++      return le32_to_cpu(*(u32 *)entry_off(entry, path_descr(p)->id_key_size))
++              & 0x00ffffff;
++}
++
++static inline void dx_set_block(struct iam_path *p,
++                              struct iam_entry *entry, unsigned value)
++{
++      *(u32*)entry_off(entry,
++                       path_descr(p)->id_key_size) = cpu_to_le32(value);
++}
++
++static inline void dx_set_key(struct iam_path *p,
++                            struct iam_entry *entry, struct iam_key *key)
++{
++      memcpy(entry, key, path_descr(p)->id_key_size);
++}
++
++struct dx_countlimit {
++      __le16 limit;
++      __le16 count;
++};
++static inline unsigned dx_get_count (struct iam_entry *entries)
++{
++      return le16_to_cpu(((struct dx_countlimit *) entries)->count);
++}
++
++static inline unsigned dx_get_limit (struct iam_entry *entries)
++{
++      return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
++}
++
++static inline void dx_set_count (struct iam_entry *entries, unsigned value)
++{
++      ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
++}
++
++static inline void dx_set_limit (struct iam_entry *entries, unsigned value)
++{
++      ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
++}
++
++static inline unsigned dx_root_limit(struct iam_path *p)
++{
++      struct iam_descr *param = path_descr(p);
++      unsigned entry_space = path_obj(p)->i_sb->s_blocksize -
++              param->id_root_gap;
++      return entry_space / (param->id_key_size + param->id_ptr_size);
++}
++
++static inline unsigned dx_node_limit(struct iam_path *p)
++{
++      struct iam_descr *param = path_descr(p);
++      unsigned entry_space   = path_obj(p)->i_sb->s_blocksize -
++              param->id_node_gap;
++      return entry_space / (param->id_key_size + param->id_ptr_size);
++}
++
++static inline struct iam_entry *dx_get_entries(struct iam_path *path, 
++                                             void *data, int root)
++{
++      return data +
++              (root ?
++               path_descr(path)->id_root_gap : path_descr(path)->id_node_gap);
++}
++
++static inline struct iam_entry *dx_node_get_entries(struct iam_path *path,
++                                          struct iam_frame *frame)
++{
++      return dx_get_entries(path,
++                            frame->bh->b_data, frame == path->ip_frames);
++}
++
++int dx_lookup(struct iam_path *path);
++void dx_insert_block(struct iam_path *path, struct iam_frame *frame, 
++                   u32 hash, u32 block);
++
++int ext3_htree_next_block(struct inode *dir, __u32 hash,
++                        struct iam_path *path, __u32 *start_hash);
++
++struct buffer_head *ext3_append(handle_t *handle, struct inode *inode,
++                              u32 *block, int *err);
++int split_index_node(handle_t *handle, struct iam_path *path);
index b90ed7a..9f7b49b 100644 (file)
@@ -17,3 +17,4 @@ ext3-htree-path-ops.patch
 ext3-hash-selection.patch
 ext3-htree-comments.patch
 ext3-iam-ops.patch
+ext3-iam-separate.patch