Whamcloud - gitweb
Branch: b_new_cmd
authorwangdi <wangdi>
Sun, 23 Apr 2006 15:33:32 +0000 (15:33 +0000)
committerwangdi <wangdi>
Sun, 23 Apr 2006 15:33:32 +0000 (15:33 +0000)
1) merge b_iam patch to this branch
2) update prototype, add ext3-iam-ops.patch

ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel4.series
lustre/kernel_patches/patches/ext3-hash-selection.patch [new file with mode: 0644]
lustre/kernel_patches/patches/ext3-htree-comments.patch [new file with mode: 0644]
lustre/kernel_patches/patches/ext3-htree-path-ops.patch
lustre/kernel_patches/patches/ext3-htree-path.patch [new file with mode: 0644]
lustre/kernel_patches/patches/ext3-htree-r5-hash.patch [new file with mode: 0644]
lustre/kernel_patches/patches/ext3-iam-ops.patch [new file with mode: 0644]
lustre/kernel_patches/patches/ext3-tall-htree.patch [new file with mode: 0644]
lustre/kernel_patches/series/ldiskfs-2.6-rhel4.series

index bab81b9..b90ed7a 100644 (file)
@@ -10,3 +10,10 @@ ext3-extents-2.6.9-rhel4.patch
 ext3-mballoc2-2.6.9-rhel4.patch 
 ext3-nlinks-2.6.9.patch
 ext3-ialloc-2.6.patch
+ext3-tall-htree.patch
+ext3-htree-path.patch
+ext3-htree-r5-hash.patch
+ext3-htree-path-ops.patch
+ext3-hash-selection.patch
+ext3-htree-comments.patch
+ext3-iam-ops.patch
diff --git a/lustre/kernel_patches/patches/ext3-hash-selection.patch b/lustre/kernel_patches/patches/ext3-hash-selection.patch
new file mode 100644 (file)
index 0000000..12343cc
--- /dev/null
@@ -0,0 +1,126 @@
+Index: linux-2.6.9/fs/ext3/hash.c
+===================================================================
+--- linux-2.6.9.orig/fs/ext3/hash.c    2006-04-23 22:39:01.000000000 +0800
++++ linux-2.6.9/fs/ext3/hash.c 2006-04-23 22:39:16.000000000 +0800
+@@ -127,6 +127,11 @@
+       return a;
+ }
++static __u32 dx_same_hash(const signed char *msg, int len)
++{
++      return 0xcafebabeUL;
++}
++
+ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
+ {
+       __u32   pad, val;
+@@ -220,6 +225,9 @@
+       case DX_HASH_R5:
+               hash = dx_r5_hash(name, len);
+               break;
++      case DX_HASH_SAME:
++              hash = dx_same_hash(name, len);
++              break;
+       default:
+               hinfo->hash = 0;
+               return -1;
+Index: linux-2.6.9/fs/ext3/super.c
+===================================================================
+--- linux-2.6.9.orig/fs/ext3/super.c   2006-04-23 22:38:55.000000000 +0800
++++ linux-2.6.9/fs/ext3/super.c        2006-04-23 22:39:56.000000000 +0800
+@@ -598,6 +598,7 @@
+       Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
+       Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+       Opt_extents, Opt_extdebug, Opt_mballoc,
++      Opt_hashfunc,
+ };
+ static match_table_t tokens = {
+@@ -651,6 +652,7 @@
+       {Opt_extdebug, "extdebug"},
+       {Opt_mballoc, "mballoc"},
+       {Opt_barrier, "barrier=%u"},
++      {Opt_hashfunc,"hash=%s"},
+       {Opt_err, NULL},
+       {Opt_resize, "resize"},
+ };
+@@ -675,6 +677,7 @@
+       return sb_block;
+ }
++int user_selected_hash_function = -1;
+ static int parse_options (char * options, struct super_block *sb,
+                         unsigned long * inum, unsigned long *n_blocks_count, int is_remount)
+ {
+@@ -963,6 +966,23 @@
+               case Opt_mballoc:
+                       set_opt (sbi->s_mount_opt, MBALLOC);
+                       break;
++              case Opt_hashfunc:
++                      if (strncmp (args[0].from,"legacy",6) == 0){
++               user_selected_hash_function = 0;
++            } else if (strncmp (args[0].from,"half_md4",8) == 0){
++               user_selected_hash_function = 1;
++            } else if (strncmp (args[0].from,"tea",3) == 0){
++               user_selected_hash_function = 2;
++            } else if (strncmp (args[0].from,"r5",2) == 0){
++               user_selected_hash_function = 3;
++            } else if (strncmp (args[0].from,"same",4) == 0){
++               user_selected_hash_function = 4;
++            } else {
++               printk ("Hashfunc name wrong\n");
++               return 0;
++            }
++                       break;
++      
+               default:
+                       printk (KERN_ERR
+                               "EXT3-fs: Unrecognized mount option \"%s\" "
+Index: linux-2.6.9/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.9.orig/fs/ext3/namei.c   2006-04-23 22:39:02.000000000 +0800
++++ linux-2.6.9/fs/ext3/namei.c        2006-04-23 22:39:16.000000000 +0800
+@@ -365,10 +365,7 @@
+               struct htree_cookie *hc = cookie;
+               root = data;
+-              if (root->info.hash_version != DX_HASH_TEA &&
+-                  root->info.hash_version != DX_HASH_HALF_MD4 &&
+-                  root->info.hash_version != DX_HASH_R5 &&
+-                  root->info.hash_version != DX_HASH_LEGACY) {
++              if (root->info.hash_version > DX_HASH_MAX) {
+                       ext3_warning(sb, __FUNCTION__,
+                                    "Unrecognised inode hash code %d",
+                                    root->info.hash_version);
+@@ -1467,6 +1464,7 @@
+  * This converts a one block unindexed directory to a 3 block indexed
+  * directory, and adds the dentry to the indexed directory.
+  */
++extern int user_selected_hash_function;
+ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
+                           struct inode *inode, struct buffer_head *bh)
+ {
+@@ -1522,7 +1520,9 @@
+       memset (&root->info, 0, sizeof(root->info));
+       root->info.info_length = sizeof(root->info);
+       root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
+-      root->info.hash_version = DX_HASH_R5;
++      if (user_selected_hash_function >= 0 &&
++          user_selected_hash_function <= DX_HASH_MAX)
++              root->info.hash_version = user_selected_hash_function;
+       entries = (void *)root->entries;
+       dx_set_block (&path, entries, 1);
+       dx_set_count (entries, 1);
+Index: linux-2.6.9/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.9.orig/include/linux/ext3_fs.h   2006-04-23 22:39:01.000000000 +0800
++++ linux-2.6.9/include/linux/ext3_fs.h        2006-04-23 22:39:16.000000000 +0800
+@@ -665,6 +665,8 @@
+ #define DX_HASH_HALF_MD4      1
+ #define DX_HASH_TEA           2
+ #define DX_HASH_R5            3
++#define DX_HASH_SAME          4
++#define DX_HASH_MAX           4
+ /* hash info structure used by the directory hash */
+ struct dx_hash_info
diff --git a/lustre/kernel_patches/patches/ext3-htree-comments.patch b/lustre/kernel_patches/patches/ext3-htree-comments.patch
new file mode 100644 (file)
index 0000000..159add6
--- /dev/null
@@ -0,0 +1,1643 @@
+Index: linux-2.6.9/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.9.orig/fs/ext3/namei.c   2006-04-17 18:32:12.000000000 +0800
++++ linux-2.6.9/fs/ext3/namei.c        2006-04-23 21:40:41.000000000 +0800
+@@ -24,6 +24,78 @@
+  *    Theodore Ts'o, 2002
+  */
++/*
++ * iam: big theory statement.
++ *
++ * iam (Index Access Module) is a module providing abstraction of persistent
++ * transactional container on top of generalized ext3 htree.
++ *
++ * iam supports:
++ *
++ *     - key, pointer, and record size specifiable per container.
++ *
++ *     - trees taller than 2 index levels.
++ *
++ *     - read/write to existing ext3 htree directories as iam containers.
++ *
++ * iam container is a tree, consisting of leaf nodes containing keys and
++ * records stored in this container, and index nodes, containing keys and
++ * pointers to leaf or index nodes.
++ *
++ * iam does not work with keys directly, instead it calls user-supplied key
++ * comparison function (->dpo_keycmp()).
++ *
++ * Pointers are (currently) interpreted as logical offsets (measured in
++ * blocksful) within underlying flat file on top of which iam tree lives.
++ *
++ * On-disk format:
++ *
++ * iam mostly tries to reuse existing htree formats.
++ *
++ * Format of index node:
++ *
++ * +-----+-------+-------+-------+------+-------+------------+
++ * |     | count |       |       |      |       |            |
++ * | gap |   /   | entry | entry | .... | entry | free space |
++ * |     | limit |       |       |      |       |            |
++ * +-----+-------+-------+-------+------+-------+------------+
++ *
++ *       gap           this part of node is never accessed by iam code. It
++ *                     exists for binary compatibility with ext3 htree (that,
++ *                     in turn, stores fake struct ext2_dirent for ext2
++ *                     compatibility), and to keep some unspecified per-node
++ *                     data. Gap can be different for root and non-root index
++ *                     nodes. Gap size can be specified for each container
++ *                     (gap of 0 is allowed).
++ *
++ *       count/limit   current number of entries in this node, and the maximal
++ *                     number of entries that can fit into node. count/limit
++ *                     has the same size as entry, and is itself counted in
++ *                     count.
++ *
++ *       entry         index entry: consists of a key immediately followed by
++ *                     a pointer to a child node. Size of a key and size of a
++ *                     pointer depends on container. Entry has neither
++ *                     alignment nor padding.
++ *
++ *       free space    portion of node new entries are added to
++ *
++ * Entries in index node are sorted by their key value.
++ *
++ *
++ *
++ *
++ *
++ *
++ *
++ *
++ *
++ *
++ *
++ *
++ *
++ */
++
+ #include <linux/fs.h>
+ #include <linux/pagemap.h>
+ #include <linux/jbd.h>
+@@ -98,14 +170,6 @@
+       __le16 count;
+ };
+-struct dx_entry; /* incomplete type */
+-struct dx_key;   /* incomplete type */
+-
+-struct dx_entry_compat {
+-      __le32 hash;
+-      __le32 block;
+-};
+-
+ /*
+  * dx_root_info is laid out so that if it should somehow get overlaid by a
+  * dirent the two low bits of the hash version will be zero.  Therefore, the
+@@ -135,111 +199,513 @@
+       struct {} entries[0];
+ };
+-
+-struct dx_frame
+-{
+-      struct buffer_head *bh;
+-      struct dx_entry *entries;
+-      struct dx_entry *at;
+-};
+-
+ struct dx_map_entry
+ {
+       u32 hash;
+       u32 offs;
+ };
+-struct dx_path;
+-struct dx_param {
+-      size_t       dpo_key_size;
+-      size_t       dpo_ptr_size;
+-      size_t       dpo_node_gap;
+-      size_t       dpo_root_gap;
+-
+-      u32 (*dpo_root_ptr)(struct dx_path *path);
+-      int (*dpo_node_check)(struct dx_path *path,
+-                            struct dx_frame *frame, void *cookie);
+-      int (*dpo_node_init)(struct dx_path *path,
+-                           struct buffer_head *bh, int root);
+-      int (*dpo_keycmp)(struct dx_path *path,
+-                        struct dx_key *k1, struct dx_key *k2);
++/*
++ * Entry within index tree node. Consists of a key immediately followed
++ * (without padding) by a pointer to the child node.
++ *
++ * Both key and pointer are of variable size, hence incomplete type.
++ */
++struct iam_entry;
++
++struct iam_entry_compat {
++      __le32 hash;
++      __le32 block;
++};
++
++/*
++ * Incomplete type used to refer to keys in iam container.
++ *
++ * As key size can be different from container to container, iam has to use
++ * incomplete type. Clients cast pointer to iam_key to real key type and back.
++ */
++struct iam_key;
++
++/* Incomplete type use to refer to the records stored in iam containers. */
++struct iam_rec;
++
++typedef __u64 iam_ptr_t;
++
++/*
++ * Index node traversed during tree lookup.
++ */
++struct iam_frame {
++      struct buffer_head *bh;    /* buffer holding node data */
++      struct iam_entry *entries; /* array of entries */
++      struct iam_entry *at;      /* target entry, found by binary search */
++};
++
++/* leaf node reached by tree lookup */
++struct iam_leaf {
++      struct buffer_head *bh;
++      struct iam_leaf_entry *entries;
++      struct iam_leaf_entry *at;
++};
++
++struct iam_path;
++struct iam_container;
++
++/*
++ * Parameters, describing a flavor of iam container.
++ */
++struct iam_descr {
++      /*
++       * Size of a key in this container, in bytes.
++       */
++      size_t       id_key_size;
++      /*
++       * Size of a pointer to the next level (stored in index nodes), in
++       * bytes.
++       */
++      size_t       id_ptr_size;
++      /*
++       * Size of a record (stored in leaf nodes), in bytes.
++       */
++      size_t       id_rec_size;
++      /*
++       * Size of unused (by iam) space at the beginning of every non-root
++       * node, in bytes. Used for compatibility with ext3.
++       */
++      size_t       id_node_gap;
++      /*
++       * Size of unused (by iam) space at the beginning of root node, in
++       * bytes. Used for compatibility with ext3.
++       */
++      size_t       id_root_gap;
++
++      /*
++       * Returns pointer (in the same sense as pointer in index entry) to
++       * the root node.
++       */
++      __u32 (*id_root_ptr)(struct iam_container *c);
++
++      /*
++       * Check validity and consistency of index node. This is called when
++       * iam just loaded new node into frame.
++       */
++      int (*id_node_check)(struct iam_path *path, struct iam_frame *frame);
++      /*
++       * Initialize new node (stored in @bh) that is going to be added into
++       * tree.
++       */
++      int (*id_node_init)(struct iam_container *c,
++                          struct buffer_head *bh, int root);
++      int (*id_node_read)(struct iam_container *c, iam_ptr_t ptr,
++                          handle_t *h, struct buffer_head **bh);
++      /*
++       * Key comparison function. Returns -1, 0, +1.
++       */
++      int (*id_keycmp)(struct iam_container *c,
++                       struct iam_key *k1, struct iam_key *k2);
++      /*
++       * Create new container.
++       *
++       * Newly created container has a root node and a single leaf. Leaf
++       * contains single record with the smallest possible key.
++       */
++      int (*id_create)(struct iam_container *c);
++      struct {
++              /*
++               * leaf operations.
++               */
++              /*
++               * returns true iff leaf is positioned at the last entry.
++               */
++              int (*at_end)(struct iam_container *c, struct iam_leaf *l);
++              /* position leaf at the first entry */
++              void (*start)(struct iam_container *c, struct iam_leaf *l);
++              /* more leaf to the next entry. */
++              void (*next)(struct iam_container *c, struct iam_leaf *l);
++              /* return key of current leaf record in @k */
++              void (*key)(struct iam_container *c, struct iam_leaf *l,
++                          struct iam_key *k);
++              /* return pointer to entry body */
++              struct iam_rec *(*rec)(struct iam_container *c,
++                                     struct iam_leaf *l);
++      } id_leaf;
++};
++
++struct iam_container {
++      /*
++       * Underlying flat file. IO against this object is issued to
++       * read/write nodes.
++       */
++      struct inode     *ic_object;
++      /*
++       * container flavor.
++       */
++      struct iam_descr *ic_descr;
++      /*
++       * pointer to flavor-specific per-container data.
++       */
++      void             *ic_descr_data;
+ };
+ /*
+  * Structure to keep track of a path drilled through htree.
+  */
+-struct dx_path {
+-      struct inode         *dp_object;
+-      struct dx_param      *dp_param;
+-      int                   dp_indirect;
+-      struct dx_frame       dp_frames[DX_MAX_TREE_HEIGHT];
+-      struct dx_frame      *dp_frame;
+-      struct dx_key        *dp_key_target;
+-      struct dx_key        *dp_key_scratch[DX_SCRATCH_KEYS];
+-};
+-
+-struct dx_path_compat {
+-      struct dx_path dpc_path;
+-      __u32          dpc_scrach[DX_SCRATCH_KEYS];
+-};
+-
+-static u32 htree_root_ptr(struct dx_path *p);
+-static int htree_node_check(struct dx_path *path,
+-                          struct dx_frame *frame, void *cookie);
+-static int htree_node_init(struct dx_path *path,
++struct iam_path {
++      /*
++       * Parent container.
++       */
++      struct iam_container  *ip_container;
++      /*
++       * Number of index levels minus one.
++       */
++      int                    ip_indirect;
++      /*
++       * Nodes that top-to-bottom traversal passed through.
++       */
++      struct iam_frame       ip_frames[DX_MAX_TREE_HEIGHT];
++      /*
++       * Last filled frame in ->ip_frames. Refers to the 'twig' node (one
++       * immediately above leaf).
++       */
++      struct iam_frame      *ip_frame;
++      /*
++       * Leaf node: a child of ->ip_frame.
++       */
++      struct iam_leaf       *ip_leaf;
++      /*
++       * Key searched for.
++       */
++      struct iam_key        *ip_key_target;
++      /*
++       * Scratch-pad area for temporary keys.
++       */
++      struct iam_key        *ip_key_scratch[DX_SCRATCH_KEYS];
++      /*
++       * pointer to flavor-specific per-container data.
++       */
++      void                  *ip_descr_data;
++};
++
++/*
++ * Helper structure for legacy htrees.
++ */
++struct iam_path_compat {
++      struct iam_path      ipc_path;
++      struct iam_container ipc_container;
++      __u32                ipc_scrach[DX_SCRATCH_KEYS];
++};
++
++static u32 htree_root_ptr(struct iam_container *c);
++static int htree_node_check(struct iam_path *path, struct iam_frame *frame);
++static int htree_node_init(struct iam_container *c,
+                          struct buffer_head *bh, int root);
+-static int htree_keycmp(struct dx_path *path,
+-                      struct dx_key *k1, struct dx_key *k2);
++static int htree_keycmp(struct iam_container *c,
++                      struct iam_key *k1, struct iam_key *k2);
++static int htree_node_read(struct iam_container *c, iam_ptr_t ptr,
++                         handle_t *h, struct buffer_head **bh);
++
++/*
++ * Parameters describing iam compatibility mode in which existing ext3 htrees
++ * can be manipulated.
++ */
++static struct iam_descr htree_compat_param = {
++      .id_key_size = sizeof ((struct dx_map_entry *)NULL)->hash,
++      .id_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs,
++      .id_node_gap = offsetof(struct dx_node, entries),
++      .id_root_gap = offsetof(struct dx_root, entries),
++
++      .id_root_ptr   = htree_root_ptr,
++      .id_node_check = htree_node_check,
++      .id_node_init  = htree_node_init,
++      .id_node_read  = htree_node_read,
++      .id_keycmp     = htree_keycmp
++};
++
++
++struct iam_key;
++struct iam_rec;
++struct iam_descr;
++struct iam_container;
++struct iam_path;
+-static struct dx_param htree_compat_param = {
+-      .dpo_key_size = sizeof ((struct dx_map_entry *)NULL)->hash,
+-      .dpo_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs,
+-      .dpo_node_gap = offsetof(struct dx_node, entries),
+-      .dpo_root_gap = offsetof(struct dx_root, entries),
+-
+-      .dpo_root_ptr   = htree_root_ptr,
+-      .dpo_node_check = htree_node_check,
+-      .dpo_node_init  = htree_node_init,
+-      .dpo_keycmp     = htree_keycmp
++/*
++ * Initialize container @c, acquires additional reference on @inode.
++ */
++int iam_container_init(struct iam_container *c,
++                     struct iam_descr *descr, struct inode *inode);
++/*
++ * Finalize container @c, release all resources.
++ */
++void iam_container_fini(struct iam_container *c);
++
++/*
++ * Search container @c for record with key @k. If record is found, its data
++ * are moved into @r.
++ *
++ *
++ *
++ * Return values: +ve: found, 0: not-found, -ve: error
++ */
++int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r);
++/*
++ * Insert new record @r with key @k into container @c (within context of
++ * transaction @h.
++ *
++ * Return values: 0: success, -ve: error, including -EEXIST when record with
++ * given key is already present.
++ *
++ * postcondition: ergo(result == 0 || result == -EEXIST,
++ *                                  iam_lookup(c, k, r2) > 0 &&
++ *                                  !memcmp(r, r2, c->ic_descr->id_rec_size));
++ */
++int iam_insert(handle_t *h, struct iam_container *c,
++             struct iam_key *k, struct iam_rec *r);
++/*
++ * Replace existing record with key @k, or insert new one. New record data are
++ * in @r.
++ *
++ * Return values: 0: success, -ve: error.
++ *
++ * postcondition: ergo(result == 0, iam_lookup(c, k, r2) > 0 &&
++ *                                  !memcmp(r, r2, c->ic_descr->id_rec_size));
++ */
++int iam_update(handle_t *h, struct iam_container *c,
++             struct iam_key *k, struct iam_rec *r);
++/*
++ * Delete existing record with key @k.
++ *
++ * Return values: 0: success, -ENOENT: not-found, -ve: other error.
++ *
++ * postcondition: ergo(result == 0 || result == -ENOENT,
++ *                                 !iam_lookup(c, k, *));
++ */
++int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k);
++
++/*
++ * iam cursor (iterator) api.
++ */
++
++/*
++ * Flags controlling iterator functionality.
++ */
++enum iam_it_flags {
++      /*
++       * this iterator will move (iam_it_{prev,next}() will be called on it)
++       */
++      IAM_IT_MOVE  = (1 << 0),
++      /*
++       * tree can be updated through this iterator.
++       */
++      IAM_IT_WRITE = (1 << 1)
+ };
++/*
++ * States of iterator state machine.
++ */
++enum iam_it_state {
++      /* initial state */
++      IAM_IT_DETACHED,
++      /* iterator is above particular record in the container */
++      IAM_IT_ATTACHED
++};
++
++/*
++ * Iterator.
++ *
++ * Immediately after call to iam_it_init() iterator is in "detached"
++ * (IAM_IT_DETACHED) state: it is associated with given parent container, but
++ * doesn't point to any particular record in this container.
++ *
++ * After successful call to iam_it_get() and until corresponding call to
++ * iam_it_put() iterator is in "attached" state (IAM_IT_ATTACHED).
++ *
++ * Attached iterator can move through records in a container (provided
++ * IAM_IT_MOVE permission) in a key order, can get record and key values as it
++ * passes over them, and can modify container (provided IAM_IT_WRITE
++ * permission).
++ *
++ * Concurrency: iterators are supposed to be local to thread. Interfaces below
++ * do no internal serialization.
++ *
++ */
++struct iam_iterator {
++      /*
++       * iterator flags, taken from enum iam_it_flags.
++       */
++      __u32                 ii_flags;
++      enum iam_it_state     ii_state;
++      /*
++       * path to the record. Valid in IAM_IT_ATTACHED state.
++       */
++      struct iam_path       ii_path;
++};
++
++static inline struct iam_key *keycpy(struct iam_container *c,
++                                   struct iam_key *k1, struct iam_key *k2)
++{
++      return memcpy(k1, k2, c->ic_descr->id_key_size);
++}
++
++static inline int keycmp(struct iam_container *c,
++                       struct iam_key *k1, struct iam_key *k2)
++{
++      return c->ic_descr->id_keycmp(c, k1, k2);
++}
++
++static struct iam_container *iam_it_container(struct iam_iterator *it)
++{
++      return it->ii_path.ip_container;
++}
++
++static inline int it_keycmp(struct iam_iterator *it,
++                          struct iam_key *k1, struct iam_key *k2)
++{
++      return keycmp(iam_it_container(it), k1, k2);
++}
++
++/*
++ * Initialize iterator to IAM_IT_DETACHED state.
++ *
++ * postcondition: it_state(it) == IAM_IT_DETACHED
++ */
++int  iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags);
++/*
++ * Finalize iterator and release all resources.
++ *
++ * precondition: it_state(it) == IAM_IT_DETACHED
++ */
++void iam_it_fini(struct iam_iterator *it);
++
++/*
++ * Attach iterator. After successful completion, @it points to record with the
++ * largest key not larger than @k. Semantics of ->id_create() method guarantee
++ * that such record will always be found.
++ *
++ * Return value: 0: positioned on existing record,
++ *             -ve: error.
++ *
++ * precondition:  it_state(it) == IAM_IT_DETACHED
++ * postcondition: ergo(result == 0,
++ *                     (it_state(it) == IAM_IT_ATTACHED &&
++ *                      it_keycmp(it, iam_it_key_get(it, *), k) < 0))
++ */
++int iam_it_get(struct iam_iterator *it, struct iam_key *k);
++
++/*
++ * Duplicates iterator.
++ *
++ * postcondition: it_state(dst) == it_state(src) &&
++ *                iam_it_container(dst) == iam_it_container(src) &&
++ *                dst->ii_flags = src->ii_flags &&
++ *                ergo(it_state(it) == IAM_IT_ATTACHED,
++ *                     iam_it_rec_get(dst) == iam_it_rec_get(src) &&
++ *                     iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
++ */
++void iam_it_dup(struct iam_iterator *dst, struct iam_iterator *src);
++
++/*
++ * Detach iterator. Does nothing it detached state.
++ *
++ * postcondition: it_state(it) == IAM_IT_DETACHED
++ */
++void iam_it_put(struct iam_iterator *it);
++
++/*
++ * Move iterator one record right.
++ *
++ * Return value: 0: success,
++ *              +1: end of container reached
++ *             -ve: error
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
++ * postcondition: ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED)
++ */
++int iam_it_next(struct iam_iterator *it);
++
++/*
++ * Return pointer to the record under iterator.
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++const struct iam_rec *iam_it_rec_get(struct iam_iterator *it);
++
++/*
++ * Replace contents of record under iterator.
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
++ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
++ *                ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
++ */
++int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r);
++
++/*
++ * Place key under iterator in @k, return @k
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++const struct iam_key *iam_it_key_get(struct iam_iterator *it,
++                                   struct iam_key *k);
++
++/*
++ * Insert new record with key @k and contents from @r, shifting records to the
++ * right.
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED &&
++ *                it->ii_flags&IAM_IT_WRITE &&
++ *                it_keycmp(it, iam_it_key_get(it, *), k) < 0
++ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
++ *                ergo(result == 0,
++ *                     it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
++ *                     !memcmp(iam_it_rec_get(it), r, ...))
++ */
++int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
++                    struct iam_key *k, struct iam_rec *r);
++/*
++ * Delete record under iterator.
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++int iam_it_rec_delete(handle_t *h, struct iam_iterator *it);
+ #ifdef CONFIG_EXT3_INDEX
+-static inline unsigned dx_get_block(struct dx_path *p, struct dx_entry *entry);
+-static void dx_set_block(struct dx_path *p,
+-                       struct dx_entry *entry, unsigned value);
+-static inline struct dx_key *dx_get_key(struct dx_path *p,
+-                                      struct dx_entry *entry,
+-                                      struct dx_key *key);
+-static void dx_set_key(struct dx_path *p, struct dx_entry *entry,
+-                     struct dx_key *key);
+-static unsigned dx_get_count(struct dx_entry *entries);
+-static unsigned dx_get_limit(struct dx_entry *entries);
+-static void dx_set_count(struct dx_entry *entries, unsigned value);
+-static void dx_set_limit(struct dx_entry *entries, unsigned value);
+-static unsigned dx_root_limit(struct dx_path *p);
+-static unsigned dx_node_limit(struct dx_path *p);
++static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry);
++static void dx_set_block(struct iam_path *p,
++                       struct iam_entry *entry, unsigned value);
++static inline struct iam_key *dx_get_key(struct iam_path *p,
++                                      struct iam_entry *entry,
++                                      struct iam_key *key);
++static void dx_set_key(struct iam_path *p, struct iam_entry *entry,
++                     struct iam_key *key);
++static unsigned dx_get_count(struct iam_entry *entries);
++static unsigned dx_get_limit(struct iam_entry *entries);
++static void dx_set_count(struct iam_entry *entries, unsigned value);
++static void dx_set_limit(struct iam_entry *entries, unsigned value);
++static unsigned dx_root_limit(struct iam_path *p);
++static unsigned dx_node_limit(struct iam_path *p);
+ static int dx_probe(struct dentry *dentry,
+                   struct inode *dir,
+                   struct dx_hash_info *hinfo,
+-                  struct dx_path *path);
++                  struct iam_path *path);
+ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
+                       struct dx_hash_info *hinfo, struct dx_map_entry map[]);
+ static void dx_sort_map(struct dx_map_entry *map, unsigned count);
+ static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
+               struct dx_map_entry *offsets, int count);
+ static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
+-static void dx_insert_block (struct dx_path *path,
+-                           struct dx_frame *frame, u32 hash, u32 block);
++static void dx_insert_block (struct iam_path *path,
++                           struct iam_frame *frame, u32 hash, u32 block);
+ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+-                               struct dx_path *path, __u32 *start_hash);
++                               struct iam_path *path, __u32 *start_hash);
+ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
+                      struct ext3_dir_entry_2 **res_dir, int *err);
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+                            struct inode *inode);
+-static inline void dx_path_init(struct dx_path *path, struct inode *inode);
+-static inline void dx_path_fini(struct dx_path *path);
++static inline void iam_path_init(struct iam_path *path,
++                               struct iam_container *c);
++static inline void iam_path_fini(struct iam_path *path);
+ /*
+@@ -247,153 +713,154 @@
+  * Mask them off for now.
+  */
+-static inline void *entry_off(struct dx_entry *entry, ptrdiff_t off)
++static inline void *entry_off(struct iam_entry *entry, ptrdiff_t off)
+ {
+       return (void *)((char *)entry + off);
+ }
+-static inline size_t dx_entry_size(struct dx_path *p)
++static inline struct iam_descr *path_descr(struct iam_path *p)
+ {
+-      return p->dp_param->dpo_key_size + p->dp_param->dpo_ptr_size;
++      return p->ip_container->ic_descr;
+ }
+-static inline struct dx_entry *dx_entry_shift(struct dx_path *p,
+-                                            struct dx_entry *entry, int shift)
++static inline struct inode *path_obj(struct iam_path *p)
++{
++      return p->ip_container->ic_object;
++}
++
++static inline size_t iam_entry_size(struct iam_path *p)
++{
++      return path_descr(p)->id_key_size + path_descr(p)->id_ptr_size;
++}
++
++static inline struct iam_entry *iam_entry_shift(struct iam_path *p,
++                                            struct iam_entry *entry, int shift)
+ {
+       void *e = entry;
+-      return e + shift * dx_entry_size(p);
++      return e + shift * iam_entry_size(p);
+ }
+-static inline ptrdiff_t dx_entry_diff(struct dx_path *p,
+-                                    struct dx_entry *e1, struct dx_entry *e2)
++static inline ptrdiff_t iam_entry_diff(struct iam_path *p,
++                                    struct iam_entry *e1, struct iam_entry *e2)
+ {
+       ptrdiff_t diff;
+       diff = (void *)e1 - (void *)e2;
+-      assert(diff / dx_entry_size(p) * dx_entry_size(p) == diff);
+-      return diff / dx_entry_size(p);
++      assert(diff / iam_entry_size(p) * iam_entry_size(p) == diff);
++      return diff / iam_entry_size(p);
+ }
+-static inline unsigned dx_get_block(struct dx_path *p, struct dx_entry *entry)
++static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry)
+ {
+-      return le32_to_cpu(*(u32 *)entry_off(entry, p->dp_param->dpo_key_size))
++      return le32_to_cpu(*(u32 *)entry_off(entry, path_descr(p)->id_key_size))
+               & 0x00ffffff;
+ }
+-static inline void dx_set_block(struct dx_path *p,
+-                              struct dx_entry *entry, unsigned value)
++static inline void dx_set_block(struct iam_path *p,
++                              struct iam_entry *entry, unsigned value)
+ {
+-      *(u32*)entry_off(entry, p->dp_param->dpo_key_size) = cpu_to_le32(value);
++      *(u32*)entry_off(entry,
++                       path_descr(p)->id_key_size) = cpu_to_le32(value);
+ }
+-static inline struct dx_key *dx_get_key(struct dx_path *p,
+-                                      struct dx_entry *entry,
+-                                      struct dx_key *key)
++static inline struct iam_key *dx_get_key(struct iam_path *p,
++                                      struct iam_entry *entry,
++                                      struct iam_key *key)
+ {
+-      memcpy(key, entry, p->dp_param->dpo_key_size);
++      memcpy(key, entry, path_descr(p)->id_key_size);
+       return key;
+ }
+-static inline struct dx_key *dx_key_at(struct dx_path *p,
+-                                     struct dx_entry *entry)
++static inline struct iam_key *iam_key_at(struct iam_path *p,
++                                     struct iam_entry *entry)
+ {
+-      return (struct dx_key *)entry;
++      return (struct iam_key *)entry;
+ }
+-static inline void dx_set_key(struct dx_path *p,
+-                            struct dx_entry *entry, struct dx_key *key)
++static inline void dx_set_key(struct iam_path *p,
++                            struct iam_entry *entry, struct iam_key *key)
+ {
+-      memcpy(entry, key, p->dp_param->dpo_key_size);
++      memcpy(entry, key, path_descr(p)->id_key_size);
+ }
+-static inline unsigned dx_get_count (struct dx_entry *entries)
++static inline unsigned dx_get_count (struct iam_entry *entries)
+ {
+       return le16_to_cpu(((struct dx_countlimit *) entries)->count);
+ }
+-static inline unsigned dx_get_limit (struct dx_entry *entries)
++static inline unsigned dx_get_limit (struct iam_entry *entries)
+ {
+       return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
+ }
+-static inline void dx_set_count (struct dx_entry *entries, unsigned value)
++static inline void dx_set_count (struct iam_entry *entries, unsigned value)
+ {
+       ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
+ }
+-static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
++static inline void dx_set_limit (struct iam_entry *entries, unsigned value)
+ {
+       ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
+ }
+-static inline unsigned dx_root_limit(struct dx_path *p)
++static inline unsigned dx_root_limit(struct iam_path *p)
+ {
+-      struct dx_param *param = p->dp_param;
+-      unsigned entry_space   = p->dp_object->i_sb->s_blocksize -
+-              param->dpo_root_gap;
+-      return entry_space / (param->dpo_key_size + param->dpo_ptr_size);
++      struct iam_descr *param = path_descr(p);
++      unsigned entry_space = path_obj(p)->i_sb->s_blocksize -
++              param->id_root_gap;
++      return entry_space / (param->id_key_size + param->id_ptr_size);
+ }
+-static inline unsigned dx_node_limit(struct dx_path *p)
++static inline unsigned dx_node_limit(struct iam_path *p)
+ {
+-      struct dx_param *param = p->dp_param;
+-      unsigned entry_space   = p->dp_object->i_sb->s_blocksize -
+-              param->dpo_node_gap;
+-      return entry_space / (param->dpo_key_size + param->dpo_ptr_size);
++      struct iam_descr *param = path_descr(p);
++      unsigned entry_space   = path_obj(p)->i_sb->s_blocksize -
++              param->id_node_gap;
++      return entry_space / (param->id_key_size + param->id_ptr_size);
+ }
+-static inline int dx_index_is_compat(struct dx_path *path)
++static inline int dx_index_is_compat(struct iam_path *path)
+ {
+-      return path->dp_param == &htree_compat_param;
++      return path_descr(path) == &htree_compat_param;
+ }
+-static struct dx_entry *dx_get_entries(struct dx_path *path, void *data,
++static struct iam_entry *dx_get_entries(struct iam_path *path, void *data,
+                                      int root)
+ {
+       return data +
+               (root ?
+-               path->dp_param->dpo_root_gap : path->dp_param->dpo_node_gap);
++               path_descr(path)->id_root_gap : path_descr(path)->id_node_gap);
+ }
+-static struct dx_entry *dx_node_get_entries(struct dx_path *path,
+-                                          struct dx_frame *frame)
++static struct iam_entry *dx_node_get_entries(struct iam_path *path,
++                                          struct iam_frame *frame)
+ {
+       return dx_get_entries(path,
+-                            frame->bh->b_data, frame == path->dp_frames);
+-}
+-
+-static inline struct dx_key *keycpy(struct dx_path *p,
+-                                  struct dx_key *k1, struct dx_key *k2)
+-{
+-      return memcpy(k1, k2, p->dp_param->dpo_key_size);
+-}
+-
+-static inline int keycmp(struct dx_path *p,
+-                       struct dx_key *k1, struct dx_key *k2)
+-{
+-      return p->dp_param->dpo_keycmp(p, k1, k2);
++                            frame->bh->b_data, frame == path->ip_frames);
+ }
+-static int dx_node_check(struct dx_path *p, struct dx_frame *f)
++static int dx_node_check(struct iam_path *p, struct iam_frame *f)
+ {
+-      struct dx_entry *e;
++      struct iam_entry     *e;
++      struct iam_container *c;
+       unsigned count;
+       unsigned  i;
++      c = p->ip_container;
+       e = dx_node_get_entries(p, f);
+       count = dx_get_count(e);
+-      e = dx_entry_shift(p, e, 1);
+-      for (i = 0; i < count - 1; ++i, e = dx_entry_shift(p, e, 1)) {
+-              keycpy(p, p->dp_key_scratch[0], p->dp_key_scratch[1]);
+-              dx_get_key(p, e, p->dp_key_scratch[1]);
++      e = iam_entry_shift(p, e, 1);
++      for (i = 0; i < count - 1; ++i, e = iam_entry_shift(p, e, 1)) {
++              keycpy(c, p->ip_key_scratch[0], p->ip_key_scratch[1]);
++              dx_get_key(p, e, p->ip_key_scratch[1]);
+               if (i > 0 &&
+-                  keycmp(p, p->dp_key_scratch[0], p->dp_key_scratch[1]) > 0)
++                  keycmp(c, p->ip_key_scratch[0], p->ip_key_scratch[1]) > 0)
+                       return 0;
+       }
+       return 1;
+ }
+-static u32 htree_root_ptr(struct dx_path *path)
++static u32 htree_root_ptr(struct iam_container *c)
+ {
+       return 0;
+ }
+@@ -403,20 +870,19 @@
+       struct dentry       *dentry;
+ };
+-static int htree_node_check(struct dx_path *path, struct dx_frame *frame,
+-                          void *cookie)
++static int htree_node_check(struct iam_path *path, struct iam_frame *frame)
+ {
+       void *data;
+-      struct dx_entry *entries;
++      struct iam_entry *entries;
+       struct super_block *sb;
+       data = frame->bh->b_data;
+       entries = dx_node_get_entries(path, frame);
+-      sb = path->dp_object->i_sb;
+-      if (frame == path->dp_frames) {
++      sb = path_obj(path)->i_sb;
++      if (frame == path->ip_frames) {
+               /* root node */
+               struct dx_root *root;
+-              struct htree_cookie *hc = cookie;
++              struct htree_cookie *hc = path->ip_descr_data;
+               root = data;
+               if (root->info.hash_version > DX_HASH_MAX) {
+@@ -433,8 +899,8 @@
+                       return ERR_BAD_DX_DIR;
+               }
+-              path->dp_indirect = root->info.indirect_levels;
+-              if (path->dp_indirect > DX_MAX_TREE_HEIGHT - 1) {
++              path->ip_indirect = root->info.indirect_levels;
++              if (path->ip_indirect > DX_MAX_TREE_HEIGHT - 1) {
+                       ext3_warning(sb, __FUNCTION__,
+                                    "Unimplemented inode hash depth: %#06x",
+                                    root->info.indirect_levels);
+@@ -450,17 +916,17 @@
+               if (hc->dentry)
+                       ext3fs_dirhash(hc->dentry->d_name.name,
+                                      hc->dentry->d_name.len, hc->hinfo);
+-              path->dp_key_target = (struct dx_key *)&hc->hinfo->hash;
++              path->ip_key_target = (struct iam_key *)&hc->hinfo->hash;
+       } else {
+               /* non-root index */
+-              assert(entries == data + path->dp_param->dpo_node_gap);
++              assert(entries == data + path_descr(path)->id_node_gap);
+               assert(dx_get_limit(entries) == dx_node_limit(path));
+       }
+       frame->entries = frame->at = entries;
+       return 0;
+ }
+-static int htree_node_init(struct dx_path *path,
++static int htree_node_init(struct iam_container *c,
+                          struct buffer_head *bh, int root)
+ {
+       struct dx_node *node;
+@@ -468,13 +934,24 @@
+       assert(!root);
+       node = (void *)bh->b_data;
+-      node->fake.rec_len = cpu_to_le16(path->dp_object->i_sb->s_blocksize);
++      node->fake.rec_len = cpu_to_le16(c->ic_object->i_sb->s_blocksize);
+       node->fake.inode = 0;
+       return 0;
+ }
+-static int htree_keycmp(struct dx_path *path,
+-                      struct dx_key *k1, struct dx_key *k2)
++static int htree_node_read(struct iam_container *c, iam_ptr_t ptr,
++                         handle_t *handle, struct buffer_head **bh)
++{
++      int result = 0;
++
++      *bh = ext3_bread(handle, c->ic_object, (int)ptr, 0, &result);
++      if (*bh == NULL)
++              result = -EIO;
++      return result;
++}
++
++static int htree_keycmp(struct iam_container *c,
++                      struct iam_key *k1, struct iam_key *k2)
+ {
+       __u32 p1 = le32_to_cpu(*(__u32 *)k1);
+       __u32 p2 = le32_to_cpu(*(__u32 *)k2);
+@@ -486,7 +963,7 @@
+  * Debug
+  */
+ #ifdef DX_DEBUG
+-static void dx_show_index (char * label, struct dx_entry *entries)
++static void dx_show_index (char * label, struct iam_entry *entries)
+ {
+         int i, n = dx_get_count (entries);
+         printk("%s index ", label);
+@@ -535,7 +1012,7 @@
+ }
+ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
+-                           struct dx_entry *entries, int levels)
++                           struct iam_entry *entries, int levels)
+ {
+       unsigned blocksize = dir->i_sb->s_blocksize;
+       unsigned count = dx_get_count (entries), names = 0, space = 0, i;
+@@ -565,32 +1042,33 @@
+ }
+ #endif /* DX_DEBUG */
+-static int dx_lookup(struct dx_path *path, void *cookie)
++static int dx_lookup(struct iam_path *path)
+ {
+       u32 ptr;
+-      int err;
++      int err = 0;
+       int i;
+-      struct dx_param *param;
+-      struct dx_frame *frame;
+-
+-      param = path->dp_param;
++      struct iam_descr *param;
++      struct iam_frame *frame;
++      struct iam_container *c;
+-      for (frame = path->dp_frames, i = 0,
+-           ptr = param->dpo_root_ptr(path); i <= path->dp_indirect;
++      param = path_descr(path);
++      c = path->ip_container;
++      
++      for (frame = path->ip_frames, i = 0,
++                   ptr = param->id_root_ptr(path->ip_container);
++           i <= path->ip_indirect;
+            ptr = dx_get_block(path, frame->at), ++frame, ++i) {
+-              struct dx_entry *entries;
+-              struct dx_entry *p;
+-              struct dx_entry *q;
+-              struct dx_entry *m;
++              struct iam_entry *entries;
++              struct iam_entry *p;
++              struct iam_entry *q;
++              struct iam_entry *m;
+               unsigned count;
+-              frame->bh = ext3_bread(NULL, path->dp_object, ptr, 0, &err);
+-              if (frame->bh == NULL) {
+-                      err = -EIO;
++              err = param->id_node_read(c, (iam_ptr_t)ptr, NULL, &frame->bh);
++              if (err != 0)
+                       break;
+-              }
+-              err = param->dpo_node_check(path, frame, cookie);
++              err = param->id_node_check(path, frame);
+               if (err != 0)
+                       break;
+@@ -599,37 +1077,37 @@
+               entries = frame->entries;
+               count = dx_get_count(entries);
+               assert(count && count <= dx_get_limit(entries));
+-              p = dx_entry_shift(path, entries, 1);
+-              q = dx_entry_shift(path, entries, count - 1);
++              p = iam_entry_shift(path, entries, 1);
++              q = iam_entry_shift(path, entries, count - 1);
+               while (p <= q) {
+-                      m = dx_entry_shift(path,
+-                                         p, dx_entry_diff(path, q, p) / 2);
++                      m = iam_entry_shift(path,
++                                         p, iam_entry_diff(path, q, p) / 2);
+                       dxtrace(printk("."));
+-                      if (keycmp(path, dx_key_at(path, m),
+-                                 path->dp_key_target) > 0)
+-                              q = dx_entry_shift(path, m, -1);
++                      if (keycmp(c, iam_key_at(path, m),
++                                 path->ip_key_target) > 0)
++                              q = iam_entry_shift(path, m, -1);
+                       else
+-                              p = dx_entry_shift(path, m, +1);
++                              p = iam_entry_shift(path, m, +1);
+               }
+-              frame->at = dx_entry_shift(path, p, -1);
++              frame->at = iam_entry_shift(path, p, -1);
+               if (1) { // linear search cross check
+                       unsigned n = count - 1;
+-                      struct dx_entry *at;
++                      struct iam_entry *at;
+                       at = entries;
+                       while (n--) {
+                               dxtrace(printk(","));
+-                              at = dx_entry_shift(path, at, +1);
+-                              if (keycmp(path, dx_key_at(path, at),
+-                                         path->dp_key_target) > 0) {
+-                                      if (at != dx_entry_shift(path, frame->at, 1)) {
++                              at = iam_entry_shift(path, at, +1);
++                              if (keycmp(c, iam_key_at(path, at),
++                                         path->ip_key_target) > 0) {
++                                      if (at != iam_entry_shift(path, frame->at, 1)) {
+                                               BREAKPOINT;
+                                               printk(KERN_EMERG "%i\n",
+-                                                     keycmp(path, dx_key_at(path, at),
+-                                                            path->dp_key_target));
++                                                     keycmp(c, iam_key_at(path, at),
++                                                            path->ip_key_target));
+                                       }
+-                                      at = dx_entry_shift(path, at, -1);
++                                      at = iam_entry_shift(path, at, -1);
+                                       break;
+                               }
+                       }
+@@ -637,8 +1115,8 @@
+               }
+       }
+       if (err != 0)
+-              dx_path_fini(path);
+-      path->dp_frame = --frame;
++              iam_path_fini(path);
++      path->ip_frame = --frame;
+       return err;
+ }
+@@ -652,7 +1130,7 @@
+  * back to userspace.
+  */
+ static int dx_probe(struct dentry *dentry, struct inode *dir,
+-                  struct dx_hash_info *hinfo, struct dx_path *path)
++                  struct dx_hash_info *hinfo, struct iam_path *path)
+ {
+       int err;
+       struct htree_cookie hc = {
+@@ -661,39 +1139,78 @@
+       };
+       assert(dx_index_is_compat(path));
+-      err = dx_lookup(path, &hc);
+-      assert(err != 0 || path->dp_frames[path->dp_indirect].bh != NULL);
++      path->ip_descr_data = &hc;
++      err = dx_lookup(path);
++      assert(err != 0 || path->ip_frames[path->ip_indirect].bh != NULL);
+       return err;
+ }
+-static inline void dx_path_init(struct dx_path *path, struct inode *inode)
++/*
++ * Initialize container @c, acquires additional reference on @inode.
++ */
++int iam_container_init(struct iam_container *c,
++                     struct iam_descr *descr, struct inode *inode)
++{
++      memset(c, 0, sizeof *c);
++      c->ic_descr  = descr;
++      c->ic_object = igrab(inode);
++      if (c->ic_object != NULL)
++              return 0;
++      else
++              return -ENOENT;
++}
++
++/*
++ * Finalize container @c, release all resources.
++ */
++void iam_container_fini(struct iam_container *c)
++{
++      if (c->ic_object != NULL) {
++              iput(c->ic_object);
++              c->ic_object = NULL;
++      }
++}
++
++static inline void iam_path_init(struct iam_path *path, struct iam_container *c)
+ {
+       memset(path, 0, sizeof *path);
+-      path->dp_object = inode;
+-      path->dp_frame = path->dp_frames;
++      path->ip_container = c;
++      path->ip_frame = path->ip_frames;
+ }
+-static inline void dx_path_fini(struct dx_path *path)
++static inline void iam_path_fini(struct iam_path *path)
+ {
+       int i;
+-      for (i = 0; i < ARRAY_SIZE(path->dp_frames); i++) {
+-              if (path->dp_frames[i].bh != NULL) {
+-                      brelse(path->dp_frames[i].bh);
+-                      path->dp_frames[i].bh = NULL;
++      for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) {
++              if (path->ip_frames[i].bh != NULL) {
++                      brelse(path->ip_frames[i].bh);
++                      path->ip_frames[i].bh = NULL;
+               }
+       }
+ }
+-static void dx_path_compat_init(struct dx_path_compat *path,
+-                              struct inode *inode)
++static void iam_path_compat_init(struct iam_path_compat *path,
++                               struct inode *inode)
+ {
+       int i;
+-      dx_path_init(&path->dpc_path, inode);
+-      path->dpc_path.dp_param = &htree_compat_param;
+-      for (i = 0; i < ARRAY_SIZE(path->dpc_path.dp_key_scratch); ++i)
+-              path->dpc_path.dp_key_scratch[i] =
+-                      (struct dx_key *)&path->dpc_scrach[i];
++
++      iam_container_init(&path->ipc_container, &htree_compat_param, inode);
++      /*
++       * XXX hack allowing finalization of iam_path_compat with
++       * iam_path_fini().
++       */
++      iput(inode);
++      iam_path_init(&path->ipc_path, &path->ipc_container);
++      for (i = 0; i < ARRAY_SIZE(path->ipc_path.ip_key_scratch); ++i)
++              path->ipc_path.ip_key_scratch[i] =
++                      (struct iam_key *)&path->ipc_scrach[i];
++}
++
++static void iam_path_compat_fini(struct iam_path_compat *path)
++{
++      iam_path_fini(&path->ipc_path);
++      iam_container_fini(&path->ipc_container);
+ }
+ /*
+@@ -714,16 +1231,16 @@
+  * hash of the next page.
+  */
+ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+-                               struct dx_path *path, __u32 *start_hash)
++                               struct iam_path *path, __u32 *start_hash)
+ {
+-      struct dx_frame *p;
++      struct iam_frame *p;
+       struct buffer_head *bh;
+       int err, num_frames = 0;
+       __u32 bhash;
+       assert(dx_index_is_compat(path));
+-      p = path->dp_frame;
++      p = path->ip_frame;
+       /*
+        * Find the next leaf page by incrementing the frame pointer.
+        * If we run out of entries in the interior node, loop around and
+@@ -732,11 +1249,11 @@
+        * nodes need to be read.
+        */
+       while (1) {
+-              p->at = dx_entry_shift(path, p->at, +1);
+-              if (p->at < dx_entry_shift(path, p->entries,
++              p->at = iam_entry_shift(path, p->at, +1);
++              if (p->at < iam_entry_shift(path, p->entries,
+                                          dx_get_count(p->entries)))
+                       break;
+-              if (p == path->dp_frames)
++              if (p == path->ip_frames)
+                       return 0;
+               num_frames++;
+               --p;
+@@ -749,7 +1266,7 @@
+        * desired contiuation hash.  If it doesn't, return since
+        * there's no point to read in the successive index pages.
+        */
+-      dx_get_key(path, p->at, (struct dx_key *)&bhash);
++      dx_get_key(path, p->at, (struct iam_key *)&bhash);
+       if (start_hash)
+               *start_hash = bhash;
+       if ((hash & 1) == 0) {
+@@ -761,8 +1278,10 @@
+        * block so no check is necessary
+        */
+       while (num_frames--) {
+-              if (!(bh = ext3_bread(NULL, dir,
+-                                    dx_get_block(path, p->at), 0, &err)))
++              err = path_descr(path)->id_node_read(path->ip_container,
++                                                   (iam_ptr_t)dx_get_block(path, p->at),
++                                                   NULL, &bh);
++              if (err != 0)
+                       return err; /* Failure */
+               ++p;
+               brelse (p->bh);
+@@ -837,8 +1356,8 @@
+ {
+       struct dx_hash_info hinfo;
+       struct ext3_dir_entry_2 *de;
+-      struct dx_path_compat cpath;
+-      struct dx_path *path = &cpath.dpc_path;
++      struct iam_path_compat cpath;
++      struct iam_path *path = &cpath.ipc_path;
+       struct inode *dir;
+       int block, err;
+       int count = 0;
+@@ -848,7 +1367,7 @@
+       dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
+                      start_minor_hash));
+       dir = dir_file->f_dentry->d_inode;
+-      dx_path_compat_init(&cpath, dir);
++      iam_path_compat_init(&cpath, dir);
+       if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
+               hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
+               hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
+@@ -865,7 +1384,7 @@
+       /* Add '.' and '..' from the htree header */
+       if (!start_hash && !start_minor_hash) {
+-              de = (struct ext3_dir_entry_2 *) path->dp_frames[0].bh->b_data;
++              de = (struct ext3_dir_entry_2 *) path->ip_frames[0].bh->b_data;
+               if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0)
+                       goto errout;
+               de = ext3_next_entry(de);
+@@ -875,7 +1394,7 @@
+       }
+       while (1) {
+-              block = dx_get_block(path, path->dp_frame->at);
++              block = dx_get_block(path, path->ip_frame->at);
+               ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
+                                            start_hash, start_minor_hash);
+               if (ret < 0) {
+@@ -900,12 +1419,12 @@
+                   (count && ((hashval & 1) == 0)))
+                       break;
+       }
+-      dx_path_fini(path);
++      iam_path_fini(path);
+       dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
+                      count, *next_hash));
+       return count;
+ errout:
+-      dx_path_fini(path);
++      iam_path_fini(path);
+       return (err);
+ }
+@@ -964,18 +1483,18 @@
+       } while(more);
+ }
+-static void dx_insert_block(struct dx_path *path,
+-                          struct dx_frame *frame, u32 hash, u32 block)
++static void dx_insert_block(struct iam_path *path,
++                          struct iam_frame *frame, u32 hash, u32 block)
+ {
+-      struct dx_entry *entries = frame->entries;
+-      struct dx_entry *old = frame->at, *new = dx_entry_shift(path, old, +1);
++      struct iam_entry *entries = frame->entries;
++      struct iam_entry *old = frame->at, *new = iam_entry_shift(path, old, +1);
+       int count = dx_get_count(entries);
+       assert(count < dx_get_limit(entries));
+-      assert(old < dx_entry_shift(path, entries, count));
+-      memmove(dx_entry_shift(path, new, 1), new,
+-              (char *)dx_entry_shift(path, entries, count) - (char *)new);
+-      dx_set_key(path, new, (struct dx_key *)&hash);
++      assert(old < iam_entry_shift(path, entries, count));
++      memmove(iam_entry_shift(path, new, 1), new,
++              (char *)iam_entry_shift(path, entries, count) - (char *)new);
++      dx_set_key(path, new, (struct iam_key *)&hash);
+       dx_set_block(path, new, block);
+       dx_set_count(entries, count + 1);
+ }
+@@ -1177,9 +1696,9 @@
+       struct super_block * sb;
+       struct dx_hash_info     hinfo;
+       u32 hash;
+-      struct dx_path_compat cpath;
+-      struct dx_path *path = &cpath.dpc_path;
+-      struct dx_entry_compat dummy_dot = {
++      struct iam_path_compat cpath;
++      struct iam_path *path = &cpath.ipc_path;
++      struct iam_entry_compat dummy_dot = {
+               .block = 0
+       };
+       struct ext3_dir_entry_2 *de, *top;
+@@ -1190,8 +1709,8 @@
+       const u8 *name = dentry->d_name.name;
+       struct inode *dir = dentry->d_parent->d_inode;
+-      dx_path_compat_init(&cpath, dir);
+-      
++      iam_path_compat_init(&cpath, dir);
++
+       sb = dir->i_sb;
+       /* NFS may look up ".." - look at dx_root directory block */
+       if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
+@@ -1199,13 +1718,15 @@
+               if (*err != 0)
+                       return NULL;
+       } else {
+-              path->dp_frame->bh = NULL;              /* for dx_path_fini() */
+-              path->dp_frame->at = (void *)&dummy_dot;/* hack for zero entry*/
++              path->ip_frame->bh = NULL;              /* for iam_path_fini() */
++              path->ip_frame->at = (void *)&dummy_dot;/* hack for zero entry*/
+       }
+       hash = hinfo.hash;
+       do {
+-              block = dx_get_block(path, path->dp_frame->at);
+-              if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
++              block = dx_get_block(path, path->ip_frame->at);
++              *err = path_descr(path)->id_node_read(path->ip_container, (iam_ptr_t)block,
++                                                   NULL, &bh);
++              if (*err != 0)
+                       goto errout;
+               de = (struct ext3_dir_entry_2 *) bh->b_data;
+               top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
+@@ -1220,7 +1741,7 @@
+                               goto errout;
+                       }
+                       *res_dir = de;
+-                      dx_path_fini(path);
++                      iam_path_fini(path);
+                       return bh;
+               }
+               brelse (bh);
+@@ -1238,7 +1759,7 @@
+       *err = -ENOENT;
+ errout:
+       dxtrace(printk("%s not found\n", name));
+-      dx_path_fini(path);
++      iam_path_fini(path);
+       return NULL;
+ }
+ #endif
+@@ -1363,11 +1884,11 @@
+ /* Allocate new node, and split leaf node @bh into it, inserting new pointer
+  * into parent node identified by @frame */
+-static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct dx_path *path,
+-                      struct buffer_head **bh,struct dx_frame *frame,
++static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct iam_path *path,
++                      struct buffer_head **bh,struct iam_frame *frame,
+                       struct dx_hash_info *hinfo, int *error)
+ {
+-      struct inode *dir = path->dp_object;
++      struct inode *dir = path_obj(path);
+       unsigned blocksize = dir->i_sb->s_blocksize;
+       unsigned count, continued;
+       struct buffer_head *bh2;
+@@ -1553,9 +2074,9 @@
+       int             namelen = dentry->d_name.len;
+       struct buffer_head *bh2;
+       struct dx_root  *root;
+-      struct dx_path_compat cpath;
+-      struct dx_path *path = &cpath.dpc_path;
+-      struct dx_entry *entries;
++      struct iam_path_compat cpath;
++      struct iam_path *path = &cpath.ipc_path;
++      struct iam_entry *entries;
+       struct ext3_dir_entry_2 *de, *de2;
+       char            *data1, *top;
+       unsigned        len;
+@@ -1565,7 +2086,7 @@
+       u32             block;
+       struct fake_dirent *fde;
+-      dx_path_compat_init(&cpath, dir);
++      iam_path_compat_init(&cpath, dir);
+       blocksize =  dir->i_sb->s_blocksize;
+       dxtrace(printk("Creating index\n"));
+       retval = ext3_journal_get_write_access(handle, bh);
+@@ -1612,12 +2133,12 @@
+       hinfo.hash_version = root->info.hash_version;
+       hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
+       ext3fs_dirhash(name, namelen, &hinfo);
+-      path->dp_frame->entries = entries;
+-      path->dp_frame->at = entries;
+-      path->dp_frame->bh = bh;
++      path->ip_frame->entries = entries;
++      path->ip_frame->at = entries;
++      path->ip_frame->bh = bh;
+       bh = bh2;
+-      de = do_split(handle, path, &bh, path->dp_frame, &hinfo, &retval);
+-      dx_path_fini(path);
++      de = do_split(handle, path, &bh, path->ip_frame, &hinfo, &retval);
++      iam_path_fini(path);
+       if (!de)
+               return retval;
+@@ -1698,12 +2219,12 @@
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+                            struct inode *inode)
+ {
+-      struct dx_path_compat cpath;
+-      struct dx_path *path = &cpath.dpc_path;
+-      struct dx_param *param;
+-      struct dx_frame *frame, *safe;
+-      struct dx_entry *entries;   /* old block contents */
+-      struct dx_entry *entries2;  /* new block contents */
++      struct iam_path_compat cpath;
++      struct iam_path *path = &cpath.ipc_path;
++      struct iam_descr *param;
++      struct iam_frame *frame, *safe;
++      struct iam_entry *entries;   /* old block contents */
++      struct iam_entry *entries2;  /* new block contents */
+       struct dx_hash_info hinfo;
+       struct buffer_head * bh;
+       struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {0};
+@@ -1716,20 +2237,22 @@
+       int i;
+       size_t isize;
+-      dx_path_compat_init(&cpath, dir);
+-      param = path->dp_param;
++      iam_path_compat_init(&cpath, dir);
++      param = path_descr(path);
+       err = dx_probe(dentry, NULL, &hinfo, path);
+       if (err != 0)
+               return err;
+-      frame = path->dp_frame;
++      frame = path->ip_frame;
+       entries = frame->entries;
+       /* XXX nikita: global serialization! */
+       isize = dir->i_size;
+-      if (!(bh = ext3_bread(handle, dir,
+-                            dx_get_block(path, frame->at), 0, &err)))
++      err = param->id_node_read(path->ip_container, 
++                                (iam_ptr_t)dx_get_block(path, 
++                                frame->at), handle, &bh);
++      if (err != 0)
+               goto cleanup;
+       BUFFER_TRACE(bh, "get_write_access");
+@@ -1761,7 +2284,7 @@
+                      dx_get_count(entries), dx_get_limit(entries)));
+       /* What levels need split? */
+-      for (nr_splet = 0; frame >= path->dp_frames &&
++      for (nr_splet = 0; frame >= path->ip_frames &&
+            dx_get_count(frame->entries) == dx_get_limit(frame->entries);
+            --frame, ++nr_splet) {
+               if (nr_splet == DX_MAX_TREE_HEIGHT) {
+@@ -1778,7 +2301,7 @@
+       for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
+               bh_new[i] = ext3_append (handle, dir, &newblock[i], &err);
+               if (!bh_new[i] ||
+-                  param->dpo_node_init(path, bh_new[i], 0) != 0)
++                  param->id_node_init(path->ip_container, bh_new[i], 0) != 0)
+                       goto cleanup;
+               BUFFER_TRACE(frame->bh, "get_write_access");
+               err = ext3_journal_get_write_access(handle, frame->bh);
+@@ -1786,7 +2309,7 @@
+                       goto journal_error;
+       }
+       /* Add "safe" node to transaction too */
+-      if (safe + 1 != path->dp_frames) {
++      if (safe + 1 != path->ip_frames) {
+               err = ext3_journal_get_write_access(handle, safe->bh);
+               if (err)
+                       goto journal_error;
+@@ -1800,12 +2323,12 @@
+               entries = frame->entries;
+               count = dx_get_count(entries);
+-              idx = dx_entry_diff(path, frame->at, entries);
++              idx = iam_entry_diff(path, frame->at, entries);
+               bh2 = bh_new[i];
+               entries2 = dx_get_entries(path, bh2->b_data, 0);
+-              if (frame == path->dp_frames) {
++              if (frame == path->ip_frames) {
+                       /* splitting root node. Tricky point:
+                        *
+                        * In the "normal" B-tree we'd split root *and* add
+@@ -1818,14 +2341,14 @@
+                        */
+                       struct dx_root *root;
+                       u8 indirects;
+-                      struct dx_frame *frames;
++                      struct iam_frame *frames;
+-                      frames = path->dp_frames;
++                      frames = path->ip_frames;
+                       root = (struct dx_root *) frames->bh->b_data;
+                       indirects = root->info.indirect_levels;
+                       dxtrace(printk("Creating new root %d\n", indirects));
+                       memcpy((char *) entries2, (char *) entries,
+-                             count * dx_entry_size(path));
++                             count * iam_entry_size(path));
+                       dx_set_limit(entries2, dx_node_limit(path));
+                       /* Set up root */
+@@ -1835,9 +2358,9 @@
+                       /* Shift frames in the path */
+                       memmove(frames + 2, frames + 1,
+-                              (sizeof path->dp_frames) - 2 * sizeof frames[0]);
++                              (sizeof path->ip_frames) - 2 * sizeof frames[0]);
+                       /* Add new access path frame */
+-                      frames[1].at = dx_entry_shift(path, entries2, idx);
++                      frames[1].at = iam_entry_shift(path, entries2, idx);
+                       frames[1].entries = entries = entries2;
+                       frames[1].bh = bh2;
+                       assert(dx_node_check(path, frame));
+@@ -1853,22 +2376,22 @@
+                       unsigned hash2;
+                       dx_get_key(path,
+-                                 dx_entry_shift(path, entries, count1),
+-                                 (struct dx_key *)&hash2);
++                                 iam_entry_shift(path, entries, count1),
++                                 (struct iam_key *)&hash2);
+                       dxtrace(printk("Split index %i/%i\n", count1, count2));
+                       memcpy ((char *) entries2,
+-                              (char *) dx_entry_shift(path, entries, count1),
+-                              count2 * dx_entry_size(path));
++                              (char *) iam_entry_shift(path, entries, count1),
++                              count2 * iam_entry_size(path));
+                       dx_set_count (entries, count1);
+                       dx_set_count (entries2, count2);
+                       dx_set_limit (entries2, dx_node_limit(path));
+                       /* Which index block gets the new entry? */
+                       if (idx >= count1) {
+-                              frame->at = dx_entry_shift(path, entries2,
+-                                                         idx - count1);
++                              frame->at = iam_entry_shift(path, entries2,
++                                                          idx - count1);
+                               frame->entries = entries = entries2;
+                               swap(frame->bh, bh2);
+                               bh_new[i] = bh2;
+@@ -1903,7 +2426,7 @@
+       }
+       if (err)
+               inode->i_size = isize;
+-      dx_path_fini(path);
++      iam_path_fini(path);
+       return err;
+ }
+ #endif
index 9a2edbd..ec66561 100644 (file)
@@ -1,8 +1,20 @@
 Index: iam-src/fs/ext3/namei.c
 ===================================================================
---- iam-src.orig/fs/ext3/namei.c       2006-02-12 16:43:57.000000000 +0300
-+++ iam-src/fs/ext3/namei.c    2006-02-12 23:22:12.000000000 +0300
-@@ -83,22 +83,21 @@ static struct buffer_head *ext3_append(h
+--- iam-src.orig/fs/ext3/namei.c       2006-02-15 18:31:48.000000000 +0300
++++ iam-src/fs/ext3/namei.c    2006-02-15 21:25:34.000000000 +0300
+@@ -51,7 +51,10 @@
+ /*
+  * Maximal number of non-leaf levels in htree. In the stock ext3 this is 2.
+  */
+-#define DX_MAX_TREE_HEIGHT (5)
++enum {
++      DX_MAX_TREE_HEIGHT = 5,
++      DX_SCRATCH_KEYS    = 2
++};
+ static struct buffer_head *ext3_append(handle_t *handle,
+                                       struct inode *inode,
+@@ -83,22 +86,22 @@ static struct buffer_head *ext3_append(h
  #define dxtrace(command)
  #endif
  
@@ -25,12 +37,13 @@ Index: iam-src/fs/ext3/namei.c
 -struct dx_entry
 -{
 +struct dx_entry; /* incomplete type */
++struct dx_key;   /* incomplete type */
 +
 +struct dx_entry_compat {
        __le32 hash;
        __le32 block;
  };
-@@ -109,8 +108,7 @@ struct dx_entry
+@@ -109,8 +112,7 @@ struct dx_entry
   * hash version mod 4 should never be 0.  Sincerely, the paranoia department.
   */
  
@@ -40,7 +53,7 @@ Index: iam-src/fs/ext3/namei.c
        struct fake_dirent dot;
        char dot_name[4];
        struct fake_dirent dotdot;
-@@ -124,13 +122,13 @@ struct dx_root
+@@ -124,13 +126,13 @@ struct dx_root
                u8 unused_flags;
        }
        info;
@@ -56,7 +69,7 @@ Index: iam-src/fs/ext3/namei.c
  };
  
  
-@@ -147,38 +145,76 @@ struct dx_map_entry
+@@ -147,38 +149,88 @@ struct dx_map_entry
        u32 offs;
  };
  
@@ -72,6 +85,8 @@ Index: iam-src/fs/ext3/namei.c
 +                            struct dx_frame *frame, void *cookie);
 +      int (*dpo_node_init)(struct dx_path *path,
 +                           struct buffer_head *bh, int root);
++      int (*dpo_keycmp)(struct dx_path *path,
++                        struct dx_key *k1, struct dx_key *k2);
 +};
 +
  /*
@@ -86,8 +101,13 @@ Index: iam-src/fs/ext3/namei.c
 +      int                   dp_indirect;
 +      struct dx_frame       dp_frames[DX_MAX_TREE_HEIGHT];
 +      struct dx_frame      *dp_frame;
-+      void                 *dp_key_target;
-+      void                 *dp_key;
++      struct dx_key        *dp_key_target;
++      struct dx_key        *dp_key_scratch[DX_SCRATCH_KEYS];
++};
++
++struct dx_path_compat {
++      struct dx_path dpc_path;
++      __u32          dpc_scrach[DX_SCRATCH_KEYS];
  };
  
 +static u32 htree_root_ptr(struct dx_path *p);
@@ -95,6 +115,8 @@ Index: iam-src/fs/ext3/namei.c
 +                          struct dx_frame *frame, void *cookie);
 +static int htree_node_init(struct dx_path *path,
 +                         struct buffer_head *bh, int root);
++static int htree_keycmp(struct dx_path *path,
++                      struct dx_key *k1, struct dx_key *k2);
 +
 +static struct dx_param htree_compat_param = {
 +      .dpo_key_size = sizeof ((struct dx_map_entry *)NULL)->hash,
@@ -104,7 +126,8 @@ Index: iam-src/fs/ext3/namei.c
 +
 +      .dpo_root_ptr   = htree_root_ptr,
 +      .dpo_node_check = htree_node_check,
-+      .dpo_node_init  = htree_node_init
++      .dpo_node_init  = htree_node_init,
++      .dpo_keycmp     = htree_keycmp
 +};
 +
 +
@@ -127,9 +150,11 @@ Index: iam-src/fs/ext3/namei.c
 +static inline unsigned dx_get_block(struct dx_path *p, struct dx_entry *entry);
 +static void dx_set_block(struct dx_path *p,
 +                       struct dx_entry *entry, unsigned value);
-+static inline void *dx_get_key(struct dx_path *p,
-+                             struct dx_entry *entry, void *key);
-+static void dx_set_key(struct dx_path *p, struct dx_entry *entry, void *key);
++static inline struct dx_key *dx_get_key(struct dx_path *p,
++                                      struct dx_entry *entry,
++                                      struct dx_key *key);
++static void dx_set_key(struct dx_path *p, struct dx_entry *entry,
++                     struct dx_key *key);
 +static unsigned dx_get_count(struct dx_entry *entries);
 +static unsigned dx_get_limit(struct dx_entry *entries);
 +static void dx_set_count(struct dx_entry *entries, unsigned value);
@@ -152,7 +177,7 @@ Index: iam-src/fs/ext3/namei.c
  static int ext3_htree_next_block(struct inode *dir, __u32 hash,
                                 struct dx_path *path, __u32 *start_hash);
  static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
-@@ -186,29 +222,65 @@ static struct buffer_head * ext3_dx_find
+@@ -186,29 +238,72 @@ static struct buffer_head * ext3_dx_find
  static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
                             struct inode *inode);
  
@@ -167,75 +192,79 @@ Index: iam-src/fs/ext3/namei.c
  
 -static inline unsigned dx_get_block (struct dx_entry *entry)
 +static inline void *entry_off(struct dx_entry *entry, ptrdiff_t off)
-+{
+ {
+-      return le32_to_cpu(entry->block) & 0x00ffffff;
 +      return (void *)((char *)entry + off);
-+}
-+
+ }
+-static inline void dx_set_block (struct dx_entry *entry, unsigned value)
 +static inline size_t dx_entry_size(struct dx_path *p)
  {
--      return le32_to_cpu(entry->block) & 0x00ffffff;
+-      entry->block = cpu_to_le32(value);
 +      return p->dp_param->dpo_key_size + p->dp_param->dpo_ptr_size;
  }
  
--static inline void dx_set_block (struct dx_entry *entry, unsigned value)
+-static inline unsigned dx_get_hash (struct dx_entry *entry)
 +static inline struct dx_entry *dx_entry_shift(struct dx_path *p,
 +                                            struct dx_entry *entry, int shift)
  {
--      entry->block = cpu_to_le32(value);
+-      return le32_to_cpu(entry->hash);
 +      void *e = entry;
 +      return e + shift * dx_entry_size(p);
- }
--static inline unsigned dx_get_hash (struct dx_entry *entry)
++}
++
 +static inline ptrdiff_t dx_entry_diff(struct dx_path *p,
 +                                    struct dx_entry *e1, struct dx_entry *e2)
- {
--      return le32_to_cpu(entry->hash);
++{
 +      ptrdiff_t diff;
 +
 +      diff = (void *)e1 - (void *)e2;
 +      assert(diff / dx_entry_size(p) * dx_entry_size(p) == diff);
 +      return diff / dx_entry_size(p);
-+}
-+
-+static inline unsigned dx_get_block(struct dx_path *p, struct dx_entry *entry)
-+{
-+      return le32_to_cpu(*(u32 *)entry_off(entry, p->dp_param->dpo_key_size))
-+              & 0x00ffffff;
  }
  
 -static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
-+static inline void dx_set_block(struct dx_path *p,
-+                              struct dx_entry *entry, unsigned value)
++static inline unsigned dx_get_block(struct dx_path *p, struct dx_entry *entry)
  {
 -      entry->hash = cpu_to_le32(value);
++      return le32_to_cpu(*(u32 *)entry_off(entry, p->dp_param->dpo_key_size))
++              & 0x00ffffff;
++}
++
++static inline void dx_set_block(struct dx_path *p,
++                              struct dx_entry *entry, unsigned value)
++{
 +      *(u32*)entry_off(entry, p->dp_param->dpo_key_size) = cpu_to_le32(value);
 +}
 +
-+static inline void *dx_get_key(struct dx_path *p,
-+                             struct dx_entry *entry, void *key)
++static inline struct dx_key *dx_get_key(struct dx_path *p,
++                                      struct dx_entry *entry,
++                                      struct dx_key *key)
 +{
 +      memcpy(key, entry, p->dp_param->dpo_key_size);
 +      return key;
 +}
 +
++static inline struct dx_key *dx_key_at(struct dx_path *p,
++                                     struct dx_entry *entry)
++{
++      return (struct dx_key *)entry;
++}
++
 +static inline void dx_set_key(struct dx_path *p,
-+                            struct dx_entry *entry, void *key)
++                            struct dx_entry *entry, struct dx_key *key)
 +{
 +      memcpy(entry, key, p->dp_param->dpo_key_size);
  }
  
  static inline unsigned dx_get_count (struct dx_entry *entries)
-@@ -231,17 +303,123 @@ static inline void dx_set_limit (struct 
+@@ -231,17 +326,163 @@ static inline void dx_set_limit (struct 
        ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
  }
  
 -static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
 +static inline unsigned dx_root_limit(struct dx_path *p)
- {
--      unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
--              EXT3_DIR_REC_LEN(2) - infosize;
--      return 0? 20: entry_space / sizeof(struct dx_entry);
++{
 +      struct dx_param *param = p->dp_param;
 +      unsigned entry_space   = p->dp_object->i_sb->s_blocksize -
 +              param->dpo_root_gap;
@@ -270,6 +299,37 @@ Index: iam-src/fs/ext3/namei.c
 +                            frame->bh->b_data, frame == path->dp_frames);
 +}
 +
++static inline struct dx_key *keycpy(struct dx_path *p,
++                                  struct dx_key *k1, struct dx_key *k2)
++{
++      return memcpy(k1, k2, p->dp_param->dpo_key_size);
++}
++
++static inline int keycmp(struct dx_path *p,
++                       struct dx_key *k1, struct dx_key *k2)
++{
++      return p->dp_param->dpo_keycmp(p, k1, k2);
++}
++
++static int dx_node_check(struct dx_path *p, struct dx_frame *f)
++{
++      struct dx_entry *e;
++      unsigned count;
++      unsigned  i;
++
++      e = dx_node_get_entries(p, f);
++      count = dx_get_count(e);
++      e = dx_entry_shift(p, e, 1);
++      for (i = 0; i < count - 1; ++i, e = dx_entry_shift(p, e, 1)) {
++              keycpy(p, p->dp_key_scratch[0], p->dp_key_scratch[1]);
++              dx_get_key(p, e, p->dp_key_scratch[1]);
++              if (i > 0 &&
++                  keycmp(p, p->dp_key_scratch[0], p->dp_key_scratch[1]) > 0)
++                      return 0;
++      }
++      return 1;
++}
++
 +static u32 htree_root_ptr(struct dx_path *path)
 +{
 +      return 0;
@@ -330,7 +390,7 @@ Index: iam-src/fs/ext3/namei.c
 +              if (hc->dentry)
 +                      ext3fs_dirhash(hc->dentry->d_name.name,
 +                                     hc->dentry->d_name.len, hc->hinfo);
-+              path->dp_key_target = &hc->hinfo->hash;
++              path->dp_key_target = (struct dx_key *)&hc->hinfo->hash;
 +      } else {
 +              /* non-root index */
 +              assert(entries == data + path->dp_param->dpo_node_gap);
@@ -338,14 +398,14 @@ Index: iam-src/fs/ext3/namei.c
 +      }
 +      frame->entries = frame->at = entries;
 +      return 0;
- }
--static inline unsigned dx_node_limit (struct inode *dir)
++}
++
 +static int htree_node_init(struct dx_path *path,
 +                         struct buffer_head *bh, int root)
  {
--      unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
--      return 0? 22: entry_space / sizeof(struct dx_entry);
+-      unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
+-              EXT3_DIR_REC_LEN(2) - infosize;
+-      return 0? 20: entry_space / sizeof(struct dx_entry);
 +      struct dx_node *node;
 +
 +      assert(!root);
@@ -356,8 +416,20 @@ Index: iam-src/fs/ext3/namei.c
 +      return 0;
  }
  
+-static inline unsigned dx_node_limit (struct inode *dir)
++static int htree_keycmp(struct dx_path *path,
++                      struct dx_key *k1, struct dx_key *k2)
+ {
+-      unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
+-      return 0? 22: entry_space / sizeof(struct dx_entry);
++      __u32 p1 = le32_to_cpu(*(__u32 *)k1);
++      __u32 p2 = le32_to_cpu(*(__u32 *)k2);
++
++      return p1 > p2 ? +1 : (p1 < p2 ? -1 : 0);
+ }
  /*
-@@ -327,123 +505,101 @@ struct stats dx_show_entries(struct dx_h
+@@ -327,123 +568,105 @@ struct stats dx_show_entries(struct dx_h
  }
  #endif /* DX_DEBUG */
  
@@ -454,6 +526,8 @@ Index: iam-src/fs/ext3/namei.c
 +              if (err != 0)
 +                      break;
 +
++              assert(dx_node_check(path, frame));
++
 +              entries = frame->entries;
                count = dx_get_count(entries);
 -              assert (count && count <= dx_get_limit(entries));
@@ -471,9 +545,8 @@ Index: iam-src/fs/ext3/namei.c
                        dxtrace(printk("."));
 -                      if (dx_get_hash(m) > hash)
 -                              q = m - 1;
-+                      if (memcmp(dx_get_key(path, m, path->dp_key),
-+                                 path->dp_key_target,
-+                                 param->dpo_key_size) > 0)
++                      if (keycmp(path, dx_key_at(path, m),
++                                 path->dp_key_target) > 0)
 +                              q = dx_entry_shift(path, m, -1);
                        else
 -                              p = m + 1;
@@ -496,9 +569,14 @@ Index: iam-src/fs/ext3/namei.c
 -                              {
 -                                      at--;
 +                              at = dx_entry_shift(path, at, +1);
-+                              if (memcmp(dx_get_key(path, at, path->dp_key),
-+                                         path->dp_key_target,
-+                                         param->dpo_key_size) > 0) {
++                              if (keycmp(path, dx_key_at(path, at),
++                                         path->dp_key_target) > 0) {
++                                      if (at != dx_entry_shift(path, frame->at, 1)) {
++                                              BREAKPOINT;
++                                              printk(KERN_EMERG "%i\n",
++                                                     keycmp(path, dx_key_at(path, at),
++                                                            path->dp_key_target));
++                                      }
 +                                      at = dx_entry_shift(path, at, -1);
                                        break;
                                }
@@ -546,24 +624,22 @@ Index: iam-src/fs/ext3/namei.c
 +                  struct dx_hash_info *hinfo, struct dx_path *path)
 +{
 +      int err;
-+      __u32 hash_storage;
 +      struct htree_cookie hc = {
 +              .dentry = dentry,
 +              .hinfo  = hinfo
 +      };
 +
 +      assert(dx_index_is_compat(path));
-+      path->dp_key = &hash_storage;
 +      err = dx_lookup(path, &hc);
 +      assert(err != 0 || path->dp_frames[path->dp_indirect].bh != NULL);
 +      return err;
  }
  
  static inline void dx_path_init(struct dx_path *path, struct inode *inode)
-@@ -458,8 +614,10 @@ static inline void dx_path_fini(struct d
+@@ -458,11 +681,24 @@ static inline void dx_path_fini(struct d
        int i;
  
-       for (i = 0; i < ARRAY_SIZE(path->dp_frames); i--) {
+       for (i = 0; i < ARRAY_SIZE(path->dp_frames); i++) {
 -              if (path->dp_frames[i].bh != NULL)
 +              if (path->dp_frames[i].bh != NULL) {
                        brelse(path->dp_frames[i].bh);
@@ -572,7 +648,21 @@ Index: iam-src/fs/ext3/namei.c
        }
  }
  
-@@ -488,6 +646,8 @@ static int ext3_htree_next_block(struct 
++static void dx_path_compat_init(struct dx_path_compat *path,
++                              struct inode *inode)
++{
++      int i;
++      dx_path_init(&path->dpc_path, inode);
++      path->dpc_path.dp_param = &htree_compat_param;
++      for (i = 0; i < ARRAY_SIZE(path->dpc_path.dp_key_scratch); ++i)
++              path->dpc_path.dp_key_scratch[i] =
++                      (struct dx_key *)&path->dpc_scrach[i];
++}
++
+ /*
+  * This function increments the frame pointer to search the next leaf
+  * block, and reads in the necessary intervening nodes if the search
+@@ -488,6 +724,8 @@ static int ext3_htree_next_block(struct 
        int err, num_frames = 0;
        __u32 bhash;
  
@@ -581,7 +671,7 @@ Index: iam-src/fs/ext3/namei.c
        p = path->dp_frame;
        /*
         * Find the next leaf page by incrementing the frame pointer.
-@@ -497,7 +657,9 @@ static int ext3_htree_next_block(struct 
+@@ -497,7 +735,9 @@ static int ext3_htree_next_block(struct 
         * nodes need to be read.
         */
        while (1) {
@@ -592,16 +682,16 @@ Index: iam-src/fs/ext3/namei.c
                        break;
                if (p == path->dp_frames)
                        return 0;
-@@ -512,7 +674,7 @@ static int ext3_htree_next_block(struct 
+@@ -512,7 +752,7 @@ static int ext3_htree_next_block(struct 
         * desired contiuation hash.  If it doesn't, return since
         * there's no point to read in the successive index pages.
         */
 -      bhash = dx_get_hash(p->at);
-+      dx_get_key(path, p->at, &bhash);
++      dx_get_key(path, p->at, (struct dx_key *)&bhash);
        if (start_hash)
                *start_hash = bhash;
        if ((hash & 1) == 0) {
-@@ -524,12 +686,13 @@ static int ext3_htree_next_block(struct 
+@@ -524,12 +764,14 @@ static int ext3_htree_next_block(struct 
         * block so no check is necessary
         */
        while (num_frames--) {
@@ -614,37 +704,80 @@ Index: iam-src/fs/ext3/namei.c
                p->bh = bh;
 -              p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
 +              p->at = p->entries = dx_node_get_entries(path, p);
++              assert(dx_node_check(path, p));
        }
        return 1;
  }
-@@ -609,6 +772,7 @@ int ext3_htree_fill_tree(struct file *di
+@@ -598,7 +840,8 @@ int ext3_htree_fill_tree(struct file *di
+ {
+       struct dx_hash_info hinfo;
+       struct ext3_dir_entry_2 *de;
+-      struct dx_path path;
++      struct dx_path_compat cpath;
++      struct dx_path *path = &cpath.dpc_path;
+       struct inode *dir;
+       int block, err;
+       int count = 0;
+@@ -608,7 +851,7 @@ int ext3_htree_fill_tree(struct file *di
+       dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
                       start_minor_hash));
        dir = dir_file->f_dentry->d_inode;
-       dx_path_init(&path, dir);
-+      path.dp_param = &htree_compat_param;
+-      dx_path_init(&path, dir);
++      dx_path_compat_init(&cpath, dir);
        if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
                hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
                hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
-@@ -619,7 +783,8 @@ int ext3_htree_fill_tree(struct file *di
+@@ -619,12 +862,13 @@ int ext3_htree_fill_tree(struct file *di
        }
        hinfo.hash = start_hash;
        hinfo.minor_hash = 0;
 -      if (!dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, &path, &err))
-+      err = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, &path);
++      err = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, path);
 +      if (err != 0)
                return err;
  
        /* Add '.' and '..' from the htree header */
-@@ -634,7 +799,7 @@ int ext3_htree_fill_tree(struct file *di
+       if (!start_hash && !start_minor_hash) {
+-              de = (struct ext3_dir_entry_2 *) path.dp_frames[0].bh->b_data;
++              de = (struct ext3_dir_entry_2 *) path->dp_frames[0].bh->b_data;
+               if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0)
+                       goto errout;
+               de = ext3_next_entry(de);
+@@ -634,7 +878,7 @@ int ext3_htree_fill_tree(struct file *di
        }
  
        while (1) {
 -              block = dx_get_block(path.dp_frame->at);
-+              block = dx_get_block(&path, path.dp_frame->at);
++              block = dx_get_block(path, path->dp_frame->at);
                ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
                                             start_hash, start_minor_hash);
                if (ret < 0) {
-@@ -722,17 +887,19 @@ static void dx_sort_map (struct dx_map_e
+@@ -643,7 +887,8 @@ int ext3_htree_fill_tree(struct file *di
+               }
+               count += ret;
+               hashval = ~0;
+-              ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, &path, &hashval);
++              ret = ext3_htree_next_block(dir,
++                                          HASH_NB_ALWAYS, path, &hashval);
+               *next_hash = hashval;
+               if (ret < 0) {
+                       err = ret;
+@@ -658,12 +903,12 @@ int ext3_htree_fill_tree(struct file *di
+                   (count && ((hashval & 1) == 0)))
+                       break;
+       }
+-      dx_path_fini(&path);
++      dx_path_fini(path);
+       dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
+                      count, *next_hash));
+       return count;
+ errout:
+-      dx_path_fini(&path);
++      dx_path_fini(path);
+       return (err);
+ }
+@@ -722,17 +967,19 @@ static void dx_sort_map (struct dx_map_e
        } while(more);
  }
  
@@ -665,50 +798,78 @@ Index: iam-src/fs/ext3/namei.c
 +      assert(old < dx_entry_shift(path, entries, count));
 +      memmove(dx_entry_shift(path, new, 1), new,
 +              (char *)dx_entry_shift(path, entries, count) - (char *)new);
-+      dx_set_key(path, new, &hash);
++      dx_set_key(path, new, (struct dx_key *)&hash);
 +      dx_set_block(path, new, block);
        dx_set_count(entries, count + 1);
  }
  #endif
-@@ -934,7 +1101,9 @@ static struct buffer_head * ext3_dx_find
+@@ -933,8 +1180,11 @@ static struct buffer_head * ext3_dx_find
+       struct super_block * sb;
        struct dx_hash_info     hinfo;
        u32 hash;
-       struct dx_path path;
+-      struct dx_path path;
 -      struct dx_entry dummy_dot;
++      struct dx_path_compat cpath;
++      struct dx_path *path = &cpath.dpc_path;
 +      struct dx_entry_compat dummy_dot = {
 +              .block = 0
 +      };
        struct ext3_dir_entry_2 *de, *top;
        struct buffer_head *bh;
        unsigned long block;
-@@ -944,19 +1113,21 @@ static struct buffer_head * ext3_dx_find
+@@ -943,20 +1193,21 @@ static struct buffer_head * ext3_dx_find
+       const u8 *name = dentry->d_name.name;
        struct inode *dir = dentry->d_parent->d_inode;
  
-       dx_path_init(&path, dir);
-+      path.dp_param = &htree_compat_param;
+-      dx_path_init(&path, dir);
++      dx_path_compat_init(&cpath, dir);
 +      
        sb = dir->i_sb;
        /* NFS may look up ".." - look at dx_root directory block */
        if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
 -              if (!(dx_probe(dentry, NULL, &hinfo, &path, err)))
-+              *err = dx_probe(dentry, NULL, &hinfo, &path);
++              *err = dx_probe(dentry, NULL, &hinfo, path);
 +              if (*err != 0)
                        return NULL;
        } else {
 -              path.dp_frame->bh = NULL;                       /* for dx_path_fini() */
 -              path.dp_frame->at = &dummy_dot;         /* hack for zero entry*/
 -              dx_set_block(path.dp_frame->at, 0);     /* dx_root block is 0 */
-+              path.dp_frame->bh = NULL;               /* for dx_path_fini() */
-+              path.dp_frame->at = (void *)&dummy_dot; /* hack for zero entry*/
++              path->dp_frame->bh = NULL;              /* for dx_path_fini() */
++              path->dp_frame->at = (void *)&dummy_dot;/* hack for zero entry*/
        }
        hash = hinfo.hash;
        do {
 -              block = dx_get_block(path.dp_frame->at);
-+              block = dx_get_block(&path, path.dp_frame->at);
++              block = dx_get_block(path, path->dp_frame->at);
                if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
                        goto errout;
                de = (struct ext3_dir_entry_2 *) bh->b_data;
-@@ -1115,10 +1286,11 @@ static struct ext3_dir_entry_2* dx_pack_
+@@ -972,12 +1223,12 @@ static struct buffer_head * ext3_dx_find
+                               goto errout;
+                       }
+                       *res_dir = de;
+-                      dx_path_fini(&path);
++                      dx_path_fini(path);
+                       return bh;
+               }
+               brelse (bh);
+               /* Check to see if we should continue to search */
+-              retval = ext3_htree_next_block(dir, hash, &path, NULL);
++              retval = ext3_htree_next_block(dir, hash, path, NULL);
+               if (retval < 0) {
+                       ext3_warning(sb, __FUNCTION__,
+                            "error reading index page in directory #%lu",
+@@ -990,7 +1241,7 @@ static struct buffer_head * ext3_dx_find
+       *err = -ENOENT;
+ errout:
+       dxtrace(printk("%s not found\n", name));
+-      dx_path_fini(&path);
++      dx_path_fini(path);
+       return NULL;
+ }
+ #endif
+@@ -1115,10 +1366,11 @@ static struct ext3_dir_entry_2* dx_pack_
  
  /* Allocate new node, and split leaf node @bh into it, inserting new pointer
   * into parent node identified by @frame */
@@ -721,7 +882,7 @@ Index: iam-src/fs/ext3/namei.c
        unsigned blocksize = dir->i_sb->s_blocksize;
        unsigned count, continued;
        struct buffer_head *bh2;
-@@ -1180,7 +1352,7 @@ static struct ext3_dir_entry_2 *do_split
+@@ -1180,7 +1432,7 @@ static struct ext3_dir_entry_2 *do_split
                swap(*bh, bh2);
                de = de2;
        }
@@ -730,76 +891,109 @@ Index: iam-src/fs/ext3/namei.c
        err = ext3_journal_dirty_metadata (handle, bh2);
        if (err)
                goto journal_error;
-@@ -1315,6 +1487,7 @@ static int make_indexed_dir(handle_t *ha
+@@ -1303,7 +1555,8 @@ static int make_indexed_dir(handle_t *ha
+       int             namelen = dentry->d_name.len;
+       struct buffer_head *bh2;
+       struct dx_root  *root;
+-      struct dx_path path;
++      struct dx_path_compat cpath;
++      struct dx_path *path = &cpath.dpc_path;
+       struct dx_entry *entries;
+       struct ext3_dir_entry_2 *de, *de2;
+       char            *data1, *top;
+@@ -1314,7 +1567,7 @@ static int make_indexed_dir(handle_t *ha
+       u32             block;
        struct fake_dirent *fde;
  
-       dx_path_init(&path, dir);
-+      path.dp_param = &htree_compat_param;
+-      dx_path_init(&path, dir);
++      dx_path_compat_init(&cpath, dir);
        blocksize =  dir->i_sb->s_blocksize;
        dxtrace(printk("Creating index\n"));
        retval = ext3_journal_get_write_access(handle, bh);
-@@ -1350,10 +1523,10 @@ static int make_indexed_dir(handle_t *ha
+@@ -1350,21 +1603,21 @@ static int make_indexed_dir(handle_t *ha
        root->info.info_length = sizeof(root->info);
        root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
        root->info.hash_version = DX_HASH_R5;
 -      entries = root->entries;
 -      dx_set_block (entries, 1);
 +      entries = (void *)root->entries;
-+      dx_set_block (&path, entries, 1);
++      dx_set_block (path, entries, 1);
        dx_set_count (entries, 1);
 -      dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info)));
-+      dx_set_limit (entries, dx_root_limit(&path));
++      dx_set_limit (entries, dx_root_limit(path));
  
        /* Initialize as for dx_probe */
        hinfo.hash_version = root->info.hash_version;
-@@ -1363,7 +1536,7 @@ static int make_indexed_dir(handle_t *ha
-       path.dp_frame->at = entries;
-       path.dp_frame->bh = bh;
+       hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
+       ext3fs_dirhash(name, namelen, &hinfo);
+-      path.dp_frame->entries = entries;
+-      path.dp_frame->at = entries;
+-      path.dp_frame->bh = bh;
++      path->dp_frame->entries = entries;
++      path->dp_frame->at = entries;
++      path->dp_frame->bh = bh;
        bh = bh2;
 -      de = do_split(handle,dir, &bh, path.dp_frame, &hinfo, &retval);
-+      de = do_split(handle, &path, &bh, path.dp_frame, &hinfo, &retval);
-       dx_path_fini(&path);
+-      dx_path_fini(&path);
++      de = do_split(handle, path, &bh, path->dp_frame, &hinfo, &retval);
++      dx_path_fini(path);
        if (!de)
                return retval;
-@@ -1446,8 +1619,8 @@ static int ext3_dx_add_entry(handle_t *h
+@@ -1445,9 +1698,10 @@ static int ext3_add_entry (handle_t *han
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
                             struct inode *inode)
  {
-       struct dx_path path;
+-      struct dx_path path;
++      struct dx_path_compat cpath;
++      struct dx_path *path = &cpath.dpc_path;
 +      struct dx_param *param;
        struct dx_frame *frame, *safe;
 -      struct dx_node *node2;
        struct dx_entry *entries;   /* old block contents */
        struct dx_entry *entries2;  /* new block contents */
        struct dx_hash_info hinfo;
-@@ -1463,7 +1636,10 @@ static int ext3_dx_add_entry(handle_t *h
+@@ -1462,16 +1716,20 @@ static int ext3_dx_add_entry(handle_t *h
+       int i;
        size_t isize;
  
-       dx_path_init(&path, dir);
+-      dx_path_init(&path, dir);
 -      if (!dx_probe(dentry, NULL, &hinfo, &path, &err))
-+      param = path.dp_param = &htree_compat_param;
++      dx_path_compat_init(&cpath, dir);
++      param = path->dp_param;
 +
-+      err = dx_probe(dentry, NULL, &hinfo, &path);
++      err = dx_probe(dentry, NULL, &hinfo, path);
 +      if (err != 0)
                return err;
-       frame = path.dp_frame;
+-      frame = path.dp_frame;
++      frame = path->dp_frame;
        entries = frame->entries;
-@@ -1471,7 +1647,8 @@ static int ext3_dx_add_entry(handle_t *h
        /* XXX nikita: global serialization! */
        isize = dir->i_size;
  
 -      if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
 +      if (!(bh = ext3_bread(handle, dir,
-+                            dx_get_block(&path, frame->at), 0, &err)))
++                            dx_get_block(path, frame->at), 0, &err)))
                goto cleanup;
  
        BUFFER_TRACE(bh, "get_write_access");
-@@ -1519,12 +1696,9 @@ static int ext3_dx_add_entry(handle_t *h
+@@ -1503,7 +1761,7 @@ static int ext3_dx_add_entry(handle_t *h
+                      dx_get_count(entries), dx_get_limit(entries)));
+       /* What levels need split? */
+-      for (nr_splet = 0; frame >= path.dp_frames &&
++      for (nr_splet = 0; frame >= path->dp_frames &&
+            dx_get_count(frame->entries) == dx_get_limit(frame->entries);
+            --frame, ++nr_splet) {
+               if (nr_splet == DX_MAX_TREE_HEIGHT) {
+@@ -1519,19 +1777,16 @@ static int ext3_dx_add_entry(handle_t *h
         * transaction... */
        for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
                bh_new[i] = ext3_append (handle, dir, &newblock[i], &err);
 -              if (!bh_new[i])
 +              if (!bh_new[i] ||
-+                  param->dpo_node_init(&path, bh_new[i], 0) != 0)
++                  param->dpo_node_init(path, bh_new[i], 0) != 0)
                        goto cleanup;
 -              node2 = (struct dx_node *)(bh_new[i]->b_data);
 -              entries2 = node2->entries;
@@ -808,87 +1002,124 @@ Index: iam-src/fs/ext3/namei.c
                BUFFER_TRACE(frame->bh, "get_write_access");
                err = ext3_journal_get_write_access(handle, frame->bh);
                if (err)
-@@ -1545,11 +1719,10 @@ static int ext3_dx_add_entry(handle_t *h
+                       goto journal_error;
+       }
+       /* Add "safe" node to transaction too */
+-      if (safe + 1 != path.dp_frames) {
++      if (safe + 1 != path->dp_frames) {
+               err = ext3_journal_get_write_access(handle, safe->bh);
+               if (err)
+                       goto journal_error;
+@@ -1545,13 +1800,12 @@ static int ext3_dx_add_entry(handle_t *h
  
                entries = frame->entries;
                count = dx_get_count(entries);
 -              idx = frame->at - entries;
-+              idx = dx_entry_diff(&path, frame->at, entries);
++              idx = dx_entry_diff(path, frame->at, entries);
  
                bh2 = bh_new[i];
 -              node2 = (struct dx_node *)(bh2->b_data);
 -              entries2 = node2->entries;
-+              entries2 = dx_get_entries(&path, bh2->b_data, 0);
++              entries2 = dx_get_entries(path, bh2->b_data, 0);
  
-               if (frame == path.dp_frames) {
+-              if (frame == path.dp_frames) {
++              if (frame == path->dp_frames) {
                        /* splitting root node. Tricky point:
-@@ -1571,19 +1744,19 @@ static int ext3_dx_add_entry(handle_t *h
+                        *
+                        * In the "normal" B-tree we'd split root *and* add
+@@ -1566,27 +1820,29 @@ static int ext3_dx_add_entry(handle_t *h
+                       u8 indirects;
+                       struct dx_frame *frames;
+-                      frames = path.dp_frames;
++                      frames = path->dp_frames;
+                       root = (struct dx_root *) frames->bh->b_data;
                        indirects = root->info.indirect_levels;
                        dxtrace(printk("Creating new root %d\n", indirects));
                        memcpy((char *) entries2, (char *) entries,
 -                             count * sizeof(struct dx_entry));
 -                      dx_set_limit(entries2, dx_node_limit(dir));
-+                             count * dx_entry_size(&path));
-+                      dx_set_limit(entries2, dx_node_limit(&path));
++                             count * dx_entry_size(path));
++                      dx_set_limit(entries2, dx_node_limit(path));
  
                        /* Set up root */
                        dx_set_count(entries, 1);
 -                      dx_set_block(entries + 0, newblock[i]);
-+                      dx_set_block(&path, entries, newblock[i]);
++                      dx_set_block(path, entries, newblock[i]);
                        root->info.indirect_levels = indirects + 1;
  
                        /* Shift frames in the path */
                        memmove(frames + 2, frames + 1,
-                               (sizeof path.dp_frames) - 2 * sizeof frames[0]);
+-                              (sizeof path.dp_frames) - 2 * sizeof frames[0]);
++                              (sizeof path->dp_frames) - 2 * sizeof frames[0]);
                        /* Add new access path frame */
 -                      frames[1].at = entries2 + idx;
-+                      frames[1].at = dx_entry_shift(&path, entries2, idx);
++                      frames[1].at = dx_entry_shift(path, entries2, idx);
                        frames[1].entries = entries = entries2;
                        frames[1].bh = bh2;
++                      assert(dx_node_check(path, frame));
                        ++ frame;
-@@ -1594,23 +1767,30 @@ static int ext3_dx_add_entry(handle_t *h
++                      assert(dx_node_check(path, frame));
+                       bh_new[i] = NULL; /* buffer head is "consumed" */
+                       err = ext3_journal_get_write_access(handle, bh2);
+                       if (err)
+@@ -1594,23 +1850,32 @@ static int ext3_dx_add_entry(handle_t *h
                } else {
                        /* splitting non-root index node. */
                        unsigned count1 = count/2, count2 = count - count1;
 -                      unsigned hash2 = dx_get_hash(entries + count1);
 +                      unsigned hash2;
 +
-+                      dx_get_key(&path,
-+                                 dx_entry_shift(&path, entries, count1),
-+                                 &hash2);
++                      dx_get_key(path,
++                                 dx_entry_shift(path, entries, count1),
++                                 (struct dx_key *)&hash2);
 +
                        dxtrace(printk("Split index %i/%i\n", count1, count2));
  
 -                      memcpy ((char *) entries2, (char *) (entries + count1),
 -                              count2 * sizeof(struct dx_entry));
 +                      memcpy ((char *) entries2,
-+                              (char *) dx_entry_shift(&path, entries, count1),
-+                              count2 * dx_entry_size(&path));
++                              (char *) dx_entry_shift(path, entries, count1),
++                              count2 * dx_entry_size(path));
                        dx_set_count (entries, count1);
                        dx_set_count (entries2, count2);
 -                      dx_set_limit (entries2, dx_node_limit(dir));
-+                      dx_set_limit (entries2, dx_node_limit(&path));
++                      dx_set_limit (entries2, dx_node_limit(path));
  
                        /* Which index block gets the new entry? */
                        if (idx >= count1) {
 -                              frame->at = entries2 + idx - count1;
-+                              frame->at = dx_entry_shift(&path, entries2,
++                              frame->at = dx_entry_shift(path, entries2,
 +                                                         idx - count1);
                                frame->entries = entries = entries2;
                                swap(frame->bh, bh2);
                                bh_new[i] = bh2;
                        }
 -                      dx_insert_block (frame - 1, hash2, newblock[i]);
-+                      dx_insert_block(&path, frame - 1, hash2, newblock[i]);
++                      dx_insert_block(path, frame - 1, hash2, newblock[i]);
++                      assert(dx_node_check(path, frame));
++                      assert(dx_node_check(path, frame - 1));
                        dxtrace(dx_show_index ("node", frame->entries));
                        dxtrace(dx_show_index ("node",
                               ((struct dx_node *) bh2->b_data)->entries));
-@@ -1619,7 +1799,7 @@ static int ext3_dx_add_entry(handle_t *h
+@@ -1619,9 +1884,10 @@ static int ext3_dx_add_entry(handle_t *h
                                goto journal_error;
                }
        }
 -      de = do_split(handle, dir, &bh, --frame, &hinfo, &err);
-+      de = do_split(handle, &path, &bh, --frame, &hinfo, &err);
++      de = do_split(handle, path, &bh, --frame, &hinfo, &err);
        if (!de)
                goto cleanup;
++      assert(dx_node_check(path, frame));
        err = add_dirent_to_buf(handle, dentry, inode, de, bh);
+       goto cleanup2;
+@@ -1637,7 +1903,7 @@ cleanup2:
+       }
+       if (err)
+               inode->i_size = isize;
+-      dx_path_fini(&path);
++      dx_path_fini(path);
+       return err;
+ }
+ #endif
diff --git a/lustre/kernel_patches/patches/ext3-htree-path.patch b/lustre/kernel_patches/patches/ext3-htree-path.patch
new file mode 100644 (file)
index 0000000..893d1d1
--- /dev/null
@@ -0,0 +1,406 @@
+Index: iam-src/fs/ext3/namei.c
+===================================================================
+--- iam-src.orig/fs/ext3/namei.c       2006-02-09 20:44:02.000000000 +0300
++++ iam-src/fs/ext3/namei.c    2006-02-10 18:23:32.000000000 +0300
+@@ -147,6 +147,15 @@ struct dx_map_entry
+       u32 offs;
+ };
++/*
++ * Structure to keep track of a path drilled through htree.
++ */
++struct dx_path {
++      struct inode    *dp_object;
++      struct dx_frame  dp_frames[DX_MAX_TREE_HEIGHT];
++      struct dx_frame *dp_frame;
++};
++
+ #ifdef CONFIG_EXT3_INDEX
+ static inline unsigned dx_get_block (struct dx_entry *entry);
+ static void dx_set_block (struct dx_entry *entry, unsigned value);
+@@ -161,9 +170,8 @@ static unsigned dx_node_limit (struct in
+ static struct dx_frame *dx_probe(struct dentry *dentry,
+                                struct inode *dir,
+                                struct dx_hash_info *hinfo,
+-                               struct dx_frame *frame,
++                               struct dx_path *path,
+                                int *err);
+-static void dx_release (struct dx_frame *frames);
+ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
+                       struct dx_hash_info *hinfo, struct dx_map_entry map[]);
+ static void dx_sort_map(struct dx_map_entry *map, unsigned count);
+@@ -172,9 +180,7 @@ static struct ext3_dir_entry_2 *dx_move_
+ static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
+ static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
+ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+-                               struct dx_frame *frame,
+-                               struct dx_frame *frames,
+-                               __u32 *start_hash);
++                               struct dx_path *path, __u32 *start_hash);
+ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
+                      struct ext3_dir_entry_2 **res_dir, int *err);
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+@@ -332,13 +338,13 @@ struct stats dx_show_entries(struct dx_h
+  */
+ static struct dx_frame *
+ dx_probe(struct dentry *dentry, struct inode *dir,
+-       struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
++       struct dx_hash_info *hinfo, struct dx_path *path, int *err)
+ {
+       unsigned count, indirect;
+       struct dx_entry *at, *entries, *p, *q, *m;
+       struct dx_root *root;
+       struct buffer_head *bh;
+-      struct dx_frame *frame = frame_in;
++      struct dx_frame *frame = path->dp_frames;
+       u32 hash;
+       frame->bh = NULL;
+@@ -352,8 +358,7 @@ dx_probe(struct dentry *dentry, struct i
+           root->info.hash_version != DX_HASH_R5 &&
+           root->info.hash_version != DX_HASH_LEGACY) {
+               ext3_warning(dir->i_sb, __FUNCTION__,
+-                           "Unrecognised inode hash code %d",
+-                           root->info.hash_version);
++                           "Unrecognised inode hash code %d", root->info.hash_version);
+               brelse(bh);
+               *err = ERR_BAD_DX_DIR;
+               goto fail;
+@@ -424,7 +429,8 @@ dx_probe(struct dentry *dentry, struct i
+               frame->bh = bh;
+               frame->entries = entries;
+               frame->at = at;
+-              if (!indirect--) return frame;
++              if (!indirect--)
++                      return path->dp_frame = frame;
+               if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
+                       goto fail2;
+               at = entries = ((struct dx_node *) bh->b_data)->entries;
+@@ -432,7 +438,7 @@ dx_probe(struct dentry *dentry, struct i
+               frame++;
+       }
+ fail2:
+-      while (frame >= frame_in) {
++      while (frame >= path->dp_frames) {
+               brelse(frame->bh);
+               frame--;
+       }
+@@ -440,16 +446,20 @@ fail:
+       return NULL;
+ }
+-static void dx_release (struct dx_frame *frames)
++static inline void dx_path_init(struct dx_path *path, struct inode *inode)
+ {
+-      int height;
++      memset(path, 0, sizeof *path);
++      path->dp_object = inode;
++      path->dp_frame = path->dp_frames;
++}
+-      if (frames[0].bh == NULL)
+-              return;
+-      height = ((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels;
+-      for (; height >= 0; height--) {
+-              assert(frames[height].bh != NULL);
+-              brelse(frames[height].bh);
++static inline void dx_path_fini(struct dx_path *path)
++{
++      int i;
++
++      for (i = 0; i < ARRAY_SIZE(path->dp_frames); i++) {
++              if (path->dp_frames[i].bh != NULL)
++                      brelse(path->dp_frames[i].bh);
+       }
+ }
+@@ -471,16 +481,14 @@ static void dx_release (struct dx_frame 
+  * hash of the next page.
+  */
+ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+-                               struct dx_frame *frame,
+-                               struct dx_frame *frames,
+-                               __u32 *start_hash)
++                               struct dx_path *path, __u32 *start_hash)
+ {
+       struct dx_frame *p;
+       struct buffer_head *bh;
+       int err, num_frames = 0;
+       __u32 bhash;
+-      p = frame;
++      p = path->dp_frame;
+       /*
+        * Find the next leaf page by incrementing the frame pointer.
+        * If we run out of entries in the interior node, loop around and
+@@ -491,10 +499,10 @@ static int ext3_htree_next_block(struct 
+       while (1) {
+               if (++(p->at) < p->entries + dx_get_count(p->entries))
+                       break;
+-              if (p == frames)
++              if (p == path->dp_frames)
+                       return 0;
+               num_frames++;
+-              p--;
++              --p;
+       }
+       /*
+@@ -516,10 +524,9 @@ static int ext3_htree_next_block(struct 
+        * block so no check is necessary
+        */
+       while (num_frames--) {
+-              if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at),
+-                                    0, &err)))
++              if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at), 0, &err)))
+                       return err; /* Failure */
+-              p++;
++              ++p;
+               brelse (p->bh);
+               p->bh = bh;
+               p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
+@@ -591,7 +598,7 @@ int ext3_htree_fill_tree(struct file *di
+ {
+       struct dx_hash_info hinfo;
+       struct ext3_dir_entry_2 *de;
+-      struct dx_frame frames[DX_MAX_TREE_HEIGHT], *frame;
++      struct dx_path path;
+       struct inode *dir;
+       int block, err;
+       int count = 0;
+@@ -601,6 +608,7 @@ int ext3_htree_fill_tree(struct file *di
+       dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
+                      start_minor_hash));
+       dir = dir_file->f_dentry->d_inode;
++      dx_path_init(&path, dir);
+       if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
+               hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
+               hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
+@@ -611,13 +619,12 @@ int ext3_htree_fill_tree(struct file *di
+       }
+       hinfo.hash = start_hash;
+       hinfo.minor_hash = 0;
+-      frame = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
+-      if (!frame)
++      if (!dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, &path, &err))
+               return err;
+       /* Add '.' and '..' from the htree header */
+       if (!start_hash && !start_minor_hash) {
+-              de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
++              de = (struct ext3_dir_entry_2 *) path.dp_frames[0].bh->b_data;
+               if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0)
+                       goto errout;
+               de = ext3_next_entry(de);
+@@ -627,7 +634,7 @@ int ext3_htree_fill_tree(struct file *di
+       }
+       while (1) {
+-              block = dx_get_block(frame->at);
++              block = dx_get_block(path.dp_frame->at);
+               ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
+                                            start_hash, start_minor_hash);
+               if (ret < 0) {
+@@ -636,8 +643,7 @@ int ext3_htree_fill_tree(struct file *di
+               }
+               count += ret;
+               hashval = ~0;
+-              ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS,
+-                                          frame, frames, &hashval);
++              ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, &path, &hashval);
+               *next_hash = hashval;
+               if (ret < 0) {
+                       err = ret;
+@@ -652,12 +658,12 @@ int ext3_htree_fill_tree(struct file *di
+                   (count && ((hashval & 1) == 0)))
+                       break;
+       }
+-      dx_release(frames);
++      dx_path_fini(&path);
+       dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
+                      count, *next_hash));
+       return count;
+ errout:
+-      dx_release(frames);
++      dx_path_fini(&path);
+       return (err);
+ }
+@@ -927,7 +933,8 @@ static struct buffer_head * ext3_dx_find
+       struct super_block * sb;
+       struct dx_hash_info     hinfo;
+       u32 hash;
+-      struct dx_frame frames[DX_MAX_TREE_HEIGHT], *frame;
++      struct dx_path path;
++      struct dx_entry dummy_dot;
+       struct ext3_dir_entry_2 *de, *top;
+       struct buffer_head *bh;
+       unsigned long block;
+@@ -936,20 +943,20 @@ static struct buffer_head * ext3_dx_find
+       const u8 *name = dentry->d_name.name;
+       struct inode *dir = dentry->d_parent->d_inode;
++      dx_path_init(&path, dir);
+       sb = dir->i_sb;
+       /* NFS may look up ".." - look at dx_root directory block */
+       if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
+-              if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err)))
++              if (!(dx_probe(dentry, NULL, &hinfo, &path, err)))
+                       return NULL;
+       } else {
+-              frame = frames;
+-              frame->bh = NULL;                       /* for dx_release() */
+-              frame->at = (struct dx_entry *)frames;  /* hack for zero entry*/
+-              dx_set_block(frame->at, 0);             /* dx_root block is 0 */
++              path.dp_frame->bh = NULL;                       /* for dx_path_fini() */
++              path.dp_frame->at = &dummy_dot;         /* hack for zero entry*/
++              dx_set_block(path.dp_frame->at, 0);     /* dx_root block is 0 */
+       }
+       hash = hinfo.hash;
+       do {
+-              block = dx_get_block(frame->at);
++              block = dx_get_block(path.dp_frame->at);
+               if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
+                       goto errout;
+               de = (struct ext3_dir_entry_2 *) bh->b_data;
+@@ -965,13 +972,12 @@ static struct buffer_head * ext3_dx_find
+                               goto errout;
+                       }
+                       *res_dir = de;
+-                      dx_release (frames);
++                      dx_path_fini(&path);
+                       return bh;
+               }
+               brelse (bh);
+               /* Check to see if we should continue to search */
+-              retval = ext3_htree_next_block(dir, hash, frame,
+-                                             frames, NULL);
++              retval = ext3_htree_next_block(dir, hash, &path, NULL);
+               if (retval < 0) {
+                       ext3_warning(sb, __FUNCTION__,
+                            "error reading index page in directory #%lu",
+@@ -984,7 +990,7 @@ static struct buffer_head * ext3_dx_find
+       *err = -ENOENT;
+ errout:
+       dxtrace(printk("%s not found\n", name));
+-      dx_release (frames);
++      dx_path_fini(&path);
+       return NULL;
+ }
+ #endif
+@@ -1297,7 +1303,7 @@ static int make_indexed_dir(handle_t *ha
+       int             namelen = dentry->d_name.len;
+       struct buffer_head *bh2;
+       struct dx_root  *root;
+-      struct dx_frame frames[DX_MAX_TREE_HEIGHT], *frame;
++      struct dx_path path;
+       struct dx_entry *entries;
+       struct ext3_dir_entry_2 *de, *de2;
+       char            *data1, *top;
+@@ -1308,6 +1314,7 @@ static int make_indexed_dir(handle_t *ha
+       u32             block;
+       struct fake_dirent *fde;
++      dx_path_init(&path, dir);
+       blocksize =  dir->i_sb->s_blocksize;
+       dxtrace(printk("Creating index\n"));
+       retval = ext3_journal_get_write_access(handle, bh);
+@@ -1352,14 +1359,13 @@ static int make_indexed_dir(handle_t *ha
+       hinfo.hash_version = root->info.hash_version;
+       hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
+       ext3fs_dirhash(name, namelen, &hinfo);
+-      frame = frames;
+-      frame->entries = entries;
+-      frame->at = entries;
+-      frame->bh = bh;
++      path.dp_frame->entries = entries;
++      path.dp_frame->at = entries;
++      path.dp_frame->bh = bh;
+       bh = bh2;
+-      de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
+-      dx_release (frames);
+-      if (!(de))
++      de = do_split(handle,dir, &bh, path.dp_frame, &hinfo, &retval);
++      dx_path_fini(&path);
++      if (!de)
+               return retval;
+       return add_dirent_to_buf(handle, dentry, inode, de, bh);
+@@ -1439,7 +1445,8 @@ static int ext3_add_entry (handle_t *han
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+                            struct inode *inode)
+ {
+-      struct dx_frame frames[DX_MAX_TREE_HEIGHT] = {{0,},}, *frame, *safe;
++      struct dx_path path;
++      struct dx_frame *frame, *safe;
+       struct dx_node *node2;
+       struct dx_entry *entries;   /* old block contents */
+       struct dx_entry *entries2;  /* new block contents */
+@@ -1455,9 +1462,10 @@ static int ext3_dx_add_entry(handle_t *h
+       int i;
+       size_t isize;
+-      frame = dx_probe(dentry, NULL, &hinfo, frames, &err);
+-      if (!frame)
++      dx_path_init(&path, dir);
++      if (!dx_probe(dentry, NULL, &hinfo, &path, &err))
+               return err;
++      frame = path.dp_frame;
+       entries = frame->entries;
+       /* XXX nikita: global serialization! */
+@@ -1495,7 +1503,7 @@ static int ext3_dx_add_entry(handle_t *h
+                      dx_get_count(entries), dx_get_limit(entries)));
+       /* What levels need split? */
+-      for (nr_splet = 0; frame >= frames &&
++      for (nr_splet = 0; frame >= path.dp_frames &&
+            dx_get_count(frame->entries) == dx_get_limit(frame->entries);
+            --frame, ++nr_splet) {
+               if (nr_splet == DX_MAX_TREE_HEIGHT) {
+@@ -1523,7 +1531,7 @@ static int ext3_dx_add_entry(handle_t *h
+                       goto journal_error;
+       }
+       /* Add "safe" node to transaction too */
+-      if (safe + 1 != frames) {
++      if (safe + 1 != path.dp_frames) {
+               err = ext3_journal_get_write_access(handle, safe->bh);
+               if (err)
+                       goto journal_error;
+@@ -1543,7 +1551,7 @@ static int ext3_dx_add_entry(handle_t *h
+               node2 = (struct dx_node *)(bh2->b_data);
+               entries2 = node2->entries;
+-              if (frame == frames) {
++              if (frame == path.dp_frames) {
+                       /* splitting root node. Tricky point:
+                        *
+                        * In the "normal" B-tree we'd split root *and* add
+@@ -1556,7 +1564,9 @@ static int ext3_dx_add_entry(handle_t *h
+                        */
+                       struct dx_root *root;
+                       u8 indirects;
++                      struct dx_frame *frames;
++                      frames = path.dp_frames;
+                       root = (struct dx_root *) frames->bh->b_data;
+                       indirects = root->info.indirect_levels;
+                       dxtrace(printk("Creating new root %d\n", indirects));
+@@ -1571,7 +1581,7 @@ static int ext3_dx_add_entry(handle_t *h
+                       /* Shift frames in the path */
+                       memmove(frames + 2, frames + 1,
+-                              (sizeof frames) - 2 * sizeof frames[0]);
++                              (sizeof path.dp_frames) - 2 * sizeof frames[0]);
+                       /* Add new access path frame */
+                       frames[1].at = entries2 + idx;
+                       frames[1].entries = entries = entries2;
+@@ -1627,7 +1637,7 @@ cleanup2:
+       }
+       if (err)
+               inode->i_size = isize;
+-      dx_release(frames);
++      dx_path_fini(&path);
+       return err;
+ }
+ #endif
diff --git a/lustre/kernel_patches/patches/ext3-htree-r5-hash.patch b/lustre/kernel_patches/patches/ext3-htree-r5-hash.patch
new file mode 100644 (file)
index 0000000..48897e7
--- /dev/null
@@ -0,0 +1,88 @@
+Index: iam-src/fs/ext3/hash.c
+===================================================================
+--- iam-src.orig/fs/ext3/hash.c        2006-02-11 01:08:59.000000000 +0300
++++ iam-src/fs/ext3/hash.c     2006-02-11 20:46:22.000000000 +0300
+@@ -4,7 +4,7 @@
+  * Copyright (C) 2002 by Theodore Ts'o
+  *
+  * This file is released under the GPL v2.
+- * 
++ *
+  * This file may be redistributed under the terms of the GNU Public
+  * License.
+  */
+@@ -115,6 +115,18 @@ static __u32 dx_hack_hash (const char *n
+       return (hash0 << 1);
+ }
++static __u32 dx_r5_hash(const signed char *msg, int len)
++{
++      __u32 a = 0;
++      while (len--) {
++              a += *msg << 4;
++              a += *msg >> 4;
++              a *= 11;
++              msg++;
++      }
++      return a;
++}
++
+ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
+ {
+       __u32   pad, val;
+@@ -146,11 +158,11 @@ static void str2hashbuf(const char *msg,
+  * Returns the hash of a filename.  If len is 0 and name is NULL, then
+  * this function can be used to test whether or not a hash version is
+  * supported.
+- * 
++ *
+  * The seed is an 4 longword (32 bits) "secret" which can be used to
+  * uniquify a hash.  If the seed is all zero's, then some default seed
+  * may be used.
+- * 
++ *
+  * A particular hash version specifies whether or not the seed is
+  * represented, and whether or not the returned hash is 32 bits or 64
+  * bits.  32 bit hashes will return 0 for the minor hash.
+@@ -205,6 +217,9 @@ int ext3fs_dirhash(const char *name, int
+               hash = buf[0];
+               minor_hash = buf[1];
+               break;
++      case DX_HASH_R5:
++              hash = dx_r5_hash(name, len);
++              break;
+       default:
+               hinfo->hash = 0;
+               return -1;
+Index: iam-src/fs/ext3/namei.c
+===================================================================
+--- iam-src.orig/fs/ext3/namei.c       2006-02-11 01:09:12.000000000 +0300
++++ iam-src/fs/ext3/namei.c    2006-02-11 20:45:58.000000000 +0300
+@@ -370,6 +370,7 @@ dx_probe(struct dentry *dentry, struct i
+       root = (struct dx_root *) bh->b_data;
+       if (root->info.hash_version != DX_HASH_TEA &&
+           root->info.hash_version != DX_HASH_HALF_MD4 &&
++          root->info.hash_version != DX_HASH_R5 &&
+           root->info.hash_version != DX_HASH_LEGACY) {
+               ext3_warning(dir->i_sb, __FUNCTION__,
+                            "Unrecognised inode hash code %d", root->info.hash_version);
+@@ -1363,6 +1364,7 @@ static int make_indexed_dir(handle_t *ha
+       memset (&root->info, 0, sizeof(root->info));
+       root->info.info_length = sizeof(root->info);
+       root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
++      root->info.hash_version = DX_HASH_R5;
+       entries = root->entries;
+       dx_set_block (entries, 1);
+       dx_set_count (entries, 1);
+Index: iam-src/include/linux/ext3_fs.h
+===================================================================
+--- iam-src.orig/include/linux/ext3_fs.h       2006-02-11 01:08:59.000000000 +0300
++++ iam-src/include/linux/ext3_fs.h    2006-02-11 20:45:58.000000000 +0300
+@@ -665,6 +665,7 @@ struct ext3_dir_entry_2 {
+ #define DX_HASH_LEGACY                0
+ #define DX_HASH_HALF_MD4      1
+ #define DX_HASH_TEA           2
++#define DX_HASH_R5            3
+ /* hash info structure used by the directory hash */
+ struct dx_hash_info
diff --git a/lustre/kernel_patches/patches/ext3-iam-ops.patch b/lustre/kernel_patches/patches/ext3-iam-ops.patch
new file mode 100644 (file)
index 0000000..f0181d9
--- /dev/null
@@ -0,0 +1,646 @@
+Index: linux-2.6.9/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.9.orig/fs/ext3/namei.c   2006-04-23 22:05:38.000000000 +0800
++++ linux-2.6.9/fs/ext3/namei.c        2006-04-23 22:22:58.000000000 +0800
+@@ -82,13 +82,16 @@
+  *
+  * Entries in index node are sorted by their key value.
+  *
++ * Format of leaf node:
+  *
+- *
+- *
+- *
+- *
+- *
+- *
++ * +-----+-------+-------+-------+------+-------+------------+
++ * |     | count |       |       |      |       |            |
++ * | gap |   /   | leaf  | leaf  | .... | leaf  | free space |
++ * |     | limit |       |       |      |       |            |
++ * +-----+-------+-------+-------+------+-------+------------+
++
++ *       leaf          For leaf entry: consists of a rec immediately followd by 
++ *                     a key. size of a key and size of a rec depends on container.  
+  *
+  *
+  *
+@@ -241,6 +244,7 @@
+ };
+ /* leaf node reached by tree lookup */
++#define iam_leaf_entry iam_rec
+ struct iam_leaf {
+       struct buffer_head *bh;
+       struct iam_leaf_entry *entries;
+@@ -508,6 +512,11 @@
+       IAM_IT_ATTACHED
+ };
++struct htree_cookie {
++      struct dx_hash_info *hinfo;
++      struct dentry       *dentry;
++};
++
+ /*
+  * Iterator.
+  *
+@@ -704,7 +713,7 @@
+                            struct inode *inode);
+ static inline void iam_path_init(struct iam_path *path,
+-                               struct iam_container *c);
++                               struct iam_container *c, struct htree_cookie *hc);
+ static inline void iam_path_fini(struct iam_path *path);
+@@ -865,11 +874,6 @@
+       return 0;
+ }
+-struct htree_cookie {
+-      struct dx_hash_info *hinfo;
+-      struct dentry       *dentry;
+-};
+-
+ static int htree_node_check(struct iam_path *path, struct iam_frame *frame)
+ {
+       void *data;
+@@ -1171,11 +1175,13 @@
+       }
+ }
+-static inline void iam_path_init(struct iam_path *path, struct iam_container *c)
++static inline void iam_path_init(struct iam_path *path, struct iam_container *c, 
++                               struct htree_cookie *hc)
+ {
+       memset(path, 0, sizeof *path);
+       path->ip_container = c;
+       path->ip_frame = path->ip_frames;
++      path->ip_descr_data = hc;
+ }
+ static inline void iam_path_fini(struct iam_path *path)
+@@ -1201,7 +1207,7 @@
+        * iam_path_fini().
+        */
+       iput(inode);
+-      iam_path_init(&path->ipc_path, &path->ipc_container);
++      iam_path_init(&path->ipc_path, &path->ipc_container, NULL);
+       for (i = 0; i < ARRAY_SIZE(path->ipc_path.ip_key_scratch); ++i)
+               path->ipc_path.ip_key_scratch[i] =
+                       (struct iam_key *)&path->ipc_scrach[i];
+@@ -1213,6 +1219,382 @@
+       iam_container_fini(&path->ipc_container);
+ }
++static int iam_leaf_init(struct iam_path *path, struct iam_leaf *leaf)
++{
++      int block, err;
++      struct buffer_head *bh;
++      
++      block = dx_get_block(path, path->ip_frame->at);
++      err = path_descr(path)->id_node_read(path->ip_container, block, 
++                                           NULL, &bh);
++      if (err)
++              return err;
++
++      leaf->bh = bh;
++      leaf->entries = (struct iam_leaf_entry *)bh->b_data;
++      return 0;
++}
++
++static void iam_leaf_fini(struct iam_leaf *leaf)
++{
++      if (leaf->bh)
++              brelse(leaf->bh);
++}
++
++int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r)
++{
++      struct dx_hash_info     hinfo;
++      struct iam_path_compat cpath;
++      struct iam_path *path = &cpath.ipc_path;
++      struct htree_cookie hc = {
++              .hinfo  = &hinfo
++      };
++      int err, i;
++
++      iam_path_init(path, c, &hc);
++      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
++              path->ip_key_scratch[i] =
++                      (struct iam_key *)&cpath.ipc_scrach[i];
++      err = dx_lookup(path);
++      do {
++              struct iam_leaf leaf;
++              err = iam_leaf_init(path, &leaf);
++              if (err)
++                      goto errout;
++
++              for (path_descr(path)->id_leaf.start(c, &leaf);
++                   !path_descr(path)->id_leaf.at_end(c, &leaf);
++                   path_descr(path)->id_leaf.next(c, &leaf)) {
++                      struct iam_key *key;
++
++                      key = kmalloc(path_descr(path)->id_key_size, GFP_KERNEL);
++                      path_descr(path)->id_leaf.key(c, &leaf, key);
++                      if (keycmp(c, k, key) == 0) {
++                              memcpy(r, path_descr(path)->id_leaf.rec(c, &leaf),
++                                     path_descr(path)->id_rec_size);
++                              iam_path_fini(path);
++                              iam_leaf_fini(&leaf);
++                              return 0;
++                      }
++              }
++
++              iam_leaf_fini(&leaf);
++              /* Check to see if we should continue to search */
++              err = ext3_htree_next_block(c->ic_object, hinfo.hash, path, NULL);
++              if (err < 0)
++                      goto errout;
++      } while (err == 1);
++errout:
++      iam_path_fini(path);
++      return(err);
++}
++
++static inline size_t iam_leaf_entry_size(struct iam_path *p)
++{
++      return path_descr(p)->id_rec_size + path_descr(p)->id_key_size;
++}
++
++static inline ptrdiff_t iam_leaf_entry_diff(struct iam_path *p,
++                                    struct iam_leaf_entry *e1, struct iam_leaf_entry *e2)
++{
++      ptrdiff_t diff;
++
++      diff = (void *)e1 - (void *)e2;
++      assert(diff / iam_leaf_entry_size(p) * iam_leaf_entry_size(p) == diff);
++      return diff / iam_leaf_entry_size(p);
++}
++
++static inline struct iam_leaf_entry* 
++iam_leaf_entry_shift(struct iam_path *p, struct iam_leaf_entry *entry, int shift)
++{
++      void *e = entry;
++      return e + shift * iam_leaf_entry_size(p);
++}
++
++static inline struct iam_key *
++dx_leaf_get_key(struct iam_path *p, struct iam_leaf_entry *e, struct iam_key *key)
++{
++      memcpy(key, e, path_descr(p)->id_key_size);
++      return key;
++}
++
++static inline struct iam_key *
++iam_leaf_key_at(struct iam_path *p, struct iam_leaf_entry *entry)
++{
++      void *e = entry;
++      return e + path_descr(p)->id_rec_size;
++}
++static inline struct iam_leaf_entry *
++iam_leaf_entry_at(struct iam_path *p, struct iam_leaf_entry *entry)
++{
++      return entry; 
++}
++
++static int iam_leaf_lookup(struct iam_path *path, struct iam_leaf *leaf, 
++                         struct iam_key *k)
++{
++      struct iam_leaf_entry *p, *q, *m;
++      struct iam_leaf_entry *entries = leaf->entries;
++      int count = dx_get_count((struct iam_entry *)entries);
++      
++      p = iam_leaf_entry_shift(path, entries, 1);
++      q = iam_leaf_entry_shift(path, entries, count - 1);
++      while (p <= q) {
++              m = iam_leaf_entry_shift(path,
++                                 p, iam_leaf_entry_diff(path, q, p) / 2);
++              dxtrace(printk("."));
++              if (keycmp(path->ip_container, iam_leaf_key_at(path, m),
++                         path->ip_key_target) > 0)
++                      q = iam_leaf_entry_shift(path, m, -1);
++              else
++                      p = iam_leaf_entry_shift(path, m, +1);
++      }
++      leaf->at = q; 
++      return 0;
++}
++
++/*XXX what kind of lock should this entry be locked: WangDi */
++static int iam_leaf_insert(handle_t *handle, struct iam_path *path, 
++                         struct iam_key *k, struct iam_rec *r)
++{
++      struct iam_leaf leaf;
++      struct iam_leaf_entry *p, *q;
++      int err, count;
++
++      err = iam_leaf_init(path, &leaf);
++      if (err)
++              goto errout;
++      path_descr(path)->id_leaf.start(path->ip_container, &leaf);
++      count = dx_get_count((struct iam_entry *)leaf.entries);
++      if (dx_get_count((struct iam_entry *)leaf.entries) >= 
++          dx_get_limit((struct iam_entry *)leaf.entries)){
++              err = -ENOSPC;
++              goto errout;
++      }
++
++      err = iam_leaf_lookup(path, &leaf, k);
++      if (err)
++              goto errout;
++      
++      /*insert the k/r to leaf entries*/
++      p = iam_leaf_entry_shift(path, leaf.at, 1);
++      q = iam_leaf_entry_shift(path, leaf.entries, count - 1);
++      while (q < p) {
++              memcpy(iam_leaf_entry_shift(path, q, 1), q, iam_leaf_entry_size(path));
++              q = iam_leaf_entry_shift(path, q, -1);  
++      }
++      memcpy(iam_leaf_entry_at(path, p), r, path_descr(path)->id_rec_size);
++      memcpy(iam_leaf_key_at(path, p), k, path_descr(path)->id_key_size);
++
++      dx_set_count((struct iam_entry*)leaf.entries, count + 1);
++      err = ext3_journal_dirty_metadata(handle, leaf.bh);
++      if (err)
++              ext3_std_error(path->ip_container->ic_object->i_sb, err);
++errout:       
++      iam_leaf_fini(&leaf);
++      return err;
++} 
++
++static int split_leaf_node(handle_t *handle, struct iam_path *path)
++{
++      struct inode *dir = path_obj(path);
++      unsigned continued = 0;
++      struct buffer_head *bh2;
++      u32 newblock, hash_split;
++      char *data2;
++      struct iam_leaf leaf;
++      unsigned split;
++      int     err;
++
++      bh2 = ext3_append (handle, dir, &newblock, &err);
++      if (!(bh2)) {
++              err = -ENOSPC;
++              goto errout;
++      }
++      err = iam_leaf_init(path, &leaf);
++      if (err)
++              goto errout;
++
++      BUFFER_TRACE(leaf.bh, "get_write_access");
++      err = ext3_journal_get_write_access(handle, leaf.bh);
++      if (err) {
++      journal_error:
++              iam_leaf_fini(&leaf);
++              brelse(bh2);
++              ext3_std_error(dir->i_sb, err);
++              err = -EIO;
++              goto errout;
++      }
++      data2 = bh2->b_data;
++      split = dx_get_count((struct iam_entry*)leaf.entries)/2;
++      hash_split = *(__u32*)iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split));
++      if (keycmp(path->ip_container, iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split)),
++                 iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split -1))) == 0)
++              continued = 1;
++
++      memcpy(iam_leaf_entry_shift(path, (struct iam_leaf_entry *)data2, 1),
++             iam_leaf_entry_shift(path, leaf.entries, split),
++             split * iam_leaf_entry_size(path));
++ 
++      /* Which block gets the new entry? */
++      dx_insert_block(path, path->ip_frame, hash_split + continued, newblock);
++      err = ext3_journal_dirty_metadata (handle, bh2);
++      if (err)
++              goto journal_error;
++      err = ext3_journal_dirty_metadata (handle, leaf.bh);
++      if (err)
++              goto journal_error;
++      brelse (bh2);
++      iam_leaf_fini(&leaf);
++errout:
++      return err;
++}
++
++static int split_index_node(handle_t *handle, struct iam_path *path);
++int iam_insert(handle_t *handle, struct iam_container *c, struct iam_key *k, 
++             struct iam_rec *r)
++{
++      struct dx_hash_info     hinfo;
++      struct iam_path_compat cpath;
++      struct iam_path *path = &cpath.ipc_path;
++      struct htree_cookie hc = {
++              .hinfo  = &hinfo
++      };
++      int err, i;
++
++      iam_path_init(path, c, &hc);
++      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
++              path->ip_key_scratch[i] =
++                      (struct iam_key *)&cpath.ipc_scrach[i];
++      err = dx_lookup(path);
++      if (err)
++              goto errout; 
++
++      err = iam_leaf_insert(handle, path, k, r);
++      
++      if (err != -ENOSPC) 
++              goto errout;    
++
++      err = split_index_node(handle, path);
++      if (err)
++              goto errout;    
++
++      err = split_leaf_node(handle, path);
++      if (err)
++              goto errout;
++      
++      err = iam_leaf_insert(handle, path, k, r);
++errout:
++      iam_path_fini(path);
++      return(err);
++}
++
++static int iam_leaf_delete(handle_t *handle, struct iam_path *path, 
++                         struct iam_key *k)
++{
++      struct iam_leaf leaf;
++      struct iam_leaf_entry *p, *q;
++      int err, count;
++
++      err = iam_leaf_init(path, &leaf);
++      if (err)
++              goto errout;
++      
++      err = iam_leaf_lookup(path, &leaf, k);
++      if (err)
++              goto errout;
++
++      count = dx_get_count((struct iam_entry*)leaf.entries);
++      /*delete the k to leaf entries*/
++      p = iam_leaf_entry_shift(path, leaf.at, 1);
++      q = iam_leaf_entry_shift(path, leaf.entries, count - 1);
++      while (p < q) {
++              memcpy(p, iam_leaf_entry_shift(path, p, 1), iam_leaf_entry_size(path));
++              p = iam_leaf_entry_shift(path, p, 1);
++      }
++      dx_set_count((struct iam_entry*)leaf.entries, count - 1);
++
++      err = ext3_journal_dirty_metadata(handle, leaf.bh);
++      if (err)
++              ext3_std_error(path_obj(path)->i_sb, err);
++errout:       
++      iam_leaf_fini(&leaf);
++      return err;
++}
++
++int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k)
++{
++      struct dx_hash_info     hinfo;
++      struct iam_path_compat cpath;
++      struct iam_path *path = &cpath.ipc_path;
++      struct htree_cookie hc = {
++              .hinfo  = &hinfo
++      };
++      int err, i;
++
++      iam_path_init(path, c, &hc);
++      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
++              path->ip_key_scratch[i] =
++                      (struct iam_key *)&cpath.ipc_scrach[i];
++      err = dx_lookup(path);
++      if (err)
++              goto errout; 
++
++      err = iam_leaf_delete(h, path, k);
++errout:
++      iam_path_fini(path);
++      return err;
++}
++
++static int iam_leaf_update(handle_t *handle, struct iam_path *path, 
++                         struct iam_key *k, struct iam_rec *r)
++{
++      struct iam_leaf leaf;
++      int err;
++
++      err = iam_leaf_init(path, &leaf);
++      if (err)
++              goto errout;
++      
++      err = iam_leaf_lookup(path, &leaf, k);
++      if (err)
++              goto errout;
++
++      memcpy(iam_leaf_entry_at(path, leaf.at), r, path_descr(path)->id_rec_size);
++      memcpy(iam_leaf_key_at(path, leaf.at), k, path_descr(path)->id_key_size);
++
++      err = ext3_journal_dirty_metadata(handle, leaf.bh);
++      if (err)
++              ext3_std_error(path_obj(path)->i_sb, err);
++errout:       
++      iam_leaf_fini(&leaf);
++      return err;
++}
++
++int iam_update(handle_t *h, struct iam_container *c,
++             struct iam_key *k, struct iam_rec *r)
++{
++      struct dx_hash_info     hinfo;
++      struct iam_path_compat cpath;
++      struct iam_path *path = &cpath.ipc_path;
++      struct htree_cookie hc = {
++              .hinfo  = &hinfo
++      };
++      int err, i;
++      
++      iam_path_init(path, c, &hc);
++      for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
++              path->ip_key_scratch[i] =
++                      (struct iam_key *)&cpath.ipc_scrach[i];
++      err = dx_lookup(path);
++      if (err)
++              goto errout; 
++
++      err = iam_leaf_update(h, path, k, r);
++errout:
++      iam_path_fini(path);
++      return err;
++}
+ /*
+  * This function increments the frame pointer to search the next leaf
+  * block, and reads in the necessary intervening nodes if the search
+@@ -2213,59 +2595,21 @@
+ }
+ #ifdef CONFIG_EXT3_INDEX
+-/*
+- * Returns 0 for success, or a negative error value
+- */
+-static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+-                           struct inode *inode)
+-{
+-      struct iam_path_compat cpath;
+-      struct iam_path *path = &cpath.ipc_path;
+-      struct iam_descr *param;
+-      struct iam_frame *frame, *safe;
++static int split_index_node(handle_t *handle, struct iam_path *path)
++{ 
++
+       struct iam_entry *entries;   /* old block contents */
+       struct iam_entry *entries2;  /* new block contents */
+-      struct dx_hash_info hinfo;
+-      struct buffer_head * bh;
++      struct iam_frame *frame, *safe;
+       struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {0};
+-      struct inode *dir = dentry->d_parent->d_inode;
+-      struct super_block * sb = dir->i_sb;
+-      struct ext3_dir_entry_2 *de;
+       u32 newblock[DX_MAX_TREE_HEIGHT] = {0};
+-      int err;
++      struct inode *dir = path_obj(path);
+       int nr_splet;
+-      int i;
+-      size_t isize;
++      int i, err;
+-      iam_path_compat_init(&cpath, dir);
+-      param = path_descr(path);
+-
+-      err = dx_probe(dentry, NULL, &hinfo, path);
+-      if (err != 0)
+-              return err;
+       frame = path->ip_frame;
+       entries = frame->entries;
+-      /* XXX nikita: global serialization! */
+-      isize = dir->i_size;
+-
+-      err = param->id_node_read(path->ip_container, 
+-                                (iam_ptr_t)dx_get_block(path, 
+-                                frame->at), handle, &bh);
+-      if (err != 0)
+-              goto cleanup;
+-
+-      BUFFER_TRACE(bh, "get_write_access");
+-      err = ext3_journal_get_write_access(handle, bh);
+-      if (err)
+-              goto journal_error;
+-
+-      err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
+-      if (err != -ENOSPC) {
+-              bh = NULL;
+-              goto cleanup;
+-      }
+-
+       /*
+        * Tall-tree handling: we might have to split multiple index blocks
+        * all the way up to tree root. Tricky point here is error handling:
+@@ -2288,7 +2632,7 @@
+            dx_get_count(frame->entries) == dx_get_limit(frame->entries);
+            --frame, ++nr_splet) {
+               if (nr_splet == DX_MAX_TREE_HEIGHT) {
+-                      ext3_warning(sb, __FUNCTION__,
++                      ext3_warning(dir->i_sb, __FUNCTION__,
+                                    "Directory index full!\n");
+                       err = -ENOSPC;
+                       goto cleanup;
+@@ -2301,7 +2645,7 @@
+       for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
+               bh_new[i] = ext3_append (handle, dir, &newblock[i], &err);
+               if (!bh_new[i] ||
+-                  param->id_node_init(path->ip_container, bh_new[i], 0) != 0)
++                  path_descr(path)->id_node_init(path->ip_container, bh_new[i], 0) != 0)
+                       goto cleanup;
+               BUFFER_TRACE(frame->bh, "get_write_access");
+               err = ext3_journal_get_write_access(handle, frame->bh);
+@@ -2407,23 +2751,81 @@
+                               goto journal_error;
+               }
+       }
++      goto cleanup;
++journal_error:
++      ext3_std_error(dir->i_sb, err);
++
++cleanup:
++      for (i = 0; i < ARRAY_SIZE(bh_new); ++i) {
++              if (bh_new[i] != NULL)
++                      brelse(bh_new[i]);
++      }
++      return err;
++}
++
++/*
++ * Returns 0 for success, or a negative error value
++ */
++static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
++                           struct inode *inode)
++{
++      struct iam_path_compat cpath;
++      struct iam_path *path = &cpath.ipc_path;
++      struct iam_descr *param;
++      struct iam_frame *frame;
++      struct dx_hash_info hinfo;
++      struct buffer_head * bh = NULL;
++      struct inode *dir = dentry->d_parent->d_inode;
++      struct ext3_dir_entry_2 *de;
++      int err;
++      size_t isize;
++
++      iam_path_compat_init(&cpath, dir);
++      param = path_descr(path);
++
++      err = dx_probe(dentry, NULL, &hinfo, path);
++      if (err != 0)
++              return err;
++      frame = path->ip_frame;
++
++      /* XXX nikita: global serialization! */
++      isize = dir->i_size;
++
++      err = param->id_node_read(path->ip_container, (iam_ptr_t)dx_get_block(path, frame->at), 
++                                handle, &bh);
++      if (err != 0)
++              goto cleanup;
++
++      BUFFER_TRACE(bh, "get_write_access");
++      err = ext3_journal_get_write_access(handle, bh);
++      if (err)
++              goto journal_error;
++
++      err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
++      if (err != -ENOSPC) {
++              bh = NULL;
++              goto cleanup;
++      }
++      
++      err = split_index_node(handle, path);
++      if (err)
++              goto cleanup;   
++
++      /*copy split inode too*/
+       de = do_split(handle, path, &bh, --frame, &hinfo, &err);
+       if (!de)
+               goto cleanup;
++
+       assert(dx_node_check(path, frame));
+       err = add_dirent_to_buf(handle, dentry, inode, de, bh);
+       goto cleanup2;
+ journal_error:
+-      ext3_std_error(dir->i_sb, err);
++        ext3_std_error(dir->i_sb, err);
+ cleanup:
+       if (bh)
+               brelse(bh);
+ cleanup2:
+-      for (i = 0; i < ARRAY_SIZE(bh_new); ++i) {
+-              if (bh_new[i] != NULL)
+-                      brelse(bh_new[i]);
+-      }
+       if (err)
+               inode->i_size = isize;
+       iam_path_fini(path);
diff --git a/lustre/kernel_patches/patches/ext3-tall-htree.patch b/lustre/kernel_patches/patches/ext3-tall-htree.patch
new file mode 100644 (file)
index 0000000..5021759
--- /dev/null
@@ -0,0 +1,431 @@
+Index: linux-2.6.9/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.9.orig/fs/ext3/namei.c   2006-04-23 22:35:38.000000000 +0800
++++ linux-2.6.9/fs/ext3/namei.c        2006-04-23 22:35:47.000000000 +0800
+@@ -48,6 +48,11 @@
+ #define NAMEI_RA_SIZE        (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
+ #define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
++/*
++ * Maximal number of non-leaf levels in htree. In the stock ext3 this is 2.
++ */
++#define DX_MAX_TREE_HEIGHT (5)
++
+ static struct buffer_head *ext3_append(handle_t *handle,
+                                       struct inode *inode,
+                                       u32 *block, int *err)
+@@ -75,7 +80,7 @@
+ #ifdef DX_DEBUG
+ #define dxtrace(command) command
+ #else
+-#define dxtrace(command) 
++#define dxtrace(command)
+ #endif
+ struct fake_dirent
+@@ -168,7 +173,7 @@
+ static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
+ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+                                struct dx_frame *frame,
+-                               struct dx_frame *frames, 
++                               struct dx_frame *frames,
+                                __u32 *start_hash);
+ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
+                      struct ext3_dir_entry_2 **res_dir, int *err);
+@@ -249,7 +254,7 @@
+ }
+ struct stats
+-{ 
++{
+       unsigned names;
+       unsigned space;
+       unsigned bcount;
+@@ -367,7 +372,7 @@
+               goto fail;
+       }
+-      if ((indirect = root->info.indirect_levels) > 1) {
++      if ((indirect = root->info.indirect_levels) > DX_MAX_TREE_HEIGHT - 1) {
+               ext3_warning(dir->i_sb, __FUNCTION__,
+                            "Unimplemented inode hash depth: %#06x",
+                            root->info.indirect_levels);
+@@ -436,12 +441,15 @@
+ static void dx_release (struct dx_frame *frames)
+ {
++      int height;
++
+       if (frames[0].bh == NULL)
+               return;
+-
+-      if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
+-              brelse(frames[1].bh);
+-      brelse(frames[0].bh);
++      height = ((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels;
++      for (; height >= 0; height--) {
++              assert(frames[height].bh != NULL);
++              brelse(frames[height].bh);
++      }
+ }
+ /*
+@@ -463,7 +471,7 @@
+  */
+ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+                                struct dx_frame *frame,
+-                               struct dx_frame *frames, 
++                               struct dx_frame *frames,
+                                __u32 *start_hash)
+ {
+       struct dx_frame *p;
+@@ -582,7 +590,7 @@
+ {
+       struct dx_hash_info hinfo;
+       struct ext3_dir_entry_2 *de;
+-      struct dx_frame frames[2], *frame;
++      struct dx_frame frames[DX_MAX_TREE_HEIGHT], *frame;
+       struct inode *dir;
+       int block, err;
+       int count = 0;
+@@ -627,7 +635,7 @@
+               }
+               count += ret;
+               hashval = ~0;
+-              ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, 
++              ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS,
+                                           frame, frames, &hashval);
+               *next_hash = hashval;
+               if (ret < 0) {
+@@ -644,7 +652,7 @@
+                       break;
+       }
+       dx_release(frames);
+-      dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n", 
++      dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
+                      count, *next_hash));
+       return count;
+ errout:
+@@ -918,7 +926,7 @@
+       struct super_block * sb;
+       struct dx_hash_info     hinfo;
+       u32 hash;
+-      struct dx_frame frames[2], *frame;
++      struct dx_frame frames[DX_MAX_TREE_HEIGHT], *frame;
+       struct ext3_dir_entry_2 *de, *top;
+       struct buffer_head *bh;
+       unsigned long block;
+@@ -1037,7 +1045,7 @@
+               parent = ERR_PTR(-ENOMEM);
+       }
+       return parent;
+-} 
++}
+ #define S_SHIFT 12
+ static unsigned char ext3_type_by_mode[S_IFMT >> S_SHIFT] = {
+@@ -1098,6 +1106,8 @@
+       return prev;
+ }
++/* Allocate new node, and split leaf node @bh into it, inserting new pointer
++ * into parent node identified by @frame */
+ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
+                       struct buffer_head **bh,struct dx_frame *frame,
+                       struct dx_hash_info *hinfo, int *error)
+@@ -1185,7 +1195,7 @@
+  * add_dirent_to_buf will attempt search the directory block for
+  * space.  It will return -ENOSPC if no space is available, and -EIO
+  * and -EEXIST if directory entry already exists.
+- * 
++ *
+  * NOTE!  bh is NOT released in the case where ENOSPC is returned.  In
+  * all other cases bh is released.
+  */
+@@ -1286,7 +1296,7 @@
+       int             namelen = dentry->d_name.len;
+       struct buffer_head *bh2;
+       struct dx_root  *root;
+-      struct dx_frame frames[2], *frame;
++      struct dx_frame frames[DX_MAX_TREE_HEIGHT], *frame;
+       struct dx_entry *entries;
+       struct ext3_dir_entry_2 *de, *de2;
+       char            *data1, *top;
+@@ -1427,20 +1437,29 @@
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+                            struct inode *inode)
+ {
+-      struct dx_frame frames[2], *frame;
+-      struct dx_entry *entries, *at;
++      struct dx_frame frames[DX_MAX_TREE_HEIGHT] = {{0,},}, *frame, *safe;
++      struct dx_node *node2;
++      struct dx_entry *entries;   /* old block contents */
++      struct dx_entry *entries2;  /* new block contents */
+       struct dx_hash_info hinfo;
+       struct buffer_head * bh;
++      struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {0};
+       struct inode *dir = dentry->d_parent->d_inode;
+       struct super_block * sb = dir->i_sb;
+       struct ext3_dir_entry_2 *de;
++      u32 newblock[DX_MAX_TREE_HEIGHT] = {0};
+       int err;
++      int nr_splet;
++      int i;
++      size_t isize;
+       frame = dx_probe(dentry, NULL, &hinfo, frames, &err);
+       if (!frame)
+               return err;
+       entries = frame->entries;
+-      at = frame->at;
++
++      /* XXX nikita: global serialization! */
++      isize = dir->i_size;
+       if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
+               goto cleanup;
+@@ -1456,29 +1475,43 @@
+               goto cleanup;
+       }
++      /*
++       * Tall-tree handling: we might have to split multiple index blocks
++       * all the way up to tree root. Tricky point here is error handling:
++       * to avoid complicated undo/rollback we
++       *
++       *   - first allocate all necessary blocks
++       *
++       *   - insert pointers into them atomically.
++       *
++       * XXX nikita: this algorithm is *not* scalable, as it assumes that at
++       * least nodes in the path are locked.
++       */
++
+       /* Block full, should compress but for now just split */
+       dxtrace(printk("using %u of %u node entries\n",
+                      dx_get_count(entries), dx_get_limit(entries)));
+-      /* Need to split index? */
+-      if (dx_get_count(entries) == dx_get_limit(entries)) {
+-              u32 newblock;
+-              unsigned icount = dx_get_count(entries);
+-              int levels = frame - frames;
+-              struct dx_entry *entries2;
+-              struct dx_node *node2;
+-              struct buffer_head *bh2;
+-              if (levels && (dx_get_count(frames->entries) ==
+-                             dx_get_limit(frames->entries))) {
++      /* What levels need split? */
++      for (nr_splet = 0; frame >= frames &&
++           dx_get_count(frame->entries) == dx_get_limit(frame->entries);
++           --frame, ++nr_splet) {
++              if (nr_splet == DX_MAX_TREE_HEIGHT) {
+                       ext3_warning(sb, __FUNCTION__,
+                                    "Directory index full!\n");
+                       err = -ENOSPC;
+                       goto cleanup;
+               }
+-              bh2 = ext3_append (handle, dir, &newblock, &err);
+-              if (!(bh2))
++      }
++
++      safe = frame;
++      /* Go back down, allocating blocks, and adding blocks into
++       * transaction... */
++      for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
++              bh_new[i] = ext3_append (handle, dir, &newblock[i], &err);
++              if (!bh_new[i])
+                       goto cleanup;
+-              node2 = (struct dx_node *)(bh2->b_data);
++              node2 = (struct dx_node *)(bh_new[i]->b_data);
+               entries2 = node2->entries;
+               node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
+               node2->fake.inode = 0;
+@@ -1486,72 +1519,112 @@
+               err = ext3_journal_get_write_access(handle, frame->bh);
+               if (err)
+                       goto journal_error;
+-              if (levels) {
+-                      unsigned icount1 = icount/2, icount2 = icount - icount1;
+-                      unsigned hash2 = dx_get_hash(entries + icount1);
+-                      dxtrace(printk("Split index %i/%i\n", icount1, icount2));
+-
+-                      BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
+-                      err = ext3_journal_get_write_access(handle,
+-                                                           frames[0].bh);
++      }
++      /* Add "safe" node to transaction too */
++      if (safe + 1 != frames) {
++              err = ext3_journal_get_write_access(handle, safe->bh);
++              if (err)
++                      goto journal_error;
++      }
++
++      /* Go through nodes once more, inserting pointers */
++      for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
++              unsigned count;
++              int idx;
++              struct buffer_head *bh2;
++
++              entries = frame->entries;
++              count = dx_get_count(entries);
++              idx = frame->at - entries;
++
++              bh2 = bh_new[i];
++              node2 = (struct dx_node *)(bh2->b_data);
++              entries2 = node2->entries;
++
++              if (frame == frames) {
++                      /* splitting root node. Tricky point:
++                       *
++                       * In the "normal" B-tree we'd split root *and* add
++                       * new root to the tree with pointers to the old root
++                       * and its sibling (thus introducing two new nodes).
++                       *
++                       * In htree it's enough to add one node, because
++                       * capacity of the root node is smaller than that of
++                       * non-root one.
++                       */
++                      struct dx_root *root;
++                      u8 indirects;
++
++                      root = (struct dx_root *) frames->bh->b_data;
++                      indirects = root->info.indirect_levels;
++                      dxtrace(printk("Creating new root %d\n", indirects));
++                      memcpy((char *) entries2, (char *) entries,
++                             count * sizeof(struct dx_entry));
++                      dx_set_limit(entries2, dx_node_limit(dir));
++
++                      /* Set up root */
++                      dx_set_count(entries, 1);
++                      dx_set_block(entries + 0, newblock[i]);
++                      root->info.indirect_levels = indirects + 1;
++
++                      /* Shift frames in the path */
++                      memmove(frames + 2, frames + 1,
++                              (sizeof frames) - 2 * sizeof frames[0]);
++                      /* Add new access path frame */
++                      frames[1].at = entries2 + idx;
++                      frames[1].entries = entries = entries2;
++                      frames[1].bh = bh2;
++                      ++ frame;
++                      bh_new[i] = NULL; /* buffer head is "consumed" */
++                      err = ext3_journal_get_write_access(handle, bh2);
+                       if (err)
+                               goto journal_error;
+-
+-                      memcpy ((char *) entries2, (char *) (entries + icount1),
+-                              icount2 * sizeof(struct dx_entry));
+-                      dx_set_count (entries, icount1);
+-                      dx_set_count (entries2, icount2);
++              } else {
++                      /* splitting non-root index node. */
++                      unsigned count1 = count/2, count2 = count - count1;
++                      unsigned hash2 = dx_get_hash(entries + count1);
++                      dxtrace(printk("Split index %i/%i\n", count1, count2));
++
++                      memcpy ((char *) entries2, (char *) (entries + count1),
++                              count2 * sizeof(struct dx_entry));
++                      dx_set_count (entries, count1);
++                      dx_set_count (entries2, count2);
+                       dx_set_limit (entries2, dx_node_limit(dir));
+                       /* Which index block gets the new entry? */
+-                      if (at - entries >= icount1) {
+-                              frame->at = at = at - entries - icount1 + entries2;
++                      if (idx >= count1) {
++                              frame->at = entries2 + idx - count1;
+                               frame->entries = entries = entries2;
+                               swap(frame->bh, bh2);
++                              bh_new[i] = bh2;
+                       }
+-                      dx_insert_block (frames + 0, hash2, newblock);
+-                      dxtrace(dx_show_index ("node", frames[1].entries));
++                      dx_insert_block (frame - 1, hash2, newblock[i]);
++                      dxtrace(dx_show_index ("node", frame->entries));
+                       dxtrace(dx_show_index ("node",
+                              ((struct dx_node *) bh2->b_data)->entries));
+                       err = ext3_journal_dirty_metadata(handle, bh2);
+                       if (err)
+                               goto journal_error;
+-                      brelse (bh2);
+-              } else {
+-                      dxtrace(printk("Creating second level index...\n"));
+-                      memcpy((char *) entries2, (char *) entries,
+-                             icount * sizeof(struct dx_entry));
+-                      dx_set_limit(entries2, dx_node_limit(dir));
+-
+-                      /* Set up root */
+-                      dx_set_count(entries, 1);
+-                      dx_set_block(entries + 0, newblock);
+-                      ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
+-
+-                      /* Add new access path frame */
+-                      frame = frames + 1;
+-                      frame->at = at = at - entries + entries2;
+-                      frame->entries = entries = entries2;
+-                      frame->bh = bh2;
+-                      err = ext3_journal_get_write_access(handle,
+-                                                           frame->bh);
+-                      if (err)
+-                              goto journal_error;
+               }
+-              ext3_journal_dirty_metadata(handle, frames[0].bh);
+       }
+-      de = do_split(handle, dir, &bh, frame, &hinfo, &err);
++      de = do_split(handle, dir, &bh, --frame, &hinfo, &err);
+       if (!de)
+               goto cleanup;
+       err = add_dirent_to_buf(handle, dentry, inode, de, bh);
+-      bh = NULL;
+-      goto cleanup;
++      goto cleanup2;
+ journal_error:
+       ext3_std_error(dir->i_sb, err);
+ cleanup:
+       if (bh)
+               brelse(bh);
++cleanup2:
++      for (i = 0; i < ARRAY_SIZE(bh_new); ++i) {
++              if (bh_new[i] != NULL)
++                      brelse(bh_new[i]);
++      }
++      if (err)
++              inode->i_size = isize;
+       dx_release(frames);
+       return err;
+ }
+@@ -1561,7 +1634,7 @@
+  * ext3_delete_entry deletes a directory entry by merging it with the
+  * previous entry
+  */
+-static int ext3_delete_entry (handle_t *handle, 
++static int ext3_delete_entry (handle_t *handle,
+                             struct inode * dir,
+                             struct ext3_dir_entry_2 * de_del,
+                             struct buffer_head * bh)
+@@ -1821,7 +1894,7 @@
+       de1 = (struct ext3_dir_entry_2 *)
+                       ((char *) de + le16_to_cpu(de->rec_len));
+       if (le32_to_cpu(de->inode) != inode->i_ino ||
+-                      !le32_to_cpu(de1->inode) || 
++                      !le32_to_cpu(de1->inode) ||
+                       strcmp (".", de->name) ||
+                       strcmp ("..", de1->name)) {
+               ext3_warning (inode->i_sb, "empty_dir",
+@@ -1891,7 +1964,7 @@
+        * being truncated, or files being unlinked. */
+       /* @@@ FIXME: Observation from aviro:
+-       * I think I can trigger J_ASSERT in ext3_orphan_add().  We block 
++       * I think I can trigger J_ASSERT in ext3_orphan_add().  We block
+        * here (on lock_super()), so race with ext3_link() which might bump
+        * ->i_nlink. For, say it, character device. Not a regular file,
+        * not a directory, not a symlink and ->i_nlink > 0.
+@@ -2415,4 +2488,4 @@
+       .removexattr    = generic_removexattr,
+ #endif
+       .permission     = ext3_permission,
+-}; 
++};
index bab81b9..b90ed7a 100644 (file)
@@ -10,3 +10,10 @@ ext3-extents-2.6.9-rhel4.patch
 ext3-mballoc2-2.6.9-rhel4.patch 
 ext3-nlinks-2.6.9.patch
 ext3-ialloc-2.6.patch
+ext3-tall-htree.patch
+ext3-htree-path.patch
+ext3-htree-r5-hash.patch
+ext3-htree-path-ops.patch
+ext3-hash-selection.patch
+ext3-htree-comments.patch
+ext3-iam-ops.patch