ext3-mballoc2-2.6.9-rhel4.patch
ext3-nlinks-2.6.9.patch
ext3-ialloc-2.6.patch
+ext3-tall-htree.patch
+ext3-htree-path.patch
+ext3-htree-r5-hash.patch
+ext3-htree-path-ops.patch
+ext3-hash-selection.patch
+ext3-htree-comments.patch
+ext3-iam-ops.patch
--- /dev/null
+Index: linux-2.6.9/fs/ext3/hash.c
+===================================================================
+--- linux-2.6.9.orig/fs/ext3/hash.c 2006-04-23 22:39:01.000000000 +0800
++++ linux-2.6.9/fs/ext3/hash.c 2006-04-23 22:39:16.000000000 +0800
+@@ -127,6 +127,11 @@
+ return a;
+ }
+
++static __u32 dx_same_hash(const signed char *msg, int len)
++{
++ return 0xcafebabeUL;
++}
++
+ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
+ {
+ __u32 pad, val;
+@@ -220,6 +225,9 @@
+ case DX_HASH_R5:
+ hash = dx_r5_hash(name, len);
+ break;
++ case DX_HASH_SAME:
++ hash = dx_same_hash(name, len);
++ break;
+ default:
+ hinfo->hash = 0;
+ return -1;
+Index: linux-2.6.9/fs/ext3/super.c
+===================================================================
+--- linux-2.6.9.orig/fs/ext3/super.c 2006-04-23 22:38:55.000000000 +0800
++++ linux-2.6.9/fs/ext3/super.c 2006-04-23 22:39:56.000000000 +0800
+@@ -598,6 +598,7 @@
+ Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
+ Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+ Opt_extents, Opt_extdebug, Opt_mballoc,
++ Opt_hashfunc,
+ };
+
+ static match_table_t tokens = {
+@@ -651,6 +652,7 @@
+ {Opt_extdebug, "extdebug"},
+ {Opt_mballoc, "mballoc"},
+ {Opt_barrier, "barrier=%u"},
++ {Opt_hashfunc, "hash=%s"},
+ {Opt_err, NULL},
+ {Opt_resize, "resize"},
+ };
+@@ -675,6 +677,7 @@
+ return sb_block;
+ }
+
++int user_selected_hash_function = -1;
+ static int parse_options (char * options, struct super_block *sb,
+ unsigned long * inum, unsigned long *n_blocks_count, int is_remount)
+ {
+@@ -963,6 +966,23 @@
+ case Opt_mballoc:
+ set_opt (sbi->s_mount_opt, MBALLOC);
+ break;
++ case Opt_hashfunc:
++ if (strcmp (args[0].from, "legacy") == 0){
++ user_selected_hash_function = DX_HASH_LEGACY;
++ } else if (strcmp (args[0].from, "half_md4") == 0){
++ user_selected_hash_function = DX_HASH_HALF_MD4;
++ } else if (strcmp (args[0].from, "tea") == 0){
++ user_selected_hash_function = DX_HASH_TEA;
++ } else if (strcmp (args[0].from, "r5") == 0){
++ user_selected_hash_function = DX_HASH_R5;
++ } else if (strcmp (args[0].from, "same") == 0){
++ user_selected_hash_function = DX_HASH_SAME;
++ } else {
++ printk (KERN_ERR "EXT3-fs: unrecognized hash function name\n");
++ return 0;
++ }
++ break;
++
+ default:
+ printk (KERN_ERR
+ "EXT3-fs: Unrecognized mount option \"%s\" "
+Index: linux-2.6.9/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.9.orig/fs/ext3/namei.c 2006-04-23 22:39:02.000000000 +0800
++++ linux-2.6.9/fs/ext3/namei.c 2006-04-23 22:39:16.000000000 +0800
+@@ -365,10 +365,7 @@
+ struct htree_cookie *hc = cookie;
+
+ root = data;
+- if (root->info.hash_version != DX_HASH_TEA &&
+- root->info.hash_version != DX_HASH_HALF_MD4 &&
+- root->info.hash_version != DX_HASH_R5 &&
+- root->info.hash_version != DX_HASH_LEGACY) {
++ if (root->info.hash_version > DX_HASH_MAX) {
+ ext3_warning(sb, __FUNCTION__,
+ "Unrecognised inode hash code %d",
+ root->info.hash_version);
+@@ -1467,6 +1464,7 @@
+ * This converts a one block unindexed directory to a 3 block indexed
+ * directory, and adds the dentry to the indexed directory.
+ */
++extern int user_selected_hash_function;
+ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
+ struct inode *inode, struct buffer_head *bh)
+ {
+@@ -1522,7 +1520,9 @@
+ memset (&root->info, 0, sizeof(root->info));
+ root->info.info_length = sizeof(root->info);
+ root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
+- root->info.hash_version = DX_HASH_R5;
++ if (user_selected_hash_function >= 0 &&
++ user_selected_hash_function <= DX_HASH_MAX)
++ root->info.hash_version = user_selected_hash_function;
+ entries = (void *)root->entries;
+ dx_set_block (&path, entries, 1);
+ dx_set_count (entries, 1);
+Index: linux-2.6.9/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.9.orig/include/linux/ext3_fs.h 2006-04-23 22:39:01.000000000 +0800
++++ linux-2.6.9/include/linux/ext3_fs.h 2006-04-23 22:39:16.000000000 +0800
+@@ -665,6 +665,8 @@
+ #define DX_HASH_HALF_MD4 1
+ #define DX_HASH_TEA 2
+ #define DX_HASH_R5 3
++#define DX_HASH_SAME 4
++#define DX_HASH_MAX 4
+
+ /* hash info structure used by the directory hash */
+ struct dx_hash_info
--- /dev/null
+Index: linux-2.6.9/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.9.orig/fs/ext3/namei.c 2006-04-17 18:32:12.000000000 +0800
++++ linux-2.6.9/fs/ext3/namei.c 2006-04-23 21:40:41.000000000 +0800
+@@ -24,6 +24,78 @@
+ * Theodore Ts'o, 2002
+ */
+
++/*
++ * iam: big theory statement.
++ *
++ * iam (Index Access Module) is a module providing abstraction of persistent
++ * transactional container on top of generalized ext3 htree.
++ *
++ * iam supports:
++ *
++ * - key, pointer, and record size specifiable per container.
++ *
++ * - trees taller than 2 index levels.
++ *
++ * - read/write to existing ext3 htree directories as iam containers.
++ *
++ * iam container is a tree, consisting of leaf nodes containing keys and
++ * records stored in this container, and index nodes, containing keys and
++ * pointers to leaf or index nodes.
++ *
++ * iam does not work with keys directly, instead it calls user-supplied key
++ * comparison function (->dpo_keycmp()).
++ *
++ * Pointers are (currently) interpreted as logical offsets (measured in
++ * blocksful) within underlying flat file on top of which iam tree lives.
++ *
++ * On-disk format:
++ *
++ * iam mostly tries to reuse existing htree formats.
++ *
++ * Format of index node:
++ *
++ * +-----+-------+-------+-------+------+-------+------------+
++ * | | count | | | | | |
++ * | gap | / | entry | entry | .... | entry | free space |
++ * | | limit | | | | | |
++ * +-----+-------+-------+-------+------+-------+------------+
++ *
++ * gap this part of node is never accessed by iam code. It
++ * exists for binary compatibility with ext3 htree (that,
++ * in turn, stores fake struct ext2_dirent for ext2
++ * compatibility), and to keep some unspecified per-node
++ * data. Gap can be different for root and non-root index
++ * nodes. Gap size can be specified for each container
++ * (gap of 0 is allowed).
++ *
++ * count/limit current number of entries in this node, and the maximal
++ * number of entries that can fit into node. count/limit
++ * has the same size as entry, and is itself counted in
++ * count.
++ *
++ * entry index entry: consists of a key immediately followed by
++ * a pointer to a child node. Size of a key and size of a
++ * pointer depends on container. Entry has neither
++ * alignment nor padding.
++ *
++ * free space portion of node new entries are added to
++ *
++ * Entries in index node are sorted by their key value.
++ *
++ *
++ *
++ *
++ *
++ *
++ *
++ *
++ *
++ *
++ *
++ *
++ *
++ */
++
+ #include <linux/fs.h>
+ #include <linux/pagemap.h>
+ #include <linux/jbd.h>
+@@ -98,14 +170,6 @@
+ __le16 count;
+ };
+
+-struct dx_entry; /* incomplete type */
+-struct dx_key; /* incomplete type */
+-
+-struct dx_entry_compat {
+- __le32 hash;
+- __le32 block;
+-};
+-
+ /*
+ * dx_root_info is laid out so that if it should somehow get overlaid by a
+ * dirent the two low bits of the hash version will be zero. Therefore, the
+@@ -135,111 +199,513 @@
+ struct {} entries[0];
+ };
+
+-
+-struct dx_frame
+-{
+- struct buffer_head *bh;
+- struct dx_entry *entries;
+- struct dx_entry *at;
+-};
+-
+ struct dx_map_entry
+ {
+ u32 hash;
+ u32 offs;
+ };
+
+-struct dx_path;
+-struct dx_param {
+- size_t dpo_key_size;
+- size_t dpo_ptr_size;
+- size_t dpo_node_gap;
+- size_t dpo_root_gap;
+-
+- u32 (*dpo_root_ptr)(struct dx_path *path);
+- int (*dpo_node_check)(struct dx_path *path,
+- struct dx_frame *frame, void *cookie);
+- int (*dpo_node_init)(struct dx_path *path,
+- struct buffer_head *bh, int root);
+- int (*dpo_keycmp)(struct dx_path *path,
+- struct dx_key *k1, struct dx_key *k2);
++/*
++ * Entry within index tree node. Consists of a key immediately followed
++ * (without padding) by a pointer to the child node.
++ *
++ * Both key and pointer are of variable size, hence incomplete type.
++ */
++struct iam_entry;
++
++struct iam_entry_compat {
++ __le32 hash;
++ __le32 block;
++};
++
++/*
++ * Incomplete type used to refer to keys in iam container.
++ *
++ * As key size can be different from container to container, iam has to use
++ * incomplete type. Clients cast pointer to iam_key to real key type and back.
++ */
++struct iam_key;
++
++/* Incomplete type use to refer to the records stored in iam containers. */
++struct iam_rec;
++
++typedef __u64 iam_ptr_t;
++
++/*
++ * Index node traversed during tree lookup.
++ */
++struct iam_frame {
++ struct buffer_head *bh; /* buffer holding node data */
++ struct iam_entry *entries; /* array of entries */
++ struct iam_entry *at; /* target entry, found by binary search */
++};
++
++/* leaf node reached by tree lookup */
++struct iam_leaf {
++ struct buffer_head *bh;
++ struct iam_leaf_entry *entries;
++ struct iam_leaf_entry *at;
++};
++
++struct iam_path;
++struct iam_container;
++
++/*
++ * Parameters, describing a flavor of iam container.
++ */
++struct iam_descr {
++ /*
++ * Size of a key in this container, in bytes.
++ */
++ size_t id_key_size;
++ /*
++ * Size of a pointer to the next level (stored in index nodes), in
++ * bytes.
++ */
++ size_t id_ptr_size;
++ /*
++ * Size of a record (stored in leaf nodes), in bytes.
++ */
++ size_t id_rec_size;
++ /*
++ * Size of unused (by iam) space at the beginning of every non-root
++ * node, in bytes. Used for compatibility with ext3.
++ */
++ size_t id_node_gap;
++ /*
++ * Size of unused (by iam) space at the beginning of root node, in
++ * bytes. Used for compatibility with ext3.
++ */
++ size_t id_root_gap;
++
++ /*
++ * Returns pointer (in the same sense as pointer in index entry) to
++ * the root node.
++ */
++ __u32 (*id_root_ptr)(struct iam_container *c);
++
++ /*
++ * Check validity and consistency of index node. This is called when
++ * iam just loaded new node into frame.
++ */
++ int (*id_node_check)(struct iam_path *path, struct iam_frame *frame);
++ /*
++ * Initialize new node (stored in @bh) that is going to be added into
++ * tree.
++ */
++ int (*id_node_init)(struct iam_container *c,
++ struct buffer_head *bh, int root);
++ int (*id_node_read)(struct iam_container *c, iam_ptr_t ptr,
++ handle_t *h, struct buffer_head **bh);
++ /*
++ * Key comparison function. Returns -1, 0, +1.
++ */
++ int (*id_keycmp)(struct iam_container *c,
++ struct iam_key *k1, struct iam_key *k2);
++ /*
++ * Create new container.
++ *
++ * Newly created container has a root node and a single leaf. Leaf
++ * contains single record with the smallest possible key.
++ */
++ int (*id_create)(struct iam_container *c);
++ struct {
++ /*
++ * leaf operations.
++ */
++ /*
++ * returns true iff leaf is positioned at the last entry.
++ */
++ int (*at_end)(struct iam_container *c, struct iam_leaf *l);
++ /* position leaf at the first entry */
++ void (*start)(struct iam_container *c, struct iam_leaf *l);
++ /* move leaf to the next entry. */
++ void (*next)(struct iam_container *c, struct iam_leaf *l);
++ /* return key of current leaf record in @k */
++ void (*key)(struct iam_container *c, struct iam_leaf *l,
++ struct iam_key *k);
++ /* return pointer to entry body */
++ struct iam_rec *(*rec)(struct iam_container *c,
++ struct iam_leaf *l);
++ } id_leaf;
++};
++
++struct iam_container {
++ /*
++ * Underlying flat file. IO against this object is issued to
++ * read/write nodes.
++ */
++ struct inode *ic_object;
++ /*
++ * container flavor.
++ */
++ struct iam_descr *ic_descr;
++ /*
++ * pointer to flavor-specific per-container data.
++ */
++ void *ic_descr_data;
+ };
+
+ /*
+ * Structure to keep track of a path drilled through htree.
+ */
+-struct dx_path {
+- struct inode *dp_object;
+- struct dx_param *dp_param;
+- int dp_indirect;
+- struct dx_frame dp_frames[DX_MAX_TREE_HEIGHT];
+- struct dx_frame *dp_frame;
+- struct dx_key *dp_key_target;
+- struct dx_key *dp_key_scratch[DX_SCRATCH_KEYS];
+-};
+-
+-struct dx_path_compat {
+- struct dx_path dpc_path;
+- __u32 dpc_scrach[DX_SCRATCH_KEYS];
+-};
+-
+-static u32 htree_root_ptr(struct dx_path *p);
+-static int htree_node_check(struct dx_path *path,
+- struct dx_frame *frame, void *cookie);
+-static int htree_node_init(struct dx_path *path,
++struct iam_path {
++ /*
++ * Parent container.
++ */
++ struct iam_container *ip_container;
++ /*
++ * Number of index levels minus one.
++ */
++ int ip_indirect;
++ /*
++ * Nodes that top-to-bottom traversal passed through.
++ */
++ struct iam_frame ip_frames[DX_MAX_TREE_HEIGHT];
++ /*
++ * Last filled frame in ->ip_frames. Refers to the 'twig' node (one
++ * immediately above leaf).
++ */
++ struct iam_frame *ip_frame;
++ /*
++ * Leaf node: a child of ->ip_frame.
++ */
++ struct iam_leaf *ip_leaf;
++ /*
++ * Key searched for.
++ */
++ struct iam_key *ip_key_target;
++ /*
++ * Scratch-pad area for temporary keys.
++ */
++ struct iam_key *ip_key_scratch[DX_SCRATCH_KEYS];
++ /*
++ * pointer to flavor-specific per-container data.
++ */
++ void *ip_descr_data;
++};
++
++/*
++ * Helper structure for legacy htrees.
++ */
++struct iam_path_compat {
++ struct iam_path ipc_path;
++ struct iam_container ipc_container;
++ __u32 ipc_scrach[DX_SCRATCH_KEYS];
++};
++
++static u32 htree_root_ptr(struct iam_container *c);
++static int htree_node_check(struct iam_path *path, struct iam_frame *frame);
++static int htree_node_init(struct iam_container *c,
+ struct buffer_head *bh, int root);
+-static int htree_keycmp(struct dx_path *path,
+- struct dx_key *k1, struct dx_key *k2);
++static int htree_keycmp(struct iam_container *c,
++ struct iam_key *k1, struct iam_key *k2);
++static int htree_node_read(struct iam_container *c, iam_ptr_t ptr,
++ handle_t *h, struct buffer_head **bh);
++
++/*
++ * Parameters describing iam compatibility mode in which existing ext3 htrees
++ * can be manipulated.
++ */
++static struct iam_descr htree_compat_param = {
++ .id_key_size = sizeof ((struct dx_map_entry *)NULL)->hash,
++ .id_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs,
++ .id_node_gap = offsetof(struct dx_node, entries),
++ .id_root_gap = offsetof(struct dx_root, entries),
++
++ .id_root_ptr = htree_root_ptr,
++ .id_node_check = htree_node_check,
++ .id_node_init = htree_node_init,
++ .id_node_read = htree_node_read,
++ .id_keycmp = htree_keycmp
++};
++
++
++struct iam_key;
++struct iam_rec;
++struct iam_descr;
++struct iam_container;
++struct iam_path;
+
+-static struct dx_param htree_compat_param = {
+- .dpo_key_size = sizeof ((struct dx_map_entry *)NULL)->hash,
+- .dpo_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs,
+- .dpo_node_gap = offsetof(struct dx_node, entries),
+- .dpo_root_gap = offsetof(struct dx_root, entries),
+-
+- .dpo_root_ptr = htree_root_ptr,
+- .dpo_node_check = htree_node_check,
+- .dpo_node_init = htree_node_init,
+- .dpo_keycmp = htree_keycmp
++/*
++ * Initialize container @c, acquires additional reference on @inode.
++ */
++int iam_container_init(struct iam_container *c,
++ struct iam_descr *descr, struct inode *inode);
++/*
++ * Finalize container @c, release all resources.
++ */
++void iam_container_fini(struct iam_container *c);
++
++/*
++ * Search container @c for record with key @k. If record is found, its data
++ * are moved into @r.
++ *
++ *
++ *
++ * Return values: +ve: found, 0: not-found, -ve: error
++ */
++int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r);
++/*
++ * Insert new record @r with key @k into container @c (within context of
++ * transaction @h).
++ *
++ * Return values: 0: success, -ve: error, including -EEXIST when record with
++ * given key is already present.
++ *
++ * postcondition: ergo(result == 0 || result == -EEXIST,
++ * iam_lookup(c, k, r2) > 0 &&
++ * !memcmp(r, r2, c->ic_descr->id_rec_size));
++ */
++int iam_insert(handle_t *h, struct iam_container *c,
++ struct iam_key *k, struct iam_rec *r);
++/*
++ * Replace existing record with key @k, or insert new one. New record data are
++ * in @r.
++ *
++ * Return values: 0: success, -ve: error.
++ *
++ * postcondition: ergo(result == 0, iam_lookup(c, k, r2) > 0 &&
++ * !memcmp(r, r2, c->ic_descr->id_rec_size));
++ */
++int iam_update(handle_t *h, struct iam_container *c,
++ struct iam_key *k, struct iam_rec *r);
++/*
++ * Delete existing record with key @k.
++ *
++ * Return values: 0: success, -ENOENT: not-found, -ve: other error.
++ *
++ * postcondition: ergo(result == 0 || result == -ENOENT,
++ * !iam_lookup(c, k, *));
++ */
++int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k);
++
++/*
++ * iam cursor (iterator) api.
++ */
++
++/*
++ * Flags controlling iterator functionality.
++ */
++enum iam_it_flags {
++ /*
++ * this iterator will move (iam_it_{prev,next}() will be called on it)
++ */
++ IAM_IT_MOVE = (1 << 0),
++ /*
++ * tree can be updated through this iterator.
++ */
++ IAM_IT_WRITE = (1 << 1)
+ };
+
++/*
++ * States of iterator state machine.
++ */
++enum iam_it_state {
++ /* initial state */
++ IAM_IT_DETACHED,
++ /* iterator is above particular record in the container */
++ IAM_IT_ATTACHED
++};
++
++/*
++ * Iterator.
++ *
++ * Immediately after call to iam_it_init() iterator is in "detached"
++ * (IAM_IT_DETACHED) state: it is associated with given parent container, but
++ * doesn't point to any particular record in this container.
++ *
++ * After successful call to iam_it_get() and until corresponding call to
++ * iam_it_put() iterator is in "attached" state (IAM_IT_ATTACHED).
++ *
++ * Attached iterator can move through records in a container (provided
++ * IAM_IT_MOVE permission) in a key order, can get record and key values as it
++ * passes over them, and can modify container (provided IAM_IT_WRITE
++ * permission).
++ *
++ * Concurrency: iterators are supposed to be local to thread. Interfaces below
++ * do no internal serialization.
++ *
++ */
++struct iam_iterator {
++ /*
++ * iterator flags, taken from enum iam_it_flags.
++ */
++ __u32 ii_flags;
++ enum iam_it_state ii_state;
++ /*
++ * path to the record. Valid in IAM_IT_ATTACHED state.
++ */
++ struct iam_path ii_path;
++};
++
++static inline struct iam_key *keycpy(struct iam_container *c,
++ struct iam_key *k1, struct iam_key *k2)
++{
++ return memcpy(k1, k2, c->ic_descr->id_key_size);
++}
++
++static inline int keycmp(struct iam_container *c,
++ struct iam_key *k1, struct iam_key *k2)
++{
++ return c->ic_descr->id_keycmp(c, k1, k2);
++}
++
++static struct iam_container *iam_it_container(struct iam_iterator *it)
++{
++ return it->ii_path.ip_container;
++}
++
++static inline int it_keycmp(struct iam_iterator *it,
++ struct iam_key *k1, struct iam_key *k2)
++{
++ return keycmp(iam_it_container(it), k1, k2);
++}
++
++/*
++ * Initialize iterator to IAM_IT_DETACHED state.
++ *
++ * postcondition: it_state(it) == IAM_IT_DETACHED
++ */
++int iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags);
++/*
++ * Finalize iterator and release all resources.
++ *
++ * precondition: it_state(it) == IAM_IT_DETACHED
++ */
++void iam_it_fini(struct iam_iterator *it);
++
++/*
++ * Attach iterator. After successful completion, @it points to record with the
++ * largest key not larger than @k. Semantics of ->id_create() method guarantee
++ * that such record will always be found.
++ *
++ * Return value: 0: positioned on existing record,
++ * -ve: error.
++ *
++ * precondition: it_state(it) == IAM_IT_DETACHED
++ * postcondition: ergo(result == 0,
++ * (it_state(it) == IAM_IT_ATTACHED &&
++ * it_keycmp(it, iam_it_key_get(it, *), k) < 0))
++ */
++int iam_it_get(struct iam_iterator *it, struct iam_key *k);
++
++/*
++ * Duplicates iterator.
++ *
++ * postcondition: it_state(dst) == it_state(src) &&
++ * iam_it_container(dst) == iam_it_container(src) &&
++ * dst->ii_flags == src->ii_flags &&
++ * ergo(it_state(it) == IAM_IT_ATTACHED,
++ * iam_it_rec_get(dst) == iam_it_rec_get(src) &&
++ * iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
++ */
++void iam_it_dup(struct iam_iterator *dst, struct iam_iterator *src);
++
++/*
++ * Detach iterator. Does nothing if iterator is in detached state.
++ *
++ * postcondition: it_state(it) == IAM_IT_DETACHED
++ */
++void iam_it_put(struct iam_iterator *it);
++
++/*
++ * Move iterator one record right.
++ *
++ * Return value: 0: success,
++ * +1: end of container reached
++ * -ve: error
++ *
++ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
++ * postcondition: ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED)
++ */
++int iam_it_next(struct iam_iterator *it);
++
++/*
++ * Return pointer to the record under iterator.
++ *
++ * precondition: it_state(it) == IAM_IT_ATTACHED
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++const struct iam_rec *iam_it_rec_get(struct iam_iterator *it);
++
++/*
++ * Replace contents of record under iterator.
++ *
++ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
++ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
++ * ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
++ */
++int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r);
++
++/*
++ * Place key under iterator in @k, return @k
++ *
++ * precondition: it_state(it) == IAM_IT_ATTACHED
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++const struct iam_key *iam_it_key_get(struct iam_iterator *it,
++ struct iam_key *k);
++
++/*
++ * Insert new record with key @k and contents from @r, shifting records to the
++ * right.
++ *
++ * precondition: it_state(it) == IAM_IT_ATTACHED &&
++ * it->ii_flags&IAM_IT_WRITE &&
++ * it_keycmp(it, iam_it_key_get(it, *), k) < 0
++ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
++ * ergo(result == 0,
++ * it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
++ * !memcmp(iam_it_rec_get(it), r, ...))
++ */
++int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
++ struct iam_key *k, struct iam_rec *r);
++/*
++ * Delete record under iterator.
++ *
++ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++int iam_it_rec_delete(handle_t *h, struct iam_iterator *it);
+
+ #ifdef CONFIG_EXT3_INDEX
+-static inline unsigned dx_get_block(struct dx_path *p, struct dx_entry *entry);
+-static void dx_set_block(struct dx_path *p,
+- struct dx_entry *entry, unsigned value);
+-static inline struct dx_key *dx_get_key(struct dx_path *p,
+- struct dx_entry *entry,
+- struct dx_key *key);
+-static void dx_set_key(struct dx_path *p, struct dx_entry *entry,
+- struct dx_key *key);
+-static unsigned dx_get_count(struct dx_entry *entries);
+-static unsigned dx_get_limit(struct dx_entry *entries);
+-static void dx_set_count(struct dx_entry *entries, unsigned value);
+-static void dx_set_limit(struct dx_entry *entries, unsigned value);
+-static unsigned dx_root_limit(struct dx_path *p);
+-static unsigned dx_node_limit(struct dx_path *p);
++static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry);
++static void dx_set_block(struct iam_path *p,
++ struct iam_entry *entry, unsigned value);
++static inline struct iam_key *dx_get_key(struct iam_path *p,
++ struct iam_entry *entry,
++ struct iam_key *key);
++static void dx_set_key(struct iam_path *p, struct iam_entry *entry,
++ struct iam_key *key);
++static unsigned dx_get_count(struct iam_entry *entries);
++static unsigned dx_get_limit(struct iam_entry *entries);
++static void dx_set_count(struct iam_entry *entries, unsigned value);
++static void dx_set_limit(struct iam_entry *entries, unsigned value);
++static unsigned dx_root_limit(struct iam_path *p);
++static unsigned dx_node_limit(struct iam_path *p);
+ static int dx_probe(struct dentry *dentry,
+ struct inode *dir,
+ struct dx_hash_info *hinfo,
+- struct dx_path *path);
++ struct iam_path *path);
+ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
+ struct dx_hash_info *hinfo, struct dx_map_entry map[]);
+ static void dx_sort_map(struct dx_map_entry *map, unsigned count);
+ static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
+ struct dx_map_entry *offsets, int count);
+ static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
+-static void dx_insert_block (struct dx_path *path,
+- struct dx_frame *frame, u32 hash, u32 block);
++static void dx_insert_block (struct iam_path *path,
++ struct iam_frame *frame, u32 hash, u32 block);
+ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+- struct dx_path *path, __u32 *start_hash);
++ struct iam_path *path, __u32 *start_hash);
+ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
+ struct ext3_dir_entry_2 **res_dir, int *err);
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+ struct inode *inode);
+
+-static inline void dx_path_init(struct dx_path *path, struct inode *inode);
+-static inline void dx_path_fini(struct dx_path *path);
++static inline void iam_path_init(struct iam_path *path,
++ struct iam_container *c);
++static inline void iam_path_fini(struct iam_path *path);
+
+
+ /*
+@@ -247,153 +713,154 @@
+ * Mask them off for now.
+ */
+
+-static inline void *entry_off(struct dx_entry *entry, ptrdiff_t off)
++static inline void *entry_off(struct iam_entry *entry, ptrdiff_t off)
+ {
+ return (void *)((char *)entry + off);
+ }
+
+-static inline size_t dx_entry_size(struct dx_path *p)
++static inline struct iam_descr *path_descr(struct iam_path *p)
+ {
+- return p->dp_param->dpo_key_size + p->dp_param->dpo_ptr_size;
++ return p->ip_container->ic_descr;
+ }
+
+-static inline struct dx_entry *dx_entry_shift(struct dx_path *p,
+- struct dx_entry *entry, int shift)
++static inline struct inode *path_obj(struct iam_path *p)
++{
++ return p->ip_container->ic_object;
++}
++
++static inline size_t iam_entry_size(struct iam_path *p)
++{
++ return path_descr(p)->id_key_size + path_descr(p)->id_ptr_size;
++}
++
++static inline struct iam_entry *iam_entry_shift(struct iam_path *p,
++ struct iam_entry *entry, int shift)
+ {
+ void *e = entry;
+- return e + shift * dx_entry_size(p);
++ return e + shift * iam_entry_size(p);
+ }
+
+-static inline ptrdiff_t dx_entry_diff(struct dx_path *p,
+- struct dx_entry *e1, struct dx_entry *e2)
++static inline ptrdiff_t iam_entry_diff(struct iam_path *p,
++ struct iam_entry *e1, struct iam_entry *e2)
+ {
+ ptrdiff_t diff;
+
+ diff = (void *)e1 - (void *)e2;
+- assert(diff / dx_entry_size(p) * dx_entry_size(p) == diff);
+- return diff / dx_entry_size(p);
++ assert(diff / iam_entry_size(p) * iam_entry_size(p) == diff);
++ return diff / iam_entry_size(p);
+ }
+
+-static inline unsigned dx_get_block(struct dx_path *p, struct dx_entry *entry)
++static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry)
+ {
+- return le32_to_cpu(*(u32 *)entry_off(entry, p->dp_param->dpo_key_size))
++ return le32_to_cpu(*(u32 *)entry_off(entry, path_descr(p)->id_key_size))
+ & 0x00ffffff;
+ }
+
+-static inline void dx_set_block(struct dx_path *p,
+- struct dx_entry *entry, unsigned value)
++static inline void dx_set_block(struct iam_path *p,
++ struct iam_entry *entry, unsigned value)
+ {
+- *(u32*)entry_off(entry, p->dp_param->dpo_key_size) = cpu_to_le32(value);
++ *(u32*)entry_off(entry,
++ path_descr(p)->id_key_size) = cpu_to_le32(value);
+ }
+
+-static inline struct dx_key *dx_get_key(struct dx_path *p,
+- struct dx_entry *entry,
+- struct dx_key *key)
++static inline struct iam_key *dx_get_key(struct iam_path *p,
++ struct iam_entry *entry,
++ struct iam_key *key)
+ {
+- memcpy(key, entry, p->dp_param->dpo_key_size);
++ memcpy(key, entry, path_descr(p)->id_key_size);
+ return key;
+ }
+
+-static inline struct dx_key *dx_key_at(struct dx_path *p,
+- struct dx_entry *entry)
++static inline struct iam_key *iam_key_at(struct iam_path *p,
++ struct iam_entry *entry)
+ {
+- return (struct dx_key *)entry;
++ return (struct iam_key *)entry;
+ }
+
+-static inline void dx_set_key(struct dx_path *p,
+- struct dx_entry *entry, struct dx_key *key)
++static inline void dx_set_key(struct iam_path *p,
++ struct iam_entry *entry, struct iam_key *key)
+ {
+- memcpy(entry, key, p->dp_param->dpo_key_size);
++ memcpy(entry, key, path_descr(p)->id_key_size);
+ }
+
+-static inline unsigned dx_get_count (struct dx_entry *entries)
++static inline unsigned dx_get_count (struct iam_entry *entries)
+ {
+ return le16_to_cpu(((struct dx_countlimit *) entries)->count);
+ }
+
+-static inline unsigned dx_get_limit (struct dx_entry *entries)
++static inline unsigned dx_get_limit (struct iam_entry *entries)
+ {
+ return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
+ }
+
+-static inline void dx_set_count (struct dx_entry *entries, unsigned value)
++static inline void dx_set_count (struct iam_entry *entries, unsigned value)
+ {
+ ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
+ }
+
+-static inline void dx_set_limit (struct dx_entry *entries, unsigned value)
++static inline void dx_set_limit (struct iam_entry *entries, unsigned value)
+ {
+ ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
+ }
+
+-static inline unsigned dx_root_limit(struct dx_path *p)
++static inline unsigned dx_root_limit(struct iam_path *p)
+ {
+- struct dx_param *param = p->dp_param;
+- unsigned entry_space = p->dp_object->i_sb->s_blocksize -
+- param->dpo_root_gap;
+- return entry_space / (param->dpo_key_size + param->dpo_ptr_size);
++ struct iam_descr *param = path_descr(p);
++ unsigned entry_space = path_obj(p)->i_sb->s_blocksize -
++ param->id_root_gap;
++ return entry_space / (param->id_key_size + param->id_ptr_size);
+ }
+
+-static inline unsigned dx_node_limit(struct dx_path *p)
++static inline unsigned dx_node_limit(struct iam_path *p)
+ {
+- struct dx_param *param = p->dp_param;
+- unsigned entry_space = p->dp_object->i_sb->s_blocksize -
+- param->dpo_node_gap;
+- return entry_space / (param->dpo_key_size + param->dpo_ptr_size);
++ struct iam_descr *param = path_descr(p);
++ unsigned entry_space = path_obj(p)->i_sb->s_blocksize -
++ param->id_node_gap;
++ return entry_space / (param->id_key_size + param->id_ptr_size);
+ }
+
+-static inline int dx_index_is_compat(struct dx_path *path)
++static inline int dx_index_is_compat(struct iam_path *path)
+ {
+- return path->dp_param == &htree_compat_param;
++ return path_descr(path) == &htree_compat_param;
+ }
+
+-static struct dx_entry *dx_get_entries(struct dx_path *path, void *data,
++static struct iam_entry *dx_get_entries(struct iam_path *path, void *data,
+ int root)
+ {
+ return data +
+ (root ?
+- path->dp_param->dpo_root_gap : path->dp_param->dpo_node_gap);
++ path_descr(path)->id_root_gap : path_descr(path)->id_node_gap);
+ }
+
+-static struct dx_entry *dx_node_get_entries(struct dx_path *path,
+- struct dx_frame *frame)
++static struct iam_entry *dx_node_get_entries(struct iam_path *path,
++ struct iam_frame *frame)
+ {
+ return dx_get_entries(path,
+- frame->bh->b_data, frame == path->dp_frames);
+-}
+-
+-static inline struct dx_key *keycpy(struct dx_path *p,
+- struct dx_key *k1, struct dx_key *k2)
+-{
+- return memcpy(k1, k2, p->dp_param->dpo_key_size);
+-}
+-
+-static inline int keycmp(struct dx_path *p,
+- struct dx_key *k1, struct dx_key *k2)
+-{
+- return p->dp_param->dpo_keycmp(p, k1, k2);
++ frame->bh->b_data, frame == path->ip_frames);
+ }
+
+-static int dx_node_check(struct dx_path *p, struct dx_frame *f)
++static int dx_node_check(struct iam_path *p, struct iam_frame *f)
+ {
+- struct dx_entry *e;
++ struct iam_entry *e;
++ struct iam_container *c;
+ unsigned count;
+ unsigned i;
+
++ c = p->ip_container;
+ e = dx_node_get_entries(p, f);
+ count = dx_get_count(e);
+- e = dx_entry_shift(p, e, 1);
+- for (i = 0; i < count - 1; ++i, e = dx_entry_shift(p, e, 1)) {
+- keycpy(p, p->dp_key_scratch[0], p->dp_key_scratch[1]);
+- dx_get_key(p, e, p->dp_key_scratch[1]);
++ e = iam_entry_shift(p, e, 1);
++ for (i = 0; i < count - 1; ++i, e = iam_entry_shift(p, e, 1)) {
++ keycpy(c, p->ip_key_scratch[0], p->ip_key_scratch[1]);
++ dx_get_key(p, e, p->ip_key_scratch[1]);
+ if (i > 0 &&
+- keycmp(p, p->dp_key_scratch[0], p->dp_key_scratch[1]) > 0)
++ keycmp(c, p->ip_key_scratch[0], p->ip_key_scratch[1]) > 0)
+ return 0;
+ }
+ return 1;
+ }
+
+-static u32 htree_root_ptr(struct dx_path *path)
++static u32 htree_root_ptr(struct iam_container *c)
+ {
+ return 0;
+ }
+@@ -403,20 +870,19 @@
+ struct dentry *dentry;
+ };
+
+-static int htree_node_check(struct dx_path *path, struct dx_frame *frame,
+- void *cookie)
++static int htree_node_check(struct iam_path *path, struct iam_frame *frame)
+ {
+ void *data;
+- struct dx_entry *entries;
++ struct iam_entry *entries;
+ struct super_block *sb;
+
+ data = frame->bh->b_data;
+ entries = dx_node_get_entries(path, frame);
+- sb = path->dp_object->i_sb;
+- if (frame == path->dp_frames) {
++ sb = path_obj(path)->i_sb;
++ if (frame == path->ip_frames) {
+ /* root node */
+ struct dx_root *root;
+- struct htree_cookie *hc = cookie;
++ struct htree_cookie *hc = path->ip_descr_data;
+
+ root = data;
+ if (root->info.hash_version > DX_HASH_MAX) {
+@@ -433,8 +899,8 @@
+ return ERR_BAD_DX_DIR;
+ }
+
+- path->dp_indirect = root->info.indirect_levels;
+- if (path->dp_indirect > DX_MAX_TREE_HEIGHT - 1) {
++ path->ip_indirect = root->info.indirect_levels;
++ if (path->ip_indirect > DX_MAX_TREE_HEIGHT - 1) {
+ ext3_warning(sb, __FUNCTION__,
+ "Unimplemented inode hash depth: %#06x",
+ root->info.indirect_levels);
+@@ -450,17 +916,17 @@
+ if (hc->dentry)
+ ext3fs_dirhash(hc->dentry->d_name.name,
+ hc->dentry->d_name.len, hc->hinfo);
+- path->dp_key_target = (struct dx_key *)&hc->hinfo->hash;
++ path->ip_key_target = (struct iam_key *)&hc->hinfo->hash;
+ } else {
+ /* non-root index */
+- assert(entries == data + path->dp_param->dpo_node_gap);
++ assert(entries == data + path_descr(path)->id_node_gap);
+ assert(dx_get_limit(entries) == dx_node_limit(path));
+ }
+ frame->entries = frame->at = entries;
+ return 0;
+ }
+
+-static int htree_node_init(struct dx_path *path,
++static int htree_node_init(struct iam_container *c,
+ struct buffer_head *bh, int root)
+ {
+ struct dx_node *node;
+@@ -468,13 +934,24 @@
+ assert(!root);
+
+ node = (void *)bh->b_data;
+- node->fake.rec_len = cpu_to_le16(path->dp_object->i_sb->s_blocksize);
++ node->fake.rec_len = cpu_to_le16(c->ic_object->i_sb->s_blocksize);
+ node->fake.inode = 0;
+ return 0;
+ }
+
+-static int htree_keycmp(struct dx_path *path,
+- struct dx_key *k1, struct dx_key *k2)
++static int htree_node_read(struct iam_container *c, iam_ptr_t ptr,
++ handle_t *handle, struct buffer_head **bh)
++{
++ int result = 0;
++
++ *bh = ext3_bread(handle, c->ic_object, (int)ptr, 0, &result);
++ if (*bh == NULL)
++ result = -EIO;
++ return result;
++}
++
++static int htree_keycmp(struct iam_container *c,
++ struct iam_key *k1, struct iam_key *k2)
+ {
+ __u32 p1 = le32_to_cpu(*(__u32 *)k1);
+ __u32 p2 = le32_to_cpu(*(__u32 *)k2);
+@@ -486,7 +963,7 @@
+ * Debug
+ */
+ #ifdef DX_DEBUG
+-static void dx_show_index (char * label, struct dx_entry *entries)
++static void dx_show_index (char * label, struct iam_entry *entries)
+ {
+ int i, n = dx_get_count (entries);
+ printk("%s index ", label);
+@@ -535,7 +1012,7 @@
+ }
+
+ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
+- struct dx_entry *entries, int levels)
++ struct iam_entry *entries, int levels)
+ {
+ unsigned blocksize = dir->i_sb->s_blocksize;
+ unsigned count = dx_get_count (entries), names = 0, space = 0, i;
+@@ -565,32 +1042,33 @@
+ }
+ #endif /* DX_DEBUG */
+
+-static int dx_lookup(struct dx_path *path, void *cookie)
++static int dx_lookup(struct iam_path *path)
+ {
+ u32 ptr;
+- int err;
++ int err = 0;
+ int i;
+
+- struct dx_param *param;
+- struct dx_frame *frame;
+-
+- param = path->dp_param;
++ struct iam_descr *param;
++ struct iam_frame *frame;
++ struct iam_container *c;
+
+- for (frame = path->dp_frames, i = 0,
+- ptr = param->dpo_root_ptr(path); i <= path->dp_indirect;
++ param = path_descr(path);
++ c = path->ip_container;
++
++ for (frame = path->ip_frames, i = 0,
++ ptr = param->id_root_ptr(path->ip_container);
++ i <= path->ip_indirect;
+ ptr = dx_get_block(path, frame->at), ++frame, ++i) {
+- struct dx_entry *entries;
+- struct dx_entry *p;
+- struct dx_entry *q;
+- struct dx_entry *m;
++ struct iam_entry *entries;
++ struct iam_entry *p;
++ struct iam_entry *q;
++ struct iam_entry *m;
+ unsigned count;
+
+- frame->bh = ext3_bread(NULL, path->dp_object, ptr, 0, &err);
+- if (frame->bh == NULL) {
+- err = -EIO;
++ err = param->id_node_read(c, (iam_ptr_t)ptr, NULL, &frame->bh);
++ if (err != 0)
+ break;
+- }
+- err = param->dpo_node_check(path, frame, cookie);
++ err = param->id_node_check(path, frame);
+ if (err != 0)
+ break;
+
+@@ -599,37 +1077,37 @@
+ entries = frame->entries;
+ count = dx_get_count(entries);
+ assert(count && count <= dx_get_limit(entries));
+- p = dx_entry_shift(path, entries, 1);
+- q = dx_entry_shift(path, entries, count - 1);
++ p = iam_entry_shift(path, entries, 1);
++ q = iam_entry_shift(path, entries, count - 1);
+ while (p <= q) {
+- m = dx_entry_shift(path,
+- p, dx_entry_diff(path, q, p) / 2);
++ m = iam_entry_shift(path,
++ p, iam_entry_diff(path, q, p) / 2);
+ dxtrace(printk("."));
+- if (keycmp(path, dx_key_at(path, m),
+- path->dp_key_target) > 0)
+- q = dx_entry_shift(path, m, -1);
++ if (keycmp(c, iam_key_at(path, m),
++ path->ip_key_target) > 0)
++ q = iam_entry_shift(path, m, -1);
+ else
+- p = dx_entry_shift(path, m, +1);
++ p = iam_entry_shift(path, m, +1);
+ }
+
+- frame->at = dx_entry_shift(path, p, -1);
++ frame->at = iam_entry_shift(path, p, -1);
+ if (1) { // linear search cross check
+ unsigned n = count - 1;
+- struct dx_entry *at;
++ struct iam_entry *at;
+
+ at = entries;
+ while (n--) {
+ dxtrace(printk(","));
+- at = dx_entry_shift(path, at, +1);
+- if (keycmp(path, dx_key_at(path, at),
+- path->dp_key_target) > 0) {
+- if (at != dx_entry_shift(path, frame->at, 1)) {
++ at = iam_entry_shift(path, at, +1);
++ if (keycmp(c, iam_key_at(path, at),
++ path->ip_key_target) > 0) {
++ if (at != iam_entry_shift(path, frame->at, 1)) {
+ BREAKPOINT;
+ printk(KERN_EMERG "%i\n",
+- keycmp(path, dx_key_at(path, at),
+- path->dp_key_target));
++ keycmp(c, iam_key_at(path, at),
++ path->ip_key_target));
+ }
+- at = dx_entry_shift(path, at, -1);
++ at = iam_entry_shift(path, at, -1);
+ break;
+ }
+ }
+@@ -637,8 +1115,8 @@
+ }
+ }
+ if (err != 0)
+- dx_path_fini(path);
+- path->dp_frame = --frame;
++ iam_path_fini(path);
++ path->ip_frame = --frame;
+ return err;
+ }
+
+@@ -652,7 +1130,7 @@
+ * back to userspace.
+ */
+ static int dx_probe(struct dentry *dentry, struct inode *dir,
+- struct dx_hash_info *hinfo, struct dx_path *path)
++ struct dx_hash_info *hinfo, struct iam_path *path)
+ {
+ int err;
+ struct htree_cookie hc = {
+@@ -661,39 +1139,78 @@
+ };
+
+ assert(dx_index_is_compat(path));
+- err = dx_lookup(path, &hc);
+- assert(err != 0 || path->dp_frames[path->dp_indirect].bh != NULL);
++ path->ip_descr_data = &hc;
++ err = dx_lookup(path);
++ assert(err != 0 || path->ip_frames[path->ip_indirect].bh != NULL);
+ return err;
+ }
+
+-static inline void dx_path_init(struct dx_path *path, struct inode *inode)
++/*
++ * Initialize container @c, acquiring an additional reference on @inode.
++ */
++int iam_container_init(struct iam_container *c,
++ struct iam_descr *descr, struct inode *inode)
++{
++ memset(c, 0, sizeof *c);
++ c->ic_descr = descr;
++ c->ic_object = igrab(inode);
++ if (c->ic_object != NULL)
++ return 0;
++ else
++ return -ENOENT;
++}
++
++/*
++ * Finalize container @c, releasing all resources.
++ */
++void iam_container_fini(struct iam_container *c)
++{
++ if (c->ic_object != NULL) {
++ iput(c->ic_object);
++ c->ic_object = NULL;
++ }
++}
++
++static inline void iam_path_init(struct iam_path *path, struct iam_container *c)
+ {
+ memset(path, 0, sizeof *path);
+- path->dp_object = inode;
+- path->dp_frame = path->dp_frames;
++ path->ip_container = c;
++ path->ip_frame = path->ip_frames;
+ }
+
+-static inline void dx_path_fini(struct dx_path *path)
++static inline void iam_path_fini(struct iam_path *path)
+ {
+ int i;
+
+- for (i = 0; i < ARRAY_SIZE(path->dp_frames); i++) {
+- if (path->dp_frames[i].bh != NULL) {
+- brelse(path->dp_frames[i].bh);
+- path->dp_frames[i].bh = NULL;
++ for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) {
++ if (path->ip_frames[i].bh != NULL) {
++ brelse(path->ip_frames[i].bh);
++ path->ip_frames[i].bh = NULL;
+ }
+ }
+ }
+
+-static void dx_path_compat_init(struct dx_path_compat *path,
+- struct inode *inode)
++static void iam_path_compat_init(struct iam_path_compat *path,
++ struct inode *inode)
+ {
+ int i;
+- dx_path_init(&path->dpc_path, inode);
+- path->dpc_path.dp_param = &htree_compat_param;
+- for (i = 0; i < ARRAY_SIZE(path->dpc_path.dp_key_scratch); ++i)
+- path->dpc_path.dp_key_scratch[i] =
+- (struct dx_key *)&path->dpc_scrach[i];
++
++ iam_container_init(&path->ipc_container, &htree_compat_param, inode);
++ /*
++ * XXX hack allowing finalization of iam_path_compat with
++ * iam_path_fini().
++ */
++ iput(inode);
++ iam_path_init(&path->ipc_path, &path->ipc_container);
++ for (i = 0; i < ARRAY_SIZE(path->ipc_path.ip_key_scratch); ++i)
++ path->ipc_path.ip_key_scratch[i] =
++ (struct iam_key *)&path->ipc_scrach[i];
++}
++
++static void iam_path_compat_fini(struct iam_path_compat *path)
++{
++ iam_path_fini(&path->ipc_path);
++ iam_container_fini(&path->ipc_container);
+ }
+
+ /*
+@@ -714,16 +1231,16 @@
+ * hash of the next page.
+ */
+ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+- struct dx_path *path, __u32 *start_hash)
++ struct iam_path *path, __u32 *start_hash)
+ {
+- struct dx_frame *p;
++ struct iam_frame *p;
+ struct buffer_head *bh;
+ int err, num_frames = 0;
+ __u32 bhash;
+
+ assert(dx_index_is_compat(path));
+
+- p = path->dp_frame;
++ p = path->ip_frame;
+ /*
+ * Find the next leaf page by incrementing the frame pointer.
+ * If we run out of entries in the interior node, loop around and
+@@ -732,11 +1249,11 @@
+ * nodes need to be read.
+ */
+ while (1) {
+- p->at = dx_entry_shift(path, p->at, +1);
+- if (p->at < dx_entry_shift(path, p->entries,
++ p->at = iam_entry_shift(path, p->at, +1);
++ if (p->at < iam_entry_shift(path, p->entries,
+ dx_get_count(p->entries)))
+ break;
+- if (p == path->dp_frames)
++ if (p == path->ip_frames)
+ return 0;
+ num_frames++;
+ --p;
+@@ -749,7 +1266,7 @@
+ * desired contiuation hash. If it doesn't, return since
+ * there's no point to read in the successive index pages.
+ */
+- dx_get_key(path, p->at, (struct dx_key *)&bhash);
++ dx_get_key(path, p->at, (struct iam_key *)&bhash);
+ if (start_hash)
+ *start_hash = bhash;
+ if ((hash & 1) == 0) {
+@@ -761,8 +1278,10 @@
+ * block so no check is necessary
+ */
+ while (num_frames--) {
+- if (!(bh = ext3_bread(NULL, dir,
+- dx_get_block(path, p->at), 0, &err)))
++ err = path_descr(path)->id_node_read(path->ip_container,
++ (iam_ptr_t)dx_get_block(path, p->at),
++ NULL, &bh);
++ if (err != 0)
+ return err; /* Failure */
+ ++p;
+ brelse (p->bh);
+@@ -837,8 +1356,8 @@
+ {
+ struct dx_hash_info hinfo;
+ struct ext3_dir_entry_2 *de;
+- struct dx_path_compat cpath;
+- struct dx_path *path = &cpath.dpc_path;
++ struct iam_path_compat cpath;
++ struct iam_path *path = &cpath.ipc_path;
+ struct inode *dir;
+ int block, err;
+ int count = 0;
+@@ -848,7 +1367,7 @@
+ dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
+ start_minor_hash));
+ dir = dir_file->f_dentry->d_inode;
+- dx_path_compat_init(&cpath, dir);
++ iam_path_compat_init(&cpath, dir);
+ if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
+ hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
+ hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
+@@ -865,7 +1384,7 @@
+
+ /* Add '.' and '..' from the htree header */
+ if (!start_hash && !start_minor_hash) {
+- de = (struct ext3_dir_entry_2 *) path->dp_frames[0].bh->b_data;
++ de = (struct ext3_dir_entry_2 *) path->ip_frames[0].bh->b_data;
+ if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0)
+ goto errout;
+ de = ext3_next_entry(de);
+@@ -875,7 +1394,7 @@
+ }
+
+ while (1) {
+- block = dx_get_block(path, path->dp_frame->at);
++ block = dx_get_block(path, path->ip_frame->at);
+ ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
+ start_hash, start_minor_hash);
+ if (ret < 0) {
+@@ -900,12 +1419,12 @@
+ (count && ((hashval & 1) == 0)))
+ break;
+ }
+- dx_path_fini(path);
++ iam_path_fini(path);
+ dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
+ count, *next_hash));
+ return count;
+ errout:
+- dx_path_fini(path);
++ iam_path_fini(path);
+ return (err);
+ }
+
+@@ -964,18 +1483,18 @@
+ } while(more);
+ }
+
+-static void dx_insert_block(struct dx_path *path,
+- struct dx_frame *frame, u32 hash, u32 block)
++static void dx_insert_block(struct iam_path *path,
++ struct iam_frame *frame, u32 hash, u32 block)
+ {
+- struct dx_entry *entries = frame->entries;
+- struct dx_entry *old = frame->at, *new = dx_entry_shift(path, old, +1);
++ struct iam_entry *entries = frame->entries;
++ struct iam_entry *old = frame->at, *new = iam_entry_shift(path, old, +1);
+ int count = dx_get_count(entries);
+
+ assert(count < dx_get_limit(entries));
+- assert(old < dx_entry_shift(path, entries, count));
+- memmove(dx_entry_shift(path, new, 1), new,
+- (char *)dx_entry_shift(path, entries, count) - (char *)new);
+- dx_set_key(path, new, (struct dx_key *)&hash);
++ assert(old < iam_entry_shift(path, entries, count));
++ memmove(iam_entry_shift(path, new, 1), new,
++ (char *)iam_entry_shift(path, entries, count) - (char *)new);
++ dx_set_key(path, new, (struct iam_key *)&hash);
+ dx_set_block(path, new, block);
+ dx_set_count(entries, count + 1);
+ }
+@@ -1177,9 +1696,9 @@
+ struct super_block * sb;
+ struct dx_hash_info hinfo;
+ u32 hash;
+- struct dx_path_compat cpath;
+- struct dx_path *path = &cpath.dpc_path;
+- struct dx_entry_compat dummy_dot = {
++ struct iam_path_compat cpath;
++ struct iam_path *path = &cpath.ipc_path;
++ struct iam_entry_compat dummy_dot = {
+ .block = 0
+ };
+ struct ext3_dir_entry_2 *de, *top;
+@@ -1190,8 +1709,8 @@
+ const u8 *name = dentry->d_name.name;
+ struct inode *dir = dentry->d_parent->d_inode;
+
+- dx_path_compat_init(&cpath, dir);
+-
++ iam_path_compat_init(&cpath, dir);
++
+ sb = dir->i_sb;
+ /* NFS may look up ".." - look at dx_root directory block */
+ if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
+@@ -1199,13 +1718,15 @@
+ if (*err != 0)
+ return NULL;
+ } else {
+- path->dp_frame->bh = NULL; /* for dx_path_fini() */
+- path->dp_frame->at = (void *)&dummy_dot;/* hack for zero entry*/
++ path->ip_frame->bh = NULL; /* for iam_path_fini() */
++ path->ip_frame->at = (void *)&dummy_dot;/* hack for zero entry*/
+ }
+ hash = hinfo.hash;
+ do {
+- block = dx_get_block(path, path->dp_frame->at);
+- if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
++ block = dx_get_block(path, path->ip_frame->at);
++ *err = path_descr(path)->id_node_read(path->ip_container, (iam_ptr_t)block,
++ NULL, &bh);
++ if (*err != 0)
+ goto errout;
+ de = (struct ext3_dir_entry_2 *) bh->b_data;
+ top = (struct ext3_dir_entry_2 *) ((char *) de + sb->s_blocksize -
+@@ -1220,7 +1741,7 @@
+ goto errout;
+ }
+ *res_dir = de;
+- dx_path_fini(path);
++ iam_path_fini(path);
+ return bh;
+ }
+ brelse (bh);
+@@ -1238,7 +1759,7 @@
+ *err = -ENOENT;
+ errout:
+ dxtrace(printk("%s not found\n", name));
+- dx_path_fini(path);
++ iam_path_fini(path);
+ return NULL;
+ }
+ #endif
+@@ -1363,11 +1884,11 @@
+
+ /* Allocate new node, and split leaf node @bh into it, inserting new pointer
+ * into parent node identified by @frame */
+-static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct dx_path *path,
+- struct buffer_head **bh,struct dx_frame *frame,
++static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct iam_path *path,
++ struct buffer_head **bh,struct iam_frame *frame,
+ struct dx_hash_info *hinfo, int *error)
+ {
+- struct inode *dir = path->dp_object;
++ struct inode *dir = path_obj(path);
+ unsigned blocksize = dir->i_sb->s_blocksize;
+ unsigned count, continued;
+ struct buffer_head *bh2;
+@@ -1553,9 +2074,9 @@
+ int namelen = dentry->d_name.len;
+ struct buffer_head *bh2;
+ struct dx_root *root;
+- struct dx_path_compat cpath;
+- struct dx_path *path = &cpath.dpc_path;
+- struct dx_entry *entries;
++ struct iam_path_compat cpath;
++ struct iam_path *path = &cpath.ipc_path;
++ struct iam_entry *entries;
+ struct ext3_dir_entry_2 *de, *de2;
+ char *data1, *top;
+ unsigned len;
+@@ -1565,7 +2086,7 @@
+ u32 block;
+ struct fake_dirent *fde;
+
+- dx_path_compat_init(&cpath, dir);
++ iam_path_compat_init(&cpath, dir);
+ blocksize = dir->i_sb->s_blocksize;
+ dxtrace(printk("Creating index\n"));
+ retval = ext3_journal_get_write_access(handle, bh);
+@@ -1612,12 +2133,12 @@
+ hinfo.hash_version = root->info.hash_version;
+ hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
+ ext3fs_dirhash(name, namelen, &hinfo);
+- path->dp_frame->entries = entries;
+- path->dp_frame->at = entries;
+- path->dp_frame->bh = bh;
++ path->ip_frame->entries = entries;
++ path->ip_frame->at = entries;
++ path->ip_frame->bh = bh;
+ bh = bh2;
+- de = do_split(handle, path, &bh, path->dp_frame, &hinfo, &retval);
+- dx_path_fini(path);
++ de = do_split(handle, path, &bh, path->ip_frame, &hinfo, &retval);
++ iam_path_fini(path);
+ if (!de)
+ return retval;
+
+@@ -1698,12 +2219,12 @@
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+ struct inode *inode)
+ {
+- struct dx_path_compat cpath;
+- struct dx_path *path = &cpath.dpc_path;
+- struct dx_param *param;
+- struct dx_frame *frame, *safe;
+- struct dx_entry *entries; /* old block contents */
+- struct dx_entry *entries2; /* new block contents */
++ struct iam_path_compat cpath;
++ struct iam_path *path = &cpath.ipc_path;
++ struct iam_descr *param;
++ struct iam_frame *frame, *safe;
++ struct iam_entry *entries; /* old block contents */
++ struct iam_entry *entries2; /* new block contents */
+ struct dx_hash_info hinfo;
+ struct buffer_head * bh;
+ struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {0};
+@@ -1716,20 +2237,22 @@
+ int i;
+ size_t isize;
+
+- dx_path_compat_init(&cpath, dir);
+- param = path->dp_param;
++ iam_path_compat_init(&cpath, dir);
++ param = path_descr(path);
+
+ err = dx_probe(dentry, NULL, &hinfo, path);
+ if (err != 0)
+ return err;
+- frame = path->dp_frame;
++ frame = path->ip_frame;
+ entries = frame->entries;
+
+ /* XXX nikita: global serialization! */
+ isize = dir->i_size;
+
+- if (!(bh = ext3_bread(handle, dir,
+- dx_get_block(path, frame->at), 0, &err)))
++ err = param->id_node_read(path->ip_container,
++ (iam_ptr_t)dx_get_block(path,
++ frame->at), handle, &bh);
++ if (err != 0)
+ goto cleanup;
+
+ BUFFER_TRACE(bh, "get_write_access");
+@@ -1761,7 +2284,7 @@
+ dx_get_count(entries), dx_get_limit(entries)));
+
+ /* What levels need split? */
+- for (nr_splet = 0; frame >= path->dp_frames &&
++ for (nr_splet = 0; frame >= path->ip_frames &&
+ dx_get_count(frame->entries) == dx_get_limit(frame->entries);
+ --frame, ++nr_splet) {
+ if (nr_splet == DX_MAX_TREE_HEIGHT) {
+@@ -1778,7 +2301,7 @@
+ for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
+ bh_new[i] = ext3_append (handle, dir, &newblock[i], &err);
+ if (!bh_new[i] ||
+- param->dpo_node_init(path, bh_new[i], 0) != 0)
++ param->id_node_init(path->ip_container, bh_new[i], 0) != 0)
+ goto cleanup;
+ BUFFER_TRACE(frame->bh, "get_write_access");
+ err = ext3_journal_get_write_access(handle, frame->bh);
+@@ -1786,7 +2309,7 @@
+ goto journal_error;
+ }
+ /* Add "safe" node to transaction too */
+- if (safe + 1 != path->dp_frames) {
++ if (safe + 1 != path->ip_frames) {
+ err = ext3_journal_get_write_access(handle, safe->bh);
+ if (err)
+ goto journal_error;
+@@ -1800,12 +2323,12 @@
+
+ entries = frame->entries;
+ count = dx_get_count(entries);
+- idx = dx_entry_diff(path, frame->at, entries);
++ idx = iam_entry_diff(path, frame->at, entries);
+
+ bh2 = bh_new[i];
+ entries2 = dx_get_entries(path, bh2->b_data, 0);
+
+- if (frame == path->dp_frames) {
++ if (frame == path->ip_frames) {
+ /* splitting root node. Tricky point:
+ *
+ * In the "normal" B-tree we'd split root *and* add
+@@ -1818,14 +2341,14 @@
+ */
+ struct dx_root *root;
+ u8 indirects;
+- struct dx_frame *frames;
++ struct iam_frame *frames;
+
+- frames = path->dp_frames;
++ frames = path->ip_frames;
+ root = (struct dx_root *) frames->bh->b_data;
+ indirects = root->info.indirect_levels;
+ dxtrace(printk("Creating new root %d\n", indirects));
+ memcpy((char *) entries2, (char *) entries,
+- count * dx_entry_size(path));
++ count * iam_entry_size(path));
+ dx_set_limit(entries2, dx_node_limit(path));
+
+ /* Set up root */
+@@ -1835,9 +2358,9 @@
+
+ /* Shift frames in the path */
+ memmove(frames + 2, frames + 1,
+- (sizeof path->dp_frames) - 2 * sizeof frames[0]);
++ (sizeof path->ip_frames) - 2 * sizeof frames[0]);
+ /* Add new access path frame */
+- frames[1].at = dx_entry_shift(path, entries2, idx);
++ frames[1].at = iam_entry_shift(path, entries2, idx);
+ frames[1].entries = entries = entries2;
+ frames[1].bh = bh2;
+ assert(dx_node_check(path, frame));
+@@ -1853,22 +2376,22 @@
+ unsigned hash2;
+
+ dx_get_key(path,
+- dx_entry_shift(path, entries, count1),
+- (struct dx_key *)&hash2);
++ iam_entry_shift(path, entries, count1),
++ (struct iam_key *)&hash2);
+
+ dxtrace(printk("Split index %i/%i\n", count1, count2));
+
+ memcpy ((char *) entries2,
+- (char *) dx_entry_shift(path, entries, count1),
+- count2 * dx_entry_size(path));
++ (char *) iam_entry_shift(path, entries, count1),
++ count2 * iam_entry_size(path));
+ dx_set_count (entries, count1);
+ dx_set_count (entries2, count2);
+ dx_set_limit (entries2, dx_node_limit(path));
+
+ /* Which index block gets the new entry? */
+ if (idx >= count1) {
+- frame->at = dx_entry_shift(path, entries2,
+- idx - count1);
++ frame->at = iam_entry_shift(path, entries2,
++ idx - count1);
+ frame->entries = entries = entries2;
+ swap(frame->bh, bh2);
+ bh_new[i] = bh2;
+@@ -1903,7 +2426,7 @@
+ }
+ if (err)
+ inode->i_size = isize;
+- dx_path_fini(path);
++ iam_path_fini(path);
+ return err;
+ }
+ #endif
Index: iam-src/fs/ext3/namei.c
===================================================================
---- iam-src.orig/fs/ext3/namei.c 2006-02-12 16:43:57.000000000 +0300
-+++ iam-src/fs/ext3/namei.c 2006-02-12 23:22:12.000000000 +0300
-@@ -83,22 +83,21 @@ static struct buffer_head *ext3_append(h
+--- iam-src.orig/fs/ext3/namei.c 2006-02-15 18:31:48.000000000 +0300
++++ iam-src/fs/ext3/namei.c 2006-02-15 21:25:34.000000000 +0300
+@@ -51,7 +51,10 @@
+ /*
+ * Maximal number of non-leaf levels in htree. In the stock ext3 this is 2.
+ */
+-#define DX_MAX_TREE_HEIGHT (5)
++enum {
++ DX_MAX_TREE_HEIGHT = 5,
++ DX_SCRATCH_KEYS = 2
++};
+
+ static struct buffer_head *ext3_append(handle_t *handle,
+ struct inode *inode,
+@@ -83,22 +86,22 @@ static struct buffer_head *ext3_append(h
#define dxtrace(command)
#endif
-struct dx_entry
-{
+struct dx_entry; /* incomplete type */
++struct dx_key; /* incomplete type */
+
+struct dx_entry_compat {
__le32 hash;
__le32 block;
};
-@@ -109,8 +108,7 @@ struct dx_entry
+@@ -109,8 +112,7 @@ struct dx_entry
* hash version mod 4 should never be 0. Sincerely, the paranoia department.
*/
struct fake_dirent dot;
char dot_name[4];
struct fake_dirent dotdot;
-@@ -124,13 +122,13 @@ struct dx_root
+@@ -124,13 +126,13 @@ struct dx_root
u8 unused_flags;
}
info;
};
-@@ -147,38 +145,76 @@ struct dx_map_entry
+@@ -147,38 +149,88 @@ struct dx_map_entry
u32 offs;
};
+ struct dx_frame *frame, void *cookie);
+ int (*dpo_node_init)(struct dx_path *path,
+ struct buffer_head *bh, int root);
++ int (*dpo_keycmp)(struct dx_path *path,
++ struct dx_key *k1, struct dx_key *k2);
+};
+
/*
+ int dp_indirect;
+ struct dx_frame dp_frames[DX_MAX_TREE_HEIGHT];
+ struct dx_frame *dp_frame;
-+ void *dp_key_target;
-+ void *dp_key;
++ struct dx_key *dp_key_target;
++ struct dx_key *dp_key_scratch[DX_SCRATCH_KEYS];
++};
++
++struct dx_path_compat {
++ struct dx_path dpc_path;
++ __u32 dpc_scrach[DX_SCRATCH_KEYS];
};
+static u32 htree_root_ptr(struct dx_path *p);
+ struct dx_frame *frame, void *cookie);
+static int htree_node_init(struct dx_path *path,
+ struct buffer_head *bh, int root);
++static int htree_keycmp(struct dx_path *path,
++ struct dx_key *k1, struct dx_key *k2);
+
+static struct dx_param htree_compat_param = {
+ .dpo_key_size = sizeof ((struct dx_map_entry *)NULL)->hash,
+
+ .dpo_root_ptr = htree_root_ptr,
+ .dpo_node_check = htree_node_check,
-+ .dpo_node_init = htree_node_init
++ .dpo_node_init = htree_node_init,
++ .dpo_keycmp = htree_keycmp
+};
+
+
+static inline unsigned dx_get_block(struct dx_path *p, struct dx_entry *entry);
+static void dx_set_block(struct dx_path *p,
+ struct dx_entry *entry, unsigned value);
-+static inline void *dx_get_key(struct dx_path *p,
-+ struct dx_entry *entry, void *key);
-+static void dx_set_key(struct dx_path *p, struct dx_entry *entry, void *key);
++static inline struct dx_key *dx_get_key(struct dx_path *p,
++ struct dx_entry *entry,
++ struct dx_key *key);
++static void dx_set_key(struct dx_path *p, struct dx_entry *entry,
++ struct dx_key *key);
+static unsigned dx_get_count(struct dx_entry *entries);
+static unsigned dx_get_limit(struct dx_entry *entries);
+static void dx_set_count(struct dx_entry *entries, unsigned value);
static int ext3_htree_next_block(struct inode *dir, __u32 hash,
struct dx_path *path, __u32 *start_hash);
static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
-@@ -186,29 +222,65 @@ static struct buffer_head * ext3_dx_find
+@@ -186,29 +238,72 @@ static struct buffer_head * ext3_dx_find
static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
struct inode *inode);
-static inline unsigned dx_get_block (struct dx_entry *entry)
+static inline void *entry_off(struct dx_entry *entry, ptrdiff_t off)
-+{
+ {
+- return le32_to_cpu(entry->block) & 0x00ffffff;
+ return (void *)((char *)entry + off);
-+}
-+
+ }
+
+-static inline void dx_set_block (struct dx_entry *entry, unsigned value)
+static inline size_t dx_entry_size(struct dx_path *p)
{
-- return le32_to_cpu(entry->block) & 0x00ffffff;
+- entry->block = cpu_to_le32(value);
+ return p->dp_param->dpo_key_size + p->dp_param->dpo_ptr_size;
}
--static inline void dx_set_block (struct dx_entry *entry, unsigned value)
+-static inline unsigned dx_get_hash (struct dx_entry *entry)
+static inline struct dx_entry *dx_entry_shift(struct dx_path *p,
+ struct dx_entry *entry, int shift)
{
-- entry->block = cpu_to_le32(value);
+- return le32_to_cpu(entry->hash);
+ void *e = entry;
+ return e + shift * dx_entry_size(p);
- }
-
--static inline unsigned dx_get_hash (struct dx_entry *entry)
++}
++
+static inline ptrdiff_t dx_entry_diff(struct dx_path *p,
+ struct dx_entry *e1, struct dx_entry *e2)
- {
-- return le32_to_cpu(entry->hash);
++{
+ ptrdiff_t diff;
+
+ diff = (void *)e1 - (void *)e2;
+ assert(diff / dx_entry_size(p) * dx_entry_size(p) == diff);
+ return diff / dx_entry_size(p);
-+}
-+
-+static inline unsigned dx_get_block(struct dx_path *p, struct dx_entry *entry)
-+{
-+ return le32_to_cpu(*(u32 *)entry_off(entry, p->dp_param->dpo_key_size))
-+ & 0x00ffffff;
}
-static inline void dx_set_hash (struct dx_entry *entry, unsigned value)
-+static inline void dx_set_block(struct dx_path *p,
-+ struct dx_entry *entry, unsigned value)
++static inline unsigned dx_get_block(struct dx_path *p, struct dx_entry *entry)
{
- entry->hash = cpu_to_le32(value);
++ return le32_to_cpu(*(u32 *)entry_off(entry, p->dp_param->dpo_key_size))
++ & 0x00ffffff;
++}
++
++static inline void dx_set_block(struct dx_path *p,
++ struct dx_entry *entry, unsigned value)
++{
+ *(u32*)entry_off(entry, p->dp_param->dpo_key_size) = cpu_to_le32(value);
+}
+
-+static inline void *dx_get_key(struct dx_path *p,
-+ struct dx_entry *entry, void *key)
++static inline struct dx_key *dx_get_key(struct dx_path *p,
++ struct dx_entry *entry,
++ struct dx_key *key)
+{
+ memcpy(key, entry, p->dp_param->dpo_key_size);
+ return key;
+}
+
++static inline struct dx_key *dx_key_at(struct dx_path *p,
++ struct dx_entry *entry)
++{
++ return (struct dx_key *)entry;
++}
++
+static inline void dx_set_key(struct dx_path *p,
-+ struct dx_entry *entry, void *key)
++ struct dx_entry *entry, struct dx_key *key)
+{
+ memcpy(entry, key, p->dp_param->dpo_key_size);
}
static inline unsigned dx_get_count (struct dx_entry *entries)
-@@ -231,17 +303,123 @@ static inline void dx_set_limit (struct
+@@ -231,17 +326,163 @@ static inline void dx_set_limit (struct
((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
}
-static inline unsigned dx_root_limit (struct inode *dir, unsigned infosize)
+static inline unsigned dx_root_limit(struct dx_path *p)
- {
-- unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
-- EXT3_DIR_REC_LEN(2) - infosize;
-- return 0? 20: entry_space / sizeof(struct dx_entry);
++{
+ struct dx_param *param = p->dp_param;
+ unsigned entry_space = p->dp_object->i_sb->s_blocksize -
+ param->dpo_root_gap;
+ frame->bh->b_data, frame == path->dp_frames);
+}
+
++static inline struct dx_key *keycpy(struct dx_path *p,
++ struct dx_key *k1, struct dx_key *k2)
++{
++ return memcpy(k1, k2, p->dp_param->dpo_key_size);
++}
++
++static inline int keycmp(struct dx_path *p,
++ struct dx_key *k1, struct dx_key *k2)
++{
++ return p->dp_param->dpo_keycmp(p, k1, k2);
++}
++
++static int dx_node_check(struct dx_path *p, struct dx_frame *f)
++{
++ struct dx_entry *e;
++ unsigned count;
++ unsigned i;
++
++ e = dx_node_get_entries(p, f);
++ count = dx_get_count(e);
++ e = dx_entry_shift(p, e, 1);
++ for (i = 0; i < count - 1; ++i, e = dx_entry_shift(p, e, 1)) {
++ keycpy(p, p->dp_key_scratch[0], p->dp_key_scratch[1]);
++ dx_get_key(p, e, p->dp_key_scratch[1]);
++ if (i > 0 &&
++ keycmp(p, p->dp_key_scratch[0], p->dp_key_scratch[1]) > 0)
++ return 0;
++ }
++ return 1;
++}
++
+static u32 htree_root_ptr(struct dx_path *path)
+{
+ return 0;
+ if (hc->dentry)
+ ext3fs_dirhash(hc->dentry->d_name.name,
+ hc->dentry->d_name.len, hc->hinfo);
-+ path->dp_key_target = &hc->hinfo->hash;
++ path->dp_key_target = (struct dx_key *)&hc->hinfo->hash;
+ } else {
+ /* non-root index */
+ assert(entries == data + path->dp_param->dpo_node_gap);
+ }
+ frame->entries = frame->at = entries;
+ return 0;
- }
-
--static inline unsigned dx_node_limit (struct inode *dir)
++}
++
+static int htree_node_init(struct dx_path *path,
+ struct buffer_head *bh, int root)
{
-- unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
-- return 0? 22: entry_space / sizeof(struct dx_entry);
+- unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(1) -
+- EXT3_DIR_REC_LEN(2) - infosize;
+- return 0? 20: entry_space / sizeof(struct dx_entry);
+ struct dx_node *node;
+
+ assert(!root);
+ return 0;
}
+-static inline unsigned dx_node_limit (struct inode *dir)
++static int htree_keycmp(struct dx_path *path,
++ struct dx_key *k1, struct dx_key *k2)
+ {
+- unsigned entry_space = dir->i_sb->s_blocksize - EXT3_DIR_REC_LEN(0);
+- return 0? 22: entry_space / sizeof(struct dx_entry);
++ __u32 p1 = le32_to_cpu(*(__u32 *)k1);
++ __u32 p2 = le32_to_cpu(*(__u32 *)k2);
++
++ return p1 > p2 ? +1 : (p1 < p2 ? -1 : 0);
+ }
+
/*
-@@ -327,123 +505,101 @@ struct stats dx_show_entries(struct dx_h
+@@ -327,123 +568,105 @@ struct stats dx_show_entries(struct dx_h
}
#endif /* DX_DEBUG */
+ if (err != 0)
+ break;
+
++ assert(dx_node_check(path, frame));
++
+ entries = frame->entries;
count = dx_get_count(entries);
- assert (count && count <= dx_get_limit(entries));
dxtrace(printk("."));
- if (dx_get_hash(m) > hash)
- q = m - 1;
-+ if (memcmp(dx_get_key(path, m, path->dp_key),
-+ path->dp_key_target,
-+ param->dpo_key_size) > 0)
++ if (keycmp(path, dx_key_at(path, m),
++ path->dp_key_target) > 0)
+ q = dx_entry_shift(path, m, -1);
else
- p = m + 1;
- {
- at--;
+ at = dx_entry_shift(path, at, +1);
-+ if (memcmp(dx_get_key(path, at, path->dp_key),
-+ path->dp_key_target,
-+ param->dpo_key_size) > 0) {
++ if (keycmp(path, dx_key_at(path, at),
++ path->dp_key_target) > 0) {
++ if (at != dx_entry_shift(path, frame->at, 1)) {
++ BREAKPOINT;
++ printk(KERN_EMERG "%i\n",
++ keycmp(path, dx_key_at(path, at),
++ path->dp_key_target));
++ }
+ at = dx_entry_shift(path, at, -1);
break;
}
+ struct dx_hash_info *hinfo, struct dx_path *path)
+{
+ int err;
-+ __u32 hash_storage;
+ struct htree_cookie hc = {
+ .dentry = dentry,
+ .hinfo = hinfo
+ };
+
+ assert(dx_index_is_compat(path));
-+ path->dp_key = &hash_storage;
+ err = dx_lookup(path, &hc);
+ assert(err != 0 || path->dp_frames[path->dp_indirect].bh != NULL);
+ return err;
}
static inline void dx_path_init(struct dx_path *path, struct inode *inode)
-@@ -458,8 +614,10 @@ static inline void dx_path_fini(struct d
+@@ -458,11 +681,24 @@ static inline void dx_path_fini(struct d
int i;
- for (i = 0; i < ARRAY_SIZE(path->dp_frames); i--) {
+ for (i = 0; i < ARRAY_SIZE(path->dp_frames); i++) {
- if (path->dp_frames[i].bh != NULL)
+ if (path->dp_frames[i].bh != NULL) {
brelse(path->dp_frames[i].bh);
}
}
-@@ -488,6 +646,8 @@ static int ext3_htree_next_block(struct
++static void dx_path_compat_init(struct dx_path_compat *path,
++ struct inode *inode)
++{
++ int i;
++ dx_path_init(&path->dpc_path, inode);
++ path->dpc_path.dp_param = &htree_compat_param;
++ for (i = 0; i < ARRAY_SIZE(path->dpc_path.dp_key_scratch); ++i)
++ path->dpc_path.dp_key_scratch[i] =
++ (struct dx_key *)&path->dpc_scrach[i];
++}
++
+ /*
+ * This function increments the frame pointer to search the next leaf
+ * block, and reads in the necessary intervening nodes if the search
+@@ -488,6 +724,8 @@ static int ext3_htree_next_block(struct
int err, num_frames = 0;
__u32 bhash;
p = path->dp_frame;
/*
* Find the next leaf page by incrementing the frame pointer.
-@@ -497,7 +657,9 @@ static int ext3_htree_next_block(struct
+@@ -497,7 +735,9 @@ static int ext3_htree_next_block(struct
* nodes need to be read.
*/
while (1) {
break;
if (p == path->dp_frames)
return 0;
-@@ -512,7 +674,7 @@ static int ext3_htree_next_block(struct
+@@ -512,7 +752,7 @@ static int ext3_htree_next_block(struct
* desired contiuation hash. If it doesn't, return since
* there's no point to read in the successive index pages.
*/
- bhash = dx_get_hash(p->at);
-+ dx_get_key(path, p->at, &bhash);
++ dx_get_key(path, p->at, (struct dx_key *)&bhash);
if (start_hash)
*start_hash = bhash;
if ((hash & 1) == 0) {
-@@ -524,12 +686,13 @@ static int ext3_htree_next_block(struct
+@@ -524,12 +764,14 @@ static int ext3_htree_next_block(struct
* block so no check is necessary
*/
while (num_frames--) {
p->bh = bh;
- p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
+ p->at = p->entries = dx_node_get_entries(path, p);
++ assert(dx_node_check(path, p));
}
return 1;
}
-@@ -609,6 +772,7 @@ int ext3_htree_fill_tree(struct file *di
+@@ -598,7 +840,8 @@ int ext3_htree_fill_tree(struct file *di
+ {
+ struct dx_hash_info hinfo;
+ struct ext3_dir_entry_2 *de;
+- struct dx_path path;
++ struct dx_path_compat cpath;
++ struct dx_path *path = &cpath.dpc_path;
+ struct inode *dir;
+ int block, err;
+ int count = 0;
+@@ -608,7 +851,7 @@ int ext3_htree_fill_tree(struct file *di
+ dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
start_minor_hash));
dir = dir_file->f_dentry->d_inode;
- dx_path_init(&path, dir);
-+ path.dp_param = &htree_compat_param;
+- dx_path_init(&path, dir);
++ dx_path_compat_init(&cpath, dir);
if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
-@@ -619,7 +783,8 @@ int ext3_htree_fill_tree(struct file *di
+@@ -619,12 +862,13 @@ int ext3_htree_fill_tree(struct file *di
}
hinfo.hash = start_hash;
hinfo.minor_hash = 0;
- if (!dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, &path, &err))
-+ err = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, &path);
++ err = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, path);
+ if (err != 0)
return err;
/* Add '.' and '..' from the htree header */
-@@ -634,7 +799,7 @@ int ext3_htree_fill_tree(struct file *di
+ if (!start_hash && !start_minor_hash) {
+- de = (struct ext3_dir_entry_2 *) path.dp_frames[0].bh->b_data;
++ de = (struct ext3_dir_entry_2 *) path->dp_frames[0].bh->b_data;
+ if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0)
+ goto errout;
+ de = ext3_next_entry(de);
+@@ -634,7 +878,7 @@ int ext3_htree_fill_tree(struct file *di
}
while (1) {
- block = dx_get_block(path.dp_frame->at);
-+ block = dx_get_block(&path, path.dp_frame->at);
++ block = dx_get_block(path, path->dp_frame->at);
ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
start_hash, start_minor_hash);
if (ret < 0) {
-@@ -722,17 +887,19 @@ static void dx_sort_map (struct dx_map_e
+@@ -643,7 +887,8 @@ int ext3_htree_fill_tree(struct file *di
+ }
+ count += ret;
+ hashval = ~0;
+- ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, &path, &hashval);
++ ret = ext3_htree_next_block(dir,
++ HASH_NB_ALWAYS, path, &hashval);
+ *next_hash = hashval;
+ if (ret < 0) {
+ err = ret;
+@@ -658,12 +903,12 @@ int ext3_htree_fill_tree(struct file *di
+ (count && ((hashval & 1) == 0)))
+ break;
+ }
+- dx_path_fini(&path);
++ dx_path_fini(path);
+ dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
+ count, *next_hash));
+ return count;
+ errout:
+- dx_path_fini(&path);
++ dx_path_fini(path);
+ return (err);
+ }
+
+@@ -722,17 +967,19 @@ static void dx_sort_map (struct dx_map_e
} while(more);
}
+ assert(old < dx_entry_shift(path, entries, count));
+ memmove(dx_entry_shift(path, new, 1), new,
+ (char *)dx_entry_shift(path, entries, count) - (char *)new);
-+ dx_set_key(path, new, &hash);
++ dx_set_key(path, new, (struct dx_key *)&hash);
+ dx_set_block(path, new, block);
dx_set_count(entries, count + 1);
}
#endif
-@@ -934,7 +1101,9 @@ static struct buffer_head * ext3_dx_find
+@@ -933,8 +1180,11 @@ static struct buffer_head * ext3_dx_find
+ struct super_block * sb;
struct dx_hash_info hinfo;
u32 hash;
- struct dx_path path;
+- struct dx_path path;
- struct dx_entry dummy_dot;
++ struct dx_path_compat cpath;
++ struct dx_path *path = &cpath.dpc_path;
+ struct dx_entry_compat dummy_dot = {
+ .block = 0
+ };
struct ext3_dir_entry_2 *de, *top;
struct buffer_head *bh;
unsigned long block;
-@@ -944,19 +1113,21 @@ static struct buffer_head * ext3_dx_find
+@@ -943,20 +1193,21 @@ static struct buffer_head * ext3_dx_find
+ const u8 *name = dentry->d_name.name;
struct inode *dir = dentry->d_parent->d_inode;
- dx_path_init(&path, dir);
-+ path.dp_param = &htree_compat_param;
+- dx_path_init(&path, dir);
++ dx_path_compat_init(&cpath, dir);
+
sb = dir->i_sb;
/* NFS may look up ".." - look at dx_root directory block */
if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
- if (!(dx_probe(dentry, NULL, &hinfo, &path, err)))
-+ *err = dx_probe(dentry, NULL, &hinfo, &path);
++ *err = dx_probe(dentry, NULL, &hinfo, path);
+ if (*err != 0)
return NULL;
} else {
- path.dp_frame->bh = NULL; /* for dx_path_fini() */
- path.dp_frame->at = &dummy_dot; /* hack for zero entry*/
- dx_set_block(path.dp_frame->at, 0); /* dx_root block is 0 */
-+ path.dp_frame->bh = NULL; /* for dx_path_fini() */
-+ path.dp_frame->at = (void *)&dummy_dot; /* hack for zero entry*/
++ path->dp_frame->bh = NULL; /* for dx_path_fini() */
++ path->dp_frame->at = (void *)&dummy_dot;/* hack for zero entry*/
}
hash = hinfo.hash;
do {
- block = dx_get_block(path.dp_frame->at);
-+ block = dx_get_block(&path, path.dp_frame->at);
++ block = dx_get_block(path, path->dp_frame->at);
if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
goto errout;
de = (struct ext3_dir_entry_2 *) bh->b_data;
-@@ -1115,10 +1286,11 @@ static struct ext3_dir_entry_2* dx_pack_
+@@ -972,12 +1223,12 @@ static struct buffer_head * ext3_dx_find
+ goto errout;
+ }
+ *res_dir = de;
+- dx_path_fini(&path);
++ dx_path_fini(path);
+ return bh;
+ }
+ brelse (bh);
+ /* Check to see if we should continue to search */
+- retval = ext3_htree_next_block(dir, hash, &path, NULL);
++ retval = ext3_htree_next_block(dir, hash, path, NULL);
+ if (retval < 0) {
+ ext3_warning(sb, __FUNCTION__,
+ "error reading index page in directory #%lu",
+@@ -990,7 +1241,7 @@ static struct buffer_head * ext3_dx_find
+ *err = -ENOENT;
+ errout:
+ dxtrace(printk("%s not found\n", name));
+- dx_path_fini(&path);
++ dx_path_fini(path);
+ return NULL;
+ }
+ #endif
+@@ -1115,10 +1366,11 @@ static struct ext3_dir_entry_2* dx_pack_
/* Allocate new node, and split leaf node @bh into it, inserting new pointer
* into parent node identified by @frame */
unsigned blocksize = dir->i_sb->s_blocksize;
unsigned count, continued;
struct buffer_head *bh2;
-@@ -1180,7 +1352,7 @@ static struct ext3_dir_entry_2 *do_split
+@@ -1180,7 +1432,7 @@ static struct ext3_dir_entry_2 *do_split
swap(*bh, bh2);
de = de2;
}
err = ext3_journal_dirty_metadata (handle, bh2);
if (err)
goto journal_error;
-@@ -1315,6 +1487,7 @@ static int make_indexed_dir(handle_t *ha
+@@ -1303,7 +1555,8 @@ static int make_indexed_dir(handle_t *ha
+ int namelen = dentry->d_name.len;
+ struct buffer_head *bh2;
+ struct dx_root *root;
+- struct dx_path path;
++ struct dx_path_compat cpath;
++ struct dx_path *path = &cpath.dpc_path;
+ struct dx_entry *entries;
+ struct ext3_dir_entry_2 *de, *de2;
+ char *data1, *top;
+@@ -1314,7 +1567,7 @@ static int make_indexed_dir(handle_t *ha
+ u32 block;
struct fake_dirent *fde;
- dx_path_init(&path, dir);
-+ path.dp_param = &htree_compat_param;
+- dx_path_init(&path, dir);
++ dx_path_compat_init(&cpath, dir);
blocksize = dir->i_sb->s_blocksize;
dxtrace(printk("Creating index\n"));
retval = ext3_journal_get_write_access(handle, bh);
-@@ -1350,10 +1523,10 @@ static int make_indexed_dir(handle_t *ha
+@@ -1350,21 +1603,21 @@ static int make_indexed_dir(handle_t *ha
root->info.info_length = sizeof(root->info);
root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
root->info.hash_version = DX_HASH_R5;
- entries = root->entries;
- dx_set_block (entries, 1);
+ entries = (void *)root->entries;
-+ dx_set_block (&path, entries, 1);
++ dx_set_block (path, entries, 1);
dx_set_count (entries, 1);
- dx_set_limit (entries, dx_root_limit(dir, sizeof(root->info)));
-+ dx_set_limit (entries, dx_root_limit(&path));
++ dx_set_limit (entries, dx_root_limit(path));
/* Initialize as for dx_probe */
hinfo.hash_version = root->info.hash_version;
-@@ -1363,7 +1536,7 @@ static int make_indexed_dir(handle_t *ha
- path.dp_frame->at = entries;
- path.dp_frame->bh = bh;
+ hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
+ ext3fs_dirhash(name, namelen, &hinfo);
+- path.dp_frame->entries = entries;
+- path.dp_frame->at = entries;
+- path.dp_frame->bh = bh;
++ path->dp_frame->entries = entries;
++ path->dp_frame->at = entries;
++ path->dp_frame->bh = bh;
bh = bh2;
- de = do_split(handle,dir, &bh, path.dp_frame, &hinfo, &retval);
-+ de = do_split(handle, &path, &bh, path.dp_frame, &hinfo, &retval);
- dx_path_fini(&path);
+- dx_path_fini(&path);
++ de = do_split(handle, path, &bh, path->dp_frame, &hinfo, &retval);
++ dx_path_fini(path);
if (!de)
return retval;
-@@ -1446,8 +1619,8 @@ static int ext3_dx_add_entry(handle_t *h
+
+@@ -1445,9 +1698,10 @@ static int ext3_add_entry (handle_t *han
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
struct inode *inode)
{
- struct dx_path path;
+- struct dx_path path;
++ struct dx_path_compat cpath;
++ struct dx_path *path = &cpath.dpc_path;
+ struct dx_param *param;
struct dx_frame *frame, *safe;
- struct dx_node *node2;
struct dx_entry *entries; /* old block contents */
struct dx_entry *entries2; /* new block contents */
struct dx_hash_info hinfo;
-@@ -1463,7 +1636,10 @@ static int ext3_dx_add_entry(handle_t *h
+@@ -1462,16 +1716,20 @@ static int ext3_dx_add_entry(handle_t *h
+ int i;
size_t isize;
- dx_path_init(&path, dir);
+- dx_path_init(&path, dir);
- if (!dx_probe(dentry, NULL, &hinfo, &path, &err))
-+ param = path.dp_param = &htree_compat_param;
++ dx_path_compat_init(&cpath, dir);
++ param = path->dp_param;
+
-+ err = dx_probe(dentry, NULL, &hinfo, &path);
++ err = dx_probe(dentry, NULL, &hinfo, path);
+ if (err != 0)
return err;
- frame = path.dp_frame;
+- frame = path.dp_frame;
++ frame = path->dp_frame;
entries = frame->entries;
-@@ -1471,7 +1647,8 @@ static int ext3_dx_add_entry(handle_t *h
+
/* XXX nikita: global serialization! */
isize = dir->i_size;
- if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
+ if (!(bh = ext3_bread(handle, dir,
-+ dx_get_block(&path, frame->at), 0, &err)))
++ dx_get_block(path, frame->at), 0, &err)))
goto cleanup;
BUFFER_TRACE(bh, "get_write_access");
-@@ -1519,12 +1696,9 @@ static int ext3_dx_add_entry(handle_t *h
+@@ -1503,7 +1761,7 @@ static int ext3_dx_add_entry(handle_t *h
+ dx_get_count(entries), dx_get_limit(entries)));
+
+ /* What levels need split? */
+- for (nr_splet = 0; frame >= path.dp_frames &&
++ for (nr_splet = 0; frame >= path->dp_frames &&
+ dx_get_count(frame->entries) == dx_get_limit(frame->entries);
+ --frame, ++nr_splet) {
+ if (nr_splet == DX_MAX_TREE_HEIGHT) {
+@@ -1519,19 +1777,16 @@ static int ext3_dx_add_entry(handle_t *h
* transaction... */
for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
bh_new[i] = ext3_append (handle, dir, &newblock[i], &err);
- if (!bh_new[i])
+ if (!bh_new[i] ||
-+ param->dpo_node_init(&path, bh_new[i], 0) != 0)
++ param->dpo_node_init(path, bh_new[i], 0) != 0)
goto cleanup;
- node2 = (struct dx_node *)(bh_new[i]->b_data);
- entries2 = node2->entries;
BUFFER_TRACE(frame->bh, "get_write_access");
err = ext3_journal_get_write_access(handle, frame->bh);
if (err)
-@@ -1545,11 +1719,10 @@ static int ext3_dx_add_entry(handle_t *h
+ goto journal_error;
+ }
+ /* Add "safe" node to transaction too */
+- if (safe + 1 != path.dp_frames) {
++ if (safe + 1 != path->dp_frames) {
+ err = ext3_journal_get_write_access(handle, safe->bh);
+ if (err)
+ goto journal_error;
+@@ -1545,13 +1800,12 @@ static int ext3_dx_add_entry(handle_t *h
entries = frame->entries;
count = dx_get_count(entries);
- idx = frame->at - entries;
-+ idx = dx_entry_diff(&path, frame->at, entries);
++ idx = dx_entry_diff(path, frame->at, entries);
bh2 = bh_new[i];
- node2 = (struct dx_node *)(bh2->b_data);
- entries2 = node2->entries;
-+ entries2 = dx_get_entries(&path, bh2->b_data, 0);
++ entries2 = dx_get_entries(path, bh2->b_data, 0);
- if (frame == path.dp_frames) {
+- if (frame == path.dp_frames) {
++ if (frame == path->dp_frames) {
/* splitting root node. Tricky point:
-@@ -1571,19 +1744,19 @@ static int ext3_dx_add_entry(handle_t *h
+ *
+ * In the "normal" B-tree we'd split root *and* add
+@@ -1566,27 +1820,29 @@ static int ext3_dx_add_entry(handle_t *h
+ u8 indirects;
+ struct dx_frame *frames;
+
+- frames = path.dp_frames;
++ frames = path->dp_frames;
+ root = (struct dx_root *) frames->bh->b_data;
indirects = root->info.indirect_levels;
dxtrace(printk("Creating new root %d\n", indirects));
memcpy((char *) entries2, (char *) entries,
- count * sizeof(struct dx_entry));
- dx_set_limit(entries2, dx_node_limit(dir));
-+ count * dx_entry_size(&path));
-+ dx_set_limit(entries2, dx_node_limit(&path));
++ count * dx_entry_size(path));
++ dx_set_limit(entries2, dx_node_limit(path));
/* Set up root */
dx_set_count(entries, 1);
- dx_set_block(entries + 0, newblock[i]);
-+ dx_set_block(&path, entries, newblock[i]);
++ dx_set_block(path, entries, newblock[i]);
root->info.indirect_levels = indirects + 1;
/* Shift frames in the path */
memmove(frames + 2, frames + 1,
- (sizeof path.dp_frames) - 2 * sizeof frames[0]);
+- (sizeof path.dp_frames) - 2 * sizeof frames[0]);
++ (sizeof path->dp_frames) - 2 * sizeof frames[0]);
/* Add new access path frame */
- frames[1].at = entries2 + idx;
-+ frames[1].at = dx_entry_shift(&path, entries2, idx);
++ frames[1].at = dx_entry_shift(path, entries2, idx);
frames[1].entries = entries = entries2;
frames[1].bh = bh2;
++ assert(dx_node_check(path, frame));
++ ++frame;
-@@ -1594,23 +1767,30 @@ static int ext3_dx_add_entry(handle_t *h
++ assert(dx_node_check(path, frame));
+ bh_new[i] = NULL; /* buffer head is "consumed" */
+ err = ext3_journal_get_write_access(handle, bh2);
+ if (err)
+@@ -1594,23 +1850,32 @@ static int ext3_dx_add_entry(handle_t *h
} else {
/* splitting non-root index node. */
unsigned count1 = count/2, count2 = count - count1;
- unsigned hash2 = dx_get_hash(entries + count1);
+ unsigned hash2;
+
-+ dx_get_key(&path,
-+ dx_entry_shift(&path, entries, count1),
-+ &hash2);
++ dx_get_key(path,
++ dx_entry_shift(path, entries, count1),
++ (struct dx_key *)&hash2);
+
dxtrace(printk("Split index %i/%i\n", count1, count2));
- memcpy ((char *) entries2, (char *) (entries + count1),
- count2 * sizeof(struct dx_entry));
+ memcpy ((char *) entries2,
-+ (char *) dx_entry_shift(&path, entries, count1),
-+ count2 * dx_entry_size(&path));
++ (char *) dx_entry_shift(path, entries, count1),
++ count2 * dx_entry_size(path));
dx_set_count (entries, count1);
dx_set_count (entries2, count2);
- dx_set_limit (entries2, dx_node_limit(dir));
-+ dx_set_limit (entries2, dx_node_limit(&path));
++ dx_set_limit (entries2, dx_node_limit(path));
/* Which index block gets the new entry? */
if (idx >= count1) {
- frame->at = entries2 + idx - count1;
-+ frame->at = dx_entry_shift(&path, entries2,
++ frame->at = dx_entry_shift(path, entries2,
+ idx - count1);
frame->entries = entries = entries2;
swap(frame->bh, bh2);
bh_new[i] = bh2;
}
- dx_insert_block (frame - 1, hash2, newblock[i]);
-+ dx_insert_block(&path, frame - 1, hash2, newblock[i]);
++ dx_insert_block(path, frame - 1, hash2, newblock[i]);
++ assert(dx_node_check(path, frame));
++ assert(dx_node_check(path, frame - 1));
dxtrace(dx_show_index ("node", frame->entries));
dxtrace(dx_show_index ("node",
((struct dx_node *) bh2->b_data)->entries));
-@@ -1619,7 +1799,7 @@ static int ext3_dx_add_entry(handle_t *h
+@@ -1619,9 +1884,10 @@ static int ext3_dx_add_entry(handle_t *h
goto journal_error;
}
}
- de = do_split(handle, dir, &bh, --frame, &hinfo, &err);
-+ de = do_split(handle, &path, &bh, --frame, &hinfo, &err);
++ de = do_split(handle, path, &bh, --frame, &hinfo, &err);
if (!de)
goto cleanup;
++ assert(dx_node_check(path, frame));
err = add_dirent_to_buf(handle, dentry, inode, de, bh);
+ goto cleanup2;
+
+@@ -1637,7 +1903,7 @@ cleanup2:
+ }
+ if (err)
+ inode->i_size = isize;
+- dx_path_fini(&path);
++ dx_path_fini(path);
+ return err;
+ }
+ #endif
--- /dev/null
+Index: iam-src/fs/ext3/namei.c
+===================================================================
+--- iam-src.orig/fs/ext3/namei.c 2006-02-09 20:44:02.000000000 +0300
++++ iam-src/fs/ext3/namei.c 2006-02-10 18:23:32.000000000 +0300
+@@ -147,6 +147,15 @@ struct dx_map_entry
+ u32 offs;
+ };
+
++/*
++ * Structure to keep track of a path drilled through htree.
++ */
++struct dx_path {
++ struct inode *dp_object;
++ struct dx_frame dp_frames[DX_MAX_TREE_HEIGHT];
++ struct dx_frame *dp_frame;
++};
++
+ #ifdef CONFIG_EXT3_INDEX
+ static inline unsigned dx_get_block (struct dx_entry *entry);
+ static void dx_set_block (struct dx_entry *entry, unsigned value);
+@@ -161,9 +170,8 @@ static unsigned dx_node_limit (struct in
+ static struct dx_frame *dx_probe(struct dentry *dentry,
+ struct inode *dir,
+ struct dx_hash_info *hinfo,
+- struct dx_frame *frame,
++ struct dx_path *path,
+ int *err);
+-static void dx_release (struct dx_frame *frames);
+ static int dx_make_map (struct ext3_dir_entry_2 *de, int size,
+ struct dx_hash_info *hinfo, struct dx_map_entry map[]);
+ static void dx_sort_map(struct dx_map_entry *map, unsigned count);
+@@ -172,9 +180,7 @@ static struct ext3_dir_entry_2 *dx_move_
+ static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
+ static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
+ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+- struct dx_frame *frame,
+- struct dx_frame *frames,
+- __u32 *start_hash);
++ struct dx_path *path, __u32 *start_hash);
+ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
+ struct ext3_dir_entry_2 **res_dir, int *err);
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+@@ -332,13 +338,13 @@ struct stats dx_show_entries(struct dx_h
+ */
+ static struct dx_frame *
+ dx_probe(struct dentry *dentry, struct inode *dir,
+- struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
++ struct dx_hash_info *hinfo, struct dx_path *path, int *err)
+ {
+ unsigned count, indirect;
+ struct dx_entry *at, *entries, *p, *q, *m;
+ struct dx_root *root;
+ struct buffer_head *bh;
+- struct dx_frame *frame = frame_in;
++ struct dx_frame *frame = path->dp_frames;
+ u32 hash;
+
+ frame->bh = NULL;
+@@ -352,8 +358,7 @@ dx_probe(struct dentry *dentry, struct i
+ root->info.hash_version != DX_HASH_R5 &&
+ root->info.hash_version != DX_HASH_LEGACY) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+- "Unrecognised inode hash code %d",
+- root->info.hash_version);
++ "Unrecognised inode hash code %d", root->info.hash_version);
+ brelse(bh);
+ *err = ERR_BAD_DX_DIR;
+ goto fail;
+@@ -424,7 +429,8 @@ dx_probe(struct dentry *dentry, struct i
+ frame->bh = bh;
+ frame->entries = entries;
+ frame->at = at;
+- if (!indirect--) return frame;
++ if (!indirect--)
++ return path->dp_frame = frame;
+ if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
+ goto fail2;
+ at = entries = ((struct dx_node *) bh->b_data)->entries;
+@@ -432,7 +438,7 @@ dx_probe(struct dentry *dentry, struct i
+ frame++;
+ }
+ fail2:
+- while (frame >= frame_in) {
++ while (frame >= path->dp_frames) {
+ brelse(frame->bh);
+ frame--;
+ }
+@@ -440,16 +446,20 @@ fail:
+ return NULL;
+ }
+
+-static void dx_release (struct dx_frame *frames)
++static inline void dx_path_init(struct dx_path *path, struct inode *inode)
+ {
+- int height;
++ memset(path, 0, sizeof *path);
++ path->dp_object = inode;
++ path->dp_frame = path->dp_frames;
++}
+
+- if (frames[0].bh == NULL)
+- return;
+- height = ((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels;
+- for (; height >= 0; height--) {
+- assert(frames[height].bh != NULL);
+- brelse(frames[height].bh);
++static inline void dx_path_fini(struct dx_path *path)
++{
++ int i;
++
++ for (i = 0; i < ARRAY_SIZE(path->dp_frames); i++) {
++ if (path->dp_frames[i].bh != NULL)
++ brelse(path->dp_frames[i].bh);
+ }
+ }
+
+@@ -471,16 +481,14 @@ static void dx_release (struct dx_frame
+ * hash of the next page.
+ */
+ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+- struct dx_frame *frame,
+- struct dx_frame *frames,
+- __u32 *start_hash)
++ struct dx_path *path, __u32 *start_hash)
+ {
+ struct dx_frame *p;
+ struct buffer_head *bh;
+ int err, num_frames = 0;
+ __u32 bhash;
+
+- p = frame;
++ p = path->dp_frame;
+ /*
+ * Find the next leaf page by incrementing the frame pointer.
+ * If we run out of entries in the interior node, loop around and
+@@ -491,10 +499,10 @@ static int ext3_htree_next_block(struct
+ while (1) {
+ if (++(p->at) < p->entries + dx_get_count(p->entries))
+ break;
+- if (p == frames)
++ if (p == path->dp_frames)
+ return 0;
+ num_frames++;
+- p--;
++ --p;
+ }
+
+ /*
+@@ -516,10 +524,9 @@ static int ext3_htree_next_block(struct
+ * block so no check is necessary
+ */
+ while (num_frames--) {
+- if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at),
+- 0, &err)))
++ if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at), 0, &err)))
+ return err; /* Failure */
+- p++;
++ ++p;
+ brelse (p->bh);
+ p->bh = bh;
+ p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
+@@ -591,7 +598,7 @@ int ext3_htree_fill_tree(struct file *di
+ {
+ struct dx_hash_info hinfo;
+ struct ext3_dir_entry_2 *de;
+- struct dx_frame frames[DX_MAX_TREE_HEIGHT], *frame;
++ struct dx_path path;
+ struct inode *dir;
+ int block, err;
+ int count = 0;
+@@ -601,6 +608,7 @@ int ext3_htree_fill_tree(struct file *di
+ dxtrace(printk("In htree_fill_tree, start hash: %x:%x\n", start_hash,
+ start_minor_hash));
+ dir = dir_file->f_dentry->d_inode;
++ dx_path_init(&path, dir);
+ if (!(EXT3_I(dir)->i_flags & EXT3_INDEX_FL)) {
+ hinfo.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
+ hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
+@@ -611,13 +619,12 @@ int ext3_htree_fill_tree(struct file *di
+ }
+ hinfo.hash = start_hash;
+ hinfo.minor_hash = 0;
+- frame = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
+- if (!frame)
++ if (!dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, &path, &err))
+ return err;
+
+ /* Add '.' and '..' from the htree header */
+ if (!start_hash && !start_minor_hash) {
+- de = (struct ext3_dir_entry_2 *) frames[0].bh->b_data;
++ de = (struct ext3_dir_entry_2 *) path.dp_frames[0].bh->b_data;
+ if ((err = ext3_htree_store_dirent(dir_file, 0, 0, de)) != 0)
+ goto errout;
+ de = ext3_next_entry(de);
+@@ -627,7 +634,7 @@ int ext3_htree_fill_tree(struct file *di
+ }
+
+ while (1) {
+- block = dx_get_block(frame->at);
++ block = dx_get_block(path.dp_frame->at);
+ ret = htree_dirblock_to_tree(dir_file, dir, block, &hinfo,
+ start_hash, start_minor_hash);
+ if (ret < 0) {
+@@ -636,8 +643,7 @@ int ext3_htree_fill_tree(struct file *di
+ }
+ count += ret;
+ hashval = ~0;
+- ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS,
+- frame, frames, &hashval);
++ ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS, &path, &hashval);
+ *next_hash = hashval;
+ if (ret < 0) {
+ err = ret;
+@@ -652,12 +658,12 @@ int ext3_htree_fill_tree(struct file *di
+ (count && ((hashval & 1) == 0)))
+ break;
+ }
+- dx_release(frames);
++ dx_path_fini(&path);
+ dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
+ count, *next_hash));
+ return count;
+ errout:
+- dx_release(frames);
++ dx_path_fini(&path);
+ return (err);
+ }
+
+@@ -927,7 +933,8 @@ static struct buffer_head * ext3_dx_find
+ struct super_block * sb;
+ struct dx_hash_info hinfo;
+ u32 hash;
+- struct dx_frame frames[DX_MAX_TREE_HEIGHT], *frame;
++ struct dx_path path;
++ struct dx_entry dummy_dot;
+ struct ext3_dir_entry_2 *de, *top;
+ struct buffer_head *bh;
+ unsigned long block;
+@@ -936,20 +943,20 @@ static struct buffer_head * ext3_dx_find
+ const u8 *name = dentry->d_name.name;
+ struct inode *dir = dentry->d_parent->d_inode;
+
++ dx_path_init(&path, dir);
+ sb = dir->i_sb;
+ /* NFS may look up ".." - look at dx_root directory block */
+ if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
+- if (!(frame = dx_probe(dentry, NULL, &hinfo, frames, err)))
++ if (!(dx_probe(dentry, NULL, &hinfo, &path, err)))
+ return NULL;
+ } else {
+- frame = frames;
+- frame->bh = NULL; /* for dx_release() */
+- frame->at = (struct dx_entry *)frames; /* hack for zero entry*/
+- dx_set_block(frame->at, 0); /* dx_root block is 0 */
++ path.dp_frame->bh = NULL; /* for dx_path_fini() */
++ path.dp_frame->at = &dummy_dot; /* hack for zero entry*/
++ dx_set_block(path.dp_frame->at, 0); /* dx_root block is 0 */
+ }
+ hash = hinfo.hash;
+ do {
+- block = dx_get_block(frame->at);
++ block = dx_get_block(path.dp_frame->at);
+ if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
+ goto errout;
+ de = (struct ext3_dir_entry_2 *) bh->b_data;
+@@ -965,13 +972,12 @@ static struct buffer_head * ext3_dx_find
+ goto errout;
+ }
+ *res_dir = de;
+- dx_release (frames);
++ dx_path_fini(&path);
+ return bh;
+ }
+ brelse (bh);
+ /* Check to see if we should continue to search */
+- retval = ext3_htree_next_block(dir, hash, frame,
+- frames, NULL);
++ retval = ext3_htree_next_block(dir, hash, &path, NULL);
+ if (retval < 0) {
+ ext3_warning(sb, __FUNCTION__,
+ "error reading index page in directory #%lu",
+@@ -984,7 +990,7 @@ static struct buffer_head * ext3_dx_find
+ *err = -ENOENT;
+ errout:
+ dxtrace(printk("%s not found\n", name));
+- dx_release (frames);
++ dx_path_fini(&path);
+ return NULL;
+ }
+ #endif
+@@ -1297,7 +1303,7 @@ static int make_indexed_dir(handle_t *ha
+ int namelen = dentry->d_name.len;
+ struct buffer_head *bh2;
+ struct dx_root *root;
+- struct dx_frame frames[DX_MAX_TREE_HEIGHT], *frame;
++ struct dx_path path;
+ struct dx_entry *entries;
+ struct ext3_dir_entry_2 *de, *de2;
+ char *data1, *top;
+@@ -1308,6 +1314,7 @@ static int make_indexed_dir(handle_t *ha
+ u32 block;
+ struct fake_dirent *fde;
+
++ dx_path_init(&path, dir);
+ blocksize = dir->i_sb->s_blocksize;
+ dxtrace(printk("Creating index\n"));
+ retval = ext3_journal_get_write_access(handle, bh);
+@@ -1352,14 +1359,13 @@ static int make_indexed_dir(handle_t *ha
+ hinfo.hash_version = root->info.hash_version;
+ hinfo.seed = EXT3_SB(dir->i_sb)->s_hash_seed;
+ ext3fs_dirhash(name, namelen, &hinfo);
+- frame = frames;
+- frame->entries = entries;
+- frame->at = entries;
+- frame->bh = bh;
++ path.dp_frame->entries = entries;
++ path.dp_frame->at = entries;
++ path.dp_frame->bh = bh;
+ bh = bh2;
+- de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
+- dx_release (frames);
+- if (!(de))
++ de = do_split(handle,dir, &bh, path.dp_frame, &hinfo, &retval);
++ dx_path_fini(&path);
++ if (!de)
+ return retval;
+
+ return add_dirent_to_buf(handle, dentry, inode, de, bh);
+@@ -1439,7 +1445,8 @@ static int ext3_add_entry (handle_t *han
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+ struct inode *inode)
+ {
+- struct dx_frame frames[DX_MAX_TREE_HEIGHT] = {{0,},}, *frame, *safe;
++ struct dx_path path;
++ struct dx_frame *frame, *safe;
+ struct dx_node *node2;
+ struct dx_entry *entries; /* old block contents */
+ struct dx_entry *entries2; /* new block contents */
+@@ -1455,9 +1462,10 @@ static int ext3_dx_add_entry(handle_t *h
+ int i;
+ size_t isize;
+
+- frame = dx_probe(dentry, NULL, &hinfo, frames, &err);
+- if (!frame)
++ dx_path_init(&path, dir);
++ if (!dx_probe(dentry, NULL, &hinfo, &path, &err))
+ return err;
++ frame = path.dp_frame;
+ entries = frame->entries;
+
+ /* XXX nikita: global serialization! */
+@@ -1495,7 +1503,7 @@ static int ext3_dx_add_entry(handle_t *h
+ dx_get_count(entries), dx_get_limit(entries)));
+
+ /* What levels need split? */
+- for (nr_splet = 0; frame >= frames &&
++ for (nr_splet = 0; frame >= path.dp_frames &&
+ dx_get_count(frame->entries) == dx_get_limit(frame->entries);
+ --frame, ++nr_splet) {
+ if (nr_splet == DX_MAX_TREE_HEIGHT) {
+@@ -1523,7 +1531,7 @@ static int ext3_dx_add_entry(handle_t *h
+ goto journal_error;
+ }
+ /* Add "safe" node to transaction too */
+- if (safe + 1 != frames) {
++ if (safe + 1 != path.dp_frames) {
+ err = ext3_journal_get_write_access(handle, safe->bh);
+ if (err)
+ goto journal_error;
+@@ -1543,7 +1551,7 @@ static int ext3_dx_add_entry(handle_t *h
+ node2 = (struct dx_node *)(bh2->b_data);
+ entries2 = node2->entries;
+
+- if (frame == frames) {
++ if (frame == path.dp_frames) {
+ /* splitting root node. Tricky point:
+ *
+ * In the "normal" B-tree we'd split root *and* add
+@@ -1556,7 +1564,9 @@ static int ext3_dx_add_entry(handle_t *h
+ */
+ struct dx_root *root;
+ u8 indirects;
++ struct dx_frame *frames;
+
++ frames = path.dp_frames;
+ root = (struct dx_root *) frames->bh->b_data;
+ indirects = root->info.indirect_levels;
+ dxtrace(printk("Creating new root %d\n", indirects));
+@@ -1571,7 +1581,7 @@ static int ext3_dx_add_entry(handle_t *h
+
+ /* Shift frames in the path */
+ memmove(frames + 2, frames + 1,
+- (sizeof frames) - 2 * sizeof frames[0]);
++ (sizeof path.dp_frames) - 2 * sizeof frames[0]);
+ /* Add new access path frame */
+ frames[1].at = entries2 + idx;
+ frames[1].entries = entries = entries2;
+@@ -1627,7 +1637,7 @@ cleanup2:
+ }
+ if (err)
+ inode->i_size = isize;
+- dx_release(frames);
++ dx_path_fini(&path);
+ return err;
+ }
+ #endif
--- /dev/null
+Index: iam-src/fs/ext3/hash.c
+===================================================================
+--- iam-src.orig/fs/ext3/hash.c 2006-02-11 01:08:59.000000000 +0300
++++ iam-src/fs/ext3/hash.c 2006-02-11 20:46:22.000000000 +0300
+@@ -4,7 +4,7 @@
+ * Copyright (C) 2002 by Theodore Ts'o
+ *
+ * This file is released under the GPL v2.
+- *
++ *
+ * This file may be redistributed under the terms of the GNU Public
+ * License.
+ */
+@@ -115,6 +115,18 @@ static __u32 dx_hack_hash (const char *n
+ return (hash0 << 1);
+ }
+
++static __u32 dx_r5_hash(const signed char *msg, int len)
++{
++ __u32 a = 0;
++ while (len--) {
++ a += *msg << 4;
++ a += *msg >> 4;
++ a *= 11;
++ msg++;
++ }
++ return a;
++}
++
+ static void str2hashbuf(const char *msg, int len, __u32 *buf, int num)
+ {
+ __u32 pad, val;
+@@ -146,11 +158,11 @@ static void str2hashbuf(const char *msg,
+ * Returns the hash of a filename. If len is 0 and name is NULL, then
+ * this function can be used to test whether or not a hash version is
+ * supported.
+- *
++ *
+ * The seed is an 4 longword (32 bits) "secret" which can be used to
+ * uniquify a hash. If the seed is all zero's, then some default seed
+ * may be used.
+- *
++ *
+ * A particular hash version specifies whether or not the seed is
+ * represented, and whether or not the returned hash is 32 bits or 64
+ * bits. 32 bit hashes will return 0 for the minor hash.
+@@ -205,6 +217,9 @@ int ext3fs_dirhash(const char *name, int
+ hash = buf[0];
+ minor_hash = buf[1];
+ break;
++ case DX_HASH_R5:
++ hash = dx_r5_hash(name, len);
++ break;
+ default:
+ hinfo->hash = 0;
+ return -1;
+Index: iam-src/fs/ext3/namei.c
+===================================================================
+--- iam-src.orig/fs/ext3/namei.c 2006-02-11 01:09:12.000000000 +0300
++++ iam-src/fs/ext3/namei.c 2006-02-11 20:45:58.000000000 +0300
+@@ -370,6 +370,7 @@ dx_probe(struct dentry *dentry, struct i
+ root = (struct dx_root *) bh->b_data;
+ if (root->info.hash_version != DX_HASH_TEA &&
+ root->info.hash_version != DX_HASH_HALF_MD4 &&
++ root->info.hash_version != DX_HASH_R5 &&
+ root->info.hash_version != DX_HASH_LEGACY) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "Unrecognised inode hash code %d", root->info.hash_version);
+@@ -1363,6 +1364,7 @@ static int make_indexed_dir(handle_t *ha
+ memset (&root->info, 0, sizeof(root->info));
+ root->info.info_length = sizeof(root->info);
+ root->info.hash_version = EXT3_SB(dir->i_sb)->s_def_hash_version;
++ root->info.hash_version = DX_HASH_R5;
+ entries = root->entries;
+ dx_set_block (entries, 1);
+ dx_set_count (entries, 1);
+Index: iam-src/include/linux/ext3_fs.h
+===================================================================
+--- iam-src.orig/include/linux/ext3_fs.h 2006-02-11 01:08:59.000000000 +0300
++++ iam-src/include/linux/ext3_fs.h 2006-02-11 20:45:58.000000000 +0300
+@@ -665,6 +665,7 @@ struct ext3_dir_entry_2 {
+ #define DX_HASH_LEGACY 0
+ #define DX_HASH_HALF_MD4 1
+ #define DX_HASH_TEA 2
++#define DX_HASH_R5 3
+
+ /* hash info structure used by the directory hash */
+ struct dx_hash_info
--- /dev/null
+Index: linux-2.6.9/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.9.orig/fs/ext3/namei.c 2006-04-23 22:05:38.000000000 +0800
++++ linux-2.6.9/fs/ext3/namei.c 2006-04-23 22:22:58.000000000 +0800
+@@ -82,13 +82,16 @@
+ *
+ * Entries in index node are sorted by their key value.
+ *
++ * Format of leaf node:
+ *
+- *
+- *
+- *
+- *
+- *
+- *
++ * +-----+-------+-------+-------+------+-------+------------+
++ * | | count | | | | | |
++ * | gap | / | leaf | leaf | .... | leaf | free space |
++ * | | limit | | | | | |
++ * +-----+-------+-------+-------+------+-------+------------+
++ *
++ * leaf For leaf entry: consists of a rec immediately followed by
++ * a key. Sizes of the key and the rec depend on the container.
+ *
+ *
+ *
+@@ -241,6 +244,7 @@
+ };
+
+ /* leaf node reached by tree lookup */
++#define iam_leaf_entry iam_rec
+ struct iam_leaf {
+ struct buffer_head *bh;
+ struct iam_leaf_entry *entries;
+@@ -508,6 +512,11 @@
+ IAM_IT_ATTACHED
+ };
+
++struct htree_cookie {
++ struct dx_hash_info *hinfo;
++ struct dentry *dentry;
++};
++
+ /*
+ * Iterator.
+ *
+@@ -704,7 +713,7 @@
+ struct inode *inode);
+
+ static inline void iam_path_init(struct iam_path *path,
+- struct iam_container *c);
++ struct iam_container *c, struct htree_cookie *hc);
+ static inline void iam_path_fini(struct iam_path *path);
+
+
+@@ -865,11 +874,6 @@
+ return 0;
+ }
+
+-struct htree_cookie {
+- struct dx_hash_info *hinfo;
+- struct dentry *dentry;
+-};
+-
+ static int htree_node_check(struct iam_path *path, struct iam_frame *frame)
+ {
+ void *data;
+@@ -1171,11 +1175,13 @@
+ }
+ }
+
+-static inline void iam_path_init(struct iam_path *path, struct iam_container *c)
++static inline void iam_path_init(struct iam_path *path, struct iam_container *c,
++ struct htree_cookie *hc)
+ {
+ memset(path, 0, sizeof *path);
+ path->ip_container = c;
+ path->ip_frame = path->ip_frames;
++ path->ip_descr_data = hc;
+ }
+
+ static inline void iam_path_fini(struct iam_path *path)
+@@ -1201,7 +1207,7 @@
+ * iam_path_fini().
+ */
+ iput(inode);
+- iam_path_init(&path->ipc_path, &path->ipc_container);
++ iam_path_init(&path->ipc_path, &path->ipc_container, NULL);
+ for (i = 0; i < ARRAY_SIZE(path->ipc_path.ip_key_scratch); ++i)
+ path->ipc_path.ip_key_scratch[i] =
+ (struct iam_key *)&path->ipc_scrach[i];
+@@ -1213,6 +1219,382 @@
+ iam_container_fini(&path->ipc_container);
+ }
+
++static int iam_leaf_init(struct iam_path *path, struct iam_leaf *leaf)
++{
++ int block, err;
++ struct buffer_head *bh;
++
++ block = dx_get_block(path, path->ip_frame->at);
++ err = path_descr(path)->id_node_read(path->ip_container, block,
++ NULL, &bh);
++ if (err)
++ return err;
++
++ leaf->bh = bh;
++ leaf->entries = (struct iam_leaf_entry *)bh->b_data;
++ return 0;
++}
++
++static void iam_leaf_fini(struct iam_leaf *leaf)
++{
++ if (leaf->bh)
++ brelse(leaf->bh);
++}
++
++int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r)
++{
++ struct dx_hash_info hinfo;
++ struct iam_path_compat cpath;
++ struct iam_path *path = &cpath.ipc_path;
++ struct htree_cookie hc = {
++ .hinfo = &hinfo
++ };
++ int err, i;
++
++ iam_path_init(path, c, &hc);
++ for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
++ path->ip_key_scratch[i] =
++ (struct iam_key *)&cpath.ipc_scrach[i];
++ err = dx_lookup(path);
++ do {
++ struct iam_leaf leaf;
++ err = iam_leaf_init(path, &leaf);
++ if (err)
++ goto errout;
++
++ for (path_descr(path)->id_leaf.start(c, &leaf);
++ !path_descr(path)->id_leaf.at_end(c, &leaf);
++ path_descr(path)->id_leaf.next(c, &leaf)) {
++ struct iam_key *key;
++
++ key = kmalloc(path_descr(path)->id_key_size, GFP_KERNEL);
++ path_descr(path)->id_leaf.key(c, &leaf, key);
++ if (keycmp(c, k, key) == 0) {
++ memcpy(r, path_descr(path)->id_leaf.rec(c, &leaf),
++ path_descr(path)->id_rec_size);
++ iam_path_fini(path);
++ iam_leaf_fini(&leaf);
++ return 0;
++ }
++ }
++
++ iam_leaf_fini(&leaf);
++ /* Check to see if we should continue to search */
++ err = ext3_htree_next_block(c->ic_object, hinfo.hash, path, NULL);
++ if (err < 0)
++ goto errout;
++ } while (err == 1);
++errout:
++ iam_path_fini(path);
++ return(err);
++}
++
++static inline size_t iam_leaf_entry_size(struct iam_path *p)
++{
++ return path_descr(p)->id_rec_size + path_descr(p)->id_key_size;
++}
++
++static inline ptrdiff_t iam_leaf_entry_diff(struct iam_path *p,
++ struct iam_leaf_entry *e1, struct iam_leaf_entry *e2)
++{
++ ptrdiff_t diff;
++
++ diff = (void *)e1 - (void *)e2;
++ assert(diff / iam_leaf_entry_size(p) * iam_leaf_entry_size(p) == diff);
++ return diff / iam_leaf_entry_size(p);
++}
++
++static inline struct iam_leaf_entry*
++iam_leaf_entry_shift(struct iam_path *p, struct iam_leaf_entry *entry, int shift)
++{
++ void *e = entry;
++ return e + shift * iam_leaf_entry_size(p);
++}
++
++static inline struct iam_key *
++dx_leaf_get_key(struct iam_path *p, struct iam_leaf_entry *e, struct iam_key *key)
++{
++ memcpy(key, e, path_descr(p)->id_key_size);
++ return key;
++}
++
++static inline struct iam_key *
++iam_leaf_key_at(struct iam_path *p, struct iam_leaf_entry *entry)
++{
++ void *e = entry;
++ return e + path_descr(p)->id_rec_size;
++}
++static inline struct iam_leaf_entry *
++iam_leaf_entry_at(struct iam_path *p, struct iam_leaf_entry *entry)
++{
++ return entry;
++}
++
++static int iam_leaf_lookup(struct iam_path *path, struct iam_leaf *leaf,
++ struct iam_key *k)
++{
++ struct iam_leaf_entry *p, *q, *m;
++ struct iam_leaf_entry *entries = leaf->entries;
++ int count = dx_get_count((struct iam_entry *)entries);
++
++ p = iam_leaf_entry_shift(path, entries, 1);
++ q = iam_leaf_entry_shift(path, entries, count - 1);
++ while (p <= q) {
++ m = iam_leaf_entry_shift(path,
++ p, iam_leaf_entry_diff(path, q, p) / 2);
++ dxtrace(printk("."));
++ if (keycmp(path->ip_container, iam_leaf_key_at(path, m),
++ path->ip_key_target) > 0)
++ q = iam_leaf_entry_shift(path, m, -1);
++ else
++ p = iam_leaf_entry_shift(path, m, +1);
++ }
++ leaf->at = q;
++ return 0;
++}
++
++/* XXX what kind of lock should be taken on this entry? -- WangDi */
++static int iam_leaf_insert(handle_t *handle, struct iam_path *path,
++ struct iam_key *k, struct iam_rec *r)
++{
++ struct iam_leaf leaf;
++ struct iam_leaf_entry *p, *q;
++ int err, count;
++
++ err = iam_leaf_init(path, &leaf);
++ if (err)
++ goto errout;
++ path_descr(path)->id_leaf.start(path->ip_container, &leaf);
++ count = dx_get_count((struct iam_entry *)leaf.entries);
++ if (dx_get_count((struct iam_entry *)leaf.entries) >=
++ dx_get_limit((struct iam_entry *)leaf.entries)){
++ err = -ENOSPC;
++ goto errout;
++ }
++
++ err = iam_leaf_lookup(path, &leaf, k);
++ if (err)
++ goto errout;
++
++ /* insert the key/record pair into the leaf entries */
++ p = iam_leaf_entry_shift(path, leaf.at, 1);
++ q = iam_leaf_entry_shift(path, leaf.entries, count - 1);
++ while (q < p) {
++ memcpy(iam_leaf_entry_shift(path, q, 1), q, iam_leaf_entry_size(path));
++ q = iam_leaf_entry_shift(path, q, -1);
++ }
++ memcpy(iam_leaf_entry_at(path, p), r, path_descr(path)->id_rec_size);
++ memcpy(iam_leaf_key_at(path, p), k, path_descr(path)->id_key_size);
++
++ dx_set_count((struct iam_entry*)leaf.entries, count + 1);
++ err = ext3_journal_dirty_metadata(handle, leaf.bh);
++ if (err)
++ ext3_std_error(path->ip_container->ic_object->i_sb, err);
++errout:
++ iam_leaf_fini(&leaf);
++ return err;
++}
++
++static int split_leaf_node(handle_t *handle, struct iam_path *path)
++{
++ struct inode *dir = path_obj(path);
++ unsigned continued = 0;
++ struct buffer_head *bh2;
++ u32 newblock, hash_split;
++ char *data2;
++ struct iam_leaf leaf;
++ unsigned split;
++ int err;
++
++ bh2 = ext3_append (handle, dir, &newblock, &err);
++ if (!(bh2)) {
++ err = -ENOSPC;
++ goto errout;
++ }
++ err = iam_leaf_init(path, &leaf);
++ if (err)
++ goto errout;
++
++ BUFFER_TRACE(leaf.bh, "get_write_access");
++ err = ext3_journal_get_write_access(handle, leaf.bh);
++ if (err) {
++ journal_error:
++ iam_leaf_fini(&leaf);
++ brelse(bh2);
++ ext3_std_error(dir->i_sb, err);
++ err = -EIO;
++ goto errout;
++ }
++ data2 = bh2->b_data;
++ split = dx_get_count((struct iam_entry*)leaf.entries)/2;
++ hash_split = *(__u32*)iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split));
++ if (keycmp(path->ip_container, iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split)),
++ iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split -1))) == 0)
++ continued = 1;
++
++ memcpy(iam_leaf_entry_shift(path, (struct iam_leaf_entry *)data2, 1),
++ iam_leaf_entry_shift(path, leaf.entries, split),
++ split * iam_leaf_entry_size(path));
++
++ /* Which block gets the new entry? */
++ dx_insert_block(path, path->ip_frame, hash_split + continued, newblock);
++ err = ext3_journal_dirty_metadata (handle, bh2);
++ if (err)
++ goto journal_error;
++ err = ext3_journal_dirty_metadata (handle, leaf.bh);
++ if (err)
++ goto journal_error;
++ brelse (bh2);
++ iam_leaf_fini(&leaf);
++errout:
++ return err;
++}
++
++static int split_index_node(handle_t *handle, struct iam_path *path);
++int iam_insert(handle_t *handle, struct iam_container *c, struct iam_key *k,
++ struct iam_rec *r)
++{
++ struct dx_hash_info hinfo;
++ struct iam_path_compat cpath;
++ struct iam_path *path = &cpath.ipc_path;
++ struct htree_cookie hc = {
++ .hinfo = &hinfo
++ };
++ int err, i;
++
++ iam_path_init(path, c, &hc);
++ for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
++ path->ip_key_scratch[i] =
++ (struct iam_key *)&cpath.ipc_scrach[i];
++ err = dx_lookup(path);
++ if (err)
++ goto errout;
++
++ err = iam_leaf_insert(handle, path, k, r);
++
++ if (err != -ENOSPC)
++ goto errout;
++
++ err = split_index_node(handle, path);
++ if (err)
++ goto errout;
++
++ err = split_leaf_node(handle, path);
++ if (err)
++ goto errout;
++
++ err = iam_leaf_insert(handle, path, k, r);
++errout:
++ iam_path_fini(path);
++ return(err);
++}
++
++static int iam_leaf_delete(handle_t *handle, struct iam_path *path,
++ struct iam_key *k)
++{
++ struct iam_leaf leaf;
++ struct iam_leaf_entry *p, *q;
++ int err, count;
++
++ err = iam_leaf_init(path, &leaf);
++ if (err)
++ goto errout;
++
++ err = iam_leaf_lookup(path, &leaf, k);
++ if (err)
++ goto errout;
++
++ count = dx_get_count((struct iam_entry*)leaf.entries);
++ /* delete the entry matching key k from the leaf entries */
++ p = iam_leaf_entry_shift(path, leaf.at, 1);
++ q = iam_leaf_entry_shift(path, leaf.entries, count - 1);
++ while (p < q) {
++ memcpy(p, iam_leaf_entry_shift(path, p, 1), iam_leaf_entry_size(path));
++ p = iam_leaf_entry_shift(path, p, 1);
++ }
++ dx_set_count((struct iam_entry*)leaf.entries, count - 1);
++
++ err = ext3_journal_dirty_metadata(handle, leaf.bh);
++ if (err)
++ ext3_std_error(path_obj(path)->i_sb, err);
++errout:
++ iam_leaf_fini(&leaf);
++ return err;
++}
++
++int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k)
++{
++ struct dx_hash_info hinfo;
++ struct iam_path_compat cpath;
++ struct iam_path *path = &cpath.ipc_path;
++ struct htree_cookie hc = {
++ .hinfo = &hinfo
++ };
++ int err, i;
++
++ iam_path_init(path, c, &hc);
++ for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
++ path->ip_key_scratch[i] =
++ (struct iam_key *)&cpath.ipc_scrach[i];
++ err = dx_lookup(path);
++ if (err)
++ goto errout;
++
++ err = iam_leaf_delete(h, path, k);
++errout:
++ iam_path_fini(path);
++ return err;
++}
++
++static int iam_leaf_update(handle_t *handle, struct iam_path *path,
++ struct iam_key *k, struct iam_rec *r)
++{
++ struct iam_leaf leaf;
++ int err;
++
++ err = iam_leaf_init(path, &leaf);
++ if (err)
++ goto errout;
++
++ err = iam_leaf_lookup(path, &leaf, k);
++ if (err)
++ goto errout;
++
++ memcpy(iam_leaf_entry_at(path, leaf.at), r, path_descr(path)->id_rec_size);
++ memcpy(iam_leaf_key_at(path, leaf.at), k, path_descr(path)->id_key_size);
++
++ err = ext3_journal_dirty_metadata(handle, leaf.bh);
++ if (err)
++ ext3_std_error(path_obj(path)->i_sb, err);
++errout:
++ iam_leaf_fini(&leaf);
++ return err;
++}
++
++int iam_update(handle_t *h, struct iam_container *c,
++ struct iam_key *k, struct iam_rec *r)
++{
++ struct dx_hash_info hinfo;
++ struct iam_path_compat cpath;
++ struct iam_path *path = &cpath.ipc_path;
++ struct htree_cookie hc = {
++ .hinfo = &hinfo
++ };
++ int err, i;
++
++ iam_path_init(path, c, &hc);
++ for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
++ path->ip_key_scratch[i] =
++ (struct iam_key *)&cpath.ipc_scrach[i];
++ err = dx_lookup(path);
++ if (err)
++ goto errout;
++
++ err = iam_leaf_update(h, path, k, r);
++errout:
++ iam_path_fini(path);
++ return err;
++}
+ /*
+ * This function increments the frame pointer to search the next leaf
+ * block, and reads in the necessary intervening nodes if the search
+@@ -2213,59 +2595,21 @@
+ }
+
+ #ifdef CONFIG_EXT3_INDEX
+-/*
+- * Returns 0 for success, or a negative error value
+- */
+-static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+- struct inode *inode)
+-{
+- struct iam_path_compat cpath;
+- struct iam_path *path = &cpath.ipc_path;
+- struct iam_descr *param;
+- struct iam_frame *frame, *safe;
++static int split_index_node(handle_t *handle, struct iam_path *path)
++{
++
+ struct iam_entry *entries; /* old block contents */
+ struct iam_entry *entries2; /* new block contents */
+- struct dx_hash_info hinfo;
+- struct buffer_head * bh;
++ struct iam_frame *frame, *safe;
+ struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {0};
+- struct inode *dir = dentry->d_parent->d_inode;
+- struct super_block * sb = dir->i_sb;
+- struct ext3_dir_entry_2 *de;
+ u32 newblock[DX_MAX_TREE_HEIGHT] = {0};
+- int err;
++ struct inode *dir = path_obj(path);
+ int nr_splet;
+- int i;
+- size_t isize;
++ int i, err;
+
+- iam_path_compat_init(&cpath, dir);
+- param = path_descr(path);
+-
+- err = dx_probe(dentry, NULL, &hinfo, path);
+- if (err != 0)
+- return err;
+ frame = path->ip_frame;
+ entries = frame->entries;
+
+- /* XXX nikita: global serialization! */
+- isize = dir->i_size;
+-
+- err = param->id_node_read(path->ip_container,
+- (iam_ptr_t)dx_get_block(path,
+- frame->at), handle, &bh);
+- if (err != 0)
+- goto cleanup;
+-
+- BUFFER_TRACE(bh, "get_write_access");
+- err = ext3_journal_get_write_access(handle, bh);
+- if (err)
+- goto journal_error;
+-
+- err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
+- if (err != -ENOSPC) {
+- bh = NULL;
+- goto cleanup;
+- }
+-
+ /*
+ * Tall-tree handling: we might have to split multiple index blocks
+ * all the way up to tree root. Tricky point here is error handling:
+@@ -2288,7 +2632,7 @@
+ dx_get_count(frame->entries) == dx_get_limit(frame->entries);
+ --frame, ++nr_splet) {
+ if (nr_splet == DX_MAX_TREE_HEIGHT) {
+- ext3_warning(sb, __FUNCTION__,
++ ext3_warning(dir->i_sb, __FUNCTION__,
+ "Directory index full!\n");
+ err = -ENOSPC;
+ goto cleanup;
+@@ -2301,7 +2645,7 @@
+ for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
+ bh_new[i] = ext3_append (handle, dir, &newblock[i], &err);
+ if (!bh_new[i] ||
+- param->id_node_init(path->ip_container, bh_new[i], 0) != 0)
++ path_descr(path)->id_node_init(path->ip_container, bh_new[i], 0) != 0)
+ goto cleanup;
+ BUFFER_TRACE(frame->bh, "get_write_access");
+ err = ext3_journal_get_write_access(handle, frame->bh);
+@@ -2407,23 +2751,81 @@
+ goto journal_error;
+ }
+ }
++ goto cleanup;
++journal_error:
++ ext3_std_error(dir->i_sb, err);
++
++cleanup:
++ for (i = 0; i < ARRAY_SIZE(bh_new); ++i) {
++ if (bh_new[i] != NULL)
++ brelse(bh_new[i]);
++ }
++ return err;
++}
++
++/*
++ * Returns 0 for success, or a negative error value
++ */
++static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
++ struct inode *inode)
++{
++ struct iam_path_compat cpath;
++ struct iam_path *path = &cpath.ipc_path;
++ struct iam_descr *param;
++ struct iam_frame *frame;
++ struct dx_hash_info hinfo;
++ struct buffer_head * bh = NULL;
++ struct inode *dir = dentry->d_parent->d_inode;
++ struct ext3_dir_entry_2 *de;
++ int err;
++ size_t isize;
++
++ iam_path_compat_init(&cpath, dir);
++ param = path_descr(path);
++
++ err = dx_probe(dentry, NULL, &hinfo, path);
++ if (err != 0)
++ return err;
++ frame = path->ip_frame;
++
++ /* XXX nikita: global serialization! */
++ isize = dir->i_size;
++
++ err = param->id_node_read(path->ip_container, (iam_ptr_t)dx_get_block(path, frame->at),
++ handle, &bh);
++ if (err != 0)
++ goto cleanup;
++
++ BUFFER_TRACE(bh, "get_write_access");
++ err = ext3_journal_get_write_access(handle, bh);
++ if (err)
++ goto journal_error;
++
++ err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
++ if (err != -ENOSPC) {
++ bh = NULL;
++ goto cleanup;
++ }
++
++ err = split_index_node(handle, path);
++ if (err)
++ goto cleanup;
++
++ /* copy split inode too */
+ de = do_split(handle, path, &bh, --frame, &hinfo, &err);
+ if (!de)
+ goto cleanup;
++
+ assert(dx_node_check(path, frame));
+ err = add_dirent_to_buf(handle, dentry, inode, de, bh);
+ goto cleanup2;
+
+ journal_error:
+- ext3_std_error(dir->i_sb, err);
++ ext3_std_error(dir->i_sb, err);
+ cleanup:
+ if (bh)
+ brelse(bh);
+ cleanup2:
+- for (i = 0; i < ARRAY_SIZE(bh_new); ++i) {
+- if (bh_new[i] != NULL)
+- brelse(bh_new[i]);
+- }
+ if (err)
+ inode->i_size = isize;
+ iam_path_fini(path);
--- /dev/null
+Index: linux-2.6.9/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.9.orig/fs/ext3/namei.c 2006-04-23 22:35:38.000000000 +0800
++++ linux-2.6.9/fs/ext3/namei.c 2006-04-23 22:35:47.000000000 +0800
+@@ -48,6 +48,11 @@
+ #define NAMEI_RA_SIZE (NAMEI_RA_CHUNKS * NAMEI_RA_BLOCKS)
+ #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
+
++/*
++ * Maximal number of non-leaf levels in htree. In the stock ext3 this is 2.
++ */
++#define DX_MAX_TREE_HEIGHT (5)
++
+ static struct buffer_head *ext3_append(handle_t *handle,
+ struct inode *inode,
+ u32 *block, int *err)
+@@ -75,7 +80,7 @@
+ #ifdef DX_DEBUG
+ #define dxtrace(command) command
+ #else
+-#define dxtrace(command)
++#define dxtrace(command)
+ #endif
+
+ struct fake_dirent
+@@ -168,7 +173,7 @@
+ static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
+ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+ struct dx_frame *frame,
+- struct dx_frame *frames,
++ struct dx_frame *frames,
+ __u32 *start_hash);
+ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
+ struct ext3_dir_entry_2 **res_dir, int *err);
+@@ -249,7 +254,7 @@
+ }
+
+ struct stats
+-{
++{
+ unsigned names;
+ unsigned space;
+ unsigned bcount;
+@@ -367,7 +372,7 @@
+ goto fail;
+ }
+
+- if ((indirect = root->info.indirect_levels) > 1) {
++ if ((indirect = root->info.indirect_levels) > DX_MAX_TREE_HEIGHT - 1) {
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "Unimplemented inode hash depth: %#06x",
+ root->info.indirect_levels);
+@@ -436,12 +441,15 @@
+
+ static void dx_release (struct dx_frame *frames)
+ {
++ int height;
++
+ if (frames[0].bh == NULL)
+ return;
+-
+- if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
+- brelse(frames[1].bh);
+- brelse(frames[0].bh);
++ height = ((struct dx_root *)frames[0].bh->b_data)->info.indirect_levels;
++ for (; height >= 0; height--) {
++ assert(frames[height].bh != NULL);
++ brelse(frames[height].bh);
++ }
+ }
+
+ /*
+@@ -463,7 +471,7 @@
+ */
+ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+ struct dx_frame *frame,
+- struct dx_frame *frames,
++ struct dx_frame *frames,
+ __u32 *start_hash)
+ {
+ struct dx_frame *p;
+@@ -582,7 +590,7 @@
+ {
+ struct dx_hash_info hinfo;
+ struct ext3_dir_entry_2 *de;
+- struct dx_frame frames[2], *frame;
++ struct dx_frame frames[DX_MAX_TREE_HEIGHT], *frame;
+ struct inode *dir;
+ int block, err;
+ int count = 0;
+@@ -627,7 +635,7 @@
+ }
+ count += ret;
+ hashval = ~0;
+- ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS,
++ ret = ext3_htree_next_block(dir, HASH_NB_ALWAYS,
+ frame, frames, &hashval);
+ *next_hash = hashval;
+ if (ret < 0) {
+@@ -644,7 +652,7 @@
+ break;
+ }
+ dx_release(frames);
+- dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
++ dxtrace(printk("Fill tree: returned %d entries, next hash: %x\n",
+ count, *next_hash));
+ return count;
+ errout:
+@@ -918,7 +926,7 @@
+ struct super_block * sb;
+ struct dx_hash_info hinfo;
+ u32 hash;
+- struct dx_frame frames[2], *frame;
++ struct dx_frame frames[DX_MAX_TREE_HEIGHT], *frame;
+ struct ext3_dir_entry_2 *de, *top;
+ struct buffer_head *bh;
+ unsigned long block;
+@@ -1037,7 +1045,7 @@
+ parent = ERR_PTR(-ENOMEM);
+ }
+ return parent;
+-}
++}
+
+ #define S_SHIFT 12
+ static unsigned char ext3_type_by_mode[S_IFMT >> S_SHIFT] = {
+@@ -1098,6 +1106,8 @@
+ return prev;
+ }
+
++/* Allocate new node, and split leaf node @bh into it, inserting new pointer
++ * into parent node identified by @frame */
+ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
+ struct buffer_head **bh,struct dx_frame *frame,
+ struct dx_hash_info *hinfo, int *error)
+@@ -1185,7 +1195,7 @@
+ * add_dirent_to_buf will attempt search the directory block for
+ * space. It will return -ENOSPC if no space is available, and -EIO
+ * and -EEXIST if directory entry already exists.
+- *
++ *
+ * NOTE! bh is NOT released in the case where ENOSPC is returned. In
+ * all other cases bh is released.
+ */
+@@ -1286,7 +1296,7 @@
+ int namelen = dentry->d_name.len;
+ struct buffer_head *bh2;
+ struct dx_root *root;
+- struct dx_frame frames[2], *frame;
++ struct dx_frame frames[DX_MAX_TREE_HEIGHT], *frame;
+ struct dx_entry *entries;
+ struct ext3_dir_entry_2 *de, *de2;
+ char *data1, *top;
+@@ -1427,20 +1437,29 @@
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+ struct inode *inode)
+ {
+- struct dx_frame frames[2], *frame;
+- struct dx_entry *entries, *at;
++ struct dx_frame frames[DX_MAX_TREE_HEIGHT] = {{0,},}, *frame, *safe;
++ struct dx_node *node2;
++ struct dx_entry *entries; /* old block contents */
++ struct dx_entry *entries2; /* new block contents */
+ struct dx_hash_info hinfo;
+ struct buffer_head * bh;
++ struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {0};
+ struct inode *dir = dentry->d_parent->d_inode;
+ struct super_block * sb = dir->i_sb;
+ struct ext3_dir_entry_2 *de;
++ u32 newblock[DX_MAX_TREE_HEIGHT] = {0};
+ int err;
++ int nr_splet;
++ int i;
++ size_t isize;
+
+ frame = dx_probe(dentry, NULL, &hinfo, frames, &err);
+ if (!frame)
+ return err;
+ entries = frame->entries;
+- at = frame->at;
++
++ /* XXX nikita: global serialization! */
++ isize = dir->i_size;
+
+ if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
+ goto cleanup;
+@@ -1456,29 +1475,43 @@
+ goto cleanup;
+ }
+
++ /*
++ * Tall-tree handling: we might have to split multiple index blocks
++ * all the way up to tree root. Tricky point here is error handling:
++ * to avoid complicated undo/rollback we
++ *
++ * - first allocate all necessary blocks
++ *
++ * - insert pointers into them atomically.
++ *
++ * XXX nikita: this algorithm is *not* scalable, as it assumes that at
++ * least nodes in the path are locked.
++ */
++
+ /* Block full, should compress but for now just split */
+ dxtrace(printk("using %u of %u node entries\n",
+ dx_get_count(entries), dx_get_limit(entries)));
+- /* Need to split index? */
+- if (dx_get_count(entries) == dx_get_limit(entries)) {
+- u32 newblock;
+- unsigned icount = dx_get_count(entries);
+- int levels = frame - frames;
+- struct dx_entry *entries2;
+- struct dx_node *node2;
+- struct buffer_head *bh2;
+
+- if (levels && (dx_get_count(frames->entries) ==
+- dx_get_limit(frames->entries))) {
++ /* What levels need split? */
++ for (nr_splet = 0; frame >= frames &&
++ dx_get_count(frame->entries) == dx_get_limit(frame->entries);
++ --frame, ++nr_splet) {
++ if (nr_splet == DX_MAX_TREE_HEIGHT) {
+ ext3_warning(sb, __FUNCTION__,
+ "Directory index full!\n");
+ err = -ENOSPC;
+ goto cleanup;
+ }
+- bh2 = ext3_append (handle, dir, &newblock, &err);
+- if (!(bh2))
++ }
++
++ safe = frame;
++ /* Go back down, allocating blocks, and adding blocks into
++ * transaction... */
++ for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
++ bh_new[i] = ext3_append (handle, dir, &newblock[i], &err);
++ if (!bh_new[i])
+ goto cleanup;
+- node2 = (struct dx_node *)(bh2->b_data);
++ node2 = (struct dx_node *)(bh_new[i]->b_data);
+ entries2 = node2->entries;
+ node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
+ node2->fake.inode = 0;
+@@ -1486,72 +1519,112 @@
+ err = ext3_journal_get_write_access(handle, frame->bh);
+ if (err)
+ goto journal_error;
+- if (levels) {
+- unsigned icount1 = icount/2, icount2 = icount - icount1;
+- unsigned hash2 = dx_get_hash(entries + icount1);
+- dxtrace(printk("Split index %i/%i\n", icount1, icount2));
+-
+- BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
+- err = ext3_journal_get_write_access(handle,
+- frames[0].bh);
++ }
++ /* Add "safe" node to transaction too */
++ if (safe + 1 != frames) {
++ err = ext3_journal_get_write_access(handle, safe->bh);
++ if (err)
++ goto journal_error;
++ }
++
++ /* Go through nodes once more, inserting pointers */
++ for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
++ unsigned count;
++ int idx;
++ struct buffer_head *bh2;
++
++ entries = frame->entries;
++ count = dx_get_count(entries);
++ idx = frame->at - entries;
++
++ bh2 = bh_new[i];
++ node2 = (struct dx_node *)(bh2->b_data);
++ entries2 = node2->entries;
++
++ if (frame == frames) {
++ /* splitting root node. Tricky point:
++ *
++ * In the "normal" B-tree we'd split root *and* add
++ * new root to the tree with pointers to the old root
++ * and its sibling (thus introducing two new nodes).
++ *
++ * In htree it's enough to add one node, because
++ * capacity of the root node is smaller than that of
++ * non-root one.
++ */
++ struct dx_root *root;
++ u8 indirects;
++
++ root = (struct dx_root *) frames->bh->b_data;
++ indirects = root->info.indirect_levels;
++ dxtrace(printk("Creating new root %d\n", indirects));
++ memcpy((char *) entries2, (char *) entries,
++ count * sizeof(struct dx_entry));
++ dx_set_limit(entries2, dx_node_limit(dir));
++
++ /* Set up root */
++ dx_set_count(entries, 1);
++ dx_set_block(entries + 0, newblock[i]);
++ root->info.indirect_levels = indirects + 1;
++
++ /* Shift frames in the path */
++ memmove(frames + 2, frames + 1,
++ (sizeof frames) - 2 * sizeof frames[0]);
++ /* Add new access path frame */
++ frames[1].at = entries2 + idx;
++ frames[1].entries = entries = entries2;
++ frames[1].bh = bh2;
++ ++ frame;
++ bh_new[i] = NULL; /* buffer head is "consumed" */
++ err = ext3_journal_get_write_access(handle, bh2);
+ if (err)
+ goto journal_error;
+-
+- memcpy ((char *) entries2, (char *) (entries + icount1),
+- icount2 * sizeof(struct dx_entry));
+- dx_set_count (entries, icount1);
+- dx_set_count (entries2, icount2);
++ } else {
++ /* splitting non-root index node. */
++ unsigned count1 = count/2, count2 = count - count1;
++ unsigned hash2 = dx_get_hash(entries + count1);
++ dxtrace(printk("Split index %i/%i\n", count1, count2));
++
++ memcpy ((char *) entries2, (char *) (entries + count1),
++ count2 * sizeof(struct dx_entry));
++ dx_set_count (entries, count1);
++ dx_set_count (entries2, count2);
+ dx_set_limit (entries2, dx_node_limit(dir));
+
+ /* Which index block gets the new entry? */
+- if (at - entries >= icount1) {
+- frame->at = at = at - entries - icount1 + entries2;
++ if (idx >= count1) {
++ frame->at = entries2 + idx - count1;
+ frame->entries = entries = entries2;
+ swap(frame->bh, bh2);
++ bh_new[i] = bh2;
+ }
+- dx_insert_block (frames + 0, hash2, newblock);
+- dxtrace(dx_show_index ("node", frames[1].entries));
++ dx_insert_block (frame - 1, hash2, newblock[i]);
++ dxtrace(dx_show_index ("node", frame->entries));
+ dxtrace(dx_show_index ("node",
+ ((struct dx_node *) bh2->b_data)->entries));
+ err = ext3_journal_dirty_metadata(handle, bh2);
+ if (err)
+ goto journal_error;
+- brelse (bh2);
+- } else {
+- dxtrace(printk("Creating second level index...\n"));
+- memcpy((char *) entries2, (char *) entries,
+- icount * sizeof(struct dx_entry));
+- dx_set_limit(entries2, dx_node_limit(dir));
+-
+- /* Set up root */
+- dx_set_count(entries, 1);
+- dx_set_block(entries + 0, newblock);
+- ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
+-
+- /* Add new access path frame */
+- frame = frames + 1;
+- frame->at = at = at - entries + entries2;
+- frame->entries = entries = entries2;
+- frame->bh = bh2;
+- err = ext3_journal_get_write_access(handle,
+- frame->bh);
+- if (err)
+- goto journal_error;
+ }
+- ext3_journal_dirty_metadata(handle, frames[0].bh);
+ }
+- de = do_split(handle, dir, &bh, frame, &hinfo, &err);
++ de = do_split(handle, dir, &bh, --frame, &hinfo, &err);
+ if (!de)
+ goto cleanup;
+ err = add_dirent_to_buf(handle, dentry, inode, de, bh);
+- bh = NULL;
+- goto cleanup;
++ goto cleanup2;
+
+ journal_error:
+ ext3_std_error(dir->i_sb, err);
+ cleanup:
+ if (bh)
+ brelse(bh);
++cleanup2:
++ for (i = 0; i < ARRAY_SIZE(bh_new); ++i) {
++ if (bh_new[i] != NULL)
++ brelse(bh_new[i]);
++ }
++ if (err)
++ inode->i_size = isize;
+ dx_release(frames);
+ return err;
+ }
+@@ -1561,7 +1634,7 @@
+ * ext3_delete_entry deletes a directory entry by merging it with the
+ * previous entry
+ */
+-static int ext3_delete_entry (handle_t *handle,
++static int ext3_delete_entry (handle_t *handle,
+ struct inode * dir,
+ struct ext3_dir_entry_2 * de_del,
+ struct buffer_head * bh)
+@@ -1821,7 +1894,7 @@
+ de1 = (struct ext3_dir_entry_2 *)
+ ((char *) de + le16_to_cpu(de->rec_len));
+ if (le32_to_cpu(de->inode) != inode->i_ino ||
+- !le32_to_cpu(de1->inode) ||
++ !le32_to_cpu(de1->inode) ||
+ strcmp (".", de->name) ||
+ strcmp ("..", de1->name)) {
+ ext3_warning (inode->i_sb, "empty_dir",
+@@ -1891,7 +1964,7 @@
+ * being truncated, or files being unlinked. */
+
+ /* @@@ FIXME: Observation from aviro:
+- * I think I can trigger J_ASSERT in ext3_orphan_add(). We block
++ * I think I can trigger J_ASSERT in ext3_orphan_add(). We block
+ * here (on lock_super()), so race with ext3_link() which might bump
+ * ->i_nlink. For, say it, character device. Not a regular file,
+ * not a directory, not a symlink and ->i_nlink > 0.
+@@ -2415,4 +2488,4 @@
+ .removexattr = generic_removexattr,
+ #endif
+ .permission = ext3_permission,
+-};
++};
ext3-mballoc2-2.6.9-rhel4.patch
ext3-nlinks-2.6.9.patch
ext3-ialloc-2.6.patch
+ext3-tall-htree.patch
+ext3-htree-path.patch
+ext3-htree-r5-hash.patch
+ext3-htree-path-ops.patch
+ext3-hash-selection.patch
+ext3-htree-comments.patch
+ext3-iam-ops.patch