-Index: linux-stage/fs/ext3/namei.c
+Index: iam/fs/ext3/Makefile
===================================================================
---- linux-stage.orig/fs/ext3/namei.c 2006-05-29 13:01:21.000000000 +0800
-+++ linux-stage/fs/ext3/namei.c 2006-05-29 13:01:22.000000000 +0800
-@@ -24,81 +24,6 @@
- * Theodore Ts'o, 2002
- */
-
--/*
-- * iam: big theory statement.
-- *
-- * iam (Index Access Module) is a module providing abstraction of persistent
-- * transactional container on top of generalized ext3 htree.
-- *
-- * iam supports:
-- *
-- * - key, pointer, and record size specifiable per container.
-- *
-- * - trees taller than 2 index levels.
-- *
-- * - read/write to existing ext3 htree directories as iam containers.
-- *
-- * iam container is a tree, consisting of leaf nodes containing keys and
-- * records stored in this container, and index nodes, containing keys and
-- * pointers to leaf or index nodes.
-- *
-- * iam does not work with keys directly, instead it calls user-supplied key
-- * comparison function (->dpo_keycmp()).
-- *
-- * Pointers are (currently) interpreted as logical offsets (measured in
-- * blocksful) within underlying flat file on top of which iam tree lives.
-- *
-- * On-disk format:
-- *
-- * iam mostly tries to reuse existing htree formats.
-- *
-- * Format of index node:
-- *
-- * +-----+-------+-------+-------+------+-------+------------+
-- * | | count | | | | | |
-- * | gap | / | entry | entry | .... | entry | free space |
-- * | | limit | | | | | |
-- * +-----+-------+-------+-------+------+-------+------------+
-- *
-- * gap this part of node is never accessed by iam code. It
-- * exists for binary compatibility with ext3 htree (that,
-- * in turn, stores fake struct ext2_dirent for ext2
-- * compatibility), and to keep some unspecified per-node
-- * data. Gap can be different for root and non-root index
-- * nodes. Gap size can be specified for each container
-- * (gap of 0 is allowed).
-- *
-- * count/limit current number of entries in this node, and the maximal
-- * number of entries that can fit into node. count/limit
-- * has the same size as entry, and is itself counted in
-- * count.
-- *
-- * entry index entry: consists of a key immediately followed by
-- * a pointer to a child node. Size of a key and size of a
-- * pointer depends on container. Entry has neither
-- * alignment nor padding.
-- *
-- * free space portion of node new entries are added to
-- *
-- * Entries in index node are sorted by their key value.
-- *
-- * Format of leaf node:
-- *
-- * +-----+-------+-------+-------+------+-------+------------+
-- * | | count | | | | | |
-- * | gap | / | leaf | leaf | .... | leaf | free space |
-- * | | limit | | | | | |
-- * +-----+-------+-------+-------+------+-------+------------+
--
-- * leaf For leaf entry: consists of a rec immediately followd by
-- * a key. size of a key and size of a rec depends on container.
-- *
-- *
-- *
-- *
-- *
-- */
--
- #include <linux/module.h>
- #include <linux/fs.h>
- #include <linux/pagemap.h>
-@@ -112,10 +37,10 @@
- #include <linux/quotaops.h>
- #include <linux/buffer_head.h>
- #include <linux/smp_lock.h>
-+#include <linux/lustre_iam.h>
- #include "xattr.h"
- #include "iopen.h"
- #include "acl.h"
--#include <linux/lustre_iam.h>
- /*
- * define how far ahead to read directories while searching them.
- */
-@@ -125,9 +50,9 @@
- #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
-
-
--static struct buffer_head *ext3_append(handle_t *handle,
-- struct inode *inode,
-- u32 *block, int *err)
-+struct buffer_head *ext3_append(handle_t *handle,
-+ struct inode *inode,
-+ u32 *block, int *err)
- {
- struct buffer_head *bh;
-
-@@ -136,14 +61,15 @@
- if ((bh = ext3_bread(handle, inode, *block, 1, err))) {
- inode->i_size += inode->i_sb->s_blocksize;
- EXT3_I(inode)->i_disksize = inode->i_size;
-- ext3_journal_get_write_access(handle,bh);
-+ *err = ext3_journal_get_write_access(handle, bh);
-+ if (err != 0) {
-+ brelse(bh);
-+ bh = NULL;
-+ }
- }
- return bh;
- }
-
--#ifndef assert
--#define assert(test) J_ASSERT(test)
--#endif
-
- #ifndef swap
- #define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
-@@ -162,10 +88,6 @@
- u8 file_type;
- };
-
--struct dx_countlimit {
-- __le16 limit;
-- __le16 count;
--};
-
- /*
- * dx_root_info is laid out so that if it should somehow get overlaid by a
-@@ -203,245 +125,10 @@
- };
+--- iam.orig/fs/ext3/Makefile 2006-05-27 19:58:43.000000000 +0400
++++ iam/fs/ext3/Makefile 2006-05-27 20:03:07.000000000 +0400
+@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
+ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+ ioctl.o namei.o super.o symlink.o hash.o resize.o \
+- extents.o mballoc.o
++ extents.o mballoc.o iam.o iam_lfix.o
--static u32 htree_root_ptr(struct iam_container *c);
--static int htree_node_check(struct iam_path *path, struct iam_frame *frame);
--static int htree_node_init(struct iam_container *c,
-- struct buffer_head *bh, int root);
--static int htree_keycmp(struct iam_container *c,
-- struct iam_key *k1, struct iam_key *k2);
--static int htree_node_read(struct iam_container *c, iam_ptr_t ptr,
-- handle_t *h, struct buffer_head **bh);
--
--/*
-- * Parameters describing iam compatibility mode in which existing ext3 htrees
-- * can be manipulated.
-- */
--static struct iam_descr htree_compat_param = {
-- .id_key_size = sizeof ((struct dx_map_entry *)NULL)->hash,
-- .id_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs,
-- .id_node_gap = offsetof(struct dx_node, entries),
-- .id_root_gap = offsetof(struct dx_root, entries),
--
-- .id_root_ptr = htree_root_ptr,
-- .id_node_check = htree_node_check,
-- .id_node_init = htree_node_init,
-- .id_node_read = htree_node_read,
-- .id_keycmp = htree_keycmp
--};
--
--
--struct iam_key;
--struct iam_rec;
--struct iam_descr;
--struct iam_container;
--struct iam_path;
--
--
--
--/*
-- * iam cursor (iterator) api.
-- */
--
--/*
-- * Flags controlling iterator functionality.
-- */
--enum iam_it_flags {
-- /*
-- * this iterator will move (iam_it_{prev,next}() will be called on it)
-- */
-- IAM_IT_MOVE = (1 << 0),
-- /*
-- * tree can be updated through this iterator.
-- */
-- IAM_IT_WRITE = (1 << 1)
--};
--
--/*
-- * States of iterator state machine.
-- */
--enum iam_it_state {
-- /* initial state */
-- IAM_IT_DETACHED,
-- /* iterator is above particular record in the container */
-- IAM_IT_ATTACHED
--};
--
--struct htree_cookie {
-- struct dx_hash_info *hinfo;
-- struct dentry *dentry;
--};
--
--/*
-- * Iterator.
-- *
-- * Immediately after call to iam_it_init() iterator is in "detached"
-- * (IAM_IT_DETACHED) state: it is associated with given parent container, but
-- * doesn't point to any particular record in this container.
-- *
-- * After successful call to iam_it_get() and until corresponding call to
-- * iam_it_put() iterator is in "attached" state (IAM_IT_ATTACHED).
-- *
-- * Attached iterator can move through records in a container (provided
-- * IAM_IT_MOVE permission) in a key order, can get record and key values as it
-- * passes over them, and can modify container (provided IAM_IT_WRITE
-- * permission).
-- *
-- * Concurrency: iterators are supposed to be local to thread. Interfaces below
-- * do no internal serialization.
-- *
-- */
--struct iam_iterator {
-- /*
-- * iterator flags, taken from enum iam_it_flags.
-- */
-- __u32 ii_flags;
-- enum iam_it_state ii_state;
-- /*
-- * path to the record. Valid in IAM_IT_ATTACHED state.
-- */
-- struct iam_path ii_path;
--};
--
--static inline struct iam_key *keycpy(struct iam_container *c,
-- struct iam_key *k1, struct iam_key *k2)
--{
-- return memcpy(k1, k2, c->ic_descr->id_key_size);
--}
--
--static inline int keycmp(struct iam_container *c,
-- struct iam_key *k1, struct iam_key *k2)
--{
-- return c->ic_descr->id_keycmp(c, k1, k2);
--}
--
--static struct iam_container *iam_it_container(struct iam_iterator *it)
--{
-- return it->ii_path.ip_container;
--}
--
--static inline int it_keycmp(struct iam_iterator *it,
-- struct iam_key *k1, struct iam_key *k2)
--{
-- return keycmp(iam_it_container(it), k1, k2);
--}
--
--/*
-- * Initialize iterator to IAM_IT_DETACHED state.
-- *
-- * postcondition: it_state(it) == IAM_IT_DETACHED
-- */
--int iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags);
--/*
-- * Finalize iterator and release all resources.
-- *
-- * precondition: it_state(it) == IAM_IT_DETACHED
-- */
--void iam_it_fini(struct iam_iterator *it);
--
--/*
-- * Attach iterator. After successful completion, @it points to record with the
-- * largest key not larger than @k. Semantics of ->id_create() method guarantee
-- * that such record will always be found.
-- *
-- * Return value: 0: positioned on existing record,
-- * -ve: error.
-- *
-- * precondition: it_state(it) == IAM_IT_DETACHED
-- * postcondition: ergo(result == 0,
-- * (it_state(it) == IAM_IT_ATTACHED &&
-- * it_keycmp(it, iam_it_key_get(it, *), k) < 0))
-- */
--int iam_it_get(struct iam_iterator *it, struct iam_key *k);
--
--/*
-- * Duplicates iterator.
-- *
-- * postcondition: it_state(dst) == it_state(src) &&
-- * iam_it_container(dst) == iam_it_container(src) &&
-- * dst->ii_flags = src->ii_flags &&
-- * ergo(it_state(it) == IAM_IT_ATTACHED,
-- * iam_it_rec_get(dst) == iam_it_rec_get(src) &&
-- * iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
-- */
--void iam_it_dup(struct iam_iterator *dst, struct iam_iterator *src);
--
--/*
-- * Detach iterator. Does nothing it detached state.
-- *
-- * postcondition: it_state(it) == IAM_IT_DETACHED
-- */
--void iam_it_put(struct iam_iterator *it);
--
--/*
-- * Move iterator one record right.
-- *
-- * Return value: 0: success,
-- * +1: end of container reached
-- * -ve: error
-- *
-- * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
-- * postcondition: ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED)
-- */
--int iam_it_next(struct iam_iterator *it);
--
--/*
-- * Return pointer to the record under iterator.
-- *
-- * precondition: it_state(it) == IAM_IT_ATTACHED
-- * postcondition: it_state(it) == IAM_IT_ATTACHED
-- */
--const struct iam_rec *iam_it_rec_get(struct iam_iterator *it);
--
--/*
-- * Replace contents of record under iterator.
-- *
-- * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
-- * postcondition: it_state(it) == IAM_IT_ATTACHED &&
-- * ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
-- */
--int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r);
--
--/*
-- * Place key under iterator in @k, return @k
-- *
-- * precondition: it_state(it) == IAM_IT_ATTACHED
-- * postcondition: it_state(it) == IAM_IT_ATTACHED
-- */
--const struct iam_key *iam_it_key_get(struct iam_iterator *it,
-- struct iam_key *k);
--
--/*
-- * Insert new record with key @k and contents from @r, shifting records to the
-- * right.
-- *
-- * precondition: it_state(it) == IAM_IT_ATTACHED &&
-- * it->ii_flags&IAM_IT_WRITE &&
-- * it_keycmp(it, iam_it_key_get(it, *), k) < 0
-- * postcondition: it_state(it) == IAM_IT_ATTACHED &&
-- * ergo(result == 0,
-- * it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
-- * !memcmp(iam_it_rec_get(it), r, ...))
-- */
--int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
-- struct iam_key *k, struct iam_rec *r);
--/*
-- * Delete record under iterator.
-- *
-- * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
-- * postcondition: it_state(it) == IAM_IT_ATTACHED
-- */
--int iam_it_rec_delete(handle_t *h, struct iam_iterator *it);
--
- #ifdef CONFIG_EXT3_INDEX
- static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry);
- static void dx_set_block(struct iam_path *p,
- struct iam_entry *entry, unsigned value);
--static inline struct iam_key *dx_get_key(struct iam_path *p,
-- struct iam_entry *entry,
-- struct iam_key *key);
--static void dx_set_key(struct iam_path *p, struct iam_entry *entry,
-- struct iam_key *key);
--static unsigned dx_get_count(struct iam_entry *entries);
- static unsigned dx_get_limit(struct iam_entry *entries);
- static void dx_set_count(struct iam_entry *entries, unsigned value);
- static void dx_set_limit(struct iam_entry *entries, unsigned value);
-@@ -457,80 +144,29 @@
- static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
- struct dx_map_entry *offsets, int count);
- static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
--static void dx_insert_block (struct iam_path *path,
-- struct iam_frame *frame, u32 hash, u32 block);
--static int ext3_htree_next_block(struct inode *dir, __u32 hash,
-- struct iam_path *path, __u32 *start_hash);
- static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
- struct ext3_dir_entry_2 **res_dir, int *err);
- static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
- struct inode *inode);
-
--static inline void iam_path_init(struct iam_path *path,
-- struct iam_container *c, struct htree_cookie *hc);
--static inline void iam_path_fini(struct iam_path *path);
--
--
--/*
-- * Future: use high four bits of block for coalesce-on-delete flags
-- * Mask them off for now.
-- */
--
--static inline void *entry_off(struct iam_entry *entry, ptrdiff_t off)
--{
-- return (void *)((char *)entry + off);
--}
--
--static inline struct iam_descr *path_descr(struct iam_path *p)
--{
-- return p->ip_container->ic_descr;
--}
--
--static inline struct inode *path_obj(struct iam_path *p)
--{
-- return p->ip_container->ic_object;
--}
--
- static inline size_t iam_entry_size(struct iam_path *p)
- {
-- return path_descr(p)->id_key_size + path_descr(p)->id_ptr_size;
-+ return iam_path_descr(p)->id_key_size + iam_path_descr(p)->id_ptr_size;
- }
-
- static inline struct iam_entry *iam_entry_shift(struct iam_path *p,
-- struct iam_entry *entry, int shift)
-+ struct iam_entry *entry,
-+ int shift)
- {
- void *e = entry;
- return e + shift * iam_entry_size(p);
- }
-
--static inline ptrdiff_t iam_entry_diff(struct iam_path *p,
-- struct iam_entry *e1, struct iam_entry *e2)
--{
-- ptrdiff_t diff;
--
-- diff = (void *)e1 - (void *)e2;
-- assert(diff / iam_entry_size(p) * iam_entry_size(p) == diff);
-- return diff / iam_entry_size(p);
--}
--
--static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry)
--{
-- return le32_to_cpu(*(u32 *)entry_off(entry, path_descr(p)->id_key_size))
-- & 0x00ffffff;
--}
--
--static inline void dx_set_block(struct iam_path *p,
-- struct iam_entry *entry, unsigned value)
-+static inline struct iam_key *iam_get_key(struct iam_path *p,
-+ struct iam_entry *entry,
-+ struct iam_key *key)
- {
-- *(u32*)entry_off(entry,
-- path_descr(p)->id_key_size) = cpu_to_le32(value);
--}
--
--static inline struct iam_key *dx_get_key(struct iam_path *p,
-- struct iam_entry *entry,
-- struct iam_key *key)
--{
-- memcpy(key, entry, path_descr(p)->id_key_size);
-+ memcpy(key, entry, iam_path_descr(p)->id_key_size);
- return key;
- }
-
-@@ -540,68 +176,70 @@
- return (struct iam_key *)entry;
- }
-
--static inline void dx_set_key(struct iam_path *p,
-- struct iam_entry *entry, struct iam_key *key)
--{
-- memcpy(entry, key, path_descr(p)->id_key_size);
--}
--
--static inline unsigned dx_get_count (struct iam_entry *entries)
--{
-- return le16_to_cpu(((struct dx_countlimit *) entries)->count);
--}
--
--static inline unsigned dx_get_limit (struct iam_entry *entries)
-+static inline ptrdiff_t iam_entry_diff(struct iam_path *p,
-+ struct iam_entry *e1,
-+ struct iam_entry *e2)
- {
-- return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
--}
-+ ptrdiff_t diff;
-
--static inline void dx_set_count (struct iam_entry *entries, unsigned value)
--{
-- ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
-+ diff = (void *)e1 - (void *)e2;
-+ assert(diff / iam_entry_size(p) * iam_entry_size(p) == diff);
-+ return diff / iam_entry_size(p);
- }
-
--static inline void dx_set_limit (struct iam_entry *entries, unsigned value)
-+static inline void dx_set_limit(struct iam_entry *entries, unsigned value)
- {
- ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
- }
-
- static inline unsigned dx_root_limit(struct iam_path *p)
- {
-- struct iam_descr *param = path_descr(p);
-- unsigned entry_space = path_obj(p)->i_sb->s_blocksize -
-+ struct iam_descr *param = iam_path_descr(p);
-+ unsigned entry_space = iam_path_obj(p)->i_sb->s_blocksize -
- param->id_root_gap;
- return entry_space / (param->id_key_size + param->id_ptr_size);
- }
-
--static inline unsigned dx_node_limit(struct iam_path *p)
--{
-- struct iam_descr *param = path_descr(p);
-- unsigned entry_space = path_obj(p)->i_sb->s_blocksize -
-- param->id_node_gap;
-- return entry_space / (param->id_key_size + param->id_ptr_size);
--}
-+/*
-+ * Two iam_descr's are provided:
-+ *
-+ * - htree_compat_param that supports legacy ext3-htree indices;
-+ * - fixed_rec_param that supports containers with records of fixed size.
-+ *
-+ */
-
--static inline int dx_index_is_compat(struct iam_path *path)
--{
-- return path_descr(path) == &htree_compat_param;
--}
-+static u32 htree_root_ptr(struct iam_container *c);
-+static int htree_node_check(struct iam_path *path, struct iam_frame *frame);
-+static int htree_node_init(struct iam_container *c, struct buffer_head *bh, int root);
-+static int htree_keycmp(const struct iam_container *c,
-+ const struct iam_key *k1, const struct iam_key *k2);
-
--static struct iam_entry *dx_get_entries(struct iam_path *path, void *data,
-- int root)
--{
-- return data +
-- (root ?
-- path_descr(path)->id_root_gap : path_descr(path)->id_node_gap);
--}
-+struct iam_operations htree_operation = {
-+ .id_root_ptr = htree_root_ptr,
-+ .id_node_check = htree_node_check,
-+ .id_node_init = htree_node_init,
-+ .id_node_read = iam_node_read,
-+ .id_keycmp = htree_keycmp
-+};
-+
-+/*
-+ * Parameters describing iam compatibility mode in which existing ext3 htrees
-+ * can be manipulated.
-+ */
-+struct iam_descr htree_compat_param = {
-+ .id_key_size = sizeof ((struct dx_map_entry *)NULL)->hash,
-+ .id_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs,
-+ .id_node_gap = offsetof(struct dx_node, entries),
-+ .id_root_gap = offsetof(struct dx_root, entries),
-+ .id_ops = &htree_operation
-+};
-
--static struct iam_entry *dx_node_get_entries(struct iam_path *path,
-- struct iam_frame *frame)
-+static inline int dx_index_is_compat(struct iam_path *path)
- {
-- return dx_get_entries(path,
-- frame->bh->b_data, frame == path->ip_frames);
-+ return iam_path_descr(path) == &htree_compat_param;
- }
-
-+
- static int dx_node_check(struct iam_path *p, struct iam_frame *f)
- {
- struct iam_entry *e;
-@@ -614,10 +252,10 @@
- count = dx_get_count(e);
- e = iam_entry_shift(p, e, 1);
- for (i = 0; i < count - 1; ++i, e = iam_entry_shift(p, e, 1)) {
-- keycpy(c, p->ip_key_scratch[0], p->ip_key_scratch[1]);
-- dx_get_key(p, e, p->ip_key_scratch[1]);
-+ iam_keycpy(c, iam_path_key(p, 0), iam_path_key(p, 1));
-+ iam_get_key(p, e, iam_path_key(p, 1));
- if (i > 0 &&
-- keycmp(c, p->ip_key_scratch[0], p->ip_key_scratch[1]) > 0)
-+ iam_keycmp(c, iam_path_key(p, 0), iam_path_key(p, 1)) > 0)
- return 0;
- }
- return 1;
-@@ -636,13 +274,17 @@
-
- data = frame->bh->b_data;
- entries = dx_node_get_entries(path, frame);
-- sb = path_obj(path)->i_sb;
-+ sb = iam_path_obj(path)->i_sb;
- if (frame == path->ip_frames) {
- /* root node */
- struct dx_root *root;
-- struct htree_cookie *hc = path->ip_descr_data;
-+ struct iam_path_compat *ipc;
-
- root = data;
-+ assert(path->ip_data != NULL);
-+ ipc = container_of(path->ip_data, struct iam_path_compat,
-+ ipc_descr);
-+
- if (root->info.hash_version > DX_HASH_MAX) {
- ext3_warning(sb, __FUNCTION__,
- "Unrecognised inode hash code %d",
-@@ -669,15 +311,16 @@
- root->info.info_length));
- assert(dx_get_limit(entries) == dx_root_limit(path));
-
-- hc->hinfo->hash_version = root->info.hash_version;
-- hc->hinfo->seed = EXT3_SB(sb)->s_hash_seed;
-- if (hc->dentry)
-- ext3fs_dirhash(hc->dentry->d_name.name,
-- hc->dentry->d_name.len, hc->hinfo);
-- path->ip_key_target = (struct iam_key *)&hc->hinfo->hash;
-+ ipc->ipc_hinfo->hash_version = root->info.hash_version;
-+ ipc->ipc_hinfo->seed = EXT3_SB(sb)->s_hash_seed;
-+ if (ipc->ipc_dentry)
-+ ext3fs_dirhash(ipc->ipc_dentry->d_name.name,
-+ ipc->ipc_dentry->d_name.len,
-+ ipc->ipc_hinfo);
-+ path->ip_key_target = (struct iam_key *)&ipc->ipc_hinfo->hash;
- } else {
- /* non-root index */
-- assert(entries == data + path_descr(path)->id_node_gap);
-+ assert(entries == data + iam_path_descr(path)->id_node_gap);
- assert(dx_get_limit(entries) == dx_node_limit(path));
- }
- frame->entries = frame->at = entries;
-@@ -697,8 +340,8 @@
- return 0;
- }
-
--static int htree_node_read(struct iam_container *c, iam_ptr_t ptr,
-- handle_t *handle, struct buffer_head **bh)
-+int iam_node_read(struct iam_container *c, iam_ptr_t ptr,
-+ handle_t *handle, struct buffer_head **bh)
- {
- int result = 0;
-
-@@ -708,8 +351,8 @@
- return result;
- }
-
--static int htree_keycmp(struct iam_container *c,
-- struct iam_key *k1, struct iam_key *k2)
-+static int htree_keycmp(const struct iam_container *c,
-+ const struct iam_key *k1, const struct iam_key *k2)
- {
- __u32 p1 = le32_to_cpu(*(__u32 *)k1);
- __u32 p2 = le32_to_cpu(*(__u32 *)k2);
-@@ -800,7 +443,7 @@
- }
- #endif /* DX_DEBUG */
-
--static int dx_lookup(struct iam_path *path)
-+int dx_lookup(struct iam_path *path)
- {
- u32 ptr;
- int err = 0;
-@@ -810,11 +453,11 @@
- struct iam_frame *frame;
- struct iam_container *c;
-
-- param = path_descr(path);
-+ param = iam_path_descr(path);
- c = path->ip_container;
-
- for (frame = path->ip_frames, i = 0,
-- ptr = param->id_root_ptr(path->ip_container);
-+ ptr = param->id_ops->id_root_ptr(path->ip_container);
- i <= path->ip_indirect;
- ptr = dx_get_block(path, frame->at), ++frame, ++i) {
- struct iam_entry *entries;
-@@ -823,10 +466,11 @@
- struct iam_entry *m;
- unsigned count;
-
-- err = param->id_node_read(c, (iam_ptr_t)ptr, NULL, &frame->bh);
-+ err = param->id_ops->id_node_read(c, (iam_ptr_t)ptr, NULL,
-+ &frame->bh);
- if (err != 0)
- break;
-- err = param->id_node_check(path, frame);
-+ err = param->id_ops->id_node_check(path, frame);
- if (err != 0)
- break;
-
-@@ -841,8 +485,8 @@
- m = iam_entry_shift(path,
- p, iam_entry_diff(path, q, p) / 2);
- dxtrace(printk("."));
-- if (keycmp(c, iam_key_at(path, m),
-- path->ip_key_target) > 0)
-+ if (iam_keycmp(c, iam_key_at(path, m),
-+ path->ip_key_target) > 0)
- q = iam_entry_shift(path, m, -1);
- else
- p = iam_entry_shift(path, m, +1);
-@@ -857,12 +501,12 @@
- while (n--) {
- dxtrace(printk(","));
- at = iam_entry_shift(path, at, +1);
-- if (keycmp(c, iam_key_at(path, at),
-- path->ip_key_target) > 0) {
-+ if (iam_keycmp(c, iam_key_at(path, at),
-+ path->ip_key_target) > 0) {
- if (at != iam_entry_shift(path, frame->at, 1)) {
- BREAKPOINT;
- printk(KERN_EMERG "%i\n",
-- keycmp(c, iam_key_at(path, at),
-+ iam_keycmp(c, iam_key_at(path, at),
- path->ip_key_target));
- }
- at = iam_entry_shift(path, at, -1);
-@@ -891,508 +535,20 @@
- struct dx_hash_info *hinfo, struct iam_path *path)
- {
- int err;
-- struct htree_cookie hc = {
-- .dentry = dentry,
-- .hinfo = hinfo
-- };
-+ struct iam_path_compat *ipc;
-+
-+ assert(path->ip_data != NULL);
-+ ipc = container_of(path->ip_data, struct iam_path_compat, ipc_descr);
-+ ipc->ipc_dentry = dentry;
-+ ipc->ipc_hinfo = hinfo;
-
- assert(dx_index_is_compat(path));
-- path->ip_descr_data = &hc;
- err = dx_lookup(path);
- assert(err != 0 || path->ip_frames[path->ip_indirect].bh != NULL);
- return err;
- }
-
- /*
-- * Initialize container @c, acquires additional reference on @inode.
-- */
--int iam_container_init(struct iam_container *c,
-- struct iam_descr *descr, struct inode *inode)
--{
-- memset(c, 0, sizeof *c);
-- c->ic_descr = descr;
-- c->ic_object = igrab(inode);
-- if (c->ic_object != NULL)
-- return 0;
-- else
-- return -ENOENT;
--}
--
--/*
-- * Finalize container @c, release all resources.
-- */
--void iam_container_fini(struct iam_container *c)
--{
-- if (c->ic_object != NULL) {
-- iput(c->ic_object);
-- c->ic_object = NULL;
-- }
--}
--
--static inline void iam_path_init(struct iam_path *path, struct iam_container *c,
-- struct htree_cookie *hc)
--{
-- memset(path, 0, sizeof *path);
-- path->ip_container = c;
-- path->ip_frame = path->ip_frames;
-- path->ip_descr_data = hc;
--}
--
--static inline void iam_path_fini(struct iam_path *path)
--{
-- int i;
--
-- for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) {
-- if (path->ip_frames[i].bh != NULL) {
-- brelse(path->ip_frames[i].bh);
-- path->ip_frames[i].bh = NULL;
-- }
-- }
--}
--
--static void iam_path_compat_init(struct iam_path_compat *path,
-- struct inode *inode)
--{
-- int i;
--
-- iam_container_init(&path->ipc_container, &htree_compat_param, inode);
-- /*
-- * XXX hack allowing finalization of iam_path_compat with
-- * iam_path_fini().
-- */
-- iput(inode);
-- iam_path_init(&path->ipc_path, &path->ipc_container, NULL);
-- for (i = 0; i < ARRAY_SIZE(path->ipc_path.ip_key_scratch); ++i)
-- path->ipc_path.ip_key_scratch[i] =
-- (struct iam_key *)&path->ipc_scrach[i];
--}
--
--static void iam_path_compat_fini(struct iam_path_compat *path)
--{
-- iam_path_fini(&path->ipc_path);
-- iam_container_fini(&path->ipc_container);
--}
--
--static int iam_leaf_init(struct iam_path *path, struct iam_leaf *leaf)
--{
-- int block, err;
-- struct buffer_head *bh;
--
-- block = dx_get_block(path, path->ip_frame->at);
-- err = path_descr(path)->id_node_read(path->ip_container, block,
-- NULL, &bh);
-- if (err)
-- return err;
--
-- leaf->bh = bh;
-- leaf->entries = (struct iam_leaf_entry *)bh->b_data;
-- return 0;
--}
--
--static void iam_leaf_fini(struct iam_leaf *leaf)
--{
-- if (leaf->bh)
-- brelse(leaf->bh);
--}
--
--/*
-- * Search container @c for record with key @k. If record is found, its data
-- * are moved into @r.
-- *
-- *
-- *
-- * Return values: +ve: found, 0: not-found, -ve: error
-- */
--
--int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r)
--{
-- struct dx_hash_info hinfo;
-- struct iam_path_compat cpath;
-- struct iam_path *path = &cpath.ipc_path;
-- struct htree_cookie hc = {
-- .hinfo = &hinfo
-- };
-- int err, i;
--
-- iam_path_init(path, c, &hc);
-- for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
-- path->ip_key_scratch[i] =
-- (struct iam_key *)&cpath.ipc_scrach[i];
-- err = dx_lookup(path);
-- do {
-- struct iam_leaf leaf;
-- err = iam_leaf_init(path, &leaf);
-- if (err)
-- goto errout;
--
-- for (path_descr(path)->id_leaf.start(c, &leaf);
-- !path_descr(path)->id_leaf.at_end(c, &leaf);
-- path_descr(path)->id_leaf.next(c, &leaf)) {
-- struct iam_key *key;
--
-- key = kmalloc(path_descr(path)->id_key_size, GFP_KERNEL);
-- path_descr(path)->id_leaf.key(c, &leaf, key);
-- if (keycmp(c, k, key) == 0) {
-- memcpy(r, path_descr(path)->id_leaf.rec(c, &leaf),
-- path_descr(path)->id_rec_size);
-- iam_path_fini(path);
-- iam_leaf_fini(&leaf);
-- return 0;
-- }
-- }
--
-- iam_leaf_fini(&leaf);
-- /* Check to see if we should continue to search */
-- err = ext3_htree_next_block(c->ic_object, hinfo.hash, path, NULL);
-- if (err < 0)
-- goto errout;
-- } while (err == 1);
--errout:
-- iam_path_fini(path);
-- return(err);
--}
--EXPORT_SYMBOL(iam_lookup);
--
--static inline size_t iam_leaf_entry_size(struct iam_path *p)
--{
-- return path_descr(p)->id_rec_size + path_descr(p)->id_key_size;
--}
--
--static inline ptrdiff_t iam_leaf_entry_diff(struct iam_path *p,
-- struct iam_leaf_entry *e1, struct iam_leaf_entry *e2)
--{
-- ptrdiff_t diff;
--
-- diff = (void *)e1 - (void *)e2;
-- assert(diff / iam_leaf_entry_size(p) * iam_leaf_entry_size(p) == diff);
-- return diff / iam_leaf_entry_size(p);
--}
--
--static inline struct iam_leaf_entry*
--iam_leaf_entry_shift(struct iam_path *p, struct iam_leaf_entry *entry, int shift)
--{
-- void *e = entry;
-- return e + shift * iam_leaf_entry_size(p);
--}
--
--static inline struct iam_key *
--dx_leaf_get_key(struct iam_path *p, struct iam_leaf_entry *e, struct iam_key *key)
--{
-- memcpy(key, e, path_descr(p)->id_key_size);
-- return key;
--}
--
--static inline struct iam_key *
--iam_leaf_key_at(struct iam_path *p, struct iam_leaf_entry *entry)
--{
-- void *e = entry;
-- return e + path_descr(p)->id_rec_size;
--}
--static inline struct iam_leaf_entry *
--iam_leaf_entry_at(struct iam_path *p, struct iam_leaf_entry *entry)
--{
-- return entry;
--}
--
--static int iam_leaf_lookup(struct iam_path *path, struct iam_leaf *leaf,
-- struct iam_key *k)
--{
-- struct iam_leaf_entry *p, *q, *m;
-- struct iam_leaf_entry *entries = leaf->entries;
-- int count = dx_get_count((struct iam_entry *)entries);
--
-- p = iam_leaf_entry_shift(path, entries, 1);
-- q = iam_leaf_entry_shift(path, entries, count - 1);
-- while (p <= q) {
-- m = iam_leaf_entry_shift(path,
-- p, iam_leaf_entry_diff(path, q, p) / 2);
-- dxtrace(printk("."));
-- if (keycmp(path->ip_container, iam_leaf_key_at(path, m),
-- path->ip_key_target) > 0)
-- q = iam_leaf_entry_shift(path, m, -1);
-- else
-- p = iam_leaf_entry_shift(path, m, +1);
-- }
-- leaf->at = q;
-- return 0;
--}
--
--/*XXX what kind of lock should this entry be locked: WangDi */
--static int iam_leaf_insert(handle_t *handle, struct iam_path *path,
-- struct iam_key *k, struct iam_rec *r)
--{
-- struct iam_leaf leaf;
-- struct iam_leaf_entry *p, *q;
-- int err, count;
--
-- err = iam_leaf_init(path, &leaf);
-- if (err)
-- goto errout;
-- path_descr(path)->id_leaf.start(path->ip_container, &leaf);
-- count = dx_get_count((struct iam_entry *)leaf.entries);
-- if (dx_get_count((struct iam_entry *)leaf.entries) >=
-- dx_get_limit((struct iam_entry *)leaf.entries)){
-- err = -ENOSPC;
-- goto errout;
-- }
--
-- err = iam_leaf_lookup(path, &leaf, k);
-- if (err)
-- goto errout;
--
-- /*insert the k/r to leaf entries*/
-- p = iam_leaf_entry_shift(path, leaf.at, 1);
-- q = iam_leaf_entry_shift(path, leaf.entries, count - 1);
-- while (q < p) {
-- memcpy(iam_leaf_entry_shift(path, q, 1), q, iam_leaf_entry_size(path));
-- q = iam_leaf_entry_shift(path, q, -1);
-- }
-- memcpy(iam_leaf_entry_at(path, p), r, path_descr(path)->id_rec_size);
-- memcpy(iam_leaf_key_at(path, p), k, path_descr(path)->id_key_size);
--
-- dx_set_count((struct iam_entry*)leaf.entries, count + 1);
-- err = ext3_journal_dirty_metadata(handle, leaf.bh);
-- if (err)
-- ext3_std_error(path->ip_container->ic_object->i_sb, err);
--errout:
-- iam_leaf_fini(&leaf);
-- return err;
--}
--
--static int split_leaf_node(handle_t *handle, struct iam_path *path)
--{
-- struct inode *dir = path_obj(path);
-- unsigned continued = 0;
-- struct buffer_head *bh2;
-- u32 newblock, hash_split;
-- char *data2;
-- struct iam_leaf leaf;
-- unsigned split;
-- int err;
--
-- bh2 = ext3_append (handle, dir, &newblock, &err);
-- if (!(bh2)) {
-- err = -ENOSPC;
-- goto errout;
-- }
-- err = iam_leaf_init(path, &leaf);
-- if (err)
-- goto errout;
--
-- BUFFER_TRACE(leaf.bh, "get_write_access");
-- err = ext3_journal_get_write_access(handle, leaf.bh);
-- if (err) {
-- journal_error:
-- iam_leaf_fini(&leaf);
-- brelse(bh2);
-- ext3_std_error(dir->i_sb, err);
-- err = -EIO;
-- goto errout;
-- }
-- data2 = bh2->b_data;
-- split = dx_get_count((struct iam_entry*)leaf.entries)/2;
-- hash_split = *(__u32*)iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split));
-- if (keycmp(path->ip_container, iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split)),
-- iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split -1))) == 0)
-- continued = 1;
--
-- memcpy(iam_leaf_entry_shift(path, (struct iam_leaf_entry *)data2, 1),
-- iam_leaf_entry_shift(path, leaf.entries, split),
-- split * iam_leaf_entry_size(path));
--
-- /* Which block gets the new entry? */
-- dx_insert_block(path, path->ip_frame, hash_split + continued, newblock);
-- err = ext3_journal_dirty_metadata (handle, bh2);
-- if (err)
-- goto journal_error;
-- err = ext3_journal_dirty_metadata (handle, leaf.bh);
-- if (err)
-- goto journal_error;
-- brelse (bh2);
-- iam_leaf_fini(&leaf);
--errout:
-- return err;
--}
--
--static int split_index_node(handle_t *handle, struct iam_path *path);
--/*
-- * Insert new record @r with key @k into container @c (within context of
-- * transaction @h.
-- *
-- * Return values: 0: success, -ve: error, including -EEXIST when record with
-- * given key is already present.
-- *
-- * postcondition: ergo(result == 0 || result == -EEXIST,
-- * iam_lookup(c, k, r2) > 0 &&
-- * !memcmp(r, r2, c->ic_descr->id_rec_size));
-- */
--int iam_insert(handle_t *handle, struct iam_container *c, struct iam_key *k,
-- struct iam_rec *r)
--{
-- struct dx_hash_info hinfo;
-- struct iam_path_compat cpath;
-- struct iam_path *path = &cpath.ipc_path;
-- struct htree_cookie hc = {
-- .hinfo = &hinfo
-- };
-- int err, i;
--
-- iam_path_init(path, c, &hc);
-- for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
-- path->ip_key_scratch[i] =
-- (struct iam_key *)&cpath.ipc_scrach[i];
-- err = dx_lookup(path);
-- if (err)
-- goto errout;
--
-- err = iam_leaf_insert(handle, path, k, r);
--
-- if (err != -ENOSPC)
-- goto errout;
--
-- err = split_index_node(handle, path);
-- if (err)
-- goto errout;
--
-- err = split_leaf_node(handle, path);
-- if (err)
-- goto errout;
--
-- err = iam_leaf_insert(handle, path, k, r);
--errout:
-- iam_path_fini(path);
-- return(err);
--}
--
--EXPORT_SYMBOL(iam_insert);
--static int iam_leaf_delete(handle_t *handle, struct iam_path *path,
-- struct iam_key *k)
--{
-- struct iam_leaf leaf;
-- struct iam_leaf_entry *p, *q;
-- int err, count;
--
-- err = iam_leaf_init(path, &leaf);
-- if (err)
-- goto errout;
--
-- err = iam_leaf_lookup(path, &leaf, k);
-- if (err)
-- goto errout;
--
-- count = dx_get_count((struct iam_entry*)leaf.entries);
-- /*delete the k to leaf entries*/
-- p = iam_leaf_entry_shift(path, leaf.at, 1);
-- q = iam_leaf_entry_shift(path, leaf.entries, count - 1);
-- while (p < q) {
-- memcpy(p, iam_leaf_entry_shift(path, p, 1), iam_leaf_entry_size(path));
-- p = iam_leaf_entry_shift(path, p, 1);
-- }
-- dx_set_count((struct iam_entry*)leaf.entries, count - 1);
--
-- err = ext3_journal_dirty_metadata(handle, leaf.bh);
-- if (err)
-- ext3_std_error(path_obj(path)->i_sb, err);
--errout:
-- iam_leaf_fini(&leaf);
-- return err;
--}
--
--/*
-- * Delete existing record with key @k.
-- *
-- * Return values: 0: success, -ENOENT: not-found, -ve: other error.
-- *
-- * postcondition: ergo(result == 0 || result == -ENOENT,
-- * !iam_lookup(c, k, *));
-- */
--int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k)
--{
-- struct dx_hash_info hinfo;
-- struct iam_path_compat cpath;
-- struct iam_path *path = &cpath.ipc_path;
-- struct htree_cookie hc = {
-- .hinfo = &hinfo
-- };
-- int err, i;
--
-- iam_path_init(path, c, &hc);
-- for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
-- path->ip_key_scratch[i] =
-- (struct iam_key *)&cpath.ipc_scrach[i];
-- err = dx_lookup(path);
-- if (err)
-- goto errout;
--
-- err = iam_leaf_delete(h, path, k);
--errout:
-- iam_path_fini(path);
-- return err;
--}
--
--EXPORT_SYMBOL(iam_delete);
--
--static int iam_leaf_update(handle_t *handle, struct iam_path *path,
-- struct iam_key *k, struct iam_rec *r)
--{
-- struct iam_leaf leaf;
-- int err;
--
-- err = iam_leaf_init(path, &leaf);
-- if (err)
-- goto errout;
--
-- err = iam_leaf_lookup(path, &leaf, k);
-- if (err)
-- goto errout;
--
-- memcpy(iam_leaf_entry_at(path, leaf.at), r, path_descr(path)->id_rec_size);
-- memcpy(iam_leaf_key_at(path, leaf.at), k, path_descr(path)->id_key_size);
--
-- err = ext3_journal_dirty_metadata(handle, leaf.bh);
-- if (err)
-- ext3_std_error(path_obj(path)->i_sb, err);
--errout:
-- iam_leaf_fini(&leaf);
-- return err;
--}
--/*
-- * Replace existing record with key @k, or insert new one. New record data are
-- * in @r.
-- *
-- * Return values: 0: success, -ve: error.
-- *
-- * postcondition: ergo(result == 0, iam_lookup(c, k, r2) > 0 &&
-- * !memcmp(r, r2, c->ic_descr->id_rec_size));
-- */
--int iam_update(handle_t *h, struct iam_container *c,
-- struct iam_key *k, struct iam_rec *r)
--{
-- struct dx_hash_info hinfo;
-- struct iam_path_compat cpath;
-- struct iam_path *path = &cpath.ipc_path;
-- struct htree_cookie hc = {
-- .hinfo = &hinfo
-- };
-- int err, i;
--
-- iam_path_init(path, c, &hc);
-- for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
-- path->ip_key_scratch[i] =
-- (struct iam_key *)&cpath.ipc_scrach[i];
-- err = dx_lookup(path);
-- if (err)
-- goto errout;
--
-- err = iam_leaf_update(h, path, k, r);
--errout:
-- iam_path_fini(path);
-- return err;
--}
--
--EXPORT_SYMBOL(iam_update);
--
--/*
- * This function increments the frame pointer to search the next leaf
- * block, and reads in the necessary intervening nodes if the search
- * should be necessary. Whether or not the search is necessary is
-@@ -1409,16 +565,15 @@
- * If start_hash is non-null, it will be filled in with the starting
- * hash of the next page.
- */
--static int ext3_htree_next_block(struct inode *dir, __u32 hash,
-- struct iam_path *path, __u32 *start_hash)
-+static int ext3_htree_advance(struct inode *dir, __u32 hash,
-+ struct iam_path *path, __u32 *start_hash,
-+ int compat)
- {
- struct iam_frame *p;
- struct buffer_head *bh;
- int err, num_frames = 0;
- __u32 bhash;
-
-- assert(dx_index_is_compat(path));
--
- p = path->ip_frame;
- /*
- * Find the next leaf page by incrementing the frame pointer.
-@@ -1438,28 +593,34 @@
- --p;
- }
-
-- /*
-- * If the hash is 1, then continue only if the next page has a
-- * continuation hash of any value. This is used for readdir
-- * handling. Otherwise, check to see if the hash matches the
-- * desired contiuation hash. If it doesn't, return since
-- * there's no point to read in the successive index pages.
-- */
-- dx_get_key(path, p->at, (struct iam_key *)&bhash);
-- if (start_hash)
-- *start_hash = bhash;
-- if ((hash & 1) == 0) {
-- if ((bhash & ~1) != hash)
-- return 0;
-+ if (compat) {
-+ /*
-+ * Htree hash magic.
-+ */
-+ /*
-+ * If the hash is 1, then continue only if the next page has a
-+ * continuation hash of any value. This is used for readdir
-+ * handling. Otherwise, check to see if the hash matches the
-+ * desired contiuation hash. If it doesn't, return since
-+ * there's no point to read in the successive index pages.
-+ */
-+ iam_get_key(path, p->at, (struct iam_key *)&bhash);
-+ if (start_hash)
-+ *start_hash = bhash;
-+ if ((hash & 1) == 0) {
-+ if ((bhash & ~1) != hash)
-+ return 0;
-+ }
- }
- /*
- * If the hash is HASH_NB_ALWAYS, we always go to the next
- * block so no check is necessary
- */
- while (num_frames--) {
-- err = path_descr(path)->id_node_read(path->ip_container,
-- (iam_ptr_t)dx_get_block(path, p->at),
-- NULL, &bh);
-+ err = iam_path_descr(path)->id_ops->
-+ id_node_read(path->ip_container,
-+ (iam_ptr_t)dx_get_block(path, p->at),
-+ NULL, &bh);
- if (err != 0)
- return err; /* Failure */
- ++p;
-@@ -1471,6 +632,16 @@
- return 1;
- }
-
-+int iam_index_next(struct iam_container *c, struct iam_path *path)
-+{
-+ return ext3_htree_advance(c->ic_object, 0, path, NULL, 0);
-+}
-+
-+int ext3_htree_next_block(struct inode *dir, __u32 hash,
-+ struct iam_path *path, __u32 *start_hash)
-+{
-+ return ext3_htree_advance(dir, hash, path, start_hash, 1);
-+}
-
- /*
- * p is at least 6 bytes before the end of page
-@@ -1662,21 +833,30 @@
- } while(more);
- }
-
--static void dx_insert_block(struct iam_path *path,
-- struct iam_frame *frame, u32 hash, u32 block)
-+void iam_insert_key(struct iam_path *path, struct iam_frame *frame,
-+ const struct iam_key *key, iam_ptr_t ptr)
- {
- struct iam_entry *entries = frame->entries;
-- struct iam_entry *old = frame->at, *new = iam_entry_shift(path, old, +1);
-+ struct iam_entry *new = iam_entry_shift(path, frame->at, +1);
- int count = dx_get_count(entries);
-
- assert(count < dx_get_limit(entries));
-- assert(old < iam_entry_shift(path, entries, count));
-+ assert(frame->at < iam_entry_shift(path, entries, count));
-+
- memmove(iam_entry_shift(path, new, 1), new,
- (char *)iam_entry_shift(path, entries, count) - (char *)new);
-- dx_set_key(path, new, (struct iam_key *)&hash);
-- dx_set_block(path, new, block);
-+ dx_set_key(path, new, key);
-+ dx_set_block(path, new, ptr);
- dx_set_count(entries, count + 1);
- }
-+
-+void dx_insert_block(struct iam_path *path, struct iam_frame *frame,
-+ u32 hash, u32 block)
-+{
-+ assert(dx_index_is_compat(path));
-+ iam_insert_key(path, frame, (struct iam_key *)&hash, block);
-+}
-+
- #endif
-
-
-@@ -1897,14 +1077,15 @@
- if (*err != 0)
- return NULL;
- } else {
-- path->ip_frame->bh = NULL; /* for iam_path_fini() */
-+ path->ip_frame->bh = NULL; /* for iam_path_fini() */
- path->ip_frame->at = (void *)&dummy_dot;/* hack for zero entry*/
- }
- hash = hinfo.hash;
- do {
- block = dx_get_block(path, path->ip_frame->at);
-- *err = path_descr(path)->id_node_read(path->ip_container, (iam_ptr_t)block,
-- NULL, &bh);
-+ *err = iam_path_descr(path)->id_ops->id_node_read(path->ip_container,
-+ (iam_ptr_t)block,
-+ NULL, &bh);
- if (*err != 0)
- goto errout;
- de = (struct ext3_dir_entry_2 *) bh->b_data;
-@@ -2067,7 +1248,7 @@
- struct buffer_head **bh,struct iam_frame *frame,
- struct dx_hash_info *hinfo, int *error)
- {
-- struct inode *dir = path_obj(path);
-+ struct inode *dir = iam_path_obj(path);
- unsigned blocksize = dir->i_sb->s_blocksize;
- unsigned count, continued;
- struct buffer_head *bh2;
-@@ -2392,15 +1573,15 @@
- }
-
- #ifdef CONFIG_EXT3_INDEX
--static int split_index_node(handle_t *handle, struct iam_path *path)
--{
-+int split_index_node(handle_t *handle, struct iam_path *path)
-+{
-
- struct iam_entry *entries; /* old block contents */
- struct iam_entry *entries2; /* new block contents */
- struct iam_frame *frame, *safe;
- struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {0};
- u32 newblock[DX_MAX_TREE_HEIGHT] = {0};
-- struct inode *dir = path_obj(path);
-+ struct inode *dir = iam_path_obj(path);
- int nr_splet;
- int i, err;
-
-@@ -2442,7 +1623,8 @@
- for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
- bh_new[i] = ext3_append (handle, dir, &newblock[i], &err);
- if (!bh_new[i] ||
-- path_descr(path)->id_node_init(path->ip_container, bh_new[i], 0) != 0)
-+ iam_path_descr(path)->id_ops->id_node_init(path->ip_container,
-+ bh_new[i], 0) != 0)
- goto cleanup;
- BUFFER_TRACE(frame->bh, "get_write_access");
- err = ext3_journal_get_write_access(handle, frame->bh);
-@@ -2516,9 +1698,9 @@
- unsigned count1 = count/2, count2 = count - count1;
- unsigned hash2;
-
-- dx_get_key(path,
-- iam_entry_shift(path, entries, count1),
-- (struct iam_key *)&hash2);
-+ iam_get_key(path,
-+ iam_entry_shift(path, entries, count1),
-+ (struct iam_key *)&hash2);
-
- dxtrace(printk("Split index %i/%i\n", count1, count2));
-
-@@ -2578,7 +1760,7 @@
- size_t isize;
-
- iam_path_compat_init(&cpath, dir);
-- param = path_descr(path);
-+ param = iam_path_descr(path);
-
- err = dx_probe(dentry, NULL, &hinfo, path);
- if (err != 0)
-@@ -2588,8 +1770,9 @@
- /* XXX nikita: global serialization! */
- isize = dir->i_size;
-
-- err = param->id_node_read(path->ip_container, (iam_ptr_t)dx_get_block(path, frame->at),
-- handle, &bh);
-+ err = param->id_ops->id_node_read(path->ip_container,
-+ (iam_ptr_t)dx_get_block(path, frame->at),
-+ handle, &bh);
- if (err != 0)
- goto cleanup;
-
-@@ -2724,12 +1907,12 @@
- * is so far negative - it has no inode.
- *
- * If the create succeeds, we fill in the inode information
-- * with d_instantiate().
-+ * with d_instantiate().
- */
- static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
- struct nameidata *nd)
- {
-- handle_t *handle;
-+ handle_t *handle;
- struct inode * inode;
- int err, retries = 0;
-
-Index: linux-stage/fs/ext3/iam.c
+ ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+Index: iam/fs/ext3/iam.c
===================================================================
---- linux-stage.orig/fs/ext3/iam.c 2006-05-29 18:23:53.597737944 +0800
-+++ linux-stage/fs/ext3/iam.c 2006-05-29 13:01:22.000000000 +0800
-@@ -0,0 +1,990 @@
+--- iam.orig/fs/ext3/iam.c 2004-04-06 17:27:52.000000000 +0400
++++ iam/fs/ext3/iam.c 2006-05-29 22:49:31.000000000 +0400
+@@ -0,0 +1,1021 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+#include <linux/smp_lock.h>
+#include <linux/lustre_iam.h>
+
-+#include <libcfs/libcfs.h>
-+#include <libcfs/kp30.h>
++#include <libcfs/libcfs.h>
++#include <libcfs/kp30.h>
++
++#include "xattr.h"
++#include "iopen.h"
++#include "acl.h"
++
++/*
++ * List of all registered formats.
++ *
++ * No locking. Callers synchronize.
++ */
++static LIST_HEAD(iam_formats);
++
++void iam_format_register(struct iam_format *fmt)
++{
++ list_add(&fmt->if_linkage, &iam_formats);
++}
++EXPORT_SYMBOL(iam_format_register);
++
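+/*
+ * For illustration, a format backend hooks itself in through
+ * iam_format_register(). A minimal sketch follows, assuming only the
+ * ->if_guess() and ->if_linkage members exercised by iam_format_guess()
+ * below; the real lfix registration lives in the iam_lfix.o added to the
+ * Makefile above, so the bodies here are hypothetical.
+ */
+static int iam_lfix_guess(struct iam_container *c)
+{
+        /*
+         * A real ->if_guess() would read the root block of c->ic_object
+         * and test a format magic; -ENOENT means "not my format, try the
+         * next registered one".
+         */
+        return -ENOENT;
+}
+
+static struct iam_format iam_lfix_format = {
+        .if_guess = iam_lfix_guess
+};
+
+void iam_lfix_format_init(void)
+{
+        iam_format_register(&iam_lfix_format);
+}
+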
++static int iam_format_guess(struct iam_container *c)
++{
++ int result;
++ struct iam_format *fmt;
++
++ /*
++ * XXX temporary initialization hook.
++ */
++ {
++ static int initialized = 0;
++
++ if (!initialized) {
++ iam_lfix_format_init();
++ initialized = 1;
++ }
++ }
++
++ result = -ENOENT;
++ list_for_each_entry(fmt, &iam_formats, if_linkage) {
++ result = fmt->if_guess(c);
++ if (result == 0)
++ break;
++ }
++ return result;
++}
++
++/*
++ * Initialize container @c, acquiring an additional reference on @inode.
++ */
++int iam_container_init(struct iam_container *c,
++ struct iam_descr *descr, struct inode *inode)
++{
++ memset(c, 0, sizeof *c);
++ c->ic_descr = descr;
++ c->ic_object = igrab(inode);
++ if (c->ic_object != NULL)
++ return 0;
++ else
++ return -ENOENT;
++}
++EXPORT_SYMBOL(iam_container_init);
++
++/*
++ * Determine container format.
++ */
++int iam_container_setup(struct iam_container *c)
++{
++ return iam_format_guess(c);
++}
++EXPORT_SYMBOL(iam_container_setup);
++
++/*
++ * Finalize container @c, release all resources.
++ */
++void iam_container_fini(struct iam_container *c)
++{
++ if (c->ic_object != NULL) {
++ iput(c->ic_object);
++ c->ic_object = NULL;
++ }
++}
++EXPORT_SYMBOL(iam_container_fini);
++
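+/*
+ * Taken together, the container entry points pair up as in the sketch
+ * below, where descr and dir stand for a caller-supplied struct iam_descr
+ * and a pinned directory inode:
+ *
+ *        struct iam_container c;
+ *        int err;
+ *
+ *        err = iam_container_init(&c, descr, dir); // grabs a ref on dir
+ *        if (err == 0) {
+ *                err = iam_container_setup(&c);    // guess on-disk format
+ *                // ... look up, insert, iterate ...
+ *                iam_container_fini(&c);           // drops the reference
+ *        }
+ */
+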
++void iam_path_init(struct iam_path *path, struct iam_container *c,
++ struct iam_path_descr *pd)
++{
++ memset(path, 0, sizeof *path);
++ path->ip_container = c;
++ path->ip_frame = path->ip_frames;
++ path->ip_data = pd;
++}
++
++static void iam_leaf_fini(struct iam_leaf *leaf);
++
++void iam_path_fini(struct iam_path *path)
++{
++ int i;
++
++ iam_leaf_fini(&path->ip_leaf);
++ for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) {
++ if (path->ip_frames[i].bh != NULL) {
++ brelse(path->ip_frames[i].bh);
++ path->ip_frames[i].bh = NULL;
++ }
++ }
++}
++
++extern struct iam_descr htree_compat_param;
++
++void iam_path_compat_init(struct iam_path_compat *path, struct inode *inode)
++{
++ int i;
++
++ for (i = 0; i < ARRAY_SIZE(path->ipc_scratch); ++i)
++ path->ipc_descr.ipd_key_scratch[i] =
++ (struct iam_key *)&path->ipc_scratch[i];
++
++ iam_container_init(&path->ipc_container, &htree_compat_param, inode);
++ /*
++ * XXX hack allowing finalization of iam_path_compat with
++ * iam_path_fini().
++ */
++ iput(inode);
++ iam_path_init(&path->ipc_path, &path->ipc_container, &path->ipc_descr);
++}
++
++void iam_path_compat_fini(struct iam_path_compat *path)
++{
++ iam_path_fini(&path->ipc_path);
++ iam_container_fini(&path->ipc_container);
++}
++
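+/*
+ * The compat wrappers are how the htree code in namei.c drives iam paths;
+ * roughly (dir being the directory inode, as in the namei.c hunks of this
+ * series):
+ *
+ *        struct iam_path_compat cpath;
+ *
+ *        iam_path_compat_init(&cpath, dir);
+ *        // ... use &cpath.ipc_path with dx_probe()/dx_lookup() ...
+ *        iam_path_compat_fini(&cpath);
+ */
+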
++/*
++ * Helper function allocating iam_path_descr and initializing its key scratch
++ * area.
++ */
++struct iam_path_descr *iam_ipd_alloc(int keysize)
++{
++ struct iam_path_descr *ipd;
++ void *karea;
++ int i;
++
++ ipd = kmalloc(ARRAY_SIZE(ipd->ipd_key_scratch) * keysize +
++ sizeof *ipd, GFP_KERNEL);
++ if (ipd != NULL) {
++ karea = ipd + 1;
++ for (i = 0; i < ARRAY_SIZE(ipd->ipd_key_scratch);
++ ++i, karea += keysize)
++ ipd->ipd_key_scratch[i] = karea;
++ }
++ return ipd;
++}
++EXPORT_SYMBOL(iam_ipd_alloc);
++
++void iam_ipd_free(struct iam_path_descr *ipd)
++{
++ kfree(ipd);
++}
++EXPORT_SYMBOL(iam_ipd_free);
++
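+/*
+ * Typical usage, assuming the key size is taken from the container's
+ * descriptor:
+ *
+ *        struct iam_path_descr *ipd;
+ *
+ *        ipd = iam_ipd_alloc(c->ic_descr->id_key_size);
+ *        if (ipd == NULL)
+ *                return -ENOMEM;
+ *        // ... pass ipd to iam_path_init() or iam_it_init() ...
+ *        iam_ipd_free(ipd);
+ */
+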
++/*
++ * Leaf helpers.
++ */
++
++struct iam_path *iam_leaf_path(const struct iam_leaf *leaf)
++{
++ return leaf->il_path;
++}
++
++struct iam_container *iam_leaf_container(const struct iam_leaf *leaf)
++{
++ return iam_leaf_path(leaf)->ip_container;
++}
++
++struct iam_descr *iam_leaf_descr(const struct iam_leaf *leaf)
++{
++ return iam_leaf_container(leaf)->ic_descr;
++}
++
++struct iam_leaf_operations *iam_leaf_ops(const struct iam_leaf *leaf)
++{
++ return iam_leaf_descr(leaf)->id_leaf_ops;
++}
++
++/*
++ * Return pointer to current leaf record. Pointer is valid while corresponding
++ * leaf node is locked and pinned.
++ */
++static struct iam_rec *iam_leaf_rec(const struct iam_leaf *leaf)
++{
++ return iam_leaf_ops(leaf)->rec(leaf);
++}
++
++/*
++ * Return a pointer to the current leaf key. This function may return either
++ * a pointer to the key stored in the node, or copy the key into the @key
++ * buffer supplied by the caller and return a pointer to that buffer. The
++ * latter approach is used when keys in nodes are not stored in plain form
++ * (e.g., htree doesn't store keys at all).
++ *
++ * The caller should assume that the returned pointer is only valid while the
++ * leaf node is pinned and locked.
++ */
++static struct iam_key *iam_leaf_key(const struct iam_leaf *leaf,
++ struct iam_key *key)
++{
++ return iam_leaf_ops(leaf)->key(leaf, key);
++}
++
++static int iam_leaf_load(struct iam_path *path)
++{
++ int block;
++ int err;
++ struct iam_container *c;
++ struct buffer_head *bh;
++ struct iam_leaf *leaf;
++ struct iam_descr *descr;
++
++ c = path->ip_container;
++ leaf = &path->ip_leaf;
++ descr = iam_path_descr(path);
++ block = dx_get_block(path, path->ip_frame->at);
++ err = descr->id_ops->id_node_read(c, block, NULL, &bh);
++ if (err == 0) {
++ leaf->il_bh = bh;
++ leaf->il_path = path;
++ err = iam_leaf_ops(leaf)->init(leaf);
++ }
++ return err;
++}
++
++static void iam_leaf_fini(struct iam_leaf *leaf)
++{
++        /* the leaf is only initialized once iam_leaf_load() has run */
++        if (leaf->il_path != NULL)
++                iam_leaf_ops(leaf)->fini(leaf);
++ if (leaf->il_bh) {
++ brelse(leaf->il_bh);
++ leaf->il_bh = NULL;
++ }
++}
++
++static void iam_leaf_start(struct iam_leaf *folio)
++{
++ iam_leaf_ops(folio)->start(folio);
++}
++
++void iam_leaf_next(struct iam_leaf *folio)
++{
++ iam_leaf_ops(folio)->next(folio);
++}
++
++static void iam_rec_add(struct iam_leaf *leaf, struct iam_key *key,
++ struct iam_rec *rec)
++{
++ iam_leaf_ops(leaf)->rec_add(leaf, key, rec);
++}
++
++static void iam_rec_del(struct iam_leaf *leaf)
++{
++ iam_leaf_ops(leaf)->rec_del(leaf);
++}
++
++int iam_leaf_at_end(const struct iam_leaf *leaf)
++{
++ return iam_leaf_ops(leaf)->at_end(leaf);
++}
++
++void iam_leaf_split(struct iam_leaf *l, struct buffer_head *bh)
++{
++ iam_leaf_ops(l)->split(l, bh);
++}
++
++static int iam_leaf_can_add(const struct iam_leaf *l,
++ const struct iam_key *k, const struct iam_rec *r)
++{
++ return iam_leaf_ops(l)->can_add(l, k, r);
++}
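+/*
+ * For orientation, the wrappers above imply a method table shaped roughly
+ * as below. This is inferred from the call sites in this file; the
+ * authoritative declaration is in <linux/lustre_iam.h>, so member order
+ * and const-ness here are assumptions:
+ *
+ *        struct iam_leaf_operations {
+ *                int (*init)(struct iam_leaf *l);
+ *                void (*fini)(struct iam_leaf *l);
+ *                void (*start)(struct iam_leaf *l);
+ *                void (*next)(struct iam_leaf *l);
+ *                struct iam_rec *(*rec)(const struct iam_leaf *l);
+ *                struct iam_key *(*key)(const struct iam_leaf *l,
+ *                                       struct iam_key *k);
+ *                void (*rec_add)(struct iam_leaf *l, struct iam_key *k,
+ *                                struct iam_rec *r);
+ *                void (*rec_del)(struct iam_leaf *l);
+ *                int (*at_end)(const struct iam_leaf *l);
+ *                void (*split)(struct iam_leaf *l, struct buffer_head *bh);
+ *                int (*can_add)(const struct iam_leaf *l,
+ *                               const struct iam_key *k,
+ *                               const struct iam_rec *r);
+ *                int (*lookup)(struct iam_leaf *l,
+ *                              const struct iam_key *k);
+ *        };
+ */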
++
++/***********************************************************************/
++/* iterator interface */
++/***********************************************************************/
+
-+#include "xattr.h"
-+#include "iopen.h"
-+#include "acl.h"
++static enum iam_it_state it_state(const struct iam_iterator *it)
++{
++ return it->ii_state;
++}
+
++/*
++ * Helper function returning scratch key.
++ */
++static struct iam_key *it_scratch_key(const struct iam_iterator *it, int n)
++{
++ return iam_path_key(&it->ii_path, n);
++}
+
-+static __u32 iam_root_ptr(struct iam_container *c)
++static struct iam_container *iam_it_container(const struct iam_iterator *it)
+{
-+ return 0;
++ return it->ii_path.ip_container;
+}
+
-+static int iam_node_init(struct iam_container *c, struct buffer_head *bh,
-+ int root)
++static inline int it_keycmp(const struct iam_iterator *it,
++ const struct iam_key *k1, const struct iam_key *k2)
+{
-+ return 0;
++ return iam_keycmp(iam_it_container(it), k1, k2);
+}
+
-+static int iam_node_check(struct iam_path *path, struct iam_frame *frame)
++/*
++ * Helper wrapper around iam_it_get(): returns 0 (success) only when a record
++ * with exactly the key asked for is found.
++ */
++static int iam_it_get_exact(struct iam_iterator *it, const struct iam_key *k)
+{
-+ struct iam_entry *entries;
-+ void *data;
-+ entries = dx_node_get_entries(path, frame);
++ int result;
+
-+ data = frame->bh->b_data;
++ result = iam_it_get(it, k);
++ if (result == 0 &&
++ (it_keycmp(it, k, iam_it_key_get(it, it_scratch_key(it, 1))) != 0))
++ /*
++ * Return -ENOENT if cursor is located above record with a key
++         * different from the one specified.
++ *
++ * XXX returning -ENOENT only works if iam_it_get never
++ * returns -ENOENT as a legitimate error.
++ */
++ result = -ENOENT;
++ return result;
++}
+
-+ if (frame == path->ip_frames) {
-+ struct iam_root *root;
++void iam_container_write_lock(struct iam_container *ic)
++{
++ down(&ic->ic_object->i_sem);
++}
+
-+ root = data;
-+ path->ip_indirect = root->info.indirect_levels;
-+ }
-+ frame->entries = frame->at = entries;
-+ return 0;
++void iam_container_write_unlock(struct iam_container *ic)
++{
++ up(&ic->ic_object->i_sem);
+}
+
-+static int iam_node_create(struct iam_container *c)
++void iam_container_read_lock(struct iam_container *ic)
+{
-+ return 0;
++ down(&ic->ic_object->i_sem);
+}
+
-+struct iam_operations generic_iam_ops = {
-+ .id_root_ptr = iam_root_ptr,
-+ .id_node_read = iam_node_read,
-+ .id_node_init = iam_node_init,
-+ .id_node_check = iam_node_check,
-+ .id_create = iam_node_create,
-+};
-+EXPORT_SYMBOL(generic_iam_ops);
++void iam_container_read_unlock(struct iam_container *ic)
++{
++ up(&ic->ic_object->i_sem);
++}
+
-+static inline void iam_reccpy(struct iam_path *p, struct iam_rec *rec_dst,
-+ struct iam_rec *rec_src)
++static void iam_it_lock(struct iam_iterator *it)
+{
-+ memcpy(rec_dst, rec_src, iam_path_descr(p)->id_rec_size);
++ if (it->ii_flags&IAM_IT_WRITE)
++ iam_container_write_lock(iam_it_container(it));
++ else
++ iam_container_read_lock(iam_it_container(it));
+}
+
-+/*
-+ * Initialize container @c, acquires additional reference on @inode.
-+ */
-+int iam_container_init(struct iam_container *c,
-+ struct iam_descr *descr, struct inode *inode)
++static void iam_it_unlock(struct iam_iterator *it)
+{
-+ memset(c, 0, sizeof *c);
-+ c->ic_descr = descr;
-+ c->ic_object = igrab(inode);
-+ if (c->ic_object != NULL)
-+ return 0;
++ if (it->ii_flags&IAM_IT_WRITE)
++ iam_container_write_unlock(iam_it_container(it));
+ else
-+ return -ENOENT;
++ iam_container_read_unlock(iam_it_container(it));
+}
-+EXPORT_SYMBOL(iam_container_init);
+
+/*
-+ * Finalize container @c, release all resources.
++ * Initialize iterator to IAM_IT_DETACHED state.
++ *
++ * postcondition: it_state(it) == IAM_IT_DETACHED
+ */
-+void iam_container_fini(struct iam_container *c)
++int iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags,
++ struct iam_path_descr *pd)
+{
-+ if (c->ic_object != NULL) {
-+ iput(c->ic_object);
-+ c->ic_object = NULL;
-+ }
++ memset(it, 0, sizeof *it);
++ it->ii_flags = flags;
++ it->ii_state = IAM_IT_DETACHED;
++ iam_path_init(&it->ii_path, c, pd);
++ return 0;
+}
-+EXPORT_SYMBOL(iam_container_fini);
+
-+void iam_path_init(struct iam_path *path, struct iam_container *c,
-+ struct iam_path_descr *pd)
++/*
++ * Finalize iterator and release all resources.
++ *
++ * precondition: it_state(it) == IAM_IT_DETACHED
++ */
++void iam_it_fini(struct iam_iterator *it)
+{
-+ memset(path, 0, sizeof *path);
-+ path->ip_container = c;
-+ path->ip_frame = path->ip_frames;
-+ path->ip_data = pd;
++ assert(it_state(it) == IAM_IT_DETACHED);
++ iam_path_fini(&it->ii_path);
+}
+
-+static void iam_leaf_fini(struct iam_leaf *leaf);
-+
-+void iam_path_fini(struct iam_path *path)
++int iam_path_lookup(struct iam_path *path)
+{
-+ int i;
-+
-+ iam_leaf_fini(&path->ip_leaf);
-+ for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) {
-+ if (path->ip_frames[i].bh != NULL) {
-+ brelse(path->ip_frames[i].bh);
-+ path->ip_frames[i].bh = NULL;
-+ }
++ struct iam_container *c;
++ struct iam_descr *descr;
++ struct iam_leaf *leaf;
++ int result;
++
++ c = path->ip_container;
++ leaf = &path->ip_leaf;
++ descr = iam_path_descr(path);
++ result = dx_lookup(path);
++ if (result == 0) {
++ result = iam_leaf_load(path);
++ if (result == 0)
++ result = iam_leaf_ops(leaf)->lookup(leaf,
++ path->ip_key_target);
+ }
++ return result;
+}
+
-+extern struct iam_descr htree_compat_param;
-+
-+void iam_path_compat_init(struct iam_path_compat *path, struct inode *inode)
++/*
++ * Attach iterator. After successful completion, @it points to the record
++ * with the largest key not larger than @k.
++ *
++ * Return value: 0: positioned on existing record,
++ * -ve: error.
++ *
++ * precondition: it_state(it) == IAM_IT_DETACHED
++ * postcondition: ergo(result == 0,
++ * (it_state(it) == IAM_IT_ATTACHED &&
++ *                 it_keycmp(it, iam_it_key_get(it, *), k) <= 0))
++ */
++int iam_it_get(struct iam_iterator *it, const struct iam_key *k)
+{
-+ int i;
++ int result;
++ assert(it_state(it) == IAM_IT_DETACHED);
+
-+ for (i = 0; i < ARRAY_SIZE(path->ipc_scratch); ++i)
-+ path->ipc_descr.ipd_key_scratch[i] =
-+ (struct iam_key *)&path->ipc_scratch[i];
++ it->ii_path.ip_key_target = k;
++ iam_it_lock(it);
++ result = iam_path_lookup(&it->ii_path);
++ if (result == 0 || result == -ENOENT)
++ it->ii_state = IAM_IT_ATTACHED;
++ else
++ iam_it_unlock(it);
++ assert(ergo(result == 0,
++ it_keycmp(it,
++ iam_it_key_get(it, it_scratch_key(it, 0)),
++ k) <= 0));
++ return result;
++}
+
-+ iam_container_init(&path->ipc_container, &htree_compat_param, inode);
-+ /*
-+ * XXX hack allowing finalization of iam_path_compat with
-+ * iam_path_fini().
-+ */
-+ iput(inode);
-+ iam_path_init(&path->ipc_path, &path->ipc_container, &path->ipc_descr);
++/*
++ * Duplicates iterator.
++ *
++ * postcondition: it_state(dst) == it_state(src) &&
++ * iam_it_container(dst) == iam_it_container(src) &&
++ * dst->ii_flags = src->ii_flags &&
++ * ergo(it_state(src) == IAM_IT_ATTACHED,
++ * iam_it_rec_get(dst) == iam_it_rec_get(src) &&
++ * iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
++ */
++void iam_it_dup(struct iam_iterator *dst, const struct iam_iterator *src)
++{
++ dst->ii_flags = src->ii_flags;
++ dst->ii_state = src->ii_state;
++ /* XXX not yet. iam_path_dup(&dst->ii_path, &src->ii_path); */
++ /*
++ * XXX: duplicate lock.
++ */
++ assert(it_state(dst) == it_state(src));
++ assert(iam_it_container(dst) == iam_it_container(src));
++ assert(dst->ii_flags == src->ii_flags);
++ assert(ergo(it_state(src) == IAM_IT_ATTACHED,
++ iam_it_rec_get(dst) == iam_it_rec_get(src) &&
++ iam_it_key_get(dst, it_scratch_key(dst, 0)) ==
++ iam_it_key_get(src, it_scratch_key(src, 0))));
++}
++
++/*
++ * Detach iterator. Does nothing in detached state.
++ *
++ * postcondition: it_state(it) == IAM_IT_DETACHED
++ */
++void iam_it_put(struct iam_iterator *it)
++{
++ if (it->ii_state == IAM_IT_ATTACHED) {
++ it->ii_state = IAM_IT_DETACHED;
++ iam_leaf_fini(&it->ii_path.ip_leaf);
++ iam_it_unlock(it);
++ }
+}
+
-+void iam_path_compat_fini(struct iam_path_compat *path)
++/*
++ * Move iterator one record right.
++ *
++ * Return value: 0: success,
++ * +1: end of container reached
++ * -ve: error
++ *
++ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
++ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED)
++ */
++int iam_it_next(struct iam_iterator *it)
+{
-+ iam_path_fini(&path->ipc_path);
-+ iam_container_fini(&path->ipc_container);
++ int result;
++ struct iam_container *c;
++ struct iam_path *path;
++ struct iam_leaf *leaf;
++
++ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE);
++
++ c = iam_it_container(it);
++ path = &it->ii_path;
++ leaf = &path->ip_leaf;
++
++ if (iam_leaf_at_end(leaf)) {
++ /* advance index portion of the path */
++ result = iam_index_next(c, path);
++ if (result == 1) {
++ result = iam_leaf_load(path);
++ if (result == 0)
++ iam_leaf_start(leaf);
++ } else if (result == 0)
++ /* end of container reached */
++ result = +1;
++ if (result < 0)
++ iam_it_put(it);
++ } else {
++ /* advance within leaf node */
++ iam_leaf_next(leaf);
++ result = 0;
++ }
++ assert(ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED));
++ return result;
+}
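+
++/*
++ * A sketch of the intended iterator usage (illustrative only; @c, @pd,
++ * @k and process() are assumed to be supplied by the caller):
++ *
++ *     struct iam_iterator it;
++ *     int result;
++ *
++ *     iam_it_init(&it, c, IAM_IT_MOVE, pd);
++ *
++ *     result = iam_it_get(&it, k);
++ *     while (result == 0) {
++ *             process(iam_it_rec_get(&it));
++ *             result = iam_it_next(&it);
++ *     }
++ *     iam_it_put(&it);
++ *     iam_it_fini(&it);
++ */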
+
+/*
-+ * Leaf helpers.
++ * Return pointer to the record under iterator.
++ *
++ * precondition: it_state(it) == IAM_IT_ATTACHED
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
+ */
-+
-+struct iam_path *iam_leaf_path(const struct iam_leaf *leaf)
++struct iam_rec *iam_it_rec_get(const struct iam_iterator *it)
+{
-+ return leaf->il_path;
++ assert(it_state(it) == IAM_IT_ATTACHED);
++ return iam_leaf_rec(&it->ii_path.ip_leaf);
+}
+
-+struct iam_container *iam_leaf_container(const struct iam_leaf *leaf)
++static void iam_it_reccpy(struct iam_iterator *it, const struct iam_rec *r)
+{
-+ return iam_leaf_path(leaf)->ip_container;
-+}
++ struct iam_leaf *folio;
+
-+struct iam_descr *iam_leaf_descr(const struct iam_leaf *leaf)
-+{
-+ return iam_leaf_container(leaf)->ic_descr;
++ folio = &it->ii_path.ip_leaf;
++ iam_leaf_ops(folio)->rec_set(folio, r);
+}
+
-+struct iam_leaf_operations *iam_leaf_ops(const struct iam_leaf *leaf)
++static void iam_it_keycpy(struct iam_iterator *it, const struct iam_key *k)
+{
-+ return iam_leaf_descr(leaf)->id_leaf_ops;
-+}
++ struct iam_leaf *folio;
+
-+/*
-+ * Return pointer to current leaf record. Pointer is valid while corresponding
-+ * leaf node is locked and pinned.
-+ */
-+struct iam_rec *iam_leaf_rec(struct iam_leaf *leaf)
-+{
-+ return iam_leaf_ops(leaf)->rec(leaf);
++ folio = &it->ii_path.ip_leaf;
++ iam_leaf_ops(folio)->key_set(folio, k);
+}
+
++
+/*
-+ * Return pointer to the current leaf key. This function may return either
-+ * pointer to the key stored in node, or copy key into @key buffer supplied by
-+ * caller and return pointer to this buffer. The latter approach is used when
-+ * keys in nodes are not stored in plain form (e.g., htree doesn't store keys
-+ * at all).
++ * Replace contents of record under iterator.
+ *
-+ * Caller should assume that returned pointer is only valid while leaf node is
-+ * pinned and locked.
++ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
++ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
++ * ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
+ */
-+struct iam_key *iam_leaf_key(struct iam_leaf *leaf, struct iam_key *key)
++int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r)
+{
-+ return iam_leaf_ops(leaf)->key(leaf, key);
-+}
++ int result;
+
-+static int iam_leaf_load(struct iam_path *path)
-+{
-+ int block;
-+ int err;
-+ struct iam_container *c;
-+ struct buffer_head *bh;
-+ struct iam_leaf *leaf;
-+ struct iam_descr *descr;
-+
-+ c = path->ip_container;
-+ leaf = &path->ip_leaf;
-+ descr = iam_path_descr(path);
-+ block = dx_get_block(path, path->ip_frame->at);
-+ err = descr->id_ops->id_node_read(c, block, NULL, &bh);
-+ if (err == 0) {
-+ leaf->il_bh = bh;
-+ leaf->il_path = path;
-+ err = iam_leaf_ops(leaf)->init(leaf);
-+ }
-+ return err;
-+}
++ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
+
-+static void iam_leaf_fini(struct iam_leaf *leaf)
-+{
-+ iam_leaf_ops(leaf)->fini(leaf);
-+ if (leaf->il_bh) {
-+ brelse(leaf->il_bh);
-+ leaf->il_bh = NULL;
-+ }
++ result = ext3_journal_get_write_access(h, it->ii_path.ip_leaf.il_bh);
++ if (result == 0)
++ iam_it_reccpy(it, r);
++ return result;
+}
+
-+static void iam_leaf_start(struct iam_leaf *folio)
++/*
++ * Return pointer to the key under iterator.
++ *
++ * precondition: it_state(it) == IAM_IT_ATTACHED
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++struct iam_key *iam_it_key_get(const struct iam_iterator *it, struct iam_key *k)
+{
-+ iam_leaf_ops(folio)->start(folio);
++ assert(it_state(it) == IAM_IT_ATTACHED);
++ return iam_leaf_key(&it->ii_path.ip_leaf, k);
+}
+
-+void iam_leaf_next(struct iam_leaf *folio)
++static int iam_leaf_rec_add(handle_t *handle, struct iam_path *path)
+{
-+ iam_leaf_ops(folio)->next(folio);
-+}
++ int err;
+
-+static void iam_rec_add(struct iam_leaf *leaf, struct iam_key *key,
-+ struct iam_rec *rec)
-+{
-+ iam_leaf_ops(leaf)->rec_add(leaf, key, rec);
++ err = ext3_journal_get_write_access(handle, path->ip_leaf.il_bh);
++ if (err)
++ goto journal_error;
++ iam_rec_add(&path->ip_leaf, NULL, NULL);
++ err = ext3_journal_dirty_metadata(handle, path->ip_leaf.il_bh);
++journal_error:
++ if (err)
++ ext3_std_error(iam_path_obj(path)->i_sb, err);
++ return err;
+}
+
-+static void iam_rec_del(struct iam_leaf *leaf)
++static int iam_new_leaf(handle_t *handle, struct iam_leaf *leaf)
+{
-+ iam_leaf_ops(leaf)->rec_del(leaf);
-+}
++ int err;
++ int err2;
++ u32 blknr; /* XXX container is limited to 32-bit block numbers */
++ struct buffer_head *new_leaf;
++ struct iam_container *c;
+
-+int iam_leaf_at_end(const struct iam_leaf *leaf)
-+{
-+ return iam_leaf_ops(leaf)->at_end(leaf);
-+}
++ c = iam_leaf_container(leaf);
++ err = ext3_journal_get_write_access(handle, leaf->il_bh);
++ if (err == 0) {
++ struct inode *obj;
+
-+void iam_leaf_split(struct iam_leaf *l, struct buffer_head *bh)
-+{
-+ iam_leaf_ops(l)->split(l, bh);
++ obj = c->ic_object;
++ new_leaf = ext3_append(handle, c->ic_object, &blknr, &err);
++ if (new_leaf != NULL) {
++ iam_leaf_ops(leaf)->init_new(c, new_leaf);
++ iam_leaf_ops(leaf)->split(leaf, new_leaf);
++ err = ext3_journal_dirty_metadata(handle, new_leaf);
++ err2 = ext3_journal_dirty_metadata(handle, leaf->il_bh);
++ err = err ? : err2;
++ if (err)
++ ext3_std_error(obj->i_sb, err);
++ brelse(new_leaf);
++ }
++ }
++ return err;
+}
+
-+static int iam_leaf_can_add(struct iam_leaf *l,
-+ struct iam_key *k, struct iam_rec *r)
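++/*
++ * Add a new record to the leaf found by @path. If the leaf is full, the
++ * index is split on the way down (split_index_node()), a new leaf is
++ * allocated and half of the entries are moved into it (iam_new_leaf()),
++ * and the insertion is retried.
++ */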
++static int iam_add_rec(handle_t *handle, struct iam_path *path,
++ const struct iam_key *k, const struct iam_rec *r)
+{
-+ return iam_leaf_ops(l)->can_add(l, k, r);
-+}
-+
-+/***********************************************************************/
-+/* iterator interface */
-+/***********************************************************************/
++ int err;
+
-+static enum iam_it_state it_state(const struct iam_iterator *it)
-+{
-+ return it->ii_state;
++ if (iam_leaf_can_add(&path->ip_leaf, k, r)) {
++ err = iam_leaf_rec_add(handle, path);
++ } else {
++ err = split_index_node(handle, path);
++ if (err == 0) {
++ err = iam_new_leaf(handle, &path->ip_leaf);
++ if (err == 0)
++ err = iam_leaf_rec_add(handle, path);
++ }
++ }
++ return err;
+}
+
+/*
-+ * Helper function returning scratch key.
++ * Insert new record with key @k and contents from @r, shifting records to the
++ * right.
++ *
++ * precondition: it_state(it) == IAM_IT_ATTACHED &&
++ * it->ii_flags&IAM_IT_WRITE &&
++ * it_keycmp(it, iam_it_key_get(it, *), k) < 0
++ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
++ * ergo(result == 0,
++ * it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
++ * !memcmp(iam_it_rec_get(it), r, ...))
+ */
-+static struct iam_key *it_scratch_key(struct iam_iterator *it, int n)
++int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
++ const struct iam_key *k, const struct iam_rec *r)
+{
-+ return iam_path_key(&it->ii_path, n);
-+}
++ int result;
+
-+static struct iam_container *iam_it_container(const struct iam_iterator *it)
-+{
-+ return it->ii_path.ip_container;
++ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
++#if 0
++ /*
++ * XXX this assert is disabled temporarily: it may trip when il_at
++ * points to the leaf header.
++ */
++ assert(it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)), k) < 0);
++#endif
++ result = iam_add_rec(h, &it->ii_path, k, r);
++ if (result == 0) {
++ /* place record and key into the freed space. Leaf node is
++ * already in the transaction. */
++ iam_it_reccpy(it, r);
++ iam_it_keycpy(it, k);
++ iam_keycpy(it->ii_path.ip_container, it_scratch_key(it, 0), k);
++ /*
++ * XXX TBD.
++ */
++ }
++ assert(it_state(it) == IAM_IT_ATTACHED);
++ assert(ergo(result == 0,
++ it_keycmp(it,
++ iam_it_key_get(it,
++ it_scratch_key(it, 0)), k) == 0 &&
++ !memcmp(iam_it_rec_get(it), r,
++ iam_it_container(it)->ic_descr->id_rec_size)));
++ return result;
+}
+
-+static inline int it_keycmp(const struct iam_iterator *it,
-+ const struct iam_key *k1, const struct iam_key *k2)
++static int iam_leaf_rec_remove(handle_t *handle, struct iam_leaf *leaf)
+{
-+ return iam_keycmp(iam_it_container(it), k1, k2);
++ int err;
++
++ iam_rec_del(leaf);
++ err = ext3_journal_dirty_metadata(handle, leaf->il_bh);
++ if (err)
++ ext3_std_error(iam_path_obj(iam_leaf_path(leaf))->i_sb, err);
++ return err;
+}
+
+/*
-+ * Helper wrapper around iam_it_get(): returns 0 (success) only when record
-+ * with exactly the same key as asked is found.
++ * Delete record under iterator.
++ *
++ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
+ */
-+static int iam_it_get_exact(struct iam_iterator *it, struct iam_key *k)
++int iam_it_rec_delete(handle_t *h, struct iam_iterator *it)
+{
+ int result;
+
-+ result = iam_it_get(it, k);
-+ if (result == 0 &&
-+ (it_keycmp(it, k, iam_it_key_get(it, it_scratch_key(it, 1))) != 0))
-+ /*
-+ * Return -ENOENT if cursor is located above record with a key
-+ * different from one specified.
-+ *
-+ * XXX returning -ENOENT only works if iam_it_get never
-+ * returns -ENOENT as a legitimate error.
-+ */
-+ result = -ENOENT;
-+ return result;
++ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
++
++ result = ext3_journal_get_write_access(h, it->ii_path.ip_leaf.il_bh);
++ /*
++ * no compaction for now.
++ */
++ if (result == 0)
++ iam_leaf_rec_remove(h, &it->ii_path.ip_leaf);
++
++ return result;
+}
+
-+void iam_container_write_lock(struct iam_container *ic)
++/*
++ * Convert iterator to cookie.
++ *
++ * precondition: it_state(it) == IAM_IT_ATTACHED &&
++ * iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++iam_pos_t iam_it_store(const struct iam_iterator *it)
+{
-+ down(&ic->ic_object->i_sem);
++ iam_pos_t result;
++
++ assert(it_state(it) == IAM_IT_ATTACHED);
++ assert(iam_it_container(it)->ic_descr->id_key_size <= sizeof result);
++
++ result = 0;
++ iam_it_key_get(it, (struct iam_key *)&result);
++ return result;
+}
+
-+void iam_container_write_unlock(struct iam_container *ic)
++/*
++ * Restore iterator from cookie.
++ *
++ * precondition: it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE &&
++ * iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
++ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED &&
++ * iam_it_store(it) == pos)
++ */
++int iam_it_load(struct iam_iterator *it, iam_pos_t pos)
+{
-+ up(&ic->ic_object->i_sem);
++ assert(it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE);
++ assert(iam_it_container(it)->ic_descr->id_key_size <= sizeof pos);
++ return iam_it_get(it, (struct iam_key *)&pos);
+}
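+
++/*
++ * Usage sketch for the cookie interface above (illustrative only): a
++ * scan can drop the container lock between calls by saving its position
++ * as a cookie and re-attaching from it later:
++ *
++ *     pos = iam_it_store(&it);
++ *     iam_it_put(&it);
++ *     ...container may be modified by other threads here...
++ *     result = iam_it_load(&it, pos);
++ *     if (result == 0)
++ *             ...continue the scan from the same key...
++ */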
+
-+void iam_container_read_lock(struct iam_container *ic)
++/***********************************************************************/
++/* invariants */
++/***********************************************************************/
++
++static inline int ptr_inside(void *base, size_t size, void *ptr)
+{
-+ down(&ic->ic_object->i_sem);
++ return (base <= ptr) && (ptr < base + size);
+}
+
-+void iam_container_read_unlock(struct iam_container *ic)
++int iam_frame_invariant(struct iam_frame *f)
+{
-+ up(&ic->ic_object->i_sem);
++ return
++ (f->bh != NULL &&
++ f->bh->b_data != NULL &&
++ ptr_inside(f->bh->b_data, f->bh->b_size, f->entries) &&
++ ptr_inside(f->bh->b_data, f->bh->b_size, f->at) &&
++ f->entries <= f->at);
+}
-+
-+static void iam_it_lock(struct iam_iterator *it)
++int iam_leaf_invariant(struct iam_leaf *l)
+{
-+ if (it->ii_flags&IAM_IT_WRITE)
-+ iam_container_write_lock(iam_it_container(it));
-+ else
-+ iam_container_read_lock(iam_it_container(it));
++ return
++ l->il_bh != NULL &&
++ l->il_bh->b_data != NULL &&
++ ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_entries) &&
++ ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_at) &&
++ l->il_entries <= l->il_at;
+}
+
-+static void iam_it_unlock(struct iam_iterator *it)
++int iam_path_invariant(struct iam_path *p)
+{
-+ if (it->ii_flags&IAM_IT_WRITE)
-+ iam_container_write_unlock(iam_it_container(it));
-+ else
-+ iam_container_read_unlock(iam_it_container(it));
++ int i;
++
++ if (p->ip_container == NULL ||
++ p->ip_indirect < 0 || p->ip_indirect > DX_MAX_TREE_HEIGHT - 1 ||
++ p->ip_frame != p->ip_frames + p->ip_indirect ||
++ !iam_leaf_invariant(&p->ip_leaf))
++ return 0;
++ for (i = 0; i < ARRAY_SIZE(p->ip_frames); ++i) {
++ if (i <= p->ip_indirect) {
++ if (!iam_frame_invariant(&p->ip_frames[i]))
++ return 0;
++ }
++ }
++ return 1;
+}
+
-+/*
-+ * Initialize iterator to IAM_IT_DETACHED state.
-+ *
-+ * postcondition: it_state(it) == IAM_IT_DETACHED
-+ */
-+int iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags,
-+ struct iam_path_descr *pd)
++int iam_it_invariant(struct iam_iterator *it)
+{
-+ memset(it, 0, sizeof *it);
-+ it->ii_flags = flags;
-+ it->ii_state = IAM_IT_DETACHED;
-+ iam_path_init(&it->ii_path, c, pd);
-+ return 0;
++ return
++ (it->ii_state == IAM_IT_DETACHED ||
++ it->ii_state == IAM_IT_ATTACHED) &&
++ !(it->ii_flags & ~(IAM_IT_MOVE | IAM_IT_WRITE)) &&
++ ergo(it->ii_state == IAM_IT_ATTACHED,
++ iam_path_invariant(&it->ii_path));
+}
+
+/*
-+ * Finalize iterator and release all resources.
++ * Search container @c for record with key @k. If record is found, its data
++ * are moved into @r.
+ *
-+ * precondition: it_state(it) == IAM_IT_DETACHED
++ * Return values: 0: found, -ENOENT: not found, -ve: other error
+ */
-+void iam_it_fini(struct iam_iterator *it)
++int iam_lookup(struct iam_container *c, const struct iam_key *k,
++ struct iam_rec *r, struct iam_path_descr *pd)
+{
-+ assert(it_state(it) == IAM_IT_DETACHED);
-+ iam_path_fini(&it->ii_path);
-+}
++ struct iam_iterator it;
++ int result;
+
-+int iam_path_lookup(struct iam_path *path)
-+{
-+ struct iam_container *c;
-+ struct iam_descr *descr;
-+ struct iam_leaf *leaf;
-+ int result;
-+
-+ c = path->ip_container;
-+ leaf = &path->ip_leaf;
-+ descr = iam_path_descr(path);
-+ result = dx_lookup(path);
-+ if (result == 0) {
-+ result = iam_leaf_load(path);
-+ if (result == 0)
-+ result = iam_leaf_ops(leaf)->lookup(leaf,
-+ path->ip_key_target);
-+ }
-+ return result;
++ iam_it_init(&it, c, 0, pd);
++
++ result = iam_it_get_exact(&it, k);
++ if (result == 0)
++ /*
++ * record with required key found, copy it into user buffer
++ */
++ iam_reccpy(&it.ii_path, r, iam_it_rec_get(&it));
++ iam_it_put(&it);
++ iam_it_fini(&it);
++ return result;
+}
++EXPORT_SYMBOL(iam_lookup);
+
+/*
-+ * Attach iterator. After successful completion, @it points to record with
-+ * smallest key not larger than @k.
++ * Insert new record @r with key @k into container @c (within context of
++ * transaction @h).
+ *
-+ * Return value: 0: positioned on existing record,
-+ * -ve: error.
++ * Return values: 0: success, -ve: error, including -EEXIST when record with
++ * given key is already present.
+ *
-+ * precondition: it_state(it) == IAM_IT_DETACHED
-+ * postcondition: ergo(result == 0,
-+ * (it_state(it) == IAM_IT_ATTACHED &&
-+ * it_keycmp(it, iam_it_key_get(it, *), k) < 0))
++ * postcondition: ergo(result == 0 || result == -EEXIST,
++ * iam_lookup(c, k, r2) == 0 &&
++ * !memcmp(r, r2, c->ic_descr->id_rec_size));
+ */
-+int iam_it_get(struct iam_iterator *it, struct iam_key *k)
++int iam_insert(handle_t *h, struct iam_container *c, const struct iam_key *k,
++ struct iam_rec *r, struct iam_path_descr *pd)
+{
++ struct iam_iterator it;
+ int result;
-+ assert(it_state(it) == IAM_IT_DETACHED);
+
-+ it->ii_path.ip_key_target = k;
-+ iam_it_lock(it);
-+ result = iam_path_lookup(&it->ii_path);
-+ if (result == 0 || result == -ENOENT)
-+ it->ii_state = IAM_IT_ATTACHED;
-+ else
-+ iam_it_unlock(it);
-+ assert(ergo(result == 0,
-+ it_keycmp(it,
-+ iam_it_key_get(it, it_scratch_key(it, 0)),
-+ k) <= 0));
++ iam_it_init(&it, c, IAM_IT_WRITE, pd);
++
++ result = iam_it_get_exact(&it, k);
++ if (result == -ENOENT)
++ result = iam_it_rec_insert(h, &it, k, r);
++ else if (result == 0)
++ result = -EEXIST;
++ iam_it_put(&it);
++ iam_it_fini(&it);
+ return result;
+}
++EXPORT_SYMBOL(iam_insert);
+
-+/*
-+ * Duplicates iterator.
-+ *
-+ * postcondition: it_state(dst) == it_state(src) &&
-+ * iam_it_container(dst) == iam_it_container(src) &&
-+ * dst->ii_flags = src->ii_flags &&
-+ * ergo(it_state(src) == IAM_IT_ATTACHED,
-+ * iam_it_rec_get(dst) == iam_it_rec_get(src) &&
-+ * iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
-+ */
-+void iam_it_dup(struct iam_iterator *dst, struct iam_iterator *src)
++int iam_update(handle_t *h, struct iam_container *c, const struct iam_key *k,
++ struct iam_rec *r, struct iam_path_descr *pd)
+{
-+ dst->ii_flags = src->ii_flags;
-+ dst->ii_state = src->ii_state;
-+ /* XXX not yet. iam_path_dup(&dst->ii_path, &src->ii_path); */
-+ /*
-+ * XXX: duplicate lock.
-+ */
-+ assert(it_state(dst) == it_state(src));
-+ assert(iam_it_container(dst) == iam_it_container(src));
-+ assert(dst->ii_flags = src->ii_flags);
-+ assert(ergo(it_state(src) == IAM_IT_ATTACHED,
-+ iam_it_rec_get(dst) == iam_it_rec_get(src) &&
-+ iam_it_key_get(dst, it_scratch_key(dst, 0)) ==
-+ iam_it_key_get(src, it_scratch_key(src, 0))));
++ struct iam_iterator it;
++ int result;
++
++ iam_it_init(&it, c, IAM_IT_WRITE, pd);
+
++ result = iam_it_get_exact(&it, k);
++ if (result == 0)
++ iam_it_rec_set(h, &it, r);
++ iam_it_put(&it);
++ iam_it_fini(&it);
++ return result;
+}
++EXPORT_SYMBOL(iam_update);
++
+/*
-+ * Detach iterator. Does nothing it detached state.
++ * Delete existing record with key @k.
+ *
-+ * postcondition: it_state(it) == IAM_IT_DETACHED
++ * Return values: 0: success, -ENOENT: not-found, -ve: other error.
++ *
++ * postcondition: ergo(result == 0 || result == -ENOENT,
++ * iam_lookup(c, k, *) == -ENOENT);
+ */
-+void iam_it_put(struct iam_iterator *it)
++int iam_delete(handle_t *h, struct iam_container *c, const struct iam_key *k,
++ struct iam_path_descr *pd)
+{
-+ if (it->ii_state == IAM_IT_ATTACHED) {
-+ it->ii_state = IAM_IT_DETACHED;
-+ iam_leaf_fini(&it->ii_path.ip_leaf);
-+ iam_it_unlock(it);
-+ }
++ struct iam_iterator it;
++ int result;
++
++ iam_it_init(&it, c, IAM_IT_WRITE, pd);
++
++ result = iam_it_get_exact(&it, k);
++ if (result == 0)
++ iam_it_rec_delete(h, &it);
++ iam_it_put(&it);
++ iam_it_fini(&it);
++ return result;
+}
++EXPORT_SYMBOL(iam_delete);
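+
++/*
++ * A sketch of how the record-level calls above are meant to be used
++ * within an ext3 transaction (illustrative only: error handling is
++ * omitted, and @c, @pd, @k, @r and the journal credit estimate are
++ * assumed to be supplied by the caller):
++ *
++ *     handle_t *handle;
++ *
++ *     handle = ext3_journal_start(c->ic_object, credits);
++ *     if (!IS_ERR(handle)) {
++ *             result = iam_insert(handle, c, k, r, pd);
++ *             if (result == -EEXIST)
++ *                     result = iam_update(handle, c, k, r, pd);
++ *             ext3_journal_stop(handle);
++ *     }
++ */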
+
-+/*
-+ * Move iterator one record right.
+Index: iam/fs/ext3/iam_lfix.c
+===================================================================
+--- iam.orig/fs/ext3/iam_lfix.c 2004-04-06 17:27:52.000000000 +0400
++++ iam/fs/ext3/iam_lfix.c 2006-05-29 23:50:12.000000000 +0400
+@@ -0,0 +1,445 @@
++/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
++ * vim:expandtab:shiftwidth=8:tabstop=8:
++ *
++ * iam_lfix.c
++ * implementation of iam format for fixed size records.
++ *
++ * Copyright (c) 2006 Cluster File Systems, Inc.
++ * Author: Wang Di <wangdi@clusterfs.com>
++ * Author: Nikita Danilov <nikita@clusterfs.com>
++ *
++ * This file is part of the Lustre file system, http://www.lustre.org
++ * Lustre is a trademark of Cluster File Systems, Inc.
+ *
-+ * Return value: 0: success,
-+ * +1: end of container reached
-+ * -ve: error
++ * You may have signed or agreed to another license before downloading
++ * this software. If so, you are bound by the terms and conditions
++ * of that agreement, and the following does not apply to you. See the
++ * LICENSE file included with this distribution for more information.
+ *
-+ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
-+ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED)
++ * If you did not agree to a different license, then this copy of Lustre
++ * is open source software; you can redistribute it and/or modify it
++ * under the terms of version 2 of the GNU General Public License as
++ * published by the Free Software Foundation.
++ *
++ * In either case, Lustre is distributed in the hope that it will be
++ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
++ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * license text for more details.
+ */
-+int iam_it_next(struct iam_iterator *it)
-+{
-+ int result;
-+ struct iam_container *c;
-+ struct iam_path *path;
-+ struct iam_leaf *leaf;
+
-+ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE);
++#include <linux/types.h>
++#include <linux/jbd.h>
++/* ext3_error() */
++#include <linux/ext3_fs.h>
+
-+ c = iam_it_container(it);
-+ path = &it->ii_path;
-+ leaf = &path->ip_leaf;
++#include <linux/lustre_iam.h>
+
-+ if (iam_leaf_at_end(leaf)) {
-+ /* advance index portion of the path */
-+ result = iam_index_next(c, path);
-+ if (result == 1) {
-+ result = iam_leaf_load(path);
-+ if (result == 0)
-+ iam_leaf_start(leaf);
-+ } else if (result == 0)
-+ /* end of container reached */
-+ result = +1;
-+ if (result < 0)
-+ iam_it_put(it);
-+ } else {
-+ /* advance within leaf node */
-+ iam_leaf_next(leaf);
-+ result = 0;
-+ }
-+ assert(ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED));
-+ return result;
-+}
++#include <libcfs/libcfs.h>
++#include <libcfs/kp30.h>
+
+/*
-+ * Return pointer to the record under iterator.
-+ *
-+ * precondition: it_state(it) == IAM_IT_ATTACHED
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ * Leaf operations.
+ */
-+struct iam_rec *iam_it_rec_get(struct iam_iterator *it)
++
++enum {
++ IAM_LEAF_HEADER_MAGIC = 0x1976 /* This is duplicated in
++ * lustre/utils/create_iam.c */
++};
++
++/* This is duplicated in lustre/utils/create_iam.c */
++struct iam_leaf_head {
++ __le16 ill_magic;
++ __le16 ill_count;
++};
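+
++/*
++ * With fixed-size keys and records, an lfix leaf block is the header
++ * defined above followed by a sorted array of (key, record) pairs; key
++ * and record sizes are per-container constants taken from the descriptor:
++ *
++ *     +-----------+-----+-----+-----+-----+------------+
++ *     | ill_magic | key | rec | key | rec |            |
++ *     | ill_count |     |     |     |     | free space |
++ *     +-----------+-----+-----+-----+-----+------------+
++ */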
++
++static inline int iam_lfix_entry_size(const struct iam_leaf *l)
+{
-+ assert(it_state(it) == IAM_IT_ATTACHED);
-+ return iam_leaf_rec(&it->ii_path.ip_leaf);
++ return iam_leaf_descr(l)->id_key_size + iam_leaf_descr(l)->id_rec_size;
+}
+
-+static void iam_it_reccpy(struct iam_iterator *it, struct iam_rec *r)
++static inline struct iam_lentry *
++iam_lfix_shift(const struct iam_leaf *l, struct iam_lentry *entry, int shift)
+{
-+ memcpy(iam_leaf_rec(&it->ii_path.ip_leaf), r,
-+ iam_it_container(it)->ic_descr->id_rec_size);
++ return (void *)entry + shift * iam_lfix_entry_size(l);
+}
+
-+static void iam_it_keycpy(struct iam_iterator *it, struct iam_key *k)
++static inline struct iam_key *iam_leaf_key_at(struct iam_lentry *entry)
+{
-+ memcpy(iam_leaf_key(&it->ii_path.ip_leaf, NULL), k,
-+ iam_it_container(it)->ic_descr->id_key_size);
++ return (struct iam_key *)entry;
+}
+
++static struct iam_lentry *iam_entries(const struct buffer_head *bh)
++{
++ return (void *)bh->b_data + sizeof(struct iam_leaf_head);
++}
+
-+/*
-+ * Replace contents of record under iterator.
-+ *
-+ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
-+ * ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
-+ */
-+int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r)
++static struct iam_lentry *iam_get_lentries(const struct iam_leaf *l)
+{
-+ int result;
++ return iam_entries(l->il_bh);
++}
+
-+ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
++static int lentry_count_get(const struct iam_leaf *leaf)
++{
++ struct iam_lentry *lentry = leaf->il_entries;
++ return le16_to_cpu(((struct iam_leaf_head *)lentry)->ill_count);
++}
+
-+ result = ext3_journal_get_write_access(h, it->ii_path.ip_leaf.il_bh);
-+ if (result == 0)
-+ iam_it_reccpy(it, r);
-+ return result;
++static void lentry_count_set(struct iam_leaf *leaf, unsigned count)
++{
++ struct iam_lentry *lentry = leaf->il_entries;
++ ((struct iam_leaf_head *)lentry)->ill_count = cpu_to_le16(count);
+}
+
-+/*
-+ * Return pointer to the key under iterator.
-+ *
-+ * precondition: it_state(it) == IAM_IT_ATTACHED
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED
++/*
++ * Return the key under cursor. lfix keys are flat and stored verbatim in
++ * the node, so a pointer into the node is returned directly; formats that
++ * do not store keys explicitly would have to decode the key into the
++ * supplied @key buffer instead.
++ */
-+struct iam_key *iam_it_key_get(struct iam_iterator *it, struct iam_key *k)
++struct iam_key *iam_lfix_key(const struct iam_leaf *l, struct iam_key *key)
+{
-+ assert(it_state(it) == IAM_IT_ATTACHED);
-+ return iam_leaf_key(&it->ii_path.ip_leaf, k);
++ void *ie = l->il_at;
++ return (struct iam_key*)ie;
+}
+
-+static int iam_leaf_rec_add(handle_t *handle, struct iam_path *path)
++static void iam_lfix_start(struct iam_leaf *l)
+{
-+ int err;
++ l->il_at = iam_get_lentries(l);
++}
+
-+ err = ext3_journal_get_write_access(handle, path->ip_leaf.il_bh);
-+ if (err)
-+ goto journal_error;
-+ iam_rec_add(&path->ip_leaf, NULL, NULL);
-+ err = ext3_journal_dirty_metadata(handle, path->ip_leaf.il_bh);
-+journal_error:
-+ if (err)
-+ ext3_std_error(iam_path_obj(path)->i_sb, err);
-+ return err;
++static inline ptrdiff_t iam_lfix_diff(const struct iam_leaf *l,
++ const struct iam_lentry *e1,
++ const struct iam_lentry *e2)
++{
++ ptrdiff_t diff;
++ int esize;
++
++ esize = iam_lfix_entry_size(l);
++ diff = (void *)e1 - (void *)e2;
++ assert(diff / esize * esize == diff);
++ return diff / esize;
+}
+
-+static int iam_new_leaf(handle_t *handle, struct iam_leaf *leaf)
++static int iam_lfix_init(struct iam_leaf *l)
+{
-+ int err;
-+ int err2;
-+ u32 blknr; /* XXX 32bit block size */
-+ struct buffer_head *new_leaf;
-+ struct iam_container *c;
++ int result;
++ struct iam_leaf_head *ill;
+
-+ c = iam_leaf_container(leaf);
-+ err = ext3_journal_get_write_access(handle, leaf->il_bh);
-+ if (err == 0) {
++ assert(l->il_bh != NULL);
++
++ ill = (struct iam_leaf_head*)l->il_bh->b_data;
++ if (ill->ill_magic == cpu_to_le16(IAM_LEAF_HEADER_MAGIC)) {
++ l->il_at = l->il_entries = iam_get_lentries(l);
++ result = 0;
++ } else {
+ struct inode *obj;
+
-+ obj = c->ic_object;
-+ new_leaf = ext3_append(handle, c->ic_object, &blknr, &err);
-+ if (new_leaf != NULL) {
-+ iam_leaf_ops(leaf)->init_new(c, new_leaf);
-+ iam_leaf_ops(leaf)->split(leaf, new_leaf);
-+ err = ext3_journal_dirty_metadata(handle, new_leaf);
-+ err2 = ext3_journal_dirty_metadata(handle, leaf->il_bh);
-+ err = err ? : err2;
-+ if (err)
-+ ext3_std_error(obj->i_sb, err);
-+ brelse(new_leaf);
-+ }
++ obj = iam_leaf_container(l)->ic_object;
++ ext3_error(obj->i_sb, __FUNCTION__,
++ "Wrong magic in node %llu (#%lu): %#x != %#x\n",
++ (unsigned long long)l->il_bh->b_blocknr, obj->i_ino,
++ le16_to_cpu(ill->ill_magic), IAM_LEAF_HEADER_MAGIC);
++ result = -EIO;
+ }
-+ return err;
++ return result;
+}
+
-+int iam_add_rec(handle_t *handle, struct iam_path *path,
-+ struct iam_key *k, struct iam_rec *r)
++static void iam_lfix_fini(struct iam_leaf *l)
+{
-+ int err;
++ l->il_entries = l->il_at = NULL;
++ return;
++}
+
-+ if (iam_leaf_can_add(&path->ip_leaf, k, r)) {
-+ err = iam_leaf_rec_add(handle, path);
-+ } else {
-+ err = split_index_node(handle, path);
-+ if (err == 0) {
-+ err = iam_new_leaf(handle, &path->ip_leaf);
-+ if (err == 0)
-+ err = iam_leaf_rec_add(handle, path);
-+ }
-+ }
-+ return err;
++static struct iam_lentry *iam_lfix_get_end(const struct iam_leaf *l)
++{
++ int count = lentry_count_get(l);
++ struct iam_lentry *ile = iam_lfix_shift(l, l->il_entries, count);
++
++ return ile;
+}
+
-+/*
-+ * Insert new record with key @k and contents from @r, shifting records to the
-+ * right.
-+ *
-+ * precondition: it_state(it) == IAM_IT_ATTACHED &&
-+ * it->ii_flags&IAM_IT_WRITE &&
-+ * it_keycmp(it, iam_it_key_get(it, *), k) < 0
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
-+ * ergo(result == 0,
-+ * it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
-+ * !memcmp(iam_it_rec_get(it), r, ...))
-+ */
-+int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
-+ struct iam_key *k, struct iam_rec *r)
++struct iam_rec *iam_lfix_rec(const struct iam_leaf *l)
+{
-+ int result;
++ void *e = l->il_at;
++ return e + iam_leaf_descr(l)->id_key_size;
++}
+
-+ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
-+#if 0
-+ /*XXX remove this assert temporarily, since if the il_at point to the hearder,
-+ * this assert might has some problems*/
-+ assert(it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)), k) < 0);
-+#endif
-+ result = iam_add_rec(h, &it->ii_path, k, r);
-+ if (result == 0) {
-+ /* place record and key info freed space. Leaf node is already
-+ * in transaction. */
-+ iam_it_reccpy(it, r);
-+ iam_it_keycpy(it, k);
-+ iam_keycpy(it->ii_path.ip_container, it_scratch_key(it, 0), k);
-+ /*
-+ * XXX TBD.
-+ */
++static void iam_lfix_next(struct iam_leaf *l)
++{
++ assert(!iam_leaf_at_end(l));
++ l->il_at = iam_lfix_shift(l, l->il_at, 1);
++}
++
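++/*
++ * Position the cursor at the last entry whose key is not greater than
++ * @k, using binary search over the sorted entry array. Returns 0 on an
++ * exact match and -ENOENT when no entry with the given key exists.
++ */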
++static int iam_lfix_lookup(struct iam_leaf *l, const struct iam_key *k)
++{
++ struct iam_lentry *p, *q, *m;
++ struct iam_container *c;
++ int count;
++
++ count = lentry_count_get(l);
++ c = iam_leaf_container(l);
++
++ p = iam_lfix_shift(l, l->il_entries, 1);
++ q = iam_lfix_shift(l, l->il_entries, count - 1);
++
++ while (p <= q) {
++ m = iam_lfix_shift(l, p, iam_lfix_diff(l, q, p) / 2);
++ if (iam_keycmp(c, iam_leaf_key_at(m), k) > 0)
++ q = iam_lfix_shift(l, m, -1);
++ else
++ p = iam_lfix_shift(l, m, +1);
+ }
-+ assert(it_state(it) == IAM_IT_ATTACHED);
-+ assert(ergo(result == 0,
-+ it_keycmp(it,
-+ iam_it_key_get(it,
-+ it_scratch_key(it, 0)), k) == 0 &&
-+ !memcmp(iam_it_rec_get(it), r,
-+ iam_it_container(it)->ic_descr->id_rec_size)));
-+ return result;
++ l->il_at = iam_lfix_shift(l, p, -1);
++ iam_keycpy(c, iam_path_key(iam_leaf_path(l), 0), iam_leaf_key_at(q));
++
++ if (l->il_at == l->il_entries ||
++ iam_keycmp(c, iam_leaf_key_at(q), k) != 0)
++ return -ENOENT;
++ else
++ return 0;
+}
+
-+static int iam_leaf_rec_remove(handle_t *handle, struct iam_leaf *leaf)
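++/*
++ * Make room for a new entry immediately after the cursor by shifting the
++ * tail of the entry array one slot to the right, and advance the cursor
++ * to the freed slot. The caller copies the key and record in afterwards
++ * (see iam_it_rec_insert()), which is why @k and @r are unused here.
++ */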
++static void iam_lfix_rec_add(struct iam_leaf *leaf,
++ const struct iam_key *k, const struct iam_rec *r)
+{
-+ int err;
++ struct iam_lentry *end, *next, *cur, *nnext;
++ ptrdiff_t diff;
++ int count;
+
-+ iam_rec_del(leaf);
-+ err = ext3_journal_dirty_metadata(handle, leaf->il_bh);
-+ if (err)
-+ ext3_std_error(iam_path_obj(iam_leaf_path(leaf))->i_sb, err);
-+ return err;
++ count = lentry_count_get(leaf);
++ end = iam_lfix_get_end(leaf);
++ cur = leaf->il_at;
++ if (cur != end) {
++ next = iam_lfix_shift(leaf, cur, 1);
++ if (next != end) {
++ nnext = iam_lfix_shift(leaf, next, 1);
++ diff = (void *)end - (void *)next;
++ memmove(nnext, next, diff);
++ }
++ iam_lfix_next(leaf);
++ }
++ lentry_count_set(leaf, count + 1);
+}
+
-+/*
-+ * Delete record under iterator.
-+ *
-+ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED
-+ */
-+int iam_it_rec_delete(handle_t *h, struct iam_iterator *it)
++static void iam_lfix_rec_del(struct iam_leaf *leaf)
+{
-+ int result;
-+
-+ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
++ struct iam_lentry *next, *end;
++ int count;
++ ptrdiff_t diff;
+
-+ result = ext3_journal_get_write_access(h, it->ii_path.ip_leaf.il_bh);
-+ /*
-+ * no compaction for now.
-+ */
-+ if (result == 0)
-+ iam_leaf_rec_remove(h, &it->ii_path.ip_leaf);
++ count = lentry_count_get(leaf);
++ end = iam_lfix_get_end(leaf);
++ next = iam_lfix_shift(leaf, leaf->il_at, 1);
++ diff = (void *)end - (void *)next;
++ memmove(leaf->il_at, next, diff);
+
-+ return result;
++ lentry_count_set(leaf, count - 1);
+}
+
-+/*
-+ * Convert iterator to cookie.
-+ *
-+ * precondition: it_state(it) == IAM_IT_ATTACHED &&
-+ * iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED
-+ */
-+iam_pos_t iam_it_store(struct iam_iterator *it)
++static int iam_lfix_can_add(const struct iam_leaf *l,
++ const struct iam_key *k, const struct iam_rec *r)
+{
-+ iam_pos_t result;
++ struct iam_lentry *end;
++ int block_size = iam_leaf_container(l)->ic_object->i_sb->s_blocksize;
++ unsigned long left, entry_size;
+
-+ assert(it_state(it) == IAM_IT_ATTACHED);
-+ assert(iam_it_container(it)->ic_descr->id_key_size <= sizeof result);
++ end = iam_lfix_get_end(l);
+
-+ result = 0;
-+ iam_it_key_get(it, (struct iam_key *)&result);
-+ return result;
-+}
++ left = block_size - iam_leaf_descr(l)->id_node_gap;
+
-+/*
-+ * Restore iterator from cookie.
-+ *
-+ * precondition: it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE &&
-+ * iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
-+ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED &&
-+ * iam_it_store(it) == pos)
-+ */
-+int iam_it_load(struct iam_iterator *it, iam_pos_t pos)
-+{
-+ assert(it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE);
-+ assert(iam_it_container(it)->ic_descr->id_key_size <= sizeof pos);
-+ return iam_it_get(it, (struct iam_key *)&pos);
-+}
++ left -= (unsigned long)((void*)end - (void*)l->il_entries);
+
-+/***********************************************************************/
-+/* invariants */
-+/***********************************************************************/
++ entry_size = iam_lfix_entry_size(l);
+
-+static inline int ptr_inside(void *base, size_t size, void *ptr)
-+{
-+ return (base <= ptr) && (ptr < base + size);
++ if (left >= entry_size)
++ return 1;
++
++ return 0;
+}
+
-+int iam_frame_invariant(struct iam_frame *f)
++static int iam_lfix_at_end(const struct iam_leaf *folio)
+{
-+ return
-+ (f->bh != NULL &&
-+ f->bh->b_data != NULL &&
-+ ptr_inside(f->bh->b_data, f->bh->b_size, f->entries) &&
-+ ptr_inside(f->bh->b_data, f->bh->b_size, f->at) &&
-+ f->entries <= f->at);
++ struct iam_lentry *ile = iam_lfix_get_end(folio);
++
++ return (folio->il_at == ile);
+}
-+int iam_leaf_invariant(struct iam_leaf *l)
++
++static void iam_lfix_init_new(struct iam_container *c, struct buffer_head *bh)
+{
-+ return
-+ l->il_bh != NULL &&
-+ l->il_bh->b_data != NULL &&
-+ ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_entries) &&
-+ ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_at) &&
-+ l->il_entries <= l->il_at;
++ struct iam_leaf_head *hdr;
++
++ hdr = (struct iam_leaf_head*)bh->b_data;
++ hdr->ill_magic = cpu_to_le16(IAM_LEAF_HEADER_MAGIC);
++ hdr->ill_count = cpu_to_le16(0);
+}
+
-+int iam_path_invariant(struct iam_path *p)
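++/*
++ * Split leaf @l: move the upper half of its entries into the freshly
++ * allocated block @bh, and insert the smallest key of the new block,
++ * together with a pointer to it, into the parent index node.
++ */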
++static void iam_lfix_split(struct iam_leaf *l, struct buffer_head *bh)
+{
-+ int i;
++ struct iam_path *path;
++ struct iam_leaf_head *hdr;
++ const struct iam_key *pivot;
+
-+ if (p->ip_container == NULL ||
-+ p->ip_indirect < 0 || p->ip_indirect > DX_MAX_TREE_HEIGHT - 1 ||
-+ p->ip_frame != p->ip_frames + p->ip_indirect ||
-+ !iam_leaf_invariant(&p->ip_leaf))
-+ return 0;
-+ for (i = 0; i < ARRAY_SIZE(p->ip_frames); ++i) {
-+ if (i <= p->ip_indirect) {
-+ if (!iam_frame_invariant(&p->ip_frames[i]))
-+ return 0;
-+ }
-+ }
-+ return 1;
++ unsigned count;
++ unsigned split;
++
++ void *start;
++ void *finis;
++
++ path = iam_leaf_path(l);
++
++ hdr = (void *)bh->b_data;
++
++ count = lentry_count_get(l);
++ split = count / 2;
++
++ start = iam_lfix_shift(l, iam_get_lentries(l), split);
++ finis = iam_lfix_shift(l, iam_get_lentries(l), count);
++
++ pivot = iam_leaf_key_at(start);
++
++ memmove(iam_entries(bh), start, finis - start);
++ hdr->ill_count = cpu_to_le16(count - split);
++ lentry_count_set(l, split);
++ /*
++ * Insert pointer to the new node (together with the smallest key in
++ * the node) into index node.
++ */
++ iam_insert_key(path, path->ip_frame, pivot, bh->b_blocknr);
+}
+
-+int iam_it_invariant(struct iam_iterator *it)
++static void iam_lfix_key_set(struct iam_leaf *l, const struct iam_key *k)
+{
-+ return
-+ (it->ii_state == IAM_IT_DETACHED ||
-+ it->ii_state == IAM_IT_ATTACHED) &&
-+ !(it->ii_flags & ~(IAM_IT_MOVE | IAM_IT_WRITE)) &&
-+ ergo(it->ii_state == IAM_IT_ATTACHED,
-+ iam_path_invariant(&it->ii_path));
++ iam_keycpy(iam_leaf_container(l), iam_leaf_key_at(l->il_at), k);
+}
+
-+/*
-+ * Search container @c for record with key @k. If record is found, its data
-+ * are moved into @r.
-+ *
-+ *
-+ *
-+ * Return values: +ve: found, 0: not-found, -ve: error
-+ */
-+int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r,
-+ struct iam_path_descr *pd)
++static void iam_lfix_rec_set(struct iam_leaf *l, const struct iam_rec *r)
+{
-+ struct iam_iterator it;
-+ int result;
-+
-+ iam_it_init(&it, c, 0, pd);
-+
-+ result = iam_it_get_exact(&it, k);
-+ if (result == 0)
-+ /*
-+ * record with required key found, copy it into user buffer
-+ */
-+ iam_reccpy(&it.ii_path, r, iam_it_rec_get(&it));
-+ iam_it_put(&it);
-+ iam_it_fini(&it);
-+ return result;
++ iam_reccpy(iam_leaf_path(l), iam_lfix_rec(l), r);
+}
-+EXPORT_SYMBOL(iam_lookup);
++
++static struct iam_leaf_operations iam_lfix_leaf_ops = {
++ .init = iam_lfix_init,
++ .init_new = iam_lfix_init_new,
++ .fini = iam_lfix_fini,
++ .start = iam_lfix_start,
++ .next = iam_lfix_next,
++ .key = iam_lfix_key,
++ .rec = iam_lfix_rec,
++ .key_set = iam_lfix_key_set,
++ .rec_set = iam_lfix_rec_set,
++ .lookup = iam_lfix_lookup,
++ .at_end = iam_lfix_at_end,
++ .rec_add = iam_lfix_rec_add,
++ .rec_del = iam_lfix_rec_del,
++ .can_add = iam_lfix_can_add,
++ .split = iam_lfix_split
++};
+
+/*
-+ * Insert new record @r with key @k into container @c (within context of
-+ * transaction @h.
-+ *
-+ * Return values: 0: success, -ve: error, including -EEXIST when record with
-+ * given key is already present.
-+ *
-+ * postcondition: ergo(result == 0 || result == -EEXIST,
-+ * iam_lookup(c, k, r2) > 0 &&
-+ * !memcmp(r, r2, c->ic_descr->id_rec_size));
++ * Index operations.
+ */
-+int iam_insert(handle_t *h, struct iam_container *c,
-+ struct iam_key *k, struct iam_rec *r, struct iam_path_descr *pd)
-+{
-+ struct iam_iterator it;
-+ int result;
+
-+ iam_it_init(&it, c, IAM_IT_WRITE, pd);
++enum {
++ /* This is duplicated in lustre/utils/create_iam.c */
++ /*
++ * Then shalt thou see the dew-BEDABBLED wretch
++ * Turn, and return, indenting with the way;
++ * Each envious brier his weary legs doth scratch,
++ * Each shadow makes him stop, each murmur stay:
++ * For misery is trodden on by many,
++ * And being low never relieved by any.
++ */
++ IAM_LFIX_ROOT_MAGIC = 0xbedabb1edULL /* d01efull */
++};
+
-+ result = iam_it_get_exact(&it, k);
-+ if (result == -ENOENT)
-+ result = iam_it_rec_insert(h, &it, k, r);
-+ else if (result == 0)
-+ result = -EEXIST;
-+ iam_it_put(&it);
-+ iam_it_fini(&it);
-+ return result;
++/* This is duplicated in lustre/utils/create_iam.c */
++struct iam_lfix_root {
++ __le64 ilr_magic;
++ __le16 ilr_keysize;
++ __le16 ilr_recsize;
++ __le16 ilr_indirect_levels;
++ __le16 ilr_padding;
++};
++
++static __u32 iam_lfix_root_ptr(struct iam_container *c)
++{
++ return 0;
+}
-+EXPORT_SYMBOL(iam_insert);
+
-+int iam_update(handle_t *h, struct iam_container *c,
-+ struct iam_key *k, struct iam_rec *r, struct iam_path_descr *pd)
++static int iam_lfix_node_init(struct iam_container *c, struct buffer_head *bh,
++ int root)
+{
-+ struct iam_iterator it;
-+ int result;
++ return 0;
++}
+
-+ iam_it_init(&it, c, IAM_IT_WRITE, pd);
++static int iam_lfix_node_check(struct iam_path *path, struct iam_frame *frame)
++{
++ struct iam_entry *entries;
++ void *data;
++ entries = dx_node_get_entries(path, frame);
+
-+ result = iam_it_get_exact(&it, k);
-+ if (result == 0)
-+ iam_it_rec_set(h, &it, r);
-+ iam_it_put(&it);
-+ iam_it_fini(&it);
-+ return result;
++ data = frame->bh->b_data;
++
++ if (frame == path->ip_frames) {
++ struct iam_lfix_root *root;
++
++ root = data;
++ path->ip_indirect = le16_to_cpu(root->ilr_indirect_levels);
++ }
++ frame->entries = frame->at = entries;
++ return 0;
+}
-+EXPORT_SYMBOL(iam_update);
+
-+/*
-+ * Delete existing record with key @k.
-+ *
-+ * Return values: 0: success, -ENOENT: not-found, -ve: other error.
-+ *
-+ * postcondition: ergo(result == 0 || result == -ENOENT,
-+ * !iam_lookup(c, k, *));
-+ */
-+int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k,
-+ struct iam_path_descr *pd)
++static int iam_lfix_node_create(struct iam_container *c)
+{
-+ struct iam_iterator it;
-+ int result;
++ return 0;
++}
+
-+ iam_it_init(&it, c, IAM_IT_WRITE, pd);
++static int iam_lfix_keycmp(const struct iam_container *c,
++ const struct iam_key *k1, const struct iam_key *k2)
++{
++ return memcmp(k1, k2, c->ic_descr->id_key_size);
++}
+
-+ result = iam_it_get_exact(&it, k);
-+ if (result == 0)
-+ iam_it_rec_delete(h, &it);
-+ iam_it_put(&it);
-+ iam_it_fini(&it);
++static struct iam_operations iam_lfix_ops = {
++ .id_root_ptr = iam_lfix_root_ptr,
++ .id_node_read = iam_node_read,
++ .id_node_init = iam_lfix_node_init,
++ .id_node_check = iam_lfix_node_check,
++ .id_create = iam_lfix_node_create,
++ .id_keycmp = iam_lfix_keycmp
++};
++
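++/*
++ * Try to recognize an lfix container: read its root block and check the
++ * magic; on success, fill the container descriptor with the key and
++ * record sizes from the root node and with the lfix operation vectors.
++ */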
++static int iam_lfix_guess(struct iam_container *c)
++{
++ int result;
++ struct buffer_head *bh;
++ const struct iam_lfix_root *root;
++
++ assert(c->ic_object != NULL);
++
++ result = iam_node_read(c, iam_lfix_root_ptr(c), NULL, &bh);
++ if (result == 0) {
++ root = (void *)bh->b_data;
++ if (le64_to_cpu(root->ilr_magic) == IAM_LFIX_ROOT_MAGIC) {
++ struct iam_descr *descr;
++
++ descr = c->ic_descr;
++ descr->id_key_size = le16_to_cpu(root->ilr_keysize);
++ descr->id_rec_size = le16_to_cpu(root->ilr_recsize);
++ descr->id_root_gap = sizeof(struct iam_lfix_root);
++ descr->id_node_gap = 0;
++ descr->id_ops = &iam_lfix_ops;
++ descr->id_leaf_ops = &iam_lfix_leaf_ops;
++ } else
++ result = -EBADF;
++ }
+ return result;
+}
-+EXPORT_SYMBOL(iam_delete);
+
-Index: linux-stage/fs/ext3/Makefile
++static struct iam_format iam_lfix_format = {
++ .if_guess = iam_lfix_guess
++};
++
++void iam_lfix_format_init(void)
++{
++ iam_format_register(&iam_lfix_format);
++}
+Index: iam/fs/ext3/namei.c
===================================================================
---- linux-stage.orig/fs/ext3/Makefile 2006-05-29 13:01:21.000000000 +0800
-+++ linux-stage/fs/ext3/Makefile 2006-05-29 13:01:22.000000000 +0800
-@@ -6,7 +6,7 @@
+--- iam.orig/fs/ext3/namei.c 2006-05-27 19:58:44.000000000 +0400
++++ iam/fs/ext3/namei.c 2006-05-29 19:44:45.000000000 +0400
+@@ -24,81 +24,6 @@
+ * Theodore Ts'o, 2002
+ */
+
+-/*
+- * iam: big theory statement.
+- *
+- * iam (Index Access Module) is a module providing abstraction of persistent
+- * transactional container on top of generalized ext3 htree.
+- *
+- * iam supports:
+- *
+- * - key, pointer, and record size specifiable per container.
+- *
+- * - trees taller than 2 index levels.
+- *
+- * - read/write to existing ext3 htree directories as iam containers.
+- *
+- * iam container is a tree, consisting of leaf nodes containing keys and
+- * records stored in this container, and index nodes, containing keys and
+- * pointers to leaf or index nodes.
+- *
+- * iam does not work with keys directly, instead it calls user-supplied key
+- * comparison function (->dpo_keycmp()).
+- *
+- * Pointers are (currently) interpreted as logical offsets (measured in
+- * blocksful) within underlying flat file on top of which iam tree lives.
+- *
+- * On-disk format:
+- *
+- * iam mostly tries to reuse existing htree formats.
+- *
+- * Format of index node:
+- *
+- * +-----+-------+-------+-------+------+-------+------------+
+- * | | count | | | | | |
+- * | gap | / | entry | entry | .... | entry | free space |
+- * | | limit | | | | | |
+- * +-----+-------+-------+-------+------+-------+------------+
+- *
+- * gap this part of node is never accessed by iam code. It
+- * exists for binary compatibility with ext3 htree (that,
+- * in turn, stores fake struct ext2_dirent for ext2
+- * compatibility), and to keep some unspecified per-node
+- * data. Gap can be different for root and non-root index
+- * nodes. Gap size can be specified for each container
+- * (gap of 0 is allowed).
+- *
+- * count/limit current number of entries in this node, and the maximal
+- * number of entries that can fit into node. count/limit
+- * has the same size as entry, and is itself counted in
+- * count.
+- *
+- * entry index entry: consists of a key immediately followed by
+- * a pointer to a child node. Size of a key and size of a
+- * pointer depends on container. Entry has neither
+- * alignment nor padding.
+- *
+- * free space portion of node new entries are added to
+- *
+- * Entries in index node are sorted by their key value.
+- *
+- * Format of leaf node:
+- *
+- * +-----+-------+-------+-------+------+-------+------------+
+- * | | count | | | | | |
+- * | gap | / | leaf | leaf | .... | leaf | free space |
+- * | | limit | | | | | |
+- * +-----+-------+-------+-------+------+-------+------------+
+-
+- * leaf For leaf entry: consists of a rec immediately followd by
+- * a key. size of a key and size of a rec depends on container.
+- *
+- *
+- *
+- *
+- *
+- */
+-
+ #include <linux/module.h>
+ #include <linux/fs.h>
+ #include <linux/pagemap.h>
+@@ -112,10 +37,10 @@
+ #include <linux/quotaops.h>
+ #include <linux/buffer_head.h>
+ #include <linux/smp_lock.h>
++#include <linux/lustre_iam.h>
+ #include "xattr.h"
+ #include "iopen.h"
+ #include "acl.h"
+-#include <linux/lustre_iam.h>
+ /*
+ * define how far ahead to read directories while searching them.
+ */
+@@ -125,9 +50,9 @@
+ #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
+
+
+-static struct buffer_head *ext3_append(handle_t *handle,
+- struct inode *inode,
+- u32 *block, int *err)
++struct buffer_head *ext3_append(handle_t *handle,
++ struct inode *inode,
++ u32 *block, int *err)
+ {
+ struct buffer_head *bh;
+
+@@ -136,14 +61,15 @@ static struct buffer_head *ext3_append(h
+ if ((bh = ext3_bread(handle, inode, *block, 1, err))) {
+ inode->i_size += inode->i_sb->s_blocksize;
+ EXT3_I(inode)->i_disksize = inode->i_size;
+- ext3_journal_get_write_access(handle,bh);
++ *err = ext3_journal_get_write_access(handle, bh);
++ if (*err != 0) {
++ brelse(bh);
++ bh = NULL;
++ }
+ }
+ return bh;
+ }
+
+-#ifndef assert
+-#define assert(test) J_ASSERT(test)
+-#endif
+
+ #ifndef swap
+ #define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
+@@ -162,10 +88,6 @@ struct fake_dirent {
+ u8 file_type;
+ };
+
+-struct dx_countlimit {
+- __le16 limit;
+- __le16 count;
+-};
+
+ /*
+ * dx_root_info is laid out so that if it should somehow get overlaid by a
+@@ -203,245 +125,10 @@ struct dx_map_entry
+ };
+
+
+-static u32 htree_root_ptr(struct iam_container *c);
+-static int htree_node_check(struct iam_path *path, struct iam_frame *frame);
+-static int htree_node_init(struct iam_container *c,
+- struct buffer_head *bh, int root);
+-static int htree_keycmp(struct iam_container *c,
+- struct iam_key *k1, struct iam_key *k2);
+-static int htree_node_read(struct iam_container *c, iam_ptr_t ptr,
+- handle_t *h, struct buffer_head **bh);
+-
+-/*
+- * Parameters describing iam compatibility mode in which existing ext3 htrees
+- * can be manipulated.
+- */
+-static struct iam_descr htree_compat_param = {
+- .id_key_size = sizeof ((struct dx_map_entry *)NULL)->hash,
+- .id_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs,
+- .id_node_gap = offsetof(struct dx_node, entries),
+- .id_root_gap = offsetof(struct dx_root, entries),
+-
+- .id_root_ptr = htree_root_ptr,
+- .id_node_check = htree_node_check,
+- .id_node_init = htree_node_init,
+- .id_node_read = htree_node_read,
+- .id_keycmp = htree_keycmp
+-};
+-
+-
+-struct iam_key;
+-struct iam_rec;
+-struct iam_descr;
+-struct iam_container;
+-struct iam_path;
+-
+-
+-
+-/*
+- * iam cursor (iterator) api.
+- */
+-
+-/*
+- * Flags controlling iterator functionality.
+- */
+-enum iam_it_flags {
+- /*
+- * this iterator will move (iam_it_{prev,next}() will be called on it)
+- */
+- IAM_IT_MOVE = (1 << 0),
+- /*
+- * tree can be updated through this iterator.
+- */
+- IAM_IT_WRITE = (1 << 1)
+-};
+-
+-/*
+- * States of iterator state machine.
+- */
+-enum iam_it_state {
+- /* initial state */
+- IAM_IT_DETACHED,
+- /* iterator is above particular record in the container */
+- IAM_IT_ATTACHED
+-};
+-
+-struct htree_cookie {
+- struct dx_hash_info *hinfo;
+- struct dentry *dentry;
+-};
+-
+-/*
+- * Iterator.
+- *
+- * Immediately after call to iam_it_init() iterator is in "detached"
+- * (IAM_IT_DETACHED) state: it is associated with given parent container, but
+- * doesn't point to any particular record in this container.
+- *
+- * After successful call to iam_it_get() and until corresponding call to
+- * iam_it_put() iterator is in "attached" state (IAM_IT_ATTACHED).
+- *
+- * Attached iterator can move through records in a container (provided
+- * IAM_IT_MOVE permission) in a key order, can get record and key values as it
+- * passes over them, and can modify container (provided IAM_IT_WRITE
+- * permission).
+- *
+- * Concurrency: iterators are supposed to be local to thread. Interfaces below
+- * do no internal serialization.
+- *
+- */
+-struct iam_iterator {
+- /*
+- * iterator flags, taken from enum iam_it_flags.
+- */
+- __u32 ii_flags;
+- enum iam_it_state ii_state;
+- /*
+- * path to the record. Valid in IAM_IT_ATTACHED state.
+- */
+- struct iam_path ii_path;
+-};
+-
+-static inline struct iam_key *keycpy(struct iam_container *c,
+- struct iam_key *k1, struct iam_key *k2)
+-{
+- return memcpy(k1, k2, c->ic_descr->id_key_size);
+-}
+-
+-static inline int keycmp(struct iam_container *c,
+- struct iam_key *k1, struct iam_key *k2)
+-{
+- return c->ic_descr->id_keycmp(c, k1, k2);
+-}
+-
+-static struct iam_container *iam_it_container(struct iam_iterator *it)
+-{
+- return it->ii_path.ip_container;
+-}
+-
+-static inline int it_keycmp(struct iam_iterator *it,
+- struct iam_key *k1, struct iam_key *k2)
+-{
+- return keycmp(iam_it_container(it), k1, k2);
+-}
+-
+-/*
+- * Initialize iterator to IAM_IT_DETACHED state.
+- *
+- * postcondition: it_state(it) == IAM_IT_DETACHED
+- */
+-int iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags);
+-/*
+- * Finalize iterator and release all resources.
+- *
+- * precondition: it_state(it) == IAM_IT_DETACHED
+- */
+-void iam_it_fini(struct iam_iterator *it);
+-
+-/*
+- * Attach iterator. After successful completion, @it points to record with the
+- * largest key not larger than @k. Semantics of ->id_create() method guarantee
+- * that such record will always be found.
+- *
+- * Return value: 0: positioned on existing record,
+- * -ve: error.
+- *
+- * precondition: it_state(it) == IAM_IT_DETACHED
+- * postcondition: ergo(result == 0,
+- * (it_state(it) == IAM_IT_ATTACHED &&
+- * it_keycmp(it, iam_it_key_get(it, *), k) < 0))
+- */
+-int iam_it_get(struct iam_iterator *it, struct iam_key *k);
+-
+-/*
+- * Duplicates iterator.
+- *
+- * postcondition: it_state(dst) == it_state(src) &&
+- * iam_it_container(dst) == iam_it_container(src) &&
+- * dst->ii_flags = src->ii_flags &&
+- * ergo(it_state(it) == IAM_IT_ATTACHED,
+- * iam_it_rec_get(dst) == iam_it_rec_get(src) &&
+- * iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
+- */
+-void iam_it_dup(struct iam_iterator *dst, struct iam_iterator *src);
+-
+-/*
+- * Detach iterator. Does nothing it detached state.
+- *
+- * postcondition: it_state(it) == IAM_IT_DETACHED
+- */
+-void iam_it_put(struct iam_iterator *it);
+-
+-/*
+- * Move iterator one record right.
+- *
+- * Return value: 0: success,
+- * +1: end of container reached
+- * -ve: error
+- *
+- * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
+- * postcondition: ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED)
+- */
+-int iam_it_next(struct iam_iterator *it);
+-
+-/*
+- * Return pointer to the record under iterator.
+- *
+- * precondition: it_state(it) == IAM_IT_ATTACHED
+- * postcondition: it_state(it) == IAM_IT_ATTACHED
+- */
+-const struct iam_rec *iam_it_rec_get(struct iam_iterator *it);
+-
+-/*
+- * Replace contents of record under iterator.
+- *
+- * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
+- * postcondition: it_state(it) == IAM_IT_ATTACHED &&
+- * ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
+- */
+-int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r);
+-
+-/*
+- * Place key under iterator in @k, return @k
+- *
+- * precondition: it_state(it) == IAM_IT_ATTACHED
+- * postcondition: it_state(it) == IAM_IT_ATTACHED
+- */
+-const struct iam_key *iam_it_key_get(struct iam_iterator *it,
+- struct iam_key *k);
+-
+-/*
+- * Insert new record with key @k and contents from @r, shifting records to the
+- * right.
+- *
+- * precondition: it_state(it) == IAM_IT_ATTACHED &&
+- * it->ii_flags&IAM_IT_WRITE &&
+- * it_keycmp(it, iam_it_key_get(it, *), k) < 0
+- * postcondition: it_state(it) == IAM_IT_ATTACHED &&
+- * ergo(result == 0,
+- * it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
+- * !memcmp(iam_it_rec_get(it), r, ...))
+- */
+-int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
+- struct iam_key *k, struct iam_rec *r);
+-/*
+- * Delete record under iterator.
+- *
+- * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
+- * postcondition: it_state(it) == IAM_IT_ATTACHED
+- */
+-int iam_it_rec_delete(handle_t *h, struct iam_iterator *it);
+-
+ #ifdef CONFIG_EXT3_INDEX
+ static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry);
+ static void dx_set_block(struct iam_path *p,
+ struct iam_entry *entry, unsigned value);
+-static inline struct iam_key *dx_get_key(struct iam_path *p,
+- struct iam_entry *entry,
+- struct iam_key *key);
+-static void dx_set_key(struct iam_path *p, struct iam_entry *entry,
+- struct iam_key *key);
+-static unsigned dx_get_count(struct iam_entry *entries);
+ static unsigned dx_get_limit(struct iam_entry *entries);
+ static void dx_set_count(struct iam_entry *entries, unsigned value);
+ static void dx_set_limit(struct iam_entry *entries, unsigned value);
+@@ -457,80 +144,29 @@ static void dx_sort_map(struct dx_map_en
+ static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
+ struct dx_map_entry *offsets, int count);
+ static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
+-static void dx_insert_block (struct iam_path *path,
+- struct iam_frame *frame, u32 hash, u32 block);
+-static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+- struct iam_path *path, __u32 *start_hash);
+ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
+ struct ext3_dir_entry_2 **res_dir, int *err);
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+ struct inode *inode);
+
+-static inline void iam_path_init(struct iam_path *path,
+- struct iam_container *c, struct htree_cookie *hc);
+-static inline void iam_path_fini(struct iam_path *path);
+-
+-
+-/*
+- * Future: use high four bits of block for coalesce-on-delete flags
+- * Mask them off for now.
+- */
+-
+-static inline void *entry_off(struct iam_entry *entry, ptrdiff_t off)
+-{
+- return (void *)((char *)entry + off);
+-}
+-
+-static inline struct iam_descr *path_descr(struct iam_path *p)
+-{
+- return p->ip_container->ic_descr;
+-}
+-
+-static inline struct inode *path_obj(struct iam_path *p)
+-{
+- return p->ip_container->ic_object;
+-}
+-
+ static inline size_t iam_entry_size(struct iam_path *p)
+ {
+- return path_descr(p)->id_key_size + path_descr(p)->id_ptr_size;
++ return iam_path_descr(p)->id_key_size + iam_path_descr(p)->id_ptr_size;
+ }
+
+ static inline struct iam_entry *iam_entry_shift(struct iam_path *p,
+- struct iam_entry *entry, int shift)
++ struct iam_entry *entry,
++ int shift)
+ {
+ void *e = entry;
+ return e + shift * iam_entry_size(p);
+ }
+
+-static inline ptrdiff_t iam_entry_diff(struct iam_path *p,
+- struct iam_entry *e1, struct iam_entry *e2)
+-{
+- ptrdiff_t diff;
+-
+- diff = (void *)e1 - (void *)e2;
+- assert(diff / iam_entry_size(p) * iam_entry_size(p) == diff);
+- return diff / iam_entry_size(p);
+-}
+-
+-static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry)
+-{
+- return le32_to_cpu(*(u32 *)entry_off(entry, path_descr(p)->id_key_size))
+- & 0x00ffffff;
+-}
+-
+-static inline void dx_set_block(struct iam_path *p,
+- struct iam_entry *entry, unsigned value)
+-{
+- *(u32*)entry_off(entry,
+- path_descr(p)->id_key_size) = cpu_to_le32(value);
+-}
+-
+-static inline struct iam_key *dx_get_key(struct iam_path *p,
+- struct iam_entry *entry,
+- struct iam_key *key)
++static inline struct iam_key *iam_get_key(struct iam_path *p,
++ struct iam_entry *entry,
++ struct iam_key *key)
+ {
+- memcpy(key, entry, path_descr(p)->id_key_size);
++ memcpy(key, entry, iam_path_descr(p)->id_key_size);
+ return key;
+ }
+
+@@ -540,68 +176,70 @@ static inline struct iam_key *iam_key_at
+ return (struct iam_key *)entry;
+ }
+
+-static inline void dx_set_key(struct iam_path *p,
+- struct iam_entry *entry, struct iam_key *key)
+-{
+- memcpy(entry, key, path_descr(p)->id_key_size);
+-}
+-
+-static inline unsigned dx_get_count (struct iam_entry *entries)
+-{
+- return le16_to_cpu(((struct dx_countlimit *) entries)->count);
+-}
+-
+-static inline unsigned dx_get_limit (struct iam_entry *entries)
++static inline ptrdiff_t iam_entry_diff(struct iam_path *p,
++ struct iam_entry *e1,
++ struct iam_entry *e2)
+ {
+- return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
+-}
++ ptrdiff_t diff;
- ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
- ioctl.o namei.o super.o symlink.o hash.o resize.o \
-- extents.o mballoc.o
-+ extents.o mballoc.o iam.o iam_lfix.o
+-static inline void dx_set_count (struct iam_entry *entries, unsigned value)
+-{
+- ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
++ diff = (void *)e1 - (void *)e2;
++ assert(diff / iam_entry_size(p) * iam_entry_size(p) == diff);
++ return diff / iam_entry_size(p);
+ }
- ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
- ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
-Index: linux-stage/fs/ext3/iam_lfix.c
-===================================================================
---- linux-stage.orig/fs/ext3/iam_lfix.c 2006-05-29 18:23:53.597737944 +0800
-+++ linux-stage/fs/ext3/iam_lfix.c 2006-05-29 18:04:05.000000000 +0800
-@@ -0,0 +1,310 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * iam_lfix.c
-+ * Implementation of the iam format for fixed-size records.
-+ *
-+ * Copyright (c) 2006 Cluster File Systems, Inc.
-+ * Author: Wang Di <wangdi@clusterfs.com>
-+ * Author: Nikita Danilov <nikita@clusterfs.com>
-+ *
-+ * This file is part of the Lustre file system, http://www.lustre.org
-+ * Lustre is a trademark of Cluster File Systems, Inc.
-+ *
-+ * You may have signed or agreed to another license before downloading
-+ * this software. If so, you are bound by the terms and conditions
-+ * of that agreement, and the following does not apply to you. See the
-+ * LICENSE file included with this distribution for more information.
+-static inline void dx_set_limit (struct iam_entry *entries, unsigned value)
++static inline void dx_set_limit(struct iam_entry *entries, unsigned value)
+ {
+ ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
+ }
+
+ static inline unsigned dx_root_limit(struct iam_path *p)
+ {
+- struct iam_descr *param = path_descr(p);
+- unsigned entry_space = path_obj(p)->i_sb->s_blocksize -
++ struct iam_descr *param = iam_path_descr(p);
++ unsigned entry_space = iam_path_obj(p)->i_sb->s_blocksize -
+ param->id_root_gap;
+ return entry_space / (param->id_key_size + param->id_ptr_size);
+ }
+
+-static inline unsigned dx_node_limit(struct iam_path *p)
+-{
+- struct iam_descr *param = path_descr(p);
+- unsigned entry_space = path_obj(p)->i_sb->s_blocksize -
+- param->id_node_gap;
+- return entry_space / (param->id_key_size + param->id_ptr_size);
+-}
++/*
++ * Two iam_descr's are provided:
+ *
-+ * If you did not agree to a different license, then this copy of Lustre
-+ * is open source software; you can redistribute it and/or modify it
-+ * under the terms of version 2 of the GNU General Public License as
-+ * published by the Free Software Foundation.
++ * - htree_compat_param that supports legacy ext3-htree indices;
++ * - fixed_rec_param that supports containers with records of fixed size.
+ *
-+ * In either case, Lustre is distributed in the hope that it will be
-+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
-+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * license text for more details.
-+ */
-+
-+#include <linux/types.h>
-+#include <linux/jbd.h>
-+/* ext3_error() */
-+#include <linux/ext3_fs.h>
-+
-+#include <linux/lustre_iam.h>
-+
-+#include <libcfs/libcfs.h>
-+#include <libcfs/kp30.h>
-+
-+static inline int iam_lfix_entry_size(const struct iam_leaf *l)
-+{
-+ return iam_leaf_descr(l)->id_key_size + iam_leaf_descr(l)->id_rec_size;
-+}
-+
-+static inline struct iam_lentry *
-+iam_lfix_shift(const struct iam_leaf *l, struct iam_lentry *entry, int shift)
-+{
-+ void *e = entry;
-+ return e + shift * iam_lfix_entry_size(l);
-+}
-+
-+static inline const struct iam_key *
-+iam_leaf_key_at(const struct iam_container *c, const struct iam_lentry *entry)
-+{
-+ return (const struct iam_key *)entry;
-+}
-+
-+static struct iam_lentry *iam_entries(const struct buffer_head *bh)
-+{
-+ return (void *)bh->b_data + sizeof(struct iam_leaf_head);
-+}
-+
-+static struct iam_lentry *iam_get_lentries(const struct iam_leaf *l)
-+{
-+ return iam_entries(l->il_bh);
-+}
-+
-+static int lentry_count_get(const struct iam_leaf *leaf)
-+{
-+ struct iam_lentry *lentry = leaf->il_entries;
-+ return le16_to_cpu(((struct iam_leaf_head *)lentry)->ill_count);
-+}
-+
-+static void lentry_count_set(struct iam_leaf *leaf, unsigned count)
-+{
-+ struct iam_lentry *lentry = leaf->il_entries;
-+ ((struct iam_leaf_head *)lentry)->ill_count = cpu_to_le16(count);
-+}
-+
-+/*
-+ * This function is for flat keys: keys that are not stored
-+ * explicitly are reconstructed in the key buffer.
-+ */
+ */
-+struct iam_key *iam_lfix_key(struct iam_leaf *l, struct iam_key *key)
-+{
-+ void *ie = l->il_at;
-+ return (struct iam_key*)ie;
-+}
-+
-+static void iam_lfix_start(struct iam_leaf *l)
-+{
-+ l->il_at = iam_get_lentries(l);
-+}
-+
-+static inline ptrdiff_t iam_lfix_diff(struct iam_leaf *l, struct iam_lentry *e1,
-+ struct iam_lentry *e2)
-+{
-+ ptrdiff_t diff;
-+ int esize;
-+
-+ esize = iam_lfix_entry_size(l);
-+ diff = (void *)e1 - (void *)e2;
-+ assert(diff / esize * esize == diff);
-+ return diff / esize;
-+}
-+
-+static int iam_lfix_init(struct iam_leaf *l)
-+{
-+ int result;
-+ struct iam_leaf_head *ill;
-+
-+ assert(l->il_bh != NULL);
-+
-+ ill = (struct iam_leaf_head*)l->il_bh->b_data;
-+ if (ill->ill_magic == cpu_to_le16(IAM_LEAF_HEADER_MAGIC)) {
-+ l->il_at = l->il_entries = iam_get_lentries(l);
-+ result = 0;
-+ } else {
-+ struct inode *obj;
-+
-+ obj = iam_leaf_container(l)->ic_object;
-+ ext3_error(obj->i_sb, __FUNCTION__,
-+ "Wrong magic in node %llu (#%lu): %#x != %#x\n",
-+ l->il_bh->b_blocknr, obj->i_ino,
-+ le16_to_cpu(ill->ill_magic), IAM_LEAF_HEADER_MAGIC);
-+ result = -EIO;
-+ }
-+ return result;
-+}
-+
-+static void iam_lfix_fini(struct iam_leaf *l)
-+{
-+ l->il_entries = l->il_at = NULL;
-+ return;
-+}
-+
-+static struct iam_lentry *iam_lfix_get_end(const struct iam_leaf *l)
-+{
-+ int count = lentry_count_get(l);
-+ struct iam_lentry *ile = iam_lfix_shift(l, l->il_entries, count);
-+
-+ return ile;
-+}
-+
-+struct iam_rec *iam_lfix_rec(struct iam_leaf *l)
-+{
-+ void *e = l->il_at;
-+ return e + iam_leaf_descr(l)->id_key_size;
-+}
-+
-+static void iam_lfix_next(struct iam_leaf *l)
-+{
-+ assert(!iam_leaf_at_end(l));
-+ l->il_at = iam_lfix_shift(l, l->il_at, 1);
-+}
-+
-+static int iam_lfix_lookup(struct iam_leaf *l, struct iam_key *k)
-+{
-+ struct iam_lentry *p, *q, *m;
-+ struct iam_container *c;
-+ int count;
-+
-+ count = lentry_count_get(l);
-+ c = iam_leaf_container(l);
-+
-+ p = iam_get_lentries(l);
-+ q = iam_lfix_shift(l, l->il_entries, count);
-+
-+ while (p <= q) {
-+ m = iam_lfix_shift(l, p, iam_lfix_diff(l, q, p) / 2);
-+ if (iam_keycmp(c, iam_leaf_key_at(c, m), k) > 0)
-+ q = iam_lfix_shift(l, m, -1);
-+ else
-+ p = iam_lfix_shift(l, m, +1);
-+ }
-+ assert(p != iam_get_lentries(l));
-+
-+ l->il_at = iam_lfix_shift(l, p, -1);
-+ iam_keycpy(c, iam_path_key(iam_leaf_path(l), 0), iam_leaf_key_at(c, q));
-+ if (l->il_at <= l->il_entries ||
-+ iam_keycmp(c, iam_leaf_key_at(c, q), k) != 0)
-+ return -ENOENT;
-+ else
-+ return 0;
-+}
-+
-+static void iam_lfix_rec_add(struct iam_leaf *leaf,
-+ struct iam_key *k, struct iam_rec *r)
-+{
-+ struct iam_lentry *end, *next, *cur, *nnext;
-+ ptrdiff_t diff;
-+ int count;
-+
-+ count = lentry_count_get(leaf);
-+ end = iam_lfix_get_end(leaf);
-+ cur = leaf->il_at;
-+ if (cur != end) {
-+ next = iam_lfix_shift(leaf, cur, 1);
-+ if (next != end) {
-+ nnext = iam_lfix_shift(leaf, next, 1);
-+ diff = (void *)end - (void *)next;
-+ memmove(nnext, next, diff);
-+ }
-+ iam_lfix_next(leaf);
-+ }
-+ lentry_count_set(leaf, count + 1);
-+}
-+
-+static void iam_lfix_rec_del(struct iam_leaf *leaf)
-+{
-+ struct iam_lentry *next, *end;
-+ int count;
-+ ptrdiff_t diff;
-+
-+ count = lentry_count_get(leaf);
-+ end = iam_lfix_get_end(leaf);
-+ next = iam_lfix_shift(leaf, leaf->il_at, 1);
-+ diff = (void *)end - (void *)next;
-+ memmove(leaf->il_at, next, diff);
-+
-+ lentry_count_set(leaf, count - 1);
-+}
-+
-+static int iam_lfix_can_add(struct iam_leaf *l,
-+ struct iam_key *k, struct iam_rec *r)
-+{
-+ struct iam_lentry *end;
-+ int block_size = iam_leaf_container(l)->ic_object->i_sb->s_blocksize;
-+ unsigned long left, entry_size;
-+
-+ end = iam_lfix_get_end(l);
-+
-+ left = block_size - iam_leaf_descr(l)->id_node_gap;
+
+-static inline int dx_index_is_compat(struct iam_path *path)
+-{
+- return path_descr(path) == &htree_compat_param;
+-}
++static u32 htree_root_ptr(struct iam_container *c);
++static int htree_node_check(struct iam_path *path, struct iam_frame *frame);
++static int htree_node_init(struct iam_container *c, struct buffer_head *bh, int root);
++static int htree_keycmp(const struct iam_container *c,
++ const struct iam_key *k1, const struct iam_key *k2);
+
+-static struct iam_entry *dx_get_entries(struct iam_path *path, void *data,
+- int root)
+-{
+- return data +
+- (root ?
+- path_descr(path)->id_root_gap : path_descr(path)->id_node_gap);
+-}
++struct iam_operations htree_operation = {
++ .id_root_ptr = htree_root_ptr,
++ .id_node_check = htree_node_check,
++ .id_node_init = htree_node_init,
++ .id_node_read = iam_node_read,
++ .id_keycmp = htree_keycmp
++};
+
-+ left -= (unsigned long)((void*)end - (void*)l->il_entries);
++/*
++ * Parameters describing iam compatibility mode in which existing ext3 htrees
++ * can be manipulated.
++ */
++struct iam_descr htree_compat_param = {
++ .id_key_size = sizeof ((struct dx_map_entry *)NULL)->hash,
++ .id_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs,
++ .id_node_gap = offsetof(struct dx_node, entries),
++ .id_root_gap = offsetof(struct dx_root, entries),
++ .id_ops = &htree_operation
++};
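The fixed_rec_param descriptor mentioned above is not shown in this hunk; as a hypothetical sketch, a fixed-size-record container would populate the same slots, with sizes chosen per container. All values below are invented for illustration, and the leaf-operations hookup is omitted because its field name does not appear here:

static struct iam_descr fixed_rec_example = {
        .id_key_size = 8,                 /* 64-bit keys */
        .id_ptr_size = 4,                 /* 32-bit block pointers */
        .id_rec_size = 16,                /* fixed-size records */
        .id_node_gap = 0,                 /* no htree compatibility gap */
        .id_root_gap = 32,                /* room for a small root header */
        .id_ops      = &htree_operation   /* a format would supply its own table */
};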
+
+-static struct iam_entry *dx_node_get_entries(struct iam_path *path,
+- struct iam_frame *frame)
++static inline int dx_index_is_compat(struct iam_path *path)
+ {
+- return dx_get_entries(path,
+- frame->bh->b_data, frame == path->ip_frames);
++ return iam_path_descr(path) == &htree_compat_param;
+ }
+
+
-+ entry_size = iam_lfix_entry_size(l);
+ static int dx_node_check(struct iam_path *p, struct iam_frame *f)
+ {
+ struct iam_entry *e;
+@@ -614,10 +252,10 @@ static int dx_node_check(struct iam_path
+ count = dx_get_count(e);
+ e = iam_entry_shift(p, e, 1);
+ for (i = 0; i < count - 1; ++i, e = iam_entry_shift(p, e, 1)) {
+- keycpy(c, p->ip_key_scratch[0], p->ip_key_scratch[1]);
+- dx_get_key(p, e, p->ip_key_scratch[1]);
++ iam_keycpy(c, iam_path_key(p, 0), iam_path_key(p, 1));
++ iam_get_key(p, e, iam_path_key(p, 1));
+ if (i > 0 &&
+- keycmp(c, p->ip_key_scratch[0], p->ip_key_scratch[1]) > 0)
++ iam_keycmp(c, iam_path_key(p, 0), iam_path_key(p, 1)) > 0)
+ return 0;
+ }
+ return 1;
+@@ -636,13 +274,17 @@ static int htree_node_check(struct iam_p
+
+ data = frame->bh->b_data;
+ entries = dx_node_get_entries(path, frame);
+- sb = path_obj(path)->i_sb;
++ sb = iam_path_obj(path)->i_sb;
+ if (frame == path->ip_frames) {
+ /* root node */
+ struct dx_root *root;
+- struct htree_cookie *hc = path->ip_descr_data;
++ struct iam_path_compat *ipc;
+
+ root = data;
++ assert(path->ip_data != NULL);
++ ipc = container_of(path->ip_data, struct iam_path_compat,
++ ipc_descr);
+
-+ if (left >= entry_size)
-+ return 1;
+ if (root->info.hash_version > DX_HASH_MAX) {
+ ext3_warning(sb, __FUNCTION__,
+ "Unrecognised inode hash code %d",
+@@ -669,15 +311,17 @@ static int htree_node_check(struct iam_p
+ root->info.info_length));
+ assert(dx_get_limit(entries) == dx_root_limit(path));
+
+- hc->hinfo->hash_version = root->info.hash_version;
+- hc->hinfo->seed = EXT3_SB(sb)->s_hash_seed;
+- if (hc->dentry)
+- ext3fs_dirhash(hc->dentry->d_name.name,
+- hc->dentry->d_name.len, hc->hinfo);
+- path->ip_key_target = (struct iam_key *)&hc->hinfo->hash;
++ ipc->ipc_hinfo->hash_version = root->info.hash_version;
++ ipc->ipc_hinfo->seed = EXT3_SB(sb)->s_hash_seed;
++ if (ipc->ipc_dentry)
++ ext3fs_dirhash(ipc->ipc_dentry->d_name.name,
++ ipc->ipc_dentry->d_name.len,
++ ipc->ipc_hinfo);
++ path->ip_key_target =
++ (const struct iam_key *)&ipc->ipc_hinfo->hash;
+ } else {
+ /* non-root index */
+- assert(entries == data + path_descr(path)->id_node_gap);
++ assert(entries == data + iam_path_descr(path)->id_node_gap);
+ assert(dx_get_limit(entries) == dx_node_limit(path));
+ }
+ frame->entries = frame->at = entries;
+@@ -697,8 +341,8 @@ static int htree_node_init(struct iam_co
+ return 0;
+ }
+
+-static int htree_node_read(struct iam_container *c, iam_ptr_t ptr,
+- handle_t *handle, struct buffer_head **bh)
++int iam_node_read(struct iam_container *c, iam_ptr_t ptr,
++ handle_t *handle, struct buffer_head **bh)
+ {
+ int result = 0;
+
+@@ -708,8 +352,8 @@ static int htree_node_read(struct iam_co
+ return result;
+ }
+
+-static int htree_keycmp(struct iam_container *c,
+- struct iam_key *k1, struct iam_key *k2)
++static int htree_keycmp(const struct iam_container *c,
++ const struct iam_key *k1, const struct iam_key *k2)
+ {
+ __u32 p1 = le32_to_cpu(*(__u32 *)k1);
+ __u32 p2 = le32_to_cpu(*(__u32 *)k2);
+@@ -800,7 +444,7 @@ struct stats dx_show_entries(struct dx_h
+ }
+ #endif /* DX_DEBUG */
+
+-static int dx_lookup(struct iam_path *path)
++int dx_lookup(struct iam_path *path)
+ {
+ u32 ptr;
+ int err = 0;
+@@ -810,11 +454,11 @@ static int dx_lookup(struct iam_path *pa
+ struct iam_frame *frame;
+ struct iam_container *c;
+
+- param = path_descr(path);
++ param = iam_path_descr(path);
+ c = path->ip_container;
+
+ for (frame = path->ip_frames, i = 0,
+- ptr = param->id_root_ptr(path->ip_container);
++ ptr = param->id_ops->id_root_ptr(c);
+ i <= path->ip_indirect;
+ ptr = dx_get_block(path, frame->at), ++frame, ++i) {
+ struct iam_entry *entries;
+@@ -823,10 +467,11 @@ static int dx_lookup(struct iam_path *pa
+ struct iam_entry *m;
+ unsigned count;
+
+- err = param->id_node_read(c, (iam_ptr_t)ptr, NULL, &frame->bh);
++ err = param->id_ops->id_node_read(c, (iam_ptr_t)ptr, NULL,
++ &frame->bh);
+ if (err != 0)
+ break;
+- err = param->id_node_check(path, frame);
++ err = param->id_ops->id_node_check(path, frame);
+ if (err != 0)
+ break;
+
+@@ -837,12 +482,27 @@ static int dx_lookup(struct iam_path *pa
+ assert(count && count <= dx_get_limit(entries));
+ p = iam_entry_shift(path, entries, 1);
+ q = iam_entry_shift(path, entries, count - 1);
++ /*
++ * Sanity check: the target key is greater than or equal to the
++ * leftmost key in the node.
++ */
++ if (iam_keycmp(c,
++ iam_key_at(path, p), path->ip_key_target) < 0) {
++ struct inode *obj;
++
++ obj = c->ic_object;
++ ext3_error(obj->i_sb, __FUNCTION__,
++ "corrupted search tree #%lu", obj->i_ino);
++ err = -EIO;
++ break;
++
++ }
+ while (p <= q) {
+ m = iam_entry_shift(path,
+ p, iam_entry_diff(path, q, p) / 2);
+ dxtrace(printk("."));
+- if (keycmp(c, iam_key_at(path, m),
+- path->ip_key_target) > 0)
++ if (iam_keycmp(c, iam_key_at(path, m),
++ path->ip_key_target) > 0)
+ q = iam_entry_shift(path, m, -1);
+ else
+ p = iam_entry_shift(path, m, +1);
+@@ -857,12 +517,12 @@ static int dx_lookup(struct iam_path *pa
+ while (n--) {
+ dxtrace(printk(","));
+ at = iam_entry_shift(path, at, +1);
+- if (keycmp(c, iam_key_at(path, at),
+- path->ip_key_target) > 0) {
++ if (iam_keycmp(c, iam_key_at(path, at),
++ path->ip_key_target) > 0) {
+ if (at != iam_entry_shift(path, frame->at, 1)) {
+ BREAKPOINT;
+ printk(KERN_EMERG "%i\n",
+- keycmp(c, iam_key_at(path, at),
++ iam_keycmp(c, iam_key_at(path, at),
+ path->ip_key_target));
+ }
+ at = iam_entry_shift(path, at, -1);
+@@ -891,508 +551,20 @@ static int dx_probe(struct dentry *dentr
+ struct dx_hash_info *hinfo, struct iam_path *path)
+ {
+ int err;
+- struct htree_cookie hc = {
+- .dentry = dentry,
+- .hinfo = hinfo
+- };
++ struct iam_path_compat *ipc;
+
-+ return 0;
++ assert(path->ip_data != NULL);
++ ipc = container_of(path->ip_data, struct iam_path_compat, ipc_descr);
++ ipc->ipc_dentry = dentry;
++ ipc->ipc_hinfo = hinfo;
+
+ assert(dx_index_is_compat(path));
+- path->ip_descr_data = &hc;
+ err = dx_lookup(path);
+ assert(err != 0 || path->ip_frames[path->ip_indirect].bh != NULL);
+ return err;
+ }
+
+ /*
+- * Initialize container @c, acquires additional reference on @inode.
+- */
+-int iam_container_init(struct iam_container *c,
+- struct iam_descr *descr, struct inode *inode)
+-{
+- memset(c, 0, sizeof *c);
+- c->ic_descr = descr;
+- c->ic_object = igrab(inode);
+- if (c->ic_object != NULL)
+- return 0;
+- else
+- return -ENOENT;
+-}
+-
+-/*
+- * Finalize container @c, release all resources.
+- */
+-void iam_container_fini(struct iam_container *c)
+-{
+- if (c->ic_object != NULL) {
+- iput(c->ic_object);
+- c->ic_object = NULL;
+- }
+-}
+-
+-static inline void iam_path_init(struct iam_path *path, struct iam_container *c,
+- struct htree_cookie *hc)
+-{
+- memset(path, 0, sizeof *path);
+- path->ip_container = c;
+- path->ip_frame = path->ip_frames;
+- path->ip_descr_data = hc;
+-}
+-
+-static inline void iam_path_fini(struct iam_path *path)
+-{
+- int i;
+-
+- for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) {
+- if (path->ip_frames[i].bh != NULL) {
+- brelse(path->ip_frames[i].bh);
+- path->ip_frames[i].bh = NULL;
+- }
+- }
+-}
+-
+-static void iam_path_compat_init(struct iam_path_compat *path,
+- struct inode *inode)
+-{
+- int i;
+-
+- iam_container_init(&path->ipc_container, &htree_compat_param, inode);
+- /*
+- * XXX hack allowing finalization of iam_path_compat with
+- * iam_path_fini().
+- */
+- iput(inode);
+- iam_path_init(&path->ipc_path, &path->ipc_container, NULL);
+- for (i = 0; i < ARRAY_SIZE(path->ipc_path.ip_key_scratch); ++i)
+- path->ipc_path.ip_key_scratch[i] =
+- (struct iam_key *)&path->ipc_scrach[i];
+-}
+-
+-static void iam_path_compat_fini(struct iam_path_compat *path)
+-{
+- iam_path_fini(&path->ipc_path);
+- iam_container_fini(&path->ipc_container);
+-}
+-
+-static int iam_leaf_init(struct iam_path *path, struct iam_leaf *leaf)
+-{
+- int block, err;
+- struct buffer_head *bh;
+-
+- block = dx_get_block(path, path->ip_frame->at);
+- err = path_descr(path)->id_node_read(path->ip_container, block,
+- NULL, &bh);
+- if (err)
+- return err;
+-
+- leaf->bh = bh;
+- leaf->entries = (struct iam_leaf_entry *)bh->b_data;
+- return 0;
+-}
+-
+-static void iam_leaf_fini(struct iam_leaf *leaf)
+-{
+- if (leaf->bh)
+- brelse(leaf->bh);
+-}
+-
+-/*
+- * Search container @c for record with key @k. If record is found, its data
+- * are moved into @r.
+- *
+- *
+- *
+- * Return values: +ve: found, 0: not-found, -ve: error
+- */
+-
+-int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r)
+-{
+- struct dx_hash_info hinfo;
+- struct iam_path_compat cpath;
+- struct iam_path *path = &cpath.ipc_path;
+- struct htree_cookie hc = {
+- .hinfo = &hinfo
+- };
+- int err, i;
+-
+- iam_path_init(path, c, &hc);
+- for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
+- path->ip_key_scratch[i] =
+- (struct iam_key *)&cpath.ipc_scrach[i];
+- err = dx_lookup(path);
+- do {
+- struct iam_leaf leaf;
+- err = iam_leaf_init(path, &leaf);
+- if (err)
+- goto errout;
+-
+- for (path_descr(path)->id_leaf.start(c, &leaf);
+- !path_descr(path)->id_leaf.at_end(c, &leaf);
+- path_descr(path)->id_leaf.next(c, &leaf)) {
+- struct iam_key *key;
+-
+- key = kmalloc(path_descr(path)->id_key_size, GFP_KERNEL);
+- path_descr(path)->id_leaf.key(c, &leaf, key);
+- if (keycmp(c, k, key) == 0) {
+- memcpy(r, path_descr(path)->id_leaf.rec(c, &leaf),
+- path_descr(path)->id_rec_size);
+- iam_path_fini(path);
+- iam_leaf_fini(&leaf);
+- return 0;
+- }
+- }
+-
+- iam_leaf_fini(&leaf);
+- /* Check to see if we should continue to search */
+- err = ext3_htree_next_block(c->ic_object, hinfo.hash, path, NULL);
+- if (err < 0)
+- goto errout;
+- } while (err == 1);
+-errout:
+- iam_path_fini(path);
+- return(err);
+-}
+-EXPORT_SYMBOL(iam_lookup);
+-
+-static inline size_t iam_leaf_entry_size(struct iam_path *p)
+-{
+- return path_descr(p)->id_rec_size + path_descr(p)->id_key_size;
+-}
+-
+-static inline ptrdiff_t iam_leaf_entry_diff(struct iam_path *p,
+- struct iam_leaf_entry *e1, struct iam_leaf_entry *e2)
+-{
+- ptrdiff_t diff;
+-
+- diff = (void *)e1 - (void *)e2;
+- assert(diff / iam_leaf_entry_size(p) * iam_leaf_entry_size(p) == diff);
+- return diff / iam_leaf_entry_size(p);
+-}
+-
+-static inline struct iam_leaf_entry*
+-iam_leaf_entry_shift(struct iam_path *p, struct iam_leaf_entry *entry, int shift)
+-{
+- void *e = entry;
+- return e + shift * iam_leaf_entry_size(p);
+-}
+-
+-static inline struct iam_key *
+-dx_leaf_get_key(struct iam_path *p, struct iam_leaf_entry *e, struct iam_key *key)
+-{
+- memcpy(key, e, path_descr(p)->id_key_size);
+- return key;
+-}
+-
+-static inline struct iam_key *
+-iam_leaf_key_at(struct iam_path *p, struct iam_leaf_entry *entry)
+-{
+- void *e = entry;
+- return e + path_descr(p)->id_rec_size;
+-}
+-static inline struct iam_leaf_entry *
+-iam_leaf_entry_at(struct iam_path *p, struct iam_leaf_entry *entry)
+-{
+- return entry;
+-}
+-
+-static int iam_leaf_lookup(struct iam_path *path, struct iam_leaf *leaf,
+- struct iam_key *k)
+-{
+- struct iam_leaf_entry *p, *q, *m;
+- struct iam_leaf_entry *entries = leaf->entries;
+- int count = dx_get_count((struct iam_entry *)entries);
+-
+- p = iam_leaf_entry_shift(path, entries, 1);
+- q = iam_leaf_entry_shift(path, entries, count - 1);
+- while (p <= q) {
+- m = iam_leaf_entry_shift(path,
+- p, iam_leaf_entry_diff(path, q, p) / 2);
+- dxtrace(printk("."));
+- if (keycmp(path->ip_container, iam_leaf_key_at(path, m),
+- path->ip_key_target) > 0)
+- q = iam_leaf_entry_shift(path, m, -1);
+- else
+- p = iam_leaf_entry_shift(path, m, +1);
+- }
+- leaf->at = q;
+- return 0;
+-}
+-
+-/* XXX what kind of lock should protect this entry: WangDi */
+-static int iam_leaf_insert(handle_t *handle, struct iam_path *path,
+- struct iam_key *k, struct iam_rec *r)
+-{
+- struct iam_leaf leaf;
+- struct iam_leaf_entry *p, *q;
+- int err, count;
+-
+- err = iam_leaf_init(path, &leaf);
+- if (err)
+- goto errout;
+- path_descr(path)->id_leaf.start(path->ip_container, &leaf);
+- count = dx_get_count((struct iam_entry *)leaf.entries);
+- if (dx_get_count((struct iam_entry *)leaf.entries) >=
+- dx_get_limit((struct iam_entry *)leaf.entries)){
+- err = -ENOSPC;
+- goto errout;
+- }
+-
+- err = iam_leaf_lookup(path, &leaf, k);
+- if (err)
+- goto errout;
+-
+- /* insert the key/record into the leaf entries */
+- p = iam_leaf_entry_shift(path, leaf.at, 1);
+- q = iam_leaf_entry_shift(path, leaf.entries, count - 1);
+- while (q < p) {
+- memcpy(iam_leaf_entry_shift(path, q, 1), q, iam_leaf_entry_size(path));
+- q = iam_leaf_entry_shift(path, q, -1);
+- }
+- memcpy(iam_leaf_entry_at(path, p), r, path_descr(path)->id_rec_size);
+- memcpy(iam_leaf_key_at(path, p), k, path_descr(path)->id_key_size);
+-
+- dx_set_count((struct iam_entry*)leaf.entries, count + 1);
+- err = ext3_journal_dirty_metadata(handle, leaf.bh);
+- if (err)
+- ext3_std_error(path->ip_container->ic_object->i_sb, err);
+-errout:
+- iam_leaf_fini(&leaf);
+- return err;
+-}
+-
+-static int split_leaf_node(handle_t *handle, struct iam_path *path)
+-{
+- struct inode *dir = path_obj(path);
+- unsigned continued = 0;
+- struct buffer_head *bh2;
+- u32 newblock, hash_split;
+- char *data2;
+- struct iam_leaf leaf;
+- unsigned split;
+- int err;
+-
+- bh2 = ext3_append (handle, dir, &newblock, &err);
+- if (!(bh2)) {
+- err = -ENOSPC;
+- goto errout;
+- }
+- err = iam_leaf_init(path, &leaf);
+- if (err)
+- goto errout;
+-
+- BUFFER_TRACE(leaf.bh, "get_write_access");
+- err = ext3_journal_get_write_access(handle, leaf.bh);
+- if (err) {
+- journal_error:
+- iam_leaf_fini(&leaf);
+- brelse(bh2);
+- ext3_std_error(dir->i_sb, err);
+- err = -EIO;
+- goto errout;
+- }
+- data2 = bh2->b_data;
+- split = dx_get_count((struct iam_entry*)leaf.entries)/2;
+- hash_split = *(__u32*)iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split));
+- if (keycmp(path->ip_container, iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split)),
+- iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split -1))) == 0)
+- continued = 1;
+-
+- memcpy(iam_leaf_entry_shift(path, (struct iam_leaf_entry *)data2, 1),
+- iam_leaf_entry_shift(path, leaf.entries, split),
+- split * iam_leaf_entry_size(path));
+-
+- /* Which block gets the new entry? */
+- dx_insert_block(path, path->ip_frame, hash_split + continued, newblock);
+- err = ext3_journal_dirty_metadata (handle, bh2);
+- if (err)
+- goto journal_error;
+- err = ext3_journal_dirty_metadata (handle, leaf.bh);
+- if (err)
+- goto journal_error;
+- brelse (bh2);
+- iam_leaf_fini(&leaf);
+-errout:
+- return err;
+-}
+-
+-static int split_index_node(handle_t *handle, struct iam_path *path);
+-/*
+- * Insert new record @r with key @k into container @c (within context of
+- * transaction @h).
+- *
+- * Return values: 0: success, -ve: error, including -EEXIST when record with
+- * given key is already present.
+- *
+- * postcondition: ergo(result == 0 || result == -EEXIST,
+- * iam_lookup(c, k, r2) > 0 &&
+- * !memcmp(r, r2, c->ic_descr->id_rec_size));
+- */
+-int iam_insert(handle_t *handle, struct iam_container *c, struct iam_key *k,
+- struct iam_rec *r)
+-{
+- struct dx_hash_info hinfo;
+- struct iam_path_compat cpath;
+- struct iam_path *path = &cpath.ipc_path;
+- struct htree_cookie hc = {
+- .hinfo = &hinfo
+- };
+- int err, i;
+-
+- iam_path_init(path, c, &hc);
+- for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
+- path->ip_key_scratch[i] =
+- (struct iam_key *)&cpath.ipc_scrach[i];
+- err = dx_lookup(path);
+- if (err)
+- goto errout;
+-
+- err = iam_leaf_insert(handle, path, k, r);
+-
+- if (err != -ENOSPC)
+- goto errout;
+-
+- err = split_index_node(handle, path);
+- if (err)
+- goto errout;
+-
+- err = split_leaf_node(handle, path);
+- if (err)
+- goto errout;
+-
+- err = iam_leaf_insert(handle, path, k, r);
+-errout:
+- iam_path_fini(path);
+- return(err);
+-}
+-
+-EXPORT_SYMBOL(iam_insert);
+-static int iam_leaf_delete(handle_t *handle, struct iam_path *path,
+- struct iam_key *k)
+-{
+- struct iam_leaf leaf;
+- struct iam_leaf_entry *p, *q;
+- int err, count;
+-
+- err = iam_leaf_init(path, &leaf);
+- if (err)
+- goto errout;
+-
+- err = iam_leaf_lookup(path, &leaf, k);
+- if (err)
+- goto errout;
+-
+- count = dx_get_count((struct iam_entry*)leaf.entries);
+- /* delete the key from the leaf entries */
+- p = iam_leaf_entry_shift(path, leaf.at, 1);
+- q = iam_leaf_entry_shift(path, leaf.entries, count - 1);
+- while (p < q) {
+- memcpy(p, iam_leaf_entry_shift(path, p, 1), iam_leaf_entry_size(path));
+- p = iam_leaf_entry_shift(path, p, 1);
+- }
+- dx_set_count((struct iam_entry*)leaf.entries, count - 1);
+-
+- err = ext3_journal_dirty_metadata(handle, leaf.bh);
+- if (err)
+- ext3_std_error(path_obj(path)->i_sb, err);
+-errout:
+- iam_leaf_fini(&leaf);
+- return err;
+-}
+-
+-/*
+- * Delete existing record with key @k.
+- *
+- * Return values: 0: success, -ENOENT: not-found, -ve: other error.
+- *
+- * postcondition: ergo(result == 0 || result == -ENOENT,
+- * !iam_lookup(c, k, *));
+- */
+-int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k)
+-{
+- struct dx_hash_info hinfo;
+- struct iam_path_compat cpath;
+- struct iam_path *path = &cpath.ipc_path;
+- struct htree_cookie hc = {
+- .hinfo = &hinfo
+- };
+- int err, i;
+-
+- iam_path_init(path, c, &hc);
+- for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
+- path->ip_key_scratch[i] =
+- (struct iam_key *)&cpath.ipc_scrach[i];
+- err = dx_lookup(path);
+- if (err)
+- goto errout;
+-
+- err = iam_leaf_delete(h, path, k);
+-errout:
+- iam_path_fini(path);
+- return err;
+-}
+-
+-EXPORT_SYMBOL(iam_delete);
+-
+-static int iam_leaf_update(handle_t *handle, struct iam_path *path,
+- struct iam_key *k, struct iam_rec *r)
+-{
+- struct iam_leaf leaf;
+- int err;
+-
+- err = iam_leaf_init(path, &leaf);
+- if (err)
+- goto errout;
+-
+- err = iam_leaf_lookup(path, &leaf, k);
+- if (err)
+- goto errout;
+-
+- memcpy(iam_leaf_entry_at(path, leaf.at), r, path_descr(path)->id_rec_size);
+- memcpy(iam_leaf_key_at(path, leaf.at), k, path_descr(path)->id_key_size);
+-
+- err = ext3_journal_dirty_metadata(handle, leaf.bh);
+- if (err)
+- ext3_std_error(path_obj(path)->i_sb, err);
+-errout:
+- iam_leaf_fini(&leaf);
+- return err;
+-}
+-/*
+- * Replace existing record with key @k, or insert new one. New record data are
+- * in @r.
+- *
+- * Return values: 0: success, -ve: error.
+- *
+- * postcondition: ergo(result == 0, iam_lookup(c, k, r2) > 0 &&
+- * !memcmp(r, r2, c->ic_descr->id_rec_size));
+- */
+-int iam_update(handle_t *h, struct iam_container *c,
+- struct iam_key *k, struct iam_rec *r)
+-{
+- struct dx_hash_info hinfo;
+- struct iam_path_compat cpath;
+- struct iam_path *path = &cpath.ipc_path;
+- struct htree_cookie hc = {
+- .hinfo = &hinfo
+- };
+- int err, i;
+-
+- iam_path_init(path, c, &hc);
+- for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
+- path->ip_key_scratch[i] =
+- (struct iam_key *)&cpath.ipc_scrach[i];
+- err = dx_lookup(path);
+- if (err)
+- goto errout;
+-
+- err = iam_leaf_update(h, path, k, r);
+-errout:
+- iam_path_fini(path);
+- return err;
+-}
+-
+-EXPORT_SYMBOL(iam_update);
+-
+-/*
+ * This function increments the frame pointer to search the next leaf
+ * block, and reads in the necessary intervening nodes if the search
+ * should be necessary. Whether or not the search is necessary is
+@@ -1409,16 +581,15 @@ EXPORT_SYMBOL(iam_update);
+ * If start_hash is non-null, it will be filled in with the starting
+ * hash of the next page.
+ */
+-static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+- struct iam_path *path, __u32 *start_hash)
++static int ext3_htree_advance(struct inode *dir, __u32 hash,
++ struct iam_path *path, __u32 *start_hash,
++ int compat)
+ {
+ struct iam_frame *p;
+ struct buffer_head *bh;
+ int err, num_frames = 0;
+ __u32 bhash;
+
+- assert(dx_index_is_compat(path));
+-
+ p = path->ip_frame;
+ /*
+ * Find the next leaf page by incrementing the frame pointer.
+@@ -1438,28 +609,34 @@ static int ext3_htree_next_block(struct
+ --p;
+ }
+
+- /*
+- * If the hash is 1, then continue only if the next page has a
+- * continuation hash of any value. This is used for readdir
+- * handling. Otherwise, check to see if the hash matches the
+- * desired continuation hash. If it doesn't, return since
+- * there's no point to read in the successive index pages.
+- */
+- dx_get_key(path, p->at, (struct iam_key *)&bhash);
+- if (start_hash)
+- *start_hash = bhash;
+- if ((hash & 1) == 0) {
+- if ((bhash & ~1) != hash)
+- return 0;
++ if (compat) {
++ /*
++ * Htree hash magic.
++ */
++ /*
++ * If the hash is 1, then continue only if the next page has a
++ * continuation hash of any value. This is used for readdir
++ * handling. Otherwise, check to see if the hash matches the
++ * desired continuation hash. If it doesn't, return since
++ * there's no point to read in the successive index pages.
++ */
++ iam_get_key(path, p->at, (struct iam_key *)&bhash);
++ if (start_hash)
++ *start_hash = bhash;
++ if ((hash & 1) == 0) {
++ if ((bhash & ~1) != hash)
++ return 0;
++ }
+ }
+ /*
+ * If the hash is HASH_NB_ALWAYS, we always go to the next
+ * block so no check is necessary
+ */
+ while (num_frames--) {
+- err = path_descr(path)->id_node_read(path->ip_container,
+- (iam_ptr_t)dx_get_block(path, p->at),
+- NULL, &bh);
++ err = iam_path_descr(path)->id_ops->
++ id_node_read(path->ip_container,
++ (iam_ptr_t)dx_get_block(path, p->at),
++ NULL, &bh);
+ if (err != 0)
+ return err; /* Failure */
+ ++p;
+@@ -1471,6 +648,16 @@ static int ext3_htree_next_block(struct
+ return 1;
+ }
+
++int iam_index_next(struct iam_container *c, struct iam_path *path)
++{
++ return ext3_htree_advance(c->ic_object, 0, path, NULL, 0);
+}
+
-+static int iam_lfix_at_end(const struct iam_leaf *folio)
++int ext3_htree_next_block(struct inode *dir, __u32 hash,
++ struct iam_path *path, __u32 *start_hash)
+{
-+ struct iam_lentry *ile = iam_lfix_get_end(folio);
-+
-+ return (folio->il_at == ile);
++ return ext3_htree_advance(dir, hash, path, start_hash, 1);
+}
+
+ /*
+ * p is at least 6 bytes before the end of page
+@@ -1662,21 +849,30 @@ static void dx_sort_map (struct dx_map_e
+ } while(more);
+ }
+
+-static void dx_insert_block(struct iam_path *path,
+- struct iam_frame *frame, u32 hash, u32 block)
++void iam_insert_key(struct iam_path *path, struct iam_frame *frame,
++ const struct iam_key *key, iam_ptr_t ptr)
+ {
+ struct iam_entry *entries = frame->entries;
+- struct iam_entry *old = frame->at, *new = iam_entry_shift(path, old, +1);
++ struct iam_entry *new = iam_entry_shift(path, frame->at, +1);
+ int count = dx_get_count(entries);
+
+ assert(count < dx_get_limit(entries));
+- assert(old < iam_entry_shift(path, entries, count));
++ assert(frame->at < iam_entry_shift(path, entries, count));
+
-+static void iam_lfix_init_new(struct iam_container *c, struct buffer_head *bh)
-+{
-+ struct iam_leaf_head *hdr;
+ memmove(iam_entry_shift(path, new, 1), new,
+ (char *)iam_entry_shift(path, entries, count) - (char *)new);
+- dx_set_key(path, new, (struct iam_key *)&hash);
+- dx_set_block(path, new, block);
++ dx_set_key(path, new, key);
++ dx_set_block(path, new, ptr);
+ dx_set_count(entries, count + 1);
+ }
+
-+ hdr = (struct iam_leaf_head*)bh->b_data;
-+ hdr->ill_magic = cpu_to_le16(IAM_LEAF_HEADER_MAGIC);
-+ hdr->ill_count = cpu_to_le16(0);
++void dx_insert_block(struct iam_path *path, struct iam_frame *frame,
++ u32 hash, u32 block)
++{
++ assert(dx_index_is_compat(path));
++ iam_insert_key(path, frame, (struct iam_key *)&hash, block);
+}
+
-+static void iam_lfix_split(struct iam_leaf *l, struct buffer_head *bh)
+ #endif
+
+
+@@ -1897,14 +1093,15 @@ static struct buffer_head * ext3_dx_find
+ if (*err != 0)
+ return NULL;
+ } else {
+- path->ip_frame->bh = NULL; /* for iam_path_fini() */
++ path->ip_frame->bh = NULL; /* for iam_path_fini() */
+ path->ip_frame->at = (void *)&dummy_dot;/* hack for zero entry*/
+ }
+ hash = hinfo.hash;
+ do {
+ block = dx_get_block(path, path->ip_frame->at);
+- *err = path_descr(path)->id_node_read(path->ip_container, (iam_ptr_t)block,
+- NULL, &bh);
++ *err = iam_path_descr(path)->id_ops->id_node_read(path->ip_container,
++ (iam_ptr_t)block,
++ NULL, &bh);
+ if (*err != 0)
+ goto errout;
+ de = (struct ext3_dir_entry_2 *) bh->b_data;
+@@ -2067,7 +1264,7 @@ static struct ext3_dir_entry_2 *do_split
+ struct buffer_head **bh,struct iam_frame *frame,
+ struct dx_hash_info *hinfo, int *error)
+ {
+- struct inode *dir = path_obj(path);
++ struct inode *dir = iam_path_obj(path);
+ unsigned blocksize = dir->i_sb->s_blocksize;
+ unsigned count, continued;
+ struct buffer_head *bh2;
+@@ -2392,18 +1589,25 @@ static int ext3_add_entry (handle_t *han
+ }
+
+ #ifdef CONFIG_EXT3_INDEX
+-static int split_index_node(handle_t *handle, struct iam_path *path)
+-{
++int split_index_node(handle_t *handle, struct iam_path *path)
+{
-+ struct iam_path *path;
-+ struct iam_leaf_head *hdr;
-+ const struct iam_key *pivot;
-+
-+ unsigned count;
-+ unsigned split;
-+
-+ void *start;
-+ void *finis;
-+
-+ path = iam_leaf_path(l);
-+
-+ hdr = (void *)bh->b_data;
-+
-+ count = lentry_count_get(l);
-+ split = count / 2;
-+
-+ start = iam_lfix_shift(l, iam_get_lentries(l), split);
-+ finis = iam_lfix_shift(l, iam_get_lentries(l), count);
-+
-+ pivot = iam_leaf_key_at(iam_leaf_container(l), start);
-+
-+ memmove(iam_entries(bh), start, finis - start);
-+ hdr->ill_count = cpu_to_le16(count - split);
-+ lentry_count_set(l, split);
-+ /*
-+ * Insert pointer to the new node (together with the smallest key in
-+ * the node) into index node.
-+ */
-+ iam_insert_key(path, path->ip_frame, pivot, bh->b_blocknr);
-+}
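Worked example (editorial): with ill_count == 6 the split point is 3, so entries 3..5 move to the new block, each header ends up with a count of 3, and the key of old entry 3 becomes the pivot that iam_insert_key() publishes in the parent index node.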
+
+ struct iam_entry *entries; /* old block contents */
+ struct iam_entry *entries2; /* new block contents */
+ struct iam_frame *frame, *safe;
+ struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {0};
+ u32 newblock[DX_MAX_TREE_HEIGHT] = {0};
+- struct inode *dir = path_obj(path);
++ struct inode *dir = iam_path_obj(path);
++ struct iam_descr *descr;
+ int nr_splet;
+ int i, err;
+
++ descr = iam_path_descr(path);
++ /*
++ * Algorithm below depends on this.
++ */
++ assert(descr->id_node_gap < descr->id_root_gap);
+
-+struct iam_leaf_operations iam_lfix_leaf_ops = {
-+ .init = iam_lfix_init,
-+ .init_new = iam_lfix_init_new,
-+ .fini = iam_lfix_fini,
-+ .start = iam_lfix_start,
-+ .next = iam_lfix_next,
-+ .key = iam_lfix_key,
-+ .rec = iam_lfix_rec,
-+ .lookup = iam_lfix_lookup,
-+ .at_end = iam_lfix_at_end,
-+ .rec_add = iam_lfix_rec_add,
-+ .rec_del = iam_lfix_rec_del,
-+ .can_add = iam_lfix_can_add,
-+ .split = iam_lfix_split
-+};
-+EXPORT_SYMBOL(iam_lfix_leaf_ops);
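Generic iam code is expected to drive a leaf exclusively through this table; nothing outside this file calls the iam_lfix_* helpers directly. A minimal sketch of that dispatch pattern, using the iam_leaf_ops() accessor declared in lustre_iam.h below:

static int leaf_add_example(struct iam_leaf *l,
                            const struct iam_key *k, const struct iam_rec *r)
{
        struct iam_leaf_operations *ops = iam_leaf_ops(l);

        if (!ops->can_add(l, k, r))
                return -ENOSPC;         /* caller must split via ->split() */
        ops->rec_add(l, k, r);          /* shifts entries, bumps ill_count */
        return 0;
}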
-Index: linux-stage/include/linux/lustre_iam.h
+ frame = path->ip_frame;
+ entries = frame->entries;
+
+@@ -2442,7 +1646,8 @@ static int split_index_node(handle_t *ha
+ for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
+ bh_new[i] = ext3_append (handle, dir, &newblock[i], &err);
+ if (!bh_new[i] ||
+- path_descr(path)->id_node_init(path->ip_container, bh_new[i], 0) != 0)
++ descr->id_ops->id_node_init(path->ip_container,
++ bh_new[i], 0) != 0)
+ goto cleanup;
+ BUFFER_TRACE(frame->bh, "get_write_access");
+ err = ext3_journal_get_write_access(handle, frame->bh);
+@@ -2516,9 +1721,9 @@ static int split_index_node(handle_t *ha
+ unsigned count1 = count/2, count2 = count - count1;
+ unsigned hash2;
+
+- dx_get_key(path,
+- iam_entry_shift(path, entries, count1),
+- (struct iam_key *)&hash2);
++ iam_get_key(path,
++ iam_entry_shift(path, entries, count1),
++ (struct iam_key *)&hash2);
+
+ dxtrace(printk("Split index %i/%i\n", count1, count2));
+
+@@ -2578,7 +1783,7 @@ static int ext3_dx_add_entry(handle_t *h
+ size_t isize;
+
+ iam_path_compat_init(&cpath, dir);
+- param = path_descr(path);
++ param = iam_path_descr(path);
+
+ err = dx_probe(dentry, NULL, &hinfo, path);
+ if (err != 0)
+@@ -2588,8 +1793,9 @@ static int ext3_dx_add_entry(handle_t *h
+ /* XXX nikita: global serialization! */
+ isize = dir->i_size;
+
+- err = param->id_node_read(path->ip_container, (iam_ptr_t)dx_get_block(path, frame->at),
+- handle, &bh);
++ err = param->id_ops->id_node_read(path->ip_container,
++ (iam_ptr_t)dx_get_block(path, frame->at),
++ handle, &bh);
+ if (err != 0)
+ goto cleanup;
+
+@@ -2724,12 +1930,12 @@ static struct inode * ext3_new_inode_wan
+ * is so far negative - it has no inode.
+ *
+ * If the create succeeds, we fill in the inode information
+- * with d_instantiate().
++ * with d_instantiate().
+ */
+ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
+ struct nameidata *nd)
+ {
+- handle_t *handle;
++ handle_t *handle;
+ struct inode * inode;
+ int err, retries = 0;
+
+Index: iam/include/linux/lustre_iam.h
===================================================================
---- linux-stage.orig/include/linux/lustre_iam.h 2006-05-29 13:01:21.000000000 +0800
-+++ linux-stage/include/linux/lustre_iam.h 2006-05-29 13:01:22.000000000 +0800
-@@ -1,9 +1,61 @@
+--- iam.orig/include/linux/lustre_iam.h 2006-05-27 19:58:44.000000000 +0400
++++ iam/include/linux/lustre_iam.h 2006-05-29 22:41:51.000000000 +0400
+@@ -1,9 +1,64 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+#ifndef __LINUX_LUSTRE_IAM_H__
+#define __LINUX_LUSTRE_IAM_H__
+
++/* handle_t, journal_start(), journal_stop() */
++#include <linux/jbd.h>
++
/*
- * Maximal number of non-leaf levels in htree. In the stock ext3 this is 2.
+ * linux/include/linux/lustre_iam.h
};
/*
-@@ -30,6 +82,11 @@
+@@ -30,6 +85,11 @@ struct iam_key;
/* Incomplete type use to refer to the records stored in iam containers. */
struct iam_rec;
typedef __u64 iam_ptr_t;
/*
-@@ -41,45 +98,25 @@
+@@ -41,45 +101,25 @@ struct iam_frame {
struct iam_entry *at; /* target entry, found by binary search */
};
/*
* Returns pointer (in the same sense as pointer in index entry) to
* the root node.
-@@ -102,8 +139,8 @@
+@@ -102,8 +142,8 @@ struct iam_descr {
/*
* Key comparison function. Returns -1, 0, +1.
*/
/*
* Create new container.
*
-@@ -111,25 +148,120 @@
+@@ -111,25 +151,109 @@ struct iam_descr {
* contains single record with the smallest possible key.
*/
int (*id_create)(struct iam_container *c);
+ *
+ * The caller should assume that the returned pointer is only valid
+ * while the leaf node is pinned and locked. */
-+ struct iam_key *(*key)(struct iam_leaf *l, struct iam_key *k);
++ struct iam_key *(*key)(const struct iam_leaf *l, struct iam_key *k);
+ /* Return a pointer to the entry body. The pointer is valid while the
+ corresponding leaf node is locked and pinned. */
-+ struct iam_rec *(*rec)(struct iam_leaf *l);
++ struct iam_rec *(*rec)(const struct iam_leaf *l);
++
++ void (*key_set)(struct iam_leaf *l, const struct iam_key *k);
++ void (*rec_set)(struct iam_leaf *l, const struct iam_rec *r);
+
+ /*
+ * Search leaf @l for a record with key @k or for a place
+ *
+ * Scratch keys from @path can be used.
+ */
-+ int (*lookup)(struct iam_leaf *l, struct iam_key *k);
++ int (*lookup)(struct iam_leaf *l, const struct iam_key *k);
+
-+ int (*can_add)(struct iam_leaf *l,
-+ struct iam_key *k, struct iam_rec *r);
++ int (*can_add)(const struct iam_leaf *l,
++ const struct iam_key *k, const struct iam_rec *r);
+ /*
+ * Add a record to the leaf.
+ */
+ void (*rec_add)(struct iam_leaf *l,
-+ struct iam_key *k, struct iam_rec *r);
++ const struct iam_key *k, const struct iam_rec *r);
+ /*
+ * Remove a record from the leaf.
+ */
+struct iam_path *iam_leaf_path(const struct iam_leaf *leaf);
+struct iam_container *iam_leaf_container(const struct iam_leaf *leaf);
+
-+struct iam_root {
-+ struct iam_root_info {
-+ u8 indirect_levels;
-+ u8 pad[3];
-+ } info;
-+ struct {} entries[0];
-+};
-+
-+#define IAM_LEAF_HEADER_MAGIC 0x1976
-+struct iam_leaf_head {
-+ __le16 ill_magic;
-+ __le16 ill_count;
-+};
-+
+/*
+ * Parameters, describing a flavor of iam container.
+ */
};
struct iam_container {
-@@ -149,6 +281,17 @@
- };
-
- /*
+@@ -142,10 +266,17 @@ struct iam_container {
+ * container flavor.
+ */
+ struct iam_descr *ic_descr;
++};
++
++/*
+ * description-specific part of iam_path. This is usually embedded into larger
+ * structure.
+ */
+struct iam_path_descr {
-+ /*
+ /*
+- * pointer to flavor-specific per-container data.
+ * Scratch-pad area for temporary keys.
-+ */
+ */
+- void *ic_descr_data;
+ struct iam_key *ipd_key_scratch[DX_SCRATCH_KEYS];
-+};
-+
-+/*
- * Structure to keep track of a path drilled through htree.
- */
- struct iam_path {
-@@ -172,34 +315,232 @@
+ };
+
+ /*
+@@ -172,34 +303,238 @@ struct iam_path {
/*
* Leaf node: a child of ->ip_frame.
*/
/*
* Key searched for.
*/
- struct iam_key *ip_key_target;
- /*
+- struct iam_key *ip_key_target;
+- /*
- * Scratch-pad area for temporary keys.
- */
- struct iam_key *ip_key_scratch[DX_SCRATCH_KEYS];
-- /*
++ const struct iam_key *ip_key_target;
+ /*
- * pointer to flavor-specific per-container data.
+ * Description-specific data.
*/
+ struct dx_hash_info *ipc_hinfo;
+ struct dentry *ipc_dentry;
+ struct iam_path_descr ipc_descr;
- };
-
--int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r);
--int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k);
--int iam_update(handle_t *h, struct iam_container *c, struct iam_key *k, struct iam_rec *r);
--int iam_insert(handle_t *handle, struct iam_container *c, struct iam_key *k, struct iam_rec *r);
++};
++
+/*
+ * iam cursor (iterator) api.
+ */
+ IAM_IT_DETACHED,
+ /* iterator is above particular record in the container */
+ IAM_IT_ATTACHED
-+};
-+
+ };
+
+-int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r);
+-int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k);
+-int iam_update(handle_t *h, struct iam_container *c, struct iam_key *k, struct iam_rec *r);
+-int iam_insert(handle_t *handle, struct iam_container *c, struct iam_key *k, struct iam_rec *r);
+/*
+ * Flags controlling iterator functionality.
+ */
+
+void iam_path_compat_init(struct iam_path_compat *path, struct inode *inode);
+void iam_path_compat_fini(struct iam_path_compat *path);
++
++struct iam_path_descr *iam_ipd_alloc(int keysize);
++void iam_ipd_free(struct iam_path_descr *ipd);
++
+/*
+ * Initialize iterator to IAM_IT_DETACHED state.
+ *
+ * (it_state(it) == IAM_IT_ATTACHED &&
+ * it_keycmp(it, iam_it_key_get(it, *), k) <= 0))
+ */
-+int iam_it_get(struct iam_iterator *it, struct iam_key *k);
++int iam_it_get(struct iam_iterator *it, const struct iam_key *k);
+
+/*
+ * Duplicates iterator.
+ * iam_it_rec_get(dst) == iam_it_rec_get(src) &&
+ * iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
+ */
-+void iam_it_dup(struct iam_iterator *dst, struct iam_iterator *src);
++void iam_it_dup(struct iam_iterator *dst, const struct iam_iterator *src);
+
+/*
+ * Detach iterator. Does nothing in detached state.
+ * precondition: it_state(it) == IAM_IT_ATTACHED
+ * postcondition: it_state(it) == IAM_IT_ATTACHED
+ */
-+struct iam_rec *iam_it_rec_get(struct iam_iterator *it);
++struct iam_rec *iam_it_rec_get(const struct iam_iterator *it);
+
+/*
+ * Replace contents of record under iterator.
+ * precondition: it_state(it) == IAM_IT_ATTACHED
+ * postcondition: it_state(it) == IAM_IT_ATTACHED
+ */
-+struct iam_key *iam_it_key_get(struct iam_iterator *it, struct iam_key *k);
++struct iam_key *iam_it_key_get(const struct iam_iterator *it,
++ struct iam_key *k);
+
+/*
+ * Insert new record with key @k and contents from @r, shifting records to the
+ * !memcmp(iam_it_rec_get(it), r, ...))
+ */
+int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
-+ struct iam_key *k, struct iam_rec *r);
++ const struct iam_key *k, const struct iam_rec *r);
+/*
+ * Delete record under iterator.
+ *
+ * path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
+ * postcondition: it_state(it) == IAM_IT_ATTACHED
+ */
-+iam_pos_t iam_it_store(struct iam_iterator *it);
++iam_pos_t iam_it_store(const struct iam_iterator *it);
+
+/*
+ * Restore iterator from cookie.
+ */
+int iam_it_load(struct iam_iterator *it, iam_pos_t pos);
+
-+int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r,
++int iam_lookup(struct iam_container *c, const struct iam_key *k,
++ struct iam_rec *r, struct iam_path_descr *pd);
++int iam_delete(handle_t *h, struct iam_container *c, const struct iam_key *k,
+ struct iam_path_descr *pd);
-+int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k,
-+ struct iam_path_descr *pd);
-+int iam_update(handle_t *h, struct iam_container *c, struct iam_key *k,
++int iam_update(handle_t *h, struct iam_container *c, const struct iam_key *k,
+ struct iam_rec *r, struct iam_path_descr *pd);
-+int iam_insert(handle_t *handle, struct iam_container *c, struct iam_key *k,
++int iam_insert(handle_t *handle, struct iam_container *c,
++ const struct iam_key *k,
+ struct iam_rec *r, struct iam_path_descr *pd);
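A round trip through this API, as a minimal editorial sketch. Buffer sizing and the journal handle are the caller's responsibility; iam_ipd_alloc() is assumed to return NULL on failure, and the iam_lookup() return convention of +ve found / 0 not found / -ve error follows the comment removed from namei.c above:

static int insert_and_verify(handle_t *h, struct iam_container *c,
                             const struct iam_key *k, struct iam_rec *r,
                             struct iam_rec *out)
{
        struct iam_path_descr *ipd;
        int result;

        ipd = iam_ipd_alloc(c->ic_descr->id_key_size);
        if (ipd == NULL)
                return -ENOMEM;
        result = iam_insert(h, c, k, r, ipd);
        if (result == 0)
                /* postcondition: the new record is now visible */
                result = iam_lookup(c, k, out, ipd) > 0 ? 0 : -EIO;
        iam_ipd_free(ipd);
        return result;
}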
/*
* Initialize container @c, acquires additional reference on @inode.
*/
-@@ -210,3 +551,155 @@
+@@ -210,3 +545,170 @@ int iam_container_init(struct iam_contai
*/
void iam_container_fini(struct iam_container *c);
++/*
++ * Determine container format.
++ */
++int iam_container_setup(struct iam_container *c);
++
+#ifndef assert
+#define assert(test) J_ASSERT(test)
+#endif
+ return c->ic_descr;
+}
+
-+static inline struct iam_descr *iam_path_descr(struct iam_path *p)
++static inline struct iam_descr *iam_path_descr(const struct iam_path *p)
+{
+ return p->ip_container->ic_descr;
+}
+ return c->ic_descr->id_ops->id_keycmp(c, k1, k2);
+}
+
++static inline void iam_reccpy(const struct iam_path *p, struct iam_rec *rec_dst,
++ const struct iam_rec *rec_src)
++{
++ memcpy(rec_dst, rec_src, iam_path_descr(p)->id_rec_size);
++}
++
+static inline void *iam_entry_off(struct iam_entry *entry, size_t off)
+{
+ return (void *)((char *)entry + off);
+ frame->bh->b_data, frame == path->ip_frames);
+}
+
-+static inline struct iam_key *iam_path_key(struct iam_path *path, int nr)
++static inline struct iam_key *iam_path_key(const struct iam_path *path, int nr)
+{
+ assert(0 <= nr && nr < ARRAY_SIZE(path->ip_data->ipd_key_scratch));
+ return path->ip_data->ipd_key_scratch[nr];
+ u32 *block, int *err);
+int split_index_node(handle_t *handle, struct iam_path *path);
+
-+extern struct iam_leaf_operations lfix_leaf_ops;
-+extern struct iam_operations generic_iam_ops;
-+
-+
+/*
+ * external
+ */
+struct iam_descr *iam_leaf_descr(const struct iam_leaf *leaf);
+struct iam_leaf_operations *iam_leaf_ops(const struct iam_leaf *leaf);
+
-+extern struct iam_leaf_operations iam_lfix_leaf_ops;
++
++struct iam_format {
++ int (*if_guess)(struct iam_container *c);
++ struct list_head if_linkage;
++};
++
++void iam_format_register(struct iam_format *fmt);
++
++void iam_lfix_format_init(void);
+
+/* __LINUX_LUSTRE_IAM_H__ */
+#endif