Whamcloud - gitweb
Branch: b_new_cmd
author: wangdi <wangdi>
Mon, 29 May 2006 10:21:09 +0000 (10:21 +0000)
committer: wangdi <wangdi>
Mon, 29 May 2006 10:21:09 +0000 (10:21 +0000)
several fixes about iam

lustre/kernel_patches/patches/ext3-iam-separate.patch
lustre/utils/create_iam.c

index 3dc5649..0d4403b 100644 (file)
-Index: iam/fs/ext3/Makefile
+Index: linux-stage/fs/ext3/namei.c
 ===================================================================
---- iam.orig/fs/ext3/Makefile  2006-05-27 19:58:43.000000000 +0400
-+++ iam/fs/ext3/Makefile       2006-05-27 20:03:07.000000000 +0400
-@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
- ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
-          ioctl.o namei.o super.o symlink.o hash.o resize.o \
--         extents.o mballoc.o
-+         extents.o mballoc.o iam.o iam_lfix.o
+--- linux-stage.orig/fs/ext3/namei.c   2006-05-29 13:01:21.000000000 +0800
++++ linux-stage/fs/ext3/namei.c        2006-05-29 13:01:22.000000000 +0800
+@@ -24,81 +24,6 @@
+  *    Theodore Ts'o, 2002
+  */
  
- ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
- ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
-Index: iam/fs/ext3/iam.c
-===================================================================
---- iam.orig/fs/ext3/iam.c     2004-04-06 17:27:52.000000000 +0400
-+++ iam/fs/ext3/iam.c  2006-05-29 00:31:12.000000000 +0400
-@@ -0,0 +1,990 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ *  iam.c
-+ *  Top-level entry points into iam module
-+ *
-+ *  Copyright (c) 2006 Cluster File Systems, Inc.
-+ *   Author: Wang Di <wangdi@clusterfs.com>
-+ *   Author: Nikita Danilov <nikita@clusterfs.com>
-+ *
-+ *   This file is part of the Lustre file system, http://www.lustre.org
-+ *   Lustre is a trademark of Cluster File Systems, Inc.
-+ *
-+ *   You may have signed or agreed to another license before downloading
-+ *   this software.  If so, you are bound by the terms and conditions
-+ *   of that agreement, and the following does not apply to you.  See the
-+ *   LICENSE file included with this distribution for more information.
-+ *
-+ *   If you did not agree to a different license, then this copy of Lustre
-+ *   is open source software; you can redistribute it and/or modify it
-+ *   under the terms of version 2 of the GNU General Public License as
-+ *   published by the Free Software Foundation.
-+ *
-+ *   In either case, Lustre is distributed in the hope that it will be
-+ *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
-+ *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ *   license text for more details.
-+ */
-+
-+/*
-+ * iam: big theory statement.
-+ *
-+ * iam (Index Access Module) is a module providing abstraction of persistent
-+ * transactional container on top of generalized ext3 htree.
-+ *
-+ * iam supports:
-+ *
-+ *     - key, pointer, and record size specifiable per container.
-+ *
-+ *     - trees taller than 2 index levels.
-+ *
-+ *     - read/write to existing ext3 htree directories as iam containers.
-+ *
-+ * iam container is a tree, consisting of leaf nodes containing keys and
-+ * records stored in this container, and index nodes, containing keys and
-+ * pointers to leaf or index nodes.
-+ *
-+ * iam does not work with keys directly, instead it calls user-supplied key
-+ * comparison function (->dpo_keycmp()).
-+ *
-+ * Pointers are (currently) interpreted as logical offsets (measured in
-+ * blocksful) within underlying flat file on top of which iam tree lives.
-+ *
-+ * On-disk format:
-+ *
-+ * iam mostly tries to reuse existing htree formats.
-+ *
-+ * Format of index node:
-+ *
-+ * +-----+-------+-------+-------+------+-------+------------+
-+ * |     | count |       |       |      |       |            |
-+ * | gap |   /   | entry | entry | .... | entry | free space |
-+ * |     | limit |       |       |      |       |            |
-+ * +-----+-------+-------+-------+------+-------+------------+
-+ *
-+ *       gap           this part of node is never accessed by iam code. It
-+ *                     exists for binary compatibility with ext3 htree (that,
-+ *                     in turn, stores fake struct ext2_dirent for ext2
-+ *                     compatibility), and to keep some unspecified per-node
-+ *                     data. Gap can be different for root and non-root index
-+ *                     nodes. Gap size can be specified for each container
-+ *                     (gap of 0 is allowed).
-+ *
-+ *       count/limit   current number of entries in this node, and the maximal
-+ *                     number of entries that can fit into node. count/limit
-+ *                     has the same size as entry, and is itself counted in
-+ *                     count.
-+ *
-+ *       entry         index entry: consists of a key immediately followed by
-+ *                     a pointer to a child node. Size of a key and size of a
-+ *                     pointer depends on container. Entry has neither
-+ *                     alignment nor padding.
-+ *
-+ *       free space    portion of node new entries are added to
-+ *
-+ * Entries in index node are sorted by their key value.
-+ *
-+ * Format of a leaf node is not specified. Generic iam code accesses leaf
-+ * nodes through ->id_leaf methods in struct iam_descr.
-+ *
-+ */
-+
-+#include <linux/module.h>
-+#include <linux/fs.h>
-+#include <linux/pagemap.h>
-+#include <linux/jbd.h>
-+#include <linux/time.h>
-+#include <linux/ext3_fs.h>
-+#include <linux/ext3_jbd.h>
-+#include <linux/fcntl.h>
-+#include <linux/stat.h>
-+#include <linux/string.h>
-+#include <linux/quotaops.h>
-+#include <linux/buffer_head.h>
-+#include <linux/smp_lock.h>
+-/*
+- * iam: big theory statement.
+- *
+- * iam (Index Access Module) is a module providing abstraction of persistent
+- * transactional container on top of generalized ext3 htree.
+- *
+- * iam supports:
+- *
+- *     - key, pointer, and record size specifiable per container.
+- *
+- *     - trees taller than 2 index levels.
+- *
+- *     - read/write to existing ext3 htree directories as iam containers.
+- *
+- * iam container is a tree, consisting of leaf nodes containing keys and
+- * records stored in this container, and index nodes, containing keys and
+- * pointers to leaf or index nodes.
+- *
+- * iam does not work with keys directly, instead it calls user-supplied key
+- * comparison function (->dpo_keycmp()).
+- *
+- * Pointers are (currently) interpreted as logical offsets (measured in
+- * blocksful) within underlying flat file on top of which iam tree lives.
+- *
+- * On-disk format:
+- *
+- * iam mostly tries to reuse existing htree formats.
+- *
+- * Format of index node:
+- *
+- * +-----+-------+-------+-------+------+-------+------------+
+- * |     | count |       |       |      |       |            |
+- * | gap |   /   | entry | entry | .... | entry | free space |
+- * |     | limit |       |       |      |       |            |
+- * +-----+-------+-------+-------+------+-------+------------+
+- *
+- *       gap           this part of node is never accessed by iam code. It
+- *                     exists for binary compatibility with ext3 htree (that,
+- *                     in turn, stores fake struct ext2_dirent for ext2
+- *                     compatibility), and to keep some unspecified per-node
+- *                     data. Gap can be different for root and non-root index
+- *                     nodes. Gap size can be specified for each container
+- *                     (gap of 0 is allowed).
+- *
+- *       count/limit   current number of entries in this node, and the maximal
+- *                     number of entries that can fit into node. count/limit
+- *                     has the same size as entry, and is itself counted in
+- *                     count.
+- *
+- *       entry         index entry: consists of a key immediately followed by
+- *                     a pointer to a child node. Size of a key and size of a
+- *                     pointer depends on container. Entry has neither
+- *                     alignment nor padding.
+- *
+- *       free space    portion of node new entries are added to
+- *
+- * Entries in index node are sorted by their key value.
+- *
+- * Format of leaf node:
+- *
+- * +-----+-------+-------+-------+------+-------+------------+
+- * |     | count |       |       |      |       |            |
+- * | gap |   /   | leaf  | leaf  | .... | leaf  | free space |
+- * |     | limit |       |       |      |       |            |
+- * +-----+-------+-------+-------+------+-------+------------+
+-
+- *       leaf          For leaf entry: consists of a rec immediately followd by 
+- *                     a key. size of a key and size of a rec depends on container.  
+- *
+- *
+- *
+- *
+- *
+- */
+-
+ #include <linux/module.h>
+ #include <linux/fs.h>
+ #include <linux/pagemap.h>
+@@ -112,10 +37,10 @@
+ #include <linux/quotaops.h>
+ #include <linux/buffer_head.h>
+ #include <linux/smp_lock.h>
 +#include <linux/lustre_iam.h>
-+
-+#include <libcfs/libcfs.h>
-+#include <libcfs/kp30.h>
-+
-+#include "xattr.h"
-+#include "iopen.h"
-+#include "acl.h"
-+
-+
-+static __u32 iam_root_ptr(struct iam_container *c)
-+{
-+        return 0;
-+}
-+
-+static int iam_node_init(struct iam_container *c, struct buffer_head *bh,
-+                        int root)
-+{
-+        return 0;
-+}
-+
-+static int iam_node_check(struct iam_path *path, struct iam_frame *frame)
-+{
-+        struct iam_entry *entries;
-+        void *data;
-+        entries = dx_node_get_entries(path, frame);
-+
-+        data = frame->bh->b_data;
-+
-+        if (frame == path->ip_frames) {
-+               struct iam_root *root;
-+
-+               root = data;
-+               path->ip_indirect = root->info.indirect_levels;
-+        }
-+        frame->entries = frame->at = entries;
-+      return 0;
-+}
-+
-+static int iam_node_create(struct iam_container *c)
-+{
-+        return 0;
-+}
-+
-+struct iam_operations generic_iam_ops = {
-+        .id_root_ptr    = iam_root_ptr,
-+        .id_node_read   = iam_node_read,
-+        .id_node_init   = iam_node_init,
-+        .id_node_check  = iam_node_check,
-+        .id_create      = iam_node_create,
-+};
-+EXPORT_SYMBOL(generic_iam_ops);
-+
-+static inline void iam_reccpy(struct iam_path *p, struct iam_rec *rec_dst,
-+                            struct iam_rec *rec_src)
-+{
-+      memcpy(rec_dst, rec_src, iam_path_descr(p)->id_rec_size);
-+}
-+
-+/*
-+ * Initialize container @c, acquires additional reference on @inode.
-+ */
-+int iam_container_init(struct iam_container *c,
-+                     struct iam_descr *descr, struct inode *inode)
-+{
-+      memset(c, 0, sizeof *c);
-+      c->ic_descr  = descr;
-+      c->ic_object = igrab(inode);
-+      if (c->ic_object != NULL)
-+              return 0;
-+      else
-+              return -ENOENT;
-+}
-+EXPORT_SYMBOL(iam_container_init);
-+
-+/*
-+ * Finalize container @c, release all resources.
-+ */
-+void iam_container_fini(struct iam_container *c)
-+{
-+      if (c->ic_object != NULL) {
-+              iput(c->ic_object);
-+              c->ic_object = NULL;
-+      }
-+}
-+EXPORT_SYMBOL(iam_container_fini);
-+
-+void iam_path_init(struct iam_path *path, struct iam_container *c,
-+                struct iam_path_descr *pd)
-+{
-+      memset(path, 0, sizeof *path);
-+      path->ip_container = c;
-+      path->ip_frame = path->ip_frames;
-+      path->ip_data = pd;
-+}
-+
-+static void iam_leaf_fini(struct iam_leaf *leaf);
-+
-+void iam_path_fini(struct iam_path *path)
-+{
-+      int i;
-+
-+      iam_leaf_fini(&path->ip_leaf);
-+      for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) {
-+              if (path->ip_frames[i].bh != NULL) {
-+                      brelse(path->ip_frames[i].bh);
-+                      path->ip_frames[i].bh = NULL;
-+              }
-+      }
-+}
-+
-+extern struct iam_descr htree_compat_param;
-+
-+void iam_path_compat_init(struct iam_path_compat *path, struct inode *inode)
-+{
-+      int i;
-+
-+      for (i = 0; i < ARRAY_SIZE(path->ipc_scratch); ++i)
-+              path->ipc_descr.ipd_key_scratch[i] =
-+                      (struct iam_key *)&path->ipc_scratch[i];
-+
-+      iam_container_init(&path->ipc_container, &htree_compat_param, inode);
-+      /*
-+       * XXX hack allowing finalization of iam_path_compat with
-+       * iam_path_fini().
-+       */
-+      iput(inode);
-+      iam_path_init(&path->ipc_path, &path->ipc_container, &path->ipc_descr);
-+}
-+
-+void iam_path_compat_fini(struct iam_path_compat *path)
-+{
-+      iam_path_fini(&path->ipc_path);
-+      iam_container_fini(&path->ipc_container);
-+}
-+
-+/*
-+ * Leaf helpers.
-+ */
-+
-+struct iam_path *iam_leaf_path(const struct iam_leaf *leaf)
-+{
-+        return leaf->il_path;
-+}
-+
-+struct iam_container *iam_leaf_container(const struct iam_leaf *leaf)
-+{
-+        return iam_leaf_path(leaf)->ip_container;
-+}
-+
-+struct iam_descr *iam_leaf_descr(const struct iam_leaf *leaf)
-+{
-+        return iam_leaf_container(leaf)->ic_descr;
-+}
-+
-+struct iam_leaf_operations *iam_leaf_ops(const struct iam_leaf *leaf)
-+{
-+        return iam_leaf_descr(leaf)->id_leaf_ops;
-+}
-+
-+/*
-+ * Return pointer to current leaf record. Pointer is valid while corresponding
-+ * leaf node is locked and pinned.
-+ */
-+struct iam_rec *iam_leaf_rec(struct iam_leaf *leaf)
-+{
-+      return iam_leaf_ops(leaf)->rec(leaf);
-+}
-+
-+/*
-+ * Return pointer to the current leaf key. This function may return either
-+ * pointer to the key stored in node, or copy key into @key buffer supplied by
-+ * caller and return pointer to this buffer. The latter approach is used when
-+ * keys in nodes are not stored in plain form (e.g., htree doesn't store keys
-+ * at all).
-+ *
-+ * Caller should assume that returned pointer is only valid while leaf node is
-+ * pinned and locked.
-+ */
-+struct iam_key *iam_leaf_key(struct iam_leaf *leaf, struct iam_key *key)
-+{
-+      return iam_leaf_ops(leaf)->key(leaf, key);
-+}
-+
-+static int iam_leaf_load(struct iam_path *path)
-+{
-+      int block;
-+      int err;
-+      struct iam_container *c;
-+      struct buffer_head   *bh;
-+      struct iam_leaf      *leaf;
-+      struct iam_descr     *descr;
-+      
-+      c     = path->ip_container;
-+      leaf  = &path->ip_leaf;
-+      descr = iam_path_descr(path);
-+      block = dx_get_block(path, path->ip_frame->at);
-+      err   = descr->id_ops->id_node_read(c, block, NULL, &bh);
-+      if (err == 0) {
-+              leaf->il_bh = bh;
-+                leaf->il_path = path;
-+              err = iam_leaf_ops(leaf)->init(leaf);
-+      }
-+      return err;
-+}
-+
-+static void iam_leaf_fini(struct iam_leaf *leaf)
-+{
-+      iam_leaf_ops(leaf)->fini(leaf);
-+      if (leaf->il_bh) {
-+              brelse(leaf->il_bh);
-+              leaf->il_bh = NULL;
-+      }
-+}
-+
-+static void iam_leaf_start(struct iam_leaf *folio)
-+{
-+      iam_leaf_ops(folio)->start(folio);
-+}
-+
-+void iam_leaf_next(struct iam_leaf *folio)
-+{
-+      iam_leaf_ops(folio)->next(folio);
-+}
-+
-+static void iam_rec_add(struct iam_leaf *leaf, struct iam_key *key,
-+                        struct iam_rec *rec)
-+{
-+        iam_leaf_ops(leaf)->rec_add(leaf, key, rec);
-+}
-+
-+static void iam_rec_del(struct iam_leaf *leaf)
-+{
-+        iam_leaf_ops(leaf)->rec_del(leaf);
-+}
-+
-+int iam_leaf_at_end(const struct iam_leaf *leaf)
-+{
-+        return iam_leaf_ops(leaf)->at_end(leaf);
-+}
-+
-+void iam_leaf_split(struct iam_leaf *l, struct buffer_head *bh)
-+{
-+        iam_leaf_ops(l)->split(l, bh);
-+}
-+
-+static int iam_leaf_can_add(struct iam_leaf *l,
-+                            struct iam_key *k, struct iam_rec *r)
-+{
-+        return iam_leaf_ops(l)->can_add(l, k, r);
-+}
-+
-+/***********************************************************************/
-+/* iterator interface                                                  */
-+/***********************************************************************/
-+
-+static enum iam_it_state it_state(const struct iam_iterator *it)
-+{
-+        return it->ii_state;
-+}
-+
-+/*
-+ * Helper function returning scratch key.
-+ */
-+static struct iam_key *it_scratch_key(struct iam_iterator *it, int n)
-+{
-+        return iam_path_key(&it->ii_path, n);
-+}
-+
-+static struct iam_container *iam_it_container(const struct iam_iterator *it)
-+{
-+      return it->ii_path.ip_container;
-+}
-+
-+static inline int it_keycmp(const struct iam_iterator *it,
-+                          const struct iam_key *k1, const struct iam_key *k2)
-+{
-+      return iam_keycmp(iam_it_container(it), k1, k2);
-+}
-+
-+/*
-+ * Helper wrapper around iam_it_get(): returns 0 (success) only when record
-+ * with exactly the same key as asked is found.
-+ */
-+static int iam_it_get_exact(struct iam_iterator *it, struct iam_key *k)
-+{
-+        int result;
-+
-+        result = iam_it_get(it, k);
-+        if (result == 0 &&
-+            (it_keycmp(it, k, iam_it_key_get(it, it_scratch_key(it, 1))) != 0))
-+                /*
-+                 * Return -ENOENT if cursor is located above record with a key
-+                 * different from one specified.
-+                 *
-+                 * XXX returning -ENOENT only works if iam_it_get never
-+                 * returns -ENOENT as a legitimate error.
-+                 */
-+                result = -ENOENT;
-+        return result;
-+}
-+
-+void iam_container_write_lock(struct iam_container *ic)
-+{
-+      down(&ic->ic_object->i_sem);
-+}
-+
-+void iam_container_write_unlock(struct iam_container *ic)
-+{
-+      up(&ic->ic_object->i_sem);
-+}
-+
-+void iam_container_read_lock(struct iam_container *ic)
-+{
-+      down(&ic->ic_object->i_sem);
-+}
-+
-+void iam_container_read_unlock(struct iam_container *ic)
-+{
-+      up(&ic->ic_object->i_sem);
-+}
-+
-+static void iam_it_lock(struct iam_iterator *it)
-+{
-+        if (it->ii_flags&IAM_IT_WRITE)
-+                iam_container_write_lock(iam_it_container(it));
-+        else
-+                iam_container_read_lock(iam_it_container(it));
-+}
-+
-+static void iam_it_unlock(struct iam_iterator *it)
-+{
-+      if (it->ii_flags&IAM_IT_WRITE)
-+              iam_container_write_unlock(iam_it_container(it));
-+      else
-+              iam_container_read_unlock(iam_it_container(it));
-+}
-+
-+/*
-+ * Initialize iterator to IAM_IT_DETACHED state.
-+ *
-+ * postcondition: it_state(it) == IAM_IT_DETACHED
-+ */
-+int  iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags,
-+               struct iam_path_descr *pd)
-+{
-+      memset(it, 0, sizeof *it);
-+      it->ii_flags  = flags;
-+      it->ii_state  = IAM_IT_DETACHED;
-+      iam_path_init(&it->ii_path, c, pd);
-+      return 0;
-+}
-+
-+/*
-+ * Finalize iterator and release all resources.
-+ *
-+ * precondition: it_state(it) == IAM_IT_DETACHED
-+ */
-+void iam_it_fini(struct iam_iterator *it)
-+{
-+      assert(it_state(it) == IAM_IT_DETACHED);
-+      iam_path_fini(&it->ii_path);
-+}
-+
-+int iam_path_lookup(struct iam_path *path)
-+{
-+      struct iam_container *c;
-+      struct iam_descr *descr;
-+      struct iam_leaf  *leaf;
-+      int result;
-+      
-+      c = path->ip_container;
-+      leaf = &path->ip_leaf;
-+      descr = iam_path_descr(path);
-+      result = dx_lookup(path);
-+      if (result == 0) {
-+              result = iam_leaf_load(path);
-+              if (result == 0)
-+                      result = iam_leaf_ops(leaf)->lookup(leaf,
-+                                                            path->ip_key_target);
-+      }
-+      return result;
-+}
-+
-+/*
-+ * Attach iterator. After successful completion, @it points to record with
-+ * smallest key not larger than @k.
-+ *
-+ * Return value: 0: positioned on existing record,
-+ *             -ve: error.
-+ *
-+ * precondition:  it_state(it) == IAM_IT_DETACHED
-+ * postcondition: ergo(result == 0,
-+ *                     (it_state(it) == IAM_IT_ATTACHED &&
-+ *                      it_keycmp(it, iam_it_key_get(it, *), k) < 0))
-+ */
-+int iam_it_get(struct iam_iterator *it, struct iam_key *k)
-+{
-+        int result;
-+        assert(it_state(it) == IAM_IT_DETACHED);
-+
-+        it->ii_path.ip_key_target = k;
-+        iam_it_lock(it);
-+        result = iam_path_lookup(&it->ii_path);
-+        if (result == 0 || result == -ENOENT)
-+                it->ii_state = IAM_IT_ATTACHED;
-+        else
-+                iam_it_unlock(it);
-+      assert(ergo(result == 0,
-+                    it_keycmp(it,
-+                              iam_it_key_get(it, it_scratch_key(it, 0)),
-+                            k) <= 0));
-+        return result;
-+}
-+
-+/*
-+ * Duplicates iterator.
-+ *
-+ * postcondition: it_state(dst) == it_state(src) &&
-+ *                iam_it_container(dst) == iam_it_container(src) &&
-+ *                dst->ii_flags = src->ii_flags &&
-+ *                ergo(it_state(src) == IAM_IT_ATTACHED,
-+ *                     iam_it_rec_get(dst) == iam_it_rec_get(src) &&
-+ *                     iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
-+ */
-+void iam_it_dup(struct iam_iterator *dst, struct iam_iterator *src)
-+{
-+        dst->ii_flags     = src->ii_flags;
-+        dst->ii_state     = src->ii_state;
-+        /* XXX not yet. iam_path_dup(&dst->ii_path, &src->ii_path); */
-+        /*
-+         * XXX: duplicate lock.
-+         */
-+      assert(it_state(dst) == it_state(src));
-+      assert(iam_it_container(dst) == iam_it_container(src));
-+      assert(dst->ii_flags = src->ii_flags);
-+      assert(ergo(it_state(src) == IAM_IT_ATTACHED,
-+                  iam_it_rec_get(dst) == iam_it_rec_get(src) &&
-+                  iam_it_key_get(dst, it_scratch_key(dst, 0)) ==
-+                  iam_it_key_get(src, it_scratch_key(src, 0))));
-+
-+}
-+/*
-+ * Detach iterator. Does nothing it detached state.
-+ *
-+ * postcondition: it_state(it) == IAM_IT_DETACHED
-+ */
-+void iam_it_put(struct iam_iterator *it)
-+{
-+        if (it->ii_state == IAM_IT_ATTACHED) {
-+                it->ii_state = IAM_IT_DETACHED;
-+              iam_leaf_fini(&it->ii_path.ip_leaf);
-+                iam_it_unlock(it);
-+        }
-+}
-+
-+/*
-+ * Move iterator one record right.
-+ *
-+ * Return value: 0: success,
-+ *              +1: end of container reached
-+ *             -ve: error
-+ *
-+ * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
-+ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED)
-+ */
-+int iam_it_next(struct iam_iterator *it)
-+{
-+        int result;
-+        struct iam_container *c;
-+        struct iam_path      *path;
-+        struct iam_leaf      *leaf;
-+
-+        assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE);
-+
-+        c    = iam_it_container(it);
-+        path = &it->ii_path;
-+        leaf = &path->ip_leaf;
-+
-+        if (iam_leaf_at_end(leaf)) {
-+                /* advance index portion of the path */
-+                result = iam_index_next(c, path);
-+                if (result == 1) {
-+                        result = iam_leaf_load(path);
-+                        if (result == 0)
-+                                iam_leaf_start(leaf);
-+                } else if (result == 0)
-+                        /* end of container reached */
-+                        result = +1;
-+                if (result < 0)
-+                        iam_it_put(it);
-+        } else {
-+                /* advance within leaf node */
-+                iam_leaf_next(leaf);
-+                result = 0;
-+        }
-+        assert(ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED));
-+        return result;
-+}
-+
-+/*
-+ * Return pointer to the record under iterator.
-+ *
-+ * precondition:  it_state(it) == IAM_IT_ATTACHED
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED
-+ */
-+struct iam_rec *iam_it_rec_get(struct iam_iterator *it)
-+{
-+        assert(it_state(it) == IAM_IT_ATTACHED);
-+        return iam_leaf_rec(&it->ii_path.ip_leaf);
-+}
-+
-+static void iam_it_reccpy(struct iam_iterator *it, struct iam_rec *r)
-+{
-+        memcpy(iam_leaf_rec(&it->ii_path.ip_leaf), r,
-+               iam_it_container(it)->ic_descr->id_rec_size);
-+}
-+
-+static void iam_it_keycpy(struct iam_iterator *it, struct iam_key *k)
-+{
-+        memcpy(iam_leaf_key(&it->ii_path.ip_leaf, NULL), k,
-+                iam_it_container(it)->ic_descr->id_key_size);
-+}
-+
-+
-+/*
-+ * Replace contents of record under iterator.
-+ *
-+ * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
-+ *                ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
-+ */
-+int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r)
-+{
-+        int result;
-+
-+        assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
-+
-+        result = ext3_journal_get_write_access(h, it->ii_path.ip_leaf.il_bh);
-+        if (result == 0)
-+                iam_it_reccpy(it, r);
-+        return result;
-+}
-+
-+/*
-+ * Return pointer to the key under iterator.
-+ *
-+ * precondition:  it_state(it) == IAM_IT_ATTACHED
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED
-+ */
-+struct iam_key *iam_it_key_get(struct iam_iterator *it, struct iam_key *k)
-+{
-+        assert(it_state(it) == IAM_IT_ATTACHED);
-+        return iam_leaf_key(&it->ii_path.ip_leaf, k);
-+}
-+
-+static int iam_leaf_rec_add(handle_t *handle, struct iam_path *path)
-+{
-+        int err;
-+
-+        err = ext3_journal_get_write_access(handle, path->ip_leaf.il_bh);
-+        if (err)
-+                goto journal_error;
-+        iam_rec_add(&path->ip_leaf, NULL, NULL);
-+      err = ext3_journal_dirty_metadata(handle, path->ip_leaf.il_bh);
-+journal_error:
-+      if (err)
-+                ext3_std_error(iam_path_obj(path)->i_sb, err);
-+      return err;
-+}
-+
-+static int iam_new_leaf(handle_t *handle, struct iam_leaf *leaf)
-+{
-+        int err;
-+        int err2;
-+        u32 blknr; /* XXX 32bit block size */
-+        struct buffer_head   *new_leaf;
-+        struct iam_container *c;
-+
-+        c = iam_leaf_container(leaf);
-+        err = ext3_journal_get_write_access(handle, leaf->il_bh);
-+        if (err == 0) {
-+                struct inode *obj;
-+
-+                obj = c->ic_object;
-+                new_leaf = ext3_append(handle, c->ic_object, &blknr, &err);
-+                if (new_leaf != NULL) {
-+                        iam_leaf_ops(leaf)->init_new(c, new_leaf);
-+                        iam_leaf_ops(leaf)->split(leaf, new_leaf);
-+                        err = ext3_journal_dirty_metadata(handle, new_leaf);
-+                        err2 = ext3_journal_dirty_metadata(handle, leaf->il_bh);
-+                        err = err ? : err2;
-+                        if (err)
-+                                ext3_std_error(obj->i_sb, err);
-+                        brelse(new_leaf);
-+                }
-+        }
-+        return err;
-+}
-+
-+int iam_add_rec(handle_t *handle, struct iam_path *path,
-+                struct iam_key *k, struct iam_rec *r)
-+{
-+      int err;
-+
-+      if (iam_leaf_can_add(&path->ip_leaf, k, r)) {
-+              err = iam_leaf_rec_add(handle, path);
-+      } else {
-+              err = split_index_node(handle, path);
-+              if (err == 0) {
-+                        err = iam_new_leaf(handle, &path->ip_leaf);
-+                      if (err == 0)
-+                              err = iam_leaf_rec_add(handle, path);
-+              }
-+      }
-+      return err;
-+}
-+
-+/*
-+ * Insert new record with key @k and contents from @r, shifting records to the
-+ * right.
-+ *
-+ * precondition:  it_state(it) == IAM_IT_ATTACHED &&
-+ *                it->ii_flags&IAM_IT_WRITE &&
-+ *                it_keycmp(it, iam_it_key_get(it, *), k) < 0
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
-+ *                ergo(result == 0,
-+ *                     it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
-+ *                     !memcmp(iam_it_rec_get(it), r, ...))
-+ */
-+int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
-+                      struct iam_key *k, struct iam_rec *r)
-+{
-+        int result;
-+
-+        assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
-+#if 0
-+        /*XXX remove this assert temporarily, since if the il_at point to the hearder,
-+         * this assert might has some problems*/
-+        assert(it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)), k) < 0);
-+#endif
-+      result = iam_add_rec(h, &it->ii_path, k, r);
-+      if (result == 0) {
-+              /* place record and key info freed space. Leaf node is already
-+               * in transaction. */
-+              iam_it_reccpy(it, r);
-+                iam_it_keycpy(it, k);
-+                iam_keycpy(it->ii_path.ip_container, it_scratch_key(it, 0), k);
-+                /*
-+               * XXX TBD.
-+               */
-+        }
-+        assert(it_state(it) == IAM_IT_ATTACHED);
-+        assert(ergo(result == 0,
-+                    it_keycmp(it,
-+                              iam_it_key_get(it,
-+                                             it_scratch_key(it, 0)), k) == 0 &&
-+                    !memcmp(iam_it_rec_get(it), r,
-+                            iam_it_container(it)->ic_descr->id_rec_size)));
-+        return result;
-+}
-+
-+static int iam_leaf_rec_remove(handle_t *handle, struct iam_leaf *leaf)
-+{
-+      int err;
-+
-+        iam_rec_del(leaf);
-+      err = ext3_journal_dirty_metadata(handle, leaf->il_bh);
-+      if (err)
-+              ext3_std_error(iam_path_obj(iam_leaf_path(leaf))->i_sb, err);
-+      return err;
-+}
-+
-+/*
-+ * Delete record under iterator.
-+ *
-+ * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED
-+ */
-+int iam_it_rec_delete(handle_t *h, struct iam_iterator *it)
-+{
-+        int result;
-+
-+        assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
-+
-+        result = ext3_journal_get_write_access(h, it->ii_path.ip_leaf.il_bh);
-+        /*
-+         * no compaction for now.
-+         */
-+        if (result == 0)
-+                iam_leaf_rec_remove(h, &it->ii_path.ip_leaf);
-+
-+      return result;
-+}
-+
-+/*
-+ * Convert iterator to cookie.
-+ *
-+ * precondition:  it_state(it) == IAM_IT_ATTACHED &&
-+ *                iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED
-+ */
-+iam_pos_t iam_it_store(struct iam_iterator *it)
-+{
-+        iam_pos_t result;
-+
-+        assert(it_state(it) == IAM_IT_ATTACHED);
-+        assert(iam_it_container(it)->ic_descr->id_key_size <= sizeof result);
-+
-+        result = 0;
-+        iam_it_key_get(it, (struct iam_key *)&result);
-+        return result;
-+}
-+
-+/*
-+ * Restore iterator from cookie.
-+ *
-+ * precondition:  it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE &&
-+ *                iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
-+ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED &&
-+ *                                  iam_it_store(it) == pos)
-+ */
-+int iam_it_load(struct iam_iterator *it, iam_pos_t pos)
-+{
-+        assert(it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE);
-+        assert(iam_it_container(it)->ic_descr->id_key_size <= sizeof pos);
-+        return iam_it_get(it, (struct iam_key *)&pos);
-+}
-+
-+/***********************************************************************/
-+/* invariants                                                          */
-+/***********************************************************************/
-+
-+static inline int ptr_inside(void *base, size_t size, void *ptr)
-+{
-+        return (base <= ptr) && (ptr < base + size);
-+}
-+
-+int iam_frame_invariant(struct iam_frame *f)
-+{
-+        return
-+                (f->bh != NULL &&
-+                f->bh->b_data != NULL &&
-+                ptr_inside(f->bh->b_data, f->bh->b_size, f->entries) &&
-+                ptr_inside(f->bh->b_data, f->bh->b_size, f->at) &&
-+                f->entries <= f->at);
-+}
-+int iam_leaf_invariant(struct iam_leaf *l)
-+{
-+        return
-+                l->il_bh != NULL &&
-+                l->il_bh->b_data != NULL &&
-+                ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_entries) &&
-+                ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_at) &&
-+                l->il_entries <= l->il_at;
-+}
-+
-+int iam_path_invariant(struct iam_path *p)
-+{
-+        int i;
-+
-+        if (p->ip_container == NULL ||
-+            p->ip_indirect < 0 || p->ip_indirect > DX_MAX_TREE_HEIGHT - 1 ||
-+            p->ip_frame != p->ip_frames + p->ip_indirect ||
-+            !iam_leaf_invariant(&p->ip_leaf))
-+                return 0;
-+        for (i = 0; i < ARRAY_SIZE(p->ip_frames); ++i) {
-+                if (i <= p->ip_indirect) {
-+                        if (!iam_frame_invariant(&p->ip_frames[i]))
-+                                return 0;
-+                }
-+        }
-+        return 1;
-+}
-+
-+int iam_it_invariant(struct iam_iterator *it)
-+{
-+        return
-+                (it->ii_state == IAM_IT_DETACHED ||
-+                 it->ii_state == IAM_IT_ATTACHED) &&
-+                !(it->ii_flags & ~(IAM_IT_MOVE | IAM_IT_WRITE)) &&
-+                ergo(it->ii_state == IAM_IT_ATTACHED,
-+                     iam_path_invariant(&it->ii_path));
-+}
-+
-+/*
-+ * Search container @c for record with key @k. If record is found, its data
-+ * are moved into @r.
-+ *
-+ *
-+ *
-+ * Return values: +ve: found, 0: not-found, -ve: error
-+ */
-+int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r,
-+             struct iam_path_descr *pd)
-+{
-+        struct iam_iterator it;
-+        int result;
-+
-+        iam_it_init(&it, c, 0, pd);
-+
-+        result = iam_it_get_exact(&it, k);
-+        if (result == 0)
-+                /*
-+                 * record with required key found, copy it into user buffer
-+                 */
-+                iam_reccpy(&it.ii_path, r, iam_it_rec_get(&it));
-+        iam_it_put(&it);
-+        iam_it_fini(&it);
-+        return result;
-+}
-+EXPORT_SYMBOL(iam_lookup);
-+
-+/*
-+ * Insert new record @r with key @k into container @c (within context of
-+ * transaction @h.
-+ *
-+ * Return values: 0: success, -ve: error, including -EEXIST when record with
-+ * given key is already present.
-+ *
-+ * postcondition: ergo(result == 0 || result == -EEXIST,
-+ *                                  iam_lookup(c, k, r2) > 0 &&
-+ *                                  !memcmp(r, r2, c->ic_descr->id_rec_size));
-+ */
-+int iam_insert(handle_t *h, struct iam_container *c,
-+               struct iam_key *k, struct iam_rec *r, struct iam_path_descr *pd)
-+{
-+        struct iam_iterator it;
-+        int result;
-+
-+        iam_it_init(&it, c, IAM_IT_WRITE, pd);
-+
-+        result = iam_it_get_exact(&it, k);
-+        if (result == -ENOENT)
-+                result = iam_it_rec_insert(h, &it, k, r);
-+        else if (result == 0)
-+                result = -EEXIST;
-+        iam_it_put(&it);
-+        iam_it_fini(&it);
-+        return result;
-+}
-+EXPORT_SYMBOL(iam_insert);
-+
-+int iam_update(handle_t *h, struct iam_container *c,
-+               struct iam_key *k, struct iam_rec *r, struct iam_path_descr *pd)
-+{
-+        struct iam_iterator it;
-+        int result;
-+
-+        iam_it_init(&it, c, IAM_IT_WRITE, pd);
-+
-+        result = iam_it_get_exact(&it, k);
-+        if (result == 0)
-+                iam_it_rec_set(h, &it, r);
-+        iam_it_put(&it);
-+        iam_it_fini(&it);
-+        return result;
-+}
-+EXPORT_SYMBOL(iam_update);
-+
-+/*
-+ * Delete existing record with key @k.
-+ *
-+ * Return values: 0: success, -ENOENT: not-found, -ve: other error.
-+ *
-+ * postcondition: ergo(result == 0 || result == -ENOENT,
-+ *                                 !iam_lookup(c, k, *));
-+ */
-+int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k,
-+             struct iam_path_descr *pd)
-+{
-+        struct iam_iterator it;
-+        int result;
-+
-+        iam_it_init(&it, c, IAM_IT_WRITE, pd);
-+
-+        result = iam_it_get_exact(&it, k);
-+        if (result == 0)
-+                iam_it_rec_delete(h, &it);
-+        iam_it_put(&it);
-+        iam_it_fini(&it);
-+        return result;
-+}
-+EXPORT_SYMBOL(iam_delete);
-+
-Index: iam/fs/ext3/iam_lfix.c
-===================================================================
---- iam.orig/fs/ext3/iam_lfix.c        2004-04-06 17:27:52.000000000 +0400
-+++ iam/fs/ext3/iam_lfix.c     2006-05-29 00:42:57.000000000 +0400
-@@ -0,0 +1,309 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ *  iam_lfix.c
-+ *  implementation of iam format for fixed size records.
-+ *
-+ *  Copyright (c) 2006 Cluster File Systems, Inc.
-+ *   Author: Wang Di <wangdi@clusterfs.com>
-+ *   Author: Nikita Danilov <nikita@clusterfs.com>
-+ *
-+ *   This file is part of the Lustre file system, http://www.lustre.org
-+ *   Lustre is a trademark of Cluster File Systems, Inc.
-+ *
-+ *   You may have signed or agreed to another license before downloading
-+ *   this software.  If so, you are bound by the terms and conditions
-+ *   of that agreement, and the following does not apply to you.  See the
-+ *   LICENSE file included with this distribution for more information.
-+ *
-+ *   If you did not agree to a different license, then this copy of Lustre
-+ *   is open source software; you can redistribute it and/or modify it
-+ *   under the terms of version 2 of the GNU General Public License as
-+ *   published by the Free Software Foundation.
-+ *
-+ *   In either case, Lustre is distributed in the hope that it will be
-+ *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
-+ *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-+ *   license text for more details.
-+ */
-+
-+#include <linux/types.h>
-+#include <linux/jbd.h>
-+/* ext3_error() */
-+#include <linux/ext3_fs.h>
-+
-+#include <linux/lustre_iam.h>
-+
-+#include <libcfs/libcfs.h>
-+#include <libcfs/kp30.h>
-+
-+static inline int iam_lfix_entry_size(const struct iam_leaf *l)
-+{
-+      return iam_leaf_descr(l)->id_key_size + iam_leaf_descr(l)->id_rec_size;
-+}
-+
-+static inline struct iam_lentry *
-+iam_lfix_shift(const struct iam_leaf *l, struct iam_lentry *entry, int shift)
-+{
-+              void *e = entry;
-+      return e + shift * iam_lfix_entry_size(l);
-+}
-+
-+static inline const struct iam_key *
-+iam_leaf_key_at(const struct iam_container *c, const struct iam_lentry *entry)
-+{
-+        return (const struct iam_key *)entry;
-+}
-+
-+static struct iam_lentry *iam_entries(const struct buffer_head *bh)
-+{
-+        return (void *)bh->b_data + sizeof(struct iam_leaf_head);
-+}
-+
-+static struct iam_lentry *iam_get_lentries(const struct iam_leaf *l)
-+{
-+        return iam_entries(l->il_bh);
-+}
-+
-+static int lentry_count_get(const struct iam_leaf *leaf)
-+{
-+        struct iam_lentry *lentry = leaf->il_entries;
-+        return le16_to_cpu(((struct iam_leaf_head *)lentry)->ill_count);
-+}
-+
-+static void lentry_count_set(struct iam_leaf *leaf, unsigned count)
-+{
-+        struct iam_lentry *lentry = leaf->il_entries;
-+      ((struct iam_leaf_head *)lentry)->ill_count = cpu_to_le16(count);
-+}
-+
-+/*This func is for flat key, for those keys,
-+ *which are not stored explicitly
-+ *it would be decrypt in the key buffer
-+ */
-+struct iam_key *iam_lfix_key(struct iam_leaf *l, struct iam_key *key)
-+{
-+        void *ie = l->il_at;
-+        return (struct iam_key*)ie;
-+}
-+
-+static void iam_lfix_start(struct iam_leaf *l)
-+{
-+        l->il_at = iam_get_lentries(l);
-+}
-+
-+static inline ptrdiff_t iam_lfix_diff(struct iam_leaf *l, struct iam_lentry *e1,
-+                                    struct iam_lentry *e2)
-+{
-+      ptrdiff_t diff;
-+        int esize;
-+
-+        esize = iam_lfix_entry_size(l);
-+      diff = (void *)e1 - (void *)e2;
-+      assert(diff / esize * esize == diff);
-+      return diff / esize;
-+}
-+
-+static int iam_lfix_init(struct iam_leaf *l)
-+{
-+        int result;
-+        struct iam_leaf_head *ill;
-+
-+        assert(l->il_bh != NULL);
-+
-+        ill = (struct iam_leaf_head*)l->il_bh->b_data;
-+        if (ill->ill_magic == le16_to_cpu(IAM_LEAF_HEADER_MAGIC)) {
-+                l->il_at = l->il_entries = iam_get_lentries(l);
-+                result = 0;
-+        } else {
-+                struct inode *obj;
-+
-+                obj = iam_leaf_container(l)->ic_object;
-+                ext3_error(obj->i_sb, __FUNCTION__,
-+                           "Wrong magic in node %llu (#%lu): %#x != %#x\n",
-+                           l->il_bh->b_blocknr, obj->i_ino,
-+                           ill->ill_magic, le16_to_cpu(IAM_LEAF_HEADER_MAGIC));
-+                result = -EIO;
-+        }
-+        return result;
-+}
-+
-+static void iam_lfix_fini(struct iam_leaf *l)
-+{
-+        l->il_entries = l->il_at = NULL;
-+        return;
-+}
-+
-+static struct iam_lentry *iam_lfix_get_end(const struct iam_leaf *l)
-+{
-+        int count = lentry_count_get(l);
-+        struct iam_lentry *ile = iam_lfix_shift(l, l->il_entries, count);
-+
-+        return ile;
-+}
-+
-+struct iam_rec *iam_lfix_rec(struct iam_leaf *l)
-+{
-+        void *e = l->il_at;
-+        return e + iam_leaf_descr(l)->id_key_size;
-+}
-+
-+static void iam_lfix_next(struct iam_leaf *l)
-+{
-+        assert(!iam_leaf_at_end(l));
-+        l->il_at = iam_lfix_shift(l, l->il_at, 1);
-+}
-+
-+static int iam_lfix_lookup(struct iam_leaf *l, struct iam_key *k)
-+{
-+        struct iam_lentry *p, *q, *m;
-+        struct iam_container *c;
-+        int count;
-+
-+        count = lentry_count_get(l);
-+        c = iam_leaf_container(l);
-+
-+        p = iam_lfix_shift(l, l->il_entries, 1);
-+        q = iam_lfix_shift(l, l->il_entries, count - 1);
-+
-+        while (p <= q) {
-+                m = iam_lfix_shift(l, p, iam_lfix_diff(l, q, p) / 2);
-+                if (iam_keycmp(c, iam_leaf_key_at(c, m), k) > 0)
-+                        q = iam_lfix_shift(l, m, -1);
-+                else
-+                        p = iam_lfix_shift(l, m, +1);
-+        }
-+        l->il_at = iam_lfix_shift(l, p, -1);
-+        iam_keycpy(c, iam_path_key(iam_leaf_path(l), 0), iam_leaf_key_at(c, q));
-+
-+        if (l->il_at == l->il_entries ||
-+            iam_keycmp(c, iam_leaf_key_at(c, q), k) != 0)
-+                return -ENOENT;
-+        else
-+                return 0;
-+}
-+
-+static void iam_lfix_rec_add(struct iam_leaf *leaf,
-+                             struct iam_key *k, struct iam_rec *r)
-+{
-+        struct iam_lentry *end, *next, *cur, *nnext;
-+        ptrdiff_t diff;
-+        int count;
-+
-+        count = lentry_count_get(leaf);
-+        end = iam_lfix_get_end(leaf);
-+        cur = leaf->il_at;
-+        if (cur != end) {
-+                next = iam_lfix_shift(leaf, cur, 1);
-+                if (next != end) {
-+                        nnext = iam_lfix_shift(leaf, next, 1);
-+                        diff = (void *)end - (void *)next;
-+                        memmove(nnext, next, diff);
-+                }
-+                iam_lfix_next(leaf);
-+        }
-+        lentry_count_set(leaf, count + 1);
-+}
-+
-+static void iam_lfix_rec_del(struct iam_leaf *leaf)
-+{
-+      struct iam_lentry *next, *end;
-+      int count;
-+      ptrdiff_t diff;
-+
-+        count = lentry_count_get(leaf);
-+        end = iam_lfix_get_end(leaf);
-+        next = iam_lfix_shift(leaf, leaf->il_at, 1);
-+        diff = (void *)end - (void *)next;
-+        memmove(leaf->il_at, next, diff);
-+
-+      lentry_count_set(leaf, count - 1);
-+}
-+
-+static int iam_lfix_can_add(struct iam_leaf *l,
-+                            struct iam_key *k, struct iam_rec *r)
-+{
-+        struct iam_lentry *end;
-+        int block_size = iam_leaf_container(l)->ic_object->i_sb->s_blocksize;
-+        unsigned long left, entry_size;
-+
-+        end = iam_lfix_get_end(l);
-+
-+        left = block_size - iam_leaf_descr(l)->id_node_gap;
-+
-+        left -= (unsigned long)((void*)end - (void*)l->il_entries);
-+
-+        entry_size = iam_lfix_entry_size(l);
-+
-+        if (left >= entry_size)
-+                return 1;
-+
-+        return 0;
-+}
-+
-+static int iam_lfix_at_end(const struct iam_leaf *folio)
-+{
-+        struct iam_lentry *ile = iam_lfix_get_end(folio);
-+
-+        return (folio->il_at == ile);
-+}
-+
-+static void iam_lfix_init_new(struct iam_container *c, struct buffer_head *bh)
-+{
-+        struct iam_leaf_head *hdr;
-+
-+        hdr = (struct iam_leaf_head*)bh->b_data;
-+        hdr->ill_magic = cpu_to_le16(IAM_LEAF_HEADER_MAGIC);
-+        hdr->ill_count = cpu_to_le16(0);
-+}
-+
-+static void iam_lfix_split(struct iam_leaf *l, struct buffer_head *bh)
-+{
-+        struct iam_path      *path;
-+      struct iam_leaf_head *hdr;
-+        const struct iam_key *pivot;
-+
-+      unsigned count;
-+      unsigned split;
-+
-+      void *start;
-+      void *finis;
-+
-+        path = iam_leaf_path(l);
-+
-+      hdr = (void *)bh->b_data;
-+
-+        count = lentry_count_get(l);
-+        split = count / 2;
-+
-+        start = iam_lfix_shift(l, iam_get_lentries(l), split);
-+        finis = iam_lfix_shift(l, iam_get_lentries(l), count);
-+
-+        pivot = iam_leaf_key_at(iam_leaf_container(l), start);
-+
-+        memmove(iam_entries(bh), start, finis - start);
-+        hdr->ill_count = count - split;
-+        lentry_count_set(l, split);
-+        /*
-+         * Insert pointer to the new node (together with the smallest key in
-+         * the node) into index node.
-+         */
-+        iam_insert_key(path, path->ip_frame, pivot, bh->b_blocknr);
-+}
-+
-+struct iam_leaf_operations iam_lfix_leaf_ops = {
-+        .init           = iam_lfix_init,
-+        .init_new       = iam_lfix_init_new,
-+        .fini           = iam_lfix_fini,
-+        .start          = iam_lfix_start,
-+        .next           = iam_lfix_next,
-+        .key            = iam_lfix_key,
-+        .rec            = iam_lfix_rec,
-+        .lookup         = iam_lfix_lookup,
-+        .at_end         = iam_lfix_at_end,
-+        .rec_add        = iam_lfix_rec_add,
-+        .rec_del        = iam_lfix_rec_del,
-+        .can_add        = iam_lfix_can_add,
-+        .split          = iam_lfix_split
-+};
-+EXPORT_SYMBOL(iam_lfix_leaf_ops);
-Index: iam/fs/ext3/namei.c
-===================================================================
---- iam.orig/fs/ext3/namei.c   2006-05-27 19:58:44.000000000 +0400
-+++ iam/fs/ext3/namei.c        2006-05-29 00:40:31.000000000 +0400
-@@ -24,81 +24,6 @@
-  *    Theodore Ts'o, 2002
-  */
--/*
-- * iam: big theory statement.
-- *
-- * iam (Index Access Module) is a module providing abstraction of persistent
-- * transactional container on top of generalized ext3 htree.
-- *
-- * iam supports:
-- *
-- *     - key, pointer, and record size specifiable per container.
-- *
-- *     - trees taller than 2 index levels.
-- *
-- *     - read/write to existing ext3 htree directories as iam containers.
-- *
-- * iam container is a tree, consisting of leaf nodes containing keys and
-- * records stored in this container, and index nodes, containing keys and
-- * pointers to leaf or index nodes.
-- *
-- * iam does not work with keys directly, instead it calls user-supplied key
-- * comparison function (->dpo_keycmp()).
-- *
-- * Pointers are (currently) interpreted as logical offsets (measured in
-- * blocksful) within underlying flat file on top of which iam tree lives.
-- *
-- * On-disk format:
-- *
-- * iam mostly tries to reuse existing htree formats.
-- *
-- * Format of index node:
-- *
-- * +-----+-------+-------+-------+------+-------+------------+
-- * |     | count |       |       |      |       |            |
-- * | gap |   /   | entry | entry | .... | entry | free space |
-- * |     | limit |       |       |      |       |            |
-- * +-----+-------+-------+-------+------+-------+------------+
-- *
-- *       gap           this part of node is never accessed by iam code. It
-- *                     exists for binary compatibility with ext3 htree (that,
-- *                     in turn, stores fake struct ext2_dirent for ext2
-- *                     compatibility), and to keep some unspecified per-node
-- *                     data. Gap can be different for root and non-root index
-- *                     nodes. Gap size can be specified for each container
-- *                     (gap of 0 is allowed).
-- *
-- *       count/limit   current number of entries in this node, and the maximal
-- *                     number of entries that can fit into node. count/limit
-- *                     has the same size as entry, and is itself counted in
-- *                     count.
-- *
-- *       entry         index entry: consists of a key immediately followed by
-- *                     a pointer to a child node. Size of a key and size of a
-- *                     pointer depends on container. Entry has neither
-- *                     alignment nor padding.
-- *
-- *       free space    portion of node new entries are added to
-- *
-- * Entries in index node are sorted by their key value.
-- *
-- * Format of leaf node:
-- *
-- * +-----+-------+-------+-------+------+-------+------------+
-- * |     | count |       |       |      |       |            |
-- * | gap |   /   | leaf  | leaf  | .... | leaf  | free space |
-- * |     | limit |       |       |      |       |            |
-- * +-----+-------+-------+-------+------+-------+------------+
--
-- *       leaf          For leaf entry: consists of a rec immediately followd by 
-- *                     a key. size of a key and size of a rec depends on container.  
-- *
-- *
-- *
-- *
-- *
-- */
--
- #include <linux/module.h>
- #include <linux/fs.h>
- #include <linux/pagemap.h>
-@@ -112,10 +37,10 @@
- #include <linux/quotaops.h>
- #include <linux/buffer_head.h>
- #include <linux/smp_lock.h>
-+#include <linux/lustre_iam.h>
- #include "xattr.h"
- #include "iopen.h"
- #include "acl.h"
--#include <linux/lustre_iam.h>
- /*
-  * define how far ahead to read directories while searching them.
-  */
-@@ -125,9 +50,9 @@
- #define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
--static struct buffer_head *ext3_append(handle_t *handle,
--                                      struct inode *inode,
--                                      u32 *block, int *err)
-+struct buffer_head *ext3_append(handle_t *handle,
-+                              struct inode *inode,
-+                              u32 *block, int *err)
- {
-       struct buffer_head *bh;
-@@ -136,14 +61,15 @@ static struct buffer_head *ext3_append(h
-       if ((bh = ext3_bread(handle, inode, *block, 1, err))) {
-               inode->i_size += inode->i_sb->s_blocksize;
-               EXT3_I(inode)->i_disksize = inode->i_size;
--              ext3_journal_get_write_access(handle,bh);
-+              *err = ext3_journal_get_write_access(handle, bh);
-+              if (err != 0) {
-+                      brelse(bh);
-+                      bh = NULL;
+ #include "xattr.h"
+ #include "iopen.h"
+ #include "acl.h"
+-#include <linux/lustre_iam.h>
+ /*
+  * define how far ahead to read directories while searching them.
+  */
+@@ -125,9 +50,9 @@
+ #define NAMEI_RA_INDEX(c,b)  (((c) * NAMEI_RA_BLOCKS) + (b))
+-static struct buffer_head *ext3_append(handle_t *handle,
+-                                      struct inode *inode,
+-                                      u32 *block, int *err)
++struct buffer_head *ext3_append(handle_t *handle,
++                              struct inode *inode,
++                              u32 *block, int *err)
+ {
+       struct buffer_head *bh;
+@@ -136,14 +61,15 @@
+       if ((bh = ext3_bread(handle, inode, *block, 1, err))) {
+               inode->i_size += inode->i_sb->s_blocksize;
+               EXT3_I(inode)->i_disksize = inode->i_size;
+-              ext3_journal_get_write_access(handle,bh);
++              *err = ext3_journal_get_write_access(handle, bh);
++              if (*err != 0) {
++                      brelse(bh);
++                      bh = NULL;
 +              }
        }
        return bh;
@@ -1451,7 +129,7 @@ Index: iam/fs/ext3/namei.c
  
  #ifndef swap
  #define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
-@@ -162,10 +88,6 @@ struct fake_dirent {
+@@ -162,10 +88,6 @@
        u8 file_type;
  };
  
@@ -1462,7 +140,7 @@ Index: iam/fs/ext3/namei.c
  
  /*
   * dx_root_info is laid out so that if it should somehow get overlaid by a
-@@ -203,245 +125,10 @@ struct dx_map_entry
+@@ -203,245 +125,10 @@
  };
  
  
@@ -1708,7 +386,7 @@ Index: iam/fs/ext3/namei.c
  static unsigned dx_get_limit(struct iam_entry *entries);
  static void dx_set_count(struct iam_entry *entries, unsigned value);
  static void dx_set_limit(struct iam_entry *entries, unsigned value);
-@@ -457,80 +144,29 @@ static void dx_sort_map(struct dx_map_en
+@@ -457,80 +144,29 @@
  static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
                struct dx_map_entry *offsets, int count);
  static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
@@ -1796,7 +474,7 @@ Index: iam/fs/ext3/namei.c
        return key;
  }
  
-@@ -540,68 +176,70 @@ static inline struct iam_key *iam_key_at
+@@ -540,68 +176,70 @@
        return (struct iam_key *)entry;
  }
  
@@ -1909,7 +587,7 @@ Index: iam/fs/ext3/namei.c
  static int dx_node_check(struct iam_path *p, struct iam_frame *f)
  {
        struct iam_entry     *e;
-@@ -614,10 +252,10 @@ static int dx_node_check(struct iam_path
+@@ -614,10 +252,10 @@
        count = dx_get_count(e);
        e = iam_entry_shift(p, e, 1);
        for (i = 0; i < count - 1; ++i, e = iam_entry_shift(p, e, 1)) {
@@ -1923,7 +601,7 @@ Index: iam/fs/ext3/namei.c
                        return 0;
        }
        return 1;
-@@ -636,13 +274,17 @@ static int htree_node_check(struct iam_p
+@@ -636,13 +274,17 @@
  
        data = frame->bh->b_data;
        entries = dx_node_get_entries(path, frame);
@@ -1943,7 +621,7 @@ Index: iam/fs/ext3/namei.c
                if (root->info.hash_version > DX_HASH_MAX) {
                        ext3_warning(sb, __FUNCTION__,
                                     "Unrecognised inode hash code %d",
-@@ -669,15 +311,16 @@ static int htree_node_check(struct iam_p
+@@ -669,15 +311,16 @@
                                           root->info.info_length));
                assert(dx_get_limit(entries) == dx_root_limit(path));
  
@@ -1967,7 +645,7 @@ Index: iam/fs/ext3/namei.c
                assert(dx_get_limit(entries) == dx_node_limit(path));
        }
        frame->entries = frame->at = entries;
-@@ -697,8 +340,8 @@ static int htree_node_init(struct iam_co
+@@ -697,8 +340,8 @@
        return 0;
  }
  
@@ -1978,7 +656,7 @@ Index: iam/fs/ext3/namei.c
  {
        int result = 0;
  
-@@ -708,8 +351,8 @@ static int htree_node_read(struct iam_co
+@@ -708,8 +351,8 @@
        return result;
  }
  
@@ -1989,7 +667,7 @@ Index: iam/fs/ext3/namei.c
  {
        __u32 p1 = le32_to_cpu(*(__u32 *)k1);
        __u32 p2 = le32_to_cpu(*(__u32 *)k2);
-@@ -800,7 +443,7 @@ struct stats dx_show_entries(struct dx_h
+@@ -800,7 +443,7 @@
  }
  #endif /* DX_DEBUG */
  
@@ -1998,7 +676,7 @@ Index: iam/fs/ext3/namei.c
  {
        u32 ptr;
        int err = 0;
-@@ -810,11 +453,11 @@ static int dx_lookup(struct iam_path *pa
+@@ -810,11 +453,11 @@
        struct iam_frame *frame;
        struct iam_container *c;
  
@@ -2012,7 +690,7 @@ Index: iam/fs/ext3/namei.c
             i <= path->ip_indirect;
             ptr = dx_get_block(path, frame->at), ++frame, ++i) {
                struct iam_entry *entries;
-@@ -823,10 +466,11 @@ static int dx_lookup(struct iam_path *pa
+@@ -823,10 +466,11 @@
                struct iam_entry *m;
                unsigned count;
  
@@ -2026,7 +704,7 @@ Index: iam/fs/ext3/namei.c
                if (err != 0)
                        break;
  
-@@ -841,8 +485,8 @@ static int dx_lookup(struct iam_path *pa
+@@ -841,8 +485,8 @@
                        m = iam_entry_shift(path,
                                           p, iam_entry_diff(path, q, p) / 2);
                        dxtrace(printk("."));
@@ -2037,7 +715,7 @@ Index: iam/fs/ext3/namei.c
                                q = iam_entry_shift(path, m, -1);
                        else
                                p = iam_entry_shift(path, m, +1);
-@@ -857,12 +501,12 @@ static int dx_lookup(struct iam_path *pa
+@@ -857,12 +501,12 @@
                        while (n--) {
                                dxtrace(printk(","));
                                at = iam_entry_shift(path, at, +1);
@@ -2053,7 +731,7 @@ Index: iam/fs/ext3/namei.c
                                                              path->ip_key_target));
                                        }
                                        at = iam_entry_shift(path, at, -1);
-@@ -891,508 +535,20 @@ static int dx_probe(struct dentry *dentr
+@@ -891,508 +535,20 @@
                    struct dx_hash_info *hinfo, struct iam_path *path)
  {
        int err;
@@ -2568,241 +1246,1564 @@ Index: iam/fs/ext3/namei.c
   * This function increments the frame pointer to search the next leaf
   * block, and reads in the necessary intervening nodes if the search
   * should be necessary.  Whether or not the search is necessary is
-@@ -1409,16 +565,15 @@ EXPORT_SYMBOL(iam_update);
+@@ -1409,16 +565,15 @@
   * If start_hash is non-null, it will be filled in with the starting
   * hash of the next page.
   */
--static int ext3_htree_next_block(struct inode *dir, __u32 hash,
--                               struct iam_path *path, __u32 *start_hash)
-+static int ext3_htree_advance(struct inode *dir, __u32 hash,
-+                            struct iam_path *path, __u32 *start_hash,
-+                            int compat)
+-static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+-                               struct iam_path *path, __u32 *start_hash)
++static int ext3_htree_advance(struct inode *dir, __u32 hash,
++                            struct iam_path *path, __u32 *start_hash,
++                            int compat)
+ {
+       struct iam_frame *p;
+       struct buffer_head *bh;
+       int err, num_frames = 0;
+       __u32 bhash;
+-      assert(dx_index_is_compat(path));
+-
+       p = path->ip_frame;
+       /*
+        * Find the next leaf page by incrementing the frame pointer.
+@@ -1438,28 +593,34 @@
+               --p;
+       }
+-      /*
+-       * If the hash is 1, then continue only if the next page has a
+-       * continuation hash of any value.  This is used for readdir
+-       * handling.  Otherwise, check to see if the hash matches the
+-       * desired contiuation hash.  If it doesn't, return since
+-       * there's no point to read in the successive index pages.
+-       */
+-      dx_get_key(path, p->at, (struct iam_key *)&bhash);
+-      if (start_hash)
+-              *start_hash = bhash;
+-      if ((hash & 1) == 0) {
+-              if ((bhash & ~1) != hash)
+-                      return 0;
++      if (compat) {
++              /*
++               * Htree hash magic.
++               */
++              /*
++               * If the hash is 1, then continue only if the next page has a
++               * continuation hash of any value.  This is used for readdir
++               * handling.  Otherwise, check to see if the hash matches the
++               * desired continuation hash.  If it doesn't, return since
++               * there's no point to read in the successive index pages.
++               */
++              iam_get_key(path, p->at, (struct iam_key *)&bhash);
++              if (start_hash)
++                      *start_hash = bhash;
++              if ((hash & 1) == 0) {
++                      if ((bhash & ~1) != hash)
++                              return 0;
++              }
+       }
+       /*
+        * If the hash is HASH_NB_ALWAYS, we always go to the next
+        * block so no check is necessary
+        */
+       while (num_frames--) {
+-              err = path_descr(path)->id_node_read(path->ip_container,
+-                                                   (iam_ptr_t)dx_get_block(path, p->at),
+-                                                   NULL, &bh);
++              err = iam_path_descr(path)->id_ops->
++                      id_node_read(path->ip_container,
++                                   (iam_ptr_t)dx_get_block(path, p->at),
++                                   NULL, &bh);
+               if (err != 0)
+                       return err; /* Failure */
+               ++p;
+@@ -1471,6 +632,16 @@
+       return 1;
+ }
++int iam_index_next(struct iam_container *c, struct iam_path *path)
++{
++      return ext3_htree_advance(c->ic_object, 0, path, NULL, 0);
++}
++
++int ext3_htree_next_block(struct inode *dir, __u32 hash,
++                        struct iam_path *path, __u32 *start_hash)
++{
++      return ext3_htree_advance(dir, hash, path, start_hash, 1);
++}
+ /*
+  * p is at least 6 bytes before the end of page
+@@ -1662,21 +833,30 @@
+       } while(more);
+ }
+-static void dx_insert_block(struct iam_path *path,
+-                          struct iam_frame *frame, u32 hash, u32 block)
++void iam_insert_key(struct iam_path *path, struct iam_frame *frame,
++                  const struct iam_key *key, iam_ptr_t ptr)
+ {
+       struct iam_entry *entries = frame->entries;
+-      struct iam_entry *old = frame->at, *new = iam_entry_shift(path, old, +1);
++      struct iam_entry *new = iam_entry_shift(path, frame->at, +1);
+       int count = dx_get_count(entries);
+       assert(count < dx_get_limit(entries));
+-      assert(old < iam_entry_shift(path, entries, count));
++      assert(frame->at < iam_entry_shift(path, entries, count));
++
+       memmove(iam_entry_shift(path, new, 1), new,
+               (char *)iam_entry_shift(path, entries, count) - (char *)new);
+-      dx_set_key(path, new, (struct iam_key *)&hash);
+-      dx_set_block(path, new, block);
++      dx_set_key(path, new, key);
++      dx_set_block(path, new, ptr);
+       dx_set_count(entries, count + 1);
+ }
++
++void dx_insert_block(struct iam_path *path, struct iam_frame *frame,
++                   u32 hash, u32 block)
++{
++      assert(dx_index_is_compat(path));
++      iam_insert_key(path, frame, (struct iam_key *)&hash, block);
++}
++
+ #endif
+@@ -1897,14 +1077,15 @@
+               if (*err != 0)
+                       return NULL;
+       } else {
+-              path->ip_frame->bh = NULL;              /* for iam_path_fini() */
++              path->ip_frame->bh = NULL;      /* for iam_path_fini() */
+               path->ip_frame->at = (void *)&dummy_dot;/* hack for zero entry*/
+       }
+       hash = hinfo.hash;
+       do {
+               block = dx_get_block(path, path->ip_frame->at);
+-              *err = path_descr(path)->id_node_read(path->ip_container, (iam_ptr_t)block,
+-                                                   NULL, &bh);
++              *err = iam_path_descr(path)->id_ops->id_node_read(path->ip_container,
++                                                        (iam_ptr_t)block,
++                                                        NULL, &bh);
+               if (*err != 0)
+                       goto errout;
+               de = (struct ext3_dir_entry_2 *) bh->b_data;
+@@ -2067,7 +1248,7 @@
+                       struct buffer_head **bh,struct iam_frame *frame,
+                       struct dx_hash_info *hinfo, int *error)
+ {
+-      struct inode *dir = path_obj(path);
++      struct inode *dir = iam_path_obj(path);
+       unsigned blocksize = dir->i_sb->s_blocksize;
+       unsigned count, continued;
+       struct buffer_head *bh2;
+@@ -2392,15 +1573,15 @@
+ }
+ #ifdef CONFIG_EXT3_INDEX
+-static int split_index_node(handle_t *handle, struct iam_path *path)
+-{ 
++int split_index_node(handle_t *handle, struct iam_path *path)
++{
+       struct iam_entry *entries;   /* old block contents */
+       struct iam_entry *entries2;  /* new block contents */
+       struct iam_frame *frame, *safe;
+       struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {0};
+       u32 newblock[DX_MAX_TREE_HEIGHT] = {0};
+-      struct inode *dir = path_obj(path);
++      struct inode *dir = iam_path_obj(path);
+       int nr_splet;
+       int i, err;
+@@ -2442,7 +1623,8 @@
+       for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
+               bh_new[i] = ext3_append (handle, dir, &newblock[i], &err);
+               if (!bh_new[i] ||
+-                  path_descr(path)->id_node_init(path->ip_container, bh_new[i], 0) != 0)
++                iam_path_descr(path)->id_ops->id_node_init(path->ip_container,
++                                                     bh_new[i], 0) != 0)
+                       goto cleanup;
+               BUFFER_TRACE(frame->bh, "get_write_access");
+               err = ext3_journal_get_write_access(handle, frame->bh);
+@@ -2516,9 +1698,9 @@
+                       unsigned count1 = count/2, count2 = count - count1;
+                       unsigned hash2;
+-                      dx_get_key(path,
+-                                 iam_entry_shift(path, entries, count1),
+-                                 (struct iam_key *)&hash2);
++                      iam_get_key(path,
++                                  iam_entry_shift(path, entries, count1),
++                                  (struct iam_key *)&hash2);
+                       dxtrace(printk("Split index %i/%i\n", count1, count2));
+@@ -2578,7 +1760,7 @@
+       size_t isize;
+       iam_path_compat_init(&cpath, dir);
+-      param = path_descr(path);
++      param = iam_path_descr(path);
+       err = dx_probe(dentry, NULL, &hinfo, path);
+       if (err != 0)
+@@ -2588,8 +1770,9 @@
+       /* XXX nikita: global serialization! */
+       isize = dir->i_size;
+-      err = param->id_node_read(path->ip_container, (iam_ptr_t)dx_get_block(path, frame->at), 
+-                                handle, &bh);
++      err = param->id_ops->id_node_read(path->ip_container,
++                      (iam_ptr_t)dx_get_block(path, frame->at),
++                      handle, &bh);
+       if (err != 0)
+               goto cleanup;
+@@ -2724,12 +1907,12 @@
+  * is so far negative - it has no inode.
+  *
+  * If the create succeeds, we fill in the inode information
+- * with d_instantiate(). 
++ * with d_instantiate().
+  */
+ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
+               struct nameidata *nd)
  {
-       struct iam_frame *p;
-       struct buffer_head *bh;
-       int err, num_frames = 0;
-       __u32 bhash;
+-      handle_t *handle; 
++      handle_t *handle;
+       struct inode * inode;
+       int err, retries = 0;
  
--      assert(dx_index_is_compat(path));
--
-       p = path->ip_frame;
-       /*
-        * Find the next leaf page by incrementing the frame pointer.
-@@ -1438,28 +593,34 @@ static int ext3_htree_next_block(struct 
-               --p;
-       }
+Index: linux-stage/fs/ext3/iam.c
+===================================================================
+--- linux-stage.orig/fs/ext3/iam.c     2006-05-29 18:23:53.597737944 +0800
++++ linux-stage/fs/ext3/iam.c  2006-05-29 13:01:22.000000000 +0800
+@@ -0,0 +1,990 @@
++/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
++ * vim:expandtab:shiftwidth=8:tabstop=8:
++ *
++ *  iam.c
++ *  Top-level entry points into iam module
++ *
++ *  Copyright (c) 2006 Cluster File Systems, Inc.
++ *   Author: Wang Di <wangdi@clusterfs.com>
++ *   Author: Nikita Danilov <nikita@clusterfs.com>
++ *
++ *   This file is part of the Lustre file system, http://www.lustre.org
++ *   Lustre is a trademark of Cluster File Systems, Inc.
++ *
++ *   You may have signed or agreed to another license before downloading
++ *   this software.  If so, you are bound by the terms and conditions
++ *   of that agreement, and the following does not apply to you.  See the
++ *   LICENSE file included with this distribution for more information.
++ *
++ *   If you did not agree to a different license, then this copy of Lustre
++ *   is open source software; you can redistribute it and/or modify it
++ *   under the terms of version 2 of the GNU General Public License as
++ *   published by the Free Software Foundation.
++ *
++ *   In either case, Lustre is distributed in the hope that it will be
++ *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
++ *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ *   license text for more details.
++ */
++
++/*
++ * iam: big theory statement.
++ *
++ * iam (Index Access Module) is a module providing abstraction of persistent
++ * transactional container on top of generalized ext3 htree.
++ *
++ * iam supports:
++ *
++ *     - key, pointer, and record size specifiable per container.
++ *
++ *     - trees taller than 2 index levels.
++ *
++ *     - read/write to existing ext3 htree directories as iam containers.
++ *
++ * iam container is a tree, consisting of leaf nodes containing keys and
++ * records stored in this container, and index nodes, containing keys and
++ * pointers to leaf or index nodes.
++ *
++ * iam does not work with keys directly, instead it calls user-supplied key
++ * comparison function (->dpo_keycmp()).
++ *
++ * Pointers are (currently) interpreted as logical offsets (measured in
++ * blocksful) within underlying flat file on top of which iam tree lives.
++ *
++ * On-disk format:
++ *
++ * iam mostly tries to reuse existing htree formats.
++ *
++ * Format of index node:
++ *
++ * +-----+-------+-------+-------+------+-------+------------+
++ * |     | count |       |       |      |       |            |
++ * | gap |   /   | entry | entry | .... | entry | free space |
++ * |     | limit |       |       |      |       |            |
++ * +-----+-------+-------+-------+------+-------+------------+
++ *
++ *       gap           this part of node is never accessed by iam code. It
++ *                     exists for binary compatibility with ext3 htree (that,
++ *                     in turn, stores fake struct ext2_dirent for ext2
++ *                     compatibility), and to keep some unspecified per-node
++ *                     data. Gap can be different for root and non-root index
++ *                     nodes. Gap size can be specified for each container
++ *                     (gap of 0 is allowed).
++ *
++ *       count/limit   current number of entries in this node, and the maximal
++ *                     number of entries that can fit into node. count/limit
++ *                     has the same size as entry, and is itself counted in
++ *                     count.
++ *
++ *       entry         index entry: consists of a key immediately followed by
++ *                     a pointer to a child node. Size of a key and size of a
++ *                     pointer depends on container. Entry has neither
++ *                     alignment nor padding.
++ *
++ *       free space    portion of node new entries are added to
++ *
++ * Entries in index node are sorted by their key value.
++ *
++ * Format of a leaf node is not specified. Generic iam code accesses leaf
++ * nodes through ->id_leaf methods in struct iam_descr.
++ *
++ */
++
++#include <linux/module.h>
++#include <linux/fs.h>
++#include <linux/pagemap.h>
++#include <linux/jbd.h>
++#include <linux/time.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/fcntl.h>
++#include <linux/stat.h>
++#include <linux/string.h>
++#include <linux/quotaops.h>
++#include <linux/buffer_head.h>
++#include <linux/smp_lock.h>
++#include <linux/lustre_iam.h>
++
++#include <libcfs/libcfs.h>
++#include <libcfs/kp30.h>
++
++#include "xattr.h"
++#include "iopen.h"
++#include "acl.h"
++
++
++static __u32 iam_root_ptr(struct iam_container *c)
++{
++        return 0;
++}
++
++static int iam_node_init(struct iam_container *c, struct buffer_head *bh,
++                        int root)
++{
++        return 0;
++}
++
++static int iam_node_check(struct iam_path *path, struct iam_frame *frame)
++{
++        struct iam_entry *entries;
++        void *data;
++        entries = dx_node_get_entries(path, frame);
++
++        data = frame->bh->b_data;
++
++        if (frame == path->ip_frames) {
++               struct iam_root *root;
++
++               root = data;
++               path->ip_indirect = root->info.indirect_levels;
++        }
++        frame->entries = frame->at = entries;
++      return 0;
++}
++
++static int iam_node_create(struct iam_container *c)
++{
++        return 0;
++}
++
++struct iam_operations generic_iam_ops = {
++        .id_root_ptr    = iam_root_ptr,
++        .id_node_read   = iam_node_read,
++        .id_node_init   = iam_node_init,
++        .id_node_check  = iam_node_check,
++        .id_create      = iam_node_create,
++};
++EXPORT_SYMBOL(generic_iam_ops);
++
++static inline void iam_reccpy(struct iam_path *p, struct iam_rec *rec_dst,
++                            struct iam_rec *rec_src)
++{
++      memcpy(rec_dst, rec_src, iam_path_descr(p)->id_rec_size);
++}
++
++/*
++ * Initialize container @c, acquires additional reference on @inode.
++ */
++int iam_container_init(struct iam_container *c,
++                     struct iam_descr *descr, struct inode *inode)
++{
++      memset(c, 0, sizeof *c);
++      c->ic_descr  = descr;
++      c->ic_object = igrab(inode);
++      if (c->ic_object != NULL)
++              return 0;
++      else
++              return -ENOENT;
++}
++EXPORT_SYMBOL(iam_container_init);
++
++/*
++ * Finalize container @c, release all resources.
++ */
++void iam_container_fini(struct iam_container *c)
++{
++      if (c->ic_object != NULL) {
++              iput(c->ic_object);
++              c->ic_object = NULL;
++      }
++}
++EXPORT_SYMBOL(iam_container_fini);
++
++void iam_path_init(struct iam_path *path, struct iam_container *c,
++                struct iam_path_descr *pd)
++{
++      memset(path, 0, sizeof *path);
++      path->ip_container = c;
++      path->ip_frame = path->ip_frames;
++      path->ip_data = pd;
++}
++
++static void iam_leaf_fini(struct iam_leaf *leaf);
++
++void iam_path_fini(struct iam_path *path)
++{
++      int i;
++
++      iam_leaf_fini(&path->ip_leaf);
++      for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) {
++              if (path->ip_frames[i].bh != NULL) {
++                      brelse(path->ip_frames[i].bh);
++                      path->ip_frames[i].bh = NULL;
++              }
++      }
++}
++
++extern struct iam_descr htree_compat_param;
++
++void iam_path_compat_init(struct iam_path_compat *path, struct inode *inode)
++{
++      int i;
++
++      for (i = 0; i < ARRAY_SIZE(path->ipc_scratch); ++i)
++              path->ipc_descr.ipd_key_scratch[i] =
++                      (struct iam_key *)&path->ipc_scratch[i];
++
++      iam_container_init(&path->ipc_container, &htree_compat_param, inode);
++      /*
++       * XXX hack allowing finalization of iam_path_compat with
++       * iam_path_fini().
++       */
++      iput(inode);
++      iam_path_init(&path->ipc_path, &path->ipc_container, &path->ipc_descr);
++}
++
++void iam_path_compat_fini(struct iam_path_compat *path)
++{
++      iam_path_fini(&path->ipc_path);
++      iam_container_fini(&path->ipc_container);
++}
++
++/*
++ * Leaf helpers.
++ */
++
++struct iam_path *iam_leaf_path(const struct iam_leaf *leaf)
++{
++        return leaf->il_path;
++}
++
++struct iam_container *iam_leaf_container(const struct iam_leaf *leaf)
++{
++        return iam_leaf_path(leaf)->ip_container;
++}
++
++struct iam_descr *iam_leaf_descr(const struct iam_leaf *leaf)
++{
++        return iam_leaf_container(leaf)->ic_descr;
++}
++
++struct iam_leaf_operations *iam_leaf_ops(const struct iam_leaf *leaf)
++{
++        return iam_leaf_descr(leaf)->id_leaf_ops;
++}
++
++/*
++ * Return pointer to current leaf record. Pointer is valid while corresponding
++ * leaf node is locked and pinned.
++ */
++struct iam_rec *iam_leaf_rec(struct iam_leaf *leaf)
++{
++      return iam_leaf_ops(leaf)->rec(leaf);
++}
++
++/*
++ * Return pointer to the current leaf key. This function may return either
++ * pointer to the key stored in node, or copy key into @key buffer supplied by
++ * caller and return pointer to this buffer. The latter approach is used when
++ * keys in nodes are not stored in plain form (e.g., htree doesn't store keys
++ * at all).
++ *
++ * Caller should assume that returned pointer is only valid while leaf node is
++ * pinned and locked.
++ */
++struct iam_key *iam_leaf_key(struct iam_leaf *leaf, struct iam_key *key)
++{
++      return iam_leaf_ops(leaf)->key(leaf, key);
++}
++
++static int iam_leaf_load(struct iam_path *path)
++{
++      int block;
++      int err;
++      struct iam_container *c;
++      struct buffer_head   *bh;
++      struct iam_leaf      *leaf;
++      struct iam_descr     *descr;
++      
++      c     = path->ip_container;
++      leaf  = &path->ip_leaf;
++      descr = iam_path_descr(path);
++      block = dx_get_block(path, path->ip_frame->at);
++      err   = descr->id_ops->id_node_read(c, block, NULL, &bh);
++      if (err == 0) {
++              leaf->il_bh = bh;
++                leaf->il_path = path;
++              err = iam_leaf_ops(leaf)->init(leaf);
++      }
++      return err;
++}
++
++static void iam_leaf_fini(struct iam_leaf *leaf)
++{
++      iam_leaf_ops(leaf)->fini(leaf);
++      if (leaf->il_bh) {
++              brelse(leaf->il_bh);
++              leaf->il_bh = NULL;
++      }
++}
++
++static void iam_leaf_start(struct iam_leaf *folio)
++{
++      iam_leaf_ops(folio)->start(folio);
++}
++
++void iam_leaf_next(struct iam_leaf *folio)
++{
++      iam_leaf_ops(folio)->next(folio);
++}
++
++static void iam_rec_add(struct iam_leaf *leaf, struct iam_key *key,
++                        struct iam_rec *rec)
++{
++        iam_leaf_ops(leaf)->rec_add(leaf, key, rec);
++}
++
++static void iam_rec_del(struct iam_leaf *leaf)
++{
++        iam_leaf_ops(leaf)->rec_del(leaf);
++}
++
++int iam_leaf_at_end(const struct iam_leaf *leaf)
++{
++        return iam_leaf_ops(leaf)->at_end(leaf);
++}
++
++void iam_leaf_split(struct iam_leaf *l, struct buffer_head *bh)
++{
++        iam_leaf_ops(l)->split(l, bh);
++}
++
++static int iam_leaf_can_add(struct iam_leaf *l,
++                            struct iam_key *k, struct iam_rec *r)
++{
++        return iam_leaf_ops(l)->can_add(l, k, r);
++}
++
++/***********************************************************************/
++/* iterator interface                                                  */
++/***********************************************************************/
++
++static enum iam_it_state it_state(const struct iam_iterator *it)
++{
++        return it->ii_state;
++}
++
++/*
++ * Helper function returning scratch key.
++ */
++static struct iam_key *it_scratch_key(struct iam_iterator *it, int n)
++{
++        return iam_path_key(&it->ii_path, n);
++}
++
++static struct iam_container *iam_it_container(const struct iam_iterator *it)
++{
++      return it->ii_path.ip_container;
++}
++
++static inline int it_keycmp(const struct iam_iterator *it,
++                          const struct iam_key *k1, const struct iam_key *k2)
++{
++      return iam_keycmp(iam_it_container(it), k1, k2);
++}
++
++/*
++ * Helper wrapper around iam_it_get(): returns 0 (success) only when record
++ * with exactly the same key as asked is found.
++ */
++static int iam_it_get_exact(struct iam_iterator *it, struct iam_key *k)
++{
++        int result;
++
++        result = iam_it_get(it, k);
++        if (result == 0 &&
++            (it_keycmp(it, k, iam_it_key_get(it, it_scratch_key(it, 1))) != 0))
++                /*
++                 * Return -ENOENT if cursor is located above record with a key
++                 * different from one specified.
++                 *
++                 * XXX returning -ENOENT only works if iam_it_get never
++                 * returns -ENOENT as a legitimate error.
++                 */
++                result = -ENOENT;
++        return result;
++}
++
++void iam_container_write_lock(struct iam_container *ic)
++{
++      down(&ic->ic_object->i_sem);
++}
++
++void iam_container_write_unlock(struct iam_container *ic)
++{
++      up(&ic->ic_object->i_sem);
++}
++
++void iam_container_read_lock(struct iam_container *ic)
++{
++      down(&ic->ic_object->i_sem);
++}
++
++void iam_container_read_unlock(struct iam_container *ic)
++{
++      up(&ic->ic_object->i_sem);
++}
++
++static void iam_it_lock(struct iam_iterator *it)
++{
++        if (it->ii_flags&IAM_IT_WRITE)
++                iam_container_write_lock(iam_it_container(it));
++        else
++                iam_container_read_lock(iam_it_container(it));
++}
++
++static void iam_it_unlock(struct iam_iterator *it)
++{
++      if (it->ii_flags&IAM_IT_WRITE)
++              iam_container_write_unlock(iam_it_container(it));
++      else
++              iam_container_read_unlock(iam_it_container(it));
++}
++
++/*
++ * Initialize iterator to IAM_IT_DETACHED state.
++ *
++ * postcondition: it_state(it) == IAM_IT_DETACHED
++ */
++int  iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags,
++               struct iam_path_descr *pd)
++{
++      memset(it, 0, sizeof *it);
++      it->ii_flags  = flags;
++      it->ii_state  = IAM_IT_DETACHED;
++      iam_path_init(&it->ii_path, c, pd);
++      return 0;
++}
++
++/*
++ * Finalize iterator and release all resources.
++ *
++ * precondition: it_state(it) == IAM_IT_DETACHED
++ */
++void iam_it_fini(struct iam_iterator *it)
++{
++      assert(it_state(it) == IAM_IT_DETACHED);
++      iam_path_fini(&it->ii_path);
++}
++
++int iam_path_lookup(struct iam_path *path)
++{
++      struct iam_container *c;
++      struct iam_descr *descr;
++      struct iam_leaf  *leaf;
++      int result;
++      
++      c = path->ip_container;
++      leaf = &path->ip_leaf;
++      descr = iam_path_descr(path);
++      result = dx_lookup(path);
++      if (result == 0) {
++              result = iam_leaf_load(path);
++              if (result == 0)
++                      result = iam_leaf_ops(leaf)->lookup(leaf,
++                                                            path->ip_key_target);
++      }
++      return result;
++}
++
++/*
++ * Attach iterator. After successful completion, @it points to record with
++ * smallest key not larger than @k.
++ *
++ * Return value: 0: positioned on existing record,
++ *             -ve: error.
++ *
++ * precondition:  it_state(it) == IAM_IT_DETACHED
++ * postcondition: ergo(result == 0,
++ *                     (it_state(it) == IAM_IT_ATTACHED &&
++ *                      it_keycmp(it, iam_it_key_get(it, *), k) <= 0))
++ */
++int iam_it_get(struct iam_iterator *it, struct iam_key *k)
++{
++        int result;
++        assert(it_state(it) == IAM_IT_DETACHED);
++
++        it->ii_path.ip_key_target = k;
++        iam_it_lock(it);
++        result = iam_path_lookup(&it->ii_path);
++        if (result == 0 || result == -ENOENT)
++                it->ii_state = IAM_IT_ATTACHED;
++        else
++                iam_it_unlock(it);
++      assert(ergo(result == 0,
++                    it_keycmp(it,
++                              iam_it_key_get(it, it_scratch_key(it, 0)),
++                            k) <= 0));
++        return result;
++}
++
++/*
++ * Duplicates iterator.
++ *
++ * postcondition: it_state(dst) == it_state(src) &&
++ *                iam_it_container(dst) == iam_it_container(src) &&
++ *                dst->ii_flags == src->ii_flags &&
++ *                ergo(it_state(src) == IAM_IT_ATTACHED,
++ *                     iam_it_rec_get(dst) == iam_it_rec_get(src) &&
++ *                     iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
++ */
++void iam_it_dup(struct iam_iterator *dst, struct iam_iterator *src)
++{
++        dst->ii_flags     = src->ii_flags;
++        dst->ii_state     = src->ii_state;
++        /* XXX not yet. iam_path_dup(&dst->ii_path, &src->ii_path); */
++        /*
++         * XXX: duplicate lock.
++         */
++      assert(it_state(dst) == it_state(src));
++      assert(iam_it_container(dst) == iam_it_container(src));
++      assert(dst->ii_flags == src->ii_flags);
++      assert(ergo(it_state(src) == IAM_IT_ATTACHED,
++                  iam_it_rec_get(dst) == iam_it_rec_get(src) &&
++                  iam_it_key_get(dst, it_scratch_key(dst, 0)) ==
++                  iam_it_key_get(src, it_scratch_key(src, 0))));
++
++}
++/*
++ * Detach iterator. Does nothing it detached state.
++ *
++ * postcondition: it_state(it) == IAM_IT_DETACHED
++ */
++void iam_it_put(struct iam_iterator *it)
++{
++        if (it->ii_state == IAM_IT_ATTACHED) {
++                it->ii_state = IAM_IT_DETACHED;
++              iam_leaf_fini(&it->ii_path.ip_leaf);
++                iam_it_unlock(it);
++        }
++}
++
++/*
++ * Move iterator one record right.
++ *
++ * Return value: 0: success,
++ *              +1: end of container reached
++ *             -ve: error
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
++ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED)
++ */
++int iam_it_next(struct iam_iterator *it)
++{
++        int result;
++        struct iam_container *c;
++        struct iam_path      *path;
++        struct iam_leaf      *leaf;
++
++        assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE);
++
++        c    = iam_it_container(it);
++        path = &it->ii_path;
++        leaf = &path->ip_leaf;
++
++        if (iam_leaf_at_end(leaf)) {
++                /* advance index portion of the path */
++                result = iam_index_next(c, path);
++                if (result == 1) {
++                        result = iam_leaf_load(path);
++                        if (result == 0)
++                                iam_leaf_start(leaf);
++                } else if (result == 0)
++                        /* end of container reached */
++                        result = +1;
++                if (result < 0)
++                        iam_it_put(it);
++        } else {
++                /* advance within leaf node */
++                iam_leaf_next(leaf);
++                result = 0;
++        }
++        assert(ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED));
++        return result;
++}
++
++/*
++ * Return pointer to the record under iterator.
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++struct iam_rec *iam_it_rec_get(struct iam_iterator *it)
++{
++        assert(it_state(it) == IAM_IT_ATTACHED);
++        return iam_leaf_rec(&it->ii_path.ip_leaf);
++}
++
++static void iam_it_reccpy(struct iam_iterator *it, struct iam_rec *r)
++{
++        memcpy(iam_leaf_rec(&it->ii_path.ip_leaf), r,
++               iam_it_container(it)->ic_descr->id_rec_size);
++}
++
++static void iam_it_keycpy(struct iam_iterator *it, struct iam_key *k)
++{
++        memcpy(iam_leaf_key(&it->ii_path.ip_leaf, NULL), k,
++                iam_it_container(it)->ic_descr->id_key_size);
++}
++
++
++/*
++ * Replace contents of record under iterator.
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
++ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
++ *                ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
++ */
++int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r)
++{
++        int result;
++
++        assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
++
++        result = ext3_journal_get_write_access(h, it->ii_path.ip_leaf.il_bh);
++        if (result == 0)
++                iam_it_reccpy(it, r);
++        return result;
++}
++
++/*
++ * Return pointer to the key under iterator.
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++struct iam_key *iam_it_key_get(struct iam_iterator *it, struct iam_key *k)
++{
++        assert(it_state(it) == IAM_IT_ATTACHED);
++        return iam_leaf_key(&it->ii_path.ip_leaf, k);
++}
++
++static int iam_leaf_rec_add(handle_t *handle, struct iam_path *path)
++{
++        int err;
++
++        err = ext3_journal_get_write_access(handle, path->ip_leaf.il_bh);
++        if (err)
++                goto journal_error;
++        iam_rec_add(&path->ip_leaf, NULL, NULL);
++      err = ext3_journal_dirty_metadata(handle, path->ip_leaf.il_bh);
++journal_error:
++      if (err)
++                ext3_std_error(iam_path_obj(path)->i_sb, err);
++      return err;
++}
++
++static int iam_new_leaf(handle_t *handle, struct iam_leaf *leaf)
++{
++        int err;
++        int err2;
++        u32 blknr; /* XXX 32bit block size */
++        struct buffer_head   *new_leaf;
++        struct iam_container *c;
++
++        c = iam_leaf_container(leaf);
++        err = ext3_journal_get_write_access(handle, leaf->il_bh);
++        if (err == 0) {
++                struct inode *obj;
++
++                obj = c->ic_object;
++                new_leaf = ext3_append(handle, c->ic_object, &blknr, &err);
++                if (new_leaf != NULL) {
++                        iam_leaf_ops(leaf)->init_new(c, new_leaf);
++                        iam_leaf_ops(leaf)->split(leaf, new_leaf);
++                        err = ext3_journal_dirty_metadata(handle, new_leaf);
++                        err2 = ext3_journal_dirty_metadata(handle, leaf->il_bh);
++                        err = err ? : err2;
++                        if (err)
++                                ext3_std_error(obj->i_sb, err);
++                        brelse(new_leaf);
++                }
++        }
++        return err;
++}
++
++int iam_add_rec(handle_t *handle, struct iam_path *path,
++                struct iam_key *k, struct iam_rec *r)
++{
++      int err;
++
++      if (iam_leaf_can_add(&path->ip_leaf, k, r)) {
++              err = iam_leaf_rec_add(handle, path);
++      } else {
++              err = split_index_node(handle, path);
++              if (err == 0) {
++                        err = iam_new_leaf(handle, &path->ip_leaf);
++                      if (err == 0)
++                              err = iam_leaf_rec_add(handle, path);
++              }
++      }
++      return err;
++}
++
++/*
++ * Insert new record with key @k and contents from @r, shifting records to the
++ * right.
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED &&
++ *                it->ii_flags&IAM_IT_WRITE &&
++ *                it_keycmp(it, iam_it_key_get(it, *), k) < 0
++ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
++ *                ergo(result == 0,
++ *                     it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
++ *                     !memcmp(iam_it_rec_get(it), r, ...))
++ */
++int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
++                      struct iam_key *k, struct iam_rec *r)
++{
++        int result;
++
++        assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
++#if 0
++        /*XXX remove this assert temporarily, since if il_at points to the header,
++         * this assert might have some problems*/
++        assert(it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)), k) < 0);
++#endif
++      result = iam_add_rec(h, &it->ii_path, k, r);
++      if (result == 0) {
++              /* place record and key info freed space. Leaf node is already
++               * in transaction. */
++              iam_it_reccpy(it, r);
++                iam_it_keycpy(it, k);
++                iam_keycpy(it->ii_path.ip_container, it_scratch_key(it, 0), k);
++                /*
++               * XXX TBD.
++               */
++        }
++        assert(it_state(it) == IAM_IT_ATTACHED);
++        assert(ergo(result == 0,
++                    it_keycmp(it,
++                              iam_it_key_get(it,
++                                             it_scratch_key(it, 0)), k) == 0 &&
++                    !memcmp(iam_it_rec_get(it), r,
++                            iam_it_container(it)->ic_descr->id_rec_size)));
++        return result;
++}
++
++static int iam_leaf_rec_remove(handle_t *handle, struct iam_leaf *leaf)
++{
++      int err;
++
++        iam_rec_del(leaf);
++      err = ext3_journal_dirty_metadata(handle, leaf->il_bh);
++      if (err)
++              ext3_std_error(iam_path_obj(iam_leaf_path(leaf))->i_sb, err);
++      return err;
++}
++
++/*
++ * Delete record under iterator.
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++int iam_it_rec_delete(handle_t *h, struct iam_iterator *it)
++{
++        int result;
++
++        assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
++
++        result = ext3_journal_get_write_access(h, it->ii_path.ip_leaf.il_bh);
++        /*
++         * no compaction for now.
++         */
++        if (result == 0)
++                iam_leaf_rec_remove(h, &it->ii_path.ip_leaf);
++
++      return result;
++}
++
++/*
++ * Convert iterator to cookie.
++ *
++ * precondition:  it_state(it) == IAM_IT_ATTACHED &&
++ *                iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++iam_pos_t iam_it_store(struct iam_iterator *it)
++{
++        iam_pos_t result;
++
++        assert(it_state(it) == IAM_IT_ATTACHED);
++        assert(iam_it_container(it)->ic_descr->id_key_size <= sizeof result);
++
++        result = 0;
++        iam_it_key_get(it, (struct iam_key *)&result);
++        return result;
++}
++
++/*
++ * Restore iterator from cookie.
++ *
++ * precondition:  it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE &&
++ *                iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
++ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED &&
++ *                                  iam_it_store(it) == pos)
++ */
++int iam_it_load(struct iam_iterator *it, iam_pos_t pos)
++{
++        assert(it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE);
++        assert(iam_it_container(it)->ic_descr->id_key_size <= sizeof pos);
++        return iam_it_get(it, (struct iam_key *)&pos);
++}
++
++/***********************************************************************/
++/* invariants                                                          */
++/***********************************************************************/
++
++static inline int ptr_inside(void *base, size_t size, void *ptr)
++{
++        return (base <= ptr) && (ptr < base + size);
++}
++
++int iam_frame_invariant(struct iam_frame *f)
++{
++        return
++                (f->bh != NULL &&
++                f->bh->b_data != NULL &&
++                ptr_inside(f->bh->b_data, f->bh->b_size, f->entries) &&
++                ptr_inside(f->bh->b_data, f->bh->b_size, f->at) &&
++                f->entries <= f->at);
++}
++int iam_leaf_invariant(struct iam_leaf *l)
++{
++        return
++                l->il_bh != NULL &&
++                l->il_bh->b_data != NULL &&
++                ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_entries) &&
++                ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_at) &&
++                l->il_entries <= l->il_at;
++}
++
++int iam_path_invariant(struct iam_path *p)
++{
++        int i;
++
++        if (p->ip_container == NULL ||
++            p->ip_indirect < 0 || p->ip_indirect > DX_MAX_TREE_HEIGHT - 1 ||
++            p->ip_frame != p->ip_frames + p->ip_indirect ||
++            !iam_leaf_invariant(&p->ip_leaf))
++                return 0;
++        for (i = 0; i < ARRAY_SIZE(p->ip_frames); ++i) {
++                if (i <= p->ip_indirect) {
++                        if (!iam_frame_invariant(&p->ip_frames[i]))
++                                return 0;
++                }
++        }
++        return 1;
++}
++
++int iam_it_invariant(struct iam_iterator *it)
++{
++        return
++                (it->ii_state == IAM_IT_DETACHED ||
++                 it->ii_state == IAM_IT_ATTACHED) &&
++                !(it->ii_flags & ~(IAM_IT_MOVE | IAM_IT_WRITE)) &&
++                ergo(it->ii_state == IAM_IT_ATTACHED,
++                     iam_path_invariant(&it->ii_path));
++}
++
++/*
++ * Search container @c for record with key @k. If record is found, its data
++ * are moved into @r.
++ *
++ *
++ *
++ * Return values: +ve: found, 0: not-found, -ve: error
++ */
++int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r,
++             struct iam_path_descr *pd)
++{
++        struct iam_iterator it;
++        int result;
++
++        iam_it_init(&it, c, 0, pd);
++
++        result = iam_it_get_exact(&it, k);
++        if (result == 0)
++                /*
++                 * record with required key found, copy it into user buffer
++                 */
++                iam_reccpy(&it.ii_path, r, iam_it_rec_get(&it));
++        iam_it_put(&it);
++        iam_it_fini(&it);
++        return result;
++}
++EXPORT_SYMBOL(iam_lookup);
++
++/*
++ * Insert new record @r with key @k into container @c (within context of
++ * transaction @h.
++ *
++ * Return values: 0: success, -ve: error, including -EEXIST when record with
++ * given key is already present.
++ *
++ * postcondition: ergo(result == 0 || result == -EEXIST,
++ *                                  iam_lookup(c, k, r2) > 0 &&
++ *                                  !memcmp(r, r2, c->ic_descr->id_rec_size));
++ */
++int iam_insert(handle_t *h, struct iam_container *c,
++               struct iam_key *k, struct iam_rec *r, struct iam_path_descr *pd)
++{
++        struct iam_iterator it;
++        int result;
++
++        iam_it_init(&it, c, IAM_IT_WRITE, pd);
++
++        result = iam_it_get_exact(&it, k);
++        if (result == -ENOENT)
++                result = iam_it_rec_insert(h, &it, k, r);
++        else if (result == 0)
++                result = -EEXIST;
++        iam_it_put(&it);
++        iam_it_fini(&it);
++        return result;
++}
++EXPORT_SYMBOL(iam_insert);
++
++int iam_update(handle_t *h, struct iam_container *c,
++               struct iam_key *k, struct iam_rec *r, struct iam_path_descr *pd)
++{
++        struct iam_iterator it;
++        int result;
++
++        iam_it_init(&it, c, IAM_IT_WRITE, pd);
++
++        result = iam_it_get_exact(&it, k);
++        if (result == 0)
++                iam_it_rec_set(h, &it, r);
++        iam_it_put(&it);
++        iam_it_fini(&it);
++        return result;
++}
++EXPORT_SYMBOL(iam_update);
++
++/*
++ * Delete existing record with key @k.
++ *
++ * Return values: 0: success, -ENOENT: not-found, -ve: other error.
++ *
++ * postcondition: ergo(result == 0 || result == -ENOENT,
++ *                                 !iam_lookup(c, k, *));
++ */
++int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k,
++             struct iam_path_descr *pd)
++{
++        struct iam_iterator it;
++        int result;
++
++        iam_it_init(&it, c, IAM_IT_WRITE, pd);
++
++        result = iam_it_get_exact(&it, k);
++        if (result == 0)
++                iam_it_rec_delete(h, &it);
++        iam_it_put(&it);
++        iam_it_fini(&it);
++        return result;
++}
++EXPORT_SYMBOL(iam_delete);
++
+Index: linux-stage/fs/ext3/Makefile
+===================================================================
+--- linux-stage.orig/fs/ext3/Makefile  2006-05-29 13:01:21.000000000 +0800
++++ linux-stage/fs/ext3/Makefile       2006-05-29 13:01:22.000000000 +0800
+@@ -6,7 +6,7 @@
  
--      /*
--       * If the hash is 1, then continue only if the next page has a
--       * continuation hash of any value.  This is used for readdir
--       * handling.  Otherwise, check to see if the hash matches the
--       * desired contiuation hash.  If it doesn't, return since
--       * there's no point to read in the successive index pages.
--       */
--      dx_get_key(path, p->at, (struct iam_key *)&bhash);
--      if (start_hash)
--              *start_hash = bhash;
--      if ((hash & 1) == 0) {
--              if ((bhash & ~1) != hash)
--                      return 0;
-+      if (compat) {
-+              /*
-+               * Htree hash magic.
-+               */
-+              /*
-+               * If the hash is 1, then continue only if the next page has a
-+               * continuation hash of any value.  This is used for readdir
-+               * handling.  Otherwise, check to see if the hash matches the
-+               * desired contiuation hash.  If it doesn't, return since
-+               * there's no point to read in the successive index pages.
-+               */
-+              iam_get_key(path, p->at, (struct iam_key *)&bhash);
-+              if (start_hash)
-+                      *start_hash = bhash;
-+              if ((hash & 1) == 0) {
-+                      if ((bhash & ~1) != hash)
-+                              return 0;
-+              }
-       }
-       /*
-        * If the hash is HASH_NB_ALWAYS, we always go to the next
-        * block so no check is necessary
-        */
-       while (num_frames--) {
--              err = path_descr(path)->id_node_read(path->ip_container,
--                                                   (iam_ptr_t)dx_get_block(path, p->at),
--                                                   NULL, &bh);
-+              err = iam_path_descr(path)->id_ops->
-+                      id_node_read(path->ip_container,
-+                                   (iam_ptr_t)dx_get_block(path, p->at),
-+                                   NULL, &bh);
-               if (err != 0)
-                       return err; /* Failure */
-               ++p;
-@@ -1471,6 +632,16 @@ static int ext3_htree_next_block(struct 
-       return 1;
- }
+ ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+          ioctl.o namei.o super.o symlink.o hash.o resize.o \
+-         extents.o mballoc.o
++         extents.o mballoc.o iam.o iam_lfix.o
  
-+int iam_index_next(struct iam_container *c, struct iam_path *path)
+ ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+Index: linux-stage/fs/ext3/iam_lfix.c
+===================================================================
+--- linux-stage.orig/fs/ext3/iam_lfix.c        2006-05-29 18:23:53.597737944 +0800
++++ linux-stage/fs/ext3/iam_lfix.c     2006-05-29 18:04:05.000000000 +0800
+@@ -0,0 +1,310 @@
++/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
++ * vim:expandtab:shiftwidth=8:tabstop=8:
++ *
++ *  iam_lfix.c
++ *  implementation of iam format for fixed size records.
++ *
++ *  Copyright (c) 2006 Cluster File Systems, Inc.
++ *   Author: Wang Di <wangdi@clusterfs.com>
++ *   Author: Nikita Danilov <nikita@clusterfs.com>
++ *
++ *   This file is part of the Lustre file system, http://www.lustre.org
++ *   Lustre is a trademark of Cluster File Systems, Inc.
++ *
++ *   You may have signed or agreed to another license before downloading
++ *   this software.  If so, you are bound by the terms and conditions
++ *   of that agreement, and the following does not apply to you.  See the
++ *   LICENSE file included with this distribution for more information.
++ *
++ *   If you did not agree to a different license, then this copy of Lustre
++ *   is open source software; you can redistribute it and/or modify it
++ *   under the terms of version 2 of the GNU General Public License as
++ *   published by the Free Software Foundation.
++ *
++ *   In either case, Lustre is distributed in the hope that it will be
++ *   useful, but WITHOUT ANY WARRANTY; without even the implied warranty
++ *   of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ *   license text for more details.
++ */
++
++#include <linux/types.h>
++#include <linux/jbd.h>
++/* ext3_error() */
++#include <linux/ext3_fs.h>
++
++#include <linux/lustre_iam.h>
++
++#include <libcfs/libcfs.h>
++#include <libcfs/kp30.h>
++
++static inline int iam_lfix_entry_size(const struct iam_leaf *l)
++{
++      return iam_leaf_descr(l)->id_key_size + iam_leaf_descr(l)->id_rec_size;
++}
++
++static inline struct iam_lentry *
++iam_lfix_shift(const struct iam_leaf *l, struct iam_lentry *entry, int shift)
++{
++              void *e = entry;
++      return e + shift * iam_lfix_entry_size(l);
++}
++
++static inline const struct iam_key *
++iam_leaf_key_at(const struct iam_container *c, const struct iam_lentry *entry)
++{
++        return (const struct iam_key *)entry;
++}
++
++static struct iam_lentry *iam_entries(const struct buffer_head *bh)
++{
++        return (void *)bh->b_data + sizeof(struct iam_leaf_head);
++}
++
++static struct iam_lentry *iam_get_lentries(const struct iam_leaf *l)
++{
++        return iam_entries(l->il_bh);
++}
++
++static int lentry_count_get(const struct iam_leaf *leaf)
++{
++        struct iam_lentry *lentry = leaf->il_entries;
++        return le16_to_cpu(((struct iam_leaf_head *)lentry)->ill_count);
++}
++
++static void lentry_count_set(struct iam_leaf *leaf, unsigned count)
++{
++        struct iam_lentry *lentry = leaf->il_entries;
++      ((struct iam_leaf_head *)lentry)->ill_count = cpu_to_le16(count);
++}
++
++/* This function is for flat keys. Keys that are not
++ * stored explicitly in the entry would instead be
++ * unpacked into the supplied key buffer.
++ */
++struct iam_key *iam_lfix_key(struct iam_leaf *l, struct iam_key *key)
++{
++        void *ie = l->il_at;
++        return (struct iam_key*)ie;
++}
++
++static void iam_lfix_start(struct iam_leaf *l)
++{
++        l->il_at = iam_get_lentries(l);
++}
++
++static inline ptrdiff_t iam_lfix_diff(struct iam_leaf *l, struct iam_lentry *e1,
++                                    struct iam_lentry *e2)
++{
++      ptrdiff_t diff;
++        int esize;
++
++        esize = iam_lfix_entry_size(l);
++      diff = (void *)e1 - (void *)e2;
++      assert(diff / esize * esize == diff);
++      return diff / esize;
++}
++
++static int iam_lfix_init(struct iam_leaf *l)
++{
++        int result;
++        struct iam_leaf_head *ill;
++
++        assert(l->il_bh != NULL);
++
++        ill = (struct iam_leaf_head*)l->il_bh->b_data;
++        if (ill->ill_magic == cpu_to_le16(IAM_LEAF_HEADER_MAGIC)) {
++                l->il_at = l->il_entries = iam_get_lentries(l);
++                result = 0;
++        } else {
++                struct inode *obj;
++
++                obj = iam_leaf_container(l)->ic_object;
++                ext3_error(obj->i_sb, __FUNCTION__,
++                           "Wrong magic in node %llu (#%lu): %#x != %#x\n",
++                           l->il_bh->b_blocknr, obj->i_ino,
++                           ill->ill_magic, le16_to_cpu(IAM_LEAF_HEADER_MAGIC));
++                result = -EIO;
++        }
++        return result;
++}
++
++static void iam_lfix_fini(struct iam_leaf *l)
++{
++        l->il_entries = l->il_at = NULL;
++        return;
++}
++
++static struct iam_lentry *iam_lfix_get_end(const struct iam_leaf *l)
++{
++        int count = lentry_count_get(l);
++        struct iam_lentry *ile = iam_lfix_shift(l, l->il_entries, count);
++
++        return ile;
++}
++
++struct iam_rec *iam_lfix_rec(struct iam_leaf *l)
++{
++        void *e = l->il_at;
++        return e + iam_leaf_descr(l)->id_key_size;
++}
++
++static void iam_lfix_next(struct iam_leaf *l)
++{
++        assert(!iam_leaf_at_end(l));
++        l->il_at = iam_lfix_shift(l, l->il_at, 1);
++}
++
++static int iam_lfix_lookup(struct iam_leaf *l, struct iam_key *k)
++{
++        struct iam_lentry *p, *q, *m;
++        struct iam_container *c;
++        int count;
++
++        count = lentry_count_get(l);
++        c = iam_leaf_container(l);
++
++        p = iam_get_lentries(l);
++        q = iam_lfix_shift(l, l->il_entries, count);
++
++        while (p <= q) {
++                m = iam_lfix_shift(l, p, iam_lfix_diff(l, q, p) / 2);
++                if (iam_keycmp(c, iam_leaf_key_at(c, m), k) > 0)
++                        q = iam_lfix_shift(l, m, -1);
++                else
++                        p = iam_lfix_shift(l, m, +1);
++        }
++        assert(p != iam_get_lentries(l));
++        
++        l->il_at = iam_lfix_shift(l, p, -1);
++        iam_keycpy(c, iam_path_key(iam_leaf_path(l), 0), iam_leaf_key_at(c, q));
++        if (l->il_at <= l->il_entries ||
++            iam_keycmp(c, iam_leaf_key_at(c, q), k) != 0)
++                return -ENOENT;
++        else
++                return 0;
++}
++
++static void iam_lfix_rec_add(struct iam_leaf *leaf,
++                             struct iam_key *k, struct iam_rec *r)
 +{
-+      return ext3_htree_advance(c->ic_object, 0, path, NULL, 0);
++        struct iam_lentry *end, *next, *cur, *nnext;
++        ptrdiff_t diff;
++        int count;
++
++        count = lentry_count_get(leaf);
++        end = iam_lfix_get_end(leaf);
++        cur = leaf->il_at;
++        if (cur != end) {
++                next = iam_lfix_shift(leaf, cur, 1);
++                if (next != end) {
++                        nnext = iam_lfix_shift(leaf, next, 1);
++                        diff = (void *)end - (void *)next;
++                        memmove(nnext, next, diff);
++                }
++                iam_lfix_next(leaf);
++        }
++        lentry_count_set(leaf, count + 1);
 +}
 +
-+int ext3_htree_next_block(struct inode *dir, __u32 hash,
-+                        struct iam_path *path, __u32 *start_hash)
++static void iam_lfix_rec_del(struct iam_leaf *leaf)
 +{
-+      return ext3_htree_advance(dir, hash, path, start_hash, 1);
++      struct iam_lentry *next, *end;
++      int count;
++      ptrdiff_t diff;
++
++        count = lentry_count_get(leaf);
++        end = iam_lfix_get_end(leaf);
++        next = iam_lfix_shift(leaf, leaf->il_at, 1);
++        diff = (void *)end - (void *)next;
++        memmove(leaf->il_at, next, diff);
++
++      lentry_count_set(leaf, count - 1);
 +}
- /*
-  * p is at least 6 bytes before the end of page
-@@ -1662,21 +833,30 @@ static void dx_sort_map (struct dx_map_e
-       } while(more);
- }
--static void dx_insert_block(struct iam_path *path,
--                          struct iam_frame *frame, u32 hash, u32 block)
-+void iam_insert_key(struct iam_path *path, struct iam_frame *frame,
-+                  const struct iam_key *key, iam_ptr_t ptr)
- {
-       struct iam_entry *entries = frame->entries;
--      struct iam_entry *old = frame->at, *new = iam_entry_shift(path, old, +1);
-+      struct iam_entry *new = iam_entry_shift(path, frame->at, +1);
-       int count = dx_get_count(entries);
-       assert(count < dx_get_limit(entries));
--      assert(old < iam_entry_shift(path, entries, count));
-+      assert(frame->at < iam_entry_shift(path, entries, count));
 +
-       memmove(iam_entry_shift(path, new, 1), new,
-               (char *)iam_entry_shift(path, entries, count) - (char *)new);
--      dx_set_key(path, new, (struct iam_key *)&hash);
--      dx_set_block(path, new, block);
-+      dx_set_key(path, new, key);
-+      dx_set_block(path, new, ptr);
-       dx_set_count(entries, count + 1);
- }
++static int iam_lfix_can_add(struct iam_leaf *l,
++                            struct iam_key *k, struct iam_rec *r)
++{
++        struct iam_lentry *end;
++        int block_size = iam_leaf_container(l)->ic_object->i_sb->s_blocksize;
++        unsigned long left, entry_size;
 +
-+void dx_insert_block(struct iam_path *path, struct iam_frame *frame,
-+                   u32 hash, u32 block)
++        end = iam_lfix_get_end(l);
++
++        left = block_size - iam_leaf_descr(l)->id_node_gap;
++
++        left -= (unsigned long)((void*)end - (void*)l->il_entries);
++
++        entry_size = iam_lfix_entry_size(l);
++
++        if (left >= entry_size)
++                return 1;
++
++        return 0;
++}
++
++static int iam_lfix_at_end(const struct iam_leaf *folio)
 +{
-+      assert(dx_index_is_compat(path));
-+      iam_insert_key(path, frame, (struct iam_key *)&hash, block);
++        struct iam_lentry *ile = iam_lfix_get_end(folio);
++
++        return (folio->il_at == ile);
 +}
 +
- #endif
-@@ -1897,14 +1077,15 @@ static struct buffer_head * ext3_dx_find
-               if (*err != 0)
-                       return NULL;
-       } else {
--              path->ip_frame->bh = NULL;              /* for iam_path_fini() */
-+              path->ip_frame->bh = NULL;      /* for iam_path_fini() */
-               path->ip_frame->at = (void *)&dummy_dot;/* hack for zero entry*/
-       }
-       hash = hinfo.hash;
-       do {
-               block = dx_get_block(path, path->ip_frame->at);
--              *err = path_descr(path)->id_node_read(path->ip_container, (iam_ptr_t)block,
--                                                   NULL, &bh);
-+              *err = iam_path_descr(path)->id_ops->id_node_read(path->ip_container,
-+                                                        (iam_ptr_t)block,
-+                                                        NULL, &bh);
-               if (*err != 0)
-                       goto errout;
-               de = (struct ext3_dir_entry_2 *) bh->b_data;
-@@ -2067,7 +1248,7 @@ static struct ext3_dir_entry_2 *do_split
-                       struct buffer_head **bh,struct iam_frame *frame,
-                       struct dx_hash_info *hinfo, int *error)
- {
--      struct inode *dir = path_obj(path);
-+      struct inode *dir = iam_path_obj(path);
-       unsigned blocksize = dir->i_sb->s_blocksize;
-       unsigned count, continued;
-       struct buffer_head *bh2;
-@@ -2392,15 +1573,15 @@ static int ext3_add_entry (handle_t *han
- }
- #ifdef CONFIG_EXT3_INDEX
--static int split_index_node(handle_t *handle, struct iam_path *path)
--{ 
-+int split_index_node(handle_t *handle, struct iam_path *path)
++static void iam_lfix_init_new(struct iam_container *c, struct buffer_head *bh)
 +{
-       struct iam_entry *entries;   /* old block contents */
-       struct iam_entry *entries2;  /* new block contents */
-       struct iam_frame *frame, *safe;
-       struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {0};
-       u32 newblock[DX_MAX_TREE_HEIGHT] = {0};
--      struct inode *dir = path_obj(path);
-+      struct inode *dir = iam_path_obj(path);
-       int nr_splet;
-       int i, err;
-@@ -2442,7 +1623,8 @@ static int split_index_node(handle_t *ha
-       for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
-               bh_new[i] = ext3_append (handle, dir, &newblock[i], &err);
-               if (!bh_new[i] ||
--                  path_descr(path)->id_node_init(path->ip_container, bh_new[i], 0) != 0)
-+                iam_path_descr(path)->id_ops->id_node_init(path->ip_container,
-+                                                     bh_new[i], 0) != 0)
-                       goto cleanup;
-               BUFFER_TRACE(frame->bh, "get_write_access");
-               err = ext3_journal_get_write_access(handle, frame->bh);
-@@ -2516,9 +1698,9 @@ static int split_index_node(handle_t *ha
-                       unsigned count1 = count/2, count2 = count - count1;
-                       unsigned hash2;
--                      dx_get_key(path,
--                                 iam_entry_shift(path, entries, count1),
--                                 (struct iam_key *)&hash2);
-+                      iam_get_key(path,
-+                                  iam_entry_shift(path, entries, count1),
-+                                  (struct iam_key *)&hash2);
-                       dxtrace(printk("Split index %i/%i\n", count1, count2));
-@@ -2578,7 +1760,7 @@ static int ext3_dx_add_entry(handle_t *h
-       size_t isize;
-       iam_path_compat_init(&cpath, dir);
--      param = path_descr(path);
-+      param = iam_path_descr(path);
-       err = dx_probe(dentry, NULL, &hinfo, path);
-       if (err != 0)
-@@ -2588,8 +1770,9 @@ static int ext3_dx_add_entry(handle_t *h
-       /* XXX nikita: global serialization! */
-       isize = dir->i_size;
--      err = param->id_node_read(path->ip_container, (iam_ptr_t)dx_get_block(path, frame->at), 
--                                handle, &bh);
-+      err = param->id_ops->id_node_read(path->ip_container,
-+                      (iam_ptr_t)dx_get_block(path, frame->at),
-+                      handle, &bh);
-       if (err != 0)
-               goto cleanup;
-@@ -2724,12 +1907,12 @@ static struct inode * ext3_new_inode_wan
-  * is so far negative - it has no inode.
-  *
-  * If the create succeeds, we fill in the inode information
-- * with d_instantiate(). 
-+ * with d_instantiate().
-  */
- static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
-               struct nameidata *nd)
- {
--      handle_t *handle; 
-+      handle_t *handle;
-       struct inode * inode;
-       int err, retries = 0;
-Index: iam/include/linux/lustre_iam.h
++        struct iam_leaf_head *hdr;
++
++        hdr = (struct iam_leaf_head*)bh->b_data;
++        hdr->ill_magic = cpu_to_le16(IAM_LEAF_HEADER_MAGIC);
++        hdr->ill_count = cpu_to_le16(0);
++}
++
++static void iam_lfix_split(struct iam_leaf *l, struct buffer_head *bh)
++{
++        struct iam_path      *path;
++      struct iam_leaf_head *hdr;
++        const struct iam_key *pivot;
++
++      unsigned count;
++      unsigned split;
++
++      void *start;
++      void *finis;
++
++        path = iam_leaf_path(l);
++
++      hdr = (void *)bh->b_data;
++
++        count = lentry_count_get(l);
++        split = count / 2;
++
++        start = iam_lfix_shift(l, iam_get_lentries(l), split);
++        finis = iam_lfix_shift(l, iam_get_lentries(l), count);
++
++        pivot = iam_leaf_key_at(iam_leaf_container(l), start);
++
++        memmove(iam_entries(bh), start, finis - start);
++        hdr->ill_count = cpu_to_le16(count - split);
++        lentry_count_set(l, split);
++        /*
++         * Insert pointer to the new node (together with the smallest key in
++         * the node) into index node.
++         */
++        iam_insert_key(path, path->ip_frame, pivot, bh->b_blocknr);
++}
++
++struct iam_leaf_operations iam_lfix_leaf_ops = {
++        .init           = iam_lfix_init,
++        .init_new       = iam_lfix_init_new,
++        .fini           = iam_lfix_fini,
++        .start          = iam_lfix_start,
++        .next           = iam_lfix_next,
++        .key            = iam_lfix_key,
++        .rec            = iam_lfix_rec,
++        .lookup         = iam_lfix_lookup,
++        .at_end         = iam_lfix_at_end,
++        .rec_add        = iam_lfix_rec_add,
++        .rec_del        = iam_lfix_rec_del,
++        .can_add        = iam_lfix_can_add,
++        .split          = iam_lfix_split
++};
++EXPORT_SYMBOL(iam_lfix_leaf_ops);
+Index: linux-stage/include/linux/lustre_iam.h
 ===================================================================
---- iam.orig/include/linux/lustre_iam.h        2006-05-27 19:58:44.000000000 +0400
-+++ iam/include/linux/lustre_iam.h     2006-05-29 00:40:51.000000000 +0400
+--- linux-stage.orig/include/linux/lustre_iam.h        2006-05-29 13:01:21.000000000 +0800
++++ linux-stage/include/linux/lustre_iam.h     2006-05-29 13:01:22.000000000 +0800
 @@ -1,9 +1,61 @@
 +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
 + * vim:expandtab:shiftwidth=8:tabstop=8:
@@ -2867,7 +2868,7 @@ Index: iam/include/linux/lustre_iam.h
  };
  
  /*
-@@ -30,6 +82,11 @@ struct iam_key;
+@@ -30,6 +82,11 @@
  /* Incomplete type use to refer to the records stored in iam containers. */
  struct iam_rec;
  
@@ -2879,7 +2880,7 @@ Index: iam/include/linux/lustre_iam.h
  typedef __u64 iam_ptr_t;
  
  /*
-@@ -41,45 +98,25 @@ struct iam_frame {
+@@ -41,45 +98,25 @@
        struct iam_entry *at;      /* target entry, found by binary search */
  };
  
@@ -2939,7 +2940,7 @@ Index: iam/include/linux/lustre_iam.h
        /*
         * Returns pointer (in the same sense as pointer in index entry) to
         * the root node.
-@@ -102,8 +139,8 @@ struct iam_descr {
+@@ -102,8 +139,8 @@
        /*
         * Key comparison function. Returns -1, 0, +1.
         */
@@ -2950,7 +2951,7 @@ Index: iam/include/linux/lustre_iam.h
        /*
         * Create new container.
         *
-@@ -111,25 +148,120 @@ struct iam_descr {
+@@ -111,25 +148,120 @@
         * contains single record with the smallest possible key.
         */
        int (*id_create)(struct iam_container *c);
@@ -3090,7 +3091,7 @@ Index: iam/include/linux/lustre_iam.h
  };
  
  struct iam_container {
-@@ -149,6 +281,17 @@ struct iam_container {
+@@ -149,6 +281,17 @@
  };
  
  /*
@@ -3108,7 +3109,7 @@ Index: iam/include/linux/lustre_iam.h
   * Structure to keep track of a path drilled through htree.
   */
  struct iam_path {
-@@ -172,34 +315,232 @@ struct iam_path {
+@@ -172,34 +315,232 @@
        /*
         * Leaf node: a child of ->ip_frame.
         */
@@ -3355,7 +3356,7 @@ Index: iam/include/linux/lustre_iam.h
  /*
   * Initialize container @c, acquires additional reference on @inode.
   */
-@@ -210,3 +551,155 @@ int iam_container_init(struct iam_contai
+@@ -210,3 +551,155 @@
   */
  void iam_container_fini(struct iam_container *c);
  
index a67eef7..51a3bc8 100644 (file)
@@ -26,13 +26,19 @@ struct leaf_header {
        unsigned short   lh_count;
 };
 
+struct leaf_entry {
+       unsigned long long le_key;
+       unsigned long long le_rec;
+};
+
 #define LEAF_HEAD_MAGIC 0x1976
 int main(int argc, char **argv)
 {
-       struct leaf_header header;
        struct iam_root root;
-       char buf[4096];
        struct iam_entry ie;
+       struct leaf_header header;
+       struct leaf_entry le;
+       char buf[4096];
        int fd, rc, file_arg = 1;
 
         memset(buf, 0, 4096);
@@ -52,10 +58,14 @@ int main(int argc, char **argv)
        memset(buf, 0, 4096);
        root.info.indirect_levels = 0;
        memcpy(buf, &root, sizeof(struct iam_root));
-       header.lh_magic = LEAF_HEAD_MAGIC;
+
+       /* insert the dx_limit compatible structure to make
+        * iam compatible with the dx code */
        header.lh_count = 2;
+       
        memcpy (buf + sizeof(struct iam_root), &header,
                sizeof(struct iam_entry));
+       
        ie.ie_key = 0x0;
        ie.ie_index = 1;
 
@@ -67,11 +77,19 @@ int main(int argc, char **argv)
                close(fd);
                exit(rc);
        }
+       
        /*create the first index entry*/        
        memset(buf, 0, 4096);
        header.lh_magic = LEAF_HEAD_MAGIC;
-       header.lh_count = 0
+       header.lh_count = 1;
        memcpy(buf, &header, sizeof(struct leaf_header));
+
+       /*insert the lowest key of the leaf*/
+       le.le_key = 0; /* temporary: assume 0 is the lowest key of the leaf */
+       le.le_rec = 0;
+
+       memcpy(buf + sizeof(struct leaf_header), &le, 
+              sizeof(struct leaf_entry)); 
        rc = write(fd, buf, sizeof(buf));
        if (rc < 0) {
                printf("Error Writing %s %s \n", argv[1], strerror(errno));