-Index: iam/fs/ext3/Makefile
+Index: linux-2.6.9/fs/ext3/namei.c
===================================================================
---- iam.orig/fs/ext3/Makefile 2006-05-10 18:21:01.000000000 +0400
-+++ iam/fs/ext3/Makefile 2006-05-10 18:21:01.000000000 +0400
-@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
-
- ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
- ioctl.o namei.o super.o symlink.o hash.o resize.o \
-- extents.o mballoc.o
-+ extents.o mballoc.o iam.o
+--- linux-2.6.9.orig/fs/ext3/namei.c 2006-05-23 17:01:09.000000000 +0800
++++ linux-2.6.9/fs/ext3/namei.c 2006-05-23 17:01:09.000000000 +0800
+@@ -24,81 +24,6 @@
+ * Theodore Ts'o, 2002
+ */
- ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
- ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
-Index: iam/fs/ext3/iam.c
-===================================================================
---- iam.orig/fs/ext3/iam.c 2004-04-06 17:27:52.000000000 +0400
-+++ iam/fs/ext3/iam.c 2006-05-11 01:24:29.000000000 +0400
-@@ -0,0 +1,945 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * iam.c
-+ * Top-level entry points into osd module
-+ *
-+ * Copyright (c) 2006 Cluster File Systems, Inc.
-+ * Author: Wang Di <wangdi@clusterfs.com>
-+ * Author: Nikita Danilov <nikita@clusterfs.com>
-+ *
-+ * This file is part of the Lustre file system, http://www.lustre.org
-+ * Lustre is a trademark of Cluster File Systems, Inc.
-+ *
-+ * You may have signed or agreed to another license before downloading
-+ * this software. If so, you are bound by the terms and conditions
-+ * of that agreement, and the following does not apply to you. See the
-+ * LICENSE file included with this distribution for more information.
-+ *
-+ * If you did not agree to a different license, then this copy of Lustre
-+ * is open source software; you can redistribute it and/or modify it
-+ * under the terms of version 2 of the GNU General Public License as
-+ * published by the Free Software Foundation.
-+ *
-+ * In either case, Lustre is distributed in the hope that it will be
-+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
-+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * license text for more details.
-+ */
-+
-+/*
-+ * iam: big theory statement.
-+ *
-+ * iam (Index Access Module) is a module providing abstraction of persistent
-+ * transactional container on top of generalized ext3 htree.
-+ *
-+ * iam supports:
-+ *
-+ * - key, pointer, and record size specifiable per container.
-+ *
-+ * - trees taller than 2 index levels.
-+ *
-+ * - read/write to existing ext3 htree directories as iam containers.
-+ *
-+ * iam container is a tree, consisting of leaf nodes containing keys and
-+ * records stored in this container, and index nodes, containing keys and
-+ * pointers to leaf or index nodes.
-+ *
-+ * iam does not work with keys directly, instead it calls user-supplied key
-+ * comparison function (->dpo_keycmp()).
-+ *
-+ * Pointers are (currently) interpreted as logical offsets (measured in
-+ * blocksful) within underlying flat file on top of which iam tree lives.
-+ *
-+ * On-disk format:
-+ *
-+ * iam mostly tries to reuse existing htree formats.
-+ *
-+ * Format of index node:
-+ *
-+ * +-----+-------+-------+-------+------+-------+------------+
-+ * | | count | | | | | |
-+ * | gap | / | entry | entry | .... | entry | free space |
-+ * | | limit | | | | | |
-+ * +-----+-------+-------+-------+------+-------+------------+
-+ *
-+ * gap this part of node is never accessed by iam code. It
-+ * exists for binary compatibility with ext3 htree (that,
-+ * in turn, stores fake struct ext2_dirent for ext2
-+ * compatibility), and to keep some unspecified per-node
-+ * data. Gap can be different for root and non-root index
-+ * nodes. Gap size can be specified for each container
-+ * (gap of 0 is allowed).
-+ *
-+ * count/limit current number of entries in this node, and the maximal
-+ * number of entries that can fit into node. count/limit
-+ * has the same size as entry, and is itself counted in
-+ * count.
-+ *
-+ * entry index entry: consists of a key immediately followed by
-+ * a pointer to a child node. Size of a key and size of a
-+ * pointer depends on container. Entry has neither
-+ * alignment nor padding.
-+ *
-+ * free space portion of node new entries are added to
-+ *
-+ * Entries in index node are sorted by their key value.
-+ *
-+ * Format of a leaf node is not specified. Generic iam code accesses leaf
-+ * nodes through ->id_leaf methods in struct iam_descr.
-+ *
-+ */
-+
-+#include <linux/module.h>
-+#include <linux/fs.h>
-+#include <linux/pagemap.h>
-+#include <linux/jbd.h>
-+#include <linux/time.h>
-+#include <linux/ext3_fs.h>
-+#include <linux/ext3_jbd.h>
-+#include <linux/fcntl.h>
-+#include <linux/stat.h>
-+#include <linux/string.h>
-+#include <linux/quotaops.h>
-+#include <linux/buffer_head.h>
-+#include <linux/smp_lock.h>
-+#include <linux/lustre_iam.h>
-+
-+#include <libcfs/libcfs.h>
-+#include <libcfs/kp30.h>
-+
-+#include "xattr.h"
-+#include "iopen.h"
-+#include "acl.h"
-+
-+static inline void iam_reccpy(struct iam_path *p, struct iam_rec *rec_dst,
-+ struct iam_rec *rec_src)
-+{
-+ memcpy(rec_dst, rec_src, iam_path_descr(p)->id_rec_size);
-+}
-+
-+/*
-+ * Return pointer to current leaf record. Pointer is valid while corresponding
-+ * leaf node is locked and pinned.
-+ */
-+struct iam_rec *iam_leaf_rec(struct iam_container *c, struct iam_leaf *leaf)
-+{
-+ return c->ic_descr->id_leaf.rec(c, leaf);
-+}
-+
-+/*
-+ * Return pointer to the current leaf key. This function may return either
-+ * pointer to the key stored in node, or copy key into @key buffer supplied by
-+ * caller and return pointer to this buffer. The latter approach is used when
-+ * keys in nodes are not stored in plain form (e.g., htree doesn't store keys
-+ * at all).
-+ *
-+ * Caller should assume that returned pointer is only valid while leaf node is
-+ * pinned and locked.
-+ */
-+struct iam_key *iam_leaf_key(struct iam_container *c, struct iam_leaf *leaf,
-+ struct iam_key *key)
-+{
-+ return c->ic_descr->id_leaf.key(c, leaf, key);
-+}
-+
-+/*
-+ * Initialize container @c, acquires additional reference on @inode.
-+ */
-+int iam_container_init(struct iam_container *c,
-+ struct iam_descr *descr, struct inode *inode)
-+{
-+ memset(c, 0, sizeof *c);
-+ c->ic_descr = descr;
-+ c->ic_object = igrab(inode);
-+ if (c->ic_object != NULL)
-+ return 0;
-+ else
-+ return -ENOENT;
-+}
-+EXPORT_SYMBOL(iam_container_init);
-+
-+/*
-+ * Finalize container @c, release all resources.
-+ */
-+void iam_container_fini(struct iam_container *c)
-+{
-+ if (c->ic_object != NULL) {
-+ iput(c->ic_object);
-+ c->ic_object = NULL;
-+ }
-+}
-+EXPORT_SYMBOL(iam_container_fini);
-+
-+void iam_path_init(struct iam_path *path, struct iam_container *c,
-+ struct iam_path_descr *pd)
-+{
-+ memset(path, 0, sizeof *path);
-+ path->ip_container = c;
-+ path->ip_frame = path->ip_frames;
-+ path->ip_data = pd;
-+}
-+
-+static void iam_leaf_fini(struct iam_path *path);
-+
-+void iam_path_fini(struct iam_path *path)
-+{
-+ int i;
-+
-+ iam_leaf_fini(path);
-+ for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) {
-+ if (path->ip_frames[i].bh != NULL) {
-+ brelse(path->ip_frames[i].bh);
-+ path->ip_frames[i].bh = NULL;
-+ }
-+ }
-+}
-+
-+extern struct iam_descr htree_compat_param;
-+
-+void iam_path_compat_init(struct iam_path_compat *path, struct inode *inode)
-+{
-+ int i;
-+
-+ for (i = 0; i < ARRAY_SIZE(path->ipc_scratch); ++i)
-+ path->ipc_descr.ipd_key_scratch[i] =
-+ (struct iam_key *)&path->ipc_scratch[i];
-+
-+ iam_container_init(&path->ipc_container, &htree_compat_param, inode);
-+ /*
-+ * XXX hack allowing finalization of iam_path_compat with
-+ * iam_path_fini().
-+ */
-+ iput(inode);
-+ iam_path_init(&path->ipc_path, &path->ipc_container, &path->ipc_descr);
-+}
-+
-+void iam_path_compat_fini(struct iam_path_compat *path)
-+{
-+ iam_path_fini(&path->ipc_path);
-+ iam_container_fini(&path->ipc_container);
-+}
-+
-+static int iam_leaf_load(struct iam_path *path)
-+{
-+ int block;
-+ int err;
-+ struct iam_container *c;
-+ struct buffer_head *bh;
-+ struct iam_leaf *leaf;
-+ struct iam_descr *descr;
-+
-+ c = path->ip_container;
-+ leaf = &path->ip_leaf;
-+ descr = iam_path_descr(path);
-+ block = dx_get_block(path, path->ip_frame->at);
-+ err = descr->id_node_read(c, block, NULL, &bh);
-+ if (err == 0) {
-+ leaf->il_bh = bh;
-+ err = descr->id_leaf.init(c, leaf);
-+ }
-+ return err;
-+}
-+
-+static void iam_leaf_fini(struct iam_path *path)
-+{
-+ iam_path_descr(path)->id_leaf.fini(path->ip_container, &path->ip_leaf);
-+ if (path && path->ip_leaf.il_bh) {
-+ brelse(path->ip_leaf.il_bh);
-+ path->ip_leaf.il_bh = NULL;
-+ }
-+}
-+
-+static void iam_leaf_start(struct iam_container *c, struct iam_leaf *folio)
-+{
-+ c->ic_descr->id_leaf.start(c, folio);
-+}
-+
-+static void iam_leaf_next(struct iam_container *c, struct iam_leaf *folio)
-+{
-+ c->ic_descr->id_leaf.next(c, folio);
-+}
-+
-+static int iam_leaf_at_end(struct iam_container *c, struct iam_leaf *folio)
-+{
-+ c->ic_descr->id_leaf.at_end(c, folio);
-+}
-+
-+/*
-+ * Helper function returning scratch key.
-+ */
-+static struct iam_key *it_scratch_key(struct iam_iterator *it, int n)
-+{
-+ return iam_path_key(&it->ii_path, n);
-+}
-+
-+static struct iam_container *iam_it_container(struct iam_iterator *it)
-+{
-+ return it->ii_path.ip_container;
-+}
-+
-+static inline int it_keycmp(struct iam_iterator *it,
-+ struct iam_key *k1, struct iam_key *k2)
-+{
-+ return iam_keycmp(iam_it_container(it), k1, k2);
-+}
-+
-+/*
-+ * Helper wrapper around iam_it_get(): returns 0 (success) only when record
-+ * with exactly the same key as asked is found.
-+ */
-+static int iam_it_get_exact(struct iam_iterator *it, struct iam_key *k)
-+{
-+ int result;
-+
-+ result = iam_it_get(it, k);
-+ if (result == 0 &&
-+ (it_keycmp(it, k, iam_it_key_get(it, it_scratch_key(it, 0))) != 0))
-+ /*
-+ * Return -ENOENT if cursor is located above record with a key
-+ * different from one specified.
-+ *
-+ * XXX returning -ENOENT only works if iam_it_get never
-+ * returns -ENOENT as a legitimate error.
-+ */
-+ result = -ENOENT;
-+ return result;
-+}
-+
-+/***********************************************************************/
-+/* iterator interface */
-+/***********************************************************************/
-+
-+static enum iam_it_state it_state(struct iam_iterator *it)
-+{
-+ return it->ii_state;
-+}
-+
-+void iam_container_write_lock(struct iam_container *ic)
-+{
-+ down(&ic->ic_object->i_sem);
-+}
-+
-+void iam_container_write_unlock(struct iam_container *ic)
-+{
-+ up(&ic->ic_object->i_sem);
-+}
-+
-+void iam_container_read_lock(struct iam_container *ic)
-+{
-+ down(&ic->ic_object->i_sem);
-+}
-+
-+void iam_container_read_unlock(struct iam_container *ic)
-+{
-+ up(&ic->ic_object->i_sem);
-+}
-+
-+static void iam_it_lock(struct iam_iterator *it)
-+{
-+ if (it->ii_flags&IAM_IT_WRITE)
-+ iam_container_write_lock(iam_it_container(it));
-+ else
-+ iam_container_read_lock(iam_it_container(it));
-+}
-+
-+static void iam_it_unlock(struct iam_iterator *it)
-+{
-+ if (it->ii_flags&IAM_IT_WRITE)
-+ iam_container_write_unlock(iam_it_container(it));
-+ else
-+ iam_container_read_unlock(iam_it_container(it));
-+}
-+
-+/*
-+ * Initialize iterator to IAM_IT_DETACHED state.
-+ *
-+ * postcondition: it_state(it) == IAM_IT_DETACHED
-+ */
-+int iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags,
-+ struct iam_path_descr *pd)
-+{
-+ memset(it, 0, sizeof *it);
-+ it->ii_flags = flags;
-+ it->ii_state = IAM_IT_DETACHED;
-+ iam_path_init(&it->ii_path, c, pd);
-+ return 0;
-+}
-+
-+/*
-+ * Finalize iterator and release all resources.
-+ *
-+ * precondition: it_state(it) == IAM_IT_DETACHED
-+ */
-+void iam_it_fini(struct iam_iterator *it)
-+{
-+ assert(it_state(it) == IAM_IT_DETACHED);
-+ iam_path_fini(&it->ii_path);
-+}
-+
-+int iam_path_lookup(struct iam_path *path)
-+{
-+ struct iam_container *c;
-+ struct iam_descr *descr;
-+ struct iam_leaf *leaf;
-+ int result;
-+
-+ c = path->ip_container;
-+ leaf = &path->ip_leaf;
-+ descr = iam_path_descr(path);
-+ result = dx_lookup(path);
-+ if (result == 0) {
-+ result = iam_leaf_load(path);
-+ if (result == 0)
-+ result = descr->id_leaf.lookup(c, path, leaf,
-+ path->ip_key_target);
-+ }
-+ return result;
-+}
-+
-+/*
-+ * Attach iterator. After successful completion, @it points to record with
-+ * smallest key not larger than @k.
-+ *
-+ * Return value: 0: positioned on existing record,
-+ * -ve: error.
-+ *
-+ * precondition: it_state(it) == IAM_IT_DETACHED
-+ * postcondition: ergo(result == 0,
-+ * (it_state(it) == IAM_IT_ATTACHED &&
-+ * it_keycmp(it, iam_it_key_get(it, *), k) < 0))
-+ */
-+int iam_it_get(struct iam_iterator *it, struct iam_key *k)
-+{
-+ int result;
-+ assert(it_state(it) == IAM_IT_DETACHED);
-+
-+ it->ii_path.ip_key_target = k;
-+ iam_it_lock(it);
-+ result = iam_path_lookup(&it->ii_path);
-+ if (result == 0)
-+ it->ii_state = IAM_IT_ATTACHED;
-+ else
-+ iam_it_unlock(it);
-+ assert(ergo(result == 0,
-+ it_keycmp(it,
-+ iam_it_key_get(it, it_scratch_key(it, 0)),
-+ k) < 0));
-+ return result;
-+}
-+
-+/*
-+ * Duplicates iterator.
-+ *
-+ * postcondition: it_state(dst) == it_state(src) &&
-+ * iam_it_container(dst) == iam_it_container(src) &&
-+ * dst->ii_flags = src->ii_flags &&
-+ * ergo(it_state(src) == IAM_IT_ATTACHED,
-+ * iam_it_rec_get(dst) == iam_it_rec_get(src) &&
-+ * iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
-+ */
-+void iam_it_dup(struct iam_iterator *dst, struct iam_iterator *src)
-+{
-+ dst->ii_flags = src->ii_flags;
-+ dst->ii_state = src->ii_state;
-+ /* XXX not yet. iam_path_dup(&dst->ii_path, &src->ii_path); */
-+ /*
-+ * XXX: duplicate lock.
-+ */
-+ assert(it_state(dst) == it_state(src));
-+ assert(iam_it_container(dst) == iam_it_container(src));
-+ assert(dst->ii_flags = src->ii_flags);
-+ assert(ergo(it_state(src) == IAM_IT_ATTACHED,
-+ iam_it_rec_get(dst) == iam_it_rec_get(src) &&
-+ iam_it_key_get(dst, it_scratch_key(dst, 0)) ==
-+ iam_it_key_get(src, it_scratch_key(src, 0))));
-+
-+}
-+/*
-+ * Detach iterator. Does nothing it detached state.
-+ *
-+ * postcondition: it_state(it) == IAM_IT_DETACHED
-+ */
-+void iam_it_put(struct iam_iterator *it)
-+{
-+ if (it->ii_state == IAM_IT_ATTACHED) {
-+ it->ii_state = IAM_IT_DETACHED;
-+ iam_leaf_fini(&it->ii_path);
-+ iam_it_unlock(it);
-+ }
-+}
-+
-+/*
-+ * Move iterator one record right.
-+ *
-+ * Return value: 0: success,
-+ * +1: end of container reached
-+ * -ve: error
-+ *
-+ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
-+ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED)
-+ */
-+int iam_it_next(struct iam_iterator *it)
-+{
-+ int result;
-+ struct iam_container *c;
-+
-+ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE);
-+
-+ c = iam_it_container(it);
-+ if (iam_leaf_at_end(c, &it->ii_path.ip_leaf)) {
-+ /* advance index portion of the path */
-+ result = 0; /* XXX not yet iam_index_next(&it->ii_path); */
-+ if (result == 0) {
-+ result = 0; /* XXX not yet iam_read_leaf(&it->ii_path); */
-+ if (result == 0)
-+ iam_leaf_start(c, &it->ii_path.ip_leaf);
-+ } else if (result > 0)
-+ /* end of container reached */
-+ result = +1;
-+ if (result < 0)
-+ iam_it_put(it);
-+ } else {
-+ /* advance within leaf node */
-+ iam_leaf_next(c, &it->ii_path.ip_leaf);
-+ result = 0;
-+ }
-+ assert(ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED));
-+ return result;
-+}
-+
-+/*
-+ * Return pointer to the record under iterator.
-+ *
-+ * precondition: it_state(it) == IAM_IT_ATTACHED
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED
-+ */
-+struct iam_rec *iam_it_rec_get(struct iam_iterator *it)
-+{
-+ assert(it_state(it) == IAM_IT_ATTACHED);
-+ return iam_leaf_rec(iam_it_container(it), &it->ii_path.ip_leaf);
-+}
-+
-+static void iam_it_reccpy(struct iam_iterator *it, struct iam_rec *r)
-+{
-+ memcpy(iam_leaf_rec(iam_it_container(it), &it->ii_path.ip_leaf), r,
-+ iam_it_container(it)->ic_descr->id_rec_size);
-+}
-+
-+/*
-+ * Replace contents of record under iterator.
-+ *
-+ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
-+ * ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
-+ */
-+int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r)
-+{
-+ int result;
-+
-+ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
-+
-+ result = ext3_journal_get_write_access(h, it->ii_path.ip_leaf.il_bh);
-+ if (result == 0)
-+ iam_it_reccpy(it, r);
-+ return result;
-+}
-+
-+/*
-+ * Return pointer to the key under iterator.
-+ *
-+ * precondition: it_state(it) == IAM_IT_ATTACHED
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED
-+ */
-+struct iam_key *iam_it_key_get(struct iam_iterator *it, struct iam_key *k)
-+{
-+ assert(it_state(it) == IAM_IT_ATTACHED);
-+ return iam_leaf_key(iam_it_container(it), &it->ii_path.ip_leaf, k);
-+}
-+
-+static int iam_leaf_rec_add(handle_t *handle, struct iam_path *path)
-+{
-+#if 0
-+ struct iam_lentry *p, *q;
-+ int count, err;
-+
-+ count = dx_get_count((struct iam_entry *)path->ip_leaf.il_entries);
-+ p = iam_lentry_shift(path, path->ip_leaf.il_entries, count - 1);
-+ while (p > path->ip_leaf.il_at) {
-+ q = iam_lentry_shift(path, p, 1);
-+ iam_reccpy(path, iam_leaf_rec(path->ip_container, p),
-+ iam_leaf_rec(path->ip_container, q));
-+ iam_keycpy(path->ip_container, iam_leaf_key_at(path, p),
-+ iam_leaf_key_at(path, q));
-+ p = iam_lentry_shift(path, p, -1);
-+ }
-+ dx_set_count((struct iam_entry*)path->ip_leaf.il_entries, count + 1);
-+ err = ext3_journal_dirty_metadata(handle, path->ip_leaf.il_bh);
-+ if (err)
-+ ext3_std_error(iam_path_obj(path)->i_sb, err);
-+ return err;
-+#endif
-+}
-+
-+static int iam_leaf_is_full(struct iam_path *path)
-+{
-+ int count, limit;
-+
-+ count = dx_get_count((struct iam_entry *)path->ip_leaf.il_entries);
-+ limit = dx_get_limit((struct iam_entry *)path->ip_leaf.il_entries);
-+
-+ return (count >= limit);
-+}
-+
-+static int split_leaf_node(handle_t *handle, struct iam_path *path)
-+{
-+#if 0
-+ struct inode *dir = iam_path_obj(path);
-+ unsigned continued = 0;
-+ struct buffer_head *bh2;
-+ u32 newblock, hash_split;
-+ char *data2;
-+ unsigned split;
-+ int err;
-+
-+ bh2 = ext3_append (handle, dir, &newblock, &err);
-+ if (!(bh2))
-+ return -ENOSPC;
-+
-+ err = iam_leaf_load(path);
-+ if (err)
-+ goto errout;
-+
-+ BUFFER_TRACE(path->ip_leaf.il_bh, "get_write_access");
-+ err = ext3_journal_get_write_access(handle, path->ip_leaf.il_bh);
-+ if (err) {
-+ journal_error:
-+ iam_leaf_fini(path);
-+ brelse(bh2);
-+ ext3_std_error(dir->i_sb, err);
-+ err = -EIO;
-+ goto errout;
-+ }
-+ data2 = bh2->b_data;
-+ split = dx_get_count((struct iam_entry*)iam_leaf_entries(path))/2;
-+ hash_split = *(__u32*)iam_leaf_key_at(path,
-+ iam_lentry_shift(path, iam_leaf_entries(path),
-+ split));
-+ if (iam_keycmp(path->ip_container, iam_leaf_key_at(path,
-+ iam_lentry_shift(path, iam_leaf_entries(path), split)),
-+ iam_leaf_key_at(path,
-+ iam_lentry_shift(path, iam_leaf_entries(path), split -1))) == 0)
-+ continued = 1;
-+
-+ memcpy(iam_lentry_shift(path, (struct iam_lentry *)data2, 1),
-+ iam_lentry_shift(path, iam_leaf_entries(path), split),
-+ split * iam_lentry_size(path));
-+
-+ /* Which block gets the new entry? */
-+ dx_insert_block(path, path->ip_frame, hash_split + continued, newblock);
-+ err = ext3_journal_dirty_metadata (handle, bh2);
-+ if (err)
-+ goto journal_error;
-+ err = ext3_journal_dirty_metadata (handle, path->ip_leaf.il_bh);
-+ if (err)
-+ goto journal_error;
-+errout:
-+ brelse (bh2);
-+ return err;
-+#endif
-+}
-+
-+int iam_add_rec(handle_t *handle, struct iam_path *path)
-+{
-+ int err;
-+
-+ if (!iam_leaf_is_full(path)) {
-+ err = iam_leaf_rec_add(handle, path);
-+ } else {
-+ err = split_index_node(handle, path);
-+ if (err == 0) {
-+ err = split_leaf_node(handle, path);
-+ if (err == 0)
-+ err = iam_leaf_rec_add(handle, path);
-+ }
-+ }
-+ return err;
-+}
-+
-+/*
-+ * Insert new record with key @k and contents from @r, shifting records to the
-+ * right.
-+ *
-+ * precondition: it_state(it) == IAM_IT_ATTACHED &&
-+ * it->ii_flags&IAM_IT_WRITE &&
-+ * it_keycmp(it, iam_it_key_get(it, *), k) < 0
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
-+ * ergo(result == 0,
-+ * it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
-+ * !memcmp(iam_it_rec_get(it), r, ...))
-+ */
-+int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
-+ struct iam_key *k, struct iam_rec *r)
-+{
-+ int result;
-+
-+ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
-+ assert(it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)), k) < 0);
-+
-+ result = iam_add_rec(h, &it->ii_path);
-+ if (result == 0) {
-+ /* place record and key info freed space. Leaf node is already
-+ * in transaction. */
-+ iam_it_reccpy(it, r);
-+ /*
-+ * XXX TBD.
-+ */
-+ }
-+ assert(it_state(it) == IAM_IT_ATTACHED);
-+ assert(ergo(result == 0,
-+ it_keycmp(it,
-+ iam_it_key_get(it,
-+ it_scratch_key(it, 0)), k) == 0 &&
-+ !memcmp(iam_it_rec_get(it), r,
-+ iam_it_container(it)->ic_descr->id_rec_size)));
-+ return result;
-+}
-+
-+static int iam_leaf_rec_remove(handle_t *handle, struct iam_container *c,
-+ struct iam_path *path)
-+{
-+#if 0
-+ struct iam_lentry *p, *q, *end;
-+ int count, err;
-+
-+ count = dx_get_count((struct iam_entry *)path->ip_leaf.il_entries);
-+ end = iam_lentry_shift(path, path->ip_leaf.il_entries, count - 1);
-+ p = iam_lentry_at(path, path->ip_leaf.il_at);
-+ while (p <= end) {
-+ q = iam_lentry_shift(path, p, 1);
-+ iam_reccpy(path, iam_leaf_rec(path->ip_container, p),
-+ iam_leaf_rec(path->ip_container, q));
-+ iam_keycpy(c, iam_leaf_key_at(path, p),
-+ iam_leaf_key_at(path, q));
-+ p = iam_lentry_shift(path, p, 1);
-+ }
-+ dx_set_count((struct iam_entry*)path->ip_leaf.il_entries, count - 1);
-+ err = ext3_journal_dirty_metadata(handle, path->ip_leaf.il_bh);
-+ if (err)
-+ ext3_std_error(iam_path_obj(path)->i_sb, err);
-+ return err;
-+#endif
-+}
-+
-+/*
-+ * Delete record under iterator.
-+ *
-+ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED
-+ */
-+int iam_it_rec_delete(handle_t *h, struct iam_iterator *it)
-+{
-+ int result;
-+
-+ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
-+
-+ result = ext3_journal_get_write_access(h, it->ii_path.ip_leaf.il_bh);
-+ /*
-+ * no compaction for now.
-+ */
-+ if (result == 0)
-+ iam_leaf_rec_remove(h, iam_it_container(it), &it->ii_path);
-+
-+ return result;
-+}
-+
-+/*
-+ * Convert iterator to cookie.
-+ *
-+ * precondition: it_state(it) == IAM_IT_ATTACHED &&
-+ * iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED
-+ */
-+iam_pos_t iam_it_store(struct iam_iterator *it)
-+{
-+ iam_pos_t result;
-+
-+ assert(it_state(it) == IAM_IT_ATTACHED);
-+ assert(iam_it_container(it)->ic_descr->id_key_size <= sizeof result);
-+
-+ result = 0;
-+ iam_it_key_get(it, (struct iam_key *)&result);
-+ return result;
-+}
-+
-+/*
-+ * Restore iterator from cookie.
-+ *
-+ * precondition: it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE &&
-+ * iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
-+ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED &&
-+ * iam_it_store(it) == pos)
-+ */
-+int iam_it_load(struct iam_iterator *it, iam_pos_t pos)
-+{
-+ assert(it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE);
-+ assert(iam_it_container(it)->ic_descr->id_key_size <= sizeof pos);
-+ return iam_it_get(it, (struct iam_key *)&pos);
-+}
-+
-+/***********************************************************************/
-+/* invariants */
-+/***********************************************************************/
-+
-+static inline int ptr_inside(void *base, size_t size, void *ptr)
-+{
-+ return (base <= ptr) && (ptr < base + size);
-+}
-+
-+int iam_frame_invariant(struct iam_frame *f)
-+{
-+ return
-+ (f->bh != NULL &&
-+ f->bh->b_data != NULL &&
-+ ptr_inside(f->bh->b_data, f->bh->b_size, f->entries) &&
-+ ptr_inside(f->bh->b_data, f->bh->b_size, f->at) &&
-+ f->entries <= f->at);
-+}
-+int iam_leaf_invariant(struct iam_leaf *l)
-+{
-+ return
-+ l->il_bh != NULL &&
-+ l->il_bh->b_data != NULL &&
-+ ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_entries) &&
-+ ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_at) &&
-+ l->il_entries <= l->il_at;
-+}
-+
-+int iam_path_invariant(struct iam_path *p)
-+{
-+ int i;
-+
-+ if (p->ip_container == NULL ||
-+ p->ip_indirect < 0 || p->ip_indirect > DX_MAX_TREE_HEIGHT - 1 ||
-+ p->ip_frame != p->ip_frames + p->ip_indirect ||
-+ !iam_leaf_invariant(&p->ip_leaf))
-+ return 0;
-+ for (i = 0; i < ARRAY_SIZE(p->ip_frames); ++i) {
-+ if (i <= p->ip_indirect) {
-+ if (!iam_frame_invariant(&p->ip_frames[i]))
-+ return 0;
-+ }
-+ }
-+ return 1;
-+}
-+
-+int iam_it_invariant(struct iam_iterator *it)
-+{
-+ return
-+ (it->ii_state == IAM_IT_DETACHED ||
-+ it->ii_state == IAM_IT_ATTACHED) &&
-+ !(it->ii_flags & ~(IAM_IT_MOVE | IAM_IT_WRITE)) &&
-+ ergo(it->ii_state == IAM_IT_ATTACHED,
-+ iam_path_invariant(&it->ii_path));
-+}
-+
-+/*
-+ * Search container @c for record with key @k. If record is found, its data
-+ * are moved into @r.
-+ *
-+ *
-+ *
-+ * Return values: +ve: found, 0: not-found, -ve: error
-+ */
-+int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r,
-+ struct iam_path_descr *pd)
-+{
-+ struct iam_iterator it;
-+ int result;
-+
-+ iam_it_init(&it, c, 0, pd);
-+
-+ result = iam_it_get_exact(&it, k);
-+ if (result == 0)
-+ /*
-+ * record with required key found, copy it into user buffer
-+ */
-+ iam_reccpy(&it.ii_path, r, iam_it_rec_get(&it));
-+ iam_it_put(&it);
-+ iam_it_fini(&it);
-+ return result;
-+}
-+EXPORT_SYMBOL(iam_lookup);
-+
-+/*
-+ * Insert new record @r with key @k into container @c (within context of
-+ * transaction @h.
-+ *
-+ * Return values: 0: success, -ve: error, including -EEXIST when record with
-+ * given key is already present.
-+ *
-+ * postcondition: ergo(result == 0 || result == -EEXIST,
-+ * iam_lookup(c, k, r2) > 0 &&
-+ * !memcmp(r, r2, c->ic_descr->id_rec_size));
-+ */
-+int iam_insert(handle_t *h, struct iam_container *c,
-+ struct iam_key *k, struct iam_rec *r, struct iam_path_descr *pd)
-+{
-+ struct iam_iterator it;
-+ int result;
-+
-+ iam_it_init(&it, c, IAM_IT_WRITE, pd);
-+
-+ result = iam_it_get_exact(&it, k);
-+ if (result == -ENOENT)
-+ result = iam_it_rec_insert(h, &it, k, r);
-+ else if (result == 0)
-+ result = -EEXIST;
-+ iam_it_put(&it);
-+ iam_it_fini(&it);
-+ return result;
-+}
-+EXPORT_SYMBOL(iam_insert);
-+
-+int iam_update(handle_t *h, struct iam_container *c,
-+ struct iam_key *k, struct iam_rec *r, struct iam_path_descr *pd)
-+{
-+ struct iam_iterator it;
-+ int result;
-+
-+ iam_it_init(&it, c, IAM_IT_WRITE, pd);
-+
-+ result = iam_it_get_exact(&it, k);
-+ if (result == 0)
-+ iam_it_rec_set(h, &it, r);
-+ iam_it_put(&it);
-+ iam_it_fini(&it);
-+ return result;
-+}
-+EXPORT_SYMBOL(iam_update);
-+
-+/*
-+ * Delete existing record with key @k.
-+ *
-+ * Return values: 0: success, -ENOENT: not-found, -ve: other error.
-+ *
-+ * postcondition: ergo(result == 0 || result == -ENOENT,
-+ * !iam_lookup(c, k, *));
-+ */
-+int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k,
-+ struct iam_path_descr *pd)
-+{
-+ struct iam_iterator it;
-+ int result;
-+
-+ iam_it_init(&it, c, IAM_IT_WRITE, pd);
-+
-+ result = iam_it_get_exact(&it, k);
-+ if (result == 0)
-+ iam_it_rec_delete(h, &it);
-+ iam_it_put(&it);
-+ iam_it_fini(&it);
-+ return result;
-+}
-+EXPORT_SYMBOL(iam_delete);
-+
-Index: iam/fs/ext3/namei.c
-===================================================================
---- iam.orig/fs/ext3/namei.c 2006-05-10 18:21:01.000000000 +0400
-+++ iam/fs/ext3/namei.c 2006-05-10 20:56:22.000000000 +0400
-@@ -24,81 +24,6 @@
- * Theodore Ts'o, 2002
- */
-
--/*
-- * iam: big theory statement.
-- *
-- * iam (Index Access Module) is a module providing abstraction of persistent
-- * transactional container on top of generalized ext3 htree.
-- *
-- * iam supports:
-- *
-- * - key, pointer, and record size specifiable per container.
-- *
-- * - trees taller than 2 index levels.
-- *
-- * - read/write to existing ext3 htree directories as iam containers.
-- *
-- * iam container is a tree, consisting of leaf nodes containing keys and
-- * records stored in this container, and index nodes, containing keys and
-- * pointers to leaf or index nodes.
-- *
-- * iam does not work with keys directly, instead it calls user-supplied key
-- * comparison function (->dpo_keycmp()).
-- *
-- * Pointers are (currently) interpreted as logical offsets (measured in
-- * blocksful) within underlying flat file on top of which iam tree lives.
-- *
-- * On-disk format:
-- *
-- * iam mostly tries to reuse existing htree formats.
-- *
-- * Format of index node:
-- *
-- * +-----+-------+-------+-------+------+-------+------------+
-- * | | count | | | | | |
-- * | gap | / | entry | entry | .... | entry | free space |
-- * | | limit | | | | | |
-- * +-----+-------+-------+-------+------+-------+------------+
-- *
-- * gap this part of node is never accessed by iam code. It
-- * exists for binary compatibility with ext3 htree (that,
-- * in turn, stores fake struct ext2_dirent for ext2
-- * compatibility), and to keep some unspecified per-node
-- * data. Gap can be different for root and non-root index
-- * nodes. Gap size can be specified for each container
-- * (gap of 0 is allowed).
-- *
-- * count/limit current number of entries in this node, and the maximal
-- * number of entries that can fit into node. count/limit
-- * has the same size as entry, and is itself counted in
-- * count.
-- *
-- * entry index entry: consists of a key immediately followed by
-- * a pointer to a child node. Size of a key and size of a
-- * pointer depends on container. Entry has neither
-- * alignment nor padding.
-- *
-- * free space portion of node new entries are added to
-- *
-- * Entries in index node are sorted by their key value.
-- *
-- * Format of leaf node:
-- *
-- * +-----+-------+-------+-------+------+-------+------------+
-- * | | count | | | | | |
-- * | gap | / | leaf | leaf | .... | leaf | free space |
-- * | | limit | | | | | |
-- * +-----+-------+-------+-------+------+-------+------------+
--
-- * leaf For leaf entry: consists of a rec immediately followd by
-- * a key. size of a key and size of a rec depends on container.
-- *
-- *
-- *
-- *
-- *
-- */
--
- #include <linux/module.h>
- #include <linux/fs.h>
- #include <linux/pagemap.h>
-@@ -112,10 +37,10 @@
- #include <linux/quotaops.h>
- #include <linux/buffer_head.h>
- #include <linux/smp_lock.h>
+-/*
+- * iam: big theory statement.
+- *
+- * iam (Index Access Module) is a module providing abstraction of persistent
+- * transactional container on top of generalized ext3 htree.
+- *
+- * iam supports:
+- *
+- * - key, pointer, and record size specifiable per container.
+- *
+- * - trees taller than 2 index levels.
+- *
+- * - read/write to existing ext3 htree directories as iam containers.
+- *
+- * iam container is a tree, consisting of leaf nodes containing keys and
+- * records stored in this container, and index nodes, containing keys and
+- * pointers to leaf or index nodes.
+- *
+- * iam does not work with keys directly, instead it calls user-supplied key
+- * comparison function (->dpo_keycmp()).
+- *
+- * Pointers are (currently) interpreted as logical offsets (measured in
+- * blocksful) within underlying flat file on top of which iam tree lives.
+- *
+- * On-disk format:
+- *
+- * iam mostly tries to reuse existing htree formats.
+- *
+- * Format of index node:
+- *
+- * +-----+-------+-------+-------+------+-------+------------+
+- * | | count | | | | | |
+- * | gap | / | entry | entry | .... | entry | free space |
+- * | | limit | | | | | |
+- * +-----+-------+-------+-------+------+-------+------------+
+- *
+- * gap this part of node is never accessed by iam code. It
+- * exists for binary compatibility with ext3 htree (that,
+- * in turn, stores fake struct ext2_dirent for ext2
+- * compatibility), and to keep some unspecified per-node
+- * data. Gap can be different for root and non-root index
+- * nodes. Gap size can be specified for each container
+- * (gap of 0 is allowed).
+- *
+- * count/limit current number of entries in this node, and the maximal
+- * number of entries that can fit into node. count/limit
+- * has the same size as entry, and is itself counted in
+- * count.
+- *
+- * entry index entry: consists of a key immediately followed by
+- * a pointer to a child node. Size of a key and size of a
+- * pointer depends on container. Entry has neither
+- * alignment nor padding.
+- *
+- * free space portion of node new entries are added to
+- *
+- * Entries in index node are sorted by their key value.
+- *
+- * Format of leaf node:
+- *
+- * +-----+-------+-------+-------+------+-------+------------+
+- * | | count | | | | | |
+- * | gap | / | leaf | leaf | .... | leaf | free space |
+- * | | limit | | | | | |
+- * +-----+-------+-------+-------+------+-------+------------+
+-
+- * leaf For leaf entry: consists of a rec immediately followd by
+- * a key. size of a key and size of a rec depends on container.
+- *
+- *
+- *
+- *
+- *
+- */
+-
+ #include <linux/module.h>
+ #include <linux/fs.h>
+ #include <linux/pagemap.h>
+@@ -112,10 +37,10 @@
+ #include <linux/quotaops.h>
+ #include <linux/buffer_head.h>
+ #include <linux/smp_lock.h>
+#include <linux/lustre_iam.h>
#include "xattr.h"
#include "iopen.h"
{
struct buffer_head *bh;
-@@ -141,9 +66,6 @@ static struct buffer_head *ext3_append(h
+@@ -141,9 +66,6 @@
return bh;
}
#ifndef swap
#define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
-@@ -162,10 +84,6 @@ struct fake_dirent {
+@@ -162,10 +84,6 @@
u8 file_type;
};
/*
* dx_root_info is laid out so that if it should somehow get overlaid by a
-@@ -203,242 +121,10 @@ struct dx_map_entry
+@@ -203,242 +121,10 @@
};
static void dx_set_key(struct iam_path *p, struct iam_entry *entry,
struct iam_key *key);
static unsigned dx_get_count(struct iam_entry *entries);
-@@ -457,80 +143,29 @@ static void dx_sort_map(struct dx_map_en
+@@ -457,80 +143,29 @@
static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
struct dx_map_entry *offsets, int count);
static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
return key;
}
-@@ -540,68 +175,69 @@ static inline struct iam_key *iam_key_at
+@@ -540,68 +175,70 @@
return (struct iam_key *)entry;
}
+static u32 htree_root_ptr(struct iam_container *c);
+static int htree_node_check(struct iam_path *path, struct iam_frame *frame);
+static int htree_node_init(struct iam_container *c, struct buffer_head *bh, int root);
-+static int htree_node_read(struct iam_container *c, iam_ptr_t ptr,
-+ handle_t *handle, struct buffer_head **bh);
+static int htree_keycmp(struct iam_container *c,
+ struct iam_key *k1, struct iam_key *k2);
+
+-static struct iam_entry *dx_get_entries(struct iam_path *path, void *data,
+- int root)
+-{
+- return data +
+- (root ?
+- path_descr(path)->id_root_gap : path_descr(path)->id_node_gap);
+-}
++struct iam_operations htree_operation = {
++ .id_root_ptr = htree_root_ptr,
++ .id_node_check = htree_node_check,
++ .id_node_init = htree_node_init,
++ .id_node_read = iam_node_read,
++ .id_keycmp = htree_keycmp
++};
++
+/*
+ * Parameters describing iam compatibility mode in which existing ext3 htrees
+ * can be manipulated.
+ .id_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs,
+ .id_node_gap = offsetof(struct dx_node, entries),
+ .id_root_gap = offsetof(struct dx_root, entries),
-+
-+ .id_root_ptr = htree_root_ptr,
-+ .id_node_check = htree_node_check,
-+ .id_node_init = htree_node_init,
-+ .id_node_read = htree_node_read,
-+ .id_keycmp = htree_keycmp
++ .id_ops = &htree_operation
+};
--static struct iam_entry *dx_get_entries(struct iam_path *path, void *data,
-- int root)
--{
-- return data +
-- (root ?
-- path_descr(path)->id_root_gap : path_descr(path)->id_node_gap);
--}
-
-static struct iam_entry *dx_node_get_entries(struct iam_path *path,
- struct iam_frame *frame)
+static inline int dx_index_is_compat(struct iam_path *path)
static int dx_node_check(struct iam_path *p, struct iam_frame *f)
{
struct iam_entry *e;
-@@ -614,10 +250,10 @@ static int dx_node_check(struct iam_path
+@@ -614,10 +251,10 @@
count = dx_get_count(e);
e = iam_entry_shift(p, e, 1);
for (i = 0; i < count - 1; ++i, e = iam_entry_shift(p, e, 1)) {
return 0;
}
return 1;
-@@ -636,13 +272,17 @@ static int htree_node_check(struct iam_p
+@@ -636,13 +273,17 @@
data = frame->bh->b_data;
entries = dx_node_get_entries(path, frame);
if (root->info.hash_version > DX_HASH_MAX) {
ext3_warning(sb, __FUNCTION__,
"Unrecognised inode hash code %d",
-@@ -669,15 +309,16 @@ static int htree_node_check(struct iam_p
+@@ -669,15 +310,16 @@
root->info.info_length));
assert(dx_get_limit(entries) == dx_root_limit(path));
assert(dx_get_limit(entries) == dx_node_limit(path));
}
frame->entries = frame->at = entries;
-@@ -800,7 +441,7 @@ struct stats dx_show_entries(struct dx_h
+@@ -697,8 +339,8 @@
+ return 0;
+ }
+
+-static int htree_node_read(struct iam_container *c, iam_ptr_t ptr,
+- handle_t *handle, struct buffer_head **bh)
++int iam_node_read(struct iam_container *c, iam_ptr_t ptr,
++ handle_t *handle, struct buffer_head **bh)
+ {
+ int result = 0;
+
+@@ -800,7 +442,7 @@
}
#endif /* DX_DEBUG */
{
u32 ptr;
int err = 0;
-@@ -810,7 +451,7 @@ static int dx_lookup(struct iam_path *pa
+@@ -810,11 +452,11 @@
struct iam_frame *frame;
struct iam_container *c;
c = path->ip_container;
for (frame = path->ip_frames, i = 0,
-@@ -841,8 +482,8 @@ static int dx_lookup(struct iam_path *pa
+- ptr = param->id_root_ptr(path->ip_container);
++ ptr = param->id_ops->id_root_ptr(path->ip_container);
+ i <= path->ip_indirect;
+ ptr = dx_get_block(path, frame->at), ++frame, ++i) {
+ struct iam_entry *entries;
+@@ -823,10 +465,11 @@
+ struct iam_entry *m;
+ unsigned count;
+
+- err = param->id_node_read(c, (iam_ptr_t)ptr, NULL, &frame->bh);
++ err = param->id_ops->id_node_read(c, (iam_ptr_t)ptr, NULL,
++ &frame->bh);
+ if (err != 0)
+ break;
+- err = param->id_node_check(path, frame);
++ err = param->id_ops->id_node_check(path, frame);
+ if (err != 0)
+ break;
+
+@@ -841,8 +484,8 @@
m = iam_entry_shift(path,
p, iam_entry_diff(path, q, p) / 2);
dxtrace(printk("."));
q = iam_entry_shift(path, m, -1);
else
p = iam_entry_shift(path, m, +1);
-@@ -857,12 +498,12 @@ static int dx_lookup(struct iam_path *pa
+@@ -857,12 +500,12 @@
while (n--) {
dxtrace(printk(","));
at = iam_entry_shift(path, at, +1);
path->ip_key_target));
}
at = iam_entry_shift(path, at, -1);
-@@ -891,508 +532,20 @@ static int dx_probe(struct dentry *dentr
+@@ -891,508 +534,20 @@
struct dx_hash_info *hinfo, struct iam_path *path)
{
int err;
* This function increments the frame pointer to search the next leaf
* block, and reads in the necessary intervening nodes if the search
* should be necessary. Whether or not the search is necessary is
-@@ -1409,8 +562,8 @@ EXPORT_SYMBOL(iam_update);
+@@ -1409,8 +564,8 @@
* If start_hash is non-null, it will be filled in with the starting
* hash of the next page.
*/
{
struct iam_frame *p;
struct buffer_head *bh;
-@@ -1445,7 +598,7 @@ static int ext3_htree_next_block(struct
+@@ -1445,7 +600,7 @@
* desired contiuation hash. If it doesn't, return since
* there's no point to read in the successive index pages.
*/
if (start_hash)
*start_hash = bhash;
if ((hash & 1) == 0) {
-@@ -1457,9 +610,10 @@ static int ext3_htree_next_block(struct
+@@ -1457,9 +612,10 @@
* block so no check is necessary
*/
while (num_frames--) {
- err = path_descr(path)->id_node_read(path->ip_container,
- (iam_ptr_t)dx_get_block(path, p->at),
- NULL, &bh);
-+ err = iam_path_descr(path)->
++ err = iam_path_descr(path)->id_ops->
+ id_node_read(path->ip_container,
+ (iam_ptr_t)dx_get_block(path, p->at),
+ NULL, &bh);
if (err != 0)
return err; /* Failure */
++p;
-@@ -1662,8 +816,8 @@ static void dx_sort_map (struct dx_map_e
+@@ -1662,8 +818,8 @@
} while(more);
}
{
struct iam_entry *entries = frame->entries;
struct iam_entry *old = frame->at, *new = iam_entry_shift(path, old, +1);
-@@ -1897,14 +1051,15 @@ static struct buffer_head * ext3_dx_find
+@@ -1897,14 +1053,15 @@
if (*err != 0)
return NULL;
} else {
block = dx_get_block(path, path->ip_frame->at);
- *err = path_descr(path)->id_node_read(path->ip_container, (iam_ptr_t)block,
- NULL, &bh);
-+ *err = iam_path_descr(path)->id_node_read(path->ip_container,
++ *err = iam_path_descr(path)->id_ops->id_node_read(path->ip_container,
+ (iam_ptr_t)block,
+ NULL, &bh);
if (*err != 0)
goto errout;
de = (struct ext3_dir_entry_2 *) bh->b_data;
-@@ -2067,7 +1222,7 @@ static struct ext3_dir_entry_2 *do_split
+@@ -2067,7 +1224,7 @@
struct buffer_head **bh,struct iam_frame *frame,
struct dx_hash_info *hinfo, int *error)
{
- struct inode *dir = path_obj(path);
+ struct inode *dir = iam_path_obj(path);
- unsigned blocksize = dir->i_sb->s_blocksize;
- unsigned count, continued;
- struct buffer_head *bh2;
-@@ -2392,15 +1547,15 @@ static int ext3_add_entry (handle_t *han
- }
-
- #ifdef CONFIG_EXT3_INDEX
--static int split_index_node(handle_t *handle, struct iam_path *path)
--{
-+int split_index_node(handle_t *handle, struct iam_path *path)
+ unsigned blocksize = dir->i_sb->s_blocksize;
+ unsigned count, continued;
+ struct buffer_head *bh2;
+@@ -2392,15 +1549,15 @@
+ }
+
+ #ifdef CONFIG_EXT3_INDEX
+-static int split_index_node(handle_t *handle, struct iam_path *path)
+-{
++int split_index_node(handle_t *handle, struct iam_path *path)
++{
+
+ struct iam_entry *entries; /* old block contents */
+ struct iam_entry *entries2; /* new block contents */
+ struct iam_frame *frame, *safe;
+ struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {0};
+ u32 newblock[DX_MAX_TREE_HEIGHT] = {0};
+- struct inode *dir = path_obj(path);
++ struct inode *dir = iam_path_obj(path);
+ int nr_splet;
+ int i, err;
+
+@@ -2442,7 +1599,8 @@
+ for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
+ bh_new[i] = ext3_append (handle, dir, &newblock[i], &err);
+ if (!bh_new[i] ||
+- path_descr(path)->id_node_init(path->ip_container, bh_new[i], 0) != 0)
++ iam_path_descr(path)->id_ops->id_node_init(path->ip_container,
++ bh_new[i], 0) != 0)
+ goto cleanup;
+ BUFFER_TRACE(frame->bh, "get_write_access");
+ err = ext3_journal_get_write_access(handle, frame->bh);
+@@ -2516,9 +1674,9 @@
+ unsigned count1 = count/2, count2 = count - count1;
+ unsigned hash2;
+
+- dx_get_key(path,
+- iam_entry_shift(path, entries, count1),
+- (struct iam_key *)&hash2);
++ iam_get_key(path,
++ iam_entry_shift(path, entries, count1),
++ (struct iam_key *)&hash2);
+
+ dxtrace(printk("Split index %i/%i\n", count1, count2));
+
+@@ -2578,7 +1736,7 @@
+ size_t isize;
+
+ iam_path_compat_init(&cpath, dir);
+- param = path_descr(path);
++ param = iam_path_descr(path);
+
+ err = dx_probe(dentry, NULL, &hinfo, path);
+ if (err != 0)
+@@ -2588,8 +1746,9 @@
+ /* XXX nikita: global serialization! */
+ isize = dir->i_size;
+
+- err = param->id_node_read(path->ip_container, (iam_ptr_t)dx_get_block(path, frame->at),
+- handle, &bh);
++ err = param->id_ops->id_node_read(path->ip_container,
++ (iam_ptr_t)dx_get_block(path, frame->at),
++ handle, &bh);
+ if (err != 0)
+ goto cleanup;
+
+@@ -2724,12 +1883,12 @@
+ * is so far negative - it has no inode.
+ *
+ * If the create succeeds, we fill in the inode information
+- * with d_instantiate().
++ * with d_instantiate().
+ */
+ static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
+ struct nameidata *nd)
+ {
+- handle_t *handle;
++ handle_t *handle;
+ struct inode * inode;
+ int err, retries = 0;
+
+Index: linux-2.6.9/fs/ext3/iam.c
+===================================================================
+--- linux-2.6.9.orig/fs/ext3/iam.c 2006-05-24 19:52:06.500448688 +0800
++++ linux-2.6.9/fs/ext3/iam.c 2006-05-24 17:47:34.000000000 +0800
+@@ -0,0 +1,1183 @@
++/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
++ * vim:expandtab:shiftwidth=8:tabstop=8:
++ *
++ * iam.c
++ * Top-level entry points into osd module
++ *
++ * Copyright (c) 2006 Cluster File Systems, Inc.
++ * Author: Wang Di <wangdi@clusterfs.com>
++ * Author: Nikita Danilov <nikita@clusterfs.com>
++ *
++ * This file is part of the Lustre file system, http://www.lustre.org
++ * Lustre is a trademark of Cluster File Systems, Inc.
++ *
++ * You may have signed or agreed to another license before downloading
++ * this software. If so, you are bound by the terms and conditions
++ * of that agreement, and the following does not apply to you. See the
++ * LICENSE file included with this distribution for more information.
++ *
++ * If you did not agree to a different license, then this copy of Lustre
++ * is open source software; you can redistribute it and/or modify it
++ * under the terms of version 2 of the GNU General Public License as
++ * published by the Free Software Foundation.
++ *
++ * In either case, Lustre is distributed in the hope that it will be
++ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
++ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * license text for more details.
++ */
++
++/*
++ * iam: big theory statement.
++ *
++ * iam (Index Access Module) is a module providing abstraction of persistent
++ * transactional container on top of generalized ext3 htree.
++ *
++ * iam supports:
++ *
++ * - key, pointer, and record size specifiable per container.
++ *
++ * - trees taller than 2 index levels.
++ *
++ * - read/write to existing ext3 htree directories as iam containers.
++ *
++ * iam container is a tree, consisting of leaf nodes containing keys and
++ * records stored in this container, and index nodes, containing keys and
++ * pointers to leaf or index nodes.
++ *
++ * iam does not work with keys directly, instead it calls user-supplied key
++ * comparison function (->dpo_keycmp()).
++ *
++ * Pointers are (currently) interpreted as logical offsets (measured in
++ * blocksful) within underlying flat file on top of which iam tree lives.
++ *
++ * On-disk format:
++ *
++ * iam mostly tries to reuse existing htree formats.
++ *
++ * Format of index node:
++ *
++ * +-----+-------+-------+-------+------+-------+------------+
++ * | | count | | | | | |
++ * | gap | / | entry | entry | .... | entry | free space |
++ * | | limit | | | | | |
++ * +-----+-------+-------+-------+------+-------+------------+
++ *
++ * gap this part of node is never accessed by iam code. It
++ * exists for binary compatibility with ext3 htree (that,
++ * in turn, stores fake struct ext2_dirent for ext2
++ * compatibility), and to keep some unspecified per-node
++ * data. Gap can be different for root and non-root index
++ * nodes. Gap size can be specified for each container
++ * (gap of 0 is allowed).
++ *
++ * count/limit current number of entries in this node, and the maximal
++ * number of entries that can fit into node. count/limit
++ * has the same size as entry, and is itself counted in
++ * count.
++ *
++ * entry index entry: consists of a key immediately followed by
++ * a pointer to a child node. Size of a key and size of a
++ * pointer depends on container. Entry has neither
++ * alignment nor padding.
++ *
++ * free space portion of node new entries are added to
++ *
++ * Entries in index node are sorted by their key value.
++ *
++ * Format of a leaf node is not specified. Generic iam code accesses leaf
++ * nodes through ->id_leaf methods in struct iam_descr.
++ *
++ */
++
++#include <linux/module.h>
++#include <linux/fs.h>
++#include <linux/pagemap.h>
++#include <linux/jbd.h>
++#include <linux/time.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/fcntl.h>
++#include <linux/stat.h>
++#include <linux/string.h>
++#include <linux/quotaops.h>
++#include <linux/buffer_head.h>
++#include <linux/smp_lock.h>
++#include <linux/lustre_iam.h>
++
++#include <libcfs/libcfs.h>
++#include <libcfs/kp30.h>
++
++#include "xattr.h"
++#include "iopen.h"
++#include "acl.h"
++
++static int iam_leaf_at_end(struct iam_container *c, struct iam_leaf *l);
++static void iam_leaf_next(struct iam_container *c, struct iam_leaf *folio);
++
++static inline int iam_lfix_entry_size(struct iam_container *c)
++{
++ return iam_container_descr(c)->id_key_size +
++ iam_container_descr(c)->id_rec_size;
++}
++
++static inline struct iam_lentry *
++iam_lentry_shift(struct iam_container *c, struct iam_lentry *entry, int shift)
++{
++ void *e = entry;
++ return e + shift * iam_lfix_entry_size(c);
++}
++
++static inline struct iam_key *
++iam_leaf_key_at(struct iam_container *c, struct iam_lentry *entry)
++{
++ void *e = entry;
++ return e;
++}
++
++static struct iam_lentry *
++iam_get_lentries(struct iam_container *c, void *data)
++{
++ return data + iam_container_descr(c)->id_node_gap;
++}
++
++static int lentry_count_get (struct iam_leaf *leaf)
++{
++ struct iam_lentry *lentry = leaf->il_entries;
++ return le16_to_cpu(((struct iam_leaf_head *)lentry)->ill_count);
++}
++
++static void lentry_count_set (struct iam_leaf *leaf, unsigned count)
++{
++ struct iam_lentry *lentry = leaf->il_entries;
++ ((struct iam_leaf_head *)lentry)->ill_count = cpu_to_le16(count);
++}
++
++/*
++ * Helper function returning scratch key.
++ */
++static struct iam_key *it_scratch_key(struct iam_iterator *it, int n)
++{
++ return iam_path_key(&it->ii_path, n);
++}
++
++static struct iam_container *iam_it_container(struct iam_iterator *it)
++{
++ return it->ii_path.ip_container;
++}
++
++static inline int it_keycmp(struct iam_iterator *it,
++ struct iam_key *k1, struct iam_key *k2)
++{
++ return iam_keycmp(iam_it_container(it), k1, k2);
++}
++
++/* This function handles flat keys, i.e., keys stored explicitly
++ * in the leaf entry itself. Keys that are not stored explicitly
++ * would instead have to be decoded into the supplied key buffer.
++ */
++struct iam_key*
++iam_generic_leaf_flat_key(struct iam_container *c, struct iam_leaf *l,
++ struct iam_key *key)
++{
++ void *ie = l->il_at;
++ return (struct iam_key*)ie;
++}
++
++static void
++iam_generic_leaf_start(struct iam_container *c, struct iam_leaf *l)
++{
++ l->il_at = iam_get_lentries(c, l->il_bh->b_data);
++}
++
++static inline ptrdiff_t iam_lfix_diff(struct iam_path *p, struct iam_lentry *e1,
++ struct iam_lentry *e2)
++{
++ ptrdiff_t diff;
++
++ diff = (void *)e1 - (void *)e2;
++ assert(diff / iam_lfix_entry_size(p->ip_container) *
++ iam_lfix_entry_size(p->ip_container) == diff);
++ return diff / iam_lfix_entry_size(p->ip_container);
++}
++
++int iam_lfix_init(struct iam_container *c, struct iam_leaf *l)
++{
++ struct iam_leaf_head *ill;
++
++ assert(l->il_bh != NULL);
++
++ ill = (struct iam_leaf_head*)l->il_bh->b_data;
++ assert(ill->ill_magic == IAM_LEAF_HEADER_MAGIC);
++
++ l->il_at = l->il_entries = iam_get_lentries(c, l->il_bh->b_data);
++ return 0;
++}
++
++void iam_lfix_fini(struct iam_container *c, struct iam_leaf *l)
++{
++ l->il_entries = l->il_at = NULL;
++ return;
++}
++
++static struct iam_lentry *
++iam_lfix_get_end(struct iam_container *c, struct iam_leaf *l)
++{
++ int count = lentry_count_get(l);
++ struct iam_lentry *ile = iam_lentry_shift(c, l->il_entries, count);
++
++ return ile;
++}
++
++struct iam_rec*
++iam_lfix_rec(struct iam_container *c, struct iam_leaf *l)
++{
++ void *e = l->il_at;
++ return e + iam_container_descr(c)->id_key_size;
++}
++
++static void
++iam_lfix_next(struct iam_container *c, struct iam_leaf *l)
++{
++ assert(!iam_leaf_at_end(c, l));
++ l->il_at = iam_lentry_shift(c, l->il_at, 1);
++}
++
++static int
++iam_lfix_lookup(struct iam_container *c, struct iam_path *path,
++ struct iam_leaf *l, struct iam_key *k)
++{
++ struct iam_lentry *p, *q, *m;
++ int count;
++
++ count = lentry_count_get(l);
++
++ p = iam_lentry_shift(c, l->il_entries, 1);
++ q = iam_lentry_shift(c, l->il_entries, count - 1);
++
++ while (p < q) {
++ m = iam_lentry_shift(c, p, iam_lfix_diff(path, q, p) / 2);
++ if (iam_keycmp(c, iam_leaf_key_at(c, m), k) >= 0)
++ q = iam_lentry_shift(c, m, -1);
++ else
++ p = iam_lentry_shift(c, m, +1);
++ }
++ l->il_at = q;
++ iam_keycpy(c, iam_path_key(path, 0), iam_leaf_key_at(c, q));
++
++ if (iam_keycmp(c, iam_leaf_key_at(c, q), k) != 0)
++ return -ENOENT;
++ return 0;
++}
++
++static void iam_lfix_rec_add (struct iam_path *path,
++ struct iam_key *k, struct iam_rec *r)
++{
++ struct iam_lentry *end, *next, *nnext;
++ int count;
++ ptrdiff_t diff;
++
++ count = lentry_count_get(&path->ip_leaf);
++ end = iam_lfix_get_end(path->ip_container, &path->ip_leaf);
++ next = iam_lentry_shift(path->ip_container, path->ip_leaf.il_at, 1);
++ nnext = iam_lentry_shift(path->ip_container, next, 1);
++
++ diff = (void *)end - (void *)next;
++ memmove(next, nnext, diff);
++
++ lentry_count_set(&path->ip_leaf, count + 1);
++}
++
++static void iam_lfix_rec_del(struct iam_path *path)
++{
++ struct iam_lentry *next, *end;
++ int count;
++ ptrdiff_t diff;
++
++ count = lentry_count_get(&path->ip_leaf);
++ end = iam_lfix_get_end(path->ip_container, &path->ip_leaf);
++ next = iam_lentry_shift(path->ip_container, path->ip_leaf.il_at, 1);
++ diff = (void *)end - (void *)next;
++ memmove(path->ip_leaf.il_at, next, diff);
++
++ lentry_count_set(&path->ip_leaf, count - 1);
++}
++
++static int iam_lfix_can_add (struct iam_container *c, struct iam_leaf *l,
++ struct iam_key *k, struct iam_rec *r)
++{
++ struct iam_lentry *end;
++ int block_size = c->ic_object->i_sb->s_blocksize;
++ unsigned long left, entry_size;
++
++ end = iam_lfix_get_end(c, l);
++
++ left = block_size - iam_container_descr(c)->id_node_gap;
++
++ left -= (unsigned long)((void*)end - (void*)l->il_entries);
++
++ entry_size = iam_lfix_entry_size(c);
++
++ if (left >= entry_size)
++ return 1;
++
++ return 0;
++}
++
++static int iam_lfix_at_end(struct iam_container *c, struct iam_leaf *folio)
++{
++ struct iam_lentry *ile = iam_lfix_get_end(c, folio);
++
++ return (folio->il_at == ile);
++}
++
++struct iam_leaf_operations lfix_leaf_ops = {
++ .init = iam_lfix_init,
++ .fini = iam_lfix_fini,
++ .start = iam_generic_leaf_start,
++ .next = iam_lfix_next,
++ .key = iam_generic_leaf_flat_key,
++ .rec = iam_lfix_rec,
++ .lookup = iam_lfix_lookup,
++ .at_end = iam_lfix_at_end,
++ .rec_add = iam_lfix_rec_add,
++ .rec_del = iam_lfix_rec_del,
++ .can_add = iam_lfix_can_add
++};
++EXPORT_SYMBOL(lfix_leaf_ops);
++
++static __u32 iam_root_ptr(struct iam_container *c)
++{
++ return 0;
++}
++
++static int iam_node_init(struct iam_container *c, struct buffer_head *bh,
++ int root)
++{
++ return 0;
++}
++
++static int iam_node_check(struct iam_path *path, struct iam_frame *frame)
++{
++ struct iam_entry *entries;
++ void *data;
++ entries = dx_node_get_entries(path, frame);
++
++ data = frame->bh->b_data;
++
++ if (frame == path->ip_frames) {
++ struct iam_root *root;
++
++ root = data;
++ path->ip_indirect = root->info.indirect_levels;
++ }
++ frame->entries = frame->at = entries;
++ return 0;
++}
++
++static int iam_node_create(struct iam_container *c)
++{
++ return 0;
++}
++
++struct iam_operations generic_iam_ops = {
++ .id_root_ptr = iam_root_ptr,
++ .id_node_read = iam_node_read,
++ .id_node_init = iam_node_init,
++ .id_node_check = iam_node_check,
++ .id_create = iam_node_create,
++};
++EXPORT_SYMBOL(generic_iam_ops);
++
++static inline void iam_reccpy(struct iam_path *p, struct iam_rec *rec_dst,
++ struct iam_rec *rec_src)
++{
++ memcpy(rec_dst, rec_src, iam_path_descr(p)->id_rec_size);
++}
++
++/*
++ * Return pointer to current leaf record. Pointer is valid while corresponding
++ * leaf node is locked and pinned.
++ */
++struct iam_rec *iam_leaf_rec(struct iam_container *c, struct iam_leaf *leaf)
++{
++ return c->ic_descr->id_leaf_ops->rec(c, leaf);
++}
++
++/*
++ * Return pointer to the current leaf key. This function may return either
++ * pointer to the key stored in node, or copy key into @key buffer supplied by
++ * caller and return pointer to this buffer. The latter approach is used when
++ * keys in nodes are not stored in plain form (e.g., htree doesn't store keys
++ * at all).
++ *
++ * Caller should assume that returned pointer is only valid while leaf node is
++ * pinned and locked.
++ */
++struct iam_key *iam_leaf_key(struct iam_container *c, struct iam_leaf *leaf,
++ struct iam_key *key)
++{
++ return c->ic_descr->id_leaf_ops->key(c, leaf, key);
++}
++
++/*
++ * Initialize container @c, acquires additional reference on @inode.
++ */
++int iam_container_init(struct iam_container *c,
++ struct iam_descr *descr, struct inode *inode)
++{
++ memset(c, 0, sizeof *c);
++ c->ic_descr = descr;
++ c->ic_object = igrab(inode);
++ if (c->ic_object != NULL)
++ return 0;
++ else
++ return -ENOENT;
++}
++EXPORT_SYMBOL(iam_container_init);
++
++/*
++ * Finalize container @c, release all resources.
++ */
++void iam_container_fini(struct iam_container *c)
++{
++ if (c->ic_object != NULL) {
++ iput(c->ic_object);
++ c->ic_object = NULL;
++ }
++}
++EXPORT_SYMBOL(iam_container_fini);
++
++void iam_path_init(struct iam_path *path, struct iam_container *c,
++ struct iam_path_descr *pd)
++{
++ memset(path, 0, sizeof *path);
++ path->ip_container = c;
++ path->ip_frame = path->ip_frames;
++ path->ip_data = pd;
++}
++
++static void iam_leaf_fini(struct iam_path *path);
++void iam_path_fini(struct iam_path *path)
++{
++ int i;
++
++ iam_leaf_fini(path);
++ for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) {
++ if (path->ip_frames[i].bh != NULL) {
++ brelse(path->ip_frames[i].bh);
++ path->ip_frames[i].bh = NULL;
++ }
++ }
++}
++
++extern struct iam_descr htree_compat_param;
++void iam_path_compat_init(struct iam_path_compat *path, struct inode *inode)
++{
++ int i;
++
++ for (i = 0; i < ARRAY_SIZE(path->ipc_scratch); ++i)
++ path->ipc_descr.ipd_key_scratch[i] =
++ (struct iam_key *)&path->ipc_scratch[i];
++
++ iam_container_init(&path->ipc_container, &htree_compat_param, inode);
++ /*
++ * XXX hack allowing finalization of iam_path_compat with
++ * iam_path_fini().
++ */
++ iput(inode);
++ iam_path_init(&path->ipc_path, &path->ipc_container, &path->ipc_descr);
++}
++
++void iam_path_compat_fini(struct iam_path_compat *path)
++{
++ iam_path_fini(&path->ipc_path);
++ iam_container_fini(&path->ipc_container);
++}
++
++static int iam_leaf_load(struct iam_path *path)
++{
++ int block;
++ int err;
++ struct iam_container *c;
++ struct buffer_head *bh;
++ struct iam_leaf *leaf;
++ struct iam_descr *descr;
++
++ c = path->ip_container;
++ leaf = &path->ip_leaf;
++ descr = iam_path_descr(path);
++ block = dx_get_block(path, path->ip_frame->at);
++ err = descr->id_ops->id_node_read(c, block, NULL, &bh);
++ if (err == 0) {
++ leaf->il_bh = bh;
++ err = descr->id_leaf_ops->init(c, leaf);
++ }
++ return err;
++}
++
++static void iam_leaf_fini(struct iam_path *path)
++{
++ iam_path_descr(path)->id_leaf_ops->fini(path->ip_container,
++ &path->ip_leaf);
++ if (path && path->ip_leaf.il_bh) {
++ brelse(path->ip_leaf.il_bh);
++ path->ip_leaf.il_bh = NULL;
++ }
++}
++
++static void iam_leaf_start(struct iam_container *c, struct iam_leaf *folio)
++{
++ c->ic_descr->id_leaf_ops->start(c, folio);
++}
++
++static void iam_leaf_next(struct iam_container *c, struct iam_leaf *folio)
++{
++ c->ic_descr->id_leaf_ops->next(c, folio);
++}
++
++static void iam_rec_add (struct iam_path *path, struct iam_key *key,
++ struct iam_rec *rec)
++{
++ iam_path_descr(path)->id_leaf_ops->rec_add(path, key, rec);
++}
++
++static void iam_rec_del (struct iam_path *path)
++{
++ iam_path_descr(path)->id_leaf_ops->rec_del(path);
++}
++
++static int iam_leaf_at_end(struct iam_container *c, struct iam_leaf *l)
++{
++ return iam_container_descr(c)->id_leaf_ops->at_end(c, l);
++}
++/*
++ * Helper wrapper around iam_it_get(): returns 0 (success) only when record
++ * with exactly the same key as asked is found.
++ */
++static int iam_it_get_exact(struct iam_iterator *it, struct iam_key *k)
++{
++ int result;
++
++ result = iam_it_get(it, k);
++ if (result == 0 &&
++ (it_keycmp(it, k, iam_it_key_get(it, it_scratch_key(it, 0))) != 0))
++ /*
++ * Return -ENOENT if cursor is located above record with a key
++ * different from one specified.
++ *
++ * XXX returning -ENOENT only works if iam_it_get never
++ * returns -ENOENT as a legitimate error.
++ */
++ result = -ENOENT;
++ return result;
++}
++
++/***********************************************************************/
++/* iterator interface */
++/***********************************************************************/
++
++static enum iam_it_state it_state(struct iam_iterator *it)
++{
++ return it->ii_state;
++}
++
++void iam_container_write_lock(struct iam_container *ic)
++{
++ down(&ic->ic_object->i_sem);
++}
++
++void iam_container_write_unlock(struct iam_container *ic)
++{
++ up(&ic->ic_object->i_sem);
++}
++
++void iam_container_read_lock(struct iam_container *ic)
++{
++ down(&ic->ic_object->i_sem);
++}
++
++void iam_container_read_unlock(struct iam_container *ic)
++{
++ up(&ic->ic_object->i_sem);
++}
++
++static void iam_it_lock(struct iam_iterator *it)
++{
++ if (it->ii_flags&IAM_IT_WRITE)
++ iam_container_write_lock(iam_it_container(it));
++ else
++ iam_container_read_lock(iam_it_container(it));
++}
++
++static void iam_it_unlock(struct iam_iterator *it)
++{
++ if (it->ii_flags&IAM_IT_WRITE)
++ iam_container_write_unlock(iam_it_container(it));
++ else
++ iam_container_read_unlock(iam_it_container(it));
++}
++
++/*
++ * Initialize iterator to IAM_IT_DETACHED state.
++ *
++ * postcondition: it_state(it) == IAM_IT_DETACHED
++ */
++int iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags,
++ struct iam_path_descr *pd)
++{
++ memset(it, 0, sizeof *it);
++ it->ii_flags = flags;
++ it->ii_state = IAM_IT_DETACHED;
++ iam_path_init(&it->ii_path, c, pd);
++ return 0;
++}
++
++/*
++ * Finalize iterator and release all resources.
++ *
++ * precondition: it_state(it) == IAM_IT_DETACHED
++ */
++void iam_it_fini(struct iam_iterator *it)
++{
++ assert(it_state(it) == IAM_IT_DETACHED);
++ iam_path_fini(&it->ii_path);
++}
++
++int iam_path_lookup(struct iam_path *path)
++{
++ struct iam_container *c;
++ struct iam_descr *descr;
++ struct iam_leaf *leaf;
++ int result;
++
++ c = path->ip_container;
++ leaf = &path->ip_leaf;
++ descr = iam_path_descr(path);
++ result = dx_lookup(path);
++ if (result == 0) {
++ result = iam_leaf_load(path);
++ if (result == 0)
++ result = descr->id_leaf_ops->lookup(c, path, leaf,
++ path->ip_key_target);
++ }
++ return result;
++}
++
++/*
++ * Attach iterator. After successful completion, @it points to record with
++ * smallest key not larger than @k.
++ *
++ * Return value: 0: positioned on existing record,
++ * -ve: error.
++ *
++ * precondition: it_state(it) == IAM_IT_DETACHED
++ * postcondition: ergo(result == 0,
++ * (it_state(it) == IAM_IT_ATTACHED &&
++ * it_keycmp(it, iam_it_key_get(it, *), k) <= 0))
++ */
++int iam_it_get(struct iam_iterator *it, struct iam_key *k)
++{
++ int result;
++ assert(it_state(it) == IAM_IT_DETACHED);
++
++ it->ii_path.ip_key_target = k;
++ iam_it_lock(it);
++ result = iam_path_lookup(&it->ii_path);
++ if (result == 0 || result == -ENOENT)
++ it->ii_state = IAM_IT_ATTACHED;
++ else
++ iam_it_unlock(it);
++ assert(ergo(result == 0,
++ it_keycmp(it,
++ iam_it_key_get(it, it_scratch_key(it, 0)),
++ k) <= 0));
++ return result;
++}
++
++/*
++ * Duplicates iterator.
++ *
++ * postcondition: it_state(dst) == it_state(src) &&
++ * iam_it_container(dst) == iam_it_container(src) &&
++ * dst->ii_flags = src->ii_flags &&
++ * ergo(it_state(src) == IAM_IT_ATTACHED,
++ * iam_it_rec_get(dst) == iam_it_rec_get(src) &&
++ * iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
++ */
++void iam_it_dup(struct iam_iterator *dst, struct iam_iterator *src)
++{
++ dst->ii_flags = src->ii_flags;
++ dst->ii_state = src->ii_state;
++ /* XXX not yet. iam_path_dup(&dst->ii_path, &src->ii_path); */
++ /*
++ * XXX: duplicate lock.
++ */
++ assert(it_state(dst) == it_state(src));
++ assert(iam_it_container(dst) == iam_it_container(src));
++ assert(dst->ii_flags = src->ii_flags);
++ assert(ergo(it_state(src) == IAM_IT_ATTACHED,
++ iam_it_rec_get(dst) == iam_it_rec_get(src) &&
++ iam_it_key_get(dst, it_scratch_key(dst, 0)) ==
++ iam_it_key_get(src, it_scratch_key(src, 0))));
++
++}
++/*
++ * Detach iterator. Does nothing if already in detached state.
++ *
++ * postcondition: it_state(it) == IAM_IT_DETACHED
++ */
++void iam_it_put(struct iam_iterator *it)
++{
++ if (it->ii_state == IAM_IT_ATTACHED) {
++ it->ii_state = IAM_IT_DETACHED;
++ iam_leaf_fini(&it->ii_path);
++ iam_it_unlock(it);
++ }
++}
++
++/*
++ * Move iterator one record right.
++ *
++ * Return value: 0: success,
++ * +1: end of container reached
++ * -ve: error
++ *
++ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
++ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED)
++ */
++int iam_it_next(struct iam_iterator *it)
++{
++ int result;
++ struct iam_container *c;
++
++ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE);
++
++ c = iam_it_container(it);
++ if (iam_leaf_at_end(c, &it->ii_path.ip_leaf)) {
++ /* advance index portion of the path */
++ result = 0; /* XXX not yet iam_index_next(&it->ii_path); */
++ if (result == 0) {
++ result = 0; /* XXX not yet iam_read_leaf(&it->ii_path); */
++ if (result == 0)
++ iam_leaf_start(c, &it->ii_path.ip_leaf);
++ } else if (result > 0)
++ /* end of container reached */
++ result = +1;
++ if (result < 0)
++ iam_it_put(it);
++ } else {
++ /* advance within leaf node */
++ iam_leaf_next(c, &it->ii_path.ip_leaf);
++ result = 0;
++ }
++ assert(ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED));
++ return result;
++}
++
++/*
++ * Return pointer to the record under iterator.
++ *
++ * precondition: it_state(it) == IAM_IT_ATTACHED
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++struct iam_rec *iam_it_rec_get(struct iam_iterator *it)
++{
++ assert(it_state(it) == IAM_IT_ATTACHED);
++ return iam_leaf_rec(iam_it_container(it), &it->ii_path.ip_leaf);
++}
++
++static void iam_it_reccpy(struct iam_iterator *it, struct iam_rec *r)
++{
++ memcpy(iam_leaf_rec(iam_it_container(it), &it->ii_path.ip_leaf), r,
++ iam_it_container(it)->ic_descr->id_rec_size);
++}
++
++/*
++ * Replace contents of record under iterator.
++ *
++ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
++ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
++ * ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
++ */
++int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r)
++{
++ int result;
++
++ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
++
++ result = ext3_journal_get_write_access(h, it->ii_path.ip_leaf.il_bh);
++ if (result == 0)
++ iam_it_reccpy(it, r);
++ return result;
++}
++
++/*
++ * Return pointer to the key under iterator.
++ *
++ * precondition: it_state(it) == IAM_IT_ATTACHED
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++struct iam_key *iam_it_key_get(struct iam_iterator *it, struct iam_key *k)
++{
++ assert(it_state(it) == IAM_IT_ATTACHED);
++ return iam_leaf_key(iam_it_container(it), &it->ii_path.ip_leaf, k);
++}
++
++static int iam_leaf_rec_add(handle_t *handle, struct iam_path *path)
++{
++ int err;
++
++ iam_rec_add(path, NULL, NULL);
++ err = ext3_journal_dirty_metadata(handle, path->ip_leaf.il_bh);
++ if (err)
++ ext3_std_error(iam_path_obj(path)->i_sb, err);
++ return err;
++}
++
++static int iam_leaf_can_add (struct iam_container *c, struct iam_leaf *l,
++ struct iam_key *k, struct iam_rec *r)
++{
++ return iam_container_descr(c)->id_leaf_ops->can_add(c, l, k, r);
++}
++
++static int split_leaf_node(handle_t *handle, struct iam_path *path)
++{
++#if 0
++ struct inode *dir = iam_path_obj(path);
++ unsigned continued = 0;
++ struct buffer_head *bh2;
++ u32 newblock, hash_split;
++ char *data2;
++ unsigned split;
++ int err;
++
++ bh2 = ext3_append (handle, dir, &newblock, &err);
++ if (!(bh2))
++ return -ENOSPC;
++
++ err = iam_leaf_load(path);
++ if (err)
++ goto errout;
++
++ BUFFER_TRACE(path->ip_leaf.il_bh, "get_write_access");
++ err = ext3_journal_get_write_access(handle, path->ip_leaf.il_bh);
++ if (err) {
++ journal_error:
++ iam_leaf_fini(path);
++ brelse(bh2);
++ ext3_std_error(dir->i_sb, err);
++ err = -EIO;
++ goto errout;
++ }
++ data2 = bh2->b_data;
++ split = dx_get_count((struct iam_entry*)iam_leaf_entries(path))/2;
++ hash_split = *(__u32*)iam_leaf_key_at(path,
++ iam_lentry_shift(path, iam_leaf_entries(path),
++ split));
++ if (iam_keycmp(path->ip_container, iam_leaf_key_at(path,
++ iam_lentry_shift(path, iam_leaf_entries(path), split)),
++ iam_leaf_key_at(path,
++ iam_lentry_shift(path, iam_leaf_entries(path), split -1))) == 0)
++ continued = 1;
++
++ memcpy(iam_lentry_shift(path, (struct iam_lentry *)data2, 1),
++ iam_lentry_shift(path, iam_leaf_entries(path), split),
++ split * iam_lfix_entry_size(path));
++
++ /* Which block gets the new entry? */
++ dx_insert_block(path, path->ip_frame, hash_split + continued, newblock);
++ err = ext3_journal_dirty_metadata (handle, bh2);
++ if (err)
++ goto journal_error;
++ err = ext3_journal_dirty_metadata (handle, path->ip_leaf.il_bh);
++ if (err)
++ goto journal_error;
++errout:
++ brelse (bh2);
++ return err;
++#endif
++ return 0;
++}
++
++int iam_add_rec(handle_t *handle, struct iam_path *path,
++ struct iam_key *k, struct iam_rec *r)
++{
++ int err;
++
++ if (iam_leaf_can_add(path->ip_container, &path->ip_leaf, k, r)) {
++ err = iam_leaf_rec_add(handle, path);
++ } else {
++ err = split_index_node(handle, path);
++ if (err == 0) {
++ err = split_leaf_node(handle, path);
++ if (err == 0)
++ err = iam_leaf_rec_add(handle, path);
++ }
++ }
++ return err;
++}
++
++/*
++ * Insert new record with key @k and contents from @r, shifting records to the
++ * right.
++ *
++ * precondition: it_state(it) == IAM_IT_ATTACHED &&
++ * it->ii_flags&IAM_IT_WRITE &&
++ * it_keycmp(it, iam_it_key_get(it, *), k) < 0
++ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
++ * ergo(result == 0,
++ * it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
++ * !memcmp(iam_it_rec_get(it), r, ...))
++ */
++int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
++ struct iam_key *k, struct iam_rec *r)
++{
++ int result;
++
++ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
++ assert(it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)), k) < 0);
++
++ result = iam_add_rec(h, &it->ii_path, k, r);
++ if (result == 0) {
++ /* place record and key info freed space. Leaf node is already
++ * in transaction. */
++ iam_it_reccpy(it, r);
++ /*
++ * XXX TBD.
++ */
++ }
++ assert(it_state(it) == IAM_IT_ATTACHED);
++ assert(ergo(result == 0,
++ it_keycmp(it,
++ iam_it_key_get(it,
++ it_scratch_key(it, 0)), k) == 0 &&
++ !memcmp(iam_it_rec_get(it), r,
++ iam_it_container(it)->ic_descr->id_rec_size)));
++ return result;
++}
++
++static int iam_leaf_rec_remove(handle_t *handle, struct iam_container *c,
++ struct iam_path *path)
++{
++ int err;
++
++ iam_rec_del(path);
++ err = ext3_journal_dirty_metadata(handle, path->ip_leaf.il_bh);
++ if (err)
++ ext3_std_error(iam_path_obj(path)->i_sb, err);
++ return err;
++}
++
++/*
++ * Delete record under iterator.
++ *
++ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++int iam_it_rec_delete(handle_t *h, struct iam_iterator *it)
++{
++ int result;
++
++ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
++
++ result = ext3_journal_get_write_access(h, it->ii_path.ip_leaf.il_bh);
++ /*
++ * no compaction for now.
++ */
++ if (result == 0)
++ result = iam_leaf_rec_remove(h, iam_it_container(it), &it->ii_path);
++
++ return result;
++}
++
++/*
++ * Convert iterator to cookie.
++ *
++ * precondition: it_state(it) == IAM_IT_ATTACHED &&
++ * iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
++ * postcondition: it_state(it) == IAM_IT_ATTACHED
++ */
++iam_pos_t iam_it_store(struct iam_iterator *it)
++{
++ iam_pos_t result;
++
++ assert(it_state(it) == IAM_IT_ATTACHED);
++ assert(iam_it_container(it)->ic_descr->id_key_size <= sizeof result);
++
++ result = 0;
++ iam_it_key_get(it, (struct iam_key *)&result);
++ return result;
++}
++
++/*
++ * Restore iterator from cookie.
++ *
++ * precondition: it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE &&
++ * iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
++ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED &&
++ * iam_it_store(it) == pos)
++ */
++int iam_it_load(struct iam_iterator *it, iam_pos_t pos)
++{
++ assert(it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE);
++ assert(iam_it_container(it)->ic_descr->id_key_size <= sizeof pos);
++ return iam_it_get(it, (struct iam_key *)&pos);
++}
++
++/***********************************************************************/
++/* invariants */
++/***********************************************************************/
++
++static inline int ptr_inside(void *base, size_t size, void *ptr)
++{
++ return (base <= ptr) && (ptr < base + size);
++}
++
++int iam_frame_invariant(struct iam_frame *f)
++{
++ return
++ (f->bh != NULL &&
++ f->bh->b_data != NULL &&
++ ptr_inside(f->bh->b_data, f->bh->b_size, f->entries) &&
++ ptr_inside(f->bh->b_data, f->bh->b_size, f->at) &&
++ f->entries <= f->at);
++}
++int iam_leaf_invariant(struct iam_leaf *l)
++{
++ return
++ l->il_bh != NULL &&
++ l->il_bh->b_data != NULL &&
++ ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_entries) &&
++ ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_at) &&
++ l->il_entries <= l->il_at;
++}
++
++int iam_path_invariant(struct iam_path *p)
++{
++ int i;
++
++ if (p->ip_container == NULL ||
++ p->ip_indirect < 0 || p->ip_indirect > DX_MAX_TREE_HEIGHT - 1 ||
++ p->ip_frame != p->ip_frames + p->ip_indirect ||
++ !iam_leaf_invariant(&p->ip_leaf))
++ return 0;
++ for (i = 0; i < ARRAY_SIZE(p->ip_frames); ++i) {
++ if (i <= p->ip_indirect) {
++ if (!iam_frame_invariant(&p->ip_frames[i]))
++ return 0;
++ }
++ }
++ return 1;
++}
++
++int iam_it_invariant(struct iam_iterator *it)
++{
++ return
++ (it->ii_state == IAM_IT_DETACHED ||
++ it->ii_state == IAM_IT_ATTACHED) &&
++ !(it->ii_flags & ~(IAM_IT_MOVE | IAM_IT_WRITE)) &&
++ ergo(it->ii_state == IAM_IT_ATTACHED,
++ iam_path_invariant(&it->ii_path));
++}
++
++/*
++ * Search container @c for record with key @k. If record is found, its data
++ * are moved into @r.
++ *
++ *
++ *
++ * Return values: +ve: found, 0: not-found, -ve: error
++ */
++int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r,
++ struct iam_path_descr *pd)
++{
++ struct iam_iterator it;
++ int result;
++
++ iam_it_init(&it, c, 0, pd);
++
++ result = iam_it_get_exact(&it, k);
++ if (result == 0)
++ /*
++ * record with required key found, copy it into user buffer
++ */
++ iam_reccpy(&it.ii_path, r, iam_it_rec_get(&it));
++ iam_it_put(&it);
++ iam_it_fini(&it);
++ return result;
++}
++EXPORT_SYMBOL(iam_lookup);
++
++/*
++ * Insert new record @r with key @k into container @c (within context of
++ * transaction @h).
++ *
++ * Return values: 0: success, -ve: error, including -EEXIST when record with
++ * given key is already present.
++ *
++ * postcondition: ergo(result == 0 || result == -EEXIST,
++ * iam_lookup(c, k, r2) > 0 &&
++ * !memcmp(r, r2, c->ic_descr->id_rec_size));
++ */
++int iam_insert(handle_t *h, struct iam_container *c,
++ struct iam_key *k, struct iam_rec *r, struct iam_path_descr *pd)
++{
++ struct iam_iterator it;
++ int result;
++
++ iam_it_init(&it, c, IAM_IT_WRITE, pd);
++
++ result = iam_it_get_exact(&it, k);
++ if (result == -ENOENT)
++ result = iam_it_rec_insert(h, &it, k, r);
++ else if (result == 0)
++ result = -EEXIST;
++ iam_it_put(&it);
++ iam_it_fini(&it);
++ return result;
++}
++EXPORT_SYMBOL(iam_insert);
++
++int iam_update(handle_t *h, struct iam_container *c,
++ struct iam_key *k, struct iam_rec *r, struct iam_path_descr *pd)
++{
++ struct iam_iterator it;
++ int result;
++
++ iam_it_init(&it, c, IAM_IT_WRITE, pd);
++
++ result = iam_it_get_exact(&it, k);
++ if (result == 0)
++ result = iam_it_rec_set(h, &it, r);
++ iam_it_put(&it);
++ iam_it_fini(&it);
++ return result;
++}
++EXPORT_SYMBOL(iam_update);
++
++/*
++ * Delete existing record with key @k.
++ *
++ * Return values: 0: success, -ENOENT: not-found, -ve: other error.
++ *
++ * postcondition: ergo(result == 0 || result == -ENOENT,
++ * !iam_lookup(c, k, *));
++ */
++int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k,
++ struct iam_path_descr *pd)
++{
++ struct iam_iterator it;
++ int result;
++
++ iam_it_init(&it, c, IAM_IT_WRITE, pd);
++
++ result = iam_it_get_exact(&it, k);
++ if (result == 0)
++ result = iam_it_rec_delete(h, &it);
++ iam_it_put(&it);
++ iam_it_fini(&it);
++ return result;
++}
++EXPORT_SYMBOL(iam_delete);
++
+Index: linux-2.6.9/fs/ext3/Makefile
+===================================================================
+--- linux-2.6.9.orig/fs/ext3/Makefile 2006-05-23 17:01:07.000000000 +0800
++++ linux-2.6.9/fs/ext3/Makefile 2006-05-23 17:01:09.000000000 +0800
+@@ -6,7 +6,7 @@
- struct iam_entry *entries; /* old block contents */
- struct iam_entry *entries2; /* new block contents */
- struct iam_frame *frame, *safe;
- struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {0};
- u32 newblock[DX_MAX_TREE_HEIGHT] = {0};
-- struct inode *dir = path_obj(path);
-+ struct inode *dir = iam_path_obj(path);
- int nr_splet;
- int i, err;
-
-@@ -2442,7 +1597,8 @@ static int split_index_node(handle_t *ha
- for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
- bh_new[i] = ext3_append (handle, dir, &newblock[i], &err);
- if (!bh_new[i] ||
-- path_descr(path)->id_node_init(path->ip_container, bh_new[i], 0) != 0)
-+ iam_path_descr(path)->id_node_init(path->ip_container,
-+ bh_new[i], 0) != 0)
- goto cleanup;
- BUFFER_TRACE(frame->bh, "get_write_access");
- err = ext3_journal_get_write_access(handle, frame->bh);
-@@ -2516,9 +1672,9 @@ static int split_index_node(handle_t *ha
- unsigned count1 = count/2, count2 = count - count1;
- unsigned hash2;
-
-- dx_get_key(path,
-- iam_entry_shift(path, entries, count1),
-- (struct iam_key *)&hash2);
-+ iam_get_key(path,
-+ iam_entry_shift(path, entries, count1),
-+ (struct iam_key *)&hash2);
-
- dxtrace(printk("Split index %i/%i\n", count1, count2));
-
-@@ -2578,7 +1734,7 @@ static int ext3_dx_add_entry(handle_t *h
- size_t isize;
-
- iam_path_compat_init(&cpath, dir);
-- param = path_descr(path);
-+ param = iam_path_descr(path);
-
- err = dx_probe(dentry, NULL, &hinfo, path);
- if (err != 0)
-@@ -2588,7 +1744,7 @@ static int ext3_dx_add_entry(handle_t *h
- /* XXX nikita: global serialization! */
- isize = dir->i_size;
-
-- err = param->id_node_read(path->ip_container, (iam_ptr_t)dx_get_block(path, frame->at),
-+ err = param->id_node_read(path->ip_container, (iam_ptr_t)dx_get_block(path, frame->at),
- handle, &bh);
- if (err != 0)
- goto cleanup;
-@@ -2724,12 +1880,12 @@ static struct inode * ext3_new_inode_wan
- * is so far negative - it has no inode.
- *
- * If the create succeeds, we fill in the inode information
-- * with d_instantiate().
-+ * with d_instantiate().
- */
- static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
- struct nameidata *nd)
- {
-- handle_t *handle;
-+ handle_t *handle;
- struct inode * inode;
- int err, retries = 0;
+ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+ ioctl.o namei.o super.o symlink.o hash.o resize.o \
+- extents.o mballoc.o
++ extents.o mballoc.o iam.o
-Index: iam/include/linux/lustre_iam.h
+ ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+Index: linux-2.6.9/include/linux/lustre_iam.h
===================================================================
---- iam.orig/include/linux/lustre_iam.h 2006-05-10 18:21:01.000000000 +0400
-+++ iam/include/linux/lustre_iam.h 2006-05-10 21:22:41.000000000 +0400
+--- linux-2.6.9.orig/include/linux/lustre_iam.h 2006-05-23 17:01:09.000000000 +0800
++++ linux-2.6.9/include/linux/lustre_iam.h 2006-05-24 17:41:04.000000000 +0800
@@ -1,3 +1,39 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
/*
* Maximal number of non-leaf levels in htree. In the stock ext3 this is 2.
*/
-@@ -30,6 +66,11 @@ struct iam_key;
+@@ -30,6 +66,11 @@
/* Incomplete type use to refer to the records stored in iam containers. */
struct iam_rec;
typedef __u64 iam_ptr_t;
/*
-@@ -41,12 +82,17 @@ struct iam_frame {
+@@ -41,45 +82,23 @@
struct iam_entry *at; /* target entry, found by binary search */
};
};
struct iam_path;
-@@ -115,6 +161,15 @@ struct iam_descr {
- /*
- * leaf operations.
- */
-+
-+ /*
-+ * initialize just loaded leaf node.
-+ */
-+ int (*init)(struct iam_container *c, struct iam_leaf *l);
-+ /*
-+ * Release resources.
-+ */
-+ void (*fini)(struct iam_container *c, struct iam_leaf *l);
- /*
- * returns true iff leaf is positioned at the last entry.
- */
-@@ -123,12 +178,30 @@ struct iam_descr {
- void (*start)(struct iam_container *c, struct iam_leaf *l);
- /* more leaf to the next entry. */
- void (*next)(struct iam_container *c, struct iam_leaf *l);
+ struct iam_container;
+
+-/*
+- * Parameters, describing a flavor of iam container.
+- */
+-struct iam_descr {
+- /*
+- * Size of a key in this container, in bytes.
+- */
+- size_t id_key_size;
+- /*
+- * Size of a pointer to the next level (stored in index nodes), in
+- * bytes.
+- */
+- size_t id_ptr_size;
+- /*
+- * Size of a record (stored in leaf nodes), in bytes.
+- */
+- size_t id_rec_size;
+- /*
+- * Size of unused (by iam) space at the beginning of every non-root
+- * node, in bytes. Used for compatibility with ext3.
+- */
+- size_t id_node_gap;
+- /*
+- * Size of unused (by iam) space at the beginning of root node, in
+- * bytes. Used for compatibility with ext3.
+- */
+- size_t id_root_gap;
+-
++struct iam_operations {
+ /*
+ * Returns pointer (in the same sense as pointer in index entry) to
+ * the root node.
+@@ -111,25 +130,107 @@
+ * contains single record with the smallest possible key.
+ */
+ int (*id_create)(struct iam_container *c);
+- struct {
+- /*
+- * leaf operations.
+- */
+- /*
+- * returns true iff leaf is positioned at the last entry.
+- */
+- int (*at_end)(struct iam_container *c, struct iam_leaf *l);
+- /* position leaf at the first entry */
+- void (*start)(struct iam_container *c, struct iam_leaf *l);
+- /* more leaf to the next entry. */
+- void (*next)(struct iam_container *c, struct iam_leaf *l);
- /* return key of current leaf record in @k */
- void (*key)(struct iam_container *c, struct iam_leaf *l,
- struct iam_key *k);
- /* return pointer to entry body */
-+ /* return key of current leaf record. This method may return
-+ * either pointer to the key stored in node, or copy key into
-+ * @k buffer supplied by caller and return pointer to this
-+ * buffer. The latter approach is used when keys in nodes are
-+ * not stored in plain form (e.g., htree doesn't store keys at
-+ * all).
-+ *
-+ * Caller should assume that returned pointer is only valid
-+ * while leaf node is pinned and locked.*/
-+ struct iam_key *(*key)(struct iam_container *c,
-+ struct iam_leaf *l, struct iam_key *k);
-+ /* return pointer to entry body. Pointer is valid while
-+ corresponding leaf node is locked and pinned. */
- struct iam_rec *(*rec)(struct iam_container *c,
- struct iam_leaf *l);
+- struct iam_rec *(*rec)(struct iam_container *c,
+- struct iam_leaf *l);
+- } id_leaf;
++};
+
-+ /*
-+ * Search leaf @l for a record with key @k or for a place
-+ * where such record is to be inserted.
-+ *
-+ * Scratch keys from @path can be used.
-+ */
-+ int (*lookup)(struct iam_container *c, struct iam_path *path,
-+ struct iam_leaf *l, struct iam_key *k);
- } id_leaf;
++struct iam_leaf_operations {
++ /*
++ * leaf operations.
++ */
++
++ /*
++ * initialize just loaded leaf node.
++ */
++ int (*init)(struct iam_container *c, struct iam_leaf *l);
++ /*
++ * Release resources.
++ */
++ void (*fini)(struct iam_container *c, struct iam_leaf *l);
++ /*
++ * returns true iff leaf is positioned at the last entry.
++ */
++ int (*at_end)(struct iam_container *c, struct iam_leaf *l);
++ /* position leaf at the first entry */
++ void (*start)(struct iam_container *c, struct iam_leaf *l);
++ /* move leaf to the next entry. */
++ void (*next)(struct iam_container *c, struct iam_leaf *l);
++ /* return key of current leaf record. This method may return
++ * either pointer to the key stored in node, or copy key into
++ * @k buffer supplied by caller and return pointer to this
++ * buffer. The latter approach is used when keys in nodes are
++ * not stored in plain form (e.g., htree doesn't store keys at
++ * all).
++ *
++ * Caller should assume that returned pointer is only valid
++ * while leaf node is pinned and locked.*/
++ struct iam_key *(*key)(struct iam_container *c,
++ struct iam_leaf *l, struct iam_key *k);
++ /* return pointer to entry body. Pointer is valid while
++ corresponding leaf node is locked and pinned. */
++ struct iam_rec *(*rec)(struct iam_container *c,
++ struct iam_leaf *l);
++
++ /*
++ * Search leaf @l for a record with key @k or for a place
++ * where such record is to be inserted.
++ *
++ * Scratch keys from @path can be used.
++ */
++ int (*lookup)(struct iam_container *c, struct iam_path *path,
++ struct iam_leaf *l, struct iam_key *k);
++
++ int (*can_add) (struct iam_container *c, struct iam_leaf *l,
++ struct iam_key *k, struct iam_rec *r);
++ /*add rec for a leaf*/
++ void (*rec_add)(struct iam_path *path, struct iam_key *k,
++ struct iam_rec *r);
++ /*remove rec for a leaf*/
++ void (*rec_del)(struct iam_path *path);
++};
++
++struct iam_root {
++ struct iam_root_info {
++ u8 indirect_levels;
++ u8 pad[3];
++ } info;
++ struct {} entries[0];
++};
++
++#define IAM_LEAF_HEADER_MAGIC 0x1976
++struct iam_leaf_head {
++ __le16 ill_magic;
++ __le16 ill_count;
++};
++
++/*
++ * Parameters, describing a flavor of iam container.
++ */
++struct iam_descr {
++ /*
++ * Size of a key in this container, in bytes.
++ */
++ size_t id_key_size;
++ /*
++ * Size of a pointer to the next level (stored in index nodes), in
++ * bytes.
++ */
++ size_t id_ptr_size;
++ /*
++ * Size of a record (stored in leaf nodes), in bytes.
++ */
++ size_t id_rec_size;
++ /*
++ * Size of unused (by iam) space at the beginning of every non-root
++ * node, in bytes. Used for compatibility with ext3.
++ */
++ size_t id_node_gap;
++ /*
++ * Size of unused (by iam) space at the beginning of root node, in
++ * bytes. Used for compatibility with ext3.
++ */
++ size_t id_root_gap;
++
++ struct iam_operations *id_ops;
++ struct iam_leaf_operations *id_leaf_ops;
};
-@@ -149,6 +222,17 @@ struct iam_container {
+ struct iam_container {
+@@ -149,6 +250,17 @@
};
/*
* Structure to keep track of a path drilled through htree.
*/
struct iam_path {
-@@ -172,34 +256,232 @@ struct iam_path {
+@@ -172,34 +284,232 @@
/*
* Leaf node: a child of ->ip_frame.
*/
struct iam_key *ip_key_target;
/*
- * Scratch-pad area for temporary keys.
-- */
++ * Description-specific data.
+ */
- struct iam_key *ip_key_scratch[DX_SCRATCH_KEYS];
- /*
- * pointer to flavor-specific per-container data.
-+ * Description-specific data.
- */
+- */
- void *ip_descr_data;
+ struct iam_path_descr *ip_data;
};
/*
* Initialize container @c, acquires additional reference on @inode.
*/
-@@ -210,3 +492,149 @@ int iam_container_init(struct iam_contai
+@@ -210,3 +520,143 @@
*/
void iam_container_fini(struct iam_container *c);
+#define assert(test) J_ASSERT(test)
+#endif
+
++static inline struct iam_descr *iam_container_descr(struct iam_container *c)
++{
++ return c->ic_descr;
++}
++
+static inline struct iam_descr *iam_path_descr(struct iam_path *p)
+{
+ return p->ip_container->ic_descr;
+static inline int iam_keycmp(struct iam_container *c,
+ struct iam_key *k1, struct iam_key *k2)
+{
-+ return c->ic_descr->id_keycmp(c, k1, k2);
++ return c->ic_descr->id_ops->id_keycmp(c, k1, k2);
+}
+
+static inline void *iam_entry_off(struct iam_entry *entry, size_t off)
+struct buffer_head *ext3_append(handle_t *handle, struct inode *inode,
+ u32 *block, int *err);
+int split_index_node(handle_t *handle, struct iam_path *path);
++
++extern struct iam_leaf_operations generic_leaf_ops;
++extern struct iam_operations generic_iam_ops;
++
++
+/*
+ * external
+ */
+void iam_container_read_lock(struct iam_container *c);
+void iam_container_read_unlock(struct iam_container *c);
+
-+int iam_path_lookup(struct iam_path *p);
-+
-+void iam_path_dup(struct iam_path *dst, struct iam_path *src);
-+
-+int iam_leaf_at_end(struct iam_container *c, struct iam_leaf *leaf);
-+void iam_leaf_start(struct iam_container *c, struct iam_leaf *leaf);
-+struct iam_rec *iam_leaf_rec(struct iam_container *c, struct iam_leaf *leaf);
-+struct iam_key *iam_leaf_key(struct iam_container *c, struct iam_leaf *leaf,
-+ struct iam_key *key);
-+
+int iam_index_next(struct iam_path *p);
+int iam_read_leaf(struct iam_path *p);
+
-+int iam_add_rec(handle_t *h, struct iam_path *p);
-+
-+__u32 iam_root_ptr(struct iam_container *c);
-+int iam_node_check(struct iam_path *path, struct iam_frame *frame);
-+int iam_node_init(struct iam_container *c, struct buffer_head *bh, int root);
+int iam_keycmp(struct iam_container *c, struct iam_key *k1, struct iam_key *k2);
+int iam_node_read(struct iam_container *c, iam_ptr_t ptr,
-+ handle_t *h, struct buffer_head **bh);
-+
++ handle_t *handle, struct buffer_head **bh);
+
+/* __LINUX_LUSTRE_IAM_H__ */
+#endif