-Index: iam/include/linux/lustre_iam.h
+Index: iam/fs/ext3/Makefile
===================================================================
---- iam.orig/include/linux/lustre_iam.h 2006-09-22 17:18:09.000000000 +0800
-+++ iam/include/linux/lustre_iam.h 2006-09-22 17:18:09.000000000 +0800
-@@ -30,9 +30,6 @@
- #ifndef __LINUX_LUSTRE_IAM_H__
- #define __LINUX_LUSTRE_IAM_H__
+--- iam.orig/fs/ext3/Makefile 2006-09-28 22:11:15.000000000 +0400
++++ iam/fs/ext3/Makefile 2006-09-28 22:11:15.000000000 +0400
+@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
--/* handle_t, journal_start(), journal_stop() */
--#include <linux/jbd.h>
--
- /*
- * linux/include/linux/lustre_iam.h
- */
-@@ -57,14 +54,21 @@
- * [2] reserved for leaf node operations.
- *
- * [3] reserved for index operations.
-+ *
-+ * [4] reserved for path->ip_ikey_target
-+ *
- */
-- DX_SCRATCH_KEYS = 4,
-+ DX_SCRATCH_KEYS = 5,
- /*
- * Maximal format name length.
- */
- DX_FMT_NAME_LEN = 16
- };
+ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+ ioctl.o namei.o super.o symlink.o hash.o resize.o \
+- extents.o mballoc.o iam.o iam_lfix.o
++ extents.o mballoc.o iam.o iam_lfix.o iam_lvar.o iam_htree.o iam_uapi.o
-+#ifdef __KERNEL__
-+/* handle_t, journal_start(), journal_stop() */
-+#include <linux/jbd.h>
-+
- /*
- * Entry within index tree node. Consists of a key immediately followed
- * (without padding) by a pointer to the child node.
-@@ -86,14 +90,21 @@
- */
- struct iam_key;
+ ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+Index: iam/fs/ext3/dir.c
+===================================================================
+--- iam.orig/fs/ext3/dir.c 2006-09-28 22:10:32.000000000 +0400
++++ iam/fs/ext3/dir.c 2006-09-28 22:11:15.000000000 +0400
+@@ -28,6 +28,7 @@
+ #include <linux/smp_lock.h>
+ #include <linux/slab.h>
+ #include <linux/rbtree.h>
++#include <linux/lustre_iam.h>
--/* Incomplete type use to refer to the records stored in iam containers. */
-+/*
-+ * Incomplete type use to refer to the records stored in iam containers.
-+ */
- struct iam_rec;
+ static unsigned char ext3_filetype_table[] = {
+ DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
+@@ -59,7 +60,7 @@ static unsigned char get_dtype(struct su
--struct iam_cookie {
-- struct iam_key *ic_key;
-- struct iam_rec *ic_rec;
--};
-+/*
-+ * Key in index node. Possibly compressed. Fixed size.
-+ */
-+struct iam_ikey;
+ return (ext3_filetype_table[filetype]);
+ }
+-
++
-+/*
-+ * Scalar type into which certain iam_key's can be uniquely mapped. Used to
-+ * support interfaces like readdir(), where iteration over index has to be
-+ * re-startable.
-+ */
- typedef __u64 iam_ptr_t;
+ int ext3_check_dir_entry (const char * function, struct inode * dir,
+ struct ext3_dir_entry_2 * de,
+@@ -165,7 +166,7 @@ revalidate:
+ * to make sure. */
+ if (filp->f_version != inode->i_version) {
+ for (i = 0; i < sb->s_blocksize && i < offset; ) {
+- de = (struct ext3_dir_entry_2 *)
++ de = (struct ext3_dir_entry_2 *)
+ (bh->b_data + i);
+ /* It's too expensive to do a full
+ * dirent test each time round this
+@@ -184,7 +185,7 @@ revalidate:
+ filp->f_version = inode->i_version;
+ }
+- while (!error && filp->f_pos < inode->i_size
++ while (!error && filp->f_pos < inode->i_size
+ && offset < sb->s_blocksize) {
+ de = (struct ext3_dir_entry_2 *) (bh->b_data + offset);
+ if (!ext3_check_dir_entry ("ext3_readdir", inode, de,
+@@ -232,7 +233,7 @@ out:
/*
-@@ -123,6 +134,31 @@
- void *il_descr_data;
- };
-
-+/*
-+ * Return values of ->lookup() operation from struct iam_leaf_operations.
-+ */
-+enum iam_lookup_t {
-+ /*
-+ * lookup found a record with the key requested
-+ */
-+ IAM_LOOKUP_EXACT,
-+ /*
-+ * lookup positioned leaf on some record
-+ */
-+ IAM_LOOKUP_OK,
-+ /*
-+ * leaf was empty
-+ */
-+ IAM_LOOKUP_EMPTY,
-+ /*
-+ * lookup positioned leaf before first record
-+ */
-+ IAM_LOOKUP_BEFORE
-+};
-+
-+/*
-+ * Format-specific container operations. These are called by generic iam code.
-+ */
- struct iam_operations {
- /*
- * Returns pointer (in the same sense as pointer in index entry) to
-@@ -131,11 +167,15 @@
- __u32 (*id_root_ptr)(struct iam_container *c);
+ * These functions convert from the major/minor hash to an f_pos
+ * value.
+- *
++ *
+ * Currently we only use major hash numer. This is unfortunate, but
+ * on 32-bit machines, the same VFS interface is used for lseek and
+ * llseek, so if we use the 64 bit offset, then the 32-bit versions of
+@@ -253,7 +254,7 @@ out:
+ struct fname {
+ __u32 hash;
+ __u32 minor_hash;
+- struct rb_node rb_hash;
++ struct rb_node rb_hash;
+ struct fname *next;
+ __u32 inode;
+ __u8 name_len;
+@@ -305,12 +306,14 @@ static void free_rb_tree_fname(struct rb
+ root->rb_node = NULL;
+ }
- /*
-- * Check validity and consistency of index node. This is called when
-- * iam just loaded new node into frame.
-+ * Check validity and consistency of index node.
- */
- int (*id_node_check)(struct iam_path *path, struct iam_frame *frame);
- /*
-+ * Copy some data from node header into frame. This is called when
-+ * new node is loaded into frame.
-+ */
-+ int (*id_node_load)(struct iam_path *path, struct iam_frame *frame);
-+ /*
- * Initialize new node (stored in @bh) that is going to be added into
- * tree.
- */
-@@ -144,23 +184,33 @@
- int (*id_node_read)(struct iam_container *c, iam_ptr_t ptr,
- handle_t *h, struct buffer_head **bh);
- /*
-- * Key comparison function. Returns -1, 0, +1.
-+ * Key comparison functions. Returns -1, 0, +1.
- */
-- int (*id_keycmp)(const struct iam_container *c,
-- const struct iam_key *k1, const struct iam_key *k2);
-+ int (*id_ikeycmp)(const struct iam_container *c,
-+ const struct iam_ikey *k1,
-+ const struct iam_ikey *k2);
- /*
-- * Create new container.
-- *
-- * Newly created container has a root node and a single leaf. Leaf
-- * contains single record with the smallest possible key.
-+ * Modify root node when tree height increases.
- */
-- int (*id_create)(struct iam_container *c);
-+ struct iam_entry *(*id_root_inc)(struct iam_container *c,
-+ struct iam_path *path,
-+ struct iam_frame *frame);
-+
-+ struct iam_path_descr *(*id_ipd_alloc)(const struct iam_container *c);
-+ void (*id_ipd_free)(const struct iam_container *c,
-+ struct iam_path_descr *ipd);
- /*
- * Format name.
- */
- char id_name[DX_FMT_NAME_LEN];
- };
++extern struct iam_private_info *ext3_iam_alloc_info(int flags);
++extern void ext3_iam_release_info(struct iam_private_info *info);
-+/*
-+ * Another format-specific operation vector, consisting of methods to access
-+ * leaf nodes. This is separated from struct iam_operations, because it is
-+ * assumed that there will be many formats with different format of leaf
-+ * nodes, yes the same struct iam_operations.
-+ */
- struct iam_leaf_operations {
- /*
- * leaf operations.
-@@ -186,7 +236,8 @@
- void (*start)(struct iam_leaf *l);
- /* more leaf to the next entry. */
- void (*next)(struct iam_leaf *l);
-- /* return key of current leaf record. This method may return
-+ /*
-+ * return key of current leaf record. This method may return
- * either pointer to the key stored in node, or copy key into
- * @k buffer supplied by caller and return pointer to this
- * buffer. The latter approach is used when keys in nodes are
-@@ -194,8 +245,10 @@
- * all).
- *
- * Caller should assume that returned pointer is only valid
-- * while leaf node is pinned and locked.*/
-- struct iam_key *(*key)(const struct iam_leaf *l, struct iam_key *k);
-+ * while leaf node is pinned and locked.
-+ */
-+ struct iam_ikey *(*ikey)(const struct iam_leaf *l, struct iam_ikey *k);
-+ struct iam_key *(*key)(const struct iam_leaf *l);
- /* return pointer to entry body. Pointer is valid while
- corresponding leaf node is locked and pinned. */
- struct iam_rec *(*rec)(const struct iam_leaf *l);
-@@ -203,6 +256,9 @@
- void (*key_set)(struct iam_leaf *l, const struct iam_key *k);
- void (*rec_set)(struct iam_leaf *l, const struct iam_rec *r);
+ struct dir_private_info *create_dir_info(loff_t pos)
+ {
+ struct dir_private_info *p;
-+ int (*key_cmp)(const struct iam_leaf *l, const struct iam_key *k);
-+
-+ int (*key_size)(const struct iam_leaf *l);
- /*
- * Search leaf @l for a record with key @k or for a place
- * where such record is to be inserted.
-@@ -210,6 +266,7 @@
- * Scratch keys from @path can be used.
- */
- int (*lookup)(struct iam_leaf *l, const struct iam_key *k);
-+ int (*ilookup)(struct iam_leaf *l, const struct iam_ikey *ik);
+- p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL);
++ p = (void *)ext3_iam_alloc_info(GFP_KERNEL);
+ if (!p)
+ return NULL;
+ p->root.rb_node = NULL;
+@@ -326,6 +329,7 @@ struct dir_private_info *create_dir_info
+ void ext3_htree_free_dir_info(struct dir_private_info *p)
+ {
+ free_rb_tree_fname(&p->root);
++ ext3_iam_release_info((void *)p);
+ kfree(p);
+ }
- int (*can_add)(const struct iam_leaf *l,
- const struct iam_key *k, const struct iam_rec *r);
-@@ -221,12 +278,13 @@
- /*
- * remove rec for a leaf
- */
-- void (*rec_del)(struct iam_leaf *l);
-+ void (*rec_del)(struct iam_leaf *l, int shift);
- /*
- * split leaf node, moving some entries into @bh (the latter currently
- * is assumed to be empty).
- */
-- void (*split)(struct iam_leaf *l, struct buffer_head *bh);
-+ void (*split)(struct iam_leaf *l, struct buffer_head **bh,
-+ iam_ptr_t newblknr);
- };
-
- struct iam_path *iam_leaf_path(const struct iam_leaf *leaf);
-@@ -241,6 +299,10 @@
- */
- size_t id_key_size;
- /*
-+ * Size of a key in index nodes, in bytes.
-+ */
-+ size_t id_ikey_size;
-+ /*
- * Size of a pointer to the next level (stored in index nodes), in
- * bytes.
- */
-@@ -264,6 +326,9 @@
- struct iam_leaf_operations *id_leaf_ops;
- };
-
-+/*
-+ * An instance of iam container.
-+ */
- struct iam_container {
- /*
- * Underlying flat file. IO against this object is issued to
-@@ -284,7 +349,7 @@
- /*
- * Scratch-pad area for temporary keys.
- */
-- struct iam_key *ipd_key_scratch[DX_SCRATCH_KEYS];
-+ struct iam_ikey *ipd_key_scratch[DX_SCRATCH_KEYS];
- };
+@@ -413,7 +417,7 @@ static int call_filldir(struct file * fi
+ curr_pos = hash2pos(fname->hash, fname->minor_hash);
+ while (fname) {
+ error = filldir(dirent, fname->name,
+- fname->name_len, curr_pos,
++ fname->name_len, curr_pos,
+ fname->inode,
+ get_dtype(sb, fname->file_type));
+ if (error) {
+@@ -468,7 +472,7 @@ static int ext3_dx_readdir(struct file *
+ /*
+ * Fill the rbtree if we have no more entries,
+ * or the inode has changed since we last read in the
+- * cached entries.
++ * cached entries.
+ */
+ if ((!info->curr_node) ||
+ (filp->f_version != inode->i_version)) {
+Index: iam/fs/ext3/file.c
+===================================================================
+--- iam.orig/fs/ext3/file.c 2006-09-28 22:10:32.000000000 +0400
++++ iam/fs/ext3/file.c 2006-09-28 22:11:15.000000000 +0400
+@@ -23,6 +23,7 @@
+ #include <linux/jbd.h>
+ #include <linux/ext3_fs.h>
+ #include <linux/ext3_jbd.h>
++#include <linux/lustre_iam.h>
+ #include "xattr.h"
+ #include "acl.h"
- /*
-@@ -316,6 +381,7 @@
- * Key searched for.
- */
- const struct iam_key *ip_key_target;
-+ const struct iam_ikey *ip_ikey_target;
- /*
- * Description-specific data.
- */
-@@ -334,6 +400,7 @@
- struct dx_hash_info *ipc_hinfo;
- struct dentry *ipc_dentry;
- struct iam_path_descr ipc_descr;
-+ struct dx_hash_info ipc_hinfo_area;
- };
+@@ -31,14 +32,18 @@
+ * from ext3_file_open: open gets called at every open, but release
+ * gets called only when /all/ the files are closed.
+ */
+-static int ext3_release_file (struct inode * inode, struct file * filp)
++static int ext3_release_file(struct inode * inode, struct file * filp)
+ {
+ /* if we are the last writer on the inode, drop the block reservation */
+ if ((filp->f_mode & FMODE_WRITE) &&
+ (atomic_read(&inode->i_writecount) == 1))
+ ext3_discard_reservation(inode);
+- if (is_dx(inode) && filp->private_data)
+- ext3_htree_free_dir_info(filp->private_data);
++ if (is_dx(inode) && filp->private_data) {
++ if (S_ISDIR(inode->i_mode))
++ ext3_htree_free_dir_info(filp->private_data);
++ else
++ ext3_iam_release(filp, inode);
++ }
- /*
-@@ -347,7 +414,9 @@
- /* initial state */
- IAM_IT_DETACHED,
- /* iterator is above particular record in the container */
-- IAM_IT_ATTACHED
-+ IAM_IT_ATTACHED,
-+ /* iterator is positioned before record */
-+ IAM_IT_SKEWED
- };
+ return 0;
+ }
+@@ -110,7 +115,7 @@ ext3_file_write(struct kiocb *iocb, cons
- /*
-@@ -355,7 +424,7 @@
- */
- enum iam_it_flags {
- /*
-- * this iterator will move (iam_it_{prev,next}() will be called on it)
-+ * this iterator will move (iam_it_next() will be called on it)
- */
- IAM_IT_MOVE = (1 << 0),
- /*
-@@ -372,15 +441,26 @@
- * doesn't point to any particular record in this container.
- *
- * After successful call to iam_it_get() and until corresponding call to
-- * iam_it_put() iterator is in "attached" state (IAM_IT_ATTACHED).
-+ * iam_it_put() iterator is in one of "active" states: IAM_IT_ATTACHED or
-+ * IAM_IT_SKEWED.
- *
-- * Attached iterator can move through records in a container (provided
-+ * Active iterator can move through records in a container (provided
- * IAM_IT_MOVE permission) in a key order, can get record and key values as it
- * passes over them, and can modify container (provided IAM_IT_WRITE
- * permission).
- *
-+ * Iteration may reach the end of container, at which point iterator switches
-+ * into IAM_IT_DETACHED state.
+ force_commit:
+ err = ext3_force_commit(inode->i_sb);
+- if (err)
++ if (err)
+ return err;
+ return ret;
+ }
+Index: iam/fs/ext3/iam-uapi.c
+===================================================================
+--- iam.orig/fs/ext3/iam-uapi.c 2004-04-06 17:27:52.000000000 +0400
++++ iam/fs/ext3/iam-uapi.c 2006-09-28 22:11:15.000000000 +0400
+@@ -0,0 +1,368 @@
++/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
++ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
- * Concurrency: iterators are supposed to be local to thread. Interfaces below
-- * do no internal serialization.
-+ * do no internal serialization of access to the iterator fields.
++ * iam_uapi.c
++ * User-level interface to iam (ioctl based)
+ *
-+ * When in non-detached state, iterator keeps some container nodes pinned in
-+ * memory and locked (that locking may be implemented at the container
-+ * granularity though). In particular, clients may assume that pointers to
-+ * records and keys obtained through iterator interface as valid until
-+ * iterator is detached (except that they may be invalidated by sub-sequent
-+ * operations done through the same iterator).
- *
- */
- struct iam_iterator {
-@@ -390,7 +470,8 @@
- __u32 ii_flags;
- enum iam_it_state ii_state;
- /*
-- * path to the record. Valid in IAM_IT_ATTACHED state.
-+ * path to the record. Valid in IAM_IT_ATTACHED, and IAM_IT_SKEWED
-+ * states.
- */
- struct iam_path ii_path;
- };
-@@ -405,133 +486,26 @@
- struct iam_path_descr *iam_ipd_alloc(int keysize);
- void iam_ipd_free(struct iam_path_descr *ipd);
-
--/*
-- * Initialize iterator to IAM_IT_DETACHED state.
-- *
-- * postcondition: it_state(it) == IAM_IT_DETACHED
-- */
- int iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags,
- struct iam_path_descr *pd);
--/*
-- * Finalize iterator and release all resources.
-- *
-- * precondition: it_state(it) == IAM_IT_DETACHED
-- */
- void iam_it_fini(struct iam_iterator *it);
--
--/*
-- * Attach iterator. After successful completion, @it points to record with the
-- * largest key not larger than @k. Semantics of ->id_create() method guarantee
-- * that such record will always be found.
-- *
-- * Return value: 0: positioned on existing record,
-- * -ve: error.
-- *
-- * precondition: it_state(it) == IAM_IT_DETACHED
-- * postcondition: ergo(result == 0,
-- * (it_state(it) == IAM_IT_ATTACHED &&
-- * it_keycmp(it, iam_it_key_get(it, *), k) < 0))
-- */
- int iam_it_get(struct iam_iterator *it, const struct iam_key *k);
--
--/*
-- * Duplicates iterator.
-- *
-- * postcondition: it_state(dst) == it_state(src) &&
-- * iam_it_container(dst) == iam_it_container(src) &&
-- * dst->ii_flags = src->ii_flags &&
-- * ergo(it_state(it) == IAM_IT_ATTACHED,
-- * iam_it_rec_get(dst) == iam_it_rec_get(src) &&
-- * iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
-- */
-+int iam_it_get_at(struct iam_iterator *it, const struct iam_key *k);
- void iam_it_dup(struct iam_iterator *dst, const struct iam_iterator *src);
--
--/*
-- * Detach iterator. Does nothing it detached state.
-- *
-- * postcondition: it_state(it) == IAM_IT_DETACHED
-- */
- void iam_it_put(struct iam_iterator *it);
--
--/*
-- * Move iterator one record right.
-- *
-- * Return value: 0: success,
-- * +1: end of container reached
-- * -ve: error
-- *
-- * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
-- * postcondition: ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED)
-- */
- int iam_it_next(struct iam_iterator *it);
--
--/*
-- * Return pointer to the record under iterator.
-- *
-- * precondition: it_state(it) == IAM_IT_ATTACHED
-- * postcondition: it_state(it) == IAM_IT_ATTACHED
-- */
- struct iam_rec *iam_it_rec_get(const struct iam_iterator *it);
--
--/*
-- * Replace contents of record under iterator.
-- *
-- * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
-- * postcondition: it_state(it) == IAM_IT_ATTACHED &&
-- * ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
-- */
--int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r);
--
--/*
-- * Place key under iterator in @k, return @k
-- *
-- * precondition: it_state(it) == IAM_IT_ATTACHED
-- * postcondition: it_state(it) == IAM_IT_ATTACHED
-- */
--struct iam_key *iam_it_key_get(const struct iam_iterator *it,
-- struct iam_key *k);
--
--/*
-- * Insert new record with key @k and contents from @r, shifting records to the
-- * right.
-- *
-- * precondition: it_state(it) == IAM_IT_ATTACHED &&
-- * it->ii_flags&IAM_IT_WRITE &&
-- * it_keycmp(it, iam_it_key_get(it, *), k) < 0
-- * postcondition: it_state(it) == IAM_IT_ATTACHED &&
-- * ergo(result == 0,
-- * it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
-- * !memcmp(iam_it_rec_get(it), r, ...))
-- */
-+int iam_it_rec_set(handle_t *h,
-+ struct iam_iterator *it, const struct iam_rec *r);
-+struct iam_key *iam_it_key_get(const struct iam_iterator *it);
-+int iam_it_key_size(const struct iam_iterator *it);
- int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
- const struct iam_key *k, const struct iam_rec *r);
--/*
-- * Delete record under iterator.
-- *
-- * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
-- * postcondition: it_state(it) == IAM_IT_ATTACHED
-- */
- int iam_it_rec_delete(handle_t *h, struct iam_iterator *it);
-
- typedef __u64 iam_pos_t;
-
--/*
-- * Convert iterator to cookie.
-- *
-- * precondition: it_state(it) == IAM_IT_ATTACHED &&
-- * path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
-- * postcondition: it_state(it) == IAM_IT_ATTACHED
-- */
- iam_pos_t iam_it_store(const struct iam_iterator *it);
--
--/*
-- * Restore iterator from cookie.
-- *
-- * precondition: it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE &&
-- * path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
-- * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED &&
-- * iam_it_store(it) == pos)
-- */
- int iam_it_load(struct iam_iterator *it, iam_pos_t pos);
-
- int iam_lookup(struct iam_container *c, const struct iam_key *k,
-@@ -539,10 +513,10 @@
- int iam_delete(handle_t *h, struct iam_container *c, const struct iam_key *k,
- struct iam_path_descr *pd);
- int iam_update(handle_t *h, struct iam_container *c, const struct iam_key *k,
-- struct iam_rec *r, struct iam_path_descr *pd);
-+ const struct iam_rec *r, struct iam_path_descr *pd);
- int iam_insert(handle_t *handle, struct iam_container *c,
- const struct iam_key *k,
-- struct iam_rec *r, struct iam_path_descr *pd);
-+ const struct iam_rec *r, struct iam_path_descr *pd);
- /*
- * Initialize container @c.
- */
-@@ -577,16 +551,65 @@
- return p->ip_container->ic_object;
- }
-
--static inline void iam_keycpy(const struct iam_container *c,
-- struct iam_key *k1, const struct iam_key *k2)
-+static inline void iam_ikeycpy(const struct iam_container *c,
-+ struct iam_ikey *k1, const struct iam_ikey *k2)
++ * Copyright (c) 2006 Cluster File Systems, Inc.
++ * Author: Nikita Danilov <nikita@clusterfs.com>
++ *
++ * This file is part of the Lustre file system, http://www.lustre.org
++ * Lustre is a trademark of Cluster File Systems, Inc.
++ *
++ * You may have signed or agreed to another license before downloading
++ * this software. If so, you are bound by the terms and conditions
++ * of that agreement, and the following does not apply to you. See the
++ * LICENSE file included with this distribution for more information.
++ *
++ * If you did not agree to a different license, then this copy of Lustre
++ * is open source software; you can redistribute it and/or modify it
++ * under the terms of version 2 of the GNU General Public License as
++ * published by the Free Software Foundation.
++ *
++ * In either case, Lustre is distributed in the hope that it will be
++ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
++ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ * license text for more details.
++ */
++
++#include <linux/types.h>
++#include <linux/jbd.h>
++/* ext3_error() */
++#include <linux/ext3_fs.h>
++#include <linux/ext3_jbd.h>
++
++#include <linux/lustre_iam.h>
++
++#include <libcfs/libcfs.h>
++#include <libcfs/kp30.h>
++
++struct iam_private_info { /* per-open-file iam state, stored in filp->private_data */
++ struct dir_private_info ipi_dir; /* has to be first: create_dir_info() in dir.c uses this allocation as a struct dir_private_info via pointer cast */
++ struct iam_container ipi_bag; /* container initialized over the inode by iam_uapi_init() */
++ struct iam_descr ipi_descr; /* descriptor storage handed to iam_container_init() */
++ struct iam_iterator ipi_it; /* iterator driven by the IAM_IOC_IT_* commands */
++ struct iam_path_descr *ipi_ipd; /* obtained from ->id_ipd_alloc(); NULL until iam_uapi_init() allocates it */
++};
++
++enum {
++ IAM_INSERT_CREDITS = 20 /* passed to ext3_journal_start() by iam_uapi_op() for both insert and delete */
++};
++
++static struct iam_private_info *get_ipi(struct file *filp)
+{
-+ memcpy(k1, k2, c->ic_descr->id_ikey_size);
++ return filp->private_data; /* installed by iam_uapi_init(); not checked for NULL here */
+}
+
-+static inline size_t iam_entry_size(struct iam_path *p)
++static int iam_uapi_it(int cmd, struct inode *inode,
++ struct file *filp, struct iam_uapi_it *itop)
+{
-+ return iam_path_descr(p)->id_ikey_size + iam_path_descr(p)->id_ptr_size;
-+}
++ struct iam_private_info *ipi;
++ struct iam_iterator *it;
++ enum iam_it_state st;
++ int result = 0; /* left at 0 when cmd matches no case below */
+
-+static inline struct iam_entry *iam_entry_shift(struct iam_path *p,
-+ struct iam_entry *entry,
-+ int shift)
- {
-- memcpy(k1, k2, c->ic_descr->id_key_size);
-+ void *e = entry;
-+ return e + shift * iam_entry_size(p);
- }
-
--static inline int iam_keycmp(const struct iam_container *c,
-- const struct iam_key *k1, const struct iam_key *k2)
-+static inline struct iam_ikey *iam_get_ikey(struct iam_path *p,
-+ struct iam_entry *entry,
-+ struct iam_ikey *key)
- {
-- return c->ic_descr->id_ops->id_keycmp(c, k1, k2);
-+ return memcpy(key, entry, iam_path_descr(p)->id_ikey_size);
++ ipi = get_ipi(filp);
++ it = &ipi->ipi_it;
++ st = it->ii_state;
++ switch (cmd) {
++ case IAM_IOC_IT_START: /* initialize the iterator on this file's container, then attach it at the supplied key */
++ result = iam_it_init(it, &ipi->ipi_bag,
++ IAM_IT_MOVE, ipi->ipi_ipd);
++ if (result == 0)
++ result = iam_it_get(it, itop->iui_op.iul_key);
++ break;
++ case IAM_IOC_IT_NEXT: /* only an active (attached or skewed) iterator may advance */
++ if (st == IAM_IT_ATTACHED || st == IAM_IT_SKEWED)
++ result = iam_it_next(it);
++ else
++ result = -EBUSY;
++ break;
++ case IAM_IOC_IT_STOP: /* detach and finalize the iterator */
++ iam_it_put(it);
++ iam_it_fini(it);
++ result = 0;
++ break;
++ }
++ st = it->ii_state; /* re-read: the command may have changed the iterator state */
++ if (st == IAM_IT_ATTACHED || st == IAM_IT_SKEWED) /* active iterator: hand the current key back in the request buffer */
++ memcpy(itop->iui_op.iul_key, iam_it_key_get(it),
++ iam_it_key_size(it));
++ if (st == IAM_IT_ATTACHED) /* the record is copied back only in the attached state */
++ iam_reccpy(&it->ii_path,
++ itop->iui_op.iul_rec, iam_it_rec_get(it));
++ itop->iui_state = st; /* report the resulting state to the caller */
++ return result;
+}
+
-+static inline struct iam_ikey *iam_ikey_at(struct iam_path *p,
-+ struct iam_entry *entry)
++static int iam_uapi_op(int cmd, struct inode *inode,
++ struct file *filp, struct iam_uapi_op *op)
+{
-+ return (struct iam_ikey *)entry;
++ int result;
++ struct iam_private_info *ipi;
++
++ ipi = get_ipi(filp);
++ if (cmd == IAM_IOC_INSERT || cmd == IAM_IOC_DELETE) { /* modifying commands run inside a journal handle */
++ handle_t *h;
++
++ h = ext3_journal_start(inode, IAM_INSERT_CREDITS);
++ if (!IS_ERR(h)) {
++ if (cmd == IAM_IOC_INSERT)
++ result = iam_insert(h, &ipi->ipi_bag,
++ op->iul_key,
++ op->iul_rec, ipi->ipi_ipd);
++ else
++ result = iam_delete(h, &ipi->ipi_bag,
++ op->iul_key, ipi->ipi_ipd);
++ ext3_journal_stop(h); /* NOTE(review): return value of ext3_journal_stop() is ignored */
++ } else {
++ result = PTR_ERR(h); /* handle could not be started: return and log that error */
++ ext3_std_error(inode->i_sb, result);
++ }
++ } else /* every other cmd value is treated as a lookup */
++ result = iam_lookup(&ipi->ipi_bag, op->iul_key,
++ op->iul_rec, ipi->ipi_ipd);
++ return result;
+}
+
-+static inline ptrdiff_t iam_entry_diff(struct iam_path *p,
-+ struct iam_entry *e1,
-+ struct iam_entry *e2)
++struct iam_private_info *ext3_iam_alloc_info(int flags)
+{
-+ ptrdiff_t diff;
++ struct iam_private_info *info;
+
-+ diff = (void *)e1 - (void *)e2;
-+ assert(diff / iam_entry_size(p) * iam_entry_size(p) == diff);
-+ return diff / iam_entry_size(p);
++ info = kmalloc(sizeof *info, flags); /* @flags are handed to kmalloc() unchanged */
++ if (info != NULL)
++ memset(info, 0, sizeof *info); /* zero-fill, so ->ipi_ipd starts out NULL */
++ return info; /* NULL when the allocation failed */
+}
+
-+/*
-+ * Helper for the frequent case, where key was already placed into @k1 by
-+ * callback.
-+ */
-+static inline void iam_ikeycpy0(const struct iam_container *c,
-+ struct iam_ikey *k1, const struct iam_ikey *k2)
++void ext3_iam_release_info(struct iam_private_info *info) /* tears down iam state inside @info; does not free @info itself */
+{
-+ if (k1 != k2)
-+ iam_ikeycpy(c, k1, k2);
++ iam_it_put(&info->ipi_it);
++ iam_it_fini(&info->ipi_it);
++ if (info->ipi_ipd != NULL) /* NULL when no path descriptor was ever allocated */
++ info->ipi_bag.ic_descr->id_ops->id_ipd_free(&info->ipi_bag,
++ info->ipi_ipd);
++ iam_container_fini(&info->ipi_bag);
+}
+
-+static inline int iam_ikeycmp(const struct iam_container *c,
-+ const struct iam_ikey *k1,
-+ const struct iam_ikey *k2)
-+{
-+ return c->ic_descr->id_ops->id_ikeycmp(c, k1, k2);
- }
-
- static inline void iam_reccpy(const struct iam_path *p, struct iam_rec *rec_dst,
-@@ -604,7 +627,7 @@
- static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry)
- {
- return le32_to_cpu(*(u32*)iam_entry_off(entry,
-- iam_path_descr(p)->id_key_size))
-+ iam_path_descr(p)->id_ikey_size))
- & 0x00ffffff;
- }
-
-@@ -612,21 +635,64 @@
- struct iam_entry *entry, unsigned value)
- {
- *(u32*)iam_entry_off(entry,
-- iam_path_descr(p)->id_key_size) =
-+ iam_path_descr(p)->id_ikey_size) =
- cpu_to_le32(value);
- }
-
--static inline void dx_set_key(struct iam_path *p, struct iam_entry *entry,
-- const struct iam_key *key)
-+static inline void dx_set_ikey(struct iam_path *p, struct iam_entry *entry,
-+ const struct iam_ikey *key)
- {
-- iam_keycpy(p->ip_container, iam_entry_off(entry, 0), key);
-+ iam_ikeycpy(p->ip_container, iam_entry_off(entry, 0), key);
- }
-
-+struct dx_map_entry
++void ext3_iam_release(struct file *filp, struct inode *inode) /* called from ext3_release_file() for non-directory files that carry iam state */
+{
-+ u32 hash;
-+ u32 offs;
-+};
-+
-+struct fake_dirent {
-+ __le32 inode;
-+ __le16 rec_len;
-+ u8 name_len;
-+ u8 file_type;
-+};
++ struct iam_private_info *info;
+
- struct dx_countlimit {
- __le16 limit;
- __le16 count;
- };
-
-+/*
-+ * dx_root_info is laid out so that if it should somehow get overlaid by a
-+ * dirent the two low bits of the hash version will be zero. Therefore, the
-+ * hash version mod 4 should never be 0. Sincerely, the paranoia department.
-+ */
++ info = filp->private_data;
++ ext3_iam_release_info(info);
+
-+struct dx_root {
-+ struct fake_dirent dot;
-+ char dot_name[4];
-+ struct fake_dirent dotdot;
-+ char dotdot_name[4];
-+ struct dx_root_info
-+ {
-+ __le32 reserved_zero;
-+ u8 hash_version;
-+ u8 info_length; /* 8 */
-+ u8 indirect_levels;
-+ u8 unused_flags;
-+ }
-+ info;
-+ struct {} entries[0];
-+};
++ kfree(info); /* NOTE(review): filp->private_data still points at the freed block — confirm nothing reads it afterwards */
++ EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; /* clears the flag that iam_uapi_init() sets */
++}
+
-+struct dx_node
++static int iam_uapi_init(struct inode *inode,
++ struct file *filp, struct iam_uapi_info *ua) /* @ua is not referenced in this function */
+{
-+ struct fake_dirent fake;
-+ struct {} entries[0];
-+};
++ int result;
++ struct iam_private_info *info;
+
++ info = ext3_iam_alloc_info(GFP_KERNEL);
++ if (info != NULL) {
++ struct iam_container *bag;
++ struct iam_descr *des;
+
- static inline unsigned dx_get_count(struct iam_entry *entries)
- {
- return le16_to_cpu(((struct dx_countlimit *) entries)->count);
-@@ -647,9 +713,21 @@
- struct iam_descr *param = iam_path_descr(p);
- unsigned entry_space = iam_path_obj(p)->i_sb->s_blocksize -
- param->id_node_gap;
-- return entry_space / (param->id_key_size + param->id_ptr_size);
-+ return entry_space / (param->id_ikey_size + param->id_ptr_size);
++ bag = &info->ipi_bag;
++ des = &info->ipi_descr;
++ result = iam_container_init(bag, des, inode);
++ if (result == 0) {
++ result = iam_container_setup(bag);
++ if (result == 0) {
++ /*
++ * Container setup might change ->ic_descr
++ */
++ des = bag->ic_descr;
++ info->ipi_ipd = des->id_ops->id_ipd_alloc(bag);
++ if (info->ipi_ipd != NULL) {
++ filp->private_data = info; /* success: the open file now owns info */
++ EXT3_I(inode)->i_flags |= EXT3_INDEX_FL;
++ } else
++ result = -ENOMEM;
++ }
++ }
++ } else
++ result = -ENOMEM;
++ return result; /* NOTE(review): when result != 0 and info was allocated, info is neither freed nor stored anywhere — looks like a leak on every failure path; confirm and fix */
+}
+
-+static inline unsigned dx_root_limit(struct iam_path *p)
++
++static int getua(struct iam_uapi_info *ua, unsigned long arg) /* @arg carries a user-space pointer to struct iam_uapi_info */
+{
-+ struct iam_descr *param = iam_path_descr(p);
-+ unsigned limit = iam_path_obj(p)->i_sb->s_blocksize -
-+ param->id_root_gap;
-+ limit /= (param->id_ikey_size + param->id_ptr_size);
-+ if (limit == dx_node_limit(p))
-+ limit--;
-+ return limit;
- }
-
++ if (copy_from_user(ua, (struct iam_uapi_info __user *)arg, sizeof *ua)) /* fetch the whole structure into @ua */
++ return -EFAULT;
++ else
++ return 0;
++}
+
- static inline struct iam_entry *dx_get_entries(struct iam_path *path,
- void *data, int root)
- {
-@@ -665,7 +743,8 @@
- frame->bh->b_data, frame == path->ip_frames);
- }
-
--static inline struct iam_key *iam_path_key(const struct iam_path *path, int nr)
-+static inline struct iam_ikey *iam_path_ikey(const struct iam_path *path,
-+ int nr)
- {
- assert(0 <= nr && nr < ARRAY_SIZE(path->ip_data->ipd_key_scratch));
- return path->ip_data->ipd_key_scratch[nr];
-@@ -674,6 +753,7 @@
- int dx_lookup(struct iam_path *path);
- void dx_insert_block(struct iam_path *path, struct iam_frame *frame,
- u32 hash, u32 block);
-+int dx_index_is_compat(struct iam_path *path);
-
- int ext3_htree_next_block(struct inode *dir, __u32 hash,
- struct iam_path *path, __u32 *start_hash);
-@@ -681,6 +761,20 @@
- struct buffer_head *ext3_append(handle_t *handle, struct inode *inode,
- u32 *block, int *err);
- int split_index_node(handle_t *handle, struct iam_path *path);
-+struct ext3_dir_entry_2 *split_entry(struct inode *dir,
-+ struct ext3_dir_entry_2 *de,
-+ unsigned long ino, mode_t mode,
-+ const char *name, int namelen);
-+struct ext3_dir_entry_2 *find_insertion_point(struct inode *dir,
-+ struct buffer_head *bh,
-+ const char *name, int namelen);
-+struct ext3_dir_entry_2 *move_entries(struct inode *dir,
-+ struct dx_hash_info *hinfo,
-+ struct buffer_head **bh1,
-+ struct buffer_head **bh2,
-+ __u32 *delim_hash);
++static int putua(struct iam_uapi_info *ua, unsigned long arg) /* counterpart of getua(): @arg is the user-space destination */
++{
++ if (copy_to_user((struct iam_uapi_info __user *)arg, ua, sizeof *ua)) /* write the whole structure back to user space */
++ return -EFAULT;
++ else
++ return 0;
++}
+
-+extern struct iam_descr iam_htree_compat_param;
-
- /*
- * external
-@@ -698,10 +792,12 @@
- handle_t *handle, struct buffer_head **bh);
-
- void iam_insert_key(struct iam_path *path, struct iam_frame *frame,
-- const struct iam_key *key, iam_ptr_t ptr);
-+ const struct iam_ikey *key, iam_ptr_t ptr);
-
- int iam_leaf_at_end(const struct iam_leaf *l);
- void iam_leaf_next(struct iam_leaf *folio);
-+int iam_leaf_can_add(const struct iam_leaf *l,
-+ const struct iam_key *k, const struct iam_rec *r);
-
- struct iam_path *iam_leaf_path(const struct iam_leaf *leaf);
- struct iam_container *iam_leaf_container(const struct iam_leaf *leaf);
-@@ -709,14 +805,79 @@
- struct iam_leaf_operations *iam_leaf_ops(const struct iam_leaf *leaf);
-
-
-+int iam_node_read(struct iam_container *c, iam_ptr_t ptr,
-+ handle_t *h, struct buffer_head **bh);
++enum outop_t {
++ KEY = 1 << 0,
++ REC = 1 << 1,
++ STATE = 1 << 2
++};
+
-+/*
-+ * Container format.
-+ */
- struct iam_format {
-+ /*
-+ * Method called to recognize container format. Should return true iff
-+ * container @c conforms to this format. This method may do IO to read
-+ * container pages.
-+ *
-+ * If container is recognized, this method sets operation vectors
-+ * ->id_ops and ->id_leaf_ops in container description (c->ic_descr),
-+ * and fills other description fields.
-+ */
- int (*if_guess)(struct iam_container *c);
-+ /*
-+ * Linkage into global list of container formats.
-+ */
- struct list_head if_linkage;
- };
-
- void iam_format_register(struct iam_format *fmt);
-
- void iam_lfix_format_init(void);
-+void iam_lvar_format_init(void);
-+void iam_htree_format_init(void);
++static int outop(struct iam_uapi_op *op, struct iam_uapi_op *uop,
++ struct iam_descr *des, enum outop_t opt)
++{
++ int result;
+
-+struct iam_private_info;
++ if (((opt & REC) && copy_to_user((void __user *)uop->iul_rec,
++ op->iul_rec, des->id_rec_size)) ||
++ ((opt & KEY) && copy_to_user((void __user *)uop->iul_key,
++ op->iul_key, des->id_key_size)))
++ result = -EFAULT;
++ else
++ result = 0;
++ return result;
++}
+
-+void ext3_iam_release(struct file *filp, struct inode *inode);
++static void putop(struct iam_uapi_op *op)
++{
++ kfree(op->iul_key);
++ kfree(op->iul_rec);
++}
+
-+int iam_uapi_ioctl(struct inode * inode, struct file * filp, unsigned int cmd,
-+ unsigned long arg);
++static int getop(struct iam_uapi_op *op, struct iam_uapi_op *uop,
++ struct iam_descr *des, unsigned long arg)
++{
++ int result;
++ int ks;
++ int rs;
+
-+/* __KERNEL__ */
-+#endif
++ ks = des->id_key_size;
++ rs = des->id_rec_size;
++ op->iul_key = kmalloc(ks, GFP_KERNEL);
++ op->iul_rec = kmalloc(rs, GFP_KERNEL);
++ if (!copy_from_user(uop,
++ (struct iam_uapi_op __user *)arg, sizeof *uop) &&
++ op->iul_key != NULL && op->iul_rec != NULL &&
++ !copy_from_user(op->iul_key, (void __user *)uop->iul_key, ks) &&
++ !copy_from_user(op->iul_rec, (void __user *)uop->iul_rec, rs))
++ result = 0;
++ else {
++ result = -EFAULT;
++ putop(op);
++ }
++ return result;
++}
+
-+/*
-+ * User level API. Copy exists in lustre/lustre/tests/iam_ut.c
-+ */
++static int outit(struct iam_uapi_it *it, struct iam_uapi_it *uit,
++ struct iam_descr *des, enum outop_t opt, unsigned long arg)
++{
++ int result;
+
-+struct iam_uapi_info {
-+ __u16 iui_keysize;
-+ __u16 iui_recsize;
-+ __u16 iui_ptrsize;
-+ __u16 iui_height;
-+ char iui_fmt_name[DX_FMT_NAME_LEN];
-+};
++ result = outop(&it->iui_op, &uit->iui_op, des, opt);
++ if (result == 0 && (opt&STATE))
++ result = put_user(it->iui_state, (int __user *) arg);
++ return result;
++}
+
-+struct iam_uapi_op {
-+ void *iul_key;
-+ void *iul_rec;
-+};
-+
-+struct iam_uapi_it {
-+ struct iam_uapi_op iui_op;
-+ __u16 iui_state;
-+};
-+
-+enum iam_ioctl_cmd {
-+ IAM_IOC_INIT = _IOW('i', 1, struct iam_uapi_info),
-+ IAM_IOC_GETINFO = _IOR('i', 2, struct iam_uapi_info),
-+ IAM_IOC_INSERT = _IOR('i', 3, struct iam_uapi_op),
-+ IAM_IOC_LOOKUP = _IOWR('i', 4, struct iam_uapi_op),
-+ IAM_IOC_DELETE = _IOR('i', 5, struct iam_uapi_op),
-+ IAM_IOC_IT_START = _IOR('i', 6, struct iam_uapi_it),
-+ IAM_IOC_IT_NEXT = _IOW('i', 7, struct iam_uapi_it),
-+ IAM_IOC_IT_STOP = _IOR('i', 8, struct iam_uapi_it),
-+
-+ IAM_IOC_POLYMORPH = _IOR('i', 9, unsigned long)
-+};
-
- /* __LINUX_LUSTRE_IAM_H__ */
- #endif
-Index: iam/fs/ext3/iam-uapi.c
-===================================================================
---- iam.orig/fs/ext3/iam-uapi.c 2006-09-20 09:10:35.143350952 +0800
-+++ iam/fs/ext3/iam-uapi.c 2006-09-22 17:24:07.000000000 +0800
-@@ -0,0 +1,368 @@
-+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
-+ * vim:expandtab:shiftwidth=8:tabstop=8:
-+ *
-+ * iam_uapi.c
-+ * User-level interface to iam (ioctl based)
-+ *
-+ * Copyright (c) 2006 Cluster File Systems, Inc.
-+ * Author: Nikita Danilov <nikita@clusterfs.com>
-+ *
-+ * This file is part of the Lustre file system, http://www.lustre.org
-+ * Lustre is a trademark of Cluster File Systems, Inc.
-+ *
-+ * You may have signed or agreed to another license before downloading
-+ * this software. If so, you are bound by the terms and conditions
-+ * of that agreement, and the following does not apply to you. See the
-+ * LICENSE file included with this distribution for more information.
-+ *
-+ * If you did not agree to a different license, then this copy of Lustre
-+ * is open source software; you can redistribute it and/or modify it
-+ * under the terms of version 2 of the GNU General Public License as
-+ * published by the Free Software Foundation.
-+ *
-+ * In either case, Lustre is distributed in the hope that it will be
-+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
-+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-+ * license text for more details.
-+ */
-+
-+#include <linux/types.h>
-+#include <linux/jbd.h>
-+/* ext3_error() */
-+#include <linux/ext3_fs.h>
-+#include <linux/ext3_jbd.h>
-+
-+#include <linux/lustre_iam.h>
-+
-+#include <libcfs/libcfs.h>
-+#include <libcfs/kp30.h>
-+
-+struct iam_private_info {
-+ struct dir_private_info ipi_dir; /* has to be first */
-+ struct iam_container ipi_bag;
-+ struct iam_descr ipi_descr;
-+ struct iam_iterator ipi_it;
-+ struct iam_path_descr *ipi_ipd;
-+};
-+
-+enum {
-+ IAM_INSERT_CREDITS = 20
-+};
-+
-+static struct iam_private_info *get_ipi(struct file *filp)
++static void putit(struct iam_uapi_it *it)
+{
-+ return filp->private_data;
++ putop(&it->iui_op);
+}
+
-+static int iam_uapi_it(int cmd, struct inode *inode,
-+ struct file *filp, struct iam_uapi_it *itop)
++static int getit(struct iam_uapi_it *it, struct iam_uapi_it *uit,
++ struct iam_descr *des, unsigned long arg)
+{
-+ struct iam_private_info *ipi;
-+ struct iam_iterator *it;
-+ enum iam_it_state st;
-+ int result = 0;
-+
-+ ipi = get_ipi(filp);
-+ it = &ipi->ipi_it;
-+ st = it->ii_state;
-+ switch (cmd) {
-+ case IAM_IOC_IT_START:
-+ result = iam_it_init(it, &ipi->ipi_bag,
-+ IAM_IT_MOVE, ipi->ipi_ipd);
-+ if (result == 0)
-+ result = iam_it_get(it, itop->iui_op.iul_key);
-+ break;
-+ case IAM_IOC_IT_NEXT:
-+ if (st == IAM_IT_ATTACHED || st == IAM_IT_SKEWED)
-+ result = iam_it_next(it);
-+ else
-+ result = -EBUSY;
-+ break;
-+ case IAM_IOC_IT_STOP:
-+ iam_it_put(it);
-+ iam_it_fini(it);
-+ result = 0;
-+ break;
-+ }
-+ st = it->ii_state;
-+ if (st == IAM_IT_ATTACHED || st == IAM_IT_SKEWED)
-+ memcpy(itop->iui_op.iul_key, iam_it_key_get(it),
-+ iam_it_key_size(it));
-+ if (st == IAM_IT_ATTACHED)
-+ iam_reccpy(&it->ii_path,
-+ itop->iui_op.iul_rec, iam_it_rec_get(it));
-+ itop->iui_state = st;
-+ return result;
++ return getop(&it->iui_op, &uit->iui_op, des,
++ (unsigned long)&((struct iam_uapi_it *)arg)->iui_op);
+}
+
-+static int iam_uapi_op(int cmd, struct inode *inode,
-+ struct file *filp, struct iam_uapi_op *op)
++int iam_uapi_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
++ unsigned long arg)
+{
+ int result;
-+ struct iam_private_info *ipi;
++ struct iam_uapi_info ua;
++ struct iam_uapi_op uop;
++ struct iam_uapi_op op;
++ struct iam_uapi_it uit;
++ struct iam_uapi_it it;
++ enum outop_t opt;
+
-+ ipi = get_ipi(filp);
-+ if (cmd == IAM_IOC_INSERT || cmd == IAM_IOC_DELETE) {
-+ handle_t *h;
++ if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) {
++ result = -EACCES;
++ } else if (cmd == IAM_IOC_POLYMORPH) {
++ /*
++ * If polymorphing into directory, increase hard-link count.
++ */
++ if (S_ISDIR((umode_t)arg) && !S_ISDIR(inode->i_mode))
++ inode->i_nlink++;
++ else if (!S_ISDIR((umode_t)arg) && S_ISDIR(inode->i_mode))
++ inode->i_nlink--;
++ inode->i_mode = (umode_t)arg;
++ mark_inode_dirty(inode);
++ result = 0;
++ } else if (cmd == IAM_IOC_INIT) {
++ if (filp->private_data == NULL) {
++ result = getua(&ua, arg);
++ if (result == 0)
++ result = iam_uapi_init(inode, filp, &ua);
++ } else
++ result = -EBUSY;
++ } else if (is_dx(inode) && filp->private_data != NULL) {
++ struct iam_descr *des;
+
-+ h = ext3_journal_start(inode, IAM_INSERT_CREDITS);
-+ if (!IS_ERR(h)) {
-+ if (cmd == IAM_IOC_INSERT)
-+ result = iam_insert(h, &ipi->ipi_bag,
-+ op->iul_key,
-+ op->iul_rec, ipi->ipi_ipd);
-+ else
-+ result = iam_delete(h, &ipi->ipi_bag,
-+ op->iul_key, ipi->ipi_ipd);
-+ ext3_journal_stop(h);
-+ } else {
-+ result = PTR_ERR(h);
-+ ext3_std_error(inode->i_sb, result);
++ switch (cmd) {
++ case IAM_IOC_IT_START:
++ case IAM_IOC_IT_NEXT:
++ opt = KEY|REC|STATE;
++ break;
++ case IAM_IOC_LOOKUP:
++ opt = REC;
++ break;
++ default:
++ opt = 0;
++ break;
+ }
-+ } else
-+ result = iam_lookup(&ipi->ipi_bag, op->iul_key,
-+ op->iul_rec, ipi->ipi_ipd);
-+ return result;
-+}
-+
-+struct iam_private_info *ext3_iam_alloc_info(int flags)
-+{
-+ struct iam_private_info *info;
-+
-+ info = kmalloc(sizeof *info, flags);
-+ if (info != NULL)
-+ memset(info, 0, sizeof *info);
-+ return info;
-+}
+
-+void ext3_iam_release_info(struct iam_private_info *info)
-+{
-+ iam_it_put(&info->ipi_it);
-+ iam_it_fini(&info->ipi_it);
-+ if (info->ipi_ipd != NULL)
-+ info->ipi_bag.ic_descr->id_ops->id_ipd_free(&info->ipi_bag,
-+ info->ipi_ipd);
-+ iam_container_fini(&info->ipi_bag);
-+}
++ des = get_ipi(filp)->ipi_bag.ic_descr;
++ if (cmd == IAM_IOC_GETINFO) {
++ ua.iui_keysize = des->id_key_size;
++ ua.iui_recsize = des->id_rec_size;
++ ua.iui_ptrsize = des->id_ptr_size;
++ ua.iui_height = 0; /* not yet */
++ memcpy(ua.iui_fmt_name, des->id_ops->id_name,
++ ARRAY_SIZE(ua.iui_fmt_name));
++ result = putua(&ua, arg);
++ } else if (cmd == IAM_IOC_INSERT || cmd == IAM_IOC_LOOKUP ||
++ cmd == IAM_IOC_DELETE) {
++ result = getop(&op, &uop, des, arg);
++ if (result == 0) {
++ int res2;
++ result = iam_uapi_op(cmd, inode, filp, &op);
+
-+void ext3_iam_release(struct file *filp, struct inode *inode)
-+{
-+ struct iam_private_info *info;
++ res2 = outop(&op, &uop, des, opt);
++ result = result ? : res2;
++ putop(&op);
++ }
++ } else if (cmd == IAM_IOC_IT_START || cmd == IAM_IOC_IT_NEXT ||
++ cmd == IAM_IOC_IT_STOP) {
++ result = getit(&it, &uit, des, arg);
++ if (result == 0) {
++ int res2;
+
-+ info = filp->private_data;
-+ ext3_iam_release_info(info);
++ result = iam_uapi_it(cmd, inode, filp, &it);
+
-+ kfree(info);
-+ EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL;
-+}
-+
-+static int iam_uapi_init(struct inode *inode,
-+ struct file *filp, struct iam_uapi_info *ua)
-+{
-+ int result;
-+ struct iam_private_info *info;
-+
-+ info = ext3_iam_alloc_info(GFP_KERNEL);
-+ if (info != NULL) {
-+ struct iam_container *bag;
-+ struct iam_descr *des;
-+
-+ bag = &info->ipi_bag;
-+ des = &info->ipi_descr;
-+ result = iam_container_init(bag, des, inode);
-+ if (result == 0) {
-+ result = iam_container_setup(bag);
-+ if (result == 0) {
-+ /*
-+ * Container setup might change ->ic_descr
-+ */
-+ des = bag->ic_descr;
-+ info->ipi_ipd = des->id_ops->id_ipd_alloc(bag);
-+ if (info->ipi_ipd != NULL) {
-+ filp->private_data = info;
-+ EXT3_I(inode)->i_flags |= EXT3_INDEX_FL;
-+ } else
-+ result = -ENOMEM;
-+ }
-+ }
-+ } else
-+ result = -ENOMEM;
-+ return result;
-+}
-+
-+
-+static int getua(struct iam_uapi_info *ua, unsigned long arg)
-+{
-+ if (copy_from_user(ua, (struct iam_uapi_info __user *)arg, sizeof *ua))
-+ return -EFAULT;
-+ else
-+ return 0;
-+}
-+
-+static int putua(struct iam_uapi_info *ua, unsigned long arg)
-+{
-+ if (copy_to_user((struct iam_uapi_info __user *)arg, ua, sizeof *ua))
-+ return -EFAULT;
-+ else
-+ return 0;
++ res2 = outit(&it, &uit, des, opt, arg);
++ result = result ? : res2;
++ putit(&it);
++ }
++ } else
++ result = -EINVAL;
++ } else
++ result = -ENOENT;
++ return result;
+}
+Index: iam/fs/ext3/ioctl.c
+===================================================================
+--- iam.orig/fs/ext3/ioctl.c 2006-09-28 22:11:14.000000000 +0400
++++ iam/fs/ext3/ioctl.c 2006-09-28 22:11:15.000000000 +0400
+@@ -250,6 +250,6 @@ flags_err:
+
+
+ default:
+- return -ENOTTY;
++ return iam_uapi_ioctl(inode, filp, cmd, arg);
+ }
+ }
+Index: iam/include/linux/lustre_iam.h
+===================================================================
+--- iam.orig/include/linux/lustre_iam.h 2006-09-28 22:11:15.000000000 +0400
++++ iam/include/linux/lustre_iam.h 2006-09-28 22:11:15.000000000 +0400
+@@ -30,9 +30,6 @@
+ #ifndef __LINUX_LUSTRE_IAM_H__
+ #define __LINUX_LUSTRE_IAM_H__
+
+-/* handle_t, journal_start(), journal_stop() */
+-#include <linux/jbd.h>
+-
+ /*
+ * linux/include/linux/lustre_iam.h
+ */
+@@ -57,14 +54,21 @@ enum {
+ * [2] reserved for leaf node operations.
+ *
+ * [3] reserved for index operations.
++ *
++ * [4] reserved for path->ip_ikey_target
++ *
+ */
+- DX_SCRATCH_KEYS = 4,
++ DX_SCRATCH_KEYS = 5,
+ /*
+ * Maximal format name length.
+ */
+ DX_FMT_NAME_LEN = 16
+ };
+
++#ifdef __KERNEL__
++/* handle_t, journal_start(), journal_stop() */
++#include <linux/jbd.h>
+
-+enum outop_t {
-+ KEY = 1 << 0,
-+ REC = 1 << 1,
-+ STATE = 1 << 2
+ /*
+ * Entry within index tree node. Consists of a key immediately followed
+ * (without padding) by a pointer to the child node.
+@@ -86,14 +90,21 @@ struct iam_entry_compat {
+ */
+ struct iam_key;
+
+-/* Incomplete type use to refer to the records stored in iam containers. */
++/*
++ * Incomplete type used to refer to the records stored in iam containers.
++ */
+ struct iam_rec;
+
+-struct iam_cookie {
+- struct iam_key *ic_key;
+- struct iam_rec *ic_rec;
+-};
++/*
++ * Key in index node. Possibly compressed. Fixed size.
++ */
++struct iam_ikey;
+
++/*
++ * Scalar type into which certain iam_key's can be uniquely mapped. Used to
++ * support interfaces like readdir(), where iteration over index has to be
++ * re-startable.
++ */
+ typedef __u64 iam_ptr_t;
+
+ /*
+@@ -123,6 +134,31 @@ struct iam_leaf {
+ void *il_descr_data;
+ };
+
++/*
++ * Return values of ->lookup() operation from struct iam_leaf_operations.
++ */
++enum iam_lookup_t {
++ /*
++ * lookup found a record with the key requested
++ */
++ IAM_LOOKUP_EXACT,
++ /*
++ * lookup positioned leaf on some record
++ */
++ IAM_LOOKUP_OK,
++ /*
++ * leaf was empty
++ */
++ IAM_LOOKUP_EMPTY,
++ /*
++ * lookup positioned leaf before first record
++ */
++ IAM_LOOKUP_BEFORE
+};
+
-+static int outop(struct iam_uapi_op *op, struct iam_uapi_op *uop,
-+ struct iam_descr *des, enum outop_t opt)
-+{
-+ int result;
++/*
++ * Format-specific container operations. These are called by generic iam code.
++ */
+ struct iam_operations {
+ /*
+ * Returns pointer (in the same sense as pointer in index entry) to
+@@ -131,11 +167,15 @@ struct iam_operations {
+ __u32 (*id_root_ptr)(struct iam_container *c);
+
+ /*
+- * Check validity and consistency of index node. This is called when
+- * iam just loaded new node into frame.
++ * Check validity and consistency of index node.
+ */
+ int (*id_node_check)(struct iam_path *path, struct iam_frame *frame);
+ /*
++ * Copy some data from node header into frame. This is called when
++ * new node is loaded into frame.
++ */
++ int (*id_node_load)(struct iam_path *path, struct iam_frame *frame);
++ /*
+ * Initialize new node (stored in @bh) that is going to be added into
+ * tree.
+ */
+@@ -144,23 +184,33 @@ struct iam_operations {
+ int (*id_node_read)(struct iam_container *c, iam_ptr_t ptr,
+ handle_t *h, struct buffer_head **bh);
+ /*
+- * Key comparison function. Returns -1, 0, +1.
++ * Key comparison functions. Returns -1, 0, +1.
+ */
+- int (*id_keycmp)(const struct iam_container *c,
+- const struct iam_key *k1, const struct iam_key *k2);
++ int (*id_ikeycmp)(const struct iam_container *c,
++ const struct iam_ikey *k1,
++ const struct iam_ikey *k2);
+ /*
+- * Create new container.
+- *
+- * Newly created container has a root node and a single leaf. Leaf
+- * contains single record with the smallest possible key.
++ * Modify root node when tree height increases.
+ */
+- int (*id_create)(struct iam_container *c);
++ struct iam_entry *(*id_root_inc)(struct iam_container *c,
++ struct iam_path *path,
++ struct iam_frame *frame);
+
-+ if (((opt & REC) && copy_to_user((void __user *)uop->iul_rec,
-+ op->iul_rec, des->id_rec_size)) ||
-+ ((opt & KEY) && copy_to_user((void __user *)uop->iul_key,
-+ op->iul_key, des->id_key_size)))
-+ result = -EFAULT;
-+ else
-+ result = 0;
-+ return result;
-+}
++ struct iam_path_descr *(*id_ipd_alloc)(const struct iam_container *c);
++ void (*id_ipd_free)(const struct iam_container *c,
++ struct iam_path_descr *ipd);
+ /*
+ * Format name.
+ */
+ char id_name[DX_FMT_NAME_LEN];
+ };
+
++/*
++ * Another format-specific operation vector, consisting of methods to access
++ * leaf nodes. This is separated from struct iam_operations, because it is
++ * assumed that there will be many formats with different format of leaf
++ * nodes, yet the same struct iam_operations.
++ */
+ struct iam_leaf_operations {
+ /*
+ * leaf operations.
+@@ -186,7 +236,8 @@ struct iam_leaf_operations {
+ void (*start)(struct iam_leaf *l);
+ /* more leaf to the next entry. */
+ void (*next)(struct iam_leaf *l);
+- /* return key of current leaf record. This method may return
++ /*
++ * return key of current leaf record. This method may return
+ * either pointer to the key stored in node, or copy key into
+ * @k buffer supplied by caller and return pointer to this
+ * buffer. The latter approach is used when keys in nodes are
+@@ -194,8 +245,10 @@ struct iam_leaf_operations {
+ * all).
+ *
+ * Caller should assume that returned pointer is only valid
+- * while leaf node is pinned and locked.*/
+- struct iam_key *(*key)(const struct iam_leaf *l, struct iam_key *k);
++ * while leaf node is pinned and locked.
++ */
++ struct iam_ikey *(*ikey)(const struct iam_leaf *l, struct iam_ikey *k);
++ struct iam_key *(*key)(const struct iam_leaf *l);
+ /* return pointer to entry body. Pointer is valid while
+ corresponding leaf node is locked and pinned. */
+ struct iam_rec *(*rec)(const struct iam_leaf *l);
+@@ -203,6 +256,9 @@ struct iam_leaf_operations {
+ void (*key_set)(struct iam_leaf *l, const struct iam_key *k);
+ void (*rec_set)(struct iam_leaf *l, const struct iam_rec *r);
+
++ int (*key_cmp)(const struct iam_leaf *l, const struct iam_key *k);
+
-+static void putop(struct iam_uapi_op *op)
++ int (*key_size)(const struct iam_leaf *l);
+ /*
+ * Search leaf @l for a record with key @k or for a place
+ * where such record is to be inserted.
+@@ -210,6 +266,7 @@ struct iam_leaf_operations {
+ * Scratch keys from @path can be used.
+ */
+ int (*lookup)(struct iam_leaf *l, const struct iam_key *k);
++ int (*ilookup)(struct iam_leaf *l, const struct iam_ikey *ik);
+
+ int (*can_add)(const struct iam_leaf *l,
+ const struct iam_key *k, const struct iam_rec *r);
+@@ -221,12 +278,13 @@ struct iam_leaf_operations {
+ /*
+ * remove rec for a leaf
+ */
+- void (*rec_del)(struct iam_leaf *l);
++ void (*rec_del)(struct iam_leaf *l, int shift);
+ /*
+ * split leaf node, moving some entries into @bh (the latter currently
+ * is assumed to be empty).
+ */
+- void (*split)(struct iam_leaf *l, struct buffer_head *bh);
++ void (*split)(struct iam_leaf *l, struct buffer_head **bh,
++ iam_ptr_t newblknr);
+ };
+
+ struct iam_path *iam_leaf_path(const struct iam_leaf *leaf);
+@@ -241,6 +299,10 @@ struct iam_descr {
+ */
+ size_t id_key_size;
+ /*
++ * Size of a key in index nodes, in bytes.
++ */
++ size_t id_ikey_size;
++ /*
+ * Size of a pointer to the next level (stored in index nodes), in
+ * bytes.
+ */
+@@ -264,6 +326,9 @@ struct iam_descr {
+ struct iam_leaf_operations *id_leaf_ops;
+ };
+
++/*
++ * An instance of iam container.
++ */
+ struct iam_container {
+ /*
+ * Underlying flat file. IO against this object is issued to
+@@ -284,7 +349,7 @@ struct iam_path_descr {
+ /*
+ * Scratch-pad area for temporary keys.
+ */
+- struct iam_key *ipd_key_scratch[DX_SCRATCH_KEYS];
++ struct iam_ikey *ipd_key_scratch[DX_SCRATCH_KEYS];
+ };
+
+ /*
+@@ -316,6 +381,7 @@ struct iam_path {
+ * Key searched for.
+ */
+ const struct iam_key *ip_key_target;
++ const struct iam_ikey *ip_ikey_target;
+ /*
+ * Description-specific data.
+ */
+@@ -334,6 +400,7 @@ struct iam_path_compat {
+ struct dx_hash_info *ipc_hinfo;
+ struct dentry *ipc_dentry;
+ struct iam_path_descr ipc_descr;
++ struct dx_hash_info ipc_hinfo_area;
+ };
+
+ /*
+@@ -347,7 +414,9 @@ enum iam_it_state {
+ /* initial state */
+ IAM_IT_DETACHED,
+ /* iterator is above particular record in the container */
+- IAM_IT_ATTACHED
++ IAM_IT_ATTACHED,
++ /* iterator is positioned before record */
++ IAM_IT_SKEWED
+ };
+
+ /*
+@@ -355,7 +424,7 @@ enum iam_it_state {
+ */
+ enum iam_it_flags {
+ /*
+- * this iterator will move (iam_it_{prev,next}() will be called on it)
++ * this iterator will move (iam_it_next() will be called on it)
+ */
+ IAM_IT_MOVE = (1 << 0),
+ /*
+@@ -372,15 +441,26 @@ enum iam_it_flags {
+ * doesn't point to any particular record in this container.
+ *
+ * After successful call to iam_it_get() and until corresponding call to
+- * iam_it_put() iterator is in "attached" state (IAM_IT_ATTACHED).
++ * iam_it_put() iterator is in one of "active" states: IAM_IT_ATTACHED or
++ * IAM_IT_SKEWED.
+ *
+- * Attached iterator can move through records in a container (provided
++ * Active iterator can move through records in a container (provided
+ * IAM_IT_MOVE permission) in a key order, can get record and key values as it
+ * passes over them, and can modify container (provided IAM_IT_WRITE
+ * permission).
+ *
++ * Iteration may reach the end of container, at which point iterator switches
++ * into IAM_IT_DETACHED state.
++ *
+ * Concurrency: iterators are supposed to be local to thread. Interfaces below
+- * do no internal serialization.
++ * do no internal serialization of access to the iterator fields.
++ *
++ * When in non-detached state, iterator keeps some container nodes pinned in
++ * memory and locked (that locking may be implemented at the container
++ * granularity though). In particular, clients may assume that pointers to
++ * records and keys obtained through iterator interface are valid until
++ * iterator is detached (except that they may be invalidated by subsequent
++ * operations done through the same iterator).
+ *
+ */
+ struct iam_iterator {
+@@ -390,7 +470,8 @@ struct iam_iterator {
+ __u32 ii_flags;
+ enum iam_it_state ii_state;
+ /*
+- * path to the record. Valid in IAM_IT_ATTACHED state.
++ * path to the record. Valid in IAM_IT_ATTACHED, and IAM_IT_SKEWED
++ * states.
+ */
+ struct iam_path ii_path;
+ };
+@@ -405,133 +486,26 @@ void iam_path_compat_fini(struct iam_pat
+ struct iam_path_descr *iam_ipd_alloc(int keysize);
+ void iam_ipd_free(struct iam_path_descr *ipd);
+
+-/*
+- * Initialize iterator to IAM_IT_DETACHED state.
+- *
+- * postcondition: it_state(it) == IAM_IT_DETACHED
+- */
+ int iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags,
+ struct iam_path_descr *pd);
+-/*
+- * Finalize iterator and release all resources.
+- *
+- * precondition: it_state(it) == IAM_IT_DETACHED
+- */
+ void iam_it_fini(struct iam_iterator *it);
+-
+-/*
+- * Attach iterator. After successful completion, @it points to record with the
+- * largest key not larger than @k. Semantics of ->id_create() method guarantee
+- * that such record will always be found.
+- *
+- * Return value: 0: positioned on existing record,
+- * -ve: error.
+- *
+- * precondition: it_state(it) == IAM_IT_DETACHED
+- * postcondition: ergo(result == 0,
+- * (it_state(it) == IAM_IT_ATTACHED &&
+- * it_keycmp(it, iam_it_key_get(it, *), k) < 0))
+- */
+ int iam_it_get(struct iam_iterator *it, const struct iam_key *k);
+-
+-/*
+- * Duplicates iterator.
+- *
+- * postcondition: it_state(dst) == it_state(src) &&
+- * iam_it_container(dst) == iam_it_container(src) &&
+- * dst->ii_flags = src->ii_flags &&
+- * ergo(it_state(it) == IAM_IT_ATTACHED,
+- * iam_it_rec_get(dst) == iam_it_rec_get(src) &&
+- * iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
+- */
++int iam_it_get_at(struct iam_iterator *it, const struct iam_key *k);
+ void iam_it_dup(struct iam_iterator *dst, const struct iam_iterator *src);
+-
+-/*
+- * Detach iterator. Does nothing it detached state.
+- *
+- * postcondition: it_state(it) == IAM_IT_DETACHED
+- */
+ void iam_it_put(struct iam_iterator *it);
+-
+-/*
+- * Move iterator one record right.
+- *
+- * Return value: 0: success,
+- * +1: end of container reached
+- * -ve: error
+- *
+- * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
+- * postcondition: ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED)
+- */
+ int iam_it_next(struct iam_iterator *it);
+-
+-/*
+- * Return pointer to the record under iterator.
+- *
+- * precondition: it_state(it) == IAM_IT_ATTACHED
+- * postcondition: it_state(it) == IAM_IT_ATTACHED
+- */
+ struct iam_rec *iam_it_rec_get(const struct iam_iterator *it);
+-
+-/*
+- * Replace contents of record under iterator.
+- *
+- * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
+- * postcondition: it_state(it) == IAM_IT_ATTACHED &&
+- * ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
+- */
+-int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r);
+-
+-/*
+- * Place key under iterator in @k, return @k
+- *
+- * precondition: it_state(it) == IAM_IT_ATTACHED
+- * postcondition: it_state(it) == IAM_IT_ATTACHED
+- */
+-struct iam_key *iam_it_key_get(const struct iam_iterator *it,
+- struct iam_key *k);
+-
+-/*
+- * Insert new record with key @k and contents from @r, shifting records to the
+- * right.
+- *
+- * precondition: it_state(it) == IAM_IT_ATTACHED &&
+- * it->ii_flags&IAM_IT_WRITE &&
+- * it_keycmp(it, iam_it_key_get(it, *), k) < 0
+- * postcondition: it_state(it) == IAM_IT_ATTACHED &&
+- * ergo(result == 0,
+- * it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
+- * !memcmp(iam_it_rec_get(it), r, ...))
+- */
++int iam_it_rec_set(handle_t *h,
++ struct iam_iterator *it, const struct iam_rec *r);
++struct iam_key *iam_it_key_get(const struct iam_iterator *it);
++int iam_it_key_size(const struct iam_iterator *it);
+ int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
+ const struct iam_key *k, const struct iam_rec *r);
+-/*
+- * Delete record under iterator.
+- *
+- * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
+- * postcondition: it_state(it) == IAM_IT_ATTACHED
+- */
+ int iam_it_rec_delete(handle_t *h, struct iam_iterator *it);
+
+ typedef __u64 iam_pos_t;
+
+-/*
+- * Convert iterator to cookie.
+- *
+- * precondition: it_state(it) == IAM_IT_ATTACHED &&
+- * path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
+- * postcondition: it_state(it) == IAM_IT_ATTACHED
+- */
+ iam_pos_t iam_it_store(const struct iam_iterator *it);
+-
+-/*
+- * Restore iterator from cookie.
+- *
+- * precondition: it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE &&
+- * path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
+- * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED &&
+- * iam_it_store(it) == pos)
+- */
+ int iam_it_load(struct iam_iterator *it, iam_pos_t pos);
+
+ int iam_lookup(struct iam_container *c, const struct iam_key *k,
+@@ -539,10 +513,10 @@ int iam_lookup(struct iam_container *c,
+ int iam_delete(handle_t *h, struct iam_container *c, const struct iam_key *k,
+ struct iam_path_descr *pd);
+ int iam_update(handle_t *h, struct iam_container *c, const struct iam_key *k,
+- struct iam_rec *r, struct iam_path_descr *pd);
++ const struct iam_rec *r, struct iam_path_descr *pd);
+ int iam_insert(handle_t *handle, struct iam_container *c,
+ const struct iam_key *k,
+- struct iam_rec *r, struct iam_path_descr *pd);
++ const struct iam_rec *r, struct iam_path_descr *pd);
+ /*
+ * Initialize container @c.
+ */
+@@ -577,16 +551,65 @@ static inline struct inode *iam_path_obj
+ return p->ip_container->ic_object;
+ }
+
+-static inline void iam_keycpy(const struct iam_container *c,
+- struct iam_key *k1, const struct iam_key *k2)
++static inline void iam_ikeycpy(const struct iam_container *c,
++ struct iam_ikey *k1, const struct iam_ikey *k2)
+{
-+ kfree(op->iul_key);
-+ kfree(op->iul_rec);
++ memcpy(k1, k2, c->ic_descr->id_ikey_size);
+}
+
-+static int getop(struct iam_uapi_op *op, struct iam_uapi_op *uop,
-+ struct iam_descr *des, unsigned long arg)
++static inline size_t iam_entry_size(struct iam_path *p)
+{
-+ int result;
-+ int ks;
-+ int rs;
++ return iam_path_descr(p)->id_ikey_size + iam_path_descr(p)->id_ptr_size;
++}
+
-+ ks = des->id_key_size;
-+ rs = des->id_rec_size;
-+ op->iul_key = kmalloc(ks, GFP_KERNEL);
-+ op->iul_rec = kmalloc(rs, GFP_KERNEL);
-+ if (!copy_from_user(uop,
-+ (struct iam_uapi_op __user *)arg, sizeof *uop) &&
-+ op->iul_key != NULL && op->iul_rec != NULL &&
-+ !copy_from_user(op->iul_key, (void __user *)uop->iul_key, ks) &&
-+ !copy_from_user(op->iul_rec, (void __user *)uop->iul_rec, rs))
-+ result = 0;
-+ else {
-+ result = -EFAULT;
-+ putop(op);
-+ }
-+ return result;
++static inline struct iam_entry *iam_entry_shift(struct iam_path *p,
++ struct iam_entry *entry,
++ int shift)
+ {
+- memcpy(k1, k2, c->ic_descr->id_key_size);
++ void *e = entry;
++ return e + shift * iam_entry_size(p);
+ }
+
+-static inline int iam_keycmp(const struct iam_container *c,
+- const struct iam_key *k1, const struct iam_key *k2)
++static inline struct iam_ikey *iam_get_ikey(struct iam_path *p,
++ struct iam_entry *entry,
++ struct iam_ikey *key)
+ {
+- return c->ic_descr->id_ops->id_keycmp(c, k1, k2);
++ return memcpy(key, entry, iam_path_descr(p)->id_ikey_size);
+}
+
-+static int outit(struct iam_uapi_it *it, struct iam_uapi_it *uit,
-+ struct iam_descr *des, enum outop_t opt, unsigned long arg)
++static inline struct iam_ikey *iam_ikey_at(struct iam_path *p,
++ struct iam_entry *entry)
+{
-+ int result;
-+
-+ result = outop(&it->iui_op, &uit->iui_op, des, opt);
-+ if (result == 0 && (opt&STATE))
-+ result = put_user(it->iui_state, (int __user *) arg);
-+ return result;
++ return (struct iam_ikey *)entry;
+}
+
-+static void putit(struct iam_uapi_it *it)
++static inline ptrdiff_t iam_entry_diff(struct iam_path *p,
++ struct iam_entry *e1,
++ struct iam_entry *e2)
+{
-+ putop(&it->iui_op);
++ ptrdiff_t diff;
++
++ diff = (void *)e1 - (void *)e2;
++ assert(diff / iam_entry_size(p) * iam_entry_size(p) == diff);
++ return diff / iam_entry_size(p);
+}
+
-+static int getit(struct iam_uapi_it *it, struct iam_uapi_it *uit,
-+ struct iam_descr *des, unsigned long arg)
++/*
++ * Helper for the frequent case, where key was already placed into @k1 by
++ * callback.
++ */
++static inline void iam_ikeycpy0(const struct iam_container *c,
++ struct iam_ikey *k1, const struct iam_ikey *k2)
+{
-+ return getop(&it->iui_op, &uit->iui_op, des,
-+ (unsigned long)&((struct iam_uapi_it *)arg)->iui_op);
++ if (k1 != k2)
++ iam_ikeycpy(c, k1, k2);
+}
+
-+int iam_uapi_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
-+ unsigned long arg)
++static inline int iam_ikeycmp(const struct iam_container *c,
++ const struct iam_ikey *k1,
++ const struct iam_ikey *k2)
+{
-+ int result;
-+ struct iam_uapi_info ua;
-+ struct iam_uapi_op uop;
-+ struct iam_uapi_op op;
-+ struct iam_uapi_it uit;
-+ struct iam_uapi_it it;
-+ enum outop_t opt;
-+
-+ if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) {
-+ result = -EACCES;
-+ } else if (cmd == IAM_IOC_POLYMORPH) {
-+ /*
-+ * If polymorphing into directory, increase hard-link count.
-+ */
-+ if (S_ISDIR((umode_t)arg) && !S_ISDIR(inode->i_mode))
-+ inode->i_nlink++;
-+ else if (!S_ISDIR((umode_t)arg) && S_ISDIR(inode->i_mode))
-+ inode->i_nlink--;
-+ inode->i_mode = (umode_t)arg;
-+ mark_inode_dirty(inode);
-+ result = 0;
-+ } else if (cmd == IAM_IOC_INIT) {
-+ if (filp->private_data == NULL) {
-+ result = getua(&ua, arg);
-+ if (result == 0)
-+ result = iam_uapi_init(inode, filp, &ua);
-+ } else
-+ result = -EBUSY;
-+ } else if (is_dx(inode) && filp->private_data != NULL) {
-+ struct iam_descr *des;
-+
-+ switch (cmd) {
-+ case IAM_IOC_IT_START:
-+ case IAM_IOC_IT_NEXT:
-+ opt = KEY|REC|STATE;
-+ break;
-+ case IAM_IOC_LOOKUP:
-+ opt = REC;
-+ break;
-+ default:
-+ opt = 0;
-+ break;
-+ }
-+
-+ des = get_ipi(filp)->ipi_bag.ic_descr;
-+ if (cmd == IAM_IOC_GETINFO) {
-+ ua.iui_keysize = des->id_key_size;
-+ ua.iui_recsize = des->id_rec_size;
-+ ua.iui_ptrsize = des->id_ptr_size;
-+ ua.iui_height = 0; /* not yet */
-+ memcpy(ua.iui_fmt_name, des->id_ops->id_name,
-+ ARRAY_SIZE(ua.iui_fmt_name));
-+ result = putua(&ua, arg);
-+ } else if (cmd == IAM_IOC_INSERT || cmd == IAM_IOC_LOOKUP ||
-+ cmd == IAM_IOC_DELETE) {
-+ result = getop(&op, &uop, des, arg);
-+ if (result == 0) {
-+ int res2;
-+ result = iam_uapi_op(cmd, inode, filp, &op);
-+
-+ res2 = outop(&op, &uop, des, opt);
-+ result = result ? : res2;
-+ putop(&op);
-+ }
-+ } else if (cmd == IAM_IOC_IT_START || cmd == IAM_IOC_IT_NEXT ||
-+ cmd == IAM_IOC_IT_STOP) {
-+ result = getit(&it, &uit, des, arg);
-+ if (result == 0) {
-+ int res2;
++ return c->ic_descr->id_ops->id_ikeycmp(c, k1, k2);
+ }
+
+ static inline void iam_reccpy(const struct iam_path *p, struct iam_rec *rec_dst,
+@@ -604,7 +627,7 @@ static inline void *iam_entry_off(struct
+ static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry)
+ {
+ return le32_to_cpu(*(u32*)iam_entry_off(entry,
+- iam_path_descr(p)->id_key_size))
++ iam_path_descr(p)->id_ikey_size))
+ & 0x00ffffff;
+ }
+
+@@ -612,21 +635,64 @@ static inline void dx_set_block(struct i
+ struct iam_entry *entry, unsigned value)
+ {
+ *(u32*)iam_entry_off(entry,
+- iam_path_descr(p)->id_key_size) =
++ iam_path_descr(p)->id_ikey_size) =
+ cpu_to_le32(value);
+ }
+
+-static inline void dx_set_key(struct iam_path *p, struct iam_entry *entry,
+- const struct iam_key *key)
++static inline void dx_set_ikey(struct iam_path *p, struct iam_entry *entry,
++ const struct iam_ikey *key)
+ {
+- iam_keycpy(p->ip_container, iam_entry_off(entry, 0), key);
++ iam_ikeycpy(p->ip_container, iam_entry_off(entry, 0), key);
+ }
+
++struct dx_map_entry
++{
++ u32 hash;
++ u32 offs;
++};
+
-+ result = iam_uapi_it(cmd, inode, filp, &it);
++struct fake_dirent {
++ __le32 inode;
++ __le16 rec_len;
++ u8 name_len;
++ u8 file_type;
++};
+
-+ res2 = outit(&it, &uit, des, opt, arg);
-+ result = result ? : res2;
-+ putit(&it);
-+ }
-+ } else
-+ result = -EINVAL;
-+ } else
-+ result = -ENOENT;
-+ return result;
-+}
-Index: iam/fs/ext3/file.c
-===================================================================
---- iam.orig/fs/ext3/file.c 2006-09-19 15:23:19.000000000 +0800
-+++ iam/fs/ext3/file.c 2006-09-22 17:18:09.000000000 +0800
-@@ -23,6 +23,7 @@
- #include <linux/jbd.h>
- #include <linux/ext3_fs.h>
- #include <linux/ext3_jbd.h>
-+#include <linux/lustre_iam.h>
- #include "xattr.h"
- #include "acl.h"
+ struct dx_countlimit {
+ __le16 limit;
+ __le16 count;
+ };
-@@ -31,14 +32,18 @@
- * from ext3_file_open: open gets called at every open, but release
- * gets called only when /all/ the files are closed.
- */
--static int ext3_release_file (struct inode * inode, struct file * filp)
-+static int ext3_release_file(struct inode * inode, struct file * filp)
- {
- /* if we are the last writer on the inode, drop the block reservation */
- if ((filp->f_mode & FMODE_WRITE) &&
- (atomic_read(&inode->i_writecount) == 1))
- ext3_discard_reservation(inode);
-- if (is_dx(inode) && filp->private_data)
-- ext3_htree_free_dir_info(filp->private_data);
-+ if (is_dx(inode) && filp->private_data) {
-+ if (S_ISDIR(inode->i_mode))
-+ ext3_htree_free_dir_info(filp->private_data);
-+ else
-+ ext3_iam_release(filp, inode);
++/*
++ * dx_root_info is laid out so that if it should somehow get overlaid by a
++ * dirent the two low bits of the hash version will be zero. Therefore, the
++ * hash version mod 4 should never be 0. Sincerely, the paranoia department.
++ */
++
++struct dx_root {
++ struct fake_dirent dot;
++ char dot_name[4];
++ struct fake_dirent dotdot;
++ char dotdot_name[4];
++ struct dx_root_info
++ {
++ __le32 reserved_zero;
++ u8 hash_version;
++ u8 info_length; /* 8 */
++ u8 indirect_levels;
++ u8 unused_flags;
+ }
-
- return 0;
++ info;
++ struct {} entries[0];
++};
++
++struct dx_node
++{
++ struct fake_dirent fake;
++ struct {} entries[0];
++};
++
++
+ static inline unsigned dx_get_count(struct iam_entry *entries)
+ {
+ return le16_to_cpu(((struct dx_countlimit *) entries)->count);
+@@ -647,9 +713,21 @@ static inline unsigned dx_node_limit(str
+ struct iam_descr *param = iam_path_descr(p);
+ unsigned entry_space = iam_path_obj(p)->i_sb->s_blocksize -
+ param->id_node_gap;
+- return entry_space / (param->id_key_size + param->id_ptr_size);
++ return entry_space / (param->id_ikey_size + param->id_ptr_size);
++}
++
++static inline unsigned dx_root_limit(struct iam_path *p)
++{
++ struct iam_descr *param = iam_path_descr(p);
++ unsigned limit = iam_path_obj(p)->i_sb->s_blocksize -
++ param->id_root_gap;
++ limit /= (param->id_ikey_size + param->id_ptr_size);
++ if (limit == dx_node_limit(p))
++ limit--;
++ return limit;
}
-@@ -110,7 +115,7 @@
- force_commit:
- err = ext3_force_commit(inode->i_sb);
-- if (err)
-+ if (err)
- return err;
- return ret;
++
+ static inline struct iam_entry *dx_get_entries(struct iam_path *path,
+ void *data, int root)
+ {
+@@ -665,7 +743,8 @@ static inline struct iam_entry *dx_node_
+ frame->bh->b_data, frame == path->ip_frames);
}
-Index: iam/fs/ext3/ioctl.c
-===================================================================
---- iam.orig/fs/ext3/ioctl.c 2006-09-22 17:18:08.000000000 +0800
-+++ iam/fs/ext3/ioctl.c 2006-09-22 17:18:09.000000000 +0800
-@@ -250,6 +250,6 @@
-
- default:
-- return -ENOTTY;
-+ return iam_uapi_ioctl(inode, filp, cmd, arg);
- }
- }
-Index: iam/fs/ext3/dir.c
-===================================================================
---- iam.orig/fs/ext3/dir.c 2006-09-19 15:23:19.000000000 +0800
-+++ iam/fs/ext3/dir.c 2006-09-22 17:18:09.000000000 +0800
-@@ -28,6 +28,7 @@
- #include <linux/smp_lock.h>
- #include <linux/slab.h>
- #include <linux/rbtree.h>
-+#include <linux/lustre_iam.h>
+-static inline struct iam_key *iam_path_key(const struct iam_path *path, int nr)
++static inline struct iam_ikey *iam_path_ikey(const struct iam_path *path,
++ int nr)
+ {
+ assert(0 <= nr && nr < ARRAY_SIZE(path->ip_data->ipd_key_scratch));
+ return path->ip_data->ipd_key_scratch[nr];
+@@ -674,6 +753,7 @@ static inline struct iam_key *iam_path_k
+ int dx_lookup(struct iam_path *path);
+ void dx_insert_block(struct iam_path *path, struct iam_frame *frame,
+ u32 hash, u32 block);
++int dx_index_is_compat(struct iam_path *path);
- static unsigned char ext3_filetype_table[] = {
- DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
-@@ -59,7 +60,7 @@
+ int ext3_htree_next_block(struct inode *dir, __u32 hash,
+ struct iam_path *path, __u32 *start_hash);
+@@ -681,6 +761,20 @@ int ext3_htree_next_block(struct inode *
+ struct buffer_head *ext3_append(handle_t *handle, struct inode *inode,
+ u32 *block, int *err);
+ int split_index_node(handle_t *handle, struct iam_path *path);
++struct ext3_dir_entry_2 *split_entry(struct inode *dir,
++ struct ext3_dir_entry_2 *de,
++ unsigned long ino, mode_t mode,
++ const char *name, int namelen);
++struct ext3_dir_entry_2 *find_insertion_point(struct inode *dir,
++ struct buffer_head *bh,
++ const char *name, int namelen);
++struct ext3_dir_entry_2 *move_entries(struct inode *dir,
++ struct dx_hash_info *hinfo,
++ struct buffer_head **bh1,
++ struct buffer_head **bh2,
++ __u32 *delim_hash);
++
++extern struct iam_descr iam_htree_compat_param;
- return (ext3_filetype_table[filetype]);
- }
--
-+
+ /*
+ * external
+@@ -698,10 +792,12 @@ int iam_node_read(struct iam_container *
+ handle_t *handle, struct buffer_head **bh);
- int ext3_check_dir_entry (const char * function, struct inode * dir,
- struct ext3_dir_entry_2 * de,
-@@ -165,7 +166,7 @@
- * to make sure. */
- if (filp->f_version != inode->i_version) {
- for (i = 0; i < sb->s_blocksize && i < offset; ) {
-- de = (struct ext3_dir_entry_2 *)
-+ de = (struct ext3_dir_entry_2 *)
- (bh->b_data + i);
- /* It's too expensive to do a full
- * dirent test each time round this
-@@ -184,7 +185,7 @@
- filp->f_version = inode->i_version;
- }
+ void iam_insert_key(struct iam_path *path, struct iam_frame *frame,
+- const struct iam_key *key, iam_ptr_t ptr);
++ const struct iam_ikey *key, iam_ptr_t ptr);
-- while (!error && filp->f_pos < inode->i_size
-+ while (!error && filp->f_pos < inode->i_size
- && offset < sb->s_blocksize) {
- de = (struct ext3_dir_entry_2 *) (bh->b_data + offset);
- if (!ext3_check_dir_entry ("ext3_readdir", inode, de,
-@@ -232,7 +233,7 @@
- /*
- * These functions convert from the major/minor hash to an f_pos
- * value.
-- *
-+ *
- * Currently we only use major hash numer. This is unfortunate, but
- * on 32-bit machines, the same VFS interface is used for lseek and
- * llseek, so if we use the 64 bit offset, then the 32-bit versions of
-@@ -253,7 +254,7 @@
- struct fname {
- __u32 hash;
- __u32 minor_hash;
-- struct rb_node rb_hash;
-+ struct rb_node rb_hash;
- struct fname *next;
- __u32 inode;
- __u8 name_len;
-@@ -305,12 +306,14 @@
- root->rb_node = NULL;
- }
+ int iam_leaf_at_end(const struct iam_leaf *l);
+ void iam_leaf_next(struct iam_leaf *folio);
++int iam_leaf_can_add(const struct iam_leaf *l,
++ const struct iam_key *k, const struct iam_rec *r);
-+extern struct iam_private_info *ext3_iam_alloc_info(int flags);
-+extern void ext3_iam_release_info(struct iam_private_info *info);
+ struct iam_path *iam_leaf_path(const struct iam_leaf *leaf);
+ struct iam_container *iam_leaf_container(const struct iam_leaf *leaf);
+@@ -709,14 +805,79 @@ struct iam_descr *iam_leaf_descr(const s
+ struct iam_leaf_operations *iam_leaf_ops(const struct iam_leaf *leaf);
- struct dir_private_info *create_dir_info(loff_t pos)
- {
- struct dir_private_info *p;
-- p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL);
-+ p = (void *)ext3_iam_alloc_info(GFP_KERNEL);
- if (!p)
- return NULL;
- p->root.rb_node = NULL;
-@@ -326,6 +329,7 @@
- void ext3_htree_free_dir_info(struct dir_private_info *p)
- {
- free_rb_tree_fname(&p->root);
-+ ext3_iam_release_info((void *)p);
- kfree(p);
- }
++int iam_node_read(struct iam_container *c, iam_ptr_t ptr,
++ handle_t *h, struct buffer_head **bh);
++
++/*
++ * Container format.
++ */
+ struct iam_format {
++ /*
++ * Method called to recognize container format. Should return true iff
++ * container @c conforms to this format. This method may do IO to read
++ * container pages.
++ *
++ * If container is recognized, this method sets operation vectors
++ * ->id_ops and ->id_leaf_ops in container description (c->ic_descr),
++ * and fills other description fields.
++ */
+ int (*if_guess)(struct iam_container *c);
++ /*
++ * Linkage into global list of container formats.
++ */
+ struct list_head if_linkage;
+ };
-@@ -413,7 +417,7 @@
- curr_pos = hash2pos(fname->hash, fname->minor_hash);
- while (fname) {
- error = filldir(dirent, fname->name,
-- fname->name_len, curr_pos,
-+ fname->name_len, curr_pos,
- fname->inode,
- get_dtype(sb, fname->file_type));
- if (error) {
-@@ -468,7 +472,7 @@
- /*
- * Fill the rbtree if we have no more entries,
- * or the inode has changed since we last read in the
-- * cached entries.
-+ * cached entries.
- */
- if ((!info->curr_node) ||
- (filp->f_version != inode->i_version)) {
-Index: iam/fs/ext3/Makefile
-===================================================================
---- iam.orig/fs/ext3/Makefile 2006-09-22 17:18:09.000000000 +0800
-+++ iam/fs/ext3/Makefile 2006-09-22 17:18:09.000000000 +0800
-@@ -6,7 +6,7 @@
+ void iam_format_register(struct iam_format *fmt);
- ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
- ioctl.o namei.o super.o symlink.o hash.o resize.o \
-- extents.o mballoc.o iam.o iam_lfix.o
-+ extents.o mballoc.o iam.o iam_lfix.o iam_lvar.o iam_htree.o iam_uapi.o
+ void iam_lfix_format_init(void);
++void iam_lvar_format_init(void);
++void iam_htree_format_init(void);
++
++struct iam_private_info;
++
++void ext3_iam_release(struct file *filp, struct inode *inode);
++
++int iam_uapi_ioctl(struct inode * inode, struct file * filp, unsigned int cmd,
++ unsigned long arg);
++
++/* __KERNEL__ */
++#endif
++
++/*
++ * User-level API. A copy exists in lustre/lustre/tests/iam_ut.c
++ */
++
++struct iam_uapi_info {
++ __u16 iui_keysize;
++ __u16 iui_recsize;
++ __u16 iui_ptrsize;
++ __u16 iui_height;
++ char iui_fmt_name[DX_FMT_NAME_LEN];
++};
++
++struct iam_uapi_op {
++ void *iul_key;
++ void *iul_rec;
++};
++
++struct iam_uapi_it {
++ struct iam_uapi_op iui_op;
++ __u16 iui_state;
++};
++
++enum iam_ioctl_cmd {
++ IAM_IOC_INIT = _IOW('i', 1, struct iam_uapi_info),
++ IAM_IOC_GETINFO = _IOR('i', 2, struct iam_uapi_info),
++ IAM_IOC_INSERT = _IOR('i', 3, struct iam_uapi_op),
++ IAM_IOC_LOOKUP = _IOWR('i', 4, struct iam_uapi_op),
++ IAM_IOC_DELETE = _IOR('i', 5, struct iam_uapi_op),
++ IAM_IOC_IT_START = _IOR('i', 6, struct iam_uapi_it),
++ IAM_IOC_IT_NEXT = _IOW('i', 7, struct iam_uapi_it),
++ IAM_IOC_IT_STOP = _IOR('i', 8, struct iam_uapi_it),
++
++ IAM_IOC_POLYMORPH = _IOR('i', 9, unsigned long)
++};
- ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
- ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+ /* __LINUX_LUSTRE_IAM_H__ */
+ #endif