From: wangdi Date: Wed, 24 May 2006 10:02:45 +0000 (+0000) Subject: Branch: b_new_cmd X-Git-Tag: v1_8_0_110~486^2~1771 X-Git-Url: https://git.whamcloud.com/?a=commitdiff_plain;h=e24895bddd3e6dd247220b6c0127fed71b11d147;p=fs%2Flustre-release.git Branch: b_new_cmd update iam prototype --- diff --git a/lustre/kernel_patches/patches/ext3-iam-separate.patch b/lustre/kernel_patches/patches/ext3-iam-separate.patch index 1be2b38..afce843 100644 --- a/lustre/kernel_patches/patches/ext3-iam-separate.patch +++ b/lustre/kernel_patches/patches/ext3-iam-separate.patch @@ -1,1056 +1,93 @@ -Index: iam/fs/ext3/Makefile +Index: linux-2.6.9/fs/ext3/namei.c =================================================================== ---- iam.orig/fs/ext3/Makefile 2006-05-10 18:21:01.000000000 +0400 -+++ iam/fs/ext3/Makefile 2006-05-10 18:21:01.000000000 +0400 -@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o - - ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o \ -- extents.o mballoc.o -+ extents.o mballoc.o iam.o +--- linux-2.6.9.orig/fs/ext3/namei.c 2006-05-23 17:01:09.000000000 +0800 ++++ linux-2.6.9/fs/ext3/namei.c 2006-05-23 17:01:09.000000000 +0800 +@@ -24,81 +24,6 @@ + * Theodore Ts'o, 2002 + */ - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o - ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o -Index: iam/fs/ext3/iam.c -=================================================================== ---- iam.orig/fs/ext3/iam.c 2004-04-06 17:27:52.000000000 +0400 -+++ iam/fs/ext3/iam.c 2006-05-11 01:24:29.000000000 +0400 -@@ -0,0 +1,945 @@ -+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -+ * vim:expandtab:shiftwidth=8:tabstop=8: -+ * -+ * iam.c -+ * Top-level entry points into osd module -+ * -+ * Copyright (c) 2006 Cluster File Systems, Inc. 
-+ * Author: Wang Di -+ * Author: Nikita Danilov -+ * -+ * This file is part of the Lustre file system, http://www.lustre.org -+ * Lustre is a trademark of Cluster File Systems, Inc. -+ * -+ * You may have signed or agreed to another license before downloading -+ * this software. If so, you are bound by the terms and conditions -+ * of that agreement, and the following does not apply to you. See the -+ * LICENSE file included with this distribution for more information. -+ * -+ * If you did not agree to a different license, then this copy of Lustre -+ * is open source software; you can redistribute it and/or modify it -+ * under the terms of version 2 of the GNU General Public License as -+ * published by the Free Software Foundation. -+ * -+ * In either case, Lustre is distributed in the hope that it will be -+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty -+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * license text for more details. -+ */ -+ -+/* -+ * iam: big theory statement. -+ * -+ * iam (Index Access Module) is a module providing abstraction of persistent -+ * transactional container on top of generalized ext3 htree. -+ * -+ * iam supports: -+ * -+ * - key, pointer, and record size specifiable per container. -+ * -+ * - trees taller than 2 index levels. -+ * -+ * - read/write to existing ext3 htree directories as iam containers. -+ * -+ * iam container is a tree, consisting of leaf nodes containing keys and -+ * records stored in this container, and index nodes, containing keys and -+ * pointers to leaf or index nodes. -+ * -+ * iam does not work with keys directly, instead it calls user-supplied key -+ * comparison function (->dpo_keycmp()). -+ * -+ * Pointers are (currently) interpreted as logical offsets (measured in -+ * blocksful) within underlying flat file on top of which iam tree lives. -+ * -+ * On-disk format: -+ * -+ * iam mostly tries to reuse existing htree formats. 
-+ * -+ * Format of index node: -+ * -+ * +-----+-------+-------+-------+------+-------+------------+ -+ * | | count | | | | | | -+ * | gap | / | entry | entry | .... | entry | free space | -+ * | | limit | | | | | | -+ * +-----+-------+-------+-------+------+-------+------------+ -+ * -+ * gap this part of node is never accessed by iam code. It -+ * exists for binary compatibility with ext3 htree (that, -+ * in turn, stores fake struct ext2_dirent for ext2 -+ * compatibility), and to keep some unspecified per-node -+ * data. Gap can be different for root and non-root index -+ * nodes. Gap size can be specified for each container -+ * (gap of 0 is allowed). -+ * -+ * count/limit current number of entries in this node, and the maximal -+ * number of entries that can fit into node. count/limit -+ * has the same size as entry, and is itself counted in -+ * count. -+ * -+ * entry index entry: consists of a key immediately followed by -+ * a pointer to a child node. Size of a key and size of a -+ * pointer depends on container. Entry has neither -+ * alignment nor padding. -+ * -+ * free space portion of node new entries are added to -+ * -+ * Entries in index node are sorted by their key value. -+ * -+ * Format of a leaf node is not specified. Generic iam code accesses leaf -+ * nodes through ->id_leaf methods in struct iam_descr. -+ * -+ */ -+ -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+ -+#include -+#include -+ -+#include "xattr.h" -+#include "iopen.h" -+#include "acl.h" -+ -+static inline void iam_reccpy(struct iam_path *p, struct iam_rec *rec_dst, -+ struct iam_rec *rec_src) -+{ -+ memcpy(rec_dst, rec_src, iam_path_descr(p)->id_rec_size); -+} -+ -+/* -+ * Return pointer to current leaf record. Pointer is valid while corresponding -+ * leaf node is locked and pinned. 
-+ */ -+struct iam_rec *iam_leaf_rec(struct iam_container *c, struct iam_leaf *leaf) -+{ -+ return c->ic_descr->id_leaf.rec(c, leaf); -+} -+ -+/* -+ * Return pointer to the current leaf key. This function may return either -+ * pointer to the key stored in node, or copy key into @key buffer supplied by -+ * caller and return pointer to this buffer. The latter approach is used when -+ * keys in nodes are not stored in plain form (e.g., htree doesn't store keys -+ * at all). -+ * -+ * Caller should assume that returned pointer is only valid while leaf node is -+ * pinned and locked. -+ */ -+struct iam_key *iam_leaf_key(struct iam_container *c, struct iam_leaf *leaf, -+ struct iam_key *key) -+{ -+ return c->ic_descr->id_leaf.key(c, leaf, key); -+} -+ -+/* -+ * Initialize container @c, acquires additional reference on @inode. -+ */ -+int iam_container_init(struct iam_container *c, -+ struct iam_descr *descr, struct inode *inode) -+{ -+ memset(c, 0, sizeof *c); -+ c->ic_descr = descr; -+ c->ic_object = igrab(inode); -+ if (c->ic_object != NULL) -+ return 0; -+ else -+ return -ENOENT; -+} -+EXPORT_SYMBOL(iam_container_init); -+ -+/* -+ * Finalize container @c, release all resources. 
-+ */ -+void iam_container_fini(struct iam_container *c) -+{ -+ if (c->ic_object != NULL) { -+ iput(c->ic_object); -+ c->ic_object = NULL; -+ } -+} -+EXPORT_SYMBOL(iam_container_fini); -+ -+void iam_path_init(struct iam_path *path, struct iam_container *c, -+ struct iam_path_descr *pd) -+{ -+ memset(path, 0, sizeof *path); -+ path->ip_container = c; -+ path->ip_frame = path->ip_frames; -+ path->ip_data = pd; -+} -+ -+static void iam_leaf_fini(struct iam_path *path); -+ -+void iam_path_fini(struct iam_path *path) -+{ -+ int i; -+ -+ iam_leaf_fini(path); -+ for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) { -+ if (path->ip_frames[i].bh != NULL) { -+ brelse(path->ip_frames[i].bh); -+ path->ip_frames[i].bh = NULL; -+ } -+ } -+} -+ -+extern struct iam_descr htree_compat_param; -+ -+void iam_path_compat_init(struct iam_path_compat *path, struct inode *inode) -+{ -+ int i; -+ -+ for (i = 0; i < ARRAY_SIZE(path->ipc_scratch); ++i) -+ path->ipc_descr.ipd_key_scratch[i] = -+ (struct iam_key *)&path->ipc_scratch[i]; -+ -+ iam_container_init(&path->ipc_container, &htree_compat_param, inode); -+ /* -+ * XXX hack allowing finalization of iam_path_compat with -+ * iam_path_fini(). 
-+ */ -+ iput(inode); -+ iam_path_init(&path->ipc_path, &path->ipc_container, &path->ipc_descr); -+} -+ -+void iam_path_compat_fini(struct iam_path_compat *path) -+{ -+ iam_path_fini(&path->ipc_path); -+ iam_container_fini(&path->ipc_container); -+} -+ -+static int iam_leaf_load(struct iam_path *path) -+{ -+ int block; -+ int err; -+ struct iam_container *c; -+ struct buffer_head *bh; -+ struct iam_leaf *leaf; -+ struct iam_descr *descr; -+ -+ c = path->ip_container; -+ leaf = &path->ip_leaf; -+ descr = iam_path_descr(path); -+ block = dx_get_block(path, path->ip_frame->at); -+ err = descr->id_node_read(c, block, NULL, &bh); -+ if (err == 0) { -+ leaf->il_bh = bh; -+ err = descr->id_leaf.init(c, leaf); -+ } -+ return err; -+} -+ -+static void iam_leaf_fini(struct iam_path *path) -+{ -+ iam_path_descr(path)->id_leaf.fini(path->ip_container, &path->ip_leaf); -+ if (path && path->ip_leaf.il_bh) { -+ brelse(path->ip_leaf.il_bh); -+ path->ip_leaf.il_bh = NULL; -+ } -+} -+ -+static void iam_leaf_start(struct iam_container *c, struct iam_leaf *folio) -+{ -+ c->ic_descr->id_leaf.start(c, folio); -+} -+ -+static void iam_leaf_next(struct iam_container *c, struct iam_leaf *folio) -+{ -+ c->ic_descr->id_leaf.next(c, folio); -+} -+ -+static int iam_leaf_at_end(struct iam_container *c, struct iam_leaf *folio) -+{ -+ c->ic_descr->id_leaf.at_end(c, folio); -+} -+ -+/* -+ * Helper function returning scratch key. -+ */ -+static struct iam_key *it_scratch_key(struct iam_iterator *it, int n) -+{ -+ return iam_path_key(&it->ii_path, n); -+} -+ -+static struct iam_container *iam_it_container(struct iam_iterator *it) -+{ -+ return it->ii_path.ip_container; -+} -+ -+static inline int it_keycmp(struct iam_iterator *it, -+ struct iam_key *k1, struct iam_key *k2) -+{ -+ return iam_keycmp(iam_it_container(it), k1, k2); -+} -+ -+/* -+ * Helper wrapper around iam_it_get(): returns 0 (success) only when record -+ * with exactly the same key as asked is found. 
-+ */ -+static int iam_it_get_exact(struct iam_iterator *it, struct iam_key *k) -+{ -+ int result; -+ -+ result = iam_it_get(it, k); -+ if (result == 0 && -+ (it_keycmp(it, k, iam_it_key_get(it, it_scratch_key(it, 0))) != 0)) -+ /* -+ * Return -ENOENT if cursor is located above record with a key -+ * different from one specified. -+ * -+ * XXX returning -ENOENT only works if iam_it_get never -+ * returns -ENOENT as a legitimate error. -+ */ -+ result = -ENOENT; -+ return result; -+} -+ -+/***********************************************************************/ -+/* iterator interface */ -+/***********************************************************************/ -+ -+static enum iam_it_state it_state(struct iam_iterator *it) -+{ -+ return it->ii_state; -+} -+ -+void iam_container_write_lock(struct iam_container *ic) -+{ -+ down(&ic->ic_object->i_sem); -+} -+ -+void iam_container_write_unlock(struct iam_container *ic) -+{ -+ up(&ic->ic_object->i_sem); -+} -+ -+void iam_container_read_lock(struct iam_container *ic) -+{ -+ down(&ic->ic_object->i_sem); -+} -+ -+void iam_container_read_unlock(struct iam_container *ic) -+{ -+ up(&ic->ic_object->i_sem); -+} -+ -+static void iam_it_lock(struct iam_iterator *it) -+{ -+ if (it->ii_flags&IAM_IT_WRITE) -+ iam_container_write_lock(iam_it_container(it)); -+ else -+ iam_container_read_lock(iam_it_container(it)); -+} -+ -+static void iam_it_unlock(struct iam_iterator *it) -+{ -+ if (it->ii_flags&IAM_IT_WRITE) -+ iam_container_write_unlock(iam_it_container(it)); -+ else -+ iam_container_read_unlock(iam_it_container(it)); -+} -+ -+/* -+ * Initialize iterator to IAM_IT_DETACHED state. 
-+ * -+ * postcondition: it_state(it) == IAM_IT_DETACHED -+ */ -+int iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags, -+ struct iam_path_descr *pd) -+{ -+ memset(it, 0, sizeof *it); -+ it->ii_flags = flags; -+ it->ii_state = IAM_IT_DETACHED; -+ iam_path_init(&it->ii_path, c, pd); -+ return 0; -+} -+ -+/* -+ * Finalize iterator and release all resources. -+ * -+ * precondition: it_state(it) == IAM_IT_DETACHED -+ */ -+void iam_it_fini(struct iam_iterator *it) -+{ -+ assert(it_state(it) == IAM_IT_DETACHED); -+ iam_path_fini(&it->ii_path); -+} -+ -+int iam_path_lookup(struct iam_path *path) -+{ -+ struct iam_container *c; -+ struct iam_descr *descr; -+ struct iam_leaf *leaf; -+ int result; -+ -+ c = path->ip_container; -+ leaf = &path->ip_leaf; -+ descr = iam_path_descr(path); -+ result = dx_lookup(path); -+ if (result == 0) { -+ result = iam_leaf_load(path); -+ if (result == 0) -+ result = descr->id_leaf.lookup(c, path, leaf, -+ path->ip_key_target); -+ } -+ return result; -+} -+ -+/* -+ * Attach iterator. After successful completion, @it points to record with -+ * smallest key not larger than @k. -+ * -+ * Return value: 0: positioned on existing record, -+ * -ve: error. -+ * -+ * precondition: it_state(it) == IAM_IT_DETACHED -+ * postcondition: ergo(result == 0, -+ * (it_state(it) == IAM_IT_ATTACHED && -+ * it_keycmp(it, iam_it_key_get(it, *), k) < 0)) -+ */ -+int iam_it_get(struct iam_iterator *it, struct iam_key *k) -+{ -+ int result; -+ assert(it_state(it) == IAM_IT_DETACHED); -+ -+ it->ii_path.ip_key_target = k; -+ iam_it_lock(it); -+ result = iam_path_lookup(&it->ii_path); -+ if (result == 0) -+ it->ii_state = IAM_IT_ATTACHED; -+ else -+ iam_it_unlock(it); -+ assert(ergo(result == 0, -+ it_keycmp(it, -+ iam_it_key_get(it, it_scratch_key(it, 0)), -+ k) < 0)); -+ return result; -+} -+ -+/* -+ * Duplicates iterator. 
-+ * -+ * postcondition: it_state(dst) == it_state(src) && -+ * iam_it_container(dst) == iam_it_container(src) && -+ * dst->ii_flags = src->ii_flags && -+ * ergo(it_state(src) == IAM_IT_ATTACHED, -+ * iam_it_rec_get(dst) == iam_it_rec_get(src) && -+ * iam_it_key_get(dst, *1) == iam_it_key_get(src, *2)) -+ */ -+void iam_it_dup(struct iam_iterator *dst, struct iam_iterator *src) -+{ -+ dst->ii_flags = src->ii_flags; -+ dst->ii_state = src->ii_state; -+ /* XXX not yet. iam_path_dup(&dst->ii_path, &src->ii_path); */ -+ /* -+ * XXX: duplicate lock. -+ */ -+ assert(it_state(dst) == it_state(src)); -+ assert(iam_it_container(dst) == iam_it_container(src)); -+ assert(dst->ii_flags = src->ii_flags); -+ assert(ergo(it_state(src) == IAM_IT_ATTACHED, -+ iam_it_rec_get(dst) == iam_it_rec_get(src) && -+ iam_it_key_get(dst, it_scratch_key(dst, 0)) == -+ iam_it_key_get(src, it_scratch_key(src, 0)))); -+ -+} -+/* -+ * Detach iterator. Does nothing it detached state. -+ * -+ * postcondition: it_state(it) == IAM_IT_DETACHED -+ */ -+void iam_it_put(struct iam_iterator *it) -+{ -+ if (it->ii_state == IAM_IT_ATTACHED) { -+ it->ii_state = IAM_IT_DETACHED; -+ iam_leaf_fini(&it->ii_path); -+ iam_it_unlock(it); -+ } -+} -+ -+/* -+ * Move iterator one record right. 
-+ * -+ * Return value: 0: success, -+ * +1: end of container reached -+ * -ve: error -+ * -+ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE -+ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED) -+ */ -+int iam_it_next(struct iam_iterator *it) -+{ -+ int result; -+ struct iam_container *c; -+ -+ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE); -+ -+ c = iam_it_container(it); -+ if (iam_leaf_at_end(c, &it->ii_path.ip_leaf)) { -+ /* advance index portion of the path */ -+ result = 0; /* XXX not yet iam_index_next(&it->ii_path); */ -+ if (result == 0) { -+ result = 0; /* XXX not yet iam_read_leaf(&it->ii_path); */ -+ if (result == 0) -+ iam_leaf_start(c, &it->ii_path.ip_leaf); -+ } else if (result > 0) -+ /* end of container reached */ -+ result = +1; -+ if (result < 0) -+ iam_it_put(it); -+ } else { -+ /* advance within leaf node */ -+ iam_leaf_next(c, &it->ii_path.ip_leaf); -+ result = 0; -+ } -+ assert(ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED)); -+ return result; -+} -+ -+/* -+ * Return pointer to the record under iterator. -+ * -+ * precondition: it_state(it) == IAM_IT_ATTACHED -+ * postcondition: it_state(it) == IAM_IT_ATTACHED -+ */ -+struct iam_rec *iam_it_rec_get(struct iam_iterator *it) -+{ -+ assert(it_state(it) == IAM_IT_ATTACHED); -+ return iam_leaf_rec(iam_it_container(it), &it->ii_path.ip_leaf); -+} -+ -+static void iam_it_reccpy(struct iam_iterator *it, struct iam_rec *r) -+{ -+ memcpy(iam_leaf_rec(iam_it_container(it), &it->ii_path.ip_leaf), r, -+ iam_it_container(it)->ic_descr->id_rec_size); -+} -+ -+/* -+ * Replace contents of record under iterator. 
-+ * -+ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE -+ * postcondition: it_state(it) == IAM_IT_ATTACHED && -+ * ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...)) -+ */ -+int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r) -+{ -+ int result; -+ -+ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE); -+ -+ result = ext3_journal_get_write_access(h, it->ii_path.ip_leaf.il_bh); -+ if (result == 0) -+ iam_it_reccpy(it, r); -+ return result; -+} -+ -+/* -+ * Return pointer to the key under iterator. -+ * -+ * precondition: it_state(it) == IAM_IT_ATTACHED -+ * postcondition: it_state(it) == IAM_IT_ATTACHED -+ */ -+struct iam_key *iam_it_key_get(struct iam_iterator *it, struct iam_key *k) -+{ -+ assert(it_state(it) == IAM_IT_ATTACHED); -+ return iam_leaf_key(iam_it_container(it), &it->ii_path.ip_leaf, k); -+} -+ -+static int iam_leaf_rec_add(handle_t *handle, struct iam_path *path) -+{ -+#if 0 -+ struct iam_lentry *p, *q; -+ int count, err; -+ -+ count = dx_get_count((struct iam_entry *)path->ip_leaf.il_entries); -+ p = iam_lentry_shift(path, path->ip_leaf.il_entries, count - 1); -+ while (p > path->ip_leaf.il_at) { -+ q = iam_lentry_shift(path, p, 1); -+ iam_reccpy(path, iam_leaf_rec(path->ip_container, p), -+ iam_leaf_rec(path->ip_container, q)); -+ iam_keycpy(path->ip_container, iam_leaf_key_at(path, p), -+ iam_leaf_key_at(path, q)); -+ p = iam_lentry_shift(path, p, -1); -+ } -+ dx_set_count((struct iam_entry*)path->ip_leaf.il_entries, count + 1); -+ err = ext3_journal_dirty_metadata(handle, path->ip_leaf.il_bh); -+ if (err) -+ ext3_std_error(iam_path_obj(path)->i_sb, err); -+ return err; -+#endif -+} -+ -+static int iam_leaf_is_full(struct iam_path *path) -+{ -+ int count, limit; -+ -+ count = dx_get_count((struct iam_entry *)path->ip_leaf.il_entries); -+ limit = dx_get_limit((struct iam_entry *)path->ip_leaf.il_entries); -+ -+ return (count >= limit); -+} -+ -+static int 
split_leaf_node(handle_t *handle, struct iam_path *path) -+{ -+#if 0 -+ struct inode *dir = iam_path_obj(path); -+ unsigned continued = 0; -+ struct buffer_head *bh2; -+ u32 newblock, hash_split; -+ char *data2; -+ unsigned split; -+ int err; -+ -+ bh2 = ext3_append (handle, dir, &newblock, &err); -+ if (!(bh2)) -+ return -ENOSPC; -+ -+ err = iam_leaf_load(path); -+ if (err) -+ goto errout; -+ -+ BUFFER_TRACE(path->ip_leaf.il_bh, "get_write_access"); -+ err = ext3_journal_get_write_access(handle, path->ip_leaf.il_bh); -+ if (err) { -+ journal_error: -+ iam_leaf_fini(path); -+ brelse(bh2); -+ ext3_std_error(dir->i_sb, err); -+ err = -EIO; -+ goto errout; -+ } -+ data2 = bh2->b_data; -+ split = dx_get_count((struct iam_entry*)iam_leaf_entries(path))/2; -+ hash_split = *(__u32*)iam_leaf_key_at(path, -+ iam_lentry_shift(path, iam_leaf_entries(path), -+ split)); -+ if (iam_keycmp(path->ip_container, iam_leaf_key_at(path, -+ iam_lentry_shift(path, iam_leaf_entries(path), split)), -+ iam_leaf_key_at(path, -+ iam_lentry_shift(path, iam_leaf_entries(path), split -1))) == 0) -+ continued = 1; -+ -+ memcpy(iam_lentry_shift(path, (struct iam_lentry *)data2, 1), -+ iam_lentry_shift(path, iam_leaf_entries(path), split), -+ split * iam_lentry_size(path)); -+ -+ /* Which block gets the new entry? 
*/ -+ dx_insert_block(path, path->ip_frame, hash_split + continued, newblock); -+ err = ext3_journal_dirty_metadata (handle, bh2); -+ if (err) -+ goto journal_error; -+ err = ext3_journal_dirty_metadata (handle, path->ip_leaf.il_bh); -+ if (err) -+ goto journal_error; -+errout: -+ brelse (bh2); -+ return err; -+#endif -+} -+ -+int iam_add_rec(handle_t *handle, struct iam_path *path) -+{ -+ int err; -+ -+ if (!iam_leaf_is_full(path)) { -+ err = iam_leaf_rec_add(handle, path); -+ } else { -+ err = split_index_node(handle, path); -+ if (err == 0) { -+ err = split_leaf_node(handle, path); -+ if (err == 0) -+ err = iam_leaf_rec_add(handle, path); -+ } -+ } -+ return err; -+} -+ -+/* -+ * Insert new record with key @k and contents from @r, shifting records to the -+ * right. -+ * -+ * precondition: it_state(it) == IAM_IT_ATTACHED && -+ * it->ii_flags&IAM_IT_WRITE && -+ * it_keycmp(it, iam_it_key_get(it, *), k) < 0 -+ * postcondition: it_state(it) == IAM_IT_ATTACHED && -+ * ergo(result == 0, -+ * it_keycmp(it, iam_it_key_get(it, *), k) == 0 && -+ * !memcmp(iam_it_rec_get(it), r, ...)) -+ */ -+int iam_it_rec_insert(handle_t *h, struct iam_iterator *it, -+ struct iam_key *k, struct iam_rec *r) -+{ -+ int result; -+ -+ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE); -+ assert(it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)), k) < 0); -+ -+ result = iam_add_rec(h, &it->ii_path); -+ if (result == 0) { -+ /* place record and key info freed space. Leaf node is already -+ * in transaction. */ -+ iam_it_reccpy(it, r); -+ /* -+ * XXX TBD. 
-+ */ -+ } -+ assert(it_state(it) == IAM_IT_ATTACHED); -+ assert(ergo(result == 0, -+ it_keycmp(it, -+ iam_it_key_get(it, -+ it_scratch_key(it, 0)), k) == 0 && -+ !memcmp(iam_it_rec_get(it), r, -+ iam_it_container(it)->ic_descr->id_rec_size))); -+ return result; -+} -+ -+static int iam_leaf_rec_remove(handle_t *handle, struct iam_container *c, -+ struct iam_path *path) -+{ -+#if 0 -+ struct iam_lentry *p, *q, *end; -+ int count, err; -+ -+ count = dx_get_count((struct iam_entry *)path->ip_leaf.il_entries); -+ end = iam_lentry_shift(path, path->ip_leaf.il_entries, count - 1); -+ p = iam_lentry_at(path, path->ip_leaf.il_at); -+ while (p <= end) { -+ q = iam_lentry_shift(path, p, 1); -+ iam_reccpy(path, iam_leaf_rec(path->ip_container, p), -+ iam_leaf_rec(path->ip_container, q)); -+ iam_keycpy(c, iam_leaf_key_at(path, p), -+ iam_leaf_key_at(path, q)); -+ p = iam_lentry_shift(path, p, 1); -+ } -+ dx_set_count((struct iam_entry*)path->ip_leaf.il_entries, count - 1); -+ err = ext3_journal_dirty_metadata(handle, path->ip_leaf.il_bh); -+ if (err) -+ ext3_std_error(iam_path_obj(path)->i_sb, err); -+ return err; -+#endif -+} -+ -+/* -+ * Delete record under iterator. -+ * -+ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE -+ * postcondition: it_state(it) == IAM_IT_ATTACHED -+ */ -+int iam_it_rec_delete(handle_t *h, struct iam_iterator *it) -+{ -+ int result; -+ -+ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE); -+ -+ result = ext3_journal_get_write_access(h, it->ii_path.ip_leaf.il_bh); -+ /* -+ * no compaction for now. -+ */ -+ if (result == 0) -+ iam_leaf_rec_remove(h, iam_it_container(it), &it->ii_path); -+ -+ return result; -+} -+ -+/* -+ * Convert iterator to cookie. 
-+ * -+ * precondition: it_state(it) == IAM_IT_ATTACHED && -+ * iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t) -+ * postcondition: it_state(it) == IAM_IT_ATTACHED -+ */ -+iam_pos_t iam_it_store(struct iam_iterator *it) -+{ -+ iam_pos_t result; -+ -+ assert(it_state(it) == IAM_IT_ATTACHED); -+ assert(iam_it_container(it)->ic_descr->id_key_size <= sizeof result); -+ -+ result = 0; -+ iam_it_key_get(it, (struct iam_key *)&result); -+ return result; -+} -+ -+/* -+ * Restore iterator from cookie. -+ * -+ * precondition: it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE && -+ * iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t) -+ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED && -+ * iam_it_store(it) == pos) -+ */ -+int iam_it_load(struct iam_iterator *it, iam_pos_t pos) -+{ -+ assert(it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE); -+ assert(iam_it_container(it)->ic_descr->id_key_size <= sizeof pos); -+ return iam_it_get(it, (struct iam_key *)&pos); -+} -+ -+/***********************************************************************/ -+/* invariants */ -+/***********************************************************************/ -+ -+static inline int ptr_inside(void *base, size_t size, void *ptr) -+{ -+ return (base <= ptr) && (ptr < base + size); -+} -+ -+int iam_frame_invariant(struct iam_frame *f) -+{ -+ return -+ (f->bh != NULL && -+ f->bh->b_data != NULL && -+ ptr_inside(f->bh->b_data, f->bh->b_size, f->entries) && -+ ptr_inside(f->bh->b_data, f->bh->b_size, f->at) && -+ f->entries <= f->at); -+} -+int iam_leaf_invariant(struct iam_leaf *l) -+{ -+ return -+ l->il_bh != NULL && -+ l->il_bh->b_data != NULL && -+ ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_entries) && -+ ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_at) && -+ l->il_entries <= l->il_at; -+} -+ -+int iam_path_invariant(struct iam_path *p) -+{ -+ int i; -+ -+ if (p->ip_container == NULL || -+ p->ip_indirect < 0 || 
p->ip_indirect > DX_MAX_TREE_HEIGHT - 1 || -+ p->ip_frame != p->ip_frames + p->ip_indirect || -+ !iam_leaf_invariant(&p->ip_leaf)) -+ return 0; -+ for (i = 0; i < ARRAY_SIZE(p->ip_frames); ++i) { -+ if (i <= p->ip_indirect) { -+ if (!iam_frame_invariant(&p->ip_frames[i])) -+ return 0; -+ } -+ } -+ return 1; -+} -+ -+int iam_it_invariant(struct iam_iterator *it) -+{ -+ return -+ (it->ii_state == IAM_IT_DETACHED || -+ it->ii_state == IAM_IT_ATTACHED) && -+ !(it->ii_flags & ~(IAM_IT_MOVE | IAM_IT_WRITE)) && -+ ergo(it->ii_state == IAM_IT_ATTACHED, -+ iam_path_invariant(&it->ii_path)); -+} -+ -+/* -+ * Search container @c for record with key @k. If record is found, its data -+ * are moved into @r. -+ * -+ * -+ * -+ * Return values: +ve: found, 0: not-found, -ve: error -+ */ -+int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r, -+ struct iam_path_descr *pd) -+{ -+ struct iam_iterator it; -+ int result; -+ -+ iam_it_init(&it, c, 0, pd); -+ -+ result = iam_it_get_exact(&it, k); -+ if (result == 0) -+ /* -+ * record with required key found, copy it into user buffer -+ */ -+ iam_reccpy(&it.ii_path, r, iam_it_rec_get(&it)); -+ iam_it_put(&it); -+ iam_it_fini(&it); -+ return result; -+} -+EXPORT_SYMBOL(iam_lookup); -+ -+/* -+ * Insert new record @r with key @k into container @c (within context of -+ * transaction @h. -+ * -+ * Return values: 0: success, -ve: error, including -EEXIST when record with -+ * given key is already present. 
-+ * -+ * postcondition: ergo(result == 0 || result == -EEXIST, -+ * iam_lookup(c, k, r2) > 0 && -+ * !memcmp(r, r2, c->ic_descr->id_rec_size)); -+ */ -+int iam_insert(handle_t *h, struct iam_container *c, -+ struct iam_key *k, struct iam_rec *r, struct iam_path_descr *pd) -+{ -+ struct iam_iterator it; -+ int result; -+ -+ iam_it_init(&it, c, IAM_IT_WRITE, pd); -+ -+ result = iam_it_get_exact(&it, k); -+ if (result == -ENOENT) -+ result = iam_it_rec_insert(h, &it, k, r); -+ else if (result == 0) -+ result = -EEXIST; -+ iam_it_put(&it); -+ iam_it_fini(&it); -+ return result; -+} -+EXPORT_SYMBOL(iam_insert); -+ -+int iam_update(handle_t *h, struct iam_container *c, -+ struct iam_key *k, struct iam_rec *r, struct iam_path_descr *pd) -+{ -+ struct iam_iterator it; -+ int result; -+ -+ iam_it_init(&it, c, IAM_IT_WRITE, pd); -+ -+ result = iam_it_get_exact(&it, k); -+ if (result == 0) -+ iam_it_rec_set(h, &it, r); -+ iam_it_put(&it); -+ iam_it_fini(&it); -+ return result; -+} -+EXPORT_SYMBOL(iam_update); -+ -+/* -+ * Delete existing record with key @k. -+ * -+ * Return values: 0: success, -ENOENT: not-found, -ve: other error. -+ * -+ * postcondition: ergo(result == 0 || result == -ENOENT, -+ * !iam_lookup(c, k, *)); -+ */ -+int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k, -+ struct iam_path_descr *pd) -+{ -+ struct iam_iterator it; -+ int result; -+ -+ iam_it_init(&it, c, IAM_IT_WRITE, pd); -+ -+ result = iam_it_get_exact(&it, k); -+ if (result == 0) -+ iam_it_rec_delete(h, &it); -+ iam_it_put(&it); -+ iam_it_fini(&it); -+ return result; -+} -+EXPORT_SYMBOL(iam_delete); -+ -Index: iam/fs/ext3/namei.c -=================================================================== ---- iam.orig/fs/ext3/namei.c 2006-05-10 18:21:01.000000000 +0400 -+++ iam/fs/ext3/namei.c 2006-05-10 20:56:22.000000000 +0400 -@@ -24,81 +24,6 @@ - * Theodore Ts'o, 2002 - */ - --/* -- * iam: big theory statement. 
-- * -- * iam (Index Access Module) is a module providing abstraction of persistent -- * transactional container on top of generalized ext3 htree. -- * -- * iam supports: -- * -- * - key, pointer, and record size specifiable per container. -- * -- * - trees taller than 2 index levels. -- * -- * - read/write to existing ext3 htree directories as iam containers. -- * -- * iam container is a tree, consisting of leaf nodes containing keys and -- * records stored in this container, and index nodes, containing keys and -- * pointers to leaf or index nodes. -- * -- * iam does not work with keys directly, instead it calls user-supplied key -- * comparison function (->dpo_keycmp()). -- * -- * Pointers are (currently) interpreted as logical offsets (measured in -- * blocksful) within underlying flat file on top of which iam tree lives. -- * -- * On-disk format: -- * -- * iam mostly tries to reuse existing htree formats. -- * -- * Format of index node: -- * -- * +-----+-------+-------+-------+------+-------+------------+ -- * | | count | | | | | | -- * | gap | / | entry | entry | .... | entry | free space | -- * | | limit | | | | | | -- * +-----+-------+-------+-------+------+-------+------------+ -- * -- * gap this part of node is never accessed by iam code. It -- * exists for binary compatibility with ext3 htree (that, -- * in turn, stores fake struct ext2_dirent for ext2 -- * compatibility), and to keep some unspecified per-node -- * data. Gap can be different for root and non-root index -- * nodes. Gap size can be specified for each container -- * (gap of 0 is allowed). -- * -- * count/limit current number of entries in this node, and the maximal -- * number of entries that can fit into node. count/limit -- * has the same size as entry, and is itself counted in -- * count. -- * -- * entry index entry: consists of a key immediately followed by -- * a pointer to a child node. Size of a key and size of a -- * pointer depends on container. 
Entry has neither -- * alignment nor padding. -- * -- * free space portion of node new entries are added to -- * -- * Entries in index node are sorted by their key value. -- * -- * Format of leaf node: -- * -- * +-----+-------+-------+-------+------+-------+------------+ -- * | | count | | | | | | -- * | gap | / | leaf | leaf | .... | leaf | free space | -- * | | limit | | | | | | -- * +-----+-------+-------+-------+------+-------+------------+ -- -- * leaf For leaf entry: consists of a rec immediately followd by -- * a key. size of a key and size of a rec depends on container. -- * -- * -- * -- * -- * -- */ -- - #include - #include - #include -@@ -112,10 +37,10 @@ - #include - #include - #include +-/* +- * iam: big theory statement. +- * +- * iam (Index Access Module) is a module providing abstraction of persistent +- * transactional container on top of generalized ext3 htree. +- * +- * iam supports: +- * +- * - key, pointer, and record size specifiable per container. +- * +- * - trees taller than 2 index levels. +- * +- * - read/write to existing ext3 htree directories as iam containers. +- * +- * iam container is a tree, consisting of leaf nodes containing keys and +- * records stored in this container, and index nodes, containing keys and +- * pointers to leaf or index nodes. +- * +- * iam does not work with keys directly, instead it calls user-supplied key +- * comparison function (->dpo_keycmp()). +- * +- * Pointers are (currently) interpreted as logical offsets (measured in +- * blocksful) within underlying flat file on top of which iam tree lives. +- * +- * On-disk format: +- * +- * iam mostly tries to reuse existing htree formats. +- * +- * Format of index node: +- * +- * +-----+-------+-------+-------+------+-------+------------+ +- * | | count | | | | | | +- * | gap | / | entry | entry | .... 
| entry | free space | +- * | | limit | | | | | | +- * +-----+-------+-------+-------+------+-------+------------+ +- * +- * gap this part of node is never accessed by iam code. It +- * exists for binary compatibility with ext3 htree (that, +- * in turn, stores fake struct ext2_dirent for ext2 +- * compatibility), and to keep some unspecified per-node +- * data. Gap can be different for root and non-root index +- * nodes. Gap size can be specified for each container +- * (gap of 0 is allowed). +- * +- * count/limit current number of entries in this node, and the maximal +- * number of entries that can fit into node. count/limit +- * has the same size as entry, and is itself counted in +- * count. +- * +- * entry index entry: consists of a key immediately followed by +- * a pointer to a child node. Size of a key and size of a +- * pointer depends on container. Entry has neither +- * alignment nor padding. +- * +- * free space portion of node new entries are added to +- * +- * Entries in index node are sorted by their key value. +- * +- * Format of leaf node: +- * +- * +-----+-------+-------+-------+------+-------+------------+ +- * | | count | | | | | | +- * | gap | / | leaf | leaf | .... | leaf | free space | +- * | | limit | | | | | | +- * +-----+-------+-------+-------+------+-------+------------+ +- +- * leaf For leaf entry: consists of a rec immediately followd by +- * a key. size of a key and size of a rec depends on container. 
+- * +- * +- * +- * +- * +- */ +- + #include + #include + #include +@@ -112,10 +37,10 @@ + #include + #include + #include +#include #include "xattr.h" #include "iopen.h" @@ -1072,7 +109,7 @@ Index: iam/fs/ext3/namei.c { struct buffer_head *bh; -@@ -141,9 +66,6 @@ static struct buffer_head *ext3_append(h +@@ -141,9 +66,6 @@ return bh; } @@ -1082,7 +119,7 @@ Index: iam/fs/ext3/namei.c #ifndef swap #define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0) -@@ -162,10 +84,6 @@ struct fake_dirent { +@@ -162,10 +84,6 @@ u8 file_type; }; @@ -1093,7 +130,7 @@ Index: iam/fs/ext3/namei.c /* * dx_root_info is laid out so that if it should somehow get overlaid by a -@@ -203,242 +121,10 @@ struct dx_map_entry +@@ -203,242 +121,10 @@ }; @@ -1336,7 +373,7 @@ Index: iam/fs/ext3/namei.c static void dx_set_key(struct iam_path *p, struct iam_entry *entry, struct iam_key *key); static unsigned dx_get_count(struct iam_entry *entries); -@@ -457,80 +143,29 @@ static void dx_sort_map(struct dx_map_en +@@ -457,80 +143,29 @@ static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to, struct dx_map_entry *offsets, int count); static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size); @@ -1424,7 +461,7 @@ Index: iam/fs/ext3/namei.c return key; } -@@ -540,68 +175,69 @@ static inline struct iam_key *iam_key_at +@@ -540,68 +175,70 @@ return (struct iam_key *)entry; } @@ -1494,10 +531,24 @@ Index: iam/fs/ext3/namei.c +static u32 htree_root_ptr(struct iam_container *c); +static int htree_node_check(struct iam_path *path, struct iam_frame *frame); +static int htree_node_init(struct iam_container *c, struct buffer_head *bh, int root); -+static int htree_node_read(struct iam_container *c, iam_ptr_t ptr, -+ handle_t *handle, struct buffer_head **bh); +static int htree_keycmp(struct iam_container *c, + struct iam_key *k1, struct iam_key *k2); + +-static struct iam_entry *dx_get_entries(struct iam_path *path, void *data, +- int root) +-{ +- return data + +- (root ? 
+- path_descr(path)->id_root_gap : path_descr(path)->id_node_gap); +-} ++struct iam_operations htree_operation = { ++ .id_root_ptr = htree_root_ptr, ++ .id_node_check = htree_node_check, ++ .id_node_init = htree_node_init, ++ .id_node_read = iam_node_read, ++ .id_keycmp = htree_keycmp ++}; ++ +/* + * Parameters describing iam compatibility mode in which existing ext3 htrees + * can be manipulated. @@ -1507,22 +558,9 @@ Index: iam/fs/ext3/namei.c + .id_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs, + .id_node_gap = offsetof(struct dx_node, entries), + .id_root_gap = offsetof(struct dx_root, entries), -+ -+ .id_root_ptr = htree_root_ptr, -+ .id_node_check = htree_node_check, -+ .id_node_init = htree_node_init, -+ .id_node_read = htree_node_read, -+ .id_keycmp = htree_keycmp ++ .id_ops = &htree_operation +}; --static struct iam_entry *dx_get_entries(struct iam_path *path, void *data, -- int root) --{ -- return data + -- (root ? -- path_descr(path)->id_root_gap : path_descr(path)->id_node_gap); --} - -static struct iam_entry *dx_node_get_entries(struct iam_path *path, - struct iam_frame *frame) +static inline int dx_index_is_compat(struct iam_path *path) @@ -1536,7 +574,7 @@ Index: iam/fs/ext3/namei.c static int dx_node_check(struct iam_path *p, struct iam_frame *f) { struct iam_entry *e; -@@ -614,10 +250,10 @@ static int dx_node_check(struct iam_path +@@ -614,10 +251,10 @@ count = dx_get_count(e); e = iam_entry_shift(p, e, 1); for (i = 0; i < count - 1; ++i, e = iam_entry_shift(p, e, 1)) { @@ -1550,7 +588,7 @@ Index: iam/fs/ext3/namei.c return 0; } return 1; -@@ -636,13 +272,17 @@ static int htree_node_check(struct iam_p +@@ -636,13 +273,17 @@ data = frame->bh->b_data; entries = dx_node_get_entries(path, frame); @@ -1570,7 +608,7 @@ Index: iam/fs/ext3/namei.c if (root->info.hash_version > DX_HASH_MAX) { ext3_warning(sb, __FUNCTION__, "Unrecognised inode hash code %d", -@@ -669,15 +309,16 @@ static int htree_node_check(struct iam_p +@@ -669,15 +310,16 @@ 
root->info.info_length)); assert(dx_get_limit(entries) == dx_root_limit(path)); @@ -1594,7 +632,18 @@ Index: iam/fs/ext3/namei.c assert(dx_get_limit(entries) == dx_node_limit(path)); } frame->entries = frame->at = entries; -@@ -800,7 +441,7 @@ struct stats dx_show_entries(struct dx_h +@@ -697,8 +339,8 @@ + return 0; + } + +-static int htree_node_read(struct iam_container *c, iam_ptr_t ptr, +- handle_t *handle, struct buffer_head **bh) ++int iam_node_read(struct iam_container *c, iam_ptr_t ptr, ++ handle_t *handle, struct buffer_head **bh) + { + int result = 0; + +@@ -800,7 +442,7 @@ } #endif /* DX_DEBUG */ @@ -1603,7 +652,7 @@ Index: iam/fs/ext3/namei.c { u32 ptr; int err = 0; -@@ -810,7 +451,7 @@ static int dx_lookup(struct iam_path *pa +@@ -810,11 +452,11 @@ struct iam_frame *frame; struct iam_container *c; @@ -1612,7 +661,26 @@ Index: iam/fs/ext3/namei.c c = path->ip_container; for (frame = path->ip_frames, i = 0, -@@ -841,8 +482,8 @@ static int dx_lookup(struct iam_path *pa +- ptr = param->id_root_ptr(path->ip_container); ++ ptr = param->id_ops->id_root_ptr(path->ip_container); + i <= path->ip_indirect; + ptr = dx_get_block(path, frame->at), ++frame, ++i) { + struct iam_entry *entries; +@@ -823,10 +465,11 @@ + struct iam_entry *m; + unsigned count; + +- err = param->id_node_read(c, (iam_ptr_t)ptr, NULL, &frame->bh); ++ err = param->id_ops->id_node_read(c, (iam_ptr_t)ptr, NULL, ++ &frame->bh); + if (err != 0) + break; +- err = param->id_node_check(path, frame); ++ err = param->id_ops->id_node_check(path, frame); + if (err != 0) + break; + +@@ -841,8 +484,8 @@ m = iam_entry_shift(path, p, iam_entry_diff(path, q, p) / 2); dxtrace(printk(".")); @@ -1623,7 +691,7 @@ Index: iam/fs/ext3/namei.c q = iam_entry_shift(path, m, -1); else p = iam_entry_shift(path, m, +1); -@@ -857,12 +498,12 @@ static int dx_lookup(struct iam_path *pa +@@ -857,12 +500,12 @@ while (n--) { dxtrace(printk(",")); at = iam_entry_shift(path, at, +1); @@ -1639,7 +707,7 @@ Index: 
iam/fs/ext3/namei.c path->ip_key_target)); } at = iam_entry_shift(path, at, -1); -@@ -891,508 +532,20 @@ static int dx_probe(struct dentry *dentr +@@ -891,508 +534,20 @@ struct dx_hash_info *hinfo, struct iam_path *path) { int err; @@ -2154,7 +1222,7 @@ Index: iam/fs/ext3/namei.c * This function increments the frame pointer to search the next leaf * block, and reads in the necessary intervening nodes if the search * should be necessary. Whether or not the search is necessary is -@@ -1409,8 +562,8 @@ EXPORT_SYMBOL(iam_update); +@@ -1409,8 +564,8 @@ * If start_hash is non-null, it will be filled in with the starting * hash of the next page. */ @@ -2165,7 +1233,7 @@ Index: iam/fs/ext3/namei.c { struct iam_frame *p; struct buffer_head *bh; -@@ -1445,7 +598,7 @@ static int ext3_htree_next_block(struct +@@ -1445,7 +600,7 @@ * desired contiuation hash. If it doesn't, return since * there's no point to read in the successive index pages. */ @@ -2174,21 +1242,21 @@ Index: iam/fs/ext3/namei.c if (start_hash) *start_hash = bhash; if ((hash & 1) == 0) { -@@ -1457,9 +610,10 @@ static int ext3_htree_next_block(struct +@@ -1457,9 +612,10 @@ * block so no check is necessary */ while (num_frames--) { - err = path_descr(path)->id_node_read(path->ip_container, - (iam_ptr_t)dx_get_block(path, p->at), - NULL, &bh); -+ err = iam_path_descr(path)-> ++ err = iam_path_descr(path)->id_ops-> + id_node_read(path->ip_container, + (iam_ptr_t)dx_get_block(path, p->at), + NULL, &bh); if (err != 0) return err; /* Failure */ ++p; -@@ -1662,8 +816,8 @@ static void dx_sort_map (struct dx_map_e +@@ -1662,8 +818,8 @@ } while(more); } @@ -2199,7 +1267,7 @@ Index: iam/fs/ext3/namei.c { struct iam_entry *entries = frame->entries; struct iam_entry *old = frame->at, *new = iam_entry_shift(path, old, +1); -@@ -1897,14 +1051,15 @@ static struct buffer_head * ext3_dx_find +@@ -1897,14 +1053,15 @@ if (*err != 0) return NULL; } else { @@ -2212,100 +1280,1304 @@ Index: iam/fs/ext3/namei.c block = 
dx_get_block(path, path->ip_frame->at); - *err = path_descr(path)->id_node_read(path->ip_container, (iam_ptr_t)block, - NULL, &bh); -+ *err = iam_path_descr(path)->id_node_read(path->ip_container, ++ *err = iam_path_descr(path)->id_ops->id_node_read(path->ip_container, + (iam_ptr_t)block, + NULL, &bh); if (*err != 0) goto errout; de = (struct ext3_dir_entry_2 *) bh->b_data; -@@ -2067,7 +1222,7 @@ static struct ext3_dir_entry_2 *do_split +@@ -2067,7 +1224,7 @@ struct buffer_head **bh,struct iam_frame *frame, struct dx_hash_info *hinfo, int *error) { - struct inode *dir = path_obj(path); + struct inode *dir = iam_path_obj(path); - unsigned blocksize = dir->i_sb->s_blocksize; - unsigned count, continued; - struct buffer_head *bh2; -@@ -2392,15 +1547,15 @@ static int ext3_add_entry (handle_t *han - } - - #ifdef CONFIG_EXT3_INDEX --static int split_index_node(handle_t *handle, struct iam_path *path) --{ -+int split_index_node(handle_t *handle, struct iam_path *path) + unsigned blocksize = dir->i_sb->s_blocksize; + unsigned count, continued; + struct buffer_head *bh2; +@@ -2392,15 +1549,15 @@ + } + + #ifdef CONFIG_EXT3_INDEX +-static int split_index_node(handle_t *handle, struct iam_path *path) +-{ ++int split_index_node(handle_t *handle, struct iam_path *path) ++{ + + struct iam_entry *entries; /* old block contents */ + struct iam_entry *entries2; /* new block contents */ + struct iam_frame *frame, *safe; + struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {0}; + u32 newblock[DX_MAX_TREE_HEIGHT] = {0}; +- struct inode *dir = path_obj(path); ++ struct inode *dir = iam_path_obj(path); + int nr_splet; + int i, err; + +@@ -2442,7 +1599,8 @@ + for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) { + bh_new[i] = ext3_append (handle, dir, &newblock[i], &err); + if (!bh_new[i] || +- path_descr(path)->id_node_init(path->ip_container, bh_new[i], 0) != 0) ++ iam_path_descr(path)->id_ops->id_node_init(path->ip_container, ++ bh_new[i], 0) != 0) + goto cleanup; + 
BUFFER_TRACE(frame->bh, "get_write_access"); + err = ext3_journal_get_write_access(handle, frame->bh); +@@ -2516,9 +1674,9 @@ + unsigned count1 = count/2, count2 = count - count1; + unsigned hash2; + +- dx_get_key(path, +- iam_entry_shift(path, entries, count1), +- (struct iam_key *)&hash2); ++ iam_get_key(path, ++ iam_entry_shift(path, entries, count1), ++ (struct iam_key *)&hash2); + + dxtrace(printk("Split index %i/%i\n", count1, count2)); + +@@ -2578,7 +1736,7 @@ + size_t isize; + + iam_path_compat_init(&cpath, dir); +- param = path_descr(path); ++ param = iam_path_descr(path); + + err = dx_probe(dentry, NULL, &hinfo, path); + if (err != 0) +@@ -2588,8 +1746,9 @@ + /* XXX nikita: global serialization! */ + isize = dir->i_size; + +- err = param->id_node_read(path->ip_container, (iam_ptr_t)dx_get_block(path, frame->at), +- handle, &bh); ++ err = param->id_ops->id_node_read(path->ip_container, ++ (iam_ptr_t)dx_get_block(path, frame->at), ++ handle, &bh); + if (err != 0) + goto cleanup; + +@@ -2724,12 +1883,12 @@ + * is so far negative - it has no inode. + * + * If the create succeeds, we fill in the inode information +- * with d_instantiate(). ++ * with d_instantiate(). + */ + static int ext3_create (struct inode * dir, struct dentry * dentry, int mode, + struct nameidata *nd) + { +- handle_t *handle; ++ handle_t *handle; + struct inode * inode; + int err, retries = 0; + +Index: linux-2.6.9/fs/ext3/iam.c +=================================================================== +--- linux-2.6.9.orig/fs/ext3/iam.c 2006-05-24 19:52:06.500448688 +0800 ++++ linux-2.6.9/fs/ext3/iam.c 2006-05-24 17:47:34.000000000 +0800 +@@ -0,0 +1,1183 @@ ++/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- ++ * vim:expandtab:shiftwidth=8:tabstop=8: ++ * ++ * iam.c ++ * Top-level entry points into osd module ++ * ++ * Copyright (c) 2006 Cluster File Systems, Inc. 
++ * Author: Wang Di ++ * Author: Nikita Danilov ++ * ++ * This file is part of the Lustre file system, http://www.lustre.org ++ * Lustre is a trademark of Cluster File Systems, Inc. ++ * ++ * You may have signed or agreed to another license before downloading ++ * this software. If so, you are bound by the terms and conditions ++ * of that agreement, and the following does not apply to you. See the ++ * LICENSE file included with this distribution for more information. ++ * ++ * If you did not agree to a different license, then this copy of Lustre ++ * is open source software; you can redistribute it and/or modify it ++ * under the terms of version 2 of the GNU General Public License as ++ * published by the Free Software Foundation. ++ * ++ * In either case, Lustre is distributed in the hope that it will be ++ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty ++ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * license text for more details. ++ */ ++ ++/* ++ * iam: big theory statement. ++ * ++ * iam (Index Access Module) is a module providing abstraction of persistent ++ * transactional container on top of generalized ext3 htree. ++ * ++ * iam supports: ++ * ++ * - key, pointer, and record size specifiable per container. ++ * ++ * - trees taller than 2 index levels. ++ * ++ * - read/write to existing ext3 htree directories as iam containers. ++ * ++ * iam container is a tree, consisting of leaf nodes containing keys and ++ * records stored in this container, and index nodes, containing keys and ++ * pointers to leaf or index nodes. ++ * ++ * iam does not work with keys directly, instead it calls user-supplied key ++ * comparison function (->dpo_keycmp()). ++ * ++ * Pointers are (currently) interpreted as logical offsets (measured in ++ * blocksful) within underlying flat file on top of which iam tree lives. ++ * ++ * On-disk format: ++ * ++ * iam mostly tries to reuse existing htree formats. 
++ * ++ * Format of index node: ++ * ++ * +-----+-------+-------+-------+------+-------+------------+ ++ * | | count | | | | | | ++ * | gap | / | entry | entry | .... | entry | free space | ++ * | | limit | | | | | | ++ * +-----+-------+-------+-------+------+-------+------------+ ++ * ++ * gap this part of node is never accessed by iam code. It ++ * exists for binary compatibility with ext3 htree (that, ++ * in turn, stores fake struct ext2_dirent for ext2 ++ * compatibility), and to keep some unspecified per-node ++ * data. Gap can be different for root and non-root index ++ * nodes. Gap size can be specified for each container ++ * (gap of 0 is allowed). ++ * ++ * count/limit current number of entries in this node, and the maximal ++ * number of entries that can fit into node. count/limit ++ * has the same size as entry, and is itself counted in ++ * count. ++ * ++ * entry index entry: consists of a key immediately followed by ++ * a pointer to a child node. Size of a key and size of a ++ * pointer depends on container. Entry has neither ++ * alignment nor padding. ++ * ++ * free space portion of node new entries are added to ++ * ++ * Entries in index node are sorted by their key value. ++ * ++ * Format of a leaf node is not specified. Generic iam code accesses leaf ++ * nodes through ->id_leaf methods in struct iam_descr. 
++ * ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include ++#include ++ ++#include "xattr.h" ++#include "iopen.h" ++#include "acl.h" ++ ++static int iam_leaf_at_end(struct iam_container *c, struct iam_leaf *l); ++static void iam_leaf_next(struct iam_container *c, struct iam_leaf *folio); ++ ++static inline int iam_lfix_entry_size(struct iam_container *c) ++{ ++ return iam_container_descr(c)->id_key_size + ++ iam_container_descr(c)->id_rec_size; ++} ++ ++static inline struct iam_lentry * ++iam_lentry_shift(struct iam_container *c, struct iam_lentry *entry, int shift) ++{ ++ void *e = entry; ++ return e + shift * iam_lfix_entry_size(c); ++} ++ ++static inline struct iam_key * ++iam_leaf_key_at(struct iam_container *c, struct iam_lentry *entry) ++{ ++ void *e = entry; ++ return e; ++} ++ ++static struct iam_lentry * ++iam_get_lentries(struct iam_container *c, void *data) ++{ ++ return data + iam_container_descr(c)->id_node_gap; ++} ++ ++static int lentry_count_get (struct iam_leaf *leaf) ++{ ++ struct iam_lentry *lentry = leaf->il_entries; ++ return le16_to_cpu(((struct iam_leaf_head *)lentry)->ill_count); ++} ++ ++static void lentry_count_set (struct iam_leaf *leaf, unsigned count) ++{ ++ struct iam_lentry *lentry = leaf->il_entries; ++ ((struct iam_leaf_head *)lentry)->ill_count = cpu_to_le16(count); ++} ++ ++/* ++ * Helper function returning scratch key. 
++ */ ++static struct iam_key *it_scratch_key(struct iam_iterator *it, int n) ++{ ++ return iam_path_key(&it->ii_path, n); ++} ++ ++static struct iam_container *iam_it_container(struct iam_iterator *it) ++{ ++ return it->ii_path.ip_container; ++} ++ ++static inline int it_keycmp(struct iam_iterator *it, ++ struct iam_key *k1, struct iam_key *k2) ++{ ++ return iam_keycmp(iam_it_container(it), k1, k2); ++} ++ ++/*This func is for flat key, for those keys, ++ *which are not stored explicitly ++ *it would be decrypt in the key buffer ++ */ ++struct iam_key* ++iam_generic_leaf_flat_key(struct iam_container *c, struct iam_leaf *l, ++ struct iam_key *key) ++{ ++ void *ie = l->il_at; ++ return (struct iam_key*)ie; ++} ++ ++static void ++iam_generic_leaf_start(struct iam_container *c, struct iam_leaf *l) ++{ ++ l->il_at = iam_get_lentries(c, l->il_bh->b_data); ++} ++ ++static inline ptrdiff_t iam_lfix_diff(struct iam_path *p, struct iam_lentry *e1, ++ struct iam_lentry *e2) ++{ ++ ptrdiff_t diff; ++ ++ diff = (void *)e1 - (void *)e2; ++ assert(diff / iam_lfix_entry_size(p->ip_container) * ++ iam_lfix_entry_size(p->ip_container) == diff); ++ return diff / iam_lfix_entry_size(p->ip_container); ++} ++ ++int iam_lfix_init(struct iam_container *c, struct iam_leaf *l) ++{ ++ struct iam_leaf_head *ill; ++ ++ assert(l->il_bh != NULL); ++ ++ ill = (struct iam_leaf_head*)l->il_bh->b_data; ++ assert(ill->ill_magic == IAM_LEAF_HEADER_MAGIC); ++ ++ l->il_at = l->il_entries = iam_get_lentries(c, l->il_bh->b_data); ++ return 0; ++} ++ ++void iam_lfix_fini(struct iam_container *c, struct iam_leaf *l) ++{ ++ l->il_entries = l->il_at = NULL; ++ return; ++} ++ ++static struct iam_lentry * ++iam_lfix_get_end(struct iam_container *c, struct iam_leaf *l) ++{ ++ int count = lentry_count_get(l); ++ struct iam_lentry *ile = iam_lentry_shift(c, l->il_entries, count); ++ ++ return ile; ++} ++ ++struct iam_rec* ++iam_lfix_rec(struct iam_container *c, struct iam_leaf *l) ++{ ++ void *e = l->il_at; ++ 
return e + iam_container_descr(c)->id_key_size; ++} ++ ++static void ++iam_lfix_next(struct iam_container *c, struct iam_leaf *l) ++{ ++ assert(!iam_leaf_at_end(c, l)); ++ l->il_at = iam_lentry_shift(c, l->il_at, 1); ++} ++ ++static int ++iam_lfix_lookup(struct iam_container *c, struct iam_path *path, ++ struct iam_leaf *l, struct iam_key *k) ++{ ++ struct iam_lentry *p, *q, *m; ++ int count; ++ ++ count = lentry_count_get(l); ++ ++ p = iam_lentry_shift(c, l->il_entries, 1); ++ q = iam_lentry_shift(c, l->il_entries, count - 1); ++ ++ while (p < q) { ++ m = iam_lentry_shift(c, p, iam_lfix_diff(path, q, p) / 2); ++ if (iam_keycmp(c, iam_leaf_key_at(c, m), k) >= 0) ++ q = iam_lentry_shift(c, m, -1); ++ else ++ p = iam_lentry_shift(c, m, +1); ++ } ++ l->il_at = q; ++ iam_keycpy(c, iam_path_key(path, 0), iam_leaf_key_at(c, q)); ++ ++ if (iam_keycmp(c, iam_leaf_key_at(c, q), k) != 0) ++ return -ENOENT; ++ return 0; ++} ++ ++static void iam_lfix_rec_add (struct iam_path *path, ++ struct iam_key *k, struct iam_rec *r) ++{ ++ struct iam_lentry *end, *next, *nnext; ++ int count; ++ ptrdiff_t diff; ++ ++ count = lentry_count_get(&path->ip_leaf); ++ end = iam_lfix_get_end(path->ip_container, &path->ip_leaf); ++ next = iam_lentry_shift(path->ip_container, path->ip_leaf.il_at, 1); ++ nnext = iam_lentry_shift(path->ip_container, next, 1); ++ ++ diff = (void *)end - (void *)next; ++ memmove(next, nnext, diff); ++ ++ lentry_count_set(&path->ip_leaf, count + 1); ++} ++ ++static void iam_lfix_rec_del(struct iam_path *path) ++{ ++ struct iam_lentry *next, *end; ++ int count; ++ ptrdiff_t diff; ++ ++ count = lentry_count_get(&path->ip_leaf); ++ end = iam_lfix_get_end(path->ip_container, &path->ip_leaf); ++ next = iam_lentry_shift(path->ip_container, path->ip_leaf.il_at, 1); ++ diff = (void *)end - (void *)next; ++ memmove(path->ip_leaf.il_at, next, diff); ++ ++ lentry_count_set(&path->ip_leaf, count - 1); ++} ++ ++static int iam_lfix_can_add (struct iam_container *c, struct iam_leaf 
*l, ++ struct iam_key *k, struct iam_rec *r) ++{ ++ struct iam_lentry *end; ++ int block_size = c->ic_object->i_sb->s_blocksize; ++ unsigned long left, entry_size; ++ ++ end = iam_lfix_get_end(c, l); ++ ++ left = block_size - iam_container_descr(c)->id_node_gap; ++ ++ left -= (unsigned long)((void*)end - (void*)l->il_entries); ++ ++ entry_size = iam_lfix_entry_size(c); ++ ++ if (left >= entry_size) ++ return 1; ++ ++ return 0; ++} ++ ++static int iam_lfix_at_end(struct iam_container *c, struct iam_leaf *folio) ++{ ++ struct iam_lentry *ile = iam_lfix_get_end(c, folio); ++ ++ return (folio->il_at == ile); ++} ++ ++struct iam_leaf_operations lfix_leaf_ops = { ++ .init = iam_lfix_init, ++ .fini = iam_lfix_fini, ++ .start = iam_generic_leaf_start, ++ .next = iam_lfix_next, ++ .key = iam_generic_leaf_flat_key, ++ .rec = iam_lfix_rec, ++ .lookup = iam_lfix_lookup, ++ .at_end = iam_lfix_at_end, ++ .rec_add = iam_lfix_rec_add, ++ .rec_del = iam_lfix_rec_del, ++ .can_add = iam_lfix_can_add ++}; ++EXPORT_SYMBOL(lfix_leaf_ops); ++ ++static __u32 iam_root_ptr(struct iam_container *c) ++{ ++ return 0; ++} ++ ++static int iam_node_init(struct iam_container *c, struct buffer_head *bh, ++ int root) ++{ ++ return 0; ++} ++ ++static int iam_node_check(struct iam_path *path, struct iam_frame *frame) ++{ ++ struct iam_entry *entries; ++ void *data; ++ entries = dx_node_get_entries(path, frame); ++ ++ data = frame->bh->b_data; ++ ++ if (frame == path->ip_frames) { ++ struct iam_root *root; ++ ++ root = data; ++ path->ip_indirect = root->info.indirect_levels; ++ } ++ frame->entries = frame->at = entries; ++ return 0; ++} ++ ++static int iam_node_create(struct iam_container *c) ++{ ++ return 0; ++} ++ ++struct iam_operations generic_iam_ops = { ++ .id_root_ptr = iam_root_ptr, ++ .id_node_read = iam_node_read, ++ .id_node_init = iam_node_init, ++ .id_node_check = iam_node_check, ++ .id_create = iam_node_create, ++}; ++EXPORT_SYMBOL(generic_iam_ops); ++ ++static inline void 
iam_reccpy(struct iam_path *p, struct iam_rec *rec_dst, ++ struct iam_rec *rec_src) ++{ ++ memcpy(rec_dst, rec_src, iam_path_descr(p)->id_rec_size); ++} ++ ++/* ++ * Return pointer to current leaf record. Pointer is valid while corresponding ++ * leaf node is locked and pinned. ++ */ ++struct iam_rec *iam_leaf_rec(struct iam_container *c, struct iam_leaf *leaf) ++{ ++ return c->ic_descr->id_leaf_ops->rec(c, leaf); ++} ++ ++/* ++ * Return pointer to the current leaf key. This function may return either ++ * pointer to the key stored in node, or copy key into @key buffer supplied by ++ * caller and return pointer to this buffer. The latter approach is used when ++ * keys in nodes are not stored in plain form (e.g., htree doesn't store keys ++ * at all). ++ * ++ * Caller should assume that returned pointer is only valid while leaf node is ++ * pinned and locked. ++ */ ++struct iam_key *iam_leaf_key(struct iam_container *c, struct iam_leaf *leaf, ++ struct iam_key *key) ++{ ++ return c->ic_descr->id_leaf_ops->key(c, leaf, key); ++} ++ ++/* ++ * Initialize container @c, acquires additional reference on @inode. ++ */ ++int iam_container_init(struct iam_container *c, ++ struct iam_descr *descr, struct inode *inode) ++{ ++ memset(c, 0, sizeof *c); ++ c->ic_descr = descr; ++ c->ic_object = igrab(inode); ++ if (c->ic_object != NULL) ++ return 0; ++ else ++ return -ENOENT; ++} ++EXPORT_SYMBOL(iam_container_init); ++ ++/* ++ * Finalize container @c, release all resources. 
++ */ ++void iam_container_fini(struct iam_container *c) ++{ ++ if (c->ic_object != NULL) { ++ iput(c->ic_object); ++ c->ic_object = NULL; ++ } ++} ++EXPORT_SYMBOL(iam_container_fini); ++ ++void iam_path_init(struct iam_path *path, struct iam_container *c, ++ struct iam_path_descr *pd) ++{ ++ memset(path, 0, sizeof *path); ++ path->ip_container = c; ++ path->ip_frame = path->ip_frames; ++ path->ip_data = pd; ++} ++ ++static void iam_leaf_fini(struct iam_path *path); ++void iam_path_fini(struct iam_path *path) ++{ ++ int i; ++ ++ iam_leaf_fini(path); ++ for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) { ++ if (path->ip_frames[i].bh != NULL) { ++ brelse(path->ip_frames[i].bh); ++ path->ip_frames[i].bh = NULL; ++ } ++ } ++} ++ ++extern struct iam_descr htree_compat_param; ++void iam_path_compat_init(struct iam_path_compat *path, struct inode *inode) ++{ ++ int i; ++ ++ for (i = 0; i < ARRAY_SIZE(path->ipc_scratch); ++i) ++ path->ipc_descr.ipd_key_scratch[i] = ++ (struct iam_key *)&path->ipc_scratch[i]; ++ ++ iam_container_init(&path->ipc_container, &htree_compat_param, inode); ++ /* ++ * XXX hack allowing finalization of iam_path_compat with ++ * iam_path_fini(). 
++ */ ++ iput(inode); ++ iam_path_init(&path->ipc_path, &path->ipc_container, &path->ipc_descr); ++} ++ ++void iam_path_compat_fini(struct iam_path_compat *path) ++{ ++ iam_path_fini(&path->ipc_path); ++ iam_container_fini(&path->ipc_container); ++} ++ ++static int iam_leaf_load(struct iam_path *path) ++{ ++ int block; ++ int err; ++ struct iam_container *c; ++ struct buffer_head *bh; ++ struct iam_leaf *leaf; ++ struct iam_descr *descr; ++ ++ c = path->ip_container; ++ leaf = &path->ip_leaf; ++ descr = iam_path_descr(path); ++ block = dx_get_block(path, path->ip_frame->at); ++ err = descr->id_ops->id_node_read(c, block, NULL, &bh); ++ if (err == 0) { ++ leaf->il_bh = bh; ++ err = descr->id_leaf_ops->init(c, leaf); ++ } ++ return err; ++} ++ ++static void iam_leaf_fini(struct iam_path *path) ++{ ++ iam_path_descr(path)->id_leaf_ops->fini(path->ip_container, ++ &path->ip_leaf); ++ if (path && path->ip_leaf.il_bh) { ++ brelse(path->ip_leaf.il_bh); ++ path->ip_leaf.il_bh = NULL; ++ } ++} ++ ++static void iam_leaf_start(struct iam_container *c, struct iam_leaf *folio) ++{ ++ c->ic_descr->id_leaf_ops->start(c, folio); ++} ++ ++static void iam_leaf_next(struct iam_container *c, struct iam_leaf *folio) ++{ ++ c->ic_descr->id_leaf_ops->next(c, folio); ++} ++ ++static void iam_rec_add (struct iam_path *path, struct iam_key *key, ++ struct iam_rec *rec) ++{ ++ iam_path_descr(path)->id_leaf_ops->rec_add(path, key, rec); ++} ++ ++static void iam_rec_del (struct iam_path *path) ++{ ++ iam_path_descr(path)->id_leaf_ops->rec_del(path); ++} ++ ++static int iam_leaf_at_end(struct iam_container *c, struct iam_leaf *l) ++{ ++ return iam_container_descr(c)->id_leaf_ops->at_end(c, l); ++} ++/* ++ * Helper wrapper around iam_it_get(): returns 0 (success) only when record ++ * with exactly the same key as asked is found. 
++ */ ++static int iam_it_get_exact(struct iam_iterator *it, struct iam_key *k) ++{ ++ int result; ++ ++ result = iam_it_get(it, k); ++ if (result == 0 && ++ (it_keycmp(it, k, iam_it_key_get(it, it_scratch_key(it, 0))) != 0)) ++ /* ++ * Return -ENOENT if cursor is located above record with a key ++ * different from one specified. ++ * ++ * XXX returning -ENOENT only works if iam_it_get never ++ * returns -ENOENT as a legitimate error. ++ */ ++ result = -ENOENT; ++ return result; ++} ++ ++/***********************************************************************/ ++/* iterator interface */ ++/***********************************************************************/ ++ ++static enum iam_it_state it_state(struct iam_iterator *it) ++{ ++ return it->ii_state; ++} ++ ++void iam_container_write_lock(struct iam_container *ic) ++{ ++ down(&ic->ic_object->i_sem); ++} ++ ++void iam_container_write_unlock(struct iam_container *ic) ++{ ++ up(&ic->ic_object->i_sem); ++} ++ ++void iam_container_read_lock(struct iam_container *ic) ++{ ++ down(&ic->ic_object->i_sem); ++} ++ ++void iam_container_read_unlock(struct iam_container *ic) ++{ ++ up(&ic->ic_object->i_sem); ++} ++ ++static void iam_it_lock(struct iam_iterator *it) ++{ ++ if (it->ii_flags&IAM_IT_WRITE) ++ iam_container_write_lock(iam_it_container(it)); ++ else ++ iam_container_read_lock(iam_it_container(it)); ++} ++ ++static void iam_it_unlock(struct iam_iterator *it) ++{ ++ if (it->ii_flags&IAM_IT_WRITE) ++ iam_container_write_unlock(iam_it_container(it)); ++ else ++ iam_container_read_unlock(iam_it_container(it)); ++} ++ ++/* ++ * Initialize iterator to IAM_IT_DETACHED state. 
++ * ++ * postcondition: it_state(it) == IAM_IT_DETACHED ++ */ ++int iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags, ++ struct iam_path_descr *pd) ++{ ++ memset(it, 0, sizeof *it); ++ it->ii_flags = flags; ++ it->ii_state = IAM_IT_DETACHED; ++ iam_path_init(&it->ii_path, c, pd); ++ return 0; ++} ++ ++/* ++ * Finalize iterator and release all resources. ++ * ++ * precondition: it_state(it) == IAM_IT_DETACHED ++ */ ++void iam_it_fini(struct iam_iterator *it) ++{ ++ assert(it_state(it) == IAM_IT_DETACHED); ++ iam_path_fini(&it->ii_path); ++} ++ ++int iam_path_lookup(struct iam_path *path) ++{ ++ struct iam_container *c; ++ struct iam_descr *descr; ++ struct iam_leaf *leaf; ++ int result; ++ ++ c = path->ip_container; ++ leaf = &path->ip_leaf; ++ descr = iam_path_descr(path); ++ result = dx_lookup(path); ++ if (result == 0) { ++ result = iam_leaf_load(path); ++ if (result == 0) ++ result = descr->id_leaf_ops->lookup(c, path, leaf, ++ path->ip_key_target); ++ } ++ return result; ++} ++ ++/* ++ * Attach iterator. After successful completion, @it points to record with ++ * smallest key not larger than @k. ++ * ++ * Return value: 0: positioned on existing record, ++ * -ve: error. ++ * ++ * precondition: it_state(it) == IAM_IT_DETACHED ++ * postcondition: ergo(result == 0, ++ * (it_state(it) == IAM_IT_ATTACHED && ++ * it_keycmp(it, iam_it_key_get(it, *), k) < 0)) ++ */ ++int iam_it_get(struct iam_iterator *it, struct iam_key *k) ++{ ++ int result; ++ assert(it_state(it) == IAM_IT_DETACHED); ++ ++ it->ii_path.ip_key_target = k; ++ iam_it_lock(it); ++ result = iam_path_lookup(&it->ii_path); ++ if (result == 0 || result == -ENOENT) ++ it->ii_state = IAM_IT_ATTACHED; ++ else ++ iam_it_unlock(it); ++ assert(ergo(result == 0, ++ it_keycmp(it, ++ iam_it_key_get(it, it_scratch_key(it, 0)), ++ k) <= 0)); ++ return result; ++} ++ ++/* ++ * Duplicates iterator. 
++ * 
++ * postcondition: it_state(dst) == it_state(src) && 
++ * iam_it_container(dst) == iam_it_container(src) && 
++ * dst->ii_flags == src->ii_flags && 
++ * ergo(it_state(src) == IAM_IT_ATTACHED, 
++ * iam_it_rec_get(dst) == iam_it_rec_get(src) && 
++ * iam_it_key_get(dst, *1) == iam_it_key_get(src, *2)) 
++ */ 
++void iam_it_dup(struct iam_iterator *dst, struct iam_iterator *src) 
++{ 
++ dst->ii_flags = src->ii_flags; 
++ dst->ii_state = src->ii_state; 
++ /* XXX not yet. iam_path_dup(&dst->ii_path, &src->ii_path); */ 
++ /* 
++ * XXX: duplicate lock. 
++ */ 
++ assert(it_state(dst) == it_state(src)); 
++ assert(iam_it_container(dst) == iam_it_container(src)); 
++ assert(dst->ii_flags == src->ii_flags); 
++ assert(ergo(it_state(src) == IAM_IT_ATTACHED, 
++ iam_it_rec_get(dst) == iam_it_rec_get(src) && 
++ iam_it_key_get(dst, it_scratch_key(dst, 0)) == 
++ iam_it_key_get(src, it_scratch_key(src, 0)))); 
++ 
++} 
++/* 
++ * Detach iterator. Does nothing in detached state. 
++ * 
++ * postcondition: it_state(it) == IAM_IT_DETACHED 
++ */ 
++void iam_it_put(struct iam_iterator *it) 
++{ 
++ if (it->ii_state == IAM_IT_ATTACHED) { 
++ it->ii_state = IAM_IT_DETACHED; 
++ iam_leaf_fini(&it->ii_path); 
++ iam_it_unlock(it); 
++ } 
++} 
++ 
++/* 
++ * Move iterator one record right. 
++ * ++ * Return value: 0: success, ++ * +1: end of container reached ++ * -ve: error ++ * ++ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE ++ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED) ++ */ ++int iam_it_next(struct iam_iterator *it) ++{ ++ int result; ++ struct iam_container *c; ++ ++ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE); ++ ++ c = iam_it_container(it); ++ if (iam_leaf_at_end(c, &it->ii_path.ip_leaf)) { ++ /* advance index portion of the path */ ++ result = 0; /* XXX not yet iam_index_next(&it->ii_path); */ ++ if (result == 0) { ++ result = 0; /* XXX not yet iam_read_leaf(&it->ii_path); */ ++ if (result == 0) ++ iam_leaf_start(c, &it->ii_path.ip_leaf); ++ } else if (result > 0) ++ /* end of container reached */ ++ result = +1; ++ if (result < 0) ++ iam_it_put(it); ++ } else { ++ /* advance within leaf node */ ++ iam_leaf_next(c, &it->ii_path.ip_leaf); ++ result = 0; ++ } ++ assert(ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED)); ++ return result; ++} ++ ++/* ++ * Return pointer to the record under iterator. ++ * ++ * precondition: it_state(it) == IAM_IT_ATTACHED ++ * postcondition: it_state(it) == IAM_IT_ATTACHED ++ */ ++struct iam_rec *iam_it_rec_get(struct iam_iterator *it) ++{ ++ assert(it_state(it) == IAM_IT_ATTACHED); ++ return iam_leaf_rec(iam_it_container(it), &it->ii_path.ip_leaf); ++} ++ ++static void iam_it_reccpy(struct iam_iterator *it, struct iam_rec *r) ++{ ++ memcpy(iam_leaf_rec(iam_it_container(it), &it->ii_path.ip_leaf), r, ++ iam_it_container(it)->ic_descr->id_rec_size); ++} ++ ++/* ++ * Replace contents of record under iterator. 
++ * ++ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE ++ * postcondition: it_state(it) == IAM_IT_ATTACHED && ++ * ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...)) ++ */ ++int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r) ++{ ++ int result; ++ ++ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE); ++ ++ result = ext3_journal_get_write_access(h, it->ii_path.ip_leaf.il_bh); ++ if (result == 0) ++ iam_it_reccpy(it, r); ++ return result; ++} ++ ++/* ++ * Return pointer to the key under iterator. ++ * ++ * precondition: it_state(it) == IAM_IT_ATTACHED ++ * postcondition: it_state(it) == IAM_IT_ATTACHED ++ */ ++struct iam_key *iam_it_key_get(struct iam_iterator *it, struct iam_key *k) ++{ ++ assert(it_state(it) == IAM_IT_ATTACHED); ++ return iam_leaf_key(iam_it_container(it), &it->ii_path.ip_leaf, k); ++} ++ ++static int iam_leaf_rec_add(handle_t *handle, struct iam_path *path) ++{ ++ int err; ++ ++ iam_rec_add(path, NULL, NULL); ++ err = ext3_journal_dirty_metadata(handle, path->ip_leaf.il_bh); ++ if (err) ++ ext3_std_error(iam_path_obj(path)->i_sb, err); ++ return err; ++} ++ ++static int iam_leaf_can_add (struct iam_container *c, struct iam_leaf *l, ++ struct iam_key *k, struct iam_rec *r) ++{ ++ return iam_container_descr(c)->id_leaf_ops->can_add(c, l, k, r); ++} ++ ++static int split_leaf_node(handle_t *handle, struct iam_path *path) ++{ ++#if 0 ++ struct inode *dir = iam_path_obj(path); ++ unsigned continued = 0; ++ struct buffer_head *bh2; ++ u32 newblock, hash_split; ++ char *data2; ++ unsigned split; ++ int err; ++ ++ bh2 = ext3_append (handle, dir, &newblock, &err); ++ if (!(bh2)) ++ return -ENOSPC; ++ ++ err = iam_leaf_load(path); ++ if (err) ++ goto errout; ++ ++ BUFFER_TRACE(path->ip_leaf.il_bh, "get_write_access"); ++ err = ext3_journal_get_write_access(handle, path->ip_leaf.il_bh); ++ if (err) { ++ journal_error: ++ iam_leaf_fini(path); ++ brelse(bh2); ++ 
ext3_std_error(dir->i_sb, err); 
++ err = -EIO; 
++ goto errout; 
++ } 
++ data2 = bh2->b_data; 
++ split = dx_get_count((struct iam_entry*)iam_leaf_entries(path))/2; 
++ hash_split = *(__u32*)iam_leaf_key_at(path, 
++ iam_lentry_shift(path, iam_leaf_entries(path), 
++ split)); 
++ if (iam_keycmp(path->ip_container, iam_leaf_key_at(path, 
++ iam_lentry_shift(path, iam_leaf_entries(path), split)), 
++ iam_leaf_key_at(path, 
++ iam_lentry_shift(path, iam_leaf_entries(path), split -1))) == 0) 
++ continued = 1; 
++ 
++ memcpy(iam_lentry_shift(path, (struct iam_lentry *)data2, 1), 
++ iam_lentry_shift(path, iam_leaf_entries(path), split), 
++ split * iam_lfix_entry_size(path)); 
++ 
++ /* Which block gets the new entry? */ 
++ dx_insert_block(path, path->ip_frame, hash_split + continued, newblock); 
++ err = ext3_journal_dirty_metadata (handle, bh2); 
++ if (err) 
++ goto journal_error; 
++ err = ext3_journal_dirty_metadata (handle, path->ip_leaf.il_bh); 
++ if (err) 
++ goto journal_error; 
++errout: 
++ brelse (bh2); 
++ return err; 
++#endif 
++ return 0; 
++} 
++ 
++int iam_add_rec(handle_t *handle, struct iam_path *path, 
++ struct iam_key *k, struct iam_rec *r) 
++{ 
++ int err; 
++ 
++ if (iam_leaf_can_add(path->ip_container, &path->ip_leaf, k, r)) { 
++ err = iam_leaf_rec_add(handle, path); 
++ } else { 
++ err = split_index_node(handle, path); 
++ if (err == 0) { 
++ err = split_leaf_node(handle, path); 
++ if (err == 0) 
++ err = iam_leaf_rec_add(handle, path); 
++ } 
++ } 
++ return err; 
++} 
++ 
++/* 
++ * Insert new record with key @k and contents from @r, shifting records to the 
++ * right. 
++ * ++ * precondition: it_state(it) == IAM_IT_ATTACHED && ++ * it->ii_flags&IAM_IT_WRITE && ++ * it_keycmp(it, iam_it_key_get(it, *), k) < 0 ++ * postcondition: it_state(it) == IAM_IT_ATTACHED && ++ * ergo(result == 0, ++ * it_keycmp(it, iam_it_key_get(it, *), k) == 0 && ++ * !memcmp(iam_it_rec_get(it), r, ...)) ++ */ ++int iam_it_rec_insert(handle_t *h, struct iam_iterator *it, ++ struct iam_key *k, struct iam_rec *r) ++{ ++ int result; ++ ++ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE); ++ assert(it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)), k) < 0); ++ ++ result = iam_add_rec(h, &it->ii_path, k, r); ++ if (result == 0) { ++ /* place record and key info freed space. Leaf node is already ++ * in transaction. */ ++ iam_it_reccpy(it, r); ++ /* ++ * XXX TBD. ++ */ ++ } ++ assert(it_state(it) == IAM_IT_ATTACHED); ++ assert(ergo(result == 0, ++ it_keycmp(it, ++ iam_it_key_get(it, ++ it_scratch_key(it, 0)), k) == 0 && ++ !memcmp(iam_it_rec_get(it), r, ++ iam_it_container(it)->ic_descr->id_rec_size))); ++ return result; ++} ++ ++static int iam_leaf_rec_remove(handle_t *handle, struct iam_container *c, ++ struct iam_path *path) ++{ ++ int err; ++ ++ iam_rec_del(path); ++ err = ext3_journal_dirty_metadata(handle, path->ip_leaf.il_bh); ++ if (err) ++ ext3_std_error(iam_path_obj(path)->i_sb, err); ++ return err; ++} ++ ++/* ++ * Delete record under iterator. ++ * ++ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE ++ * postcondition: it_state(it) == IAM_IT_ATTACHED ++ */ ++int iam_it_rec_delete(handle_t *h, struct iam_iterator *it) ++{ ++ int result; ++ ++ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE); ++ ++ result = ext3_journal_get_write_access(h, it->ii_path.ip_leaf.il_bh); ++ /* ++ * no compaction for now. ++ */ ++ if (result == 0) ++ iam_leaf_rec_remove(h, iam_it_container(it), &it->ii_path); ++ ++ return result; ++} ++ ++/* ++ * Convert iterator to cookie. 
++ * ++ * precondition: it_state(it) == IAM_IT_ATTACHED && ++ * iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t) ++ * postcondition: it_state(it) == IAM_IT_ATTACHED ++ */ ++iam_pos_t iam_it_store(struct iam_iterator *it) ++{ ++ iam_pos_t result; ++ ++ assert(it_state(it) == IAM_IT_ATTACHED); ++ assert(iam_it_container(it)->ic_descr->id_key_size <= sizeof result); ++ ++ result = 0; ++ iam_it_key_get(it, (struct iam_key *)&result); ++ return result; ++} ++ ++/* ++ * Restore iterator from cookie. ++ * ++ * precondition: it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE && ++ * iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t) ++ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED && ++ * iam_it_store(it) == pos) ++ */ ++int iam_it_load(struct iam_iterator *it, iam_pos_t pos) ++{ ++ assert(it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE); ++ assert(iam_it_container(it)->ic_descr->id_key_size <= sizeof pos); ++ return iam_it_get(it, (struct iam_key *)&pos); ++} ++ ++/***********************************************************************/ ++/* invariants */ ++/***********************************************************************/ ++ ++static inline int ptr_inside(void *base, size_t size, void *ptr) ++{ ++ return (base <= ptr) && (ptr < base + size); ++} ++ ++int iam_frame_invariant(struct iam_frame *f) ++{ ++ return ++ (f->bh != NULL && ++ f->bh->b_data != NULL && ++ ptr_inside(f->bh->b_data, f->bh->b_size, f->entries) && ++ ptr_inside(f->bh->b_data, f->bh->b_size, f->at) && ++ f->entries <= f->at); ++} ++int iam_leaf_invariant(struct iam_leaf *l) ++{ ++ return ++ l->il_bh != NULL && ++ l->il_bh->b_data != NULL && ++ ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_entries) && ++ ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_at) && ++ l->il_entries <= l->il_at; ++} ++ ++int iam_path_invariant(struct iam_path *p) ++{ ++ int i; ++ ++ if (p->ip_container == NULL || ++ p->ip_indirect < 0 || 
p->ip_indirect > DX_MAX_TREE_HEIGHT - 1 || ++ p->ip_frame != p->ip_frames + p->ip_indirect || ++ !iam_leaf_invariant(&p->ip_leaf)) ++ return 0; ++ for (i = 0; i < ARRAY_SIZE(p->ip_frames); ++i) { ++ if (i <= p->ip_indirect) { ++ if (!iam_frame_invariant(&p->ip_frames[i])) ++ return 0; ++ } ++ } ++ return 1; ++} ++ ++int iam_it_invariant(struct iam_iterator *it) +{ ++ return ++ (it->ii_state == IAM_IT_DETACHED || ++ it->ii_state == IAM_IT_ATTACHED) && ++ !(it->ii_flags & ~(IAM_IT_MOVE | IAM_IT_WRITE)) && ++ ergo(it->ii_state == IAM_IT_ATTACHED, ++ iam_path_invariant(&it->ii_path)); ++} ++ ++/* ++ * Search container @c for record with key @k. If record is found, its data ++ * are moved into @r. ++ * ++ * ++ * ++ * Return values: +ve: found, 0: not-found, -ve: error ++ */ ++int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r, ++ struct iam_path_descr *pd) ++{ ++ struct iam_iterator it; ++ int result; ++ ++ iam_it_init(&it, c, 0, pd); ++ ++ result = iam_it_get_exact(&it, k); ++ if (result == 0) ++ /* ++ * record with required key found, copy it into user buffer ++ */ ++ iam_reccpy(&it.ii_path, r, iam_it_rec_get(&it)); ++ iam_it_put(&it); ++ iam_it_fini(&it); ++ return result; ++} ++EXPORT_SYMBOL(iam_lookup); ++ ++/* ++ * Insert new record @r with key @k into container @c (within context of ++ * transaction @h. ++ * ++ * Return values: 0: success, -ve: error, including -EEXIST when record with ++ * given key is already present. 
++ * ++ * postcondition: ergo(result == 0 || result == -EEXIST, ++ * iam_lookup(c, k, r2) > 0 && ++ * !memcmp(r, r2, c->ic_descr->id_rec_size)); ++ */ ++int iam_insert(handle_t *h, struct iam_container *c, ++ struct iam_key *k, struct iam_rec *r, struct iam_path_descr *pd) ++{ ++ struct iam_iterator it; ++ int result; ++ ++ iam_it_init(&it, c, IAM_IT_WRITE, pd); ++ ++ result = iam_it_get_exact(&it, k); ++ if (result == -ENOENT) ++ result = iam_it_rec_insert(h, &it, k, r); ++ else if (result == 0) ++ result = -EEXIST; ++ iam_it_put(&it); ++ iam_it_fini(&it); ++ return result; ++} ++EXPORT_SYMBOL(iam_insert); ++ ++int iam_update(handle_t *h, struct iam_container *c, ++ struct iam_key *k, struct iam_rec *r, struct iam_path_descr *pd) ++{ ++ struct iam_iterator it; ++ int result; ++ ++ iam_it_init(&it, c, IAM_IT_WRITE, pd); ++ ++ result = iam_it_get_exact(&it, k); ++ if (result == 0) ++ iam_it_rec_set(h, &it, r); ++ iam_it_put(&it); ++ iam_it_fini(&it); ++ return result; ++} ++EXPORT_SYMBOL(iam_update); ++ ++/* ++ * Delete existing record with key @k. ++ * ++ * Return values: 0: success, -ENOENT: not-found, -ve: other error. 
++ * ++ * postcondition: ergo(result == 0 || result == -ENOENT, ++ * !iam_lookup(c, k, *)); ++ */ ++int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k, ++ struct iam_path_descr *pd) ++{ ++ struct iam_iterator it; ++ int result; ++ ++ iam_it_init(&it, c, IAM_IT_WRITE, pd); ++ ++ result = iam_it_get_exact(&it, k); ++ if (result == 0) ++ iam_it_rec_delete(h, &it); ++ iam_it_put(&it); ++ iam_it_fini(&it); ++ return result; ++} ++EXPORT_SYMBOL(iam_delete); ++ +Index: linux-2.6.9/fs/ext3/Makefile +=================================================================== +--- linux-2.6.9.orig/fs/ext3/Makefile 2006-05-23 17:01:07.000000000 +0800 ++++ linux-2.6.9/fs/ext3/Makefile 2006-05-23 17:01:09.000000000 +0800 +@@ -6,7 +6,7 @@ - struct iam_entry *entries; /* old block contents */ - struct iam_entry *entries2; /* new block contents */ - struct iam_frame *frame, *safe; - struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {0}; - u32 newblock[DX_MAX_TREE_HEIGHT] = {0}; -- struct inode *dir = path_obj(path); -+ struct inode *dir = iam_path_obj(path); - int nr_splet; - int i, err; - -@@ -2442,7 +1597,8 @@ static int split_index_node(handle_t *ha - for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) { - bh_new[i] = ext3_append (handle, dir, &newblock[i], &err); - if (!bh_new[i] || -- path_descr(path)->id_node_init(path->ip_container, bh_new[i], 0) != 0) -+ iam_path_descr(path)->id_node_init(path->ip_container, -+ bh_new[i], 0) != 0) - goto cleanup; - BUFFER_TRACE(frame->bh, "get_write_access"); - err = ext3_journal_get_write_access(handle, frame->bh); -@@ -2516,9 +1672,9 @@ static int split_index_node(handle_t *ha - unsigned count1 = count/2, count2 = count - count1; - unsigned hash2; - -- dx_get_key(path, -- iam_entry_shift(path, entries, count1), -- (struct iam_key *)&hash2); -+ iam_get_key(path, -+ iam_entry_shift(path, entries, count1), -+ (struct iam_key *)&hash2); - - dxtrace(printk("Split index %i/%i\n", count1, count2)); - -@@ -2578,7 +1734,7 @@ 
static int ext3_dx_add_entry(handle_t *h - size_t isize; - - iam_path_compat_init(&cpath, dir); -- param = path_descr(path); -+ param = iam_path_descr(path); - - err = dx_probe(dentry, NULL, &hinfo, path); - if (err != 0) -@@ -2588,7 +1744,7 @@ static int ext3_dx_add_entry(handle_t *h - /* XXX nikita: global serialization! */ - isize = dir->i_size; - -- err = param->id_node_read(path->ip_container, (iam_ptr_t)dx_get_block(path, frame->at), -+ err = param->id_node_read(path->ip_container, (iam_ptr_t)dx_get_block(path, frame->at), - handle, &bh); - if (err != 0) - goto cleanup; -@@ -2724,12 +1880,12 @@ static struct inode * ext3_new_inode_wan - * is so far negative - it has no inode. - * - * If the create succeeds, we fill in the inode information -- * with d_instantiate(). -+ * with d_instantiate(). - */ - static int ext3_create (struct inode * dir, struct dentry * dentry, int mode, - struct nameidata *nd) - { -- handle_t *handle; -+ handle_t *handle; - struct inode * inode; - int err, retries = 0; + ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ + ioctl.o namei.o super.o symlink.o hash.o resize.o \ +- extents.o mballoc.o ++ extents.o mballoc.o iam.o -Index: iam/include/linux/lustre_iam.h + ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o + ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o +Index: linux-2.6.9/include/linux/lustre_iam.h =================================================================== ---- iam.orig/include/linux/lustre_iam.h 2006-05-10 18:21:01.000000000 +0400 -+++ iam/include/linux/lustre_iam.h 2006-05-10 21:22:41.000000000 +0400 +--- linux-2.6.9.orig/include/linux/lustre_iam.h 2006-05-23 17:01:09.000000000 +0800 ++++ linux-2.6.9/include/linux/lustre_iam.h 2006-05-24 17:41:04.000000000 +0800 @@ -1,3 +1,39 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: @@ -2346,7 +2618,7 @@ Index: iam/include/linux/lustre_iam.h /* * Maximal number of non-leaf 
levels in htree. In the stock ext3 this is 2. */ -@@ -30,6 +66,11 @@ struct iam_key; +@@ -30,6 +66,11 @@ /* Incomplete type use to refer to the records stored in iam containers. */ struct iam_rec; @@ -2358,7 +2630,7 @@ Index: iam/include/linux/lustre_iam.h typedef __u64 iam_ptr_t; /* -@@ -41,12 +82,17 @@ struct iam_frame { +@@ -41,45 +82,23 @@ struct iam_entry *at; /* target entry, found by binary search */ }; @@ -2380,58 +2652,168 @@ Index: iam/include/linux/lustre_iam.h }; struct iam_path; -@@ -115,6 +161,15 @@ struct iam_descr { - /* - * leaf operations. - */ -+ -+ /* -+ * initialize just loaded leaf node. -+ */ -+ int (*init)(struct iam_container *c, struct iam_leaf *l); -+ /* -+ * Release resources. -+ */ -+ void (*fini)(struct iam_container *c, struct iam_leaf *l); - /* - * returns true iff leaf is positioned at the last entry. - */ -@@ -123,12 +178,30 @@ struct iam_descr { - void (*start)(struct iam_container *c, struct iam_leaf *l); - /* more leaf to the next entry. */ - void (*next)(struct iam_container *c, struct iam_leaf *l); + struct iam_container; + +-/* +- * Parameters, describing a flavor of iam container. +- */ +-struct iam_descr { +- /* +- * Size of a key in this container, in bytes. +- */ +- size_t id_key_size; +- /* +- * Size of a pointer to the next level (stored in index nodes), in +- * bytes. +- */ +- size_t id_ptr_size; +- /* +- * Size of a record (stored in leaf nodes), in bytes. +- */ +- size_t id_rec_size; +- /* +- * Size of unused (by iam) space at the beginning of every non-root +- * node, in bytes. Used for compatibility with ext3. +- */ +- size_t id_node_gap; +- /* +- * Size of unused (by iam) space at the beginning of root node, in +- * bytes. Used for compatibility with ext3. +- */ +- size_t id_root_gap; +- ++struct iam_operations { + /* + * Returns pointer (in the same sense as pointer in index entry) to + * the root node. +@@ -111,25 +130,107 @@ + * contains single record with the smallest possible key. 
+ */ + int (*id_create)(struct iam_container *c); +- struct { +- /* +- * leaf operations. +- */ +- /* +- * returns true iff leaf is positioned at the last entry. +- */ +- int (*at_end)(struct iam_container *c, struct iam_leaf *l); +- /* position leaf at the first entry */ +- void (*start)(struct iam_container *c, struct iam_leaf *l); +- /* more leaf to the next entry. */ +- void (*next)(struct iam_container *c, struct iam_leaf *l); - /* return key of current leaf record in @k */ - void (*key)(struct iam_container *c, struct iam_leaf *l, - struct iam_key *k); - /* return pointer to entry body */ -+ /* return key of current leaf record. This method may return -+ * either pointer to the key stored in node, or copy key into -+ * @k buffer supplied by caller and return pointer to this -+ * buffer. The latter approach is used when keys in nodes are -+ * not stored in plain form (e.g., htree doesn't store keys at -+ * all). -+ * -+ * Caller should assume that returned pointer is only valid -+ * while leaf node is pinned and locked.*/ -+ struct iam_key *(*key)(struct iam_container *c, -+ struct iam_leaf *l, struct iam_key *k); -+ /* return pointer to entry body. Pointer is valid while -+ corresponding leaf node is locked and pinned. */ - struct iam_rec *(*rec)(struct iam_container *c, - struct iam_leaf *l); +- struct iam_rec *(*rec)(struct iam_container *c, +- struct iam_leaf *l); +- } id_leaf; ++}; + -+ /* -+ * Search leaf @l for a record with key @k or for a place -+ * where such record is to be inserted. -+ * -+ * Scratch keys from @path can be used. -+ */ -+ int (*lookup)(struct iam_container *c, struct iam_path *path, -+ struct iam_leaf *l, struct iam_key *k); - } id_leaf; ++struct iam_leaf_operations { ++ /* ++ * leaf operations. ++ */ ++ ++ /* ++ * initialize just loaded leaf node. ++ */ ++ int (*init)(struct iam_container *c, struct iam_leaf *p); ++ /* ++ * Release resources. 
++ */ ++ void (*fini)(struct iam_container *c, struct iam_leaf *l); ++ /* ++ * returns true iff leaf is positioned at the last entry. ++ */ ++ int (*at_end)(struct iam_container *c, struct iam_leaf *l); ++ /* position leaf at the first entry */ ++ void (*start)(struct iam_container *c, struct iam_leaf *l); ++ /* more leaf to the next entry. */ ++ void (*next)(struct iam_container *c, struct iam_leaf *l); ++ /* return key of current leaf record. This method may return ++ * either pointer to the key stored in node, or copy key into ++ * @k buffer supplied by caller and return pointer to this ++ * buffer. The latter approach is used when keys in nodes are ++ * not stored in plain form (e.g., htree doesn't store keys at ++ * all). ++ * ++ * Caller should assume that returned pointer is only valid ++ * while leaf node is pinned and locked.*/ ++ struct iam_key *(*key)(struct iam_container *c, ++ struct iam_leaf *l, struct iam_key *k); ++ /* return pointer to entry body. Pointer is valid while ++ corresponding leaf node is locked and pinned. */ ++ struct iam_rec *(*rec)(struct iam_container *c, ++ struct iam_leaf *l); ++ ++ /* ++ * Search leaf @l for a record with key @k or for a place ++ * where such record is to be inserted. ++ * ++ * Scratch keys from @path can be used. 
++ */ ++ int (*lookup)(struct iam_container *c, struct iam_path *path, ++ struct iam_leaf *l, struct iam_key *k); ++ ++ int (*can_add) (struct iam_container *c, struct iam_leaf *l, ++ struct iam_key *k, struct iam_rec *r); ++ /*add rec for a leaf*/ ++ void (*rec_add)(struct iam_path *path, struct iam_key *k, ++ struct iam_rec *r); ++ /*remove rec for a leaf*/ ++ void (*rec_del)(struct iam_path *path); ++}; ++ ++struct iam_root { ++ struct iam_root_info { ++ u8 indirect_levels; ++ u8 pad[3]; ++ } info; ++ struct {} entries[0]; ++}; ++ ++#define IAM_LEAF_HEADER_MAGIC 0x1976 ++struct iam_leaf_head { ++ __le16 ill_magic; ++ __le16 ill_count; ++}; ++ ++/* ++ * Parameters, describing a flavor of iam container. ++ */ ++struct iam_descr { ++ /* ++ * Size of a key in this container, in bytes. ++ */ ++ size_t id_key_size; ++ /* ++ * Size of a pointer to the next level (stored in index nodes), in ++ * bytes. ++ */ ++ size_t id_ptr_size; ++ /* ++ * Size of a record (stored in leaf nodes), in bytes. ++ */ ++ size_t id_rec_size; ++ /* ++ * Size of unused (by iam) space at the beginning of every non-root ++ * node, in bytes. Used for compatibility with ext3. ++ */ ++ size_t id_node_gap; ++ /* ++ * Size of unused (by iam) space at the beginning of root node, in ++ * bytes. Used for compatibility with ext3. ++ */ ++ size_t id_root_gap; ++ ++ struct iam_operations *id_ops; ++ struct iam_leaf_operations *id_leaf_ops; }; -@@ -149,6 +222,17 @@ struct iam_container { + struct iam_container { +@@ -149,6 +250,17 @@ }; /* @@ -2449,7 +2831,7 @@ Index: iam/include/linux/lustre_iam.h * Structure to keep track of a path drilled through htree. */ struct iam_path { -@@ -172,34 +256,232 @@ struct iam_path { +@@ -172,34 +284,232 @@ /* * Leaf node: a child of ->ip_frame. */ @@ -2461,12 +2843,12 @@ Index: iam/include/linux/lustre_iam.h struct iam_key *ip_key_target; /* - * Scratch-pad area for temporary keys. -- */ ++ * Description-specific data. 
+ */ - struct iam_key *ip_key_scratch[DX_SCRATCH_KEYS]; - /* - * pointer to flavor-specific per-container data. -+ * Description-specific data. - */ +- */ - void *ip_descr_data; + struct iam_path_descr *ip_data; }; @@ -2696,7 +3078,7 @@ Index: iam/include/linux/lustre_iam.h /* * Initialize container @c, acquires additional reference on @inode. */ -@@ -210,3 +492,149 @@ int iam_container_init(struct iam_contai +@@ -210,3 +520,143 @@ */ void iam_container_fini(struct iam_container *c); @@ -2704,6 +3086,11 @@ Index: iam/include/linux/lustre_iam.h +#define assert(test) J_ASSERT(test) +#endif + ++static inline struct iam_descr *iam_container_descr(struct iam_container *c) ++{ ++ return c->ic_descr; ++} ++ +static inline struct iam_descr *iam_path_descr(struct iam_path *p) +{ + return p->ip_container->ic_descr; @@ -2723,7 +3110,7 @@ Index: iam/include/linux/lustre_iam.h +static inline int iam_keycmp(struct iam_container *c, + struct iam_key *k1, struct iam_key *k2) +{ -+ return c->ic_descr->id_keycmp(c, k1, k2); ++ return c->ic_descr->id_ops->id_keycmp(c, k1, k2); +} + +static inline void *iam_entry_off(struct iam_entry *entry, size_t off) @@ -2812,6 +3199,11 @@ Index: iam/include/linux/lustre_iam.h +struct buffer_head *ext3_append(handle_t *handle, struct inode *inode, + u32 *block, int *err); +int split_index_node(handle_t *handle, struct iam_path *path); ++ ++extern struct iam_leaf_operations generic_leaf_ops; ++extern struct iam_operations generic_iam_ops; ++ ++ +/* + * external + */ @@ -2821,28 +3213,12 @@ Index: iam/include/linux/lustre_iam.h +void iam_container_read_lock(struct iam_container *c); +void iam_container_read_unlock(struct iam_container *c); + -+int iam_path_lookup(struct iam_path *p); -+ -+void iam_path_dup(struct iam_path *dst, struct iam_path *src); -+ -+int iam_leaf_at_end(struct iam_container *c, struct iam_leaf *leaf); -+void iam_leaf_start(struct iam_container *c, struct iam_leaf *leaf); -+struct iam_rec *iam_leaf_rec(struct iam_container *c, 
struct iam_leaf *leaf); -+struct iam_key *iam_leaf_key(struct iam_container *c, struct iam_leaf *leaf, -+ struct iam_key *key); -+ +int iam_index_next(struct iam_path *p); +int iam_read_leaf(struct iam_path *p); + -+int iam_add_rec(handle_t *h, struct iam_path *p); -+ -+__u32 iam_root_ptr(struct iam_container *c); -+int iam_node_check(struct iam_path *path, struct iam_frame *frame); -+int iam_node_init(struct iam_container *c, struct buffer_head *bh, int root); +int iam_keycmp(struct iam_container *c, struct iam_key *k1, struct iam_key *k2); +int iam_node_read(struct iam_container *c, iam_ptr_t ptr, -+ handle_t *h, struct buffer_head **bh); -+ ++ handle_t *handle, struct buffer_head **bh); + +/* __LINUX_LUSTRE_IAM_H__ */ +#endif