Index: iam/fs/ext3/Makefile
===================================================================
--- iam.orig/fs/ext3/Makefile 2006-05-31 20:24:32.000000000 +0400
-+++ iam/fs/ext3/Makefile 2006-06-08 21:50:42.000000000 +0400
++++ iam/fs/ext3/Makefile 2006-06-16 14:39:59.000000000 +0400
@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
Index: iam/fs/ext3/iam.c
===================================================================
--- iam.orig/fs/ext3/iam.c 2004-04-06 17:27:52.000000000 +0400
-+++ iam/fs/ext3/iam.c 2006-06-08 19:42:19.000000000 +0400
-@@ -0,0 +1,1163 @@
++++ iam/fs/ext3/iam.c 2006-06-15 19:51:50.000000000 +0400
+@@ -0,0 +1,1246 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+}
+EXPORT_SYMBOL(iam_format_register);
+
++/*
++ * Determine format of given container. This is done by scanning list of
++ * registered formats and calling ->if_guess() method of each in turn.
++ */
+static int iam_format_guess(struct iam_container *c)
+{
+ int result;
+ return !iam_leaf_at_end(&it->ii_path.ip_leaf);
+}
+
++static inline int it_before(const struct iam_iterator *it)
++{
++ return it_state(it) == IAM_IT_SKEWED && it_at_rec(it); /* skewed with it_at_rec() true: iam_it_next() only switches state to ATTACHED */
++}
++
+/*
+ * Helper wrapper around iam_it_get(): returns 0 (success) only when record
+ * with exactly the same key as asked is found.
+
+ result = iam_it_get(it, k);
+ if (result == 0 &&
-+ (!it_at_rec(it) ||
++ (it_state(it) != IAM_IT_ATTACHED ||
+ it_keycmp(it, k, iam_it_key_get(it, it_scratch_key(it, 1))) != 0))
+ /*
+ * Return -ENOENT if cursor is located above record with a key
+ iam_path_fini(&it->ii_path);
+}
+
++/*
++ * Performs tree top-to-bottom traversal starting from root, and loads leaf
++ * node.
++ */
+static int iam_path_lookup(struct iam_path *path)
+{
+ struct iam_container *c;
+
+/*
+ * Attach iterator. After successful completion, @it points to record with
-+ * smallest key not larger than @k.
++ * largest key not larger than @k.
+ *
+ * Return value: 0: positioned on existing record,
+ * -ve: error.
+ case IAM_LOOKUP_OK:
+ it->ii_state = IAM_IT_ATTACHED;
+ break;
-+ case IAM_LOOKUP_EMPTY:
-+ it->ii_state = IAM_IT_EMPTY;
-+ break;
+ case IAM_LOOKUP_BEFORE:
-+ it->ii_state = IAM_IT_BEFORE;
++ case IAM_LOOKUP_EMPTY:
++ it->ii_state = IAM_IT_SKEWED;
+ break;
+ default:
+ assert(0);
+}
+
+/*
++ * Attach iterator, and ensure it points to a record (not skewed). When
++ * iam_it_get() succeeds but leaves the iterator in IAM_IT_SKEWED state, the
++ * iterator is advanced with iam_it_next().
++ *
++ * Return value: 0: positioned on existing record,
++ * +1: end of container reached (propagated from iam_it_next()),
++ * -ve: error.
++ *
++ * NOTE(review): the skewed case calls iam_it_next(), which asserts
++ * it->ii_flags&IAM_IT_MOVE; the precondition below does not require that
++ * flag -- confirm that all callers set IAM_IT_MOVE.
++ *
++ * precondition: it_state(it) == IAM_IT_DETACHED &&
++ * !(it->ii_flags&IAM_IT_WRITE)
++ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED)
++ */
++int iam_it_get_at(struct iam_iterator *it, const struct iam_key *k)
++{
++ int result;
++ assert(it_state(it) == IAM_IT_DETACHED && !(it->ii_flags&IAM_IT_WRITE));
++ result = iam_it_get(it, k);
++ if (result == 0) {
++ if (it_state(it) != IAM_IT_ATTACHED) {
++ assert(it_state(it) == IAM_IT_SKEWED);
++ result = iam_it_next(it);
++ }
++ }
++ assert(ergo(result == 0, it_state(it) == IAM_IT_ATTACHED));
++ return result;
++}
++
++/*
+ * Duplicates iterator.
+ *
+ * postcondition: it_state(dst) == it_state(src) &&
+ iam_it_key_get(src, it_scratch_key(src, 0))));
+
+}
++
+/*
+ * Detach iterator. Does nothing in detached state.
+ *
+ * +1: end of container reached
+ * -ve: error
+ *
-+ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
-+ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED)
++ * precondition: (it_state(it) == IAM_IT_ATTACHED ||
++ * it_state(it) == IAM_IT_SKEWED) && it->ii_flags&IAM_IT_MOVE
++ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED) &&
++ * ergo(result > 0, it_state(it) == IAM_IT_DETACHED)
+ */
+int iam_it_next(struct iam_iterator *it)
+{
+
+ assert(it->ii_flags&IAM_IT_MOVE);
+ assert(it_state(it) == IAM_IT_ATTACHED ||
-+ it_state(it) == IAM_IT_BEFORE || it_state(it) == IAM_IT_EMPTY);
++ it_state(it) == IAM_IT_SKEWED);
+
+ path = &it->ii_path;
+ leaf = &path->ip_leaf;
+
+ result = 0;
-+ if (it_state(it) == IAM_IT_BEFORE) {
++ if (it_before(it)) {
+ assert(!iam_leaf_at_end(leaf));
+ it->ii_state = IAM_IT_ATTACHED;
+ } else {
+ /* advance index portion of the path */
+ result = iam_index_next(iam_it_container(it), path);
+ if (result == 1) {
++ iam_leaf_fini(leaf);
+ result = iam_leaf_load(path);
+ if (result == 0)
+ iam_leaf_start(leaf);
-+ } else if (result == 0) {
++ } else if (result == 0)
+ /* end of container reached */
-+ it->ii_state = IAM_IT_EOC;
+ result = +1;
-+ }
-+ if (result < 0)
++ if (result != 0)
+ iam_it_put(it);
+ }
+ }
+ assert(ergo(result == 0, it_state(it) == IAM_IT_ATTACHED));
++ assert(ergo(result > 0, it_state(it) == IAM_IT_DETACHED));
+ return result;
+}
+
+ * Replace contents of record under iterator.
+ *
+ * precondition: it_state(it) == IAM_IT_ATTACHED &&
-+ * it->ii_flags&IAM_IT_WRITE &&
-+ * it_at_rec(it)
++ * it->ii_flags&IAM_IT_WRITE
+ * postcondition: it_state(it) == IAM_IT_ATTACHED &&
+ * ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
+ */
+int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r)
+{
+ int result;
++ struct iam_path *path;
++ struct buffer_head *bh;
+
+ assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE);
+ assert(it_at_rec(it));
+
-+ result = iam_txn_add(h, &it->ii_path, it->ii_path.ip_leaf.il_bh);
-+ if (result == 0)
++ path = &it->ii_path;
++ bh = path->ip_leaf.il_bh;
++ result = iam_txn_add(h, path, bh);
++ if (result == 0) {
+ iam_it_reccpy(it, r);
++ result = iam_txn_dirty(h, path, bh);
++ }
+ return result;
+}
+
+/*
++ * Assertionless version of iam_it_key_get(): returns the result of
++ * iam_leaf_key() for the iterator's leaf without checking iterator state.
++ *
++ * iam_it_key_get() asserts IAM_IT_ATTACHED, so code that needs the key while
++ * the iterator is skewed (e.g., assertions guarded by it_before()) has to
++ * use this variant instead.
++ *
++ * @k is passed through to iam_leaf_key() unchanged (presumably scratch space
++ * for the key -- confirm against the leaf operations).
++ */
++static struct iam_key *__iam_it_key_get(const struct iam_iterator *it,
++ struct iam_key *k)
++{
++ return iam_leaf_key(&it->ii_path.ip_leaf, k);
++}
++
++/*
+ * Return pointer to the key under iterator.
+ *
+ * precondition: it_state(it) == IAM_IT_ATTACHED ||
-+ * it_state(it) == IAM_IT_BEFORE
++ * it_state(it) == IAM_IT_SKEWED
+ * postcondition: it_state(it) == IAM_IT_ATTACHED
+ */
+struct iam_key *iam_it_key_get(const struct iam_iterator *it, struct iam_key *k)
+{
-+ assert(it_state(it) == IAM_IT_ATTACHED ||
-+ it_state(it) == IAM_IT_BEFORE);
++ assert(it_state(it) == IAM_IT_ATTACHED);
+ assert(it_at_rec(it));
-+ return iam_leaf_key(&it->ii_path.ip_leaf, k);
++ return __iam_it_key_get(it, k);
+}
+
++/*
++ * Insertion of new record. Interaction with jbd during non-trivial case (when
++ * split happens) is as follows:
++ *
++ * - new leaf node is involved into transaction by ext3_append();
++ *
++ * - old leaf node is involved into transaction by iam_add_rec();
++ *
++ * - leaf where insertion point ends in, is marked dirty by iam_add_rec();
++ *
++ * - leaf without insertion point is marked dirty (as @new_leaf) by
++ * iam_new_leaf();
++ *
++ * - split index nodes are involved into transaction and marked dirty by
++ * split_index_node().
++ *
++ * - "safe" index node, which is not split, but where new pointer is inserted
++ * is involved into transaction and marked dirty by split_index_node().
++ *
++ * - index node where pointer to new leaf is inserted is involved into
++ * transaction by split_index_node() and marked dirty by iam_add_rec().
++ *
++ * - inode is marked dirty by iam_add_rec().
++ *
++ */
++
+static int iam_new_leaf(handle_t *handle, struct iam_leaf *leaf)
+{
+ int err;
+ iam_leaf_split(leaf, &new_leaf, blknr);
+ err = iam_txn_dirty(handle, iam_leaf_path(leaf), new_leaf);
+ brelse(new_leaf);
++ if (err == 0)
++ err = ext3_mark_inode_dirty(handle, c->ic_object);
+ }
+ assert(iam_leaf_check(leaf));
+ assert(iam_leaf_check(&iam_leaf_path(leaf)->ip_leaf));
+ *
+ * precondition: it->ii_flags&IAM_IT_WRITE &&
+ * (it_state(it) == IAM_IT_ATTACHED ||
-+ * it_state(it) == IAM_IT_BEFORE ||
-+ * it_state(it) == IAM_IT_EMPTY) &&
++ * it_state(it) == IAM_IT_SKEWED) &&
+ * ergo(it_state(it) == IAM_IT_ATTACHED,
+ * it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)),
+ * k) < 0) &&
-+ * ergo(it_state(it) == IAM_IT_BEFORE,
-+ * it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)),
++ * ergo(it_before(it),
++ * it_keycmp(it, __iam_it_key_get(it, it_scratch_key(it, 0)),
+ * k) > 0));
+ * postcondition: ergo(result == 0,
+ * it_state(it) == IAM_IT_ATTACHED &&
+
+ assert(it->ii_flags&IAM_IT_WRITE);
+ assert(it_state(it) == IAM_IT_ATTACHED ||
-+ it_state(it) == IAM_IT_BEFORE || it_state(it) == IAM_IT_EMPTY);
++ it_state(it) == IAM_IT_SKEWED);
+ assert(ergo(it_state(it) == IAM_IT_ATTACHED,
+ it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)),
+ k) < 0));
-+ assert(ergo(it_state(it) == IAM_IT_BEFORE,
-+ it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)),
++ assert(ergo(it_before(it),
++ it_keycmp(it, __iam_it_key_get(it, it_scratch_key(it, 0)),
+ k) > 0));
+ result = iam_add_rec(h, path, k, r);
+ if (result == 0)
+ * precondition: it_state(it) == IAM_IT_ATTACHED &&
+ * it->ii_flags&IAM_IT_WRITE &&
+ * it_at_rec(it)
-+ * postcondition: it_state(it) == IAM_IT_ATTACHED || it_state(it) == IAM_IT_EOC
++ * postcondition: it_state(it) == IAM_IT_ATTACHED ||
++ * it_state(it) == IAM_IT_DETACHED
+ */
+int iam_it_rec_delete(handle_t *h, struct iam_iterator *it)
+{
+ }
+ assert(iam_leaf_check(leaf));
+ assert(iam_path_check(path));
-+ assert(it_state(it) == IAM_IT_ATTACHED || it_state(it) == IAM_IT_EOC);
++ assert(it_state(it) == IAM_IT_ATTACHED ||
++ it_state(it) == IAM_IT_DETACHED);
+ return result;
+}
+
+ return
+ (it->ii_state == IAM_IT_DETACHED ||
+ it->ii_state == IAM_IT_ATTACHED ||
-+ it->ii_state == IAM_IT_EMPTY ||
-+ it->ii_state == IAM_IT_BEFORE ||
-+ it->ii_state == IAM_IT_EOC) &&
++ it->ii_state == IAM_IT_SKEWED) &&
+ !(it->ii_flags & ~(IAM_IT_MOVE | IAM_IT_WRITE)) &&
+ ergo(it->ii_state == IAM_IT_ATTACHED ||
-+ it->ii_state == IAM_IT_EMPTY ||
-+ it->ii_state == IAM_IT_BEFORE,
++ it->ii_state == IAM_IT_SKEWED,
+ iam_path_invariant(&it->ii_path) &&
-+ equi(it->ii_state == IAM_IT_EMPTY, !it_at_rec(it)));
++ equi(it_at_rec(it), it->ii_state == IAM_IT_SKEWED));
+}
+
+/*
+ * Search container @c for record with key @k. If record is found, its data
+ * are moved into @r.
+ *
-+ *
-+ *
+ * Return values: +ve: found, 0: not-found, -ve: error
+ */
+int iam_lookup(struct iam_container *c, const struct iam_key *k,
+}
+EXPORT_SYMBOL(iam_insert);
+
++/*
++ * Update record with the key @k in container @c (within context of
++ * transaction @h), new record is given by @r.
++ *
++ * Return values: 0: success, -ve: error, including -ENOENT if no record with
++ * the given key found.
++ */
+int iam_update(handle_t *h, struct iam_container *c, const struct iam_key *k,
+ struct iam_rec *r, struct iam_path_descr *pd)
+{
Index: iam/fs/ext3/iam_lfix.c
===================================================================
--- iam.orig/fs/ext3/iam_lfix.c 2004-04-06 17:27:52.000000000 +0400
-+++ iam/fs/ext3/iam_lfix.c 2006-06-08 17:34:38.000000000 +0400
++++ iam/fs/ext3/iam_lfix.c 2006-06-15 19:55:41.000000000 +0400
@@ -0,0 +1,613 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ q = iam_lfix_shift(l, p, count - 1);
+ if (iam_keycmp(c, k, iam_leaf_key_at(p)) < 0) {
+ /*
-+ * @k is less than the smallest key in the leaf
++ * @k is less than the least key in the leaf
+ */
+ l->il_at = p;
+ result = IAM_LOOKUP_BEFORE;
+ else
+ /*
+ * Another exceptional case: insertion with the key
-+ * less than smallest key in the leaf.
++ * less than least key in the leaf.
+ */
+ assert(cur == leaf->il_entries);
+
+ hdr->ill_count = count - split;
+ lentry_count_set(l, split);
+ /*
-+ * Insert pointer to the new node (together with the smallest key in
++ * Insert pointer to the new node (together with the least key in
+ * the node) into index node.
+ */
+ iam_insert_key(path, path->ip_frame, pivot, new_blknr);
Index: iam/fs/ext3/namei.c
===================================================================
--- iam.orig/fs/ext3/namei.c 2006-05-31 20:24:32.000000000 +0400
-+++ iam/fs/ext3/namei.c 2006-06-02 22:59:05.000000000 +0400
++++ iam/fs/ext3/namei.c 2006-06-12 22:12:33.000000000 +0400
@@ -24,81 +24,6 @@
* Theodore Ts'o, 2002
*/
-static inline ptrdiff_t iam_entry_diff(struct iam_path *p,
- struct iam_entry *e1, struct iam_entry *e2)
--{
++static inline struct iam_key *iam_get_key(struct iam_path *p,
++ struct iam_entry *entry,
++ struct iam_key *key)
+ {
- ptrdiff_t diff;
-
- diff = (void *)e1 - (void *)e2;
-
-static inline void dx_set_block(struct iam_path *p,
- struct iam_entry *entry, unsigned value)
-+static inline struct iam_key *iam_get_key(struct iam_path *p,
-+ struct iam_entry *entry,
-+ struct iam_key *key)
- {
+-{
- *(u32*)entry_off(entry,
- path_descr(p)->id_key_size) = cpu_to_le32(value);
-}
static inline int dx_index_is_compat(struct iam_path *path)
{
- return path_descr(path) == &htree_compat_param;
-+ return iam_path_descr(path) == &htree_compat_param;
- }
-
+-}
+-
-static struct iam_entry *dx_get_entries(struct iam_path *path, void *data,
- int root)
-{
- return data +
- (root ?
- path_descr(path)->id_root_gap : path_descr(path)->id_node_gap);
--}
++ return iam_path_descr(path) == &htree_compat_param;
+ }
-static struct iam_entry *dx_node_get_entries(struct iam_path *path,
- struct iam_frame *frame)
- return dx_get_entries(path,
- frame->bh->b_data, frame == path->ip_frames);
-}
--
+
-static int dx_node_check(struct iam_path *p, struct iam_frame *f)
+int dx_node_check(struct iam_path *p, struct iam_frame *f)
{
- keycmp(c, p->ip_key_scratch[0], p->ip_key_scratch[1]) > 0)
+ iam_keycmp(c, iam_path_key(p, 0), iam_path_key(p, 1)) > 0) {
+ BREAKPOINT;
-+ return 0;
+ return 0;
+ }
+ blk = dx_get_block(p, e);
+ if (inode->i_size < (blk + 1) * inode->i_sb->s_blocksize) {
+ BREAKPOINT;
- return 0;
++ return 0;
+ }
}
return 1;
- err = param->id_node_read(c, (iam_ptr_t)ptr, NULL, &frame->bh);
+ err = param->id_ops->id_node_read(c, (iam_ptr_t)ptr, NULL,
+ &frame->bh);
-+ if (err != 0)
-+ break;
-+
-+ err = param->id_ops->id_node_check(path, frame);
if (err != 0)
break;
- err = param->id_node_check(path, frame);
+
++ err = param->id_ops->id_node_check(path, frame);
++ if (err != 0)
++ break;
++
+ err = param->id_ops->id_node_load(path, frame);
if (err != 0)
break;
dxtrace(printk("Split index %i/%i\n", count1, count2));
-@@ -2537,16 +1771,22 @@ static int split_index_node(handle_t *ha
+@@ -2537,16 +1771,30 @@ static int split_index_node(handle_t *ha
swap(frame->bh, bh2);
bh_new[i] = bh2;
}
+ err = ext3_journal_dirty_metadata(handle, bh);
+ if (err)
+ goto journal_error;
++ }
++ if (nr_splet > 0) {
++ /*
++ * Log ->i_size modification.
++ */
++ err = ext3_mark_inode_dirty(handle, dir);
++ if (err)
++ goto journal_error;
}
goto cleanup;
journal_error:
-@@ -2578,7 +1818,7 @@ static int ext3_dx_add_entry(handle_t *h
+@@ -2578,7 +1826,7 @@ static int ext3_dx_add_entry(handle_t *h
size_t isize;
iam_path_compat_init(&cpath, dir);
err = dx_probe(dentry, NULL, &hinfo, path);
if (err != 0)
-@@ -2588,8 +1828,9 @@ static int ext3_dx_add_entry(handle_t *h
+@@ -2588,8 +1836,9 @@ static int ext3_dx_add_entry(handle_t *h
/* XXX nikita: global serialization! */
isize = dir->i_size;
if (err != 0)
goto cleanup;
-@@ -2609,7 +1850,7 @@ static int ext3_dx_add_entry(handle_t *h
+@@ -2609,7 +1858,7 @@ static int ext3_dx_add_entry(handle_t *h
goto cleanup;
/*copy split inode too*/
if (!de)
goto cleanup;
-@@ -2724,12 +1965,12 @@ static struct inode * ext3_new_inode_wan
+@@ -2724,12 +1973,12 @@ static struct inode * ext3_new_inode_wan
* is so far negative - it has no inode.
*
* If the create succeeds, we fill in the inode information
Index: iam/include/linux/lustre_iam.h
===================================================================
--- iam.orig/include/linux/lustre_iam.h 2006-05-31 20:24:32.000000000 +0400
-+++ iam/include/linux/lustre_iam.h 2006-06-08 21:50:42.000000000 +0400
++++ iam/include/linux/lustre_iam.h 2006-06-16 14:39:59.000000000 +0400
@@ -1,9 +1,68 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
Index: iam/fs/ext3/Makefile
===================================================================
---- iam.orig/fs/ext3/Makefile 2006-06-08 21:50:42.000000000 +0400
-+++ iam/fs/ext3/Makefile 2006-06-08 21:50:42.000000000 +0400
+--- iam.orig/fs/ext3/Makefile 2006-06-16 14:39:59.000000000 +0400
++++ iam/fs/ext3/Makefile 2006-06-16 14:40:00.000000000 +0400
@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
Index: iam/fs/ext3/file.c
===================================================================
---- iam.orig/fs/ext3/file.c 2006-06-08 21:50:42.000000000 +0400
-+++ iam/fs/ext3/file.c 2006-06-08 21:50:42.000000000 +0400
+--- iam.orig/fs/ext3/file.c 2006-06-16 14:39:59.000000000 +0400
++++ iam/fs/ext3/file.c 2006-06-16 14:40:00.000000000 +0400
@@ -23,6 +23,7 @@
#include <linux/jbd.h>
#include <linux/ext3_fs.h>
Index: iam/fs/ext3/iam-uapi.c
===================================================================
--- iam.orig/fs/ext3/iam-uapi.c 2004-04-06 17:27:52.000000000 +0400
-+++ iam/fs/ext3/iam-uapi.c 2006-06-08 21:50:42.000000000 +0400
-@@ -0,0 +1,349 @@
++++ iam/fs/ext3/iam-uapi.c 2006-06-16 14:40:00.000000000 +0400
+@@ -0,0 +1,348 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ result = iam_it_get(it, itop->iui_op.iul_key);
+ break;
+ case IAM_IOC_IT_NEXT:
-+ if (st == IAM_IT_ATTACHED ||
-+ st == IAM_IT_BEFORE || st == IAM_IT_EMPTY)
++ if (st == IAM_IT_ATTACHED || st == IAM_IT_SKEWED)
+ result = iam_it_next(it);
+ else
+ result = -EBUSY;
+ break;
+ }
+ st = it->ii_state;
-+ if (st == IAM_IT_ATTACHED || st == IAM_IT_BEFORE)
++ if (st == IAM_IT_ATTACHED || st == IAM_IT_SKEWED)
+ iam_keycpy0(&ipi->ipi_bag, itop->iui_op.iul_key,
+ iam_it_key_get(it, itop->iui_op.iul_key));
+ if (st == IAM_IT_ATTACHED)
+}
Index: iam/include/linux/lustre_iam.h
===================================================================
---- iam.orig/include/linux/lustre_iam.h 2006-06-08 21:50:42.000000000 +0400
-+++ iam/include/linux/lustre_iam.h 2006-06-08 21:50:42.000000000 +0400
+--- iam.orig/include/linux/lustre_iam.h 2006-06-16 14:39:59.000000000 +0400
++++ iam/include/linux/lustre_iam.h 2006-06-16 14:40:00.000000000 +0400
@@ -30,9 +30,6 @@
#ifndef __LINUX_LUSTRE_IAM_H__
#define __LINUX_LUSTRE_IAM_H__
typedef __u64 iam_ptr_t;
/*
-@@ -123,6 +124,21 @@ struct iam_leaf {
+@@ -123,6 +124,27 @@ struct iam_leaf {
void *il_descr_data;
};
++/*
++ * Return values of ->lookup() operation from struct iam_leaf_operations.
++ */
+enum iam_lookup_t {
+ /*
+ * lookup positioned leaf on some record
+ IAM_LOOKUP_BEFORE
+};
+
++/*
++ * Format-specific container operations. These are called by generic iam code.
++ */
struct iam_operations {
/*
* Returns pointer (in the same sense as pointer in index entry) to
-@@ -131,11 +147,15 @@ struct iam_operations {
+@@ -131,11 +153,15 @@ struct iam_operations {
__u32 (*id_root_ptr)(struct iam_container *c);
/*
* Initialize new node (stored in @bh) that is going to be added into
* tree.
*/
-@@ -155,6 +175,10 @@ struct iam_operations {
- * contains single record with the smallest possible key.
+@@ -152,15 +178,25 @@ struct iam_operations {
+ * Create new container.
+ *
+ * Newly created container has a root node and a single leaf. Leaf
+- * contains single record with the smallest possible key.
++ * contains single record with the least possible key.
*/
int (*id_create)(struct iam_container *c);
+ /*
/*
* Format name.
*/
-@@ -226,7 +250,8 @@ struct iam_leaf_operations {
+ char id_name[DX_FMT_NAME_LEN];
+ };
+
++/*
++ * Another format-specific operation vector, consisting of methods to access
++ * leaf nodes. This is separated from struct iam_operations, because it is
++ * assumed that there will be many formats with different format of leaf
++ * nodes, yet the same struct iam_operations.
++ */
+ struct iam_leaf_operations {
+ /*
+ * leaf operations.
+@@ -226,7 +262,8 @@ struct iam_leaf_operations {
* split leaf node, moving some entries into @bh (the latter currently
* is assumed to be empty).
*/
};
struct iam_path *iam_leaf_path(const struct iam_leaf *leaf);
-@@ -347,7 +372,13 @@ enum iam_it_state {
+@@ -264,6 +301,9 @@ struct iam_descr {
+ struct iam_leaf_operations *id_leaf_ops;
+ };
+
++/*
++ * An instance of iam container.
++ */
+ struct iam_container {
+ /*
+ * Underlying flat file. IO against this object is issued to
+@@ -347,7 +387,9 @@ enum iam_it_state {
/* initial state */
IAM_IT_DETACHED,
/* iterator is above particular record in the container */
- IAM_IT_ATTACHED
+ IAM_IT_ATTACHED,
-+ /* iterator landed into empty leaf */
-+ IAM_IT_EMPTY,
-+ /* iterator is positioned before first record in the leaf */
-+ IAM_IT_BEFORE,
-+ /* end of container reached */
-+ IAM_IT_EOC
++ /* iterator is positioned before record */
++ IAM_IT_SKEWED
+ };
+
+ /*
+@@ -355,7 +397,7 @@ enum iam_it_state {
+ */
+ enum iam_it_flags {
+ /*
+- * this iterator will move (iam_it_{prev,next}() will be called on it)
++ * this iterator will move (iam_it_next() will be called on it)
+ */
+ IAM_IT_MOVE = (1 << 0),
+ /*
+@@ -372,15 +414,26 @@ enum iam_it_flags {
+ * doesn't point to any particular record in this container.
+ *
+ * After successful call to iam_it_get() and until corresponding call to
+- * iam_it_put() iterator is in "attached" state (IAM_IT_ATTACHED).
++ * iam_it_put() iterator is in one of "active" states: IAM_IT_ATTACHED or
++ * IAM_IT_SKEWED.
+ *
+- * Attached iterator can move through records in a container (provided
++ * Active iterator can move through records in a container (provided
+ * IAM_IT_MOVE permission) in a key order, can get record and key values as it
+ * passes over them, and can modify container (provided IAM_IT_WRITE
+ * permission).
+ *
++ * Iteration may reach the end of container, at which point iterator switches
++ * into IAM_IT_DETACHED state.
++ *
+ * Concurrency: iterators are supposed to be local to thread. Interfaces below
+- * do no internal serialization.
++ * do no internal serialization of access to the iterator fields.
++ *
++ * When in non-detached state, iterator keeps some container nodes pinned in
++ * memory and locked (that locking may be implemented at the container
++ * granularity though). In particular, clients may assume that pointers to
++ * records and keys obtained through iterator interface remain valid until
++ * iterator is detached (except that they may be invalidated by subsequent
++ * operations done through the same iterator).
+ *
+ */
+ struct iam_iterator {
+@@ -390,7 +443,8 @@ struct iam_iterator {
+ __u32 ii_flags;
+ enum iam_it_state ii_state;
+ /*
+- * path to the record. Valid in IAM_IT_ATTACHED state.
++ * path to the record. Valid in IAM_IT_ATTACHED, and IAM_IT_SKEWED
++ * states.
+ */
+ struct iam_path ii_path;
};
+@@ -420,27 +474,37 @@ int iam_it_init(struct iam_iterator *it
+ void iam_it_fini(struct iam_iterator *it);
/*
-@@ -468,7 +499,7 @@ int iam_it_next(struct iam_iterator *it)
+- * Attach iterator. After successful completion, @it points to record with the
+- * largest key not larger than @k. Semantics of ->id_create() method guarantee
+- * that such record will always be found.
++ * Attach iterator. After successful completion, @it points to record with
++ * largest key not larger than @k.
+ *
+ * Return value: 0: positioned on existing record,
+ * -ve: error.
+ *
+ * precondition: it_state(it) == IAM_IT_DETACHED
+- * postcondition: ergo(result == 0,
+- * (it_state(it) == IAM_IT_ATTACHED &&
+- * it_keycmp(it, iam_it_key_get(it, *), k) < 0))
++ * postcondition: ergo(result == 0 && it_state(it) == IAM_IT_ATTACHED,
++ * it_keycmp(it, iam_it_key_get(it, *), k) <= 0)
+ */
+ int iam_it_get(struct iam_iterator *it, const struct iam_key *k);
+
+ /*
++ * Attach iterator, and assure it points to the record (not skewed).
++ *
++ * Return value: 0: positioned on existing record,
++ * -ve: error.
++ *
++ * precondition: it_state(it) == IAM_IT_DETACHED &&
++ * !(it->ii_flags&IAM_IT_WRITE)
++ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED)
++ */
++int iam_it_get_at(struct iam_iterator *it, const struct iam_key *k);
++
++/*
+ * Duplicates iterator.
+ *
+ * postcondition: it_state(dst) == it_state(src) &&
+ * iam_it_container(dst) == iam_it_container(src) &&
+ * dst->ii_flags = src->ii_flags &&
+- * ergo(it_state(it) == IAM_IT_ATTACHED,
++ * ergo(it_state(src) == IAM_IT_ATTACHED,
+ * iam_it_rec_get(dst) == iam_it_rec_get(src) &&
+ * iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
+ */
+@@ -460,15 +524,17 @@ void iam_it_put(struct iam_iterator *it)
+ * +1: end of container reached
+ * -ve: error
+ *
+- * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
+- * postcondition: ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED)
++ * precondition: (it_state(it) == IAM_IT_ATTACHED ||
++ * it_state(it) == IAM_IT_SKEWED) && it->ii_flags&IAM_IT_MOVE
++ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED) &&
++ * ergo(result > 0, it_state(it) == IAM_IT_DETACHED)
+ */
+ int iam_it_next(struct iam_iterator *it);
+
/*
* Return pointer to the record under iterator.
*
* postcondition: it_state(it) == IAM_IT_ATTACHED
*/
struct iam_rec *iam_it_rec_get(const struct iam_iterator *it);
-@@ -476,7 +507,9 @@ struct iam_rec *iam_it_rec_get(const str
+@@ -476,14 +542,15 @@ struct iam_rec *iam_it_rec_get(const str
/*
* Replace contents of record under iterator.
*
- * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
+ * precondition: it_state(it) == IAM_IT_ATTACHED &&
-+ * it->ii_flags&IAM_IT_WRITE &&
-+ * it_at_rec(it)
++ * it->ii_flags&IAM_IT_WRITE
* postcondition: it_state(it) == IAM_IT_ATTACHED &&
* ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
*/
-@@ -485,7 +518,7 @@ int iam_it_rec_set(handle_t *h, struct i
+ int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r);
+
/*
- * Place key under iterator in @k, return @k
+- * Place key under iterator in @k, return @k
++ * Return pointer to the key under iterator.
*
-- * precondition: it_state(it) == IAM_IT_ATTACHED
-+ * precondition: it_state(it) == IAM_IT_ATTACHED && it_at_rec(it)
+ * precondition: it_state(it) == IAM_IT_ATTACHED
* postcondition: it_state(it) == IAM_IT_ATTACHED
- */
- struct iam_key *iam_it_key_get(const struct iam_iterator *it,
-@@ -497,7 +530,8 @@ struct iam_key *iam_it_key_get(const str
+@@ -495,11 +562,17 @@ struct iam_key *iam_it_key_get(const str
+ * Insert new record with key @k and contents from @r, shifting records to the
+ * right.
*
- * precondition: it_state(it) == IAM_IT_ATTACHED &&
- * it->ii_flags&IAM_IT_WRITE &&
+- * precondition: it_state(it) == IAM_IT_ATTACHED &&
+- * it->ii_flags&IAM_IT_WRITE &&
- * it_keycmp(it, iam_it_key_get(it, *), k) < 0
-+ * ergo(it_at_rec(it),
-+ * it_keycmp(it, iam_it_key_get(it, *), k) < 0)
- * postcondition: it_state(it) == IAM_IT_ATTACHED &&
- * ergo(result == 0,
+- * postcondition: it_state(it) == IAM_IT_ATTACHED &&
+- * ergo(result == 0,
++ * precondition: it->ii_flags&IAM_IT_WRITE &&
++ * (it_state(it) == IAM_IT_ATTACHED ||
++ * it_state(it) == IAM_IT_SKEWED) &&
++ * ergo(it_state(it) == IAM_IT_ATTACHED,
++ * it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)),
++ * k) < 0) &&
++ * ergo(it_before(it),
++ * it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)),
++ * k) > 0));
++ * postcondition: ergo(result == 0,
++ * it_state(it) == IAM_IT_ATTACHED &&
* it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
-@@ -508,7 +542,9 @@ int iam_it_rec_insert(handle_t *h, struc
+ * !memcmp(iam_it_rec_get(it), r, ...))
+ */
+@@ -508,8 +581,10 @@ int iam_it_rec_insert(handle_t *h, struc
/*
* Delete record under iterator.
*
- * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
+- * postcondition: it_state(it) == IAM_IT_ATTACHED
+ * precondition: it_state(it) == IAM_IT_ATTACHED &&
+ * it->ii_flags&IAM_IT_WRITE &&
+ * it_at_rec(it)
- * postcondition: it_state(it) == IAM_IT_ATTACHED
++ * postcondition: it_state(it) == IAM_IT_ATTACHED || it_state(it) == IAM_IT_DETACHED
*/
int iam_it_rec_delete(handle_t *h, struct iam_iterator *it);
-@@ -519,7 +555,8 @@ typedef __u64 iam_pos_t;
+
+@@ -519,7 +594,7 @@ typedef __u64 iam_pos_t;
* Convert iterator to cookie.
*
* precondition: it_state(it) == IAM_IT_ATTACHED &&
- * path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
-+ * path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t) &&
-+ * it_at_rec(it)
++ * iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
* postcondition: it_state(it) == IAM_IT_ATTACHED
*/
iam_pos_t iam_it_store(const struct iam_iterator *it);
-@@ -583,6 +620,17 @@ static inline void iam_keycpy(const stru
+@@ -527,8 +602,9 @@ iam_pos_t iam_it_store(const struct iam_
+ /*
+ * Restore iterator from cookie.
+ *
+- * precondition: it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE &&
+- * path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
++ * precondition: it_state(it) == IAM_IT_DETACHED &&
++ * it->ii_flags&IAM_IT_MOVE &&
++ * iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
+ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED &&
+ * iam_it_store(it) == pos)
+ */
+@@ -583,6 +659,17 @@ static inline void iam_keycpy(const stru
memcpy(k1, k2, c->ic_descr->id_key_size);
}
static inline int iam_keycmp(const struct iam_container *c,
const struct iam_key *k1, const struct iam_key *k2)
{
-@@ -650,6 +698,15 @@ static inline unsigned dx_node_limit(str
+@@ -650,6 +737,15 @@ static inline unsigned dx_node_limit(str
return entry_space / (param->id_key_size + param->id_ptr_size);
}
static inline struct iam_entry *dx_get_entries(struct iam_path *path,
void *data, int root)
{
-@@ -702,6 +759,8 @@ void iam_insert_key(struct iam_path *pat
+@@ -702,6 +798,8 @@ void iam_insert_key(struct iam_path *pat
int iam_leaf_at_end(const struct iam_leaf *l);
void iam_leaf_next(struct iam_leaf *folio);
struct iam_path *iam_leaf_path(const struct iam_leaf *leaf);
struct iam_container *iam_leaf_container(const struct iam_leaf *leaf);
-@@ -718,5 +777,48 @@ void iam_format_register(struct iam_form
+@@ -709,8 +807,23 @@ struct iam_descr *iam_leaf_descr(const s
+ struct iam_leaf_operations *iam_leaf_ops(const struct iam_leaf *leaf);
+
+
++/*
++ * Container format.
++ */
+ struct iam_format {
++ /*
++ * Method called to recognize container format. Should return true iff
++ * container @c conforms to this format. This method may do IO to read
++ * container pages.
++ *
++ * If container is recognized, this method sets operation vectors
++ * ->id_ops and ->id_leaf_ops in container description (c->ic_descr),
++ * and fills other description fields.
++ */
+ int (*if_guess)(struct iam_container *c);
++ /*
++ * Linkage into global list of container formats.
++ */
+ struct list_head if_linkage;
+ };
+
+@@ -718,5 +831,48 @@ void iam_format_register(struct iam_form
void iam_lfix_format_init(void);