From: nikita Date: Fri, 16 Jun 2006 11:12:21 +0000 (+0000) Subject: iam fixes for defects found during DLDR X-Git-Tag: v1_8_0_110~486^2~1622 X-Git-Url: https://git.whamcloud.com/gitweb?a=commitdiff_plain;h=1f89dcbf221fd9a75938bcf7d036f0f7543d1689;p=fs%2Flustre-release.git iam fixes for defects found during DLDR --- diff --git a/lustre/kernel_patches/patches/ext3-iam-separate.patch b/lustre/kernel_patches/patches/ext3-iam-separate.patch index 0ac62184..c8373e3 100644 --- a/lustre/kernel_patches/patches/ext3-iam-separate.patch +++ b/lustre/kernel_patches/patches/ext3-iam-separate.patch @@ -1,7 +1,7 @@ Index: iam/fs/ext3/Makefile =================================================================== --- iam.orig/fs/ext3/Makefile 2006-05-31 20:24:32.000000000 +0400 -+++ iam/fs/ext3/Makefile 2006-06-08 21:50:42.000000000 +0400 ++++ iam/fs/ext3/Makefile 2006-06-16 14:39:59.000000000 +0400 @@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ @@ -14,8 +14,8 @@ Index: iam/fs/ext3/Makefile Index: iam/fs/ext3/iam.c =================================================================== --- iam.orig/fs/ext3/iam.c 2004-04-06 17:27:52.000000000 +0400 -+++ iam/fs/ext3/iam.c 2006-06-08 19:42:19.000000000 +0400 -@@ -0,0 +1,1163 @@ ++++ iam/fs/ext3/iam.c 2006-06-15 19:51:50.000000000 +0400 +@@ -0,0 +1,1246 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * @@ -143,6 +143,10 @@ Index: iam/fs/ext3/iam.c +} +EXPORT_SYMBOL(iam_format_register); + ++/* ++ * Determine format of given container. This is done by scanning list of ++ * registered formats and calling ->if_guess() method of each in turn. ++ */ +static int iam_format_guess(struct iam_container *c) +{ + int result; @@ -514,6 +518,11 @@ Index: iam/fs/ext3/iam.c + return !iam_leaf_at_end(&it->ii_path.ip_leaf); +} + ++static inline int it_before(const struct iam_iterator *it) ++{ ++ return it_state(it) == IAM_IT_SKEWED && it_at_rec(it); ++} ++ +/* + * Helper wrapper around iam_it_get(): returns 0 (success) only when record + * with exactly the same key as asked is found. @@ -524,7 +533,7 @@ Index: iam/fs/ext3/iam.c + + result = iam_it_get(it, k); + if (result == 0 && -+ (!it_at_rec(it) || ++ (it_state(it) != IAM_IT_ATTACHED || + it_keycmp(it, k, iam_it_key_get(it, it_scratch_key(it, 1))) != 0)) + /* + * Return -ENOENT if cursor is located above record with a key @@ -599,6 +608,10 @@ Index: iam/fs/ext3/iam.c + iam_path_fini(&it->ii_path); +} + ++/* ++ * Performs tree top-to-bottom traversal starting from root, and loads leaf ++ * node. ++ */ +static int iam_path_lookup(struct iam_path *path) +{ + struct iam_container *c; @@ -623,7 +636,7 @@ Index: iam/fs/ext3/iam.c + +/* + * Attach iterator. After successful completion, @it points to record with -+ * smallest key not larger than @k. ++ * least key not larger than @k. + * + * Return value: 0: positioned on existing record, + * -ve: error. @@ -645,11 +658,9 @@ Index: iam/fs/ext3/iam.c + case IAM_LOOKUP_OK: + it->ii_state = IAM_IT_ATTACHED; + break; -+ case IAM_LOOKUP_EMPTY: -+ it->ii_state = IAM_IT_EMPTY; -+ break; + case IAM_LOOKUP_BEFORE: -+ it->ii_state = IAM_IT_BEFORE; ++ case IAM_LOOKUP_EMPTY: ++ it->ii_state = IAM_IT_SKEWED; + break; + default: + assert(0); @@ -668,6 +679,31 @@ Index: iam/fs/ext3/iam.c +} + +/* ++ * Attach iterator, and assure it points to the record (not skewed). ++ * ++ * Return value: 0: positioned on existing record, ++ * -ve: error. ++ * ++ * precondition: it_state(it) == IAM_IT_DETACHED && ++ * !(it->ii_flags&IAM_IT_WRITE) ++ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED) ++ */ ++int iam_it_get_at(struct iam_iterator *it, const struct iam_key *k) ++{ ++ int result; ++ assert(it_state(it) == IAM_IT_DETACHED && !(it->ii_flags&IAM_IT_WRITE)); ++ result = iam_it_get(it, k); ++ if (result == 0) { ++ if (it_state(it) != IAM_IT_ATTACHED) { ++ assert(it_state(it) == IAM_IT_SKEWED); ++ result = iam_it_next(it); ++ } ++ } ++ assert(ergo(result == 0, it_state(it) == IAM_IT_ATTACHED)); ++ return result; ++} ++ ++/* + * Duplicates iterator. + * + * postcondition: it_state(dst) == it_state(src) && @@ -694,6 +730,7 @@ Index: iam/fs/ext3/iam.c + iam_it_key_get(src, it_scratch_key(src, 0)))); + +} ++ +/* + * Detach iterator. Does nothing it detached state. + * @@ -715,8 +752,10 @@ Index: iam/fs/ext3/iam.c + * +1: end of container reached + * -ve: error + * -+ * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE -+ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED) ++ * precondition: (it_state(it) == IAM_IT_ATTACHED || ++ * it_state(it) == IAM_IT_SKEWED) && it->ii_flags&IAM_IT_MOVE ++ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED) && ++ * ergo(result > 0, it_state(it) == IAM_IT_DETACHED) + */ +int iam_it_next(struct iam_iterator *it) +{ @@ -726,13 +765,13 @@ Index: iam/fs/ext3/iam.c + + assert(it->ii_flags&IAM_IT_MOVE); + assert(it_state(it) == IAM_IT_ATTACHED || -+ it_state(it) == IAM_IT_BEFORE || it_state(it) == IAM_IT_EMPTY); ++ it_state(it) == IAM_IT_SKEWED); + + path = &it->ii_path; + leaf = &path->ip_leaf; + + result = 0; -+ if (it_state(it) == IAM_IT_BEFORE) { ++ if (it_before(it)) { + assert(!iam_leaf_at_end(leaf)); + it->ii_state = IAM_IT_ATTACHED; + } else { @@ -746,19 +785,19 @@ Index: iam/fs/ext3/iam.c + /* advance index portion of the path */ + result = iam_index_next(iam_it_container(it), path); + if (result == 1) { ++ iam_leaf_fini(leaf); + result = iam_leaf_load(path); + if (result == 0) + iam_leaf_start(leaf); -+ } else if (result == 0) { ++ } else if (result == 0) + /* end of container reached */ -+ it->ii_state = IAM_IT_EOC; + result = +1; -+ } -+ if (result < 0) ++ if (result != 0) + iam_it_put(it); + } + } + assert(ergo(result == 0, it_state(it) == IAM_IT_ATTACHED)); ++ assert(ergo(result > 0, it_state(it) == IAM_IT_DETACHED)); + return result; +} + @@ -796,39 +835,78 @@ Index: iam/fs/ext3/iam.c + * Replace contents of record under iterator. + * + * precondition: it_state(it) == IAM_IT_ATTACHED && -+ * it->ii_flags&IAM_IT_WRITE && -+ * it_at_rec(it) ++ * it->ii_flags&IAM_IT_WRITE + * postcondition: it_state(it) == IAM_IT_ATTACHED && + * ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...)) + */ +int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r) +{ + int result; ++ struct iam_path *path; ++ struct buffer_head *bh; + + assert(it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE); + assert(it_at_rec(it)); + -+ result = iam_txn_add(h, &it->ii_path, it->ii_path.ip_leaf.il_bh); -+ if (result == 0) ++ path = &it->ii_path; ++ bh = path->ip_leaf.il_bh; ++ result = iam_txn_add(h, path, bh); ++ if (result == 0) { + iam_it_reccpy(it, r); ++ result = iam_txn_dirty(h, path, bh); ++ } + return result; +} + +/* ++ * Assertionless version of iam_it_key_get(). ++ */ ++static struct iam_key *__iam_it_key_get(const struct iam_iterator *it, ++ struct iam_key *k) ++{ ++ return iam_leaf_key(&it->ii_path.ip_leaf, k); ++} ++ ++/* + * Return pointer to the key under iterator. + * + * precondition: it_state(it) == IAM_IT_ATTACHED || -+ * it_state(it) == IAM_IT_BEFORE ++ * it_state(it) == IAM_IT_SKEWED + * postcondition: it_state(it) == IAM_IT_ATTACHED + */ +struct iam_key *iam_it_key_get(const struct iam_iterator *it, struct iam_key *k) +{ -+ assert(it_state(it) == IAM_IT_ATTACHED || -+ it_state(it) == IAM_IT_BEFORE); ++ assert(it_state(it) == IAM_IT_ATTACHED); + assert(it_at_rec(it)); -+ return iam_leaf_key(&it->ii_path.ip_leaf, k); ++ return __iam_it_key_get(it, k); +} + ++/* ++ * Insertion of new record. Interaction with jbd during non-trivial case (when ++ * split happens) is as following: ++ * ++ * - new leaf node is involved into transaction by ext3_append(); ++ * ++ * - old leaf node is involved into transaction by iam_add_rec(); ++ * ++ * - leaf where insertion point ends in, is marked dirty by iam_add_rec(); ++ * ++ * - leaf without insertion point is marked dirty (as @new_leaf) by ++ * iam_new_leaf(); ++ * ++ * - split index nodes are involved into transaction and marked dirty by ++ * split_index_node(). ++ * ++ * - "safe" index node, which is no split, but where new pointer is inserted ++ * is involved into transaction and marked dirty by split_index_node(). ++ * ++ * - index node where pointer to new leaf is inserted is involved into ++ * transaction by split_index_node() and marked dirty by iam_add_rec(). ++ * ++ * - inode is marked dirty by iam_add_rec(). ++ * ++ */ ++ +static int iam_new_leaf(handle_t *handle, struct iam_leaf *leaf) +{ + int err; @@ -848,6 +926,8 @@ Index: iam/fs/ext3/iam.c + iam_leaf_split(leaf, &new_leaf, blknr); + err = iam_txn_dirty(handle, iam_leaf_path(leaf), new_leaf); + brelse(new_leaf); ++ if (err == 0) ++ err = ext3_mark_inode_dirty(handle, c->ic_object); + } + assert(iam_leaf_check(leaf)); + assert(iam_leaf_check(&iam_leaf_path(leaf)->ip_leaf)); @@ -897,13 +977,12 @@ Index: iam/fs/ext3/iam.c + * + * precondition: it->ii_flags&IAM_IT_WRITE && + * (it_state(it) == IAM_IT_ATTACHED || -+ * it_state(it) == IAM_IT_BEFORE || -+ * it_state(it) == IAM_IT_EMPTY) && ++ * it_state(it) == IAM_IT_SKEWED) && + * ergo(it_state(it) == IAM_IT_ATTACHED, + * it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)), + * k) < 0) && -+ * ergo(it_state(it) == IAM_IT_BEFORE, -+ * it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)), ++ * ergo(it_before(it), ++ * it_keycmp(it, __iam_it_key_get(it, it_scratch_key(it, 0)), + * k) > 0)); + * postcondition: ergo(result == 0, + * it_state(it) == IAM_IT_ATTACHED && @@ -920,12 +999,12 @@ Index: iam/fs/ext3/iam.c + + assert(it->ii_flags&IAM_IT_WRITE); + assert(it_state(it) == IAM_IT_ATTACHED || -+ it_state(it) == IAM_IT_BEFORE || it_state(it) == IAM_IT_EMPTY); ++ it_state(it) == IAM_IT_SKEWED); + assert(ergo(it_state(it) == IAM_IT_ATTACHED, + it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)), + k) < 0)); -+ assert(ergo(it_state(it) == IAM_IT_BEFORE, -+ it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)), ++ assert(ergo(it_before(it), ++ it_keycmp(it, __iam_it_key_get(it, it_scratch_key(it, 0)), + k) > 0)); + result = iam_add_rec(h, path, k, r); + if (result == 0) @@ -945,7 +1024,8 @@ Index: iam/fs/ext3/iam.c + * precondition: it_state(it) == IAM_IT_ATTACHED && + * it->ii_flags&IAM_IT_WRITE && + * it_at_rec(it) -+ * postcondition: it_state(it) == IAM_IT_ATTACHED || it_state(it) == IAM_IT_EOC ++ * postcondition: it_state(it) == IAM_IT_ATTACHED || ++ * it_state(it) == IAM_IT_DETACHED + */ +int iam_it_rec_delete(handle_t *h, struct iam_iterator *it) +{ @@ -978,7 +1058,8 @@ Index: iam/fs/ext3/iam.c + } + assert(iam_leaf_check(leaf)); + assert(iam_path_check(path)); -+ assert(it_state(it) == IAM_IT_ATTACHED || it_state(it) == IAM_IT_EOC); ++ assert(it_state(it) == IAM_IT_ATTACHED || ++ it_state(it) == IAM_IT_DETACHED); + return result; +} + @@ -1068,23 +1149,18 @@ Index: iam/fs/ext3/iam.c + return + (it->ii_state == IAM_IT_DETACHED || + it->ii_state == IAM_IT_ATTACHED || -+ it->ii_state == IAM_IT_EMPTY || -+ it->ii_state == IAM_IT_BEFORE || -+ it->ii_state == IAM_IT_EOC) && ++ it->ii_state == IAM_IT_SKEWED) && + !(it->ii_flags & ~(IAM_IT_MOVE | IAM_IT_WRITE)) && + ergo(it->ii_state == IAM_IT_ATTACHED || -+ it->ii_state == IAM_IT_EMPTY || -+ it->ii_state == IAM_IT_BEFORE, ++ it->ii_state == IAM_IT_SKEWED, + iam_path_invariant(&it->ii_path) && -+ equi(it->ii_state == IAM_IT_EMPTY, !it_at_rec(it))); ++ equi(it_at_rec(it), it->ii_state == IAM_IT_SKEWED)); +} + +/* + * Search container @c for record with key @k. If record is found, its data + * are moved into @r. + * -+ * -+ * + * Return values: +ve: found, 0: not-found, -ve: error + */ +int iam_lookup(struct iam_container *c, const struct iam_key *k, @@ -1137,6 +1213,13 @@ Index: iam/fs/ext3/iam.c +} +EXPORT_SYMBOL(iam_insert); + ++/* ++ * Update record with the key @k in container @c (within context of ++ * transaction @h), new record is given by @r. ++ * ++ * Return values: 0: success, -ve: error, including -ENOENT if no record with ++ * the given key found. ++ */ +int iam_update(handle_t *h, struct iam_container *c, const struct iam_key *k, + struct iam_rec *r, struct iam_path_descr *pd) +{ @@ -1182,7 +1265,7 @@ Index: iam/fs/ext3/iam.c Index: iam/fs/ext3/iam_lfix.c =================================================================== --- iam.orig/fs/ext3/iam_lfix.c 2004-04-06 17:27:52.000000000 +0400 -+++ iam/fs/ext3/iam_lfix.c 2006-06-08 17:34:38.000000000 +0400 ++++ iam/fs/ext3/iam_lfix.c 2006-06-15 19:55:41.000000000 +0400 @@ -0,0 +1,613 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: @@ -1402,7 +1485,7 @@ Index: iam/fs/ext3/iam_lfix.c + q = iam_lfix_shift(l, p, count - 1); + if (iam_keycmp(c, k, iam_leaf_key_at(p)) < 0) { + /* -+ * @k is less than the smallest key in the leaf ++ * @k is less than the least key in the leaf + */ + l->il_at = p; + result = IAM_LOOKUP_BEFORE; @@ -1476,7 +1559,7 @@ Index: iam/fs/ext3/iam_lfix.c + else + /* + * Another exceptional case: insertion with the key -+ * less than smallest key in the leaf. ++ * less than least key in the leaf. + */ + assert(cur == leaf->il_entries); + @@ -1559,7 +1642,7 @@ Index: iam/fs/ext3/iam_lfix.c + hdr->ill_count = count - split; + lentry_count_set(l, split); + /* -+ * Insert pointer to the new node (together with the smallest key in ++ * Insert pointer to the new node (together with the least key in + * the node) into index node. + */ + iam_insert_key(path, path->ip_frame, pivot, new_blknr); @@ -1800,7 +1883,7 @@ Index: iam/fs/ext3/iam_lfix.c Index: iam/fs/ext3/namei.c =================================================================== --- iam.orig/fs/ext3/namei.c 2006-05-31 20:24:32.000000000 +0400 -+++ iam/fs/ext3/namei.c 2006-06-02 22:59:05.000000000 +0400 ++++ iam/fs/ext3/namei.c 2006-06-12 22:12:33.000000000 +0400 @@ -24,81 +24,6 @@ * Theodore Ts'o, 2002 */ @@ -2240,7 +2323,10 @@ Index: iam/fs/ext3/namei.c -static inline ptrdiff_t iam_entry_diff(struct iam_path *p, - struct iam_entry *e1, struct iam_entry *e2) --{ ++static inline struct iam_key *iam_get_key(struct iam_path *p, ++ struct iam_entry *entry, ++ struct iam_key *key) + { - ptrdiff_t diff; - - diff = (void *)e1 - (void *)e2; @@ -2256,10 +2342,7 @@ Index: iam/fs/ext3/namei.c - -static inline void dx_set_block(struct iam_path *p, - struct iam_entry *entry, unsigned value) -+static inline struct iam_key *iam_get_key(struct iam_path *p, -+ struct iam_entry *entry, -+ struct iam_key *key) - { +-{ - *(u32*)entry_off(entry, - path_descr(p)->id_key_size) = cpu_to_le32(value); -} @@ -2363,16 +2446,16 @@ Index: iam/fs/ext3/namei.c static inline int dx_index_is_compat(struct iam_path *path) { - return path_descr(path) == &htree_compat_param; -+ return iam_path_descr(path) == &htree_compat_param; - } - +-} +- -static struct iam_entry *dx_get_entries(struct iam_path *path, void *data, - int root) -{ - return data + - (root ? - path_descr(path)->id_root_gap : path_descr(path)->id_node_gap); --} ++ return iam_path_descr(path) == &htree_compat_param; + } -static struct iam_entry *dx_node_get_entries(struct iam_path *path, - struct iam_frame *frame) @@ -2380,7 +2463,7 @@ Index: iam/fs/ext3/namei.c - return dx_get_entries(path, - frame->bh->b_data, frame == path->ip_frames); -} -- + -static int dx_node_check(struct iam_path *p, struct iam_frame *f) +int dx_node_check(struct iam_path *p, struct iam_frame *f) { @@ -2405,12 +2488,12 @@ Index: iam/fs/ext3/namei.c - keycmp(c, p->ip_key_scratch[0], p->ip_key_scratch[1]) > 0) + iam_keycmp(c, iam_path_key(p, 0), iam_path_key(p, 1)) > 0) { + BREAKPOINT; -+ return 0; + return 0; + } + blk = dx_get_block(p, e); + if (inode->i_size < (blk + 1) * inode->i_sb->s_blocksize) { + BREAKPOINT; - return 0; ++ return 0; + } } return 1; @@ -2512,14 +2595,14 @@ Index: iam/fs/ext3/namei.c - err = param->id_node_read(c, (iam_ptr_t)ptr, NULL, &frame->bh); + err = param->id_ops->id_node_read(c, (iam_ptr_t)ptr, NULL, + &frame->bh); -+ if (err != 0) -+ break; -+ -+ err = param->id_ops->id_node_check(path, frame); if (err != 0) break; - err = param->id_node_check(path, frame); + ++ err = param->id_ops->id_node_check(path, frame); ++ if (err != 0) ++ break; ++ + err = param->id_ops->id_node_load(path, frame); if (err != 0) break; @@ -3360,7 +3443,7 @@ Index: iam/fs/ext3/namei.c dxtrace(printk("Split index %i/%i\n", count1, count2)); -@@ -2537,16 +1771,22 @@ static int split_index_node(handle_t *ha +@@ -2537,16 +1771,30 @@ static int split_index_node(handle_t *ha swap(frame->bh, bh2); bh_new[i] = bh2; } @@ -3382,10 +3465,18 @@ Index: iam/fs/ext3/namei.c + err = ext3_journal_dirty_metadata(handle, bh); + if (err) + goto journal_error; ++ } ++ if (nr_splet > 0) { ++ /* ++ * Log ->i_size modification. ++ */ ++ err = ext3_mark_inode_dirty(handle, dir); ++ if (err) ++ goto journal_error; } goto cleanup; journal_error: -@@ -2578,7 +1818,7 @@ static int ext3_dx_add_entry(handle_t *h +@@ -2578,7 +1826,7 @@ static int ext3_dx_add_entry(handle_t *h size_t isize; iam_path_compat_init(&cpath, dir); @@ -3394,7 +3485,7 @@ Index: iam/fs/ext3/namei.c err = dx_probe(dentry, NULL, &hinfo, path); if (err != 0) -@@ -2588,8 +1828,9 @@ static int ext3_dx_add_entry(handle_t *h +@@ -2588,8 +1836,9 @@ static int ext3_dx_add_entry(handle_t *h /* XXX nikita: global serialization! */ isize = dir->i_size; @@ -3406,7 +3497,7 @@ Index: iam/fs/ext3/namei.c if (err != 0) goto cleanup; -@@ -2609,7 +1850,7 @@ static int ext3_dx_add_entry(handle_t *h +@@ -2609,7 +1858,7 @@ static int ext3_dx_add_entry(handle_t *h goto cleanup; /*copy split inode too*/ @@ -3415,7 +3506,7 @@ Index: iam/fs/ext3/namei.c if (!de) goto cleanup; -@@ -2724,12 +1965,12 @@ static struct inode * ext3_new_inode_wan +@@ -2724,12 +1973,12 @@ static struct inode * ext3_new_inode_wan * is so far negative - it has no inode. * * If the create succeeds, we fill in the inode information @@ -3433,7 +3524,7 @@ Index: iam/fs/ext3/namei.c Index: iam/include/linux/lustre_iam.h =================================================================== --- iam.orig/include/linux/lustre_iam.h 2006-05-31 20:24:32.000000000 +0400 -+++ iam/include/linux/lustre_iam.h 2006-06-08 21:50:42.000000000 +0400 ++++ iam/include/linux/lustre_iam.h 2006-06-16 14:39:59.000000000 +0400 @@ -1,9 +1,68 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: diff --git a/lustre/kernel_patches/patches/ext3-iam-uapi.patch b/lustre/kernel_patches/patches/ext3-iam-uapi.patch index 9209dfb..9f1b46b 100644 --- a/lustre/kernel_patches/patches/ext3-iam-uapi.patch +++ b/lustre/kernel_patches/patches/ext3-iam-uapi.patch @@ -1,7 +1,7 @@ Index: iam/fs/ext3/Makefile =================================================================== ---- iam.orig/fs/ext3/Makefile 2006-06-08 21:50:42.000000000 +0400 -+++ iam/fs/ext3/Makefile 2006-06-08 21:50:42.000000000 +0400 +--- iam.orig/fs/ext3/Makefile 2006-06-16 14:39:59.000000000 +0400 ++++ iam/fs/ext3/Makefile 2006-06-16 14:40:00.000000000 +0400 @@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ @@ -13,8 +13,8 @@ Index: iam/fs/ext3/Makefile ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o Index: iam/fs/ext3/file.c =================================================================== ---- iam.orig/fs/ext3/file.c 2006-06-08 21:50:42.000000000 +0400 -+++ iam/fs/ext3/file.c 2006-06-08 21:50:42.000000000 +0400 +--- iam.orig/fs/ext3/file.c 2006-06-16 14:39:59.000000000 +0400 ++++ iam/fs/ext3/file.c 2006-06-16 14:40:00.000000000 +0400 @@ -23,6 +23,7 @@ #include #include @@ -50,8 +50,8 @@ Index: iam/fs/ext3/file.c Index: iam/fs/ext3/iam-uapi.c =================================================================== --- iam.orig/fs/ext3/iam-uapi.c 2004-04-06 17:27:52.000000000 +0400 -+++ iam/fs/ext3/iam-uapi.c 2006-06-08 21:50:42.000000000 +0400 -@@ -0,0 +1,349 @@ ++++ iam/fs/ext3/iam-uapi.c 2006-06-16 14:40:00.000000000 +0400 +@@ -0,0 +1,348 @@ +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- + * vim:expandtab:shiftwidth=8:tabstop=8: + * @@ -126,8 +126,7 @@ Index: iam/fs/ext3/iam-uapi.c + result = iam_it_get(it, itop->iui_op.iul_key); + break; + case IAM_IOC_IT_NEXT: -+ if (st == IAM_IT_ATTACHED || -+ st == IAM_IT_BEFORE || st == IAM_IT_EMPTY) ++ if (st == IAM_IT_ATTACHED || st == IAM_IT_SKEWED) + result = iam_it_next(it); + else + result = -EBUSY; @@ -139,7 +138,7 @@ Index: iam/fs/ext3/iam-uapi.c + break; + } + st = it->ii_state; -+ if (st == IAM_IT_ATTACHED || st == IAM_IT_BEFORE) ++ if (st == IAM_IT_ATTACHED || st == IAM_IT_SKEWED) + iam_keycpy0(&ipi->ipi_bag, itop->iui_op.iul_key, + iam_it_key_get(it, itop->iui_op.iul_key)); + if (st == IAM_IT_ATTACHED) @@ -403,8 +402,8 @@ Index: iam/fs/ext3/iam-uapi.c +} Index: iam/include/linux/lustre_iam.h =================================================================== ---- iam.orig/include/linux/lustre_iam.h 2006-06-08 21:50:42.000000000 +0400 -+++ iam/include/linux/lustre_iam.h 2006-06-08 21:50:42.000000000 +0400 +--- iam.orig/include/linux/lustre_iam.h 2006-06-16 14:39:59.000000000 +0400 ++++ iam/include/linux/lustre_iam.h 2006-06-16 14:40:00.000000000 +0400 @@ -30,9 +30,6 @@ #ifndef __LINUX_LUSTRE_IAM_H__ #define __LINUX_LUSTRE_IAM_H__ @@ -443,10 +442,13 @@ Index: iam/include/linux/lustre_iam.h typedef __u64 iam_ptr_t; /* -@@ -123,6 +124,21 @@ struct iam_leaf { +@@ -123,6 +124,27 @@ struct iam_leaf { void *il_descr_data; }; ++/* ++ * Return values of ->lookup() operation from struct iam_leaf_operations. ++ */ +enum iam_lookup_t { + /* + * lookup positioned leaf on some record @@ -462,10 +464,13 @@ Index: iam/include/linux/lustre_iam.h + IAM_LOOKUP_BEFORE +}; + ++/* ++ * Format-specific container operations. These are called by generic iam code. ++ */ struct iam_operations { /* * Returns pointer (in the same sense as pointer in index entry) to -@@ -131,11 +147,15 @@ struct iam_operations { +@@ -131,11 +153,15 @@ struct iam_operations { __u32 (*id_root_ptr)(struct iam_container *c); /* @@ -483,8 +488,12 @@ Index: iam/include/linux/lustre_iam.h * Initialize new node (stored in @bh) that is going to be added into * tree. */ -@@ -155,6 +175,10 @@ struct iam_operations { - * contains single record with the smallest possible key. +@@ -152,15 +178,25 @@ struct iam_operations { + * Create new container. + * + * Newly created container has a root node and a single leaf. Leaf +- * contains single record with the smallest possible key. ++ * contains single record with the least possible key. */ int (*id_create)(struct iam_container *c); + /* @@ -494,7 +503,19 @@ Index: iam/include/linux/lustre_iam.h /* * Format name. */ -@@ -226,7 +250,8 @@ struct iam_leaf_operations { + char id_name[DX_FMT_NAME_LEN]; + }; + ++/* ++ * Another format-specific operation vector, consisting of methods to access ++ * leaf nodes. This is separated from struct iam_operations, because it is ++ * assumed that there will be many formats with different format of leaf ++ * nodes, yes the same struct iam_operations. ++ */ + struct iam_leaf_operations { + /* + * leaf operations. +@@ -226,7 +262,8 @@ struct iam_leaf_operations { * split leaf node, moving some entries into @bh (the latter currently * is assumed to be empty). */ @@ -504,22 +525,134 @@ Index: iam/include/linux/lustre_iam.h }; struct iam_path *iam_leaf_path(const struct iam_leaf *leaf); -@@ -347,7 +372,13 @@ enum iam_it_state { +@@ -264,6 +301,9 @@ struct iam_descr { + struct iam_leaf_operations *id_leaf_ops; + }; + ++/* ++ * An instance of iam container. ++ */ + struct iam_container { + /* + * Underlying flat file. IO against this object is issued to +@@ -347,7 +387,9 @@ enum iam_it_state { /* initial state */ IAM_IT_DETACHED, /* iterator is above particular record in the container */ - IAM_IT_ATTACHED + IAM_IT_ATTACHED, -+ /* iterator landed into empty leaf */ -+ IAM_IT_EMPTY, -+ /* iterator is positioned before first record in the leaf */ -+ IAM_IT_BEFORE, -+ /* end of container reached */ -+ IAM_IT_EOC ++ /* iterator is positioned before record */ ++ IAM_IT_SKEWED + }; + + /* +@@ -355,7 +397,7 @@ enum iam_it_state { + */ + enum iam_it_flags { + /* +- * this iterator will move (iam_it_{prev,next}() will be called on it) ++ * this iterator will move (iam_it_next() will be called on it) + */ + IAM_IT_MOVE = (1 << 0), + /* +@@ -372,15 +414,26 @@ enum iam_it_flags { + * doesn't point to any particular record in this container. + * + * After successful call to iam_it_get() and until corresponding call to +- * iam_it_put() iterator is in "attached" state (IAM_IT_ATTACHED). ++ * iam_it_put() iterator is in one of "active" states: IAM_IT_ATTACHED or ++ * IAM_IT_SKEWED. + * +- * Attached iterator can move through records in a container (provided ++ * Active iterator can move through records in a container (provided + * IAM_IT_MOVE permission) in a key order, can get record and key values as it + * passes over them, and can modify container (provided IAM_IT_WRITE + * permission). + * ++ * Iteration may reach the end of container, at which point iterator switches ++ * into IAM_IT_DETACHED state. ++ * + * Concurrency: iterators are supposed to be local to thread. Interfaces below +- * do no internal serialization. ++ * do no internal serialization of access to the iterator fields. ++ * ++ * When in non-detached state, iterator keeps some container nodes pinned in ++ * memory and locked (that locking may be implemented at the container ++ * granularity though). In particular, clients may assume that pointers to ++ * records and keys obtained through iterator interface as valid until ++ * iterator is detached (except that they may be invalidated by sub-sequent ++ * operations done through the same iterator). + * + */ + struct iam_iterator { +@@ -390,7 +443,8 @@ struct iam_iterator { + __u32 ii_flags; + enum iam_it_state ii_state; + /* +- * path to the record. Valid in IAM_IT_ATTACHED state. ++ * path to the record. Valid in IAM_IT_ATTACHED, and IAM_IT_SKEWED ++ * states. + */ + struct iam_path ii_path; }; +@@ -420,27 +474,37 @@ int iam_it_init(struct iam_iterator *it + void iam_it_fini(struct iam_iterator *it); /* -@@ -468,7 +499,7 @@ int iam_it_next(struct iam_iterator *it) +- * Attach iterator. After successful completion, @it points to record with the +- * largest key not larger than @k. Semantics of ->id_create() method guarantee +- * that such record will always be found. ++ * Attach iterator. After successful completion, @it points to record with ++ * smallest key not larger than @k. + * + * Return value: 0: positioned on existing record, + * -ve: error. + * + * precondition: it_state(it) == IAM_IT_DETACHED +- * postcondition: ergo(result == 0, +- * (it_state(it) == IAM_IT_ATTACHED && +- * it_keycmp(it, iam_it_key_get(it, *), k) < 0)) ++ * postcondition: ergo(result == 0 && it_state(it) == IAM_IT_ATTACHED, ++ * it_keycmp(it, iam_it_key_get(it, *), k) <= 0) + */ + int iam_it_get(struct iam_iterator *it, const struct iam_key *k); + + /* ++ * Attach iterator, and assure it points to the record (not skewed). ++ * ++ * Return value: 0: positioned on existing record, ++ * -ve: error. ++ * ++ * precondition: it_state(it) == IAM_IT_DETACHED && ++ * !(it->ii_flags&IAM_IT_WRITE) ++ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED) ++ */ ++int iam_it_get_at(struct iam_iterator *it, const struct iam_key *k); ++ ++/* + * Duplicates iterator. + * + * postcondition: it_state(dst) == it_state(src) && + * iam_it_container(dst) == iam_it_container(src) && + * dst->ii_flags = src->ii_flags && +- * ergo(it_state(it) == IAM_IT_ATTACHED, ++ * ergo(it_state(src) == IAM_IT_ATTACHED, + * iam_it_rec_get(dst) == iam_it_rec_get(src) && + * iam_it_key_get(dst, *1) == iam_it_key_get(src, *2)) + */ +@@ -460,15 +524,17 @@ void iam_it_put(struct iam_iterator *it) + * +1: end of container reached + * -ve: error + * +- * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE +- * postcondition: ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED) ++ * precondition: (it_state(it) == IAM_IT_ATTACHED || ++ * it_state(it) == IAM_IT_SKEWED) && it->ii_flags&IAM_IT_MOVE ++ * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED) && ++ * ergo(result > 0, it_state(it) == IAM_IT_DETACHED) + */ + int iam_it_next(struct iam_iterator *it); + /* * Return pointer to the record under iterator. * @@ -528,58 +661,82 @@ Index: iam/include/linux/lustre_iam.h * postcondition: it_state(it) == IAM_IT_ATTACHED */ struct iam_rec *iam_it_rec_get(const struct iam_iterator *it); -@@ -476,7 +507,9 @@ struct iam_rec *iam_it_rec_get(const str +@@ -476,14 +542,15 @@ struct iam_rec *iam_it_rec_get(const str /* * Replace contents of record under iterator. * - * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE + * precondition: it_state(it) == IAM_IT_ATTACHED && -+ * it->ii_flags&IAM_IT_WRITE && -+ * it_at_rec(it) ++ * it->ii_flags&IAM_IT_WRITE * postcondition: it_state(it) == IAM_IT_ATTACHED && * ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...)) */ -@@ -485,7 +518,7 @@ int iam_it_rec_set(handle_t *h, struct i + int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r); + /* - * Place key under iterator in @k, return @k +- * Place key under iterator in @k, return @k ++ * Return pointer to the key under iterator. * -- * precondition: it_state(it) == IAM_IT_ATTACHED -+ * precondition: it_state(it) == IAM_IT_ATTACHED && it_at_rec(it) + * precondition: it_state(it) == IAM_IT_ATTACHED * postcondition: it_state(it) == IAM_IT_ATTACHED - */ - struct iam_key *iam_it_key_get(const struct iam_iterator *it, -@@ -497,7 +530,8 @@ struct iam_key *iam_it_key_get(const str +@@ -495,11 +562,17 @@ struct iam_key *iam_it_key_get(const str + * Insert new record with key @k and contents from @r, shifting records to the + * right. * - * precondition: it_state(it) == IAM_IT_ATTACHED && - * it->ii_flags&IAM_IT_WRITE && +- * precondition: it_state(it) == IAM_IT_ATTACHED && +- * it->ii_flags&IAM_IT_WRITE && - * it_keycmp(it, iam_it_key_get(it, *), k) < 0 -+ * ergo(it_at_rec(it), -+ * it_keycmp(it, iam_it_key_get(it, *), k) < 0) - * postcondition: it_state(it) == IAM_IT_ATTACHED && - * ergo(result == 0, +- * postcondition: it_state(it) == IAM_IT_ATTACHED && +- * ergo(result == 0, ++ * precondition: it->ii_flags&IAM_IT_WRITE && ++ * (it_state(it) == IAM_IT_ATTACHED || ++ * it_state(it) == IAM_IT_SKEWED) && ++ * ergo(it_state(it) == IAM_IT_ATTACHED, ++ * it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)), ++ * k) < 0) && ++ * ergo(it_before(it), ++ * it_keycmp(it, iam_it_key_get(it, it_scratch_key(it, 0)), ++ * k) > 0)); ++ * postcondition: ergo(result == 0, ++ * it_state(it) == IAM_IT_ATTACHED && * it_keycmp(it, iam_it_key_get(it, *), k) == 0 && -@@ -508,7 +542,9 @@ int iam_it_rec_insert(handle_t *h, struc + * !memcmp(iam_it_rec_get(it), r, ...)) + */ +@@ -508,8 +581,10 @@ int iam_it_rec_insert(handle_t *h, struc /* * Delete record under iterator. * - * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE +- * postcondition: it_state(it) == IAM_IT_ATTACHED + * precondition: it_state(it) == IAM_IT_ATTACHED && + * it->ii_flags&IAM_IT_WRITE && + * it_at_rec(it) - * postcondition: it_state(it) == IAM_IT_ATTACHED ++ * postcondition: it_state(it) == IAM_IT_ATTACHED || it_state(it) == IAM_IT_EOC */ int iam_it_rec_delete(handle_t *h, struct iam_iterator *it); -@@ -519,7 +555,8 @@ typedef __u64 iam_pos_t; + +@@ -519,7 +594,7 @@ typedef __u64 iam_pos_t; * Convert iterator to cookie. * * precondition: it_state(it) == IAM_IT_ATTACHED && - * path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t) -+ * path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t) && -+ * it_at_rec(it) ++ * iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t) * postcondition: it_state(it) == IAM_IT_ATTACHED */ iam_pos_t iam_it_store(const struct iam_iterator *it); -@@ -583,6 +620,17 @@ static inline void iam_keycpy(const stru +@@ -527,8 +602,9 @@ iam_pos_t iam_it_store(const struct iam_ + /* + * Restore iterator from cookie. + * +- * precondition: it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE && +- * path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t) ++ * precondition: it_state(it) == IAM_IT_DETACHED && ++ * it->ii_flags&IAM_IT_MOVE && ++ * iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t) + * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED && + * iam_it_store(it) == pos) + */ +@@ -583,6 +659,17 @@ static inline void iam_keycpy(const stru memcpy(k1, k2, c->ic_descr->id_key_size); } @@ -597,7 +754,7 @@ Index: iam/include/linux/lustre_iam.h static inline int iam_keycmp(const struct iam_container *c, const struct iam_key *k1, const struct iam_key *k2) { -@@ -650,6 +698,15 @@ static inline unsigned dx_node_limit(str +@@ -650,6 +737,15 @@ static inline unsigned dx_node_limit(str return entry_space / (param->id_key_size + param->id_ptr_size); } @@ -613,7 +770,7 @@ Index: iam/include/linux/lustre_iam.h static inline struct iam_entry *dx_get_entries(struct iam_path *path, void *data, int root) { -@@ -702,6 +759,8 @@ void iam_insert_key(struct iam_path *pat +@@ -702,6 +798,8 @@ void iam_insert_key(struct iam_path *pat int iam_leaf_at_end(const struct iam_leaf *l); void iam_leaf_next(struct iam_leaf *folio); @@ -622,7 +779,31 @@ Index: iam/include/linux/lustre_iam.h struct iam_path *iam_leaf_path(const struct iam_leaf *leaf); struct iam_container *iam_leaf_container(const struct iam_leaf *leaf); -@@ -718,5 +777,48 @@ void iam_format_register(struct iam_form +@@ -709,8 +807,23 @@ struct iam_descr *iam_leaf_descr(const s + struct iam_leaf_operations *iam_leaf_ops(const struct iam_leaf *leaf); + + ++/* ++ * Container format. ++ */ + struct iam_format { ++ /* ++ * Method called to recognize container format. Should return true iff ++ * container @c conforms to this format. This method may do IO to read ++ * container pages. ++ * ++ * If container is recognized, this method sets operation vectors ++ * ->id_ops and ->id_leaf_ops in container description (c->ic_descr), ++ * and fills other description fields. ++ */ + int (*if_guess)(struct iam_container *c); ++ /* ++ * Linkage into global list of container formats. ++ */ + struct list_head if_linkage; + }; + +@@ -718,5 +831,48 @@ void iam_format_register(struct iam_form void iam_lfix_format_init(void);