1 Index: iam/fs/ext3/Makefile
2 ===================================================================
3 --- iam.orig/fs/ext3/Makefile
4 +++ iam/fs/ext3/Makefile
5 @@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
7 ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
8 ioctl.o namei.o super.o symlink.o hash.o resize.o \
10 + extents.o mballoc.o iam.o iam_lfix.o
12 ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
13 ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
14 Index: iam/fs/ext3/iam.c
15 ===================================================================
16 --- iam.orig/fs/ext3/iam.c
19 +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
20 + * vim:expandtab:shiftwidth=8:tabstop=8:
23 + * Top-level entry points into iam module
25 + * Copyright (c) 2006 Cluster File Systems, Inc.
26 + * Author: Wang Di <wangdi@clusterfs.com>
27 + * Author: Nikita Danilov <nikita@clusterfs.com>
29 + * This file is part of the Lustre file system, http://www.lustre.org
30 + * Lustre is a trademark of Cluster File Systems, Inc.
32 + * You may have signed or agreed to another license before downloading
33 + * this software. If so, you are bound by the terms and conditions
34 + * of that agreement, and the following does not apply to you. See the
35 + * LICENSE file included with this distribution for more information.
37 + * If you did not agree to a different license, then this copy of Lustre
38 + * is open source software; you can redistribute it and/or modify it
39 + * under the terms of version 2 of the GNU General Public License as
40 + * published by the Free Software Foundation.
42 + * In either case, Lustre is distributed in the hope that it will be
43 + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
44 + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
45 + * license text for more details.
49 + * iam: big theory statement.
51 + * iam (Index Access Module) is a module providing abstraction of persistent
52 + * transactional container on top of generalized ext3 htree.
56 + * - key, pointer, and record size specifiable per container.
58 + * - trees taller than 2 index levels.
60 + * - read/write to existing ext3 htree directories as iam containers.
62 + * iam container is a tree, consisting of leaf nodes containing keys and
63 + * records stored in this container, and index nodes, containing keys and
64 + * pointers to leaf or index nodes.
66 + * iam does not work with keys directly, instead it calls user-supplied key
67 + * comparison function (->dpo_keycmp()).
69 + * Pointers are (currently) interpreted as logical offsets (measured in
70 + * blocksful) within underlying flat file on top of which iam tree lives.
74 + * iam mostly tries to reuse existing htree formats.
76 + * Format of index node:
78 + * +-----+-------+-------+-------+------+-------+------------+
79 + * | | count | | | | | |
80 + * | gap | / | entry | entry | .... | entry | free space |
81 + * | | limit | | | | | |
82 + * +-----+-------+-------+-------+------+-------+------------+
84 + * gap this part of node is never accessed by iam code. It
85 + * exists for binary compatibility with ext3 htree (that,
86 + * in turn, stores fake struct ext2_dirent for ext2
87 + * compatibility), and to keep some unspecified per-node
88 + * data. Gap can be different for root and non-root index
89 + * nodes. Gap size can be specified for each container
90 + * (gap of 0 is allowed).
92 + * count/limit current number of entries in this node, and the maximal
93 + * number of entries that can fit into node. count/limit
94 + * has the same size as entry, and is itself counted in
97 + * entry index entry: consists of a key immediately followed by
98 + * a pointer to a child node. Size of a key and size of a
99 + * pointer depends on container. Entry has neither
100 + * alignment nor padding.
102 + * free space portion of node where new entries are added
104 + * Entries in index node are sorted by their key value.
106 + * Format of a leaf node is not specified. Generic iam code accesses leaf
107 + * nodes through ->id_leaf methods in struct iam_descr.
111 +#include <linux/module.h>
112 +#include <linux/fs.h>
113 +#include <linux/pagemap.h>
114 +#include <linux/jbd.h>
115 +#include <linux/time.h>
116 +#include <linux/ext3_fs.h>
117 +#include <linux/ext3_jbd.h>
118 +#include <linux/fcntl.h>
119 +#include <linux/stat.h>
120 +#include <linux/string.h>
121 +#include <linux/quotaops.h>
122 +#include <linux/buffer_head.h>
123 +#include <linux/smp_lock.h>
124 +#include <linux/lustre_iam.h>
126 +#include <libcfs/libcfs.h>
127 +#include <libcfs/kp30.h>
134 + * List of all registered formats.
136 + * No locking. Callers synchronize.
138 +static LIST_HEAD(iam_formats);
140 +void iam_format_register(struct iam_format *fmt)
142 + list_add(&fmt->if_linkage, &iam_formats);
144 +EXPORT_SYMBOL(iam_format_register);
147 + * Determine format of given container. This is done by scanning list of
148 + * registered formats and calling ->if_guess() method of each in turn.
150 +static int iam_format_guess(struct iam_container *c)
153 + struct iam_format *fmt;
156 + * XXX temporary initialization hook.
159 + static int initialized = 0;
161 + if (!initialized) {
163 + * Keep that order: htree should be registered first,
164 + * so that iam_htree_guess() runs last.
166 + iam_htree_format_init();
167 + iam_lvar_format_init();
168 + iam_lfix_format_init();
174 + list_for_each_entry(fmt, &iam_formats, if_linkage) {
175 + result = fmt->if_guess(c);
183 + * Initialize container @c.
185 +int iam_container_init(struct iam_container *c,
186 + struct iam_descr *descr, struct inode *inode)
188 + memset(c, 0, sizeof *c);
189 + c->ic_descr = descr;
190 + c->ic_object = inode;
191 + init_rwsem(&c->ic_sem);
194 +EXPORT_SYMBOL(iam_container_init);
197 + * Determine container format.
199 +int iam_container_setup(struct iam_container *c)
201 + return iam_format_guess(c);
203 +EXPORT_SYMBOL(iam_container_setup);
206 + * Finalize container @c, release all resources.
208 +void iam_container_fini(struct iam_container *c)
211 +EXPORT_SYMBOL(iam_container_fini);
213 +void iam_path_init(struct iam_path *path, struct iam_container *c,
214 + struct iam_path_descr *pd)
216 + memset(path, 0, sizeof *path);
217 + path->ip_container = c;
218 + path->ip_frame = path->ip_frames;
219 + path->ip_data = pd;
220 + path->ip_leaf.il_path = path;
223 +static void iam_leaf_fini(struct iam_leaf *leaf);
225 +void iam_path_release(struct iam_path *path)
229 + for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) {
230 + if (path->ip_frames[i].bh != NULL) {
231 + brelse(path->ip_frames[i].bh);
232 + path->ip_frames[i].bh = NULL;
237 +void iam_path_fini(struct iam_path *path)
239 + iam_leaf_fini(&path->ip_leaf);
240 + iam_path_release(path);
243 +void iam_path_compat_init(struct iam_path_compat *path, struct inode *inode)
247 + path->ipc_hinfo = &path->ipc_hinfo_area;
248 + for (i = 0; i < ARRAY_SIZE(path->ipc_scratch); ++i)
249 + path->ipc_descr.ipd_key_scratch[i] =
250 + (struct iam_ikey *)&path->ipc_scratch[i];
252 + iam_container_init(&path->ipc_container,
253 + &iam_htree_compat_param, inode);
254 + iam_path_init(&path->ipc_path, &path->ipc_container, &path->ipc_descr);
257 +void iam_path_compat_fini(struct iam_path_compat *path)
259 + iam_path_fini(&path->ipc_path);
260 + iam_container_fini(&path->ipc_container);
264 + * Helper function allocating iam_path_descr and initializing its key scratch
267 +struct iam_path_descr *iam_ipd_alloc(int keysize)
269 + struct iam_path_descr *ipd;
273 + ipd = kmalloc(ARRAY_SIZE(ipd->ipd_key_scratch) * keysize +
274 + sizeof *ipd, GFP_KERNEL);
277 + for (i = 0; i < ARRAY_SIZE(ipd->ipd_key_scratch);
278 + ++i, karea += keysize)
279 + ipd->ipd_key_scratch[i] = karea;
283 +EXPORT_SYMBOL(iam_ipd_alloc);
285 +void iam_ipd_free(struct iam_path_descr *ipd)
289 +EXPORT_SYMBOL(iam_ipd_free);
291 +int iam_node_read(struct iam_container *c, iam_ptr_t ptr,
292 + handle_t *h, struct buffer_head **bh)
296 + *bh = ext3_bread(h, c->ic_object, (int)ptr, 0, &result);
303 + * Return pointer to current leaf record. Pointer is valid while corresponding
304 + * leaf node is locked and pinned.
306 +static struct iam_rec *iam_leaf_rec(const struct iam_leaf *leaf)
308 + return iam_leaf_ops(leaf)->rec(leaf);
312 + * Return pointer to the current leaf key. This function returns pointer to
313 + * the key stored in node.
315 + * Caller should assume that returned pointer is only valid while leaf node is
316 + * pinned and locked.
318 +static struct iam_key *iam_leaf_key(const struct iam_leaf *leaf)
320 + return iam_leaf_ops(leaf)->key(leaf);
323 +static int iam_leaf_key_size(const struct iam_leaf *leaf)
325 + return iam_leaf_ops(leaf)->key_size(leaf);
328 +static struct iam_ikey *iam_leaf_ikey(const struct iam_leaf *leaf,
329 + struct iam_ikey *key)
331 + return iam_leaf_ops(leaf)->ikey(leaf, key);
334 +static int iam_leaf_keycmp(const struct iam_leaf *leaf,
335 + const struct iam_key *key)
337 + return iam_leaf_ops(leaf)->key_cmp(leaf, key);
340 +static int iam_leaf_keyeq(const struct iam_leaf *leaf,
341 + const struct iam_key *key)
343 + return iam_leaf_ops(leaf)->key_eq(leaf, key);
346 +#if EXT3_INVARIANT_ON
347 +static int iam_leaf_check(struct iam_leaf *leaf);
348 +extern int dx_node_check(struct iam_path *p, struct iam_frame *f);
350 +static int iam_path_check(struct iam_path *p)
354 + struct iam_frame *f;
355 + struct iam_descr *param;
358 + param = iam_path_descr(p);
359 + for (i = 0; result && i < ARRAY_SIZE(p->ip_frames); ++i) {
360 + f = &p->ip_frames[i];
361 + if (f->bh != NULL) {
362 + result = dx_node_check(p, f);
364 + result = !param->id_ops->id_node_check(p, f);
367 + if (result && p->ip_leaf.il_bh != NULL)
368 + result = iam_leaf_check(&p->ip_leaf);
371 + ext3_std_error(iam_path_obj(p)->i_sb, result);
377 +static int iam_leaf_load(struct iam_path *path)
381 + struct iam_container *c;
382 + struct buffer_head *bh;
383 + struct iam_leaf *leaf;
384 + struct iam_descr *descr;
386 + c = path->ip_container;
387 + leaf = &path->ip_leaf;
388 + descr = iam_path_descr(path);
389 + block = path->ip_frame->leaf;
391 + /* XXX bug 11027 */
392 + printk(KERN_EMERG "wrong leaf: %lu %d [%p %p %p]\n",
393 + (long unsigned)path->ip_frame->leaf,
394 + dx_get_count(dx_node_get_entries(path, path->ip_frame)),
395 + path->ip_frames[0].bh, path->ip_frames[1].bh,
396 + path->ip_frames[2].bh);
398 + err = descr->id_ops->id_node_read(c, block, NULL, &bh);
401 + leaf->il_curidx = block;
402 + err = iam_leaf_ops(leaf)->init(leaf);
403 + assert_inv(ergo(err == 0, iam_leaf_check(leaf)));
408 +static void iam_leaf_unlock(struct iam_leaf *leaf)
410 + if (leaf->il_lock != NULL) {
411 + dx_unlock_htree(iam_leaf_container(leaf)->ic_object,
413 + do_corr(schedule());
414 + leaf->il_lock = NULL;
418 +static void iam_leaf_fini(struct iam_leaf *leaf)
420 + if (leaf->il_path != NULL) {
421 + iam_leaf_unlock(leaf);
422 + assert_inv(ergo(leaf->il_bh != NULL, iam_leaf_check(leaf)));
423 + iam_leaf_ops(leaf)->fini(leaf);
425 + brelse(leaf->il_bh);
426 + leaf->il_bh = NULL;
427 + leaf->il_curidx = 0;
432 +static void iam_leaf_start(struct iam_leaf *folio)
434 + iam_leaf_ops(folio)->start(folio);
437 +void iam_leaf_next(struct iam_leaf *folio)
439 + iam_leaf_ops(folio)->next(folio);
442 +static void iam_leaf_rec_add(struct iam_leaf *leaf, const struct iam_key *key,
443 + const struct iam_rec *rec)
445 + iam_leaf_ops(leaf)->rec_add(leaf, key, rec);
448 +static void iam_rec_del(struct iam_leaf *leaf, int shift)
450 + iam_leaf_ops(leaf)->rec_del(leaf, shift);
453 +int iam_leaf_at_end(const struct iam_leaf *leaf)
455 + return iam_leaf_ops(leaf)->at_end(leaf);
458 +void iam_leaf_split(struct iam_leaf *l, struct buffer_head **bh, iam_ptr_t nr)
460 + iam_leaf_ops(l)->split(l, bh, nr);
463 +int iam_leaf_can_add(const struct iam_leaf *l,
464 + const struct iam_key *k, const struct iam_rec *r)
466 + return iam_leaf_ops(l)->can_add(l, k, r);
469 +#if EXT3_INVARIANT_ON
470 +static int iam_leaf_check(struct iam_leaf *leaf)
474 + struct iam_lentry *orig;
475 + struct iam_path *path;
476 + struct iam_container *bag;
477 + struct iam_ikey *k0;
478 + struct iam_ikey *k1;
482 + orig = leaf->il_at;
483 + path = iam_leaf_path(leaf);
484 + bag = iam_leaf_container(leaf);
486 + result = iam_leaf_ops(leaf)->init(leaf);
491 + iam_leaf_start(leaf);
492 + k0 = iam_path_ikey(path, 0);
493 + k1 = iam_path_ikey(path, 1);
494 + while (!iam_leaf_at_end(leaf)) {
495 + iam_ikeycpy(bag, k0, k1);
496 + iam_ikeycpy(bag, k1, iam_leaf_ikey(leaf, k1));
497 + if (!first && iam_ikeycmp(bag, k0, k1) > 0) {
502 + iam_leaf_next(leaf);
504 + leaf->il_at = orig;
510 +static int iam_txn_dirty(handle_t *handle,
511 + struct iam_path *path, struct buffer_head *bh)
515 + result = ext3_journal_dirty_metadata(handle, bh);
517 + ext3_std_error(iam_path_obj(path)->i_sb, result);
521 +static int iam_txn_add(handle_t *handle,
522 + struct iam_path *path, struct buffer_head *bh)
526 + result = ext3_journal_get_write_access(handle, bh);
528 + ext3_std_error(iam_path_obj(path)->i_sb, result);
532 +/***********************************************************************/
533 +/* iterator interface */
534 +/***********************************************************************/
536 +static enum iam_it_state it_state(const struct iam_iterator *it)
538 + return it->ii_state;
542 + * Helper function returning the container an iterator is attached to.
544 +static struct iam_container *iam_it_container(const struct iam_iterator *it)
546 + return it->ii_path.ip_container;
549 +static inline int it_keycmp(const struct iam_iterator *it,
550 + const struct iam_key *k)
552 + return iam_leaf_keycmp(&it->ii_path.ip_leaf, k);
555 +static inline int it_keyeq(const struct iam_iterator *it,
556 + const struct iam_key *k)
558 + return iam_leaf_keyeq(&it->ii_path.ip_leaf, k);
561 +static int it_ikeycmp(const struct iam_iterator *it, const struct iam_ikey *ik)
563 + return iam_ikeycmp(it->ii_path.ip_container,
564 + iam_leaf_ikey(&it->ii_path.ip_leaf,
565 + iam_path_ikey(&it->ii_path, 0)), ik);
568 +static inline int it_at_rec(const struct iam_iterator *it)
570 + return !iam_leaf_at_end(&it->ii_path.ip_leaf);
573 +static inline int it_before(const struct iam_iterator *it)
575 + return it_state(it) == IAM_IT_SKEWED && it_at_rec(it);
579 + * Helper wrapper around iam_it_get(): returns 0 (success) only when record
580 + * with exactly the same key as asked is found.
582 +static int iam_it_get_exact(struct iam_iterator *it, const struct iam_key *k)
586 + result = iam_it_get(it, k);
589 + else if (result == 0)
591 + * Return -ENOENT if cursor is located above record with a key
592 + * different from one specified, or in the empty leaf.
594 + * XXX returning -ENOENT only works if iam_it_get() never
595 + * returns -ENOENT as a legitimate error.
601 +void iam_container_write_lock(struct iam_container *ic)
603 + down_write(&ic->ic_sem);
606 +void iam_container_write_unlock(struct iam_container *ic)
608 + up_write(&ic->ic_sem);
611 +void iam_container_read_lock(struct iam_container *ic)
613 + down_read(&ic->ic_sem);
616 +void iam_container_read_unlock(struct iam_container *ic)
618 + up_read(&ic->ic_sem);
622 + * Initialize iterator to IAM_IT_DETACHED state.
624 + * postcondition: it_state(it) == IAM_IT_DETACHED
626 +int iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags,
627 + struct iam_path_descr *pd)
629 + memset(it, 0, sizeof *it);
630 + it->ii_flags = flags;
631 + it->ii_state = IAM_IT_DETACHED;
632 + iam_path_init(&it->ii_path, c, pd);
635 +EXPORT_SYMBOL(iam_it_init);
638 + * Finalize iterator and release all resources.
640 + * precondition: it_state(it) == IAM_IT_DETACHED
642 +void iam_it_fini(struct iam_iterator *it)
644 + assert_corr(it_state(it) == IAM_IT_DETACHED);
645 + iam_path_fini(&it->ii_path);
647 +EXPORT_SYMBOL(iam_it_fini);
650 + * Performs tree top-to-bottom traversal starting from root, and loads leaf
653 +static int iam_path_lookup(struct iam_path *path, int index)
655 + struct iam_container *c;
656 + struct iam_descr *descr;
657 + struct iam_leaf *leaf;
660 + c = path->ip_container;
661 + leaf = &path->ip_leaf;
662 + descr = iam_path_descr(path);
663 + result = dx_lookup_lock(path, &leaf->il_lock, DLT_WRITE);
664 + assert_inv(iam_path_check(path));
665 + do_corr(schedule());
667 + result = iam_leaf_load(path);
668 + assert_inv(ergo(result == 0, iam_leaf_check(leaf)));
670 + do_corr(schedule());
672 + result = iam_leaf_ops(leaf)->
673 + ilookup(leaf, path->ip_ikey_target);
675 + result = iam_leaf_ops(leaf)->
676 + lookup(leaf, path->ip_key_target);
677 + do_corr(schedule());
680 + iam_leaf_unlock(leaf);
686 + * Common part of iam_it_{i,}get().
688 +static int __iam_it_get(struct iam_iterator *it, int index)
691 + assert_corr(it_state(it) == IAM_IT_DETACHED);
693 + result = iam_path_lookup(&it->ii_path, index);
697 + collision = result & IAM_LOOKUP_LAST;
698 + switch (result & ~IAM_LOOKUP_LAST) {
699 + case IAM_LOOKUP_EXACT:
701 + it->ii_state = IAM_IT_ATTACHED;
703 + case IAM_LOOKUP_OK:
705 + it->ii_state = IAM_IT_ATTACHED;
707 + case IAM_LOOKUP_BEFORE:
708 + case IAM_LOOKUP_EMPTY:
710 + it->ii_state = IAM_IT_SKEWED;
715 + result |= collision;
718 + * See iam_it_get_exact() for explanation.
720 + assert_corr(result != -ENOENT);
725 + * Correct hash, but not the same key was found, iterate through hash
726 + * collision chain, looking for correct record.
728 +static int iam_it_collision(struct iam_iterator *it)
732 + assert(ergo(it_at_rec(it), !it_keyeq(it, it->ii_path.ip_key_target)));
734 + while ((result = iam_it_next(it)) == 0) {
735 + do_corr(schedule());
736 + if (it_ikeycmp(it, it->ii_path.ip_ikey_target) != 0)
738 + if (it_keyeq(it, it->ii_path.ip_key_target))
745 + * Attach iterator. After successful completion, @it points to record with
746 + * least key not larger than @k.
748 + * Return value: 0: positioned on existing record,
749 + * +ve: exact position found,
752 + * precondition: it_state(it) == IAM_IT_DETACHED
753 + * postcondition: ergo(result == 0 && it_state(it) == IAM_IT_ATTACHED,
754 + * it_keycmp(it, k) <= 0)
756 +int iam_it_get(struct iam_iterator *it, const struct iam_key *k)
759 + assert_corr(it_state(it) == IAM_IT_DETACHED);
761 + it->ii_path.ip_ikey_target = NULL;
762 + it->ii_path.ip_key_target = k;
764 + result = __iam_it_get(it, 0);
766 + if (result == IAM_LOOKUP_LAST) {
767 + result = iam_it_collision(it);
771 + result = __iam_it_get(it, 0);
776 + result &= ~IAM_LOOKUP_LAST;
778 + assert_corr(ergo(result > 0, it_keycmp(it, k) == 0));
779 + assert_corr(ergo(result == 0 && it_state(it) == IAM_IT_ATTACHED,
780 + it_keycmp(it, k) <= 0));
783 +EXPORT_SYMBOL(iam_it_get);
786 + * Attach iterator by index key.
788 +static int iam_it_iget(struct iam_iterator *it, const struct iam_ikey *k)
790 + assert_corr(it_state(it) == IAM_IT_DETACHED);
792 + it->ii_path.ip_ikey_target = k;
793 + return __iam_it_get(it, 1) & ~IAM_LOOKUP_LAST;
797 + * Attach iterator, and assure it points to the record (not skewed).
799 + * Return value: 0: positioned on existing record,
800 + * +ve: exact position found,
803 + * precondition: it_state(it) == IAM_IT_DETACHED &&
804 + * !(it->ii_flags&IAM_IT_WRITE)
805 + * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED)
807 +int iam_it_get_at(struct iam_iterator *it, const struct iam_key *k)
810 + assert_corr(it_state(it) == IAM_IT_DETACHED &&
811 + !(it->ii_flags&IAM_IT_WRITE));
812 + result = iam_it_get(it, k);
814 + if (it_state(it) != IAM_IT_ATTACHED) {
815 + assert_corr(it_state(it) == IAM_IT_SKEWED);
816 + result = iam_it_next(it);
819 + assert_corr(ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED));
822 +EXPORT_SYMBOL(iam_it_get_at);
825 + * Duplicates iterator.
827 + * postcondition: it_state(dst) == it_state(src) &&
828 + * iam_it_container(dst) == iam_it_container(src) &&
829 + * dst->ii_flags = src->ii_flags &&
830 + * ergo(it_state(src) == IAM_IT_ATTACHED,
831 + * iam_it_rec_get(dst) == iam_it_rec_get(src) &&
832 + * iam_it_key_get(dst) == iam_it_key_get(src))
834 +void iam_it_dup(struct iam_iterator *dst, const struct iam_iterator *src)
836 + dst->ii_flags = src->ii_flags;
837 + dst->ii_state = src->ii_state;
838 + /* XXX not yet. iam_path_dup(&dst->ii_path, &src->ii_path); */
840 + * XXX: duplicate lock.
842 + assert_corr(it_state(dst) == it_state(src));
843 + assert_corr(iam_it_container(dst) == iam_it_container(src));
844 + assert_corr(dst->ii_flags = src->ii_flags);
845 + assert_corr(ergo(it_state(src) == IAM_IT_ATTACHED,
846 + iam_it_rec_get(dst) == iam_it_rec_get(src) &&
847 + iam_it_key_get(dst) == iam_it_key_get(src)));
852 + * Detach iterator. Does nothing if iterator is in detached state.
854 + * postcondition: it_state(it) == IAM_IT_DETACHED
856 +void iam_it_put(struct iam_iterator *it)
858 + if (it->ii_state != IAM_IT_DETACHED) {
859 + it->ii_state = IAM_IT_DETACHED;
860 + iam_leaf_fini(&it->ii_path.ip_leaf);
863 +EXPORT_SYMBOL(iam_it_put);
865 +static struct iam_ikey *iam_it_ikey_get(const struct iam_iterator *it,
866 + struct iam_ikey *ikey);
868 + * Move iterator one record right.
870 + * Return value: 0: success,
871 + * +1: end of container reached
874 + * precondition: (it_state(it) == IAM_IT_ATTACHED ||
875 + * it_state(it) == IAM_IT_SKEWED) && it->ii_flags&IAM_IT_MOVE
876 + * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED) &&
877 + * ergo(result > 0, it_state(it) == IAM_IT_DETACHED)
879 +int iam_it_next(struct iam_iterator *it)
882 + struct iam_path *path;
883 + struct iam_leaf *leaf;
885 + do_corr(struct iam_ikey *ik_orig);
887 + /* assert_corr(it->ii_flags&IAM_IT_MOVE); */
888 + assert_corr(it_state(it) == IAM_IT_ATTACHED ||
889 + it_state(it) == IAM_IT_SKEWED);
891 + path = &it->ii_path;
892 + leaf = &path->ip_leaf;
893 + obj = iam_path_obj(path);
895 + assert_corr(iam_leaf_is_locked(leaf));
898 + do_corr(ik_orig = iam_it_ikey_get(it, iam_path_ikey(path, 2)));
899 + if (it_before(it)) {
900 + assert_corr(!iam_leaf_at_end(leaf));
901 + it->ii_state = IAM_IT_ATTACHED;
903 + if (!iam_leaf_at_end(leaf))
904 + /* advance within leaf node */
905 + iam_leaf_next(leaf);
907 + * multiple iterations may be necessary due to empty leaves.
909 + while (result == 0 && iam_leaf_at_end(leaf)) {
910 + do_corr(schedule());
911 + /* advance index portion of the path */
912 + result = iam_index_next(iam_it_container(it), path);
913 + assert_corr(iam_leaf_is_locked(leaf));
915 + struct dynlock_handle *lh;
916 + lh = dx_lock_htree(obj, path->ip_frame->leaf,
919 + iam_leaf_fini(leaf);
920 + leaf->il_lock = lh;
921 + result = iam_leaf_load(path);
923 + iam_leaf_start(leaf);
926 + } else if (result == 0)
927 + /* end of container reached */
933 + it->ii_state = IAM_IT_ATTACHED;
935 + assert_corr(ergo(result == 0, it_state(it) == IAM_IT_ATTACHED));
936 + assert_corr(ergo(result > 0, it_state(it) == IAM_IT_DETACHED));
937 + assert_corr(ergo(result == 0, it_ikeycmp(it, ik_orig) >= 0));
940 +EXPORT_SYMBOL(iam_it_next);
943 + * Return pointer to the record under iterator.
945 + * precondition: it_state(it) == IAM_IT_ATTACHED && it_at_rec(it)
946 + * postcondition: it_state(it) == IAM_IT_ATTACHED
948 +struct iam_rec *iam_it_rec_get(const struct iam_iterator *it)
950 + assert_corr(it_state(it) == IAM_IT_ATTACHED);
951 + assert_corr(it_at_rec(it));
952 + return iam_leaf_rec(&it->ii_path.ip_leaf);
954 +EXPORT_SYMBOL(iam_it_rec_get);
956 +static void iam_it_reccpy(struct iam_iterator *it, const struct iam_rec *r)
958 + struct iam_leaf *folio;
960 + folio = &it->ii_path.ip_leaf;
961 + iam_leaf_ops(folio)->rec_set(folio, r);
965 + * Replace contents of record under iterator.
967 + * precondition: it_state(it) == IAM_IT_ATTACHED &&
968 + * it->ii_flags&IAM_IT_WRITE
969 + * postcondition: it_state(it) == IAM_IT_ATTACHED &&
970 + * ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
972 +int iam_it_rec_set(handle_t *h,
973 + struct iam_iterator *it, const struct iam_rec *r)
976 + struct iam_path *path;
977 + struct buffer_head *bh;
979 + assert_corr(it_state(it) == IAM_IT_ATTACHED &&
980 + it->ii_flags&IAM_IT_WRITE);
981 + assert_corr(it_at_rec(it));
983 + path = &it->ii_path;
984 + bh = path->ip_leaf.il_bh;
985 + result = iam_txn_add(h, path, bh);
987 + iam_it_reccpy(it, r);
988 + result = iam_txn_dirty(h, path, bh);
992 +EXPORT_SYMBOL(iam_it_rec_set);
995 + * Return pointer to the index key under iterator.
997 + * precondition: it_state(it) == IAM_IT_ATTACHED ||
998 + * it_state(it) == IAM_IT_SKEWED
1000 +static struct iam_ikey *iam_it_ikey_get(const struct iam_iterator *it,
1001 + struct iam_ikey *ikey)
1003 + assert_corr(it_state(it) == IAM_IT_ATTACHED ||
1004 + it_state(it) == IAM_IT_SKEWED);
1005 + assert_corr(it_at_rec(it));
1006 + return iam_leaf_ikey(&it->ii_path.ip_leaf, ikey);
1010 + * Return pointer to the key under iterator.
1012 + * precondition: it_state(it) == IAM_IT_ATTACHED ||
1013 + * it_state(it) == IAM_IT_SKEWED
1015 +struct iam_key *iam_it_key_get(const struct iam_iterator *it)
1017 + assert_corr(it_state(it) == IAM_IT_ATTACHED ||
1018 + it_state(it) == IAM_IT_SKEWED);
1019 + assert_corr(it_at_rec(it));
1020 + return iam_leaf_key(&it->ii_path.ip_leaf);
1022 +EXPORT_SYMBOL(iam_it_key_get);
1025 + * Return size of key under iterator (in bytes)
1027 + * precondition: it_state(it) == IAM_IT_ATTACHED ||
1028 + * it_state(it) == IAM_IT_SKEWED
1030 +int iam_it_key_size(const struct iam_iterator *it)
1032 + assert_corr(it_state(it) == IAM_IT_ATTACHED ||
1033 + it_state(it) == IAM_IT_SKEWED);
1034 + assert_corr(it_at_rec(it));
1035 + return iam_leaf_key_size(&it->ii_path.ip_leaf);
1037 +EXPORT_SYMBOL(iam_it_key_size);
1040 + * Insertion of new record. Interaction with jbd during non-trivial case (when
1041 + * split happens) is as following:
1043 + * - new leaf node is involved into transaction by ext3_append();
1045 + * - old leaf node is involved into transaction by iam_add_rec();
1047 + * - leaf where insertion point ends in, is marked dirty by iam_add_rec();
1049 + * - leaf without insertion point is marked dirty (as @new_leaf) by
1052 + * - split index nodes are involved into transaction and marked dirty by
1053 + * split_index_node().
1055 + * - "safe" index node, which is not split, but where new pointer is inserted
1056 + * is involved into transaction and marked dirty by split_index_node().
1058 + * - index node where pointer to new leaf is inserted is involved into
1059 + * transaction by split_index_node() and marked dirty by iam_add_rec().
1061 + * - inode is marked dirty by iam_add_rec().
1065 +static int iam_new_leaf(handle_t *handle, struct iam_leaf *leaf)
1069 + struct buffer_head *new_leaf;
1070 + struct buffer_head *old_leaf;
1071 + struct iam_container *c;
1072 + struct inode *obj;
1073 + struct iam_path *path;
1075 + assert_inv(iam_leaf_check(leaf));
1077 + c = iam_leaf_container(leaf);
1078 + path = leaf->il_path;
1080 + obj = c->ic_object;
1081 + new_leaf = ext3_append(handle, obj, (__u32 *)&blknr, &err);
1082 + do_corr(schedule());
1083 + if (new_leaf != NULL) {
1084 + struct dynlock_handle *lh;
1086 + lh = dx_lock_htree(obj, blknr, DLT_WRITE);
1087 + do_corr(schedule());
1089 + iam_leaf_ops(leaf)->init_new(c, new_leaf);
1090 + do_corr(schedule());
1091 + old_leaf = leaf->il_bh;
1092 + iam_leaf_split(leaf, &new_leaf, blknr);
1093 + if (old_leaf != leaf->il_bh) {
1095 + * Switched to the new leaf.
1097 + iam_leaf_unlock(leaf);
1098 + leaf->il_lock = lh;
1099 + path->ip_frame->leaf = blknr;
1101 + dx_unlock_htree(obj, lh);
1102 + do_corr(schedule());
1103 + err = iam_txn_dirty(handle, path, new_leaf);
1106 + err = ext3_mark_inode_dirty(handle, obj);
1107 + do_corr(schedule());
1111 + assert_inv(iam_leaf_check(leaf));
1112 + assert_inv(iam_leaf_check(&iam_leaf_path(leaf)->ip_leaf));
1113 + assert_inv(iam_path_check(iam_leaf_path(leaf)));
1117 +static int iam_add_rec(handle_t *handle, struct iam_iterator *it,
1118 + struct iam_path *path,
1119 + const struct iam_key *k, const struct iam_rec *r)
1122 + struct iam_leaf *leaf;
1124 + leaf = &path->ip_leaf;
1125 + assert_inv(iam_leaf_check(leaf));
1126 + assert_inv(iam_path_check(path));
1127 + err = iam_txn_add(handle, path, leaf->il_bh);
1129 + do_corr(schedule());
1130 + if (!iam_leaf_can_add(leaf, k, r)) {
1131 + struct dynlock_handle *lh = NULL;
1134 + assert_corr(lh == NULL);
1135 + do_corr(schedule());
1136 + err = split_index_node(handle, path, &lh);
1137 + if (err == -EAGAIN) {
1138 + assert_corr(lh == NULL);
1140 + iam_path_fini(path);
1141 + it->ii_state = IAM_IT_DETACHED;
1143 + do_corr(schedule());
1144 + err = iam_it_get_exact(it, k);
1145 + if (err == -ENOENT)
1146 + err = +1; /* repeat split */
1147 + else if (err == 0)
1150 + } while (err > 0);
1151 + assert_inv(iam_path_check(path));
1153 + assert_corr(lh != NULL);
1154 + do_corr(schedule());
1155 + err = iam_new_leaf(handle, leaf);
1157 + err = iam_txn_dirty(handle, path,
1158 + path->ip_frame->bh);
1160 + dx_unlock_htree(iam_path_obj(path), lh);
1161 + do_corr(schedule());
1164 + iam_leaf_rec_add(leaf, k, r);
1165 + err = iam_txn_dirty(handle, path, leaf->il_bh);
1168 + assert_inv(iam_leaf_check(leaf));
1169 + assert_inv(iam_leaf_check(&path->ip_leaf));
1170 + assert_inv(iam_path_check(path));
1175 + * Insert new record with key @k and contents from @r, shifting records to the
1176 + * right. On success, iterator is positioned on the newly inserted record.
1178 + * precondition: it->ii_flags&IAM_IT_WRITE &&
1179 + * (it_state(it) == IAM_IT_ATTACHED ||
1180 + * it_state(it) == IAM_IT_SKEWED) &&
1181 + * ergo(it_state(it) == IAM_IT_ATTACHED,
1182 + * it_keycmp(it, k) <= 0) &&
1183 + * ergo(it_before(it), it_keycmp(it, k) > 0));
1184 + * postcondition: ergo(result == 0,
1185 + * it_state(it) == IAM_IT_ATTACHED &&
1186 + * it_keycmp(it, k) == 0 &&
1187 + * !memcmp(iam_it_rec_get(it), r, ...))
1189 +int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
1190 + const struct iam_key *k, const struct iam_rec *r)
1193 + struct iam_path *path;
1195 + path = &it->ii_path;
1197 + assert_corr(it->ii_flags&IAM_IT_WRITE);
1198 + assert_corr(it_state(it) == IAM_IT_ATTACHED ||
1199 + it_state(it) == IAM_IT_SKEWED);
1200 + assert_corr(ergo(it_state(it) == IAM_IT_ATTACHED,
1201 + it_keycmp(it, k) <= 0));
1202 + assert_corr(ergo(it_before(it), it_keycmp(it, k) > 0));
1203 + result = iam_add_rec(h, it, path, k, r);
1205 + it->ii_state = IAM_IT_ATTACHED;
1206 + assert_corr(ergo(result == 0,
1207 + it_state(it) == IAM_IT_ATTACHED &&
1208 + it_keycmp(it, k) == 0 &&
1209 + !memcmp(iam_it_rec_get(it), r,
1210 + iam_it_container(it)->ic_descr->id_rec_size)));
1213 +EXPORT_SYMBOL(iam_it_rec_insert);
1216 + * Delete record under iterator.
1218 + * precondition: it_state(it) == IAM_IT_ATTACHED &&
1219 + * it->ii_flags&IAM_IT_WRITE &&
1221 + * postcondition: it_state(it) == IAM_IT_ATTACHED ||
1222 + * it_state(it) == IAM_IT_DETACHED
1224 +int iam_it_rec_delete(handle_t *h, struct iam_iterator *it)
1227 + struct iam_leaf *leaf;
1228 + struct iam_path *path;
1230 + assert_corr(it_state(it) == IAM_IT_ATTACHED &&
1231 + it->ii_flags&IAM_IT_WRITE);
1232 + assert_corr(it_at_rec(it));
1234 + path = &it->ii_path;
1235 + leaf = &path->ip_leaf;
1237 + assert_inv(iam_leaf_check(leaf));
1238 + assert_inv(iam_path_check(path));
1240 + result = iam_txn_add(h, path, leaf->il_bh);
1242 + * no compaction for now.
1244 + if (result == 0) {
1245 + iam_rec_del(leaf, it->ii_flags&IAM_IT_MOVE);
1246 + result = iam_txn_dirty(h, path, leaf->il_bh);
1247 + if (result == 0 && iam_leaf_at_end(leaf) &&
1248 + it->ii_flags&IAM_IT_MOVE) {
1249 + result = iam_it_next(it);
1254 + assert_inv(iam_leaf_check(leaf));
1255 + assert_inv(iam_path_check(path));
1256 + assert_corr(it_state(it) == IAM_IT_ATTACHED ||
1257 + it_state(it) == IAM_IT_DETACHED);
1260 +EXPORT_SYMBOL(iam_it_rec_delete);
1263 + * Convert iterator to cookie.
1265 + * precondition: it_state(it) == IAM_IT_ATTACHED &&
1266 + * iam_path_descr(it->ii_path)->id_ikey_size <= sizeof(iam_pos_t)
1267 + * postcondition: it_state(it) == IAM_IT_ATTACHED
1269 +iam_pos_t iam_it_store(const struct iam_iterator *it)
1273 + assert_corr(it_state(it) == IAM_IT_ATTACHED);
1274 + assert_corr(it_at_rec(it));
1275 + assert_corr(iam_it_container(it)->ic_descr->id_ikey_size <=
1279 + return *(iam_pos_t *)iam_it_ikey_get(it, (void *)&result);
1281 +EXPORT_SYMBOL(iam_it_store);
1284 + * Restore iterator from cookie.
1286 + * precondition: it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE &&
1287 + * iam_path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
1288 + * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED &&
1289 + * iam_it_store(it) == pos)
1291 +int iam_it_load(struct iam_iterator *it, iam_pos_t pos)
1293 + assert_corr(it_state(it) == IAM_IT_DETACHED &&
1294 + it->ii_flags&IAM_IT_MOVE);
1295 + assert_corr(iam_it_container(it)->ic_descr->id_ikey_size <= sizeof pos);
1296 + return iam_it_iget(it, (struct iam_ikey *)&pos);
1298 +EXPORT_SYMBOL(iam_it_load);
1300 +/***********************************************************************/
1302 +/***********************************************************************/
1304 +static inline int ptr_inside(void *base, size_t size, void *ptr)
1306 + return (base <= ptr) && (ptr < base + size);
1309 +int iam_frame_invariant(struct iam_frame *f)
1313 + f->bh->b_data != NULL &&
1314 + ptr_inside(f->bh->b_data, f->bh->b_size, f->entries) &&
1315 + ptr_inside(f->bh->b_data, f->bh->b_size, f->at) &&
1316 + f->entries <= f->at);
1318 +int iam_leaf_invariant(struct iam_leaf *l)
1321 + l->il_bh != NULL &&
1322 + l->il_bh->b_data != NULL &&
1323 + ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_entries) &&
1324 + ptr_inside(l->il_bh->b_data, l->il_bh->b_size, l->il_at) &&
1325 + l->il_entries <= l->il_at;
1328 +int iam_path_invariant(struct iam_path *p)
1332 + if (p->ip_container == NULL ||
1333 + p->ip_indirect < 0 || p->ip_indirect > DX_MAX_TREE_HEIGHT - 1 ||
1334 + p->ip_frame != p->ip_frames + p->ip_indirect ||
1335 + !iam_leaf_invariant(&p->ip_leaf))
1337 + for (i = 0; i < ARRAY_SIZE(p->ip_frames); ++i) {
1338 + if (i <= p->ip_indirect) {
1339 + if (!iam_frame_invariant(&p->ip_frames[i]))
1346 +int iam_it_invariant(struct iam_iterator *it)
1349 + (it->ii_state == IAM_IT_DETACHED ||
1350 + it->ii_state == IAM_IT_ATTACHED ||
1351 + it->ii_state == IAM_IT_SKEWED) &&
1352 + !(it->ii_flags & ~(IAM_IT_MOVE | IAM_IT_WRITE)) &&
1353 + ergo(it->ii_state == IAM_IT_ATTACHED ||
1354 + it->ii_state == IAM_IT_SKEWED,
1355 + iam_path_invariant(&it->ii_path) &&
1356 + equi(it_at_rec(it), it->ii_state == IAM_IT_SKEWED));
1360 + * Search container @c for record with key @k. If record is found, its data
1361 + * are moved into @r.
1363 + * Return values: 0: found, -ENOENT: not-found, -ve: error
1365 +int iam_lookup(struct iam_container *c, const struct iam_key *k,
1366 + struct iam_rec *r, struct iam_path_descr *pd)
1368 + struct iam_iterator it;
1371 + iam_it_init(&it, c, 0, pd);
1373 + result = iam_it_get_exact(&it, k);
1376 + * record with required key found, copy it into user buffer
1378 + iam_reccpy(&it.ii_path, r, iam_it_rec_get(&it));
1383 +EXPORT_SYMBOL(iam_lookup);
1386 + * Insert new record @r with key @k into container @c (within context of
1387 + * transaction @h).
1389 + * Return values: 0: success, -ve: error, including -EEXIST when record with
1390 + * given key is already present.
1392 + * postcondition: ergo(result == 0 || result == -EEXIST,
1393 + * iam_lookup(c, k, r2) > 0 &&
1394 + * !memcmp(r, r2, c->ic_descr->id_rec_size));
1396 +int iam_insert(handle_t *h, struct iam_container *c, const struct iam_key *k,
1397 + const struct iam_rec *r, struct iam_path_descr *pd)
1399 + struct iam_iterator it;
1402 + iam_it_init(&it, c, IAM_IT_WRITE, pd);
1404 + result = iam_it_get_exact(&it, k);
1405 + if (result == -ENOENT)
1406 + result = iam_it_rec_insert(h, &it, k, r);
1407 + else if (result == 0)
1413 +EXPORT_SYMBOL(iam_insert);
1416 + * Update record with the key @k in container @c (within context of
1417 + * transaction @h), new record is given by @r.
1419 + * Return values: 0: success, -ve: error, including -ENOENT if no record with
1420 + * the given key found.
1422 +int iam_update(handle_t *h, struct iam_container *c, const struct iam_key *k,
1423 + const struct iam_rec *r, struct iam_path_descr *pd)
1425 + struct iam_iterator it;
1428 + iam_it_init(&it, c, IAM_IT_WRITE, pd);
1430 + result = iam_it_get_exact(&it, k);
1432 + iam_it_rec_set(h, &it, r);
1437 +EXPORT_SYMBOL(iam_update);
1440 + * Delete existing record with key @k.
1442 + * Return values: 0: success, -ENOENT: not-found, -ve: other error.
1444 + * postcondition: ergo(result == 0 || result == -ENOENT,
1445 + * !iam_lookup(c, k, *));
1447 +int iam_delete(handle_t *h, struct iam_container *c, const struct iam_key *k,
1448 + struct iam_path_descr *pd)
1450 + struct iam_iterator it;
1453 + iam_it_init(&it, c, IAM_IT_WRITE, pd);
1455 + result = iam_it_get_exact(&it, k);
1457 + iam_it_rec_delete(h, &it);
1462 +EXPORT_SYMBOL(iam_delete);
1464 Index: iam/fs/ext3/iam_htree.c
1465 ===================================================================
1466 --- iam.orig/fs/ext3/iam_htree.c
1467 +++ iam/fs/ext3/iam_htree.c
1469 +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
1470 + * vim:expandtab:shiftwidth=8:tabstop=8:
1473 + * implementation of iam format for ext3/htree.
1475 + * Copyright (c) 2006 Cluster File Systems, Inc.
1476 + * Author: Nikita Danilov <nikita@clusterfs.com>
1478 + * This file is part of the Lustre file system, http://www.lustre.org
1479 + * Lustre is a trademark of Cluster File Systems, Inc.
1481 + * You may have signed or agreed to another license before downloading
1482 + * this software. If so, you are bound by the terms and conditions
1483 + * of that agreement, and the following does not apply to you. See the
1484 + * LICENSE file included with this distribution for more information.
1486 + * If you did not agree to a different license, then this copy of Lustre
1487 + * is open source software; you can redistribute it and/or modify it
1488 + * under the terms of version 2 of the GNU General Public License as
1489 + * published by the Free Software Foundation.
1491 + * In either case, Lustre is distributed in the hope that it will be
1492 + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
1493 + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
1494 + * license text for more details.
1497 +#include <linux/types.h>
1498 +#include <linux/jbd.h>
1499 +/* ext3_error(), EXT3_DIR_ROUND() */
1500 +#include <linux/ext3_fs.h>
1502 +#include <linux/lustre_iam.h>
1504 +#include <libcfs/libcfs.h>
1505 +#include <libcfs/kp30.h>
1507 +static inline struct ext3_dir_entry_2 *dent(struct iam_lentry *ent)
1509 + return (struct ext3_dir_entry_2 *)ent;
1512 +static inline struct iam_path_compat *getipc(const struct iam_leaf *folio)
1514 + struct iam_path *path;
1516 + path = iam_leaf_path(folio);
1517 + assert_corr(dx_index_is_compat(path));
1518 + assert_corr(path->ip_data != NULL);
1519 + return container_of(path->ip_data, struct iam_path_compat, ipc_descr);
1522 +static inline struct ext3_dir_entry_2 *getent(const struct iam_leaf *folio)
1524 + return dent(folio->il_at);
1527 +static __u32 hashname(const struct iam_leaf *folio,
1528 + const char *name, int namelen)
1531 + struct dx_hash_info *hinfo;
1533 + hinfo = getipc(folio)->ipc_hinfo;
1534 + assert_corr(hinfo != NULL);
1535 + result = ext3fs_dirhash(name, namelen, hinfo);
1536 + assert_corr(result == 0);
1537 + return hinfo->hash;
1540 +static __u32 gethash(const struct iam_leaf *folio,
1541 + const struct ext3_dir_entry_2 *ent)
1543 + return hashname(folio, ent->name, ent->name_len);
1546 +static inline size_t recsize(size_t namelen)
1548 + return EXT3_DIR_REC_LEN(namelen);
1551 +static struct ext3_dir_entry_2 *getlast(const struct iam_leaf *folio, int namelen)
1554 + (void *)folio->il_bh->b_data +
1555 + iam_leaf_container(folio)->ic_object->i_sb->s_blocksize -
1559 +static struct ext3_dir_entry_2 *gettop(const struct iam_leaf *folio)
1561 + return getlast(folio, 0);
1564 +static inline int ent_is_live(const struct ext3_dir_entry_2 *ent)
1566 + return ent->inode != 0;
1569 +static struct ext3_dir_entry_2 *entnext(const struct ext3_dir_entry_2 *ent)
1571 + return (void *)ent + le16_to_cpu(ent->rec_len);
1574 +static struct ext3_dir_entry_2 *skipdead(struct ext3_dir_entry_2 *ent)
1576 + if (!ent_is_live(ent))
1577 + ent = entnext(ent);
1579 + * There can be no more than one dead entry in a row.
1584 +static struct ext3_dir_entry_2 *getstart(const struct iam_leaf *folio)
1586 + return (void *)folio->il_bh->b_data;
1589 +static int getfreespace(const struct ext3_dir_entry_2 *ent)
1593 + free = le16_to_cpu(ent->rec_len);
1594 + if (ent_is_live(ent))
1595 + free -= recsize(ent->name_len);
1596 + assert_corr(free >= 0);
1600 +static int entcmp(const struct iam_leaf *folio,
1601 + const struct ext3_dir_entry_2 *e0, const struct ext3_dir_entry_2 *e1)
1606 + assert_corr(ent_is_live(e0));
1607 + assert_corr(ent_is_live(e1));
1609 + hash0 = gethash(folio, e0);
1610 + hash1 = gethash(folio, e1);
1611 + if (hash0 < hash1)
1613 + else if (hash0 > hash1)
1623 +#if EXT3_CORRECTNESS_ON || EXT3_INVARIANT_ON
1624 +static int iam_leaf_at_rec(const struct iam_leaf *folio)
1626 + struct ext3_dir_entry_2 *ent;
1628 + ent = getent(folio);
1629 + return getstart(folio) <= ent &&
1630 + ent < gettop(folio) && ent_is_live(ent);
1635 + * Leaf operations.
1638 +static struct iam_ikey *iam_htree_ikey(const struct iam_leaf *l,
1639 + struct iam_ikey *key)
1642 + assert_corr(iam_leaf_at_rec(l));
1644 + hash = (void *)key;
1645 + *hash = gethash(l, getent(l));
1649 +static struct iam_key *iam_htree_key(const struct iam_leaf *l)
1651 + assert_corr(iam_leaf_at_rec(l));
1653 + return (struct iam_key *)&getent(l)->name;
1656 +static int iam_htree_key_size(const struct iam_leaf *l)
1658 + assert_corr(iam_leaf_at_rec(l));
1660 + return getent(l)->name_len;
1663 +static void iam_htree_start(struct iam_leaf *l)
1665 + l->il_at = (void *)skipdead(getstart(l));
1668 +static int iam_htree_init(struct iam_leaf *l)
1670 + assert_corr(l->il_bh != NULL);
1672 + l->il_at = l->il_entries = (void *)getstart(l);
1676 +static void iam_htree_fini(struct iam_leaf *l)
1678 + l->il_entries = l->il_at = NULL;
1681 +struct iam_rec *iam_htree_rec(const struct iam_leaf *l)
1683 + assert_corr(iam_leaf_at_rec(l));
1684 + return (void *)&getent(l)->inode;
1687 +static void iam_htree_next(struct iam_leaf *l)
1689 + struct ext3_dir_entry_2 *scan;
1690 + struct ext3_dir_entry_2 *found;
1692 + assert_corr(iam_leaf_at_rec(l));
1694 + for (scan = getstart(l); scan < gettop(l); scan = entnext(scan)) {
1695 + if (scan != getent(l) && ent_is_live(scan) &&
1696 + entcmp(l, getent(l), scan) < 0 &&
1697 + (found == NULL || entcmp(l, scan, found) < 0))
1700 + assert_corr(ergo(found != NULL,
1701 + gethash(l, getent(l)) <= gethash(l, found)));
1702 + l->il_at = (void *)(found ? : gettop(l));
1705 +static int iam_htree_at_end(const struct iam_leaf *folio)
1707 + return getent(folio) >= gettop(folio);
1711 +static inline int match(int len, const char *const name,
1712 + struct ext3_dir_entry_2 *de)
1714 + if (len != de->name_len)
1718 + return !memcmp(name, de->name, len);
1721 +static int iam_htree_lookup(struct iam_leaf *l, const struct iam_key *k)
1723 + struct iam_container *c;
1724 + struct ext3_dir_entry_2 *scan;
1725 + struct ext3_dir_entry_2 *found;
1732 + c = iam_leaf_container(l);
1733 + name = (const char *)k;
1734 + namelen = strlen(name);
1735 + hash = hashname(l, name, namelen);
1737 + result = IAM_LOOKUP_OK;
1738 + for (scan = getstart(l); scan < getlast(l, namelen);
1739 + scan = entnext(scan)) {
1740 + if (match(namelen, name, scan)) {
1742 + result = IAM_LOOKUP_EXACT;
1744 + } else if (ent_is_live(scan)) {
1745 + if (gethash(l, scan) <= hash)
1751 + if (found == NULL) {
1753 + * @k is less than all hashes in the leaf.
1755 + iam_htree_start(l);
1756 + result = IAM_LOOKUP_BEFORE;
1758 + l->il_at = (void *)found;
1759 + assert_corr(iam_leaf_at_rec(l));
1762 + result |= IAM_LOOKUP_LAST;
1766 +static int iam_htree_ilookup(struct iam_leaf *l, const struct iam_ikey *ik)
1769 + return IAM_LOOKUP_OK;
1772 +static void iam_htree_key_set(struct iam_leaf *l, const struct iam_key *k)
1774 + assert_corr(iam_leaf_at_rec(l));
1778 +static int iam_htree_key_cmp(const struct iam_leaf *l, const struct iam_key *k)
1784 + name = (const char *)k;
1786 + assert_corr(ent_is_live(getent(l)));
1788 + h0 = gethash(l, getent(l));
1789 + h1 = hashname(l, name, strlen(name));
1791 + return h0 < h1 ? -1 : (h0 == h1 ? 0 : +1);
1794 +static int iam_htree_key_eq(const struct iam_leaf *l, const struct iam_key *k)
1798 + name = (const char *)k;
1799 + return match(strlen(name), name, getent(l));
1802 +static void iam_htree_rec_set(struct iam_leaf *l, const struct iam_rec *r)
1807 + getent(l)->inode = cpu_to_le32(*ino);
1810 +static void iam_htree_rec_add(struct iam_leaf *leaf, const struct iam_key *k,
1811 + const struct iam_rec *r)
1813 + struct ext3_dir_entry_2 *scan;
1814 + struct inode *dir;
1820 + assert_corr(iam_leaf_can_add(leaf, k, r));
1822 + dir = iam_leaf_container(leaf)->ic_object;
1824 + name = (const char *)k;
1825 + namelen = strlen(name);
1827 + scan = find_insertion_point(dir, leaf->il_bh, name, namelen);
1828 + assert_corr(!IS_ERR(scan));
1829 + scan = split_entry(dir, scan, *ino, EXT3_FT_UNKNOWN, name, namelen);
1830 + leaf->il_at = (void *)scan;
1833 +static void iam_htree_rec_del(struct iam_leaf *leaf, int shift)
1835 + struct ext3_dir_entry_2 *orig;
1836 + struct ext3_dir_entry_2 *scan;
1837 + struct ext3_dir_entry_2 *prev;
1839 + assert_corr(iam_leaf_at_rec(leaf));
1841 + orig = getent(leaf);
1844 + iam_htree_next(leaf);
1846 + for (prev = NULL, scan = getstart(leaf); scan < orig;
1847 + prev = scan, scan = entnext(scan))
1850 + assert_corr(scan == orig);
1851 + if (prev != NULL) {
1852 + prev->rec_len = cpu_to_le16(le16_to_cpu(prev->rec_len) +
1853 + le16_to_cpu(scan->rec_len));
1855 + assert_corr(scan == getstart(leaf));
1858 + iam_leaf_container(leaf)->ic_object->i_version ++;
1861 +static int iam_htree_can_add(const struct iam_leaf *leaf,
1862 + const struct iam_key *k, const struct iam_rec *r)
1864 + struct ext3_dir_entry_2 *scan;
1867 + size = recsize(strlen((const char *)k));
1868 + for (scan = getstart(leaf);
1869 + scan < gettop(leaf); scan = entnext(scan)) {
1870 + if (getfreespace(scan) >= size)
1876 +static void iam_htree_init_new(struct iam_container *c, struct buffer_head *bh)
1879 + * Do nothing, all work is done by iam_htree_split().
1883 +static void iam_htree_split(struct iam_leaf *l, struct buffer_head **bh,
1884 + iam_ptr_t new_blknr)
1888 + struct buffer_head *newbh = *bh;
1889 + struct iam_path *path;
1891 + old_hash = gethash(l, getent(l));
1892 + move_entries(iam_leaf_container(l)->ic_object,
1893 + getipc(l)->ipc_hinfo, &l->il_bh, bh, &delim_hash);
1895 + * Insert pointer to the new node (together with the least key in
1896 + * the node) into index node.
1898 + path = iam_leaf_path(l);
1899 + if (l->il_bh == newbh) {
1901 + * insertion point moves into new leaf.
1903 + assert_corr(delim_hash >= old_hash);
1904 + l->il_curidx = new_blknr;
1905 + iam_htree_lookup(l, (void *)&old_hash);
1907 + iam_insert_key_lock(path,
1908 + path->ip_frame, (void *)&delim_hash, new_blknr);
1911 +static struct iam_leaf_operations iam_htree_leaf_ops = {
1912 + .init = iam_htree_init,
1913 + .init_new = iam_htree_init_new,
1914 + .fini = iam_htree_fini,
1915 + .start = iam_htree_start,
1916 + .next = iam_htree_next,
1917 + .key = iam_htree_key,
1918 + .ikey = iam_htree_ikey,
1919 + .rec = iam_htree_rec,
1920 + .key_set = iam_htree_key_set,
1921 + .key_cmp = iam_htree_key_cmp,
1922 + .key_eq = iam_htree_key_eq,
1923 + .key_size = iam_htree_key_size,
1924 + .rec_set = iam_htree_rec_set,
1925 + .lookup = iam_htree_lookup,
1926 + .ilookup = iam_htree_ilookup,
1927 + .at_end = iam_htree_at_end,
1928 + .rec_add = iam_htree_rec_add,
1929 + .rec_del = iam_htree_rec_del,
1930 + .can_add = iam_htree_can_add,
1931 + .split = iam_htree_split
1935 + * Index operations.
1938 +static __u32 iam_htree_root_ptr(struct iam_container *c)
1943 +static int iam_htree_node_check(struct iam_path *path, struct iam_frame *frame)
1945 + /* XXX no checks yet */
1949 +static int is_htree(struct super_block *sb,
1950 + const struct dx_root *root, int silent)
1952 + if (root->info.hash_version > DX_HASH_MAX) {
1954 + ext3_warning(sb, __FUNCTION__,
1955 + "Unrecognised inode hash code %d",
1956 + root->info.hash_version);
1960 + if (root->info.unused_flags & 1) {
1962 + ext3_warning(sb, __FUNCTION__,
1963 + "Unimplemented inode hash flags: %#06x",
1964 + root->info.unused_flags);
1968 + if (root->info.indirect_levels > DX_MAX_TREE_HEIGHT - 1) {
1970 + ext3_warning(sb, __FUNCTION__,
1971 + "Unimplemented inode hash depth: %#06x",
1972 + root->info.indirect_levels);
1978 +static int iam_htree_node_load(struct iam_path *path, struct iam_frame *frame)
1981 + struct iam_entry *entries;
1982 + struct super_block *sb;
1984 + data = frame->bh->b_data;
1985 + entries = dx_node_get_entries(path, frame);
1986 + sb = iam_path_obj(path)->i_sb;
1987 + if (frame == path->ip_frames) {
1989 + struct dx_root *root;
1990 + struct iam_path_compat *ipc;
1996 + assert_corr(path->ip_data != NULL);
1997 + ipc = container_of(path->ip_data, struct iam_path_compat,
2000 + check = is_htree(sb, root, 0);
2003 + path->ip_indirect = root->info.indirect_levels;
2005 + assert_corr((char *)entries == (((char *)&root->info) +
2006 + root->info.info_length));
2007 + assert_corr(dx_get_limit(entries) == dx_root_limit(path));
2009 + ipc->ipc_hinfo->hash_version = root->info.hash_version;
2010 + ipc->ipc_hinfo->seed = EXT3_SB(sb)->s_hash_seed;
2012 + if (ipc->ipc_qstr) {
2013 + name = ipc->ipc_qstr->name;
2014 + namelen = ipc->ipc_qstr->len;
2015 + } else if (ipc->ipc_hinfo == &ipc->ipc_hinfo_area){
2016 + name = (const char *)path->ip_key_target;
2017 + namelen = strlen(name);
2020 + ext3fs_dirhash(name, namelen, ipc->ipc_hinfo);
2021 + if (path->ip_ikey_target == NULL) {
2022 + path->ip_ikey_target = iam_path_ikey(path, 4);
2023 + *(__u32 *)path->ip_ikey_target = ipc->ipc_hinfo->hash;
2026 + /* non-root index */
2027 + assert_corr(entries ==
2028 + data + iam_path_descr(path)->id_node_gap);
2029 + assert_corr(dx_get_limit(entries) == dx_node_limit(path));
2031 + frame->entries = frame->at = entries;
2035 +static int iam_htree_node_init(struct iam_container *c,
2036 + struct buffer_head *bh, int root)
2038 + struct dx_node *node;
2040 + assert_corr(!root);
2042 + node = (void *)bh->b_data;
2043 + node->fake.rec_len = cpu_to_le16(c->ic_object->i_sb->s_blocksize);
2044 + node->fake.inode = 0;
2048 +static struct iam_entry *iam_htree_root_inc(struct iam_container *c,
2049 + struct iam_path *path,
2050 + struct iam_frame *frame)
2052 + struct dx_root *root;
2053 + struct iam_entry *entries;
2055 + entries = frame->entries;
2057 + dx_set_count(entries, 1);
2058 + root = (struct dx_root *) frame->bh->b_data;
2059 + root->info.indirect_levels++;
2064 +static int iam_htree_ikeycmp(const struct iam_container *c,
2065 + const struct iam_ikey *k1,
2066 + const struct iam_ikey *k2)
2068 + __u32 p1 = le32_to_cpu(*(__u32 *)k1);
2069 + __u32 p2 = le32_to_cpu(*(__u32 *)k2);
2071 + return p1 > p2 ? +1 : (p1 < p2 ? -1 : 0);
2074 +static struct iam_path_descr *iam_htree_ipd_alloc(const struct iam_container *c)
2076 + struct iam_path_compat *ipc;
2078 + ipc = kmalloc(sizeof *ipc, GFP_KERNEL);
2079 + if (ipc != NULL) {
2080 + memset(ipc, 0, sizeof *ipc);
2081 + iam_path_compat_init(ipc, c->ic_object);
2082 + return &ipc->ipc_descr;
2087 +static void iam_htree_ipd_free(struct iam_path_descr *ipd)
2089 + struct iam_path_compat *ipc;
2091 + ipc = container_of(ipd, struct iam_path_compat, ipc_descr);
2095 +static struct iam_operations iam_htree_ops = {
2096 + .id_root_ptr = iam_htree_root_ptr,
2097 + .id_node_read = iam_node_read,
2098 + .id_node_init = iam_htree_node_init,
2099 + .id_node_check = iam_htree_node_check,
2100 + .id_node_load = iam_htree_node_load,
2101 + .id_ikeycmp = iam_htree_ikeycmp,
2102 + .id_root_inc = iam_htree_root_inc,
2103 + .id_ipd_alloc = iam_htree_ipd_alloc,
2104 + .id_ipd_free = iam_htree_ipd_free,
2105 + .id_name = "htree"
2109 + * Parameters describing iam compatibility mode in which existing ext3 htrees
2110 + * can be manipulated.
2112 +struct iam_descr iam_htree_compat_param = {
2113 + .id_key_size = EXT3_NAME_LEN,
2114 + .id_rec_size = sizeof ((struct ext3_dir_entry_2 *)NULL)->inode,
2115 + .id_ikey_size = sizeof ((struct dx_map_entry *)NULL)->hash,
2116 + .id_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs,
2117 + .id_node_gap = offsetof(struct dx_node, entries),
2118 + .id_root_gap = offsetof(struct dx_root, entries),
2119 + .id_ops = &iam_htree_ops,
2120 + .id_leaf_ops = &iam_htree_leaf_ops
2122 +EXPORT_SYMBOL(iam_htree_compat_param);
2124 +static int iam_htree_guess(struct iam_container *c)
2127 + struct buffer_head *bh;
2128 + const struct dx_root *root;
2130 + assert_corr(c->ic_object != NULL);
2132 + result = iam_node_read(c, iam_htree_root_ptr(c), NULL, &bh);
2133 + if (result == 0) {
2134 + root = (void *)bh->b_data;
2135 + result = is_htree(c->ic_object->i_sb, root, 1);
2137 + c->ic_descr = &iam_htree_compat_param;
2145 +static struct iam_format iam_htree_format = {
2146 + .if_guess = iam_htree_guess
2149 +void iam_htree_format_init(void)
2151 + iam_format_register(&iam_htree_format);
2153 Index: iam/fs/ext3/iam_lfix.c
2154 ===================================================================
2155 --- iam.orig/fs/ext3/iam_lfix.c
2156 +++ iam/fs/ext3/iam_lfix.c
2158 +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2159 + * vim:expandtab:shiftwidth=8:tabstop=8:
2162 + * implementation of iam format for fixed size records.
2164 + * Copyright (c) 2006 Cluster File Systems, Inc.
2165 + * Author: Wang Di <wangdi@clusterfs.com>
2166 + * Author: Nikita Danilov <nikita@clusterfs.com>
2168 + * This file is part of the Lustre file system, http://www.lustre.org
2169 + * Lustre is a trademark of Cluster File Systems, Inc.
2171 + * You may have signed or agreed to another license before downloading
2172 + * this software. If so, you are bound by the terms and conditions
2173 + * of that agreement, and the following does not apply to you. See the
2174 + * LICENSE file included with this distribution for more information.
2176 + * If you did not agree to a different license, then this copy of Lustre
2177 + * is open source software; you can redistribute it and/or modify it
2178 + * under the terms of version 2 of the GNU General Public License as
2179 + * published by the Free Software Foundation.
2181 + * In either case, Lustre is distributed in the hope that it will be
2182 + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
2183 + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2184 + * license text for more details.
2187 +#include <linux/types.h>
2188 +#include <linux/jbd.h>
2190 +#include <linux/ext3_fs.h>
2192 +#include <linux/lustre_iam.h>
2194 +#include <libcfs/libcfs.h>
2195 +#include <libcfs/kp30.h>
2198 + * Leaf operations.
2202 + IAM_LEAF_HEADER_MAGIC = 0x1976 /* This is duplicated in
2203 + * lustre/utils/create_iam.c */
2206 +/* This is duplicated in lustre/utils/create_iam.c */
2207 +struct iam_leaf_head {
2212 +static inline int iam_lfix_entry_size(const struct iam_leaf *l)
2214 + return iam_leaf_descr(l)->id_key_size + iam_leaf_descr(l)->id_rec_size;
2217 +static inline struct iam_lentry *
2218 +iam_lfix_shift(const struct iam_leaf *l, struct iam_lentry *entry, int shift)
2220 + return (void *)entry + shift * iam_lfix_entry_size(l);
2223 +static inline struct iam_key *iam_leaf_key_at(struct iam_lentry *entry)
2225 + return (struct iam_key *)entry;
2228 +static inline int lfix_keycmp(const struct iam_container *c,
2229 + const struct iam_key *k1,
2230 + const struct iam_key *k2)
2232 + return memcmp(k1, k2, c->ic_descr->id_key_size);
2235 +static struct iam_leaf_head *iam_get_head(const struct iam_leaf *l)
2237 + return (struct iam_leaf_head *)l->il_bh->b_data;
2240 +static struct iam_lentry *iam_entries(const struct buffer_head *bh)
2242 + return (void *)bh->b_data + sizeof(struct iam_leaf_head);
2245 +static struct iam_lentry *iam_get_lentries(const struct iam_leaf *l)
2247 + return iam_entries(l->il_bh);
2250 +static int leaf_count_limit(const struct iam_leaf *leaf)
2254 + free_space = iam_leaf_container(leaf)->ic_object->i_sb->s_blocksize;
2255 + free_space -= sizeof(struct iam_leaf_head);
2256 + return free_space / iam_lfix_entry_size(leaf);
2259 +static int lentry_count_get(const struct iam_leaf *leaf)
2261 + return le16_to_cpu(iam_get_head(leaf)->ill_count);
2264 +static void lentry_count_set(struct iam_leaf *leaf, unsigned count)
2266 + assert_corr(0 <= count && count <= leaf_count_limit(leaf));
2267 + iam_get_head(leaf)->ill_count = cpu_to_le16(count);
2270 +static struct iam_lentry *iam_lfix_get_end(const struct iam_leaf *l);
2272 +#if EXT3_CORRECTNESS_ON || EXT3_INVARIANT_ON
2273 +static int iam_leaf_at_rec(const struct iam_leaf *folio)
2276 + iam_get_lentries(folio) <= folio->il_at &&
2277 + folio->il_at < iam_lfix_get_end(folio);
2281 +static struct iam_ikey *iam_lfix_ikey(const struct iam_leaf *l,
2282 + struct iam_ikey *key)
2284 + void *ie = l->il_at;
2285 + assert_corr(iam_leaf_at_rec(l));
2286 + return (struct iam_ikey*)ie;
2289 +static struct iam_key *iam_lfix_key(const struct iam_leaf *l)
2291 + void *ie = l->il_at;
2292 + assert_corr(iam_leaf_at_rec(l));
2293 + return (struct iam_key*)ie;
2296 +static int iam_lfix_key_size(const struct iam_leaf *l)
2298 + return iam_leaf_descr(l)->id_key_size;
2301 +static void iam_lfix_start(struct iam_leaf *l)
2303 + l->il_at = iam_get_lentries(l);
2306 +static inline ptrdiff_t iam_lfix_diff(const struct iam_leaf *l,
2307 + const struct iam_lentry *e1,
2308 + const struct iam_lentry *e2)
2313 + esize = iam_lfix_entry_size(l);
2314 + diff = (void *)e1 - (void *)e2;
2315 + assert_corr(diff / esize * esize == diff);
2316 + return diff / esize;
2319 +static int iam_lfix_init(struct iam_leaf *l)
2322 + struct iam_leaf_head *ill;
2325 + assert_corr(l->il_bh != NULL);
2327 + ill = iam_get_head(l);
2328 + count = le16_to_cpu(ill->ill_count);
2329 + if (ill->ill_magic == le16_to_cpu(IAM_LEAF_HEADER_MAGIC) &&
2330 + 0 <= count && count <= leaf_count_limit(l)) {
2331 + l->il_at = l->il_entries = iam_get_lentries(l);
2334 + struct inode *obj;
2336 + obj = iam_leaf_container(l)->ic_object;
2337 + ext3_error(obj->i_sb, __FUNCTION__,
2338 + "Wrong magic in node %llu (#%lu): %#x != %#x or "
2339 + "wrong count: %i (%i)",
2340 + (unsigned long long)l->il_bh->b_blocknr, obj->i_ino,
2341 + ill->ill_magic, le16_to_cpu(IAM_LEAF_HEADER_MAGIC),
2342 + count, leaf_count_limit(l));
2349 +static void iam_lfix_fini(struct iam_leaf *l)
2351 + l->il_entries = l->il_at = NULL;
2354 +static struct iam_lentry *iam_lfix_get_end(const struct iam_leaf *l)
2356 + int count = lentry_count_get(l);
2357 + struct iam_lentry *ile = iam_lfix_shift(l, l->il_entries, count);
2362 +struct iam_rec *iam_lfix_rec(const struct iam_leaf *l)
2364 + void *e = l->il_at;
2365 + assert_corr(iam_leaf_at_rec(l));
2366 + return e + iam_leaf_descr(l)->id_key_size;
2369 +static void iam_lfix_next(struct iam_leaf *l)
2371 + assert_corr(iam_leaf_at_rec(l));
2372 + l->il_at = iam_lfix_shift(l, l->il_at, 1);
2379 +EXPORT_SYMBOL(lfix_dump);
2381 +static char hdigit(char ch)
2383 + static char d[] = "0123456789abcdef";
2384 + return d[ch & 0xf];
2387 +static char *hex(char ch, char *area)
2389 + area[0] = hdigit(ch >> 4);
2390 + area[1] = hdigit(ch);
2395 +static void l_print(struct iam_leaf *leaf, struct iam_lentry *entry)
2401 + area = (char *)entry;
2402 + printk(KERN_EMERG "[");
2403 + for (i = iam_lfix_key_size(leaf); i > 0; --i, ++area)
2404 + printk("%s", hex(*area, h));
2406 + for (i = iam_leaf_descr(leaf)->id_rec_size; i > 0; --i, ++area)
2407 + printk("%s", hex(*area, h));
2411 +static void lfix_print(struct iam_leaf *leaf)
2413 + struct iam_lentry *entry;
2417 + entry = leaf->il_entries;
2418 + count = lentry_count_get(leaf);
2419 + printk(KERN_EMERG "lfix: %p %p %d\n", leaf, leaf->il_at, count);
2420 + for (i = 0; i < count; ++i, entry = iam_lfix_shift(leaf, entry, 1))
2421 + l_print(leaf, entry);
2424 +static int iam_lfix_lookup(struct iam_leaf *l, const struct iam_key *k)
2426 + struct iam_lentry *p, *q, *m, *t;
2427 + struct iam_container *c;
2431 + count = lentry_count_get(l);
2433 + return IAM_LOOKUP_EMPTY;
2435 + result = IAM_LOOKUP_OK;
2436 + c = iam_leaf_container(l);
2438 + p = l->il_entries;
2439 + q = iam_lfix_shift(l, p, count - 1);
2440 + if (lfix_keycmp(c, k, iam_leaf_key_at(p)) < 0) {
2442 + * @k is less than the least key in the leaf
2445 + result = IAM_LOOKUP_BEFORE;
2446 + } else if (lfix_keycmp(c, iam_leaf_key_at(q), k) <= 0) {
2452 + while (iam_lfix_shift(l, p, 1) != q) {
2453 + m = iam_lfix_shift(l, p, iam_lfix_diff(l, q, p) / 2);
2454 + assert_corr(p < m && m < q);
2455 + (lfix_keycmp(c, iam_leaf_key_at(m), k) <= 0 ? p : q) = m;
2457 + assert_corr(lfix_keycmp(c, iam_leaf_key_at(p), k) <= 0 &&
2458 + lfix_keycmp(c, k, iam_leaf_key_at(q)) < 0);
2460 + * skip over records with duplicate keys.
2462 + while (p > l->il_entries) {
2463 + t = iam_lfix_shift(l, p, -1);
2464 + if (lfix_keycmp(c, iam_leaf_key_at(t), k) == 0)
2471 + assert_corr(iam_leaf_at_rec(l));
2473 + if (lfix_keycmp(c, iam_leaf_key_at(l->il_at), k) == 0)
2474 + result = IAM_LOOKUP_EXACT;
2482 +static int iam_lfix_ilookup(struct iam_leaf *l, const struct iam_ikey *ik)
2485 + return IAM_LOOKUP_OK;
2488 +static void iam_lfix_key_set(struct iam_leaf *l, const struct iam_key *k)
2490 + assert_corr(iam_leaf_at_rec(l));
2491 + memcpy(iam_leaf_key_at(l->il_at), k, iam_leaf_descr(l)->id_key_size);
2494 +static int iam_lfix_key_cmp(const struct iam_leaf *l, const struct iam_key *k)
2496 + return lfix_keycmp(iam_leaf_container(l), iam_leaf_key_at(l->il_at), k);
2499 +static int iam_lfix_key_eq(const struct iam_leaf *l, const struct iam_key *k)
2501 + return !lfix_keycmp(iam_leaf_container(l),
2502 + iam_leaf_key_at(l->il_at), k);
2505 +static void iam_lfix_rec_set(struct iam_leaf *l, const struct iam_rec *r)
2507 + assert_corr(iam_leaf_at_rec(l));
2508 + iam_reccpy(iam_leaf_path(l), iam_lfix_rec(l), r);
2511 +static void iam_lfix_rec_add(struct iam_leaf *leaf,
2512 + const struct iam_key *k, const struct iam_rec *r)
2514 + struct iam_lentry *end;
2515 + struct iam_lentry *cur;
2516 + struct iam_lentry *start;
2520 + assert_corr(iam_leaf_can_add(leaf, k, r));
2522 + count = lentry_count_get(leaf);
2524 + * This branch handles two exceptional cases:
2526 + * - leaf positioned beyond last record, and
2530 + if (!iam_leaf_at_end(leaf)) {
2531 + end = iam_lfix_get_end(leaf);
2532 + cur = leaf->il_at;
2533 + if (lfix_keycmp(iam_leaf_container(leaf),
2534 + k, iam_leaf_key_at(cur)) >= 0)
2535 + iam_lfix_next(leaf);
2538 + * Another exceptional case: insertion with the key
2539 + * less than least key in the leaf.
2541 + assert_corr(cur == leaf->il_entries);
2543 + start = leaf->il_at;
2544 + diff = (void *)end - (void *)start;
2545 + assert_corr(diff >= 0);
2546 + memmove(iam_lfix_shift(leaf, start, 1), start, diff);
2548 + lentry_count_set(leaf, count + 1);
2549 + iam_lfix_key_set(leaf, k);
2550 + iam_lfix_rec_set(leaf, r);
2551 + assert_corr(iam_leaf_at_rec(leaf));
2554 +static void iam_lfix_rec_del(struct iam_leaf *leaf, int shift)
2556 + struct iam_lentry *next, *end;
2560 + assert_corr(iam_leaf_at_rec(leaf));
2562 + count = lentry_count_get(leaf);
2563 + end = iam_lfix_get_end(leaf);
2564 + next = iam_lfix_shift(leaf, leaf->il_at, 1);
2565 + diff = (void *)end - (void *)next;
2566 + memmove(leaf->il_at, next, diff);
2568 + lentry_count_set(leaf, count - 1);
2571 +static int iam_lfix_can_add(const struct iam_leaf *l,
2572 + const struct iam_key *k, const struct iam_rec *r)
2574 + return lentry_count_get(l) < leaf_count_limit(l);
2577 +static int iam_lfix_at_end(const struct iam_leaf *folio)
2579 + return folio->il_at == iam_lfix_get_end(folio);
2582 +static void iam_lfix_init_new(struct iam_container *c, struct buffer_head *bh)
2584 + struct iam_leaf_head *hdr;
2586 + hdr = (struct iam_leaf_head*)bh->b_data;
2587 + hdr->ill_magic = cpu_to_le16(IAM_LEAF_HEADER_MAGIC);
2588 + hdr->ill_count = cpu_to_le16(0);
2591 +static void iam_lfix_split(struct iam_leaf *l, struct buffer_head **bh,
2592 + iam_ptr_t new_blknr)
2594 + struct iam_path *path;
2595 + struct iam_leaf_head *hdr;
2596 + const struct iam_ikey *pivot;
2597 + struct buffer_head *new_leaf;
2606 + path = iam_leaf_path(l);
2608 + hdr = (void *)new_leaf->b_data;
2610 + count = lentry_count_get(l);
2611 + split = count / 2;
2613 + start = iam_lfix_shift(l, iam_get_lentries(l), split);
2614 + finis = iam_lfix_shift(l, iam_get_lentries(l), count);
2616 + pivot = (const struct iam_ikey *)iam_leaf_key_at(start);
2618 + memmove(iam_entries(new_leaf), start, finis - start);
2619 + hdr->ill_count = count - split;
2620 + lentry_count_set(l, split);
2621 + if ((void *)l->il_at >= start) {
2623 + * insertion point moves into new leaf.
2628 + shift = iam_lfix_diff(l, l->il_at, start);
2630 + l->il_bh = new_leaf;
2631 + l->il_curidx = new_blknr;
2632 + result = iam_lfix_init(l);
2634 + * init cannot fail, as node was just initialized.
2636 + assert_corr(result == 0);
2637 + l->il_at = iam_lfix_shift(l, iam_get_lentries(l), shift);
2640 + * Insert pointer to the new node (together with the least key in
2641 + * the node) into index node.
2643 + iam_insert_key_lock(path, path->ip_frame, pivot, new_blknr);
2646 +static struct iam_leaf_operations iam_lfix_leaf_ops = {
2647 + .init = iam_lfix_init,
2648 + .init_new = iam_lfix_init_new,
2649 + .fini = iam_lfix_fini,
2650 + .start = iam_lfix_start,
2651 + .next = iam_lfix_next,
2652 + .key = iam_lfix_key,
2653 + .ikey = iam_lfix_ikey,
2654 + .rec = iam_lfix_rec,
2655 + .key_set = iam_lfix_key_set,
2656 + .key_cmp = iam_lfix_key_cmp,
2657 + .key_eq = iam_lfix_key_eq,
2658 + .key_size = iam_lfix_key_size,
2659 + .rec_set = iam_lfix_rec_set,
2660 + .lookup = iam_lfix_lookup,
2661 + .ilookup = iam_lfix_ilookup,
2662 + .at_end = iam_lfix_at_end,
2663 + .rec_add = iam_lfix_rec_add,
2664 + .rec_del = iam_lfix_rec_del,
2665 + .can_add = iam_lfix_can_add,
2666 + .split = iam_lfix_split
2670 + * Index operations.
2674 + /* This is duplicated in lustre/utils/create_iam.c */
2676 + * Then shalt thou see the dew-BEDABBLED wretch
2677 + * Turn, and return, indenting with the way;
2678 + * Each envious brier his weary legs doth scratch,
2679 + * Each shadow makes him stop, each murmur stay:
2680 + * For misery is trodden on by many,
2681 + * And being low never relieved by any.
2683 + IAM_LFIX_ROOT_MAGIC = 0xbedabb1edULL // d01efull
2686 +/* This is duplicated in lustre/utils/create_iam.c */
2687 +struct iam_lfix_root {
2689 + __le16 ilr_keysize;
2690 + __le16 ilr_recsize;
2691 + __le16 ilr_ptrsize;
2692 + u8 ilr_indirect_levels;
2696 +static __u32 iam_lfix_root_ptr(struct iam_container *c)
2701 +static int iam_lfix_node_init(struct iam_container *c, struct buffer_head *bh,
2707 +static struct iam_entry *iam_lfix_root_inc(struct iam_container *c,
2708 + struct iam_path *path,
2709 + struct iam_frame *frame)
2711 + struct iam_lfix_root *root;
2712 + struct iam_entry *entries;
2714 + entries = frame->entries;
2716 + dx_set_count(entries, 2);
2717 + assert_corr(dx_get_limit(entries) == dx_root_limit(path));
2719 + root = (void *)frame->bh->b_data;
2720 + assert_corr(le64_to_cpu(root->ilr_magic) == IAM_LFIX_ROOT_MAGIC);
2721 + root->ilr_indirect_levels ++;
2722 + frame->at = entries = iam_entry_shift(path, entries, 1);
2723 + memset(iam_ikey_at(path, entries), 0,
2724 + iam_path_descr(path)->id_ikey_size);
2728 +static int iam_lfix_node_check(struct iam_path *path, struct iam_frame *frame)
2732 + unsigned limit_correct;
2733 + struct iam_entry *entries;
2735 + entries = dx_node_get_entries(path, frame);
2737 + if (frame == path->ip_frames) {
2738 + struct iam_lfix_root *root;
2740 + root = (void *)frame->bh->b_data;
2741 + if (le64_to_cpu(root->ilr_magic) != IAM_LFIX_ROOT_MAGIC) {
2745 + limit_correct = dx_root_limit(path);
2747 + limit_correct = dx_node_limit(path);
2748 + count = dx_get_count(entries);
2749 + limit = dx_get_limit(entries);
2750 + if (count > limit) {
2754 + if (limit != limit_correct) {
2761 +static int iam_lfix_node_load(struct iam_path *path, struct iam_frame *frame)
2763 + struct iam_entry *entries;
2765 + entries = dx_node_get_entries(path, frame);
2767 + data = frame->bh->b_data;
2769 + if (frame == path->ip_frames) {
2770 + struct iam_lfix_root *root;
2773 + path->ip_indirect = root->ilr_indirect_levels;
2774 + if (path->ip_ikey_target == NULL)
2775 + path->ip_ikey_target =
2776 + (struct iam_ikey *)path->ip_key_target;
2778 + frame->entries = frame->at = entries;
2782 +static int iam_lfix_ikeycmp(const struct iam_container *c,
2783 + const struct iam_ikey *k1,
2784 + const struct iam_ikey *k2)
2786 + return memcmp(k1, k2, c->ic_descr->id_ikey_size);
2789 +static struct iam_path_descr *iam_lfix_ipd_alloc(const struct iam_container *c)
2791 + return iam_ipd_alloc(c->ic_descr->id_ikey_size);
2794 +static struct iam_operations iam_lfix_ops = {
2795 + .id_root_ptr = iam_lfix_root_ptr,
2796 + .id_node_read = iam_node_read,
2797 + .id_node_init = iam_lfix_node_init,
2798 + .id_node_check = iam_lfix_node_check,
2799 + .id_node_load = iam_lfix_node_load,
2800 + .id_ikeycmp = iam_lfix_ikeycmp,
2801 + .id_root_inc = iam_lfix_root_inc,
2802 + .id_ipd_alloc = iam_lfix_ipd_alloc,
2803 + .id_ipd_free = iam_ipd_free,
2807 +static int iam_lfix_guess(struct iam_container *c)
2810 + struct buffer_head *bh;
2811 + const struct iam_lfix_root *root;
2813 + assert_corr(c->ic_object != NULL);
2815 + result = iam_node_read(c, iam_lfix_root_ptr(c), NULL, &bh);
2816 + if (result == 0) {
2817 + root = (void *)bh->b_data;
2818 + if (le64_to_cpu(root->ilr_magic) == IAM_LFIX_ROOT_MAGIC) {
2819 + struct iam_descr *descr;
2821 + descr = c->ic_descr;
2822 + descr->id_key_size = le16_to_cpu(root->ilr_keysize);
2823 + descr->id_ikey_size = le16_to_cpu(root->ilr_keysize);
2824 + descr->id_rec_size = le16_to_cpu(root->ilr_recsize);
2825 + descr->id_ptr_size = le16_to_cpu(root->ilr_ptrsize);
2826 + descr->id_root_gap = sizeof(struct iam_lfix_root);
2827 + descr->id_node_gap = 0;
2828 + descr->id_ops = &iam_lfix_ops;
2829 + descr->id_leaf_ops = &iam_lfix_leaf_ops;
2837 +static struct iam_format iam_lfix_format = {
2838 + .if_guess = iam_lfix_guess
2841 +void iam_lfix_format_init(void)
2843 + iam_format_register(&iam_lfix_format);
2850 +#define KEYSIZE (8)
2851 +#define RECSIZE (8)
2852 +#define PTRSIZE (4)
2854 +#define LFIX_ROOT_RECNO \
2855 + ((4096 - sizeof(struct iam_lfix_root)) / (KEYSIZE + PTRSIZE))
2857 +#define LFIX_INDEX_RECNO (4096 / (KEYSIZE + PTRSIZE))
2859 +#define LFIX_LEAF_RECNO \
2860 + ((4096 - sizeof(struct iam_leaf_head)) / (KEYSIZE + RECSIZE))
2863 + struct iam_lfix_root lr_root;
2865 + char key[KEYSIZE];
2866 + char ptr[PTRSIZE];
2867 + } lr_entry[LFIX_ROOT_RECNO];
2870 +struct lfix_index {
2871 + struct dx_countlimit li_cl;
2872 + char li_padding[KEYSIZE + PTRSIZE - sizeof(struct dx_countlimit)];
2874 + char key[KEYSIZE];
2875 + char ptr[PTRSIZE];
2876 + } li_entry[LFIX_INDEX_RECNO - 1];
2880 + struct iam_leaf_head ll_head;
2882 + char key[KEYSIZE];
2883 + char rec[RECSIZE];
2884 + } ll_entry[LFIX_LEAF_RECNO];
2886 Index: iam/fs/ext3/iam_lvar.c
2887 ===================================================================
2888 --- iam.orig/fs/ext3/iam_lvar.c
2889 +++ iam/fs/ext3/iam_lvar.c
2891 +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2892 + * vim:expandtab:shiftwidth=8:tabstop=8:
2895 + * implementation of iam format for fixed size records, variable sized keys.
2897 + * Copyright (c) 2006 Cluster File Systems, Inc.
2898 + * Author: Nikita Danilov <nikita@clusterfs.com>
2900 + * This file is part of the Lustre file system, http://www.lustre.org
2901 + * Lustre is a trademark of Cluster File Systems, Inc.
2903 + * You may have signed or agreed to another license before downloading
2904 + * this software. If so, you are bound by the terms and conditions
2905 + * of that agreement, and the following does not apply to you. See the
2906 + * LICENSE file included with this distribution for more information.
2908 + * If you did not agree to a different license, then this copy of Lustre
2909 + * is open source software; you can redistribute it and/or modify it
2910 + * under the terms of version 2 of the GNU General Public License as
2911 + * published by the Free Software Foundation.
2913 + * In either case, Lustre is distributed in the hope that it will be
2914 + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
2915 + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
2916 + * license text for more details.
2919 +#include <linux/types.h>
2920 +#include <linux/jbd.h>
2922 +#include <linux/ext3_fs.h>
2924 +#include <linux/lustre_iam.h>
2926 +#include <libcfs/libcfs.h>
2927 +#include <libcfs/kp30.h>
2930 + * Leaf operations.
2934 + IAM_LVAR_LEAF_MAGIC = 0x1973 /* This is duplicated in
2935 + * lustre/utils/create_iam.c */
2938 +/* This is duplicated in lustre/utils/create_iam.c */
2939 +struct lvar_leaf_header {
2940 + __le16 vlh_magic; /* magic number IAM_LVAR_LEAF_MAGIC */
2941 + __le16 vlh_used; /* used bytes, including header */
2945 + * Format of leaf entry:
2949 + * u8 record[rec_size]
2951 + * Entries are ordered in key order.
2954 +/* This is duplicated in lustre/utils/create_iam.c */
2955 +typedef __u32 lvar_hash_t;
2957 +/* This is duplicated in lustre/utils/create_iam.c */
2958 +struct lvar_leaf_entry {
2960 + __le16 vle_keysize;
2964 +#define PDIFF(ptr0, ptr1) (((char *)(ptr0)) - ((char *)(ptr1)))
2967 +static inline int blocksize(const struct iam_leaf *leaf)
2969 + return iam_leaf_container(leaf)->ic_object->i_sb->s_blocksize;
2972 +static inline const char *kchar(const struct iam_key *key)
2974 + return (void *)key;
2977 +static inline struct iam_lentry *lvar_lentry(const struct lvar_leaf_entry *ent)
2979 + return (struct iam_lentry *)ent;
2982 +static inline struct lvar_leaf_entry *lentry_lvar(const struct iam_lentry *lent)
2984 + return (struct lvar_leaf_entry *)lent;
2988 +static inline int recsize(const struct iam_leaf *leaf)
2990 + return iam_leaf_descr(leaf)->id_rec_size;
2993 +static inline int e_keysize(const struct lvar_leaf_entry *ent)
2995 + return le16_to_cpu(ent->vle_keysize);
2998 +/* This is duplicated in lustre/utils/create_iam.c */
3001 + LVAR_ROUND = LVAR_PAD - 1
3004 +static inline int getsize(const struct iam_leaf *leaf, int namelen)
3006 + CLASSERT(!(LVAR_PAD & (LVAR_PAD - 1)));
3008 + return (offsetof(struct lvar_leaf_entry, vle_key) +
3009 + namelen + recsize(leaf) + LVAR_ROUND) & ~LVAR_ROUND;
3012 +static inline int e_size(const struct iam_leaf *leaf,
3013 + const struct lvar_leaf_entry *ent)
3015 + return getsize(leaf, e_keysize(ent));
3018 +static inline char *e_char(const struct lvar_leaf_entry *ent)
3020 + return (char *)&ent->vle_key;
3023 +static inline struct iam_key *e_key(const struct lvar_leaf_entry *ent)
3025 + return (struct iam_key *)e_char(ent);
3028 +static inline lvar_hash_t e_hash(const struct lvar_leaf_entry *ent)
3030 + return le32_to_cpu(ent->vle_hash);
3033 +static inline struct iam_rec *e_rec(const struct lvar_leaf_entry *ent)
3035 + return ((void *)ent) +
3036 + offsetof(struct lvar_leaf_entry, vle_key) + e_keysize(ent);
3039 +static void e_print(const struct lvar_leaf_entry *ent)
3041 + printk(" %p %8.8x \"%*.*s\"\n", ent, e_hash(ent),
3042 + e_keysize(ent), e_keysize(ent), e_char(ent));
3045 +static int e_check(const struct iam_leaf *leaf,
3046 + const struct lvar_leaf_entry *ent)
3048 + const void *point = ent;
3049 + const void *start = leaf->il_bh->b_data;
3051 + start + sizeof(struct lvar_leaf_header) <= point &&
3052 + point + e_size(leaf, ent) < start + blocksize(leaf);
3056 +static struct lvar_leaf_entry *e_next(const struct iam_leaf *leaf,
3057 + const struct lvar_leaf_entry *ent)
3059 + return ((void *)ent) + e_size(leaf, ent);
3062 +#define LVAR_HASH_TEA (1)
3063 +#define LVAR_HASH_R5 (0)
3064 +#define LVAR_HASH_PREFIX (0)
3066 +static inline lvar_hash_t get_hash(const struct iam_container *bag,
3067 + const char *name, int namelen)
3069 + lvar_hash_t result;
3073 + if (strncmp(name, ".", 1) == 0 && namelen == 1)
3075 + if (strncmp(name, "..", 2) == 0 && namelen == 2)
3078 + if (LVAR_HASH_PREFIX) {
3080 + strncpy((void *)&result,
3081 + name, min(namelen, (int)sizeof result));
3083 + struct dx_hash_info hinfo;
3085 + if (LVAR_HASH_TEA)
3086 + hinfo.hash_version = DX_HASH_TEA;
3087 + else if (LVAR_HASH_R5)
3088 + hinfo.hash_version = DX_HASH_R5;
3090 + ext3fs_dirhash(name, namelen, &hinfo);
3091 + result = hinfo.hash;
3094 + return (result << 1) & 0x7fffffff;
3097 +static inline int e_eq(const struct lvar_leaf_entry *ent,
3098 + const char *name, int namelen)
3100 + return namelen == e_keysize(ent) && !memcmp(e_char(ent), name, namelen);
3103 +static inline int e_cmp(const struct iam_leaf *leaf,
3104 + const struct lvar_leaf_entry *ent, lvar_hash_t hash)
3106 + lvar_hash_t ehash;
3108 + ehash = e_hash(ent);
3109 + return ehash == hash ? 0 : (ehash < hash ? -1 : +1);
3112 +static struct lvar_leaf_header *n_head(const struct iam_leaf *l)
3114 + return (struct lvar_leaf_header *)l->il_bh->b_data;
3117 +static int h_used(const struct lvar_leaf_header *hdr)
3119 + return le16_to_cpu(hdr->vlh_used);
3122 +static void h_used_adj(const struct iam_leaf *leaf,
3123 + struct lvar_leaf_header *hdr, int adj)
3127 + used = h_used(hdr) + adj;
3128 + assert_corr(sizeof *hdr <= used && used <= blocksize(leaf));
3129 + hdr->vlh_used = cpu_to_le16(used);
3132 +static struct lvar_leaf_entry *n_start(const struct iam_leaf *leaf)
3134 + return (void *)leaf->il_bh->b_data + sizeof(struct lvar_leaf_header);
3137 +static struct lvar_leaf_entry *n_end(const struct iam_leaf *l)
3139 + return (void *)l->il_bh->b_data + h_used(n_head(l));
3142 +static struct lvar_leaf_entry *n_cur(const struct iam_leaf *l)
3144 + return lentry_lvar(l->il_at);
3147 +void n_print(const struct iam_leaf *l)
3149 + struct lvar_leaf_entry *scan;
3151 + printk(KERN_EMERG "used: %d\n", h_used(n_head(l)));
3152 + for (scan = n_start(l); scan < n_end(l); scan = e_next(l, scan))
3156 +#if EXT3_CORRECTNESS_ON
3157 +static int n_at_rec(const struct iam_leaf *folio)
3160 + n_start(folio) <= lentry_lvar(folio->il_at) &&
3161 + lentry_lvar(folio->il_at) < n_end(folio);
3164 +#if EXT3_INVARIANT_ON
3165 +static int n_invariant(const struct iam_leaf *leaf)
3167 + struct iam_path *path;
3168 + struct lvar_leaf_entry *scan;
3169 + struct lvar_leaf_entry *end;
3171 + lvar_hash_t nexthash;
3172 + lvar_hash_t starthash;
3174 + end = n_end(leaf);
3176 + path = leaf->il_path;
3178 + if (h_used(n_head(leaf)) > blocksize(leaf))
3182 + * Delimiting key in the parent index node. Clear least bit to account
3183 + * for hash collision marker.
3185 + starthash = *(lvar_hash_t *)iam_ikey_at(path, path->ip_frame->at) & ~1;
3186 + for (scan = n_start(leaf); scan < end; scan = e_next(leaf, scan)) {
3187 + nexthash = e_hash(scan);
3188 + if (nexthash != get_hash(iam_leaf_container(leaf),
3189 + e_char(scan), e_keysize(scan))) {
3193 + if (0 && nexthash < starthash) {
3195 + * Unfortunately this useful invariant cannot be
 3196 + * reliably checked as parent node is not necessarily
3200 + printk("%#x < %#x\n", nexthash, starthash);
3205 + if (nexthash < hash) {
3211 + if (scan != end) {
3217 +/* EXT3_INVARIANT_ON */
3220 +/* EXT3_CORRECTNESS_ON */
3223 +static struct iam_ikey *lvar_ikey(const struct iam_leaf *l,
3224 + struct iam_ikey *key)
3226 + lvar_hash_t *hash;
3228 + assert_corr(n_at_rec(l));
3230 + hash = (void *)key;
3231 + *hash = e_hash(n_cur(l));
3235 +static struct iam_key *lvar_key(const struct iam_leaf *l)
3237 + return e_key(n_cur(l));
3240 +static int lvar_key_size(const struct iam_leaf *l)
3242 + return e_keysize(n_cur(l));
3245 +static void lvar_start(struct iam_leaf *l)
3247 + l->il_at = lvar_lentry(n_start(l));
3250 +static int lvar_init(struct iam_leaf *l)
3254 + struct lvar_leaf_header *head;
3256 + assert_corr(l->il_bh != NULL);
3259 + used = h_used(head);
3260 + if (head->vlh_magic == le16_to_cpu(IAM_LVAR_LEAF_MAGIC) &&
3261 + used <= blocksize(l)) {
3262 + l->il_at = l->il_entries = lvar_lentry(n_start(l));
3265 + struct inode *obj;
3267 + obj = iam_leaf_container(l)->ic_object;
3268 + ext3_error(obj->i_sb, __FUNCTION__,
3269 + "Wrong magic in node %llu (#%lu): %#x != %#x or "
3271 + (unsigned long long)l->il_bh->b_blocknr, obj->i_ino,
3272 + head->vlh_magic, le16_to_cpu(IAM_LVAR_LEAF_MAGIC),
3280 +static void lvar_fini(struct iam_leaf *l)
3282 + l->il_entries = l->il_at = NULL;
3285 +struct iam_rec *lvar_rec(const struct iam_leaf *l)
3287 + assert_corr(n_at_rec(l));
3288 + return e_rec(n_cur(l));
3291 +static void lvar_next(struct iam_leaf *l)
3293 + assert_corr(n_at_rec(l));
3294 + assert_corr(iam_leaf_is_locked(l));
3295 + l->il_at = lvar_lentry(e_next(l, n_cur(l)));
3298 +static int lvar_lookup(struct iam_leaf *leaf, const struct iam_key *k)
3300 + struct lvar_leaf_entry *found;
3301 + struct lvar_leaf_entry *scan;
3302 + struct lvar_leaf_entry *end;
3310 + assert_inv(n_invariant(leaf));
3311 + end = n_end(leaf);
3314 + namelen = strlen(name);
3315 + hash = get_hash(iam_leaf_container(leaf), name, namelen);
3320 + for (scan = n_start(leaf); scan < end; scan = e_next(leaf, scan)) {
3321 + lvar_hash_t scan_hash;
3323 + scan_hash = e_hash(scan);
3324 + if (scan_hash < hash)
3326 + else if (scan_hash == hash) {
3327 + if (e_eq(scan, name, namelen)) {
3331 + leaf->il_at = lvar_lentry(scan);
3332 + return IAM_LOOKUP_EXACT;
3333 + } else if (!found_equal) {
3342 + if (found == NULL) {
3344 + * @k is less than all hashes in the leaf.
3347 + result = IAM_LOOKUP_BEFORE;
3349 + leaf->il_at = lvar_lentry(found);
3350 + result = IAM_LOOKUP_OK;
3351 + assert_corr(n_at_rec(leaf));
3354 + result |= IAM_LOOKUP_LAST;
3355 + assert_inv(n_invariant(leaf));
3357 + if (unlikely(current->debugging1 & 0x1)) {
3358 + struct iam_path *path;
3360 + path = leaf->il_path;
3361 + printk(KERN_EMERG "lvar noent: `%s'/%#x/%#x\n", name, hash,
3362 + *(lvar_hash_t *)iam_ikey_at(path, path->ip_frame->at));
3369 +static int lvar_ilookup(struct iam_leaf *leaf, const struct iam_ikey *ik)
3371 + struct lvar_leaf_entry *scan;
3372 + struct lvar_leaf_entry *end;
3375 + assert_inv(n_invariant(leaf));
3376 + end = n_end(leaf);
3377 + hash = *(const lvar_hash_t *)ik;
3380 + for (scan = n_start(leaf); scan < end; scan = e_next(leaf, scan)) {
3381 + lvar_hash_t scan_hash;
3383 + scan_hash = e_hash(scan);
3384 + if (scan_hash > hash)
3385 + return scan == n_start(leaf) ?
3386 + IAM_LOOKUP_BEFORE : IAM_LOOKUP_OK;
3387 + leaf->il_at = lvar_lentry(scan);
3388 + if (scan_hash == hash)
3389 + return IAM_LOOKUP_EXACT;
3391 + assert_inv(n_invariant(leaf));
3393 + * @ik is greater than any key in the node. Return last record in the
3396 + return IAM_LOOKUP_OK;
3399 +static void lvar_key_set(struct iam_leaf *l, const struct iam_key *k)
3401 + assert_corr(n_at_rec(l));
3402 + assert_corr(strlen(kchar(k)) == e_keysize(n_cur(l)));
3403 + assert_corr(iam_leaf_is_locked(l));
3404 + memcpy(e_key(n_cur(l)), k, e_keysize(n_cur(l)));
3405 + assert_inv(n_invariant(l));
3408 +static int lvar_key_cmp(const struct iam_leaf *l, const struct iam_key *k)
3415 + hash = get_hash(iam_leaf_container(l), name, strlen(name));
3416 + return e_cmp(l, n_cur(l), hash);
3419 +static int lvar_key_eq(const struct iam_leaf *l, const struct iam_key *k)
3424 + return e_eq(n_cur(l), name, strlen(name));
3427 +static void lvar_rec_set(struct iam_leaf *l, const struct iam_rec *r)
3429 + assert_corr(n_at_rec(l));
3430 + assert_corr(iam_leaf_is_locked(l));
3431 + iam_reccpy(iam_leaf_path(l), e_rec(n_cur(l)), r);
3432 + assert_inv(n_invariant(l));
3435 +static int lvar_can_add(const struct iam_leaf *l,
3436 + const struct iam_key *k, const struct iam_rec *r)
3438 + assert_corr(iam_leaf_is_locked(l));
3439 + return h_used(n_head(l)) + getsize(l, strlen(kchar(k))) <= blocksize(l);
3442 +static int lvar_at_end(const struct iam_leaf *folio)
3444 + assert_corr(iam_leaf_is_locked(folio));
3445 + return n_cur(folio) == n_end(folio);
3448 +static void lvar_rec_add(struct iam_leaf *leaf,
3449 + const struct iam_key *k, const struct iam_rec *r)
3458 + assert_corr(lvar_can_add(leaf, k, r));
3459 + assert_inv(n_invariant(leaf));
3460 + assert_corr(iam_leaf_is_locked(leaf));
3463 + ksize = strlen(key);
3464 + shift = getsize(leaf, ksize);
3466 + if (!lvar_at_end(leaf)) {
3467 + assert_corr(n_cur(leaf) < n_end(leaf));
3468 + end = n_end(leaf);
3469 + if (lvar_key_cmp(leaf, k) <= 0)
3473 + * Another exceptional case: insertion with the key
3474 + * less than least key in the leaf.
3476 + assert_corr(leaf->il_at == leaf->il_entries);
3478 + start = leaf->il_at;
3479 + diff = PDIFF(end, start);
3480 + assert_corr(diff >= 0);
3481 + memmove(start + shift, start, diff);
3483 + h_used_adj(leaf, n_head(leaf), shift);
3484 + n_cur(leaf)->vle_keysize = cpu_to_le16(ksize);
3485 + n_cur(leaf)->vle_hash = cpu_to_le32(get_hash(iam_leaf_container(leaf),
3487 + lvar_key_set(leaf, k);
3488 + lvar_rec_set(leaf, r);
3489 + assert_corr(n_at_rec(leaf));
3490 + assert_inv(n_invariant(leaf));
3493 +static void lvar_rec_del(struct iam_leaf *leaf, int shift)
3499 + assert_corr(n_at_rec(leaf));
3500 + assert_inv(n_invariant(leaf));
3501 + assert_corr(iam_leaf_is_locked(leaf));
3503 + end = n_end(leaf);
3504 + next = e_next(leaf, n_cur(leaf));
3505 + nob = e_size(leaf, n_cur(leaf));
3506 + memmove(leaf->il_at, next, end - next);
3507 + h_used_adj(leaf, n_head(leaf), -nob);
3508 + assert_inv(n_invariant(leaf));
3511 +static void lvar_init_new(struct iam_container *c, struct buffer_head *bh)
3513 + struct lvar_leaf_header *hdr;
3515 + hdr = (struct lvar_leaf_header *)bh->b_data;
3516 + hdr->vlh_magic = cpu_to_le16(IAM_LVAR_LEAF_MAGIC);
3517 + hdr->vlh_used = sizeof *hdr;
3520 +static struct lvar_leaf_entry *find_pivot(const struct iam_leaf *leaf,
3521 + struct lvar_leaf_entry **prev)
3528 + threshold = blocksize(leaf) / 2;
3529 + for (scan = start = n_start(leaf); scan - start <= threshold;
3530 + *prev = scan, scan = e_next(leaf, scan)) {
3536 +static void lvar_split(struct iam_leaf *leaf, struct buffer_head **bh,
3537 + iam_ptr_t new_blknr)
3539 + struct lvar_leaf_entry *first_to_move;
3540 + struct lvar_leaf_entry *last_to_stay;
3541 + struct iam_path *path;
3542 + struct lvar_leaf_header *hdr;
3543 + struct buffer_head *new_leaf;
3548 + assert_inv(n_invariant(leaf));
3549 + assert_corr(iam_leaf_is_locked(leaf));
3552 + path = iam_leaf_path(leaf);
3554 + hdr = (void *)new_leaf->b_data;
3556 + first_to_move = find_pivot(leaf, &last_to_stay);
3557 + assert_corr(last_to_stay != NULL);
3558 + assert_corr(e_next(leaf, last_to_stay) == first_to_move);
3560 + hash = e_hash(first_to_move);
3561 + if (hash == e_hash(last_to_stay))
3567 + tomove = PDIFF(n_end(leaf), first_to_move);
3568 + memmove(hdr + 1, first_to_move, tomove);
3570 + h_used_adj(leaf, hdr, tomove);
3571 + h_used_adj(leaf, n_head(leaf), -tomove);
3573 + assert_corr(n_end(leaf) == first_to_move);
3575 + if (n_cur(leaf) >= first_to_move) {
3577 + * insertion point moves into new leaf.
3582 + shift = PDIFF(leaf->il_at, first_to_move);
3583 + *bh = leaf->il_bh;
3584 + leaf->il_bh = new_leaf;
3585 + leaf->il_curidx = new_blknr;
3587 + assert_corr(iam_leaf_is_locked(leaf));
3588 + result = lvar_init(leaf);
3590 + * init cannot fail, as node was just initialized.
3592 + assert_corr(result == 0);
3593 + leaf->il_at = ((void *)leaf->il_at) + shift;
3596 + * Insert pointer to the new node (together with the least key in
3597 + * the node) into index node.
3599 + iam_insert_key_lock(path, path->ip_frame, (struct iam_ikey *)&hash,
3601 + assert_corr(n_cur(leaf) < n_end(leaf));
3602 + assert_inv(n_invariant(leaf));
3605 +static struct iam_leaf_operations lvar_leaf_ops = {
3606 + .init = lvar_init,
3607 + .init_new = lvar_init_new,
3608 + .fini = lvar_fini,
3609 + .start = lvar_start,
3610 + .next = lvar_next,
3612 + .ikey = lvar_ikey,
3614 + .key_set = lvar_key_set,
3615 + .key_cmp = lvar_key_cmp,
3616 + .key_eq = lvar_key_eq,
3617 + .key_size = lvar_key_size,
3618 + .rec_set = lvar_rec_set,
3619 + .lookup = lvar_lookup,
3620 + .ilookup = lvar_ilookup,
3621 + .at_end = lvar_at_end,
3622 + .rec_add = lvar_rec_add,
3623 + .rec_del = lvar_rec_del,
3624 + .can_add = lvar_can_add,
3625 + .split = lvar_split
3629 + * Index operations.
3633 + /* This is duplicated in lustre/utils/create_iam.c */
3634 + /* egrep -i '^o?x?[olabcdef]*$' /usr/share/dict/words */
3635 + IAM_LVAR_ROOT_MAGIC = 0xb01dface
3638 +/* This is duplicated in lustre/utils/create_iam.c */
3641 + __le16 vr_recsize;
3642 + __le16 vr_ptrsize;
3643 + u8 vr_indirect_levels;
3645 + __le16 vr_padding1;
3648 +static __u32 lvar_root_ptr(struct iam_container *c)
3653 +static int lvar_node_init(struct iam_container *c, struct buffer_head *bh,
3659 +static struct iam_entry *lvar_root_inc(struct iam_container *c,
3660 + struct iam_path *path,
3661 + struct iam_frame *frame)
3663 + struct lvar_root *root;
3664 + struct iam_entry *entries;
3666 + assert_corr(iam_frame_is_locked(path, frame));
3667 + entries = frame->entries;
3669 + dx_set_count(entries, 2);
3670 + assert_corr(dx_get_limit(entries) == dx_root_limit(path));
3672 + root = (void *)frame->bh->b_data;
3673 + assert_corr(le64_to_cpu(root->vr_magic) == IAM_LVAR_ROOT_MAGIC);
3674 + root->vr_indirect_levels ++;
3675 + frame->at = entries = iam_entry_shift(path, entries, 1);
3676 + memset(iam_ikey_at(path, entries), 0,
3677 + iam_path_descr(path)->id_ikey_size);
3681 +static int lvar_node_check(struct iam_path *path, struct iam_frame *frame)
3685 + unsigned limit_correct;
3686 + struct iam_entry *entries;
3688 + entries = dx_node_get_entries(path, frame);
3690 + if (frame == path->ip_frames) {
3691 + struct lvar_root *root;
3693 + root = (void *)frame->bh->b_data;
3694 + if (le64_to_cpu(root->vr_magic) != IAM_LVAR_ROOT_MAGIC) {
3698 + limit_correct = dx_root_limit(path);
3700 + limit_correct = dx_node_limit(path);
3701 + count = dx_get_count(entries);
3702 + limit = dx_get_limit(entries);
3703 + if (count > limit) {
3707 + if (limit != limit_correct) {
3714 +static int lvar_node_load(struct iam_path *path, struct iam_frame *frame)
3716 + struct iam_entry *entries;
3718 + entries = dx_node_get_entries(path, frame);
3720 + data = frame->bh->b_data;
3722 + if (frame == path->ip_frames) {
3723 + struct lvar_root *root;
3727 + name = kchar(path->ip_key_target);
3728 + path->ip_indirect = root->vr_indirect_levels;
3729 + if (path->ip_ikey_target == NULL) {
3730 + path->ip_ikey_target = iam_path_ikey(path, 4);
3731 + *(lvar_hash_t *)path->ip_ikey_target =
3732 + get_hash(path->ip_container, name,
3736 + frame->entries = frame->at = entries;
3740 +static int lvar_ikeycmp(const struct iam_container *c,
3741 + const struct iam_ikey *k1, const struct iam_ikey *k2)
3743 + lvar_hash_t p1 = le32_to_cpu(*(lvar_hash_t *)k1);
3744 + lvar_hash_t p2 = le32_to_cpu(*(lvar_hash_t *)k2);
3746 + return p1 > p2 ? +1 : (p1 < p2 ? -1 : 0);
3749 +static struct iam_path_descr *lvar_ipd_alloc(const struct iam_container *c)
3751 + return iam_ipd_alloc(c->ic_descr->id_ikey_size);
3754 +static int root_limit(int rootgap, int blocksize, int size)
3759 + limit = (blocksize - rootgap) / size;
3760 + nlimit = blocksize / size;
3761 + if (limit == nlimit)
3766 +static int lvar_root_limit(int blocksize, int size)
3768 + return root_limit(sizeof(struct lvar_root), blocksize, size);
3771 +static void lvar_root(void *buf,
3772 + int blocksize, int keysize, int ptrsize, int recsize)
3774 + struct lvar_root *root;
3775 + struct dx_countlimit *limit;
3779 + isize = sizeof(lvar_hash_t) + ptrsize;
3781 + *root = (typeof(*root)) {
3782 + .vr_magic = cpu_to_le32(IAM_LVAR_ROOT_MAGIC),
3783 + .vr_recsize = cpu_to_le16(recsize),
3784 + .vr_ptrsize = cpu_to_le16(ptrsize),
3785 + .vr_indirect_levels = 0
3788 + limit = (void *)(root + 1);
3789 + *limit = (typeof(*limit)){
3791 + * limit itself + one pointer to the leaf.
3793 + .count = cpu_to_le16(2),
3794 + .limit = lvar_root_limit(blocksize,
3795 + sizeof (lvar_hash_t) + ptrsize)
3800 + * Skip over @limit.
3805 + * Entry format is <key> followed by <ptr>. In the minimal tree
3806 + * consisting of a root and single node, <key> is a minimal possible
3809 + *(lvar_hash_t *)entry = 0;
3810 + entry += sizeof(lvar_hash_t);
3811 + /* now @entry points to <ptr> */
3813 + *(u_int32_t *)entry = cpu_to_le32(1);
3815 + *(u_int64_t *)entry = cpu_to_le64(1);
3818 +static int lvar_esize(int namelen, int recsize)
3820 + return (offsetof(struct lvar_leaf_entry, vle_key) +
3821 + namelen + recsize + LVAR_ROUND) & ~LVAR_ROUND;
3824 +static void lvar_leaf(void *buf,
3825 + int blocksize, int keysize, int ptrsize, int recsize)
3827 + struct lvar_leaf_header *head;
3828 + struct lvar_leaf_entry *entry;
3832 + *head = (typeof(*head)) {
3833 + .vlh_magic = cpu_to_le16(IAM_LVAR_LEAF_MAGIC),
3834 + .vlh_used = cpu_to_le16(sizeof *head + lvar_esize(0, recsize))
3836 + entry = (void *)(head + 1);
3837 + *entry = (typeof(*entry)) {
3841 + memset(e_rec(entry), 0, recsize);
3844 +#include <linux/jbd.h>
3845 +#include <linux/ext3_fs.h>
3846 +#include <linux/ext3_jbd.h>
3848 +int iam_lvar_create(struct inode *obj,
3849 + int keysize, int ptrsize, int recsize, handle_t *handle)
3851 + struct buffer_head *root_node;
3852 + struct buffer_head *leaf_node;
3853 + struct super_block *sb;
3857 + unsigned long bsize;
3859 + assert_corr(obj->i_size == 0);
3862 + bsize = sb->s_blocksize;
3863 + root_node = ext3_append(handle, obj, &blknr, &result);
3864 + leaf_node = ext3_append(handle, obj, &blknr, &result);
3865 + if (root_node != NULL && leaf_node != NULL) {
3866 + lvar_root(root_node->b_data, bsize, keysize, ptrsize, recsize);
3867 + lvar_leaf(leaf_node->b_data, bsize, keysize, ptrsize, recsize);
3868 + ext3_mark_inode_dirty(handle, obj);
3869 + result = ext3_journal_dirty_metadata(handle, root_node);
3871 + result = ext3_journal_dirty_metadata(handle, leaf_node);
3873 + ext3_std_error(sb, result);
3875 + brelse(leaf_node);
3876 + brelse(root_node);
3879 +EXPORT_SYMBOL(iam_lvar_create);
3881 +static struct iam_operations lvar_ops = {
3882 + .id_root_ptr = lvar_root_ptr,
3883 + .id_node_read = iam_node_read,
3884 + .id_node_init = lvar_node_init,
3885 + .id_node_check = lvar_node_check,
3886 + .id_node_load = lvar_node_load,
3887 + .id_ikeycmp = lvar_ikeycmp,
3888 + .id_root_inc = lvar_root_inc,
3889 + .id_ipd_alloc = lvar_ipd_alloc,
3890 + .id_ipd_free = iam_ipd_free,
3894 +static int lvar_guess(struct iam_container *c)
3897 + struct buffer_head *bh;
3898 + const struct lvar_root *root;
3900 + assert_corr(c->ic_object != NULL);
3902 + result = iam_node_read(c, lvar_root_ptr(c), NULL, &bh);
3903 + if (result == 0) {
3904 + root = (void *)bh->b_data;
3905 + if (le64_to_cpu(root->vr_magic) == IAM_LVAR_ROOT_MAGIC) {
3906 + struct iam_descr *descr;
3908 + descr = c->ic_descr;
3909 + descr->id_key_size = EXT3_NAME_LEN;
3910 + descr->id_ikey_size = sizeof (lvar_hash_t);
3911 + descr->id_rec_size = le16_to_cpu(root->vr_recsize);
3912 + descr->id_ptr_size = le16_to_cpu(root->vr_ptrsize);
3913 + descr->id_root_gap = sizeof *root;
3914 + descr->id_node_gap = 0;
3915 + descr->id_ops = &lvar_ops;
3916 + descr->id_leaf_ops = &lvar_leaf_ops;
3924 +static struct iam_format lvar_format = {
3925 + .if_guess = lvar_guess
3928 +void iam_lvar_format_init(void)
3930 + iam_format_register(&lvar_format);
3933 Index: iam/fs/ext3/namei.c
3934 ===================================================================
3935 --- iam.orig/fs/ext3/namei.c
3936 +++ iam/fs/ext3/namei.c
3938 * Theodore Ts'o, 2002
3942 - * iam: big theory statement.
3944 - * iam (Index Access Module) is a module providing abstraction of persistent
3945 - * transactional container on top of generalized ext3 htree.
3949 - * - key, pointer, and record size specifiable per container.
3951 - * - trees taller than 2 index levels.
3953 - * - read/write to existing ext3 htree directories as iam containers.
3955 - * iam container is a tree, consisting of leaf nodes containing keys and
3956 - * records stored in this container, and index nodes, containing keys and
3957 - * pointers to leaf or index nodes.
3959 - * iam does not work with keys directly, instead it calls user-supplied key
3960 - * comparison function (->dpo_keycmp()).
3962 - * Pointers are (currently) interpreted as logical offsets (measured in
3963 - * blocksful) within underlying flat file on top of which iam tree lives.
3967 - * iam mostly tries to reuse existing htree formats.
3969 - * Format of index node:
3971 - * +-----+-------+-------+-------+------+-------+------------+
3972 - * | | count | | | | | |
3973 - * | gap | / | entry | entry | .... | entry | free space |
3974 - * | | limit | | | | | |
3975 - * +-----+-------+-------+-------+------+-------+------------+
3977 - * gap this part of node is never accessed by iam code. It
3978 - * exists for binary compatibility with ext3 htree (that,
3979 - * in turn, stores fake struct ext2_dirent for ext2
3980 - * compatibility), and to keep some unspecified per-node
3981 - * data. Gap can be different for root and non-root index
3982 - * nodes. Gap size can be specified for each container
3983 - * (gap of 0 is allowed).
3985 - * count/limit current number of entries in this node, and the maximal
3986 - * number of entries that can fit into node. count/limit
3987 - * has the same size as entry, and is itself counted in
3990 - * entry index entry: consists of a key immediately followed by
3991 - * a pointer to a child node. Size of a key and size of a
3992 - * pointer depends on container. Entry has neither
3993 - * alignment nor padding.
3995 - * free space portion of node new entries are added to
3997 - * Entries in index node are sorted by their key value.
3999 - * Format of leaf node:
4001 - * +-----+-------+-------+-------+------+-------+------------+
4002 - * | | count | | | | | |
4003 - * | gap | / | leaf | leaf | .... | leaf | free space |
4004 - * | | limit | | | | | |
4005 - * +-----+-------+-------+-------+------+-------+------------+
4007 - * leaf For leaf entry: consists of a rec immediately followd by
4008 - * a key. size of a key and size of a rec depends on container.
4016 #include <linux/module.h>
4017 #include <linux/fs.h>
4018 #include <linux/pagemap.h>
4019 @@ -112,10 +37,10 @@
4020 #include <linux/quotaops.h>
4021 #include <linux/buffer_head.h>
4022 #include <linux/smp_lock.h>
4023 +#include <linux/lustre_iam.h>
4027 -#include <linux/lustre_iam.h>
4029 * define how far ahead to read directories while searching them.
4032 #define NAMEI_RA_INDEX(c,b) (((c) * NAMEI_RA_BLOCKS) + (b))
4035 -static struct buffer_head *ext3_append(handle_t *handle,
4036 +struct buffer_head *ext3_append(handle_t *handle,
4037 struct inode *inode,
4038 u32 *block, int *err)
4040 @@ -136,14 +61,15 @@ static struct buffer_head *ext3_append(h
4041 if ((bh = ext3_bread(handle, inode, *block, 1, err))) {
4042 inode->i_size += inode->i_sb->s_blocksize;
4043 EXT3_I(inode)->i_disksize = inode->i_size;
4044 - ext3_journal_get_write_access(handle,bh);
4045 + *err = ext3_journal_get_write_access(handle, bh);
4055 -#define assert(test) J_ASSERT(test)
4059 #define swap(x, y) do { typeof(x) z = x; x = y; y = z; } while (0)
4060 @@ -155,293 +81,10 @@ static struct buffer_head *ext3_append(h
4061 #define dxtrace(command)
4064 -struct fake_dirent {
4071 -struct dx_countlimit {
4077 - * dx_root_info is laid out so that if it should somehow get overlaid by a
4078 - * dirent the two low bits of the hash version will be zero. Therefore, the
4079 - * hash version mod 4 should never be 0. Sincerely, the paranoia department.
4083 - struct fake_dirent dot;
4085 - struct fake_dirent dotdot;
4086 - char dotdot_name[4];
4087 - struct dx_root_info
4089 - __le32 reserved_zero;
4091 - u8 info_length; /* 8 */
4092 - u8 indirect_levels;
4096 - struct {} entries[0];
4101 - struct fake_dirent fake;
4102 - struct {} entries[0];
4105 -struct dx_map_entry
4112 -static u32 htree_root_ptr(struct iam_container *c);
4113 -static int htree_node_check(struct iam_path *path, struct iam_frame *frame);
4114 -static int htree_node_init(struct iam_container *c,
4115 - struct buffer_head *bh, int root);
4116 -static int htree_keycmp(struct iam_container *c,
4117 - struct iam_key *k1, struct iam_key *k2);
4118 -static int htree_node_read(struct iam_container *c, iam_ptr_t ptr,
4119 - handle_t *h, struct buffer_head **bh);
4122 - * Parameters describing iam compatibility mode in which existing ext3 htrees
4123 - * can be manipulated.
4125 -static struct iam_descr htree_compat_param = {
4126 - .id_key_size = sizeof ((struct dx_map_entry *)NULL)->hash,
4127 - .id_ptr_size = sizeof ((struct dx_map_entry *)NULL)->offs,
4128 - .id_node_gap = offsetof(struct dx_node, entries),
4129 - .id_root_gap = offsetof(struct dx_root, entries),
4131 - .id_root_ptr = htree_root_ptr,
4132 - .id_node_check = htree_node_check,
4133 - .id_node_init = htree_node_init,
4134 - .id_node_read = htree_node_read,
4135 - .id_keycmp = htree_keycmp
4142 -struct iam_container;
4148 - * iam cursor (iterator) api.
4152 - * Flags controlling iterator functionality.
4154 -enum iam_it_flags {
4156 - * this iterator will move (iam_it_{prev,next}() will be called on it)
4158 - IAM_IT_MOVE = (1 << 0),
4160 - * tree can be updated through this iterator.
4162 - IAM_IT_WRITE = (1 << 1)
4166 - * States of iterator state machine.
4168 -enum iam_it_state {
4169 - /* initial state */
4171 - /* iterator is above particular record in the container */
4175 -struct htree_cookie {
4176 - struct dx_hash_info *hinfo;
4177 - struct dentry *dentry;
4183 - * Immediately after call to iam_it_init() iterator is in "detached"
4184 - * (IAM_IT_DETACHED) state: it is associated with given parent container, but
4185 - * doesn't point to any particular record in this container.
4187 - * After successful call to iam_it_get() and until corresponding call to
4188 - * iam_it_put() iterator is in "attached" state (IAM_IT_ATTACHED).
4190 - * Attached iterator can move through records in a container (provided
4191 - * IAM_IT_MOVE permission) in a key order, can get record and key values as it
4192 - * passes over them, and can modify container (provided IAM_IT_WRITE
4195 - * Concurrency: iterators are supposed to be local to thread. Interfaces below
4196 - * do no internal serialization.
4199 -struct iam_iterator {
4201 - * iterator flags, taken from enum iam_it_flags.
4204 - enum iam_it_state ii_state;
4206 - * path to the record. Valid in IAM_IT_ATTACHED state.
4208 - struct iam_path ii_path;
4211 -static inline struct iam_key *keycpy(struct iam_container *c,
4212 - struct iam_key *k1, struct iam_key *k2)
4214 - return memcpy(k1, k2, c->ic_descr->id_key_size);
4217 -static inline int keycmp(struct iam_container *c,
4218 - struct iam_key *k1, struct iam_key *k2)
4220 - return c->ic_descr->id_keycmp(c, k1, k2);
4223 -static struct iam_container *iam_it_container(struct iam_iterator *it)
4225 - return it->ii_path.ip_container;
4228 -static inline int it_keycmp(struct iam_iterator *it,
4229 - struct iam_key *k1, struct iam_key *k2)
4231 - return keycmp(iam_it_container(it), k1, k2);
4235 - * Initialize iterator to IAM_IT_DETACHED state.
4237 - * postcondition: it_state(it) == IAM_IT_DETACHED
4239 -int iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags);
4241 - * Finalize iterator and release all resources.
4243 - * precondition: it_state(it) == IAM_IT_DETACHED
4245 -void iam_it_fini(struct iam_iterator *it);
4248 - * Attach iterator. After successful completion, @it points to record with the
4249 - * largest key not larger than @k. Semantics of ->id_create() method guarantee
4250 - * that such record will always be found.
4252 - * Return value: 0: positioned on existing record,
4255 - * precondition: it_state(it) == IAM_IT_DETACHED
4256 - * postcondition: ergo(result == 0,
4257 - * (it_state(it) == IAM_IT_ATTACHED &&
4258 - * it_keycmp(it, iam_it_key_get(it, *), k) < 0))
4260 -int iam_it_get(struct iam_iterator *it, struct iam_key *k);
4263 - * Duplicates iterator.
4265 - * postcondition: it_state(dst) == it_state(src) &&
4266 - * iam_it_container(dst) == iam_it_container(src) &&
4267 - * dst->ii_flags = src->ii_flags &&
4268 - * ergo(it_state(it) == IAM_IT_ATTACHED,
4269 - * iam_it_rec_get(dst) == iam_it_rec_get(src) &&
4270 - * iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
4272 -void iam_it_dup(struct iam_iterator *dst, struct iam_iterator *src);
4275 - * Detach iterator. Does nothing it detached state.
4277 - * postcondition: it_state(it) == IAM_IT_DETACHED
4279 -void iam_it_put(struct iam_iterator *it);
4282 - * Move iterator one record right.
4284 - * Return value: 0: success,
4285 - * +1: end of container reached
4288 - * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
4289 - * postcondition: ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED)
4291 -int iam_it_next(struct iam_iterator *it);
4294 - * Return pointer to the record under iterator.
4296 - * precondition: it_state(it) == IAM_IT_ATTACHED
4297 - * postcondition: it_state(it) == IAM_IT_ATTACHED
4299 -const struct iam_rec *iam_it_rec_get(struct iam_iterator *it);
4302 - * Replace contents of record under iterator.
4304 - * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
4305 - * postcondition: it_state(it) == IAM_IT_ATTACHED &&
4306 - * ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
4308 -int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r);
4311 - * Place key under iterator in @k, return @k
4313 - * precondition: it_state(it) == IAM_IT_ATTACHED
4314 - * postcondition: it_state(it) == IAM_IT_ATTACHED
4316 -const struct iam_key *iam_it_key_get(struct iam_iterator *it,
4317 - struct iam_key *k);
4320 - * Insert new record with key @k and contents from @r, shifting records to the
4323 - * precondition: it_state(it) == IAM_IT_ATTACHED &&
4324 - * it->ii_flags&IAM_IT_WRITE &&
4325 - * it_keycmp(it, iam_it_key_get(it, *), k) < 0
4326 - * postcondition: it_state(it) == IAM_IT_ATTACHED &&
4327 - * ergo(result == 0,
4328 - * it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
4329 - * !memcmp(iam_it_rec_get(it), r, ...))
4331 -int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
4332 - struct iam_key *k, struct iam_rec *r);
4334 - * Delete record under iterator.
4336 - * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
4337 - * postcondition: it_state(it) == IAM_IT_ATTACHED
4339 -int iam_it_rec_delete(handle_t *h, struct iam_iterator *it);
4341 #ifdef CONFIG_EXT3_INDEX
4342 static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry);
4343 static void dx_set_block(struct iam_path *p,
4344 struct iam_entry *entry, unsigned value);
4345 -static inline struct iam_key *dx_get_key(struct iam_path *p,
4346 - struct iam_entry *entry,
4347 - struct iam_key *key);
4348 -static void dx_set_key(struct iam_path *p, struct iam_entry *entry,
4349 - struct iam_key *key);
4350 -static unsigned dx_get_count(struct iam_entry *entries);
4351 static unsigned dx_get_limit(struct iam_entry *entries);
4352 static void dx_set_count(struct iam_entry *entries, unsigned value);
4353 static void dx_set_limit(struct iam_entry *entries, unsigned value);
4354 @@ -457,264 +100,62 @@ static void dx_sort_map(struct dx_map_en
4355 static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
4356 struct dx_map_entry *offsets, int count);
4357 static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
4358 -static void dx_insert_block (struct iam_path *path,
4359 - struct iam_frame *frame, u32 hash, u32 block);
4360 -static int ext3_htree_next_block(struct inode *dir, __u32 hash,
4361 - struct iam_path *path, __u32 *start_hash);
4362 static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
4363 struct ext3_dir_entry_2 **res_dir, int *err);
4364 static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
4365 struct inode *inode);
4367 -static inline void iam_path_init(struct iam_path *path,
4368 - struct iam_container *c, struct htree_cookie *hc);
4369 -static inline void iam_path_fini(struct iam_path *path);
4373 - * Future: use high four bits of block for coalesce-on-delete flags
4374 - * Mask them off for now.
4377 -static inline void *entry_off(struct iam_entry *entry, ptrdiff_t off)
4379 - return (void *)((char *)entry + off);
4382 -static inline struct iam_descr *path_descr(struct iam_path *p)
4384 - return p->ip_container->ic_descr;
4387 -static inline struct inode *path_obj(struct iam_path *p)
4389 - return p->ip_container->ic_object;
4392 -static inline size_t iam_entry_size(struct iam_path *p)
4394 - return path_descr(p)->id_key_size + path_descr(p)->id_ptr_size;
4397 -static inline struct iam_entry *iam_entry_shift(struct iam_path *p,
4398 - struct iam_entry *entry, int shift)
4401 - return e + shift * iam_entry_size(p);
4404 -static inline ptrdiff_t iam_entry_diff(struct iam_path *p,
4405 - struct iam_entry *e1, struct iam_entry *e2)
4409 - diff = (void *)e1 - (void *)e2;
4410 - assert(diff / iam_entry_size(p) * iam_entry_size(p) == diff);
4411 - return diff / iam_entry_size(p);
4414 -static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry)
4416 - return le32_to_cpu(*(u32 *)entry_off(entry, path_descr(p)->id_key_size))
4420 -static inline void dx_set_block(struct iam_path *p,
4421 - struct iam_entry *entry, unsigned value)
4423 - *(u32*)entry_off(entry,
4424 - path_descr(p)->id_key_size) = cpu_to_le32(value);
4427 -static inline struct iam_key *dx_get_key(struct iam_path *p,
4428 - struct iam_entry *entry,
4429 - struct iam_key *key)
4431 - memcpy(key, entry, path_descr(p)->id_key_size);
4435 -static inline struct iam_key *iam_key_at(struct iam_path *p,
4436 - struct iam_entry *entry)
4438 - return (struct iam_key *)entry;
4441 -static inline void dx_set_key(struct iam_path *p,
4442 - struct iam_entry *entry, struct iam_key *key)
4444 - memcpy(entry, key, path_descr(p)->id_key_size);
4447 -static inline unsigned dx_get_count (struct iam_entry *entries)
4449 - return le16_to_cpu(((struct dx_countlimit *) entries)->count);
4452 -static inline unsigned dx_get_limit (struct iam_entry *entries)
4454 - return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
4457 -static inline void dx_set_count (struct iam_entry *entries, unsigned value)
4459 - ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
4462 -static inline void dx_set_limit (struct iam_entry *entries, unsigned value)
4463 +static inline void dx_set_limit(struct iam_entry *entries, unsigned value)
4465 ((struct dx_countlimit *) entries)->limit = cpu_to_le16(value);
4468 -static inline unsigned dx_root_limit(struct iam_path *p)
4470 - struct iam_descr *param = path_descr(p);
4471 - unsigned entry_space = path_obj(p)->i_sb->s_blocksize -
4472 - param->id_root_gap;
4473 - return entry_space / (param->id_key_size + param->id_ptr_size);
4476 -static inline unsigned dx_node_limit(struct iam_path *p)
4478 - struct iam_descr *param = path_descr(p);
4479 - unsigned entry_space = path_obj(p)->i_sb->s_blocksize -
4480 - param->id_node_gap;
4481 - return entry_space / (param->id_key_size + param->id_ptr_size);
4484 -static inline int dx_index_is_compat(struct iam_path *path)
4486 - return path_descr(path) == &htree_compat_param;
4489 -static struct iam_entry *dx_get_entries(struct iam_path *path, void *data,
4491 +int dx_index_is_compat(struct iam_path *path)
4495 - path_descr(path)->id_root_gap : path_descr(path)->id_node_gap);
4496 + return iam_path_descr(path) == &iam_htree_compat_param;
4499 -static struct iam_entry *dx_node_get_entries(struct iam_path *path,
4500 - struct iam_frame *frame)
4502 - return dx_get_entries(path,
4503 - frame->bh->b_data, frame == path->ip_frames);
4506 -static int dx_node_check(struct iam_path *p, struct iam_frame *f)
4507 +int dx_node_check(struct iam_path *p, struct iam_frame *f)
4509 struct iam_entry *e;
4510 struct iam_container *c;
4515 + struct inode *inode;
4517 c = p->ip_container;
4518 e = dx_node_get_entries(p, f);
4519 count = dx_get_count(e);
4520 e = iam_entry_shift(p, e, 1);
4521 + root = iam_path_descr(p)->id_ops->id_root_ptr(c);
4523 + inode = iam_path_obj(p);
4524 for (i = 0; i < count - 1; ++i, e = iam_entry_shift(p, e, 1)) {
4525 - keycpy(c, p->ip_key_scratch[0], p->ip_key_scratch[1]);
4526 - dx_get_key(p, e, p->ip_key_scratch[1]);
4527 + iam_ikeycpy(c, iam_path_ikey(p, 0), iam_path_ikey(p, 1));
4528 + iam_get_ikey(p, e, iam_path_ikey(p, 1));
4530 - keycmp(c, p->ip_key_scratch[0], p->ip_key_scratch[1]) > 0)
4531 + iam_ikeycmp(c, iam_path_ikey(p, 0),
4532 + iam_path_ikey(p, 1)) > 0) {
4539 -static u32 htree_root_ptr(struct iam_container *c)
4541 + blk = dx_get_block(p, e);
4542 + if (inode->i_size < (blk + 1) * inode->i_sb->s_blocksize) {
4547 -static int htree_node_check(struct iam_path *path, struct iam_frame *frame)
4550 - struct iam_entry *entries;
4551 - struct super_block *sb;
4553 - data = frame->bh->b_data;
4554 - entries = dx_node_get_entries(path, frame);
4555 - sb = path_obj(path)->i_sb;
4556 - if (frame == path->ip_frames) {
4558 - struct dx_root *root;
4559 - struct htree_cookie *hc = path->ip_descr_data;
4562 - if (root->info.hash_version > DX_HASH_MAX) {
4563 - ext3_warning(sb, __FUNCTION__,
4564 - "Unrecognised inode hash code %d",
4565 - root->info.hash_version);
4566 - return ERR_BAD_DX_DIR;
4569 - if (root->info.unused_flags & 1) {
4570 - ext3_warning(sb, __FUNCTION__,
4571 - "Unimplemented inode hash flags: %#06x",
4572 - root->info.unused_flags);
4573 - return ERR_BAD_DX_DIR;
4576 - path->ip_indirect = root->info.indirect_levels;
4577 - if (path->ip_indirect > DX_MAX_TREE_HEIGHT - 1) {
4578 - ext3_warning(sb, __FUNCTION__,
4579 - "Unimplemented inode hash depth: %#06x",
4580 - root->info.indirect_levels);
4581 - return ERR_BAD_DX_DIR;
4583 + * By definition of a tree, no node points to the root.
4585 + if (blk == root) {
4590 - assert((char *)entries == (((char *)&root->info) +
4591 - root->info.info_length));
4592 - assert(dx_get_limit(entries) == dx_root_limit(path));
4594 - hc->hinfo->hash_version = root->info.hash_version;
4595 - hc->hinfo->seed = EXT3_SB(sb)->s_hash_seed;
4597 - ext3fs_dirhash(hc->dentry->d_name.name,
4598 - hc->dentry->d_name.len, hc->hinfo);
4599 - path->ip_key_target = (struct iam_key *)&hc->hinfo->hash;
4601 - /* non-root index */
4602 - assert(entries == data + path_descr(path)->id_node_gap);
4603 - assert(dx_get_limit(entries) == dx_node_limit(path));
4605 - frame->entries = frame->at = entries;
4609 -static int htree_node_init(struct iam_container *c,
4610 - struct buffer_head *bh, int root)
4612 - struct dx_node *node;
4616 - node = (void *)bh->b_data;
4617 - node->fake.rec_len = cpu_to_le16(c->ic_object->i_sb->s_blocksize);
4618 - node->fake.inode = 0;
4622 -static int htree_node_read(struct iam_container *c, iam_ptr_t ptr,
4623 - handle_t *handle, struct buffer_head **bh)
4627 - *bh = ext3_bread(handle, c->ic_object, (int)ptr, 0, &result);
4633 -static int htree_keycmp(struct iam_container *c,
4634 - struct iam_key *k1, struct iam_key *k2)
4636 - __u32 p1 = le32_to_cpu(*(__u32 *)k1);
4637 - __u32 p2 = le32_to_cpu(*(__u32 *)k2);
4639 - return p1 > p2 ? +1 : (p1 < p2 ? -1 : 0);
4644 @@ -800,598 +241,121 @@ struct stats dx_show_entries(struct dx_h
4646 #endif /* DX_DEBUG */
4648 -static int dx_lookup(struct iam_path *path)
4649 +int dx_lookup(struct iam_path *path)
4656 struct iam_descr *param;
4657 struct iam_frame *frame;
4658 struct iam_container *c;
4660 - param = path_descr(path);
4661 + param = iam_path_descr(path);
4662 c = path->ip_container;
4664 + delta = dx_index_is_compat(path) ? 1 : 2;
4666 for (frame = path->ip_frames, i = 0,
4667 - ptr = param->id_root_ptr(path->ip_container);
4668 + ptr = param->id_ops->id_root_ptr(c);
4669 i <= path->ip_indirect;
4670 ptr = dx_get_block(path, frame->at), ++frame, ++i) {
4671 struct iam_entry *entries;
4672 - struct iam_entry *p;
4673 - struct iam_entry *q;
4674 - struct iam_entry *m;
4677 - err = param->id_node_read(c, (iam_ptr_t)ptr, NULL, &frame->bh);
4680 - err = param->id_node_check(path, frame);
4684 - assert(dx_node_check(path, frame));
4686 - entries = frame->entries;
4687 - count = dx_get_count(entries);
4688 - assert(count && count <= dx_get_limit(entries));
4689 - p = iam_entry_shift(path, entries, 1);
4690 - q = iam_entry_shift(path, entries, count - 1);
4692 - m = iam_entry_shift(path,
4693 - p, iam_entry_diff(path, q, p) / 2);
4694 - dxtrace(printk("."));
4695 - if (keycmp(c, iam_key_at(path, m),
4696 - path->ip_key_target) > 0)
4697 - q = iam_entry_shift(path, m, -1);
4699 - p = iam_entry_shift(path, m, +1);
4702 - frame->at = iam_entry_shift(path, p, -1);
4703 - if (1) { // linear search cross check
4704 - unsigned n = count - 1;
4705 - struct iam_entry *at;
4709 - dxtrace(printk(","));
4710 - at = iam_entry_shift(path, at, +1);
4711 - if (keycmp(c, iam_key_at(path, at),
4712 - path->ip_key_target) > 0) {
4713 - if (at != iam_entry_shift(path, frame->at, 1)) {
4715 - printk(KERN_EMERG "%i\n",
4716 - keycmp(c, iam_key_at(path, at),
4717 - path->ip_key_target));
4719 - at = iam_entry_shift(path, at, -1);
4723 - assert(at == frame->at);
4727 - iam_path_fini(path);
4728 - path->ip_frame = --frame;
4733 - * Probe for a directory leaf block to search.
4735 - * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
4736 - * error in the directory index, and the caller should fall back to
4737 - * searching the directory normally. The callers of dx_probe **MUST**
4738 - * check for this error code, and make sure it never gets reflected
4739 - * back to userspace.
4741 -static int dx_probe(struct dentry *dentry, struct inode *dir,
4742 - struct dx_hash_info *hinfo, struct iam_path *path)
4745 - struct htree_cookie hc = {
4750 - assert(dx_index_is_compat(path));
4751 - path->ip_descr_data = &hc;
4752 - err = dx_lookup(path);
4753 - assert(err != 0 || path->ip_frames[path->ip_indirect].bh != NULL);
4758 - * Initialize container @c, acquires additional reference on @inode.
4760 -int iam_container_init(struct iam_container *c,
4761 - struct iam_descr *descr, struct inode *inode)
4763 - memset(c, 0, sizeof *c);
4764 - c->ic_descr = descr;
4765 - c->ic_object = igrab(inode);
4766 - if (c->ic_object != NULL)
4773 - * Finalize container @c, release all resources.
4775 -void iam_container_fini(struct iam_container *c)
4777 - if (c->ic_object != NULL) {
4778 - iput(c->ic_object);
4779 - c->ic_object = NULL;
4783 -static inline void iam_path_init(struct iam_path *path, struct iam_container *c,
4784 - struct htree_cookie *hc)
4786 - memset(path, 0, sizeof *path);
4787 - path->ip_container = c;
4788 - path->ip_frame = path->ip_frames;
4789 - path->ip_descr_data = hc;
4792 -static inline void iam_path_fini(struct iam_path *path)
4796 - for (i = 0; i < ARRAY_SIZE(path->ip_frames); i++) {
4797 - if (path->ip_frames[i].bh != NULL) {
4798 - brelse(path->ip_frames[i].bh);
4799 - path->ip_frames[i].bh = NULL;
4804 -static void iam_path_compat_init(struct iam_path_compat *path,
4805 - struct inode *inode)
4809 - iam_container_init(&path->ipc_container, &htree_compat_param, inode);
4811 - * XXX hack allowing finalization of iam_path_compat with
4812 - * iam_path_fini().
4815 - iam_path_init(&path->ipc_path, &path->ipc_container, NULL);
4816 - for (i = 0; i < ARRAY_SIZE(path->ipc_path.ip_key_scratch); ++i)
4817 - path->ipc_path.ip_key_scratch[i] =
4818 - (struct iam_key *)&path->ipc_scrach[i];
4821 -static void iam_path_compat_fini(struct iam_path_compat *path)
4823 - iam_path_fini(&path->ipc_path);
4824 - iam_container_fini(&path->ipc_container);
4827 -static int iam_leaf_init(struct iam_path *path, struct iam_leaf *leaf)
4830 - struct buffer_head *bh;
4832 - block = dx_get_block(path, path->ip_frame->at);
4833 - err = path_descr(path)->id_node_read(path->ip_container, block,
4839 - leaf->entries = (struct iam_leaf_entry *)bh->b_data;
4843 -static void iam_leaf_fini(struct iam_leaf *leaf)
4850 - * Search container @c for record with key @k. If record is found, its data
4851 - * are moved into @r.
4855 - * Return values: +ve: found, 0: not-found, -ve: error
4858 -int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r)
4860 - struct dx_hash_info hinfo;
4861 - struct iam_path_compat cpath;
4862 - struct iam_path *path = &cpath.ipc_path;
4863 - struct htree_cookie hc = {
4868 - iam_path_init(path, c, &hc);
4869 - for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
4870 - path->ip_key_scratch[i] =
4871 - (struct iam_key *)&cpath.ipc_scrach[i];
4872 - err = dx_lookup(path);
4874 - struct iam_leaf leaf;
4875 - err = iam_leaf_init(path, &leaf);
4879 - for (path_descr(path)->id_leaf.start(c, &leaf);
4880 - !path_descr(path)->id_leaf.at_end(c, &leaf);
4881 - path_descr(path)->id_leaf.next(c, &leaf)) {
4882 - struct iam_key *key;
4884 - key = kmalloc(path_descr(path)->id_key_size, GFP_KERNEL);
4885 - path_descr(path)->id_leaf.key(c, &leaf, key);
4886 - if (keycmp(c, k, key) == 0) {
4887 - memcpy(r, path_descr(path)->id_leaf.rec(c, &leaf),
4888 - path_descr(path)->id_rec_size);
4889 - iam_path_fini(path);
4890 - iam_leaf_fini(&leaf);
4895 - iam_leaf_fini(&leaf);
4896 - /* Check to see if we should continue to search */
4897 - err = ext3_htree_next_block(c->ic_object, hinfo.hash, path, NULL);
4900 - } while (err == 1);
4902 - iam_path_fini(path);
4905 -EXPORT_SYMBOL(iam_lookup);
4907 -static inline size_t iam_leaf_entry_size(struct iam_path *p)
4909 - return path_descr(p)->id_rec_size + path_descr(p)->id_key_size;
4912 -static inline ptrdiff_t iam_leaf_entry_diff(struct iam_path *p,
4913 - struct iam_leaf_entry *e1, struct iam_leaf_entry *e2)
4917 - diff = (void *)e1 - (void *)e2;
4918 - assert(diff / iam_leaf_entry_size(p) * iam_leaf_entry_size(p) == diff);
4919 - return diff / iam_leaf_entry_size(p);
4922 -static inline struct iam_leaf_entry*
4923 -iam_leaf_entry_shift(struct iam_path *p, struct iam_leaf_entry *entry, int shift)
4926 - return e + shift * iam_leaf_entry_size(p);
4929 -static inline struct iam_key *
4930 -dx_leaf_get_key(struct iam_path *p, struct iam_leaf_entry *e, struct iam_key *key)
4932 - memcpy(key, e, path_descr(p)->id_key_size);
4936 -static inline struct iam_key *
4937 -iam_leaf_key_at(struct iam_path *p, struct iam_leaf_entry *entry)
4940 - return e + path_descr(p)->id_rec_size;
4942 -static inline struct iam_leaf_entry *
4943 -iam_leaf_entry_at(struct iam_path *p, struct iam_leaf_entry *entry)
4948 -static int iam_leaf_lookup(struct iam_path *path, struct iam_leaf *leaf,
4949 - struct iam_key *k)
4951 - struct iam_leaf_entry *p, *q, *m;
4952 - struct iam_leaf_entry *entries = leaf->entries;
4953 - int count = dx_get_count((struct iam_entry *)entries);
4955 - p = iam_leaf_entry_shift(path, entries, 1);
4956 - q = iam_leaf_entry_shift(path, entries, count - 1);
4958 - m = iam_leaf_entry_shift(path,
4959 - p, iam_leaf_entry_diff(path, q, p) / 2);
4960 - dxtrace(printk("."));
4961 - if (keycmp(path->ip_container, iam_leaf_key_at(path, m),
4962 - path->ip_key_target) > 0)
4963 - q = iam_leaf_entry_shift(path, m, -1);
4965 - p = iam_leaf_entry_shift(path, m, +1);
4971 -/*XXX what kind of lock should this entry be locked: WangDi */
4972 -static int iam_leaf_insert(handle_t *handle, struct iam_path *path,
4973 - struct iam_key *k, struct iam_rec *r)
4975 - struct iam_leaf leaf;
4976 - struct iam_leaf_entry *p, *q;
4979 - err = iam_leaf_init(path, &leaf);
4982 - path_descr(path)->id_leaf.start(path->ip_container, &leaf);
4983 - count = dx_get_count((struct iam_entry *)leaf.entries);
4984 - if (dx_get_count((struct iam_entry *)leaf.entries) >=
4985 - dx_get_limit((struct iam_entry *)leaf.entries)){
4990 - err = iam_leaf_lookup(path, &leaf, k);
4994 - /*insert the k/r to leaf entries*/
4995 - p = iam_leaf_entry_shift(path, leaf.at, 1);
4996 - q = iam_leaf_entry_shift(path, leaf.entries, count - 1);
4998 - memcpy(iam_leaf_entry_shift(path, q, 1), q, iam_leaf_entry_size(path));
4999 - q = iam_leaf_entry_shift(path, q, -1);
5001 - memcpy(iam_leaf_entry_at(path, p), r, path_descr(path)->id_rec_size);
5002 - memcpy(iam_leaf_key_at(path, p), k, path_descr(path)->id_key_size);
5004 - dx_set_count((struct iam_entry*)leaf.entries, count + 1);
5005 - err = ext3_journal_dirty_metadata(handle, leaf.bh);
5007 - ext3_std_error(path->ip_container->ic_object->i_sb, err);
5009 - iam_leaf_fini(&leaf);
5013 -static int split_leaf_node(handle_t *handle, struct iam_path *path)
5015 - struct inode *dir = path_obj(path);
5016 - unsigned continued = 0;
5017 - struct buffer_head *bh2;
5018 - u32 newblock, hash_split;
5020 - struct iam_leaf leaf;
5024 - bh2 = ext3_append (handle, dir, &newblock, &err);
5029 - err = iam_leaf_init(path, &leaf);
5033 - BUFFER_TRACE(leaf.bh, "get_write_access");
5034 - err = ext3_journal_get_write_access(handle, leaf.bh);
5037 - iam_leaf_fini(&leaf);
5039 - ext3_std_error(dir->i_sb, err);
5043 - data2 = bh2->b_data;
5044 - split = dx_get_count((struct iam_entry*)leaf.entries)/2;
5045 - hash_split = *(__u32*)iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split));
5046 - if (keycmp(path->ip_container, iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split)),
5047 - iam_leaf_key_at(path, iam_leaf_entry_shift(path, leaf.entries, split -1))) == 0)
5050 - memcpy(iam_leaf_entry_shift(path, (struct iam_leaf_entry *)data2, 1),
5051 - iam_leaf_entry_shift(path, leaf.entries, split),
5052 - split * iam_leaf_entry_size(path));
5054 - /* Which block gets the new entry? */
5055 - dx_insert_block(path, path->ip_frame, hash_split + continued, newblock);
5056 - err = ext3_journal_dirty_metadata (handle, bh2);
5058 - goto journal_error;
5059 - err = ext3_journal_dirty_metadata (handle, leaf.bh);
5061 - goto journal_error;
5063 - iam_leaf_fini(&leaf);
5068 -static int split_index_node(handle_t *handle, struct iam_path *path);
5070 - * Insert new record @r with key @k into container @c (within context of
5073 - * Return values: 0: success, -ve: error, including -EEXIST when record with
5074 - * given key is already present.
5076 - * postcondition: ergo(result == 0 || result == -EEXIST,
5077 - * iam_lookup(c, k, r2) > 0 &&
5078 - * !memcmp(r, r2, c->ic_descr->id_rec_size));
5080 -int iam_insert(handle_t *handle, struct iam_container *c, struct iam_key *k,
5081 - struct iam_rec *r)
5083 - struct dx_hash_info hinfo;
5084 - struct iam_path_compat cpath;
5085 - struct iam_path *path = &cpath.ipc_path;
5086 - struct htree_cookie hc = {
5091 - iam_path_init(path, c, &hc);
5092 - for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
5093 - path->ip_key_scratch[i] =
5094 - (struct iam_key *)&cpath.ipc_scrach[i];
5095 - err = dx_lookup(path);
5099 - err = iam_leaf_insert(handle, path, k, r);
5101 - if (err != -ENOSPC)
5104 - err = split_index_node(handle, path);
5108 - err = split_leaf_node(handle, path);
5112 - err = iam_leaf_insert(handle, path, k, r);
5114 - iam_path_fini(path);
5118 -EXPORT_SYMBOL(iam_insert);
5119 -static int iam_leaf_delete(handle_t *handle, struct iam_path *path,
5120 - struct iam_key *k)
5122 - struct iam_leaf leaf;
5123 - struct iam_leaf_entry *p, *q;
5126 - err = iam_leaf_init(path, &leaf);
5130 - err = iam_leaf_lookup(path, &leaf, k);
5134 - count = dx_get_count((struct iam_entry*)leaf.entries);
5135 - /*delete the k to leaf entries*/
5136 - p = iam_leaf_entry_shift(path, leaf.at, 1);
5137 - q = iam_leaf_entry_shift(path, leaf.entries, count - 1);
5139 - memcpy(p, iam_leaf_entry_shift(path, p, 1), iam_leaf_entry_size(path));
5140 - p = iam_leaf_entry_shift(path, p, 1);
5142 - dx_set_count((struct iam_entry*)leaf.entries, count - 1);
5144 - err = ext3_journal_dirty_metadata(handle, leaf.bh);
5146 - ext3_std_error(path_obj(path)->i_sb, err);
5148 - iam_leaf_fini(&leaf);
5153 - * Delete existing record with key @k.
5155 - * Return values: 0: success, -ENOENT: not-found, -ve: other error.
5157 - * postcondition: ergo(result == 0 || result == -ENOENT,
5158 - * !iam_lookup(c, k, *));
5160 -int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k)
5162 - struct dx_hash_info hinfo;
5163 - struct iam_path_compat cpath;
5164 - struct iam_path *path = &cpath.ipc_path;
5165 - struct htree_cookie hc = {
5170 - iam_path_init(path, c, &hc);
5171 - for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
5172 - path->ip_key_scratch[i] =
5173 - (struct iam_key *)&cpath.ipc_scrach[i];
5174 - err = dx_lookup(path);
5177 + struct iam_entry *p;
5178 + struct iam_entry *q;
5179 + struct iam_entry *m;
5182 - err = iam_leaf_delete(h, path, k);
5184 - iam_path_fini(path);
5187 + err = param->id_ops->id_node_read(c, (iam_ptr_t)ptr, NULL,
5192 -EXPORT_SYMBOL(iam_delete);
5193 + if (EXT3_INVARIANT_ON) {
5194 + err = param->id_ops->id_node_check(path, frame);
5199 -static int iam_leaf_update(handle_t *handle, struct iam_path *path,
5200 - struct iam_key *k, struct iam_rec *r)
5202 - struct iam_leaf leaf;
5204 + err = param->id_ops->id_node_load(path, frame);
5208 - err = iam_leaf_init(path, &leaf);
5211 + assert_inv(dx_node_check(path, frame));
5213 - err = iam_leaf_lookup(path, &leaf, k);
5216 + entries = frame->entries;
5217 + count = dx_get_count(entries);
5218 + assert_corr(count && count <= dx_get_limit(entries));
5219 + p = iam_entry_shift(path, entries, delta);
5220 + q = iam_entry_shift(path, entries, count - 1);
5222 + m = iam_entry_shift(path,
5223 + p, iam_entry_diff(path, q, p) / 2);
5224 + dxtrace(printk("."));
5225 + if (iam_ikeycmp(c, iam_ikey_at(path, m),
5226 + path->ip_ikey_target) > 0)
5227 + q = iam_entry_shift(path, m, -1);
5229 + p = iam_entry_shift(path, m, +1);
5232 - memcpy(iam_leaf_entry_at(path, leaf.at), r, path_descr(path)->id_rec_size);
5233 - memcpy(iam_leaf_key_at(path, leaf.at), k, path_descr(path)->id_key_size);
5234 + frame->at = iam_entry_shift(path, p, -1);
5235 + if (EXT3_INVARIANT_ON) { // linear search cross check
5236 + unsigned n = count - 1;
5237 + struct iam_entry *at;
5239 - err = ext3_journal_dirty_metadata(handle, leaf.bh);
5241 - ext3_std_error(path_obj(path)->i_sb, err);
5243 - iam_leaf_fini(&leaf);
5246 + dxtrace(printk(","));
5247 + at = iam_entry_shift(path, at, +1);
5248 + if (iam_ikeycmp(c, iam_ikey_at(path, at),
5249 + path->ip_ikey_target) > 0) {
5250 + if (at != iam_entry_shift(path, frame->at, 1)) {
5252 + printk(KERN_EMERG "%i\n",
5253 + iam_ikeycmp(c, iam_ikey_at(path, at),
5254 + path->ip_ikey_target));
5256 + at = iam_entry_shift(path, at, -1);
5260 + assert_corr(at == frame->at);
5264 + iam_path_fini(path);
5265 + path->ip_frame = --frame;
5270 - * Replace existing record with key @k, or insert new one. New record data are
5273 - * Return values: 0: success, -ve: error.
5274 + * Probe for a directory leaf block to search.
5276 - * postcondition: ergo(result == 0, iam_lookup(c, k, r2) > 0 &&
5277 - * !memcmp(r, r2, c->ic_descr->id_rec_size));
5278 + * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
5279 + * error in the directory index, and the caller should fall back to
5280 + * searching the directory normally. The callers of dx_probe **MUST**
5281 + * check for this error code, and make sure it never gets reflected
5282 + * back to userspace.
5284 -int iam_update(handle_t *h, struct iam_container *c,
5285 - struct iam_key *k, struct iam_rec *r)
5286 +static int dx_probe(struct dentry *dentry, struct inode *dir,
5287 + struct dx_hash_info *hinfo, struct iam_path *path)
5289 - struct dx_hash_info hinfo;
5290 - struct iam_path_compat cpath;
5291 - struct iam_path *path = &cpath.ipc_path;
5292 - struct htree_cookie hc = {
5297 + struct iam_path_compat *ipc;
5299 - iam_path_init(path, c, &hc);
5300 - for (i = 0; i < ARRAY_SIZE(path->ip_key_scratch); ++i)
5301 - path->ip_key_scratch[i] =
5302 - (struct iam_key *)&cpath.ipc_scrach[i];
5303 - err = dx_lookup(path);
5306 + assert_corr(path->ip_data != NULL);
5307 + ipc = container_of(path->ip_data, struct iam_path_compat, ipc_descr);
5308 + ipc->ipc_dentry = dentry;
5309 + ipc->ipc_hinfo = hinfo;
5311 - err = iam_leaf_update(h, path, k, r);
5313 - iam_path_fini(path);
5314 + assert_corr(dx_index_is_compat(path));
5315 + err = dx_lookup(path);
5316 + assert_corr(err != 0 || path->ip_frames[path->ip_indirect].bh != NULL);
5320 -EXPORT_SYMBOL(iam_update);
5323 * This function increments the frame pointer to search the next leaf
5324 * block, and reads in the necessary intervening nodes if the search
5325 @@ -1409,16 +373,15 @@ EXPORT_SYMBOL(iam_update);
5326 * If start_hash is non-null, it will be filled in with the starting
5327 * hash of the next page.
5329 -static int ext3_htree_next_block(struct inode *dir, __u32 hash,
5330 - struct iam_path *path, __u32 *start_hash)
5331 +static int ext3_htree_advance(struct inode *dir, __u32 hash,
5332 + struct iam_path *path, __u32 *start_hash,
5335 struct iam_frame *p;
5336 struct buffer_head *bh;
5337 int err, num_frames = 0;
5340 - assert(dx_index_is_compat(path));
5344 * Find the next leaf page by incrementing the frame pointer.
5345 @@ -1438,6 +401,10 @@ static int ext3_htree_next_block(struct
5351 + * Htree hash magic.
5354 * If the hash is 1, then continue only if the next page has a
5355 * continuation hash of any value. This is used for readdir
5356 @@ -1445,19 +412,21 @@ static int ext3_htree_next_block(struct
5357 * desired contiuation hash. If it doesn't, return since
5358 * there's no point to read in the successive index pages.
5360 - dx_get_key(path, p->at, (struct iam_key *)&bhash);
5361 + iam_get_ikey(path, p->at, (struct iam_ikey *)&bhash);
5363 *start_hash = bhash;
5364 if ((hash & 1) == 0) {
5365 if ((bhash & ~1) != hash)
5370 * If the hash is HASH_NB_ALWAYS, we always go to the next
5371 * block so no check is necessary
5373 while (num_frames--) {
5374 - err = path_descr(path)->id_node_read(path->ip_container,
5375 + err = iam_path_descr(path)->id_ops->
5376 + id_node_read(path->ip_container,
5377 (iam_ptr_t)dx_get_block(path, p->at),
5380 @@ -1465,12 +434,23 @@ static int ext3_htree_next_block(struct
5384 - p->at = p->entries = dx_node_get_entries(path, p);
5385 - assert(dx_node_check(path, p));
5386 + p->entries = dx_node_get_entries(path, p);
5387 + p->at = iam_entry_shift(path, p->entries, !compat);
5388 + assert_inv(dx_node_check(path, p));
5393 +int iam_index_next(struct iam_container *c, struct iam_path *path)
5395 + return ext3_htree_advance(c->ic_object, 0, path, NULL, 0);
5398 +int ext3_htree_next_block(struct inode *dir, __u32 hash,
5399 + struct iam_path *path, __u32 *start_hash)
5401 + return ext3_htree_advance(dir, hash, path, start_hash, 1);
5405 * p is at least 6 bytes before the end of page
5406 @@ -1662,21 +642,30 @@ static void dx_sort_map (struct dx_map_e
5410 -static void dx_insert_block(struct iam_path *path,
5411 - struct iam_frame *frame, u32 hash, u32 block)
5412 +void iam_insert_key(struct iam_path *path, struct iam_frame *frame,
5413 + const struct iam_ikey *key, iam_ptr_t ptr)
5415 struct iam_entry *entries = frame->entries;
5416 - struct iam_entry *old = frame->at, *new = iam_entry_shift(path, old, +1);
5417 + struct iam_entry *new = iam_entry_shift(path, frame->at, +1);
5418 int count = dx_get_count(entries);
5420 - assert(count < dx_get_limit(entries));
5421 - assert(old < iam_entry_shift(path, entries, count));
5422 + assert_corr(count < dx_get_limit(entries));
5423 + assert_corr(frame->at < iam_entry_shift(path, entries, count));
5425 memmove(iam_entry_shift(path, new, 1), new,
5426 (char *)iam_entry_shift(path, entries, count) - (char *)new);
5427 - dx_set_key(path, new, (struct iam_key *)&hash);
5428 - dx_set_block(path, new, block);
5429 + dx_set_ikey(path, new, key);
5430 + dx_set_block(path, new, ptr);
5431 dx_set_count(entries, count + 1);
5434 +void dx_insert_block(struct iam_path *path, struct iam_frame *frame,
5435 + u32 hash, u32 block)
5437 + assert_corr(dx_index_is_compat(path));
5438 + iam_insert_key(path, frame, (struct iam_ikey *)&hash, block);
5444 @@ -1903,7 +892,8 @@ static struct buffer_head * ext3_dx_find
5447 block = dx_get_block(path, path->ip_frame->at);
5448 - *err = path_descr(path)->id_node_read(path->ip_container, (iam_ptr_t)block,
5449 + *err = iam_path_descr(path)->id_ops->id_node_read(path->ip_container,
5454 @@ -2093,22 +1083,69 @@ static struct ext3_dir_entry_2* dx_pack_
5458 +struct ext3_dir_entry_2 *move_entries(struct inode *dir,
5459 + struct dx_hash_info *hinfo,
5460 + struct buffer_head **bh1,
5461 + struct buffer_head **bh2,
5462 + __u32 *delim_hash)
5466 + unsigned blocksize = dir->i_sb->s_blocksize;
5468 + unsigned continued;
5472 + struct dx_map_entry *map;
5473 + struct ext3_dir_entry_2 *de1;
5474 + struct ext3_dir_entry_2 *de2;
5476 + data1 = (*bh1)->b_data;
5477 + data2 = (*bh2)->b_data;
5479 + /* create map in the end of data2 block */
5480 + map = (struct dx_map_entry *) (data2 + blocksize);
5481 + count = dx_make_map((struct ext3_dir_entry_2 *) data1,
5482 + blocksize, hinfo, map);
5484 + split = count/2; // need to adjust to actual middle
5485 + dx_sort_map(map, count);
5486 + hash2 = map[split].hash;
5487 + continued = hash2 == map[split - 1].hash;
5488 + dxtrace(printk("Split block %i at %x, %i/%i\n",
5489 + dx_get_block(frame->at), hash2, split, count - split));
5491 + /* Fancy dance to stay within two buffers */
5492 + de2 = dx_move_dirents(data1, data2, map + split, count - split);
5493 + de1 = dx_pack_dirents(data1, blocksize);
5494 + de1->rec_len = cpu_to_le16(data1 + blocksize - (char *) de1);
5495 + de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
5496 + dxtrace(dx_show_leaf(hinfo,
5497 + (struct ext3_dir_entry_2 *) data1, blocksize, 1));
5498 + dxtrace(dx_show_leaf(hinfo,
5499 + (struct ext3_dir_entry_2 *) data2, blocksize, 1));
5501 + /* Which block gets the new entry? */
5502 + if (hinfo->hash >= hash2) {
5506 + *delim_hash = hash2 + continued;
5510 /* Allocate new node, and split leaf node @bh into it, inserting new pointer
5511 * into parent node identified by @frame */
5512 static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct iam_path *path,
5513 struct buffer_head **bh,struct iam_frame *frame,
5514 struct dx_hash_info *hinfo, int *error)
5516 - struct inode *dir = path_obj(path);
5517 - unsigned blocksize = dir->i_sb->s_blocksize;
5518 - unsigned count, continued;
5519 + struct inode *dir = iam_path_obj(path);
5520 struct buffer_head *bh2;
5523 - struct dx_map_entry *map;
5524 - char *data1 = (*bh)->b_data, *data2;
5526 - struct ext3_dir_entry_2 *de = NULL, *de2;
5527 + struct ext3_dir_entry_2 *de = NULL;
5530 bh2 = ext3_append (handle, dir, &newblock, error);
5531 @@ -2133,35 +1170,9 @@ static struct ext3_dir_entry_2 *do_split
5535 - data2 = bh2->b_data;
5537 - /* create map in the end of data2 block */
5538 - map = (struct dx_map_entry *) (data2 + blocksize);
5539 - count = dx_make_map ((struct ext3_dir_entry_2 *) data1,
5540 - blocksize, hinfo, map);
5542 - split = count/2; // need to adjust to actual middle
5543 - dx_sort_map (map, count);
5544 - hash2 = map[split].hash;
5545 - continued = hash2 == map[split - 1].hash;
5546 - dxtrace(printk("Split block %i at %x, %i/%i\n",
5547 - dx_get_block(frame->at), hash2, split, count-split));
5549 - /* Fancy dance to stay within two buffers */
5550 - de2 = dx_move_dirents(data1, data2, map + split, count - split);
5551 - de = dx_pack_dirents(data1,blocksize);
5552 - de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
5553 - de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
5554 - dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1));
5555 - dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1));
5556 + de = move_entries(dir, hinfo, bh, &bh2, &hash2);
5558 - /* Which block gets the new entry? */
5559 - if (hinfo->hash >= hash2)
5564 - dx_insert_block(path, frame, hash2 + continued, newblock);
5565 + dx_insert_block(path, frame, hash2, newblock);
5566 err = ext3_journal_dirty_metadata (handle, bh2);
5569 @@ -2175,6 +1186,63 @@ errout:
5573 +struct ext3_dir_entry_2 *find_insertion_point(struct inode *dir,
5574 + struct buffer_head *bh,
5575 + const char *name, int namelen)
5577 + struct ext3_dir_entry_2 *de;
5579 + unsigned long offset;
5584 + reclen = EXT3_DIR_REC_LEN(namelen);
5585 + de = (struct ext3_dir_entry_2 *)bh->b_data;
5586 + top = bh->b_data + dir->i_sb->s_blocksize - reclen;
5588 + while ((char *) de <= top) {
5589 + if (!ext3_check_dir_entry("ext3_add_entry",
5590 + dir, de, bh, offset))
5591 + return ERR_PTR(-EIO);
5592 + if (ext3_match(namelen, name, de))
5593 + return ERR_PTR(-EEXIST);
5594 + nlen = EXT3_DIR_REC_LEN(de->name_len);
5595 + rlen = le16_to_cpu(de->rec_len);
5596 + if ((de->inode? rlen - nlen: rlen) >= reclen)
5598 + de = (struct ext3_dir_entry_2 *)((char *)de + rlen);
5601 + return ERR_PTR(-ENOSPC);
5604 +struct ext3_dir_entry_2 *split_entry(struct inode *dir,
5605 + struct ext3_dir_entry_2 *de,
5606 + unsigned long ino, mode_t mode,
5607 + const char *name, int namelen)
5612 + nlen = EXT3_DIR_REC_LEN(de->name_len);
5613 + rlen = le16_to_cpu(de->rec_len);
5615 + struct ext3_dir_entry_2 *de1;
5617 + de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
5618 + de1->rec_len = cpu_to_le16(rlen - nlen);
5619 + de->rec_len = cpu_to_le16(nlen);
5622 + de->file_type = EXT3_FT_UNKNOWN;
5623 + de->inode = cpu_to_le32(ino);
5625 + ext3_set_de_type(dir->i_sb, de, mode);
5626 + de->name_len = namelen;
5627 + memcpy(de->name, name, namelen);
5632 * Add a new entry into a directory (leaf) block. If de is non-NULL,
5633 @@ -2194,34 +1262,16 @@ static int add_dirent_to_buf(handle_t *h
5634 struct inode *dir = dentry->d_parent->d_inode;
5635 const char *name = dentry->d_name.name;
5636 int namelen = dentry->d_name.len;
5637 - unsigned long offset = 0;
5638 - unsigned short reclen;
5639 - int nlen, rlen, err;
5643 - reclen = EXT3_DIR_REC_LEN(namelen);
5645 - de = (struct ext3_dir_entry_2 *)bh->b_data;
5646 - top = bh->b_data + dir->i_sb->s_blocksize - reclen;
5647 - while ((char *) de <= top) {
5648 - if (!ext3_check_dir_entry("ext3_add_entry", dir, de,
5653 - if (ext3_match (namelen, name, de)) {
5657 - nlen = EXT3_DIR_REC_LEN(de->name_len);
5658 - rlen = le16_to_cpu(de->rec_len);
5659 - if ((de->inode? rlen - nlen: rlen) >= reclen)
5661 - de = (struct ext3_dir_entry_2 *)((char *)de + rlen);
5663 + de = find_insertion_point(dir, bh, name, namelen);
5665 + err = PTR_ERR(de);
5666 + if (err != -ENOSPC)
5670 - if ((char *) de > top)
5673 BUFFER_TRACE(bh, "get_write_access");
5674 err = ext3_journal_get_write_access(handle, bh);
5675 @@ -2232,22 +1282,9 @@ static int add_dirent_to_buf(handle_t *h
5678 /* By now the buffer is marked for journaling */
5679 - nlen = EXT3_DIR_REC_LEN(de->name_len);
5680 - rlen = le16_to_cpu(de->rec_len);
5682 - struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
5683 - de1->rec_len = cpu_to_le16(rlen - nlen);
5684 - de->rec_len = cpu_to_le16(nlen);
5687 - de->file_type = EXT3_FT_UNKNOWN;
5689 - de->inode = cpu_to_le32(inode->i_ino);
5690 - ext3_set_de_type(dir->i_sb, de, inode->i_mode);
5693 - de->name_len = namelen;
5694 - memcpy (de->name, name, namelen);
5696 + split_entry(dir, de, inode ? inode->i_ino : 0,
5697 + inode ? inode->i_mode : 0, name, namelen);
5699 * XXX shouldn't update any times until successful
5700 * completion of syscall, but too many callers depend
5701 @@ -2423,8 +1460,40 @@ static int ext3_add_entry (handle_t *han
5702 return add_dirent_to_buf(handle, dentry, inode, de, bh);
5705 +static int shift_entries(struct iam_path *path,
5706 + struct iam_frame *frame, unsigned count,
5707 + struct iam_entry *entries, struct iam_entry *entries2,
5714 + struct iam_frame *parent = frame - 1;
5715 + struct iam_ikey *pivot = iam_path_ikey(path, 3);
5717 + delta = dx_index_is_compat(path) ? 0 : +1;
5719 + count1 = count/2 + delta;
5720 + count2 = count - count1;
5721 + iam_get_ikey(path, iam_entry_shift(path, entries, count1), pivot);
5723 + dxtrace(printk("Split index %i/%i\n", count1, count2));
5725 + memcpy((char *) iam_entry_shift(path, entries2, delta),
5726 + (char *) iam_entry_shift(path, entries, count1),
5727 + count2 * iam_entry_size(path));
5729 + dx_set_count(entries, count1);
5730 + dx_set_count(entries2, count2 + delta);
5731 + dx_set_limit(entries2, dx_node_limit(path));
5733 + iam_insert_key(path, parent, pivot, newblock);
5737 #ifdef CONFIG_EXT3_INDEX
5738 -static int split_index_node(handle_t *handle, struct iam_path *path)
5739 +int split_index_node(handle_t *handle, struct iam_path *path)
5742 struct iam_entry *entries; /* old block contents */
5743 @@ -2432,10 +1501,17 @@ static int split_index_node(handle_t *ha
5744 struct iam_frame *frame, *safe;
5745 struct buffer_head *bh_new[DX_MAX_TREE_HEIGHT] = {0};
5746 u32 newblock[DX_MAX_TREE_HEIGHT] = {0};
5747 - struct inode *dir = path_obj(path);
5748 + struct inode *dir = iam_path_obj(path);
5749 + struct iam_descr *descr;
5753 + descr = iam_path_descr(path);
5755 + * Algorithm below depends on this.
5757 + assert_corr(dx_root_limit(path) < dx_node_limit(path));
5759 frame = path->ip_frame;
5760 entries = frame->entries;
5762 @@ -2474,7 +1550,8 @@ static int split_index_node(handle_t *ha
5763 for (frame = safe + 1, i = 0; i < nr_splet; ++i, ++frame) {
5764 bh_new[i] = ext3_append (handle, dir, &newblock[i], &err);
5766 - path_descr(path)->id_node_init(path->ip_container, bh_new[i], 0) != 0)
5767 + descr->id_ops->id_node_init(path->ip_container,
5768 + bh_new[i], 0) != 0)
5770 BUFFER_TRACE(frame->bh, "get_write_access");
5771 err = ext3_journal_get_write_access(handle, frame->bh);
5772 @@ -2493,6 +1570,7 @@ static int split_index_node(handle_t *ha
5775 struct buffer_head *bh2;
5776 + struct buffer_head *bh;
5778 entries = frame->entries;
5779 count = dx_get_count(entries);
5780 @@ -2501,6 +1579,7 @@ static int split_index_node(handle_t *ha
5782 entries2 = dx_get_entries(path, bh2->b_data, 0);
5785 if (frame == path->ip_frames) {
5786 /* splitting root node. Tricky point:
5788 @@ -2512,22 +1591,20 @@ static int split_index_node(handle_t *ha
5789 * capacity of the root node is smaller than that of
5792 - struct dx_root *root;
5794 struct iam_frame *frames;
5795 + struct iam_entry *next;
5797 + assert_corr(i == 0);
5799 frames = path->ip_frames;
5800 - root = (struct dx_root *) frames->bh->b_data;
5801 - indirects = root->info.indirect_levels;
5802 - dxtrace(printk("Creating new root %d\n", indirects));
5803 memcpy((char *) entries2, (char *) entries,
5804 count * iam_entry_size(path));
5805 dx_set_limit(entries2, dx_node_limit(path));
5808 - dx_set_count(entries, 1);
5809 - dx_set_block(path, entries, newblock[i]);
5810 - root->info.indirect_levels = indirects + 1;
5811 + next = descr->id_ops->id_root_inc(path->ip_container,
5813 + dx_set_block(path, next, newblock[0]);
5815 /* Shift frames in the path */
5816 memmove(frames + 2, frames + 1,
5817 @@ -2536,49 +1613,61 @@ static int split_index_node(handle_t *ha
5818 frames[1].at = iam_entry_shift(path, entries2, idx);
5819 frames[1].entries = entries = entries2;
5821 - assert(dx_node_check(path, frame));
5822 + assert_inv(dx_node_check(path, frame));
5823 + ++ path->ip_frame;
5825 - assert(dx_node_check(path, frame));
5826 - bh_new[i] = NULL; /* buffer head is "consumed" */
5827 + assert_inv(dx_node_check(path, frame));
5828 + bh_new[0] = NULL; /* buffer head is "consumed" */
5829 err = ext3_journal_get_write_access(handle, bh2);
5833 /* splitting non-root index node. */
5834 - unsigned count1 = count/2, count2 = count - count1;
5838 - iam_entry_shift(path, entries, count1),
5839 - (struct iam_key *)&hash2);
5841 - dxtrace(printk("Split index %i/%i\n", count1, count2));
5843 - memcpy ((char *) entries2,
5844 - (char *) iam_entry_shift(path, entries, count1),
5845 - count2 * iam_entry_size(path));
5846 - dx_set_count (entries, count1);
5847 - dx_set_count (entries2, count2);
5848 - dx_set_limit (entries2, dx_node_limit(path));
5849 + struct iam_frame *parent = frame - 1;
5851 + count = shift_entries(path, frame, count,
5852 + entries, entries2, newblock[i]);
5853 /* Which index block gets the new entry? */
5854 - if (idx >= count1) {
5855 + if (idx >= count) {
5856 + int d = dx_index_is_compat(path) ? 0 : +1;
5858 frame->at = iam_entry_shift(path, entries2,
5861 frame->entries = entries = entries2;
5862 swap(frame->bh, bh2);
5864 + parent->at = iam_entry_shift(path,
5867 - dx_insert_block(path, frame - 1, hash2, newblock[i]);
5868 - assert(dx_node_check(path, frame));
5869 - assert(dx_node_check(path, frame - 1));
5870 + assert_inv(dx_node_check(path, frame));
5871 + assert_inv(dx_node_check(path, parent));
5872 dxtrace(dx_show_index ("node", frame->entries));
5873 dxtrace(dx_show_index ("node",
5874 ((struct dx_node *) bh2->b_data)->entries));
5875 err = ext3_journal_dirty_metadata(handle, bh2);
5878 + err = ext3_journal_dirty_metadata(handle, parent->bh);
5880 + goto journal_error;
5882 + err = ext3_journal_dirty_metadata(handle, bh);
5884 + goto journal_error;
5886 + * This function was called to make insertion of new leaf
5887 + * possible. Check that it fulfilled its obligations.
5889 + assert_corr(dx_get_count(path->ip_frame->entries) <
5890 + dx_get_limit(path->ip_frame->entries));
5892 + if (nr_splet > 0) {
5894 + * Log ->i_size modification.
5896 + err = ext3_mark_inode_dirty(handle, dir);
5898 + goto journal_error;
5902 @@ -2610,7 +1699,7 @@ static int ext3_dx_add_entry(handle_t *h
5905 iam_path_compat_init(&cpath, dir);
5906 - param = path_descr(path);
5907 + param = iam_path_descr(path);
5909 err = dx_probe(dentry, NULL, &hinfo, path);
5911 @@ -2620,7 +1709,8 @@ static int ext3_dx_add_entry(handle_t *h
5912 /* XXX nikita: global serialization! */
5913 isize = dir->i_size;
5915 - err = param->id_node_read(path->ip_container, (iam_ptr_t)dx_get_block(path, frame->at),
5916 + err = param->id_ops->id_node_read(path->ip_container,
5917 + (iam_ptr_t)dx_get_block(path, frame->at),
5921 @@ -2641,11 +1731,11 @@ static int ext3_dx_add_entry(handle_t *h
5924 /*copy split inode too*/
5925 - de = do_split(handle, path, &bh, --frame, &hinfo, &err);
5926 + de = do_split(handle, path, &bh, path->ip_frame, &hinfo, &err);
5930 - assert(dx_node_check(path, frame));
5931 + assert_inv(dx_node_check(path, frame));
5932 err = add_dirent_to_buf(handle, dentry, inode, de, bh);
5935 @@ -2752,6 +1842,26 @@ static struct inode * ext3_new_inode_wan
5936 return ext3_new_inode(handle, dir, mode, inum);
5939 +struct inode *ext3_create_inode(handle_t *handle, struct inode * dir, int mode)
5941 + struct inode *inode;
5943 + inode = ext3_new_inode(handle, dir, mode, 0);
5944 + if (!IS_ERR(inode)) {
5945 + if (S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode)) {
5946 +#ifdef CONFIG_LDISKFS_FS_XATTR
5947 + inode->i_op = &ext3_special_inode_operations;
5950 + inode->i_op = &ext3_file_inode_operations;
5951 + inode->i_fop = &ext3_file_operations;
5952 + ext3_set_aops(inode);
5957 +EXPORT_SYMBOL(ext3_create_inode);
5960 * By the time this is called, we already have created
5961 * the directory cache entry for the new file, but it
5962 Index: iam/include/linux/ext3_fs.h
5963 ===================================================================
5964 --- iam.orig/include/linux/ext3_fs.h
5965 +++ iam/include/linux/ext3_fs.h
5966 @@ -758,9 +758,7 @@ extern int ext3_should_retry_alloc(struc
5967 extern void rsv_window_add(struct super_block *sb, struct reserve_window_node *rsv);
5970 -extern int ext3_check_dir_entry(const char *, struct inode *,
5971 - struct ext3_dir_entry_2 *,
5972 - struct buffer_head *, unsigned long);
5974 extern int ext3_htree_store_dirent(struct file *dir_file, __u32 hash,
5976 struct ext3_dir_entry_2 *dirent);
5977 Index: iam/include/linux/lustre_iam.h
5978 ===================================================================
5979 --- iam.orig/include/linux/lustre_iam.h
5980 +++ iam/include/linux/lustre_iam.h
5982 +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
5983 + * vim:expandtab:shiftwidth=8:tabstop=8:
5986 + * Top-level entry points into iam module
5988 + * Copyright (c) 2006 Cluster File Systems, Inc.
5989 + * Author: Wang Di <wangdi@clusterfs.com>
5990 + * Author: Nikita Danilov <nikita@clusterfs.com>
5992 + * This file is part of the Lustre file system, http://www.lustre.org
5993 + * Lustre is a trademark of Cluster File Systems, Inc.
5995 + * You may have signed or agreed to another license before downloading
5996 + * this software. If so, you are bound by the terms and conditions
5997 + * of that agreement, and the following does not apply to you. See the
5998 + * LICENSE file included with this distribution for more information.
6000 + * If you did not agree to a different license, then this copy of Lustre
6001 + * is open source software; you can redistribute it and/or modify it
6002 + * under the terms of version 2 of the GNU General Public License as
6003 + * published by the Free Software Foundation.
6005 + * In either case, Lustre is distributed in the hope that it will be
6006 + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
6007 + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
6008 + * license text for more details.
6011 +#ifndef __LINUX_LUSTRE_IAM_H__
6012 +#define __LINUX_LUSTRE_IAM_H__
6014 +/* handle_t, journal_start(), journal_stop() */
6015 +#include <linux/jbd.h>
6018 - * Maximal number of non-leaf levels in htree. In the stock ext3 this is 2.
6019 + * linux/include/linux/lustre_iam.h
6024 + * Maximal number of non-leaf levels in htree. In the stock ext3 this
6027 DX_MAX_TREE_HEIGHT = 5,
6028 - DX_SCRATCH_KEYS = 2
6030 + * Scratch keys used by generic code for temporaries.
6034 + * [0] reserved for assertions and as a staging area for
6035 + * record keys immediately used for key comparisons.
6037 + * [1] reserved for record key, stored during iteration over
6038 + * node records (see dx_node_check()).
6040 + * [2] reserved for leaf node operations.
6042 + * [3] reserved for index operations.
6044 + DX_SCRATCH_KEYS = 4,
6046 + * Maximal format name length.
6048 + DX_FMT_NAME_LEN = 16
6052 @@ -30,6 +89,11 @@ struct iam_key;
6053 /* Incomplete type use to refer to the records stored in iam containers. */
6056 +struct iam_cookie {
6057 + struct iam_key *ic_key;
6058 + struct iam_rec *ic_rec;
6061 typedef __u64 iam_ptr_t;
6064 @@ -41,45 +105,25 @@ struct iam_frame {
6065 struct iam_entry *at; /* target entry, found by binary search */
6068 -/* leaf node reached by tree lookup */
6069 -#define iam_leaf_entry iam_rec
6071 - struct buffer_head *bh;
6072 - struct iam_leaf_entry *entries;
6073 - struct iam_leaf_entry *at;
6076 + * Opaque entry in the leaf node.
6081 struct iam_container;
6084 - * Parameters, describing a flavor of iam container.
6088 - * Size of a key in this container, in bytes.
6090 - size_t id_key_size;
6092 - * Size of a pointer to the next level (stored in index nodes), in
6095 - size_t id_ptr_size;
6097 - * Size of a record (stored in leaf nodes), in bytes.
6099 - size_t id_rec_size;
6101 - * Size of unused (by iam) space at the beginning of every non-root
6102 - * node, in bytes. Used for compatibility with ext3.
6104 - size_t id_node_gap;
6106 - * Size of unused (by iam) space at the beginning of root node, in
6107 - * bytes. Used for compatibility with ext3.
6109 - size_t id_root_gap;
6111 +/* leaf node reached by tree lookup */
6113 + struct iam_path *il_path;
6114 + struct buffer_head *il_bh;
6115 + struct iam_lentry *il_entries;
6116 + struct iam_lentry *il_at;
6117 + void *il_descr_data;
6120 +struct iam_operations {
6122 * Returns pointer (in the same sense as pointer in index entry) to
6124 @@ -102,8 +146,8 @@ struct iam_descr {
6126 * Key comparison function. Returns -1, 0, +1.
6128 - int (*id_keycmp)(struct iam_container *c,
6129 - struct iam_key *k1, struct iam_key *k2);
6130 + int (*id_keycmp)(const struct iam_container *c,
6131 + const struct iam_key *k1, const struct iam_key *k2);
6133 * Create new container.
6135 @@ -111,25 +155,113 @@ struct iam_descr {
6136 * contains single record with the smallest possible key.
6138 int (*id_create)(struct iam_container *c);
6143 + char id_name[DX_FMT_NAME_LEN];
6146 +struct iam_leaf_operations {
6152 + * initialize just loaded leaf node.
6154 + int (*init)(struct iam_leaf *p);
6156 + * Format new node.
6158 + void (*init_new)(struct iam_container *c, struct buffer_head *bh);
6160 + * Release resources.
6162 + void (*fini)(struct iam_leaf *l);
6164 * returns true iff leaf is positioned at the last entry.
6166 - int (*at_end)(struct iam_container *c, struct iam_leaf *l);
6167 + int (*at_end)(const struct iam_leaf *l);
6168 /* position leaf at the first entry */
6169 - void (*start)(struct iam_container *c, struct iam_leaf *l);
6170 + void (*start)(struct iam_leaf *l);
6171 /* more leaf to the next entry. */
6172 - void (*next)(struct iam_container *c, struct iam_leaf *l);
6173 - /* return key of current leaf record in @k */
6174 - void (*key)(struct iam_container *c, struct iam_leaf *l,
6175 - struct iam_key *k);
6176 - /* return pointer to entry body */
6177 - struct iam_rec *(*rec)(struct iam_container *c,
6178 - struct iam_leaf *l);
6180 + void (*next)(struct iam_leaf *l);
6181 + /* return key of current leaf record. This method may return
6182 + * either pointer to the key stored in node, or copy key into
6183 + * @k buffer supplied by caller and return pointer to this
6184 + * buffer. The latter approach is used when keys in nodes are
6185 + * not stored in plain form (e.g., htree doesn't store keys at
6188 + * Caller should assume that returned pointer is only valid
6189 + * while leaf node is pinned and locked.*/
6190 + struct iam_key *(*key)(const struct iam_leaf *l, struct iam_key *k);
6191 + /* return pointer to entry body. Pointer is valid while
6192 + corresponding leaf node is locked and pinned. */
6193 + struct iam_rec *(*rec)(const struct iam_leaf *l);
6195 + void (*key_set)(struct iam_leaf *l, const struct iam_key *k);
6196 + void (*rec_set)(struct iam_leaf *l, const struct iam_rec *r);
6199 + * Search leaf @l for a record with key @k or for a place
6200 + * where such record is to be inserted.
6202 + * Scratch keys from @path can be used.
6204 + int (*lookup)(struct iam_leaf *l, const struct iam_key *k);
6206 + int (*can_add)(const struct iam_leaf *l,
6207 + const struct iam_key *k, const struct iam_rec *r);
6209 + * add rec for a leaf
6211 + void (*rec_add)(struct iam_leaf *l,
6212 + const struct iam_key *k, const struct iam_rec *r);
6214 + * remove rec for a leaf
6216 + void (*rec_del)(struct iam_leaf *l);
6218 + * split leaf node, moving some entries into @bh (the latter currently
6219 + * is assumed to be empty).
6221 + void (*split)(struct iam_leaf *l, struct buffer_head *bh);
6224 +struct iam_path *iam_leaf_path(const struct iam_leaf *leaf);
6225 +struct iam_container *iam_leaf_container(const struct iam_leaf *leaf);
6228 + * Parameters, describing a flavor of iam container.
6232 + * Size of a key in this container, in bytes.
6234 + size_t id_key_size;
6236 + * Size of a pointer to the next level (stored in index nodes), in
6239 + size_t id_ptr_size;
6241 + * Size of a record (stored in leaf nodes), in bytes.
6243 + size_t id_rec_size;
6245 + * Size of unused (by iam) space at the beginning of every non-root
6246 + * node, in bytes. Used for compatibility with ext3.
6248 + size_t id_node_gap;
6250 + * Size of unused (by iam) space at the beginning of root node, in
6251 + * bytes. Used for compatibility with ext3.
6253 + size_t id_root_gap;
6255 + struct iam_operations *id_ops;
6256 + struct iam_leaf_operations *id_leaf_ops;
6259 struct iam_container {
6260 @@ -142,10 +274,17 @@ struct iam_container {
6263 struct iam_descr *ic_descr;
6267 + * description-specific part of iam_path. This is usually embedded into larger
6270 +struct iam_path_descr {
6272 - * pointer to flavor-specific per-container data.
6273 + * Scratch-pad area for temporary keys.
6275 - void *ic_descr_data;
6276 + struct iam_key *ipd_key_scratch[DX_SCRATCH_KEYS];
6280 @@ -172,36 +311,240 @@ struct iam_path {
6282 * Leaf node: a child of ->ip_frame.
6284 - struct iam_leaf *ip_leaf;
6285 + struct iam_leaf ip_leaf;
6289 - struct iam_key *ip_key_target;
6290 + const struct iam_key *ip_key_target;
6292 - * Scratch-pad area for temporary keys.
6293 + * Description-specific data.
6295 - struct iam_key *ip_key_scratch[DX_SCRATCH_KEYS];
6297 - * pointer to flavor-specific per-container data.
6299 - void *ip_descr_data;
6300 + struct iam_path_descr *ip_data;
6303 +struct dx_hash_info;
6306 * Helper structure for legacy htrees.
6308 struct iam_path_compat {
6309 struct iam_path ipc_path;
6310 struct iam_container ipc_container;
6311 - __u32 ipc_scrach[DX_SCRATCH_KEYS];
6312 + __u32 ipc_scratch[DX_SCRATCH_KEYS];
6313 + struct dx_hash_info *ipc_hinfo;
6314 + struct dentry *ipc_dentry;
6315 + struct iam_path_descr ipc_descr;
6318 -int iam_lookup(struct iam_container *c, struct iam_key *k, struct iam_rec *r);
6319 -int iam_delete(handle_t *h, struct iam_container *c, struct iam_key *k);
6320 -int iam_update(handle_t *h, struct iam_container *c, struct iam_key *k, struct iam_rec *r);
6321 -int iam_insert(handle_t *handle, struct iam_container *c, struct iam_key *k, struct iam_rec *r);
6323 - * Initialize container @c, acquires additional reference on @inode.
6324 + * iam cursor (iterator) api.
6328 + * States of iterator state machine.
6330 +enum iam_it_state {
6331 + /* initial state */
6333 + /* iterator is above particular record in the container */
6338 + * Flags controlling iterator functionality.
6340 +enum iam_it_flags {
6342 + * this iterator will move (iam_it_{prev,next}() will be called on it)
6344 + IAM_IT_MOVE = (1 << 0),
6346 + * tree can be updated through this iterator.
6348 + IAM_IT_WRITE = (1 << 1)
6354 + * Immediately after call to iam_it_init() iterator is in "detached"
6355 + * (IAM_IT_DETACHED) state: it is associated with given parent container, but
6356 + * doesn't point to any particular record in this container.
6358 + * After successful call to iam_it_get() and until corresponding call to
6359 + * iam_it_put() iterator is in "attached" state (IAM_IT_ATTACHED).
6361 + * Attached iterator can move through records in a container (provided
6362 + * IAM_IT_MOVE permission) in a key order, can get record and key values as it
6363 + * passes over them, and can modify container (provided IAM_IT_WRITE
6366 + * Concurrency: iterators are supposed to be local to thread. Interfaces below
6367 + * do no internal serialization.
6370 +struct iam_iterator {
6372 + * iterator flags, taken from enum iam_it_flags.
6375 + enum iam_it_state ii_state;
6377 + * path to the record. Valid in IAM_IT_ATTACHED state.
6379 + struct iam_path ii_path;
6382 +void iam_path_init(struct iam_path *path, struct iam_container *c,
6383 + struct iam_path_descr *pd);
6384 +void iam_path_fini(struct iam_path *path);
6386 +void iam_path_compat_init(struct iam_path_compat *path, struct inode *inode);
6387 +void iam_path_compat_fini(struct iam_path_compat *path);
6389 +struct iam_path_descr *iam_ipd_alloc(int keysize);
6390 +void iam_ipd_free(struct iam_path_descr *ipd);
6393 + * Initialize iterator to IAM_IT_DETACHED state.
6395 + * postcondition: it_state(it) == IAM_IT_DETACHED
6397 +int iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags,
6398 + struct iam_path_descr *pd);
6400 + * Finalize iterator and release all resources.
6402 + * precondition: it_state(it) == IAM_IT_DETACHED
6404 +void iam_it_fini(struct iam_iterator *it);
6407 + * Attach iterator. After successful completion, @it points to record with the
6408 + * largest key not larger than @k. Semantics of ->id_create() method guarantee
6409 + * that such record will always be found.
6411 + * Return value: 0: positioned on existing record,
6414 + * precondition: it_state(it) == IAM_IT_DETACHED
6415 + * postcondition: ergo(result == 0,
6416 + * (it_state(it) == IAM_IT_ATTACHED &&
6417 + * it_keycmp(it, iam_it_key_get(it, *), k) < 0))
6419 +int iam_it_get(struct iam_iterator *it, const struct iam_key *k);
6422 + * Duplicates iterator.
6424 + * postcondition: it_state(dst) == it_state(src) &&
6425 + * iam_it_container(dst) == iam_it_container(src) &&
6426 + * dst->ii_flags == src->ii_flags &&
6427 + * ergo(it_state(it) == IAM_IT_ATTACHED,
6428 + * iam_it_rec_get(dst) == iam_it_rec_get(src) &&
6429 + * iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
6431 +void iam_it_dup(struct iam_iterator *dst, const struct iam_iterator *src);
6434 + * Detach iterator. Does nothing in detached state.
6436 + * postcondition: it_state(it) == IAM_IT_DETACHED
6438 +void iam_it_put(struct iam_iterator *it);
6441 + * Move iterator one record right.
6443 + * Return value: 0: success,
6444 + * +1: end of container reached
6447 + * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
6448 + * postcondition: ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED)
6450 +int iam_it_next(struct iam_iterator *it);
6453 + * Return pointer to the record under iterator.
6455 + * precondition: it_state(it) == IAM_IT_ATTACHED
6456 + * postcondition: it_state(it) == IAM_IT_ATTACHED
6458 +struct iam_rec *iam_it_rec_get(const struct iam_iterator *it);
6461 + * Replace contents of record under iterator.
6463 + * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
6464 + * postcondition: it_state(it) == IAM_IT_ATTACHED &&
6465 + * ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
6467 +int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r);
6470 + * Place key under iterator in @k, return @k
6472 + * precondition: it_state(it) == IAM_IT_ATTACHED
6473 + * postcondition: it_state(it) == IAM_IT_ATTACHED
6475 +struct iam_key *iam_it_key_get(const struct iam_iterator *it,
6476 + struct iam_key *k);
6479 + * Insert new record with key @k and contents from @r, shifting records to the
6482 + * precondition: it_state(it) == IAM_IT_ATTACHED &&
6483 + * it->ii_flags&IAM_IT_WRITE &&
6484 + * it_keycmp(it, iam_it_key_get(it, *), k) < 0
6485 + * postcondition: it_state(it) == IAM_IT_ATTACHED &&
6486 + * ergo(result == 0,
6487 + * it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
6488 + * !memcmp(iam_it_rec_get(it), r, ...))
6490 +int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
6491 + const struct iam_key *k, const struct iam_rec *r);
6493 + * Delete record under iterator.
6495 + * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
6496 + * postcondition: it_state(it) == IAM_IT_ATTACHED
6498 +int iam_it_rec_delete(handle_t *h, struct iam_iterator *it);
6500 +typedef __u64 iam_pos_t;
6503 + * Convert iterator to cookie.
6505 + * precondition: it_state(it) == IAM_IT_ATTACHED &&
6506 + * path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
6507 + * postcondition: it_state(it) == IAM_IT_ATTACHED
6509 +iam_pos_t iam_it_store(const struct iam_iterator *it);
6512 + * Restore iterator from cookie.
6514 + * precondition: it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE &&
6515 + * path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
6516 + * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED &&
6517 + * iam_it_store(it) == pos)
6519 +int iam_it_load(struct iam_iterator *it, iam_pos_t pos);
6521 +int iam_lookup(struct iam_container *c, const struct iam_key *k,
6522 + struct iam_rec *r, struct iam_path_descr *pd);
6523 +int iam_delete(handle_t *h, struct iam_container *c, const struct iam_key *k,
6524 + struct iam_path_descr *pd);
6525 +int iam_update(handle_t *h, struct iam_container *c, const struct iam_key *k,
6526 + struct iam_rec *r, struct iam_path_descr *pd);
6527 +int iam_insert(handle_t *handle, struct iam_container *c,
6528 + const struct iam_key *k,
6529 + struct iam_rec *r, struct iam_path_descr *pd);
6531 + * Initialize container @c.
6533 int iam_container_init(struct iam_container *c,
6534 struct iam_descr *descr, struct inode *inode);
6535 @@ -210,3 +553,170 @@ int iam_container_init(struct iam_contai
6537 void iam_container_fini(struct iam_container *c);
6540 + * Determine container format.
6542 +int iam_container_setup(struct iam_container *c);
6545 +#define assert(test) J_ASSERT(test)
6548 +static inline struct iam_descr *iam_container_descr(struct iam_container *c)
6550 + return c->ic_descr;
6553 +static inline struct iam_descr *iam_path_descr(const struct iam_path *p)
6555 + return p->ip_container->ic_descr;
6558 +static inline struct inode *iam_path_obj(struct iam_path *p)
6560 + return p->ip_container->ic_object;
6563 +static inline void iam_keycpy(const struct iam_container *c,
6564 + struct iam_key *k1, const struct iam_key *k2)
6566 + memcpy(k1, k2, c->ic_descr->id_key_size);
6569 +static inline int iam_keycmp(const struct iam_container *c,
6570 + const struct iam_key *k1, const struct iam_key *k2)
6572 + return c->ic_descr->id_ops->id_keycmp(c, k1, k2);
6575 +static inline void iam_reccpy(const struct iam_path *p, struct iam_rec *rec_dst,
6576 + const struct iam_rec *rec_src)
6578 + memcpy(rec_dst, rec_src, iam_path_descr(p)->id_rec_size);
6581 +static inline void *iam_entry_off(struct iam_entry *entry, size_t off)
6583 + return (void *)((char *)entry + off);
6586 +/* XXX: this stuff is placed here only because it is used by both iam.c and namei.c */
6587 +static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry)
6589 + return le32_to_cpu(*(u32*)iam_entry_off(entry,
6590 + iam_path_descr(p)->id_key_size))
6594 +static inline void dx_set_block(struct iam_path *p,
6595 + struct iam_entry *entry, unsigned value)
6597 + *(u32*)iam_entry_off(entry,
6598 + iam_path_descr(p)->id_key_size) =
6599 + cpu_to_le32(value);
6602 +static inline void dx_set_key(struct iam_path *p, struct iam_entry *entry,
6603 + const struct iam_key *key)
6605 + iam_keycpy(p->ip_container, iam_entry_off(entry, 0), key);
6608 +struct dx_countlimit {
6613 +static inline unsigned dx_get_count(struct iam_entry *entries)
6615 + return le16_to_cpu(((struct dx_countlimit *) entries)->count);
6618 +static inline unsigned dx_get_limit(struct iam_entry *entries)
6620 + return le16_to_cpu(((struct dx_countlimit *) entries)->limit);
6623 +static inline void dx_set_count(struct iam_entry *entries, unsigned value)
6625 + ((struct dx_countlimit *) entries)->count = cpu_to_le16(value);
6628 +static inline unsigned dx_node_limit(struct iam_path *p)
6630 + struct iam_descr *param = iam_path_descr(p);
6631 + unsigned entry_space = iam_path_obj(p)->i_sb->s_blocksize -
6632 + param->id_node_gap;
6633 + return entry_space / (param->id_key_size + param->id_ptr_size);
6636 +static inline struct iam_entry *dx_get_entries(struct iam_path *path,
6637 + void *data, int root)
6639 + struct iam_descr *param = iam_path_descr(path);
6640 + return data + (root ? param->id_root_gap : param->id_node_gap);
6644 +static inline struct iam_entry *dx_node_get_entries(struct iam_path *path,
6645 + struct iam_frame *frame)
6647 + return dx_get_entries(path,
6648 + frame->bh->b_data, frame == path->ip_frames);
6651 +static inline struct iam_key *iam_path_key(const struct iam_path *path, int nr)
6653 + assert(0 <= nr && nr < ARRAY_SIZE(path->ip_data->ipd_key_scratch));
6654 + return path->ip_data->ipd_key_scratch[nr];
6657 +int dx_lookup(struct iam_path *path);
6658 +void dx_insert_block(struct iam_path *path, struct iam_frame *frame,
6659 + u32 hash, u32 block);
6661 +int ext3_htree_next_block(struct inode *dir, __u32 hash,
6662 + struct iam_path *path, __u32 *start_hash);
6664 +struct buffer_head *ext3_append(handle_t *handle, struct inode *inode,
6665 + u32 *block, int *err);
6666 +int split_index_node(handle_t *handle, struct iam_path *path);
6671 +void iam_container_write_lock(struct iam_container *c);
6672 +void iam_container_write_unlock(struct iam_container *c);
6674 +void iam_container_read_lock(struct iam_container *c);
6675 +void iam_container_read_unlock(struct iam_container *c);
6677 +int iam_index_next(struct iam_container *c, struct iam_path *p);
6678 +int iam_read_leaf(struct iam_path *p);
6680 +int iam_node_read(struct iam_container *c, iam_ptr_t ptr,
6681 + handle_t *handle, struct buffer_head **bh);
6683 +void iam_insert_key(struct iam_path *path, struct iam_frame *frame,
6684 + const struct iam_key *key, iam_ptr_t ptr);
6686 +int iam_leaf_at_end(const struct iam_leaf *l);
6687 +void iam_leaf_next(struct iam_leaf *folio);
6689 +struct iam_path *iam_leaf_path(const struct iam_leaf *leaf);
6690 +struct iam_container *iam_leaf_container(const struct iam_leaf *leaf);
6691 +struct iam_descr *iam_leaf_descr(const struct iam_leaf *leaf);
6692 +struct iam_leaf_operations *iam_leaf_ops(const struct iam_leaf *leaf);
6695 +struct iam_format {
6696 + int (*if_guess)(struct iam_container *c);
6697 + struct list_head if_linkage;
6700 +void iam_format_register(struct iam_format *fmt);
6702 +void iam_lfix_format_init(void);
6704 +/* __LINUX_LUSTRE_IAM_H__ */