1 Index: iam/fs/ext3/Makefile
2 ===================================================================
3 --- iam.orig/fs/ext3/Makefile 2006-07-25 16:59:51.000000000 +0400
4 +++ iam/fs/ext3/Makefile 2006-07-25 16:59:51.000000000 +0400
5 @@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
7 ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
8 ioctl.o namei.o super.o symlink.o hash.o resize.o \
9 - extents.o mballoc.o iam.o iam_lfix.o
10 + extents.o mballoc.o iam.o iam_lfix.o iam_lvar.o iam_htree.o iam_uapi.o
12 ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
13 ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
14 Index: iam/fs/ext3/dir.c
15 ===================================================================
16 --- iam.orig/fs/ext3/dir.c 2006-07-25 16:59:51.000000000 +0400
17 +++ iam/fs/ext3/dir.c 2006-07-25 16:59:51.000000000 +0400
19 #include <linux/smp_lock.h>
20 #include <linux/slab.h>
21 #include <linux/rbtree.h>
22 +#include <linux/lustre_iam.h>
24 static unsigned char ext3_filetype_table[] = {
25 DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
26 @@ -59,7 +60,7 @@ static unsigned char get_dtype(struct su
28 return (ext3_filetype_table[filetype]);
33 int ext3_check_dir_entry (const char * function, struct inode * dir,
34 struct ext3_dir_entry_2 * de,
35 @@ -165,7 +166,7 @@ revalidate:
37 if (filp->f_version != inode->i_version) {
38 for (i = 0; i < sb->s_blocksize && i < offset; ) {
39 - de = (struct ext3_dir_entry_2 *)
40 + de = (struct ext3_dir_entry_2 *)
42 /* It's too expensive to do a full
43 * dirent test each time round this
44 @@ -184,7 +185,7 @@ revalidate:
45 filp->f_version = inode->i_version;
48 - while (!error && filp->f_pos < inode->i_size
49 + while (!error && filp->f_pos < inode->i_size
50 && offset < sb->s_blocksize) {
51 de = (struct ext3_dir_entry_2 *) (bh->b_data + offset);
52 if (!ext3_check_dir_entry ("ext3_readdir", inode, de,
53 @@ -232,7 +233,7 @@ out:
55 * These functions convert from the major/minor hash to an f_pos
59 * Currently we only use major hash numer. This is unfortunate, but
60 * on 32-bit machines, the same VFS interface is used for lseek and
61 * llseek, so if we use the 64 bit offset, then the 32-bit versions of
62 @@ -253,7 +254,7 @@ out:
66 - struct rb_node rb_hash;
67 + struct rb_node rb_hash;
71 @@ -305,12 +306,14 @@ static void free_rb_tree_fname(struct rb
75 +extern struct iam_private_info *ext3_iam_alloc_info(int flags);
76 +extern void ext3_iam_release_info(struct iam_private_info *info);
78 struct dir_private_info *create_dir_info(loff_t pos)
80 struct dir_private_info *p;
82 - p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL);
83 + p = (void *)ext3_iam_alloc_info(GFP_KERNEL);
86 p->root.rb_node = NULL;
87 @@ -326,6 +329,7 @@ struct dir_private_info *create_dir_info
88 void ext3_htree_free_dir_info(struct dir_private_info *p)
90 free_rb_tree_fname(&p->root);
91 + ext3_iam_release_info((void *)p);
95 @@ -413,7 +417,7 @@ static int call_filldir(struct file * fi
96 curr_pos = hash2pos(fname->hash, fname->minor_hash);
98 error = filldir(dirent, fname->name,
99 - fname->name_len, curr_pos,
100 + fname->name_len, curr_pos,
102 get_dtype(sb, fname->file_type));
104 @@ -468,7 +472,7 @@ static int ext3_dx_readdir(struct file *
106 * Fill the rbtree if we have no more entries,
107 * or the inode has changed since we last read in the
111 if ((!info->curr_node) ||
112 (filp->f_version != inode->i_version)) {
113 Index: iam/fs/ext3/file.c
114 ===================================================================
115 --- iam.orig/fs/ext3/file.c 2006-07-25 16:59:51.000000000 +0400
116 +++ iam/fs/ext3/file.c 2006-07-25 16:59:51.000000000 +0400
118 #include <linux/jbd.h>
119 #include <linux/ext3_fs.h>
120 #include <linux/ext3_jbd.h>
121 +#include <linux/lustre_iam.h>
125 @@ -37,8 +38,12 @@ static int ext3_release_file (struct ino
126 if ((filp->f_mode & FMODE_WRITE) &&
127 (atomic_read(&inode->i_writecount) == 1))
128 ext3_discard_reservation(inode);
129 - if (is_dx(inode) && filp->private_data)
130 - ext3_htree_free_dir_info(filp->private_data);
131 + if (is_dx(inode) && filp->private_data) {
132 + if (S_ISDIR(inode->i_mode))
133 + ext3_htree_free_dir_info(filp->private_data);
135 + ext3_iam_release(filp, inode);
140 @@ -110,7 +115,7 @@ ext3_file_write(struct kiocb *iocb, cons
143 err = ext3_force_commit(inode->i_sb);
149 Index: iam/fs/ext3/iam-uapi.c
150 ===================================================================
151 --- iam.orig/fs/ext3/iam-uapi.c 2004-04-06 17:27:52.000000000 +0400
152 +++ iam/fs/ext3/iam-uapi.c 2006-07-25 16:59:51.000000000 +0400
154 +/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
155 + * vim:expandtab:shiftwidth=8:tabstop=8:
158 + * User-level interface to iam (ioctl based)
160 + * Copyright (c) 2006 Cluster File Systems, Inc.
161 + * Author: Nikita Danilov <nikita@clusterfs.com>
163 + * This file is part of the Lustre file system, http://www.lustre.org
164 + * Lustre is a trademark of Cluster File Systems, Inc.
166 + * You may have signed or agreed to another license before downloading
167 + * this software. If so, you are bound by the terms and conditions
168 + * of that agreement, and the following does not apply to you. See the
169 + * LICENSE file included with this distribution for more information.
171 + * If you did not agree to a different license, then this copy of Lustre
172 + * is open source software; you can redistribute it and/or modify it
173 + * under the terms of version 2 of the GNU General Public License as
174 + * published by the Free Software Foundation.
176 + * In either case, Lustre is distributed in the hope that it will be
177 + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty
178 + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
179 + * license text for more details.
182 +#include <linux/types.h>
183 +#include <linux/jbd.h>
185 +#include <linux/ext3_fs.h>
186 +#include <linux/ext3_jbd.h>
188 +#include <linux/lustre_iam.h>
190 +#include <libcfs/libcfs.h>
191 +#include <libcfs/kp30.h>
193 +struct iam_private_info {
194 + struct dir_private_info ipi_dir; /* has to be first */
195 + struct iam_container ipi_bag;
196 + struct iam_descr ipi_descr;
197 + struct iam_iterator ipi_it;
198 + struct iam_path_descr *ipi_ipd;
202 + IAM_INSERT_CREDITS = 20
205 +static struct iam_private_info *get_ipi(struct file *filp)
207 + return filp->private_data;
210 +static int iam_uapi_it(int cmd, struct inode *inode,
211 + struct file *filp, struct iam_uapi_it *itop)
213 + struct iam_private_info *ipi;
214 + struct iam_iterator *it;
215 + enum iam_it_state st;
218 + ipi = get_ipi(filp);
222 + case IAM_IOC_IT_START:
223 + result = iam_it_init(it, &ipi->ipi_bag,
224 + IAM_IT_MOVE, ipi->ipi_ipd);
226 + result = iam_it_get(it, itop->iui_op.iul_key);
228 + case IAM_IOC_IT_NEXT:
229 + if (st == IAM_IT_ATTACHED || st == IAM_IT_SKEWED)
230 + result = iam_it_next(it);
234 + case IAM_IOC_IT_STOP:
241 + if (st == IAM_IT_ATTACHED || st == IAM_IT_SKEWED)
242 + memcpy(itop->iui_op.iul_key, iam_it_key_get(it),
243 + iam_it_key_size(it));
244 + if (st == IAM_IT_ATTACHED)
245 + iam_reccpy(&it->ii_path,
246 + itop->iui_op.iul_rec, iam_it_rec_get(it));
247 + itop->iui_state = st;
251 +static int iam_uapi_op(int cmd, struct inode *inode,
252 + struct file *filp, struct iam_uapi_op *op)
255 + struct iam_private_info *ipi;
257 + ipi = get_ipi(filp);
258 + if (cmd == IAM_IOC_INSERT || cmd == IAM_IOC_DELETE) {
261 + h = ext3_journal_start(inode, IAM_INSERT_CREDITS);
263 + if (cmd == IAM_IOC_INSERT)
264 + result = iam_insert(h, &ipi->ipi_bag,
266 + op->iul_rec, ipi->ipi_ipd);
268 + result = iam_delete(h, &ipi->ipi_bag,
269 + op->iul_key, ipi->ipi_ipd);
270 + ext3_journal_stop(h);
272 + result = PTR_ERR(h);
273 + ext3_std_error(inode->i_sb, result);
276 + result = iam_lookup(&ipi->ipi_bag, op->iul_key,
277 + op->iul_rec, ipi->ipi_ipd);
281 +struct iam_private_info *ext3_iam_alloc_info(int flags)
283 + struct iam_private_info *info;
285 + info = kmalloc(sizeof *info, flags);
287 + memset(info, 0, sizeof *info);
291 +void ext3_iam_release_info(struct iam_private_info *info)
293 + iam_it_put(&info->ipi_it);
294 + iam_it_fini(&info->ipi_it);
295 + if (info->ipi_ipd != NULL)
296 + info->ipi_bag.ic_descr->id_ops->id_ipd_free(&info->ipi_bag,
298 + iam_container_fini(&info->ipi_bag);
301 +void ext3_iam_release(struct file *filp, struct inode *inode)
303 + struct iam_private_info *info;
305 + info = filp->private_data;
306 + ext3_iam_release_info(info);
309 + EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL;
312 +static int iam_uapi_init(struct inode *inode,
313 + struct file *filp, struct iam_uapi_info *ua)
316 + struct iam_private_info *info;
318 + info = ext3_iam_alloc_info(GFP_KERNEL);
319 + if (info != NULL) {
320 + struct iam_container *bag;
321 + struct iam_descr *des;
323 + bag = &info->ipi_bag;
324 + des = &info->ipi_descr;
325 + result = iam_container_init(bag, des, inode);
327 + result = iam_container_setup(bag);
330 + * Container setup might change ->ic_descr
332 + des = bag->ic_descr;
333 + info->ipi_ipd = des->id_ops->id_ipd_alloc(bag);
334 + if (info->ipi_ipd != NULL) {
335 + filp->private_data = info;
336 + EXT3_I(inode)->i_flags |= EXT3_INDEX_FL;
347 +static int getua(struct iam_uapi_info *ua, unsigned long arg)
349 + if (copy_from_user(ua, (struct iam_uapi_info __user *)arg, sizeof *ua))
355 +static int putua(struct iam_uapi_info *ua, unsigned long arg)
357 + if (copy_to_user((struct iam_uapi_info __user *)arg, ua, sizeof *ua))
369 +static int outop(struct iam_uapi_op *op, struct iam_uapi_op *uop,
370 + struct iam_descr *des, enum outop_t opt)
374 + if (((opt & REC) && copy_to_user((void __user *)uop->iul_rec,
375 + op->iul_rec, des->id_rec_size)) ||
376 + ((opt & KEY) && copy_to_user((void __user *)uop->iul_key,
377 + op->iul_key, des->id_key_size)))
384 +static void putop(struct iam_uapi_op *op)
386 + kfree(op->iul_key);
387 + kfree(op->iul_rec);
390 +static int getop(struct iam_uapi_op *op, struct iam_uapi_op *uop,
391 + struct iam_descr *des, unsigned long arg)
397 + ks = des->id_key_size;
398 + rs = des->id_rec_size;
399 + op->iul_key = kmalloc(ks, GFP_KERNEL);
400 + op->iul_rec = kmalloc(rs, GFP_KERNEL);
401 + if (!copy_from_user(uop,
402 + (struct iam_uapi_op __user *)arg, sizeof *uop) &&
403 + op->iul_key != NULL && op->iul_rec != NULL &&
404 + !copy_from_user(op->iul_key, (void __user *)uop->iul_key, ks) &&
405 + !copy_from_user(op->iul_rec, (void __user *)uop->iul_rec, rs))
414 +static int outit(struct iam_uapi_it *it, struct iam_uapi_it *uit,
415 + struct iam_descr *des, enum outop_t opt, unsigned long arg)
419 + result = outop(&it->iui_op, &uit->iui_op, des, opt);
420 + if (result == 0 && (opt&STATE))
421 + result = put_user(it->iui_state, (int __user *) arg);
425 +static void putit(struct iam_uapi_it *it)
427 + putop(&it->iui_op);
430 +static int getit(struct iam_uapi_it *it, struct iam_uapi_it *uit,
431 + struct iam_descr *des, unsigned long arg)
433 + return getop(&it->iui_op, &uit->iui_op, des,
434 + (unsigned long)&((struct iam_uapi_it *)arg)->iui_op);
437 +int iam_uapi_ioctl(struct inode *inode, struct file *filp, unsigned int cmd,
441 + struct iam_uapi_info ua;
442 + struct iam_uapi_op uop;
443 + struct iam_uapi_op op;
444 + struct iam_uapi_it uit;
445 + struct iam_uapi_it it;
448 + if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) {
450 + } else if (cmd == IAM_IOC_POLYMORPH) {
451 + inode->i_mode = (umode_t)arg;
452 + mark_inode_dirty(inode);
454 + } else if (cmd == IAM_IOC_INIT) {
455 + if (filp->private_data == NULL) {
456 + result = getua(&ua, arg);
458 + result = iam_uapi_init(inode, filp, &ua);
461 + } else if (is_dx(inode) && filp->private_data != NULL) {
462 + struct iam_descr *des;
465 + case IAM_IOC_IT_START:
466 + case IAM_IOC_IT_NEXT:
467 + opt = KEY|REC|STATE;
469 + case IAM_IOC_LOOKUP:
477 + des = get_ipi(filp)->ipi_bag.ic_descr;
478 + if (cmd == IAM_IOC_GETINFO) {
479 + ua.iui_keysize = des->id_key_size;
480 + ua.iui_recsize = des->id_rec_size;
481 + ua.iui_ptrsize = des->id_ptr_size;
482 + ua.iui_height = 0; /* not yet */
483 + memcpy(ua.iui_fmt_name, des->id_ops->id_name,
484 + ARRAY_SIZE(ua.iui_fmt_name));
485 + result = putua(&ua, arg);
486 + } else if (cmd == IAM_IOC_INSERT || cmd == IAM_IOC_LOOKUP ||
487 + cmd == IAM_IOC_DELETE) {
488 + result = getop(&op, &uop, des, arg);
491 + result = iam_uapi_op(cmd, inode, filp, &op);
493 + res2 = outop(&op, &uop, des, opt);
494 + result = result ? : res2;
497 + } else if (cmd == IAM_IOC_IT_START || cmd == IAM_IOC_IT_NEXT ||
498 + cmd == IAM_IOC_IT_STOP) {
499 + result = getit(&it, &uit, des, arg);
503 + result = iam_uapi_it(cmd, inode, filp, &it);
505 + res2 = outit(&it, &uit, des, opt, arg);
506 + result = result ? : res2;
515 Index: iam/fs/ext3/ioctl.c
516 ===================================================================
517 --- iam.orig/fs/ext3/ioctl.c 2006-07-25 16:59:51.000000000 +0400
518 +++ iam/fs/ext3/ioctl.c 2006-07-25 16:59:51.000000000 +0400
519 @@ -250,6 +250,6 @@ flags_err:
524 + return iam_uapi_ioctl(inode, filp, cmd, arg);
527 Index: iam/include/linux/lustre_iam.h
528 ===================================================================
529 --- iam.orig/include/linux/lustre_iam.h 2006-07-25 16:59:51.000000000 +0400
530 +++ iam/include/linux/lustre_iam.h 2006-07-25 16:59:51.000000000 +0400
532 #ifndef __LINUX_LUSTRE_IAM_H__
533 #define __LINUX_LUSTRE_IAM_H__
535 -/* handle_t, journal_start(), journal_stop() */
536 -#include <linux/jbd.h>
539 * linux/include/linux/lustre_iam.h
541 @@ -57,14 +54,21 @@ enum {
542 * [2] reserved for leaf node operations.
544 * [3] reserved for index operations.
546 + * [4] reserved for path->ip_ikey_target
549 - DX_SCRATCH_KEYS = 4,
550 + DX_SCRATCH_KEYS = 5,
552 * Maximal format name length.
558 +/* handle_t, journal_start(), journal_stop() */
559 +#include <linux/jbd.h>
562 * Entry within index tree node. Consists of a key immediately followed
563 * (without padding) by a pointer to the child node.
564 @@ -86,14 +90,21 @@ struct iam_entry_compat {
568 -/* Incomplete type use to refer to the records stored in iam containers. */
 570 + * Incomplete type used to refer to the records stored in iam containers.
575 - struct iam_key *ic_key;
576 - struct iam_rec *ic_rec;
579 + * Key in index node. Possibly compressed. Fixed size.
584 + * Scalar type into which certain iam_key's can be uniquely mapped. Used to
585 + * support interfaces like readdir(), where iteration over index has to be
588 typedef __u64 iam_ptr_t;
591 @@ -123,6 +134,31 @@ struct iam_leaf {
596 + * Return values of ->lookup() operation from struct iam_leaf_operations.
600 + * lookup found a record with the key requested
604 + * lookup positioned leaf on some record
612 + * lookup positioned leaf before first record
618 + * Format-specific container operations. These are called by generic iam code.
620 struct iam_operations {
622 * Returns pointer (in the same sense as pointer in index entry) to
623 @@ -131,11 +167,15 @@ struct iam_operations {
624 __u32 (*id_root_ptr)(struct iam_container *c);
627 - * Check validity and consistency of index node. This is called when
628 - * iam just loaded new node into frame.
629 + * Check validity and consistency of index node.
631 int (*id_node_check)(struct iam_path *path, struct iam_frame *frame);
633 + * Copy some data from node header into frame. This is called when
634 + * new node is loaded into frame.
636 + int (*id_node_load)(struct iam_path *path, struct iam_frame *frame);
638 * Initialize new node (stored in @bh) that is going to be added into
641 @@ -144,23 +184,33 @@ struct iam_operations {
642 int (*id_node_read)(struct iam_container *c, iam_ptr_t ptr,
643 handle_t *h, struct buffer_head **bh);
645 - * Key comparison function. Returns -1, 0, +1.
646 + * Key comparison functions. Returns -1, 0, +1.
648 - int (*id_keycmp)(const struct iam_container *c,
649 - const struct iam_key *k1, const struct iam_key *k2);
650 + int (*id_ikeycmp)(const struct iam_container *c,
651 + const struct iam_ikey *k1,
652 + const struct iam_ikey *k2);
654 - * Create new container.
656 - * Newly created container has a root node and a single leaf. Leaf
657 - * contains single record with the smallest possible key.
658 + * Modify root node when tree height increases.
660 - int (*id_create)(struct iam_container *c);
661 + struct iam_entry *(*id_root_inc)(struct iam_container *c,
662 + struct iam_path *path,
663 + struct iam_frame *frame);
665 + struct iam_path_descr *(*id_ipd_alloc)(const struct iam_container *c);
666 + void (*id_ipd_free)(const struct iam_container *c,
667 + struct iam_path_descr *ipd);
671 char id_name[DX_FMT_NAME_LEN];
675 + * Another format-specific operation vector, consisting of methods to access
676 + * leaf nodes. This is separated from struct iam_operations, because it is
677 + * assumed that there will be many formats with different format of leaf
 678 + * nodes, yet the same struct iam_operations.
680 struct iam_leaf_operations {
683 @@ -186,7 +236,8 @@ struct iam_leaf_operations {
684 void (*start)(struct iam_leaf *l);
685 /* more leaf to the next entry. */
686 void (*next)(struct iam_leaf *l);
687 - /* return key of current leaf record. This method may return
689 + * return key of current leaf record. This method may return
690 * either pointer to the key stored in node, or copy key into
691 * @k buffer supplied by caller and return pointer to this
692 * buffer. The latter approach is used when keys in nodes are
693 @@ -194,8 +245,10 @@ struct iam_leaf_operations {
696 * Caller should assume that returned pointer is only valid
697 - * while leaf node is pinned and locked.*/
698 - struct iam_key *(*key)(const struct iam_leaf *l, struct iam_key *k);
699 + * while leaf node is pinned and locked.
701 + struct iam_ikey *(*ikey)(const struct iam_leaf *l, struct iam_ikey *k);
702 + struct iam_key *(*key)(const struct iam_leaf *l);
703 /* return pointer to entry body. Pointer is valid while
704 corresponding leaf node is locked and pinned. */
705 struct iam_rec *(*rec)(const struct iam_leaf *l);
706 @@ -203,6 +256,9 @@ struct iam_leaf_operations {
707 void (*key_set)(struct iam_leaf *l, const struct iam_key *k);
708 void (*rec_set)(struct iam_leaf *l, const struct iam_rec *r);
710 + int (*key_cmp)(const struct iam_leaf *l, const struct iam_key *k);
712 + int (*key_size)(const struct iam_leaf *l);
714 * Search leaf @l for a record with key @k or for a place
715 * where such record is to be inserted.
716 @@ -221,12 +277,13 @@ struct iam_leaf_operations {
718 * remove rec for a leaf
720 - void (*rec_del)(struct iam_leaf *l);
721 + void (*rec_del)(struct iam_leaf *l, int shift);
723 * split leaf node, moving some entries into @bh (the latter currently
724 * is assumed to be empty).
726 - void (*split)(struct iam_leaf *l, struct buffer_head *bh);
727 + void (*split)(struct iam_leaf *l, struct buffer_head **bh,
728 + iam_ptr_t newblknr);
731 struct iam_path *iam_leaf_path(const struct iam_leaf *leaf);
732 @@ -241,6 +298,10 @@ struct iam_descr {
736 + * Size of a key in index nodes, in bytes.
738 + size_t id_ikey_size;
740 * Size of a pointer to the next level (stored in index nodes), in
743 @@ -264,6 +325,9 @@ struct iam_descr {
744 struct iam_leaf_operations *id_leaf_ops;
748 + * An instance of iam container.
750 struct iam_container {
752 * Underlying flat file. IO against this object is issued to
753 @@ -284,7 +348,7 @@ struct iam_path_descr {
755 * Scratch-pad area for temporary keys.
757 - struct iam_key *ipd_key_scratch[DX_SCRATCH_KEYS];
758 + struct iam_ikey *ipd_key_scratch[DX_SCRATCH_KEYS];
762 @@ -316,6 +380,7 @@ struct iam_path {
765 const struct iam_key *ip_key_target;
766 + struct iam_ikey *ip_ikey_target;
768 * Description-specific data.
770 @@ -334,6 +399,7 @@ struct iam_path_compat {
771 struct dx_hash_info *ipc_hinfo;
772 struct dentry *ipc_dentry;
773 struct iam_path_descr ipc_descr;
774 + struct dx_hash_info ipc_hinfo_area;
778 @@ -347,7 +413,9 @@ enum iam_it_state {
781 /* iterator is above particular record in the container */
784 + /* iterator is positioned before record */
789 @@ -355,7 +423,7 @@ enum iam_it_state {
793 - * this iterator will move (iam_it_{prev,next}() will be called on it)
794 + * this iterator will move (iam_it_next() will be called on it)
796 IAM_IT_MOVE = (1 << 0),
798 @@ -372,15 +440,26 @@ enum iam_it_flags {
799 * doesn't point to any particular record in this container.
801 * After successful call to iam_it_get() and until corresponding call to
802 - * iam_it_put() iterator is in "attached" state (IAM_IT_ATTACHED).
803 + * iam_it_put() iterator is in one of "active" states: IAM_IT_ATTACHED or
806 - * Attached iterator can move through records in a container (provided
807 + * Active iterator can move through records in a container (provided
808 * IAM_IT_MOVE permission) in a key order, can get record and key values as it
809 * passes over them, and can modify container (provided IAM_IT_WRITE
812 + * Iteration may reach the end of container, at which point iterator switches
813 + * into IAM_IT_DETACHED state.
815 * Concurrency: iterators are supposed to be local to thread. Interfaces below
816 - * do no internal serialization.
817 + * do no internal serialization of access to the iterator fields.
819 + * When in non-detached state, iterator keeps some container nodes pinned in
820 + * memory and locked (that locking may be implemented at the container
821 + * granularity though). In particular, clients may assume that pointers to
 822 + * records and keys obtained through iterator interface are valid until
 823 + * iterator is detached (except that they may be invalidated by subsequent
824 + * operations done through the same iterator).
827 struct iam_iterator {
828 @@ -390,7 +469,8 @@ struct iam_iterator {
830 enum iam_it_state ii_state;
832 - * path to the record. Valid in IAM_IT_ATTACHED state.
833 + * path to the record. Valid in IAM_IT_ATTACHED, and IAM_IT_SKEWED
836 struct iam_path ii_path;
838 @@ -405,133 +485,25 @@ void iam_path_compat_fini(struct iam_pat
839 struct iam_path_descr *iam_ipd_alloc(int keysize);
840 void iam_ipd_free(struct iam_path_descr *ipd);
843 - * Initialize iterator to IAM_IT_DETACHED state.
845 - * postcondition: it_state(it) == IAM_IT_DETACHED
847 int iam_it_init(struct iam_iterator *it, struct iam_container *c, __u32 flags,
848 struct iam_path_descr *pd);
850 - * Finalize iterator and release all resources.
852 - * precondition: it_state(it) == IAM_IT_DETACHED
854 void iam_it_fini(struct iam_iterator *it);
857 - * Attach iterator. After successful completion, @it points to record with the
858 - * largest key not larger than @k. Semantics of ->id_create() method guarantee
859 - * that such record will always be found.
861 - * Return value: 0: positioned on existing record,
864 - * precondition: it_state(it) == IAM_IT_DETACHED
865 - * postcondition: ergo(result == 0,
866 - * (it_state(it) == IAM_IT_ATTACHED &&
867 - * it_keycmp(it, iam_it_key_get(it, *), k) < 0))
869 int iam_it_get(struct iam_iterator *it, const struct iam_key *k);
872 - * Duplicates iterator.
874 - * postcondition: it_state(dst) == it_state(src) &&
875 - * iam_it_container(dst) == iam_it_container(src) &&
876 - * dst->ii_flags = src->ii_flags &&
877 - * ergo(it_state(it) == IAM_IT_ATTACHED,
878 - * iam_it_rec_get(dst) == iam_it_rec_get(src) &&
879 - * iam_it_key_get(dst, *1) == iam_it_key_get(src, *2))
881 +int iam_it_get_at(struct iam_iterator *it, const struct iam_key *k);
882 void iam_it_dup(struct iam_iterator *dst, const struct iam_iterator *src);
885 - * Detach iterator. Does nothing it detached state.
887 - * postcondition: it_state(it) == IAM_IT_DETACHED
889 void iam_it_put(struct iam_iterator *it);
892 - * Move iterator one record right.
894 - * Return value: 0: success,
895 - * +1: end of container reached
898 - * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_MOVE
899 - * postcondition: ergo(result >= 0, it_state(it) == IAM_IT_ATTACHED)
901 int iam_it_next(struct iam_iterator *it);
904 - * Return pointer to the record under iterator.
906 - * precondition: it_state(it) == IAM_IT_ATTACHED
907 - * postcondition: it_state(it) == IAM_IT_ATTACHED
909 struct iam_rec *iam_it_rec_get(const struct iam_iterator *it);
912 - * Replace contents of record under iterator.
914 - * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
915 - * postcondition: it_state(it) == IAM_IT_ATTACHED &&
916 - * ergo(result == 0, !memcmp(iam_it_rec_get(it), r, ...))
918 int iam_it_rec_set(handle_t *h, struct iam_iterator *it, struct iam_rec *r);
921 - * Place key under iterator in @k, return @k
923 - * precondition: it_state(it) == IAM_IT_ATTACHED
924 - * postcondition: it_state(it) == IAM_IT_ATTACHED
926 -struct iam_key *iam_it_key_get(const struct iam_iterator *it,
927 - struct iam_key *k);
930 - * Insert new record with key @k and contents from @r, shifting records to the
933 - * precondition: it_state(it) == IAM_IT_ATTACHED &&
934 - * it->ii_flags&IAM_IT_WRITE &&
935 - * it_keycmp(it, iam_it_key_get(it, *), k) < 0
936 - * postcondition: it_state(it) == IAM_IT_ATTACHED &&
937 - * ergo(result == 0,
938 - * it_keycmp(it, iam_it_key_get(it, *), k) == 0 &&
939 - * !memcmp(iam_it_rec_get(it), r, ...))
941 +struct iam_key *iam_it_key_get(const struct iam_iterator *it);
942 +int iam_it_key_size(const struct iam_iterator *it);
943 int iam_it_rec_insert(handle_t *h, struct iam_iterator *it,
944 const struct iam_key *k, const struct iam_rec *r);
946 - * Delete record under iterator.
948 - * precondition: it_state(it) == IAM_IT_ATTACHED && it->ii_flags&IAM_IT_WRITE
949 - * postcondition: it_state(it) == IAM_IT_ATTACHED
951 int iam_it_rec_delete(handle_t *h, struct iam_iterator *it);
953 typedef __u64 iam_pos_t;
956 - * Convert iterator to cookie.
958 - * precondition: it_state(it) == IAM_IT_ATTACHED &&
959 - * path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
960 - * postcondition: it_state(it) == IAM_IT_ATTACHED
962 iam_pos_t iam_it_store(const struct iam_iterator *it);
965 - * Restore iterator from cookie.
967 - * precondition: it_state(it) == IAM_IT_DETACHED && it->ii_flags&IAM_IT_MOVE &&
968 - * path_descr(it->ii_path)->id_key_size <= sizeof(iam_pos_t)
969 - * postcondition: ergo(result == 0, it_state(it) == IAM_IT_ATTACHED &&
970 - * iam_it_store(it) == pos)
972 int iam_it_load(struct iam_iterator *it, iam_pos_t pos);
974 int iam_lookup(struct iam_container *c, const struct iam_key *k,
975 @@ -577,16 +549,65 @@ static inline struct inode *iam_path_obj
976 return p->ip_container->ic_object;
979 -static inline void iam_keycpy(const struct iam_container *c,
980 - struct iam_key *k1, const struct iam_key *k2)
981 +static inline void iam_ikeycpy(const struct iam_container *c,
982 + struct iam_ikey *k1, const struct iam_ikey *k2)
984 + memcpy(k1, k2, c->ic_descr->id_ikey_size);
987 +static inline size_t iam_entry_size(struct iam_path *p)
989 + return iam_path_descr(p)->id_ikey_size + iam_path_descr(p)->id_ptr_size;
992 +static inline struct iam_entry *iam_entry_shift(struct iam_path *p,
993 + struct iam_entry *entry,
996 - memcpy(k1, k2, c->ic_descr->id_key_size);
998 + return e + shift * iam_entry_size(p);
1001 -static inline int iam_keycmp(const struct iam_container *c,
1002 - const struct iam_key *k1, const struct iam_key *k2)
1003 +static inline struct iam_ikey *iam_get_ikey(struct iam_path *p,
1004 + struct iam_entry *entry,
1005 + struct iam_ikey *key)
1007 - return c->ic_descr->id_ops->id_keycmp(c, k1, k2);
1008 + return memcpy(key, entry, iam_path_descr(p)->id_ikey_size);
1011 +static inline struct iam_ikey *iam_ikey_at(struct iam_path *p,
1012 + struct iam_entry *entry)
1014 + return (struct iam_ikey *)entry;
1017 +static inline ptrdiff_t iam_entry_diff(struct iam_path *p,
1018 + struct iam_entry *e1,
1019 + struct iam_entry *e2)
1023 + diff = (void *)e1 - (void *)e2;
1024 + assert(diff / iam_entry_size(p) * iam_entry_size(p) == diff);
1025 + return diff / iam_entry_size(p);
1029 + * Helper for the frequent case, where key was already placed into @k1 by
1032 +static inline void iam_ikeycpy0(const struct iam_container *c,
1033 + struct iam_ikey *k1, const struct iam_ikey *k2)
1036 + iam_ikeycpy(c, k1, k2);
1039 +static inline int iam_ikeycmp(const struct iam_container *c,
1040 + const struct iam_ikey *k1,
1041 + const struct iam_ikey *k2)
1043 + return c->ic_descr->id_ops->id_ikeycmp(c, k1, k2);
1046 static inline void iam_reccpy(const struct iam_path *p, struct iam_rec *rec_dst,
1047 @@ -604,7 +625,7 @@ static inline void *iam_entry_off(struct
1048 static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry)
1050 return le32_to_cpu(*(u32*)iam_entry_off(entry,
1051 - iam_path_descr(p)->id_key_size))
1052 + iam_path_descr(p)->id_ikey_size))
1056 @@ -612,21 +633,64 @@ static inline void dx_set_block(struct i
1057 struct iam_entry *entry, unsigned value)
1059 *(u32*)iam_entry_off(entry,
1060 - iam_path_descr(p)->id_key_size) =
1061 + iam_path_descr(p)->id_ikey_size) =
1065 -static inline void dx_set_key(struct iam_path *p, struct iam_entry *entry,
1066 - const struct iam_key *key)
1067 +static inline void dx_set_ikey(struct iam_path *p, struct iam_entry *entry,
1068 + const struct iam_ikey *key)
1070 - iam_keycpy(p->ip_container, iam_entry_off(entry, 0), key);
1071 + iam_ikeycpy(p->ip_container, iam_entry_off(entry, 0), key);
1074 +struct dx_map_entry
1080 +struct fake_dirent {
1087 struct dx_countlimit {
1093 + * dx_root_info is laid out so that if it should somehow get overlaid by a
1094 + * dirent the two low bits of the hash version will be zero. Therefore, the
1095 + * hash version mod 4 should never be 0. Sincerely, the paranoia department.
1099 + struct fake_dirent dot;
1101 + struct fake_dirent dotdot;
1102 + char dotdot_name[4];
1103 + struct dx_root_info
1105 + __le32 reserved_zero;
1107 + u8 info_length; /* 8 */
1108 + u8 indirect_levels;
1112 + struct {} entries[0];
1117 + struct fake_dirent fake;
1118 + struct {} entries[0];
1122 static inline unsigned dx_get_count(struct iam_entry *entries)
1124 return le16_to_cpu(((struct dx_countlimit *) entries)->count);
1125 @@ -647,9 +711,18 @@ static inline unsigned dx_node_limit(str
1126 struct iam_descr *param = iam_path_descr(p);
1127 unsigned entry_space = iam_path_obj(p)->i_sb->s_blocksize -
1129 - return entry_space / (param->id_key_size + param->id_ptr_size);
1130 + return entry_space / (param->id_ikey_size + param->id_ptr_size);
1133 +static inline unsigned dx_root_limit(struct iam_path *p)
1135 + struct iam_descr *param = iam_path_descr(p);
1136 + unsigned entry_space = iam_path_obj(p)->i_sb->s_blocksize -
1137 + param->id_root_gap;
1138 + return entry_space / (param->id_ikey_size + param->id_ptr_size);
1142 static inline struct iam_entry *dx_get_entries(struct iam_path *path,
1143 void *data, int root)
1145 @@ -665,7 +738,8 @@ static inline struct iam_entry *dx_node_
1146 frame->bh->b_data, frame == path->ip_frames);
1149 -static inline struct iam_key *iam_path_key(const struct iam_path *path, int nr)
1150 +static inline struct iam_ikey *iam_path_ikey(const struct iam_path *path,
1153 assert(0 <= nr && nr < ARRAY_SIZE(path->ip_data->ipd_key_scratch));
1154 return path->ip_data->ipd_key_scratch[nr];
1155 @@ -674,6 +748,7 @@ static inline struct iam_key *iam_path_k
1156 int dx_lookup(struct iam_path *path);
1157 void dx_insert_block(struct iam_path *path, struct iam_frame *frame,
1158 u32 hash, u32 block);
1159 +int dx_index_is_compat(struct iam_path *path);
1161 int ext3_htree_next_block(struct inode *dir, __u32 hash,
1162 struct iam_path *path, __u32 *start_hash);
1163 @@ -681,6 +756,20 @@ int ext3_htree_next_block(struct inode *
1164 struct buffer_head *ext3_append(handle_t *handle, struct inode *inode,
1165 u32 *block, int *err);
1166 int split_index_node(handle_t *handle, struct iam_path *path);
1167 +struct ext3_dir_entry_2 *split_entry(struct inode *dir,
1168 + struct ext3_dir_entry_2 *de,
1169 + unsigned long ino, mode_t mode,
1170 + const char *name, int namelen);
1171 +struct ext3_dir_entry_2 *find_insertion_point(struct inode *dir,
1172 + struct buffer_head *bh,
1173 + const char *name, int namelen);
1174 +struct ext3_dir_entry_2 *move_entries(struct inode *dir,
1175 + struct dx_hash_info *hinfo,
1176 + struct buffer_head **bh1,
1177 + struct buffer_head **bh2,
1178 + __u32 *delim_hash);
1180 +extern struct iam_descr iam_htree_compat_param;
1184 @@ -698,10 +787,12 @@ int iam_node_read(struct iam_container *
1185 handle_t *handle, struct buffer_head **bh);
1187 void iam_insert_key(struct iam_path *path, struct iam_frame *frame,
1188 - const struct iam_key *key, iam_ptr_t ptr);
1189 + const struct iam_ikey *key, iam_ptr_t ptr);
1191 int iam_leaf_at_end(const struct iam_leaf *l);
1192 void iam_leaf_next(struct iam_leaf *folio);
1193 +int iam_leaf_can_add(const struct iam_leaf *l,
1194 + const struct iam_key *k, const struct iam_rec *r);
1196 struct iam_path *iam_leaf_path(const struct iam_leaf *leaf);
1197 struct iam_container *iam_leaf_container(const struct iam_leaf *leaf);
1198 @@ -709,14 +800,79 @@ struct iam_descr *iam_leaf_descr(const s
1199 struct iam_leaf_operations *iam_leaf_ops(const struct iam_leaf *leaf);
1202 +int iam_node_read(struct iam_container *c, iam_ptr_t ptr,
1203 + handle_t *h, struct buffer_head **bh);
1206 + * Container format.
1210 + * Method called to recognize container format. Should return true iff
1211 + * container @c conforms to this format. This method may do IO to read
1212 + * container pages.
1214 + * If container is recognized, this method sets operation vectors
1215 + * ->id_ops and ->id_leaf_ops in container description (c->ic_descr),
1216 + * and fills other description fields.
1218 int (*if_guess)(struct iam_container *c);
1220 + * Linkage into global list of container formats.
1222 struct list_head if_linkage;
1225 void iam_format_register(struct iam_format *fmt);
1227 void iam_lfix_format_init(void);
1228 +void iam_lvar_format_init(void);
1229 +void iam_htree_format_init(void);
1231 +struct iam_private_info;
1233 +void ext3_iam_release(struct file *filp, struct inode *inode);
1235 +int iam_uapi_ioctl(struct inode * inode, struct file * filp, unsigned int cmd,
1236 + unsigned long arg);
1242 + * User level API. Copy exists in lustre/lustre/tests/iam_ut.c
1245 +struct iam_uapi_info {
1246 + __u16 iui_keysize;
1247 + __u16 iui_recsize;
1248 + __u16 iui_ptrsize;
1250 + char iui_fmt_name[DX_FMT_NAME_LEN];
1253 +struct iam_uapi_op {
1258 +struct iam_uapi_it {
1259 + struct iam_uapi_op iui_op;
1263 +enum iam_ioctl_cmd {
1264 + IAM_IOC_INIT = _IOW('i', 1, struct iam_uapi_info),
1265 + IAM_IOC_GETINFO = _IOR('i', 2, struct iam_uapi_info),
1266 + IAM_IOC_INSERT = _IOR('i', 3, struct iam_uapi_op),
1267 + IAM_IOC_LOOKUP = _IOWR('i', 4, struct iam_uapi_op),
1268 + IAM_IOC_DELETE = _IOR('i', 5, struct iam_uapi_op),
1269 + IAM_IOC_IT_START = _IOR('i', 6, struct iam_uapi_it),
1270 + IAM_IOC_IT_NEXT = _IOW('i', 7, struct iam_uapi_it),
1271 + IAM_IOC_IT_STOP = _IOR('i', 8, struct iam_uapi_it),
1273 + IAM_IOC_POLYMORPH = _IOR('i', 9, unsigned long)
1276 /* __LINUX_LUSTRE_IAM_H__ */