From ccd87038305a70234bcf68cab47831a2614e6376 Mon Sep 17 00:00:00 2001 From: nikita Date: Fri, 22 Sep 2006 21:20:33 +0000 Subject: [PATCH] iam: adjust nlink when polymorphing regular file into directory (this assures that /root has nlink 2). Patch looks huge because it seems to rearrange files in diff. --- lustre/kernel_patches/patches/ext3-iam-uapi.patch | 1789 +++++++++++---------- 1 file changed, 898 insertions(+), 891 deletions(-) diff --git a/lustre/kernel_patches/patches/ext3-iam-uapi.patch b/lustre/kernel_patches/patches/ext3-iam-uapi.patch index cd74ccb..99764a0 100644 --- a/lustre/kernel_patches/patches/ext3-iam-uapi.patch +++ b/lustre/kernel_patches/patches/ext3-iam-uapi.patch @@ -1,624 +1,91 @@ -Index: iam/fs/ext3/Makefile -=================================================================== ---- iam.orig/fs/ext3/Makefile 2006-08-24 22:50:57.000000000 +0400 -+++ iam/fs/ext3/Makefile 2006-08-24 22:50:58.000000000 +0400 -@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o - - ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ - ioctl.o namei.o super.o symlink.o hash.o resize.o \ -- extents.o mballoc.o iam.o iam_lfix.o -+ extents.o mballoc.o iam.o iam_lfix.o iam_lvar.o iam_htree.o iam_uapi.o - - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o - ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o -Index: iam/fs/ext3/dir.c +Index: iam/include/linux/lustre_iam.h =================================================================== ---- iam.orig/fs/ext3/dir.c 2006-08-24 22:50:57.000000000 +0400 -+++ iam/fs/ext3/dir.c 2006-08-24 22:50:58.000000000 +0400 -@@ -28,6 +28,7 @@ - #include - #include - #include -+#include - - static unsigned char ext3_filetype_table[] = { - DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK -@@ -59,7 +60,7 @@ static unsigned char get_dtype(struct su - - return (ext3_filetype_table[filetype]); - } -- -+ - - int ext3_check_dir_entry (const char * function, struct inode * dir, - struct ext3_dir_entry_2 * de, -@@ -165,7 +166,7 @@ revalidate: - * to make sure. */ - if (filp->f_version != inode->i_version) { - for (i = 0; i < sb->s_blocksize && i < offset; ) { -- de = (struct ext3_dir_entry_2 *) -+ de = (struct ext3_dir_entry_2 *) - (bh->b_data + i); - /* It's too expensive to do a full - * dirent test each time round this -@@ -184,7 +185,7 @@ revalidate: - filp->f_version = inode->i_version; - } +--- iam.orig/include/linux/lustre_iam.h 2006-09-22 17:18:09.000000000 +0800 ++++ iam/include/linux/lustre_iam.h 2006-09-22 17:18:09.000000000 +0800 +@@ -30,9 +30,6 @@ + #ifndef __LINUX_LUSTRE_IAM_H__ + #define __LINUX_LUSTRE_IAM_H__ -- while (!error && filp->f_pos < inode->i_size -+ while (!error && filp->f_pos < inode->i_size - && offset < sb->s_blocksize) { - de = (struct ext3_dir_entry_2 *) (bh->b_data + offset); - if (!ext3_check_dir_entry ("ext3_readdir", inode, de, -@@ -232,7 +233,7 @@ out: +-/* handle_t, journal_start(), journal_stop() */ +-#include +- /* - * These functions convert from the major/minor hash to an f_pos - * value. -- * -+ * - * Currently we only use major hash numer. This is unfortunate, but - * on 32-bit machines, the same VFS interface is used for lseek and - * llseek, so if we use the 64 bit offset, then the 32-bit versions of -@@ -253,7 +254,7 @@ out: - struct fname { - __u32 hash; - __u32 minor_hash; -- struct rb_node rb_hash; -+ struct rb_node rb_hash; - struct fname *next; - __u32 inode; - __u8 name_len; -@@ -305,12 +306,14 @@ static void free_rb_tree_fname(struct rb - root->rb_node = NULL; - } - -+extern struct iam_private_info *ext3_iam_alloc_info(int flags); -+extern void ext3_iam_release_info(struct iam_private_info *info); + * linux/include/linux/lustre_iam.h + */ +@@ -57,14 +54,21 @@ + * [2] reserved for leaf node operations. + * + * [3] reserved for index operations. ++ * ++ * [4] reserved for path->ip_ikey_target ++ * + */ +- DX_SCRATCH_KEYS = 4, ++ DX_SCRATCH_KEYS = 5, + /* + * Maximal format name length. + */ + DX_FMT_NAME_LEN = 16 + }; - struct dir_private_info *create_dir_info(loff_t pos) - { - struct dir_private_info *p; ++#ifdef __KERNEL__ ++/* handle_t, journal_start(), journal_stop() */ ++#include ++ + /* + * Entry within index tree node. Consists of a key immediately followed + * (without padding) by a pointer to the child node. +@@ -86,14 +90,21 @@ + */ + struct iam_key; -- p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); -+ p = (void *)ext3_iam_alloc_info(GFP_KERNEL); - if (!p) - return NULL; - p->root.rb_node = NULL; -@@ -326,6 +329,7 @@ struct dir_private_info *create_dir_info - void ext3_htree_free_dir_info(struct dir_private_info *p) - { - free_rb_tree_fname(&p->root); -+ ext3_iam_release_info((void *)p); - kfree(p); - } +-/* Incomplete type use to refer to the records stored in iam containers. */ ++/* ++ * Incomplete type use to refer to the records stored in iam containers. ++ */ + struct iam_rec; -@@ -413,7 +417,7 @@ static int call_filldir(struct file * fi - curr_pos = hash2pos(fname->hash, fname->minor_hash); - while (fname) { - error = filldir(dirent, fname->name, -- fname->name_len, curr_pos, -+ fname->name_len, curr_pos, - fname->inode, - get_dtype(sb, fname->file_type)); - if (error) { -@@ -468,7 +472,7 @@ static int ext3_dx_readdir(struct file * - /* - * Fill the rbtree if we have no more entries, - * or the inode has changed since we last read in the -- * cached entries. -+ * cached entries. - */ - if ((!info->curr_node) || - (filp->f_version != inode->i_version)) { -Index: iam/fs/ext3/file.c -=================================================================== ---- iam.orig/fs/ext3/file.c 2006-08-24 22:50:57.000000000 +0400 -+++ iam/fs/ext3/file.c 2006-08-24 22:50:58.000000000 +0400 -@@ -23,6 +23,7 @@ - #include - #include - #include -+#include - #include "xattr.h" - #include "acl.h" +-struct iam_cookie { +- struct iam_key *ic_key; +- struct iam_rec *ic_rec; +-}; ++/* ++ * Key in index node. Possibly compressed. Fixed size. ++ */ ++struct iam_ikey; -@@ -31,14 +32,18 @@ - * from ext3_file_open: open gets called at every open, but release - * gets called only when /all/ the files are closed. - */ --static int ext3_release_file (struct inode * inode, struct file * filp) -+static int ext3_release_file(struct inode * inode, struct file * filp) - { - /* if we are the last writer on the inode, drop the block reservation */ - if ((filp->f_mode & FMODE_WRITE) && - (atomic_read(&inode->i_writecount) == 1)) - ext3_discard_reservation(inode); -- if (is_dx(inode) && filp->private_data) -- ext3_htree_free_dir_info(filp->private_data); -+ if (is_dx(inode) && filp->private_data) { -+ if (S_ISDIR(inode->i_mode)) -+ ext3_htree_free_dir_info(filp->private_data); -+ else -+ ext3_iam_release(filp, inode); -+ } ++/* ++ * Scalar type into which certain iam_key's can be uniquely mapped. Used to ++ * support interfaces like readdir(), where iteration over index has to be ++ * re-startable. ++ */ + typedef __u64 iam_ptr_t; - return 0; - } -@@ -110,7 +115,7 @@ ext3_file_write(struct kiocb *iocb, cons + /* +@@ -123,6 +134,31 @@ + void *il_descr_data; + }; - force_commit: - err = ext3_force_commit(inode->i_sb); -- if (err) -+ if (err) - return err; - return ret; - } -Index: iam/fs/ext3/iam-uapi.c -=================================================================== ---- iam.orig/fs/ext3/iam-uapi.c 2004-04-06 17:27:52.000000000 +0400 -+++ iam/fs/ext3/iam-uapi.c 2006-08-24 22:50:58.000000000 +0400 -@@ -0,0 +1,361 @@ -+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- -+ * vim:expandtab:shiftwidth=8:tabstop=8: -+ * -+ * iam_uapi.c -+ * User-level interface to iam (ioctl based) -+ * -+ * Copyright (c) 2006 Cluster File Systems, Inc. -+ * Author: Nikita Danilov -+ * -+ * This file is part of the Lustre file system, http://www.lustre.org -+ * Lustre is a trademark of Cluster File Systems, Inc. -+ * -+ * You may have signed or agreed to another license before downloading -+ * this software. If so, you are bound by the terms and conditions -+ * of that agreement, and the following does not apply to you. See the -+ * LICENSE file included with this distribution for more information. -+ * -+ * If you did not agree to a different license, then this copy of Lustre -+ * is open source software; you can redistribute it and/or modify it -+ * under the terms of version 2 of the GNU General Public License as -+ * published by the Free Software Foundation. -+ * -+ * In either case, Lustre is distributed in the hope that it will be -+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty -+ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * license text for more details. ++/* ++ * Return values of ->lookup() operation from struct iam_leaf_operations. + */ -+ -+#include -+#include -+/* ext3_error() */ -+#include -+#include -+ -+#include -+ -+#include -+#include -+ -+struct iam_private_info { -+ struct dir_private_info ipi_dir; /* has to be first */ -+ struct iam_container ipi_bag; -+ struct iam_descr ipi_descr; -+ struct iam_iterator ipi_it; -+ struct iam_path_descr *ipi_ipd; -+}; -+ -+enum { -+ IAM_INSERT_CREDITS = 20 -+}; -+ -+static struct iam_private_info *get_ipi(struct file *filp) -+{ -+ return filp->private_data; -+} -+ -+static int iam_uapi_it(int cmd, struct inode *inode, -+ struct file *filp, struct iam_uapi_it *itop) -+{ -+ struct iam_private_info *ipi; -+ struct iam_iterator *it; -+ enum iam_it_state st; -+ int result = 0; -+ -+ ipi = get_ipi(filp); -+ it = &ipi->ipi_it; -+ st = it->ii_state; -+ switch (cmd) { -+ case IAM_IOC_IT_START: -+ result = iam_it_init(it, &ipi->ipi_bag, -+ IAM_IT_MOVE, ipi->ipi_ipd); -+ if (result == 0) -+ result = iam_it_get(it, itop->iui_op.iul_key); -+ break; -+ case IAM_IOC_IT_NEXT: -+ if (st == IAM_IT_ATTACHED || st == IAM_IT_SKEWED) -+ result = iam_it_next(it); -+ else -+ result = -EBUSY; -+ break; -+ case IAM_IOC_IT_STOP: -+ iam_it_put(it); -+ iam_it_fini(it); -+ result = 0; -+ break; -+ } -+ st = it->ii_state; -+ if (st == IAM_IT_ATTACHED || st == IAM_IT_SKEWED) -+ memcpy(itop->iui_op.iul_key, iam_it_key_get(it), -+ iam_it_key_size(it)); -+ if (st == IAM_IT_ATTACHED) -+ iam_reccpy(&it->ii_path, -+ itop->iui_op.iul_rec, iam_it_rec_get(it)); -+ itop->iui_state = st; -+ return result; -+} -+ -+static int iam_uapi_op(int cmd, struct inode *inode, -+ struct file *filp, struct iam_uapi_op *op) -+{ -+ int result; -+ struct iam_private_info *ipi; -+ -+ ipi = get_ipi(filp); -+ if (cmd == IAM_IOC_INSERT || cmd == IAM_IOC_DELETE) { -+ handle_t *h; -+ -+ h = ext3_journal_start(inode, IAM_INSERT_CREDITS); -+ if (!IS_ERR(h)) { -+ if (cmd == IAM_IOC_INSERT) -+ result = iam_insert(h, &ipi->ipi_bag, -+ op->iul_key, -+ op->iul_rec, ipi->ipi_ipd); -+ else -+ result = iam_delete(h, &ipi->ipi_bag, -+ op->iul_key, ipi->ipi_ipd); -+ ext3_journal_stop(h); -+ } else { -+ result = PTR_ERR(h); -+ ext3_std_error(inode->i_sb, result); -+ } -+ } else -+ result = iam_lookup(&ipi->ipi_bag, op->iul_key, -+ op->iul_rec, ipi->ipi_ipd); -+ return result; -+} -+ -+struct iam_private_info *ext3_iam_alloc_info(int flags) -+{ -+ struct iam_private_info *info; -+ -+ info = kmalloc(sizeof *info, flags); -+ if (info != NULL) -+ memset(info, 0, sizeof *info); -+ return info; -+} -+ -+void ext3_iam_release_info(struct iam_private_info *info) -+{ -+ iam_it_put(&info->ipi_it); -+ iam_it_fini(&info->ipi_it); -+ if (info->ipi_ipd != NULL) -+ info->ipi_bag.ic_descr->id_ops->id_ipd_free(&info->ipi_bag, -+ info->ipi_ipd); -+ iam_container_fini(&info->ipi_bag); -+} -+ -+void ext3_iam_release(struct file *filp, struct inode *inode) -+{ -+ struct iam_private_info *info; -+ -+ info = filp->private_data; -+ ext3_iam_release_info(info); -+ -+ kfree(info); -+ EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; -+} -+ -+static int iam_uapi_init(struct inode *inode, -+ struct file *filp, struct iam_uapi_info *ua) -+{ -+ int result; -+ struct iam_private_info *info; -+ -+ info = ext3_iam_alloc_info(GFP_KERNEL); -+ if (info != NULL) { -+ struct iam_container *bag; -+ struct iam_descr *des; -+ -+ bag = &info->ipi_bag; -+ des = &info->ipi_descr; -+ result = iam_container_init(bag, des, inode); -+ if (result == 0) { -+ result = iam_container_setup(bag); -+ if (result == 0) { -+ /* -+ * Container setup might change ->ic_descr -+ */ -+ des = bag->ic_descr; -+ info->ipi_ipd = des->id_ops->id_ipd_alloc(bag); -+ if (info->ipi_ipd != NULL) { -+ filp->private_data = info; -+ EXT3_I(inode)->i_flags |= EXT3_INDEX_FL; -+ } else -+ result = -ENOMEM; -+ } -+ } -+ } else -+ result = -ENOMEM; -+ return result; -+} -+ -+ -+static int getua(struct iam_uapi_info *ua, unsigned long arg) -+{ -+ if (copy_from_user(ua, (struct iam_uapi_info __user *)arg, sizeof *ua)) -+ return -EFAULT; -+ else -+ return 0; -+} -+ -+static int putua(struct iam_uapi_info *ua, unsigned long arg) -+{ -+ if (copy_to_user((struct iam_uapi_info __user *)arg, ua, sizeof *ua)) -+ return -EFAULT; -+ else -+ return 0; -+} -+ -+enum outop_t { -+ KEY = 1 << 0, -+ REC = 1 << 1, -+ STATE = 1 << 2 -+}; -+ -+static int outop(struct iam_uapi_op *op, struct iam_uapi_op *uop, -+ struct iam_descr *des, enum outop_t opt) -+{ -+ int result; -+ -+ if (((opt & REC) && copy_to_user((void __user *)uop->iul_rec, -+ op->iul_rec, des->id_rec_size)) || -+ ((opt & KEY) && copy_to_user((void __user *)uop->iul_key, -+ op->iul_key, des->id_key_size))) -+ result = -EFAULT; -+ else -+ result = 0; -+ return result; -+} -+ -+static void putop(struct iam_uapi_op *op) -+{ -+ kfree(op->iul_key); -+ kfree(op->iul_rec); -+} -+ -+static int getop(struct iam_uapi_op *op, struct iam_uapi_op *uop, -+ struct iam_descr *des, unsigned long arg) -+{ -+ int result; -+ int ks; -+ int rs; -+ -+ ks = des->id_key_size; -+ rs = des->id_rec_size; -+ op->iul_key = kmalloc(ks, GFP_KERNEL); -+ op->iul_rec = kmalloc(rs, GFP_KERNEL); -+ if (!copy_from_user(uop, -+ (struct iam_uapi_op __user *)arg, sizeof *uop) && -+ op->iul_key != NULL && op->iul_rec != NULL && -+ !copy_from_user(op->iul_key, (void __user *)uop->iul_key, ks) && -+ !copy_from_user(op->iul_rec, (void __user *)uop->iul_rec, rs)) -+ result = 0; -+ else { -+ result = -EFAULT; -+ putop(op); -+ } -+ return result; -+} -+ -+static int outit(struct iam_uapi_it *it, struct iam_uapi_it *uit, -+ struct iam_descr *des, enum outop_t opt, unsigned long arg) -+{ -+ int result; -+ -+ result = outop(&it->iui_op, &uit->iui_op, des, opt); -+ if (result == 0 && (opt&STATE)) -+ result = put_user(it->iui_state, (int __user *) arg); -+ return result; -+} -+ -+static void putit(struct iam_uapi_it *it) -+{ -+ putop(&it->iui_op); -+} -+ -+static int getit(struct iam_uapi_it *it, struct iam_uapi_it *uit, -+ struct iam_descr *des, unsigned long arg) -+{ -+ return getop(&it->iui_op, &uit->iui_op, des, -+ (unsigned long)&((struct iam_uapi_it *)arg)->iui_op); -+} -+ -+int iam_uapi_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, -+ unsigned long arg) -+{ -+ int result; -+ struct iam_uapi_info ua; -+ struct iam_uapi_op uop; -+ struct iam_uapi_op op; -+ struct iam_uapi_it uit; -+ struct iam_uapi_it it; -+ enum outop_t opt; -+ -+ if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) { -+ result = -EACCES; -+ } else if (cmd == IAM_IOC_POLYMORPH) { -+ inode->i_mode = (umode_t)arg; -+ mark_inode_dirty(inode); -+ result = 0; -+ } else if (cmd == IAM_IOC_INIT) { -+ if (filp->private_data == NULL) { -+ result = getua(&ua, arg); -+ if (result == 0) -+ result = iam_uapi_init(inode, filp, &ua); -+ } else -+ result = -EBUSY; -+ } else if (is_dx(inode) && filp->private_data != NULL) { -+ struct iam_descr *des; -+ -+ switch (cmd) { -+ case IAM_IOC_IT_START: -+ case IAM_IOC_IT_NEXT: -+ opt = KEY|REC|STATE; -+ break; -+ case IAM_IOC_LOOKUP: -+ opt = REC; -+ break; -+ default: -+ opt = 0; -+ break; -+ } -+ -+ des = get_ipi(filp)->ipi_bag.ic_descr; -+ if (cmd == IAM_IOC_GETINFO) { -+ ua.iui_keysize = des->id_key_size; -+ ua.iui_recsize = des->id_rec_size; -+ ua.iui_ptrsize = des->id_ptr_size; -+ ua.iui_height = 0; /* not yet */ -+ memcpy(ua.iui_fmt_name, des->id_ops->id_name, -+ ARRAY_SIZE(ua.iui_fmt_name)); -+ result = putua(&ua, arg); -+ } else if (cmd == IAM_IOC_INSERT || cmd == IAM_IOC_LOOKUP || -+ cmd == IAM_IOC_DELETE) { -+ result = getop(&op, &uop, des, arg); -+ if (result == 0) { -+ int res2; -+ result = iam_uapi_op(cmd, inode, filp, &op); -+ -+ res2 = outop(&op, &uop, des, opt); -+ result = result ? : res2; -+ putop(&op); -+ } -+ } else if (cmd == IAM_IOC_IT_START || cmd == IAM_IOC_IT_NEXT || -+ cmd == IAM_IOC_IT_STOP) { -+ result = getit(&it, &uit, des, arg); -+ if (result == 0) { -+ int res2; -+ -+ result = iam_uapi_it(cmd, inode, filp, &it); -+ -+ res2 = outit(&it, &uit, des, opt, arg); -+ result = result ? : res2; -+ putit(&it); -+ } -+ } else -+ result = -EINVAL; -+ } else -+ result = -ENOENT; -+ return result; -+} -Index: iam/fs/ext3/ioctl.c -=================================================================== ---- iam.orig/fs/ext3/ioctl.c 2006-08-24 22:50:57.000000000 +0400 -+++ iam/fs/ext3/ioctl.c 2006-08-24 22:50:58.000000000 +0400 -@@ -250,6 +250,6 @@ flags_err: - - - default: -- return -ENOTTY; -+ return iam_uapi_ioctl(inode, filp, cmd, arg); - } - } -Index: iam/include/linux/lustre_iam.h -=================================================================== ---- iam.orig/include/linux/lustre_iam.h 2006-08-24 22:50:57.000000000 +0400 -+++ iam/include/linux/lustre_iam.h 2006-08-24 22:50:58.000000000 +0400 -@@ -30,9 +30,6 @@ - #ifndef __LINUX_LUSTRE_IAM_H__ - #define __LINUX_LUSTRE_IAM_H__ - --/* handle_t, journal_start(), journal_stop() */ --#include -- - /* - * linux/include/linux/lustre_iam.h - */ -@@ -57,14 +54,21 @@ enum { - * [2] reserved for leaf node operations. - * - * [3] reserved for index operations. -+ * -+ * [4] reserved for path->ip_ikey_target -+ * - */ -- DX_SCRATCH_KEYS = 4, -+ DX_SCRATCH_KEYS = 5, - /* - * Maximal format name length. - */ - DX_FMT_NAME_LEN = 16 - }; - -+#ifdef __KERNEL__ -+/* handle_t, journal_start(), journal_stop() */ -+#include -+ - /* - * Entry within index tree node. Consists of a key immediately followed - * (without padding) by a pointer to the child node. -@@ -86,14 +90,21 @@ struct iam_entry_compat { - */ - struct iam_key; - --/* Incomplete type use to refer to the records stored in iam containers. */ -+/* -+ * Incomplete type use to refer to the records stored in iam containers. -+ */ - struct iam_rec; - --struct iam_cookie { -- struct iam_key *ic_key; -- struct iam_rec *ic_rec; --}; -+/* -+ * Key in index node. Possibly compressed. Fixed size. -+ */ -+struct iam_ikey; - -+/* -+ * Scalar type into which certain iam_key's can be uniquely mapped. Used to -+ * support interfaces like readdir(), where iteration over index has to be -+ * re-startable. -+ */ - typedef __u64 iam_ptr_t; - - /* -@@ -123,6 +134,31 @@ struct iam_leaf { - void *il_descr_data; - }; - -+/* -+ * Return values of ->lookup() operation from struct iam_leaf_operations. -+ */ -+enum iam_lookup_t { -+ /* -+ * lookup found a record with the key requested -+ */ -+ IAM_LOOKUP_EXACT, -+ /* -+ * lookup positioned leaf on some record -+ */ -+ IAM_LOOKUP_OK, -+ /* -+ * leaf was empty -+ */ -+ IAM_LOOKUP_EMPTY, -+ /* -+ * lookup positioned leaf before first record -+ */ -+ IAM_LOOKUP_BEFORE ++enum iam_lookup_t { ++ /* ++ * lookup found a record with the key requested ++ */ ++ IAM_LOOKUP_EXACT, ++ /* ++ * lookup positioned leaf on some record ++ */ ++ IAM_LOOKUP_OK, ++ /* ++ * leaf was empty ++ */ ++ IAM_LOOKUP_EMPTY, ++ /* ++ * lookup positioned leaf before first record ++ */ ++ IAM_LOOKUP_BEFORE +}; + +/* @@ -627,7 +94,7 @@ Index: iam/include/linux/lustre_iam.h struct iam_operations { /* * Returns pointer (in the same sense as pointer in index entry) to -@@ -131,11 +167,15 @@ struct iam_operations { +@@ -131,11 +167,15 @@ __u32 (*id_root_ptr)(struct iam_container *c); /* @@ -645,7 +112,7 @@ Index: iam/include/linux/lustre_iam.h * Initialize new node (stored in @bh) that is going to be added into * tree. */ -@@ -144,23 +184,33 @@ struct iam_operations { +@@ -144,23 +184,33 @@ int (*id_node_read)(struct iam_container *c, iam_ptr_t ptr, handle_t *h, struct buffer_head **bh); /* @@ -687,7 +154,7 @@ Index: iam/include/linux/lustre_iam.h struct iam_leaf_operations { /* * leaf operations. -@@ -186,7 +236,8 @@ struct iam_leaf_operations { +@@ -186,7 +236,8 @@ void (*start)(struct iam_leaf *l); /* more leaf to the next entry. */ void (*next)(struct iam_leaf *l); @@ -697,7 +164,7 @@ Index: iam/include/linux/lustre_iam.h * either pointer to the key stored in node, or copy key into * @k buffer supplied by caller and return pointer to this * buffer. The latter approach is used when keys in nodes are -@@ -194,8 +245,10 @@ struct iam_leaf_operations { +@@ -194,8 +245,10 @@ * all). * * Caller should assume that returned pointer is only valid @@ -710,7 +177,7 @@ Index: iam/include/linux/lustre_iam.h /* return pointer to entry body. Pointer is valid while corresponding leaf node is locked and pinned. */ struct iam_rec *(*rec)(const struct iam_leaf *l); -@@ -203,6 +256,9 @@ struct iam_leaf_operations { +@@ -203,6 +256,9 @@ void (*key_set)(struct iam_leaf *l, const struct iam_key *k); void (*rec_set)(struct iam_leaf *l, const struct iam_rec *r); @@ -720,7 +187,7 @@ Index: iam/include/linux/lustre_iam.h /* * Search leaf @l for a record with key @k or for a place * where such record is to be inserted. -@@ -210,6 +266,7 @@ struct iam_leaf_operations { +@@ -210,6 +266,7 @@ * Scratch keys from @path can be used. */ int (*lookup)(struct iam_leaf *l, const struct iam_key *k); @@ -728,7 +195,7 @@ Index: iam/include/linux/lustre_iam.h int (*can_add)(const struct iam_leaf *l, const struct iam_key *k, const struct iam_rec *r); -@@ -221,12 +278,13 @@ struct iam_leaf_operations { +@@ -221,12 +278,13 @@ /* * remove rec for a leaf */ @@ -744,7 +211,7 @@ Index: iam/include/linux/lustre_iam.h }; struct iam_path *iam_leaf_path(const struct iam_leaf *leaf); -@@ -241,6 +299,10 @@ struct iam_descr { +@@ -241,6 +299,10 @@ */ size_t id_key_size; /* @@ -755,7 +222,7 @@ Index: iam/include/linux/lustre_iam.h * Size of a pointer to the next level (stored in index nodes), in * bytes. */ -@@ -264,6 +326,9 @@ struct iam_descr { +@@ -264,6 +326,9 @@ struct iam_leaf_operations *id_leaf_ops; }; @@ -765,7 +232,7 @@ Index: iam/include/linux/lustre_iam.h struct iam_container { /* * Underlying flat file. IO against this object is issued to -@@ -284,7 +349,7 @@ struct iam_path_descr { +@@ -284,7 +349,7 @@ /* * Scratch-pad area for temporary keys. */ @@ -774,7 +241,7 @@ Index: iam/include/linux/lustre_iam.h }; /* -@@ -316,6 +381,7 @@ struct iam_path { +@@ -316,6 +381,7 @@ * Key searched for. */ const struct iam_key *ip_key_target; @@ -782,7 +249,7 @@ Index: iam/include/linux/lustre_iam.h /* * Description-specific data. */ -@@ -334,6 +400,7 @@ struct iam_path_compat { +@@ -334,6 +400,7 @@ struct dx_hash_info *ipc_hinfo; struct dentry *ipc_dentry; struct iam_path_descr ipc_descr; @@ -790,7 +257,7 @@ Index: iam/include/linux/lustre_iam.h }; /* -@@ -347,7 +414,9 @@ enum iam_it_state { +@@ -347,7 +414,9 @@ /* initial state */ IAM_IT_DETACHED, /* iterator is above particular record in the container */ @@ -801,7 +268,7 @@ Index: iam/include/linux/lustre_iam.h }; /* -@@ -355,7 +424,7 @@ enum iam_it_state { +@@ -355,7 +424,7 @@ */ enum iam_it_flags { /* @@ -810,7 +277,7 @@ Index: iam/include/linux/lustre_iam.h */ IAM_IT_MOVE = (1 << 0), /* -@@ -372,15 +441,26 @@ enum iam_it_flags { +@@ -372,15 +441,26 @@ * doesn't point to any particular record in this container. * * After successful call to iam_it_get() and until corresponding call to @@ -840,7 +307,7 @@ Index: iam/include/linux/lustre_iam.h * */ struct iam_iterator { -@@ -390,7 +470,8 @@ struct iam_iterator { +@@ -390,7 +470,8 @@ __u32 ii_flags; enum iam_it_state ii_state; /* @@ -850,7 +317,7 @@ Index: iam/include/linux/lustre_iam.h */ struct iam_path ii_path; }; -@@ -405,133 +486,26 @@ void iam_path_compat_fini(struct iam_pat +@@ -405,133 +486,26 @@ struct iam_path_descr *iam_ipd_alloc(int keysize); void iam_ipd_free(struct iam_path_descr *ipd); @@ -989,7 +456,7 @@ Index: iam/include/linux/lustre_iam.h int iam_it_load(struct iam_iterator *it, iam_pos_t pos); int iam_lookup(struct iam_container *c, const struct iam_key *k, -@@ -539,10 +513,10 @@ int iam_lookup(struct iam_container *c, +@@ -539,10 +513,10 @@ int iam_delete(handle_t *h, struct iam_container *c, const struct iam_key *k, struct iam_path_descr *pd); int iam_update(handle_t *h, struct iam_container *c, const struct iam_key *k, @@ -1002,309 +469,849 @@ Index: iam/include/linux/lustre_iam.h /* * Initialize container @c. */ -@@ -577,16 +551,65 @@ static inline struct inode *iam_path_obj +@@ -577,16 +551,65 @@ return p->ip_container->ic_object; } --static inline void iam_keycpy(const struct iam_container *c, -- struct iam_key *k1, const struct iam_key *k2) -+static inline void iam_ikeycpy(const struct iam_container *c, -+ struct iam_ikey *k1, const struct iam_ikey *k2) +-static inline void iam_keycpy(const struct iam_container *c, +- struct iam_key *k1, const struct iam_key *k2) ++static inline void iam_ikeycpy(const struct iam_container *c, ++ struct iam_ikey *k1, const struct iam_ikey *k2) ++{ ++ memcpy(k1, k2, c->ic_descr->id_ikey_size); ++} ++ ++static inline size_t iam_entry_size(struct iam_path *p) ++{ ++ return iam_path_descr(p)->id_ikey_size + iam_path_descr(p)->id_ptr_size; ++} ++ ++static inline struct iam_entry *iam_entry_shift(struct iam_path *p, ++ struct iam_entry *entry, ++ int shift) + { +- memcpy(k1, k2, c->ic_descr->id_key_size); ++ void *e = entry; ++ return e + shift * iam_entry_size(p); + } + +-static inline int iam_keycmp(const struct iam_container *c, +- const struct iam_key *k1, const struct iam_key *k2) ++static inline struct iam_ikey *iam_get_ikey(struct iam_path *p, ++ struct iam_entry *entry, ++ struct iam_ikey *key) + { +- return c->ic_descr->id_ops->id_keycmp(c, k1, k2); ++ return memcpy(key, entry, iam_path_descr(p)->id_ikey_size); ++} ++ ++static inline struct iam_ikey *iam_ikey_at(struct iam_path *p, ++ struct iam_entry *entry) ++{ ++ return (struct iam_ikey *)entry; ++} ++ ++static inline ptrdiff_t iam_entry_diff(struct iam_path *p, ++ struct iam_entry *e1, ++ struct iam_entry *e2) ++{ ++ ptrdiff_t diff; ++ ++ diff = (void *)e1 - (void *)e2; ++ assert(diff / iam_entry_size(p) * iam_entry_size(p) == diff); ++ return diff / iam_entry_size(p); ++} ++ ++/* ++ * Helper for the frequent case, where key was already placed into @k1 by ++ * callback. ++ */ ++static inline void iam_ikeycpy0(const struct iam_container *c, ++ struct iam_ikey *k1, const struct iam_ikey *k2) ++{ ++ if (k1 != k2) ++ iam_ikeycpy(c, k1, k2); ++} ++ ++static inline int iam_ikeycmp(const struct iam_container *c, ++ const struct iam_ikey *k1, ++ const struct iam_ikey *k2) ++{ ++ return c->ic_descr->id_ops->id_ikeycmp(c, k1, k2); + } + + static inline void iam_reccpy(const struct iam_path *p, struct iam_rec *rec_dst, +@@ -604,7 +627,7 @@ + static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry) + { + return le32_to_cpu(*(u32*)iam_entry_off(entry, +- iam_path_descr(p)->id_key_size)) ++ iam_path_descr(p)->id_ikey_size)) + & 0x00ffffff; + } + +@@ -612,21 +635,64 @@ + struct iam_entry *entry, unsigned value) + { + *(u32*)iam_entry_off(entry, +- iam_path_descr(p)->id_key_size) = ++ iam_path_descr(p)->id_ikey_size) = + cpu_to_le32(value); + } + +-static inline void dx_set_key(struct iam_path *p, struct iam_entry *entry, +- const struct iam_key *key) ++static inline void dx_set_ikey(struct iam_path *p, struct iam_entry *entry, ++ const struct iam_ikey *key) + { +- iam_keycpy(p->ip_container, iam_entry_off(entry, 0), key); ++ iam_ikeycpy(p->ip_container, iam_entry_off(entry, 0), key); + } + ++struct dx_map_entry ++{ ++ u32 hash; ++ u32 offs; ++}; ++ ++struct fake_dirent { ++ __le32 inode; ++ __le16 rec_len; ++ u8 name_len; ++ u8 file_type; ++}; ++ + struct dx_countlimit { + __le16 limit; + __le16 count; + }; + ++/* ++ * dx_root_info is laid out so that if it should somehow get overlaid by a ++ * dirent the two low bits of the hash version will be zero. Therefore, the ++ * hash version mod 4 should never be 0. Sincerely, the paranoia department. ++ */ ++ ++struct dx_root { ++ struct fake_dirent dot; ++ char dot_name[4]; ++ struct fake_dirent dotdot; ++ char dotdot_name[4]; ++ struct dx_root_info ++ { ++ __le32 reserved_zero; ++ u8 hash_version; ++ u8 info_length; /* 8 */ ++ u8 indirect_levels; ++ u8 unused_flags; ++ } ++ info; ++ struct {} entries[0]; ++}; ++ ++struct dx_node ++{ ++ struct fake_dirent fake; ++ struct {} entries[0]; ++}; ++ ++ + static inline unsigned dx_get_count(struct iam_entry *entries) + { + return le16_to_cpu(((struct dx_countlimit *) entries)->count); +@@ -647,9 +713,21 @@ + struct iam_descr *param = iam_path_descr(p); + unsigned entry_space = iam_path_obj(p)->i_sb->s_blocksize - + param->id_node_gap; +- return entry_space / (param->id_key_size + param->id_ptr_size); ++ return entry_space / (param->id_ikey_size + param->id_ptr_size); ++} ++ ++static inline unsigned dx_root_limit(struct iam_path *p) ++{ ++ struct iam_descr *param = iam_path_descr(p); ++ unsigned limit = iam_path_obj(p)->i_sb->s_blocksize - ++ param->id_root_gap; ++ limit /= (param->id_ikey_size + param->id_ptr_size); ++ if (limit == dx_node_limit(p)) ++ limit--; ++ return limit; + } + ++ + static inline struct iam_entry *dx_get_entries(struct iam_path *path, + void *data, int root) + { +@@ -665,7 +743,8 @@ + frame->bh->b_data, frame == path->ip_frames); + } + +-static inline struct iam_key *iam_path_key(const struct iam_path *path, int nr) ++static inline struct iam_ikey *iam_path_ikey(const struct iam_path *path, ++ int nr) + { + assert(0 <= nr && nr < ARRAY_SIZE(path->ip_data->ipd_key_scratch)); + return path->ip_data->ipd_key_scratch[nr]; +@@ -674,6 +753,7 @@ + int dx_lookup(struct iam_path *path); + void dx_insert_block(struct iam_path *path, struct iam_frame *frame, + u32 hash, u32 block); ++int dx_index_is_compat(struct iam_path *path); + + int ext3_htree_next_block(struct inode *dir, __u32 hash, + struct iam_path *path, __u32 *start_hash); +@@ -681,6 +761,20 @@ + struct buffer_head *ext3_append(handle_t *handle, struct inode *inode, + u32 *block, int *err); + int split_index_node(handle_t *handle, struct iam_path *path); ++struct ext3_dir_entry_2 *split_entry(struct inode *dir, ++ struct ext3_dir_entry_2 *de, ++ unsigned long ino, mode_t mode, ++ const char *name, int namelen); ++struct ext3_dir_entry_2 *find_insertion_point(struct inode *dir, ++ struct buffer_head *bh, ++ const char *name, int namelen); ++struct ext3_dir_entry_2 *move_entries(struct inode *dir, ++ struct dx_hash_info *hinfo, ++ struct buffer_head **bh1, ++ struct buffer_head **bh2, ++ __u32 *delim_hash); ++ ++extern struct iam_descr iam_htree_compat_param; + + /* + * external +@@ -698,10 +792,12 @@ + handle_t *handle, struct buffer_head **bh); + + void iam_insert_key(struct iam_path *path, struct iam_frame *frame, +- const struct iam_key *key, iam_ptr_t ptr); ++ const struct iam_ikey *key, iam_ptr_t ptr); + + int iam_leaf_at_end(const struct iam_leaf *l); + void iam_leaf_next(struct iam_leaf *folio); ++int iam_leaf_can_add(const struct iam_leaf *l, ++ const struct iam_key *k, const struct iam_rec *r); + + struct iam_path *iam_leaf_path(const struct iam_leaf *leaf); + struct iam_container *iam_leaf_container(const struct iam_leaf *leaf); +@@ -709,14 +805,79 @@ + struct iam_leaf_operations *iam_leaf_ops(const struct iam_leaf *leaf); + + ++int iam_node_read(struct iam_container *c, iam_ptr_t ptr, ++ handle_t *h, struct buffer_head **bh); ++ ++/* ++ * Container format. ++ */ + struct iam_format { ++ /* ++ * Method called to recognize container format. Should return true iff ++ * container @c conforms to this format. This method may do IO to read ++ * container pages. ++ * ++ * If container is recognized, this method sets operation vectors ++ * ->id_ops and ->id_leaf_ops in container description (c->ic_descr), ++ * and fills other description fields. ++ */ + int (*if_guess)(struct iam_container *c); ++ /* ++ * Linkage into global list of container formats. ++ */ + struct list_head if_linkage; + }; + + void iam_format_register(struct iam_format *fmt); + + void iam_lfix_format_init(void); ++void iam_lvar_format_init(void); ++void iam_htree_format_init(void); ++ ++struct iam_private_info; ++ ++void ext3_iam_release(struct file *filp, struct inode *inode); ++ ++int iam_uapi_ioctl(struct inode * inode, struct file * filp, unsigned int cmd, ++ unsigned long arg); ++ ++/* __KERNEL__ */ ++#endif ++ ++/* ++ * User level API. Copy exists in lustre/lustre/tests/iam_ut.c ++ */ ++ ++struct iam_uapi_info { ++ __u16 iui_keysize; ++ __u16 iui_recsize; ++ __u16 iui_ptrsize; ++ __u16 iui_height; ++ char iui_fmt_name[DX_FMT_NAME_LEN]; ++}; ++ ++struct iam_uapi_op { ++ void *iul_key; ++ void *iul_rec; ++}; ++ ++struct iam_uapi_it { ++ struct iam_uapi_op iui_op; ++ __u16 iui_state; ++}; ++ ++enum iam_ioctl_cmd { ++ IAM_IOC_INIT = _IOW('i', 1, struct iam_uapi_info), ++ IAM_IOC_GETINFO = _IOR('i', 2, struct iam_uapi_info), ++ IAM_IOC_INSERT = _IOR('i', 3, struct iam_uapi_op), ++ IAM_IOC_LOOKUP = _IOWR('i', 4, struct iam_uapi_op), ++ IAM_IOC_DELETE = _IOR('i', 5, struct iam_uapi_op), ++ IAM_IOC_IT_START = _IOR('i', 6, struct iam_uapi_it), ++ IAM_IOC_IT_NEXT = _IOW('i', 7, struct iam_uapi_it), ++ IAM_IOC_IT_STOP = _IOR('i', 8, struct iam_uapi_it), ++ ++ IAM_IOC_POLYMORPH = _IOR('i', 9, unsigned long) ++}; + + /* __LINUX_LUSTRE_IAM_H__ */ + #endif +Index: iam/fs/ext3/iam-uapi.c +=================================================================== +--- iam.orig/fs/ext3/iam-uapi.c 2006-09-20 09:10:35.143350952 +0800 ++++ iam/fs/ext3/iam-uapi.c 2006-09-22 17:24:07.000000000 +0800 +@@ -0,0 +1,368 @@ ++/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*- ++ * vim:expandtab:shiftwidth=8:tabstop=8: ++ * ++ * iam_uapi.c ++ * User-level interface to iam (ioctl based) ++ * ++ * Copyright (c) 2006 Cluster File Systems, Inc. ++ * Author: Nikita Danilov ++ * ++ * This file is part of the Lustre file system, http://www.lustre.org ++ * Lustre is a trademark of Cluster File Systems, Inc. ++ * ++ * You may have signed or agreed to another license before downloading ++ * this software. If so, you are bound by the terms and conditions ++ * of that agreement, and the following does not apply to you. See the ++ * LICENSE file included with this distribution for more information. ++ * ++ * If you did not agree to a different license, then this copy of Lustre ++ * is open source software; you can redistribute it and/or modify it ++ * under the terms of version 2 of the GNU General Public License as ++ * published by the Free Software Foundation. ++ * ++ * In either case, Lustre is distributed in the hope that it will be ++ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty ++ * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * license text for more details. ++ */ ++ ++#include ++#include ++/* ext3_error() */ ++#include ++#include ++ ++#include ++ ++#include ++#include ++ ++struct iam_private_info { ++ struct dir_private_info ipi_dir; /* has to be first */ ++ struct iam_container ipi_bag; ++ struct iam_descr ipi_descr; ++ struct iam_iterator ipi_it; ++ struct iam_path_descr *ipi_ipd; ++}; ++ ++enum { ++ IAM_INSERT_CREDITS = 20 ++}; ++ ++static struct iam_private_info *get_ipi(struct file *filp) +{ -+ memcpy(k1, k2, c->ic_descr->id_ikey_size); ++ return filp->private_data; ++} ++ ++static int iam_uapi_it(int cmd, struct inode *inode, ++ struct file *filp, struct iam_uapi_it *itop) ++{ ++ struct iam_private_info *ipi; ++ struct iam_iterator *it; ++ enum iam_it_state st; ++ int result = 0; ++ ++ ipi = get_ipi(filp); ++ it = &ipi->ipi_it; ++ st = it->ii_state; ++ switch (cmd) { ++ case IAM_IOC_IT_START: ++ result = iam_it_init(it, &ipi->ipi_bag, ++ IAM_IT_MOVE, ipi->ipi_ipd); ++ if (result == 0) ++ result = iam_it_get(it, itop->iui_op.iul_key); ++ break; ++ case IAM_IOC_IT_NEXT: ++ if (st == IAM_IT_ATTACHED || st == IAM_IT_SKEWED) ++ result = iam_it_next(it); ++ else ++ result = -EBUSY; ++ break; ++ case IAM_IOC_IT_STOP: ++ iam_it_put(it); ++ iam_it_fini(it); ++ result = 0; ++ break; ++ } ++ st = it->ii_state; ++ if (st == IAM_IT_ATTACHED || st == IAM_IT_SKEWED) ++ memcpy(itop->iui_op.iul_key, iam_it_key_get(it), ++ iam_it_key_size(it)); ++ if (st == IAM_IT_ATTACHED) ++ iam_reccpy(&it->ii_path, ++ itop->iui_op.iul_rec, iam_it_rec_get(it)); ++ itop->iui_state = st; ++ return result; ++} ++ ++static int iam_uapi_op(int cmd, struct inode *inode, ++ struct file *filp, struct iam_uapi_op *op) ++{ ++ int result; ++ struct iam_private_info *ipi; ++ ++ ipi = get_ipi(filp); ++ if (cmd == IAM_IOC_INSERT || cmd == IAM_IOC_DELETE) { ++ handle_t *h; ++ ++ h = ext3_journal_start(inode, IAM_INSERT_CREDITS); ++ if (!IS_ERR(h)) { ++ if (cmd == IAM_IOC_INSERT) ++ result = iam_insert(h, &ipi->ipi_bag, ++ op->iul_key, ++ op->iul_rec, ipi->ipi_ipd); ++ else ++ result = iam_delete(h, &ipi->ipi_bag, ++ op->iul_key, ipi->ipi_ipd); ++ ext3_journal_stop(h); ++ } else { ++ result = PTR_ERR(h); ++ ext3_std_error(inode->i_sb, result); ++ } ++ } else ++ result = iam_lookup(&ipi->ipi_bag, op->iul_key, ++ op->iul_rec, ipi->ipi_ipd); ++ return result; ++} ++ ++struct iam_private_info *ext3_iam_alloc_info(int flags) ++{ ++ struct iam_private_info *info; ++ ++ info = kmalloc(sizeof *info, flags); ++ if (info != NULL) ++ memset(info, 0, sizeof *info); ++ return info; ++} ++ ++void ext3_iam_release_info(struct iam_private_info *info) ++{ ++ iam_it_put(&info->ipi_it); ++ iam_it_fini(&info->ipi_it); ++ if (info->ipi_ipd != NULL) ++ info->ipi_bag.ic_descr->id_ops->id_ipd_free(&info->ipi_bag, ++ info->ipi_ipd); ++ iam_container_fini(&info->ipi_bag); ++} ++ ++void ext3_iam_release(struct file *filp, struct inode *inode) ++{ ++ struct iam_private_info *info; ++ ++ info = filp->private_data; ++ ext3_iam_release_info(info); ++ ++ kfree(info); ++ EXT3_I(inode)->i_flags &= ~EXT3_INDEX_FL; ++} ++ ++static int iam_uapi_init(struct inode *inode, ++ struct file *filp, struct iam_uapi_info *ua) ++{ ++ int result; ++ struct iam_private_info *info; ++ ++ info = ext3_iam_alloc_info(GFP_KERNEL); ++ if (info != NULL) { ++ struct iam_container *bag; ++ struct iam_descr *des; ++ ++ bag = &info->ipi_bag; ++ des = &info->ipi_descr; ++ result = iam_container_init(bag, des, inode); ++ if (result == 0) { ++ result = iam_container_setup(bag); ++ if (result == 0) { ++ /* ++ * Container setup might change ->ic_descr ++ */ ++ des = bag->ic_descr; ++ info->ipi_ipd = des->id_ops->id_ipd_alloc(bag); ++ if (info->ipi_ipd != NULL) { ++ filp->private_data = info; ++ EXT3_I(inode)->i_flags |= EXT3_INDEX_FL; ++ } else ++ result = -ENOMEM; ++ } ++ } ++ } else ++ result = -ENOMEM; ++ return result; ++} ++ ++ ++static int getua(struct iam_uapi_info *ua, unsigned long arg) ++{ ++ if (copy_from_user(ua, (struct iam_uapi_info __user *)arg, sizeof *ua)) ++ return -EFAULT; ++ else ++ return 0; ++} ++ ++static int putua(struct iam_uapi_info *ua, unsigned long arg) ++{ ++ if (copy_to_user((struct iam_uapi_info __user *)arg, ua, sizeof *ua)) ++ return -EFAULT; ++ else ++ return 0; +} + -+static inline size_t iam_entry_size(struct iam_path *p) ++enum outop_t { ++ KEY = 1 << 0, ++ REC = 1 << 1, ++ STATE = 1 << 2 ++}; ++ ++static int outop(struct iam_uapi_op *op, struct iam_uapi_op *uop, ++ struct iam_descr *des, enum outop_t opt) +{ -+ return iam_path_descr(p)->id_ikey_size + iam_path_descr(p)->id_ptr_size; ++ int result; ++ ++ if (((opt & REC) && copy_to_user((void __user *)uop->iul_rec, ++ op->iul_rec, des->id_rec_size)) || ++ ((opt & KEY) && copy_to_user((void __user *)uop->iul_key, ++ op->iul_key, des->id_key_size))) ++ result = -EFAULT; ++ else ++ result = 0; ++ return result; +} + -+static inline struct iam_entry *iam_entry_shift(struct iam_path *p, -+ struct iam_entry *entry, -+ int shift) - { -- memcpy(k1, k2, c->ic_descr->id_key_size); -+ void *e = entry; -+ return e + shift * iam_entry_size(p); - } - --static inline int iam_keycmp(const struct iam_container *c, -- const struct iam_key *k1, const struct iam_key *k2) -+static inline struct iam_ikey *iam_get_ikey(struct iam_path *p, -+ struct iam_entry *entry, -+ struct iam_ikey *key) - { -- return c->ic_descr->id_ops->id_keycmp(c, k1, k2); -+ return memcpy(key, entry, iam_path_descr(p)->id_ikey_size); ++static void putop(struct iam_uapi_op *op) ++{ ++ kfree(op->iul_key); ++ kfree(op->iul_rec); +} + -+static inline struct iam_ikey *iam_ikey_at(struct iam_path *p, -+ struct iam_entry *entry) ++static int getop(struct iam_uapi_op *op, struct iam_uapi_op *uop, ++ struct iam_descr *des, unsigned long arg) +{ -+ return (struct iam_ikey *)entry; ++ int result; ++ int ks; ++ int rs; ++ ++ ks = des->id_key_size; ++ rs = des->id_rec_size; ++ op->iul_key = kmalloc(ks, GFP_KERNEL); ++ op->iul_rec = kmalloc(rs, GFP_KERNEL); ++ if (!copy_from_user(uop, ++ (struct iam_uapi_op __user *)arg, sizeof *uop) && ++ op->iul_key != NULL && op->iul_rec != NULL && ++ !copy_from_user(op->iul_key, (void __user *)uop->iul_key, ks) && ++ !copy_from_user(op->iul_rec, (void __user *)uop->iul_rec, rs)) ++ result = 0; ++ else { ++ result = -EFAULT; ++ putop(op); ++ } ++ return result; +} + -+static inline ptrdiff_t iam_entry_diff(struct iam_path *p, -+ struct iam_entry *e1, -+ struct iam_entry *e2) ++static int outit(struct iam_uapi_it *it, struct iam_uapi_it *uit, ++ struct iam_descr *des, enum outop_t opt, unsigned long arg) +{ -+ ptrdiff_t diff; ++ int result; + -+ diff = (void *)e1 - (void *)e2; -+ assert(diff / iam_entry_size(p) * iam_entry_size(p) == diff); -+ return diff / iam_entry_size(p); ++ result = outop(&it->iui_op, &uit->iui_op, des, opt); ++ if (result == 0 && (opt&STATE)) ++ result = put_user(it->iui_state, (int __user *) arg); ++ return result; +} + -+/* -+ * Helper for the frequent case, where key was already placed into @k1 by -+ * callback. -+ */ -+static inline void iam_ikeycpy0(const struct iam_container *c, -+ struct iam_ikey *k1, const struct iam_ikey *k2) ++static void putit(struct iam_uapi_it *it) +{ -+ if (k1 != k2) -+ iam_ikeycpy(c, k1, k2); ++ putop(&it->iui_op); +} + -+static inline int iam_ikeycmp(const struct iam_container *c, -+ const struct iam_ikey *k1, -+ const struct iam_ikey *k2) ++static int getit(struct iam_uapi_it *it, struct iam_uapi_it *uit, ++ struct iam_descr *des, unsigned long arg) +{ -+ return c->ic_descr->id_ops->id_ikeycmp(c, k1, k2); - } - - static inline void iam_reccpy(const struct iam_path *p, struct iam_rec *rec_dst, -@@ -604,7 +627,7 @@ static inline void *iam_entry_off(struct - static inline unsigned dx_get_block(struct iam_path *p, struct iam_entry *entry) - { - return le32_to_cpu(*(u32*)iam_entry_off(entry, -- iam_path_descr(p)->id_key_size)) -+ iam_path_descr(p)->id_ikey_size)) - & 0x00ffffff; - } - -@@ -612,21 +635,64 @@ static inline void dx_set_block(struct i - struct iam_entry *entry, unsigned value) - { - *(u32*)iam_entry_off(entry, -- iam_path_descr(p)->id_key_size) = -+ iam_path_descr(p)->id_ikey_size) = - cpu_to_le32(value); - } - --static inline void dx_set_key(struct iam_path *p, struct iam_entry *entry, -- const struct iam_key *key) -+static inline void dx_set_ikey(struct iam_path *p, struct iam_entry *entry, -+ const struct iam_ikey *key) - { -- iam_keycpy(p->ip_container, iam_entry_off(entry, 0), key); -+ iam_ikeycpy(p->ip_container, iam_entry_off(entry, 0), key); - } - -+struct dx_map_entry ++ return getop(&it->iui_op, &uit->iui_op, des, ++ (unsigned long)&((struct iam_uapi_it *)arg)->iui_op); ++} ++ ++int iam_uapi_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, ++ unsigned long arg) +{ -+ u32 hash; -+ u32 offs; -+}; ++ int result; ++ struct iam_uapi_info ua; ++ struct iam_uapi_op uop; ++ struct iam_uapi_op op; ++ struct iam_uapi_it uit; ++ struct iam_uapi_it it; ++ enum outop_t opt; + -+struct fake_dirent { -+ __le32 inode; -+ __le16 rec_len; -+ u8 name_len; -+ u8 file_type; -+}; ++ if ((current->fsuid != inode->i_uid) && !capable(CAP_FOWNER)) { ++ result = -EACCES; ++ } else if (cmd == IAM_IOC_POLYMORPH) { ++ /* ++ * If polymorphing into directory, increase hard-link count. ++ */ ++ if (S_ISDIR((umode_t)arg) && !S_ISDIR(inode->i_mode)) ++ inode->i_nlink++; ++ else if (!S_ISDIR((umode_t)arg) && S_ISDIR(inode->i_mode)) ++ inode->i_nlink--; ++ inode->i_mode = (umode_t)arg; ++ mark_inode_dirty(inode); ++ result = 0; ++ } else if (cmd == IAM_IOC_INIT) { ++ if (filp->private_data == NULL) { ++ result = getua(&ua, arg); ++ if (result == 0) ++ result = iam_uapi_init(inode, filp, &ua); ++ } else ++ result = -EBUSY; ++ } else if (is_dx(inode) && filp->private_data != NULL) { ++ struct iam_descr *des; + - struct dx_countlimit { - __le16 limit; - __le16 count; - }; - -+/* -+ * dx_root_info is laid out so that if it should somehow get overlaid by a -+ * dirent the two low bits of the hash version will be zero. Therefore, the -+ * hash version mod 4 should never be 0. Sincerely, the paranoia department. -+ */ ++ switch (cmd) { ++ case IAM_IOC_IT_START: ++ case IAM_IOC_IT_NEXT: ++ opt = KEY|REC|STATE; ++ break; ++ case IAM_IOC_LOOKUP: ++ opt = REC; ++ break; ++ default: ++ opt = 0; ++ break; ++ } + -+struct dx_root { -+ struct fake_dirent dot; -+ char dot_name[4]; -+ struct fake_dirent dotdot; -+ char dotdot_name[4]; -+ struct dx_root_info -+ { -+ __le32 reserved_zero; -+ u8 hash_version; -+ u8 info_length; /* 8 */ -+ u8 indirect_levels; -+ u8 unused_flags; -+ } -+ info; -+ struct {} entries[0]; -+}; ++ des = get_ipi(filp)->ipi_bag.ic_descr; ++ if (cmd == IAM_IOC_GETINFO) { ++ ua.iui_keysize = des->id_key_size; ++ ua.iui_recsize = des->id_rec_size; ++ ua.iui_ptrsize = des->id_ptr_size; ++ ua.iui_height = 0; /* not yet */ ++ memcpy(ua.iui_fmt_name, des->id_ops->id_name, ++ ARRAY_SIZE(ua.iui_fmt_name)); ++ result = putua(&ua, arg); ++ } else if (cmd == IAM_IOC_INSERT || cmd == IAM_IOC_LOOKUP || ++ cmd == IAM_IOC_DELETE) { ++ result = getop(&op, &uop, des, arg); ++ if (result == 0) { ++ int res2; ++ result = iam_uapi_op(cmd, inode, filp, &op); + -+struct dx_node -+{ -+ struct fake_dirent fake; -+ struct {} entries[0]; -+}; ++ res2 = outop(&op, &uop, des, opt); ++ result = result ? : res2; ++ putop(&op); ++ } ++ } else if (cmd == IAM_IOC_IT_START || cmd == IAM_IOC_IT_NEXT || ++ cmd == IAM_IOC_IT_STOP) { ++ result = getit(&it, &uit, des, arg); ++ if (result == 0) { ++ int res2; + ++ result = iam_uapi_it(cmd, inode, filp, &it); + - static inline unsigned dx_get_count(struct iam_entry *entries) ++ res2 = outit(&it, &uit, des, opt, arg); ++ result = result ? : res2; ++ putit(&it); ++ } ++ } else ++ result = -EINVAL; ++ } else ++ result = -ENOENT; ++ return result; ++} +Index: iam/fs/ext3/file.c +=================================================================== +--- iam.orig/fs/ext3/file.c 2006-09-19 15:23:19.000000000 +0800 ++++ iam/fs/ext3/file.c 2006-09-22 17:18:09.000000000 +0800 +@@ -23,6 +23,7 @@ + #include + #include + #include ++#include + #include "xattr.h" + #include "acl.h" + +@@ -31,14 +32,18 @@ + * from ext3_file_open: open gets called at every open, but release + * gets called only when /all/ the files are closed. + */ +-static int ext3_release_file (struct inode * inode, struct file * filp) ++static int ext3_release_file(struct inode * inode, struct file * filp) { - return le16_to_cpu(((struct dx_countlimit *) entries)->count); -@@ -647,9 +713,21 @@ static inline unsigned dx_node_limit(str - struct iam_descr *param = iam_path_descr(p); - unsigned entry_space = iam_path_obj(p)->i_sb->s_blocksize - - param->id_node_gap; -- return entry_space / (param->id_key_size + param->id_ptr_size); -+ return entry_space / (param->id_ikey_size + param->id_ptr_size); -+} -+ -+static inline unsigned dx_root_limit(struct iam_path *p) -+{ -+ struct iam_descr *param = iam_path_descr(p); -+ unsigned limit = iam_path_obj(p)->i_sb->s_blocksize - -+ param->id_root_gap; -+ limit /= (param->id_ikey_size + param->id_ptr_size); -+ if (limit == dx_node_limit(p)) -+ limit--; -+ return limit; + /* if we are the last writer on the inode, drop the block reservation */ + if ((filp->f_mode & FMODE_WRITE) && + (atomic_read(&inode->i_writecount) == 1)) + ext3_discard_reservation(inode); +- if (is_dx(inode) && filp->private_data) +- ext3_htree_free_dir_info(filp->private_data); ++ if (is_dx(inode) && filp->private_data) { ++ if (S_ISDIR(inode->i_mode)) ++ ext3_htree_free_dir_info(filp->private_data); ++ else ++ ext3_iam_release(filp, inode); ++ } + + return 0; } +@@ -110,7 +115,7 @@ -+ - static inline struct iam_entry *dx_get_entries(struct iam_path *path, - void *data, int root) - { -@@ -665,7 +743,8 @@ static inline struct iam_entry *dx_node_ - frame->bh->b_data, frame == path->ip_frames); + force_commit: + err = ext3_force_commit(inode->i_sb); +- if (err) ++ if (err) + return err; + return ret; } +Index: iam/fs/ext3/ioctl.c +=================================================================== +--- iam.orig/fs/ext3/ioctl.c 2006-09-22 17:18:08.000000000 +0800 ++++ iam/fs/ext3/ioctl.c 2006-09-22 17:18:09.000000000 +0800 +@@ -250,6 +250,6 @@ --static inline struct iam_key *iam_path_key(const struct iam_path *path, int nr) -+static inline struct iam_ikey *iam_path_ikey(const struct iam_path *path, -+ int nr) - { - assert(0 <= nr && nr < ARRAY_SIZE(path->ip_data->ipd_key_scratch)); - return path->ip_data->ipd_key_scratch[nr]; -@@ -674,6 +753,7 @@ static inline struct iam_key *iam_path_k - int dx_lookup(struct iam_path *path); - void dx_insert_block(struct iam_path *path, struct iam_frame *frame, - u32 hash, u32 block); -+int dx_index_is_compat(struct iam_path *path); - int ext3_htree_next_block(struct inode *dir, __u32 hash, - struct iam_path *path, __u32 *start_hash); -@@ -681,6 +761,20 @@ int ext3_htree_next_block(struct inode * - struct buffer_head *ext3_append(handle_t *handle, struct inode *inode, - u32 *block, int *err); - int split_index_node(handle_t *handle, struct iam_path *path); -+struct ext3_dir_entry_2 *split_entry(struct inode *dir, -+ struct ext3_dir_entry_2 *de, -+ unsigned long ino, mode_t mode, -+ const char *name, int namelen); -+struct ext3_dir_entry_2 *find_insertion_point(struct inode *dir, -+ struct buffer_head *bh, -+ const char *name, int namelen); -+struct ext3_dir_entry_2 *move_entries(struct inode *dir, -+ struct dx_hash_info *hinfo, -+ struct buffer_head **bh1, -+ struct buffer_head **bh2, -+ __u32 *delim_hash); -+ -+extern struct iam_descr iam_htree_compat_param; + default: +- return -ENOTTY; ++ return iam_uapi_ioctl(inode, filp, cmd, arg); + } + } +Index: iam/fs/ext3/dir.c +=================================================================== +--- iam.orig/fs/ext3/dir.c 2006-09-19 15:23:19.000000000 +0800 ++++ iam/fs/ext3/dir.c 2006-09-22 17:18:09.000000000 +0800 +@@ -28,6 +28,7 @@ + #include + #include + #include ++#include - /* - * external -@@ -698,10 +792,12 @@ int iam_node_read(struct iam_container * - handle_t *handle, struct buffer_head **bh); + static unsigned char ext3_filetype_table[] = { + DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK +@@ -59,7 +60,7 @@ - void iam_insert_key(struct iam_path *path, struct iam_frame *frame, -- const struct iam_key *key, iam_ptr_t ptr); -+ const struct iam_ikey *key, iam_ptr_t ptr); + return (ext3_filetype_table[filetype]); + } +- ++ - int iam_leaf_at_end(const struct iam_leaf *l); - void iam_leaf_next(struct iam_leaf *folio); -+int iam_leaf_can_add(const struct iam_leaf *l, -+ const struct iam_key *k, const struct iam_rec *r); + int ext3_check_dir_entry (const char * function, struct inode * dir, + struct ext3_dir_entry_2 * de, +@@ -165,7 +166,7 @@ + * to make sure. */ + if (filp->f_version != inode->i_version) { + for (i = 0; i < sb->s_blocksize && i < offset; ) { +- de = (struct ext3_dir_entry_2 *) ++ de = (struct ext3_dir_entry_2 *) + (bh->b_data + i); + /* It's too expensive to do a full + * dirent test each time round this +@@ -184,7 +185,7 @@ + filp->f_version = inode->i_version; + } - struct iam_path *iam_leaf_path(const struct iam_leaf *leaf); - struct iam_container *iam_leaf_container(const struct iam_leaf *leaf); -@@ -709,14 +805,79 @@ struct iam_descr *iam_leaf_descr(const s - struct iam_leaf_operations *iam_leaf_ops(const struct iam_leaf *leaf); +- while (!error && filp->f_pos < inode->i_size ++ while (!error && filp->f_pos < inode->i_size + && offset < sb->s_blocksize) { + de = (struct ext3_dir_entry_2 *) (bh->b_data + offset); + if (!ext3_check_dir_entry ("ext3_readdir", inode, de, +@@ -232,7 +233,7 @@ + /* + * These functions convert from the major/minor hash to an f_pos + * value. +- * ++ * + * Currently we only use major hash numer. This is unfortunate, but + * on 32-bit machines, the same VFS interface is used for lseek and + * llseek, so if we use the 64 bit offset, then the 32-bit versions of +@@ -253,7 +254,7 @@ + struct fname { + __u32 hash; + __u32 minor_hash; +- struct rb_node rb_hash; ++ struct rb_node rb_hash; + struct fname *next; + __u32 inode; + __u8 name_len; +@@ -305,12 +306,14 @@ + root->rb_node = NULL; + } ++extern struct iam_private_info *ext3_iam_alloc_info(int flags); ++extern void ext3_iam_release_info(struct iam_private_info *info); -+int iam_node_read(struct iam_container *c, iam_ptr_t ptr, -+ handle_t *h, struct buffer_head **bh); -+ -+/* -+ * Container format. -+ */ - struct iam_format { -+ /* -+ * Method called to recognize container format. Should return true iff -+ * container @c conforms to this format. This method may do IO to read -+ * container pages. -+ * -+ * If container is recognized, this method sets operation vectors -+ * ->id_ops and ->id_leaf_ops in container description (c->ic_descr), -+ * and fills other description fields. -+ */ - int (*if_guess)(struct iam_container *c); -+ /* -+ * Linkage into global list of container formats. -+ */ - struct list_head if_linkage; - }; + struct dir_private_info *create_dir_info(loff_t pos) + { + struct dir_private_info *p; - void iam_format_register(struct iam_format *fmt); +- p = kmalloc(sizeof(struct dir_private_info), GFP_KERNEL); ++ p = (void *)ext3_iam_alloc_info(GFP_KERNEL); + if (!p) + return NULL; + p->root.rb_node = NULL; +@@ -326,6 +329,7 @@ + void ext3_htree_free_dir_info(struct dir_private_info *p) + { + free_rb_tree_fname(&p->root); ++ ext3_iam_release_info((void *)p); + kfree(p); + } - void iam_lfix_format_init(void); -+void iam_lvar_format_init(void); -+void iam_htree_format_init(void); -+ -+struct iam_private_info; -+ -+void ext3_iam_release(struct file *filp, struct inode *inode); -+ -+int iam_uapi_ioctl(struct inode * inode, struct file * filp, unsigned int cmd, -+ unsigned long arg); -+ -+/* __KERNEL__ */ -+#endif -+ -+/* -+ * User level API. Copy exists in lustre/lustre/tests/iam_ut.c -+ */ -+ -+struct iam_uapi_info { -+ __u16 iui_keysize; -+ __u16 iui_recsize; -+ __u16 iui_ptrsize; -+ __u16 iui_height; -+ char iui_fmt_name[DX_FMT_NAME_LEN]; -+}; -+ -+struct iam_uapi_op { -+ void *iul_key; -+ void *iul_rec; -+}; -+ -+struct iam_uapi_it { -+ struct iam_uapi_op iui_op; -+ __u16 iui_state; -+}; -+ -+enum iam_ioctl_cmd { -+ IAM_IOC_INIT = _IOW('i', 1, struct iam_uapi_info), -+ IAM_IOC_GETINFO = _IOR('i', 2, struct iam_uapi_info), -+ IAM_IOC_INSERT = _IOR('i', 3, struct iam_uapi_op), -+ IAM_IOC_LOOKUP = _IOWR('i', 4, struct iam_uapi_op), -+ IAM_IOC_DELETE = _IOR('i', 5, struct iam_uapi_op), -+ IAM_IOC_IT_START = _IOR('i', 6, struct iam_uapi_it), -+ IAM_IOC_IT_NEXT = _IOW('i', 7, struct iam_uapi_it), -+ IAM_IOC_IT_STOP = _IOR('i', 8, struct iam_uapi_it), -+ -+ IAM_IOC_POLYMORPH = _IOR('i', 9, unsigned long) -+}; +@@ -413,7 +417,7 @@ + curr_pos = hash2pos(fname->hash, fname->minor_hash); + while (fname) { + error = filldir(dirent, fname->name, +- fname->name_len, curr_pos, ++ fname->name_len, curr_pos, + fname->inode, + get_dtype(sb, fname->file_type)); + if (error) { +@@ -468,7 +472,7 @@ + /* + * Fill the rbtree if we have no more entries, + * or the inode has changed since we last read in the +- * cached entries. ++ * cached entries. + */ + if ((!info->curr_node) || + (filp->f_version != inode->i_version)) { +Index: iam/fs/ext3/Makefile +=================================================================== +--- iam.orig/fs/ext3/Makefile 2006-09-22 17:18:09.000000000 +0800 ++++ iam/fs/ext3/Makefile 2006-09-22 17:18:09.000000000 +0800 +@@ -6,7 +6,7 @@ - /* __LINUX_LUSTRE_IAM_H__ */ - #endif + ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ + ioctl.o namei.o super.o symlink.o hash.o resize.o \ +- extents.o mballoc.o iam.o iam_lfix.o ++ extents.o mballoc.o iam.o iam_lfix.o iam_lvar.o iam_htree.o iam_uapi.o + + ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o + ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o -- 1.8.3.1