From 2be7bd991a12fc02f13523bee0665fe24a796219 Mon Sep 17 00:00:00 2001
From: braam <braam>
Date: Wed, 9 Jul 2003 18:39:52 +0000
Subject: [PATCH] - another merge of b_devel into b_ad

---
 lustre/include/linux/lustre_mgmt.h                 |   34 +
 .../patches/vfs_intent-2.4.18-18-chaos65.patch     | 1712 ++++++++++++++++++++
 .../pc/vfs_intent-2.4.18-18-chaos65.pc             |   12 +
 lustre/ldlm/ldlm_lib.c                             |   16 +-
 lustre/lov/lov_internal.h                          |   12 +
 lustre/mds/mds_lib.c                               |    1 +
 lustre/mgmt/.cvsignore                             |    9 +
 lustre/mgmt/mgmt_cli.c                             |  269 +++
 lustre/obdfilter/filter_internal.h                 |  122 ++
 lustre/obdfilter/filter_io.c                       |  764 +++++++++
 lustre/obdfilter/filter_log.c                      |  379 +++++
 lustre/obdfilter/filter_san.c                      |  130 ++
 12 files changed, 3456 insertions(+), 4 deletions(-)
 create mode 100644 lustre/include/linux/lustre_mgmt.h
 create mode 100644 lustre/kernel_patches/patches/vfs_intent-2.4.18-18-chaos65.patch
 create mode 100644 lustre/kernel_patches/pc/vfs_intent-2.4.18-18-chaos65.pc
 create mode 100644 lustre/lov/lov_internal.h
 create mode 100644 lustre/mgmt/.cvsignore
 create mode 100644 lustre/mgmt/mgmt_cli.c
 create mode 100644 lustre/obdfilter/filter_internal.h
 create mode 100644 lustre/obdfilter/filter_io.c
 create mode 100644 lustre/obdfilter/filter_log.c
 create mode 100644 lustre/obdfilter/filter_san.c
diff --git a/lustre/include/linux/lustre_mgmt.h b/lustre/include/linux/lustre_mgmt.h
new file mode 100644
index 0000000..f3f4a9d
--- /dev/null
+++ b/lustre/include/linux/lustre_mgmt.h
@@ -0,0 +1,34 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001  Cluster File Systems, Inc.
+ *
+ * This code is issued under the GNU General Public License.
+ * See the file COPYING in this distribution
+ */
+
+#ifndef LUSTRE_MGMT_H
+#define LUSTRE_MGMT_H
+
+#define LUSTRE_MGMTCLI_NAME "mgmtcli"
+
+/* For the convenience and type-safety of inter_module_getters. */
+
+struct obd_device;
+struct obd_uuid;
+
+/*
+ * The caller is responsible for ensuring that relevant_uuid -- if non-NULL --
+ * points to valid memory until deregister is called.  If relevant_uuid is NULL,
+ * all management events will be propagated to the registrant.  Notice that
+ * deregister doesn't take a relevant_uuid-matching parameter; I should probably
+ * fix that at some point.
+ */
+typedef int (*mgmtcli_register_for_events_t)(struct obd_device *mgmt_obd,
+                                             struct obd_device *notify_obd,
+                                             struct obd_uuid *relevant_uuid);
+
+typedef int (*mgmtcli_deregister_for_events_t)(struct obd_device *mgmt_obd,
+                                               struct obd_device *notify_obd);
+
+#endif /* LUSTRE_MGMT_H */
diff --git a/lustre/kernel_patches/patches/vfs_intent-2.4.18-18-chaos65.patch b/lustre/kernel_patches/patches/vfs_intent-2.4.18-18-chaos65.patch
new file mode 100644
index 0000000..71ad1bb
--- /dev/null
+++ b/lustre/kernel_patches/patches/vfs_intent-2.4.18-18-chaos65.patch
@@ -0,0 +1,1712 @@
+ fs/dcache.c            |   19 ++
+ fs/exec.c              |   15 +-
+ fs/namei.c             |  355 +++++++++++++++++++++++++++++++++++++++++--------
+ fs/namespace.c         |   30 +++-
+ fs/open.c              |  122 ++++++++++++++--
+ fs/proc/base.c         |    3 
+ fs/stat.c              |   27 ++-
+ include/linux/dcache.h |   53 +++++++
+ include/linux/fs.h     |   31 ++++
+ kernel/exit.c          |    3 
+ kernel/fork.c          |    3 
+ kernel/ksyms.c         |    1 
+ 12 files changed, 563 insertions(+), 99 deletions(-)
+
+--- linux-2.4.18-p4smp/fs/exec.c~vfs_intent-2.4.18-18-chaos65	2003-07-08 14:41:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/exec.c	2003-07-08 14:45:17.000000000 -0600
+@@ -117,8 +117,9 @@ asmlinkage long sys_uselib(const char * 
+ 	struct file * file;
+ 	struct nameidata nd;
+ 	int error;
++	struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY };
+ 
+-	error = user_path_walk(library, &nd);
++	error = user_path_walk_it(library, &nd, &it);
+ 	if (error)
+ 		goto out;
+ 
+@@ -130,7 +131,8 @@ asmlinkage long sys_uselib(const char * 
+ 	if (error)
+ 		goto exit;
+ 
+-	file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
++	file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++	intent_release(&it);
+ 	error = PTR_ERR(file);
+ 	if (IS_ERR(file))
+ 		goto out;
+@@ -359,8 +361,9 @@ struct file *open_exec(const char *name)
+ 	struct inode *inode;
+ 	struct file *file;
+ 	int err = 0;
++	struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = O_RDONLY };
+ 
+-	err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
++	err = path_lookup_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
+ 	file = ERR_PTR(err);
+ 	if (!err) {
+ 		inode = nd.dentry->d_inode;
+@@ -372,7 +375,8 @@ struct file *open_exec(const char *name)
+ 				err = -EACCES;
+ 			file = ERR_PTR(err);
+ 			if (!err) {
+-				file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
++				file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &it);
++                                intent_release(&it);
+ 				if (!IS_ERR(file)) {
+ 					err = deny_write_access(file);
+ 					if (err) {
+@@ -384,6 +388,7 @@ out:
+ 				return file;
+ 			}
+ 		}
++                intent_release(&it);
+ 		path_release(&nd);
+ 	}
+ 	goto out;
+@@ -1104,7 +1109,7 @@ int do_coredump(long signr, struct pt_re
+ 		goto close_fail;
+ 	if (!file->f_op->write)
+ 		goto close_fail;
+-	if (do_truncate(file->f_dentry, 0) != 0)
++	if (do_truncate(file->f_dentry, 0, 0) != 0)
+ 		goto close_fail;
+ 
+ 	retval = binfmt->core_dump(signr, regs, file);
+--- linux-2.4.18-p4smp/fs/dcache.c~vfs_intent-2.4.18-18-chaos65	2003-07-08 14:41:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/dcache.c	2003-07-08 14:45:17.000000000 -0600
+@@ -186,6 +186,13 @@ int d_invalidate(struct dentry * dentry)
+ 		spin_unlock(&dcache_lock);
+ 		return 0;
+ 	}
++
++	/* network invalidation by Lustre */
++	if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
++		spin_unlock(&dcache_lock);
++		return 0;
++	}
++
+ 	/*
+ 	 * Check whether to do a partial shrink_dcache
+ 	 * to get rid of unused child entries.
+@@ -859,13 +866,19 @@ void d_delete(struct dentry * dentry)
+  * Adds a dentry to the hash according to its name.
+  */
+  
+-void d_rehash(struct dentry * entry)
++void __d_rehash(struct dentry * entry, int lock)
+ {
+ 	struct list_head *list = d_hash(entry->d_parent, entry->d_name.hash);
+ 	if (!list_empty(&entry->d_hash)) BUG();
+-	spin_lock(&dcache_lock);
++	if (lock) spin_lock(&dcache_lock);
+ 	list_add(&entry->d_hash, list);
+-	spin_unlock(&dcache_lock);
++	if (lock) spin_unlock(&dcache_lock);
++}
++EXPORT_SYMBOL(__d_rehash);
++
++void d_rehash(struct dentry * entry)
++{
++	__d_rehash(entry, 1);	/* 1 => __d_rehash() takes dcache_lock */
+ }
+ 
+ #define do_switch(x,y) do { \
+--- linux-2.4.18-p4smp/fs/namespace.c~vfs_intent-2.4.18-18-chaos65	2002-06-25 22:16:14.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/namespace.c	2003-07-08 14:45:17.000000000 -0600
+@@ -99,6 +99,7 @@ static void detach_mnt(struct vfsmount *
+ {
+ 	old_nd->dentry = mnt->mnt_mountpoint;
+ 	old_nd->mnt = mnt->mnt_parent;
++        UNPIN(old_nd->dentry, old_nd->mnt, 1);
+ 	mnt->mnt_parent = mnt;
+ 	mnt->mnt_mountpoint = mnt->mnt_root;
+ 	list_del_init(&mnt->mnt_child);
+@@ -110,6 +111,7 @@ static void attach_mnt(struct vfsmount *
+ {
+ 	mnt->mnt_parent = mntget(nd->mnt);
+ 	mnt->mnt_mountpoint = dget(nd->dentry);
++        PIN(nd->dentry, nd->mnt, 1);
+ 	list_add(&mnt->mnt_hash, mount_hashtable+hash(nd->mnt, nd->dentry));
+ 	list_add(&mnt->mnt_child, &nd->mnt->mnt_mounts);
+ 	nd->dentry->d_mounted++;
+@@ -485,14 +487,17 @@ static int do_loopback(struct nameidata 
+ {
+ 	struct nameidata old_nd;
+ 	struct vfsmount *mnt = NULL;
++        struct lookup_intent it = { .it_op = IT_GETATTR };
+ 	int err = mount_is_safe(nd);
+ 	if (err)
+ 		return err;
+ 	if (!old_name || !*old_name)
+ 		return -EINVAL;
+-	err = path_lookup(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd);
+-	if (err)
++	err = path_lookup_it(old_name, LOOKUP_POSITIVE|LOOKUP_FOLLOW, &old_nd, &it);
++	if (err) { 
++                intent_release(&it);
+ 		return err;
++        }
+ 
+ 	down_write(&current->namespace->sem);
+ 	err = -EINVAL;
+@@ -515,6 +520,7 @@ static int do_loopback(struct nameidata 
+ 	}
+ 
+ 	up_write(&current->namespace->sem);
++        intent_release(&it);
+ 	path_release(&old_nd);
+ 	return err;
+ }
+@@ -698,7 +704,8 @@ long do_mount(char * dev_name, char * di
+ 		  unsigned long flags, void *data_page)
+ {
+ 	struct nameidata nd;
+-	int retval = 0;
++        struct lookup_intent it = { .it_op = IT_GETATTR };
++        int retval = 0;
+ 	int mnt_flags = 0;
+ 
+ 	/* Discard magic */
+@@ -722,10 +729,11 @@ long do_mount(char * dev_name, char * di
+ 	flags &= ~(MS_NOSUID|MS_NOEXEC|MS_NODEV);
+ 
+ 	/* ... and get the mountpoint */
+-	retval = path_lookup(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
+-	if (retval)
++	retval = path_lookup_it(dir_name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd, &it);
++	if (retval) { 
++                intent_release(&it);
+ 		return retval;
+-
++        }
+ 	if (flags & MS_REMOUNT)
+ 		retval = do_remount(&nd, flags & ~MS_REMOUNT, mnt_flags,
+ 				    data_page);
+@@ -736,6 +744,8 @@ long do_mount(char * dev_name, char * di
+ 	else
+ 		retval = do_add_mount(&nd, type_page, flags, mnt_flags,
+ 				      dev_name, data_page);
++
++        intent_release(&it);
+ 	path_release(&nd);
+ 	return retval;
+ }
+@@ -901,6 +911,8 @@ asmlinkage long sys_pivot_root(const cha
+ {
+ 	struct vfsmount *tmp;
+ 	struct nameidata new_nd, old_nd, parent_nd, root_parent, user_nd;
++        struct lookup_intent new_it = { .it_op = IT_GETATTR };
++        struct lookup_intent old_it = { .it_op = IT_GETATTR };
+ 	int error;
+ 
+ 	if (!capable(CAP_SYS_ADMIN))
+@@ -908,14 +920,14 @@ asmlinkage long sys_pivot_root(const cha
+ 
+ 	lock_kernel();
+ 
+-	error = __user_walk(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd);
++	error = __user_walk_it(new_root, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &new_nd, &new_it);
+ 	if (error)
+ 		goto out0;
+ 	error = -EINVAL;
+ 	if (!check_mnt(new_nd.mnt))
+ 		goto out1;
+ 
+-	error = __user_walk(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd);
++	error = __user_walk_it(put_old, LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &old_nd, &old_it);
+ 	if (error)
+ 		goto out1;
+ 
+@@ -970,8 +982,10 @@ out2:
+ 	up(&old_nd.dentry->d_inode->i_zombie);
+ 	up_write(&current->namespace->sem);
+ 	path_release(&user_nd);
++        intent_release(&old_it);
+ 	path_release(&old_nd);
+ out1:
++        intent_release(&new_it);
+ 	path_release(&new_nd);
+ out0:
+ 	unlock_kernel();
+--- linux-2.4.18-p4smp/fs/namei.c~vfs_intent-2.4.18-18-chaos65	2003-07-08 14:41:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/namei.c	2003-07-08 14:45:17.000000000 -0600
+@@ -94,6 +94,13 @@
+  * XEmacs seems to be relying on it...
+  */
+ 
++void intent_release(struct lookup_intent *it)
++{
++	if (!it || !it->it_op_release)
++		return;		/* no intent, or no release hook set */
++	it->it_op_release(it);
++}
++
+ /* In order to reduce some races, while at the same time doing additional
+  * checking and hopefully speeding things up, we copy filenames to the
+  * kernel data space before using them..
+@@ -260,10 +267,19 @@ void path_release(struct nameidata *nd)
+  * Internal lookup() using the new generic dcache.
+  * SMP-safe
+  */
+-static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
++static struct dentry *cached_lookup(struct dentry *parent, struct qstr *name,
++				    int flags, struct lookup_intent *it)
+ {
+ 	struct dentry * dentry = d_lookup(parent, name);
+ 
++	if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
++		if (!dentry->d_op->d_revalidate_it(dentry, flags, it) &&
++		    !d_invalidate(dentry)) {
++			dput(dentry);
++			dentry = NULL;
++		}
++		return dentry;
++	} else
+ 	if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
+ 		if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
+ 			dput(dentry);
+@@ -281,11 +297,14 @@ static struct dentry * cached_lookup(str
+  * make sure that nobody added the entry to the dcache in the meantime..
+  * SMP-safe
+  */
+-static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
++static struct dentry *real_lookup(struct dentry *parent, struct qstr *name,
++				  int flags, struct lookup_intent *it)
+ {
+ 	struct dentry * result;
+ 	struct inode *dir = parent->d_inode;
+ 
++again:
++
+ 	down(&dir->i_sem);
+ 	/*
+ 	 * First re-do the cached lookup just in case it was created
+@@ -300,6 +319,9 @@ static struct dentry * real_lookup(struc
+ 		result = ERR_PTR(-ENOMEM);
+ 		if (dentry) {
+ 			lock_kernel();
++			if (dir->i_op->lookup_it)
++				result = dir->i_op->lookup_it(dir, dentry, it, flags);
++			else
+ 			result = dir->i_op->lookup(dir, dentry);
+ 			unlock_kernel();
+ 			if (result)
+@@ -321,6 +343,12 @@ static struct dentry * real_lookup(struc
+ 			dput(result);
+ 			result = ERR_PTR(-ENOENT);
+ 		}
++	} else if (result->d_op && result->d_op->d_revalidate_it) {
++		if (!result->d_op->d_revalidate_it(result, flags, it) &&
++		    !d_invalidate(result)) {
++			dput(result);
++			goto again;
++		}
+ 	}
+ 	return result;
+ }
+@@ -334,7 +362,8 @@ int max_recursive_link = 5;
+  * Without that kind of total limit, nasty chains of consecutive
+  * symlinks can cause almost arbitrarily long lookups. 
+  */
+-static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
++static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd,
++				 struct lookup_intent *it)
+ {
+ 	int err;
+ 	if (current->link_count >= max_recursive_link)
+@@ -348,10 +377,21 @@ static inline int do_follow_link(struct 
+ 	current->link_count++;
+ 	current->total_link_count++;
+ 	UPDATE_ATIME(dentry->d_inode);
+-	err = dentry->d_inode->i_op->follow_link(dentry, nd);
++	nd->it = it;
++	if (dentry->d_inode->i_op->follow_link2)
++		err = dentry->d_inode->i_op->follow_link2(dentry, nd, it);
++	else
++		err = dentry->d_inode->i_op->follow_link(dentry, nd);
++        if (!err && it != NULL && !(it->it_int_flags & IT_FL_FOLLOWED)) {
++                /* vfs_follow_link was never called */
++		intent_release(it);
++                path_release(nd);
++                err = -ENOLINK;
++        }
+ 	current->link_count--;
+ 	return err;
+ loop:
++	intent_release(it);
+ 	path_release(nd);
+ 	return -ELOOP;
+ }
+@@ -381,15 +421,26 @@ int follow_up(struct vfsmount **mnt, str
+ 	return __follow_up(mnt, dentry);
+ }
+ 
+-static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
++static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry,
++				struct lookup_intent *it)
+ {
+ 	struct vfsmount *mounted;
+ 
+ 	spin_lock(&dcache_lock);
+ 	mounted = lookup_mnt(*mnt, *dentry);
+ 	if (mounted) {
++		int opc = 0, mode = 0;
+ 		*mnt = mntget(mounted);
+ 		spin_unlock(&dcache_lock);
++		if (it) {
++			opc = it->it_op;
++			mode = it->it_mode;
++		}
++		intent_release(it);
++		if (it) {
++			it->it_op = opc;
++			it->it_mode = mode;
++		}
+ 		dput(*dentry);
+ 		mntput(mounted->mnt_parent);
+ 		*dentry = dget(mounted->mnt_root);
+@@ -401,7 +452,7 @@ static inline int __follow_down(struct v
+ 
+ int follow_down(struct vfsmount **mnt, struct dentry **dentry)
+ {
+-	return __follow_down(mnt,dentry);
++	return __follow_down(mnt,dentry,NULL);
+ }
+  
+ static inline void follow_dotdot(struct nameidata *nd)
+@@ -437,7 +488,7 @@ static inline void follow_dotdot(struct 
+ 		mntput(nd->mnt);
+ 		nd->mnt = parent;
+ 	}
+-	while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry))
++	while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry, NULL))
+ 		;
+ }
+ 
+@@ -449,7 +500,8 @@ static inline void follow_dotdot(struct 
+  *
+  * We expect 'base' to be positive and a directory.
+  */
+-int link_path_walk(const char * name, struct nameidata *nd)
++int link_path_walk_it(const char *name, struct nameidata *nd,
++		      struct lookup_intent *it)
+ {
+ 	struct dentry *dentry;
+ 	struct inode *inode;
+@@ -526,18 +578,18 @@ int link_path_walk(const char * name, st
+ 				break;
+ 		}
+ 		/* This does the actual lookups.. */
+-		dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
++		dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
+ 		if (!dentry) {
+ 			err = -EWOULDBLOCKIO;
+ 			if (atomic)
+ 				break;
+-			dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
++			dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE, NULL);
+ 			err = PTR_ERR(dentry);
+ 			if (IS_ERR(dentry))
+ 				break;
+ 		}
+ 		/* Check mountpoints.. */
+-		while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
++		while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, NULL))
+ 			;
+ 
+ 		err = -ENOENT;
+@@ -548,8 +600,8 @@ int link_path_walk(const char * name, st
+ 		if (!inode->i_op)
+ 			goto out_dput;
+ 
+-		if (inode->i_op->follow_link) {
+-			err = do_follow_link(dentry, nd);
++		if (inode->i_op->follow_link || inode->i_op->follow_link2) {
++			err = do_follow_link(dentry, nd, NULL);
+ 			dput(dentry);
+ 			if (err)
+ 				goto return_err;
+@@ -565,7 +617,7 @@ int link_path_walk(const char * name, st
+ 			nd->dentry = dentry;
+ 		}
+ 		err = -ENOTDIR; 
+-		if (!inode->i_op->lookup)
++		if (!inode->i_op->lookup && !inode->i_op->lookup_it)
+ 			break;
+ 		continue;
+ 		/* here ends the main loop */
+@@ -592,22 +644,23 @@ last_component:
+ 			if (err < 0)
+ 				break;
+ 		}
+-		dentry = cached_lookup(nd->dentry, &this, 0);
++		dentry = cached_lookup(nd->dentry, &this, 0, it);
+ 		if (!dentry) {
+ 			err = -EWOULDBLOCKIO;
+ 			if (atomic)
+ 				break;
+-			dentry = real_lookup(nd->dentry, &this, 0);
++			dentry = real_lookup(nd->dentry, &this, 0, it);
+ 			err = PTR_ERR(dentry);
+ 			if (IS_ERR(dentry))
+ 				break;
+ 		}
+-		while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
++		while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry, it))
+ 			;
+ 		inode = dentry->d_inode;
+ 		if ((lookup_flags & LOOKUP_FOLLOW)
+-		    && inode && inode->i_op && inode->i_op->follow_link) {
+-			err = do_follow_link(dentry, nd);
++		    && inode && inode->i_op &&
++		    (inode->i_op->follow_link || inode->i_op->follow_link2)) {
++			err = do_follow_link(dentry, nd, it);
+ 			dput(dentry);
+ 			if (err)
+ 				goto return_err;
+@@ -621,7 +674,8 @@ last_component:
+ 			goto no_inode;
+ 		if (lookup_flags & LOOKUP_DIRECTORY) {
+ 			err = -ENOTDIR; 
+-			if (!inode->i_op || !inode->i_op->lookup)
++			if (!inode->i_op ||
++			    (!inode->i_op->lookup && !inode->i_op->lookup_it))
+ 				break;
+ 		}
+ 		goto return_base;
+@@ -645,7 +699,24 @@ return_reval:
+ 		 * Check the cached dentry for staleness.
+ 		 */
+ 		dentry = nd->dentry;
+-		if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
++        revalidate_again:
++		if (dentry && dentry->d_op && dentry->d_op->d_revalidate_it) {
++			err = -ESTALE;
++			if (!dentry->d_op->d_revalidate_it(dentry, 0, it)) {
++                                struct dentry *new;
++                                err = permission(dentry->d_parent->d_inode,
++                                                 MAY_EXEC);
++                                if (err)
++                                        break;
++                                new = real_lookup(dentry->d_parent,
++                                                  &dentry->d_name, 0, NULL);
++				d_invalidate(dentry);
++                                dput(dentry);
++                                dentry = new;
++                                goto revalidate_again;
++			}
++		} 
++                else if (dentry && dentry->d_op && dentry->d_op->d_revalidate){
+ 			err = -ESTALE;
+ 			if (!dentry->d_op->d_revalidate(dentry, 0)) {
+ 				d_invalidate(dentry);
+@@ -658,15 +729,28 @@ out_dput:
+ 		dput(dentry);
+ 		break;
+ 	}
++	if (err)
++		intent_release(it);
+ 	path_release(nd);
+ return_err:
+ 	return err;
+ }
+ 
++int link_path_walk(const char * name, struct nameidata *nd)
++{
++	return link_path_walk_it(name, nd, NULL);	/* NULL: no lookup intent */
++}
++
++int path_walk_it(const char * name, struct nameidata *nd, struct lookup_intent *it)
++{
++	current->total_link_count = 0;	/* zero the count do_follow_link() bumps */
++	return link_path_walk_it(name, nd, it);
++}
++
+ int path_walk(const char * name, struct nameidata *nd)
+ {
+ 	current->total_link_count = 0;
+-	return link_path_walk(name, nd);
++	return link_path_walk_it(name, nd, NULL);
+ }
+ 
+ /* SMP-safe */
+@@ -751,6 +835,17 @@ walk_init_root(const char *name, struct 
+ }
+ 
+ /* SMP-safe */
++int path_lookup_it(const char *path, unsigned flags, struct nameidata *nd,
++		   struct lookup_intent *it)
++{
++	/* Walk only when path_init() returns non-zero; otherwise succeed. */
++	if (!path_init(path, flags, nd))
++		return 0;
++	return path_walk_it(path, nd, it);
++}
++
++
++/* SMP-safe */
+ int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
+ {
+ 	int error = 0;
+@@ -765,6 +860,7 @@ int path_init(const char *name, unsigned
+ {
+ 	nd->last_type = LAST_ROOT; /* if there are only slashes... */
+ 	nd->flags = flags;
++	nd->it = NULL;
+ 	if (*name=='/')
+ 		return walk_init_root(name,nd);
+ 	read_lock(&current->fs->lock);
+@@ -779,7 +875,8 @@ int path_init(const char *name, unsigned
+  * needs parent already locked. Doesn't follow mounts.
+  * SMP-safe.
+  */
+-struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
++struct dentry * lookup_hash_it(struct qstr *name, struct dentry * base,
++			       struct lookup_intent *it)
+ {
+ 	struct dentry * dentry;
+ 	struct inode *inode;
+@@ -802,13 +899,16 @@ struct dentry * lookup_hash(struct qstr 
+ 			goto out;
+ 	}
+ 
+-	dentry = cached_lookup(base, name, 0);
++	dentry = cached_lookup(base, name, 0, it);
+ 	if (!dentry) {
+ 		struct dentry *new = d_alloc(base, name);
+ 		dentry = ERR_PTR(-ENOMEM);
+ 		if (!new)
+ 			goto out;
+ 		lock_kernel();
++		if (inode->i_op->lookup_it)
++			dentry = inode->i_op->lookup_it(inode, new, it, 0);
++		else
+ 		dentry = inode->i_op->lookup(inode, new);
+ 		unlock_kernel();
+ 		if (!dentry)
+@@ -820,6 +920,12 @@ out:
+ 	return dentry;
+ }
+ 
++struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
++{
++	return lookup_hash_it(name, base, NULL);	/* NULL: no lookup intent */
++}
++
++
+ /* SMP-safe */
+ struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
+ {
+@@ -841,7 +947,7 @@ struct dentry * lookup_one_len(const cha
+ 	}
+ 	this.hash = end_name_hash(hash);
+ 
+-	return lookup_hash(&this, base);
++	return lookup_hash_it(&this, base, NULL);
+ access:
+ 	return ERR_PTR(-EACCES);
+ }
+@@ -872,6 +978,23 @@ int __user_walk(const char *name, unsign
+ 	return err;
+ }
+ 
++int __user_walk_it(const char *name, unsigned flags, struct nameidata *nd,
++		   struct lookup_intent *it)
++{
++	char *kname = getname(name);
++	int rc = 0;
++
++	/* getname() failed: hand its encoded errno back to the caller. */
++	if (IS_ERR(kname))
++		return PTR_ERR(kname);
++
++	/* Walk with the intent only when path_init() returns non-zero. */
++	if (path_init(kname, flags, nd))
++		rc = path_walk_it(kname, nd, it);
++	putname(kname);
++	return rc;
++}
++
+ /*
+  * It's inline, so penalty for filesystems that don't use sticky bit is
+  * minimal.
+@@ -969,6 +1092,37 @@ static inline int lookup_flags(unsigned 
+ 	return retval;
+ }
+ 
++static int vfs_create_it(struct inode *dir, struct dentry *dentry, int mode,
++			 struct lookup_intent *it)
++{
++	int rc;
++
++	/* Keep only the S_IALLUGO bits and mark the mode as S_IFREG. */
++	mode = (mode & S_IALLUGO) | S_IFREG;
++
++	down(&dir->i_zombie);
++	rc = may_create(dir, dentry);
++	if (rc)
++		goto unlock;
++
++	if (!dir->i_op || !(dir->i_op->create || dir->i_op->create_it)) {
++		rc = -EACCES;	/* shouldn't it be ENOSYS? */
++		goto unlock;
++	}
++
++	DQUOT_INIT(dir);
++	lock_kernel();
++	rc = dir->i_op->create_it ?	/* intent-aware method is preferred */
++		dir->i_op->create_it(dir, dentry, mode, it) :
++		dir->i_op->create(dir, dentry, mode);
++	unlock_kernel();
++unlock:
++	up(&dir->i_zombie);
++	if (!rc)
++		inode_dir_notify(dir, DN_CREATE);
++	return rc;
++}
++
+ int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
+ {
+ 	int error;
+@@ -987,7 +1141,7 @@ int vfs_create(struct inode *dir, struct
+ 
+ 	DQUOT_INIT(dir);
+ 	lock_kernel();
+-	error = dir->i_op->create(dir, dentry, mode);
++        error = dir->i_op->create(dir, dentry, mode);
+ 	unlock_kernel();
+ exit_lock:
+ 	up(&dir->i_zombie);
+@@ -1045,14 +1199,17 @@ int may_open(struct nameidata *nd, int a
+         return get_lease(inode, flag);
+ }
+ 
+-struct file *filp_open(const char * pathname, int open_flags, int mode)
++extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
++				   int flags, struct lookup_intent *it);
++
++struct file *filp_open(const char * pathname, int open_flags, int mode) 
+ {
+ 	int acc_mode, error = 0;
+-	struct inode *inode;
+ 	struct dentry *dentry;
+ 	struct dentry *dir;
+ 	int flag = open_flags;
+ 	struct nameidata nd;
++	struct lookup_intent it = { .it_op = IT_OPEN, .it_flags = open_flags };
+ 	int count = 0;
+ 	
+ 	if (!capable(CAP_SYS_ADMIN))
+@@ -1069,7 +1226,7 @@ struct file *filp_open(const char * path
+ 	 * The simplest case - just a plain lookup.
+ 	 */
+ 	if (!(flag & O_CREAT)) {
+-		error = path_lookup(pathname, lookup_flags(flag), &nd);
++		error = path_lookup_it(pathname, lookup_flags(flag), &nd, &it);
+ 		if (error)
+ 			return ERR_PTR(error);
+ 		dentry = nd.dentry;
+@@ -1079,6 +1236,8 @@ struct file *filp_open(const char * path
+ 	/*
+ 	 * Create - we need to know the parent.
+ 	 */
++	it.it_mode = mode;
++	it.it_op |= IT_CREAT;
+ 	error = path_lookup(pathname, LOOKUP_PARENT, &nd);
+ 	if (error)
+ 		return ERR_PTR(error);
+@@ -1094,7 +1253,7 @@ struct file *filp_open(const char * path
+ 
+ 	dir = nd.dentry;
+ 	down(&dir->d_inode->i_sem);
+-	dentry = lookup_hash(&nd.last, nd.dentry);
++	dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
+ 
+ do_last:
+ 	error = PTR_ERR(dentry);
+@@ -1103,10 +1262,11 @@ do_last:
+ 		goto exit;
+ 	}
+ 
++	it.it_mode = mode;
+ 	/* Negative dentry, just create the file */
+ 	if (!dentry->d_inode) {
+-		error = vfs_create(dir->d_inode, dentry,
+-				   mode & ~current->fs->umask);
++		error = vfs_create_it(dir->d_inode, dentry,
++				   mode & ~current->fs->umask, &it);
+ 		up(&dir->d_inode->i_sem);
+ 		dput(nd.dentry);
+ 		nd.dentry = dentry;
+@@ -1132,12 +1292,12 @@ do_last:
+ 		error = -ELOOP;
+ 		if (flag & O_NOFOLLOW)
+ 			goto exit_dput;
+-		while (__follow_down(&nd.mnt,&dentry) && d_mountpoint(dentry));
++		while (__follow_down(&nd.mnt,&dentry, &it) && d_mountpoint(dentry));
+ 	}
+ 	error = -ENOENT;
+ 	if (!dentry->d_inode)
+ 		goto exit_dput;
+-	if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
++	if (dentry->d_inode->i_op && (dentry->d_inode->i_op->follow_link))
+ 		goto do_link;
+ 
+ 	dput(nd.dentry);
+@@ -1152,11 +1312,13 @@ ok:
+ 	if (!S_ISREG(nd.dentry->d_inode->i_mode))
+ 		open_flags &= ~O_TRUNC;
+ 
+-        return dentry_open(nd.dentry, nd.mnt, open_flags);
++	return dentry_open_it(nd.dentry, nd.mnt, open_flags, &it);
+ 
+ exit_dput:
++	intent_release(&it);
+ 	dput(dentry);
+ exit:
++	intent_release(&it);
+ 	path_release(&nd);
+ 	return ERR_PTR(error);
+ 
+@@ -1175,10 +1337,19 @@ do_link:
+ 	 * are done. Procfs-like symlinks just set LAST_BIND.
+ 	 */
+ 	UPDATE_ATIME(dentry->d_inode);
+-	error = dentry->d_inode->i_op->follow_link(dentry, &nd);
++	nd.it = &it;
++        error = dentry->d_inode->i_op->follow_link(dentry, &nd);
++	if (error) {
++		intent_release(&it);
++	} else if (!(it.it_int_flags & IT_FL_FOLLOWED)) {
++		/* vfs_follow_link was never called */
++		intent_release(&it);
++		path_release(&nd);
++		error = -ENOLINK;
++	}
+ 	dput(dentry);
+ 	if (error)
+-		return error;
++		return ERR_PTR(error);
+ 	if (nd.last_type == LAST_BIND) {
+ 		dentry = nd.dentry;
+ 		goto ok;
+@@ -1197,13 +1368,15 @@ do_link:
+ 	}
+ 	dir = nd.dentry;
+ 	down(&dir->d_inode->i_sem);
+-	dentry = lookup_hash(&nd.last, nd.dentry);
++	dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
+ 	putname(nd.last.name);
+ 	goto do_last;
+ }
+ 
++
+ /* SMP-safe */
+-static struct dentry *lookup_create(struct nameidata *nd, int is_dir)
++static struct dentry *lookup_create(struct nameidata *nd, int is_dir,
++				    struct lookup_intent *it)
+ {
+ 	struct dentry *dentry;
+ 
+@@ -1211,7 +1384,7 @@ static struct dentry *lookup_create(stru
+ 	dentry = ERR_PTR(-EEXIST);
+ 	if (nd->last_type != LAST_NORM)
+ 		goto fail;
+-	dentry = lookup_hash(&nd->last, nd->dentry);
++	dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+ 	if (IS_ERR(dentry))
+ 		goto fail;
+ 	if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
+@@ -1267,7 +1440,16 @@ asmlinkage long sys_mknod(const char * f
+ 	error = path_lookup(tmp, LOOKUP_PARENT, &nd);
+ 	if (error)
+ 		goto out;
+-	dentry = lookup_create(&nd, 0);
++
++	if (nd.dentry->d_inode->i_op->mknod_raw) {
++		struct inode_operations *op = nd.dentry->d_inode->i_op;
++		error = op->mknod_raw(&nd, mode, dev);
++		/* the file system wants to use normal vfs path now */
++		if (error != -EOPNOTSUPP)
++			goto out2;
++	}
++
++	dentry = lookup_create(&nd, 0, NULL);
+ 	error = PTR_ERR(dentry);
+ 
+ 	mode &= ~current->fs->umask;
+@@ -1288,6 +1470,7 @@ asmlinkage long sys_mknod(const char * f
+ 		dput(dentry);
+ 	}
+ 	up(&nd.dentry->d_inode->i_sem);
++out2:
+ 	path_release(&nd);
+ out:
+ 	putname(tmp);
+@@ -1335,7 +1518,14 @@ asmlinkage long sys_mkdir(const char * p
+ 		error = path_lookup(tmp, LOOKUP_PARENT, &nd);
+ 		if (error)
+ 			goto out;
+-		dentry = lookup_create(&nd, 1);
++ 		if (nd.dentry->d_inode->i_op->mkdir_raw) {
++ 			struct inode_operations *op = nd.dentry->d_inode->i_op;
++ 			error = op->mkdir_raw(&nd, mode);
++ 			/* the file system wants to use normal vfs path now */
++ 			if (error != -EOPNOTSUPP)
++ 				goto out2;
++ 		}
++		dentry = lookup_create(&nd, 1, NULL);
+ 		error = PTR_ERR(dentry);
+ 		if (!IS_ERR(dentry)) {
+ 			error = vfs_mkdir(nd.dentry->d_inode, dentry,
+@@ -1343,6 +1533,7 @@ asmlinkage long sys_mkdir(const char * p
+ 			dput(dentry);
+ 		}
+ 		up(&nd.dentry->d_inode->i_sem);
++out2:
+ 		path_release(&nd);
+ out:
+ 		putname(tmp);
+@@ -1443,8 +1634,16 @@ asmlinkage long sys_rmdir(const char * p
+ 			error = -EBUSY;
+ 			goto exit1;
+ 	}
++ 	if (nd.dentry->d_inode->i_op->rmdir_raw) {
++ 		struct inode_operations *op = nd.dentry->d_inode->i_op;
++ 
++ 		error = op->rmdir_raw(&nd);
++ 		/* the file system wants to use normal vfs path now */
++ 		if (error != -EOPNOTSUPP)
++ 			goto exit1;
++ 	}
+ 	down(&nd.dentry->d_inode->i_sem);
+-	dentry = lookup_hash(&nd.last, nd.dentry);
++	dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
+ 	error = PTR_ERR(dentry);
+ 	if (!IS_ERR(dentry)) {
+ 		error = vfs_rmdir(nd.dentry->d_inode, dentry);
+@@ -1502,8 +1701,15 @@ asmlinkage long sys_unlink(const char * 
+ 	error = -EISDIR;
+ 	if (nd.last_type != LAST_NORM)
+ 		goto exit1;
++ 	if (nd.dentry->d_inode->i_op->unlink_raw) {
++ 		struct inode_operations *op = nd.dentry->d_inode->i_op;
++ 		error = op->unlink_raw(&nd);
++ 		/* the file system wants to use normal vfs path now */
++ 		if (error != -EOPNOTSUPP)
++ 			goto exit1;
++ 	}
+ 	down(&nd.dentry->d_inode->i_sem);
+-	dentry = lookup_hash(&nd.last, nd.dentry);
++	dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
+ 	error = PTR_ERR(dentry);
+ 	if (!IS_ERR(dentry)) {
+ 		/* Why not before? Because we want correct error value */
+@@ -1570,15 +1776,23 @@ asmlinkage long sys_symlink(const char *
+ 		error = path_lookup(to, LOOKUP_PARENT, &nd);
+ 		if (error)
+ 			goto out;
+-		dentry = lookup_create(&nd, 0);
++		if (nd.dentry->d_inode->i_op->symlink_raw) {
++			struct inode_operations *op = nd.dentry->d_inode->i_op;
++			error = op->symlink_raw(&nd, from);
++			/* the file system wants to use normal vfs path now */
++			if (error != -EOPNOTSUPP)
++				goto out2;
++		}
++		dentry = lookup_create(&nd, 0, NULL);
+ 		error = PTR_ERR(dentry);
+ 		if (!IS_ERR(dentry)) {
+ 			error = vfs_symlink(nd.dentry->d_inode, dentry, from);
+ 			dput(dentry);
+ 		}
+ 		up(&nd.dentry->d_inode->i_sem);
++	out2:
+ 		path_release(&nd);
+-out:
++	out:
+ 		putname(to);
+ 	}
+ 	putname(from);
+@@ -1654,7 +1868,14 @@ asmlinkage long sys_link(const char * ol
+ 		error = -EXDEV;
+ 		if (old_nd.mnt != nd.mnt)
+ 			goto out_release;
+-		new_dentry = lookup_create(&nd, 0);
++                if (nd.dentry->d_inode->i_op->link_raw) {
++                        struct inode_operations *op = nd.dentry->d_inode->i_op;
++                        error = op->link_raw(&old_nd, &nd);
++                        /* the file system wants to use normal vfs path now */
++                        if (error != -EOPNOTSUPP)
++                                goto out_release;
++                }
++		new_dentry = lookup_create(&nd, 0, NULL);
+ 		error = PTR_ERR(new_dentry);
+ 		if (!IS_ERR(new_dentry)) {
+ 			error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
+@@ -1698,7 +1919,7 @@ exit:
+  *	   locking].
+  */
+ int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
+-	       struct inode *new_dir, struct dentry *new_dentry)
++		   struct inode *new_dir, struct dentry *new_dentry)
+ {
+ 	int error;
+ 	struct inode *target;
+@@ -1777,7 +1998,7 @@ out_unlock:
+ }
+ 
+ int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
+-	       struct inode *new_dir, struct dentry *new_dentry)
++		     struct inode *new_dir, struct dentry *new_dentry)
+ {
+ 	int error;
+ 
+@@ -1865,9 +2086,18 @@ static inline int do_rename(const char *
+ 	if (newnd.last_type != LAST_NORM)
+ 		goto exit2;
+ 
++ 	if (old_dir->d_inode->i_op->rename_raw) {
++                lock_kernel();
++ 		error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
++                unlock_kernel();
++ 		/* the file system wants to use normal vfs path now */
++ 		if (error != -EOPNOTSUPP)
++ 			goto exit2;
++ 	}
++
+ 	double_lock(new_dir, old_dir);
+ 
+-	old_dentry = lookup_hash(&oldnd.last, old_dir);
++	old_dentry = lookup_hash_it(&oldnd.last, old_dir, NULL);
+ 	error = PTR_ERR(old_dentry);
+ 	if (IS_ERR(old_dentry))
+ 		goto exit3;
+@@ -1883,16 +2113,16 @@ static inline int do_rename(const char *
+ 		if (newnd.last.name[newnd.last.len])
+ 			goto exit4;
+ 	}
+-	new_dentry = lookup_hash(&newnd.last, new_dir);
++	new_dentry = lookup_hash_it(&newnd.last, new_dir, NULL);
+ 	error = PTR_ERR(new_dentry);
+ 	if (IS_ERR(new_dentry))
+ 		goto exit4;
+ 
++
+ 	lock_kernel();
+ 	error = vfs_rename(old_dir->d_inode, old_dentry,
+ 				   new_dir->d_inode, new_dentry);
+ 	unlock_kernel();
+-
+ 	dput(new_dentry);
+ exit4:
+ 	dput(old_dentry);
+@@ -1943,12 +2173,19 @@ out:
+ }
+ 
+ static inline int
+-__vfs_follow_link(struct nameidata *nd, const char *link)
++__vfs_follow_link(struct nameidata *nd, const char *link,
++		  struct lookup_intent *it)
+ {
+ 	int res = 0;
+ 	char *name;
+ 	if (IS_ERR(link))
+ 		goto fail;
++	if (it == NULL)
++		it = nd->it;
++	else if (it != nd->it)
++		printk("it != nd->it: tell phil@clusterfs.com\n");
++	if (it != NULL)
++		it->it_int_flags |= IT_FL_FOLLOWED;
+ 
+ 	if (*link == '/') {
+ 		path_release(nd);
+@@ -1956,7 +2193,7 @@ __vfs_follow_link(struct nameidata *nd, 
+ 			/* weird __emul_prefix() stuff did it */
+ 			goto out;
+ 	}
+-	res = link_path_walk(link, nd);
++	res = link_path_walk_it(link, nd, it);
+ out:
+ 	if (current->link_count || res || nd->last_type!=LAST_NORM)
+ 		return res;
+@@ -1978,7 +2215,13 @@ fail:
+ 
+ int vfs_follow_link(struct nameidata *nd, const char *link)
+ {
+-	return __vfs_follow_link(nd, link);
++	return __vfs_follow_link(nd, link, NULL);
++}
++
++int vfs_follow_link_it(struct nameidata *nd, const char *link,
++		       struct lookup_intent *it)
++{
++	return __vfs_follow_link(nd, link, it);
+ }
+ 
+ /* get the link contents into pagecache */
+@@ -2020,7 +2263,7 @@ int page_follow_link(struct dentry *dent
+ {
+ 	struct page *page = NULL;
+ 	char *s = page_getlink(dentry, &page);
+-	int res = __vfs_follow_link(nd, s);
++	int res = __vfs_follow_link(nd, s, NULL);
+ 	if (page) {
+ 		kunmap(page);
+ 		page_cache_release(page);
+--- linux-2.4.18-p4smp/fs/open.c~vfs_intent-2.4.18-18-chaos65	2003-07-08 14:41:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/open.c	2003-07-08 14:45:17.000000000 -0600
+@@ -19,6 +19,8 @@
+ #include <asm/uaccess.h>
+ 
+ #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m))
++extern int path_walk_it(const char *name, struct nameidata *nd,
++			struct lookup_intent *it);
+ 
+ int vfs_statfs(struct super_block *sb, struct statfs *buf)
+ {
+@@ -95,9 +97,10 @@ void fd_install(unsigned int fd, struct 
+ 	write_unlock(&files->file_lock);
+ }
+ 
+-int do_truncate(struct dentry *dentry, loff_t length)
++int do_truncate(struct dentry *dentry, loff_t length, int called_from_open)
+ {
+ 	struct inode *inode = dentry->d_inode;
++	struct inode_operations *op = dentry->d_inode->i_op;
+ 	int error;
+ 	struct iattr newattrs;
+ 
+@@ -108,7 +111,14 @@ int do_truncate(struct dentry *dentry, l
+ 	down(&inode->i_sem);
+ 	newattrs.ia_size = length;
+ 	newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
+-	error = notify_change(dentry, &newattrs);
++	if (called_from_open)
++		newattrs.ia_valid |= ATTR_FROM_OPEN;
++	if (op->setattr_raw) {
++		newattrs.ia_valid |= ATTR_RAW;
++		newattrs.ia_ctime = CURRENT_TIME;
++		error = op->setattr_raw(inode, &newattrs);
++	} else 
++		error = notify_change(dentry, &newattrs);
+ 	up(&inode->i_sem);
+ 	return error;
+ }
+@@ -118,12 +128,13 @@ static inline long do_sys_truncate(const
+ 	struct nameidata nd;
+ 	struct inode * inode;
+ 	int error;
++	struct lookup_intent it = { .it_op = IT_GETATTR };
+ 
+ 	error = -EINVAL;
+ 	if (length < 0)	/* sorry, but loff_t says... */
+ 		goto out;
+ 
+-	error = user_path_walk(path, &nd);
++	error = user_path_walk_it(path, &nd, &it);
+ 	if (error)
+ 		goto out;
+ 	inode = nd.dentry->d_inode;
+@@ -163,11 +174,13 @@ static inline long do_sys_truncate(const
+ 	error = locks_verify_truncate(inode, NULL, length);
+ 	if (!error) {
+ 		DQUOT_INIT(inode);
+-		error = do_truncate(nd.dentry, length);
++		intent_release(&it);
++		error = do_truncate(nd.dentry, length, 0);
+ 	}
+ 	put_write_access(inode);
+ 
+ dput_and_out:
++	intent_release(&it);
+ 	path_release(&nd);
+ out:
+ 	return error;
+@@ -215,7 +228,7 @@ static inline long do_sys_ftruncate(unsi
+ 
+ 	error = locks_verify_truncate(inode, file, length);
+ 	if (!error)
+-		error = do_truncate(dentry, length);
++		error = do_truncate(dentry, length, 0);
+ out_putf:
+ 	fput(file);
+ out:
+@@ -260,11 +273,13 @@ asmlinkage long sys_utime(char * filenam
+ 	struct inode * inode;
+ 	struct iattr newattrs;
+ 
+-	error = user_path_walk(filename, &nd);
++	error = user_path_walk_it(filename, &nd, NULL);
+ 	if (error)
+ 		goto out;
+ 	inode = nd.dentry->d_inode;
+ 
++	/* this is safe without a Lustre lock because it only depends
++	   on the super block */
+ 	error = -EROFS;
+ 	if (IS_RDONLY(inode))
+ 		goto dput_and_out;
+@@ -279,11 +294,29 @@ asmlinkage long sys_utime(char * filenam
+ 			goto dput_and_out;
+ 
+ 		newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
+-	} else {
++	}
++
++	if (inode->i_op->setattr_raw) {
++		struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++		newattrs.ia_valid |= ATTR_RAW;
++		error = op->setattr_raw(inode, &newattrs);
++		/* the file system wants to use normal vfs path now */
++		if (error != -EOPNOTSUPP)
++			goto dput_and_out;
++	}
++
++	error = -EROFS;
++	if (IS_RDONLY(inode))
++		goto dput_and_out;
++
++	error = -EPERM;
++	if (!times) {
+ 		if (current->fsuid != inode->i_uid &&
+ 		    (error = permission(inode,MAY_WRITE)) != 0)
+ 			goto dput_and_out;
+ 	}
++
+ 	error = notify_change(nd.dentry, &newattrs);
+ dput_and_out:
+ 	path_release(&nd);
+@@ -304,12 +337,14 @@ asmlinkage long sys_utimes(char * filena
+ 	struct inode * inode;
+ 	struct iattr newattrs;
+ 
+-	error = user_path_walk(filename, &nd);
++	error = user_path_walk_it(filename, &nd, NULL);
+ 
+ 	if (error)
+ 		goto out;
+ 	inode = nd.dentry->d_inode;
+ 
++	/* this is safe without a Lustre lock because it only depends
++	   on the super block */
+ 	error = -EROFS;
+ 	if (IS_RDONLY(inode))
+ 		goto dput_and_out;
+@@ -324,7 +359,20 @@ asmlinkage long sys_utimes(char * filena
+ 		newattrs.ia_atime = times[0].tv_sec;
+ 		newattrs.ia_mtime = times[1].tv_sec;
+ 		newattrs.ia_valid |= ATTR_ATIME_SET | ATTR_MTIME_SET;
+-	} else {
++	}
++
++	if (inode->i_op->setattr_raw) {
++		struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++		newattrs.ia_valid |= ATTR_RAW;
++		error = op->setattr_raw(inode, &newattrs);
++		/* the file system wants to use normal vfs path now */
++		if (error != -EOPNOTSUPP)
++			goto dput_and_out;
++	}
++
++	error = -EPERM;
++	if (!utimes) {
+ 		if (current->fsuid != inode->i_uid &&
+ 		    (error = permission(inode,MAY_WRITE)) != 0)
+ 			goto dput_and_out;
+@@ -347,6 +395,7 @@ asmlinkage long sys_access(const char * 
+ 	int old_fsuid, old_fsgid;
+ 	kernel_cap_t old_cap;
+ 	int res;
++	struct lookup_intent it = { .it_op = IT_GETATTR };
+ 
+ 	if (mode & ~S_IRWXO)	/* where's F_OK, X_OK, W_OK, R_OK? */
+ 		return -EINVAL;
+@@ -364,13 +413,14 @@ asmlinkage long sys_access(const char * 
+ 	else
+ 		current->cap_effective = current->cap_permitted;
+ 
+-	res = user_path_walk(filename, &nd);
++	res = user_path_walk_it(filename, &nd, &it);
+ 	if (!res) {
+ 		res = permission(nd.dentry->d_inode, mode);
+ 		/* SuS v2 requires we report a read only fs too */
+ 		if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
+ 		   && !special_file(nd.dentry->d_inode->i_mode))
+ 			res = -EROFS;
++		intent_release(&it);
+ 		path_release(&nd);
+ 	}
+ 
+@@ -385,8 +435,11 @@ asmlinkage long sys_chdir(const char * f
+ {
+ 	int error;
+ 	struct nameidata nd;
++	struct lookup_intent it = { .it_op = IT_GETATTR };
+ 
+-	error = __user_walk(filename,LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,&nd);
++	error = __user_walk_it(filename,
++			       LOOKUP_POSITIVE|LOOKUP_FOLLOW|LOOKUP_DIRECTORY,
++			       &nd, &it);
+ 	if (error)
+ 		goto out;
+ 
+@@ -397,6 +450,7 @@ asmlinkage long sys_chdir(const char * f
+ 	set_fs_pwd(current->fs, nd.mnt, nd.dentry);
+ 
+ dput_and_out:
++	intent_release(&it);
+ 	path_release(&nd);
+ out:
+ 	return error;
+@@ -436,9 +490,10 @@ asmlinkage long sys_chroot(const char * 
+ {
+ 	int error;
+ 	struct nameidata nd;
++	struct lookup_intent it = { .it_op = IT_GETATTR };
+ 
+-	error = __user_walk(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
+-		      LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
++	error = __user_walk_it(filename, LOOKUP_POSITIVE | LOOKUP_FOLLOW |
++			       LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd, &it);
+ 	if (error)
+ 		goto out;
+ 
+@@ -454,6 +509,7 @@ asmlinkage long sys_chroot(const char * 
+ 	set_fs_altroot();
+ 	error = 0;
+ dput_and_out:
++	intent_release(&it);
+ 	path_release(&nd);
+ out:
+ 	return error;
+@@ -508,6 +564,18 @@ asmlinkage long sys_chmod(const char * f
+ 	if (IS_RDONLY(inode))
+ 		goto dput_and_out;
+ 
++	if (inode->i_op->setattr_raw) {
++		struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++		newattrs.ia_mode = mode;
++		newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
++		newattrs.ia_valid |= ATTR_RAW;
++		error = op->setattr_raw(inode, &newattrs);
++		/* the file system wants to use normal vfs path now */
++		if (error != -EOPNOTSUPP)
++			goto dput_and_out;
++	}
++
+ 	error = -EPERM;
+ 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ 		goto dput_and_out;
+@@ -538,6 +606,20 @@ static int chown_common(struct dentry * 
+ 	error = -EROFS;
+ 	if (IS_RDONLY(inode))
+ 		goto out;
++
++	if (inode->i_op->setattr_raw) {
++		struct inode_operations *op = dentry->d_inode->i_op;
++
++		newattrs.ia_uid = user;
++		newattrs.ia_gid = group;
++		newattrs.ia_valid = ATTR_UID | ATTR_GID;
++		newattrs.ia_valid |= ATTR_RAW;
++		error = op->setattr_raw(inode, &newattrs);
++		/* the file system wants to use normal vfs path now */
++		if (error != -EOPNOTSUPP)
++			return error;
++	}
++
+ 	error = -EPERM;
+ 	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ 		goto out;
+@@ -628,7 +710,8 @@ extern ssize_t do_readahead(struct file 
+ /* for files over a certains size it doesn't pay to do readahead on open */
+ #define READAHEAD_CUTOFF 48000
+ 
+-struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
++struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
++			    int flags, struct lookup_intent *it)
+ {
+ 	struct file * f;
+ 	struct inode *inode;
+@@ -649,7 +732,7 @@ struct file *dentry_open(struct dentry *
+ 		error = locks_verify_locked(inode);
+ 		if (!error) {
+ 			DQUOT_INIT(inode);
+-			error = do_truncate(dentry, 0);
++			error = do_truncate(dentry, 0, 1);
+ 		}
+ 		if (error || !(f->f_mode & FMODE_WRITE))
+ 			put_write_access(inode);
+@@ -679,7 +762,9 @@ struct file *dentry_open(struct dentry *
+ 	}
+ 
+ 	if (f->f_op && f->f_op->open) {
++                f->f_it = it;
+ 		error = f->f_op->open(inode,f);
++                f->f_it = NULL;
+ 		if (error)
+ 			goto cleanup_all;
+ 	}
+@@ -693,6 +778,7 @@ struct file *dentry_open(struct dentry *
+ 		do_readahead(f, 0, (48 * 1024) >> PAGE_SHIFT);
+ 	
+ 
++	intent_release(it);
+ 	return f;
+ 
+ cleanup_all:
+@@ -707,11 +793,17 @@ cleanup_all:
+ cleanup_file:
+ 	put_filp(f);
+ cleanup_dentry:
++	intent_release(it);
+ 	dput(dentry);
+ 	mntput(mnt);
+ 	return ERR_PTR(error);
+ }
+ 
++struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
++{
++	return dentry_open_it(dentry, mnt, flags, NULL);
++}
++
+ /*
+  * Find an empty file descriptor entry, and mark it busy.
+  */
+--- linux-2.4.18-p4smp/fs/stat.c~vfs_intent-2.4.18-18-chaos65	2003-07-08 14:41:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/stat.c	2003-07-08 14:45:17.000000000 -0600
+@@ -17,21 +17,24 @@
+  * Revalidate the inode. This is required for proper NFS attribute caching.
+  */
+ static __inline__ int
+-do_revalidate(struct dentry *dentry)
++do_revalidate(struct dentry *dentry, struct lookup_intent *it)
+ {
+ 	struct inode * inode = dentry->d_inode;
+-	if (inode->i_op && inode->i_op->revalidate)
++	if (inode->i_op && inode->i_op->revalidate_it)
++		return inode->i_op->revalidate_it(dentry, it);
++	else if (inode->i_op && inode->i_op->revalidate)
+ 		return inode->i_op->revalidate(dentry);
+ 	return 0;
+ }
+ 
+-static int do_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
++static int do_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat, 
++                      struct lookup_intent *it)
+ {
+ 	int res = 0;
+ 	unsigned int blocks, indirect;
+ 	struct inode *inode = dentry->d_inode;
+ 
+-	res = do_revalidate(dentry);
++	res = do_revalidate(dentry, it);
+ 	if (res)
+ 		return res;
+ 
+@@ -104,10 +107,12 @@ int vfs_stat(char *name, struct kstat *s
+ {
+ 	struct nameidata nd;
+ 	int error;
++	struct lookup_intent it = { .it_op = IT_GETATTR };
+ 
+-	error = user_path_walk(name, &nd);
++	error = user_path_walk_it(name, &nd, &it);
+ 	if (!error) {
+-		error = do_getattr(nd.mnt, nd.dentry, stat);
++		error = do_getattr(nd.mnt, nd.dentry, stat, &it);
++		intent_release(&it);
+ 		path_release(&nd);
+ 	}
+ 	return error;
+@@ -117,10 +122,12 @@ int vfs_lstat(char *name, struct kstat *
+ {
+ 	struct nameidata nd;
+ 	int error;
++	struct lookup_intent it = { .it_op = IT_GETATTR };
+ 
+-	error = user_path_walk_link(name, &nd);
++	error = user_path_walk_link_it(name, &nd, &it);
+ 	if (!error) {
+-		error = do_getattr(nd.mnt, nd.dentry, stat);
++		error = do_getattr(nd.mnt, nd.dentry, stat, &it);
++		intent_release(&it);
+ 		path_release(&nd);
+ 	}
+ 	return error;
+@@ -132,7 +139,7 @@ int vfs_fstat(unsigned int fd, struct ks
+ 	int error = -EBADF;
+ 
+ 	if (f) {
+-		error = do_getattr(f->f_vfsmnt, f->f_dentry, stat);
++		error = do_getattr(f->f_vfsmnt, f->f_dentry, stat, NULL);
+ 		fput(f);
+ 	}
+ 	return error;
+@@ -279,7 +286,7 @@ asmlinkage long sys_readlink(const char 
+ 
+ 		error = -EINVAL;
+ 		if (inode->i_op && inode->i_op->readlink &&
+-		    !(error = do_revalidate(nd.dentry))) {
++		    !(error = do_revalidate(nd.dentry, NULL))) {
+ 			UPDATE_ATIME(inode);
+ 			error = inode->i_op->readlink(nd.dentry, buf, bufsiz);
+ 		}
+--- linux-2.4.18-p4smp/fs/proc/base.c~vfs_intent-2.4.18-18-chaos65	2003-07-08 14:45:12.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/proc/base.c	2003-07-08 14:45:32.000000000 -0600
+@@ -465,6 +465,9 @@ static int proc_pid_follow_link(struct d
+ 
+ 	error = inode->u.proc_i.op.proc_get_link(inode, &nd->dentry, &nd->mnt);
+ 	nd->last_type = LAST_BIND;
++ 
++         if (nd->it != NULL)
++                 nd->it->it_int_flags |= IT_FL_FOLLOWED;
+ out:
+ 	return error;
+ }
+--- linux-2.4.18-p4smp/include/linux/dcache.h~vfs_intent-2.4.18-18-chaos65	2003-07-08 14:41:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/include/linux/dcache.h	2003-07-08 14:45:17.000000000 -0600
+@@ -6,6 +6,44 @@
+ #include <asm/atomic.h>
+ #include <linux/mount.h>
+ 
++#define IT_OPEN     (1)
++#define IT_CREAT    (1<<1)
++#define IT_READDIR  (1<<2)
++#define IT_GETATTR  (1<<3)
++#define IT_LOOKUP   (1<<4)
++#define IT_UNLINK   (1<<5)
++#define IT_GETXATTR (1<<6)
++#define IT_EXEC     (1<<7)
++#define IT_PIN      (1<<8)
++
++#define IT_FL_LOCKED   (1)
++#define IT_FL_FOLLOWED (1<<1) /* set by vfs_follow_link */
++
++#define INTENT_MAGIC 0x19620323
++
++struct lookup_intent {
++	int it_op;		/* requested operation, an IT_* value */
++	void (*it_op_release)(struct lookup_intent *);
++	int it_magic;		/* INTENT_MAGIC once intent_init() has run */
++	int it_mode;
++	int it_flags;		/* caller-supplied flags, see intent_init() */
++	int it_disposition;
++	int it_status;
++	int it_int_flags;	/* IT_FL_* bits, e.g. IT_FL_FOLLOWED */
++	__u64 it_lock_handle[2];
++	int it_lock_mode;
++	void *it_data;
++};
++
++/* Zero *it, then stamp it with INTENT_MAGIC and the caller's op and flags. */
++static inline void intent_init(struct lookup_intent *it, int op, int flags)
++{
++        memset(it, 0, sizeof(*it));
++        it->it_magic = INTENT_MAGIC;
++        it->it_op = op;
++        it->it_flags = flags;
++}
++
+ /*
+  * linux/include/linux/dcache.h
+  *
+@@ -91,8 +129,22 @@ struct dentry_operations {
+ 	int (*d_delete)(struct dentry *);
+ 	void (*d_release)(struct dentry *);
+ 	void (*d_iput)(struct dentry *, struct inode *);
++	int (*d_revalidate_it)(struct dentry *, int, struct lookup_intent *);
++	void (*d_pin)(struct dentry *, struct vfsmount * , int);
++	void (*d_unpin)(struct dentry *, struct vfsmount *, int);
+ };
+ 
++/* Call the optional d_pin/d_unpin dentry hook; 'de' is evaluated repeatedly. */
++#define PIN(de,mnt,flag)  do { if ((de)->d_op && (de)->d_op->d_pin) \
++                (de)->d_op->d_pin((de), (mnt), (flag)); } while (0)
++#define UNPIN(de,mnt,flag)  do { if ((de)->d_op && (de)->d_op->d_unpin) \
++                (de)->d_op->d_unpin((de), (mnt), (flag)); } while (0)
++
++/* defined in fs/namei.c */
++extern void intent_release(struct lookup_intent *it);
++/* defined in fs/dcache.c */
++extern void __d_rehash(struct dentry * entry, int lock);
++
+ /* the dentry parameter passed to d_hash and d_compare is the parent
+  * directory of the entries to be compared. It is used in case these
+  * functions need any directory specific information for determining
+@@ -124,6 +176,7 @@ d_iput:		no		no		yes
+ 					 * s_nfsd_free_path semaphore will be down
+ 					 */
+ #define DCACHE_REFERENCED	0x0008  /* Recently used, don't discard. */
++#define DCACHE_LUSTRE_INVALID	0x0010  /* Lustre invalidated */
+ 
+ extern spinlock_t dcache_lock;
+ 
+--- linux-2.4.18-p4smp/include/linux/fs.h~vfs_intent-2.4.18-18-chaos65	2003-07-08 14:41:47.000000000 -0600
++++ linux-2.4.18-p4smp-braam/include/linux/fs.h	2003-07-08 14:45:17.000000000 -0600
+@@ -339,6 +339,8 @@ extern void set_bh_page(struct buffer_he
+ #define ATTR_MTIME_SET	256
+ #define ATTR_FORCE	512	/* Not a change, but a change it */
+ #define ATTR_ATTR_FLAG	1024
++#define ATTR_RAW	2048	/* file system, not vfs will massage attrs */
++#define ATTR_FROM_OPEN	4096	/* called from open path, ie O_TRUNC */
+ 
+ /*
+  * This is the Inode Attributes structure, used for notify_change().  It
+@@ -578,6 +580,7 @@ struct file {
+ 
+ 	/* needed for tty driver, and maybe others */
+ 	void			*private_data;
++	struct lookup_intent    *f_it;
+ 
+ 	/* preallocated helper kiobuf to speedup O_DIRECT */
+ 	struct kiobuf		*f_iobuf;
+@@ -707,6 +710,7 @@ struct nameidata {
+ 	struct qstr last;
+ 	unsigned int flags;
+ 	int last_type;
++	struct lookup_intent *it;
+ };
+ 
+ #define DQUOT_USR_ENABLED	0x01		/* User diskquotas enabled */
+@@ -840,7 +844,8 @@ extern int vfs_symlink(struct inode *, s
+ extern int vfs_link(struct dentry *, struct inode *, struct dentry *);
+ extern int vfs_rmdir(struct inode *, struct dentry *);
+ extern int vfs_unlink(struct inode *, struct dentry *);
+-extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
++int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
++               struct inode *new_dir, struct dentry *new_dentry);
+ 
+ /*
+  * File types
+@@ -900,21 +905,34 @@ struct file_operations {
+ 
+ struct inode_operations {
+ 	int (*create) (struct inode *,struct dentry *,int);
++	int (*create_it) (struct inode *,struct dentry *,int, struct lookup_intent *);
+ 	struct dentry * (*lookup) (struct inode *,struct dentry *);
++	struct dentry * (*lookup_it) (struct inode *,struct dentry *, struct lookup_intent *, int flags);
+ 	int (*link) (struct dentry *,struct inode *,struct dentry *);
++	int (*link_raw) (struct nameidata *,struct nameidata *);
+ 	int (*unlink) (struct inode *,struct dentry *);
++	int (*unlink_raw) (struct nameidata *);
+ 	int (*symlink) (struct inode *,struct dentry *,const char *);
++	int (*symlink_raw) (struct nameidata *,const char *);
+ 	int (*mkdir) (struct inode *,struct dentry *,int);
++	int (*mkdir_raw) (struct nameidata *,int);
+ 	int (*rmdir) (struct inode *,struct dentry *);
++	int (*rmdir_raw) (struct nameidata *);
+ 	int (*mknod) (struct inode *,struct dentry *,int,int);
++	int (*mknod_raw) (struct nameidata *,int,dev_t);
+ 	int (*rename) (struct inode *, struct dentry *,
+ 			struct inode *, struct dentry *);
++	int (*rename_raw) (struct nameidata *, struct nameidata *);
+ 	int (*readlink) (struct dentry *, char *,int);
+ 	int (*follow_link) (struct dentry *, struct nameidata *);
++	int (*follow_link2) (struct dentry *, struct nameidata *,
++			     struct lookup_intent *it);
+ 	void (*truncate) (struct inode *);
+ 	int (*permission) (struct inode *, int);
+ 	int (*revalidate) (struct dentry *);
++	int (*revalidate_it) (struct dentry *, struct lookup_intent *);
+ 	int (*setattr) (struct dentry *, struct iattr *);
++ 	int (*setattr_raw) (struct inode *, struct iattr *);
+ 	int (*getattr) (struct dentry *, struct iattr *);
+ };
+ 
+@@ -1119,10 +1137,12 @@ static inline int get_lease(struct inode
+ 
+ asmlinkage long sys_open(const char *, int, int);
+ asmlinkage long sys_close(unsigned int);	/* yes, it's really unsigned */
+-extern int do_truncate(struct dentry *, loff_t start);
++extern int do_truncate(struct dentry *, loff_t start, int called_from_open);
+ 
+ extern struct file *filp_open(const char *, int, int);
+ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
++extern struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt,
++			    int flags, struct lookup_intent *it);
+ extern int filp_close(struct file *, fl_owner_t id);
+ extern char * getname(const char *);
+ 
+@@ -1388,9 +1408,12 @@ typedef int (*read_actor_t)(read_descrip
+ extern loff_t default_llseek(struct file *file, loff_t offset, int origin);
+ 
+ extern int FASTCALL(__user_walk(const char *, unsigned, struct nameidata *));
++extern int FASTCALL(__user_walk_it(const char *, unsigned, struct nameidata *, struct lookup_intent *it));
+ extern int FASTCALL(path_init(const char *, unsigned, struct nameidata *));
+ extern int FASTCALL(path_walk(const char *, struct nameidata *));
+ extern int FASTCALL(path_lookup(const char *, unsigned, struct nameidata *));
++extern int FASTCALL(path_lookup_it(const char *path, unsigned flags, struct nameidata *nd,
++		   struct lookup_intent *it));
+ extern int FASTCALL(link_path_walk(const char *, struct nameidata *));
+ extern void path_release(struct nameidata *);
+ extern int follow_down(struct vfsmount **, struct dentry **);
+@@ -1399,6 +1422,8 @@ extern struct dentry * lookup_one_len(co
+ extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
+ #define user_path_walk(name,nd)	 __user_walk(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd)
+ #define user_path_walk_link(name,nd) __user_walk(name, LOOKUP_POSITIVE, nd)
++#define user_path_walk_it(name,nd,it)  __user_walk_it(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, nd, it)
++#define user_path_walk_link_it(name,nd,it) __user_walk_it(name, LOOKUP_POSITIVE, nd, it)
+ 
+ extern void inode_init_once(struct inode *);
+ extern void iput(struct inode *);
+@@ -1499,6 +1524,8 @@ extern struct file_operations generic_ro
+ 
+ extern int vfs_readlink(struct dentry *, char *, int, const char *);
+ extern int vfs_follow_link(struct nameidata *, const char *);
++extern int vfs_follow_link_it(struct nameidata *, const char *,
++			      struct lookup_intent *it);
+ extern int page_readlink(struct dentry *, char *, int);
+ extern int page_follow_link(struct dentry *, struct nameidata *);
+ extern struct inode_operations page_symlink_inode_operations;
+--- linux-2.4.18-p4smp/kernel/fork.c~vfs_intent-2.4.18-18-chaos65	2003-03-24 11:22:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/kernel/fork.c	2003-07-08 14:45:17.000000000 -0600
+@@ -399,10 +399,13 @@ static inline struct fs_struct *__copy_f
+ 		fs->umask = old->umask;
+ 		read_lock(&old->lock);
+ 		fs->rootmnt = mntget(old->rootmnt);
++                PIN(old->pwd, old->pwdmnt, 0);
++                PIN(old->root, old->rootmnt, 1);
+ 		fs->root = dget(old->root);
+ 		fs->pwdmnt = mntget(old->pwdmnt);
+ 		fs->pwd = dget(old->pwd);
+ 		if (old->altroot) {
++                        PIN(old->altroot, old->altrootmnt, 1);
+ 			fs->altrootmnt = mntget(old->altrootmnt);
+ 			fs->altroot = dget(old->altroot);
+ 		} else {
+--- linux-2.4.18-p4smp/kernel/exit.c~vfs_intent-2.4.18-18-chaos65	2002-10-29 12:27:38.000000000 -0700
++++ linux-2.4.18-p4smp-braam/kernel/exit.c	2003-07-08 14:45:17.000000000 -0600
+@@ -303,11 +303,14 @@ static inline void __put_fs_struct(struc
+ {
+ 	/* No need to hold fs->lock if we are killing it */
+ 	if (atomic_dec_and_test(&fs->count)) {
++                UNPIN(fs->pwd, fs->pwdmnt, 0);
++                UNPIN(fs->root, fs->rootmnt, 1);
+ 		dput(fs->root);
+ 		mntput(fs->rootmnt);
+ 		dput(fs->pwd);
+ 		mntput(fs->pwdmnt);
+ 		if (fs->altroot) {
++                        UNPIN(fs->altroot, fs->altrootmnt, 1);
+ 			dput(fs->altroot);
+ 			mntput(fs->altrootmnt);
+ 		}
+--- linux-2.4.18-p4smp/kernel/ksyms.c~vfs_intent-2.4.18-18-chaos65	2003-07-08 14:41:49.000000000 -0600
++++ linux-2.4.18-p4smp-braam/kernel/ksyms.c	2003-07-08 14:45:17.000000000 -0600
+@@ -294,6 +294,7 @@ EXPORT_SYMBOL(read_cache_page);
+ EXPORT_SYMBOL(set_page_dirty);
+ EXPORT_SYMBOL(vfs_readlink);
+ EXPORT_SYMBOL(vfs_follow_link);
++EXPORT_SYMBOL(vfs_follow_link_it);
+ EXPORT_SYMBOL(page_readlink);
+ EXPORT_SYMBOL(page_follow_link);
+ EXPORT_SYMBOL(page_symlink_inode_operations);
+
+_
diff --git a/lustre/kernel_patches/pc/vfs_intent-2.4.18-18-chaos65.pc b/lustre/kernel_patches/pc/vfs_intent-2.4.18-18-chaos65.pc
new file mode 100644
index 0000000..adb8100
--- /dev/null
+++ b/lustre/kernel_patches/pc/vfs_intent-2.4.18-18-chaos65.pc
@@ -0,0 +1,12 @@
+fs/exec.c
+fs/dcache.c
+fs/namespace.c
+fs/namei.c
+fs/open.c
+fs/stat.c
+fs/proc/base.c
+include/linux/dcache.h
+include/linux/fs.h
+kernel/fork.c
+kernel/exit.c
+kernel/ksyms.c
diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c
index d6029f3..1963e1f 100644
--- a/lustre/ldlm/ldlm_lib.c
+++ b/lustre/ldlm/ldlm_lib.c
@@ -47,7 +47,6 @@ int client_import_connect(struct lustre_handle *dlm_handle,
         char *tmp[] = {imp->imp_target_uuid.uuid,
                        obd->obd_uuid.uuid,
                        (char *)dlm_handle};
-        int rq_opc = (obd->obd_type->typ_ops->o_brw) ? OST_CONNECT :MDS_CONNECT;
         int msg_flags;
 
         ENTRY;
@@ -67,7 +66,7 @@ int client_import_connect(struct lustre_handle *dlm_handle,
         if (obd->obd_namespace == NULL)
                 GOTO(out_disco, rc = -ENOMEM);
 
-        request = ptlrpc_prep_req(imp, rq_opc, 3, size, tmp);
+        request = ptlrpc_prep_req(imp, imp->imp_connect_op, 3, size, tmp);
         if (!request)
                 GOTO(out_ldlm, rc = -ENOMEM);
 
@@ -90,7 +89,7 @@ int client_import_connect(struct lustre_handle *dlm_handle,
         class_export_put(exp);
 
         msg_flags = lustre_msg_get_op_flags(request->rq_repmsg);
-        if (rq_opc == MDS_CONNECT || msg_flags & MSG_CONNECT_REPLAYABLE) {
+        if (msg_flags & MSG_CONNECT_REPLAYABLE) {
                 imp->imp_replayable = 1;
                 CDEBUG(D_HA, "connected to replayable target: %s\n",
                        imp->imp_target_uuid.uuid);
@@ -132,7 +131,16 @@ int client_import_disconnect(struct lustre_handle *dlm_handle, int failover)
                 RETURN(-EINVAL);
         }
 
-        rq_opc = obd->obd_type->typ_ops->o_brw ? OST_DISCONNECT:MDS_DISCONNECT;
+        switch (imp->imp_connect_op) {
+        case OST_CONNECT: rq_opc = OST_DISCONNECT; break;
+        case MDS_CONNECT: rq_opc = MDS_DISCONNECT; break;
+        case MGMT_CONNECT:rq_opc = MGMT_DISCONNECT;break;
+        default:
+                CERROR("don't know how to disconnect from %s (connect_op %d)\n",
+                       imp->imp_target_uuid.uuid, imp->imp_connect_op);
+                RETURN(-EINVAL);
+        }
+
         down(&cli->cl_sem);
         if (!cli->cl_conn_count) {
                 CERROR("disconnecting disconnected device (%s)\n",
diff --git a/lustre/lov/lov_internal.h b/lustre/lov/lov_internal.h
new file mode 100644
index 0000000..cad14ee
--- /dev/null
+++ b/lustre/lov/lov_internal.h
@@ -0,0 +1,12 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2003 Cluster File Systems, Inc.
+ *
+ * This code is issued under the GNU General Public License.
+ * See the file COPYING in this distribution
+ */
+
+int lov_get_stripecnt(struct lov_obd *lov, int stripe_count);
+int lov_alloc_memmd(struct lov_stripe_md **lsmp, int stripe_count);
+void lov_free_memmd(struct lov_stripe_md **lsmp);
diff --git a/lustre/mds/mds_lib.c b/lustre/mds/mds_lib.c
index a72e176..640614e 100644
--- a/lustre/mds/mds_lib.c
+++ b/lustre/mds/mds_lib.c
@@ -57,6 +57,7 @@ void mds_pack_inode2fid(struct ll_fid *fid, struct inode *inode)
         fid->f_type = (S_IFMT & inode->i_mode);
 }
 
+/* Note that we can copy all of the fields, just some will not be "valid" */
 void mds_pack_inode2body(struct mds_body *b, struct inode *inode)
 {
         b->valid = OBD_MD_FLID | OBD_MD_FLATIME | OBD_MD_FLMTIME |
diff --git a/lustre/mgmt/.cvsignore b/lustre/mgmt/.cvsignore
new file mode 100644
index 0000000..067f05c
--- /dev/null
+++ b/lustre/mgmt/.cvsignore
@@ -0,0 +1,9 @@
+.Xrefs
+config.log
+config.status
+configure
+Makefile
+Makefile.in
+.deps
+tags
+TAGS
diff --git a/lustre/mgmt/mgmt_cli.c b/lustre/mgmt/mgmt_cli.c
new file mode 100644
index 0000000..fba49ab
--- /dev/null
+++ b/lustre/mgmt/mgmt_cli.c
@@ -0,0 +1,269 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Implementation of the management/health monitoring client.
+ *
+ *  Copyright (c) 2003 Cluster File Systems, Inc.
+ *   Author: Mike Shaver <shaver@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define EXPORT_SYMTAB
+#define DEBUG_SUBSYSTEM S_MGMT
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include <linux/obd.h>
+#include <linux/obd_class.h>
+#include <linux/obd_support.h>
+#include <linux/lustre_idl.h>
+#include <linux/lustre_net.h>
+#include <linux/lustre_mgmt.h>
+
+/*** Registration and service/thread management. ***/
+
+/* An entry representing one obd which has registered for management events. */
+struct mgmtcli_registrant {
+        struct list_head   chain;         /* on mgmtcli_obd.mc_registered */
+        struct obd_device *notify_obd;    /* the obd that registered */
+        struct obd_uuid   *relevant_uuid; /* caller's pointer, not copied */
+};
+ 
+/* Pinger thread body: daemonize, then sleep until asked to stop. */
+static int mgmtcli_pinger_main(void *arg)
+{
+        struct ptlrpc_svc_data *data = (struct ptlrpc_svc_data *)arg;
+        struct ptlrpc_thread *thread = data->thread;
+        unsigned long flags;
+        struct l_wait_info lwi = { 0 };
+        ENTRY;
+        lock_kernel();
+        /* vv ptlrpc_daemonize(); vv */
+        exit_mm(current);
+
+        current->session = 1;
+        current->pgrp = 1;
+        current->tty = NULL;
+
+        exit_files(current);
+        reparent_to_init();
+        /* ^^ ptlrpc_daemonize(); ^^ */
+
+        SIGNAL_MASK_LOCK(current, flags);
+        sigfillset(&current->blocked);
+        RECALC_SIGPENDING;
+        SIGNAL_MASK_UNLOCK(current, flags);
+
+#if defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20))
+        sprintf(current->comm, "%s|%d", data->name,current->thread.extern_pid);
+#elif defined(__arch_um__) && (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        sprintf(current->comm, "%s|%d", data->name,
+                current->thread.mode.tt.extern_pid);
+#else
+        strcpy(current->comm, data->name);
+#endif
+        unlock_kernel();
+
+        /* Tell our spawner we are running; data must not be used past here. */
+        thread->t_flags = SVC_RUNNING;
+        wake_up(&thread->t_ctl_waitq);
+
+        /* XXX No pinging is done yet: just sleep until we are told to stop. */
+        l_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPING, &lwi);
+
+        thread->t_flags = SVC_STOPPED;
+        wake_up(&thread->t_ctl_waitq);
+
+        CDEBUG(D_NET, "pinger thread exiting\n");
+        RETURN(0);
+}
+
+/* Connect the import and start the pinger thread.  Returns 0 or -errno. */
+static int mgmtcli_connect_to_svc(struct obd_device *obd)
+{
+        int rc;
+        struct mgmtcli_obd *mc = &obd->u.mgmtcli;
+        struct ptlrpc_svc_data svc_data;
+        struct ptlrpc_thread *thread;
+        struct l_wait_info lwi = { 0 };
+        ENTRY;
+        /* Connect to ourselves, and thusly to the mgmt service. */
+        rc = client_import_connect(&mc->mc_ping_handle, obd, &obd->obd_uuid);
+        if (rc) {
+                CERROR("failed to connect to mgmt svc: %d\n", rc);
+                (void)client_obd_cleanup(obd, 0);
+                RETURN(rc);
+        }
+
+        LASSERT(mc->mc_ping_thread == NULL);
+        OBD_ALLOC(thread, sizeof (*thread));
+        if (thread == NULL) {
+                (void)client_import_disconnect(&mc->mc_ping_handle, 0);
+                RETURN(-ENOMEM);
+        }
+        mc->mc_ping_thread = thread;
+        init_waitqueue_head(&thread->t_ctl_waitq);
+        svc_data.name = "mgmtcli";
+        svc_data.thread = thread;
+        rc = kernel_thread(mgmtcli_pinger_main, &svc_data, CLONE_VM | CLONE_FILES);
+        if (rc < 0) {
+                CERROR("can't start thread to ping mgmt svc %s: %d\n",
+                       mc->mc_import->imp_target_uuid.uuid, rc);
+                OBD_FREE(thread, sizeof (*thread));
+                mc->mc_ping_thread = NULL; /* don't leave a dangling pointer */
+                (void)client_import_disconnect(&mc->mc_ping_handle, 0);
+                RETURN(rc);
+        }
+        l_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_RUNNING, &lwi);
+        RETURN(0);
+}
+
+/* Disconnect from the mgmt svc and stop the pinger thread, if any. */
+static int mgmtcli_disconnect_from_svc(struct obd_device *obd)
+{
+        struct mgmtcli_obd *mc = &obd->u.mgmtcli;
+        struct obd_import *imp = mc->mc_import;
+        struct ptlrpc_thread *thread = mc->mc_ping_thread;
+        struct l_wait_info lwi = { 0 };
+        int rc;
+        ENTRY;
+        rc = client_import_disconnect(&mc->mc_ping_handle, 0);
+        if (rc) {
+                CERROR("can't disconnect from %s: %d (%s)\n",
+                       imp->imp_target_uuid.uuid, rc,
+                       (thread ?
+                        "stopping pinger thread anyway" :
+                        "pinger thread already stopped"));
+        }
+
+        if (thread) {
+                thread->t_flags = SVC_STOPPING;
+                wake_up(&thread->t_ctl_waitq);
+                l_wait_event(thread->t_ctl_waitq, thread->t_flags & SVC_STOPPED, &lwi);
+                OBD_FREE(mc->mc_ping_thread, sizeof (*mc->mc_ping_thread));
+                mc->mc_ping_thread = NULL; /* the next connect LASSERTs this */
+        }
+
+        RETURN(rc);
+}
+
+/* Register notify_obd for events; the first registrant starts the pinger. */
+static int mgmtcli_register_for_events(struct obd_device *mgmt_obd,
+                                       struct obd_device *notify_obd,
+                                       struct obd_uuid *relevant_uuid)
+{
+        int start_thread;
+        struct mgmtcli_registrant *reg;
+        struct mgmtcli_obd *mcobd = &mgmt_obd->u.mgmtcli;
+
+        ENTRY;
+        if (strcmp(mgmt_obd->obd_type->typ_name, LUSTRE_MGMTCLI_NAME))
+                RETURN(-EINVAL);
+
+        OBD_ALLOC(reg, sizeof(*reg));
+        if (reg == NULL)
+                RETURN(-ENOMEM);
+
+        reg->notify_obd = notify_obd;
+        reg->relevant_uuid = relevant_uuid; /* XXX hash */
+
+        spin_lock(&mgmt_obd->obd_dev_lock);
+        start_thread = list_empty(&mcobd->mc_registered);
+        list_add(&reg->chain, &mcobd->mc_registered); /* (new entry, head) */
+        spin_unlock(&mgmt_obd->obd_dev_lock);
+
+        if (start_thread)
+                RETURN(mgmtcli_connect_to_svc(mgmt_obd));
+        RETURN(0);
+}
+
+/* Drop notify_obd's registration; the last one out stops the pinger. */
+static int mgmtcli_deregister_for_events(struct obd_device *mgmt_obd,
+                                         struct obd_device *notify_obd)
+{
+        int stop_thread, found = 0;
+        struct mgmtcli_registrant *reg = NULL;
+        struct list_head *tmp, *n;
+        struct mgmtcli_obd *mc = &mgmt_obd->u.mgmtcli;
+        ENTRY;
+        if (strcmp(mgmt_obd->obd_type->typ_name, LUSTRE_MGMTCLI_NAME))
+                RETURN(-EINVAL);
+
+        spin_lock(&mgmt_obd->obd_dev_lock);
+        list_for_each_safe(tmp, n, &mc->mc_registered) {
+                reg = list_entry(tmp, struct mgmtcli_registrant, chain);
+                if (reg->notify_obd == notify_obd) {
+                        list_del(&reg->chain);
+                        found = 1;
+                        OBD_FREE(reg, sizeof(*reg));
+                        break;
+                }
+        }
+        stop_thread = list_empty(&mc->mc_registered);
+        spin_unlock(&mgmt_obd->obd_dev_lock);
+
+        /* Unknown registrant (maybe nobody is registered): don't assert. */
+        if (!found)
+                RETURN(-ENOENT);
+
+        /* The last registrant is gone, so stop pinging the mgmt svc. */
+        if (stop_thread)
+                RETURN(mgmtcli_disconnect_from_svc(mgmt_obd));
+        RETURN(0);
+}
+
+/*** OBD scaffolding and module paraphernalia. ***/
+
+static int mgmtcli_setup(struct obd_device *obd, obd_count len, void *buf)
+{
+        struct mgmtcli_obd *mc = &obd->u.mgmtcli;
+        ENTRY;  /* pairs with the RETURN() below in the debug trace */
+        INIT_LIST_HEAD(&mc->mc_registered); /* no registrants yet */
+        /* Initialize our nested client_obd structure. */
+        RETURN(client_obd_setup(obd, len, buf));
+}
+
+static struct obd_ops mgmtcli_obd_ops = {      /* mgmtcli obd method table */
+        .o_owner   = THIS_MODULE,
+        .o_setup   = mgmtcli_setup,
+        .o_cleanup = client_obd_cleanup,
+};
+
+static int __init mgmtcli_init(void)
+{       /* NOTE(review): the hooks stay registered if the return below fails. */
+        inter_module_register("mgmtcli_register_for_events", THIS_MODULE,
+                              mgmtcli_register_for_events);
+        inter_module_register("mgmtcli_deregister_for_events", THIS_MODULE,
+                              mgmtcli_deregister_for_events);
+        return class_register_type(&mgmtcli_obd_ops, 0, LUSTRE_MGMTCLI_NAME);
+}
+
+static void __exit mgmtcli_exit(void)
+{       /* Undo mgmtcli_init(): drop the obd type, then both hooks. */
+        class_unregister_type(LUSTRE_MGMTCLI_NAME);
+        inter_module_unregister("mgmtcli_register_for_events");
+        inter_module_unregister("mgmtcli_deregister_for_events");
+}
+
+#ifdef __KERNEL__
+MODULE_AUTHOR("Cluster File Systems, Inc. <info@clusterfs.com>");
+MODULE_DESCRIPTION("Lustre monitoring client v0.1");
+MODULE_LICENSE("GPL");
+
+module_init(mgmtcli_init);
+module_exit(mgmtcli_exit);
+#endif
diff --git a/lustre/obdfilter/filter_internal.h b/lustre/obdfilter/filter_internal.h
new file mode 100644
index 0000000..94b5321
--- /dev/null
+++ b/lustre/obdfilter/filter_internal.h
@@ -0,0 +1,122 @@
+#ifndef _FILTER_INTERNAL_H
+#define _FILTER_INTERNAL_H
+
+
+#ifdef __KERNEL__
+# include <linux/spinlock.h>
+#endif
+#include <linux/lustre_handles.h>
+#include <linux/obd.h>
+
+#ifndef OBD_FILTER_DEVICENAME
+# define OBD_FILTER_DEVICENAME "obdfilter"
+#endif
+
+#ifndef OBD_FILTER_SAN_DEVICENAME
+# define OBD_FILTER_SAN_DEVICENAME "sanobdfilter"
+#endif
+
+#define FILTER_LR_SERVER_SIZE    512
+
+#define FILTER_LR_CLIENT_START   8192
+#define FILTER_LR_CLIENT_SIZE    128
+
+#define FILTER_SUBDIR_COUNT      32            /* set to zero for no subdirs */
+
+#define FILTER_MOUNT_RECOV 2
+#define FILTER_RECOVERY_TIMEOUT (obd_timeout * 5 * HZ / 2) /* *waves hands* */
+
+/* Data stored per server at the head of the last_rcvd file.  In le32 order. */
+struct filter_server_data {
+        __u8  fsd_uuid[37];        /* server UUID */
+        __u8  fsd_uuid_padding[3]; /* unused */
+        __u64 fsd_last_objid;      /* last created object ID */
+        __u64 fsd_last_transno;    /* last completed transaction ID */
+        __u64 fsd_mount_count;     /* FILTER incarnation number */
+        __u32 fsd_feature_compat;  /* compatible feature flags */
+        __u32 fsd_feature_rocompat;/* read-only compatible feature flags */
+        __u32 fsd_feature_incompat;/* incompatible feature flags */
+        __u32 fsd_server_size;     /* size of server data area */
+        __u32 fsd_client_start;    /* start of per-client data area */
+        __u16 fsd_client_size;     /* size of per-client data area */
+        __u16 fsd_subdir_count;    /* number of subdirectories for objects */
+        __u64 fsd_catalog_oid;     /* recovery catalog object id */
+        __u32 fsd_catalog_ogen;    /* recovery catalog inode generation */
+        __u8  fsd_peeruuid[37];    /* UUID of MDS associated with this OST */
+        __u8  peer_padding[3];     /* unused */
+        __u8  fsd_padding[FILTER_LR_SERVER_SIZE - 140];
+};
+
+/* Data stored per client in the last_rcvd file.  In le32 order. */
+struct filter_client_data {
+        __u8  fcd_uuid[37];        /* client UUID */
+        __u8  fcd_uuid_padding[3]; /* unused */
+        __u64 fcd_last_rcvd;       /* last completed transaction ID */
+        __u64 fcd_mount_count;     /* FILTER incarnation number */
+        __u64 fcd_last_xid;        /* client RPC xid for the last transaction */
+        __u8  fcd_padding[FILTER_LR_CLIENT_SIZE - 64];
+};
+
+/* file data for open files on OST */
+struct filter_file_data {
+        struct portals_handle ffd_handle;
+        atomic_t              ffd_refcount;
+        struct list_head      ffd_export_list; /* export open list - fed_lock */
+        struct file          *ffd_file;         /* file handle */
+};
+
+struct filter_dentry_data {
+        struct llog_cookie      fdd_cookie;
+        obd_id                  fdd_objid;
+        __u32                   fdd_magic;
+        atomic_t                fdd_open_count;
+        int                     fdd_flags;
+};
+
+#define FILTER_DENTRY_MAGIC 0x9efba101
+#define FILTER_FLAG_DESTROY 0x0001      /* destroy dentry on last file close */
+
+enum {
+        LPROC_FILTER_READ_BYTES = 0,
+        LPROC_FILTER_WRITE_BYTES = 1,
+        LPROC_FILTER_LAST,
+};
+
+/* filter.c */
+struct dentry *filter_parent(struct obd_device *, obd_mode mode, obd_id objid);
+struct dentry *filter_parent_lock(struct obd_device *, obd_mode mode,
+                                  obd_id objid, ldlm_mode_t lock_mode,
+                                  struct lustre_handle *lockh);
+void f_dput(struct dentry *);
+struct dentry *filter_fid2dentry(struct obd_device *, struct dentry *dir,
+                                 obd_mode mode, obd_id id);
+int filter_finish_transno(struct obd_export *, struct obd_trans_info *, int rc);
+__u64 filter_next_id(struct filter_obd *);
+int filter_update_server_data(struct file *, struct filter_server_data *);
+int filter_common_setup(struct obd_device *, obd_count len, void *buf,
+                        char *option);
+
+/* filter_io.c */
+int filter_preprw(int cmd, struct obd_export *, struct obdo *, int objcount,
+                  struct obd_ioobj *, int niocount, struct niobuf_remote *,
+                  struct niobuf_local *, struct obd_trans_info *);
+int filter_commitrw(int cmd, struct obd_export *, int objcount,
+                    struct obd_ioobj *, int niocount, struct niobuf_local *,
+                    struct obd_trans_info *);
+int filter_brw(int cmd, struct lustre_handle *, struct lov_stripe_md *,
+               obd_count oa_bufs, struct brw_page *, struct obd_trans_info *);
+
+/* filter_log.c */
+int filter_log_cancel(struct lustre_handle *, struct lov_stripe_md *,
+                      int num_cookies, struct llog_cookie *, int flags);
+int filter_log_op_create(struct llog_handle *cathandle, struct ll_fid *mds_fid,
+                         obd_id oid, obd_count ogen, struct llog_cookie *);
+int filter_log_op_orphan(struct llog_handle *cathandle, obd_id oid,
+                         obd_count ogen, struct llog_cookie *);
+
+/* filter_san.c */
+int filter_san_setup(struct obd_device *obd, obd_count len, void *buf);
+int filter_san_preprw(int cmd, struct lustre_handle *, int objcount,
+                      struct obd_ioobj *, int niocount, struct niobuf_remote *);
+
+#endif
diff --git a/lustre/obdfilter/filter_io.c b/lustre/obdfilter/filter_io.c
new file mode 100644
index 0000000..ee65d89
--- /dev/null
+++ b/lustre/obdfilter/filter_io.c
@@ -0,0 +1,764 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  linux/fs/obdfilter/filter_io.c
+ *
+ *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ *   Author: Peter Braam <braam@clusterfs.com>
+ *   Author: Andreas Dilger <adilger@clusterfs.com>
+ *   Author: Phil Schwan <phil@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/pagemap.h> // XXX kill me soon
+#include <linux/version.h>
+
+#include <linux/obd_class.h>
+#include <linux/lustre_fsfilt.h>
+#include "filter_internal.h"
+
+/* Start reading lnb's page into the page cache.  Returns 0 or -errno. */
+static int filter_start_page_read(struct inode *inode, struct niobuf_local *lnb)
+{
+        struct address_space *mapping = inode->i_mapping;
+        struct page *page;
+        unsigned long index = lnb->offset >> PAGE_SHIFT;
+        int rc;
+
+        page = grab_cache_page(mapping, index); /* locked page */
+        if (page == NULL)       /* failure is NULL, never an ERR_PTR */
+                return lnb->rc = -ENOMEM;
+
+        lnb->page = page;
+        /* lnb->rc is the byte count: what exists past offset, at most len. */
+        if (inode->i_size < lnb->offset + lnb->len)
+                lnb->rc = inode->i_size - lnb->offset;
+        else
+                lnb->rc = lnb->len;
+
+        if (PageUptodate(page)) {
+                unlock_page(page);
+                return 0;
+        }
+
+        rc = mapping->a_ops->readpage(NULL, page);
+        if (rc < 0) {
+                CERROR("page index %lu, rc = %d\n", index, rc);
+                lnb->page = NULL;
+                page_cache_release(page);
+                return lnb->rc = rc;
+        }
+        return 0;
+}
+
+/* Complete a read begun by filter_start_page_read(); returns 0 or -EIO. */
+static int filter_finish_page_read(struct niobuf_local *lnb)
+{
+        struct page *pg = lnb->page;
+
+        /* No page was attached, or the page is already up to date. */
+        if (pg == NULL || PageUptodate(pg))
+                return 0;
+
+        wait_on_page(pg);
+        if (!PageUptodate(pg)) {
+                CERROR("page index %lu/offset "LPX64" not uptodate\n",
+                       pg->index, lnb->offset);
+                GOTO(err_page, lnb->rc = -EIO);
+        }
+        if (PageError(pg)) {
+                CERROR("page index %lu/offset "LPX64" has error\n",
+                       pg->index, lnb->offset);
+                GOTO(err_page, lnb->rc = -EIO);
+        }
+        return 0;
+
+err_page:
+        page_cache_release(pg);
+        lnb->page = NULL;
+        return lnb->rc;
+}
+
+/* Grab and prepare_write() a whole page; returns it locked, or ERR_PTR(). */
+static struct page *lustre_get_page_write(struct inode *inode,
+                                          unsigned long index)
+{
+        struct address_space *mapping = inode->i_mapping;
+        struct page *page;
+        int rc;
+
+        page = grab_cache_page(mapping, index); /* locked page */
+        if (page == NULL)       /* failure is NULL, never an ERR_PTR */
+                return ERR_PTR(-ENOMEM);
+
+        /* Note: Called with "0" and "PAGE_SIZE" this is essentially a no-op
+         * for most filesystems, because we write the whole page.  For
+         * partial-page I/O this will read in the page. */
+        rc = mapping->a_ops->prepare_write(NULL, page, 0, PAGE_SIZE);
+        if (rc) {
+                CERROR("page index %lu, rc = %d\n", index, rc);
+                if (rc != -ENOSPC)
+                        LBUG();
+                GOTO(err_unlock, rc);
+        }
+        /* XXX not sure if we need this if we are overwriting page */
+        if (PageError(page)) {
+                CERROR("error on page index %lu, rc = %d\n", index, rc);
+                LBUG();
+                GOTO(err_unlock, rc = -EIO);
+        }
+        return page;
+
+err_unlock:
+        unlock_page(page);
+        page_cache_release(page);
+        return ERR_PTR(rc);
+}
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+/* Data-only changes should bump just the mtime (unlike update_inode_times()
+ * in generic_file_write()); the ctime is touched only when ctime_too is set. */
+static inline void inode_update_time(struct inode *inode, int ctime_too)
+{
+        time_t now = CURRENT_TIME;
+        if (inode->i_mtime != now || (ctime_too && inode->i_ctime != now)) {
+                inode->i_mtime = now;
+                if (ctime_too)
+                        inode->i_ctime = now;
+                mark_inode_dirty_sync(inode);
+        }
+}
+#endif
+
+/* Commit lnb's byte range to its page, then unlock and release the page. */
+static int lustre_commit_write(struct niobuf_local *lnb)
+{
+        struct page *page = lnb->page;
+        unsigned from = lnb->offset & ~PAGE_MASK; /* start within the page */
+        unsigned to = from + lnb->len;            /* end within the page */
+        struct inode *inode = page->mapping->host;
+        int err;
+
+        LASSERT(to <= PAGE_SIZE);
+        err = page->mapping->a_ops->commit_write(NULL, page, from, to);
+        if (!err && IS_SYNC(inode))
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+                wait_on_page_locked(page);
+#else
+                err = waitfor_one_page(page);
+#endif
+        //SetPageUptodate(page); // the client commit_write will do this
+        SetPageReferenced(page);
+        unlock_page(page);
+        page_cache_release(page);
+        return err;
+}
+
+/* Get lnb's page ready for writing: a prepared page-cache page (counted in
+ * *pglocked) or, failing that, a N_LOCAL_TEMP_PAGE one.  Returns 0/-errno. */
+int filter_get_page_write(struct inode *inode, struct niobuf_local *lnb,
+                          int *pglocked)
+{
+        unsigned long index = lnb->offset >> PAGE_SHIFT;
+        struct address_space *mapping = inode->i_mapping;
+        struct page *page;
+        int rc;
+
+        //ASSERT_PAGE_INDEX(index, GOTO(err, rc = -EINVAL));
+        if (*pglocked)
+                page = grab_cache_page_nowait(mapping, index); /* locked page */
+        else
+                page = grab_cache_page(mapping, index); /* locked page */
+
+        /* This page is currently locked, so get a temporary page instead. */
+        if (page == NULL) {
+                CDEBUG(D_ERROR,"ino %lu page %ld locked\n", inode->i_ino,index);
+                page = alloc_pages(GFP_KERNEL, 0); /* locked page */
+                if (page == NULL) {
+                        CERROR("no memory for a temp page\n");
+                        GOTO(err, rc = -ENOMEM);
+                }
+                page->index = index;
+                lnb->page = page;
+                lnb->flags |= N_LOCAL_TEMP_PAGE;
+        } else if (!IS_ERR(page)) {
+                (*pglocked)++;
+
+                rc = mapping->a_ops->prepare_write(NULL, page,
+                                                   lnb->offset & ~PAGE_MASK,
+                                                   lnb->len);
+                if (rc) {
+                        if (rc != -ENOSPC)
+                                CERROR("page index %lu, rc = %d\n", index, rc);
+                        GOTO(err_unlock, rc);
+                }
+                /* XXX not sure if we need this if we are overwriting page */
+                if (PageError(page)) {
+                        CERROR("error on page index %lu, rc = %d\n", index, rc);
+                        LBUG();
+                        GOTO(err_unlock, rc = -EIO);
+                }
+                lnb->page = page;
+        }
+        return 0;
+
+err_unlock:
+        unlock_page(page);
+        page_cache_release(page);
+err:
+        return lnb->rc = rc;
+}
+
+/* Read side of preprw: look up the (single) object, start a page read for each
+ * remote niobuf into res[], then wait for the reads.  Returns 0 or -errno. */
+static int filter_preprw_read(struct obd_export *exp, struct obdo *obdo,
+                              int objcount, struct obd_ioobj *obj,
+                              int niocount, struct niobuf_remote *nb,
+                              struct niobuf_local *res,
+                              struct obd_trans_info *oti)
+{
+        struct obd_run_ctxt saved;
+        struct obd_device *obd;
+        struct obd_ioobj *o;
+        struct niobuf_remote *rnb;
+        struct niobuf_local *lnb;
+        struct fsfilt_objinfo *fso;
+        struct dentry *dentry;
+        struct inode *inode;
+        int rc = 0, i, j, tot_bytes = 0, cleanup_phase = 0;
+        unsigned long now = jiffies;
+        ENTRY;
+        LASSERT(objcount == 1);
+        obd = exp->exp_obd;
+        if (obd == NULL)
+                RETURN(-EINVAL);
+        OBD_ALLOC(fso, objcount * sizeof(*fso));
+        if (fso == NULL)
+                RETURN(-ENOMEM);
+        memset(res, 0, niocount * sizeof(*res));
+
+        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        for (i = 0, o = obj; i < objcount; i++, o++) {
+                struct filter_dentry_data *fdd;
+                LASSERT(o->ioo_bufcnt);
+
+                dentry = filter_fid2dentry(obd, NULL, o->ioo_type, o->ioo_id);
+                if (IS_ERR(dentry))
+                        GOTO(out_objinfo, rc = PTR_ERR(dentry));
+
+                if (dentry->d_inode == NULL) {
+                        CERROR("trying to BRW to non-existent file "LPU64"\n",
+                               o->ioo_id);
+                        f_dput(dentry);
+                        GOTO(out_objinfo, rc = -ENOENT);
+                }
+
+                fso[i].fso_dentry = dentry;
+                fso[i].fso_bufcnt = o->ioo_bufcnt;
+
+                fdd = dentry->d_fsdata;
+                if (fdd == NULL || !atomic_read(&fdd->fdd_open_count))
+                        CDEBUG(D_PAGE, "I/O to unopened object "LPU64"\n",
+                               o->ioo_id);
+        }
+
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow prep setup %lus\n", (jiffies - now) / HZ);
+
+        for (i = 0, o = obj, rnb = nb, lnb = res; i < objcount; i++, o++) {
+                dentry = fso[i].fso_dentry;
+                inode = dentry->d_inode;
+
+                for (j = 0; j < o->ioo_bufcnt; j++, rnb++, lnb++) {
+                        if (j == 0)
+                                lnb->dentry = dentry;
+                        else
+                                lnb->dentry = dget(dentry);
+
+                        lnb->offset = rnb->offset;
+                        lnb->len    = rnb->len;
+                        lnb->flags  = rnb->flags;
+                        lnb->start  = jiffies;
+
+                        if (inode->i_size <= rnb->offset) {
+                                /* If there's no more data, abort early.
+                                 * lnb->page == NULL and lnb->rc == 0, so it's
+                                 * easy to detect later. */
+                                f_dput(dentry);
+                                lnb->dentry = NULL;
+                                break;
+                        } else {
+                                rc = filter_start_page_read(inode, lnb);
+                        }
+
+                        if (rc) {
+                                CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
+                                       "page err %u@"LPU64" %u/%u %p: rc %d\n",
+                                       lnb->len, lnb->offset, j, o->ioo_bufcnt,
+                                       dentry, rc);
+                                f_dput(dentry);
+                                GOTO(out_pages, rc);
+                        }
+
+                        tot_bytes += lnb->rc;
+                        if (lnb->rc < lnb->len)
+                                break; /* short read */
+                }
+        }
+
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow prep get page %lus\n", (jiffies - now) / HZ);
+        /* NOTE(review): after a "break" the loop below skips lnb's own entry. */
+        lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_READ_BYTES, tot_bytes);
+        while (lnb-- > res) {
+                rc = filter_finish_page_read(lnb);
+                if (rc) {
+                        CERROR("error page %u@"LPU64" %u %p: rc %d\n", lnb->len,
+                               lnb->offset, (int)(lnb - res), lnb->dentry, rc);
+                        f_dput(lnb->dentry);
+                        GOTO(out_pages, rc);
+                }
+        }
+
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow prep finish page %lus\n", (jiffies - now) / HZ);
+
+        EXIT;
+out:
+        OBD_FREE(fso, objcount * sizeof(*fso));
+        /* we saved the journal handle into oti->oti_handle instead */
+        current->journal_info = NULL;
+        pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        return rc;
+
+out_pages:
+        while (lnb-- > res) {
+                page_cache_release(lnb->page);
+                f_dput(lnb->dentry);
+        }
+        goto out; /* dropped the dentry refs already (one per page) */
+
+out_objinfo:
+        for (i = 0; i < objcount && fso[i].fso_dentry; i++)
+                f_dput(fso[i].fso_dentry);
+        goto out;
+}
+
+/* We need to balance prepare_write() calls with commit_write() calls.
+ * If the page has been prepared, but we have no data for it, we don't
+ * want to overwrite valid data on disk, but we still need to zero out
+ * data for space which was newly allocated.  Like part of what happens
+ * in __block_prepare_write() for newly allocated blocks.
+ *
+ * XXX currently __block_prepare_write() creates buffers for all the
+ *     pages, and the filesystems mark these buffers as BH_New if they
+ *     were newly allocated from disk. We use the BH_New flag similarly. */
+static int filter_commit_write(struct niobuf_local *lnb, int err)
+{
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+        if (err) {
+                unsigned block_start, block_end;
+                struct buffer_head *bh, *head = lnb->page->buffers;
+                unsigned blocksize = head->b_size;
+                /* NOTE(review): assumes page->buffers != NULL */
+                /* debugging: just seeing if this ever happens */
+                CDEBUG(err == -ENOSPC ? D_INODE : D_ERROR,
+                       "called for ino %lu:%lu on err %d\n",
+                       lnb->page->mapping->host->i_ino, lnb->page->index, err);
+
+                /* Currently one buffer per page, but in the future... */
+                for (bh = head, block_start = 0; bh != head || !block_start;
+                     block_start = block_end, bh = bh->b_this_page) {
+                        block_end = block_start + blocksize;
+                        if (buffer_new(bh)) {
+                                memset(kmap(lnb->page) + block_start, 0,
+                                       blocksize);
+                                kunmap(lnb->page);
+                        }
+                }
+        }
+#endif
+        return lustre_commit_write(lnb);
+}
+
+/* If we ever start to support multi-object BRW RPCs, we will need to get locks
+ * on multiple inodes.  That isn't all, because there still exists the
+ * possibility of a truncate starting a new transaction while holding the ext3
+ * rwsem = write while some writes (which have started their transactions here)
+ * blocking on the ext3 rwsem = read => lock inversion.
+ *
+ * The handling gets very ugly when dealing with locked pages.  It may be easier
+ * to just get rid of the locked page code (which has problems of its own) and
+ * either discover we do not need it anymore (i.e. it was a symptom of another
+ * bug) or ensure we get the page locks in an appropriate order. */
+static int filter_preprw_write(struct obd_export *exp, struct obdo *obdo,
+                               int objcount, struct obd_ioobj *obj,
+                               int niocount, struct niobuf_remote *nb,
+                               struct niobuf_local *res,
+                               struct obd_trans_info *oti)
+{
+        struct obd_run_ctxt saved;
+        struct obd_device *obd;
+        struct obd_ioobj *o;
+        struct niobuf_remote *rnb;
+        struct niobuf_local *lnb;
+        struct fsfilt_objinfo *fso;
+        struct dentry *dentry;
+        int pglocked = 0, rc = 0, i, j, tot_bytes = 0, cleanup_phase = 0;
+        unsigned long now = jiffies;
+        ENTRY;
+        LASSERT(objcount == 1);
+
+        obd = exp->exp_obd;
+        if (obd == NULL)
+                RETURN(-EINVAL);
+        OBD_ALLOC(fso, objcount * sizeof(*fso));
+        if (fso == NULL)
+                RETURN(-ENOMEM);
+
+        memset(res, 0, niocount * sizeof(*res));
+
+        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        for (i = 0, o = obj; i < objcount; i++, o++) {
+                struct filter_dentry_data *fdd;
+                LASSERT(o->ioo_bufcnt);
+
+                dentry = filter_fid2dentry(obd, NULL, o->ioo_type, o->ioo_id);
+                if (IS_ERR(dentry))
+                        GOTO(out_objinfo, rc = PTR_ERR(dentry));
+
+                if (dentry->d_inode == NULL) {
+                        CERROR("trying to BRW to non-existent file "LPU64"\n",
+                               o->ioo_id);
+                        f_dput(dentry);
+                        GOTO(out_objinfo, rc = -ENOENT);
+                }
+
+                fso[i].fso_dentry = dentry;
+                fso[i].fso_bufcnt = o->ioo_bufcnt;
+
+                down(&dentry->d_inode->i_sem);
+                fdd = dentry->d_fsdata;
+                if (fdd == NULL || !atomic_read(&fdd->fdd_open_count))
+                        CDEBUG(D_PAGE, "I/O to unopened object "LPU64"\n",
+                               o->ioo_id);
+        }
+
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow prep setup %lus\n", (jiffies - now) / HZ);
+
+        LASSERT(oti != NULL);
+        oti->oti_handle = fsfilt_brw_start(obd, objcount, fso, niocount, oti);
+        if (IS_ERR(oti->oti_handle)) {
+                rc = PTR_ERR(oti->oti_handle);
+                CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
+                       "error starting transaction: rc = %d\n", rc);
+                oti->oti_handle = NULL;
+                GOTO(out_objinfo, rc);
+        }
+
+        for (i = 0, o = obj, rnb = nb, lnb = res; i < objcount; i++, o++) {
+                dentry = fso[i].fso_dentry;
+                for (j = 0; j < o->ioo_bufcnt; j++, rnb++, lnb++) {
+                        if (j == 0)
+                                lnb->dentry = dentry;
+                        else
+                                lnb->dentry = dget(dentry);
+
+                        lnb->offset = rnb->offset;
+                        lnb->len    = rnb->len;
+                        lnb->flags  = rnb->flags;
+                        lnb->start  = jiffies;
+
+                        rc = filter_get_page_write(dentry->d_inode, lnb,
+                                                   &pglocked);
+                        if (rc)
+                                up(&dentry->d_inode->i_sem);
+
+                        if (rc) {
+                                CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
+                                       "page err %u@"LPU64" %u/%u %p: rc %d\n",
+                                       lnb->len, lnb->offset, j, o->ioo_bufcnt,
+                                       dentry, rc);
+                                f_dput(dentry);
+                                GOTO(out_pages, rc);
+                        }
+                        tot_bytes += lnb->len;
+                }
+        }
+
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow prep get page %lus\n", (jiffies - now) / HZ);
+
+        lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_WRITE_BYTES,tot_bytes);
+
+        EXIT;
+out:
+        OBD_FREE(fso, objcount * sizeof(*fso));
+        /* we saved the journal handle into oti->oti_handle instead */
+        current->journal_info = NULL;
+        pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        return rc;
+
+out_pages:
+        while (lnb-- > res) {
+                filter_commit_write(lnb, rc);
+                up(&lnb->dentry->d_inode->i_sem);
+                f_dput(lnb->dentry);
+        }
+        filter_finish_transno(exp, oti, rc);
+        fsfilt_commit(obd, filter_parent(obd,S_IFREG,obj->ioo_id)->d_inode,
+                      oti->oti_handle, 0);
+        goto out; /* dropped the dentry refs already (one per page) */
+
+out_objinfo:
+        for (i = 0; i < objcount && fso[i].fso_dentry; i++) {
+                up(&fso[i].fso_dentry->d_inode->i_sem);
+                f_dput(fso[i].fso_dentry);
+        }
+        goto out;
+}
+
+int filter_preprw(int cmd, struct obd_export *exp, struct obdo *obdo,
+                  int objcount, struct obd_ioobj *obj, int niocount,
+                  struct niobuf_remote *nb, struct niobuf_local *res,
+                  struct obd_trans_info *oti)
+{
+        if (cmd == OBD_BRW_WRITE) /* dispatch on the BRW direction */
+                return filter_preprw_write(exp, obdo, objcount, obj, niocount,
+                                           nb, res, oti);
+        if (cmd == OBD_BRW_READ)
+                return filter_preprw_read(exp, obdo, objcount, obj, niocount,
+                                          nb, res, oti);
+        LBUG(); /* neither a plain read nor a plain write */
+        return -EPROTO; /* defined result if LBUG() ever returns */
+}
+
+/* Copy lnb->page into the inode's page at the same index, then commit that.
+ * It is highly unlikely that we would ever get an error here.  The page we want
+ * to get was previously locked, so it had to have already allocated the space,
+ * and we were just writing over the same data, so the file has no hole there.
+ *
+ * XXX: possibility of a race with truncate could exist, need to check that.
+ *      There are no guarantees w.r.t. write order even on a local filesystem,
+ *      although the normal response would be to return the number of bytes
+ *      successfully written and leave the rest to the app. */
+static int filter_write_locked_page(struct niobuf_local *lnb)
+{
+        struct page *lpage;
+        void *lpage_addr, *lnb_addr;
+        int rc;
+        ENTRY;
+
+        lpage = lustre_get_page_write(lnb->dentry->d_inode, lnb->page->index);
+        if (IS_ERR(lpage)) {
+                rc = PTR_ERR(lpage);
+                CERROR("error getting locked page index %ld: rc = %d\n",
+                       lnb->page->index, rc);
+                LBUG();
+                lustre_commit_write(lnb);
+                RETURN(rc);
+        }
+
+        /* 2 kmaps == vanishingly small deadlock opportunity */
+        lpage_addr = kmap(lpage);
+        lnb_addr = kmap(lnb->page);
+
+        memcpy(lpage_addr, lnb_addr, PAGE_SIZE);
+
+        kunmap(lnb->page);
+        kunmap(lpage);
+
+        page_cache_release(lnb->page);
+        /* swap the inode's page into lnb so the commit below acts on it */
+        lnb->page = lpage;
+        rc = lustre_commit_write(lnb);
+        if (rc)
+                CERROR("error committing locked page %ld: rc = %d\n",
+                       lnb->page->index, rc);
+        RETURN(rc);
+}
+
+int filter_commitrw(int cmd, struct obd_export *exp, int objcount,
+                    struct obd_ioobj *obj, int niocount,
+                    struct niobuf_local *res, struct obd_trans_info *oti)
+{
+        struct obd_run_ctxt saved;
+        struct obd_ioobj *o;
+        struct niobuf_local *lnb;
+        struct obd_device *obd = exp->exp_obd;
+        int found_locked = 0, rc = 0, i;
+        int nested_trans = current->journal_info != NULL;
+        unsigned long now = jiffies;  /* DEBUGGING OST TIMEOUTS */
+        ENTRY;
+        /* Finish a BRW: commit (write) or release (read) each page in res */
+        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+
+        if (cmd & OBD_BRW_WRITE) {
+                LASSERT(oti);
+                LASSERT(current->journal_info == NULL ||
+                        current->journal_info == oti->oti_handle);
+                current->journal_info = oti->oti_handle;
+        }
+        /* pass 1: normal pages; N_LOCAL_TEMP_PAGE pages are only counted */
+        for (i = 0, o = obj, lnb = res; i < objcount; i++, o++) {
+                int j;
+
+                if (cmd & OBD_BRW_WRITE) {
+                        inode_update_time(lnb->dentry->d_inode, 1);
+                        up(&lnb->dentry->d_inode->i_sem);
+                }
+                for (j = 0 ; j < o->ioo_bufcnt ; j++, lnb++) {
+                        if (lnb->page == NULL) {
+                                continue;
+                        }
+
+                        if (lnb->flags & N_LOCAL_TEMP_PAGE) {
+                                found_locked++;
+                                continue;
+                        }
+
+                        if (time_after(jiffies, lnb->start + 15 * HZ))
+                                CERROR("slow commitrw %lus\n",
+                                       (jiffies - lnb->start) / HZ);
+
+                        if (cmd & OBD_BRW_WRITE) {
+                                int err = filter_commit_write(lnb, 0);
+
+                                if (!rc)
+                                        rc = err;
+                        } else {
+                                page_cache_release(lnb->page);
+                        }
+
+                        f_dput(lnb->dentry);
+                        if (time_after(jiffies, lnb->start + 15 * HZ))
+                                CERROR("slow commit_write %lus\n",
+                                       (jiffies - lnb->start) / HZ);
+                }
+        }
+        /* pass 2: write out the N_LOCAL_TEMP_PAGE pages counted above */
+        for (i = 0, o = obj, lnb = res; found_locked > 0 && i < objcount;
+             i++, o++) {
+                int j;
+                for (j = 0 ; j < o->ioo_bufcnt ; j++, lnb++) {
+                        int err;
+                        if (!(lnb->flags & N_LOCAL_TEMP_PAGE))
+                                continue;
+
+                        if (time_after(jiffies, lnb->start + 15 * HZ))
+                                CERROR("slow commitrw locked %lus\n",
+                                       (jiffies - lnb->start) / HZ);
+
+                        err = filter_write_locked_page(lnb);
+                        if (!rc)
+                                rc = err;
+                        f_dput(lnb->dentry);
+                        found_locked--;
+
+                        if (time_after(jiffies, lnb->start + 15 * HZ))
+                                CERROR("slow commit_write locked %lus\n",
+                                       (jiffies - lnb->start) / HZ);
+                }
+        }
+
+        if (cmd & OBD_BRW_WRITE) {
+                /* We just want any dentry for the commit, for now */
+                struct dentry *dparent = filter_parent(obd, S_IFREG, 0);
+                int err;
+
+                rc = filter_finish_transno(exp, oti, rc);
+                err = fsfilt_commit(obd, dparent->d_inode, oti->oti_handle,
+                                    obd_sync_filter);
+                if (err)
+                        rc = err;
+                if (obd_sync_filter)
+                        LASSERT(oti->oti_transno <= obd->obd_last_committed);
+                if (time_after(jiffies, now + 15 * HZ))
+                        CERROR("slow commitrw commit %lus\n", (jiffies-now)/HZ);
+        }
+
+        LASSERT(nested_trans || current->journal_info == NULL);
+        pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        RETURN(rc);
+}
+
+int filter_brw(int cmd, struct lustre_handle *conn, struct lov_stripe_md *lsm,
+               obd_count oa_bufs, struct brw_page *pga,
+               struct obd_trans_info *oti)
+{
+        struct obd_export *exp = class_conn2export(conn);
+        struct obd_ioobj ioo;
+        struct niobuf_local *lnb;
+        struct niobuf_remote *rnb;
+        obd_count i;
+        int ret = 0;
+        ENTRY;
+        /* Local BRW: copy data between pga[] pages and the filter's pages */
+        if (exp == NULL)
+                RETURN(-EINVAL);
+
+        OBD_ALLOC(lnb, oa_bufs * sizeof(struct niobuf_local));
+        OBD_ALLOC(rnb, oa_bufs * sizeof(struct niobuf_remote));
+
+        if (lnb == NULL || rnb == NULL)
+                GOTO(out, ret = -ENOMEM);
+
+        for (i = 0; i < oa_bufs; i++) {
+                rnb[i].offset = pga[i].off;
+                rnb[i].len = pga[i].count;
+        }
+
+        ioo.ioo_id = lsm->lsm_object_id;
+        ioo.ioo_gr = 0;
+        ioo.ioo_type = S_IFREG;
+        ioo.ioo_bufcnt = oa_bufs;
+
+        ret = filter_preprw(cmd, exp, NULL, 1, &ioo, oa_bufs, rnb, lnb, oti);
+        if (ret != 0)
+                GOTO(out, ret);
+
+        for (i = 0; i < oa_bufs; i++) {
+                void *virt = kmap(pga[i].pg);
+                obd_off off = pga[i].off & ~PAGE_MASK;
+                void *addr = kmap(lnb[i].page);
+
+                /* 2 kmaps == vanishingly small deadlock opportunity */
+
+                if (cmd & OBD_BRW_WRITE)
+                        memcpy(addr + off, virt + off, pga[i].count);
+                else
+                        memcpy(virt + off, addr + off, pga[i].count);
+                /* kunmap() takes the struct page, not the mapped address */
+                kunmap(lnb[i].page);
+                kunmap(pga[i].pg);
+        }
+
+        ret = filter_commitrw(cmd, exp, 1, &ioo, oa_bufs, lnb, oti);
+
+out:
+        if (lnb)
+                OBD_FREE(lnb, oa_bufs * sizeof(struct niobuf_local));
+        if (rnb)
+                OBD_FREE(rnb, oa_bufs * sizeof(struct niobuf_remote));
+        class_export_put(exp);
+        RETURN(ret);
+}
diff --git a/lustre/obdfilter/filter_log.c b/lustre/obdfilter/filter_log.c
new file mode 100644
index 0000000..790659d
--- /dev/null
+++ b/lustre/obdfilter/filter_log.c
@@ -0,0 +1,379 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  linux/fs/obdfilter/filter_log.c
+ *
+ *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ *   Author: Peter Braam <braam@clusterfs.com>
+ *   Author: Andreas Dilger <adilger@clusterfs.com>
+ *   Author: Phil Schwan <phil@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/version.h>
+
+#include <portals/list.h>
+#include <linux/obd_class.h>
+#include <linux/lustre_fsfilt.h>
+#include <linux/lustre_commit_confd.h>
+
+#include "filter_internal.h"
+
+static struct llog_handle *filter_log_create(struct obd_device *obd);
+
+/* llog close callback; an empty non-catalog log is also unlinked here.
+ * Assumes caller has already pushed us into the kernel context. */
+static int filter_log_close(struct llog_handle *cathandle,
+                            struct llog_handle *loghandle)
+{
+        struct llog_object_hdr *llh = loghandle->lgh_hdr;
+        struct file *file = loghandle->lgh_file;
+        struct dentry *dparent = NULL, *dchild = NULL;
+        struct lustre_handle parent_lockh;
+        struct llog_logid *lgl = &loghandle->lgh_cookie.lgc_lgl;
+        int rc;
+        ENTRY;
+
+        /* If we are going to delete this log, grab a ref before we close
+         * it so we don't have to immediately do another lookup. */
+        if (llh->llh_hdr.lth_type != LLOG_CATALOG_MAGIC && llh->llh_count == 0){
+                CDEBUG(D_INODE, "deleting log file "LPX64":%x\n",
+                       lgl->lgl_oid, lgl->lgl_ogen);
+                dparent = filter_parent_lock(loghandle->lgh_obd, S_IFREG,
+                                             lgl->lgl_oid,LCK_PW,&parent_lockh);
+                if (IS_ERR(dparent)) {
+                        rc = PTR_ERR(dparent);
+                        CERROR("error locking parent, orphan log %*s: rc %d\n",
+                               file->f_dentry->d_name.len,
+                               file->f_dentry->d_name.name, rc);
+                        RETURN(rc); /* NOTE(review): skips close and free */
+                } else {
+                        dchild = dget(file->f_dentry);
+                        llog_delete_log(cathandle, loghandle);
+                }
+        } else {
+                CDEBUG(D_INODE, "closing log file "LPX64":%x\n",
+                       lgl->lgl_oid, lgl->lgl_ogen);
+        }
+
+        rc = filp_close(file, 0);
+
+        llog_free_handle(loghandle); /* also removes loghandle from list */
+
+        if (dchild != NULL) {
+                int err = vfs_unlink(dparent->d_inode, dchild);
+                if (err) {
+                        CERROR("error unlinking empty log %*s: rc %d\n",
+                               dchild->d_name.len, dchild->d_name.name, err);
+                        if (!rc)
+                                rc = err;
+                }
+                f_dput(dchild);
+                ldlm_lock_decref(&parent_lockh, LCK_PW);
+        }
+        RETURN(rc);
+}
+
+/* llog open callback: look up the log named by logcookie and open its file.
+ * Assumes caller has already pushed us into the kernel context. */
+static struct llog_handle *filter_log_open(struct obd_device *obd,
+                                           struct llog_cookie *logcookie)
+{
+        struct llog_logid *lgl = &logcookie->lgc_lgl;
+        struct llog_handle *loghandle;
+        struct dentry *dchild;
+        int rc;
+        ENTRY;
+
+        loghandle = llog_alloc_handle();
+        if (!loghandle)
+                RETURN(ERR_PTR(-ENOMEM));
+
+        dchild = filter_fid2dentry(obd, NULL, S_IFREG, lgl->lgl_oid);
+        if (IS_ERR(dchild))
+                GOTO(out_handle, rc = PTR_ERR(dchild));
+
+        if (dchild->d_inode == NULL) {
+                CERROR("logcookie references non-existent object %*s\n",
+                       dchild->d_name.len, dchild->d_name.name);
+                GOTO(out_dentry, rc = -ENOENT);
+        }
+
+        if (dchild->d_inode->i_generation != lgl->lgl_ogen) {
+                CERROR("logcookie for %*s had different generation %x != %x\n",
+                       dchild->d_name.len, dchild->d_name.name,
+                       dchild->d_inode->i_generation, lgl->lgl_ogen);
+                GOTO(out_dentry, rc = -ESTALE);
+        }
+
+        /* dentry_open does a dput(dchild) and mntput(mnt) on error */
+        mntget(obd->u.filter.fo_vfsmnt);
+        loghandle->lgh_file = dentry_open(dchild, obd->u.filter.fo_vfsmnt,
+                                          O_RDWR);
+        if (IS_ERR(loghandle->lgh_file)) {
+                rc = PTR_ERR(loghandle->lgh_file);
+                CERROR("error opening logfile "LPX64": rc %d\n",
+                       lgl->lgl_oid, rc);
+                GOTO(out_handle, rc); /* dchild was already put for us */
+        }
+        memcpy(&loghandle->lgh_cookie, logcookie, sizeof(*logcookie));
+        loghandle->lgh_log_create = filter_log_create;
+        loghandle->lgh_log_open = filter_log_open;
+        loghandle->lgh_log_close = filter_log_close;
+        loghandle->lgh_obd = obd;
+        RETURN(loghandle);
+
+out_dentry:
+        f_dput(dchild);
+out_handle:
+        llog_free_handle(loghandle);
+        RETURN(ERR_PTR(rc));
+}
+
+/* llog create callback: makes and opens a log file under a fresh object id.
+ * Assumes caller has already pushed us into the kernel context. */
+static struct llog_handle *filter_log_create(struct obd_device *obd)
+{
+        struct filter_obd *filter = &obd->u.filter;
+        struct lustre_handle parent_lockh;
+        struct dentry *dparent, *dchild;
+        struct llog_handle *loghandle;
+        struct file *file;
+        int err, rc;
+        obd_id id;
+        ENTRY;
+
+        loghandle = llog_alloc_handle();
+        if (!loghandle)
+                RETURN(ERR_PTR(-ENOMEM));
+
+ retry:
+        id = filter_next_id(filter);
+
+        dparent = filter_parent_lock(obd, S_IFREG, id, LCK_PW, &parent_lockh);
+        if (IS_ERR(dparent))
+                GOTO(out_ctxt, rc = PTR_ERR(dparent));
+
+        dchild = filter_fid2dentry(obd, dparent, S_IFREG, id);
+        if (IS_ERR(dchild))
+                GOTO(out_lock, rc = PTR_ERR(dchild));
+
+        if (dchild->d_inode != NULL) {
+                /* This would only happen if lastobjid was bad on disk */
+                CERROR("Serious error: objid %*s already exists; is this "
+                       "filesystem corrupt?  I will try to work around it.\n",
+                       dchild->d_name.len, dchild->d_name.name);
+                f_dput(dchild);
+                ldlm_lock_decref(&parent_lockh, LCK_PW);
+                goto retry;
+        }
+
+        rc = vfs_create(dparent->d_inode, dchild, S_IFREG);
+        if (rc) {
+                CERROR("log create failed rc = %d\n", rc);
+                GOTO(out_child, rc);
+        }
+
+        rc = filter_update_server_data(filter->fo_rcvd_filp, filter->fo_fsd);
+        if (rc) {
+                CERROR("can't write lastobjid but log created: rc %d\n",rc);
+                GOTO(out_destroy, rc);
+        }
+
+        /* dentry_open puts its dentry+mnt refs on error; keep ours via dget */
+        mntget(filter->fo_vfsmnt);
+        file = dentry_open(dget(dchild), filter->fo_vfsmnt,
+                           O_RDWR | O_LARGEFILE);
+        if (IS_ERR(file)) {
+                rc = PTR_ERR(file);
+                CERROR("error opening log file "LPX64": rc %d\n", id, rc);
+                GOTO(out_destroy, rc);
+        }
+        f_dput(dchild); /* drop our ref; the open file holds its own */
+        ldlm_lock_decref(&parent_lockh, LCK_PW);
+        loghandle->lgh_file = file;
+        loghandle->lgh_cookie.lgc_lgl.lgl_oid = id;
+        loghandle->lgh_cookie.lgc_lgl.lgl_ogen = dchild->d_inode->i_generation;
+        loghandle->lgh_log_create = filter_log_create;
+        loghandle->lgh_log_open = filter_log_open;
+        loghandle->lgh_log_close = filter_log_close;
+        loghandle->lgh_obd = obd;
+        RETURN(loghandle);
+
+out_destroy:
+        err = vfs_unlink(dparent->d_inode, dchild);
+        if (err)
+                CERROR("error unlinking %*s on error: rc %d\n",
+                       dchild->d_name.len, dchild->d_name.name, err);
+out_child:
+        f_dput(dchild);
+out_lock:
+        ldlm_lock_decref(&parent_lockh, LCK_PW);
+out_ctxt:
+        llog_free_handle(loghandle);
+        RETURN(ERR_PTR(rc));
+}
+
+/* This is called from filter_setup() and should be single threaded */
+static struct llog_handle *filter_get_catalog(struct obd_device *obd)
+{
+        struct filter_obd *filter = &obd->u.filter;
+        struct filter_server_data *fsd = filter->fo_fsd;
+        struct obd_run_ctxt saved;
+        struct llog_handle *cathandle = NULL;
+        int rc;
+        ENTRY;
+        /* open the catalog recorded in fsd, else create one and record it */
+        push_ctxt(&saved, &filter->fo_ctxt, NULL);
+        if (fsd->fsd_catalog_oid) {
+                struct llog_cookie catcookie;
+
+                catcookie.lgc_lgl.lgl_oid = le64_to_cpu(fsd->fsd_catalog_oid);
+                catcookie.lgc_lgl.lgl_ogen = le32_to_cpu(fsd->fsd_catalog_ogen);
+                cathandle = filter_log_open(obd, &catcookie);
+                if (IS_ERR(cathandle)) {
+                        CERROR("error opening catalog "LPX64":%x: rc %d\n",
+                               catcookie.lgc_lgl.lgl_oid,
+                               catcookie.lgc_lgl.lgl_ogen,
+                               (int)PTR_ERR(cathandle));
+                        fsd->fsd_catalog_oid = 0;
+                        fsd->fsd_catalog_ogen = 0;
+                }
+        }
+
+        if (!fsd->fsd_catalog_oid) {
+                struct llog_logid *lgl;
+
+                cathandle = filter_log_create(obd);
+                if (IS_ERR(cathandle)) {
+                        CERROR("error creating new catalog: rc %d\n",
+                               (int)PTR_ERR(cathandle));
+                        GOTO(out, cathandle);
+                }
+                lgl = &cathandle->lgh_cookie.lgc_lgl;
+                fsd->fsd_catalog_oid = cpu_to_le64(lgl->lgl_oid);
+                fsd->fsd_catalog_ogen = cpu_to_le32(lgl->lgl_ogen);
+                rc = filter_update_server_data(filter->fo_rcvd_filp, fsd);
+                if (rc) {
+                        CERROR("error writing new catalog to disk: rc %d\n",rc);
+                        GOTO(out_handle, rc);
+                }
+        }
+
+        rc = llog_init_catalog(cathandle, &obd->u.filter.fo_mdc_uuid);
+        if (rc)
+                GOTO(out_handle, rc);
+out:
+        pop_ctxt(&saved, &filter->fo_ctxt, NULL);
+        RETURN(cathandle);
+
+out_handle:
+        filter_log_close(cathandle, cathandle);
+        cathandle = ERR_PTR(rc);
+        goto out;
+}
+
+static void filter_put_catalog(struct llog_handle *cathandle)
+{
+        struct llog_handle *loghandle, *n;
+        int rc;
+        ENTRY;
+        /* close each log on the catalog's list, then the catalog file */
+        list_for_each_entry_safe(loghandle, n, &cathandle->lgh_list, lgh_list)
+                filter_log_close(cathandle, loghandle);
+
+        rc = filp_close(cathandle->lgh_file, 0);
+        if (rc)
+                CERROR("error closing catalog: rc %d\n", rc);
+
+        llog_free_handle(cathandle);
+        EXIT;
+}
+
+int filter_log_cancel(struct lustre_handle *conn, struct lov_stripe_md *lsm,
+                      int num_cookies, struct llog_cookie *logcookies,
+                      int flags)
+{
+        struct obd_device *obd = class_conn2obd(conn);
+        struct obd_run_ctxt saved;
+        int rc;
+        ENTRY;
+        if (obd == NULL) /* no device behind this connection handle */
+                RETURN(-EINVAL);
+        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        rc = llog_cancel_records(obd->u.filter.fo_catalog, num_cookies,
+                                 logcookies);
+        pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        RETURN(rc);
+}
+
+int filter_log_op_create(struct llog_handle *cathandle, struct ll_fid *mds_fid,
+                         obd_id oid, obd_count ogen,
+                         struct llog_cookie *logcookie)
+{
+        struct llog_create_rec *lcr;
+        int rc;
+        ENTRY;
+        /* build an OST_CREATE_REC and append it via llog_add_record() */
+        OBD_ALLOC(lcr, sizeof(*lcr));
+        if (lcr == NULL)
+                RETURN(-ENOMEM);
+        lcr->lcr_hdr.lth_len = lcr->lcr_end_len = sizeof(*lcr);
+        lcr->lcr_hdr.lth_type = OST_CREATE_REC;
+        lcr->lcr_fid.id = mds_fid->id;
+        lcr->lcr_fid.generation = mds_fid->generation;
+        lcr->lcr_fid.f_type = mds_fid->f_type;
+        lcr->lcr_oid = oid;
+        lcr->lcr_ogen = ogen;
+
+        rc = llog_add_record(cathandle, &lcr->lcr_hdr, logcookie);
+        OBD_FREE(lcr, sizeof(*lcr));
+
+        if (rc > 0) {
+                LASSERT(rc == sizeof(*logcookie));
+                rc = 0;
+        }
+        RETURN(rc);
+}
+
+int filter_log_op_orphan(struct llog_handle *cathandle, obd_id oid,
+                         obd_count ogen, struct llog_cookie *logcookie)
+{
+        struct llog_orphan_rec *lor;
+        int rc;
+        ENTRY;
+        /* build an OST_ORPHAN_REC and append it via llog_add_record() */
+        OBD_ALLOC(lor, sizeof(*lor));
+        if (lor == NULL)
+                RETURN(-ENOMEM);
+        lor->lor_hdr.lth_len = lor->lor_end_len = sizeof(*lor);
+        lor->lor_hdr.lth_type = OST_ORPHAN_REC;
+        lor->lor_oid = oid;
+        lor->lor_ogen = ogen;
+
+        rc = llog_add_record(cathandle, &lor->lor_hdr, logcookie);
+        OBD_FREE(lor, sizeof(*lor)); /* temporary record is no longer needed */
+        if (rc > 0) {
+                LASSERT(rc == sizeof(*logcookie));
+                rc = 0;
+        }
+        RETURN(rc);
+}
diff --git a/lustre/obdfilter/filter_san.c b/lustre/obdfilter/filter_san.c
new file mode 100644
index 0000000..5345957
--- /dev/null
+++ b/lustre/obdfilter/filter_san.c
@@ -0,0 +1,130 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  linux/fs/obdfilter/filter_san.c
+ *
+ *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ *   Author: Peter Braam <braam@clusterfs.com>
+ *   Author: Andreas Dilger <adilger@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/pagemap.h> // XXX kill me soon
+#include <linux/version.h>
+
+#include <linux/obd_class.h>
+#include <linux/lustre_fsfilt.h>
+#include "filter_internal.h"
+
+/* sanobd setup: choose mount options from the fs type in ioc_inlbuf2 */
+int filter_san_setup(struct obd_device *obd, obd_count len, void *buf)
+{
+        struct obd_ioctl_data* data = buf;
+        char *option = NULL;
+
+        if (!data->ioc_inlbuf2)
+                RETURN(-EINVAL);
+
+        /* for extN/ext3 filesystem, we must mount it with 'writeback' mode */
+        if (!strcmp(data->ioc_inlbuf2, "extN"))
+                option = "data=writeback";
+        else if (!strcmp(data->ioc_inlbuf2, "ext3"))
+                option = "data=writeback,asyncdel";
+        else
+                LBUG(); /* just a reminder */
+        /* the rest of the setup is shared with the non-SAN filter */
+        return filter_common_setup(obd, len, buf, option);
+}
+
+int filter_san_preprw(int cmd, struct lustre_handle *conn, int objcount,
+                      struct obd_ioobj *obj, int niocount,
+                      struct niobuf_remote *nb)
+{
+        struct obd_device *obd;
+        struct obd_ioobj *o = obj;
+        struct niobuf_remote *rnb = nb;
+        int rc = 0;
+        int i;
+        ENTRY;
+        /* Map each niobuf offset to a disk block (allocating on write) */
+        obd = class_conn2obd(conn);
+        if (!obd) {
+                CDEBUG(D_IOCTL, "invalid client cookie "LPX64"\n",
+                       conn->cookie);
+                RETURN(-EINVAL);
+        }
+        /* stop at the first failed object so its rc is not overwritten */
+        for (i = 0; rc == 0 && i < objcount; i++, o++) {
+                struct dentry *dentry;
+                struct inode *inode;
+#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
+                sector_t (*fs_bmap)(struct address_space *, sector_t);
+#else
+                int (*fs_bmap)(struct address_space *, long);
+#endif
+                int j;
+
+                dentry = filter_fid2dentry(obd, NULL, o->ioo_type, o->ioo_id);
+                if (IS_ERR(dentry))
+                        GOTO(out, rc = PTR_ERR(dentry));
+                inode = dentry->d_inode;
+                if (!inode) {
+                        CERROR("trying to BRW to non-existent file "LPU64"\n",
+                               o->ioo_id);
+                        f_dput(dentry);
+                        GOTO(out, rc = -ENOENT);
+                }
+                fs_bmap = inode->i_mapping->a_ops->bmap;
+
+                for (j = 0; j < o->ioo_bufcnt; j++, rnb++) {
+                        long block;
+
+                        block = rnb->offset >> inode->i_blkbits;
+
+                        if (cmd == OBD_BRW_READ) {
+                                block = fs_bmap(inode->i_mapping, block);
+                        } else {
+                                loff_t newsize = rnb->offset + rnb->len;
+                                /* fs_prep_san_write will also update inode
+                                 * size for us:
+                                 * (1) newly allocated block
+                                 * (2) existing block but size extended
+                                 */
+                                /* FIXME We could call fs_prep_san_write()
+                                 * only once for all the blocks allocation.
+                                 * Now call it once for each block, for
+                                 * simplicity. And if error happens, we
+                                 * probably need to release previously
+                                 * allocated blocks */
+                                rc = fs_prep_san_write(obd, inode, &block,
+                                                       1, newsize);
+                                if (rc)
+                                        break;
+                        }
+
+                        rnb->offset = block;
+                }
+                f_dput(dentry);
+        }
+out:
+        RETURN(rc);
+}
+
-- 
1.8.3.1