if LINUX25
-basename=$(shell echo $< | sed -e 's/\.c//g' | sed -e 's/-//g' | sed -e 's/\.o//g')
+basename=$(shell echo $< | sed -e 's/\.c//g' | sed -e 's/-//g' | sed -e 's/\.o//g' | sed -e 's/^.*\///g')
AM_CPPFLAGS= -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -mpreferred-stack-boundary=2 -DKBUILD_MODNAME=$(MODULE) -DKBUILD_BASENAME=$(basename)
-$(MODULE).o: $($(MODULE)_OBJECTS)
- $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $($(MODULE)_OBJECTS)
+$(MODULE).o: $($(MODULE)_OBJECTS) $($(MODULE)_DEPENDENCIES)
+ $(LD) -m $(MOD_LINK) -r -o $(MODULE)_tmp.o $($(MODULE)_OBJECTS)
+ rm -f $(MODULE)_tmp.c
+ $(LINUX)/scripts/modpost $(LINUX)/vmlinux $(MODULE)_tmp.o
+ $(COMPILE) -UKBUILD_BASENAME -DKBUILD_BASENAME=$(MODULE) -c $(MODULE)_tmp.mod.c
+ $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $(MODULE)_tmp.o $(MODULE)_tmp.mod.o
else
KCFLAGS='-g -Wall -pipe -Wno-trigraphs -Wstrict-prototypes -fno-strict-aliasing -fno-common '
case ${linux25} in
yes )
- KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/include -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/kernel/skas/include -O2 -nostdinc -iwithprefix include -DKBUILD_BASENAME=$(MODULE) -DKBUILD_MODNAME=$(MODULE) '
+ KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/include -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/kernel/skas/include -O2 -nostdinc -iwithprefix include'
;;
* )
KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/include '
AC_MSG_CHECKING(for MODVERSIONS)
if egrep -e 'MODVERSIONS.*1' $LINUX/include/linux/autoconf.h >/dev/null 2>&1;
then
- MFLAGS="-DMODULE -DMODVERSIONS -include $LINUX/include/linux/modversions.h -DEXPORT_SYMTAB"
- AC_MSG_RESULT(yes)
- else
- MFLAGS=
- AC_MSG_RESULT(no)
+ if test $linux25 != "yes"; then
+ MFLAGS="-DMODULE -DMODVERSIONS -include $LINUX/include/linux/modversions.h -DEXPORT_SYMTAB"
+ AC_MSG_RESULT(yes)
+ fi
fi
fi
/* FIXME: Find a better method of setting IRQ affinity...
*/
- call_usermodehelper (argv[0], argv, envp);
+ USERMODEHELPER(argv[0], argv, envp);
#endif
}
#define DEBUG_SUBSYSTEM S_SOCKNAL
#include <linux/kp30.h>
+#include <linux/portals_compat25.h>
#include <portals/p30.h>
#include <portals/lib-p30.h>
#include <portals/socknal.h>
#include <sys/stat.h>
#include <sys/mman.h>
-#define BUG() /* workaround for module.h includes */
#include <linux/version.h>
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#define BUG() /* workaround for module.h includes */
#include <linux/module.h>
#endif
return 0;
}
+static struct mod_paths {
+ char *name, *path;
+} mod_paths[] = {
+ {"portals", "lustre/portals/libcfs"},
+ {"ksocknal", "lustre/portals/knals/socknal"},
+ {"kptlrouter", "lustre/portals/router"},
+ {"lvfs", "lustre/lvfs"},
+ {"obdclass", "lustre/obdclass"},
+ {"llog_test", "lustre/obdclass"},
+ {"ptlrpc", "lustre/ptlrpc"},
+ {"obdext2", "lustre/obdext2"},
+ {"ost", "lustre/ost"},
+ {"osc", "lustre/osc"},
+ {"mds", "lustre/mds"},
+ {"mdc", "lustre/mdc"},
+ {"llite", "lustre/llite"},
+ {"obdecho", "lustre/obdecho"},
+ {"ldlm", "lustre/ldlm"},
+ {"obdfilter", "lustre/obdfilter"},
+ {"extN", "lustre/extN"},
+ {"lov", "lustre/lov"},
+ {"fsfilt_ext3", "lustre/lvfs"},
+ {"fsfilt_extN", "lustre/lvfs"},
+ {"fsfilt_reiserfs", "lustre/lvfs"},
+ {"mds_ext2", "lustre/mds"},
+ {"mds_ext3", "lustre/mds"},
+ {"mds_extN", "lustre/mds"},
+ {"ptlbd", "lustre/ptlbd"},
+ {"mgmt_svc", "lustre/mgmt"},
+ {"mgmt_cli", "lustre/mgmt"},
+ {NULL, NULL}
+};
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
int jt_dbg_modules(int argc, char **argv)
{
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- struct mod_paths {
- char *name, *path;
- } *mp, mod_paths[] = {
- {"portals", "lustre/portals/libcfs"},
- {"ksocknal", "lustre/portals/knals/socknal"},
- {"kptlrouter", "lustre/portals/router"},
- {"lvfs", "lustre/lvfs"},
- {"obdclass", "lustre/obdclass"},
- {"llog_test", "lustre/obdclass"},
- {"ptlrpc", "lustre/ptlrpc"},
- {"obdext2", "lustre/obdext2"},
- {"ost", "lustre/ost"},
- {"osc", "lustre/osc"},
- {"mds", "lustre/mds"},
- {"mdc", "lustre/mdc"},
- {"llite", "lustre/llite"},
- {"obdecho", "lustre/obdecho"},
- {"ldlm", "lustre/ldlm"},
- {"obdfilter", "lustre/obdfilter"},
- {"extN", "lustre/extN"},
- {"lov", "lustre/lov"},
- {"fsfilt_ext3", "lustre/lvfs"},
- {"fsfilt_extN", "lustre/lvfs"},
- {"fsfilt_reiserfs", "lustre/lvfs"},
- {"mds_ext2", "lustre/mds"},
- {"mds_ext3", "lustre/mds"},
- {"mds_extN", "lustre/mds"},
- {"ptlbd", "lustre/ptlbd"},
- {"mgmt_svc", "lustre/mgmt"},
- {"mgmt_cli", "lustre/mgmt"},
- {NULL, NULL}
- };
+ struct mod_paths *mp;
char *path = "..";
char *kernel = "linux";
}
return 0;
+}
#else
- printf("jt_dbg_module is not yet implemented for Linux 2.5\n");
+/*
+ * Emit one gdb "add-symbol-file" command for every loaded module that is
+ * listed in mod_paths, taking the load addresses from /proc/modules.
+ *
+ * Usage: argv[0] [path] [kernel]
+ *   path   - prefix for the printed module object paths (defaults to "..")
+ *   kernel - parsed for command-line compatibility; not used in the output
+ *
+ * Always returns 0; a failure to open /proc/modules is reported on stdout.
+ */
+int jt_dbg_modules(int argc, char **argv)
+{
+        struct mod_paths *mp;
+        char *path = "..";
+        char *kernel = "linux";
+        const char *proc = "/proc/modules";
+        char modname[128];
+        unsigned long modaddr;
+        FILE *file;
+
+        if (argc >= 2)
+                path = argv[1];
+        if (argc == 3)
+                kernel = argv[2];
+        if (argc > 3) {
+                printf("%s [path] [kernel]\n", argv[0]);
+                return 0;
+        }
+        (void)kernel;
+
+        file = fopen(proc, "r");
+        if (!file) {
+                printf("failed open %s: %s\n", proc, strerror(errno));
+                return 0;
+        }
+
+        /*
+         * Each line is read as six whitespace-separated fields: the module
+         * name first and a hex load address sixth.  The four fields in
+         * between are skipped with %*s so that an arbitrarily long field
+         * cannot overflow a buffer; suppressed conversions are not counted,
+         * hence the comparison against 2.  The name is width-limited to fit
+         * modname including its terminating NUL.
+         */
+        while (fscanf(file, "%127s %*s %*s %*s %*s %lx\n",
+                      modname, &modaddr) == 2) {
+                for (mp = mod_paths; mp->name != NULL; mp++) {
+                        if (!strcmp(mp->name, modname))
+                                break;
+                }
+                /* mp->name is NULL when the sentinel entry was reached */
+                if (mp->name) {
+                        printf("add-symbol-file %s/%s/%s.o 0x%0lx\n", path,
+                               mp->path, mp->name, modaddr);
+                }
+        }
+
+        /* Release the stream; this command can be invoked repeatedly. */
+        fclose(file);
+
         return 0;
-#endif /* linux 2.5 */
 }
+#endif /* linux 2.5 */
int jt_dbg_panic(int argc, char **argv)
{
if LINUX25
-# We still need to link each module with vermagic.o to get rid of "kernel taited" warnings.
-basename=$(shell echo $< | sed -e 's/\.c//g' | sed -e 's/-//g' | sed -e 's/\.o//g')
+# FIXME
+# need to be rewritten:
+# - bad hacking in lvfs/Makefile.am obdclass/Makefile.am
+# - .o -> .ko
+#
+basename=$(shell echo $< | sed -e 's/\.c//g' | sed -e 's/-//g' | sed -e 's/\.o//g' | sed -e 's/^.*\///g')
AM_CPPFLAGS=-I$(top_builddir)/include -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -mpreferred-stack-boundary=2 -DKBUILD_MODNAME=$(MODULE) -DKBUILD_BASENAME=$(basename)
+$(MODULE).o: $($(MODULE)_OBJECTS) $($(MODULE)_DEPENDENCIES)
+ $(LD) -m $(MOD_LINK) -r -o $(MODULE)_tmp.o $($(MODULE)_OBJECTS)
+ rm -f $(MODULE)_tmp.c
+ $(LINUX)/scripts/modpost $(LINUX)/vmlinux $(MODULE)_tmp.o
+ $(COMPILE) -UKBUILD_BASENAME -DKBUILD_BASENAME=$(MODULE) -c $(MODULE)_tmp.mod.c
+ $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $(MODULE)_tmp.o $(MODULE)_tmp.mod.o
+
else
AM_CPPFLAGS=-I$(top_builddir)/include
+$(MODULE).o: $($(MODULE)_OBJECTS) $($(MODULE)_DEPENDENCIES)
+ $(LD) -m "`$(LD) --help | awk '/supported emulations/ {print $$4}'`" -r -o $(MODULE).o $($(MODULE)_OBJECTS)
endif
-$(MODULE).o: $($(MODULE)_OBJECTS) $($(MODULE)_DEPENDENCIES)
- $(LD) -m "`$(LD) --help | awk '/supported emulations/ {print $$4}'`" -r -o $(MODULE).o $($(MODULE)_OBJECTS)
tags:
rm -f $(top_srcdir)/TAGS
current->tty = NULL;
}
+static inline int cleanup_group_info(void)
+{
+ struct group_info *ginfo;
+
+ ginfo = groups_alloc(2);
+ if (!ginfo)
+ return -ENOMEM;
+
+ ginfo->ngroups = 0;
+ set_current_groups(ginfo);
+ put_group_info(ginfo);
+
+ return 0;
+}
+
#define smp_num_cpus NR_CPUS
#ifndef conditional_schedule
current->tty = NULL;
}
+static inline int cleanup_group_info(void)
+{
+ /* Get rid of unneeded supplementary groups */
+ current->ngroups = 0;
+ memset(current->groups, 0, sizeof(current->groups));
+ return 0;
+}
+
#ifndef conditional_schedule
#define conditional_schedule() if (unlikely(need_resched())) schedule()
#endif
--- /dev/null
+Index: linux-2.6.3/fs/open.c
+===================================================================
+--- linux-2.6.3.orig/fs/open.c 2004-02-23 14:36:25.000000000 -0800
++++ linux-2.6.3/fs/open.c 2004-02-23 20:09:34.000000000 -0800
+@@ -881,6 +881,7 @@
+ return ERR_PTR(error);
+ }
+
++EXPORT_SYMBOL(filp_open);
+
+ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
+ {
+Index: linux-2.6.3/fs/jbd/journal.c
+===================================================================
+--- linux-2.6.3.orig/fs/jbd/journal.c 2004-01-08 22:59:10.000000000 -0800
++++ linux-2.6.3/fs/jbd/journal.c 2004-02-23 20:09:34.000000000 -0800
+@@ -71,6 +71,7 @@
+ EXPORT_SYMBOL(journal_errno);
+ EXPORT_SYMBOL(journal_ack_err);
+ EXPORT_SYMBOL(journal_clear_err);
++EXPORT_SYMBOL(log_start_commit);
+ EXPORT_SYMBOL(log_wait_commit);
+ EXPORT_SYMBOL(journal_start_commit);
+ EXPORT_SYMBOL(journal_wipe);
+Index: linux-2.6.3/fs/ext3/super.c
+===================================================================
+--- linux-2.6.3.orig/fs/ext3/super.c 2004-02-23 14:36:26.000000000 -0800
++++ linux-2.6.3/fs/ext3/super.c 2004-02-23 20:24:30.000000000 -0800
+@@ -115,6 +115,8 @@
+ handle->h_err = err;
+ }
+
++EXPORT_SYMBOL(ext3_journal_abort_handle);
++
+ static char error_buf[1024];
+
+ /* Deal with the reporting of failure conditions on a filesystem such as
+@@ -1772,6 +1774,8 @@
+ return ret;
+ }
+
++EXPORT_SYMBOL(ext3_force_commit);
++
+ /*
+ * Ext3 always journals updates to the superblock itself, so we don't
+ * have to propagate any other updates to the superblock on disk at this
+@@ -2059,6 +2063,10 @@
+ unsigned long *blocks, int *created, int create);
+ EXPORT_SYMBOL(ext3_map_inode_page);
+
++EXPORT_SYMBOL(ext3_xattr_get);
++EXPORT_SYMBOL(ext3_xattr_set_handle);
++EXPORT_SYMBOL(ext3_bread);
++
+ MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
+ MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
+ MODULE_LICENSE("GPL");
--- /dev/null
+.old..........pc/linux-2.6.3-nfs-intent-suse/fs/nfs/dir.c
+.new.........fs/nfs/dir.c
+.old..........pc/linux-2.6.3-nfs-intent-suse/fs/nfs/dir.c
+.new.........fs/nfs/dir.c
+Index: linux-2.6.3-mm4/fs/nfs/dir.c
+===================================================================
+--- linux-2.6.3-mm4.orig/fs/nfs/dir.c 2004-03-08 17:05:35.000000000 +0800
++++ linux-2.6.3-mm4/fs/nfs/dir.c 2004-03-08 17:38:58.000000000 +0800
+@@ -773,7 +773,7 @@
+ if (nd->flags & LOOKUP_DIRECTORY)
+ return 0;
+ /* Are we trying to write to a read only partition? */
+- if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
++ if (IS_RDONLY(dir) && (nd->intent.it_flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
+ return 0;
+ return 1;
+ }
+@@ -794,7 +794,7 @@
+ dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+
+ /* Let vfs_create() deal with O_EXCL */
+- if (nd->intent.open.flags & O_EXCL)
++ if (nd->intent.it_flags & O_EXCL)
+ goto no_entry;
+
+ /* Open the file on the server */
+@@ -802,7 +802,7 @@
+ /* Revalidate parent directory attribute cache */
+ nfs_revalidate_inode(NFS_SERVER(dir), dir);
+
+- if (nd->intent.open.flags & O_CREAT) {
++ if (nd->intent.it_flags & O_CREAT) {
+ nfs_begin_data_update(dir);
+ inode = nfs4_atomic_open(dir, dentry, nd);
+ nfs_end_data_update(dir);
+@@ -818,7 +818,7 @@
+ break;
+ /* This turned out not to be a regular file */
+ case -ELOOP:
+- if (!(nd->intent.open.flags & O_NOFOLLOW))
++ if (!(nd->intent.it_flags & O_NOFOLLOW))
+ goto no_open;
+ /* case -EISDIR: */
+ /* case -EINVAL: */
+@@ -852,7 +852,7 @@
+ dir = parent->d_inode;
+ if (!is_atomic_open(dir, nd))
+ goto no_open;
+- openflags = nd->intent.open.flags;
++ openflags = nd->intent.it_flags;
+ if (openflags & O_CREAT) {
+ /* If this is a negative dentry, just drop it */
+ if (!inode)
+Index: linux-2.6.3-mm4/fs/nfs/nfs4proc.c
+===================================================================
+--- linux-2.6.3-mm4.orig/fs/nfs/nfs4proc.c 2004-03-08 17:02:24.000000000 +0800
++++ linux-2.6.3-mm4/fs/nfs/nfs4proc.c 2004-03-08 17:37:59.000000000 +0800
+@@ -778,17 +778,17 @@
+ struct nfs4_state *state;
+
+ if (nd->flags & LOOKUP_CREATE) {
+- attr.ia_mode = nd->intent.open.create_mode;
++ attr.ia_mode = nd->intent.it_create_mode;
+ attr.ia_valid = ATTR_MODE;
+ if (!IS_POSIXACL(dir))
+ attr.ia_mode &= ~current->fs->umask;
+ } else {
+ attr.ia_valid = 0;
+- BUG_ON(nd->intent.open.flags & O_CREAT);
++ BUG_ON(nd->intent.it_flags & O_CREAT);
+ }
+
+ cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+- state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred);
++ state = nfs4_do_open(dir, &dentry->d_name, nd->intent.it_flags, &attr, cred);
+ put_rpccred(cred);
+ if (IS_ERR(state))
+ return (struct inode *)state;
+Index: linux-2.6.3-mm4/fs/cifs/dir.c
+===================================================================
+--- linux-2.6.3-mm4.orig/fs/cifs/dir.c 2004-02-18 11:58:34.000000000 +0800
++++ linux-2.6.3-mm4/fs/cifs/dir.c 2004-03-08 17:37:59.000000000 +0800
+@@ -146,18 +146,18 @@
+ if(nd) {
+ cFYI(1,("In create for inode %p dentry->inode %p nd flags = 0x%x for %s",inode, direntry->d_inode, nd->flags,full_path));
+
+- if ((nd->intent.open.flags & O_ACCMODE) == O_RDONLY)
++ if ((nd->intent.it_flags & O_ACCMODE) == O_RDONLY)
+ desiredAccess = GENERIC_READ;
+- else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY)
++ else if ((nd->intent.it_flags & O_ACCMODE) == O_WRONLY)
+ desiredAccess = GENERIC_WRITE;
+- else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR)
++ else if ((nd->intent.it_flags & O_ACCMODE) == O_RDWR)
+ desiredAccess = GENERIC_ALL;
+
+- if((nd->intent.open.flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
++ if((nd->intent.it_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
+ disposition = FILE_CREATE;
+- else if((nd->intent.open.flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
++ else if((nd->intent.it_flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
+ disposition = FILE_OVERWRITE_IF;
+- else if((nd->intent.open.flags & O_CREAT) == O_CREAT)
++ else if((nd->intent.it_flags & O_CREAT) == O_CREAT)
+ disposition = FILE_OPEN_IF;
+ else {
+ cFYI(1,("Create flag not set in create function"));
+@@ -314,7 +314,7 @@
+ parent_dir_inode, direntry->d_name.name, direntry));
+
+ if(nd) { /* BB removeme */
+- cFYI(1,("In lookup nd flags 0x%x open intent flags 0x%x",nd->flags,nd->intent.open.flags));
++ cFYI(1,("In lookup nd flags 0x%x open intent flags 0x%x",nd->flags,nd->intent.it_flags));
+ } /* BB removeme BB */
+ /* BB Add check of incoming data - e.g. frame not longer than maximum SMB - let server check the namelen BB */
+
--- /dev/null
+ Documentation/filesystems/ext2.txt | 16 ++
+ fs/ext3/inode.c | 3
+ fs/ext3/iopen.c | 239 +++++++++++++++++++++++++++++++++++++
+ fs/ext3/iopen.h | 15 ++
+ fs/ext3/namei.c | 13 ++
+ fs/ext3/super.c | 17 ++
+ include/linux/ext3_fs.h | 2
+ 7 files changed, 304 insertions(+), 1 deletion(-)
+
+Index: linux-2.6.3-mm4/Documentation/filesystems/ext2.txt
+===================================================================
+--- linux-2.6.3-mm4.orig/Documentation/filesystems/ext2.txt 2004-01-09 14:59:18.000000000 +0800
++++ linux-2.6.3-mm4/Documentation/filesystems/ext2.txt 2004-03-08 14:58:44.431196112 +0800
+@@ -35,6 +35,22 @@
+
+ sb=n Use alternate superblock at this location.
+
++iopen Makes an invisible pseudo-directory called
++ __iopen__ available in the root directory
++ of the filesystem. Allows open-by-inode-
++ number. i.e., inode 3145 can be accessed
++ via /mntpt/__iopen__/3145
++
++iopen_nopriv This option makes the iopen directory be
++ world-readable. This may be safer since it
++ allows daemons to run as an unprivileged user,
++ however it significantly changes the security
++ model of a Unix filesystem, since previously
++ all files under a mode 700 directory were not
++ generally available even if the
++ permissions on the file itself are
++ world-readable.
++
+ grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2.
+
+
+Index: linux-2.6.3-mm4/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.3-mm4.orig/fs/ext3/inode.c 2004-03-08 14:57:54.969715400 +0800
++++ linux-2.6.3-mm4/fs/ext3/inode.c 2004-03-08 14:58:44.504185016 +0800
+@@ -37,6 +37,7 @@
+ #include <linux/mpage.h>
+ #include <linux/uio.h>
+ #include "xattr.h"
++#include "iopen.h"
+ #include "acl.h"
+
+ /*
+@@ -2472,6 +2473,8 @@
+ ei->i_acl = EXT3_ACL_NOT_CACHED;
+ ei->i_default_acl = EXT3_ACL_NOT_CACHED;
+ #endif
++ if (ext3_iopen_get_inode(inode))
++ return;
+ if (ext3_get_inode_loc(inode, &iloc, 0))
+ goto bad_inode;
+ bh = iloc.bh;
+Index: linux-2.6.3-mm4/fs/ext3/iopen.c
+===================================================================
+--- linux-2.6.3-mm4.orig/fs/ext3/iopen.c 2004-03-08 14:58:44.413198848 +0800
++++ linux-2.6.3-mm4/fs/ext3/iopen.c 2004-03-08 14:58:44.576174072 +0800
+@@ -0,0 +1,223 @@
++
++
++/*
++ * linux/fs/ext3/iopen.c
++ *
++ * Special support for open by inode number
++ *
++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
++ *
++ * This file may be redistributed under the terms of the GNU General
++ * Public License.
++ */
++
++#include <linux/sched.h>
++#include <linux/fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/jbd.h>
++#include <linux/ext3_fs.h>
++#include <linux/smp_lock.h>
++#include "iopen.h"
++
++#ifndef assert
++#define assert(test) J_ASSERT(test)
++#endif
++
++#define IOPEN_NAME_LEN 32
++
++/*
++ * This implements looking up an inode by number.
++ */
++static struct dentry *iopen_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
++{
++ struct inode * inode;
++ unsigned long ino;
++ struct list_head *lp;
++ struct dentry *alternate;
++ char buf[IOPEN_NAME_LEN];
++
++ if (dentry->d_name.len >= IOPEN_NAME_LEN)
++ return ERR_PTR(-ENAMETOOLONG);
++
++ memcpy(buf, dentry->d_name.name, dentry->d_name.len);
++ buf[dentry->d_name.len] = 0;
++
++ if (strcmp(buf, ".") == 0)
++ ino = dir->i_ino;
++ else if (strcmp(buf, "..") == 0)
++ ino = EXT3_ROOT_INO;
++ else
++ ino = simple_strtoul(buf, 0, 0);
++
++ if ((ino != EXT3_ROOT_INO &&
++ //ino != EXT3_ACL_IDX_INO &&
++ //ino != EXT3_ACL_DATA_INO &&
++ ino < EXT3_FIRST_INO(dir->i_sb)) ||
++ ino > le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count))
++ return ERR_PTR(-ENOENT);
++
++ inode = iget(dir->i_sb, ino);
++ if (!inode)
++ return ERR_PTR(-EACCES);
++ if (is_bad_inode(inode)) {
++ iput(inode);
++ return ERR_PTR(-ENOENT);
++ }
++
++ /* preferably return a connected dentry */
++ spin_lock(&dcache_lock);
++ list_for_each(lp, &inode->i_dentry) {
++ alternate = list_entry(lp, struct dentry, d_alias);
++ assert(!(alternate->d_flags & DCACHE_DISCONNECTED));
++ }
++
++ if (!list_empty(&inode->i_dentry)) {
++ alternate = list_entry(inode->i_dentry.next,
++ struct dentry, d_alias);
++ dget_locked(alternate);
++ alternate->d_vfs_flags |= DCACHE_REFERENCED;
++ iput(inode);
++ spin_unlock(&dcache_lock);
++ return alternate;
++ }
++ dentry->d_flags |= DCACHE_DISCONNECTED;
++ spin_unlock(&dcache_lock);
++
++ d_add(dentry, inode);
++ return NULL;
++}
++
++#define do_switch(x,y) do { \
++ __typeof__ (x) __tmp = x; \
++ x = y; y = __tmp; } while (0)
++
++static inline void switch_names(struct dentry * dentry, struct dentry * target)
++{
++ const unsigned char *old_name, *new_name;
++
++ memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN);
++ old_name = target->d_name.name;
++ new_name = dentry->d_name.name;
++ if (old_name == target->d_iname)
++ old_name = dentry->d_iname;
++ if (new_name == dentry->d_iname)
++ new_name = target->d_iname;
++ target->d_name.name = new_name;
++ dentry->d_name.name = old_name;
++}
++
++
++struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode)
++{
++ struct dentry *tmp, *goal = NULL;
++ struct list_head *lp;
++
++ /* preferably return a connected dentry */
++ spin_lock(&dcache_lock);
++ /* verify this dentry is really new */
++ assert(!de->d_inode);
++ assert(list_empty(&de->d_subdirs));
++ assert(list_empty(&de->d_alias));
++
++
++ list_for_each(lp, &inode->i_dentry) {
++ tmp = list_entry(lp, struct dentry, d_alias);
++ if (tmp->d_flags & DCACHE_DISCONNECTED) {
++ assert(tmp->d_alias.next == &inode->i_dentry);
++ assert(tmp->d_alias.prev == &inode->i_dentry);
++ goal = tmp;
++ dget_locked(goal);
++ break;
++ }
++ }
++ spin_unlock(&dcache_lock);
++
++ if (!goal)
++ return NULL;
++
++ goal->d_flags &= ~DCACHE_DISCONNECTED;
++ d_rehash(de);
++ d_move(goal, de);
++
++ return goal;
++}
++
++/*
++ * These are the special structures for the iopen pseudo directory.
++ */
++
++static struct inode_operations iopen_inode_operations = {
++ lookup: iopen_lookup, /* BKL held */
++};
++
++static struct file_operations iopen_file_operations = {
++ read: generic_read_dir,
++};
++
++static int match_dentry(struct dentry *dentry, const char *name)
++{
++ int len;
++
++ len = strlen(name);
++ if (dentry->d_name.len != len)
++ return 0;
++ if (strncmp(dentry->d_name.name, name, len))
++ return 0;
++ return 1;
++}
++
++/*
++ * This function is spliced into ext3_lookup and returns 1 if the file
++ * name is __iopen__ and dentry has been filled in appropriately.
++ */
++int ext3_check_for_iopen(struct inode * dir, struct dentry *dentry)
++{
++ struct inode * inode;
++
++ if (dir->i_ino != EXT3_ROOT_INO ||
++ !test_opt(dir->i_sb, IOPEN) ||
++ !match_dentry(dentry, "__iopen__"))
++ return 0;
++
++ inode = iget(dir->i_sb, EXT3_BAD_INO);
++
++ if (!inode)
++ return 0;
++ d_add(dentry, inode);
++ return 1;
++}
++
++/*
++ * This function is spliced into read_inode; it returns 1 if inode
++ * number is the one for /__iopen__, in which case the inode is filled
++ * in appropriately. Otherwise, this function returns 0.
++ */
++int ext3_iopen_get_inode(struct inode * inode)
++{
++ if (inode->i_ino != EXT3_BAD_INO)
++ return 0;
++
++ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;
++ if (test_opt(inode->i_sb, IOPEN_NOPRIV))
++ inode->i_mode |= 0777;
++ inode->i_uid = 0;
++ inode->i_gid = 0;
++ inode->i_nlink = 1;
++ inode->i_size = 4096;
++ inode->i_atime = CURRENT_TIME;
++ inode->i_ctime = CURRENT_TIME;
++ inode->i_mtime = CURRENT_TIME;
++ EXT3_I(inode)->i_dtime = 0;
++ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size
++ * (for stat), not the fs block
++ * size */
++ inode->i_blocks = 0;
++ inode->i_version = 1;
++ inode->i_generation = 0;
++
++ inode->i_op = &iopen_inode_operations;
++ inode->i_fop = &iopen_file_operations;
++ inode->i_mapping->a_ops = 0;
++
++ return 1;
++}
+Index: linux-2.6.3-mm4/fs/ext3/iopen.h
+===================================================================
+--- linux-2.6.3-mm4.orig/fs/ext3/iopen.h 2004-03-08 14:58:44.413198848 +0800
++++ linux-2.6.3-mm4/fs/ext3/iopen.h 2004-03-08 14:58:44.577173920 +0800
+@@ -0,0 +1,15 @@
++/*
++ * iopen.h
++ *
++ * Special support for opening files by inode number.
++ *
++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
++ *
++ * This file may be redistributed under the terms of the GNU General
++ * Public License.
++ */
++
++extern int ext3_check_for_iopen(struct inode * dir, struct dentry *dentry);
++extern int ext3_iopen_get_inode(struct inode * inode);
++
++
+Index: linux-2.6.3-mm4/fs/ext3/namei.c
+===================================================================
+--- linux-2.6.3-mm4.orig/fs/ext3/namei.c 2004-03-08 14:57:52.978018184 +0800
++++ linux-2.6.3-mm4/fs/ext3/namei.c 2004-03-08 14:58:44.648163128 +0800
+@@ -37,6 +37,7 @@
+ #include <linux/buffer_head.h>
+ #include <linux/smp_lock.h>
+ #include "xattr.h"
++#include "iopen.h"
+ #include "acl.h"
+
+ /*
+@@ -970,15 +971,21 @@
+ }
+ #endif
+
++struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode);
++
+ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+ {
+ struct inode * inode;
+ struct ext3_dir_entry_2 * de;
+ struct buffer_head * bh;
++ struct dentry *alternate = NULL;
+
+ if (dentry->d_name.len > EXT3_NAME_LEN)
+ return ERR_PTR(-ENAMETOOLONG);
+
++ if (ext3_check_for_iopen(dir, dentry))
++ return NULL;
++
+ bh = ext3_find_entry(dentry, &de);
+ inode = NULL;
+ if (bh) {
+@@ -989,8 +996,14 @@
+ if (!inode)
+ return ERR_PTR(-EACCES);
+ }
++ if (inode && (alternate = iopen_connect_dentry(dentry, inode))) {
++ iput(inode);
++ return alternate;
++ }
++
+ if (inode)
+ return d_splice_alias(inode, dentry);
++
+ d_add(dentry, inode);
+ return NULL;
+ }
+Index: linux-2.6.3-mm4/fs/ext3/super.c
+===================================================================
+--- linux-2.6.3-mm4.orig/fs/ext3/super.c 2004-03-08 14:57:55.049703240 +0800
++++ linux-2.6.3-mm4/fs/ext3/super.c 2004-03-08 15:03:18.310560120 +0800
+@@ -575,7 +575,7 @@
+ Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+ Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
+ Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
+- Opt_ignore, Opt_err,
++ Opt_ignore, Opt_err, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+ };
+
+ static match_table_t tokens = {
+@@ -620,6 +620,9 @@
+ {Opt_ignore, "noquota"},
+ {Opt_ignore, "quota"},
+ {Opt_ignore, "usrquota"},
++ {Opt_iopen, "iopen"},
++ {Opt_noiopen, "noiopen"},
++ {Opt_iopen_nopriv, "iopen_nopriv"},
+ {Opt_err, NULL}
+ };
+
+@@ -869,6 +872,18 @@
+ case Opt_abort:
+ set_opt(sbi->s_mount_opt, ABORT);
+ break;
++ case Opt_iopen:
++ set_opt (sbi->s_mount_opt, IOPEN);
++ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++ break;
++ case Opt_noiopen:
++ clear_opt (sbi->s_mount_opt, IOPEN);
++ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++ break;
++ case Opt_iopen_nopriv:
++ set_opt (sbi->s_mount_opt, IOPEN);
++ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++ break;
+ case Opt_ignore:
+ break;
+ default:
+Index: linux-2.6.3-mm4/fs/ext3/Makefile
+===================================================================
+--- linux-2.6.3-mm4.orig/fs/ext3/Makefile 2004-01-09 14:59:08.000000000 +0800
++++ linux-2.6.3-mm4/fs/ext3/Makefile 2004-03-08 14:58:44.794140936 +0800
+@@ -5,7 +5,7 @@
+ obj-$(CONFIG_EXT3_FS) += ext3.o
+
+ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+- ioctl.o namei.o super.o symlink.o hash.o
++ ioctl.o namei.o super.o symlink.o hash.o iopen.o
+
+ ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+Index: linux-2.6.3-mm4/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.3-mm4.orig/include/linux/ext3_fs.h 2004-03-08 14:57:53.057006176 +0800
++++ linux-2.6.3-mm4/include/linux/ext3_fs.h 2004-03-08 14:58:44.795140784 +0800
+@@ -325,6 +325,8 @@
+ #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
+ #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
+ #define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */
++#define EXT3_MOUNT_IOPEN 0x10000 /* Allow access via iopen */
++#define EXT3_MOUNT_IOPEN_NOPRIV 0x20000 /* Make iopen world-readable */
+
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef _LINUX_EXT2_FS_H
--- /dev/null
+
+The complete series of citi nfsv4 patches in a single patch
+
+
+ Makefile | 2
+ fs/Kconfig | 49
+ fs/Makefile | 1
+ fs/inode.c | 2
+ fs/nfs/dir.c | 181 ++
+ fs/nfs/direct.c | 3
+ fs/nfs/file.c | 23
+ fs/nfs/inode.c | 586 +++++----
+ fs/nfs/nfs3proc.c | 43
+ fs/nfs/nfs4proc.c | 988 +++++++---------
+ fs/nfs/nfs4xdr.c | 1931 ++++++++++++++++++++++++--------
+ fs/nfs/pagelist.c | 5
+ fs/nfs/proc.c | 51
+ fs/nfs/read.c | 2
+ fs/nfs/unlink.c | 3
+ fs/nfs/write.c | 207 +--
+ fs/nfs4acl/Makefile | 3
+ fs/nfs4acl/acl.c | 921 +++++++++++++++
+ fs/nfs4acl/acl_syms.c | 51
+ fs/nfsd/Makefile | 2
+ fs/nfsd/nfs3xdr.c | 2
+ fs/nfsd/nfs4idmap.c | 569 +++++++++
+ fs/nfsd/nfs4proc.c | 229 ++-
+ fs/nfsd/nfs4state.c | 440 +++++--
+ fs/nfsd/nfs4xdr.c | 495 +++++---
+ fs/nfsd/nfsctl.c | 7
+ fs/nfsd/nfsproc.c | 1
+ fs/nfsd/nfsxdr.c | 2
+ fs/nfsd/stats.c | 67 -
+ fs/nfsd/vfs.c | 218 +++
+ include/linux/fs.h | 2
+ include/linux/nfs.h | 2
+ include/linux/nfs4.h | 80 +
+ include/linux/nfs4_acl.h | 68 +
+ include/linux/nfs_fs.h | 138 +-
+ include/linux/nfs_page.h | 2
+ include/linux/nfs_xdr.h | 256 +---
+ include/linux/nfsd/nfsd.h | 16
+ include/linux/nfsd/nfsfh.h | 8
+ include/linux/nfsd/state.h | 21
+ include/linux/nfsd/xdr4.h | 37
+ include/linux/nfsd_idmap.h | 54
+ include/linux/sunrpc/auth_gss.h | 2
+ include/linux/sunrpc/cache.h | 13
+ include/linux/sunrpc/gss_api.h | 3
+ include/linux/sunrpc/stats.h | 20
+ include/linux/sunrpc/svc.h | 1
+ include/linux/sunrpc/svcauth.h | 5
+ include/linux/sunrpc/svcauth_gss.h | 35
+ include/linux/sunrpc/xdr.h | 3
+ include/linux/sunrpc/xprt.h | 15
+ net/sunrpc/Makefile | 2
+ net/sunrpc/auth_gss/Makefile | 2
+ net/sunrpc/auth_gss/auth_gss.c | 119 +
+ net/sunrpc/auth_gss/gss_krb5_crypto.c | 18
+ net/sunrpc/auth_gss/gss_krb5_mech.c | 14
+ net/sunrpc/auth_gss/gss_krb5_seal.c | 9
+ net/sunrpc/auth_gss/gss_krb5_seqnum.c | 2
+ net/sunrpc/auth_gss/gss_mech_switch.c | 32
+ net/sunrpc/auth_gss/gss_pseudoflavors.c | 21
+ net/sunrpc/auth_gss/sunrpcgss_syms.c | 2
+ net/sunrpc/auth_gss/svcauth_gss.c | 1018 ++++++++++++++++
+ net/sunrpc/cache.c | 13
+ net/sunrpc/stats.c | 106 -
+ net/sunrpc/sunrpc_syms.c | 5
+ net/sunrpc/svc.c | 4
+ net/sunrpc/svcauth.c | 5
+ net/sunrpc/svcauth_unix.c | 13
+ net/sunrpc/xdr.c | 4
+ net/sunrpc/xprt.c | 210 +--
+ include/linux/sunrpc/name_lookup.h | 38
+ 71 files changed, 7194 insertions(+), 2308 deletions(-)
+
+diff -puN Makefile~CITI_NFS4_ALL Makefile
+--- linux-2.6.3/Makefile~CITI_NFS4_ALL 2004-02-19 16:47:02.000000000 -0500
++++ linux-2.6.3-bfields/Makefile 2004-02-19 16:47:16.000000000 -0500
+@@ -1,7 +1,7 @@
+ VERSION = 2
+ PATCHLEVEL = 6
+ SUBLEVEL = 3
+-EXTRAVERSION =
++EXTRAVERSION = -CITI_NFS4_ALL-1
+ NAME=Feisty Dunnart
+
+ # *DOCUMENTATION*
+diff -puN fs/inode.c~CITI_NFS4_ALL fs/inode.c
+--- linux-2.6.3/fs/inode.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/fs/inode.c 2004-02-19 16:47:03.000000000 -0500
+@@ -1178,6 +1178,8 @@ void inode_update_time(struct inode *ino
+ struct timespec now;
+ int sync_it = 0;
+
++ if (IS_NOCMTIME(inode))
++ return;
+ if (IS_RDONLY(inode))
+ return;
+
+diff -puN fs/Kconfig~CITI_NFS4_ALL fs/Kconfig
+--- linux-2.6.3/fs/Kconfig~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/fs/Kconfig 2004-02-19 16:47:07.000000000 -0500
+@@ -288,7 +288,7 @@ config FS_POSIX_ACL
+ # Never use this symbol for ifdefs.
+ #
+ bool
+- depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL
++ depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL || NFS_V4_ACL
+ default y
+
+ config XFS_FS
+@@ -1314,21 +1314,25 @@ config NFS_V3
+ Say Y here if you want your NFS client to be able to speak the newer
+ version 3 of the NFS protocol.
+
+- If unsure, say N.
++ If unsure, say Y.
+
+ config NFS_V4
+ bool "Provide NFSv4 client support (EXPERIMENTAL)"
+ depends on NFS_FS && EXPERIMENTAL
++ select RPCSEC_GSS_KRB5
+ help
+ Say Y here if you want your NFS client to be able to speak the newer
+- version 4 of the NFS protocol. This feature is experimental, and
+- should only be used if you are interested in helping to test NFSv4.
++ version 4 of the NFS protocol.
++
++ Note: Requires auxiliary userspace daemons which may be found on
++ http://www.citi.umich.edu/projects/nfsv4/
+
+ If unsure, say N.
+
+ config NFS_DIRECTIO
+ bool "Allow direct I/O on NFS files (EXPERIMENTAL)"
+ depends on NFS_FS && EXPERIMENTAL
++ select NFS_V4_ACL
+ help
+ This option enables applications to perform uncached I/O on files
+ in NFS file systems using the O_DIRECT open() flag. When O_DIRECT
+@@ -1388,6 +1392,7 @@ config NFSD_V3
+ config NFSD_V4
+ bool "Provide NFSv4 server support (EXPERIMENTAL)"
+ depends on NFSD_V3 && EXPERIMENTAL
++ select NFS_V4_ACL
+ help
+ If you would like to include the NFSv4 server as well as the NFSv2
+ and NFSv3 servers, say Y here. This feature is experimental, and
+@@ -1423,6 +1428,12 @@ config LOCKD_V4
+ depends on NFSD_V3 || NFS_V3
+ default y
+
++config NFS_V4_ACL
++ bool "Provide NFSv4 ACL support"
++ depends on NFSD_V4 || NFS_V4
++ help
++ This allows you to use POSIX ACLs with NFSv4.
++
+ config EXPORTFS
+ tristate
+ default NFSD
+@@ -1431,28 +1442,24 @@ config SUNRPC
+ tristate
+
+ config SUNRPC_GSS
+- tristate "Provide RPCSEC_GSS authentication (EXPERIMENTAL)"
++ tristate
++
++config RPCSEC_GSS_KRB5
++ tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)"
+ depends on SUNRPC && EXPERIMENTAL
+- default SUNRPC if NFS_V4=y
++ select SUNRPC_GSS
++ select CRYPTO
++ select CRYPTO_MD5
++ select CRYPTO_DES
+ help
+- Provides cryptographic authentication for NFS rpc requests. To
+- make this useful, you must also select at least one rpcsec_gss
+- mechanism.
+- Note: You should always select this option if you wish to use
++ Provides for secure RPC calls by means of a gss-api
++ mechanism based on Kerberos V5. This is required for
+ NFSv4.
+
+-config RPCSEC_GSS_KRB5
+- tristate "Kerberos V mechanism for RPCSEC_GSS (EXPERIMENTAL)"
+- depends on SUNRPC_GSS && CRYPTO_DES && CRYPTO_MD5
+- default SUNRPC_GSS if NFS_V4=y
+- help
+- Provides a gss-api mechanism based on Kerberos V5 (this is
+- mandatory for RFC3010-compliant NFSv4 implementations).
+- Requires a userspace daemon;
+- see http://www.citi.umich.edu/projects/nfsv4/.
++ Note: Requires an auxiliary userspace daemon which may be found on
++ http://www.citi.umich.edu/projects/nfsv4/
+
+- Note: If you select this option, please ensure that you also
+- enable the MD5 and DES crypto ciphers.
++ If unsure, say N.
+
+ config SMB_FS
+ tristate "SMB file system support (to mount Windows shares etc.)"
+diff -puN fs/nfs/dir.c~CITI_NFS4_ALL fs/nfs/dir.c
+--- linux-2.6.3/fs/nfs/dir.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfs/dir.c 2004-02-19 16:47:07.000000000 -0500
+@@ -88,6 +88,10 @@ struct inode_operations nfs4_dir_inode_o
+ .permission = nfs_permission,
+ .getattr = nfs_getattr,
+ .setattr = nfs_setattr,
++#ifdef CONFIG_NFS_V4_ACL
++ .getxattr = nfs_getxattr,
++ .setxattr = nfs_setxattr,
++#endif /* CONFIG_NFS_V4_ACL */
+ };
+
+ #endif /* CONFIG_NFS_V4 */
+@@ -139,11 +143,13 @@ int nfs_readdir_filler(nfs_readdir_descr
+ struct file *file = desc->file;
+ struct inode *inode = file->f_dentry->d_inode;
+ struct rpc_cred *cred = nfs_file_cred(file);
++ unsigned long timestamp;
+ int error;
+
+ dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index);
+
+ again:
++ timestamp = jiffies;
+ error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->entry->cookie, page,
+ NFS_SERVER(inode)->dtsize, desc->plus);
+ if (error < 0) {
+@@ -157,18 +163,21 @@ int nfs_readdir_filler(nfs_readdir_descr
+ goto error;
+ }
+ SetPageUptodate(page);
++ NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
+ /* Ensure consistent page alignment of the data.
+ * Note: assumes we have exclusive access to this mapping either
+ * throught inode->i_sem or some other mechanism.
+ */
+- if (page->index == 0)
++ if (page->index == 0) {
+ invalidate_inode_pages(inode->i_mapping);
++ NFS_I(inode)->readdir_timestamp = timestamp;
++ }
+ unlock_page(page);
+ return 0;
+ error:
+ SetPageError(page);
+ unlock_page(page);
+- invalidate_inode_pages(inode->i_mapping);
++ nfs_zap_caches(inode);
+ desc->error = error;
+ return -EIO;
+ }
+@@ -381,6 +390,7 @@ int uncached_readdir(nfs_readdir_descrip
+ page,
+ NFS_SERVER(inode)->dtsize,
+ desc->plus);
++ NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
+ desc->page = page;
+ desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */
+ if (desc->error >= 0) {
+@@ -459,7 +469,15 @@ static int nfs_readdir(struct file *filp
+ }
+ res = 0;
+ break;
+- } else if (res < 0)
++ }
++ if (res == -ETOOSMALL && desc->plus) {
++ NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS;
++ nfs_zap_caches(inode);
++ desc->plus = 0;
++ desc->entry->eof = 0;
++ continue;
++ }
++ if (res < 0)
+ break;
+
+ res = nfs_do_filldir(desc, dirent, filldir);
+@@ -481,14 +499,19 @@ static int nfs_readdir(struct file *filp
+ * In the case it has, we assume that the dentries are untrustworthy
+ * and may need to be looked up again.
+ */
+-static inline
+-int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
++static inline int nfs_check_verifier(struct inode *dir, struct dentry *dentry)
+ {
+ if (IS_ROOT(dentry))
+ return 1;
+- if (nfs_revalidate_inode(NFS_SERVER(dir), dir))
++ if ((NFS_FLAGS(dir) & NFS_INO_INVALID_ATTR) != 0
++ || nfs_attribute_timeout(dir))
+ return 0;
+- return time_after(dentry->d_time, NFS_MTIME_UPDATE(dir));
++ return nfs_verify_change_attribute(dir, (unsigned long)dentry->d_fsdata);
++}
++
++static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf)
++{
++ dentry->d_fsdata = (void *)verf;
+ }
+
+ /*
+@@ -528,9 +551,7 @@ int nfs_neg_need_reval(struct inode *dir
+ /* Don't revalidate a negative dentry if we're creating a new file */
+ if ((ndflags & LOOKUP_CREATE) && !(ndflags & LOOKUP_CONTINUE))
+ return 0;
+- if (!nfs_check_verifier(dir, dentry))
+- return 1;
+- return time_after(jiffies, dentry->d_time + NFS_ATTRTIMEO(dir));
++ return !nfs_check_verifier(dir, dentry);
+ }
+
+ /*
+@@ -552,6 +573,7 @@ static int nfs_lookup_revalidate(struct
+ int error;
+ struct nfs_fh fhandle;
+ struct nfs_fattr fattr;
++ unsigned long verifier;
+ int isopen = 0;
+
+ parent = dget_parent(dentry);
+@@ -574,6 +596,9 @@ static int nfs_lookup_revalidate(struct
+ goto out_bad;
+ }
+
++ /* Revalidate parent directory attribute cache */
++ nfs_revalidate_inode(NFS_SERVER(dir), dir);
++
+ /* Force a full look up iff the parent directory has changed */
+ if (nfs_check_verifier(dir, dentry)) {
+ if (nfs_lookup_verify_inode(inode, isopen))
+@@ -581,6 +606,12 @@ static int nfs_lookup_revalidate(struct
+ goto out_valid;
+ }
+
++ /*
++ * Note: we're not holding inode->i_sem and so may be racing with
++ * operations that change the directory. We therefore save the
++ * change attribute *before* we do the RPC call.
++ */
++ verifier = nfs_save_change_attribute(dir);
+ error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr);
+ if (!error) {
+ if (memcmp(NFS_FH(inode), &fhandle, sizeof(struct nfs_fh))!= 0)
+@@ -603,6 +634,7 @@ static int nfs_lookup_revalidate(struct
+
+ out_valid_renew:
+ nfs_renew_times(dentry);
++ nfs_set_verifier(dentry, verifier);
+ out_valid:
+ unlock_kernel();
+ dput(parent);
+@@ -638,6 +670,11 @@ static int nfs_dentry_delete(struct dent
+ /* Unhash it, so that ->d_iput() would be called */
+ return 1;
+ }
++ if (!(dentry->d_sb->s_flags & MS_ACTIVE)) {
++ /* Unhash it, so that ancestors of killed async unlink
++ * files will be cleaned up during umount */
++ return 1;
++ }
+ return 0;
+
+ }
+@@ -693,6 +730,8 @@ static struct dentry *nfs_lookup(struct
+ dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+
+ lock_kernel();
++ /* Revalidate parent directory attribute cache */
++ nfs_revalidate_inode(NFS_SERVER(dir), dir);
+
+ /* If we're doing an exclusive create, optimize away the lookup */
+ if (nfs_is_exclusive_create(dir, nd))
+@@ -715,6 +754,7 @@ no_entry:
+ error = 0;
+ d_add(dentry, inode);
+ nfs_renew_times(dentry);
++ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ out_unlock:
+ unlock_kernel();
+ out:
+@@ -768,7 +808,15 @@ static struct dentry *nfs_atomic_lookup(
+
+ /* Open the file on the server */
+ lock_kernel();
+- inode = nfs4_atomic_open(dir, dentry, nd);
++ /* Revalidate parent directory attribute cache */
++ nfs_revalidate_inode(NFS_SERVER(dir), dir);
++
++ if (nd->intent.open.flags & O_CREAT) {
++ nfs_begin_data_update(dir);
++ inode = nfs4_atomic_open(dir, dentry, nd);
++ nfs_end_data_update(dir);
++ } else
++ inode = nfs4_atomic_open(dir, dentry, nd);
+ unlock_kernel();
+ if (IS_ERR(inode)) {
+ error = PTR_ERR(inode);
+@@ -790,6 +838,7 @@ static struct dentry *nfs_atomic_lookup(
+ no_entry:
+ d_add(dentry, inode);
+ nfs_renew_times(dentry);
++ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ out:
+ BUG_ON(error > 0);
+ return ERR_PTR(error);
+@@ -801,13 +850,16 @@ static int nfs_open_revalidate(struct de
+ {
+ struct dentry *parent = NULL;
+ struct inode *inode = dentry->d_inode;
++ struct inode *dir;
++ unsigned long verifier;
+ int openflags, ret = 0;
+
+ /* NFS only supports OPEN for regular files */
+ if (inode && !S_ISREG(inode->i_mode))
+ goto no_open;
+ parent = dget_parent(dentry);
+- if (!is_atomic_open(parent->d_inode, nd))
++ dir = parent->d_inode;
++ if (!is_atomic_open(dir, nd))
+ goto no_open;
+ openflags = nd->intent.open.flags;
+ if (openflags & O_CREAT) {
+@@ -821,8 +873,16 @@ static int nfs_open_revalidate(struct de
+ /* We can't create new files, or truncate existing ones here */
+ openflags &= ~(O_CREAT|O_TRUNC);
+
++ /*
++ * Note: we're not holding inode->i_sem and so may be racing with
++ * operations that change the directory. We therefore save the
++ * change attribute *before* we do the RPC call.
++ */
+ lock_kernel();
+- ret = nfs4_open_revalidate(parent->d_inode, dentry, openflags);
++ verifier = nfs_save_change_attribute(dir);
++ ret = nfs4_open_revalidate(dir, dentry, openflags);
++ if (!ret)
++ nfs_set_verifier(dentry, verifier);
+ unlock_kernel();
+ out:
+ dput(parent);
+@@ -869,15 +929,20 @@ int nfs_cached_lookup(struct inode *dir,
+ struct nfs_server *server;
+ struct nfs_entry entry;
+ struct page *page;
+- unsigned long timestamp = NFS_MTIME_UPDATE(dir);
++ unsigned long timestamp;
+ int res;
+
+ if (!NFS_USE_READDIRPLUS(dir))
+ return -ENOENT;
+ server = NFS_SERVER(dir);
+- if (server->flags & NFS_MOUNT_NOAC)
++ /* Don't use readdirplus unless the cache is stable */
++ if ((server->flags & NFS_MOUNT_NOAC) != 0
++ || nfs_caches_unstable(dir)
++ || nfs_attribute_timeout(dir))
+ return -ENOENT;
+- nfs_revalidate_inode(server, dir);
++ if ((NFS_FLAGS(dir) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) != 0)
++ return -ENOENT;
++ timestamp = NFS_I(dir)->readdir_timestamp;
+
+ entry.fh = fh;
+ entry.fattr = fattr;
+@@ -931,6 +996,7 @@ static int nfs_instantiate(struct dentry
+ if (inode) {
+ d_instantiate(dentry, inode);
+ nfs_renew_times(dentry);
++ nfs_set_verifier(dentry, nfs_save_change_attribute(dentry->d_parent->d_inode));
+ error = 0;
+ }
+ return error;
+@@ -969,11 +1035,13 @@ static int nfs_create(struct inode *dir,
+ * does not pass the create flags.
+ */
+ lock_kernel();
+- nfs_zap_caches(dir);
++ nfs_begin_data_update(dir);
+ inode = NFS_PROTO(dir)->create(dir, &dentry->d_name, &attr, open_flags);
++ nfs_end_data_update(dir);
+ if (!IS_ERR(inode)) {
+ d_instantiate(dentry, inode);
+ nfs_renew_times(dentry);
++ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ error = 0;
+ } else {
+ error = PTR_ERR(inode);
+@@ -1004,9 +1072,10 @@ nfs_mknod(struct inode *dir, struct dent
+ attr.ia_valid = ATTR_MODE;
+
+ lock_kernel();
+- nfs_zap_caches(dir);
++ nfs_begin_data_update(dir);
+ error = NFS_PROTO(dir)->mknod(dir, &dentry->d_name, &attr, rdev,
+ &fhandle, &fattr);
++ nfs_end_data_update(dir);
+ if (!error)
+ error = nfs_instantiate(dentry, &fhandle, &fattr);
+ else
+@@ -1041,9 +1110,10 @@ static int nfs_mkdir(struct inode *dir,
+ */
+ d_drop(dentry);
+ #endif
+- nfs_zap_caches(dir);
++ nfs_begin_data_update(dir);
+ error = NFS_PROTO(dir)->mkdir(dir, &dentry->d_name, &attr, &fhandle,
+ &fattr);
++ nfs_end_data_update(dir);
+ if (!error)
+ error = nfs_instantiate(dentry, &fhandle, &fattr);
+ else
+@@ -1060,10 +1130,12 @@ static int nfs_rmdir(struct inode *dir,
+ dir->i_ino, dentry->d_name.name);
+
+ lock_kernel();
+- nfs_zap_caches(dir);
++ nfs_begin_data_update(dir);
+ error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
+- if (!error)
++ /* Ensure the VFS deletes this inode */
++ if (error == 0 && dentry->d_inode != NULL)
+ dentry->d_inode->i_nlink = 0;
++ nfs_end_data_update(dir);
+ unlock_kernel();
+
+ return error;
+@@ -1119,12 +1191,21 @@ dentry->d_parent->d_name.name, dentry->d
+ goto out;
+ } while(sdentry->d_inode != NULL); /* need negative lookup */
+
+- nfs_zap_caches(dir);
+ qsilly.name = silly;
+ qsilly.len = strlen(silly);
+- error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, dir, &qsilly);
++ nfs_begin_data_update(dir);
++ if (dentry->d_inode) {
++ nfs_begin_data_update(dentry->d_inode);
++ error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
++ dir, &qsilly);
++ nfs_end_data_update(dentry->d_inode);
++ } else
++ error = NFS_PROTO(dir)->rename(dir, &dentry->d_name,
++ dir, &qsilly);
++ nfs_end_data_update(dir);
+ if (!error) {
+ nfs_renew_times(dentry);
++ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
+ d_move(dentry, sdentry);
+ error = nfs_async_unlink(dentry);
+ /* If we return 0 we don't unlink */
+@@ -1156,14 +1237,17 @@ static int nfs_safe_remove(struct dentry
+ goto out;
+ }
+
+- nfs_zap_caches(dir);
+- if (inode)
+- NFS_CACHEINV(inode);
+- error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
+- if (error < 0)
+- goto out;
+- if (inode)
+- inode->i_nlink--;
++ nfs_begin_data_update(dir);
++ if (inode != NULL) {
++ nfs_begin_data_update(inode);
++ error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
++ /* The VFS may want to delete this inode */
++ if (error == 0)
++ inode->i_nlink--;
++ nfs_end_data_update(inode);
++ } else
++ error = NFS_PROTO(dir)->remove(dir, &dentry->d_name);
++ nfs_end_data_update(dir);
+ out:
+ return error;
+ }
+@@ -1198,9 +1282,10 @@ static int nfs_unlink(struct inode *dir,
+ spin_unlock(&dentry->d_lock);
+ spin_unlock(&dcache_lock);
+ error = nfs_safe_remove(dentry);
+- if (!error)
++ if (!error) {
+ nfs_renew_times(dentry);
+- else if (need_rehash)
++ nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
++ } else if (need_rehash)
+ d_rehash(dentry);
+ unlock_kernel();
+ return error;
+@@ -1247,9 +1332,10 @@ dentry->d_parent->d_name.name, dentry->d
+ qsymname.len = strlen(symname);
+
+ lock_kernel();
+- nfs_zap_caches(dir);
++ nfs_begin_data_update(dir);
+ error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname,
+ &attr, &sym_fh, &sym_attr);
++ nfs_end_data_update(dir);
+ if (!error) {
+ error = nfs_instantiate(dentry, &sym_fh, &sym_attr);
+ } else {
+@@ -1281,9 +1367,12 @@ nfs_link(struct dentry *old_dentry, stru
+ */
+ lock_kernel();
+ d_drop(dentry);
+- nfs_zap_caches(dir);
+- NFS_CACHEINV(inode);
++
++ nfs_begin_data_update(dir);
++ nfs_begin_data_update(inode);
+ error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
++ nfs_end_data_update(inode);
++ nfs_end_data_update(dir);
+ unlock_kernel();
+ return error;
+ }
+@@ -1388,16 +1477,23 @@ go_ahead:
+ if (new_inode)
+ d_delete(new_dentry);
+
+- nfs_zap_caches(new_dir);
+- nfs_zap_caches(old_dir);
++ nfs_begin_data_update(old_dir);
++ nfs_begin_data_update(new_dir);
++ nfs_begin_data_update(old_inode);
+ error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name,
+ new_dir, &new_dentry->d_name);
++ nfs_end_data_update(old_inode);
++ nfs_end_data_update(new_dir);
++ nfs_end_data_update(old_dir);
+ out:
+ if (rehash)
+ d_rehash(rehash);
+- if (!error && !S_ISDIR(old_inode->i_mode))
+- d_move(old_dentry, new_dentry);
+- nfs_renew_times(new_dentry);
++ if (!error) {
++ if (!S_ISDIR(old_inode->i_mode))
++ d_move(old_dentry, new_dentry);
++ nfs_renew_times(new_dentry);
++ nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir));
++ }
+
+ /* new dentry created? */
+ if (dentry)
+@@ -1451,7 +1547,8 @@ nfs_permission(struct inode *inode, int
+
+ cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0);
+ if (cache->cred == cred
+- && time_before(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) {
++ && time_before(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))
++ && !(NFS_FLAGS(inode) & NFS_INO_INVALID_ATTR)) {
+ if (!(res = cache->err)) {
+ /* Is the mask a subset of an accepted mask? */
+ if ((cache->mask & mask) == mask)
+diff -puN fs/nfs/direct.c~CITI_NFS4_ALL fs/nfs/direct.c
+--- linux-2.6.3/fs/nfs/direct.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfs/direct.c 2004-02-19 16:47:03.000000000 -0500
+@@ -269,6 +269,7 @@ nfs_direct_write_seg(struct inode *inode
+ if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize)
+ wdata.args.stable = NFS_FILE_SYNC;
+
++ nfs_begin_data_update(inode);
+ retry:
+ need_commit = 0;
+ tot_bytes = 0;
+@@ -334,6 +335,8 @@ retry:
+ VERF_SIZE) != 0)
+ goto sync_retry;
+ }
++ nfs_end_data_update(inode);
++ NFS_FLAGS(inode) |= NFS_INO_INVALID_DATA;
+
+ return tot_bytes;
+
+diff -puN fs/nfs/file.c~CITI_NFS4_ALL fs/nfs/file.c
+--- linux-2.6.3/fs/nfs/file.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfs/file.c 2004-02-19 16:47:07.000000000 -0500
+@@ -63,6 +63,20 @@ struct inode_operations nfs_file_inode_o
+ .setattr = nfs_setattr,
+ };
+
++#ifdef CONFIG_NFS_V4
++
++struct inode_operations nfs4_file_inode_operations = {
++ .permission = nfs_permission,
++ .getattr = nfs_getattr,
++ .setattr = nfs_setattr,
++#ifdef CONFIG_NFS_V4_ACL
++ .getxattr = nfs_getxattr,
++ .setxattr = nfs_setxattr,
++#endif /* CONFIG_NFS_V4_ACL */
++};
++
++#endif /* CONFIG_NFS_V4 */
++
+ /* Hack for future NFS swap support */
+ #ifndef IS_SWAPFILE
+ # define IS_SWAPFILE(inode) (0)
+@@ -104,11 +118,16 @@ nfs_file_flush(struct file *file)
+
+ dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
+
++ if ((file->f_mode & FMODE_WRITE) == 0)
++ return 0;
+ lock_kernel();
+- status = nfs_wb_file(inode, file);
++ /* Ensure that data+attribute caches are up to date after close() */
++ status = nfs_wb_all(inode);
+ if (!status) {
+ status = file->f_error;
+ file->f_error = 0;
++ if (!status)
++ __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ }
+ unlock_kernel();
+ return status;
+@@ -179,7 +198,7 @@ nfs_fsync(struct file *file, struct dent
+ dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino);
+
+ lock_kernel();
+- status = nfs_wb_file(inode, file);
++ status = nfs_wb_all(inode);
+ if (!status) {
+ status = file->f_error;
+ file->f_error = 0;
+diff -puN fs/nfs/inode.c~CITI_NFS4_ALL fs/nfs/inode.c
+--- linux-2.6.3/fs/nfs/inode.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfs/inode.c 2004-02-19 16:47:15.000000000 -0500
+@@ -53,8 +53,8 @@
+ */
+ #define NFS_MAX_READAHEAD RPC_MAXREQS
+
+-void nfs_zap_caches(struct inode *);
+ static void nfs_invalidate_inode(struct inode *);
++static int nfs_update_inode(struct inode *, struct nfs_fattr *, unsigned long);
+
+ static struct inode *nfs_alloc_inode(struct super_block *sb);
+ static void nfs_destroy_inode(struct inode *);
+@@ -118,7 +118,7 @@ nfs_write_inode(struct inode *inode, int
+ {
+ int flags = sync ? FLUSH_WAIT : 0;
+
+- nfs_commit_file(inode, NULL, 0, 0, flags);
++ nfs_commit_inode(inode, 0, 0, flags);
+ }
+
+ static void
+@@ -136,21 +136,24 @@ nfs_delete_inode(struct inode * inode)
+ clear_inode(inode);
+ }
+
+-/*
+- * For the moment, the only task for the NFS clear_inode method is to
+- * release the mmap credential
+- */
+ static void
+ nfs_clear_inode(struct inode *inode)
+ {
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct rpc_cred *cred = nfsi->mm_cred;
+
++#ifdef CONFIG_NFS_V4_ACL
++ if (nfsi->acl != NFS4_ACL_NOT_CACHED)
++ posix_acl_release(nfsi->acl);
++ if (nfsi->default_acl != NFS4_ACL_NOT_CACHED)
++ posix_acl_release(nfsi->default_acl);
++#endif /* CONFIG_NFS_V4_ACL */
+ if (cred)
+ put_rpccred(cred);
+ cred = nfsi->cache_access.cred;
+ if (cred)
+ put_rpccred(cred);
++ BUG_ON(atomic_read(&nfsi->data_updates) != 0);
+ }
+
+ void
+@@ -230,50 +233,23 @@ nfs_block_size(unsigned long bsize, unsi
+ /*
+ * Obtain the root inode of the file system.
+ */
+-static int
+-nfs_get_root(struct inode **rooti, rpc_authflavor_t authflavor, struct super_block *sb, struct nfs_fh *rootfh)
++static struct inode *
++nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo)
+ {
+ struct nfs_server *server = NFS_SB(sb);
+- struct nfs_fattr fattr = { };
++ struct inode *rooti;
+ int error;
+
+- error = server->rpc_ops->getroot(server, rootfh, &fattr);
+- if (error == -EACCES && authflavor > RPC_AUTH_MAXFLAVOR) {
+- /*
+- * Some authentication types (gss/krb5, most notably)
+- * are such that root won't be able to present a
+- * credential for GETATTR (ie, getroot()).
+- *
+- * We still want the mount to succeed.
+- *
+- * So we fake the attr values and mark the inode as such.
+- * On the first succesful traversal, we fix everything.
+- * The auth type test isn't quite correct, but whatever.
+- */
+- dfprintk(VFS, "NFS: faking root inode\n");
+-
+- fattr.fileid = 1;
+- fattr.nlink = 2; /* minimum for a dir */
+- fattr.type = NFDIR;
+- fattr.mode = S_IFDIR|S_IRUGO|S_IXUGO;
+- fattr.size = 4096;
+- fattr.du.nfs3.used = 1;
+- fattr.valid = NFS_ATTR_FATTR|NFS_ATTR_FATTR_V3;
+- } else if (error < 0) {
++ error = server->rpc_ops->getroot(server, rootfh, fsinfo);
++ if (error < 0) {
+ printk(KERN_NOTICE "nfs_get_root: getattr error = %d\n", -error);
+- *rooti = NULL; /* superfluous ... but safe */
+- return error;
++ return ERR_PTR(error);
+ }
+
+- *rooti = nfs_fhget(sb, rootfh, &fattr);
+- if (error == -EACCES && authflavor > RPC_AUTH_MAXFLAVOR) {
+- if (*rooti) {
+- NFS_FLAGS(*rooti) |= NFS_INO_FAKE_ROOT;
+- NFS_CACHEINV((*rooti));
+- error = 0;
+- }
+- }
+- return error;
++ rooti = nfs_fhget(sb, rootfh, fsinfo->fattr);
++ if (!rooti)
++ return ERR_PTR(-ENOMEM);
++ return rooti;
+ }
+
+ /*
+@@ -283,7 +259,7 @@ static int
+ nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor)
+ {
+ struct nfs_server *server;
+- struct inode *root_inode = NULL;
++ struct inode *root_inode;
+ struct nfs_fattr fattr;
+ struct nfs_fsinfo fsinfo = {
+ .fattr = &fattr,
+@@ -299,8 +275,9 @@ nfs_sb_init(struct super_block *sb, rpc_
+
+ sb->s_magic = NFS_SUPER_MAGIC;
+
++ root_inode = nfs_get_root(sb, &server->fh, &fsinfo);
+ /* Did getting the root inode fail? */
+- if (nfs_get_root(&root_inode, authflavor, sb, &server->fh) < 0)
++ if (IS_ERR(root_inode))
+ goto out_no_root;
+ sb->s_root = d_alloc_root(root_inode);
+ if (!sb->s_root)
+@@ -309,10 +286,6 @@ nfs_sb_init(struct super_block *sb, rpc_
+ sb->s_root->d_op = server->rpc_ops->dentry_ops;
+
+ /* Get some general file system info */
+- if (server->rpc_ops->fsinfo(server, &server->fh, &fsinfo) < 0) {
+- printk(KERN_NOTICE "NFS: cannot retrieve file system info.\n");
+- goto out_no_root;
+- }
+ if (server->namelen == 0 &&
+ server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0)
+ server->namelen = pathinfo.max_namelen;
+@@ -368,13 +341,11 @@ nfs_sb_init(struct super_block *sb, rpc_
+ rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100);
+ return 0;
+ /* Yargs. It didn't work out. */
+-out_free_all:
+- if (root_inode)
+- iput(root_inode);
+- return -EINVAL;
+ out_no_root:
+ printk("nfs_read_super: get root inode failed\n");
+- goto out_free_all;
++ if (!IS_ERR(root_inode))
++ iput(root_inode);
++ return -EINVAL;
+ }
+
+ /*
+@@ -627,13 +598,17 @@ static int nfs_show_options(struct seq_f
+ void
+ nfs_zap_caches(struct inode *inode)
+ {
++ struct nfs_inode *nfsi = NFS_I(inode);
++ int mode = inode->i_mode;
++
+ NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode);
+ NFS_ATTRTIMEO_UPDATE(inode) = jiffies;
+
+- invalidate_remote_inode(inode);
+-
+ memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
+- NFS_CACHEINV(inode);
++ if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode))
++ nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
++ else
++ nfsi->flags |= NFS_INO_INVALID_ATTR;
+ }
+
+ /*
+@@ -673,9 +648,6 @@ nfs_find_actor(struct inode *inode, void
+ return 0;
+ if (is_bad_inode(inode))
+ return 0;
+- /* Force an attribute cache update if inode->i_count == 0 */
+- if (!atomic_read(&inode->i_count))
+- NFS_CACHEINV(inode);
+ return 1;
+ }
+
+@@ -729,12 +701,12 @@ nfs_fhget(struct super_block *sb, struct
+ inode->i_ino = hash;
+
+ /* We can't support update_atime(), since the server will reset it */
+- inode->i_flags |= S_NOATIME;
++ inode->i_flags |= S_NOATIME|S_NOCMTIME;
+ inode->i_mode = fattr->mode;
+ /* Why so? Because we want revalidate for devices/FIFOs, and
+ * that's precisely what we have in nfs_file_inode_operations.
+ */
+- inode->i_op = &nfs_file_inode_operations;
++ inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops;
+ if (S_ISREG(inode->i_mode)) {
+ inode->i_fop = &nfs_file_operations;
+ inode->i_data.a_ops = &nfs_file_aops;
+@@ -754,10 +726,6 @@ nfs_fhget(struct super_block *sb, struct
+ inode->i_atime = fattr->atime;
+ inode->i_mtime = fattr->mtime;
+ inode->i_ctime = fattr->ctime;
+- nfsi->read_cache_ctime = fattr->ctime;
+- nfsi->read_cache_mtime = fattr->mtime;
+- nfsi->cache_mtime_jiffies = fattr->timestamp;
+- nfsi->read_cache_isize = fattr->size;
+ if (fattr->valid & NFS_ATTR_FATTR_V4)
+ nfsi->change_attr = fattr->change_attr;
+ inode->i_size = nfs_size_to_loff_t(fattr->size);
+@@ -778,7 +746,6 @@ nfs_fhget(struct super_block *sb, struct
+ nfsi->attrtimeo_timestamp = jiffies;
+ memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf));
+ nfsi->cache_access.cred = NULL;
+-
+ unlock_new_inode(inode);
+ } else
+ nfs_refresh_inode(inode, fattr);
+@@ -804,70 +771,50 @@ nfs_setattr(struct dentry *dentry, struc
+ struct nfs_fattr fattr;
+ int error;
+
++ if (attr->ia_valid & ATTR_SIZE) {
++ if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode))
++ attr->ia_valid &= ~ATTR_SIZE;
++ }
++
+ /* Optimization: if the end result is no change, don't RPC */
+ attr->ia_valid &= NFS_VALID_ATTRS;
+ if (attr->ia_valid == 0)
+ return 0;
+
+ lock_kernel();
+-
+- /*
+- * Make sure the inode is up-to-date.
+- */
+- error = nfs_revalidate_inode(NFS_SERVER(inode),inode);
+- if (error) {
+-#ifdef NFS_PARANOIA
+-printk("nfs_setattr: revalidate failed, error=%d\n", error);
+-#endif
+- goto out;
+- }
+-
+- if (!S_ISREG(inode->i_mode)) {
+- attr->ia_valid &= ~ATTR_SIZE;
+- if (attr->ia_valid == 0)
+- goto out;
+- } else {
+- filemap_fdatawrite(inode->i_mapping);
+- error = nfs_wb_all(inode);
+- filemap_fdatawait(inode->i_mapping);
+- if (error)
+- goto out;
+- /* Optimize away unnecessary truncates */
+- if ((attr->ia_valid & ATTR_SIZE) && i_size_read(inode) == attr->ia_size)
+- attr->ia_valid &= ~ATTR_SIZE;
++ nfs_begin_data_update(inode);
++ /* Write all dirty data if we're changing file permissions or size */
++ if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) {
++ if (filemap_fdatawrite(inode->i_mapping) == 0)
++ filemap_fdatawait(inode->i_mapping);
++ nfs_wb_all(inode);
+ }
+- if (!attr->ia_valid)
+- goto out;
+-
+ error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr);
+- if (error)
+- goto out;
+- /*
+- * If we changed the size or mtime, update the inode
+- * now to avoid invalidating the page cache.
+- */
+- if (attr->ia_valid & ATTR_SIZE) {
+- if (attr->ia_size != fattr.size)
+- printk("nfs_setattr: attr=%Ld, fattr=%Ld??\n",
+- (long long) attr->ia_size, (long long)fattr.size);
+- vmtruncate(inode, attr->ia_size);
++ if (error == 0) {
++ nfs_refresh_inode(inode, &fattr);
++ if ((attr->ia_valid & ATTR_MODE) != 0) {
++ int mode;
++ mode = inode->i_mode & ~S_IALLUGO;
++ mode |= attr->ia_mode & S_IALLUGO;
++ inode->i_mode = mode;
++ }
++ if ((attr->ia_valid & ATTR_UID) != 0)
++ inode->i_uid = attr->ia_uid;
++ if ((attr->ia_valid & ATTR_GID) != 0)
++ inode->i_gid = attr->ia_gid;
++ if ((attr->ia_valid & ATTR_SIZE) != 0) {
++ i_size_write(inode, attr->ia_size);
++ vmtruncate(inode, attr->ia_size);
++ }
+ }
+-
+- /*
+- * If we changed the size or mtime, update the inode
+- * now to avoid invalidating the page cache.
+- */
+- if (!(fattr.valid & NFS_ATTR_WCC)) {
+- struct nfs_inode *nfsi = NFS_I(inode);
+- fattr.pre_size = nfsi->read_cache_isize;
+- fattr.pre_mtime = nfsi->read_cache_mtime;
+- fattr.pre_ctime = nfsi->read_cache_ctime;
+- fattr.valid |= NFS_ATTR_WCC;
+- }
+- /* Force an attribute cache update */
+- NFS_CACHEINV(inode);
+- error = nfs_refresh_inode(inode, &fattr);
+-out:
++ if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) {
++ struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred;
++ if (*cred) {
++ put_rpccred(*cred);
++ *cred = NULL;
++ }
++ }
++ nfs_end_data_update(inode);
+ unlock_kernel();
+ return error;
+ }
+@@ -895,7 +842,19 @@ nfs_wait_on_inode(struct inode *inode, i
+ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
+ {
+ struct inode *inode = dentry->d_inode;
+- int err = nfs_revalidate_inode(NFS_SERVER(inode), inode);
++ struct nfs_inode *nfsi = NFS_I(inode);
++ int need_atime = nfsi->flags & NFS_INO_INVALID_ATIME;
++ int err;
++
++ if (__IS_FLG(inode, MS_NOATIME))
++ need_atime = 0;
++ else if (__IS_FLG(inode, MS_NODIRATIME) && S_ISDIR(inode->i_mode))
++ need_atime = 0;
++ /* We may force a getattr if the user cares about atime */
++ if (need_atime)
++ err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
++ else
++ err = nfs_revalidate_inode(NFS_SERVER(inode), inode);
+ if (!err)
+ generic_fillattr(inode, stat);
+ return err;
+@@ -930,8 +889,10 @@ int nfs_open(struct inode *inode, struct
+ auth = NFS_CLIENT(inode)->cl_auth;
+ cred = rpcauth_lookupcred(auth, 0);
+ filp->private_data = cred;
+- if (filp->f_mode & FMODE_WRITE)
++ if ((filp->f_mode & FMODE_WRITE) != 0) {
+ nfs_set_mmcred(inode, cred);
++ nfs_begin_data_update(inode);
++ }
+ return 0;
+ }
+
+@@ -940,6 +901,8 @@ int nfs_release(struct inode *inode, str
+ struct rpc_cred *cred;
+
+ lock_kernel();
++ if ((filp->f_mode & FMODE_WRITE) != 0)
++ nfs_end_data_update(inode);
+ cred = nfs_file_cred(filp);
+ if (cred)
+ put_rpccred(cred);
+@@ -956,6 +919,9 @@ __nfs_revalidate_inode(struct nfs_server
+ {
+ int status = -ESTALE;
+ struct nfs_fattr fattr;
++ struct nfs_inode *nfsi = NFS_I(inode);
++ unsigned long verifier;
++ unsigned int flags;
+
+ dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n",
+ inode->i_sb->s_id, (long long)NFS_FILEID(inode));
+@@ -965,23 +931,22 @@ __nfs_revalidate_inode(struct nfs_server
+ goto out_nowait;
+ if (NFS_STALE(inode) && inode != inode->i_sb->s_root->d_inode)
+ goto out_nowait;
+- if (NFS_FAKE_ROOT(inode)) {
+- dfprintk(VFS, "NFS: not revalidating fake root\n");
+- status = 0;
+- goto out_nowait;
+- }
+
+ while (NFS_REVALIDATING(inode)) {
+ status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING);
+ if (status < 0)
+ goto out_nowait;
+- if (time_before(jiffies,NFS_READTIME(inode)+NFS_ATTRTIMEO(inode))) {
+- status = NFS_STALE(inode) ? -ESTALE : 0;
+- goto out_nowait;
+- }
++ if (NFS_SERVER(inode)->flags & NFS_MOUNT_NOAC)
++ continue;
++ if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME))
++ continue;
++ status = NFS_STALE(inode) ? -ESTALE : 0;
++ goto out_nowait;
+ }
+ NFS_FLAGS(inode) |= NFS_INO_REVALIDATING;
+
++ /* Protect against RPC races by saving the change attribute */
++ verifier = nfs_save_change_attribute(inode);
+ status = NFS_PROTO(inode)->getattr(inode, &fattr);
+ if (status) {
+ dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n",
+@@ -995,13 +960,36 @@ __nfs_revalidate_inode(struct nfs_server
+ goto out;
+ }
+
+- status = nfs_refresh_inode(inode, &fattr);
++ status = nfs_update_inode(inode, &fattr, verifier);
+ if (status) {
+ dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n",
+ inode->i_sb->s_id,
+ (long long)NFS_FILEID(inode), status);
+ goto out;
+ }
++ flags = nfsi->flags;
++ /*
++ * We may need to keep the attributes marked as invalid if
++ * we raced with nfs_end_data_update().
++ */
++ if (verifier == nfsi->cache_change_attribute)
++ nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME);
++ /* Do the page cache invalidation */
++ if (flags & NFS_INO_INVALID_DATA) {
++ if (S_ISREG(inode->i_mode)) {
++ if (filemap_fdatawrite(inode->i_mapping) == 0)
++ filemap_fdatawait(inode->i_mapping);
++ nfs_wb_all(inode);
++ }
++ nfsi->flags &= ~NFS_INO_INVALID_DATA;
++ invalidate_inode_pages2(inode->i_mapping);
++ memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
++ dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n",
++ inode->i_sb->s_id,
++ (long long)NFS_FILEID(inode));
++ /* This ensures we revalidate dentries */
++ nfsi->cache_change_attribute++;
++ }
+ dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n",
+ inode->i_sb->s_id,
+ (long long)NFS_FILEID(inode));
+@@ -1009,41 +997,104 @@ __nfs_revalidate_inode(struct nfs_server
+ NFS_FLAGS(inode) &= ~NFS_INO_STALE;
+ out:
+ NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING;
+- wake_up(&NFS_I(inode)->nfs_i_wait);
++ wake_up(&nfsi->nfs_i_wait);
+ out_nowait:
+ unlock_kernel();
+ return status;
+ }
+
+-/*
+- * nfs_fattr_obsolete - Test if attribute data is newer than cached data
+- * @inode: inode
+- * @fattr: attributes to test
++/**
++ * nfs_begin_data_update - declare the start of operations that update file data
++ * @inode: pointer to inode
++ * Atomically bumps the inode's data_updates count; no other state is touched.
++ */
++void nfs_begin_data_update(struct inode *inode)
++{
++	atomic_inc(&NFS_I(inode)->data_updates);
++}
++
++/**
++ * nfs_end_data_update - declare the end of operations that update file data
++ * @inode: pointer to inode
++ * Drops data_updates; when it hits zero the cached attributes are invalidated.
++ */
++void nfs_end_data_update(struct inode *inode)
++{
++	struct nfs_inode *nfsi = NFS_I(inode);
++
++	if (atomic_dec_and_test(&nfsi->data_updates)) {	/* count reached zero */
++		nfsi->cache_change_attribute ++;
++		/* Mark the attribute cache for revalidation */
++		nfsi->flags |= NFS_INO_INVALID_ATTR;
++		/* Directories and symlinks: invalidate page cache too */
++		if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
++			nfsi->flags |= NFS_INO_INVALID_DATA;
++	}
++}
++
++/**
++ * nfs_refresh_inode - verify consistency of the inode attribute cache
++ * @inode - pointer to inode
++ * @fattr - updated attributes
+ *
+- * Avoid stuffing the attribute cache with obsolete information.
+- * We always accept updates if the attribute cache timed out, or if
+- * fattr->ctime is newer than our cached value.
+- * If fattr->ctime matches the cached value, we still accept the update
+- * if it increases the file size.
++ * Verifies the attribute cache. If we have just changed the attributes,
++ * so that fattr carries weak cache consistency data, then it may
++ * also update the ctime/mtime/change_attribute.
+ */
+-static inline
+-int nfs_fattr_obsolete(struct inode *inode, struct nfs_fattr *fattr)
++int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
+ {
+ struct nfs_inode *nfsi = NFS_I(inode);
+- long cdif;
++ loff_t cur_size, new_isize;
++ int data_unstable;
++
++ /* Are we in the process of updating data on the server? */
++ data_unstable = nfs_caches_unstable(inode);
++
++ if (fattr->valid & NFS_ATTR_FATTR_V4) {
++ if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0
++ && nfsi->change_attr == fattr->pre_change_attr)
++ nfsi->change_attr = fattr->change_attr;
++ if (!data_unstable && nfsi->change_attr != fattr->change_attr)
++ nfsi->flags |= NFS_INO_INVALID_ATTR;
++ }
++
++ if ((fattr->valid & NFS_ATTR_FATTR) == 0)
++ return 0;
++
++ /* Has the inode gone and changed behind our back? */
++ if (nfsi->fileid != fattr->fileid
++ || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
++ return -EIO;
+
+- if (time_after(jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo))
+- goto out_valid;
+- cdif = fattr->ctime.tv_sec - nfsi->read_cache_ctime.tv_sec;
+- if (cdif == 0)
+- cdif = fattr->ctime.tv_nsec - nfsi->read_cache_ctime.tv_nsec;
+- if (cdif > 0)
+- goto out_valid;
+- /* Ugh... */
+- if (cdif == 0 && fattr->size > nfsi->read_cache_isize)
+- goto out_valid;
+- return -1;
+- out_valid:
++ cur_size = i_size_read(inode);
++ new_isize = nfs_size_to_loff_t(fattr->size);
++
++ /* If we have atomic WCC data, we may update some attributes */
++ if ((fattr->valid & NFS_ATTR_WCC) != 0) {
++ if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime))
++ memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
++ if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime))
++ memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
++ }
++
++ /* Verify a few of the more important attributes */
++ if (!data_unstable) {
++ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)
++ || cur_size != new_isize)
++ nfsi->flags |= NFS_INO_INVALID_ATTR;
++ } else if (S_ISREG(inode->i_mode) && new_isize > cur_size)
++ nfsi->flags |= NFS_INO_INVALID_ATTR;
++
++ /* Have any file permissions changed? */
++ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO)
++ || inode->i_uid != fattr->uid
++ || inode->i_gid != fattr->gid)
++ nfsi->flags |= NFS_INO_INVALID_ATTR;
++
++ if (!timespec_equal(&inode->i_atime, &fattr->atime))
++ nfsi->flags |= NFS_INO_INVALID_ATIME;
++
++ nfsi->read_cache_jiffies = fattr->timestamp;
+ return 0;
+ }
+
+@@ -1059,20 +1110,22 @@ int nfs_fattr_obsolete(struct inode *ino
+ *
+ * A very similar scenario holds for the dir cache.
+ */
+-int
+-__nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
++static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier)
+ {
+ struct nfs_inode *nfsi = NFS_I(inode);
+ __u64 new_size;
+ loff_t new_isize;
+- int invalid = 0;
+- int mtime_update = 0;
++ unsigned int invalid = 0;
+ loff_t cur_isize;
++ int data_unstable;
+
+- dfprintk(VFS, "NFS: refresh_inode(%s/%ld ct=%d info=0x%x)\n",
+- inode->i_sb->s_id, inode->i_ino,
++ dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n",
++ __FUNCTION__, inode->i_sb->s_id, inode->i_ino,
+ atomic_read(&inode->i_count), fattr->valid);
+
++ if ((fattr->valid & NFS_ATTR_FATTR) == 0)
++ return 0;
++
+ /* First successful call after mount, fill real data. */
+ if (NFS_FAKE_ROOT(inode)) {
+ dfprintk(VFS, "NFS: updating fake root\n");
+@@ -1081,43 +1134,49 @@ __nfs_refresh_inode(struct inode *inode,
+ }
+
+ if (nfsi->fileid != fattr->fileid) {
+- printk(KERN_ERR "nfs_refresh_inode: inode number mismatch\n"
++ printk(KERN_ERR "%s: inode number mismatch\n"
+ "expected (%s/0x%Lx), got (%s/0x%Lx)\n",
++ __FUNCTION__,
+ inode->i_sb->s_id, (long long)nfsi->fileid,
+ inode->i_sb->s_id, (long long)fattr->fileid);
+ goto out_err;
+ }
+
+- /* Throw out obsolete READDIRPLUS attributes */
+- if (time_before(fattr->timestamp, NFS_READTIME(inode)))
+- return 0;
+ /*
+ * Make sure the inode's type hasn't changed.
+ */
+ if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT))
+ goto out_changed;
+
+- new_size = fattr->size;
+- new_isize = nfs_size_to_loff_t(fattr->size);
+-
+- /* Avoid races */
+- if (nfs_fattr_obsolete(inode, fattr))
+- goto out_nochange;
+-
+ /*
+ * Update the read time so we don't revalidate too often.
+ */
+ nfsi->read_cache_jiffies = fattr->timestamp;
+
+- /*
+- * Note: NFS_CACHE_ISIZE(inode) reflects the state of the cache.
+- * NOT inode->i_size!!!
+- */
+- if (nfsi->read_cache_isize != new_size) {
++ /* Are we racing with known updates of the metadata on the server? */
++ data_unstable = ! nfs_verify_change_attribute(inode, verifier);
++
++ /* Check if the file size agrees */
++ new_size = fattr->size;
++ new_isize = nfs_size_to_loff_t(fattr->size);
++ cur_isize = i_size_read(inode);
++ if (cur_isize != new_size) {
+ #ifdef NFS_DEBUG_VERBOSE
+ printk(KERN_DEBUG "NFS: isize change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino);
+ #endif
+- invalid = 1;
++ /*
++ * If we have pending writebacks, things can get
++ * messy.
++ */
++ if (S_ISREG(inode->i_mode) && data_unstable) {
++ if (new_isize > cur_isize) {
++ i_size_write(inode, new_isize);
++ invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
++ }
++ } else {
++ i_size_write(inode, new_isize);
++ invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
++ }
+ }
+
+ /*
+@@ -1125,12 +1184,13 @@ __nfs_refresh_inode(struct inode *inode,
+ * can change this value in VFS without requiring a
+ * cache revalidation.
+ */
+- if (!timespec_equal(&nfsi->read_cache_mtime, &fattr->mtime)) {
++ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) {
++ memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime));
+ #ifdef NFS_DEBUG_VERBOSE
+ printk(KERN_DEBUG "NFS: mtime change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino);
+ #endif
+- invalid = 1;
+- mtime_update = 1;
++ if (!data_unstable)
++ invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+ }
+
+ if ((fattr->valid & NFS_ATTR_FATTR_V4)
+@@ -1139,47 +1199,15 @@ __nfs_refresh_inode(struct inode *inode,
+ printk(KERN_DEBUG "NFS: change_attr change on %s/%ld\n",
+ inode->i_sb->s_id, inode->i_ino);
+ #endif
+- invalid = 1;
+- }
+-
+- /* Check Weak Cache Consistency data.
+- * If size and mtime match the pre-operation values, we can
+- * assume that any attribute changes were caused by our NFS
+- * operation, so there's no need to invalidate the caches.
+- */
+- if ((fattr->valid & NFS_ATTR_PRE_CHANGE)
+- && nfsi->change_attr == fattr->pre_change_attr) {
+- invalid = 0;
+- }
+- else if ((fattr->valid & NFS_ATTR_WCC)
+- && nfsi->read_cache_isize == fattr->pre_size
+- && timespec_equal(&nfsi->read_cache_mtime, &fattr->pre_mtime)) {
+- invalid = 0;
+- }
+-
+- /*
+- * If we have pending writebacks, things can get
+- * messy.
+- */
+- cur_isize = i_size_read(inode);
+- if (nfs_have_writebacks(inode) && new_isize < cur_isize)
+- new_isize = cur_isize;
+-
+- nfsi->read_cache_ctime = fattr->ctime;
+- inode->i_ctime = fattr->ctime;
+- inode->i_atime = fattr->atime;
+-
+- if (mtime_update) {
+- if (invalid)
+- nfsi->cache_mtime_jiffies = fattr->timestamp;
+- nfsi->read_cache_mtime = fattr->mtime;
+- inode->i_mtime = fattr->mtime;
++ nfsi->change_attr = fattr->change_attr;
++ if (!data_unstable)
++ invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
+ }
+
+- nfsi->read_cache_isize = new_size;
+- i_size_write(inode, new_isize);
++ memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime));
++ memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime));
+
+- if (inode->i_mode != fattr->mode ||
++ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ||
+ inode->i_uid != fattr->uid ||
+ inode->i_gid != fattr->gid) {
+ struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred;
+@@ -1187,15 +1215,17 @@ __nfs_refresh_inode(struct inode *inode,
+ put_rpccred(*cred);
+ *cred = NULL;
+ }
++ invalid |= NFS_INO_INVALID_ATTR;
+ }
+
+- if (fattr->valid & NFS_ATTR_FATTR_V4)
+- nfsi->change_attr = fattr->change_attr;
+-
+ inode->i_mode = fattr->mode;
+ inode->i_nlink = fattr->nlink;
+ inode->i_uid = fattr->uid;
+ inode->i_gid = fattr->gid;
++#ifdef CONFIG_NFS_V4_ACL
++ nfs4_izap_acl(inode, &nfsi->acl);
++ nfs4_izap_acl(inode, &nfsi->default_acl);
++#endif /* CONFIG_NFS_V4_ACL */
+
+ if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) {
+ /*
+@@ -1207,31 +1237,30 @@ __nfs_refresh_inode(struct inode *inode,
+ inode->i_blocks = fattr->du.nfs2.blocks;
+ inode->i_blksize = fattr->du.nfs2.blocksize;
+ }
+-
+- /* Update attrtimeo value */
+- if (invalid) {
++
++ /* Update attrtimeo value if we're out of the unstable period */
++ if (invalid & NFS_INO_INVALID_ATTR) {
+ nfsi->attrtimeo = NFS_MINATTRTIMEO(inode);
+ nfsi->attrtimeo_timestamp = jiffies;
+- invalidate_remote_inode(inode);
+- memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode)));
+ } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) {
+ if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode))
+ nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode);
+ nfsi->attrtimeo_timestamp = jiffies;
+ }
++ /* Don't invalidate the data if we were to blame */
++ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)
++ || S_ISLNK(inode->i_mode)))
++ invalid &= ~NFS_INO_INVALID_DATA;
++ nfsi->flags |= invalid;
+
+ return 0;
+- out_nochange:
+- if (!timespec_equal(&fattr->atime, &inode->i_atime))
+- inode->i_atime = fattr->atime;
+- return 0;
+ out_changed:
+ /*
+ * Big trouble! The inode has become a different object.
+ */
+ #ifdef NFS_PARANOIA
+- printk(KERN_DEBUG "nfs_refresh_inode: inode %ld mode changed, %07o to %07o\n",
+- inode->i_ino, inode->i_mode, fattr->mode);
++ printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n",
++ __FUNCTION__, inode->i_ino, inode->i_mode, fattr->mode);
+ #endif
+ /*
+ * No need to worry about unhashing the dentry, as the
+@@ -1355,6 +1384,82 @@ static struct file_system_type nfs_fs_ty
+ .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT,
+ };
+
++#ifdef CONFIG_NFS_V4_ACL
++
++/*
++ * Set the POSIX ACL selected by @key (exactly XATTR_NAME_ACL_ACCESS or
++ * XATTR_NAME_ACL_DEFAULT, otherwise -EINVAL) from the xattr blob in @buf.
++ */
++int
++nfs_setxattr(struct dentry *dentry, const char *key, const void *buf,
++	     size_t buflen, int flags)
++{
++	struct posix_acl *acl;
++	int type, error;
++	struct inode *inode = dentry->d_inode;
++
++	if (strcmp(key, XATTR_NAME_ACL_ACCESS) == 0)	/* whole-name match */
++		type = ACL_TYPE_ACCESS;
++	else if (strcmp(key, XATTR_NAME_ACL_DEFAULT) == 0)
++		type = ACL_TYPE_DEFAULT;
++	else
++		return (-EINVAL);
++
++	if (!S_ISREG(inode->i_mode) &&
++	    (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX))
++		return (-EPERM);	/* only files and non-sticky dirs */
++
++	if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
++		return -EACCES;		/* default ACLs need a directory */
++
++	acl = posix_acl_from_xattr(buf, buflen);
++	if (IS_ERR(acl))
++		return (PTR_ERR(acl));
++	if (acl == NULL)
++		return (-ENODATA);
++
++	error = posix_acl_valid(acl);
++	if (error)
++		goto out_free;
++
++	error = nfs4_proc_set_posix_acl(inode, type, acl);
++out_free:
++	posix_acl_release(acl);	/* drop our reference on every exit path */
++	return error;
++}
++
++/*
++ * Fetch the POSIX ACL selected by @key (exactly XATTR_NAME_ACL_ACCESS or
++ * XATTR_NAME_ACL_DEFAULT, otherwise -EINVAL) and encode it into @buf.
++ */
++ssize_t
++nfs_getxattr(struct dentry *dentry, const char *key, void *buf,
++	     size_t buflen)
++{
++	int type = 0;
++	struct inode *inode = dentry->d_inode;
++	struct posix_acl *acl;
++	ssize_t ret;
++
++	if (strcmp(key, XATTR_NAME_ACL_ACCESS) == 0)	/* whole-name match */
++		type = ACL_TYPE_ACCESS;
++	else if (strcmp(key, XATTR_NAME_ACL_DEFAULT) == 0)
++		type = ACL_TYPE_DEFAULT;
++	else
++		return (-EINVAL);
++
++	acl = nfs4_proc_get_posix_acl(inode, type);
++	if (IS_ERR(acl))
++		return (PTR_ERR(acl));
++
++	ret = posix_acl_to_xattr(acl, buf, buflen);
++
++	posix_acl_release(acl);
++	return ret;
++}
++
++#endif /* CONFIG_NFS_V4_ACL */
++
+ #ifdef CONFIG_NFS_V4
+
+ static void nfs4_clear_inode(struct inode *);
+@@ -1601,7 +1706,7 @@ static struct super_block *nfs4_get_sb(s
+
+ if (data->version != NFS4_MOUNT_VERSION) {
+ printk("nfs warning: mount version %s than kernel\n",
+- data->version < NFS_MOUNT_VERSION ? "older" : "newer");
++ data->version < NFS4_MOUNT_VERSION ? "older" : "newer");
+ }
+
+ p = nfs_copy_user_string(NULL, &data->hostname, 256);
+@@ -1699,6 +1804,10 @@ static struct inode *nfs_alloc_inode(str
+ return NULL;
+ nfsi->flags = 0;
+ nfsi->mm_cred = NULL;
++#ifdef CONFIG_NFS_V4_ACL
++ nfsi->acl = NFS4_ACL_NOT_CACHED;
++ nfsi->default_acl = NFS4_ACL_NOT_CACHED;
++#endif /* CONFIG_NFS_V4_ACL */
+ nfs4_zero_state(nfsi);
+ return &nfsi->vfs_inode;
+ }
+@@ -1718,6 +1827,7 @@ static void init_once(void * foo, kmem_c
+ INIT_LIST_HEAD(&nfsi->dirty);
+ INIT_LIST_HEAD(&nfsi->commit);
+ INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC);
++ atomic_set(&nfsi->data_updates, 0);
+ nfsi->ndirty = 0;
+ nfsi->ncommit = 0;
+ nfsi->npages = 0;
+diff -puN fs/nfs/nfs3proc.c~CITI_NFS4_ALL fs/nfs/nfs3proc.c
+--- linux-2.6.3/fs/nfs/nfs3proc.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfs/nfs3proc.c 2004-02-19 16:47:07.000000000 -0500
+@@ -68,20 +68,6 @@ nfs3_async_handle_jukebox(struct rpc_tas
+ return 1;
+ }
+
+-static void
+-nfs3_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
+-{
+- if (fattr->valid & NFS_ATTR_FATTR) {
+- if (!(fattr->valid & NFS_ATTR_WCC)) {
+- fattr->pre_size = NFS_CACHE_ISIZE(inode);
+- fattr->pre_mtime = NFS_CACHE_MTIME(inode);
+- fattr->pre_ctime = NFS_CACHE_CTIME(inode);
+- fattr->valid |= NFS_ATTR_WCC;
+- }
+- nfs_refresh_inode(inode, fattr);
+- }
+-}
+-
+ static struct rpc_cred *
+ nfs_cred(struct inode *inode, struct file *filp)
+ {
+@@ -99,14 +85,18 @@ nfs_cred(struct inode *inode, struct fil
+ */
+ static int
+ nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
+- struct nfs_fattr *fattr)
++ struct nfs_fsinfo *info)
+ {
+ int status;
+
+- dprintk("NFS call getroot\n");
+- fattr->valid = 0;
+- status = rpc_call(server->client, NFS3PROC_GETATTR, fhandle, fattr, 0);
+- dprintk("NFS reply getroot\n");
++ dprintk("%s: call fsinfo\n", __FUNCTION__);
++ info->fattr->valid = 0;
++ status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0);
++ dprintk("%s: reply fsinfo %d\n", __FUNCTION__, status);
++ if (!(info->fattr->valid & NFS_ATTR_FATTR)) {
++ status = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0);
++ dprintk("%s: reply getattr %d\n", __FUNCTION__, status);
++ }
+ return status;
+ }
+
+@@ -280,7 +270,7 @@ nfs3_proc_write(struct nfs_write_data *w
+ msg.rpc_cred = nfs_cred(inode, filp);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags);
+ if (status >= 0)
+- nfs3_write_refresh_inode(inode, fattr);
++ nfs_refresh_inode(inode, fattr);
+ dprintk("NFS reply write: %d\n", status);
+ return status < 0? status : wdata->res.count;
+ }
+@@ -303,7 +293,7 @@ nfs3_proc_commit(struct nfs_write_data *
+ msg.rpc_cred = nfs_cred(inode, filp);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+ if (status >= 0)
+- nfs3_write_refresh_inode(inode, fattr);
++ nfs_refresh_inode(inode, fattr);
+ dprintk("NFS reply commit: %d\n", status);
+ return status;
+ }
+@@ -777,12 +767,13 @@ nfs3_proc_read_setup(struct nfs_read_dat
+ static void
+ nfs3_write_done(struct rpc_task *task)
+ {
+- struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
++ struct nfs_write_data *data;
+
+ if (nfs3_async_handle_jukebox(task))
+ return;
++ data = (struct nfs_write_data *)task->tk_calldata;
+ if (task->tk_status >= 0)
+- nfs3_write_refresh_inode(data->inode, data->res.fattr);
++ nfs_refresh_inode(data->inode, data->res.fattr);
+ nfs_writeback_done(task);
+ }
+
+@@ -835,12 +826,13 @@ nfs3_proc_write_setup(struct nfs_write_d
+ static void
+ nfs3_commit_done(struct rpc_task *task)
+ {
+- struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
++ struct nfs_write_data *data;
+
+ if (nfs3_async_handle_jukebox(task))
+ return;
++ data = (struct nfs_write_data *)task->tk_calldata;
+ if (task->tk_status >= 0)
+- nfs3_write_refresh_inode(data->inode, data->res.fattr);
++ nfs_refresh_inode(data->inode, data->res.fattr);
+ nfs_commit_done(task);
+ }
+
+@@ -907,6 +899,7 @@ struct nfs_rpc_ops nfs_v3_clientops = {
+ .version = 3, /* protocol version */
+ .dentry_ops = &nfs_dentry_operations,
+ .dir_inode_ops = &nfs_dir_inode_operations,
++ .file_inode_ops = &nfs_file_inode_operations,
+ .getroot = nfs3_proc_get_root,
+ .getattr = nfs3_proc_getattr,
+ .setattr = nfs3_proc_setattr,
+diff -puN fs/nfs/nfs4proc.c~CITI_NFS4_ALL fs/nfs/nfs4proc.c
+--- linux-2.6.3/fs/nfs/nfs4proc.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfs/nfs4proc.c 2004-02-19 16:47:15.000000000 -0500
+@@ -46,112 +46,20 @@
+ #include <linux/nfs_page.h>
+ #include <linux/smp_lock.h>
+ #include <linux/namei.h>
++#include <linux/nfs4_acl.h>
++#include <linux/nfs_idmap.h>
+
+ #define NFSDBG_FACILITY NFSDBG_PROC
+
+ #define NFS4_POLL_RETRY_TIME (15*HZ)
+
+-#define GET_OP(cp,name) &cp->ops[cp->req_nops].u.name
+-#define OPNUM(cp) cp->ops[cp->req_nops].opnum
+-
++static int nfs4_proc_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
+ static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *);
+ extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus);
+ extern struct rpc_procinfo nfs4_procedures[];
+
+ extern nfs4_stateid zero_stateid;
+
+-static void
+-nfs4_setup_compound(struct nfs4_compound *cp, struct nfs4_op *ops,
+- struct nfs_server *server, char *tag)
+-{
+- memset(cp, 0, sizeof(*cp));
+- cp->ops = ops;
+- cp->server = server;
+-}
+-
+-static void
+-nfs4_setup_access(struct nfs4_compound *cp, u32 req_access, u32 *resp_supported, u32 *resp_access)
+-{
+- struct nfs4_access *access = GET_OP(cp, access);
+-
+- access->ac_req_access = req_access;
+- access->ac_resp_supported = resp_supported;
+- access->ac_resp_access = resp_access;
+-
+- OPNUM(cp) = OP_ACCESS;
+- cp->req_nops++;
+-}
+-
+-static void
+-nfs4_setup_create_dir(struct nfs4_compound *cp, struct qstr *name,
+- struct iattr *sattr, struct nfs4_change_info *info)
+-{
+- struct nfs4_create *create = GET_OP(cp, create);
+-
+- create->cr_ftype = NF4DIR;
+- create->cr_namelen = name->len;
+- create->cr_name = name->name;
+- create->cr_attrs = sattr;
+- create->cr_cinfo = info;
+-
+- OPNUM(cp) = OP_CREATE;
+- cp->req_nops++;
+-}
+-
+-static void
+-nfs4_setup_create_symlink(struct nfs4_compound *cp, struct qstr *name,
+- struct qstr *linktext, struct iattr *sattr,
+- struct nfs4_change_info *info)
+-{
+- struct nfs4_create *create = GET_OP(cp, create);
+-
+- create->cr_ftype = NF4LNK;
+- create->cr_textlen = linktext->len;
+- create->cr_text = linktext->name;
+- create->cr_namelen = name->len;
+- create->cr_name = name->name;
+- create->cr_attrs = sattr;
+- create->cr_cinfo = info;
+-
+- OPNUM(cp) = OP_CREATE;
+- cp->req_nops++;
+-}
+-
+-static void
+-nfs4_setup_create_special(struct nfs4_compound *cp, struct qstr *name,
+- dev_t dev, struct iattr *sattr,
+- struct nfs4_change_info *info)
+-{
+- int mode = sattr->ia_mode;
+- struct nfs4_create *create = GET_OP(cp, create);
+-
+- BUG_ON(!(sattr->ia_valid & ATTR_MODE));
+- BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode));
+-
+- if (S_ISFIFO(mode))
+- create->cr_ftype = NF4FIFO;
+- else if (S_ISBLK(mode)) {
+- create->cr_ftype = NF4BLK;
+- create->cr_specdata1 = MAJOR(dev);
+- create->cr_specdata2 = MINOR(dev);
+- }
+- else if (S_ISCHR(mode)) {
+- create->cr_ftype = NF4CHR;
+- create->cr_specdata1 = MAJOR(dev);
+- create->cr_specdata2 = MINOR(dev);
+- }
+- else
+- create->cr_ftype = NF4SOCK;
+-
+- create->cr_namelen = name->len;
+- create->cr_name = name->name;
+- create->cr_attrs = sattr;
+- create->cr_cinfo = info;
+-
+- OPNUM(cp) = OP_CREATE;
+- cp->req_nops++;
+-}
+-
+ /*
+ * This is our standard bitmap for GETATTR requests.
+ */
+@@ -181,126 +89,15 @@ u32 nfs4_statfs_bitmap[2] = {
+ | FATTR4_WORD1_SPACE_TOTAL
+ };
+
+-u32 nfs4_pathconf_bitmap[2] = {
+- FATTR4_WORD0_MAXLINK
+- | FATTR4_WORD0_MAXNAME,
+- 0
+-};
+-
+-static inline void
+-__nfs4_setup_getattr(struct nfs4_compound *cp, u32 *bitmap,
+- struct nfs_fattr *fattr,
+- struct nfs_fsstat *fsstat,
+- struct nfs_pathconf *pathconf)
+-{
+- struct nfs4_getattr *getattr = GET_OP(cp, getattr);
+-
+- getattr->gt_bmval = bitmap;
+- getattr->gt_attrs = fattr;
+- getattr->gt_fsstat = fsstat;
+- getattr->gt_pathconf = pathconf;
+-
+- OPNUM(cp) = OP_GETATTR;
+- cp->req_nops++;
+-}
+-
+-static void
+-nfs4_setup_getattr(struct nfs4_compound *cp,
+- struct nfs_fattr *fattr)
+-{
+- __nfs4_setup_getattr(cp, nfs4_fattr_bitmap, fattr,
+- NULL, NULL);
+-}
+-
+-static void
+-nfs4_setup_statfs(struct nfs4_compound *cp,
+- struct nfs_fsstat *fsstat)
+-{
+- __nfs4_setup_getattr(cp, nfs4_statfs_bitmap,
+- NULL, fsstat, NULL);
+-}
+-
+-static void
+-nfs4_setup_pathconf(struct nfs4_compound *cp,
+- struct nfs_pathconf *pathconf)
+-{
+- __nfs4_setup_getattr(cp, nfs4_pathconf_bitmap,
+- NULL, NULL, pathconf);
+-}
+-
+-static void
+-nfs4_setup_getfh(struct nfs4_compound *cp, struct nfs_fh *fhandle)
+-{
+- struct nfs4_getfh *getfh = GET_OP(cp, getfh);
+-
+- getfh->gf_fhandle = fhandle;
+-
+- OPNUM(cp) = OP_GETFH;
+- cp->req_nops++;
+-}
+-
+-static void
+-nfs4_setup_link(struct nfs4_compound *cp, struct qstr *name,
+- struct nfs4_change_info *info)
+-{
+- struct nfs4_link *link = GET_OP(cp, link);
+-
+- link->ln_namelen = name->len;
+- link->ln_name = name->name;
+- link->ln_cinfo = info;
+-
+- OPNUM(cp) = OP_LINK;
+- cp->req_nops++;
+-}
+-
+ static void
+-nfs4_setup_lookup(struct nfs4_compound *cp, struct qstr *q)
+-{
+- struct nfs4_lookup *lookup = GET_OP(cp, lookup);
+-
+- lookup->lo_name = q;
+-
+- OPNUM(cp) = OP_LOOKUP;
+- cp->req_nops++;
+-}
+-
+-static void
+-nfs4_setup_putfh(struct nfs4_compound *cp, struct nfs_fh *fhandle)
+-{
+- struct nfs4_putfh *putfh = GET_OP(cp, putfh);
+-
+- putfh->pf_fhandle = fhandle;
+-
+- OPNUM(cp) = OP_PUTFH;
+- cp->req_nops++;
+-}
+-
+-static void
+-nfs4_setup_putrootfh(struct nfs4_compound *cp)
+-{
+- OPNUM(cp) = OP_PUTROOTFH;
+- cp->req_nops++;
+-}
+-
+-static void
+-nfs4_setup_readdir(struct nfs4_compound *cp, u64 cookie, u32 *verifier,
+- struct page **pages, unsigned int bufsize, struct dentry *dentry)
++nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, struct nfs4_readdir_arg *readdir)
+ {
+ u32 *start, *p;
+- struct nfs4_readdir *readdir = GET_OP(cp, readdir);
+
+- BUG_ON(bufsize < 80);
+- readdir->rd_cookie = (cookie > 2) ? cookie : 0;
+- memcpy(&readdir->rd_req_verifier, verifier, sizeof(readdir->rd_req_verifier));
+- readdir->rd_count = bufsize;
+- readdir->rd_bmval[0] = FATTR4_WORD0_FILEID;
+- readdir->rd_bmval[1] = 0;
+- readdir->rd_pages = pages;
+- readdir->rd_pgbase = 0;
++ BUG_ON(readdir->count < 80);
++ readdir->cookie = (cookie > 2) ? cookie : 0;
++ memcpy(&readdir->req_verifier, verifier, sizeof(readdir->req_verifier));
+
+- OPNUM(cp) = OP_READDIR;
+- cp->req_nops++;
+-
+ if (cookie >= 2)
+ return;
+
+@@ -311,7 +108,7 @@ nfs4_setup_readdir(struct nfs4_compound
+ * when talking to the server, we always send cookie 0
+ * instead of 1 or 2.
+ */
+- start = p = (u32 *)kmap_atomic(*pages, KM_USER0);
++ start = p = (u32 *)kmap_atomic(*readdir->pages, KM_USER0);
+
+ if (cookie == 0) {
+ *p++ = xdr_one; /* next */
+@@ -337,68 +134,12 @@ nfs4_setup_readdir(struct nfs4_compound
+ *p++ = htonl(8); /* attribute buffer length */
+ p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_parent->d_inode));
+
+- readdir->rd_pgbase = (char *)p - (char *)start;
+- readdir->rd_count -= readdir->rd_pgbase;
++ readdir->pgbase = (char *)p - (char *)start;
++ readdir->count -= readdir->pgbase;
+ kunmap_atomic(start, KM_USER0);
+ }
+
+ static void
+-nfs4_setup_readlink(struct nfs4_compound *cp, int count, struct page **pages)
+-{
+- struct nfs4_readlink *readlink = GET_OP(cp, readlink);
+-
+- readlink->rl_count = count;
+- readlink->rl_pages = pages;
+-
+- OPNUM(cp) = OP_READLINK;
+- cp->req_nops++;
+-}
+-
+-static void
+-nfs4_setup_remove(struct nfs4_compound *cp, struct qstr *name, struct nfs4_change_info *cinfo)
+-{
+- struct nfs4_remove *remove = GET_OP(cp, remove);
+-
+- remove->rm_namelen = name->len;
+- remove->rm_name = name->name;
+- remove->rm_cinfo = cinfo;
+-
+- OPNUM(cp) = OP_REMOVE;
+- cp->req_nops++;
+-}
+-
+-static void
+-nfs4_setup_rename(struct nfs4_compound *cp, struct qstr *old, struct qstr *new,
+- struct nfs4_change_info *old_cinfo, struct nfs4_change_info *new_cinfo)
+-{
+- struct nfs4_rename *rename = GET_OP(cp, rename);
+-
+- rename->rn_oldnamelen = old->len;
+- rename->rn_oldname = old->name;
+- rename->rn_newnamelen = new->len;
+- rename->rn_newname = new->name;
+- rename->rn_src_cinfo = old_cinfo;
+- rename->rn_dst_cinfo = new_cinfo;
+-
+- OPNUM(cp) = OP_RENAME;
+- cp->req_nops++;
+-}
+-
+-static void
+-nfs4_setup_restorefh(struct nfs4_compound *cp)
+-{
+- OPNUM(cp) = OP_RESTOREFH;
+- cp->req_nops++;
+-}
+-
+-static void
+-nfs4_setup_savefh(struct nfs4_compound *cp)
+-{
+- OPNUM(cp) = OP_SAVEFH;
+- cp->req_nops++;
+-}
+-
+-static void
+ renew_lease(struct nfs_server *server, unsigned long timestamp)
+ {
+ struct nfs4_client *clp = server->nfs4_state;
+@@ -409,47 +150,6 @@ renew_lease(struct nfs_server *server, u
+ }
+
+ static inline void
+-process_lease(struct nfs4_compound *cp)
+-{
+- /*
+- * Generic lease processing: If this operation contains a
+- * lease-renewing operation, and it succeeded, update the RENEW time
+- * in the superblock. Instead of the current time, we use the time
+- * when the request was sent out. (All we know is that the lease was
+- * renewed sometime between then and now, and we have to assume the
+- * worst case.)
+- *
+- * Notes:
+- * (1) renewd doesn't acquire the spinlock when messing with
+- * server->last_renewal; this is OK since rpciod always runs
+- * under the BKL.
+- * (2) cp->timestamp was set at the end of XDR encode.
+- */
+- if (!cp->renew_index)
+- return;
+- if (!cp->toplevel_status || cp->resp_nops > cp->renew_index)
+- renew_lease(cp->server, cp->timestamp);
+-}
+-
+-static int
+-nfs4_call_compound(struct nfs4_compound *cp, struct rpc_cred *cred, int flags)
+-{
+- int status;
+- struct rpc_message msg = {
+- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMPOUND],
+- .rpc_argp = cp,
+- .rpc_resp = cp,
+- .rpc_cred = cred,
+- };
+-
+- status = rpc_call_sync(cp->server->client, &msg, flags);
+- if (!status)
+- process_lease(cp);
+-
+- return status;
+-}
+-
+-static inline void
+ process_cinfo(struct nfs4_change_info *info, struct nfs_fattr *fattr)
+ {
+ BUG_ON((fattr->valid & NFS_ATTR_FATTR) == 0);
+@@ -476,11 +176,6 @@ nfs4_open_reclaim(struct nfs4_state_owne
+ .valid = 0,
+ };
+ struct nfs4_change_info d_cinfo;
+- struct nfs4_getattr f_getattr = {
+- .gt_bmval = nfs4_fattr_bitmap,
+- .gt_attrs = &fattr,
+- };
+-
+ struct nfs_open_reclaimargs o_arg = {
+ .fh = NFS_FH(inode),
+ .seqid = sp->so_seqid,
+@@ -488,11 +183,10 @@ nfs4_open_reclaim(struct nfs4_state_owne
+ .share_access = state->state,
+ .clientid = server->nfs4_state->cl_clientid,
+ .claim = NFS4_OPEN_CLAIM_PREVIOUS,
+- .f_getattr = &f_getattr,
+ };
+ struct nfs_openres o_res = {
+- .cinfo = &d_cinfo,
+- .f_getattr = &f_getattr,
++ .cinfo = &d_cinfo,
++ .f_attr = &fattr,
+ .server = server, /* Grrr */
+ };
+ struct rpc_message msg = {
+@@ -528,28 +222,18 @@ nfs4_do_open(struct inode *dir, struct q
+ struct nfs_fattr f_attr = {
+ .valid = 0,
+ };
+- struct nfs4_getattr f_getattr = {
+- .gt_bmval = nfs4_fattr_bitmap,
+- .gt_attrs = &f_attr,
+- };
+- struct nfs4_getattr d_getattr = {
+- .gt_bmval = nfs4_fattr_bitmap,
+- .gt_attrs = &d_attr,
+- };
+ struct nfs_openargs o_arg = {
+ .fh = NFS_FH(dir),
+ .share_access = flags & (FMODE_READ|FMODE_WRITE),
+ .opentype = (flags & O_CREAT) ? NFS4_OPEN_CREATE : NFS4_OPEN_NOCREATE,
+ .createmode = (flags & O_EXCL) ? NFS4_CREATE_EXCLUSIVE : NFS4_CREATE_UNCHECKED,
+ .name = name,
+- .f_getattr = &f_getattr,
+- .d_getattr = &d_getattr,
+ .server = server,
+ };
+ struct nfs_openres o_res = {
+ .cinfo = &d_cinfo,
+- .f_getattr = &f_getattr,
+- .d_getattr = &d_getattr,
++ .f_attr = &f_attr,
++ .d_attr = &d_attr,
+ .server = server,
+ };
+ struct rpc_message msg = {
+@@ -665,18 +349,14 @@ nfs4_do_setattr(struct nfs_server *serve
+ struct nfs_fh *fhandle, struct iattr *sattr,
+ struct nfs4_state *state)
+ {
+- struct nfs4_getattr getattr = {
+- .gt_bmval = nfs4_fattr_bitmap,
+- .gt_attrs = fattr,
+- };
+ struct nfs_setattrargs arg = {
+ .fh = fhandle,
+ .iap = sattr,
+- .attr = &getattr,
++ .fattr = fattr,
+ .server = server,
+ };
+ struct nfs_setattrres res = {
+- .attr = &getattr,
++ .fattr = fattr,
+ .server = server,
+ };
+ struct rpc_message msg = {
+@@ -822,27 +502,43 @@ nfs4_open_revalidate(struct inode *dir,
+
+ static int
+ nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
+- struct nfs_fattr *fattr)
++ struct nfs_fsinfo *info)
+ {
+- struct nfs4_compound compound;
+- struct nfs4_op ops[4];
++ struct nfs_fattr * fattr = info->fattr;
+ unsigned char * p;
+ struct qstr q;
+ int status;
++ struct nfs4_getroot_arg args = {
++ .fhandle = fhandle,
++ .name = &q,
++ };
++ struct nfs4_getroot_res res = {
++ .server = server,
++ .fattr = fattr,
++ .fhandle = fhandle,
++ };
++ struct rpc_message msg_head = {
++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETROOT_HEAD],
++ .rpc_argp = NULL,
++ .rpc_resp = &res,
++ };
++ struct rpc_message msg_path = {
++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETROOT_PATH],
++ .rpc_argp = &args,
++ .rpc_resp = &res,
++ };
+
+ /*
+ * Now we do a separate LOOKUP for each component of the mount path.
+ * The LOOKUPs are done separately so that we can conveniently
+ * catch an ERR_WRONGSEC if it occurs along the way...
+ */
+- p = server->mnt_path;
+ fattr->valid = 0;
+- nfs4_setup_compound(&compound, ops, server, "getrootfh");
+- nfs4_setup_putrootfh(&compound);
+- nfs4_setup_getattr(&compound, fattr);
+- nfs4_setup_getfh(&compound, fhandle);
+- if ((status = nfs4_call_compound(&compound, NULL, 0)))
++ status = rpc_call_sync(server->client, &msg_head, 0);
++ if (status)
+ goto out;
++
++ p = server->mnt_path;
+ for (;;) {
+ while (*p == '/')
+ p++;
+@@ -854,12 +550,7 @@ nfs4_proc_get_root(struct nfs_server *se
+ q.len = p - q.name;
+
+ fattr->valid = 0;
+- nfs4_setup_compound(&compound, ops, server, "mount");
+- nfs4_setup_putfh(&compound, fhandle);
+- nfs4_setup_lookup(&compound, &q);
+- nfs4_setup_getattr(&compound, fattr);
+- nfs4_setup_getfh(&compound, fhandle);
+- status = nfs4_call_compound(&compound, NULL, 0);
++ status = rpc_call_sync(server->client,&msg_path,0);
+ if (!status)
+ continue;
+ if (status == -ENOENT) {
+@@ -869,21 +560,27 @@ nfs4_proc_get_root(struct nfs_server *se
+ break;
+ }
+ out:
+- return status;
++ if (status)
++ return status;
++ return nfs4_proc_fsinfo(server, fhandle, info);
+ }
+
+ static int
+ nfs4_proc_getattr(struct inode *inode, struct nfs_fattr *fattr)
+ {
+- struct nfs4_compound compound;
+- struct nfs4_op ops[2];
+-
++ struct nfs4_getattr_res res = {
++ .fattr = fattr,
++ .server = NFS_SERVER(inode),
++ };
++ struct rpc_message msg = {
++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETATTR],
++ .rpc_argp = NFS_FH(inode),
++ .rpc_resp = &res,
++ };
++
+ fattr->valid = 0;
+
+- nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "getattr");
+- nfs4_setup_putfh(&compound, NFS_FH(inode));
+- nfs4_setup_getattr(&compound, fattr);
+- return nfs4_call_compound(&compound, NULL, 0);
++ return rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+ }
+
+ /*
+@@ -945,26 +642,218 @@ out:
+ return status;
+ }
+
++#ifdef CONFIG_NFS_V4_ACL
++
++static inline int
++nfs_name_to_uid_wrapper(void *arg, const char *name, size_t len, __u32 *id)
++{
++ return nfs_map_name_to_uid((struct nfs4_client *)arg, name, len, id);
++}
++
++static inline int
++nfs_name_to_gid_wrapper(void *arg, const char *name, size_t len, __u32 *id)
++{
++ return nfs_map_group_to_gid((struct nfs4_client*)arg, name, len, id);
++}
++
++static inline int
++nfs_uid_to_name_wrapper(void *arg, __u32 id, char *name)
++{
++ return nfs_map_uid_to_name((struct nfs4_client *)arg, id, name);
++}
++
++static inline int
++nfs_gid_to_name_wrapper(void *arg, __u32 id, char *name)
++{
++ return nfs_map_gid_to_group((struct nfs4_client *)arg, id, name);
++}
++
++static struct nfs4_acl_idmapper nfs4_idmapper = {
++ .name2uid = nfs_name_to_uid_wrapper,
++ .name2gid = nfs_name_to_gid_wrapper,
++ .uid2name = nfs_uid_to_name_wrapper,
++ .gid2name = nfs_gid_to_name_wrapper,
++};
++
++/* From fs/ext2/acl.c: */
++
++static inline struct posix_acl *
++nfs4_iget_acl(struct inode *inode, struct posix_acl **i_acl)
++{
++ struct posix_acl *acl = NFS4_ACL_NOT_CACHED;
++
++ spin_lock(&inode->i_lock);
++ if (*i_acl != NFS4_ACL_NOT_CACHED)
++ acl = posix_acl_dup(*i_acl);
++ spin_unlock(&inode->i_lock);
++ return acl;
++}
++
++void
++nfs4_iset_acl(struct inode *inode, struct posix_acl **i_acl,
++ struct posix_acl *acl)
++{
++ spin_lock(&inode->i_lock);
++ if (*i_acl != NFS4_ACL_NOT_CACHED)
++ posix_acl_release(*i_acl);
++ *i_acl = posix_acl_dup(acl);
++ spin_unlock(&inode->i_lock);
++}
++
++void
++nfs4_izap_acl(struct inode *inode, struct posix_acl **i_acl)
++{
++ spin_lock(&inode->i_lock);
++ if (*i_acl != NFS4_ACL_NOT_CACHED)
++ posix_acl_release(*i_acl);
++ *i_acl = NFS4_ACL_NOT_CACHED;
++ spin_unlock(&inode->i_lock);
++}
++
++struct posix_acl *
++nfs4_proc_get_posix_acl(struct inode *inode, int type)
++{
++ struct nfs4_acl *acl = NULL;
++ int error;
++ struct posix_acl *pacl, *dpacl, *ret = NULL;
++ struct rpc_message msg = {
++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL],
++ .rpc_argp = NFS_FH(inode),
++ .rpc_resp = &acl,
++ };
++
++ lock_kernel();
++ error = nfs_revalidate_inode(NFS_SERVER(inode), inode);
++ if (error < 0) {
++ unlock_kernel();
++ return ERR_PTR(error);
++ }
++ if (type == ACL_TYPE_ACCESS)
++ ret = nfs4_iget_acl(inode, &NFS_I(inode)->acl);
++ else
++ ret = nfs4_iget_acl(inode, &NFS_I(inode)->default_acl);
++
++ if (ret != NFS4_ACL_NOT_CACHED) {
++ if (ret == NULL)
++ ret = ERR_PTR(-ENODATA);
++ unlock_kernel();
++ return ret;
++ }
++
++ error = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
++ unlock_kernel();
++
++ if (error < 0)
++ goto out_free;
++
++ error = -ENODATA;
++ if (acl == NULL)
++ goto out_free;
++
++ error = nfs4_acl_nfsv4_to_posix(&nfs4_idmapper, NFS_SERVER(inode)->nfs4_state, acl, &pacl, &dpacl);
++ if (error < 0)
++ goto out_free;
++
++ error = -ERANGE;
++ if (pacl && pacl->a_count > NFS_ACL_MAX_ENTRIES)
++ goto out_free;
++ if (dpacl && dpacl->a_count > NFS_ACL_MAX_ENTRIES)
++ goto out_free;
++
++ nfs4_iset_acl(inode, &NFS_I(inode)->acl, pacl);
++ nfs4_iset_acl(inode, &NFS_I(inode)->default_acl, dpacl);
++
++ ret = (type == ACL_TYPE_ACCESS) ? pacl : dpacl;
++ error = -ENODATA;
++ if (ret == NULL)
++ goto out_free;
++ error = 0;
++out_free:
++ if (error < 0)
++ ret = ERR_PTR(error);
++ nfs4_acl_free(acl);
++ return ret;
++}
++
++int
++nfs4_proc_set_posix_acl(struct inode *inode, int type, struct posix_acl *pacl)
++{
++ struct iattr ia;
++ struct nfs4_acl *acl;
++ struct nfs_fattr fattr;
++ int error;
++ struct nfs_setaclargs arg = {
++ .fh = NFS_FH(inode),
++ };
++ struct rpc_message msg = {
++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETACL],
++ .rpc_argp = &arg,
++ .rpc_resp = NULL,
++ };
++
++ ia.ia_valid = 0;
++ fattr.valid = 0;
++
++ if (pacl && pacl->a_count > NFS_ACL_MAX_ENTRIES)
++ return -ERANGE;
++
++ if (type == ACL_TYPE_ACCESS)
++ acl = nfs4_acl_posix_to_nfsv4(&nfs4_idmapper, NFS_SERVER(inode)->nfs4_state, pacl, NULL);
++ else
++ acl = nfs4_acl_posix_to_nfsv4(&nfs4_idmapper, NFS_SERVER(inode)->nfs4_state, NULL, pacl);
++ if (IS_ERR(acl))
++ return PTR_ERR(acl);
++ arg.acl = acl;
++
++ lock_kernel();
++ error = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0);
++ unlock_kernel();
++
++ nfs4_acl_free(acl);
++
++ if (error)
++ return error;
++
++ if (type == ACL_TYPE_ACCESS)
++ nfs4_iset_acl(inode, &NFS_I(inode)->acl, pacl);
++ else
++ nfs4_iset_acl(inode, &NFS_I(inode)->default_acl, pacl);
++
++ if (type == ACL_TYPE_ACCESS)
++ posix_acl_equiv_mode(pacl, &inode->i_mode);
++
++ return error;
++}
++
++#endif /* CONFIG_NFS_V4_ACL */
++
+ static int
+ nfs4_proc_lookup(struct inode *dir, struct qstr *name,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+ {
+- struct nfs4_compound compound;
+- struct nfs4_op ops[5];
+- struct nfs_fattr dir_attr;
+- int status;
+-
++ struct nfs_fattr dir_attr;
++ int status;
++ struct nfs4_lookupargs args = {
++ .dir_fh = NFS_FH(dir),
++ .name = name,
++ };
++ struct nfs4_lookupres res = {
++ .server = NFS_SERVER(dir),
++ .dirattr = &dir_attr,
++ .fattr = fattr,
++ .fhandle = fhandle,
++ };
++ struct rpc_message msg = {
++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LOOKUP],
++ .rpc_argp = &args,
++ .rpc_resp = &res,
++ };
++
+ dir_attr.valid = 0;
+ fattr->valid = 0;
+
+ dprintk("NFS call lookup %s\n", name->name);
+- nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "lookup");
+- nfs4_setup_putfh(&compound, NFS_FH(dir));
+- nfs4_setup_getattr(&compound, &dir_attr);
+- nfs4_setup_lookup(&compound, name);
+- nfs4_setup_getattr(&compound, fattr);
+- nfs4_setup_getfh(&compound, fhandle);
+- status = nfs4_call_compound(&compound, NULL, 0);
++ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ dprintk("NFS reply lookup: %d\n", status);
+
+ if (status >= 0)
+@@ -975,11 +864,24 @@ nfs4_proc_lookup(struct inode *dir, stru
+ static int
+ nfs4_proc_access(struct inode *inode, struct rpc_cred *cred, int mode)
+ {
+- struct nfs4_compound compound;
+- struct nfs4_op ops[3];
+ struct nfs_fattr fattr;
+ u32 req_access = 0, resp_supported, resp_access;
+ int status;
++ struct nfs4_accessargs args = {
++ .fhandle = NFS_FH(inode),
++ };
++ struct nfs4_accessres res = {
++ .server = NFS_SERVER(inode),
++ .fattr = &fattr,
++ .resp_supported = &resp_supported,
++ .resp_access = &resp_access,
++ };
++ struct rpc_message msg = {
++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS],
++ .rpc_argp = &args,
++ .rpc_resp = &res,
++ .rpc_cred = cred,
++ };
+
+ fattr.valid = 0;
+
+@@ -1000,12 +902,9 @@ nfs4_proc_access(struct inode *inode, st
+ if (mode & MAY_EXEC)
+ req_access |= NFS4_ACCESS_EXECUTE;
+ }
++ res.req_access = args.req_access = req_access;
+
+- nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "access");
+- nfs4_setup_putfh(&compound, NFS_FH(inode));
+- nfs4_setup_getattr(&compound, &fattr);
+- nfs4_setup_access(&compound, req_access, &resp_supported, &resp_access);
+- status = nfs4_call_compound(&compound, cred, 0);
++ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+ nfs_refresh_inode(inode, &fattr);
+
+ if (!status) {
+@@ -1046,13 +945,18 @@ nfs4_proc_access(struct inode *inode, st
+ static int
+ nfs4_proc_readlink(struct inode *inode, struct page *page)
+ {
+- struct nfs4_compound compound;
+- struct nfs4_op ops[2];
++ struct nfs4_readlink args = {
++ .fh = NFS_FH(inode),
++ .count = PAGE_CACHE_SIZE,
++ .pages = &page,
++ };
++ struct rpc_message msg = {
++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READLINK],
++ .rpc_argp = &args,
++ .rpc_resp = NULL,
++ };
+
+- nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "readlink");
+- nfs4_setup_putfh(&compound, NFS_FH(inode));
+- nfs4_setup_readlink(&compound, PAGE_CACHE_SIZE, &page);
+- return nfs4_call_compound(&compound, NULL, 0);
++ return rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+ }
+
+ static int
+@@ -1088,12 +992,8 @@ nfs4_proc_read(struct nfs_read_data *rda
+
+ fattr->valid = 0;
+ status = rpc_call_sync(server->client, &msg, flags);
+- if (!status) {
++ if (!status)
+ renew_lease(server, timestamp);
+- /* Check cache consistency */
+- if (fattr->change_attr != NFS_CHANGE_ATTR(inode))
+- nfs_zap_caches(inode);
+- }
+ dprintk("NFS reply read: %d\n", status);
+ return status;
+ }
+@@ -1130,7 +1030,6 @@ nfs4_proc_write(struct nfs_write_data *w
+
+ fattr->valid = 0;
+ status = rpc_call_sync(server->client, &msg, rpcflags);
+- NFS_CACHEINV(inode);
+ dprintk("NFS reply write: %d\n", status);
+ return status;
+ }
+@@ -1217,18 +1116,26 @@ nfs4_proc_create(struct inode *dir, stru
+ static int
+ nfs4_proc_remove(struct inode *dir, struct qstr *name)
+ {
+- struct nfs4_compound compound;
+- struct nfs4_op ops[3];
+ struct nfs4_change_info dir_cinfo;
+ struct nfs_fattr dir_attr;
+ int status;
++ struct nfs4_remove_arg args = {
++ .fhandle = NFS_FH(dir),
++ .name = name,
++ };
++ struct nfs4_remove_res res = {
++ .server = NFS_SERVER(dir),
++ .dir_cinfo = &dir_cinfo,
++ .dir_attr = &dir_attr,
++ };
++ struct rpc_message msg = {
++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE],
++ .rpc_argp = &args,
++ .rpc_resp = &res,
++ };
+
+ dir_attr.valid = 0;
+- nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "remove");
+- nfs4_setup_putfh(&compound, NFS_FH(dir));
+- nfs4_setup_remove(&compound, name, &dir_cinfo);
+- nfs4_setup_getattr(&compound, &dir_attr);
+- status = nfs4_call_compound(&compound, NULL, 0);
++ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+
+ if (!status) {
+ process_cinfo(&dir_cinfo, &dir_attr);
+@@ -1237,32 +1144,22 @@ nfs4_proc_remove(struct inode *dir, stru
+ return status;
+ }
+
+-struct unlink_desc {
+- struct nfs4_compound compound;
+- struct nfs4_op ops[3];
+- struct nfs4_change_info cinfo;
+- struct nfs_fattr attrs;
+-};
+-
+ static int
+ nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *name)
+ {
+- struct unlink_desc * up;
+- struct nfs4_compound * cp;
++ struct nfs4_unlink *up;
+
+- up = (struct unlink_desc *) kmalloc(sizeof(*up), GFP_KERNEL);
++ up = (struct nfs4_unlink *) kmalloc(sizeof(*up), GFP_KERNEL);
+ if (!up)
+ return -ENOMEM;
+- cp = &up->compound;
+
+- nfs4_setup_compound(cp, up->ops, NFS_SERVER(dir->d_inode), "unlink_setup");
+- nfs4_setup_putfh(cp, NFS_FH(dir->d_inode));
+- nfs4_setup_remove(cp, name, &up->cinfo);
+- nfs4_setup_getattr(cp, &up->attrs);
+-
+- msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMPOUND];
+- msg->rpc_argp = cp;
+- msg->rpc_resp = cp;
++ up->server = NFS_SERVER(dir->d_inode);
++ up->fh = NFS_FH(dir->d_inode);
++ up->name = name;
++
++ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_UNLINK];
++ msg->rpc_argp = up;
++ msg->rpc_resp = up;
+ return 0;
+ }
+
+@@ -1270,11 +1167,10 @@ static int
+ nfs4_proc_unlink_done(struct dentry *dir, struct rpc_task *task)
+ {
+ struct rpc_message *msg = &task->tk_msg;
+- struct unlink_desc *up;
++ struct nfs4_unlink *up;
+
+ if (msg->rpc_argp) {
+- up = (struct unlink_desc *) msg->rpc_argp;
+- process_lease(&up->compound);
++ up = (struct nfs4_unlink *) msg->rpc_argp;
+ process_cinfo(&up->cinfo, &up->attrs);
+ nfs_refresh_inode(dir->d_inode, &up->attrs);
+ kfree(up);
+@@ -1287,24 +1183,32 @@ static int
+ nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
+ struct inode *new_dir, struct qstr *new_name)
+ {
+- struct nfs4_compound compound;
+- struct nfs4_op ops[7];
+ struct nfs4_change_info old_cinfo, new_cinfo;
+ struct nfs_fattr old_dir_attr, new_dir_attr;
+ int status;
+-
++ struct nfs4_rename_arg arg = {
++ .old_dir = NFS_FH(old_dir),
++ .new_dir = NFS_FH(new_dir),
++ .old_name = old_name,
++ .new_name = new_name,
++ };
++ struct nfs4_rename_res res = {
++ .server = NFS_SERVER(old_dir),
++ .old_cinfo = &old_cinfo,
++ .new_cinfo = &new_cinfo,
++ .old_fattr = &old_dir_attr,
++ .new_fattr = &new_dir_attr,
++ };
++ struct rpc_message msg = {
++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME],
++ .rpc_argp = &arg,
++ .rpc_resp = &res,
++ };
++
+ old_dir_attr.valid = 0;
+ new_dir_attr.valid = 0;
+
+- nfs4_setup_compound(&compound, ops, NFS_SERVER(old_dir), "rename");
+- nfs4_setup_putfh(&compound, NFS_FH(old_dir));
+- nfs4_setup_savefh(&compound);
+- nfs4_setup_putfh(&compound, NFS_FH(new_dir));
+- nfs4_setup_rename(&compound, old_name, new_name, &old_cinfo, &new_cinfo);
+- nfs4_setup_getattr(&compound, &new_dir_attr);
+- nfs4_setup_restorefh(&compound);
+- nfs4_setup_getattr(&compound, &old_dir_attr);
+- status = nfs4_call_compound(&compound, NULL, 0);
++ status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0);
+
+ if (!status) {
+ process_cinfo(&old_cinfo, &old_dir_attr);
+@@ -1318,24 +1222,30 @@ nfs4_proc_rename(struct inode *old_dir,
+ static int
+ nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name)
+ {
+- struct nfs4_compound compound;
+- struct nfs4_op ops[7];
+ struct nfs4_change_info dir_cinfo;
+ struct nfs_fattr dir_attr, fattr;
+ int status;
+-
++ struct nfs4_link_arg arg = {
++ .fh = NFS_FH(inode),
++ .dir_fh = NFS_FH(dir),
++ .name = name,
++ };
++ struct nfs4_link_res res = {
++ .server = NFS_SERVER(inode),
++ .fattr = &fattr,
++ .dir_attr = &dir_attr,
++ .dir_cinfo = &dir_cinfo,
++ };
++ struct rpc_message msg = {
++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LINK],
++ .rpc_argp = &arg,
++ .rpc_resp = &res,
++ };
++
+ dir_attr.valid = 0;
+ fattr.valid = 0;
+
+- nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "link");
+- nfs4_setup_putfh(&compound, NFS_FH(inode));
+- nfs4_setup_savefh(&compound);
+- nfs4_setup_putfh(&compound, NFS_FH(dir));
+- nfs4_setup_link(&compound, name, &dir_cinfo);
+- nfs4_setup_getattr(&compound, &dir_attr);
+- nfs4_setup_restorefh(&compound);
+- nfs4_setup_getattr(&compound, &fattr);
+- status = nfs4_call_compound(&compound, NULL, 0);
++ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
+
+ if (!status) {
+ process_cinfo(&dir_cinfo, &dir_attr);
+@@ -1350,24 +1260,34 @@ nfs4_proc_symlink(struct inode *dir, str
+ struct iattr *sattr, struct nfs_fh *fhandle,
+ struct nfs_fattr *fattr)
+ {
+- struct nfs4_compound compound;
+- struct nfs4_op ops[7];
+ struct nfs_fattr dir_attr;
+ struct nfs4_change_info dir_cinfo;
+ int status;
++ struct nfs4_create_arg arg = {
++ .dir_fh = NFS_FH(dir),
++ .server = NFS_SERVER(dir),
++ .name = name,
++ .u.symlink = path,
++ .attrs = sattr,
++ .ftype = NF4LNK,
++ };
++ struct nfs4_create_res res = {
++ .server = NFS_SERVER(dir),
++ .fhandle = fhandle,
++ .fattr = fattr,
++ .dir_attr = &dir_attr,
++ .dir_cinfo = &dir_cinfo,
++ };
++ struct rpc_message msg = {
++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE],
++ .rpc_argp = &arg,
++ .rpc_resp = &res,
++ };
+
+ dir_attr.valid = 0;
+ fattr->valid = 0;
+
+- nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "symlink");
+- nfs4_setup_putfh(&compound, NFS_FH(dir));
+- nfs4_setup_savefh(&compound);
+- nfs4_setup_create_symlink(&compound, name, path, sattr, &dir_cinfo);
+- nfs4_setup_getattr(&compound, fattr);
+- nfs4_setup_getfh(&compound, fhandle);
+- nfs4_setup_restorefh(&compound);
+- nfs4_setup_getattr(&compound, &dir_attr);
+- status = nfs4_call_compound(&compound, NULL, 0);
++ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+
+ if (!status) {
+ process_cinfo(&dir_cinfo, &dir_attr);
+@@ -1380,24 +1300,33 @@ static int
+ nfs4_proc_mkdir(struct inode *dir, struct qstr *name, struct iattr *sattr,
+ struct nfs_fh *fhandle, struct nfs_fattr *fattr)
+ {
+- struct nfs4_compound compound;
+- struct nfs4_op ops[7];
+ struct nfs_fattr dir_attr;
+ struct nfs4_change_info dir_cinfo;
+ int status;
++ struct nfs4_create_arg arg = {
++ .dir_fh = NFS_FH(dir),
++ .server = NFS_SERVER(dir),
++ .name = name,
++ .attrs = sattr,
++ .ftype = NF4DIR,
++ };
++ struct nfs4_create_res res = {
++ .server = NFS_SERVER(dir),
++ .fhandle = fhandle,
++ .fattr = fattr,
++ .dir_attr = &dir_attr,
++ .dir_cinfo = &dir_cinfo,
++ };
++ struct rpc_message msg = {
++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE],
++ .rpc_argp = &arg,
++ .rpc_resp = &res,
++ };
+
+ dir_attr.valid = 0;
+ fattr->valid = 0;
+
+- nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "mkdir");
+- nfs4_setup_putfh(&compound, NFS_FH(dir));
+- nfs4_setup_savefh(&compound);
+- nfs4_setup_create_dir(&compound, name, sattr, &dir_cinfo);
+- nfs4_setup_getattr(&compound, fattr);
+- nfs4_setup_getfh(&compound, fhandle);
+- nfs4_setup_restorefh(&compound);
+- nfs4_setup_getattr(&compound, &dir_attr);
+- status = nfs4_call_compound(&compound, NULL, 0);
++ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+
+ if (!status) {
+ process_cinfo(&dir_cinfo, &dir_attr);
+@@ -1411,17 +1340,25 @@ nfs4_proc_readdir(struct dentry *dentry,
+ u64 cookie, struct page *page, unsigned int count, int plus)
+ {
+ struct inode *dir = dentry->d_inode;
+- struct nfs4_compound compound;
+- struct nfs4_op ops[2];
+ int status;
++ struct nfs4_readdir_arg args = {
++ .fh = NFS_FH(dir),
++ .pages = &page,
++ .pgbase = 0,
++ .count = count,
++ };
++ struct nfs4_readdir_res res;
++ struct rpc_message msg = {
++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READDIR],
++ .rpc_argp = &args,
++ .rpc_resp = &res,
++ .rpc_cred = cred,
++ };
+
+ lock_kernel();
+-
+- nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "readdir");
+- nfs4_setup_putfh(&compound, NFS_FH(dir));
+- nfs4_setup_readdir(&compound, cookie, NFS_COOKIEVERF(dir), &page, count, dentry);
+- status = nfs4_call_compound(&compound, cred, 0);
+-
++ nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
++ res.pgbase = args.pgbase;
++ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+ unlock_kernel();
+ return status;
+ }
+@@ -1430,24 +1367,50 @@ static int
+ nfs4_proc_mknod(struct inode *dir, struct qstr *name, struct iattr *sattr,
+ dev_t rdev, struct nfs_fh *fh, struct nfs_fattr *fattr)
+ {
+- struct nfs4_compound compound;
+- struct nfs4_op ops[7];
+ struct nfs_fattr dir_attr;
+ struct nfs4_change_info dir_cinfo;
+ int status;
++ int mode = sattr->ia_mode;
++ struct nfs4_create_arg arg = {
++ .dir_fh = NFS_FH(dir),
++ .server = NFS_SERVER(dir),
++ .name = name,
++ .attrs = sattr,
++ };
++ struct nfs4_create_res res = {
++ .server = NFS_SERVER(dir),
++ .fhandle = fh,
++ .fattr = fattr,
++ .dir_attr = &dir_attr,
++ .dir_cinfo = &dir_cinfo,
++ };
++ struct rpc_message msg = {
++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE],
++ .rpc_argp = &arg,
++ .rpc_resp = &res,
++ };
+
+ dir_attr.valid = 0;
+ fattr->valid = 0;
++
++ BUG_ON(!(sattr->ia_valid & ATTR_MODE));
++ BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode));
++ if (S_ISFIFO(mode))
++ arg.ftype = NF4FIFO;
++ else if (S_ISBLK(mode)) {
++ arg.ftype = NF4BLK;
++ arg.u.device.specdata1 = MAJOR(rdev);
++ arg.u.device.specdata2 = MINOR(rdev);
++ }
++ else if (S_ISCHR(mode)) {
++ arg.ftype = NF4CHR;
++ arg.u.device.specdata1 = MAJOR(rdev);
++ arg.u.device.specdata2 = MINOR(rdev);
++ }
++ else
++ arg.ftype = NF4SOCK;
+
+- nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "mknod");
+- nfs4_setup_putfh(&compound, NFS_FH(dir));
+- nfs4_setup_savefh(&compound);
+- nfs4_setup_create_special(&compound, name, rdev,sattr, &dir_cinfo);
+- nfs4_setup_getattr(&compound, fattr);
+- nfs4_setup_getfh(&compound, fh);
+- nfs4_setup_restorefh(&compound);
+- nfs4_setup_getattr(&compound, &dir_attr);
+- status = nfs4_call_compound(&compound, NULL, 0);
++ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
+
+ if (!status) {
+ process_cinfo(&dir_cinfo, &dir_attr);
+@@ -1460,14 +1423,13 @@ static int
+ nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
+ struct nfs_fsstat *fsstat)
+ {
+- struct nfs4_compound compound;
+- struct nfs4_op ops[2];
++ struct rpc_message msg = {
++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_STATFS],
++ .rpc_argp = fhandle,
++ .rpc_resp = fsstat,
++ };
+
+- memset(fsstat, 0, sizeof(*fsstat));
+- nfs4_setup_compound(&compound, ops, server, "statfs");
+- nfs4_setup_putfh(&compound, fhandle);
+- nfs4_setup_statfs(&compound, fsstat);
+- return nfs4_call_compound(&compound, NULL, 0);
++ return rpc_call_sync(server->client, &msg, 0);
+ }
+
+ static int
+@@ -1480,7 +1442,6 @@ nfs4_proc_fsinfo(struct nfs_server *serv
+ .rpc_resp = fsinfo,
+ };
+
+- memset(fsinfo, 0, sizeof(*fsinfo));
+ return rpc_call_sync(server->client, &msg, 0);
+ }
+
+@@ -1488,14 +1449,13 @@ static int
+ nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
+ struct nfs_pathconf *pathconf)
+ {
+- struct nfs4_compound compound;
+- struct nfs4_op ops[2];
++ struct rpc_message msg = {
++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PATHCONF],
++ .rpc_argp = fhandle,
++ .rpc_resp = pathconf,
++ };
+
+- memset(pathconf, 0, sizeof(*pathconf));
+- nfs4_setup_compound(&compound, ops, server, "statfs");
+- nfs4_setup_putfh(&compound, fhandle);
+- nfs4_setup_pathconf(&compound, pathconf);
+- return nfs4_call_compound(&compound, NULL, 0);
++ return rpc_call_sync(server->client, &msg, 0);
+ }
+
+ static void
+@@ -1517,7 +1477,6 @@ nfs4_read_done(struct rpc_task *task)
+ {
+ struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata;
+ struct inode *inode = data->inode;
+- struct nfs_fattr *fattr = data->res.fattr;
+
+ if (nfs4_async_handle_error(task, NFS_SERVER(inode)) == -EAGAIN) {
+ task->tk_action = nfs4_restart_read;
+@@ -1525,11 +1484,6 @@ nfs4_read_done(struct rpc_task *task)
+ }
+ if (task->tk_status > 0)
+ renew_lease(NFS_SERVER(inode), data->timestamp);
+- /* Check cache consistency */
+- if (fattr->change_attr != NFS_CHANGE_ATTR(inode))
+- nfs_zap_caches(inode);
+- if (fattr->bitmap[1] & FATTR4_WORD1_TIME_ACCESS)
+- inode->i_atime = fattr->atime;
+ /* Call back common NFS readpage processing */
+ nfs_readpage_result(task);
+ }
+@@ -1577,21 +1531,6 @@ nfs4_proc_read_setup(struct nfs_read_dat
+ }
+
+ static void
+-nfs4_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
+-{
+- /* Check cache consistency */
+- if (fattr->pre_change_attr != NFS_CHANGE_ATTR(inode))
+- nfs_zap_caches(inode);
+- NFS_CHANGE_ATTR(inode) = fattr->change_attr;
+- if (fattr->bitmap[1] & FATTR4_WORD1_SPACE_USED)
+- inode->i_blocks = (fattr->du.nfs3.used + 511) >> 9;
+- if (fattr->bitmap[1] & FATTR4_WORD1_TIME_METADATA)
+- inode->i_ctime = fattr->ctime;
+- if (fattr->bitmap[1] & FATTR4_WORD1_TIME_MODIFY)
+- inode->i_mtime = fattr->mtime;
+-}
+-
+-static void
+ nfs4_restart_write(struct rpc_task *task)
+ {
+ struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata;
+@@ -1617,7 +1556,6 @@ nfs4_write_done(struct rpc_task *task)
+ }
+ if (task->tk_status >= 0)
+ renew_lease(NFS_SERVER(inode), data->timestamp);
+- nfs4_write_refresh_inode(inode, data->res.fattr);
+ /* Call back common NFS writeback processing */
+ nfs_writeback_done(task);
+ }
+@@ -1684,7 +1622,6 @@ nfs4_commit_done(struct rpc_task *task)
+ task->tk_action = nfs4_restart_write;
+ return;
+ }
+- nfs4_write_refresh_inode(inode, data->res.fattr);
+ /* Call back common NFS writeback processing */
+ nfs_commit_done(task);
+ }
+@@ -1807,6 +1744,7 @@ nfs4_proc_file_open(struct inode *inode,
+ if (filp->f_mode & FMODE_WRITE) {
+ lock_kernel();
+ nfs_set_mmcred(inode, state->owner->so_cred);
++ nfs_begin_data_update(inode);
+ unlock_kernel();
+ }
+ filp->private_data = state;
+@@ -1823,6 +1761,11 @@ nfs4_proc_file_release(struct inode *ino
+
+ if (state)
+ nfs4_close_state(state, filp->f_mode);
++ if (filp->f_mode & FMODE_WRITE) {
++ lock_kernel();
++ nfs_end_data_update(inode);
++ unlock_kernel();
++ }
+ return 0;
+ }
+
+@@ -2294,6 +2237,7 @@ struct nfs_rpc_ops nfs_v4_clientops = {
+ .version = 4, /* protocol version */
+ .dentry_ops = &nfs4_dentry_operations,
+ .dir_inode_ops = &nfs4_dir_inode_operations,
++ .file_inode_ops = &nfs4_file_inode_operations,
+ .getroot = nfs4_proc_get_root,
+ .getattr = nfs4_proc_getattr,
+ .setattr = nfs4_proc_setattr,
+diff -puN fs/nfs/pagelist.c~CITI_NFS4_ALL fs/nfs/pagelist.c
+--- linux-2.6.3/fs/nfs/pagelist.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfs/pagelist.c 2004-02-19 16:47:03.000000000 -0500
+@@ -246,7 +246,6 @@ nfs_coalesce_requests(struct list_head *
+ * nfs_scan_list - Scan a list for matching requests
+ * @head: One of the NFS inode request lists
+ * @dst: Destination list
+- * @file: if set, ensure we match requests from this file
+ * @idx_start: lower bound of page->index to scan
+ * @npages: idx_start + npages sets the upper bound to scan.
+ *
+@@ -258,7 +257,6 @@ nfs_coalesce_requests(struct list_head *
+ */
+ int
+ nfs_scan_list(struct list_head *head, struct list_head *dst,
+- struct file *file,
+ unsigned long idx_start, unsigned int npages)
+ {
+ struct list_head *pos, *tmp;
+@@ -276,9 +274,6 @@ nfs_scan_list(struct list_head *head, st
+
+ req = nfs_list_entry(pos);
+
+- if (file && req->wb_file != file)
+- continue;
+-
+ if (req->wb_index < idx_start)
+ continue;
+ if (req->wb_index > idx_end)
+diff -puN fs/nfs/proc.c~CITI_NFS4_ALL fs/nfs/proc.c
+--- linux-2.6.3/fs/nfs/proc.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfs/proc.c 2004-02-19 16:47:07.000000000 -0500
+@@ -49,18 +49,6 @@
+
+ extern struct rpc_procinfo nfs_procedures[];
+
+-static void
+-nfs_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
+-{
+- if (!(fattr->valid & NFS_ATTR_WCC)) {
+- fattr->pre_size = NFS_CACHE_ISIZE(inode);
+- fattr->pre_mtime = NFS_CACHE_MTIME(inode);
+- fattr->pre_ctime = NFS_CACHE_CTIME(inode);
+- fattr->valid |= NFS_ATTR_WCC;
+- }
+- nfs_refresh_inode(inode, fattr);
+-}
+-
+ static struct rpc_cred *
+ nfs_cred(struct inode *inode, struct file *filp)
+ {
+@@ -78,15 +66,33 @@ nfs_cred(struct inode *inode, struct fil
+ */
+ static int
+ nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
+- struct nfs_fattr *fattr)
++ struct nfs_fsinfo *info)
+ {
+- int status;
++ struct nfs_fattr *fattr = info->fattr;
++ struct nfs2_fsstat fsinfo;
++ int status;
+
+- dprintk("NFS call getroot\n");
++ dprintk("%s: call getattr\n", __FUNCTION__);
+ fattr->valid = 0;
+- status = rpc_call(server->client, NFSPROC_GETATTR, fhandle, fattr, 0);
+- dprintk("NFS reply getroot\n");
+- return status;
++ status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0);
++ dprintk("%s: reply getattr %d\n", __FUNCTION__, status);
++ if (status)
++ return status;
++ dprintk("%s: call statfs\n", __FUNCTION__);
++ status = rpc_call(server->client_sys, NFSPROC_STATFS, fhandle, &fsinfo, 0);
++ dprintk("%s: reply statfs %d\n", __FUNCTION__, status);
++ if (status)
++ return status;
++ info->rtmax = NFS_MAXDATA;
++ info->rtpref = fsinfo.tsize;
++ info->rtmult = fsinfo.bsize;
++ info->wtmax = NFS_MAXDATA;
++ info->wtpref = fsinfo.tsize;
++ info->wtmult = fsinfo.bsize;
++ info->dtpref = fsinfo.tsize;
++ info->maxfilesize = 0x7FFFFFFF;
++ info->lease_time = 0;
++ return 0;
+ }
+
+ /*
+@@ -205,7 +211,7 @@ nfs_proc_write(struct nfs_write_data *wd
+ msg.rpc_cred = nfs_cred(inode, filp);
+ status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
+ if (status >= 0) {
+- nfs_write_refresh_inode(inode, fattr);
++ nfs_refresh_inode(inode, fattr);
+ wdata->res.count = wdata->args.count;
+ wdata->verf.committed = NFS_FILE_SYNC;
+ }
+@@ -331,10 +337,8 @@ nfs_proc_unlink_done(struct dentry *dir,
+ {
+ struct rpc_message *msg = &task->tk_msg;
+
+- if (msg->rpc_argp) {
+- NFS_CACHEINV(dir->d_inode);
++ if (msg->rpc_argp)
+ kfree(msg->rpc_argp);
+- }
+ return 0;
+ }
+
+@@ -584,7 +588,7 @@ nfs_write_done(struct rpc_task *task)
+ struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata;
+
+ if (task->tk_status >= 0)
+- nfs_write_refresh_inode(data->inode, data->res.fattr);
++ nfs_refresh_inode(data->inode, data->res.fattr);
+ nfs_writeback_done(task);
+ }
+
+@@ -665,6 +669,7 @@ struct nfs_rpc_ops nfs_v2_clientops = {
+ .version = 2, /* protocol version */
+ .dentry_ops = &nfs_dentry_operations,
+ .dir_inode_ops = &nfs_dir_inode_operations,
++ .file_inode_ops = &nfs_file_inode_operations,
+ .getroot = nfs_proc_get_root,
+ .getattr = nfs_proc_getattr,
+ .setattr = nfs_proc_setattr,
+diff -puN fs/nfs/read.c~CITI_NFS4_ALL fs/nfs/read.c
+--- linux-2.6.3/fs/nfs/read.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfs/read.c 2004-02-19 16:47:03.000000000 -0500
+@@ -124,6 +124,7 @@ nfs_readpage_sync(struct file *file, str
+ if (result < rdata.args.count) /* NFSv2ism */
+ break;
+ } while (count);
++ NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME;
+
+ if (count)
+ memclear_highpage_flush(page, rdata.args.pgbase, count);
+@@ -266,6 +267,7 @@ nfs_readpage_result(struct rpc_task *tas
+ dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
+ task->tk_pid, task->tk_status);
+
++ NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME;
+ while (!list_empty(&data->pages)) {
+ struct nfs_page *req = nfs_list_entry(data->pages.next);
+ struct page *page = req->wb_page;
+diff -puN fs/nfs/unlink.c~CITI_NFS4_ALL fs/nfs/unlink.c
+--- linux-2.6.3/fs/nfs/unlink.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfs/unlink.c 2004-02-19 16:47:03.000000000 -0500
+@@ -104,6 +104,7 @@ nfs_async_unlink_init(struct rpc_task *t
+ status = NFS_PROTO(dir->d_inode)->unlink_setup(&msg, dir, &data->name);
+ if (status < 0)
+ goto out_err;
++ nfs_begin_data_update(dir->d_inode);
+ rpc_call_setup(task, &msg, 0);
+ return;
+ out_err:
+@@ -126,7 +127,7 @@ nfs_async_unlink_done(struct rpc_task *t
+ if (!dir)
+ return;
+ dir_i = dir->d_inode;
+- nfs_zap_caches(dir_i);
++ nfs_end_data_update(dir_i);
+ if (NFS_PROTO(dir_i)->unlink_done(dir, task))
+ return;
+ put_rpccred(data->cred);
+diff -puN fs/nfs/write.c~CITI_NFS4_ALL fs/nfs/write.c
+--- linux-2.6.3/fs/nfs/write.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfs/write.c 2004-02-19 16:47:03.000000000 -0500
+@@ -74,7 +74,6 @@
+ static struct nfs_page * nfs_update_request(struct file*, struct inode *,
+ struct page *,
+ unsigned int, unsigned int);
+-static void nfs_strategy(struct inode *inode);
+
+ static kmem_cache_t *nfs_wdata_cachep;
+ static mempool_t *nfs_wdata_mempool;
+@@ -124,6 +123,52 @@ void nfs_commit_release(struct rpc_task
+ nfs_commit_free(wdata);
+ }
+
++/* Adjust the file length if we're writing beyond the end */
++static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
++{
++ struct inode *inode = page->mapping->host;
++ loff_t end, i_size = i_size_read(inode);
++ unsigned long end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
++
++ if (i_size > 0 && page->index < end_index)
++ return;
++ end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count);
++ if (i_size >= end)
++ return;
++ i_size_write(inode, end);
++}
++
++/* We can set the PG_uptodate flag if we see that a write request
++ * covers the full page.
++ */
++static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
++{
++ loff_t end_offs;
++
++ if (PageUptodate(page))
++ return;
++ if (base != 0)
++ return;
++ if (count == PAGE_CACHE_SIZE) {
++ SetPageUptodate(page);
++ return;
++ }
++
++ end_offs = i_size_read(page->mapping->host) - 1;
++ if (end_offs < 0)
++ return;
++ /* Is this the last page? */
++ if (page->index != (unsigned long)(end_offs >> PAGE_CACHE_SHIFT))
++ return;
++ /* This is the last page: set PG_uptodate if we cover the entire
++ * extent of the data, then zero the rest of the page.
++ */
++ if (count == (unsigned int)(end_offs & (PAGE_CACHE_SIZE - 1)) + 1) {
++ memclear_highpage_flush(page, count, PAGE_CACHE_SIZE - count);
++ SetPageUptodate(page);
++ }
++}
++
+ /*
+ * Write a page synchronously.
+ * Offset is the data offset within the page.
+@@ -157,6 +202,7 @@ nfs_writepage_sync(struct file *file, st
+ (long long)NFS_FILEID(inode),
+ count, (long long)(page_offset(page) + offset));
+
++ nfs_begin_data_update(inode);
+ do {
+ if (count < wsize && !swapfile)
+ wdata.args.count = count;
+@@ -177,14 +223,12 @@ nfs_writepage_sync(struct file *file, st
+ wdata.args.pgbase += result;
+ written += result;
+ count -= result;
+-
+- /*
+- * If we've extended the file, update the inode
+- * now so we don't invalidate the cache.
+- */
+- if (wdata.args.offset > i_size_read(inode))
+- i_size_write(inode, wdata.args.offset);
+ } while (count);
++ /* Update file length */
++ nfs_grow_file(page, offset, written);
++ /* Set the PG_uptodate flag? */
++ nfs_mark_uptodate(page, offset, written);
++ nfs_end_data_update(inode);
+
+ if (PageError(page))
+ ClearPageError(page);
+@@ -201,18 +245,19 @@ nfs_writepage_async(struct file *file, s
+ unsigned int offset, unsigned int count)
+ {
+ struct nfs_page *req;
+- loff_t end;
+ int status;
+
++ nfs_begin_data_update(inode);
+ req = nfs_update_request(file, inode, page, offset, count);
+ status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
+ if (status < 0)
+ goto out;
++ /* Update file length */
++ nfs_grow_file(page, offset, count);
++ /* Set the PG_uptodate flag? */
++ nfs_mark_uptodate(page, offset, count);
+ nfs_unlock_request(req);
+- nfs_strategy(inode);
+- end = ((loff_t)page->index<<PAGE_CACHE_SHIFT) + (loff_t)(offset + count);
+- if (i_size_read(inode) < end)
+- i_size_write(inode, end);
+
+ out:
++ nfs_end_data_update(inode); /* also on error: balances nfs_begin_data_update() */
+ return status;
+@@ -286,7 +331,7 @@ nfs_writepages(struct address_space *map
+ err = generic_writepages(mapping, wbc);
+ if (err)
+ goto out;
+- err = nfs_flush_file(inode, NULL, 0, 0, 0);
++ err = nfs_flush_inode(inode, 0, 0, 0);
+ if (err < 0)
+ goto out;
+ if (wbc->sync_mode == WB_SYNC_HOLD)
+@@ -294,7 +339,7 @@ nfs_writepages(struct address_space *map
+ if (is_sync && wbc->sync_mode == WB_SYNC_ALL) {
+ err = nfs_wb_all(inode);
+ } else
+- nfs_commit_file(inode, NULL, 0, 0, 0);
++ nfs_commit_inode(inode, 0, 0, 0);
+ out:
+ return err;
+ }
+@@ -312,8 +357,10 @@ nfs_inode_add_request(struct inode *inod
+ BUG_ON(error == -EEXIST);
+ if (error)
+ return error;
+- if (!nfsi->npages)
++ if (!nfsi->npages) {
+ igrab(inode);
++ nfs_begin_data_update(inode);
++ }
+ nfsi->npages++;
+ req->wb_count++;
+ return 0;
+@@ -336,6 +383,7 @@ nfs_inode_remove_request(struct nfs_page
+ nfsi->npages--;
+ if (!nfsi->npages) {
+ spin_unlock(&nfs_wreq_lock);
++ nfs_end_data_update(inode);
+ iput(inode);
+ } else
+ spin_unlock(&nfs_wreq_lock);
+@@ -421,7 +469,7 @@ nfs_mark_request_commit(struct nfs_page
+ * Interruptible by signals only if mounted with intr flag.
+ */
+ static int
+-nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long idx_start, unsigned int npages)
++nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages)
+ {
+ struct nfs_inode *nfsi = NFS_I(inode);
+ struct nfs_page *req;
+@@ -441,8 +489,6 @@ nfs_wait_on_requests(struct inode *inode
+ break;
+
+ next = req->wb_index + 1;
+- if (file && req->wb_file != file)
+- continue;
+ if (!NFS_WBACK_BUSY(req))
+ continue;
+
+@@ -453,7 +499,6 @@ nfs_wait_on_requests(struct inode *inode
+ if (error < 0)
+ return error;
+ spin_lock(&nfs_wreq_lock);
+- next = idx_start;
+ res++;
+ }
+ spin_unlock(&nfs_wreq_lock);
+@@ -464,7 +509,6 @@ nfs_wait_on_requests(struct inode *inode
+ * nfs_scan_dirty - Scan an inode for dirty requests
+ * @inode: NFS inode to scan
+ * @dst: destination list
+- * @file: if set, ensure we match requests from this file
+ * @idx_start: lower bound of page->index to scan.
+ * @npages: idx_start + npages sets the upper bound to scan.
+ *
+@@ -472,11 +516,11 @@ nfs_wait_on_requests(struct inode *inode
+ * The requests are *not* checked to ensure that they form a contiguous set.
+ */
+ static int
+-nfs_scan_dirty(struct inode *inode, struct list_head *dst, struct file *file, unsigned long idx_start, unsigned int npages)
++nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
+ {
+ struct nfs_inode *nfsi = NFS_I(inode);
+ int res;
+- res = nfs_scan_list(&nfsi->dirty, dst, file, idx_start, npages);
++ res = nfs_scan_list(&nfsi->dirty, dst, idx_start, npages);
+ nfsi->ndirty -= res;
+ sub_page_state(nr_dirty,res);
+ if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty))
+@@ -489,7 +533,6 @@ nfs_scan_dirty(struct inode *inode, stru
+ * nfs_scan_commit - Scan an inode for commit requests
+ * @inode: NFS inode to scan
+ * @dst: destination list
+- * @file: if set, ensure we collect requests from this file only.
+ * @idx_start: lower bound of page->index to scan.
+ * @npages: idx_start + npages sets the upper bound to scan.
+ *
+@@ -497,11 +540,11 @@ nfs_scan_dirty(struct inode *inode, stru
+ * The requests are *not* checked to ensure that they form a contiguous set.
+ */
+ static int
+-nfs_scan_commit(struct inode *inode, struct list_head *dst, struct file *file, unsigned long idx_start, unsigned int npages)
++nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
+ {
+ struct nfs_inode *nfsi = NFS_I(inode);
+ int res;
+- res = nfs_scan_list(&nfsi->commit, dst, file, idx_start, npages);
++ res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages);
+ nfsi->ncommit -= res;
+ if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit))
+ printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n");
+@@ -600,46 +643,6 @@ nfs_update_request(struct file* file, st
+ return req;
+ }
+
+-/*
+- * This is the strategy routine for NFS.
+- * It is called by nfs_updatepage whenever the user wrote up to the end
+- * of a page.
+- *
+- * We always try to submit a set of requests in parallel so that the
+- * server's write code can gather writes. This is mainly for the benefit
+- * of NFSv2.
+- *
+- * We never submit more requests than we think the remote can handle.
+- * For UDP sockets, we make sure we don't exceed the congestion window;
+- * for TCP, we limit the number of requests to 8.
+- *
+- * NFS_STRATEGY_PAGES gives the minimum number of requests for NFSv2 that
+- * should be sent out in one go. This is for the benefit of NFSv2 servers
+- * that perform write gathering.
+- *
+- * FIXME: Different servers may have different sweet spots.
+- * Record the average congestion window in server struct?
+- */
+-#define NFS_STRATEGY_PAGES 8
+-static void
+-nfs_strategy(struct inode *inode)
+-{
+- unsigned int dirty, wpages;
+-
+- dirty = NFS_I(inode)->ndirty;
+- wpages = NFS_SERVER(inode)->wpages;
+-#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+- if (NFS_PROTO(inode)->version == 2) {
+- if (dirty >= NFS_STRATEGY_PAGES * wpages)
+- nfs_flush_file(inode, NULL, 0, 0, 0);
+- } else if (dirty >= wpages)
+- nfs_flush_file(inode, NULL, 0, 0, 0);
+-#else
+- if (dirty >= NFS_STRATEGY_PAGES * wpages)
+- nfs_flush_file(inode, NULL, 0, 0, 0);
+-#endif
+-}
+-
+ int
+ nfs_flush_incompatible(struct file *file, struct page *page)
+ {
+@@ -675,7 +678,6 @@ nfs_updatepage(struct file *file, struct
+ struct dentry *dentry = file->f_dentry;
+ struct inode *inode = page->mapping->host;
+ struct nfs_page *req;
+- loff_t end;
+ int status = 0;
+
+ dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n",
+@@ -696,6 +698,30 @@ nfs_updatepage(struct file *file, struct
+ return status;
+ }
+
++ nfs_begin_data_update(inode);
++
++
++ /* If we're not using byte range locks, and we know the page
++ * is entirely in cache, it may be more efficient to avoid
++ * fragmenting write requests.
++ */
++ if (PageUptodate(page) && inode->i_flock == NULL) {
++ loff_t end_offs = i_size_read(inode) - 1;
++ unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT;
++
++ count += offset;
++ offset = 0;
++ if (end_offs < 0) {
++ /* Do nothing */
++ } else if (page->index == end_index) {
++ unsigned int pglen;
++ pglen = (unsigned int)(end_offs & (PAGE_CACHE_SIZE-1)) + 1;
++ if (count < pglen)
++ count = pglen;
++ } else if (page->index < end_index)
++ count = PAGE_CACHE_SIZE;
++ }
++
+ /*
+ * Try to find an NFS request corresponding to this page
+ * and update it.
+@@ -714,21 +740,14 @@ nfs_updatepage(struct file *file, struct
+ goto done;
+
+ status = 0;
+- end = ((loff_t)page->index<<PAGE_CACHE_SHIFT) + (loff_t)(offset + count);
+- if (i_size_read(inode) < end)
+- i_size_write(inode, end);
+-
+- /* If we wrote past the end of the page.
+- * Call the strategy routine so it can send out a bunch
+- * of requests.
+- */
+- if (req->wb_pgbase == 0 && req->wb_bytes == PAGE_CACHE_SIZE) {
+- SetPageUptodate(page);
+- nfs_unlock_request(req);
+- nfs_strategy(inode);
+- } else
+- nfs_unlock_request(req);
++
++ /* Update file length */
++ nfs_grow_file(page, offset, count);
++ /* Set the PG_uptodate flag? */
++ nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
++ nfs_unlock_request(req);
+ done:
++ nfs_end_data_update(inode);
+ dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n",
+ status, (long long)i_size_read(inode));
+ if (status < 0)
+@@ -891,10 +910,7 @@ nfs_writeback_done(struct rpc_task *task
+ #endif
+
+ /*
+- * Update attributes as result of writeback.
+- * FIXME: There is an inherent race with invalidate_inode_pages and
+- * writebacks since the page->count is kept > 1 for as long
+- * as the page has a write request pending.
++ * Process the nfs_page list
+ */
+ while (!list_empty(&data->pages)) {
+ req = nfs_list_entry(data->pages.next);
+@@ -1061,7 +1077,7 @@ nfs_commit_done(struct rpc_task *task)
+ }
+ #endif
+
+-int nfs_flush_file(struct inode *inode, struct file *file, unsigned long idx_start,
++int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
+ unsigned int npages, int how)
+ {
+ LIST_HEAD(head);
+@@ -1069,7 +1085,7 @@ int nfs_flush_file(struct inode *inode,
+ error = 0;
+
+ spin_lock(&nfs_wreq_lock);
+- res = nfs_scan_dirty(inode, &head, file, idx_start, npages);
++ res = nfs_scan_dirty(inode, &head, idx_start, npages);
+ spin_unlock(&nfs_wreq_lock);
+ if (res)
+ error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how);
+@@ -1079,7 +1095,7 @@ int nfs_flush_file(struct inode *inode,
+ }
+
+ #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+-int nfs_commit_file(struct inode *inode, struct file *file, unsigned long idx_start,
++int nfs_commit_inode(struct inode *inode, unsigned long idx_start,
+ unsigned int npages, int how)
+ {
+ LIST_HEAD(head);
+@@ -1087,9 +1103,9 @@ int nfs_commit_file(struct inode *inode,
+ error = 0;
+
+ spin_lock(&nfs_wreq_lock);
+- res = nfs_scan_commit(inode, &head, file, idx_start, npages);
++ res = nfs_scan_commit(inode, &head, idx_start, npages);
+ if (res) {
+- res += nfs_scan_commit(inode, &head, NULL, 0, 0);
++ res += nfs_scan_commit(inode, &head, 0, 0);
+ spin_unlock(&nfs_wreq_lock);
+ error = nfs_commit_list(&head, how);
+ } else
+@@ -1100,7 +1116,7 @@ int nfs_commit_file(struct inode *inode,
+ }
+ #endif
+
+-int nfs_sync_file(struct inode *inode, struct file *file, unsigned long idx_start,
++int nfs_sync_inode(struct inode *inode, unsigned long idx_start,
+ unsigned int npages, int how)
+ {
+ int error,
+@@ -1109,18 +1125,15 @@ int nfs_sync_file(struct inode *inode, s
+ wait = how & FLUSH_WAIT;
+ how &= ~FLUSH_WAIT;
+
+- if (!inode && file)
+- inode = file->f_dentry->d_inode;
+-
+ do {
+ error = 0;
+ if (wait)
+- error = nfs_wait_on_requests(inode, file, idx_start, npages);
++ error = nfs_wait_on_requests(inode, idx_start, npages);
+ if (error == 0)
+- error = nfs_flush_file(inode, file, idx_start, npages, how);
++ error = nfs_flush_inode(inode, idx_start, npages, how);
+ #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+ if (error == 0)
+- error = nfs_commit_file(inode, file, idx_start, npages, how);
++ error = nfs_commit_inode(inode, idx_start, npages, how);
+ #endif
+ } while (error > 0);
+ return error;
+diff -puN include/linux/fs.h~CITI_NFS4_ALL include/linux/fs.h
+--- linux-2.6.3/include/linux/fs.h~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/fs.h 2004-02-19 16:47:03.000000000 -0500
+@@ -137,6 +137,7 @@ extern int leases_enable, dir_notify_ena
+ #define S_DEAD 32 /* removed, but still open directory */
+ #define S_NOQUOTA 64 /* Inode is not counted to quota */
+ #define S_DIRSYNC 128 /* Directory modifications are synchronous */
++#define S_NOCMTIME 256 /* Do not update file c/mtime */
+
+ /*
+ * Note that nosuid etc flags are inode-specific: setting some file-system
+@@ -170,6 +171,7 @@ extern int leases_enable, dir_notify_ena
+ #define IS_ONE_SECOND(inode) __IS_FLG(inode, MS_ONE_SECOND)
+
+ #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
++#define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME)
+
+ /* the read-only stuff doesn't really belong here, but any other place is
+ probably as bad and I don't want to create yet another include file. */
+diff -puN include/linux/nfs_fs.h~CITI_NFS4_ALL include/linux/nfs_fs.h
+--- linux-2.6.3/include/linux/nfs_fs.h~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/nfs_fs.h 2004-02-19 16:47:07.000000000 -0500
+@@ -23,6 +23,10 @@
+ #include <linux/sunrpc/auth.h>
+ #include <linux/sunrpc/clnt.h>
+
++#ifdef CONFIG_NFS_V4
++#include <linux/xattr_acl.h>
++#endif /* CONFIG_NFS_V4 */
++
+ #include <linux/nfs.h>
+ #include <linux/nfs2.h>
+ #include <linux/nfs3.h>
+@@ -99,7 +103,7 @@ struct nfs_inode {
+ /*
+ * Various flags
+ */
+- unsigned short flags;
++ unsigned int flags;
+
+ /*
+ * read_cache_jiffies is when we started read-caching this inode,
+@@ -118,19 +122,22 @@ struct nfs_inode {
+ *
+ * mtime != read_cache_mtime
+ */
++ unsigned long readdir_timestamp;
+ unsigned long read_cache_jiffies;
+- struct timespec read_cache_ctime;
+- struct timespec read_cache_mtime;
+- __u64 read_cache_isize;
+ unsigned long attrtimeo;
+ unsigned long attrtimeo_timestamp;
+ __u64 change_attr; /* v4 only */
+
++ /* "Generation counter" for the attribute cache. This is
++ * bumped whenever we update the metadata on the
++ * server.
++ */
++ unsigned long cache_change_attribute;
+ /*
+- * Timestamp that dates the change made to read_cache_mtime.
+- * This is of use for dentry revalidation
++ * Counter indicating the number of outstanding requests that
++ * will cause a file data update.
+ */
+- unsigned long cache_mtime_jiffies;
++ atomic_t data_updates;
+
+ struct nfs_access_cache cache_access;
+
+@@ -160,7 +167,10 @@ struct nfs_inode {
+ /* NFSv4 state */
+ struct list_head open_states;
+ #endif /* CONFIG_NFS_V4*/
+-
++#ifdef CONFIG_NFS_V4_ACL
++ struct posix_acl *acl;
++ struct posix_acl *default_acl;
++#endif /* CONFIG_NFS_V4_ACL */
+ struct inode vfs_inode;
+ };
+
+@@ -170,7 +180,9 @@ struct nfs_inode {
+ #define NFS_INO_STALE 0x0001 /* possible stale inode */
+ #define NFS_INO_ADVISE_RDPLUS 0x0002 /* advise readdirplus */
+ #define NFS_INO_REVALIDATING 0x0004 /* revalidating attrs */
+-#define NFS_INO_FLUSH 0x0008 /* inode is due for flushing */
++#define NFS_INO_INVALID_ATTR 0x0008 /* cached attrs are invalid */
++#define NFS_INO_INVALID_DATA 0x0010 /* cached data is invalid */
++#define NFS_INO_INVALID_ATIME 0x0020 /* cached atime is invalid */
+ #define NFS_INO_FAKE_ROOT 0x0080 /* root inode placeholder */
+
+ static inline struct nfs_inode *NFS_I(struct inode *inode)
+@@ -186,15 +198,7 @@ static inline struct nfs_inode *NFS_I(st
+ #define NFS_ADDR(inode) (RPC_PEERADDR(NFS_CLIENT(inode)))
+ #define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf)
+ #define NFS_READTIME(inode) (NFS_I(inode)->read_cache_jiffies)
+-#define NFS_MTIME_UPDATE(inode) (NFS_I(inode)->cache_mtime_jiffies)
+-#define NFS_CACHE_CTIME(inode) (NFS_I(inode)->read_cache_ctime)
+-#define NFS_CACHE_MTIME(inode) (NFS_I(inode)->read_cache_mtime)
+-#define NFS_CACHE_ISIZE(inode) (NFS_I(inode)->read_cache_isize)
+ #define NFS_CHANGE_ATTR(inode) (NFS_I(inode)->change_attr)
+-#define NFS_CACHEINV(inode) \
+-do { \
+- NFS_READTIME(inode) = jiffies - NFS_MAXATTRTIMEO(inode) - 1; \
+-} while (0)
+ #define NFS_ATTRTIMEO(inode) (NFS_I(inode)->attrtimeo)
+ #define NFS_MINATTRTIMEO(inode) \
+ (S_ISDIR(inode->i_mode)? NFS_SERVER(inode)->acdirmin \
+@@ -211,6 +215,17 @@ do { \
+
+ #define NFS_FILEID(inode) (NFS_I(inode)->fileid)
+
++static inline int nfs_caches_unstable(struct inode *inode)
++{
++ return atomic_read(&NFS_I(inode)->data_updates) != 0;
++}
++
++static inline void NFS_CACHEINV(struct inode *inode)
++{
++ if (!nfs_caches_unstable(inode))
++ NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR;
++}
++
+ static inline int nfs_server_capable(struct inode *inode, int cap)
+ {
+ return NFS_SERVER(inode)->caps & cap;
+@@ -227,13 +242,37 @@ loff_t page_offset(struct page *page)
+ return ((loff_t)page->index) << PAGE_CACHE_SHIFT;
+ }
+
++/**
++ * nfs_save_change_attribute - Returns the inode attribute change cookie
++ * @inode: pointer to inode
++ * The "change attribute" is updated every time we finish an operation
++ * that will result in a metadata change on the server.
++ */
++static inline unsigned long nfs_save_change_attribute(struct inode *inode)
++{
++ return NFS_I(inode)->cache_change_attribute;
++}
++
++/**
++ * nfs_verify_change_attribute - Detects NFS inode cache updates
++ * @inode: pointer to inode
++ * @chattr: previously saved change attribute
++ * Return "false" (0) if a data update is still outstanding on the inode, or
++ * if cache_change_attribute no longer equals the saved @chattr value.
++ */
++static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long chattr)
++{
++ return !nfs_caches_unstable(inode)
++ && chattr == NFS_I(inode)->cache_change_attribute;
++}
++
+ /*
+ * linux/fs/nfs/inode.c
+ */
+ extern void nfs_zap_caches(struct inode *);
+ extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *,
+ struct nfs_fattr *);
+-extern int __nfs_refresh_inode(struct inode *, struct nfs_fattr *);
++extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
+ extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
+ extern int nfs_permission(struct inode *, int, struct nameidata *);
+ extern void nfs_set_mmcred(struct inode *, struct rpc_cred *);
+@@ -241,6 +280,10 @@ extern int nfs_open(struct inode *, stru
+ extern int nfs_release(struct inode *, struct file *);
+ extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
+ extern int nfs_setattr(struct dentry *, struct iattr *);
++extern void nfs_begin_attr_update(struct inode *);
++extern void nfs_end_attr_update(struct inode *);
++extern void nfs_begin_data_update(struct inode *);
++extern void nfs_end_data_update(struct inode *);
+
+ /*
+ * linux/fs/nfs/file.c
+@@ -309,11 +352,11 @@ extern void nfs_commit_done(struct rpc_t
+ * Try to write back everything synchronously (but check the
+ * return value!)
+ */
+-extern int nfs_sync_file(struct inode *, struct file *, unsigned long, unsigned int, int);
+-extern int nfs_flush_file(struct inode *, struct file *, unsigned long, unsigned int, int);
++extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int);
++extern int nfs_flush_inode(struct inode *, unsigned long, unsigned int, int);
+ extern int nfs_flush_list(struct list_head *, int, int);
+ #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
+-extern int nfs_commit_file(struct inode *, struct file *, unsigned long, unsigned int, int);
++extern int nfs_commit_inode(struct inode *, unsigned long, unsigned int, int);
+ extern int nfs_commit_list(struct list_head *, int);
+ #else
+ static inline int
+@@ -333,7 +376,7 @@ nfs_have_writebacks(struct inode *inode)
+ static inline int
+ nfs_wb_all(struct inode *inode)
+ {
+- int error = nfs_sync_file(inode, 0, 0, 0, FLUSH_WAIT);
++ int error = nfs_sync_inode(inode, 0, 0, FLUSH_WAIT);
+ return (error < 0) ? error : 0;
+ }
+
+@@ -343,21 +386,11 @@ nfs_wb_all(struct inode *inode)
+ static inline int
+ nfs_wb_page(struct inode *inode, struct page* page)
+ {
+- int error = nfs_sync_file(inode, 0, page->index, 1,
++ int error = nfs_sync_inode(inode, page->index, 1,
+ FLUSH_WAIT | FLUSH_STABLE);
+ return (error < 0) ? error : 0;
+ }
+
+-/*
+- * Write back all pending writes for one user..
+- */
+-static inline int
+-nfs_wb_file(struct inode *inode, struct file *file)
+-{
+- int error = nfs_sync_file(inode, file, 0, 0, FLUSH_WAIT);
+- return (error < 0) ? error : 0;
+-}
+-
+ /* Hack for future NFS swap support */
+ #ifndef IS_SWAPFILE
+ # define IS_SWAPFILE(inode) (0)
+@@ -383,20 +416,27 @@ extern int nfsroot_mount(struct sockadd
+ /*
+ * inline functions
+ */
+-static inline int
+-nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
++
++static inline int nfs_attribute_timeout(struct inode *inode)
+ {
+- if (time_before(jiffies, NFS_READTIME(inode)+NFS_ATTRTIMEO(inode)))
+- return NFS_STALE(inode) ? -ESTALE : 0;
+- return __nfs_revalidate_inode(server, inode);
++ struct nfs_inode *nfsi = NFS_I(inode);
++
++ return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo);
+ }
+
+-static inline int
+-nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr)
++/**
++ * nfs_revalidate_inode - Revalidate the inode attributes
++ * @server: pointer to nfs_server struct
++ * @inode: pointer to inode struct
++ *
++ * Calls __nfs_revalidate_inode() only if an INVALID flag is set or on timeout.
++ */
++static inline int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode)
+ {
+- if ((fattr->valid & NFS_ATTR_FATTR) == 0)
+- return 0;
+- return __nfs_refresh_inode(inode,fattr);
++ if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA))
++ && !nfs_attribute_timeout(inode))
++ return NFS_STALE(inode) ? -ESTALE : 0;
++ return __nfs_revalidate_inode(server, inode);
+ }
+
+ static inline loff_t
+@@ -590,6 +630,15 @@ struct nfs4_state {
+
+ extern struct dentry_operations nfs4_dentry_operations;
+ extern struct inode_operations nfs4_dir_inode_operations;
++extern struct inode_operations nfs4_file_inode_operations;
++
++#define NFS_ACL_MAX_ENTRIES 32
++
++/* inode.c */
++extern ssize_t nfs_getxattr(struct dentry *, const char *, void *, size_t);
++extern int nfs_setxattr(struct dentry *, const char *, const void *, size_t, int);
++
++#define NFS4_ACL_NOT_CACHED ((void *)-1)
+
+ /* nfs4proc.c */
+ extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short);
+@@ -602,6 +651,9 @@ int nfs4_do_downgrade(struct inode *inod
+ extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *);
+ extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *);
+ extern int nfs4_open_revalidate(struct inode *, struct dentry *, int);
++struct posix_acl * nfs4_proc_get_posix_acl(struct inode *, int);
++extern int nfs4_proc_set_posix_acl(struct inode *, int, struct posix_acl *);
++void nfs4_izap_acl(struct inode *inode, struct posix_acl **i_acl);
+
+ /* nfs4renewd.c */
+ extern void nfs4_schedule_state_renewal(struct nfs4_client *);
+diff -puN include/linux/nfs_page.h~CITI_NFS4_ALL include/linux/nfs_page.h
+--- linux-2.6.3/include/linux/nfs_page.h~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/nfs_page.h 2004-02-19 16:47:03.000000000 -0500
+@@ -53,7 +53,7 @@ extern void nfs_release_request(struct n
+ extern void nfs_list_add_request(struct nfs_page *, struct list_head *);
+
+ extern int nfs_scan_list(struct list_head *, struct list_head *,
+- struct file *, unsigned long, unsigned int);
++ unsigned long, unsigned int);
+ extern int nfs_coalesce_requests(struct list_head *, struct list_head *,
+ unsigned int);
+ extern int nfs_wait_on_request(struct nfs_page *);
+diff -puN include/linux/nfs_xdr.h~CITI_NFS4_ALL include/linux/nfs_xdr.h
+--- linux-2.6.3/include/linux/nfs_xdr.h~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/nfs_xdr.h 2004-02-19 16:47:15.000000000 -0500
+@@ -39,6 +39,9 @@ struct nfs_fattr {
+ __u64 change_attr; /* NFSv4 change attribute */
+ __u64 pre_change_attr;/* pre-op NFSv4 change attribute */
+ unsigned long timestamp;
++#ifdef CONFIG_NFS_V4
++ struct nfs4_acl *acl; /* NFSv4 ACL */
++#endif /* CONFIG_NFS_V4 */
+ };
+
+ #define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */
+@@ -103,8 +106,6 @@ struct nfs_openargs {
+ nfs4_verifier verifier; /* EXCLUSIVE */
+ } u;
+ struct qstr * name;
+- struct nfs4_getattr * f_getattr;
+- struct nfs4_getattr * d_getattr;
+ struct nfs_server * server; /* Needed for ID mapping */
+ };
+
+@@ -113,8 +114,8 @@ struct nfs_openres {
+ struct nfs_fh fh;
+ struct nfs4_change_info * cinfo;
+ __u32 rflags;
+- struct nfs4_getattr * f_getattr;
+- struct nfs4_getattr * d_getattr;
++ struct nfs_fattr * f_attr;
++ struct nfs_fattr * d_attr;
+ struct nfs_server * server;
+ };
+
+@@ -141,7 +142,6 @@ struct nfs_open_reclaimargs {
+ __u32 id;
+ __u32 share_access;
+ __u32 claim;
+- struct nfs4_getattr * f_getattr;
+ };
+
+ /*
+@@ -319,12 +319,22 @@ struct nfs_setattrargs {
+ struct nfs_fh * fh;
+ nfs4_stateid stateid;
+ struct iattr * iap;
+- struct nfs4_getattr * attr;
++ struct nfs_fattr * fattr;
+ struct nfs_server * server; /* Needed for name mapping */
++#ifdef CONFIG_NFS_V4
++ struct nfs4_acl * acl;
++#endif /* CONFIG_NFS_V4 */
+ };
+
++#ifdef CONFIG_NFS_V4
++struct nfs_setaclargs {
++ struct nfs_fh * fh;
++ struct nfs4_acl * acl;
++};
++#endif /* CONFIG_NFS_V4 */
++
+ struct nfs_setattrres {
+- struct nfs4_getattr * attr;
++ struct nfs_fattr * fattr;
+ struct nfs_server * server;
+ };
+
+@@ -482,118 +492,127 @@ struct nfs4_change_info {
+ u64 after;
+ };
+
+-struct nfs4_access {
+- u32 ac_req_access; /* request */
+- u32 * ac_resp_supported; /* response */
+- u32 * ac_resp_access; /* response */
++struct nfs4_accessargs {
++ struct nfs_fh * fhandle;
++ u32 req_access;
+ };
+
+-struct nfs4_close {
+- char * cl_stateid; /* request */
+- u32 cl_seqid; /* request */
++struct nfs4_accessres {
++ struct nfs_server * server;
++ struct nfs_fattr * fattr;
++ u32 req_access;
++ u32 * resp_supported;
++ u32 * resp_access;
+ };
+
+-struct nfs4_create {
+- u32 cr_ftype; /* request */
+- union { /* request */
+- struct {
+- u32 textlen;
+- const char * text;
+- } symlink; /* NF4LNK */
++struct nfs4_create_arg {
++ u32 ftype;
++ union {
++ struct qstr * symlink; /* NF4LNK */
+ struct {
+ u32 specdata1;
+ u32 specdata2;
+ } device; /* NF4BLK, NF4CHR */
+ } u;
+- u32 cr_namelen; /* request */
+- const char * cr_name; /* request */
+- struct iattr * cr_attrs; /* request */
+- struct nfs4_change_info * cr_cinfo; /* response */
++ struct qstr * name;
++ struct nfs_server * server;
++ struct iattr * attrs;
++ struct nfs_fh * dir_fh;
+ };
+-#define cr_textlen u.symlink.textlen
+-#define cr_text u.symlink.text
+-#define cr_specdata1 u.device.specdata1
+-#define cr_specdata2 u.device.specdata2
+
+-struct nfs4_getattr {
+- u32 * gt_bmval; /* request */
+- struct nfs_fattr * gt_attrs; /* response */
+- struct nfs_fsstat * gt_fsstat; /* response */
+- struct nfs_pathconf * gt_pathconf; /* response */
++struct nfs4_create_res {
++ struct nfs_server * server;
++ struct nfs_fh * fhandle;
++ struct nfs_fattr * fattr;
++ struct nfs_fattr * dir_attr;
++ struct nfs4_change_info * dir_cinfo;
+ };
+
+-struct nfs4_getfh {
+- struct nfs_fh * gf_fhandle; /* response */
++struct nfs4_getattr_res {
++ struct nfs_server * server;
++ struct nfs_fattr * fattr;
+ };
+
+-struct nfs4_link {
+- u32 ln_namelen; /* request */
+- const char * ln_name; /* request */
+- struct nfs4_change_info * ln_cinfo; /* response */
++struct nfs4_getroot_res {
++ struct nfs_server * server;
++ struct nfs_fattr * fattr;
++ struct nfs_fh * fhandle;
+ };
+
+-struct nfs4_lookup {
+- struct qstr * lo_name; /* request */
++struct nfs4_getroot_arg {
++ struct nfs_fh * fhandle;
++ struct qstr * name;
+ };
+
+-struct nfs4_open {
+- struct nfs4_client * op_client_state; /* request */
+- u32 op_share_access; /* request */
+- u32 op_opentype; /* request */
+- u32 op_createmode; /* request */
+- union { /* request */
+- struct iattr * attrs; /* UNCHECKED, GUARDED */
+- nfs4_verifier verifier; /* EXCLUSIVE */
+- } u;
+- struct qstr * op_name; /* request */
+- char * op_stateid; /* response */
+- struct nfs4_change_info * op_cinfo; /* response */
+- u32 * op_rflags; /* response */
+-};
+-#define op_attrs u.attrs
+-#define op_verifier u.verifier
+-
+-struct nfs4_open_confirm {
+- char * oc_stateid; /* request */
+-};
+-
+-struct nfs4_putfh {
+- struct nfs_fh * pf_fhandle; /* request */
+-};
+-
+-struct nfs4_readdir {
+- u64 rd_cookie; /* request */
+- nfs4_verifier rd_req_verifier; /* request */
+- u32 rd_count; /* request */
+- u32 rd_bmval[2]; /* request */
+- nfs4_verifier rd_resp_verifier; /* response */
+- struct page ** rd_pages; /* zero-copy data */
+- unsigned int rd_pgbase; /* zero-copy data */
++struct nfs4_link_arg {
++ struct nfs_fh * fh;
++ struct nfs_fh * dir_fh;
++ struct qstr * name;
++};
++
++struct nfs4_link_res {
++ struct nfs_server * server;
++ struct nfs_fattr * fattr;
++ struct nfs_fattr * dir_attr;
++ struct nfs4_change_info * dir_cinfo;
++};
++
++struct nfs4_lookupargs {
++ struct nfs_fh * dir_fh;
++ struct qstr * name;
++};
++
++struct nfs4_lookupres {
++ struct nfs_server * server;
++ struct nfs_fattr * dirattr;
++ struct nfs_fattr * fattr;
++ struct nfs_fh * fhandle;
++};
++
++struct nfs4_readdir_arg {
++ struct nfs_fh * fh;
++ u64 cookie; /* request */
++ nfs4_verifier req_verifier; /* request */
++ u32 count; /* request */
++ struct page ** pages; /* zero-copy data */
++ unsigned int pgbase; /* zero-copy data */
++};
++
++struct nfs4_readdir_res {
++ nfs4_verifier resp_verifier;
++ unsigned int pgbase;
+ };
+
+ struct nfs4_readlink {
+- u32 rl_count; /* zero-copy data */
+- struct page ** rl_pages; /* zero-copy data */
++ struct nfs_fh * fh;
++ u32 count; /* zero-copy data */
++ struct page ** pages; /* zero-copy data */
+ };
+
+-struct nfs4_remove {
+- u32 rm_namelen; /* request */
+- const char * rm_name; /* request */
+- struct nfs4_change_info * rm_cinfo; /* response */
++struct nfs4_remove_arg {
++ struct nfs_fh * fhandle;
++ struct qstr * name;
+ };
+
+-struct nfs4_rename {
+- u32 rn_oldnamelen; /* request */
+- const char * rn_oldname; /* request */
+- u32 rn_newnamelen; /* request */
+- const char * rn_newname; /* request */
+- struct nfs4_change_info * rn_src_cinfo; /* response */
+- struct nfs4_change_info * rn_dst_cinfo; /* response */
++struct nfs4_remove_res {
++ struct nfs_server * server;
++ struct nfs4_change_info * dir_cinfo;
++ struct nfs_fattr * dir_attr;
+ };
+
+-struct nfs4_setattr {
+- char * st_stateid; /* request */
+- struct iattr * st_iap; /* request */
++struct nfs4_rename_arg {
++ struct nfs_fh * old_dir;
++ struct nfs_fh * new_dir;
++ struct qstr * old_name;
++ struct qstr * new_name;
++};
++
++struct nfs4_rename_res {
++ struct nfs_server * server;
++ struct nfs4_change_info * old_cinfo;
++ struct nfs4_change_info * new_cinfo;
++ struct nfs_fattr * old_fattr;
++ struct nfs_fattr * new_fattr;
+ };
+
+ struct nfs4_setclientid {
+@@ -606,52 +625,12 @@ struct nfs4_setclientid {
+ struct nfs4_client * sc_state; /* response */
+ };
+
+-struct nfs4_op {
+- u32 opnum;
+- union {
+- struct nfs4_access access;
+- struct nfs4_close close;
+- struct nfs4_create create;
+- struct nfs4_getattr getattr;
+- struct nfs4_getfh getfh;
+- struct nfs4_link link;
+- struct nfs4_lookup lookup;
+- struct nfs4_open open;
+- struct nfs4_open_confirm open_confirm;
+- struct nfs4_putfh putfh;
+- struct nfs4_readdir readdir;
+- struct nfs4_readlink readlink;
+- struct nfs4_remove remove;
+- struct nfs4_rename rename;
+- struct nfs4_client * renew;
+- struct nfs4_setattr setattr;
+- } u;
+-};
+-
+-struct nfs4_compound {
+- unsigned int flags; /* defined below */
+- struct nfs_server * server;
+-
+- /* RENEW information */
+- int renew_index;
+- unsigned long timestamp;
+-
+- /* scratch variables for XDR encode/decode */
+- int nops;
+- u32 * p;
+- u32 * end;
+-
+- /* the individual COMPOUND operations */
+- struct nfs4_op *ops;
+-
+- /* request */
+- int req_nops;
+- u32 taglen;
+- char * tag;
+-
+- /* response */
+- int resp_nops;
+- int toplevel_status;
++struct nfs4_unlink {
++ struct nfs_server * server;
++ struct nfs_fh * fh;
++ struct qstr * name;
++ struct nfs4_change_info cinfo; /* NOT a pointer */
++ struct nfs_fattr attrs; /* NOT a pointer */
+ };
+
+ #endif /* CONFIG_NFS_V4 */
+@@ -698,9 +677,10 @@ struct nfs_rpc_ops {
+ int version; /* Protocol version */
+ struct dentry_operations *dentry_ops;
+ struct inode_operations *dir_inode_ops;
++ struct inode_operations *file_inode_ops;
+
+ int (*getroot) (struct nfs_server *, struct nfs_fh *,
+- struct nfs_fattr *);
++ struct nfs_fsinfo *);
+ int (*getattr) (struct inode *, struct nfs_fattr *);
+ int (*setattr) (struct dentry *, struct nfs_fattr *,
+ struct iattr *);
+diff -puN include/linux/sunrpc/xprt.h~CITI_NFS4_ALL include/linux/sunrpc/xprt.h
+--- linux-2.6.3/include/linux/sunrpc/xprt.h~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/sunrpc/xprt.h 2004-02-19 16:47:05.000000000 -0500
+@@ -95,14 +95,15 @@ struct rpc_rqst {
+ struct rpc_rqst * rq_next; /* free list */
+ int rq_cong; /* has incremented xprt->cong */
+ int rq_received; /* receive completed */
+- u32 rq_seqno; /* gss seq no. used on req. */
++#define GSS_SEQNO_CACHE 4
++ u32 rq_seqnos[GSS_SEQNO_CACHE];
++ /* gss seq no.s used on req. */
+
+ struct list_head rq_list;
+
+ struct xdr_buf rq_private_buf; /* The receive buffer
+ * used in the softirq.
+ */
+-
+ /*
+ * For authentication (e.g. auth_des)
+ */
+@@ -155,6 +156,11 @@ struct rpc_xprt {
+ stream : 1; /* TCP */
+
+ /*
++ * XID
++ */
++ __u32 xid; /* Next XID value to use */
++
++ /*
+ * State of TCP reply receive stuff
+ */
+ u32 tcp_recm, /* Fragment header */
+@@ -164,6 +170,11 @@ struct rpc_xprt {
+ unsigned long tcp_copied, /* copied to request */
+ tcp_flags;
+ /*
++ * Connection of sockets
++ */
++ struct work_struct sock_connect;
++ unsigned short port;
++ /*
+ * Disconnection of idle sockets
+ */
+ struct work_struct task_cleanup;
+diff -puN net/sunrpc/xprt.c~CITI_NFS4_ALL net/sunrpc/xprt.c
+--- linux-2.6.3/net/sunrpc/xprt.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/net/sunrpc/xprt.c 2004-02-19 16:47:05.000000000 -0500
+@@ -60,6 +60,7 @@
+ #include <linux/sunrpc/clnt.h>
+ #include <linux/file.h>
+ #include <linux/workqueue.h>
++#include <linux/random.h>
+
+ #include <net/sock.h>
+ #include <net/checksum.h>
+@@ -77,6 +78,7 @@
+
+ #define XPRT_MAX_BACKOFF (8)
+ #define XPRT_IDLE_TIMEOUT (5*60*HZ)
++#define XPRT_MAX_RESVPORT (800)
+
+ /*
+ * Local functions
+@@ -87,7 +89,7 @@ static void xprt_disconnect(struct rpc_x
+ static void xprt_connect_status(struct rpc_task *task);
+ static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap,
+ struct rpc_timeout *to);
+-static struct socket *xprt_create_socket(int, struct rpc_timeout *, int);
++static struct socket *xprt_create_socket(struct rpc_xprt *, int, int);
+ static void xprt_bind_socket(struct rpc_xprt *, struct socket *);
+ static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
+
+@@ -455,6 +457,68 @@ out_abort:
+ spin_unlock(&xprt->sock_lock);
+ }
+
++static void
++xprt_socket_connect(void *args)
++{
++ struct rpc_xprt *xprt = (struct rpc_xprt *)args;
++ struct socket *sock = xprt->sock;
++ int status = -EIO;
++
++ if (xprt->shutdown) {
++ rpc_wake_up_status(&xprt->pending, -EIO);
++ return;
++ }
++ if (!xprt->addr.sin_port)
++ goto out_err;
++
++ /*
++ * Start by resetting any existing state
++ */
++ xprt_close(xprt);
++ sock = xprt_create_socket(xprt, xprt->prot, xprt->resvport);
++ if (sock == NULL) {
++ /* couldn't create socket or bind to reserved port;
++ * this is likely a permanent error, so cause an abort */
++ goto out_err;
++ return;
++ }
++ xprt_bind_socket(xprt, sock);
++ xprt_sock_setbufsize(xprt);
++
++ if (!xprt->stream)
++ goto out;
++
++ /*
++ * Tell the socket layer to start connecting...
++ */
++ status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
++ sizeof(xprt->addr), O_NONBLOCK);
++ dprintk("RPC: %p connect status %d connected %d sock state %d\n",
++ xprt, -status, xprt_connected(xprt), sock->sk->sk_state);
++ if (status >= 0)
++ goto out;
++ switch (status) {
++ case -EINPROGRESS:
++ case -EALREADY:
++ break;
++ default:
++ goto out_err;
++ }
++out:
++ spin_lock_bh(&xprt->sock_lock);
++ if (xprt->snd_task)
++ rpc_wake_up_task(xprt->snd_task);
++ spin_unlock_bh(&xprt->sock_lock);
++ return;
++out_err:
++ spin_lock_bh(&xprt->sock_lock);
++ if (xprt->snd_task) {
++ xprt->snd_task->tk_status = status;
++ rpc_wake_up_task(xprt->snd_task);
++ }
++ spin_unlock_bh(&xprt->sock_lock);
++}
++
+ /*
+ * Attempt to connect a TCP socket.
+ *
+@@ -463,9 +527,6 @@ void
+ xprt_connect(struct rpc_task *task)
+ {
+ struct rpc_xprt *xprt = task->tk_xprt;
+- struct socket *sock = xprt->sock;
+- struct sock *inet;
+- int status;
+
+ dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid,
+ xprt, (xprt_connected(xprt) ? "is" : "is not"));
+@@ -486,79 +547,9 @@ xprt_connect(struct rpc_task *task)
+ if (task->tk_rqstp)
+ task->tk_rqstp->rq_bytes_sent = 0;
+
+- /*
+- * We're here because the xprt was marked disconnected.
+- * Start by resetting any existing state.
+- */
+- xprt_close(xprt);
+- if (!(sock = xprt_create_socket(xprt->prot, &xprt->timeout, xprt->resvport))) {
+- /* couldn't create socket or bind to reserved port;
+- * this is likely a permanent error, so cause an abort */
+- task->tk_status = -EIO;
+- goto out_write;
+- }
+- xprt_bind_socket(xprt, sock);
+- xprt_sock_setbufsize(xprt);
+-
+- if (!xprt->stream)
+- goto out_write;
+-
+- inet = sock->sk;
+-
+- /*
+- * Tell the socket layer to start connecting...
+- */
+- status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr,
+- sizeof(xprt->addr), O_NONBLOCK);
+- dprintk("RPC: %4d connect status %d connected %d sock state %d\n",
+- task->tk_pid, -status, xprt_connected(xprt), inet->sk_state);
+-
+- if (status >= 0)
+- return;
+-
+- switch (status) {
+- case -EINPROGRESS:
+- case -EALREADY:
+- /* Protect against TCP socket state changes */
+- lock_sock(inet);
+- if (inet->sk_state != TCP_ESTABLISHED) {
+- dprintk("RPC: %4d waiting for connection\n",
+- task->tk_pid);
+- task->tk_timeout = RPC_CONNECT_TIMEOUT;
+- /* if the socket is already closing, delay briefly */
+- if ((1 << inet->sk_state) &
+- ~(TCPF_SYN_SENT | TCPF_SYN_RECV))
+- task->tk_timeout = RPC_REESTABLISH_TIMEOUT;
+- rpc_sleep_on(&xprt->pending, task, xprt_connect_status,
+- NULL);
+- }
+- release_sock(inet);
+- break;
+- case -ECONNREFUSED:
+- case -ECONNRESET:
+- case -ENOTCONN:
+- if (!RPC_IS_SOFT(task)) {
+- rpc_delay(task, RPC_REESTABLISH_TIMEOUT);
+- task->tk_status = -ENOTCONN;
+- break;
+- }
+- default:
+- /* Report myriad other possible returns. If this file
+- * system is soft mounted, just error out, like Solaris. */
+- if (RPC_IS_SOFT(task)) {
+- printk(KERN_WARNING
+- "RPC: error %d connecting to server %s, exiting\n",
+- -status, task->tk_client->cl_server);
+- task->tk_status = -EIO;
+- goto out_write;
+- }
+- printk(KERN_WARNING "RPC: error %d connecting to server %s\n",
+- -status, task->tk_client->cl_server);
+- /* This will prevent anybody else from reconnecting */
+- rpc_delay(task, RPC_REESTABLISH_TIMEOUT);
+- task->tk_status = status;
+- break;
+- }
++ task->tk_timeout = RPC_CONNECT_TIMEOUT;
++ rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL);
++ schedule_work(&xprt->sock_connect);
+ return;
+ out_write:
+ xprt_release_write(xprt, task);
+@@ -583,6 +574,8 @@ xprt_connect_status(struct rpc_task *tas
+ task->tk_status = -EIO;
+
+ switch (task->tk_status) {
++ case -ECONNREFUSED:
++ case -ECONNRESET:
+ case -ENOTCONN:
+ rpc_delay(task, RPC_REESTABLISH_TIMEOUT);
+ return;
+@@ -1333,22 +1326,14 @@ do_xprt_reserve(struct rpc_task *task)
+ /*
+ * Allocate a 'unique' XID
+ */
+-static u32
+-xprt_alloc_xid(void)
++static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt)
++{
++ return xprt->xid++;
++}
++
++static inline void xprt_init_xid(struct rpc_xprt *xprt)
+ {
+- static spinlock_t xid_lock = SPIN_LOCK_UNLOCKED;
+- static int need_init = 1;
+- static u32 xid;
+- u32 ret;
+-
+- spin_lock(&xid_lock);
+- if (unlikely(need_init)) {
+- xid = get_seconds() << 12;
+- need_init = 0;
+- }
+- ret = xid++;
+- spin_unlock(&xid_lock);
+- return ret;
++ get_random_bytes(&xprt->xid, sizeof(xprt->xid));
+ }
+
+ /*
+@@ -1362,7 +1347,8 @@ xprt_request_init(struct rpc_task *task,
+ req->rq_timeout = xprt->timeout;
+ req->rq_task = task;
+ req->rq_xprt = xprt;
+- req->rq_xid = xprt_alloc_xid();
++ req->rq_xid = xprt_alloc_xid(xprt);
++ memset(req->rq_seqnos, 0, sizeof(req->rq_seqnos));
+ INIT_LIST_HEAD(&req->rq_list);
+ dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid,
+ req, req->rq_xid);
+@@ -1457,11 +1443,13 @@ xprt_setup(int proto, struct sockaddr_in
+ init_waitqueue_head(&xprt->cong_wait);
+
+ INIT_LIST_HEAD(&xprt->recv);
++ INIT_WORK(&xprt->sock_connect, xprt_socket_connect, xprt);
+ INIT_WORK(&xprt->task_cleanup, xprt_socket_autoclose, xprt);
+ init_timer(&xprt->timer);
+ xprt->timer.function = xprt_init_autodisconnect;
+ xprt->timer.data = (unsigned long) xprt;
+ xprt->last_used = jiffies;
++ xprt->port = XPRT_MAX_RESVPORT;
+
+ /* Set timeout parameters */
+ if (to) {
+@@ -1481,6 +1469,8 @@ xprt_setup(int proto, struct sockaddr_in
+ req->rq_next = NULL;
+ xprt->free = xprt->slot;
+
++ xprt_init_xid(xprt);
++
+ /* Check whether we want to use a reserved port */
+ xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0;
+
+@@ -1493,30 +1483,28 @@ xprt_setup(int proto, struct sockaddr_in
+ * Bind to a reserved port
+ */
+ static inline int
+-xprt_bindresvport(struct socket *sock)
++xprt_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
+ {
+- struct sockaddr_in myaddr;
++ struct sockaddr_in myaddr = {
++ .sin_family = AF_INET,
++ };
+ int err, port;
+- kernel_cap_t saved_cap = current->cap_effective;
+
+- /* Override capabilities.
+- * They were checked in xprt_create_proto i.e. at mount time
+- */
+- cap_raise(current->cap_effective, CAP_NET_BIND_SERVICE);
+-
+- memset(&myaddr, 0, sizeof(myaddr));
+- myaddr.sin_family = AF_INET;
+- port = 800;
++ /* Were we already bound to a given port? Try to reuse it */
++ port = xprt->port;
+ do {
+ myaddr.sin_port = htons(port);
+ err = sock->ops->bind(sock, (struct sockaddr *) &myaddr,
+ sizeof(myaddr));
+- } while (err == -EADDRINUSE && --port > 0);
+- current->cap_effective = saved_cap;
+-
+- if (err < 0)
+- printk("RPC: Can't bind to reserved port (%d).\n", -err);
++ if (err == 0) {
++ xprt->port = port;
++ return 0;
++ }
++ if (--port == 0)
++ port = XPRT_MAX_RESVPORT;
++ } while (err == -EADDRINUSE && port != xprt->port);
+
++ printk("RPC: Can't bind to reserved port (%d).\n", -err);
+ return err;
+ }
+
+@@ -1580,7 +1568,7 @@ xprt_sock_setbufsize(struct rpc_xprt *xp
+ * and connect stream sockets.
+ */
+ static struct socket *
+-xprt_create_socket(int proto, struct rpc_timeout *to, int resvport)
++xprt_create_socket(struct rpc_xprt *xprt, int proto, int resvport)
+ {
+ struct socket *sock;
+ int type, err;
+@@ -1596,7 +1584,7 @@ xprt_create_socket(int proto, struct rpc
+ }
+
+ /* If the caller has the capability, bind to a reserved port */
+- if (resvport && xprt_bindresvport(sock) < 0) {
++ if (resvport && xprt_bindresvport(xprt, sock) < 0) {
+ printk("RPC: can't bind to reserved port.\n");
+ goto failed;
+ }
+diff -puN net/sunrpc/cache.c~CITI_NFS4_ALL net/sunrpc/cache.c
+--- linux-2.6.3/net/sunrpc/cache.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/net/sunrpc/cache.c 2004-02-19 16:47:03.000000000 -0500
+@@ -325,6 +325,7 @@ int cache_clean(void)
+
+ if (current_detail && current_index < current_detail->hash_size) {
+ struct cache_head *ch, **cp;
++ struct cache_detail *d;
+
+ write_lock(¤t_detail->hash_lock);
+
+@@ -354,12 +355,14 @@ int cache_clean(void)
+ rv = 1;
+ }
+ write_unlock(¤t_detail->hash_lock);
+- if (ch)
+- current_detail->cache_put(ch, current_detail);
+- else
++ d = current_detail;
++ if (!ch)
+ current_index ++;
+- }
+- spin_unlock(&cache_list_lock);
++ spin_unlock(&cache_list_lock);
++ if (ch)
++ d->cache_put(ch, d);
++ } else
++ spin_unlock(&cache_list_lock);
+
+ return rv;
+ }
+diff -puN include/linux/sunrpc/cache.h~CITI_NFS4_ALL include/linux/sunrpc/cache.h
+--- linux-2.6.3/include/linux/sunrpc/cache.h~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/sunrpc/cache.h 2004-02-19 16:47:03.000000000 -0500
+@@ -132,12 +132,14 @@ struct cache_deferred_req {
+ * If "set" == 0 :
+ * If an entry is found, it is returned
+ * If no entry is found, a new non-VALID entry is created.
+- * If "set" == 1 :
++ * If "set" == 1 and INPLACE == 0 :
+ * If no entry is found a new one is inserted with data from "template"
+ * If a non-CACHE_VALID entry is found, it is updated from template using UPDATE
+ * If a CACHE_VALID entry is found, a new entry is swapped in with data
+ * from "template"
+- * If set == 2, we UPDATE, but don't swap. i.e. update in place
++ * If set == 1, and INPLACE == 1 :
++ * As above, except that if a CACHE_VALID entry is found, we UPDATE in place
++ * instead of swapping in a new entry.
+ *
+ * If the passed handle has the CACHE_NEGATIVE flag set, then UPDATE is not
+ * run but insteead CACHE_NEGATIVE is set in any new item.
+@@ -164,8 +166,8 @@ RTN *FNAME ARGS \
+ RTN *tmp, *new=NULL; \
+ struct cache_head **hp, **head; \
+ SETUP; \
+- retry: \
+ head = &(DETAIL)->hash_table[HASHFN]; \
++ retry: \
+ if (set||new) write_lock(&(DETAIL)->hash_lock); \
+ else read_lock(&(DETAIL)->hash_lock); \
+ for(hp=head; *hp != NULL; hp = &tmp->MEMBER.next) { \
+@@ -175,6 +177,8 @@ RTN *FNAME ARGS \
+ if (set && !INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \
+ break; \
+ \
++ if (new) \
++ {INIT;} \
+ cache_get(&tmp->MEMBER); \
+ if (set) { \
+ if (!INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags))\
+@@ -203,6 +207,7 @@ RTN *FNAME ARGS \
+ } \
+ /* Didn't find anything */ \
+ if (new) { \
++ INIT; \
+ new->MEMBER.next = *head; \
+ *head = &new->MEMBER; \
+ (DETAIL)->entries ++; \
+@@ -224,8 +229,6 @@ RTN *FNAME ARGS \
+ if (new) { \
+ cache_init(&new->MEMBER); \
+ cache_get(&new->MEMBER); \
+- INIT; \
+- tmp = new; \
+ goto retry; \
+ } \
+ return NULL; \
+diff -puN net/sunrpc/svcauth.c~CITI_NFS4_ALL net/sunrpc/svcauth.c
+--- linux-2.6.3/net/sunrpc/svcauth.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/net/sunrpc/svcauth.c 2004-02-19 16:47:04.000000000 -0500
+@@ -150,7 +150,10 @@ DefineCacheLookup(struct auth_domain,
+ &auth_domain_cache,
+ auth_domain_hash(item),
+ auth_domain_match(tmp, item),
+- kfree(new); if(!set) return NULL;
++ kfree(new); if(!set) {
++ write_unlock(&auth_domain_cache.hash_lock);
++ return NULL;
++ }
+ new=item; atomic_inc(&new->h.refcnt),
+ /* no update */,
+ 0 /* no inplace updates */
+diff -puN net/sunrpc/svcauth_unix.c~CITI_NFS4_ALL net/sunrpc/svcauth_unix.c
+--- linux-2.6.3/net/sunrpc/svcauth_unix.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500
++++ linux-2.6.3-bfields/net/sunrpc/svcauth_unix.c 2004-02-19 16:47:03.000000000 -0500
+@@ -119,7 +119,8 @@ static inline int ip_map_match(struct ip
+ }
+ static inline void ip_map_init(struct ip_map *new, struct ip_map *item)
+ {
+- new->m_class = strdup(item->m_class);
++ new->m_class = item->m_class;
++ item->m_class = NULL;
+ new->m_addr.s_addr = item->m_addr.s_addr;
+ }
+ static inline void ip_map_update(struct ip_map *new, struct ip_map *item)
+@@ -191,7 +192,9 @@ static int ip_map_parse(struct cache_det
+ } else
+ dom = NULL;
+
+- ipm.m_class = class;
++ ipm.m_class = strdup(class);
++ if (ipm.m_class == NULL)
++ return -ENOMEM;
+ ipm.m_addr.s_addr =
+ htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4);
+ ipm.h.flags = 0;
+@@ -207,6 +210,7 @@ static int ip_map_parse(struct cache_det
+ ip_map_put(&ipmp->h, &ip_map_cache);
+ if (dom)
+ auth_domain_put(dom);
++ if (ipm.m_class) kfree(ipm.m_class);
+ if (!ipmp)
+ return -ENOMEM;
+ cache_flush();
+@@ -266,7 +270,9 @@ int auth_unix_add_addr(struct in_addr ad
+ if (dom->flavour != RPC_AUTH_UNIX)
+ return -EINVAL;
+ udom = container_of(dom, struct unix_domain, h);
+- ip.m_class = "nfsd";
++ ip.m_class = strdup("nfsd");
++ if (!ip.m_class)
++ return -ENOMEM;
+ ip.m_addr = addr;
+ ip.m_client = udom;
+ ip.m_add_change = udom->addr_changes+1;
+@@ -274,6 +280,7 @@ int auth_unix_add_addr(struct in_addr ad
+ ip.h.expiry_time = NEVER;
+
+ ipmp = ip_map_lookup(&ip, 1);
++ if (ip.m_class) kfree(ip.m_class);
+ if (ipmp) {
+ ip_map_put(&ipmp->h, &ip_map_cache);
+ return 0;
+diff -puN fs/nfsd/stats.c~CITI_NFS4_ALL fs/nfsd/stats.c
+--- linux-2.6.3/fs/nfsd/stats.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfsd/stats.c 2004-02-19 16:47:04.000000000 -0500
+@@ -26,6 +26,7 @@
+ #include <linux/kernel.h>
+ #include <linux/time.h>
+ #include <linux/proc_fs.h>
++#include <linux/seq_file.h>
+ #include <linux/stat.h>
+ #include <linux/module.h>
+
+@@ -39,14 +40,11 @@ struct svc_stat nfsd_svcstats = {
+ .program = &nfsd_program,
+ };
+
+-static int
+-nfsd_proc_read(char *buffer, char **start, off_t offset, int count,
+- int *eof, void *data)
++static int nfsd_proc_show(struct seq_file *seq, void *v)
+ {
+- int len;
+- int i;
++ int i;
+
+- len = sprintf(buffer, "rc %u %u %u\nfh %u %u %u %u %u\nio %u %u\n",
++ seq_printf(seq, "rc %u %u %u\nfh %u %u %u %u %u\nio %u %u\n",
+ nfsdstats.rchits,
+ nfsdstats.rcmisses,
+ nfsdstats.rcnocache,
+@@ -58,57 +56,42 @@ nfsd_proc_read(char *buffer, char **star
+ nfsdstats.io_read,
+ nfsdstats.io_write);
+ /* thread usage: */
+- len += sprintf(buffer+len, "th %u %u", nfsdstats.th_cnt, nfsdstats.th_fullcnt);
++ seq_printf(seq, "th %u %u", nfsdstats.th_cnt, nfsdstats.th_fullcnt);
+ for (i=0; i<10; i++) {
+ unsigned int jifs = nfsdstats.th_usage[i];
+ unsigned int sec = jifs / HZ, msec = (jifs % HZ)*1000/HZ;
+- len += sprintf(buffer+len, " %u.%03u", sec, msec);
++ seq_printf(seq, " %u.%03u", sec, msec);
+ }
+
+ /* newline and ra-cache */
+- len += sprintf(buffer+len, "\nra %u", nfsdstats.ra_size);
++ seq_printf(seq, "\nra %u", nfsdstats.ra_size);
+ for (i=0; i<11; i++)
+- len += sprintf(buffer+len, " %u", nfsdstats.ra_depth[i]);
+- len += sprintf(buffer+len, "\n");
++ seq_printf(seq, " %u", nfsdstats.ra_depth[i]);
++ seq_putc(seq, '\n');
+
++ /* show my rpc info */
++ svc_seq_show(seq, &nfsd_svcstats);
+
+- /* Assume we haven't hit EOF yet. Will be set by svc_proc_read. */
+- *eof = 0;
+-
+- /*
+- * Append generic nfsd RPC statistics if there's room for it.
+- */
+- if (len <= offset) {
+- len = svc_proc_read(buffer, start, offset - len, count,
+- eof, data);
+- return len;
+- }
+-
+- if (len < count) {
+- len += svc_proc_read(buffer + len, start, 0, count - len,
+- eof, data);
+- }
+-
+- if (offset >= len) {
+- *start = buffer;
+- return 0;
+- }
++ return 0;
++}
+
+- *start = buffer + offset;
+- if ((len -= offset) > count)
+- return count;
+- return len;
++static int nfsd_proc_open(struct inode *inode, struct file *file)
++{
++ return single_open(file, nfsd_proc_show, NULL);
+ }
+
++static struct file_operations nfsd_proc_fops = {
++ .owner = THIS_MODULE,
++ .open = nfsd_proc_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
++
+ void
+ nfsd_stat_init(void)
+ {
+- struct proc_dir_entry *ent;
+-
+- if ((ent = svc_proc_register(&nfsd_svcstats)) != 0) {
+- ent->read_proc = nfsd_proc_read;
+- ent->owner = THIS_MODULE;
+- }
++ svc_proc_register(&nfsd_svcstats, &nfsd_proc_fops);
+ }
+
+ void
+diff -puN include/linux/sunrpc/stats.h~CITI_NFS4_ALL include/linux/sunrpc/stats.h
+--- linux-2.6.3/include/linux/sunrpc/stats.h~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/sunrpc/stats.h 2004-02-19 16:47:04.000000000 -0500
+@@ -48,14 +48,13 @@ void rpc_modcount(struct inode *, int)
+ #ifdef CONFIG_PROC_FS
+ struct proc_dir_entry * rpc_proc_register(struct rpc_stat *);
+ void rpc_proc_unregister(const char *);
+-int rpc_proc_read(char *, char **, off_t, int,
+- int *, void *);
+ void rpc_proc_zero(struct rpc_program *);
+-struct proc_dir_entry * svc_proc_register(struct svc_stat *);
++struct proc_dir_entry * svc_proc_register(struct svc_stat *,
++ struct file_operations *);
+ void svc_proc_unregister(const char *);
+-int svc_proc_read(char *, char **, off_t, int,
+- int *, void *);
+-void svc_proc_zero(struct svc_program *);
++
++void svc_seq_show(struct seq_file *,
++ const struct svc_stat *);
+
+ extern struct proc_dir_entry *proc_net_rpc;
+
+@@ -63,13 +62,14 @@ extern struct proc_dir_entry *proc_net_r
+
+ static inline struct proc_dir_entry *rpc_proc_register(struct rpc_stat *s) { return NULL; }
+ static inline void rpc_proc_unregister(const char *p) {}
+-static inline int rpc_proc_read(char *a, char **b, off_t c, int d, int *e, void *f) { return 0; }
+ static inline void rpc_proc_zero(struct rpc_program *p) {}
+
+-static inline struct proc_dir_entry *svc_proc_register(struct svc_stat *s) { return NULL; }
++static inline struct proc_dir_entry *svc_proc_register(struct svc_stat *s,
++ struct file_operations *f) { return NULL; }
+ static inline void svc_proc_unregister(const char *p) {}
+-static inline int svc_proc_read(char *a, char **b, off_t c, int d, int *e, void *f) { return 0; }
+-static inline void svc_proc_zero(struct svc_program *p) {}
++
++static inline void svc_seq_show(struct seq_file *seq,
++ const struct svc_stat *st) {}
+
+ #define proc_net_rpc NULL
+
+diff -puN net/sunrpc/stats.c~CITI_NFS4_ALL net/sunrpc/stats.c
+--- linux-2.6.3/net/sunrpc/stats.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500
++++ linux-2.6.3-bfields/net/sunrpc/stats.c 2004-02-19 16:47:04.000000000 -0500
+@@ -18,6 +18,7 @@
+ #include <linux/kernel.h>
+ #include <linux/sched.h>
+ #include <linux/proc_fs.h>
++#include <linux/seq_file.h>
+ #include <linux/sunrpc/clnt.h>
+ #include <linux/sunrpc/svcsock.h>
+
+@@ -28,70 +29,66 @@ struct proc_dir_entry *proc_net_rpc = NU
+ /*
+ * Get RPC client stats
+ */
+-int
+-rpc_proc_read(char *buffer, char **start, off_t offset, int count,
+- int *eof, void *data)
+-{
+- struct rpc_stat *statp = (struct rpc_stat *) data;
+- struct rpc_program *prog = statp->program;
+- struct rpc_version *vers;
+- int len, i, j;
++static int rpc_proc_show(struct seq_file *seq, void *v) {
++ const struct rpc_stat *statp = seq->private;
++ const struct rpc_program *prog = statp->program;
++ int i, j;
+
+- len = sprintf(buffer,
++ seq_printf(seq,
+ "net %d %d %d %d\n",
+ statp->netcnt,
+ statp->netudpcnt,
+ statp->nettcpcnt,
+ statp->nettcpconn);
+- len += sprintf(buffer + len,
++ seq_printf(seq,
+ "rpc %d %d %d\n",
+ statp->rpccnt,
+ statp->rpcretrans,
+ statp->rpcauthrefresh);
+
+ for (i = 0; i < prog->nrvers; i++) {
+- if (!(vers = prog->version[i]))
++ const struct rpc_version *vers = prog->version[i];
++ if (!vers)
+ continue;
+- len += sprintf(buffer + len, "proc%d %d",
++ seq_printf(seq, "proc%d %d",
+ vers->number, vers->nrprocs);
+ for (j = 0; j < vers->nrprocs; j++)
+- len += sprintf(buffer + len, " %d",
++ seq_printf(seq, " %d",
+ vers->procs[j].p_count);
+- buffer[len++] = '\n';
++ seq_putc(seq, '\n');
+ }
++ return 0;
++}
+
+- if (offset >= len) {
+- *start = buffer;
+- *eof = 1;
+- return 0;
+- }
+- *start = buffer + offset;
+- if ((len -= offset) > count)
+- return count;
+- *eof = 1;
+- return len;
++static int rpc_proc_open(struct inode *inode, struct file *file)
++{
++ return single_open(file, rpc_proc_show, PDE(inode)->data);
+ }
+
++static struct file_operations rpc_proc_fops = {
++ .owner = THIS_MODULE,
++ .open = rpc_proc_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = single_release,
++};
++
+ /*
+ * Get RPC server stats
+ */
+-int
+-svc_proc_read(char *buffer, char **start, off_t offset, int count,
+- int *eof, void *data)
+-{
+- struct svc_stat *statp = (struct svc_stat *) data;
+- struct svc_program *prog = statp->program;
+- struct svc_procedure *proc;
+- struct svc_version *vers;
+- int len, i, j;
++void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) {
++ const struct svc_program *prog = statp->program;
++ const struct svc_procedure *proc;
++ const struct svc_version *vers;
++ int i, j;
+
+- len = sprintf(buffer,
++ seq_printf(seq,
+ "net %d %d %d %d\n",
+ statp->netcnt,
+ statp->netudpcnt,
+ statp->nettcpcnt,
+ statp->nettcpconn);
+- len += sprintf(buffer + len,
++ seq_printf(seq,
+ "rpc %d %d %d %d %d\n",
+ statp->rpccnt,
+ statp->rpcbadfmt+statp->rpcbadauth+statp->rpcbadclnt,
+@@ -102,41 +99,36 @@ svc_proc_read(char *buffer, char **start
+ for (i = 0; i < prog->pg_nvers; i++) {
+ if (!(vers = prog->pg_vers[i]) || !(proc = vers->vs_proc))
+ continue;
+- len += sprintf(buffer + len, "proc%d %d", i, vers->vs_nproc);
++ seq_printf(seq, "proc%d %d", i, vers->vs_nproc);
+ for (j = 0; j < vers->vs_nproc; j++, proc++)
+- len += sprintf(buffer + len, " %d", proc->pc_count);
+- buffer[len++] = '\n';
++ seq_printf(seq, " %d", proc->pc_count);
++ seq_putc(seq, '\n');
+ }
+-
+- if (offset >= len) {
+- *start = buffer;
+- *eof = 1;
+- return 0;
+- }
+- *start = buffer + offset;
+- if ((len -= offset) > count)
+- return count;
+- *eof = 1;
+- return len;
+ }
+
+ /*
+ * Register/unregister RPC proc files
+ */
+ static inline struct proc_dir_entry *
+-do_register(const char *name, void *data, int issvc)
++do_register(const char *name, void *data, struct file_operations *fops)
+ {
++ struct proc_dir_entry *ent;
++
+ rpc_proc_init();
+ dprintk("RPC: registering /proc/net/rpc/%s\n", name);
+- return create_proc_read_entry(name, 0, proc_net_rpc,
+- issvc? svc_proc_read : rpc_proc_read,
+- data);
++
++ ent = create_proc_entry(name, 0, proc_net_rpc);
++ if (ent) {
++ ent->proc_fops = fops;
++ ent->data = data;
++ }
++ return ent;
+ }
+
+ struct proc_dir_entry *
+ rpc_proc_register(struct rpc_stat *statp)
+ {
+- return do_register(statp->program->name, statp, 0);
++ return do_register(statp->program->name, statp, &rpc_proc_fops);
+ }
+
+ void
+@@ -146,9 +138,9 @@ rpc_proc_unregister(const char *name)
+ }
+
+ struct proc_dir_entry *
+-svc_proc_register(struct svc_stat *statp)
++svc_proc_register(struct svc_stat *statp, struct file_operations *fops)
+ {
+- return do_register(statp->program->pg_name, statp, 1);
++ return do_register(statp->program->pg_name, statp, fops);
+ }
+
+ void
+@@ -163,7 +155,7 @@ rpc_proc_init(void)
+ dprintk("RPC: registering /proc/net/rpc\n");
+ if (!proc_net_rpc) {
+ struct proc_dir_entry *ent;
+- ent = proc_mkdir("net/rpc", 0);
++ ent = proc_mkdir("rpc", proc_net);
+ if (ent) {
+ ent->owner = THIS_MODULE;
+ proc_net_rpc = ent;
+diff -puN net/sunrpc/sunrpc_syms.c~CITI_NFS4_ALL net/sunrpc/sunrpc_syms.c
+--- linux-2.6.3/net/sunrpc/sunrpc_syms.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500
++++ linux-2.6.3-bfields/net/sunrpc/sunrpc_syms.c 2004-02-19 16:47:04.000000000 -0500
+@@ -85,15 +85,16 @@ EXPORT_SYMBOL(svc_recv);
+ EXPORT_SYMBOL(svc_wake_up);
+ EXPORT_SYMBOL(svc_makesock);
+ EXPORT_SYMBOL(svc_reserve);
++EXPORT_SYMBOL(svc_auth_register);
++EXPORT_SYMBOL(auth_domain_lookup);
+
+ /* RPC statistics */
+ #ifdef CONFIG_PROC_FS
+ EXPORT_SYMBOL(rpc_proc_register);
+ EXPORT_SYMBOL(rpc_proc_unregister);
+-EXPORT_SYMBOL(rpc_proc_read);
+ EXPORT_SYMBOL(svc_proc_register);
+ EXPORT_SYMBOL(svc_proc_unregister);
+-EXPORT_SYMBOL(svc_proc_read);
++EXPORT_SYMBOL(svc_seq_show);
+ #endif
+
+ /* caching... */
+diff -puN net/sunrpc/auth_gss/gss_krb5_seal.c~CITI_NFS4_ALL net/sunrpc/auth_gss/gss_krb5_seal.c
+--- linux-2.6.3/net/sunrpc/auth_gss/gss_krb5_seal.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500
++++ linux-2.6.3-bfields/net/sunrpc/auth_gss/gss_krb5_seal.c 2004-02-19 16:47:07.000000000 -0500
+@@ -101,12 +101,12 @@ krb5_make_token(struct krb5_ctx *ctx, in
+ checksum_type = CKSUMTYPE_RSA_MD5;
+ break;
+ default:
+- dprintk("RPC: gss_krb5_seal: ctx->signalg %d not"
++ dprintk("RPC: gss_krb5_seal: ctx->signalg %d not"
+ " supported\n", ctx->signalg);
+ goto out_err;
+ }
+ if (ctx->sealalg != SEAL_ALG_NONE && ctx->sealalg != SEAL_ALG_DES) {
+- dprintk("RPC: gss_krb5_seal: ctx->sealalg %d not supported\n",
++ dprintk("RPC: gss_krb5_seal: ctx->sealalg %d not supported\n",
+ ctx->sealalg);
+ goto out_err;
+ }
+@@ -151,7 +151,7 @@ krb5_make_token(struct krb5_ctx *ctx, in
+ md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH,
+ KRB5_CKSUM_LENGTH);
+
+- dprintk("make_seal_token: cksum data: \n");
++ dprintk("RPC: make_seal_token: cksum data: \n");
+ print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0);
+ break;
+ default:
+@@ -169,8 +169,5 @@ krb5_make_token(struct krb5_ctx *ctx, in
+ return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
+ out_err:
+ if (md5cksum.data) kfree(md5cksum.data);
+- if (token->data) kfree(token->data);
+- token->data = 0;
+- token->len = 0;
+ return GSS_S_FAILURE;
+ }
+diff -puN include/linux/sunrpc/auth_gss.h~CITI_NFS4_ALL include/linux/sunrpc/auth_gss.h
+--- linux-2.6.3/include/linux/sunrpc/auth_gss.h~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/sunrpc/auth_gss.h 2004-02-19 16:47:04.000000000 -0500
+@@ -62,8 +62,6 @@ struct rpc_gss_init_res {
+ struct xdr_netobj gr_token; /* token */
+ };
+
+-#define GSS_SEQ_WIN 5
+-
+ /* The gss_cl_ctx struct holds all the information the rpcsec_gss client
+ * code needs to know about a single security context. In particular,
+ * gc_gss_ctx is the context handle that is used to do gss-api calls, while
+diff -puN include/linux/sunrpc/gss_api.h~CITI_NFS4_ALL include/linux/sunrpc/gss_api.h
+--- linux-2.6.3/include/linux/sunrpc/gss_api.h~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/sunrpc/gss_api.h 2004-02-19 16:47:04.000000000 -0500
+@@ -120,6 +120,9 @@ int gss_mech_unregister_all(void);
+ * reference count. */
+ struct gss_api_mech * gss_mech_get_by_OID(struct xdr_netobj *);
+
++/* Similar, but get by name like "krb5", "spkm", etc., instead of OID. */
++struct gss_api_mech *gss_mech_get_by_name(char *);
++
+ /* Just increments the mechanism's reference count and returns its input: */
+ struct gss_api_mech * gss_mech_get(struct gss_api_mech *);
+
+diff -puN /dev/null include/linux/sunrpc/svcauth_gss.h
+--- /dev/null 2004-01-26 19:20:21.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/sunrpc/svcauth_gss.h 2004-02-19 16:47:04.000000000 -0500
+@@ -0,0 +1,35 @@
++/*
++ * linux/include/linux/sunrpc/svcauth_gss.h
++ *
++ * Bruce Fields <bfields@umich.edu>
++ * Copyright (c) 2002 The Regents of the University of Michigan
++ *
++ * $Id: linux-2.6.3-CITI_NFS4_ALL.patch,v 1.2 2004/03/17 01:04:13 nic Exp $
++ *
++ */
++
++#ifndef _LINUX_SUNRPC_SVCAUTH_GSS_H
++#define _LINUX_SUNRPC_SVCAUTH_GSS_H
++
++#ifdef __KERNEL__
++#include <linux/sched.h>
++#include <linux/sunrpc/types.h>
++#include <linux/sunrpc/xdr.h>
++#include <linux/sunrpc/svcauth.h>
++#include <linux/sunrpc/svcsock.h>
++#include <linux/sunrpc/auth_gss.h>
++
++int gss_svc_init(void);
++int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name);
++
++
++struct gss_svc_data {
++ /* decoded gss client cred: */
++ struct rpc_gss_wire_cred clcred;
++ /* pointer to the beginning of the procedure-specific results, which
++ * may be encrypted/checksummed in svcauth_gss_release: */
++ u32 *body_start;
++};
++
++#endif /* __KERNEL__ */
++#endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */
+diff -puN include/linux/sunrpc/svcauth.h~CITI_NFS4_ALL include/linux/sunrpc/svcauth.h
+--- linux-2.6.3/include/linux/sunrpc/svcauth.h~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/sunrpc/svcauth.h 2004-02-19 16:47:04.000000000 -0500
+@@ -65,6 +65,10 @@ struct auth_domain {
+ * GARBAGE - rpc garbage_args error
+ * SYSERR - rpc system_err error
+ * DENIED - authp holds reason for denial.
++ * COMPLETE - the reply is encoded already and ready to be sent; no
++ * further processing is necessary. (This is used for processing
++ * null procedure calls which are used to set up encryption
++ * contexts.)
+ *
+ * accept is passed the proc number so that it can accept NULL rpc requests
+ * even if it cannot authenticate the client (as is sometimes appropriate).
+@@ -97,6 +101,7 @@ extern struct auth_ops *authtab[RPC_AUTH
+ #define SVC_DROP 6
+ #define SVC_DENIED 7
+ #define SVC_PENDING 8
++#define SVC_COMPLETE 9
+
+
+ extern int svc_authenticate(struct svc_rqst *rqstp, u32 *authp);
+diff -puN include/linux/sunrpc/svc.h~CITI_NFS4_ALL include/linux/sunrpc/svc.h
+--- linux-2.6.3/include/linux/sunrpc/svc.h~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/sunrpc/svc.h 2004-02-19 16:47:04.000000000 -0500
+@@ -135,6 +135,7 @@ struct svc_rqst {
+
+ void * rq_argp; /* decoded arguments */
+ void * rq_resp; /* xdr'd results */
++ void * rq_auth_data; /* flavor-specific data */
+
+ int rq_reserved; /* space on socket outq
+ * reserved for this request
+diff -puN net/sunrpc/auth_gss/auth_gss.c~CITI_NFS4_ALL net/sunrpc/auth_gss/auth_gss.c
+--- linux-2.6.3/net/sunrpc/auth_gss/auth_gss.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500
++++ linux-2.6.3-bfields/net/sunrpc/auth_gss/auth_gss.c 2004-02-19 16:47:07.000000000 -0500
+@@ -48,6 +48,7 @@
+ #include <linux/sunrpc/clnt.h>
+ #include <linux/sunrpc/auth.h>
+ #include <linux/sunrpc/auth_gss.h>
++#include <linux/sunrpc/svcauth_gss.h>
+ #include <linux/sunrpc/gss_err.h>
+ #include <linux/workqueue.h>
+ #include <linux/sunrpc/rpc_pipe_fs.h>
+@@ -279,7 +280,7 @@ err_free_ctx:
+ kfree(ctx);
+ err:
+ *gc = NULL;
+- dprintk("RPC: gss_parse_init_downcall returning %d\n", err);
++ dprintk("RPC: gss_parse_init_downcall returning %d\n", err);
+ return err;
+ }
+
+@@ -310,8 +311,10 @@ __gss_find_upcall(struct gss_auth *gss_a
+ if (pos->uid != uid)
+ continue;
+ atomic_inc(&pos->count);
++ dprintk("RPC: gss_find_upcall found msg %p\n", pos);
+ return pos;
+ }
++ dprintk("RPC: gss_find_upcall found nothing\n");
+ return NULL;
+ }
+
+@@ -349,6 +352,8 @@ gss_upcall(struct rpc_clnt *clnt, struct
+ uid_t uid = cred->cr_uid;
+ int res = 0;
+
++ dprintk("RPC: %4u gss_upcall for uid %u\n", task->tk_pid, uid);
++
+ retry:
+ spin_lock(&gss_auth->lock);
+ gss_msg = __gss_find_upcall(gss_auth, uid);
+@@ -357,8 +362,10 @@ retry:
+ if (gss_new == NULL) {
+ spin_unlock(&gss_auth->lock);
+ gss_new = kmalloc(sizeof(*gss_new), GFP_KERNEL);
+- if (!gss_new)
++ if (!gss_new) {
++ dprintk("RPC: %4u gss_upcall -ENOMEM\n", task->tk_pid);
+ return -ENOMEM;
++ }
+ goto retry;
+ }
+ gss_msg = gss_new;
+@@ -388,10 +395,12 @@ retry:
+ spin_unlock(&gss_auth->lock);
+ }
+ gss_release_msg(gss_msg);
++ dprintk("RPC: %4u gss_upcall for uid %u result %d\n", task->tk_pid,
++ uid, res);
+ return res;
+ out_sleep:
+- /* Sleep forever */
+- task->tk_timeout = 0;
++ dprintk("RPC: %4u gss_upcall sleeping\n", task->tk_pid);
++ task->tk_timeout = 0; /* Sleep forever */
+ rpc_sleep_on(&gss_msg->waitq, task, NULL, NULL);
+ spin_unlock(&gss_auth->lock);
+ if (gss_new)
+@@ -476,12 +485,13 @@ gss_pipe_downcall(struct file *filp, con
+ } else
+ spin_unlock(&gss_auth->lock);
+ rpc_release_client(clnt);
++ dprintk("RPC: gss_pipe_downcall returning length %u\n", mlen);
+ return mlen;
+ err:
+ if (ctx)
+ gss_destroy_ctx(ctx);
+ rpc_release_client(clnt);
+- dprintk("RPC: gss_pipe_downcall returning %d\n", err);
++ dprintk("RPC: gss_pipe_downcall returning %d\n", err);
+ return err;
+ }
+
+@@ -519,6 +529,8 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg
+ static unsigned long ratelimit;
+
+ if (msg->errno < 0) {
++ dprintk("RPC: gss_pipe_destroy_msg releasing msg %p\n",
++ gss_msg);
+ atomic_inc(&gss_msg->count);
+ gss_unhash_msg(gss_msg);
+ if (msg->errno == -ETIMEDOUT || msg->errno == -EPIPE) {
+@@ -543,7 +555,8 @@ gss_create(struct rpc_clnt *clnt, rpc_au
+ struct gss_auth *gss_auth;
+ struct rpc_auth * auth;
+
+- dprintk("RPC: creating GSS authenticator for client %p\n",clnt);
++ dprintk("RPC: creating GSS authenticator for client %p\n",clnt);
++
+ if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL)))
+ goto out_dec;
+ gss_auth->mech = gss_pseudoflavor_to_mech(flavor);
+@@ -581,7 +594,8 @@ static void
+ gss_destroy(struct rpc_auth *auth)
+ {
+ struct gss_auth *gss_auth;
+- dprintk("RPC: destroying GSS authenticator %p flavor %d\n",
++
++ dprintk("RPC: destroying GSS authenticator %p flavor %d\n",
+ auth, auth->au_flavor);
+
+ gss_auth = container_of(auth, struct gss_auth, rpc_auth);
+@@ -596,8 +610,7 @@ gss_destroy(struct rpc_auth *auth)
+ static void
+ gss_destroy_ctx(struct gss_cl_ctx *ctx)
+ {
+-
+- dprintk("RPC: gss_destroy_ctx\n");
++ dprintk("RPC: gss_destroy_ctx\n");
+
+ if (ctx->gc_gss_ctx)
+ gss_delete_sec_context(&ctx->gc_gss_ctx);
+@@ -616,7 +629,7 @@ gss_destroy_cred(struct rpc_cred *rc)
+ {
+ struct gss_cred *cred = (struct gss_cred *)rc;
+
+- dprintk("RPC: gss_destroy_cred \n");
++ dprintk("RPC: gss_destroy_cred \n");
+
+ if (cred->gc_ctx)
+ gss_put_ctx(cred->gc_ctx);
+@@ -628,7 +641,7 @@ gss_create_cred(struct rpc_auth *auth, s
+ {
+ struct gss_cred *cred = NULL;
+
+- dprintk("RPC: gss_create_cred for uid %d, flavor %d\n",
++ dprintk("RPC: gss_create_cred for uid %d, flavor %d\n",
+ acred->uid, auth->au_flavor);
+
+ if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL)))
+@@ -648,7 +661,7 @@ gss_create_cred(struct rpc_auth *auth, s
+ return (struct rpc_cred *) cred;
+
+ out_err:
+- dprintk("RPC: gss_create_cred failed\n");
++ dprintk("RPC: gss_create_cred failed\n");
+ if (cred) gss_destroy_cred((struct rpc_cred *)cred);
+ return NULL;
+ }
+@@ -659,6 +672,15 @@ gss_match(struct auth_cred *acred, struc
+ return (rc->cr_uid == acred->uid);
+ }
+
++static void
++shift_seqnos(u32 *seqnos)
++{
++ int i;
++ /* Shift back to front so each entry moves down exactly one slot. */
++ for (i = GSS_SEQNO_CACHE - 1; i > 0; i--)
++ seqnos[i] = seqnos[i-1];
++}
++
+ /*
+ * Marshal credentials.
+ * Maybe we should keep a cached credential for performance reasons.
+@@ -678,24 +700,25 @@ gss_marshal(struct rpc_task *task, u32 *
+ struct xdr_buf verf_buf;
+ u32 service;
+
+- dprintk("RPC: gss_marshal\n");
++ dprintk("RPC: %4u gss_marshal\n", task->tk_pid);
+
+ *p++ = htonl(RPC_AUTH_GSS);
+ cred_len = p++;
+
+ service = gss_pseudoflavor_to_service(gss_cred->gc_flavor);
+ if (service == 0) {
+- dprintk("Bad pseudoflavor %d in gss_marshal\n",
+- gss_cred->gc_flavor);
++ dprintk("RPC: %4u Bad pseudoflavor %d in gss_marshal\n",
++ task->tk_pid, gss_cred->gc_flavor);
+ goto out_put_ctx;
+ }
++ shift_seqnos(req->rq_seqnos);
+ spin_lock(&ctx->gc_seq_lock);
+- req->rq_seqno = ctx->gc_seq++;
++ req->rq_seqnos[0] = ctx->gc_seq++;
+ spin_unlock(&ctx->gc_seq_lock);
+
+ *p++ = htonl((u32) RPC_GSS_VERSION);
+ *p++ = htonl((u32) ctx->gc_proc);
+- *p++ = htonl((u32) req->rq_seqno);
++ *p++ = htonl((u32) req->rq_seqnos[0]);
+ *p++ = htonl((u32) service);
+ p = xdr_encode_netobj(p, &ctx->gc_wire_ctx);
+ *cred_len = htonl((p - (cred_len + 1)) << 2);
+@@ -745,6 +768,32 @@ gss_refresh(struct rpc_task *task)
+ return 0;
+ }
+
++static int
++verify_checksum(struct gss_ctx *ctx, struct xdr_netobj *mic, u32 *seqnos)
++{
++ u32 seq, qop_state;
++ struct xdr_buf verf_buf;
++ struct iovec iov;
++ int i;
++
++ for (i=0; i < GSS_SEQNO_CACHE; i++) {
++ if (i && !seqnos[i])
++ goto fail;
++ seq = htonl(seqnos[i]);
++ iov.iov_base = &seq;
++ iov.iov_len = sizeof(seq);
++ xdr_buf_from_iov(&iov, &verf_buf);
++ if (!gss_verify_mic(ctx, &verf_buf, mic, &qop_state))
++ goto success;
++ }
++fail:
++ return -1;
++success:
++ /* So unwrap knows which seqno we used: */
++ seqnos[0] = seqnos[i];
++ return 0;
++}
++
+ static u32 *
+ gss_validate(struct rpc_task *task, u32 *p)
+ {
+@@ -752,28 +801,21 @@ gss_validate(struct rpc_task *task, u32
+ struct gss_cred *gss_cred = container_of(cred, struct gss_cred,
+ gc_base);
+ struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
+- u32 seq, qop_state;
+- struct iovec iov;
+- struct xdr_buf verf_buf;
+ struct xdr_netobj mic;
+ u32 flav,len;
+ u32 service;
+
+- dprintk("RPC: gss_validate\n");
++ dprintk("RPC: %4u gss_validate\n", task->tk_pid);
+
+ flav = ntohl(*p++);
+ if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE)
+ goto out_bad;
+ if (flav != RPC_AUTH_GSS)
+ goto out_bad;
+- seq = htonl(task->tk_rqstp->rq_seqno);
+- iov.iov_base = &seq;
+- iov.iov_len = sizeof(seq);
+- xdr_buf_from_iov(&iov, &verf_buf);
++
+ mic.data = (u8 *)p;
+ mic.len = len;
+-
+- if (gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state))
++ if (verify_checksum(ctx->gc_gss_ctx, &mic, task->tk_rqstp->rq_seqnos))
+ goto out_bad;
+ service = gss_pseudoflavor_to_service(gss_cred->gc_flavor);
+ switch (service) {
+@@ -789,9 +831,12 @@ gss_validate(struct rpc_task *task, u32
+ goto out_bad;
+ }
+ gss_put_ctx(ctx);
++ dprintk("RPC: %4u GSS gss_validate: gss_verify_mic succeeded.\n",
++ task->tk_pid);
+ return p + XDR_QUADLEN(len);
+ out_bad:
+ gss_put_ctx(ctx);
++ dprintk("RPC: %4u gss_validate failed.\n", task->tk_pid);
+ return NULL;
+ }
+
+@@ -814,7 +859,7 @@ gss_wrap_req(struct rpc_task *task,
+ u32 offset, *q;
+ struct iovec *iov;
+
+- dprintk("RPC: gss_wrap_body\n");
++ dprintk("RPC: %4u gss_wrap_req\n", task->tk_pid);
+ BUG_ON(!ctx);
+ if (ctx->gc_proc != RPC_GSS_PROC_DATA) {
+ /* The spec seems a little ambiguous here, but I think that not
+@@ -832,7 +877,7 @@ gss_wrap_req(struct rpc_task *task,
+
+ integ_len = p++;
+ offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base;
+- *p++ = htonl(req->rq_seqno);
++ *p++ = htonl(req->rq_seqnos[0]);
+
+ status = encode(rqstp, p, obj);
+ if (status)
+@@ -871,7 +916,7 @@ gss_wrap_req(struct rpc_task *task,
+ status = 0;
+ out:
+ gss_put_ctx(ctx);
+- dprintk("RPC: gss_wrap_req returning %d\n", status);
++ dprintk("RPC: %4u gss_wrap_req returning %d\n", task->tk_pid, status);
+ return status;
+ }
+
+@@ -909,7 +954,7 @@ gss_unwrap_resp(struct rpc_task *task,
+ mic_offset = integ_len + data_offset;
+ if (mic_offset > rcv_buf->len)
+ goto out;
+- if (ntohl(*p++) != req->rq_seqno)
++ if (ntohl(*p++) != req->rq_seqnos[0])
+ goto out;
+
+ if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset,
+@@ -932,7 +977,8 @@ out_decode:
+ status = decode(rqstp, p, obj);
+ out:
+ gss_put_ctx(ctx);
+- dprintk("RPC: gss_unwrap_resp returning %d\n", status);
++ dprintk("RPC: %4u gss_unwrap_resp returning %d\n", task->tk_pid,
++ status);
+ return status;
+ }
+
+@@ -972,6 +1018,15 @@ static int __init init_rpcsec_gss(void)
+ int err = 0;
+
+ err = rpcauth_register(&authgss_ops);
++ if (err)
++ goto out;
++ err = gss_svc_init();
++ if (err)
++ goto out_unregister;
++ return 0;
++out_unregister:
++ rpcauth_unregister(&authgss_ops);
++out:
+ return err;
+ }
+
+diff -puN net/sunrpc/auth_gss/gss_krb5_mech.c~CITI_NFS4_ALL net/sunrpc/auth_gss/gss_krb5_mech.c
+--- linux-2.6.3/net/sunrpc/auth_gss/gss_krb5_mech.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500
++++ linux-2.6.3-bfields/net/sunrpc/auth_gss/gss_krb5_mech.c 2004-02-19 16:47:15.000000000 -0500
+@@ -39,6 +39,8 @@
+ #include <linux/types.h>
+ #include <linux/slab.h>
+ #include <linux/sunrpc/auth.h>
++#include <linux/in.h>
++#include <linux/sunrpc/svcauth_gss.h>
+ #include <linux/sunrpc/gss_krb5.h>
+ #include <linux/sunrpc/xdr.h>
+ #include <linux/crypto.h>
+@@ -98,7 +100,7 @@ get_key(char **p, char *end, struct cryp
+ alg_mode = CRYPTO_TFM_MODE_CBC;
+ break;
+ default:
+- dprintk("RPC: get_key: unsupported algorithm %d\n", alg);
++ dprintk("RPC: get_key: unsupported algorithm %d\n", alg);
+ goto out_err_free_key;
+ }
+ if (!(*res = crypto_alloc_tfm(alg_name, alg_mode)))
+@@ -153,7 +155,7 @@ gss_import_sec_context_kerberos(struct x
+ goto out_err_free_key2;
+
+ ctx_id->internal_ctx_id = ctx;
+- dprintk("Succesfully imported new context.\n");
++ dprintk("RPC: Succesfully imported new context.\n");
+ return 0;
+
+ out_err_free_key2:
+@@ -195,7 +197,7 @@ gss_verify_mic_kerberos(struct gss_ctx
+ if (!maj_stat && qop_state)
+ *qstate = qop_state;
+
+- dprintk("RPC: gss_verify_mic_kerberos returning %d\n", maj_stat);
++ dprintk("RPC: gss_verify_mic_kerberos returning %d\n", maj_stat);
+ return maj_stat;
+ }
+
+@@ -209,7 +211,7 @@ gss_get_mic_kerberos(struct gss_ctx *ctx
+
+ err = krb5_make_token(kctx, qop, message, mic_token, KG_TOK_MIC_MSG);
+
+- dprintk("RPC: gss_get_mic_kerberos returning %d\n",err);
++ dprintk("RPC: gss_get_mic_kerberos returning %d\n",err);
+
+ return err;
+ }
+@@ -232,6 +234,10 @@ static int __init init_kerberos_module(v
+ gm = gss_mech_get_by_OID(&gss_mech_krb5_oid);
+ gss_register_triple(RPC_AUTH_GSS_KRB5 , gm, 0, RPC_GSS_SVC_NONE);
+ gss_register_triple(RPC_AUTH_GSS_KRB5I, gm, 0, RPC_GSS_SVC_INTEGRITY);
++ if (svcauth_gss_register_pseudoflavor(RPC_AUTH_GSS_KRB5, "krb5"))
++ printk("Failed to register %s with server!\n", "krb5");
++ if (svcauth_gss_register_pseudoflavor(RPC_AUTH_GSS_KRB5I, "krb5i"))
++ printk("Failed to register %s with server!\n", "krb5i");
+ gss_mech_put(gm);
+ return 0;
+ }
+diff -puN net/sunrpc/auth_gss/gss_mech_switch.c~CITI_NFS4_ALL net/sunrpc/auth_gss/gss_mech_switch.c
+--- linux-2.6.3/net/sunrpc/auth_gss/gss_mech_switch.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500
++++ linux-2.6.3-bfields/net/sunrpc/auth_gss/gss_mech_switch.c 2004-02-19 16:47:07.000000000 -0500
+@@ -43,7 +43,6 @@
+ #include <linux/sunrpc/sched.h>
+ #include <linux/sunrpc/gss_api.h>
+ #include <linux/sunrpc/clnt.h>
+-#include <linux/sunrpc/name_lookup.h>
+
+ #ifdef RPC_DEBUG
+ # define RPCDBG_FACILITY RPCDBG_AUTH
+@@ -82,7 +81,7 @@ gss_mech_register(struct xdr_netobj * me
+ spin_lock(&registered_mechs_lock);
+ list_add(&gm->gm_list, &registered_mechs);
+ spin_unlock(&registered_mechs_lock);
+- dprintk("RPC: gss_mech_register: registered mechanism with oid:\n");
++ dprintk("RPC: gss_mech_register: registered mechanism with oid:\n");
+ print_hexl((u32 *)mech_type->data, mech_type->len, 0);
+ return 0;
+ }
+@@ -94,11 +93,10 @@ do_gss_mech_unregister(struct gss_api_me
+
+ list_del(&gm->gm_list);
+
+- dprintk("RPC: unregistered mechanism with oid:\n");
++ dprintk("RPC: unregistered mechanism with oid:\n");
+ print_hexl((u32 *)gm->gm_oid.data, gm->gm_oid.len, 0);
+ if (!gss_mech_put(gm)) {
+- dprintk("RPC: We just unregistered a gss_mechanism which"
+- " someone is still using.\n");
++ dprintk("RPC: We just unregistered a gss_mechanism which someone is still using.\n");
+ return -1;
+ } else {
+ return 0;
+@@ -146,7 +144,7 @@ gss_mech_get_by_OID(struct xdr_netobj *m
+ {
+ struct gss_api_mech *pos, *gm = NULL;
+
+- dprintk("RPC: gss_mech_get_by_OID searching for mechanism with OID:\n");
++ dprintk("RPC: gss_mech_get_by_OID searching for mechanism with OID:\n");
+ print_hexl((u32 *)mech_type->data, mech_type->len, 0);
+ spin_lock(&registered_mechs_lock);
+ list_for_each_entry(pos, &registered_mechs, gm_list) {
+@@ -158,10 +156,27 @@ gss_mech_get_by_OID(struct xdr_netobj *m
+ }
+ }
+ spin_unlock(&registered_mechs_lock);
+- dprintk("RPC: gss_mech_get_by_OID %s it\n", gm ? "found" : "didn't find");
++ dprintk("RPC: gss_mech_get_by_OID %s it\n", gm ? "found" : "didn't find");
+ return gm;
+ }
+
++struct gss_api_mech *
++gss_mech_get_by_name(char *name)
++{
++ struct gss_api_mech *pos, *gm = NULL;
++ /* Scan registered_mechs under its lock; NULL if no gm_ops->name matches. */
++ spin_lock(&registered_mechs_lock);
++ list_for_each_entry(pos, &registered_mechs, gm_list) {
++ if (0 == strcmp(name, pos->gm_ops->name)) {
++ gm = gss_mech_get(pos);
++ break;
++ }
++ }
++ spin_unlock(&registered_mechs_lock);
++ return gm;
++
++}
++
+ int
+ gss_mech_put(struct gss_api_mech * gm)
+ {
+@@ -228,7 +243,8 @@ gss_verify_mic(struct gss_ctx *context_
+ u32
+ gss_delete_sec_context(struct gss_ctx **context_handle)
+ {
+- dprintk("gss_delete_sec_context deleting %p\n",*context_handle);
++ dprintk("RPC: gss_delete_sec_context deleting %p\n",
++ *context_handle);
+
+ if (!*context_handle)
+ return(GSS_S_NO_CONTEXT);
+diff -puN net/sunrpc/auth_gss/Makefile~CITI_NFS4_ALL net/sunrpc/auth_gss/Makefile
+--- linux-2.6.3/net/sunrpc/auth_gss/Makefile~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500
++++ linux-2.6.3-bfields/net/sunrpc/auth_gss/Makefile 2004-02-19 16:47:04.000000000 -0500
+@@ -5,7 +5,7 @@
+ obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o
+
+ auth_rpcgss-objs := auth_gss.o gss_pseudoflavors.o gss_generic_token.o \
+- sunrpcgss_syms.o gss_mech_switch.o
++ sunrpcgss_syms.o gss_mech_switch.o svcauth_gss.o
+
+ obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
+
+diff -puN net/sunrpc/auth_gss/sunrpcgss_syms.c~CITI_NFS4_ALL net/sunrpc/auth_gss/sunrpcgss_syms.c
+--- linux-2.6.3/net/sunrpc/auth_gss/sunrpcgss_syms.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500
++++ linux-2.6.3-bfields/net/sunrpc/auth_gss/sunrpcgss_syms.c 2004-02-19 16:47:04.000000000 -0500
+@@ -8,6 +8,7 @@
+ #include <linux/unistd.h>
+
+ #include <linux/sunrpc/auth_gss.h>
++#include <linux/sunrpc/svcauth_gss.h>
+ #include <linux/sunrpc/gss_asn1.h>
+
+ /* sec_triples: */
+@@ -17,6 +18,7 @@ EXPORT_SYMBOL(gss_cmp_triples);
+ EXPORT_SYMBOL(gss_pseudoflavor_to_mechOID);
+ EXPORT_SYMBOL(gss_pseudoflavor_supported);
+ EXPORT_SYMBOL(gss_pseudoflavor_to_service);
++EXPORT_SYMBOL(svcauth_gss_register_pseudoflavor);
+
+ /* registering gss mechanisms to the mech switching code: */
+ EXPORT_SYMBOL(gss_mech_register);
+diff -puN /dev/null net/sunrpc/auth_gss/svcauth_gss.c
+--- /dev/null 2004-01-26 19:20:21.000000000 -0500
++++ linux-2.6.3-bfields/net/sunrpc/auth_gss/svcauth_gss.c 2004-02-19 16:47:15.000000000 -0500
+@@ -0,0 +1,1018 @@
++/*
++ * Neil Brown <neilb@cse.unsw.edu.au>
++ * J. Bruce Fields <bfields@umich.edu>
++ * Andy Adamson <andros@umich.edu>
++ * Dug Song <dugsong@monkey.org>
++ *
++ * RPCSEC_GSS server authentication.
++ * This implements RPCSEC_GSS as defined in rfc2203 (rpcsec_gss) and rfc2078
++ * (gssapi)
++ *
++ * The RPCSEC_GSS involves three stages:
++ * 1/ context creation
++ * 2/ data exchange
++ * 3/ context destruction
++ *
++ * Context creation is handled largely by upcalls to user-space.
++ * In particular, GSS_Accept_sec_context is handled by an upcall
++ * Data exchange is handled entirely within the kernel
++ * In particular, GSS_GetMIC, GSS_VerifyMIC, GSS_Seal, GSS_Unseal are in-kernel.
++ * Context destruction is handled in-kernel
++ * GSS_Delete_sec_context is in-kernel
++ *
++ * Context creation is initiated by a RPCSEC_GSS_INIT request arriving.
++ * The context handle and gss_token are used as a key into the rpcsec_init cache.
++ * The content of this cache includes some of the outputs of GSS_Accept_sec_context,
++ * being major_status, minor_status, context_handle, reply_token.
++ * These are sent back to the client.
++ * Sequence window management is handled by the kernel. The window size is currently
++ * a compile time constant.
++ *
++ * When user-space is happy that a context is established, it places an entry
++ * in the rpcsec_context cache. The key for this cache is the context_handle.
++ * The content includes:
++ * uid/gidlist - for determining access rights
++ * mechanism type
++ * mechanism specific information, such as a key
++ *
++ */
++
++#include <linux/types.h>
++#include <linux/module.h>
++#include <linux/pagemap.h>
++
++#include <linux/sunrpc/auth_gss.h>
++#include <linux/sunrpc/svcauth.h>
++#include <linux/sunrpc/gss_err.h>
++#include <linux/sunrpc/svcauth.h>
++#include <linux/sunrpc/svcauth_gss.h>
++#include <linux/sunrpc/cache.h>
++
++#ifdef RPC_DEBUG
++# define RPCDBG_FACILITY RPCDBG_AUTH
++#endif
++
++/* The rpcsec_init cache is used for mapping RPCSEC_GSS_{,CONT_}INIT requests
++ * into replies.
++ *
++ * Key is context handle (\x if empty) and gss_token.
++ * Content is major_status minor_status (integers) context_handle, reply_token.
++ *
++ */
++
++static int netobj_equal(struct xdr_netobj *a, struct xdr_netobj *b)
++{
++ return a->len == b->len && 0 == memcmp(a->data, b->data, a->len);
++}
++
++#define RSI_HASHBITS 6
++#define RSI_HASHMAX (1<<RSI_HASHBITS)
++#define RSI_HASHMASK (RSI_HASHMAX-1)
++
++struct rsi {
++ struct cache_head h;
++ struct xdr_netobj in_handle, in_token;
++ struct xdr_netobj out_handle, out_token;
++ int major_status, minor_status;
++};
++
++static struct cache_head *rsi_table[RSI_HASHMAX];
++static struct cache_detail rsi_cache;
++static struct rsi *rsi_lookup(struct rsi *item, int set);
++
++static void rsi_free(struct rsi *rsii)
++{
++ kfree(rsii->in_handle.data);
++ kfree(rsii->in_token.data);
++ kfree(rsii->out_handle.data);
++ kfree(rsii->out_token.data);
++}
++
++static void rsi_put(struct cache_head *item, struct cache_detail *cd)
++{
++ struct rsi *rsii = container_of(item, struct rsi, h);
++ if (cache_put(item, cd)) {
++ rsi_free(rsii);
++ kfree(rsii);
++ }
++}
++
++static inline int rsi_hash(struct rsi *item)
++{
++ return hash_mem(item->in_handle.data, item->in_handle.len, RSI_HASHBITS)
++ ^ hash_mem(item->in_token.data, item->in_token.len, RSI_HASHBITS);
++}
++
++static inline int rsi_match(struct rsi *item, struct rsi *tmp)
++{
++ return netobj_equal(&item->in_handle, &tmp->in_handle)
++ && netobj_equal(&item->in_token, &tmp->in_token);
++}
++
++static int dup_to_netobj(struct xdr_netobj *dst, char *src, int len)
++{
++ dst->len = len;
++ dst->data = (len ? kmalloc(len, GFP_KERNEL) : NULL);
++ if (dst->data)
++ memcpy(dst->data, src, len);
++ if (len && !dst->data)
++ return -ENOMEM;
++ return 0;
++}
++
++static inline int dup_netobj(struct xdr_netobj *dst, struct xdr_netobj *src)
++{
++ return dup_to_netobj(dst, src->data, src->len);
++}
++
++static inline void rsi_init(struct rsi *new, struct rsi *item)
++{
++ new->out_handle.data = NULL;
++ new->out_handle.len = 0;
++ new->out_token.data = NULL;
++ new->out_token.len = 0;
++ new->in_handle.len = item->in_handle.len;
++ new->in_handle.data = item->in_handle.data;
++ item->in_handle.len = 0;
++ item->in_handle.data = NULL;
++ new->in_token.len = item->in_token.len;
++ new->in_token.data = item->in_token.data;
++ item->in_token.len = 0;
++ item->in_token.data = NULL;
++ return;
++}
++
++static inline void rsi_update(struct rsi *new, struct rsi *item)
++{
++ BUG_ON(new->out_handle.data || new->out_token.data);
++ new->out_handle.len = item->out_handle.len;
++ item->out_handle.len = 0;
++ new->out_token.len = item->out_token.len;
++ item->out_token.len = 0;
++ new->out_handle.data = item->out_handle.data;
++ item->out_handle.data = NULL;
++ new->out_token.data = item->out_token.data;
++ item->out_token.data = NULL;
++
++ new->major_status = item->major_status;
++ new->minor_status = item->minor_status;
++}
++
++static void rsi_request(struct cache_detail *cd,
++ struct cache_head *h,
++ char **bpp, int *blen)
++{
++ struct rsi *rsii = container_of(h, struct rsi, h);
++
++ qword_addhex(bpp, blen, rsii->in_handle.data, rsii->in_handle.len);
++ qword_addhex(bpp, blen, rsii->in_token.data, rsii->in_token.len);
++ (*bpp)[-1] = '\n';
++}
++
++
++static int rsi_parse(struct cache_detail *cd,
++ char *mesg, int mlen)
++{
++ /* context token expiry major minor context token */
++ char *buf = mesg;
++ char *ep;
++ int len;
++ struct rsi rsii, *rsip = NULL;
++ time_t expiry;
++ int status = -EINVAL;
++
++ memset(&rsii, 0, sizeof(rsii));
++ /* handle */
++ len = qword_get(&mesg, buf, mlen);
++ if (len < 0)
++ goto out;
++ status = -ENOMEM;
++ if (dup_to_netobj(&rsii.in_handle, buf, len))
++ goto out;
++
++ /* token */
++ len = qword_get(&mesg, buf, mlen);
++ status = -EINVAL;
++ if (len < 0)
++ goto out;;
++ status = -ENOMEM;
++ if (dup_to_netobj(&rsii.in_token, buf, len))
++ goto out;
++
++ rsii.h.flags = 0;
++ /* expiry */
++ expiry = get_expiry(&mesg);
++ status = -EINVAL;
++ if (expiry == 0)
++ goto out;
++
++ /* major/minor */
++ len = qword_get(&mesg, buf, mlen);
++ if (len < 0)
++ goto out;
++ if (len == 0) {
++ goto out;
++ } else {
++ rsii.major_status = simple_strtoul(buf, &ep, 10);
++ if (*ep)
++ goto out;
++ len = qword_get(&mesg, buf, mlen);
++ if (len <= 0)
++ goto out;
++ rsii.minor_status = simple_strtoul(buf, &ep, 10);
++ if (*ep)
++ goto out;
++
++ /* out_handle */
++ len = qword_get(&mesg, buf, mlen);
++ if (len < 0)
++ goto out;
++ status = -ENOMEM;
++ if (dup_to_netobj(&rsii.out_handle, buf, len))
++ goto out;
++
++ /* out_token */
++ len = qword_get(&mesg, buf, mlen);
++ status = -EINVAL;
++ if (len < 0)
++ goto out;
++ status = -ENOMEM;
++ if (dup_to_netobj(&rsii.out_token, buf, len))
++ goto out;
++ }
++ rsii.h.expiry_time = expiry;
++ rsip = rsi_lookup(&rsii, 1);
++ status = 0;
++out:
++ rsi_free(&rsii);
++ if (rsip)
++ rsi_put(&rsip->h, &rsi_cache);
++ return status;
++}
++
++static struct cache_detail rsi_cache = {
++ .hash_size = RSI_HASHMAX,
++ .hash_table = rsi_table,
++ .name = "auth.rpcsec.init",
++ .cache_put = rsi_put,
++ .cache_request = rsi_request,
++ .cache_parse = rsi_parse,
++};
++
++static DefineSimpleCacheLookup(rsi, 0)
++
++/*
++ * The rpcsec_context cache is used to store a context that is
++ * used in data exchange.
++ * The key is a context handle. The content is:
++ * uid, gidlist, mechanism, service-set, mech-specific-data
++ */
++
++#define RSC_HASHBITS 10
++#define RSC_HASHMAX (1<<RSC_HASHBITS)
++#define RSC_HASHMASK (RSC_HASHMAX-1)
++
++#define GSS_SEQ_WIN 128
++
++struct gss_svc_seq_data {
++ /* highest seq number seen so far: */
++ int sd_max;
++ /* for i such that sd_max-GSS_SEQ_WIN < i <= sd_max, bit
++ * (i % GSS_SEQ_WIN) of sd_win is nonzero iff seq number i has been seen: */
++ unsigned long sd_win[GSS_SEQ_WIN/BITS_PER_LONG];
++ spinlock_t sd_lock; /* held by gss_check_seq_num() around sd_max/sd_win use */
++};
++
++struct rsc {
++ struct cache_head h;
++ struct xdr_netobj handle;
++ struct svc_cred cred;
++ struct gss_svc_seq_data seqdata;
++ struct gss_ctx *mechctx;
++};
++
++static struct cache_head *rsc_table[RSC_HASHMAX];
++static struct cache_detail rsc_cache;
++static struct rsc *rsc_lookup(struct rsc *item, int set);
++
++static void rsc_free(struct rsc *rsci)
++{
++ kfree(rsci->handle.data);
++ if (rsci->mechctx)
++ gss_delete_sec_context(&rsci->mechctx);
++}
++
++static void rsc_put(struct cache_head *item, struct cache_detail *cd)
++{
++ struct rsc *rsci = container_of(item, struct rsc, h);
++
++ if (cache_put(item, cd)) {
++ rsc_free(rsci);
++ kfree(rsci);
++ }
++}
++
++static inline int
++rsc_hash(struct rsc *rsci)
++{
++ return hash_mem(rsci->handle.data, rsci->handle.len, RSC_HASHBITS);
++}
++
++static inline int
++rsc_match(struct rsc *new, struct rsc *tmp)
++{
++ return netobj_equal(&new->handle, &tmp->handle);
++}
++
++static inline void
++rsc_init(struct rsc *new, struct rsc *tmp)
++{
++ new->mechctx = NULL;
++ new->handle.len = tmp->handle.len;
++ new->handle.data = tmp->handle.data;
++ tmp->handle.len = 0;
++ tmp->handle.data = NULL;
++}
++
++static inline void
++rsc_update(struct rsc *new, struct rsc *tmp)
++{
++ new->mechctx = tmp->mechctx;
++ tmp->mechctx = NULL;
++ memset(&new->seqdata, 0, sizeof(new->seqdata));
++ spin_lock_init(&new->seqdata.sd_lock);
++ new->cred = tmp->cred;
++}
++
++static int rsc_parse(struct cache_detail *cd,
++ char *mesg, int mlen)
++{
++ /* contexthandle expiry [ uid gid N <n gids> mechname ...mechdata... ] */
++ char *buf = mesg;
++ int len, rv;
++ struct rsc rsci, *rscp = NULL;
++ time_t expiry;
++ int status = -EINVAL;
++
++ memset(&rsci, 0, sizeof(rsci));
++ /* context handle */
++ len = qword_get(&mesg, buf, mlen);
++ if (len < 0) goto out;
++ status = -ENOMEM;
++ if (dup_to_netobj(&rsci.handle, buf, len))
++ goto out;
++
++ rsci.h.flags = 0;
++ /* expiry */
++ expiry = get_expiry(&mesg);
++ status = -EINVAL;
++ if (expiry == 0)
++ goto out;
++
++ /* uid, or NEGATIVE */
++ rv = get_int(&mesg, &rsci.cred.cr_uid);
++ if (rv == -EINVAL)
++ goto out;
++ if (rv == -ENOENT)
++ set_bit(CACHE_NEGATIVE, &rsci.h.flags);
++ else {
++ int N, i;
++ struct gss_api_mech *gm;
++ struct xdr_netobj tmp_buf;
++
++ /* gid */
++ if (get_int(&mesg, &rsci.cred.cr_gid))
++ goto out;
++
++ /* number of additional gid's; a negative count would index before cr_groups[] below */
++ if (get_int(&mesg, &N))
++ goto out;
++ if (N < 0 || N > NGROUPS)
++ goto out;
++
++ /* gid's */
++ for (i=0; i<N; i++) {
++ if (get_int(&mesg, &rsci.cred.cr_groups[i]))
++ goto out;
++ }
++ if (N < NGROUPS)
++ rsci.cred.cr_groups[N] = NOGROUP;
++
++ /* mech name */
++ len = qword_get(&mesg, buf, mlen);
++ if (len < 0)
++ goto out;
++ gm = gss_mech_get_by_name(buf);
++ status = -EOPNOTSUPP;
++ if (!gm)
++ goto out;
++
++ status = -EINVAL;
++ /* mech-specific data: */
++ len = qword_get(&mesg, buf, mlen);
++ if (len < 0) {
++ gss_mech_put(gm);
++ goto out;
++ }
++ tmp_buf.len = len;
++ tmp_buf.data = buf;
++ if (gss_import_sec_context(&tmp_buf, gm, &rsci.mechctx)) {
++ gss_mech_put(gm);
++ goto out;
++ }
++ gss_mech_put(gm);
++ }
++ rsci.h.expiry_time = expiry;
++ rscp = rsc_lookup(&rsci, 1);
++ status = 0;
++out:
++ rsc_free(&rsci);
++ if (rscp)
++ rsc_put(&rscp->h, &rsc_cache);
++ return status;
++}
++
++static struct cache_detail rsc_cache = {
++ .hash_size = RSC_HASHMAX,
++ .hash_table = rsc_table,
++ .name = "auth.rpcsec.context",
++ .cache_put = rsc_put,
++ .cache_parse = rsc_parse,
++};
++
++static DefineSimpleCacheLookup(rsc, 0);
++
++struct rsc *
++gss_svc_searchbyctx(struct xdr_netobj *handle)
++{
++ struct rsc rsci;
++ struct rsc *found;
++
++ rsci.handle = *handle;
++ found = rsc_lookup(&rsci, 0);
++ if (!found)
++ return NULL;
++ if (cache_check(&rsc_cache, &found->h, NULL))
++ return NULL;
++ return found;
++}
++
++/* Implements sequence number algorithm as specified in RFC 2203. */
++static int
++gss_check_seq_num(struct rsc *rsci, int seq_num)
++{
++ struct gss_svc_seq_data *sd = &rsci->seqdata;
++
++ spin_lock(&sd->sd_lock);
++ if (seq_num > sd->sd_max) {
++ if (seq_num >= sd->sd_max + GSS_SEQ_WIN) {
++ memset(sd->sd_win,0,sizeof(sd->sd_win));
++ sd->sd_max = seq_num;
++ } else while (sd->sd_max < seq_num) {
++ sd->sd_max++;
++ __clear_bit(sd->sd_max % GSS_SEQ_WIN, sd->sd_win);
++ }
++ __set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win);
++ goto ok;
++ } else if (seq_num <= sd->sd_max - GSS_SEQ_WIN) {
++ goto drop;
++ }
++ /* sd_max - GSS_SEQ_WIN < seq_num <= sd_max */
++ if (__test_and_set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win))
++ goto drop;
++ok:
++ spin_unlock(&sd->sd_lock);
++ return 1;
++drop:
++ spin_unlock(&sd->sd_lock);
++ return 0;
++}
++
++static inline u32 round_up_to_quad(u32 i)
++{
++ return (i + 3 ) & ~3;
++}
++
++static inline int
++svc_safe_getnetobj(struct iovec *argv, struct xdr_netobj *o)
++{
++ u32 l;
++ /* Pull a length-prefixed opaque out of argv; -1 if it does not fit. */
++ if (argv->iov_len < 4)
++ return -1;
++ o->len = ntohl(svc_getu32(argv));
++ l = round_up_to_quad(o->len);
++ if (l < o->len || argv->iov_len < l) /* l < len: rounding wrapped */
++ return -1;
++ o->data = argv->iov_base;
++ argv->iov_base += l;
++ argv->iov_len -= l;
++ return 0;
++}
++
++static inline int
++svc_safe_putnetobj(struct iovec *resv, struct xdr_netobj *o)
++{
++ u32 *p;
++
++ if (resv->iov_len + 4 > PAGE_SIZE)
++ return -1;
++ svc_putu32(resv, htonl(o->len));
++ p = resv->iov_base + resv->iov_len;
++ resv->iov_len += round_up_to_quad(o->len);
++ if (resv->iov_len > PAGE_SIZE)
++ return -1;
++ memcpy(p, o->data, o->len);
++ memset((u8 *)p + o->len, 0, round_up_to_quad(o->len) - o->len);
++ return 0;
++}
++
++/* Verify the checksum on the header and return SVC_OK on success.
++ * Otherwise, return SVC_DROP (in the case of a bad sequence number)
++ * or return SVC_DENIED and indicate error in authp.
++ * The verifier is consumed from rqstp->rq_arg.head[0] while it is parsed. */
++static int
++gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
++ u32 *rpcstart, struct rpc_gss_wire_cred *gc, u32 *authp)
++{
++ struct gss_ctx *ctx_id = rsci->mechctx;
++ struct xdr_buf rpchdr;
++ struct xdr_netobj checksum;
++ u32 flavor = 0;
++ struct iovec *argv = &rqstp->rq_arg.head[0];
++ struct iovec iov;
++
++ /* data to compute the checksum over: */
++ iov.iov_base = rpcstart;
++ iov.iov_len = (u8 *)argv->iov_base - (u8 *)rpcstart; /* from rpcstart up to the current read position */
++ xdr_buf_from_iov(&iov, &rpchdr);
++
++ *authp = rpc_autherr_badverf;
++ if (argv->iov_len < 4)
++ return SVC_DENIED;
++ flavor = ntohl(svc_getu32(argv));
++ if (flavor != RPC_AUTH_GSS)
++ return SVC_DENIED;
++ if (svc_safe_getnetobj(argv, &checksum))
++ return SVC_DENIED;
++
++ if (rqstp->rq_deferred) /* skip verification of revisited request */
++ return SVC_OK;
++ if (gss_verify_mic(ctx_id, &rpchdr, &checksum, NULL)
++ != GSS_S_COMPLETE) {
++ *authp = rpcsec_gsserr_credproblem;
++ return SVC_DENIED;
++ }
++
++ if (gc->gc_seq > MAXSEQ) {
++ dprintk("RPC: svcauth_gss: discarding request with large sequence number %d\n",
++ gc->gc_seq);
++ *authp = rpcsec_gsserr_ctxproblem;
++ return SVC_DENIED;
++ }
++ if (!gss_check_seq_num(rsci, gc->gc_seq)) { /* replayed or too-old sequence number */
++ dprintk("RPC: svcauth_gss: discarding request with old sequence number %d\n",
++ gc->gc_seq);
++ return SVC_DROP;
++ }
++ return SVC_OK;
++}
++/* Append a reply verifier to rq_res.head: the RPC_AUTH_GSS flavor, then a MIC computed over the XDR-encoded seq. Returns 0, or -1 on failure. */
++static int
++gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq)
++{
++ u32 xdr_seq;
++ u32 maj_stat;
++ struct xdr_buf verf_data;
++ struct xdr_netobj mic;
++ u32 *p;
++ struct iovec iov;
++
++ svc_putu32(rqstp->rq_res.head, htonl(RPC_AUTH_GSS));
++ xdr_seq = htonl(seq);
++
++ iov.iov_base = &xdr_seq;
++ iov.iov_len = sizeof(xdr_seq);
++ xdr_buf_from_iov(&iov, &verf_data);
++ p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len;
++ mic.data = (u8 *)(p + 1); /* MIC goes directly into the reply, one word past p (reserved for its length) */
++ maj_stat = gss_get_mic(ctx_id, 0, &verf_data, &mic);
++ if (maj_stat != GSS_S_COMPLETE)
++ return -1;
++ *p++ = htonl(mic.len);
++ memset((u8 *)p + mic.len, 0, round_up_to_quad(mic.len) - mic.len); /* zero the padding bytes */
++ p += XDR_QUADLEN(mic.len);
++ if (!xdr_ressize_check(rqstp, p)) /* the space check happens only after the data has been written */
++ return -1;
++ return 0;
++}
++
++struct gss_domain { /* an auth_domain plus the GSS pseudoflavor it was registered for */
++ struct auth_domain h; /* embedded generic domain; container_of() recovers the gss_domain from it */
++ u32 pseudoflavor; /* value passed to svcauth_gss_register_pseudoflavor() */
++};
++
++/* XXX this should be done in gss_pseudoflavors, and shouldn't be hardcoded: */
++static struct auth_domain *
++find_gss_auth_domain(struct gss_ctx *ctx, u32 svc)
++{
++ switch(gss_get_pseudoflavor(ctx, 0, svc)) { /* map the pseudoflavor to its "gss/..." domain name */
++ case RPC_AUTH_GSS_KRB5:
++ return auth_domain_find("gss/krb5");
++ case RPC_AUTH_GSS_KRB5I:
++ return auth_domain_find("gss/krb5i");
++ case RPC_AUTH_GSS_KRB5P:
++ return auth_domain_find("gss/krb5p");
++ }
++ return NULL; /* pseudoflavor is not one of the three handled above */
++}
++/* Create and look up an auth_domain named "gss/<name>" for pseudoflavor. Returns 0 on success, -1 on allocation failure or if the lookup returns a different domain. */
++int
++svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name)
++{
++ struct gss_domain *new;
++ struct auth_domain *test;
++ static char *prefix = "gss/";
++ int stat = -1;
++
++ new = kmalloc(sizeof(*new), GFP_KERNEL);
++ if (!new)
++ goto out;
++ cache_init(&new->h.h);
++ atomic_inc(&new->h.h.refcnt); /* extra count, dropped again below when the lookup returns new */
++ new->h.name = kmalloc(strlen(name) + strlen(prefix) + 1, GFP_KERNEL);
++ if (!new->h.name)
++ goto out_free_dom;
++ strcpy(new->h.name, prefix);
++ strcat(new->h.name, name);
++ new->h.flavour = RPC_AUTH_GSS;
++ new->pseudoflavor = pseudoflavor;
++ new->h.h.expiry_time = NEVER;
++ new->h.h.flags = 0;
++
++ test = auth_domain_lookup(&new->h, 1);
++ if (test == &new->h) {
++ BUG_ON(atomic_dec_and_test(&new->h.h.refcnt)); /* drop the extra count; it must not be the last one */
++ } else { /* XXX Duplicate registration? */
++ auth_domain_put(&new->h);
++ goto out;
++ }
++ return 0;
++
++out_free_dom:
++ kfree(new); /* name allocation failed; only the domain itself exists */
++out:
++ return stat;
++}
++
++/* Verify the integrity checksum (MIC) of an rpcsec_gss integrity request.
++ * Reads the integrity length from the head iovec, checks the MIC that follows
++ * the integrity-protected data, then consumes and checks the sequence number.
++ * Returns 0 on success, -EINVAL on any failure. (Sharing this with the client
++ * is hard: it shouldn't malloc(), and it uses a separate read pointer.) */
++static int
++unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
++{
++ /* XXX audit u32/int uses, sign/overflow issues */
++ int stat = -EINVAL;
++ u32 integ_len;
++ struct xdr_netobj mic;
++ struct xdr_buf integ_buf;
++
++ integ_len = ntohl(svc_getu32(&buf->head[0]));
++ if ((integ_len & 3) || integ_len > buf->len)
++ return stat;
++ if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len))
++ return stat;
++ /* copy out mic... */
++ if (read_u32_from_xdr_buf(buf, integ_len, &mic.len))
++ return stat;
++ if (mic.len > 256) /* XXX: maximum mic length? */
++ return stat;
++ mic.data = kmalloc(mic.len, GFP_KERNEL);
++ if (!mic.data) /* allocation failed: nothing to free yet */
++ return stat;
++ if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len))
++ goto out;
++ if (gss_verify_mic(ctx, &integ_buf, &mic, NULL) != GSS_S_COMPLETE)
++ goto out;
++ if (ntohl(svc_getu32(&buf->head[0])) != seq)
++ goto out;
++ stat = 0;
++out:
++ kfree(mic.data); /* the MIC copy is only needed while verifying */
++ return stat;
++}
++
++/*
++ * Accept an rpcsec packet.
++ * If context establishment, punt to user space
++ * If data exchange, verify/decrypt
++ * If context destruction, handle here
++ * In the context establishment and destruction case we encode
++ * response here and return SVC_COMPLETE. Other results: SVC_OK (data request accepted), SVC_DENIED (*authp set), SVC_DROP.
++ */
++static int
++svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp)
++{
++ struct iovec *argv = &rqstp->rq_arg.head[0];
++ struct iovec *resv = &rqstp->rq_res.head[0];
++ u32 crlen;
++ struct xdr_netobj tmpobj;
++ struct gss_svc_data *svcdata = rqstp->rq_auth_data;
++ struct rpc_gss_wire_cred *gc;
++ struct rsc *rsci = NULL;
++ struct rsi *rsip, rsikey;
++ u32 *rpcstart;
++ u32 *reject_stat = resv->iov_base; /* handed to xdr_ressize_check() on the auth_err path */
++ int ret;
++
++ dprintk("RPC: svcauth_gss: argv->iov_len = %d\n", argv->iov_len);
++
++ *authp = rpc_autherr_badcred;
++ if (!svcdata) /* no per-request GSS state yet: allocate it and keep it in rq_auth_data */
++ svcdata = kmalloc(sizeof(*svcdata), GFP_KERNEL);
++ if (!svcdata)
++ goto auth_err;
++ rqstp->rq_auth_data = svcdata;
++ gc = &svcdata->clcred;
++
++ /* start of rpc packet is 7 u32's back from here:
++ * xid direction rpcversion prog vers proc flavour
++ */
++ rpcstart = argv->iov_base;
++ rpcstart -= 7;
++
++ /* credential is:
++ * version(==1), proc(0,1,2,3), seq, service (1,2,3), handle
++ * at least 5 u32s, and is preceded by length, so that makes 6.
++ */
++
++ if (argv->iov_len < 5 * 4)
++ goto auth_err;
++ crlen = ntohl(svc_getu32(argv));
++ if (ntohl(svc_getu32(argv)) != RPC_GSS_VERSION)
++ goto auth_err;
++ gc->gc_proc = ntohl(svc_getu32(argv));
++ gc->gc_seq = ntohl(svc_getu32(argv));
++ gc->gc_svc = ntohl(svc_getu32(argv));
++ if (svc_safe_getnetobj(argv, &gc->gc_ctx))
++ goto auth_err;
++ if (crlen != round_up_to_quad(gc->gc_ctx.len) + 5 * 4) /* declared credential length must match what was parsed */
++ goto auth_err;
++
++ if ((gc->gc_proc != RPC_GSS_PROC_DATA) && (rqstp->rq_proc != 0)) /* non-DATA GSS procs are only accepted on RPC procedure 0 */
++ goto auth_err;
++
++ /*
++ * We've successfully parsed the credential. Let's check out the
++ * verifier. An AUTH_NULL verifier is allowed (and required) for
++ * INIT and CONTINUE_INIT requests. AUTH_RPCSEC_GSS is required for
++ * PROC_DATA and PROC_DESTROY.
++ *
++ * AUTH_NULL verifier is 0 (AUTH_NULL), 0 (length).
++ * AUTH_RPCSEC_GSS verifier is:
++ * 6 (AUTH_RPCSEC_GSS), length, checksum.
++ * checksum is calculated over rpcheader from xid up to here.
++ */
++ *authp = rpc_autherr_badverf;
++ switch (gc->gc_proc) {
++ case RPC_GSS_PROC_INIT:
++ case RPC_GSS_PROC_CONTINUE_INIT:
++ if (argv->iov_len < 2 * 4)
++ goto auth_err;
++ if (ntohl(svc_getu32(argv)) != RPC_AUTH_NULL)
++ goto auth_err;
++ if (ntohl(svc_getu32(argv)) != 0)
++ goto auth_err;
++ break;
++ case RPC_GSS_PROC_DATA:
++ case RPC_GSS_PROC_DESTROY:
++ *authp = rpcsec_gsserr_credproblem;
++ rsci = gss_svc_searchbyctx(&gc->gc_ctx);
++ if (!rsci)
++ goto auth_err;
++ switch (gss_verify_header(rqstp, rsci, rpcstart, gc, authp)) {
++ case SVC_OK:
++ break;
++ case SVC_DENIED:
++ goto auth_err;
++ case SVC_DROP:
++ goto drop;
++ }
++ break;
++ default:
++ *authp = rpc_autherr_rejectedcred;
++ goto auth_err;
++ }
++
++ /* now act upon the command: */
++ switch (gc->gc_proc) {
++ case RPC_GSS_PROC_INIT:
++ case RPC_GSS_PROC_CONTINUE_INIT:
++ *authp = rpc_autherr_badcred;
++ if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0) /* INIT must carry an empty context handle */
++ goto auth_err;
++ memset(&rsikey, 0, sizeof(rsikey));
++ if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx))
++ goto drop;
++ *authp = rpc_autherr_badverf;
++ if (svc_safe_getnetobj(argv, &tmpobj)) {
++ kfree(rsikey.in_handle.data);
++ goto auth_err;
++ }
++ if (dup_netobj(&rsikey.in_token, &tmpobj)) {
++ kfree(rsikey.in_handle.data);
++ goto drop;
++ }
++
++ rsip = rsi_lookup(&rsikey, 0); /* NOTE(review): no release of rsip is visible in this function - confirm whether one is needed */
++ rsi_free(&rsikey);
++ if (!rsip) {
++ goto drop;
++ }
++ switch(cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) {
++ case -EAGAIN:
++ goto drop;
++ case -ENOENT:
++ goto drop;
++ case 0:
++ rsci = gss_svc_searchbyctx(&rsip->out_handle);
++ if (!rsci) {
++ goto drop;
++ }
++ if (gss_write_verf(rqstp, rsci->mechctx, GSS_SEQ_WIN))
++ goto drop;
++ if (resv->iov_len + 4 > PAGE_SIZE)
++ goto drop;
++ svc_putu32(resv, rpc_success);
++ if (svc_safe_putnetobj(resv, &rsip->out_handle))
++ goto drop;
++ if (resv->iov_len + 3 * 4 > PAGE_SIZE)
++ goto drop;
++ svc_putu32(resv, htonl(rsip->major_status));
++ svc_putu32(resv, htonl(rsip->minor_status));
++ svc_putu32(resv, htonl(GSS_SEQ_WIN));
++ if (svc_safe_putnetobj(resv, &rsip->out_token))
++ goto drop;
++ rqstp->rq_client = NULL;
++ }
++ goto complete;
++ case RPC_GSS_PROC_DESTROY:
++ set_bit(CACHE_NEGATIVE, &rsci->h.flags); /* flag the cached context entry as negative */
++ if (resv->iov_len + 4 > PAGE_SIZE)
++ goto drop;
++ svc_putu32(resv, rpc_success);
++ goto complete;
++ case RPC_GSS_PROC_DATA:
++ rqstp->rq_client =
++ find_gss_auth_domain(rsci->mechctx, gc->gc_svc);
++ if (rqstp->rq_client == NULL)
++ goto auth_err;
++ *authp = rpcsec_gsserr_ctxproblem;
++ if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
++ goto auth_err;
++
++ rqstp->rq_cred = rsci->cred;
++
++ *authp = rpc_autherr_badcred;
++ switch (gc->gc_svc) {
++ case RPC_GSS_SVC_NONE:
++ break;
++ case RPC_GSS_SVC_INTEGRITY:
++ if (unwrap_integ_data(&rqstp->rq_arg,
++ gc->gc_seq, rsci->mechctx))
++ goto auth_err;
++ /* placeholders for length and seq. number: */
++ svcdata->body_start = resv->iov_base + resv->iov_len;
++ svc_putu32(resv, 0);
++ svc_putu32(resv, 0);
++ break;
++ case RPC_GSS_SVC_PRIVACY:
++ /* currently unsupported */
++ default:
++ goto auth_err;
++ }
++ ret = SVC_OK;
++ goto out;
++ }
++auth_err:
++ /* Restore write pointer to original value: */
++ xdr_ressize_check(rqstp, reject_stat);
++ ret = SVC_DENIED;
++ goto out;
++complete:
++ ret = SVC_COMPLETE;
++ goto out;
++drop:
++ ret = SVC_DROP;
++out:
++ if (rsci) /* release the context looked up above, on every exit path */
++ rsc_put(&rsci->h, &rsc_cache);
++ return ret;
++}
++/* Finish a reply: for integrity service, fill in the length/seq placeholders and append a MIC over the body. Returns 0, or -EINVAL on failure. */
++static int
++svcauth_gss_release(struct svc_rqst *rqstp)
++{
++ struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
++ struct rpc_gss_wire_cred *gc = &gsd->clcred;
++ struct xdr_buf *resbuf = &rqstp->rq_res;
++ struct xdr_buf integ_buf;
++ struct xdr_netobj mic;
++ struct iovec *resv;
++ u32 *p;
++ int integ_offset, integ_len;
++ struct rsc *rsci = NULL; /* stays NULL unless the integrity path looks the context up */
++ int stat = -EINVAL;
++
++ /* normally not set till svc_send, but we need it here: */
++ resbuf->len = resbuf->head[0].iov_len
++ + resbuf->page_len + resbuf->tail[0].iov_len;
++ switch (gc->gc_svc) {
++ case RPC_GSS_SVC_NONE:
++ break;
++ case RPC_GSS_SVC_INTEGRITY:
++ p = gsd->body_start;
++ /* move accept_stat to right place: */
++ memcpy(p, p + 2, 4);
++ p++;
++ integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base;
++ integ_len = resbuf->len - integ_offset;
++ BUG_ON(integ_len % 4);
++ *p++ = htonl(integ_len);
++ *p++ = htonl(gc->gc_seq);
++ if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset,
++ integ_len))
++ goto out;
++ if (resbuf->page_len == 0) {
++ BUG_ON(resbuf->tail[0].iov_len);
++ /* Use head for everything */
++ resv = &resbuf->head[0];
++ } else if (resbuf->tail[0].iov_base == NULL) {
++ /* copied from nfsd4_encode_read */
++ svc_take_page(rqstp);
++ resbuf->tail[0].iov_base = page_address(rqstp
++ ->rq_respages[rqstp->rq_resused-1]);
++ rqstp->rq_restailpage = rqstp->rq_resused-1;
++ resbuf->tail[0].iov_len = 0;
++ resv = &resbuf->tail[0];
++ } else {
++ resv = &resbuf->tail[0];
++ }
++ /* XXX bounds checking!: */
++ mic.data = (u8 *)resv->iov_base + resv->iov_len + 4;
++ rsci = gss_svc_searchbyctx(&gc->gc_ctx);
++ /* Better error return? Hold count on ctx through processing instead of looking up again? */
++ if (!rsci)
++ goto out;
++ /* XXX Whoops, we might overflow here: */
++ if (gss_get_mic(rsci->mechctx, 0, &integ_buf, &mic))
++ goto out;
++ svc_putu32(resv, htonl(mic.len));
++ resv->iov_len += mic.len;
++ resbuf->len += mic.len; /* not strictly necessary */
++ /* XXX too late, alas: */
++ if (resbuf->len > PAGE_SIZE)
++ goto out;
++ break;
++ case RPC_GSS_SVC_PRIVACY:
++ default:
++ goto out;
++ }
++
++ stat = 0;
++out:
++ if (rsci) /* release the looked-up context, as svcauth_gss_accept() does */
++ rsc_put(&rsci->h, &rsc_cache);
++ if (rqstp->rq_client)
++ auth_domain_put(rqstp->rq_client);
++ rqstp->rq_client = NULL;
++ return stat;
++}
++/* Free a GSS auth_domain: its name string, then the gss_domain that embeds dom. */
++static void
++svcauth_gss_domain_release(struct auth_domain *dom)
++{
++ struct gss_domain *gd = container_of(dom, struct gss_domain, h);
++
++ kfree(dom->name);
++ kfree(gd);
++}
++/* auth_ops table for RPC_AUTH_GSS; gss_svc_init() passes it to svc_auth_register(). */
++struct auth_ops svcauthops_gss = {
++ .name = "rpcsec_gss",
++ .flavour = RPC_AUTH_GSS,
++ .accept = svcauth_gss_accept,
++ .release = svcauth_gss_release,
++ .domain_release = svcauth_gss_domain_release,
++};
++/* Register the rsc and rsi caches and the RPC_AUTH_GSS auth_ops; always returns 0. */
++int
++gss_svc_init(void)
++{
++ cache_register(&rsc_cache);
++ cache_register(&rsi_cache);
++ svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss);
++ return 0;
++}
+diff -puN net/sunrpc/svc.c~CITI_NFS4_ALL net/sunrpc/svc.c
+--- linux-2.6.3/net/sunrpc/svc.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500
++++ linux-2.6.3-bfields/net/sunrpc/svc.c 2004-02-19 16:47:04.000000000 -0500
+@@ -200,6 +200,8 @@ svc_exit_thread(struct svc_rqst *rqstp)
+ kfree(rqstp->rq_resp);
+ if (rqstp->rq_argp)
+ kfree(rqstp->rq_argp);
++ if (rqstp->rq_auth_data)
++ kfree(rqstp->rq_auth_data);
+ kfree(rqstp);
+
+ /* Release the server */
+@@ -322,6 +324,8 @@ svc_process(struct svc_serv *serv, struc
+ goto err_bad_auth;
+ case SVC_DROP:
+ goto dropit;
++ case SVC_COMPLETE:
++ goto sendit;
+ }
+
+ progp = serv->sv_program;
+diff -puN net/sunrpc/Makefile~CITI_NFS4_ALL net/sunrpc/Makefile
+--- linux-2.6.3/net/sunrpc/Makefile~CITI_NFS4_ALL 2004-02-19 16:47:05.000000000 -0500
++++ linux-2.6.3-bfields/net/sunrpc/Makefile 2004-02-19 16:47:05.000000000 -0500
+@@ -2,9 +2,9 @@
+ # Makefile for Linux kernel SUN RPC
+ #
+
+-obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
+
+ obj-$(CONFIG_SUNRPC) += sunrpc.o
++obj-$(CONFIG_SUNRPC_GSS) += auth_gss/
+
+ sunrpc-y := clnt.o xprt.o sched.o \
+ auth.o auth_null.o auth_unix.o \
+diff -puN fs/nfsd/nfs4proc.c~CITI_NFS4_ALL fs/nfsd/nfs4proc.c
+--- linux-2.6.3/fs/nfsd/nfs4proc.c~CITI_NFS4_ALL 2004-02-19 16:47:05.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfsd/nfs4proc.c 2004-02-19 16:47:15.000000000 -0500
+@@ -52,15 +52,22 @@
+ #include <linux/nfs4.h>
+ #include <linux/nfsd/state.h>
+ #include <linux/nfsd/xdr4.h>
++#ifdef CONFIG_NFS_V4_ACL
++#include <linux/nfs4_acl.h>
++#endif
+
+ #define NFSDDBG_FACILITY NFSDDBG_PROC
+
+-/* Note: The organization of the OPEN code seems a little strange; it
+- * has been superfluously split into three routines, one of which is named
+- * nfsd4_process_open2() even though there is no nfsd4_process_open1()!
+- * This is because the code has been organized in anticipation of a
+- * subsequent patch which will implement more of the NFSv4 state model.
+- */
++static inline void
++fh_dup2(struct svc_fh *dst, struct svc_fh *src)
++{ /* put dst, then overwrite it with a copy of *src */
++ fh_put(dst);
++ dget(src->fh_dentry); /* dget/cache_get are called once each for the dentry and (if set) export that the copy will share */
++ if (src->fh_export)
++ cache_get(&src->fh_export->h);
++ *dst = *src;
++}
++
+ static int
+ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+ {
+@@ -89,12 +96,19 @@ do_open_lookup(struct svc_rqst *rqstp, s
+ if (!status) {
+ set_change_info(&open->op_cinfo, current_fh);
+ fh_dup2(current_fh, &resfh);
++ /* XXXJBF: keep a saved svc_fh struct instead?? */
++ open->op_stateowner->so_replay.rp_openfh_len =
++ resfh.fh_handle.fh_size;
++ memcpy(open->op_stateowner->so_replay.rp_openfh,
++ &resfh.fh_handle.fh_base,
++ resfh.fh_handle.fh_size);
+
+ accmode = MAY_NOP;
+ if (open->op_share_access & NFS4_SHARE_ACCESS_READ)
+ accmode = MAY_READ;
+ if (open->op_share_deny & NFS4_SHARE_ACCESS_WRITE)
+ accmode |= (MAY_WRITE | MAY_TRUNC);
++ accmode |= MAY_OWNER_OVERRIDE;
+ status = fh_verify(rqstp, current_fh, S_IFREG, accmode);
+ }
+
+@@ -102,19 +116,39 @@ do_open_lookup(struct svc_rqst *rqstp, s
+ return status;
+ }
+
++/*
++ * nfs4_unlock_state() called in encode
++ */
+ static inline int
+ nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+ {
+ int status;
+- dprintk("NFSD: nfsd4_open filename %.*s\n",
+- (int)open->op_fname.len, open->op_fname.data);
++ dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n",
++ (int)open->op_fname.len, open->op_fname.data,
++ open->op_stateowner);
+
+ /* This check required by spec. */
+ if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL)
+ return nfserr_inval;
+
++ open->op_stateowner = NULL;
++ nfs4_lock_state();
++
+ /* check seqid for replay. set nfs4_owner */
+ status = nfsd4_process_open1(open);
++ if (status == NFSERR_REPLAY_ME) {
++ struct nfs4_replay *rp = &open->op_stateowner->so_replay;
++ fh_put(current_fh);
++ current_fh->fh_handle.fh_size = rp->rp_openfh_len;
++ memcpy(&current_fh->fh_handle.fh_base, rp->rp_openfh,
++ rp->rp_openfh_len);
++ status = fh_verify(rqstp, current_fh, 0, MAY_NOP);
++ if (status)
++ dprintk("nfsd4_open: replay failed"
++ " restoring previous filehandle\n");
++ else
++ status = NFSERR_REPLAY_ME;
++ }
+ if (status)
+ return status;
+ /*
+@@ -172,7 +206,7 @@ static inline int
+ nfsd4_restorefh(struct svc_fh *current_fh, struct svc_fh *save_fh)
+ {
+ if (!save_fh->fh_dentry)
+- return nfserr_nofilehandle;
++ return nfserr_restorefh;
+
+ fh_dup2(current_fh, save_fh);
+ return nfs_ok;
+@@ -204,11 +238,16 @@ nfsd4_access(struct svc_rqst *rqstp, str
+ static inline int
+ nfsd4_commit(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_commit *commit)
+ {
++ int status;
++
+ u32 *p = (u32 *)commit->co_verf.data;
+ *p++ = nfssvc_boot.tv_sec;
+ *p++ = nfssvc_boot.tv_usec;
+
+- return nfsd_commit(rqstp, current_fh, commit->co_offset, commit->co_count);
++ status = nfsd_commit(rqstp, current_fh, commit->co_offset, commit->co_count);
++ if (status == nfserr_symlink)
++ status = nfserr_inval;
++ return status;
+ }
+
+ static inline int
+@@ -221,6 +260,8 @@ nfsd4_create(struct svc_rqst *rqstp, str
+ fh_init(&resfh, NFS4_FHSIZE);
+
+ status = fh_verify(rqstp, current_fh, S_IFDIR, MAY_CREATE);
++ if (status == nfserr_symlink)
++ status = nfserr_notdir;
+ if (status)
+ return status;
+
+@@ -316,8 +357,10 @@ static inline int
+ nfsd4_link(struct svc_rqst *rqstp, struct svc_fh *current_fh,
+ struct svc_fh *save_fh, struct nfsd4_link *link)
+ {
+- int status;
++ int status = nfserr_nofilehandle;
+
++ if (!save_fh->fh_dentry)
++ return status;
+ status = nfsd_link(rqstp, current_fh, link->li_name, link->li_namelen, save_fh);
+ if (!status)
+ set_change_info(&link->li_cinfo, current_fh);
+@@ -327,14 +370,18 @@ nfsd4_link(struct svc_rqst *rqstp, struc
+ static inline int
+ nfsd4_lookupp(struct svc_rqst *rqstp, struct svc_fh *current_fh)
+ {
+- /*
+- * XXX: We currently violate the spec in one small respect
+- * here. If LOOKUPP is done at the root of the pseudofs,
+- * the spec requires us to return NFSERR_NOENT. Personally,
+- * I think that leaving the filehandle unchanged is more
+- * logical, but this is an academic question anyway, since
+- * no clients actually use LOOKUPP.
+- */
++ struct svc_fh tmp_fh;
++ int ret;
++
++ fh_init(&tmp_fh, NFS4_FHSIZE);
++ if((ret = exp_pseudoroot(rqstp->rq_client, &tmp_fh,
++ &rqstp->rq_chandle)) != 0)
++ return ret;
++ if (tmp_fh.fh_dentry == current_fh->fh_dentry) {
++ fh_put(&tmp_fh);
++ return nfserr_noent;
++ }
++ fh_put(&tmp_fh);
+ return nfsd_lookup(rqstp, current_fh, "..", 2, current_fh);
+ }
+
+@@ -345,6 +392,20 @@ nfsd4_lookup(struct svc_rqst *rqstp, str
+ }
+
+ static inline int
++access_bits_permit_read(unsigned long access_bmap)
++{ /* nonzero if bit NFS4_SHARE_ACCESS_READ or bit NFS4_SHARE_ACCESS_BOTH is set in access_bmap */
++ return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) ||
++ test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap);
++}
++
++static inline int
++access_bits_permit_write(unsigned long access_bmap)
++{ /* nonzero if bit NFS4_SHARE_ACCESS_WRITE or bit NFS4_SHARE_ACCESS_BOTH is set in access_bmap */
++ return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) ||
++ test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap);
++}
++
++static inline int
+ nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read)
+ {
+ struct nfs4_stateid *stp;
+@@ -382,7 +443,7 @@ nfsd4_read(struct svc_rqst *rqstp, struc
+ goto out;
+ }
+ status = nfserr_openmode;
+- if (!(stp->st_share_access & NFS4_SHARE_ACCESS_READ)) {
++ if (!access_bits_permit_read(stp->st_access_bmap)) {
+ dprintk("NFSD: nfsd4_read: file not opened for read!\n");
+ goto out;
+ }
+@@ -397,6 +458,11 @@ out:
+ static inline int
+ nfsd4_readdir(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_readdir *readdir)
+ {
++ u64 cookie = readdir->rd_cookie;
++ static const nfs4_verifier zeroverf = {
++ .data[0] = 0,
++ };
++
+ /* no need to check permission - this will be done in nfsd_readdir() */
+
+ if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)
+@@ -405,7 +471,8 @@ nfsd4_readdir(struct svc_rqst *rqstp, st
+ readdir->rd_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0;
+ readdir->rd_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1;
+
+- if (readdir->rd_cookie > ~(u32)0)
++ if ((cookie > ~(u32)0) || (cookie == 1) || (cookie == 2) ||
++ (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE)))
+ return nfserr_bad_cookie;
+
+ readdir->rd_rqstp = rqstp;
+@@ -427,6 +494,8 @@ nfsd4_remove(struct svc_rqst *rqstp, str
+ int status;
+
+ status = nfsd_unlink(rqstp, current_fh, 0, remove->rm_name, remove->rm_namelen);
++ if (status == nfserr_symlink)
++ return nfserr_notdir;
+ if (!status) {
+ fh_unlock(current_fh);
+ set_change_info(&remove->rm_cinfo, current_fh);
+@@ -438,11 +507,25 @@ static inline int
+ nfsd4_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh,
+ struct svc_fh *save_fh, struct nfsd4_rename *rename)
+ {
+- int status;
++ int status = nfserr_nofilehandle;
+
++ if (!save_fh->fh_dentry)
++ return status;
+ status = nfsd_rename(rqstp, save_fh, rename->rn_sname,
+ rename->rn_snamelen, current_fh,
+ rename->rn_tname, rename->rn_tnamelen);
++
++ /* the underlying filesystem returns different errors than required
++ * by NFSv4. both save_fh and current_fh have been verified.. */
++ if (status == nfserr_isdir)
++ status = nfserr_exist;
++ else if ((status == nfserr_notdir) &&
++ (S_ISDIR(save_fh->fh_dentry->d_inode->i_mode) &&
++ S_ISDIR(current_fh->fh_dentry->d_inode->i_mode)))
++ status = nfserr_exist;
++ else if (status == nfserr_symlink)
++ status = nfserr_notdir;
++
+ if (!status) {
+ set_change_info(&rename->rn_sinfo, current_fh);
+ set_change_info(&rename->rn_tinfo, save_fh);
+@@ -454,14 +537,18 @@ static inline int
+ nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_setattr *setattr)
+ {
+ struct nfs4_stateid *stp;
+- int status = nfs_ok;
++ int status = nfserr_nofilehandle;
++
++ if (!current_fh->fh_dentry)
++ goto out;
+
++ status = nfs_ok;
+ if (setattr->sa_iattr.ia_valid & ATTR_SIZE) {
+
+ status = nfserr_bad_stateid;
+ if (ZERO_STATEID(&setattr->sa_stateid) || ONE_STATEID(&setattr->sa_stateid)) {
+ dprintk("NFSD: nfsd4_setattr: magic stateid!\n");
+- return status;
++ goto out;
+ }
+
+ nfs4_lock_state();
+@@ -469,17 +556,27 @@ nfsd4_setattr(struct svc_rqst *rqstp, st
+ &setattr->sa_stateid,
+ CHECK_FH | RDWR_STATE, &stp))) {
+ dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n");
+- goto out;
++ goto out_unlock;
+ }
+ status = nfserr_openmode;
+- if (!(stp->st_share_access & NFS4_SHARE_ACCESS_WRITE)) {
++ if (!access_bits_permit_write(stp->st_access_bmap)) {
+ dprintk("NFSD: nfsd4_setattr: not opened for write!\n");
+- goto out;
++ goto out_unlock;
+ }
+ nfs4_unlock_state();
+ }
+- return (nfsd_setattr(rqstp, current_fh, &setattr->sa_iattr, 0, (time_t)0));
++#ifdef CONFIG_NFS_V4_ACL
++ status = nfs_ok;
++ if (setattr->sa_acl != NULL)
++ status = nfsd4_set_nfs4_acl(rqstp, current_fh, setattr->sa_acl);
++ if (status)
++ goto out;
++#endif /* CONFIG_NFS_V4_ACL */
++ status = nfsd_setattr(rqstp, current_fh, &setattr->sa_iattr,
++ 0, (time_t)0);
+ out:
++ return status;
++out_unlock:
+ nfs4_unlock_state();
+ return status;
+ }
+@@ -513,7 +610,7 @@ nfsd4_write(struct svc_rqst *rqstp, stru
+ }
+
+ status = nfserr_openmode;
+- if (!(stp->st_share_access & NFS4_SHARE_ACCESS_WRITE)) {
++ if (!access_bits_permit_write(stp->st_access_bmap)) {
+ dprintk("NFSD: nfsd4_write: file not open for write!\n");
+ goto out;
+ }
+@@ -526,9 +623,12 @@ zero_stateid:
+ *p++ = nfssvc_boot.tv_sec;
+ *p++ = nfssvc_boot.tv_usec;
+
+- return (nfsd_write(rqstp, current_fh, write->wr_offset,
++ status = nfsd_write(rqstp, current_fh, write->wr_offset,
+ write->wr_vec, write->wr_vlen, write->wr_buflen,
+- &write->wr_how_written));
++ &write->wr_how_written);
++ if (status == nfserr_symlink)
++ status = nfserr_inval;
++ return status;
+ out:
+ nfs4_unlock_state();
+ return status;
+@@ -552,8 +652,9 @@ nfsd4_verify(struct svc_rqst *rqstp, str
+
+ if ((verify->ve_bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0)
+ || (verify->ve_bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1))
+- return nfserr_notsupp;
+- if (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)
++ return nfserr_attrnotsupp;
++ if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR)
++ || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1))
+ return nfserr_inval;
+ if (verify->ve_attrlen & 3)
+ return nfserr_inval;
+@@ -568,7 +669,8 @@ nfsd4_verify(struct svc_rqst *rqstp, str
+
+ status = nfsd4_encode_fattr(current_fh, current_fh->fh_export,
+ current_fh->fh_dentry, buf,
+- &count, verify->ve_bmval);
++ &count, verify->ve_bmval,
++ rqstp);
+
+ /* this means that nfsd4_encode_fattr() ran out of space */
+ if (status == nfserr_resource && count == 0)
+@@ -658,13 +760,32 @@ nfsd4_proc_compound(struct svc_rqst *rqs
+ goto encode_op;
+ }
+
++ /* All operations except RENEW, SETCLIENTID, RESTOREFH
++ * SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH
++ * require a valid current filehandle
++ *
++ * SETATTR NOFILEHANDLE error handled in nfsd4_setattr
++ * due to required returned bitmap argument
++ */
++ if ((!current_fh.fh_dentry) &&
++ !((op->opnum == OP_PUTFH) || (op->opnum == OP_PUTROOTFH) ||
++ (op->opnum == OP_SETCLIENTID) ||
++ (op->opnum == OP_SETCLIENTID_CONFIRM) ||
++ (op->opnum == OP_RENEW) || (op->opnum == OP_RESTOREFH) ||
++ (op->opnum == OP_RELEASE_LOCKOWNER) ||
++ (op->opnum == OP_SETATTR))) {
++ op->status = nfserr_nofilehandle;
++ goto encode_op;
++ }
+ switch (op->opnum) {
+ case OP_ACCESS:
+ op->status = nfsd4_access(rqstp, &current_fh, &op->u.access);
+ break;
+ case OP_CLOSE:
+ op->status = nfsd4_close(rqstp, &current_fh, &op->u.close);
+- op->replay = &op->u.close.cl_stateowner->so_replay;
++ if (op->u.close.cl_stateowner)
++ op->replay =
++ &op->u.close.cl_stateowner->so_replay;
+ break;
+ case OP_COMMIT:
+ op->status = nfsd4_commit(rqstp, &current_fh, &op->u.commit);
+@@ -683,12 +804,18 @@ nfsd4_proc_compound(struct svc_rqst *rqs
+ break;
+ case OP_LOCK:
+ op->status = nfsd4_lock(rqstp, &current_fh, &op->u.lock);
++ if (op->u.lock.lk_stateowner)
++ op->replay =
++ &op->u.lock.lk_stateowner->so_replay;
+ break;
+ case OP_LOCKT:
+ op->status = nfsd4_lockt(rqstp, &current_fh, &op->u.lockt);
+ break;
+ case OP_LOCKU:
+ op->status = nfsd4_locku(rqstp, &current_fh, &op->u.locku);
++ if (op->u.locku.lu_stateowner)
++ op->replay =
++ &op->u.locku.lu_stateowner->so_replay;
+ break;
+ case OP_LOOKUP:
+ op->status = nfsd4_lookup(rqstp, &current_fh, &op->u.lookup);
+@@ -703,15 +830,21 @@ nfsd4_proc_compound(struct svc_rqst *rqs
+ break;
+ case OP_OPEN:
+ op->status = nfsd4_open(rqstp, &current_fh, &op->u.open);
+- op->replay = &op->u.open.op_stateowner->so_replay;
++ if (op->u.open.op_stateowner)
++ op->replay =
++ &op->u.open.op_stateowner->so_replay;
+ break;
+ case OP_OPEN_CONFIRM:
+ op->status = nfsd4_open_confirm(rqstp, &current_fh, &op->u.open_confirm);
+- op->replay = &op->u.open_confirm.oc_stateowner->so_replay;
++ if (op->u.open_confirm.oc_stateowner)
++ op->replay =
++ &op->u.open_confirm.oc_stateowner->so_replay;
+ break;
+ case OP_OPEN_DOWNGRADE:
+ op->status = nfsd4_open_downgrade(rqstp, &current_fh, &op->u.open_downgrade);
+- op->replay = &op->u.open_downgrade.od_stateowner->so_replay;
++ if (op->u.open_downgrade.od_stateowner)
++ op->replay =
++ &op->u.open_downgrade.od_stateowner->so_replay;
+ break;
+ case OP_PUTFH:
+ op->status = nfsd4_putfh(rqstp, &current_fh, &op->u.putfh);
+@@ -760,6 +893,9 @@ nfsd4_proc_compound(struct svc_rqst *rqs
+ case OP_WRITE:
+ op->status = nfsd4_write(rqstp, &current_fh, &op->u.write);
+ break;
++ case OP_RELEASE_LOCKOWNER:
++ op->status = nfsd4_release_lockowner(rqstp, &op->u.release_lockowner);
++ break;
+ default:
+ BUG_ON(op->status == nfs_ok);
+ break;
+@@ -768,7 +904,7 @@ nfsd4_proc_compound(struct svc_rqst *rqs
+ encode_op:
+ if (op->status == NFSERR_REPLAY_ME) {
+ nfsd4_encode_replay(resp, op);
+- status = op->status = NFS_OK;
++ status = op->status = op->replay->rp_status;
+ } else {
+ nfsd4_encode_operation(resp, op);
+ status = op->status;
+@@ -776,20 +912,7 @@ encode_op:
+ }
+
+ out:
+- if (args->ops != args->iops) {
+- kfree(args->ops);
+- args->ops = args->iops;
+- }
+- if (args->tmpp) {
+- kfree(args->tmpp);
+- args->tmpp = NULL;
+- }
+- while (args->to_free) {
+- struct tmpbuf *tb = args->to_free;
+- args->to_free = tb->next;
+- kfree(tb->buf);
+- kfree(tb);
+- }
++ nfsd4_release_compoundargs(args);
+ fh_put(&current_fh);
+ fh_put(&save_fh);
+ return status;
+diff -puN fs/nfsd/nfs4xdr.c~CITI_NFS4_ALL fs/nfsd/nfs4xdr.c
+--- linux-2.6.3/fs/nfsd/nfs4xdr.c~CITI_NFS4_ALL 2004-02-19 16:47:05.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfsd/nfs4xdr.c 2004-02-19 16:47:15.000000000 -0500
+@@ -51,100 +51,103 @@
+ #include <linux/sunrpc/xdr.h>
+ #include <linux/sunrpc/svc.h>
+ #include <linux/sunrpc/clnt.h>
+-#include <linux/sunrpc/name_lookup.h>
+ #include <linux/nfsd/nfsd.h>
+ #include <linux/nfsd/state.h>
+ #include <linux/nfsd/xdr4.h>
++#include <linux/nfsd_idmap.h>
++#include <linux/nfs4.h>
++#include <linux/nfs4_acl.h>
+
+ #define NFSDDBG_FACILITY NFSDDBG_XDR
+
+-/*
+- * From Peter Astrand <peter@cendio.se>: The following routines check
+- * whether a filename supplied by the client is valid.
+- */
+-static const char trailing_bytes_for_utf8[256] = {
+- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
++static const char utf8_byte_len[256] = {
++ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
++ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
++ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
++ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5
++ 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
++ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0
+ };
+
+ static inline int
+-is_legal_iso_utf8_sequence(unsigned char *source, int length)
++is_legal_utf8_sequence(unsigned char *source, int length)
+ {
+- unsigned char a;
+- unsigned char *srcptr;
++ unsigned char *ptr;
++ unsigned char c;
+
+- srcptr = source + length;
++ if (length==1) return 1;
+
+- switch (length) {
+- /* Everything else falls through when "1"... */
++ /* Check for overlong sequence, and check second byte */
++ c = *(source + 1);
++ switch (*source) {
++ case 0xE0: /* 3 bytes */
++ if ( c < 0xA0 ) return 0;
++ break;
++ case 0xF0: /* 4 bytes */
++ if ( c < 0x90 ) return 0;
++ break;
++ case 0xF8: /* 5 bytes */
++ if ( c < 0xC8 ) return 0;
++ break;
++ case 0xFC: /* 6 bytes */
++ if ( c < 0x84 ) return 0;
++ break;
+ default:
+- /* Sequences with more than 6 bytes are invalid */
+- return 0;
++ if ( (c & 0xC0) != 0x80) return 0;
++ }
+
+- /*
+- Byte 3-6 must be 80..BF
+- */
+- case 6:
+- if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
+- case 5:
+- if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
+- case 4:
+- if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
+- case 3:
+- if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0;
+-
+- case 2:
+- a = *--srcptr;
+-
+- /* Upper limit */
+- if (a > 0xBF)
+- /* 2nd byte may never be > 0xBF */
+- return 0;
++ /* Check that trailing bytes look like 10xxxxxx */
++ for (ptr = source++ + length - 1; ptr>source; ptr--)
++ if ( ((*ptr) & 0xC0) != 0x80 ) return 0;
++ return 1;
++}
+
+- /*
+- Lower limits checks, to detect non-shortest forms.
+- No fall-through in this inner switch.
+- */
+- switch (*source) {
+- case 0xE0: /* 3 bytes */
+- if (a < 0xA0) return 0;
+- break;
+- case 0xF0: /* 4 bytes */
+- if (a < 0x90) return 0;
+- break;
+- case 0xF8: /* 5 bytes */
+- if (a < 0xC8) return 0;
+- break;
+- case 0xFC: /* 6 bytes */
+- if (a < 0x84) return 0;
+- break;
+- default:
+- /* In all cases, 2nd byte must be >= 0x80 (because leading
+- 10...) */
+- if (a < 0x80) return 0;
+- }
++/* This does some screening on disallowed unicode characters. It is NOT
++ * comprehensive.
++ */
++static int
++is_allowed_utf8_char(unsigned char *source, int length)
++{
++ /* We assume length and source point to a valid utf8 sequence */
++ unsigned char c;
+
+- case 1:
+- /* Invalid ranges */
+- if (*source >= 0x80 && *source < 0xC2)
+- /* Multibyte char with value < 0xC2, non-shortest */
+- return 0;
+- if (*source > 0xFD)
+- /* Leading byte starting with 11111110 is illegal */
+- return 0;
+- if (!*source)
+- return 0;
++ /* Disallow F0000 and up (in utf8, F3B08080) */
++ if (*source > 0xF3 ) return 0;
++ c = *(source + 1);
++ switch (*source) {
++ case 0xF3:
++ if (c >= 0xB0) return 0;
++ break;
++ /* Disallow D800-F8FF (in utf8, EDA080-EFA3BF) */
++ case 0xED:
++ if (c >= 0xA0) return 0;
++ break;
++ case 0xEE:
++ return 0;
++ break;
++ case 0xEF:
++ if (c <= 0xA3) return 0;
++ /* Disallow FFF9-FFFF (EFBFB9-EFBFBF) */
++ if (c==0xBF)
++ /* Don't need to check <=0xBF, since valid utf8 */
++ if ( *(source+2) >= 0xB9) return 0;
++ break;
+ }
+-
+ return 1;
+ }
+
++/* This routine should really check to see that the proper stringprep
++ * mappings have been applied. Instead, we do a simple screen of some
++ * of the more obvious illegal values by calling is_allowed_utf8_char.
++ * This will allow many illegal strings through, but if a client behaves,
++ * it will get full functionality. The other option (apart from full
++ * stringprep checking) is to limit everything to an easily handled subset,
++ * such as 7-bit ascii.
++ *
++ * Note - currently calling routines ignore return value except as boolean.
++ */
+ static int
+ check_utf8(char *str, int len)
+ {
+@@ -155,11 +158,17 @@ check_utf8(char *str, int len)
+ sourceend = str + len;
+
+ while (chunk < sourceend) {
+- chunklen = trailing_bytes_for_utf8[*chunk]+1;
++ chunklen = utf8_byte_len[*chunk];
++ if (!chunklen)
++ return nfserr_inval;
+ if (chunk + chunklen > sourceend)
+ return nfserr_inval;
+- if (!is_legal_iso_utf8_sequence(chunk, chunklen))
++ if (!is_legal_utf8_sequence(chunk, chunklen))
++ return nfserr_inval;
++ if (!is_allowed_utf8_char(chunk, chunklen))
+ return nfserr_inval;
++ if ( (chunklen==1) && (!*chunk) )
++ return nfserr_inval; /* Disallow embedded nulls */
+ chunk += chunklen;
+ }
+
+@@ -280,27 +289,40 @@ u32 *read_buf(struct nfsd4_compoundargs
+ return p;
+ }
+
+-char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes)
++static int
++defer_free(struct nfsd4_compoundargs *argp,
++ void (*release)(const void *), void *p)
+ {
+ struct tmpbuf *tb;
++
++ tb = kmalloc(sizeof(*tb), GFP_KERNEL);
++ if (!tb)
++ return -ENOMEM;
++ tb->buf = p;
++ tb->release = release;
++ tb->next = argp->to_free;
++ argp->to_free = tb;
++ return 0;
++}
++
++char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes)
++{
++ void *new = NULL;
+ if (p == argp->tmp) {
+- p = kmalloc(nbytes, GFP_KERNEL);
+- if (!p) return NULL;
++ new = kmalloc(nbytes, GFP_KERNEL);
++ if (!new) return NULL;
++ p = new;
+ memcpy(p, argp->tmp, nbytes);
+ } else {
+ if (p != argp->tmpp)
+ BUG();
+ argp->tmpp = NULL;
+ }
+- tb = kmalloc(sizeof(*tb), GFP_KERNEL);
+- if (!tb) {
+- kfree(p);
++ if (defer_free(argp, kfree, p)) {
++ kfree(new);
+ return NULL;
+- }
+- tb->buf = p;
+- tb->next = argp->to_free;
+- argp->to_free = tb;
+- return (char*)p;
++ } else
++ return (char *)p;
+ }
+
+
+@@ -328,7 +350,8 @@ nfsd4_decode_bitmap(struct nfsd4_compoun
+ }
+
+ static int
+-nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr)
++nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr,
++ struct nfs4_acl **acl)
+ {
+ int expected_len, len = 0;
+ u32 dummy32;
+@@ -344,7 +367,7 @@ nfsd4_decode_fattr(struct nfsd4_compound
+ * read-only attributes return ERR_INVAL.
+ */
+ if ((bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) || (bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1))
+- return nfserr_notsupp;
++ return nfserr_attrnotsupp;
+ if ((bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0) || (bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1))
+ return nfserr_inval;
+
+@@ -357,6 +380,39 @@ nfsd4_decode_fattr(struct nfsd4_compound
+ READ64(iattr->ia_size);
+ iattr->ia_valid |= ATTR_SIZE;
+ }
++#ifdef CONFIG_NFS_V4_ACL
++ if (bmval[0] & FATTR4_WORD0_ACL) {
++ int nace, i;
++ struct nfs4_ace ace;
++
++ READ_BUF(4); len += 4;
++ READ32(nace);
++
++ *acl = nfs4_acl_new();
++ if (*acl == NULL) {
++ status = -ENOMEM;
++ goto out_nfserr;
++ }
++ defer_free(argp, (void (*)(const void *))nfs4_acl_free, *acl);
++
++ for (i = 0; i < nace; i++) {
++ READ_BUF(16); len += 16;
++ READ32(ace.type);
++ READ32(ace.flag);
++ READ32(ace.access_mask);
++ READ32(ace.wholen);
++ READ_BUF(ace.wholen);
++ len += XDR_QUADLEN(ace.wholen) << 2;
++ if (nfs4_acl_add_ace(*acl, ace.type, ace.flag,
++ ace.access_mask, (char *)p, ace.wholen) < 0) {
++ status = -ENOMEM;
++ goto out_nfserr;
++ }
++ p += XDR_QUADLEN(ace.wholen);
++ }
++ } else
++ *acl = NULL;
++#endif /* CONFIG_NFS_V4_ACL */
+ if (bmval[1] & FATTR4_WORD1_MODE) {
+ READ_BUF(4);
+ len += 4;
+@@ -373,7 +429,7 @@ nfsd4_decode_fattr(struct nfsd4_compound
+ READMEM(buf, dummy32);
+ if (check_utf8(buf, dummy32))
+ return nfserr_inval;
+- if ((status = name_get_uid(buf, dummy32, &iattr->ia_uid)))
++ if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid)))
+ goto out_nfserr;
+ iattr->ia_valid |= ATTR_UID;
+ }
+@@ -386,7 +442,7 @@ nfsd4_decode_fattr(struct nfsd4_compound
+ READMEM(buf, dummy32);
+ if (check_utf8(buf, dummy32))
+ return nfserr_inval;
+- if ((status = name_get_gid(buf, dummy32, &iattr->ia_gid)))
++ if ((status = nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid)))
+ goto out_nfserr;
+ iattr->ia_valid |= ATTR_GID;
+ }
+@@ -482,6 +538,7 @@ nfsd4_decode_close(struct nfsd4_compound
+ {
+ DECODE_HEAD;
+
++ (int)close->cl_stateowner = -1;
+ READ_BUF(4 + sizeof(stateid_t));
+ READ32(close->cl_seqid);
+ READ32(close->cl_stateid.si_generation);
+@@ -540,7 +597,7 @@ nfsd4_decode_create(struct nfsd4_compoun
+ if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval)))
+ return status;
+
+- if ((status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr)))
++ if ((status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, &create->cr_acl)))
+ goto out;
+
+ DECODE_TAIL;
+@@ -572,6 +629,7 @@ nfsd4_decode_lock(struct nfsd4_compounda
+ {
+ DECODE_HEAD;
+
++ (int)lock->lk_stateowner = -1;
+ /*
+ * type, reclaim(boolean), offset, length, new_lock_owner(boolean)
+ */
+@@ -629,6 +687,7 @@ nfsd4_decode_locku(struct nfsd4_compound
+ {
+ DECODE_HEAD;
+
++ (int)locku->lu_stateowner = -1;
+ READ_BUF(24 + sizeof(stateid_t));
+ READ32(locku->lu_type);
+ if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT))
+@@ -664,6 +723,7 @@ nfsd4_decode_open(struct nfsd4_compounda
+
+ memset(open->op_bmval, 0, sizeof(open->op_bmval));
+ open->op_iattr.ia_valid = 0;
++ (int)open->op_stateowner = -1;
+
+ /* seqid, share_access, share_deny, clientid, ownerlen */
+ READ_BUF(16 + sizeof(clientid_t));
+@@ -686,7 +746,7 @@ nfsd4_decode_open(struct nfsd4_compounda
+ switch (open->op_createmode) {
+ case NFS4_CREATE_UNCHECKED:
+ case NFS4_CREATE_GUARDED:
+- if ((status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr)))
++ if ((status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr, &open->op_acl)))
+ goto out;
+ break;
+ case NFS4_CREATE_EXCLUSIVE:
+@@ -739,6 +799,7 @@ nfsd4_decode_open_confirm(struct nfsd4_c
+ {
+ DECODE_HEAD;
+
++ (int)open_conf->oc_stateowner = -1;
+ READ_BUF(4 + sizeof(stateid_t));
+ READ32(open_conf->oc_req_stateid.si_generation);
+ COPYMEM(&open_conf->oc_req_stateid.si_opaque, sizeof(stateid_opaque_t));
+@@ -752,6 +813,7 @@ nfsd4_decode_open_downgrade(struct nfsd4
+ {
+ DECODE_HEAD;
+
++ (int)open_down->od_stateowner = -1;
+ READ_BUF(4 + sizeof(stateid_t));
+ READ32(open_down->od_stateid.si_generation);
+ COPYMEM(&open_down->od_stateid.si_opaque, sizeof(stateid_opaque_t));
+@@ -861,7 +923,7 @@ nfsd4_decode_setattr(struct nfsd4_compou
+ READ_BUF(sizeof(stateid_t));
+ READ32(setattr->sa_stateid.si_generation);
+ COPYMEM(&setattr->sa_stateid.si_opaque, sizeof(stateid_opaque_t));
+- if ((status = nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr)))
++ if ((status = nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, &setattr->sa_acl)))
+ goto out;
+
+ DECODE_TAIL;
+@@ -928,7 +990,7 @@ nfsd4_decode_write(struct nfsd4_compound
+ int len;
+ DECODE_HEAD;
+
+- READ_BUF(sizeof(stateid_t) + 16);
++ READ_BUF(sizeof(stateid_opaque_t) + 20);
+ READ32(write->wr_stateid.si_generation);
+ COPYMEM(&write->wr_stateid.si_opaque, sizeof(stateid_opaque_t));
+ READ64(write->wr_offset);
+@@ -972,6 +1034,20 @@ nfsd4_decode_write(struct nfsd4_compound
+ }
+
+ static int
++nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner)
++{
++ DECODE_HEAD;
++
++ READ_BUF(12);
++ COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t));
++ READ32(rlockowner->rl_owner.len);
++ READ_BUF(rlockowner->rl_owner.len);
++ READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len);
++
++ DECODE_TAIL;
++}
++
++static int
+ nfsd4_decode_compound(struct nfsd4_compoundargs *argp)
+ {
+ DECODE_HEAD;
+@@ -1043,6 +1119,13 @@ nfsd4_decode_compound(struct nfsd4_compo
+ op->opnum = ntohl(*argp->p++);
+
+ switch (op->opnum) {
++ case 2: /* Reserved operation */
++ op->opnum = OP_ILLEGAL;
++ if (argp->minorversion == 0)
++ op->status = nfserr_op_illegal;
++ else
++ op->status = nfserr_minor_vers_mismatch;
++ break;
+ case OP_ACCESS:
+ op->status = nfsd4_decode_access(argp, &op->u.access);
+ break;
+@@ -1136,14 +1219,12 @@ nfsd4_decode_compound(struct nfsd4_compo
+ case OP_WRITE:
+ op->status = nfsd4_decode_write(argp, &op->u.write);
+ break;
++ case OP_RELEASE_LOCKOWNER:
++ op->status = nfsd4_decode_release_lockowner(argp, &op->u.release_lockowner);
++ break;
+ default:
+- /*
+- * According to spec, anything greater than OP_WRITE
+- * is treated as OP_WRITE+1 in the response.
+- */
+- if (op->opnum > OP_WRITE)
+- op->opnum = OP_WRITE + 1;
+- op->status = nfserr_notsupp;
++ op->opnum = OP_ILLEGAL;
++ op->status = nfserr_op_illegal;
+ break;
+ }
+
+@@ -1183,10 +1264,10 @@ nfsd4_decode_compound(struct nfsd4_compo
+ } while (0)
+ #define WRITECINFO(c) do { \
+ *p++ = htonl(c.atomic); \
+- *p++ = htonl(c.before_size); \
+- *p++ = htonl(c.before_ctime); \
+- *p++ = htonl(c.after_size); \
+- *p++ = htonl(c.after_ctime); \
++ *p++ = htonl(c.before_ctime_sec); \
++ *p++ = htonl(c.before_ctime_nsec); \
++ *p++ = htonl(c.after_ctime_sec); \
++ *p++ = htonl(c.after_ctime_nsec); \
+ } while (0)
+
+ #define RESERVE_SPACE(nbytes) do { \
+@@ -1209,10 +1290,13 @@ nfsd4_decode_compound(struct nfsd4_compo
+ * "seqid-mutating" NFSv4 operation. This is
+ * where seqids are incremented, and the
+ * replay cache is filled.
++ *
++ * if stateowner != -1 then called with nfs4_lock_state() held
+ */
+
+ #define ENCODE_SEQID_OP_TAIL(stateowner) do { \
+- if (seqid_mutating_err(nfserr) && stateowner) { \
++ if (seqid_mutating_err(nfserr) && stateowner \
++ && ((int)stateowner != -1)) { \
+ if (stateowner->so_confirmed) \
+ stateowner->so_seqid++; \
+ stateowner->so_replay.rp_status = nfserr; \
+@@ -1220,7 +1304,8 @@ nfsd4_decode_compound(struct nfsd4_compo
+ (((char *)(resp)->p - (char *)save)); \
+ memcpy(stateowner->so_replay.rp_buf, save, \
+ stateowner->so_replay.rp_buflen); \
+- } } while(0)
++ } } while(0); \
++ if ((int)stateowner != -1) nfs4_unlock_state();
+
+
+ static u32 nfs4_ftypes[16] = {
+@@ -1239,13 +1324,16 @@ static u32 nfs4_ftypes[16] = {
+ */
+ int
+ nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
+- struct dentry *dentry, u32 *buffer, int *countp, u32 *bmval)
++ struct dentry *dentry, u32 *buffer, int *countp, u32 *bmval,
++ struct svc_rqst *rqstp)
+ {
+ u32 bmval0 = bmval[0];
+ u32 bmval1 = bmval[1];
+ struct kstat stat;
+- struct name_ent *owner = NULL;
+- struct name_ent *group = NULL;
++ char owner[IDMAP_NAMESZ];
++ u32 ownerlen = 0;
++ char group[IDMAP_NAMESZ];
++ u32 grouplen = 0;
+ struct svc_fh tempfh;
+ struct kstatfs statfs;
+ int buflen = *countp << 2;
+@@ -1254,6 +1342,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s
+ u64 dummy64;
+ u32 *p = buffer;
+ int status;
++ struct nfs4_acl *acl = NULL;
+
+ BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1);
+ BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0);
+@@ -1277,15 +1366,30 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s
+ fhp = &tempfh;
+ }
+ if (bmval1 & FATTR4_WORD1_OWNER) {
+- status = name_get_user(stat.uid, &owner);
+- if (status)
++ int temp = nfsd_map_uid_to_name(rqstp, stat.uid, owner);
++ if (temp < 0) {
++ status = temp;
+ goto out_nfserr;
++ }
++ ownerlen = (unsigned) temp;
+ }
+ if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
+- status = name_get_group(stat.gid, &group);
+- if (status)
++ int temp = nfsd_map_gid_to_name(rqstp, stat.gid, group);
++ if (temp < 0) {
++ status = temp;
++ goto out_nfserr;
++ }
++ grouplen = (unsigned) temp;
++ }
++#ifdef CONFIG_NFS_V4_ACL
++ if (bmval0 & FATTR4_WORD0_ACL) {
++ status = nfsd4_get_nfs4_acl(rqstp, dentry, &acl);
++ if (status == -EOPNOTSUPP)
++ bmval0 &= ~FATTR4_WORD0_ACL;
++ else if (status < 0)
+ goto out_nfserr;
+ }
++#endif /* CONFIG_NFS_V4_ACL */
+
+ if ((buflen -= 16) < 0)
+ goto out_resource;
+@@ -1317,32 +1421,15 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s
+ }
+ if (bmval0 & FATTR4_WORD0_CHANGE) {
+ /*
+- * XXX: We currently use the inode ctime as the nfsv4 "changeid"
+- * attribute. This violates the spec, which says
+- *
+- * The server may return the object's time_modify attribute
+- * for this attribute, but only if the file system object
+- * can not be updated more frequently than the resolution
+- * of time_modify.
+- *
+- * Since we only have 1-second ctime resolution, this is a pretty
+- * serious violation. Indeed, 1-second ctime resolution is known
+- * to be a problem in practice in the NFSv3 world.
+- *
+- * The real solution to this problem is probably to work on
+- * adding high-resolution mtimes to the VFS layer.
+- *
+- * Note: Started using i_size for the high 32 bits of the changeid.
+- *
+- * Note 2: This _must_ be consistent with the scheme for writing
++ * Note: This _must_ be consistent with the scheme for writing
+ * change_info, so any changes made here must be reflected there
+ * as well. (See xdr4.h:set_change_info() and the WRITECINFO()
+ * macro above.)
+ */
+ if ((buflen -= 8) < 0)
+ goto out_resource;
+- WRITE32(stat.size);
+- WRITE32(stat.mtime.tv_sec); /* AK: nsec dropped? */
++ WRITE32(stat.ctime.tv_sec);
++ WRITE32(stat.ctime.tv_nsec); /* AK: nsec dropped? */
+ }
+ if (bmval0 & FATTR4_WORD0_SIZE) {
+ if ((buflen -= 8) < 0)
+@@ -1387,10 +1474,48 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s
+ goto out_resource;
+ WRITE32(0);
+ }
++#ifdef CONFIG_NFS_V4_ACL
++ if (bmval0 & FATTR4_WORD0_ACL) {
++ struct nfs4_ace *ace;
++ struct list_head *h;
++ int alen;
++
++ if (acl == NULL) {
++ if ((buflen -= 4) < 0)
++ goto out_resource;
++
++ WRITE32(0);
++ goto out_acl;
++ }
++
++ alen = acl->naces * 16 + 4;
++
++ list_for_each(h, &acl->ace_head) {
++ ace = list_entry(h, struct nfs4_ace, l_ace);
++ alen += XDR_QUADLEN(ace->wholen) << 2;
++ }
++
++ if ((buflen -= alen) < 0)
++ goto out_resource;
++
++ WRITE32(acl->naces);
++
++ list_for_each(h, &acl->ace_head) {
++ ace = list_entry(h, struct nfs4_ace, l_ace);
++
++ WRITE32(ace->type);
++ WRITE32(ace->flag);
++ WRITE32(ace->access_mask);
++ WRITE32(ace->wholen);
++ WRITEMEM(ace->who, ace->wholen);
++ }
++ }
++out_acl:
++#endif /* CONFIG_NFS_V4_ACL */
+ if (bmval0 & FATTR4_WORD0_ACLSUPPORT) {
+ if ((buflen -= 4) < 0)
+ goto out_resource;
+- WRITE32(0);
++ WRITE32(1);
+ }
+ if (bmval0 & FATTR4_WORD0_CANSETTIME) {
+ if ((buflen -= 4) < 0)
+@@ -1485,20 +1610,18 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s
+ WRITE32(stat.nlink);
+ }
+ if (bmval1 & FATTR4_WORD1_OWNER) {
+- int namelen = strlen(owner->name);
+- buflen -= (XDR_QUADLEN(namelen) << 2) + 4;
++ buflen -= (XDR_QUADLEN(ownerlen) << 2) + 4;
+ if (buflen < 0)
+ goto out_resource;
+- WRITE32(namelen);
+- WRITEMEM(owner->name, namelen);
++ WRITE32(ownerlen);
++ WRITEMEM(owner, ownerlen);
+ }
+ if (bmval1 & FATTR4_WORD1_OWNER_GROUP) {
+- int namelen = strlen(group->name);
+- buflen -= (XDR_QUADLEN(namelen) << 2) + 4;
++ buflen -= (XDR_QUADLEN(grouplen) << 2) + 4;
+ if (buflen < 0)
+ goto out_resource;
+- WRITE32(namelen);
+- WRITEMEM(group->name, namelen);
++ WRITE32(grouplen);
++ WRITEMEM(group, grouplen);
+ }
+ if (bmval1 & FATTR4_WORD1_RAWDEV) {
+ if ((buflen -= 8) < 0)
+@@ -1564,12 +1687,11 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s
+ status = nfs_ok;
+
+ out:
++#ifdef CONFIG_NFS_V4_ACL
++ nfs4_acl_free(acl);
++#endif
+ if (fhp == &tempfh)
+ fh_put(&tempfh);
+- if (owner)
+- name_put(owner);
+- if (group)
+- name_put(group);
+ return status;
+ out_nfserr:
+ status = nfserrno(status);
+@@ -1648,7 +1770,8 @@ nfsd4_encode_dirent(struct readdir_cd *c
+ }
+
+ nfserr = nfsd4_encode_fattr(NULL, exp,
+- dentry, p, &buflen, cd->rd_bmval);
++ dentry, p, &buflen, cd->rd_bmval,
++ cd->rd_rqstp);
+ if (!nfserr) {
+ p += buflen;
+ goto out;
+@@ -1701,7 +1824,7 @@ out:
+ return 0;
+
+ nospc:
+- cd->common.err = nfserr_readdir_nospc;
++ cd->common.err = nfserr_toosmall;
+ return -EINVAL;
+ }
+
+@@ -1771,7 +1894,8 @@ nfsd4_encode_getattr(struct nfsd4_compou
+
+ buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2);
+ nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry,
+- resp->p, &buflen, getattr->ga_bmval);
++ resp->p, &buflen, getattr->ga_bmval,
++ resp->rqstp);
+
+ if (!nfserr)
+ resp->p += buflen;
+@@ -1871,7 +1995,7 @@ nfsd4_encode_open(struct nfsd4_compoundr
+ ENCODE_SEQID_OP_HEAD;
+
+ if (nfserr)
+- return;
++ goto out;
+
+ RESERVE_SPACE(36 + sizeof(stateid_t));
+ WRITE32(open->op_stateid.si_generation);
+@@ -1925,7 +2049,8 @@ nfsd4_encode_open(struct nfsd4_compoundr
+ default:
+ BUG();
+ }
+-
++ /* XXX save filehandle here */
++out:
+ ENCODE_SEQID_OP_TAIL(open->op_stateowner);
+ }
+
+@@ -1995,6 +2120,8 @@ nfsd4_encode_read(struct nfsd4_compoundr
+ read->rd_offset,
+ read->rd_iov, read->rd_vlen,
+ &maxcount);
++ if (nfserr == nfserr_symlink)
++ nfserr = nfserr_inval;
+ if (nfserr)
+ return nfserr;
+ eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size);
+@@ -2052,6 +2179,8 @@ nfsd4_encode_readlink(struct nfsd4_compo
+ * assume that truncation occurred, and return NFS4ERR_RESOURCE.
+ */
+ nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, page, &maxcount);
++ if (nfserr == nfserr_isdir)
++ return nfserr_inval;
+ if (nfserr)
+ return nfserr;
+
+@@ -2081,7 +2210,7 @@ nfsd4_encode_readdir(struct nfsd4_compou
+ {
+ int maxcount;
+ loff_t offset;
+- u32 *page;
++ u32 *page, *savep;
+ ENCODE_HEAD;
+
+ if (nfserr)
+@@ -2090,6 +2219,7 @@ nfsd4_encode_readdir(struct nfsd4_compou
+ return nfserr_resource;
+
+ RESERVE_SPACE(8); /* verifier */
++ savep = p;
+
+ /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */
+ WRITE32(0);
+@@ -2107,8 +2237,10 @@ nfsd4_encode_readdir(struct nfsd4_compou
+ * pointer and eof field.
+ */
+ maxcount = (maxcount >> 2) - 4;
+- if (maxcount < 0)
+- return nfserr_readdir_nospc;
++ if (maxcount < 0) {
++ nfserr = nfserr_toosmall;
++ goto err_no_verf;
++ }
+
+ svc_take_page(resp->rqstp);
+ page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]);
+@@ -2122,11 +2254,13 @@ nfsd4_encode_readdir(struct nfsd4_compou
+ &offset,
+ &readdir->common, nfsd4_encode_dirent);
+ if (nfserr == nfs_ok &&
+- readdir->common.err == nfserr_readdir_nospc &&
++ readdir->common.err == nfserr_toosmall &&
+ readdir->buffer == page)
+- nfserr = nfserr_readdir_nospc;
++ nfserr = nfserr_toosmall;
++ if (nfserr == nfserr_symlink)
++ nfserr = nfserr_notdir;
+ if (nfserr)
+- return nfserr;
++ goto err_no_verf;
+
+ if (readdir->offset)
+ xdr_encode_hyper(readdir->offset, offset);
+@@ -2146,6 +2280,10 @@ nfsd4_encode_readdir(struct nfsd4_compou
+ resp->end = resp->p + PAGE_SIZE/4;
+
+ return 0;
++err_no_verf:
++ p = savep;
++ ADJUST_ARGS();
++ return nfserr;
+ }
+
+ static void
+@@ -2237,7 +2375,7 @@ nfsd4_encode_operation(struct nfsd4_comp
+
+ RESERVE_SPACE(8);
+ WRITE32(op->opnum);
+- statp = p++; /* to be backfilled at the end */
++ statp = p++; /* to be backfilled at the end */
+ ADJUST_ARGS();
+
+ switch (op->opnum) {
+@@ -2324,6 +2462,8 @@ nfsd4_encode_operation(struct nfsd4_comp
+ case OP_WRITE:
+ nfsd4_encode_write(resp, op->status, &op->u.write);
+ break;
++ case OP_RELEASE_LOCKOWNER:
++ break;
+ default:
+ break;
+ }
+@@ -2340,6 +2480,8 @@ nfsd4_encode_operation(struct nfsd4_comp
+ *
+ * XDR note: do not encode rp->rp_buflen: the buffer contains the
+ * previously sent already encoded operation.
++ *
++ * called with nfs4_lock_state() held
+ */
+ void
+ nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op)
+@@ -2351,12 +2493,13 @@ nfsd4_encode_replay(struct nfsd4_compoun
+
+ RESERVE_SPACE(8);
+ WRITE32(op->opnum);
+- WRITE32(NFS_OK);
++ *p++ = rp->rp_status; /* already xdr'ed */
+ ADJUST_ARGS();
+
+ RESERVE_SPACE(rp->rp_buflen);
+ WRITEMEM(rp->rp_buf, rp->rp_buflen);
+ ADJUST_ARGS();
++ nfs4_unlock_state();
+ }
+
+ /*
+@@ -2369,6 +2512,24 @@ nfs4svc_encode_voidres(struct svc_rqst *
+ return xdr_ressize_check(rqstp, p);
+ }
+
++void nfsd4_release_compoundargs(struct nfsd4_compoundargs *args)
++{
++ if (args->ops != args->iops) {
++ kfree(args->ops);
++ args->ops = args->iops;
++ }
++ if (args->tmpp) {
++ kfree(args->tmpp);
++ args->tmpp = NULL;
++ }
++ while (args->to_free) {
++ struct tmpbuf *tb = args->to_free;
++ args->to_free = tb->next;
++ tb->release(tb->buf);
++ kfree(tb);
++ }
++}
++
+ int
+ nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, u32 *p, struct nfsd4_compoundargs *args)
+ {
+@@ -2381,23 +2542,11 @@ nfs4svc_decode_compoundargs(struct svc_r
+ args->tmpp = NULL;
+ args->to_free = NULL;
+ args->ops = args->iops;
++ args->rqstp = rqstp;
+
+ status = nfsd4_decode_compound(args);
+ if (status) {
+- if (args->ops != args->iops) {
+- kfree(args->ops);
+- args->ops = args->iops;
+- }
+- if (args->tmpp) {
+- kfree(args->tmpp);
+- args->tmpp = NULL;
+- }
+- while (args->to_free) {
+- struct tmpbuf *tb = args->to_free;
+- args->to_free = tb->next;
+- kfree(tb->buf);
+- kfree(tb);
+- }
++ nfsd4_release_compoundargs(args);
+ }
+ return !status;
+ }
+diff -puN include/linux/nfsd/state.h~CITI_NFS4_ALL include/linux/nfsd/state.h
+--- linux-2.6.3/include/linux/nfsd/state.h~CITI_NFS4_ALL 2004-02-19 16:47:05.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/nfsd/state.h 2004-02-19 16:47:15.000000000 -0500
+@@ -113,6 +113,8 @@ struct nfs4_replay {
+ unsigned int rp_buflen;
+ char *rp_buf;
+ unsigned intrp_allocated;
++ int rp_openfh_len;
++ char rp_openfh[NFS4_FHSIZE];
+ char rp_ibuf[NFSD4_REPLAY_ISIZE];
+ };
+
+@@ -128,12 +130,20 @@ struct nfs4_replay {
+ * so_perfilestate: heads the list of nfs4_stateid (either open or lock)
+ * and is used to ensure no dangling nfs4_stateid references when we
+ * release a stateowner.
++* so_perlockowner: (open) nfs4_stateid->st_perlockowner entry - used when
++* close is called to reap associated byte-range locks
++* so_close_lru: (open) stateowner is placed on this list instead of being
++* reaped (when so_perfilestate is empty) to hold the last close replay.
++* reaped by laundromat thread after lease period.
+ */
+ struct nfs4_stateowner {
+ struct list_head so_idhash; /* hash by so_id */
+ struct list_head so_strhash; /* hash by op_name */
+ struct list_head so_perclient; /* nfs4_client->cl_perclient */
+ struct list_head so_perfilestate; /* list: nfs4_stateid */
++ struct list_head so_perlockowner; /* nfs4_stateid->st_perlockowner */
++ struct list_head so_close_lru; /* tail queue */
++ time_t so_time; /* time of placement on so_close_lru */
+ int so_is_open_owner; /* 1=openowner,0=lockowner */
+ u32 so_id;
+ struct nfs4_client * so_client;
+@@ -164,21 +174,23 @@ struct nfs4_file {
+ * st_hash: stateid_hashtbl[] entry or lockstateid_hashtbl entry
+ * st_perfile: file_hashtbl[] entry.
+ * st_perfile_state: nfs4_stateowner->so_perfilestate
+-* st_share_access: used only for open stateid
+-* st_share_deny: used only for open stateid
++* st_perlockowner: (open stateid) list of lock nfs4_stateowners
++* st_access_bmap: used only for open stateid
++* st_deny_bmap: used only for open stateid
+ */
+
+ struct nfs4_stateid {
+ struct list_head st_hash;
+ struct list_head st_perfile;
+ struct list_head st_perfilestate;
++ struct list_head st_perlockowner;
+ struct nfs4_stateowner * st_stateowner;
+ struct nfs4_file * st_file;
+ stateid_t st_stateid;
+ struct file st_vfs_file;
+ int st_vfs_set;
+- unsigned int st_share_access;
+- unsigned int st_share_deny;
++ unsigned long st_access_bmap;
++ unsigned long st_deny_bmap;
+ };
+
+ /* flags for preprocess_seqid_op() */
+@@ -187,6 +199,7 @@ struct nfs4_stateid {
+ #define OPEN_STATE 0x00000004
+ #define LOCK_STATE 0x00000008
+ #define RDWR_STATE 0x00000010
++#define CLOSE_STATE 0x00000020
+
+ #define seqid_mutating_err(err) \
+ (((err) != nfserr_stale_clientid) && \
+diff -puN fs/nfsd/Makefile~CITI_NFS4_ALL fs/nfsd/Makefile
+--- linux-2.6.3/fs/nfsd/Makefile~CITI_NFS4_ALL 2004-02-19 16:47:06.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfsd/Makefile 2004-02-19 16:47:06.000000000 -0500
+@@ -7,5 +7,5 @@ obj-$(CONFIG_NFSD) += nfsd.o
+ nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \
+ export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o
+ nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
+-nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o
++nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o
+ nfsd-objs := $(nfsd-y)
+diff -puN /dev/null fs/nfsd/nfs4idmap.c
+--- /dev/null 2004-01-26 19:20:21.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfsd/nfs4idmap.c 2004-02-19 16:47:06.000000000 -0500
+@@ -0,0 +1,569 @@
++/*
++ * fs/nfsd/nfs4idmap.c
++ *
++ * Mapping of UID/GIDs to name and vice versa.
++ *
++ * Copyright (c) 2002, 2003 The Regents of the University of
++ * Michigan. All rights reserved.
++ *
++ * Marius Aamodt Eriksen <marius@umich.edu>
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *
++ * 1. Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ * 2. Redistributions in binary form must reproduce the above copyright
++ * notice, this list of conditions and the following disclaimer in the
++ * documentation and/or other materials provided with the distribution.
++ * 3. Neither the name of the University nor the names of its
++ * contributors may be used to endorse or promote products derived
++ * from this software without specific prior written permission.
++ *
++ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/init.h>
++
++#include <linux/mm.h>
++#include <linux/utsname.h>
++#include <linux/errno.h>
++#include <linux/string.h>
++#include <linux/sunrpc/clnt.h>
++#include <linux/nfs.h>
++#include <linux/nfs4.h>
++#include <linux/nfs_fs.h>
++#include <linux/nfs_page.h>
++#include <linux/smp_lock.h>
++#include <linux/sunrpc/cache.h>
++#include <linux/nfsd_idmap.h>
++#include <linux/list.h>
++#include <linux/sched.h>
++#include <linux/time.h>
++#include <linux/seq_file.h>
++#include <linux/sunrpc/svcauth.h>
++
++/*
++ * Cache entry
++ */
++
++/*
++ * XXX we know that IDMAP_NAMESZ < PAGE_SIZE, but it's ugly to rely on
++ * that.
++ */
++
++#define IDMAP_TYPE_USER 0
++#define IDMAP_TYPE_GROUP 1
++
++struct ent {
++ struct cache_head h;
++ int type; /* User / Group */
++ uid_t id;
++ char name[IDMAP_NAMESZ];
++ char authname[IDMAP_NAMESZ];
++};
++
++#define DefineSimpleCacheLookupMap(STRUCT, FUNC) \
++ DefineCacheLookup(struct STRUCT, h, FUNC##_lookup, \
++ (struct STRUCT *item, int set), /*no setup */, \
++ & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \
++ STRUCT##_init(new, item), STRUCT##_update(tmp, item), 0)
++
++/* Common entry handling */
++
++#define ENT_HASHBITS 8
++#define ENT_HASHMAX (1 << ENT_HASHBITS)
++#define ENT_HASHMASK (ENT_HASHMAX - 1)
++
++static inline void
++ent_init(struct ent *new, struct ent *itm)
++{
++ new->id = itm->id;
++ new->type = itm->type;
++
++ strlcpy(new->name, itm->name, sizeof(new->name));
++ strlcpy(new->authname, itm->authname, sizeof(new->name));
++}
++
++static inline void
++ent_update(struct ent *new, struct ent *itm)
++{
++ ent_init(new, itm);
++}
++
++void
++ent_put(struct cache_head *ch, struct cache_detail *cd)
++{
++ if (cache_put(ch, cd)) {
++ struct ent *map = container_of(ch, struct ent, h);
++ kfree(map);
++ }
++}
++
++/*
++ * ID -> Name cache
++ */
++
++static struct cache_head *idtoname_table[ENT_HASHMAX];
++
++static uint32_t
++idtoname_hash(struct ent *ent)
++{
++ uint32_t hash;
++
++ hash = hash_str(ent->authname, ENT_HASHBITS);
++ hash = hash_long(hash ^ ent->id, ENT_HASHBITS);
++
++ /* Flip LSB for user/group */
++ if (ent->type == IDMAP_TYPE_GROUP)
++ hash ^= 1;
++
++ return hash;
++}
++
++static void
++idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
++ int *blen)
++{
++ struct ent *ent = container_of(ch, struct ent, h);
++ char idstr[11];
++
++ qword_add(bpp, blen, ent->authname);
++ snprintf(idstr, sizeof(idstr), "%d", ent->id);
++ qword_add(bpp, blen, ent->type == IDMAP_TYPE_GROUP ? "group" : "user");
++ qword_add(bpp, blen, idstr);
++
++ (*bpp)[-1] = '\n';
++}
++
++static inline int
++idtoname_match(struct ent *a, struct ent *b)
++{
++ return (a->id == b->id && a->type == b->type &&
++ strcmp(a->authname, b->authname) == 0);
++}
++
++static int
++idtoname_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
++{
++	struct ent *ent;
++
++	if (h == NULL) {	/* no entry: emit the column header instead */
++		seq_puts(m, "#domain type id [name]\n");
++		return 0;
++	}
++	ent = container_of(h, struct ent, h);
++	/* ent->id is a uid_t, so print it unsigned rather than with "%d". */
++	seq_printf(m, "%s %s %u", ent->authname,
++		ent->type == IDMAP_TYPE_GROUP ? "group" : "user", ent->id);
++	if (test_bit(CACHE_VALID, &h->flags))
++		seq_printf(m, " %s", ent->name);
++	seq_printf(m, "\n");
++	return 0;
++}
++
++static int idtoname_parse(struct cache_detail *, char *, int);
++static struct ent *idtoname_lookup(struct ent *, int);
++
++struct cache_detail idtoname_cache = {
++ .hash_size = ENT_HASHMAX,
++ .hash_table = idtoname_table,
++ .name = "nfs4.idtoname",
++ .cache_put = ent_put,
++ .cache_request = idtoname_request,
++ .cache_parse = idtoname_parse,
++ .cache_show = idtoname_show,
++};
++
++/* Parse one "<authname> <user|group> <id> <expiry> [<name>]\n" line and hand
++ * the resulting entry to idtoname_lookup(); returns 0 or a negative errno. */
++int
++idtoname_parse(struct cache_detail *cd, char *buf, int buflen)
++{
++	struct ent ent, *res;
++	char *buf1, *bp;
++	int len, error = -EINVAL;
++
++	if (buflen < 1 || buf[buflen - 1] != '\n')
++		return (-EINVAL);
++	buf[buflen - 1]= '\0';
++
++	buf1 = kmalloc(PAGE_SIZE, GFP_KERNEL);
++	if (buf1 == NULL)
++		return (-ENOMEM);
++	memset(&ent, 0, sizeof(ent));
++
++	/* Authentication name: must leave room for the NUL in ent.authname */
++	len = qword_get(&buf, buf1, PAGE_SIZE);
++	if (len <= 0 || len >= IDMAP_NAMESZ)
++		goto out;
++	memcpy(ent.authname, buf1, sizeof(ent.authname));
++
++	/* Type: anything other than "user" is treated as a group */
++	if (qword_get(&buf, buf1, PAGE_SIZE) <= 0)
++		goto out;
++	ent.type = strcmp(buf1, "user") == 0 ?
++		IDMAP_TYPE_USER : IDMAP_TYPE_GROUP;
++
++	/* ID */
++	if (qword_get(&buf, buf1, PAGE_SIZE) <= 0)
++		goto out;
++	ent.id = simple_strtoul(buf1, &bp, 10);
++	if (bp == buf1)
++		goto out;
++
++	/* expiry */
++	ent.h.expiry_time = get_expiry(&buf);
++	if (ent.h.expiry_time == 0)
++		goto out;
++
++	/* Name; NOTE(review): confirm qword_get() can return these errno values */
++	error = qword_get(&buf, buf1, PAGE_SIZE);
++	if (error == -EINVAL)
++		goto out;
++	if (error == -ENOENT)
++		set_bit(CACHE_NEGATIVE, &ent.h.flags);
++	else {
++		if (error >= IDMAP_NAMESZ) {
++			error = -EINVAL;
++			goto out;
++		}
++		memcpy(ent.name, buf1, sizeof(ent.name));
++	}
++	error = -ENOMEM;
++	if ((res = idtoname_lookup(&ent, 1)) == NULL)
++		goto out;
++	ent_put(&res->h, &idtoname_cache);
++
++	error = 0;
++out:
++	kfree(buf1);
++	return error;
++}
++
++static DefineSimpleCacheLookupMap(ent, idtoname);
++
++/*
++ * Name -> ID cache
++ */
++
++static struct cache_head *nametoid_table[ENT_HASHMAX];
++
++static inline int
++nametoid_hash(struct ent *ent)
++{
++ return hash_str(ent->name, ENT_HASHBITS);
++}
++
++void
++nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp,
++ int *blen)
++{
++ struct ent *ent = container_of(ch, struct ent, h);
++
++ qword_add(bpp, blen, ent->authname);
++ qword_add(bpp, blen, ent->type == IDMAP_TYPE_GROUP ? "group" : "user");
++ qword_add(bpp, blen, ent->name);
++
++ (*bpp)[-1] = '\n';
++}
++
++static inline int
++nametoid_match(struct ent *a, struct ent *b)
++{
++ return (a->type == b->type && strcmp(a->name, b->name) == 0 &&
++ strcmp(a->authname, b->authname) == 0);
++}
++
++static int
++nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h)
++{
++	struct ent *ent;
++
++	if (h == NULL) {	/* no entry: emit the column header instead */
++		seq_puts(m, "#domain type name [id]\n");
++		return 0;
++	}
++	ent = container_of(h, struct ent, h);
++	seq_printf(m, "%s %s %s", ent->authname,
++		ent->type == IDMAP_TYPE_GROUP ? "group" : "user", ent->name);
++	/* ent->id is a uid_t, so print it unsigned rather than with "%d". */
++	if (test_bit(CACHE_VALID, &h->flags))
++		seq_printf(m, " %u", ent->id);
++	seq_printf(m, "\n");
++	return 0;
++}
++
++static struct ent *nametoid_lookup(struct ent *, int);
++int nametoid_parse(struct cache_detail *, char *, int);
++
++struct cache_detail nametoid_cache = {
++ .hash_size = ENT_HASHMAX,
++ .hash_table = nametoid_table,
++ .name = "nfs4.nametoid",
++ .cache_put = ent_put,
++ .cache_request = nametoid_request,
++ .cache_parse = nametoid_parse,
++ .cache_show = nametoid_show,
++};
++
++/* Parse one "<authname> <user|group> <name> <expiry> [<id>]\n" line and hand
++ * the resulting entry to nametoid_lookup(); returns 0 or a negative errno. */
++int
++nametoid_parse(struct cache_detail *cd, char *buf, int buflen)
++{
++	struct ent ent, *res;
++	char *buf1;
++	int len, error = -EINVAL;
++
++	if (buflen < 1 || buf[buflen - 1] != '\n')
++		return (-EINVAL);
++	buf[buflen - 1]= '\0';
++
++	buf1 = kmalloc(PAGE_SIZE, GFP_KERNEL);
++	if (buf1 == NULL)
++		return (-ENOMEM);
++	memset(&ent, 0, sizeof(ent));
++
++	/* Authentication name: must leave room for the NUL in ent.authname */
++	len = qword_get(&buf, buf1, PAGE_SIZE);
++	if (len <= 0 || len >= IDMAP_NAMESZ)
++		goto out;
++	memcpy(ent.authname, buf1, sizeof(ent.authname));
++
++	/* Type: anything other than "user" is treated as a group */
++	if (qword_get(&buf, buf1, PAGE_SIZE) <= 0)
++		goto out;
++	ent.type = strcmp(buf1, "user") == 0 ?
++		IDMAP_TYPE_USER : IDMAP_TYPE_GROUP;
++
++	/* Name; the length stays in len so that error is still -EINVAL below */
++	len = qword_get(&buf, buf1, PAGE_SIZE);
++	if (len <= 0 || len >= IDMAP_NAMESZ)
++		goto out;
++	memcpy(ent.name, buf1, sizeof(ent.name));
++
++	/* expiry */
++	ent.h.expiry_time = get_expiry(&buf);
++	if (ent.h.expiry_time == 0)
++		goto out;
++
++	/* ID */
++	error = get_int(&buf, &ent.id);
++	if (error == -EINVAL)
++		goto out;
++	if (error == -ENOENT)
++		set_bit(CACHE_NEGATIVE, &ent.h.flags);
++
++	error = -ENOMEM;
++	if ((res = nametoid_lookup(&ent, 1)) == NULL)
++		goto out;
++	ent_put(&res->h, &nametoid_cache);
++	error = 0;
++out:
++	kfree(buf1);
++	return (error);
++}
++
++static DefineSimpleCacheLookupMap(ent, nametoid);
++
++/*
++ * Exported API
++ */
++
++void
++nfsd_idmap_init(void)
++{
++ cache_register(&idtoname_cache);
++ cache_register(&nametoid_cache);
++}
++
++void
++nfsd_idmap_shutdown(void)
++{
++ cache_unregister(&idtoname_cache);
++ cache_unregister(&nametoid_cache);
++}
++
++/*
++ * Deferred request handling
++ */
++
++struct idmap_defer_req {
++ struct cache_req req;
++ struct cache_deferred_req deferred_req;
++ wait_queue_head_t waitq;
++ atomic_t count;
++};
++
++static void
++put_mdr(struct idmap_defer_req *mdr)
++{
++ if (atomic_dec_and_test(&mdr->count))
++ kfree(mdr);
++}
++
++static void
++idmap_revisit(struct cache_deferred_req *dreq, int toomany)
++{
++ struct idmap_defer_req *mdr =
++ container_of(dreq, struct idmap_defer_req, deferred_req);
++
++ wake_up(&mdr->waitq);
++ put_mdr(mdr);
++}
++
++static struct cache_deferred_req *
++idmap_defer(struct cache_req *req)
++{
++ struct idmap_defer_req *mdr =
++ container_of(req, struct idmap_defer_req, req);
++
++ mdr->deferred_req.revisit = idmap_revisit;
++ return (&mdr->deferred_req);
++}
++
++static int threads_waiting = 0;
++
++static inline int
++idmap_lookup_wait(struct idmap_defer_req *mdr, wait_queue_t waitq, struct
++ svc_rqst *rqstp) {
++ int ret = -ETIMEDOUT;
++
++ set_task_state(current, TASK_INTERRUPTIBLE);
++ lock_kernel();
++ /* XXX: Does it matter that threads_waiting isn't per-server? */
++ /* Note: BKL prevents races with nfsd_svc and other lookups */
++ if (2 * threads_waiting > rqstp->rq_server->sv_nrthreads)
++ goto out;
++ threads_waiting++;
++ schedule_timeout(10 * HZ);
++ threads_waiting--;
++ ret = 0;
++out:
++ unlock_kernel();
++ remove_wait_queue(&mdr->waitq, &waitq);
++ set_task_state(current, TASK_RUNNING);
++ put_mdr(mdr);
++ return ret;
++}
++
++static int
++idmap_lookup(struct svc_rqst *rqstp,
++		struct ent *(*lookup_fn)(struct ent *, int), struct ent *key,
++		struct cache_detail *detail, struct ent **item)
++{
++	struct idmap_defer_req *mdr;
++	DECLARE_WAITQUEUE(waitq, current);
++	int ret;
++
++	*item = lookup_fn(key, 0);
++	if (!*item)
++		return -ENOMEM;
++	mdr = kmalloc(sizeof(*mdr), GFP_KERNEL);
++	if (mdr == NULL) {	/* was passed to memset() unchecked */
++		ent_put(&(*item)->h, detail);
++		return -ENOMEM;
++	}
++	memset(mdr, 0, sizeof(*mdr));
++	init_waitqueue_head(&mdr->waitq);
++	add_wait_queue(&mdr->waitq, &waitq);
++	atomic_set(&mdr->count, 2); /* put by idmap_revisit(), idmap_lookup_wait() */
++	mdr->req.defer = idmap_defer;
++	ret = cache_check(detail, &(*item)->h, &mdr->req);
++	if (ret != -EAGAIN)
++		return ret; /* NOTE(review): mdr is not released here - confirm */
++	ret = idmap_lookup_wait(mdr, waitq, rqstp);
++	if (ret)
++		return ret;
++	/* Try again, but don't wait. */
++	*item = lookup_fn(key, 0);
++	if (!*item)
++		return -ENOMEM;
++	if (!test_bit(CACHE_VALID, &(*item)->h.flags)) {
++		ent_put(&(*item)->h, detail);
++		return -ETIMEDOUT;
++	}
++	return cache_check(detail, &(*item)->h, NULL);
++}
++
++static int
++idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen,
++		uid_t *id)
++{
++	struct ent *item, key = { .type = type };
++	int ret;
++
++	/* Look up the id for name[0..namelen) in nametoid_cache.  The bound is
++	 * ">=" since u32 "namelen + 1" wraps to 0 for namelen == 0xffffffff. */
++	if (namelen >= sizeof(key.name))
++		return -EINVAL;
++	memcpy(key.name, name, namelen);
++	key.name[namelen] = '\0';
++	strlcpy(key.authname, rqstp->rq_client->name, sizeof(key.authname));
++	ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item);
++	if (ret)
++		return ret;
++	*id = item->id;
++	ent_put(&item->h, &nametoid_cache);	/* done with the cache entry */
++	return 0;
++}
++
++static int
++idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name)
++{
++ struct ent *item, key = {
++ .id = id,
++ .type = type,
++ };
++ int ret;
++
++ strlcpy(key.authname, rqstp->rq_client->name, sizeof(key.authname));
++ ret = idmap_lookup(rqstp, idtoname_lookup, &key, &idtoname_cache, &item);
++ if (ret)
++ return ret;
++ ret = strlen(item->name);
++ BUG_ON(ret > IDMAP_NAMESZ);
++ memcpy(name, item->name, ret);
++ ent_put(&item->h, &idtoname_cache);
++ return ret;
++}
++
++int
++nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen,
++ __u32 *id)
++{
++ return idmap_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id);
++}
++
++int
++nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen,
++ __u32 *id)
++{
++ return idmap_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, id);
++}
++
++int
++nfsd_map_uid_to_name(struct svc_rqst *rqstp, __u32 id, char *name)
++{
++ return idmap_id_to_name(rqstp, IDMAP_TYPE_USER, id, name);
++}
++
++int
++nfsd_map_gid_to_name(struct svc_rqst *rqstp, __u32 id, char *name)
++{
++ return idmap_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, name);
++}
+diff -puN fs/nfsd/nfsctl.c~CITI_NFS4_ALL fs/nfsd/nfsctl.c
+--- linux-2.6.3/fs/nfsd/nfsctl.c~CITI_NFS4_ALL 2004-02-19 16:47:06.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfsd/nfsctl.c 2004-02-19 16:47:06.000000000 -0500
+@@ -24,6 +24,7 @@
+ #include <linux/init.h>
+
+ #include <linux/nfs.h>
++#include <linux/nfsd_idmap.h>
+ #include <linux/sunrpc/svc.h>
+ #include <linux/nfsd/nfsd.h>
+ #include <linux/nfsd/cache.h>
+@@ -436,6 +437,9 @@ static int __init init_nfsd(void)
+ nfsd_cache_init(); /* RPC reply cache */
+ nfsd_export_init(); /* Exports table */
+ nfsd_lockd_init(); /* lockd->nfsd callbacks */
++#ifdef CONFIG_NFSD_V4
++ nfsd_idmap_init(); /* Name to ID mapping */
++#endif /* CONFIG_NFSD_V4 */
+ if (proc_mkdir("fs/nfs", 0)) {
+ struct proc_dir_entry *entry;
+ entry = create_proc_entry("fs/nfs/exports", 0, NULL);
+@@ -462,6 +466,9 @@ static void __exit exit_nfsd(void)
+ remove_proc_entry("fs/nfs", NULL);
+ nfsd_stat_shutdown();
+ nfsd_lockd_shutdown();
++#ifdef CONFIG_NFSD_V4
++ nfsd_idmap_shutdown();
++#endif /* CONFIG_NFSD_V4 */
+ unregister_filesystem(&nfsd_fs_type);
+ }
+
+diff -puN fs/nfsd/nfsproc.c~CITI_NFS4_ALL fs/nfsd/nfsproc.c
+--- linux-2.6.3/fs/nfsd/nfsproc.c~CITI_NFS4_ALL 2004-02-19 16:47:06.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfsd/nfsproc.c 2004-02-19 16:47:06.000000000 -0500
+@@ -585,6 +585,7 @@ nfserrno (int errno)
+ { nfserr_dquot, -EDQUOT },
+ #endif
+ { nfserr_stale, -ESTALE },
++ { nfserr_jukebox, -ETIMEDOUT },
+ { nfserr_dropit, -EAGAIN },
+ { nfserr_dropit, -ENOMEM },
+ { -1, -EIO }
+diff -puN /dev/null include/linux/nfsd_idmap.h
+--- /dev/null 2004-01-26 19:20:21.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/nfsd_idmap.h 2004-02-19 16:47:06.000000000 -0500
+@@ -0,0 +1,54 @@
++/*
++ * include/linux/nfsd_idmap.h
++ *
++ * Mapping of UID to name and vice versa.
++ *
++ * Copyright (c) 2002, 2003 The Regents of the University of
++ * Michigan. All rights reserved.
++ *
++ * Marius Aamodt Eriksen <marius@umich.edu>
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *
++ * 1. Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ * 2. Redistributions in binary form must reproduce the above copyright
++ * notice, this list of conditions and the following disclaimer in the
++ * documentation and/or other materials provided with the distribution.
++ * 3. Neither the name of the University nor the names of its
++ * contributors may be used to endorse or promote products derived
++ * from this software without specific prior written permission.
++ *
++ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#ifndef LINUX_NFSD_IDMAP_H
++#define LINUX_NFSD_IDMAP_H
++
++#include <linux/in.h>
++#include <linux/sunrpc/svc.h>
++
++/* XXX from linux/nfs_idmap.h */
++#define IDMAP_NAMESZ 128
++
++void nfsd_idmap_init(void);
++void nfsd_idmap_shutdown(void);
++
++int nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *);
++int nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, __u32 *);
++int nfsd_map_uid_to_name(struct svc_rqst *, __u32, char *);
++int nfsd_map_gid_to_name(struct svc_rqst *, __u32, char *);
++
++#endif /* LINUX_NFSD_IDMAP_H */
+diff -puN include/linux/nfsd/xdr4.h~CITI_NFS4_ALL include/linux/nfsd/xdr4.h
+--- linux-2.6.3/include/linux/nfsd/xdr4.h~CITI_NFS4_ALL 2004-02-19 16:47:06.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/nfsd/xdr4.h 2004-02-19 16:47:10.000000000 -0500
+@@ -39,6 +39,8 @@
+ #ifndef _LINUX_NFSD_XDR4_H
+ #define _LINUX_NFSD_XDR4_H
+
++#include <linux/nfs4.h>
++
+ #define NFSD4_MAX_TAGLEN 128
+ #define XDR_LEN(n) (((n) + 3) & ~3)
+
+@@ -54,10 +56,10 @@ typedef struct {
+
+ struct nfsd4_change_info {
+ u32 atomic;
+- u32 before_size;
+- u32 before_ctime;
+- u32 after_size;
+- u32 after_ctime;
++ u32 before_ctime_sec;
++ u32 before_ctime_nsec;
++ u32 after_ctime_sec;
++ u32 after_ctime_nsec;
+ };
+
+ struct nfsd4_access {
+@@ -95,6 +97,7 @@ struct nfsd4_create {
+ u32 cr_bmval[2]; /* request */
+ struct iattr cr_iattr; /* request */
+ struct nfsd4_change_info cr_cinfo; /* response */
++ struct nfs4_acl *cr_acl;
+ };
+ #define cr_linklen u.link.namelen
+ #define cr_linkname u.link.name
+@@ -216,7 +219,7 @@ struct nfsd4_open {
+ u32 op_rflags; /* response */
+ int op_truncate; /* used during processing */
+ struct nfs4_stateowner *op_stateowner; /* used during processing */
+-
++ struct nfs4_acl *op_acl;
+ };
+ #define op_iattr u.iattr
+ #define op_verf u.verf
+@@ -263,6 +266,10 @@ struct nfsd4_readdir {
+ u32 * offset;
+ };
+
++struct nfsd4_release_lockowner {
++ clientid_t rl_clientid;
++ struct xdr_netobj rl_owner;
++};
+ struct nfsd4_readlink {
+ struct svc_rqst *rl_rqstp; /* request */
+ struct svc_fh * rl_fhp; /* request */
+@@ -287,6 +294,7 @@ struct nfsd4_setattr {
+ stateid_t sa_stateid; /* request */
+ u32 sa_bmval[2]; /* request */
+ struct iattr sa_iattr; /* request */
++ struct nfs4_acl *sa_acl;
+ };
+
+ struct nfsd4_setclientid {
+@@ -359,6 +367,7 @@ struct nfsd4_op {
+ struct nfsd4_setclientid_confirm setclientid_confirm;
+ struct nfsd4_verify verify;
+ struct nfsd4_write write;
++ struct nfsd4_release_lockowner release_lockowner;
+ } u;
+ struct nfs4_replay * replay;
+ };
+@@ -373,9 +382,12 @@ struct nfsd4_compoundargs {
+ u32 * tmpp;
+ struct tmpbuf {
+ struct tmpbuf *next;
++ void (*release)(const void *);
+ void *buf;
+ } *to_free;
+-
++
++ struct svc_rqst *rqstp;
++
+ u32 taglen;
+ char * tag;
+ u32 minorversion;
+@@ -404,10 +416,10 @@ set_change_info(struct nfsd4_change_info
+ {
+ BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved);
+ cinfo->atomic = 1;
+- cinfo->before_size = fhp->fh_pre_size;
+- cinfo->before_ctime = fhp->fh_pre_ctime.tv_sec;
+- cinfo->after_size = fhp->fh_post_size;
+- cinfo->after_ctime = fhp->fh_post_ctime.tv_sec;
++ cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec;
++ cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec;
++ cinfo->after_ctime_sec = fhp->fh_post_ctime.tv_sec;
++ cinfo->after_ctime_nsec = fhp->fh_post_ctime.tv_nsec;
+ }
+
+ int nfs4svc_encode_voidres(struct svc_rqst *, u32 *, void *);
+@@ -419,7 +431,7 @@ void nfsd4_encode_operation(struct nfsd4
+ void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op);
+ int nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp,
+ struct dentry *dentry, u32 *buffer, int *countp,
+- u32 *bmval);
++ u32 *bmval, struct svc_rqst *);
+ extern int nfsd4_setclientid(struct svc_rqst *rqstp,
+ struct nfsd4_setclientid *setclid);
+ extern int nfsd4_setclientid_confirm(struct svc_rqst *rqstp,
+@@ -439,6 +451,9 @@ extern int nfsd4_lockt(struct svc_rqst *
+ struct nfsd4_lockt *lockt);
+ extern int nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh,
+ struct nfsd4_locku *locku);
++extern int nfsd4_release_lockowner(struct svc_rqst *rqstp,
++ struct nfsd4_release_lockowner *rlockowner);
++extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *);
+ #endif
+
+ /*
+diff -puN -L include/linux/sunrpc/name_lookup.h include/linux/sunrpc/name_lookup.h~CITI_NFS4_ALL /dev/null
+--- linux-2.6.3/include/linux/sunrpc/name_lookup.h
++++ /dev/null 2004-01-26 19:20:21.000000000 -0500
+@@ -1,38 +0,0 @@
+-
+-/*
+- * map between user/group name and id for a given 'client'
+- */
+-
+-struct name_ent {
+- char name[20];
+-};
+-static inline int name_get_user(int uid, struct name_ent **namep)
+-{
+- struct name_ent *n = kmalloc(sizeof(*n),GFP_KERNEL);
+- if (n) sprintf(n->name, "%d",uid);
+- *namep = n;
+- return n ? 0 : -ENOMEM;
+-}
+-static inline int name_get_group(int uid, struct name_ent **namep)
+-{
+- struct name_ent *n = kmalloc(sizeof(*n),GFP_KERNEL);
+- if (n) sprintf(n->name, "%d",uid);
+- *namep = n;
+- return n ? 0 : -ENOMEM;
+-}
+-static inline int name_get_uid(char *name, int name_len, int *uidp)
+-{
+- *uidp = simple_strtoul(name, NULL, 0);
+- return 0;
+-}
+-
+-static inline int name_get_gid(char *name, int name_len, int *gidp)
+-{
+- *gidp = simple_strtoul(name, NULL, 0);
+- return 0;
+-}
+-
+-static inline void name_put(struct name_ent *ent)
+-{
+- kfree(ent);
+-}
+diff -puN fs/Makefile~CITI_NFS4_ALL fs/Makefile
+--- linux-2.6.3/fs/Makefile~CITI_NFS4_ALL 2004-02-19 16:47:06.000000000 -0500
++++ linux-2.6.3-bfields/fs/Makefile 2004-02-19 16:47:06.000000000 -0500
+@@ -68,6 +68,7 @@ obj-$(CONFIG_NFS_FS) += nfs/
+ obj-$(CONFIG_EXPORTFS) += exportfs/
+ obj-$(CONFIG_NFSD) += nfsd/
+ obj-$(CONFIG_LOCKD) += lockd/
++obj-$(CONFIG_NFS_V4_ACL) += nfs4acl/
+ obj-$(CONFIG_NLS) += nls/
+ obj-$(CONFIG_SYSV_FS) += sysv/
+ obj-$(CONFIG_SMB_FS) += smbfs/
+diff -puN /dev/null fs/nfs4acl/acl.c
+--- /dev/null 2004-01-26 19:20:21.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfs4acl/acl.c 2004-02-19 16:47:06.000000000 -0500
+@@ -0,0 +1,921 @@
++/*
++ * fs/nfs4acl/acl.c
++ *
++ * Common NFSv4 ACL handling code.
++ *
++ * Copyright (c) 2002, 2003 The Regents of the University of Michigan.
++ * All rights reserved.
++ *
++ * Marius Aamodt Eriksen <marius@umich.edu>
++ * Jeff Sedlak <jsedlak@umich.edu>
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *
++ * 1. Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ * 2. Redistributions in binary form must reproduce the above copyright
++ * notice, this list of conditions and the following disclaimer in the
++ * documentation and/or other materials provided with the distribution.
++ * 3. Neither the name of the University nor the names of its
++ * contributors may be used to endorse or promote products derived
++ * from this software without specific prior written permission.
++ *
++ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#include <linux/string.h>
++#include <linux/slab.h>
++#include <linux/list.h>
++#include <linux/types.h>
++#include <linux/fs.h>
++#include <linux/nfs_fs.h>
++#include <linux/posix_acl.h>
++#include <linux/nfs4.h>
++#include <linux/nfs4_acl.h>
++
++#define NFS4_READ_MODE (NFS4_ACE_READ_DATA | NFS4_ACE_READ_NAMED_ATTRS)
++#define NFS4_WRITE_MODE (NFS4_ACE_WRITE_DATA | NFS4_ACE_WRITE_NAMED_ATTRS | NFS4_ACE_APPEND_DATA | NFS4_ACE_DELETE_CHILD)
++#define NFS4_EXECUTE_MODE NFS4_ACE_EXECUTE
++#define NFS4_ANYONE_MODE (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL)
++#define NFS4_OWNER_MODE (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL)
++
++#define MASK_EQUAL(mask1, mask2) \
++ ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) )
++
++static u32
++mask_from_posix(unsigned short perm, int owner)
++{
++ int mask = NFS4_ANYONE_MODE;
++ if (owner)
++ mask |= NFS4_OWNER_MODE;
++ if (perm & ACL_READ)
++ mask |= NFS4_READ_MODE;
++ if (perm & ACL_WRITE)
++ mask |= NFS4_WRITE_MODE;
++ if (perm & ACL_EXECUTE)
++ mask |= NFS4_EXECUTE_MODE;
++ return mask;
++}
++
++static int
++mode_from_nfs4(u32 perm, unsigned short *mode, int owner)
++{
++ /* XXX we might also want to ignore DELETE_CHILD on non-directories */
++ /* XXX also add special interpretation to EXECUTE on directories */
++ u32 ignore = NFS4_ACE_SYNCHRONIZE;
++
++ *mode = 0;
++ if ((perm & NFS4_READ_MODE) == NFS4_READ_MODE)
++ *mode |= ACL_READ;
++ if ((perm & NFS4_WRITE_MODE) == NFS4_WRITE_MODE)
++ *mode |= ACL_WRITE;
++ if ((perm & NFS4_EXECUTE_MODE) == NFS4_EXECUTE_MODE)
++ *mode |= ACL_EXECUTE;
++ if (!MASK_EQUAL(ignore|perm, ignore|mask_from_posix(*mode, owner)))
++ return -EINVAL;
++ return 0;
++}
++
++struct ace_container {
++ struct nfs4_ace *ace;
++ struct list_head ace_l;
++};
++
++static short ace2type(struct nfs4_ace *);
++static int _posix_to_nfsv4_one(struct nfs4_acl_idmapper *, void *idarg, struct posix_acl *, struct nfs4_acl *, int);
++static struct posix_acl *_nfsv4_to_posix_one(struct nfs4_acl_idmapper *, void *idarg, struct nfs4_acl *);
++
++struct nfs4_acl *
++nfs4_acl_posix_to_nfsv4(struct nfs4_acl_idmapper *idmapper, void *idarg,
++ struct posix_acl *pacl, struct posix_acl *dpacl)
++{
++ struct nfs4_acl *acl;
++ int error = -EINVAL;
++
++ if ((pacl != NULL &&
++ (posix_acl_valid(pacl) < 0 || pacl->a_count == 0)) ||
++ (dpacl != NULL &&
++ (posix_acl_valid(dpacl) < 0 || dpacl->a_count == 0)))
++ goto out_err;
++
++ acl = nfs4_acl_new();
++ if (acl == NULL) {
++ error = -ENOMEM;
++ goto out_err;
++ }
++
++ if (pacl != NULL) {
++ error = _posix_to_nfsv4_one(idmapper, idarg, pacl, acl, 0);
++ if (error < 0)
++ goto out_acl;
++ }
++
++ if (dpacl != NULL) {
++ error = _posix_to_nfsv4_one(idmapper, idarg, dpacl, acl,
++ NFS4_ACE_FILE_INHERIT_ACE |
++ NFS4_ACE_DIRECTORY_INHERIT_ACE |
++ NFS4_ACE_INHERIT_ONLY_ACE);
++ if (error < 0)
++ goto out_acl;
++ }
++
++ return acl;
++
++out_acl:
++ nfs4_acl_free(acl);
++out_err:
++ acl = ERR_PTR(error);
++
++ return acl;
++}
++
++static int
++nfs4_acl_add_pair(struct nfs4_acl *acl, int eflag, u32 mask, char *owner,
++ int owner_len)
++{
++ int error;
++
++ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE,
++ eflag, mask, owner, owner_len);
++ if (error < 0)
++ return error;
++ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
++ eflag, ~mask, owner, owner_len);
++ return error;
++}
++
++/* We assume the acl has been verified with posix_acl_valid. */
++static int
++_posix_to_nfsv4_one(struct nfs4_acl_idmapper *idmapper, void *idarg,
++ struct posix_acl *pacl, struct nfs4_acl *acl, int eflag)
++{
++ struct posix_acl_entry *pa, *pe, *group_owner_entry;
++ int error = -EINVAL;
++ u32 mask, mask_mask;
++ char xname[IDMAP_NAMESZ];
++ int xnamelen;
++
++ BUG_ON(pacl->a_count < 3);
++ pe = pacl->a_entries + pacl->a_count;
++ pa = pe - 2; /* if mask entry exists, it's second from the last. */
++ if (pa->e_tag == ACL_MASK)
++ mask_mask = ~mask_from_posix(pa->e_perm, 0);
++ else
++ mask_mask = 0;
++
++ pa = pacl->a_entries;
++ BUG_ON(pa->e_tag != ACL_USER_OBJ);
++ mask = mask_from_posix(pa->e_perm, 1);
++ error = nfs4_acl_add_pair(acl, eflag, mask, "OWNER@",
++ sizeof("OWNER@") - 1);
++ if (error < 0)
++ goto out;
++ pa++;
++
++ while (pa->e_tag == ACL_USER) {
++ mask = mask_from_posix(pa->e_perm, 0);
++ error = idmapper->uid2name(idarg, pa->e_id, xname);
++ if (error < 0)
++ goto out;
++ xnamelen = error;
++
++ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
++ eflag, mask_mask, xname, xnamelen);
++ if (error < 0)
++ goto out;
++
++
++ error = nfs4_acl_add_pair(acl, eflag, mask, xname, xnamelen);
++ if (error < 0)
++ goto out;
++ pa++;
++ }
++
++ /* In the case of groups, we apply allow ACEs first, then deny ACEs,
++ * since a user can be in more than one group. */
++
++ /* allow ACEs */
++
++ if (pacl->a_count > 3) {
++ BUG_ON(pa->e_tag != ACL_GROUP_OBJ);
++ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
++ NFS4_ACE_IDENTIFIER_GROUP | eflag, mask_mask,
++ "GROUP@", sizeof("GROUP@") - 1);
++ if (error < 0)
++ goto out;
++ }
++ group_owner_entry = pa;
++ mask = mask_from_posix(pa->e_perm, 0);
++ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE,
++ NFS4_ACE_IDENTIFIER_GROUP | eflag, mask,
++ "GROUP@", sizeof("GROUP@") - 1);
++ if (error < 0)
++ goto out;
++ pa++;
++
++ while (pa->e_tag == ACL_GROUP) {
++ mask = mask_from_posix(pa->e_perm, 0);
++ error = idmapper->gid2name(idarg, pa->e_id, xname);
++ if (error < 0)
++ goto out;
++ xnamelen = error;
++
++ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
++ NFS4_ACE_IDENTIFIER_GROUP | eflag,
++ mask_mask, xname, xnamelen);
++ if (error < 0)
++ goto out;
++
++ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE,
++ NFS4_ACE_IDENTIFIER_GROUP | eflag, mask, xname, xnamelen);
++ if (error < 0)
++ goto out;
++ pa++;
++ }
++
++ /* deny ACEs */
++
++ pa = group_owner_entry;
++ mask = mask_from_posix(pa->e_perm, 0);
++ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
++ NFS4_ACE_IDENTIFIER_GROUP | eflag,
++ ~mask, "GROUP@", sizeof("GROUP@") - 1);
++ if (error < 0)
++ goto out;
++ pa++;
++ while (pa->e_tag == ACL_GROUP) {
++ mask = mask_from_posix(pa->e_perm, 0);
++ error = idmapper->gid2name(idarg, pa->e_id, xname);
++ if (error < 0)
++ goto out;
++ xnamelen = error;
++
++ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE,
++ NFS4_ACE_IDENTIFIER_GROUP | eflag, ~mask, xname, xnamelen);
++ if (error < 0)
++ goto out;
++ pa++;
++ }
++
++ if (pa->e_tag == ACL_MASK)
++ pa++;
++ BUG_ON(pa->e_tag != ACL_OTHER);
++ mask = mask_from_posix(pa->e_perm, 0);
++ error = nfs4_acl_add_pair(acl, eflag, mask, "EVERYONE@",
++ sizeof("EVERYONE@") - 1);
++
++out:
++ return error;
++}
++
++static void
++sort_pacl_range(struct posix_acl *pacl, int start, int end) {
++ int sorted = 0, i;
++ struct posix_acl_entry tmp;
++
++ /* We just do a bubble sort; easy to do in place, and we're not
++ * expecting acl's to be long enough to justify anything more. */
++ while (!sorted) {
++ sorted = 1;
++ for (i = start; i < end; i++) {
++ if (pacl->a_entries[i].e_id
++ > pacl->a_entries[i+1].e_id) {
++ sorted = 0;
++ tmp = pacl->a_entries[i];
++ pacl->a_entries[i] = pacl->a_entries[i+1];
++ pacl->a_entries[i+1] = tmp;
++ }
++ }
++ }
++}
++
++static void
++sort_pacl(struct posix_acl *pacl)
++{
++	/* posix_acl_valid requires that users and groups be in order
++	 * by uid/gid. */
++	int i, j;
++
++	if (pacl->a_count <= 4)
++		return; /* no users or groups */
++	i = 1;	/* entry 0 is skipped; scan the run of ACL_USER entries */
++	while (pacl->a_entries[i].e_tag == ACL_USER)
++		i++;
++	sort_pacl_range(pacl, 1, i-1);
++
++	BUG_ON(pacl->a_entries[i].e_tag != ACL_GROUP_OBJ);
++	/* Step past ACL_GROUP_OBJ first, so that j scans the ACL_GROUP run. */
++	j = ++i;
++	while (pacl->a_entries[j].e_tag == ACL_GROUP)
++		j++;
++	sort_pacl_range(pacl, i, j-1);
++}
++
++static int
++write_pace(struct nfs4_ace *ace, struct posix_acl *pacl,
++		struct posix_acl_entry **pace, short tag,
++		struct nfs4_acl_idmapper *idmapper, void *idarg)
++{
++	struct posix_acl_entry *this = *pace;
++
++	if (*pace == pacl->a_entries + pacl->a_count)
++		return -EINVAL; /* fell off the end */
++	(*pace)++;	/* advance the caller's cursor; "this" is the slot filled */
++	this->e_tag = tag;
++	if (mode_from_nfs4(ace->access_mask, &this->e_perm,
++				tag == ACL_USER_OBJ))
++		return -EINVAL;
++	switch (tag) {	/* only ACL_USER and ACL_GROUP entries get a mapped id */
++	case ACL_USER:
++		return idmapper->name2uid(idarg, ace->who, ace->wholen,
++				&this->e_id);
++	case ACL_GROUP:
++		return idmapper->name2gid(idarg, ace->who, ace->wholen,
++				&this->e_id);
++	default:
++		this->e_id = ACL_UNDEFINED_ID;
++		return 0;
++	}
++}
++
++static struct nfs4_ace *
++get_next_v4_ace(struct list_head **p, struct list_head *head)
++{
++ struct nfs4_ace *ace;
++
++ *p = (*p)->next;
++ if (*p == head)
++ return NULL;
++ ace = list_entry(*p, struct nfs4_ace, l_ace);
++
++ return ace;
++}
++
++int
++nfs4_acl_nfsv4_to_posix(struct nfs4_acl_idmapper *idmapper, void *idarg,
++		struct nfs4_acl *acl, struct posix_acl **pacl,
++		struct posix_acl **dpacl)
++{
++	struct nfs4_acl *dacl;
++	int error = -ENOMEM;
++
++	if (pacl != NULL)	/* both outputs are optional, as checked below */
++		*pacl = NULL;
++	if (dpacl != NULL)
++		*dpacl = NULL;
++
++	dacl = nfs4_acl_new();
++	if (dacl == NULL)
++		goto out;
++
++	error = nfs4_acl_split(acl, dacl);
++	if (error < 0)
++		goto out_acl;
++
++	if (pacl != NULL) {
++		if (acl->naces == 0) {
++			error = -ENODATA;	/* cleared below if dacl converts */
++			goto try_dpacl;
++		}
++		*pacl = _nfsv4_to_posix_one(idmapper, idarg, acl);
++		if (IS_ERR(*pacl)) {
++			error = PTR_ERR(*pacl);
++			*pacl = NULL;
++			goto out_acl;
++		}
++	}
++
++try_dpacl:
++	if (dpacl != NULL) {
++		if (dacl->naces == 0) {
++			if (pacl == NULL || *pacl == NULL)
++				error = -ENODATA;
++			goto out_acl;
++		}
++		error = 0;
++		*dpacl = _nfsv4_to_posix_one(idmapper, idarg, dacl);
++		if (IS_ERR(*dpacl)) {
++			error = PTR_ERR(*dpacl);
++			*dpacl = NULL;
++			goto out_acl;
++		}
++	}
++
++out_acl:
++	if (error && pacl) {	/* on failure, do not hand back a partial result */
++		posix_acl_release(*pacl);
++		*pacl = NULL;
++	}
++	nfs4_acl_free(dacl);
++out:
++	return error;
++}
++
++static int
++complementary_ace_pair(struct nfs4_ace *allow, struct nfs4_ace *deny)
++{
++ return MASK_EQUAL(allow->access_mask, ~deny->access_mask) &&
++ allow->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE &&
++ deny->type == NFS4_ACE_ACCESS_DENIED_ACE_TYPE &&
++ allow->flag == deny->flag &&
++ allow->wholen == deny->wholen &&
++ memcmp(allow->who, deny->who, allow->wholen) == 0;
++}
++
++static inline int /* 0 on success; -EINVAL on unexpected ace layout, or write_pace()'s negative error */
++user_obj_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
++ struct posix_acl *pacl, struct posix_acl_entry **pace,
++ struct nfs4_acl_idmapper *idmapper, void *idarg)
++{
++ int error = -EINVAL;
++ struct nfs4_ace *ace, *ace2;
++
++ ace = get_next_v4_ace(p, &n4acl->ace_head); /* first ace of the owner pair */
++ if (ace == NULL)
++ goto out;
++ if (ace2type(ace) != ACL_USER_OBJ) /* ace2type() maps the "OWNER@" name to ACL_USER_OBJ */
++ goto out;
++ error = write_pace(ace, pacl, pace, ACL_USER_OBJ, idmapper, idarg);
++ if (error < 0)
++ goto out;
++ error = -EINVAL; /* re-arm the default error for the checks that follow */
++ ace2 = get_next_v4_ace(p, &n4acl->ace_head); /* second ace of the owner pair */
++ if (ace2 == NULL)
++ goto out;
++ if (!complementary_ace_pair(ace, ace2)) /* 'ace' must be the ALLOW and 'ace2' its matching DENY */
++ goto out;
++ error = 0;
++out:
++ return error;
++}
++
++static inline int /* 0 on success; -EINVAL on unexpected ace layout, or write_pace()'s negative error */
++users_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
++ struct nfs4_ace **mask_ace,
++ struct posix_acl *pacl, struct posix_acl_entry **pace,
++ struct nfs4_acl_idmapper *idmapper, void *idarg)
++{
++ int error = -EINVAL;
++ struct nfs4_ace *ace, *ace2;
++
++ ace = get_next_v4_ace(p, &n4acl->ace_head);
++ if (ace == NULL)
++ goto out;
++ while (ace2type(ace) == ACL_USER) { /* one (mask DENY, ALLOW, DENY) triple per named user */
++ if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) /* the triple must open with the mask ace */
++ goto out;
++ if (*mask_ace &&
++ !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) /* every mask ace seen so far must agree */
++ goto out;
++ *mask_ace = ace; /* remember the most recent mask ace for the caller */
++ ace = get_next_v4_ace(p, &n4acl->ace_head);
++ if (ace == NULL)
++ goto out;
++ if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
++ goto out;
++ error = write_pace(ace, pacl, pace, ACL_USER, idmapper, idarg);
++ if (error < 0)
++ goto out;
++ error = -EINVAL; /* re-arm the default error for the checks that follow */
++ ace2 = get_next_v4_ace(p, &n4acl->ace_head);
++ if (ace2 == NULL)
++ goto out;
++ if (!complementary_ace_pair(ace, ace2))
++ goto out;
++ if ((*mask_ace)->flag != ace2->flag || /* the mask ace must carry the same flags and name as the pair */
++ ace2->wholen != (*mask_ace)->wholen ||
++ memcmp(ace2->who, (*mask_ace)->who,
++ (*mask_ace)->wholen) != 0)
++ goto out;
++ ace = get_next_v4_ace(p, &n4acl->ace_head); /* the first ace that is not ACL_USER ends the loop */
++ if (ace == NULL)
++ goto out;
++ }
++ error = 0;
++out:
++ return error;
++}
++
++static inline int
++group_obj_and_groups_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
++ struct nfs4_ace **mask_ace,
++ struct posix_acl *pacl, struct posix_acl_entry **pace,
++ struct nfs4_acl_idmapper *idmapper, void *idarg)
++{
++ int error = -EINVAL;
++ struct nfs4_ace *ace, *ace2;
++ struct ace_container *ac;
++ struct list_head group_l;
++
++ INIT_LIST_HEAD(&group_l);
++ ace = list_entry(*p, struct nfs4_ace, l_ace);
++
++ /* group owner (mask and allow aces) */
++
++ if (pacl->a_count != 3) {
++ /* then the group owner should be preceded by mask */
++ if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE)
++ goto out;
++ if (*mask_ace &&
++ !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask))
++ goto out;
++ *mask_ace = ace;
++ ace = get_next_v4_ace(p, &n4acl->ace_head);
++ if (ace == NULL)
++ goto out;
++
++ if ((*mask_ace)->flag != ace->flag ||
++ ace->wholen != (*mask_ace)->wholen ||
++ memcmp(ace->who, (*mask_ace)->who,
++ (*mask_ace)->wholen) != 0)
++ goto out;
++ }
++
++ if (ace2type(ace) != ACL_GROUP_OBJ)
++ goto out;
++
++ ac = kmalloc(sizeof(*ac), GFP_KERNEL);
++ error = -ENOMEM;
++ if (ac == NULL)
++ goto out;
++ ac->ace = ace;
++ list_add_tail(&ac->ace_l, &group_l);
++
++ error = -EINVAL;
++ if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
++ goto out;
++
++ error = write_pace(ace, pacl, pace, ACL_GROUP_OBJ, idmapper, idarg);
++ if (error < 0)
++ goto out;
++
++ error = -EINVAL;
++ ace = get_next_v4_ace(p, &n4acl->ace_head);
++ if (ace == NULL)
++ goto out;
++
++ /* groups (mask and allow aces) */
++
++ while (ace2type(ace) == ACL_GROUP) {
++ if (*mask_ace == NULL)
++ goto out;
++
++ if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE ||
++ !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask))
++ goto out;
++ *mask_ace = ace;
++
++ ace = get_next_v4_ace(p, &n4acl->ace_head);
++ if (ace == NULL)
++ goto out;
++ ac = kmalloc(sizeof(*ac), GFP_KERNEL);
++ error = -ENOMEM;
++ if (ac == NULL)
++ goto out;
++ error = -EINVAL;
++ if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE ||
++ ace->wholen != (*mask_ace)->wholen ||
++ memcmp(ace->who, (*mask_ace)->who, (*mask_ace)->wholen) != 0)
++ goto out;
++
++ ac->ace = ace;
++ list_add_tail(&ac->ace_l, &group_l);
++
++ error = write_pace(ace, pacl, pace, ACL_GROUP, idmapper, idarg);
++ if (error < 0)
++ goto out;
++ error = -EINVAL;
++ ace = get_next_v4_ace(p, &n4acl->ace_head);
++ if (ace == NULL)
++ goto out;
++ }
++
++ /* group owner (deny ace) */
++
++ if (ace2type(ace) != ACL_GROUP_OBJ)
++ goto out;
++ ac = list_entry(group_l.next, struct ace_container, ace_l);
++ ace2 = ac->ace;
++ if (!complementary_ace_pair(ace2, ace))
++ goto out;
++ list_del(group_l.next);
++ kfree(ac);
++
++ /* groups (deny aces) */
++
++ while (!list_empty(&group_l)) {
++ ace = get_next_v4_ace(p, &n4acl->ace_head);
++ if (ace == NULL)
++ goto out;
++ if (ace2type(ace) != ACL_GROUP)
++ goto out;
++ ac = list_entry(group_l.next, struct ace_container, ace_l);
++ ace2 = ac->ace;
++ if (!complementary_ace_pair(ace2, ace))
++ goto out;
++ list_del(group_l.next);
++ kfree(ac);
++ }
++
++ ace = get_next_v4_ace(p, &n4acl->ace_head);
++ if (ace == NULL)
++ goto out;
++ if (ace2type(ace) != ACL_OTHER)
++ goto out;
++ error = 0;
++out:
++ while (!list_empty(&group_l)) {
++ ac = list_entry(group_l.next, struct ace_container, ace_l);
++ list_del(group_l.next);
++ kfree(ac);
++ }
++ return error;
++}
++
++static inline int
++mask_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
++ struct nfs4_ace **mask_ace,
++ struct posix_acl *pacl, struct posix_acl_entry **pace,
++ struct nfs4_acl_idmapper *idmapper, void *idarg)
++{
++ int error = -EINVAL;
++ struct nfs4_ace *ace;
++
++ ace = list_entry(*p, struct nfs4_ace, l_ace);
++ if (pacl->a_count != 3) {
++ if (*mask_ace == NULL)
++ goto out;
++ (*mask_ace)->access_mask = ~(*mask_ace)->access_mask;
++ write_pace(*mask_ace, pacl, pace, ACL_MASK, idmapper, idarg);
++ }
++ error = 0;
++out:
++ return error;
++}
++
++static inline int /* 0 on success; -EINVAL on unexpected ace layout, or write_pace()'s negative error */
++other_from_v4(struct nfs4_acl *n4acl, struct list_head **p,
++ struct posix_acl *pacl, struct posix_acl_entry **pace,
++ struct nfs4_acl_idmapper *idmapper, void *idarg)
++{
++ int error = -EINVAL;
++ struct nfs4_ace *ace, *ace2;
++
++ ace = list_entry(*p, struct nfs4_ace, l_ace); /* ace the cursor already points at; it is not advanced first */
++ if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE)
++ goto out;
++ error = write_pace(ace, pacl, pace, ACL_OTHER, idmapper, idarg);
++ if (error < 0)
++ goto out;
++ error = -EINVAL; /* re-arm the default error for the checks that follow */
++ ace2 = get_next_v4_ace(p, &n4acl->ace_head);
++ if (ace2 == NULL)
++ goto out;
++ if (!complementary_ace_pair(ace, ace2)) /* the ALLOW must be followed by its matching DENY */
++ goto out;
++ error = 0;
++out:
++ return error;
++}
++
++static int	/* POSIX entry count implied by n4acl->naces, or -1 if no layout fits */
++calculate_posix_ace_count(struct nfs4_acl *n4acl)
++{
++	if (n4acl->naces == 6) /* owner, owner group, and other only */
++		return 3;
++	else { /* Otherwise there must be a mask entry. */
++		/* Also, the remaining entries are for named users and
++		 * groups, and come in threes (mask, allow, deny): */
++		if (n4acl->naces < 7 || (n4acl->naces - 7) % 3)
++			return -1; /* naces is u32: "naces - 7" would wrap below 7 */
++		return 4 + (n4acl->naces - 7)/3;
++	}
++}
++/* Convert one NFSv4 ace list into a newly allocated, sorted posix_acl.
++ * Returns the acl, or ERR_PTR(-EINVAL, -ENOMEM or a helper's error). */
++static struct posix_acl *
++_nfsv4_to_posix_one(struct nfs4_acl_idmapper *idmapper, void *idarg, struct nfs4_acl *n4acl)
++{
++	struct posix_acl *pacl;
++	int error = -EINVAL, nace = 0;
++	struct list_head *p;
++	struct nfs4_ace *mask_ace = NULL;
++	struct posix_acl_entry *pace;
++
++	nace = calculate_posix_ace_count(n4acl);
++	if (nace < 0)	/* never hand a negative count to the allocator; error is -EINVAL */
++		goto out_err;
++	pacl = posix_acl_alloc(nace, GFP_KERNEL);
++	error = -ENOMEM;
++	if (pacl == NULL)
++		goto out_err;
++	pace = &pacl->a_entries[0];	/* next entry to fill */
++	p = &n4acl->ace_head;		/* list cursor shared by the helpers below */
++
++	error = user_obj_from_v4(n4acl, &p, pacl, &pace, idmapper, idarg);
++	if (error)
++		goto out_acl;
++
++	error = users_from_v4(n4acl, &p, &mask_ace, pacl, &pace, idmapper,
++			idarg);
++	if (error)
++		goto out_acl;
++
++	error = group_obj_and_groups_from_v4(n4acl, &p, &mask_ace, pacl, &pace,
++			idmapper, idarg);
++	if (error)
++		goto out_acl;
++
++	error = mask_from_v4(n4acl, &p, &mask_ace, pacl, &pace, idmapper, idarg);
++	if (error)
++		goto out_acl;
++	error = other_from_v4(n4acl, &p, pacl, &pace, idmapper, idarg);
++	if (error)
++		goto out_acl;
++
++	error = -EINVAL;
++	if (p->next != &n4acl->ace_head)	/* trailing aces left unconsumed */
++		goto out_acl;
++	if (pace != pacl->a_entries + pacl->a_count)	/* not every entry was filled */
++		goto out_acl;
++
++	sort_pacl(pacl);
++
++	return pacl;
++out_acl:
++	posix_acl_release(pacl);
++out_err:
++	pacl = ERR_PTR(error);
++	return pacl;
++}
++
++
++struct nfs4_acl * /* newly allocated empty acl, or NULL if kmalloc() fails */
++nfs4_acl_new(void)
++{
++ struct nfs4_acl *acl;
++
++ if ((acl = kmalloc(sizeof(*acl), GFP_KERNEL)) == NULL)
++ return NULL;
++
++ acl->naces = 0; /* both fields are set explicitly; the allocation is not zeroed here */
++ INIT_LIST_HEAD(&acl->ace_head);
++
++ return acl;
++}
++
++void /* frees 'acl', every ace on its list and each ace's 'who' buffer; a NULL acl is a no-op */
++nfs4_acl_free(struct nfs4_acl *acl)
++{
++ struct list_head *h;
++ struct nfs4_ace *ace;
++
++ if (!acl)
++ return;
++
++ while (!list_empty(&acl->ace_head)) { /* pop from the head until the list is empty */
++ h = acl->ace_head.next;
++ list_del(h);
++ ace = list_entry(h, struct nfs4_ace, l_ace);
++ if (ace->who != NULL) /* relies on 'who' being either NULL or a kmalloc()ed buffer */
++ kfree(ace->who);
++ kfree(ace);
++ }
++
++ kfree(acl);
++
++ return;
++}
++
++int	/* appends a copy of the ace; returns the new ace count, or -1 if allocation fails */
++nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask,
++		char *who, u32 wholen)
++{
++	struct nfs4_ace *ace;
++
++	if ((ace = kmalloc(sizeof(*ace), GFP_KERNEL)) == NULL)
++		return -1;
++	ace->who = NULL;	/* kmalloc() does not zero; nfs4_acl_free() tests and frees 'who' */
++	ace->type = type;
++	ace->flag = flag;
++	ace->access_mask = access_mask;
++	if (wholen > 0) {
++		if ((ace->who = kmalloc(wholen, GFP_KERNEL)) == NULL)
++			goto fail;
++		memcpy(ace->who, who, wholen);	/* private copy of exactly wholen bytes, no terminator added */
++	}
++	ace->wholen = wholen;
++
++	list_add_tail(&ace->l_ace, &acl->ace_head);
++
++	return ++acl->naces; /* XXXJBF: why? */
++
++fail:
++	kfree(ace);
++	return -1;
++}
++
++
++int /* moves every ace of 'fromacl' to the tail of 'withacl'; always returns 0 */
++nfs4_acl_merge(struct nfs4_acl *fromacl, struct nfs4_acl *withacl)
++{
++ struct nfs4_ace *ace;
++ struct list_head *h;
++
++ if (fromacl == NULL || withacl == NULL) /* nothing to do when either side is missing */
++ return 0;
++
++ while (!list_empty(&fromacl->ace_head)) {
++ h = fromacl->ace_head.next;
++ list_del(h);
++ ace = list_entry(h, struct nfs4_ace, l_ace);
++ /* XXX */
++ ace->flag |= NFS4_ACE_FILE_INHERIT_ACE | /* same three flags nfs4_acl_split() tests for */
++ NFS4_ACE_DIRECTORY_INHERIT_ACE | NFS4_ACE_INHERIT_ONLY_ACE;
++ list_add_tail(&ace->l_ace, &withacl->ace_head);
++ withacl->naces++; /* NOTE(review): fromacl->naces is not decremented here - confirm callers do not rely on it */
++ }
++
++ return 0;
++}
++
++int	/* 0 on success, -ENOMEM if an ace could not be copied into 'dacl' */
++nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl)
++{
++	struct list_head *h, *n;
++	struct nfs4_ace *ace;
++	int error = 0;
++
++	list_for_each_safe(h, n, &acl->ace_head) {
++		ace = list_entry(h, struct nfs4_ace, l_ace);
++		/* only aces carrying all three inherit flags are moved to dacl */
++		if (!(ace->flag & NFS4_ACE_DIRECTORY_INHERIT_ACE &&
++				ace->flag & NFS4_ACE_FILE_INHERIT_ACE &&
++				ace->flag & NFS4_ACE_INHERIT_ONLY_ACE))
++			continue;
++		/* copy first; on failure the ace stays in acl, earlier moves are kept */
++		if (nfs4_acl_add_ace(dacl, ace->type, ace->flag,
++				ace->access_mask, ace->who, ace->wholen) < 0) {
++			error = -ENOMEM;
++			goto out;
++		}
++		list_del(h);
++		if (ace->who != NULL)
++			kfree(ace->who);
++		kfree(ace);
++		acl->naces--;
++	}
++
++out:
++	return error;
++}
++
++static struct { /* special NFSv4 'who' names and the POSIX ACL tag each one maps to; searched by ace2type() */
++ char *string;
++ int stringlen; /* length without the terminating NUL */
++ short type;
++} s2t_map[] = {
++ {
++ .string = "OWNER@",
++ .stringlen = sizeof("OWNER@") - 1,
++ .type = ACL_USER_OBJ
++ },
++ {
++ .string = "GROUP@",
++ .stringlen = sizeof("GROUP@") - 1,
++ .type = ACL_GROUP_OBJ
++ },
++ {
++ .string = "EVERYONE@",
++ .stringlen = sizeof("EVERYONE@") - 1,
++ .type = ACL_OTHER
++ },
++};
++
++static short /* POSIX ACL tag for the ace's 'who', or 0 when the ace has no name */
++ace2type(struct nfs4_ace *ace)
++{
++ int i;
++
++ if (ace->who == NULL || ace->wholen <= 0) /* wholen is u32, so "<= 0" only matches 0 */
++ return (0);
++
++ for (i = 0; i < sizeof(s2t_map) / sizeof(*s2t_map); i++) /* special names first */
++ if (s2t_map[i].stringlen == ace->wholen && /* exact-length match, so the bounded compare covers the whole name */
++ strncmp(s2t_map[i].string, ace->who, ace->wholen) == 0)
++ return (s2t_map[i].type);
++
++ return (ace->flag & NFS4_ACE_IDENTIFIER_GROUP ? ACL_GROUP : ACL_USER); /* any other name: group or user by flag */
++}
+diff -puN /dev/null fs/nfs4acl/acl_syms.c
+--- /dev/null 2004-01-26 19:20:21.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfs4acl/acl_syms.c 2004-02-19 16:47:06.000000000 -0500
+@@ -0,0 +1,51 @@
++/*
++ * fs/nfs4acl/acl_syms.c
++ *
++ * Common NFSv4 ACL handling symbol exports.
++ *
++ * Copyright (c) 2002 The Regents of the University of Michigan.
++ * All rights reserved.
++ *
++ * Marius Aamodt Eriksen <marius@umich.edu>
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *
++ * 1. Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ * 2. Redistributions in binary form must reproduce the above copyright
++ * notice, this list of conditions and the following disclaimer in the
++ * documentation and/or other materials provided with the distribution.
++ * 3. Neither the name of the University nor the names of its
++ * contributors may be used to endorse or promote products derived
++ * from this software without specific prior written permission.
++ *
++ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++
++#include <linux/config.h>
++#include <linux/module.h>
++
++#include <linux/types.h>
++#include <linux/fs.h>
++#include <linux/posix_acl.h>
++#include <linux/nfs4.h>
++#include <linux/nfs4_acl.h>
++
++EXPORT_SYMBOL(nfs4_acl_new); /* these five are prototyped in <linux/nfs4_acl.h> */
++EXPORT_SYMBOL(nfs4_acl_free);
++EXPORT_SYMBOL(nfs4_acl_merge);
++EXPORT_SYMBOL(nfs4_acl_split);
++EXPORT_SYMBOL(nfs4_acl_add_ace); /* NOTE(review): the two conversion routines in the header are not exported here - confirm they are exported elsewhere */
+diff -puN /dev/null fs/nfs4acl/Makefile
+--- /dev/null 2004-01-26 19:20:21.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfs4acl/Makefile 2004-02-19 16:47:06.000000000 -0500
+@@ -0,0 +1,3 @@
++obj-$(CONFIG_NFS_V4_ACL) += nfs4acl.o
++
++nfs4acl-objs := acl.o acl_syms.o
+diff -puN /dev/null include/linux/nfs4_acl.h
+--- /dev/null 2004-01-26 19:20:21.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/nfs4_acl.h 2004-02-19 16:47:06.000000000 -0500
+@@ -0,0 +1,68 @@
++/*
++ * include/linux/nfs4_acl.h
++ *
++ * Common NFSv4 ACL handling definitions.
++ *
++ * Copyright (c) 2002 The Regents of the University of Michigan.
++ * All rights reserved.
++ *
++ * Marius Aamodt Eriksen <marius@umich.edu>
++ *
++ * Redistribution and use in source and binary forms, with or without
++ * modification, are permitted provided that the following conditions
++ * are met:
++ *
++ * 1. Redistributions of source code must retain the above copyright
++ * notice, this list of conditions and the following disclaimer.
++ * 2. Redistributions in binary form must reproduce the above copyright
++ * notice, this list of conditions and the following disclaimer in the
++ * documentation and/or other materials provided with the distribution.
++ * 3. Neither the name of the University nor the names of its
++ * contributors may be used to endorse or promote products derived
++ * from this software without specific prior written permission.
++ *
++ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
++ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
++ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
++ */
++
++#ifndef LINUX_NFS4_ACL_H
++#define LINUX_NFS4_ACL_H
++
++#include <linux/posix_acl.h>
++
++#define NFS4_ACL_TYPE_ACCESS 0
++#define NFS4_ACL_TYPE_DEFAULT 1
++
++/* XXX from include/linux/nfs_idmap.h: */
++#define IDMAP_NAMESZ 128
++
++struct nfs4_acl_idmapper { /* name <-> id callbacks handed to the ACL conversion routines together with an opaque argument */
++ int (*name2uid)(void *, const char *, size_t len, __u32 *); /* presumably (opaque arg, name, name length, out uid) - TODO confirm against an implementation */
++ int (*name2gid)(void *, const char *, size_t len, __u32 *); /* presumably the group counterpart of name2uid - TODO confirm */
++ int (*uid2name)(void *, __u32, char *); /* NOTE(review): no buffer length is passed; output size is presumably bounded by IDMAP_NAMESZ - confirm */
++ int (*gid2name)(void *, __u32, char *); /* presumably the group counterpart of uid2name - TODO confirm */
++};
++
++struct nfs4_acl *nfs4_acl_new(void);
++void nfs4_acl_free(struct nfs4_acl *);
++int nfs4_acl_merge(struct nfs4_acl *, struct nfs4_acl *);
++int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *);
++int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32,
++ u32, char *, u32);
++void nfs4_acl_print(struct nfs4_acl *);
++struct nfs4_acl *nfs4_acl_posix_to_nfsv4(struct nfs4_acl_idmapper *, void *,
++ struct posix_acl *, struct posix_acl *);
++int nfs4_acl_nfsv4_to_posix(struct nfs4_acl_idmapper *, void *,
++ struct nfs4_acl *, struct posix_acl **,
++ struct posix_acl **);
++
++#endif /* LINUX_NFS4_ACL_H */
+diff -puN include/linux/nfs4.h~CITI_NFS4_ALL include/linux/nfs4.h
+--- linux-2.6.3/include/linux/nfs4.h~CITI_NFS4_ALL 2004-02-19 16:47:06.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/nfs4.h 2004-02-19 16:47:14.000000000 -0500
+@@ -37,14 +37,67 @@
+ #define NFS4_SHARE_ACCESS_BOTH 0x0003
+ #define NFS4_SHARE_DENY_READ 0x0001
+ #define NFS4_SHARE_DENY_WRITE 0x0002
++#define NFS4_SHARE_DENY_BOTH 0x0003
+
+ #define NFS4_SET_TO_SERVER_TIME 0
+ #define NFS4_SET_TO_CLIENT_TIME 1
+
+-#define NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE 0
+-#define NFS4_ACE_ACCESS_DENIED_ACE_TYPE 1
+-#define NFS4_ACE_SYSTEM_AUDIT_ACE_TYPE 2
+-#define NFS4_ACE_SYSTEM_ALARM_ACE_TYPE 3
++#define ACL4_SUPPORT_ALLOW_ACL 0x00000001
++#define ACL4_SUPPORT_DENY_ACL 0x00000002
++#define ACL4_SUPPORT_AUDIT_ACL 0x00000004
++#define ACL4_SUPPORT_ALARM_ACL 0x00000008
++
++#define NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE 0x00000000
++#define NFS4_ACE_ACCESS_DENIED_ACE_TYPE 0x00000001
++#define NFS4_ACE_SYSTEM_AUDIT_ACE_TYPE 0x00000002
++#define NFS4_ACE_SYSTEM_ALARM_ACE_TYPE 0x00000003
++
++#define NFS4_ACE_FILE_INHERIT_ACE 0x00000001
++#define NFS4_ACE_DIRECTORY_INHERIT_ACE 0x00000002
++#define NFS4_ACE_NO_PROPAGATE_INHERIT_ACE 0x00000004
++#define NFS4_ACE_INHERIT_ONLY_ACE 0x00000008
++#define NFS4_ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x00000010
++#define NFS4_ACE_FAILED_ACCESS_ACE_FLAG 0x00000020
++#define NFS4_ACE_IDENTIFIER_GROUP 0x00000040
++#define NFS4_ACE_OWNER 0x00000080
++#define NFS4_ACE_GROUP 0x00000100
++#define NFS4_ACE_EVERYONE 0x00000200
++
++#define NFS4_ACE_READ_DATA 0x00000001
++#define NFS4_ACE_LIST_DIRECTORY 0x00000001
++#define NFS4_ACE_WRITE_DATA 0x00000002
++#define NFS4_ACE_ADD_FILE 0x00000002
++#define NFS4_ACE_APPEND_DATA 0x00000004
++#define NFS4_ACE_ADD_SUBDIRECTORY 0x00000004
++#define NFS4_ACE_READ_NAMED_ATTRS 0x00000008
++#define NFS4_ACE_WRITE_NAMED_ATTRS 0x00000010
++#define NFS4_ACE_EXECUTE 0x00000020
++#define NFS4_ACE_DELETE_CHILD 0x00000040
++#define NFS4_ACE_READ_ATTRIBUTES 0x00000080
++#define NFS4_ACE_WRITE_ATTRIBUTES 0x00000100
++#define NFS4_ACE_DELETE 0x00010000
++#define NFS4_ACE_READ_ACL 0x00020000
++#define NFS4_ACE_WRITE_ACL 0x00040000
++#define NFS4_ACE_WRITE_OWNER 0x00080000
++#define NFS4_ACE_SYNCHRONIZE 0x00100000
++#define NFS4_ACE_GENERIC_READ 0x00120081
++#define NFS4_ACE_GENERIC_WRITE 0x00160106
++#define NFS4_ACE_GENERIC_EXECUTE 0x001200A0
++#define NFS4_ACE_MASK_ALL 0x001F01FF
++
++struct nfs4_ace { /* one NFSv4 access control entry, linked into a struct nfs4_acl */
++ u32 type; /* one of the NFS4_ACE_*_ACE_TYPE values above */
++ u32 flag; /* NFS4_ACE_* flag bits (inherit flags, NFS4_ACE_IDENTIFIER_GROUP, ...) */
++ u32 access_mask; /* NFS4_ACE_* permission bits */
++ char *who; /* principal name, wholen bytes long; NOTE(review): apparently not NUL-terminated - confirm */
++ u32 wholen; /* length of 'who' in bytes */
++ struct list_head l_ace; /* link in nfs4_acl.ace_head */
++};
++
++struct nfs4_acl { /* ordered list of aces plus a running count */
++ u32 naces; /* number of aces on ace_head, as maintained by the nfs4_acl_* helpers */
++ struct list_head ace_head; /* head of the nfs4_ace.l_ace list */
++};
+
+ typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier;
+ typedef struct { char data[16]; } nfs4_stateid;
+@@ -86,6 +139,8 @@ enum nfs_opnum4 {
+ OP_SETCLIENTID_CONFIRM = 36,
+ OP_VERIFY = 37,
+ OP_WRITE = 38,
++ OP_RELEASE_LOCKOWNER = 39,
++ OP_ILLEGAL = 10044,
+ };
+
+ enum nfsstat4 {
+@@ -283,7 +338,6 @@ enum lock_type4 {
+
+ enum {
+ NFSPROC4_CLNT_NULL = 0, /* Unused */
+- NFSPROC4_CLNT_COMPOUND, /* Soon to be unused */
+ NFSPROC4_CLNT_READ,
+ NFSPROC4_CLNT_WRITE,
+ NFSPROC4_CLNT_COMMIT,
+@@ -300,6 +354,22 @@ enum {
+ NFSPROC4_CLNT_LOCK,
+ NFSPROC4_CLNT_LOCKT,
+ NFSPROC4_CLNT_LOCKU,
++ NFSPROC4_CLNT_GETACL,
++ NFSPROC4_CLNT_SETACL,
++ NFSPROC4_CLNT_ACCESS,
++ NFSPROC4_CLNT_GETATTR,
++ NFSPROC4_CLNT_LOOKUP,
++ NFSPROC4_CLNT_GETROOT_HEAD,
++ NFSPROC4_CLNT_GETROOT_PATH,
++ NFSPROC4_CLNT_REMOVE,
++ NFSPROC4_CLNT_RENAME,
++ NFSPROC4_CLNT_LINK,
++ NFSPROC4_CLNT_CREATE,
++ NFSPROC4_CLNT_PATHCONF,
++ NFSPROC4_CLNT_STATFS,
++ NFSPROC4_CLNT_UNLINK,
++ NFSPROC4_CLNT_READLINK,
++ NFSPROC4_CLNT_READDIR,
+ };
+
+ #endif
+diff -puN fs/nfs/nfs4xdr.c~CITI_NFS4_ALL fs/nfs/nfs4xdr.c
+--- linux-2.6.3/fs/nfs/nfs4xdr.c~CITI_NFS4_ALL 2004-02-19 16:47:07.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfs/nfs4xdr.c 2004-02-19 16:47:15.000000000 -0500
+@@ -51,6 +51,7 @@
+ #include <linux/nfs4.h>
+ #include <linux/nfs_fs.h>
+ #include <linux/nfs_idmap.h>
++#include <linux/nfs4_acl.h>
+
+ #define NFSDBG_FACILITY NFSDBG_XDR
+
+@@ -81,11 +82,15 @@ static int nfs_stat_to_errno(int);
+ #define decode_putrootfh_maxsz op_decode_hdr_maxsz
+ #define encode_getfh_maxsz op_encode_hdr_maxsz
+ #define decode_getfh_maxsz op_decode_hdr_maxsz + 1 + \
+- (NFS4_FHSIZE >> 2)
++ ((3+NFS4_FHSIZE) >> 2)
+ #define encode_getattr_maxsz op_encode_hdr_maxsz + 3
+-#define nfs4_fattr_bitmap_maxsz 26 + 2 * ((NFS4_MAXNAMLEN +1) >> 2)
++#define nfs4_name_maxsz ( 1 + ((3+NFS4_MAXNAMLEN) >> 2) )
++#define nfs4_fattr_bitmap_maxsz 36 + 2 * nfs4_name_maxsz
+ #define decode_getattr_maxsz op_decode_hdr_maxsz + 3 + \
+ nfs4_fattr_bitmap_maxsz
++#define encode_setattr_maxsz op_decode_hdr_maxsz + 4 + \
++ nfs4_fattr_bitmap_maxsz
++#define decode_setattr_maxsz op_decode_hdr_maxsz + 3
+ #define encode_savefh_maxsz op_encode_hdr_maxsz
+ #define decode_savefh_maxsz op_decode_hdr_maxsz
+ #define encode_restorefh_maxsz op_encode_hdr_maxsz
+@@ -115,6 +120,18 @@ static int nfs_stat_to_errno(int);
+ 3 + (NFS4_VERIFIER_SIZE >> 2)
+ #define decode_setclientid_confirm_maxsz \
+ op_decode_hdr_maxsz
++#define encode_lookup_maxsz op_encode_hdr_maxsz + \
++ 1 + ((3 + NFS_MAXFHSIZE) >> 2)
++#define encode_remove_maxsz op_encode_hdr_maxsz + \
++ nfs4_name_maxsz
++#define encode_rename_maxsz op_encode_hdr_maxsz + \
++ 2 * nfs4_name_maxsz
++#define encode_link_maxsz op_encode_hdr_maxsz + \
++ nfs4_name_maxsz
++#define encode_create_maxsz op_encode_hdr_maxsz + \
++ 2 + 2 * nfs4_name_maxsz + \
++ nfs4_fattr_bitmap_maxsz
++#define decode_create_maxsz op_decode_hdr_maxsz + 8
+
+ #define NFS4_enc_compound_sz 1024 /* XXX: large enough? */
+ #define NFS4_dec_compound_sz 1024 /* XXX: large enough? */
+@@ -126,6 +143,18 @@ static int nfs_stat_to_errno(int);
+ decode_putfh_maxsz + \
+ decode_read_getattr_maxsz + \
+ op_decode_hdr_maxsz + 2
++#define NFS4_enc_readlink_sz compound_encode_hdr_maxsz + \
++ encode_putfh_maxsz + \
++ op_encode_hdr_maxsz
++#define NFS4_dec_readlink_sz compound_decode_hdr_maxsz + \
++ decode_putfh_maxsz + \
++ op_decode_hdr_maxsz
++#define NFS4_enc_readdir_sz compound_encode_hdr_maxsz + \
++ encode_putfh_maxsz + \
++ op_encode_hdr_maxsz + 9
++#define NFS4_dec_readdir_sz compound_decode_hdr_maxsz + \
++ decode_putfh_maxsz + \
++ op_decode_hdr_maxsz + 2
+ #define NFS4_enc_write_sz compound_encode_hdr_maxsz + \
+ encode_putfh_maxsz + \
+ encode_pre_write_getattr_maxsz + \
+@@ -255,8 +284,136 @@ static int nfs_stat_to_errno(int);
+ decode_putfh_maxsz + \
+ decode_getattr_maxsz + \
+ op_decode_hdr_maxsz + 4
+-
+-
++#define NFS4_enc_getacl_sz compound_encode_hdr_maxsz + \
++ encode_putfh_maxsz + \
++ encode_getattr_maxsz
++#define username_maxsz 1 + ((IDMAP_NAMESZ + 3) >> 2)
++#define ace_maxsz 3 + username_maxsz
++#define acl_maxentries (NFS_ACL_MAX_ENTRIES - 3) * 3 + 6
++#define acl_maxsz 1 + (acl_maxentries) * (ace_maxsz)
++#define NFS4_dec_getacl_sz compound_decode_hdr_maxsz + \
++ decode_putfh_maxsz + \
++ op_decode_hdr_maxsz + 3 + 1 + acl_maxsz
++#define NFS4_enc_setacl_sz compound_encode_hdr_maxsz + \
++ encode_putfh_maxsz + \
++ op_encode_hdr_maxsz + 4 + 1 + acl_maxsz
++#define NFS4_dec_setacl_sz compound_decode_hdr_maxsz + \
++ decode_putfh_maxsz + \
++ decode_setattr_maxsz
++#define NFS4_enc_access_sz compound_encode_hdr_maxsz + \
++ encode_putfh_maxsz + \
++ encode_getattr_maxsz + \
++ op_encode_hdr_maxsz + 1
++#define NFS4_dec_access_sz compound_decode_hdr_maxsz + \
++ decode_putfh_maxsz + \
++ decode_getattr_maxsz + \
++ op_decode_hdr_maxsz + 2
++#define NFS4_enc_getattr_sz compound_encode_hdr_maxsz + \
++ encode_putfh_maxsz + \
++ encode_getattr_maxsz
++#define NFS4_dec_getattr_sz compound_decode_hdr_maxsz + \
++ decode_putfh_maxsz + \
++ decode_getattr_maxsz
++#define NFS4_enc_lookup_sz compound_encode_hdr_maxsz + \
++ encode_putfh_maxsz + \
++ encode_getattr_maxsz + \
++ encode_lookup_maxsz + \
++ encode_getattr_maxsz + \
++ encode_getfh_maxsz
++#define NFS4_dec_lookup_sz compound_decode_hdr_maxsz + \
++ decode_putfh_maxsz + \
++ decode_getattr_maxsz + \
++ op_decode_hdr_maxsz + \
++ decode_getattr_maxsz + \
++ decode_getfh_maxsz
++#define NFS4_enc_getroot_head_sz compound_encode_hdr_maxsz + \
++ op_encode_hdr_maxsz + 1 + \
++ encode_getattr_maxsz + \
++ encode_getfh_maxsz
++#define NFS4_dec_getroot_head_sz compound_decode_hdr_maxsz + \
++ op_decode_hdr_maxsz + \
++ decode_getattr_maxsz + \
++ decode_getfh_maxsz
++#define NFS4_enc_getroot_path_sz compound_encode_hdr_maxsz + \
++ encode_putfh_maxsz + \
++ encode_lookup_maxsz + \
++ encode_getattr_maxsz + \
++ encode_getfh_maxsz
++#define NFS4_dec_getroot_path_sz compound_decode_hdr_maxsz + \
++ decode_putfh_maxsz + \
++ op_decode_hdr_maxsz + \
++ decode_getattr_maxsz + \
++ decode_getfh_maxsz
++#define NFS4_enc_remove_sz compound_encode_hdr_maxsz + \
++ encode_putfh_maxsz + \
++ encode_remove_maxsz + \
++ encode_getattr_maxsz
++#define NFS4_dec_remove_sz compound_decode_hdr_maxsz + \
++ decode_putfh_maxsz + \
++ op_decode_hdr_maxsz + 5 + \
++ decode_getattr_maxsz
++#define NFS4_enc_unlink_sz NFS4_enc_remove_sz
++#define NFS4_dec_unlink_sz NFS4_dec_remove_sz
++#define NFS4_enc_rename_sz compound_encode_hdr_maxsz + \
++ encode_putfh_maxsz + \
++ encode_savefh_maxsz + \
++ encode_putfh_maxsz + \
++ encode_rename_maxsz + \
++ encode_getattr_maxsz + \
++ encode_restorefh_maxsz + \
++ encode_getattr_maxsz
++#define NFS4_dec_rename_sz compound_decode_hdr_maxsz + \
++ decode_putfh_maxsz + \
++ decode_savefh_maxsz + \
++ decode_putfh_maxsz + \
++ op_decode_hdr_maxsz + 5 + 5 + \
++ decode_getattr_maxsz + \
++ decode_restorefh_maxsz + \
++ decode_getattr_maxsz
++#define NFS4_enc_link_sz compound_encode_hdr_maxsz + \
++ encode_putfh_maxsz + \
++ encode_savefh_maxsz + \
++ encode_putfh_maxsz + \
++ encode_link_maxsz + \
++ encode_getattr_maxsz + \
++ encode_restorefh_maxsz + \
++ encode_getattr_maxsz
++#define NFS4_dec_link_sz compound_decode_hdr_maxsz + \
++ decode_putfh_maxsz + \
++ decode_savefh_maxsz + \
++ decode_putfh_maxsz + \
++ op_decode_hdr_maxsz + 5 + \
++ decode_getattr_maxsz + \
++ decode_restorefh_maxsz + \
++ decode_getattr_maxsz
++#define NFS4_enc_create_sz compound_encode_hdr_maxsz + \
++ encode_putfh_maxsz + \
++ encode_savefh_maxsz + \
++ encode_create_maxsz + \
++ encode_getattr_maxsz + \
++ encode_getfh_maxsz + \
++ encode_restorefh_maxsz + \
++ encode_getattr_maxsz
++#define NFS4_dec_create_sz compound_decode_hdr_maxsz + \
++ decode_putfh_maxsz + \
++ op_decode_hdr_maxsz + \
++ decode_create_maxsz + \
++ decode_getattr_maxsz + \
++ decode_getfh_maxsz + \
++ op_decode_hdr_maxsz + \
++ decode_getattr_maxsz
++#define NFS4_enc_pathconf_sz compound_encode_hdr_maxsz + \
++ encode_putfh_maxsz + \
++ encode_getattr_maxsz
++#define NFS4_dec_pathconf_sz compound_decode_hdr_maxsz + \
++ decode_putfh_maxsz + \
++ op_decode_hdr_maxsz + 6
++#define NFS4_enc_statfs_sz compound_encode_hdr_maxsz + \
++ encode_putfh_maxsz + \
++ encode_getattr_maxsz
++#define NFS4_dec_statfs_sz compound_decode_hdr_maxsz + \
++ decode_putfh_maxsz + \
++ op_decode_hdr_maxsz + 12
+
+ static struct {
+ unsigned int mode;
+@@ -333,8 +490,7 @@ encode_compound_hdr(struct xdr_stream *x
+ }
+
+ static int
+-encode_attrs(struct xdr_stream *xdr, struct iattr *iap,
+- struct nfs_server *server)
++encode_attrs(struct xdr_stream *xdr, struct iattr *iap, struct nfs_server *server)
+ {
+ char owner_name[IDMAP_NAMESZ];
+ char owner_group[IDMAP_NAMESZ];
+@@ -352,7 +508,7 @@ encode_attrs(struct xdr_stream *xdr, str
+ * In the worst-case, this would be
+ * 12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime)
+ * = 36 bytes, plus any contribution from variable-length fields
+- * such as owner/group/acl's.
++ * such as owner/group.
+ */
+ len = 16;
+
+@@ -392,6 +548,7 @@ encode_attrs(struct xdr_stream *xdr, str
+ len += 16;
+ else if (iap->ia_valid & ATTR_MTIME)
+ len += 4;
++
+ RESERVE_SPACE(len);
+
+ /*
+@@ -462,13 +619,13 @@ encode_attrs(struct xdr_stream *xdr, str
+ }
+
+ static int
+-encode_access(struct xdr_stream *xdr, struct nfs4_access *access)
++encode_access(struct xdr_stream *xdr, u32 access)
+ {
+ uint32_t *p;
+
+ RESERVE_SPACE(8);
+ WRITE32(OP_ACCESS);
+- WRITE32(access->ac_req_access);
++ WRITE32(access);
+
+ return 0;
+ }
+@@ -500,37 +657,36 @@ encode_commit(struct xdr_stream *xdr, st
+ }
+
+ static int
+-encode_create(struct xdr_stream *xdr, struct nfs4_create *create,
+- struct nfs_server *server)
++encode_create(struct xdr_stream *xdr, struct nfs4_create_arg *create)
+ {
+ uint32_t *p;
+
+ RESERVE_SPACE(8);
+ WRITE32(OP_CREATE);
+- WRITE32(create->cr_ftype);
++ WRITE32(create->ftype);
+
+- switch (create->cr_ftype) {
++ switch (create->ftype) {
+ case NF4LNK:
+- RESERVE_SPACE(4 + create->cr_textlen);
+- WRITE32(create->cr_textlen);
+- WRITEMEM(create->cr_text, create->cr_textlen);
++ RESERVE_SPACE(4 + create->u.symlink->len);
++ WRITE32(create->u.symlink->len);
++ WRITEMEM(create->u.symlink->name, create->u.symlink->len);
+ break;
+
+ case NF4BLK: case NF4CHR:
+ RESERVE_SPACE(8);
+- WRITE32(create->cr_specdata1);
+- WRITE32(create->cr_specdata2);
++ WRITE32(create->u.device.specdata1);
++ WRITE32(create->u.device.specdata2);
+ break;
+
+ default:
+ break;
+ }
+
+- RESERVE_SPACE(4 + create->cr_namelen);
+- WRITE32(create->cr_namelen);
+- WRITEMEM(create->cr_name, create->cr_namelen);
++ RESERVE_SPACE(4 + create->name->len);
++ WRITE32(create->name->len);
++ WRITEMEM(create->name->name, create->name->len);
+
+- return encode_attrs(xdr, create->cr_attrs, server);
++ return encode_attrs(xdr, create->attrs, create->server);
+ }
+
+ static int
+@@ -558,11 +714,14 @@ encode_getattr_two(struct xdr_stream *xd
+ return 0;
+ }
+
++extern u32 nfs4_fattr_bitmap[];
++extern u32 nfs4_statfs_bitmap[];
++
+ static inline int
+-encode_getattr(struct xdr_stream *xdr, struct nfs4_getattr *getattr)
++encode_getfattr(struct xdr_stream *xdr)
+ {
+- return encode_getattr_two(xdr, getattr->gt_bmval[0],
+- getattr->gt_bmval[1]);
++ return encode_getattr_two(xdr, nfs4_fattr_bitmap[0],
++ nfs4_fattr_bitmap[1]);
+ }
+
+ /*
+@@ -618,14 +777,14 @@ encode_getfh(struct xdr_stream *xdr)
+ }
+
+ static int
+-encode_link(struct xdr_stream *xdr, struct nfs4_link *link)
++encode_link(struct xdr_stream *xdr, struct qstr *name)
+ {
+ uint32_t *p;
+
+- RESERVE_SPACE(8 + link->ln_namelen);
++ RESERVE_SPACE(8 + name->len);
+ WRITE32(OP_LINK);
+- WRITE32(link->ln_namelen);
+- WRITEMEM(link->ln_name, link->ln_namelen);
++ WRITE32(name->len);
++ WRITEMEM(name->name, name->len);
+
+ return 0;
+ }
+@@ -705,15 +864,15 @@ encode_locku(struct xdr_stream *xdr, str
+ }
+
+ static int
+-encode_lookup(struct xdr_stream *xdr, struct nfs4_lookup *lookup)
++encode_lookup(struct xdr_stream *xdr, struct qstr *name)
+ {
+- int len = lookup->lo_name->len;
++ int len = name->len;
+ uint32_t *p;
+
+ RESERVE_SPACE(8 + len);
+ WRITE32(OP_LOOKUP);
+ WRITE32(len);
+- WRITEMEM(lookup->lo_name->name, len);
++ WRITEMEM(name->name, len);
+
+ return 0;
+ }
+@@ -883,7 +1042,7 @@ encode_read(struct xdr_stream *xdr, stru
+ }
+
+ static int
+-encode_readdir(struct xdr_stream *xdr, struct nfs4_readdir *readdir, struct rpc_rqst *req)
++encode_readdir(struct xdr_stream *xdr, struct nfs4_readdir_arg *readdir, struct rpc_rqst *req)
+ {
+ struct rpc_auth *auth = req->rq_task->tk_auth;
+ int replen;
+@@ -891,21 +1050,21 @@ encode_readdir(struct xdr_stream *xdr, s
+
+ RESERVE_SPACE(32+sizeof(nfs4_verifier));
+ WRITE32(OP_READDIR);
+- WRITE64(readdir->rd_cookie);
+- WRITEMEM(readdir->rd_req_verifier.data, sizeof(readdir->rd_req_verifier.data));
+- WRITE32(readdir->rd_count >> 5); /* meaningless "dircount" field */
+- WRITE32(readdir->rd_count);
++ WRITE64(readdir->cookie);
++ WRITEMEM(readdir->req_verifier.data, sizeof(readdir->req_verifier.data));
++ WRITE32(readdir->count >> 5); /* meaningless "dircount" field */
++ WRITE32(readdir->count);
+ WRITE32(2);
+- WRITE32(readdir->rd_bmval[0]);
+- WRITE32(readdir->rd_bmval[1]);
++ WRITE32(FATTR4_WORD0_FILEID);
++ WRITE32(0);
+
+ /* set up reply iovec
+ * toplevel_status + taglen + rescount + OP_PUTFH + status
+ * + OP_READDIR + status + verifer(2) = 9
+ */
+ replen = (RPC_REPHDRSIZE + auth->au_rslack + 9) << 2;
+- xdr_inline_pages(&req->rq_rcv_buf, replen, readdir->rd_pages,
+- readdir->rd_pgbase, readdir->rd_count);
++ xdr_inline_pages(&req->rq_rcv_buf, replen, readdir->pages,
++ readdir->pgbase, readdir->count);
+
+ return 0;
+ }
+@@ -925,37 +1084,37 @@ encode_readlink(struct xdr_stream *xdr,
+ * + OP_READLINK + status = 7
+ */
+ replen = (RPC_REPHDRSIZE + auth->au_rslack + 7) << 2;
+- xdr_inline_pages(&req->rq_rcv_buf, replen, readlink->rl_pages, 0, readlink->rl_count);
++ xdr_inline_pages(&req->rq_rcv_buf, replen, readlink->pages, 0, readlink->count);
+
+ return 0;
+ }
+
+ static int
+-encode_remove(struct xdr_stream *xdr, struct nfs4_remove *remove)
++encode_remove(struct xdr_stream *xdr, struct qstr *name)
+ {
+ uint32_t *p;
+
+- RESERVE_SPACE(8 + remove->rm_namelen);
++ RESERVE_SPACE(8 + name->len);
+ WRITE32(OP_REMOVE);
+- WRITE32(remove->rm_namelen);
+- WRITEMEM(remove->rm_name, remove->rm_namelen);
++ WRITE32(name->len);
++ WRITEMEM(name->name, name->len);
+
+ return 0;
+ }
+
+ static int
+-encode_rename(struct xdr_stream *xdr, struct nfs4_rename *rename)
++encode_rename(struct xdr_stream *xdr, struct qstr *oldname, struct qstr *newname)
+ {
+ uint32_t *p;
+
+- RESERVE_SPACE(8 + rename->rn_oldnamelen);
++ RESERVE_SPACE(8 + oldname->len);
+ WRITE32(OP_RENAME);
+- WRITE32(rename->rn_oldnamelen);
+- WRITEMEM(rename->rn_oldname, rename->rn_oldnamelen);
++ WRITE32(oldname->len);
++ WRITEMEM(oldname->name, oldname->len);
+
+- RESERVE_SPACE(4 + rename->rn_newnamelen);
+- WRITE32(rename->rn_newnamelen);
+- WRITEMEM(rename->rn_newname, rename->rn_newnamelen);
++ RESERVE_SPACE(4 + newname->len);
++ WRITE32(newname->len);
++ WRITEMEM(newname->name, newname->len);
+
+ return 0;
+ }
+@@ -1011,6 +1170,39 @@ encode_setattr(struct xdr_stream *xdr, s
+ return 0;
+ }
+
++extern nfs4_stateid zero_stateid;
++
++#ifdef CONFIG_NFS_V4_ACL
++
++static int
++encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg)
++{
++ uint32_t *p, *attrbuflen;
++ struct nfs4_ace *ace;
++ struct nfs4_acl *acl = arg->acl;
++
++ RESERVE_SPACE(4+sizeof(zero_stateid.data));
++ WRITE32(OP_SETATTR);
++ WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data));
++ RESERVE_SPACE(4*4);
++ WRITE32(1);
++ WRITE32(FATTR4_WORD0_ACL);
++ attrbuflen = p++;
++ WRITE32(acl->naces);
++ list_for_each_entry(ace, &acl->ace_head, l_ace) {
++ RESERVE_SPACE(4*4 + (XDR_QUADLEN(ace->wholen) << 2));
++ WRITE32(ace->type);
++ WRITE32(ace->flag);
++ WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL);
++ WRITE32(ace->wholen);
++ WRITEMEM(ace->who, ace->wholen);
++ }
++ *attrbuflen = htonl((char *)p - (char *)attrbuflen - 4);
++ return 0;
++}
++
++#endif /* CONFIG_NFS_V4_ACL */
++
+ static int
+ encode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid *setclientid)
+ {
+@@ -1068,312 +1260,566 @@ encode_write(struct xdr_stream *xdr, str
+
+ return 0;
+ }
+-
+-/* FIXME: this sucks */
+-static int
+-encode_compound(struct xdr_stream *xdr, struct nfs4_compound *cp, struct rpc_rqst *req)
+-{
+- struct compound_hdr hdr = {
+- .taglen = cp->taglen,
+- .tag = cp->tag,
+- .nops = cp->req_nops,
+- };
+- int i, status = 0;
+-
+- encode_compound_hdr(xdr, &hdr);
+-
+- for (i = 0; i < cp->req_nops; i++) {
+- switch (cp->ops[i].opnum) {
+- case OP_ACCESS:
+- status = encode_access(xdr, &cp->ops[i].u.access);
+- break;
+- case OP_CREATE:
+- status = encode_create(xdr, &cp->ops[i].u.create, cp->server);
+- break;
+- case OP_GETATTR:
+- status = encode_getattr(xdr, &cp->ops[i].u.getattr);
+- break;
+- case OP_GETFH:
+- status = encode_getfh(xdr);
+- break;
+- case OP_LINK:
+- status = encode_link(xdr, &cp->ops[i].u.link);
+- break;
+- case OP_LOOKUP:
+- status = encode_lookup(xdr, &cp->ops[i].u.lookup);
+- break;
+- case OP_PUTFH:
+- status = encode_putfh(xdr, cp->ops[i].u.putfh.pf_fhandle);
+- break;
+- case OP_PUTROOTFH:
+- status = encode_putrootfh(xdr);
+- break;
+- case OP_READDIR:
+- status = encode_readdir(xdr, &cp->ops[i].u.readdir, req);
+- break;
+- case OP_READLINK:
+- status = encode_readlink(xdr, &cp->ops[i].u.readlink, req);
+- break;
+- case OP_REMOVE:
+- status = encode_remove(xdr, &cp->ops[i].u.remove);
+- break;
+- case OP_RENAME:
+- status = encode_rename(xdr, &cp->ops[i].u.rename);
+- break;
+- case OP_RESTOREFH:
+- status = encode_restorefh(xdr);
+- break;
+- case OP_SAVEFH:
+- status = encode_savefh(xdr);
+- break;
+- default:
+- BUG();
+- }
+- if (status)
+- return status;
+- }
+-
+- return 0;
+-}
+ /*
+ * END OF "GENERIC" ENCODE ROUTINES.
+ */
+
+-
+ /*
+- * Encode COMPOUND argument
++ * Encode ACCESS request
+ */
+ static int
+-nfs4_xdr_enc_compound(struct rpc_rqst *req, uint32_t *p, struct nfs4_compound *cp)
++nfs4_xdr_enc_access(struct rpc_rqst *req, uint32_t *p, struct nfs4_accessargs *args)
+ {
+ struct xdr_stream xdr;
++ struct compound_hdr hdr = {
++ .nops = 3,
++ };
+ int status;
+-
+- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+- status = encode_compound(&xdr, cp, req);
+- cp->timestamp = jiffies;
+- return status;
+-}
+-/*
+- * Encode a CLOSE request
+- */
+-static int
+-nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args)
+-{
+- struct xdr_stream xdr;
+- struct compound_hdr hdr = {
+- .nops = 2,
+- };
+- int status;
+
+- xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+- encode_compound_hdr(&xdr, &hdr);
+- status = encode_putfh(&xdr, args->fh);
+- if(status)
+- goto out;
+- status = encode_close(&xdr, args);
++ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++ encode_compound_hdr(&xdr, &hdr);
++ status = encode_putfh(&xdr, args->fhandle);
++ if (status)
++ goto out;
++ status = encode_getfattr(&xdr);
++ if (status)
++ goto out;
++ status = encode_access(&xdr, args->req_access);
+ out:
+- return status;
++ return status;
+ }
+
+ /*
+- * Encode an OPEN request
++ * Encode LOOKUP request
+ */
+ static int
+-nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_openargs *args)
++nfs4_xdr_enc_lookup(struct rpc_rqst *req, uint32_t *p, struct nfs4_lookupargs *args)
+ {
+ struct xdr_stream xdr;
+ struct compound_hdr hdr = {
+- .nops = 7,
++ .nops = 5,
+ };
+ int status;
+
+ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ encode_compound_hdr(&xdr, &hdr);
+- status = encode_putfh(&xdr, args->fh);
++ status = encode_putfh(&xdr, args->dir_fh);
+ if (status)
+ goto out;
+- status = encode_savefh(&xdr);
++ status = encode_getfattr(&xdr);
+ if (status)
+ goto out;
+- status = encode_open(&xdr, args);
++ status = encode_lookup(&xdr, args->name);
+ if (status)
+ goto out;
+- status = encode_getattr(&xdr, args->f_getattr);
++ status = encode_getfattr(&xdr);
+ if (status)
+ goto out;
+ status = encode_getfh(&xdr);
+- if (status)
+- goto out;
+- status = encode_restorefh(&xdr);
+- if (status)
+- goto out;
+- status = encode_getattr(&xdr, args->d_getattr);
+ out:
+ return status;
+ }
+
+ /*
+- * Encode an OPEN_CONFIRM request
++ * Encode GETROOT_HEAD request
+ */
+ static int
+-nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_open_confirmargs *args)
++nfs4_xdr_enc_getroot_head(struct rpc_rqst *req, uint32_t *p, void *args)
+ {
+ struct xdr_stream xdr;
+ struct compound_hdr hdr = {
+- .nops = 2,
++ .nops = 3,
+ };
+ int status;
+
+ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ encode_compound_hdr(&xdr, &hdr);
+- status = encode_putfh(&xdr, args->fh);
+- if(status)
++ status = encode_putrootfh(&xdr);
++ if (status)
+ goto out;
+- status = encode_open_confirm(&xdr, args);
++ status = encode_getfattr(&xdr);
++ if (status)
++ goto out;
++ status = encode_getfh(&xdr);
+ out:
+ return status;
+ }
+
+ /*
+- * Encode an OPEN request
++ * Encode GETROOT_PATH request
+ */
+ static int
+-nfs4_xdr_enc_open_reclaim(struct rpc_rqst *req, uint32_t *p,
+- struct nfs_open_reclaimargs *args)
++nfs4_xdr_enc_getroot_path(struct rpc_rqst *req, uint32_t *p, struct nfs4_getroot_arg *args)
+ {
+ struct xdr_stream xdr;
+ struct compound_hdr hdr = {
+- .nops = 3,
++ .nops = 4,
+ };
+ int status;
+
+ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ encode_compound_hdr(&xdr, &hdr);
+- status = encode_putfh(&xdr, args->fh);
++ status = encode_putfh(&xdr, args->fhandle);
+ if (status)
+ goto out;
+- status = encode_open_reclaim(&xdr, args);
++ status = encode_lookup(&xdr, args->name);
++ if (status)
++ goto out;
++ status = encode_getfattr(&xdr);
+ if (status)
+ goto out;
+- status = encode_getattr(&xdr, args->f_getattr);
++ status = encode_getfh(&xdr);
+ out:
+ return status;
+ }
+
+ /*
+- * Encode an OPEN_DOWNGRADE request
++ * Encode REMOVE request
+ */
+ static int
+-nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args)
++nfs4_xdr_enc_remove(struct rpc_rqst *req, uint32_t *p, struct nfs4_remove_arg *args)
+ {
+ struct xdr_stream xdr;
+ struct compound_hdr hdr = {
+- .nops = 2,
++ .nops = 3,
+ };
+ int status;
+
+ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ encode_compound_hdr(&xdr, &hdr);
+- status = encode_putfh(&xdr, args->fh);
++ status = encode_putfh(&xdr, args->fhandle);
+ if (status)
+ goto out;
+- status = encode_open_downgrade(&xdr, args);
++ status = encode_remove(&xdr, args->name);
++ if (status)
++ goto out;
++ status = encode_getfattr(&xdr);
+ out:
+ return status;
+ }
+
+ /*
+- * Encode a LOCK request
++ * Encode UNLINK request
+ */
+ static int
+-nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
++nfs4_xdr_enc_unlink(struct rpc_rqst *req, uint32_t *p, struct nfs4_unlink *args)
+ {
+ struct xdr_stream xdr;
+ struct compound_hdr hdr = {
+- .nops = 2,
++ .nops = 3,
+ };
+ int status;
+
+ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ encode_compound_hdr(&xdr, &hdr);
+ status = encode_putfh(&xdr, args->fh);
+- if(status)
++ if (status)
+ goto out;
+- status = encode_lock(&xdr, args);
++ status = encode_remove(&xdr, args->name);
++ if (status)
++ goto out;
++ status = encode_getfattr(&xdr);
+ out:
+ return status;
+ }
+
+ /*
+- * Encode a LOCKT request
++ * Encode RENAME request
+ */
+ static int
+-nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
++nfs4_xdr_enc_rename(struct rpc_rqst *req, uint32_t *p, struct nfs4_rename_arg *args)
+ {
+ struct xdr_stream xdr;
+ struct compound_hdr hdr = {
+- .nops = 2,
++ .nops = 7,
+ };
+ int status;
+
+ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ encode_compound_hdr(&xdr, &hdr);
+- status = encode_putfh(&xdr, args->fh);
+- if(status)
++ status = encode_putfh(&xdr, args->old_dir);
++ if (status)
+ goto out;
+- status = encode_lockt(&xdr, args);
++ status = encode_savefh(&xdr);
++ if (status)
++ goto out;
++ status = encode_putfh(&xdr, args->new_dir);
++ if (status)
++ goto out;
++ status = encode_rename(&xdr, args->old_name, args->new_name);
++ if (status)
++ goto out;
++ status = encode_getfattr(&xdr);
++ if (status)
++ goto out;
++ status = encode_restorefh(&xdr);
++ if (status)
++ goto out;
++ status = encode_getfattr(&xdr);
+ out:
+ return status;
+ }
+
+ /*
+- * Encode a LOCKU request
++ * Encode LINK request
+ */
+ static int
+-nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
++nfs4_xdr_enc_link(struct rpc_rqst *req, uint32_t *p, struct nfs4_link_arg *args)
+ {
+ struct xdr_stream xdr;
+ struct compound_hdr hdr = {
+- .nops = 2,
++ .nops = 7,
+ };
+ int status;
+
+ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ encode_compound_hdr(&xdr, &hdr);
+ status = encode_putfh(&xdr, args->fh);
+- if(status)
++ if (status)
+ goto out;
+- status = encode_locku(&xdr, args);
++ status = encode_savefh(&xdr);
++ if (status)
++ goto out;
++ status = encode_putfh(&xdr, args->dir_fh);
++ if (status)
++ goto out;
++ status = encode_link(&xdr, args->name);
++ if (status)
++ goto out;
++ status = encode_getfattr(&xdr);
++ if (status)
++ goto out;
++ status = encode_restorefh(&xdr);
++ if (status)
++ goto out;
++ status = encode_getfattr(&xdr);
+ out:
+ return status;
+ }
+
+ /*
+- * Encode a READ request
++ * Encode CREATE request
+ */
+ static int
+-nfs4_xdr_enc_read(struct rpc_rqst *req, uint32_t *p, struct nfs_readargs *args)
++nfs4_xdr_enc_create(struct rpc_rqst *req, uint32_t *p, struct nfs4_create_arg *args)
+ {
+- struct rpc_auth *auth = req->rq_task->tk_auth;
+ struct xdr_stream xdr;
+ struct compound_hdr hdr = {
+- .nops = 3,
++ .nops = 7,
+ };
+- int replen, status;
++ int status;
+
+ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
+ encode_compound_hdr(&xdr, &hdr);
+- status = encode_putfh(&xdr, args->fh);
++ status = encode_putfh(&xdr, args->dir_fh);
+ if (status)
+ goto out;
+- status = encode_read(&xdr, args);
++ status = encode_savefh(&xdr);
+ if (status)
+ goto out;
+- status = encode_read_getattr(&xdr);
+-
+- /* set up reply iovec
+- * toplevel status + taglen=0 + rescount + OP_PUTFH + status
++ status = encode_create(&xdr, args);
++ if (status)
++ goto out;
++ status = encode_getfattr(&xdr);
++ if (status)
++ goto out;
++ status = encode_getfh(&xdr);
++ if (status)
++ goto out;
++ status = encode_restorefh(&xdr);
++ if (status)
++ goto out;
++ status = encode_getfattr(&xdr);
++out:
++ return status;
++}
++
++/*
++ * Encode GETATTR request
++ */
++static int
++nfs4_xdr_enc_getattr(struct rpc_rqst *req, uint32_t *p, struct nfs_fh *fh)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr = {
++ .nops = 2,
++ };
++ int status;
++
++ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++ encode_compound_hdr(&xdr, &hdr);
++ status = encode_putfh(&xdr, fh);
++ if (status)
++ goto out;
++ status = encode_getfattr(&xdr);
++ out:
++ return status;
++}
++
++/*
++ * Encode a CLOSE request
++ */
++static int
++nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr = {
++ .nops = 2,
++ };
++ int status;
++
++ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++ encode_compound_hdr(&xdr, &hdr);
++ status = encode_putfh(&xdr, args->fh);
++ if(status)
++ goto out;
++ status = encode_close(&xdr, args);
++out:
++ return status;
++}
++
++/*
++ * Encode an OPEN request
++ */
++static int
++nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_openargs *args)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr = {
++ .nops = 7,
++ };
++ int status;
++
++ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++ encode_compound_hdr(&xdr, &hdr);
++ status = encode_putfh(&xdr, args->fh);
++ if (status)
++ goto out;
++ status = encode_savefh(&xdr);
++ if (status)
++ goto out;
++ status = encode_open(&xdr, args);
++ if (status)
++ goto out;
++ status = encode_getfattr(&xdr);
++ if (status)
++ goto out;
++ status = encode_getfh(&xdr);
++ if (status)
++ goto out;
++ status = encode_restorefh(&xdr);
++ if (status)
++ goto out;
++ status = encode_getfattr(&xdr);
++out:
++ return status;
++}
++
++/*
++ * Encode an OPEN_CONFIRM request
++ */
++static int
++nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_open_confirmargs *args)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr = {
++ .nops = 2,
++ };
++ int status;
++
++ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++ encode_compound_hdr(&xdr, &hdr);
++ status = encode_putfh(&xdr, args->fh);
++ if(status)
++ goto out;
++ status = encode_open_confirm(&xdr, args);
++out:
++ return status;
++}
++
++/*
++ * Encode an OPEN reclaim request
++ */
++static int
++nfs4_xdr_enc_open_reclaim(struct rpc_rqst *req, uint32_t *p,
++ struct nfs_open_reclaimargs *args)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr = {
++ .nops = 3,
++ };
++ int status;
++
++ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++ encode_compound_hdr(&xdr, &hdr);
++ status = encode_putfh(&xdr, args->fh);
++ if (status)
++ goto out;
++ status = encode_open_reclaim(&xdr, args);
++ if (status)
++ goto out;
++ status = encode_getfattr(&xdr);
++out:
++ return status;
++}
++
++/*
++ * Encode an OPEN_DOWNGRADE request
++ */
++static int
++nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr = {
++ .nops = 2,
++ };
++ int status;
++
++ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++ encode_compound_hdr(&xdr, &hdr);
++ status = encode_putfh(&xdr, args->fh);
++ if (status)
++ goto out;
++ status = encode_open_downgrade(&xdr, args);
++out:
++ return status;
++}
++
++/*
++ * Encode a LOCK request
++ */
++static int
++nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr = {
++ .nops = 2,
++ };
++ int status;
++
++ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++ encode_compound_hdr(&xdr, &hdr);
++ status = encode_putfh(&xdr, args->fh);
++ if(status)
++ goto out;
++ status = encode_lock(&xdr, args);
++out:
++ return status;
++}
++
++/*
++ * Encode a LOCKT request
++ */
++static int
++nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr = {
++ .nops = 2,
++ };
++ int status;
++
++ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++ encode_compound_hdr(&xdr, &hdr);
++ status = encode_putfh(&xdr, args->fh);
++ if(status)
++ goto out;
++ status = encode_lockt(&xdr, args);
++out:
++ return status;
++}
++
++/*
++ * Encode a LOCKU request
++ */
++static int
++nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr = {
++ .nops = 2,
++ };
++ int status;
++
++ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++ encode_compound_hdr(&xdr, &hdr);
++ status = encode_putfh(&xdr, args->fh);
++ if(status)
++ goto out;
++ status = encode_locku(&xdr, args);
++out:
++ return status;
++}
++
++/*
++ * Encode a READLINK request
++ */
++static int
++nfs4_xdr_enc_readlink(struct rpc_rqst *req, uint32_t *p, struct nfs4_readlink *args)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr = {
++ .nops = 2,
++ };
++ int status;
++
++ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++ encode_compound_hdr(&xdr, &hdr);
++ status = encode_putfh(&xdr, args->fh);
++ if(status)
++ goto out;
++ status = encode_readlink(&xdr, args, req);
++out:
++ return status;
++}
++
++/*
++ * Encode a READDIR request
++ */
++static int
++nfs4_xdr_enc_readdir(struct rpc_rqst *req, uint32_t *p, struct nfs4_readdir_arg *args)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr = {
++ .nops = 2,
++ };
++ int status;
++
++ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++ encode_compound_hdr(&xdr, &hdr);
++ status = encode_putfh(&xdr, args->fh);
++ if(status)
++ goto out;
++ status = encode_readdir(&xdr, args, req);
++out:
++ return status;
++}
++
++/*
++ * Encode a READ request
++ */
++static int
++nfs4_xdr_enc_read(struct rpc_rqst *req, uint32_t *p, struct nfs_readargs *args)
++{
++ struct rpc_auth *auth = req->rq_task->tk_auth;
++ struct xdr_stream xdr;
++ struct compound_hdr hdr = {
++ .nops = 3,
++ };
++ int replen, status;
++
++ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++ encode_compound_hdr(&xdr, &hdr);
++ status = encode_putfh(&xdr, args->fh);
++ if (status)
++ goto out;
++ status = encode_read(&xdr, args);
++ if (status)
++ goto out;
++ status = encode_read_getattr(&xdr);
++
++ /* set up reply iovec
++ * toplevel status + taglen=0 + rescount + OP_PUTFH + status
+ * + OP_READ + status + eof + datalen = 9
+ */
+ replen = (RPC_REPHDRSIZE + auth->au_rslack +
+@@ -1405,12 +1851,62 @@ nfs4_xdr_enc_setattr(struct rpc_rqst *re
+ status = encode_setattr(&xdr, args, args->server);
+ if(status)
+ goto out;
+- status = encode_getattr(&xdr, args->attr);
++ status = encode_getfattr(&xdr);
++out:
++ return status;
++}
++
++#ifdef CONFIG_NFS_V4_ACL
++
++/*
++ * Encode a SETACL request
++ */
++static int
++nfs4_xdr_enc_setacl(struct rpc_rqst *req, uint32_t *p, struct nfs_setaclargs *args)
++
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr = {
++ .nops = 2,
++ };
++ int status;
++
++ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++ encode_compound_hdr(&xdr, &hdr);
++ status = encode_putfh(&xdr, args->fh);
++ if(status)
++ goto out;
++ status = encode_setacl(&xdr, args);
+ out:
+ return status;
+ }
+
+ /*
++ * Encode a GETACL request
++ */
++static int
++nfs4_xdr_enc_getacl(struct rpc_rqst *req, uint32_t *p,struct nfs_fh *fhandle)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr = {
++ .nops = 2,
++ };
++ int status;
++
++ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++ encode_compound_hdr(&xdr, &hdr);
++ status = encode_putfh(&xdr, fhandle);
++ if (status)
++ goto out;
++ status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0);
++out:
++ return status;
++
++}
++
++#endif /* CONFIG_NFS_V4_ACL */
++
++/*
+ * Encode a WRITE request
+ */
+ static int
+@@ -1487,6 +1983,48 @@ nfs4_xdr_enc_fsinfo(struct rpc_rqst *req
+ }
+
+ /*
++ * Encode a PATHCONF request
++ */
++static int
++nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, struct nfs_fh *fhandle)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr = {
++ .nops = 2,
++ };
++ int status;
++
++ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++ encode_compound_hdr(&xdr, &hdr);
++ status = encode_putfh(&xdr, fhandle);
++ if (!status)
++ status = encode_getattr_one(&xdr,FATTR4_WORD0_MAXLINK |
++ FATTR4_WORD0_MAXNAME );
++ return status;
++}
++
++/*
++ * Encode a STATFS request
++ */
++static int
++nfs4_xdr_enc_statfs(struct rpc_rqst *req, uint32_t *p, struct nfs_fh *fhandle)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr = {
++ .nops = 2,
++ };
++ int status;
++
++ xdr_init_encode(&xdr, &req->rq_snd_buf, p);
++ encode_compound_hdr(&xdr, &hdr);
++ status = encode_putfh(&xdr, fhandle);
++ if (!status)
++ status = encode_getattr_two(&xdr,nfs4_statfs_bitmap[0],
++ nfs4_statfs_bitmap[1]);
++ return status;
++}
++
++/*
+ * a RENEW request
+ */
+ static int
+@@ -1636,7 +2174,7 @@ decode_change_info(struct xdr_stream *xd
+ }
+
+ static int
+-decode_access(struct xdr_stream *xdr, struct nfs4_access *access)
++decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access)
+ {
+ uint32_t *p;
+ uint32_t supp, acc;
+@@ -1648,12 +2186,12 @@ decode_access(struct xdr_stream *xdr, st
+ READ_BUF(8);
+ READ32(supp);
+ READ32(acc);
+- if ((supp & ~access->ac_req_access) || (acc & ~supp)) {
++ if ((supp & ~access->req_access) || (acc & ~supp)) {
+ printk(KERN_NOTICE "NFS: server returned bad bits in access call!\n");
+ return -EIO;
+ }
+- *access->ac_resp_supported = supp;
+- *access->ac_resp_access = acc;
++ *access->resp_supported = supp;
++ *access->resp_access = acc;
+ return 0;
+ }
+
+@@ -1686,7 +2224,7 @@ decode_commit(struct xdr_stream *xdr, st
+ }
+
+ static int
+-decode_create(struct xdr_stream *xdr, struct nfs4_create *create)
++decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
+ {
+ uint32_t *p;
+ uint32_t bmlen;
+@@ -1695,7 +2233,7 @@ decode_create(struct xdr_stream *xdr, st
+ status = decode_op_hdr(xdr, OP_CREATE);
+ if (status)
+ return status;
+- if ((status = decode_change_info(xdr, create->cr_cinfo)))
++ if ((status = decode_change_info(xdr, cinfo)))
+ return status;
+ READ_BUF(4);
+ READ32(bmlen);
+@@ -1703,17 +2241,144 @@ decode_create(struct xdr_stream *xdr, st
+ return 0;
+ }
+
+-extern uint32_t nfs4_fattr_bitmap[2];
+ extern uint32_t nfs4_fsstat_bitmap[2];
+-extern uint32_t nfs4_pathconf_bitmap[2];
+
+ static int
+-decode_getattr(struct xdr_stream *xdr, struct nfs4_getattr *getattr,
++decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat)
++{
++ uint32_t attrlen, bmlen,
++ bmval0 = 0,
++ bmval1 = 0,
++ len = 0;
++ uint32_t *p;
++ int status;
++
++ status = decode_op_hdr(xdr, OP_GETATTR);
++ if (status)
++ return status;
++
++ READ_BUF(4);
++ READ32(bmlen);
++ if (bmlen > 2)
++ goto xdr_error;
++
++ READ_BUF((bmlen << 2) + 4);
++ if (bmlen > 0)
++ READ32(bmval0);
++ if (bmlen > 1)
++ READ32(bmval1);
++ READ32(attrlen);
++
++ if ((bmval0 & ~nfs4_statfs_bitmap[0]) ||
++ (bmval1 & ~nfs4_statfs_bitmap[1])) {
++ dprintk("read_attrs: server returned bad attributes!\n");
++ goto xdr_error;
++ }
++
++ if (bmval0 & FATTR4_WORD0_FILES_AVAIL) {
++ READ_BUF(8);
++ len += 8;
++ READ64(fsstat->afiles);
++ dprintk("read_attrs: files_avail=0x%Lx\n", (long long) fsstat->afiles);
++ }
++ if (bmval0 & FATTR4_WORD0_FILES_FREE) {
++ READ_BUF(8);
++ len += 8;
++ READ64(fsstat->ffiles);
++ dprintk("read_attrs: files_free=0x%Lx\n", (long long) fsstat->ffiles);
++ }
++ if (bmval0 & FATTR4_WORD0_FILES_TOTAL) {
++ READ_BUF(8);
++ len += 8;
++ READ64(fsstat->tfiles);
++ dprintk("read_attrs: files_tot=0x%Lx\n", (long long) fsstat->tfiles);
++ }
++
++ if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) {
++ READ_BUF(8);
++ len += 8;
++ READ64(fsstat->abytes);
++ dprintk("read_attrs: savail=0x%Lx\n", (long long) fsstat->abytes);
++ }
++ if (bmval1 & FATTR4_WORD1_SPACE_FREE) {
++ READ_BUF(8);
++ len += 8;
++ READ64(fsstat->fbytes);
++ dprintk("read_attrs: sfree=0x%Lx\n", (long long) fsstat->fbytes);
++ }
++ if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) {
++ READ_BUF(8);
++ len += 8;
++ READ64(fsstat->tbytes);
++ dprintk("read_attrs: stotal=0x%Lx\n", (long long) fsstat->tbytes);
++ }
++ if (len != attrlen)
++ goto xdr_error;
++
++ DECODE_TAIL;
++}
++
++static int
++decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf)
++{
++ uint32_t bmlen,
++ attrlen = 0,
++ bmval0 = 0,
++ bmval1 = 0,
++ len = 0;
++ uint32_t *p;
++ int status;
++
++ status = decode_op_hdr(xdr, OP_GETATTR);
++ if (status)
++ return status;
++
++ READ_BUF(4);
++ READ32(bmlen);
++ if ( (bmlen < 1) || (bmlen >2) )
++ goto xdr_error;
++ READ_BUF((bmlen << 2) + 4);
++ READ32(bmval0);
++ if (bmval0 & ~(FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME)) {
++ goto out_bad_bitmap;
++ }
++ if (bmlen == 2) {
++ READ32(bmval1);
++ if (bmval1 != 0)
++ goto out_bad_bitmap;
++ }
++
++ READ32(attrlen);
++ if (bmval0 & FATTR4_WORD0_MAXLINK) {
++ READ_BUF(4);
++ len += 4;
++ READ32(pathconf->max_link);
++ dprintk("read_attrs: maxlink=%d\n", pathconf->max_link);
++ }
++ if (bmval0 & FATTR4_WORD0_MAXNAME) {
++ READ_BUF(4);
++ len += 4;
++ READ32(pathconf->max_namelen);
++ dprintk("read_attrs: maxname=%d\n", pathconf->max_namelen);
++ }
++
++ if (len != attrlen)
++ goto xdr_error;
++ return 0;
++
++out_bad_bitmap:
++ printk(KERN_NOTICE "%s: server returned bad attribute bitmap\n",__FUNCTION__);
++ return -EIO;
++
++xdr_error:
++ printk(KERN_NOTICE "xdr error! (%s:%d)\n", __FILE__, __LINE__);
++ return -EIO;
++}
++
++static int
++decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *nfp,
+ struct nfs_server *server)
+ {
+- struct nfs_fattr *nfp = getattr->gt_attrs;
+- struct nfs_fsstat *fsstat = getattr->gt_fsstat;
+- struct nfs_pathconf *pathconf = getattr->gt_pathconf;
+ uint32_t attrlen, dummy32, bmlen,
+ bmval0 = 0,
+ bmval1 = 0,
+@@ -1739,25 +2404,25 @@ decode_getattr(struct xdr_stream *xdr, s
+ READ32(bmval1);
+ READ32(attrlen);
+
+- if ((bmval0 & ~getattr->gt_bmval[0]) ||
+- (bmval1 & ~getattr->gt_bmval[1])) {
++ if ((bmval0 & ~nfs4_fattr_bitmap[0]) ||
++ (bmval1 & ~nfs4_fattr_bitmap[1])) {
+ dprintk("read_attrs: server returned bad attributes!\n");
+ goto xdr_error;
+ }
+- if (nfp) {
+- nfp->bitmap[0] = bmval0;
+- nfp->bitmap[1] = bmval1;
+- }
++
++ BUG_ON(!nfp);
++
++ nfp->bitmap[0] = bmval0;
++ nfp->bitmap[1] = bmval1;
+
+ /*
+ * In case the server doesn't return some attributes,
+ * we initialize them here to some nominal values..
+ */
+- if (nfp) {
+- nfp->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4;
+- nfp->nlink = 1;
+- nfp->timestamp = jiffies;
+- }
++ nfp->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4;
++ nfp->nlink = 1;
++ nfp->timestamp = jiffies;
++
+ if (bmval0 & FATTR4_WORD0_TYPE) {
+ READ_BUF(4);
+ len += 4;
+@@ -1797,37 +2462,6 @@ decode_getattr(struct xdr_stream *xdr, s
+ READ64(nfp->fileid);
+ dprintk("read_attrs: fileid=%Ld\n", (long long) nfp->fileid);
+ }
+- if (bmval0 & FATTR4_WORD0_FILES_AVAIL) {
+- READ_BUF(8);
+- len += 8;
+- READ64(fsstat->afiles);
+- dprintk("read_attrs: files_avail=0x%Lx\n", (long long) fsstat->afiles);
+- }
+- if (bmval0 & FATTR4_WORD0_FILES_FREE) {
+- READ_BUF(8);
+- len += 8;
+- READ64(fsstat->ffiles);
+- dprintk("read_attrs: files_free=0x%Lx\n", (long long) fsstat->ffiles);
+- }
+- if (bmval0 & FATTR4_WORD0_FILES_TOTAL) {
+- READ_BUF(8);
+- len += 8;
+- READ64(fsstat->tfiles);
+- dprintk("read_attrs: files_tot=0x%Lx\n", (long long) fsstat->tfiles);
+- }
+- if (bmval0 & FATTR4_WORD0_MAXLINK) {
+- READ_BUF(4);
+- len += 4;
+- READ32(pathconf->max_link);
+- dprintk("read_attrs: maxlink=%d\n", pathconf->max_link);
+- }
+- if (bmval0 & FATTR4_WORD0_MAXNAME) {
+- READ_BUF(4);
+- len += 4;
+- READ32(pathconf->max_namelen);
+- dprintk("read_attrs: maxname=%d\n", pathconf->max_namelen);
+- }
+-
+ if (bmval1 & FATTR4_WORD1_MODE) {
+ READ_BUF(4);
+ len += 4;
+@@ -1851,9 +2485,11 @@ decode_getattr(struct xdr_stream *xdr, s
+ }
+ READ_BUF(dummy32);
+ len += (XDR_QUADLEN(dummy32) << 2);
+- if ((status = nfs_map_name_to_uid(server->nfs4_state, (char *)p, dummy32,
+- &nfp->uid)) < 0) {
+- dprintk("read_attrs: name-to-uid mapping failed!\n");
++ status = nfs_map_name_to_uid(server->nfs4_state, (char *)p,
++ dummy32, &nfp->uid);
++ if (status) {
++ dprintk("read_attrs: nfs_map_name_to_uid failed!\n");
++ /* goto out; */
+ nfp->uid = -2;
+ }
+ dprintk("read_attrs: uid=%d\n", (int)nfp->uid);
+@@ -1868,10 +2504,12 @@ decode_getattr(struct xdr_stream *xdr, s
+ }
+ READ_BUF(dummy32);
+ len += (XDR_QUADLEN(dummy32) << 2);
+- if ((status = nfs_map_group_to_gid(server->nfs4_state, (char *)p, dummy32,
+- &nfp->gid)) < 0) {
+- dprintk("read_attrs: group-to-gid mapping failed!\n");
++ status = nfs_map_group_to_gid(server->nfs4_state, (char *)p,
++ dummy32, &nfp->gid);
++ if (status) {
++ dprintk("read_attrs: gss_get_num failed!\n");
+ nfp->gid = -2;
++ /* goto out; */
+ }
+ dprintk("read_attrs: gid=%d\n", (int)nfp->gid);
+ }
+@@ -1882,28 +2520,10 @@ decode_getattr(struct xdr_stream *xdr, s
+ len += 8;
+ READ32(major);
+ READ32(minor);
+- nfp->rdev = MKDEV(major, minor);
+- if (MAJOR(nfp->rdev) != major || MINOR(nfp->rdev) != minor)
+- nfp->rdev = 0;
+- dprintk("read_attrs: rdev=%u:%u\n", major, minor);
+- }
+- if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) {
+- READ_BUF(8);
+- len += 8;
+- READ64(fsstat->abytes);
+- dprintk("read_attrs: savail=0x%Lx\n", (long long) fsstat->abytes);
+- }
+- if (bmval1 & FATTR4_WORD1_SPACE_FREE) {
+- READ_BUF(8);
+- len += 8;
+- READ64(fsstat->fbytes);
+- dprintk("read_attrs: sfree=0x%Lx\n", (long long) fsstat->fbytes);
+- }
+- if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) {
+- READ_BUF(8);
+- len += 8;
+- READ64(fsstat->tbytes);
+- dprintk("read_attrs: stotal=0x%Lx\n", (long long) fsstat->tbytes);
++ nfp->rdev = MKDEV(major, minor);
++ if (MAJOR(nfp->rdev) != major || MINOR(nfp->rdev) != minor)
++ nfp->rdev = 0;
++ dprintk("read_attrs: rdev=%u:%u\n", major, minor);
+ }
+ if (bmval1 & FATTR4_WORD1_SPACE_USED) {
+ READ_BUF(8);
+@@ -1935,6 +2555,88 @@ decode_getattr(struct xdr_stream *xdr, s
+ DECODE_TAIL;
+ }
+
++#ifdef CONFIG_NFS_V4_ACL
++
++static int
++decode_getacl(struct xdr_stream *xdr, struct nfs4_acl **aclp)
++{
++ uint32_t attrlen, bmlen,
++ bmval0 = 0,
++ bmval1 = 0,
++ len = 0;
++ uint32_t *p;
++ int status;
++
++ status = decode_op_hdr(xdr, OP_GETATTR);
++ if (status)
++ return status;
++
++ READ_BUF(4);
++ READ32(bmlen);
++ if (bmlen > 2)
++ goto xdr_error;
++
++ READ_BUF((bmlen << 2) + 4);
++ if (bmlen > 0)
++ READ32(bmval0);
++ if (bmlen > 1)
++ READ32(bmval1);
++ READ32(attrlen);
++
++ if ((bmval0 & ~FATTR4_WORD0_ACL) || (bmval1)) {
++ dprintk("read_attrs: server returned bad attributes!\n");
++ goto xdr_error;
++ }
++ if (bmval0 & FATTR4_WORD0_ACL) {
++ struct nfs4_acl *acl;
++ struct nfs4_ace ace;
++ int i;
++ u_int nace;
++
++ if (aclp == NULL)
++ goto xdr_error; /* XXX MARIUS */
++
++ READ_BUF(4); len += 4;
++ READ32(nace);
++
++ if (nace == 0) {
++ *aclp = NULL;
++ goto out_acl;
++ }
++
++ acl = *aclp = nfs4_acl_new();
++ if (acl == NULL) {
++ status = -ENOMEM;
++ goto out;
++ }
++
++ for (i = 0; i < nace; i++) {
++ READ_BUF(16); len += 16;
++ READ32(ace.type);
++ READ32(ace.flag);
++ READ32(ace.access_mask);
++ ace.access_mask &= NFS4_ACE_MASK_ALL;
++ READ32(ace.wholen);
++ READ_BUF(ace.wholen);
++ len += XDR_QUADLEN(ace.wholen) << 2;
++ status = nfs4_acl_add_ace(acl, ace.type, ace.flag,
++ ace.access_mask, (char *)p, ace.wholen);
++ if (status < 0)
++ goto out;
++ p += XDR_QUADLEN(ace.wholen);
++ }
++ } else if (aclp != NULL)
++ *aclp = NULL;
++out_acl:
++
++ if (len != attrlen)
++ goto xdr_error;
++
++ DECODE_TAIL;
++}
++
++#endif /* CONFIG_NFS_V4_ACL */
++
+ static int
+ decode_change_attr(struct xdr_stream *xdr, uint64_t *change_attr)
+ {
+@@ -2067,6 +2769,77 @@ out_bad_bitmap:
+ return -EIO;
+ }
+
++static int
++decode_putfh(struct xdr_stream *xdr)
++{
++ return decode_op_hdr(xdr, OP_PUTFH);
++}
++
++static int
++decode_setattr(struct xdr_stream *xdr)
++{
++ uint32_t *p;
++ uint32_t bmlen;
++ int status;
++
++
++ status = decode_op_hdr(xdr, OP_SETATTR);
++ if (status)
++ return status;
++ READ_BUF(4);
++ READ32(bmlen);
++ READ_BUF(bmlen << 2);
++ return 0;
++}
++
++#ifdef CONFIG_NFS_V4_ACL
++
++/*
++ * Decode SETACL response
++ */
++static int
++nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, uint32_t *p, void *res)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr;
++ int status;
++
++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++ status = decode_compound_hdr(&xdr, &hdr);
++ if (status)
++ goto out;
++ status = decode_putfh(&xdr);
++ if (status)
++ goto out;
++ status = decode_setattr(&xdr);
++out:
++ return status;
++}
++
++/*
++ * Decode GETACL response
++ */
++static int
++nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_acl **res)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr;
++ int status;
++
++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++ status = decode_compound_hdr(&xdr, &hdr);
++ if (status)
++ goto out;
++ status = decode_putfh(&xdr);
++ if (status)
++ goto out;
++ status = decode_getacl(&xdr, res);
++
++out:
++ return status;
++}
++
++#endif /* CONFIG_NFS_V4_ACL */
+
+ static int
+ decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo)
+@@ -2137,9 +2910,8 @@ out_bad_bitmap:
+ }
+
+ static int
+-decode_getfh(struct xdr_stream *xdr, struct nfs4_getfh *getfh)
++decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh)
+ {
+- struct nfs_fh *fh = getfh->gf_fhandle;
+ uint32_t *p;
+ uint32_t len;
+ int status;
+@@ -2161,14 +2933,14 @@ decode_getfh(struct xdr_stream *xdr, str
+ }
+
+ static int
+-decode_link(struct xdr_stream *xdr, struct nfs4_link *link)
++decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
+ {
+ int status;
+
+ status = decode_op_hdr(xdr, OP_LINK);
+ if (status)
+ return status;
+- return decode_change_info(xdr, link->ln_cinfo);
++ return decode_change_info(xdr, cinfo);
+ }
+
+ /*
+@@ -2296,12 +3068,6 @@ decode_open_downgrade(struct xdr_stream
+ }
+
+ static int
+-decode_putfh(struct xdr_stream *xdr)
+-{
+- return decode_op_hdr(xdr, OP_PUTFH);
+-}
+-
+-static int
+ decode_putrootfh(struct xdr_stream *xdr)
+ {
+ return decode_op_hdr(xdr, OP_PUTROOTFH);
+@@ -2336,7 +3102,7 @@ decode_read(struct xdr_stream *xdr, stru
+ }
+
+ static int
+-decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir *readdir)
++decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir)
+ {
+ struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
+ struct page *page = *rcvbuf->pages;
+@@ -2350,7 +3116,7 @@ decode_readdir(struct xdr_stream *xdr, s
+ if (status)
+ return status;
+ READ_BUF(8);
+- COPYMEM(readdir->rd_resp_verifier.data, 8);
++ COPYMEM(readdir->resp_verifier.data, 8);
+
+ hdrlen = (char *) p - (char *) iov->iov_base;
+ recvd = req->rq_received - hdrlen;
+@@ -2358,9 +3124,9 @@ decode_readdir(struct xdr_stream *xdr, s
+ pglen = recvd;
+ xdr_read_pages(xdr, pglen);
+
+- BUG_ON(pglen + readdir->rd_pgbase > PAGE_CACHE_SIZE);
++ BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE);
+ kaddr = p = (uint32_t *) kmap_atomic(page, KM_USER0);
+- end = (uint32_t *) ((char *)p + pglen + readdir->rd_pgbase);
++ end = (uint32_t *) ((char *)p + pglen + readdir->pgbase);
+ entry = p;
+ for (nr = 0; *p++; nr++) {
+ if (p + 3 > end)
+@@ -2421,7 +3187,7 @@ err_unmap:
+ }
+
+ static int
+-decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readlink *readlink)
++decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req)
+ {
+ struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
+ struct iovec *iov = rcvbuf->head;
+@@ -2469,30 +3235,30 @@ decode_restorefh(struct xdr_stream *xdr)
+ }
+
+ static int
+-decode_remove(struct xdr_stream *xdr, struct nfs4_remove *remove)
++decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo)
+ {
+ int status;
+
+ status = decode_op_hdr(xdr, OP_REMOVE);
+ if (status)
+ goto out;
+- status = decode_change_info(xdr, remove->rm_cinfo);
++ status = decode_change_info(xdr, cinfo);
+ out:
+ return status;
+ }
+
+ static int
+-decode_rename(struct xdr_stream *xdr, struct nfs4_rename *rename)
++decode_rename(struct xdr_stream *xdr, struct nfs4_change_info *old_cinfo,
++ struct nfs4_change_info *new_cinfo)
+ {
+ int status;
+
+ status = decode_op_hdr(xdr, OP_RENAME);
+ if (status)
+ goto out;
+- if ((status = decode_change_info(xdr, rename->rn_src_cinfo)))
+- goto out;
+- if ((status = decode_change_info(xdr, rename->rn_dst_cinfo)))
++ if ((status = decode_change_info(xdr, old_cinfo)))
+ goto out;
++ status = decode_change_info(xdr, new_cinfo);
+ out:
+ return status;
+ }
+@@ -2510,23 +3276,6 @@ decode_savefh(struct xdr_stream *xdr)
+ }
+
+ static int
+-decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res)
+-{
+- uint32_t *p;
+- uint32_t bmlen;
+- int status;
+-
+-
+- status = decode_op_hdr(xdr, OP_SETATTR);
+- if (status)
+- return status;
+- READ_BUF(4);
+- READ32(bmlen);
+- READ_BUF(bmlen << 2);
+- return 0;
+-}
+-
+-static int
+ decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp)
+ {
+ uint32_t *p;
+@@ -2566,158 +3315,348 @@ decode_setclientid(struct xdr_stream *xd
+ }
+
+ static int
+-decode_setclientid_confirm(struct xdr_stream *xdr)
++decode_setclientid_confirm(struct xdr_stream *xdr)
++{
++ return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM);
++}
++
++static int
++decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
++{
++ uint32_t *p;
++ int status;
++
++ status = decode_op_hdr(xdr, OP_WRITE);
++ if (status)
++ return status;
++
++ READ_BUF(16);
++ READ32(res->count);
++ READ32(res->verf->committed);
++ COPYMEM(res->verf->verifier, 8);
++ return 0;
++}
++
++/*
++ * Decode OPEN_DOWNGRADE response
++ */
++static int
++nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr;
++ int status;
++
++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++ status = decode_compound_hdr(&xdr, &hdr);
++ if (status)
++ goto out;
++ status = decode_putfh(&xdr);
++ if (status)
++ goto out;
++ status = decode_open_downgrade(&xdr, res);
++out:
++ return status;
++}
++
++/*
++ * END OF "GENERIC" DECODE ROUTINES.
++ */
++
++/*
++ * Decode ACCESS response
++ */
++static int
++nfs4_xdr_dec_access(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_accessres *res)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr;
++ int status;
++
++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++ status = decode_compound_hdr(&xdr, &hdr);
++ if (status)
++ goto out;
++ status = decode_putfh(&xdr);
++ if (status)
++ goto out;
++ status = decode_getfattr(&xdr, res->fattr, res->server);
++ if (status)
++ goto out;
++ status = decode_access(&xdr, res);
++out:
++ return status;
++}
++
++/*
++ * Decode LOOKUP response
++ */
++static int
++nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_lookupres *res)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr;
++ int status;
++
++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++ status = decode_compound_hdr(&xdr, &hdr);
++ if (status)
++ goto out;
++ status = decode_putfh(&xdr);
++ if (status)
++ goto out;
++ status = decode_getfattr(&xdr, res->dirattr, res->server);
++ if (status)
++ goto out;
++ status = decode_lookup(&xdr);
++ if (status)
++ goto out;
++ status = decode_getfattr(&xdr, res->fattr, res->server);
++ if (status)
++ goto out;
++ status = decode_getfh(&xdr, res->fhandle);
++out:
++ return status;
++}
++
++/*
++ * Decode GETROOT_HEAD response
++ */
++static int
++nfs4_xdr_dec_getroot_head(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_getroot_res *res)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr;
++ int status;
++
++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++ status = decode_compound_hdr(&xdr, &hdr);
++ if (status)
++ goto out;
++ status = decode_putrootfh(&xdr);
++ if (status)
++ goto out;
++ status = decode_getfattr(&xdr, res->fattr, res->server);
++ if (status)
++ goto out;
++ status = decode_getfh(&xdr, res->fhandle);
++out:
++ return status;
++}
++
++/*
++ * Decode GETROOT_PATH response
++ */
++static int
++nfs4_xdr_dec_getroot_path(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_getroot_res *res)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr;
++ int status;
++
++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++ status = decode_compound_hdr(&xdr, &hdr);
++ if (status)
++ goto out;
++ status = decode_putfh(&xdr);
++ if (status)
++ goto out;
++ status = decode_lookup(&xdr);
++ if (status)
++ goto out;
++ status = decode_getfattr(&xdr, res->fattr, res->server);
++ if (status)
++ goto out;
++ status = decode_getfh(&xdr, res->fhandle);
++out:
++ return status;
++}
++
++/*
++ * Decode REMOVE response
++ */
++static int
++nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_remove_res *res)
+ {
+- return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM);
++ struct xdr_stream xdr;
++ struct compound_hdr hdr;
++ int status;
++
++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++ status = decode_compound_hdr(&xdr, &hdr);
++ if (status)
++ goto out;
++ status = decode_putfh(&xdr);
++ if (status)
++ goto out;
++ status = decode_remove(&xdr, res->dir_cinfo);
++ if (status)
++ goto out;
++ status = decode_getfattr(&xdr, res->dir_attr, res->server);
++out:
++ return status;
+ }
+
++/*
++ * Decode UNLINK response
++ */
+ static int
+-decode_write(struct xdr_stream *xdr, struct nfs_writeres *res)
++nfs4_xdr_dec_unlink(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_unlink *res)
+ {
+- uint32_t *p;
++ struct xdr_stream xdr;
++ struct compound_hdr hdr;
+ int status;
+-
+- status = decode_op_hdr(xdr, OP_WRITE);
++
++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++ status = decode_compound_hdr(&xdr, &hdr);
+ if (status)
+- return status;
+-
+- READ_BUF(16);
+- READ32(res->count);
+- READ32(res->verf->committed);
+- COPYMEM(res->verf->verifier, 8);
+- return 0;
++ goto out;
++ status = decode_putfh(&xdr);
++ if (status)
++ goto out;
++ status = decode_remove(&xdr, &res->cinfo);
++ if (status)
++ goto out;
++ status = decode_getfattr(&xdr, &res->attrs, res->server);
++out:
++ return status;
+ }
+
+-/* FIXME: this sucks */
++/*
++ * Decode RENAME response
++ */
+ static int
+-decode_compound(struct xdr_stream *xdr, struct nfs4_compound *cp, struct rpc_rqst *req)
++nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_rename_res *res)
+ {
++ struct xdr_stream xdr;
+ struct compound_hdr hdr;
+- struct nfs4_op *op;
+ int status;
+-
+- status = decode_compound_hdr(xdr, &hdr);
++
++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++ status = decode_compound_hdr(&xdr, &hdr);
+ if (status)
+ goto out;
+-
+- cp->toplevel_status = hdr.status;
+-
+- /*
+- * We need this if our zero-copy I/O is going to work. Rumor has
+- * it that the spec will soon mandate it...
+- */
+- if (hdr.taglen != cp->taglen)
+- dprintk("nfs4: non-conforming server returns tag length mismatch!\n");
+-
+- cp->resp_nops = hdr.nops;
+- if (hdr.nops > cp->req_nops) {
+- dprintk("nfs4: resp_nops > req_nops!\n");
+- goto xdr_error;
+- }
+-
+- op = &cp->ops[0];
+- for (cp->nops = 0; cp->nops < cp->resp_nops; cp->nops++, op++) {
+- switch (op->opnum) {
+- case OP_ACCESS:
+- status = decode_access(xdr, &op->u.access);
+- break;
+- case OP_CREATE:
+- status = decode_create(xdr, &op->u.create);
+- break;
+- case OP_GETATTR:
+- status = decode_getattr(xdr, &op->u.getattr, cp->server);
+- break;
+- case OP_GETFH:
+- status = decode_getfh(xdr, &op->u.getfh);
+- break;
+- case OP_LINK:
+- status = decode_link(xdr, &op->u.link);
+- break;
+- case OP_LOOKUP:
+- status = decode_lookup(xdr);
+- break;
+- case OP_PUTFH:
+- status = decode_putfh(xdr);
+- break;
+- case OP_PUTROOTFH:
+- status = decode_putrootfh(xdr);
+- break;
+- case OP_READDIR:
+- status = decode_readdir(xdr, req, &op->u.readdir);
+- break;
+- case OP_READLINK:
+- status = decode_readlink(xdr, req, &op->u.readlink);
+- break;
+- case OP_RESTOREFH:
+- status = decode_restorefh(xdr);
+- break;
+- case OP_REMOVE:
+- status = decode_remove(xdr, &op->u.remove);
+- break;
+- case OP_RENAME:
+- status = decode_rename(xdr, &op->u.rename);
+- break;
+- case OP_SAVEFH:
+- status = decode_savefh(xdr);
+- break;
+- default:
+- BUG();
+- return -EIO;
+- }
+- if (status)
+- break;
+- }
+-
+- DECODE_TAIL;
++ status = decode_putfh(&xdr);
++ if (status)
++ goto out;
++ status = decode_savefh(&xdr);
++ if (status)
++ goto out;
++ status = decode_putfh(&xdr);
++ if (status)
++ goto out;
++ status = decode_rename(&xdr, res->old_cinfo, res->new_cinfo);
++ if (status)
++ goto out;
++ status = decode_getfattr(&xdr, res->new_fattr, res->server);
++ if (status)
++ goto out;
++ status = decode_restorefh(&xdr);
++ if (status)
++ goto out;
++ status = decode_getfattr(&xdr, res->old_fattr, res->server);
++out:
++ return status;
+ }
+
+ /*
+- * Decode OPEN_DOWNGRADE response
++ * Decode LINK response
+ */
+ static int
+-nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res)
++nfs4_xdr_dec_link(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_link_res *res)
+ {
+- struct xdr_stream xdr;
+- struct compound_hdr hdr;
+- int status;
+-
+- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+- status = decode_compound_hdr(&xdr, &hdr);
+- if (status)
+- goto out;
+- status = decode_putfh(&xdr);
+- if (status)
+- goto out;
+- status = decode_open_downgrade(&xdr, res);
++ struct xdr_stream xdr;
++ struct compound_hdr hdr;
++ int status;
++
++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++ status = decode_compound_hdr(&xdr, &hdr);
++ if (status)
++ goto out;
++ status = decode_putfh(&xdr);
++ if (status)
++ goto out;
++ status = decode_savefh(&xdr);
++ if (status)
++ goto out;
++ status = decode_putfh(&xdr);
++ if (status)
++ goto out;
++ status = decode_link(&xdr, res->dir_cinfo);
++ if (status)
++ goto out;
++ status = decode_getfattr(&xdr, res->dir_attr, res->server);
++ if (status)
++ goto out;
++ status = decode_restorefh(&xdr);
++ if (status)
++ goto out;
++ status = decode_getfattr(&xdr, res->fattr, res->server);
+ out:
+- return status;
++ return status;
+ }
+
+ /*
+- * END OF "GENERIC" DECODE ROUTINES.
++ * Decode CREATE response
+ */
++static int
++nfs4_xdr_dec_create(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_create_res *res)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr;
++ int status;
++
++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++ status = decode_compound_hdr(&xdr, &hdr);
++ if (status)
++ goto out;
++ status = decode_putfh(&xdr);
++ if (status)
++ goto out;
++ status = decode_savefh(&xdr);
++ if (status)
++ goto out;
++ status = decode_create(&xdr,res->dir_cinfo);
++ if (status)
++ goto out;
++ status = decode_getfattr(&xdr, res->fattr, res->server);
++ if (status)
++ goto out;
++ status = decode_getfh(&xdr, res->fhandle);
++ if (status)
++ goto out;
++ status = decode_restorefh(&xdr);
++ if (status)
++ goto out;
++ status = decode_getfattr(&xdr, res->dir_attr, res->server);
++out:
++ return status;
++}
+
+ /*
+- * Decode COMPOUND response
++ * Decode GETATTR response
+ */
+ static int
+-nfs4_xdr_dec_compound(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_compound *cp)
++nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_getattr_res *res)
+ {
+ struct xdr_stream xdr;
++ struct compound_hdr hdr;
+ int status;
+
+ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+- if ((status = decode_compound(&xdr, cp, rqstp)))
++ status = decode_compound_hdr(&xdr, &hdr);
++ if (status)
+ goto out;
+-
+- status = 0;
+- if (cp->toplevel_status)
+- status = -nfs_stat_to_errno(cp->toplevel_status);
+-
++ status = decode_putfh(&xdr);
++ if (status)
++ goto out;
++ status = decode_getfattr(&xdr, res->fattr, res->server);
+ out:
+ return status;
++
+ }
+
++
+ /*
+ * Decode CLOSE response
+ */
+@@ -2748,9 +3687,6 @@ nfs4_xdr_dec_open(struct rpc_rqst *rqstp
+ {
+ struct xdr_stream xdr;
+ struct compound_hdr hdr;
+- struct nfs4_getfh gfh = {
+- .gf_fhandle = &res->fh,
+- };
+ int status;
+
+ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
+@@ -2766,16 +3702,16 @@ nfs4_xdr_dec_open(struct rpc_rqst *rqstp
+ status = decode_open(&xdr, res);
+ if (status)
+ goto out;
+- status = decode_getattr(&xdr, res->f_getattr, res->server);
++ status = decode_getfattr(&xdr, res->f_attr, res->server);
+ if (status)
+ goto out;
+- status = decode_getfh(&xdr, &gfh);
++ status = decode_getfh(&xdr, &res->fh);
+ if (status)
+ goto out;
+ status = decode_restorefh(&xdr);
+ if (status)
+ goto out;
+- status = decode_getattr(&xdr, res->d_getattr, res->server);
++ status = decode_getfattr(&xdr, res->d_attr, res->server);
+ if (status)
+ goto out;
+ out:
+@@ -2824,7 +3760,7 @@ nfs4_xdr_dec_open_reclaim(struct rpc_rqs
+ status = decode_open(&xdr, res);
+ if (status)
+ goto out;
+- status = decode_getattr(&xdr, res->f_getattr, res->server);
++ status = decode_getfattr(&xdr, res->f_attr, res->server);
+ out:
+ return status;
+ }
+@@ -2846,10 +3782,10 @@ nfs4_xdr_dec_setattr(struct rpc_rqst *rq
+ status = decode_putfh(&xdr);
+ if (status)
+ goto out;
+- status = decode_setattr(&xdr, res);
++ status = decode_setattr(&xdr);
+ if (status)
+ goto out;
+- status = decode_getattr(&xdr, res->attr, res->server);
++ status = decode_getfattr(&xdr, res->fattr, res->server);
+ out:
+ return status;
+ }
+@@ -2921,6 +3857,50 @@ out:
+ }
+
+ /*
++ * Decode READLINK response
++ */
++static int
++nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, uint32_t *p, void *res)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr;
++ int status;
++
++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++ status = decode_compound_hdr(&xdr, &hdr);
++ if (status)
++ goto out;
++ status = decode_putfh(&xdr);
++ if (status)
++ goto out;
++ status = decode_readlink(&xdr, rqstp);
++out:
++ return status;
++}
++
++/*
++ * Decode READDIR response
++ */
++static int
++nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_readdir_res *res)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr;
++ int status;
++
++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p);
++ status = decode_compound_hdr(&xdr, &hdr);
++ if (status)
++ goto out;
++ status = decode_putfh(&xdr);
++ if (status)
++ goto out;
++ status = decode_readdir(&xdr, rqstp, res);
++out:
++ return status;
++}
++
++/*
+ * Decode Read response
+ */
+ static int
+@@ -3033,6 +4013,44 @@ nfs4_xdr_dec_fsinfo(struct rpc_rqst *req
+ }
+
+ /*
++ * Decode PATHCONF response
++ */
++static int
++nfs4_xdr_dec_pathconf(struct rpc_rqst *req, uint32_t *p, struct nfs_pathconf *pathconf)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr;
++ int status;
++
++ xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
++ status = decode_compound_hdr(&xdr, &hdr);
++ if (!status)
++ status = decode_putfh(&xdr);
++ if (!status)
++ status = decode_pathconf(&xdr, pathconf);
++ return status;
++}
++
++/*
++ * Decode STATFS response
++ */
++static int
++nfs4_xdr_dec_statfs(struct rpc_rqst *req, uint32_t *p, struct nfs_fsstat *fsstat)
++{
++ struct xdr_stream xdr;
++ struct compound_hdr hdr;
++ int status;
++
++ xdr_init_decode(&xdr, &req->rq_rcv_buf, p);
++ status = decode_compound_hdr(&xdr, &hdr);
++ if (!status)
++ status = decode_putfh(&xdr);
++ if (!status)
++ status = decode_statfs(&xdr, fsstat);
++ return status;
++}
++
++/*
+ * Decode RENEW response
+ */
+ static int
+@@ -3201,7 +4219,6 @@ nfs_stat_to_errno(int stat)
+ }
+
+ struct rpc_procinfo nfs4_procedures[] = {
+- PROC(COMPOUND, enc_compound, dec_compound),
+ PROC(READ, enc_read, dec_read),
+ PROC(WRITE, enc_write, dec_write),
+ PROC(COMMIT, enc_commit, dec_commit),
+@@ -3218,6 +4235,24 @@ struct rpc_procinfo nfs4_procedures[] =
+ PROC(LOCK, enc_lock, dec_lock),
+ PROC(LOCKT, enc_lockt, dec_lockt),
+ PROC(LOCKU, enc_locku, dec_locku),
++#ifdef CONFIG_NFS_V4_ACL
++ PROC(GETACL, enc_getacl, dec_getacl),
++ PROC(SETACL, enc_setacl, dec_setacl),
++#endif /* CONFIG_NFS_V4_ACL */
++ PROC(ACCESS, enc_access, dec_access),
++ PROC(GETATTR, enc_getattr, dec_getattr),
++ PROC(LOOKUP, enc_lookup, dec_lookup),
++ PROC(GETROOT_HEAD, enc_getroot_head, dec_getroot_head),
++ PROC(GETROOT_PATH, enc_getroot_path, dec_getroot_path),
++ PROC(REMOVE, enc_remove, dec_remove),
++ PROC(RENAME, enc_rename, dec_rename),
++ PROC(LINK, enc_link, dec_link),
++ PROC(CREATE, enc_create, dec_create),
++ PROC(PATHCONF, enc_pathconf, dec_pathconf),
++ PROC(STATFS, enc_statfs, dec_statfs),
++ PROC(UNLINK, enc_unlink, dec_unlink),
++ PROC(READLINK, enc_readlink, dec_readlink),
++ PROC(READDIR, enc_readdir, dec_readdir),
+ };
+
+ struct rpc_version nfs_version4 = {
+diff -puN fs/nfsd/vfs.c~CITI_NFS4_ALL fs/nfsd/vfs.c
+--- linux-2.6.3/fs/nfsd/vfs.c~CITI_NFS4_ALL 2004-02-19 16:47:07.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfsd/vfs.c 2004-02-19 16:47:12.000000000 -0500
+@@ -44,6 +44,16 @@
+ #include <linux/nfsd/nfsfh.h>
+ #include <linux/quotaops.h>
+ #include <linux/dnotify.h>
++#ifdef CONFIG_NFSD_V4
++#include <linux/posix_acl.h>
++#include <linux/posix_acl_xattr.h>
++#include <linux/xattr_acl.h>
++#include <linux/xattr.h>
++#include <linux/nfs4.h>
++#include <linux/nfs4_acl.h>
++#include <linux/nfsd_idmap.h>
++#include <linux/security.h>
++#endif /* CONFIG_NFSD_V4 */
+
+ #include <asm/uaccess.h>
+
+@@ -341,6 +351,204 @@ out_nfserr:
+ goto out;
+ }
+
++#ifdef CONFIG_NFS_V4_ACL
++
++static int
++set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key)
++{
++ int len;
++ size_t buflen;
++ char *buf = NULL;
++ int error = 0;
++ struct inode *inode = dentry->d_inode;
++
++ buflen = posix_acl_xattr_size(pacl->a_count);
++ buf = kmalloc(buflen, GFP_KERNEL);
++ error = -ENOMEM;
++ if (buf == NULL)
++ goto out;
++
++ len = posix_acl_to_xattr(pacl, buf, buflen);
++ if (len < 0) {
++ error = len;
++ goto out;
++ }
++
++ error = -EOPNOTSUPP;
++ if (inode->i_op && inode->i_op->setxattr) {
++ down(&inode->i_sem);
++ security_inode_setxattr(dentry, key, buf, len, 0);
++ error = inode->i_op->setxattr(dentry, key, buf, len, 0);
++ if (!error)
++ security_inode_post_setxattr(dentry, key, buf, len, 0);
++ up(&inode->i_sem);
++ }
++out:
++ kfree(buf);
++ return (error);
++}
++
++static inline int
++nfsd_name_to_uid_wrapper(void *arg, const char *name, size_t len, __u32 *id)
++{
++ return nfsd_map_name_to_uid((struct svc_rqst *)arg, name, len, id);
++}
++
++static inline int
++nfsd_name_to_gid_wrapper(void *arg, const char *name, size_t len, __u32 *id)
++{
++ return nfsd_map_name_to_gid((struct svc_rqst *)arg, name, len, id);
++}
++
++static inline int
++nfsd_uid_to_name_wrapper(void *arg, __u32 id, char *name)
++{
++ return nfsd_map_uid_to_name((struct svc_rqst *)arg, id, name);
++}
++
++static inline int
++nfsd_gid_to_name_wrapper(void *arg, __u32 id, char *name)
++{
++ return nfsd_map_gid_to_name((struct svc_rqst *)arg, id, name);
++}
++
++static struct nfs4_acl_idmapper nfsd_idmapper = {
++ .name2uid = nfsd_name_to_uid_wrapper,
++ .name2gid = nfsd_name_to_gid_wrapper,
++ .uid2name = nfsd_uid_to_name_wrapper,
++ .gid2name = nfsd_gid_to_name_wrapper,
++};
++
++
++int
++nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
++ struct nfs4_acl *acl)
++{
++ int error;
++ struct dentry *dentry;
++ struct inode *inode;
++ struct posix_acl *pacl = NULL, *dpacl = NULL;
++
++ /* Get inode */
++ error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR);
++ if (error)
++ goto out;
++
++ dentry = fhp->fh_dentry;
++ inode = dentry->d_inode;
++
++ error = nfs4_acl_nfsv4_to_posix(&nfsd_idmapper, rqstp, acl, &pacl, &dpacl);
++ if (error < 0)
++ goto out_nfserr;
++
++ if (pacl) {
++ error = set_nfsv4_acl_one(dentry, pacl, XATTR_NAME_ACL_ACCESS);
++ if (error < 0)
++ goto out_nfserr;
++ }
++
++ if (dpacl) {
++ error = set_nfsv4_acl_one(dentry, dpacl, XATTR_NAME_ACL_DEFAULT);
++ if (error < 0)
++ goto out_nfserr;
++ }
++
++ error = nfs_ok;
++
++out:
++ posix_acl_release(pacl);
++ posix_acl_release(dpacl);
++ return (error);
++out_nfserr:
++ error = nfserrno(error);
++ goto out;
++}
++
++static struct posix_acl *
++_get_posix_acl(struct dentry *dentry, char *key)
++{
++ struct inode *inode = dentry->d_inode;
++ char *buf = NULL;
++ int buflen, error = 0;
++ struct posix_acl *pacl = NULL;
++
++ down(&inode->i_sem);
++
++ buflen = inode->i_op->getxattr(dentry, key, NULL, 0);
++ if (buflen <= 0) {
++ error = buflen < 0 ? buflen : -ENODATA;
++ goto out_sem;
++ }
++
++ buf = kmalloc(buflen, GFP_KERNEL);
++ if (buf == NULL) {
++ error = -ENOMEM;
++ goto out_sem;
++ }
++
++ error = -EOPNOTSUPP;
++ if (inode->i_op && inode->i_op->getxattr) {
++ error = security_inode_getxattr(dentry, key);
++ if (error)
++ goto out_sem;
++ error = inode->i_op->getxattr(dentry, key, buf, buflen);
++ }
++ if (error < 0)
++ goto out_sem;
++
++ error = 0;
++ up(&inode->i_sem);
++
++ pacl = posix_acl_from_xattr(buf, buflen);
++ out:
++ kfree(buf);
++ return pacl;
++ out_sem:
++ up(&inode->i_sem);
++ pacl = ERR_PTR(error);
++ goto out;
++}
++
++int
++nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl)
++{
++ struct inode *inode = dentry->d_inode;
++ int error = 0;
++ struct posix_acl *pacl = NULL, *dpacl = NULL;
++
++ pacl = _get_posix_acl(dentry, XATTR_NAME_ACL_ACCESS);
++ if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA)
++ pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL);
++ if (IS_ERR(pacl)) {
++ error = PTR_ERR(pacl);
++ pacl = NULL;
++ goto out;
++ }
++
++ if (S_ISDIR(inode->i_mode)) {
++ dpacl = _get_posix_acl(dentry, XATTR_NAME_ACL_DEFAULT);
++ if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA)
++ dpacl = NULL;
++ else if (IS_ERR(dpacl)) {
++ error = PTR_ERR(dpacl);
++ dpacl = NULL;
++ goto out;
++ }
++ }
++
++ *acl = nfs4_acl_posix_to_nfsv4(&nfsd_idmapper, rqstp, pacl, dpacl);
++ if (IS_ERR(*acl)) {
++ error = PTR_ERR(*acl);
++ *acl = NULL;
++ }
++ out:
++ posix_acl_release(pacl);
++ posix_acl_release(dpacl);
++ return error;
++}
++
++#endif /* CONFIG_NFS_V4_ACL */
++
+ #ifdef CONFIG_NFSD_V3
+ /*
+ * Check server access rights to a file system object
+@@ -458,11 +666,15 @@ nfsd_open(struct svc_rqst *rqstp, struct
+ int flags = O_RDONLY|O_LARGEFILE, err;
+
+ /*
+- * If we get here, then the client has already done an "open",
++ * If we get here, then for regular files,
++ * the client has already done an "open",
+ * and (hopefully) checked permission - so allow OWNER_OVERRIDE
+ * in case a chmod has now revoked permission.
+ */
+- err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE);
++ if (type == S_IFDIR)
++ err = fh_verify(rqstp, fhp, type, access);
++ else
++ err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE);
+ if (err)
+ goto out;
+
+@@ -1494,7 +1706,7 @@ nfsd_readdir(struct svc_rqst *rqstp, str
+ err = cdp->err;
+ *offsetp = file.f_pos;
+
+- if (err == nfserr_eof || err == nfserr_readdir_nospc)
++ if (err == nfserr_eof || err == nfserr_toosmall)
+ err = nfs_ok; /* can still be found in ->err */
+ out_close:
+ nfsd_close(&file);
+diff -puN include/linux/nfsd/nfsd.h~CITI_NFS4_ALL include/linux/nfsd/nfsd.h
+--- linux-2.6.3/include/linux/nfsd/nfsd.h~CITI_NFS4_ALL 2004-02-19 16:47:07.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/nfsd/nfsd.h 2004-02-19 16:47:11.000000000 -0500
+@@ -76,6 +76,11 @@ int nfsd_lookup(struct svc_rqst *, stru
+ const char *, int, struct svc_fh *);
+ int nfsd_setattr(struct svc_rqst *, struct svc_fh *,
+ struct iattr *, int, time_t);
++#ifdef CONFIG_NFSD_V4
++int nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *,
++ struct nfs4_acl *);
++int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **);
++#endif /* CONFIG_NFSD_V4 */
+ int nfsd_create(struct svc_rqst *, struct svc_fh *,
+ char *name, int len, struct iattr *attrs,
+ int type, dev_t rdev, struct svc_fh *res);
+@@ -190,9 +195,12 @@ void nfsd_lockd_shutdown(void);
+ #define nfserr_bad_seqid __constant_htonl(NFSERR_BAD_SEQID)
+ #define nfserr_symlink __constant_htonl(NFSERR_SYMLINK)
+ #define nfserr_not_same __constant_htonl(NFSERR_NOT_SAME)
+-#define nfserr_readdir_nospc __constant_htonl(NFSERR_READDIR_NOSPC)
++#define nfserr_restorefh __constant_htonl(NFSERR_RESTOREFH)
++#define nfserr_attrnotsupp __constant_htonl(NFSERR_ATTRNOTSUPP)
+ #define nfserr_bad_xdr __constant_htonl(NFSERR_BAD_XDR)
+ #define nfserr_openmode __constant_htonl(NFSERR_OPENMODE)
++#define nfserr_locks_held __constant_htonl(NFSERR_LOCKS_HELD)
++#define nfserr_op_illegal __constant_htonl(NFSERR_OP_ILLEGAL)
+
+ /* error codes for internal use */
+ /* if a request fails due to kmalloc failure, it gets dropped.
+@@ -247,7 +255,6 @@ static inline int is_fsid(struct svc_fh
+
+ /*
+ * The following attributes are currently not supported by the NFSv4 server:
+- * ACL (will be supported in a forthcoming patch)
+ * ARCHIVE (deprecated anyway)
+ * FS_LOCATIONS (will be supported eventually)
+ * HIDDEN (unlikely to be supported any time soon)
+@@ -267,7 +274,7 @@ static inline int is_fsid(struct svc_fh
+ | FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FILEID | FATTR4_WORD0_FILES_AVAIL \
+ | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_HOMOGENEOUS \
+ | FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME \
+- | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE)
++ | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_ACL)
+
+ #define NFSD_SUPPORTED_ATTRS_WORD1 \
+ (FATTR4_WORD1_MODE | FATTR4_WORD1_NO_TRUNC | FATTR4_WORD1_NUMLINKS \
+@@ -282,7 +289,8 @@ static inline int is_fsid(struct svc_fh
+ (FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET)
+
+ /* These are the only attrs allowed in CREATE/OPEN/SETATTR. */
+-#define NFSD_WRITEABLE_ATTRS_WORD0 FATTR4_WORD0_SIZE
++#define NFSD_WRITEABLE_ATTRS_WORD0 \
++(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL )
+ #define NFSD_WRITEABLE_ATTRS_WORD1 \
+ (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \
+ | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_MODIFY_SET)
+diff -puN net/sunrpc/auth_gss/gss_krb5_crypto.c~CITI_NFS4_ALL net/sunrpc/auth_gss/gss_krb5_crypto.c
+--- linux-2.6.3/net/sunrpc/auth_gss/gss_krb5_crypto.c~CITI_NFS4_ALL 2004-02-19 16:47:07.000000000 -0500
++++ linux-2.6.3-bfields/net/sunrpc/auth_gss/gss_krb5_crypto.c 2004-02-19 16:47:07.000000000 -0500
+@@ -58,14 +58,14 @@ krb5_encrypt(
+ struct scatterlist sg[1];
+ u8 local_iv[16] = {0};
+
+- dprintk("RPC: krb5_encrypt: input data:\n");
++ dprintk("RPC: krb5_encrypt: input data:\n");
+ print_hexl((u32 *)in, length, 0);
+
+ if (length % crypto_tfm_alg_blocksize(tfm) != 0)
+ goto out;
+
+ if (crypto_tfm_alg_ivsize(tfm) > 16) {
+- dprintk("RPC: gss_k5encrypt: tfm iv size to large %d\n",
++ dprintk("RPC: gss_k5encrypt: tfm iv size to large %d\n",
+ crypto_tfm_alg_ivsize(tfm));
+ goto out;
+ }
+@@ -80,10 +80,10 @@ krb5_encrypt(
+
+ ret = crypto_cipher_encrypt_iv(tfm, sg, sg, length, local_iv);
+
+- dprintk("RPC: krb5_encrypt: output data:\n");
++ dprintk("RPC: krb5_encrypt: output data:\n");
+ print_hexl((u32 *)out, length, 0);
+ out:
+- dprintk("krb5_encrypt returns %d\n",ret);
++ dprintk("RPC: krb5_encrypt returns %d\n",ret);
+ return(ret);
+ }
+
+@@ -99,14 +99,14 @@ krb5_decrypt(
+ struct scatterlist sg[1];
+ u8 local_iv[16] = {0};
+
+- dprintk("RPC: krb5_decrypt: input data:\n");
++ dprintk("RPC: krb5_decrypt: input data:\n");
+ print_hexl((u32 *)in, length, 0);
+
+ if (length % crypto_tfm_alg_blocksize(tfm) != 0)
+ goto out;
+
+ if (crypto_tfm_alg_ivsize(tfm) > 16) {
+- dprintk("RPC: gss_k5decrypt: tfm iv size to large %d\n",
++ dprintk("RPC: gss_k5decrypt: tfm iv size to large %d\n",
+ crypto_tfm_alg_ivsize(tfm));
+ goto out;
+ }
+@@ -120,10 +120,10 @@ krb5_decrypt(
+
+ ret = crypto_cipher_decrypt_iv(tfm, sg, sg, length, local_iv);
+
+- dprintk("RPC: krb5_decrypt: output_data:\n");
++ dprintk("RPC: krb5_decrypt: output_data:\n");
+ print_hexl((u32 *)out, length, 0);
+ out:
+- dprintk("gss_k5decrypt returns %d\n",ret);
++ dprintk("RPC: gss_k5decrypt returns %d\n",ret);
+ return(ret);
+ }
+
+@@ -152,7 +152,7 @@ krb5_make_checksum(s32 cksumtype, char *
+ cksumname = "md5";
+ break;
+ default:
+- dprintk("RPC: krb5_make_checksum:"
++ dprintk("RPC: krb5_make_checksum:"
+ " unsupported checksum %d", cksumtype);
+ goto out;
+ }
+diff -puN net/sunrpc/auth_gss/gss_krb5_seqnum.c~CITI_NFS4_ALL net/sunrpc/auth_gss/gss_krb5_seqnum.c
+--- linux-2.6.3/net/sunrpc/auth_gss/gss_krb5_seqnum.c~CITI_NFS4_ALL 2004-02-19 16:47:07.000000000 -0500
++++ linux-2.6.3-bfields/net/sunrpc/auth_gss/gss_krb5_seqnum.c 2004-02-19 16:47:07.000000000 -0500
+@@ -70,7 +70,7 @@ krb5_get_seq_num(struct crypto_tfm *key,
+ s32 code;
+ unsigned char plain[8];
+
+- dprintk("krb5_get_seq_num: \n");
++ dprintk("RPC: krb5_get_seq_num:\n");
+
+ if ((code = krb5_decrypt(key, cksum, buf, plain, 8)))
+ return code;
+diff -puN net/sunrpc/auth_gss/gss_pseudoflavors.c~CITI_NFS4_ALL net/sunrpc/auth_gss/gss_pseudoflavors.c
+--- linux-2.6.3/net/sunrpc/auth_gss/gss_pseudoflavors.c~CITI_NFS4_ALL 2004-02-19 16:47:07.000000000 -0500
++++ linux-2.6.3-bfields/net/sunrpc/auth_gss/gss_pseudoflavors.c 2004-02-19 16:47:07.000000000 -0500
+@@ -82,12 +82,13 @@ gss_register_triple(u32 pseudoflavor, st
+
+ spin_lock(&registered_triples_lock);
+ if (do_lookup_triple_by_pseudoflavor(pseudoflavor)) {
+- printk("Registered pseudoflavor %d again\n", pseudoflavor);
++ printk(KERN_WARNING "RPC: Registered pseudoflavor %d again\n",
++ pseudoflavor);
+ goto err_unlock;
+ }
+ list_add(&triple->triples, &registered_triples);
+ spin_unlock(&registered_triples_lock);
+- dprintk("RPC: registered pseudoflavor %d\n", pseudoflavor);
++ dprintk("RPC: registered pseudoflavor %d\n", pseudoflavor);
+
+ return 0;
+
+@@ -145,7 +146,7 @@ gss_cmp_triples(u32 oid_len, char *oid_d
+ oid.len = oid_len;
+ oid.data = oid_data;
+
+- dprintk("RPC: gss_cmp_triples \n");
++ dprintk("RPC: gss_cmp_triples\n");
+ print_sec_triple(&oid,qop,service);
+
+ spin_lock(&registered_triples_lock);
+@@ -158,7 +159,7 @@ gss_cmp_triples(u32 oid_len, char *oid_d
+ }
+ }
+ spin_unlock(&registered_triples_lock);
+- dprintk("RPC: gss_cmp_triples return %d\n", pseudoflavor);
++ dprintk("RPC: gss_cmp_triples return %d\n", pseudoflavor);
+ return pseudoflavor;
+ }
+
+@@ -193,8 +194,8 @@ gss_pseudoflavor_to_service(u32 pseudofl
+ triple = do_lookup_triple_by_pseudoflavor(pseudoflavor);
+ spin_unlock(&registered_triples_lock);
+ if (!triple) {
+- dprintk("RPC: gss_pseudoflavor_to_service called with"
+- " unsupported pseudoflavor %d\n", pseudoflavor);
++ dprintk("RPC: gss_pseudoflavor_to_service called with unsupported pseudoflavor %d\n",
++ pseudoflavor);
+ return 0;
+ }
+ return triple->service;
+@@ -211,8 +212,8 @@ gss_pseudoflavor_to_mech(u32 pseudoflavo
+ if (triple)
+ mech = gss_mech_get(triple->mech);
+ else
+- dprintk("RPC: gss_pseudoflavor_to_mech called with"
+- " unsupported pseudoflavor %d\n", pseudoflavor);
++ dprintk("RPC: gss_pseudoflavor_to_mech called with unsupported pseudoflavor %d\n",
++ pseudoflavor);
+ return mech;
+ }
+
+@@ -223,8 +224,8 @@ gss_pseudoflavor_to_mechOID(u32 pseudofl
+
+ mech = gss_pseudoflavor_to_mech(pseudoflavor);
+ if (!mech) {
+- dprintk("RPC: gss_pseudoflavor_to_mechOID called with"
+- " unsupported pseudoflavor %d\n", pseudoflavor);
++ dprintk("RPC: gss_pseudoflavor_to_mechOID called with unsupported pseudoflavor %d\n",
++ pseudoflavor);
+ return -1;
+ }
+ oid->len = mech->gm_oid.len;
+diff -puN fs/nfsd/nfs4state.c~CITI_NFS4_ALL fs/nfsd/nfs4state.c
+--- linux-2.6.3/fs/nfsd/nfs4state.c~CITI_NFS4_ALL 2004-02-19 16:47:08.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfsd/nfs4state.c 2004-02-19 16:47:15.000000000 -0500
+@@ -43,6 +43,7 @@
+ #include <linux/nfsd/cache.h>
+ #include <linux/mount.h>
+ #include <linux/workqueue.h>
++#include <linux/smp_lock.h>
+ #include <linux/nfs4.h>
+ #include <linux/nfsd/state.h>
+ #include <linux/nfsd/xdr4.h>
+@@ -135,12 +136,16 @@ static void release_file(struct nfs4_fil
+ *
+ * client_lru holds client queue ordered by nfs4_client.cl_time
+ * for lease renewal.
++ *
++ * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time
++ * for last close replay.
+ */
+ static struct list_head conf_id_hashtbl[CLIENT_HASH_SIZE];
+ static struct list_head conf_str_hashtbl[CLIENT_HASH_SIZE];
+ static struct list_head unconf_str_hashtbl[CLIENT_HASH_SIZE];
+ static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE];
+ static struct list_head client_lru;
++static struct list_head close_lru;
+
+ static inline void
+ renew_client(struct nfs4_client *clp)
+@@ -269,8 +274,7 @@ cmp_clid(clientid_t * cl1, clientid_t *
+ /* XXX what about NGROUP */
+ static int
+ cmp_creds(struct svc_cred *cr1, struct svc_cred *cr2){
+- return((cr1->cr_uid == cr2->cr_uid) &&
+- (cr1->cr_gid == cr2->cr_gid));
++ return(cr1->cr_uid == cr2->cr_uid);
+
+ }
+
+@@ -772,6 +776,9 @@ alloc_init_open_stateowner(unsigned int
+ INIT_LIST_HEAD(&sop->so_strhash);
+ INIT_LIST_HEAD(&sop->so_perclient);
+ INIT_LIST_HEAD(&sop->so_perfilestate);
++ INIT_LIST_HEAD(&sop->so_perlockowner); /* not used */
++ INIT_LIST_HEAD(&sop->so_close_lru);
++ sop->so_time = 0;
+ list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]);
+ list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]);
+ list_add(&sop->so_perclient, &clp->cl_perclient);
+@@ -790,13 +797,29 @@ alloc_init_open_stateowner(unsigned int
+ }
+
+ static void
++release_stateid_lockowner(struct nfs4_stateid *open_stp)
++{
++ struct nfs4_stateowner *lock_sop;
++
++ while (!list_empty(&open_stp->st_perlockowner)) {
++ lock_sop = list_entry(open_stp->st_perlockowner.next,
++ struct nfs4_stateowner, so_perlockowner);
++ /* list_del(&open_stp->st_perlockowner); */
++ BUG_ON(lock_sop->so_is_open_owner);
++ release_stateowner(lock_sop);
++ }
++}
++
++static void
+ release_stateowner(struct nfs4_stateowner *sop)
+ {
+ struct nfs4_stateid *stp;
+
+- list_del_init(&sop->so_idhash);
+- list_del_init(&sop->so_strhash);
+- list_del_init(&sop->so_perclient);
++ list_del(&sop->so_idhash);
++ list_del(&sop->so_strhash);
++ list_del(&sop->so_perclient);
++ list_del(&sop->so_perlockowner);
++ list_del(&sop->so_close_lru);
+ del_perclient++;
+ while (!list_empty(&sop->so_perfilestate)) {
+ stp = list_entry(sop->so_perfilestate.next,
+@@ -815,6 +838,7 @@ init_stateid(struct nfs4_stateid *stp, s
+
+ INIT_LIST_HEAD(&stp->st_hash);
+ INIT_LIST_HEAD(&stp->st_perfilestate);
++ INIT_LIST_HEAD(&stp->st_perlockowner);
+ INIT_LIST_HEAD(&stp->st_perfile);
+ list_add(&stp->st_hash, &stateid_hashtbl[hashval]);
+ list_add(&stp->st_perfilestate, &sop->so_perfilestate);
+@@ -826,24 +850,30 @@ init_stateid(struct nfs4_stateid *stp, s
+ stp->st_stateid.si_stateownerid = sop->so_id;
+ stp->st_stateid.si_fileid = fp->fi_id;
+ stp->st_stateid.si_generation = 0;
+- stp->st_share_access = open->op_share_access;
+- stp->st_share_deny = open->op_share_deny;
++ stp->st_access_bmap = 0;
++ stp->st_deny_bmap = 0;
++ __set_bit(open->op_share_access, &stp->st_access_bmap);
++ __set_bit(open->op_share_deny, &stp->st_deny_bmap);
+ }
+
+ static void
+ release_stateid(struct nfs4_stateid *stp, int flags) {
+
+- list_del_init(&stp->st_hash);
++ list_del(&stp->st_hash);
+ list_del_perfile++;
+- list_del_init(&stp->st_perfile);
+- list_del_init(&stp->st_perfilestate);
++ list_del(&stp->st_perfile);
++ list_del(&stp->st_perfilestate);
+ if((stp->st_vfs_set) && (flags & OPEN_STATE)) {
++ release_stateid_lockowner(stp);
+ nfsd_close(&stp->st_vfs_file);
+ vfsclose++;
+ dput(stp->st_vfs_file.f_dentry);
+ mntput(stp->st_vfs_file.f_vfsmnt);
++ } else if ((stp->st_vfs_set) && (flags & LOCK_STATE)) {
++ struct file *filp = &stp->st_vfs_file;
++
++ locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner);
+ }
+- /* should use a slab cache */
+ kfree(stp);
+ stp = NULL;
+ }
+@@ -852,12 +882,25 @@ static void
+ release_file(struct nfs4_file *fp)
+ {
+ free_file++;
+- list_del_init(&fp->fi_hash);
++ list_del(&fp->fi_hash);
+ iput(fp->fi_inode);
+ kfree(fp);
+ }
+
+ void
++move_to_close_lru(struct nfs4_stateowner *sop)
++{
++ dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop);
++ /* remove stateowner from all other hash lists except perclient */
++ list_del_init(&sop->so_idhash);
++ list_del_init(&sop->so_strhash);
++ list_del_init(&sop->so_perlockowner);
++
++ list_add_tail(&sop->so_close_lru, &close_lru);
++ sop->so_time = get_seconds();
++}
++
++void
+ release_state_owner(struct nfs4_stateid *stp, struct nfs4_stateowner **sopp,
+ int flag)
+ {
+@@ -866,16 +909,13 @@ release_state_owner(struct nfs4_stateid
+
+ dprintk("NFSD: release_state_owner\n");
+ release_stateid(stp, flag);
+- /*
+- * release unused nfs4_stateowners.
+- * XXX will need to be placed on an open_stateid_lru list to be
++
++ /* place unused nfs4_stateowners on so_close_lru list to be
+ * released by the laundromat service after the lease period
+ * to enable us to handle CLOSE replay
+ */
+- if (sop->so_confirmed && list_empty(&sop->so_perfilestate)) {
+- release_stateowner(sop);
+- *sopp = NULL;
+- }
++ if (sop->so_confirmed && list_empty(&sop->so_perfilestate))
++ move_to_close_lru(sop);
+ /* unused nfs4_file's are releseed. XXX slab cache? */
+ if (list_empty(&fp->fi_perfile)) {
+ release_file(fp);
+@@ -940,15 +980,46 @@ find_file(unsigned int hashval, struct i
+ return 0;
+ }
+
++#define TEST_ACCESS(x) ((x) > 0 && (x) < 4) /* 1 iff share_access is in 1..3 (the values set_access() scans) */
++#define TEST_DENY(x) ((x) >= 0 && (x) < 4) /* 1 iff share_deny is in 0..3 (the values set_deny() scans) */
++
++void
++set_access(unsigned int *access, unsigned long bmap) {
++ int i;
++
++ *access = 0;
++ for (i = 1; i < 4; i++) {
++ if(test_bit(i, &bmap))
++ *access |= i;
++ }
++}
++
++void
++set_deny(unsigned int *deny, unsigned long bmap) {
++ int i;
++
++ *deny = 0;
++ for (i = 0; i < 4; i++) {
++ if(test_bit(i, &bmap))
++ *deny |= i ;
++ }
++}
++
+ static int
+ test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) {
+- if ((stp->st_share_access & open->op_share_deny) ||
+- (stp->st_share_deny & open->op_share_access)) {
++ unsigned int access, deny;
++
++ set_access(&access, stp->st_access_bmap);
++ set_deny(&deny, stp->st_deny_bmap);
++ if ((access & open->op_share_deny) || (deny & open->op_share_access))
+ return 0;
+- }
+ return 1;
+ }
+
++/*
++ * Called to check deny when READ with all zero stateid or
++ * WRITE with all zero or all one stateid
++ */
+ int
+ nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type)
+ {
+@@ -965,7 +1036,8 @@ nfs4_share_conflict(struct svc_fh *curre
+ /* Search for conflicting share reservations */
+ list_for_each_safe(pos, next, &fp->fi_perfile) {
+ stp = list_entry(pos, struct nfs4_stateid, st_perfile);
+- if (stp->st_share_deny & deny_type)
++ if (test_bit(deny_type, &stp->st_deny_bmap) ||
++ test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap))
+ return nfserr_share_denied;
+ }
+ }
+@@ -1010,6 +1082,8 @@ nfs4_file_downgrade(struct file *filp, u
+ * notfound:
+ * verify clientid
+ * create new owner
++ *
++ * called with nfs4_lock_state() held.
+ */
+ int
+ nfsd4_process_open1(struct nfsd4_open *open)
+@@ -1028,7 +1102,6 @@ nfsd4_process_open1(struct nfsd4_open *o
+ if (STALE_CLIENTID(&open->op_clientid))
+ goto out;
+
+- nfs4_lock_state();
+ strhashval = ownerstr_hashval(clientid->cl_id, open->op_owner);
+ if (find_openstateowner_str(strhashval, open, &sop)) {
+ open->op_stateowner = sop;
+@@ -1086,10 +1159,11 @@ instantiate_new_owner:
+ renew:
+ renew_client(sop->so_client);
+ out:
+- nfs4_unlock_state();
+ return status;
+ }
+-
++/*
++ * called with nfs4_lock_state() held.
++ */
+ int
+ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open)
+ {
+@@ -1108,7 +1182,10 @@ nfsd4_process_open2(struct svc_rqst *rqs
+
+ ino = current_fh->fh_dentry->d_inode;
+
+- nfs4_lock_state();
++ status = nfserr_inval;
++ if (!TEST_ACCESS(open->op_share_access) || !TEST_DENY(open->op_share_deny))
++ goto out;
++
+ fi_hashval = file_hashval(ino);
+ if (find_file(fi_hashval, ino, &fp)) {
+ /* Search for conflicting share reservations */
+@@ -1119,6 +1196,9 @@ nfsd4_process_open2(struct svc_rqst *rqs
+ stp = stq;
+ continue;
+ }
++ /* ignore lock owners */
++ if (stq->st_stateowner->so_is_open_owner == 0)
++ continue;
+ if (!test_share(stq,open))
+ goto out;
+ }
+@@ -1137,7 +1217,7 @@ nfsd4_process_open2(struct svc_rqst *rqs
+ GFP_KERNEL)) == NULL)
+ goto out;
+
+- if (open->op_share_access && NFS4_SHARE_ACCESS_WRITE)
++ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+ flags = MAY_WRITE;
+ else
+ flags = MAY_READ;
+@@ -1156,15 +1236,18 @@ nfsd4_process_open2(struct svc_rqst *rqs
+ /* This is an upgrade of an existing OPEN.
+ * OR the incoming share with the existing
+ * nfs4_stateid share */
+- int share_access = open->op_share_access;
++ unsigned int share_access;
+
+- share_access &= ~(stp->st_share_access);
++ set_access(&share_access, stp->st_access_bmap);
++ share_access = ~share_access;
++ share_access &= open->op_share_access;
+
+ /* update the struct file */
+ if ((status = nfs4_file_upgrade(&stp->st_vfs_file, share_access)))
+ goto out;
+- stp->st_share_access |= share_access;
+- stp->st_share_deny |= open->op_share_deny;
++ /* remember the open */
++ set_bit(open->op_share_access, &stp->st_access_bmap);
++ set_bit(open->op_share_deny, &stp->st_deny_bmap);
+ /* bump the stateid */
+ update_stateid(&stp->st_stateid);
+ }
+@@ -1194,7 +1277,6 @@ out:
+ if (!open->op_stateowner->so_confirmed)
+ open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM;
+
+- nfs4_unlock_state();
+ return status;
+ out_free:
+ kfree(stp);
+@@ -1250,9 +1332,11 @@ time_t
+ nfs4_laundromat(void)
+ {
+ struct nfs4_client *clp;
++ struct nfs4_stateowner *sop;
+ struct list_head *pos, *next;
+ time_t cutoff = get_seconds() - NFSD_LEASE_TIME;
+- time_t t, return_val = NFSD_LEASE_TIME;
++ time_t t, clientid_val = NFSD_LEASE_TIME;
++ time_t u, close_val = NFSD_LEASE_TIME;
+
+ nfs4_lock_state();
+
+@@ -1261,18 +1345,30 @@ nfs4_laundromat(void)
+ clp = list_entry(pos, struct nfs4_client, cl_lru);
+ if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) {
+ t = clp->cl_time - cutoff;
+- if (return_val > t)
+- return_val = t;
++ if (clientid_val > t)
++ clientid_val = t;
+ break;
+ }
+ dprintk("NFSD: purging unused client (clientid %08x)\n",
+ clp->cl_clientid.cl_id);
+ expire_client(clp);
+ }
+- if (return_val < NFSD_LAUNDROMAT_MINTIMEOUT)
+- return_val = NFSD_LAUNDROMAT_MINTIMEOUT;
++ list_for_each_safe(pos, next, &close_lru) {
++ sop = list_entry(pos, struct nfs4_stateowner, so_close_lru);
++ if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) {
++ u = sop->so_time - cutoff;
++ if (close_val > u)
++ close_val = u;
++ break;
++ }
++ dprintk("NFSD: purging unused open stateowner (so_id %d)\n",
++ sop->so_id);
++ release_stateowner(sop);
++ }
++ if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT)
++ clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT;
+ nfs4_unlock_state();
+- return return_val;
++ return clientid_val;
+ }
+
+ void
+@@ -1285,17 +1381,22 @@ laundromat_main(void *not_used)
+ schedule_delayed_work(&laundromat_work, t*HZ);
+ }
+
+-/* search ownerid_hashtbl[] for stateid owner (stateid->si_stateownerid) */
++/* search ownerid_hashtbl[] and close_lru for stateid owner
++ * (stateid->si_stateownerid)
++ */
+ struct nfs4_stateowner *
+-find_openstateowner_id(u32 st_id) {
++find_openstateowner_id(u32 st_id, int flags) {
+ struct list_head *pos, *next;
+ struct nfs4_stateowner *local = NULL;
+- unsigned int hashval = ownerid_hashval(st_id);
+
+- list_for_each_safe(pos, next, &ownerid_hashtbl[hashval]) {
+- local = list_entry(pos, struct nfs4_stateowner, so_idhash);
+- if(local->so_id == st_id)
+- return local;
++ dprintk("NFSD: find_openstateowner_id %d\n", st_id);
++ if (flags & CLOSE_STATE) {
++ list_for_each_safe(pos, next, &close_lru) {
++ local = list_entry(pos, struct nfs4_stateowner,
++ so_close_lru);
++ if(local->so_id == st_id)
++ return local;
++ }
+ }
+ return NULL;
+ }
+@@ -1303,7 +1404,8 @@ find_openstateowner_id(u32 st_id) {
+ static inline int
+ nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp)
+ {
+- return (fhp->fh_dentry != stp->st_vfs_file.f_dentry);
++ return (stp->st_vfs_set == 0 ||
++ fhp->fh_dentry->d_inode->i_ino != stp->st_vfs_file.f_dentry->d_inode->i_ino);
+ }
+
+ static int
+@@ -1375,7 +1477,7 @@ out:
+ * Checks for sequence id mutating operations.
+ */
+ int
+-nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp)
++nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, clientid_t *lockclid)
+ {
+ int status;
+ struct nfs4_stateid *stp;
+@@ -1412,6 +1514,21 @@ nfs4_preprocess_seqid_op(struct svc_fh *
+
+ status = nfserr_bad_stateid;
+
++ /* for new lock stateowners, check that the lock->v.new.open_stateid
++ * refers to an open stateowner, and that the lockclid
++ * (nfs4_lock->v.new.clientid) is the same as the
++ * open_stateid->st_stateowner->so_client->clientid
++ */
++ if (lockclid) {
++ struct nfs4_stateowner *sop = stp->st_stateowner;
++ struct nfs4_client *clp = sop->so_client;
++
++ if (!sop->so_is_open_owner)
++ goto out;
++ if (!cmp_clid(&clp->cl_clientid, lockclid))
++ goto out;
++ }
++
+ if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) {
+ printk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n");
+ goto out;
+@@ -1463,24 +1580,30 @@ no_nfs4_stateid:
+ * starting by trying to look up the stateowner.
+ * If stateowner is not found - stateid is bad.
+ */
+- if (!(sop = find_openstateowner_id(stateid->si_stateownerid))) {
++ if (!(sop = find_openstateowner_id(stateid->si_stateownerid, flags))) {
+ printk("NFSD: preprocess_seqid_op: no stateowner or nfs4_stateid!\n");
+ status = nfserr_bad_stateid;
+ goto out;
+ }
++ *sopp = sop;
+
+ check_replay:
+ if (seqid == sop->so_seqid) {
+ printk("NFSD: preprocess_seqid_op: retransmission?\n");
+ /* indicate replay to calling function */
+ status = NFSERR_REPLAY_ME;
+- } else
++ } else {
+ printk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d\n", sop->so_seqid +1, seqid);
+
++ *sopp = NULL;
+ status = nfserr_bad_seqid;
++ }
+ goto out;
+ }
+
++/*
++ * nfs4_unlock_state(); called in encode
++ */
+ int
+ nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_confirm *oc)
+ {
+@@ -1491,13 +1614,17 @@ nfsd4_open_confirm(struct svc_rqst *rqst
+ dprintk("NFSD: nfsd4_open_confirm on file %.*s\n",
+ (int)current_fh->fh_dentry->d_name.len,
+ current_fh->fh_dentry->d_name.name);
++
++ if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0)))
++ goto out;
++
+ oc->oc_stateowner = NULL;
+ nfs4_lock_state();
+
+ if ((status = nfs4_preprocess_seqid_op(current_fh, oc->oc_seqid,
+ &oc->oc_req_stateid,
+ CHECK_FH | CONFIRM | OPEN_STATE,
+- &oc->oc_stateowner, &stp)))
++ &oc->oc_stateowner, &stp, NULL)))
+ goto out;
+
+ sop = oc->oc_stateowner;
+@@ -1512,49 +1639,89 @@ nfsd4_open_confirm(struct svc_rqst *rqst
+ stp->st_stateid.si_generation);
+ status = nfs_ok;
+ out:
+- nfs4_unlock_state();
+ return status;
+ }
++
++
++/*
++ * unset all bits in union bitmap (bmap) that
++ * do not exist in share (from successful OPEN_DOWNGRADE)
++ */
++static void
++reset_union_bmap_access(unsigned long access, unsigned long *bmap)
++{
++ int i;
++ for (i = 1; i < 4; i++) {
++ if ((i & access) != i)
++ __clear_bit(i, bmap);
++ }
++}
++
++static void
++reset_union_bmap_deny(unsigned long deny, unsigned long *bmap)
++{
++ int i;
++ for (i = 0; i < 4; i++) {
++ if ((i & deny) != i)
++ __clear_bit(i, bmap);
++ }
++}
++
++/*
++ * nfs4_unlock_state(); called in encode
++ */
++
+ int
+ nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_downgrade *od)
+ {
+ int status;
+ struct nfs4_stateid *stp;
++ unsigned int share_access;
+
+ dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n",
+ (int)current_fh->fh_dentry->d_name.len,
+ current_fh->fh_dentry->d_name.name);
+
++ od->od_stateowner = NULL;
++ status = nfserr_inval;
++ if (!TEST_ACCESS(od->od_share_access) || !TEST_DENY(od->od_share_deny))
++ goto out;
++
+ nfs4_lock_state();
+ if ((status = nfs4_preprocess_seqid_op(current_fh, od->od_seqid,
+ &od->od_stateid,
+ CHECK_FH | OPEN_STATE,
+- &od->od_stateowner, &stp)))
++ &od->od_stateowner, &stp, NULL)))
+ goto out;
+
+ status = nfserr_inval;
+- if (od->od_share_access & ~stp->st_share_access) {
+- dprintk("NFSD:access not a subset current=%08x, desired=%08x\n",
+- stp->st_share_access, od->od_share_access);
++ if (!test_bit(od->od_share_access, &stp->st_access_bmap)) {
++ dprintk("NFSD:access not a subset current bitmap: 0x%lx, input access=%08x\n",
++ stp->st_access_bmap, od->od_share_access);
+ goto out;
+ }
+- if (od->od_share_deny & ~stp->st_share_deny) {
+- dprintk("NFSD:deny not a subset current=%08x, desired=%08x\n",
+- stp->st_share_deny, od->od_share_deny);
++ if (!test_bit(od->od_share_deny, &stp->st_deny_bmap)) {
++ dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n",
++ stp->st_deny_bmap, od->od_share_deny);
+ goto out;
+ }
++ set_access(&share_access, stp->st_access_bmap);
+ nfs4_file_downgrade(&stp->st_vfs_file,
+- stp->st_share_access & ~od->od_share_access);
+- stp->st_share_access = od->od_share_access;
+- stp->st_share_deny = od->od_share_deny;
++ share_access & ~od->od_share_access);
++
++ reset_union_bmap_access(od->od_share_access, &stp->st_access_bmap);
++ reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap);
++
+ update_stateid(&stp->st_stateid);
+ memcpy(&od->od_stateid, &stp->st_stateid, sizeof(stateid_t));
+ status = nfs_ok;
+ out:
+- nfs4_unlock_state();
+ return status;
+ }
+
++/*
++ * nfs4_unlock_state() called after encode
++ */
+ int
+ nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_close *close)
+ {
+@@ -1565,11 +1732,13 @@ nfsd4_close(struct svc_rqst *rqstp, stru
+ (int)current_fh->fh_dentry->d_name.len,
+ current_fh->fh_dentry->d_name.name);
+
++ close->cl_stateowner = NULL;
+ nfs4_lock_state();
++ /* check close_lru for replay */
+ if ((status = nfs4_preprocess_seqid_op(current_fh, close->cl_seqid,
+ &close->cl_stateid,
+- CHECK_FH | OPEN_STATE,
+- &close->cl_stateowner, &stp)))
++ CHECK_FH | OPEN_STATE | CLOSE_STATE,
++ &close->cl_stateowner, &stp, NULL)))
+ goto out;
+ /*
+ * Return success, but first update the stateid.
+@@ -1581,7 +1750,6 @@ nfsd4_close(struct svc_rqst *rqstp, stru
+ /* release_state_owner() calls nfsd_close() if needed */
+ release_state_owner(stp, &close->cl_stateowner, OPEN_STATE);
+ out:
+- nfs4_unlock_state();
+ return status;
+ }
+
+@@ -1717,7 +1885,7 @@ find_lockstateowner_str(unsigned int has
+ */
+
+ static struct nfs4_stateowner *
+-alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_lock *lock) {
++alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_stateid *open_stp, struct nfsd4_lock *lock) {
+ struct nfs4_stateowner *sop;
+ struct nfs4_replay *rp;
+ unsigned int idhashval;
+@@ -1729,9 +1897,13 @@ alloc_init_lock_stateowner(unsigned int
+ INIT_LIST_HEAD(&sop->so_strhash);
+ INIT_LIST_HEAD(&sop->so_perclient);
+ INIT_LIST_HEAD(&sop->so_perfilestate);
++ INIT_LIST_HEAD(&sop->so_perlockowner);
++ INIT_LIST_HEAD(&sop->so_close_lru); /* not used */
++ sop->so_time = 0;
+ list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]);
+ list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]);
+ list_add(&sop->so_perclient, &clp->cl_perclient);
++ list_add(&sop->so_perlockowner, &open_stp->st_perlockowner);
+ add_perclient++;
+ sop->so_is_open_owner = 0;
+ sop->so_id = current_ownerid++;
+@@ -1755,10 +1927,10 @@ alloc_init_lock_stateid(struct nfs4_stat
+ if ((stp = kmalloc(sizeof(struct nfs4_stateid),
+ GFP_KERNEL)) == NULL)
+ goto out;
+-
+ INIT_LIST_HEAD(&stp->st_hash);
+ INIT_LIST_HEAD(&stp->st_perfile);
+ INIT_LIST_HEAD(&stp->st_perfilestate);
++ INIT_LIST_HEAD(&stp->st_perlockowner); /* not used */
+ list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]);
+ list_add(&stp->st_perfile, &fp->fi_perfile);
+ list_add_perfile++;
+@@ -1771,15 +1943,24 @@ alloc_init_lock_stateid(struct nfs4_stat
+ stp->st_stateid.si_generation = 0;
+ stp->st_vfs_file = open_stp->st_vfs_file;
+ stp->st_vfs_set = open_stp->st_vfs_set;
+- stp->st_share_access = -1;
+- stp->st_share_deny = -1;
++ stp->st_access_bmap = open_stp->st_access_bmap;
++ stp->st_deny_bmap = open_stp->st_deny_bmap;
+
+ out:
+ return stp;
+ }
+
++int
++check_lock_length(u64 offset, u64 length)
++{
++ return ((length == 0) || ((length != ~(u64)0) &&
++ LOFF_OVERFLOW(offset, length)));
++}
++
+ /*
+ * LOCK operation
++ *
++ * nfs4_unlock_state(); called in encode
+ */
+ int
+ nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock)
+@@ -1795,6 +1976,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struc
+ dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n",
+ lock->lk_offset, lock->lk_length);
+
++ if (check_lock_length(lock->lk_offset, lock->lk_length))
++ return nfserr_inval;
++
+ lock->lk_stateowner = NULL;
+ nfs4_lock_state();
+
+@@ -1812,12 +1996,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struc
+ printk("NFSD: nfsd4_lock: clientid is stale!\n");
+ goto out;
+ }
++ /* does the clientid in the lock owner own the open stateid? */
++
+ /* validate and update open stateid and open seqid */
+ status = nfs4_preprocess_seqid_op(current_fh,
+ lock->lk_new_open_seqid,
+ &lock->lk_new_open_stateid,
+ CHECK_FH | OPEN_STATE,
+- &open_sop, &open_stp);
++ &open_sop, &open_stp,
++ &lock->v.new.clientid);
+ if (status)
+ goto out;
+ /* create lockowner and lock stateid */
+@@ -1836,8 +2023,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struc
+ &lock->v.new.clientid, &lock_sop))
+ goto out;
+ status = nfserr_resource;
+- if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval,
+- open_sop->so_client, lock)))
++ if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, open_sop->so_client, open_stp, lock)))
+ goto out;
+ if ((lock_stp = alloc_init_lock_stateid(lock->lk_stateowner,
+ fp, open_stp)) == NULL)
+@@ -1850,7 +2036,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struc
+ lock->lk_old_lock_seqid,
+ &lock->lk_old_lock_stateid,
+ CHECK_FH | LOCK_STATE,
+- &lock->lk_stateowner, &lock_stp);
++ &lock->lk_stateowner, &lock_stp, NULL);
+ if (status)
+ goto out;
+ }
+@@ -1938,7 +2124,6 @@ out_destroy_new_stateid:
+ release_state_owner(lock_stp, &lock->lk_stateowner, LOCK_STATE);
+ }
+ out:
+- nfs4_unlock_state();
+ return status;
+ }
+
+@@ -1956,6 +2141,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, stru
+ unsigned int strhashval;
+ int status;
+
++ if (check_lock_length(lockt->lt_offset, lockt->lt_length))
++ return nfserr_inval;
++
+ lockt->lt_stateowner = NULL;
+ nfs4_lock_state();
+
+@@ -1967,6 +2155,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, stru
+
+ if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0))) {
+ printk("NFSD: nfsd4_lockt: fh_verify() failed!\n");
++ if (status == nfserr_symlink)
++ status = nfserr_inval;
+ goto out;
+ }
+
+@@ -1989,13 +2179,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, stru
+ strhashval = lock_ownerstr_hashval(inode,
+ lockt->lt_clientid.cl_id, lockt->lt_owner);
+
+- if (find_lockstateowner_str(strhashval, &lockt->lt_owner,
++ find_lockstateowner_str(strhashval, &lockt->lt_owner,
+ &lockt->lt_clientid,
+- &lockt->lt_stateowner)) {
+- printk("NFSD: nsfd4_lockt: lookup_lockowner() failed!\n");
+- goto out;
+- }
+-
++ &lockt->lt_stateowner);
+ sop = lockt->lt_stateowner;
+ if (sop) {
+ file_lock.fl_owner = (fl_owner_t) sop;
+@@ -2032,7 +2218,10 @@ out:
+ nfs4_unlock_state();
+ return status;
+ }
+-
++
++/*
++ * nfs4_unlock_state(); called in encode
++ */
+ int
+ nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_locku *locku)
+ {
+@@ -2043,13 +2232,18 @@ nfsd4_locku(struct svc_rqst *rqstp, stru
+
+ dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n",
+ locku->lu_offset, locku->lu_length);
++
++ if (check_lock_length(locku->lu_offset, locku->lu_length))
++ return nfserr_inval;
++
++ locku->lu_stateowner = NULL;
+ nfs4_lock_state();
+
+ if ((status = nfs4_preprocess_seqid_op(current_fh,
+ locku->lu_seqid,
+ &locku->lu_stateid,
+ CHECK_FH | LOCK_STATE,
+- &locku->lu_stateowner, &stp)))
++ &locku->lu_stateowner, &stp, NULL)))
+ goto out;
+
+ filp = &stp->st_vfs_file;
+@@ -2085,7 +2279,6 @@ nfsd4_locku(struct svc_rqst *rqstp, stru
+ memcpy(&locku->lu_stateid, &stp->st_stateid, sizeof(stateid_t));
+
+ out:
+- nfs4_unlock_state();
+ return status;
+
+ out_nfserr:
+@@ -2093,6 +2286,84 @@ out_nfserr:
+ goto out;
+ }
+
++/*
++ * Walk every lock on filp's inode (inode->i_flock) looking for lowner; returns
++ * 1: locks held by lockowner
++ * 0: no locks held by lockowner
++ */
++static int
++check_for_locks(struct file *filp, struct nfs4_stateowner *lowner)
++{
++ struct file_lock **flpp;
++ struct inode *inode = filp->f_dentry->d_inode;
++ int status = 0;
++
++ lock_kernel(); /* held across the walk of inode->i_flock */
++ for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) {
++ if ((*flpp)->fl_owner == (fl_owner_t)lowner) {
++ status = 1; /* one lock owned by lowner is enough; stop scanning */
++ break;
++ }
++ }
++ unlock_kernel();
++ return status;
++}
++
++int
++nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner)
++{
++ clientid_t *clid = &rlockowner->rl_clientid;
++ struct list_head *pos, *next;
++ struct nfs4_stateowner *local = NULL;
++ struct xdr_netobj *owner = &rlockowner->rl_owner;
++ int status, i, found = 0;
++ /* Free the lockowner matching (rl_clientid, rl_owner); nfserr_locks_held if it still holds locks, nfs_ok otherwise (also when no owner matches). */
++ dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n",
++ clid->cl_boot, clid->cl_id);
++
++ /* XXX check for lease expiration */
++
++ status = nfserr_stale_clientid;
++ if (STALE_CLIENTID(clid)) {
++ printk("NFSD: nfsd4_release_lockowner: clientid is stale!\n");
++ return status;
++ }
++
++ nfs4_lock_state();
++
++ /* find the lockowner; 'found' ends both loops so 'local' stays on the match */
++ status = nfs_ok;
++ for (i=0; i < LOCK_HASH_SIZE && !found; i++) {
++ list_for_each_safe(pos, next, &lock_ownerstr_hashtbl[i]) {
++ local = list_entry(pos, struct nfs4_stateowner, so_strhash);
++ found = cmp_owner_str(local, owner, clid);
++ if (found)
++ break;
++ }
++ }
++ if (found) {
++ struct nfs4_stateid *stp;
++
++ /* check for any locks held by any stateid associated with the
++ * (lock) stateowner */
++ status = nfserr_locks_held;
++ list_for_each_safe(pos, next, &local->so_perfilestate) {
++ stp = list_entry(pos, struct nfs4_stateid,
++ st_perfilestate);
++ if(stp->st_vfs_set) {
++ if (check_for_locks(&stp->st_vfs_file, local))
++ goto out;
++ }
++ }
++ /* no locks held by (lock) stateowner */
++ status = nfs_ok;
++ release_stateowner(local);
++ }
++out:
++ nfs4_unlock_state();
++ return status;
++}
++
+ /*
+ * Start and stop routines
+ */
+@@ -2128,6 +2399,7 @@ nfs4_state_init(void)
+ memset(&zerostateid, 0, sizeof(stateid_t));
+ memset(&onestateid, ~0, sizeof(stateid_t));
+
++ INIT_LIST_HEAD(&close_lru);
+ INIT_LIST_HEAD(&client_lru);
+ init_MUTEX(&client_sema);
+ boot_time = get_seconds();
+diff -puN fs/nfsd/nfs3xdr.c~CITI_NFS4_ALL fs/nfsd/nfs3xdr.c
+--- linux-2.6.3/fs/nfsd/nfs3xdr.c~CITI_NFS4_ALL 2004-02-19 16:47:08.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfsd/nfs3xdr.c 2004-02-19 16:47:08.000000000 -0500
+@@ -796,7 +796,7 @@ encode_entry(struct readdir_cd *ccd, con
+ elen = slen + NFS3_ENTRY_BAGGAGE
+ + (plus? NFS3_ENTRYPLUS_BAGGAGE : 0);
+ if (cd->buflen < elen) {
+- cd->common.err = nfserr_readdir_nospc;
++ cd->common.err = nfserr_toosmall;
+ return -EINVAL;
+ }
+ *p++ = xdr_one; /* mark entry present */
+diff -puN fs/nfsd/nfsxdr.c~CITI_NFS4_ALL fs/nfsd/nfsxdr.c
+--- linux-2.6.3/fs/nfsd/nfsxdr.c~CITI_NFS4_ALL 2004-02-19 16:47:08.000000000 -0500
++++ linux-2.6.3-bfields/fs/nfsd/nfsxdr.c 2004-02-19 16:47:08.000000000 -0500
+@@ -484,7 +484,7 @@ nfssvc_encode_entry(struct readdir_cd *c
+
+ slen = XDR_QUADLEN(namlen);
+ if ((buflen = cd->buflen - slen - 4) < 0) {
+- cd->common.err = nfserr_readdir_nospc;
++ cd->common.err = nfserr_toosmall;
+ return -EINVAL;
+ }
+ *p++ = xdr_one; /* mark entry present */
+diff -puN include/linux/nfs.h~CITI_NFS4_ALL include/linux/nfs.h
+--- linux-2.6.3/include/linux/nfs.h~CITI_NFS4_ALL 2004-02-19 16:47:08.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/nfs.h 2004-02-19 16:47:08.000000000 -0500
+@@ -92,7 +92,7 @@
+ NFSERR_NOT_SAME = 10027, /* v4 */
+ NFSERR_LOCK_RANGE = 10028, /* v4 */
+ NFSERR_SYMLINK = 10029, /* v4 */
+- NFSERR_READDIR_NOSPC = 10030, /* v4 */
++ NFSERR_RESTOREFH = 10030, /* v4 */
+ NFSERR_LEASE_MOVED = 10031, /* v4 */
+ NFSERR_ATTRNOTSUPP = 10032, /* v4 */
+ NFSERR_NO_GRACE = 10033, /* v4 */
+diff -puN include/linux/nfsd/nfsfh.h~CITI_NFS4_ALL include/linux/nfsd/nfsfh.h
+--- linux-2.6.3/include/linux/nfsd/nfsfh.h~CITI_NFS4_ALL 2004-02-19 16:47:10.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/nfsd/nfsfh.h 2004-02-19 16:47:10.000000000 -0500
+@@ -209,14 +209,6 @@ fh_copy(struct svc_fh *dst, struct svc_f
+ return dst;
+ }
+
+-static __inline__ void
+-fh_dup2(struct svc_fh *dst, struct svc_fh *src)
+-{
+- fh_put(dst);
+- dget(src->fh_dentry);
+- *dst = *src;
+-}
+-
+ static __inline__ struct svc_fh *
+ fh_init(struct svc_fh *fhp, int maxsize)
+ {
+diff -puN include/linux/sunrpc/xdr.h~CITI_NFS4_ALL include/linux/sunrpc/xdr.h
+--- linux-2.6.3/include/linux/sunrpc/xdr.h~CITI_NFS4_ALL 2004-02-19 16:47:15.000000000 -0500
++++ linux-2.6.3-bfields/include/linux/sunrpc/xdr.h 2004-02-19 16:47:15.000000000 -0500
+@@ -225,6 +225,9 @@ xdr_reserve_space(struct xdr_stream *xdr
+ extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages,
+ unsigned int base, unsigned int len);
+ extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
++int read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len);
++int read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj);
++
+
+ /*
+ * Initialize an xdr_stream for decoding data.
+diff -puN net/sunrpc/xdr.c~CITI_NFS4_ALL net/sunrpc/xdr.c
+--- linux-2.6.3/net/sunrpc/xdr.c~CITI_NFS4_ALL 2004-02-19 16:47:15.000000000 -0500
++++ linux-2.6.3-bfields/net/sunrpc/xdr.c 2004-02-19 16:47:15.000000000 -0500
+@@ -799,7 +799,7 @@ xdr_buf_subsegment(struct xdr_buf *buf,
+ }
+
+ /* obj is assumed to point to allocated memory of size at least len: */
+-static int
++int
+ read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len)
+ {
+ struct xdr_buf subbuf;
+@@ -824,7 +824,7 @@ out:
+ return status;
+ }
+
+-static int
++int
+ read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj)
+ {
+ u32 raw;
+
+_
--- /dev/null
+Index: linux-2.6.3/fs/nfs/dir.c
+===================================================================
+--- linux-2.6.3.orig/fs/nfs/dir.c 2004-02-23 14:36:26.000000000 -0800
++++ linux-2.6.3/fs/nfs/dir.c 2004-02-23 14:46:49.000000000 -0800
+@@ -782,7 +782,7 @@
+ if (nd->flags & LOOKUP_DIRECTORY)
+ return 0;
+ /* Are we trying to write to a read only partition? */
+- if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
++ if (IS_RDONLY(dir) && (nd->intent.it_flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
+ return 0;
+ return 1;
+ }
+@@ -803,7 +803,7 @@
+ dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+
+ /* Let vfs_create() deal with O_EXCL */
+- if (nd->intent.open.flags & O_EXCL)
++ if (nd->intent.it_flags & O_EXCL)
+ goto no_entry;
+
+ /* Open the file on the server */
+@@ -811,7 +811,7 @@
+ /* Revalidate parent directory attribute cache */
+ nfs_revalidate_inode(NFS_SERVER(dir), dir);
+
+- if (nd->intent.open.flags & O_CREAT) {
++ if (nd->intent.it_flags & O_CREAT) {
+ nfs_begin_data_update(dir);
+ inode = nfs4_atomic_open(dir, dentry, nd);
+ nfs_end_data_update(dir);
+@@ -827,7 +827,7 @@
+ break;
+ /* This turned out not to be a regular file */
+ case -ELOOP:
+- if (!(nd->intent.open.flags & O_NOFOLLOW))
++ if (!(nd->intent.it_flags & O_NOFOLLOW))
+ goto no_open;
+ /* case -EISDIR: */
+ /* case -EINVAL: */
+@@ -861,7 +861,7 @@
+ dir = parent->d_inode;
+ if (!is_atomic_open(dir, nd))
+ goto no_open;
+- openflags = nd->intent.open.flags;
++ openflags = nd->intent.it_flags;
+ if (openflags & O_CREAT) {
+ /* If this is a negative dentry, just drop it */
+ if (!inode)
+.old..........pc/linux-2.6.3-nfs-intent/fs/nfs/nfs4proc.c
+.new.........fs/nfs/nfs4proc.c
+Index: linux-2.6.3/fs/nfs/nfs4proc.c
+===================================================================
+--- linux-2.6.3.orig/fs/nfs/nfs4proc.c 2004-02-23 14:36:26.000000000 -0800
++++ linux-2.6.3/fs/nfs/nfs4proc.c 2004-02-23 14:36:26.000000000 -0800
+@@ -458,17 +458,17 @@
+ struct nfs4_state *state;
+
+ if (nd->flags & LOOKUP_CREATE) {
+- attr.ia_mode = nd->intent.open.create_mode;
++ attr.ia_mode = nd->intent.it_create_mode;
+ attr.ia_valid = ATTR_MODE;
+ if (!IS_POSIXACL(dir))
+ attr.ia_mode &= ~current->fs->umask;
+ } else {
+ attr.ia_valid = 0;
+- BUG_ON(nd->intent.open.flags & O_CREAT);
++ BUG_ON(nd->intent.it_flags & O_CREAT);
+ }
+
+ cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+- state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred);
++ state = nfs4_do_open(dir, &dentry->d_name, nd->intent.it_flags, &attr, cred);
+ put_rpccred(cred);
+ if (IS_ERR(state))
+ return (struct inode *)state;
--- /dev/null
+.old..........pc/linux-2.6.3-nfs-intent-suse/fs/nfs/dir.c
+.new.........fs/nfs/dir.c
+.old..........pc/linux-2.6.3-nfs-intent-suse/fs/nfs/dir.c
+.new.........fs/nfs/dir.c
+Index: linux-2.6.3-20/fs/nfs/dir.c
+===================================================================
+--- linux-2.6.3-20.orig/fs/nfs/dir.c 2004-03-08 14:23:40.000000000 -0800
++++ linux-2.6.3-20/fs/nfs/dir.c 2004-03-08 17:07:34.000000000 -0800
+@@ -751,7 +751,7 @@
+ if (nd->flags & LOOKUP_DIRECTORY)
+ return 0;
+ /* Are we trying to write to a read only partition? */
+- if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
++ if (IS_RDONLY(dir) && (nd->intent.it_flags & (O_CREAT|O_TRUNC|FMODE_WRITE)))
+ return 0;
+ return 1;
+ }
+@@ -772,7 +772,7 @@
+ dentry->d_op = NFS_PROTO(dir)->dentry_ops;
+
+ /* Let vfs_create() deal with O_EXCL */
+- if (nd->intent.open.flags & O_EXCL)
++ if (nd->intent.it_flags & O_EXCL)
+ goto no_entry;
+
+ /* Open the file on the server */
+@@ -788,7 +788,7 @@
+ break;
+ /* This turned out not to be a regular file */
+ case -ELOOP:
+- if (!(nd->intent.open.flags & O_NOFOLLOW))
++ if (!(nd->intent.it_flags & O_NOFOLLOW))
+ goto no_open;
+ /* case -EISDIR: */
+ /* case -EINVAL: */
+@@ -818,7 +818,7 @@
+ parent = dget_parent(dentry);
+ if (!is_atomic_open(parent->d_inode, nd))
+ goto no_open;
+- openflags = nd->intent.open.flags;
++ openflags = nd->intent.it_flags;
+ if (openflags & O_CREAT) {
+ /* If this is a negative dentry, just drop it */
+ if (!inode)
+.old..........pc/linux-2.6.3-nfs-intent-suse/fs/nfs/nfs4proc.c
+.new.........fs/nfs/nfs4proc.c
+Index: linux-2.6.3-20/fs/nfs/nfs4proc.c
+===================================================================
+--- linux-2.6.3-20.orig/fs/nfs/nfs4proc.c 2004-03-05 02:07:03.000000000 -0800
++++ linux-2.6.3-20/fs/nfs/nfs4proc.c 2004-03-08 17:07:34.000000000 -0800
+@@ -778,17 +778,17 @@
+ struct nfs4_state *state;
+
+ if (nd->flags & LOOKUP_CREATE) {
+- attr.ia_mode = nd->intent.open.create_mode;
++ attr.ia_mode = nd->intent.it_create_mode;
+ attr.ia_valid = ATTR_MODE;
+ if (!IS_POSIXACL(dir))
+ attr.ia_mode &= ~current->fs->umask;
+ } else {
+ attr.ia_valid = 0;
+- BUG_ON(nd->intent.open.flags & O_CREAT);
++ BUG_ON(nd->intent.it_flags & O_CREAT);
+ }
+
+ cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0);
+- state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred);
++ state = nfs4_do_open(dir, &dentry->d_name, nd->intent.it_flags, &attr, cred);
+ put_rpccred(cred);
+ if (IS_ERR(state))
+ return (struct inode *)state;
+.old..........pc/linux-2.6.3-nfs-intent-suse/fs/cifs/dir.c
+.new.........fs/cifs/dir.c
+Index: linux-2.6.3-20/fs/cifs/dir.c
+===================================================================
+--- linux-2.6.3-20.orig/fs/cifs/dir.c 2004-03-05 02:07:03.000000000 -0800
++++ linux-2.6.3-20/fs/cifs/dir.c 2004-03-08 17:16:19.000000000 -0800
+@@ -146,18 +146,18 @@
+ if(nd) {
+ cFYI(1,("In create for inode %p dentry->inode %p nd flags = 0x%x for %s",inode, direntry->d_inode, nd->flags,full_path));
+
+- if ((nd->intent.open.flags & O_ACCMODE) == O_RDONLY)
++ if ((nd->intent.it_flags & O_ACCMODE) == O_RDONLY)
+ desiredAccess = GENERIC_READ;
+- else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY)
++ else if ((nd->intent.it_flags & O_ACCMODE) == O_WRONLY)
+ desiredAccess = GENERIC_WRITE;
+- else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR)
++ else if ((nd->intent.it_flags & O_ACCMODE) == O_RDWR)
+ desiredAccess = GENERIC_ALL;
+
+- if((nd->intent.open.flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
++ if((nd->intent.it_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
+ disposition = FILE_CREATE;
+- else if((nd->intent.open.flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
++ else if((nd->intent.it_flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
+ disposition = FILE_OVERWRITE_IF;
+- else if((nd->intent.open.flags & O_CREAT) == O_CREAT)
++ else if((nd->intent.it_flags & O_CREAT) == O_CREAT)
+ disposition = FILE_OPEN_IF;
+ else {
+ cFYI(1,("Create flag not set in create function"));
+@@ -314,7 +314,7 @@
+ parent_dir_inode, direntry->d_name.name, direntry));
+
+ if(nd) { /* BB removeme */
+- cFYI(1,("In lookup nd flags 0x%x open intent flags 0x%x",nd->flags,nd->intent.open.flags));
++ cFYI(1,("In lookup nd flags 0x%x open intent flags 0x%x",nd->flags,nd->intent.it_flags));
+ } /* BB removeme BB */
+ /* BB Add check of incoming data - e.g. frame not longer than maximum SMB - let server check the namelen BB */
+
--- /dev/null
+Index: linux-2.6.3/arch/i386/kernel/sys_i386.c
+===================================================================
+--- linux-2.6.3.orig/arch/i386/kernel/sys_i386.c 2004-02-23 14:21:03.000000000 -0800
++++ linux-2.6.3/arch/i386/kernel/sys_i386.c 2004-02-23 14:24:38.000000000 -0800
+@@ -56,7 +56,7 @@
+ }
+
+ down_write(&current->mm->mmap_sem);
+- error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
++ error = do_mmap_pgoff(current->mm, file, addr, len, prot, flags, pgoff);
+ up_write(&current->mm->mmap_sem);
+
+ if (file)
--- /dev/null
+diff -Naur a/arch/um/config.release b/arch/um/config.release
+--- a/arch/um/config.release 2004-02-11 12:16:48.000000000 -0500
++++ b/arch/um/config.release 2004-02-11 12:29:03.000000000 -0500
+@@ -228,7 +228,6 @@
+ CONFIG_EXT2_FS=y
+ CONFIG_SYSV_FS=m
+ CONFIG_UDF_FS=m
+-# CONFIG_UDF_RW is not set
+ CONFIG_UFS_FS=m
+ # CONFIG_UFS_FS_WRITE is not set
+
+diff -Naur a/arch/um/defconfig b/arch/um/defconfig
+--- a/arch/um/defconfig 2004-02-11 12:16:02.000000000 -0500
++++ b/arch/um/defconfig 2004-02-11 12:27:57.000000000 -0500
+@@ -3,29 +3,19 @@
+ #
+ CONFIG_USERMODE=y
+ CONFIG_MMU=y
+-CONFIG_SWAP=y
+ CONFIG_UID16=y
+ CONFIG_RWSEM_GENERIC_SPINLOCK=y
+-CONFIG_CONFIG_LOG_BUF_SHIFT=14
+
+ #
+-# Code maturity level options
+-#
+-CONFIG_EXPERIMENTAL=y
+-
+-#
+-# General Setup
++# UML-specific options
+ #
+ CONFIG_MODE_TT=y
+ CONFIG_MODE_SKAS=y
+ CONFIG_NET=y
+-CONFIG_SYSVIPC=y
+-CONFIG_BSD_PROCESS_ACCT=y
+-CONFIG_SYSCTL=y
+-CONFIG_BINFMT_AOUT=y
+ CONFIG_BINFMT_ELF=y
+ CONFIG_BINFMT_MISC=y
+ CONFIG_HOSTFS=y
++CONFIG_HPPFS=y
+ CONFIG_MCONSOLE=y
+ CONFIG_MAGIC_SYSRQ=y
+ # CONFIG_HOST_2G_2G is not set
+@@ -36,12 +26,41 @@
+ # CONFIG_HIGHMEM is not set
+ CONFIG_PROC_MM=y
+ CONFIG_KERNEL_STACK_ORDER=2
++CONFIG_UML_REAL_TIME_CLOCK=y
++
++#
++# Code maturity level options
++#
++CONFIG_EXPERIMENTAL=y
++CONFIG_CLEAN_COMPILE=y
++CONFIG_STANDALONE=y
++CONFIG_BROKEN_ON_SMP=y
++
++#
++# General setup
++#
++CONFIG_SWAP=y
++CONFIG_SYSVIPC=y
++CONFIG_BSD_PROCESS_ACCT=y
++CONFIG_SYSCTL=y
++CONFIG_LOG_BUF_SHIFT=14
++# CONFIG_IKCONFIG is not set
++# CONFIG_EMBEDDED is not set
++CONFIG_KALLSYMS=y
++CONFIG_FUTEX=y
++CONFIG_EPOLL=y
++CONFIG_IOSCHED_NOOP=y
++CONFIG_IOSCHED_AS=y
++CONFIG_IOSCHED_DEADLINE=y
+
+ #
+ # Loadable module support
+ #
+-CONFIG_MODULES=y
+-# CONFIG_KMOD is not set
++# CONFIG_MODULES is not set
++
++#
++# Generic Driver Options
++#
+
+ #
+ # Character Devices
+@@ -69,6 +88,7 @@
+ #
+ CONFIG_BLK_DEV_UBD=y
+ # CONFIG_BLK_DEV_UBD_SYNC is not set
++CONFIG_BLK_DEV_COW_COMMON=y
+ CONFIG_BLK_DEV_LOOP=y
+ CONFIG_BLK_DEV_NBD=y
+ CONFIG_BLK_DEV_RAM=y
+@@ -78,7 +98,7 @@
+ CONFIG_NETDEVICES=y
+
+ #
+-# Network Devices
++# UML Network Devices
+ #
+ CONFIG_UML_NET=y
+ CONFIG_UML_NET_ETHERTAP=y
+@@ -88,22 +108,6 @@
+ CONFIG_UML_NET_MCAST=y
+ # CONFIG_UML_NET_PCAP is not set
+ CONFIG_UML_NET_SLIRP=y
+-CONFIG_DUMMY=y
+-# CONFIG_BONDING is not set
+-# CONFIG_EQUALIZER is not set
+-CONFIG_TUN=y
+-# CONFIG_ETHERTAP is not set
+-CONFIG_PPP=y
+-# CONFIG_PPP_MULTILINK is not set
+-# CONFIG_PPP_ASYNC is not set
+-# CONFIG_PPP_SYNC_TTY is not set
+-# CONFIG_PPP_DEFLATE is not set
+-# CONFIG_PPP_BSDCOMP is not set
+-# CONFIG_PPPOE is not set
+-CONFIG_SLIP=y
+-# CONFIG_SLIP_COMPRESSED is not set
+-# CONFIG_SLIP_SMART is not set
+-# CONFIG_SLIP_MODE_SLIP6 is not set
+
+ #
+ # Networking support
+@@ -115,8 +119,6 @@
+ CONFIG_PACKET=y
+ CONFIG_PACKET_MMAP=y
+ # CONFIG_NETLINK_DEV is not set
+-# CONFIG_NETFILTER is not set
+-# CONFIG_FILTER is not set
+ CONFIG_UNIX=y
+ # CONFIG_NET_KEY is not set
+ CONFIG_INET=y
+@@ -130,8 +132,11 @@
+ # CONFIG_SYN_COOKIES is not set
+ # CONFIG_INET_AH is not set
+ # CONFIG_INET_ESP is not set
+-# CONFIG_XFRM_USER is not set
++# CONFIG_INET_IPCOMP is not set
+ # CONFIG_IPV6 is not set
++# CONFIG_DECNET is not set
++# CONFIG_BRIDGE is not set
++# CONFIG_NETFILTER is not set
+
+ #
+ # SCTP Configuration (EXPERIMENTAL)
+@@ -140,9 +145,9 @@
+ # CONFIG_IP_SCTP is not set
+ # CONFIG_ATM is not set
+ # CONFIG_VLAN_8021Q is not set
+-# CONFIG_LLC is not set
+-# CONFIG_DECNET is not set
+-# CONFIG_BRIDGE is not set
++# CONFIG_LLC2 is not set
++# CONFIG_IPX is not set
++# CONFIG_ATALK is not set
+ # CONFIG_X25 is not set
+ # CONFIG_LAPB is not set
+ # CONFIG_NET_DIVERT is not set
+@@ -160,6 +165,10 @@
+ # Network testing
+ #
+ # CONFIG_NET_PKTGEN is not set
++CONFIG_DUMMY=y
++# CONFIG_BONDING is not set
++# CONFIG_EQUALIZER is not set
++CONFIG_TUN=y
+
+ #
+ # Ethernet (10 or 100Mbit)
+@@ -171,12 +180,28 @@
+ #
+
+ #
++# Ethernet (10000 Mbit)
++#
++CONFIG_PPP=y
++# CONFIG_PPP_MULTILINK is not set
++# CONFIG_PPP_FILTER is not set
++# CONFIG_PPP_ASYNC is not set
++# CONFIG_PPP_SYNC_TTY is not set
++# CONFIG_PPP_DEFLATE is not set
++# CONFIG_PPP_BSDCOMP is not set
++# CONFIG_PPPOE is not set
++CONFIG_SLIP=y
++# CONFIG_SLIP_COMPRESSED is not set
++# CONFIG_SLIP_SMART is not set
++# CONFIG_SLIP_MODE_SLIP6 is not set
++
++#
+ # Wireless LAN (non-hamradio)
+ #
+ # CONFIG_NET_RADIO is not set
+
+ #
+-# Token Ring devices (depends on LLC=y)
++# Token Ring devices
+ #
+ # CONFIG_SHAPER is not set
+
+@@ -186,68 +211,101 @@
+ # CONFIG_WAN is not set
+
+ #
++# Amateur Radio support
++#
++# CONFIG_HAMRADIO is not set
++
++#
++# IrDA (infrared) support
++#
++# CONFIG_IRDA is not set
++
++#
++# Bluetooth support
++#
++# CONFIG_BT is not set
++
++#
+ # File systems
+ #
++CONFIG_EXT2_FS=y
++# CONFIG_EXT2_FS_XATTR is not set
++# CONFIG_EXT3_FS is not set
++# CONFIG_JBD is not set
++CONFIG_REISERFS_FS=y
++# CONFIG_REISERFS_CHECK is not set
++# CONFIG_REISERFS_PROC_INFO is not set
++# CONFIG_JFS_FS is not set
++# CONFIG_XFS_FS is not set
++CONFIG_MINIX_FS=y
++# CONFIG_ROMFS_FS is not set
+ CONFIG_QUOTA=y
+ # CONFIG_QFMT_V1 is not set
+ # CONFIG_QFMT_V2 is not set
+ CONFIG_QUOTACTL=y
+-CONFIG_AUTOFS_FS=m
+-CONFIG_AUTOFS4_FS=m
+-CONFIG_REISERFS_FS=m
+-# CONFIG_REISERFS_CHECK is not set
+-# CONFIG_REISERFS_PROC_INFO is not set
++CONFIG_AUTOFS_FS=y
++CONFIG_AUTOFS4_FS=y
++
++#
++# CD-ROM/DVD Filesystems
++#
++CONFIG_ISO9660_FS=y
++# CONFIG_JOLIET is not set
++# CONFIG_ZISOFS is not set
++# CONFIG_UDF_FS is not set
++
++#
++# DOS/FAT/NT Filesystems
++#
++CONFIG_FAT_FS=y
++CONFIG_MSDOS_FS=y
++CONFIG_VFAT_FS=y
++# CONFIG_NTFS_FS is not set
++
++#
++# Pseudo filesystems
++#
++CONFIG_PROC_FS=y
++CONFIG_PROC_KCORE=y
++CONFIG_DEVFS_FS=y
++CONFIG_DEVFS_MOUNT=y
++# CONFIG_DEVFS_DEBUG is not set
++CONFIG_DEVPTS_FS=y
++# CONFIG_DEVPTS_FS_XATTR is not set
++# CONFIG_TMPFS is not set
++# CONFIG_HUGETLB_PAGE is not set
++CONFIG_RAMFS=y
++
++#
++# Miscellaneous filesystems
++#
+ # CONFIG_ADFS_FS is not set
+ # CONFIG_AFFS_FS is not set
+ # CONFIG_HFS_FS is not set
+ # CONFIG_BEFS_FS is not set
+ # CONFIG_BFS_FS is not set
+-# CONFIG_EXT3_FS is not set
+-# CONFIG_JBD is not set
+-CONFIG_FAT_FS=m
+-CONFIG_MSDOS_FS=m
+-CONFIG_VFAT_FS=m
+ # CONFIG_EFS_FS is not set
+ CONFIG_JFFS_FS=y
+ CONFIG_JFFS_FS_VERBOSE=0
+-CONFIG_JFFS_PROC_FS=y
+ # CONFIG_JFFS2_FS is not set
+ # CONFIG_CRAMFS is not set
+-# CONFIG_TMPFS is not set
+-CONFIG_RAMFS=y
+-CONFIG_ISO9660_FS=m
+-# CONFIG_JOLIET is not set
+-# CONFIG_ZISOFS is not set
+-# CONFIG_JFS_FS is not set
+-CONFIG_MINIX_FS=m
+ # CONFIG_VXFS_FS is not set
+-# CONFIG_NTFS_FS is not set
+ # CONFIG_HPFS_FS is not set
+-CONFIG_PROC_FS=y
+-CONFIG_DEVFS_FS=y
+-CONFIG_DEVFS_MOUNT=y
+-# CONFIG_DEVFS_DEBUG is not set
+-CONFIG_DEVPTS_FS=y
+ # CONFIG_QNX4FS_FS is not set
+-# CONFIG_ROMFS_FS is not set
+-CONFIG_EXT2_FS=y
+-# CONFIG_EXT2_FS_XATTR is not set
+ # CONFIG_SYSV_FS is not set
+-# CONFIG_UDF_FS is not set
+ # CONFIG_UFS_FS is not set
+-# CONFIG_XFS_FS is not set
+
+ #
+ # Network File Systems
+ #
+-# CONFIG_CODA_FS is not set
+-# CONFIG_INTERMEZZO_FS is not set
+ # CONFIG_NFS_FS is not set
+ # CONFIG_NFSD is not set
+ # CONFIG_EXPORTFS is not set
+-# CONFIG_CIFS is not set
+ # CONFIG_SMB_FS is not set
++# CONFIG_CIFS is not set
+ # CONFIG_NCP_FS is not set
++# CONFIG_CODA_FS is not set
++# CONFIG_INTERMEZZO_FS is not set
+ # CONFIG_AFS_FS is not set
+
+ #
+@@ -317,28 +375,7 @@
+ #
+ # SCSI support
+ #
+-CONFIG_SCSI=y
+-CONFIG_GENERIC_ISA_DMA=y
+-
+-#
+-# SCSI support type (disk, tape, CD-ROM)
+-#
+-CONFIG_BLK_DEV_SD=y
+-CONFIG_SD_EXTRA_DEVS=40
+-CONFIG_CHR_DEV_ST=y
+-CONFIG_BLK_DEV_SR=y
+-CONFIG_BLK_DEV_SR_VENDOR=y
+-CONFIG_SR_EXTRA_DEVS=2
+-CONFIG_CHR_DEV_SG=y
+-
+-#
+-# Some SCSI devices (e.g. CD jukebox) support multiple LUNs
+-#
+-CONFIG_SCSI_DEBUG_QUEUES=y
+-CONFIG_SCSI_MULTI_LUN=y
+-CONFIG_SCSI_CONSTANTS=y
+-CONFIG_SCSI_LOGGING=y
+-CONFIG_SCSI_DEBUG=y
++# CONFIG_SCSI is not set
+
+ #
+ # Multi-device support (RAID and LVM)
+@@ -360,6 +397,7 @@
+ CONFIG_MTD_BLOCK=y
+ # CONFIG_FTL is not set
+ # CONFIG_NFTL is not set
++# CONFIG_INFTL is not set
+
+ #
+ # RAM/ROM/Flash chip drivers
+@@ -374,20 +412,21 @@
+ #
+ # Mapping drivers for chip access
+ #
++# CONFIG_MTD_COMPLEX_MAPPINGS is not set
+
+ #
+ # Self-contained MTD device drivers
+ #
+ # CONFIG_MTD_SLRAM is not set
+ # CONFIG_MTD_MTDRAM is not set
+-CONFIG_MTD_BLKMTD=m
++CONFIG_MTD_BLKMTD=y
+
+ #
+ # Disk-On-Chip Device Drivers
+ #
+-# CONFIG_MTD_DOC1000 is not set
+ # CONFIG_MTD_DOC2000 is not set
+ # CONFIG_MTD_DOC2001 is not set
++# CONFIG_MTD_DOC2001PLUS is not set
+
+ #
+ # NAND Flash Device Drivers
+diff -Naur a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
+--- a/arch/um/drivers/chan_kern.c 2004-02-11 12:16:50.000000000 -0500
++++ b/arch/um/drivers/chan_kern.c 2004-02-11 12:29:06.000000000 -0500
+@@ -8,6 +8,7 @@
+ #include <linux/list.h>
+ #include <linux/slab.h>
+ #include <linux/tty.h>
++#include <linux/string.h>
+ #include <linux/tty_flip.h>
+ #include <asm/irq.h>
+ #include "chan_kern.h"
+@@ -265,6 +266,11 @@
+ {
+ int n = 0;
+
++ if(chan == NULL){
++ CONFIG_CHUNK(str, size, n, "none", 1);
++ return(n);
++ }
++
+ CONFIG_CHUNK(str, size, n, chan->ops->type, 0);
+
+ if(chan->dev == NULL){
+@@ -420,7 +426,8 @@
+ INIT_LIST_HEAD(chans);
+ }
+
+- if((out = strchr(str, ',')) != NULL){
++ out = strchr(str, ',');
++ if(out != NULL){
+ in = str;
+ *out = '\0';
+ out++;
+@@ -475,12 +482,15 @@
+ goto out;
+ }
+ err = chan->ops->read(chan->fd, &c, chan->data);
+- if(err > 0) tty_receive_char(tty, c);
++ if(err > 0)
++ tty_receive_char(tty, c);
+ } while(err > 0);
++
+ if(err == 0) reactivate_fd(chan->fd, irq);
+ if(err == -EIO){
+ if(chan->primary){
+- if(tty != NULL) tty_hangup(tty);
++ if(tty != NULL)
++ tty_hangup(tty);
+ line_disable(dev, irq);
+ close_chan(chans);
+ free_chan(chans);
+diff -Naur a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c
+--- a/arch/um/drivers/chan_user.c 2004-02-11 12:14:14.000000000 -0500
++++ b/arch/um/drivers/chan_user.c 2004-02-11 12:25:42.000000000 -0500
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
++ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+@@ -7,7 +7,6 @@
+ #include <stdlib.h>
+ #include <errno.h>
+ #include <termios.h>
+-#include <fcntl.h>
+ #include <string.h>
+ #include <signal.h>
+ #include <sys/stat.h>
+@@ -24,29 +23,27 @@
+
+ void generic_close(int fd, void *unused)
+ {
+- close(fd);
++ os_close_file(fd);
+ }
+
+ int generic_read(int fd, char *c_out, void *unused)
+ {
+ int n;
+
+- n = read(fd, c_out, sizeof(*c_out));
+- if(n < 0){
+- if(errno == EAGAIN) return(0);
+- return(-errno);
+- }
+- else if(n == 0) return(-EIO);
+- return(1);
++ n = os_read_file(fd, c_out, sizeof(*c_out));
++
++ if(n == -EAGAIN)
++ return(0);
++ else if(n == 0)
++ return(-EIO);
++ return(n);
+ }
+
++/* XXX Trivial wrapper around os_write_file */
++
+ int generic_write(int fd, const char *buf, int n, void *unused)
+ {
+- int count;
+-
+- count = write(fd, buf, n);
+- if(count < 0) return(-errno);
+- return(count);
++ return(os_write_file(fd, buf, n));
+ }
+
+ int generic_console_write(int fd, const char *buf, int n, void *unused)
+@@ -68,15 +65,18 @@
+ int generic_window_size(int fd, void *unused, unsigned short *rows_out,
+ unsigned short *cols_out)
+ {
+- struct winsize size;
+- int ret = 0;
++ int rows, cols;
++ int ret;
++
++ ret = os_window_size(fd, &rows, &cols);
++ if(ret < 0)
++ return(ret);
++
++ ret = ((*rows_out != rows) || (*cols_out != cols));
++
++ *rows_out = rows;
++ *cols_out = cols;
+
+- if(ioctl(fd, TIOCGWINSZ, &size) == 0){
+- ret = ((*rows_out != size.ws_row) ||
+- (*cols_out != size.ws_col));
+- *rows_out = size.ws_row;
+- *cols_out = size.ws_col;
+- }
+ return(ret);
+ }
+
+@@ -100,14 +100,16 @@
+ struct winch_data *data = arg;
+ sigset_t sigs;
+ int pty_fd, pipe_fd;
++ int count, err;
+ char c = 1;
+
+- close(data->close_me);
++ os_close_file(data->close_me);
+ pty_fd = data->pty_fd;
+ pipe_fd = data->pipe_fd;
+- if(write(pipe_fd, &c, sizeof(c)) != sizeof(c))
++ count = os_write_file(pipe_fd, &c, sizeof(c));
++ if(count != sizeof(c))
+ printk("winch_thread : failed to write synchronization "
+- "byte, errno = %d\n", errno);
++ "byte, err = %d\n", -count);
+
+ signal(SIGWINCH, winch_handler);
+ sigfillset(&sigs);
+@@ -123,26 +125,24 @@
+ exit(1);
+ }
+
+- if(ioctl(pty_fd, TIOCSCTTY, 0) < 0){
+- printk("winch_thread : TIOCSCTTY failed, errno = %d\n", errno);
+- exit(1);
+- }
+- if(tcsetpgrp(pty_fd, os_getpid()) < 0){
+- printk("winch_thread : tcsetpgrp failed, errno = %d\n", errno);
++ err = os_new_tty_pgrp(pty_fd, os_getpid());
++ if(err < 0){
++ printk("winch_thread : new_tty_pgrp failed, err = %d\n", -err);
+ exit(1);
+ }
+
+- if(read(pipe_fd, &c, sizeof(c)) != sizeof(c))
++ count = os_read_file(pipe_fd, &c, sizeof(c));
++ if(count != sizeof(c))
+ printk("winch_thread : failed to read synchronization byte, "
+- "errno = %d\n", errno);
++ "err = %d\n", -count);
+
+ while(1){
+ pause();
+
+- if(write(pipe_fd, &c, sizeof(c)) != sizeof(c)){
+- printk("winch_thread : write failed, errno = %d\n",
+- errno);
+- }
++ count = os_write_file(pipe_fd, &c, sizeof(c));
++ if(count != sizeof(c))
++ printk("winch_thread : write failed, err = %d\n",
++ -count);
+ }
+ }
+
+@@ -154,8 +154,8 @@
+ char c;
+
+ err = os_pipe(fds, 1, 1);
+- if(err){
+- printk("winch_tramp : os_pipe failed, errno = %d\n", -err);
++ if(err < 0){
++ printk("winch_tramp : os_pipe failed, err = %d\n", -err);
+ return(err);
+ }
+
+@@ -168,12 +168,12 @@
+ return(pid);
+ }
+
+- close(fds[1]);
++ os_close_file(fds[1]);
+ *fd_out = fds[0];
+- n = read(fds[0], &c, sizeof(c));
++ n = os_read_file(fds[0], &c, sizeof(c));
+ if(n != sizeof(c)){
+ printk("winch_tramp : failed to read synchronization byte\n");
+- printk("read returned %d, errno = %d\n", n, errno);
++ printk("read failed, err = %d\n", -n);
+ printk("fd %d will not support SIGWINCH\n", fd);
+ *fd_out = -1;
+ }
+@@ -183,20 +183,24 @@
+ void register_winch(int fd, void *device_data)
+ {
+ int pid, thread, thread_fd;
++ int count;
+ char c = 1;
+
+- if(!isatty(fd)) return;
++ if(!isatty(fd))
++ return;
+
+ pid = tcgetpgrp(fd);
+- if(!CHOOSE_MODE(is_tracer_winch(pid, fd, device_data), 0) &&
+- (pid == -1)){
++ if(!CHOOSE_MODE_PROC(is_tracer_winch, is_skas_winch, pid, fd,
++ device_data) && (pid == -1)){
+ thread = winch_tramp(fd, device_data, &thread_fd);
+ if(fd != -1){
+ register_winch_irq(thread_fd, fd, thread, device_data);
+
+- if(write(thread_fd, &c, sizeof(c)) != sizeof(c))
++ count = os_write_file(thread_fd, &c, sizeof(c));
++ if(count != sizeof(c))
+ printk("register_winch : failed to write "
+- "synchronization byte\n");
++ "synchronization byte, err = %d\n",
++ -count);
+ }
+ }
+ }
+diff -Naur a/arch/um/drivers/cow.h b/arch/um/drivers/cow.h
+--- a/arch/um/drivers/cow.h 1969-12-31 19:00:00.000000000 -0500
++++ b/arch/um/drivers/cow.h 2004-02-11 12:26:08.000000000 -0500
+@@ -0,0 +1,41 @@
++#ifndef __COW_H__
++#define __COW_H__
++
++#include <asm/types.h>
++
++#if __BYTE_ORDER == __BIG_ENDIAN
++# define ntohll(x) (x)
++# define htonll(x) (x)
++#elif __BYTE_ORDER == __LITTLE_ENDIAN
++# define ntohll(x) bswap_64(x)
++# define htonll(x) bswap_64(x)
++#else
++#error "__BYTE_ORDER not defined"
++#endif
++
++extern int init_cow_file(int fd, char *cow_file, char *backing_file,
++ int sectorsize, int alignment, int *bitmap_offset_out,
++ unsigned long *bitmap_len_out, int *data_offset_out);
++
++extern int file_reader(__u64 offset, char *buf, int len, void *arg);
++extern int read_cow_header(int (*reader)(__u64, char *, int, void *),
++ void *arg, __u32 *version_out,
++ char **backing_file_out, time_t *mtime_out,
++ __u64 *size_out, int *sectorsize_out,
++ __u32 *align_out, int *bitmap_offset_out);
++
++extern int write_cow_header(char *cow_file, int fd, char *backing_file,
++ int sectorsize, int alignment, long long *size);
++
++extern void cow_sizes(int version, __u64 size, int sectorsize, int align,
++ int bitmap_offset, unsigned long *bitmap_len_out,
++ int *data_offset_out);
++
++#endif
++
++/*
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/drivers/cow_kern.c b/arch/um/drivers/cow_kern.c
+--- a/arch/um/drivers/cow_kern.c 1969-12-31 19:00:00.000000000 -0500
++++ b/arch/um/drivers/cow_kern.c 2004-02-11 12:29:06.000000000 -0500
+@@ -0,0 +1,630 @@
++#define COW_MAJOR 60
++#define MAJOR_NR COW_MAJOR
++
++#include <linux/stddef.h>
++#include <linux/kernel.h>
++#include <linux/ctype.h>
++#include <linux/stat.h>
++#include <linux/vmalloc.h>
++#include <linux/blkdev.h>
++#include <linux/blk.h>
++#include <linux/fs.h>
++#include <linux/genhd.h>
++#include <linux/devfs_fs.h>
++#include <asm/uaccess.h>
++#include "2_5compat.h"
++#include "cow.h"
++#include "ubd_user.h"
++
++#define COW_SHIFT 4
++
++struct cow {
++ int count;
++ char *cow_path;
++ dev_t cow_dev;
++ struct block_device *cow_bdev;
++ char *backing_path;
++ dev_t backing_dev;
++ struct block_device *backing_bdev;
++ int sectorsize;
++ unsigned long *bitmap;
++ unsigned long bitmap_len;
++ int bitmap_offset;
++ int data_offset;
++ devfs_handle_t devfs;
++ struct semaphore sem;
++ struct semaphore io_sem;
++ atomic_t working;
++ spinlock_t io_lock;
++ struct buffer_head *bh;
++ struct buffer_head *bhtail;
++ void *end_io;
++};
++
++#define DEFAULT_COW { \
++ .count = 0, \
++ .cow_path = NULL, \
++ .cow_dev = 0, \
++ .backing_path = NULL, \
++ .backing_dev = 0, \
++ .bitmap = NULL, \
++ .bitmap_len = 0, \
++ .bitmap_offset = 0, \
++ .data_offset = 0, \
++ .devfs = NULL, \
++ .working = ATOMIC_INIT(0), \
++ .io_lock = SPIN_LOCK_UNLOCKED, \
++}
++
++#define MAX_DEV (8)
++#define MAX_MINOR (MAX_DEV << COW_SHIFT)
++
++struct cow cow_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_COW };
++
++/* Not modified by this driver */
++static int blk_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = BLOCK_SIZE };
++static int hardsect_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 512 };
++
++/* Protected by cow_lock */
++static int sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 0 };
++
++static struct hd_struct cow_part[MAX_MINOR] =
++ { [ 0 ... MAX_MINOR - 1 ] = { 0, 0, 0 } };
++
++/* Protected by io_request_lock */
++static request_queue_t *cow_queue;
++
++static int cow_open(struct inode *inode, struct file *filp);
++static int cow_release(struct inode * inode, struct file * file);
++static int cow_ioctl(struct inode * inode, struct file * file,
++ unsigned int cmd, unsigned long arg);
++static int cow_revalidate(kdev_t rdev);
++
++static struct block_device_operations cow_blops = {
++ .open = cow_open,
++ .release = cow_release,
++ .ioctl = cow_ioctl,
++ .revalidate = cow_revalidate,
++};
++
++/* Initialized in an initcall, and unchanged thereafter */
++devfs_handle_t cow_dir_handle;
++
++#define INIT_GENDISK(maj, name, parts, shift, bsizes, max, blops) \
++{ \
++ .major = maj, \
++ .major_name = name, \
++ .minor_shift = shift, \
++ .max_p = 1 << shift, \
++ .part = parts, \
++ .sizes = bsizes, \
++ .nr_real = max, \
++ .real_devices = NULL, \
++ .next = NULL, \
++ .fops = blops, \
++ .de_arr = NULL, \
++ .flags = 0 \
++}
++
++static spinlock_t cow_lock = SPIN_LOCK_UNLOCKED;
++
++static struct gendisk cow_gendisk = INIT_GENDISK(MAJOR_NR, "cow", cow_part,
++ COW_SHIFT, sizes, MAX_DEV,
++ &cow_blops);
++
++static int cow_add(int n)
++{
++ struct cow *dev = &cow_dev[n];
++ char name[sizeof("nnnnnn\0")];
++ int err = -ENODEV;
++
++ if(dev->cow_path == NULL)
++ goto out;
++
++ sprintf(name, "%d", n);
++ dev->devfs = devfs_register(cow_dir_handle, name, DEVFS_FL_REMOVABLE,
++ MAJOR_NR, n << COW_SHIFT, S_IFBLK |
++ S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP,
++ &cow_blops, NULL);
++
++ init_MUTEX_LOCKED(&dev->sem);
++ init_MUTEX(&dev->io_sem);
++
++ return(0);
++
++ out:
++ return(err);
++}
++
++/*
++ * Add buffer_head to back of pending list
++ */
++static void cow_add_bh(struct cow *cow, struct buffer_head *bh)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&cow->io_lock, flags);
++ if(cow->bhtail != NULL){
++ cow->bhtail->b_reqnext = bh;
++ cow->bhtail = bh;
++ }
++ else {
++ cow->bh = bh;
++ cow->bhtail = bh;
++ }
++ spin_unlock_irqrestore(&cow->io_lock, flags);
++}
++
++/*
++* Grab first pending buffer
++*/
++static struct buffer_head *cow_get_bh(struct cow *cow)
++{
++ struct buffer_head *bh;
++
++ spin_lock_irq(&cow->io_lock);
++ bh = cow->bh;
++ if(bh != NULL){
++ if(bh == cow->bhtail)
++ cow->bhtail = NULL;
++ cow->bh = bh->b_reqnext;
++ bh->b_reqnext = NULL;
++ }
++ spin_unlock_irq(&cow->io_lock);
++
++ return(bh);
++}
++
++static void cow_handle_bh(struct cow *cow, struct buffer_head *bh,
++ struct buffer_head **cow_bh, int ncow_bh)
++{
++ int i;
++
++ if(ncow_bh > 0)
++ ll_rw_block(WRITE, ncow_bh, cow_bh);
++
++ for(i = 0; i < ncow_bh ; i++){
++ wait_on_buffer(cow_bh[i]);
++ brelse(cow_bh[i]);
++ }
++
++ ll_rw_block(WRITE, 1, &bh);
++ brelse(bh);
++}
++
++static struct buffer_head *cow_new_bh(struct cow *dev, int sector)
++{
++ struct buffer_head *bh;
++
++ sector = (dev->bitmap_offset + sector / 8) / dev->sectorsize;
++ bh = getblk(dev->cow_dev, sector, dev->sectorsize);
++ memcpy(bh->b_data, dev->bitmap + sector / (8 * sizeof(dev->bitmap[0])),
++ dev->sectorsize);
++ return(bh);
++}
++
++/* Copied from loop.c, needed to avoid deadlocking in make_request. */
++
++static int cow_thread(void *data)
++{
++ struct cow *dev = data;
++ struct buffer_head *bh;
++
++ daemonize();
++ exit_files(current);
++
++ sprintf(current->comm, "cow%d", dev - cow_dev);
++
++ spin_lock_irq(¤t->sigmask_lock);
++ sigfillset(¤t->blocked);
++ flush_signals(current);
++ spin_unlock_irq(¤t->sigmask_lock);
++
++ atomic_inc(&dev->working);
++
++ current->policy = SCHED_OTHER;
++ current->nice = -20;
++
++ current->flags |= PF_NOIO;
++
++ /*
++ * up sem, we are running
++ */
++ up(&dev->sem);
++
++ for(;;){
++ int start, len, nbh, i, update_bitmap = 0;
++ struct buffer_head *cow_bh[2];
++
++ down_interruptible(&dev->io_sem);
++ /*
++ * could be upped because of tear-down, not because of
++ * pending work
++ */
++ if(!atomic_read(&dev->working))
++ break;
++
++ bh = cow_get_bh(dev);
++ if(bh == NULL){
++ printk(KERN_ERR "cow: missing bh\n");
++ continue;
++ }
++
++ start = bh->b_blocknr * bh->b_size / dev->sectorsize;
++ len = bh->b_size / dev->sectorsize;
++ for(i = 0; i < len ; i++){
++ if(ubd_test_bit(start + i,
++ (unsigned char *) dev->bitmap))
++ continue;
++
++ update_bitmap = 1;
++ ubd_set_bit(start + i, (unsigned char *) dev->bitmap);
++ }
++
++ cow_bh[0] = NULL;
++ cow_bh[1] = NULL;
++ nbh = 0;
++ if(update_bitmap){
++ cow_bh[0] = cow_new_bh(dev, start);
++ nbh++;
++ if(start / dev->sectorsize !=
++ (start + len) / dev->sectorsize){
++ cow_bh[1] = cow_new_bh(dev, start + len);
++ nbh++;
++ }
++ }
++
++ bh->b_dev = dev->cow_dev;
++ bh->b_blocknr += dev->data_offset / dev->sectorsize;
++
++ cow_handle_bh(dev, bh, cow_bh, nbh);
++
++ /*
++		 * upped both for pending work and tear-down, dev->working
++		 * will hit zero then
++ */
++ if(atomic_dec_and_test(&dev->working))
++ break;
++ }
++
++ up(&dev->sem);
++ return(0);
++}
++
++static int cow_make_request(request_queue_t *q, int rw, struct buffer_head *bh)
++{
++ struct cow *dev;
++ int n, minor;
++
++ minor = MINOR(bh->b_rdev);
++ n = minor >> COW_SHIFT;
++ dev = &cow_dev[n];
++
++ dev->end_io = NULL;
++ if(ubd_test_bit(bh->b_rsector, (unsigned char *) dev->bitmap)){
++ bh->b_rdev = dev->cow_dev;
++ bh->b_rsector += dev->data_offset / dev->sectorsize;
++ }
++ else if(rw == WRITE){
++ bh->b_dev = dev->cow_dev;
++ bh->b_blocknr += dev->data_offset / dev->sectorsize;
++
++ cow_add_bh(dev, bh);
++ up(&dev->io_sem);
++ return(0);
++ }
++ else {
++ bh->b_rdev = dev->backing_dev;
++ }
++
++ return(1);
++}
++
++int cow_init(void)
++{
++ int i;
++
++ cow_dir_handle = devfs_mk_dir (NULL, "cow", NULL);
++ if (devfs_register_blkdev(MAJOR_NR, "cow", &cow_blops)) {
++ printk(KERN_ERR "cow: unable to get major %d\n", MAJOR_NR);
++ return -1;
++ }
++ read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */
++ blksize_size[MAJOR_NR] = blk_sizes;
++ blk_size[MAJOR_NR] = sizes;
++ INIT_HARDSECT(hardsect_size, MAJOR_NR, hardsect_sizes);
++
++ cow_queue = BLK_DEFAULT_QUEUE(MAJOR_NR);
++ blk_init_queue(cow_queue, NULL);
++ INIT_ELV(cow_queue, &cow_queue->elevator);
++ blk_queue_make_request(cow_queue, cow_make_request);
++
++ add_gendisk(&cow_gendisk);
++
++ for(i=0;i<MAX_DEV;i++)
++ cow_add(i);
++
++ return(0);
++}
++
++__initcall(cow_init);
++
++static int reader(__u64 start, char *buf, int count, void *arg)
++{
++	dev_t dev = *((dev_t *) arg);
++	struct buffer_head *bh;
++	__u64 block;
++	int cur, offset, left, n, blocksize = get_hardsect_size(dev);
++	/* Copy count bytes from byte offset start of device *arg; count or -EIO */
++	if(blocksize == 0)
++		panic("Zero blocksize");
++
++	block = start / blocksize;
++	offset = start % blocksize;
++	left = count;
++	cur = 0;
++	while(left > 0){
++		n = (left > blocksize - offset) ? blocksize - offset : left;
++
++		bh = bread(dev, block, blocksize);
++		if(bh == NULL)
++			return(-EIO);
++
++		/* n is capped by both left and the block's tail */
++		memcpy(&buf[cur], bh->b_data + offset, n);
++		block++;
++		left -= n;
++		cur += n;
++		offset = 0;
++		brelse(bh);
++	}
++
++	return(count);
++}
++
++static int cow_open(struct inode *inode, struct file *filp)
++{
++ int (*dev_ioctl)(struct inode *, struct file *, unsigned int,
++ unsigned long);
++ mm_segment_t fs;
++ struct cow *dev;
++ __u64 size;
++ __u32 version, align;
++ time_t mtime;
++ char *backing_file;
++ int n, offset, err = 0;
++
++ n = DEVICE_NR(inode->i_rdev);
++ if(n >= MAX_DEV)
++ return(-ENODEV);
++ dev = &cow_dev[n];
++ offset = n << COW_SHIFT;
++
++ spin_lock(&cow_lock);
++
++ if(dev->count == 0){
++ dev->cow_dev = name_to_kdev_t(dev->cow_path);
++ if(dev->cow_dev == 0){
++ printk(KERN_ERR "cow_open - name_to_kdev_t(\"%s\") "
++ "failed\n", dev->cow_path);
++ err = -ENODEV;
++ }
++
++ dev->backing_dev = name_to_kdev_t(dev->backing_path);
++ if(dev->backing_dev == 0){
++ printk(KERN_ERR "cow_open - name_to_kdev_t(\"%s\") "
++ "failed\n", dev->backing_path);
++ err = -ENODEV;
++ }
++
++ if(err)
++ goto out;
++
++ dev->cow_bdev = bdget(dev->cow_dev);
++ if(dev->cow_bdev == NULL){
++ printk(KERN_ERR "cow_open - bdget(\"%s\") failed\n",
++ dev->cow_path);
++ err = -ENOMEM;
++ }
++ dev->backing_bdev = bdget(dev->backing_dev);
++ if(dev->backing_bdev == NULL){
++ printk(KERN_ERR "cow_open - bdget(\"%s\") failed\n",
++ dev->backing_path);
++ err = -ENOMEM;
++ }
++
++ if(err)
++ goto out;
++
++ err = blkdev_get(dev->cow_bdev, FMODE_READ|FMODE_WRITE, 0,
++ BDEV_RAW);
++ if(err){
++ printk("cow_open - blkdev_get of COW device failed, "
++ "error = %d\n", err);
++ goto out;
++ }
++
++ err = blkdev_get(dev->backing_bdev, FMODE_READ, 0, BDEV_RAW);
++ if(err){
++ printk("cow_open - blkdev_get of backing device "
++ "failed, error = %d\n", err);
++ goto out;
++ }
++
++ err = read_cow_header(reader, &dev->cow_dev, &version,
++ &backing_file, &mtime, &size,
++ &dev->sectorsize, &align,
++ &dev->bitmap_offset);
++ if(err){
++ printk(KERN_ERR "cow_open - read_cow_header failed, "
++ "err = %d\n", err);
++ goto out;
++ }
++
++ cow_sizes(version, size, dev->sectorsize, align,
++ dev->bitmap_offset, &dev->bitmap_len,
++ &dev->data_offset);
++ dev->bitmap = (void *) vmalloc(dev->bitmap_len);
++ if(dev->bitmap == NULL){
++ err = -ENOMEM;
++ printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
++ goto out;
++ }
++ flush_tlb_kernel_vm();
++
++ err = reader(dev->bitmap_offset, (char *) dev->bitmap,
++ dev->bitmap_len, &dev->cow_dev);
++ if(err < 0){
++ printk(KERN_ERR "Failed to read COW bitmap\n");
++ vfree(dev->bitmap);
++ goto out;
++ }
++
++ dev_ioctl = dev->backing_bdev->bd_op->ioctl;
++ fs = get_fs();
++ set_fs(KERNEL_DS);
++ err = (*dev_ioctl)(inode, filp, BLKGETSIZE,
++ (unsigned long) &sizes[offset]);
++ set_fs(fs);
++ if(err){
++ printk(KERN_ERR "cow_open - BLKGETSIZE failed, "
++ "error = %d\n", err);
++ goto out;
++ }
++
++ kernel_thread(cow_thread, dev,
++ CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
++ down(&dev->sem);
++ }
++ dev->count++;
++ out:
++ spin_unlock(&cow_lock);
++ return(err);
++}
++
++static int cow_release(struct inode * inode, struct file * file)
++{
++ struct cow *dev;
++ int n, err;
++
++ n = DEVICE_NR(inode->i_rdev);
++ if(n >= MAX_DEV)
++ return(-ENODEV);
++ dev = &cow_dev[n];
++
++ spin_lock(&cow_lock);
++
++ if(--dev->count > 0)
++ goto out;
++
++ err = blkdev_put(dev->cow_bdev, BDEV_RAW);
++ if(err)
++ printk("cow_release - blkdev_put of cow device failed, "
++ "error = %d\n", err);
++ bdput(dev->cow_bdev);
++ dev->cow_bdev = 0;
++
++ err = blkdev_put(dev->backing_bdev, BDEV_RAW);
++ if(err)
++ printk("cow_release - blkdev_put of backing device failed, "
++ "error = %d\n", err);
++ bdput(dev->backing_bdev);
++ dev->backing_bdev = 0;
++
++ out:
++ spin_unlock(&cow_lock);
++ return(0);
++}
++
++static int cow_ioctl(struct inode * inode, struct file * file,
++ unsigned int cmd, unsigned long arg)
++{
++ struct cow *dev;
++ int (*dev_ioctl)(struct inode *, struct file *, unsigned int,
++ unsigned long);
++ int n;
++
++ n = DEVICE_NR(inode->i_rdev);
++ if(n >= MAX_DEV)
++ return(-ENODEV);
++ dev = &cow_dev[n];
++
++ dev_ioctl = dev->backing_bdev->bd_op->ioctl;
++ return((*dev_ioctl)(inode, file, cmd, arg));
++}
++
++static int cow_revalidate(kdev_t rdev)
++{
++ printk(KERN_ERR "Need to implement cow_revalidate\n");
++ return(0);
++}
++
++static int parse_unit(char **ptr)
++{
++ char *str = *ptr, *end;
++ int n = -1;
++
++ if(isdigit(*str)) {
++ n = simple_strtoul(str, &end, 0);
++ if(end == str)
++ return(-1);
++ *ptr = end;
++ }
++ else if (('a' <= *str) && (*str <= 'h')) {
++ n = *str - 'a';
++ str++;
++ *ptr = str;
++ }
++ return(n);
++}
++
++static int cow_setup(char *str)
++{
++	struct cow *dev;
++	char *cow_name, *backing_name;
++	int unit;
++	/* str is "<unit>=<cow device>,<backing device>"; record both paths */
++	unit = parse_unit(&str);
++	if((unit < 0) || (unit >= MAX_DEV)){
++		printk(KERN_ERR "cow_setup - Invalid unit number\n");
++		return(1);
++	}
++
++	if(*str != '='){
++		printk(KERN_ERR "cow_setup - Missing '=' after unit "
++		       "number\n");
++		return(1);
++	}
++	str++;
++
++	cow_name = str;
++	backing_name = strchr(str, ',');
++	if(backing_name == NULL){
++		printk(KERN_ERR "cow_setup - missing backing device name\n");
++		return(0);
++	}
++	*backing_name = '\0';
++	backing_name++;
++	/* The paths stay pointers into str (split in place), not copies */
++	spin_lock(&cow_lock);
++
++	dev = &cow_dev[unit];
++	dev->cow_path = cow_name;
++	dev->backing_path = backing_name;
++
++	spin_unlock(&cow_lock);
++	return(0);
++}
++
++__setup("cow", cow_setup);
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/drivers/cow_sys.h b/arch/um/drivers/cow_sys.h
+--- a/arch/um/drivers/cow_sys.h 1969-12-31 19:00:00.000000000 -0500
++++ b/arch/um/drivers/cow_sys.h 2004-02-11 12:27:42.000000000 -0500
+@@ -0,0 +1,48 @@
++#ifndef __COW_SYS_H__
++#define __COW_SYS_H__
++
++#include "kern_util.h"
++#include "user_util.h"
++#include "os.h"
++#include "user.h"
++
++static inline void *cow_malloc(int size)
++{
++ return(um_kmalloc(size));
++}
++
++static inline void cow_free(void *ptr)
++{
++ kfree(ptr);
++}
++
++#define cow_printf printk
++
++static inline char *cow_strdup(char *str)
++{
++ return(uml_strdup(str));
++}
++
++static inline int cow_seek_file(int fd, __u64 offset)
++{
++ return(os_seek_file(fd, offset));
++}
++
++static inline int cow_file_size(char *file, __u64 *size_out)
++{
++ return(os_file_size(file, size_out));
++}
++
++static inline int cow_write_file(int fd, char *buf, int size)
++{
++ return(os_write_file(fd, buf, size));
++}
++
++#endif
++
++/*
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/drivers/cow_user.c b/arch/um/drivers/cow_user.c
+--- a/arch/um/drivers/cow_user.c 1969-12-31 19:00:00.000000000 -0500
++++ b/arch/um/drivers/cow_user.c 2004-02-11 12:27:36.000000000 -0500
+@@ -0,0 +1,375 @@
++#include <stddef.h>
++#include <string.h>
++#include <errno.h>
++#include <unistd.h>
++#include <byteswap.h>
++#include <sys/time.h>
++#include <sys/param.h>
++#include <sys/user.h>
++#include <netinet/in.h>
++
++#include "os.h"
++
++#include "cow.h"
++#include "cow_sys.h"
++
++#define PATH_LEN_V1 256
++
++struct cow_header_v1 {
++ int magic;
++ int version;
++ char backing_file[PATH_LEN_V1];
++ time_t mtime;
++ __u64 size;
++ int sectorsize;
++};
++
++#define PATH_LEN_V2 MAXPATHLEN
++
++struct cow_header_v2 {
++ unsigned long magic;
++ unsigned long version;
++ char backing_file[PATH_LEN_V2];
++ time_t mtime;
++ __u64 size;
++ int sectorsize;
++};
++
++/* Define PATH_LEN_V3 as the usual value of MAXPATHLEN, just hard-code it in
++ * case other systems have different values for MAXPATHLEN
++ */
++#define PATH_LEN_V3 4096
++
++/* Changes from V2 -
++ * PATH_LEN_V3 as described above
++ * Explicitly specify field bit lengths for systems with different
++ * lengths for the usual C types. Not sure whether char or
++ * time_t should be changed, this can be changed later without
++ * breaking compatibility
++ * Add alignment field so that different alignments can be used for the
++ * bitmap and data
++ * Add cow_format field to allow for the possibility of different ways
++ * of specifying the COW blocks. For now, the only value is 0,
++ * for the traditional COW bitmap.
++ * Move the backing_file field to the end of the header. This allows
++ * for the possibility of expanding it into the padding required
++ * by the bitmap alignment.
++ * The bitmap and data portions of the file will be aligned as specified
++ * by the alignment field. This is to allow COW files to be
++ * put on devices with restrictions on access alignments, such as
++ * /dev/raw, with a 512 byte alignment restriction. This also
++ *		allows the data to be aligned more strictly than on
++ * sector boundaries. This is needed for ubd-mmap, which needs
++ * the data to be page aligned.
++ * Fixed (finally!) the rounding bug
++ */
++
++struct cow_header_v3 {
++ __u32 magic;
++ __u32 version;
++ time_t mtime;
++ __u64 size;
++ __u32 sectorsize;
++ __u32 alignment;
++ __u32 cow_format;
++ char backing_file[PATH_LEN_V3];
++};
++
++/* COW format definitions - for now, we have only the usual COW bitmap */
++#define COW_BITMAP 0
++
++union cow_header {
++ struct cow_header_v1 v1;
++ struct cow_header_v2 v2;
++ struct cow_header_v3 v3;
++};
++
++#define COW_MAGIC 0x4f4f4f4d /* MOOO */
++#define COW_VERSION 3
++
++#define DIV_ROUND(x, len) (((x) + (len) - 1) / (len))	/* ceil(x / len) */
++#define ROUND_UP(x, align) (DIV_ROUND(x, align) * (align)) /* next multiple */
++
++void cow_sizes(int version, __u64 size, int sectorsize, int align,
++ int bitmap_offset, unsigned long *bitmap_len_out,
++ int *data_offset_out)
++{
++ if(version < 3){
++ *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize);
++
++ *data_offset_out = bitmap_offset + *bitmap_len_out;
++ *data_offset_out = (*data_offset_out + sectorsize - 1) /
++ sectorsize;
++ *data_offset_out *= sectorsize;
++ }
++ else {
++ *bitmap_len_out = DIV_ROUND(size, sectorsize);
++ *bitmap_len_out = DIV_ROUND(*bitmap_len_out, 8);
++
++ *data_offset_out = bitmap_offset + *bitmap_len_out;
++ *data_offset_out = ROUND_UP(*data_offset_out, align);
++ }
++}
++
++static int absolutize(char *to, int size, char *from)
++{
++ char save_cwd[256], *slash;
++ int remaining;
++
++ if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) {
++ cow_printf("absolutize : unable to get cwd - errno = %d\n",
++ errno);
++ return(-1);
++ }
++ slash = strrchr(from, '/');
++ if(slash != NULL){
++ *slash = '\0';
++ if(chdir(from)){
++ *slash = '/';
++ cow_printf("absolutize : Can't cd to '%s' - "
++ "errno = %d\n", from, errno);
++ return(-1);
++ }
++ *slash = '/';
++ if(getcwd(to, size) == NULL){
++ cow_printf("absolutize : unable to get cwd of '%s' - "
++ "errno = %d\n", from, errno);
++ return(-1);
++ }
++ remaining = size - strlen(to);
++ if(strlen(slash) + 1 > remaining){
++ cow_printf("absolutize : unable to fit '%s' into %d "
++ "chars\n", from, size);
++ return(-1);
++ }
++ strcat(to, slash);
++ }
++ else {
++ if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){
++ cow_printf("absolutize : unable to fit '%s' into %d "
++ "chars\n", from, size);
++ return(-1);
++ }
++ strcpy(to, save_cwd);
++ strcat(to, "/");
++ strcat(to, from);
++ }
++ chdir(save_cwd);
++ return(0);
++}
++
++int write_cow_header(char *cow_file, int fd, char *backing_file,
++ int sectorsize, int alignment, long long *size)
++{
++ struct cow_header_v3 *header;
++ unsigned long modtime;
++ int err;
++
++ err = cow_seek_file(fd, 0);
++ if(err < 0){
++ cow_printf("write_cow_header - lseek failed, err = %d\n", -err);
++ goto out;
++ }
++
++ err = -ENOMEM;
++ header = cow_malloc(sizeof(*header));
++ if(header == NULL){
++ cow_printf("Failed to allocate COW V3 header\n");
++ goto out;
++ }
++ header->magic = htonl(COW_MAGIC);
++ header->version = htonl(COW_VERSION);
++
++ err = -EINVAL;
++ if(strlen(backing_file) > sizeof(header->backing_file) - 1){
++ cow_printf("Backing file name \"%s\" is too long - names are "
++ "limited to %d characters\n", backing_file,
++ sizeof(header->backing_file) - 1);
++ goto out_free;
++ }
++
++ if(absolutize(header->backing_file, sizeof(header->backing_file),
++ backing_file))
++ goto out_free;
++
++ err = os_file_modtime(header->backing_file, &modtime);
++ if(err < 0){
++ cow_printf("Backing file '%s' mtime request failed, "
++ "err = %d\n", header->backing_file, -err);
++ goto out_free;
++ }
++
++ err = cow_file_size(header->backing_file, size);
++ if(err < 0){
++ cow_printf("Couldn't get size of backing file '%s', "
++ "err = %d\n", header->backing_file, -err);
++ goto out_free;
++ }
++
++ header->mtime = htonl(modtime);
++ header->size = htonll(*size);
++ header->sectorsize = htonl(sectorsize);
++ header->alignment = htonl(alignment);
++ header->cow_format = COW_BITMAP;
++
++ err = os_write_file(fd, header, sizeof(*header));
++ if(err != sizeof(*header)){
++ cow_printf("Write of header to new COW file '%s' failed, "
++ "err = %d\n", cow_file, -err);
++ goto out_free;
++ }
++ err = 0;
++ out_free:
++ cow_free(header);
++ out:
++ return(err);
++}
++
++int file_reader(__u64 offset, char *buf, int len, void *arg)
++{
++ int fd = *((int *) arg);
++
++ return(pread(fd, buf, len, offset));
++}
++
++/* XXX Need to sanity-check the values read from the header */
++/* Decode a V1-V3 COW header read via reader; returns 0, -EINVAL or -ENOMEM */
++int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg,
++		    __u32 *version_out, char **backing_file_out,
++		    time_t *mtime_out, __u64 *size_out,
++		    int *sectorsize_out, __u32 *align_out,
++		    int *bitmap_offset_out)
++{
++	union cow_header *header;
++	char *file;
++	int err, n;
++	unsigned long version, magic;
++
++	header = cow_malloc(sizeof(*header));
++	if(header == NULL){
++		cow_printf("read_cow_header - Failed to allocate header\n");
++		return(-ENOMEM);
++	}
++	err = -EINVAL;	/* also returned when reader() itself fails (n < 0) */
++	n = (*reader)(0, (char *) header, sizeof(*header), arg);
++	if(n < (int) offsetof(typeof(header->v1), backing_file)){
++		cow_printf("read_cow_header - short header\n");
++		goto out;
++	}
++
++	magic = header->v1.magic;
++	if(magic == COW_MAGIC) {
++		version = header->v1.version;
++	}
++	else if(magic == ntohl(COW_MAGIC)){
++		version = ntohl(header->v1.version);
++	}
++	/* No error printed because the non-COW case comes through here */
++	else goto out;
++
++	*version_out = version;
++
++	if(version == 1){
++		if(n < (int) sizeof(header->v1)){
++			cow_printf("read_cow_header - failed to read V1 "
++				   "header\n");
++			goto out;
++		}
++		*mtime_out = header->v1.mtime;
++		*size_out = header->v1.size;
++		*sectorsize_out = header->v1.sectorsize;
++		*bitmap_offset_out = sizeof(header->v1);
++		*align_out = *sectorsize_out;
++		file = header->v1.backing_file;
++	}
++	else if(version == 2){
++		if(n < (int) sizeof(header->v2)){
++			cow_printf("read_cow_header - failed to read V2 "
++				   "header\n");
++			goto out;
++		}
++		*mtime_out = ntohl(header->v2.mtime);
++		*size_out = ntohll(header->v2.size);
++		*sectorsize_out = ntohl(header->v2.sectorsize);
++		*bitmap_offset_out = sizeof(header->v2);
++		*align_out = *sectorsize_out;
++		file = header->v2.backing_file;
++	}
++	else if(version == 3){
++		if(n < (int) sizeof(header->v3)){
++			cow_printf("read_cow_header - failed to read V3 "
++				   "header\n");
++			goto out;
++		}
++		*mtime_out = ntohl(header->v3.mtime);
++		*size_out = ntohll(header->v3.size);
++		*sectorsize_out = ntohl(header->v3.sectorsize);
++		*align_out = ntohl(header->v3.alignment);
++		*bitmap_offset_out = ROUND_UP(sizeof(header->v3), *align_out);
++		file = header->v3.backing_file;
++	}
++	else {
++		cow_printf("read_cow_header - invalid COW version\n");
++		goto out;
++	}
++	err = -ENOMEM;
++	*backing_file_out = cow_strdup(file);
++	if(*backing_file_out == NULL){
++		cow_printf("read_cow_header - failed to allocate backing "
++			   "file\n");
++		goto out;
++	}
++	err = 0;
++ out:
++	cow_free(header);
++	return(err);
++}
++
++int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize,
++ int alignment, int *bitmap_offset_out,
++ unsigned long *bitmap_len_out, int *data_offset_out)
++{
++ __u64 size, offset;
++ char zero = 0;
++ int err;
++
++ err = write_cow_header(cow_file, fd, backing_file, sectorsize,
++ alignment, &size);
++ if(err)
++ goto out;
++
++ *bitmap_offset_out = ROUND_UP(sizeof(struct cow_header_v3), alignment);
++ cow_sizes(COW_VERSION, size, sectorsize, alignment, *bitmap_offset_out,
++ bitmap_len_out, data_offset_out);
++
++ offset = *data_offset_out + size - sizeof(zero);
++ err = cow_seek_file(fd, offset);
++ if(err < 0){
++ cow_printf("cow bitmap lseek failed : err = %d\n", -err);
++ goto out;
++ }
++
++ /* does not really matter how much we write it is just to set EOF
++ * this also sets the entire COW bitmap
++ * to zero without having to allocate it
++ */
++ err = cow_write_file(fd, &zero, sizeof(zero));
++ if(err != sizeof(zero)){
++ cow_printf("Write of bitmap to new COW file '%s' failed, "
++ "err = %d\n", cow_file, -err);
++ err = -EINVAL;
++ goto out;
++ }
++
++ return(0);
++
++ out:
++ return(err);
++}
++
++/*
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/drivers/daemon_user.c b/arch/um/drivers/daemon_user.c
+--- a/arch/um/drivers/daemon_user.c 2004-02-11 12:15:11.000000000 -0500
++++ b/arch/um/drivers/daemon_user.c 2004-02-11 12:26:57.000000000 -0500
+@@ -53,7 +53,8 @@
+ struct request_v3 req;
+ int fd, n, err;
+
+- if((pri->control = socket(AF_UNIX, SOCK_STREAM, 0)) < 0){
++ pri->control = socket(AF_UNIX, SOCK_STREAM, 0);
++ if(pri->control < 0){
+ printk("daemon_open : control socket failed, errno = %d\n",
+ errno);
+ return(-errno);
+@@ -67,7 +68,8 @@
+ goto out;
+ }
+
+- if((fd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0){
++ fd = socket(AF_UNIX, SOCK_DGRAM, 0);
++ if(fd < 0){
+ printk("daemon_open : data socket failed, errno = %d\n",
+ errno);
+ err = -errno;
+@@ -91,18 +93,18 @@
+ req.version = SWITCH_VERSION;
+ req.type = REQ_NEW_CONTROL;
+ req.sock = *local_addr;
+- n = write(pri->control, &req, sizeof(req));
++ n = os_write_file(pri->control, &req, sizeof(req));
+ if(n != sizeof(req)){
+- printk("daemon_open : control setup request returned %d, "
+- "errno = %d\n", n, errno);
++ printk("daemon_open : control setup request failed, err = %d\n",
++ -n);
+ err = -ENOTCONN;
+ goto out;
+ }
+
+- n = read(pri->control, sun, sizeof(*sun));
++ n = os_read_file(pri->control, sun, sizeof(*sun));
+ if(n != sizeof(*sun)){
+- printk("daemon_open : read of data socket returned %d, "
+- "errno = %d\n", n, errno);
++ printk("daemon_open : read of data socket failed, err = %d\n",
++ -n);
+ err = -ENOTCONN;
+ goto out_close;
+ }
+@@ -111,9 +113,9 @@
+ return(fd);
+
+ out_close:
+- close(fd);
++ os_close_file(fd);
+ out:
+- close(pri->control);
++ os_close_file(pri->control);
+ return(err);
+ }
+
+@@ -153,8 +155,8 @@
+ {
+ struct daemon_data *pri = data;
+
+- close(pri->fd);
+- close(pri->control);
++ os_close_file(pri->fd);
++ os_close_file(pri->control);
+ if(pri->data_addr != NULL) kfree(pri->data_addr);
+ if(pri->ctl_addr != NULL) kfree(pri->ctl_addr);
+ if(pri->local_addr != NULL) kfree(pri->local_addr);
+diff -Naur a/arch/um/drivers/fd.c b/arch/um/drivers/fd.c
+--- a/arch/um/drivers/fd.c 2004-02-11 12:16:47.000000000 -0500
++++ b/arch/um/drivers/fd.c 2004-02-11 12:29:01.000000000 -0500
+@@ -35,7 +35,8 @@
+ printk("fd_init : couldn't parse file descriptor '%s'\n", str);
+ return(NULL);
+ }
+- if((data = um_kmalloc(sizeof(*data))) == NULL) return(NULL);
++ data = um_kmalloc(sizeof(*data));
++ if(data == NULL) return(NULL);
+ *data = ((struct fd_chan) { .fd = n,
+ .raw = opts->raw });
+ return(data);
+diff -Naur a/arch/um/drivers/harddog_user.c b/arch/um/drivers/harddog_user.c
+--- a/arch/um/drivers/harddog_user.c 2004-02-11 12:14:38.000000000 -0500
++++ b/arch/um/drivers/harddog_user.c 2004-02-11 12:26:41.000000000 -0500
+@@ -27,10 +27,10 @@
+ dup2(data->stdin, 0);
+ dup2(data->stdout, 1);
+ dup2(data->stdout, 2);
+- close(data->stdin);
+- close(data->stdout);
+- close(data->close_me[0]);
+- close(data->close_me[1]);
++ os_close_file(data->stdin);
++ os_close_file(data->stdout);
++ os_close_file(data->close_me[0]);
++ os_close_file(data->close_me[1]);
+ }
+
+ int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock)
+@@ -44,15 +44,15 @@
+ char **args = NULL;
+
+ err = os_pipe(in_fds, 1, 0);
+- if(err){
+- printk("harddog_open - os_pipe failed, errno = %d\n", -err);
+- return(err);
++ if(err < 0){
++ printk("harddog_open - os_pipe failed, err = %d\n", -err);
++ goto out;
+ }
+
+ err = os_pipe(out_fds, 1, 0);
+- if(err){
+- printk("harddog_open - os_pipe failed, errno = %d\n", -err);
+- return(err);
++ if(err < 0){
++ printk("harddog_open - os_pipe failed, err = %d\n", -err);
++ goto out_close_in;
+ }
+
+ data.stdin = out_fds[0];
+@@ -72,42 +72,47 @@
+
+ pid = run_helper(pre_exec, &data, args, NULL);
+
+- close(out_fds[0]);
+- close(in_fds[1]);
++ os_close_file(out_fds[0]);
++ os_close_file(in_fds[1]);
+
+ if(pid < 0){
+ err = -pid;
+- printk("harddog_open - run_helper failed, errno = %d\n", err);
+- goto out;
++ printk("harddog_open - run_helper failed, errno = %d\n", -err);
++ goto out_close_out;
+ }
+
+- n = read(in_fds[0], &c, sizeof(c));
++ n = os_read_file(in_fds[0], &c, sizeof(c));
+ if(n == 0){
+ printk("harddog_open - EOF on watchdog pipe\n");
+ helper_wait(pid);
+ err = -EIO;
+- goto out;
++ goto out_close_out;
+ }
+ else if(n < 0){
+ printk("harddog_open - read of watchdog pipe failed, "
+- "errno = %d\n", errno);
++ "err = %d\n", -n);
+ helper_wait(pid);
+- err = -errno;
+- goto out;
++ err = n;
++ goto out_close_out;
+ }
+ *in_fd_ret = in_fds[0];
+ *out_fd_ret = out_fds[1];
+ return(0);
++
++ out_close_in:
++ os_close_file(in_fds[0]);
++ os_close_file(in_fds[1]);
++ out_close_out:
++ os_close_file(out_fds[0]);
++ os_close_file(out_fds[1]);
+ out:
+- close(out_fds[1]);
+- close(in_fds[0]);
+ return(err);
+ }
+
+ void stop_watchdog(int in_fd, int out_fd)
+ {
+- close(in_fd);
+- close(out_fd);
++ os_close_file(in_fd);
++ os_close_file(out_fd);
+ }
+
+ int ping_watchdog(int fd)
+@@ -115,11 +120,12 @@
+ int n;
+ char c = '\n';
+
+- n = write(fd, &c, sizeof(c));
+- if(n < sizeof(c)){
+- printk("ping_watchdog - write failed, errno = %d\n",
+- errno);
+- return(-errno);
++ n = os_write_file(fd, &c, sizeof(c));
++ if(n != sizeof(c)){
++ printk("ping_watchdog - write failed, err = %d\n", -n);
++ if(n < 0)
++ return(n);
++ return(-EIO);
+ }
+ return 1;
+
+diff -Naur a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c
+--- a/arch/um/drivers/hostaudio_kern.c 2004-02-11 12:16:48.000000000 -0500
++++ b/arch/um/drivers/hostaudio_kern.c 2004-02-11 12:29:03.000000000 -0500
+@@ -5,12 +5,12 @@
+
+ #include "linux/config.h"
+ #include "linux/module.h"
+-#include "linux/version.h"
+ #include "linux/init.h"
+ #include "linux/slab.h"
+ #include "linux/fs.h"
+ #include "linux/sound.h"
+ #include "linux/soundcard.h"
++#include "asm/uaccess.h"
+ #include "kern_util.h"
+ #include "init.h"
+ #include "hostaudio.h"
+@@ -19,30 +19,39 @@
+ char *dsp = HOSTAUDIO_DEV_DSP;
+ char *mixer = HOSTAUDIO_DEV_MIXER;
+
++#define DSP_HELP \
++" This is used to specify the host dsp device to the hostaudio driver.\n" \
++" The default is \"" HOSTAUDIO_DEV_DSP "\".\n\n"
++
++#define MIXER_HELP \
++" This is used to specify the host mixer device to the hostaudio driver.\n" \
++" The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n"
++
+ #ifndef MODULE
+ static int set_dsp(char *name, int *add)
+ {
+- dsp = uml_strdup(name);
++ dsp = name;
+ return(0);
+ }
+
+-__uml_setup("dsp=", set_dsp,
+-"dsp=<dsp device>\n"
+-" This is used to specify the host dsp device to the hostaudio driver.\n"
+-" The default is \"" HOSTAUDIO_DEV_DSP "\".\n\n"
+-);
++__uml_setup("dsp=", set_dsp, "dsp=<dsp device>\n" DSP_HELP);
+
+ static int set_mixer(char *name, int *add)
+ {
+- mixer = uml_strdup(name);
++ mixer = name;
+ return(0);
+ }
+
+-__uml_setup("mixer=", set_mixer,
+-"mixer=<mixer device>\n"
+-" This is used to specify the host mixer device to the hostaudio driver.\n"
+-" The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n"
+-);
++__uml_setup("mixer=", set_mixer, "mixer=<mixer device>\n" MIXER_HELP);
++
++#else /*MODULE*/
++
++MODULE_PARM(dsp, "s");
++MODULE_PARM_DESC(dsp, DSP_HELP);
++
++MODULE_PARM(mixer, "s");
++MODULE_PARM_DESC(mixer, MIXER_HELP);
++
+ #endif
+
+ /* /dev/dsp file operations */
+@@ -51,23 +60,55 @@
+ loff_t *ppos)
+ {
+ struct hostaudio_state *state = file->private_data;
++ void *kbuf;
++ int err;
+
+ #ifdef DEBUG
+ printk("hostaudio: read called, count = %d\n", count);
+ #endif
+
+- return(hostaudio_read_user(state, buffer, count, ppos));
++ kbuf = kmalloc(count, GFP_KERNEL);
++ if(kbuf == NULL)
++ return(-ENOMEM);
++
++ err = hostaudio_read_user(state, kbuf, count, ppos);
++ if(err < 0)
++ goto out;
++
++ if(copy_to_user(buffer, kbuf, err))
++ err = -EFAULT;
++
++ out:
++ kfree(kbuf);
++ return(err);
+ }
+
+ static ssize_t hostaudio_write(struct file *file, const char *buffer,
+ size_t count, loff_t *ppos)
+ {
+ struct hostaudio_state *state = file->private_data;
++ void *kbuf;
++ int err;
+
+ #ifdef DEBUG
+ printk("hostaudio: write called, count = %d\n", count);
+ #endif
+- return(hostaudio_write_user(state, buffer, count, ppos));
++
++ kbuf = kmalloc(count, GFP_KERNEL);
++ if(kbuf == NULL)
++ return(-ENOMEM);
++
++ err = -EFAULT;
++ if(copy_from_user(kbuf, buffer, count))
++ goto out;
++
++ err = hostaudio_write_user(state, kbuf, count, ppos);
++ if(err < 0)
++ goto out;
++
++ out:
++ kfree(kbuf);
++ return(err);
+ }
+
+ static unsigned int hostaudio_poll(struct file *file,
+@@ -86,12 +127,43 @@
+ unsigned int cmd, unsigned long arg)
+ {
+ struct hostaudio_state *state = file->private_data;
++ unsigned long data = 0;
++ int err;
+
+ #ifdef DEBUG
+ printk("hostaudio: ioctl called, cmd = %u\n", cmd);
+ #endif
++ switch(cmd){
++ case SNDCTL_DSP_SPEED:
++ case SNDCTL_DSP_STEREO:
++ case SNDCTL_DSP_GETBLKSIZE:
++ case SNDCTL_DSP_CHANNELS:
++ case SNDCTL_DSP_SUBDIVIDE:
++ case SNDCTL_DSP_SETFRAGMENT:
++ if(get_user(data, (int *) arg))
++ return(-EFAULT);
++ break;
++ default:
++ break;
++ }
++
++ err = hostaudio_ioctl_user(state, cmd, (unsigned long) &data);
++
++ switch(cmd){
++ case SNDCTL_DSP_SPEED:
++ case SNDCTL_DSP_STEREO:
++ case SNDCTL_DSP_GETBLKSIZE:
++ case SNDCTL_DSP_CHANNELS:
++ case SNDCTL_DSP_SUBDIVIDE:
++ case SNDCTL_DSP_SETFRAGMENT:
++ if(put_user(data, (int *) arg))
++ return(-EFAULT);
++ break;
++ default:
++ break;
++ }
+
+- return(hostaudio_ioctl_user(state, cmd, arg));
++ return(err);
+ }
+
+ static int hostaudio_open(struct inode *inode, struct file *file)
+@@ -225,7 +297,8 @@
+
+ static int __init hostaudio_init_module(void)
+ {
+- printk(KERN_INFO "UML Audio Relay\n");
++ printk(KERN_INFO "UML Audio Relay (host dsp = %s, host mixer = %s)\n",
++ dsp, mixer);
+
+ module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1);
+ if(module_data.dev_audio < 0){
+diff -Naur a/arch/um/drivers/hostaudio_user.c b/arch/um/drivers/hostaudio_user.c
+--- a/arch/um/drivers/hostaudio_user.c 2004-02-11 12:16:08.000000000 -0500
++++ b/arch/um/drivers/hostaudio_user.c 2004-02-11 12:28:04.000000000 -0500
+@@ -4,9 +4,6 @@
+ */
+
+ #include <sys/types.h>
+-#include <sys/stat.h>
+-#include <sys/ioctl.h>
+-#include <fcntl.h>
+ #include <unistd.h>
+ #include <errno.h>
+ #include "hostaudio.h"
+@@ -20,45 +17,31 @@
+ ssize_t hostaudio_read_user(struct hostaudio_state *state, char *buffer,
+ size_t count, loff_t *ppos)
+ {
+- ssize_t ret;
+-
+ #ifdef DEBUG
+ printk("hostaudio: read_user called, count = %d\n", count);
+ #endif
+
+- ret = read(state->fd, buffer, count);
+-
+- if(ret < 0) return(-errno);
+- return(ret);
++ return(os_read_file(state->fd, buffer, count));
+ }
+
+ ssize_t hostaudio_write_user(struct hostaudio_state *state, const char *buffer,
+ size_t count, loff_t *ppos)
+ {
+- ssize_t ret;
+-
+ #ifdef DEBUG
+ printk("hostaudio: write_user called, count = %d\n", count);
+ #endif
+
+- ret = write(state->fd, buffer, count);
+-
+- if(ret < 0) return(-errno);
+- return(ret);
++ return(os_write_file(state->fd, buffer, count));
+ }
+
+ int hostaudio_ioctl_user(struct hostaudio_state *state, unsigned int cmd,
+ unsigned long arg)
+ {
+- int ret;
+ #ifdef DEBUG
+ printk("hostaudio: ioctl_user called, cmd = %u\n", cmd);
+ #endif
+
+- ret = ioctl(state->fd, cmd, arg);
+-
+- if(ret < 0) return(-errno);
+- return(ret);
++ return(os_ioctl_generic(state->fd, cmd, arg));
+ }
+
+ int hostaudio_open_user(struct hostaudio_state *state, int r, int w, char *dsp)
+@@ -67,14 +50,15 @@
+ printk("hostaudio: open_user called\n");
+ #endif
+
+- state->fd = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0);
+-
+- if(state->fd >= 0) return(0);
++ state->fd = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0);
+
+- printk("hostaudio_open_user failed to open '%s', errno = %d\n",
+- dsp, errno);
++ if(state->fd < 0) {
++ printk("hostaudio_open_user failed to open '%s', err = %d\n",
++ dsp, -state->fd);
++ return(state->fd);
++ }
+
+- return(-errno);
++ return(0);
+ }
+
+ int hostaudio_release_user(struct hostaudio_state *state)
+@@ -82,10 +66,10 @@
+ #ifdef DEBUG
+ printk("hostaudio: release called\n");
+ #endif
+- if(state->fd >= 0){
+- close(state->fd);
+- state->fd=-1;
+- }
++ if(state->fd >= 0){
++ os_close_file(state->fd);
++ state->fd = -1;
++ }
+
+ return(0);
+ }
+@@ -95,15 +79,11 @@
+ int hostmixer_ioctl_mixdev_user(struct hostmixer_state *state,
+ unsigned int cmd, unsigned long arg)
+ {
+- int ret;
+ #ifdef DEBUG
+ printk("hostmixer: ioctl_user called cmd = %u\n",cmd);
+ #endif
+
+- ret = ioctl(state->fd, cmd, arg);
+- if(ret < 0)
+- return(-errno);
+- return(ret);
++ return(os_ioctl_generic(state->fd, cmd, arg));
+ }
+
+ int hostmixer_open_mixdev_user(struct hostmixer_state *state, int r, int w,
+@@ -115,12 +95,13 @@
+
+ state->fd = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0);
+
+- if(state->fd >= 0) return(0);
+-
+- printk("hostaudio_open_mixdev_user failed to open '%s', errno = %d\n",
+- mixer, errno);
++	if(state->fd < 0) {
++		printk("hostaudio_open_mixdev_user failed to open '%s', "
++		       "err = %d\n", mixer, -state->fd); /* positive errno */
++		return(state->fd);
++	}
+
+- return(-errno);
++ return(0);
+ }
+
+ int hostmixer_release_mixdev_user(struct hostmixer_state *state)
+@@ -130,7 +111,7 @@
+ #endif
+
+ if(state->fd >= 0){
+- close(state->fd);
++ os_close_file(state->fd);
+ state->fd = -1;
+ }
+
+diff -Naur a/arch/um/drivers/line.c b/arch/um/drivers/line.c
+--- a/arch/um/drivers/line.c 2004-02-11 12:16:38.000000000 -0500
++++ b/arch/um/drivers/line.c 2004-02-11 12:28:38.000000000 -0500
+@@ -6,8 +6,8 @@
+ #include "linux/sched.h"
+ #include "linux/slab.h"
+ #include "linux/list.h"
++#include "linux/interrupt.h"
+ #include "linux/devfs_fs_kernel.h"
+-#include "asm/irq.h"
+ #include "asm/uaccess.h"
+ #include "chan_kern.h"
+ #include "irq_user.h"
+@@ -16,38 +16,55 @@
+ #include "user_util.h"
+ #include "kern_util.h"
+ #include "os.h"
++#include "irq_kern.h"
+
+ #define LINE_BUFSIZE 4096
+
+-void line_interrupt(int irq, void *data, struct pt_regs *unused)
++static irqreturn_t line_interrupt(int irq, void *data, struct pt_regs *unused)
+ {
+ struct line *dev = data;
+
+ if(dev->count > 0)
+ chan_interrupt(&dev->chan_list, &dev->task, dev->tty, irq,
+ dev);
++ return IRQ_HANDLED;
+ }
+
+-void line_timer_cb(void *arg)
++static void line_timer_cb(void *arg)
+ {
+ struct line *dev = arg;
+
+ line_interrupt(dev->driver->read_irq, dev, NULL);
+ }
+
+-static void buffer_data(struct line *line, const char *buf, int len)
++static int write_room(struct line *dev)
+ {
+- int end;
++ int n;
++
++ if(dev->buffer == NULL) return(LINE_BUFSIZE - 1);
++
++ n = dev->head - dev->tail;
++ if(n <= 0) n = LINE_BUFSIZE + n;
++ return(n - 1);
++}
++
++static int buffer_data(struct line *line, const char *buf, int len)
++{
++ int end, room;
+
+ if(line->buffer == NULL){
+ line->buffer = kmalloc(LINE_BUFSIZE, GFP_ATOMIC);
+ if(line->buffer == NULL){
+ printk("buffer_data - atomic allocation failed\n");
+- return;
++ return(0);
+ }
+ line->head = line->buffer;
+ line->tail = line->buffer;
+ }
++
++ room = write_room(line);
++ len = (len > room) ? room : len;
++
+ end = line->buffer + LINE_BUFSIZE - line->tail;
+ if(len < end){
+ memcpy(line->tail, buf, len);
+@@ -60,6 +77,8 @@
+ memcpy(line->buffer, buf, len);
+ line->tail = line->buffer + len;
+ }
++
++ return(len);
+ }
+
+ static int flush_buffer(struct line *line)
+@@ -95,7 +114,7 @@
+ struct line *line;
+ char *new;
+ unsigned long flags;
+- int n, err, i;
++ int n, err, i, ret = 0;
+
+ if(tty->stopped) return 0;
+
+@@ -104,9 +123,13 @@
+ if(new == NULL)
+ return(0);
+ n = copy_from_user(new, buf, len);
+- if(n == len)
+- return(-EFAULT);
+ buf = new;
++		if(n == len){
++			ret = -EFAULT;	/* out_free returns ret, not len */
++			goto out_free;
++		}
++
++ len -= n;
+ }
+
+ i = tty->index;
+@@ -115,41 +138,50 @@
+ down(&line->sem);
+ if(line->head != line->tail){
+ local_irq_save(flags);
+- buffer_data(line, buf, len);
++ ret += buffer_data(line, buf, len);
+ err = flush_buffer(line);
+ local_irq_restore(flags);
+ if(err <= 0)
+- goto out;
++ goto out_up;
+ }
+ else {
+ n = write_chan(&line->chan_list, buf, len,
+ line->driver->write_irq);
+ if(n < 0){
+- len = n;
+- goto out;
++ ret = n;
++ goto out_up;
+ }
+- if(n < len)
+- buffer_data(line, buf + n, len - n);
++
++ len -= n;
++ ret += n;
++ if(len > 0)
++ ret += buffer_data(line, buf + n, len);
+ }
+- out:
++ out_up:
+ up(&line->sem);
+- return(len);
++ out_free:
++ if(from_user)
++ kfree(buf);
++ return(ret);
+ }
+
+-void line_write_interrupt(int irq, void *data, struct pt_regs *unused)
++static irqreturn_t line_write_interrupt(int irq, void *data,
++ struct pt_regs *unused)
+ {
+ struct line *dev = data;
+ struct tty_struct *tty = dev->tty;
+ int err;
+
+ err = flush_buffer(dev);
+- if(err == 0) return;
++ if(err == 0)
++ return(IRQ_NONE);
+ else if(err < 0){
+ dev->head = dev->buffer;
+ dev->tail = dev->buffer;
+ }
+
+- if(tty == NULL) return;
++ if(tty == NULL)
++ return(IRQ_NONE);
+
+ if(test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) &&
+ (tty->ldisc.write_wakeup != NULL))
+@@ -161,21 +193,9 @@
+ * writes.
+ */
+
+- if (waitqueue_active(&tty->write_wait))
++ if(waitqueue_active(&tty->write_wait))
+ wake_up_interruptible(&tty->write_wait);
+-
+-}
+-
+-int line_write_room(struct tty_struct *tty)
+-{
+- struct line *dev = tty->driver_data;
+- int n;
+-
+- if(dev->buffer == NULL) return(LINE_BUFSIZE - 1);
+-
+- n = dev->head - dev->tail;
+- if(n <= 0) n = LINE_BUFSIZE + n;
+- return(n - 1);
++ return(IRQ_HANDLED);
+ }
+
+ int line_setup_irq(int fd, int input, int output, void *data)
+@@ -305,7 +325,7 @@
+ if(*end != '='){
+ printk(KERN_ERR "line_setup failed to parse \"%s\"\n",
+ init);
+- return(1);
++ return(0);
+ }
+ init = end;
+ }
+@@ -313,12 +333,12 @@
+ if((n >= 0) && (n >= num)){
+ printk("line_setup - %d out of range ((0 ... %d) allowed)\n",
+ n, num);
+- return(1);
++ return(0);
+ }
+ else if(n >= 0){
+ if(lines[n].count > 0){
+ printk("line_setup - device %d is open\n", n);
+- return(1);
++ return(0);
+ }
+ if(lines[n].init_pri <= INIT_ONE){
+ lines[n].init_pri = INIT_ONE;
+@@ -332,7 +352,7 @@
+ else if(!all_allowed){
+ printk("line_setup - can't configure all devices from "
+ "mconsole\n");
+- return(1);
++ return(0);
+ }
+ else {
+ for(i = 0; i < num; i++){
+@@ -346,7 +366,7 @@
+ }
+ }
+ }
+- return(0);
++ return(1);
+ }
+
+ int line_config(struct line *lines, int num, char *str)
+@@ -357,7 +377,7 @@
+ printk("line_config - uml_strdup failed\n");
+ return(-ENOMEM);
+ }
+- return(line_setup(lines, num, new, 0));
++ return(!line_setup(lines, num, new, 0));
+ }
+
+ int line_get_config(char *name, struct line *lines, int num, char *str,
+@@ -369,7 +389,7 @@
+
+ dev = simple_strtoul(name, &end, 0);
+ if((*end != '\0') || (end == name)){
+- *error_out = "line_setup failed to parse device number";
++ *error_out = "line_get_config failed to parse device number";
+ return(0);
+ }
+
+@@ -379,15 +399,15 @@
+ }
+
+ line = &lines[dev];
++
+ down(&line->sem);
+-
+ if(!line->valid)
+ CONFIG_CHUNK(str, size, n, "none", 1);
+ else if(line->count == 0)
+ CONFIG_CHUNK(str, size, n, line->init_str, 1);
+ else n = chan_config_string(&line->chan_list, str, size, error_out);
+-
+ up(&line->sem);
++
+ return(n);
+ }
+
+@@ -396,7 +416,14 @@
+ char config[sizeof("conxxxx=none\0")];
+
+ sprintf(config, "%s=none", str);
+- return(line_setup(lines, num, config, 0));
++ return(!line_setup(lines, num, config, 0));
++}
++
++int line_write_room(struct tty_struct *tty)
++{
++ struct line *dev = tty->driver_data;
++
++ return(write_room(dev));
+ }
+
+ struct tty_driver *line_register_devfs(struct lines *set,
+@@ -412,7 +439,8 @@
+ return NULL;
+
+ driver->driver_name = line_driver->name;
+- driver->name = line_driver->devfs_name;
++ driver->name = line_driver->device_name;
++ driver->devfs_name = line_driver->devfs_name;
+ driver->major = line_driver->major;
+ driver->minor_start = line_driver->minor_start;
+ driver->type = line_driver->type;
+@@ -432,7 +460,7 @@
+
+ for(i = 0; i < nlines; i++){
+ if(!lines[i].valid)
+- tty_unregister_devfs(driver, i);
++ tty_unregister_device(driver, i);
+ }
+
+ mconsole_register_dev(&line_driver->mc);
+@@ -465,24 +493,25 @@
+ struct line *line;
+ };
+
+-void winch_interrupt(int irq, void *data, struct pt_regs *unused)
++irqreturn_t winch_interrupt(int irq, void *data, struct pt_regs *unused)
+ {
+ struct winch *winch = data;
+ struct tty_struct *tty;
+ int err;
+ char c;
+
+- err = generic_read(winch->fd, &c, NULL);
+- if(err < 0){
+- if(err != -EAGAIN){
+- printk("winch_interrupt : read failed, errno = %d\n",
+- -err);
+- printk("fd %d is losing SIGWINCH support\n",
+- winch->tty_fd);
+- free_irq(irq, data);
+- return;
++ if(winch->fd != -1){
++ err = generic_read(winch->fd, &c, NULL);
++ if(err < 0){
++ if(err != -EAGAIN){
++ printk("winch_interrupt : read failed, "
++ "errno = %d\n", -err);
++ printk("fd %d is losing SIGWINCH support\n",
++ winch->tty_fd);
++ return(IRQ_HANDLED);
++ }
++ goto out;
+ }
+- goto out;
+ }
+ tty = winch->line->tty;
+ if(tty != NULL){
+@@ -492,7 +521,9 @@
+ kill_pg(tty->pgrp, SIGWINCH, 1);
+ }
+ out:
+- reactivate_fd(winch->fd, WINCH_IRQ);
++ if(winch->fd != -1)
++ reactivate_fd(winch->fd, WINCH_IRQ);
++ return(IRQ_HANDLED);
+ }
+
+ DECLARE_MUTEX(winch_handler_sem);
+@@ -529,7 +560,10 @@
+
+ list_for_each(ele, &winch_handlers){
+ winch = list_entry(ele, struct winch, list);
+- close(winch->fd);
++ if(winch->fd != -1){
++ deactivate_fd(winch->fd, WINCH_IRQ);
++ os_close_file(winch->fd);
++ }
+ if(winch->pid != -1)
+ os_kill_process(winch->pid, 1);
+ }
+diff -Naur a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
+--- a/arch/um/drivers/Makefile 2004-02-11 12:15:52.000000000 -0500
++++ b/arch/um/drivers/Makefile 2004-02-11 12:27:45.000000000 -0500
+@@ -1,5 +1,5 @@
+ #
+-# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com)
++# Copyright (C) 2000, 2002, 2003 Jeff Dike (jdike@karaya.com)
+ # Licensed under the GPL
+ #
+
+@@ -39,6 +39,8 @@
+ obj-$(CONFIG_TTY_CHAN) += tty.o
+ obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o
+ obj-$(CONFIG_UML_WATCHDOG) += harddog.o
++obj-$(CONFIG_BLK_DEV_COW) += cow_kern.o
++obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
+
+ obj-y += stdio_console.o $(CHAN_OBJS)
+
+@@ -46,18 +48,7 @@
+
+ USER_OBJS := $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS)) fd.o \
+ null.o pty.o tty.o xterm.o
+-USER_OBJS := $(foreach file,$(USER_OBJS),arch/um/drivers/$(file))
++USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
+
+ $(USER_OBJS) : %.o: %.c
+ $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $<
+-
+-clean:
+-
+-modules:
+-
+-fastdep:
+-
+-dep:
+-
+-archmrproper: clean
+-
+diff -Naur a/arch/um/drivers/mcast_user.c b/arch/um/drivers/mcast_user.c
+--- a/arch/um/drivers/mcast_user.c 2004-02-11 12:15:46.000000000 -0500
++++ b/arch/um/drivers/mcast_user.c 2004-02-11 12:27:37.000000000 -0500
+@@ -23,6 +23,7 @@
+ #include "kern_util.h"
+ #include "user_util.h"
+ #include "user.h"
++#include "os.h"
+
+ #define MAX_PACKET (ETH_MAX_PACKET + ETH_HEADER_OTHER)
+
+@@ -62,7 +63,8 @@
+ goto out;
+ }
+
+- if ((fd = socket(AF_INET, SOCK_DGRAM, 0)) < 0){
++ fd = socket(AF_INET, SOCK_DGRAM, 0);
++ if (fd < 0){
+ printk("mcast_open : data socket failed, errno = %d\n",
+ errno);
+ fd = -ENOMEM;
+@@ -72,7 +74,7 @@
+ if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) {
+ printk("mcast_open: SO_REUSEADDR failed, errno = %d\n",
+ errno);
+- close(fd);
++ os_close_file(fd);
+ fd = -EINVAL;
+ goto out;
+ }
+@@ -82,7 +84,7 @@
+ sizeof(pri->ttl)) < 0) {
+ printk("mcast_open: IP_MULTICAST_TTL failed, error = %d\n",
+ errno);
+- close(fd);
++ os_close_file(fd);
+ fd = -EINVAL;
+ goto out;
+ }
+@@ -91,7 +93,7 @@
+ if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, &yes, sizeof(yes)) < 0) {
+ printk("mcast_open: IP_MULTICAST_LOOP failed, error = %d\n",
+ errno);
+- close(fd);
++ os_close_file(fd);
+ fd = -EINVAL;
+ goto out;
+ }
+@@ -99,7 +101,7 @@
+ /* bind socket to mcast address */
+ if (bind(fd, (struct sockaddr *) sin, sizeof(*sin)) < 0) {
+ printk("mcast_open : data bind failed, errno = %d\n", errno);
+- close(fd);
++ os_close_file(fd);
+ fd = -EINVAL;
+ goto out;
+ }
+@@ -115,7 +117,7 @@
+ "interface on the host.\n");
+ printk("eth0 should be configured in order to use the "
+ "multicast transport.\n");
+- close(fd);
++ os_close_file(fd);
+ fd = -EINVAL;
+ }
+
+@@ -137,7 +139,7 @@
+ errno);
+ }
+
+- close(fd);
++ os_close_file(fd);
+ }
+
+ int mcast_user_write(int fd, void *buf, int len, struct mcast_data *pri)
+diff -Naur a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
+--- a/arch/um/drivers/mconsole_kern.c 2004-02-11 12:14:15.000000000 -0500
++++ b/arch/um/drivers/mconsole_kern.c 2004-02-11 12:25:42.000000000 -0500
+@@ -1,6 +1,6 @@
+ /*
+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org)
+- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
++ * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+@@ -15,6 +15,9 @@
+ #include "linux/sysrq.h"
+ #include "linux/workqueue.h"
+ #include "linux/module.h"
++#include "linux/file.h"
++#include "linux/fs.h"
++#include "linux/namei.h"
+ #include "linux/proc_fs.h"
+ #include "asm/irq.h"
+ #include "asm/uaccess.h"
+@@ -27,6 +30,7 @@
+ #include "init.h"
+ #include "os.h"
+ #include "umid.h"
++#include "irq_kern.h"
+
+ static int do_unlink_socket(struct notifier_block *notifier,
+ unsigned long what, void *data)
+@@ -67,7 +71,7 @@
+
+ DECLARE_WORK(mconsole_work, mc_work_proc, NULL);
+
+-void mconsole_interrupt(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t mconsole_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+ {
+ int fd;
+ struct mconsole_entry *new;
+@@ -75,9 +79,10 @@
+
+ fd = (int) dev_id;
+ while (mconsole_get_request(fd, &req)){
+- if(req.cmd->as_interrupt) (*req.cmd->handler)(&req);
++ if(req.cmd->context == MCONSOLE_INTR)
++ (*req.cmd->handler)(&req);
+ else {
+- new = kmalloc(sizeof(req), GFP_ATOMIC);
++ new = kmalloc(sizeof(*new), GFP_ATOMIC);
+ if(new == NULL)
+ mconsole_reply(&req, "Out of memory", 1, 0);
+ else {
+@@ -88,6 +93,7 @@
+ }
+ if(!list_empty(&mc_requests)) schedule_work(&mconsole_work);
+ reactivate_fd(fd, MCONSOLE_IRQ);
++ return(IRQ_HANDLED);
+ }
+
+ void mconsole_version(struct mc_request *req)
+@@ -100,20 +106,110 @@
+ mconsole_reply(req, version, 0, 0);
+ }
+
++void mconsole_log(struct mc_request *req)
++{
++ int len;
++ char *ptr = req->request.data;
++
++ ptr += strlen("log");
++ while(isspace(*ptr)) ptr++;
++
++ len = req->len - (ptr - req->request.data);
++ printk("%.*s", len, ptr);
++ mconsole_reply(req, "", 0, 0);
++}
++
++void mconsole_proc(struct mc_request *req)
++{
++ struct nameidata nd;
++ struct file_system_type *proc;
++ struct super_block *super;
++ struct file *file;
++ int n, err;
++ char *ptr = req->request.data, *buf;
++
++ ptr += strlen("proc");
++ while(isspace(*ptr)) ptr++;
++
++ proc = get_fs_type("proc");
++ if(proc == NULL){
++ mconsole_reply(req, "procfs not registered", 1, 0);
++ goto out;
++ }
++
++ super = (*proc->get_sb)(proc, 0, NULL, NULL);
++ put_filesystem(proc);
++ if(super == NULL){
++ mconsole_reply(req, "Failed to get procfs superblock", 1, 0);
++ goto out;
++ }
++ up_write(&super->s_umount);
++
++ nd.dentry = super->s_root;
++ nd.mnt = NULL;
++ nd.flags = O_RDONLY + 1;
++ nd.last_type = LAST_ROOT;
++
++ err = link_path_walk(ptr, &nd);
++ if(err){
++ mconsole_reply(req, "Failed to look up file", 1, 0);
++ goto out_kill;
++ }
++
++ file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
++ if(IS_ERR(file)){
++ mconsole_reply(req, "Failed to open file", 1, 0);
++ goto out_kill;
++ }
++
++ buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
++ if(buf == NULL){
++ mconsole_reply(req, "Failed to allocate buffer", 1, 0);
++ goto out_fput;
++ }
++
++ if((file->f_op != NULL) && (file->f_op->read != NULL)){
++ do {
++ n = (*file->f_op->read)(file, buf, PAGE_SIZE - 1,
++ &file->f_pos);
++ if(n >= 0){
++ buf[n] = '\0';
++ mconsole_reply(req, buf, 0, (n > 0));
++ }
++ else {
++ mconsole_reply(req, "Read of file failed",
++ 1, 0);
++ goto out_free;
++ }
++ } while(n > 0);
++ }
++ else mconsole_reply(req, "", 0, 0);
++
++ out_free:
++ kfree(buf);
++ out_fput:
++ fput(file);
++ out_kill:
++ deactivate_super(super);
++ out: ;
++}
++
+ #define UML_MCONSOLE_HELPTEXT \
+-"Commands:
+- version - Get kernel version
+- help - Print this message
+- halt - Halt UML
+- reboot - Reboot UML
+- config <dev>=<config> - Add a new device to UML;
+- same syntax as command line
+- config <dev> - Query the configuration of a device
+- remove <dev> - Remove a device from UML
+- sysrq <letter> - Performs the SysRq action controlled by the letter
+- cad - invoke the Ctl-Alt-Del handler
+- stop - pause the UML; it will do nothing until it receives a 'go'
+- go - continue the UML after a 'stop'
++"Commands: \n\
++ version - Get kernel version \n\
++ help - Print this message \n\
++ halt - Halt UML \n\
++ reboot - Reboot UML \n\
++ config <dev>=<config> - Add a new device to UML; \n\
++ same syntax as command line \n\
++ config <dev> - Query the configuration of a device \n\
++ remove <dev> - Remove a device from UML \n\
++ sysrq <letter> - Performs the SysRq action controlled by the letter \n\
++ cad - invoke the Ctl-Alt-Del handler \n\
++ stop - pause the UML; it will do nothing until it receives a 'go' \n\
++ go - continue the UML after a 'stop' \n\
++ log <string> - make UML enter <string> into the kernel log\n\
++ proc <file> - returns the contents of the UML's /proc/<file>\n\
+ "
+
+ void mconsole_help(struct mc_request *req)
+@@ -302,7 +398,7 @@
+ if(umid_file_name("mconsole", file, sizeof(file))) return(-1);
+ snprintf(mconsole_socket_name, sizeof(file), "%s", file);
+
+- sock = create_unix_socket(file, sizeof(file));
++ sock = os_create_unix_socket(file, sizeof(file), 1);
+ if (sock < 0){
+ printk("Failed to initialize management console\n");
+ return(1);
+@@ -344,11 +440,16 @@
+ if(buf == NULL)
+ return(-ENOMEM);
+
+- if(copy_from_user(buf, buffer, count))
+- return(-EFAULT);
++ if(copy_from_user(buf, buffer, count)){
++ count = -EFAULT;
++ goto out;
++ }
++
+ buf[count] = '\0';
+
+ mconsole_notify(notify_socket, MCONSOLE_USER_NOTIFY, buf, count);
++ out:
++ kfree(buf);
+ return(count);
+ }
+
+diff -Naur a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c
+--- a/arch/um/drivers/mconsole_user.c 2004-02-11 12:14:27.000000000 -0500
++++ b/arch/um/drivers/mconsole_user.c 2004-02-11 12:26:08.000000000 -0500
+@@ -1,6 +1,6 @@
+ /*
+ * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org)
+- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
++ * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+@@ -18,16 +18,18 @@
+ #include "umid.h"
+
+ static struct mconsole_command commands[] = {
+- { "version", mconsole_version, 1 },
+- { "halt", mconsole_halt, 0 },
+- { "reboot", mconsole_reboot, 0 },
+- { "config", mconsole_config, 0 },
+- { "remove", mconsole_remove, 0 },
+- { "sysrq", mconsole_sysrq, 1 },
+- { "help", mconsole_help, 1 },
+- { "cad", mconsole_cad, 1 },
+- { "stop", mconsole_stop, 0 },
+- { "go", mconsole_go, 1 },
++ { "version", mconsole_version, MCONSOLE_INTR },
++ { "halt", mconsole_halt, MCONSOLE_PROC },
++ { "reboot", mconsole_reboot, MCONSOLE_PROC },
++ { "config", mconsole_config, MCONSOLE_PROC },
++ { "remove", mconsole_remove, MCONSOLE_PROC },
++ { "sysrq", mconsole_sysrq, MCONSOLE_INTR },
++ { "help", mconsole_help, MCONSOLE_INTR },
++ { "cad", mconsole_cad, MCONSOLE_INTR },
++ { "stop", mconsole_stop, MCONSOLE_PROC },
++ { "go", mconsole_go, MCONSOLE_INTR },
++ { "log", mconsole_log, MCONSOLE_INTR },
++ { "proc", mconsole_proc, MCONSOLE_PROC },
+ };
+
+ /* Initialized in mconsole_init, which is an initcall */
+@@ -139,6 +141,7 @@
+ memcpy(reply.data, str, len);
+ reply.data[len] = '\0';
+ total -= len;
++ str += len;
+ reply.len = len + 1;
+
+ len = sizeof(reply) + reply.len - sizeof(reply.data);
+diff -Naur a/arch/um/drivers/mmapper_kern.c b/arch/um/drivers/mmapper_kern.c
+--- a/arch/um/drivers/mmapper_kern.c 2004-02-11 12:14:23.000000000 -0500
++++ b/arch/um/drivers/mmapper_kern.c 2004-02-11 12:26:07.000000000 -0500
+@@ -120,7 +120,10 @@
+ printk(KERN_INFO "Mapper v0.1\n");
+
+ v_buf = (char *) find_iomem("mmapper", &mmapper_size);
+- if(mmapper_size == 0) return(0);
++ if(mmapper_size == 0){
++ printk(KERN_ERR "mmapper_init - find_iomem failed\n");
++ return(0);
++ }
+
+ p_buf = __pa(v_buf);
+
+diff -Naur a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
+--- a/arch/um/drivers/net_kern.c 2004-02-11 12:15:23.000000000 -0500
++++ b/arch/um/drivers/net_kern.c 2004-02-11 12:27:11.000000000 -0500
+@@ -26,6 +26,7 @@
+ #include "mconsole_kern.h"
+ #include "init.h"
+ #include "irq_user.h"
++#include "irq_kern.h"
+
+ static spinlock_t opened_lock = SPIN_LOCK_UNLOCKED;
+ LIST_HEAD(opened);
+@@ -37,7 +38,8 @@
+ struct sk_buff *skb;
+
+ /* If we can't allocate memory, try again next round. */
+- if ((skb = dev_alloc_skb(dev->mtu)) == NULL) {
++ skb = dev_alloc_skb(dev->mtu);
++ if (skb == NULL) {
+ lp->stats.rx_dropped++;
+ return 0;
+ }
+@@ -61,14 +63,14 @@
+ return pkt_len;
+ }
+
+-void uml_net_interrupt(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t uml_net_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+ {
+ struct net_device *dev = dev_id;
+ struct uml_net_private *lp = dev->priv;
+ int err;
+
+ if(!netif_running(dev))
+- return;
++ return(IRQ_NONE);
+
+ spin_lock(&lp->lock);
+ while((err = uml_net_rx(dev)) > 0) ;
+@@ -83,6 +85,7 @@
+
+ out:
+ spin_unlock(&lp->lock);
++ return(IRQ_HANDLED);
+ }
+
+ static int uml_net_open(struct net_device *dev)
+@@ -252,37 +255,6 @@
+ #endif
+ }
+
+-/*
+- * default do nothing hard header packet routines for struct net_device init.
+- * real ethernet transports will overwrite with real routines.
+- */
+-static int uml_net_hard_header(struct sk_buff *skb, struct net_device *dev,
+- unsigned short type, void *daddr, void *saddr, unsigned len)
+-{
+- return(0); /* no change */
+-}
+-
+-static int uml_net_rebuild_header(struct sk_buff *skb)
+-{
+- return(0); /* ignore */
+-}
+-
+-static int uml_net_header_cache(struct neighbour *neigh, struct hh_cache *hh)
+-{
+- return(-1); /* fail */
+-}
+-
+-static void uml_net_header_cache_update(struct hh_cache *hh,
+- struct net_device *dev, unsigned char * haddr)
+-{
+- /* ignore */
+-}
+-
+-static int uml_net_header_parse(struct sk_buff *skb, unsigned char *haddr)
+-{
+- return(0); /* nothing */
+-}
+-
+ static spinlock_t devices_lock = SPIN_LOCK_UNLOCKED;
+ static struct list_head devices = LIST_HEAD_INIT(devices);
+
+@@ -292,7 +264,7 @@
+ struct uml_net *device;
+ struct net_device *dev;
+ struct uml_net_private *lp;
+- int err, size;
++ int save, err, size;
+
+ size = transport->private_size + sizeof(struct uml_net_private) +
+ sizeof(((struct uml_net_private *) 0)->user);
+@@ -334,12 +306,6 @@
+ snprintf(dev->name, sizeof(dev->name), "eth%d", n);
+ device->dev = dev;
+
+- dev->hard_header = uml_net_hard_header;
+- dev->rebuild_header = uml_net_rebuild_header;
+- dev->hard_header_cache = uml_net_header_cache;
+- dev->header_cache_update= uml_net_header_cache_update;
+- dev->hard_header_parse = uml_net_header_parse;
+-
+ (*transport->kern->init)(dev, init);
+
+ dev->mtu = transport->user->max_packet;
+@@ -362,21 +328,29 @@
+ return 1;
+ lp = dev->priv;
+
+- INIT_LIST_HEAD(&lp->list);
+- spin_lock_init(&lp->lock);
+- lp->dev = dev;
+- lp->fd = -1;
+- lp->mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0 };
+- lp->have_mac = device->have_mac;
+- lp->protocol = transport->kern->protocol;
+- lp->open = transport->user->open;
+- lp->close = transport->user->close;
+- lp->remove = transport->user->remove;
+- lp->read = transport->kern->read;
+- lp->write = transport->kern->write;
+- lp->add_address = transport->user->add_address;
+- lp->delete_address = transport->user->delete_address;
+- lp->set_mtu = transport->user->set_mtu;
++ /* lp.user is the first four bytes of the transport data, which
++ * has already been initialized. This structure assignment will
++ * overwrite that, so we make sure that .user gets overwritten with
++ * what it already has.
++ */
++ save = lp->user[0];
++ *lp = ((struct uml_net_private)
++ { .list = LIST_HEAD_INIT(lp->list),
++ .lock = SPIN_LOCK_UNLOCKED,
++ .dev = dev,
++ .fd = -1,
++ .mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0},
++ .have_mac = device->have_mac,
++ .protocol = transport->kern->protocol,
++ .open = transport->user->open,
++ .close = transport->user->close,
++ .remove = transport->user->remove,
++ .read = transport->kern->read,
++ .write = transport->kern->write,
++ .add_address = transport->user->add_address,
++ .delete_address = transport->user->delete_address,
++ .set_mtu = transport->user->set_mtu,
++ .user = { save } });
+
+ init_timer(&lp->tl);
+ lp->tl.function = uml_net_user_timer_expire;
+@@ -609,7 +583,8 @@
+ unregister_netdev(dev);
+
+ list_del(&device->list);
+- free_netdev(device);
++ kfree(device);
++ free_netdev(dev);
+ return(0);
+ }
+
+diff -Naur a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c
+--- a/arch/um/drivers/net_user.c 2004-02-11 12:14:28.000000000 -0500
++++ b/arch/um/drivers/net_user.c 2004-02-11 12:26:11.000000000 -0500
+@@ -26,8 +26,7 @@
+ if(gate_addr == NULL) return(0);
+ if(sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0],
+ &tap_addr[1], &tap_addr[2], &tap_addr[3]) != 4){
+- printk("Invalid tap IP address - '%s'\n",
+- gate_addr);
++ printk("Invalid tap IP address - '%s'\n", gate_addr);
+ return(-EINVAL);
+ }
+ return(0);
+@@ -60,18 +59,18 @@
+ }
+
+ *output = '\0';
+- if(read(fd, &remain, sizeof(remain)) != sizeof(remain)){
+- printk("read_output - read of length failed, errno = %d\n",
+- errno);
++ n = os_read_file(fd, &remain, sizeof(remain));
++ if(n != sizeof(remain)){
++ printk("read_output - read of length failed, err = %d\n", -n);
+ return;
+ }
+
+ while(remain != 0){
+ n = (remain < len) ? remain : len;
+- actual = read(fd, output, n);
++ actual = os_read_file(fd, output, n);
+ if(actual != n){
+ printk("read_output - read of data failed, "
+- "errno = %d\n", errno);
++ "err = %d\n", -actual);
+ return;
+ }
+ remain -= actual;
+@@ -83,13 +82,12 @@
+ {
+ int n;
+
+- while(((n = read(fd, buf, len)) < 0) && (errno == EINTR)) ;
++ n = os_read_file(fd, buf, len);
+
+- if(n < 0){
+- if(errno == EAGAIN) return(0);
+- return(-errno);
+- }
+- else if(n == 0) return(-ENOTCONN);
++ if(n == -EAGAIN)
++ return(0);
++ else if(n == 0)
++ return(-ENOTCONN);
+ return(n);
+ }
+
+@@ -112,13 +110,13 @@
+ {
+ int n;
+
+- while(((n = write(fd, buf, len)) < 0) && (errno == EINTR)) ;
+- if(n < 0){
+- if(errno == EAGAIN) return(0);
+- return(-errno);
+- }
+- else if(n == 0) return(-ENOTCONN);
+- return(n);
++ n = os_write_file(fd, buf, len);
++
++ if(n == -EAGAIN)
++ return(0);
++ else if(n == 0)
++ return(-ENOTCONN);
++ return(n);
+ }
+
+ int net_send(int fd, void *buf, int len)
+@@ -157,7 +155,7 @@
+ {
+ struct change_pre_exec_data *data = arg;
+
+- close(data->close_me);
++ os_close_file(data->close_me);
+ dup2(data->stdout, 1);
+ }
+
+@@ -167,15 +165,15 @@
+ struct change_pre_exec_data pe_data;
+
+ err = os_pipe(fds, 1, 0);
+- if(err){
+- printk("change_tramp - pipe failed, errno = %d\n", -err);
++ if(err < 0){
++ printk("change_tramp - pipe failed, err = %d\n", -err);
+ return(err);
+ }
+ pe_data.close_me = fds[0];
+ pe_data.stdout = fds[1];
+ pid = run_helper(change_pre_exec, &pe_data, argv, NULL);
+
+- close(fds[1]);
++ os_close_file(fds[1]);
+ read_output(fds[0], output, output_len);
+ waitpid(pid, NULL, 0);
+ return(pid);
+diff -Naur a/arch/um/drivers/null.c b/arch/um/drivers/null.c
+--- a/arch/um/drivers/null.c 2004-02-11 12:14:21.000000000 -0500
++++ b/arch/um/drivers/null.c 2004-02-11 12:26:02.000000000 -0500
+@@ -5,7 +5,6 @@
+
+ #include <stdlib.h>
+ #include <errno.h>
+-#include <fcntl.h>
+ #include "chan_user.h"
+ #include "os.h"
+
+diff -Naur a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c
+--- a/arch/um/drivers/port_kern.c 2004-02-11 12:14:18.000000000 -0500
++++ b/arch/um/drivers/port_kern.c 2004-02-11 12:26:00.000000000 -0500
+@@ -6,6 +6,7 @@
+ #include "linux/list.h"
+ #include "linux/sched.h"
+ #include "linux/slab.h"
++#include "linux/interrupt.h"
+ #include "linux/irq.h"
+ #include "linux/spinlock.h"
+ #include "linux/errno.h"
+@@ -14,6 +15,7 @@
+ #include "kern_util.h"
+ #include "kern.h"
+ #include "irq_user.h"
++#include "irq_kern.h"
+ #include "port.h"
+ #include "init.h"
+ #include "os.h"
+@@ -38,21 +40,21 @@
+ struct connection {
+ struct list_head list;
+ int fd;
+- int helper_pid;
++ int helper_pid;
+ int socket[2];
+ int telnetd_pid;
+ struct port_list *port;
+ };
+
+-static void pipe_interrupt(int irq, void *data, struct pt_regs *regs)
++static irqreturn_t pipe_interrupt(int irq, void *data, struct pt_regs *regs)
+ {
+ struct connection *conn = data;
+ int fd;
+
+- fd = os_rcv_fd(conn->socket[0], &conn->helper_pid);
++ fd = os_rcv_fd(conn->socket[0], &conn->helper_pid);
+ if(fd < 0){
+ if(fd == -EAGAIN)
+- return;
++ return(IRQ_NONE);
+
+ printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n",
+ -fd);
+@@ -65,6 +67,7 @@
+ list_add(&conn->list, &conn->port->connections);
+
+ up(&conn->port->sem);
++ return(IRQ_HANDLED);
+ }
+
+ static int port_accept(struct port_list *port)
+@@ -102,8 +105,7 @@
+ }
+
+ list_add(&conn->list, &port->pending);
+- ret = 1;
+- goto out;
++ return(1);
+
+ out_free:
+ kfree(conn);
+@@ -138,12 +140,13 @@
+
+ DECLARE_WORK(port_work, port_work_proc, NULL);
+
+-static void port_interrupt(int irq, void *data, struct pt_regs *regs)
++static irqreturn_t port_interrupt(int irq, void *data, struct pt_regs *regs)
+ {
+ struct port_list *port = data;
+
+ port->has_connection = 1;
+ schedule_work(&port_work);
++ return(IRQ_HANDLED);
+ }
+
+ void *port_data(int port_num)
+diff -Naur a/arch/um/drivers/port_user.c b/arch/um/drivers/port_user.c
+--- a/arch/um/drivers/port_user.c 2004-02-11 12:15:59.000000000 -0500
++++ b/arch/um/drivers/port_user.c 2004-02-11 12:27:52.000000000 -0500
+@@ -47,10 +47,12 @@
+ return(NULL);
+ }
+
+- if((kern_data = port_data(port)) == NULL)
++ kern_data = port_data(port);
++ if(kern_data == NULL)
+ return(NULL);
+
+- if((data = um_kmalloc(sizeof(*data))) == NULL)
++ data = um_kmalloc(sizeof(*data));
++ if(data == NULL)
+ goto err;
+
+ *data = ((struct port_chan) { .raw = opts->raw,
+@@ -90,7 +92,7 @@
+ struct port_chan *data = d;
+
+ port_remove_dev(data->kernel_data);
+- close(fd);
++ os_close_file(fd);
+ }
+
+ int port_console_write(int fd, const char *buf, int n, void *d)
+@@ -130,11 +132,15 @@
+ goto out;
+ }
+
+- if((listen(fd, 1) < 0) || (os_set_fd_block(fd, 0))){
++ if(listen(fd, 1) < 0){
+ err = -errno;
+ goto out;
+ }
+
++ err = os_set_fd_block(fd, 0);
++ if(err < 0)
++ goto out;
++
+ return(fd);
+ out:
+ os_close_file(fd);
+@@ -153,10 +159,10 @@
+ dup2(data->sock_fd, 0);
+ dup2(data->sock_fd, 1);
+ dup2(data->sock_fd, 2);
+- close(data->sock_fd);
++ os_close_file(data->sock_fd);
+ dup2(data->pipe_fd, 3);
+ os_shutdown_socket(3, 1, 0);
+- close(data->pipe_fd);
++ os_close_file(data->pipe_fd);
+ }
+
+ int port_connection(int fd, int *socket, int *pid_out)
+@@ -166,11 +172,12 @@
+ "/usr/lib/uml/port-helper", NULL };
+ struct port_pre_exec_data data;
+
+- if((new = os_accept_connection(fd)) < 0)
+- return(-errno);
++ new = os_accept_connection(fd);
++ if(new < 0)
++ return(new);
+
+ err = os_pipe(socket, 0, 0);
+- if(err)
++ if(err < 0)
+ goto out_close;
+
+ data = ((struct port_pre_exec_data)
+@@ -186,11 +193,11 @@
+
+ out_shutdown:
+ os_shutdown_socket(socket[0], 1, 1);
+- close(socket[0]);
++ os_close_file(socket[0]);
+ os_shutdown_socket(socket[1], 1, 1);
+- close(socket[1]);
++ os_close_file(socket[1]);
+ out_close:
+- close(new);
++ os_close_file(new);
+ return(err);
+ }
+
+diff -Naur a/arch/um/drivers/pty.c b/arch/um/drivers/pty.c
+--- a/arch/um/drivers/pty.c 2004-02-11 12:16:37.000000000 -0500
++++ b/arch/um/drivers/pty.c 2004-02-11 12:28:37.000000000 -0500
+@@ -7,12 +7,12 @@
+ #include <unistd.h>
+ #include <string.h>
+ #include <errno.h>
+-#include <fcntl.h>
+ #include <termios.h>
+ #include "chan_user.h"
+ #include "user.h"
+ #include "user_util.h"
+ #include "kern_util.h"
++#include "os.h"
+
+ struct pty_chan {
+ void (*announce)(char *dev_name, int dev);
+@@ -26,7 +26,8 @@
+ {
+ struct pty_chan *data;
+
+- if((data = um_kmalloc(sizeof(*data))) == NULL) return(NULL);
++ data = um_kmalloc(sizeof(*data));
++ if(data == NULL) return(NULL);
+ *data = ((struct pty_chan) { .announce = opts->announce,
+ .dev = device,
+ .raw = opts->raw });
+@@ -39,7 +40,8 @@
+ char *dev;
+ int fd;
+
+- if((fd = get_pty()) < 0){
++ fd = get_pty();
++ if(fd < 0){
+ printk("open_pts : Failed to open pts\n");
+ return(-errno);
+ }
+@@ -57,29 +59,27 @@
+
+ int getmaster(char *line)
+ {
+- struct stat stb;
+ char *pty, *bank, *cp;
+- int master;
++ int master, err;
+
+ pty = &line[strlen("/dev/ptyp")];
+ for (bank = "pqrs"; *bank; bank++) {
+ line[strlen("/dev/pty")] = *bank;
+ *pty = '0';
+- if (stat(line, &stb) < 0)
++ if (os_stat_file(line, NULL) < 0)
+ break;
+ for (cp = "0123456789abcdef"; *cp; cp++) {
+ *pty = *cp;
+- master = open(line, O_RDWR);
++ master = os_open_file(line, of_rdwr(OPENFLAGS()), 0);
+ if (master >= 0) {
+ char *tp = &line[strlen("/dev/")];
+- int ok;
+
+ /* verify slave side is usable */
+ *tp = 't';
+- ok = access(line, R_OK|W_OK) == 0;
++ err = os_access(line, OS_ACC_RW_OK);
+ *tp = 'p';
+- if (ok) return(master);
+- (void) close(master);
++ if(err == 0) return(master);
++ (void) os_close_file(master);
+ }
+ }
+ }
+diff -Naur a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c
+--- a/arch/um/drivers/slip_user.c 2004-02-11 12:16:37.000000000 -0500
++++ b/arch/um/drivers/slip_user.c 2004-02-11 12:28:37.000000000 -0500
+@@ -4,11 +4,9 @@
+ #include <stddef.h>
+ #include <sched.h>
+ #include <string.h>
+-#include <sys/fcntl.h>
+ #include <sys/errno.h>
+ #include <sys/termios.h>
+ #include <sys/wait.h>
+-#include <sys/ioctl.h>
+ #include <sys/signal.h>
+ #include "user_util.h"
+ #include "kern_util.h"
+@@ -65,9 +63,9 @@
+ {
+ struct slip_pre_exec_data *data = arg;
+
+- if(data->stdin != -1) dup2(data->stdin, 0);
++ if(data->stdin >= 0) dup2(data->stdin, 0);
+ dup2(data->stdout, 1);
+- if(data->close_me != -1) close(data->close_me);
++ if(data->close_me >= 0) os_close_file(data->close_me);
+ }
+
+ static int slip_tramp(char **argv, int fd)
+@@ -77,8 +75,8 @@
+ int status, pid, fds[2], err, output_len;
+
+ err = os_pipe(fds, 1, 0);
+- if(err){
+- printk("slip_tramp : pipe failed, errno = %d\n", -err);
++ if(err < 0){
++ printk("slip_tramp : pipe failed, err = %d\n", -err);
+ return(err);
+ }
+
+@@ -96,7 +94,7 @@
+ printk("slip_tramp : failed to allocate output "
+ "buffer\n");
+
+- close(fds[1]);
++ os_close_file(fds[1]);
+ read_output(fds[0], output, output_len);
+ if(output != NULL){
+ printk("%s", output);
+@@ -105,7 +103,7 @@
+ if(waitpid(pid, &status, 0) < 0) err = errno;
+ else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)){
+ printk("'%s' didn't exit with status 0\n", argv[0]);
+- err = EINVAL;
++ err = -EINVAL;
+ }
+ }
+ return(err);
+@@ -118,15 +116,17 @@
+ char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")];
+ char *argv[] = { "uml_net", version_buf, "slip", "up", gate_buf,
+ NULL };
+- int sfd, mfd, disc, sencap, err;
++ int sfd, mfd, err;
+
+- if((mfd = get_pty()) < 0){
+- printk("umn : Failed to open pty\n");
+- return(-1);
++ mfd = get_pty();
++ if(mfd < 0){
++ printk("umn : Failed to open pty, err = %d\n", -mfd);
++ return(mfd);
+ }
+- if((sfd = os_open_file(ptsname(mfd), of_rdwr(OPENFLAGS()), 0)) < 0){
+- printk("Couldn't open tty for slip line\n");
+- return(-1);
++ sfd = os_open_file(ptsname(mfd), of_rdwr(OPENFLAGS()), 0);
++ if(sfd < 0){
++ printk("Couldn't open tty for slip line, err = %d\n", -sfd);
++ return(sfd);
+ }
+ if(set_up_tty(sfd)) return(-1);
+ pri->slave = sfd;
+@@ -138,28 +138,23 @@
+
+ err = slip_tramp(argv, sfd);
+
+- if(err != 0){
+- printk("slip_tramp failed - errno = %d\n", err);
+- return(-err);
++ if(err < 0){
++ printk("slip_tramp failed - err = %d\n", -err);
++ return(err);
+ }
+- if(ioctl(pri->slave, SIOCGIFNAME, pri->name) < 0){
+- printk("SIOCGIFNAME failed, errno = %d\n", errno);
+- return(-errno);
++ err = os_get_ifname(pri->slave, pri->name);
++ if(err < 0){
++ printk("get_ifname failed, err = %d\n", -err);
++ return(err);
+ }
+ iter_addresses(pri->dev, open_addr, pri->name);
+ }
+ else {
+- disc = N_SLIP;
+- if(ioctl(sfd, TIOCSETD, &disc) < 0){
+- printk("Failed to set slip line discipline - "
+- "errno = %d\n", errno);
+- return(-errno);
+- }
+- sencap = 0;
+- if(ioctl(sfd, SIOCSIFENCAP, &sencap) < 0){
+- printk("Failed to set slip encapsulation - "
+- "errno = %d\n", errno);
+- return(-errno);
++ err = os_set_slip(sfd);
++ if(err < 0){
++ printk("Failed to set slip discipline encapsulation - "
++ "err = %d\n", -err);
++ return(err);
+ }
+ }
+ return(mfd);
+@@ -181,9 +176,9 @@
+ err = slip_tramp(argv, -1);
+
+ if(err != 0)
+- printk("slip_tramp failed - errno = %d\n", err);
+- close(fd);
+- close(pri->slave);
++ printk("slip_tramp failed - errno = %d\n", -err);
++ os_close_file(fd);
++ os_close_file(pri->slave);
+ pri->slave = -1;
+ }
+
+@@ -243,7 +238,7 @@
+ {
+ struct slip_data *pri = data;
+
+- if(pri->slave == -1) return;
++ if(pri->slave < 0) return;
+ open_addr(addr, netmask, pri->name);
+ }
+
+@@ -252,7 +247,7 @@
+ {
+ struct slip_data *pri = data;
+
+- if(pri->slave == -1) return;
++ if(pri->slave < 0) return;
+ close_addr(addr, netmask, pri->name);
+ }
+
+diff -Naur a/arch/um/drivers/slirp_user.c b/arch/um/drivers/slirp_user.c
+--- a/arch/um/drivers/slirp_user.c 2004-02-11 12:16:03.000000000 -0500
++++ b/arch/um/drivers/slirp_user.c 2004-02-11 12:27:58.000000000 -0500
+@@ -4,7 +4,6 @@
+ #include <stddef.h>
+ #include <sched.h>
+ #include <string.h>
+-#include <sys/fcntl.h>
+ #include <sys/errno.h>
+ #include <sys/wait.h>
+ #include <sys/signal.h>
+@@ -48,15 +47,15 @@
+
+ return(pid);
+ }
+-
++
++/* XXX This is just a trivial wrapper around os_pipe */
+ static int slirp_datachan(int *mfd, int *sfd)
+ {
+ int fds[2], err;
+
+ err = os_pipe(fds, 1, 1);
+- if(err){
+- printk("slirp_datachan: Failed to open pipe, errno = %d\n",
+- -err);
++ if(err < 0){
++ printk("slirp_datachan: Failed to open pipe, err = %d\n", -err);
+ return(err);
+ }
+
+@@ -77,7 +76,7 @@
+ pid = slirp_tramp(pri->argw.argv, sfd);
+
+ if(pid < 0){
+- printk("slirp_tramp failed - errno = %d\n", pid);
++ printk("slirp_tramp failed - errno = %d\n", -pid);
+ os_close_file(sfd);
+ os_close_file(mfd);
+ return(pid);
+@@ -97,8 +96,8 @@
+ struct slirp_data *pri = data;
+ int status,err;
+
+- close(fd);
+- close(pri->slave);
++ os_close_file(fd);
++ os_close_file(pri->slave);
+
+ pri->slave = -1;
+
+diff -Naur a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c
+--- a/arch/um/drivers/ssl.c 2004-02-11 12:15:28.000000000 -0500
++++ b/arch/um/drivers/ssl.c 2004-02-11 12:27:17.000000000 -0500
+@@ -10,6 +10,7 @@
+ #include "linux/major.h"
+ #include "linux/mm.h"
+ #include "linux/init.h"
++#include "linux/console.h"
+ #include "asm/termbits.h"
+ #include "asm/irq.h"
+ #include "line.h"
+@@ -53,8 +54,9 @@
+
+ static struct line_driver driver = {
+ .name = "UML serial line",
+- .devfs_name = "tts/%d",
+- .major = TTYAUX_MAJOR,
++ .device_name = "ttS",
++ .devfs_name = "tts/",
++ .major = TTY_MAJOR,
+ .minor_start = 64,
+ .type = TTY_DRIVER_TYPE_SERIAL,
+ .subtype = 0,
+@@ -149,6 +151,9 @@
+ case TCSETSW:
+ case TCGETA:
+ case TIOCMGET:
++ case TCSBRK:
++ case TCSBRKP:
++ case TIOCMSET:
+ ret = -ENOIOCTLCMD;
+ break;
+ default:
+@@ -212,6 +217,37 @@
+ */
+ static int ssl_init_done = 0;
+
++static void ssl_console_write(struct console *c, const char *string,
++ unsigned len)
++{
++ struct line *line = &serial_lines[c->index];
++ if(ssl_init_done)
++ down(&line->sem);
++ console_write_chan(&line->chan_list, string, len);
++ if(ssl_init_done)
++ up(&line->sem);
++}
++
++static struct tty_driver *ssl_console_device(struct console *c, int *index)
++{
++ *index = c->index;
++ return ssl_driver;
++}
++
++static int ssl_console_setup(struct console *co, char *options)
++{
++ return(0);
++}
++
++static struct console ssl_cons = {
++ name: "ttyS",
++ write: ssl_console_write,
++ device: ssl_console_device,
++ setup: ssl_console_setup,
++ flags: CON_PRINTBUFFER,
++ index: -1,
++};
++
+ int ssl_init(void)
+ {
+ char *new_title;
+@@ -227,17 +263,18 @@
+ new_title = add_xterm_umid(opts.xterm_title);
+ if(new_title != NULL) opts.xterm_title = new_title;
+
++ register_console(&ssl_cons);
+ ssl_init_done = 1;
+ return(0);
+ }
+
+-__initcall(ssl_init);
++late_initcall(ssl_init);
+
+ static int ssl_chan_setup(char *str)
+ {
+- line_setup(serial_lines, sizeof(serial_lines)/sizeof(serial_lines[0]),
+- str, 1);
+- return(1);
++ return(line_setup(serial_lines,
++ sizeof(serial_lines)/sizeof(serial_lines[0]),
++ str, 1));
+ }
+
+ __setup("ssl", ssl_chan_setup);
+diff -Naur a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c
+--- a/arch/um/drivers/stdio_console.c 2004-02-11 12:14:32.000000000 -0500
++++ b/arch/um/drivers/stdio_console.c 2004-02-11 12:26:14.000000000 -0500
+@@ -83,7 +83,8 @@
+
+ static struct line_driver driver = {
+ .name = "UML console",
+- .devfs_name = "vc/%d",
++ .device_name = "tty",
++ .devfs_name = "vc/",
+ .major = TTY_MAJOR,
+ .minor_start = 0,
+ .type = TTY_DRIVER_TYPE_CONSOLE,
+@@ -159,6 +160,15 @@
+
+ static int con_init_done = 0;
+
++static struct tty_operations console_ops = {
++ .open = con_open,
++ .close = con_close,
++ .write = con_write,
++ .chars_in_buffer = chars_in_buffer,
++ .set_termios = set_termios,
++ .write_room = line_write_room,
++};
++
+ int stdio_init(void)
+ {
+ char *new_title;
+@@ -166,7 +176,8 @@
+ printk(KERN_INFO "Initializing stdio console driver\n");
+
+ console_driver = line_register_devfs(&console_lines, &driver,
+- &console_ops, vts, sizeof(vts)/sizeof(vts[0]));
++ &console_ops, vts,
++ sizeof(vts)/sizeof(vts[0]));
+
+ lines_init(vts, sizeof(vts)/sizeof(vts[0]));
+
+@@ -178,24 +189,19 @@
+ return(0);
+ }
+
+-__initcall(stdio_init);
++late_initcall(stdio_init);
+
+ static void console_write(struct console *console, const char *string,
+ unsigned len)
+ {
+- if(con_init_done) down(&vts[console->index].sem);
+- console_write_chan(&vts[console->index].chan_list, string, len);
+- if(con_init_done) up(&vts[console->index].sem);
+-}
++ struct line *line = &vts[console->index];
+
+-static struct tty_operations console_ops = {
+- .open = con_open,
+- .close = con_close,
+- .write = con_write,
+- .chars_in_buffer = chars_in_buffer,
+- .set_termios = set_termios,
+- .write_room = line_write_room,
+-};
++ if(con_init_done)
++ down(&line->sem);
++ console_write_chan(&line->chan_list, string, len);
++ if(con_init_done)
++ up(&line->sem);
++}
+
+ static struct tty_driver *console_device(struct console *c, int *index)
+ {
+@@ -208,22 +214,28 @@
+ return(0);
+ }
+
+-static struct console stdiocons = INIT_CONSOLE("tty", console_write,
+- console_device, console_setup,
+- CON_PRINTBUFFER);
++static struct console stdiocons = {
++ name: "tty",
++ write: console_write,
++ device: console_device,
++ setup: console_setup,
++ flags: CON_PRINTBUFFER,
++ index: -1,
++};
+
+-static void __init stdio_console_init(void)
++static int __init stdio_console_init(void)
+ {
+ INIT_LIST_HEAD(&vts[0].chan_list);
+ list_add(&init_console_chan.list, &vts[0].chan_list);
+ register_console(&stdiocons);
++ return(0);
+ }
++
+ console_initcall(stdio_console_init);
+
+ static int console_chan_setup(char *str)
+ {
+- line_setup(vts, sizeof(vts)/sizeof(vts[0]), str, 1);
+- return(1);
++ return(line_setup(vts, sizeof(vts)/sizeof(vts[0]), str, 1));
+ }
+
+ __setup("con", console_chan_setup);
+diff -Naur a/arch/um/drivers/tty.c b/arch/um/drivers/tty.c
+--- a/arch/um/drivers/tty.c 2004-02-11 12:15:02.000000000 -0500
++++ b/arch/um/drivers/tty.c 2004-02-11 12:26:51.000000000 -0500
+@@ -5,7 +5,6 @@
+
+ #include <stdio.h>
+ #include <termios.h>
+-#include <fcntl.h>
+ #include <errno.h>
+ #include <unistd.h>
+ #include "chan_user.h"
+@@ -30,7 +29,8 @@
+ }
+ str++;
+
+- if((data = um_kmalloc(sizeof(*data))) == NULL)
++ data = um_kmalloc(sizeof(*data));
++ if(data == NULL)
+ return(NULL);
+ *data = ((struct tty_chan) { .dev = str,
+ .raw = opts->raw });
+diff -Naur a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
+--- a/arch/um/drivers/ubd_kern.c 2004-02-11 12:15:25.000000000 -0500
++++ b/arch/um/drivers/ubd_kern.c 2004-02-11 12:27:12.000000000 -0500
+@@ -8,6 +8,13 @@
+ * old style ubd by setting UBD_SHIFT to 0
+ * 2002-09-27...2002-10-18 massive tinkering for 2.5
+ * partitions have changed in 2.5
++ * 2003-01-29 more tinkering for 2.5.59-1
++ * This should now address the sysfs problems and has
++ * the symlink for devfs to allow for booting with
++ * the common /dev/ubd/discX/... names rather than
++ * only /dev/ubdN/discN. This version also has lots of
++ * cleanups preparing for ubd-many.
++ * James McMechan
+ */
+
+ #define MAJOR_NR UBD_MAJOR
+@@ -40,9 +47,12 @@
+ #include "mconsole_kern.h"
+ #include "init.h"
+ #include "irq_user.h"
++#include "irq_kern.h"
+ #include "ubd_user.h"
+ #include "2_5compat.h"
+ #include "os.h"
++#include "mem.h"
++#include "mem_kern.h"
+
+ static spinlock_t ubd_io_lock = SPIN_LOCK_UNLOCKED;
+ static spinlock_t ubd_lock = SPIN_LOCK_UNLOCKED;
+@@ -56,6 +66,10 @@
+
+ #define MAX_DEV (8)
+
++/* Changed in early boot */
++static int ubd_do_mmap = 0;
++#define UBD_MMAP_BLOCK_SIZE PAGE_SIZE
++
+ static struct block_device_operations ubd_blops = {
+ .owner = THIS_MODULE,
+ .open = ubd_open,
+@@ -67,7 +81,7 @@
+ static request_queue_t *ubd_queue;
+
+ /* Protected by ubd_lock */
+-static int fake_major = 0;
++static int fake_major = MAJOR_NR;
+
+ static struct gendisk *ubd_gendisk[MAX_DEV];
+ static struct gendisk *fake_gendisk[MAX_DEV];
+@@ -96,13 +110,19 @@
+
+ struct ubd {
+ char *file;
+- int is_dir;
+ int count;
+ int fd;
+ __u64 size;
+ struct openflags boot_openflags;
+ struct openflags openflags;
++ int no_cow;
+ struct cow cow;
++
++ int map_writes;
++ int map_reads;
++ int nomap_writes;
++ int nomap_reads;
++ int write_maps;
+ };
+
+ #define DEFAULT_COW { \
+@@ -115,21 +135,28 @@
+
+ #define DEFAULT_UBD { \
+ .file = NULL, \
+- .is_dir = 0, \
+ .count = 0, \
+ .fd = -1, \
+ .size = -1, \
+ .boot_openflags = OPEN_FLAGS, \
+ .openflags = OPEN_FLAGS, \
++ .no_cow = 0, \
+ .cow = DEFAULT_COW, \
++ .map_writes = 0, \
++ .map_reads = 0, \
++ .nomap_writes = 0, \
++ .nomap_reads = 0, \
++ .write_maps = 0, \
+ }
+
+ struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
+
+ static int ubd0_init(void)
+ {
+- if(ubd_dev[0].file == NULL)
+- ubd_dev[0].file = "root_fs";
++ struct ubd *dev = &ubd_dev[0];
++
++ if(dev->file == NULL)
++ dev->file = "root_fs";
+ return(0);
+ }
+
+@@ -196,19 +223,46 @@
+ " Create ide0 entries that map onto ubd devices.\n\n"
+ );
+
++static int parse_unit(char **ptr)
++{
++ char *str = *ptr, *end;
++ int n = -1;
++
++ if(isdigit(*str)) {
++ n = simple_strtoul(str, &end, 0);
++ if(end == str)
++ return(-1);
++ *ptr = end;
++ }
++ else if (('a' <= *str) && (*str <= 'h')) {
++ n = *str - 'a';
++ str++;
++ *ptr = str;
++ }
++ return(n);
++}
++
+ static int ubd_setup_common(char *str, int *index_out)
+ {
++ struct ubd *dev;
+ struct openflags flags = global_openflags;
+ char *backing_file;
+ int n, err;
+
+ if(index_out) *index_out = -1;
+- n = *str++;
++ n = *str;
+ if(n == '='){
+- static int fake_major_allowed = 1;
+ char *end;
+ int major;
+
++ str++;
++ if(!strcmp(str, "mmap")){
++ CHOOSE_MODE(printk("mmap not supported by the ubd "
++ "driver in tt mode\n"),
++ ubd_do_mmap = 1);
++ return(0);
++ }
++
+ if(!strcmp(str, "sync")){
+ global_openflags.s = 1;
+ return(0);
+@@ -220,20 +274,14 @@
+ return(1);
+ }
+
+- if(!fake_major_allowed){
+- printk(KERN_ERR "Can't assign a fake major twice\n");
+- return(1);
+- }
+-
+ err = 1;
+ spin_lock(&ubd_lock);
+- if(!fake_major_allowed){
++ if(fake_major != MAJOR_NR){
+ printk(KERN_ERR "Can't assign a fake major twice\n");
+ goto out1;
+ }
+
+ fake_major = major;
+- fake_major_allowed = 0;
+
+ printk(KERN_INFO "Setting extra ubd major number to %d\n",
+ major);
+@@ -243,25 +291,23 @@
+ return(err);
+ }
+
+- if(n < '0'){
+- printk(KERN_ERR "ubd_setup : index out of range\n"); }
+-
+- if((n >= '0') && (n <= '9')) n -= '0';
+- else if((n >= 'a') && (n <= 'z')) n -= 'a';
+- else {
+- printk(KERN_ERR "ubd_setup : device syntax invalid\n");
++ n = parse_unit(&str);
++ if(n < 0){
++ printk(KERN_ERR "ubd_setup : couldn't parse unit number "
++ "'%s'\n", str);
+ return(1);
+ }
+ if(n >= MAX_DEV){
+- printk(KERN_ERR "ubd_setup : index out of range "
+- "(%d devices)\n", MAX_DEV);
++ printk(KERN_ERR "ubd_setup : index %d out of range "
++ "(%d devices)\n", n, MAX_DEV);
+ return(1);
+ }
+
+ err = 1;
+ spin_lock(&ubd_lock);
+
+- if(ubd_dev[n].file != NULL){
++ dev = &ubd_dev[n];
++ if(dev->file != NULL){
+ printk(KERN_ERR "ubd_setup : device already configured\n");
+ goto out2;
+ }
+@@ -276,6 +322,11 @@
+ flags.s = 1;
+ str++;
+ }
++ if (*str == 'd'){
++ dev->no_cow = 1;
++ str++;
++ }
++
+ if(*str++ != '='){
+ printk(KERN_ERR "ubd_setup : Expected '='\n");
+ goto out2;
+@@ -284,14 +335,17 @@
+ err = 0;
+ backing_file = strchr(str, ',');
+ if(backing_file){
+- *backing_file = '\0';
+- backing_file++;
++ if(dev->no_cow)
++ printk(KERN_ERR "Can't specify both 'd' and a "
++ "cow file\n");
++ else {
++ *backing_file = '\0';
++ backing_file++;
++ }
+ }
+- ubd_dev[n].file = str;
+- if(ubd_is_dir(ubd_dev[n].file))
+- ubd_dev[n].is_dir = 1;
+- ubd_dev[n].cow.file = backing_file;
+- ubd_dev[n].boot_openflags = flags;
++ dev->file = str;
++ dev->cow.file = backing_file;
++ dev->boot_openflags = flags;
+ out2:
+ spin_unlock(&ubd_lock);
+ return(err);
+@@ -321,8 +375,7 @@
+ static int fakehd_set = 0;
+ static int fakehd(char *str)
+ {
+- printk(KERN_INFO
+- "fakehd : Changing ubd name to \"hd\".\n");
++ printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
+ fakehd_set = 1;
+ return 1;
+ }
+@@ -368,32 +421,42 @@
+ {
+ struct io_thread_req req;
+ struct request *rq = elv_next_request(ubd_queue);
+- int n;
++ int n, err;
+
+ do_ubd = NULL;
+ intr_count++;
+ n = read_ubd_fs(thread_fd, &req, sizeof(req));
+ if(n != sizeof(req)){
+ printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, "
+- "errno = %d\n", os_getpid(), -n);
++ "err = %d\n", os_getpid(), -n);
+ spin_lock(&ubd_io_lock);
+ end_request(rq, 0);
+ spin_unlock(&ubd_io_lock);
+ return;
+ }
+
+- if((req.offset != ((__u64) (rq->sector)) << 9) ||
+- (req.length != (rq->current_nr_sectors) << 9))
++ if((req.op != UBD_MMAP) &&
++ ((req.offset != ((__u64) (rq->sector)) << 9) ||
++ (req.length != (rq->current_nr_sectors) << 9)))
+ panic("I/O op mismatch");
+
++ if(req.map_fd != -1){
++ err = physmem_subst_mapping(req.buffer, req.map_fd,
++ req.map_offset, 1);
++ if(err)
++ printk("ubd_handler - physmem_subst_mapping failed, "
++ "err = %d\n", -err);
++ }
++
+ ubd_finish(rq, req.error);
+ reactivate_fd(thread_fd, UBD_IRQ);
+ do_ubd_request(ubd_queue);
+ }
+
+-static void ubd_intr(int irq, void *dev, struct pt_regs *unused)
++static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused)
+ {
+ ubd_handler();
++ return(IRQ_HANDLED);
+ }
+
+ /* Only changed by ubd_init, which is an initcall. */
+@@ -417,10 +480,14 @@
+
+ static void ubd_close(struct ubd *dev)
+ {
++ if(ubd_do_mmap)
++ physmem_forget_descriptor(dev->fd);
+ os_close_file(dev->fd);
+ if(dev->cow.file == NULL)
+ return;
+
++ if(ubd_do_mmap)
++ physmem_forget_descriptor(dev->cow.fd);
+ os_close_file(dev->cow.fd);
+ vfree(dev->cow.bitmap);
+ dev->cow.bitmap = NULL;
+@@ -429,18 +496,20 @@
+ static int ubd_open_dev(struct ubd *dev)
+ {
+ struct openflags flags;
+- int err, n, create_cow, *create_ptr;
++ char **back_ptr;
++ int err, create_cow, *create_ptr;
+
++ dev->openflags = dev->boot_openflags;
+ create_cow = 0;
+ create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL;
+- dev->fd = open_ubd_file(dev->file, &dev->openflags, &dev->cow.file,
++ back_ptr = dev->no_cow ? NULL : &dev->cow.file;
++ dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr,
+ &dev->cow.bitmap_offset, &dev->cow.bitmap_len,
+ &dev->cow.data_offset, create_ptr);
+
+ if((dev->fd == -ENOENT) && create_cow){
+- n = dev - ubd_dev;
+ dev->fd = create_cow_file(dev->file, dev->cow.file,
+- dev->openflags, 1 << 9,
++ dev->openflags, 1 << 9, PAGE_SIZE,
+ &dev->cow.bitmap_offset,
+ &dev->cow.bitmap_len,
+ &dev->cow.data_offset);
+@@ -455,13 +524,17 @@
+ if(dev->cow.file != NULL){
+ err = -ENOMEM;
+ dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len);
+- if(dev->cow.bitmap == NULL) goto error;
++ if(dev->cow.bitmap == NULL){
++ printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
++ goto error;
++ }
+ flush_tlb_kernel_vm();
+
+ err = read_cow_bitmap(dev->fd, dev->cow.bitmap,
+ dev->cow.bitmap_offset,
+ dev->cow.bitmap_len);
+- if(err) goto error;
++ if(err < 0)
++ goto error;
+
+ flags = dev->openflags;
+ flags.w = 0;
+@@ -481,17 +554,31 @@
+
+ {
+ struct gendisk *disk;
++ char from[sizeof("ubd/nnnnn\0")], to[sizeof("discnnnnn/disc\0")];
++ int err;
+
+ disk = alloc_disk(1 << UBD_SHIFT);
+- if (!disk)
+- return -ENOMEM;
++ if(disk == NULL)
++ return(-ENOMEM);
+
+ disk->major = major;
+ disk->first_minor = unit << UBD_SHIFT;
+ disk->fops = &ubd_blops;
+ set_capacity(disk, size / 512);
+- sprintf(disk->disk_name, "ubd");
+- sprintf(disk->devfs_name, "ubd/disc%d", unit);
++ if(major == MAJOR_NR){
++ sprintf(disk->disk_name, "ubd%c", 'a' + unit);
++ sprintf(disk->devfs_name, "ubd/disc%d", unit);
++ sprintf(from, "ubd/%d", unit);
++ sprintf(to, "disc%d/disc", unit);
++ err = devfs_mk_symlink(from, to);
++ if(err)
++ printk("ubd_new_disk failed to make link from %s to "
++ "%s, error = %d\n", from, to, err);
++ }
++ else {
++ sprintf(disk->disk_name, "ubd_fake%d", unit);
++ sprintf(disk->devfs_name, "ubd_fake/disc%d", unit);
++ }
+
+ disk->private_data = &ubd_dev[unit];
+ disk->queue = ubd_queue;
+@@ -506,24 +593,21 @@
+ struct ubd *dev = &ubd_dev[n];
+ int err;
+
+- if(dev->is_dir)
+- return(-EISDIR);
+-
+- if (!dev->file)
++ if(dev->file == NULL)
+ return(-ENODEV);
+
+ if (ubd_open_dev(dev))
+ return(-ENODEV);
+
+ err = ubd_file_size(dev, &dev->size);
+- if(err)
++ if(err < 0)
+ return(err);
+
+ err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]);
+ if(err)
+ return(err);
+
+- if(fake_major)
++ if(fake_major != MAJOR_NR)
+ ubd_new_disk(fake_major, dev->size, n,
+ &fake_gendisk[n]);
+
+@@ -561,42 +645,42 @@
+ return(err);
+ }
+
+-static int ubd_get_config(char *dev, char *str, int size, char **error_out)
++static int ubd_get_config(char *name, char *str, int size, char **error_out)
+ {
+- struct ubd *ubd;
++ struct ubd *dev;
+ char *end;
+- int major, n = 0;
++ int n, len = 0;
+
+- major = simple_strtoul(dev, &end, 0);
+- if((*end != '\0') || (end == dev)){
+- *error_out = "ubd_get_config : didn't parse major number";
++ n = simple_strtoul(name, &end, 0);
++ if((*end != '\0') || (end == name)){
++ *error_out = "ubd_get_config : didn't parse device number";
+ return(-1);
+ }
+
+- if((major >= MAX_DEV) || (major < 0)){
+- *error_out = "ubd_get_config : major number out of range";
++ if((n >= MAX_DEV) || (n < 0)){
++ *error_out = "ubd_get_config : device number out of range";
+ return(-1);
+ }
+
+- ubd = &ubd_dev[major];
++ dev = &ubd_dev[n];
+ spin_lock(&ubd_lock);
+
+- if(ubd->file == NULL){
+- CONFIG_CHUNK(str, size, n, "", 1);
++ if(dev->file == NULL){
++ CONFIG_CHUNK(str, size, len, "", 1);
+ goto out;
+ }
+
+- CONFIG_CHUNK(str, size, n, ubd->file, 0);
++ CONFIG_CHUNK(str, size, len, dev->file, 0);
+
+- if(ubd->cow.file != NULL){
+- CONFIG_CHUNK(str, size, n, ",", 0);
+- CONFIG_CHUNK(str, size, n, ubd->cow.file, 1);
++ if(dev->cow.file != NULL){
++ CONFIG_CHUNK(str, size, len, ",", 0);
++ CONFIG_CHUNK(str, size, len, dev->cow.file, 1);
+ }
+- else CONFIG_CHUNK(str, size, n, "", 1);
++ else CONFIG_CHUNK(str, size, len, "", 1);
+
+ out:
+ spin_unlock(&ubd_lock);
+- return(n);
++ return(len);
+ }
+
+ static int ubd_remove(char *str)
+@@ -604,11 +688,9 @@
+ struct ubd *dev;
+ int n, err = -ENODEV;
+
+- if(!isdigit(*str))
+- return(err); /* it should be a number 0-7/a-h */
++ n = parse_unit(&str);
+
+- n = *str - '0';
+- if(n >= MAX_DEV)
++ if((n < 0) || (n >= MAX_DEV))
+ return(err);
+
+ dev = &ubd_dev[n];
+@@ -669,7 +751,7 @@
+
+ elevator_init(ubd_queue, &elevator_noop);
+
+- if (fake_major != 0) {
++ if (fake_major != MAJOR_NR) {
+ char name[sizeof("ubd_nnn\0")];
+
+ snprintf(name, sizeof(name), "ubd_%d", fake_major);
+@@ -696,6 +778,7 @@
+ io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *),
+ &thread_fd);
+ if(io_pid < 0){
+ printk(KERN_ERR
+ "ubd : Failed to start I/O thread (errno = %d) - "
+ "falling back to synchronous I/O\n", -io_pid);
++ io_pid = -1; /* reset only after the real error code has been logged */
+@@ -703,8 +786,8 @@
+ }
+ err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr,
+ SA_INTERRUPT, "ubd", ubd_dev);
+- if(err != 0) printk(KERN_ERR
+- "um_request_irq failed - errno = %d\n", -err);
++ if(err != 0)
++ printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err);
+ return(err);
+ }
+
+@@ -714,15 +797,9 @@
+ {
+ struct gendisk *disk = inode->i_bdev->bd_disk;
+ struct ubd *dev = disk->private_data;
+- int err = -EISDIR;
+-
+- if(dev->is_dir == 1)
+- goto out;
++ int err = 0;
+
+- err = 0;
+ if(dev->count == 0){
+- dev->openflags = dev->boot_openflags;
+-
+ err = ubd_open_dev(dev);
+ if(err){
+ printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
+@@ -749,62 +826,156 @@
+ return(0);
+ }
+
+-void cowify_req(struct io_thread_req *req, struct ubd *dev)
++static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask,
++ __u64 *cow_offset, unsigned long *bitmap,
++ __u64 bitmap_offset, unsigned long *bitmap_words,
++ __u64 bitmap_len)
++{
++ __u64 sector = io_offset >> 9;
++ int i, update_bitmap = 0;
++
++ for(i = 0; i < length >> 9; i++){
++ if(cow_mask != NULL)
++ ubd_set_bit(i, (unsigned char *) cow_mask);
++ if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
++ continue;
++
++ update_bitmap = 1;
++ ubd_set_bit(sector + i, (unsigned char *) bitmap);
++ }
++
++ if(!update_bitmap)
++ return;
++
++ *cow_offset = sector / (sizeof(unsigned long) * 8);
++
++ /* This takes care of the case where we're exactly at the end of the
++ * device, and *cow_offset + 1 is off the end. So, just back it up
++ * by one word. Thanks to Lynn Kerby for the fix and James McMechan
++ * for the original diagnosis.
++ */
++ if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) /
++ sizeof(unsigned long) - 1))
++ (*cow_offset)--;
++
++ bitmap_words[0] = bitmap[*cow_offset];
++ bitmap_words[1] = bitmap[*cow_offset + 1];
++
++ *cow_offset *= sizeof(unsigned long);
++ *cow_offset += bitmap_offset;
++}
++
++static void cowify_req(struct io_thread_req *req, unsigned long *bitmap,
++ __u64 bitmap_offset, __u64 bitmap_len)
+ {
+- int i, update_bitmap, sector = req->offset >> 9;
++ __u64 sector = req->offset >> 9;
++ int i;
+
+ if(req->length > (sizeof(req->sector_mask) * 8) << 9)
+ panic("Operation too long");
++
+ if(req->op == UBD_READ) {
+ for(i = 0; i < req->length >> 9; i++){
+- if(ubd_test_bit(sector + i, (unsigned char *)
+- dev->cow.bitmap)){
++ if(ubd_test_bit(sector + i, (unsigned char *) bitmap))
+ ubd_set_bit(i, (unsigned char *)
+ &req->sector_mask);
+- }
+ }
+- }
+- else {
+- update_bitmap = 0;
+- for(i = 0; i < req->length >> 9; i++){
+- ubd_set_bit(i, (unsigned char *)
+- &req->sector_mask);
+- if(!ubd_test_bit(sector + i, (unsigned char *)
+- dev->cow.bitmap))
+- update_bitmap = 1;
+- ubd_set_bit(sector + i, (unsigned char *)
+- dev->cow.bitmap);
+- }
+- if(update_bitmap){
+- req->cow_offset = sector / (sizeof(unsigned long) * 8);
+- req->bitmap_words[0] =
+- dev->cow.bitmap[req->cow_offset];
+- req->bitmap_words[1] =
+- dev->cow.bitmap[req->cow_offset + 1];
+- req->cow_offset *= sizeof(unsigned long);
+- req->cow_offset += dev->cow.bitmap_offset;
++ }
++ else cowify_bitmap(req->offset, req->length, &req->sector_mask,
++ &req->cow_offset, bitmap, bitmap_offset,
++ req->bitmap_words, bitmap_len);
++}
++
++static int mmap_fd(struct request *req, struct ubd *dev, __u64 offset)
++{
++ __u64 sector;
++ unsigned char *bitmap;
++ int bit, i;
++
++ /* mmap must have been requested on the command line */
++ if(!ubd_do_mmap)
++ return(-1);
++
++ /* The buffer must be page aligned */
++ if(((unsigned long) req->buffer % UBD_MMAP_BLOCK_SIZE) != 0)
++ return(-1);
++
++ /* The request must be a page long */
++ if((req->current_nr_sectors << 9) != PAGE_SIZE)
++ return(-1);
++
++ if(dev->cow.file == NULL)
++ return(dev->fd);
++
++ sector = offset >> 9;
++ bitmap = (unsigned char *) dev->cow.bitmap;
++ bit = ubd_test_bit(sector, bitmap);
++
++ for(i = 1; i < req->current_nr_sectors; i++){
++ if(ubd_test_bit(sector + i, bitmap) != bit)
++ return(-1);
++ }
++
++ if(bit || (rq_data_dir(req) == WRITE))
++ offset += dev->cow.data_offset;
++
++ /* The data on disk must be page aligned */
++ if((offset % UBD_MMAP_BLOCK_SIZE) != 0)
++ return(-1);
++
++ return(bit ? dev->fd : dev->cow.fd);
++}
++
++static int prepare_mmap_request(struct ubd *dev, int fd, __u64 offset,
++ struct request *req,
++ struct io_thread_req *io_req)
++{
++ int err;
++
++ if(rq_data_dir(req) == WRITE){
++ /* Writes are almost no-ops since the new data is already in the
++ * host page cache
++ */
++ dev->map_writes++;
++ if(dev->cow.file != NULL)
++ cowify_bitmap(io_req->offset, io_req->length,
++ &io_req->sector_mask, &io_req->cow_offset,
++ dev->cow.bitmap, dev->cow.bitmap_offset,
++ io_req->bitmap_words,
++ dev->cow.bitmap_len);
++ }
++ else {
++ int w;
++
++ if((dev->cow.file != NULL) && (fd == dev->cow.fd))
++ w = 0;
++ else w = dev->openflags.w;
++
++ if((dev->cow.file != NULL) && (fd == dev->fd))
++ offset += dev->cow.data_offset;
++
++ err = physmem_subst_mapping(req->buffer, fd, offset, w);
++ if(err){
++ printk("physmem_subst_mapping failed, err = %d\n",
++ -err);
++ return(1);
+ }
++ dev->map_reads++;
+ }
++ io_req->op = UBD_MMAP;
++ io_req->buffer = req->buffer;
++ return(0);
+ }
+
+ static int prepare_request(struct request *req, struct io_thread_req *io_req)
+ {
+ struct gendisk *disk = req->rq_disk;
+ struct ubd *dev = disk->private_data;
+- __u64 block;
+- int nsect;
++ __u64 offset;
++ int len, fd;
+
+ if(req->rq_status == RQ_INACTIVE) return(1);
+
+- if(dev->is_dir){
+- strcpy(req->buffer, "HOSTFS:");
+- strcat(req->buffer, dev->file);
+- spin_lock(&ubd_io_lock);
+- end_request(req, 1);
+- spin_unlock(&ubd_io_lock);
+- return(1);
+- }
+-
+ if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
+ printk("Write attempted on readonly ubd device %s\n",
+ disk->disk_name);
+@@ -814,23 +985,49 @@
+ return(1);
+ }
+
+- block = req->sector;
+- nsect = req->current_nr_sectors;
++ offset = ((__u64) req->sector) << 9;
++ len = req->current_nr_sectors << 9;
+
+- io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE;
+ io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd;
+ io_req->fds[1] = dev->fd;
++ io_req->map_fd = -1;
++ io_req->cow_offset = -1;
++ io_req->offset = offset;
++ io_req->length = len;
++ io_req->error = 0;
++ io_req->sector_mask = 0;
++
++ fd = mmap_fd(req, dev, io_req->offset);
++ if(fd > 0){
++ /* If mmapping is otherwise OK, but the first access to the
++ * page is a write, then it's not mapped in yet. So we have
++ * to write the data to disk first, then we can map the disk
++ * page in and continue normally from there.
++ */
++ if((rq_data_dir(req) == WRITE) && !is_remapped(req->buffer)){
++ io_req->map_fd = dev->fd;
++ io_req->map_offset = io_req->offset +
++ dev->cow.data_offset;
++ dev->write_maps++;
++ }
++ else return(prepare_mmap_request(dev, fd, io_req->offset, req,
++ io_req));
++ }
++
++ if(rq_data_dir(req) == READ)
++ dev->nomap_reads++;
++ else dev->nomap_writes++;
++
++ io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE;
+ io_req->offsets[0] = 0;
+ io_req->offsets[1] = dev->cow.data_offset;
+- io_req->offset = ((__u64) block) << 9;
+- io_req->length = nsect << 9;
+ io_req->buffer = req->buffer;
+ io_req->sectorsize = 1 << 9;
+- io_req->sector_mask = 0;
+- io_req->cow_offset = -1;
+- io_req->error = 0;
+
+- if(dev->cow.file != NULL) cowify_req(io_req, dev);
++ if(dev->cow.file != NULL)
++ cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset,
++ dev->cow.bitmap_len);
++
+ return(0);
+ }
+
+@@ -841,7 +1038,7 @@
+ int err, n;
+
+ if(thread_fd == -1){
+- while(!list_empty(&q->queue_head)){
++ while(!elv_queue_empty(q)){
+ req = elv_next_request(q);
+ err = prepare_request(req, &io_req);
+ if(!err){
+@@ -851,7 +1048,8 @@
+ }
+ }
+ else {
+- if(do_ubd || list_empty(&q->queue_head)) return;
++ if(do_ubd || elv_queue_empty(q))
++ return;
+ req = elv_next_request(q);
+ err = prepare_request(req, &io_req);
+ if(!err){
+@@ -885,7 +1083,7 @@
+ g.heads = 128;
+ g.sectors = 32;
+ g.cylinders = dev->size / (128 * 32 * 512);
+- g.start = 2;
++ g.start = get_start_sect(inode->i_bdev);
+ return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0);
+
+ case HDIO_SET_UNMASKINTR:
+@@ -935,6 +1133,142 @@
+ return(-EINVAL);
+ }
+
++static int ubd_check_remapped(int fd, unsigned long address, int is_write,
++ __u64 offset)
++{
++ __u64 bitmap_offset;
++ unsigned long new_bitmap[2];
++ int i, err, n;
++
++ /* If it's not a write access, we can't do anything about it */
++ if(!is_write)
++ return(0);
++
++ /* We have a write */
++ for(i = 0; i < sizeof(ubd_dev) / sizeof(ubd_dev[0]); i++){
++ struct ubd *dev = &ubd_dev[i];
++
++ if((dev->fd != fd) && (dev->cow.fd != fd))
++ continue;
++
++ /* It's a write to a ubd device */
++
++ if(!dev->openflags.w){
++ /* It's a write access on a read-only device - probably
++ * shouldn't happen. If the kernel is trying to change
++ * something with no intention of writing it back out,
++ * then this message will clue us in that this needs
++ * fixing
++ */
++ printk("Write access to mapped page from readonly ubd "
++ "device %d\n", i);
++ return(0);
++ }
++
++ /* It's a write to a writeable ubd device - it must be COWed
++ * because, otherwise, the page would have been mapped in
++ * writeable
++ */
++
++ if(!dev->cow.file)
++ panic("Write fault on writeable non-COW ubd device %d",
++ i);
++
++ /* It should also be an access to the backing file since the
++ * COW pages should be mapped in read-write
++ */
++
++ if(fd == dev->fd)
++ panic("Write fault on a backing page of ubd "
++ "device %d\n", i);
++
++ /* So, we do the write, copying the backing data to the COW
++ * file...
++ */
++
++ err = os_seek_file(dev->fd, offset + dev->cow.data_offset);
++ if(err < 0)
++ panic("Couldn't seek to %lld in COW file of ubd "
++ "device %d, err = %d",
++ offset + dev->cow.data_offset, i, -err);
++
++ n = os_write_file(dev->fd, (void *) address, PAGE_SIZE);
++ if(n != PAGE_SIZE)
++ panic("Couldn't copy data to COW file of ubd "
++ "device %d, err = %d", i, -n);
++
++ /* ... updating the COW bitmap... */
++
++ cowify_bitmap(offset, PAGE_SIZE, NULL, &bitmap_offset,
++ dev->cow.bitmap, dev->cow.bitmap_offset,
++ new_bitmap, dev->cow.bitmap_len);
++
++ err = os_seek_file(dev->fd, bitmap_offset);
++ if(err < 0)
++ panic("Couldn't seek to %lld in COW file of ubd "
++ "device %d, err = %d", bitmap_offset, i, -err);
++
++ n = os_write_file(dev->fd, new_bitmap, sizeof(new_bitmap));
++ if(n != sizeof(new_bitmap))
++ panic("Couldn't update bitmap of ubd device %d, "
++ "err = %d", i, -n);
++
++ /* Maybe we can map the COW page in, and maybe we can't. If
++ * it is a pre-V3 COW file, we can't, since the alignment will
++ * be wrong. If it is a V3 or later COW file which has been
++ * moved to a system with a larger page size, then maybe we
++ * can't, depending on the exact location of the page.
++ */
++
++ offset += dev->cow.data_offset;
++
++ /* Remove the remapping, putting the original anonymous page
++ * back. If the COW file can be mapped in, that is done.
++ * Otherwise, the COW page is read in.
++ */
++
++ if(!physmem_remove_mapping((void *) address))
++ panic("Address 0x%lx not remapped by ubd device %d",
++ address, i);
++ if((offset % UBD_MMAP_BLOCK_SIZE) == 0)
++ physmem_subst_mapping((void *) address, dev->fd,
++ offset, 1);
++ else {
++ err = os_seek_file(dev->fd, offset);
++ if(err < 0)
++ panic("Couldn't seek to %lld in COW file of "
++ "ubd device %d, err = %d", offset, i,
++ -err);
++
++ n = os_read_file(dev->fd, (void *) address, PAGE_SIZE);
++ if(n != PAGE_SIZE)
++ panic("Failed to read page from offset %llx of "
++ "COW file of ubd device %d, err = %d",
++ offset, i, -n);
++ }
++
++ return(1);
++ }
++
++ /* It's not a write on a ubd device */
++ return(0);
++}
++
++static struct remapper ubd_remapper = {
++ .list = LIST_HEAD_INIT(ubd_remapper.list),
++ .proc = ubd_check_remapped,
++};
++
++static int ubd_remapper_setup(void)
++{
++ if(ubd_do_mmap)
++ register_remapper(&ubd_remapper);
++
++ return(0);
++}
++
++__initcall(ubd_remapper_setup);
++
+ /*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+diff -Naur a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c
+--- a/arch/um/drivers/ubd_user.c 2004-02-11 12:14:32.000000000 -0500
++++ b/arch/um/drivers/ubd_user.c 2004-02-11 12:26:14.000000000 -0500
+@@ -11,11 +11,8 @@
+ #include <signal.h>
+ #include <string.h>
+ #include <netinet/in.h>
+-#include <sys/stat.h>
+ #include <sys/time.h>
+-#include <sys/fcntl.h>
+ #include <sys/socket.h>
+-#include <string.h>
+ #include <sys/mman.h>
+ #include <sys/param.h>
+ #include "asm/types.h"
+@@ -24,146 +21,30 @@
+ #include "user.h"
+ #include "ubd_user.h"
+ #include "os.h"
++#include "cow.h"
+
+ #include <endian.h>
+ #include <byteswap.h>
+-#if __BYTE_ORDER == __BIG_ENDIAN
+-# define ntohll(x) (x)
+-# define htonll(x) (x)
+-#elif __BYTE_ORDER == __LITTLE_ENDIAN
+-# define ntohll(x) bswap_64(x)
+-# define htonll(x) bswap_64(x)
+-#else
+-#error "__BYTE_ORDER not defined"
+-#endif
+-
+-#define PATH_LEN_V1 256
+-
+-struct cow_header_v1 {
+- int magic;
+- int version;
+- char backing_file[PATH_LEN_V1];
+- time_t mtime;
+- __u64 size;
+- int sectorsize;
+-};
+-
+-#define PATH_LEN_V2 MAXPATHLEN
+-
+-struct cow_header_v2 {
+- unsigned long magic;
+- unsigned long version;
+- char backing_file[PATH_LEN_V2];
+- time_t mtime;
+- __u64 size;
+- int sectorsize;
+-};
+-
+-union cow_header {
+- struct cow_header_v1 v1;
+- struct cow_header_v2 v2;
+-};
+-
+-#define COW_MAGIC 0x4f4f4f4d /* MOOO */
+-#define COW_VERSION 2
+-
+-static void sizes(__u64 size, int sectorsize, int bitmap_offset,
+- unsigned long *bitmap_len_out, int *data_offset_out)
+-{
+- *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize);
+-
+- *data_offset_out = bitmap_offset + *bitmap_len_out;
+- *data_offset_out = (*data_offset_out + sectorsize - 1) / sectorsize;
+- *data_offset_out *= sectorsize;
+-}
+-
+-static int read_cow_header(int fd, int *magic_out, char **backing_file_out,
+- time_t *mtime_out, __u64 *size_out,
+- int *sectorsize_out, int *bitmap_offset_out)
+-{
+- union cow_header *header;
+- char *file;
+- int err, n;
+- unsigned long version, magic;
+-
+- header = um_kmalloc(sizeof(*header));
+- if(header == NULL){
+- printk("read_cow_header - Failed to allocate header\n");
+- return(-ENOMEM);
+- }
+- err = -EINVAL;
+- n = read(fd, header, sizeof(*header));
+- if(n < offsetof(typeof(header->v1), backing_file)){
+- printk("read_cow_header - short header\n");
+- goto out;
+- }
+-
+- magic = header->v1.magic;
+- if(magic == COW_MAGIC) {
+- version = header->v1.version;
+- }
+- else if(magic == ntohl(COW_MAGIC)){
+- version = ntohl(header->v1.version);
+- }
+- else goto out;
+-
+- *magic_out = COW_MAGIC;
+-
+- if(version == 1){
+- if(n < sizeof(header->v1)){
+- printk("read_cow_header - failed to read V1 header\n");
+- goto out;
+- }
+- *mtime_out = header->v1.mtime;
+- *size_out = header->v1.size;
+- *sectorsize_out = header->v1.sectorsize;
+- *bitmap_offset_out = sizeof(header->v1);
+- file = header->v1.backing_file;
+- }
+- else if(version == 2){
+- if(n < sizeof(header->v2)){
+- printk("read_cow_header - failed to read V2 header\n");
+- goto out;
+- }
+- *mtime_out = ntohl(header->v2.mtime);
+- *size_out = ntohll(header->v2.size);
+- *sectorsize_out = ntohl(header->v2.sectorsize);
+- *bitmap_offset_out = sizeof(header->v2);
+- file = header->v2.backing_file;
+- }
+- else {
+- printk("read_cow_header - invalid COW version\n");
+- goto out;
+- }
+- err = -ENOMEM;
+- *backing_file_out = uml_strdup(file);
+- if(*backing_file_out == NULL){
+- printk("read_cow_header - failed to allocate backing file\n");
+- goto out;
+- }
+- err = 0;
+- out:
+- kfree(header);
+- return(err);
+-}
+
+ static int same_backing_files(char *from_cmdline, char *from_cow, char *cow)
+ {
+- struct stat buf1, buf2;
++ struct uml_stat buf1, buf2;
++ int err;
+
+ if(from_cmdline == NULL) return(1);
+ if(!strcmp(from_cmdline, from_cow)) return(1);
+
+- if(stat(from_cmdline, &buf1) < 0){
+- printk("Couldn't stat '%s', errno = %d\n", from_cmdline,
+- errno);
++ err = os_stat_file(from_cmdline, &buf1);
++ if(err < 0){
++ printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
+ return(1);
+ }
+- if(stat(from_cow, &buf2) < 0){
+- printk("Couldn't stat '%s', errno = %d\n", from_cow, errno);
++ err = os_stat_file(from_cow, &buf2);
++ if(err < 0){
++ printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
+ return(1);
+ }
+- if((buf1.st_dev == buf2.st_dev) && (buf1.st_ino == buf2.st_ino))
++ if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
+ return(1);
+
+ printk("Backing file mismatch - \"%s\" requested,\n"
+@@ -174,20 +55,21 @@
+
+ static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
+ {
+- struct stat64 buf;
++ unsigned long modtime;
+ long long actual;
+ int err;
+
+- if(stat64(file, &buf) < 0){
+- printk("Failed to stat backing file \"%s\", errno = %d\n",
+- file, errno);
+- return(-errno);
++ err = os_file_modtime(file, &modtime);
++ if(err < 0){
++ printk("Failed to get modification time of backing file "
++ "\"%s\", err = %d\n", file, -err);
++ return(err);
+ }
+
+ err = os_file_size(file, &actual);
+- if(err){
++ if(err < 0){
+ printk("Failed to get size of backing file \"%s\", "
+- "errno = %d\n", file, -err);
++ "err = %d\n", file, -err);
+ return(err);
+ }
+
+@@ -196,9 +78,9 @@
+ "file\n", size, actual);
+ return(-EINVAL);
+ }
+- if(buf.st_mtime != mtime){
++ if(modtime != mtime){
+ printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
+- "file\n", mtime, buf.st_mtime);
++ "file\n", mtime, modtime);
+ return(-EINVAL);
+ }
+ return(0);
+@@ -209,124 +91,16 @@
+ int err;
+
+ err = os_seek_file(fd, offset);
+- if(err != 0) return(-errno);
+- err = read(fd, buf, len);
+- if(err < 0) return(-errno);
+- return(0);
+-}
++ if(err < 0)
++ return(err);
+
+-static int absolutize(char *to, int size, char *from)
+-{
+- char save_cwd[256], *slash;
+- int remaining;
++ err = os_read_file(fd, buf, len);
++ if(err < 0)
++ return(err);
+
+- if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) {
+- printk("absolutize : unable to get cwd - errno = %d\n", errno);
+- return(-1);
+- }
+- slash = strrchr(from, '/');
+- if(slash != NULL){
+- *slash = '\0';
+- if(chdir(from)){
+- *slash = '/';
+- printk("absolutize : Can't cd to '%s' - errno = %d\n",
+- from, errno);
+- return(-1);
+- }
+- *slash = '/';
+- if(getcwd(to, size) == NULL){
+- printk("absolutize : unable to get cwd of '%s' - "
+- "errno = %d\n", from, errno);
+- return(-1);
+- }
+- remaining = size - strlen(to);
+- if(strlen(slash) + 1 > remaining){
+- printk("absolutize : unable to fit '%s' into %d "
+- "chars\n", from, size);
+- return(-1);
+- }
+- strcat(to, slash);
+- }
+- else {
+- if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){
+- printk("absolutize : unable to fit '%s' into %d "
+- "chars\n", from, size);
+- return(-1);
+- }
+- strcpy(to, save_cwd);
+- strcat(to, "/");
+- strcat(to, from);
+- }
+- chdir(save_cwd);
+ return(0);
+ }
+
+-static int write_cow_header(char *cow_file, int fd, char *backing_file,
+- int sectorsize, long long *size)
+-{
+- struct cow_header_v2 *header;
+- struct stat64 buf;
+- int err;
+-
+- err = os_seek_file(fd, 0);
+- if(err != 0){
+- printk("write_cow_header - lseek failed, errno = %d\n", errno);
+- return(-errno);
+- }
+-
+- err = -ENOMEM;
+- header = um_kmalloc(sizeof(*header));
+- if(header == NULL){
+- printk("Failed to allocate COW V2 header\n");
+- goto out;
+- }
+- header->magic = htonl(COW_MAGIC);
+- header->version = htonl(COW_VERSION);
+-
+- err = -EINVAL;
+- if(strlen(backing_file) > sizeof(header->backing_file) - 1){
+- printk("Backing file name \"%s\" is too long - names are "
+- "limited to %d characters\n", backing_file,
+- sizeof(header->backing_file) - 1);
+- goto out_free;
+- }
+-
+- if(absolutize(header->backing_file, sizeof(header->backing_file),
+- backing_file))
+- goto out_free;
+-
+- err = stat64(header->backing_file, &buf);
+- if(err < 0){
+- printk("Stat of backing file '%s' failed, errno = %d\n",
+- header->backing_file, errno);
+- err = -errno;
+- goto out_free;
+- }
+-
+- err = os_file_size(header->backing_file, size);
+- if(err){
+- printk("Couldn't get size of backing file '%s', errno = %d\n",
+- header->backing_file, -*size);
+- goto out_free;
+- }
+-
+- header->mtime = htonl(buf.st_mtime);
+- header->size = htonll(*size);
+- header->sectorsize = htonl(sectorsize);
+-
+- err = write(fd, header, sizeof(*header));
+- if(err != sizeof(*header)){
+- printk("Write of header to new COW file '%s' failed, "
+- "errno = %d\n", cow_file, errno);
+- goto out_free;
+- }
+- err = 0;
+- out_free:
+- kfree(header);
+- out:
+- return(err);
+-}
+-
+ int open_ubd_file(char *file, struct openflags *openflags,
+ char **backing_file_out, int *bitmap_offset_out,
+ unsigned long *bitmap_len_out, int *data_offset_out,
+@@ -334,26 +108,36 @@
+ {
+ time_t mtime;
+ __u64 size;
++ __u32 version, align;
+ char *backing_file;
+- int fd, err, sectorsize, magic, same, mode = 0644;
++ int fd, err, sectorsize, same, mode = 0644;
+
+- if((fd = os_open_file(file, *openflags, mode)) < 0){
++ fd = os_open_file(file, *openflags, mode);
++ if(fd < 0){
+ if((fd == -ENOENT) && (create_cow_out != NULL))
+ *create_cow_out = 1;
+ if(!openflags->w ||
+ ((errno != EROFS) && (errno != EACCES))) return(-errno);
+ openflags->w = 0;
+- if((fd = os_open_file(file, *openflags, mode)) < 0)
++ fd = os_open_file(file, *openflags, mode);
++ if(fd < 0)
+ return(fd);
+ }
++
++ err = os_lock_file(fd, openflags->w);
++ if(err < 0){
++ printk("Failed to lock '%s', err = %d\n", file, -err);
++ goto out_close;
++ }
++
+ if(backing_file_out == NULL) return(fd);
+
+- err = read_cow_header(fd, &magic, &backing_file, &mtime, &size,
+- &sectorsize, bitmap_offset_out);
++ err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
++ &size, &sectorsize, &align, bitmap_offset_out);
+ if(err && (*backing_file_out != NULL)){
+ printk("Failed to read COW header from COW file \"%s\", "
+- "errno = %d\n", file, err);
+- goto error;
++ "errno = %d\n", file, -err);
++ goto out_close;
+ }
+ if(err) return(fd);
+
+@@ -363,36 +147,33 @@
+
+ if(!same && !backing_file_mismatch(*backing_file_out, size, mtime)){
+ printk("Switching backing file to '%s'\n", *backing_file_out);
+- err = write_cow_header(file, fd, *backing_file_out,
+- sectorsize, &size);
++ err = write_cow_header(file, fd, *backing_file_out,
++ sectorsize, align, &size);
+ if(err){
+- printk("Switch failed, errno = %d\n", err);
++ printk("Switch failed, errno = %d\n", -err);
+ return(err);
+ }
+ }
+ else {
+ *backing_file_out = backing_file;
+ err = backing_file_mismatch(*backing_file_out, size, mtime);
+- if(err) goto error;
++ if(err) goto out_close;
+ }
+
+- sizes(size, sectorsize, *bitmap_offset_out, bitmap_len_out,
+- data_offset_out);
++ cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
++ bitmap_len_out, data_offset_out);
+
+ return(fd);
+- error:
+- close(fd);
++ out_close:
++ os_close_file(fd);
+ return(err);
+ }
+
+ int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
+- int sectorsize, int *bitmap_offset_out,
++ int sectorsize, int alignment, int *bitmap_offset_out,
+ unsigned long *bitmap_len_out, int *data_offset_out)
+ {
+- __u64 blocks;
+- long zero;
+- int err, fd, i;
+- long long size;
++ int err, fd;
+
+ flags.c = 1;
+ fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL);
+@@ -403,57 +184,49 @@
+ goto out;
+ }
+
+- err = write_cow_header(cow_file, fd, backing_file, sectorsize, &size);
+- if(err) goto out_close;
+-
+- blocks = (size + sectorsize - 1) / sectorsize;
+- blocks = (blocks + sizeof(long) * 8 - 1) / (sizeof(long) * 8);
+- zero = 0;
+- for(i = 0; i < blocks; i++){
+- err = write(fd, &zero, sizeof(zero));
+- if(err != sizeof(zero)){
+- printk("Write of bitmap to new COW file '%s' failed, "
+- "errno = %d\n", cow_file, errno);
+- goto out_close;
+- }
+- }
+-
+- sizes(size, sectorsize, sizeof(struct cow_header_v2),
+- bitmap_len_out, data_offset_out);
+- *bitmap_offset_out = sizeof(struct cow_header_v2);
+-
+- return(fd);
+-
+- out_close:
+- close(fd);
++ err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
++ bitmap_offset_out, bitmap_len_out,
++ data_offset_out);
++ if(!err)
++ return(fd);
++ os_close_file(fd);
+ out:
+ return(err);
+ }
+
++/* XXX Just trivial wrappers around os_read_file and os_write_file */
+ int read_ubd_fs(int fd, void *buffer, int len)
+ {
+- int n;
+-
+- n = read(fd, buffer, len);
+- if(n < 0) return(-errno);
+- else return(n);
++ return(os_read_file(fd, buffer, len));
+ }
+
+ int write_ubd_fs(int fd, char *buffer, int len)
+ {
+- int n;
+-
+- n = write(fd, buffer, len);
+- if(n < 0) return(-errno);
+- else return(n);
++ return(os_write_file(fd, buffer, len));
+ }
+
+-int ubd_is_dir(char *file)
++static int update_bitmap(struct io_thread_req *req)
+ {
+- struct stat64 buf;
++ int n;
++
++ if(req->cow_offset == -1)
++ return(0);
++
++ n = os_seek_file(req->fds[1], req->cow_offset);
++ if(n < 0){
++ printk("do_io - bitmap lseek failed : err = %d\n", -n);
++ return(1);
++ }
++
++ n = os_write_file(req->fds[1], &req->bitmap_words,
++ sizeof(req->bitmap_words));
++ if(n != sizeof(req->bitmap_words)){
++ printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
++ req->fds[1]);
++ return(1);
++ }
+
+- if(stat64(file, &buf) < 0) return(0);
+- return(S_ISDIR(buf.st_mode));
++ return(0);
+ }
+
+ void do_io(struct io_thread_req *req)
+@@ -461,8 +234,18 @@
+ char *buf;
+ unsigned long len;
+ int n, nsectors, start, end, bit;
++ int err;
+ __u64 off;
+
++ if(req->op == UBD_MMAP){
++ /* Touch the page to force the host to do any necessary IO to
++ * get it into memory
++ */
++ n = *((volatile int *) req->buffer);
++ req->error = update_bitmap(req);
++ return;
++ }
++
+ nsectors = req->length / req->sectorsize;
+ start = 0;
+ do {
+@@ -473,15 +256,14 @@
+ &req->sector_mask) == bit))
+ end++;
+
+- if(end != nsectors)
+- printk("end != nsectors\n");
+ off = req->offset + req->offsets[bit] +
+ start * req->sectorsize;
+ len = (end - start) * req->sectorsize;
+ buf = &req->buffer[start * req->sectorsize];
+
+- if(os_seek_file(req->fds[bit], off) != 0){
+- printk("do_io - lseek failed : errno = %d\n", errno);
++ err = os_seek_file(req->fds[bit], off);
++ if(err < 0){
++ printk("do_io - lseek failed : err = %d\n", -err);
+ req->error = 1;
+ return;
+ }
+@@ -490,11 +272,10 @@
+ do {
+ buf = &buf[n];
+ len -= n;
+- n = read(req->fds[bit], buf, len);
++ n = os_read_file(req->fds[bit], buf, len);
+ if (n < 0) {
+- printk("do_io - read returned %d : "
+- "errno = %d fd = %d\n", n,
+- errno, req->fds[bit]);
++ printk("do_io - read failed, err = %d "
++ "fd = %d\n", -n, req->fds[bit]);
+ req->error = 1;
+ return;
+ }
+@@ -502,11 +283,10 @@
+ if (n < len) memset(&buf[n], 0, len - n);
+ }
+ else {
+- n = write(req->fds[bit], buf, len);
++ n = os_write_file(req->fds[bit], buf, len);
+ if(n != len){
+- printk("do_io - write returned %d : "
+- "errno = %d fd = %d\n", n,
+- errno, req->fds[bit]);
++ printk("do_io - write failed err = %d "
++ "fd = %d\n", -n, req->fds[bit]);
+ req->error = 1;
+ return;
+ }
+@@ -515,24 +295,7 @@
+ start = end;
+ } while(start < nsectors);
+
+- if(req->cow_offset != -1){
+- if(os_seek_file(req->fds[1], req->cow_offset) != 0){
+- printk("do_io - bitmap lseek failed : errno = %d\n",
+- errno);
+- req->error = 1;
+- return;
+- }
+- n = write(req->fds[1], &req->bitmap_words,
+- sizeof(req->bitmap_words));
+- if(n != sizeof(req->bitmap_words)){
+- printk("do_io - bitmap update returned %d : "
+- "errno = %d fd = %d\n", n, errno, req->fds[1]);
+- req->error = 1;
+- return;
+- }
+- }
+- req->error = 0;
+- return;
++ req->error = update_bitmap(req);
+ }
+
+ /* Changed in start_io_thread, which is serialized by being called only
+@@ -550,19 +313,23 @@
+
+ signal(SIGWINCH, SIG_IGN);
+ while(1){
+- n = read(kernel_fd, &req, sizeof(req));
+- if(n < 0) printk("io_thread - read returned %d, errno = %d\n",
+- n, errno);
+- else if(n < sizeof(req)){
+- printk("io_thread - short read : length = %d\n", n);
++ n = os_read_file(kernel_fd, &req, sizeof(req));
++ if(n != sizeof(req)){
++ if(n < 0)
++ printk("io_thread - read failed, fd = %d, "
++ "err = %d\n", kernel_fd, -n);
++ else {
++ printk("io_thread - short read, fd = %d, "
++ "length = %d\n", kernel_fd, n);
++ }
+ continue;
+ }
+ io_count++;
+ do_io(&req);
+- n = write(kernel_fd, &req, sizeof(req));
++ n = os_write_file(kernel_fd, &req, sizeof(req));
+ if(n != sizeof(req))
+- printk("io_thread - write failed, errno = %d\n",
+- errno);
++ printk("io_thread - write failed, fd = %d, err = %d\n",
++ kernel_fd, -n);
+ }
+ }
+
+@@ -571,10 +338,11 @@
+ int pid, fds[2], err;
+
+ err = os_pipe(fds, 1, 1);
+- if(err){
+- printk("start_io_thread - os_pipe failed, errno = %d\n", -err);
+- return(-1);
++ if(err < 0){
++ printk("start_io_thread - os_pipe failed, err = %d\n", -err);
++ goto out;
+ }
++
+ kernel_fd = fds[0];
+ *fd_out = fds[1];
+
+@@ -582,32 +350,19 @@
+ NULL);
+ if(pid < 0){
+ printk("start_io_thread - clone failed : errno = %d\n", errno);
+- return(-errno);
++ goto out_close;
+ }
+- return(pid);
+-}
+-
+-#ifdef notdef
+-int start_io_thread(unsigned long sp, int *fd_out)
+-{
+- int pid;
+
+- if((kernel_fd = get_pty()) < 0) return(-1);
+- raw(kernel_fd, 0);
+- if((*fd_out = open(ptsname(kernel_fd), O_RDWR)) < 0){
+- printk("Couldn't open tty for IO\n");
+- return(-1);
+- }
+-
+- pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM | SIGCHLD,
+- NULL);
+- if(pid < 0){
+- printk("start_io_thread - clone failed : errno = %d\n", errno);
+- return(-errno);
+- }
+ return(pid);
++
++ out_close:
++ os_close_file(fds[0]);
++ os_close_file(fds[1]);
++ kernel_fd = -1;
++ *fd_out = -1;
++ out:
++ return(err);
+ }
+-#endif
+
+ /*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+diff -Naur a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c
+--- a/arch/um/drivers/xterm.c 2004-02-11 12:14:17.000000000 -0500
++++ b/arch/um/drivers/xterm.c 2004-02-11 12:26:00.000000000 -0500
+@@ -8,7 +8,6 @@
+ #include <unistd.h>
+ #include <string.h>
+ #include <errno.h>
+-#include <fcntl.h>
+ #include <termios.h>
+ #include <signal.h>
+ #include <sched.h>
+@@ -36,7 +35,8 @@
+ {
+ struct xterm_chan *data;
+
+- if((data = malloc(sizeof(*data))) == NULL) return(NULL);
++ data = malloc(sizeof(*data));
++ if(data == NULL) return(NULL);
+ *data = ((struct xterm_chan) { .pid = -1,
+ .helper_pid = -1,
+ .device = device,
+@@ -93,7 +93,7 @@
+ "/usr/lib/uml/port-helper", "-uml-socket",
+ file, NULL };
+
+- if(access(argv[4], X_OK))
++ if(os_access(argv[4], OS_ACC_X_OK) < 0)
+ argv[4] = "port-helper";
+
+ fd = mkstemp(file);
+@@ -106,13 +106,13 @@
+ printk("xterm_open : unlink failed, errno = %d\n", errno);
+ return(-errno);
+ }
+- close(fd);
++ os_close_file(fd);
+
+- fd = create_unix_socket(file, sizeof(file));
++ fd = os_create_unix_socket(file, sizeof(file), 1);
+ if(fd < 0){
+ printk("xterm_open : create_unix_socket failed, errno = %d\n",
+ -fd);
+- return(-fd);
++ return(fd);
+ }
+
+ sprintf(title, data->title, data->device);
+@@ -128,15 +128,16 @@
+ if(data->direct_rcv)
+ new = os_rcv_fd(fd, &data->helper_pid);
+ else {
+- if((err = os_set_fd_block(fd, 0)) != 0){
++ err = os_set_fd_block(fd, 0);
++ if(err < 0){
+ printk("xterm_open : failed to set descriptor "
+- "non-blocking, errno = %d\n", err);
++ "non-blocking, err = %d\n", -err);
+ return(err);
+ }
+ new = xterm_fd(fd, &data->helper_pid);
+ }
+ if(new < 0){
+- printk("xterm_open : os_rcv_fd failed, errno = %d\n", -new);
++ printk("xterm_open : os_rcv_fd failed, err = %d\n", -new);
+ goto out;
+ }
+
+@@ -160,7 +161,7 @@
+ if(data->helper_pid != -1)
+ os_kill_process(data->helper_pid, 0);
+ data->helper_pid = -1;
+- close(fd);
++ os_close_file(fd);
+ }
+
+ void xterm_free(void *d)
+diff -Naur a/arch/um/drivers/xterm_kern.c b/arch/um/drivers/xterm_kern.c
+--- a/arch/um/drivers/xterm_kern.c 2004-02-11 12:16:10.000000000 -0500
++++ b/arch/um/drivers/xterm_kern.c 2004-02-11 12:28:20.000000000 -0500
+@@ -5,9 +5,12 @@
+
+ #include "linux/errno.h"
+ #include "linux/slab.h"
++#include "linux/signal.h"
++#include "linux/interrupt.h"
+ #include "asm/semaphore.h"
+ #include "asm/irq.h"
+ #include "irq_user.h"
++#include "irq_kern.h"
+ #include "kern_util.h"
+ #include "os.h"
+ #include "xterm.h"
+@@ -19,17 +22,18 @@
+ int new_fd;
+ };
+
+-static void xterm_interrupt(int irq, void *data, struct pt_regs *regs)
++static irqreturn_t xterm_interrupt(int irq, void *data, struct pt_regs *regs)
+ {
+ struct xterm_wait *xterm = data;
+ int fd;
+
+ fd = os_rcv_fd(xterm->fd, &xterm->pid);
+ if(fd == -EAGAIN)
+- return;
++ return(IRQ_NONE);
+
+ xterm->new_fd = fd;
+ up(&xterm->sem);
++ return(IRQ_HANDLED);
+ }
+
+ int xterm_fd(int socket, int *pid_out)
+@@ -54,7 +58,8 @@
+ if(err){
+ printk(KERN_ERR "xterm_fd : failed to get IRQ for xterm, "
+ "err = %d\n", err);
+- return(err);
++ ret = err;
++ goto out;
+ }
+ down(&data->sem);
+
+@@ -62,6 +67,7 @@
+
+ ret = data->new_fd;
+ *pid_out = data->pid;
++ out:
+ kfree(data);
+
+ return(ret);
+diff -Naur a/arch/um/dyn.lds.S b/arch/um/dyn.lds.S
+--- a/arch/um/dyn.lds.S 2004-02-11 12:15:45.000000000 -0500
++++ b/arch/um/dyn.lds.S 2004-02-11 12:27:35.000000000 -0500
+@@ -10,12 +10,15 @@
+ {
+ . = START + SIZEOF_HEADERS;
+ .interp : { *(.interp) }
+- . = ALIGN(4096);
+ __binary_start = .;
+ . = ALIGN(4096); /* Init code and data */
+ _stext = .;
+ __init_begin = .;
+- .text.init : { *(.text.init) }
++ .init.text : {
++ _sinittext = .;
++ *(.init.text)
++ _einittext = .;
++ }
+
+ . = ALIGN(4096);
+
+@@ -67,7 +70,7 @@
+
+ #include "asm/common.lds.S"
+
+- .data.init : { *(.data.init) }
++ init.data : { *(.init.data) }
+
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+diff -Naur a/arch/um/include/2_5compat.h b/arch/um/include/2_5compat.h
+--- a/arch/um/include/2_5compat.h 2004-02-11 12:15:23.000000000 -0500
++++ b/arch/um/include/2_5compat.h 2004-02-11 12:27:10.000000000 -0500
+@@ -6,20 +6,6 @@
+ #ifndef __2_5_COMPAT_H__
+ #define __2_5_COMPAT_H__
+
+-#include "linux/version.h"
+-
+-#define INIT_CONSOLE(dev_name, write_proc, device_proc, setup_proc, f) { \
+- name : dev_name, \
+- write : write_proc, \
+- read : NULL, \
+- device : device_proc, \
+- setup : setup_proc, \
+- flags : f, \
+- index : -1, \
+- cflag : 0, \
+- next : NULL \
+-}
+-
+ #define INIT_HARDSECT(arr, maj, sizes)
+
+ #define SET_PRI(task) do ; while(0)
+diff -Naur a/arch/um/include/irq_kern.h b/arch/um/include/irq_kern.h
+--- a/arch/um/include/irq_kern.h 1969-12-31 19:00:00.000000000 -0500
++++ b/arch/um/include/irq_kern.h 2004-02-11 12:27:15.000000000 -0500
+@@ -0,0 +1,28 @@
++/*
++ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
++ * Licensed under the GPL
++ */
++
++#ifndef __IRQ_KERN_H__
++#define __IRQ_KERN_H__
++
++#include "linux/interrupt.h"
++
++extern int um_request_irq(unsigned int irq, int fd, int type,
++ irqreturn_t (*handler)(int, void *,
++ struct pt_regs *),
++ unsigned long irqflags, const char * devname,
++ void *dev_id);
++
++#endif
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h
+--- a/arch/um/include/kern_util.h 2004-02-11 12:15:00.000000000 -0500
++++ b/arch/um/include/kern_util.h 2004-02-11 12:26:51.000000000 -0500
+@@ -63,10 +63,9 @@
+ extern void *syscall_sp(void *t);
+ extern void syscall_trace(void);
+ extern int hz(void);
+-extern void idle_timer(void);
++extern void uml_idle_timer(void);
+ extern unsigned int do_IRQ(int irq, union uml_pt_regs *regs);
+ extern int external_pid(void *t);
+-extern int pid_to_processor_id(int pid);
+ extern void boot_timer_handler(int sig);
+ extern void interrupt_end(void);
+ extern void initial_thread_cb(void (*proc)(void *), void *arg);
+@@ -90,9 +89,7 @@
+ extern char *uml_strdup(char *string);
+ extern void unprotect_kernel_mem(void);
+ extern void protect_kernel_mem(void);
+-extern void set_kmem_end(unsigned long);
+ extern void uml_cleanup(void);
+-extern int pid_to_processor_id(int pid);
+ extern void set_current(void *t);
+ extern void lock_signalled_task(void *t);
+ extern void IPI_handler(int cpu);
+@@ -101,7 +98,9 @@
+ extern int clear_user_proc(void *buf, int size);
+ extern int copy_to_user_proc(void *to, void *from, int size);
+ extern int copy_from_user_proc(void *to, void *from, int size);
++extern int strlen_user_proc(char *str);
+ extern void bus_handler(int sig, union uml_pt_regs *regs);
++extern void winch(int sig, union uml_pt_regs *regs);
+ extern long execute_syscall(void *r);
+ extern int smp_sigio_handler(void);
+ extern void *get_current(void);
+@@ -112,6 +111,8 @@
+ extern void free_irq(unsigned int, void *);
+ extern int um_in_interrupt(void);
+ extern int cpu(void);
++extern unsigned long long time_stamp(void);
++
+ #endif
+
+ /*
+diff -Naur a/arch/um/include/line.h b/arch/um/include/line.h
+--- a/arch/um/include/line.h 2004-02-11 12:16:27.000000000 -0500
++++ b/arch/um/include/line.h 2004-02-11 12:28:24.000000000 -0500
+@@ -9,12 +9,14 @@
+ #include "linux/list.h"
+ #include "linux/workqueue.h"
+ #include "linux/tty.h"
++#include "linux/interrupt.h"
+ #include "asm/semaphore.h"
+ #include "chan_user.h"
+ #include "mconsole_kern.h"
+
+ struct line_driver {
+ char *name;
++ char *device_name;
+ char *devfs_name;
+ short major;
+ short minor_start;
+@@ -67,8 +69,6 @@
+
+ #define LINES_INIT(n) { num : n }
+
+-extern void line_interrupt(int irq, void *data, struct pt_regs *unused);
+-extern void line_write_interrupt(int irq, void *data, struct pt_regs *unused);
+ extern void line_close(struct line *lines, struct tty_struct *tty);
+ extern int line_open(struct line *lines, struct tty_struct *tty,
+ struct chan_opts *opts);
+diff -Naur a/arch/um/include/mconsole.h b/arch/um/include/mconsole.h
+--- a/arch/um/include/mconsole.h 2004-02-11 12:15:15.000000000 -0500
++++ b/arch/um/include/mconsole.h 2004-02-11 12:27:02.000000000 -0500
+@@ -41,11 +41,13 @@
+
+ struct mc_request;
+
++enum mc_context { MCONSOLE_INTR, MCONSOLE_PROC };
++
+ struct mconsole_command
+ {
+ char *command;
+ void (*handler)(struct mc_request *req);
+- int as_interrupt;
++ enum mc_context context;
+ };
+
+ struct mc_request
+@@ -77,6 +79,8 @@
+ extern void mconsole_cad(struct mc_request *req);
+ extern void mconsole_stop(struct mc_request *req);
+ extern void mconsole_go(struct mc_request *req);
++extern void mconsole_log(struct mc_request *req);
++extern void mconsole_proc(struct mc_request *req);
+
+ extern int mconsole_get_request(int fd, struct mc_request *req);
+ extern int mconsole_notify(char *sock_name, int type, const void *data,
+diff -Naur a/arch/um/include/mem.h b/arch/um/include/mem.h
+--- a/arch/um/include/mem.h 2004-02-11 12:17:08.000000000 -0500
++++ b/arch/um/include/mem.h 2004-02-11 12:29:12.000000000 -0500
+@@ -1,19 +1,18 @@
+ /*
+- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
++ * Copyright (C) 2002, 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+ #ifndef __MEM_H__
+ #define __MEM_H__
+
+-struct vm_reserved {
+- struct list_head list;
+- unsigned long start;
+- unsigned long end;
+-};
++#include "linux/types.h"
+
+-extern void set_usable_vm(unsigned long start, unsigned long end);
+-extern void set_kmem_end(unsigned long new);
++extern int phys_mapping(unsigned long phys, __u64 *offset_out);
++extern int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w);
++extern int is_remapped(void *virt);
++extern int physmem_remove_mapping(void *virt);
++extern void physmem_forget_descriptor(int fd);
+
+ #endif
+
+diff -Naur a/arch/um/include/mem_kern.h b/arch/um/include/mem_kern.h
+--- a/arch/um/include/mem_kern.h 1969-12-31 19:00:00.000000000 -0500
++++ b/arch/um/include/mem_kern.h 2004-02-11 12:27:40.000000000 -0500
+@@ -0,0 +1,30 @@
++/*
++ * Copyright (C) 2003 Jeff Dike (jdike@addtoit.com)
++ * Licensed under the GPL
++ */
++
++#ifndef __MEM_KERN_H__
++#define __MEM_KERN_H__
++
++#include "linux/list.h"
++#include "linux/types.h"
++
++struct remapper {
++ struct list_head list;
++ int (*proc)(int, unsigned long, int, __u64);
++};
++
++extern void register_remapper(struct remapper *info);
++
++#endif
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/include/mem_user.h b/arch/um/include/mem_user.h
+--- a/arch/um/include/mem_user.h 2004-02-11 12:16:03.000000000 -0500
++++ b/arch/um/include/mem_user.h 2004-02-11 12:27:57.000000000 -0500
+@@ -32,43 +32,38 @@
+ #ifndef _MEM_USER_H
+ #define _MEM_USER_H
+
+-struct mem_region {
++struct iomem_region {
++ struct iomem_region *next;
+ char *driver;
+- unsigned long start_pfn;
+- unsigned long start;
+- unsigned long len;
+- void *mem_map;
+ int fd;
++ int size;
++ unsigned long phys;
++ unsigned long virt;
+ };
+
+-extern struct mem_region *regions[];
+-extern struct mem_region physmem_region;
++extern struct iomem_region *iomem_regions;
++extern int iomem_size;
+
+ #define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1))
+
+ extern unsigned long host_task_size;
+ extern unsigned long task_size;
+
++extern void check_devanon(void);
+ extern int init_mem_user(void);
+ extern int create_mem_file(unsigned long len);
+-extern void setup_range(int fd, char *driver, unsigned long start,
+- unsigned long pfn, unsigned long total, int need_vm,
+- struct mem_region *region, void *reserved);
+ extern void setup_memory(void *entry);
+ extern unsigned long find_iomem(char *driver, unsigned long *len_out);
+-extern int init_maps(struct mem_region *region);
+-extern int nregions(void);
+-extern int reserve_vm(unsigned long start, unsigned long end, void *e);
++extern int init_maps(unsigned long physmem, unsigned long iomem,
++ unsigned long highmem);
+ extern unsigned long get_vm(unsigned long len);
+ extern void setup_physmem(unsigned long start, unsigned long usable,
+- unsigned long len);
+-extern int setup_region(struct mem_region *region, void *entry);
++ unsigned long len, unsigned long highmem);
+ extern void add_iomem(char *name, int fd, unsigned long size);
+-extern struct mem_region *phys_region(unsigned long phys);
+ extern unsigned long phys_offset(unsigned long phys);
+ extern void unmap_physmem(void);
+-extern int map_memory(unsigned long virt, unsigned long phys,
+- unsigned long len, int r, int w, int x);
++extern void map_memory(unsigned long virt, unsigned long phys,
++ unsigned long len, int r, int w, int x);
+ extern int protect_memory(unsigned long addr, unsigned long len,
+ int r, int w, int x, int must_succeed);
+ extern unsigned long get_kmem_end(void);
+diff -Naur a/arch/um/include/os.h b/arch/um/include/os.h
+--- a/arch/um/include/os.h 2004-02-11 12:14:31.000000000 -0500
++++ b/arch/um/include/os.h 2004-02-11 12:26:12.000000000 -0500
+@@ -17,6 +17,32 @@
+ #define OS_TYPE_FIFO 6
+ #define OS_TYPE_SOCK 7
+
++/* os_access() flags */
++#define OS_ACC_F_OK 0 /* Test for existence. */
++#define OS_ACC_X_OK 1 /* Test for execute permission. */
++#define OS_ACC_W_OK 2 /* Test for write permission. */
++#define OS_ACC_R_OK 4 /* Test for read permission. */
++#define OS_ACC_RW_OK (OS_ACC_W_OK | OS_ACC_R_OK) /* Test for RW permission */
++
++/*
++ * types taken from stat_file() in hostfs_user.c
++ * (if they are wrong here, they are wrong there...).
++ */
++struct uml_stat {
++ int ust_dev; /* device */
++ unsigned long long ust_ino; /* inode */
++ int ust_mode; /* protection */
++ int ust_nlink; /* number of hard links */
++ int ust_uid; /* user ID of owner */
++ int ust_gid; /* group ID of owner */
++ unsigned long long ust_size; /* total size, in bytes */
++ int ust_blksize; /* blocksize for filesystem I/O */
++ unsigned long long ust_blocks; /* number of blocks allocated */
++ unsigned long ust_atime; /* time of last access */
++ unsigned long ust_mtime; /* time of last modification */
++ unsigned long ust_ctime; /* time of last change */
++};
++
+ struct openflags {
+ unsigned int r : 1;
+ unsigned int w : 1;
+@@ -84,29 +110,47 @@
+ flags.e = 1;
+ return(flags);
+ }
+-
++
+ static inline struct openflags of_cloexec(struct openflags flags)
+ {
+ flags.cl = 1;
+ return(flags);
+ }
+
++extern int os_stat_file(const char *file_name, struct uml_stat *buf);
++extern int os_stat_fd(const int fd, struct uml_stat *buf);
++extern int os_access(const char *file, int mode);
++extern void os_print_error(int error, const char* str);
++extern int os_get_exec_close(int fd, int *close_on_exec);
++extern int os_set_exec_close(int fd, int close_on_exec);
++extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg);
++extern int os_window_size(int fd, int *rows, int *cols);
++extern int os_new_tty_pgrp(int fd, int pid);
++extern int os_get_ifname(int fd, char *namebuf);
++extern int os_set_slip(int fd);
++extern int os_set_owner(int fd, int pid);
++extern int os_sigio_async(int master, int slave);
++extern int os_mode_fd(int fd, int mode);
++
+ extern int os_seek_file(int fd, __u64 offset);
+ extern int os_open_file(char *file, struct openflags flags, int mode);
+ extern int os_read_file(int fd, void *buf, int len);
+-extern int os_write_file(int fd, void *buf, int count);
++extern int os_write_file(int fd, const void *buf, int count);
+ extern int os_file_size(char *file, long long *size_out);
++extern int os_file_modtime(char *file, unsigned long *modtime);
+ extern int os_pipe(int *fd, int stream, int close_on_exec);
+ extern int os_set_fd_async(int fd, int owner);
+ extern int os_set_fd_block(int fd, int blocking);
+ extern int os_accept_connection(int fd);
++extern int os_create_unix_socket(char *file, int len, int close_on_exec);
+ extern int os_shutdown_socket(int fd, int r, int w);
+ extern void os_close_file(int fd);
+ extern int os_rcv_fd(int fd, int *helper_pid_out);
+-extern int create_unix_socket(char *file, int len);
++extern int create_unix_socket(char *file, int len, int close_on_exec);
+ extern int os_connect_socket(char *name);
+ extern int os_file_type(char *file);
+ extern int os_file_mode(char *file, struct openflags *mode_out);
++extern int os_lock_file(int fd, int excl);
+
+ extern unsigned long os_process_pc(int pid);
+ extern int os_process_parent(int pid);
+@@ -115,11 +159,12 @@
+ extern void os_usr1_process(int pid);
+ extern int os_getpid(void);
+
+-extern int os_map_memory(void *virt, int fd, unsigned long off,
++extern int os_map_memory(void *virt, int fd, unsigned long long off,
+ unsigned long len, int r, int w, int x);
+ extern int os_protect_memory(void *addr, unsigned long len,
+ int r, int w, int x);
+ extern int os_unmap_memory(void *addr, int len);
++extern void os_flush_stdout(void);
+
+ #endif
+
+diff -Naur a/arch/um/include/skas_ptrace.h b/arch/um/include/skas_ptrace.h
+--- a/arch/um/include/skas_ptrace.h 2004-02-11 12:16:37.000000000 -0500
++++ b/arch/um/include/skas_ptrace.h 2004-02-11 12:28:37.000000000 -0500
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
++ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
+ * Licensed under the GPL
+ */
+
+diff -Naur a/arch/um/include/sysdep-i386/frame_user.h b/arch/um/include/sysdep-i386/frame_user.h
+--- a/arch/um/include/sysdep-i386/frame_user.h 2004-02-11 12:16:44.000000000 -0500
++++ b/arch/um/include/sysdep-i386/frame_user.h 2004-02-11 12:29:00.000000000 -0500
+@@ -56,26 +56,26 @@
+ * it would have to be __builtin_frame_address(1).
+ */
+
+-static inline unsigned long frame_restorer(void)
+-{
+- unsigned long *fp;
+-
+- fp = __builtin_frame_address(0);
+- return((unsigned long) (fp + 1));
+-}
++#define frame_restorer() \
++({ \
++ unsigned long *fp; \
++\
++ fp = __builtin_frame_address(0); \
++ ((unsigned long) (fp + 1)); \
++})
+
+ /* Similarly, this returns the value of sp when the handler was first
+ * entered. This is used to calculate the proper sp when delivering
+ * signals.
+ */
+
+-static inline unsigned long frame_sp(void)
+-{
+- unsigned long *fp;
+-
+- fp = __builtin_frame_address(0);
+- return((unsigned long) (fp + 1));
+-}
++#define frame_sp() \
++({ \
++ unsigned long *fp; \
++\
++ fp = __builtin_frame_address(0); \
++ ((unsigned long) (fp + 1)); \
++})
+
+ #endif
+
+diff -Naur a/arch/um/include/sysdep-i386/sigcontext.h b/arch/um/include/sysdep-i386/sigcontext.h
+--- a/arch/um/include/sysdep-i386/sigcontext.h 2004-02-11 12:16:08.000000000 -0500
++++ b/arch/um/include/sysdep-i386/sigcontext.h 2004-02-11 12:28:19.000000000 -0500
+@@ -28,8 +28,8 @@
+ */
+ #define SC_START_SYSCALL(sc) do SC_EAX(sc) = -ENOSYS; while(0)
+
+-/* These are General Protection and Page Fault */
+-#define SEGV_IS_FIXABLE(trap) ((trap == 13) || (trap == 14))
++/* This is Page Fault */
++#define SEGV_IS_FIXABLE(trap) (trap == 14)
+
+ #define SC_SEGV_IS_FIXABLE(sc) (SEGV_IS_FIXABLE(SC_TRAPNO(sc)))
+
+diff -Naur a/arch/um/include/ubd_user.h b/arch/um/include/ubd_user.h
+--- a/arch/um/include/ubd_user.h 2004-02-11 12:15:48.000000000 -0500
++++ b/arch/um/include/ubd_user.h 2004-02-11 12:27:41.000000000 -0500
+@@ -9,7 +9,7 @@
+
+ #include "os.h"
+
+-enum ubd_req { UBD_READ, UBD_WRITE };
++enum ubd_req { UBD_READ, UBD_WRITE, UBD_MMAP };
+
+ struct io_thread_req {
+ enum ubd_req op;
+@@ -20,8 +20,10 @@
+ char *buffer;
+ int sectorsize;
+ unsigned long sector_mask;
+- unsigned long cow_offset;
++ unsigned long long cow_offset;
+ unsigned long bitmap_words[2];
++ int map_fd;
++ unsigned long long map_offset;
+ int error;
+ };
+
+@@ -31,7 +33,7 @@
+ int *create_cow_out);
+ extern int create_cow_file(char *cow_file, char *backing_file,
+ struct openflags flags, int sectorsize,
+- int *bitmap_offset_out,
++ int alignment, int *bitmap_offset_out,
+ unsigned long *bitmap_len_out,
+ int *data_offset_out);
+ extern int read_cow_bitmap(int fd, void *buf, int offset, int len);
+@@ -39,7 +41,6 @@
+ extern int write_ubd_fs(int fd, char *buffer, int len);
+ extern int start_io_thread(unsigned long sp, int *fds_out);
+ extern void do_io(struct io_thread_req *req);
+-extern int ubd_is_dir(char *file);
+
+ static inline int ubd_test_bit(__u64 bit, unsigned char *data)
+ {
+diff -Naur a/arch/um/include/um_uaccess.h b/arch/um/include/um_uaccess.h
+--- a/arch/um/include/um_uaccess.h 2004-02-11 12:16:07.000000000 -0500
++++ b/arch/um/include/um_uaccess.h 2004-02-11 12:28:02.000000000 -0500
+@@ -38,22 +38,73 @@
+ from, n));
+ }
+
++/*
++ * strncpy_from_user: - Copy a NUL terminated string from userspace.
++ * @dst: Destination address, in kernel space. This buffer must be at
++ * least @count bytes long.
++ * @src: Source address, in user space.
++ * @count: Maximum number of bytes to copy, including the trailing NUL.
++ *
++ * Copies a NUL-terminated string from userspace to kernel space.
++ *
++ * On success, returns the length of the string (not including the trailing
++ * NUL).
++ *
++ * If access to userspace fails, returns -EFAULT (some data may have been
++ * copied).
++ *
++ * If @count is smaller than the length of the string, copies @count bytes
++ * and returns @count.
++ */
++
+ static inline int strncpy_from_user(char *dst, const char *src, int count)
+ {
+ return(CHOOSE_MODE_PROC(strncpy_from_user_tt, strncpy_from_user_skas,
+ dst, src, count));
+ }
+
++/*
++ * __clear_user: - Zero a block of memory in user space, with less checking.
++ * @mem: Destination address, in user space.
++ * @len: Number of bytes to zero.
++ *
++ * Zero a block of memory in user space. Caller must check
++ * the specified block with access_ok() before calling this function.
++ *
++ * Returns number of bytes that could not be cleared.
++ * On success, this will be zero.
++ */
+ static inline int __clear_user(void *mem, int len)
+ {
+ return(CHOOSE_MODE_PROC(__clear_user_tt, __clear_user_skas, mem, len));
+ }
+
++/*
++ * clear_user: - Zero a block of memory in user space.
++ * @mem: Destination address, in user space.
++ * @len: Number of bytes to zero.
++ *
++ * Zero a block of memory in user space.
++ *
++ * Returns number of bytes that could not be cleared.
++ * On success, this will be zero.
++ */
+ static inline int clear_user(void *mem, int len)
+ {
+ return(CHOOSE_MODE_PROC(clear_user_tt, clear_user_skas, mem, len));
+ }
+
++/*
++ * strnlen_user: - Get the size of a string in user space.
++ * @str: The string to measure.
++ * @len: The maximum valid length.
++ *
++ * Get the size of a NUL-terminated string in user space.
++ *
++ * Returns the size of the string INCLUDING the terminating NUL.
++ * On exception, returns 0.
++ * If the string is too long, returns a value greater than @len.
++ */
+ static inline int strnlen_user(const void *str, int len)
+ {
+ return(CHOOSE_MODE_PROC(strnlen_user_tt, strnlen_user_skas, str, len));
+diff -Naur a/arch/um/include/user.h b/arch/um/include/user.h
+--- a/arch/um/include/user.h 2004-02-11 12:14:17.000000000 -0500
++++ b/arch/um/include/user.h 2004-02-11 12:26:00.000000000 -0500
+@@ -14,6 +14,7 @@
+ extern void kfree(void *ptr);
+ extern int in_aton(char *str);
+ extern int open_gdb_chan(void);
++extern int strlcpy(char *, const char *, int);
+
+ #endif
+
+diff -Naur a/arch/um/include/user_util.h b/arch/um/include/user_util.h
+--- a/arch/um/include/user_util.h 2004-02-11 12:14:23.000000000 -0500
++++ b/arch/um/include/user_util.h 2004-02-11 12:26:06.000000000 -0500
+@@ -14,8 +14,6 @@
+ extern int unlockpt(int __fd);
+ extern char *ptsname(int __fd);
+
+-enum { OP_NONE, OP_EXEC, OP_FORK, OP_TRACE_ON, OP_REBOOT, OP_HALT, OP_CB };
+-
+ struct cpu_task {
+ int pid;
+ void *task;
+@@ -59,7 +57,6 @@
+ extern void *add_signal_handler(int sig, void (*handler)(int));
+ extern int start_fork_tramp(void *arg, unsigned long temp_stack,
+ int clone_flags, int (*tramp)(void *));
+-extern int clone_and_wait(int (*fn)(void *), void *arg, void *sp, int flags);
+ extern int linux_main(int argc, char **argv);
+ extern void set_cmdline(char *cmd);
+ extern void input_cb(void (*proc)(void *), void *arg, int arg_len);
+@@ -86,11 +83,13 @@
+ extern int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr);
+ extern void write_sigio_workaround(void);
+ extern void arch_check_bugs(void);
++extern int cpu_feature(char *what, char *buf, int len);
+ extern int arch_handle_signal(int sig, union uml_pt_regs *regs);
+ extern int arch_fixup(unsigned long address, void *sc_ptr);
+ extern void forward_pending_sigio(int target);
+ extern int can_do_skas(void);
+-
++extern void arch_init_thread(void);
++
+ #endif
+
+ /*
+diff -Naur a/arch/um/Kconfig b/arch/um/Kconfig
+--- a/arch/um/Kconfig 2004-02-11 12:15:26.000000000 -0500
++++ b/arch/um/Kconfig 2004-02-11 12:27:15.000000000 -0500
+@@ -61,6 +61,20 @@
+
+ config NET
+ bool "Networking support"
++ help
++ Unless you really know what you are doing, you should say Y here.
++ The reason is that some programs need kernel networking support even
++ when running on a stand-alone machine that isn't connected to any
++ other computer. If you are upgrading from an older kernel, you
++ should consider updating your networking tools too because changes
++ in the kernel and the tools often go hand in hand. The tools are
++ contained in the package net-tools, the location and version number
++ of which are given in Documentation/Changes.
++
++ For a general introduction to Linux networking, it is highly
++ recommended to read the NET-HOWTO, available from
++ <http://www.tldp.org/docs.html#howto>.
++
+
+ source "fs/Kconfig.binfmt"
+
+@@ -85,6 +99,19 @@
+ If you'd like to be able to work with files stored on the host,
+ say Y or M here; otherwise say N.
+
++config HPPFS
++ tristate "HoneyPot ProcFS"
++ help
++ hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc
++ entries to be overridden, removed, or fabricated from the host.
++ Its purpose is to allow a UML to appear to be a physical machine
++ by removing or changing anything in /proc which gives away the
++ identity of a UML.
++
++ See http://user-mode-linux.sf.net/hppfs.html for more information.
++
++ You only need this if you are setting up a UML honeypot. Otherwise,
++ it is safe to say 'N' here.
+
+ config MCONSOLE
+ bool "Management console"
+@@ -105,6 +132,16 @@
+ config MAGIC_SYSRQ
+ bool "Magic SysRq key"
+ depends on MCONSOLE
++ help
++ If you say Y here, you will have some control over the system even
++ if the system crashes for example during kernel debugging (e.g., you
++ will be able to flush the buffer cache to disk, reboot the system
++ immediately or dump some status information). This is accomplished
++ by pressing various keys while holding SysRq (Alt+PrintScreen). It
++ also works on a serial console (on PC hardware at least), if you
++ send a BREAK and then within 5 seconds a command keypress. The
++ keys are documented in Documentation/sysrq.txt. Don't say Y
++ unless you really know what this hack does.
+
+ config HOST_2G_2G
+ bool "2G/2G host address space split"
+@@ -160,6 +197,9 @@
+ config HIGHMEM
+ bool "Highmem support"
+
++config PROC_MM
++ bool "/proc/mm support"
++
+ config KERNEL_STACK_ORDER
+ int "Kernel stack size order"
+ default 2
+@@ -168,6 +208,17 @@
+ be 1 << order pages. The default is OK unless you're running Valgrind
+ on UML, in which case, set this to 3.
+
++config UML_REAL_TIME_CLOCK
++ bool "Real-time Clock"
++ default y
++ help
++ This option makes UML time deltas match wall clock deltas. This should
++ normally be enabled. The exception would be if you are debugging with
++ UML and spend long times with UML stopped at a breakpoint. In this
++ case, when UML is restarted, it will call the timer enough times to make
++ up for the time spent at the breakpoint. This could result in a
++ noticeable lag. If this is a problem, then disable this option.
++
+ endmenu
+
+ source "init/Kconfig"
+@@ -240,6 +291,10 @@
+ config PT_PROXY
+ bool "Enable ptrace proxy"
+ depends on XTERM_CHAN && DEBUG_INFO
++ help
++ This option enables a debugging interface which allows gdb to debug
++ the kernel without needing to actually attach to kernel threads.
++ If you want to do kernel debugging, say Y here; otherwise say N.
+
+ config GPROF
+ bool "Enable gprof support"
+diff -Naur a/arch/um/Kconfig_block b/arch/um/Kconfig_block
+--- a/arch/um/Kconfig_block 2004-02-11 12:16:04.000000000 -0500
++++ b/arch/um/Kconfig_block 2004-02-11 12:28:00.000000000 -0500
+@@ -29,6 +29,20 @@
+ wise choice too. In all other cases (for example, if you're just
+ playing around with User-Mode Linux) you can choose N.
+
++# Turn this back on when the driver actually works
++#
++#config BLK_DEV_COW
++# tristate "COW block device"
++# help
++# This is a layered driver which sits above two other block devices.
++# One is read-only, and the other is a read-write layer which stores
++# all changes. This provides the illusion that the read-only layer
++# can be mounted read-write and changed.
++
++config BLK_DEV_COW_COMMON
++ bool
++ default BLK_DEV_COW || BLK_DEV_UBD
++
+ config BLK_DEV_LOOP
+ tristate "Loopback device support"
+
+diff -Naur a/arch/um/Kconfig_net b/arch/um/Kconfig_net
+--- a/arch/um/Kconfig_net 2004-02-11 12:15:54.000000000 -0500
++++ b/arch/um/Kconfig_net 2004-02-11 12:27:47.000000000 -0500
+@@ -1,5 +1,5 @@
+
+-menu "Network Devices"
++menu "UML Network Devices"
+ depends on NET
+
+ # UML virtual driver
+@@ -176,73 +176,5 @@
+
+ Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
+
+-
+-# Below are hardware-independent drivers mirrored from
+-# drivers/net/Config.in. It would be nice if Linux
+-# had HW independent drivers separated from the other
+-# but it does not. Until then each non-ISA/PCI arch
+-# needs to provide it's own menu of network drivers
+-config DUMMY
+- tristate "Dummy net driver support"
+-
+-config BONDING
+- tristate "Bonding driver support"
+-
+-config EQUALIZER
+- tristate "EQL (serial line load balancing) support"
+-
+-config TUN
+- tristate "Universal TUN/TAP device driver support"
+-
+-config ETHERTAP
+- tristate "Ethertap network tap (OBSOLETE)"
+- depends on EXPERIMENTAL && NETLINK
+-
+-config PPP
+- tristate "PPP (point-to-point protocol) support"
+-
+-config PPP_MULTILINK
+- bool "PPP multilink support (EXPERIMENTAL)"
+- depends on PPP && EXPERIMENTAL
+-
+-config PPP_FILTER
+- bool "PPP filtering"
+- depends on PPP && FILTER
+-
+-config PPP_ASYNC
+- tristate "PPP support for async serial ports"
+- depends on PPP
+-
+-config PPP_SYNC_TTY
+- tristate "PPP support for sync tty ports"
+- depends on PPP
+-
+-config PPP_DEFLATE
+- tristate "PPP Deflate compression"
+- depends on PPP
+-
+-config PPP_BSDCOMP
+- tristate "PPP BSD-Compress compression"
+- depends on PPP
+-
+-config PPPOE
+- tristate "PPP over Ethernet (EXPERIMENTAL)"
+- depends on PPP && EXPERIMENTAL
+-
+-config SLIP
+- tristate "SLIP (serial line) support"
+-
+-config SLIP_COMPRESSED
+- bool "CSLIP compressed headers"
+- depends on SLIP=y
+-
+-config SLIP_SMART
+- bool "Keepalive and linefill"
+- depends on SLIP=y
+-
+-config SLIP_MODE_SLIP6
+- bool "Six bit SLIP encapsulation"
+- depends on SLIP=y
+-
+ endmenu
+
+diff -Naur a/arch/um/kernel/config.c.in b/arch/um/kernel/config.c.in
+--- a/arch/um/kernel/config.c.in 2004-02-11 12:16:10.000000000 -0500
++++ b/arch/um/kernel/config.c.in 2004-02-11 12:28:20.000000000 -0500
+@@ -7,9 +7,7 @@
+ #include <stdlib.h>
+ #include "init.h"
+
+-static __initdata char *config = "
+-CONFIG
+-";
++static __initdata char *config = "CONFIG";
+
+ static int __init print_config(char *line, int *add)
+ {
+diff -Naur a/arch/um/kernel/exec_kern.c b/arch/um/kernel/exec_kern.c
+--- a/arch/um/kernel/exec_kern.c 2004-02-11 12:14:34.000000000 -0500
++++ b/arch/um/kernel/exec_kern.c 2004-02-11 12:26:17.000000000 -0500
+@@ -32,10 +32,15 @@
+ CHOOSE_MODE_PROC(start_thread_tt, start_thread_skas, regs, eip, esp);
+ }
+
++extern void log_exec(char **argv, void *tty);
++
+ static int execve1(char *file, char **argv, char **env)
+ {
+ int error;
+
++#ifdef CONFIG_TTY_LOG
++ log_exec(argv, current->tty);
++#endif
+ error = do_execve(file, argv, env, ¤t->thread.regs);
+ if (error == 0){
+ current->ptrace &= ~PT_DTRACE;
+diff -Naur a/arch/um/kernel/frame.c b/arch/um/kernel/frame.c
+--- a/arch/um/kernel/frame.c 2004-02-11 12:14:57.000000000 -0500
++++ b/arch/um/kernel/frame.c 2004-02-11 12:26:47.000000000 -0500
+@@ -279,7 +279,7 @@
+ struct sc_frame_raw raw_sc;
+ struct si_frame_raw raw_si;
+ void *stack, *sigstack;
+- unsigned long top, sig_top, base;
++ unsigned long top, base;
+
+ stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+@@ -292,7 +292,6 @@
+ }
+
+ top = (unsigned long) stack + PAGE_SIZE - sizeof(void *);
+- sig_top = (unsigned long) sigstack + PAGE_SIZE;
+
+ /* Get the sigcontext, no sigrestorer layout */
+ raw_sc.restorer = 0;
+diff -Naur a/arch/um/kernel/frame_kern.c b/arch/um/kernel/frame_kern.c
+--- a/arch/um/kernel/frame_kern.c 2004-02-11 12:17:12.000000000 -0500
++++ b/arch/um/kernel/frame_kern.c 2004-02-11 12:29:17.000000000 -0500
+@@ -6,7 +6,6 @@
+ #include "asm/ptrace.h"
+ #include "asm/uaccess.h"
+ #include "asm/signal.h"
+-#include "asm/uaccess.h"
+ #include "asm/ucontext.h"
+ #include "frame_kern.h"
+ #include "sigcontext.h"
+@@ -29,12 +28,15 @@
+ sizeof(restorer)));
+ }
+
++extern int userspace_pid[];
++
+ static int copy_sc_to_user(void *to, void *fp, struct pt_regs *from,
+ struct arch_frame_data *arch)
+ {
+ return(CHOOSE_MODE(copy_sc_to_user_tt(to, fp, UPT_SC(&from->regs),
+ arch),
+- copy_sc_to_user_skas(to, fp, &from->regs,
++ copy_sc_to_user_skas(userspace_pid[0], to, fp,
++ &from->regs,
+ current->thread.cr2,
+ current->thread.err)));
+ }
+diff -Naur a/arch/um/kernel/helper.c b/arch/um/kernel/helper.c
+--- a/arch/um/kernel/helper.c 2004-02-11 12:15:26.000000000 -0500
++++ b/arch/um/kernel/helper.c 2004-02-11 12:27:13.000000000 -0500
+@@ -7,7 +7,6 @@
+ #include <stdlib.h>
+ #include <unistd.h>
+ #include <errno.h>
+-#include <fcntl.h>
+ #include <sched.h>
+ #include <sys/signal.h>
+ #include <sys/wait.h>
+@@ -33,6 +32,7 @@
+ {
+ struct helper_data *data = arg;
+ char **argv = data->argv;
++ int errval;
+
+ if(helper_pause){
+ signal(SIGHUP, helper_hup);
+@@ -41,8 +41,9 @@
+ if(data->pre_exec != NULL)
+ (*data->pre_exec)(data->pre_data);
+ execvp(argv[0], argv);
++ errval = errno;
+ printk("execvp of '%s' failed - errno = %d\n", argv[0], errno);
+- write(data->fd, &errno, sizeof(errno));
++ os_write_file(data->fd, &errval, sizeof(errval));
+ os_kill_process(os_getpid(), 0);
+ return(0);
+ }
+@@ -59,17 +60,20 @@
+ if((stack_out != NULL) && (*stack_out != 0))
+ stack = *stack_out;
+ else stack = alloc_stack(0, um_in_interrupt());
+- if(stack == 0) return(-ENOMEM);
++ if(stack == 0)
++ return(-ENOMEM);
+
+ err = os_pipe(fds, 1, 0);
+- if(err){
+- printk("run_helper : pipe failed, errno = %d\n", -err);
+- return(err);
++ if(err < 0){
++ printk("run_helper : pipe failed, err = %d\n", -err);
++ goto out_free;
+ }
+- if(fcntl(fds[1], F_SETFD, 1) != 0){
+- printk("run_helper : setting FD_CLOEXEC failed, errno = %d\n",
+- errno);
+- return(-errno);
++
++ err = os_set_exec_close(fds[1], 1);
++ if(err < 0){
++ printk("run_helper : setting FD_CLOEXEC failed, err = %d\n",
++ -err);
++ goto out_close;
+ }
+
+ sp = stack + page_size() - sizeof(void *);
+@@ -80,23 +84,34 @@
+ pid = clone(helper_child, (void *) sp, CLONE_VM | SIGCHLD, &data);
+ if(pid < 0){
+ printk("run_helper : clone failed, errno = %d\n", errno);
+- return(-errno);
++ err = -errno;
++ goto out_close;
+ }
+- close(fds[1]);
+- n = read(fds[0], &err, sizeof(err));
++
++ os_close_file(fds[1]);
++ n = os_read_file(fds[0], &err, sizeof(err));
+ if(n < 0){
+- printk("run_helper : read on pipe failed, errno = %d\n",
+- errno);
+- return(-errno);
++ printk("run_helper : read on pipe failed, err = %d\n", -n);
++ err = n;
++ goto out_kill;
+ }
+ else if(n != 0){
+ waitpid(pid, NULL, 0);
+- pid = -err;
++ pid = -err; /* exec failed: err is the child's errno read from the pipe */
+ }
+
+ if(stack_out == NULL) free_stack(stack, 0);
+ else *stack_out = stack;
+ return(pid);
++
++ out_kill:
++ os_kill_process(pid, 1);
++ out_close:
++ os_close_file(fds[0]);
++ os_close_file(fds[1]);
++ out_free:
++ free_stack(stack, 0);
++ return(err);
+ }
+
+ int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags,
+@@ -117,9 +132,11 @@
+ }
+ if(stack_out == NULL){
+ pid = waitpid(pid, &status, 0);
+- if(pid < 0)
++ if(pid < 0){
+ printk("run_helper_thread - wait failed, errno = %d\n",
+- pid);
++ errno);
++ pid = -errno;
++ }
+ if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0))
+ printk("run_helper_thread - thread returned status "
+ "0x%x\n", status);
+diff -Naur a/arch/um/kernel/initrd_user.c b/arch/um/kernel/initrd_user.c
+--- a/arch/um/kernel/initrd_user.c 2004-02-11 12:14:28.000000000 -0500
++++ b/arch/um/kernel/initrd_user.c 2004-02-11 12:26:11.000000000 -0500
+@@ -6,7 +6,6 @@
+ #include <unistd.h>
+ #include <sys/types.h>
+ #include <sys/stat.h>
+-#include <fcntl.h>
+ #include <errno.h>
+
+ #include "user_util.h"
+@@ -19,13 +18,15 @@
+ {
+ int fd, n;
+
+- if((fd = os_open_file(filename, of_read(OPENFLAGS()), 0)) < 0){
+- printk("Opening '%s' failed - errno = %d\n", filename, errno);
++ fd = os_open_file(filename, of_read(OPENFLAGS()), 0);
++ if(fd < 0){
++ printk("Opening '%s' failed - err = %d\n", filename, -fd);
+ return(-1);
+ }
+- if((n = read(fd, buf, size)) != size){
+- printk("Read of %d bytes from '%s' returned %d, errno = %d\n",
+- size, filename, n, errno);
++ n = os_read_file(fd, buf, size);
++ if(n != size){
++ printk("Read of %d bytes from '%s' failed, err = %d\n", size,
++ filename, -n);
+ return(-1);
+ }
+ return(0);
+diff -Naur a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c
+--- a/arch/um/kernel/init_task.c 2004-02-11 12:17:10.000000000 -0500
++++ b/arch/um/kernel/init_task.c 2004-02-11 12:29:16.000000000 -0500
+@@ -8,7 +8,6 @@
+ #include "linux/module.h"
+ #include "linux/sched.h"
+ #include "linux/init_task.h"
+-#include "linux/version.h"
+ #include "asm/uaccess.h"
+ #include "asm/pgtable.h"
+ #include "user_util.h"
+@@ -18,7 +17,7 @@
+ struct mm_struct init_mm = INIT_MM(init_mm);
+ static struct files_struct init_files = INIT_FILES;
+ static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
+-
++static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+ EXPORT_SYMBOL(init_mm);
+
+ /*
+@@ -43,26 +42,12 @@
+ __attribute__((__section__(".data.init_task"))) =
+ { INIT_THREAD_INFO(init_task) };
+
+-struct task_struct *alloc_task_struct(void)
+-{
+- return((struct task_struct *)
+- __get_free_pages(GFP_KERNEL, CONFIG_KERNEL_STACK_ORDER));
+-}
+-
+ void unprotect_stack(unsigned long stack)
+ {
+ protect_memory(stack, (1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE,
+ 1, 1, 0, 1);
+ }
+
+-void free_task_struct(struct task_struct *task)
+-{
+- /* free_pages decrements the page counter and only actually frees
+- * the pages if they are now not accessed by anything.
+- */
+- free_pages((unsigned long) task, CONFIG_KERNEL_STACK_ORDER);
+-}
+-
+ /*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+diff -Naur a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
+--- a/arch/um/kernel/irq.c 2004-02-11 12:16:32.000000000 -0500
++++ b/arch/um/kernel/irq.c 2004-02-11 12:28:31.000000000 -0500
+@@ -29,6 +29,7 @@
+ #include "user_util.h"
+ #include "kern_util.h"
+ #include "irq_user.h"
++#include "irq_kern.h"
+
+ static void register_irq_proc (unsigned int irq);
+
+@@ -83,65 +84,55 @@
+ end_none
+ };
+
+-/* Not changed */
+-volatile unsigned long irq_err_count;
+-
+ /*
+ * Generic, controller-independent functions:
+ */
+
+-int get_irq_list(char *buf)
++int show_interrupts(struct seq_file *p, void *v)
+ {
+- int i, j;
+- unsigned long flags;
++ int i = *(loff_t *) v, j;
+ struct irqaction * action;
+- char *p = buf;
++ unsigned long flags;
+
+- p += sprintf(p, " ");
+- for (j=0; j<num_online_cpus(); j++)
+- p += sprintf(p, "CPU%d ",j);
+- *p++ = '\n';
++ if (i == 0) {
++ seq_printf(p, " ");
++ for (j=0; j<NR_CPUS; j++)
++ if (cpu_online(j))
++ seq_printf(p, "CPU%d ",j);
++ seq_putc(p, '\n');
++ }
+
+- for (i = 0 ; i < NR_IRQS ; i++) {
++ if (i < NR_IRQS) {
+ spin_lock_irqsave(&irq_desc[i].lock, flags);
+ action = irq_desc[i].action;
+ if (!action)
+- goto end;
+- p += sprintf(p, "%3d: ",i);
++ goto skip;
++ seq_printf(p, "%3d: ",i);
+ #ifndef CONFIG_SMP
+- p += sprintf(p, "%10u ", kstat_irqs(i));
++ seq_printf(p, "%10u ", kstat_irqs(i));
+ #else
+- for (j = 0; j < num_online_cpus(); j++)
+- p += sprintf(p, "%10u ",
+- kstat_cpu(cpu_logical_map(j)).irqs[i]);
++ for (j = 0; j < NR_CPUS; j++)
++ if (cpu_online(j))
++ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ #endif
+- p += sprintf(p, " %14s", irq_desc[i].handler->typename);
+- p += sprintf(p, " %s", action->name);
++ seq_printf(p, " %14s", irq_desc[i].handler->typename);
++ seq_printf(p, " %s", action->name);
+
+ for (action=action->next; action; action = action->next)
+- p += sprintf(p, ", %s", action->name);
+- *p++ = '\n';
+- end:
++ seq_printf(p, ", %s", action->name);
++
++ seq_putc(p, '\n');
++skip:
+ spin_unlock_irqrestore(&irq_desc[i].lock, flags);
++ } else if (i == NR_IRQS) {
++ seq_printf(p, "NMI: ");
++ for (j = 0; j < NR_CPUS; j++)
++ if (cpu_online(j))
++ seq_printf(p, "%10u ", nmi_count(j));
++ seq_putc(p, '\n');
+ }
+- p += sprintf(p, "\n");
+-#ifdef notdef
+-#ifdef CONFIG_SMP
+- p += sprintf(p, "LOC: ");
+- for (j = 0; j < num_online_cpus(); j++)
+- p += sprintf(p, "%10u ",
+- apic_timer_irqs[cpu_logical_map(j)]);
+- p += sprintf(p, "\n");
+-#endif
+-#endif
+- p += sprintf(p, "ERR: %10lu\n", irq_err_count);
+- return p - buf;
+-}
+
+-
+-int show_interrupts(struct seq_file *p, void *v)
+-{
+- return(0);
++ return 0;
+ }
+
+ /*
+@@ -230,8 +221,11 @@
+
+ void disable_irq(unsigned int irq)
+ {
++ irq_desc_t *desc = irq_desc + irq;
++
+ disable_irq_nosync(irq);
+- synchronize_irq(irq);
++ if(desc->action)
++ synchronize_irq(irq);
+ }
+
+ /**
+@@ -252,7 +246,7 @@
+ spin_lock_irqsave(&desc->lock, flags);
+ switch (desc->depth) {
+ case 1: {
+- unsigned int status = desc->status & ~IRQ_DISABLED;
++		unsigned int status = desc->status & ~IRQ_DISABLED;
+ desc->status = status;
+ if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) {
+ desc->status = status | IRQ_REPLAY;
+@@ -282,13 +276,12 @@
+ * 0 return value means that this irq is already being
+ * handled by some other CPU. (or is disabled)
+ */
+- int cpu = smp_processor_id();
+ irq_desc_t *desc = irq_desc + irq;
+ struct irqaction * action;
+ unsigned int status;
+
+ irq_enter();
+- kstat_cpu(cpu).irqs[irq]++;
++ kstat_this_cpu.irqs[irq]++;
+ spin_lock(&desc->lock);
+ desc->handler->ack(irq);
+ /*
+@@ -385,7 +378,7 @@
+ */
+
+ int request_irq(unsigned int irq,
+- void (*handler)(int, void *, struct pt_regs *),
++ irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ unsigned long irqflags,
+ const char * devname,
+ void *dev_id)
+@@ -433,15 +426,19 @@
+ EXPORT_SYMBOL(request_irq);
+
+ int um_request_irq(unsigned int irq, int fd, int type,
+- void (*handler)(int, void *, struct pt_regs *),
++ irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ unsigned long irqflags, const char * devname,
+ void *dev_id)
+ {
+- int retval;
++ int err;
+
+- retval = request_irq(irq, handler, irqflags, devname, dev_id);
+- if(retval) return(retval);
+- return(activate_fd(irq, fd, type, dev_id));
++ err = request_irq(irq, handler, irqflags, devname, dev_id);
++ if(err)
++ return(err);
++
++ if(fd != -1)
++ err = activate_fd(irq, fd, type, dev_id);
++ return(err);
+ }
+
+ /* this was setup_x86_irq but it seems pretty generic */
+@@ -474,7 +471,8 @@
+ */
+ spin_lock_irqsave(&desc->lock,flags);
+ p = &desc->action;
+- if ((old = *p) != NULL) {
++ old = *p;
++ if (old != NULL) {
+ /* Can't share interrupts unless both agree to */
+ if (!(old->flags & new->flags & SA_SHIRQ)) {
+ spin_unlock_irqrestore(&desc->lock,flags);
+@@ -586,12 +584,14 @@
+ unsigned long count, void *data)
+ {
+ int irq = (long) data, full_count = count, err;
+- cpumask_t new_value, tmp;
++ cpumask_t new_value;
+
+ if (!irq_desc[irq].handler->set_affinity)
+ return -EIO;
+
+ err = cpumask_parse(buffer, count, new_value);
++ if(err)
++ return(err);
+
+ #ifdef CONFIG_SMP
+ /*
+@@ -599,9 +599,11 @@
+ * way to make the system unusable accidentally :-) At least
+ * one online CPU still has to be targeted.
+ */
+- cpus_and(tmp, new_value, cpu_online_map);
+- if (cpus_empty(tmp))
++ { cpumask_t tmp;
++ cpus_and(tmp, new_value, cpu_online_map);
++ if (cpus_empty(tmp))
+ return -EINVAL;
++ }
+ #endif
+
+ irq_affinity[irq] = new_value;
+@@ -614,6 +616,7 @@
+ int count, int *eof, void *data)
+ {
+ int len = cpumask_snprintf(page, count, *(cpumask_t *)data);
++
+ if (count - len < 2)
+ return -EINVAL;
+ len += sprintf(page + len, "\n");
+diff -Naur a/arch/um/kernel/irq_user.c b/arch/um/kernel/irq_user.c
+--- a/arch/um/kernel/irq_user.c 2004-02-11 12:15:17.000000000 -0500
++++ b/arch/um/kernel/irq_user.c 2004-02-11 12:27:06.000000000 -0500
+@@ -6,7 +6,6 @@
+ #include <stdlib.h>
+ #include <unistd.h>
+ #include <errno.h>
+-#include <fcntl.h>
+ #include <signal.h>
+ #include <string.h>
+ #include <sys/poll.h>
+@@ -49,7 +48,8 @@
+
+ if(smp_sigio_handler()) return;
+ while(1){
+- if((n = poll(pollfds, pollfds_num, 0)) < 0){
++ n = poll(pollfds, pollfds_num, 0);
++ if(n < 0){
+ if(errno == EINTR) continue;
+ printk("sigio_handler : poll returned %d, "
+ "errno = %d\n", n, errno);
+@@ -366,34 +366,31 @@
+
+ void forward_ipi(int fd, int pid)
+ {
+- if(fcntl(fd, F_SETOWN, pid) < 0){
+- int save_errno = errno;
+- if(fcntl(fd, F_GETOWN, 0) != pid){
+- printk("forward_ipi: F_SETOWN failed, fd = %d, "
+- "me = %d, target = %d, errno = %d\n", fd,
+- os_getpid(), pid, save_errno);
+- }
+- }
++ int err;
++
++ err = os_set_owner(fd, pid);
++ if(err < 0)
++ printk("forward_ipi: set_owner failed, fd = %d, me = %d, "
++ "target = %d, err = %d\n", fd, os_getpid(), pid, -err);
+ }
+
+ void forward_interrupts(int pid)
+ {
+ struct irq_fd *irq;
+ unsigned long flags;
++ int err;
+
+ flags = irq_lock();
+ for(irq=active_fds;irq != NULL;irq = irq->next){
+- if(fcntl(irq->fd, F_SETOWN, pid) < 0){
+- int save_errno = errno;
+- if(fcntl(irq->fd, F_GETOWN, 0) != pid){
+- /* XXX Just remove the irq rather than
+- * print out an infinite stream of these
+- */
+- printk("Failed to forward %d to pid %d, "
+- "errno = %d\n", irq->fd, pid,
+- save_errno);
+- }
++ err = os_set_owner(irq->fd, pid);
++ if(err < 0){
++ /* XXX Just remove the irq rather than
++ * print out an infinite stream of these
++ */
++ printk("Failed to forward %d to pid %d, err = %d\n",
++ irq->fd, pid, -err);
+ }
++
+ irq->pid = pid;
+ }
+ irq_unlock(flags);
+diff -Naur a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c
+--- a/arch/um/kernel/ksyms.c 2004-02-11 12:14:17.000000000 -0500
++++ b/arch/um/kernel/ksyms.c 2004-02-11 12:26:00.000000000 -0500
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
++ * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+@@ -34,34 +34,63 @@
+ EXPORT_SYMBOL(flush_tlb_range);
+ EXPORT_SYMBOL(host_task_size);
+ EXPORT_SYMBOL(arch_validate);
++EXPORT_SYMBOL(get_kmem_end);
+
+-EXPORT_SYMBOL(region_pa);
+-EXPORT_SYMBOL(region_va);
+-EXPORT_SYMBOL(phys_mem_map);
+-EXPORT_SYMBOL(page_mem_map);
+ EXPORT_SYMBOL(page_to_phys);
+ EXPORT_SYMBOL(phys_to_page);
+ EXPORT_SYMBOL(high_physmem);
+ EXPORT_SYMBOL(empty_zero_page);
+ EXPORT_SYMBOL(um_virt_to_phys);
++EXPORT_SYMBOL(__virt_to_page);
++EXPORT_SYMBOL(to_phys);
++EXPORT_SYMBOL(to_virt);
+ EXPORT_SYMBOL(mode_tt);
+ EXPORT_SYMBOL(handle_page_fault);
+
++#ifdef CONFIG_MODE_TT
++EXPORT_SYMBOL(copy_from_user_tt);
++EXPORT_SYMBOL(copy_to_user_tt);
++#endif
++
++#ifdef CONFIG_MODE_SKAS
++EXPORT_SYMBOL(copy_to_user_skas);
++EXPORT_SYMBOL(copy_from_user_skas);
++#endif
++
++EXPORT_SYMBOL(os_stat_fd);
++EXPORT_SYMBOL(os_stat_file);
++EXPORT_SYMBOL(os_access);
++EXPORT_SYMBOL(os_print_error);
++EXPORT_SYMBOL(os_get_exec_close);
++EXPORT_SYMBOL(os_set_exec_close);
+ EXPORT_SYMBOL(os_getpid);
+ EXPORT_SYMBOL(os_open_file);
+ EXPORT_SYMBOL(os_read_file);
+ EXPORT_SYMBOL(os_write_file);
+ EXPORT_SYMBOL(os_seek_file);
++EXPORT_SYMBOL(os_lock_file);
++EXPORT_SYMBOL(os_ioctl_generic);
+ EXPORT_SYMBOL(os_pipe);
+ EXPORT_SYMBOL(os_file_type);
++EXPORT_SYMBOL(os_file_mode);
++EXPORT_SYMBOL(os_file_size);
++EXPORT_SYMBOL(os_flush_stdout);
+ EXPORT_SYMBOL(os_close_file);
++EXPORT_SYMBOL(os_set_fd_async);
++EXPORT_SYMBOL(os_set_fd_block);
+ EXPORT_SYMBOL(helper_wait);
+ EXPORT_SYMBOL(os_shutdown_socket);
++EXPORT_SYMBOL(os_create_unix_socket);
+ EXPORT_SYMBOL(os_connect_socket);
++EXPORT_SYMBOL(os_accept_connection);
++EXPORT_SYMBOL(os_rcv_fd);
+ EXPORT_SYMBOL(run_helper);
+ EXPORT_SYMBOL(start_thread);
+ EXPORT_SYMBOL(dump_thread);
+
++EXPORT_SYMBOL(do_gettimeofday);
++EXPORT_SYMBOL(do_settimeofday);
++
+ /* This is here because UML expands open to sys_open, not to a system
+ * call instruction.
+ */
+@@ -90,3 +119,13 @@
+ EXPORT_SYMBOL(kmap_atomic_to_page);
+ #endif
+
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
+--- a/arch/um/kernel/Makefile 2004-02-11 12:16:04.000000000 -0500
++++ b/arch/um/kernel/Makefile 2004-02-11 12:28:00.000000000 -0500
+@@ -7,11 +7,11 @@
+
+ obj-y = checksum.o config.o exec_kern.o exitcode.o frame_kern.o frame.o \
+ helper.o init_task.o irq.o irq_user.o ksyms.o mem.o mem_user.o \
+- process.o process_kern.o ptrace.o reboot.o resource.o sigio_user.o \
+- sigio_kern.o signal_kern.o signal_user.o smp.o syscall_kern.o \
+- syscall_user.o sysrq.o sys_call_table.o tempfile.o time.o \
+- time_kern.o tlb.o trap_kern.o trap_user.o uaccess_user.o um_arch.o \
+- umid.o user_syms.o user_util.o
++ physmem.o process.o process_kern.o ptrace.o reboot.o resource.o \
++ sigio_user.o sigio_kern.o signal_kern.o signal_user.o smp.o \
++ syscall_kern.o syscall_user.o sysrq.o sys_call_table.o tempfile.o \
++ time.o time_kern.o tlb.o trap_kern.o trap_user.o uaccess_user.o \
++ um_arch.o umid.o user_syms.o user_util.o
+
+ obj-$(CONFIG_BLK_DEV_INITRD) += initrd_kern.o initrd_user.o
+ obj-$(CONFIG_GPROF) += gprof_syms.o
+@@ -36,31 +36,22 @@
+
+ CFLAGS_frame.o := $(patsubst -fomit-frame-pointer,,$(USER_CFLAGS))
+
+-$(USER_OBJS) : %.o: %.c
+- $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $<
+-
+ # This has to be separate because it needs be compiled with frame pointers
+ # regardless of how the rest of the kernel is built.
+
+ $(obj)/frame.o: $(src)/frame.c
+ $(CC) $(CFLAGS_$(notdir $@)) -c -o $@ $<
+
+-QUOTE = 'my $$config=`cat $(TOPDIR)/.config`; $$config =~ s/"/\\"/g ; while(<STDIN>) { $$_ =~ s/CONFIG/$$config/; print $$_ }'
++$(USER_OBJS) : %.o: %.c
++ $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $<
+
+-$(obj)/config.c : $(src)/config.c.in $(TOPDIR)/.config
+- $(PERL) -e $(QUOTE) < $(src)/config.c.in > $@
++QUOTE = 'my $$config=`cat $(TOPDIR)/.config`; $$config =~ s/"/\\"/g ; $$config =~ s/\n/\\n"\n"/g ; while(<STDIN>) { $$_ =~ s/CONFIG/$$config/; print $$_ }'
+
+ $(obj)/config.o : $(obj)/config.c
+
+-clean:
+- rm -f config.c
+- for dir in $(subdir-y) ; do $(MAKE) -C $$dir clean; done
+-
+-modules:
+-
+-fastdep:
+-
+-dep:
+-
+-archmrproper: clean
++quiet_cmd_quote = QUOTE $@
++cmd_quote = $(PERL) -e $(QUOTE) < $< > $@
+
++targets += config.c
++$(obj)/config.c : $(src)/config.c.in $(TOPDIR)/.config FORCE
++ $(call if_changed,quote)
+diff -Naur a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
+--- a/arch/um/kernel/mem.c 2004-02-11 12:15:11.000000000 -0500
++++ b/arch/um/kernel/mem.c 2004-02-11 12:26:57.000000000 -0500
+@@ -1,74 +1,66 @@
+ /*
+- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
++ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+-#include "linux/config.h"
+-#include "linux/module.h"
+-#include "linux/types.h"
++#include "linux/stddef.h"
++#include "linux/kernel.h"
+ #include "linux/mm.h"
+-#include "linux/fs.h"
+-#include "linux/init.h"
+ #include "linux/bootmem.h"
+ #include "linux/swap.h"
+-#include "linux/slab.h"
+-#include "linux/vmalloc.h"
+ #include "linux/highmem.h"
++#include "linux/gfp.h"
+ #include "asm/page.h"
+-#include "asm/pgtable.h"
++#include "asm/fixmap.h"
+ #include "asm/pgalloc.h"
+-#include "asm/bitops.h"
+-#include "asm/uaccess.h"
+-#include "asm/tlb.h"
+ #include "user_util.h"
+ #include "kern_util.h"
+-#include "mem_user.h"
+-#include "mem.h"
+ #include "kern.h"
+-#include "init.h"
+-#include "os.h"
+-#include "mode_kern.h"
++#include "mem_user.h"
+ #include "uml_uaccess.h"
++#include "os.h"
++
++extern char __binary_start;
+
+ /* Changed during early boot */
+-pgd_t swapper_pg_dir[1024];
+-unsigned long high_physmem;
+-unsigned long vm_start;
+-unsigned long vm_end;
+-unsigned long highmem;
+ unsigned long *empty_zero_page = NULL;
+ unsigned long *empty_bad_page = NULL;
+-
+-/* Not modified */
+-const char bad_pmd_string[] = "Bad pmd in pte_alloc: %08lx\n";
+-
+-extern char __init_begin, __init_end;
+-extern long physmem_size;
+-
+-/* Not changed by UML */
+-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+-
+-/* Changed during early boot */
++pgd_t swapper_pg_dir[1024];
++unsigned long highmem;
+ int kmalloc_ok = 0;
+
+-#define NREGIONS (phys_region_index(0xffffffff) - phys_region_index(0x0) + 1)
+-struct mem_region *regions[NREGIONS] = { [ 0 ... NREGIONS - 1 ] = NULL };
+-#define REGION_SIZE ((0xffffffff & ~REGION_MASK) + 1)
+-
+-/* Changed during early boot */
+ static unsigned long brk_end;
+
++void unmap_physmem(void)
++{
++ os_unmap_memory((void *) brk_end, uml_reserved - brk_end);
++}
++
+ static void map_cb(void *unused)
+ {
+ map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0);
+ }
+
+-void unmap_physmem(void)
++#ifdef CONFIG_HIGHMEM
++static void setup_highmem(unsigned long highmem_start,
++ unsigned long highmem_len)
+ {
+- os_unmap_memory((void *) brk_end, uml_reserved - brk_end);
+-}
++ struct page *page;
++ unsigned long highmem_pfn;
++ int i;
+
+-extern char __binary_start;
++ highmem_start_page = virt_to_page(highmem_start);
++
++ highmem_pfn = __pa(highmem_start) >> PAGE_SHIFT;
++ for(i = 0; i < highmem_len >> PAGE_SHIFT; i++){
++ page = &mem_map[highmem_pfn + i];
++ ClearPageReserved(page);
++ set_bit(PG_highmem, &page->flags);
++ atomic_set(&page->count, 1);
++ __free_page(page);
++ }
++}
++#endif
+
+ void mem_init(void)
+ {
+@@ -103,50 +95,15 @@
+ totalhigh_pages = highmem >> PAGE_SHIFT;
+ totalram_pages += totalhigh_pages;
+ num_physpages = totalram_pages;
+- max_mapnr = totalram_pages;
+ max_pfn = totalram_pages;
+ printk(KERN_INFO "Memory: %luk available\n",
+ (unsigned long) nr_free_pages() << (PAGE_SHIFT-10));
+ kmalloc_ok = 1;
+-}
+-
+-/* Changed during early boot */
+-static unsigned long kmem_top = 0;
+-
+-unsigned long get_kmem_end(void)
+-{
+- if(kmem_top == 0)
+- kmem_top = CHOOSE_MODE(kmem_end_tt, kmem_end_skas);
+- return(kmem_top);
+-}
+-
+-void set_kmem_end(unsigned long new)
+-{
+- kmem_top = new;
+-}
+
+ #ifdef CONFIG_HIGHMEM
+-/* Changed during early boot */
+-pte_t *kmap_pte;
+-pgprot_t kmap_prot;
+-
+-EXPORT_SYMBOL(kmap_prot);
+-EXPORT_SYMBOL(kmap_pte);
+-
+-#define kmap_get_fixmap_pte(vaddr) \
+- pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
+-
+-void __init kmap_init(void)
+-{
+- unsigned long kmap_vstart;
+-
+- /* cache the first kmap pte */
+- kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
+- kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
+-
+- kmap_prot = PAGE_KERNEL;
++ setup_highmem(end_iomem, highmem);
++#endif
+ }
+-#endif /* CONFIG_HIGHMEM */
+
+ static void __init fixrange_init(unsigned long start, unsigned long end,
+ pgd_t *pgd_base)
+@@ -178,76 +135,24 @@
+ }
+ }
+
+-int init_maps(struct mem_region *region)
+-{
+- struct page *p, *map;
+- int i, n, len;
+-
+- if(region == &physmem_region){
+- region->mem_map = mem_map;
+- return(0);
+- }
+- else if(region->mem_map != NULL) return(0);
+-
+- n = region->len >> PAGE_SHIFT;
+- len = n * sizeof(struct page);
+- if(kmalloc_ok){
+- map = kmalloc(len, GFP_KERNEL);
+- if(map == NULL) map = vmalloc(len);
+- }
+- else map = alloc_bootmem_low_pages(len);
+-
+- if(map == NULL)
+- return(-ENOMEM);
+- for(i = 0; i < n; i++){
+- p = &map[i];
+- set_page_count(p, 0);
+- SetPageReserved(p);
+- INIT_LIST_HEAD(&p->list);
+- }
+- region->mem_map = map;
+- return(0);
+-}
++#ifdef CONFIG_HIGHMEM
++pte_t *kmap_pte;
++pgprot_t kmap_prot;
+
+-DECLARE_MUTEX(regions_sem);
++#define kmap_get_fixmap_pte(vaddr) \
++ pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
+
+-static int setup_one_range(int fd, char *driver, unsigned long start,
+- unsigned long pfn, int len,
+- struct mem_region *region)
++void __init kmap_init(void)
+ {
+- int i;
+-
+- down(®ions_sem);
+- for(i = 0; i < NREGIONS; i++){
+- if(regions[i] == NULL) break;
+- }
+- if(i == NREGIONS){
+- printk("setup_range : no free regions\n");
+- i = -1;
+- goto out;
+- }
+-
+- if(fd == -1)
+- fd = create_mem_file(len);
++ unsigned long kmap_vstart;
+
+- if(region == NULL){
+- region = alloc_bootmem_low_pages(sizeof(*region));
+- if(region == NULL)
+- panic("Failed to allocating mem_region");
+- }
++ /* cache the first kmap pte */
++ kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
++ kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
+
+- *region = ((struct mem_region) { .driver = driver,
+- .start_pfn = pfn,
+- .start = start,
+- .len = len,
+- .fd = fd } );
+- regions[i] = region;
+- out:
+- up(®ions_sem);
+- return(i);
++ kmap_prot = PAGE_KERNEL;
+ }
+
+-#ifdef CONFIG_HIGHMEM
+ static void init_highmem(void)
+ {
+ pgd_t *pgd;
+@@ -268,63 +173,20 @@
+
+ kmap_init();
+ }
+-
+-void setup_highmem(unsigned long len)
+-{
+- struct mem_region *region;
+- struct page *page, *map;
+- unsigned long phys;
+- int i, cur, index;
+-
+- phys = physmem_size;
+- do {
+- cur = min(len, (unsigned long) REGION_SIZE);
+- i = setup_one_range(-1, NULL, -1, phys >> PAGE_SHIFT, cur,
+- NULL);
+- if(i == -1){
+- printk("setup_highmem - setup_one_range failed\n");
+- return;
+- }
+- region = regions[i];
+- index = phys / PAGE_SIZE;
+- region->mem_map = &mem_map[index];
+-
+- map = region->mem_map;
+- for(i = 0; i < (cur >> PAGE_SHIFT); i++){
+- page = &map[i];
+- ClearPageReserved(page);
+- set_bit(PG_highmem, &page->flags);
+- atomic_set(&page->count, 1);
+- __free_page(page);
+- }
+- phys += cur;
+- len -= cur;
+- } while(len > 0);
+-}
+-#endif
++#endif /* CONFIG_HIGHMEM */
+
+ void paging_init(void)
+ {
+- struct mem_region *region;
+- unsigned long zones_size[MAX_NR_ZONES], start, end, vaddr;
+- int i, index;
++ unsigned long zones_size[MAX_NR_ZONES], vaddr;
++ int i;
+
+ empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE);
+ empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE);
+ for(i=0;i<sizeof(zones_size)/sizeof(zones_size[0]);i++)
+ zones_size[i] = 0;
+- zones_size[0] = (high_physmem >> PAGE_SHIFT) -
+- (uml_physmem >> PAGE_SHIFT);
++ zones_size[0] = (end_iomem >> PAGE_SHIFT) - (uml_physmem >> PAGE_SHIFT);
+ zones_size[2] = highmem >> PAGE_SHIFT;
+ free_area_init(zones_size);
+- start = phys_region_index(__pa(uml_physmem));
+- end = phys_region_index(__pa(high_physmem - 1));
+- for(i = start; i <= end; i++){
+- region = regions[i];
+- index = (region->start - uml_physmem) / PAGE_SIZE;
+- region->mem_map = &mem_map[index];
+- if(i > start) free_bootmem(__pa(region->start), region->len);
+- }
+
+ /*
+ * Fixed mappings, only the page table structure has to be
+@@ -335,15 +197,33 @@
+
+ #ifdef CONFIG_HIGHMEM
+ init_highmem();
+- setup_highmem(highmem);
+ #endif
+ }
+
+-pte_t __bad_page(void)
++struct page *arch_validate(struct page *page, int mask, int order)
+ {
+- clear_page(empty_bad_page);
+- return pte_mkdirty(mk_pte((struct page *) empty_bad_page,
+- PAGE_SHARED));
++ unsigned long addr, zero = 0;
++ int i;
++
++ again:
++ if(page == NULL) return(page);
++ if(PageHighMem(page)) return(page);
++
++ addr = (unsigned long) page_address(page);
++ for(i = 0; i < (1 << order); i++){
++ current->thread.fault_addr = (void *) addr;
++ if(__do_copy_to_user((void *) addr, &zero,
++ sizeof(zero),
++ ¤t->thread.fault_addr,
++ ¤t->thread.fault_catcher)){
++ if(!(mask & __GFP_WAIT)) return(NULL);
++ else break;
++ }
++ addr += PAGE_SIZE;
++ }
++ if(i == (1 << order)) return(page);
++ page = alloc_pages(mask, order);
++ goto again;
+ }
+
+ /* This can't do anything because nothing in the kernel image can be freed
+@@ -401,395 +281,6 @@
+ printk("%d pages swap cached\n", cached);
+ }
+
+-static int __init uml_mem_setup(char *line, int *add)
+-{
+- char *retptr;
+- physmem_size = memparse(line,&retptr);
+- return 0;
+-}
+-__uml_setup("mem=", uml_mem_setup,
+-"mem=<Amount of desired ram>\n"
+-" This controls how much \"physical\" memory the kernel allocates\n"
+-" for the system. The size is specified as a number followed by\n"
+-" one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n"
+-" This is not related to the amount of memory in the physical\n"
+-" machine. It can be more, and the excess, if it's ever used, will\n"
+-" just be swapped out.\n Example: mem=64M\n\n"
+-);
+-
+-struct page *arch_validate(struct page *page, int mask, int order)
+-{
+- unsigned long addr, zero = 0;
+- int i;
+-
+- again:
+- if(page == NULL) return(page);
+- if(PageHighMem(page)) return(page);
+-
+- addr = (unsigned long) page_address(page);
+- for(i = 0; i < (1 << order); i++){
+- current->thread.fault_addr = (void *) addr;
+- if(__do_copy_to_user((void *) addr, &zero,
+- sizeof(zero),
+- ¤t->thread.fault_addr,
+- ¤t->thread.fault_catcher)){
+- if(!(mask & __GFP_WAIT)) return(NULL);
+- else break;
+- }
+- addr += PAGE_SIZE;
+- }
+- if(i == (1 << order)) return(page);
+- page = alloc_pages(mask, order);
+- goto again;
+-}
+-
+-DECLARE_MUTEX(vm_reserved_sem);
+-static struct list_head vm_reserved = LIST_HEAD_INIT(vm_reserved);
+-
+-/* Static structures, linked in to the list in early boot */
+-static struct vm_reserved head = {
+- .list = LIST_HEAD_INIT(head.list),
+- .start = 0,
+- .end = 0xffffffff
+-};
+-
+-static struct vm_reserved tail = {
+- .list = LIST_HEAD_INIT(tail.list),
+- .start = 0,
+- .end = 0xffffffff
+-};
+-
+-void set_usable_vm(unsigned long start, unsigned long end)
+-{
+- list_add(&head.list, &vm_reserved);
+- list_add(&tail.list, &head.list);
+- head.end = start;
+- tail.start = end;
+-}
+-
+-int reserve_vm(unsigned long start, unsigned long end, void *e)
+-
+-{
+- struct vm_reserved *entry = e, *reserved, *prev;
+- struct list_head *ele;
+- int err;
+-
+- down(&vm_reserved_sem);
+- list_for_each(ele, &vm_reserved){
+- reserved = list_entry(ele, struct vm_reserved, list);
+- if(reserved->start >= end) goto found;
+- }
+- panic("Reserved vm out of range");
+- found:
+- prev = list_entry(ele->prev, struct vm_reserved, list);
+- if(prev->end > start)
+- panic("Can't reserve vm");
+- if(entry == NULL)
+- entry = kmalloc(sizeof(*entry), GFP_KERNEL);
+- if(entry == NULL){
+- printk("reserve_vm : Failed to allocate entry\n");
+- err = -ENOMEM;
+- goto out;
+- }
+- *entry = ((struct vm_reserved)
+- { .list = LIST_HEAD_INIT(entry->list),
+- .start = start,
+- .end = end });
+- list_add(&entry->list, &prev->list);
+- err = 0;
+- out:
+- up(&vm_reserved_sem);
+- return(0);
+-}
+-
+-unsigned long get_vm(unsigned long len)
+-{
+- struct vm_reserved *this, *next;
+- struct list_head *ele;
+- unsigned long start;
+- int err;
+-
+- down(&vm_reserved_sem);
+- list_for_each(ele, &vm_reserved){
+- this = list_entry(ele, struct vm_reserved, list);
+- next = list_entry(ele->next, struct vm_reserved, list);
+- if((this->start < next->start) &&
+- (this->end + len + PAGE_SIZE <= next->start))
+- goto found;
+- }
+- up(&vm_reserved_sem);
+- return(0);
+- found:
+- up(&vm_reserved_sem);
+- start = (unsigned long) UML_ROUND_UP(this->end) + PAGE_SIZE;
+- err = reserve_vm(start, start + len, NULL);
+- if(err) return(0);
+- return(start);
+-}
+-
+-int nregions(void)
+-{
+- return(NREGIONS);
+-}
+-
+-void setup_range(int fd, char *driver, unsigned long start, unsigned long pfn,
+- unsigned long len, int need_vm, struct mem_region *region,
+- void *reserved)
+-{
+- int i, cur;
+-
+- do {
+- cur = min(len, (unsigned long) REGION_SIZE);
+- i = setup_one_range(fd, driver, start, pfn, cur, region);
+- region = regions[i];
+- if(need_vm && setup_region(region, reserved)){
+- kfree(region);
+- regions[i] = NULL;
+- return;
+- }
+- start += cur;
+- if(pfn != -1) pfn += cur;
+- len -= cur;
+- } while(len > 0);
+-}
+-
+-struct iomem {
+- char *name;
+- int fd;
+- unsigned long size;
+-};
+-
+-/* iomem regions can only be added on the command line at the moment.
+- * Locking will be needed when they can be added via mconsole.
+- */
+-
+-struct iomem iomem_regions[NREGIONS] = { [ 0 ... NREGIONS - 1 ] =
+- { .name = NULL,
+- .fd = -1,
+- .size = 0 } };
+-
+-int num_iomem_regions = 0;
+-
+-void add_iomem(char *name, int fd, unsigned long size)
+-{
+- if(num_iomem_regions == sizeof(iomem_regions)/sizeof(iomem_regions[0]))
+- return;
+- size = (size + PAGE_SIZE - 1) & PAGE_MASK;
+- iomem_regions[num_iomem_regions++] =
+- ((struct iomem) { .name = name,
+- .fd = fd,
+- .size = size } );
+-}
+-
+-int setup_iomem(void)
+-{
+- struct iomem *iomem;
+- int i;
+-
+- for(i = 0; i < num_iomem_regions; i++){
+- iomem = &iomem_regions[i];
+- setup_range(iomem->fd, iomem->name, -1, -1, iomem->size, 1,
+- NULL, NULL);
+- }
+- return(0);
+-}
+-
+-__initcall(setup_iomem);
+-
+-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+-#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
+-
+-/* Changed during early boot */
+-static struct mem_region physmem_region;
+-static struct vm_reserved physmem_reserved;
+-
+-void setup_physmem(unsigned long start, unsigned long reserve_end,
+- unsigned long len)
+-{
+- struct mem_region *region = &physmem_region;
+- struct vm_reserved *reserved = &physmem_reserved;
+- unsigned long cur, pfn = 0;
+- int do_free = 1, bootmap_size;
+-
+- do {
+- cur = min(len, (unsigned long) REGION_SIZE);
+- if(region == NULL)
+- region = alloc_bootmem_low_pages(sizeof(*region));
+- if(reserved == NULL)
+- reserved = alloc_bootmem_low_pages(sizeof(*reserved));
+- if((region == NULL) || (reserved == NULL))
+- panic("Couldn't allocate physmem region or vm "
+- "reservation\n");
+- setup_range(-1, NULL, start, pfn, cur, 1, region, reserved);
+-
+- if(do_free){
+- unsigned long reserve = reserve_end - start;
+- int pfn = PFN_UP(__pa(reserve_end));
+- int delta = (len - reserve) >> PAGE_SHIFT;
+-
+- bootmap_size = init_bootmem(pfn, pfn + delta);
+- free_bootmem(__pa(reserve_end) + bootmap_size,
+- cur - bootmap_size - reserve);
+- do_free = 0;
+- }
+- start += cur;
+- pfn += cur >> PAGE_SHIFT;
+- len -= cur;
+- region = NULL;
+- reserved = NULL;
+- } while(len > 0);
+-}
+-
+-struct mem_region *phys_region(unsigned long phys)
+-{
+- unsigned int n = phys_region_index(phys);
+-
+- if(regions[n] == NULL)
+- panic("Physical address in uninitialized region");
+- return(regions[n]);
+-}
+-
+-unsigned long phys_offset(unsigned long phys)
+-{
+- return(phys_addr(phys));
+-}
+-
+-struct page *phys_mem_map(unsigned long phys)
+-{
+- return((struct page *) phys_region(phys)->mem_map);
+-}
+-
+-struct page *pte_mem_map(pte_t pte)
+-{
+- return(phys_mem_map(pte_val(pte)));
+-}
+-
+-struct mem_region *page_region(struct page *page, int *index_out)
+-{
+- int i;
+- struct mem_region *region;
+- struct page *map;
+-
+- for(i = 0; i < NREGIONS; i++){
+- region = regions[i];
+- if(region == NULL) continue;
+- map = region->mem_map;
+- if((page >= map) && (page < &map[region->len >> PAGE_SHIFT])){
+- if(index_out != NULL) *index_out = i;
+- return(region);
+- }
+- }
+- panic("No region found for page");
+- return(NULL);
+-}
+-
+-unsigned long page_to_pfn(struct page *page)
+-{
+- struct mem_region *region = page_region(page, NULL);
+-
+- return(region->start_pfn + (page - (struct page *) region->mem_map));
+-}
+-
+-struct mem_region *pfn_to_region(unsigned long pfn, int *index_out)
+-{
+- struct mem_region *region;
+- int i;
+-
+- for(i = 0; i < NREGIONS; i++){
+- region = regions[i];
+- if(region == NULL)
+- continue;
+-
+- if((region->start_pfn <= pfn) &&
+- (region->start_pfn + (region->len >> PAGE_SHIFT) > pfn)){
+- if(index_out != NULL)
+- *index_out = i;
+- return(region);
+- }
+- }
+- return(NULL);
+-}
+-
+-struct page *pfn_to_page(unsigned long pfn)
+-{
+- struct mem_region *region = pfn_to_region(pfn, NULL);
+- struct page *mem_map = (struct page *) region->mem_map;
+-
+- return(&mem_map[pfn - region->start_pfn]);
+-}
+-
+-unsigned long phys_to_pfn(unsigned long p)
+-{
+- struct mem_region *region = regions[phys_region_index(p)];
+-
+- return(region->start_pfn + (phys_addr(p) >> PAGE_SHIFT));
+-}
+-
+-unsigned long pfn_to_phys(unsigned long pfn)
+-{
+- int n;
+- struct mem_region *region = pfn_to_region(pfn, &n);
+-
+- return(mk_phys((pfn - region->start_pfn) << PAGE_SHIFT, n));
+-}
+-
+-struct page *page_mem_map(struct page *page)
+-{
+- return((struct page *) page_region(page, NULL)->mem_map);
+-}
+-
+-extern unsigned long region_pa(void *virt)
+-{
+- struct mem_region *region;
+- unsigned long addr = (unsigned long) virt;
+- int i;
+-
+- for(i = 0; i < NREGIONS; i++){
+- region = regions[i];
+- if(region == NULL) continue;
+- if((region->start <= addr) &&
+- (addr <= region->start + region->len))
+- return(mk_phys(addr - region->start, i));
+- }
+- panic("region_pa : no region for virtual address");
+- return(0);
+-}
+-
+-extern void *region_va(unsigned long phys)
+-{
+- return((void *) (phys_region(phys)->start + phys_addr(phys)));
+-}
+-
+-unsigned long page_to_phys(struct page *page)
+-{
+- int n;
+- struct mem_region *region = page_region(page, &n);
+- struct page *map = region->mem_map;
+- return(mk_phys((page - map) << PAGE_SHIFT, n));
+-}
+-
+-struct page *phys_to_page(unsigned long phys)
+-{
+- struct page *mem_map;
+-
+- mem_map = phys_mem_map(phys);
+- return(mem_map + (phys_offset(phys) >> PAGE_SHIFT));
+-}
+-
+-static int setup_mem_maps(void)
+-{
+- struct mem_region *region;
+- int i;
+-
+- for(i = 0; i < NREGIONS; i++){
+- region = regions[i];
+- if((region != NULL) && (region->fd > 0)) init_maps(region);
+- }
+- return(0);
+-}
+-
+-__initcall(setup_mem_maps);
+-
+ /*
+ * Allocate and free page tables.
+ */
+diff -Naur a/arch/um/kernel/mem_user.c b/arch/um/kernel/mem_user.c
+--- a/arch/um/kernel/mem_user.c 2004-02-11 12:15:47.000000000 -0500
++++ b/arch/um/kernel/mem_user.c 2004-02-11 12:27:38.000000000 -0500
+@@ -34,10 +34,9 @@
+ #include <stddef.h>
+ #include <stdarg.h>
+ #include <unistd.h>
+-#include <fcntl.h>
+ #include <errno.h>
+ #include <string.h>
+-#include <sys/stat.h>
++#include <fcntl.h>
+ #include <sys/types.h>
+ #include <sys/mman.h>
+ #include "kern_util.h"
+@@ -47,105 +46,145 @@
+ #include "init.h"
+ #include "os.h"
+ #include "tempfile.h"
++#include "kern_constants.h"
+
+ extern struct mem_region physmem_region;
+
+ #define TEMPNAME_TEMPLATE "vm_file-XXXXXX"
+
+-int create_mem_file(unsigned long len)
++static int create_tmp_file(unsigned long len)
+ {
+- int fd;
++ int fd, err;
+ char zero;
+
+ fd = make_tempfile(TEMPNAME_TEMPLATE, NULL, 1);
+- if (fchmod(fd, 0777) < 0){
+- perror("fchmod");
++ if(fd < 0) {
++ os_print_error(fd, "make_tempfile");
++ exit(1);
++ }
++
++ err = os_mode_fd(fd, 0777);
++ if(err < 0){
++ os_print_error(err, "os_mode_fd");
+ exit(1);
+ }
+- if(os_seek_file(fd, len) < 0){
+- perror("lseek");
++ err = os_seek_file(fd, len);
++ if(err < 0){
++ os_print_error(err, "os_seek_file");
+ exit(1);
+ }
+ zero = 0;
+- if(write(fd, &zero, 1) != 1){
+- perror("write");
++ err = os_write_file(fd, &zero, 1);
++ if(err != 1){
++ os_print_error(err, "os_write_file");
+ exit(1);
+ }
+- if(fcntl(fd, F_SETFD, 1) != 0)
+- perror("Setting FD_CLOEXEC failed");
++
+ return(fd);
+ }
+
+-int setup_region(struct mem_region *region, void *entry)
++static int have_devanon = 0;
++
++void check_devanon(void)
++{
++	int fd;
++
++	printk("Checking for /dev/anon on the host...");
++	fd = open("/dev/anon", O_RDWR);
++	if(fd < 0){
++		printk("Not available (open failed with errno %d)\n", errno);
++		return;
++	}
++	os_close_file(fd);	/* probe only - create_anon_file opens its own */
++	printk("OK\n");
++	have_devanon = 1;
++}
++
++static int create_anon_file(unsigned long len)
+ {
+- void *loc, *start;
+- char *driver;
+- int err, offset;
+-
+- if(region->start != -1){
+- err = reserve_vm(region->start,
+- region->start + region->len, entry);
+- if(err){
+- printk("setup_region : failed to reserve "
+- "0x%x - 0x%x for driver '%s'\n",
+- region->start,
+- region->start + region->len,
+- region->driver);
+- return(-1);
+- }
+- }
+- else region->start = get_vm(region->len);
+- if(region->start == 0){
+- if(region->driver == NULL) driver = "physmem";
+- else driver = region->driver;
+- printk("setup_region : failed to find vm for "
+- "driver '%s' (length %d)\n", driver, region->len);
+- return(-1);
+- }
+- if(region->start == uml_physmem){
+- start = (void *) uml_reserved;
+- offset = uml_reserved - uml_physmem;
+- }
+- else {
+- start = (void *) region->start;
+- offset = 0;
+- }
+-
+- loc = mmap(start, region->len - offset, PROT_READ | PROT_WRITE,
+- MAP_SHARED | MAP_FIXED, region->fd, offset);
+- if(loc != start){
+- perror("Mapping memory");
++ void *addr;
++ int fd;
++
++ fd = open("/dev/anon", O_RDWR);
++ if(fd < 0) {
++ os_print_error(fd, "opening /dev/anon");
+ exit(1);
+ }
+- return(0);
++
++ addr = mmap(NULL, len, PROT_READ | PROT_WRITE , MAP_PRIVATE, fd, 0);
++ if(addr == MAP_FAILED){
++ os_print_error((int) addr, "mapping physmem file");
++ exit(1);
++ }
++ munmap(addr, len);
++
++ return(fd);
++}
++
++int create_mem_file(unsigned long len)
++{
++ int err, fd;
++
++ if(have_devanon)
++ fd = create_anon_file(len);
++ else fd = create_tmp_file(len);
++
++ err = os_set_exec_close(fd, 1);
++ if(err < 0)
++ os_print_error(err, "exec_close");
++ return(fd);
+ }
+
++struct iomem_region *iomem_regions = NULL;
++int iomem_size = 0;
++
+ static int __init parse_iomem(char *str, int *add)
+ {
+- struct stat buf;
++ struct iomem_region *new;
++ struct uml_stat buf;
+ char *file, *driver;
+- int fd;
++ int fd, err;
+
+ driver = str;
+ file = strchr(str,',');
+ if(file == NULL){
+- printk("parse_iomem : failed to parse iomem\n");
+- return(1);
++ printf("parse_iomem : failed to parse iomem\n");
++ goto out;
+ }
+ *file = '\0';
+ file++;
+ fd = os_open_file(file, of_rdwr(OPENFLAGS()), 0);
+ if(fd < 0){
+- printk("parse_iomem - Couldn't open io file, errno = %d\n",
+- errno);
+- return(1);
+- }
+- if(fstat(fd, &buf) < 0) {
+- printk("parse_iomem - cannot fstat file, errno = %d\n", errno);
+- return(1);
++ os_print_error(fd, "parse_iomem - Couldn't open io file");
++ goto out;
+ }
+- add_iomem(driver, fd, buf.st_size);
++
++ err = os_stat_fd(fd, &buf);
++ if(err < 0){
++ os_print_error(err, "parse_iomem - cannot stat_fd file");
++ goto out_close;
++ }
++
++ new = malloc(sizeof(*new));
++ if(new == NULL){
++ perror("Couldn't allocate iomem_region struct");
++ goto out_close;
++ }
++
++ *new = ((struct iomem_region) { .next = iomem_regions,
++ .driver = driver,
++ .fd = fd,
++ .size = buf.ust_size,
++ .phys = 0,
++ .virt = 0 });
++ iomem_regions = new;
++ iomem_size += new->size + UM_KERN_PAGE_SIZE;
++
+ return(0);
++ out_close:
++ os_close_file(fd);
++ out:
++ return(1);
+ }
+
+ __uml_setup("iomem=", parse_iomem,
+@@ -153,73 +192,20 @@
+ " Configure <file> as an IO memory region named <name>.\n\n"
+ );
+
+-#ifdef notdef
+-int logging = 0;
+-int logging_fd = -1;
+-
+-int logging_line = 0;
+-char logging_buf[256];
+-
+-void log(char *fmt, ...)
+-{
+- va_list ap;
+- struct timeval tv;
+- struct openflags flags;
+-
+- if(logging == 0) return;
+- if(logging_fd < 0){
+- flags = of_create(of_trunc(of_rdrw(OPENFLAGS())));
+- logging_fd = os_open_file("log", flags, 0644);
+- }
+- gettimeofday(&tv, NULL);
+- sprintf(logging_buf, "%d\t %u.%u ", logging_line++, tv.tv_sec,
+- tv.tv_usec);
+- va_start(ap, fmt);
+- vsprintf(&logging_buf[strlen(logging_buf)], fmt, ap);
+- va_end(ap);
+- write(logging_fd, logging_buf, strlen(logging_buf));
+-}
+-#endif
+-
+-int map_memory(unsigned long virt, unsigned long phys, unsigned long len,
+- int r, int w, int x)
+-{
+- struct mem_region *region = phys_region(phys);
+-
+- return(os_map_memory((void *) virt, region->fd, phys_offset(phys), len,
+- r, w, x));
+-}
+-
+ int protect_memory(unsigned long addr, unsigned long len, int r, int w, int x,
+ int must_succeed)
+ {
+- if(os_protect_memory((void *) addr, len, r, w, x) < 0){
++ int err;
++
++ err = os_protect_memory((void *) addr, len, r, w, x);
++ if(err < 0){
+ if(must_succeed)
+- panic("protect failed, errno = %d", errno);
+- else return(-errno);
++ panic("protect failed, err = %d", -err);
++ else return(err);
+ }
+ return(0);
+ }
+
+-unsigned long find_iomem(char *driver, unsigned long *len_out)
+-{
+- struct mem_region *region;
+- int i, n;
+-
+- n = nregions();
+- for(i = 0; i < n; i++){
+- region = regions[i];
+- if(region == NULL) continue;
+- if((region->driver != NULL) &&
+- !strcmp(region->driver, driver)){
+- *len_out = region->len;
+- return(region->start);
+- }
+- }
+- *len_out = 0;
+- return 0;
+-}
+-
+ /*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+diff -Naur a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c
+--- a/arch/um/kernel/physmem.c 1969-12-31 19:00:00.000000000 -0500
++++ b/arch/um/kernel/physmem.c 2004-02-11 12:26:07.000000000 -0500
+@@ -0,0 +1,468 @@
++/*
++ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
++ * Licensed under the GPL
++ */
++
++#include "linux/mm.h"
++#include "linux/ghash.h"
++#include "linux/slab.h"
++#include "linux/vmalloc.h"
++#include "linux/bootmem.h"
++#include "asm/types.h"
++#include "asm/pgtable.h"
++#include "kern_util.h"
++#include "user_util.h"
++#include "mode_kern.h"
++#include "mem.h"
++#include "mem_user.h"
++#include "os.h"
++#include "kern.h"
++#include "init.h"
++
++#if 0
++static pgd_t physmem_pgd[PTRS_PER_PGD];
++
++static struct phys_desc *lookup_mapping(void *addr)
++{
++ pgd = &physmem_pgd[pgd_index(addr)];
++ if(pgd_none(pgd))
++ return(NULL);
++
++ pmd = pmd_offset(pgd, addr);
++ if(pmd_none(pmd))
++ return(NULL);
++
++ pte = pte_offset_kernel(pmd, addr);
++ return((struct phys_desc *) pte_val(pte));
++}
++
++static struct add_mapping(void *addr, struct phys_desc *new)
++{
++}
++#endif
++
++#define PHYS_HASHSIZE (8192)
++
++struct phys_desc;
++
++DEF_HASH_STRUCTS(virtmem, PHYS_HASHSIZE, struct phys_desc);
++
++struct phys_desc {
++ struct virtmem_ptrs virt_ptrs;
++ int fd;
++ __u64 offset;
++ void *virt;
++ unsigned long phys;
++ struct list_head list;
++};
++
++struct virtmem_table virtmem_hash;
++
++static int virt_cmp(void *virt1, void *virt2)
++{
++ return(virt1 != virt2);
++}
++
++static int virt_hash(void *virt)
++{
++ unsigned long addr = ((unsigned long) virt) >> PAGE_SHIFT;
++ return(addr % PHYS_HASHSIZE);
++}
++
++DEF_HASH(static, virtmem, struct phys_desc, virt_ptrs, void *, virt, virt_cmp,
++ virt_hash);
++
++LIST_HEAD(descriptor_mappings);
++
++struct desc_mapping {
++ int fd;
++ struct list_head list;
++ struct list_head pages;
++};
++
++static struct desc_mapping *find_mapping(int fd)
++{
++ struct desc_mapping *desc;
++ struct list_head *ele;
++
++ list_for_each(ele, &descriptor_mappings){
++ desc = list_entry(ele, struct desc_mapping, list);
++ if(desc->fd == fd)
++ return(desc);
++ }
++
++ return(NULL);
++}
++
++static struct desc_mapping *descriptor_mapping(int fd)
++{
++ struct desc_mapping *desc;
++
++ desc = find_mapping(fd);
++ if(desc != NULL)
++ return(desc);
++
++ desc = kmalloc(sizeof(*desc), GFP_ATOMIC);
++ if(desc == NULL)
++ return(NULL);
++
++ *desc = ((struct desc_mapping)
++ { .fd = fd,
++ .list = LIST_HEAD_INIT(desc->list),
++ .pages = LIST_HEAD_INIT(desc->pages) });
++ list_add(&desc->list, &descriptor_mappings);
++
++ return(desc);
++}
++
++/* Map one page of fd (at offset) over virt and record the substitution */
++int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w)
++{
++	struct desc_mapping *fd_maps;
++	struct phys_desc *desc;
++	int err;
++
++	fd_maps = descriptor_mapping(fd);
++	if(fd_maps == NULL)
++		return(-ENOMEM);
++
++	if(find_virtmem_hash(&virtmem_hash, virt) != NULL)
++		panic("Address 0x%p is already substituted\n", virt);
++
++	err = -ENOMEM;
++	desc = kmalloc(sizeof(*desc), GFP_ATOMIC);
++	if(desc == NULL)
++		goto out;
++
++	*desc = ((struct phys_desc)
++		{ .virt_ptrs =	{ NULL, NULL },
++		  .fd =		fd,
++		  .offset =	offset,
++		  .virt =	virt,
++		  .phys =	__pa(virt),
++		  .list = 	LIST_HEAD_INIT(desc->list) });
++	insert_virtmem_hash(&virtmem_hash, desc);
++	list_add(&desc->list, &fd_maps->pages);
++
++	virt = (void *) ((unsigned long) virt & PAGE_MASK);
++	err = os_map_memory(virt, fd, offset, PAGE_SIZE, 1, w, 0);
++	if(!err)
++		goto out;
++
++	/* Mapping failed: desc must leave the hash AND the fd's page list */
++	remove_virtmem_hash(&virtmem_hash, desc);
++	list_del(&desc->list);
++	kfree(desc);
++ out:
++	return(err);
++}
++
++static int physmem_fd = -1;
++
++static void remove_mapping(struct phys_desc *desc)
++{
++ void *virt = desc->virt;
++ int err;
++
++ remove_virtmem_hash(&virtmem_hash, desc);
++ list_del(&desc->list);
++ kfree(desc);
++
++ err = os_map_memory(virt, physmem_fd, __pa(virt), PAGE_SIZE, 1, 1, 0);
++ if(err)
++ panic("Failed to unmap block device page from physical memory, "
++ "errno = %d", -err);
++}
++
++int physmem_remove_mapping(void *virt)
++{
++ struct phys_desc *desc;
++
++ virt = (void *) ((unsigned long) virt & PAGE_MASK);
++ desc = find_virtmem_hash(&virtmem_hash, virt);
++ if(desc == NULL)
++ return(0);
++
++ remove_mapping(desc);
++ return(1);
++}
++
++void physmem_forget_descriptor(int fd)
++{
++ struct desc_mapping *desc;
++ struct phys_desc *page;
++ struct list_head *ele, *next;
++ __u64 offset;
++ void *addr;
++ int err;
++
++ desc = find_mapping(fd);
++ if(desc == NULL)
++ return;
++
++ list_for_each_safe(ele, next, &desc->pages){
++ page = list_entry(ele, struct phys_desc, list);
++ offset = page->offset;
++ addr = page->virt;
++ remove_mapping(page);
++ err = os_seek_file(fd, offset);
++ if(err)
++ panic("physmem_forget_descriptor - failed to seek "
++ "to %lld in fd %d, error = %d\n",
++ offset, fd, -err);
++ err = os_read_file(fd, addr, PAGE_SIZE);
++ if(err < 0)
++ panic("physmem_forget_descriptor - failed to read "
++ "from fd %d to 0x%p, error = %d\n",
++ fd, addr, -err);
++ }
++
++ list_del(&desc->list);
++ kfree(desc);
++}
++
++void arch_free_page(struct page *page, int order)
++{
++ void *virt;
++ int i;
++
++ for(i = 0; i < (1 << order); i++){
++ virt = __va(page_to_phys(page + i));
++ physmem_remove_mapping(virt);
++ }
++}
++
++int is_remapped(void *virt)
++{
++ return(find_virtmem_hash(&virtmem_hash, virt) != NULL);
++}
++
++/* Changed during early boot */
++unsigned long high_physmem;
++
++extern unsigned long physmem_size;
++
++void *to_virt(unsigned long phys)
++{
++ return((void *) uml_physmem + phys);
++}
++
++unsigned long to_phys(void *virt)
++{
++ return(((unsigned long) virt) - uml_physmem);
++}
++
++int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem)
++{
++	struct page *p, *map;
++	unsigned long phys_len, phys_pages, highmem_len, highmem_pages;
++	unsigned long iomem_len, iomem_pages, total_len, total_pages;
++	int i;
++	/* Each *_len below is the size in bytes of that range's struct pages */
++	phys_pages = physmem >> PAGE_SHIFT;
++	phys_len = phys_pages * sizeof(struct page);
++
++	iomem_pages = iomem >> PAGE_SHIFT;
++	iomem_len = iomem_pages * sizeof(struct page);
++
++	highmem_pages = highmem >> PAGE_SHIFT;
++	highmem_len = highmem_pages * sizeof(struct page);
++	/* The map needs room for one struct page per page in all three ranges */
++	total_pages = phys_pages + iomem_pages + highmem_pages;
++	total_len = phys_len + iomem_len + highmem_len;
++
++	if(kmalloc_ok){
++		map = kmalloc(total_len, GFP_KERNEL);
++		if(map == NULL)
++			map = vmalloc(total_len);
++	}
++	else map = alloc_bootmem_low_pages(total_len);
++
++	if(map == NULL)
++		return(-ENOMEM);
++
++	for(i = 0; i < total_pages; i++){
++		p = &map[i];
++		set_page_count(p, 0);
++		SetPageReserved(p);
++		INIT_LIST_HEAD(&p->list);
++	}
++	/* Publish the freshly initialized array as the global page map */
++	mem_map = map;
++	max_mapnr = total_pages;
++	return(0);
++}
++
++struct page *phys_to_page(const unsigned long phys)
++{
++ return(&mem_map[phys >> PAGE_SHIFT]);
++}
++
++struct page *__virt_to_page(const unsigned long virt)
++{
++ return(&mem_map[__pa(virt) >> PAGE_SHIFT]);
++}
++
++unsigned long page_to_phys(struct page *page)
++{
++ return((page - mem_map) << PAGE_SHIFT);
++}
++
++pte_t mk_pte(struct page *page, pgprot_t pgprot)
++{
++ pte_t pte;
++
++ pte_val(pte) = page_to_phys(page) + pgprot_val(pgprot);
++ if(pte_present(pte)) pte_mknewprot(pte_mknewpage(pte));
++ return(pte);
++}
++
++/* Changed during early boot */
++static unsigned long kmem_top = 0;
++
++unsigned long get_kmem_end(void)
++{
++ if(kmem_top == 0)
++ kmem_top = CHOOSE_MODE(kmem_end_tt, kmem_end_skas);
++ return(kmem_top);
++}
++
++void map_memory(unsigned long virt, unsigned long phys, unsigned long len,
++ int r, int w, int x)
++{
++ __u64 offset;
++ int fd, err;
++
++ fd = phys_mapping(phys, &offset);
++ err = os_map_memory((void *) virt, fd, offset, len, r, w, x);
++ if(err)
++ panic("map_memory(0x%lx, %d, 0x%llx, %ld, %d, %d, %d) failed, "
++ "err = %d\n", virt, fd, offset, len, r, w, x, err);
++}
++
++#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
++
++void setup_physmem(unsigned long start, unsigned long reserve_end,
++ unsigned long len, unsigned long highmem)
++{
++ unsigned long reserve = reserve_end - start;
++ int pfn = PFN_UP(__pa(reserve_end));
++ int delta = (len - reserve) >> PAGE_SHIFT;
++ int err, offset, bootmap_size;
++
++ physmem_fd = create_mem_file(len + highmem);
++
++ offset = uml_reserved - uml_physmem;
++ err = os_map_memory((void *) uml_reserved, physmem_fd, offset,
++ len - offset, 1, 1, 0);
++ if(err < 0){
++ os_print_error(err, "Mapping memory");
++ exit(1);
++ }
++
++ bootmap_size = init_bootmem(pfn, pfn + delta);
++ free_bootmem(__pa(reserve_end) + bootmap_size,
++ len - bootmap_size - reserve);
++}
++
++int phys_mapping(unsigned long phys, __u64 *offset_out)
++{
++ struct phys_desc *desc = find_virtmem_hash(&virtmem_hash,
++ __va(phys & PAGE_MASK));
++ int fd = -1;
++
++ if(desc != NULL){
++ fd = desc->fd;
++ *offset_out = desc->offset;
++ }
++ else if(phys < physmem_size){
++ fd = physmem_fd;
++ *offset_out = phys;
++ }
++ else if(phys < __pa(end_iomem)){
++ struct iomem_region *region = iomem_regions;
++
++ while(region != NULL){
++ if((phys >= region->phys) &&
++ (phys < region->phys + region->size)){
++ fd = region->fd;
++ *offset_out = phys - region->phys;
++ break;
++ }
++ region = region->next;
++ }
++ }
++ else if(phys < __pa(end_iomem) + highmem){
++ fd = physmem_fd;
++ *offset_out = phys - iomem_size;
++ }
++
++ return(fd);
++}
++
++static int __init uml_mem_setup(char *line, int *add)
++{
++ char *retptr;
++ physmem_size = memparse(line,&retptr);
++ return 0;
++}
++__uml_setup("mem=", uml_mem_setup,
++"mem=<Amount of desired ram>\n"
++" This controls how much \"physical\" memory the kernel allocates\n"
++" for the system. The size is specified as a number followed by\n"
++" one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n"
++" This is not related to the amount of memory in the host. It can\n"
++" be more, and the excess, if it's ever used, will just be swapped out.\n"
++" Example: mem=64M\n\n"
++);
++
++unsigned long find_iomem(char *driver, unsigned long *len_out)
++{
++	struct iomem_region *region;
++	/* Walk the iomem_regions list; the loop header advances to ->next */
++	for(region = iomem_regions; region != NULL; region = region->next){
++		if(!strcmp(region->driver, driver)){
++			*len_out = region->size;
++			return(region->virt);
++		}
++	}
++	/* No region is registered for this driver; *len_out is not written */
++	return(0);
++}
++
++int setup_iomem(void)
++{
++ struct iomem_region *region = iomem_regions;
++ unsigned long iomem_start = high_physmem + PAGE_SIZE;
++ int err;
++
++ while(region != NULL){
++ err = os_map_memory((void *) iomem_start, region->fd, 0,
++ region->size, 1, 1, 0);
++ if(err)
++ printk("Mapping iomem region for driver '%s' failed, "
++ "errno = %d\n", region->driver, -err);
++ else {
++ region->virt = iomem_start;
++ region->phys = __pa(region->virt);
++ }
++
++ iomem_start += region->size + PAGE_SIZE;
++ region = region->next;
++ }
++
++ return(0);
++}
++
++__initcall(setup_iomem);
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/kernel/process.c b/arch/um/kernel/process.c
+--- a/arch/um/kernel/process.c 2004-02-11 12:16:36.000000000 -0500
++++ b/arch/um/kernel/process.c 2004-02-11 12:28:35.000000000 -0500
+@@ -9,12 +9,10 @@
+ #include <sched.h>
+ #include <errno.h>
+ #include <stdarg.h>
+-#include <fcntl.h>
+ #include <stdlib.h>
+ #include <setjmp.h>
+ #include <sys/time.h>
+ #include <sys/ptrace.h>
+-#include <sys/ioctl.h>
+ #include <sys/wait.h>
+ #include <sys/mman.h>
+ #include <asm/ptrace.h>
+@@ -58,7 +56,11 @@
+ {
+ int flags = altstack ? SA_ONSTACK : 0;
+
+- set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags,
++ /* NODEFER is set here because SEGV isn't turned back on when the
++ * handler is ready to receive signals. This causes any segfault
++ * during a copy_user to kill the process because the fault is blocked.
++ */
++ set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags | SA_NODEFER,
+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+ set_handler(SIGTRAP, (__sighandler_t) sig_handler, flags,
+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+@@ -72,7 +74,6 @@
+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+ set_handler(SIGUSR2, (__sighandler_t) sig_handler,
+ SA_NOMASK | flags, -1);
+- (void) CHOOSE_MODE(signal(SIGCHLD, SIG_IGN), (void *) 0);
+ signal(SIGHUP, SIG_IGN);
+
+ init_irq_signals(altstack);
+@@ -123,11 +124,12 @@
+ /* Start the process and wait for it to kill itself */
+ new_pid = clone(outer_tramp, (void *) sp, clone_flags, &arg);
+ if(new_pid < 0) return(-errno);
+- while((err = waitpid(new_pid, &status, 0) < 0) && (errno == EINTR)) ;
++ while(((err = waitpid(new_pid, &status, 0)) < 0) && (errno == EINTR)) ;
+ if(err < 0) panic("Waiting for outer trampoline failed - errno = %d",
+ errno);
+ if(!WIFSIGNALED(status) || (WTERMSIG(status) != SIGKILL))
+- panic("outer trampoline didn't exit with SIGKILL");
++ panic("outer trampoline didn't exit with SIGKILL, "
++ "status = %d", status);
+
+ return(arg.pid);
+ }
+@@ -138,7 +140,7 @@
+
+ os_stop_process(os_getpid());
+
+- if(read(fd, &c, sizeof(c)) != sizeof(c))
++ if(os_read_file(fd, &c, sizeof(c)) != sizeof(c))
+ panic("read failed in suspend_new_thread");
+ }
+
+@@ -233,7 +235,7 @@
+ int n;
+
+ *jmp_ptr = &buf;
+- n = setjmp(buf);
++ n = sigsetjmp(buf, 1);
+ if(n != 0)
+ return(n);
+ (*fn)(arg);
+@@ -273,7 +275,7 @@
+ stop_ptraced_child(pid, stack, 1);
+
+ printf("Checking for /proc/mm...");
+- if(access("/proc/mm", W_OK)){
++ if(os_access("/proc/mm", OS_ACC_W_OK) < 0){
+ printf("not found\n");
+ ret = 0;
+ }
+diff -Naur a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c
+--- a/arch/um/kernel/process_kern.c 2004-02-11 12:15:46.000000000 -0500
++++ b/arch/um/kernel/process_kern.c 2004-02-11 12:27:37.000000000 -0500
+@@ -16,6 +16,7 @@
+ #include "linux/module.h"
+ #include "linux/init.h"
+ #include "linux/capability.h"
++#include "linux/spinlock.h"
+ #include "asm/unistd.h"
+ #include "asm/mman.h"
+ #include "asm/segment.h"
+@@ -23,7 +24,6 @@
+ #include "asm/pgtable.h"
+ #include "asm/processor.h"
+ #include "asm/tlbflush.h"
+-#include "asm/spinlock.h"
+ #include "asm/uaccess.h"
+ #include "asm/user.h"
+ #include "user_util.h"
+@@ -52,17 +52,12 @@
+
+ struct task_struct *get_task(int pid, int require)
+ {
+- struct task_struct *task, *ret;
++ struct task_struct *ret;
+
+- ret = NULL;
+ read_lock(&tasklist_lock);
+- for_each_process(task){
+- if(task->pid == pid){
+- ret = task;
+- break;
+- }
+- }
++ ret = find_task_by_pid(pid);
+ read_unlock(&tasklist_lock);
++
+ if(require && (ret == NULL)) panic("get_task couldn't find a task\n");
+ return(ret);
+ }
+@@ -95,7 +90,8 @@
+ int flags = GFP_KERNEL;
+
+ if(atomic) flags |= GFP_ATOMIC;
+- if((page = __get_free_pages(flags, order)) == 0)
++ page = __get_free_pages(flags, order);
++ if(page == 0)
+ return(0);
+ stack_protections(page);
+ return(page);
+@@ -103,13 +99,15 @@
+
+ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+ {
+- struct task_struct *p;
++ int pid;
+
+ current->thread.request.u.thread.proc = fn;
+ current->thread.request.u.thread.arg = arg;
+- p = do_fork(CLONE_VM | flags, 0, NULL, 0, NULL, NULL);
+- if(IS_ERR(p)) panic("do_fork failed in kernel_thread");
+- return(p->pid);
++ pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0, NULL, 0, NULL,
++ NULL);
++ if(pid < 0)
++ panic("do_fork failed in kernel_thread, errno = %d", pid);
++ return(pid);
+ }
+
+ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+@@ -129,7 +127,7 @@
+ { external_pid(task), task });
+ }
+
+-void *switch_to(void *prev, void *next, void *last)
++void *_switch_to(void *prev, void *next, void *last)
+ {
+ return(CHOOSE_MODE(switch_to_tt(prev, next),
+ switch_to_skas(prev, next)));
+@@ -149,7 +147,7 @@
+ void exit_thread(void)
+ {
+ CHOOSE_MODE(exit_thread_tt(), exit_thread_skas());
+- unprotect_stack((unsigned long) current->thread_info);
++ unprotect_stack((unsigned long) current_thread);
+ }
+
+ void *get_current(void)
+@@ -157,6 +155,10 @@
+ return(current);
+ }
+
++void prepare_to_copy(struct task_struct *tsk)
++{
++}
++
+ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
+ unsigned long stack_top, struct task_struct * p,
+ struct pt_regs *regs)
+@@ -190,7 +192,7 @@
+
+ void default_idle(void)
+ {
+- idle_timer();
++ uml_idle_timer();
+
+ atomic_inc(&init_mm.mm_count);
+ current->mm = &init_mm;
+@@ -367,10 +369,15 @@
+ return(clear_user(buf, size));
+ }
+
++int strlen_user_proc(char *str)
++{
++ return(strlen_user(str));
++}
++
+ int smp_sigio_handler(void)
+ {
+ #ifdef CONFIG_SMP
+- int cpu = current->thread_info->cpu;
++ int cpu = current_thread->cpu;
+ IPI_handler(cpu);
+ if(cpu != 0)
+ return(1);
+@@ -385,7 +392,7 @@
+
+ int cpu(void)
+ {
+- return(current->thread_info->cpu);
++ return(current_thread->cpu);
+ }
+
+ /*
+diff -Naur a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
+--- a/arch/um/kernel/ptrace.c 2004-02-11 12:14:24.000000000 -0500
++++ b/arch/um/kernel/ptrace.c 2004-02-11 12:26:07.000000000 -0500
+@@ -311,11 +311,8 @@
+
+ /* the 0x80 provides a way for the tracing parent to distinguish
+ between a syscall stop and SIGTRAP delivery */
+- current->exit_code = SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+- ? 0x80 : 0);
+- current->state = TASK_STOPPED;
+- notify_parent(current, SIGCHLD);
+- schedule();
++ ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
++ ? 0x80 : 0));
+
+ /*
+ * this isn't the same as continuing with a signal, but it will do
+diff -Naur a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c
+--- a/arch/um/kernel/reboot.c 2004-02-11 12:15:45.000000000 -0500
++++ b/arch/um/kernel/reboot.c 2004-02-11 12:27:36.000000000 -0500
+@@ -15,6 +15,7 @@
+ #ifdef CONFIG_SMP
+ static void kill_idlers(int me)
+ {
++#ifdef CONFIG_MODE_TT
+ struct task_struct *p;
+ int i;
+
+@@ -23,6 +24,7 @@
+ if((p != NULL) && (p->thread.mode.tt.extern_pid != me))
+ os_kill_process(p->thread.mode.tt.extern_pid, 0);
+ }
++#endif
+ }
+ #endif
+
+diff -Naur a/arch/um/kernel/sigio_kern.c b/arch/um/kernel/sigio_kern.c
+--- a/arch/um/kernel/sigio_kern.c 2004-02-11 12:14:33.000000000 -0500
++++ b/arch/um/kernel/sigio_kern.c 2004-02-11 12:26:17.000000000 -0500
+@@ -6,18 +6,21 @@
+ #include "linux/kernel.h"
+ #include "linux/list.h"
+ #include "linux/slab.h"
+-#include "asm/irq.h"
++#include "linux/signal.h"
++#include "linux/interrupt.h"
+ #include "init.h"
+ #include "sigio.h"
+ #include "irq_user.h"
++#include "irq_kern.h"
+
+ /* Protected by sigio_lock() called from write_sigio_workaround */
+ static int sigio_irq_fd = -1;
+
+-void sigio_interrupt(int irq, void *data, struct pt_regs *unused)
++irqreturn_t sigio_interrupt(int irq, void *data, struct pt_regs *unused)
+ {
+ read_sigio_fd(sigio_irq_fd);
+ reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ);
++ return(IRQ_HANDLED);
+ }
+
+ int write_sigio_irq(int fd)
+diff -Naur a/arch/um/kernel/sigio_user.c b/arch/um/kernel/sigio_user.c
+--- a/arch/um/kernel/sigio_user.c 2004-02-11 12:16:48.000000000 -0500
++++ b/arch/um/kernel/sigio_user.c 2004-02-11 12:29:02.000000000 -0500
+@@ -7,7 +7,6 @@
+ #include <stdlib.h>
+ #include <termios.h>
+ #include <pty.h>
+-#include <fcntl.h>
+ #include <signal.h>
+ #include <errno.h>
+ #include <string.h>
+@@ -26,7 +25,7 @@
+ int pty_close_sigio = 0;
+
+ /* Used as a flag during SIGIO testing early in boot */
+-static int got_sigio = 0;
++static volatile int got_sigio = 0;
+
+ void __init handler(int sig)
+ {
+@@ -45,7 +44,7 @@
+
+ info->err = 0;
+ if(openpty(&info->master, &info->slave, NULL, NULL, NULL))
+- info->err = errno;
++ info->err = -errno;
+ }
+
+ void __init check_one_sigio(void (*proc)(int, int))
+@@ -53,11 +52,11 @@
+ struct sigaction old, new;
+ struct termios tt;
+ struct openpty_arg pty = { .master = -1, .slave = -1 };
+- int master, slave, flags;
++ int master, slave, err;
+
+ initial_thread_cb(openpty_cb, &pty);
+ if(pty.err){
+- printk("openpty failed, errno = %d\n", pty.err);
++ printk("openpty failed, errno = %d\n", -pty.err);
+ return;
+ }
+
+@@ -69,23 +68,16 @@
+ return;
+ }
+
++ /* XXX These can fail with EINTR */
+ if(tcgetattr(master, &tt) < 0)
+ panic("check_sigio : tcgetattr failed, errno = %d\n", errno);
+ cfmakeraw(&tt);
+ if(tcsetattr(master, TCSADRAIN, &tt) < 0)
+ panic("check_sigio : tcsetattr failed, errno = %d\n", errno);
+
+- if((flags = fcntl(master, F_GETFL)) < 0)
+- panic("tty_fds : fcntl F_GETFL failed, errno = %d\n", errno);
+-
+- if((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) ||
+- (fcntl(master, F_SETOWN, os_getpid()) < 0))
+- panic("check_sigio : fcntl F_SETFL or F_SETOWN failed, "
+- "errno = %d\n", errno);
+-
+- if((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0))
+- panic("check_sigio : fcntl F_SETFL failed, errno = %d\n",
+- errno);
++ err = os_sigio_async(master, slave);
++ if(err < 0)
++ panic("tty_fds : sigio_async failed, err = %d\n", -err);
+
+ if(sigaction(SIGIO, NULL, &old) < 0)
+ panic("check_sigio : sigaction 1 failed, errno = %d\n", errno);
+@@ -97,8 +89,8 @@
+ got_sigio = 0;
+ (*proc)(master, slave);
+
+- close(master);
+- close(slave);
++ os_close_file(master);
++ os_close_file(slave);
+
+ if(sigaction(SIGIO, &old, NULL) < 0)
+ panic("check_sigio : sigaction 3 failed, errno = %d\n", errno);
+@@ -112,25 +104,25 @@
+ printk("Checking that host ptys support output SIGIO...");
+
+ memset(buf, 0, sizeof(buf));
+- while(write(master, buf, sizeof(buf)) > 0) ;
++
++ while(os_write_file(master, buf, sizeof(buf)) > 0) ;
+ if(errno != EAGAIN)
+ panic("check_sigio : write failed, errno = %d\n", errno);
+-
+- while(((n = read(slave, buf, sizeof(buf))) > 0) && !got_sigio) ;
++ while(((n = os_read_file(slave, buf, sizeof(buf))) > 0) && !got_sigio) ;
+
+ if(got_sigio){
+ printk("Yes\n");
+ pty_output_sigio = 1;
+ }
+- else if(errno == EAGAIN) printk("No, enabling workaround\n");
+- else panic("check_sigio : read failed, errno = %d\n", errno);
++ else if(n == -EAGAIN) printk("No, enabling workaround\n");
++ else panic("check_sigio : read failed, err = %d\n", n);
+ }
+
+ static void tty_close(int master, int slave)
+ {
+ printk("Checking that host ptys support SIGIO on close...");
+
+- close(slave);
++ os_close_file(slave);
+ if(got_sigio){
+ printk("Yes\n");
+ pty_close_sigio = 1;
+@@ -140,7 +132,8 @@
+
+ void __init check_sigio(void)
+ {
+- if(access("/dev/ptmx", R_OK) && access("/dev/ptyp0", R_OK)){
++ if((os_access("/dev/ptmx", OS_ACC_R_OK) < 0) &&
++ (os_access("/dev/ptyp0", OS_ACC_R_OK) < 0)){
+ printk("No pseudo-terminals available - skipping pty SIGIO "
+ "check\n");
+ return;
+@@ -201,11 +194,10 @@
+ p = &fds->poll[i];
+ if(p->revents == 0) continue;
+ if(p->fd == sigio_private[1]){
+- n = read(sigio_private[1], &c, sizeof(c));
++ n = os_read_file(sigio_private[1], &c, sizeof(c));
+ if(n != sizeof(c))
+ printk("write_sigio_thread : "
+- "read failed, errno = %d\n",
+- errno);
++ "read failed, err = %d\n", -n);
+ tmp = current_poll;
+ current_poll = next_poll;
+ next_poll = tmp;
+@@ -218,10 +210,10 @@
+ (fds->used - i) * sizeof(*fds->poll));
+ }
+
+- n = write(respond_fd, &c, sizeof(c));
++ n = os_write_file(respond_fd, &c, sizeof(c));
+ if(n != sizeof(c))
+ printk("write_sigio_thread : write failed, "
+- "errno = %d\n", errno);
++ "err = %d\n", -n);
+ }
+ }
+ }
+@@ -252,15 +244,15 @@
+ char c;
+
+ flags = set_signals(0);
+- n = write(sigio_private[0], &c, sizeof(c));
++ n = os_write_file(sigio_private[0], &c, sizeof(c));
+ if(n != sizeof(c)){
+- printk("update_thread : write failed, errno = %d\n", errno);
++ printk("update_thread : write failed, err = %d\n", -n);
+ goto fail;
+ }
+
+- n = read(sigio_private[0], &c, sizeof(c));
++ n = os_read_file(sigio_private[0], &c, sizeof(c));
+ if(n != sizeof(c)){
+- printk("update_thread : read failed, errno = %d\n", errno);
++ printk("update_thread : read failed, err = %d\n", -n);
+ goto fail;
+ }
+
+@@ -271,10 +263,10 @@
+ if(write_sigio_pid != -1)
+ os_kill_process(write_sigio_pid, 1);
+ write_sigio_pid = -1;
+- close(sigio_private[0]);
+- close(sigio_private[1]);
+- close(write_sigio_fds[0]);
+- close(write_sigio_fds[1]);
++ os_close_file(sigio_private[0]);
++ os_close_file(sigio_private[1]);
++ os_close_file(write_sigio_fds[0]);
++ os_close_file(write_sigio_fds[1]);
+ sigio_unlock();
+ set_signals(flags);
+ }
+@@ -369,15 +361,15 @@
+ goto out;
+
+ err = os_pipe(write_sigio_fds, 1, 1);
+- if(err){
++ if(err < 0){
+ printk("write_sigio_workaround - os_pipe 1 failed, "
+- "errno = %d\n", -err);
++ "err = %d\n", -err);
+ goto out;
+ }
+ err = os_pipe(sigio_private, 1, 1);
+- if(err){
++ if(err < 0){
+ printk("write_sigio_workaround - os_pipe 2 failed, "
+- "errno = %d\n", -err);
++ "err = %d\n", -err);
+ goto out_close1;
+ }
+ if(setup_initial_poll(sigio_private[1]))
+@@ -399,11 +391,11 @@
+ os_kill_process(write_sigio_pid, 1);
+ write_sigio_pid = -1;
+ out_close2:
+- close(sigio_private[0]);
+- close(sigio_private[1]);
++ os_close_file(sigio_private[0]);
++ os_close_file(sigio_private[1]);
+ out_close1:
+- close(write_sigio_fds[0]);
+- close(write_sigio_fds[1]);
++ os_close_file(write_sigio_fds[0]);
++ os_close_file(write_sigio_fds[1]);
+ sigio_unlock();
+ }
+
+@@ -412,10 +404,16 @@
+ int n;
+ char c;
+
+- n = read(fd, &c, sizeof(c));
++ n = os_read_file(fd, &c, sizeof(c));
+ if(n != sizeof(c)){
+- printk("read_sigio_fd - read failed, errno = %d\n", errno);
+- return(-errno);
++ if(n < 0) {
++ printk("read_sigio_fd - read failed, err = %d\n", -n);
++ return(n);
++ }
++ else {
++ printk("read_sigio_fd - short read, bytes = %d\n", n);
++ return(-EIO);
++ }
+ }
+ return(n);
+ }
+diff -Naur a/arch/um/kernel/signal_kern.c b/arch/um/kernel/signal_kern.c
+--- a/arch/um/kernel/signal_kern.c 2004-02-11 12:15:52.000000000 -0500
++++ b/arch/um/kernel/signal_kern.c 2004-02-11 12:27:44.000000000 -0500
+@@ -36,7 +36,7 @@
+ if(sig == SIGSEGV){
+ struct k_sigaction *ka;
+
+- 		ka = &current->sig->action[SIGSEGV - 1];
++		ka = &current->sighand->action[SIGSEGV - 1];
+ ka->sa.sa_handler = SIG_DFL;
+ }
+ force_sig(SIGSEGV, current);
+@@ -60,10 +60,10 @@
+ int err, ret;
+
+ ret = 0;
++ /* Always make any pending restarted system calls return -EINTR */
++ current_thread_info()->restart_block.fn = do_no_restart_syscall;
+ switch(error){
+ case -ERESTART_RESTARTBLOCK:
+- current_thread_info()->restart_block.fn =
+- do_no_restart_syscall;
+ case -ERESTARTNOHAND:
+ ret = -EINTR;
+ break;
+@@ -142,7 +142,7 @@
+ return(0);
+
+ /* Whee! Actually deliver the signal. */
+-	ka = &current->sig->action[sig -1 ];
++	ka = &current->sighand->action[sig -1 ];
+ err = handle_signal(regs, sig, ka, &info, oldset, error);
+ if(!err) return(1);
+
+@@ -201,7 +201,7 @@
+ }
+ }
+
+-int sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize)
++int sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize)
+ {
+ sigset_t saveset, newset;
+
+@@ -227,20 +227,59 @@
+ }
+ }
+
++int sys_sigaction(int sig, const struct old_sigaction __user *act,
++			 struct old_sigaction __user *oact)
++{	/* Old-style sigaction: read *act (if given), call do_sigaction(), write the previous action to *oact (if given). */
++	struct k_sigaction new_ka, old_ka;
++	int ret;
++
++	if (act) {
++		old_sigset_t mask;
++		if (verify_area(VERIFY_READ, act, sizeof(*act)) ||
++		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
++		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
++			return -EFAULT;
++		__get_user(new_ka.sa.sa_flags, &act->sa_flags);	/* result not checked */
++		__get_user(mask, &act->sa_mask);	/* result not checked */
++		siginitset(&new_ka.sa.sa_mask, mask);
++	}
++
++	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
++
++	if (!ret && oact) {	/* copy the old action out only when do_sigaction() succeeded */
++		if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) ||
++		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
++		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
++			return -EFAULT;
++		__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
++		__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);	/* only word 0 of the mask is reported */
++	}
++
++	return ret;	/* do_sigaction() result; -EFAULT was returned earlier on a bad user pointer */
++}
++
++int sys_sigaltstack(const stack_t *uss, stack_t *uoss)
++{	/* Thin wrapper: forwards uss/uoss to do_sigaltstack() with the current task's saved stack pointer. */
++	return(do_sigaltstack(uss, uoss, PT_REGS_SP(&current->thread.regs)));
++}
++
++extern int userspace_pid[];
++
+ static int copy_sc_from_user(struct pt_regs *to, void *from,
+ struct arch_frame_data *arch)
+ {
+ int ret;
+
+ ret = CHOOSE_MODE(copy_sc_from_user_tt(UPT_SC(&to->regs), from, arch),
+- copy_sc_from_user_skas(&to->regs, from));
++ copy_sc_from_user_skas(userspace_pid[0],
++ &to->regs, from));
+ return(ret);
+ }
+
+ int sys_sigreturn(struct pt_regs regs)
+ {
+-	void *sc = sp_to_sc(PT_REGS_SP(&current->thread.regs));
+-	void *mask = sp_to_mask(PT_REGS_SP(&current->thread.regs));
++	void __user *sc = sp_to_sc(PT_REGS_SP(&current->thread.regs));
++	void __user *mask = sp_to_mask(PT_REGS_SP(&current->thread.regs));
+ int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long);
+
+ 	spin_lock_irq(&current->sighand->siglock);
+@@ -257,8 +296,8 @@
+
+ int sys_rt_sigreturn(struct pt_regs regs)
+ {
+-	struct ucontext *uc = sp_to_uc(PT_REGS_SP(&current->thread.regs));
+-	void *fp;
++	unsigned long sp = PT_REGS_SP(&current->thread.regs);
++ struct ucontext __user *uc = sp_to_uc(sp);
+ int sig_size = _NSIG_WORDS * sizeof(unsigned long);
+
+ 	spin_lock_irq(&current->sighand->siglock);
+@@ -266,7 +305,6 @@
+ 	sigdelsetmask(&current->blocked, ~_BLOCKABLE);
+ recalc_sigpending();
+ 	spin_unlock_irq(&current->sighand->siglock);
+- fp = (void *) (((unsigned long) uc) + sizeof(struct ucontext));
+ 	copy_sc_from_user(&current->thread.regs, &uc->uc_mcontext, 
+ &signal_frame_si.common.arch);
+ 	return(PT_REGS_SYSCALL_RET(&current->thread.regs));
+diff -Naur a/arch/um/kernel/skas/include/mode.h b/arch/um/kernel/skas/include/mode.h
+--- a/arch/um/kernel/skas/include/mode.h 2004-02-11 12:15:48.000000000 -0500
++++ b/arch/um/kernel/skas/include/mode.h 2004-02-11 12:27:41.000000000 -0500
+@@ -12,14 +12,16 @@
+ extern int have_fpx_regs;
+
+ extern void user_time_init_skas(void);
+-extern int copy_sc_from_user_skas(union uml_pt_regs *regs, void *from_ptr);
+-extern int copy_sc_to_user_skas(void *to_ptr, void *fp,
++extern int copy_sc_from_user_skas(int pid, union uml_pt_regs *regs,
++ void *from_ptr);
++extern int copy_sc_to_user_skas(int pid, void *to_ptr, void *fp,
+ union uml_pt_regs *regs,
+ unsigned long fault_addr, int fault_type);
+ extern void sig_handler_common_skas(int sig, void *sc_ptr);
+ extern void halt_skas(void);
+ extern void reboot_skas(void);
+ extern void kill_off_processes_skas(void);
++extern int is_skas_winch(int pid, int fd, void *data);
+
+ #endif
+
+diff -Naur a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h
+--- a/arch/um/kernel/skas/include/skas.h 2004-02-11 12:14:37.000000000 -0500
++++ b/arch/um/kernel/skas/include/skas.h 2004-02-11 12:26:22.000000000 -0500
+@@ -8,7 +8,7 @@
+
+ #include "sysdep/ptrace.h"
+
+-extern int userspace_pid;
++extern int userspace_pid[];
+
+ extern void switch_threads(void *me, void *next);
+ extern void thread_wait(void *sw, void *fb);
+@@ -32,7 +32,7 @@
+ extern int new_mm(int from);
+ extern void save_registers(union uml_pt_regs *regs);
+ extern void restore_registers(union uml_pt_regs *regs);
+-extern void start_userspace(void);
++extern void start_userspace(int cpu);
+ extern void init_registers(int pid);
+
+ #endif
+diff -Naur a/arch/um/kernel/skas/include/uaccess.h b/arch/um/kernel/skas/include/uaccess.h
+--- a/arch/um/kernel/skas/include/uaccess.h 2004-02-11 12:15:17.000000000 -0500
++++ b/arch/um/kernel/skas/include/uaccess.h 2004-02-11 12:27:05.000000000 -0500
+@@ -6,20 +6,12 @@
+ #ifndef __SKAS_UACCESS_H
+ #define __SKAS_UACCESS_H
+
+-#include "linux/string.h"
+-#include "linux/sched.h"
+-#include "linux/err.h"
+-#include "asm/processor.h"
+-#include "asm/pgtable.h"
+ #include "asm/errno.h"
+-#include "asm/current.h"
+-#include "asm/a.out.h"
+-#include "kern_util.h"
+
+ #define access_ok_skas(type, addr, size) \
+ ((segment_eq(get_fs(), KERNEL_DS)) || \
+ (((unsigned long) (addr) < TASK_SIZE) && \
+- ((unsigned long) (addr) + (size) < TASK_SIZE)))
++ ((unsigned long) (addr) + (size) <= TASK_SIZE)))
+
+ static inline int verify_area_skas(int type, const void * addr,
+ unsigned long size)
+@@ -27,197 +19,12 @@
+ return(access_ok_skas(type, addr, size) ? 0 : -EFAULT);
+ }
+
+-static inline unsigned long maybe_map(unsigned long virt, int is_write)
+-{
+- pte_t pte;
+-
+- void *phys = um_virt_to_phys(current, virt, &pte);
+- int dummy_code;
+-
+- if(IS_ERR(phys) || (is_write && !pte_write(pte))){
+- if(handle_page_fault(virt, 0, is_write, 0, &dummy_code))
+- return(0);
+- phys = um_virt_to_phys(current, virt, NULL);
+- }
+- return((unsigned long) __va((unsigned long) phys));
+-}
+-
+-static inline int buffer_op(unsigned long addr, int len,
+- int (*op)(unsigned long addr, int len, void *arg),
+- void *arg)
+-{
+- int size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len);
+- int remain = len, n;
+-
+- n = (*op)(addr, size, arg);
+- if(n != 0)
+- return(n < 0 ? remain : 0);
+-
+- addr += size;
+- remain -= size;
+- if(remain == 0)
+- return(0);
+-
+- while(addr < ((addr + remain) & PAGE_MASK)){
+- n = (*op)(addr, PAGE_SIZE, arg);
+- if(n != 0)
+- return(n < 0 ? remain : 0);
+-
+- addr += PAGE_SIZE;
+- remain -= PAGE_SIZE;
+- }
+- if(remain == 0)
+- return(0);
+-
+- n = (*op)(addr, remain, arg);
+- if(n != 0)
+- return(n < 0 ? remain : 0);
+- return(0);
+-}
+-
+-static inline int copy_chunk_from_user(unsigned long from, int len, void *arg)
+-{
+- unsigned long *to_ptr = arg, to = *to_ptr;
+-
+- from = maybe_map(from, 0);
+- if(from == 0)
+- return(-1);
+-
+- memcpy((void *) to, (void *) from, len);
+- *to_ptr += len;
+- return(0);
+-}
+-
+-static inline int copy_from_user_skas(void *to, const void *from, int n)
+-{
+- if(segment_eq(get_fs(), KERNEL_DS)){
+- memcpy(to, from, n);
+- return(0);
+- }
+-
+- return(access_ok_skas(VERIFY_READ, from, n) ?
+- buffer_op((unsigned long) from, n, copy_chunk_from_user, &to) :
+- n);
+-}
+-
+-static inline int copy_chunk_to_user(unsigned long to, int len, void *arg)
+-{
+- unsigned long *from_ptr = arg, from = *from_ptr;
+-
+- to = maybe_map(to, 1);
+- if(to == 0)
+- return(-1);
+-
+- memcpy((void *) to, (void *) from, len);
+- *from_ptr += len;
+- return(0);
+-}
+-
+-static inline int copy_to_user_skas(void *to, const void *from, int n)
+-{
+- if(segment_eq(get_fs(), KERNEL_DS)){
+- memcpy(to, from, n);
+- return(0);
+- }
+-
+- return(access_ok_skas(VERIFY_WRITE, to, n) ?
+- buffer_op((unsigned long) to, n, copy_chunk_to_user, &from) :
+- n);
+-}
+-
+-static inline int strncpy_chunk_from_user(unsigned long from, int len,
+- void *arg)
+-{
+- char **to_ptr = arg, *to = *to_ptr;
+- int n;
+-
+- from = maybe_map(from, 0);
+- if(from == 0)
+- return(-1);
+-
+- strncpy(to, (void *) from, len);
+- n = strnlen(to, len);
+- *to_ptr += n;
+-
+- if(n < len)
+- return(1);
+- return(0);
+-}
+-
+-static inline int strncpy_from_user_skas(char *dst, const char *src, int count)
+-{
+- int n;
+- char *ptr = dst;
+-
+- if(segment_eq(get_fs(), KERNEL_DS)){
+- strncpy(dst, src, count);
+- return(strnlen(dst, count));
+- }
+-
+- if(!access_ok_skas(VERIFY_READ, src, 1))
+- return(-EFAULT);
+-
+- n = buffer_op((unsigned long) src, count, strncpy_chunk_from_user,
+- &ptr);
+- if(n != 0)
+- return(-EFAULT);
+- return(strnlen(dst, count));
+-}
+-
+-static inline int clear_chunk(unsigned long addr, int len, void *unused)
+-{
+- addr = maybe_map(addr, 1);
+- if(addr == 0)
+- return(-1);
+-
+- memset((void *) addr, 0, len);
+- return(0);
+-}
+-
+-static inline int __clear_user_skas(void *mem, int len)
+-{
+- return(buffer_op((unsigned long) mem, len, clear_chunk, NULL));
+-}
+-
+-static inline int clear_user_skas(void *mem, int len)
+-{
+- if(segment_eq(get_fs(), KERNEL_DS)){
+- memset(mem, 0, len);
+- return(0);
+- }
+-
+- return(access_ok_skas(VERIFY_WRITE, mem, len) ?
+- buffer_op((unsigned long) mem, len, clear_chunk, NULL) : len);
+-}
+-
+-static inline int strnlen_chunk(unsigned long str, int len, void *arg)
+-{
+- int *len_ptr = arg, n;
+-
+- str = maybe_map(str, 0);
+- if(str == 0)
+- return(-1);
+-
+- n = strnlen((void *) str, len);
+- *len_ptr += n;
+-
+- if(n < len)
+- return(1);
+- return(0);
+-}
+-
+-static inline int strnlen_user_skas(const void *str, int len)
+-{
+- int count = 0, n;
+-
+- if(segment_eq(get_fs(), KERNEL_DS))
+- return(strnlen(str, len) + 1);
+-
+- n = buffer_op((unsigned long) str, len, strnlen_chunk, &count);
+- if(n == 0)
+- return(count + 1);
+- return(-EFAULT);
+-}
++extern int copy_from_user_skas(void *to, const void *from, int n);
++extern int copy_to_user_skas(void *to, const void *from, int n);
++extern int strncpy_from_user_skas(char *dst, const char *src, int count);
++extern int __clear_user_skas(void *mem, int len);
++extern int clear_user_skas(void *mem, int len);
++extern int strnlen_user_skas(const void *str, int len);
+
+ #endif
+
+diff -Naur a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile
+--- a/arch/um/kernel/skas/Makefile 2004-02-11 12:14:38.000000000 -0500
++++ b/arch/um/kernel/skas/Makefile 2004-02-11 12:26:41.000000000 -0500
+@@ -5,20 +5,24 @@
+
+ obj-y = exec_kern.o exec_user.o mem.o mem_user.o mmu.o process.o \
+ process_kern.o syscall_kern.o syscall_user.o time.o tlb.o trap_user.o \
+- sys-$(SUBARCH)/
++ uaccess.o sys-$(SUBARCH)/
++
++host-progs := util/mk_ptregs
++clean-files := include/skas_ptregs.h
+
+ USER_OBJS = $(filter %_user.o,$(obj-y)) process.o time.o
+ USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
+
+-include/skas_ptregs.h : util/mk_ptregs
+- util/mk_ptregs > $@
+-
+-util/mk_ptregs :
+- $(MAKE) -C util
++$(TOPDIR)/arch/um/include/skas_ptregs.h : $(src)/util/mk_ptregs
++ @echo -n ' Generating $@'
++ @$< > $@.tmp
++ @if [ -r $@ ] && cmp -s $@ $@.tmp; then \
++ echo ' (unchanged)'; \
++ rm -f $@.tmp; \
++ else \
++ echo ' (updated)'; \
++ mv -f $@.tmp $@; \
++ fi
+
+ $(USER_OBJS) : %.o: %.c
+ $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $<
+-
+-clean :
+- $(MAKE) -C util clean
+- $(RM) -f include/skas_ptregs.h
+diff -Naur a/arch/um/kernel/skas/mem_user.c b/arch/um/kernel/skas/mem_user.c
+--- a/arch/um/kernel/skas/mem_user.c 2004-02-11 12:14:58.000000000 -0500
++++ b/arch/um/kernel/skas/mem_user.c 2004-02-11 12:26:50.000000000 -0500
+@@ -7,6 +7,7 @@
+ #include <sys/mman.h>
+ #include <sys/ptrace.h>
+ #include "mem_user.h"
++#include "mem.h"
+ #include "user.h"
+ #include "os.h"
+ #include "proc_mm.h"
+@@ -15,12 +16,12 @@
+ int r, int w, int x)
+ {
+ struct proc_mm_op map;
+- struct mem_region *region;
+- int prot, n;
++ __u64 offset;
++ int prot, n, phys_fd;
+
+ prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) |
+ (x ? PROT_EXEC : 0);
+- region = phys_region(phys);
++ phys_fd = phys_mapping(phys, &offset);
+
+ map = ((struct proc_mm_op) { .op = MM_MMAP,
+ .u =
+@@ -30,12 +31,12 @@
+ .prot = prot,
+ .flags = MAP_SHARED |
+ MAP_FIXED,
+- .fd = region->fd,
+- .offset = phys_offset(phys)
++ .fd = phys_fd,
++ .offset = offset
+ } } } );
+ n = os_write_file(fd, &map, sizeof(map));
+ if(n != sizeof(map))
+- printk("map : /proc/mm map failed, errno = %d\n", errno);
++ printk("map : /proc/mm map failed, err = %d\n", -n);
+ }
+
+ int unmap(int fd, void *addr, int len)
+@@ -49,8 +50,13 @@
+ { .addr = (unsigned long) addr,
+ .len = len } } } );
+ n = os_write_file(fd, &unmap, sizeof(unmap));
+- if((n != 0) && (n != sizeof(unmap)))
+- return(-errno);
++ if(n != sizeof(unmap)) {
++ if(n < 0)
++ return(n);
++ else if(n > 0)
++ return(-EIO);
++ }
++
+ return(0);
+ }
+
+@@ -71,11 +77,15 @@
+ .prot = prot } } } );
+
+ n = os_write_file(fd, &protect, sizeof(protect));
+- if((n != 0) && (n != sizeof(protect))){
++ if(n != sizeof(protect)) {
++ if(n == 0) return(0);
++
+ if(must_succeed)
+- panic("protect failed, errno = %d", errno);
+- return(-errno);
++ panic("protect failed, err = %d", -n);
++
++ return(-EIO);
+ }
++
+ return(0);
+ }
+
+diff -Naur a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c
+--- a/arch/um/kernel/skas/mmu.c 2004-02-11 12:17:12.000000000 -0500
++++ b/arch/um/kernel/skas/mmu.c 2004-02-11 12:29:17.000000000 -0500
+@@ -22,9 +22,11 @@
+ else from = -1;
+
+ mm->context.skas.mm_fd = new_mm(from);
+- if(mm->context.skas.mm_fd < 0)
+- panic("init_new_context_skas - new_mm failed, errno = %d\n",
+- mm->context.skas.mm_fd);
++ if(mm->context.skas.mm_fd < 0){
++ printk("init_new_context_skas - new_mm failed, errno = %d\n",
++ mm->context.skas.mm_fd);
++ return(mm->context.skas.mm_fd);
++ }
+
+ return(0);
+ }
+diff -Naur a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
+--- a/arch/um/kernel/skas/process.c 2004-02-11 12:16:47.000000000 -0500
++++ b/arch/um/kernel/skas/process.c 2004-02-11 12:29:02.000000000 -0500
+@@ -4,6 +4,7 @@
+ */
+
+ #include <stdlib.h>
++#include <unistd.h>
+ #include <errno.h>
+ #include <signal.h>
+ #include <setjmp.h>
+@@ -24,6 +25,18 @@
+ #include "os.h"
+ #include "proc_mm.h"
+ #include "skas_ptrace.h"
++#include "chan_user.h"
++
++int is_skas_winch(int pid, int fd, void *data)
++{	/* Returns 0 unless pid is this process; otherwise hands fd and data to register_winch_irq() and returns 1. */
++	if(pid != getpid())
++		return(0);
++
++	register_winch_irq(-1, fd, -1, data);
++	return(1);
++}
++
++/* These are set once at boot time and not changed thereafter */
+
+ unsigned long exec_regs[FRAME_SIZE];
+ unsigned long exec_fp_regs[HOST_FP_SIZE];
+@@ -48,11 +61,11 @@
+ int err, syscall_nr, status;
+
+ syscall_nr = PT_SYSCALL_NR(regs->skas.regs);
++ UPT_SYSCALL_NR(regs) = syscall_nr;
+ if(syscall_nr < 1){
+ relay_signal(SIGTRAP, regs);
+ return;
+ }
+- UPT_SYSCALL_NR(regs) = syscall_nr;
+
+ err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, __NR_getpid);
+ if(err < 0)
+@@ -72,8 +85,6 @@
+ handle_syscall(regs);
+ }
+
+-int userspace_pid;
+-
+ static int userspace_tramp(void *arg)
+ {
+ init_new_thread_signals(0);
+@@ -83,7 +94,11 @@
+ return(0);
+ }
+
+-void start_userspace(void)
++/* Each element set once, and only accessed by a single processor anyway */
++#define NR_CPUS 1
++int userspace_pid[NR_CPUS];
++
++void start_userspace(int cpu)
+ {
+ void *stack;
+ unsigned long sp;
+@@ -114,21 +129,21 @@
+ if(munmap(stack, PAGE_SIZE) < 0)
+ panic("start_userspace : munmap failed, errno = %d\n", errno);
+
+- userspace_pid = pid;
++ userspace_pid[cpu] = pid;
+ }
+
+ void userspace(union uml_pt_regs *regs)
+ {
+- int err, status, op;
++ int err, status, op, pid = userspace_pid[0];
+
+ restore_registers(regs);
+
+- err = ptrace(PTRACE_SYSCALL, userspace_pid, 0, 0);
++ err = ptrace(PTRACE_SYSCALL, pid, 0, 0);
+ if(err)
+ panic("userspace - PTRACE_SYSCALL failed, errno = %d\n",
+ errno);
+ while(1){
+- err = waitpid(userspace_pid, &status, WUNTRACED);
++ err = waitpid(pid, &status, WUNTRACED);
+ if(err < 0)
+ panic("userspace - waitpid failed, errno = %d\n",
+ errno);
+@@ -139,16 +154,17 @@
+ if(WIFSTOPPED(status)){
+ switch(WSTOPSIG(status)){
+ case SIGSEGV:
+- handle_segv(userspace_pid);
++ handle_segv(pid);
+ break;
+ case SIGTRAP:
+- handle_trap(userspace_pid, regs);
++ handle_trap(pid, regs);
+ break;
+ case SIGIO:
+ case SIGVTALRM:
+ case SIGILL:
+ case SIGBUS:
+ case SIGFPE:
++ case SIGWINCH:
+ user_signal(WSTOPSIG(status), regs);
+ break;
+ default:
+@@ -162,7 +178,7 @@
+
+ op = singlestepping_skas() ? PTRACE_SINGLESTEP :
+ PTRACE_SYSCALL;
+- err = ptrace(op, userspace_pid, 0, 0);
++ err = ptrace(op, pid, 0, 0);
+ if(err)
+ panic("userspace - PTRACE_SYSCALL failed, "
+ "errno = %d\n", errno);
+@@ -177,7 +193,7 @@
+ *switch_buf_ptr = &switch_buf;
+ *fork_buf_ptr = &fork_buf;
+
+- if(setjmp(fork_buf) == 0)
++ if(sigsetjmp(fork_buf, 1) == 0)
+ new_thread_proc(stack, handler);
+
+ remove_sigstack();
+@@ -189,16 +205,16 @@
+
+ *switch_buf = &buf;
+ fork_buf = fb;
+- if(setjmp(buf) == 0)
+- longjmp(*fork_buf, 1);
++ if(sigsetjmp(buf, 1) == 0)
++ siglongjmp(*fork_buf, 1);
+ }
+
+-static int move_registers(int int_op, int fp_op, union uml_pt_regs *regs,
+- unsigned long *fp_regs)
++static int move_registers(int pid, int int_op, int fp_op,
++ union uml_pt_regs *regs, unsigned long *fp_regs)
+ {
+- if(ptrace(int_op, userspace_pid, 0, regs->skas.regs) < 0)
++ if(ptrace(int_op, pid, 0, regs->skas.regs) < 0)
+ return(-errno);
+- if(ptrace(fp_op, userspace_pid, 0, fp_regs) < 0)
++ if(ptrace(fp_op, pid, 0, fp_regs) < 0)
+ return(-errno);
+ return(0);
+ }
+@@ -217,10 +233,11 @@
+ fp_regs = regs->skas.fp;
+ }
+
+- err = move_registers(PTRACE_GETREGS, fp_op, regs, fp_regs);
++ err = move_registers(userspace_pid[0], PTRACE_GETREGS, fp_op, regs,
++ fp_regs);
+ if(err)
+ panic("save_registers - saving registers failed, errno = %d\n",
+- err);
++ -err);
+ }
+
+ void restore_registers(union uml_pt_regs *regs)
+@@ -237,10 +254,11 @@
+ fp_regs = regs->skas.fp;
+ }
+
+- err = move_registers(PTRACE_SETREGS, fp_op, regs, fp_regs);
++ err = move_registers(userspace_pid[0], PTRACE_SETREGS, fp_op, regs,
++ fp_regs);
+ if(err)
+ panic("restore_registers - saving registers failed, "
+- "errno = %d\n", err);
++ "errno = %d\n", -err);
+ }
+
+ void switch_threads(void *me, void *next)
+@@ -248,8 +266,8 @@
+ jmp_buf my_buf, **me_ptr = me, *next_buf = next;
+
+ *me_ptr = &my_buf;
+- if(setjmp(my_buf) == 0)
+- longjmp(*next_buf, 1);
++ if(sigsetjmp(my_buf, 1) == 0)
++ siglongjmp(*next_buf, 1);
+ }
+
+ static jmp_buf initial_jmpbuf;
+@@ -265,14 +283,14 @@
+ int n;
+
+ *fork_buf_ptr = &initial_jmpbuf;
+- n = setjmp(initial_jmpbuf);
++ n = sigsetjmp(initial_jmpbuf, 1);
+ if(n == 0)
+ new_thread_proc((void *) stack, new_thread_handler);
+ else if(n == 1)
+ remove_sigstack();
+ else if(n == 2){
+ (*cb_proc)(cb_arg);
+- longjmp(*cb_back, 1);
++ siglongjmp(*cb_back, 1);
+ }
+ else if(n == 3){
+ kmalloc_ok = 0;
+@@ -282,7 +300,7 @@
+ kmalloc_ok = 0;
+ return(1);
+ }
+- longjmp(**switch_buf, 1);
++ siglongjmp(**switch_buf, 1);
+ }
+
+ void remove_sigstack(void)
+@@ -304,8 +322,8 @@
+ cb_back = &here;
+
+ block_signals();
+- if(setjmp(here) == 0)
+- longjmp(initial_jmpbuf, 2);
++ if(sigsetjmp(here, 1) == 0)
++ siglongjmp(initial_jmpbuf, 2);
+ unblock_signals();
+
+ cb_proc = NULL;
+@@ -316,22 +334,23 @@
+ void halt_skas(void)
+ {
+ block_signals();
+- longjmp(initial_jmpbuf, 3);
++ siglongjmp(initial_jmpbuf, 3);
+ }
+
+ void reboot_skas(void)
+ {
+ block_signals();
+- longjmp(initial_jmpbuf, 4);
++ siglongjmp(initial_jmpbuf, 4);
+ }
+
+ int new_mm(int from)
+ {
+ struct proc_mm_op copy;
+- int n, fd = os_open_file("/proc/mm", of_write(OPENFLAGS()), 0);
++ int n, fd = os_open_file("/proc/mm",
++ of_cloexec(of_write(OPENFLAGS())), 0);
+
+ if(fd < 0)
+- return(-errno);
++ return(fd);
+
+ if(from != -1){
+ copy = ((struct proc_mm_op) { .op = MM_COPY_SEGMENTS,
+@@ -340,8 +359,9 @@
+ 		n = os_write_file(fd, &copy, sizeof(copy));
+ if(n != sizeof(copy))
+ printk("new_mm : /proc/mm copy_segments failed, "
+- "errno = %d\n", errno);
++ "err = %d\n", -n);
+ }
++
+ return(fd);
+ }
+
+@@ -349,7 +369,8 @@
+ {
+ int err;
+
+- err = ptrace(PTRACE_SWITCH_MM, userspace_pid, 0, mm_fd);
++#warning need cpu pid in switch_mm_skas
++ err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, mm_fd);
+ if(err)
+ panic("switch_mm_skas - PTRACE_SWITCH_MM failed, errno = %d\n",
+ errno);
+@@ -357,7 +378,8 @@
+
+ void kill_off_processes_skas(void)
+ {
+- os_kill_process(userspace_pid, 1);
++#warning need to loop over userspace_pids in kill_off_processes_skas
++ os_kill_process(userspace_pid[0], 1);
+ }
+
+ void init_registers(int pid)
+diff -Naur a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c
+--- a/arch/um/kernel/skas/process_kern.c 2004-02-11 12:14:33.000000000 -0500
++++ b/arch/um/kernel/skas/process_kern.c 2004-02-11 12:26:15.000000000 -0500
+@@ -61,11 +61,13 @@
+ 	thread_wait(&current->thread.mode.skas.switch_buf, 
+ current->thread.mode.skas.fork_buf);
+
+-#ifdef CONFIG_SMP
+- schedule_tail(NULL);
+-#endif
++ if(current->thread.prev_sched != NULL)
++ schedule_tail(current->thread.prev_sched);
+ current->thread.prev_sched = NULL;
+
++ /* The return value is 1 if the kernel thread execs a process,
++ * 0 if it just exits
++ */
+ 	n = run_kernel_thread(fn, arg, &current->thread.exec_buf);
+ if(n == 1)
+ 		userspace(&current->thread.regs.regs);
+@@ -93,9 +95,8 @@
+ current->thread.mode.skas.fork_buf);
+
+ force_flush_all();
+-#ifdef CONFIG_SMP
+- schedule_tail(current->thread.prev_sched);
+-#endif
++ if(current->thread.prev_sched != NULL)
++ schedule_tail(current->thread.prev_sched);
+ current->thread.prev_sched = NULL;
+ unblock_signals();
+
+@@ -136,7 +137,7 @@
+
+ void init_idle_skas(void)
+ {
+- cpu_tasks[current->thread_info->cpu].pid = os_getpid();
++ cpu_tasks[current_thread->cpu].pid = os_getpid();
+ default_idle();
+ }
+
+@@ -160,11 +161,11 @@
+
+ int start_uml_skas(void)
+ {
+- start_userspace();
++ start_userspace(0);
+ capture_signal_stack();
++ uml_idle_timer();
+
+ init_new_thread_signals(1);
+- idle_timer();
+
+ init_task.thread.request.u.thread.proc = start_kernel_proc;
+ init_task.thread.request.u.thread.arg = NULL;
+@@ -175,12 +176,14 @@
+
+ int external_pid_skas(struct task_struct *task)
+ {
+- return(userspace_pid);
++#warning Need to look up userspace_pid by cpu
++ return(userspace_pid[0]);
+ }
+
+ int thread_pid_skas(struct task_struct *task)
+ {
+- return(userspace_pid);
++#warning Need to look up userspace_pid by cpu
++ return(userspace_pid[0]);
+ }
+
+ /*
+diff -Naur a/arch/um/kernel/skas/syscall_kern.c b/arch/um/kernel/skas/syscall_kern.c
+--- a/arch/um/kernel/skas/syscall_kern.c 2004-02-11 12:15:58.000000000 -0500
++++ b/arch/um/kernel/skas/syscall_kern.c 2004-02-11 12:27:51.000000000 -0500
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
++ * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+diff -Naur a/arch/um/kernel/skas/sys-i386/Makefile b/arch/um/kernel/skas/sys-i386/Makefile
+--- a/arch/um/kernel/skas/sys-i386/Makefile 2004-02-11 12:16:38.000000000 -0500
++++ b/arch/um/kernel/skas/sys-i386/Makefile 2004-02-11 12:28:37.000000000 -0500
+@@ -10,5 +10,3 @@
+
+ $(USER_OBJS) : %.o: %.c
+ $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $<
+-
+-clean :
+diff -Naur a/arch/um/kernel/skas/sys-i386/sigcontext.c b/arch/um/kernel/skas/sys-i386/sigcontext.c
+--- a/arch/um/kernel/skas/sys-i386/sigcontext.c 2004-02-11 12:14:18.000000000 -0500
++++ b/arch/um/kernel/skas/sys-i386/sigcontext.c 2004-02-11 12:26:01.000000000 -0500
+@@ -12,10 +12,9 @@
+ #include "kern_util.h"
+ #include "user.h"
+ #include "sigcontext.h"
++#include "mode.h"
+
+-extern int userspace_pid;
+-
+-int copy_sc_from_user_skas(union uml_pt_regs *regs, void *from_ptr)
++int copy_sc_from_user_skas(int pid, union uml_pt_regs *regs, void *from_ptr)
+ {
+ struct sigcontext sc, *from = from_ptr;
+ unsigned long fpregs[FP_FRAME_SIZE];
+@@ -41,13 +40,12 @@
+ regs->skas.regs[EIP] = sc.eip;
+ regs->skas.regs[CS] = sc.cs;
+ regs->skas.regs[EFL] = sc.eflags;
+- regs->skas.regs[UESP] = sc.esp_at_signal;
+ regs->skas.regs[SS] = sc.ss;
+ regs->skas.fault_addr = sc.cr2;
+ regs->skas.fault_type = FAULT_WRITE(sc.err);
+ regs->skas.trap_type = sc.trapno;
+
+- err = ptrace(PTRACE_SETFPREGS, userspace_pid, 0, fpregs);
++ err = ptrace(PTRACE_SETFPREGS, pid, 0, fpregs);
+ if(err < 0){
+ printk("copy_sc_to_user - PTRACE_SETFPREGS failed, "
+ "errno = %d\n", errno);
+@@ -57,8 +55,9 @@
+ return(0);
+ }
+
+-int copy_sc_to_user_skas(void *to_ptr, void *fp, union uml_pt_regs *regs,
+- unsigned long fault_addr, int fault_type)
++int copy_sc_to_user_skas(int pid, void *to_ptr, void *fp,
++ union uml_pt_regs *regs, unsigned long fault_addr,
++ int fault_type)
+ {
+ struct sigcontext sc, *to = to_ptr;
+ struct _fpstate *to_fp;
+@@ -86,7 +85,7 @@
+ sc.err = TO_SC_ERR(fault_type);
+ sc.trapno = regs->skas.trap_type;
+
+- err = ptrace(PTRACE_GETFPREGS, userspace_pid, 0, fpregs);
++ err = ptrace(PTRACE_GETFPREGS, pid, 0, fpregs);
+ if(err < 0){
+ printk("copy_sc_to_user - PTRACE_GETFPREGS failed, "
+ "errno = %d\n", errno);
+diff -Naur a/arch/um/kernel/skas/trap_user.c b/arch/um/kernel/skas/trap_user.c
+--- a/arch/um/kernel/skas/trap_user.c 2004-02-11 12:15:17.000000000 -0500
++++ b/arch/um/kernel/skas/trap_user.c 2004-02-11 12:27:06.000000000 -0500
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
++ * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+@@ -41,8 +41,6 @@
+ {
+ struct signal_info *info;
+
+- if(sig == SIGVTALRM)
+- missed_ticks[cpu()]++;
+ regs->skas.is_user = 1;
+ regs->skas.fault_addr = 0;
+ regs->skas.fault_type = 0;
+diff -Naur a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c
+--- a/arch/um/kernel/skas/uaccess.c 1969-12-31 19:00:00.000000000 -0500
++++ b/arch/um/kernel/skas/uaccess.c 2004-02-11 12:28:20.000000000 -0500
+@@ -0,0 +1,219 @@
++/*
++ * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com)
++ * Licensed under the GPL
++ */
++
++#include "linux/stddef.h"
++#include "linux/kernel.h"
++#include "linux/string.h"
++#include "linux/fs.h"
++#include "linux/highmem.h"
++#include "asm/page.h"
++#include "asm/pgtable.h"
++#include "asm/uaccess.h"
++#include "kern_util.h"
++
++extern void *um_virt_to_phys(struct task_struct *task, unsigned long addr,
++ pte_t *pte_out);
++
++static unsigned long maybe_map(unsigned long virt, int is_write)
++{	/* Look up the physical address behind virt, faulting the page in if needed; returns (unsigned long) -1 on failure. */
++	pte_t pte;
++	int err;
++
++	void *phys = um_virt_to_phys(current, virt, &pte);
++	int dummy_code;
++
++	if(IS_ERR(phys) || (is_write && !pte_write(pte))){	/* unmapped, or a write to a non-writable pte */
++		err = handle_page_fault(virt, 0, is_write, 0, &dummy_code);
++		if(err)
++			return(-1);	/* the caller tests for -1; 0 would be taken as a valid physical address */
++		phys = um_virt_to_phys(current, virt, NULL);
++	}
++	return((unsigned long) phys);
++}
++
++static int do_op(unsigned long addr, int len, int is_write,
++		 int (*op)(unsigned long addr, int len, void *arg), void *arg)
++{	/* Resolve addr via maybe_map(), kmap the page, run op on the mapped address, kunmap; returns op's result or -1. */
++	struct page *page;
++	int n;
++
++	addr = maybe_map(addr, is_write);
++	if(addr == -1)	/* -1 is the only value treated as a lookup failure here */
++		return(-1);
++
++	page = phys_to_page(addr);
++	addr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK);	/* mapped page base plus offset within the page */
++	n = (*op)(addr, len, arg);
++	kunmap(page);
++
++	return(n);
++}
++
++static int buffer_op(unsigned long addr, int len, int is_write,
++		     int (*op)(unsigned long addr, int len, void *arg),
++		     void *arg)
++{	/* Apply op over [addr, addr+len) in chunks that never cross a page boundary; returns 0, or the bytes left when op fails (< 0). */
++	int size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len);	/* leading chunk, up to the next page boundary */
++	int remain = len, n;
++
++	n = do_op(addr, size, is_write, op, arg);
++	if(n != 0)
++		return(n < 0 ? remain : 0);	/* n > 0: op asked to stop early, reported as success */
++
++	addr += size;
++	remain -= size;
++	if(remain == 0)
++		return(0);
++
++	while(addr < ((addr + remain) & PAGE_MASK)){	/* whole pages */
++		n = do_op(addr, PAGE_SIZE, is_write, op, arg);
++		if(n != 0)
++			return(n < 0 ? remain : 0);
++
++		addr += PAGE_SIZE;
++		remain -= PAGE_SIZE;
++	}
++	if(remain == 0)
++		return(0);
++
++	n = do_op(addr, remain, is_write, op, arg);	/* trailing partial page */
++	if(n != 0)
++		return(n < 0 ? remain : 0);
++	return(0);
++}
++
++static int copy_chunk_from_user(unsigned long from, int len, void *arg)
++{	/* buffer_op callback: copy len bytes from 'from' to the destination cursor *arg, then advance the cursor; always 0. */
++	unsigned long *to_ptr = arg, to = *to_ptr;
++
++	memcpy((void *) to, (void *) from, len);
++	*to_ptr += len;
++	return(0);
++}
++
++int copy_from_user_skas(void *to, const void *from, int n)
++{	/* Copy n bytes from 'from' to 'to'; returns 0 on success, otherwise a count of bytes not copied. */
++	if(segment_eq(get_fs(), KERNEL_DS)){	/* kernel segment: plain memcpy, no checks */
++		memcpy(to, from, n);
++		return(0);
++	}
++
++	return(access_ok_skas(VERIFY_READ, from, n) ?
++	       buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to):
++	       n);	/* range check failed: nothing copied */
++}
++
++static int copy_chunk_to_user(unsigned long to, int len, void *arg)
++{	/* buffer_op callback: copy len bytes from the source cursor *arg to 'to', then advance the cursor; always 0. */
++	unsigned long *from_ptr = arg, from = *from_ptr;
++
++	memcpy((void *) to, (void *) from, len);
++	*from_ptr += len;
++	return(0);
++}
++
++int copy_to_user_skas(void *to, const void *from, int n)
++{	/* Copy n bytes from 'from' to 'to'; returns 0 on success, otherwise a count of bytes not copied. */
++	if(segment_eq(get_fs(), KERNEL_DS)){	/* kernel segment: plain memcpy, no checks */
++		memcpy(to, from, n);
++		return(0);
++	}
++
++	return(access_ok_skas(VERIFY_WRITE, to, n) ?
++	       buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from) :
++	       n);	/* range check failed: nothing copied */
++}
++
++static int strncpy_chunk_from_user(unsigned long from, int len, void *arg)
++{	/* buffer_op callback: strncpy up to len bytes to the cursor *arg; returns 1 once a NUL was copied, else 0. */
++	char **to_ptr = arg, *to = *to_ptr;
++	int n;
++
++	strncpy(to, (void *) from, len);
++	n = strnlen(to, len);
++	*to_ptr += n;	/* advance by the string bytes copied, not by len */
++
++	if(n < len)	/* terminator found inside this chunk: tell buffer_op to stop */
++		return(1);
++	return(0);
++}
++
++int strncpy_from_user_skas(char *dst, const char *src, int count)
++{	/* Copy a string of at most count bytes from src to dst; returns strnlen(dst, count), or -EFAULT on failure. */
++	int n;
++	char *ptr = dst;
++
++	if(segment_eq(get_fs(), KERNEL_DS)){	/* kernel segment: plain strncpy, no checks */
++		strncpy(dst, src, count);
++		return(strnlen(dst, count));
++	}
++
++	if(!access_ok_skas(VERIFY_READ, src, 1))	/* only the first byte is range-checked here */
++		return(-EFAULT);
++
++	n = buffer_op((unsigned long) src, count, 0, strncpy_chunk_from_user, 
++		      &ptr);
++	if(n != 0)
++		return(-EFAULT);
++	return(strnlen(dst, count));
++}
++
++static int clear_chunk(unsigned long addr, int len, void *unused)
++{	/* buffer_op callback: zero len bytes at addr; always 0. */
++	memset((void *) addr, 0, len);
++	return(0);
++}
++
++int __clear_user_skas(void *mem, int len)
++{	/* Zero len bytes at mem via buffer_op() with no segment or range check; returns buffer_op()'s result. */
++	return(buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL));
++}
++
++int clear_user_skas(void *mem, int len)
++{	/* Zero len bytes at mem; returns 0 on success, otherwise a count of bytes not cleared. */
++	if(segment_eq(get_fs(), KERNEL_DS)){	/* kernel segment: plain memset, no checks */
++		memset(mem, 0, len);
++		return(0);
++	}
++
++	return(access_ok_skas(VERIFY_WRITE, mem, len) ? 
++	       buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len);
++}
++
++static int strnlen_chunk(unsigned long str, int len, void *arg)
++{	/* buffer_op callback: add this chunk's strnlen to the running total *arg; returns 1 once a NUL was seen, else 0. */
++	int *len_ptr = arg, n;
++
++	n = strnlen((void *) str, len);
++	*len_ptr += n;
++
++	if(n < len)	/* terminator found inside this chunk: tell buffer_op to stop */
++		return(1);
++	return(0);
++}
++
++int strnlen_user_skas(const void *str, int len)
++{	/* Length of the string at str, scanning at most len bytes; returns the counted length plus one, or -EFAULT. */
++	int count = 0, n;
++
++	if(segment_eq(get_fs(), KERNEL_DS))	/* kernel segment: plain strnlen, no checks */
++		return(strnlen(str, len) + 1);
++
++	n = buffer_op((unsigned long) str, len, 0, strnlen_chunk, &count);
++	if(n == 0)
++		return(count + 1);
++	return(-EFAULT);
++}
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/kernel/skas/util/Makefile b/arch/um/kernel/skas/util/Makefile
+--- a/arch/um/kernel/skas/util/Makefile 2004-02-11 12:16:34.000000000 -0500
++++ b/arch/um/kernel/skas/util/Makefile 2004-02-11 12:28:33.000000000 -0500
+@@ -1,10 +1,9 @@
+ all: mk_ptregs
+
+ mk_ptregs : mk_ptregs.o
+- $(CC) -o mk_ptregs mk_ptregs.o
++ $(HOSTCC) -o mk_ptregs mk_ptregs.o
+
+ mk_ptregs.o : mk_ptregs.c
+- $(CC) -c $<
++ $(HOSTCC) -c $<
+
+-clean :
+- $(RM) -f mk_ptregs *.o *~
++clean-files := mk_ptregs *.o *~
+diff -Naur a/arch/um/kernel/skas/util/mk_ptregs.c b/arch/um/kernel/skas/util/mk_ptregs.c
+--- a/arch/um/kernel/skas/util/mk_ptregs.c 2004-02-11 12:15:12.000000000 -0500
++++ b/arch/um/kernel/skas/util/mk_ptregs.c 2004-02-11 12:27:00.000000000 -0500
+@@ -1,3 +1,4 @@
++#include <stdio.h>
+ #include <asm/ptrace.h>
+ #include <asm/user.h>
+
+diff -Naur a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
+--- a/arch/um/kernel/smp.c 2004-02-11 12:14:32.000000000 -0500
++++ b/arch/um/kernel/smp.c 2004-02-11 12:26:13.000000000 -0500
+@@ -1,9 +1,15 @@
+ /*
+- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
++ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+ #include "linux/config.h"
++#include "linux/percpu.h"
++#include "asm/pgalloc.h"
++#include "asm/tlb.h"
++
++/* For some reason, mmu_gathers are referenced when CONFIG_SMP is off. */
++DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
+
+ #ifdef CONFIG_SMP
+
+@@ -23,7 +29,7 @@
+ #include "os.h"
+
+ /* CPU online map, set by smp_boot_cpus */
+-unsigned long cpu_online_map = cpumask_of_cpu(0);
++unsigned long cpu_online_map = CPU_MASK_NONE;
+
+ EXPORT_SYMBOL(cpu_online_map);
+
+@@ -55,7 +61,7 @@
+
+ void smp_send_reschedule(int cpu)
+ {
+- write(cpu_data[cpu].ipi_pipe[1], "R", 1);
++ os_write_file(cpu_data[cpu].ipi_pipe[1], "R", 1);
+ num_reschedules_sent++;
+ }
+
+@@ -100,35 +106,34 @@
+
+ printk(KERN_INFO "Stopping all CPUs...");
+ for(i = 0; i < num_online_cpus(); i++){
+- if(i == current->thread_info->cpu)
++ if(i == current_thread->cpu)
+ continue;
+- write(cpu_data[i].ipi_pipe[1], "S", 1);
++ os_write_file(cpu_data[i].ipi_pipe[1], "S", 1);
+ }
+ printk("done\n");
+ }
+
+-static cpumask_t smp_commenced_mask;
+-static cpumask_t smp_callin_map = CPU_MASK_NONE;
++static cpumask_t smp_commenced_mask = CPU_MASK_NONE;
++static cpumask_t cpu_callin_map = CPU_MASK_NONE;
+
+ static int idle_proc(void *cpup)
+ {
+ int cpu = (int) cpup, err;
+
+ err = os_pipe(cpu_data[cpu].ipi_pipe, 1, 1);
+- if(err)
+- panic("CPU#%d failed to create IPI pipe, errno = %d", cpu,
+- -err);
++ if(err < 0)
++ panic("CPU#%d failed to create IPI pipe, err = %d", cpu, -err);
+
+ activate_ipi(cpu_data[cpu].ipi_pipe[0],
+ current->thread.mode.tt.extern_pid);
+
+ wmb();
+- if (cpu_test_and_set(cpu, &smp_callin_map)) {
++ if (cpu_test_and_set(cpu, cpu_callin_map)) {
+ printk("huh, CPU#%d already present??\n", cpu);
+ BUG();
+ }
+
+- while (!cpu_isset(cpu, &smp_commenced_mask))
++ while (!cpu_isset(cpu, smp_commenced_mask))
+ cpu_relax();
+
+ cpu_set(cpu, cpu_online_map);
+@@ -143,16 +148,20 @@
+
+ current->thread.request.u.thread.proc = idle_proc;
+ current->thread.request.u.thread.arg = (void *) cpu;
+- new_task = do_fork(CLONE_VM | CLONE_IDLETASK, 0, NULL, 0, NULL, NULL);
+- if(IS_ERR(new_task)) panic("do_fork failed in idle_thread");
++ new_task = copy_process(CLONE_VM | CLONE_IDLETASK, 0, NULL, 0, NULL,
++ NULL);
++ if(IS_ERR(new_task))
++ panic("copy_process failed in idle_thread, error = %ld",
++ PTR_ERR(new_task));
+
+ cpu_tasks[cpu] = ((struct cpu_task)
+ { .pid = new_task->thread.mode.tt.extern_pid,
+ .task = new_task } );
+ idle_threads[cpu] = new_task;
+- CHOOSE_MODE(write(new_task->thread.mode.tt.switch_pipe[1], &c,
++ CHOOSE_MODE(os_write_file(new_task->thread.mode.tt.switch_pipe[1], &c,
+ sizeof(c)),
+ ({ panic("skas mode doesn't support SMP"); }));
++ wake_up_forked_process(new_task);
+ return(new_task);
+ }
+
+@@ -160,15 +169,17 @@
+ {
+ struct task_struct *idle;
+ unsigned long waittime;
+- int err, cpu;
++ int err, cpu, me = smp_processor_id();
+
+- cpu_set(0, cpu_online_map);
+- cpu_set(0, smp_callin_map);
++ cpu_clear(me, cpu_online_map);
++ cpu_set(me, cpu_online_map);
++ cpu_set(me, cpu_callin_map);
+
+- err = os_pipe(cpu_data[0].ipi_pipe, 1, 1);
+- if(err) panic("CPU#0 failed to create IPI pipe, errno = %d", -err);
++ err = os_pipe(cpu_data[me].ipi_pipe, 1, 1);
++ if(err < 0)
++ panic("CPU#0 failed to create IPI pipe, errno = %d", -err);
+
+- activate_ipi(cpu_data[0].ipi_pipe[0],
++ activate_ipi(cpu_data[me].ipi_pipe[0],
+ current->thread.mode.tt.extern_pid);
+
+ for(cpu = 1; cpu < ncpus; cpu++){
+@@ -180,10 +191,10 @@
+ unhash_process(idle);
+
+ waittime = 200000000;
+- while (waittime-- && !cpu_isset(cpu, smp_callin_map))
++ while (waittime-- && !cpu_isset(cpu, cpu_callin_map))
+ cpu_relax();
+
+- if (cpu_isset(cpu, smp_callin_map))
++ if (cpu_isset(cpu, cpu_callin_map))
+ printk("done\n");
+ else printk("failed\n");
+ }
+@@ -216,7 +227,7 @@
+ int fd;
+
+ fd = cpu_data[cpu].ipi_pipe[0];
+- while (read(fd, &c, 1) == 1) {
++ while (os_read_file(fd, &c, 1) == 1) {
+ switch (c) {
+ case 'C':
+ smp_call_function_slave(cpu);
+@@ -273,9 +284,9 @@
+ info = _info;
+
+ for (i=0;i<NR_CPUS;i++)
+- if((i != current->thread_info->cpu) &&
++ if((i != current_thread->cpu) &&
+ cpu_isset(i, cpu_online_map))
+- write(cpu_data[i].ipi_pipe[1], "C", 1);
++ os_write_file(cpu_data[i].ipi_pipe[1], "C", 1);
+
+ while (atomic_read(&scf_started) != cpus)
+ barrier();
+diff -Naur a/arch/um/kernel/syscall_kern.c b/arch/um/kernel/syscall_kern.c
+--- a/arch/um/kernel/syscall_kern.c 2004-02-11 12:16:08.000000000 -0500
++++ b/arch/um/kernel/syscall_kern.c 2004-02-11 12:28:04.000000000 -0500
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
++ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+@@ -35,39 +35,40 @@
+
+ long sys_fork(void)
+ {
+- struct task_struct *p;
++ long ret;
+
+ current->thread.forking = 1;
+- p = do_fork(SIGCHLD, 0, NULL, 0, NULL, NULL);
++ ret = do_fork(SIGCHLD, 0, NULL, 0, NULL, NULL);
+ current->thread.forking = 0;
+- return(IS_ERR(p) ? PTR_ERR(p) : p->pid);
++ return(ret);
+ }
+
+-long sys_clone(unsigned long clone_flags, unsigned long newsp)
++long sys_clone(unsigned long clone_flags, unsigned long newsp,
++ int *parent_tid, int *child_tid)
+ {
+- struct task_struct *p;
++ long ret;
+
+ current->thread.forking = 1;
+- p = do_fork(clone_flags, newsp, NULL, 0, NULL, NULL);
++ ret = do_fork(clone_flags, newsp, NULL, 0, parent_tid, child_tid);
+ current->thread.forking = 0;
+- return(IS_ERR(p) ? PTR_ERR(p) : p->pid);
++ return(ret);
+ }
+
+ long sys_vfork(void)
+ {
+- struct task_struct *p;
++ long ret;
+
+ current->thread.forking = 1;
+- p = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0, NULL, NULL);
++ ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0, NULL,
++ NULL);
+ current->thread.forking = 0;
+- return(IS_ERR(p) ? PTR_ERR(p) : p->pid);
++ return(ret);
+ }
+
+ /* common code for old and new mmaps */
+-static inline long do_mmap2(
+- unsigned long addr, unsigned long len,
+- unsigned long prot, unsigned long flags,
+- unsigned long fd, unsigned long pgoff)
++long do_mmap2(struct mm_struct *mm, unsigned long addr, unsigned long len,
++ unsigned long prot, unsigned long flags, unsigned long fd,
++ unsigned long pgoff)
+ {
+ int error = -EBADF;
+ struct file * file = NULL;
+@@ -79,9 +80,9 @@
+ goto out;
+ }
+
+- down_write(¤t->mm->mmap_sem);
+- error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+- up_write(¤t->mm->mmap_sem);
++ down_write(&mm->mmap_sem);
++ error = do_mmap_pgoff(mm, file, addr, len, prot, flags, pgoff);
++ up_write(&mm->mmap_sem);
+
+ if (file)
+ fput(file);
+@@ -93,7 +94,7 @@
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long pgoff)
+ {
+- return do_mmap2(addr, len, prot, flags, fd, pgoff);
++ return do_mmap2(current->mm, addr, len, prot, flags, fd, pgoff);
+ }
+
+ /*
+@@ -120,7 +121,8 @@
+ if (offset & ~PAGE_MASK)
+ goto out;
+
+- err = do_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT);
++ err = do_mmap2(current->mm, addr, len, prot, flags, fd,
++ offset >> PAGE_SHIFT);
+ out:
+ return err;
+ }
+@@ -135,43 +137,12 @@
+
+ error = do_pipe(fd);
+ if (!error) {
+- if (copy_to_user(fildes, fd, 2*sizeof(int)))
++ if (copy_to_user(fildes, fd, sizeof(fd)))
+ error = -EFAULT;
+ }
+ return error;
+ }
+
+-int sys_sigaction(int sig, const struct old_sigaction *act,
+- struct old_sigaction *oact)
+-{
+- struct k_sigaction new_ka, old_ka;
+- int ret;
+-
+- if (act) {
+- old_sigset_t mask;
+- if (verify_area(VERIFY_READ, act, sizeof(*act)) ||
+- __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
+- __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
+- return -EFAULT;
+- __get_user(new_ka.sa.sa_flags, &act->sa_flags);
+- __get_user(mask, &act->sa_mask);
+- siginitset(&new_ka.sa.sa_mask, mask);
+- }
+-
+- ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+-
+- if (!ret && oact) {
+- if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) ||
+- __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
+- __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
+- return -EFAULT;
+- __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+- __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+- }
+-
+- return ret;
+-}
+-
+ /*
+ * sys_ipc() is the de-multiplexer for the SysV IPC calls..
+ *
+@@ -253,7 +224,7 @@
+ return sys_shmctl (first, second,
+ (struct shmid_ds *) ptr);
+ default:
+- return -EINVAL;
++ return -ENOSYS;
+ }
+ }
+
+@@ -302,11 +273,6 @@
+ return error;
+ }
+
+-int sys_sigaltstack(const stack_t *uss, stack_t *uoss)
+-{
+- return(do_sigaltstack(uss, uoss, PT_REGS_SP(¤t->thread.regs)));
+-}
+-
+ long execute_syscall(void *r)
+ {
+ return(CHOOSE_MODE_PROC(execute_syscall_tt, execute_syscall_skas, r));
+diff -Naur a/arch/um/kernel/sys_call_table.c b/arch/um/kernel/sys_call_table.c
+--- a/arch/um/kernel/sys_call_table.c 2004-02-11 12:16:34.000000000 -0500
++++ b/arch/um/kernel/sys_call_table.c 2004-02-11 12:28:33.000000000 -0500
+@@ -5,7 +5,6 @@
+
+ #include "linux/config.h"
+ #include "linux/unistd.h"
+-#include "linux/version.h"
+ #include "linux/sys.h"
+ #include "linux/swap.h"
+ #include "linux/sysctl.h"
+@@ -219,15 +218,30 @@
+ extern syscall_handler_t sys_gettid;
+ extern syscall_handler_t sys_readahead;
+ extern syscall_handler_t sys_tkill;
++extern syscall_handler_t sys_setxattr;
++extern syscall_handler_t sys_lsetxattr;
++extern syscall_handler_t sys_fsetxattr;
++extern syscall_handler_t sys_getxattr;
++extern syscall_handler_t sys_lgetxattr;
++extern syscall_handler_t sys_fgetxattr;
++extern syscall_handler_t sys_listxattr;
++extern syscall_handler_t sys_llistxattr;
++extern syscall_handler_t sys_flistxattr;
++extern syscall_handler_t sys_removexattr;
++extern syscall_handler_t sys_lremovexattr;
++extern syscall_handler_t sys_fremovexattr;
+ extern syscall_handler_t sys_sendfile64;
+ extern syscall_handler_t sys_futex;
+ extern syscall_handler_t sys_sched_setaffinity;
+ extern syscall_handler_t sys_sched_getaffinity;
++extern syscall_handler_t sys_set_thread_area;
++extern syscall_handler_t sys_get_thread_area;
+ extern syscall_handler_t sys_io_setup;
+ extern syscall_handler_t sys_io_destroy;
+ extern syscall_handler_t sys_io_getevents;
+ extern syscall_handler_t sys_io_submit;
+ extern syscall_handler_t sys_io_cancel;
++extern syscall_handler_t sys_fadvise64;
+ extern syscall_handler_t sys_exit_group;
+ extern syscall_handler_t sys_lookup_dcookie;
+ extern syscall_handler_t sys_epoll_create;
+@@ -235,6 +249,20 @@
+ extern syscall_handler_t sys_epoll_wait;
+ extern syscall_handler_t sys_remap_file_pages;
+ extern syscall_handler_t sys_set_tid_address;
++extern syscall_handler_t sys_timer_create;
++extern syscall_handler_t sys_timer_settime;
++extern syscall_handler_t sys_timer_gettime;
++extern syscall_handler_t sys_timer_getoverrun;
++extern syscall_handler_t sys_timer_delete;
++extern syscall_handler_t sys_clock_settime;
++extern syscall_handler_t sys_clock_gettime;
++extern syscall_handler_t sys_clock_getres;
++extern syscall_handler_t sys_clock_nanosleep;
++extern syscall_handler_t sys_statfs64;
++extern syscall_handler_t sys_fstatfs64;
++extern syscall_handler_t sys_tgkill;
++extern syscall_handler_t sys_utimes;
++extern syscall_handler_t sys_fadvise64_64;
+
+ #ifdef CONFIG_NFSD
+ #define NFSSERVCTL sys_nfsservctl
+@@ -246,7 +274,7 @@
+ extern syscall_handler_t um_time;
+ extern syscall_handler_t um_stime;
+
+-#define LAST_GENERIC_SYSCALL __NR_set_tid_address
++#define LAST_GENERIC_SYSCALL __NR_vserver
+
+ #if LAST_GENERIC_SYSCALL > LAST_ARCH_SYSCALL
+ #define LAST_SYSCALL LAST_GENERIC_SYSCALL
+@@ -455,32 +483,37 @@
+ [ __NR_stat64 ] = sys_stat64,
+ [ __NR_lstat64 ] = sys_lstat64,
+ [ __NR_fstat64 ] = sys_fstat64,
+- [ __NR_fcntl64 ] = sys_fcntl64,
+ [ __NR_getdents64 ] = sys_getdents64,
++ [ __NR_fcntl64 ] = sys_fcntl64,
++ [ 223 ] = sys_ni_syscall,
+ [ __NR_gettid ] = sys_gettid,
+ [ __NR_readahead ] = sys_readahead,
+- [ __NR_setxattr ] = sys_ni_syscall,
+- [ __NR_lsetxattr ] = sys_ni_syscall,
+- [ __NR_fsetxattr ] = sys_ni_syscall,
+- [ __NR_getxattr ] = sys_ni_syscall,
+- [ __NR_lgetxattr ] = sys_ni_syscall,
+- [ __NR_fgetxattr ] = sys_ni_syscall,
+- [ __NR_listxattr ] = sys_ni_syscall,
+- [ __NR_llistxattr ] = sys_ni_syscall,
+- [ __NR_flistxattr ] = sys_ni_syscall,
+- [ __NR_removexattr ] = sys_ni_syscall,
+- [ __NR_lremovexattr ] = sys_ni_syscall,
+- [ __NR_fremovexattr ] = sys_ni_syscall,
++ [ __NR_setxattr ] = sys_setxattr,
++ [ __NR_lsetxattr ] = sys_lsetxattr,
++ [ __NR_fsetxattr ] = sys_fsetxattr,
++ [ __NR_getxattr ] = sys_getxattr,
++ [ __NR_lgetxattr ] = sys_lgetxattr,
++ [ __NR_fgetxattr ] = sys_fgetxattr,
++ [ __NR_listxattr ] = sys_listxattr,
++ [ __NR_llistxattr ] = sys_llistxattr,
++ [ __NR_flistxattr ] = sys_flistxattr,
++ [ __NR_removexattr ] = sys_removexattr,
++ [ __NR_lremovexattr ] = sys_lremovexattr,
++ [ __NR_fremovexattr ] = sys_fremovexattr,
+ [ __NR_tkill ] = sys_tkill,
+ [ __NR_sendfile64 ] = sys_sendfile64,
+ [ __NR_futex ] = sys_futex,
+ [ __NR_sched_setaffinity ] = sys_sched_setaffinity,
+ [ __NR_sched_getaffinity ] = sys_sched_getaffinity,
++ [ __NR_set_thread_area ] = sys_ni_syscall,
++ [ __NR_get_thread_area ] = sys_ni_syscall,
+ [ __NR_io_setup ] = sys_io_setup,
+ [ __NR_io_destroy ] = sys_io_destroy,
+ [ __NR_io_getevents ] = sys_io_getevents,
+ [ __NR_io_submit ] = sys_io_submit,
+ [ __NR_io_cancel ] = sys_io_cancel,
++ [ __NR_fadvise64 ] = sys_fadvise64,
++ [ 251 ] = sys_ni_syscall,
+ [ __NR_exit_group ] = sys_exit_group,
+ [ __NR_lookup_dcookie ] = sys_lookup_dcookie,
+ [ __NR_epoll_create ] = sys_epoll_create,
+@@ -488,6 +521,21 @@
+ [ __NR_epoll_wait ] = sys_epoll_wait,
+ [ __NR_remap_file_pages ] = sys_remap_file_pages,
+ [ __NR_set_tid_address ] = sys_set_tid_address,
++ [ __NR_timer_create ] = sys_timer_create,
++ [ __NR_timer_settime ] = sys_timer_settime,
++ [ __NR_timer_gettime ] = sys_timer_gettime,
++ [ __NR_timer_getoverrun ] = sys_timer_getoverrun,
++ [ __NR_timer_delete ] = sys_timer_delete,
++ [ __NR_clock_settime ] = sys_clock_settime,
++ [ __NR_clock_gettime ] = sys_clock_gettime,
++ [ __NR_clock_getres ] = sys_clock_getres,
++ [ __NR_clock_nanosleep ] = sys_clock_nanosleep,
++ [ __NR_statfs64 ] = sys_statfs64,
++ [ __NR_fstatfs64 ] = sys_fstatfs64,
++ [ __NR_tgkill ] = sys_tgkill,
++ [ __NR_utimes ] = sys_utimes,
++ [ __NR_fadvise64_64 ] = sys_fadvise64_64,
++ [ __NR_vserver ] = sys_ni_syscall,
+
+ ARCH_SYSCALLS
+ [ LAST_SYSCALL + 1 ... NR_syscalls ] =
+diff -Naur a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
+--- a/arch/um/kernel/sysrq.c 2004-02-11 12:14:56.000000000 -0500
++++ b/arch/um/kernel/sysrq.c 2004-02-11 12:26:47.000000000 -0500
+@@ -55,6 +55,14 @@
+ show_trace((unsigned long *)esp);
+ }
+
++void show_stack(struct task_struct *task, unsigned long *sp)
++{
++	if(!task)
++		show_trace(sp);	/* no task supplied: trace from sp instead */
++	else
++		show_trace_task(task);
++}
++
+ /*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+diff -Naur a/arch/um/kernel/tempfile.c b/arch/um/kernel/tempfile.c
+--- a/arch/um/kernel/tempfile.c 2004-02-11 12:15:48.000000000 -0500
++++ b/arch/um/kernel/tempfile.c 2004-02-11 12:27:41.000000000 -0500
+@@ -28,6 +28,7 @@
+ }
+ if((dir == NULL) || (*dir == '\0'))
+ dir = "/tmp";
++
+ tempdir = malloc(strlen(dir) + 2);
+ if(tempdir == NULL){
+ fprintf(stderr, "Failed to malloc tempdir, "
+@@ -49,7 +50,8 @@
+ else
+ *tempname = 0;
+ strcat(tempname, template);
+- if((fd = mkstemp(tempname)) < 0){
++ fd = mkstemp(tempname);
++ if(fd < 0){
+ fprintf(stderr, "open - cannot create %s: %s\n", tempname,
+ strerror(errno));
+ return -1;
+@@ -59,7 +61,8 @@
+ return -1;
+ }
+ if(out_tempname){
+- if((*out_tempname = strdup(tempname)) == NULL){
++ *out_tempname = strdup(tempname);
++ if(*out_tempname == NULL){
+ perror("strdup");
+ return -1;
+ }
+diff -Naur a/arch/um/kernel/time.c b/arch/um/kernel/time.c
+--- a/arch/um/kernel/time.c 2004-02-11 12:14:28.000000000 -0500
++++ b/arch/um/kernel/time.c 2004-02-11 12:26:11.000000000 -0500
+@@ -4,24 +4,33 @@
+ */
+
+ #include <stdio.h>
++#include <stdlib.h>
+ #include <unistd.h>
+ #include <time.h>
+ #include <sys/time.h>
+ #include <signal.h>
+ #include <errno.h>
+-#include "linux/module.h"
+ #include "user_util.h"
+ #include "kern_util.h"
+ #include "user.h"
+ #include "process.h"
+ #include "signal_user.h"
+ #include "time_user.h"
++#include "kern_constants.h"
++
++/* XXX This really needs to be declared and initialized in a kernel file since
++ * it's in <linux/time.h>
++ */
++extern struct timespec wall_to_monotonic;
+
+ extern struct timeval xtime;
+
++struct timeval local_offset = { 0, 0 };
++
+ void timer(void)
+ {
+ gettimeofday(&xtime, NULL);
++ timeradd(&xtime, &local_offset, &xtime);
+ }
+
+ void set_interval(int timer_type)
+@@ -66,7 +75,7 @@
+ errno);
+ }
+
+-void idle_timer(void)
++void uml_idle_timer(void)
+ {
+ if(signal(SIGVTALRM, SIG_IGN) == SIG_ERR)
+ panic("Couldn't unset SIGVTALRM handler");
+@@ -76,14 +85,56 @@
+ set_interval(ITIMER_REAL);
+ }
+
++static unsigned long long get_host_hz(void)
++{
++	char mhzline[16], *end;
++	int ret, mult, mhz, rest, len;
++	/* Host clock rate in Hz, parsed from the "cpu MHz" cpu_feature value. */
++	ret = cpu_feature("cpu MHz", mhzline,
++			  sizeof(mhzline) / sizeof(mhzline[0]));
++	if(!ret)
++		panic ("Could not get host MHZ");
++
++	mhz = strtoul(mhzline, &end, 10);
++
++	/* This business is to parse a floating point number without using
++	 * floating types: mhz holds the integer part, rest the fraction
++	 * scaled to exactly six decimal digits (cut or zero-padded).
++	 */
++	rest = 0;
++	mult = 0;
++	if(*end == '.'){
++		end++;
++		len = strlen(end);
++		if(len < 6)
++			mult = 6 - len;
++		else if(len > 6)
++			end[6] = '\0';
++		rest = strtoul(end, NULL, 10);
++		while(mult-- > 0)
++			rest *= 10;
++	}
++	/* Multiply in 64 bits: 1000000 * mhz overflows int above 2147 MHz. */
++	return(1000000ULL * mhz + rest);
++}
++
++unsigned long long host_hz = 0;
++
++extern int do_posix_clock_monotonic_gettime(struct timespec *tp);
++
+ void time_init(void)
+ {
++ struct timespec now;
++
++ host_hz = get_host_hz();
+ if(signal(SIGVTALRM, boot_timer_handler) == SIG_ERR)
+ panic("Couldn't set SIGVTALRM handler");
+ set_interval(ITIMER_VIRTUAL);
+-}
+
+-struct timeval local_offset = { 0, 0 };
++ do_posix_clock_monotonic_gettime(&now);
++ wall_to_monotonic.tv_sec = -now.tv_sec;
++ wall_to_monotonic.tv_nsec = -now.tv_nsec;
++}
+
+ void do_gettimeofday(struct timeval *tv)
+ {
+@@ -95,15 +146,13 @@
+ time_unlock(flags);
+ }
+
+-EXPORT_SYMBOL(do_gettimeofday);
+-
+ int do_settimeofday(struct timespec *tv)
+ {
+ struct timeval now;
+ unsigned long flags;
+ struct timeval tv_in;
+
+- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
++ if ((unsigned long) tv->tv_nsec >= UM_NSEC_PER_SEC)
+ return -EINVAL;
+
+ tv_in.tv_sec = tv->tv_sec;
+@@ -113,9 +162,9 @@
+ gettimeofday(&now, NULL);
+ timersub(&tv_in, &now, &local_offset);
+ time_unlock(flags);
+-}
+
+-EXPORT_SYMBOL(do_settimeofday);
++ return(0);
++}
+
+ void idle_sleep(int secs)
+ {
+diff -Naur a/arch/um/kernel/time_kern.c b/arch/um/kernel/time_kern.c
+--- a/arch/um/kernel/time_kern.c 2004-02-11 12:15:59.000000000 -0500
++++ b/arch/um/kernel/time_kern.c 2004-02-11 12:27:52.000000000 -0500
+@@ -30,6 +30,14 @@
+ return(HZ);
+ }
+
++/*
++ * Scheduler clock - nanoseconds, computed as jiffies_64 * (1000000000 / HZ).
++ */
++unsigned long long sched_clock(void)
++{
++ return (unsigned long long)jiffies_64 * (1000000000 / HZ);
++}
++
+ /* Changed at early boot */
+ int timer_irq_inited = 0;
+
+@@ -39,13 +47,47 @@
+ */
+ int __attribute__ ((__section__ (".unprotected"))) missed_ticks[NR_CPUS];
+
++static int first_tick;
++static unsigned long long prev_tsc;
++static long long delta; /* Deviation per interval */
++
++extern unsigned long long host_hz;
++
+ void timer_irq(union uml_pt_regs *regs)
+ {
+- int cpu = current->thread_info->cpu, ticks = missed_ticks[cpu];
++ unsigned long long ticks = 0;
++
++ if(!timer_irq_inited){
++ /* This is to ensure that ticks don't pile up when
++ * the timer handler is suspended */
++ first_tick = 0;
++ return;
++ }
++
++ if(first_tick){
++#if defined(CONFIG_UML_REAL_TIME_CLOCK)
++ unsigned long long tsc;
++ /* We've had 1 tick */
++ tsc = time_stamp();
++
++ delta += tsc - prev_tsc;
++ prev_tsc = tsc;
++
++ ticks += (delta * HZ) / host_hz;
++ delta -= (ticks * host_hz) / HZ;
++#else
++ ticks = 1;
++#endif
++ }
++ else {
++ prev_tsc = time_stamp();
++ first_tick = 1;
++ }
+
+- if(!timer_irq_inited) return;
+- missed_ticks[cpu] = 0;
+- while(ticks--) do_IRQ(TIMER_IRQ, regs);
++ while(ticks > 0){
++ do_IRQ(TIMER_IRQ, regs);
++ ticks--;
++ }
+ }
+
+ void boot_timer_handler(int sig)
+@@ -58,12 +100,13 @@
+ do_timer(®s);
+ }
+
+-void um_timer(int irq, void *dev, struct pt_regs *regs)
++irqreturn_t um_timer(int irq, void *dev, struct pt_regs *regs)
+ {
+ do_timer(regs);
+- write_seqlock(&xtime_lock);
++ write_seqlock_irq(&xtime_lock);
+ timer();
+- write_sequnlock(&xtime_lock);
++ write_sequnlock_irq(&xtime_lock);
++ return(IRQ_HANDLED);
+ }
+
+ long um_time(int * tloc)
+@@ -81,12 +124,12 @@
+ long um_stime(int * tptr)
+ {
+ int value;
+- struct timeval new;
++ struct timespec new;
+
+ if (get_user(value, tptr))
+ return -EFAULT;
+ new.tv_sec = value;
+- new.tv_usec = 0;
++ new.tv_nsec = 0;
+ do_settimeofday(&new);
+ return 0;
+ }
+@@ -125,9 +168,11 @@
+ void timer_handler(int sig, union uml_pt_regs *regs)
+ {
+ #ifdef CONFIG_SMP
++ local_irq_disable();
+ update_process_times(user_context(UPT_SP(regs)));
++ local_irq_enable();
+ #endif
+- if(current->thread_info->cpu == 0)
++ if(current_thread->cpu == 0)
+ timer_irq(regs);
+ }
+
+@@ -136,6 +181,7 @@
+ unsigned long time_lock(void)
+ {
+ unsigned long flags;
++
+ spin_lock_irqsave(&timer_spinlock, flags);
+ return(flags);
+ }
+@@ -150,8 +196,8 @@
+ int err;
+
+ CHOOSE_MODE(user_time_init_tt(), user_time_init_skas());
+- if((err = request_irq(TIMER_IRQ, um_timer, SA_INTERRUPT, "timer",
+- NULL)) != 0)
++ err = request_irq(TIMER_IRQ, um_timer, SA_INTERRUPT, "timer", NULL);
++ if(err != 0)
+ printk(KERN_ERR "timer_init : request_irq failed - "
+ "errno = %d\n", -err);
+ timer_irq_inited = 1;
+@@ -160,7 +206,6 @@
+
+ __initcall(timer_init);
+
+-
+ /*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+diff -Naur a/arch/um/kernel/trap_kern.c b/arch/um/kernel/trap_kern.c
+--- a/arch/um/kernel/trap_kern.c 2004-02-11 12:14:18.000000000 -0500
++++ b/arch/um/kernel/trap_kern.c 2004-02-11 12:26:00.000000000 -0500
+@@ -16,12 +16,15 @@
+ #include "asm/tlbflush.h"
+ #include "asm/a.out.h"
+ #include "asm/current.h"
++#include "asm/irq.h"
+ #include "user_util.h"
+ #include "kern_util.h"
+ #include "kern.h"
+ #include "chan_kern.h"
+ #include "mconsole_kern.h"
+ #include "2_5compat.h"
++#include "mem.h"
++#include "mem_kern.h"
+
+ int handle_page_fault(unsigned long address, unsigned long ip,
+ int is_write, int is_user, int *code_out)
+@@ -51,12 +54,12 @@
+ if(is_write && !(vma->vm_flags & VM_WRITE))
+ goto out;
+ page = address & PAGE_MASK;
+- if(page == (unsigned long) current->thread_info + PAGE_SIZE)
++ if(page == (unsigned long) current_thread + PAGE_SIZE)
+ panic("Kernel stack overflow");
+ pgd = pgd_offset(mm, page);
+ pmd = pmd_offset(pgd, page);
+- survive:
+ do {
++ survive:
+ switch (handle_mm_fault(mm, vma, address, is_write)){
+ case VM_FAULT_MINOR:
+ current->min_flt++;
+@@ -71,14 +74,20 @@
+ err = -ENOMEM;
+ goto out_of_memory;
+ default:
+- BUG();
++ if (current->pid == 1) {
++ up_read(&mm->mmap_sem);
++ yield();
++ down_read(&mm->mmap_sem);
++ goto survive;
++ }
++ goto out;
+ }
+ pte = pte_offset_kernel(pmd, page);
+ } while(!pte_present(*pte));
++ err = 0;
+ *pte = pte_mkyoung(*pte);
+ if(pte_write(*pte)) *pte = pte_mkdirty(*pte);
+ flush_tlb_page(vma, page);
+- err = 0;
+ out:
+ up_read(&mm->mmap_sem);
+ return(err);
+@@ -98,6 +107,33 @@
+ goto out;
+ }
+
++LIST_HEAD(physmem_remappers);
++
++void register_remapper(struct remapper *info)
++{
++ list_add(&info->list, &physmem_remappers); /* check_remapped_addr walks this list */
++}
++
++static int check_remapped_addr(unsigned long address, int is_write)
++{
++	struct list_head *pos;
++	struct remapper *cur;
++	__u64 offset;
++	int fd = phys_mapping(__pa(address), &offset);
++
++	if(fd == -1)
++		return(0);
++
++	/* The first remapper whose proc returns non-zero ends the search. */
++	list_for_each(pos, &physmem_remappers){
++		cur = list_entry(pos, struct remapper, list);
++		if(cur->proc(fd, address, is_write, offset))
++			return(1);
++	}
++
++	return(0);
++}
++
+ unsigned long segv(unsigned long address, unsigned long ip, int is_write,
+ int is_user, void *sc)
+ {
+@@ -109,7 +145,9 @@
+ flush_tlb_kernel_vm();
+ return(0);
+ }
+- if(current->mm == NULL)
++ else if(check_remapped_addr(address & PAGE_MASK, is_write))
++ return(0);
++ else if(current->mm == NULL)
+ panic("Segfault with no mm");
+ err = handle_page_fault(address, ip, is_write, is_user, &si.si_code);
+
+@@ -120,9 +158,8 @@
+ current->thread.fault_addr = (void *) address;
+ do_longjmp(catcher, 1);
+ }
+- else if(current->thread.fault_addr != NULL){
++ else if(current->thread.fault_addr != NULL)
+ panic("fault_addr set but no fault catcher");
+- }
+ else if(arch_fixup(ip, sc))
+ return(0);
+
+@@ -155,8 +192,6 @@
+ {
+ struct siginfo si;
+
+- printk(KERN_ERR "Unfixable SEGV in '%s' (pid %d) at 0x%lx "
+- "(ip 0x%lx)\n", current->comm, current->pid, address, ip);
+ si.si_signo = SIGSEGV;
+ si.si_code = SEGV_ACCERR;
+ si.si_addr = (void *) address;
+@@ -180,6 +215,11 @@
+ else relay_signal(sig, regs);
+ }
+
++void winch(int sig, union uml_pt_regs *regs)
++{
++ do_IRQ(WINCH_IRQ, regs); /* sig itself is not used; regs is passed through */
++}
++
+ void trap_init(void)
+ {
+ }
+diff -Naur a/arch/um/kernel/trap_user.c b/arch/um/kernel/trap_user.c
+--- a/arch/um/kernel/trap_user.c 2004-02-11 12:15:23.000000000 -0500
++++ b/arch/um/kernel/trap_user.c 2004-02-11 12:27:10.000000000 -0500
+@@ -5,11 +5,9 @@
+
+ #include <stdlib.h>
+ #include <errno.h>
+-#include <fcntl.h>
+ #include <setjmp.h>
+ #include <signal.h>
+ #include <sys/time.h>
+-#include <sys/ioctl.h>
+ #include <sys/ptrace.h>
+ #include <sys/wait.h>
+ #include <asm/page.h>
+@@ -82,6 +80,8 @@
+ .is_irq = 0 },
+ [ SIGILL ] { .handler = relay_signal,
+ .is_irq = 0 },
++ [ SIGWINCH ] { .handler = winch,
++ .is_irq = 1 },
+ [ SIGBUS ] { .handler = bus_handler,
+ .is_irq = 0 },
+ [ SIGSEGV] { .handler = segv_handler,
+@@ -123,7 +123,7 @@
+ {
+ jmp_buf *buf = b;
+
+- longjmp(*buf, val);
++ siglongjmp(*buf, val);
+ }
+
+ /*
+diff -Naur a/arch/um/kernel/tt/exec_kern.c b/arch/um/kernel/tt/exec_kern.c
+--- a/arch/um/kernel/tt/exec_kern.c 2004-02-11 12:14:28.000000000 -0500
++++ b/arch/um/kernel/tt/exec_kern.c 2004-02-11 12:26:11.000000000 -0500
+@@ -17,6 +17,7 @@
+ #include "mem_user.h"
+ #include "os.h"
+ #include "tlb.h"
++#include "mode.h"
+
+ static int exec_tramp(void *sig_stack)
+ {
+@@ -47,17 +48,17 @@
+ do_exit(SIGKILL);
+ }
+
+- if(current->thread_info->cpu == 0)
++ if(current_thread->cpu == 0)
+ forward_interrupts(new_pid);
+ current->thread.request.op = OP_EXEC;
+ current->thread.request.u.exec.pid = new_pid;
+- unprotect_stack((unsigned long) current->thread_info);
++ unprotect_stack((unsigned long) current_thread);
+ os_usr1_process(os_getpid());
+
+ enable_timer();
+ free_page(stack);
+ protect_memory(uml_reserved, high_physmem - uml_reserved, 1, 1, 0, 1);
+- task_protections((unsigned long) current->thread_info);
++ task_protections((unsigned long) current_thread);
+ force_flush_all();
+ unblock_signals();
+ }
+diff -Naur a/arch/um/kernel/tt/include/mode.h b/arch/um/kernel/tt/include/mode.h
+--- a/arch/um/kernel/tt/include/mode.h 2004-02-11 12:16:01.000000000 -0500
++++ b/arch/um/kernel/tt/include/mode.h 2004-02-11 12:27:56.000000000 -0500
+@@ -8,6 +8,8 @@
+
+ #include "sysdep/ptrace.h"
+
++enum { OP_NONE, OP_EXEC, OP_FORK, OP_TRACE_ON, OP_REBOOT, OP_HALT, OP_CB };
++
+ extern int tracing_pid;
+
+ extern int tracer(int (*init_proc)(void *), void *sp);
+diff -Naur a/arch/um/kernel/tt/include/uaccess.h b/arch/um/kernel/tt/include/uaccess.h
+--- a/arch/um/kernel/tt/include/uaccess.h 2004-02-11 12:16:02.000000000 -0500
++++ b/arch/um/kernel/tt/include/uaccess.h 2004-02-11 12:27:56.000000000 -0500
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
++ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+@@ -43,65 +43,19 @@
+
+ extern int __do_copy_from_user(void *to, const void *from, int n,
+ void **fault_addr, void **fault_catcher);
+-
+-static inline int copy_from_user_tt(void *to, const void *from, int n)
+-{
+- return(access_ok_tt(VERIFY_READ, from, n) ?
+- __do_copy_from_user(to, from, n,
+- ¤t->thread.fault_addr,
+- ¤t->thread.fault_catcher) : n);
+-}
+-
+-static inline int copy_to_user_tt(void *to, const void *from, int n)
+-{
+- return(access_ok_tt(VERIFY_WRITE, to, n) ?
+- __do_copy_to_user(to, from, n,
+- ¤t->thread.fault_addr,
+- ¤t->thread.fault_catcher) : n);
+-}
+-
+ extern int __do_strncpy_from_user(char *dst, const char *src, size_t n,
+ void **fault_addr, void **fault_catcher);
+-
+-static inline int strncpy_from_user_tt(char *dst, const char *src, int count)
+-{
+- int n;
+-
+- if(!access_ok_tt(VERIFY_READ, src, 1)) return(-EFAULT);
+- n = __do_strncpy_from_user(dst, src, count,
+- ¤t->thread.fault_addr,
+- ¤t->thread.fault_catcher);
+- if(n < 0) return(-EFAULT);
+- return(n);
+-}
+-
+ extern int __do_clear_user(void *mem, size_t len, void **fault_addr,
+ void **fault_catcher);
+-
+-static inline int __clear_user_tt(void *mem, int len)
+-{
+- return(__do_clear_user(mem, len,
+- ¤t->thread.fault_addr,
+- ¤t->thread.fault_catcher));
+-}
+-
+-static inline int clear_user_tt(void *mem, int len)
+-{
+- return(access_ok_tt(VERIFY_WRITE, mem, len) ?
+- __do_clear_user(mem, len,
+- ¤t->thread.fault_addr,
+- ¤t->thread.fault_catcher) : len);
+-}
+-
+ extern int __do_strnlen_user(const char *str, unsigned long n,
+ void **fault_addr, void **fault_catcher);
+
+-static inline int strnlen_user_tt(const void *str, int len)
+-{
+- return(__do_strnlen_user(str, len,
+- ¤t->thread.fault_addr,
+- ¤t->thread.fault_catcher));
+-}
++extern int copy_from_user_tt(void *to, const void *from, int n);
++extern int copy_to_user_tt(void *to, const void *from, int n);
++extern int strncpy_from_user_tt(char *dst, const char *src, int count);
++extern int __clear_user_tt(void *mem, int len);
++extern int clear_user_tt(void *mem, int len);
++extern int strnlen_user_tt(const void *str, int len);
+
+ #endif
+
+diff -Naur a/arch/um/kernel/tt/Makefile b/arch/um/kernel/tt/Makefile
+--- a/arch/um/kernel/tt/Makefile 2004-02-11 12:15:45.000000000 -0500
++++ b/arch/um/kernel/tt/Makefile 2004-02-11 12:27:36.000000000 -0500
+@@ -1,5 +1,5 @@
+ #
+-# Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
++# Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com)
+ # Licensed under the GPL
+ #
+
+@@ -7,7 +7,7 @@
+
+ obj-y = exec_kern.o exec_user.o gdb.o ksyms.o mem.o mem_user.o process_kern.o \
+ syscall_kern.o syscall_user.o time.o tlb.o tracer.o trap_user.o \
+- uaccess_user.o sys-$(SUBARCH)/
++ uaccess.o uaccess_user.o sys-$(SUBARCH)/
+
+ obj-$(CONFIG_PT_PROXY) += gdb_kern.o ptproxy/
+
+@@ -27,5 +27,3 @@
+
+ $(obj)/unmap_fin.o : $(src)/unmap.o
+ ld -r -o $@ $< -lc -L/usr/lib
+-
+-clean :
+diff -Naur a/arch/um/kernel/tt/mem_user.c b/arch/um/kernel/tt/mem_user.c
+--- a/arch/um/kernel/tt/mem_user.c 2004-02-11 12:14:34.000000000 -0500
++++ b/arch/um/kernel/tt/mem_user.c 2004-02-11 12:26:17.000000000 -0500
+@@ -25,14 +25,13 @@
+ size = (unsigned long) segment_end -
+ (unsigned long) segment_start;
+ data = create_mem_file(size);
+- if((addr = mmap(NULL, size, PROT_WRITE | PROT_READ,
+- MAP_SHARED, data, 0)) == MAP_FAILED){
++ addr = mmap(NULL, size, PROT_WRITE | PROT_READ, MAP_SHARED, data, 0);
++ if(addr == MAP_FAILED){
+ perror("mapping new data segment");
+ exit(1);
+ }
+ memcpy(addr, segment_start, size);
+- if(switcheroo(data, prot, addr, segment_start,
+- size) < 0){
++ if(switcheroo(data, prot, addr, segment_start, size) < 0){
+ printf("switcheroo failed\n");
+ exit(1);
+ }
+diff -Naur a/arch/um/kernel/tt/process_kern.c b/arch/um/kernel/tt/process_kern.c
+--- a/arch/um/kernel/tt/process_kern.c 2004-02-11 12:16:33.000000000 -0500
++++ b/arch/um/kernel/tt/process_kern.c 2004-02-11 12:28:32.000000000 -0500
+@@ -62,7 +62,7 @@
+ reading = 0;
+ err = os_write_file(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c));
+ if(err != sizeof(c))
+- panic("write of switch_pipe failed, errno = %d", -err);
++ panic("write of switch_pipe failed, err = %d", -err);
+
+ reading = 1;
+ if((from->state == TASK_ZOMBIE) || (from->state == TASK_DEAD))
+@@ -104,48 +104,72 @@
+
+ void release_thread_tt(struct task_struct *task)
+ {
+- os_kill_process(task->thread.mode.tt.extern_pid, 0);
++ int pid = task->thread.mode.tt.extern_pid;
++
++ if(os_getpid() != pid)
++ os_kill_process(pid, 0);
+ }
+
+ void exit_thread_tt(void)
+ {
+- close(current->thread.mode.tt.switch_pipe[0]);
+- close(current->thread.mode.tt.switch_pipe[1]);
++ os_close_file(current->thread.mode.tt.switch_pipe[0]);
++ os_close_file(current->thread.mode.tt.switch_pipe[1]);
+ }
+
+ void schedule_tail(task_t *prev);
+
+ static void new_thread_handler(int sig)
+ {
++ unsigned long disable;
+ int (*fn)(void *);
+ void *arg;
+
+ fn = current->thread.request.u.thread.proc;
+ arg = current->thread.request.u.thread.arg;
++
+ UPT_SC(&current->thread.regs.regs) = (void *) (&sig + 1);
++ disable = (1 << (SIGVTALRM - 1)) | (1 << (SIGALRM - 1)) |
++ (1 << (SIGIO - 1)) | (1 << (SIGPROF - 1));
++ SC_SIGMASK(UPT_SC(&current->thread.regs.regs)) &= ~disable;
++
+ suspend_new_thread(current->thread.mode.tt.switch_pipe[0]);
+
+- block_signals();
++ force_flush_all();
++ if(current->thread.prev_sched != NULL)
++ schedule_tail(current->thread.prev_sched);
++ current->thread.prev_sched = NULL;
++
+ init_new_thread_signals(1);
+-#ifdef CONFIG_SMP
+- schedule_tail(current->thread.prev_sched);
+-#endif
+ enable_timer();
+ free_page(current->thread.temp_stack);
+ set_cmdline("(kernel thread)");
+- force_flush_all();
+
+- current->thread.prev_sched = NULL;
+ change_sig(SIGUSR1, 1);
+ change_sig(SIGVTALRM, 1);
+ change_sig(SIGPROF, 1);
+- unblock_signals();
++ local_irq_enable();
+ if(!run_kernel_thread(fn, arg, &current->thread.exec_buf))
+ do_exit(0);
+ }
+
+ static int new_thread_proc(void *stack)
+ {
++ /* local_irq_disable is needed to block out signals until this thread is
++ * properly scheduled. Otherwise, the tracing thread will get mighty
++ * upset about any signals that arrive before that.
++ * This has the complication that it sets the saved signal mask in
++ * the sigcontext to block signals. This gets restored when this
++ * thread (or a descendant, since they get a copy of this sigcontext)
++ * returns to userspace.
++ * So, this is compensated for elsewhere.
++ * XXX There is still a small window until local_irq_disable() actually
++ * finishes where signals are possible - shouldn't be a problem in
++ * practice since SIGIO hasn't been forwarded here yet, and the
++ * local_irq_disable should finish before a SIGVTALRM has time to be
++ * delivered.
++ */
++
++ local_irq_disable();
+ init_new_thread_stack(stack, new_thread_handler);
+ os_usr1_process(os_getpid());
+ return(0);
+@@ -156,7 +180,7 @@
+ * itself with a SIGUSR1. set_user_mode has to be run with SIGUSR1 off,
+ * so it is blocked before it's called. They are re-enabled on sigreturn
+ * despite the fact that they were blocked when the SIGUSR1 was issued because
+- * copy_thread copies the parent's signcontext, including the signal mask
++ * copy_thread copies the parent's sigcontext, including the signal mask
+ * onto the signal frame.
+ */
+
+@@ -165,35 +189,32 @@
+ UPT_SC(&current->thread.regs.regs) = (void *) (&sig + 1);
+ suspend_new_thread(current->thread.mode.tt.switch_pipe[0]);
+
+-#ifdef CONFIG_SMP
+- schedule_tail(NULL);
+-#endif
++ force_flush_all();
++ if(current->thread.prev_sched != NULL)
++ schedule_tail(current->thread.prev_sched);
++ current->thread.prev_sched = NULL;
++
+ enable_timer();
+ change_sig(SIGVTALRM, 1);
+ local_irq_enable();
+- force_flush_all();
+ if(current->mm != current->parent->mm)
+ protect_memory(uml_reserved, high_physmem - uml_reserved, 1,
+ 1, 0, 1);
+- task_protections((unsigned long) current->thread_info);
+-
+- current->thread.prev_sched = NULL;
++ task_protections((unsigned long) current_thread);
+
+ free_page(current->thread.temp_stack);
++ local_irq_disable();
+ change_sig(SIGUSR1, 0);
+ set_user_mode(current);
+ }
+
+-static int sigusr1 = SIGUSR1;
+-
+ int fork_tramp(void *stack)
+ {
+- int sig = sigusr1;
+-
+ local_irq_disable();
++ arch_init_thread();
+ init_new_thread_stack(stack, finish_fork_handler);
+
+- kill(os_getpid(), sig);
++ os_usr1_process(os_getpid());
+ return(0);
+ }
+
+@@ -213,8 +234,8 @@
+ }
+
+ err = os_pipe(p->thread.mode.tt.switch_pipe, 1, 1);
+- if(err){
+- printk("copy_thread : pipe failed, errno = %d\n", -err);
++ if(err < 0){
++ printk("copy_thread : pipe failed, err = %d\n", -err);
+ return(err);
+ }
+
+@@ -377,8 +398,8 @@
+
+ pages = (1 << CONFIG_KERNEL_STACK_ORDER);
+
+- start = (unsigned long) current->thread_info + PAGE_SIZE;
+- end = (unsigned long) current + PAGE_SIZE * pages;
++ start = (unsigned long) current_thread + PAGE_SIZE;
++ end = (unsigned long) current_thread + PAGE_SIZE * pages;
+ protect_memory(uml_reserved, start - uml_reserved, 1, w, 1, 1);
+ protect_memory(end, high_physmem - end, 1, w, 1, 1);
+
+@@ -454,8 +475,9 @@
+
+ init_task.thread.mode.tt.extern_pid = pid;
+ err = os_pipe(init_task.thread.mode.tt.switch_pipe, 1, 1);
+- if(err) panic("Can't create switch pipe for init_task, errno = %d",
+- err);
++ if(err)
++ panic("Can't create switch pipe for init_task, errno = %d",
++ -err);
+ }
+
+ int singlestepping_tt(void *t)
+diff -Naur a/arch/um/kernel/tt/ptproxy/Makefile b/arch/um/kernel/tt/ptproxy/Makefile
+--- a/arch/um/kernel/tt/ptproxy/Makefile 2004-02-11 12:15:11.000000000 -0500
++++ b/arch/um/kernel/tt/ptproxy/Makefile 2004-02-11 12:26:57.000000000 -0500
+@@ -9,5 +9,3 @@
+
+ $(USER_OBJS) : %.o: %.c
+ $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $<
+-
+-clean:
+diff -Naur a/arch/um/kernel/tt/ptproxy/proxy.c b/arch/um/kernel/tt/ptproxy/proxy.c
+--- a/arch/um/kernel/tt/ptproxy/proxy.c 2004-02-11 12:15:57.000000000 -0500
++++ b/arch/um/kernel/tt/ptproxy/proxy.c 2004-02-11 12:27:50.000000000 -0500
+@@ -15,7 +15,6 @@
+ #include <unistd.h>
+ #include <signal.h>
+ #include <string.h>
+-#include <fcntl.h>
+ #include <termios.h>
+ #include <sys/wait.h>
+ #include <sys/types.h>
+@@ -293,10 +292,10 @@
+ }
+
+ char gdb_init_string[] =
+-"att 1
+-b panic
+-b stop
+-handle SIGWINCH nostop noprint pass
++"att 1 \n\
++b panic \n\
++b stop \n\
++handle SIGWINCH nostop noprint pass \n\
+ ";
+
+ int start_debugger(char *prog, int startup, int stop, int *fd_out)
+@@ -304,7 +303,8 @@
+ int slave, child;
+
+ slave = open_gdb_chan();
+- if((child = fork()) == 0){
++ child = fork();
++ if(child == 0){
+ char *tempname = NULL;
+ int fd;
+
+@@ -327,18 +327,19 @@
+ exit(1);
+ #endif
+ }
+- if((fd = make_tempfile("/tmp/gdb_init-XXXXXX", &tempname, 0)) < 0){
+- printk("start_debugger : make_tempfile failed, errno = %d\n",
+- errno);
++ fd = make_tempfile("/tmp/gdb_init-XXXXXX", &tempname, 0);
++ if(fd < 0){
++ printk("start_debugger : make_tempfile failed,"
++ "err = %d\n", -fd);
+ exit(1);
+ }
+- write(fd, gdb_init_string, sizeof(gdb_init_string) - 1);
++ os_write_file(fd, gdb_init_string, sizeof(gdb_init_string) - 1);
+ if(startup){
+ if(stop){
+- write(fd, "b start_kernel\n",
++ os_write_file(fd, "b start_kernel\n",
+ strlen("b start_kernel\n"));
+ }
+- write(fd, "c\n", strlen("c\n"));
++ os_write_file(fd, "c\n", strlen("c\n"));
+ }
+ if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){
+ printk("start_debugger : PTRACE_TRACEME failed, "
+diff -Naur a/arch/um/kernel/tt/ptproxy/sysdep.c b/arch/um/kernel/tt/ptproxy/sysdep.c
+--- a/arch/um/kernel/tt/ptproxy/sysdep.c 2004-02-11 12:16:27.000000000 -0500
++++ b/arch/um/kernel/tt/ptproxy/sysdep.c 2004-02-11 12:28:24.000000000 -0500
+@@ -9,6 +9,7 @@
+ #include <string.h>
+ #include <stdlib.h>
+ #include <signal.h>
++#include <errno.h>
+ #include <sys/types.h>
+ #include <sys/ptrace.h>
+ #include <asm/ptrace.h>
+diff -Naur a/arch/um/kernel/tt/ptproxy/wait.c b/arch/um/kernel/tt/ptproxy/wait.c
+--- a/arch/um/kernel/tt/ptproxy/wait.c 2004-02-11 12:16:37.000000000 -0500
++++ b/arch/um/kernel/tt/ptproxy/wait.c 2004-02-11 12:28:37.000000000 -0500
+@@ -56,21 +56,23 @@
+ int real_wait_return(struct debugger *debugger)
+ {
+ unsigned long ip;
+- int err, pid;
++ int pid;
+
+ pid = debugger->pid;
++
+ ip = ptrace(PTRACE_PEEKUSER, pid, PT_IP_OFFSET, 0);
+- ip = IP_RESTART_SYSCALL(ip);
+- err = ptrace(PTRACE_POKEUSER, pid, PT_IP_OFFSET, ip);
++ IP_RESTART_SYSCALL(ip);
++
+ if(ptrace(PTRACE_POKEUSER, pid, PT_IP_OFFSET, ip) < 0)
+ tracer_panic("real_wait_return : Failed to restart system "
+- "call, errno = %d\n");
++ "call, errno = %d\n", errno);
++
+ if((ptrace(PTRACE_SYSCALL, debugger->pid, 0, SIGCHLD) < 0) ||
+ (ptrace(PTRACE_SYSCALL, debugger->pid, 0, 0) < 0) ||
+ (ptrace(PTRACE_SYSCALL, debugger->pid, 0, 0) < 0) ||
+ debugger_normal_return(debugger, -1))
+ tracer_panic("real_wait_return : gdb failed to wait, "
+- "errno = %d\n");
++ "errno = %d\n", errno);
+ return(0);
+ }
+
+diff -Naur a/arch/um/kernel/tt/syscall_kern.c b/arch/um/kernel/tt/syscall_kern.c
+--- a/arch/um/kernel/tt/syscall_kern.c 2004-02-11 12:14:00.000000000 -0500
++++ b/arch/um/kernel/tt/syscall_kern.c 2004-02-11 12:25:40.000000000 -0500
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
++ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+diff -Naur a/arch/um/kernel/tt/sys-i386/Makefile b/arch/um/kernel/tt/sys-i386/Makefile
+--- a/arch/um/kernel/tt/sys-i386/Makefile 2004-02-11 12:15:17.000000000 -0500
++++ b/arch/um/kernel/tt/sys-i386/Makefile 2004-02-11 12:27:05.000000000 -0500
+@@ -10,5 +10,3 @@
+
+ $(USER_OBJS) : %.o: %.c
+ $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $<
+-
+-clean :
+diff -Naur a/arch/um/kernel/tt/tlb.c b/arch/um/kernel/tt/tlb.c
+--- a/arch/um/kernel/tt/tlb.c 2004-02-11 12:14:18.000000000 -0500
++++ b/arch/um/kernel/tt/tlb.c 2004-02-11 12:26:01.000000000 -0500
+@@ -10,6 +10,7 @@
+ #include "asm/page.h"
+ #include "asm/pgtable.h"
+ #include "asm/uaccess.h"
++#include "asm/tlbflush.h"
+ #include "user_util.h"
+ #include "mem_user.h"
+ #include "os.h"
+diff -Naur a/arch/um/kernel/tt/tracer.c b/arch/um/kernel/tt/tracer.c
+--- a/arch/um/kernel/tt/tracer.c 2004-02-11 12:14:16.000000000 -0500
++++ b/arch/um/kernel/tt/tracer.c 2004-02-11 12:25:59.000000000 -0500
+@@ -39,16 +39,17 @@
+ return(0);
+
+ register_winch_irq(tracer_winch[0], fd, -1, data);
+- return(0);
++ return(1);
+ }
+
+ static void tracer_winch_handler(int sig)
+ {
++ int n;
+ char c = 1;
+
+- if(write(tracer_winch[1], &c, sizeof(c)) != sizeof(c))
+- printk("tracer_winch_handler - write failed, errno = %d\n",
+- errno);
++ n = os_write_file(tracer_winch[1], &c, sizeof(c));
++ if(n != sizeof(c))
++ printk("tracer_winch_handler - write failed, err = %d\n", -n);
+ }
+
+ /* Called only by the tracing thread during initialization */
+@@ -58,9 +59,8 @@
+ int err;
+
+ err = os_pipe(tracer_winch, 1, 1);
+- if(err){
+- printk("setup_tracer_winch : os_pipe failed, errno = %d\n",
+- -err);
++ if(err < 0){
++ printk("setup_tracer_winch : os_pipe failed, err = %d\n", -err);
+ return;
+ }
+ signal(SIGWINCH, tracer_winch_handler);
+@@ -130,8 +130,8 @@
+ case SIGTSTP:
+ if(ptrace(PTRACE_CONT, pid, 0, sig) < 0)
+ tracer_panic("sleeping_process_signal : Failed to "
+- "continue pid %d, errno = %d\n", pid,
+- sig);
++ "continue pid %d, signal = %d, "
++ "errno = %d\n", pid, sig, errno);
+ break;
+
+ /* This happens when the debugger (e.g. strace) is doing system call
+@@ -145,7 +145,7 @@
+ if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0)
+ tracer_panic("sleeping_process_signal : Failed to "
+ "PTRACE_SYSCALL pid %d, errno = %d\n",
+- pid, sig);
++ pid, errno);
+ break;
+ case SIGSTOP:
+ break;
+@@ -218,7 +218,7 @@
+ err = attach(debugger_parent);
+ if(err){
+ printf("Failed to attach debugger parent %d, "
+- "errno = %d\n", debugger_parent, err);
++ "errno = %d\n", debugger_parent, -err);
+ debugger_parent = -1;
+ }
+ else {
+@@ -233,7 +233,8 @@
+ }
+ set_cmdline("(tracing thread)");
+ while(1){
+- if((pid = waitpid(-1, &status, WUNTRACED)) <= 0){
++ pid = waitpid(-1, &status, WUNTRACED);
++ if(pid <= 0){
+ if(errno != ECHILD){
+ printf("wait failed - errno = %d\n", errno);
+ }
+@@ -401,7 +402,7 @@
+
+ if(!strcmp(line, "go")) debug_stop = 0;
+ else if(!strcmp(line, "parent")) debug_parent = 1;
+- else printk("Unknown debug option : '%s'\n", line);
++ else printf("Unknown debug option : '%s'\n", line);
+
+ line = next;
+ }
+diff -Naur a/arch/um/kernel/tt/uaccess.c b/arch/um/kernel/tt/uaccess.c
+--- a/arch/um/kernel/tt/uaccess.c 1969-12-31 19:00:00.000000000 -0500
++++ b/arch/um/kernel/tt/uaccess.c 2004-02-11 12:25:43.000000000 -0500
+@@ -0,0 +1,73 @@
++/*
++ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
++ * Licensed under the GPL
++ */
++
++#include "linux/sched.h"
++#include "asm/uaccess.h"
++
++int copy_from_user_tt(void *to, const void *from, int n)
++{
++ if(!access_ok_tt(VERIFY_READ, from, n))
++ return(n);
++
++ return(__do_copy_from_user(to, from, n, &current->thread.fault_addr,
++ &current->thread.fault_catcher));
++}
++
++int copy_to_user_tt(void *to, const void *from, int n)
++{
++ if(!access_ok_tt(VERIFY_WRITE, to, n))
++ return(n);
++
++ return(__do_copy_to_user(to, from, n, &current->thread.fault_addr,
++ &current->thread.fault_catcher));
++}
++
++int strncpy_from_user_tt(char *dst, const char *src, int count)
++{
++ int n;
++
++ if(!access_ok_tt(VERIFY_READ, src, 1))
++ return(-EFAULT);
++
++ n = __do_strncpy_from_user(dst, src, count,
++ &current->thread.fault_addr,
++ &current->thread.fault_catcher);
++ if(n < 0) return(-EFAULT);
++ return(n);
++}
++
++int __clear_user_tt(void *mem, int len)
++{
++ return(__do_clear_user(mem, len,
++ &current->thread.fault_addr,
++ &current->thread.fault_catcher));
++}
++
++int clear_user_tt(void *mem, int len)
++{
++ if(!access_ok_tt(VERIFY_WRITE, mem, len))
++ return(len);
++
++ return(__do_clear_user(mem, len, &current->thread.fault_addr,
++ &current->thread.fault_catcher));
++}
++
++int strnlen_user_tt(const void *str, int len)
++{
++ return(__do_strnlen_user(str, len,
++ &current->thread.fault_addr,
++ &current->thread.fault_catcher));
++}
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/kernel/tt/uaccess_user.c b/arch/um/kernel/tt/uaccess_user.c
+--- a/arch/um/kernel/tt/uaccess_user.c 2004-02-11 12:14:38.000000000 -0500
++++ b/arch/um/kernel/tt/uaccess_user.c 2004-02-11 12:26:42.000000000 -0500
+@@ -8,15 +8,20 @@
+ #include <string.h>
+ #include "user_util.h"
+ #include "uml_uaccess.h"
++#include "task.h"
++#include "kern_util.h"
+
+ int __do_copy_from_user(void *to, const void *from, int n,
+ void **fault_addr, void **fault_catcher)
+ {
++ struct tt_regs save = TASK_REGS(get_current())->tt;
+ unsigned long fault;
+ int faulted;
+
+ fault = __do_user_copy(to, from, n, fault_addr, fault_catcher,
+ __do_copy, &faulted);
++ TASK_REGS(get_current())->tt = save;
++
+ if(!faulted) return(0);
+ else return(n - (fault - (unsigned long) from));
+ }
+@@ -29,11 +34,14 @@
+ int __do_strncpy_from_user(char *dst, const char *src, unsigned long count,
+ void **fault_addr, void **fault_catcher)
+ {
++ struct tt_regs save = TASK_REGS(get_current())->tt;
+ unsigned long fault;
+ int faulted;
+
+ fault = __do_user_copy(dst, src, count, fault_addr, fault_catcher,
+ __do_strncpy, &faulted);
++ TASK_REGS(get_current())->tt = save;
++
+ if(!faulted) return(strlen(dst));
+ else return(-1);
+ }
+@@ -46,11 +54,14 @@
+ int __do_clear_user(void *mem, unsigned long len,
+ void **fault_addr, void **fault_catcher)
+ {
++ struct tt_regs save = TASK_REGS(get_current())->tt;
+ unsigned long fault;
+ int faulted;
+
+ fault = __do_user_copy(mem, NULL, len, fault_addr, fault_catcher,
+ __do_clear, &faulted);
++ TASK_REGS(get_current())->tt = save;
++
+ if(!faulted) return(0);
+ else return(len - (fault - (unsigned long) mem));
+ }
+@@ -58,19 +69,20 @@
+ int __do_strnlen_user(const char *str, unsigned long n,
+ void **fault_addr, void **fault_catcher)
+ {
++ struct tt_regs save = TASK_REGS(get_current())->tt;
+ int ret;
+ unsigned long *faddrp = (unsigned long *)fault_addr;
+ jmp_buf jbuf;
+
+ *fault_catcher = &jbuf;
+- if(setjmp(jbuf) == 0){
++ if(sigsetjmp(jbuf, 1) == 0)
+ ret = strlen(str) + 1;
+- }
+- else {
+- ret = *faddrp - (unsigned long) str;
+- }
++ else ret = *faddrp - (unsigned long) str;
++
+ *fault_addr = NULL;
+ *fault_catcher = NULL;
++
++ TASK_REGS(get_current())->tt = save;
+ return ret;
+ }
+
+diff -Naur a/arch/um/kernel/tt/unmap.c b/arch/um/kernel/tt/unmap.c
+--- a/arch/um/kernel/tt/unmap.c 2004-02-11 12:16:26.000000000 -0500
++++ b/arch/um/kernel/tt/unmap.c 2004-02-11 12:28:22.000000000 -0500
+@@ -3,10 +3,7 @@
+ * Licensed under the GPL
+ */
+
+-#include <stdio.h>
+-#include <errno.h>
+ #include <sys/mman.h>
+-#include "user.h"
+
+ int switcheroo(int fd, int prot, void *from, void *to, int size)
+ {
+diff -Naur a/arch/um/kernel/tty_log.c b/arch/um/kernel/tty_log.c
+--- a/arch/um/kernel/tty_log.c 2004-02-11 12:15:58.000000000 -0500
++++ b/arch/um/kernel/tty_log.c 2004-02-11 12:27:51.000000000 -0500
+@@ -9,10 +9,10 @@
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <unistd.h>
+-#include <fcntl.h>
+ #include <sys/time.h>
+ #include "init.h"
+ #include "user.h"
++#include "kern_util.h"
+ #include "os.h"
+
+ #define TTY_LOG_DIR "./"
+@@ -24,29 +24,40 @@
+ #define TTY_LOG_OPEN 1
+ #define TTY_LOG_CLOSE 2
+ #define TTY_LOG_WRITE 3
++#define TTY_LOG_EXEC 4
++
++#define TTY_READ 1
++#define TTY_WRITE 2
+
+ struct tty_log_buf {
+ int what;
+ unsigned long tty;
+ int len;
++ int direction;
++ unsigned long sec;
++ unsigned long usec;
+ };
+
+-int open_tty_log(void *tty)
++int open_tty_log(void *tty, void *current_tty)
+ {
+ struct timeval tv;
+ struct tty_log_buf data;
+ char buf[strlen(tty_log_dir) + sizeof("01234567890-01234567\0")];
+ int fd;
+
++ gettimeofday(&tv, NULL);
+ if(tty_log_fd != -1){
+- data = ((struct tty_log_buf) { what : TTY_LOG_OPEN,
+- tty : (unsigned long) tty,
+- len : 0 });
+- write(tty_log_fd, &data, sizeof(data));
++ data = ((struct tty_log_buf) { .what = TTY_LOG_OPEN,
++ .tty = (unsigned long) tty,
++ .len = sizeof(current_tty),
++ .direction = 0,
++ .sec = tv.tv_sec,
++ .usec = tv.tv_usec } );
++ os_write_file(tty_log_fd, &data, sizeof(data));
++ os_write_file(tty_log_fd, &current_tty, data.len);
+ return(tty_log_fd);
+ }
+
+- gettimeofday(&tv, NULL);
+ sprintf(buf, "%s/%0u-%0u", tty_log_dir, (unsigned int) tv.tv_sec,
+ (unsigned int) tv.tv_usec);
+
+@@ -62,30 +73,117 @@
+ void close_tty_log(int fd, void *tty)
+ {
+ struct tty_log_buf data;
++ struct timeval tv;
+
+ if(tty_log_fd != -1){
+- data = ((struct tty_log_buf) { what : TTY_LOG_CLOSE,
+- tty : (unsigned long) tty,
+- len : 0 });
+- write(tty_log_fd, &data, sizeof(data));
++ gettimeofday(&tv, NULL);
++ data = ((struct tty_log_buf) { .what = TTY_LOG_CLOSE,
++ .tty = (unsigned long) tty,
++ .len = 0,
++ .direction = 0,
++ .sec = tv.tv_sec,
++ .usec = tv.tv_usec } );
++ os_write_file(tty_log_fd, &data, sizeof(data));
+ return;
+ }
+- close(fd);
++ os_close_file(fd);
+ }
+
+-int write_tty_log(int fd, char *buf, int len, void *tty)
++static int log_chunk(int fd, const char *buf, int len)
+ {
++ int total = 0, try, missed, n;
++ char chunk[64];
++
++ while(len > 0){
++ try = (len > sizeof(chunk)) ? sizeof(chunk) : len;
++ missed = copy_from_user_proc(chunk, (char *) buf, try);
++ try -= missed;
++ n = os_write_file(fd, chunk, try);
++ if(n != try) {
++ if(n < 0)
++ return(n);
++ return(-EIO);
++ }
++ if(missed != 0)
++ return(-EFAULT);
++
++ len -= try;
++ total += try;
++ buf += try;
++ }
++
++ return(total);
++}
++
++int write_tty_log(int fd, const char *buf, int len, void *tty, int is_read)
++{
++ struct timeval tv;
+ struct tty_log_buf data;
++ int direction;
+
+ if(fd == tty_log_fd){
+- data = ((struct tty_log_buf) { what : TTY_LOG_WRITE,
+- tty : (unsigned long) tty,
+- len : len });
+- write(tty_log_fd, &data, sizeof(data));
++ gettimeofday(&tv, NULL);
++ direction = is_read ? TTY_READ : TTY_WRITE;
++ data = ((struct tty_log_buf) { .what = TTY_LOG_WRITE,
++ .tty = (unsigned long) tty,
++ .len = len,
++ .direction = direction,
++ .sec = tv.tv_sec,
++ .usec = tv.tv_usec } );
++ os_write_file(tty_log_fd, &data, sizeof(data));
+ }
+- return(write(fd, buf, len));
++
++ return(log_chunk(fd, buf, len));
+ }
+
++void log_exec(char **argv, void *tty)
++{
++ struct timeval tv;
++ struct tty_log_buf data;
++ char **ptr,*arg;
++ int len;
++
++ if(tty_log_fd == -1) return;
++
++ gettimeofday(&tv, NULL);
++
++ len = 0;
++ for(ptr = argv; ; ptr++){
++ if(copy_from_user_proc(&arg, ptr, sizeof(arg)))
++ return;
++ if(arg == NULL) break;
++ len += strlen_user_proc(arg);
++ }
++
++ data = ((struct tty_log_buf) { .what = TTY_LOG_EXEC,
++ .tty = (unsigned long) tty,
++ .len = len,
++ .direction = 0,
++ .sec = tv.tv_sec,
++ .usec = tv.tv_usec } );
++ os_write_file(tty_log_fd, &data, sizeof(data));
++
++ for(ptr = argv; ; ptr++){
++ if(copy_from_user_proc(&arg, ptr, sizeof(arg)))
++ return;
++ if(arg == NULL) break;
++ log_chunk(tty_log_fd, arg, strlen_user_proc(arg));
++ }
++}
++
++extern void register_tty_logger(int (*opener)(void *, void *),
++ int (*writer)(int, const char *, int,
++ void *, int),
++ void (*closer)(int, void *));
++
++static int register_logger(void)
++{
++ register_tty_logger(open_tty_log, write_tty_log, close_tty_log);
++ return(0);
++}
++
++__uml_initcall(register_logger);
++
+ static int __init set_tty_log_dir(char *name, int *add)
+ {
+ tty_log_dir = name;
+@@ -104,7 +202,7 @@
+
+ tty_log_fd = strtoul(name, &end, 0);
+ if((*end != '\0') || (end == name)){
+- printk("set_tty_log_fd - strtoul failed on '%s'\n", name);
++ printf("set_tty_log_fd - strtoul failed on '%s'\n", name);
+ tty_log_fd = -1;
+ }
+ return 0;
+diff -Naur a/arch/um/kernel/uaccess_user.c b/arch/um/kernel/uaccess_user.c
+--- a/arch/um/kernel/uaccess_user.c 2004-02-11 12:16:09.000000000 -0500
++++ b/arch/um/kernel/uaccess_user.c 2004-02-11 12:28:20.000000000 -0500
+@@ -20,7 +20,7 @@
+
+ jmp_buf jbuf;
+ *fault_catcher = &jbuf;
+- if(setjmp(jbuf) == 0){
++ if(sigsetjmp(jbuf, 1) == 0){
+ (*op)(to, from, n);
+ ret = 0;
+ *faulted_out = 0;
+diff -Naur a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
+--- a/arch/um/kernel/um_arch.c 2004-02-11 12:16:29.000000000 -0500
++++ b/arch/um/kernel/um_arch.c 2004-02-11 12:28:28.000000000 -0500
+@@ -38,13 +38,18 @@
+ #include "mode_kern.h"
+ #include "mode.h"
+
+-#define DEFAULT_COMMAND_LINE "root=6200"
++#define DEFAULT_COMMAND_LINE "root=98:0"
+
+ struct cpuinfo_um boot_cpu_data = {
+ .loops_per_jiffy = 0,
+ .ipi_pipe = { -1, -1 }
+ };
+
++/* Placeholder to make UML link until the vsyscall stuff is actually
++ * implemented
++ */
++void *__kernel_vsyscall;
++
+ unsigned long thread_saved_pc(struct task_struct *task)
+ {
+ return(os_process_pc(CHOOSE_MODE_PROC(thread_pid_tt, thread_pid_skas,
+@@ -53,18 +58,22 @@
+
+ static int show_cpuinfo(struct seq_file *m, void *v)
+ {
+- int index;
++ int index = 0;
+
+- index = (struct cpuinfo_um *)v - cpu_data;
+ #ifdef CONFIG_SMP
++ index = (struct cpuinfo_um *) v - cpu_data;
+ if (!cpu_online(index))
+ return 0;
+ #endif
+
+- seq_printf(m, "bogomips\t: %lu.%02lu\n",
++ seq_printf(m, "processor\t: %d\n", index);
++ seq_printf(m, "vendor_id\t: User Mode Linux\n");
++ seq_printf(m, "model name\t: UML\n");
++ seq_printf(m, "mode\t\t: %s\n", CHOOSE_MODE("tt", "skas"));
++ seq_printf(m, "host\t\t: %s\n", host_info);
++ seq_printf(m, "bogomips\t: %lu.%02lu\n\n",
+ loops_per_jiffy/(500000/HZ),
+ (loops_per_jiffy/(5000/HZ)) % 100);
+- seq_printf(m, "host\t\t: %s\n", host_info);
+
+ return(0);
+ }
+@@ -134,12 +143,12 @@
+ if(umid != NULL){
+ snprintf(argv1_begin,
+ (argv1_end - argv1_begin) * sizeof(*ptr),
+- "(%s)", umid);
++ "(%s) ", umid);
+ ptr = &argv1_begin[strlen(argv1_begin)];
+ }
+ else ptr = argv1_begin;
+
+- snprintf(ptr, (argv1_end - ptr) * sizeof(*ptr), " [%s]", cmd);
++ snprintf(ptr, (argv1_end - ptr) * sizeof(*ptr), "[%s]", cmd);
+ memset(argv1_begin + strlen(argv1_begin), '\0',
+ argv1_end - argv1_begin - strlen(argv1_begin));
+ #endif
+@@ -179,7 +188,7 @@
+ static int __init uml_ncpus_setup(char *line, int *add)
+ {
+ if (!sscanf(line, "%d", &ncpus)) {
+- printk("Couldn't parse [%s]\n", line);
++ printf("Couldn't parse [%s]\n", line);
+ return -1;
+ }
+
+@@ -210,7 +219,7 @@
+
+ static int __init mode_tt_setup(char *line, int *add)
+ {
+- printk("CONFIG_MODE_TT disabled - 'mode=tt' ignored\n");
++ printf("CONFIG_MODE_TT disabled - 'mode=tt' ignored\n");
+ return(0);
+ }
+
+@@ -221,7 +230,7 @@
+
+ static int __init mode_tt_setup(char *line, int *add)
+ {
+- printk("CONFIG_MODE_SKAS disabled - 'mode=tt' redundant\n");
++ printf("CONFIG_MODE_SKAS disabled - 'mode=tt' redundant\n");
+ return(0);
+ }
+
+@@ -291,7 +300,7 @@
+
+ /* Set during early boot */
+ unsigned long brk_start;
+-static struct vm_reserved kernel_vm_reserved;
++unsigned long end_iomem;
+
+ #define MIN_VMALLOC (32 * 1024 * 1024)
+
+@@ -299,7 +308,7 @@
+ {
+ unsigned long avail;
+ unsigned long virtmem_size, max_physmem;
+- unsigned int i, add, err;
++ unsigned int i, add;
+
+ for (i = 1; i < argc; i++){
+ if((i == 1) && (argv[i][0] == ' ')) continue;
+@@ -328,12 +337,16 @@
+ argv1_end = &argv[1][strlen(argv[1])];
+ #endif
+
+- set_usable_vm(uml_physmem, get_kmem_end());
+-
+ highmem = 0;
+- max_physmem = get_kmem_end() - uml_physmem - MIN_VMALLOC;
+- if(physmem_size > max_physmem){
+- highmem = physmem_size - max_physmem;
++ iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK;
++ max_physmem = get_kmem_end() - uml_physmem - iomem_size - MIN_VMALLOC;
++
++ /* Zones have to begin on a 1 << MAX_ORDER page boundary,
++ * so this makes sure that's true for highmem
++ */
++ max_physmem &= ~((1 << (PAGE_SHIFT + MAX_ORDER)) - 1);
++ if(physmem_size + iomem_size > max_physmem){
++ highmem = physmem_size + iomem_size - max_physmem;
+ physmem_size -= highmem;
+ #ifndef CONFIG_HIGHMEM
+ highmem = 0;
+@@ -343,11 +356,19 @@
+ }
+
+ high_physmem = uml_physmem + physmem_size;
+- high_memory = (void *) high_physmem;
++ end_iomem = high_physmem + iomem_size;
++ high_memory = (void *) end_iomem;
+
+ start_vm = VMALLOC_START;
+
+- setup_physmem(uml_physmem, uml_reserved, physmem_size);
++ setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem);
++ if(init_maps(physmem_size, iomem_size, highmem)){
++ printf("Failed to allocate mem_map for %ld bytes of physical "
++ "memory and %ld bytes of highmem\n", physmem_size,
++ highmem);
++ exit(1);
++ }
++
+ virtmem_size = physmem_size;
+ avail = get_kmem_end() - start_vm;
+ if(physmem_size > avail) virtmem_size = avail;
+@@ -357,18 +378,13 @@
+ printf("Kernel virtual memory size shrunk to %ld bytes\n",
+ virtmem_size);
+
+- err = reserve_vm(high_physmem, end_vm, &kernel_vm_reserved);
+- if(err){
+- printf("Failed to reserve VM area for kernel VM\n");
+- exit(1);
+- }
+-
+ uml_postsetup();
+
+ init_task.thread.kernel_stack = (unsigned long) &init_thread_info +
+ 2 * PAGE_SIZE;
+
+ task_protections((unsigned long) &init_thread_info);
++ os_flush_stdout();
+
+ return(CHOOSE_MODE(start_uml_tt(), start_uml_skas()));
+ }
+@@ -377,7 +393,7 @@
+ void *unused2)
+ {
+ #ifdef CONFIG_MAGIC_SYSRQ
+- handle_sysrq('p', &current->thread.regs, NULL, NULL);
++ handle_sysrq('p', &current->thread.regs, NULL);
+ #endif
+ machine_halt();
+ return(0);
+@@ -403,6 +419,11 @@
+ arch_check_bugs();
+ check_ptrace();
+ check_sigio();
++ check_devanon();
++}
++
++void apply_alternatives(void *start, void *end)
++{
+ }
+
+ /*
+diff -Naur a/arch/um/kernel/umid.c b/arch/um/kernel/umid.c
+--- a/arch/um/kernel/umid.c 2004-02-11 12:16:43.000000000 -0500
++++ b/arch/um/kernel/umid.c 2004-02-11 12:28:42.000000000 -0500
+@@ -5,7 +5,6 @@
+
+ #include <stdio.h>
+ #include <unistd.h>
+-#include <fcntl.h>
+ #include <errno.h>
+ #include <string.h>
+ #include <stdlib.h>
+@@ -33,18 +32,19 @@
+ static int umid_is_random = 1;
+ static int umid_inited = 0;
+
+-static int make_umid(void);
++static int make_umid(int (*printer)(const char *fmt, ...));
+
+-static int __init set_umid(char *name, int is_random)
++static int __init set_umid(char *name, int is_random,
++ int (*printer)(const char *fmt, ...))
+ {
+ if(umid_inited){
+- printk("Unique machine name can't be set twice\n");
++ (*printer)("Unique machine name can't be set twice\n");
+ return(-1);
+ }
+
+ if(strlen(name) > UMID_LEN - 1)
+- printk("Unique machine name is being truncated to %s "
+- "characters\n", UMID_LEN);
++ (*printer)("Unique machine name is being truncated to %s "
++ "characters\n", UMID_LEN);
+ strlcpy(umid, name, sizeof(umid));
+
+ umid_is_random = is_random;
+@@ -54,7 +54,7 @@
+
+ static int __init set_umid_arg(char *name, int *add)
+ {
+- return(set_umid(name, 0));
++ return(set_umid(name, 0, printf));
+ }
+
+ __uml_setup("umid=", set_umid_arg,
+@@ -67,7 +67,7 @@
+ {
+ int n;
+
+- if(!umid_inited && make_umid()) return(-1);
++ if(!umid_inited && make_umid(printk)) return(-1);
+
+ n = strlen(uml_dir) + strlen(umid) + strlen(name) + 1;
+ if(n > len){
+@@ -85,22 +85,23 @@
+ {
+ char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")];
+ char pid[sizeof("nnnnn\0")];
+- int fd;
++ int fd, n;
+
+ if(umid_file_name("pid", file, sizeof(file))) return 0;
+
+ fd = os_open_file(file, of_create(of_excl(of_rdwr(OPENFLAGS()))),
+ 0644);
+ if(fd < 0){
+- printk("Open of machine pid file \"%s\" failed - "
+- "errno = %d\n", file, -fd);
++ printf("Open of machine pid file \"%s\" failed - "
++ "err = %d\n", file, -fd);
+ return 0;
+ }
+
+ sprintf(pid, "%d\n", os_getpid());
+- if(write(fd, pid, strlen(pid)) != strlen(pid))
+- printk("Write of pid file failed - errno = %d\n", errno);
+- close(fd);
++ n = os_write_file(fd, pid, strlen(pid));
++ if(n != strlen(pid))
++ printf("Write of pid file failed - err = %d\n", -n);
++ os_close_file(fd);
+ return 0;
+ }
+
+@@ -111,7 +112,8 @@
+ int len;
+ char file[256];
+
+- if((directory = opendir(dir)) == NULL){
++ directory = opendir(dir);
++ if(directory == NULL){
+ printk("actually_do_remove : couldn't open directory '%s', "
+ "errno = %d\n", dir, errno);
+ return(1);
+@@ -160,22 +162,24 @@
+ {
+ char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")];
+ char pid[sizeof("nnnnn\0")], *end;
+- int dead, fd, p;
++ int dead, fd, p, n;
+
+ sprintf(file, "%s/pid", dir);
+ dead = 0;
+- if((fd = os_open_file(file, of_read(OPENFLAGS()), 0)) < 0){
++ fd = os_open_file(file, of_read(OPENFLAGS()), 0);
++ if(fd < 0){
+ if(fd != -ENOENT){
+ printk("not_dead_yet : couldn't open pid file '%s', "
+- "errno = %d\n", file, -fd);
++ "err = %d\n", file, -fd);
+ return(1);
+ }
+ dead = 1;
+ }
+ if(fd > 0){
+- if(read(fd, pid, sizeof(pid)) < 0){
++ n = os_read_file(fd, pid, sizeof(pid));
++ if(n < 0){
+ printk("not_dead_yet : couldn't read pid file '%s', "
+- "errno = %d\n", file, errno);
++ "err = %d\n", file, -n);
+ return(1);
+ }
+ p = strtoul(pid, &end, 0);
+@@ -197,7 +201,7 @@
+ if((strlen(name) > 0) && (name[strlen(name) - 1] != '/')){
+ uml_dir = malloc(strlen(name) + 1);
+ if(uml_dir == NULL){
+- printk("Failed to malloc uml_dir - error = %d\n",
++ printf("Failed to malloc uml_dir - error = %d\n",
+ errno);
+ uml_dir = name;
+ return(0);
+@@ -217,7 +221,7 @@
+ char *home = getenv("HOME");
+
+ if(home == NULL){
+- printk("make_uml_dir : no value in environment for "
++ printf("make_uml_dir : no value in environment for "
+ "$HOME\n");
+ exit(1);
+ }
+@@ -232,57 +236,59 @@
+ dir[len + 1] = '\0';
+ }
+
+- if((uml_dir = malloc(strlen(dir) + 1)) == NULL){
++ uml_dir = malloc(strlen(dir) + 1);
++ if(uml_dir == NULL){
+ printf("make_uml_dir : malloc failed, errno = %d\n", errno);
+ exit(1);
+ }
+ strcpy(uml_dir, dir);
+
+ if((mkdir(uml_dir, 0777) < 0) && (errno != EEXIST)){
+- printk("Failed to mkdir %s - errno = %i\n", uml_dir, errno);
++ printf("Failed to mkdir %s - errno = %i\n", uml_dir, errno);
+ return(-1);
+ }
+ return 0;
+ }
+
+-static int __init make_umid(void)
++static int __init make_umid(int (*printer)(const char *fmt, ...))
+ {
+ int fd, err;
+ char tmp[strlen(uml_dir) + UMID_LEN + 1];
+
+ strlcpy(tmp, uml_dir, sizeof(tmp));
+
+- if(*umid == 0){
++ if(!umid_inited){
+ strcat(tmp, "XXXXXX");
+ fd = mkstemp(tmp);
+ if(fd < 0){
+- printk("make_umid - mkstemp failed, errno = %d\n",
+- errno);
++ (*printer)("make_umid - mkstemp failed, errno = %d\n",
++ errno);
+ return(1);
+ }
+
+- close(fd);
++ os_close_file(fd);
+ /* There's a nice tiny little race between this unlink and
+ * the mkdir below. It'd be nice if there were a mkstemp
+ * for directories.
+ */
+ unlink(tmp);
+- set_umid(&tmp[strlen(uml_dir)], 1);
++ set_umid(&tmp[strlen(uml_dir)], 1, printer);
+ }
+
+ sprintf(tmp, "%s%s", uml_dir, umid);
+
+- if((err = mkdir(tmp, 0777)) < 0){
++ err = mkdir(tmp, 0777);
++ if(err < 0){
+ if(errno == EEXIST){
+ if(not_dead_yet(tmp)){
+- printk("umid '%s' is in use\n", umid);
++ (*printer)("umid '%s' is in use\n", umid);
+ return(-1);
+ }
+ err = mkdir(tmp, 0777);
+ }
+ }
+ if(err < 0){
+- printk("Failed to create %s - errno = %d\n", umid, errno);
++ (*printer)("Failed to create %s - errno = %d\n", umid, errno);
+ return(-1);
+ }
+
+@@ -295,7 +301,13 @@
+ );
+
+ __uml_postsetup(make_uml_dir);
+-__uml_postsetup(make_umid);
++
++static int __init make_umid_setup(void)
++{
++ return(make_umid(printf));
++}
++
++__uml_postsetup(make_umid_setup);
+ __uml_postsetup(create_pid_file);
+
+ /*
+diff -Naur a/arch/um/kernel/user_syms.c b/arch/um/kernel/user_syms.c
+--- a/arch/um/kernel/user_syms.c 2004-02-11 12:16:42.000000000 -0500
++++ b/arch/um/kernel/user_syms.c 2004-02-11 12:28:41.000000000 -0500
+@@ -1,7 +1,7 @@
+ #include <stdio.h>
+ #include <unistd.h>
+-#include <fcntl.h>
+ #include <dirent.h>
++#include <fcntl.h>
+ #include <errno.h>
+ #include <utime.h>
+ #include <string.h>
+@@ -16,46 +16,61 @@
+ * since this includes various user-level headers.
+ */
+
+-struct module_symbol
++/* Had to update this: this changed in late 2.5 to add CRC and other beasts
++ * and was never updated here - 13 Dec 2003, Blaisorblade
++ */
++
++/* v850 toolchain uses a `_' prefix for all user symbols */
++#ifndef MODULE_SYMBOL_PREFIX
++#define MODULE_SYMBOL_PREFIX ""
++#endif
++
++struct kernel_symbol
+ {
+ unsigned long value;
+ const char *name;
+ };
+
+-/* Indirect stringification. */
+-
+-#define __MODULE_STRING_1(x) #x
+-#define __MODULE_STRING(x) __MODULE_STRING_1(x)
+-
+-#if !defined(__AUTOCONF_INCLUDED__)
+-
+-#define __EXPORT_SYMBOL(sym,str) error config_must_be_included_before_module
+-#define EXPORT_SYMBOL(var) error config_must_be_included_before_module
+-#define EXPORT_SYMBOL_NOVERS(var) error config_must_be_included_before_module
+-
+-#elif !defined(UML_CONFIG_MODULES)
++#if !defined(UML_CONFIG_MODULES)
++#define EXPORT_SYMBOL(sym)
++#define EXPORT_SYMBOL_GPL(sym)
++#define EXPORT_SYMBOL_NOVERS(sym)
++
++#else /*UML_CONFIG_MODULES*/
++#ifndef __GENKSYMS__
++#ifdef UML_CONFIG_MODVERSIONS
++/* Mark the CRC weak since genksyms apparently decides not to
++ * generate a checksum for some symbols */
++#define __CRC_SYMBOL(sym, sec) \
++ extern void *__crc_##sym __attribute__((weak)); \
++ static const unsigned long __kcrctab_##sym \
++ __attribute__((section("__kcrctab" sec), unused)) \
++ = (unsigned long) &__crc_##sym;
++#else
++#define __CRC_SYMBOL(sym, sec)
++#endif
+
+-#define __EXPORT_SYMBOL(sym,str)
+-#define EXPORT_SYMBOL(var)
+-#define EXPORT_SYMBOL_NOVERS(var)
++/* For every exported symbol, place a struct in the __ksymtab section */
++#define __EXPORT_SYMBOL(sym, sec) \
++ __CRC_SYMBOL(sym, sec) \
++ static const char __kstrtab_##sym[] \
++ __attribute__((section("__ksymtab_strings"))) \
++ = MODULE_SYMBOL_PREFIX #sym; \
++ static const struct kernel_symbol __ksymtab_##sym \
++ __attribute__((section("__ksymtab" sec), unused)) \
++ = { (unsigned long)&sym, __kstrtab_##sym }
+
+-#else
++#define EXPORT_SYMBOL(sym) \
++ __EXPORT_SYMBOL(sym, "")
+
+-#define __EXPORT_SYMBOL(sym, str) \
+-const char __kstrtab_##sym[] \
+-__attribute__((section(".kstrtab"))) = str; \
+-const struct module_symbol __ksymtab_##sym \
+-__attribute__((section("__ksymtab"))) = \
+-{ (unsigned long)&sym, __kstrtab_##sym }
++#define EXPORT_SYMBOL_GPL(sym) \
++ __EXPORT_SYMBOL(sym, "_gpl")
+
+-#if defined(__MODVERSIONS__) || !defined(UML_CONFIG_MODVERSIONS)
+-#define EXPORT_SYMBOL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(var))
+-#else
+-#define EXPORT_SYMBOL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(__VERSIONED_SYMBOL(var)))
+ #endif
+
+-#define EXPORT_SYMBOL_NOVERS(var) __EXPORT_SYMBOL(var, __MODULE_STRING(var))
+-
++/* We don't mangle the actual symbol anymore, so no need for
++ * special casing EXPORT_SYMBOL_NOVERS. FIXME: Deprecated */
++#define EXPORT_SYMBOL_NOVERS(sym) EXPORT_SYMBOL(sym)
+ #endif
+
+ EXPORT_SYMBOL(__errno_location);
+@@ -109,5 +124,18 @@
+
+ EXPORT_SYMBOL(memset);
+ EXPORT_SYMBOL(strstr);
++EXPORT_SYMBOL(printf);
++EXPORT_SYMBOL(strlen);
+
+ EXPORT_SYMBOL(find_iomem);
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/kernel/user_util.c b/arch/um/kernel/user_util.c
+--- a/arch/um/kernel/user_util.c 2004-02-11 12:14:27.000000000 -0500
++++ b/arch/um/kernel/user_util.c 2004-02-11 12:26:09.000000000 -0500
+@@ -5,7 +5,6 @@
+
+ #include <stdio.h>
+ #include <stdlib.h>
+-#include <fcntl.h>
+ #include <unistd.h>
+ #include <limits.h>
+ #include <sys/mman.h>
+@@ -82,7 +81,8 @@
+ int status, ret;
+
+ while(1){
+- if(((ret = waitpid(pid, &status, WUNTRACED)) < 0) ||
++ ret = waitpid(pid, &status, WUNTRACED);
++ if((ret < 0) ||
+ !WIFSTOPPED(status) || (WSTOPSIG(status) != sig)){
+ if(ret < 0){
+ if(errno == EINTR) continue;
+@@ -119,17 +119,6 @@
+ }
+ }
+
+-int clone_and_wait(int (*fn)(void *), void *arg, void *sp, int flags)
+-{
+- int pid;
+-
+- pid = clone(fn, sp, flags, arg);
+- if(pid < 0) return(-1);
+- wait_for_stop(pid, SIGSTOP, PTRACE_CONT, NULL);
+- ptrace(PTRACE_CONT, pid, 0, 0);
+- return(pid);
+-}
+-
+ int raw(int fd, int complain)
+ {
+ struct termios tt;
+diff -Naur a/arch/um/main.c b/arch/um/main.c
+--- a/arch/um/main.c 2004-02-11 12:16:28.000000000 -0500
++++ b/arch/um/main.c 2004-02-11 12:28:27.000000000 -0500
+@@ -8,6 +8,7 @@
+ #include <stdlib.h>
+ #include <string.h>
+ #include <signal.h>
++#include <errno.h>
+ #include <sys/resource.h>
+ #include <sys/mman.h>
+ #include <sys/user.h>
+@@ -123,12 +124,14 @@
+
+ set_stklim();
+
+- if((new_argv = malloc((argc + 1) * sizeof(char *))) == NULL){
++ new_argv = malloc((argc + 1) * sizeof(char *));
++ if(new_argv == NULL){
+ perror("Mallocing argv");
+ exit(1);
+ }
+ for(i=0;i<argc;i++){
+- if((new_argv[i] = strdup(argv[i])) == NULL){
++ new_argv[i] = strdup(argv[i]);
++ if(new_argv[i] == NULL){
+ perror("Mallocing an arg");
+ exit(1);
+ }
+diff -Naur a/arch/um/Makefile b/arch/um/Makefile
+--- a/arch/um/Makefile 2004-02-11 12:15:58.000000000 -0500
++++ b/arch/um/Makefile 2004-02-11 12:27:51.000000000 -0500
+@@ -22,17 +22,21 @@
+ $(ARCH_DIR)/sys-$(SUBARCH)/
+
+ # Have to precede the include because the included Makefiles reference them.
+-SYMLINK_HEADERS = include/asm-um/archparam.h include/asm-um/system.h \
+- include/asm-um/sigcontext.h include/asm-um/processor.h \
+- include/asm-um/ptrace.h include/asm-um/arch-signal.h
++SYMLINK_HEADERS = archparam.h system.h sigcontext.h processor.h ptrace.h \
++ arch-signal.h module.h
++SYMLINK_HEADERS := $(foreach header,$(SYMLINK_HEADERS),include/asm-um/$(header))
+
+ ARCH_SYMLINKS = include/asm-um/arch $(ARCH_DIR)/include/sysdep $(ARCH_DIR)/os \
+ $(SYMLINK_HEADERS) $(ARCH_DIR)/include/uml-config.h
+
+ GEN_HEADERS += $(ARCH_DIR)/include/task.h $(ARCH_DIR)/include/kern_constants.h
+
+-include $(ARCH_DIR)/Makefile-$(SUBARCH)
+-include $(ARCH_DIR)/Makefile-os-$(OS)
++# This target adds dependencies to "prepare". They are defined in the included
++# Makefiles (see Makefile-i386).
++
++.PHONY: sys_prepare
++sys_prepare:
++ @:
+
+ MAKEFILE-$(CONFIG_MODE_TT) += Makefile-tt
+ MAKEFILE-$(CONFIG_MODE_SKAS) += Makefile-skas
+@@ -41,6 +45,9 @@
+ include $(addprefix $(ARCH_DIR)/,$(MAKEFILE-y))
+ endif
+
++include $(ARCH_DIR)/Makefile-$(SUBARCH)
++include $(ARCH_DIR)/Makefile-os-$(OS)
++
+ EXTRAVERSION := $(EXTRAVERSION)-1um
+
+ ARCH_INCLUDE = -I$(ARCH_DIR)/include
+@@ -52,14 +59,20 @@
+
+ CFLAGS += $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \
+ -D_LARGEFILE64_SOURCE $(ARCH_INCLUDE) -Derrno=kernel_errno \
+- $(MODE_INCLUDE)
++ -Dsigprocmask=kernel_sigprocmask $(MODE_INCLUDE)
+
+ LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc
+
++# These are needed for clean and mrproper, since in that case .config is not
++# included; the values here are meaningless
++
++CONFIG_NEST_LEVEL ?= 0
++CONFIG_KERNEL_HALF_GIGS ?= 0
++
+ SIZE = (($(CONFIG_NEST_LEVEL) + $(CONFIG_KERNEL_HALF_GIGS)) * 0x20000000)
+
+ ifeq ($(CONFIG_MODE_SKAS), y)
+-$(SYS_HEADERS) : $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h
++$(SYS_HEADERS) : $(TOPDIR)/$(ARCH_DIR)/include/skas_ptregs.h
+ endif
+
+ include/linux/version.h: arch/$(ARCH)/Makefile
+@@ -98,17 +111,17 @@
+ CONFIG_KERNEL_STACK_ORDER ?= 2
+ STACK_SIZE := $(shell echo $$[ 4096 * (1 << $(CONFIG_KERNEL_STACK_ORDER)) ] )
+
+-AFLAGS_vmlinux.lds.o = -U$(SUBARCH) \
++AFLAGS_vmlinux.lds.o = $(shell echo -U$(SUBARCH) \
+ -DSTART=$$(($(TOP_ADDR) - $(SIZE))) -DELF_ARCH=$(ELF_ARCH) \
+ -DELF_FORMAT=\"$(ELF_FORMAT)\" $(CPP_MODE_TT) \
+- -DKERNEL_STACK_SIZE=$(STACK_SIZE)
++ -DKERNEL_STACK_SIZE=$(STACK_SIZE))
+
+-AFLAGS_$(LD_SCRIPT-y:.s=).o = $(AFLAGS_vmlinux.lds.o) -P -C -Uum
++export AFLAGS_$(LD_SCRIPT-y:.s=).o = $(AFLAGS_vmlinux.lds.o) -P -C -Uum
+
+ LD_SCRIPT-y := $(ARCH_DIR)/$(LD_SCRIPT-y)
+
+-$(LD_SCRIPT-y) : $(LD_SCRIPT-y:.s=.S) scripts FORCE
+- $(call if_changed_dep,as_s_S)
++#$(LD_SCRIPT-y) : $(LD_SCRIPT-y:.s=.S) scripts FORCE
++# $(call if_changed_dep,as_s_S)
+
+ linux: vmlinux $(LD_SCRIPT-y)
+ $(CC) -Wl,-T,$(LD_SCRIPT-y) $(LINK-y) $(LINK_WRAPS) \
+@@ -116,37 +129,47 @@
+
+ USER_CFLAGS := $(patsubst -I%,,$(CFLAGS))
+ USER_CFLAGS := $(patsubst -Derrno=kernel_errno,,$(USER_CFLAGS))
++USER_CFLAGS := $(patsubst -Dsigprocmask=kernel_sigprocmask,,$(USER_CFLAGS))
+ USER_CFLAGS := $(patsubst -D__KERNEL__,,$(USER_CFLAGS)) $(ARCH_INCLUDE) \
+ $(MODE_INCLUDE)
+
+ # To get a definition of F_SETSIG
+ USER_CFLAGS += -D_GNU_SOURCE
+
++# From main Makefile, these options are set after including the ARCH makefile.
++# So copy them here.
++
++ifdef CONFIG_CC_OPTIMIZE_FOR_SIZE
++USER_CFLAGS += -Os
++else
++USER_CFLAGS += -O2
++endif
++
++ifndef CONFIG_FRAME_POINTER
++USER_CFLAGS += -fomit-frame-pointer
++endif
++
++ifdef CONFIG_DEBUG_INFO
++USER_CFLAGS += -g
++endif
++
+ CLEAN_FILES += linux x.i gmon.out $(ARCH_DIR)/uml.lds.s \
+- $(ARCH_DIR)/dyn_link.ld.s $(GEN_HEADERS)
++ $(ARCH_DIR)/dyn_link.ld.s $(ARCH_DIR)/include/uml-config.h \
++ $(GEN_HEADERS)
+
+-$(ARCH_DIR)/main.o: $(ARCH_DIR)/main.c
+- $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $<
++MRPROPER_FILES += $(SYMLINK_HEADERS) $(ARCH_SYMLINKS) \
++ $(addprefix $(ARCH_DIR)/kernel/,$(KERN_SYMLINKS))
++
++$(ARCH_DIR)/main.o: $(ARCH_DIR)/main.c sys_prepare
++ @ echo ' MAIN $@'
++ @ $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $<
+
+ archmrproper:
+- for d in $(ARCH_SUBDIRS) $(ARCH_DIR)/util; \
+- do \
+- $(MAKE) -C $$d archmrproper; \
+- done
+- rm -f $(CLEAN_FILES) $(SYMLINK_HEADERS) $(ARCH_SYMLINKS) include/asm \
+- $(addprefix $(ARCH_DIR)/kernel/,$(KERN_SYMLINKS))
+-
+-archclean: sysclean
+- for d in $(ARCH_SUBDIRS) $(ARCH_DIR)/util; \
+- do \
+- $(MAKE) -C $$d clean; \
+- done
+- find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \
+- -o -name '*.gcov' \) -type f -print | xargs rm -f
+- rm -f linux x.i gmon.out $(ARCH_DIR)/link.ld $(GEN_HEADERS)
++ @:
+
+-archdep:
+- for d in $(ARCH_SUBDIRS); do $(MAKE) -C $$d fastdep; done
++archclean:
++ @find . \( -name '*.bb' -o -name '*.bbg' -o -name '*.da' \
++ -o -name '*.gcov' \) -type f -print | xargs rm -f
+
+ $(SYMLINK_HEADERS):
+ cd $(TOPDIR)/$(dir $@) ; \
+@@ -161,19 +184,26 @@
+ $(ARCH_DIR)/os:
+ cd $(ARCH_DIR) && ln -sf os-$(OS) os
+
+-$(ARCH_DIR)/include/uml-config.h :
+- sed 's/ CONFIG/ UML_CONFIG/' $(TOPDIR)/include/linux/autoconf.h > $@
++# Generated files
++define filechk_umlconfig
++ sed 's/ CONFIG/ UML_CONFIG/'
++endef
++
++$(ARCH_DIR)/include/uml-config.h : $(TOPDIR)/include/linux/autoconf.h
++ $(call filechk,umlconfig)
++
++filechk_gen_header = $<
+
+ $(ARCH_DIR)/include/task.h : $(ARCH_DIR)/util/mk_task
+- $< > $@
++ $(call filechk,gen_header)
+
+ $(ARCH_DIR)/include/kern_constants.h : $(ARCH_DIR)/util/mk_constants
+- $< > $@
++ $(call filechk,gen_header)
+
+-$(ARCH_DIR)/util/mk_task : $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h \
+- $(ARCH_DIR)/util FORCE ;
++$(ARCH_DIR)/util/mk_task $(ARCH_DIR)/util/mk_constants : $(ARCH_DIR)/util \
++ sys_prepare FORCE ;
+
+ $(ARCH_DIR)/util: FORCE
+- @$(call descend,$@,)
++ $(Q)$(MAKE) $(build)=$@
+
+-export SUBARCH USER_CFLAGS OS
++export SUBARCH USER_CFLAGS OS
+diff -Naur a/arch/um/Makefile-i386 b/arch/um/Makefile-i386
+--- a/arch/um/Makefile-i386 2004-02-11 12:16:28.000000000 -0500
++++ b/arch/um/Makefile-i386 2004-02-11 12:28:27.000000000 -0500
+@@ -16,22 +16,27 @@
+
+ SYS_HEADERS = $(SYS_DIR)/sc.h $(SYS_DIR)/thread.h
+
++sys_prepare: $(SYS_DIR)/sc.h
++
+ prepare: $(SYS_HEADERS)
+
++filechk_$(SYS_DIR)/sc.h := $(SYS_UTIL_DIR)/mk_sc
++
+ $(SYS_DIR)/sc.h: $(SYS_UTIL_DIR)/mk_sc
+- $< > $@
++ $(call filechk,$@)
++
++filechk_$(SYS_DIR)/thread.h := $(SYS_UTIL_DIR)/mk_thread
+
+ $(SYS_DIR)/thread.h: $(SYS_UTIL_DIR)/mk_thread
+- $< > $@
++ $(call filechk,$@)
+
+-$(SYS_UTIL_DIR)/mk_sc: FORCE ;
+- @$(call descend,$(SYS_UTIL_DIR),$@)
++$(SYS_UTIL_DIR)/mk_sc: scripts/fixdep include/config/MARKER FORCE ;
++ $(Q)$(MAKE) $(build)=$(SYS_UTIL_DIR) $@
+
+-$(SYS_UTIL_DIR)/mk_thread: $(ARCH_SYMLINKS) $(GEN_HEADERS) FORCE ;
+- @$(call descend,$(SYS_UTIL_DIR),$@)
++$(SYS_UTIL_DIR)/mk_thread: $(ARCH_SYMLINKS) $(GEN_HEADERS) sys_prepare FORCE ;
++ $(Q)$(MAKE) $(build)=$(SYS_UTIL_DIR) $@
+
+ $(SYS_UTIL_DIR): include/asm FORCE
+- @$(call descend,$@,)
++ $(Q)$(MAKE) $(build)=$(SYS_UTIL_DIR)
+
+-sysclean :
+- rm -f $(SYS_HEADERS)
++CLEAN_FILES += $(SYS_HEADERS)
+diff -Naur a/arch/um/Makefile-skas b/arch/um/Makefile-skas
+--- a/arch/um/Makefile-skas 2004-02-11 12:15:22.000000000 -0500
++++ b/arch/um/Makefile-skas 2004-02-11 12:27:10.000000000 -0500
+@@ -14,7 +14,7 @@
+ LINK_SKAS = -Wl,-rpath,/lib
+ LD_SCRIPT_SKAS = dyn.lds.s
+
+-GEN_HEADERS += $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h
++GEN_HEADERS += $(TOPDIR)/$(ARCH_DIR)/include/skas_ptregs.h
+
+-$(ARCH_DIR)/kernel/skas/include/skas_ptregs.h :
+- $(MAKE) -C $(ARCH_DIR)/kernel/skas include/skas_ptregs.h
++$(TOPDIR)/$(ARCH_DIR)/include/skas_ptregs.h :
++ $(Q)$(MAKE) $(build)=$(ARCH_DIR)/kernel/skas $@
+diff -Naur a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c
+--- a/arch/um/os-Linux/drivers/ethertap_kern.c 2004-02-11 12:14:17.000000000 -0500
++++ b/arch/um/os-Linux/drivers/ethertap_kern.c 2004-02-11 12:26:00.000000000 -0500
+@@ -8,7 +8,6 @@
+ #include "linux/init.h"
+ #include "linux/netdevice.h"
+ #include "linux/etherdevice.h"
+-#include "linux/init.h"
+ #include "net_kern.h"
+ #include "net_user.h"
+ #include "etap.h"
+diff -Naur a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c
+--- a/arch/um/os-Linux/drivers/ethertap_user.c 2004-02-11 12:15:29.000000000 -0500
++++ b/arch/um/os-Linux/drivers/ethertap_user.c 2004-02-11 12:27:17.000000000 -0500
+@@ -8,7 +8,6 @@
+ #include <stdio.h>
+ #include <unistd.h>
+ #include <stddef.h>
+-#include <fcntl.h>
+ #include <stdlib.h>
+ #include <sys/errno.h>
+ #include <sys/socket.h>
+@@ -42,13 +41,14 @@
+ {
+ struct addr_change change;
+ void *output;
++ int n;
+
+ change.what = op;
+ memcpy(change.addr, addr, sizeof(change.addr));
+ memcpy(change.netmask, netmask, sizeof(change.netmask));
+- if(write(fd, &change, sizeof(change)) != sizeof(change))
+- printk("etap_change - request failed, errno = %d\n",
+- errno);
++ n = os_write_file(fd, &change, sizeof(change));
++ if(n != sizeof(change))
++ printk("etap_change - request failed, err = %d\n", -n);
+ output = um_kmalloc(page_size());
+ if(output == NULL)
+ printk("etap_change : Failed to allocate output buffer\n");
+@@ -82,15 +82,15 @@
+ struct etap_pre_exec_data *data = arg;
+
+ dup2(data->control_remote, 1);
+- close(data->data_me);
+- close(data->control_me);
++ os_close_file(data->data_me);
++ os_close_file(data->control_me);
+ }
+
+ static int etap_tramp(char *dev, char *gate, int control_me,
+ int control_remote, int data_me, int data_remote)
+ {
+ struct etap_pre_exec_data pe_data;
+- int pid, status, err;
++ int pid, status, err, n;
+ char version_buf[sizeof("nnnnn\0")];
+ char data_fd_buf[sizeof("nnnnnn\0")];
+ char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")];
+@@ -114,21 +114,21 @@
+ pe_data.data_me = data_me;
+ pid = run_helper(etap_pre_exec, &pe_data, args, NULL);
+
+- if(pid < 0) err = errno;
+- close(data_remote);
+- close(control_remote);
+- if(read(control_me, &c, sizeof(c)) != sizeof(c)){
+- printk("etap_tramp : read of status failed, errno = %d\n",
+- errno);
+- return(EINVAL);
++ if(pid < 0) err = pid;
++ os_close_file(data_remote);
++ os_close_file(control_remote);
++ n = os_read_file(control_me, &c, sizeof(c));
++ if(n != sizeof(c)){
++ printk("etap_tramp : read of status failed, err = %d\n", -n);
++ return(-EINVAL);
+ }
+ if(c != 1){
+ printk("etap_tramp : uml_net failed\n");
+- err = EINVAL;
+- if(waitpid(pid, &status, 0) < 0) err = errno;
+- else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 1)){
++ err = -EINVAL;
++ if(waitpid(pid, &status, 0) < 0)
++ err = -errno;
++ else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 1))
+ printk("uml_net didn't exit with status 1\n");
+- }
+ }
+ return(err);
+ }
+@@ -143,14 +143,14 @@
+ if(err) return(err);
+
+ err = os_pipe(data_fds, 0, 0);
+- if(err){
+- printk("data os_pipe failed - errno = %d\n", -err);
++ if(err < 0){
++ printk("data os_pipe failed - err = %d\n", -err);
+ return(err);
+ }
+
+ err = os_pipe(control_fds, 1, 0);
+- if(err){
+- printk("control os_pipe failed - errno = %d\n", -err);
++ if(err < 0){
++ printk("control os_pipe failed - err = %d\n", -err);
+ return(err);
+ }
+
+@@ -167,9 +167,9 @@
+ kfree(output);
+ }
+
+- if(err != 0){
+- printk("etap_tramp failed - errno = %d\n", err);
+- return(-err);
++ if(err < 0){
++ printk("etap_tramp failed - err = %d\n", -err);
++ return(err);
+ }
+
+ pri->data_fd = data_fds[0];
+@@ -183,11 +183,11 @@
+ struct ethertap_data *pri = data;
+
+ iter_addresses(pri->dev, etap_close_addr, &pri->control_fd);
+- close(fd);
++ os_close_file(fd);
+ os_shutdown_socket(pri->data_fd, 1, 1);
+- close(pri->data_fd);
++ os_close_file(pri->data_fd);
+ pri->data_fd = -1;
+- close(pri->control_fd);
++ os_close_file(pri->control_fd);
+ pri->control_fd = -1;
+ }
+
+diff -Naur a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c
+--- a/arch/um/os-Linux/drivers/tuntap_user.c 2004-02-11 12:17:08.000000000 -0500
++++ b/arch/um/os-Linux/drivers/tuntap_user.c 2004-02-11 12:29:13.000000000 -0500
+@@ -8,7 +8,6 @@
+ #include <stdlib.h>
+ #include <unistd.h>
+ #include <errno.h>
+-#include <fcntl.h>
+ #include <sys/wait.h>
+ #include <sys/socket.h>
+ #include <sys/un.h>
+@@ -61,7 +60,7 @@
+ struct tuntap_pre_exec_data *data = arg;
+
+ dup2(data->stdout, 1);
+- close(data->close_me);
++ os_close_file(data->close_me);
+ }
+
+ static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote,
+@@ -86,7 +85,7 @@
+
+ if(pid < 0) return(-pid);
+
+- close(remote);
++ os_close_file(remote);
+
+ msg.msg_name = NULL;
+ msg.msg_namelen = 0;
+@@ -107,19 +106,19 @@
+ if(n < 0){
+ printk("tuntap_open_tramp : recvmsg failed - errno = %d\n",
+ errno);
+- return(errno);
++ return(-errno);
+ }
+ waitpid(pid, NULL, 0);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if(cmsg == NULL){
+ printk("tuntap_open_tramp : didn't receive a message\n");
+- return(EINVAL);
++ return(-EINVAL);
+ }
+ if((cmsg->cmsg_level != SOL_SOCKET) ||
+ (cmsg->cmsg_type != SCM_RIGHTS)){
+ printk("tuntap_open_tramp : didn't receive a descriptor\n");
+- return(EINVAL);
++ return(-EINVAL);
+ }
+ *fd_out = ((int *) CMSG_DATA(cmsg))[0];
+ return(0);
+@@ -133,27 +132,29 @@
+ int err, fds[2], len, used;
+
+ err = tap_open_common(pri->dev, pri->gate_addr);
+- if(err) return(err);
++ if(err < 0)
++ return(err);
+
+ if(pri->fixed_config){
+- if((pri->fd = open("/dev/net/tun", O_RDWR)) < 0){
+- printk("Failed to open /dev/net/tun, errno = %d\n",
+- errno);
+- return(-errno);
++ pri->fd = os_open_file("/dev/net/tun", of_rdwr(OPENFLAGS()), 0);
++ if(pri->fd < 0){
++ printk("Failed to open /dev/net/tun, err = %d\n",
++ -pri->fd);
++ return(pri->fd);
+ }
+ memset(&ifr, 0, sizeof(ifr));
+- ifr.ifr_flags = IFF_TAP;
++ ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+ strlcpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name));
+ if(ioctl(pri->fd, TUNSETIFF, (void *) &ifr) < 0){
+- printk("TUNSETIFF failed, errno = %d", errno);
+- close(pri->fd);
++ printk("TUNSETIFF failed, errno = %d\n", errno);
++ os_close_file(pri->fd);
+ return(-errno);
+ }
+ }
+ else {
+ err = os_pipe(fds, 0, 0);
+- if(err){
+- printk("tuntap_open : os_pipe failed - errno = %d\n",
++ if(err < 0){
++ printk("tuntap_open : os_pipe failed - err = %d\n",
+ -err);
+ return(err);
+ }
+@@ -166,19 +167,19 @@
+ fds[1], buffer, len, &used);
+
+ output = buffer;
+- if(err == 0){
+- pri->dev_name = uml_strdup(buffer);
+- output += IFNAMSIZ;
+- printk(output);
+- free_output_buffer(buffer);
+- }
+- else {
+- printk(output);
++ if(err < 0) {
++ printk("%s", output);
+ free_output_buffer(buffer);
+- printk("tuntap_open_tramp failed - errno = %d\n", err);
+- return(-err);
++ printk("tuntap_open_tramp failed - err = %d\n", -err);
++ return(err);
+ }
+- close(fds[0]);
++
++ pri->dev_name = uml_strdup(buffer);
++ output += IFNAMSIZ;
++ printk("%s", output);
++ free_output_buffer(buffer);
++
++ os_close_file(fds[0]);
+ iter_addresses(pri->dev, open_addr, pri->dev_name);
+ }
+
+@@ -191,7 +192,7 @@
+
+ if(!pri->fixed_config)
+ iter_addresses(pri->dev, close_addr, pri->dev_name);
+- close(fd);
++ os_close_file(fd);
+ pri->fd = -1;
+ }
+
+diff -Naur a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
+--- a/arch/um/os-Linux/file.c 2004-02-11 12:17:07.000000000 -0500
++++ b/arch/um/os-Linux/file.c 2004-02-11 12:29:09.000000000 -0500
+@@ -8,6 +8,8 @@
+ #include <errno.h>
+ #include <fcntl.h>
+ #include <signal.h>
++#include <sys/types.h>
++#include <sys/stat.h>
+ #include <sys/socket.h>
+ #include <sys/un.h>
+ #include <sys/ioctl.h>
+@@ -17,33 +19,235 @@
+ #include "user.h"
+ #include "kern_util.h"
+
+-int os_file_type(char *file)
++static void copy_stat(struct uml_stat *dst, struct stat64 *src)
++{
++ *dst = ((struct uml_stat) {
++ .ust_dev = src->st_dev, /* device */
++ .ust_ino = src->st_ino, /* inode */
++ .ust_mode = src->st_mode, /* protection */
++ .ust_nlink = src->st_nlink, /* number of hard links */
++ .ust_uid = src->st_uid, /* user ID of owner */
++ .ust_gid = src->st_gid, /* group ID of owner */
++ .ust_size = src->st_size, /* total size, in bytes */
++ .ust_blksize = src->st_blksize, /* blocksize for filesys I/O */
++ .ust_blocks = src->st_blocks, /* number of blocks allocated */
++ .ust_atime = src->st_atime, /* time of last access */
++ .ust_mtime = src->st_mtime, /* time of last modification */
++ .ust_ctime = src->st_ctime, /* time of last change */
++ });
++}
++
++int os_stat_fd(const int fd, struct uml_stat *ubuf)
++{
++ struct stat64 sbuf;
++ int err;
++
++ do {
++ err = fstat64(fd, &sbuf);
++ } while((err < 0) && (errno == EINTR)) ;
++
++ if(err < 0)
++ return(-errno);
++
++ if(ubuf != NULL)
++ copy_stat(ubuf, &sbuf);
++ return(err);
++}
++
++int os_stat_file(const char *file_name, struct uml_stat *ubuf)
++{
++ struct stat64 sbuf;
++ int err;
++
++ do {
++ err = stat64(file_name, &sbuf);
++ } while((err < 0) && (errno == EINTR)) ;
++
++ if(err < 0)
++ return(-errno);
++
++ if(ubuf != NULL)
++ copy_stat(ubuf, &sbuf);
++ return(err);
++}
++
++int os_access(const char* file, int mode)
++{
++ int amode, err;
++
++ amode=(mode&OS_ACC_R_OK ? R_OK : 0) | (mode&OS_ACC_W_OK ? W_OK : 0) |
++ (mode&OS_ACC_X_OK ? X_OK : 0) | (mode&OS_ACC_F_OK ? F_OK : 0) ;
++
++ err = access(file, amode);
++ if(err < 0)
++ return(-errno);
++
++ return(0);
++}
++
++void os_print_error(int error, const char* str)
++{
++ errno = error < 0 ? -error : error;
++
++ perror(str);
++}
++
++/* FIXME? required only by hostaudio (because it passes ioctls verbatim) */
++int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg)
++{
++ int err;
++
++ err = ioctl(fd, cmd, arg);
++ if(err < 0)
++ return(-errno);
++
++ return(err);
++}
++
++int os_window_size(int fd, int *rows, int *cols)
++{
++ struct winsize size;
++
++ if(ioctl(fd, TIOCGWINSZ, &size) < 0)
++ return(-errno);
++
++ *rows = size.ws_row;
++ *cols = size.ws_col;
++
++ return(0);
++}
++
++int os_new_tty_pgrp(int fd, int pid)
+ {
+- struct stat64 buf;
++ if(ioctl(fd, TIOCSCTTY, 0) < 0){
++ printk("TIOCSCTTY failed, errno = %d\n", errno);
++ return(-errno);
++ }
++
++ if(tcsetpgrp(fd, pid) < 0){
++ printk("tcsetpgrp failed, errno = %d\n", errno);
++ return(-errno);
++ }
++
++ return(0);
++}
++
++/* FIXME: ensure namebuf in os_get_ifname is big enough */
++int os_get_ifname(int fd, char* namebuf)
++{
++ if(ioctl(fd, SIOCGIFNAME, namebuf) < 0)
++ return(-errno);
++
++ return(0);
++}
++
++int os_set_slip(int fd)
++{
++ int disc, sencap;
++
++ disc = N_SLIP;
++ if(ioctl(fd, TIOCSETD, &disc) < 0){
++ printk("Failed to set slip line discipline - "
++ "errno = %d\n", errno);
++ return(-errno);
++ }
++
++ sencap = 0;
++ if(ioctl(fd, SIOCSIFENCAP, &sencap) < 0){
++ printk("Failed to set slip encapsulation - "
++ "errno = %d\n", errno);
++ return(-errno);
++ }
++
++ return(0);
++}
++
++int os_set_owner(int fd, int pid)
++{
++ if(fcntl(fd, F_SETOWN, pid) < 0){
++ int save_errno = errno;
++
++ if(fcntl(fd, F_GETOWN, 0) != pid)
++ return(-save_errno);
++ }
++
++ return(0);
++}
++
++/* FIXME? moved wholesale from sigio_user.c to get fcntls out of that file */
++int os_sigio_async(int master, int slave)
++{
++ int flags;
+
+- if(stat64(file, &buf) == -1)
++ flags = fcntl(master, F_GETFL);
++ if(flags < 0) {
++ printk("fcntl F_GETFL failed, errno = %d\n", errno);
+ return(-errno);
++ }
++
++ if((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) ||
++ (fcntl(master, F_SETOWN, os_getpid()) < 0)){
++ printk("fcntl F_SETFL or F_SETOWN failed, errno = %d\n", errno);
++ return(-errno);
++ }
++
++ if((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0)){
++ printk("fcntl F_SETFL failed, errno = %d\n", errno);
++ return(-errno);
++ }
+
+- if(S_ISDIR(buf.st_mode)) return(OS_TYPE_DIR);
+- else if(S_ISLNK(buf.st_mode)) return(OS_TYPE_SYMLINK);
+- else if(S_ISCHR(buf.st_mode)) return(OS_TYPE_CHARDEV);
+- else if(S_ISBLK(buf.st_mode)) return(OS_TYPE_BLOCKDEV);
+- else if(S_ISFIFO(buf.st_mode)) return(OS_TYPE_FIFO);
+- else if(S_ISSOCK(buf.st_mode)) return(OS_TYPE_SOCK);
++ return(0);
++}
++
++int os_mode_fd(int fd, int mode)
++{
++ int err;
++
++ do {
++ err = fchmod(fd, mode);
++ } while((err < 0) && (errno==EINTR)) ;
++
++ if(err < 0)
++ return(-errno);
++
++ return(0);
++}
++
++int os_file_type(char *file)
++{
++ struct uml_stat buf;
++ int err;
++
++ err = os_stat_file(file, &buf);
++ if(err < 0)
++ return(err);
++
++ if(S_ISDIR(buf.ust_mode)) return(OS_TYPE_DIR);
++ else if(S_ISLNK(buf.ust_mode)) return(OS_TYPE_SYMLINK);
++ else if(S_ISCHR(buf.ust_mode)) return(OS_TYPE_CHARDEV);
++ else if(S_ISBLK(buf.ust_mode)) return(OS_TYPE_BLOCKDEV);
++ else if(S_ISFIFO(buf.ust_mode)) return(OS_TYPE_FIFO);
++ else if(S_ISSOCK(buf.ust_mode)) return(OS_TYPE_SOCK);
+ else return(OS_TYPE_FILE);
+ }
+
+ int os_file_mode(char *file, struct openflags *mode_out)
+ {
++ int err;
++
+ *mode_out = OPENFLAGS();
+
+- if(!access(file, W_OK)) *mode_out = of_write(*mode_out);
+- else if(errno != EACCES)
+- return(-errno);
++ err = os_access(file, OS_ACC_W_OK);
++ if((err < 0) && (err != -EACCES))
++ return(err);
+
+- if(!access(file, R_OK)) *mode_out = of_read(*mode_out);
+- else if(errno != EACCES)
+- return(-errno);
++	if(err == 0)	/* -EACCES: not writable, not an error */
++		*mode_out = of_write(*mode_out);
++	err = os_access(file, OS_ACC_R_OK);
++	if((err < 0) && (err != -EACCES))
++		return(err);
++	if(err == 0)	/* -EACCES: not readable, not an error */
++		*mode_out = of_read(*mode_out);
+
+ return(0);
+ }
+@@ -63,16 +267,14 @@
+ if(flags.e) f |= O_EXCL;
+
+ fd = open64(file, f, mode);
+- if(fd < 0) return(-errno);
+-
+- if(flags.cl){
+- if(fcntl(fd, F_SETFD, 1)){
+- close(fd);
+- return(-errno);
+- }
++ if(fd < 0)
++ return(-errno);
++
++ if(flags.cl && fcntl(fd, F_SETFD, 1)){
++ os_close_file(fd);
++ return(-errno);
+ }
+
+- return(fd);
+ return(fd);
+ }
+
+@@ -90,7 +292,7 @@
+
+ err = connect(fd, (struct sockaddr *) &sock, sizeof(sock));
+ if(err)
+- return(err);
++ return(-errno);
+
+ return(fd);
+ }
+@@ -109,88 +311,162 @@
+ return(0);
+ }
+
+-int os_read_file(int fd, void *buf, int len)
++static int fault_buffer(void *start, int len,
++ int (*copy_proc)(void *addr, void *buf, int len))
+ {
+- int n;
++ int page = getpagesize(), i;
++ char c;
+
+- /* Force buf into memory if it's not already. */
++ for(i = 0; i < len; i += page){
++ if((*copy_proc)(start + i, &c, sizeof(c)))
++ return(-EFAULT);
++ }
++ if((len % page) != 0){
++ if((*copy_proc)(start + len - 1, &c, sizeof(c)))
++ return(-EFAULT);
++ }
++ return(0);
++}
+
+- /* XXX This fails if buf is kernel memory */
+-#ifdef notdef
+- if(copy_to_user_proc(buf, &c, sizeof(c)))
+- return(-EFAULT);
+-#endif
++static int file_io(int fd, void *buf, int len,
++ int (*io_proc)(int fd, void *buf, int len),
++ int (*copy_user_proc)(void *addr, void *buf, int len))
++{
++ int n, err;
++
++ do {
++ n = (*io_proc)(fd, buf, len);
++ if((n < 0) && (errno == EFAULT)){
++ err = fault_buffer(buf, len, copy_user_proc);
++ if(err)
++ return(err);
++ n = (*io_proc)(fd, buf, len);
++ }
++ } while((n < 0) && (errno == EINTR));
+
+- n = read(fd, buf, len);
+ if(n < 0)
+ return(-errno);
+ return(n);
+ }
+
+-int os_write_file(int fd, void *buf, int count)
++int os_read_file(int fd, void *buf, int len)
+ {
+- int n;
+-
+- /* Force buf into memory if it's not already. */
+-
+- /* XXX This fails if buf is kernel memory */
+-#ifdef notdef
+- if(copy_to_user_proc(buf, buf, buf[0]))
+- return(-EFAULT);
+-#endif
++ return(file_io(fd, buf, len, (int (*)(int, void *, int)) read,
++ copy_from_user_proc));
++}
+
+- n = write(fd, buf, count);
+- if(n < 0)
+- return(-errno);
+- return(n);
++int os_write_file(int fd, const void *buf, int len)
++{
++ return(file_io(fd, (void *) buf, len,
++ (int (*)(int, void *, int)) write, copy_to_user_proc));
+ }
+
+ int os_file_size(char *file, long long *size_out)
+ {
+- struct stat64 buf;
++ struct uml_stat buf;
++ int err;
+
+- if(stat64(file, &buf) == -1){
+- printk("Couldn't stat \"%s\" : errno = %d\n", file, errno);
+- return(-errno);
++ err = os_stat_file(file, &buf);
++ if(err < 0){
++ printk("Couldn't stat \"%s\" : err = %d\n", file, -err);
++ return(err);
+ }
+- if(S_ISBLK(buf.st_mode)){
++
++ if(S_ISBLK(buf.ust_mode)){
+ int fd, blocks;
+
+- if((fd = open64(file, O_RDONLY)) < 0){
+- printk("Couldn't open \"%s\", errno = %d\n", file,
+- errno);
+- return(-errno);
++ fd = os_open_file(file, of_read(OPENFLAGS()), 0);
++ if(fd < 0){
++ printk("Couldn't open \"%s\", errno = %d\n", file, -fd);
++ return(fd);
+ }
+ if(ioctl(fd, BLKGETSIZE, &blocks) < 0){
+ printk("Couldn't get the block size of \"%s\", "
+ "errno = %d\n", file, errno);
+- close(fd);
+- return(-errno);
++ err = -errno;
++ os_close_file(fd);
++ return(err);
+ }
+ *size_out = ((long long) blocks) * 512;
+- close(fd);
++ os_close_file(fd);
+ return(0);
+ }
+- *size_out = buf.st_size;
++ *size_out = buf.ust_size;
++ return(0);
++}
++
++int os_file_modtime(char *file, unsigned long *modtime)
++{
++ struct uml_stat buf;
++ int err;
++
++ err = os_stat_file(file, &buf);
++ if(err < 0){
++ printk("Couldn't stat \"%s\" : err = %d\n", file, -err);
++ return(err);
++ }
++
++ *modtime = buf.ust_mtime;
+ return(0);
+ }
+
++int os_get_exec_close(int fd, int* close_on_exec)
++{
++ int ret;
++
++ do {
++ ret = fcntl(fd, F_GETFD);
++ } while((ret < 0) && (errno == EINTR)) ;
++
++ if(ret < 0)
++ return(-errno);
++
++ *close_on_exec = (ret&FD_CLOEXEC) ? 1 : 0;
++ return(ret);
++}
++
++int os_set_exec_close(int fd, int close_on_exec)
++{
++ int flag, err;
++
++ if(close_on_exec) flag = FD_CLOEXEC;
++ else flag = 0;
++
++ do {
++ err = fcntl(fd, F_SETFD, flag);
++ } while((err < 0) && (errno == EINTR)) ;
++
++ if(err < 0)
++ return(-errno);
++ return(err);
++}
++
+ int os_pipe(int *fds, int stream, int close_on_exec)
+ {
+ int err, type = stream ? SOCK_STREAM : SOCK_DGRAM;
+
+ err = socketpair(AF_UNIX, type, 0, fds);
+- if(err)
++ if(err < 0)
+ return(-errno);
+
+ if(!close_on_exec)
+ return(0);
+
+- if((fcntl(fds[0], F_SETFD, 1) < 0) || (fcntl(fds[1], F_SETFD, 1) < 0))
+- printk("os_pipe : Setting FD_CLOEXEC failed, errno = %d",
+- errno);
++ err = os_set_exec_close(fds[0], 1);
++ if(err < 0)
++ goto error;
++
++ err = os_set_exec_close(fds[1], 1);
++ if(err < 0)
++ goto error;
+
+ return(0);
++
++ error:
++ printk("os_pipe : Setting FD_CLOEXEC failed, err = %d\n", -err);
++ os_close_file(fds[1]);
++ os_close_file(fds[0]);
++ return(err);
+ }
+
+ int os_set_fd_async(int fd, int owner)
+@@ -270,7 +546,7 @@
+ return(-EINVAL);
+ }
+ err = shutdown(fd, what);
+- if(err)
++ if(err < 0)
+ return(-errno);
+ return(0);
+ }
+@@ -315,7 +591,7 @@
+ return(new);
+ }
+
+-int create_unix_socket(char *file, int len)
++int os_create_unix_socket(char *file, int len, int close_on_exec)
+ {
+ struct sockaddr_un addr;
+ int sock, err;
+@@ -327,6 +603,13 @@
+ return(-errno);
+ }
+
++ if(close_on_exec) {
++ err = os_set_exec_close(sock, 1);
++ if(err < 0)
++ printk("create_unix_socket : close_on_exec failed, "
++ "err = %d\n", -err);
++ }
++
+ addr.sun_family = AF_UNIX;
+
+ /* XXX Be more careful about overflow */
+@@ -334,14 +617,45 @@
+
+ err = bind(sock, (struct sockaddr *) &addr, sizeof(addr));
+ if (err < 0){
+- printk("create_listening_socket - bind failed, errno = %d\n",
+- errno);
++ printk("create_listening_socket at '%s' - bind failed, "
++ "errno = %d\n", file, errno);
+ return(-errno);
+ }
+
+ return(sock);
+ }
+
++void os_flush_stdout(void)
++{
++ fflush(stdout);
++}
++
++int os_lock_file(int fd, int excl)
++{
++ int type = excl ? F_WRLCK : F_RDLCK;
++ struct flock lock = ((struct flock) { .l_type = type,
++ .l_whence = SEEK_SET,
++ .l_start = 0,
++ .l_len = 0 } );
++ int err, save;
++
++ err = fcntl(fd, F_SETLK, &lock);
++ if(!err)
++ goto out;
++
++ save = -errno;
++ err = fcntl(fd, F_GETLK, &lock);
++ if(err){
++ err = -errno;
++ goto out;
++ }
++
++ printk("F_SETLK failed, file already locked by pid %d\n", lock.l_pid);
++ err = save;
++ out:
++ return(err);
++}
++
+ /*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+diff -Naur a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
+--- a/arch/um/os-Linux/Makefile 2004-02-11 12:15:56.000000000 -0500
++++ b/arch/um/os-Linux/Makefile 2004-02-11 12:27:48.000000000 -0500
+@@ -9,7 +9,3 @@
+
+ $(USER_OBJS) : %.o: %.c
+ $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $<
+-
+-clean :
+-
+-archmrproper:
+diff -Naur a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
+--- a/arch/um/os-Linux/process.c 2004-02-11 12:14:23.000000000 -0500
++++ b/arch/um/os-Linux/process.c 2004-02-11 12:26:04.000000000 -0500
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
++ * Copyright (C) 2002 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+@@ -7,32 +7,37 @@
+ #include <stdio.h>
+ #include <errno.h>
+ #include <signal.h>
++#include <linux/unistd.h>
+ #include <sys/mman.h>
+ #include <sys/wait.h>
+ #include "os.h"
+ #include "user.h"
+
++#define ARBITRARY_ADDR -1
++#define FAILURE_PID -1
++
+ unsigned long os_process_pc(int pid)
+ {
+ char proc_stat[sizeof("/proc/#####/stat\0")], buf[256];
+ unsigned long pc;
+- int fd;
++ int fd, err;
+
+ sprintf(proc_stat, "/proc/%d/stat", pid);
+ fd = os_open_file(proc_stat, of_read(OPENFLAGS()), 0);
+ if(fd < 0){
+- printk("os_process_pc - couldn't open '%s', errno = %d\n",
+- proc_stat, errno);
+- return(-1);
++ printk("os_process_pc - couldn't open '%s', err = %d\n",
++ proc_stat, -fd);
++ return(ARBITRARY_ADDR);
+ }
+- if(read(fd, buf, sizeof(buf)) < 0){
+- printk("os_process_pc - couldn't read '%s', errno = %d\n",
+- proc_stat, errno);
+- close(fd);
+- return(-1);
++ err = os_read_file(fd, buf, sizeof(buf));
++ if(err < 0){
++ printk("os_process_pc - couldn't read '%s', err = %d\n",
++ proc_stat, -err);
++ os_close_file(fd);
++ return(ARBITRARY_ADDR);
+ }
+- close(fd);
+- pc = -1;
++ os_close_file(fd);
++ pc = ARBITRARY_ADDR;
+ if(sscanf(buf, "%*d %*s %*c %*d %*d %*d %*d %*d %*d %*d %*d "
+ "%*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d "
+ "%*d %*d %*d %*d %ld", &pc) != 1){
+@@ -52,22 +57,23 @@
+ snprintf(stat, sizeof(stat), "/proc/%d/stat", pid);
+ fd = os_open_file(stat, of_read(OPENFLAGS()), 0);
+ if(fd < 0){
+- printk("Couldn't open '%s', errno = %d\n", stat, -fd);
+- return(-1);
++ printk("Couldn't open '%s', err = %d\n", stat, -fd);
++ return(FAILURE_PID);
+ }
+
+- n = read(fd, data, sizeof(data));
+- close(fd);
++ n = os_read_file(fd, data, sizeof(data));
++ os_close_file(fd);
+
+ if(n < 0){
+- printk("Couldn't read '%s', errno = %d\n", stat);
+- return(-1);
++ printk("Couldn't read '%s', err = %d\n", stat, -n);
++ return(FAILURE_PID);
+ }
+
+- parent = -1;
++ parent = FAILURE_PID;
+ /* XXX This will break if there is a space in the command */
+ n = sscanf(data, "%*d %*s %*c %d", &parent);
+- if(n != 1) printk("Failed to scan '%s'\n", data);
++ if(n != 1)
++ printk("Failed to scan '%s'\n", data);
+
+ return(parent);
+ }
+@@ -87,7 +93,8 @@
+
+ void os_usr1_process(int pid)
+ {
+- kill(pid, SIGUSR1);
++ syscall(__NR_tkill, pid, SIGUSR1);
++ /* kill(pid, SIGUSR1); */
+ }
+
+ int os_getpid(void)
+@@ -95,7 +102,7 @@
+ return(getpid());
+ }
+
+-int os_map_memory(void *virt, int fd, unsigned long off, unsigned long len,
++int os_map_memory(void *virt, int fd, unsigned long long off, unsigned long len,
+ int r, int w, int x)
+ {
+ void *loc;
+@@ -104,8 +111,8 @@
+ prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) |
+ (x ? PROT_EXEC : 0);
+
+- loc = mmap((void *) virt, len, prot, MAP_SHARED | MAP_FIXED,
+- fd, off);
++ loc = mmap64((void *) virt, len, prot, MAP_SHARED | MAP_FIXED,
++ fd, off);
+ if(loc == MAP_FAILED)
+ return(-errno);
+ return(0);
+@@ -126,7 +133,8 @@
+ int err;
+
+ err = munmap(addr, len);
+- if(err < 0) return(-errno);
++ if(err < 0)
++ return(-errno);
+ return(0);
+ }
+
+diff -Naur a/arch/um/os-Linux/tty.c b/arch/um/os-Linux/tty.c
+--- a/arch/um/os-Linux/tty.c 2004-02-11 12:15:53.000000000 -0500
++++ b/arch/um/os-Linux/tty.c 2004-02-11 12:27:46.000000000 -0500
+@@ -28,10 +28,10 @@
+ struct grantpt_info info;
+ int fd;
+
+- if((fd = os_open_file("/dev/ptmx", of_rdwr(OPENFLAGS()), 0)) < 0){
+- printk("get_pty : Couldn't open /dev/ptmx - errno = %d\n",
+- errno);
+- return(-1);
++ fd = os_open_file("/dev/ptmx", of_rdwr(OPENFLAGS()), 0);
++ if(fd < 0){
++ printk("get_pty : Couldn't open /dev/ptmx - err = %d\n", -fd);
++ return(fd);
+ }
+
+ info.fd = fd;
+@@ -39,7 +39,7 @@
+
+ if(info.res < 0){
+ printk("get_pty : Couldn't grant pty - errno = %d\n",
+- info.err);
++ -info.err);
+ return(-1);
+ }
+ if(unlockpt(fd) < 0){
+diff -Naur a/arch/um/sys-i386/bugs.c b/arch/um/sys-i386/bugs.c
+--- a/arch/um/sys-i386/bugs.c 2004-02-11 12:16:28.000000000 -0500
++++ b/arch/um/sys-i386/bugs.c 2004-02-11 12:28:27.000000000 -0500
+@@ -4,20 +4,21 @@
+ */
+
+ #include <unistd.h>
+-#include <fcntl.h>
+ #include <errno.h>
+ #include <string.h>
+ #include <sys/signal.h>
++#include <asm/ldt.h>
+ #include "kern_util.h"
+ #include "user.h"
+ #include "sysdep/ptrace.h"
+ #include "task.h"
++#include "os.h"
+
+ #define MAXTOKEN 64
+
+ /* Set during early boot */
+-int cpu_has_cmov = 1;
+-int cpu_has_xmm = 0;
++int host_has_cmov = 1;
++int host_has_xmm = 0;
+
+ static char token(int fd, char *buf, int len, char stop)
+ {
+@@ -27,13 +28,15 @@
+ ptr = buf;
+ end = &buf[len];
+ do {
+- n = read(fd, ptr, sizeof(*ptr));
++ n = os_read_file(fd, ptr, sizeof(*ptr));
+ c = *ptr++;
+- if(n == 0) return(0);
+- else if(n != sizeof(*ptr)){
+- printk("Reading /proc/cpuinfo failed, "
+- "errno = %d\n", errno);
+- return(-errno);
++ if(n != sizeof(*ptr)){
++ if(n == 0) return(0);
++ printk("Reading /proc/cpuinfo failed, err = %d\n", -n);
++ if(n < 0)
++ return(n);
++ else
++ return(-EIO);
+ }
+ } while((c != '\n') && (c != stop) && (ptr < end));
+
+@@ -45,45 +48,79 @@
+ return(c);
+ }
+
+-static int check_cpu_feature(char *feature, int *have_it)
++static int find_cpuinfo_line(int fd, char *key, char *scratch, int len)
+ {
+- char buf[MAXTOKEN], c;
+- int fd, len = sizeof(buf)/sizeof(buf[0]), n;
+-
+- printk("Checking for host processor %s support...", feature);
+- fd = open("/proc/cpuinfo", O_RDONLY);
+- if(fd < 0){
+- printk("Couldn't open /proc/cpuinfo, errno = %d\n", errno);
+- return(0);
+- }
++ int n;
++ char c;
+
+- *have_it = 0;
+- buf[len - 1] = '\0';
++ scratch[len - 1] = '\0';
+ while(1){
+- c = token(fd, buf, len - 1, ':');
+- if(c <= 0) goto out;
++ c = token(fd, scratch, len - 1, ':');
++ if(c <= 0)
++ return(0);
+ else if(c != ':'){
+ printk("Failed to find ':' in /proc/cpuinfo\n");
+- goto out;
++ return(0);
+ }
+
+- if(!strncmp(buf, "flags", strlen("flags"))) break;
++ if(!strncmp(scratch, key, strlen(key)))
++ return(1);
+
+ do {
+- n = read(fd, &c, sizeof(c));
++ n = os_read_file(fd, &c, sizeof(c));
+ if(n != sizeof(c)){
+ printk("Failed to find newline in "
+- "/proc/cpuinfo, n = %d, errno = %d\n",
+- n, errno);
+- goto out;
++ "/proc/cpuinfo, err = %d\n", -n);
++ return(0);
+ }
+ } while(c != '\n');
+ }
++ return(0);
++}
++
++int cpu_feature(char *what, char *buf, int len)
++{
++ int fd, ret = 0;
++
++ fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0);
++ if(fd < 0){
++ printk("Couldn't open /proc/cpuinfo, err = %d\n", -fd);
++ return(0);
++ }
++
++ if(!find_cpuinfo_line(fd, what, buf, len)){
++ printk("Couldn't find '%s' line in /proc/cpuinfo\n", what);
++ goto out_close;
++ }
++
++ token(fd, buf, len, '\n');
++ ret = 1;
++
++ out_close:
++ os_close_file(fd);
++ return(ret);
++}
++
++static int check_cpu_flag(char *feature, int *have_it)
++{
++ char buf[MAXTOKEN], c;
++ int fd, len = sizeof(buf)/sizeof(buf[0]);
++
++ printk("Checking for host processor %s support...", feature);
++ fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0);
++ if(fd < 0){
++ printk("Couldn't open /proc/cpuinfo, err = %d\n", -fd);
++ return(0);
++ }
++
++ *have_it = 0;
++ if(!find_cpuinfo_line(fd, "flags", buf, sizeof(buf) / sizeof(buf[0])))
++ goto out;
+
+ c = token(fd, buf, len - 1, ' ');
+ if(c < 0) goto out;
+ else if(c != ' '){
+- printk("Failed to find ':' in /proc/cpuinfo\n");
++ printk("Failed to find ' ' in /proc/cpuinfo\n");
+ goto out;
+ }
+
+@@ -100,21 +137,48 @@
+ out:
+ if(*have_it == 0) printk("No\n");
+ else if(*have_it == 1) printk("Yes\n");
+- close(fd);
++ os_close_file(fd);
+ return(1);
+ }
+
++#if 0 /* This doesn't work in tt mode, plus it's causing compilation problems
++ * for some people.
++ */
++static void disable_lcall(void)
++{
++ struct modify_ldt_ldt_s ldt;
++ int err;
++
++ bzero(&ldt, sizeof(ldt));
++ ldt.entry_number = 7;
++ ldt.base_addr = 0;
++ ldt.limit = 0;
++ err = modify_ldt(1, &ldt, sizeof(ldt));
++ if(err)
++ printk("Failed to disable lcall7 - errno = %d\n", errno);
++}
++#endif
++
++void arch_init_thread(void)
++{
++#if 0
++ disable_lcall();
++#endif
++}
++
+ void arch_check_bugs(void)
+ {
+ int have_it;
+
+- if(access("/proc/cpuinfo", R_OK)){
++ if(os_access("/proc/cpuinfo", OS_ACC_R_OK) < 0){
+ printk("/proc/cpuinfo not available - skipping CPU capability "
+ "checks\n");
+ return;
+ }
+- if(check_cpu_feature("cmov", &have_it)) cpu_has_cmov = have_it;
+- if(check_cpu_feature("xmm", &have_it)) cpu_has_xmm = have_it;
++ if(check_cpu_flag("cmov", &have_it))
++ host_has_cmov = have_it;
++ if(check_cpu_flag("xmm", &have_it))
++ host_has_xmm = have_it;
+ }
+
+ int arch_handle_signal(int sig, union uml_pt_regs *regs)
+@@ -130,18 +194,18 @@
+ if((*((char *) ip) != 0x0f) || ((*((char *) (ip + 1)) & 0xf0) != 0x40))
+ return(0);
+
+- if(cpu_has_cmov == 0)
++ if(host_has_cmov == 0)
+ panic("SIGILL caused by cmov, which this processor doesn't "
+ "implement, boot a filesystem compiled for older "
+ "processors");
+- else if(cpu_has_cmov == 1)
++ else if(host_has_cmov == 1)
+ panic("SIGILL caused by cmov, which this processor claims to "
+ "implement");
+- else if(cpu_has_cmov == -1)
++ else if(host_has_cmov == -1)
+ panic("SIGILL caused by cmov, couldn't tell if this processor "
+ "implements it, boot a filesystem compiled for older "
+ "processors");
+- else panic("Bad value for cpu_has_cmov (%d)", cpu_has_cmov);
++ else panic("Bad value for host_has_cmov (%d)", host_has_cmov);
+ return(0);
+ }
+
+diff -Naur a/arch/um/sys-i386/extable.c b/arch/um/sys-i386/extable.c
+--- a/arch/um/sys-i386/extable.c 2004-02-11 12:16:45.000000000 -0500
++++ b/arch/um/sys-i386/extable.c 1969-12-31 19:00:00.000000000 -0500
+@@ -1,30 +0,0 @@
+-/*
+- * linux/arch/i386/mm/extable.c
+- */
+-
+-#include <linux/config.h>
+-#include <linux/module.h>
+-#include <linux/spinlock.h>
+-#include <asm/uaccess.h>
+-
+-/* Simple binary search */
+-const struct exception_table_entry *
+-search_extable(const struct exception_table_entry *first,
+- const struct exception_table_entry *last,
+- unsigned long value)
+-{
+- while (first <= last) {
+- const struct exception_table_entry *mid;
+- long diff;
+-
+- mid = (last - first) / 2 + first;
+- diff = mid->insn - value;
+- if (diff == 0)
+- return mid;
+- else if (diff < 0)
+- first = mid+1;
+- else
+- last = mid-1;
+- }
+- return NULL;
+-}
+diff -Naur a/arch/um/sys-i386/fault.c b/arch/um/sys-i386/fault.c
+--- a/arch/um/sys-i386/fault.c 2004-02-11 12:15:44.000000000 -0500
++++ b/arch/um/sys-i386/fault.c 2004-02-11 12:27:19.000000000 -0500
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
++ * Copyright (C) 2002 - 2004 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+@@ -7,16 +7,24 @@
+ #include "sysdep/ptrace.h"
+ #include "sysdep/sigcontext.h"
+
+-extern unsigned long search_exception_table(unsigned long addr);
++/* These two are from asm-um/uaccess.h and linux/module.h, check them. */
++struct exception_table_entry
++{
++ unsigned long insn;
++ unsigned long fixup;
++};
+
++const struct exception_table_entry *search_exception_tables(unsigned long add);
++
++/* Compare this to arch/i386/mm/extable.c:fixup_exception() */
+ int arch_fixup(unsigned long address, void *sc_ptr)
+ {
+ struct sigcontext *sc = sc_ptr;
+- unsigned long fixup;
++ const struct exception_table_entry *fixup;
+
+ fixup = search_exception_tables(address);
+ if(fixup != 0){
+- sc->eip = fixup;
++ sc->eip = fixup->fixup;
+ return(1);
+ }
+ return(0);
+diff -Naur a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
+--- a/arch/um/sys-i386/Makefile 2004-02-11 12:14:27.000000000 -0500
++++ b/arch/um/sys-i386/Makefile 2004-02-11 12:26:08.000000000 -0500
+@@ -1,7 +1,8 @@
+-obj-y = bugs.o checksum.o extable.o fault.o ksyms.o ldt.o module.o \
+- ptrace.o ptrace_user.o semaphore.o sigcontext.o syscalls.o sysrq.o
++obj-y = bugs.o checksum.o fault.o ksyms.o ldt.o ptrace.o ptrace_user.o \
++ semaphore.o sigcontext.o syscalls.o sysrq.o time.o
+
+ obj-$(CONFIG_HIGHMEM) += highmem.o
++obj-$(CONFIG_MODULES) += module.o
+
+ USER_OBJS := bugs.o ptrace_user.o sigcontext.o fault.o
+ USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
+@@ -9,6 +10,8 @@
+ SYMLINKS = semaphore.c highmem.c module.c
+ SYMLINKS := $(foreach f,$(SYMLINKS),$(src)/$f)
+
++clean-files := $(SYMLINKS)
++
+ semaphore.c-dir = kernel
+ highmem.c-dir = mm
+ module.c-dir = kernel
+@@ -24,19 +27,4 @@
+ $(SYMLINKS):
+ $(call make_link,$@)
+
+-clean:
+- $(MAKE) -C util clean
+-
+-fastdep:
+-
+-dep:
+-
+-archmrproper:
+- rm -f $(SYMLINKS)
+-
+-archclean:
+-
+-archdep:
+-
+-modules:
+-
++subdir- := util
+diff -Naur a/arch/um/sys-i386/ptrace_user.c b/arch/um/sys-i386/ptrace_user.c
+--- a/arch/um/sys-i386/ptrace_user.c 2004-02-11 12:14:33.000000000 -0500
++++ b/arch/um/sys-i386/ptrace_user.c 2004-02-11 12:26:15.000000000 -0500
+@@ -39,10 +39,10 @@
+ nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]);
+ for(i = 0; i < nregs; i++){
+ if((i == 4) || (i == 5)) continue;
+- if(ptrace(PTRACE_POKEUSR, pid, &dummy->u_debugreg[i],
++ if(ptrace(PTRACE_POKEUSER, pid, &dummy->u_debugreg[i],
+ regs[i]) < 0)
+- printk("write_debugregs - ptrace failed, "
+- "errno = %d\n", errno);
++ printk("write_debugregs - ptrace failed on "
++ "register %d, errno = %d\n", i, errno);
+ }
+ }
+
+@@ -54,7 +54,7 @@
+ dummy = NULL;
+ nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]);
+ for(i = 0; i < nregs; i++){
+- regs[i] = ptrace(PTRACE_PEEKUSR, pid,
++ regs[i] = ptrace(PTRACE_PEEKUSER, pid,
+ &dummy->u_debugreg[i], 0);
+ }
+ }
+diff -Naur a/arch/um/sys-i386/time.c b/arch/um/sys-i386/time.c
+--- a/arch/um/sys-i386/time.c 1969-12-31 19:00:00.000000000 -0500
++++ b/arch/um/sys-i386/time.c 2004-02-11 12:26:16.000000000 -0500
+@@ -0,0 +1,24 @@
++/*
++ * sys-i386/time.c
++ * Created 25.9.2002 Sapan Bhatia
++ *
++ */
++
++unsigned long long time_stamp(void)
++{
++ unsigned long low, high;
++ /* volatile: every call must re-read the TSC; gcc may merge or hoist a non-volatile asm */
++ asm volatile("rdtsc" : "=a" (low), "=d" (high));
++ return((((unsigned long long) high) << 32) + low);
++}
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/sys-i386/util/Makefile b/arch/um/sys-i386/util/Makefile
+--- a/arch/um/sys-i386/util/Makefile 2004-02-11 12:16:48.000000000 -0500
++++ b/arch/um/sys-i386/util/Makefile 2004-02-11 12:29:04.000000000 -0500
+@@ -1,15 +1,10 @@
++host-progs := mk_sc mk_thread
++always := $(host-progs)
+
+-host-progs := mk_sc
+-always := $(host-progs) mk_thread
+-targets := mk_thread_kern.o mk_thread_user.o
++mk_thread-objs := mk_thread_kern.o mk_thread_user.o
+
+-mk_sc-objs := mk_sc.o
+-
+-$(obj)/mk_thread : $(obj)/mk_thread_kern.o $(obj)/mk_thread_user.o
+- $(CC) $(CFLAGS) -o $@ $^
+-
+-$(obj)/mk_thread_user.o : $(src)/mk_thread_user.c
+- $(CC) $(USER_CFLAGS) -c -o $@ $<
++HOSTCFLAGS_mk_thread_kern.o := $(CFLAGS)
++HOSTCFLAGS_mk_thread_user.o := $(USER_CFLAGS)
+
+ clean :
+ $(RM) -f $(build-targets)
+diff -Naur a/arch/um/sys-i386/util/mk_sc.c b/arch/um/sys-i386/util/mk_sc.c
+--- a/arch/um/sys-i386/util/mk_sc.c 2004-02-11 12:16:38.000000000 -0500
++++ b/arch/um/sys-i386/util/mk_sc.c 2004-02-11 12:28:37.000000000 -0500
+@@ -38,6 +38,7 @@
+ SC_OFFSET("SC_ERR", err);
+ SC_OFFSET("SC_CR2", cr2);
+ SC_OFFSET("SC_FPSTATE", fpstate);
++ SC_OFFSET("SC_SIGMASK", oldmask);
+ SC_FP_OFFSET("SC_FP_CW", cw);
+ SC_FP_OFFSET("SC_FP_SW", sw);
+ SC_FP_OFFSET("SC_FP_TAG", tag);
+diff -Naur a/arch/um/sys-ia64/Makefile b/arch/um/sys-ia64/Makefile
+--- a/arch/um/sys-ia64/Makefile 2004-02-11 12:14:16.000000000 -0500
++++ b/arch/um/sys-ia64/Makefile 2004-02-11 12:25:43.000000000 -0500
+@@ -7,18 +7,5 @@
+ $(OBJ): $(OBJS)
+ rm -f $@
+ $(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@
+-clean:
+- rm -f $(OBJS)
+
+-fastdep:
+-
+-archmrproper:
+-
+-archclean:
+- rm -f link.ld
+- @$(MAKEBOOT) clean
+-
+-archdep:
+- @$(MAKEBOOT) dep
+-
+-modules:
++clean-files := $(OBJS) link.ld
+diff -Naur a/arch/um/sys-ppc/Makefile b/arch/um/sys-ppc/Makefile
+--- a/arch/um/sys-ppc/Makefile 2004-02-11 12:15:23.000000000 -0500
++++ b/arch/um/sys-ppc/Makefile 2004-02-11 12:27:11.000000000 -0500
+@@ -66,13 +66,4 @@
+ $(CC) $(EXTRA_AFLAGS) $(AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o
+ rm -f asm
+
+-clean:
+- rm -f $(OBJS)
+- rm -f ppc_defs.h
+- rm -f checksum.S semaphore.c mk_defs.c
+-
+-fastdep:
+-
+-dep:
+-
+-modules:
++clean-files := $(OBJS) ppc_defs.h checksum.S semaphore.c mk_defs.c
+diff -Naur a/arch/um/uml.lds.S b/arch/um/uml.lds.S
+--- a/arch/um/uml.lds.S 2004-02-11 12:15:21.000000000 -0500
++++ b/arch/um/uml.lds.S 2004-02-11 12:27:08.000000000 -0500
+@@ -9,7 +9,6 @@
+ {
+ . = START + SIZEOF_HEADERS;
+
+- . = ALIGN(4096);
+ __binary_start = .;
+ #ifdef MODE_TT
+ .thread_private : {
+@@ -26,7 +25,11 @@
+ . = ALIGN(4096); /* Init code and data */
+ _stext = .;
+ __init_begin = .;
+- .text.init : { *(.text.init) }
++ .init.text : {
++ _sinittext = .;
++ *(.init.text)
++ _einittext = .;
++ }
+ . = ALIGN(4096);
+ .text :
+ {
+@@ -38,7 +41,7 @@
+
+ #include "asm/common.lds.S"
+
+- .data.init : { *(.data.init) }
++ init.data : { *(init.data) }
+ .data :
+ {
+ . = ALIGN(KERNEL_STACK_SIZE); /* init_task */
+diff -Naur a/arch/um/util/Makefile b/arch/um/util/Makefile
+--- a/arch/um/util/Makefile 2004-02-11 12:17:07.000000000 -0500
++++ b/arch/um/util/Makefile 2004-02-11 12:29:10.000000000 -0500
+@@ -1,23 +1,8 @@
+-always := mk_task mk_constants
+-targets := mk_task_user.o mk_task_kern.o \
+- mk_constants_user.o mk_constants_kern.o
++host-progs := mk_task mk_constants
++always := $(host-progs)
+
+-$(obj)/mk_task: $(obj)/mk_task_user.o $(obj)/mk_task_kern.o
+- $(CC) -o $@ $^
++mk_task-objs := mk_task_user.o mk_task_kern.o
++mk_constants-objs := mk_constants_user.o mk_constants_kern.o
+
+-$(obj)/mk_task_user.o: $(src)/mk_task_user.c
+- $(CC) -o $@ -c $<
+-
+-$(obj)/mk_constants : $(obj)/mk_constants_user.o $(obj)/mk_constants_kern.o
+- $(CC) -o $@ $^
+-
+-$(obj)/mk_constants_user.o : $(src)/mk_constants_user.c
+- $(CC) -c $< -o $@
+-
+-$(obj)/mk_constants_kern.o : $(src)/mk_constants_kern.c
+- $(CC) $(CFLAGS) -c $< -o $@
+-
+-clean:
+- $(RM) $(build-targets)
+-
+-archmrproper:
++HOSTCFLAGS_mk_task_kern.o := $(CFLAGS)
++HOSTCFLAGS_mk_constants_kern.o := $(CFLAGS)
+diff -Naur a/arch/um/util/mk_constants_kern.c b/arch/um/util/mk_constants_kern.c
+--- a/arch/um/util/mk_constants_kern.c 2004-02-11 12:14:19.000000000 -0500
++++ b/arch/um/util/mk_constants_kern.c 2004-02-11 12:26:02.000000000 -0500
+@@ -1,5 +1,6 @@
+ #include "linux/kernel.h"
+ #include "linux/stringify.h"
++#include "linux/time.h"
+ #include "asm/page.h"
+
+ extern void print_head(void);
+@@ -11,6 +12,7 @@
+ {
+ print_head();
+ print_constant_int("UM_KERN_PAGE_SIZE", PAGE_SIZE);
++
+ print_constant_str("UM_KERN_EMERG", KERN_EMERG);
+ print_constant_str("UM_KERN_ALERT", KERN_ALERT);
+ print_constant_str("UM_KERN_CRIT", KERN_CRIT);
+@@ -19,6 +21,8 @@
+ print_constant_str("UM_KERN_NOTICE", KERN_NOTICE);
+ print_constant_str("UM_KERN_INFO", KERN_INFO);
+ print_constant_str("UM_KERN_DEBUG", KERN_DEBUG);
++
++ print_constant_int("UM_NSEC_PER_SEC", NSEC_PER_SEC);
+ print_tail();
+ return(0);
+ }
+diff -Naur a/drivers/base/Makefile b/drivers/base/Makefile
+--- a/drivers/base/Makefile 2004-02-11 12:16:32.000000000 -0500
++++ b/drivers/base/Makefile 2004-02-11 12:28:31.000000000 -0500
+@@ -2,7 +2,8 @@
+
+ obj-y := core.o sys.o interface.o bus.o \
+ driver.o class.o class_simple.o platform.o \
+- cpu.o firmware.o init.o map.o dmapool.o
++ cpu.o firmware.o init.o map.o
+ obj-y += power/
+ obj-$(CONFIG_FW_LOADER) += firmware_class.o
+ obj-$(CONFIG_NUMA) += node.o
++obj-$(CONFIG_PCI) += dmapool.o
+diff -Naur a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
+--- a/fs/hostfs/hostfs.h 1969-12-31 19:00:00.000000000 -0500
++++ b/fs/hostfs/hostfs.h 2004-02-11 12:25:42.000000000 -0500
+@@ -0,0 +1,79 @@
++#ifndef __UM_FS_HOSTFS
++#define __UM_FS_HOSTFS
++
++#include "os.h"
++
++/* These are exactly the same definitions as in fs.h, but the names are
++ * changed so that this file can be included in both kernel and user files.
++ */
++
++#define HOSTFS_ATTR_MODE 1
++#define HOSTFS_ATTR_UID 2
++#define HOSTFS_ATTR_GID 4
++#define HOSTFS_ATTR_SIZE 8
++#define HOSTFS_ATTR_ATIME 16
++#define HOSTFS_ATTR_MTIME 32
++#define HOSTFS_ATTR_CTIME 64
++#define HOSTFS_ATTR_ATIME_SET 128
++#define HOSTFS_ATTR_MTIME_SET 256
++#define HOSTFS_ATTR_FORCE 512 /* Not a change, but a change it */
++#define HOSTFS_ATTR_ATTR_FLAG 1024
++
++struct hostfs_iattr {
++ unsigned int ia_valid;
++ mode_t ia_mode;
++ uid_t ia_uid;
++ gid_t ia_gid;
++ loff_t ia_size;
++ struct timespec ia_atime;
++ struct timespec ia_mtime;
++ struct timespec ia_ctime;
++ unsigned int ia_attr_flags;
++};
++
++extern int stat_file(const char *path, unsigned long long *inode_out,
++ int *mode_out, int *nlink_out, int *uid_out, int *gid_out,
++ unsigned long long *size_out, struct timespec *atime_out,
++ struct timespec *mtime_out, struct timespec *ctime_out,
++ int *blksize_out, unsigned long long *blocks_out);
++extern int access_file(char *path, int r, int w, int x);
++extern int open_file(char *path, int r, int w, int append);
++extern int file_type(const char *path, int *rdev);
++extern void *open_dir(char *path, int *err_out);
++extern char *read_dir(void *stream, unsigned long long *pos,
++ unsigned long long *ino_out, int *len_out);
++extern void close_file(void *stream);
++extern void close_dir(void *stream);
++extern int read_file(int fd, unsigned long long *offset, char *buf, int len);
++extern int write_file(int fd, unsigned long long *offset, const char *buf,
++ int len);
++extern int lseek_file(int fd, long long offset, int whence);
++extern int file_create(char *name, int ur, int uw, int ux, int gr,
++ int gw, int gx, int or, int ow, int ox);
++extern int set_attr(const char *file, struct hostfs_iattr *attrs);
++extern int make_symlink(const char *from, const char *to);
++extern int unlink_file(const char *file);
++extern int do_mkdir(const char *file, int mode);
++extern int do_rmdir(const char *file);
++extern int do_mknod(const char *file, int mode, int dev);
++extern int link_file(const char *from, const char *to);
++extern int do_readlink(char *file, char *buf, int size);
++extern int rename_file(char *from, char *to);
++extern int do_statfs(char *root, long *bsize_out, long long *blocks_out,
++ long long *bfree_out, long long *bavail_out,
++ long long *files_out, long long *ffree_out,
++ void *fsid_out, int fsid_size, long *namelen_out,
++ long *spare_out);
++
++#endif
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
+--- a/fs/hostfs/hostfs_kern.c 1969-12-31 19:00:00.000000000 -0500
++++ b/fs/hostfs/hostfs_kern.c 2004-02-11 12:25:59.000000000 -0500
+@@ -0,0 +1,1008 @@
++/*
++ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
++ * Licensed under the GPL
++ *
++ * Ported the filesystem routines to 2.5.
++ * 2003-02-10 Petr Baudis <pasky@ucw.cz>
++ */
++
++#include <linux/stddef.h>
++#include <linux/fs.h>
++#include <linux/version.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/pagemap.h>
++#include <linux/blkdev.h>
++#include <linux/list.h>
++#include <linux/buffer_head.h>
++#include <linux/root_dev.h>
++#include <linux/statfs.h>
++#include <asm/uaccess.h>
++#include "hostfs.h"
++#include "kern_util.h"
++#include "kern.h"
++#include "user_util.h"
++#include "2_5compat.h"
++#include "init.h"
++
++struct hostfs_inode_info {
++ char *host_filename;
++ int fd;
++ int mode;
++ struct inode vfs_inode;
++};
++
++static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
++{
++ return(list_entry(inode, struct hostfs_inode_info, vfs_inode));
++}
++
++#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_dentry->d_inode)
++
++int hostfs_d_delete(struct dentry *dentry)
++{
++ return(1);
++}
++
++struct dentry_operations hostfs_dentry_ops = {
++ .d_delete = hostfs_d_delete,
++};
++
++/* Changed in hostfs_args before the kernel starts running */
++static char *root_ino = "/";
++static int append = 0;
++
++#define HOSTFS_SUPER_MAGIC 0x00c0ffee
++
++static struct inode_operations hostfs_iops;
++static struct inode_operations hostfs_dir_iops;
++static struct address_space_operations hostfs_link_aops;
++
++static int __init hostfs_args(char *options, int *add)
++{
++ char *ptr;
++
++ ptr = strchr(options, ',');
++ if(ptr != NULL)
++ *ptr++ = '\0';
++ if(*options != '\0')
++ root_ino = options;
++
++ options = ptr;
++ while(options){
++ ptr = strchr(options, ',');
++ if(ptr != NULL)
++ *ptr++ = '\0';
++ if(*options != '\0'){
++ if(!strcmp(options, "append"))
++ append = 1;
++ else printf("hostfs_args - unsupported option - %s\n",
++ options);
++ }
++ options = ptr;
++ }
++ return(0);
++}
++
++__uml_setup("hostfs=", hostfs_args,
++"hostfs=<root dir>,<flags>,...\n"
++" This is used to set hostfs parameters. The root directory argument\n"
++" is used to confine all hostfs mounts to within the specified directory\n"
++" tree on the host. If this isn't specified, then a user inside UML can\n"
++" mount anything on the host that's accessible to the user that's running\n"
++" it.\n"
++" The only flag currently supported is 'append', which specifies that all\n"
++" files opened by hostfs will be opened in append mode.\n\n"
++);
++
++static char *dentry_name(struct dentry *dentry, int extra)
++{
++	struct dentry *cur;
++	char *prefix, *path;
++	int pos = 0;
++
++	/* Pass 1: walk up to the self-parented dentry, summing '/' + name */
++	for(cur = dentry; cur->d_parent != cur; cur = cur->d_parent)
++		pos += cur->d_name.len + 1;
++
++	/* cur is now the topmost dentry; its inode holds the host path prefix */
++	prefix = HOSTFS_I(cur->d_inode)->host_filename;
++	pos += strlen(prefix);
++	path = kmalloc(pos + extra + 1, GFP_KERNEL);
++	if(path == NULL)
++		return NULL;
++
++	/* Pass 2: terminate, then fill the components in from the tail backwards */
++	path[pos] = '\0';
++	for(cur = dentry; cur->d_parent != cur; cur = cur->d_parent){
++		pos -= cur->d_name.len + 1;
++		path[pos] = '/';
++		strncpy(&path[pos + 1], cur->d_name.name,
++			cur->d_name.len);
++	}
++
++	/* Exactly strlen(prefix) bytes are copied, so no terminator lands here */
++	strncpy(path, prefix, strlen(prefix));
++	return path;
++}
++
++static char *inode_name(struct inode *ino, int extra)
++{
++	struct dentry *alias;
++	/* Name the inode through the first entry on its i_dentry list */
++	alias = list_entry(ino->i_dentry.next, struct dentry, d_alias);
++	return dentry_name(alias, extra);
++}
++
++static int read_name(struct inode *ino, char *name)
++{
++ /* Refresh ino from stat_file(name). Non-int inode fields are copied
++ * into locals by stat_file and then into the inode, because passing the
++ * actual pointers and treating them as int * breaks on big-endian machines
++ */
++ int err;
++ int i_mode, i_nlink, i_blksize;
++ unsigned long long i_size;
++ unsigned long long i_ino;
++ unsigned long long i_blocks;
++
++ err = stat_file(name, &i_ino, &i_mode, &i_nlink, &ino->i_uid,
++ &ino->i_gid, &i_size, &ino->i_atime, &ino->i_mtime,
++ &ino->i_ctime, &i_blksize, &i_blocks);
++ if(err)
++ return(err);
++ /* stat_file succeeded: publish the staged values into the inode */
++ ino->i_ino = i_ino;
++ ino->i_mode = i_mode;
++ ino->i_nlink = i_nlink;
++ ino->i_size = i_size;
++ ino->i_blksize = i_blksize;
++ ino->i_blocks = i_blocks;
++ if((ino->i_sb->s_dev == ROOT_DEV) && (ino->i_uid == getuid()))
++ ino->i_uid = 0; /* on the root device, files owned by getuid() appear as uid 0 */
++ return(0);
++}
++
++static char *follow_link(char *link)
++{
++	int len, n;
++	char *name, *resolved, *end;
++	/* Resolve symlink 'link' to a kmalloc'd path or ERR_PTR(); always kfree()s link */
++	len = 64;
++	while(1){
++		n = -ENOMEM;
++		name = kmalloc(len, GFP_KERNEL);
++		if(name == NULL)
++			goto out;
++
++		n = do_readlink(link, name, len);
++		if(n < len)
++			break;	/* error, or the target fit with room to spare */
++		len *= 2;
++		kfree(name);
++	}
++	if(n < 0)
++		goto out_free;
++
++	/* Absolute target, or link has no directory part: the target is the answer */
++	end = strrchr(link, '/');
++	if((*name == '/') || (end == NULL)){
++		kfree(link);
++		return(name);
++	}
++	*(end + 1) = '\0';	/* keep only the directory part of link */
++	len = strlen(link) + strlen(name) + 1;
++
++	resolved = kmalloc(len, GFP_KERNEL);
++	if(resolved == NULL){
++		n = -ENOMEM;
++		goto out_free;
++	}
++
++	sprintf(resolved, "%s%s", link, name);
++	kfree(name);
++	kfree(link);
++	return(resolved);
++
++ out_free:
++	kfree(name);
++ out:
++	kfree(link);	/* the caller no longer holds a usable pointer to it */
++	return(ERR_PTR(n));
++}
++
++static int read_inode(struct inode *ino)
++{
++ char *name;
++ int err = 0;
++
++ /* Unfortunately, we are called from iget() when we don't have a dentry
++ * allocated yet; in that case nothing is read and 0 is returned.
++ */
++ if(list_empty(&ino->i_dentry))
++ goto out;
++
++ err = -ENOMEM;
++ name = inode_name(ino, 0);
++ if(name == NULL)
++ goto out;
++ /* For a symlink, read_name is applied to follow_link's result instead */
++ if(file_type(name, NULL) == OS_TYPE_SYMLINK){
++ name = follow_link(name);
++ if(IS_ERR(name)){
++ err = PTR_ERR(name);
++ goto out;
++ }
++ }
++
++ err = read_name(ino, name);
++ kfree(name);
++ out:
++ return(err);
++}
++
++int hostfs_statfs(struct super_block *sb, struct kstatfs *sf)
++{
++ /* Report host statfs data for the mount's root path. do_statfs uses
++ * struct statfs64 internally, but the linux kernel struct statfs still
++ * has 32-bit versions for most of these fields, so we convert them here
++ */
++ int err;
++ long long f_blocks;
++ long long f_bfree;
++ long long f_bavail;
++ long long f_files;
++ long long f_ffree;
++ /* NOTE(review): do_statfs stores six longs through sf->f_spare - confirm its size */
++ err = do_statfs(HOSTFS_I(sb->s_root->d_inode)->host_filename,
++ &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files,
++ &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid),
++ &sf->f_namelen, sf->f_spare);
++ if(err) return(err);
++ sf->f_blocks = f_blocks;
++ sf->f_bfree = f_bfree;
++ sf->f_bavail = f_bavail;
++ sf->f_files = f_files;
++ sf->f_ffree = f_ffree;
++ sf->f_type = HOSTFS_SUPER_MAGIC;
++ return(0);
++}
++
++static struct inode *hostfs_alloc_inode(struct super_block *sb)
++{
++	struct hostfs_inode_info *info = kmalloc(sizeof(*info), GFP_KERNEL);
++
++	if(info == NULL)
++		return NULL;
++
++	/* Start with no host path, no open host fd and no access mode */
++	*info = ((struct hostfs_inode_info) { .host_filename = NULL,
++					      .fd = -1,
++					      .mode = 0 });
++	inode_init_once(&info->vfs_inode);
++	return &info->vfs_inode;
++}
++
++static void hostfs_destroy_inode(struct inode *inode)
++{
++	struct hostfs_inode_info *info = HOSTFS_I(inode);
++
++	if(info->host_filename)
++		kfree(info->host_filename);
++	if(info->fd != -1)
++		close_file(&info->fd);
++	kfree(info);	/* the inode is a member of info, so this releases both */
++}
++
++static void hostfs_read_inode(struct inode *inode)
++{
++ read_inode(inode); /* the status returned by read_inode is dropped here */
++}
++
++static struct super_operations hostfs_sbops = {
++ .alloc_inode = hostfs_alloc_inode,
++ .destroy_inode = hostfs_destroy_inode,
++ .read_inode = hostfs_read_inode,
++ .statfs = hostfs_statfs,
++};
++
++int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
++{
++ void *dir;
++ char *name;
++ unsigned long long next, ino;
++ int error, len;
++
++ name = dentry_name(file->f_dentry, 0);
++ if(name == NULL) return(-ENOMEM);
++ dir = open_dir(name, &error);
++ kfree(name);
++ if(dir == NULL) return(-error); /* open_dir stored a positive errno */
++ next = file->f_pos; /* continue from the position recorded in the file */
++ while((name = read_dir(dir, &next, &ino, &len)) != NULL){
++ error = (*filldir)(ent, name, len, file->f_pos,
++ ino, DT_UNKNOWN);
++ if(error) break; /* entry not accepted: f_pos stays on it */
++ file->f_pos = next;
++ }
++ close_dir(dir);
++ return(0); /* a non-zero filldir result only ends the loop */
++}
++
++int hostfs_file_open(struct inode *ino, struct file *file)
++{
++	char *name;
++	int mode = 0, r = 0, w = 0, fd;
++
++	mode = file->f_mode & (FMODE_READ | FMODE_WRITE);
++	if((mode & HOSTFS_I(ino)->mode) == mode)
++		return(0);
++
++	/* The file may already have been opened, but with the wrong access,
++	 * so this resets things and reopens the file with the new access.
++	 * The recorded mode is cleared until the reopen succeeds, so that a
++	 * failure cannot leave a mode recorded for an fd that is closed.
++	 */
++	if(HOSTFS_I(ino)->fd != -1){
++		close_file(&HOSTFS_I(ino)->fd);
++		HOSTFS_I(ino)->fd = -1;
++	}
++
++	mode |= HOSTFS_I(ino)->mode;
++	HOSTFS_I(ino)->mode = 0;
++	w = (mode & FMODE_WRITE) != 0;
++	r = w || (mode & FMODE_READ);	/* writers are opened read-write */
++
++	name = dentry_name(file->f_dentry, 0);
++	if(name == NULL)
++		return(-ENOMEM);
++
++	fd = open_file(name, r, w, append);
++	kfree(name);
++	if(fd < 0) return(fd);
++	FILE_HOSTFS_I(file)->fd = fd;
++	HOSTFS_I(ino)->mode = mode;
++
++	return(0);
++}
++
++int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync)
++{
++	return 0;	/* reports success without touching the host file */
++}
++
++static struct file_operations hostfs_file_fops = {
++ .llseek = generic_file_llseek,
++ .read = generic_file_read,
++ .write = generic_file_write,
++ .mmap = generic_file_mmap,
++ .open = hostfs_file_open,
++ .release = NULL,
++ .fsync = hostfs_fsync,
++};
++
++static struct file_operations hostfs_dir_fops = {
++ .readdir = hostfs_readdir,
++ .read = generic_read_dir,
++};
++
++int hostfs_writepage(struct page *page, struct writeback_control *wbc)
++{
++ struct address_space *mapping = page->mapping;
++ struct inode *inode = mapping->host;
++ char *buffer;
++ unsigned long long base;
++ int count = PAGE_CACHE_SIZE;
++ int end_index = inode->i_size >> PAGE_CACHE_SHIFT;
++ int err;
++ /* Write one page to the host fd; a page at or past end_index is cut to i_size's remainder */
++ if (page->index >= end_index)
++ count = inode->i_size & (PAGE_CACHE_SIZE-1);
++
++ buffer = kmap(page);
++ base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT;
++
++ err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count);
++ if(err != count){
++ ClearPageUptodate(page);
++ goto out; /* short or failed write: write_file's result is returned as-is */
++ }
++
++ if (base > inode->i_size) /* write_file advanced base by the bytes written */
++ inode->i_size = base;
++
++ if (PageError(page))
++ ClearPageError(page);
++ err = 0;
++
++ out:
++ kunmap(page);
++
++ unlock_page(page);
++ return err;
++}
++
++int hostfs_readpage(struct file *file, struct page *page)
++{
++ char *buffer;
++ long long start;
++ int err = 0;
++ /* Fill the page from the host fd at the page's byte offset */
++ start = (long long) page->index << PAGE_CACHE_SHIFT;
++ buffer = kmap(page);
++ err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer,
++ PAGE_CACHE_SIZE);
++ if(err < 0) goto out;
++ /* zero whatever the host read did not fill */
++ memset(&buffer[err], 0, PAGE_CACHE_SIZE - err);
++
++ flush_dcache_page(page);
++ SetPageUptodate(page);
++ if (PageError(page)) ClearPageError(page);
++ err = 0;
++ out:
++ kunmap(page);
++ unlock_page(page); /* reached on success and on error alike */
++ return(err);
++}
++
++int hostfs_prepare_write(struct file *file, struct page *page,
++ unsigned int from, unsigned int to)
++{
++ char *buffer;
++ long long start, tmp;
++ int err;
++ /* Fill the parts of the page outside [from, to) from the host file */
++ start = (long long) page->index << PAGE_CACHE_SHIFT;
++ buffer = kmap(page);
++ if(from != 0){ /* load the bytes in front of the range about to be written */
++ tmp = start;
++ err = read_file(FILE_HOSTFS_I(file)->fd, &tmp, buffer,
++ from);
++ if(err < 0) goto out;
++ }
++ if(to != PAGE_CACHE_SIZE){ /* load the bytes behind the range about to be written */
++ start += to;
++ err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer + to,
++ PAGE_CACHE_SIZE - to);
++ if(err < 0) goto out;
++ }
++ err = 0; /* byte counts from read_file are not reported, only errors */
++ out:
++ kunmap(page);
++ return(err);
++}
++
++int hostfs_commit_write(struct file *file, struct page *page, unsigned from,
++			unsigned to)
++{
++	struct address_space *mapping = page->mapping;
++	struct inode *inode = mapping->host;
++	char *buffer;
++	long long start;
++	int err = 0;
++	/* Write bytes [from, to) of the page; index is widened BEFORE the shift */
++	start = ((long long) page->index << PAGE_CACHE_SHIFT) + from;
++	buffer = kmap(page);
++	err = write_file(FILE_HOSTFS_I(file)->fd, &start, buffer + from,
++			 to - from);
++	if(err > 0) err = 0;	/* any positive byte count is reported as success */
++	if(!err && (start > inode->i_size))
++		inode->i_size = start;	/* write_file advanced start past the data */
++
++	kunmap(page);
++	return(err);
++}
++
++static struct address_space_operations hostfs_aops = {
++ .writepage = hostfs_writepage,
++ .readpage = hostfs_readpage,
++/* .set_page_dirty = __set_page_dirty_nobuffers, */
++ .prepare_write = hostfs_prepare_write,
++ .commit_write = hostfs_commit_write
++};
++
++static int init_inode(struct inode *inode, struct dentry *dentry)
++{
++ char *name;
++ int type, err = -ENOMEM, rdev;
++ /* Pick the operation tables for inode from the host file type behind dentry */
++ if(dentry){
++ name = dentry_name(dentry, 0);
++ if(name == NULL)
++ goto out;
++ type = file_type(name, &rdev);
++ kfree(name);
++ }
++ else type = OS_TYPE_DIR; /* no dentry given: set the inode up as a directory */
++ /* NOTE(review): a -errno result from file_type is not checked; presumably it matches no OS_TYPE_* test below - confirm */
++ err = 0;
++ if(type == OS_TYPE_SYMLINK)
++ inode->i_op = &page_symlink_inode_operations;
++ else if(type == OS_TYPE_DIR)
++ inode->i_op = &hostfs_dir_iops;
++ else inode->i_op = &hostfs_iops;
++
++ if(type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops;
++ else inode->i_fop = &hostfs_file_fops;
++
++ if(type == OS_TYPE_SYMLINK)
++ inode->i_mapping->a_ops = &hostfs_link_aops;
++ else inode->i_mapping->a_ops = &hostfs_aops;
++
++ switch (type) { /* rdev is only read for the two device types */
++ case OS_TYPE_CHARDEV:
++ init_special_inode(inode, S_IFCHR, rdev);
++ break;
++ case OS_TYPE_BLOCKDEV:
++ init_special_inode(inode, S_IFBLK, rdev);
++ break;
++ case OS_TYPE_FIFO:
++ init_special_inode(inode, S_IFIFO, 0);
++ break;
++ case OS_TYPE_SOCK:
++ init_special_inode(inode, S_IFSOCK, 0);
++ break;
++ }
++ out:
++ return(err);
++}
++
++int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
++		  struct nameidata *nd)
++{
++	struct inode *inode;
++	char *name;
++	int error, fd;
++	/* Create the host file for dentry and attach a fresh inode holding its fd */
++	error = -ENOMEM;
++	inode = iget(dir->i_sb, 0);
++	if(inode == NULL) goto out;
++
++	error = init_inode(inode, dentry);
++	if(error)
++		goto out_put;
++
++	error = -ENOMEM;
++	name = dentry_name(dentry, 0);
++	if(name == NULL)
++		goto out_put;
++
++	fd = file_create(name,
++			 mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR,
++			 mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP,
++			 mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH);
++	error = (fd < 0) ? fd : read_name(inode, name);
++	kfree(name);
++	/* fd is not stored in the inode yet, so iput() below would not close it */
++	if(error && (fd >= 0))
++		close_file(&fd);
++	if(error)
++		goto out_put;
++
++	HOSTFS_I(inode)->fd = fd;
++	HOSTFS_I(inode)->mode = FMODE_READ | FMODE_WRITE;
++	d_instantiate(dentry, inode);
++	return(0);
++
++ out_put:
++	iput(inode);
++ out:
++	return(error);
++}
++
++struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
++ struct nameidata *nd)
++{
++ struct inode *inode;
++ char *name;
++ int err;
++ /* Attach an inode (or NULL if the host has no such file) to dentry */
++ err = -ENOMEM;
++ inode = iget(ino->i_sb, 0);
++ if(inode == NULL)
++ goto out;
++
++ err = init_inode(inode, dentry);
++ if(err)
++ goto out_put;
++
++ err = -ENOMEM;
++ name = dentry_name(dentry, 0);
++ if(name == NULL)
++ goto out_put;
++
++ err = read_name(inode, name);
++ kfree(name);
++ if(err == -ENOENT){ /* missing on the host: drop the inode, d_add gets NULL */
++ iput(inode);
++ inode = NULL;
++ }
++ else if(err)
++ goto out_put;
++
++ d_add(dentry, inode);
++ dentry->d_op = &hostfs_dentry_ops;
++ return(NULL); /* NULL is returned on success; errors come back as ERR_PTR */
++
++ out_put:
++ iput(inode);
++ out:
++ return(ERR_PTR(err));
++}
++
++static char *inode_dentry_name(struct inode *ino, struct dentry *dentry)
++{
++	char *path;
++	int dir_len;
++	/* Reserve room for "/" plus the dentry's name behind the inode's path */
++	path = inode_name(ino, dentry->d_name.len + 1);
++	if(path == NULL) return NULL;
++	strcat(path, "/");
++	dir_len = strlen(path);
++	strncat(path, dentry->d_name.name, dentry->d_name.len);
++	path[dir_len + dentry->d_name.len] = '\0';
++	return path;
++}
++
++int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from)
++{
++	char *new_path, *old_path;
++	int err = -ENOMEM;	/* result if either path cannot be built */
++
++	/* from names the entry to create under ino; to is passed first to link_file */
++	new_path = inode_dentry_name(ino, from);
++	if(new_path == NULL)
++		return(err);
++	old_path = dentry_name(to, 0);
++	if(old_path != NULL){
++		err = link_file(old_path, new_path);
++		kfree(old_path);
++	}
++	kfree(new_path);
++	return(err);
++}
++
++int hostfs_unlink(struct inode *ino, struct dentry *dentry)
++{
++	char *file;
++	int err;
++	/* Refuse in append mode before building the path, so nothing is leaked */
++	if(append)
++		return(-EPERM);
++	if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM);
++
++	err = unlink_file(file);
++	kfree(file);
++	return(err);
++}
++
++int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to)
++{
++	char *path = inode_dentry_name(ino, dentry);	/* host path of the new link */
++	int err;
++
++	if(path == NULL) return(-ENOMEM);
++	err = make_symlink(path, to);
++	kfree(path);
++	return(err);
++}
++
++int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode)
++{
++	char *path = inode_dentry_name(ino, dentry);	/* host path of the new dir */
++	int err;
++
++	if(path == NULL) return(-ENOMEM);
++	err = do_mkdir(path, mode);
++	kfree(path);
++	return(err);
++}
++
++int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
++{
++	char *path = inode_dentry_name(ino, dentry);	/* host path to remove */
++	int err;
++
++	if(path == NULL) return(-ENOMEM);
++	err = do_rmdir(path);
++	kfree(path);
++	return(err);
++}
++
++int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
++{
++ struct inode *inode;
++ char *name;
++ int err = -ENOMEM;
++ /* Create a special file on the host for dentry and attach a new inode to it */
++ inode = iget(dir->i_sb, 0);
++ if(inode == NULL)
++ goto out;
++
++ err = init_inode(inode, dentry);
++ if(err)
++ goto out_put;
++
++ err = -ENOMEM;
++ name = dentry_name(dentry, 0);
++ if(name == NULL)
++ goto out_put;
++
++ init_special_inode(inode, mode, dev); /* set up before do_mknod creates the host node */
++ err = do_mknod(name, mode, dev);
++ if(err)
++ goto out_free;
++
++ err = read_name(inode, name);
++ kfree(name);
++ if(err)
++ goto out_put; /* name is already freed here, so out_free is skipped */
++
++ d_instantiate(dentry, inode);
++ return(0);
++
++ out_free:
++ kfree(name);
++ out_put:
++ iput(inode);
++ out:
++ return(err);
++}
++
++int hostfs_rename(struct inode *from_ino, struct dentry *from,
++		  struct inode *to_ino, struct dentry *to)
++{
++	char *src, *dst;
++	int err = -ENOMEM;	/* result if either path cannot be built */
++
++	src = inode_dentry_name(from_ino, from);
++	if(src == NULL)
++		return(err);
++	dst = inode_dentry_name(to_ino, to);
++	if(dst != NULL){
++		err = rename_file(src, dst);
++		kfree(dst);
++	}
++	kfree(src);
++	return(err);
++}
++
++void hostfs_truncate(struct inode *ino)
++{
++ not_implemented(); /* the only action taken; ino itself is not used */
++}
++
++int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd)
++{
++	char *path;
++	int err;
++
++	path = inode_name(ino, 0);
++	if(path == NULL) return(-ENOMEM);
++	/* The host-side check runs first; the generic one only after it passes */
++	err = access_file(path, (desired & MAY_READ) != 0,
++			  (desired & MAY_WRITE) != 0, (desired & MAY_EXEC) != 0);
++	kfree(path);
++	if(err == 0)
++		err = vfs_permission(ino, desired);
++	return(err);
++}
++
++int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
++{
++ struct hostfs_iattr attrs;
++ char *name;
++ int err;
++ /* Apply attr to the host file via set_attr, then to the inode via inode_setattr */
++ if(append)
++ attr->ia_valid &= ~ATTR_SIZE; /* with the append flag set, size changes are dropped */
++
++ attrs.ia_valid = 0;
++ if(attr->ia_valid & ATTR_MODE){
++ attrs.ia_valid |= HOSTFS_ATTR_MODE;
++ attrs.ia_mode = attr->ia_mode;
++ }
++ if(attr->ia_valid & ATTR_UID){
++ if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) &&
++ (attr->ia_uid == 0))
++ attr->ia_uid = getuid(); /* on the root device, uid 0 is replaced by getuid() */
++ attrs.ia_valid |= HOSTFS_ATTR_UID;
++ attrs.ia_uid = attr->ia_uid;
++ }
++ if(attr->ia_valid & ATTR_GID){
++ if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) &&
++ (attr->ia_gid == 0))
++ attr->ia_gid = getuid(); /* NOTE(review): a uid is stored into ia_gid - getgid() looks intended; confirm */
++ attrs.ia_valid |= HOSTFS_ATTR_GID;
++ attrs.ia_gid = attr->ia_gid;
++ }
++ if(attr->ia_valid & ATTR_SIZE){
++ attrs.ia_valid |= HOSTFS_ATTR_SIZE;
++ attrs.ia_size = attr->ia_size;
++ }
++ if(attr->ia_valid & ATTR_ATIME){
++ attrs.ia_valid |= HOSTFS_ATTR_ATIME;
++ attrs.ia_atime = attr->ia_atime;
++ }
++ if(attr->ia_valid & ATTR_MTIME){
++ attrs.ia_valid |= HOSTFS_ATTR_MTIME;
++ attrs.ia_mtime = attr->ia_mtime;
++ }
++ if(attr->ia_valid & ATTR_CTIME){
++ attrs.ia_valid |= HOSTFS_ATTR_CTIME;
++ attrs.ia_ctime = attr->ia_ctime;
++ }
++ if(attr->ia_valid & ATTR_ATIME_SET){
++ attrs.ia_valid |= HOSTFS_ATTR_ATIME_SET;
++ }
++ if(attr->ia_valid & ATTR_MTIME_SET){
++ attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET;
++ }
++ name = dentry_name(dentry, 0);
++ if(name == NULL) return(-ENOMEM);
++ err = set_attr(name, &attrs);
++ kfree(name);
++ if(err)
++ return(err); /* host refused: the in-kernel inode is left untouched */
++
++ return(inode_setattr(dentry->d_inode, attr));
++}
++
++int hostfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
++		   struct kstat *stat)
++{
++	generic_fillattr(dentry->d_inode, stat);	/* no host call is made here */
++	return 0;
++}
++
++static struct inode_operations hostfs_iops = {
++ .create = hostfs_create,
++ .link = hostfs_link,
++ .unlink = hostfs_unlink,
++ .symlink = hostfs_symlink,
++ .mkdir = hostfs_mkdir,
++ .rmdir = hostfs_rmdir,
++ .mknod = hostfs_mknod,
++ .rename = hostfs_rename,
++ .truncate = hostfs_truncate,
++ .permission = hostfs_permission,
++ .setattr = hostfs_setattr,
++ .getattr = hostfs_getattr,
++};
++
++static struct inode_operations hostfs_dir_iops = {
++ .create = hostfs_create,
++ .lookup = hostfs_lookup,
++ .link = hostfs_link,
++ .unlink = hostfs_unlink,
++ .symlink = hostfs_symlink,
++ .mkdir = hostfs_mkdir,
++ .rmdir = hostfs_rmdir,
++ .mknod = hostfs_mknod,
++ .rename = hostfs_rename,
++ .truncate = hostfs_truncate,
++ .permission = hostfs_permission,
++ .setattr = hostfs_setattr,
++ .getattr = hostfs_getattr,
++};
++
++int hostfs_link_readpage(struct file *file, struct page *page)
++{
++	char *buffer, *name;
++	int err = -ENOMEM;
++
++	/* Read the link target into the page; every exit unmaps and unlocks it */
++	buffer = kmap(page);
++	name = inode_name(page->mapping->host, 0);
++	if(name == NULL) goto out;
++	err = do_readlink(name, buffer, PAGE_CACHE_SIZE);
++	kfree(name);
++	if(err == PAGE_CACHE_SIZE)
++		err = -E2BIG;	/* target filled the page, no room for a terminator */
++	else if(err > 0){
++		flush_dcache_page(page);
++		SetPageUptodate(page);
++		if (PageError(page)) ClearPageError(page);
++		err = 0;
++	}
++ out:
++	kunmap(page);
++	unlock_page(page);
++	return(err);
++}
++
++static struct address_space_operations hostfs_link_aops = {
++ .readpage = hostfs_link_readpage,
++};
++
++static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
++{
++	struct inode *root_inode;
++	char *name, *data = d;
++	int err;
++	/* Set up sb with a root inode bound to the host path in d (default: root_ino) */
++	sb->s_blocksize = 1024;
++	sb->s_blocksize_bits = 10;
++	sb->s_magic = HOSTFS_SUPER_MAGIC;
++	sb->s_op = &hostfs_sbops;
++
++	if((data == NULL) || (*data == '\0'))
++		data = root_ino;
++
++	err = -ENOMEM;
++	name = kmalloc(strlen(data) + 1, GFP_KERNEL);
++	if(name == NULL)
++		goto out;
++
++	strcpy(name, data);
++
++	root_inode = iget(sb, 0);
++	if(root_inode == NULL)
++		goto out_free;
++
++	err = init_inode(root_inode, NULL);
++	if(err)
++		goto out_put;
++	/* The inode owns name from here on (hostfs_destroy_inode frees it) */
++	HOSTFS_I(root_inode)->host_filename = name;
++	name = NULL;	/* so out_free cannot free the same buffer again */
++	err = -ENOMEM;
++	sb->s_root = d_alloc_root(root_inode);
++	if(sb->s_root == NULL)
++		goto out_put;
++
++	err = read_inode(root_inode);
++	if(err)
++		goto out_put;	/* NOTE(review): sb->s_root still refers to root_inode here */
++
++	return(0);
++
++ out_put:
++	iput(root_inode);
++ out_free:
++	kfree(name);
++ out:
++	return(err);
++}
++
++static struct super_block *hostfs_read_sb(struct file_system_type *type,
++					  int flags, const char *dev_name,
++					  void *data)
++{
++	return get_sb_nodev(type, flags, data, hostfs_fill_sb_common);	/* dev_name is not used */
++}
++
++static struct file_system_type hostfs_type = {
++ .owner = THIS_MODULE,
++ .name = "hostfs",
++ .get_sb = hostfs_read_sb,
++ .kill_sb = kill_anon_super,
++ .fs_flags = 0,
++};
++
++static int __init init_hostfs(void)
++{
++	return register_filesystem(&hostfs_type);	/* result is the module init status */
++}
++
++static void __exit exit_hostfs(void)
++{
++ unregister_filesystem(&hostfs_type); /* its return value is not examined */
++}
++
++module_init(init_hostfs)
++module_exit(exit_hostfs)
++MODULE_LICENSE("GPL");
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
+--- a/fs/hostfs/hostfs_user.c 1969-12-31 19:00:00.000000000 -0500
++++ b/fs/hostfs/hostfs_user.c 2004-02-11 12:26:11.000000000 -0500
+@@ -0,0 +1,361 @@
++/*
++ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
++ * Licensed under the GPL
++ */
++
++#include <unistd.h>
++#include <stdio.h>
++#include <fcntl.h>
++#include <dirent.h>
++#include <errno.h>
++#include <utime.h>
++#include <string.h>
++#include <sys/stat.h>
++#include <sys/time.h>
++#include <sys/vfs.h>
++#include "hostfs.h"
++#include "kern_util.h"
++#include "user.h"
++
++int stat_file(const char *path, unsigned long long *inode_out, int *mode_out,
++ int *nlink_out, int *uid_out, int *gid_out,
++ unsigned long long *size_out, struct timespec *atime_out,
++ struct timespec *mtime_out, struct timespec *ctime_out,
++ int *blksize_out, unsigned long long *blocks_out)
++{
++ struct stat64 buf;
++ /* lstat64 path and copy out each field whose pointer is non-NULL; 0 or -errno */
++ if(lstat64(path, &buf) < 0)
++ return(-errno); /* nothing has been written to the out parameters yet */
++
++ /* See the Makefile for why STAT64_INO_FIELD is passed in
++ * by the build
++ */
++ if(inode_out != NULL) *inode_out = buf.STAT64_INO_FIELD;
++ if(mode_out != NULL) *mode_out = buf.st_mode;
++ if(nlink_out != NULL) *nlink_out = buf.st_nlink;
++ if(uid_out != NULL) *uid_out = buf.st_uid;
++ if(gid_out != NULL) *gid_out = buf.st_gid;
++ if(size_out != NULL) *size_out = buf.st_size;
++ if(atime_out != NULL) {
++ atime_out->tv_sec = buf.st_atime;
++ atime_out->tv_nsec = 0; /* only whole seconds are reported */
++ }
++ if(mtime_out != NULL) {
++ mtime_out->tv_sec = buf.st_mtime;
++ mtime_out->tv_nsec = 0;
++ }
++ if(ctime_out != NULL) {
++ ctime_out->tv_sec = buf.st_ctime;
++ ctime_out->tv_nsec = 0;
++ }
++ if(blksize_out != NULL) *blksize_out = buf.st_blksize;
++ if(blocks_out != NULL) *blocks_out = buf.st_blocks;
++ return(0);
++}
++
++int file_type(const char *path, int *rdev)
++{
++ struct stat64 buf;
++ /* Classify path with lstat64: returns an OS_TYPE_* code, or -errno on failure */
++ if(lstat64(path, &buf) < 0)
++ return(-errno);
++ if(rdev != NULL)
++ *rdev = buf.st_rdev; /* stored for every file type, not just devices */
++
++ if(S_ISDIR(buf.st_mode)) return(OS_TYPE_DIR);
++ else if(S_ISLNK(buf.st_mode)) return(OS_TYPE_SYMLINK);
++ else if(S_ISCHR(buf.st_mode)) return(OS_TYPE_CHARDEV);
++ else if(S_ISBLK(buf.st_mode)) return(OS_TYPE_BLOCKDEV);
++ else if(S_ISFIFO(buf.st_mode))return(OS_TYPE_FIFO);
++ else if(S_ISSOCK(buf.st_mode))return(OS_TYPE_SOCK);
++ else return(OS_TYPE_FILE); /* anything not matched above */
++}
++
++int access_file(char *path, int r, int w, int x)
++{
++	int mode = 0;
++
++	if(r) mode |= R_OK;
++	if(w) mode |= W_OK;
++	if(x) mode |= X_OK;
++	/* access() reports failure through errno; hand it back negated */
++	return(access(path, mode) != 0 ? -errno : 0);
++}
++
++int open_file(char *path, int r, int w, int append)
++{
++	int flags = 0, fd;
++
++	/* r and w select the access mode; asking for neither is a caller bug */
++	if(r && w)
++		flags = O_RDWR;
++	else if(r)
++		flags = O_RDONLY;
++	else if(w)
++		flags = O_WRONLY;
++	else panic("Impossible mode in open_file");
++
++	if(append)
++		flags |= O_APPEND;
++	fd = open64(path, flags);
++	return(fd < 0 ? -errno : fd);
++}
++
++void *open_dir(char *path, int *err_out)
++{
++	DIR *dir = opendir(path);
++
++	/* errno is copied out unconditionally; callers in this patch only
++	 * read it when NULL comes back */
++	*err_out = errno;
++	return(dir);
++}
++
++char *read_dir(void *stream, unsigned long long *pos,
++ unsigned long long *ino_out, int *len_out)
++{
++ DIR *dir = stream;
++ struct dirent *ent;
++ /* Read the entry at *pos; on success *pos becomes the following position */
++ seekdir(dir, *pos);
++ ent = readdir(dir);
++ if(ent == NULL) return(NULL); /* no entry returned; outputs are left unwritten */
++ *len_out = strlen(ent->d_name);
++ *ino_out = ent->d_ino;
++ *pos = telldir(dir);
++ return(ent->d_name); /* points into the dirent from readdir, not a copy */
++}
++
++int read_file(int fd, unsigned long long *offset, char *buf, int len)
++{
++	int got = pread64(fd, buf, len, *offset);
++
++	if(got < 0)
++		return(-errno);
++	*offset += got;	/* leave the caller's offset just past what was read */
++	return(got);
++}
++
++int write_file(int fd, unsigned long long *offset, const char *buf, int len)
++{
++	int put = pwrite64(fd, buf, len, *offset);
++
++	if(put < 0)
++		return(-errno);
++	*offset += put;	/* leave the caller's offset just past what was written */
++	return(put);
++}
++
++int lseek_file(int fd, long long offset, int whence)
++{
++	long long ret;	/* 64-bit: an int would turn large offsets into negatives */
++
++	ret = lseek64(fd, offset, whence);
++	if(ret < 0) return(-errno);
++	return(0);	/* the resulting position itself is not reported */
++}
++
++void close_file(void *stream)
++{
++ close(*((int *) stream)); /* stream points at the int descriptor; close()'s result is ignored */
++}
++
++void close_dir(void *stream)
++{
++ closedir(stream); /* stream is handed to closedir as-is; its result is ignored */
++}
++
++int file_create(char *name, int ur, int uw, int ux, int gr,
++		int gw, int gx, int or, int ow, int ox)
++{
++	int mode = 0, fd;
++
++	/* Each argument is a flag; rebuild the permission bits from them */
++	if(ur) mode |= S_IRUSR;
++	if(uw) mode |= S_IWUSR;
++	if(ux) mode |= S_IXUSR;
++	if(gr) mode |= S_IRGRP;
++	if(gw) mode |= S_IWGRP;
++	if(gx) mode |= S_IXGRP;
++	if(or) mode |= S_IROTH;
++	if(ow) mode |= S_IWOTH;
++	if(ox) mode |= S_IXOTH;
++	fd = open64(name, O_CREAT | O_RDWR, mode);
++	if(fd < 0)
++		return(-errno);
++	return(fd);
++}
++
++int set_attr(const char *file, struct hostfs_iattr *attrs)
++{
++ struct utimbuf buf;
++ int err, ma;
++ /* Apply each attribute flagged in attrs->ia_valid to file; 0 or a negative errno */
++ if(attrs->ia_valid & HOSTFS_ATTR_MODE){
++ if(chmod(file, attrs->ia_mode) != 0) return(-errno);
++ }
++ if(attrs->ia_valid & HOSTFS_ATTR_UID){
++ if(chown(file, attrs->ia_uid, -1)) return(-errno);
++ }
++ if(attrs->ia_valid & HOSTFS_ATTR_GID){
++ if(chown(file, -1, attrs->ia_gid)) return(-errno);
++ }
++ if(attrs->ia_valid & HOSTFS_ATTR_SIZE){
++ if(truncate(file, attrs->ia_size)) return(-errno);
++ }
++ ma = HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET;
++ if((attrs->ia_valid & ma) == ma){ /* both times given: one utime call sets them */
++ buf.actime = attrs->ia_atime.tv_sec;
++ buf.modtime = attrs->ia_mtime.tv_sec;
++ if(utime(file, &buf) != 0) return(-errno);
++ }
++ else { /* at most one time given: the other is read back from the file first */
++ struct timespec ts;
++
++ if(attrs->ia_valid & HOSTFS_ATTR_ATIME_SET){
++ err = stat_file(file, NULL, NULL, NULL, NULL, NULL,
++ NULL, NULL, &ts, NULL, NULL, NULL); /* ts = current mtime */
++ if(err != 0)
++ return(err);
++ buf.actime = attrs->ia_atime.tv_sec;
++ buf.modtime = ts.tv_sec;
++ if(utime(file, &buf) != 0)
++ return(-errno);
++ }
++ if(attrs->ia_valid & HOSTFS_ATTR_MTIME_SET){
++ err = stat_file(file, NULL, NULL, NULL, NULL, NULL,
++ NULL, &ts, NULL, NULL, NULL, NULL); /* ts = current atime */
++ if(err != 0)
++ return(err);
++ buf.actime = ts.tv_sec;
++ buf.modtime = attrs->ia_mtime.tv_sec;
++ if(utime(file, &buf) != 0)
++ return(-errno);
++ }
++ }
++ if(attrs->ia_valid & HOSTFS_ATTR_CTIME) ; /* empty statement: nothing is done for ctime */
++ if(attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)){
++ err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL,
++ &attrs->ia_atime, &attrs->ia_mtime, NULL,
++ NULL, NULL); /* hand the file's resulting times back through attrs */
++ if(err != 0) return(err);
++ }
++ return(0);
++}
++
++int make_symlink(const char *from, const char *to)
++{
++	/* from is the link being created, to is the text stored in it */
++	if(symlink(to, from) != 0)
++		return(-errno);
++
++	return(0);
++}
++
++int unlink_file(const char *file)
++{
++	/* unlink() reports failure through errno; pass it back negated */
++	if(unlink(file) != 0)
++		return(-errno);
++
++	return(0);
++}
++
++int do_mkdir(const char *file, int mode)
++{
++	/* mkdir() reports failure through errno; pass it back negated */
++	if(mkdir(file, mode) != 0)
++		return(-errno);
++
++	return(0);
++}
++
++int do_rmdir(const char *file)
++{
++	/* rmdir() reports failure through errno; pass it back negated */
++	if(rmdir(file) != 0)
++		return(-errno);
++
++	return(0);
++}
++
++int do_mknod(const char *file, int mode, int dev)
++{
++	/* mknod() reports failure through errno; pass it back negated */
++	if(mknod(file, mode, dev) != 0)
++		return(-errno);
++
++	return(0);
++}
++
++int link_file(const char *to, const char *from)
++{
++	/* to is handed to link() first and from second, in that order */
++	if(link(to, from) != 0)
++		return(-errno);
++
++	return(0);
++}
++
++int do_readlink(char *file, char *buf, int size)
++{
++	int got = readlink(file, buf, size);
++
++	if(got < 0)
++		return(-errno);
++	/* A terminator is stored only when there is room left for it */
++	if(got < size)
++		buf[got] = '\0';
++	return(got);
++}
++
++int rename_file(char *from, char *to)
++{
++	/* rename() reports failure through errno; pass it back negated */
++	if(rename(from, to) < 0)
++		return(-errno);
++
++	return(0);
++}
++
++int do_statfs(char *root, long *bsize_out, long long *blocks_out,
++ long long *bfree_out, long long *bavail_out,
++ long long *files_out, long long *ffree_out,
++ void *fsid_out, int fsid_size, long *namelen_out,
++ long *spare_out)
++{
++ struct statfs64 buf;
++ int err;
++ /* statfs64 root and copy the fields out one by one; returns 0 or -errno */
++ err = statfs64(root, &buf);
++ if(err < 0) return(-errno);
++ *bsize_out = buf.f_bsize;
++ *blocks_out = buf.f_blocks;
++ *bfree_out = buf.f_bfree;
++ *bavail_out = buf.f_bavail;
++ *files_out = buf.f_files;
++ *ffree_out = buf.f_ffree;
++ memcpy(fsid_out, &buf.f_fsid,
++ sizeof(buf.f_fsid) > fsid_size ? fsid_size :
++ sizeof(buf.f_fsid)); /* copies the smaller of the two sizes */
++ *namelen_out = buf.f_namelen;
++ spare_out[0] = buf.f_spare[0];
++ spare_out[1] = buf.f_spare[1];
++ spare_out[2] = buf.f_spare[2];
++ spare_out[3] = buf.f_spare[3];
++ spare_out[4] = buf.f_spare[4];
++ spare_out[5] = buf.f_spare[5]; /* NOTE(review): six entries are copied - confirm both arrays hold at least six */
++ return(0);
++}
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/fs/hostfs/Makefile b/fs/hostfs/Makefile
+--- a/fs/hostfs/Makefile 1969-12-31 19:00:00.000000000 -0500
++++ b/fs/hostfs/Makefile 2004-02-11 12:25:42.000000000 -0500
+@@ -0,0 +1,26 @@
++#
++# Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
++# Licensed under the GPL
++#
++
++# struct stat64 changed the inode field name between 2.2 and 2.4 from st_ino
++# to __st_ino. It stayed in the same place, so as long as the correct name
++# is used, hostfs compiled on 2.2 should work on 2.4 and vice versa.
++
++STAT64_INO_FIELD := $(shell grep -q __st_ino /usr/include/bits/stat.h && \
++ echo __)st_ino
++
++hostfs-objs := hostfs_kern.o hostfs_user.o
++
++obj-y =
++obj-$(CONFIG_HOSTFS) += hostfs.o
++
++SINGLE_OBJS = $(foreach f,$(patsubst %.o,%,$(obj-y) $(obj-m)),$($(f)-objs))
++
++USER_OBJS := $(filter %_user.o,$(obj-y) $(obj-m) $(SINGLE_OBJS))
++USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
++
++USER_CFLAGS += -DSTAT64_INO_FIELD=$(STAT64_INO_FIELD)
++
++$(USER_OBJS) : %.o: %.c
++ $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $<
+diff -Naur a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
+--- a/fs/hppfs/hppfs_kern.c 1969-12-31 19:00:00.000000000 -0500
++++ b/fs/hppfs/hppfs_kern.c 2004-02-11 12:27:10.000000000 -0500
+@@ -0,0 +1,811 @@
++/*
++ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
++ * Licensed under the GPL
++ */
++
++#include <linux/fs.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/list.h>
++#include <linux/kernel.h>
++#include <linux/ctype.h>
++#include <linux/dcache.h>
++#include <linux/statfs.h>
++#include <asm/uaccess.h>
++#include <asm/fcntl.h>
++#include "os.h"
++
++static int init_inode(struct inode *inode, struct dentry *dentry);
++
++struct hppfs_data {
++ struct list_head list;
++ char contents[PAGE_SIZE - sizeof(struct list_head)];
++};
++
++struct hppfs_private {
++ struct file proc_file;
++ int host_fd;
++ loff_t len;
++ struct hppfs_data *contents;
++};
++
++struct hppfs_inode_info {
++ struct dentry *proc_dentry;
++ struct inode vfs_inode;
++};
++
++static inline struct hppfs_inode_info *HPPFS_I(struct inode *inode)
++{
++ return(list_entry(inode, struct hppfs_inode_info, vfs_inode)); /* inode is taken to be the vfs_inode member of the result */
++}
++
++#define HPPFS_SUPER_MAGIC 0xb00000ee
++
++static struct super_operations hppfs_sbops;
++
++static int is_pid(struct dentry *dentry)
++{
++	struct super_block *sb = dentry->d_sb;
++	int pos;
++
++	/* Only direct children of an hppfs root qualify */
++	if((sb->s_op != &hppfs_sbops) || (dentry->d_parent != sb->s_root))
++		return 0;
++
++	/* ...and only when every character of the name is a digit */
++	for(pos = 0; pos < dentry->d_name.len; pos++)
++		if(!isdigit(dentry->d_name.name[pos]))
++			return 0;
++	return 1;
++}
++
++static char *dentry_name(struct dentry *dentry, int extra)
++{
++ struct dentry *parent;
++ char *root, *name;
++ const char *seg_name;
++ int len, seg_len;
++ /* Build "proc/<path>" for dentry in a kmalloc'd buffer with extra spare bytes */
++ len = 0;
++ parent = dentry;
++ while(parent->d_parent != parent){
++ if(is_pid(parent))
++ len += strlen("pid") + 1; /* components accepted by is_pid are spelled "pid" */
++ else len += parent->d_name.len + 1;
++ parent = parent->d_parent;
++ }
++ /* the result is relative: it starts with "proc", not with a slash */
++ root = "proc";
++ len += strlen(root);
++ name = kmalloc(len + extra + 1, GFP_KERNEL);
++ if(name == NULL) return(NULL);
++ /* terminate first, then fill the components in from the end towards the front */
++ name[len] = '\0';
++ parent = dentry;
++ while(parent->d_parent != parent){
++ if(is_pid(parent)){
++ seg_name = "pid";
++ seg_len = strlen("pid");
++ }
++ else {
++ seg_name = parent->d_name.name;
++ seg_len = parent->d_name.len;
++ }
++
++ len -= seg_len + 1;
++ name[len] = '/';
++ strncpy(&name[len + 1], seg_name, seg_len);
++ parent = parent->d_parent;
++ }
++ strncpy(name, root, strlen(root)); /* exactly strlen(root) bytes: no terminator is copied */
++ return(name);
++}
++
++struct dentry_operations hppfs_dentry_ops = { /* empty: no dentry operation is set */
++};
++
++static int file_removed(struct dentry *dentry, const char *file)
++{	/* 1 if the host marks dentry[/file] removed, 0 if not, -ENOMEM */
++	char *host_file;
++	int extra, fd;
++
++	extra = 0;
++	if(file != NULL) extra += strlen(file) + 1;
++
++	host_file = dentry_name(dentry, extra + strlen("/remove"));
++	if(host_file == NULL){
++		printk("file_removed : allocation failed\n");
++		return(-ENOMEM);
++	}
++	/* dentry_name() reserved room for the suffixes appended here */
++	if(file != NULL){
++		strcat(host_file, "/");
++		strcat(host_file, file);
++	}
++	strcat(host_file, "/remove");
++	/* A "remove" entry that opens means removed; fd 0 counts as open */
++	fd = os_open_file(host_file, of_read(OPENFLAGS()), 0);
++	kfree(host_file);
++	if(fd >= 0){
++		os_close_file(fd);
++		return(1);
++	}
++	return(0);
++}
++
++static void hppfs_read_inode(struct inode *ino)
++{
++	struct dentry *lower = HPPFS_I(ino)->proc_dentry;
++	struct inode *src;
++
++	if(lower == NULL)	/* inode is not tied to a proc dentry yet */
++		return;
++	src = lower->d_inode;	/* mirror the proc inode's attributes */
++	ino->i_ino = src->i_ino;
++	ino->i_mode = src->i_mode;
++	ino->i_nlink = src->i_nlink;
++	ino->i_uid = src->i_uid;
++	ino->i_gid = src->i_gid;
++	ino->i_size = src->i_size;
++	ino->i_blksize = src->i_blksize;
++	ino->i_blocks = src->i_blocks;
++	ino->i_atime = src->i_atime;
++	ino->i_mtime = src->i_mtime;
++	ino->i_ctime = src->i_ctime;
++}
++
++static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
++ struct nameidata *nd)
++{ /* Lookup in an hppfs directory: NULL on success, ERR_PTR on failure */
++ struct dentry *proc_dentry, *new, *parent;
++ struct inode *inode;
++ int err, deleted;
++
++ deleted = file_removed(dentry, NULL); /* 1: hidden by the host, <0: error */
++ if(deleted < 0)
++ return(ERR_PTR(deleted));
++ else if(deleted)
++ return(ERR_PTR(-ENOENT));
++ /* Find the matching dentry under the proc parent, or allocate and look it up */
++ err = -ENOMEM;
++ parent = HPPFS_I(ino)->proc_dentry;
++ down(&parent->d_inode->i_sem);
++ proc_dentry = d_lookup(parent, &dentry->d_name);
++ if(proc_dentry == NULL){
++ proc_dentry = d_alloc(parent, &dentry->d_name);
++ if(proc_dentry == NULL){
++ up(&parent->d_inode->i_sem);
++ goto out;
++ }
++ new = (*parent->d_inode->i_op->lookup)(parent->d_inode,
++ proc_dentry, NULL);
++ if(new){
++ dput(proc_dentry);
++ proc_dentry = new;
++ }
++ }
++ up(&parent->d_inode->i_sem);
++
++ if(IS_ERR(proc_dentry))
++ return(proc_dentry);
++ /* Wrap the proc dentry in an hppfs inode */
++ inode = iget(ino->i_sb, 0);
++ if(inode == NULL)
++ goto out_dput;
++
++ err = init_inode(inode, proc_dentry);
++ if(err)
++ goto out_put;
++
++ hppfs_read_inode(inode);
++ /* Success: dentry is instantiated in place, so NULL is returned */
++ d_add(dentry, inode);
++ dentry->d_op = &hppfs_dentry_ops;
++ return(NULL);
++
++ out_put:
++ iput(inode);
++ out_dput:
++ dput(proc_dentry);
++ out:
++ return(ERR_PTR(err));
++}
++
++static struct inode_operations hppfs_file_iops = { /* empty: used by init_inode() for non-dir, non-link inodes */
++};
++
++static ssize_t read_proc(struct file *file, char *buf, ssize_t count,
++			 loff_t *ppos, int is_user)
++{	/* Call file's own read op; buf is a kernel buffer when !is_user */
++	ssize_t (*read)(struct file *, char *, size_t, loff_t *);
++	ssize_t n;
++
++	read = file->f_dentry->d_inode->i_fop->read;
++	if(read == NULL)	/* no read method underneath: don't call NULL */
++		return(-EINVAL);
++	if(!is_user)		/* let the read op accept a kernel address */
++		set_fs(KERNEL_DS);
++
++	n = (*read)(file, buf, count, &file->f_pos);
++
++	if(!is_user)
++		set_fs(USER_DS);
++	if(ppos) *ppos = file->f_pos;	/* report the new offset if asked */
++	return(n);
++}
++
++static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count)
++{	/* Copy up to count bytes from host fd to user buf; bytes or -errno */
++	ssize_t n;
++	int cur, err;
++	char *new_buf;
++
++	n = -ENOMEM;
++	new_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
++	if(new_buf == NULL){
++		printk("hppfs_read_file : kmalloc failed\n");
++		goto out;
++	}
++	n = 0;
++	while(count > 0){
++		cur = min_t(ssize_t, count, PAGE_SIZE);
++		err = os_read_file(fd, new_buf, cur);
++		if(err < 0){
++			printk("hppfs_read : read failed, errno = %d\n", err);
++			n = err;
++			goto out_free;
++		}
++		else if(err == 0)
++			break;
++		/* Hand this piece of the bounce buffer to the user */
++		if(copy_to_user(buf, new_buf, err)){
++			n = -EFAULT;
++			goto out_free;
++		}
++		buf += err;	/* the next piece lands after this one */
++		n += err;
++		count -= err;
++	}
++ out_free:
++	kfree(new_buf);
++ out:
++	return(n);
++}
++
++static ssize_t hppfs_read(struct file *file, char *buf, size_t count,
++			  loff_t *ppos)
++{	/* Read from cached host data, else the host file, else proc itself */
++	struct hppfs_private *hppfs = file->private_data;
++	struct hppfs_data *data;
++	loff_t off;
++	int err;
++
++	if(hppfs->contents != NULL){
++		if(*ppos >= hppfs->len) return(0);
++		data = hppfs->contents;
++		off = *ppos;
++		while(off >= sizeof(data->contents)){
++			data = list_entry(data->list.next, struct hppfs_data,
++					  list);
++			off -= sizeof(data->contents);
++		}
++		/* Short read: stop at EOF and at the end of this chunk */
++		count = min_t(loff_t, count, hppfs->len - *ppos);
++		count = min_t(loff_t, count, sizeof(data->contents) - off);
++		if(copy_to_user(buf, &data->contents[off], count))
++			return(-EFAULT);
++		*ppos += count;
++	}
++	else if(hppfs->host_fd != -1){
++		err = os_seek_file(hppfs->host_fd, *ppos);
++		if(err){
++			printk("hppfs_read : seek failed, errno = %d\n", err);
++			return(err);
++		}
++		err = hppfs_read_file(hppfs->host_fd, buf, count);
++		if(err < 0) return(err);	/* keep -errno out of size_t count */
++		count = err;
++		*ppos += count;
++	}
++	else count = read_proc(&hppfs->proc_file, buf, count, ppos, 1);
++	return(count);
++}
++
++static ssize_t hppfs_write(struct file *file, const char *buf, size_t len,
++			   loff_t *ppos)
++{	/* Forward a write to the underlying proc file; bytes or -errno */
++	struct hppfs_private *data = file->private_data;
++	struct file *proc_file = &data->proc_file;
++	ssize_t (*write)(struct file *, const char *, size_t, loff_t *);
++	int err;
++
++	write = proc_file->f_dentry->d_inode->i_fop->write;
++	if(write == NULL) return(-EINVAL);	/* nothing to forward to */
++	/* Run the proc write at our offset, then adopt the offset it left */
++	proc_file->f_pos = file->f_pos;
++	err = (*write)(proc_file, buf, len, &proc_file->f_pos);
++	file->f_pos = proc_file->f_pos;
++	return(err);
++}
++
++static int open_host_sock(char *host_file, int *filter_out)
++{
++	char *suffix = host_file + strlen(host_file);
++	int fd;
++
++	/* Try "<host_file>/rw" first (filter on), then "<host_file>/r" */
++	strcpy(suffix, "/rw");
++	*filter_out = 1;
++	fd = os_connect_socket(host_file);
++	if(fd <= 0){
++		strcpy(suffix, "/r");
++		*filter_out = 0;
++		fd = os_connect_socket(host_file);
++	}
++
++	return fd;
++}
++
++static void free_contents(struct hppfs_data *head)
++{
++	struct list_head *pos, *nxt;
++
++	if(head == NULL)
++		return;
++	/* Free every chunk linked after head, then head itself */
++	for(pos = head->list.next; pos != &head->list; pos = nxt){
++		nxt = pos->next;
++		kfree(list_entry(pos, struct hppfs_data, list));
++	}
++	kfree(head);
++}
++
++static struct hppfs_data *hppfs_get_data(int fd, int filter,
++ struct file *proc_file,
++ struct file *hppfs_file,
++ loff_t *size_out)
++{ /* Read everything fd delivers into a chunk list; list head or ERR_PTR */
++ struct hppfs_data *data, *new, *head;
++ int n, err;
++
++ err = -ENOMEM;
++ data = kmalloc(sizeof(*data), GFP_KERNEL);
++ if(data == NULL){
++ printk("hppfs_get_data : head allocation failed\n");
++ goto failed;
++ }
++
++ INIT_LIST_HEAD(&data->list);
++
++ head = data;
++ *size_out = 0;
++ /* With filter set, first copy the proc file to fd, then call os_shutdown_socket() */
++ if(filter){
++ while((n = read_proc(proc_file, data->contents,
++ sizeof(data->contents), NULL, 0)) > 0)
++ os_write_file(fd, data->contents, n); /* NOTE(review): result is not checked */
++ err = os_shutdown_socket(fd, 0, 1);
++ if(err){
++ printk("hppfs_get_data : failed to shut down "
++ "socket\n");
++ goto failed_free;
++ }
++ }
++ while(1){
++ n = os_read_file(fd, data->contents, sizeof(data->contents));
++ if(n < 0){
++ err = n;
++ printk("hppfs_get_data : read failed, errno = %d\n",
++ err);
++ goto failed_free;
++ }
++ else if(n == 0)
++ break;
++
++ *size_out += n;
++ /* A short read ends the loop; only a completely full chunk gets a successor */
++ if(n < sizeof(data->contents))
++ break;
++
++ new = kmalloc(sizeof(*data), GFP_KERNEL);
++ if(new == 0){
++ printk("hppfs_get_data : data allocation failed\n");
++ err = -ENOMEM;
++ goto failed_free;
++ }
++ /* Link the new chunk in right after the current one */
++ INIT_LIST_HEAD(&new->list);
++ list_add(&new->list, &data->list);
++ data = new;
++ }
++ return(head);
++
++ failed_free:
++ free_contents(head);
++ failed:
++ return(ERR_PTR(err));
++}
++
++static struct hppfs_private *hppfs_data(void)
++{
++	struct hppfs_private *priv = kmalloc(sizeof(*priv), GFP_KERNEL);
++
++	if(priv != NULL){
++		/* Zero everything, then mark "no host fd, no cached data" */
++		memset(priv, 0, sizeof(*priv));
++		priv->host_fd = -1;
++		priv->len = -1;
++		priv->contents = NULL;
++	}
++	return priv;
++}
++
++static int file_mode(int fmode)
++{
++	/* Translate FMODE_* access bits into the matching O_* open flag */
++	switch(fmode){
++	case FMODE_READ | FMODE_WRITE:	return O_RDWR;
++	case FMODE_READ:		return O_RDONLY;
++	case FMODE_WRITE:		return O_WRONLY;
++	default:			return 0;
++	}
++}
++
++static int hppfs_open(struct inode *inode, struct file *file)
++{	/* Set up per-open state: proc file plus optional host file/socket */
++	struct hppfs_private *data;
++	struct dentry *proc_dentry;
++	char *host_file;
++	int err, fd, type, filter;
++
++	err = -ENOMEM;
++	data = hppfs_data();
++	if(data == NULL)
++		goto out;
++
++	host_file = dentry_name(file->f_dentry, strlen("/rw"));
++	if(host_file == NULL)
++		goto out_free2;
++
++	proc_dentry = HPPFS_I(inode)->proc_dentry;
++	/* XXX This isn't closed anywhere */
++	err = open_private_file(&data->proc_file, proc_dentry,
++				file_mode(file->f_mode));
++	if(err)
++		goto out_free1;
++	type = os_file_type(host_file);
++	if(type == OS_TYPE_FILE){
++		fd = os_open_file(host_file, of_read(OPENFLAGS()), 0);
++		if(fd >= 0)
++			data->host_fd = fd;
++		else printk("hppfs_open : failed to open '%s', errno = %d\n",
++			    host_file, -fd);
++		data->contents = NULL;
++	}
++	else if(type == OS_TYPE_DIR){
++		fd = open_host_sock(host_file, &filter);
++		if(fd > 0){
++			data->contents = hppfs_get_data(fd, filter,
++							&data->proc_file,
++							file, &data->len);
++			if(IS_ERR(data->contents)){
++				data->contents = NULL;	/* never keep an ERR_PTR */
++				os_close_file(fd);
++			}
++			else data->host_fd = fd;
++		}
++		else printk("hppfs_open : failed to open a socket in "
++			    "'%s', errno = %d\n", host_file, -fd);
++	}
++	kfree(host_file);
++
++	file->private_data = data;
++	return(0);
++
++ out_free1:
++	kfree(host_file);
++ out_free2:
++	free_contents(data->contents);
++	kfree(data);
++ out:
++	return(err);
++}
++
++static int hppfs_dir_open(struct inode *inode, struct file *file)
++{
++	struct hppfs_private *priv;
++	struct dentry *target;
++	int ret;
++
++	/* Per-open state: it carries the private struct file used below */
++	priv = hppfs_data();
++	if(priv == NULL)
++		return -ENOMEM;
++
++	/* Open the proc dentry behind this inode with the caller's mode */
++	target = HPPFS_I(inode)->proc_dentry;
++	ret = open_private_file(&priv->proc_file, target,
++				file_mode(file->f_mode));
++	if(ret){
++		/* The state is of no use once the open has failed */
++		kfree(priv);
++		return ret;
++	}
++
++	/* hppfs_readdir() picks the state up from here */
++	file->private_data = priv;
++	return 0;
++}
++
++static loff_t hppfs_llseek(struct file *file, loff_t off, int where)
++{
++	struct hppfs_private *priv = file->private_data;
++	struct file *lower = &priv->proc_file;
++	loff_t (*seek)(struct file *, loff_t, int);
++	loff_t pos;
++
++	/* Seek the proc file first when it can; then seek the hppfs file */
++	seek = lower->f_dentry->d_inode->i_fop->llseek;
++	if(seek != NULL){
++		pos = (*seek)(lower, off, where);
++		if(pos < 0)
++			return pos;
++	}
++	return default_llseek(file, off, where);
++}
++
++static struct file_operations hppfs_file_fops = { /* installed by init_inode() on everything but directories */
++ .owner = NULL,
++ .llseek = hppfs_llseek,
++ .read = hppfs_read,
++ .write = hppfs_write,
++ .open = hppfs_open,
++};
++
++struct hppfs_dirent { /* context hppfs_readdir() hands to hppfs_filldir() */
++ void *vfs_dirent; /* the caller's opaque readdir cookie */
++ filldir_t filldir; /* the caller's callback, invoked for entries that pass */
++ struct dentry *dentry; /* directory being read; used for the removed-file check */
++};
++
++static int hppfs_filldir(void *d, const char *name, int size,
++			 loff_t offset, ino_t inode, unsigned int type)
++{
++	struct hppfs_dirent *ctx = d;
++
++	/* Skip the entry whenever file_removed() returns non-zero for it */
++	if(file_removed(ctx->dentry, name) != 0)
++		return 0;
++	return (*ctx->filldir)(ctx->vfs_dirent, name, size, offset,
++			       inode, type);
++}
++
++static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir)
++{
++	struct hppfs_private *priv = file->private_data;
++	struct file *lower = &priv->proc_file;
++	struct hppfs_dirent wrap;
++	int ret;
++
++	/* Route every entry through hppfs_filldir so it can be filtered */
++	wrap.vfs_dirent = ent;
++	wrap.filldir = filldir;
++	wrap.dentry = file->f_dentry;
++
++	/* Read the proc directory at our position and adopt where it stops */
++	lower->f_pos = file->f_pos;
++	ret = (*lower->f_dentry->d_inode->i_fop->readdir)(lower, &wrap,
++							  hppfs_filldir);
++	file->f_pos = lower->f_pos;
++	return ret;
++}
++
++static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync)
++{	/* no-op: always reports success */
++	return 0;
++}
++
++static struct file_operations hppfs_dir_fops = { /* installed by init_inode() on directories */
++ .owner = NULL,
++ .readdir = hppfs_readdir,
++ .open = hppfs_dir_open,
++ .fsync = hppfs_fsync,
++};
++
++static int hppfs_statfs(struct super_block *sb, struct kstatfs *sf)
++{	/* every count is reported as zero; only the magic is filled in */
++	sf->f_type = HPPFS_SUPER_MAGIC;
++	sf->f_files = 0;
++	sf->f_ffree = 0;
++	sf->f_blocks = 0;
++	sf->f_bfree = 0;
++	sf->f_bavail = 0;
++	return 0;
++}
++
++static struct inode *hppfs_alloc_inode(struct super_block *sb)
++{
++	struct hppfs_inode_info *info = kmalloc(sizeof(*info), GFP_KERNEL);
++
++	if(info == NULL)
++		return NULL;
++
++	/* Start fully zeroed, which leaves proc_dentry NULL */
++	memset(info, 0, sizeof(*info));
++	inode_init_once(&info->vfs_inode);
++	return &info->vfs_inode;
++}
++
++void hppfs_delete_inode(struct inode *ino)
++{ /* .delete_inode hook: only calls clear_inode(); NOTE(review): proc_dentry is not released here */
++ clear_inode(ino);
++}
++
++static void hppfs_destroy_inode(struct inode *inode)
++{ /* .destroy_inode hook: frees the wrapper kmalloc'd by hppfs_alloc_inode() */
++ kfree(HPPFS_I(inode));
++}
++
++static struct super_operations hppfs_sbops = { /* also compared against by is_pid() to recognize hppfs superblocks */
++ .alloc_inode = hppfs_alloc_inode,
++ .destroy_inode = hppfs_destroy_inode,
++ .read_inode = hppfs_read_inode,
++ .delete_inode = hppfs_delete_inode,
++ .statfs = hppfs_statfs,
++};
++
++static int hppfs_readlink(struct dentry *dentry, char *buffer, int buflen)
++{
++	struct dentry *lower = HPPFS_I(dentry->d_inode)->proc_dentry;
++	int (*op)(struct dentry *, char *, int);
++	struct file lower_file;
++	int ret;
++
++	/* Hold a private open of the proc dentry across the call */
++	ret = open_private_file(&lower_file, lower, O_RDONLY);
++	if(ret)
++		return ret;
++
++	/* Delegate to the readlink of the proc inode */
++	op = lower->d_inode->i_op->readlink;
++	ret = (*op)(lower, buffer, buflen);
++
++	close_private_file(&lower_file);
++	return ret;
++}
++
++static int hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
++{
++	struct dentry *lower = HPPFS_I(dentry->d_inode)->proc_dentry;
++	int (*op)(struct dentry *, struct nameidata *);
++	struct file lower_file;
++	int ret;
++
++	/* Hold a private open of the proc dentry across the call */
++	ret = open_private_file(&lower_file, lower, O_RDONLY);
++	if(ret)
++		return ret;
++
++	/* Delegate to the follow_link of the proc inode */
++	op = lower->d_inode->i_op->follow_link;
++	ret = (*op)(lower, nd);
++
++	close_private_file(&lower_file);
++	return ret;
++}
++
++static struct inode_operations hppfs_dir_iops = { /* installed by init_inode() on directories */
++ .lookup = hppfs_lookup,
++};
++
++static struct inode_operations hppfs_link_iops = { /* installed by init_inode() on symlinks */
++ .readlink = hppfs_readlink,
++ .follow_link = hppfs_follow_link,
++};
++
++static int init_inode(struct inode *inode, struct dentry *dentry)
++{
++	umode_t mode = dentry->d_inode->i_mode;
++
++	/* Symlinks and plain files share file operations; directories differ */
++	inode->i_fop = S_ISDIR(mode) ? &hppfs_dir_fops : &hppfs_file_fops;
++
++	if(S_ISDIR(mode))
++		inode->i_op = &hppfs_dir_iops;
++	else if(S_ISLNK(mode))
++		inode->i_op = &hppfs_link_iops;
++	else
++		inode->i_op = &hppfs_file_iops;
++
++	/* Remember which proc dentry this inode stands in for */
++	HPPFS_I(inode)->proc_dentry = dentry;
++
++	return 0;
++}
++
++static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
++{	/* Fill sb with a root inode that mirrors the root of a proc sb */
++	struct inode *root_inode;
++	struct file_system_type *procfs;
++	struct super_block *proc_sb;
++	int err;
++
++	err = -ENOENT;
++	procfs = get_fs_type("proc");
++	if(procfs == NULL)
++		goto out;
++	/* proc needs a superblock already; the first one listed is used */
++	if(list_empty(&procfs->fs_supers))
++		goto out;
++
++	proc_sb = list_entry(procfs->fs_supers.next, struct super_block,
++			     s_instances);
++
++	sb->s_blocksize = 1024;
++	sb->s_blocksize_bits = 10;
++	sb->s_magic = HPPFS_SUPER_MAGIC;
++	sb->s_op = &hppfs_sbops;
++	err = -ENOMEM;	/* a failed iget() is not -ENOENT (cf. hppfs_lookup) */
++	root_inode = iget(sb, 0);
++	if(root_inode == NULL)
++		goto out;
++
++	err = init_inode(root_inode, proc_sb->s_root);
++	if(err)
++		goto out_put;
++
++	err = -ENOMEM;
++	sb->s_root = d_alloc_root(root_inode);
++	if(sb->s_root == NULL)
++		goto out_put;
++
++	hppfs_read_inode(root_inode);
++
++	return(0);
++
++ out_put:
++	iput(root_inode);
++ out:
++	return(err);
++}
++
++static struct super_block *hppfs_read_super(struct file_system_type *type,
++ int flags, const char *dev_name,
++ void *data)
++{ /* .get_sb hook: dev_name is unused; hppfs_fill_super() does the setup */
++ return(get_sb_nodev(type, flags, data, hppfs_fill_super));
++}
++
++static struct file_system_type hppfs_type = { /* registered by init_hppfs(), unregistered by exit_hppfs() */
++ .owner = THIS_MODULE,
++ .name = "hppfs",
++ .get_sb = hppfs_read_super,
++ .kill_sb = kill_anon_super,
++ .fs_flags = 0,
++};
++
++static int __init init_hppfs(void)
++{ /* module init: returns the result of register_filesystem() */
++ return(register_filesystem(&hppfs_type));
++}
++
++static void __exit exit_hppfs(void)
++{ /* module exit: undoes the registration made in init_hppfs() */
++ unregister_filesystem(&hppfs_type);
++}
++
++module_init(init_hppfs)
++module_exit(exit_hppfs)
++MODULE_LICENSE("GPL");
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/fs/hppfs/Makefile b/fs/hppfs/Makefile
+--- a/fs/hppfs/Makefile 1969-12-31 19:00:00.000000000 -0500
++++ b/fs/hppfs/Makefile 2004-02-11 12:27:35.000000000 -0500
+@@ -0,0 +1,19 @@
++#
++# Copyright (C) 2002, 2003 Jeff Dike (jdike@karaya.com)
++# Licensed under the GPL
++#
++
++hppfs-objs := hppfs_kern.o
++
++obj-y =
++obj-$(CONFIG_HPPFS) += hppfs.o
++
++clean:
++
++modules:
++
++fastdep:
++
++dep:
++
++archmrproper: clean
+diff -Naur a/fs/Makefile b/fs/Makefile
+--- a/fs/Makefile 2004-02-11 12:15:52.000000000 -0500
++++ b/fs/Makefile 2004-02-11 12:27:45.000000000 -0500
+@@ -91,3 +91,5 @@
+ obj-$(CONFIG_XFS_FS) += xfs/
+ obj-$(CONFIG_AFS_FS) += afs/
+ obj-$(CONFIG_BEFS_FS) += befs/
++obj-$(CONFIG_HOSTFS) += hostfs/
++obj-$(CONFIG_HPPFS) += hppfs/
+diff -Naur a/include/asm-um/archparam-i386.h b/include/asm-um/archparam-i386.h
+--- a/include/asm-um/archparam-i386.h 2004-02-11 12:16:32.000000000 -0500
++++ b/include/asm-um/archparam-i386.h 2004-02-11 12:28:31.000000000 -0500
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com)
++ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
+ * Licensed under the GPL
+ */
+
+@@ -56,6 +56,83 @@
+ pr_reg[16] = PT_REGS_SS(regs); \
+ } while(0);
+
++#if 0 /* Turn this back on when UML has VSYSCALL working */
++#define VSYSCALL_BASE (__fix_to_virt(FIX_VSYSCALL))
++#else
++#define VSYSCALL_BASE NULL
++#endif
++
++#define VSYSCALL_EHDR ((const struct elfhdr *) VSYSCALL_BASE)
++#define VSYSCALL_ENTRY ((unsigned long) &__kernel_vsyscall)
++extern void *__kernel_vsyscall;
++
++/*
++ * Architecture-neutral AT_ values in 0-17, leave some room
++ * for more of them, start the x86-specific ones at 32.
++ */
++#define AT_SYSINFO 32
++#define AT_SYSINFO_EHDR 33
++
++#define ARCH_DLINFO \
++do { \
++ NEW_AUX_ENT(AT_SYSINFO, VSYSCALL_ENTRY); \
++ NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE); \
++} while (0)
++
++/*
++ * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out
++ * extra segments containing the vsyscall DSO contents. Dumping its
++ * contents makes post-mortem fully interpretable later without matching up
++ * the same kernel and hardware config to see what PC values meant.
++ * Dumping its extra ELF program headers includes all the other information
++ * a debugger needs to easily find how the vsyscall DSO was being used.
++ */
++#define ELF_CORE_EXTRA_PHDRS (VSYSCALL_EHDR->e_phnum)
++#define ELF_CORE_WRITE_EXTRA_PHDRS \
++do { \
++ const struct elf_phdr *const vsyscall_phdrs = \
++ (const struct elf_phdr *) (VSYSCALL_BASE \
++ + VSYSCALL_EHDR->e_phoff); \
++ int i; \
++ Elf32_Off ofs = 0; \
++ for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \
++ struct elf_phdr phdr = vsyscall_phdrs[i]; \
++ if (phdr.p_type == PT_LOAD) { \
++ ofs = phdr.p_offset = offset; \
++ offset += phdr.p_filesz; \
++ } \
++ else \
++ phdr.p_offset += ofs; \
++ phdr.p_paddr = 0; /* match other core phdrs */ \
++ DUMP_WRITE(&phdr, sizeof(phdr)); \
++ } \
++} while (0)
++#define ELF_CORE_WRITE_EXTRA_DATA \
++do { \
++ const struct elf_phdr *const vsyscall_phdrs = \
++ (const struct elf_phdr *) (VSYSCALL_BASE \
++ + VSYSCALL_EHDR->e_phoff); \
++ int i; \
++ for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \
++ if (vsyscall_phdrs[i].p_type == PT_LOAD) \
++ DUMP_WRITE((void *) vsyscall_phdrs[i].p_vaddr, \
++ vsyscall_phdrs[i].p_filesz); \
++ } \
++} while (0)
++
++#define R_386_NONE 0
++#define R_386_32 1
++#define R_386_PC32 2
++#define R_386_GOT32 3
++#define R_386_PLT32 4
++#define R_386_COPY 5
++#define R_386_GLOB_DAT 6
++#define R_386_JMP_SLOT 7
++#define R_386_RELATIVE 8
++#define R_386_GOTOFF 9
++#define R_386_GOTPC 10
++#define R_386_NUM 11
++
+ /********* Bits for asm-um/delay.h **********/
+
+ typedef unsigned long um_udelay_t;
+diff -Naur a/include/asm-um/common.lds.S b/include/asm-um/common.lds.S
+--- a/include/asm-um/common.lds.S 2004-02-11 12:14:28.000000000 -0500
++++ b/include/asm-um/common.lds.S 2004-02-11 12:26:11.000000000 -0500
+@@ -1,3 +1,5 @@
++#include <asm-generic/vmlinux.lds.h>
++
+ .fini : { *(.fini) } =0x9090
+ _etext = .;
+ PROVIDE (etext = .);
+@@ -13,18 +15,6 @@
+
+ RODATA
+
+- __start___ksymtab = .; /* Kernel symbol table */
+- __ksymtab : { *(__ksymtab) }
+- __stop___ksymtab = .;
+-
+- __start___gpl_ksymtab = .; /* Kernel symbol table: GPL-only symbols */
+- __gpl_ksymtab : { *(__gpl_ksymtab) }
+- __stop___gpl_ksymtab = .;
+-
+- __start___kallsyms = .; /* All kernel symbols */
+- __kallsyms : { *(__kallsyms) }
+- __stop___kallsyms = .;
+-
+ .unprotected : { *(.unprotected) }
+ . = ALIGN(4096);
+ PROVIDE (_unprotected_end = .);
+@@ -67,11 +57,17 @@
+ }
+ __initcall_end = .;
+
++ __con_initcall_start = .;
++ .con_initcall.init : { *(.con_initcall.init) }
++ __con_initcall_end = .;
++
+ __uml_initcall_start = .;
+ .uml.initcall.init : { *(.uml.initcall.init) }
+ __uml_initcall_end = .;
+ __init_end = .;
+
++ SECURITY_INIT
++
+ __exitcall_begin = .;
+ .exitcall : { *(.exitcall.exit) }
+ __exitcall_end = .;
+@@ -80,7 +76,33 @@
+ .uml.exitcall : { *(.uml.exitcall.exit) }
+ __uml_exitcall_end = .;
+
+- . = ALIGN(4096);
++ . = ALIGN(4);
++ __alt_instructions = .;
++ .altinstructions : { *(.altinstructions) }
++ __alt_instructions_end = .;
++ .altinstr_replacement : { *(.altinstr_replacement) }
++ /* .exit.text is discarded at runtime, not link time, to deal with references
++ from .altinstructions and .eh_frame */
++ .exit.text : { *(.exit.text) }
++ .exit.data : { *(.exit.data) }
++
++ __preinit_array_start = .;
++ .preinit_array : { *(.preinit_array) }
++ __preinit_array_end = .;
++ __init_array_start = .;
++ .init_array : { *(.init_array) }
++ __init_array_end = .;
++ __fini_array_start = .;
++ .fini_array : { *(.fini_array) }
++ __fini_array_end = .;
++
++ . = ALIGN(4096);
+ __initramfs_start = .;
+ .init.ramfs : { *(.init.ramfs) }
+ __initramfs_end = .;
++
++ /* Sections to be discarded */
++ /DISCARD/ : {
++ *(.exitcall.exit)
++ }
++
+diff -Naur a/include/asm-um/cpufeature.h b/include/asm-um/cpufeature.h
+--- a/include/asm-um/cpufeature.h 1969-12-31 19:00:00.000000000 -0500
++++ b/include/asm-um/cpufeature.h 2004-02-11 12:25:42.000000000 -0500
+@@ -0,0 +1,6 @@
++#ifndef __UM_CPUFEATURE_H
++#define __UM_CPUFEATURE_H
++
++#include "asm/arch/cpufeature.h"
++
++#endif
+diff -Naur a/include/asm-um/current.h b/include/asm-um/current.h
+--- a/include/asm-um/current.h 2004-02-11 12:14:18.000000000 -0500
++++ b/include/asm-um/current.h 2004-02-11 12:26:01.000000000 -0500
+@@ -16,8 +16,10 @@
+ #define CURRENT_THREAD(dummy) (((unsigned long) &dummy) & \
+ (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER))
+
+-#define current ({ int dummy; \
+- ((struct thread_info *) CURRENT_THREAD(dummy))->task; })
++#define current_thread \
++ ({ int dummy; ((struct thread_info *) CURRENT_THREAD(dummy)); })
++
++#define current (current_thread->task)
+
+ #endif /* __ASSEMBLY__ */
+
+diff -Naur a/include/asm-um/elf.h b/include/asm-um/elf.h
+--- a/include/asm-um/elf.h 2004-02-11 12:16:01.000000000 -0500
++++ b/include/asm-um/elf.h 2004-02-11 12:27:55.000000000 -0500
+@@ -15,4 +15,17 @@
+
+ #define USE_ELF_CORE_DUMP
+
++#define R_386_NONE 0
++#define R_386_32 1
++#define R_386_PC32 2
++#define R_386_GOT32 3
++#define R_386_PLT32 4
++#define R_386_COPY 5
++#define R_386_GLOB_DAT 6
++#define R_386_JMP_SLOT 7
++#define R_386_RELATIVE 8
++#define R_386_GOTOFF 9
++#define R_386_GOTPC 10
++#define R_386_NUM 11
++
+ #endif
+diff -Naur a/include/asm-um/fixmap.h b/include/asm-um/fixmap.h
+--- a/include/asm-um/fixmap.h 2004-02-11 12:16:42.000000000 -0500
++++ b/include/asm-um/fixmap.h 2004-02-11 12:28:41.000000000 -0500
+@@ -34,6 +34,7 @@
+ FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
+ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+ #endif
++ FIX_VSYSCALL,
+ __end_of_fixed_addresses
+ };
+
+@@ -63,6 +64,13 @@
+ #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+ #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
+
++/*
++ * This is the range that is readable by user mode, and things
++ * acting like user mode such as get_user_pages.
++ */
++#define FIXADDR_USER_START (__fix_to_virt(FIX_VSYSCALL))
++#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE)
++
+ extern void __this_fixmap_does_not_exist(void);
+
+ /*
+diff -Naur a/include/asm-um/irq.h b/include/asm-um/irq.h
+--- a/include/asm-um/irq.h 2004-02-11 12:17:06.000000000 -0500
++++ b/include/asm-um/irq.h 2004-02-11 12:29:07.000000000 -0500
+@@ -1,15 +1,6 @@
+ #ifndef __UM_IRQ_H
+ #define __UM_IRQ_H
+
+-/* The i386 irq.h has a struct task_struct in a prototype without including
+- * sched.h. This forward declaration kills the resulting warning.
+- */
+-struct task_struct;
+-
+-#include "asm/ptrace.h"
+-
+-#undef NR_IRQS
+-
+ #define TIMER_IRQ 0
+ #define UMN_IRQ 1
+ #define CONSOLE_IRQ 2
+@@ -28,8 +19,4 @@
+ #define LAST_IRQ XTERM_IRQ
+ #define NR_IRQS (LAST_IRQ + 1)
+
+-extern int um_request_irq(unsigned int irq, int fd, int type,
+- void (*handler)(int, void *, struct pt_regs *),
+- unsigned long irqflags, const char * devname,
+- void *dev_id);
+ #endif
+diff -Naur a/include/asm-um/local.h b/include/asm-um/local.h
+--- a/include/asm-um/local.h 1969-12-31 19:00:00.000000000 -0500
++++ b/include/asm-um/local.h 2004-02-11 12:27:52.000000000 -0500
+@@ -0,0 +1,6 @@
++#ifndef __UM_LOCAL_H
++#define __UM_LOCAL_H
++
++#include "asm/arch/local.h"
++
++#endif
+diff -Naur a/include/asm-um/module-generic.h b/include/asm-um/module-generic.h
+--- a/include/asm-um/module-generic.h 1969-12-31 19:00:00.000000000 -0500
++++ b/include/asm-um/module-generic.h 2004-02-11 12:27:42.000000000 -0500
+@@ -0,0 +1,6 @@
++#ifndef __UM_MODULE_GENERIC_H
++#define __UM_MODULE_GENERIC_H
++
++#include "asm/arch/module.h"
++
++#endif
+diff -Naur a/include/asm-um/module-i386.h b/include/asm-um/module-i386.h
+--- a/include/asm-um/module-i386.h 1969-12-31 19:00:00.000000000 -0500
++++ b/include/asm-um/module-i386.h 2004-02-11 12:27:42.000000000 -0500
+@@ -0,0 +1,13 @@
++#ifndef __UM_MODULE_I386_H
++#define __UM_MODULE_I386_H
++
++/* UML is simple */
++struct mod_arch_specific
++{
++};
++
++#define Elf_Shdr Elf32_Shdr
++#define Elf_Sym Elf32_Sym
++#define Elf_Ehdr Elf32_Ehdr
++
++#endif
+diff -Naur a/include/asm-um/page.h b/include/asm-um/page.h
+--- a/include/asm-um/page.h 2004-02-11 12:15:52.000000000 -0500
++++ b/include/asm-um/page.h 2004-02-11 12:27:45.000000000 -0500
+@@ -1,10 +1,14 @@
++/*
++ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com)
++ * Licensed under the GPL
++ */
++
+ #ifndef __UM_PAGE_H
+ #define __UM_PAGE_H
+
+ struct page;
+
+ #include "asm/arch/page.h"
+-#include "asm/bug.h"
+
+ #undef __pa
+ #undef __va
+@@ -24,25 +28,36 @@
+
+ #define __va_space (8*1024*1024)
+
+-extern unsigned long region_pa(void *virt);
+-extern void *region_va(unsigned long phys);
+-
+-#define __pa(virt) region_pa((void *) (virt))
+-#define __va(phys) region_va((unsigned long) (phys))
+-
+-extern unsigned long page_to_pfn(struct page *page);
+-extern struct page *pfn_to_page(unsigned long pfn);
++extern unsigned long to_phys(void *virt);
++extern void *to_virt(unsigned long phys);
+
+-extern struct page *phys_to_page(unsigned long phys);
++#define __pa(virt) to_phys((void *) virt)
++#define __va(phys) to_virt((unsigned long) phys)
+
+-#define virt_to_page(v) (phys_to_page(__pa(v)))
++#define page_to_pfn(page) ((page) - mem_map)
++#define pfn_to_page(pfn) (mem_map + (pfn))
+
+-extern struct page *page_mem_map(struct page *page);
++#define phys_to_pfn(p) ((p) >> PAGE_SHIFT)
++#define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT)
+
+-#define pfn_valid(pfn) (page_mem_map(pfn_to_page(pfn)) != NULL)
+-#define virt_addr_valid(v) pfn_valid(__pa(v) >> PAGE_SHIFT)
++#define pfn_valid(pfn) ((pfn) < max_mapnr)
++#define virt_addr_valid(v) pfn_valid(phys_to_pfn(__pa(v)))
+
+ extern struct page *arch_validate(struct page *page, int mask, int order);
+ #define HAVE_ARCH_VALIDATE
+
++extern void arch_free_page(struct page *page, int order);
++#define HAVE_ARCH_FREE_PAGE
++
+ #endif
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/include/asm-um/pgtable.h b/include/asm-um/pgtable.h
+--- a/include/asm-um/pgtable.h 2004-02-11 12:17:12.000000000 -0500
++++ b/include/asm-um/pgtable.h 2004-02-11 12:29:17.000000000 -0500
+@@ -12,8 +12,6 @@
+ #include "asm/page.h"
+ #include "asm/fixmap.h"
+
+-extern pgd_t swapper_pg_dir[1024];
+-
+ extern void *um_virt_to_phys(struct task_struct *task, unsigned long virt,
+ pte_t *pte_out);
+
+@@ -49,6 +47,8 @@
+ #define pgd_ERROR(e) \
+ printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
++extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
++
+ /*
+ * pgd entries used up by user/kernel:
+ */
+@@ -65,10 +65,10 @@
+ * area for the same reason. ;)
+ */
+
+-extern unsigned long high_physmem;
++extern unsigned long end_iomem;
+
+ #define VMALLOC_OFFSET (__va_space)
+-#define VMALLOC_START (((unsigned long) high_physmem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))
++#define VMALLOC_START ((end_iomem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1))
+
+ #ifdef CONFIG_HIGHMEM
+ # define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE)
+@@ -78,12 +78,13 @@
+
+ #define _PAGE_PRESENT 0x001
+ #define _PAGE_NEWPAGE 0x002
+-#define _PAGE_PROTNONE 0x004 /* If not present */
+-#define _PAGE_RW 0x008
+-#define _PAGE_USER 0x010
+-#define _PAGE_ACCESSED 0x020
+-#define _PAGE_DIRTY 0x040
+-#define _PAGE_NEWPROT 0x080
++#define _PAGE_NEWPROT 0x004
++#define _PAGE_FILE 0x008 /* set:pagecache unset:swap */
++#define _PAGE_PROTNONE 0x010 /* If not present */
++#define _PAGE_RW 0x020
++#define _PAGE_USER 0x040
++#define _PAGE_ACCESSED 0x080
++#define _PAGE_DIRTY 0x100
+
+ #define REGION_MASK 0xf0000000
+ #define REGION_SHIFT 28
+@@ -143,7 +144,8 @@
+
+ #define BAD_PAGETABLE __bad_pagetable()
+ #define BAD_PAGE __bad_page()
+-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page))
++
++#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page)
+
+ /* number of bits that fit into a memory pointer */
+ #define BITS_PER_PTR (8*sizeof(unsigned long))
+@@ -164,9 +166,6 @@
+
+ #define pte_clear(xp) do { pte_val(*(xp)) = _PAGE_NEWPAGE; } while (0)
+
+-#define phys_region_index(x) (((x) & REGION_MASK) >> REGION_SHIFT)
+-#define pte_region_index(x) phys_region_index(pte_val(x))
+-
+ #define pmd_none(x) (!(pmd_val(x) & ~_PAGE_NEWPAGE))
+ #define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
+ #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT)
+@@ -188,19 +187,25 @@
+
+ #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
+
+-extern struct page *pte_mem_map(pte_t pte);
+-extern struct page *phys_mem_map(unsigned long phys);
+-extern unsigned long phys_to_pfn(unsigned long p);
+-extern unsigned long pfn_to_phys(unsigned long pfn);
+-
+-#define pte_page(x) pfn_to_page(pte_pfn(x))
+-#define pte_address(x) (__va(pte_val(x) & PAGE_MASK))
+-#define mk_phys(a, r) ((a) + (r << REGION_SHIFT))
+-#define phys_addr(p) ((p) & ~REGION_MASK)
+-#define phys_page(p) (phys_mem_map(p) + ((phys_addr(p)) >> PAGE_SHIFT))
++#define pte_page(pte) phys_to_page(pte_val(pte))
++#define pmd_page(pmd) phys_to_page(pmd_val(pmd) & PAGE_MASK)
++
+ #define pte_pfn(x) phys_to_pfn(pte_val(x))
+ #define pfn_pte(pfn, prot) __pte(pfn_to_phys(pfn) | pgprot_val(prot))
+-#define pfn_pmd(pfn, prot) __pmd(pfn_to_phys(pfn) | pgprot_val(prot))
++
++extern struct page *phys_to_page(const unsigned long phys);
++extern struct page *__virt_to_page(const unsigned long virt);
++#define virt_to_page(addr) __virt_to_page((const unsigned long) (addr))
++
++/*
++ * Bits 0 through 3 are taken
++ */
++#define PTE_FILE_MAX_BITS 28
++
++#define pte_to_pgoff(pte) ((pte).pte_low >> 4)
++
++#define pgoff_to_pte(off) \
++ ((pte_t) { ((off) << 4) + _PAGE_FILE })
+
+ static inline pte_t pte_mknewprot(pte_t pte)
+ {
+@@ -235,6 +240,12 @@
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
++static inline int pte_user(pte_t pte)
++{
++ return((pte_val(pte) & _PAGE_USER) &&
++ !(pte_val(pte) & _PAGE_PROTNONE));
++}
++
+ static inline int pte_read(pte_t pte)
+ {
+ return((pte_val(pte) & _PAGE_USER) &&
+@@ -252,6 +263,14 @@
+ !(pte_val(pte) & _PAGE_PROTNONE));
+ }
+
++/*
++ * The following only works if pte_present() is not true.
++ */
++static inline int pte_file(pte_t pte)
++{
++ return (pte).pte_low & _PAGE_FILE;
++}
++
+ static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
+ static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
+ static inline int pte_newpage(pte_t pte) { return pte_val(pte) & _PAGE_NEWPAGE; }
+@@ -334,14 +353,7 @@
+ * and a page entry and page directory to the page they refer to.
+ */
+
+-#define mk_pte(page, pgprot) \
+-({ \
+- pte_t __pte; \
+- \
+- pte_val(__pte) = page_to_phys(page) + pgprot_val(pgprot);\
+- if(pte_present(__pte)) pte_mknewprot(pte_mknewpage(__pte)); \
+- __pte; \
+-})
++extern pte_t mk_pte(struct page *page, pgprot_t pgprot);
+
+ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
+ {
+@@ -351,17 +363,27 @@
+ }
+
+ #define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
+-#define pmd_page(pmd) (phys_mem_map(pmd_val(pmd) & PAGE_MASK) + \
+- ((phys_addr(pmd_val(pmd)) >> PAGE_SHIFT)))
+
+-/* to find an entry in a page-table-directory. */
++/*
++ * the pgd page can be thought of as an array like this: pgd_t[PTRS_PER_PGD]
++ *
++ * this macro returns the index of the entry in the pgd page which would
++ * control the given virtual address
++ */
+ #define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+
+-/* to find an entry in a page-table-directory */
++/*
++ * pgd_offset() returns a (pgd_t *)
++ * pgd_index() is used to get the offset into the pgd page's array of pgd_t's;
++ */
+ #define pgd_offset(mm, address) \
+ ((mm)->pgd + ((address) >> PGDIR_SHIFT))
+
+-/* to find an entry in a kernel page-table-directory */
++
++/*
++ * a shortcut which implies the use of the kernel's pgd, instead
++ * of a process's
++ */
+ #define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+ #define pmd_index(address) \
+@@ -373,7 +395,12 @@
+ return (pmd_t *) dir;
+ }
+
+-/* Find an entry in the third-level page table.. */
++/*
++ * the pte page can be thought of as an array like this: pte_t[PTRS_PER_PTE]
++ *
++ * this macro returns the index of the entry in the pte page which would
++ * control the given virtual address
++ */
+ #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+ #define pte_offset_kernel(dir, address) \
+ ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address))
+@@ -399,11 +426,11 @@
+ #define update_mmu_cache(vma,address,pte) do ; while (0)
+
+ /* Encode and de-code a swap entry */
+-#define __swp_type(x) (((x).val >> 3) & 0x7f)
+-#define __swp_offset(x) ((x).val >> 10)
++#define __swp_type(x) (((x).val >> 4) & 0x3f)
++#define __swp_offset(x) ((x).val >> 11)
+
+ #define __swp_entry(type, offset) \
+- ((swp_entry_t) { ((type) << 3) | ((offset) << 10) })
++ ((swp_entry_t) { ((type) << 4) | ((offset) << 11) })
+ #define __pte_to_swp_entry(pte) \
+ ((swp_entry_t) { pte_val(pte_mkuptodate(pte)) })
+ #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+diff -Naur a/include/asm-um/processor-generic.h b/include/asm-um/processor-generic.h
+--- a/include/asm-um/processor-generic.h 2004-02-11 12:14:28.000000000 -0500
++++ b/include/asm-um/processor-generic.h 2004-02-11 12:26:10.000000000 -0500
+@@ -11,9 +11,7 @@
+ struct task_struct;
+
+ #include "linux/config.h"
+-#include "linux/signal.h"
+ #include "asm/ptrace.h"
+-#include "asm/siginfo.h"
+ #include "choose-mode.h"
+
+ struct mm_struct;
+@@ -22,23 +20,6 @@
+
+ #define cpu_relax() do ; while (0)
+
+-#ifdef CONFIG_MODE_TT
+-struct proc_tt_mode {
+- int extern_pid;
+- int tracing;
+- int switch_pipe[2];
+- int singlestep_syscall;
+- int vm_seq;
+-};
+-#endif
+-
+-#ifdef CONFIG_MODE_SKAS
+-struct proc_skas_mode {
+- void *switch_buf;
+- void *fork_buf;
+-};
+-#endif
+-
+ struct thread_struct {
+ int forking;
+ unsigned long kernel_stack;
+@@ -46,6 +27,7 @@
+ struct pt_regs regs;
+ unsigned long cr2;
+ int err;
++ unsigned long trap_no;
+ void *fault_addr;
+ void *fault_catcher;
+ struct task_struct *prev_sched;
+@@ -54,10 +36,20 @@
+ struct arch_thread arch;
+ union {
+ #ifdef CONFIG_MODE_TT
+- struct proc_tt_mode tt;
++ struct {
++ int extern_pid;
++ int tracing;
++ int switch_pipe[2];
++ int singlestep_syscall;
++ int vm_seq;
++ } tt;
+ #endif
+ #ifdef CONFIG_MODE_SKAS
+- struct proc_skas_mode skas;
++ struct {
++ void *switch_buf;
++ void *fork_buf;
++ int mm_count;
++ } skas;
+ #endif
+ } mode;
+ struct {
+@@ -101,14 +93,19 @@
+ } mm_segment_t;
+
+ extern struct task_struct *alloc_task_struct(void);
+-extern void free_task_struct(struct task_struct *task);
+
+ extern void release_thread(struct task_struct *);
+ extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
+ extern void dump_thread(struct pt_regs *regs, struct user *u);
++extern void prepare_to_copy(struct task_struct *tsk);
+
+ extern unsigned long thread_saved_pc(struct task_struct *t);
+
++static inline void mm_copy_segments(struct mm_struct *from_mm,
++ struct mm_struct *new_mm)
++{
++}
++
+ #define init_stack (init_thread_union.stack)
+
+ /*
+diff -Naur a/include/asm-um/processor-i386.h b/include/asm-um/processor-i386.h
+--- a/include/asm-um/processor-i386.h 2004-02-11 12:14:17.000000000 -0500
++++ b/include/asm-um/processor-i386.h 2004-02-11 12:26:00.000000000 -0500
+@@ -6,8 +6,8 @@
+ #ifndef __UM_PROCESSOR_I386_H
+ #define __UM_PROCESSOR_I386_H
+
+-extern int cpu_has_xmm;
+-extern int cpu_has_cmov;
++extern int host_has_xmm;
++extern int host_has_cmov;
+
+ struct arch_thread {
+ unsigned long debugregs[8];
+diff -Naur a/include/asm-um/sections.h b/include/asm-um/sections.h
+--- a/include/asm-um/sections.h 1969-12-31 19:00:00.000000000 -0500
++++ b/include/asm-um/sections.h 2004-02-11 12:27:57.000000000 -0500
+@@ -0,0 +1,7 @@
++#ifndef _UM_SECTIONS_H
++#define _UM_SECTIONS_H
++
++/* nothing to see, move along */
++#include <asm-generic/sections.h>
++
++#endif
+diff -Naur a/include/asm-um/smp.h b/include/asm-um/smp.h
+--- a/include/asm-um/smp.h 2004-02-11 12:14:12.000000000 -0500
++++ b/include/asm-um/smp.h 2004-02-11 12:25:41.000000000 -0500
+@@ -10,7 +10,7 @@
+
+ extern cpumask_t cpu_online_map;
+
+-#define smp_processor_id() (current->thread_info->cpu)
++#define smp_processor_id() (current_thread->cpu)
+ #define cpu_logical_map(n) (n)
+ #define cpu_number_map(n) (n)
+ #define PROC_CHANGE_PENALTY 15 /* Pick a number, any number */
+diff -Naur a/include/asm-um/smplock.h b/include/asm-um/smplock.h
+--- a/include/asm-um/smplock.h 2004-02-11 12:14:27.000000000 -0500
++++ b/include/asm-um/smplock.h 1969-12-31 19:00:00.000000000 -0500
+@@ -1,6 +0,0 @@
+-#ifndef __UM_SMPLOCK_H
+-#define __UM_SMPLOCK_H
+-
+-#include "asm/arch/smplock.h"
+-
+-#endif
+diff -Naur a/include/asm-um/spinlock.h b/include/asm-um/spinlock.h
+--- a/include/asm-um/spinlock.h 2004-02-11 12:16:39.000000000 -0500
++++ b/include/asm-um/spinlock.h 1969-12-31 19:00:00.000000000 -0500
+@@ -1,10 +0,0 @@
+-#ifndef __UM_SPINLOCK_H
+-#define __UM_SPINLOCK_H
+-
+-#include "linux/config.h"
+-
+-#ifdef CONFIG_SMP
+-#include "asm/arch/spinlock.h"
+-#endif
+-
+-#endif
+diff -Naur a/include/asm-um/system-generic.h b/include/asm-um/system-generic.h
+--- a/include/asm-um/system-generic.h 2004-02-11 12:17:08.000000000 -0500
++++ b/include/asm-um/system-generic.h 2004-02-11 12:29:12.000000000 -0500
+@@ -23,8 +23,10 @@
+ extern void block_signals(void);
+ extern void unblock_signals(void);
+
+-#define local_save_flags(flags) do { (flags) = get_signals(); } while(0)
+-#define local_irq_restore(flags) do { set_signals(flags); } while(0)
++#define local_save_flags(flags) do { typecheck(unsigned long, flags); \
++ (flags) = get_signals(); } while(0)
++#define local_irq_restore(flags) do { typecheck(unsigned long, flags); \
++ set_signals(flags); } while(0)
+
+ #define local_irq_save(flags) do { local_save_flags(flags); \
+ local_irq_disable(); } while(0)
+@@ -39,4 +41,7 @@
+ (flags == 0); \
+ })
+
++extern void *_switch_to(void *prev, void *next, void *last);
++#define switch_to(prev, next, last) prev = _switch_to(prev, next, last)
++
+ #endif
+diff -Naur a/include/asm-um/thread_info.h b/include/asm-um/thread_info.h
+--- a/include/asm-um/thread_info.h 2004-02-11 12:14:39.000000000 -0500
++++ b/include/asm-um/thread_info.h 2004-02-11 12:26:45.000000000 -0500
+@@ -9,6 +9,7 @@
+ #ifndef __ASSEMBLY__
+
+ #include <asm/processor.h>
++#include <asm/types.h>
+
+ struct thread_info {
+ struct task_struct *task; /* main task structure */
+@@ -43,15 +44,18 @@
+ static inline struct thread_info *current_thread_info(void)
+ {
+ struct thread_info *ti;
+- __asm__("andl %%esp,%0; ":"=r" (ti) : "0" (~16383UL));
++ unsigned long mask = PAGE_SIZE *
++ (1 << CONFIG_KERNEL_STACK_ORDER) - 1;
++ __asm__("andl %%esp,%0; ":"=r" (ti) : "0" (~mask));
+ return ti;
+ }
+
+ /* thread information allocation */
+-#define THREAD_SIZE (4*PAGE_SIZE)
+-#define alloc_thread_info(tsk) ((struct thread_info *) \
+- __get_free_pages(GFP_KERNEL,2))
+-#define free_thread_info(ti) free_pages((unsigned long) (ti), 2)
++#define THREAD_SIZE ((1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE)
++#define alloc_thread_info(tsk) \
++ ((struct thread_info *) kmalloc(THREAD_SIZE, GFP_KERNEL))
++#define free_thread_info(ti) kfree(ti)
++
+ #define get_thread_info(ti) get_task_struct((ti)->task)
+ #define put_thread_info(ti) put_task_struct((ti)->task)
+
+@@ -65,11 +69,13 @@
+ #define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling
+ * TIF_NEED_RESCHED
+ */
++#define TIF_RESTART_BLOCK 4
+
+ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
+ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
+ #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+ #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
++#define _TIF_RESTART_BLOCK (1 << TIF_RESTART_BLOCK)
+
+ #endif
+
+diff -Naur a/include/asm-um/timex.h b/include/asm-um/timex.h
+--- a/include/asm-um/timex.h 2004-02-11 12:16:00.000000000 -0500
++++ b/include/asm-um/timex.h 2004-02-11 12:27:53.000000000 -0500
+@@ -1,8 +1,6 @@
+ #ifndef __UM_TIMEX_H
+ #define __UM_TIMEX_H
+
+-#include "linux/time.h"
+-
+ typedef unsigned long cycles_t;
+
+ #define cacheflush_time (0)
+diff -Naur a/include/asm-um/uaccess.h b/include/asm-um/uaccess.h
+--- a/include/asm-um/uaccess.h 2004-02-11 12:16:04.000000000 -0500
++++ b/include/asm-um/uaccess.h 2004-02-11 12:28:00.000000000 -0500
+@@ -6,6 +6,8 @@
+ #ifndef __UM_UACCESS_H
+ #define __UM_UACCESS_H
+
++#include "linux/sched.h"
++
+ #define VERIFY_READ 0
+ #define VERIFY_WRITE 1
+
+diff -Naur a/include/asm-um/unistd.h b/include/asm-um/unistd.h
+--- a/include/asm-um/unistd.h 2004-02-11 12:16:33.000000000 -0500
++++ b/include/asm-um/unistd.h 2004-02-11 12:28:32.000000000 -0500
+@@ -33,7 +33,10 @@
+ set_fs(KERNEL_DS); \
+ ret = sys(args); \
+ set_fs(fs); \
+- return ret;
++ if (ret >= 0) \
++ return ret; \
++ errno = -(long)ret; \
++ return -1;
+
+ static inline long open(const char *pathname, int flags, int mode)
+ {
+diff -Naur a/include/linux/gfp.h b/include/linux/gfp.h
+--- a/include/linux/gfp.h 2004-02-11 12:14:33.000000000 -0500
++++ b/include/linux/gfp.h 2004-02-11 12:26:16.000000000 -0500
+@@ -63,6 +63,11 @@
+ * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets
+ * optimized to &contig_page_data at compile-time.
+ */
++
++#ifndef HAVE_ARCH_FREE_PAGE
++static inline void arch_free_page(struct page *page, int order) { }
++#endif
++
+ extern struct page * FASTCALL(__alloc_pages(unsigned int, unsigned int, struct zonelist *));
+ static inline struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order)
+ {
+diff -Naur a/include/linux/ghash.h b/include/linux/ghash.h
+--- a/include/linux/ghash.h 1969-12-31 19:00:00.000000000 -0500
++++ b/include/linux/ghash.h 2004-02-11 12:26:13.000000000 -0500
+@@ -0,0 +1,236 @@
++/*
++ * include/linux/ghash.h -- generic hashing with fuzzy retrieval
++ *
++ * (C) 1997 Thomas Schoebel-Theuer
++ *
++ * The algorithms implemented here seem to be a completely new invention,
++ * and I'll publish the fundamentals in a paper.
++ */
++
++#ifndef _GHASH_H
++#define _GHASH_H
++/* HASHSIZE _must_ be a power of two!!! */
++
++
++#define DEF_HASH_FUZZY_STRUCTS(NAME,HASHSIZE,TYPE) \
++\
++struct NAME##_table {\
++ TYPE * hashtable[HASHSIZE];\
++ TYPE * sorted_list;\
++ int nr_entries;\
++};\
++\
++struct NAME##_ptrs {\
++ TYPE * next_hash;\
++ TYPE * prev_hash;\
++ TYPE * next_sorted;\
++ TYPE * prev_sorted;\
++};
++
++#define DEF_HASH_FUZZY(LINKAGE,NAME,HASHSIZE,TYPE,PTRS,KEYTYPE,KEY,KEYCMP,KEYEQ,HASHFN)\
++\
++LINKAGE void insert_##NAME##_hash(struct NAME##_table * tbl, TYPE * elem)\
++{\
++ int ix = HASHFN(elem->KEY);\
++ TYPE ** base = &tbl->hashtable[ix];\
++ TYPE * ptr = *base;\
++ TYPE * prev = NULL;\
++\
++ tbl->nr_entries++;\
++ while(ptr && KEYCMP(ptr->KEY, elem->KEY)) {\
++ base = &ptr->PTRS.next_hash;\
++ prev = ptr;\
++ ptr = *base;\
++ }\
++ elem->PTRS.next_hash = ptr;\
++ elem->PTRS.prev_hash = prev;\
++ if(ptr) {\
++ ptr->PTRS.prev_hash = elem;\
++ }\
++ *base = elem;\
++\
++ ptr = prev;\
++ if(!ptr) {\
++ ptr = tbl->sorted_list;\
++ prev = NULL;\
++ } else {\
++ prev = ptr->PTRS.prev_sorted;\
++ }\
++ while(ptr) {\
++ TYPE * next = ptr->PTRS.next_hash;\
++ if(next && KEYCMP(next->KEY, elem->KEY)) {\
++ prev = ptr;\
++ ptr = next;\
++ } else if(KEYCMP(ptr->KEY, elem->KEY)) {\
++ prev = ptr;\
++ ptr = ptr->PTRS.next_sorted;\
++ } else\
++ break;\
++ }\
++ elem->PTRS.next_sorted = ptr;\
++ elem->PTRS.prev_sorted = prev;\
++ if(ptr) {\
++ ptr->PTRS.prev_sorted = elem;\
++ }\
++ if(prev) {\
++ prev->PTRS.next_sorted = elem;\
++ } else {\
++ tbl->sorted_list = elem;\
++ }\
++}\
++\
++LINKAGE void remove_##NAME##_hash(struct NAME##_table * tbl, TYPE * elem)\
++{\
++ TYPE * next = elem->PTRS.next_hash;\
++ TYPE * prev = elem->PTRS.prev_hash;\
++\
++ tbl->nr_entries--;\
++ if(next)\
++ next->PTRS.prev_hash = prev;\
++ if(prev)\
++ prev->PTRS.next_hash = next;\
++ else {\
++ int ix = HASHFN(elem->KEY);\
++ tbl->hashtable[ix] = next;\
++ }\
++\
++ next = elem->PTRS.next_sorted;\
++ prev = elem->PTRS.prev_sorted;\
++ if(next)\
++ next->PTRS.prev_sorted = prev;\
++ if(prev)\
++ prev->PTRS.next_sorted = next;\
++ else\
++ tbl->sorted_list = next;\
++}\
++\
++LINKAGE TYPE * find_##NAME##_hash(struct NAME##_table * tbl, KEYTYPE pos)\
++{\
++ int ix = HASHFN(pos); /* bucket index via the HASHFN macro argument */\
++ TYPE * ptr = tbl->hashtable[ix];\
++ while(ptr && KEYCMP(ptr->KEY, pos))\
++ ptr = ptr->PTRS.next_hash;\
++ if(ptr && !KEYEQ(ptr->KEY, pos)) /* stopped on a non-matching key */\
++ ptr = NULL;\
++ return ptr;\
++}\
++\
++LINKAGE TYPE * find_##NAME##_hash_fuzzy(struct NAME##_table * tbl, KEYTYPE pos)\
++{\
++ int ix;\
++ int offset;\
++ TYPE * ptr;\
++ TYPE * next;\
++\
++ ptr = tbl->sorted_list;\
++ if(!ptr || KEYCMP(pos, ptr->KEY))\
++ return NULL;\
++ ix = HASHFN(pos);\
++ offset = HASHSIZE;\
++ do {\
++ offset >>= 1;\
++ next = tbl->hashtable[(ix+offset) & ((HASHSIZE)-1)];\
++ if(next && (KEYCMP(next->KEY, pos) || KEYEQ(next->KEY, pos))\
++ && KEYCMP(ptr->KEY, next->KEY))\
++ ptr = next;\
++ } while(offset);\
++\
++ for(;;) {\
++ next = ptr->PTRS.next_hash;\
++ if(next) {\
++ if(KEYCMP(next->KEY, pos)) {\
++ ptr = next;\
++ continue;\
++ }\
++ }\
++ next = ptr->PTRS.next_sorted;\
++ if(next && KEYCMP(next->KEY, pos)) {\
++ ptr = next;\
++ continue;\
++ }\
++ return ptr;\
++ }\
++ return NULL;\
++}
++
++/* LINKAGE - empty or "static", depending on whether you want the definitions to
++ * be public or not
++ * NAME - a string to stick in names to make this hash table type distinct from
++ * any others
++ * HASHSIZE - number of buckets
++ * TYPE - type of data contained in the buckets - must be a structure, one
++ * field is of type NAME_ptrs, another is the hash key
++ * PTRS - TYPE must contain a field of type NAME_ptrs, PTRS is the name of that
++ * field
++ * KEYTYPE - type of the key field within TYPE
++ * KEY - name of the key field within TYPE
++ * KEYCMP - pointer to function that compares KEYTYPEs to each other - the
++ * prototype is int KEYCMP(KEYTYPE, KEYTYPE), it returns zero for equal,
++ * non-zero for not equal
++ * HASHFN - the hash function - the prototype is int HASHFN(KEYTYPE),
++ * it returns a number in the range 0 ... HASHSIZE - 1
++ * Call DEF_HASH_STRUCTS, define your hash table as a NAME_table, then call
++ * DEF_HASH.
++ */
++
++#define DEF_HASH_STRUCTS(NAME,HASHSIZE,TYPE) \
++\
++struct NAME##_table {\
++ TYPE * hashtable[HASHSIZE];\
++ int nr_entries;\
++};\
++\
++struct NAME##_ptrs {\
++ TYPE * next_hash;\
++ TYPE * prev_hash;\
++};
++
++#define DEF_HASH(LINKAGE,NAME,TYPE,PTRS,KEYTYPE,KEY,KEYCMP,HASHFN)\
++\
++LINKAGE void insert_##NAME##_hash(struct NAME##_table * tbl, TYPE * elem)\
++{\
++ int ix = HASHFN(elem->KEY);\
++ TYPE ** base = &tbl->hashtable[ix];\
++ TYPE * ptr = *base;\
++ TYPE * prev = NULL;\
++\
++ tbl->nr_entries++;\
++ while(ptr && KEYCMP(ptr->KEY, elem->KEY)) {\
++ base = &ptr->PTRS.next_hash;\
++ prev = ptr;\
++ ptr = *base;\
++ }\
++ elem->PTRS.next_hash = ptr;\
++ elem->PTRS.prev_hash = prev;\
++ if(ptr) {\
++ ptr->PTRS.prev_hash = elem;\
++ }\
++ *base = elem;\
++}\
++\
++LINKAGE void remove_##NAME##_hash(struct NAME##_table * tbl, TYPE * elem)\
++{\
++ TYPE * next = elem->PTRS.next_hash;\
++ TYPE * prev = elem->PTRS.prev_hash;\
++\
++ tbl->nr_entries--;\
++ if(next)\
++ next->PTRS.prev_hash = prev;\
++ if(prev)\
++ prev->PTRS.next_hash = next;\
++ else {\
++ int ix = HASHFN(elem->KEY);\
++ tbl->hashtable[ix] = next;\
++ }\
++}\
++\
++LINKAGE TYPE * find_##NAME##_hash(struct NAME##_table * tbl, KEYTYPE pos)\
++{\
++ int ix = HASHFN(pos);\
++ TYPE * ptr = tbl->hashtable[ix];\
++ while(ptr && KEYCMP(ptr->KEY, pos))\
++ ptr = ptr->PTRS.next_hash;\
++ return ptr;\
++}
++
++#endif
+diff -Naur a/include/linux/mm.h b/include/linux/mm.h
+--- a/include/linux/mm.h 2004-02-11 12:14:17.000000000 -0500
++++ b/include/linux/mm.h 2004-02-11 12:26:00.000000000 -0500
+@@ -507,6 +507,9 @@
+ return __set_page_dirty_buffers(page);
+ }
+
++extern long do_mprotect(struct mm_struct *mm, unsigned long start,
++ size_t len, unsigned long prot);
++
+ /*
+ * On a two-level page table, this ends up being trivial. Thus the
+ * inlining and the symmetry break with pte_alloc_map() that does all
+@@ -537,9 +540,10 @@
+
+ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+
+-extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+- unsigned long len, unsigned long prot,
+- unsigned long flag, unsigned long pgoff);
++extern unsigned long do_mmap_pgoff(struct mm_struct *mm, struct file *file,
++ unsigned long addr, unsigned long len,
++ unsigned long prot, unsigned long flag,
++ unsigned long pgoff);
+
+ static inline unsigned long do_mmap(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long prot,
+@@ -549,7 +553,8 @@
+ if ((offset + PAGE_ALIGN(len)) < offset)
+ goto out;
+ if (!(offset & ~PAGE_MASK))
+- ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
++ ret = do_mmap_pgoff(current->mm, file, addr, len, prot, flag,
++ offset >> PAGE_SHIFT);
+ out:
+ return ret;
+ }
+diff -Naur a/include/linux/proc_mm.h b/include/linux/proc_mm.h
+--- a/include/linux/proc_mm.h 1969-12-31 19:00:00.000000000 -0500
++++ b/include/linux/proc_mm.h 2004-02-11 12:25:40.000000000 -0500
+@@ -0,0 +1,48 @@
++/*
++ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
++ * Licensed under the GPL
++ */
++
++#ifndef __PROC_MM_H
++#define __PROC_MM_H
++
++#include "linux/sched.h"
++
++#define MM_MMAP 54
++#define MM_MUNMAP 55
++#define MM_MPROTECT 56
++#define MM_COPY_SEGMENTS 57
++
++struct mm_mmap {
++ unsigned long addr;
++ unsigned long len;
++ unsigned long prot;
++ unsigned long flags;
++ unsigned long fd;
++ unsigned long offset;
++};
++
++struct mm_munmap {
++ unsigned long addr;
++ unsigned long len;
++};
++
++struct mm_mprotect {
++ unsigned long addr;
++ unsigned long len;
++ unsigned int prot;
++};
++
++struct proc_mm_op {
++ int op;
++ union {
++ struct mm_mmap mmap;
++ struct mm_munmap munmap;
++ struct mm_mprotect mprotect;
++ int copy_segments;
++ } u;
++};
++
++extern struct mm_struct *proc_mm_get_mm(int fd);
++
++#endif
+diff -Naur a/include/linux/time.h b/include/linux/time.h
+--- a/include/linux/time.h 2004-02-11 12:15:59.000000000 -0500
++++ b/include/linux/time.h 2004-02-11 12:27:52.000000000 -0500
+@@ -41,7 +41,7 @@
+ * Have the 32 bit jiffies value wrap 5 minutes after boot
+ * so jiffies wrap bugs show up earlier.
+ */
+-#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ))
++#define INITIAL_JIFFIES ((unsigned long)(0))
+
+ /*
+ * Change timeval to jiffies, trying to avoid the
+diff -Naur a/mm/Makefile b/mm/Makefile
+--- a/mm/Makefile 2004-02-11 12:15:59.000000000 -0500
++++ b/mm/Makefile 2004-02-11 12:27:53.000000000 -0500
+@@ -12,3 +12,5 @@
+ slab.o swap.o truncate.o vmscan.o $(mmu-y)
+
+ obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o
++obj-$(CONFIG_PROC_MM) += proc_mm.o
++
+diff -Naur a/mm/mmap.c b/mm/mmap.c
+--- a/mm/mmap.c 2004-02-11 12:15:58.000000000 -0500
++++ b/mm/mmap.c 2004-02-11 12:27:51.000000000 -0500
+@@ -460,11 +460,11 @@
+ * The caller must hold down_write(current->mm->mmap_sem).
+ */
+
+-unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
+- unsigned long len, unsigned long prot,
+- unsigned long flags, unsigned long pgoff)
++unsigned long do_mmap_pgoff(struct mm_struct *mm, struct file * file,
++ unsigned long addr, unsigned long len,
++ unsigned long prot, unsigned long flags,
++ unsigned long pgoff)
+ {
+- struct mm_struct * mm = current->mm;
+ struct vm_area_struct * vma, * prev;
+ struct inode *inode;
+ unsigned int vm_flags;
+diff -Naur a/mm/mprotect.c b/mm/mprotect.c
+--- a/mm/mprotect.c 2004-02-11 12:15:11.000000000 -0500
++++ b/mm/mprotect.c 2004-02-11 12:26:58.000000000 -0500
+@@ -222,7 +222,8 @@
+ }
+
+ asmlinkage long
+-sys_mprotect(unsigned long start, size_t len, unsigned long prot)
++do_mprotect(struct mm_struct *mm, unsigned long start, size_t len,
++ unsigned long prot)
+ {
+ unsigned long vm_flags, nstart, end, tmp;
+ struct vm_area_struct * vma, * next, * prev;
+@@ -245,9 +246,9 @@
+
+ vm_flags = calc_vm_prot_bits(prot);
+
+- down_write(&current->mm->mmap_sem);
++ down_write(&mm->mmap_sem);
+
+- vma = find_vma_prev(current->mm, start, &prev);
++ vma = find_vma_prev(mm, start, &prev);
+ error = -ENOMEM;
+ if (!vma)
+ goto out;
+@@ -326,6 +327,11 @@
+ prev->vm_mm->map_count--;
+ }
+ out:
+- up_write(&current->mm->mmap_sem);
++ up_write(&mm->mmap_sem);
+ return error;
+ }
++
++asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot)
++{
++ return(do_mprotect(current->mm, start, len, prot));
++}
+diff -Naur a/mm/page_alloc.c b/mm/page_alloc.c
+--- a/mm/page_alloc.c 2004-02-11 12:14:18.000000000 -0500
++++ b/mm/page_alloc.c 2004-02-11 12:26:01.000000000 -0500
+@@ -268,6 +268,8 @@
+ LIST_HEAD(list);
+ int i;
+
++ arch_free_page(page, order);
++
+ mod_page_state(pgfree, 1 << order);
+ for (i = 0 ; i < (1 << order) ; ++i)
+ free_pages_check(__FUNCTION__, page + i);
+@@ -449,6 +451,8 @@
+ struct per_cpu_pages *pcp;
+ unsigned long flags;
+
++ arch_free_page(page, 0);
++
+ kernel_map_pages(page, 1, 0);
+ inc_page_state(pgfree);
+ free_pages_check(__FUNCTION__, page);
+diff -Naur a/mm/proc_mm.c b/mm/proc_mm.c
+--- a/mm/proc_mm.c 1969-12-31 19:00:00.000000000 -0500
++++ b/mm/proc_mm.c 2004-02-11 12:27:05.000000000 -0500
+@@ -0,0 +1,174 @@
++/*
++ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
++ * Licensed under the GPL
++ */
++
++#include "linux/mm.h"
++#include "linux/init.h"
++#include "linux/proc_fs.h"
++#include "linux/proc_mm.h"
++#include "linux/file.h"
++#include "asm/uaccess.h"
++#include "asm/mmu_context.h"
++
++static struct file_operations proc_mm_fops;
++
++struct mm_struct *proc_mm_get_mm(int fd)
++{
++ struct mm_struct *ret = ERR_PTR(-EBADF);
++ struct file *file;
++
++ file = fget(fd);
++ if (!file)
++ goto out;
++
++ ret = ERR_PTR(-EINVAL);
++ if(file->f_op != &proc_mm_fops)
++ goto out_fput;
++
++ ret = file->private_data;
++ out_fput:
++ fput(file);
++ out:
++ return(ret);
++}
++
++extern long do_mmap2(struct mm_struct *mm, unsigned long addr,
++ unsigned long len, unsigned long prot,
++ unsigned long flags, unsigned long fd,
++ unsigned long pgoff);
++
++static ssize_t write_proc_mm(struct file *file, const char *buffer,
++ size_t count, loff_t *ppos)
++{
++ struct mm_struct *mm = file->private_data;
++ struct proc_mm_op req = { .op = 0 }; /* zeroed: count may be < sizeof(req) */
++ int n, ret;
++
++ if(count > sizeof(req))
++ return(-EINVAL);
++
++ n = copy_from_user(&req, buffer, count);
++ if(n != 0)
++ return(-EFAULT);
++
++ ret = count; /* success value unless an operation below overrides it */
++ switch(req.op){
++ case MM_MMAP: {
++ struct mm_mmap *map = &req.u.mmap;
++
++ ret = do_mmap2(mm, map->addr, map->len, map->prot,
++ map->flags, map->fd, map->offset >> PAGE_SHIFT);
++ if((ret & ~PAGE_MASK) == 0) /* page-aligned result is treated as success */
++ ret = count;
++
++ break;
++ }
++ case MM_MUNMAP: {
++ struct mm_munmap *unmap = &req.u.munmap;
++
++ down_write(&mm->mmap_sem);
++ ret = do_munmap(mm, unmap->addr, unmap->len);
++ up_write(&mm->mmap_sem);
++
++ if(ret == 0)
++ ret = count;
++ break;
++ }
++ case MM_MPROTECT: {
++ struct mm_mprotect *protect = &req.u.mprotect;
++
++ ret = do_mprotect(mm, protect->addr, protect->len,
++ protect->prot);
++ if(ret == 0)
++ ret = count;
++ break;
++ }
++
++ case MM_COPY_SEGMENTS: {
++ struct mm_struct *from = proc_mm_get_mm(req.u.copy_segments);
++
++ if(IS_ERR(from)){
++ ret = PTR_ERR(from);
++ break;
++ }
++
++ mm_copy_segments(from, mm);
++ break;
++ }
++ default:
++ ret = -EINVAL;
++ break;
++ }
++
++ return(ret);
++}
++
++static int open_proc_mm(struct inode *inode, struct file *file)
++{
++ struct mm_struct *mm = mm_alloc();
++ int ret;
++
++ ret = -ENOMEM;
++ if(mm == NULL)
++ goto out_mem;
++
++ ret = init_new_context(current, mm);
++ if(ret)
++ goto out_free;
++
++ spin_lock(&mmlist_lock);
++ list_add(&mm->mmlist, &current->mm->mmlist);
++ mmlist_nr++;
++ spin_unlock(&mmlist_lock);
++
++ file->private_data = mm;
++
++ return(0);
++
++ out_free:
++ mmput(mm);
++ out_mem:
++ return(ret);
++}
++
++static int release_proc_mm(struct inode *inode, struct file *file)
++{
++ struct mm_struct *mm = file->private_data;
++
++ mmput(mm);
++ return(0);
++}
++
++static struct file_operations proc_mm_fops = {
++ .open = open_proc_mm,
++ .release = release_proc_mm,
++ .write = write_proc_mm,
++};
++
++static int make_proc_mm(void)
++{
++ struct proc_dir_entry *ent;
++
++ ent = create_proc_entry("mm", 0222, &proc_root);
++ if(ent == NULL){
++ printk("make_proc_mm : Failed to register /proc/mm\n");
++ return(0);
++ }
++ ent->proc_fops = &proc_mm_fops;
++
++ return(0);
++}
++
++__initcall(make_proc_mm);
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
--- /dev/null
+ fs/exec.c | 18 +++++++---
+ fs/namei.c | 86 +++++++++++++++++++++++++++++++++++++++++++++----
+ fs/namespace.c | 2 +
+ fs/nfs/dir.c | 4 +-
+ fs/open.c | 62 +++++++++++++++++++++++------------
+ fs/stat.c | 24 ++++++++++---
+ include/linux/dcache.h | 3 +
+ include/linux/fs.h | 8 ++++
+ include/linux/namei.h | 56 ++++++++++++++++++++++++++-----
+ kernel/ksyms.c | 8 ++++
+ 10 files changed, 222 insertions(+), 49 deletions(-)
+
+.old..........pc/vfs_intent-2.6.3-suse/fs/exec.c
+.new.........fs/exec.c
+Index: linux-2.6.3-20/fs/exec.c
+===================================================================
+--- linux-2.6.3-20.orig/fs/exec.c 2004-03-05 02:07:04.000000000 -0800
++++ linux-2.6.3-20/fs/exec.c 2004-03-08 14:23:40.000000000 -0800
+@@ -121,8 +121,11 @@
+ struct file * file;
+ struct nameidata nd;
+ int error;
++ intent_init(&nd.intent, IT_OPEN);
+
+- nd.intent.open.flags = FMODE_READ;
++ error = user_path_walk_it(library, &nd);
++
++ nd.intent.it_flags = O_RDONLY;
+ error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd);
+ if (error)
+ goto out;
+@@ -135,7 +138,7 @@
+ if (error)
+ goto exit;
+
+- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
++ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent);
+ error = PTR_ERR(file);
+ if (IS_ERR(file))
+ goto out;
+@@ -475,8 +478,9 @@
+ int err;
+ struct file *file;
+
+- nd.intent.open.flags = FMODE_READ;
+- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd);
++ intent_init(&nd.intent, IT_OPEN);
++ nd.intent.it_flags = O_RDONLY;
++ err = path_lookup(name, LOOKUP_FOLLOW, &nd);
+ file = ERR_PTR(err);
+
+ if (!err) {
+@@ -489,7 +493,7 @@
+ err = -EACCES;
+ file = ERR_PTR(err);
+ if (!err) {
+- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
++ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent);
+ if (!IS_ERR(file)) {
+ err = deny_write_access(file);
+ if (err) {
+.old..........pc/vfs_intent-2.6.3-suse/fs/namei.c
+.new.........fs/namei.c
+Index: linux-2.6.3-20/fs/namei.c
+===================================================================
+--- linux-2.6.3-20.orig/fs/namei.c 2004-03-05 02:07:04.000000000 -0800
++++ linux-2.6.3-20/fs/namei.c 2004-03-08 14:32:24.000000000 -0800
+@@ -269,8 +269,19 @@
+ return 0;
+ }
+
++void intent_release(struct lookup_intent *it)
++{
++ if (!it)
++ return;
++ if (it->it_magic != INTENT_MAGIC)
++ return;
++ if (it->it_op_release)
++ it->it_op_release(it);
++}
++
+ void path_release(struct nameidata *nd)
+ {
++ intent_release(&nd->intent);
+ dput(nd->dentry);
+ mntput(nd->mnt);
+ }
+@@ -347,7 +358,10 @@
+ {
+ struct dentry * result;
+ struct inode *dir = parent->d_inode;
++ int counter = 0;
+
++again:
++ counter++;
+ down(&dir->i_sem);
+ /*
+ * First re-do the cached lookup just in case it was created
+@@ -386,7 +400,10 @@
+ if (result->d_op && result->d_op->d_revalidate) {
+ if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) {
+ dput(result);
+- result = ERR_PTR(-ENOENT);
++ if (counter > 10)
++ result = ERR_PTR(-ESTALE);
++ if (!IS_ERR(result))
++ goto again;
+ }
+ }
+ return result;
+@@ -563,6 +580,31 @@
+ return PTR_ERR(dentry);
+ }
+
++static int revalidate_special(struct nameidata *nd)
++{
++ struct dentry *new, *dentry = nd->dentry;
++ int err, counter = 0;
++
++ revalidate_again: /* re-check d_op: a replacement dentry may have none */
++ if (!dentry->d_op || !dentry->d_op->d_revalidate)
++ return 0;
++ if (!dentry->d_op->d_revalidate(dentry, nd)) {
++ if ((err = permission(dentry->d_parent->d_inode, MAY_EXEC,nd)))
++ return err;
++ new = real_lookup(dentry->d_parent, &dentry->d_name, nd);
++ if (IS_ERR(new)) /* lookup failed: leave nd->dentry untouched */
++ return PTR_ERR(new);
++ d_invalidate(dentry);
++ dput(dentry);
++ nd->dentry = dentry = new;
++ if (++counter < 10)
++ goto revalidate_again;
++ printk("excessive revalidate_it loops\n");
++ return -ESTALE;
++ }
++ return 0;
++}
++
+ /*
+ * Name resolution.
+ *
+@@ -663,7 +705,9 @@
+
+ if (inode->i_op->follow_link) {
+ mntget(next.mnt);
++ nd->flags |= LOOKUP_LINK_NOTLAST;
+ err = do_follow_link(next.dentry, nd);
++ nd->flags &= ~LOOKUP_LINK_NOTLAST;
+ dput(next.dentry);
+ mntput(next.mnt);
+ if (err)
+@@ -702,6 +746,11 @@
+ inode = nd->dentry->d_inode;
+ /* fallthrough */
+ case 1:
++ nd->flags |= LOOKUP_LAST;
++ err = revalidate_special(nd);
++ nd->flags &= ~LOOKUP_LAST;
++ if (err)
++ break;
+ goto return_reval;
+ }
+ if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
+@@ -709,7 +758,9 @@
+ if (err < 0)
+ break;
+ }
++ nd->flags |= LOOKUP_LAST;
+ err = do_lookup(nd, &this, &next);
++ nd->flags &= ~LOOKUP_LAST;
+ if (err)
+ break;
+ follow_mount(&next.mnt, &next.dentry);
+@@ -935,7 +986,7 @@
+ }
+
+ /* SMP-safe */
+-struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
++struct dentry * lookup_one_len_it(const char * name, struct dentry * base, int len, struct nameidata *nd)
+ {
+ unsigned long hash;
+ struct qstr this;
+@@ -955,11 +1006,16 @@
+ }
+ this.hash = end_name_hash(hash);
+
+- return lookup_hash(&this, base);
++ return __lookup_hash(&this, base, nd);
+ access:
+ return ERR_PTR(-EACCES);
+ }
+
++struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
++{
++ return lookup_one_len_it(name, base, len, NULL);
++}
++
+ /*
+ * namei()
+ *
+@@ -971,7 +1027,7 @@
+ * that namei follows links, while lnamei does not.
+ * SMP-safe
+ */
+-int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
++int fastcall __user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd)
+ {
+ char *tmp = getname(name);
+ int err = PTR_ERR(tmp);
+@@ -983,6 +1039,12 @@
+ return err;
+ }
+
++int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd)
++{
++	intent_init(&nd->intent, IT_LOOKUP);	/* zeroes nd->intent and sets it_op to IT_LOOKUP */
++	return __user_walk_it(name, flags, nd);
++}
++
+ /*
+ * It's inline, so penalty for filesystems that don't use sticky bit is
+ * minimal.
+@@ -1255,8 +1317,8 @@
+ acc_mode |= MAY_APPEND;
+
+ /* Fill in the open() intent data */
+- nd->intent.open.flags = flag;
+- nd->intent.open.create_mode = mode;
++ nd->intent.it_flags = flag;
++ nd->intent.it_create_mode = mode;
+
+ /*
+ * The simplest case - just a plain lookup.
+@@ -1271,6 +1333,7 @@
+ /*
+ * Create - we need to know the parent.
+ */
++ nd->intent.it_op |= IT_CREAT;
+ error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd);
+ if (error)
+ return error;
+@@ -1287,7 +1350,9 @@
+ dir = nd->dentry;
+ nd->flags &= ~LOOKUP_PARENT;
+ down(&dir->d_inode->i_sem);
++ nd->flags |= LOOKUP_LAST;
+ dentry = __lookup_hash(&nd->last, nd->dentry, nd);
++ nd->flags &= ~LOOKUP_LAST;
+
+ do_last:
+ error = PTR_ERR(dentry);
+@@ -1392,7 +1457,9 @@
+ }
+ dir = nd->dentry;
+ down(&dir->d_inode->i_sem);
++ nd->flags |= LOOKUP_LAST;
+ dentry = __lookup_hash(&nd->last, nd->dentry, nd);
++ nd->flags &= ~LOOKUP_LAST;
+ putname(nd->last.name);
+ goto do_last;
+ }
+@@ -2154,7 +2221,9 @@
+ __vfs_follow_link(struct nameidata *nd, const char *link)
+ {
+ int res = 0;
++ struct lookup_intent it = nd->intent;
+ char *name;
++
+ if (IS_ERR(link))
+ goto fail;
+
+@@ -2164,6 +2233,10 @@
+ /* weird __emul_prefix() stuff did it */
+ goto out;
+ }
++
++ intent_init(&nd->intent, it.it_op);
++ nd->intent.it_flags = it.it_flags;
++ nd->intent.it_create_mode = it.it_create_mode;
+ res = link_path_walk(link, nd);
+ out:
+ if (current->link_count || res || nd->last_type!=LAST_NORM)
+.old..........pc/vfs_intent-2.6.3-suse/fs/namespace.c
+.new.........fs/namespace.c
+Index: linux-2.6.3-20/fs/namespace.c
+===================================================================
+--- linux-2.6.3-20.orig/fs/namespace.c 2004-03-05 02:07:04.000000000 -0800
++++ linux-2.6.3-20/fs/namespace.c 2004-03-08 14:23:40.000000000 -0800
+@@ -744,6 +744,7 @@
+ int retval = 0;
+ int mnt_flags = 0;
+
++ intent_init(&nd.intent, IT_LOOKUP);
+ /* Discard magic */
+ if ((flags & MS_MGC_MSK) == MS_MGC_VAL)
+ flags &= ~MS_MGC_MSK;
+.old..........pc/vfs_intent-2.6.3-suse/fs/open.c
+.new.........fs/open.c
+Index: linux-2.6.3-20/fs/open.c
+===================================================================
+--- linux-2.6.3-20.orig/fs/open.c 2004-03-05 02:07:04.000000000 -0800
++++ linux-2.6.3-20/fs/open.c 2004-03-08 14:23:40.000000000 -0800
+@@ -202,7 +202,7 @@
+ struct nameidata nd;
+ struct inode * inode;
+ int error;
+-
++ intent_init(&nd.intent, IT_GETATTR);
+ error = -EINVAL;
+ if (length < 0) /* sorry, but loff_t says... */
+ goto out;
+@@ -461,6 +461,7 @@
+ int old_fsuid, old_fsgid;
+ kernel_cap_t old_cap;
+ int res;
++ intent_init(&nd.intent, IT_GETATTR);
+
+ if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
+ return -EINVAL;
+@@ -492,6 +493,7 @@
+ if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode)
+ && !special_file(nd.dentry->d_inode->i_mode))
+ res = -EROFS;
++
+ path_release(&nd);
+ }
+
+@@ -506,6 +508,7 @@
+ {
+ struct nameidata nd;
+ int error;
++ intent_init(&nd.intent, IT_GETATTR);
+
+ error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd);
+ if (error)
+@@ -557,6 +560,7 @@
+ {
+ struct nameidata nd;
+ int error;
++ intent_init(&nd.intent, IT_GETATTR);
+
+ error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd);
+ if (error)
+@@ -629,7 +633,7 @@
+ error = -EROFS;
+ if (IS_RDONLY(inode))
+ goto dput_and_out;
+-
++
+ error = -EPERM;
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ goto dput_and_out;
+@@ -737,27 +741,8 @@
+ * for the internal routines (ie open_namei()/follow_link() etc). 00 is
+ * used by symlinks.
+ */
+-struct file *filp_open(const char * filename, int flags, int mode)
+-{
+- int namei_flags, error;
+- struct nameidata nd;
+-
+- namei_flags = flags;
+- if ((namei_flags+1) & O_ACCMODE)
+- namei_flags++;
+- if (namei_flags & O_TRUNC)
+- namei_flags |= 2;
+-
+- error = open_namei(filename, namei_flags, mode, &nd);
+- if (!error)
+- return dentry_open(nd.dentry, nd.mnt, flags);
+-
+- return ERR_PTR(error);
+-}
+-
+-EXPORT_SYMBOL(filp_open);
+-
+-struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
++struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, int flags,
++ struct lookup_intent *it)
+ {
+ struct file * f;
+ struct inode *inode;
+@@ -769,6 +754,7 @@
+ goto cleanup_dentry;
+ f->f_flags = flags;
+ f->f_mode = (flags+1) & O_ACCMODE;
++ f->f_it = it;
+ inode = dentry->d_inode;
+ if (f->f_mode & FMODE_WRITE) {
+ error = get_write_access(inode);
+@@ -788,6 +774,7 @@
+ error = f->f_op->open(inode,f);
+ if (error)
+ goto cleanup_all;
++ intent_release(it);
+ }
+ f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
+
+@@ -812,6 +799,7 @@
+ cleanup_file:
+ put_filp(f);
+ cleanup_dentry:
++ intent_release(it);
+ dput(dentry);
+ mntput(mnt);
+ return ERR_PTR(error);
+@@ -819,6 +807,36 @@
+
+ EXPORT_SYMBOL(dentry_open);
+
++/* Resolve filename with open_namei() under an IT_OPEN intent, then open the
++ * result through dentry_open_it().  Returns the file or an ERR_PTR() value. */
++struct file *filp_open(const char * filename, int flags, int mode)
++{
++	int namei_flags, error;
++	struct nameidata nd;
++
++	intent_init(&nd.intent, IT_OPEN);
++	namei_flags = flags;
++	if ((namei_flags+1) & O_ACCMODE)
++		namei_flags++;
++	if (namei_flags & O_TRUNC)
++		namei_flags |= 2;
++
++	error = open_namei(filename, namei_flags, mode, &nd);
++	if (error)
++		return ERR_PTR(error);
++	return dentry_open_it(nd.dentry, nd.mnt, flags, &nd.intent);
++}
++
++EXPORT_SYMBOL(filp_open);
++
++struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
++{
++ struct lookup_intent it;
++ intent_init(&it, IT_LOOKUP);
++
++ return dentry_open_it(dentry, mnt, flags, &it);
++}
++
+ /*
+ * Find an empty file descriptor entry, and mark it busy.
+ */
+.old..........pc/vfs_intent-2.6.3-suse/fs/stat.c
+.new.........fs/stat.c
+Index: linux-2.6.3-20/fs/stat.c
+===================================================================
+--- linux-2.6.3-20.orig/fs/stat.c 2004-03-05 02:07:04.000000000 -0800
++++ linux-2.6.3-20/fs/stat.c 2004-03-08 14:23:40.000000000 -0800
+@@ -36,7 +36,7 @@
+
+ EXPORT_SYMBOL(generic_fillattr);
+
+-int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
++int vfs_getattr_it(struct vfsmount *mnt, struct dentry *dentry, struct lookup_intent *it, struct kstat *stat)
+ {
+ struct inode *inode = dentry->d_inode;
+ int retval;
+@@ -45,6 +45,8 @@
+ if (retval)
+ return retval;
+
++ if (inode->i_op->getattr_it)
++ return inode->i_op->getattr_it(mnt, dentry, it, stat);
+ if (inode->i_op->getattr)
+ return inode->i_op->getattr(mnt, dentry, stat);
+
+@@ -61,14 +63,20 @@
+
+ EXPORT_SYMBOL(vfs_getattr);
+
++int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
++{
++ return vfs_getattr_it(mnt, dentry, NULL, stat);
++}
++
+ int vfs_stat(char __user *name, struct kstat *stat)
+ {
+ struct nameidata nd;
+ int error;
++ intent_init(&nd.intent, IT_GETATTR);
+
+- error = user_path_walk(name, &nd);
++ error = user_path_walk_it(name, &nd);
+ if (!error) {
+- error = vfs_getattr(nd.mnt, nd.dentry, stat);
++ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat);
+ path_release(&nd);
+ }
+ return error;
+@@ -80,10 +88,11 @@
+ {
+ struct nameidata nd;
+ int error;
++ intent_init(&nd.intent, IT_GETATTR);
+
+- error = user_path_walk_link(name, &nd);
++ error = user_path_walk_link_it(name, &nd);
+ if (!error) {
+- error = vfs_getattr(nd.mnt, nd.dentry, stat);
++ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat);
+ path_release(&nd);
+ }
+ return error;
+@@ -95,9 +104,12 @@
+ {
+ struct file *f = fget(fd);
+ int error = -EBADF;
++ struct nameidata nd;
++ intent_init(&nd.intent, IT_GETATTR);
+
+ if (f) {
+- error = vfs_getattr(f->f_vfsmnt, f->f_dentry, stat);
++ error = vfs_getattr_it(f->f_vfsmnt, f->f_dentry, &nd.intent, stat);
++ intent_release(&nd.intent);
+ fput(f);
+ }
+ return error;
+.old..........pc/vfs_intent-2.6.3-suse/fs/nfs/dir.c
+.new.........fs/nfs/dir.c
+Index: linux-2.6.3-20/fs/nfs/dir.c
+===================================================================
+--- linux-2.6.3-20.orig/fs/nfs/dir.c 2004-03-05 02:07:03.000000000 -0800
++++ linux-2.6.3-20/fs/nfs/dir.c 2004-03-08 14:23:40.000000000 -0800
+@@ -681,7 +681,7 @@
+ return 0;
+ if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE))
+ return 0;
+- return (nd->intent.open.flags & O_EXCL) != 0;
++ return (nd->intent.it_flags & O_EXCL) != 0;
+ }
+
+ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
+@@ -972,7 +972,7 @@
+ attr.ia_valid = ATTR_MODE;
+
+ if (nd && (nd->flags & LOOKUP_CREATE))
+- open_flags = nd->intent.open.flags;
++ open_flags = nd->intent.it_flags;
+
+ /*
+ * The 0 argument passed into the create function should one day
+.old..........pc/vfs_intent-2.6.3-suse/fs/inode.c
+.new.........fs/inode.c
+Index: linux-2.6.3-20/fs/inode.c
+===================================================================
+--- linux-2.6.3-20.orig/fs/inode.c 2004-03-05 02:07:04.000000000 -0800
++++ linux-2.6.3-20/fs/inode.c 2004-03-08 14:23:40.000000000 -0800
+@@ -223,6 +223,7 @@
+ inodes_stat.nr_unused--;
+ }
+
++EXPORT_SYMBOL(__iget);
+ /**
+ * clear_inode - clear an inode
+ * @inode: inode to clear
+.old..........pc/vfs_intent-2.6.3-suse/fs/super.c
+.new.........fs/super.c
+Index: linux-2.6.3-20/fs/super.c
+===================================================================
+--- linux-2.6.3-20.orig/fs/super.c 2004-03-05 02:07:04.000000000 -0800
++++ linux-2.6.3-20/fs/super.c 2004-03-08 14:23:40.000000000 -0800
+@@ -841,6 +841,8 @@
+ return (struct vfsmount *)sb;
+ }
+
++EXPORT_SYMBOL(do_kern_mount);
++
+ struct vfsmount *kern_mount(struct file_system_type *type)
+ {
+ return do_kern_mount(type->name, 0, type->name, NULL);
+.old..........pc/vfs_intent-2.6.3-suse/include/linux/dcache.h
+.new.........include/linux/dcache.h
+Index: linux-2.6.3-20/include/linux/dcache.h
+===================================================================
+--- linux-2.6.3-20.orig/include/linux/dcache.h 2004-03-05 02:07:17.000000000 -0800
++++ linux-2.6.3-20/include/linux/dcache.h 2004-03-08 14:23:40.000000000 -0800
+@@ -4,6 +4,7 @@
+ #ifdef __KERNEL__
+
+ #include <asm/atomic.h>
++#include <linux/string.h>
+ #include <linux/list.h>
+ #include <linux/spinlock.h>
+ #include <linux/cache.h>
+@@ -35,6 +36,8 @@
+ char name_str[0];
+ };
+
++#include <linux/namei.h>
++
+ struct dentry_stat_t {
+ int nr_dentry;
+ int nr_unused;
+.old..........pc/vfs_intent-2.6.3-suse/include/linux/fs.h
+.new.........include/linux/fs.h
+Index: linux-2.6.3-20/include/linux/fs.h
+===================================================================
+--- linux-2.6.3-20.orig/include/linux/fs.h 2004-03-05 02:07:17.000000000 -0800
++++ linux-2.6.3-20/include/linux/fs.h 2004-03-08 14:23:41.000000000 -0800
+@@ -243,6 +243,8 @@
+ #define ATTR_ATTR_FLAG 1024
+ #define ATTR_KILL_SUID 2048
+ #define ATTR_KILL_SGID 4096
++#define ATTR_RAW 8192 /* file system, not vfs will massage attrs */
++#define ATTR_FROM_OPEN 16384 /* called from open path, ie O_TRUNC */
+
+ /*
+ * This is the Inode Attributes structure, used for notify_change(). It
+@@ -409,6 +411,7 @@
+ struct block_device *i_bdev;
+ struct cdev *i_cdev;
+ int i_cindex;
++ void *i_filterdata;
+
+ unsigned long i_dnotify_mask; /* Directory notify events */
+ struct dnotify_struct *i_dnotify; /* for directory notifications */
+@@ -541,6 +544,7 @@
+ spinlock_t f_ep_lock;
+ #endif /* #ifdef CONFIG_EPOLL */
+ struct address_space *f_mapping;
++ struct lookup_intent *f_it;
+ };
+ extern spinlock_t files_lock;
+ #define file_list_lock() spin_lock(&files_lock);
+@@ -846,7 +850,9 @@
+ void (*truncate) (struct inode *);
+ int (*permission) (struct inode *, int, struct nameidata *);
+ int (*setattr) (struct dentry *, struct iattr *);
++ int (*setattr_raw) (struct inode *, struct iattr *);
+ int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *);
++ int (*getattr_it) (struct vfsmount *, struct dentry *, struct lookup_intent *, struct kstat *);
+ int (*setxattr) (struct dentry *, const char *,const void *,size_t,int);
+ ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t);
+ ssize_t (*listxattr) (struct dentry *, char *, size_t);
+@@ -1062,6 +1068,7 @@
+ extern int unregister_filesystem(struct file_system_type *);
+ extern struct vfsmount *kern_mount(struct file_system_type *);
+ extern int may_umount(struct vfsmount *);
++struct vfsmount *do_kern_mount(const char *type, int flags, const char *name, void *data);
+ extern long do_mount(char *, char *, char *, unsigned long, void *);
+
+ extern int vfs_statfs(struct super_block *, struct kstatfs *);
+@@ -1126,6 +1133,7 @@
+ extern int do_truncate(struct dentry *, loff_t start);
+ extern struct file *filp_open(const char *, int, int);
+ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
++extern struct file * dentry_open_it(struct dentry *, struct vfsmount *, int, struct lookup_intent *);
+ extern int filp_close(struct file *, fl_owner_t id);
+ extern char * getname(const char __user *);
+
+.old..........pc/vfs_intent-2.6.3-suse/include/linux/namei.h
+.new.........include/linux/namei.h
+Index: linux-2.6.3-20/include/linux/namei.h
+===================================================================
+--- linux-2.6.3-20.orig/include/linux/namei.h 2004-03-05 02:07:18.000000000 -0800
++++ linux-2.6.3-20/include/linux/namei.h 2004-03-08 14:23:41.000000000 -0800
+@@ -2,25 +2,55 @@
+ #define _LINUX_NAMEI_H
+
+ #include <linux/linkage.h>
++#include <linux/string.h>
+
+ struct vfsmount;
++struct nameidata;
+
+-struct open_intent {
+- int flags;
+- int create_mode;
++/* intent opcodes */
++#define IT_OPEN (1)
++#define IT_CREAT (1<<1)
++#define IT_READDIR (1<<2)
++#define IT_GETATTR (1<<3)
++#define IT_LOOKUP (1<<4)
++#define IT_UNLINK (1<<5)
++#define IT_TRUNC (1<<6)
++#define IT_GETXATTR (1<<7)
++
++struct lustre_intent_data {
++ int it_disposition;
++ int it_status;
++ __u64 it_lock_handle;
++ void *it_data;
++ int it_lock_mode;
+ };
+
++#define INTENT_MAGIC 0x19620323
++struct lookup_intent {
++ int it_magic;
++ void (*it_op_release)(struct lookup_intent *);
++ int it_op;
++ int it_flags;
++ int it_create_mode;
++ union {
++ struct lustre_intent_data lustre;
++ } d;
++};
++
++static inline void intent_init(struct lookup_intent *it, int op)
++{
++ memset(it, 0, sizeof(*it));
++ it->it_magic = INTENT_MAGIC;
++ it->it_op = op;
++}
++
+ struct nameidata {
+ struct dentry *dentry;
+ struct vfsmount *mnt;
+ struct qstr last;
+ unsigned int flags;
+ int last_type;
+-
+- /* Intent data */
+- union {
+- struct open_intent open;
+- } intent;
++ struct lookup_intent intent;
+ };
+
+ /*
+@@ -41,6 +71,9 @@
+ #define LOOKUP_CONTINUE 4
+ #define LOOKUP_PARENT 16
+ #define LOOKUP_NOALT 32
++#define LOOKUP_LAST (1<<6)
++#define LOOKUP_LINK_NOTLAST (1<<7)
++
+ /*
+ * Intent data
+ */
+@@ -49,6 +82,12 @@
+ #define LOOKUP_ACCESS (0x0400)
+
+ extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *));
++extern int FASTCALL(__user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd));
++#define user_path_walk_it(name,nd) \
++ __user_walk_it(name, LOOKUP_FOLLOW, nd)
++#define user_path_walk_link_it(name,nd) \
++ __user_walk_it(name, 0, nd)
++extern void intent_release(struct lookup_intent *);
+ #define user_path_walk(name,nd) \
+ __user_walk(name, LOOKUP_FOLLOW, nd)
+ #define user_path_walk_link(name,nd) \
+@@ -60,7 +99,6 @@
+
+ extern struct dentry * lookup_one_len(const char *, struct dentry *, int);
+ extern struct dentry * lookup_hash(struct qstr *, struct dentry *);
+-
+ extern int follow_down(struct vfsmount **, struct dentry **);
+ extern int follow_up(struct vfsmount **, struct dentry **);
+
+.old..........pc/vfs_intent-2.6.3-suse/kernel/exit.c
+.new.........kernel/exit.c
+Index: linux-2.6.3-20/kernel/exit.c
+===================================================================
+--- linux-2.6.3-20.orig/kernel/exit.c 2004-03-05 02:07:17.000000000 -0800
++++ linux-2.6.3-20/kernel/exit.c 2004-03-08 14:23:41.000000000 -0800
+@@ -258,6 +258,8 @@
+ write_unlock_irq(&tasklist_lock);
+ }
+
++EXPORT_SYMBOL(reparent_to_init);
++
+ void __set_special_pids(pid_t session, pid_t pgrp)
+ {
+ struct task_struct *curr = current;
+@@ -427,6 +429,8 @@
+ __exit_files(tsk);
+ }
+
++EXPORT_SYMBOL(exit_files);
++
+ static inline void __put_fs_struct(struct fs_struct *fs)
+ {
+ /* No need to hold fs->lock if we are killing it */
--- /dev/null
+ 0 files changed
+
+Index: linux-2.6.3-mm4/fs/namei.c
+===================================================================
+--- linux-2.6.3-mm4.orig/fs/namei.c 2004-03-08 14:46:20.906229088 +0800
++++ linux-2.6.3-mm4/fs/namei.c 2004-03-08 14:51:27.317647472 +0800
+@@ -1277,7 +1277,7 @@
+ if (!error) {
+ DQUOT_INIT(inode);
+
+- error = do_truncate(dentry, 0);
++ error = do_truncate(dentry, 0, 1);
+ }
+ put_write_access(inode);
+ if (error)
+@@ -1527,6 +1527,7 @@
+ char * tmp;
+ struct dentry * dentry;
+ struct nameidata nd;
++ intent_init(&nd.intent, IT_LOOKUP);
+
+ if (S_ISDIR(mode))
+ return -EPERM;
+@@ -1537,6 +1538,15 @@
+ error = path_lookup(tmp, LOOKUP_PARENT, &nd);
+ if (error)
+ goto out;
++
++ if (nd.dentry->d_inode->i_op->mknod_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->mknod_raw(&nd, mode, dev);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
++
+ dentry = lookup_create(&nd, 0);
+ error = PTR_ERR(dentry);
+
+@@ -1563,6 +1573,7 @@
+ dput(dentry);
+ }
+ up(&nd.dentry->d_inode->i_sem);
++out2:
+ path_release(&nd);
+ out:
+ putname(tmp);
+@@ -1604,10 +1615,18 @@
+ if (!IS_ERR(tmp)) {
+ struct dentry *dentry;
+ struct nameidata nd;
++ intent_init(&nd.intent, IT_LOOKUP);
+
+ error = path_lookup(tmp, LOOKUP_PARENT, &nd);
+ if (error)
+ goto out;
++ if (nd.dentry->d_inode->i_op->mkdir_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->mkdir_raw(&nd, mode);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
+ dentry = lookup_create(&nd, 1);
+ error = PTR_ERR(dentry);
+ if (!IS_ERR(dentry)) {
+@@ -1617,6 +1636,7 @@
+ dput(dentry);
+ }
+ up(&nd.dentry->d_inode->i_sem);
++out2:
+ path_release(&nd);
+ out:
+ putname(tmp);
+@@ -1697,6 +1717,7 @@
+ char * name;
+ struct dentry *dentry;
+ struct nameidata nd;
++ intent_init(&nd.intent, IT_LOOKUP);
+
+ name = getname(pathname);
+ if(IS_ERR(name))
+@@ -1717,6 +1738,16 @@
+ error = -EBUSY;
+ goto exit1;
+ }
++
++ if (nd.dentry->d_inode->i_op->rmdir_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++ error = op->rmdir_raw(&nd);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit1;
++ }
++
+ down(&nd.dentry->d_inode->i_sem);
+ dentry = lookup_hash(&nd.last, nd.dentry);
+ error = PTR_ERR(dentry);
+@@ -1775,6 +1806,7 @@
+ struct dentry *dentry;
+ struct nameidata nd;
+ struct inode *inode = NULL;
++ intent_init(&nd.intent, IT_LOOKUP);
+
+ name = getname(pathname);
+ if(IS_ERR(name))
+@@ -1786,6 +1818,13 @@
+ error = -EISDIR;
+ if (nd.last_type != LAST_NORM)
+ goto exit1;
++ if (nd.dentry->d_inode->i_op->unlink_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->unlink_raw(&nd);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit1;
++ }
+ down(&nd.dentry->d_inode->i_sem);
+ dentry = lookup_hash(&nd.last, nd.dentry);
+ error = PTR_ERR(dentry);
+@@ -1853,10 +1892,18 @@
+ if (!IS_ERR(to)) {
+ struct dentry *dentry;
+ struct nameidata nd;
++ intent_init(&nd.intent, IT_LOOKUP);
+
+ error = path_lookup(to, LOOKUP_PARENT, &nd);
+ if (error)
+ goto out;
++ if (nd.dentry->d_inode->i_op->symlink_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->symlink_raw(&nd, from);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
+ dentry = lookup_create(&nd, 0);
+ error = PTR_ERR(dentry);
+ if (!IS_ERR(dentry)) {
+@@ -1864,6 +1911,7 @@
+ dput(dentry);
+ }
+ up(&nd.dentry->d_inode->i_sem);
++out2:
+ path_release(&nd);
+ out:
+ putname(to);
+@@ -1927,6 +1975,8 @@
+ struct nameidata nd, old_nd;
+ int error;
+ char * to;
++ intent_init(&nd.intent, IT_LOOKUP);
++ intent_init(&old_nd.intent, IT_LOOKUP);
+
+ to = getname(newname);
+ if (IS_ERR(to))
+@@ -1941,6 +1991,13 @@
+ error = -EXDEV;
+ if (old_nd.mnt != nd.mnt)
+ goto out_release;
++ if (nd.dentry->d_inode->i_op->link_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->link_raw(&old_nd, &nd);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out_release;
++ }
+ new_dentry = lookup_create(&nd, 0);
+ error = PTR_ERR(new_dentry);
+ if (!IS_ERR(new_dentry)) {
+@@ -1991,7 +2048,7 @@
+ * locking].
+ */
+ int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
+- struct inode *new_dir, struct dentry *new_dentry)
++ struct inode *new_dir, struct dentry *new_dentry)
+ {
+ int error = 0;
+ struct inode *target;
+@@ -2036,7 +2093,7 @@
+ }
+
+ int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
+- struct inode *new_dir, struct dentry *new_dentry)
++ struct inode *new_dir, struct dentry *new_dentry)
+ {
+ struct inode *target;
+ int error;
+@@ -2113,6 +2170,8 @@
+ struct dentry * old_dentry, *new_dentry;
+ struct dentry * trap;
+ struct nameidata oldnd, newnd;
++ intent_init(&oldnd.intent, IT_LOOKUP);
++ intent_init(&newnd.intent, IT_LOOKUP);
+
+ error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
+ if (error)
+@@ -2135,6 +2194,13 @@
+ if (newnd.last_type != LAST_NORM)
+ goto exit2;
+
++ if (old_dir->d_inode->i_op->rename_raw) {
++ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit2;
++ }
++
+ trap = lock_rename(new_dir, old_dir);
+
+ old_dentry = lookup_hash(&oldnd.last, old_dir);
+@@ -2166,8 +2232,7 @@
+ if (new_dentry == trap)
+ goto exit5;
+
+- error = vfs_rename(old_dir->d_inode, old_dentry,
+- new_dir->d_inode, new_dentry);
++ error = vfs_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, new_dentry);
+ exit5:
+ dput(new_dentry);
+ exit4:
+Index: linux-2.6.3-mm4/fs/open.c
+===================================================================
+--- linux-2.6.3-mm4.orig/fs/open.c 2004-03-08 14:46:21.050207200 +0800
++++ linux-2.6.3-mm4/fs/open.c 2004-03-08 14:55:01.025158992 +0800
+@@ -180,9 +180,10 @@
+ return error;
+ }
+
+-int do_truncate(struct dentry *dentry, loff_t length)
++int do_truncate(struct dentry *dentry, loff_t length, int called_from_open)
+ {
+ int err;
++ struct inode_operations *op = dentry->d_inode->i_op;
+ struct iattr newattrs;
+
+ /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
+@@ -193,7 +194,14 @@
+ newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
+ down(&dentry->d_inode->i_sem);
+ down_write(&dentry->d_inode->i_alloc_sem);
+- err = notify_change(dentry, &newattrs);
++ if (called_from_open)
++ newattrs.ia_valid |= ATTR_FROM_OPEN;
++ if (op->setattr_raw) {
++ newattrs.ia_valid |= ATTR_RAW;
++ newattrs.ia_ctime = CURRENT_TIME;
++ err = op->setattr_raw(dentry->d_inode, &newattrs);
++ } else
++ err = notify_change(dentry, &newattrs);
+ up_write(&dentry->d_inode->i_alloc_sem);
+ up(&dentry->d_inode->i_sem);
+ return err;
+@@ -249,7 +257,7 @@
+ error = locks_verify_truncate(inode, NULL, length);
+ if (!error) {
+ DQUOT_INIT(inode);
+- error = do_truncate(nd.dentry, length);
++ error = do_truncate(nd.dentry, length, 0);
+ }
+ put_write_access(inode);
+
+@@ -301,7 +309,7 @@
+
+ error = locks_verify_truncate(inode, file, length);
+ if (!error)
+- error = do_truncate(dentry, length);
++ error = do_truncate(dentry, length, 0);
+ out_putf:
+ fput(file);
+ out:
+@@ -380,9 +388,19 @@
+ (error = permission(inode,MAY_WRITE,&nd)) != 0)
+ goto dput_and_out;
+ }
++	if (inode->i_op->setattr_raw) {
++		struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++		newattrs.ia_valid |= ATTR_RAW;
++		error = op->setattr_raw(inode, &newattrs);
++		/* -EOPNOTSUPP: the file system wants the normal vfs path below */
++		if (error != -EOPNOTSUPP)
++			goto dput_and_out;
++		newattrs.ia_valid &= ~ATTR_RAW;
++	}
+ 	down(&inode->i_sem);
+ 	error = notify_change(nd.dentry, &newattrs);
+ 	up(&inode->i_sem);
+ dput_and_out:
+ path_release(&nd);
+ out:
+@@ -433,9 +451,19 @@
+ (error = permission(inode,MAY_WRITE,&nd)) != 0)
+ goto dput_and_out;
+ }
++	if (inode->i_op->setattr_raw) {
++		struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++		newattrs.ia_valid |= ATTR_RAW;
++		error = op->setattr_raw(inode, &newattrs);
++		/* -EOPNOTSUPP: the file system wants the normal vfs path below */
++		if (error != -EOPNOTSUPP)
++			goto dput_and_out;
++		newattrs.ia_valid &= ~ATTR_RAW;
++	}
+ 	down(&inode->i_sem);
+ 	error = notify_change(nd.dentry, &newattrs);
+ 	up(&inode->i_sem);
+ dput_and_out:
+ path_release(&nd);
+ out:
+@@ -636,6 +664,18 @@
+ if (IS_RDONLY(inode))
+ goto dput_and_out;
+
++ if (inode->i_op->setattr_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++ newattrs.ia_mode = mode;
++ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
++ newattrs.ia_valid |= ATTR_RAW;
++ error = op->setattr_raw(inode, &newattrs);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto dput_and_out;
++ }
++
+ error = -EPERM;
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ goto dput_and_out;
+@@ -669,6 +709,18 @@
+ if (IS_RDONLY(inode))
+ goto out;
+ error = -EPERM;
++ if (inode->i_op->setattr_raw) {
++ struct inode_operations *op = dentry->d_inode->i_op;
++
++ newattrs.ia_uid = user;
++ newattrs.ia_gid = group;
++ newattrs.ia_valid = ATTR_UID | ATTR_GID;
++ newattrs.ia_valid |= ATTR_RAW;
++ error = op->setattr_raw(inode, &newattrs);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ return error;
++ }
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ goto out;
+ newattrs.ia_valid = ATTR_CTIME;
+@@ -682,6 +734,7 @@
+ }
+ if (!S_ISDIR(inode->i_mode))
+ newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
++
+ down(&inode->i_sem);
+ error = notify_change(dentry, &newattrs);
+ up(&inode->i_sem);
+Index: linux-2.6.3-mm4/fs/exec.c
+===================================================================
+--- linux-2.6.3-mm4.orig/fs/exec.c 2004-03-08 14:46:20.758251584 +0800
++++ linux-2.6.3-mm4/fs/exec.c 2004-03-08 14:51:27.454626648 +0800
+@@ -1408,7 +1408,7 @@
+ goto close_fail;
+ if (!file->f_op->write)
+ goto close_fail;
+- if (do_truncate(file->f_dentry, 0) != 0)
++ if (do_truncate(file->f_dentry, 0, 0) != 0)
+ goto close_fail;
+
+ retval = binfmt->core_dump(signr, regs, file);
+Index: linux-2.6.3-mm4/include/linux/fs.h
+===================================================================
+--- linux-2.6.3-mm4.orig/include/linux/fs.h 2004-03-08 14:46:21.391155368 +0800
++++ linux-2.6.3-mm4/include/linux/fs.h 2004-03-08 14:56:25.775275016 +0800
+@@ -843,13 +843,20 @@
+ int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
+ struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
+ int (*link) (struct dentry *,struct inode *,struct dentry *);
++ int (*link_raw) (struct nameidata *,struct nameidata *);
+ int (*unlink) (struct inode *,struct dentry *);
++ int (*unlink_raw) (struct nameidata *);
+ int (*symlink) (struct inode *,struct dentry *,const char *);
++ int (*symlink_raw) (struct nameidata *,const char *);
+ int (*mkdir) (struct inode *,struct dentry *,int);
++ int (*mkdir_raw) (struct nameidata *,int);
+ int (*rmdir) (struct inode *,struct dentry *);
++ int (*rmdir_raw) (struct nameidata *);
+ int (*mknod) (struct inode *,struct dentry *,int,dev_t);
++ int (*mknod_raw) (struct nameidata *,int,dev_t);
+ int (*rename) (struct inode *, struct dentry *,
+ struct inode *, struct dentry *);
++ int (*rename_raw) (struct nameidata *, struct nameidata *);
+ int (*readlink) (struct dentry *, char __user *,int);
+ int (*follow_link) (struct dentry *, struct nameidata *);
+ void (*truncate) (struct inode *);
+@@ -1133,7 +1140,7 @@
+
+ /* fs/open.c */
+
+-extern int do_truncate(struct dentry *, loff_t start);
++extern int do_truncate(struct dentry *, loff_t start, int called_from_open);
+ extern struct file *filp_open(const char *, int, int);
+ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
+ extern struct file * dentry_open_it(struct dentry *, struct vfsmount *, int, struct lookup_intent *);
+Index: linux-2.6.3-mm4/net/unix/af_unix.c
+===================================================================
+--- linux-2.6.3-mm4.orig/net/unix/af_unix.c 2004-02-26 14:22:03.000000000 +0800
++++ linux-2.6.3-mm4/net/unix/af_unix.c 2004-03-08 14:51:27.591605824 +0800
+@@ -592,6 +592,7 @@
+ int err = 0;
+
+ if (sunname->sun_path[0]) {
++ intent_init(&nd.intent, IT_LOOKUP);
+ err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
+ if (err)
+ goto fail;
--- /dev/null
+ 0 files changed
+
+.old..........pc/vfs_nointent_2.6.0-suse/fs/namei.c
+.new.........fs/namei.c
+Index: linux-2.6.3-20/fs/namei.c
+===================================================================
+--- linux-2.6.3-20.orig/fs/namei.c 2004-03-08 14:32:24.000000000 -0800
++++ linux-2.6.3-20/fs/namei.c 2004-03-08 14:40:01.000000000 -0800
+@@ -1276,7 +1276,7 @@
+ if (!error) {
+ DQUOT_INIT(inode);
+
+- error = do_truncate(dentry, 0);
++ error = do_truncate(dentry, 0, 1);
+ }
+ put_write_access(inode);
+ if (error)
+@@ -1526,6 +1526,7 @@
+ char * tmp;
+ struct dentry * dentry;
+ struct nameidata nd;
++ intent_init(&nd.intent, IT_LOOKUP);
+
+ if (S_ISDIR(mode))
+ return -EPERM;
+@@ -1536,6 +1537,15 @@
+ error = path_lookup(tmp, LOOKUP_PARENT, &nd);
+ if (error)
+ goto out;
++
++ if (nd.dentry->d_inode->i_op->mknod_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->mknod_raw(&nd, mode, dev);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
++
+ dentry = lookup_create(&nd, 0);
+ error = PTR_ERR(dentry);
+
+@@ -1562,6 +1572,7 @@
+ dput(dentry);
+ }
+ up(&nd.dentry->d_inode->i_sem);
++out2:
+ path_release(&nd);
+ out:
+ putname(tmp);
+@@ -1603,10 +1614,18 @@
+ if (!IS_ERR(tmp)) {
+ struct dentry *dentry;
+ struct nameidata nd;
++ intent_init(&nd.intent, IT_LOOKUP);
+
+ error = path_lookup(tmp, LOOKUP_PARENT, &nd);
+ if (error)
+ goto out;
++ if (nd.dentry->d_inode->i_op->mkdir_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->mkdir_raw(&nd, mode);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
+ dentry = lookup_create(&nd, 1);
+ error = PTR_ERR(dentry);
+ if (!IS_ERR(dentry)) {
+@@ -1616,6 +1635,7 @@
+ dput(dentry);
+ }
+ up(&nd.dentry->d_inode->i_sem);
++out2:
+ path_release(&nd);
+ out:
+ putname(tmp);
+@@ -1696,6 +1716,7 @@
+ char * name;
+ struct dentry *dentry;
+ struct nameidata nd;
++ intent_init(&nd.intent, IT_LOOKUP);
+
+ name = getname(pathname);
+ if(IS_ERR(name))
+@@ -1716,6 +1737,16 @@
+ error = -EBUSY;
+ goto exit1;
+ }
++
++ if (nd.dentry->d_inode->i_op->rmdir_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++ error = op->rmdir_raw(&nd);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit1;
++ }
++
+ down(&nd.dentry->d_inode->i_sem);
+ dentry = lookup_hash(&nd.last, nd.dentry);
+ error = PTR_ERR(dentry);
+@@ -1774,6 +1805,7 @@
+ struct dentry *dentry;
+ struct nameidata nd;
+ struct inode *inode = NULL;
++ intent_init(&nd.intent, IT_LOOKUP);
+
+ name = getname(pathname);
+ if(IS_ERR(name))
+@@ -1785,6 +1817,13 @@
+ error = -EISDIR;
+ if (nd.last_type != LAST_NORM)
+ goto exit1;
++ if (nd.dentry->d_inode->i_op->unlink_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->unlink_raw(&nd);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit1;
++ }
+ down(&nd.dentry->d_inode->i_sem);
+ dentry = lookup_hash(&nd.last, nd.dentry);
+ error = PTR_ERR(dentry);
+@@ -1852,10 +1891,18 @@
+ if (!IS_ERR(to)) {
+ struct dentry *dentry;
+ struct nameidata nd;
++ intent_init(&nd.intent, IT_LOOKUP);
+
+ error = path_lookup(to, LOOKUP_PARENT, &nd);
+ if (error)
+ goto out;
++ if (nd.dentry->d_inode->i_op->symlink_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->symlink_raw(&nd, from);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out2;
++ }
+ dentry = lookup_create(&nd, 0);
+ error = PTR_ERR(dentry);
+ if (!IS_ERR(dentry)) {
+@@ -1863,6 +1910,7 @@
+ dput(dentry);
+ }
+ up(&nd.dentry->d_inode->i_sem);
++out2:
+ path_release(&nd);
+ out:
+ putname(to);
+@@ -1926,6 +1974,8 @@
+ struct nameidata nd, old_nd;
+ int error;
+ char * to;
++ intent_init(&nd.intent, IT_LOOKUP);
++ intent_init(&old_nd.intent, IT_LOOKUP);
+
+ to = getname(newname);
+ if (IS_ERR(to))
+@@ -1940,6 +1990,13 @@
+ error = -EXDEV;
+ if (old_nd.mnt != nd.mnt)
+ goto out_release;
++ if (nd.dentry->d_inode->i_op->link_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++ error = op->link_raw(&old_nd, &nd);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto out_release;
++ }
+ new_dentry = lookup_create(&nd, 0);
+ error = PTR_ERR(new_dentry);
+ if (!IS_ERR(new_dentry)) {
+@@ -1990,7 +2047,7 @@
+ * locking].
+ */
+ int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
+- struct inode *new_dir, struct dentry *new_dentry)
++ struct inode *new_dir, struct dentry *new_dentry)
+ {
+ int error = 0;
+ struct inode *target;
+@@ -2035,7 +2092,7 @@
+ }
+
+ int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
+- struct inode *new_dir, struct dentry *new_dentry)
++ struct inode *new_dir, struct dentry *new_dentry)
+ {
+ struct inode *target;
+ int error;
+@@ -2112,6 +2169,8 @@
+ struct dentry * old_dentry, *new_dentry;
+ struct dentry * trap;
+ struct nameidata oldnd, newnd;
++ intent_init(&oldnd.intent, IT_LOOKUP);
++ intent_init(&newnd.intent, IT_LOOKUP);
+
+ error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
+ if (error)
+@@ -2134,6 +2193,13 @@
+ if (newnd.last_type != LAST_NORM)
+ goto exit2;
+
++ if (old_dir->d_inode->i_op->rename_raw) {
++ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto exit2;
++ }
++
+ trap = lock_rename(new_dir, old_dir);
+
+ old_dentry = lookup_hash(&oldnd.last, old_dir);
+@@ -2165,8 +2231,7 @@
+ if (new_dentry == trap)
+ goto exit5;
+
+- error = vfs_rename(old_dir->d_inode, old_dentry,
+- new_dir->d_inode, new_dentry);
++ error = vfs_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, new_dentry);
+ exit5:
+ dput(new_dentry);
+ exit4:
+.old..........pc/vfs_nointent_2.6.0-suse/fs/open.c
+.new.........fs/open.c
+Index: linux-2.6.3-20/fs/open.c
+===================================================================
+--- linux-2.6.3-20.orig/fs/open.c 2004-03-08 14:23:40.000000000 -0800
++++ linux-2.6.3-20/fs/open.c 2004-03-08 14:40:01.000000000 -0800
+@@ -180,9 +180,10 @@
+ return error;
+ }
+
+-int do_truncate(struct dentry *dentry, loff_t length)
++int do_truncate(struct dentry *dentry, loff_t length, int called_from_open)
+ {
+ int err;
++ struct inode_operations *op = dentry->d_inode->i_op;
+ struct iattr newattrs;
+
+ /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
+@@ -192,7 +193,14 @@
+ newattrs.ia_size = length;
+ newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME;
+ down(&dentry->d_inode->i_sem);
+- err = notify_change(dentry, &newattrs);
++ if (called_from_open)
++ newattrs.ia_valid |= ATTR_FROM_OPEN;
++ if (op->setattr_raw) {
++ newattrs.ia_valid |= ATTR_RAW;
++ newattrs.ia_ctime = CURRENT_TIME;
++ err = op->setattr_raw(dentry->d_inode, &newattrs);
++ } else
++ err = notify_change(dentry, &newattrs);
+ up(&dentry->d_inode->i_sem);
+ return err;
+ }
+@@ -247,7 +255,7 @@
+ error = locks_verify_truncate(inode, NULL, length);
+ if (!error) {
+ DQUOT_INIT(inode);
+- error = do_truncate(nd.dentry, length);
++ error = do_truncate(nd.dentry, length, 0);
+ }
+ put_write_access(inode);
+
+@@ -299,7 +307,7 @@
+
+ error = locks_verify_truncate(inode, file, length);
+ if (!error)
+- error = do_truncate(dentry, length);
++ error = do_truncate(dentry, length, 0);
+ out_putf:
+ fput(file);
+ out:
+@@ -378,9 +386,19 @@
+ (error = permission(inode,MAY_WRITE,&nd)) != 0)
+ goto dput_and_out;
+ }
+- down(&inode->i_sem);
+- error = notify_change(nd.dentry, &newattrs);
+- up(&inode->i_sem);
++ if (inode->i_op->setattr_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++ newattrs.ia_valid |= ATTR_RAW;
++ error = op->setattr_raw(inode, &newattrs);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto dput_and_out;
++ } else {
++ down(&inode->i_sem);
++ error = notify_change(nd.dentry, &newattrs);
++ up(&inode->i_sem);
++ }
+ dput_and_out:
+ path_release(&nd);
+ out:
+@@ -431,9 +449,19 @@
+ (error = permission(inode,MAY_WRITE,&nd)) != 0)
+ goto dput_and_out;
+ }
+- down(&inode->i_sem);
+- error = notify_change(nd.dentry, &newattrs);
+- up(&inode->i_sem);
++ if (inode->i_op->setattr_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++ newattrs.ia_valid |= ATTR_RAW;
++ error = op->setattr_raw(inode, &newattrs);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto dput_and_out;
++ } else {
++ down(&inode->i_sem);
++ error = notify_change(nd.dentry, &newattrs);
++ up(&inode->i_sem);
++ }
+ dput_and_out:
+ path_release(&nd);
+ out:
+@@ -634,6 +662,18 @@
+ if (IS_RDONLY(inode))
+ goto dput_and_out;
+
++ if (inode->i_op->setattr_raw) {
++ struct inode_operations *op = nd.dentry->d_inode->i_op;
++
++ newattrs.ia_mode = mode;
++ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
++ newattrs.ia_valid |= ATTR_RAW;
++ error = op->setattr_raw(inode, &newattrs);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ goto dput_and_out;
++ }
++
+ error = -EPERM;
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ goto dput_and_out;
+@@ -667,6 +707,18 @@
+ if (IS_RDONLY(inode))
+ goto out;
+ error = -EPERM;
++ if (inode->i_op->setattr_raw) {
++ struct inode_operations *op = dentry->d_inode->i_op;
++
++ newattrs.ia_uid = user;
++ newattrs.ia_gid = group;
++ newattrs.ia_valid = ATTR_UID | ATTR_GID;
++ newattrs.ia_valid |= ATTR_RAW;
++ error = op->setattr_raw(inode, &newattrs);
++ /* the file system wants to use normal vfs path now */
++ if (error != -EOPNOTSUPP)
++ return error;
++ }
+ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
+ goto out;
+ newattrs.ia_valid = ATTR_CTIME;
+@@ -680,6 +732,7 @@
+ }
+ if (!S_ISDIR(inode->i_mode))
+ newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID;
++
+ down(&inode->i_sem);
+ error = notify_change(dentry, &newattrs);
+ up(&inode->i_sem);
+.old..........pc/vfs_nointent_2.6.0-suse/fs/exec.c
+.new.........fs/exec.c
+Index: linux-2.6.3-20/fs/exec.c
+===================================================================
+--- linux-2.6.3-20.orig/fs/exec.c 2004-03-08 14:23:40.000000000 -0800
++++ linux-2.6.3-20/fs/exec.c 2004-03-08 14:40:01.000000000 -0800
+@@ -1406,7 +1406,7 @@
+ goto close_fail;
+ if (!file->f_op->write)
+ goto close_fail;
+- if (do_truncate(file->f_dentry, 0) != 0)
++ if (do_truncate(file->f_dentry, 0, 0) != 0)
+ goto close_fail;
+
+ retval = binfmt->core_dump(signr, regs, file);
+.old..........pc/vfs_nointent_2.6.0-suse/include/linux/fs.h
+.new.........include/linux/fs.h
+Index: linux-2.6.3-20/include/linux/fs.h
+===================================================================
+--- linux-2.6.3-20.orig/include/linux/fs.h 2004-03-08 14:23:41.000000000 -0800
++++ linux-2.6.3-20/include/linux/fs.h 2004-03-08 14:42:10.000000000 -0800
+@@ -838,13 +838,20 @@
+ int (*create) (struct inode *,struct dentry *,int, struct nameidata *);
+ struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *);
+ int (*link) (struct dentry *,struct inode *,struct dentry *);
++ int (*link_raw) (struct nameidata *,struct nameidata *);
+ int (*unlink) (struct inode *,struct dentry *);
++ int (*unlink_raw) (struct nameidata *);
+ int (*symlink) (struct inode *,struct dentry *,const char *);
++ int (*symlink_raw) (struct nameidata *,const char *);
+ int (*mkdir) (struct inode *,struct dentry *,int);
++ int (*mkdir_raw) (struct nameidata *,int);
+ int (*rmdir) (struct inode *,struct dentry *);
++ int (*rmdir_raw) (struct nameidata *);
+ int (*mknod) (struct inode *,struct dentry *,int,dev_t);
++ int (*mknod_raw) (struct nameidata *,int,dev_t);
+ int (*rename) (struct inode *, struct dentry *,
+ struct inode *, struct dentry *);
++ int (*rename_raw) (struct nameidata *, struct nameidata *);
+ int (*readlink) (struct dentry *, char __user *,int);
+ int (*follow_link) (struct dentry *, struct nameidata *);
+ void (*truncate) (struct inode *);
+@@ -1130,7 +1137,7 @@
+
+ /* fs/open.c */
+
+-extern int do_truncate(struct dentry *, loff_t start);
++extern int do_truncate(struct dentry *, loff_t start, int called_from_open);
+ extern struct file *filp_open(const char *, int, int);
+ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int);
+ extern struct file * dentry_open_it(struct dentry *, struct vfsmount *, int, struct lookup_intent *);
+.old..........pc/vfs_nointent_2.6.0-suse/net/unix/af_unix.c
+.new.........net/unix/af_unix.c
+Index: linux-2.6.3-20/net/unix/af_unix.c
+===================================================================
+--- linux-2.6.3-20.orig/net/unix/af_unix.c 2004-03-05 02:07:04.000000000 -0800
++++ linux-2.6.3-20/net/unix/af_unix.c 2004-03-08 14:40:01.000000000 -0800
+@@ -592,6 +592,7 @@
+ int err = 0;
+
+ if (sunname->sun_path[0]) {
++ intent_init(&nd.intent, IT_LOOKUP);
+ err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd);
+ if (err)
+ goto fail;
--- /dev/null
+lustre_version.patch
+vfs_intent-2.6.3.patch
+vfs_nointent-2.6.3-mm4.patch
+vfs_races_2.5.72_rev1.patch
+ext3-wantedi-2.6.3.patch
+ext3-san-jdike-2.5.73.patch
+iopen-2.6.3-mm4.patch
+export-truncate-2.5.63.patch
+export_symbols-2.6.0.patch
+ext3-map_inode_page-2.6.0.patch
+removepage-2.6.0.patch
+dev_read_only_2.6.0.patch
+kernel_text_address-2.6.3.patch
+ext3-init-generation-2.6.0.patch
+ext3-ea-in-inode-2.6.0.patch
+fs-intent-2.6.3-mm4.patch
+export-2.6.3.patch
--- /dev/null
+lustre_version.patch
+vfs_intent-2.6.3-suse.patch
+vfs_nointent_2.6.3-suse.patch
+vfs_races_2.5.72_rev1.patch
+ext3-wantedi-2.6.3.patch
+ext3-san-jdike-2.5.73.patch
+nfs-cifs-intent-2.6.3-suse.patch
+iopen-2.6.0-test6.patch
+export-truncate-2.5.63.patch
+export_symbols-2.6.0.patch
+ext3-map_inode_page-2.6.0.patch
+removepage-2.6.0.patch
+dev_read_only_2.6.0.patch
+kernel_text_address-2.6.3.patch
+ext3-init-generation-2.6.0.patch
+ext3-ea-in-inode-2.6.0.patch
+export-2.6.3.patch
--- /dev/null
+uml-patch-2.6.3-rc2-1.patch
+uml-fix-2.6.3.patch
+lustre_version.patch
+vfs_intent-2.6.3.patch
+vfs_nointent_2.6.0-uml1.patch
+vfs_races_2.5.72_rev1.patch
+ext3-wantedi-2.6.3.patch
+ext3-san-jdike-2.5.73.patch
+iopen-2.6.0-test6.patch
+export-truncate-2.5.63.patch
+export_symbols-2.6.0.patch
+ext3-map_inode_page-2.6.0.patch
+removepage-2.6.0.patch
+dev_read_only_2.6.0.patch
+kernel_text_address-2.6.3.patch
+ext3-init-generation-2.6.0.patch
+ext3-ea-in-inode-2.6.0.patch
+linux-2.6.3-CITI_NFS4_ALL.patch
+linux-2.6.3-nfs-intent.patch
+export-2.6.3.patch
modulefs_DATA = llite.o
EXTRA_PROGRAMS = llite
-llite_SOURCES = dcache.c dir.c file.c llite_close.c llite_lib.c llite_nfs.c
-llite_SOURCES += lproc_llite.c namei.c rw.c rw24.c super.c super25.c
-llite_SOURCES += symlink.c sysctl.c llite_internal.h
+COMMON_SRC = dcache.c dir.c file.c llite_close.c llite_lib.c llite_nfs.c rw.c \
+ lproc_llite.c namei.c symlink.c sysctl.c llite_internal.h
+
+if LINUX25
+llite_SOURCES = $(COMMON_SRC) rw26.c super25.c
+else
+llite_SOURCES = $(COMMON_SRC) rw24.c super.c
+endif
include $(top_srcdir)/Rules
GOTO(out_root, err);
}
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
+#warning "Please fix this"
+#else
/* bug 2805 - set VM readahead to zero */
vm_max_readahead = vm_min_readahead = 0;
+#endif
sb->s_root = d_alloc_root(root);
RETURN(err);
writepage: ll_writepage_26,
writepages: generic_writepages,
set_page_dirty: __set_page_dirty_nobuffers,
- sync_page: ll_sync_page,
+ sync_page: NULL,
prepare_write: ll_prepare_write,
commit_write: ll_commit_write,
removepage: ll_removepage,
#include <linux/lprocfs_status.h>
#include "llite_internal.h"
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
extern struct address_space_operations ll_aops;
extern struct address_space_operations ll_dir_aops;
module_init(init_lustre_lite);
module_exit(exit_lustre_lite);
-#endif
#include <linux/lprocfs_status.h>
#include "llite_internal.h"
-#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
-
struct super_block * ll_get_sb(struct file_system_type *fs_type,
int flags, const char *devname, void * data)
{
module_init(init_lustre_lite);
module_exit(exit_lustre_lite);
-#endif
# This code is issued under the GNU General Public License.
# See the file COPYING in this distribution
DEFS=
-MODULE = lvfs
-
-
-if EXTN
-FSMOD = fsfilt_extN
-else
-FSMOD = fsfilt_ext3
-endif
-
if LIBLUSTRE
noinst_LIBRARIES = liblvfs.a
#endif
else
-modulefs_DATA = lvfs.o $(FSMOD).o fsfilt_reiserfs.o
-EXTRA_PROGRAMS = lvfs $(FSMOD) fsfilt_reiserfs
-lvfs_SOURCES = lvfs_common.c lvfs_linux.c fsfilt.c lvfs_internal.h
+MODULE = lvfs
+
+if EXTN
+FSMOD = fsfilt_extN
+else
+FSMOD = fsfilt_ext3
endif
+modulefs_DATA = lvfs.o $(FSMOD).o
+
+EXTRA_PROGRAMS = lvfs $(FSMOD)
+lvfs_SOURCES = lvfs_common.c lvfs_linux.c fsfilt.c lvfs_internal.h
+if EXTN
+fsfilt_extN_SOURCES = fsfilt_extN.c lvfs_internal.h
+else
+fsfilt_ext3_SOURCES = fsfilt_ext3.c lvfs_internal.h
+endif
+endif
include $(top_srcdir)/Rules
+
+if LINUX25
+# workaround for fsfilt_ext3
+$(FSMOD).o: $(FSMOD).c
+ $(COMPILE) -UKBUILD_MODNAME -DKBUILD_MODNAME=$(FSMOD) -c -o $(FSMOD)_tmp.o $<
+ rm -f $(FSMOD)_tmp.c
+ $(LINUX)/scripts/modpost $(LINUX)/vmlinux $(FSMOD)_tmp.o
+ $(COMPILE) -UKBUILD_MODNAME -UKBUILD_BASENAME -DKBUILD_BASENAME=$(FSMOD) \
+ -c $(FSMOD)_tmp.mod.c
+ $(LD) -m "`$(LD) --help | awk '/supported emulations/ {print $$4}'`" -r \
+ -o $(FSMOD).o $(FSMOD)_tmp.o $(FSMOD)_tmp.mod.o
+endif
#define DEBUG_SUBSYSTEM S_FILTER
+#include <linux/init.h>
+#include <linux/module.h>
#include <linux/fs.h>
#include <linux/jbd.h>
#include <linux/slab.h>
#include <linux/lustre_fsfilt.h>
#include <linux/obd.h>
#include <linux/obd_class.h>
-#include <linux/module.h>
static kmem_cache_t *fcb_cache;
static atomic_t fcb_cache_count = ATOMIC_INIT(0);
# define ASSERT_KERNEL_CTXT(msg) do {} while(0)
#endif
+#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0))
+#define current_ngroups current->group_info->ngroups
+#define current_groups current->group_info->small_block
+#else
+#define current_ngroups current->ngroups
+#define current_groups current->groups
+#endif
+
/* push / pop to root of obd store */
void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx,
struct obd_ucred *uc)
LASSERT(atomic_read(&new_ctx->pwd->d_count));
save->pwd = dget(current->fs->pwd);
save->pwdmnt = mntget(current->fs->pwdmnt);
- save->ngroups = current->ngroups;
+ save->ngroups = current_ngroups;
LASSERT(save->pwd);
LASSERT(save->pwdmnt);
save->ouc.ouc_fsuid = current->fsuid;
save->ouc.ouc_fsgid = current->fsgid;
save->ouc.ouc_cap = current->cap_effective;
- save->ouc.ouc_suppgid1 = current->groups[0];
- save->ouc.ouc_suppgid2 = current->groups[1];
+ save->ouc.ouc_suppgid1 = current_groups[0];
+ save->ouc.ouc_suppgid2 = current_groups[1];
current->fsuid = uc->ouc_fsuid;
current->fsgid = uc->ouc_fsgid;
current->cap_effective = uc->ouc_cap;
- current->ngroups = 0;
+ current_ngroups = 0;
if (uc->ouc_suppgid1 != -1)
- current->groups[current->ngroups++] = uc->ouc_suppgid1;
+ current_groups[current_ngroups++] = uc->ouc_suppgid1;
if (uc->ouc_suppgid2 != -1)
- current->groups[current->ngroups++] = uc->ouc_suppgid2;
+ current_groups[current_ngroups++] = uc->ouc_suppgid2;
}
set_fs(new_ctx->fs);
set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd);
current->fsuid = saved->ouc.ouc_fsuid;
current->fsgid = saved->ouc.ouc_fsgid;
current->cap_effective = saved->ouc.ouc_cap;
- current->ngroups = saved->ngroups;
- current->groups[0] = saved->ouc.ouc_suppgid1;
- current->groups[1] = saved->ouc.ouc_suppgid2;
+ current_ngroups = saved->ngroups;
+ current_groups[0] = saved->ouc.ouc_suppgid1;
+ current_groups[1] = saved->ouc.ouc_suppgid2;
}
/*
int rc;
ENTRY;
-
- /* Get rid of unneeded supplementary groups */
- current->ngroups = 0;
- memset(current->groups, 0, sizeof(current->groups));
+ rc = cleanup_group_info();
+ if (rc)
+ RETURN(rc);
mds->mds_vfsmnt = mnt;
mds->mds_sb = mnt->mnt_root->d_inode->i_sb;
endif
include $(top_srcdir)/Rules
+
+if LINUX25
+# workaround for llog_test
+llog_test.o: llog_test.c
+ $(COMPILE) -UKBUILD_MODNAME -DKBUILD_MODNAME=llog_test -c -o llog_test_tmp.o $<
+ rm -f llog_test_tmp.c
+ $(LINUX)/scripts/modpost $(LINUX)/vmlinux llog_test_tmp.o
+ $(COMPILE) -UKBUILD_MODNAME -UKBUILD_BASENAME -DKBUILD_BASENAME=llog_test \
+ -c llog_test_tmp.mod.c
+ $(LD) -m "`$(LD) --help | awk '/supported emulations/ {print $$4}'`" -r \
+ -o llog_test.o llog_test_tmp.o llog_test_tmp.mod.o
+endif
MODULE = obdfilter
modulefs_DATA = obdfilter.o
EXTRA_PROGRAMS = obdfilter
-obdfilter_SOURCES = filter.c filter_io.c filter_log.c filter_san.c \
- filter_io_24.c lproc_obdfilter.c filter_internal.h filter_lvb.c
+
+COMMON_SRC = filter.c filter_io.c filter_log.c filter_san.c lproc_obdfilter.c \
+ filter_internal.h filter_lvb.c
+
+if LINUX25
+obdfilter_SOURCES = $(COMMON_SRC) filter_io_26.c
+else
+obdfilter_SOURCES = $(COMMON_SRC) filter_io_24.c
+endif
include $(top_srcdir)/Rules
#include <linux/pagemap.h> // XXX kill me soon
#include <linux/version.h>
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
-
#define DEBUG_SUBSYSTEM S_FILTER
#include <linux/iobuf.h>
RETURN(rc);
}
-
-#endif
-
int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount,
struct obd_ioobj *obj, int niocount,
- struct niobuf_local *res, struct obd_trans_info *oti)
+ struct niobuf_local *res, struct obd_trans_info *oti,
+ int rc)
{
struct obd_device *obd = exp->exp_obd;
struct obd_run_ctxt saved;
struct fsfilt_objinfo fso;
struct iattr iattr = { .ia_valid = ATTR_SIZE, .ia_size = 0, };
struct inode *inode = NULL;
- int rc = 0, i, k, cleanup_phase = 0, err;
+ int i, k, cleanup_phase = 0, err;
unsigned long now = jiffies; /* DEBUGGING OST TIMEOUTS */
int blocks_per_page;
struct dio_request *dreq;
LASSERT(objcount == 1);
LASSERT(current->journal_info == NULL);
+ if (rc != 0)
+ GOTO(cleanup, rc);
+
inode = res->dentry->d_inode;
blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
LASSERT(blocks_per_page <= MAX_BLOCKS_PER_PAGE);
int rc;
ENTRY;
- /* Get rid of unneeded supplementary groups */
- current->ngroups = 0;
- memset(current->groups, 0, sizeof(current->groups));
+ rc = cleanup_group_info();
+ if (rc)
+ RETURN(rc);
rc = llog_start_commit_thread();
if (rc < 0)
if LINUX25
-basename=$(shell echo $< | sed -e 's/\.c//g' | sed -e 's/-//g' | sed -e 's/\.o//g')
+basename=$(shell echo $< | sed -e 's/\.c//g' | sed -e 's/-//g' | sed -e 's/\.o//g' | sed -e 's/^.*\///g')
AM_CPPFLAGS= -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -mpreferred-stack-boundary=2 -DKBUILD_MODNAME=$(MODULE) -DKBUILD_BASENAME=$(basename)
-$(MODULE).o: $($(MODULE)_OBJECTS)
- $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $($(MODULE)_OBJECTS)
+$(MODULE).o: $($(MODULE)_OBJECTS) $($(MODULE)_DEPENDENCIES)
+ $(LD) -m $(MOD_LINK) -r -o $(MODULE)_tmp.o $($(MODULE)_OBJECTS)
+ rm -f $(MODULE)_tmp.c
+ $(LINUX)/scripts/modpost $(LINUX)/vmlinux $(MODULE)_tmp.o
+ $(COMPILE) -UKBUILD_BASENAME -DKBUILD_BASENAME=$(MODULE) -c $(MODULE)_tmp.mod.c
+ $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $(MODULE)_tmp.o $(MODULE)_tmp.mod.o
else
KCFLAGS='-g -Wall -pipe -Wno-trigraphs -Wstrict-prototypes -fno-strict-aliasing -fno-common '
case ${linux25} in
yes )
- KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/include -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/kernel/skas/include -O2 -nostdinc -iwithprefix include -DKBUILD_BASENAME=$(MODULE) -DKBUILD_MODNAME=$(MODULE) '
+ KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/include -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/kernel/skas/include -O2 -nostdinc -iwithprefix include'
;;
* )
KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/include '
AC_MSG_CHECKING(for MODVERSIONS)
if egrep -e 'MODVERSIONS.*1' $LINUX/include/linux/autoconf.h >/dev/null 2>&1;
then
- MFLAGS="-DMODULE -DMODVERSIONS -include $LINUX/include/linux/modversions.h -DEXPORT_SYMTAB"
- AC_MSG_RESULT(yes)
- else
- MFLAGS=
- AC_MSG_RESULT(no)
+ if test $linux25 != "yes"; then
+ MFLAGS="-DMODULE -DMODVERSIONS -include $LINUX/include/linux/modversions.h -DEXPORT_SYMTAB"
+ AC_MSG_RESULT(yes)
+ fi
fi
fi
/* FIXME: Find a better method of setting IRQ affinity...
*/
- call_usermodehelper (argv[0], argv, envp);
+ USERMODEHELPER(argv[0], argv, envp);
#endif
}
#define DEBUG_SUBSYSTEM S_SOCKNAL
#include <linux/kp30.h>
+#include <linux/portals_compat25.h>
#include <portals/p30.h>
#include <portals/lib-p30.h>
#include <portals/socknal.h>
#include <sys/stat.h>
#include <sys/mman.h>
-#define BUG() /* workaround for module.h includes */
#include <linux/version.h>
#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+#define BUG() /* workaround for module.h includes */
#include <linux/module.h>
#endif
return 0;
}
+static struct mod_paths { /* lookup table: module name -> directory holding its .o */
+ char *name, *path; /* name is compared with the module name read from /proc/modules; path is printed before "<name>.o" */
+} mod_paths[] = {
+ {"portals", "lustre/portals/libcfs"},
+ {"ksocknal", "lustre/portals/knals/socknal"},
+ {"kptlrouter", "lustre/portals/router"},
+ {"lvfs", "lustre/lvfs"},
+ {"obdclass", "lustre/obdclass"},
+ {"llog_test", "lustre/obdclass"},
+ {"ptlrpc", "lustre/ptlrpc"},
+ {"obdext2", "lustre/obdext2"},
+ {"ost", "lustre/ost"},
+ {"osc", "lustre/osc"},
+ {"mds", "lustre/mds"},
+ {"mdc", "lustre/mdc"},
+ {"llite", "lustre/llite"},
+ {"obdecho", "lustre/obdecho"},
+ {"ldlm", "lustre/ldlm"},
+ {"obdfilter", "lustre/obdfilter"},
+ {"extN", "lustre/extN"},
+ {"lov", "lustre/lov"},
+ {"fsfilt_ext3", "lustre/lvfs"},
+ {"fsfilt_extN", "lustre/lvfs"},
+ {"fsfilt_reiserfs", "lustre/lvfs"},
+ {"mds_ext2", "lustre/mds"},
+ {"mds_ext3", "lustre/mds"},
+ {"mds_extN", "lustre/mds"},
+ {"ptlbd", "lustre/ptlbd"},
+ {"mgmt_svc", "lustre/mgmt"},
+ {"mgmt_cli", "lustre/mgmt"},
+ {NULL, NULL} /* sentinel: table walks stop at the entry whose name is NULL */
+};
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
int jt_dbg_modules(int argc, char **argv)
{
-#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
- struct mod_paths {
- char *name, *path;
- } *mp, mod_paths[] = {
- {"portals", "lustre/portals/libcfs"},
- {"ksocknal", "lustre/portals/knals/socknal"},
- {"kptlrouter", "lustre/portals/router"},
- {"lvfs", "lustre/lvfs"},
- {"obdclass", "lustre/obdclass"},
- {"llog_test", "lustre/obdclass"},
- {"ptlrpc", "lustre/ptlrpc"},
- {"obdext2", "lustre/obdext2"},
- {"ost", "lustre/ost"},
- {"osc", "lustre/osc"},
- {"mds", "lustre/mds"},
- {"mdc", "lustre/mdc"},
- {"llite", "lustre/llite"},
- {"obdecho", "lustre/obdecho"},
- {"ldlm", "lustre/ldlm"},
- {"obdfilter", "lustre/obdfilter"},
- {"extN", "lustre/extN"},
- {"lov", "lustre/lov"},
- {"fsfilt_ext3", "lustre/lvfs"},
- {"fsfilt_extN", "lustre/lvfs"},
- {"fsfilt_reiserfs", "lustre/lvfs"},
- {"mds_ext2", "lustre/mds"},
- {"mds_ext3", "lustre/mds"},
- {"mds_extN", "lustre/mds"},
- {"ptlbd", "lustre/ptlbd"},
- {"mgmt_svc", "lustre/mgmt"},
- {"mgmt_cli", "lustre/mgmt"},
- {NULL, NULL}
- };
+ struct mod_paths *mp;
char *path = "..";
char *kernel = "linux";
}
return 0;
+}
#else
- printf("jt_dbg_module is not yet implemented for Linux 2.5\n");
+/*
+ * Linux 2.6 variant: print one gdb "add-symbol-file" command for every
+ * loaded module that has an entry in mod_paths.
+ *
+ * Each /proc/modules line is parsed as six whitespace-separated fields; only
+ * the first (module name) and the sixth (hexadecimal address) are kept.
+ *
+ * argv[1] (optional) is the prefix printed in front of each module's
+ * directory (default ".."); argv[2] (optional) is stored as the kernel name
+ * but is not used by this variant.  More than two arguments prints a usage
+ * line.  Always returns 0, including when /proc/modules cannot be opened.
+ */
+int jt_dbg_modules(int argc, char **argv)
+{
+ struct mod_paths *mp;
+ char *path = "..";
+ char *kernel = "linux";
+ const char *proc = "/proc/modules";
+ char modname[128];
+ unsigned long modaddr; /* %lx stores through an unsigned long pointer */
+ int rc;
+ FILE *file;
+
+ if (argc >= 2)
+ path = argv[1];
+ if (argc == 3)
+ kernel = argv[2];
+ if (argc > 3) {
+ printf("%s [path] [kernel]\n", argv[0]);
+ return 0;
+ }
+
+ file = fopen(proc, "r");
+ if (!file) {
+ printf("failed open %s: %s\n", proc, strerror(errno));
+ return 0;
+ }
+
+ /*
+ * %127s keeps the name inside modname[]; %*s skips the four middle
+ * fields without storing them, so an arbitrarily long field (e.g. the
+ * list of dependent modules) cannot overflow a stack buffer.  fscanf
+ * counts only stored conversions, hence the comparison with 2.
+ */
+ while ((rc = fscanf(file, "%127s %*s %*s %*s %*s %lx\n",
+ modname, &modaddr)) == 2) {
+ for (mp = mod_paths; mp->name != NULL; mp++) {
+ if (!strcmp(mp->name, modname))
+ break;
+ }
+ if (mp->name) {
+ printf("add-symbol-file %s/%s/%s.o 0x%0lx\n", path,
+ mp->path, mp->name, modaddr);
+ }
+ }
+
+ /* release the stream; the original leaked it on every call */
+ fclose(file);
+
return 0;
-#endif /* linux 2.5 */
}
+#endif /* linux 2.5 */
int jt_dbg_panic(int argc, char **argv)
{
#!/bin/sh -e
CONFLICTS=cvs-merge-conflicts
-CVS=cvs
+CVS="cvs -z3"
if [ -f .mergeinfo ] ; then
echo ".mergeinfo exists - clean up first"