From 4721137e38a657ab5fdccb2b75c7a7d0e3957a4d Mon Sep 17 00:00:00 2001 From: nic Date: Wed, 17 Mar 2004 01:04:43 +0000 Subject: [PATCH] land b_v26 (20040316_1603) --- lnet/Rules.linux | 10 +- lnet/archdep.m4 | 11 +- lnet/klnds/socklnd/socklnd.c | 2 +- lnet/klnds/socklnd/socklnd.h | 1 + lnet/utils/debug.c | 112 +- lustre/Rules | 19 +- lustre/include/linux/lustre_compat25.h | 23 + lustre/kernel_patches/patches/export-2.6.3.patch | 57 + .../patches/fs-intent-2.6.3-mm4.patch | 116 + .../kernel_patches/patches/iopen-2.6.3-mm4.patch | 422 + .../patches/linux-2.6.3-CITI_NFS4_ALL.patch | 14513 ++++++++++++++ .../patches/linux-2.6.3-nfs-intent.patch | 76 + .../patches/nfs-cifs-intent-2.6.3-suse.patch | 111 + lustre/kernel_patches/patches/uml-fix-2.6.3.patch | 13 + .../patches/uml-patch-2.6.3-rc2-1.patch | 18844 +++++++++++++++++++ .../patches/vfs_intent-2.6.3-suse.patch | 777 + .../patches/vfs_nointent-2.6.3-mm4.patch | 423 + .../patches/vfs_nointent_2.6.3-suse.patch | 433 + lustre/kernel_patches/series/2.6.3-mm4 | 17 + lustre/kernel_patches/series/suse-2.6.3 | 17 + lustre/kernel_patches/series/vanilla-2.6.3-nfs4 | 20 + lustre/llite/Makefile.am | 11 +- lustre/llite/llite_lib.c | 4 + lustre/llite/rw26.c | 2 +- lustre/llite/super.c | 2 - lustre/llite/super25.c | 3 - lustre/lvfs/Makefile.am | 40 +- lustre/lvfs/fsfilt_ext3.c | 3 +- lustre/lvfs/lvfs_linux.c | 26 +- lustre/mds/mds_fs.c | 7 +- lustre/obdclass/Makefile.am | 12 + lustre/obdfilter/Makefile.am | 11 +- lustre/obdfilter/filter_io_24.c | 5 - lustre/obdfilter/filter_io_26.c | 8 +- lustre/ost/ost_handler.c | 6 +- lustre/portals/Rules.linux | 10 +- lustre/portals/archdep.m4 | 11 +- lustre/portals/knals/socknal/socknal.c | 2 +- lustre/portals/knals/socknal/socknal.h | 1 + lustre/portals/utils/debug.c | 112 +- lustre/scripts/merge1.sh | 2 +- 41 files changed, 36151 insertions(+), 144 deletions(-) create mode 100644 lustre/kernel_patches/patches/export-2.6.3.patch create mode 100644 
lustre/kernel_patches/patches/fs-intent-2.6.3-mm4.patch create mode 100644 lustre/kernel_patches/patches/iopen-2.6.3-mm4.patch create mode 100644 lustre/kernel_patches/patches/linux-2.6.3-CITI_NFS4_ALL.patch create mode 100644 lustre/kernel_patches/patches/linux-2.6.3-nfs-intent.patch create mode 100644 lustre/kernel_patches/patches/nfs-cifs-intent-2.6.3-suse.patch create mode 100644 lustre/kernel_patches/patches/uml-fix-2.6.3.patch create mode 100644 lustre/kernel_patches/patches/uml-patch-2.6.3-rc2-1.patch create mode 100644 lustre/kernel_patches/patches/vfs_intent-2.6.3-suse.patch create mode 100644 lustre/kernel_patches/patches/vfs_nointent-2.6.3-mm4.patch create mode 100644 lustre/kernel_patches/patches/vfs_nointent_2.6.3-suse.patch create mode 100644 lustre/kernel_patches/series/2.6.3-mm4 create mode 100644 lustre/kernel_patches/series/suse-2.6.3 create mode 100644 lustre/kernel_patches/series/vanilla-2.6.3-nfs4 diff --git a/lnet/Rules.linux b/lnet/Rules.linux index 93943b7..232a248 100644 --- a/lnet/Rules.linux +++ b/lnet/Rules.linux @@ -3,11 +3,15 @@ if LINUX25 -basename=$(shell echo $< | sed -e 's/\.c//g' | sed -e 's/-//g' | sed -e 's/\.o//g') +basename=$(shell echo $< | sed -e 's/\.c//g' | sed -e 's/-//g' | sed -e 's/\.o//g' | sed -e 's/^.*\///g') AM_CPPFLAGS= -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -mpreferred-stack-boundary=2 -DKBUILD_MODNAME=$(MODULE) -DKBUILD_BASENAME=$(basename) -$(MODULE).o: $($(MODULE)_OBJECTS) - $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $($(MODULE)_OBJECTS) +$(MODULE).o: $($(MODULE)_OBJECTS) $($(MODULE)_DEPENDENCIES) + $(LD) -m $(MOD_LINK) -r -o $(MODULE)_tmp.o $($(MODULE)_OBJECTS) + rm -f $(MODULE)_tmp.c + $(LINUX)/scripts/modpost $(LINUX)/vmlinux $(MODULE)_tmp.o + $(COMPILE) -UKBUILD_BASENAME -DKBUILD_BASENAME=$(MODULE) -c $(MODULE)_tmp.mod.c + $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $(MODULE)_tmp.o $(MODULE)_tmp.mod.o else diff --git a/lnet/archdep.m4 b/lnet/archdep.m4 index 7801957..65cfaff 100644 
--- a/lnet/archdep.m4 +++ b/lnet/archdep.m4 @@ -65,7 +65,7 @@ case ${host_cpu} in KCFLAGS='-g -Wall -pipe -Wno-trigraphs -Wstrict-prototypes -fno-strict-aliasing -fno-common ' case ${linux25} in yes ) - KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/include -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/kernel/skas/include -O2 -nostdinc -iwithprefix include -DKBUILD_BASENAME=$(MODULE) -DKBUILD_MODNAME=$(MODULE) ' + KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/include -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/kernel/skas/include -O2 -nostdinc -iwithprefix include' ;; * ) KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/include ' @@ -206,11 +206,10 @@ if test $host_cpu != "lib" ; then AC_MSG_CHECKING(for MODVERSIONS) if egrep -e 'MODVERSIONS.*1' $LINUX/include/linux/autoconf.h >/dev/null 2>&1; then - MFLAGS="-DMODULE -DMODVERSIONS -include $LINUX/include/linux/modversions.h -DEXPORT_SYMTAB" - AC_MSG_RESULT(yes) - else - MFLAGS= - AC_MSG_RESULT(no) + if test $linux25 != "yes"; then + MFLAGS="-DMODULE -DMODVERSIONS -include $LINUX/include/linux/modversions.h -DEXPORT_SYMTAB" + AC_MSG_RESULT(yes) + fi fi fi diff --git a/lnet/klnds/socklnd/socklnd.c b/lnet/klnds/socklnd/socklnd.c index 2c44b43..0dd5d11 100644 --- a/lnet/klnds/socklnd/socklnd.c +++ b/lnet/klnds/socklnd/socklnd.c @@ -196,7 +196,7 @@ ksocknal_bind_irq (unsigned int irq) /* FIXME: Find a better method of setting IRQ affinity... 
*/ - call_usermodehelper (argv[0], argv, envp); + USERMODEHELPER(argv[0], argv, envp); #endif } diff --git a/lnet/klnds/socklnd/socklnd.h b/lnet/klnds/socklnd/socklnd.h index db8c842..17a7e49 100644 --- a/lnet/klnds/socklnd/socklnd.h +++ b/lnet/klnds/socklnd/socklnd.h @@ -60,6 +60,7 @@ #define DEBUG_SUBSYSTEM S_SOCKNAL #include +#include #include #include #include diff --git a/lnet/utils/debug.c b/lnet/utils/debug.c index 01e690f..69880ea 100644 --- a/lnet/utils/debug.c +++ b/lnet/utils/debug.c @@ -43,10 +43,10 @@ #include #include -#define BUG() /* workaround for module.h includes */ #include #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#define BUG() /* workaround for module.h includes */ #include #endif @@ -524,41 +524,43 @@ int jt_dbg_mark_debug_buf(int argc, char **argv) return 0; } +static struct mod_paths { + char *name, *path; +} mod_paths[] = { + {"portals", "lustre/portals/libcfs"}, + {"ksocknal", "lustre/portals/knals/socknal"}, + {"kptlrouter", "lustre/portals/router"}, + {"lvfs", "lustre/lvfs"}, + {"obdclass", "lustre/obdclass"}, + {"llog_test", "lustre/obdclass"}, + {"ptlrpc", "lustre/ptlrpc"}, + {"obdext2", "lustre/obdext2"}, + {"ost", "lustre/ost"}, + {"osc", "lustre/osc"}, + {"mds", "lustre/mds"}, + {"mdc", "lustre/mdc"}, + {"llite", "lustre/llite"}, + {"obdecho", "lustre/obdecho"}, + {"ldlm", "lustre/ldlm"}, + {"obdfilter", "lustre/obdfilter"}, + {"extN", "lustre/extN"}, + {"lov", "lustre/lov"}, + {"fsfilt_ext3", "lustre/lvfs"}, + {"fsfilt_extN", "lustre/lvfs"}, + {"fsfilt_reiserfs", "lustre/lvfs"}, + {"mds_ext2", "lustre/mds"}, + {"mds_ext3", "lustre/mds"}, + {"mds_extN", "lustre/mds"}, + {"ptlbd", "lustre/ptlbd"}, + {"mgmt_svc", "lustre/mgmt"}, + {"mgmt_cli", "lustre/mgmt"}, + {NULL, NULL} +}; + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) int jt_dbg_modules(int argc, char **argv) { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - struct mod_paths { - char *name, *path; - } *mp, mod_paths[] = { - {"portals", "lustre/portals/libcfs"}, 
- {"ksocknal", "lustre/portals/knals/socknal"}, - {"kptlrouter", "lustre/portals/router"}, - {"lvfs", "lustre/lvfs"}, - {"obdclass", "lustre/obdclass"}, - {"llog_test", "lustre/obdclass"}, - {"ptlrpc", "lustre/ptlrpc"}, - {"obdext2", "lustre/obdext2"}, - {"ost", "lustre/ost"}, - {"osc", "lustre/osc"}, - {"mds", "lustre/mds"}, - {"mdc", "lustre/mdc"}, - {"llite", "lustre/llite"}, - {"obdecho", "lustre/obdecho"}, - {"ldlm", "lustre/ldlm"}, - {"obdfilter", "lustre/obdfilter"}, - {"extN", "lustre/extN"}, - {"lov", "lustre/lov"}, - {"fsfilt_ext3", "lustre/lvfs"}, - {"fsfilt_extN", "lustre/lvfs"}, - {"fsfilt_reiserfs", "lustre/lvfs"}, - {"mds_ext2", "lustre/mds"}, - {"mds_ext3", "lustre/mds"}, - {"mds_extN", "lustre/mds"}, - {"ptlbd", "lustre/ptlbd"}, - {"mgmt_svc", "lustre/mgmt"}, - {"mgmt_cli", "lustre/mgmt"}, - {NULL, NULL} - }; + struct mod_paths *mp; char *path = ".."; char *kernel = "linux"; @@ -592,11 +594,49 @@ int jt_dbg_modules(int argc, char **argv) } return 0; +} #else - printf("jt_dbg_module is not yet implemented for Linux 2.5\n"); +int jt_dbg_modules(int argc, char **argv) +{ + struct mod_paths *mp; + char *path = ".."; + char *kernel = "linux"; + const char *proc = "/proc/modules"; + char modname[128], others[128]; + long modaddr; + int rc; + FILE *file; + + if (argc >= 2) + path = argv[1]; + if (argc == 3) + kernel = argv[2]; + if (argc > 3) { + printf("%s [path] [kernel]\n", argv[0]); + return 0; + } + + file = fopen(proc, "r"); + if (!file) { + printf("failed open %s: %s\n", proc, strerror(errno)); + return 0; + } + + while ((rc = fscanf(file, "%s %s %s %s %s %lx\n", + modname, others, others, others, others, &modaddr)) == 6) { + for (mp = mod_paths; mp->name != NULL; mp++) { + if (!strcmp(mp->name, modname)) + break; + } + if (mp->name) { + printf("add-symbol-file %s/%s/%s.o 0x%0lx\n", path, + mp->path, mp->name, modaddr); + } + } + return 0; -#endif /* linux 2.5 */ } +#endif /* linux 2.5 */ int jt_dbg_panic(int argc, char **argv) { diff --git 
a/lustre/Rules b/lustre/Rules index b28540a..8846e3b 100644 --- a/lustre/Rules +++ b/lustre/Rules @@ -12,18 +12,29 @@ if LINUX25 -# We still need to link each module with vermagic.o to get rid of "kernel taited" warnings. -basename=$(shell echo $< | sed -e 's/\.c//g' | sed -e 's/-//g' | sed -e 's/\.o//g') +# FIXME +# need to be rewritten: +# - bad hacking in lvfs/Makefile.am obdclass/Makefile.am +# - .o -> .ko +# +basename=$(shell echo $< | sed -e 's/\.c//g' | sed -e 's/-//g' | sed -e 's/\.o//g' | sed -e 's/^.*\///g') AM_CPPFLAGS=-I$(top_builddir)/include -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -mpreferred-stack-boundary=2 -DKBUILD_MODNAME=$(MODULE) -DKBUILD_BASENAME=$(basename) +$(MODULE).o: $($(MODULE)_OBJECTS) $($(MODULE)_DEPENDENCIES) + $(LD) -m $(MOD_LINK) -r -o $(MODULE)_tmp.o $($(MODULE)_OBJECTS) + rm -f $(MODULE)_tmp.c + $(LINUX)/scripts/modpost $(LINUX)/vmlinux $(MODULE)_tmp.o + $(COMPILE) -UKBUILD_BASENAME -DKBUILD_BASENAME=$(MODULE) -c $(MODULE)_tmp.mod.c + $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $(MODULE)_tmp.o $(MODULE)_tmp.mod.o + else AM_CPPFLAGS=-I$(top_builddir)/include +$(MODULE).o: $($(MODULE)_OBJECTS) $($(MODULE)_DEPENDENCIES) + $(LD) -m "`$(LD) --help | awk '/supported emulations/ {print $$4}'`" -r -o $(MODULE).o $($(MODULE)_OBJECTS) endif -$(MODULE).o: $($(MODULE)_OBJECTS) $($(MODULE)_DEPENDENCIES) - $(LD) -m "`$(LD) --help | awk '/supported emulations/ {print $$4}'`" -r -o $(MODULE).o $($(MODULE)_OBJECTS) tags: rm -f $(top_srcdir)/TAGS diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h index df59db4..4a4e3a0 100644 --- a/lustre/include/linux/lustre_compat25.h +++ b/lustre/include/linux/lustre_compat25.h @@ -85,6 +85,21 @@ static inline void lustre_daemonize_helper(void) current->tty = NULL; } +static inline int cleanup_group_info(void) +{ + struct group_info *ginfo; + + ginfo = groups_alloc(2); + if (!ginfo) + return -ENOMEM; + + ginfo->ngroups = 0; + 
set_current_groups(ginfo); + put_group_info(ginfo); + + return 0; +} + #define smp_num_cpus NR_CPUS #ifndef conditional_schedule @@ -160,6 +175,14 @@ static inline void lustre_daemonize_helper(void) current->tty = NULL; } +static inline int cleanup_group_info(void) +{ + /* Get rid of unneeded supplementary groups */ + current->ngroups = 0; + memset(current->groups, 0, sizeof(current->groups)); + return 0; +} + #ifndef conditional_schedule #define conditional_schedule() if (unlikely(need_resched())) schedule() #endif diff --git a/lustre/kernel_patches/patches/export-2.6.3.patch b/lustre/kernel_patches/patches/export-2.6.3.patch new file mode 100644 index 0000000..8821d86 --- /dev/null +++ b/lustre/kernel_patches/patches/export-2.6.3.patch @@ -0,0 +1,57 @@ +Index: linux-2.6.3/fs/open.c +=================================================================== +--- linux-2.6.3.orig/fs/open.c 2004-02-23 14:36:25.000000000 -0800 ++++ linux-2.6.3/fs/open.c 2004-02-23 20:09:34.000000000 -0800 +@@ -881,6 +881,7 @@ + return ERR_PTR(error); + } + ++EXPORT_SYMBOL(filp_open); + + struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) + { +Index: linux-2.6.3/fs/jbd/journal.c +=================================================================== +--- linux-2.6.3.orig/fs/jbd/journal.c 2004-01-08 22:59:10.000000000 -0800 ++++ linux-2.6.3/fs/jbd/journal.c 2004-02-23 20:09:34.000000000 -0800 +@@ -71,6 +71,7 @@ + EXPORT_SYMBOL(journal_errno); + EXPORT_SYMBOL(journal_ack_err); + EXPORT_SYMBOL(journal_clear_err); ++EXPORT_SYMBOL(log_start_commit); + EXPORT_SYMBOL(log_wait_commit); + EXPORT_SYMBOL(journal_start_commit); + EXPORT_SYMBOL(journal_wipe); +Index: linux-2.6.3/fs/ext3/super.c +=================================================================== +--- linux-2.6.3.orig/fs/ext3/super.c 2004-02-23 14:36:26.000000000 -0800 ++++ linux-2.6.3/fs/ext3/super.c 2004-02-23 20:24:30.000000000 -0800 +@@ -115,6 +115,8 @@ + handle->h_err = err; + } + 
++EXPORT_SYMBOL(ext3_journal_abort_handle); ++ + static char error_buf[1024]; + + /* Deal with the reporting of failure conditions on a filesystem such as +@@ -1772,6 +1774,8 @@ + return ret; + } + ++EXPORT_SYMBOL(ext3_force_commit); ++ + /* + * Ext3 always journals updates to the superblock itself, so we don't + * have to propagate any other updates to the superblock on disk at this +@@ -2059,6 +2063,10 @@ + unsigned long *blocks, int *created, int create); + EXPORT_SYMBOL(ext3_map_inode_page); + ++EXPORT_SYMBOL(ext3_xattr_get); ++EXPORT_SYMBOL(ext3_xattr_set_handle); ++EXPORT_SYMBOL(ext3_bread); ++ + MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others"); + MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions"); + MODULE_LICENSE("GPL"); diff --git a/lustre/kernel_patches/patches/fs-intent-2.6.3-mm4.patch b/lustre/kernel_patches/patches/fs-intent-2.6.3-mm4.patch new file mode 100644 index 0000000..a5c8725 --- /dev/null +++ b/lustre/kernel_patches/patches/fs-intent-2.6.3-mm4.patch @@ -0,0 +1,116 @@ +.old..........pc/linux-2.6.3-nfs-intent-suse/fs/nfs/dir.c +.new.........fs/nfs/dir.c +.old..........pc/linux-2.6.3-nfs-intent-suse/fs/nfs/dir.c +.new.........fs/nfs/dir.c +Index: linux-2.6.3-mm4/fs/nfs/dir.c +=================================================================== +--- linux-2.6.3-mm4.orig/fs/nfs/dir.c 2004-03-08 17:05:35.000000000 +0800 ++++ linux-2.6.3-mm4/fs/nfs/dir.c 2004-03-08 17:38:58.000000000 +0800 +@@ -773,7 +773,7 @@ + if (nd->flags & LOOKUP_DIRECTORY) + return 0; + /* Are we trying to write to a read only partition? 
*/ +- if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) ++ if (IS_RDONLY(dir) && (nd->intent.it_flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) + return 0; + return 1; + } +@@ -794,7 +794,7 @@ + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + + /* Let vfs_create() deal with O_EXCL */ +- if (nd->intent.open.flags & O_EXCL) ++ if (nd->intent.it_flags & O_EXCL) + goto no_entry; + + /* Open the file on the server */ +@@ -802,7 +802,7 @@ + /* Revalidate parent directory attribute cache */ + nfs_revalidate_inode(NFS_SERVER(dir), dir); + +- if (nd->intent.open.flags & O_CREAT) { ++ if (nd->intent.it_flags & O_CREAT) { + nfs_begin_data_update(dir); + inode = nfs4_atomic_open(dir, dentry, nd); + nfs_end_data_update(dir); +@@ -818,7 +818,7 @@ + break; + /* This turned out not to be a regular file */ + case -ELOOP: +- if (!(nd->intent.open.flags & O_NOFOLLOW)) ++ if (!(nd->intent.it_flags & O_NOFOLLOW)) + goto no_open; + /* case -EISDIR: */ + /* case -EINVAL: */ +@@ -852,7 +852,7 @@ + dir = parent->d_inode; + if (!is_atomic_open(dir, nd)) + goto no_open; +- openflags = nd->intent.open.flags; ++ openflags = nd->intent.it_flags; + if (openflags & O_CREAT) { + /* If this is a negative dentry, just drop it */ + if (!inode) +Index: linux-2.6.3-mm4/fs/nfs/nfs4proc.c +=================================================================== +--- linux-2.6.3-mm4.orig/fs/nfs/nfs4proc.c 2004-03-08 17:02:24.000000000 +0800 ++++ linux-2.6.3-mm4/fs/nfs/nfs4proc.c 2004-03-08 17:37:59.000000000 +0800 +@@ -778,17 +778,17 @@ + struct nfs4_state *state; + + if (nd->flags & LOOKUP_CREATE) { +- attr.ia_mode = nd->intent.open.create_mode; ++ attr.ia_mode = nd->intent.it_create_mode; + attr.ia_valid = ATTR_MODE; + if (!IS_POSIXACL(dir)) + attr.ia_mode &= ~current->fs->umask; + } else { + attr.ia_valid = 0; +- BUG_ON(nd->intent.open.flags & O_CREAT); ++ BUG_ON(nd->intent.it_flags & O_CREAT); + } + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); +- state = 
nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred); ++ state = nfs4_do_open(dir, &dentry->d_name, nd->intent.it_flags, &attr, cred); + put_rpccred(cred); + if (IS_ERR(state)) + return (struct inode *)state; +Index: linux-2.6.3-mm4/fs/cifs/dir.c +=================================================================== +--- linux-2.6.3-mm4.orig/fs/cifs/dir.c 2004-02-18 11:58:34.000000000 +0800 ++++ linux-2.6.3-mm4/fs/cifs/dir.c 2004-03-08 17:37:59.000000000 +0800 +@@ -146,18 +146,18 @@ + if(nd) { + cFYI(1,("In create for inode %p dentry->inode %p nd flags = 0x%x for %s",inode, direntry->d_inode, nd->flags,full_path)); + +- if ((nd->intent.open.flags & O_ACCMODE) == O_RDONLY) ++ if ((nd->intent.it_flags & O_ACCMODE) == O_RDONLY) + desiredAccess = GENERIC_READ; +- else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY) ++ else if ((nd->intent.it_flags & O_ACCMODE) == O_WRONLY) + desiredAccess = GENERIC_WRITE; +- else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) ++ else if ((nd->intent.it_flags & O_ACCMODE) == O_RDWR) + desiredAccess = GENERIC_ALL; + +- if((nd->intent.open.flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) ++ if((nd->intent.it_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) + disposition = FILE_CREATE; +- else if((nd->intent.open.flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) ++ else if((nd->intent.it_flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) + disposition = FILE_OVERWRITE_IF; +- else if((nd->intent.open.flags & O_CREAT) == O_CREAT) ++ else if((nd->intent.it_flags & O_CREAT) == O_CREAT) + disposition = FILE_OPEN_IF; + else { + cFYI(1,("Create flag not set in create function")); +@@ -314,7 +314,7 @@ + parent_dir_inode, direntry->d_name.name, direntry)); + + if(nd) { /* BB removeme */ +- cFYI(1,("In lookup nd flags 0x%x open intent flags 0x%x",nd->flags,nd->intent.open.flags)); ++ cFYI(1,("In lookup nd flags 0x%x open intent flags 0x%x",nd->flags,nd->intent.it_flags)); + } /* BB removeme BB */ + /* BB Add check 
of incoming data - e.g. frame not longer than maximum SMB - let server check the namelen BB */ + diff --git a/lustre/kernel_patches/patches/iopen-2.6.3-mm4.patch b/lustre/kernel_patches/patches/iopen-2.6.3-mm4.patch new file mode 100644 index 0000000..79b1f17 --- /dev/null +++ b/lustre/kernel_patches/patches/iopen-2.6.3-mm4.patch @@ -0,0 +1,422 @@ + Documentation/filesystems/ext2.txt | 16 ++ + fs/ext3/inode.c | 3 + fs/ext3/iopen.c | 239 +++++++++++++++++++++++++++++++++++++ + fs/ext3/iopen.h | 15 ++ + fs/ext3/namei.c | 13 ++ + fs/ext3/super.c | 17 ++ + include/linux/ext3_fs.h | 2 + 7 files changed, 304 insertions(+), 1 deletion(-) + +Index: linux-2.6.3-mm4/Documentation/filesystems/ext2.txt +=================================================================== +--- linux-2.6.3-mm4.orig/Documentation/filesystems/ext2.txt 2004-01-09 14:59:18.000000000 +0800 ++++ linux-2.6.3-mm4/Documentation/filesystems/ext2.txt 2004-03-08 14:58:44.431196112 +0800 +@@ -35,6 +35,22 @@ + + sb=n Use alternate superblock at this location. + ++iopen Makes an invisible pseudo-directory called ++ __iopen__ available in the root directory ++ of the filesystem. Allows open-by-inode- ++ number. i.e., inode 3145 can be accessed ++ via /mntpt/__iopen__/3145 ++ ++iopen_nopriv This option makes the iopen directory be ++ world-readable. This may be safer since it ++ allows daemons to run as an unprivileged user, ++ however it significantly changes the security ++ model of a Unix filesystem, since previously ++ all files under a mode 700 directory were not ++ generally available even if the ++ permissions on the file itself are ++ world-readable. ++ + grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2. 
+ + +Index: linux-2.6.3-mm4/fs/ext3/inode.c +=================================================================== +--- linux-2.6.3-mm4.orig/fs/ext3/inode.c 2004-03-08 14:57:54.969715400 +0800 ++++ linux-2.6.3-mm4/fs/ext3/inode.c 2004-03-08 14:58:44.504185016 +0800 +@@ -37,6 +37,7 @@ + #include + #include + #include "xattr.h" ++#include "iopen.h" + #include "acl.h" + + /* +@@ -2472,6 +2473,8 @@ + ei->i_acl = EXT3_ACL_NOT_CACHED; + ei->i_default_acl = EXT3_ACL_NOT_CACHED; + #endif ++ if (ext3_iopen_get_inode(inode)) ++ return; + if (ext3_get_inode_loc(inode, &iloc, 0)) + goto bad_inode; + bh = iloc.bh; +Index: linux-2.6.3-mm4/fs/ext3/iopen.c +=================================================================== +--- linux-2.6.3-mm4.orig/fs/ext3/iopen.c 2004-03-08 14:58:44.413198848 +0800 ++++ linux-2.6.3-mm4/fs/ext3/iopen.c 2004-03-08 14:58:44.576174072 +0800 +@@ -0,0 +1,223 @@ ++ ++ ++/* ++ * linux/fs/ext3/iopen.c ++ * ++ * Special support for open by inode number ++ * ++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). ++ * ++ * This file may be redistributed under the terms of the GNU General ++ * Public License. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include "iopen.h" ++ ++#ifndef assert ++#define assert(test) J_ASSERT(test) ++#endif ++ ++#define IOPEN_NAME_LEN 32 ++ ++/* ++ * This implements looking up an inode by number. 
++ */ ++static struct dentry *iopen_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) ++{ ++ struct inode * inode; ++ unsigned long ino; ++ struct list_head *lp; ++ struct dentry *alternate; ++ char buf[IOPEN_NAME_LEN]; ++ ++ if (dentry->d_name.len >= IOPEN_NAME_LEN) ++ return ERR_PTR(-ENAMETOOLONG); ++ ++ memcpy(buf, dentry->d_name.name, dentry->d_name.len); ++ buf[dentry->d_name.len] = 0; ++ ++ if (strcmp(buf, ".") == 0) ++ ino = dir->i_ino; ++ else if (strcmp(buf, "..") == 0) ++ ino = EXT3_ROOT_INO; ++ else ++ ino = simple_strtoul(buf, 0, 0); ++ ++ if ((ino != EXT3_ROOT_INO && ++ //ino != EXT3_ACL_IDX_INO && ++ //ino != EXT3_ACL_DATA_INO && ++ ino < EXT3_FIRST_INO(dir->i_sb)) || ++ ino > le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count)) ++ return ERR_PTR(-ENOENT); ++ ++ inode = iget(dir->i_sb, ino); ++ if (!inode) ++ return ERR_PTR(-EACCES); ++ if (is_bad_inode(inode)) { ++ iput(inode); ++ return ERR_PTR(-ENOENT); ++ } ++ ++ /* preferrably return a connected dentry */ ++ spin_lock(&dcache_lock); ++ list_for_each(lp, &inode->i_dentry) { ++ alternate = list_entry(lp, struct dentry, d_alias); ++ assert(!(alternate->d_flags & DCACHE_DISCONNECTED)); ++ } ++ ++ if (!list_empty(&inode->i_dentry)) { ++ alternate = list_entry(inode->i_dentry.next, ++ struct dentry, d_alias); ++ dget_locked(alternate); ++ alternate->d_vfs_flags |= DCACHE_REFERENCED; ++ iput(inode); ++ spin_unlock(&dcache_lock); ++ return alternate; ++ } ++ dentry->d_flags |= DCACHE_DISCONNECTED; ++ spin_unlock(&dcache_lock); ++ ++ d_add(dentry, inode); ++ return NULL; ++} ++ ++#define do_switch(x,y) do { \ ++ __typeof__ (x) __tmp = x; \ ++ x = y; y = __tmp; } while (0) ++ ++static inline void switch_names(struct dentry * dentry, struct dentry * target) ++{ ++ const unsigned char *old_name, *new_name; ++ ++ memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN); ++ old_name = target->d_name.name; ++ new_name = dentry->d_name.name; ++ if (old_name == target->d_iname) ++ 
old_name = dentry->d_iname; ++ if (new_name == dentry->d_iname) ++ new_name = target->d_iname; ++ target->d_name.name = new_name; ++ dentry->d_name.name = old_name; ++} ++ ++ ++struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode) ++{ ++ struct dentry *tmp, *goal = NULL; ++ struct list_head *lp; ++ ++ /* preferrably return a connected dentry */ ++ spin_lock(&dcache_lock); ++ /* verify this dentry is really new */ ++ assert(!de->d_inode); ++ assert(list_empty(&de->d_subdirs)); ++ assert(list_empty(&de->d_alias)); ++ ++ ++ list_for_each(lp, &inode->i_dentry) { ++ tmp = list_entry(lp, struct dentry, d_alias); ++ if (tmp->d_flags & DCACHE_DISCONNECTED) { ++ assert(tmp->d_alias.next == &inode->i_dentry); ++ assert(tmp->d_alias.prev == &inode->i_dentry); ++ goal = tmp; ++ dget_locked(goal); ++ break; ++ } ++ } ++ spin_unlock(&dcache_lock); ++ ++ if (!goal) ++ return NULL; ++ ++ goal->d_flags &= ~DCACHE_DISCONNECTED; ++ d_rehash(de); ++ d_move(goal, de); ++ ++ return goal; ++} ++ ++/* ++ * These are the special structures for the iopen pseudo directory. ++ */ ++ ++static struct inode_operations iopen_inode_operations = { ++ lookup: iopen_lookup, /* BKL held */ ++}; ++ ++static struct file_operations iopen_file_operations = { ++ read: generic_read_dir, ++}; ++ ++static int match_dentry(struct dentry *dentry, const char *name) ++{ ++ int len; ++ ++ len = strlen(name); ++ if (dentry->d_name.len != len) ++ return 0; ++ if (strncmp(dentry->d_name.name, name, len)) ++ return 0; ++ return 1; ++} ++ ++/* ++ * This function is spliced into ext3_lookup and returns 1 if the file ++ * name is __iopen__ and dentry has been filled in appropriately. 
++ */ ++int ext3_check_for_iopen(struct inode * dir, struct dentry *dentry) ++{ ++ struct inode * inode; ++ ++ if (dir->i_ino != EXT3_ROOT_INO || ++ !test_opt(dir->i_sb, IOPEN) || ++ !match_dentry(dentry, "__iopen__")) ++ return 0; ++ ++ inode = iget(dir->i_sb, EXT3_BAD_INO); ++ ++ if (!inode) ++ return 0; ++ d_add(dentry, inode); ++ return 1; ++} ++ ++/* ++ * This function is spliced into read_inode; it returns 1 if inode ++ * number is the one for /__iopen__, in which case the inode is filled ++ * in appropriately. Otherwise, this function returns 0. ++ */ ++int ext3_iopen_get_inode(struct inode * inode) ++{ ++ if (inode->i_ino != EXT3_BAD_INO) ++ return 0; ++ ++ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR; ++ if (test_opt(inode->i_sb, IOPEN_NOPRIV)) ++ inode->i_mode |= 0777; ++ inode->i_uid = 0; ++ inode->i_gid = 0; ++ inode->i_nlink = 1; ++ inode->i_size = 4096; ++ inode->i_atime = CURRENT_TIME; ++ inode->i_ctime = CURRENT_TIME; ++ inode->i_mtime = CURRENT_TIME; ++ EXT3_I(inode)->i_dtime = 0; ++ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size ++ * (for stat), not the fs block ++ * size */ ++ inode->i_blocks = 0; ++ inode->i_version = 1; ++ inode->i_generation = 0; ++ ++ inode->i_op = &iopen_inode_operations; ++ inode->i_fop = &iopen_file_operations; ++ inode->i_mapping->a_ops = 0; ++ ++ return 1; ++} +Index: linux-2.6.3-mm4/fs/ext3/iopen.h +=================================================================== +--- linux-2.6.3-mm4.orig/fs/ext3/iopen.h 2004-03-08 14:58:44.413198848 +0800 ++++ linux-2.6.3-mm4/fs/ext3/iopen.h 2004-03-08 14:58:44.577173920 +0800 +@@ -0,0 +1,15 @@ ++/* ++ * iopen.h ++ * ++ * Special support for opening files by inode number. ++ * ++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu). ++ * ++ * This file may be redistributed under the terms of the GNU General ++ * Public License. 
++ */ ++ ++extern int ext3_check_for_iopen(struct inode * dir, struct dentry *dentry); ++extern int ext3_iopen_get_inode(struct inode * inode); ++ ++ +Index: linux-2.6.3-mm4/fs/ext3/namei.c +=================================================================== +--- linux-2.6.3-mm4.orig/fs/ext3/namei.c 2004-03-08 14:57:52.978018184 +0800 ++++ linux-2.6.3-mm4/fs/ext3/namei.c 2004-03-08 14:58:44.648163128 +0800 +@@ -37,6 +37,7 @@ + #include + #include + #include "xattr.h" ++#include "iopen.h" + #include "acl.h" + + /* +@@ -970,15 +971,21 @@ + } + #endif + ++struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode); ++ + static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd) + { + struct inode * inode; + struct ext3_dir_entry_2 * de; + struct buffer_head * bh; ++ struct dentry *alternate = NULL; + + if (dentry->d_name.len > EXT3_NAME_LEN) + return ERR_PTR(-ENAMETOOLONG); + ++ if (ext3_check_for_iopen(dir, dentry)) ++ return NULL; ++ + bh = ext3_find_entry(dentry, &de); + inode = NULL; + if (bh) { +@@ -989,8 +996,14 @@ + if (!inode) + return ERR_PTR(-EACCES); + } ++ if (inode && (alternate = iopen_connect_dentry(dentry, inode))) { ++ iput(inode); ++ return alternate; ++ } ++ + if (inode) + return d_splice_alias(inode, dentry); ++ + d_add(dentry, inode); + return NULL; + } +Index: linux-2.6.3-mm4/fs/ext3/super.c +=================================================================== +--- linux-2.6.3-mm4.orig/fs/ext3/super.c 2004-03-08 14:57:55.049703240 +0800 ++++ linux-2.6.3-mm4/fs/ext3/super.c 2004-03-08 15:03:18.310560120 +0800 +@@ -575,7 +575,7 @@ + Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, + Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, + Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, +- Opt_ignore, Opt_err, ++ Opt_ignore, Opt_err, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, + }; + + static match_table_t tokens = { +@@ -620,6 +620,9 @@ + {Opt_ignore, "noquota"}, + {Opt_ignore, 
"quota"}, + {Opt_ignore, "usrquota"}, ++ {Opt_iopen, "iopen"}, ++ {Opt_noiopen, "noiopen"}, ++ {Opt_iopen_nopriv, "iopen_nopriv"}, + {Opt_err, NULL} + }; + +@@ -869,6 +872,18 @@ + case Opt_abort: + set_opt(sbi->s_mount_opt, ABORT); + break; ++ case Opt_iopen: ++ set_opt (sbi->s_mount_opt, IOPEN); ++ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); ++ break; ++ case Opt_noiopen: ++ clear_opt (sbi->s_mount_opt, IOPEN); ++ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV); ++ break; ++ case Opt_iopen_nopriv: ++ set_opt (sbi->s_mount_opt, IOPEN); ++ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV); ++ break; + case Opt_ignore: + break; + default: +Index: linux-2.6.3-mm4/fs/ext3/Makefile +=================================================================== +--- linux-2.6.3-mm4.orig/fs/ext3/Makefile 2004-01-09 14:59:08.000000000 +0800 ++++ linux-2.6.3-mm4/fs/ext3/Makefile 2004-03-08 14:58:44.794140936 +0800 +@@ -5,7 +5,7 @@ + obj-$(CONFIG_EXT3_FS) += ext3.o + + ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ +- ioctl.o namei.o super.o symlink.o hash.o ++ ioctl.o namei.o super.o symlink.o hash.o iopen.o + + ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o + ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o +Index: linux-2.6.3-mm4/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.3-mm4.orig/include/linux/ext3_fs.h 2004-03-08 14:57:53.057006176 +0800 ++++ linux-2.6.3-mm4/include/linux/ext3_fs.h 2004-03-08 14:58:44.795140784 +0800 +@@ -325,6 +325,8 @@ + #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ + #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ + #define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */ ++#define EXT3_MOUNT_IOPEN 0x10000 /* Allow access via iopen */ ++#define EXT3_MOUNT_IOPEN_NOPRIV 0x20000 /* Make iopen world-readable */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef _LINUX_EXT2_FS_H diff --git 
a/lustre/kernel_patches/patches/linux-2.6.3-CITI_NFS4_ALL.patch b/lustre/kernel_patches/patches/linux-2.6.3-CITI_NFS4_ALL.patch new file mode 100644 index 0000000..16e191b --- /dev/null +++ b/lustre/kernel_patches/patches/linux-2.6.3-CITI_NFS4_ALL.patch @@ -0,0 +1,14513 @@ + +The complete series of citi nfsv4 patches in a single patch + + + Makefile | 2 + fs/Kconfig | 49 + fs/Makefile | 1 + fs/inode.c | 2 + fs/nfs/dir.c | 181 ++ + fs/nfs/direct.c | 3 + fs/nfs/file.c | 23 + fs/nfs/inode.c | 586 +++++---- + fs/nfs/nfs3proc.c | 43 + fs/nfs/nfs4proc.c | 988 +++++++--------- + fs/nfs/nfs4xdr.c | 1931 ++++++++++++++++++++++++-------- + fs/nfs/pagelist.c | 5 + fs/nfs/proc.c | 51 + fs/nfs/read.c | 2 + fs/nfs/unlink.c | 3 + fs/nfs/write.c | 207 +-- + fs/nfs4acl/Makefile | 3 + fs/nfs4acl/acl.c | 921 +++++++++++++++ + fs/nfs4acl/acl_syms.c | 51 + fs/nfsd/Makefile | 2 + fs/nfsd/nfs3xdr.c | 2 + fs/nfsd/nfs4idmap.c | 569 +++++++++ + fs/nfsd/nfs4proc.c | 229 ++- + fs/nfsd/nfs4state.c | 440 +++++-- + fs/nfsd/nfs4xdr.c | 495 +++++--- + fs/nfsd/nfsctl.c | 7 + fs/nfsd/nfsproc.c | 1 + fs/nfsd/nfsxdr.c | 2 + fs/nfsd/stats.c | 67 - + fs/nfsd/vfs.c | 218 +++ + include/linux/fs.h | 2 + include/linux/nfs.h | 2 + include/linux/nfs4.h | 80 + + include/linux/nfs4_acl.h | 68 + + include/linux/nfs_fs.h | 138 +- + include/linux/nfs_page.h | 2 + include/linux/nfs_xdr.h | 256 +--- + include/linux/nfsd/nfsd.h | 16 + include/linux/nfsd/nfsfh.h | 8 + include/linux/nfsd/state.h | 21 + include/linux/nfsd/xdr4.h | 37 + include/linux/nfsd_idmap.h | 54 + include/linux/sunrpc/auth_gss.h | 2 + include/linux/sunrpc/cache.h | 13 + include/linux/sunrpc/gss_api.h | 3 + include/linux/sunrpc/stats.h | 20 + include/linux/sunrpc/svc.h | 1 + include/linux/sunrpc/svcauth.h | 5 + include/linux/sunrpc/svcauth_gss.h | 35 + include/linux/sunrpc/xdr.h | 3 + include/linux/sunrpc/xprt.h | 15 + net/sunrpc/Makefile | 2 + net/sunrpc/auth_gss/Makefile | 2 + net/sunrpc/auth_gss/auth_gss.c | 119 + + 
net/sunrpc/auth_gss/gss_krb5_crypto.c | 18 + net/sunrpc/auth_gss/gss_krb5_mech.c | 14 + net/sunrpc/auth_gss/gss_krb5_seal.c | 9 + net/sunrpc/auth_gss/gss_krb5_seqnum.c | 2 + net/sunrpc/auth_gss/gss_mech_switch.c | 32 + net/sunrpc/auth_gss/gss_pseudoflavors.c | 21 + net/sunrpc/auth_gss/sunrpcgss_syms.c | 2 + net/sunrpc/auth_gss/svcauth_gss.c | 1018 ++++++++++++++++ + net/sunrpc/cache.c | 13 + net/sunrpc/stats.c | 106 - + net/sunrpc/sunrpc_syms.c | 5 + net/sunrpc/svc.c | 4 + net/sunrpc/svcauth.c | 5 + net/sunrpc/svcauth_unix.c | 13 + net/sunrpc/xdr.c | 4 + net/sunrpc/xprt.c | 210 +-- + include/linux/sunrpc/name_lookup.h | 38 + 71 files changed, 7194 insertions(+), 2308 deletions(-) + +diff -puN Makefile~CITI_NFS4_ALL Makefile +--- linux-2.6.3/Makefile~CITI_NFS4_ALL 2004-02-19 16:47:02.000000000 -0500 ++++ linux-2.6.3-bfields/Makefile 2004-02-19 16:47:16.000000000 -0500 +@@ -1,7 +1,7 @@ + VERSION = 2 + PATCHLEVEL = 6 + SUBLEVEL = 3 +-EXTRAVERSION = ++EXTRAVERSION = -CITI_NFS4_ALL-1 + NAME=Feisty Dunnart + + # *DOCUMENTATION* +diff -puN fs/inode.c~CITI_NFS4_ALL fs/inode.c +--- linux-2.6.3/fs/inode.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/fs/inode.c 2004-02-19 16:47:03.000000000 -0500 +@@ -1178,6 +1178,8 @@ void inode_update_time(struct inode *ino + struct timespec now; + int sync_it = 0; + ++ if (IS_NOCMTIME(inode)) ++ return; + if (IS_RDONLY(inode)) + return; + +diff -puN fs/Kconfig~CITI_NFS4_ALL fs/Kconfig +--- linux-2.6.3/fs/Kconfig~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/fs/Kconfig 2004-02-19 16:47:07.000000000 -0500 +@@ -288,7 +288,7 @@ config FS_POSIX_ACL + # Never use this symbol for ifdefs. 
+ # + bool +- depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL ++ depends on EXT2_FS_POSIX_ACL || EXT3_FS_POSIX_ACL || JFS_POSIX_ACL || NFS_V4_ACL + default y + + config XFS_FS +@@ -1314,21 +1314,25 @@ config NFS_V3 + Say Y here if you want your NFS client to be able to speak the newer + version 3 of the NFS protocol. + +- If unsure, say N. ++ If unsure, say Y. + + config NFS_V4 + bool "Provide NFSv4 client support (EXPERIMENTAL)" + depends on NFS_FS && EXPERIMENTAL ++ select RPCSEC_GSS_KRB5 + help + Say Y here if you want your NFS client to be able to speak the newer +- version 4 of the NFS protocol. This feature is experimental, and +- should only be used if you are interested in helping to test NFSv4. ++ version 4 of the NFS protocol. ++ ++ Note: Requires auxiliary userspace daemons which may be found on ++ http://www.citi.umich.edu/projects/nfsv4/ + + If unsure, say N. + + config NFS_DIRECTIO + bool "Allow direct I/O on NFS files (EXPERIMENTAL)" + depends on NFS_FS && EXPERIMENTAL ++ select NFS_V4_ACL + help + This option enables applications to perform uncached I/O on files + in NFS file systems using the O_DIRECT open() flag. When O_DIRECT +@@ -1388,6 +1392,7 @@ config NFSD_V3 + config NFSD_V4 + bool "Provide NFSv4 server support (EXPERIMENTAL)" + depends on NFSD_V3 && EXPERIMENTAL ++ select NFS_V4_ACL + help + If you would like to include the NFSv4 server as well as the NFSv2 + and NFSv3 servers, say Y here. This feature is experimental, and +@@ -1423,6 +1428,12 @@ config LOCKD_V4 + depends on NFSD_V3 || NFS_V3 + default y + ++config NFS_V4_ACL ++ bool "Provide NFSv4 ACL support" ++ depends on NFSD_V4 || NFS_V4 ++ help ++ This allows you to use POSIX ACLs with NFSv4. 
++ + config EXPORTFS + tristate + default NFSD +@@ -1431,28 +1442,24 @@ config SUNRPC + tristate + + config SUNRPC_GSS +- tristate "Provide RPCSEC_GSS authentication (EXPERIMENTAL)" ++ tristate ++ ++config RPCSEC_GSS_KRB5 ++ tristate "Secure RPC: Kerberos V mechanism (EXPERIMENTAL)" + depends on SUNRPC && EXPERIMENTAL +- default SUNRPC if NFS_V4=y ++ select SUNRPC_GSS ++ select CRYPTO ++ select CRYPTO_MD5 ++ select CRYPTO_DES + help +- Provides cryptographic authentication for NFS rpc requests. To +- make this useful, you must also select at least one rpcsec_gss +- mechanism. +- Note: You should always select this option if you wish to use ++ Provides for secure RPC calls by means of a gss-api ++ mechanism based on Kerberos V5. This is required for + NFSv4. + +-config RPCSEC_GSS_KRB5 +- tristate "Kerberos V mechanism for RPCSEC_GSS (EXPERIMENTAL)" +- depends on SUNRPC_GSS && CRYPTO_DES && CRYPTO_MD5 +- default SUNRPC_GSS if NFS_V4=y +- help +- Provides a gss-api mechanism based on Kerberos V5 (this is +- mandatory for RFC3010-compliant NFSv4 implementations). +- Requires a userspace daemon; +- see http://www.citi.umich.edu/projects/nfsv4/. ++ Note: Requires an auxiliary userspace daemon which may be found on ++ http://www.citi.umich.edu/projects/nfsv4/ + +- Note: If you select this option, please ensure that you also +- enable the MD5 and DES crypto ciphers. ++ If unsure, say N. 
+ + config SMB_FS + tristate "SMB file system support (to mount Windows shares etc.)" +diff -puN fs/nfs/dir.c~CITI_NFS4_ALL fs/nfs/dir.c +--- linux-2.6.3/fs/nfs/dir.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfs/dir.c 2004-02-19 16:47:07.000000000 -0500 +@@ -88,6 +88,10 @@ struct inode_operations nfs4_dir_inode_o + .permission = nfs_permission, + .getattr = nfs_getattr, + .setattr = nfs_setattr, ++#ifdef CONFIG_NFS_V4_ACL ++ .getxattr = nfs_getxattr, ++ .setxattr = nfs_setxattr, ++#endif /* CONFIG_NFS_V4_ACL */ + }; + + #endif /* CONFIG_NFS_V4 */ +@@ -139,11 +143,13 @@ int nfs_readdir_filler(nfs_readdir_descr + struct file *file = desc->file; + struct inode *inode = file->f_dentry->d_inode; + struct rpc_cred *cred = nfs_file_cred(file); ++ unsigned long timestamp; + int error; + + dfprintk(VFS, "NFS: nfs_readdir_filler() reading cookie %Lu into page %lu.\n", (long long)desc->entry->cookie, page->index); + + again: ++ timestamp = jiffies; + error = NFS_PROTO(inode)->readdir(file->f_dentry, cred, desc->entry->cookie, page, + NFS_SERVER(inode)->dtsize, desc->plus); + if (error < 0) { +@@ -157,18 +163,21 @@ int nfs_readdir_filler(nfs_readdir_descr + goto error; + } + SetPageUptodate(page); ++ NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; + /* Ensure consistent page alignment of the data. + * Note: assumes we have exclusive access to this mapping either + * throught inode->i_sem or some other mechanism. 
+ */ +- if (page->index == 0) ++ if (page->index == 0) { + invalidate_inode_pages(inode->i_mapping); ++ NFS_I(inode)->readdir_timestamp = timestamp; ++ } + unlock_page(page); + return 0; + error: + SetPageError(page); + unlock_page(page); +- invalidate_inode_pages(inode->i_mapping); ++ nfs_zap_caches(inode); + desc->error = error; + return -EIO; + } +@@ -381,6 +390,7 @@ int uncached_readdir(nfs_readdir_descrip + page, + NFS_SERVER(inode)->dtsize, + desc->plus); ++ NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; + desc->page = page; + desc->ptr = kmap(page); /* matching kunmap in nfs_do_filldir */ + if (desc->error >= 0) { +@@ -459,7 +469,15 @@ static int nfs_readdir(struct file *filp + } + res = 0; + break; +- } else if (res < 0) ++ } ++ if (res == -ETOOSMALL && desc->plus) { ++ NFS_FLAGS(inode) &= ~NFS_INO_ADVISE_RDPLUS; ++ nfs_zap_caches(inode); ++ desc->plus = 0; ++ desc->entry->eof = 0; ++ continue; ++ } ++ if (res < 0) + break; + + res = nfs_do_filldir(desc, dirent, filldir); +@@ -481,14 +499,19 @@ static int nfs_readdir(struct file *filp + * In the case it has, we assume that the dentries are untrustworthy + * and may need to be looked up again. 
+ */ +-static inline +-int nfs_check_verifier(struct inode *dir, struct dentry *dentry) ++static inline int nfs_check_verifier(struct inode *dir, struct dentry *dentry) + { + if (IS_ROOT(dentry)) + return 1; +- if (nfs_revalidate_inode(NFS_SERVER(dir), dir)) ++ if ((NFS_FLAGS(dir) & NFS_INO_INVALID_ATTR) != 0 ++ || nfs_attribute_timeout(dir)) + return 0; +- return time_after(dentry->d_time, NFS_MTIME_UPDATE(dir)); ++ return nfs_verify_change_attribute(dir, (unsigned long)dentry->d_fsdata); ++} ++ ++static inline void nfs_set_verifier(struct dentry * dentry, unsigned long verf) ++{ ++ dentry->d_fsdata = (void *)verf; + } + + /* +@@ -528,9 +551,7 @@ int nfs_neg_need_reval(struct inode *dir + /* Don't revalidate a negative dentry if we're creating a new file */ + if ((ndflags & LOOKUP_CREATE) && !(ndflags & LOOKUP_CONTINUE)) + return 0; +- if (!nfs_check_verifier(dir, dentry)) +- return 1; +- return time_after(jiffies, dentry->d_time + NFS_ATTRTIMEO(dir)); ++ return !nfs_check_verifier(dir, dentry); + } + + /* +@@ -552,6 +573,7 @@ static int nfs_lookup_revalidate(struct + int error; + struct nfs_fh fhandle; + struct nfs_fattr fattr; ++ unsigned long verifier; + int isopen = 0; + + parent = dget_parent(dentry); +@@ -574,6 +596,9 @@ static int nfs_lookup_revalidate(struct + goto out_bad; + } + ++ /* Revalidate parent directory attribute cache */ ++ nfs_revalidate_inode(NFS_SERVER(dir), dir); ++ + /* Force a full look up iff the parent directory has changed */ + if (nfs_check_verifier(dir, dentry)) { + if (nfs_lookup_verify_inode(inode, isopen)) +@@ -581,6 +606,12 @@ static int nfs_lookup_revalidate(struct + goto out_valid; + } + ++ /* ++ * Note: we're not holding inode->i_sem and so may be racing with ++ * operations that change the directory. We therefore save the ++ * change attribute *before* we do the RPC call. 
++ */ ++ verifier = nfs_save_change_attribute(dir); + error = nfs_cached_lookup(dir, dentry, &fhandle, &fattr); + if (!error) { + if (memcmp(NFS_FH(inode), &fhandle, sizeof(struct nfs_fh))!= 0) +@@ -603,6 +634,7 @@ static int nfs_lookup_revalidate(struct + + out_valid_renew: + nfs_renew_times(dentry); ++ nfs_set_verifier(dentry, verifier); + out_valid: + unlock_kernel(); + dput(parent); +@@ -638,6 +670,11 @@ static int nfs_dentry_delete(struct dent + /* Unhash it, so that ->d_iput() would be called */ + return 1; + } ++ if (!(dentry->d_sb->s_flags & MS_ACTIVE)) { ++ /* Unhash it, so that ancestors of killed async unlink ++ * files will be cleaned up during umount */ ++ return 1; ++ } + return 0; + + } +@@ -693,6 +730,8 @@ static struct dentry *nfs_lookup(struct + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + + lock_kernel(); ++ /* Revalidate parent directory attribute cache */ ++ nfs_revalidate_inode(NFS_SERVER(dir), dir); + + /* If we're doing an exclusive create, optimize away the lookup */ + if (nfs_is_exclusive_create(dir, nd)) +@@ -715,6 +754,7 @@ no_entry: + error = 0; + d_add(dentry, inode); + nfs_renew_times(dentry); ++ nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + out_unlock: + unlock_kernel(); + out: +@@ -768,7 +808,15 @@ static struct dentry *nfs_atomic_lookup( + + /* Open the file on the server */ + lock_kernel(); +- inode = nfs4_atomic_open(dir, dentry, nd); ++ /* Revalidate parent directory attribute cache */ ++ nfs_revalidate_inode(NFS_SERVER(dir), dir); ++ ++ if (nd->intent.open.flags & O_CREAT) { ++ nfs_begin_data_update(dir); ++ inode = nfs4_atomic_open(dir, dentry, nd); ++ nfs_end_data_update(dir); ++ } else ++ inode = nfs4_atomic_open(dir, dentry, nd); + unlock_kernel(); + if (IS_ERR(inode)) { + error = PTR_ERR(inode); +@@ -790,6 +838,7 @@ static struct dentry *nfs_atomic_lookup( + no_entry: + d_add(dentry, inode); + nfs_renew_times(dentry); ++ nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + out: + BUG_ON(error > 0); + 
return ERR_PTR(error); +@@ -801,13 +850,16 @@ static int nfs_open_revalidate(struct de + { + struct dentry *parent = NULL; + struct inode *inode = dentry->d_inode; ++ struct inode *dir; ++ unsigned long verifier; + int openflags, ret = 0; + + /* NFS only supports OPEN for regular files */ + if (inode && !S_ISREG(inode->i_mode)) + goto no_open; + parent = dget_parent(dentry); +- if (!is_atomic_open(parent->d_inode, nd)) ++ dir = parent->d_inode; ++ if (!is_atomic_open(dir, nd)) + goto no_open; + openflags = nd->intent.open.flags; + if (openflags & O_CREAT) { +@@ -821,8 +873,16 @@ static int nfs_open_revalidate(struct de + /* We can't create new files, or truncate existing ones here */ + openflags &= ~(O_CREAT|O_TRUNC); + ++ /* ++ * Note: we're not holding inode->i_sem and so may be racing with ++ * operations that change the directory. We therefore save the ++ * change attribute *before* we do the RPC call. ++ */ + lock_kernel(); +- ret = nfs4_open_revalidate(parent->d_inode, dentry, openflags); ++ verifier = nfs_save_change_attribute(dir); ++ ret = nfs4_open_revalidate(dir, dentry, openflags); ++ if (!ret) ++ nfs_set_verifier(dentry, verifier); + unlock_kernel(); + out: + dput(parent); +@@ -869,15 +929,20 @@ int nfs_cached_lookup(struct inode *dir, + struct nfs_server *server; + struct nfs_entry entry; + struct page *page; +- unsigned long timestamp = NFS_MTIME_UPDATE(dir); ++ unsigned long timestamp; + int res; + + if (!NFS_USE_READDIRPLUS(dir)) + return -ENOENT; + server = NFS_SERVER(dir); +- if (server->flags & NFS_MOUNT_NOAC) ++ /* Don't use readdirplus unless the cache is stable */ ++ if ((server->flags & NFS_MOUNT_NOAC) != 0 ++ || nfs_caches_unstable(dir) ++ || nfs_attribute_timeout(dir)) + return -ENOENT; +- nfs_revalidate_inode(server, dir); ++ if ((NFS_FLAGS(dir) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) != 0) ++ return -ENOENT; ++ timestamp = NFS_I(dir)->readdir_timestamp; + + entry.fh = fh; + entry.fattr = fattr; +@@ -931,6 +996,7 @@ static int 
nfs_instantiate(struct dentry + if (inode) { + d_instantiate(dentry, inode); + nfs_renew_times(dentry); ++ nfs_set_verifier(dentry, nfs_save_change_attribute(dentry->d_parent->d_inode)); + error = 0; + } + return error; +@@ -969,11 +1035,13 @@ static int nfs_create(struct inode *dir, + * does not pass the create flags. + */ + lock_kernel(); +- nfs_zap_caches(dir); ++ nfs_begin_data_update(dir); + inode = NFS_PROTO(dir)->create(dir, &dentry->d_name, &attr, open_flags); ++ nfs_end_data_update(dir); + if (!IS_ERR(inode)) { + d_instantiate(dentry, inode); + nfs_renew_times(dentry); ++ nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + error = 0; + } else { + error = PTR_ERR(inode); +@@ -1004,9 +1072,10 @@ nfs_mknod(struct inode *dir, struct dent + attr.ia_valid = ATTR_MODE; + + lock_kernel(); +- nfs_zap_caches(dir); ++ nfs_begin_data_update(dir); + error = NFS_PROTO(dir)->mknod(dir, &dentry->d_name, &attr, rdev, + &fhandle, &fattr); ++ nfs_end_data_update(dir); + if (!error) + error = nfs_instantiate(dentry, &fhandle, &fattr); + else +@@ -1041,9 +1110,10 @@ static int nfs_mkdir(struct inode *dir, + */ + d_drop(dentry); + #endif +- nfs_zap_caches(dir); ++ nfs_begin_data_update(dir); + error = NFS_PROTO(dir)->mkdir(dir, &dentry->d_name, &attr, &fhandle, + &fattr); ++ nfs_end_data_update(dir); + if (!error) + error = nfs_instantiate(dentry, &fhandle, &fattr); + else +@@ -1060,10 +1130,12 @@ static int nfs_rmdir(struct inode *dir, + dir->i_ino, dentry->d_name.name); + + lock_kernel(); +- nfs_zap_caches(dir); ++ nfs_begin_data_update(dir); + error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name); +- if (!error) ++ /* Ensure the VFS deletes this inode */ ++ if (error == 0 && dentry->d_inode != NULL) + dentry->d_inode->i_nlink = 0; ++ nfs_end_data_update(dir); + unlock_kernel(); + + return error; +@@ -1119,12 +1191,21 @@ dentry->d_parent->d_name.name, dentry->d + goto out; + } while(sdentry->d_inode != NULL); /* need negative lookup */ + +- nfs_zap_caches(dir); + 
qsilly.name = silly; + qsilly.len = strlen(silly); +- error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, dir, &qsilly); ++ nfs_begin_data_update(dir); ++ if (dentry->d_inode) { ++ nfs_begin_data_update(dentry->d_inode); ++ error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, ++ dir, &qsilly); ++ nfs_end_data_update(dentry->d_inode); ++ } else ++ error = NFS_PROTO(dir)->rename(dir, &dentry->d_name, ++ dir, &qsilly); ++ nfs_end_data_update(dir); + if (!error) { + nfs_renew_times(dentry); ++ nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); + d_move(dentry, sdentry); + error = nfs_async_unlink(dentry); + /* If we return 0 we don't unlink */ +@@ -1156,14 +1237,17 @@ static int nfs_safe_remove(struct dentry + goto out; + } + +- nfs_zap_caches(dir); +- if (inode) +- NFS_CACHEINV(inode); +- error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); +- if (error < 0) +- goto out; +- if (inode) +- inode->i_nlink--; ++ nfs_begin_data_update(dir); ++ if (inode != NULL) { ++ nfs_begin_data_update(inode); ++ error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); ++ /* The VFS may want to delete this inode */ ++ if (error == 0) ++ inode->i_nlink--; ++ nfs_end_data_update(inode); ++ } else ++ error = NFS_PROTO(dir)->remove(dir, &dentry->d_name); ++ nfs_end_data_update(dir); + out: + return error; + } +@@ -1198,9 +1282,10 @@ static int nfs_unlink(struct inode *dir, + spin_unlock(&dentry->d_lock); + spin_unlock(&dcache_lock); + error = nfs_safe_remove(dentry); +- if (!error) ++ if (!error) { + nfs_renew_times(dentry); +- else if (need_rehash) ++ nfs_set_verifier(dentry, nfs_save_change_attribute(dir)); ++ } else if (need_rehash) + d_rehash(dentry); + unlock_kernel(); + return error; +@@ -1247,9 +1332,10 @@ dentry->d_parent->d_name.name, dentry->d + qsymname.len = strlen(symname); + + lock_kernel(); +- nfs_zap_caches(dir); ++ nfs_begin_data_update(dir); + error = NFS_PROTO(dir)->symlink(dir, &dentry->d_name, &qsymname, + &attr, &sym_fh, &sym_attr); ++ nfs_end_data_update(dir); + 
if (!error) { + error = nfs_instantiate(dentry, &sym_fh, &sym_attr); + } else { +@@ -1281,9 +1367,12 @@ nfs_link(struct dentry *old_dentry, stru + */ + lock_kernel(); + d_drop(dentry); +- nfs_zap_caches(dir); +- NFS_CACHEINV(inode); ++ ++ nfs_begin_data_update(dir); ++ nfs_begin_data_update(inode); + error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name); ++ nfs_end_data_update(inode); ++ nfs_end_data_update(dir); + unlock_kernel(); + return error; + } +@@ -1388,16 +1477,23 @@ go_ahead: + if (new_inode) + d_delete(new_dentry); + +- nfs_zap_caches(new_dir); +- nfs_zap_caches(old_dir); ++ nfs_begin_data_update(old_dir); ++ nfs_begin_data_update(new_dir); ++ nfs_begin_data_update(old_inode); + error = NFS_PROTO(old_dir)->rename(old_dir, &old_dentry->d_name, + new_dir, &new_dentry->d_name); ++ nfs_end_data_update(old_inode); ++ nfs_end_data_update(new_dir); ++ nfs_end_data_update(old_dir); + out: + if (rehash) + d_rehash(rehash); +- if (!error && !S_ISDIR(old_inode->i_mode)) +- d_move(old_dentry, new_dentry); +- nfs_renew_times(new_dentry); ++ if (!error) { ++ if (!S_ISDIR(old_inode->i_mode)) ++ d_move(old_dentry, new_dentry); ++ nfs_renew_times(new_dentry); ++ nfs_set_verifier(new_dentry, nfs_save_change_attribute(new_dir)); ++ } + + /* new dentry created? */ + if (dentry) +@@ -1451,7 +1547,8 @@ nfs_permission(struct inode *inode, int + + cred = rpcauth_lookupcred(NFS_CLIENT(inode)->cl_auth, 0); + if (cache->cred == cred +- && time_before(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode))) { ++ && time_before(jiffies, cache->jiffies + NFS_ATTRTIMEO(inode)) ++ && !(NFS_FLAGS(inode) & NFS_INO_INVALID_ATTR)) { + if (!(res = cache->err)) { + /* Is the mask a subset of an accepted mask? 
*/ + if ((cache->mask & mask) == mask) +diff -puN fs/nfs/direct.c~CITI_NFS4_ALL fs/nfs/direct.c +--- linux-2.6.3/fs/nfs/direct.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfs/direct.c 2004-02-19 16:47:03.000000000 -0500 +@@ -269,6 +269,7 @@ nfs_direct_write_seg(struct inode *inode + if (IS_SYNC(inode) || NFS_PROTO(inode)->version == 2 || count <= wsize) + wdata.args.stable = NFS_FILE_SYNC; + ++ nfs_begin_data_update(inode); + retry: + need_commit = 0; + tot_bytes = 0; +@@ -334,6 +335,8 @@ retry: + VERF_SIZE) != 0) + goto sync_retry; + } ++ nfs_end_data_update(inode); ++ NFS_FLAGS(inode) |= NFS_INO_INVALID_DATA; + + return tot_bytes; + +diff -puN fs/nfs/file.c~CITI_NFS4_ALL fs/nfs/file.c +--- linux-2.6.3/fs/nfs/file.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfs/file.c 2004-02-19 16:47:07.000000000 -0500 +@@ -63,6 +63,20 @@ struct inode_operations nfs_file_inode_o + .setattr = nfs_setattr, + }; + ++#ifdef CONFIG_NFS_V4 ++ ++struct inode_operations nfs4_file_inode_operations = { ++ .permission = nfs_permission, ++ .getattr = nfs_getattr, ++ .setattr = nfs_setattr, ++#ifdef CONFIG_NFS_V4_ACL ++ .getxattr = nfs_getxattr, ++ .setxattr = nfs_setxattr, ++#endif /* CONFIG_NFS_V4_ACL */ ++}; ++ ++#endif /* CONFIG_NFS_V4 */ ++ + /* Hack for future NFS swap support */ + #ifndef IS_SWAPFILE + # define IS_SWAPFILE(inode) (0) +@@ -104,11 +118,16 @@ nfs_file_flush(struct file *file) + + dfprintk(VFS, "nfs: flush(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + ++ if ((file->f_mode & FMODE_WRITE) == 0) ++ return 0; + lock_kernel(); +- status = nfs_wb_file(inode, file); ++ /* Ensure that data+attribute caches are up to date after close() */ ++ status = nfs_wb_all(inode); + if (!status) { + status = file->f_error; + file->f_error = 0; ++ if (!status) ++ __nfs_revalidate_inode(NFS_SERVER(inode), inode); + } + unlock_kernel(); + return status; +@@ -179,7 +198,7 @@ nfs_fsync(struct file *file, struct dent + 
dfprintk(VFS, "nfs: fsync(%s/%ld)\n", inode->i_sb->s_id, inode->i_ino); + + lock_kernel(); +- status = nfs_wb_file(inode, file); ++ status = nfs_wb_all(inode); + if (!status) { + status = file->f_error; + file->f_error = 0; +diff -puN fs/nfs/inode.c~CITI_NFS4_ALL fs/nfs/inode.c +--- linux-2.6.3/fs/nfs/inode.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfs/inode.c 2004-02-19 16:47:15.000000000 -0500 +@@ -53,8 +53,8 @@ + */ + #define NFS_MAX_READAHEAD RPC_MAXREQS + +-void nfs_zap_caches(struct inode *); + static void nfs_invalidate_inode(struct inode *); ++static int nfs_update_inode(struct inode *, struct nfs_fattr *, unsigned long); + + static struct inode *nfs_alloc_inode(struct super_block *sb); + static void nfs_destroy_inode(struct inode *); +@@ -118,7 +118,7 @@ nfs_write_inode(struct inode *inode, int + { + int flags = sync ? FLUSH_WAIT : 0; + +- nfs_commit_file(inode, NULL, 0, 0, flags); ++ nfs_commit_inode(inode, 0, 0, flags); + } + + static void +@@ -136,21 +136,24 @@ nfs_delete_inode(struct inode * inode) + clear_inode(inode); + } + +-/* +- * For the moment, the only task for the NFS clear_inode method is to +- * release the mmap credential +- */ + static void + nfs_clear_inode(struct inode *inode) + { + struct nfs_inode *nfsi = NFS_I(inode); + struct rpc_cred *cred = nfsi->mm_cred; + ++#ifdef CONFIG_NFS_V4_ACL ++ if (nfsi->acl != NFS4_ACL_NOT_CACHED) ++ posix_acl_release(nfsi->acl); ++ if (nfsi->default_acl != NFS4_ACL_NOT_CACHED) ++ posix_acl_release(nfsi->default_acl); ++#endif /* CONFIG_NFS_V4_ACL */ + if (cred) + put_rpccred(cred); + cred = nfsi->cache_access.cred; + if (cred) + put_rpccred(cred); ++ BUG_ON(atomic_read(&nfsi->data_updates) != 0); + } + + void +@@ -230,50 +233,23 @@ nfs_block_size(unsigned long bsize, unsi + /* + * Obtain the root inode of the file system. 
+ */ +-static int +-nfs_get_root(struct inode **rooti, rpc_authflavor_t authflavor, struct super_block *sb, struct nfs_fh *rootfh) ++static struct inode * ++nfs_get_root(struct super_block *sb, struct nfs_fh *rootfh, struct nfs_fsinfo *fsinfo) + { + struct nfs_server *server = NFS_SB(sb); +- struct nfs_fattr fattr = { }; ++ struct inode *rooti; + int error; + +- error = server->rpc_ops->getroot(server, rootfh, &fattr); +- if (error == -EACCES && authflavor > RPC_AUTH_MAXFLAVOR) { +- /* +- * Some authentication types (gss/krb5, most notably) +- * are such that root won't be able to present a +- * credential for GETATTR (ie, getroot()). +- * +- * We still want the mount to succeed. +- * +- * So we fake the attr values and mark the inode as such. +- * On the first succesful traversal, we fix everything. +- * The auth type test isn't quite correct, but whatever. +- */ +- dfprintk(VFS, "NFS: faking root inode\n"); +- +- fattr.fileid = 1; +- fattr.nlink = 2; /* minimum for a dir */ +- fattr.type = NFDIR; +- fattr.mode = S_IFDIR|S_IRUGO|S_IXUGO; +- fattr.size = 4096; +- fattr.du.nfs3.used = 1; +- fattr.valid = NFS_ATTR_FATTR|NFS_ATTR_FATTR_V3; +- } else if (error < 0) { ++ error = server->rpc_ops->getroot(server, rootfh, fsinfo); ++ if (error < 0) { + printk(KERN_NOTICE "nfs_get_root: getattr error = %d\n", -error); +- *rooti = NULL; /* superfluous ... 
but safe */ +- return error; ++ return ERR_PTR(error); + } + +- *rooti = nfs_fhget(sb, rootfh, &fattr); +- if (error == -EACCES && authflavor > RPC_AUTH_MAXFLAVOR) { +- if (*rooti) { +- NFS_FLAGS(*rooti) |= NFS_INO_FAKE_ROOT; +- NFS_CACHEINV((*rooti)); +- error = 0; +- } +- } +- return error; ++ rooti = nfs_fhget(sb, rootfh, fsinfo->fattr); ++ if (!rooti) ++ return ERR_PTR(-ENOMEM); ++ return rooti; + } + + /* +@@ -283,7 +259,7 @@ static int + nfs_sb_init(struct super_block *sb, rpc_authflavor_t authflavor) + { + struct nfs_server *server; +- struct inode *root_inode = NULL; ++ struct inode *root_inode; + struct nfs_fattr fattr; + struct nfs_fsinfo fsinfo = { + .fattr = &fattr, +@@ -299,8 +275,9 @@ nfs_sb_init(struct super_block *sb, rpc_ + + sb->s_magic = NFS_SUPER_MAGIC; + ++ root_inode = nfs_get_root(sb, &server->fh, &fsinfo); + /* Did getting the root inode fail? */ +- if (nfs_get_root(&root_inode, authflavor, sb, &server->fh) < 0) ++ if (IS_ERR(root_inode)) + goto out_no_root; + sb->s_root = d_alloc_root(root_inode); + if (!sb->s_root) +@@ -309,10 +286,6 @@ nfs_sb_init(struct super_block *sb, rpc_ + sb->s_root->d_op = server->rpc_ops->dentry_ops; + + /* Get some general file system info */ +- if (server->rpc_ops->fsinfo(server, &server->fh, &fsinfo) < 0) { +- printk(KERN_NOTICE "NFS: cannot retrieve file system info.\n"); +- goto out_no_root; +- } + if (server->namelen == 0 && + server->rpc_ops->pathconf(server, &server->fh, &pathinfo) >= 0) + server->namelen = pathinfo.max_namelen; +@@ -368,13 +341,11 @@ nfs_sb_init(struct super_block *sb, rpc_ + rpc_setbufsize(server->client, server->wsize + 100, server->rsize + 100); + return 0; + /* Yargs. It didn't work out. 
*/ +-out_free_all: +- if (root_inode) +- iput(root_inode); +- return -EINVAL; + out_no_root: + printk("nfs_read_super: get root inode failed\n"); +- goto out_free_all; ++ if (!IS_ERR(root_inode)) ++ iput(root_inode); ++ return -EINVAL; + } + + /* +@@ -627,13 +598,17 @@ static int nfs_show_options(struct seq_f + void + nfs_zap_caches(struct inode *inode) + { ++ struct nfs_inode *nfsi = NFS_I(inode); ++ int mode = inode->i_mode; ++ + NFS_ATTRTIMEO(inode) = NFS_MINATTRTIMEO(inode); + NFS_ATTRTIMEO_UPDATE(inode) = jiffies; + +- invalidate_remote_inode(inode); +- + memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); +- NFS_CACHEINV(inode); ++ if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) ++ nfsi->flags |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; ++ else ++ nfsi->flags |= NFS_INO_INVALID_ATTR; + } + + /* +@@ -673,9 +648,6 @@ nfs_find_actor(struct inode *inode, void + return 0; + if (is_bad_inode(inode)) + return 0; +- /* Force an attribute cache update if inode->i_count == 0 */ +- if (!atomic_read(&inode->i_count)) +- NFS_CACHEINV(inode); + return 1; + } + +@@ -729,12 +701,12 @@ nfs_fhget(struct super_block *sb, struct + inode->i_ino = hash; + + /* We can't support update_atime(), since the server will reset it */ +- inode->i_flags |= S_NOATIME; ++ inode->i_flags |= S_NOATIME|S_NOCMTIME; + inode->i_mode = fattr->mode; + /* Why so? Because we want revalidate for devices/FIFOs, and + * that's precisely what we have in nfs_file_inode_operations. 
+ */ +- inode->i_op = &nfs_file_inode_operations; ++ inode->i_op = NFS_SB(sb)->rpc_ops->file_inode_ops; + if (S_ISREG(inode->i_mode)) { + inode->i_fop = &nfs_file_operations; + inode->i_data.a_ops = &nfs_file_aops; +@@ -754,10 +726,6 @@ nfs_fhget(struct super_block *sb, struct + inode->i_atime = fattr->atime; + inode->i_mtime = fattr->mtime; + inode->i_ctime = fattr->ctime; +- nfsi->read_cache_ctime = fattr->ctime; +- nfsi->read_cache_mtime = fattr->mtime; +- nfsi->cache_mtime_jiffies = fattr->timestamp; +- nfsi->read_cache_isize = fattr->size; + if (fattr->valid & NFS_ATTR_FATTR_V4) + nfsi->change_attr = fattr->change_attr; + inode->i_size = nfs_size_to_loff_t(fattr->size); +@@ -778,7 +746,6 @@ nfs_fhget(struct super_block *sb, struct + nfsi->attrtimeo_timestamp = jiffies; + memset(nfsi->cookieverf, 0, sizeof(nfsi->cookieverf)); + nfsi->cache_access.cred = NULL; +- + unlock_new_inode(inode); + } else + nfs_refresh_inode(inode, fattr); +@@ -804,70 +771,50 @@ nfs_setattr(struct dentry *dentry, struc + struct nfs_fattr fattr; + int error; + ++ if (attr->ia_valid & ATTR_SIZE) { ++ if (!S_ISREG(inode->i_mode) || attr->ia_size == i_size_read(inode)) ++ attr->ia_valid &= ~ATTR_SIZE; ++ } ++ + /* Optimization: if the end result is no change, don't RPC */ + attr->ia_valid &= NFS_VALID_ATTRS; + if (attr->ia_valid == 0) + return 0; + + lock_kernel(); +- +- /* +- * Make sure the inode is up-to-date. 
+- */ +- error = nfs_revalidate_inode(NFS_SERVER(inode),inode); +- if (error) { +-#ifdef NFS_PARANOIA +-printk("nfs_setattr: revalidate failed, error=%d\n", error); +-#endif +- goto out; +- } +- +- if (!S_ISREG(inode->i_mode)) { +- attr->ia_valid &= ~ATTR_SIZE; +- if (attr->ia_valid == 0) +- goto out; +- } else { +- filemap_fdatawrite(inode->i_mapping); +- error = nfs_wb_all(inode); +- filemap_fdatawait(inode->i_mapping); +- if (error) +- goto out; +- /* Optimize away unnecessary truncates */ +- if ((attr->ia_valid & ATTR_SIZE) && i_size_read(inode) == attr->ia_size) +- attr->ia_valid &= ~ATTR_SIZE; ++ nfs_begin_data_update(inode); ++ /* Write all dirty data if we're changing file permissions or size */ ++ if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE)) != 0) { ++ if (filemap_fdatawrite(inode->i_mapping) == 0) ++ filemap_fdatawait(inode->i_mapping); ++ nfs_wb_all(inode); + } +- if (!attr->ia_valid) +- goto out; +- + error = NFS_PROTO(inode)->setattr(dentry, &fattr, attr); +- if (error) +- goto out; +- /* +- * If we changed the size or mtime, update the inode +- * now to avoid invalidating the page cache. +- */ +- if (attr->ia_valid & ATTR_SIZE) { +- if (attr->ia_size != fattr.size) +- printk("nfs_setattr: attr=%Ld, fattr=%Ld??\n", +- (long long) attr->ia_size, (long long)fattr.size); +- vmtruncate(inode, attr->ia_size); ++ if (error == 0) { ++ nfs_refresh_inode(inode, &fattr); ++ if ((attr->ia_valid & ATTR_MODE) != 0) { ++ int mode; ++ mode = inode->i_mode & ~S_IALLUGO; ++ mode |= attr->ia_mode & S_IALLUGO; ++ inode->i_mode = mode; ++ } ++ if ((attr->ia_valid & ATTR_UID) != 0) ++ inode->i_uid = attr->ia_uid; ++ if ((attr->ia_valid & ATTR_GID) != 0) ++ inode->i_gid = attr->ia_gid; ++ if ((attr->ia_valid & ATTR_SIZE) != 0) { ++ i_size_write(inode, attr->ia_size); ++ vmtruncate(inode, attr->ia_size); ++ } + } +- +- /* +- * If we changed the size or mtime, update the inode +- * now to avoid invalidating the page cache. 
+- */ +- if (!(fattr.valid & NFS_ATTR_WCC)) { +- struct nfs_inode *nfsi = NFS_I(inode); +- fattr.pre_size = nfsi->read_cache_isize; +- fattr.pre_mtime = nfsi->read_cache_mtime; +- fattr.pre_ctime = nfsi->read_cache_ctime; +- fattr.valid |= NFS_ATTR_WCC; +- } +- /* Force an attribute cache update */ +- NFS_CACHEINV(inode); +- error = nfs_refresh_inode(inode, &fattr); +-out: ++ if ((attr->ia_valid & (ATTR_MODE|ATTR_UID|ATTR_GID)) != 0) { ++ struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred; ++ if (*cred) { ++ put_rpccred(*cred); ++ *cred = NULL; ++ } ++ } ++ nfs_end_data_update(inode); + unlock_kernel(); + return error; + } +@@ -895,7 +842,19 @@ nfs_wait_on_inode(struct inode *inode, i + int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) + { + struct inode *inode = dentry->d_inode; +- int err = nfs_revalidate_inode(NFS_SERVER(inode), inode); ++ struct nfs_inode *nfsi = NFS_I(inode); ++ int need_atime = nfsi->flags & NFS_INO_INVALID_ATIME; ++ int err; ++ ++ if (__IS_FLG(inode, MS_NOATIME)) ++ need_atime = 0; ++ else if (__IS_FLG(inode, MS_NODIRATIME) && S_ISDIR(inode->i_mode)) ++ need_atime = 0; ++ /* We may force a getattr if the user cares about atime */ ++ if (need_atime) ++ err = __nfs_revalidate_inode(NFS_SERVER(inode), inode); ++ else ++ err = nfs_revalidate_inode(NFS_SERVER(inode), inode); + if (!err) + generic_fillattr(inode, stat); + return err; +@@ -930,8 +889,10 @@ int nfs_open(struct inode *inode, struct + auth = NFS_CLIENT(inode)->cl_auth; + cred = rpcauth_lookupcred(auth, 0); + filp->private_data = cred; +- if (filp->f_mode & FMODE_WRITE) ++ if ((filp->f_mode & FMODE_WRITE) != 0) { + nfs_set_mmcred(inode, cred); ++ nfs_begin_data_update(inode); ++ } + return 0; + } + +@@ -940,6 +901,8 @@ int nfs_release(struct inode *inode, str + struct rpc_cred *cred; + + lock_kernel(); ++ if ((filp->f_mode & FMODE_WRITE) != 0) ++ nfs_end_data_update(inode); + cred = nfs_file_cred(filp); + if (cred) + put_rpccred(cred); +@@ -956,6 
+919,9 @@ __nfs_revalidate_inode(struct nfs_server + { + int status = -ESTALE; + struct nfs_fattr fattr; ++ struct nfs_inode *nfsi = NFS_I(inode); ++ unsigned long verifier; ++ unsigned int flags; + + dfprintk(PAGECACHE, "NFS: revalidating (%s/%Ld)\n", + inode->i_sb->s_id, (long long)NFS_FILEID(inode)); +@@ -965,23 +931,22 @@ __nfs_revalidate_inode(struct nfs_server + goto out_nowait; + if (NFS_STALE(inode) && inode != inode->i_sb->s_root->d_inode) + goto out_nowait; +- if (NFS_FAKE_ROOT(inode)) { +- dfprintk(VFS, "NFS: not revalidating fake root\n"); +- status = 0; +- goto out_nowait; +- } + + while (NFS_REVALIDATING(inode)) { + status = nfs_wait_on_inode(inode, NFS_INO_REVALIDATING); + if (status < 0) + goto out_nowait; +- if (time_before(jiffies,NFS_READTIME(inode)+NFS_ATTRTIMEO(inode))) { +- status = NFS_STALE(inode) ? -ESTALE : 0; +- goto out_nowait; +- } ++ if (NFS_SERVER(inode)->flags & NFS_MOUNT_NOAC) ++ continue; ++ if (NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA|NFS_INO_INVALID_ATIME)) ++ continue; ++ status = NFS_STALE(inode) ? -ESTALE : 0; ++ goto out_nowait; + } + NFS_FLAGS(inode) |= NFS_INO_REVALIDATING; + ++ /* Protect against RPC races by saving the change attribute */ ++ verifier = nfs_save_change_attribute(inode); + status = NFS_PROTO(inode)->getattr(inode, &fattr); + if (status) { + dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) getattr failed, error=%d\n", +@@ -995,13 +960,36 @@ __nfs_revalidate_inode(struct nfs_server + goto out; + } + +- status = nfs_refresh_inode(inode, &fattr); ++ status = nfs_update_inode(inode, &fattr, verifier); + if (status) { + dfprintk(PAGECACHE, "nfs_revalidate_inode: (%s/%Ld) refresh failed, error=%d\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode), status); + goto out; + } ++ flags = nfsi->flags; ++ /* ++ * We may need to keep the attributes marked as invalid if ++ * we raced with nfs_end_attr_update(). 
++ */ ++ if (verifier == nfsi->cache_change_attribute) ++ nfsi->flags &= ~(NFS_INO_INVALID_ATTR|NFS_INO_INVALID_ATIME); ++ /* Do the page cache invalidation */ ++ if (flags & NFS_INO_INVALID_DATA) { ++ if (S_ISREG(inode->i_mode)) { ++ if (filemap_fdatawrite(inode->i_mapping) == 0) ++ filemap_fdatawait(inode->i_mapping); ++ nfs_wb_all(inode); ++ } ++ nfsi->flags &= ~NFS_INO_INVALID_DATA; ++ invalidate_inode_pages2(inode->i_mapping); ++ memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); ++ dfprintk(PAGECACHE, "NFS: (%s/%Ld) data cache invalidated\n", ++ inode->i_sb->s_id, ++ (long long)NFS_FILEID(inode)); ++ /* This ensures we revalidate dentries */ ++ nfsi->cache_change_attribute++; ++ } + dfprintk(PAGECACHE, "NFS: (%s/%Ld) revalidation complete\n", + inode->i_sb->s_id, + (long long)NFS_FILEID(inode)); +@@ -1009,41 +997,104 @@ __nfs_revalidate_inode(struct nfs_server + NFS_FLAGS(inode) &= ~NFS_INO_STALE; + out: + NFS_FLAGS(inode) &= ~NFS_INO_REVALIDATING; +- wake_up(&NFS_I(inode)->nfs_i_wait); ++ wake_up(&nfsi->nfs_i_wait); + out_nowait: + unlock_kernel(); + return status; + } + +-/* +- * nfs_fattr_obsolete - Test if attribute data is newer than cached data +- * @inode: inode +- * @fattr: attributes to test ++/** ++ * nfs_begin_data_update ++ * @inode - pointer to inode ++ * Declare that a set of operations will update file data on the server ++ */ ++void nfs_begin_data_update(struct inode *inode) ++{ ++ atomic_inc(&NFS_I(inode)->data_updates); ++} ++ ++/** ++ * nfs_end_data_update ++ * @inode - pointer to inode ++ * Declare end of the operations that will update file data ++ */ ++void nfs_end_data_update(struct inode *inode) ++{ ++ struct nfs_inode *nfsi = NFS_I(inode); ++ ++ if (atomic_dec_and_test(&nfsi->data_updates)) { ++ nfsi->cache_change_attribute ++; ++ /* Mark the attribute cache for revalidation */ ++ nfsi->flags |= NFS_INO_INVALID_ATTR; ++ /* Directories and symlinks: invalidate page cache too */ ++ if (S_ISDIR(inode->i_mode) || 
S_ISLNK(inode->i_mode)) ++ nfsi->flags |= NFS_INO_INVALID_DATA; ++ } ++} ++ ++/** ++ * nfs_refresh_inode - verify consistency of the inode attribute cache ++ * @inode - pointer to inode ++ * @fattr - updated attributes + * +- * Avoid stuffing the attribute cache with obsolete information. +- * We always accept updates if the attribute cache timed out, or if +- * fattr->ctime is newer than our cached value. +- * If fattr->ctime matches the cached value, we still accept the update +- * if it increases the file size. ++ * Verifies the attribute cache. If we have just changed the attributes, ++ * so that fattr carries weak cache consistency data, then it may ++ * also update the ctime/mtime/change_attribute. + */ +-static inline +-int nfs_fattr_obsolete(struct inode *inode, struct nfs_fattr *fattr) ++int nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) + { + struct nfs_inode *nfsi = NFS_I(inode); +- long cdif; ++ loff_t cur_size, new_isize; ++ int data_unstable; ++ ++ /* Are we in the process of updating data on the server? */ ++ data_unstable = nfs_caches_unstable(inode); ++ ++ if (fattr->valid & NFS_ATTR_FATTR_V4) { ++ if ((fattr->valid & NFS_ATTR_PRE_CHANGE) != 0 ++ && nfsi->change_attr == fattr->pre_change_attr) ++ nfsi->change_attr = fattr->change_attr; ++ if (!data_unstable && nfsi->change_attr != fattr->change_attr) ++ nfsi->flags |= NFS_INO_INVALID_ATTR; ++ } ++ ++ if ((fattr->valid & NFS_ATTR_FATTR) == 0) ++ return 0; ++ ++ /* Has the inode gone and changed behind our back? */ ++ if (nfsi->fileid != fattr->fileid ++ || (inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) ++ return -EIO; + +- if (time_after(jiffies, nfsi->read_cache_jiffies + nfsi->attrtimeo)) +- goto out_valid; +- cdif = fattr->ctime.tv_sec - nfsi->read_cache_ctime.tv_sec; +- if (cdif == 0) +- cdif = fattr->ctime.tv_nsec - nfsi->read_cache_ctime.tv_nsec; +- if (cdif > 0) +- goto out_valid; +- /* Ugh... 
*/ +- if (cdif == 0 && fattr->size > nfsi->read_cache_isize) +- goto out_valid; +- return -1; +- out_valid: ++ cur_size = i_size_read(inode); ++ new_isize = nfs_size_to_loff_t(fattr->size); ++ ++ /* If we have atomic WCC data, we may update some attributes */ ++ if ((fattr->valid & NFS_ATTR_WCC) != 0) { ++ if (timespec_equal(&inode->i_ctime, &fattr->pre_ctime)) ++ memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); ++ if (timespec_equal(&inode->i_mtime, &fattr->pre_mtime)) ++ memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); ++ } ++ ++ /* Verify a few of the more important attributes */ ++ if (!data_unstable) { ++ if (!timespec_equal(&inode->i_mtime, &fattr->mtime) ++ || cur_size != new_isize) ++ nfsi->flags |= NFS_INO_INVALID_ATTR; ++ } else if (S_ISREG(inode->i_mode) && new_isize > cur_size) ++ nfsi->flags |= NFS_INO_INVALID_ATTR; ++ ++ /* Have any file permissions changed? */ ++ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) ++ || inode->i_uid != fattr->uid ++ || inode->i_gid != fattr->gid) ++ nfsi->flags |= NFS_INO_INVALID_ATTR; ++ ++ if (!timespec_equal(&inode->i_atime, &fattr->atime)) ++ nfsi->flags |= NFS_INO_INVALID_ATIME; ++ ++ nfsi->read_cache_jiffies = fattr->timestamp; + return 0; + } + +@@ -1059,20 +1110,22 @@ int nfs_fattr_obsolete(struct inode *ino + * + * A very similar scenario holds for the dir cache. 
+ */ +-int +-__nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) ++static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr, unsigned long verifier) + { + struct nfs_inode *nfsi = NFS_I(inode); + __u64 new_size; + loff_t new_isize; +- int invalid = 0; +- int mtime_update = 0; ++ unsigned int invalid = 0; + loff_t cur_isize; ++ int data_unstable; + +- dfprintk(VFS, "NFS: refresh_inode(%s/%ld ct=%d info=0x%x)\n", +- inode->i_sb->s_id, inode->i_ino, ++ dfprintk(VFS, "NFS: %s(%s/%ld ct=%d info=0x%x)\n", ++ __FUNCTION__, inode->i_sb->s_id, inode->i_ino, + atomic_read(&inode->i_count), fattr->valid); + ++ if ((fattr->valid & NFS_ATTR_FATTR) == 0) ++ return 0; ++ + /* First successful call after mount, fill real data. */ + if (NFS_FAKE_ROOT(inode)) { + dfprintk(VFS, "NFS: updating fake root\n"); +@@ -1081,43 +1134,49 @@ __nfs_refresh_inode(struct inode *inode, + } + + if (nfsi->fileid != fattr->fileid) { +- printk(KERN_ERR "nfs_refresh_inode: inode number mismatch\n" ++ printk(KERN_ERR "%s: inode number mismatch\n" + "expected (%s/0x%Lx), got (%s/0x%Lx)\n", ++ __FUNCTION__, + inode->i_sb->s_id, (long long)nfsi->fileid, + inode->i_sb->s_id, (long long)fattr->fileid); + goto out_err; + } + +- /* Throw out obsolete READDIRPLUS attributes */ +- if (time_before(fattr->timestamp, NFS_READTIME(inode))) +- return 0; + /* + * Make sure the inode's type hasn't changed. + */ + if ((inode->i_mode & S_IFMT) != (fattr->mode & S_IFMT)) + goto out_changed; + +- new_size = fattr->size; +- new_isize = nfs_size_to_loff_t(fattr->size); +- +- /* Avoid races */ +- if (nfs_fattr_obsolete(inode, fattr)) +- goto out_nochange; +- + /* + * Update the read time so we don't revalidate too often. + */ + nfsi->read_cache_jiffies = fattr->timestamp; + +- /* +- * Note: NFS_CACHE_ISIZE(inode) reflects the state of the cache. +- * NOT inode->i_size!!! +- */ +- if (nfsi->read_cache_isize != new_size) { ++ /* Are we racing with known updates of the metadata on the server? 
*/ ++ data_unstable = ! nfs_verify_change_attribute(inode, verifier); ++ ++ /* Check if the file size agrees */ ++ new_size = fattr->size; ++ new_isize = nfs_size_to_loff_t(fattr->size); ++ cur_isize = i_size_read(inode); ++ if (cur_isize != new_size) { + #ifdef NFS_DEBUG_VERBOSE + printk(KERN_DEBUG "NFS: isize change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); + #endif +- invalid = 1; ++ /* ++ * If we have pending writebacks, things can get ++ * messy. ++ */ ++ if (S_ISREG(inode->i_mode) && data_unstable) { ++ if (new_isize > cur_isize) { ++ i_size_write(inode, new_isize); ++ invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; ++ } ++ } else { ++ i_size_write(inode, new_isize); ++ invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; ++ } + } + + /* +@@ -1125,12 +1184,13 @@ __nfs_refresh_inode(struct inode *inode, + * can change this value in VFS without requiring a + * cache revalidation. + */ +- if (!timespec_equal(&nfsi->read_cache_mtime, &fattr->mtime)) { ++ if (!timespec_equal(&inode->i_mtime, &fattr->mtime)) { ++ memcpy(&inode->i_mtime, &fattr->mtime, sizeof(inode->i_mtime)); + #ifdef NFS_DEBUG_VERBOSE + printk(KERN_DEBUG "NFS: mtime change on %s/%ld\n", inode->i_sb->s_id, inode->i_ino); + #endif +- invalid = 1; +- mtime_update = 1; ++ if (!data_unstable) ++ invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + } + + if ((fattr->valid & NFS_ATTR_FATTR_V4) +@@ -1139,47 +1199,15 @@ __nfs_refresh_inode(struct inode *inode, + printk(KERN_DEBUG "NFS: change_attr change on %s/%ld\n", + inode->i_sb->s_id, inode->i_ino); + #endif +- invalid = 1; +- } +- +- /* Check Weak Cache Consistency data. +- * If size and mtime match the pre-operation values, we can +- * assume that any attribute changes were caused by our NFS +- * operation, so there's no need to invalidate the caches. 
+- */ +- if ((fattr->valid & NFS_ATTR_PRE_CHANGE) +- && nfsi->change_attr == fattr->pre_change_attr) { +- invalid = 0; +- } +- else if ((fattr->valid & NFS_ATTR_WCC) +- && nfsi->read_cache_isize == fattr->pre_size +- && timespec_equal(&nfsi->read_cache_mtime, &fattr->pre_mtime)) { +- invalid = 0; +- } +- +- /* +- * If we have pending writebacks, things can get +- * messy. +- */ +- cur_isize = i_size_read(inode); +- if (nfs_have_writebacks(inode) && new_isize < cur_isize) +- new_isize = cur_isize; +- +- nfsi->read_cache_ctime = fattr->ctime; +- inode->i_ctime = fattr->ctime; +- inode->i_atime = fattr->atime; +- +- if (mtime_update) { +- if (invalid) +- nfsi->cache_mtime_jiffies = fattr->timestamp; +- nfsi->read_cache_mtime = fattr->mtime; +- inode->i_mtime = fattr->mtime; ++ nfsi->change_attr = fattr->change_attr; ++ if (!data_unstable) ++ invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA; + } + +- nfsi->read_cache_isize = new_size; +- i_size_write(inode, new_isize); ++ memcpy(&inode->i_ctime, &fattr->ctime, sizeof(inode->i_ctime)); ++ memcpy(&inode->i_atime, &fattr->atime, sizeof(inode->i_atime)); + +- if (inode->i_mode != fattr->mode || ++ if ((inode->i_mode & S_IALLUGO) != (fattr->mode & S_IALLUGO) || + inode->i_uid != fattr->uid || + inode->i_gid != fattr->gid) { + struct rpc_cred **cred = &NFS_I(inode)->cache_access.cred; +@@ -1187,15 +1215,17 @@ __nfs_refresh_inode(struct inode *inode, + put_rpccred(*cred); + *cred = NULL; + } ++ invalid |= NFS_INO_INVALID_ATTR; + } + +- if (fattr->valid & NFS_ATTR_FATTR_V4) +- nfsi->change_attr = fattr->change_attr; +- + inode->i_mode = fattr->mode; + inode->i_nlink = fattr->nlink; + inode->i_uid = fattr->uid; + inode->i_gid = fattr->gid; ++#ifdef CONFIG_NFS_V4_ACL ++ nfs4_izap_acl(inode, &nfsi->acl); ++ nfs4_izap_acl(inode, &nfsi->default_acl); ++#endif /* CONFIG_NFS_V4_ACL */ + + if (fattr->valid & (NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4)) { + /* +@@ -1207,31 +1237,30 @@ __nfs_refresh_inode(struct inode *inode, + 
inode->i_blocks = fattr->du.nfs2.blocks; + inode->i_blksize = fattr->du.nfs2.blocksize; + } +- +- /* Update attrtimeo value */ +- if (invalid) { ++ ++ /* Update attrtimeo value if we're out of the unstable period */ ++ if (invalid & NFS_INO_INVALID_ATTR) { + nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); + nfsi->attrtimeo_timestamp = jiffies; +- invalidate_remote_inode(inode); +- memset(NFS_COOKIEVERF(inode), 0, sizeof(NFS_COOKIEVERF(inode))); + } else if (time_after(jiffies, nfsi->attrtimeo_timestamp+nfsi->attrtimeo)) { + if ((nfsi->attrtimeo <<= 1) > NFS_MAXATTRTIMEO(inode)) + nfsi->attrtimeo = NFS_MAXATTRTIMEO(inode); + nfsi->attrtimeo_timestamp = jiffies; + } ++ /* Don't invalidate the data if we were to blame */ ++ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ++ || S_ISLNK(inode->i_mode))) ++ invalid &= ~NFS_INO_INVALID_DATA; ++ nfsi->flags |= invalid; + + return 0; +- out_nochange: +- if (!timespec_equal(&fattr->atime, &inode->i_atime)) +- inode->i_atime = fattr->atime; +- return 0; + out_changed: + /* + * Big trouble! The inode has become a different object. 
+ */ + #ifdef NFS_PARANOIA +- printk(KERN_DEBUG "nfs_refresh_inode: inode %ld mode changed, %07o to %07o\n", +- inode->i_ino, inode->i_mode, fattr->mode); ++ printk(KERN_DEBUG "%s: inode %ld mode changed, %07o to %07o\n", ++ __FUNCTION__, inode->i_ino, inode->i_mode, fattr->mode); + #endif + /* + * No need to worry about unhashing the dentry, as the +@@ -1355,6 +1384,82 @@ static struct file_system_type nfs_fs_ty + .fs_flags = FS_ODD_RENAME|FS_REVAL_DOT, + }; + ++#ifdef CONFIG_NFS_V4_ACL ++ ++int ++nfs_setxattr(struct dentry *dentry, const char *key, const void *buf, ++ size_t buflen, int flags) ++{ ++ struct posix_acl *acl; ++ int type, error; ++ struct inode *inode = dentry->d_inode; ++ ++ if (strlen(key) == sizeof(XATTR_NAME_ACL_ACCESS) - 1 && ++ memcmp(key, XATTR_NAME_ACL_ACCESS, ++ sizeof(XATTR_NAME_ACL_ACCESS) - 1) == 0) ++ type = ACL_TYPE_ACCESS; ++ else if (strlen(key) == sizeof(XATTR_NAME_ACL_DEFAULT) - 1 && ++ memcmp(key, XATTR_NAME_ACL_DEFAULT, ++ sizeof(XATTR_NAME_ACL_ACCESS) - 1) == 0) ++ type = ACL_TYPE_DEFAULT; ++ else ++ return (-EINVAL); ++ ++ if (!S_ISREG(inode->i_mode) && ++ (!S_ISDIR(inode->i_mode) || inode->i_mode & S_ISVTX)) ++ return (-EPERM); ++ ++ if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode)) ++ return -EACCES; ++ ++ acl = posix_acl_from_xattr(buf, buflen); ++ if (IS_ERR(acl)) ++ return (PTR_ERR(acl)); ++ if (acl == NULL) ++ return (-ENODATA); ++ ++ error = posix_acl_valid(acl); ++ if (error) ++ goto out_free; ++ ++ error = nfs4_proc_set_posix_acl(inode, type, acl); ++out_free: ++ posix_acl_release(acl); ++ return error; ++} ++ ++ssize_t ++nfs_getxattr(struct dentry *dentry, const char *key, void *buf, ++ size_t buflen) ++{ ++ int type = 0; ++ struct inode *inode = dentry->d_inode; ++ struct posix_acl *acl; ++ ssize_t ret; ++ ++ if (strlen(key) == sizeof(XATTR_NAME_ACL_ACCESS) - 1 && ++ memcmp(key, XATTR_NAME_ACL_ACCESS, ++ sizeof(XATTR_NAME_ACL_ACCESS) - 1) == 0) ++ type = ACL_TYPE_ACCESS; ++ else if (strlen(key) == 
sizeof(XATTR_NAME_ACL_DEFAULT) - 1 && ++ memcmp(key, XATTR_NAME_ACL_DEFAULT, ++ sizeof(XATTR_NAME_ACL_ACCESS) - 1) == 0) ++ type = ACL_TYPE_DEFAULT; ++ else ++ return (-EINVAL); ++ ++ acl = nfs4_proc_get_posix_acl(inode, type); ++ if (IS_ERR(acl)) ++ return (PTR_ERR(acl)); ++ ++ ret = posix_acl_to_xattr(acl, buf, buflen); ++ ++ posix_acl_release(acl); ++ return ret; ++} ++ ++#endif /* CONFIG_NFS_V4_ACL */ ++ + #ifdef CONFIG_NFS_V4 + + static void nfs4_clear_inode(struct inode *); +@@ -1601,7 +1706,7 @@ static struct super_block *nfs4_get_sb(s + + if (data->version != NFS4_MOUNT_VERSION) { + printk("nfs warning: mount version %s than kernel\n", +- data->version < NFS_MOUNT_VERSION ? "older" : "newer"); ++ data->version < NFS4_MOUNT_VERSION ? "older" : "newer"); + } + + p = nfs_copy_user_string(NULL, &data->hostname, 256); +@@ -1699,6 +1804,10 @@ static struct inode *nfs_alloc_inode(str + return NULL; + nfsi->flags = 0; + nfsi->mm_cred = NULL; ++#ifdef CONFIG_NFS_V4_ACL ++ nfsi->acl = NFS4_ACL_NOT_CACHED; ++ nfsi->default_acl = NFS4_ACL_NOT_CACHED; ++#endif /* CONFIG_NFS_V4_ACL */ + nfs4_zero_state(nfsi); + return &nfsi->vfs_inode; + } +@@ -1718,6 +1827,7 @@ static void init_once(void * foo, kmem_c + INIT_LIST_HEAD(&nfsi->dirty); + INIT_LIST_HEAD(&nfsi->commit); + INIT_RADIX_TREE(&nfsi->nfs_page_tree, GFP_ATOMIC); ++ atomic_set(&nfsi->data_updates, 0); + nfsi->ndirty = 0; + nfsi->ncommit = 0; + nfsi->npages = 0; +diff -puN fs/nfs/nfs3proc.c~CITI_NFS4_ALL fs/nfs/nfs3proc.c +--- linux-2.6.3/fs/nfs/nfs3proc.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfs/nfs3proc.c 2004-02-19 16:47:07.000000000 -0500 +@@ -68,20 +68,6 @@ nfs3_async_handle_jukebox(struct rpc_tas + return 1; + } + +-static void +-nfs3_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +-{ +- if (fattr->valid & NFS_ATTR_FATTR) { +- if (!(fattr->valid & NFS_ATTR_WCC)) { +- fattr->pre_size = NFS_CACHE_ISIZE(inode); +- fattr->pre_mtime = 
NFS_CACHE_MTIME(inode); +- fattr->pre_ctime = NFS_CACHE_CTIME(inode); +- fattr->valid |= NFS_ATTR_WCC; +- } +- nfs_refresh_inode(inode, fattr); +- } +-} +- + static struct rpc_cred * + nfs_cred(struct inode *inode, struct file *filp) + { +@@ -99,14 +85,18 @@ nfs_cred(struct inode *inode, struct fil + */ + static int + nfs3_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, +- struct nfs_fattr *fattr) ++ struct nfs_fsinfo *info) + { + int status; + +- dprintk("NFS call getroot\n"); +- fattr->valid = 0; +- status = rpc_call(server->client, NFS3PROC_GETATTR, fhandle, fattr, 0); +- dprintk("NFS reply getroot\n"); ++ dprintk("%s: call fsinfo\n", __FUNCTION__); ++ info->fattr->valid = 0; ++ status = rpc_call(server->client_sys, NFS3PROC_FSINFO, fhandle, info, 0); ++ dprintk("%s: reply fsinfo %d\n", __FUNCTION__, status); ++ if (!(info->fattr->valid & NFS_ATTR_FATTR)) { ++ status = rpc_call(server->client_sys, NFS3PROC_GETATTR, fhandle, info->fattr, 0); ++ dprintk("%s: reply getattr %d\n", __FUNCTION__, status); ++ } + return status; + } + +@@ -280,7 +270,7 @@ nfs3_proc_write(struct nfs_write_data *w + msg.rpc_cred = nfs_cred(inode, filp); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags); + if (status >= 0) +- nfs3_write_refresh_inode(inode, fattr); ++ nfs_refresh_inode(inode, fattr); + dprintk("NFS reply write: %d\n", status); + return status < 0? 
status : wdata->res.count; + } +@@ -303,7 +293,7 @@ nfs3_proc_commit(struct nfs_write_data * + msg.rpc_cred = nfs_cred(inode, filp); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + if (status >= 0) +- nfs3_write_refresh_inode(inode, fattr); ++ nfs_refresh_inode(inode, fattr); + dprintk("NFS reply commit: %d\n", status); + return status; + } +@@ -777,12 +767,13 @@ nfs3_proc_read_setup(struct nfs_read_dat + static void + nfs3_write_done(struct rpc_task *task) + { +- struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; ++ struct nfs_write_data *data; + + if (nfs3_async_handle_jukebox(task)) + return; ++ data = (struct nfs_write_data *)task->tk_calldata; + if (task->tk_status >= 0) +- nfs3_write_refresh_inode(data->inode, data->res.fattr); ++ nfs_refresh_inode(data->inode, data->res.fattr); + nfs_writeback_done(task); + } + +@@ -835,12 +826,13 @@ nfs3_proc_write_setup(struct nfs_write_d + static void + nfs3_commit_done(struct rpc_task *task) + { +- struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; ++ struct nfs_write_data *data; + + if (nfs3_async_handle_jukebox(task)) + return; ++ data = (struct nfs_write_data *)task->tk_calldata; + if (task->tk_status >= 0) +- nfs3_write_refresh_inode(data->inode, data->res.fattr); ++ nfs_refresh_inode(data->inode, data->res.fattr); + nfs_commit_done(task); + } + +@@ -907,6 +899,7 @@ struct nfs_rpc_ops nfs_v3_clientops = { + .version = 3, /* protocol version */ + .dentry_ops = &nfs_dentry_operations, + .dir_inode_ops = &nfs_dir_inode_operations, ++ .file_inode_ops = &nfs_file_inode_operations, + .getroot = nfs3_proc_get_root, + .getattr = nfs3_proc_getattr, + .setattr = nfs3_proc_setattr, +diff -puN fs/nfs/nfs4proc.c~CITI_NFS4_ALL fs/nfs/nfs4proc.c +--- linux-2.6.3/fs/nfs/nfs4proc.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfs/nfs4proc.c 2004-02-19 16:47:15.000000000 -0500 +@@ -46,112 +46,20 @@ + #include + #include + #include ++#include 
++#include + + #define NFSDBG_FACILITY NFSDBG_PROC + + #define NFS4_POLL_RETRY_TIME (15*HZ) + +-#define GET_OP(cp,name) &cp->ops[cp->req_nops].u.name +-#define OPNUM(cp) cp->ops[cp->req_nops].opnum +- ++static int nfs4_proc_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); + static int nfs4_async_handle_error(struct rpc_task *, struct nfs_server *); + extern u32 *nfs4_decode_dirent(u32 *p, struct nfs_entry *entry, int plus); + extern struct rpc_procinfo nfs4_procedures[]; + + extern nfs4_stateid zero_stateid; + +-static void +-nfs4_setup_compound(struct nfs4_compound *cp, struct nfs4_op *ops, +- struct nfs_server *server, char *tag) +-{ +- memset(cp, 0, sizeof(*cp)); +- cp->ops = ops; +- cp->server = server; +-} +- +-static void +-nfs4_setup_access(struct nfs4_compound *cp, u32 req_access, u32 *resp_supported, u32 *resp_access) +-{ +- struct nfs4_access *access = GET_OP(cp, access); +- +- access->ac_req_access = req_access; +- access->ac_resp_supported = resp_supported; +- access->ac_resp_access = resp_access; +- +- OPNUM(cp) = OP_ACCESS; +- cp->req_nops++; +-} +- +-static void +-nfs4_setup_create_dir(struct nfs4_compound *cp, struct qstr *name, +- struct iattr *sattr, struct nfs4_change_info *info) +-{ +- struct nfs4_create *create = GET_OP(cp, create); +- +- create->cr_ftype = NF4DIR; +- create->cr_namelen = name->len; +- create->cr_name = name->name; +- create->cr_attrs = sattr; +- create->cr_cinfo = info; +- +- OPNUM(cp) = OP_CREATE; +- cp->req_nops++; +-} +- +-static void +-nfs4_setup_create_symlink(struct nfs4_compound *cp, struct qstr *name, +- struct qstr *linktext, struct iattr *sattr, +- struct nfs4_change_info *info) +-{ +- struct nfs4_create *create = GET_OP(cp, create); +- +- create->cr_ftype = NF4LNK; +- create->cr_textlen = linktext->len; +- create->cr_text = linktext->name; +- create->cr_namelen = name->len; +- create->cr_name = name->name; +- create->cr_attrs = sattr; +- create->cr_cinfo = info; +- +- OPNUM(cp) = OP_CREATE; +- 
cp->req_nops++; +-} +- +-static void +-nfs4_setup_create_special(struct nfs4_compound *cp, struct qstr *name, +- dev_t dev, struct iattr *sattr, +- struct nfs4_change_info *info) +-{ +- int mode = sattr->ia_mode; +- struct nfs4_create *create = GET_OP(cp, create); +- +- BUG_ON(!(sattr->ia_valid & ATTR_MODE)); +- BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode)); +- +- if (S_ISFIFO(mode)) +- create->cr_ftype = NF4FIFO; +- else if (S_ISBLK(mode)) { +- create->cr_ftype = NF4BLK; +- create->cr_specdata1 = MAJOR(dev); +- create->cr_specdata2 = MINOR(dev); +- } +- else if (S_ISCHR(mode)) { +- create->cr_ftype = NF4CHR; +- create->cr_specdata1 = MAJOR(dev); +- create->cr_specdata2 = MINOR(dev); +- } +- else +- create->cr_ftype = NF4SOCK; +- +- create->cr_namelen = name->len; +- create->cr_name = name->name; +- create->cr_attrs = sattr; +- create->cr_cinfo = info; +- +- OPNUM(cp) = OP_CREATE; +- cp->req_nops++; +-} +- + /* + * This is our standard bitmap for GETATTR requests. 
+ */ +@@ -181,126 +89,15 @@ u32 nfs4_statfs_bitmap[2] = { + | FATTR4_WORD1_SPACE_TOTAL + }; + +-u32 nfs4_pathconf_bitmap[2] = { +- FATTR4_WORD0_MAXLINK +- | FATTR4_WORD0_MAXNAME, +- 0 +-}; +- +-static inline void +-__nfs4_setup_getattr(struct nfs4_compound *cp, u32 *bitmap, +- struct nfs_fattr *fattr, +- struct nfs_fsstat *fsstat, +- struct nfs_pathconf *pathconf) +-{ +- struct nfs4_getattr *getattr = GET_OP(cp, getattr); +- +- getattr->gt_bmval = bitmap; +- getattr->gt_attrs = fattr; +- getattr->gt_fsstat = fsstat; +- getattr->gt_pathconf = pathconf; +- +- OPNUM(cp) = OP_GETATTR; +- cp->req_nops++; +-} +- +-static void +-nfs4_setup_getattr(struct nfs4_compound *cp, +- struct nfs_fattr *fattr) +-{ +- __nfs4_setup_getattr(cp, nfs4_fattr_bitmap, fattr, +- NULL, NULL); +-} +- +-static void +-nfs4_setup_statfs(struct nfs4_compound *cp, +- struct nfs_fsstat *fsstat) +-{ +- __nfs4_setup_getattr(cp, nfs4_statfs_bitmap, +- NULL, fsstat, NULL); +-} +- +-static void +-nfs4_setup_pathconf(struct nfs4_compound *cp, +- struct nfs_pathconf *pathconf) +-{ +- __nfs4_setup_getattr(cp, nfs4_pathconf_bitmap, +- NULL, NULL, pathconf); +-} +- +-static void +-nfs4_setup_getfh(struct nfs4_compound *cp, struct nfs_fh *fhandle) +-{ +- struct nfs4_getfh *getfh = GET_OP(cp, getfh); +- +- getfh->gf_fhandle = fhandle; +- +- OPNUM(cp) = OP_GETFH; +- cp->req_nops++; +-} +- +-static void +-nfs4_setup_link(struct nfs4_compound *cp, struct qstr *name, +- struct nfs4_change_info *info) +-{ +- struct nfs4_link *link = GET_OP(cp, link); +- +- link->ln_namelen = name->len; +- link->ln_name = name->name; +- link->ln_cinfo = info; +- +- OPNUM(cp) = OP_LINK; +- cp->req_nops++; +-} +- + static void +-nfs4_setup_lookup(struct nfs4_compound *cp, struct qstr *q) +-{ +- struct nfs4_lookup *lookup = GET_OP(cp, lookup); +- +- lookup->lo_name = q; +- +- OPNUM(cp) = OP_LOOKUP; +- cp->req_nops++; +-} +- +-static void +-nfs4_setup_putfh(struct nfs4_compound *cp, struct nfs_fh *fhandle) +-{ +- struct nfs4_putfh 
*putfh = GET_OP(cp, putfh); +- +- putfh->pf_fhandle = fhandle; +- +- OPNUM(cp) = OP_PUTFH; +- cp->req_nops++; +-} +- +-static void +-nfs4_setup_putrootfh(struct nfs4_compound *cp) +-{ +- OPNUM(cp) = OP_PUTROOTFH; +- cp->req_nops++; +-} +- +-static void +-nfs4_setup_readdir(struct nfs4_compound *cp, u64 cookie, u32 *verifier, +- struct page **pages, unsigned int bufsize, struct dentry *dentry) ++nfs4_setup_readdir(u64 cookie, u32 *verifier, struct dentry *dentry, struct nfs4_readdir_arg *readdir) + { + u32 *start, *p; +- struct nfs4_readdir *readdir = GET_OP(cp, readdir); + +- BUG_ON(bufsize < 80); +- readdir->rd_cookie = (cookie > 2) ? cookie : 0; +- memcpy(&readdir->rd_req_verifier, verifier, sizeof(readdir->rd_req_verifier)); +- readdir->rd_count = bufsize; +- readdir->rd_bmval[0] = FATTR4_WORD0_FILEID; +- readdir->rd_bmval[1] = 0; +- readdir->rd_pages = pages; +- readdir->rd_pgbase = 0; ++ BUG_ON(readdir->count < 80); ++ readdir->cookie = (cookie > 2) ? cookie : 0; ++ memcpy(&readdir->req_verifier, verifier, sizeof(readdir->req_verifier)); + +- OPNUM(cp) = OP_READDIR; +- cp->req_nops++; +- + if (cookie >= 2) + return; + +@@ -311,7 +108,7 @@ nfs4_setup_readdir(struct nfs4_compound + * when talking to the server, we always send cookie 0 + * instead of 1 or 2. 
+ */ +- start = p = (u32 *)kmap_atomic(*pages, KM_USER0); ++ start = p = (u32 *)kmap_atomic(*readdir->pages, KM_USER0); + + if (cookie == 0) { + *p++ = xdr_one; /* next */ +@@ -337,68 +134,12 @@ nfs4_setup_readdir(struct nfs4_compound + *p++ = htonl(8); /* attribute buffer length */ + p = xdr_encode_hyper(p, NFS_FILEID(dentry->d_parent->d_inode)); + +- readdir->rd_pgbase = (char *)p - (char *)start; +- readdir->rd_count -= readdir->rd_pgbase; ++ readdir->pgbase = (char *)p - (char *)start; ++ readdir->count -= readdir->pgbase; + kunmap_atomic(start, KM_USER0); + } + + static void +-nfs4_setup_readlink(struct nfs4_compound *cp, int count, struct page **pages) +-{ +- struct nfs4_readlink *readlink = GET_OP(cp, readlink); +- +- readlink->rl_count = count; +- readlink->rl_pages = pages; +- +- OPNUM(cp) = OP_READLINK; +- cp->req_nops++; +-} +- +-static void +-nfs4_setup_remove(struct nfs4_compound *cp, struct qstr *name, struct nfs4_change_info *cinfo) +-{ +- struct nfs4_remove *remove = GET_OP(cp, remove); +- +- remove->rm_namelen = name->len; +- remove->rm_name = name->name; +- remove->rm_cinfo = cinfo; +- +- OPNUM(cp) = OP_REMOVE; +- cp->req_nops++; +-} +- +-static void +-nfs4_setup_rename(struct nfs4_compound *cp, struct qstr *old, struct qstr *new, +- struct nfs4_change_info *old_cinfo, struct nfs4_change_info *new_cinfo) +-{ +- struct nfs4_rename *rename = GET_OP(cp, rename); +- +- rename->rn_oldnamelen = old->len; +- rename->rn_oldname = old->name; +- rename->rn_newnamelen = new->len; +- rename->rn_newname = new->name; +- rename->rn_src_cinfo = old_cinfo; +- rename->rn_dst_cinfo = new_cinfo; +- +- OPNUM(cp) = OP_RENAME; +- cp->req_nops++; +-} +- +-static void +-nfs4_setup_restorefh(struct nfs4_compound *cp) +-{ +- OPNUM(cp) = OP_RESTOREFH; +- cp->req_nops++; +-} +- +-static void +-nfs4_setup_savefh(struct nfs4_compound *cp) +-{ +- OPNUM(cp) = OP_SAVEFH; +- cp->req_nops++; +-} +- +-static void + renew_lease(struct nfs_server *server, unsigned long timestamp) + { + 
struct nfs4_client *clp = server->nfs4_state; +@@ -409,47 +150,6 @@ renew_lease(struct nfs_server *server, u + } + + static inline void +-process_lease(struct nfs4_compound *cp) +-{ +- /* +- * Generic lease processing: If this operation contains a +- * lease-renewing operation, and it succeeded, update the RENEW time +- * in the superblock. Instead of the current time, we use the time +- * when the request was sent out. (All we know is that the lease was +- * renewed sometime between then and now, and we have to assume the +- * worst case.) +- * +- * Notes: +- * (1) renewd doesn't acquire the spinlock when messing with +- * server->last_renewal; this is OK since rpciod always runs +- * under the BKL. +- * (2) cp->timestamp was set at the end of XDR encode. +- */ +- if (!cp->renew_index) +- return; +- if (!cp->toplevel_status || cp->resp_nops > cp->renew_index) +- renew_lease(cp->server, cp->timestamp); +-} +- +-static int +-nfs4_call_compound(struct nfs4_compound *cp, struct rpc_cred *cred, int flags) +-{ +- int status; +- struct rpc_message msg = { +- .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMPOUND], +- .rpc_argp = cp, +- .rpc_resp = cp, +- .rpc_cred = cred, +- }; +- +- status = rpc_call_sync(cp->server->client, &msg, flags); +- if (!status) +- process_lease(cp); +- +- return status; +-} +- +-static inline void + process_cinfo(struct nfs4_change_info *info, struct nfs_fattr *fattr) + { + BUG_ON((fattr->valid & NFS_ATTR_FATTR) == 0); +@@ -476,11 +176,6 @@ nfs4_open_reclaim(struct nfs4_state_owne + .valid = 0, + }; + struct nfs4_change_info d_cinfo; +- struct nfs4_getattr f_getattr = { +- .gt_bmval = nfs4_fattr_bitmap, +- .gt_attrs = &fattr, +- }; +- + struct nfs_open_reclaimargs o_arg = { + .fh = NFS_FH(inode), + .seqid = sp->so_seqid, +@@ -488,11 +183,10 @@ nfs4_open_reclaim(struct nfs4_state_owne + .share_access = state->state, + .clientid = server->nfs4_state->cl_clientid, + .claim = NFS4_OPEN_CLAIM_PREVIOUS, +- .f_getattr = &f_getattr, + }; + struct 
nfs_openres o_res = { +- .cinfo = &d_cinfo, +- .f_getattr = &f_getattr, ++ .cinfo = &d_cinfo, ++ .f_attr = &fattr, + .server = server, /* Grrr */ + }; + struct rpc_message msg = { +@@ -528,28 +222,18 @@ nfs4_do_open(struct inode *dir, struct q + struct nfs_fattr f_attr = { + .valid = 0, + }; +- struct nfs4_getattr f_getattr = { +- .gt_bmval = nfs4_fattr_bitmap, +- .gt_attrs = &f_attr, +- }; +- struct nfs4_getattr d_getattr = { +- .gt_bmval = nfs4_fattr_bitmap, +- .gt_attrs = &d_attr, +- }; + struct nfs_openargs o_arg = { + .fh = NFS_FH(dir), + .share_access = flags & (FMODE_READ|FMODE_WRITE), + .opentype = (flags & O_CREAT) ? NFS4_OPEN_CREATE : NFS4_OPEN_NOCREATE, + .createmode = (flags & O_EXCL) ? NFS4_CREATE_EXCLUSIVE : NFS4_CREATE_UNCHECKED, + .name = name, +- .f_getattr = &f_getattr, +- .d_getattr = &d_getattr, + .server = server, + }; + struct nfs_openres o_res = { + .cinfo = &d_cinfo, +- .f_getattr = &f_getattr, +- .d_getattr = &d_getattr, ++ .f_attr = &f_attr, ++ .d_attr = &d_attr, + .server = server, + }; + struct rpc_message msg = { +@@ -665,18 +349,14 @@ nfs4_do_setattr(struct nfs_server *serve + struct nfs_fh *fhandle, struct iattr *sattr, + struct nfs4_state *state) + { +- struct nfs4_getattr getattr = { +- .gt_bmval = nfs4_fattr_bitmap, +- .gt_attrs = fattr, +- }; + struct nfs_setattrargs arg = { + .fh = fhandle, + .iap = sattr, +- .attr = &getattr, ++ .fattr = fattr, + .server = server, + }; + struct nfs_setattrres res = { +- .attr = &getattr, ++ .fattr = fattr, + .server = server, + }; + struct rpc_message msg = { +@@ -822,27 +502,43 @@ nfs4_open_revalidate(struct inode *dir, + + static int + nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, +- struct nfs_fattr *fattr) ++ struct nfs_fsinfo *info) + { +- struct nfs4_compound compound; +- struct nfs4_op ops[4]; ++ struct nfs_fattr * fattr = info->fattr; + unsigned char * p; + struct qstr q; + int status; ++ struct nfs4_getroot_arg args = { ++ .fhandle = fhandle, ++ .name = &q, ++ }; 
++ struct nfs4_getroot_res res = { ++ .server = server, ++ .fattr = fattr, ++ .fhandle = fhandle, ++ }; ++ struct rpc_message msg_head = { ++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETROOT_HEAD], ++ .rpc_argp = NULL, ++ .rpc_resp = &res, ++ }; ++ struct rpc_message msg_path = { ++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETROOT_PATH], ++ .rpc_argp = &args, ++ .rpc_resp = &res, ++ }; + + /* + * Now we do a separate LOOKUP for each component of the mount path. + * The LOOKUPs are done separately so that we can conveniently + * catch an ERR_WRONGSEC if it occurs along the way... + */ +- p = server->mnt_path; + fattr->valid = 0; +- nfs4_setup_compound(&compound, ops, server, "getrootfh"); +- nfs4_setup_putrootfh(&compound); +- nfs4_setup_getattr(&compound, fattr); +- nfs4_setup_getfh(&compound, fhandle); +- if ((status = nfs4_call_compound(&compound, NULL, 0))) ++ status = rpc_call_sync(server->client, &msg_head, 0); ++ if (status) + goto out; ++ ++ p = server->mnt_path; + for (;;) { + while (*p == '/') + p++; +@@ -854,12 +550,7 @@ nfs4_proc_get_root(struct nfs_server *se + q.len = p - q.name; + + fattr->valid = 0; +- nfs4_setup_compound(&compound, ops, server, "mount"); +- nfs4_setup_putfh(&compound, fhandle); +- nfs4_setup_lookup(&compound, &q); +- nfs4_setup_getattr(&compound, fattr); +- nfs4_setup_getfh(&compound, fhandle); +- status = nfs4_call_compound(&compound, NULL, 0); ++ status = rpc_call_sync(server->client,&msg_path,0); + if (!status) + continue; + if (status == -ENOENT) { +@@ -869,21 +560,27 @@ nfs4_proc_get_root(struct nfs_server *se + break; + } + out: +- return status; ++ if (status) ++ return status; ++ return nfs4_proc_fsinfo(server, fhandle, info); + } + + static int + nfs4_proc_getattr(struct inode *inode, struct nfs_fattr *fattr) + { +- struct nfs4_compound compound; +- struct nfs4_op ops[2]; +- ++ struct nfs4_getattr_res res = { ++ .fattr = fattr, ++ .server = NFS_SERVER(inode), ++ }; ++ struct rpc_message msg = { ++ .rpc_proc = 
&nfs4_procedures[NFSPROC4_CLNT_GETATTR], ++ .rpc_argp = NFS_FH(inode), ++ .rpc_resp = &res, ++ }; ++ + fattr->valid = 0; + +- nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "getattr"); +- nfs4_setup_putfh(&compound, NFS_FH(inode)); +- nfs4_setup_getattr(&compound, fattr); +- return nfs4_call_compound(&compound, NULL, 0); ++ return rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + } + + /* +@@ -945,26 +642,218 @@ out: + return status; + } + ++#ifdef CONFIG_NFS_V4_ACL ++ ++static inline int ++nfs_name_to_uid_wrapper(void *arg, const char *name, size_t len, __u32 *id) ++{ ++ return nfs_map_name_to_uid((struct nfs4_client *)arg, name, len, id); ++} ++ ++static inline int ++nfs_name_to_gid_wrapper(void *arg, const char *name, size_t len, __u32 *id) ++{ ++ return nfs_map_group_to_gid((struct nfs4_client*)arg, name, len, id); ++} ++ ++static inline int ++nfs_uid_to_name_wrapper(void *arg, __u32 id, char *name) ++{ ++ return nfs_map_uid_to_name((struct nfs4_client *)arg, id, name); ++} ++ ++static inline int ++nfs_gid_to_name_wrapper(void *arg, __u32 id, char *name) ++{ ++ return nfs_map_gid_to_group((struct nfs4_client *)arg, id, name); ++} ++ ++static struct nfs4_acl_idmapper nfs4_idmapper = { ++ .name2uid = nfs_name_to_uid_wrapper, ++ .name2gid = nfs_name_to_gid_wrapper, ++ .uid2name = nfs_uid_to_name_wrapper, ++ .gid2name = nfs_gid_to_name_wrapper, ++}; ++ ++/* From fs/ext2/acl.c: */ ++ ++static inline struct posix_acl * ++nfs4_iget_acl(struct inode *inode, struct posix_acl **i_acl) ++{ ++ struct posix_acl *acl = NFS4_ACL_NOT_CACHED; ++ ++ spin_lock(&inode->i_lock); ++ if (*i_acl != NFS4_ACL_NOT_CACHED) ++ acl = posix_acl_dup(*i_acl); ++ spin_unlock(&inode->i_lock); ++ return acl; ++} ++ ++void ++nfs4_iset_acl(struct inode *inode, struct posix_acl **i_acl, ++ struct posix_acl *acl) ++{ ++ spin_lock(&inode->i_lock); ++ if (*i_acl != NFS4_ACL_NOT_CACHED) ++ posix_acl_release(*i_acl); ++ *i_acl = posix_acl_dup(acl); ++ spin_unlock(&inode->i_lock); ++} ++ ++void 
++nfs4_izap_acl(struct inode *inode, struct posix_acl **i_acl) ++{ ++ spin_lock(&inode->i_lock); ++ if (*i_acl != NFS4_ACL_NOT_CACHED) ++ posix_acl_release(*i_acl); ++ *i_acl = NFS4_ACL_NOT_CACHED; ++ spin_unlock(&inode->i_lock); ++} ++ ++struct posix_acl * ++nfs4_proc_get_posix_acl(struct inode *inode, int type) ++{ ++ struct nfs4_acl *acl = NULL; ++ int error; ++ struct posix_acl *pacl, *dpacl, *ret = NULL; ++ struct rpc_message msg = { ++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL], ++ .rpc_argp = NFS_FH(inode), ++ .rpc_resp = &acl, ++ }; ++ ++ lock_kernel(); ++ error = nfs_revalidate_inode(NFS_SERVER(inode), inode); ++ if (error < 0) { ++ unlock_kernel(); ++ return ERR_PTR(error); ++ } ++ if (type == ACL_TYPE_ACCESS) ++ ret = nfs4_iget_acl(inode, &NFS_I(inode)->acl); ++ else ++ ret = nfs4_iget_acl(inode, &NFS_I(inode)->default_acl); ++ ++ if (ret != NFS4_ACL_NOT_CACHED) { ++ if (ret == NULL) ++ ret = ERR_PTR(-ENODATA); ++ unlock_kernel(); ++ return ret; ++ } ++ ++ error = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); ++ unlock_kernel(); ++ ++ if (error < 0) ++ goto out_free; ++ ++ error = -ENODATA; ++ if (acl == NULL) ++ goto out_free; ++ ++ error = nfs4_acl_nfsv4_to_posix(&nfs4_idmapper, NFS_SERVER(inode)->nfs4_state, acl, &pacl, &dpacl); ++ if (error < 0) ++ goto out_free; ++ ++ error = -ERANGE; ++ if (pacl && pacl->a_count > NFS_ACL_MAX_ENTRIES) ++ goto out_free; ++ if (dpacl && dpacl->a_count > NFS_ACL_MAX_ENTRIES) ++ goto out_free; ++ ++ nfs4_iset_acl(inode, &NFS_I(inode)->acl, pacl); ++ nfs4_iset_acl(inode, &NFS_I(inode)->default_acl, dpacl); ++ ++ ret = (type == ACL_TYPE_ACCESS) ? 
pacl : dpacl; ++ error = -ENODATA; ++ if (ret == NULL) ++ goto out_free; ++ error = 0; ++out_free: ++ if (error < 0) ++ ret = ERR_PTR(error); ++ nfs4_acl_free(acl); ++ return ret; ++} ++ ++int ++nfs4_proc_set_posix_acl(struct inode *inode, int type, struct posix_acl *pacl) ++{ ++ struct iattr ia; ++ struct nfs4_acl *acl; ++ struct nfs_fattr fattr; ++ int error; ++ struct nfs_setaclargs arg = { ++ .fh = NFS_FH(inode), ++ }; ++ struct rpc_message msg = { ++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SETACL], ++ .rpc_argp = &arg, ++ .rpc_resp = NULL, ++ }; ++ ++ ia.ia_valid = 0; ++ fattr.valid = 0; ++ ++ if (pacl && pacl->a_count > NFS_ACL_MAX_ENTRIES) ++ return -ERANGE; ++ ++ if (type == ACL_TYPE_ACCESS) ++ acl = nfs4_acl_posix_to_nfsv4(&nfs4_idmapper, NFS_SERVER(inode)->nfs4_state, pacl, NULL); ++ else ++ acl = nfs4_acl_posix_to_nfsv4(&nfs4_idmapper, NFS_SERVER(inode)->nfs4_state, NULL, pacl); ++ if (IS_ERR(acl)) ++ return PTR_ERR(acl); ++ arg.acl = acl; ++ ++ lock_kernel(); ++ error = rpc_call_sync(NFS_SERVER(inode)->client, &msg, 0); ++ unlock_kernel(); ++ ++ nfs4_acl_free(acl); ++ ++ if (error) ++ return error; ++ ++ if (type == ACL_TYPE_ACCESS) ++ nfs4_iset_acl(inode, &NFS_I(inode)->acl, pacl); ++ else ++ nfs4_iset_acl(inode, &NFS_I(inode)->default_acl, pacl); ++ ++ if (type == ACL_TYPE_ACCESS) ++ posix_acl_equiv_mode(pacl, &inode->i_mode); ++ ++ return error; ++} ++ ++#endif /* CONFIG_NFS_V4_ACL */ ++ + static int + nfs4_proc_lookup(struct inode *dir, struct qstr *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) + { +- struct nfs4_compound compound; +- struct nfs4_op ops[5]; +- struct nfs_fattr dir_attr; +- int status; +- ++ struct nfs_fattr dir_attr; ++ int status; ++ struct nfs4_lookupargs args = { ++ .dir_fh = NFS_FH(dir), ++ .name = name, ++ }; ++ struct nfs4_lookupres res = { ++ .server = NFS_SERVER(dir), ++ .dirattr = &dir_attr, ++ .fattr = fattr, ++ .fhandle = fhandle, ++ }; ++ struct rpc_message msg = { ++ .rpc_proc = 
&nfs4_procedures[NFSPROC4_CLNT_LOOKUP], ++ .rpc_argp = &args, ++ .rpc_resp = &res, ++ }; ++ + dir_attr.valid = 0; + fattr->valid = 0; + + dprintk("NFS call lookup %s\n", name->name); +- nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "lookup"); +- nfs4_setup_putfh(&compound, NFS_FH(dir)); +- nfs4_setup_getattr(&compound, &dir_attr); +- nfs4_setup_lookup(&compound, name); +- nfs4_setup_getattr(&compound, fattr); +- nfs4_setup_getfh(&compound, fhandle); +- status = nfs4_call_compound(&compound, NULL, 0); ++ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + dprintk("NFS reply lookup: %d\n", status); + + if (status >= 0) +@@ -975,11 +864,24 @@ nfs4_proc_lookup(struct inode *dir, stru + static int + nfs4_proc_access(struct inode *inode, struct rpc_cred *cred, int mode) + { +- struct nfs4_compound compound; +- struct nfs4_op ops[3]; + struct nfs_fattr fattr; + u32 req_access = 0, resp_supported, resp_access; + int status; ++ struct nfs4_accessargs args = { ++ .fhandle = NFS_FH(inode), ++ }; ++ struct nfs4_accessres res = { ++ .server = NFS_SERVER(inode), ++ .fattr = &fattr, ++ .resp_supported = &resp_supported, ++ .resp_access = &resp_access, ++ }; ++ struct rpc_message msg = { ++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_ACCESS], ++ .rpc_argp = &args, ++ .rpc_resp = &res, ++ .rpc_cred = cred, ++ }; + + fattr.valid = 0; + +@@ -1000,12 +902,9 @@ nfs4_proc_access(struct inode *inode, st + if (mode & MAY_EXEC) + req_access |= NFS4_ACCESS_EXECUTE; + } ++ res.req_access = args.req_access = req_access; + +- nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "access"); +- nfs4_setup_putfh(&compound, NFS_FH(inode)); +- nfs4_setup_getattr(&compound, &fattr); +- nfs4_setup_access(&compound, req_access, &resp_supported, &resp_access); +- status = nfs4_call_compound(&compound, cred, 0); ++ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + nfs_refresh_inode(inode, &fattr); + + if (!status) { +@@ -1046,13 +945,18 @@ nfs4_proc_access(struct inode *inode, st + static int 
+ nfs4_proc_readlink(struct inode *inode, struct page *page) + { +- struct nfs4_compound compound; +- struct nfs4_op ops[2]; ++ struct nfs4_readlink args = { ++ .fh = NFS_FH(inode), ++ .count = PAGE_CACHE_SIZE, ++ .pages = &page, ++ }; ++ struct rpc_message msg = { ++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READLINK], ++ .rpc_argp = &args, ++ .rpc_resp = NULL, ++ }; + +- nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "readlink"); +- nfs4_setup_putfh(&compound, NFS_FH(inode)); +- nfs4_setup_readlink(&compound, PAGE_CACHE_SIZE, &page); +- return nfs4_call_compound(&compound, NULL, 0); ++ return rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + } + + static int +@@ -1088,12 +992,8 @@ nfs4_proc_read(struct nfs_read_data *rda + + fattr->valid = 0; + status = rpc_call_sync(server->client, &msg, flags); +- if (!status) { ++ if (!status) + renew_lease(server, timestamp); +- /* Check cache consistency */ +- if (fattr->change_attr != NFS_CHANGE_ATTR(inode)) +- nfs_zap_caches(inode); +- } + dprintk("NFS reply read: %d\n", status); + return status; + } +@@ -1130,7 +1030,6 @@ nfs4_proc_write(struct nfs_write_data *w + + fattr->valid = 0; + status = rpc_call_sync(server->client, &msg, rpcflags); +- NFS_CACHEINV(inode); + dprintk("NFS reply write: %d\n", status); + return status; + } +@@ -1217,18 +1116,26 @@ nfs4_proc_create(struct inode *dir, stru + static int + nfs4_proc_remove(struct inode *dir, struct qstr *name) + { +- struct nfs4_compound compound; +- struct nfs4_op ops[3]; + struct nfs4_change_info dir_cinfo; + struct nfs_fattr dir_attr; + int status; ++ struct nfs4_remove_arg args = { ++ .fhandle = NFS_FH(dir), ++ .name = name, ++ }; ++ struct nfs4_remove_res res = { ++ .server = NFS_SERVER(dir), ++ .dir_cinfo = &dir_cinfo, ++ .dir_attr = &dir_attr, ++ }; ++ struct rpc_message msg = { ++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_REMOVE], ++ .rpc_argp = &args, ++ .rpc_resp = &res, ++ }; + + dir_attr.valid = 0; +- nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), 
"remove"); +- nfs4_setup_putfh(&compound, NFS_FH(dir)); +- nfs4_setup_remove(&compound, name, &dir_cinfo); +- nfs4_setup_getattr(&compound, &dir_attr); +- status = nfs4_call_compound(&compound, NULL, 0); ++ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + + if (!status) { + process_cinfo(&dir_cinfo, &dir_attr); +@@ -1237,32 +1144,22 @@ nfs4_proc_remove(struct inode *dir, stru + return status; + } + +-struct unlink_desc { +- struct nfs4_compound compound; +- struct nfs4_op ops[3]; +- struct nfs4_change_info cinfo; +- struct nfs_fattr attrs; +-}; +- + static int + nfs4_proc_unlink_setup(struct rpc_message *msg, struct dentry *dir, struct qstr *name) + { +- struct unlink_desc * up; +- struct nfs4_compound * cp; ++ struct nfs4_unlink *up; + +- up = (struct unlink_desc *) kmalloc(sizeof(*up), GFP_KERNEL); ++ up = (struct nfs4_unlink *) kmalloc(sizeof(*up), GFP_KERNEL); + if (!up) + return -ENOMEM; +- cp = &up->compound; + +- nfs4_setup_compound(cp, up->ops, NFS_SERVER(dir->d_inode), "unlink_setup"); +- nfs4_setup_putfh(cp, NFS_FH(dir->d_inode)); +- nfs4_setup_remove(cp, name, &up->cinfo); +- nfs4_setup_getattr(cp, &up->attrs); +- +- msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMPOUND]; +- msg->rpc_argp = cp; +- msg->rpc_resp = cp; ++ up->server = NFS_SERVER(dir->d_inode); ++ up->fh = NFS_FH(dir->d_inode); ++ up->name = name; ++ ++ msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_UNLINK]; ++ msg->rpc_argp = up; ++ msg->rpc_resp = up; + return 0; + } + +@@ -1270,11 +1167,10 @@ static int + nfs4_proc_unlink_done(struct dentry *dir, struct rpc_task *task) + { + struct rpc_message *msg = &task->tk_msg; +- struct unlink_desc *up; ++ struct nfs4_unlink *up; + + if (msg->rpc_argp) { +- up = (struct unlink_desc *) msg->rpc_argp; +- process_lease(&up->compound); ++ up = (struct nfs4_unlink *) msg->rpc_argp; + process_cinfo(&up->cinfo, &up->attrs); + nfs_refresh_inode(dir->d_inode, &up->attrs); + kfree(up); +@@ -1287,24 +1183,32 @@ static int + nfs4_proc_rename(struct inode 
*old_dir, struct qstr *old_name, + struct inode *new_dir, struct qstr *new_name) + { +- struct nfs4_compound compound; +- struct nfs4_op ops[7]; + struct nfs4_change_info old_cinfo, new_cinfo; + struct nfs_fattr old_dir_attr, new_dir_attr; + int status; +- ++ struct nfs4_rename_arg arg = { ++ .old_dir = NFS_FH(old_dir), ++ .new_dir = NFS_FH(new_dir), ++ .old_name = old_name, ++ .new_name = new_name, ++ }; ++ struct nfs4_rename_res res = { ++ .server = NFS_SERVER(old_dir), ++ .old_cinfo = &old_cinfo, ++ .new_cinfo = &new_cinfo, ++ .old_fattr = &old_dir_attr, ++ .new_fattr = &new_dir_attr, ++ }; ++ struct rpc_message msg = { ++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_RENAME], ++ .rpc_argp = &arg, ++ .rpc_resp = &res, ++ }; ++ + old_dir_attr.valid = 0; + new_dir_attr.valid = 0; + +- nfs4_setup_compound(&compound, ops, NFS_SERVER(old_dir), "rename"); +- nfs4_setup_putfh(&compound, NFS_FH(old_dir)); +- nfs4_setup_savefh(&compound); +- nfs4_setup_putfh(&compound, NFS_FH(new_dir)); +- nfs4_setup_rename(&compound, old_name, new_name, &old_cinfo, &new_cinfo); +- nfs4_setup_getattr(&compound, &new_dir_attr); +- nfs4_setup_restorefh(&compound); +- nfs4_setup_getattr(&compound, &old_dir_attr); +- status = nfs4_call_compound(&compound, NULL, 0); ++ status = rpc_call_sync(NFS_CLIENT(old_dir), &msg, 0); + + if (!status) { + process_cinfo(&old_cinfo, &old_dir_attr); +@@ -1318,24 +1222,30 @@ nfs4_proc_rename(struct inode *old_dir, + static int + nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *name) + { +- struct nfs4_compound compound; +- struct nfs4_op ops[7]; + struct nfs4_change_info dir_cinfo; + struct nfs_fattr dir_attr, fattr; + int status; +- ++ struct nfs4_link_arg arg = { ++ .fh = NFS_FH(inode), ++ .dir_fh = NFS_FH(dir), ++ .name = name, ++ }; ++ struct nfs4_link_res res = { ++ .server = NFS_SERVER(inode), ++ .fattr = &fattr, ++ .dir_attr = &dir_attr, ++ .dir_cinfo = &dir_cinfo, ++ }; ++ struct rpc_message msg = { ++ .rpc_proc = 
&nfs4_procedures[NFSPROC4_CLNT_LINK], ++ .rpc_argp = &arg, ++ .rpc_resp = &res, ++ }; ++ + dir_attr.valid = 0; + fattr.valid = 0; + +- nfs4_setup_compound(&compound, ops, NFS_SERVER(inode), "link"); +- nfs4_setup_putfh(&compound, NFS_FH(inode)); +- nfs4_setup_savefh(&compound); +- nfs4_setup_putfh(&compound, NFS_FH(dir)); +- nfs4_setup_link(&compound, name, &dir_cinfo); +- nfs4_setup_getattr(&compound, &dir_attr); +- nfs4_setup_restorefh(&compound); +- nfs4_setup_getattr(&compound, &fattr); +- status = nfs4_call_compound(&compound, NULL, 0); ++ status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0); + + if (!status) { + process_cinfo(&dir_cinfo, &dir_attr); +@@ -1350,24 +1260,34 @@ nfs4_proc_symlink(struct inode *dir, str + struct iattr *sattr, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) + { +- struct nfs4_compound compound; +- struct nfs4_op ops[7]; + struct nfs_fattr dir_attr; + struct nfs4_change_info dir_cinfo; + int status; ++ struct nfs4_create_arg arg = { ++ .dir_fh = NFS_FH(dir), ++ .server = NFS_SERVER(dir), ++ .name = name, ++ .u.symlink = path, ++ .attrs = sattr, ++ .ftype = NF4LNK, ++ }; ++ struct nfs4_create_res res = { ++ .server = NFS_SERVER(dir), ++ .fhandle = fhandle, ++ .fattr = fattr, ++ .dir_attr = &dir_attr, ++ .dir_cinfo = &dir_cinfo, ++ }; ++ struct rpc_message msg = { ++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], ++ .rpc_argp = &arg, ++ .rpc_resp = &res, ++ }; + + dir_attr.valid = 0; + fattr->valid = 0; + +- nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "symlink"); +- nfs4_setup_putfh(&compound, NFS_FH(dir)); +- nfs4_setup_savefh(&compound); +- nfs4_setup_create_symlink(&compound, name, path, sattr, &dir_cinfo); +- nfs4_setup_getattr(&compound, fattr); +- nfs4_setup_getfh(&compound, fhandle); +- nfs4_setup_restorefh(&compound); +- nfs4_setup_getattr(&compound, &dir_attr); +- status = nfs4_call_compound(&compound, NULL, 0); ++ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + + if (!status) { + process_cinfo(&dir_cinfo, 
&dir_attr); +@@ -1380,24 +1300,33 @@ static int + nfs4_proc_mkdir(struct inode *dir, struct qstr *name, struct iattr *sattr, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) + { +- struct nfs4_compound compound; +- struct nfs4_op ops[7]; + struct nfs_fattr dir_attr; + struct nfs4_change_info dir_cinfo; + int status; ++ struct nfs4_create_arg arg = { ++ .dir_fh = NFS_FH(dir), ++ .server = NFS_SERVER(dir), ++ .name = name, ++ .attrs = sattr, ++ .ftype = NF4DIR, ++ }; ++ struct nfs4_create_res res = { ++ .server = NFS_SERVER(dir), ++ .fhandle = fhandle, ++ .fattr = fattr, ++ .dir_attr = &dir_attr, ++ .dir_cinfo = &dir_cinfo, ++ }; ++ struct rpc_message msg = { ++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], ++ .rpc_argp = &arg, ++ .rpc_resp = &res, ++ }; + + dir_attr.valid = 0; + fattr->valid = 0; + +- nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "mkdir"); +- nfs4_setup_putfh(&compound, NFS_FH(dir)); +- nfs4_setup_savefh(&compound); +- nfs4_setup_create_dir(&compound, name, sattr, &dir_cinfo); +- nfs4_setup_getattr(&compound, fattr); +- nfs4_setup_getfh(&compound, fhandle); +- nfs4_setup_restorefh(&compound); +- nfs4_setup_getattr(&compound, &dir_attr); +- status = nfs4_call_compound(&compound, NULL, 0); ++ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + + if (!status) { + process_cinfo(&dir_cinfo, &dir_attr); +@@ -1411,17 +1340,25 @@ nfs4_proc_readdir(struct dentry *dentry, + u64 cookie, struct page *page, unsigned int count, int plus) + { + struct inode *dir = dentry->d_inode; +- struct nfs4_compound compound; +- struct nfs4_op ops[2]; + int status; ++ struct nfs4_readdir_arg args = { ++ .fh = NFS_FH(dir), ++ .pages = &page, ++ .pgbase = 0, ++ .count = count, ++ }; ++ struct nfs4_readdir_res res; ++ struct rpc_message msg = { ++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READDIR], ++ .rpc_argp = &args, ++ .rpc_resp = &res, ++ .rpc_cred = cred, ++ }; + + lock_kernel(); +- +- nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "readdir"); +- 
nfs4_setup_putfh(&compound, NFS_FH(dir)); +- nfs4_setup_readdir(&compound, cookie, NFS_COOKIEVERF(dir), &page, count, dentry); +- status = nfs4_call_compound(&compound, cred, 0); +- ++ nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); ++ res.pgbase = args.pgbase; ++ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + unlock_kernel(); + return status; + } +@@ -1430,24 +1367,50 @@ static int + nfs4_proc_mknod(struct inode *dir, struct qstr *name, struct iattr *sattr, + dev_t rdev, struct nfs_fh *fh, struct nfs_fattr *fattr) + { +- struct nfs4_compound compound; +- struct nfs4_op ops[7]; + struct nfs_fattr dir_attr; + struct nfs4_change_info dir_cinfo; + int status; ++ int mode = sattr->ia_mode; ++ struct nfs4_create_arg arg = { ++ .dir_fh = NFS_FH(dir), ++ .server = NFS_SERVER(dir), ++ .name = name, ++ .attrs = sattr, ++ }; ++ struct nfs4_create_res res = { ++ .server = NFS_SERVER(dir), ++ .fhandle = fh, ++ .fattr = fattr, ++ .dir_attr = &dir_attr, ++ .dir_cinfo = &dir_cinfo, ++ }; ++ struct rpc_message msg = { ++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CREATE], ++ .rpc_argp = &arg, ++ .rpc_resp = &res, ++ }; + + dir_attr.valid = 0; + fattr->valid = 0; ++ ++ BUG_ON(!(sattr->ia_valid & ATTR_MODE)); ++ BUG_ON(!S_ISFIFO(mode) && !S_ISBLK(mode) && !S_ISCHR(mode) && !S_ISSOCK(mode)); ++ if (S_ISFIFO(mode)) ++ arg.ftype = NF4FIFO; ++ else if (S_ISBLK(mode)) { ++ arg.ftype = NF4BLK; ++ arg.u.device.specdata1 = MAJOR(rdev); ++ arg.u.device.specdata2 = MINOR(rdev); ++ } ++ else if (S_ISCHR(mode)) { ++ arg.ftype = NF4CHR; ++ arg.u.device.specdata1 = MAJOR(rdev); ++ arg.u.device.specdata2 = MINOR(rdev); ++ } ++ else ++ arg.ftype = NF4SOCK; + +- nfs4_setup_compound(&compound, ops, NFS_SERVER(dir), "mknod"); +- nfs4_setup_putfh(&compound, NFS_FH(dir)); +- nfs4_setup_savefh(&compound); +- nfs4_setup_create_special(&compound, name, rdev,sattr, &dir_cinfo); +- nfs4_setup_getattr(&compound, fattr); +- nfs4_setup_getfh(&compound, fh); +- 
nfs4_setup_restorefh(&compound); +- nfs4_setup_getattr(&compound, &dir_attr); +- status = nfs4_call_compound(&compound, NULL, 0); ++ status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0); + + if (!status) { + process_cinfo(&dir_cinfo, &dir_attr); +@@ -1460,14 +1423,13 @@ static int + nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_fsstat *fsstat) + { +- struct nfs4_compound compound; +- struct nfs4_op ops[2]; ++ struct rpc_message msg = { ++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_STATFS], ++ .rpc_argp = fhandle, ++ .rpc_resp = fsstat, ++ }; + +- memset(fsstat, 0, sizeof(*fsstat)); +- nfs4_setup_compound(&compound, ops, server, "statfs"); +- nfs4_setup_putfh(&compound, fhandle); +- nfs4_setup_statfs(&compound, fsstat); +- return nfs4_call_compound(&compound, NULL, 0); ++ return rpc_call_sync(server->client, &msg, 0); + } + + static int +@@ -1480,7 +1442,6 @@ nfs4_proc_fsinfo(struct nfs_server *serv + .rpc_resp = fsinfo, + }; + +- memset(fsinfo, 0, sizeof(*fsinfo)); + return rpc_call_sync(server->client, &msg, 0); + } + +@@ -1488,14 +1449,13 @@ static int + nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, + struct nfs_pathconf *pathconf) + { +- struct nfs4_compound compound; +- struct nfs4_op ops[2]; ++ struct rpc_message msg = { ++ .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_PATHCONF], ++ .rpc_argp = fhandle, ++ .rpc_resp = pathconf, ++ }; + +- memset(pathconf, 0, sizeof(*pathconf)); +- nfs4_setup_compound(&compound, ops, server, "statfs"); +- nfs4_setup_putfh(&compound, fhandle); +- nfs4_setup_pathconf(&compound, pathconf); +- return nfs4_call_compound(&compound, NULL, 0); ++ return rpc_call_sync(server->client, &msg, 0); + } + + static void +@@ -1517,7 +1477,6 @@ nfs4_read_done(struct rpc_task *task) + { + struct nfs_read_data *data = (struct nfs_read_data *) task->tk_calldata; + struct inode *inode = data->inode; +- struct nfs_fattr *fattr = data->res.fattr; + + if (nfs4_async_handle_error(task, 
NFS_SERVER(inode)) == -EAGAIN) { + task->tk_action = nfs4_restart_read; +@@ -1525,11 +1484,6 @@ nfs4_read_done(struct rpc_task *task) + } + if (task->tk_status > 0) + renew_lease(NFS_SERVER(inode), data->timestamp); +- /* Check cache consistency */ +- if (fattr->change_attr != NFS_CHANGE_ATTR(inode)) +- nfs_zap_caches(inode); +- if (fattr->bitmap[1] & FATTR4_WORD1_TIME_ACCESS) +- inode->i_atime = fattr->atime; + /* Call back common NFS readpage processing */ + nfs_readpage_result(task); + } +@@ -1577,21 +1531,6 @@ nfs4_proc_read_setup(struct nfs_read_dat + } + + static void +-nfs4_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +-{ +- /* Check cache consistency */ +- if (fattr->pre_change_attr != NFS_CHANGE_ATTR(inode)) +- nfs_zap_caches(inode); +- NFS_CHANGE_ATTR(inode) = fattr->change_attr; +- if (fattr->bitmap[1] & FATTR4_WORD1_SPACE_USED) +- inode->i_blocks = (fattr->du.nfs3.used + 511) >> 9; +- if (fattr->bitmap[1] & FATTR4_WORD1_TIME_METADATA) +- inode->i_ctime = fattr->ctime; +- if (fattr->bitmap[1] & FATTR4_WORD1_TIME_MODIFY) +- inode->i_mtime = fattr->mtime; +-} +- +-static void + nfs4_restart_write(struct rpc_task *task) + { + struct nfs_write_data *data = (struct nfs_write_data *)task->tk_calldata; +@@ -1617,7 +1556,6 @@ nfs4_write_done(struct rpc_task *task) + } + if (task->tk_status >= 0) + renew_lease(NFS_SERVER(inode), data->timestamp); +- nfs4_write_refresh_inode(inode, data->res.fattr); + /* Call back common NFS writeback processing */ + nfs_writeback_done(task); + } +@@ -1684,7 +1622,6 @@ nfs4_commit_done(struct rpc_task *task) + task->tk_action = nfs4_restart_write; + return; + } +- nfs4_write_refresh_inode(inode, data->res.fattr); + /* Call back common NFS writeback processing */ + nfs_commit_done(task); + } +@@ -1807,6 +1744,7 @@ nfs4_proc_file_open(struct inode *inode, + if (filp->f_mode & FMODE_WRITE) { + lock_kernel(); + nfs_set_mmcred(inode, state->owner->so_cred); ++ nfs_begin_data_update(inode); + unlock_kernel(); + } + 
filp->private_data = state; +@@ -1823,6 +1761,11 @@ nfs4_proc_file_release(struct inode *ino + + if (state) + nfs4_close_state(state, filp->f_mode); ++ if (filp->f_mode & FMODE_WRITE) { ++ lock_kernel(); ++ nfs_end_data_update(inode); ++ unlock_kernel(); ++ } + return 0; + } + +@@ -2294,6 +2237,7 @@ struct nfs_rpc_ops nfs_v4_clientops = { + .version = 4, /* protocol version */ + .dentry_ops = &nfs4_dentry_operations, + .dir_inode_ops = &nfs4_dir_inode_operations, ++ .file_inode_ops = &nfs4_file_inode_operations, + .getroot = nfs4_proc_get_root, + .getattr = nfs4_proc_getattr, + .setattr = nfs4_proc_setattr, +diff -puN fs/nfs/pagelist.c~CITI_NFS4_ALL fs/nfs/pagelist.c +--- linux-2.6.3/fs/nfs/pagelist.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfs/pagelist.c 2004-02-19 16:47:03.000000000 -0500 +@@ -246,7 +246,6 @@ nfs_coalesce_requests(struct list_head * + * nfs_scan_list - Scan a list for matching requests + * @head: One of the NFS inode request lists + * @dst: Destination list +- * @file: if set, ensure we match requests from this file + * @idx_start: lower bound of page->index to scan + * @npages: idx_start + npages sets the upper bound to scan. 
+ * +@@ -258,7 +257,6 @@ nfs_coalesce_requests(struct list_head * + */ + int + nfs_scan_list(struct list_head *head, struct list_head *dst, +- struct file *file, + unsigned long idx_start, unsigned int npages) + { + struct list_head *pos, *tmp; +@@ -276,9 +274,6 @@ nfs_scan_list(struct list_head *head, st + + req = nfs_list_entry(pos); + +- if (file && req->wb_file != file) +- continue; +- + if (req->wb_index < idx_start) + continue; + if (req->wb_index > idx_end) +diff -puN fs/nfs/proc.c~CITI_NFS4_ALL fs/nfs/proc.c +--- linux-2.6.3/fs/nfs/proc.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfs/proc.c 2004-02-19 16:47:07.000000000 -0500 +@@ -49,18 +49,6 @@ + + extern struct rpc_procinfo nfs_procedures[]; + +-static void +-nfs_write_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) +-{ +- if (!(fattr->valid & NFS_ATTR_WCC)) { +- fattr->pre_size = NFS_CACHE_ISIZE(inode); +- fattr->pre_mtime = NFS_CACHE_MTIME(inode); +- fattr->pre_ctime = NFS_CACHE_CTIME(inode); +- fattr->valid |= NFS_ATTR_WCC; +- } +- nfs_refresh_inode(inode, fattr); +-} +- + static struct rpc_cred * + nfs_cred(struct inode *inode, struct file *filp) + { +@@ -78,15 +66,33 @@ nfs_cred(struct inode *inode, struct fil + */ + static int + nfs_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, +- struct nfs_fattr *fattr) ++ struct nfs_fsinfo *info) + { +- int status; ++ struct nfs_fattr *fattr = info->fattr; ++ struct nfs2_fsstat fsinfo; ++ int status; + +- dprintk("NFS call getroot\n"); ++ dprintk("%s: call getattr\n", __FUNCTION__); + fattr->valid = 0; +- status = rpc_call(server->client, NFSPROC_GETATTR, fhandle, fattr, 0); +- dprintk("NFS reply getroot\n"); +- return status; ++ status = rpc_call(server->client_sys, NFSPROC_GETATTR, fhandle, fattr, 0); ++ dprintk("%s: reply getattr %d\n", __FUNCTION__, status); ++ if (status) ++ return status; ++ dprintk("%s: call statfs\n", __FUNCTION__); ++ status = rpc_call(server->client_sys, NFSPROC_STATFS, 
fhandle, &fsinfo, 0); ++ dprintk("%s: reply statfs %d\n", __FUNCTION__, status); ++ if (status) ++ return status; ++ info->rtmax = NFS_MAXDATA; ++ info->rtpref = fsinfo.tsize; ++ info->rtmult = fsinfo.bsize; ++ info->wtmax = NFS_MAXDATA; ++ info->wtpref = fsinfo.tsize; ++ info->wtmult = fsinfo.bsize; ++ info->dtpref = fsinfo.tsize; ++ info->maxfilesize = 0x7FFFFFFF; ++ info->lease_time = 0; ++ return 0; + } + + /* +@@ -205,7 +211,7 @@ nfs_proc_write(struct nfs_write_data *wd + msg.rpc_cred = nfs_cred(inode, filp); + status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags); + if (status >= 0) { +- nfs_write_refresh_inode(inode, fattr); ++ nfs_refresh_inode(inode, fattr); + wdata->res.count = wdata->args.count; + wdata->verf.committed = NFS_FILE_SYNC; + } +@@ -331,10 +337,8 @@ nfs_proc_unlink_done(struct dentry *dir, + { + struct rpc_message *msg = &task->tk_msg; + +- if (msg->rpc_argp) { +- NFS_CACHEINV(dir->d_inode); ++ if (msg->rpc_argp) + kfree(msg->rpc_argp); +- } + return 0; + } + +@@ -584,7 +588,7 @@ nfs_write_done(struct rpc_task *task) + struct nfs_write_data *data = (struct nfs_write_data *) task->tk_calldata; + + if (task->tk_status >= 0) +- nfs_write_refresh_inode(data->inode, data->res.fattr); ++ nfs_refresh_inode(data->inode, data->res.fattr); + nfs_writeback_done(task); + } + +@@ -665,6 +669,7 @@ struct nfs_rpc_ops nfs_v2_clientops = { + .version = 2, /* protocol version */ + .dentry_ops = &nfs_dentry_operations, + .dir_inode_ops = &nfs_dir_inode_operations, ++ .file_inode_ops = &nfs_file_inode_operations, + .getroot = nfs_proc_get_root, + .getattr = nfs_proc_getattr, + .setattr = nfs_proc_setattr, +diff -puN fs/nfs/read.c~CITI_NFS4_ALL fs/nfs/read.c +--- linux-2.6.3/fs/nfs/read.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfs/read.c 2004-02-19 16:47:03.000000000 -0500 +@@ -124,6 +124,7 @@ nfs_readpage_sync(struct file *file, str + if (result < rdata.args.count) /* NFSv2ism */ + break; + } while (count); ++ 
NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; + + if (count) + memclear_highpage_flush(page, rdata.args.pgbase, count); +@@ -266,6 +267,7 @@ nfs_readpage_result(struct rpc_task *tas + dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", + task->tk_pid, task->tk_status); + ++ NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME; + while (!list_empty(&data->pages)) { + struct nfs_page *req = nfs_list_entry(data->pages.next); + struct page *page = req->wb_page; +diff -puN fs/nfs/unlink.c~CITI_NFS4_ALL fs/nfs/unlink.c +--- linux-2.6.3/fs/nfs/unlink.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfs/unlink.c 2004-02-19 16:47:03.000000000 -0500 +@@ -104,6 +104,7 @@ nfs_async_unlink_init(struct rpc_task *t + status = NFS_PROTO(dir->d_inode)->unlink_setup(&msg, dir, &data->name); + if (status < 0) + goto out_err; ++ nfs_begin_data_update(dir->d_inode); + rpc_call_setup(task, &msg, 0); + return; + out_err: +@@ -126,7 +127,7 @@ nfs_async_unlink_done(struct rpc_task *t + if (!dir) + return; + dir_i = dir->d_inode; +- nfs_zap_caches(dir_i); ++ nfs_end_data_update(dir_i); + if (NFS_PROTO(dir_i)->unlink_done(dir, task)) + return; + put_rpccred(data->cred); +diff -puN fs/nfs/write.c~CITI_NFS4_ALL fs/nfs/write.c +--- linux-2.6.3/fs/nfs/write.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfs/write.c 2004-02-19 16:47:03.000000000 -0500 +@@ -74,7 +74,6 @@ + static struct nfs_page * nfs_update_request(struct file*, struct inode *, + struct page *, + unsigned int, unsigned int); +-static void nfs_strategy(struct inode *inode); + + static kmem_cache_t *nfs_wdata_cachep; + static mempool_t *nfs_wdata_mempool; +@@ -124,6 +123,52 @@ void nfs_commit_release(struct rpc_task + nfs_commit_free(wdata); + } + ++/* Adjust the file length if we're writing beyond the end */ ++static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count) ++{ ++ struct inode *inode = page->mapping->host; ++ loff_t end, i_size = 
i_size_read(inode); ++ unsigned long end_index = (i_size - 1) >> PAGE_CACHE_SHIFT; ++ ++ if (i_size > 0 && page->index < end_index) ++ return; ++ end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + ((loff_t)offset+count); ++ if (i_size >= end) ++ return; ++ i_size_write(inode, end); ++} ++ ++/* We can set the PG_uptodate flag if we see that a write request ++ * covers the full page. ++ */ ++static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count) ++{ ++ loff_t end_offs; ++ ++ if (PageUptodate(page)) ++ return; ++ if (base != 0) ++ return; ++ if (count == PAGE_CACHE_SIZE) { ++ SetPageUptodate(page); ++ return; ++ } ++ ++ end_offs = i_size_read(page->mapping->host) - 1; ++ if (end_offs < 0) ++ return; ++ /* Is this the last page? */ ++ if (page->index != (unsigned long)(end_offs >> PAGE_CACHE_SHIFT)) ++ return; ++ /* This is the last page: set PG_uptodate if we cover the entire ++ * extent of the data, then zero the rest of the page. ++ */ ++ if (count == (unsigned int)(end_offs & (PAGE_CACHE_SIZE - 1)) + 1) { ++ memclear_highpage_flush(page, count, PAGE_CACHE_SIZE - count); ++ SetPageUptodate(page); ++ } ++} ++ + /* + * Write a page synchronously. + * Offset is the data offset within the page. +@@ -157,6 +202,7 @@ nfs_writepage_sync(struct file *file, st + (long long)NFS_FILEID(inode), + count, (long long)(page_offset(page) + offset)); + ++ nfs_begin_data_update(inode); + do { + if (count < wsize && !swapfile) + wdata.args.count = count; +@@ -177,14 +223,12 @@ nfs_writepage_sync(struct file *file, st + wdata.args.pgbase += result; + written += result; + count -= result; +- +- /* +- * If we've extended the file, update the inode +- * now so we don't invalidate the cache. +- */ +- if (wdata.args.offset > i_size_read(inode)) +- i_size_write(inode, wdata.args.offset); + } while (count); ++ /* Update file length */ ++ nfs_grow_file(page, offset, written); ++ /* Set the PG_uptodate flag? 
*/ ++ nfs_mark_uptodate(page, offset, written); ++ nfs_end_data_update(inode); + + if (PageError(page)) + ClearPageError(page); +@@ -201,18 +245,19 @@ nfs_writepage_async(struct file *file, s + unsigned int offset, unsigned int count) + { + struct nfs_page *req; +- loff_t end; + int status; + ++ nfs_begin_data_update(inode); + req = nfs_update_request(file, inode, page, offset, count); + status = (IS_ERR(req)) ? PTR_ERR(req) : 0; + if (status < 0) + goto out; ++ /* Update file length */ ++ nfs_grow_file(page, offset, count); ++ /* Set the PG_uptodate flag? */ ++ nfs_mark_uptodate(page, offset, count); + nfs_unlock_request(req); +- nfs_strategy(inode); +- end = ((loff_t)page->index<sync_mode == WB_SYNC_HOLD) +@@ -294,7 +339,7 @@ nfs_writepages(struct address_space *map + if (is_sync && wbc->sync_mode == WB_SYNC_ALL) { + err = nfs_wb_all(inode); + } else +- nfs_commit_file(inode, NULL, 0, 0, 0); ++ nfs_commit_inode(inode, 0, 0, 0); + out: + return err; + } +@@ -312,8 +357,10 @@ nfs_inode_add_request(struct inode *inod + BUG_ON(error == -EEXIST); + if (error) + return error; +- if (!nfsi->npages) ++ if (!nfsi->npages) { + igrab(inode); ++ nfs_begin_data_update(inode); ++ } + nfsi->npages++; + req->wb_count++; + return 0; +@@ -336,6 +383,7 @@ nfs_inode_remove_request(struct nfs_page + nfsi->npages--; + if (!nfsi->npages) { + spin_unlock(&nfs_wreq_lock); ++ nfs_end_data_update(inode); + iput(inode); + } else + spin_unlock(&nfs_wreq_lock); +@@ -421,7 +469,7 @@ nfs_mark_request_commit(struct nfs_page + * Interruptible by signals only if mounted with intr flag. 
+ */ + static int +-nfs_wait_on_requests(struct inode *inode, struct file *file, unsigned long idx_start, unsigned int npages) ++nfs_wait_on_requests(struct inode *inode, unsigned long idx_start, unsigned int npages) + { + struct nfs_inode *nfsi = NFS_I(inode); + struct nfs_page *req; +@@ -441,8 +489,6 @@ nfs_wait_on_requests(struct inode *inode + break; + + next = req->wb_index + 1; +- if (file && req->wb_file != file) +- continue; + if (!NFS_WBACK_BUSY(req)) + continue; + +@@ -453,7 +499,6 @@ nfs_wait_on_requests(struct inode *inode + if (error < 0) + return error; + spin_lock(&nfs_wreq_lock); +- next = idx_start; + res++; + } + spin_unlock(&nfs_wreq_lock); +@@ -464,7 +509,6 @@ nfs_wait_on_requests(struct inode *inode + * nfs_scan_dirty - Scan an inode for dirty requests + * @inode: NFS inode to scan + * @dst: destination list +- * @file: if set, ensure we match requests from this file + * @idx_start: lower bound of page->index to scan. + * @npages: idx_start + npages sets the upper bound to scan. + * +@@ -472,11 +516,11 @@ nfs_wait_on_requests(struct inode *inode + * The requests are *not* checked to ensure that they form a contiguous set. + */ + static int +-nfs_scan_dirty(struct inode *inode, struct list_head *dst, struct file *file, unsigned long idx_start, unsigned int npages) ++nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) + { + struct nfs_inode *nfsi = NFS_I(inode); + int res; +- res = nfs_scan_list(&nfsi->dirty, dst, file, idx_start, npages); ++ res = nfs_scan_list(&nfsi->dirty, dst, idx_start, npages); + nfsi->ndirty -= res; + sub_page_state(nr_dirty,res); + if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty)) +@@ -489,7 +533,6 @@ nfs_scan_dirty(struct inode *inode, stru + * nfs_scan_commit - Scan an inode for commit requests + * @inode: NFS inode to scan + * @dst: destination list +- * @file: if set, ensure we collect requests from this file only. 
+ * @idx_start: lower bound of page->index to scan. + * @npages: idx_start + npages sets the upper bound to scan. + * +@@ -497,11 +540,11 @@ nfs_scan_dirty(struct inode *inode, stru + * The requests are *not* checked to ensure that they form a contiguous set. + */ + static int +-nfs_scan_commit(struct inode *inode, struct list_head *dst, struct file *file, unsigned long idx_start, unsigned int npages) ++nfs_scan_commit(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages) + { + struct nfs_inode *nfsi = NFS_I(inode); + int res; +- res = nfs_scan_list(&nfsi->commit, dst, file, idx_start, npages); ++ res = nfs_scan_list(&nfsi->commit, dst, idx_start, npages); + nfsi->ncommit -= res; + if ((nfsi->ncommit == 0) != list_empty(&nfsi->commit)) + printk(KERN_ERR "NFS: desynchronized value of nfs_i.ncommit.\n"); +@@ -600,46 +643,6 @@ nfs_update_request(struct file* file, st + return req; + } + +-/* +- * This is the strategy routine for NFS. +- * It is called by nfs_updatepage whenever the user wrote up to the end +- * of a page. +- * +- * We always try to submit a set of requests in parallel so that the +- * server's write code can gather writes. This is mainly for the benefit +- * of NFSv2. +- * +- * We never submit more requests than we think the remote can handle. +- * For UDP sockets, we make sure we don't exceed the congestion window; +- * for TCP, we limit the number of requests to 8. +- * +- * NFS_STRATEGY_PAGES gives the minimum number of requests for NFSv2 that +- * should be sent out in one go. This is for the benefit of NFSv2 servers +- * that perform write gathering. +- * +- * FIXME: Different servers may have different sweet spots. +- * Record the average congestion window in server struct? 
+- */ +-#define NFS_STRATEGY_PAGES 8 +-static void +-nfs_strategy(struct inode *inode) +-{ +- unsigned int dirty, wpages; +- +- dirty = NFS_I(inode)->ndirty; +- wpages = NFS_SERVER(inode)->wpages; +-#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +- if (NFS_PROTO(inode)->version == 2) { +- if (dirty >= NFS_STRATEGY_PAGES * wpages) +- nfs_flush_file(inode, NULL, 0, 0, 0); +- } else if (dirty >= wpages) +- nfs_flush_file(inode, NULL, 0, 0, 0); +-#else +- if (dirty >= NFS_STRATEGY_PAGES * wpages) +- nfs_flush_file(inode, NULL, 0, 0, 0); +-#endif +-} +- + int + nfs_flush_incompatible(struct file *file, struct page *page) + { +@@ -675,7 +678,6 @@ nfs_updatepage(struct file *file, struct + struct dentry *dentry = file->f_dentry; + struct inode *inode = page->mapping->host; + struct nfs_page *req; +- loff_t end; + int status = 0; + + dprintk("NFS: nfs_updatepage(%s/%s %d@%Ld)\n", +@@ -696,6 +698,30 @@ nfs_updatepage(struct file *file, struct + return status; + } + ++ nfs_begin_data_update(inode); ++ ++ ++ /* If we're not using byte range locks, and we know the page ++ * is entirely in cache, it may be more efficient to avoid ++ * fragmenting write requests. ++ */ ++ if (PageUptodate(page) && inode->i_flock == NULL) { ++ loff_t end_offs = i_size_read(inode) - 1; ++ unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT; ++ ++ count += offset; ++ offset = 0; ++ if (end_offs < 0) { ++ /* Do nothing */ ++ } else if (page->index == end_index) { ++ unsigned int pglen; ++ pglen = (unsigned int)(end_offs & (PAGE_CACHE_SIZE-1)) + 1; ++ if (count < pglen) ++ count = pglen; ++ } else if (page->index < end_index) ++ count = PAGE_CACHE_SIZE; ++ } ++ + /* + * Try to find an NFS request corresponding to this page + * and update it. 
+@@ -714,21 +740,14 @@ nfs_updatepage(struct file *file, struct + goto done; + + status = 0; +- end = ((loff_t)page->index<wb_pgbase == 0 && req->wb_bytes == PAGE_CACHE_SIZE) { +- SetPageUptodate(page); +- nfs_unlock_request(req); +- nfs_strategy(inode); +- } else +- nfs_unlock_request(req); ++ ++ /* Update file length */ ++ nfs_grow_file(page, offset, count); ++ /* Set the PG_uptodate flag? */ ++ nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); ++ nfs_unlock_request(req); + done: ++ nfs_end_data_update(inode); + dprintk("NFS: nfs_updatepage returns %d (isize %Ld)\n", + status, (long long)i_size_read(inode)); + if (status < 0) +@@ -891,10 +910,7 @@ nfs_writeback_done(struct rpc_task *task + #endif + + /* +- * Update attributes as result of writeback. +- * FIXME: There is an inherent race with invalidate_inode_pages and +- * writebacks since the page->count is kept > 1 for as long +- * as the page has a write request pending. ++ * Process the nfs_page list + */ + while (!list_empty(&data->pages)) { + req = nfs_list_entry(data->pages.next); +@@ -1061,7 +1077,7 @@ nfs_commit_done(struct rpc_task *task) + } + #endif + +-int nfs_flush_file(struct inode *inode, struct file *file, unsigned long idx_start, ++int nfs_flush_inode(struct inode *inode, unsigned long idx_start, + unsigned int npages, int how) + { + LIST_HEAD(head); +@@ -1069,7 +1085,7 @@ int nfs_flush_file(struct inode *inode, + error = 0; + + spin_lock(&nfs_wreq_lock); +- res = nfs_scan_dirty(inode, &head, file, idx_start, npages); ++ res = nfs_scan_dirty(inode, &head, idx_start, npages); + spin_unlock(&nfs_wreq_lock); + if (res) + error = nfs_flush_list(&head, NFS_SERVER(inode)->wpages, how); +@@ -1079,7 +1095,7 @@ int nfs_flush_file(struct inode *inode, + } + + #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +-int nfs_commit_file(struct inode *inode, struct file *file, unsigned long idx_start, ++int nfs_commit_inode(struct inode *inode, unsigned long idx_start, + unsigned int npages, int how) + { 
+ LIST_HEAD(head); +@@ -1087,9 +1103,9 @@ int nfs_commit_file(struct inode *inode, + error = 0; + + spin_lock(&nfs_wreq_lock); +- res = nfs_scan_commit(inode, &head, file, idx_start, npages); ++ res = nfs_scan_commit(inode, &head, idx_start, npages); + if (res) { +- res += nfs_scan_commit(inode, &head, NULL, 0, 0); ++ res += nfs_scan_commit(inode, &head, 0, 0); + spin_unlock(&nfs_wreq_lock); + error = nfs_commit_list(&head, how); + } else +@@ -1100,7 +1116,7 @@ int nfs_commit_file(struct inode *inode, + } + #endif + +-int nfs_sync_file(struct inode *inode, struct file *file, unsigned long idx_start, ++int nfs_sync_inode(struct inode *inode, unsigned long idx_start, + unsigned int npages, int how) + { + int error, +@@ -1109,18 +1125,15 @@ int nfs_sync_file(struct inode *inode, s + wait = how & FLUSH_WAIT; + how &= ~FLUSH_WAIT; + +- if (!inode && file) +- inode = file->f_dentry->d_inode; +- + do { + error = 0; + if (wait) +- error = nfs_wait_on_requests(inode, file, idx_start, npages); ++ error = nfs_wait_on_requests(inode, idx_start, npages); + if (error == 0) +- error = nfs_flush_file(inode, file, idx_start, npages, how); ++ error = nfs_flush_inode(inode, idx_start, npages, how); + #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) + if (error == 0) +- error = nfs_commit_file(inode, file, idx_start, npages, how); ++ error = nfs_commit_inode(inode, idx_start, npages, how); + #endif + } while (error > 0); + return error; +diff -puN include/linux/fs.h~CITI_NFS4_ALL include/linux/fs.h +--- linux-2.6.3/include/linux/fs.h~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/include/linux/fs.h 2004-02-19 16:47:03.000000000 -0500 +@@ -137,6 +137,7 @@ extern int leases_enable, dir_notify_ena + #define S_DEAD 32 /* removed, but still open directory */ + #define S_NOQUOTA 64 /* Inode is not counted to quota */ + #define S_DIRSYNC 128 /* Directory modifications are synchronous */ ++#define S_NOCMTIME 256 /* Do not update file c/mtime */ + + /* + * Note 
that nosuid etc flags are inode-specific: setting some file-system +@@ -170,6 +171,7 @@ extern int leases_enable, dir_notify_ena + #define IS_ONE_SECOND(inode) __IS_FLG(inode, MS_ONE_SECOND) + + #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD) ++#define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) + + /* the read-only stuff doesn't really belong here, but any other place is + probably as bad and I don't want to create yet another include file. */ +diff -puN include/linux/nfs_fs.h~CITI_NFS4_ALL include/linux/nfs_fs.h +--- linux-2.6.3/include/linux/nfs_fs.h~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/include/linux/nfs_fs.h 2004-02-19 16:47:07.000000000 -0500 +@@ -23,6 +23,10 @@ + #include + #include + ++#ifdef CONFIG_NFS_V4 ++#include ++#endif /* CONFIG_NFS_V4 */ ++ + #include + #include + #include +@@ -99,7 +103,7 @@ struct nfs_inode { + /* + * Various flags + */ +- unsigned short flags; ++ unsigned int flags; + + /* + * read_cache_jiffies is when we started read-caching this inode, +@@ -118,19 +122,22 @@ struct nfs_inode { + * + * mtime != read_cache_mtime + */ ++ unsigned long readdir_timestamp; + unsigned long read_cache_jiffies; +- struct timespec read_cache_ctime; +- struct timespec read_cache_mtime; +- __u64 read_cache_isize; + unsigned long attrtimeo; + unsigned long attrtimeo_timestamp; + __u64 change_attr; /* v4 only */ + ++ /* "Generation counter" for the attribute cache. This is ++ * bumped whenever we update the metadata on the ++ * server. ++ */ ++ unsigned long cache_change_attribute; + /* +- * Timestamp that dates the change made to read_cache_mtime. +- * This is of use for dentry revalidation ++ * Counter indicating the number of outstanding requests that ++ * will cause a file data update. 
+ */ +- unsigned long cache_mtime_jiffies; ++ atomic_t data_updates; + + struct nfs_access_cache cache_access; + +@@ -160,7 +167,10 @@ struct nfs_inode { + /* NFSv4 state */ + struct list_head open_states; + #endif /* CONFIG_NFS_V4*/ +- ++#ifdef CONFIG_NFS_V4_ACL ++ struct posix_acl *acl; ++ struct posix_acl *default_acl; ++#endif /* CONFIG_NFS_V4_ACL */ + struct inode vfs_inode; + }; + +@@ -170,7 +180,9 @@ struct nfs_inode { + #define NFS_INO_STALE 0x0001 /* possible stale inode */ + #define NFS_INO_ADVISE_RDPLUS 0x0002 /* advise readdirplus */ + #define NFS_INO_REVALIDATING 0x0004 /* revalidating attrs */ +-#define NFS_INO_FLUSH 0x0008 /* inode is due for flushing */ ++#define NFS_INO_INVALID_ATTR 0x0008 /* cached attrs are invalid */ ++#define NFS_INO_INVALID_DATA 0x0010 /* cached data is invalid */ ++#define NFS_INO_INVALID_ATIME 0x0020 /* cached atime is invalid */ + #define NFS_INO_FAKE_ROOT 0x0080 /* root inode placeholder */ + + static inline struct nfs_inode *NFS_I(struct inode *inode) +@@ -186,15 +198,7 @@ static inline struct nfs_inode *NFS_I(st + #define NFS_ADDR(inode) (RPC_PEERADDR(NFS_CLIENT(inode))) + #define NFS_COOKIEVERF(inode) (NFS_I(inode)->cookieverf) + #define NFS_READTIME(inode) (NFS_I(inode)->read_cache_jiffies) +-#define NFS_MTIME_UPDATE(inode) (NFS_I(inode)->cache_mtime_jiffies) +-#define NFS_CACHE_CTIME(inode) (NFS_I(inode)->read_cache_ctime) +-#define NFS_CACHE_MTIME(inode) (NFS_I(inode)->read_cache_mtime) +-#define NFS_CACHE_ISIZE(inode) (NFS_I(inode)->read_cache_isize) + #define NFS_CHANGE_ATTR(inode) (NFS_I(inode)->change_attr) +-#define NFS_CACHEINV(inode) \ +-do { \ +- NFS_READTIME(inode) = jiffies - NFS_MAXATTRTIMEO(inode) - 1; \ +-} while (0) + #define NFS_ATTRTIMEO(inode) (NFS_I(inode)->attrtimeo) + #define NFS_MINATTRTIMEO(inode) \ + (S_ISDIR(inode->i_mode)? 
NFS_SERVER(inode)->acdirmin \ +@@ -211,6 +215,17 @@ do { \ + + #define NFS_FILEID(inode) (NFS_I(inode)->fileid) + ++static inline int nfs_caches_unstable(struct inode *inode) ++{ ++ return atomic_read(&NFS_I(inode)->data_updates) != 0; ++} ++ ++static inline void NFS_CACHEINV(struct inode *inode) ++{ ++ if (!nfs_caches_unstable(inode)) ++ NFS_FLAGS(inode) |= NFS_INO_INVALID_ATTR; ++} ++ + static inline int nfs_server_capable(struct inode *inode, int cap) + { + return NFS_SERVER(inode)->caps & cap; +@@ -227,13 +242,37 @@ loff_t page_offset(struct page *page) + return ((loff_t)page->index) << PAGE_CACHE_SHIFT; + } + ++/** ++ * nfs_save_change_attribute - Returns the inode attribute change cookie ++ * @inode - pointer to inode ++ * The "change attribute" is updated every time we finish an operation ++ * that will result in a metadata change on the server. ++ */ ++static inline long nfs_save_change_attribute(struct inode *inode) ++{ ++ return NFS_I(inode)->cache_change_attribute; ++} ++ ++/** ++ * nfs_verify_change_attribute - Detects NFS inode cache updates ++ * @inode - pointer to inode ++ * @chattr - previously saved change attribute ++ * Return "false" if metadata has been updated (or is in the process of ++ * being updated) since the change attribute was saved. 
++ */ ++static inline int nfs_verify_change_attribute(struct inode *inode, unsigned long chattr) ++{ ++ return !nfs_caches_unstable(inode) ++ && chattr == NFS_I(inode)->cache_change_attribute; ++} ++ + /* + * linux/fs/nfs/inode.c + */ + extern void nfs_zap_caches(struct inode *); + extern struct inode *nfs_fhget(struct super_block *, struct nfs_fh *, + struct nfs_fattr *); +-extern int __nfs_refresh_inode(struct inode *, struct nfs_fattr *); ++extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); + extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); + extern int nfs_permission(struct inode *, int, struct nameidata *); + extern void nfs_set_mmcred(struct inode *, struct rpc_cred *); +@@ -241,6 +280,10 @@ extern int nfs_open(struct inode *, stru + extern int nfs_release(struct inode *, struct file *); + extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *); + extern int nfs_setattr(struct dentry *, struct iattr *); ++extern void nfs_begin_attr_update(struct inode *); ++extern void nfs_end_attr_update(struct inode *); ++extern void nfs_begin_data_update(struct inode *); ++extern void nfs_end_data_update(struct inode *); + + /* + * linux/fs/nfs/file.c +@@ -309,11 +352,11 @@ extern void nfs_commit_done(struct rpc_t + * Try to write back everything synchronously (but check the + * return value!) 
+ */ +-extern int nfs_sync_file(struct inode *, struct file *, unsigned long, unsigned int, int); +-extern int nfs_flush_file(struct inode *, struct file *, unsigned long, unsigned int, int); ++extern int nfs_sync_inode(struct inode *, unsigned long, unsigned int, int); ++extern int nfs_flush_inode(struct inode *, unsigned long, unsigned int, int); + extern int nfs_flush_list(struct list_head *, int, int); + #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) +-extern int nfs_commit_file(struct inode *, struct file *, unsigned long, unsigned int, int); ++extern int nfs_commit_inode(struct inode *, unsigned long, unsigned int, int); + extern int nfs_commit_list(struct list_head *, int); + #else + static inline int +@@ -333,7 +376,7 @@ nfs_have_writebacks(struct inode *inode) + static inline int + nfs_wb_all(struct inode *inode) + { +- int error = nfs_sync_file(inode, 0, 0, 0, FLUSH_WAIT); ++ int error = nfs_sync_inode(inode, 0, 0, FLUSH_WAIT); + return (error < 0) ? error : 0; + } + +@@ -343,21 +386,11 @@ nfs_wb_all(struct inode *inode) + static inline int + nfs_wb_page(struct inode *inode, struct page* page) + { +- int error = nfs_sync_file(inode, 0, page->index, 1, ++ int error = nfs_sync_inode(inode, page->index, 1, + FLUSH_WAIT | FLUSH_STABLE); + return (error < 0) ? error : 0; + } + +-/* +- * Write back all pending writes for one user.. +- */ +-static inline int +-nfs_wb_file(struct inode *inode, struct file *file) +-{ +- int error = nfs_sync_file(inode, file, 0, 0, FLUSH_WAIT); +- return (error < 0) ? 
error : 0; +-} +- + /* Hack for future NFS swap support */ + #ifndef IS_SWAPFILE + # define IS_SWAPFILE(inode) (0) +@@ -383,20 +416,27 @@ extern int nfsroot_mount(struct sockadd + /* + * inline functions + */ +-static inline int +-nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) ++ ++static inline int nfs_attribute_timeout(struct inode *inode) + { +- if (time_before(jiffies, NFS_READTIME(inode)+NFS_ATTRTIMEO(inode))) +- return NFS_STALE(inode) ? -ESTALE : 0; +- return __nfs_revalidate_inode(server, inode); ++ struct nfs_inode *nfsi = NFS_I(inode); ++ ++ return time_after(jiffies, nfsi->read_cache_jiffies+nfsi->attrtimeo); + } + +-static inline int +-nfs_refresh_inode(struct inode *inode, struct nfs_fattr *fattr) ++/** ++ * nfs_revalidate_inode - Revalidate the inode attributes ++ * @server - pointer to nfs_server struct ++ * @inode - pointer to inode struct ++ * ++ * Updates inode attribute information by retrieving the data from the server. ++ */ ++static inline int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode) + { +- if ((fattr->valid & NFS_ATTR_FATTR) == 0) +- return 0; +- return __nfs_refresh_inode(inode,fattr); ++ if (!(NFS_FLAGS(inode) & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA)) ++ && !nfs_attribute_timeout(inode)) ++ return NFS_STALE(inode) ? 
-ESTALE : 0; ++ return __nfs_revalidate_inode(server, inode); + } + + static inline loff_t +@@ -590,6 +630,15 @@ struct nfs4_state { + + extern struct dentry_operations nfs4_dentry_operations; + extern struct inode_operations nfs4_dir_inode_operations; ++extern struct inode_operations nfs4_file_inode_operations; ++ ++#define NFS_ACL_MAX_ENTRIES 32 ++ ++/* inode.c */ ++extern ssize_t nfs_getxattr(struct dentry *, const char *, void *, size_t); ++extern int nfs_setxattr(struct dentry *, const char *, const void *, size_t, int); ++ ++#define NFS4_ACL_NOT_CACHED ((void *)-1) + + /* nfs4proc.c */ + extern int nfs4_proc_setclientid(struct nfs4_client *, u32, unsigned short); +@@ -602,6 +651,9 @@ int nfs4_do_downgrade(struct inode *inod + extern int nfs4_wait_clnt_recover(struct rpc_clnt *, struct nfs4_client *); + extern struct inode *nfs4_atomic_open(struct inode *, struct dentry *, struct nameidata *); + extern int nfs4_open_revalidate(struct inode *, struct dentry *, int); ++struct posix_acl * nfs4_proc_get_posix_acl(struct inode *, int); ++extern int nfs4_proc_set_posix_acl(struct inode *, int, struct posix_acl *); ++void nfs4_izap_acl(struct inode *inode, struct posix_acl **i_acl); + + /* nfs4renewd.c */ + extern void nfs4_schedule_state_renewal(struct nfs4_client *); +diff -puN include/linux/nfs_page.h~CITI_NFS4_ALL include/linux/nfs_page.h +--- linux-2.6.3/include/linux/nfs_page.h~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/include/linux/nfs_page.h 2004-02-19 16:47:03.000000000 -0500 +@@ -53,7 +53,7 @@ extern void nfs_release_request(struct n + extern void nfs_list_add_request(struct nfs_page *, struct list_head *); + + extern int nfs_scan_list(struct list_head *, struct list_head *, +- struct file *, unsigned long, unsigned int); ++ unsigned long, unsigned int); + extern int nfs_coalesce_requests(struct list_head *, struct list_head *, + unsigned int); + extern int nfs_wait_on_request(struct nfs_page *); +diff -puN 
include/linux/nfs_xdr.h~CITI_NFS4_ALL include/linux/nfs_xdr.h +--- linux-2.6.3/include/linux/nfs_xdr.h~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/include/linux/nfs_xdr.h 2004-02-19 16:47:15.000000000 -0500 +@@ -39,6 +39,9 @@ struct nfs_fattr { + __u64 change_attr; /* NFSv4 change attribute */ + __u64 pre_change_attr;/* pre-op NFSv4 change attribute */ + unsigned long timestamp; ++#ifdef CONFIG_NFS_V4 ++ struct nfs4_acl *acl; /* NFSv4 ACL */ ++#endif /* CONFIG_NFS_V4 */ + }; + + #define NFS_ATTR_WCC 0x0001 /* pre-op WCC data */ +@@ -103,8 +106,6 @@ struct nfs_openargs { + nfs4_verifier verifier; /* EXCLUSIVE */ + } u; + struct qstr * name; +- struct nfs4_getattr * f_getattr; +- struct nfs4_getattr * d_getattr; + struct nfs_server * server; /* Needed for ID mapping */ + }; + +@@ -113,8 +114,8 @@ struct nfs_openres { + struct nfs_fh fh; + struct nfs4_change_info * cinfo; + __u32 rflags; +- struct nfs4_getattr * f_getattr; +- struct nfs4_getattr * d_getattr; ++ struct nfs_fattr * f_attr; ++ struct nfs_fattr * d_attr; + struct nfs_server * server; + }; + +@@ -141,7 +142,6 @@ struct nfs_open_reclaimargs { + __u32 id; + __u32 share_access; + __u32 claim; +- struct nfs4_getattr * f_getattr; + }; + + /* +@@ -319,12 +319,22 @@ struct nfs_setattrargs { + struct nfs_fh * fh; + nfs4_stateid stateid; + struct iattr * iap; +- struct nfs4_getattr * attr; ++ struct nfs_fattr * fattr; + struct nfs_server * server; /* Needed for name mapping */ ++#ifdef CONFIG_NFS_V4 ++ struct nfs4_acl * acl; ++#endif /* CONFIG_NFS_V4 */ + }; + ++#ifdef CONFIG_NFS_V4 ++struct nfs_setaclargs { ++ struct nfs_fh * fh; ++ struct nfs4_acl * acl; ++}; ++#endif /* CONFIG_NFS_V4 */ ++ + struct nfs_setattrres { +- struct nfs4_getattr * attr; ++ struct nfs_fattr * fattr; + struct nfs_server * server; + }; + +@@ -482,118 +492,127 @@ struct nfs4_change_info { + u64 after; + }; + +-struct nfs4_access { +- u32 ac_req_access; /* request */ +- u32 * ac_resp_supported; /* response */ 
+- u32 * ac_resp_access; /* response */ ++struct nfs4_accessargs { ++ struct nfs_fh * fhandle; ++ u32 req_access; + }; + +-struct nfs4_close { +- char * cl_stateid; /* request */ +- u32 cl_seqid; /* request */ ++struct nfs4_accessres { ++ struct nfs_server * server; ++ struct nfs_fattr * fattr; ++ u32 req_access; ++ u32 * resp_supported; ++ u32 * resp_access; + }; + +-struct nfs4_create { +- u32 cr_ftype; /* request */ +- union { /* request */ +- struct { +- u32 textlen; +- const char * text; +- } symlink; /* NF4LNK */ ++struct nfs4_create_arg { ++ u32 ftype; ++ union { ++ struct qstr * symlink; /* NF4LNK */ + struct { + u32 specdata1; + u32 specdata2; + } device; /* NF4BLK, NF4CHR */ + } u; +- u32 cr_namelen; /* request */ +- const char * cr_name; /* request */ +- struct iattr * cr_attrs; /* request */ +- struct nfs4_change_info * cr_cinfo; /* response */ ++ struct qstr * name; ++ struct nfs_server * server; ++ struct iattr * attrs; ++ struct nfs_fh * dir_fh; + }; +-#define cr_textlen u.symlink.textlen +-#define cr_text u.symlink.text +-#define cr_specdata1 u.device.specdata1 +-#define cr_specdata2 u.device.specdata2 + +-struct nfs4_getattr { +- u32 * gt_bmval; /* request */ +- struct nfs_fattr * gt_attrs; /* response */ +- struct nfs_fsstat * gt_fsstat; /* response */ +- struct nfs_pathconf * gt_pathconf; /* response */ ++struct nfs4_create_res { ++ struct nfs_server * server; ++ struct nfs_fh * fhandle; ++ struct nfs_fattr * fattr; ++ struct nfs_fattr * dir_attr; ++ struct nfs4_change_info * dir_cinfo; + }; + +-struct nfs4_getfh { +- struct nfs_fh * gf_fhandle; /* response */ ++struct nfs4_getattr_res { ++ struct nfs_server * server; ++ struct nfs_fattr * fattr; + }; + +-struct nfs4_link { +- u32 ln_namelen; /* request */ +- const char * ln_name; /* request */ +- struct nfs4_change_info * ln_cinfo; /* response */ ++struct nfs4_getroot_res { ++ struct nfs_server * server; ++ struct nfs_fattr * fattr; ++ struct nfs_fh * fhandle; + }; + +-struct nfs4_lookup { +- 
struct qstr * lo_name; /* request */ ++struct nfs4_getroot_arg { ++ struct nfs_fh * fhandle; ++ struct qstr * name; + }; + +-struct nfs4_open { +- struct nfs4_client * op_client_state; /* request */ +- u32 op_share_access; /* request */ +- u32 op_opentype; /* request */ +- u32 op_createmode; /* request */ +- union { /* request */ +- struct iattr * attrs; /* UNCHECKED, GUARDED */ +- nfs4_verifier verifier; /* EXCLUSIVE */ +- } u; +- struct qstr * op_name; /* request */ +- char * op_stateid; /* response */ +- struct nfs4_change_info * op_cinfo; /* response */ +- u32 * op_rflags; /* response */ +-}; +-#define op_attrs u.attrs +-#define op_verifier u.verifier +- +-struct nfs4_open_confirm { +- char * oc_stateid; /* request */ +-}; +- +-struct nfs4_putfh { +- struct nfs_fh * pf_fhandle; /* request */ +-}; +- +-struct nfs4_readdir { +- u64 rd_cookie; /* request */ +- nfs4_verifier rd_req_verifier; /* request */ +- u32 rd_count; /* request */ +- u32 rd_bmval[2]; /* request */ +- nfs4_verifier rd_resp_verifier; /* response */ +- struct page ** rd_pages; /* zero-copy data */ +- unsigned int rd_pgbase; /* zero-copy data */ ++struct nfs4_link_arg { ++ struct nfs_fh * fh; ++ struct nfs_fh * dir_fh; ++ struct qstr * name; ++}; ++ ++struct nfs4_link_res { ++ struct nfs_server * server; ++ struct nfs_fattr * fattr; ++ struct nfs_fattr * dir_attr; ++ struct nfs4_change_info * dir_cinfo; ++}; ++ ++struct nfs4_lookupargs { ++ struct nfs_fh * dir_fh; ++ struct qstr * name; ++}; ++ ++struct nfs4_lookupres { ++ struct nfs_server * server; ++ struct nfs_fattr * dirattr; ++ struct nfs_fattr * fattr; ++ struct nfs_fh * fhandle; ++}; ++ ++struct nfs4_readdir_arg { ++ struct nfs_fh * fh; ++ u64 cookie; /* request */ ++ nfs4_verifier req_verifier; /* request */ ++ u32 count; /* request */ ++ struct page ** pages; /* zero-copy data */ ++ unsigned int pgbase; /* zero-copy data */ ++}; ++ ++struct nfs4_readdir_res { ++ nfs4_verifier resp_verifier; ++ unsigned int pgbase; + }; + + struct 
nfs4_readlink { +- u32 rl_count; /* zero-copy data */ +- struct page ** rl_pages; /* zero-copy data */ ++ struct nfs_fh * fh; ++ u32 count; /* zero-copy data */ ++ struct page ** pages; /* zero-copy data */ + }; + +-struct nfs4_remove { +- u32 rm_namelen; /* request */ +- const char * rm_name; /* request */ +- struct nfs4_change_info * rm_cinfo; /* response */ ++struct nfs4_remove_arg { ++ struct nfs_fh * fhandle; ++ struct qstr * name; + }; + +-struct nfs4_rename { +- u32 rn_oldnamelen; /* request */ +- const char * rn_oldname; /* request */ +- u32 rn_newnamelen; /* request */ +- const char * rn_newname; /* request */ +- struct nfs4_change_info * rn_src_cinfo; /* response */ +- struct nfs4_change_info * rn_dst_cinfo; /* response */ ++struct nfs4_remove_res { ++ struct nfs_server * server; ++ struct nfs4_change_info * dir_cinfo; ++ struct nfs_fattr * dir_attr; + }; + +-struct nfs4_setattr { +- char * st_stateid; /* request */ +- struct iattr * st_iap; /* request */ ++struct nfs4_rename_arg { ++ struct nfs_fh * old_dir; ++ struct nfs_fh * new_dir; ++ struct qstr * old_name; ++ struct qstr * new_name; ++}; ++ ++struct nfs4_rename_res { ++ struct nfs_server * server; ++ struct nfs4_change_info * old_cinfo; ++ struct nfs4_change_info * new_cinfo; ++ struct nfs_fattr * old_fattr; ++ struct nfs_fattr * new_fattr; + }; + + struct nfs4_setclientid { +@@ -606,52 +625,12 @@ struct nfs4_setclientid { + struct nfs4_client * sc_state; /* response */ + }; + +-struct nfs4_op { +- u32 opnum; +- union { +- struct nfs4_access access; +- struct nfs4_close close; +- struct nfs4_create create; +- struct nfs4_getattr getattr; +- struct nfs4_getfh getfh; +- struct nfs4_link link; +- struct nfs4_lookup lookup; +- struct nfs4_open open; +- struct nfs4_open_confirm open_confirm; +- struct nfs4_putfh putfh; +- struct nfs4_readdir readdir; +- struct nfs4_readlink readlink; +- struct nfs4_remove remove; +- struct nfs4_rename rename; +- struct nfs4_client * renew; +- struct nfs4_setattr 
setattr; +- } u; +-}; +- +-struct nfs4_compound { +- unsigned int flags; /* defined below */ +- struct nfs_server * server; +- +- /* RENEW information */ +- int renew_index; +- unsigned long timestamp; +- +- /* scratch variables for XDR encode/decode */ +- int nops; +- u32 * p; +- u32 * end; +- +- /* the individual COMPOUND operations */ +- struct nfs4_op *ops; +- +- /* request */ +- int req_nops; +- u32 taglen; +- char * tag; +- +- /* response */ +- int resp_nops; +- int toplevel_status; ++struct nfs4_unlink { ++ struct nfs_server * server; ++ struct nfs_fh * fh; ++ struct qstr * name; ++ struct nfs4_change_info cinfo; /* NOT a pointer */ ++ struct nfs_fattr attrs; /* NOT a pointer */ + }; + + #endif /* CONFIG_NFS_V4 */ +@@ -698,9 +677,10 @@ struct nfs_rpc_ops { + int version; /* Protocol version */ + struct dentry_operations *dentry_ops; + struct inode_operations *dir_inode_ops; ++ struct inode_operations *file_inode_ops; + + int (*getroot) (struct nfs_server *, struct nfs_fh *, +- struct nfs_fattr *); ++ struct nfs_fsinfo *); + int (*getattr) (struct inode *, struct nfs_fattr *); + int (*setattr) (struct dentry *, struct nfs_fattr *, + struct iattr *); +diff -puN include/linux/sunrpc/xprt.h~CITI_NFS4_ALL include/linux/sunrpc/xprt.h +--- linux-2.6.3/include/linux/sunrpc/xprt.h~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/include/linux/sunrpc/xprt.h 2004-02-19 16:47:05.000000000 -0500 +@@ -95,14 +95,15 @@ struct rpc_rqst { + struct rpc_rqst * rq_next; /* free list */ + int rq_cong; /* has incremented xprt->cong */ + int rq_received; /* receive completed */ +- u32 rq_seqno; /* gss seq no. used on req. */ ++#define GSS_SEQNO_CACHE 4 ++ u32 rq_seqnos[GSS_SEQNO_CACHE]; ++ /* gss seq no.s used on req. */ + + struct list_head rq_list; + + struct xdr_buf rq_private_buf; /* The receive buffer + * used in the softirq. + */ +- + /* + * For authentication (e.g. 
auth_des) + */ +@@ -155,6 +156,11 @@ struct rpc_xprt { + stream : 1; /* TCP */ + + /* ++ * XID ++ */ ++ __u32 xid; /* Next XID value to use */ ++ ++ /* + * State of TCP reply receive stuff + */ + u32 tcp_recm, /* Fragment header */ +@@ -164,6 +170,11 @@ struct rpc_xprt { + unsigned long tcp_copied, /* copied to request */ + tcp_flags; + /* ++ * Connection of sockets ++ */ ++ struct work_struct sock_connect; ++ unsigned short port; ++ /* + * Disconnection of idle sockets + */ + struct work_struct task_cleanup; +diff -puN net/sunrpc/xprt.c~CITI_NFS4_ALL net/sunrpc/xprt.c +--- linux-2.6.3/net/sunrpc/xprt.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/net/sunrpc/xprt.c 2004-02-19 16:47:05.000000000 -0500 +@@ -60,6 +60,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -77,6 +78,7 @@ + + #define XPRT_MAX_BACKOFF (8) + #define XPRT_IDLE_TIMEOUT (5*60*HZ) ++#define XPRT_MAX_RESVPORT (800) + + /* + * Local functions +@@ -87,7 +89,7 @@ static void xprt_disconnect(struct rpc_x + static void xprt_connect_status(struct rpc_task *task); + static struct rpc_xprt * xprt_setup(int proto, struct sockaddr_in *ap, + struct rpc_timeout *to); +-static struct socket *xprt_create_socket(int, struct rpc_timeout *, int); ++static struct socket *xprt_create_socket(struct rpc_xprt *, int, int); + static void xprt_bind_socket(struct rpc_xprt *, struct socket *); + static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); + +@@ -455,6 +457,68 @@ out_abort: + spin_unlock(&xprt->sock_lock); + } + ++static void ++xprt_socket_connect(void *args) ++{ ++ struct rpc_xprt *xprt = (struct rpc_xprt *)args; ++ struct socket *sock = xprt->sock; ++ int status = -EIO; ++ ++ if (xprt->shutdown) { ++ rpc_wake_up_status(&xprt->pending, -EIO); ++ return; ++ } ++ if (!xprt->addr.sin_port) ++ goto out_err; ++ ++ /* ++ * Start by resetting any existing state ++ */ ++ xprt_close(xprt); ++ sock = xprt_create_socket(xprt, xprt->prot, xprt->resvport); ++ 
if (sock == NULL) { ++ /* couldn't create socket or bind to reserved port; ++ * this is likely a permanent error, so cause an abort */ ++ goto out_err; ++ return; ++ } ++ xprt_bind_socket(xprt, sock); ++ xprt_sock_setbufsize(xprt); ++ ++ if (!xprt->stream) ++ goto out; ++ ++ /* ++ * Tell the socket layer to start connecting... ++ */ ++ status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, ++ sizeof(xprt->addr), O_NONBLOCK); ++ dprintk("RPC: %p connect status %d connected %d sock state %d\n", ++ xprt, -status, xprt_connected(xprt), sock->sk->sk_state); ++ if (status >= 0) ++ goto out; ++ switch (status) { ++ case -EINPROGRESS: ++ case -EALREADY: ++ break; ++ default: ++ goto out_err; ++ } ++out: ++ spin_lock_bh(&xprt->sock_lock); ++ if (xprt->snd_task) ++ rpc_wake_up_task(xprt->snd_task); ++ spin_unlock_bh(&xprt->sock_lock); ++ return; ++out_err: ++ spin_lock_bh(&xprt->sock_lock); ++ if (xprt->snd_task) { ++ xprt->snd_task->tk_status = status; ++ rpc_wake_up_task(xprt->snd_task); ++ } ++ spin_unlock_bh(&xprt->sock_lock); ++} ++ + /* + * Attempt to connect a TCP socket. + * +@@ -463,9 +527,6 @@ void + xprt_connect(struct rpc_task *task) + { + struct rpc_xprt *xprt = task->tk_xprt; +- struct socket *sock = xprt->sock; +- struct sock *inet; +- int status; + + dprintk("RPC: %4d xprt_connect xprt %p %s connected\n", task->tk_pid, + xprt, (xprt_connected(xprt) ? "is" : "is not")); +@@ -486,79 +547,9 @@ xprt_connect(struct rpc_task *task) + if (task->tk_rqstp) + task->tk_rqstp->rq_bytes_sent = 0; + +- /* +- * We're here because the xprt was marked disconnected. +- * Start by resetting any existing state. 
+- */ +- xprt_close(xprt); +- if (!(sock = xprt_create_socket(xprt->prot, &xprt->timeout, xprt->resvport))) { +- /* couldn't create socket or bind to reserved port; +- * this is likely a permanent error, so cause an abort */ +- task->tk_status = -EIO; +- goto out_write; +- } +- xprt_bind_socket(xprt, sock); +- xprt_sock_setbufsize(xprt); +- +- if (!xprt->stream) +- goto out_write; +- +- inet = sock->sk; +- +- /* +- * Tell the socket layer to start connecting... +- */ +- status = sock->ops->connect(sock, (struct sockaddr *) &xprt->addr, +- sizeof(xprt->addr), O_NONBLOCK); +- dprintk("RPC: %4d connect status %d connected %d sock state %d\n", +- task->tk_pid, -status, xprt_connected(xprt), inet->sk_state); +- +- if (status >= 0) +- return; +- +- switch (status) { +- case -EINPROGRESS: +- case -EALREADY: +- /* Protect against TCP socket state changes */ +- lock_sock(inet); +- if (inet->sk_state != TCP_ESTABLISHED) { +- dprintk("RPC: %4d waiting for connection\n", +- task->tk_pid); +- task->tk_timeout = RPC_CONNECT_TIMEOUT; +- /* if the socket is already closing, delay briefly */ +- if ((1 << inet->sk_state) & +- ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) +- task->tk_timeout = RPC_REESTABLISH_TIMEOUT; +- rpc_sleep_on(&xprt->pending, task, xprt_connect_status, +- NULL); +- } +- release_sock(inet); +- break; +- case -ECONNREFUSED: +- case -ECONNRESET: +- case -ENOTCONN: +- if (!RPC_IS_SOFT(task)) { +- rpc_delay(task, RPC_REESTABLISH_TIMEOUT); +- task->tk_status = -ENOTCONN; +- break; +- } +- default: +- /* Report myriad other possible returns. If this file +- * system is soft mounted, just error out, like Solaris. 
*/ +- if (RPC_IS_SOFT(task)) { +- printk(KERN_WARNING +- "RPC: error %d connecting to server %s, exiting\n", +- -status, task->tk_client->cl_server); +- task->tk_status = -EIO; +- goto out_write; +- } +- printk(KERN_WARNING "RPC: error %d connecting to server %s\n", +- -status, task->tk_client->cl_server); +- /* This will prevent anybody else from reconnecting */ +- rpc_delay(task, RPC_REESTABLISH_TIMEOUT); +- task->tk_status = status; +- break; +- } ++ task->tk_timeout = RPC_CONNECT_TIMEOUT; ++ rpc_sleep_on(&xprt->pending, task, xprt_connect_status, NULL); ++ schedule_work(&xprt->sock_connect); + return; + out_write: + xprt_release_write(xprt, task); +@@ -583,6 +574,8 @@ xprt_connect_status(struct rpc_task *tas + task->tk_status = -EIO; + + switch (task->tk_status) { ++ case -ECONNREFUSED: ++ case -ECONNRESET: + case -ENOTCONN: + rpc_delay(task, RPC_REESTABLISH_TIMEOUT); + return; +@@ -1333,22 +1326,14 @@ do_xprt_reserve(struct rpc_task *task) + /* + * Allocate a 'unique' XID + */ +-static u32 +-xprt_alloc_xid(void) ++static inline u32 xprt_alloc_xid(struct rpc_xprt *xprt) ++{ ++ return xprt->xid++; ++} ++ ++static inline void xprt_init_xid(struct rpc_xprt *xprt) + { +- static spinlock_t xid_lock = SPIN_LOCK_UNLOCKED; +- static int need_init = 1; +- static u32 xid; +- u32 ret; +- +- spin_lock(&xid_lock); +- if (unlikely(need_init)) { +- xid = get_seconds() << 12; +- need_init = 0; +- } +- ret = xid++; +- spin_unlock(&xid_lock); +- return ret; ++ get_random_bytes(&xprt->xid, sizeof(xprt->xid)); + } + + /* +@@ -1362,7 +1347,8 @@ xprt_request_init(struct rpc_task *task, + req->rq_timeout = xprt->timeout; + req->rq_task = task; + req->rq_xprt = xprt; +- req->rq_xid = xprt_alloc_xid(); ++ req->rq_xid = xprt_alloc_xid(xprt); ++ memset(req->rq_seqnos, 0, sizeof(req->rq_seqnos)); + INIT_LIST_HEAD(&req->rq_list); + dprintk("RPC: %4d reserved req %p xid %08x\n", task->tk_pid, + req, req->rq_xid); +@@ -1457,11 +1443,13 @@ xprt_setup(int proto, struct sockaddr_in + 
init_waitqueue_head(&xprt->cong_wait); + + INIT_LIST_HEAD(&xprt->recv); ++ INIT_WORK(&xprt->sock_connect, xprt_socket_connect, xprt); + INIT_WORK(&xprt->task_cleanup, xprt_socket_autoclose, xprt); + init_timer(&xprt->timer); + xprt->timer.function = xprt_init_autodisconnect; + xprt->timer.data = (unsigned long) xprt; + xprt->last_used = jiffies; ++ xprt->port = XPRT_MAX_RESVPORT; + + /* Set timeout parameters */ + if (to) { +@@ -1481,6 +1469,8 @@ xprt_setup(int proto, struct sockaddr_in + req->rq_next = NULL; + xprt->free = xprt->slot; + ++ xprt_init_xid(xprt); ++ + /* Check whether we want to use a reserved port */ + xprt->resvport = capable(CAP_NET_BIND_SERVICE) ? 1 : 0; + +@@ -1493,30 +1483,28 @@ xprt_setup(int proto, struct sockaddr_in + * Bind to a reserved port + */ + static inline int +-xprt_bindresvport(struct socket *sock) ++xprt_bindresvport(struct rpc_xprt *xprt, struct socket *sock) + { +- struct sockaddr_in myaddr; ++ struct sockaddr_in myaddr = { ++ .sin_family = AF_INET, ++ }; + int err, port; +- kernel_cap_t saved_cap = current->cap_effective; + +- /* Override capabilities. +- * They were checked in xprt_create_proto i.e. at mount time +- */ +- cap_raise(current->cap_effective, CAP_NET_BIND_SERVICE); +- +- memset(&myaddr, 0, sizeof(myaddr)); +- myaddr.sin_family = AF_INET; +- port = 800; ++ /* Were we already bound to a given port? 
Try to reuse it */ ++ port = xprt->port; + do { + myaddr.sin_port = htons(port); + err = sock->ops->bind(sock, (struct sockaddr *) &myaddr, + sizeof(myaddr)); +- } while (err == -EADDRINUSE && --port > 0); +- current->cap_effective = saved_cap; +- +- if (err < 0) +- printk("RPC: Can't bind to reserved port (%d).\n", -err); ++ if (err == 0) { ++ xprt->port = port; ++ return 0; ++ } ++ if (--port == 0) ++ port = XPRT_MAX_RESVPORT; ++ } while (err == -EADDRINUSE && port != xprt->port); + ++ printk("RPC: Can't bind to reserved port (%d).\n", -err); + return err; + } + +@@ -1580,7 +1568,7 @@ xprt_sock_setbufsize(struct rpc_xprt *xp + * and connect stream sockets. + */ + static struct socket * +-xprt_create_socket(int proto, struct rpc_timeout *to, int resvport) ++xprt_create_socket(struct rpc_xprt *xprt, int proto, int resvport) + { + struct socket *sock; + int type, err; +@@ -1596,7 +1584,7 @@ xprt_create_socket(int proto, struct rpc + } + + /* If the caller has the capability, bind to a reserved port */ +- if (resvport && xprt_bindresvport(sock) < 0) { ++ if (resvport && xprt_bindresvport(xprt, sock) < 0) { + printk("RPC: can't bind to reserved port.\n"); + goto failed; + } +diff -puN net/sunrpc/cache.c~CITI_NFS4_ALL net/sunrpc/cache.c +--- linux-2.6.3/net/sunrpc/cache.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/net/sunrpc/cache.c 2004-02-19 16:47:03.000000000 -0500 +@@ -325,6 +325,7 @@ int cache_clean(void) + + if (current_detail && current_index < current_detail->hash_size) { + struct cache_head *ch, **cp; ++ struct cache_detail *d; + + write_lock(&current_detail->hash_lock); + +@@ -354,12 +355,14 @@ int cache_clean(void) + rv = 1; + } + write_unlock(&current_detail->hash_lock); +- if (ch) +- current_detail->cache_put(ch, current_detail); +- else ++ d = current_detail; ++ if (!ch) + current_index ++; +- } +- spin_unlock(&cache_list_lock); ++ spin_unlock(&cache_list_lock); ++ if (ch) ++ d->cache_put(ch, d); ++ } else ++
spin_unlock(&cache_list_lock); + + return rv; + } +diff -puN include/linux/sunrpc/cache.h~CITI_NFS4_ALL include/linux/sunrpc/cache.h +--- linux-2.6.3/include/linux/sunrpc/cache.h~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/include/linux/sunrpc/cache.h 2004-02-19 16:47:03.000000000 -0500 +@@ -132,12 +132,14 @@ struct cache_deferred_req { + * If "set" == 0 : + * If an entry is found, it is returned + * If no entry is found, a new non-VALID entry is created. +- * If "set" == 1 : ++ * If "set" == 1 and INPLACE == 0 : + * If no entry is found a new one is inserted with data from "template" + * If a non-CACHE_VALID entry is found, it is updated from template using UPDATE + * If a CACHE_VALID entry is found, a new entry is swapped in with data + * from "template" +- * If set == 2, we UPDATE, but don't swap. i.e. update in place ++ * If set == 1, and INPLACE == 1 : ++ * As above, except that if a CACHE_VALID entry is found, we UPDATE in place ++ * instead of swapping in a new entry. + * + * If the passed handle has the CACHE_NEGATIVE flag set, then UPDATE is not + * run but insteead CACHE_NEGATIVE is set in any new item. 
+@@ -164,8 +166,8 @@ RTN *FNAME ARGS \ + RTN *tmp, *new=NULL; \ + struct cache_head **hp, **head; \ + SETUP; \ +- retry: \ + head = &(DETAIL)->hash_table[HASHFN]; \ ++ retry: \ + if (set||new) write_lock(&(DETAIL)->hash_lock); \ + else read_lock(&(DETAIL)->hash_lock); \ + for(hp=head; *hp != NULL; hp = &tmp->MEMBER.next) { \ +@@ -175,6 +177,8 @@ RTN *FNAME ARGS \ + if (set && !INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags) && !new) \ + break; \ + \ ++ if (new) \ ++ {INIT;} \ + cache_get(&tmp->MEMBER); \ + if (set) { \ + if (!INPLACE && test_bit(CACHE_VALID, &tmp->MEMBER.flags))\ +@@ -203,6 +207,7 @@ RTN *FNAME ARGS \ + } \ + /* Didn't find anything */ \ + if (new) { \ ++ INIT; \ + new->MEMBER.next = *head; \ + *head = &new->MEMBER; \ + (DETAIL)->entries ++; \ +@@ -224,8 +229,6 @@ RTN *FNAME ARGS \ + if (new) { \ + cache_init(&new->MEMBER); \ + cache_get(&new->MEMBER); \ +- INIT; \ +- tmp = new; \ + goto retry; \ + } \ + return NULL; \ +diff -puN net/sunrpc/svcauth.c~CITI_NFS4_ALL net/sunrpc/svcauth.c +--- linux-2.6.3/net/sunrpc/svcauth.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/net/sunrpc/svcauth.c 2004-02-19 16:47:04.000000000 -0500 +@@ -150,7 +150,10 @@ DefineCacheLookup(struct auth_domain, + &auth_domain_cache, + auth_domain_hash(item), + auth_domain_match(tmp, item), +- kfree(new); if(!set) return NULL; ++ kfree(new); if(!set) { ++ write_unlock(&auth_domain_cache.hash_lock); ++ return NULL; ++ } + new=item; atomic_inc(&new->h.refcnt), + /* no update */, + 0 /* no inplace updates */ +diff -puN net/sunrpc/svcauth_unix.c~CITI_NFS4_ALL net/sunrpc/svcauth_unix.c +--- linux-2.6.3/net/sunrpc/svcauth_unix.c~CITI_NFS4_ALL 2004-02-19 16:47:03.000000000 -0500 ++++ linux-2.6.3-bfields/net/sunrpc/svcauth_unix.c 2004-02-19 16:47:03.000000000 -0500 +@@ -119,7 +119,8 @@ static inline int ip_map_match(struct ip + } + static inline void ip_map_init(struct ip_map *new, struct ip_map *item) + { +- new->m_class = strdup(item->m_class); ++ 
new->m_class = item->m_class; ++ item->m_class = NULL; + new->m_addr.s_addr = item->m_addr.s_addr; + } + static inline void ip_map_update(struct ip_map *new, struct ip_map *item) +@@ -191,7 +192,9 @@ static int ip_map_parse(struct cache_det + } else + dom = NULL; + +- ipm.m_class = class; ++ ipm.m_class = strdup(class); ++ if (ipm.m_class == NULL) ++ return -ENOMEM; + ipm.m_addr.s_addr = + htonl((((((b1<<8)|b2)<<8)|b3)<<8)|b4); + ipm.h.flags = 0; +@@ -207,6 +210,7 @@ static int ip_map_parse(struct cache_det + ip_map_put(&ipmp->h, &ip_map_cache); + if (dom) + auth_domain_put(dom); ++ if (ipm.m_class) kfree(ipm.m_class); + if (!ipmp) + return -ENOMEM; + cache_flush(); +@@ -266,7 +270,9 @@ int auth_unix_add_addr(struct in_addr ad + if (dom->flavour != RPC_AUTH_UNIX) + return -EINVAL; + udom = container_of(dom, struct unix_domain, h); +- ip.m_class = "nfsd"; ++ ip.m_class = strdup("nfsd"); ++ if (!ip.m_class) ++ return -ENOMEM; + ip.m_addr = addr; + ip.m_client = udom; + ip.m_add_change = udom->addr_changes+1; +@@ -274,6 +280,7 @@ int auth_unix_add_addr(struct in_addr ad + ip.h.expiry_time = NEVER; + + ipmp = ip_map_lookup(&ip, 1); ++ if (ip.m_class) kfree(ip.m_class); + if (ipmp) { + ip_map_put(&ipmp->h, &ip_map_cache); + return 0; +diff -puN fs/nfsd/stats.c~CITI_NFS4_ALL fs/nfsd/stats.c +--- linux-2.6.3/fs/nfsd/stats.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfsd/stats.c 2004-02-19 16:47:04.000000000 -0500 +@@ -26,6 +26,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -39,14 +40,11 @@ struct svc_stat nfsd_svcstats = { + .program = &nfsd_program, + }; + +-static int +-nfsd_proc_read(char *buffer, char **start, off_t offset, int count, +- int *eof, void *data) ++static int nfsd_proc_show(struct seq_file *seq, void *v) + { +- int len; +- int i; ++ int i; + +- len = sprintf(buffer, "rc %u %u %u\nfh %u %u %u %u %u\nio %u %u\n", ++ seq_printf(seq, "rc %u %u %u\nfh %u %u %u %u %u\nio %u %u\n", + 
nfsdstats.rchits, + nfsdstats.rcmisses, + nfsdstats.rcnocache, +@@ -58,57 +56,42 @@ nfsd_proc_read(char *buffer, char **star + nfsdstats.io_read, + nfsdstats.io_write); + /* thread usage: */ +- len += sprintf(buffer+len, "th %u %u", nfsdstats.th_cnt, nfsdstats.th_fullcnt); ++ seq_printf(seq, "th %u %u", nfsdstats.th_cnt, nfsdstats.th_fullcnt); + for (i=0; i<10; i++) { + unsigned int jifs = nfsdstats.th_usage[i]; + unsigned int sec = jifs / HZ, msec = (jifs % HZ)*1000/HZ; +- len += sprintf(buffer+len, " %u.%03u", sec, msec); ++ seq_printf(seq, " %u.%03u", sec, msec); + } + + /* newline and ra-cache */ +- len += sprintf(buffer+len, "\nra %u", nfsdstats.ra_size); ++ seq_printf(seq, "\nra %u", nfsdstats.ra_size); + for (i=0; i<11; i++) +- len += sprintf(buffer+len, " %u", nfsdstats.ra_depth[i]); +- len += sprintf(buffer+len, "\n"); ++ seq_printf(seq, " %u", nfsdstats.ra_depth[i]); ++ seq_putc(seq, '\n'); + ++ /* show my rpc info */ ++ svc_seq_show(seq, &nfsd_svcstats); + +- /* Assume we haven't hit EOF yet. Will be set by svc_proc_read. */ +- *eof = 0; +- +- /* +- * Append generic nfsd RPC statistics if there's room for it. 
+- */ +- if (len <= offset) { +- len = svc_proc_read(buffer, start, offset - len, count, +- eof, data); +- return len; +- } +- +- if (len < count) { +- len += svc_proc_read(buffer + len, start, 0, count - len, +- eof, data); +- } +- +- if (offset >= len) { +- *start = buffer; +- return 0; +- } ++ return 0; ++} + +- *start = buffer + offset; +- if ((len -= offset) > count) +- return count; +- return len; ++static int nfsd_proc_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, nfsd_proc_show, NULL); + } + ++static struct file_operations nfsd_proc_fops = { ++ .owner = THIS_MODULE, ++ .open = nfsd_proc_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ + void + nfsd_stat_init(void) + { +- struct proc_dir_entry *ent; +- +- if ((ent = svc_proc_register(&nfsd_svcstats)) != 0) { +- ent->read_proc = nfsd_proc_read; +- ent->owner = THIS_MODULE; +- } ++ svc_proc_register(&nfsd_svcstats, &nfsd_proc_fops); + } + + void +diff -puN include/linux/sunrpc/stats.h~CITI_NFS4_ALL include/linux/sunrpc/stats.h +--- linux-2.6.3/include/linux/sunrpc/stats.h~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 ++++ linux-2.6.3-bfields/include/linux/sunrpc/stats.h 2004-02-19 16:47:04.000000000 -0500 +@@ -48,14 +48,13 @@ void rpc_modcount(struct inode *, int) + #ifdef CONFIG_PROC_FS + struct proc_dir_entry * rpc_proc_register(struct rpc_stat *); + void rpc_proc_unregister(const char *); +-int rpc_proc_read(char *, char **, off_t, int, +- int *, void *); + void rpc_proc_zero(struct rpc_program *); +-struct proc_dir_entry * svc_proc_register(struct svc_stat *); ++struct proc_dir_entry * svc_proc_register(struct svc_stat *, ++ struct file_operations *); + void svc_proc_unregister(const char *); +-int svc_proc_read(char *, char **, off_t, int, +- int *, void *); +-void svc_proc_zero(struct svc_program *); ++ ++void svc_seq_show(struct seq_file *, ++ const struct svc_stat *); + + extern struct proc_dir_entry *proc_net_rpc; + +@@ -63,13 
+62,14 @@ extern struct proc_dir_entry *proc_net_r + + static inline struct proc_dir_entry *rpc_proc_register(struct rpc_stat *s) { return NULL; } + static inline void rpc_proc_unregister(const char *p) {} +-static inline int rpc_proc_read(char *a, char **b, off_t c, int d, int *e, void *f) { return 0; } + static inline void rpc_proc_zero(struct rpc_program *p) {} + +-static inline struct proc_dir_entry *svc_proc_register(struct svc_stat *s) { return NULL; } ++static inline struct proc_dir_entry *svc_proc_register(struct svc_stat *s, ++ struct file_operations *f) { return NULL; } + static inline void svc_proc_unregister(const char *p) {} +-static inline int svc_proc_read(char *a, char **b, off_t c, int d, int *e, void *f) { return 0; } +-static inline void svc_proc_zero(struct svc_program *p) {} ++ ++static inline void svc_seq_show(struct seq_file *seq, ++ const struct svc_stat *st) {} + + #define proc_net_rpc NULL + +diff -puN net/sunrpc/stats.c~CITI_NFS4_ALL net/sunrpc/stats.c +--- linux-2.6.3/net/sunrpc/stats.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 ++++ linux-2.6.3-bfields/net/sunrpc/stats.c 2004-02-19 16:47:04.000000000 -0500 +@@ -18,6 +18,7 @@ + #include + #include + #include ++#include + #include + #include + +@@ -28,70 +29,66 @@ struct proc_dir_entry *proc_net_rpc = NU + /* + * Get RPC client stats + */ +-int +-rpc_proc_read(char *buffer, char **start, off_t offset, int count, +- int *eof, void *data) +-{ +- struct rpc_stat *statp = (struct rpc_stat *) data; +- struct rpc_program *prog = statp->program; +- struct rpc_version *vers; +- int len, i, j; ++static int rpc_proc_show(struct seq_file *seq, void *v) { ++ const struct rpc_stat *statp = seq->private; ++ const struct rpc_program *prog = statp->program; ++ int i, j; + +- len = sprintf(buffer, ++ seq_printf(seq, + "net %d %d %d %d\n", + statp->netcnt, + statp->netudpcnt, + statp->nettcpcnt, + statp->nettcpconn); +- len += sprintf(buffer + len, ++ seq_printf(seq, + "rpc %d %d %d\n", + 
statp->rpccnt, + statp->rpcretrans, + statp->rpcauthrefresh); + + for (i = 0; i < prog->nrvers; i++) { +- if (!(vers = prog->version[i])) ++ const struct rpc_version *vers = prog->version[i]; ++ if (!vers) + continue; +- len += sprintf(buffer + len, "proc%d %d", ++ seq_printf(seq, "proc%d %d", + vers->number, vers->nrprocs); + for (j = 0; j < vers->nrprocs; j++) +- len += sprintf(buffer + len, " %d", ++ seq_printf(seq, " %d", + vers->procs[j].p_count); +- buffer[len++] = '\n'; ++ seq_putc(seq, '\n'); + } ++ return 0; ++} + +- if (offset >= len) { +- *start = buffer; +- *eof = 1; +- return 0; +- } +- *start = buffer + offset; +- if ((len -= offset) > count) +- return count; +- *eof = 1; +- return len; ++static int rpc_proc_open(struct inode *inode, struct file *file) ++{ ++ return single_open(file, rpc_proc_show, PDE(inode)->data); + } + ++static struct file_operations rpc_proc_fops = { ++ .owner = THIS_MODULE, ++ .open = rpc_proc_open, ++ .read = seq_read, ++ .llseek = seq_lseek, ++ .release = single_release, ++}; ++ + /* + * Get RPC server stats + */ +-int +-svc_proc_read(char *buffer, char **start, off_t offset, int count, +- int *eof, void *data) +-{ +- struct svc_stat *statp = (struct svc_stat *) data; +- struct svc_program *prog = statp->program; +- struct svc_procedure *proc; +- struct svc_version *vers; +- int len, i, j; ++void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp) { ++ const struct svc_program *prog = statp->program; ++ const struct svc_procedure *proc; ++ const struct svc_version *vers; ++ int i, j; + +- len = sprintf(buffer, ++ seq_printf(seq, + "net %d %d %d %d\n", + statp->netcnt, + statp->netudpcnt, + statp->nettcpcnt, + statp->nettcpconn); +- len += sprintf(buffer + len, ++ seq_printf(seq, + "rpc %d %d %d %d %d\n", + statp->rpccnt, + statp->rpcbadfmt+statp->rpcbadauth+statp->rpcbadclnt, +@@ -102,41 +99,36 @@ svc_proc_read(char *buffer, char **start + for (i = 0; i < prog->pg_nvers; i++) { + if (!(vers = prog->pg_vers[i]) || 
!(proc = vers->vs_proc)) + continue; +- len += sprintf(buffer + len, "proc%d %d", i, vers->vs_nproc); ++ seq_printf(seq, "proc%d %d", i, vers->vs_nproc); + for (j = 0; j < vers->vs_nproc; j++, proc++) +- len += sprintf(buffer + len, " %d", proc->pc_count); +- buffer[len++] = '\n'; ++ seq_printf(seq, " %d", proc->pc_count); ++ seq_putc(seq, '\n'); + } +- +- if (offset >= len) { +- *start = buffer; +- *eof = 1; +- return 0; +- } +- *start = buffer + offset; +- if ((len -= offset) > count) +- return count; +- *eof = 1; +- return len; + } + + /* + * Register/unregister RPC proc files + */ + static inline struct proc_dir_entry * +-do_register(const char *name, void *data, int issvc) ++do_register(const char *name, void *data, struct file_operations *fops) + { ++ struct proc_dir_entry *ent; ++ + rpc_proc_init(); + dprintk("RPC: registering /proc/net/rpc/%s\n", name); +- return create_proc_read_entry(name, 0, proc_net_rpc, +- issvc? svc_proc_read : rpc_proc_read, +- data); ++ ++ ent = create_proc_entry(name, 0, proc_net_rpc); ++ if (ent) { ++ ent->proc_fops = fops; ++ ent->data = data; ++ } ++ return ent; + } + + struct proc_dir_entry * + rpc_proc_register(struct rpc_stat *statp) + { +- return do_register(statp->program->name, statp, 0); ++ return do_register(statp->program->name, statp, &rpc_proc_fops); + } + + void +@@ -146,9 +138,9 @@ rpc_proc_unregister(const char *name) + } + + struct proc_dir_entry * +-svc_proc_register(struct svc_stat *statp) ++svc_proc_register(struct svc_stat *statp, struct file_operations *fops) + { +- return do_register(statp->program->pg_name, statp, 1); ++ return do_register(statp->program->pg_name, statp, fops); + } + + void +@@ -163,7 +155,7 @@ rpc_proc_init(void) + dprintk("RPC: registering /proc/net/rpc\n"); + if (!proc_net_rpc) { + struct proc_dir_entry *ent; +- ent = proc_mkdir("net/rpc", 0); ++ ent = proc_mkdir("rpc", proc_net); + if (ent) { + ent->owner = THIS_MODULE; + proc_net_rpc = ent; +diff -puN 
net/sunrpc/sunrpc_syms.c~CITI_NFS4_ALL net/sunrpc/sunrpc_syms.c +--- linux-2.6.3/net/sunrpc/sunrpc_syms.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 ++++ linux-2.6.3-bfields/net/sunrpc/sunrpc_syms.c 2004-02-19 16:47:04.000000000 -0500 +@@ -85,15 +85,16 @@ EXPORT_SYMBOL(svc_recv); + EXPORT_SYMBOL(svc_wake_up); + EXPORT_SYMBOL(svc_makesock); + EXPORT_SYMBOL(svc_reserve); ++EXPORT_SYMBOL(svc_auth_register); ++EXPORT_SYMBOL(auth_domain_lookup); + + /* RPC statistics */ + #ifdef CONFIG_PROC_FS + EXPORT_SYMBOL(rpc_proc_register); + EXPORT_SYMBOL(rpc_proc_unregister); +-EXPORT_SYMBOL(rpc_proc_read); + EXPORT_SYMBOL(svc_proc_register); + EXPORT_SYMBOL(svc_proc_unregister); +-EXPORT_SYMBOL(svc_proc_read); ++EXPORT_SYMBOL(svc_seq_show); + #endif + + /* caching... */ +diff -puN net/sunrpc/auth_gss/gss_krb5_seal.c~CITI_NFS4_ALL net/sunrpc/auth_gss/gss_krb5_seal.c +--- linux-2.6.3/net/sunrpc/auth_gss/gss_krb5_seal.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 ++++ linux-2.6.3-bfields/net/sunrpc/auth_gss/gss_krb5_seal.c 2004-02-19 16:47:07.000000000 -0500 +@@ -101,12 +101,12 @@ krb5_make_token(struct krb5_ctx *ctx, in + checksum_type = CKSUMTYPE_RSA_MD5; + break; + default: +- dprintk("RPC: gss_krb5_seal: ctx->signalg %d not" ++ dprintk("RPC: gss_krb5_seal: ctx->signalg %d not" + " supported\n", ctx->signalg); + goto out_err; + } + if (ctx->sealalg != SEAL_ALG_NONE && ctx->sealalg != SEAL_ALG_DES) { +- dprintk("RPC: gss_krb5_seal: ctx->sealalg %d not supported\n", ++ dprintk("RPC: gss_krb5_seal: ctx->sealalg %d not supported\n", + ctx->sealalg); + goto out_err; + } +@@ -151,7 +151,7 @@ krb5_make_token(struct krb5_ctx *ctx, in + md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH, + KRB5_CKSUM_LENGTH); + +- dprintk("make_seal_token: cksum data: \n"); ++ dprintk("RPC: make_seal_token: cksum data: \n"); + print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0); + break; + default: +@@ -169,8 +169,5 @@ krb5_make_token(struct krb5_ctx *ctx, in + return ((ctx->endtime < 
now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE); + out_err: + if (md5cksum.data) kfree(md5cksum.data); +- if (token->data) kfree(token->data); +- token->data = 0; +- token->len = 0; + return GSS_S_FAILURE; + } +diff -puN include/linux/sunrpc/auth_gss.h~CITI_NFS4_ALL include/linux/sunrpc/auth_gss.h +--- linux-2.6.3/include/linux/sunrpc/auth_gss.h~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 ++++ linux-2.6.3-bfields/include/linux/sunrpc/auth_gss.h 2004-02-19 16:47:04.000000000 -0500 +@@ -62,8 +62,6 @@ struct rpc_gss_init_res { + struct xdr_netobj gr_token; /* token */ + }; + +-#define GSS_SEQ_WIN 5 +- + /* The gss_cl_ctx struct holds all the information the rpcsec_gss client + * code needs to know about a single security context. In particular, + * gc_gss_ctx is the context handle that is used to do gss-api calls, while +diff -puN include/linux/sunrpc/gss_api.h~CITI_NFS4_ALL include/linux/sunrpc/gss_api.h +--- linux-2.6.3/include/linux/sunrpc/gss_api.h~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 ++++ linux-2.6.3-bfields/include/linux/sunrpc/gss_api.h 2004-02-19 16:47:04.000000000 -0500 +@@ -120,6 +120,9 @@ int gss_mech_unregister_all(void); + * reference count. */ + struct gss_api_mech * gss_mech_get_by_OID(struct xdr_netobj *); + ++/* Similar, but get by name like "krb5", "spkm", etc., instead of OID. 
*/ ++struct gss_api_mech *gss_mech_get_by_name(char *); ++ + /* Just increments the mechanism's reference count and returns its input: */ + struct gss_api_mech * gss_mech_get(struct gss_api_mech *); + +diff -puN /dev/null include/linux/sunrpc/svcauth_gss.h +--- /dev/null 2004-01-26 19:20:21.000000000 -0500 ++++ linux-2.6.3-bfields/include/linux/sunrpc/svcauth_gss.h 2004-02-19 16:47:04.000000000 -0500 +@@ -0,0 +1,35 @@ ++/* ++ * linux/include/linux/svcauth_gss.h ++ * ++ * Bruce Fields ++ * Copyright (c) 2002 The Regents of the Unviersity of Michigan ++ * ++ * $Id: linux-2.6.3-CITI_NFS4_ALL.patch,v 1.2 2004/03/17 01:04:13 nic Exp $ ++ * ++ */ ++ ++#ifndef _LINUX_SUNRPC_SVCAUTH_GSS_H ++#define _LINUX_SUNRPC_SVCAUTH_GSS_H ++ ++#ifdef __KERNEL__ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++int gss_svc_init(void); ++int svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name); ++ ++ ++struct gss_svc_data { ++ /* decoded gss client cred: */ ++ struct rpc_gss_wire_cred clcred; ++ /* pointer to the beginning of the procedure-specific results, which ++ * may be encrypted/checksummed in svcauth_gss_release: */ ++ u32 *body_start; ++}; ++ ++#endif /* __KERNEL__ */ ++#endif /* _LINUX_SUNRPC_SVCAUTH_GSS_H */ +diff -puN include/linux/sunrpc/svcauth.h~CITI_NFS4_ALL include/linux/sunrpc/svcauth.h +--- linux-2.6.3/include/linux/sunrpc/svcauth.h~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 ++++ linux-2.6.3-bfields/include/linux/sunrpc/svcauth.h 2004-02-19 16:47:04.000000000 -0500 +@@ -65,6 +65,10 @@ struct auth_domain { + * GARBAGE - rpc garbage_args error + * SYSERR - rpc system_err error + * DENIED - authp holds reason for denial. ++ * COMPLETE - the reply is encoded already and ready to be sent; no ++ * further processing is necessary. (This is used for processing ++ * null procedure calls which are used to set up encryption ++ * contexts.) 
+ * + * accept is passed the proc number so that it can accept NULL rpc requests + * even if it cannot authenticate the client (as is sometimes appropriate). +@@ -97,6 +101,7 @@ extern struct auth_ops *authtab[RPC_AUTH + #define SVC_DROP 6 + #define SVC_DENIED 7 + #define SVC_PENDING 8 ++#define SVC_COMPLETE 9 + + + extern int svc_authenticate(struct svc_rqst *rqstp, u32 *authp); +diff -puN include/linux/sunrpc/svc.h~CITI_NFS4_ALL include/linux/sunrpc/svc.h +--- linux-2.6.3/include/linux/sunrpc/svc.h~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 ++++ linux-2.6.3-bfields/include/linux/sunrpc/svc.h 2004-02-19 16:47:04.000000000 -0500 +@@ -135,6 +135,7 @@ struct svc_rqst { + + void * rq_argp; /* decoded arguments */ + void * rq_resp; /* xdr'd results */ ++ void * rq_auth_data; /* flavor-specific data */ + + int rq_reserved; /* space on socket outq + * reserved for this request +diff -puN net/sunrpc/auth_gss/auth_gss.c~CITI_NFS4_ALL net/sunrpc/auth_gss/auth_gss.c +--- linux-2.6.3/net/sunrpc/auth_gss/auth_gss.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 ++++ linux-2.6.3-bfields/net/sunrpc/auth_gss/auth_gss.c 2004-02-19 16:47:07.000000000 -0500 +@@ -48,6 +48,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -279,7 +280,7 @@ err_free_ctx: + kfree(ctx); + err: + *gc = NULL; +- dprintk("RPC: gss_parse_init_downcall returning %d\n", err); ++ dprintk("RPC: gss_parse_init_downcall returning %d\n", err); + return err; + } + +@@ -310,8 +311,10 @@ __gss_find_upcall(struct gss_auth *gss_a + if (pos->uid != uid) + continue; + atomic_inc(&pos->count); ++ dprintk("RPC: gss_find_upcall found msg %p\n", pos); + return pos; + } ++ dprintk("RPC: gss_find_upcall found nothing\n"); + return NULL; + } + +@@ -349,6 +352,8 @@ gss_upcall(struct rpc_clnt *clnt, struct + uid_t uid = cred->cr_uid; + int res = 0; + ++ dprintk("RPC: %4u gss_upcall for uid %u\n", task->tk_pid, uid); ++ + retry: + spin_lock(&gss_auth->lock); + gss_msg = 
__gss_find_upcall(gss_auth, uid); +@@ -357,8 +362,10 @@ retry: + if (gss_new == NULL) { + spin_unlock(&gss_auth->lock); + gss_new = kmalloc(sizeof(*gss_new), GFP_KERNEL); +- if (!gss_new) ++ if (!gss_new) { ++ dprintk("RPC: %4u gss_upcall -ENOMEM\n", task->tk_pid); + return -ENOMEM; ++ } + goto retry; + } + gss_msg = gss_new; +@@ -388,10 +395,12 @@ retry: + spin_unlock(&gss_auth->lock); + } + gss_release_msg(gss_msg); ++ dprintk("RPC: %4u gss_upcall for uid %u result %d\n", task->tk_pid, ++ uid, res); + return res; + out_sleep: +- /* Sleep forever */ +- task->tk_timeout = 0; ++ dprintk("RPC: %4u gss_upcall sleeping\n", task->tk_pid); ++ task->tk_timeout = 0; /* Sleep forever */ + rpc_sleep_on(&gss_msg->waitq, task, NULL, NULL); + spin_unlock(&gss_auth->lock); + if (gss_new) +@@ -476,12 +485,13 @@ gss_pipe_downcall(struct file *filp, con + } else + spin_unlock(&gss_auth->lock); + rpc_release_client(clnt); ++ dprintk("RPC: gss_pipe_downcall returning length %u\n", mlen); + return mlen; + err: + if (ctx) + gss_destroy_ctx(ctx); + rpc_release_client(clnt); +- dprintk("RPC: gss_pipe_downcall returning %d\n", err); ++ dprintk("RPC: gss_pipe_downcall returning %d\n", err); + return err; + } + +@@ -519,6 +529,8 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg + static unsigned long ratelimit; + + if (msg->errno < 0) { ++ dprintk("RPC: gss_pipe_destroy_msg releasing msg %p\n", ++ gss_msg); + atomic_inc(&gss_msg->count); + gss_unhash_msg(gss_msg); + if (msg->errno == -ETIMEDOUT || msg->errno == -EPIPE) { +@@ -543,7 +555,8 @@ gss_create(struct rpc_clnt *clnt, rpc_au + struct gss_auth *gss_auth; + struct rpc_auth * auth; + +- dprintk("RPC: creating GSS authenticator for client %p\n",clnt); ++ dprintk("RPC: creating GSS authenticator for client %p\n",clnt); ++ + if (!(gss_auth = kmalloc(sizeof(*gss_auth), GFP_KERNEL))) + goto out_dec; + gss_auth->mech = gss_pseudoflavor_to_mech(flavor); +@@ -581,7 +594,8 @@ static void + gss_destroy(struct rpc_auth *auth) + { + struct gss_auth
*gss_auth; +- dprintk("RPC: destroying GSS authenticator %p flavor %d\n", ++ ++ dprintk("RPC: destroying GSS authenticator %p flavor %d\n", + auth, auth->au_flavor); + + gss_auth = container_of(auth, struct gss_auth, rpc_auth); +@@ -596,8 +610,7 @@ gss_destroy(struct rpc_auth *auth) + static void + gss_destroy_ctx(struct gss_cl_ctx *ctx) + { +- +- dprintk("RPC: gss_destroy_ctx\n"); ++ dprintk("RPC: gss_destroy_ctx\n"); + + if (ctx->gc_gss_ctx) + gss_delete_sec_context(&ctx->gc_gss_ctx); +@@ -616,7 +629,7 @@ gss_destroy_cred(struct rpc_cred *rc) + { + struct gss_cred *cred = (struct gss_cred *)rc; + +- dprintk("RPC: gss_destroy_cred \n"); ++ dprintk("RPC: gss_destroy_cred \n"); + + if (cred->gc_ctx) + gss_put_ctx(cred->gc_ctx); +@@ -628,7 +641,7 @@ gss_create_cred(struct rpc_auth *auth, s + { + struct gss_cred *cred = NULL; + +- dprintk("RPC: gss_create_cred for uid %d, flavor %d\n", ++ dprintk("RPC: gss_create_cred for uid %d, flavor %d\n", + acred->uid, auth->au_flavor); + + if (!(cred = kmalloc(sizeof(*cred), GFP_KERNEL))) +@@ -648,7 +661,7 @@ gss_create_cred(struct rpc_auth *auth, s + return (struct rpc_cred *) cred; + + out_err: +- dprintk("RPC: gss_create_cred failed\n"); ++ dprintk("RPC: gss_create_cred failed\n"); + if (cred) gss_destroy_cred((struct rpc_cred *)cred); + return NULL; + } +@@ -659,6 +672,15 @@ gss_match(struct auth_cred *acred, struc + return (rc->cr_uid == acred->uid); + } + ++static void ++shift_seqnos(u32 *seqnos) ++{ ++ int i; ++ /* Walk from the tail so each slot is read before it is overwritten. */ ++ for (i = GSS_SEQNO_CACHE - 1; i > 0; i--) ++ seqnos[i] = seqnos[i-1]; ++} ++ + /* + * Marshal credentials. + * Maybe we should keep a cached credential for performance reasons.
+@@ -678,24 +700,25 @@ gss_marshal(struct rpc_task *task, u32 * + struct xdr_buf verf_buf; + u32 service; + +- dprintk("RPC: gss_marshal\n"); ++ dprintk("RPC: %4u gss_marshal\n", task->tk_pid); + + *p++ = htonl(RPC_AUTH_GSS); + cred_len = p++; + + service = gss_pseudoflavor_to_service(gss_cred->gc_flavor); + if (service == 0) { +- dprintk("Bad pseudoflavor %d in gss_marshal\n", +- gss_cred->gc_flavor); ++ dprintk("RPC: %4u Bad pseudoflavor %d in gss_marshal\n", ++ task->tk_pid, gss_cred->gc_flavor); + goto out_put_ctx; + } ++ shift_seqnos(req->rq_seqnos); + spin_lock(&ctx->gc_seq_lock); +- req->rq_seqno = ctx->gc_seq++; ++ req->rq_seqnos[0] = ctx->gc_seq++; + spin_unlock(&ctx->gc_seq_lock); + + *p++ = htonl((u32) RPC_GSS_VERSION); + *p++ = htonl((u32) ctx->gc_proc); +- *p++ = htonl((u32) req->rq_seqno); ++ *p++ = htonl((u32) req->rq_seqnos[0]); + *p++ = htonl((u32) service); + p = xdr_encode_netobj(p, &ctx->gc_wire_ctx); + *cred_len = htonl((p - (cred_len + 1)) << 2); +@@ -745,6 +768,32 @@ gss_refresh(struct rpc_task *task) + return 0; + } + ++static int ++verify_checksum(struct gss_ctx *ctx, struct xdr_netobj *mic, u32 *seqnos) ++{ ++ u32 seq, qop_state; ++ struct xdr_buf verf_buf; ++ struct iovec iov; ++ int i; ++ ++ for (i=0; i < GSS_SEQNO_CACHE; i++) { ++ if (i && !seqnos[i]) ++ goto fail; ++ seq = htonl(seqnos[i]); ++ iov.iov_base = &seq; ++ iov.iov_len = sizeof(seq); ++ xdr_buf_from_iov(&iov, &verf_buf); ++ if (!gss_verify_mic(ctx, &verf_buf, mic, &qop_state)) ++ goto success; ++ } ++fail: ++ return -1; ++success: ++ /* So unwrap knows which seqno we used: */ ++ seqnos[0] = seqnos[i]; ++ return 0; ++} ++ + static u32 * + gss_validate(struct rpc_task *task, u32 *p) + { +@@ -752,28 +801,21 @@ gss_validate(struct rpc_task *task, u32 + struct gss_cred *gss_cred = container_of(cred, struct gss_cred, + gc_base); + struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); +- u32 seq, qop_state; +- struct iovec iov; +- struct xdr_buf verf_buf; + struct xdr_netobj mic; + u32 
flav,len; + u32 service; + +- dprintk("RPC: gss_validate\n"); ++ dprintk("RPC: %4u gss_validate\n", task->tk_pid); + + flav = ntohl(*p++); + if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE) + goto out_bad; + if (flav != RPC_AUTH_GSS) + goto out_bad; +- seq = htonl(task->tk_rqstp->rq_seqno); +- iov.iov_base = &seq; +- iov.iov_len = sizeof(seq); +- xdr_buf_from_iov(&iov, &verf_buf); ++ + mic.data = (u8 *)p; + mic.len = len; +- +- if (gss_verify_mic(ctx->gc_gss_ctx, &verf_buf, &mic, &qop_state)) ++ if (verify_checksum(ctx->gc_gss_ctx, &mic, task->tk_rqstp->rq_seqnos)) + goto out_bad; + service = gss_pseudoflavor_to_service(gss_cred->gc_flavor); + switch (service) { +@@ -789,9 +831,12 @@ gss_validate(struct rpc_task *task, u32 + goto out_bad; + } + gss_put_ctx(ctx); ++ dprintk("RPC: %4u GSS gss_validate: gss_verify_mic succeeded.\n", ++ task->tk_pid); + return p + XDR_QUADLEN(len); + out_bad: + gss_put_ctx(ctx); ++ dprintk("RPC: %4u gss_validate failed.\n", task->tk_pid); + return NULL; + } + +@@ -814,7 +859,7 @@ gss_wrap_req(struct rpc_task *task, + u32 offset, *q; + struct iovec *iov; + +- dprintk("RPC: gss_wrap_body\n"); ++ dprintk("RPC: %4u gss_wrap_req\n", task->tk_pid); + BUG_ON(!ctx); + if (ctx->gc_proc != RPC_GSS_PROC_DATA) { + /* The spec seems a little ambiguous here, but I think that not +@@ -832,7 +877,7 @@ gss_wrap_req(struct rpc_task *task, + + integ_len = p++; + offset = (u8 *)p - (u8 *)snd_buf->head[0].iov_base; +- *p++ = htonl(req->rq_seqno); ++ *p++ = htonl(req->rq_seqnos[0]); + + status = encode(rqstp, p, obj); + if (status) +@@ -871,7 +916,7 @@ gss_wrap_req(struct rpc_task *task, + status = 0; + out: + gss_put_ctx(ctx); +- dprintk("RPC: gss_wrap_req returning %d\n", status); ++ dprintk("RPC: %4u gss_wrap_req returning %d\n", task->tk_pid, status); + return status; + } + +@@ -909,7 +954,7 @@ gss_unwrap_resp(struct rpc_task *task, + mic_offset = integ_len + data_offset; + if (mic_offset > rcv_buf->len) + goto out; +- if (ntohl(*p++) != req->rq_seqno) ++ 
if (ntohl(*p++) != req->rq_seqnos[0]) + goto out; + + if (xdr_buf_subsegment(rcv_buf, &integ_buf, data_offset, +@@ -932,7 +977,8 @@ out_decode: + status = decode(rqstp, p, obj); + out: + gss_put_ctx(ctx); +- dprintk("RPC: gss_unwrap_resp returning %d\n", status); ++ dprintk("RPC: %4u gss_unwrap_resp returning %d\n", task->tk_pid, ++ status); + return status; + } + +@@ -972,6 +1018,15 @@ static int __init init_rpcsec_gss(void) + int err = 0; + + err = rpcauth_register(&authgss_ops); ++ if (err) ++ goto out; ++ err = gss_svc_init(); ++ if (err) ++ goto out_unregister; ++ return 0; ++out_unregister: ++ rpcauth_unregister(&authgss_ops); ++out: + return err; + } + +diff -puN net/sunrpc/auth_gss/gss_krb5_mech.c~CITI_NFS4_ALL net/sunrpc/auth_gss/gss_krb5_mech.c +--- linux-2.6.3/net/sunrpc/auth_gss/gss_krb5_mech.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 ++++ linux-2.6.3-bfields/net/sunrpc/auth_gss/gss_krb5_mech.c 2004-02-19 16:47:15.000000000 -0500 +@@ -39,6 +39,8 @@ + #include + #include + #include ++#include ++#include + #include + #include + #include +@@ -98,7 +100,7 @@ get_key(char **p, char *end, struct cryp + alg_mode = CRYPTO_TFM_MODE_CBC; + break; + default: +- dprintk("RPC: get_key: unsupported algorithm %d\n", alg); ++ dprintk("RPC: get_key: unsupported algorithm %d\n", alg); + goto out_err_free_key; + } + if (!(*res = crypto_alloc_tfm(alg_name, alg_mode))) +@@ -153,7 +155,7 @@ gss_import_sec_context_kerberos(struct x + goto out_err_free_key2; + + ctx_id->internal_ctx_id = ctx; +- dprintk("Succesfully imported new context.\n"); ++ dprintk("RPC: Succesfully imported new context.\n"); + return 0; + + out_err_free_key2: +@@ -195,7 +197,7 @@ gss_verify_mic_kerberos(struct gss_ctx + if (!maj_stat && qop_state) + *qstate = qop_state; + +- dprintk("RPC: gss_verify_mic_kerberos returning %d\n", maj_stat); ++ dprintk("RPC: gss_verify_mic_kerberos returning %d\n", maj_stat); + return maj_stat; + } + +@@ -209,7 +211,7 @@ gss_get_mic_kerberos(struct gss_ctx *ctx + 
+ + err = krb5_make_token(kctx, qop, message, mic_token, KG_TOK_MIC_MSG); + +- dprintk("RPC: gss_get_mic_kerberos returning %d\n",err); ++ dprintk("RPC: gss_get_mic_kerberos returning %d\n",err); + + return err; + } +@@ -232,6 +234,10 @@ static int __init init_kerberos_module(v + gm = gss_mech_get_by_OID(&gss_mech_krb5_oid); + gss_register_triple(RPC_AUTH_GSS_KRB5 , gm, 0, RPC_GSS_SVC_NONE); + gss_register_triple(RPC_AUTH_GSS_KRB5I, gm, 0, RPC_GSS_SVC_INTEGRITY); ++ if (svcauth_gss_register_pseudoflavor(RPC_AUTH_GSS_KRB5, "krb5")) ++ printk("Failed to register %s with server!\n", "krb5"); ++ if (svcauth_gss_register_pseudoflavor(RPC_AUTH_GSS_KRB5I, "krb5i")) ++ printk("Failed to register %s with server!\n", "krb5i"); + gss_mech_put(gm); + return 0; + } +diff -puN net/sunrpc/auth_gss/gss_mech_switch.c~CITI_NFS4_ALL net/sunrpc/auth_gss/gss_mech_switch.c +--- linux-2.6.3/net/sunrpc/auth_gss/gss_mech_switch.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 ++++ linux-2.6.3-bfields/net/sunrpc/auth_gss/gss_mech_switch.c 2004-02-19 16:47:07.000000000 -0500 +@@ -43,7 +43,6 @@ + #include + #include + #include +-#include + + #ifdef RPC_DEBUG + # define RPCDBG_FACILITY RPCDBG_AUTH +@@ -82,7 +81,7 @@ gss_mech_register(struct xdr_netobj * me + spin_lock(&registered_mechs_lock); + list_add(&gm->gm_list, &registered_mechs); + spin_unlock(&registered_mechs_lock); +- dprintk("RPC: gss_mech_register: registered mechanism with oid:\n"); ++ dprintk("RPC: gss_mech_register: registered mechanism with oid:\n"); + print_hexl((u32 *)mech_type->data, mech_type->len, 0); + return 0; + } +@@ -94,11 +93,10 @@ do_gss_mech_unregister(struct gss_api_me + + list_del(&gm->gm_list); + +- dprintk("RPC: unregistered mechanism with oid:\n"); ++ dprintk("RPC: unregistered mechanism with oid:\n"); + print_hexl((u32 *)gm->gm_oid.data, gm->gm_oid.len, 0); + if (!gss_mech_put(gm)) { +- dprintk("RPC: We just unregistered a gss_mechanism which" +- " someone is still using.\n"); ++ dprintk("RPC: We just unregistered a
gss_mechanism which someone is still using.\n"); + return -1; + } else { + return 0; +@@ -146,7 +144,7 @@ gss_mech_get_by_OID(struct xdr_netobj *m + { + struct gss_api_mech *pos, *gm = NULL; + +- dprintk("RPC: gss_mech_get_by_OID searching for mechanism with OID:\n"); ++ dprintk("RPC: gss_mech_get_by_OID searching for mechanism with OID:\n"); + print_hexl((u32 *)mech_type->data, mech_type->len, 0); + spin_lock(&registered_mechs_lock); + list_for_each_entry(pos, &registered_mechs, gm_list) { +@@ -158,10 +156,27 @@ gss_mech_get_by_OID(struct xdr_netobj *m + } + } + spin_unlock(&registered_mechs_lock); +- dprintk("RPC: gss_mech_get_by_OID %s it\n", gm ? "found" : "didn't find"); ++ dprintk("RPC: gss_mech_get_by_OID %s it\n", gm ? "found" : "didn't find"); + return gm; + } + ++struct gss_api_mech * ++gss_mech_get_by_name(char *name) ++{ ++ struct gss_api_mech *pos, *gm = NULL; ++ ++ spin_lock(&registered_mechs_lock); ++ list_for_each_entry(pos, &registered_mechs, gm_list) { ++ if (0 == strcmp(name, pos->gm_ops->name)) { ++ gm = gss_mech_get(pos); ++ break; ++ } ++ } ++ spin_unlock(&registered_mechs_lock); ++ return gm; ++ ++} ++ + int + gss_mech_put(struct gss_api_mech * gm) + { +@@ -228,7 +243,8 @@ gss_verify_mic(struct gss_ctx *context_ + u32 + gss_delete_sec_context(struct gss_ctx **context_handle) + { +- dprintk("gss_delete_sec_context deleting %p\n",*context_handle); ++ dprintk("RPC: gss_delete_sec_context deleting %p\n", ++ *context_handle); + + if (!*context_handle) + return(GSS_S_NO_CONTEXT); +diff -puN net/sunrpc/auth_gss/Makefile~CITI_NFS4_ALL net/sunrpc/auth_gss/Makefile +--- linux-2.6.3/net/sunrpc/auth_gss/Makefile~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 ++++ linux-2.6.3-bfields/net/sunrpc/auth_gss/Makefile 2004-02-19 16:47:04.000000000 -0500 +@@ -5,7 +5,7 @@ + obj-$(CONFIG_SUNRPC_GSS) += auth_rpcgss.o + + auth_rpcgss-objs := auth_gss.o gss_pseudoflavors.o gss_generic_token.o \ +- sunrpcgss_syms.o gss_mech_switch.o ++ sunrpcgss_syms.o gss_mech_switch.o
svcauth_gss.o + + obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o + +diff -puN net/sunrpc/auth_gss/sunrpcgss_syms.c~CITI_NFS4_ALL net/sunrpc/auth_gss/sunrpcgss_syms.c +--- linux-2.6.3/net/sunrpc/auth_gss/sunrpcgss_syms.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 ++++ linux-2.6.3-bfields/net/sunrpc/auth_gss/sunrpcgss_syms.c 2004-02-19 16:47:04.000000000 -0500 +@@ -8,6 +8,7 @@ + #include + + #include ++#include + #include + + /* sec_triples: */ +@@ -17,6 +18,7 @@ EXPORT_SYMBOL(gss_cmp_triples); + EXPORT_SYMBOL(gss_pseudoflavor_to_mechOID); + EXPORT_SYMBOL(gss_pseudoflavor_supported); + EXPORT_SYMBOL(gss_pseudoflavor_to_service); ++EXPORT_SYMBOL(svcauth_gss_register_pseudoflavor); + + /* registering gss mechanisms to the mech switching code: */ + EXPORT_SYMBOL(gss_mech_register); +diff -puN /dev/null net/sunrpc/auth_gss/svcauth_gss.c +--- /dev/null 2004-01-26 19:20:21.000000000 -0500 ++++ linux-2.6.3-bfields/net/sunrpc/auth_gss/svcauth_gss.c 2004-02-19 16:47:15.000000000 -0500 +@@ -0,0 +1,1018 @@ ++/* ++ * Neil Brown ++ * J. Bruce Fields ++ * Andy Adamson ++ * Dug Song ++ * ++ * RPCSEC_GSS server authentication. ++ * This implements RPCSEC_GSS as defined in rfc2203 (rpcsec_gss) and rfc2078 ++ * (gssapi) ++ * ++ * The RPCSEC_GSS involves three stages: ++ * 1/ context creation ++ * 2/ data exchange ++ * 3/ context destruction ++ * ++ * Context creation is handled largely by upcalls to user-space. ++ * In particular, GSS_Accept_sec_context is handled by an upcall ++ * Data exchange is handled entirely within the kernel ++ * In particular, GSS_GetMIC, GSS_VerifyMIC, GSS_Seal, GSS_Unseal are in-kernel. ++ * Context destruction is handled in-kernel ++ * GSS_Delete_sec_context is in-kernel ++ * ++ * Context creation is initiated by a RPCSEC_GSS_INIT request arriving. ++ * The context handle and gss_token are used as a key into the rpcsec_init cache. 
++ * The content of this cache includes some of the outputs of GSS_Accept_sec_context, ++ * being major_status, minor_status, context_handle, reply_token. ++ * These are sent back to the client. ++ * Sequence window management is handled by the kernel. The window size is currently ++ * a compile time constant. ++ * ++ * When user-space is happy that a context is established, it places an entry ++ * in the rpcsec_context cache. The key for this cache is the context_handle. ++ * The content includes: ++ * uid/gidlist - for determining access rights ++ * mechanism type ++ * mechanism specific information, such as a key ++ * ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#ifdef RPC_DEBUG ++# define RPCDBG_FACILITY RPCDBG_AUTH ++#endif ++ ++/* The rpcsec_init cache is used for mapping RPCSEC_GSS_{,CONT_}INIT requests ++ * into replies. ++ * ++ * Key is context handle (\x if empty) and gss_token. ++ * Content is major_status minor_status (integers) context_handle, reply_token.
++ * ++ */ ++ ++static int netobj_equal(struct xdr_netobj *a, struct xdr_netobj *b) ++{ ++ return a->len == b->len && 0 == memcmp(a->data, b->data, a->len); ++} ++ ++#define RSI_HASHBITS 6 ++#define RSI_HASHMAX (1<in_handle.data); ++ kfree(rsii->in_token.data); ++ kfree(rsii->out_handle.data); ++ kfree(rsii->out_token.data); ++} ++ ++static void rsi_put(struct cache_head *item, struct cache_detail *cd) ++{ ++ struct rsi *rsii = container_of(item, struct rsi, h); ++ if (cache_put(item, cd)) { ++ rsi_free(rsii); ++ kfree(rsii); ++ } ++} ++ ++static inline int rsi_hash(struct rsi *item) ++{ ++ return hash_mem(item->in_handle.data, item->in_handle.len, RSI_HASHBITS) ++ ^ hash_mem(item->in_token.data, item->in_token.len, RSI_HASHBITS); ++} ++ ++static inline int rsi_match(struct rsi *item, struct rsi *tmp) ++{ ++ return netobj_equal(&item->in_handle, &tmp->in_handle) ++ && netobj_equal(&item->in_token, &tmp->in_token); ++} ++ ++static int dup_to_netobj(struct xdr_netobj *dst, char *src, int len) ++{ ++ dst->len = len; ++ dst->data = (len ? 
kmalloc(len, GFP_KERNEL) : NULL); ++ if (dst->data) ++ memcpy(dst->data, src, len); ++ if (len && !dst->data) ++ return -ENOMEM; ++ return 0; ++} ++ ++static inline int dup_netobj(struct xdr_netobj *dst, struct xdr_netobj *src) ++{ ++ return dup_to_netobj(dst, src->data, src->len); ++} ++ ++static inline void rsi_init(struct rsi *new, struct rsi *item) ++{ ++ new->out_handle.data = NULL; ++ new->out_handle.len = 0; ++ new->out_token.data = NULL; ++ new->out_token.len = 0; ++ new->in_handle.len = item->in_handle.len; ++ new->in_handle.data = item->in_handle.data; ++ item->in_handle.len = 0; ++ item->in_handle.data = NULL; ++ new->in_token.len = item->in_token.len; ++ new->in_token.data = item->in_token.data; ++ item->in_token.len = 0; ++ item->in_token.data = NULL; ++ return; ++} ++ ++static inline void rsi_update(struct rsi *new, struct rsi *item) ++{ ++ BUG_ON(new->out_handle.data || new->out_token.data); ++ new->out_handle.len = item->out_handle.len; ++ item->out_handle.len = 0; ++ new->out_token.len = item->out_token.len; ++ item->out_token.len = 0; ++ new->out_handle.data = item->out_handle.data; ++ item->out_handle.data = NULL; ++ new->out_token.data = item->out_token.data; ++ item->out_token.data = NULL; ++ ++ new->major_status = item->major_status; ++ new->minor_status = item->minor_status; ++} ++ ++static void rsi_request(struct cache_detail *cd, ++ struct cache_head *h, ++ char **bpp, int *blen) ++{ ++ struct rsi *rsii = container_of(h, struct rsi, h); ++ ++ qword_addhex(bpp, blen, rsii->in_handle.data, rsii->in_handle.len); ++ qword_addhex(bpp, blen, rsii->in_token.data, rsii->in_token.len); ++ (*bpp)[-1] = '\n'; ++} ++ ++ ++static int rsi_parse(struct cache_detail *cd, ++ char *mesg, int mlen) ++{ ++ /* context token expiry major minor context token */ ++ char *buf = mesg; ++ char *ep; ++ int len; ++ struct rsi rsii, *rsip = NULL; ++ time_t expiry; ++ int status = -EINVAL; ++ ++ memset(&rsii, 0, sizeof(rsii)); ++ /* handle */ ++ len = qword_get(&mesg, 
buf, mlen); ++ if (len < 0) ++ goto out; ++ status = -ENOMEM; ++ if (dup_to_netobj(&rsii.in_handle, buf, len)) ++ goto out; ++ ++ /* token */ ++ len = qword_get(&mesg, buf, mlen); ++ status = -EINVAL; ++ if (len < 0) ++ goto out;; ++ status = -ENOMEM; ++ if (dup_to_netobj(&rsii.in_token, buf, len)) ++ goto out; ++ ++ rsii.h.flags = 0; ++ /* expiry */ ++ expiry = get_expiry(&mesg); ++ status = -EINVAL; ++ if (expiry == 0) ++ goto out; ++ ++ /* major/minor */ ++ len = qword_get(&mesg, buf, mlen); ++ if (len < 0) ++ goto out; ++ if (len == 0) { ++ goto out; ++ } else { ++ rsii.major_status = simple_strtoul(buf, &ep, 10); ++ if (*ep) ++ goto out; ++ len = qword_get(&mesg, buf, mlen); ++ if (len <= 0) ++ goto out; ++ rsii.minor_status = simple_strtoul(buf, &ep, 10); ++ if (*ep) ++ goto out; ++ ++ /* out_handle */ ++ len = qword_get(&mesg, buf, mlen); ++ if (len < 0) ++ goto out; ++ status = -ENOMEM; ++ if (dup_to_netobj(&rsii.out_handle, buf, len)) ++ goto out; ++ ++ /* out_token */ ++ len = qword_get(&mesg, buf, mlen); ++ status = -EINVAL; ++ if (len < 0) ++ goto out; ++ status = -ENOMEM; ++ if (dup_to_netobj(&rsii.out_token, buf, len)) ++ goto out; ++ } ++ rsii.h.expiry_time = expiry; ++ rsip = rsi_lookup(&rsii, 1); ++ status = 0; ++out: ++ rsi_free(&rsii); ++ if (rsip) ++ rsi_put(&rsip->h, &rsi_cache); ++ return status; ++} ++ ++static struct cache_detail rsi_cache = { ++ .hash_size = RSI_HASHMAX, ++ .hash_table = rsi_table, ++ .name = "auth.rpcsec.init", ++ .cache_put = rsi_put, ++ .cache_request = rsi_request, ++ .cache_parse = rsi_parse, ++}; ++ ++static DefineSimpleCacheLookup(rsi, 0) ++ ++/* ++ * The rpcsec_context cache is used to store a context that is ++ * used in data exchange. ++ * The key is a context handle. 
The content is: ++ * uid, gidlist, mechanism, service-set, mech-specific-data ++ */ ++ ++#define RSC_HASHBITS 10 ++#define RSC_HASHMAX (1<handle.data); ++ if (rsci->mechctx) ++ gss_delete_sec_context(&rsci->mechctx); ++} ++ ++static void rsc_put(struct cache_head *item, struct cache_detail *cd) ++{ ++ struct rsc *rsci = container_of(item, struct rsc, h); ++ ++ if (cache_put(item, cd)) { ++ rsc_free(rsci); ++ kfree(rsci); ++ } ++} ++ ++static inline int ++rsc_hash(struct rsc *rsci) ++{ ++ return hash_mem(rsci->handle.data, rsci->handle.len, RSC_HASHBITS); ++} ++ ++static inline int ++rsc_match(struct rsc *new, struct rsc *tmp) ++{ ++ return netobj_equal(&new->handle, &tmp->handle); ++} ++ ++static inline void ++rsc_init(struct rsc *new, struct rsc *tmp) ++{ ++ new->mechctx = NULL; ++ new->handle.len = tmp->handle.len; ++ new->handle.data = tmp->handle.data; ++ tmp->handle.len = 0; ++ tmp->handle.data = NULL; ++} ++ ++static inline void ++rsc_update(struct rsc *new, struct rsc *tmp) ++{ ++ new->mechctx = tmp->mechctx; ++ tmp->mechctx = NULL; ++ memset(&new->seqdata, 0, sizeof(new->seqdata)); ++ spin_lock_init(&new->seqdata.sd_lock); ++ new->cred = tmp->cred; ++} ++ ++static int rsc_parse(struct cache_detail *cd, ++ char *mesg, int mlen) ++{ ++ /* contexthandle expiry [ uid gid N mechname ...mechdata... 
] */ ++ char *buf = mesg; ++ int len, rv; ++ struct rsc rsci, *rscp = NULL; ++ time_t expiry; ++ int status = -EINVAL; ++ ++ memset(&rsci, 0, sizeof(rsci)); ++ /* context handle */ ++ len = qword_get(&mesg, buf, mlen); ++ if (len < 0) goto out; ++ status = -ENOMEM; ++ if (dup_to_netobj(&rsci.handle, buf, len)) ++ goto out; ++ ++ rsci.h.flags = 0; ++ /* expiry */ ++ expiry = get_expiry(&mesg); ++ status = -EINVAL; ++ if (expiry == 0) ++ goto out; ++ ++ /* uid, or NEGATIVE */ ++ rv = get_int(&mesg, &rsci.cred.cr_uid); ++ if (rv == -EINVAL) ++ goto out; ++ if (rv == -ENOENT) ++ set_bit(CACHE_NEGATIVE, &rsci.h.flags); ++ else { ++ int N, i; ++ struct gss_api_mech *gm; ++ struct xdr_netobj tmp_buf; ++ ++ /* gid */ ++ if (get_int(&mesg, &rsci.cred.cr_gid)) ++ goto out; ++ ++ /* number of additional gid's */ ++ if (get_int(&mesg, &N)) ++ goto out; ++ if (N > NGROUPS) ++ goto out; ++ ++ /* gid's */ ++ for (i=0; ih, &rsc_cache); ++ return status; ++} ++ ++static struct cache_detail rsc_cache = { ++ .hash_size = RSC_HASHMAX, ++ .hash_table = rsc_table, ++ .name = "auth.rpcsec.context", ++ .cache_put = rsc_put, ++ .cache_parse = rsc_parse, ++}; ++ ++static DefineSimpleCacheLookup(rsc, 0); ++ ++struct rsc * ++gss_svc_searchbyctx(struct xdr_netobj *handle) ++{ ++ struct rsc rsci; ++ struct rsc *found; ++ ++ rsci.handle = *handle; ++ found = rsc_lookup(&rsci, 0); ++ if (!found) ++ return NULL; ++ if (cache_check(&rsc_cache, &found->h, NULL)) ++ return NULL; ++ return found; ++} ++ ++/* Implements sequence number algorithm as specified in RFC 2203. 
*/ ++static int ++gss_check_seq_num(struct rsc *rsci, int seq_num) ++{ ++ struct gss_svc_seq_data *sd = &rsci->seqdata; ++ ++ spin_lock(&sd->sd_lock); ++ if (seq_num > sd->sd_max) { ++ if (seq_num >= sd->sd_max + GSS_SEQ_WIN) { ++ memset(sd->sd_win,0,sizeof(sd->sd_win)); ++ sd->sd_max = seq_num; ++ } else while (sd->sd_max < seq_num) { ++ sd->sd_max++; ++ __clear_bit(sd->sd_max % GSS_SEQ_WIN, sd->sd_win); ++ } ++ __set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win); ++ goto ok; ++ } else if (seq_num <= sd->sd_max - GSS_SEQ_WIN) { ++ goto drop; ++ } ++ /* sd_max - GSS_SEQ_WIN < seq_num <= sd_max */ ++ if (__test_and_set_bit(seq_num % GSS_SEQ_WIN, sd->sd_win)) ++ goto drop; ++ok: ++ spin_unlock(&sd->sd_lock); ++ return 1; ++drop: ++ spin_unlock(&sd->sd_lock); ++ return 0; ++} ++ ++static inline u32 round_up_to_quad(u32 i) ++{ ++ return (i + 3 ) & ~3; ++} ++ ++static inline int ++svc_safe_getnetobj(struct iovec *argv, struct xdr_netobj *o) ++{ ++ int l; ++ ++ if (argv->iov_len < 4) ++ return -1; ++ o->len = ntohl(svc_getu32(argv)); ++ l = round_up_to_quad(o->len); ++ if (argv->iov_len < l) ++ return -1; ++ o->data = argv->iov_base; ++ argv->iov_base += l; ++ argv->iov_len -= l; ++ return 0; ++} ++ ++static inline int ++svc_safe_putnetobj(struct iovec *resv, struct xdr_netobj *o) ++{ ++ u32 *p; ++ ++ if (resv->iov_len + 4 > PAGE_SIZE) ++ return -1; ++ svc_putu32(resv, htonl(o->len)); ++ p = resv->iov_base + resv->iov_len; ++ resv->iov_len += round_up_to_quad(o->len); ++ if (resv->iov_len > PAGE_SIZE) ++ return -1; ++ memcpy(p, o->data, o->len); ++ memset((u8 *)p + o->len, 0, round_up_to_quad(o->len) - o->len); ++ return 0; ++} ++ ++/* Verify the checksum on the header and return SVC_OK on success. ++ * Otherwise, return SVC_DROP (in the case of a bad sequence number) ++ * or return SVC_DENIED and indicate error in authp. 
++ */ ++static int ++gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci, ++ u32 *rpcstart, struct rpc_gss_wire_cred *gc, u32 *authp) ++{ ++ struct gss_ctx *ctx_id = rsci->mechctx; ++ struct xdr_buf rpchdr; ++ struct xdr_netobj checksum; ++ u32 flavor = 0; ++ struct iovec *argv = &rqstp->rq_arg.head[0]; ++ struct iovec iov; ++ ++ /* data to compute the checksum over: */ ++ iov.iov_base = rpcstart; ++ iov.iov_len = (u8 *)argv->iov_base - (u8 *)rpcstart; ++ xdr_buf_from_iov(&iov, &rpchdr); ++ ++ *authp = rpc_autherr_badverf; ++ if (argv->iov_len < 4) ++ return SVC_DENIED; ++ flavor = ntohl(svc_getu32(argv)); ++ if (flavor != RPC_AUTH_GSS) ++ return SVC_DENIED; ++ if (svc_safe_getnetobj(argv, &checksum)) ++ return SVC_DENIED; ++ ++ if (rqstp->rq_deferred) /* skip verification of revisited request */ ++ return SVC_OK; ++ if (gss_verify_mic(ctx_id, &rpchdr, &checksum, NULL) ++ != GSS_S_COMPLETE) { ++ *authp = rpcsec_gsserr_credproblem; ++ return SVC_DENIED; ++ } ++ ++ if (gc->gc_seq > MAXSEQ) { ++ dprintk("RPC: svcauth_gss: discarding request with large sequence number %d\n", ++ gc->gc_seq); ++ *authp = rpcsec_gsserr_ctxproblem; ++ return SVC_DENIED; ++ } ++ if (!gss_check_seq_num(rsci, gc->gc_seq)) { ++ dprintk("RPC: svcauth_gss: discarding request with old sequence number %d\n", ++ gc->gc_seq); ++ return SVC_DROP; ++ } ++ return SVC_OK; ++} ++ ++static int ++gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq) ++{ ++ u32 xdr_seq; ++ u32 maj_stat; ++ struct xdr_buf verf_data; ++ struct xdr_netobj mic; ++ u32 *p; ++ struct iovec iov; ++ ++ svc_putu32(rqstp->rq_res.head, htonl(RPC_AUTH_GSS)); ++ xdr_seq = htonl(seq); ++ ++ iov.iov_base = &xdr_seq; ++ iov.iov_len = sizeof(xdr_seq); ++ xdr_buf_from_iov(&iov, &verf_data); ++ p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len; ++ mic.data = (u8 *)(p + 1); ++ maj_stat = gss_get_mic(ctx_id, 0, &verf_data, &mic); ++ if (maj_stat != GSS_S_COMPLETE) ++ return -1; ++ *p++ = htonl(mic.len); ++ 
memset((u8 *)p + mic.len, 0, round_up_to_quad(mic.len) - mic.len); ++ p += XDR_QUADLEN(mic.len); ++ if (!xdr_ressize_check(rqstp, p)) ++ return -1; ++ return 0; ++} ++ ++struct gss_domain { ++ struct auth_domain h; ++ u32 pseudoflavor; ++}; ++ ++/* XXX this should be done in gss_pseudoflavors, and shouldn't be hardcoded: */ ++static struct auth_domain * ++find_gss_auth_domain(struct gss_ctx *ctx, u32 svc) ++{ ++ switch(gss_get_pseudoflavor(ctx, 0, svc)) { ++ case RPC_AUTH_GSS_KRB5: ++ return auth_domain_find("gss/krb5"); ++ case RPC_AUTH_GSS_KRB5I: ++ return auth_domain_find("gss/krb5i"); ++ case RPC_AUTH_GSS_KRB5P: ++ return auth_domain_find("gss/krb5p"); ++ } ++ return NULL; ++} ++ ++int ++svcauth_gss_register_pseudoflavor(u32 pseudoflavor, char * name) ++{ ++ struct gss_domain *new; ++ struct auth_domain *test; ++ static char *prefix = "gss/"; ++ int stat = -1; ++ ++ new = kmalloc(sizeof(*new), GFP_KERNEL); ++ if (!new) ++ goto out; ++ cache_init(&new->h.h); ++ atomic_inc(&new->h.h.refcnt); ++ new->h.name = kmalloc(strlen(name) + strlen(prefix) + 1, GFP_KERNEL); ++ if (!new->h.name) ++ goto out_free_dom; ++ strcpy(new->h.name, prefix); ++ strcat(new->h.name, name); ++ new->h.flavour = RPC_AUTH_GSS; ++ new->pseudoflavor = pseudoflavor; ++ new->h.h.expiry_time = NEVER; ++ new->h.h.flags = 0; ++ ++ test = auth_domain_lookup(&new->h, 1); ++ if (test == &new->h) { ++ BUG_ON(atomic_dec_and_test(&new->h.h.refcnt)); ++ } else { /* XXX Duplicate registration? */ ++ auth_domain_put(&new->h); ++ goto out; ++ } ++ return 0; ++ ++out_free_dom: ++ kfree(new); ++out: ++ return stat; ++} ++ ++/* It would be nice if this bit of code could be shared with the client. ++ * Obstacles: ++ * The client shouldn't malloc(), would have to pass in own memory. ++ * The server uses base of head iovec as read pointer, while the ++ * client uses separate pointer. 
*/ ++static int ++unwrap_integ_data(struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) ++{ ++ /* XXX audit u32/int uses, sign/overflow issues */ ++ int stat = -EINVAL; ++ u32 integ_len, maj_stat; ++ struct xdr_netobj mic; ++ struct xdr_buf integ_buf; ++ ++ integ_len = ntohl(svc_getu32(&buf->head[0])); ++ if (integ_len & 3) ++ goto out; ++ if (integ_len > buf->len) ++ goto out; ++ if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) ++ goto out; ++ /* copy out mic... */ ++ if (read_u32_from_xdr_buf(buf, integ_len, &mic.len)) ++ goto out; ++ if (mic.len > 256) /* XXX: maximum mic length? */ ++ goto out; ++ mic.data = kmalloc(mic.len, GFP_KERNEL); ++ if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len)) ++ goto out; ++ maj_stat = gss_verify_mic(ctx, &integ_buf, &mic, NULL); ++ if (maj_stat != GSS_S_COMPLETE) ++ goto out; ++ if (ntohl(svc_getu32(&buf->head[0])) != seq) ++ goto out; ++ stat = 0; ++out: ++ return stat; ++} ++ ++/* ++ * Accept an rpcsec packet. ++ * If context establishment, punt to user space ++ * If data exchange, verify/decrypt ++ * If context destruction, handle here ++ * In the context establishment and destruction case we encode ++ * response here and return SVC_COMPLETE. 
++ */ ++static int ++svcauth_gss_accept(struct svc_rqst *rqstp, u32 *authp) ++{ ++ struct iovec *argv = &rqstp->rq_arg.head[0]; ++ struct iovec *resv = &rqstp->rq_res.head[0]; ++ u32 crlen; ++ struct xdr_netobj tmpobj; ++ struct gss_svc_data *svcdata = rqstp->rq_auth_data; ++ struct rpc_gss_wire_cred *gc; ++ struct rsc *rsci = NULL; ++ struct rsi *rsip, rsikey; ++ u32 *rpcstart; ++ u32 *reject_stat = resv->iov_base; ++ int ret; ++ ++ dprintk("RPC: svcauth_gss: argv->iov_len = %d\n", argv->iov_len); ++ ++ *authp = rpc_autherr_badcred; ++ if (!svcdata) ++ svcdata = kmalloc(sizeof(*svcdata), GFP_KERNEL); ++ if (!svcdata) ++ goto auth_err; ++ rqstp->rq_auth_data = svcdata; ++ gc = &svcdata->clcred; ++ ++ /* start of rpc packet is 7 u32's back from here: ++ * xid direction rpcversion prog vers proc flavour ++ */ ++ rpcstart = argv->iov_base; ++ rpcstart -= 7; ++ ++ /* credential is: ++ * version(==1), proc(0,1,2,3), seq, service (1,2,3), handle ++ * at least 5 u32s, and is preceeded by length, so that makes 6. ++ */ ++ ++ if (argv->iov_len < 5 * 4) ++ goto auth_err; ++ crlen = ntohl(svc_getu32(argv)); ++ if (ntohl(svc_getu32(argv)) != RPC_GSS_VERSION) ++ goto auth_err; ++ gc->gc_proc = ntohl(svc_getu32(argv)); ++ gc->gc_seq = ntohl(svc_getu32(argv)); ++ gc->gc_svc = ntohl(svc_getu32(argv)); ++ if (svc_safe_getnetobj(argv, &gc->gc_ctx)) ++ goto auth_err; ++ if (crlen != round_up_to_quad(gc->gc_ctx.len) + 5 * 4) ++ goto auth_err; ++ ++ if ((gc->gc_proc != RPC_GSS_PROC_DATA) && (rqstp->rq_proc != 0)) ++ goto auth_err; ++ ++ /* ++ * We've successfully parsed the credential. Let's check out the ++ * verifier. An AUTH_NULL verifier is allowed (and required) for ++ * INIT and CONTINUE_INIT requests. AUTH_RPCSEC_GSS is required for ++ * PROC_DATA and PROC_DESTROY. ++ * ++ * AUTH_NULL verifier is 0 (AUTH_NULL), 0 (length). ++ * AUTH_RPCSEC_GSS verifier is: ++ * 6 (AUTH_RPCSEC_GSS), length, checksum. ++ * checksum is calculated over rpcheader from xid up to here. 
++ */ ++ *authp = rpc_autherr_badverf; ++ switch (gc->gc_proc) { ++ case RPC_GSS_PROC_INIT: ++ case RPC_GSS_PROC_CONTINUE_INIT: ++ if (argv->iov_len < 2 * 4) ++ goto auth_err; ++ if (ntohl(svc_getu32(argv)) != RPC_AUTH_NULL) ++ goto auth_err; ++ if (ntohl(svc_getu32(argv)) != 0) ++ goto auth_err; ++ break; ++ case RPC_GSS_PROC_DATA: ++ case RPC_GSS_PROC_DESTROY: ++ *authp = rpcsec_gsserr_credproblem; ++ rsci = gss_svc_searchbyctx(&gc->gc_ctx); ++ if (!rsci) ++ goto auth_err; ++ switch (gss_verify_header(rqstp, rsci, rpcstart, gc, authp)) { ++ case SVC_OK: ++ break; ++ case SVC_DENIED: ++ goto auth_err; ++ case SVC_DROP: ++ goto drop; ++ } ++ break; ++ default: ++ *authp = rpc_autherr_rejectedcred; ++ goto auth_err; ++ } ++ ++ /* now act upon the command: */ ++ switch (gc->gc_proc) { ++ case RPC_GSS_PROC_INIT: ++ case RPC_GSS_PROC_CONTINUE_INIT: ++ *authp = rpc_autherr_badcred; ++ if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0) ++ goto auth_err; ++ memset(&rsikey, 0, sizeof(rsikey)); ++ if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx)) ++ goto drop; ++ *authp = rpc_autherr_badverf; ++ if (svc_safe_getnetobj(argv, &tmpobj)) { ++ kfree(rsikey.in_handle.data); ++ goto auth_err; ++ } ++ if (dup_netobj(&rsikey.in_token, &tmpobj)) { ++ kfree(rsikey.in_handle.data); ++ goto drop; ++ } ++ ++ rsip = rsi_lookup(&rsikey, 0); ++ rsi_free(&rsikey); ++ if (!rsip) { ++ goto drop; ++ } ++ switch(cache_check(&rsi_cache, &rsip->h, &rqstp->rq_chandle)) { ++ case -EAGAIN: ++ goto drop; ++ case -ENOENT: ++ goto drop; ++ case 0: ++ rsci = gss_svc_searchbyctx(&rsip->out_handle); ++ if (!rsci) { ++ goto drop; ++ } ++ if (gss_write_verf(rqstp, rsci->mechctx, GSS_SEQ_WIN)) ++ goto drop; ++ if (resv->iov_len + 4 > PAGE_SIZE) ++ goto drop; ++ svc_putu32(resv, rpc_success); ++ if (svc_safe_putnetobj(resv, &rsip->out_handle)) ++ goto drop; ++ if (resv->iov_len + 3 * 4 > PAGE_SIZE) ++ goto drop; ++ svc_putu32(resv, htonl(rsip->major_status)); ++ svc_putu32(resv, 
htonl(rsip->minor_status)); ++ svc_putu32(resv, htonl(GSS_SEQ_WIN)); ++ if (svc_safe_putnetobj(resv, &rsip->out_token)) ++ goto drop; ++ rqstp->rq_client = NULL; ++ } ++ goto complete; ++ case RPC_GSS_PROC_DESTROY: ++ set_bit(CACHE_NEGATIVE, &rsci->h.flags); ++ if (resv->iov_len + 4 > PAGE_SIZE) ++ goto drop; ++ svc_putu32(resv, rpc_success); ++ goto complete; ++ case RPC_GSS_PROC_DATA: ++ rqstp->rq_client = ++ find_gss_auth_domain(rsci->mechctx, gc->gc_svc); ++ if (rqstp->rq_client == NULL) ++ goto auth_err; ++ *authp = rpcsec_gsserr_ctxproblem; ++ if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq)) ++ goto auth_err; ++ ++ rqstp->rq_cred = rsci->cred; ++ ++ *authp = rpc_autherr_badcred; ++ switch (gc->gc_svc) { ++ case RPC_GSS_SVC_NONE: ++ break; ++ case RPC_GSS_SVC_INTEGRITY: ++ if (unwrap_integ_data(&rqstp->rq_arg, ++ gc->gc_seq, rsci->mechctx)) ++ goto auth_err; ++ /* placeholders for length and seq. number: */ ++ svcdata->body_start = resv->iov_base + resv->iov_len; ++ svc_putu32(resv, 0); ++ svc_putu32(resv, 0); ++ break; ++ case RPC_GSS_SVC_PRIVACY: ++ /* currently unsupported */ ++ default: ++ goto auth_err; ++ } ++ ret = SVC_OK; ++ goto out; ++ } ++auth_err: ++ /* Restore write pointer to original value: */ ++ xdr_ressize_check(rqstp, reject_stat); ++ ret = SVC_DENIED; ++ goto out; ++complete: ++ ret = SVC_COMPLETE; ++ goto out; ++drop: ++ ret = SVC_DROP; ++out: ++ if (rsci) ++ rsc_put(&rsci->h, &rsc_cache); ++ return ret; ++} ++ ++static int ++svcauth_gss_release(struct svc_rqst *rqstp) ++{ ++ struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data; ++ struct rpc_gss_wire_cred *gc = &gsd->clcred; ++ struct xdr_buf *resbuf = &rqstp->rq_res; ++ struct xdr_buf integ_buf; ++ struct xdr_netobj mic; ++ struct iovec *resv; ++ u32 *p; ++ int integ_offset, integ_len; ++ struct rsc *rsci; ++ int stat = -EINVAL; ++ ++ /* normally not set till svc_send, but we need it here: */ ++ resbuf->len = resbuf->head[0].iov_len ++ + resbuf->page_len + 
resbuf->tail[0].iov_len; ++ switch (gc->gc_svc) { ++ case RPC_GSS_SVC_NONE: ++ break; ++ case RPC_GSS_SVC_INTEGRITY: ++ p = gsd->body_start; ++ /* move accept_stat to right place: */ ++ memcpy(p, p + 2, 4); ++ p++; ++ integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base; ++ integ_len = resbuf->len - integ_offset; ++ BUG_ON(integ_len % 4); ++ *p++ = htonl(integ_len); ++ *p++ = htonl(gc->gc_seq); ++ if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, ++ integ_len)) ++ goto out; ++ if (resbuf->page_len == 0) { ++ BUG_ON(resbuf->tail[0].iov_len); ++ /* Use head for everything */ ++ resv = &resbuf->head[0]; ++ } else if (resbuf->tail[0].iov_base == NULL) { ++ /* copied from nfsd4_encode_read */ ++ svc_take_page(rqstp); ++ resbuf->tail[0].iov_base = page_address(rqstp ++ ->rq_respages[rqstp->rq_resused-1]); ++ rqstp->rq_restailpage = rqstp->rq_resused-1; ++ resbuf->tail[0].iov_len = 0; ++ resv = &resbuf->tail[0]; ++ } else { ++ resv = &resbuf->tail[0]; ++ } ++ /* XXX bounds checking!: */ ++ mic.data = (u8 *)resv->iov_base + resv->iov_len + 4; ++ rsci = gss_svc_searchbyctx(&gc->gc_ctx); ++ /* Better error return? Hold count on ctx through ++ * processing instead of looking up again? 
*/ ++ if (!rsci) ++ goto out; ++ /* XXX Whoops, we might overflow here: */ ++ if (gss_get_mic(rsci->mechctx, 0, &integ_buf, &mic)) ++ goto out; ++ svc_putu32(resv, htonl(mic.len)); ++ resv->iov_len += mic.len; ++ resbuf->len += mic.len; /* not strictly necessary */ ++ /* XXX too late, alas: */ ++ if (resbuf->len > PAGE_SIZE) ++ goto out; ++ break; ++ case RPC_GSS_SVC_PRIVACY: ++ default: ++ goto out; ++ } ++ ++ stat = 0; ++out: ++ if (rqstp->rq_client) ++ auth_domain_put(rqstp->rq_client); ++ rqstp->rq_client = NULL; ++ ++ return stat; ++} ++ ++static void ++svcauth_gss_domain_release(struct auth_domain *dom) ++{ ++ struct gss_domain *gd = container_of(dom, struct gss_domain, h); ++ ++ kfree(dom->name); ++ kfree(gd); ++} ++ ++struct auth_ops svcauthops_gss = { ++ .name = "rpcsec_gss", ++ .flavour = RPC_AUTH_GSS, ++ .accept = svcauth_gss_accept, ++ .release = svcauth_gss_release, ++ .domain_release = svcauth_gss_domain_release, ++}; ++ ++int ++gss_svc_init(void) ++{ ++ cache_register(&rsc_cache); ++ cache_register(&rsi_cache); ++ svc_auth_register(RPC_AUTH_GSS, &svcauthops_gss); ++ return 0; ++} +diff -puN net/sunrpc/svc.c~CITI_NFS4_ALL net/sunrpc/svc.c +--- linux-2.6.3/net/sunrpc/svc.c~CITI_NFS4_ALL 2004-02-19 16:47:04.000000000 -0500 ++++ linux-2.6.3-bfields/net/sunrpc/svc.c 2004-02-19 16:47:04.000000000 -0500 +@@ -200,6 +200,8 @@ svc_exit_thread(struct svc_rqst *rqstp) + kfree(rqstp->rq_resp); + if (rqstp->rq_argp) + kfree(rqstp->rq_argp); ++ if (rqstp->rq_auth_data) ++ kfree(rqstp->rq_auth_data); + kfree(rqstp); + + /* Release the server */ +@@ -322,6 +324,8 @@ svc_process(struct svc_serv *serv, struc + goto err_bad_auth; + case SVC_DROP: + goto dropit; ++ case SVC_COMPLETE: ++ goto sendit; + } + + progp = serv->sv_program; +diff -puN net/sunrpc/Makefile~CITI_NFS4_ALL net/sunrpc/Makefile +--- linux-2.6.3/net/sunrpc/Makefile~CITI_NFS4_ALL 2004-02-19 16:47:05.000000000 -0500 ++++ linux-2.6.3-bfields/net/sunrpc/Makefile 2004-02-19 16:47:05.000000000 -0500 +@@ -2,9 
+2,9 @@ + # Makefile for Linux kernel SUN RPC + # + +-obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ + + obj-$(CONFIG_SUNRPC) += sunrpc.o ++obj-$(CONFIG_SUNRPC_GSS) += auth_gss/ + + sunrpc-y := clnt.o xprt.o sched.o \ + auth.o auth_null.o auth_unix.o \ +diff -puN fs/nfsd/nfs4proc.c~CITI_NFS4_ALL fs/nfsd/nfs4proc.c +--- linux-2.6.3/fs/nfsd/nfs4proc.c~CITI_NFS4_ALL 2004-02-19 16:47:05.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfsd/nfs4proc.c 2004-02-19 16:47:15.000000000 -0500 +@@ -52,15 +52,22 @@ + #include + #include + #include ++#ifdef CONFIG_NFS_V4_ACL ++#include ++#endif + + #define NFSDDBG_FACILITY NFSDDBG_PROC + +-/* Note: The organization of the OPEN code seems a little strange; it +- * has been superfluously split into three routines, one of which is named +- * nfsd4_process_open2() even though there is no nfsd4_process_open1()! +- * This is because the code has been organized in anticipation of a +- * subsequent patch which will implement more of the NFSv4 state model. +- */ ++static inline void ++fh_dup2(struct svc_fh *dst, struct svc_fh *src) ++{ ++ fh_put(dst); ++ dget(src->fh_dentry); ++ if (src->fh_export) ++ cache_get(&src->fh_export->h); ++ *dst = *src; ++} ++ + static int + do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) + { +@@ -89,12 +96,19 @@ do_open_lookup(struct svc_rqst *rqstp, s + if (!status) { + set_change_info(&open->op_cinfo, current_fh); + fh_dup2(current_fh, &resfh); ++ /* XXXJBF: keep a saved svc_fh struct instead?? 
*/ ++ open->op_stateowner->so_replay.rp_openfh_len = ++ resfh.fh_handle.fh_size; ++ memcpy(open->op_stateowner->so_replay.rp_openfh, ++ &resfh.fh_handle.fh_base, ++ resfh.fh_handle.fh_size); + + accmode = MAY_NOP; + if (open->op_share_access & NFS4_SHARE_ACCESS_READ) + accmode = MAY_READ; + if (open->op_share_deny & NFS4_SHARE_ACCESS_WRITE) + accmode |= (MAY_WRITE | MAY_TRUNC); ++ accmode |= MAY_OWNER_OVERRIDE; + status = fh_verify(rqstp, current_fh, S_IFREG, accmode); + } + +@@ -102,19 +116,39 @@ do_open_lookup(struct svc_rqst *rqstp, s + return status; + } + ++/* ++ * nfs4_unlock_state() called in encode ++ */ + static inline int + nfsd4_open(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) + { + int status; +- dprintk("NFSD: nfsd4_open filename %.*s\n", +- (int)open->op_fname.len, open->op_fname.data); ++ dprintk("NFSD: nfsd4_open filename %.*s op_stateowner %p\n", ++ (int)open->op_fname.len, open->op_fname.data, ++ open->op_stateowner); + + /* This check required by spec. */ + if (open->op_create && open->op_claim_type != NFS4_OPEN_CLAIM_NULL) + return nfserr_inval; + ++ open->op_stateowner = NULL; ++ nfs4_lock_state(); ++ + /* check seqid for replay. 
set nfs4_owner */ + status = nfsd4_process_open1(open); ++ if (status == NFSERR_REPLAY_ME) { ++ struct nfs4_replay *rp = &open->op_stateowner->so_replay; ++ fh_put(current_fh); ++ current_fh->fh_handle.fh_size = rp->rp_openfh_len; ++ memcpy(&current_fh->fh_handle.fh_base, rp->rp_openfh, ++ rp->rp_openfh_len); ++ status = fh_verify(rqstp, current_fh, 0, MAY_NOP); ++ if (status) ++ dprintk("nfsd4_open: replay failed" ++ " restoring previous filehandle\n"); ++ else ++ status = NFSERR_REPLAY_ME; ++ } + if (status) + return status; + /* +@@ -172,7 +206,7 @@ static inline int + nfsd4_restorefh(struct svc_fh *current_fh, struct svc_fh *save_fh) + { + if (!save_fh->fh_dentry) +- return nfserr_nofilehandle; ++ return nfserr_restorefh; + + fh_dup2(current_fh, save_fh); + return nfs_ok; +@@ -204,11 +238,16 @@ nfsd4_access(struct svc_rqst *rqstp, str + static inline int + nfsd4_commit(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_commit *commit) + { ++ int status; ++ + u32 *p = (u32 *)commit->co_verf.data; + *p++ = nfssvc_boot.tv_sec; + *p++ = nfssvc_boot.tv_usec; + +- return nfsd_commit(rqstp, current_fh, commit->co_offset, commit->co_count); ++ status = nfsd_commit(rqstp, current_fh, commit->co_offset, commit->co_count); ++ if (status == nfserr_symlink) ++ status = nfserr_inval; ++ return status; + } + + static inline int +@@ -221,6 +260,8 @@ nfsd4_create(struct svc_rqst *rqstp, str + fh_init(&resfh, NFS4_FHSIZE); + + status = fh_verify(rqstp, current_fh, S_IFDIR, MAY_CREATE); ++ if (status == nfserr_symlink) ++ status = nfserr_notdir; + if (status) + return status; + +@@ -316,8 +357,10 @@ static inline int + nfsd4_link(struct svc_rqst *rqstp, struct svc_fh *current_fh, + struct svc_fh *save_fh, struct nfsd4_link *link) + { +- int status; ++ int status = nfserr_nofilehandle; + ++ if (!save_fh->fh_dentry) ++ return status; + status = nfsd_link(rqstp, current_fh, link->li_name, link->li_namelen, save_fh); + if (!status) + set_change_info(&link->li_cinfo,
current_fh); +@@ -327,14 +370,18 @@ nfsd4_link(struct svc_rqst *rqstp, struc + static inline int + nfsd4_lookupp(struct svc_rqst *rqstp, struct svc_fh *current_fh) + { +- /* +- * XXX: We currently violate the spec in one small respect +- * here. If LOOKUPP is done at the root of the pseudofs, +- * the spec requires us to return NFSERR_NOENT. Personally, +- * I think that leaving the filehandle unchanged is more +- * logical, but this is an academic question anyway, since +- * no clients actually use LOOKUPP. +- */ ++ struct svc_fh tmp_fh; ++ int ret; ++ ++ fh_init(&tmp_fh, NFS4_FHSIZE); ++ if((ret = exp_pseudoroot(rqstp->rq_client, &tmp_fh, ++ &rqstp->rq_chandle)) != 0) ++ return ret; ++ if (tmp_fh.fh_dentry == current_fh->fh_dentry) { ++ fh_put(&tmp_fh); ++ return nfserr_noent; ++ } ++ fh_put(&tmp_fh); + return nfsd_lookup(rqstp, current_fh, "..", 2, current_fh); + } + +@@ -345,6 +392,20 @@ nfsd4_lookup(struct svc_rqst *rqstp, str + } + + static inline int ++access_bits_permit_read(unsigned long access_bmap) ++{ ++ return test_bit(NFS4_SHARE_ACCESS_READ, &access_bmap) || ++ test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap); ++} ++ ++static inline int ++access_bits_permit_write(unsigned long access_bmap) ++{ ++ return test_bit(NFS4_SHARE_ACCESS_WRITE, &access_bmap) || ++ test_bit(NFS4_SHARE_ACCESS_BOTH, &access_bmap); ++} ++ ++static inline int + nfsd4_read(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_read *read) + { + struct nfs4_stateid *stp; +@@ -382,7 +443,7 @@ nfsd4_read(struct svc_rqst *rqstp, struc + goto out; + } + status = nfserr_openmode; +- if (!(stp->st_share_access & NFS4_SHARE_ACCESS_READ)) { ++ if (!access_bits_permit_read(stp->st_access_bmap)) { + dprintk("NFSD: nfsd4_read: file not opened for read!\n"); + goto out; + } +@@ -397,6 +458,11 @@ out: + static inline int + nfsd4_readdir(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_readdir *readdir) + { ++ u64 cookie = readdir->rd_cookie; ++ static const nfs4_verifier 
zeroverf = { ++ .data[0] = 0, ++ }; ++ + /* no need to check permission - this will be done in nfsd_readdir() */ + + if (readdir->rd_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) +@@ -405,7 +471,8 @@ nfsd4_readdir(struct svc_rqst *rqstp, st + readdir->rd_bmval[0] &= NFSD_SUPPORTED_ATTRS_WORD0; + readdir->rd_bmval[1] &= NFSD_SUPPORTED_ATTRS_WORD1; + +- if (readdir->rd_cookie > ~(u32)0) ++ if ((cookie > ~(u32)0) || (cookie == 1) || (cookie == 2) || ++ (cookie == 0 && memcmp(readdir->rd_verf.data, zeroverf.data, NFS4_VERIFIER_SIZE))) + return nfserr_bad_cookie; + + readdir->rd_rqstp = rqstp; +@@ -427,6 +494,8 @@ nfsd4_remove(struct svc_rqst *rqstp, str + int status; + + status = nfsd_unlink(rqstp, current_fh, 0, remove->rm_name, remove->rm_namelen); ++ if (status == nfserr_symlink) ++ return nfserr_notdir; + if (!status) { + fh_unlock(current_fh); + set_change_info(&remove->rm_cinfo, current_fh); +@@ -438,11 +507,25 @@ static inline int + nfsd4_rename(struct svc_rqst *rqstp, struct svc_fh *current_fh, + struct svc_fh *save_fh, struct nfsd4_rename *rename) + { +- int status; ++ int status = nfserr_nofilehandle; + ++ if (!save_fh->fh_dentry) ++ return status; + status = nfsd_rename(rqstp, save_fh, rename->rn_sname, + rename->rn_snamelen, current_fh, + rename->rn_tname, rename->rn_tnamelen); ++ ++ /* the underlying filesystem returns different error's than required ++ * by NFSv4. both save_fh and current_fh have been verified.. 
*/ ++ if (status == nfserr_isdir) ++ status = nfserr_exist; ++ else if ((status == nfserr_notdir) && ++ (S_ISDIR(save_fh->fh_dentry->d_inode->i_mode) && ++ S_ISDIR(current_fh->fh_dentry->d_inode->i_mode))) ++ status = nfserr_exist; ++ else if (status == nfserr_symlink) ++ status = nfserr_notdir; ++ + if (!status) { + set_change_info(&rename->rn_sinfo, current_fh); + set_change_info(&rename->rn_tinfo, save_fh); +@@ -454,14 +537,18 @@ static inline int + nfsd4_setattr(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_setattr *setattr) + { + struct nfs4_stateid *stp; +- int status = nfs_ok; ++ int status = nfserr_nofilehandle; ++ ++ if (!current_fh->fh_dentry) ++ goto out; + ++ status = nfs_ok; + if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { + + status = nfserr_bad_stateid; + if (ZERO_STATEID(&setattr->sa_stateid) || ONE_STATEID(&setattr->sa_stateid)) { + dprintk("NFSD: nfsd4_setattr: magic stateid!\n"); +- return status; ++ goto out; + } + + nfs4_lock_state(); +@@ -469,17 +556,27 @@ nfsd4_setattr(struct svc_rqst *rqstp, st + &setattr->sa_stateid, + CHECK_FH | RDWR_STATE, &stp))) { + dprintk("NFSD: nfsd4_setattr: couldn't process stateid!\n"); +- goto out; ++ goto out_unlock; + } + status = nfserr_openmode; +- if (!(stp->st_share_access & NFS4_SHARE_ACCESS_WRITE)) { ++ if (!access_bits_permit_write(stp->st_access_bmap)) { + dprintk("NFSD: nfsd4_setattr: not opened for write!\n"); +- goto out; ++ goto out_unlock; + } + nfs4_unlock_state(); + } +- return (nfsd_setattr(rqstp, current_fh, &setattr->sa_iattr, 0, (time_t)0)); ++#ifdef CONFIG_NFS_V4_ACL ++ status = nfs_ok; ++ if (setattr->sa_acl != NULL) ++ status = nfsd4_set_nfs4_acl(rqstp, current_fh, setattr->sa_acl); ++ if (status) ++ goto out; ++#endif /* CONFIG_NFS_V4_ACL */ ++ status = nfsd_setattr(rqstp, current_fh, &setattr->sa_iattr, ++ 0, (time_t)0); + out: ++ return status; ++out_unlock: + nfs4_unlock_state(); + return status; + } +@@ -513,7 +610,7 @@ nfsd4_write(struct svc_rqst *rqstp, stru + } + + 
status = nfserr_openmode; +- if (!(stp->st_share_access & NFS4_SHARE_ACCESS_WRITE)) { ++ if (!access_bits_permit_write(stp->st_access_bmap)) { + dprintk("NFSD: nfsd4_write: file not open for write!\n"); + goto out; + } +@@ -526,9 +623,12 @@ zero_stateid: + *p++ = nfssvc_boot.tv_sec; + *p++ = nfssvc_boot.tv_usec; + +- return (nfsd_write(rqstp, current_fh, write->wr_offset, ++ status = nfsd_write(rqstp, current_fh, write->wr_offset, + write->wr_vec, write->wr_vlen, write->wr_buflen, +- &write->wr_how_written)); ++ &write->wr_how_written); ++ if (status == nfserr_symlink) ++ status = nfserr_inval; ++ return status; + out: + nfs4_unlock_state(); + return status; +@@ -552,8 +652,9 @@ nfsd4_verify(struct svc_rqst *rqstp, str + + if ((verify->ve_bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) + || (verify->ve_bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1)) +- return nfserr_notsupp; +- if (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1) ++ return nfserr_attrnotsupp; ++ if ((verify->ve_bmval[0] & FATTR4_WORD0_RDATTR_ERROR) ++ || (verify->ve_bmval[1] & NFSD_WRITEONLY_ATTRS_WORD1)) + return nfserr_inval; + if (verify->ve_attrlen & 3) + return nfserr_inval; +@@ -568,7 +669,8 @@ nfsd4_verify(struct svc_rqst *rqstp, str + + status = nfsd4_encode_fattr(current_fh, current_fh->fh_export, + current_fh->fh_dentry, buf, +- &count, verify->ve_bmval); ++ &count, verify->ve_bmval, ++ rqstp); + + /* this means that nfsd4_encode_fattr() ran out of space */ + if (status == nfserr_resource && count == 0) +@@ -658,13 +760,32 @@ nfsd4_proc_compound(struct svc_rqst *rqs + goto encode_op; + } + ++ /* All operations except RENEW, SETCLIENTID, RESTOREFH ++ * SETCLIENTID_CONFIRM, PUTFH and PUTROOTFH ++ * require a valid current filehandle ++ * ++ * SETATTR NOFILEHANDLE error handled in nfsd4_setattr ++ * due to required returned bitmap argument ++ */ ++ if ((!current_fh.fh_dentry) && ++ !((op->opnum == OP_PUTFH) || (op->opnum == OP_PUTROOTFH) || ++ (op->opnum == OP_SETCLIENTID) || ++ (op->opnum == 
OP_SETCLIENTID_CONFIRM) || ++ (op->opnum == OP_RENEW) || (op->opnum == OP_RESTOREFH) || ++ (op->opnum == OP_RELEASE_LOCKOWNER) || ++ (op->opnum == OP_SETATTR))) { ++ op->status = nfserr_nofilehandle; ++ goto encode_op; ++ } + switch (op->opnum) { + case OP_ACCESS: + op->status = nfsd4_access(rqstp, &current_fh, &op->u.access); + break; + case OP_CLOSE: + op->status = nfsd4_close(rqstp, &current_fh, &op->u.close); +- op->replay = &op->u.close.cl_stateowner->so_replay; ++ if (op->u.close.cl_stateowner) ++ op->replay = ++ &op->u.close.cl_stateowner->so_replay; + break; + case OP_COMMIT: + op->status = nfsd4_commit(rqstp, &current_fh, &op->u.commit); +@@ -683,12 +804,18 @@ nfsd4_proc_compound(struct svc_rqst *rqs + break; + case OP_LOCK: + op->status = nfsd4_lock(rqstp, &current_fh, &op->u.lock); ++ if (op->u.lock.lk_stateowner) ++ op->replay = ++ &op->u.lock.lk_stateowner->so_replay; + break; + case OP_LOCKT: + op->status = nfsd4_lockt(rqstp, &current_fh, &op->u.lockt); + break; + case OP_LOCKU: + op->status = nfsd4_locku(rqstp, &current_fh, &op->u.locku); ++ if (op->u.locku.lu_stateowner) ++ op->replay = ++ &op->u.locku.lu_stateowner->so_replay; + break; + case OP_LOOKUP: + op->status = nfsd4_lookup(rqstp, &current_fh, &op->u.lookup); +@@ -703,15 +830,21 @@ nfsd4_proc_compound(struct svc_rqst *rqs + break; + case OP_OPEN: + op->status = nfsd4_open(rqstp, &current_fh, &op->u.open); +- op->replay = &op->u.open.op_stateowner->so_replay; ++ if (op->u.open.op_stateowner) ++ op->replay = ++ &op->u.open.op_stateowner->so_replay; + break; + case OP_OPEN_CONFIRM: + op->status = nfsd4_open_confirm(rqstp, &current_fh, &op->u.open_confirm); +- op->replay = &op->u.open_confirm.oc_stateowner->so_replay; ++ if (op->u.open_confirm.oc_stateowner) ++ op->replay = ++ &op->u.open_confirm.oc_stateowner->so_replay; + break; + case OP_OPEN_DOWNGRADE: + op->status = nfsd4_open_downgrade(rqstp, &current_fh, &op->u.open_downgrade); +- op->replay = &op->u.open_downgrade.od_stateowner->so_replay; ++ if (op->u.open_downgrade.od_stateowner) ++ op->replay
= ++ &op->u.open_downgrade.od_stateowner->so_replay; + break; + case OP_PUTFH: + op->status = nfsd4_putfh(rqstp, &current_fh, &op->u.putfh); +@@ -760,6 +893,9 @@ nfsd4_proc_compound(struct svc_rqst *rqs + case OP_WRITE: + op->status = nfsd4_write(rqstp, &current_fh, &op->u.write); + break; ++ case OP_RELEASE_LOCKOWNER: ++ op->status = nfsd4_release_lockowner(rqstp, &op->u.release_lockowner); ++ break; + default: + BUG_ON(op->status == nfs_ok); + break; +@@ -768,7 +904,7 @@ nfsd4_proc_compound(struct svc_rqst *rqs + encode_op: + if (op->status == NFSERR_REPLAY_ME) { + nfsd4_encode_replay(resp, op); +- status = op->status = NFS_OK; ++ status = op->status = op->replay->rp_status; + } else { + nfsd4_encode_operation(resp, op); + status = op->status; +@@ -776,20 +912,7 @@ encode_op: + } + + out: +- if (args->ops != args->iops) { +- kfree(args->ops); +- args->ops = args->iops; +- } +- if (args->tmpp) { +- kfree(args->tmpp); +- args->tmpp = NULL; +- } +- while (args->to_free) { +- struct tmpbuf *tb = args->to_free; +- args->to_free = tb->next; +- kfree(tb->buf); +- kfree(tb); +- } ++ nfsd4_release_compoundargs(args); + fh_put(&current_fh); + fh_put(&save_fh); + return status; +diff -puN fs/nfsd/nfs4xdr.c~CITI_NFS4_ALL fs/nfsd/nfs4xdr.c +--- linux-2.6.3/fs/nfsd/nfs4xdr.c~CITI_NFS4_ALL 2004-02-19 16:47:05.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfsd/nfs4xdr.c 2004-02-19 16:47:15.000000000 -0500 +@@ -51,100 +51,103 @@ + #include + #include + #include +-#include + #include + #include + #include ++#include ++#include ++#include + + #define NFSDDBG_FACILITY NFSDDBG_XDR + +-/* +- * From Peter Astrand : The following routines check +- * whether a filename supplied by the client is valid.
+- */ +-static const char trailing_bytes_for_utf8[256] = { +- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +- 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, ++static const char utf8_byte_len[256] = { ++ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ++ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ++ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, ++ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, + 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, +- 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, +- 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 ++ 0,0,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, ++ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, 4,4,4,4,4,4,4,4,5,5,5,5,6,6,0,0 + }; + + static inline int +-is_legal_iso_utf8_sequence(unsigned char *source, int length) ++is_legal_utf8_sequence(unsigned char *source, int length) + { +- unsigned char a; +- unsigned char *srcptr; ++ unsigned char *ptr; ++ unsigned char c; + +- srcptr = source + length; ++ if (length==1) return 1; + +- switch (length) { +- /* Everything else falls through when "1"... 
*/ ++ /* Check for overlong sequence, and check second byte */ ++ c = *(source + 1); ++ switch (*source) { ++ case 0xE0: /* 3 bytes */ ++ if ( c < 0xA0 ) return 0; ++ break; ++ case 0xF0: /* 4 bytes */ ++ if ( c < 0x90 ) return 0; ++ break; ++ case 0xF8: /* 5 bytes */ ++ if ( c < 0xC8 ) return 0; ++ break; ++ case 0xFC: /* 6 bytes */ ++ if ( c < 0x84 ) return 0; ++ break; + default: +- /* Sequences with more than 6 bytes are invalid */ +- return 0; ++ if ( (c & 0xC0) != 0x80) return 0; ++ } + +- /* +- Byte 3-6 must be 80..BF +- */ +- case 6: +- if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; +- case 5: +- if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; +- case 4: +- if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; +- case 3: +- if ((a = (*--srcptr)) < 0x80 || a > 0xBF) return 0; +- +- case 2: +- a = *--srcptr; +- +- /* Upper limit */ +- if (a > 0xBF) +- /* 2nd byte may never be > 0xBF */ +- return 0; ++ /* Check that trailing bytes look like 10xxxxxx */ ++ for (ptr = source++ + length - 1; ptr>source; ptr--) ++ if ( ((*ptr) & 0xC0) != 0x80 ) return 0; ++ return 1; ++} + +- /* +- Lower limits checks, to detect non-shortest forms. +- No fall-through in this inner switch. +- */ +- switch (*source) { +- case 0xE0: /* 3 bytes */ +- if (a < 0xA0) return 0; +- break; +- case 0xF0: /* 4 bytes */ +- if (a < 0x90) return 0; +- break; +- case 0xF8: /* 5 bytes */ +- if (a < 0xC8) return 0; +- break; +- case 0xFC: /* 6 bytes */ +- if (a < 0x84) return 0; +- break; +- default: +- /* In all cases, 2nd byte must be >= 0x80 (because leading +- 10...) */ +- if (a < 0x80) return 0; +- } ++/* This does some screening on disallowed unicode characters. It is NOT ++ * comprehensive. 
++ */ ++static int ++is_allowed_utf8_char(unsigned char *source, int length) ++{ ++ /* We assume length and source point to a valid utf8 sequence */ ++ unsigned char c; + +- case 1: +- /* Invalid ranges */ +- if (*source >= 0x80 && *source < 0xC2) +- /* Multibyte char with value < 0xC2, non-shortest */ +- return 0; +- if (*source > 0xFD) +- /* Leading byte starting with 11111110 is illegal */ +- return 0; +- if (!*source) +- return 0; ++ /* Disallow F0000 and up (in utf8, F3B08080) */ ++ if (*source > 0xF3 ) return 0; ++ c = *(source + 1); ++ switch (*source) { ++ case 0xF3: ++ if (c >= 0xB0) return 0; ++ break; ++ /* Disallow D800-F8FF (in utf8, EDA080-EFA3BF */ ++ case 0xED: ++ if (c >= 0xA0) return 0; ++ break; ++ case 0xEE: ++ return 0; ++ break; ++ case 0xEF: ++ if (c <= 0xA3) return 0; ++ /* Disallow FFF9-FFFF (EFBFB9-EFBFBF) */ ++ if (c==0xBF) ++ /* Don't need to check <=0xBF, since valid utf8 */ ++ if ( *(source+2) >= 0xB9) return 0; ++ break; + } +- + return 1; + } + ++/* This routine should really check to see that the proper stringprep ++ * mappings have been applied. Instead, we do a simple screen of some ++ * of the more obvious illegal values by calling is_allowed_utf8_char. ++ * This will allow many illegal strings through, but if a client behaves, ++ * it will get full functionality. The other option (apart from full ++ * stringprep checking) is to limit everything to an easily handled subset, ++ * such as 7-bit ascii. ++ * ++ * Note - currently calling routines ignore return value except as boolean. 
++ */ + static int + check_utf8(char *str, int len) + { +@@ -155,11 +158,17 @@ check_utf8(char *str, int len) + sourceend = str + len; + + while (chunk < sourceend) { +- chunklen = trailing_bytes_for_utf8[*chunk]+1; ++ chunklen = utf8_byte_len[*chunk]; ++ if (!chunklen) ++ return nfserr_inval; + if (chunk + chunklen > sourceend) + return nfserr_inval; +- if (!is_legal_iso_utf8_sequence(chunk, chunklen)) ++ if (!is_legal_utf8_sequence(chunk, chunklen)) ++ return nfserr_inval; ++ if (!is_allowed_utf8_char(chunk, chunklen)) + return nfserr_inval; ++ if ( (chunklen==1) && (!*chunk) ) ++ return nfserr_inval; /* Disallow embedded nulls */ + chunk += chunklen; + } + +@@ -280,27 +289,40 @@ u32 *read_buf(struct nfsd4_compoundargs + return p; + } + +-char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes) ++static int ++defer_free(struct nfsd4_compoundargs *argp, ++ void (*release)(const void *), void *p) + { + struct tmpbuf *tb; ++ ++ tb = kmalloc(sizeof(*tb), GFP_KERNEL); ++ if (!tb) ++ return -ENOMEM; ++ tb->buf = p; ++ tb->release = release; ++ tb->next = argp->to_free; ++ argp->to_free = tb; ++ return 0; ++} ++ ++char *savemem(struct nfsd4_compoundargs *argp, u32 *p, int nbytes) ++{ ++ void *new = NULL; + if (p == argp->tmp) { +- p = kmalloc(nbytes, GFP_KERNEL); +- if (!p) return NULL; ++ new = kmalloc(nbytes, GFP_KERNEL); ++ if (!new) return NULL; ++ p = new; + memcpy(p, argp->tmp, nbytes); + } else { + if (p != argp->tmpp) + BUG(); + argp->tmpp = NULL; + } +- tb = kmalloc(sizeof(*tb), GFP_KERNEL); +- if (!tb) { +- kfree(p); ++ if (defer_free(argp, kfree, p)) { ++ kfree(new); + return NULL; +- } +- tb->buf = p; +- tb->next = argp->to_free; +- argp->to_free = tb; +- return (char*)p; ++ } else ++ return (char *)p; + } + + +@@ -328,7 +350,8 @@ nfsd4_decode_bitmap(struct nfsd4_compoun + } + + static int +-nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct iattr *iattr) ++nfsd4_decode_fattr(struct nfsd4_compoundargs *argp, u32 *bmval, struct 
iattr *iattr, ++ struct nfs4_acl **acl) + { + int expected_len, len = 0; + u32 dummy32; +@@ -344,7 +367,7 @@ nfsd4_decode_fattr(struct nfsd4_compound + * read-only attributes return ERR_INVAL. + */ + if ((bmval[0] & ~NFSD_SUPPORTED_ATTRS_WORD0) || (bmval[1] & ~NFSD_SUPPORTED_ATTRS_WORD1)) +- return nfserr_notsupp; ++ return nfserr_attrnotsupp; + if ((bmval[0] & ~NFSD_WRITEABLE_ATTRS_WORD0) || (bmval[1] & ~NFSD_WRITEABLE_ATTRS_WORD1)) + return nfserr_inval; + +@@ -357,6 +380,39 @@ nfsd4_decode_fattr(struct nfsd4_compound + READ64(iattr->ia_size); + iattr->ia_valid |= ATTR_SIZE; + } ++#ifdef CONFIG_NFS_V4_ACL ++ if (bmval[0] & FATTR4_WORD0_ACL) { ++ int nace, i; ++ struct nfs4_ace ace; ++ ++ READ_BUF(4); len += 4; ++ READ32(nace); ++ ++ *acl = nfs4_acl_new(); ++ if (*acl == NULL) { ++ status = -ENOMEM; ++ goto out_nfserr; ++ } ++ defer_free(argp, (void (*)(const void *))nfs4_acl_free, *acl); ++ ++ for (i = 0; i < nace; i++) { ++ READ_BUF(16); len += 16; ++ READ32(ace.type); ++ READ32(ace.flag); ++ READ32(ace.access_mask); ++ READ32(ace.wholen); ++ READ_BUF(ace.wholen); ++ len += XDR_QUADLEN(ace.wholen) << 2; ++ if (nfs4_acl_add_ace(*acl, ace.type, ace.flag, ++ ace.access_mask, (char *)p, ace.wholen) < 0) { ++ status = -ENOMEM; ++ goto out_nfserr; ++ } ++ p += XDR_QUADLEN(ace.wholen); ++ } ++ } else ++ *acl = NULL; ++#endif /* CONFIG_NFS_V4_ACL */ + if (bmval[1] & FATTR4_WORD1_MODE) { + READ_BUF(4); + len += 4; +@@ -373,7 +429,7 @@ nfsd4_decode_fattr(struct nfsd4_compound + READMEM(buf, dummy32); + if (check_utf8(buf, dummy32)) + return nfserr_inval; +- if ((status = name_get_uid(buf, dummy32, &iattr->ia_uid))) ++ if ((status = nfsd_map_name_to_uid(argp->rqstp, buf, dummy32, &iattr->ia_uid))) + goto out_nfserr; + iattr->ia_valid |= ATTR_UID; + } +@@ -386,7 +442,7 @@ nfsd4_decode_fattr(struct nfsd4_compound + READMEM(buf, dummy32); + if (check_utf8(buf, dummy32)) + return nfserr_inval; +- if ((status = name_get_gid(buf, dummy32, &iattr->ia_gid))) ++ if ((status = 
nfsd_map_name_to_gid(argp->rqstp, buf, dummy32, &iattr->ia_gid))) + goto out_nfserr; + iattr->ia_valid |= ATTR_GID; + } +@@ -482,6 +538,7 @@ nfsd4_decode_close(struct nfsd4_compound + { + DECODE_HEAD; + ++ (int)close->cl_stateowner = -1; + READ_BUF(4 + sizeof(stateid_t)); + READ32(close->cl_seqid); + READ32(close->cl_stateid.si_generation); +@@ -540,7 +597,7 @@ nfsd4_decode_create(struct nfsd4_compoun + if ((status = check_filename(create->cr_name, create->cr_namelen, nfserr_inval))) + return status; + +- if ((status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr))) ++ if ((status = nfsd4_decode_fattr(argp, create->cr_bmval, &create->cr_iattr, &create->cr_acl))) + goto out; + + DECODE_TAIL; +@@ -572,6 +629,7 @@ nfsd4_decode_lock(struct nfsd4_compounda + { + DECODE_HEAD; + ++ (int)lock->lk_stateowner = -1; + /* + * type, reclaim(boolean), offset, length, new_lock_owner(boolean) + */ +@@ -629,6 +687,7 @@ nfsd4_decode_locku(struct nfsd4_compound + { + DECODE_HEAD; + ++ (int)locku->lu_stateowner = -1; + READ_BUF(24 + sizeof(stateid_t)); + READ32(locku->lu_type); + if ((locku->lu_type < NFS4_READ_LT) || (locku->lu_type > NFS4_WRITEW_LT)) +@@ -664,6 +723,7 @@ nfsd4_decode_open(struct nfsd4_compounda + + memset(open->op_bmval, 0, sizeof(open->op_bmval)); + open->op_iattr.ia_valid = 0; ++ (int)open->op_stateowner = -1; + + /* seqid, share_access, share_deny, clientid, ownerlen */ + READ_BUF(16 + sizeof(clientid_t)); +@@ -686,7 +746,7 @@ nfsd4_decode_open(struct nfsd4_compounda + switch (open->op_createmode) { + case NFS4_CREATE_UNCHECKED: + case NFS4_CREATE_GUARDED: +- if ((status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr))) ++ if ((status = nfsd4_decode_fattr(argp, open->op_bmval, &open->op_iattr, &open->op_acl))) + goto out; + break; + case NFS4_CREATE_EXCLUSIVE: +@@ -739,6 +799,7 @@ nfsd4_decode_open_confirm(struct nfsd4_c + { + DECODE_HEAD; + ++ (int)open_conf->oc_stateowner = -1; + READ_BUF(4 + sizeof(stateid_t)); + 
READ32(open_conf->oc_req_stateid.si_generation); + COPYMEM(&open_conf->oc_req_stateid.si_opaque, sizeof(stateid_opaque_t)); +@@ -752,6 +813,7 @@ nfsd4_decode_open_downgrade(struct nfsd4 + { + DECODE_HEAD; + ++ (int)open_down->od_stateowner = -1; + READ_BUF(4 + sizeof(stateid_t)); + READ32(open_down->od_stateid.si_generation); + COPYMEM(&open_down->od_stateid.si_opaque, sizeof(stateid_opaque_t)); +@@ -861,7 +923,7 @@ nfsd4_decode_setattr(struct nfsd4_compou + READ_BUF(sizeof(stateid_t)); + READ32(setattr->sa_stateid.si_generation); + COPYMEM(&setattr->sa_stateid.si_opaque, sizeof(stateid_opaque_t)); +- if ((status = nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr))) ++ if ((status = nfsd4_decode_fattr(argp, setattr->sa_bmval, &setattr->sa_iattr, &setattr->sa_acl))) + goto out; + + DECODE_TAIL; +@@ -928,7 +990,7 @@ nfsd4_decode_write(struct nfsd4_compound + int len; + DECODE_HEAD; + +- READ_BUF(sizeof(stateid_t) + 16); ++ READ_BUF(sizeof(stateid_opaque_t) + 20); + READ32(write->wr_stateid.si_generation); + COPYMEM(&write->wr_stateid.si_opaque, sizeof(stateid_opaque_t)); + READ64(write->wr_offset); +@@ -972,6 +1034,20 @@ nfsd4_decode_write(struct nfsd4_compound + } + + static int ++nfsd4_decode_release_lockowner(struct nfsd4_compoundargs *argp, struct nfsd4_release_lockowner *rlockowner) ++{ ++ DECODE_HEAD; ++ ++ READ_BUF(12); ++ COPYMEM(&rlockowner->rl_clientid, sizeof(clientid_t)); ++ READ32(rlockowner->rl_owner.len); ++ READ_BUF(rlockowner->rl_owner.len); ++ READMEM(rlockowner->rl_owner.data, rlockowner->rl_owner.len); ++ ++ DECODE_TAIL; ++} ++ ++static int + nfsd4_decode_compound(struct nfsd4_compoundargs *argp) + { + DECODE_HEAD; +@@ -1043,6 +1119,13 @@ nfsd4_decode_compound(struct nfsd4_compo + op->opnum = ntohl(*argp->p++); + + switch (op->opnum) { ++ case 2: /* Reserved operation */ ++ op->opnum = OP_ILLEGAL; ++ if (argp->minorversion == 0) ++ op->status = nfserr_op_illegal; ++ else ++ op->status = nfserr_minor_vers_mismatch; ++ break; + case 
OP_ACCESS: + op->status = nfsd4_decode_access(argp, &op->u.access); + break; +@@ -1136,14 +1219,12 @@ nfsd4_decode_compound(struct nfsd4_compo + case OP_WRITE: + op->status = nfsd4_decode_write(argp, &op->u.write); + break; ++ case OP_RELEASE_LOCKOWNER: ++ op->status = nfsd4_decode_release_lockowner(argp, &op->u.release_lockowner); ++ break; + default: +- /* +- * According to spec, anything greater than OP_WRITE +- * is treated as OP_WRITE+1 in the response. +- */ +- if (op->opnum > OP_WRITE) +- op->opnum = OP_WRITE + 1; +- op->status = nfserr_notsupp; ++ op->opnum = OP_ILLEGAL; ++ op->status = nfserr_op_illegal; + break; + } + +@@ -1183,10 +1264,10 @@ nfsd4_decode_compound(struct nfsd4_compo + } while (0) + #define WRITECINFO(c) do { \ + *p++ = htonl(c.atomic); \ +- *p++ = htonl(c.before_size); \ +- *p++ = htonl(c.before_ctime); \ +- *p++ = htonl(c.after_size); \ +- *p++ = htonl(c.after_ctime); \ ++ *p++ = htonl(c.before_ctime_sec); \ ++ *p++ = htonl(c.before_ctime_nsec); \ ++ *p++ = htonl(c.after_ctime_sec); \ ++ *p++ = htonl(c.after_ctime_nsec); \ + } while (0) + + #define RESERVE_SPACE(nbytes) do { \ +@@ -1209,10 +1290,13 @@ nfsd4_decode_compound(struct nfsd4_compo + * "seqid-mutating" NFSv4 operation. This is + * where seqids are incremented, and the + * replay cache is filled. 
++ * ++ * if stateowner != -1 then called with nfs4_lock_state() held + */ + + #define ENCODE_SEQID_OP_TAIL(stateowner) do { \ +- if (seqid_mutating_err(nfserr) && stateowner) { \ ++ if (seqid_mutating_err(nfserr) && stateowner \ ++ && ((int)stateowner != -1)) { \ + if (stateowner->so_confirmed) \ + stateowner->so_seqid++; \ + stateowner->so_replay.rp_status = nfserr; \ +@@ -1220,7 +1304,8 @@ nfsd4_decode_compound(struct nfsd4_compo + (((char *)(resp)->p - (char *)save)); \ + memcpy(stateowner->so_replay.rp_buf, save, \ + stateowner->so_replay.rp_buflen); \ +- } } while(0) ++ } } while(0); \ ++ if ((int)stateowner != -1) nfs4_unlock_state(); + + + static u32 nfs4_ftypes[16] = { +@@ -1239,13 +1324,16 @@ static u32 nfs4_ftypes[16] = { + */ + int + nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, +- struct dentry *dentry, u32 *buffer, int *countp, u32 *bmval) ++ struct dentry *dentry, u32 *buffer, int *countp, u32 *bmval, ++ struct svc_rqst *rqstp) + { + u32 bmval0 = bmval[0]; + u32 bmval1 = bmval[1]; + struct kstat stat; +- struct name_ent *owner = NULL; +- struct name_ent *group = NULL; ++ char owner[IDMAP_NAMESZ]; ++ u32 ownerlen = 0; ++ char group[IDMAP_NAMESZ]; ++ u32 grouplen = 0; + struct svc_fh tempfh; + struct kstatfs statfs; + int buflen = *countp << 2; +@@ -1254,6 +1342,7 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s + u64 dummy64; + u32 *p = buffer; + int status; ++ struct nfs4_acl *acl = NULL; + + BUG_ON(bmval1 & NFSD_WRITEONLY_ATTRS_WORD1); + BUG_ON(bmval0 & ~NFSD_SUPPORTED_ATTRS_WORD0); +@@ -1277,15 +1366,30 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s + fhp = &tempfh; + } + if (bmval1 & FATTR4_WORD1_OWNER) { +- status = name_get_user(stat.uid, &owner); +- if (status) ++ int temp = nfsd_map_uid_to_name(rqstp, stat.uid, owner); ++ if (temp < 0) { ++ status = temp; + goto out_nfserr; ++ } ++ ownerlen = (unsigned) temp; + } + if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { +- status = name_get_group(stat.gid, &group); +- if (status) ++ int temp = 
nfsd_map_gid_to_name(rqstp, stat.gid, group); ++ if (temp < 0) { ++ status = temp; ++ goto out_nfserr; ++ } ++ grouplen = (unsigned) temp; ++ } ++#ifdef CONFIG_NFS_V4_ACL ++ if (bmval0 & FATTR4_WORD0_ACL) { ++ status = nfsd4_get_nfs4_acl(rqstp, dentry, &acl); ++ if (status == -EOPNOTSUPP) ++ bmval0 &= ~FATTR4_WORD0_ACL; ++ else if (status < 0) + goto out_nfserr; + } ++#endif /* CONFIG_NFS_V4_ACL */ + + if ((buflen -= 16) < 0) + goto out_resource; +@@ -1317,32 +1421,15 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s + } + if (bmval0 & FATTR4_WORD0_CHANGE) { + /* +- * XXX: We currently use the inode ctime as the nfsv4 "changeid" +- * attribute. This violates the spec, which says +- * +- * The server may return the object's time_modify attribute +- * for this attribute, but only if the file system object +- * can not be updated more frequently than the resolution +- * of time_modify. +- * +- * Since we only have 1-second ctime resolution, this is a pretty +- * serious violation. Indeed, 1-second ctime resolution is known +- * to be a problem in practice in the NFSv3 world. +- * +- * The real solution to this problem is probably to work on +- * adding high-resolution mtimes to the VFS layer. +- * +- * Note: Started using i_size for the high 32 bits of the changeid. +- * +- * Note 2: This _must_ be consistent with the scheme for writing ++ * Note: This _must_ be consistent with the scheme for writing + * change_info, so any changes made here must be reflected there + * as well. (See xdr4.h:set_change_info() and the WRITECINFO() + * macro above.) + */ + if ((buflen -= 8) < 0) + goto out_resource; +- WRITE32(stat.size); +- WRITE32(stat.mtime.tv_sec); /* AK: nsec dropped? */ ++ WRITE32(stat.ctime.tv_sec); ++ WRITE32(stat.ctime.tv_nsec); /* AK: nsec dropped? 
*/ + } + if (bmval0 & FATTR4_WORD0_SIZE) { + if ((buflen -= 8) < 0) +@@ -1387,10 +1474,48 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s + goto out_resource; + WRITE32(0); + } ++#ifdef CONFIG_NFS_V4_ACL ++ if (bmval0 & FATTR4_WORD0_ACL) { ++ struct nfs4_ace *ace; ++ struct list_head *h; ++ int alen; ++ ++ if (acl == NULL) { ++ if ((buflen -= 4) < 0) ++ goto out_resource; ++ ++ WRITE32(0); ++ goto out_acl; ++ } ++ ++ alen = acl->naces * 16 + 4; ++ ++ list_for_each(h, &acl->ace_head) { ++ ace = list_entry(h, struct nfs4_ace, l_ace); ++ alen += XDR_QUADLEN(ace->wholen) << 2; ++ } ++ ++ if ((buflen -= alen) < 0) ++ goto out_resource; ++ ++ WRITE32(acl->naces); ++ ++ list_for_each(h, &acl->ace_head) { ++ ace = list_entry(h, struct nfs4_ace, l_ace); ++ ++ WRITE32(ace->type); ++ WRITE32(ace->flag); ++ WRITE32(ace->access_mask); ++ WRITE32(ace->wholen); ++ WRITEMEM(ace->who, ace->wholen); ++ } ++ } ++out_acl: ++#endif /* CONFIG_NFS_V4_ACL */ + if (bmval0 & FATTR4_WORD0_ACLSUPPORT) { + if ((buflen -= 4) < 0) + goto out_resource; +- WRITE32(0); ++ WRITE32(1); + } + if (bmval0 & FATTR4_WORD0_CANSETTIME) { + if ((buflen -= 4) < 0) +@@ -1485,20 +1610,18 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s + WRITE32(stat.nlink); + } + if (bmval1 & FATTR4_WORD1_OWNER) { +- int namelen = strlen(owner->name); +- buflen -= (XDR_QUADLEN(namelen) << 2) + 4; ++ buflen -= (XDR_QUADLEN(ownerlen) << 2) + 4; + if (buflen < 0) + goto out_resource; +- WRITE32(namelen); +- WRITEMEM(owner->name, namelen); ++ WRITE32(ownerlen); ++ WRITEMEM(owner, ownerlen); + } + if (bmval1 & FATTR4_WORD1_OWNER_GROUP) { +- int namelen = strlen(group->name); +- buflen -= (XDR_QUADLEN(namelen) << 2) + 4; ++ buflen -= (XDR_QUADLEN(grouplen) << 2) + 4; + if (buflen < 0) + goto out_resource; +- WRITE32(namelen); +- WRITEMEM(group->name, namelen); ++ WRITE32(grouplen); ++ WRITEMEM(group, grouplen); + } + if (bmval1 & FATTR4_WORD1_RAWDEV) { + if ((buflen -= 8) < 0) +@@ -1564,12 +1687,11 @@ nfsd4_encode_fattr(struct svc_fh *fhp, s 
+ status = nfs_ok; + + out: ++#ifdef CONFIG_NFS_V4_ACL ++ nfs4_acl_free(acl); ++#endif + if (fhp == &tempfh) + fh_put(&tempfh); +- if (owner) +- name_put(owner); +- if (group) +- name_put(group); + return status; + out_nfserr: + status = nfserrno(status); +@@ -1648,7 +1770,8 @@ nfsd4_encode_dirent(struct readdir_cd *c + } + + nfserr = nfsd4_encode_fattr(NULL, exp, +- dentry, p, &buflen, cd->rd_bmval); ++ dentry, p, &buflen, cd->rd_bmval, ++ cd->rd_rqstp); + if (!nfserr) { + p += buflen; + goto out; +@@ -1701,7 +1824,7 @@ out: + return 0; + + nospc: +- cd->common.err = nfserr_readdir_nospc; ++ cd->common.err = nfserr_toosmall; + return -EINVAL; + } + +@@ -1771,7 +1894,8 @@ nfsd4_encode_getattr(struct nfsd4_compou + + buflen = resp->end - resp->p - (COMPOUND_ERR_SLACK_SPACE >> 2); + nfserr = nfsd4_encode_fattr(fhp, fhp->fh_export, fhp->fh_dentry, +- resp->p, &buflen, getattr->ga_bmval); ++ resp->p, &buflen, getattr->ga_bmval, ++ resp->rqstp); + + if (!nfserr) + resp->p += buflen; +@@ -1871,7 +1995,7 @@ nfsd4_encode_open(struct nfsd4_compoundr + ENCODE_SEQID_OP_HEAD; + + if (nfserr) +- return; ++ goto out; + + RESERVE_SPACE(36 + sizeof(stateid_t)); + WRITE32(open->op_stateid.si_generation); +@@ -1925,7 +2049,8 @@ nfsd4_encode_open(struct nfsd4_compoundr + default: + BUG(); + } +- ++ /* XXX save filehandle here */ ++out: + ENCODE_SEQID_OP_TAIL(open->op_stateowner); + } + +@@ -1995,6 +2120,8 @@ nfsd4_encode_read(struct nfsd4_compoundr + read->rd_offset, + read->rd_iov, read->rd_vlen, + &maxcount); ++ if (nfserr == nfserr_symlink) ++ nfserr = nfserr_inval; + if (nfserr) + return nfserr; + eof = (read->rd_offset + maxcount >= read->rd_fhp->fh_dentry->d_inode->i_size); +@@ -2052,6 +2179,8 @@ nfsd4_encode_readlink(struct nfsd4_compo + * assume that truncation occurred, and return NFS4ERR_RESOURCE. 
+ */ + nfserr = nfsd_readlink(readlink->rl_rqstp, readlink->rl_fhp, page, &maxcount); ++ if (nfserr == nfserr_isdir) ++ return nfserr_inval; + if (nfserr) + return nfserr; + +@@ -2081,7 +2210,7 @@ nfsd4_encode_readdir(struct nfsd4_compou + { + int maxcount; + loff_t offset; +- u32 *page; ++ u32 *page, *savep; + ENCODE_HEAD; + + if (nfserr) +@@ -2090,6 +2219,7 @@ nfsd4_encode_readdir(struct nfsd4_compou + return nfserr_resource; + + RESERVE_SPACE(8); /* verifier */ ++ savep = p; + + /* XXX: Following NFSv3, we ignore the READDIR verifier for now. */ + WRITE32(0); +@@ -2107,8 +2237,10 @@ nfsd4_encode_readdir(struct nfsd4_compou + * pointer and eof field. + */ + maxcount = (maxcount >> 2) - 4; +- if (maxcount < 0) +- return nfserr_readdir_nospc; ++ if (maxcount < 0) { ++ nfserr = nfserr_toosmall; ++ goto err_no_verf; ++ } + + svc_take_page(resp->rqstp); + page = page_address(resp->rqstp->rq_respages[resp->rqstp->rq_resused-1]); +@@ -2122,11 +2254,13 @@ nfsd4_encode_readdir(struct nfsd4_compou + &offset, + &readdir->common, nfsd4_encode_dirent); + if (nfserr == nfs_ok && +- readdir->common.err == nfserr_readdir_nospc && ++ readdir->common.err == nfserr_toosmall && + readdir->buffer == page) +- nfserr = nfserr_readdir_nospc; ++ nfserr = nfserr_toosmall; ++ if (nfserr == nfserr_symlink) ++ nfserr = nfserr_notdir; + if (nfserr) +- return nfserr; ++ goto err_no_verf; + + if (readdir->offset) + xdr_encode_hyper(readdir->offset, offset); +@@ -2146,6 +2280,10 @@ nfsd4_encode_readdir(struct nfsd4_compou + resp->end = resp->p + PAGE_SIZE/4; + + return 0; ++err_no_verf: ++ p = savep; ++ ADJUST_ARGS(); ++ return nfserr; + } + + static void +@@ -2237,7 +2375,7 @@ nfsd4_encode_operation(struct nfsd4_comp + + RESERVE_SPACE(8); + WRITE32(op->opnum); +- statp = p++; /* to be backfilled at the end */ ++ statp = p++; /* to be backfilled at the end */ + ADJUST_ARGS(); + + switch (op->opnum) { +@@ -2324,6 +2462,8 @@ nfsd4_encode_operation(struct nfsd4_comp + case OP_WRITE: + 
nfsd4_encode_write(resp, op->status, &op->u.write); + break; ++ case OP_RELEASE_LOCKOWNER: ++ break; + default: + break; + } +@@ -2340,6 +2480,8 @@ nfsd4_encode_operation(struct nfsd4_comp + * + * XDR note: do not encode rp->rp_buflen: the buffer contains the + * previously sent already encoded operation. ++ * ++ * called with nfs4_lock_state() held + */ + void + nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op) +@@ -2351,12 +2493,13 @@ nfsd4_encode_replay(struct nfsd4_compoun + + RESERVE_SPACE(8); + WRITE32(op->opnum); +- WRITE32(NFS_OK); ++ *p++ = rp->rp_status; /* already xdr'ed */ + ADJUST_ARGS(); + + RESERVE_SPACE(rp->rp_buflen); + WRITEMEM(rp->rp_buf, rp->rp_buflen); + ADJUST_ARGS(); ++ nfs4_unlock_state(); + } + + /* +@@ -2369,6 +2512,24 @@ nfs4svc_encode_voidres(struct svc_rqst * + return xdr_ressize_check(rqstp, p); + } + ++void nfsd4_release_compoundargs(struct nfsd4_compoundargs *args) ++{ ++ if (args->ops != args->iops) { ++ kfree(args->ops); ++ args->ops = args->iops; ++ } ++ if (args->tmpp) { ++ kfree(args->tmpp); ++ args->tmpp = NULL; ++ } ++ while (args->to_free) { ++ struct tmpbuf *tb = args->to_free; ++ args->to_free = tb->next; ++ tb->release(tb->buf); ++ kfree(tb); ++ } ++} ++ + int + nfs4svc_decode_compoundargs(struct svc_rqst *rqstp, u32 *p, struct nfsd4_compoundargs *args) + { +@@ -2381,23 +2542,11 @@ nfs4svc_decode_compoundargs(struct svc_r + args->tmpp = NULL; + args->to_free = NULL; + args->ops = args->iops; ++ args->rqstp = rqstp; + + status = nfsd4_decode_compound(args); + if (status) { +- if (args->ops != args->iops) { +- kfree(args->ops); +- args->ops = args->iops; +- } +- if (args->tmpp) { +- kfree(args->tmpp); +- args->tmpp = NULL; +- } +- while (args->to_free) { +- struct tmpbuf *tb = args->to_free; +- args->to_free = tb->next; +- kfree(tb->buf); +- kfree(tb); +- } ++ nfsd4_release_compoundargs(args); + } + return !status; + } +diff -puN include/linux/nfsd/state.h~CITI_NFS4_ALL include/linux/nfsd/state.h +--- 
linux-2.6.3/include/linux/nfsd/state.h~CITI_NFS4_ALL 2004-02-19 16:47:05.000000000 -0500 ++++ linux-2.6.3-bfields/include/linux/nfsd/state.h 2004-02-19 16:47:15.000000000 -0500 +@@ -113,6 +113,8 @@ struct nfs4_replay { + unsigned int rp_buflen; + char *rp_buf; + unsigned intrp_allocated; ++ int rp_openfh_len; ++ char rp_openfh[NFS4_FHSIZE]; + char rp_ibuf[NFSD4_REPLAY_ISIZE]; + }; + +@@ -128,12 +130,20 @@ struct nfs4_replay { + * so_perfilestate: heads the list of nfs4_stateid (either open or lock) + * and is used to ensure no dangling nfs4_stateid references when we + * release a stateowner. ++* so_perlockowner: (open) nfs4_stateid->st_perlockowner entry - used when ++* close is called to reap associated byte-range locks ++* so_close_lru: (open) stateowner is placed on this list instead of being ++* reaped (when so_perfilestate is empty) to hold the last close replay. ++* reaped by laundramat thread after lease period. + */ + struct nfs4_stateowner { + struct list_head so_idhash; /* hash by so_id */ + struct list_head so_strhash; /* hash by op_name */ + struct list_head so_perclient; /* nfs4_client->cl_perclient */ + struct list_head so_perfilestate; /* list: nfs4_stateid */ ++ struct list_head so_perlockowner; /* nfs4_stateid->st_perlockowner */ ++ struct list_head so_close_lru; /* tail queue */ ++ time_t so_time; /* time of placement on so_close_lru */ + int so_is_open_owner; /* 1=openowner,0=lockowner */ + u32 so_id; + struct nfs4_client * so_client; +@@ -164,21 +174,23 @@ struct nfs4_file { + * st_hash: stateid_hashtbl[] entry or lockstateid_hashtbl entry + * st_perfile: file_hashtbl[] entry. 
+ * st_perfile_state: nfs4_stateowner->so_perfilestate +-* st_share_access: used only for open stateid +-* st_share_deny: used only for open stateid ++* st_perlockowner: (open stateid) list of lock nfs4_stateowners ++* st_access_bmap: used only for open stateid ++* st_deny_bmap: used only for open stateid + */ + + struct nfs4_stateid { + struct list_head st_hash; + struct list_head st_perfile; + struct list_head st_perfilestate; ++ struct list_head st_perlockowner; + struct nfs4_stateowner * st_stateowner; + struct nfs4_file * st_file; + stateid_t st_stateid; + struct file st_vfs_file; + int st_vfs_set; +- unsigned int st_share_access; +- unsigned int st_share_deny; ++ unsigned long st_access_bmap; ++ unsigned long st_deny_bmap; + }; + + /* flags for preprocess_seqid_op() */ +@@ -187,6 +199,7 @@ struct nfs4_stateid { + #define OPEN_STATE 0x00000004 + #define LOCK_STATE 0x00000008 + #define RDWR_STATE 0x00000010 ++#define CLOSE_STATE 0x00000020 + + #define seqid_mutating_err(err) \ + (((err) != nfserr_stale_clientid) && \ +diff -puN fs/nfsd/Makefile~CITI_NFS4_ALL fs/nfsd/Makefile +--- linux-2.6.3/fs/nfsd/Makefile~CITI_NFS4_ALL 2004-02-19 16:47:06.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfsd/Makefile 2004-02-19 16:47:06.000000000 -0500 +@@ -7,5 +7,5 @@ obj-$(CONFIG_NFSD) += nfsd.o + nfsd-y := nfssvc.o nfsctl.o nfsproc.o nfsfh.o vfs.o \ + export.o auth.o lockd.o nfscache.o nfsxdr.o stats.o + nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o +-nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o ++nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o + nfsd-objs := $(nfsd-y) +diff -puN /dev/null fs/nfsd/nfs4idmap.c +--- /dev/null 2004-01-26 19:20:21.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfsd/nfs4idmap.c 2004-02-19 16:47:06.000000000 -0500 +@@ -0,0 +1,569 @@ ++/* ++ * fs/nfsd/nfs4idmap.c ++ * ++ * Mapping of UID/GIDs to name and vice versa. ++ * ++ * Copyright (c) 2002, 2003 The Regents of the University of ++ * Michigan. 
All rights reserved. ++ * ++ * Marius Aamodt Eriksen ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR ++ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++ ++#include ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++/* ++ * Cache entry ++ */ ++ ++/* ++ * XXX we know that IDMAP_NAMESZ < PAGE_SIZE, but it's ugly to rely on ++ * that. 
++ */ ++ ++#define IDMAP_TYPE_USER 0 ++#define IDMAP_TYPE_GROUP 1 ++ ++struct ent { ++ struct cache_head h; ++ int type; /* User / Group */ ++ uid_t id; ++ char name[IDMAP_NAMESZ]; ++ char authname[IDMAP_NAMESZ]; ++}; ++ ++#define DefineSimpleCacheLookupMap(STRUCT, FUNC) \ ++ DefineCacheLookup(struct STRUCT, h, FUNC##_lookup, \ ++ (struct STRUCT *item, int set), /*no setup */, \ ++ & FUNC##_cache, FUNC##_hash(item), FUNC##_match(item, tmp), \ ++ STRUCT##_init(new, item), STRUCT##_update(tmp, item), 0) ++ ++/* Common entry handling */ ++ ++#define ENT_HASHBITS 8 ++#define ENT_HASHMAX (1 << ENT_HASHBITS) ++#define ENT_HASHMASK (ENT_HASHMAX - 1) ++ ++static inline void ++ent_init(struct ent *new, struct ent *itm) ++{ ++ new->id = itm->id; ++ new->type = itm->type; ++ ++ strlcpy(new->name, itm->name, sizeof(new->name)); ++ strlcpy(new->authname, itm->authname, sizeof(new->name)); ++} ++ ++static inline void ++ent_update(struct ent *new, struct ent *itm) ++{ ++ ent_init(new, itm); ++} ++ ++void ++ent_put(struct cache_head *ch, struct cache_detail *cd) ++{ ++ if (cache_put(ch, cd)) { ++ struct ent *map = container_of(ch, struct ent, h); ++ kfree(map); ++ } ++} ++ ++/* ++ * ID -> Name cache ++ */ ++ ++static struct cache_head *idtoname_table[ENT_HASHMAX]; ++ ++static uint32_t ++idtoname_hash(struct ent *ent) ++{ ++ uint32_t hash; ++ ++ hash = hash_str(ent->authname, ENT_HASHBITS); ++ hash = hash_long(hash ^ ent->id, ENT_HASHBITS); ++ ++ /* Flip LSB for user/group */ ++ if (ent->type == IDMAP_TYPE_GROUP) ++ hash ^= 1; ++ ++ return hash; ++} ++ ++static void ++idtoname_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, ++ int *blen) ++{ ++ struct ent *ent = container_of(ch, struct ent, h); ++ char idstr[11]; ++ ++ qword_add(bpp, blen, ent->authname); ++ snprintf(idstr, sizeof(idstr), "%d", ent->id); ++ qword_add(bpp, blen, ent->type == IDMAP_TYPE_GROUP ? 
"group" : "user"); ++ qword_add(bpp, blen, idstr); ++ ++ (*bpp)[-1] = '\n'; ++} ++ ++static inline int ++idtoname_match(struct ent *a, struct ent *b) ++{ ++ return (a->id == b->id && a->type == b->type && ++ strcmp(a->authname, b->authname) == 0); ++} ++ ++static int ++idtoname_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) ++{ ++ struct ent *ent; ++ ++ if (h == NULL) { ++ seq_puts(m, "#domain type id [name]\n"); ++ return 0; ++ } ++ ent = container_of(h, struct ent, h); ++ seq_printf(m, "%s %s %d", ent->authname, ++ ent->type == IDMAP_TYPE_GROUP ? "group" : "user", ++ ent->id); ++ if (test_bit(CACHE_VALID, &h->flags)) ++ seq_printf(m, " %s", ent->name); ++ seq_printf(m, "\n"); ++ return 0; ++} ++ ++static int idtoname_parse(struct cache_detail *, char *, int); ++static struct ent *idtoname_lookup(struct ent *, int); ++ ++struct cache_detail idtoname_cache = { ++ .hash_size = ENT_HASHMAX, ++ .hash_table = idtoname_table, ++ .name = "nfs4.idtoname", ++ .cache_put = ent_put, ++ .cache_request = idtoname_request, ++ .cache_parse = idtoname_parse, ++ .cache_show = idtoname_show, ++}; ++ ++int ++idtoname_parse(struct cache_detail *cd, char *buf, int buflen) ++{ ++ struct ent ent, *res; ++ char *buf1, *bp; ++ int error = -EINVAL; ++ ++ if (buf[buflen - 1] != '\n') ++ return (-EINVAL); ++ buf[buflen - 1]= '\0'; ++ ++ buf1 = kmalloc(PAGE_SIZE, GFP_KERNEL); ++ if (buf1 == NULL) ++ return (-ENOMEM); ++ ++ memset(&ent, 0, sizeof(ent)); ++ ++ /* Authentication name */ ++ if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) ++ goto out; ++ memcpy(ent.authname, buf1, sizeof(ent.authname)); ++ ++ /* Type */ ++ if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) ++ goto out; ++ ent.type = strcmp(buf1, "user") == 0 ? 
++ IDMAP_TYPE_USER : IDMAP_TYPE_GROUP; ++ ++ /* ID */ ++ if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) ++ goto out; ++ ent.id = simple_strtoul(buf1, &bp, 10); ++ if (bp == buf1) ++ goto out; ++ ++ /* expiry */ ++ ent.h.expiry_time = get_expiry(&buf); ++ if (ent.h.expiry_time == 0) ++ goto out; ++ ++ /* Name */ ++ error = qword_get(&buf, buf1, PAGE_SIZE); ++ if (error == -EINVAL) ++ goto out; ++ if (error == -ENOENT) ++ set_bit(CACHE_NEGATIVE, &ent.h.flags); ++ else { ++ if (error >= IDMAP_NAMESZ) { ++ error = -EINVAL; ++ goto out; ++ } ++ memcpy(ent.name, buf1, sizeof(ent.name)); ++ } ++ error = -ENOMEM; ++ if ((res = idtoname_lookup(&ent, 1)) == NULL) ++ goto out; ++ ++ ent_put(&res->h, &idtoname_cache); ++ ++ error = 0; ++out: ++ kfree(buf1); ++ ++ return error; ++} ++ ++static DefineSimpleCacheLookupMap(ent, idtoname); ++ ++/* ++ * Name -> ID cache ++ */ ++ ++static struct cache_head *nametoid_table[ENT_HASHMAX]; ++ ++static inline int ++nametoid_hash(struct ent *ent) ++{ ++ return hash_str(ent->name, ENT_HASHBITS); ++} ++ ++void ++nametoid_request(struct cache_detail *cd, struct cache_head *ch, char **bpp, ++ int *blen) ++{ ++ struct ent *ent = container_of(ch, struct ent, h); ++ ++ qword_add(bpp, blen, ent->authname); ++ qword_add(bpp, blen, ent->type == IDMAP_TYPE_GROUP ? "group" : "user"); ++ qword_add(bpp, blen, ent->name); ++ ++ (*bpp)[-1] = '\n'; ++} ++ ++static inline int ++nametoid_match(struct ent *a, struct ent *b) ++{ ++ return (a->type == b->type && strcmp(a->name, b->name) == 0 && ++ strcmp(a->authname, b->authname) == 0); ++} ++ ++static int ++nametoid_show(struct seq_file *m, struct cache_detail *cd, struct cache_head *h) ++{ ++ struct ent *ent; ++ ++ if (h == NULL) { ++ seq_puts(m, "#domain type name [id]\n"); ++ return 0; ++ } ++ ent = container_of(h, struct ent, h); ++ seq_printf(m, "%s %s %s", ent->authname, ++ ent->type == IDMAP_TYPE_GROUP ? 
"group" : "user", ++ ent->name); ++ if (test_bit(CACHE_VALID, &h->flags)) ++ seq_printf(m, " %d", ent->id); ++ seq_printf(m, "\n"); ++ return 0; ++} ++ ++static struct ent *nametoid_lookup(struct ent *, int); ++int nametoid_parse(struct cache_detail *, char *, int); ++ ++struct cache_detail nametoid_cache = { ++ .hash_size = ENT_HASHMAX, ++ .hash_table = nametoid_table, ++ .name = "nfs4.nametoid", ++ .cache_put = ent_put, ++ .cache_request = nametoid_request, ++ .cache_parse = nametoid_parse, ++ .cache_show = nametoid_show, ++}; ++ ++int ++nametoid_parse(struct cache_detail *cd, char *buf, int buflen) ++{ ++ struct ent ent, *res; ++ char *buf1; ++ int error = -EINVAL; ++ ++ if (buf[buflen - 1] != '\n') ++ return (-EINVAL); ++ buf[buflen - 1]= '\0'; ++ ++ buf1 = kmalloc(PAGE_SIZE, GFP_KERNEL); ++ if (buf1 == NULL) ++ return (-ENOMEM); ++ ++ memset(&ent, 0, sizeof(ent)); ++ ++ /* Authentication name */ ++ if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) ++ goto out; ++ memcpy(ent.authname, buf1, sizeof(ent.authname)); ++ ++ /* Type */ ++ if (qword_get(&buf, buf1, PAGE_SIZE) <= 0) ++ goto out; ++ ent.type = strcmp(buf1, "user") == 0 ? 
++ IDMAP_TYPE_USER : IDMAP_TYPE_GROUP; ++ ++ /* Name */ ++ error = qword_get(&buf, buf1, PAGE_SIZE); ++ if (error <= 0 || error >= IDMAP_NAMESZ) ++ goto out; ++ memcpy(ent.name, buf1, sizeof(ent.name)); ++ ++ /* expiry */ ++ ent.h.expiry_time = get_expiry(&buf); ++ if (ent.h.expiry_time == 0) ++ goto out; ++ ++ /* ID */ ++ error = get_int(&buf, &ent.id); ++ if (error == -EINVAL) ++ goto out; ++ if (error == -ENOENT) ++ set_bit(CACHE_NEGATIVE, &ent.h.flags); ++ ++ error = -ENOMEM; ++ if ((res = nametoid_lookup(&ent, 1)) == NULL) ++ goto out; ++ ++ ent_put(&res->h, &nametoid_cache); ++ error = 0; ++out: ++ kfree(buf1); ++ ++ return (error); ++} ++ ++static DefineSimpleCacheLookupMap(ent, nametoid); ++ ++/* ++ * Exported API ++ */ ++ ++void ++nfsd_idmap_init(void) ++{ ++ cache_register(&idtoname_cache); ++ cache_register(&nametoid_cache); ++} ++ ++void ++nfsd_idmap_shutdown(void) ++{ ++ cache_unregister(&idtoname_cache); ++ cache_unregister(&nametoid_cache); ++} ++ ++/* ++ * Deferred request handling ++ */ ++ ++struct idmap_defer_req { ++ struct cache_req req; ++ struct cache_deferred_req deferred_req; ++ wait_queue_head_t waitq; ++ atomic_t count; ++}; ++ ++static void ++put_mdr(struct idmap_defer_req *mdr) ++{ ++ if (atomic_dec_and_test(&mdr->count)) ++ kfree(mdr); ++} ++ ++static void ++idmap_revisit(struct cache_deferred_req *dreq, int toomany) ++{ ++ struct idmap_defer_req *mdr = ++ container_of(dreq, struct idmap_defer_req, deferred_req); ++ ++ wake_up(&mdr->waitq); ++ put_mdr(mdr); ++} ++ ++static struct cache_deferred_req * ++idmap_defer(struct cache_req *req) ++{ ++ struct idmap_defer_req *mdr = ++ container_of(req, struct idmap_defer_req, req); ++ ++ mdr->deferred_req.revisit = idmap_revisit; ++ return (&mdr->deferred_req); ++} ++ ++static int threads_waiting = 0; ++ ++static inline int ++idmap_lookup_wait(struct idmap_defer_req *mdr, wait_queue_t waitq, struct ++ svc_rqst *rqstp) { ++ int ret = -ETIMEDOUT; ++ ++ set_task_state(current, TASK_INTERRUPTIBLE); 
++ lock_kernel(); ++ /* XXX: Does it matter that threads_waiting isn't per-server? */ ++ /* Note: BKL prevents races with nfsd_svc and other lookups */ ++ if (2 * threads_waiting > rqstp->rq_server->sv_nrthreads) ++ goto out; ++ threads_waiting++; ++ schedule_timeout(10 * HZ); ++ threads_waiting--; ++ ret = 0; ++out: ++ unlock_kernel(); ++ remove_wait_queue(&mdr->waitq, &waitq); ++ set_task_state(current, TASK_RUNNING); ++ put_mdr(mdr); ++ return ret; ++} ++ ++static int ++idmap_lookup(struct svc_rqst *rqstp, ++ struct ent *(*lookup_fn)(struct ent *, int), struct ent *key, ++ struct cache_detail *detail, struct ent **item) ++{ ++ struct idmap_defer_req *mdr; ++ DECLARE_WAITQUEUE(waitq, current); ++ int ret; ++ ++ *item = lookup_fn(key, 0); ++ if (!*item) ++ return -ENOMEM; ++ mdr = kmalloc(sizeof(*mdr), GFP_KERNEL); ++ memset(mdr, 0, sizeof(*mdr)); ++ init_waitqueue_head(&mdr->waitq); ++ add_wait_queue(&mdr->waitq, &waitq); ++ atomic_set(&mdr->count, 2); ++ mdr->req.defer = idmap_defer; ++ ret = cache_check(detail, &(*item)->h, &mdr->req); ++ if (ret == -EAGAIN) { ++ ret = idmap_lookup_wait(mdr, waitq, rqstp); ++ if (ret) ++ goto out; ++ /* Try again, but don't wait. 
*/ ++ *item = lookup_fn(key, 0); ++ ret = -ENOMEM; ++ if (!*item) ++ goto out; ++ ret = -ETIMEDOUT; ++ if (!test_bit(CACHE_VALID, &(*item)->h.flags)) { ++ ent_put(&(*item)->h, detail); ++ goto out; ++ } ++ ret = cache_check(detail, &(*item)->h, NULL); ++ } ++out: ++ return ret; ++} ++ ++static int ++idmap_name_to_id(struct svc_rqst *rqstp, int type, const char *name, u32 namelen, ++ uid_t *id) ++{ ++ struct ent *item, key = { ++ .type = type, ++ }; ++ int ret; ++ ++ if (namelen + 1 > sizeof(key.name)) ++ return -EINVAL; ++ memcpy(key.name, name, namelen); ++ key.name[namelen] = '\0'; ++ strlcpy(key.authname, rqstp->rq_client->name, sizeof(key.authname)); ++ ret = idmap_lookup(rqstp, nametoid_lookup, &key, &nametoid_cache, &item); ++ if (ret) ++ return ret; ++ *id = item->id; ++ ent_put(&item->h, &nametoid_cache); ++ return 0; ++} ++ ++static int ++idmap_id_to_name(struct svc_rqst *rqstp, int type, uid_t id, char *name) ++{ ++ struct ent *item, key = { ++ .id = id, ++ .type = type, ++ }; ++ int ret; ++ ++ strlcpy(key.authname, rqstp->rq_client->name, sizeof(key.authname)); ++ ret = idmap_lookup(rqstp, idtoname_lookup, &key, &idtoname_cache, &item); ++ if (ret) ++ return ret; ++ ret = strlen(item->name); ++ BUG_ON(ret > IDMAP_NAMESZ); ++ memcpy(name, item->name, ret); ++ ent_put(&item->h, &idtoname_cache); ++ return ret; ++} ++ ++int ++nfsd_map_name_to_uid(struct svc_rqst *rqstp, const char *name, size_t namelen, ++ __u32 *id) ++{ ++ return idmap_name_to_id(rqstp, IDMAP_TYPE_USER, name, namelen, id); ++} ++ ++int ++nfsd_map_name_to_gid(struct svc_rqst *rqstp, const char *name, size_t namelen, ++ __u32 *id) ++{ ++ return idmap_name_to_id(rqstp, IDMAP_TYPE_GROUP, name, namelen, id); ++} ++ ++int ++nfsd_map_uid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) ++{ ++ return idmap_id_to_name(rqstp, IDMAP_TYPE_USER, id, name); ++} ++ ++int ++nfsd_map_gid_to_name(struct svc_rqst *rqstp, __u32 id, char *name) ++{ ++ return idmap_id_to_name(rqstp, IDMAP_TYPE_GROUP, id, 
name); ++} +diff -puN fs/nfsd/nfsctl.c~CITI_NFS4_ALL fs/nfsd/nfsctl.c +--- linux-2.6.3/fs/nfsd/nfsctl.c~CITI_NFS4_ALL 2004-02-19 16:47:06.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfsd/nfsctl.c 2004-02-19 16:47:06.000000000 -0500 +@@ -24,6 +24,7 @@ + #include + + #include ++#include + #include + #include + #include +@@ -436,6 +437,9 @@ static int __init init_nfsd(void) + nfsd_cache_init(); /* RPC reply cache */ + nfsd_export_init(); /* Exports table */ + nfsd_lockd_init(); /* lockd->nfsd callbacks */ ++#ifdef CONFIG_NFSD_V4 ++ nfsd_idmap_init(); /* Name to ID mapping */ ++#endif /* CONFIG_NFSD_V4 */ + if (proc_mkdir("fs/nfs", 0)) { + struct proc_dir_entry *entry; + entry = create_proc_entry("fs/nfs/exports", 0, NULL); +@@ -462,6 +466,9 @@ static void __exit exit_nfsd(void) + remove_proc_entry("fs/nfs", NULL); + nfsd_stat_shutdown(); + nfsd_lockd_shutdown(); ++#ifdef CONFIG_NFSD_V4 ++ nfsd_idmap_shutdown(); ++#endif /* CONFIG_NFSD_V4 */ + unregister_filesystem(&nfsd_fs_type); + } + +diff -puN fs/nfsd/nfsproc.c~CITI_NFS4_ALL fs/nfsd/nfsproc.c +--- linux-2.6.3/fs/nfsd/nfsproc.c~CITI_NFS4_ALL 2004-02-19 16:47:06.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfsd/nfsproc.c 2004-02-19 16:47:06.000000000 -0500 +@@ -585,6 +585,7 @@ nfserrno (int errno) + { nfserr_dquot, -EDQUOT }, + #endif + { nfserr_stale, -ESTALE }, ++ { nfserr_jukebox, -ETIMEDOUT }, + { nfserr_dropit, -EAGAIN }, + { nfserr_dropit, -ENOMEM }, + { -1, -EIO } +diff -puN /dev/null include/linux/nfsd_idmap.h +--- /dev/null 2004-01-26 19:20:21.000000000 -0500 ++++ linux-2.6.3-bfields/include/linux/nfsd_idmap.h 2004-02-19 16:47:06.000000000 -0500 +@@ -0,0 +1,54 @@ ++/* ++ * include/linux/nfsd_idmap.h ++ * ++ * Mapping of UID to name and vice versa. ++ * ++ * Copyright (c) 2002, 2003 The Regents of the University of ++ * Michigan. All rights reserved. 
++> * ++ * Marius Aamodt Eriksen ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR ++ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++ */ ++ ++#ifndef LINUX_NFSD_IDMAP_H ++#define LINUX_NFSD_IDMAP_H ++ ++#include ++#include ++ ++/* XXX from linux/nfs_idmap.h */ ++#define IDMAP_NAMESZ 128 ++ ++void nfsd_idmap_init(void); ++void nfsd_idmap_shutdown(void); ++ ++int nfsd_map_name_to_uid(struct svc_rqst *, const char *, size_t, __u32 *); ++int nfsd_map_name_to_gid(struct svc_rqst *, const char *, size_t, __u32 *); ++int nfsd_map_uid_to_name(struct svc_rqst *, __u32, char *); ++int nfsd_map_gid_to_name(struct svc_rqst *, __u32, char *); ++ ++#endif /* LINUX_NFSD_IDMAP_H */ +diff -puN include/linux/nfsd/xdr4.h~CITI_NFS4_ALL include/linux/nfsd/xdr4.h +--- linux-2.6.3/include/linux/nfsd/xdr4.h~CITI_NFS4_ALL 2004-02-19 16:47:06.000000000 -0500 ++++ linux-2.6.3-bfields/include/linux/nfsd/xdr4.h 2004-02-19 16:47:10.000000000 -0500 +@@ -39,6 +39,8 @@ + #ifndef _LINUX_NFSD_XDR4_H + #define _LINUX_NFSD_XDR4_H + ++#include ++ + #define NFSD4_MAX_TAGLEN 128 + #define XDR_LEN(n) (((n) + 3) & ~3) + +@@ -54,10 +56,10 @@ typedef struct { + + struct nfsd4_change_info { + u32 atomic; +- u32 before_size; +- u32 before_ctime; +- u32 after_size; +- u32 after_ctime; ++ u32 before_ctime_sec; ++ u32 before_ctime_nsec; ++ u32 after_ctime_sec; ++ u32 after_ctime_nsec; + }; + + struct nfsd4_access { +@@ -95,6 +97,7 @@ struct nfsd4_create { + u32 cr_bmval[2]; /* request */ + struct iattr cr_iattr; /* request */ + struct nfsd4_change_info cr_cinfo; /* response */ ++ struct nfs4_acl *cr_acl; + }; + #define cr_linklen u.link.namelen + #define cr_linkname u.link.name +@@ -216,7 +219,7 @@ struct nfsd4_open { + u32 op_rflags; /* response */ + int op_truncate; /* used during processing */ + struct nfs4_stateowner *op_stateowner; /* used during processing */ +- ++ struct nfs4_acl *op_acl; + }; + #define op_iattr u.iattr + #define op_verf u.verf +@@ -263,6 +266,10 @@ struct nfsd4_readdir { + u32 * offset; + }; + ++struct nfsd4_release_lockowner { ++ clientid_t rl_clientid; ++ struct xdr_netobj rl_owner; ++}; + struct nfsd4_readlink { + 
struct svc_rqst *rl_rqstp; /* request */ + struct svc_fh * rl_fhp; /* request */ +@@ -287,6 +294,7 @@ struct nfsd4_setattr { + stateid_t sa_stateid; /* request */ + u32 sa_bmval[2]; /* request */ + struct iattr sa_iattr; /* request */ ++ struct nfs4_acl *sa_acl; + }; + + struct nfsd4_setclientid { +@@ -359,6 +367,7 @@ struct nfsd4_op { + struct nfsd4_setclientid_confirm setclientid_confirm; + struct nfsd4_verify verify; + struct nfsd4_write write; ++ struct nfsd4_release_lockowner release_lockowner; + } u; + struct nfs4_replay * replay; + }; +@@ -373,9 +382,12 @@ struct nfsd4_compoundargs { + u32 * tmpp; + struct tmpbuf { + struct tmpbuf *next; ++ void (*release)(const void *); + void *buf; + } *to_free; +- ++ ++ struct svc_rqst *rqstp; ++ + u32 taglen; + char * tag; + u32 minorversion; +@@ -404,10 +416,10 @@ set_change_info(struct nfsd4_change_info + { + BUG_ON(!fhp->fh_pre_saved || !fhp->fh_post_saved); + cinfo->atomic = 1; +- cinfo->before_size = fhp->fh_pre_size; +- cinfo->before_ctime = fhp->fh_pre_ctime.tv_sec; +- cinfo->after_size = fhp->fh_post_size; +- cinfo->after_ctime = fhp->fh_post_ctime.tv_sec; ++ cinfo->before_ctime_sec = fhp->fh_pre_ctime.tv_sec; ++ cinfo->before_ctime_nsec = fhp->fh_pre_ctime.tv_nsec; ++ cinfo->after_ctime_sec = fhp->fh_post_ctime.tv_sec; ++ cinfo->after_ctime_nsec = fhp->fh_post_ctime.tv_nsec; + } + + int nfs4svc_encode_voidres(struct svc_rqst *, u32 *, void *); +@@ -419,7 +431,7 @@ void nfsd4_encode_operation(struct nfsd4 + void nfsd4_encode_replay(struct nfsd4_compoundres *resp, struct nfsd4_op *op); + int nfsd4_encode_fattr(struct svc_fh *fhp, struct svc_export *exp, + struct dentry *dentry, u32 *buffer, int *countp, +- u32 *bmval); ++ u32 *bmval, struct svc_rqst *); + extern int nfsd4_setclientid(struct svc_rqst *rqstp, + struct nfsd4_setclientid *setclid); + extern int nfsd4_setclientid_confirm(struct svc_rqst *rqstp, +@@ -439,6 +451,9 @@ extern int nfsd4_lockt(struct svc_rqst * + struct nfsd4_lockt *lockt); + extern int 
nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, + struct nfsd4_locku *locku); ++extern int nfsd4_release_lockowner(struct svc_rqst *rqstp, ++ struct nfsd4_release_lockowner *rlockowner); ++extern void nfsd4_release_compoundargs(struct nfsd4_compoundargs *); + #endif + + /* +diff -puN -L include/linux/sunrpc/name_lookup.h include/linux/sunrpc/name_lookup.h~CITI_NFS4_ALL /dev/null +--- linux-2.6.3/include/linux/sunrpc/name_lookup.h ++++ /dev/null 2004-01-26 19:20:21.000000000 -0500 +@@ -1,38 +0,0 @@ +- +-/* +- * map between user/group name and id for a given 'client' +- */ +- +-struct name_ent { +- char name[20]; +-}; +-static inline int name_get_user(int uid, struct name_ent **namep) +-{ +- struct name_ent *n = kmalloc(sizeof(*n),GFP_KERNEL); +- if (n) sprintf(n->name, "%d",uid); +- *namep = n; +- return n ? 0 : -ENOMEM; +-} +-static inline int name_get_group(int uid, struct name_ent **namep) +-{ +- struct name_ent *n = kmalloc(sizeof(*n),GFP_KERNEL); +- if (n) sprintf(n->name, "%d",uid); +- *namep = n; +- return n ? 
0 : -ENOMEM; +-} +-static inline int name_get_uid(char *name, int name_len, int *uidp) +-{ +- *uidp = simple_strtoul(name, NULL, 0); +- return 0; +-} +- +-static inline int name_get_gid(char *name, int name_len, int *gidp) +-{ +- *gidp = simple_strtoul(name, NULL, 0); +- return 0; +-} +- +-static inline void name_put(struct name_ent *ent) +-{ +- kfree(ent); +-} +diff -puN fs/Makefile~CITI_NFS4_ALL fs/Makefile +--- linux-2.6.3/fs/Makefile~CITI_NFS4_ALL 2004-02-19 16:47:06.000000000 -0500 ++++ linux-2.6.3-bfields/fs/Makefile 2004-02-19 16:47:06.000000000 -0500 +@@ -68,6 +68,7 @@ obj-$(CONFIG_NFS_FS) += nfs/ + obj-$(CONFIG_EXPORTFS) += exportfs/ + obj-$(CONFIG_NFSD) += nfsd/ + obj-$(CONFIG_LOCKD) += lockd/ ++obj-$(CONFIG_NFS_V4_ACL) += nfs4acl/ + obj-$(CONFIG_NLS) += nls/ + obj-$(CONFIG_SYSV_FS) += sysv/ + obj-$(CONFIG_SMB_FS) += smbfs/ +diff -puN /dev/null fs/nfs4acl/acl.c +--- /dev/null 2004-01-26 19:20:21.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfs4acl/acl.c 2004-02-19 16:47:06.000000000 -0500 +@@ -0,0 +1,921 @@ ++/* ++ * fs/nfs4acl/acl.c ++ * ++ * Common NFSv4 ACL handling code. ++ * ++ * Copyright (c) 2002, 2003 The Regents of the University of Michigan. ++ * All rights reserved. ++ * ++ * Marius Aamodt Eriksen ++ * Jeff Sedlak ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. 
++ * ++ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR ++ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#define NFS4_READ_MODE (NFS4_ACE_READ_DATA | NFS4_ACE_READ_NAMED_ATTRS) ++#define NFS4_WRITE_MODE (NFS4_ACE_WRITE_DATA | NFS4_ACE_WRITE_NAMED_ATTRS | NFS4_ACE_APPEND_DATA | NFS4_ACE_DELETE_CHILD) ++#define NFS4_EXECUTE_MODE NFS4_ACE_EXECUTE ++#define NFS4_ANYONE_MODE (NFS4_ACE_READ_ATTRIBUTES | NFS4_ACE_READ_ACL) ++#define NFS4_OWNER_MODE (NFS4_ACE_WRITE_ATTRIBUTES | NFS4_ACE_WRITE_ACL) ++ ++#define MASK_EQUAL(mask1, mask2) \ ++ ( ((mask1) & NFS4_ACE_MASK_ALL) == ((mask2) & NFS4_ACE_MASK_ALL) ) ++ ++static u32 ++mask_from_posix(unsigned short perm, int owner) ++{ ++ int mask = NFS4_ANYONE_MODE; ++ if (owner) ++ mask |= NFS4_OWNER_MODE; ++ if (perm & ACL_READ) ++ mask |= NFS4_READ_MODE; ++ if (perm & ACL_WRITE) ++ mask |= NFS4_WRITE_MODE; ++ if (perm & ACL_EXECUTE) ++ mask |= NFS4_EXECUTE_MODE; ++ return mask; ++} ++ ++static int ++mode_from_nfs4(u32 perm, unsigned short *mode, int owner) ++{ ++ /* XXX we might also want to ignore DELETE_CHILD on non-directories */ ++ /* XXX also add special interpretation to EXECUTE on directories */ ++ u32 ignore = NFS4_ACE_SYNCHRONIZE; ++ ++ *mode = 0; ++ if ((perm & 
NFS4_READ_MODE) == NFS4_READ_MODE) ++ *mode |= ACL_READ; ++ if ((perm & NFS4_WRITE_MODE) == NFS4_WRITE_MODE) ++ *mode |= ACL_WRITE; ++ if ((perm & NFS4_EXECUTE_MODE) == NFS4_EXECUTE_MODE) ++ *mode |= ACL_EXECUTE; ++ if (!MASK_EQUAL(ignore|perm, ignore|mask_from_posix(*mode, owner))) ++ return -EINVAL; ++ return 0; ++} ++ ++struct ace_container { ++ struct nfs4_ace *ace; ++ struct list_head ace_l; ++}; ++ ++static short ace2type(struct nfs4_ace *); ++static int _posix_to_nfsv4_one(struct nfs4_acl_idmapper *, void *idarg, struct posix_acl *, struct nfs4_acl *, int); ++static struct posix_acl *_nfsv4_to_posix_one(struct nfs4_acl_idmapper *, void *idarg, struct nfs4_acl *); ++ ++struct nfs4_acl * ++nfs4_acl_posix_to_nfsv4(struct nfs4_acl_idmapper *idmapper, void *idarg, ++ struct posix_acl *pacl, struct posix_acl *dpacl) ++{ ++ struct nfs4_acl *acl; ++ int error = -EINVAL; ++ ++ if ((pacl != NULL && ++ (posix_acl_valid(pacl) < 0 || pacl->a_count == 0)) || ++ (dpacl != NULL && ++ (posix_acl_valid(dpacl) < 0 || dpacl->a_count == 0))) ++ goto out_err; ++ ++ acl = nfs4_acl_new(); ++ if (acl == NULL) { ++ error = -ENOMEM; ++ goto out_err; ++ } ++ ++ if (pacl != NULL) { ++ error = _posix_to_nfsv4_one(idmapper, idarg, pacl, acl, 0); ++ if (error < 0) ++ goto out_acl; ++ } ++ ++ if (dpacl != NULL) { ++ error = _posix_to_nfsv4_one(idmapper, idarg, dpacl, acl, ++ NFS4_ACE_FILE_INHERIT_ACE | ++ NFS4_ACE_DIRECTORY_INHERIT_ACE | ++ NFS4_ACE_INHERIT_ONLY_ACE); ++ if (error < 0) ++ goto out_acl; ++ } ++ ++ return acl; ++ ++out_acl: ++ nfs4_acl_free(acl); ++out_err: ++ acl = ERR_PTR(error); ++ ++ return acl; ++} ++ ++static int ++nfs4_acl_add_pair(struct nfs4_acl *acl, int eflag, u32 mask, char *owner, ++ int owner_len) ++{ ++ int error; ++ ++ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE, ++ eflag, mask, owner, owner_len); ++ if (error < 0) ++ return error; ++ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, ++ eflag, ~mask, owner, owner_len); ++ return 
error; ++} ++ ++/* We assume the acl has been verified with posix_acl_valid. */ ++static int ++_posix_to_nfsv4_one(struct nfs4_acl_idmapper *idmapper, void *idarg, ++ struct posix_acl *pacl, struct nfs4_acl *acl, int eflag) ++{ ++ struct posix_acl_entry *pa, *pe, *group_owner_entry; ++ int error = -EINVAL; ++ u32 mask, mask_mask; ++ char xname[IDMAP_NAMESZ]; ++ int xnamelen; ++ ++ BUG_ON(pacl->a_count < 3); ++ pe = pacl->a_entries + pacl->a_count; ++ pa = pe - 2; /* if mask entry exists, it's second from the last. */ ++ if (pa->e_tag == ACL_MASK) ++ mask_mask = ~mask_from_posix(pa->e_perm, 0); ++ else ++ mask_mask = 0; ++ ++ pa = pacl->a_entries; ++ BUG_ON(pa->e_tag != ACL_USER_OBJ); ++ mask = mask_from_posix(pa->e_perm, 1); ++ error = nfs4_acl_add_pair(acl, eflag, mask, "OWNER@", ++ sizeof("OWNER@") - 1); ++ if (error < 0) ++ goto out; ++ pa++; ++ ++ while (pa->e_tag == ACL_USER) { ++ mask = mask_from_posix(pa->e_perm, 0); ++ error = idmapper->uid2name(idarg, pa->e_id, xname); ++ if (error < 0) ++ goto out; ++ xnamelen = error; ++ ++ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, ++ eflag, mask_mask, xname, xnamelen); ++ if (error < 0) ++ goto out; ++ ++ ++ error = nfs4_acl_add_pair(acl, eflag, mask, xname, xnamelen); ++ if (error < 0) ++ goto out; ++ pa++; ++ } ++ ++ /* In the case of groups, we apply allow ACEs first, then deny ACEs, ++ * since a user can be in more than one group. 
*/ ++ ++ /* allow ACEs */ ++ ++ if (pacl->a_count > 3) { ++ BUG_ON(pa->e_tag != ACL_GROUP_OBJ); ++ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, ++ NFS4_ACE_IDENTIFIER_GROUP | eflag, mask_mask, ++ "GROUP@", sizeof("GROUP@") - 1); ++ if (error < 0) ++ goto out; ++ } ++ group_owner_entry = pa; ++ mask = mask_from_posix(pa->e_perm, 0); ++ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE, ++ NFS4_ACE_IDENTIFIER_GROUP | eflag, mask, ++ "GROUP@", sizeof("GROUP@") - 1); ++ if (error < 0) ++ goto out; ++ pa++; ++ ++ while (pa->e_tag == ACL_GROUP) { ++ mask = mask_from_posix(pa->e_perm, 0); ++ error = idmapper->gid2name(idarg, pa->e_id, xname); ++ if (error < 0) ++ goto out; ++ xnamelen = error; ++ ++ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, ++ NFS4_ACE_IDENTIFIER_GROUP | eflag, ++ mask_mask, xname, xnamelen); ++ if (error < 0) ++ goto out; ++ ++ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE, ++ NFS4_ACE_IDENTIFIER_GROUP | eflag, mask, xname, xnamelen); ++ if (error < 0) ++ goto out; ++ pa++; ++ } ++ ++ /* deny ACEs */ ++ ++ pa = group_owner_entry; ++ mask = mask_from_posix(pa->e_perm, 0); ++ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, ++ NFS4_ACE_IDENTIFIER_GROUP | eflag, ++ ~mask, "GROUP@", sizeof("GROUP@") - 1); ++ if (error < 0) ++ goto out; ++ pa++; ++ while (pa->e_tag == ACL_GROUP) { ++ mask = mask_from_posix(pa->e_perm, 0); ++ error = idmapper->gid2name(idarg, pa->e_id, xname); ++ if (error < 0) ++ goto out; ++ xnamelen = error; ++ ++ error = nfs4_acl_add_ace(acl, NFS4_ACE_ACCESS_DENIED_ACE_TYPE, ++ NFS4_ACE_IDENTIFIER_GROUP | eflag, ~mask, xname, xnamelen); ++ if (error < 0) ++ goto out; ++ pa++; ++ } ++ ++ if (pa->e_tag == ACL_MASK) ++ pa++; ++ BUG_ON(pa->e_tag != ACL_OTHER); ++ mask = mask_from_posix(pa->e_perm, 0); ++ error = nfs4_acl_add_pair(acl, eflag, mask, "EVERYONE@", ++ sizeof("EVERYONE@") - 1); ++ ++out: ++ return error; ++} ++ ++static void ++sort_pacl_range(struct 
posix_acl *pacl, int start, int end) { ++ int sorted = 0, i; ++ struct posix_acl_entry tmp; ++ ++ /* We just do a bubble sort; easy to do in place, and we're not ++ * expecting acl's to be long enough to justify anything more. */ ++ while (!sorted) { ++ sorted = 1; ++ for (i = start; i < end; i++) { ++ if (pacl->a_entries[i].e_id ++ > pacl->a_entries[i+1].e_id) { ++ sorted = 0; ++ tmp = pacl->a_entries[i]; ++ pacl->a_entries[i] = pacl->a_entries[i+1]; ++ pacl->a_entries[i+1] = tmp; ++ } ++ } ++ } ++} ++ ++static void ++sort_pacl(struct posix_acl *pacl) ++{ ++ /* posix_acl_valid requires that users and groups be in order ++ * by uid/gid. */ ++ int i, j; ++ ++ if (pacl->a_count <= 4) ++ return; /* no users or groups */ ++ i = 1; ++ while (pacl->a_entries[i].e_tag == ACL_USER) ++ i++; ++ sort_pacl_range(pacl, 1, i-1); ++ ++ BUG_ON(pacl->a_entries[i].e_tag != ACL_GROUP_OBJ); ++ j = i++; ++ while (pacl->a_entries[j].e_tag == ACL_GROUP) ++ j++; ++ sort_pacl_range(pacl, i, j-1); ++ return; ++} ++ ++static int ++write_pace(struct nfs4_ace *ace, struct posix_acl *pacl, ++ struct posix_acl_entry **pace, short tag, ++ struct nfs4_acl_idmapper *idmapper, void *idarg) ++{ ++ struct posix_acl_entry *this = *pace;; ++ ++ if (*pace == pacl->a_entries + pacl->a_count) ++ return -EINVAL; /* fell off the end */ ++ (*pace)++; ++ this->e_tag = tag; ++ if (mode_from_nfs4(ace->access_mask, &this->e_perm, ++ tag == ACL_USER_OBJ)) ++ return -EINVAL; ++ switch (tag) { ++ case ACL_USER: ++ return idmapper->name2uid(idarg, ace->who, ace->wholen, ++ &this->e_id); ++ case ACL_GROUP: ++ return idmapper->name2gid(idarg, ace->who, ace->wholen, ++ &this->e_id); ++ default: ++ this->e_id = ACL_UNDEFINED_ID; ++ return 0; ++ } ++} ++ ++static struct nfs4_ace * ++get_next_v4_ace(struct list_head **p, struct list_head *head) ++{ ++ struct nfs4_ace *ace; ++ ++ *p = (*p)->next; ++ if (*p == head) ++ return NULL; ++ ace = list_entry(*p, struct nfs4_ace, l_ace); ++ ++ return ace; ++} ++ ++int 
++nfs4_acl_nfsv4_to_posix(struct nfs4_acl_idmapper *idmapper, void *idarg, ++ struct nfs4_acl *acl, struct posix_acl **pacl, ++ struct posix_acl **dpacl) ++{ ++ struct nfs4_acl *dacl; ++ int error = -ENOMEM; ++ ++ *pacl = NULL; ++ *dpacl = NULL; ++ ++ dacl = nfs4_acl_new(); ++ if (dacl == NULL) ++ goto out; ++ ++ error = nfs4_acl_split(acl, dacl); ++ if (error < 0) ++ goto out_acl; ++ ++ if (pacl != NULL) { ++ if (acl->naces == 0) { ++ error = -ENODATA; ++ goto try_dpacl; ++ } ++ ++ *pacl = _nfsv4_to_posix_one(idmapper, idarg, acl); ++ if (IS_ERR(*pacl)) { ++ error = PTR_ERR(*pacl); ++ *pacl = NULL; ++ goto out_acl; ++ } ++ } ++ ++try_dpacl: ++ if (dpacl != NULL) { ++ if (dacl->naces == 0) { ++ if (pacl == NULL || *pacl == NULL) ++ error = -ENODATA; ++ goto out_acl; ++ } ++ ++ error = 0; ++ *dpacl = _nfsv4_to_posix_one(idmapper, idarg, dacl); ++ if (IS_ERR(*dpacl)) { ++ error = PTR_ERR(*dpacl); ++ *dpacl = NULL; ++ goto out_acl; ++ } ++ } ++ ++out_acl: ++ if (error && pacl) { ++ posix_acl_release(*pacl); ++ *pacl = NULL; ++ } ++ nfs4_acl_free(dacl); ++out: ++ return error; ++} ++ ++static int ++complementary_ace_pair(struct nfs4_ace *allow, struct nfs4_ace *deny) ++{ ++ return MASK_EQUAL(allow->access_mask, ~deny->access_mask) && ++ allow->type == NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE && ++ deny->type == NFS4_ACE_ACCESS_DENIED_ACE_TYPE && ++ allow->flag == deny->flag && ++ allow->wholen == deny->wholen && ++ memcmp(allow->who, deny->who, allow->wholen) == 0; ++} ++ ++static inline int ++user_obj_from_v4(struct nfs4_acl *n4acl, struct list_head **p, ++ struct posix_acl *pacl, struct posix_acl_entry **pace, ++ struct nfs4_acl_idmapper *idmapper, void *idarg) ++{ ++ int error = -EINVAL; ++ struct nfs4_ace *ace, *ace2; ++ ++ ace = get_next_v4_ace(p, &n4acl->ace_head); ++ if (ace == NULL) ++ goto out; ++ if (ace2type(ace) != ACL_USER_OBJ) ++ goto out; ++ error = write_pace(ace, pacl, pace, ACL_USER_OBJ, idmapper, idarg); ++ if (error < 0) ++ goto out; ++ error = -EINVAL; ++ 
ace2 = get_next_v4_ace(p, &n4acl->ace_head); ++ if (ace2 == NULL) ++ goto out; ++ if (!complementary_ace_pair(ace, ace2)) ++ goto out; ++ error = 0; ++out: ++ return error; ++} ++ ++static inline int ++users_from_v4(struct nfs4_acl *n4acl, struct list_head **p, ++ struct nfs4_ace **mask_ace, ++ struct posix_acl *pacl, struct posix_acl_entry **pace, ++ struct nfs4_acl_idmapper *idmapper, void *idarg) ++{ ++ int error = -EINVAL; ++ struct nfs4_ace *ace, *ace2; ++ ++ ace = get_next_v4_ace(p, &n4acl->ace_head); ++ if (ace == NULL) ++ goto out; ++ while (ace2type(ace) == ACL_USER) { ++ if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) ++ goto out; ++ if (*mask_ace && ++ !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) ++ goto out; ++ *mask_ace = ace; ++ ace = get_next_v4_ace(p, &n4acl->ace_head); ++ if (ace == NULL) ++ goto out; ++ if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) ++ goto out; ++ error = write_pace(ace, pacl, pace, ACL_USER, idmapper, idarg); ++ if (error < 0) ++ goto out; ++ error = -EINVAL; ++ ace2 = get_next_v4_ace(p, &n4acl->ace_head); ++ if (ace2 == NULL) ++ goto out; ++ if (!complementary_ace_pair(ace, ace2)) ++ goto out; ++ if ((*mask_ace)->flag != ace2->flag || ++ ace2->wholen != (*mask_ace)->wholen || ++ memcmp(ace2->who, (*mask_ace)->who, ++ (*mask_ace)->wholen) != 0) ++ goto out; ++ ace = get_next_v4_ace(p, &n4acl->ace_head); ++ if (ace == NULL) ++ goto out; ++ } ++ error = 0; ++out: ++ return error; ++} ++ ++static inline int ++group_obj_and_groups_from_v4(struct nfs4_acl *n4acl, struct list_head **p, ++ struct nfs4_ace **mask_ace, ++ struct posix_acl *pacl, struct posix_acl_entry **pace, ++ struct nfs4_acl_idmapper *idmapper, void *idarg) ++{ ++ int error = -EINVAL; ++ struct nfs4_ace *ace, *ace2; ++ struct ace_container *ac; ++ struct list_head group_l; ++ ++ INIT_LIST_HEAD(&group_l); ++ ace = list_entry(*p, struct nfs4_ace, l_ace); ++ ++ /* group owner (mask and allow aces) */ ++ ++ if (pacl->a_count != 3) { ++ /* then the group 
owner should be preceded by mask */ ++ if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE) ++ goto out; ++ if (*mask_ace && ++ !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) ++ goto out; ++ *mask_ace = ace; ++ ace = get_next_v4_ace(p, &n4acl->ace_head); ++ if (ace == NULL) ++ goto out; ++ ++ if ((*mask_ace)->flag != ace->flag || ++ ace->wholen != (*mask_ace)->wholen || ++ memcmp(ace->who, (*mask_ace)->who, ++ (*mask_ace)->wholen) != 0) ++ goto out; ++ } ++ ++ if (ace2type(ace) != ACL_GROUP_OBJ) ++ goto out; ++ ++ ac = kmalloc(sizeof(*ac), GFP_KERNEL); ++ error = -ENOMEM; ++ if (ac == NULL) ++ goto out; ++ ac->ace = ace; ++ list_add_tail(&ac->ace_l, &group_l); ++ ++ error = -EINVAL; ++ if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) ++ goto out; ++ ++ error = write_pace(ace, pacl, pace, ACL_GROUP_OBJ, idmapper, idarg); ++ if (error < 0) ++ goto out; ++ ++ error = -EINVAL; ++ ace = get_next_v4_ace(p, &n4acl->ace_head); ++ if (ace == NULL) ++ goto out; ++ ++ /* groups (mask and allow aces) */ ++ ++ while (ace2type(ace) == ACL_GROUP) { ++ if (*mask_ace == NULL) ++ goto out; ++ ++ if (ace->type != NFS4_ACE_ACCESS_DENIED_ACE_TYPE || ++ !MASK_EQUAL(ace->access_mask, (*mask_ace)->access_mask)) ++ goto out; ++ *mask_ace = ace; ++ ++ ace = get_next_v4_ace(p, &n4acl->ace_head); ++ if (ace == NULL) ++ goto out; ++ ac = kmalloc(sizeof(*ac), GFP_KERNEL); ++ error = -ENOMEM; ++ if (ac == NULL) ++ goto out; ++ error = -EINVAL; ++ if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE || ++ ace->wholen != (*mask_ace)->wholen || ++ memcmp(ace->who, (*mask_ace)->who, (*mask_ace)->wholen) != 0) ++ goto out; ++ ++ ac->ace = ace; ++ list_add_tail(&ac->ace_l, &group_l); ++ ++ error = write_pace(ace, pacl, pace, ACL_GROUP, idmapper, idarg); ++ if (error < 0) ++ goto out; ++ error = -EINVAL; ++ ace = get_next_v4_ace(p, &n4acl->ace_head); ++ if (ace == NULL) ++ goto out; ++ } ++ ++ /* group owner (deny ace) */ ++ ++ if (ace2type(ace) != ACL_GROUP_OBJ) ++ goto out; ++ ac = 
list_entry(group_l.next, struct ace_container, ace_l); ++ ace2 = ac->ace; ++ if (!complementary_ace_pair(ace2, ace)) ++ goto out; ++ list_del(group_l.next); ++ kfree(ac); ++ ++ /* groups (deny aces) */ ++ ++ while (!list_empty(&group_l)) { ++ ace = get_next_v4_ace(p, &n4acl->ace_head); ++ if (ace == NULL) ++ goto out; ++ if (ace2type(ace) != ACL_GROUP) ++ goto out; ++ ac = list_entry(group_l.next, struct ace_container, ace_l); ++ ace2 = ac->ace; ++ if (!complementary_ace_pair(ace2, ace)) ++ goto out; ++ list_del(group_l.next); ++ kfree(ac); ++ } ++ ++ ace = get_next_v4_ace(p, &n4acl->ace_head); ++ if (ace == NULL) ++ goto out; ++ if (ace2type(ace) != ACL_OTHER) ++ goto out; ++ error = 0; ++out: ++ while (!list_empty(&group_l)) { ++ ac = list_entry(group_l.next, struct ace_container, ace_l); ++ list_del(group_l.next); ++ kfree(ac); ++ } ++ return error; ++} ++ ++static inline int ++mask_from_v4(struct nfs4_acl *n4acl, struct list_head **p, ++ struct nfs4_ace **mask_ace, ++ struct posix_acl *pacl, struct posix_acl_entry **pace, ++ struct nfs4_acl_idmapper *idmapper, void *idarg) ++{ ++ int error = -EINVAL; ++ struct nfs4_ace *ace; ++ ++ ace = list_entry(*p, struct nfs4_ace, l_ace); ++ if (pacl->a_count != 3) { ++ if (*mask_ace == NULL) ++ goto out; ++ (*mask_ace)->access_mask = ~(*mask_ace)->access_mask; ++ write_pace(*mask_ace, pacl, pace, ACL_MASK, idmapper, idarg); ++ } ++ error = 0; ++out: ++ return error; ++} ++ ++static inline int ++other_from_v4(struct nfs4_acl *n4acl, struct list_head **p, ++ struct posix_acl *pacl, struct posix_acl_entry **pace, ++ struct nfs4_acl_idmapper *idmapper, void *idarg) ++{ ++ int error = -EINVAL; ++ struct nfs4_ace *ace, *ace2; ++ ++ ace = list_entry(*p, struct nfs4_ace, l_ace); ++ if (ace->type != NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE) ++ goto out; ++ error = write_pace(ace, pacl, pace, ACL_OTHER, idmapper, idarg); ++ if (error < 0) ++ goto out; ++ error = -EINVAL; ++ ace2 = get_next_v4_ace(p, &n4acl->ace_head); ++ if (ace2 == NULL) 
++ goto out; ++ if (!complementary_ace_pair(ace, ace2)) ++ goto out; ++ error = 0; ++out: ++ return error; ++} ++ ++/* ++ * Number of POSIX ACL entries that correspond to n4acl->naces NFSv4 ACEs, ++ * or -1 when the ACE count cannot come from a POSIX ACL. ++ */ ++static int ++calculate_posix_ace_count(struct nfs4_acl *n4acl) ++{ ++ if (n4acl->naces == 6) /* owner, owner group, and other only */ ++ return 3; ++ else { /* Otherwise there must be a mask entry. */ ++ /* Also, the remaining entries are for named users and ++ * groups, and come in threes (mask, allow, deny): */ ++ /* naces is unsigned: reject short ACLs before naces - 7 wraps */ ++ if (n4acl->naces < 7) ++ return -1; ++ if ( (n4acl->naces - 7) % 3) ++ return -1; ++ return 4 + (n4acl->naces - 7)/3; ++ } ++} ++ ++ ++/* ++ * Translate one NFSv4 ACL into a newly allocated POSIX ACL. ++ * Returns the POSIX ACL, or ERR_PTR(-EINVAL) / ERR_PTR(-ENOMEM) on failure. ++ */ ++static struct posix_acl * ++_nfsv4_to_posix_one(struct nfs4_acl_idmapper *idmapper, void *idarg, struct nfs4_acl *n4acl) ++{ ++ struct posix_acl *pacl; ++ int error = -EINVAL, nace = 0; ++ struct list_head *p; ++ struct nfs4_ace *mask_ace = NULL; ++ struct posix_acl_entry *pace; ++ ++ nace = calculate_posix_ace_count(n4acl); ++ /* never hand a negative count to posix_acl_alloc(); error is -EINVAL here */ ++ if (nace < 0) ++ goto out_err; ++ ++ pacl = posix_acl_alloc(nace, GFP_KERNEL); ++ error = -ENOMEM; ++ if (pacl == NULL) ++ goto out_err; ++ ++ pace = &pacl->a_entries[0]; ++ p = &n4acl->ace_head; ++ ++ error = user_obj_from_v4(n4acl, &p, pacl, &pace, idmapper, idarg); ++ if (error) ++ goto out_acl; ++ ++ error = users_from_v4(n4acl, &p, &mask_ace, pacl, &pace, idmapper, ++ idarg); ++ if (error) ++ goto out_acl; ++ ++ error = group_obj_and_groups_from_v4(n4acl, &p, &mask_ace, pacl, &pace, ++ idmapper, idarg); ++ if (error) ++ goto out_acl; ++ ++ error = mask_from_v4(n4acl, &p, &mask_ace, pacl, &pace, idmapper, idarg); ++ if (error) ++ goto out_acl; ++ error = other_from_v4(n4acl, &p, pacl, &pace, idmapper, idarg); ++ if (error) ++ goto out_acl; ++ ++ error = -EINVAL; ++ if (p->next != &n4acl->ace_head) ++ goto out_acl; ++ if (pace != pacl->a_entries + pacl->a_count) ++ goto out_acl; ++ ++ sort_pacl(pacl); ++ ++ return pacl; ++out_acl: ++ posix_acl_release(pacl); ++out_err: ++ pacl = ERR_PTR(error); ++ return pacl; ++} ++ ++ ++struct nfs4_acl * ++nfs4_acl_new(void) ++{ ++ struct nfs4_acl *acl; ++ ++ if ((acl = kmalloc(sizeof(*acl), GFP_KERNEL)) == NULL) ++ 
return NULL; ++ ++ acl->naces = 0; ++ INIT_LIST_HEAD(&acl->ace_head); ++ ++ return acl; ++} ++ ++void ++nfs4_acl_free(struct nfs4_acl *acl) ++{ ++ struct list_head *h; ++ struct nfs4_ace *ace; ++ ++ if (!acl) ++ return; ++ ++ while (!list_empty(&acl->ace_head)) { ++ h = acl->ace_head.next; ++ list_del(h); ++ ace = list_entry(h, struct nfs4_ace, l_ace); ++ if (ace->who != NULL) ++ kfree(ace->who); ++ kfree(ace); ++ } ++ ++ kfree(acl); ++ ++ return; ++} ++ ++/* ++ * Append a new ACE to acl, copying wholen bytes of who when wholen > 0. ++ * Returns the new ACE count on success, or -1 if an allocation fails. ++ */ ++int ++nfs4_acl_add_ace(struct nfs4_acl *acl, u32 type, u32 flag, u32 access_mask, ++ char *who, u32 wholen) ++{ ++ struct nfs4_ace *ace; ++ ++ if ((ace = kmalloc(sizeof(*ace), GFP_KERNEL)) == NULL) ++ return -1; ++ ++ ace->type = type; ++ ace->flag = flag; ++ ace->access_mask = access_mask; ++ /* kmalloc() does not zero: who must be NULL when no name is copied, ++ * because nfs4_acl_free() tests and frees it */ ++ ace->who = NULL; ++ if (wholen > 0) { ++ if ((ace->who = kmalloc(wholen, GFP_KERNEL)) == NULL) ++ goto fail; ++ memcpy(ace->who, who, wholen); ++ } ++ ace->wholen = wholen; ++ ++ list_add_tail(&ace->l_ace, &acl->ace_head); ++ ++ return ++acl->naces; /* XXXJBF: why? 
*/ ++ ++fail: ++ kfree(ace); ++ return -1; ++} ++ ++ ++int ++nfs4_acl_merge(struct nfs4_acl *fromacl, struct nfs4_acl *withacl) ++{ ++ struct nfs4_ace *ace; ++ struct list_head *h; ++ ++ if (fromacl == NULL || withacl == NULL) ++ return 0; ++ ++ while (!list_empty(&fromacl->ace_head)) { ++ h = fromacl->ace_head.next; ++ list_del(h); ++ ace = list_entry(h, struct nfs4_ace, l_ace); ++ /* XXX */ ++ ace->flag |= NFS4_ACE_FILE_INHERIT_ACE | ++ NFS4_ACE_DIRECTORY_INHERIT_ACE | NFS4_ACE_INHERIT_ONLY_ACE; ++ list_add_tail(&ace->l_ace, &withacl->ace_head); ++ withacl->naces++; ++ } ++ ++ return 0; ++} ++ ++/* ++ * Move every ACE of acl that carries all three of the DIRECTORY_INHERIT, ++ * FILE_INHERIT and INHERIT_ONLY flags onto dacl (copy, then free the ++ * original). Returns 0 on success or -ENOMEM if an ACE could not be ++ * copied; that ACE and any not yet visited remain on acl. ++ */ ++int ++nfs4_acl_split(struct nfs4_acl *acl, struct nfs4_acl *dacl) ++{ ++ struct list_head *h, *n; ++ struct nfs4_ace *ace; ++ int error = 0; ++ ++ list_for_each_safe(h, n, &acl->ace_head) { ++ ace = list_entry(h, struct nfs4_ace, l_ace); ++ ++ if (!(ace->flag & NFS4_ACE_DIRECTORY_INHERIT_ACE && ++ ace->flag & NFS4_ACE_FILE_INHERIT_ACE && ++ ace->flag & NFS4_ACE_INHERIT_ONLY_ACE)) ++ continue; ++ ++ /* nfs4_acl_add_ace() returns -1 on failure and the new ACE ++ * count otherwise; only the failure case is an error here */ ++ if (nfs4_acl_add_ace(dacl, ace->type, ace->flag, ++ ace->access_mask, ace->who, ace->wholen) < 0) { ++ error = -ENOMEM; ++ goto out; ++ } ++ ++ list_del(h); ++ if (ace->who != NULL) ++ kfree(ace->who); ++ kfree(ace); ++ acl->naces--; ++ } ++ ++out: ++ return error; ++} ++ ++static struct { ++ char *string; ++ int stringlen; ++ short type; ++} s2t_map[] = { ++ { ++ .string = "OWNER@", ++ .stringlen = sizeof("OWNER@") - 1, ++ .type = ACL_USER_OBJ ++ }, ++ { ++ .string = "GROUP@", ++ .stringlen = sizeof("GROUP@") - 1, ++ .type = ACL_GROUP_OBJ ++ }, ++ { ++ .string = "EVERYONE@", ++ .stringlen = sizeof("EVERYONE@") - 1, ++ .type = ACL_OTHER ++ }, ++}; ++ ++static short ++ace2type(struct nfs4_ace *ace) ++{ ++ int i; ++ ++ if (ace->who == NULL || ace->wholen <= 0) ++ return (0); ++ ++ for (i = 0; i < sizeof(s2t_map) / sizeof(*s2t_map); i++) ++ if (s2t_map[i].stringlen == ace->wholen && ++ strncmp(s2t_map[i].string, ace->who, ace->wholen) == 0) ++ return (s2t_map[i].type); ++ ++ return 
(ace->flag & NFS4_ACE_IDENTIFIER_GROUP ? ACL_GROUP : ACL_USER); ++} +diff -puN /dev/null fs/nfs4acl/acl_syms.c +--- /dev/null 2004-01-26 19:20:21.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfs4acl/acl_syms.c 2004-02-19 16:47:06.000000000 -0500 +@@ -0,0 +1,51 @@ ++/* ++ * fs/nfs4acl/acl_syms.c ++ * ++ * Common NFSv4 ACL handling symbol exports. ++ * ++ * Copyright (c) 2002 The Regents of the University of Michigan. ++ * All rights reserved. ++ * ++ * Marius Aamodt Eriksen ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR ++ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
++ */ ++ ++ ++#include ++#include ++ ++#include ++#include ++#include ++#include ++#include ++ ++EXPORT_SYMBOL(nfs4_acl_new); ++EXPORT_SYMBOL(nfs4_acl_free); ++EXPORT_SYMBOL(nfs4_acl_merge); ++EXPORT_SYMBOL(nfs4_acl_split); ++EXPORT_SYMBOL(nfs4_acl_add_ace); +diff -puN /dev/null fs/nfs4acl/Makefile +--- /dev/null 2004-01-26 19:20:21.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfs4acl/Makefile 2004-02-19 16:47:06.000000000 -0500 +@@ -0,0 +1,3 @@ ++obj-$(CONFIG_NFS_V4_ACL) += nfs4acl.o ++ ++nfs4acl-objs := acl.o acl_syms.o +diff -puN /dev/null include/linux/nfs4_acl.h +--- /dev/null 2004-01-26 19:20:21.000000000 -0500 ++++ linux-2.6.3-bfields/include/linux/nfs4_acl.h 2004-02-19 16:47:06.000000000 -0500 +@@ -0,0 +1,68 @@ ++/* ++ * include/linux/nfs4_acl.h ++ * ++ * Common NFSv4 ACL handling definitions. ++ * ++ * Copyright (c) 2002 The Regents of the University of Michigan. ++ * All rights reserved. ++ * ++ * Marius Aamodt Eriksen ++ * ++ * Redistribution and use in source and binary forms, with or without ++ * modification, are permitted provided that the following conditions ++ * are met: ++ * ++ * 1. Redistributions of source code must retain the above copyright ++ * notice, this list of conditions and the following disclaimer. ++ * 2. Redistributions in binary form must reproduce the above copyright ++ * notice, this list of conditions and the following disclaimer in the ++ * documentation and/or other materials provided with the distribution. ++ * 3. Neither the name of the University nor the names of its ++ * contributors may be used to endorse or promote products derived ++ * from this software without specific prior written permission. ++ * ++ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED ++ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF ++ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE ++ * DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE ++ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR ++ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF ++ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR ++ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ++ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ++ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ++ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ++ */ ++ ++#ifndef LINUX_NFS4_ACL_H ++#define LINUX_NFS4_ACL_H ++ ++#include ++ ++#define NFS4_ACL_TYPE_ACCESS 0 ++#define NFS4_ACL_TYPE_DEFAULT 1 ++ ++/* XXX from include/linux/nfs_idmap.h: */ ++#define IDMAP_NAMESZ 128 ++ ++struct nfs4_acl_idmapper { ++ int (*name2uid)(void *, const char *, size_t len, __u32 *); ++ int (*name2gid)(void *, const char *, size_t len, __u32 *); ++ int (*uid2name)(void *, __u32, char *); ++ int (*gid2name)(void *, __u32, char *); ++}; ++ ++struct nfs4_acl *nfs4_acl_new(void); ++void nfs4_acl_free(struct nfs4_acl *); ++int nfs4_acl_merge(struct nfs4_acl *, struct nfs4_acl *); ++int nfs4_acl_split(struct nfs4_acl *, struct nfs4_acl *); ++int nfs4_acl_add_ace(struct nfs4_acl *, u32, u32, ++ u32, char *, u32); ++void nfs4_acl_print(struct nfs4_acl *); ++struct nfs4_acl *nfs4_acl_posix_to_nfsv4(struct nfs4_acl_idmapper *, void *, ++ struct posix_acl *, struct posix_acl *); ++int nfs4_acl_nfsv4_to_posix(struct nfs4_acl_idmapper *, void *, ++ struct nfs4_acl *, struct posix_acl **, ++ struct posix_acl **); ++ ++#endif /* LINUX_NFS4_ACL_H */ +diff -puN include/linux/nfs4.h~CITI_NFS4_ALL include/linux/nfs4.h +--- linux-2.6.3/include/linux/nfs4.h~CITI_NFS4_ALL 2004-02-19 16:47:06.000000000 -0500 ++++ linux-2.6.3-bfields/include/linux/nfs4.h 2004-02-19 16:47:14.000000000 -0500 +@@ -37,14 +37,67 @@ + #define NFS4_SHARE_ACCESS_BOTH 0x0003 + #define NFS4_SHARE_DENY_READ 0x0001 + #define NFS4_SHARE_DENY_WRITE 0x0002 
++#define NFS4_SHARE_DENY_BOTH 0x0003 + + #define NFS4_SET_TO_SERVER_TIME 0 + #define NFS4_SET_TO_CLIENT_TIME 1 + +-#define NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE 0 +-#define NFS4_ACE_ACCESS_DENIED_ACE_TYPE 1 +-#define NFS4_ACE_SYSTEM_AUDIT_ACE_TYPE 2 +-#define NFS4_ACE_SYSTEM_ALARM_ACE_TYPE 3 ++#define ACL4_SUPPORT_ALLOW_ACL 0x00000001 ++#define ACL4_SUPPORT_DENY_ACL 0x00000002 ++#define ACL4_SUPPORT_AUDIT_ACL 0x00000004 ++#define ACL4_SUPPORT_ALARM_ACL 0x00000008 ++ ++#define NFS4_ACE_ACCESS_ALLOWED_ACE_TYPE 0x00000000 ++#define NFS4_ACE_ACCESS_DENIED_ACE_TYPE 0x00000001 ++#define NFS4_ACE_SYSTEM_AUDIT_ACE_TYPE 0x00000002 ++#define NFS4_ACE_SYSTEM_ALARM_ACE_TYPE 0x00000003 ++ ++#define NFS4_ACE_FILE_INHERIT_ACE 0x00000001 ++#define NFS4_ACE_DIRECTORY_INHERIT_ACE 0x00000002 ++#define NFS4_ACE_NO_PROPAGATE_INHERIT_ACE 0x00000004 ++#define NFS4_ACE_INHERIT_ONLY_ACE 0x00000008 ++#define NFS4_ACE_SUCCESSFUL_ACCESS_ACE_FLAG 0x00000010 ++#define NFS4_ACE_FAILED_ACCESS_ACE_FLAG 0x00000020 ++#define NFS4_ACE_IDENTIFIER_GROUP 0x00000040 ++#define NFS4_ACE_OWNER 0x00000080 ++#define NFS4_ACE_GROUP 0x00000100 ++#define NFS4_ACE_EVERYONE 0x00000200 ++ ++#define NFS4_ACE_READ_DATA 0x00000001 ++#define NFS4_ACE_LIST_DIRECTORY 0x00000001 ++#define NFS4_ACE_WRITE_DATA 0x00000002 ++#define NFS4_ACE_ADD_FILE 0x00000002 ++#define NFS4_ACE_APPEND_DATA 0x00000004 ++#define NFS4_ACE_ADD_SUBDIRECTORY 0x00000004 ++#define NFS4_ACE_READ_NAMED_ATTRS 0x00000008 ++#define NFS4_ACE_WRITE_NAMED_ATTRS 0x00000010 ++#define NFS4_ACE_EXECUTE 0x00000020 ++#define NFS4_ACE_DELETE_CHILD 0x00000040 ++#define NFS4_ACE_READ_ATTRIBUTES 0x00000080 ++#define NFS4_ACE_WRITE_ATTRIBUTES 0x00000100 ++#define NFS4_ACE_DELETE 0x00010000 ++#define NFS4_ACE_READ_ACL 0x00020000 ++#define NFS4_ACE_WRITE_ACL 0x00040000 ++#define NFS4_ACE_WRITE_OWNER 0x00080000 ++#define NFS4_ACE_SYNCHRONIZE 0x00100000 ++#define NFS4_ACE_GENERIC_READ 0x00120081 ++#define NFS4_ACE_GENERIC_WRITE 0x00160106 ++#define NFS4_ACE_GENERIC_EXECUTE 
0x001200A0 ++#define NFS4_ACE_MASK_ALL 0x001F01FF ++ ++struct nfs4_ace { ++ u32 type; ++ u32 flag; ++ u32 access_mask; ++ char *who; ++ u32 wholen; ++ struct list_head l_ace; ++}; ++ ++struct nfs4_acl { ++ u32 naces; ++ struct list_head ace_head; ++}; + + typedef struct { char data[NFS4_VERIFIER_SIZE]; } nfs4_verifier; + typedef struct { char data[16]; } nfs4_stateid; +@@ -86,6 +139,8 @@ enum nfs_opnum4 { + OP_SETCLIENTID_CONFIRM = 36, + OP_VERIFY = 37, + OP_WRITE = 38, ++ OP_RELEASE_LOCKOWNER = 39, ++ OP_ILLEGAL = 10044, + }; + + enum nfsstat4 { +@@ -283,7 +338,6 @@ enum lock_type4 { + + enum { + NFSPROC4_CLNT_NULL = 0, /* Unused */ +- NFSPROC4_CLNT_COMPOUND, /* Soon to be unused */ + NFSPROC4_CLNT_READ, + NFSPROC4_CLNT_WRITE, + NFSPROC4_CLNT_COMMIT, +@@ -300,6 +354,22 @@ enum { + NFSPROC4_CLNT_LOCK, + NFSPROC4_CLNT_LOCKT, + NFSPROC4_CLNT_LOCKU, ++ NFSPROC4_CLNT_GETACL, ++ NFSPROC4_CLNT_SETACL, ++ NFSPROC4_CLNT_ACCESS, ++ NFSPROC4_CLNT_GETATTR, ++ NFSPROC4_CLNT_LOOKUP, ++ NFSPROC4_CLNT_GETROOT_HEAD, ++ NFSPROC4_CLNT_GETROOT_PATH, ++ NFSPROC4_CLNT_REMOVE, ++ NFSPROC4_CLNT_RENAME, ++ NFSPROC4_CLNT_LINK, ++ NFSPROC4_CLNT_CREATE, ++ NFSPROC4_CLNT_PATHCONF, ++ NFSPROC4_CLNT_STATFS, ++ NFSPROC4_CLNT_UNLINK, ++ NFSPROC4_CLNT_READLINK, ++ NFSPROC4_CLNT_READDIR, + }; + + #endif +diff -puN fs/nfs/nfs4xdr.c~CITI_NFS4_ALL fs/nfs/nfs4xdr.c +--- linux-2.6.3/fs/nfs/nfs4xdr.c~CITI_NFS4_ALL 2004-02-19 16:47:07.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfs/nfs4xdr.c 2004-02-19 16:47:15.000000000 -0500 +@@ -51,6 +51,7 @@ + #include + #include + #include ++#include + + #define NFSDBG_FACILITY NFSDBG_XDR + +@@ -81,11 +82,15 @@ static int nfs_stat_to_errno(int); + #define decode_putrootfh_maxsz op_decode_hdr_maxsz + #define encode_getfh_maxsz op_encode_hdr_maxsz + #define decode_getfh_maxsz op_decode_hdr_maxsz + 1 + \ +- (NFS4_FHSIZE >> 2) ++ ((3+NFS4_FHSIZE) >> 2) + #define encode_getattr_maxsz op_encode_hdr_maxsz + 3 +-#define nfs4_fattr_bitmap_maxsz 26 + 2 * ((NFS4_MAXNAMLEN +1) 
>> 2) ++#define nfs4_name_maxsz ( 1 + ((3+NFS4_MAXNAMLEN) >> 2) ) ++#define nfs4_fattr_bitmap_maxsz 36 + 2 * nfs4_name_maxsz + #define decode_getattr_maxsz op_decode_hdr_maxsz + 3 + \ + nfs4_fattr_bitmap_maxsz ++#define encode_setattr_maxsz op_decode_hdr_maxsz + 4 + \ ++ nfs4_fattr_bitmap_maxsz ++#define decode_setattr_maxsz op_decode_hdr_maxsz + 3 + #define encode_savefh_maxsz op_encode_hdr_maxsz + #define decode_savefh_maxsz op_decode_hdr_maxsz + #define encode_restorefh_maxsz op_encode_hdr_maxsz +@@ -115,6 +120,18 @@ static int nfs_stat_to_errno(int); + 3 + (NFS4_VERIFIER_SIZE >> 2) + #define decode_setclientid_confirm_maxsz \ + op_decode_hdr_maxsz ++#define encode_lookup_maxsz op_encode_hdr_maxsz + \ ++ 1 + ((3 + NFS_MAXFHSIZE) >> 2) ++#define encode_remove_maxsz op_encode_hdr_maxsz + \ ++ nfs4_name_maxsz ++#define encode_rename_maxsz op_encode_hdr_maxsz + \ ++ 2 * nfs4_name_maxsz ++#define encode_link_maxsz op_encode_hdr_maxsz + \ ++ nfs4_name_maxsz ++#define encode_create_maxsz op_encode_hdr_maxsz + \ ++ 2 + 2 * nfs4_name_maxsz + \ ++ nfs4_fattr_bitmap_maxsz ++#define decode_create_maxsz op_decode_hdr_maxsz + 8 + + #define NFS4_enc_compound_sz 1024 /* XXX: large enough? */ + #define NFS4_dec_compound_sz 1024 /* XXX: large enough? 
*/ +@@ -126,6 +143,18 @@ static int nfs_stat_to_errno(int); + decode_putfh_maxsz + \ + decode_read_getattr_maxsz + \ + op_decode_hdr_maxsz + 2 ++#define NFS4_enc_readlink_sz compound_encode_hdr_maxsz + \ ++ encode_putfh_maxsz + \ ++ op_encode_hdr_maxsz ++#define NFS4_dec_readlink_sz compound_decode_hdr_maxsz + \ ++ decode_putfh_maxsz + \ ++ op_decode_hdr_maxsz ++#define NFS4_enc_readdir_sz compound_encode_hdr_maxsz + \ ++ encode_putfh_maxsz + \ ++ op_encode_hdr_maxsz + 9 ++#define NFS4_dec_readdir_sz compound_decode_hdr_maxsz + \ ++ decode_putfh_maxsz + \ ++ op_decode_hdr_maxsz + 2 + #define NFS4_enc_write_sz compound_encode_hdr_maxsz + \ + encode_putfh_maxsz + \ + encode_pre_write_getattr_maxsz + \ +@@ -255,8 +284,136 @@ static int nfs_stat_to_errno(int); + decode_putfh_maxsz + \ + decode_getattr_maxsz + \ + op_decode_hdr_maxsz + 4 +- +- ++#define NFS4_enc_getacl_sz compound_encode_hdr_maxsz + \ ++ encode_putfh_maxsz + \ ++ encode_getattr_maxsz ++#define username_maxsz 1 + ((IDMAP_NAMESZ + 3) >> 2) ++#define ace_maxsz 3 + username_maxsz ++#define acl_maxentries (NFS_ACL_MAX_ENTRIES - 3) * 3 + 6 ++#define acl_maxsz 1 + (acl_maxentries) * (ace_maxsz) ++#define NFS4_dec_getacl_sz compound_decode_hdr_maxsz + \ ++ decode_putfh_maxsz + \ ++ op_decode_hdr_maxsz + 3 + 1 + acl_maxsz ++#define NFS4_enc_setacl_sz compound_encode_hdr_maxsz + \ ++ encode_putfh_maxsz + \ ++ op_encode_hdr_maxsz + 4 + 1 + acl_maxsz ++#define NFS4_dec_setacl_sz compound_decode_hdr_maxsz + \ ++ decode_putfh_maxsz + \ ++ decode_setattr_maxsz ++#define NFS4_enc_access_sz compound_encode_hdr_maxsz + \ ++ encode_putfh_maxsz + \ ++ encode_getattr_maxsz + \ ++ op_encode_hdr_maxsz + 1 ++#define NFS4_dec_access_sz compound_decode_hdr_maxsz + \ ++ decode_putfh_maxsz + \ ++ decode_getattr_maxsz + \ ++ op_decode_hdr_maxsz + 2 ++#define NFS4_enc_getattr_sz compound_encode_hdr_maxsz + \ ++ encode_putfh_maxsz + \ ++ encode_getattr_maxsz ++#define NFS4_dec_getattr_sz compound_decode_hdr_maxsz + \ ++ 
decode_putfh_maxsz + \ ++ decode_getattr_maxsz ++#define NFS4_enc_lookup_sz compound_encode_hdr_maxsz + \ ++ encode_putfh_maxsz + \ ++ encode_getattr_maxsz + \ ++ encode_lookup_maxsz + \ ++ encode_getattr_maxsz + \ ++ encode_getfh_maxsz ++#define NFS4_dec_lookup_sz compound_decode_hdr_maxsz + \ ++ decode_putfh_maxsz + \ ++ decode_getattr_maxsz + \ ++ op_decode_hdr_maxsz + \ ++ decode_getattr_maxsz + \ ++ decode_getfh_maxsz ++#define NFS4_enc_getroot_head_sz compound_encode_hdr_maxsz + \ ++ op_encode_hdr_maxsz + 1 + \ ++ encode_getattr_maxsz + \ ++ encode_getfh_maxsz ++#define NFS4_dec_getroot_head_sz compound_decode_hdr_maxsz + \ ++ op_decode_hdr_maxsz + \ ++ decode_getattr_maxsz + \ ++ decode_getfh_maxsz ++#define NFS4_enc_getroot_path_sz compound_encode_hdr_maxsz + \ ++ encode_putfh_maxsz + \ ++ encode_lookup_maxsz + \ ++ encode_getattr_maxsz + \ ++ encode_getfh_maxsz ++#define NFS4_dec_getroot_path_sz compound_decode_hdr_maxsz + \ ++ decode_putfh_maxsz + \ ++ op_decode_hdr_maxsz + \ ++ decode_getattr_maxsz + \ ++ decode_getfh_maxsz ++#define NFS4_enc_remove_sz compound_encode_hdr_maxsz + \ ++ encode_putfh_maxsz + \ ++ encode_remove_maxsz + \ ++ encode_getattr_maxsz ++#define NFS4_dec_remove_sz compound_decode_hdr_maxsz + \ ++ decode_putfh_maxsz + \ ++ op_decode_hdr_maxsz + 5 + \ ++ decode_getattr_maxsz ++#define NFS4_enc_unlink_sz NFS4_enc_remove_sz ++#define NFS4_dec_unlink_sz NFS4_dec_remove_sz ++#define NFS4_enc_rename_sz compound_encode_hdr_maxsz + \ ++ encode_putfh_maxsz + \ ++ encode_savefh_maxsz + \ ++ encode_putfh_maxsz + \ ++ encode_rename_maxsz + \ ++ encode_getattr_maxsz + \ ++ encode_restorefh_maxsz + \ ++ encode_getattr_maxsz ++#define NFS4_dec_rename_sz compound_decode_hdr_maxsz + \ ++ decode_putfh_maxsz + \ ++ decode_savefh_maxsz + \ ++ decode_putfh_maxsz + \ ++ op_decode_hdr_maxsz + 5 + 5 + \ ++ decode_getattr_maxsz + \ ++ decode_restorefh_maxsz + \ ++ decode_getattr_maxsz ++#define NFS4_enc_link_sz compound_encode_hdr_maxsz + \ ++ 
encode_putfh_maxsz + \ ++ encode_savefh_maxsz + \ ++ encode_putfh_maxsz + \ ++ encode_link_maxsz + \ ++ encode_getattr_maxsz + \ ++ encode_restorefh_maxsz + \ ++ encode_getattr_maxsz ++#define NFS4_dec_link_sz compound_decode_hdr_maxsz + \ ++ decode_putfh_maxsz + \ ++ decode_savefh_maxsz + \ ++ decode_putfh_maxsz + \ ++ op_decode_hdr_maxsz + 5 + \ ++ decode_getattr_maxsz + \ ++ decode_restorefh_maxsz + \ ++ decode_getattr_maxsz ++#define NFS4_enc_create_sz compound_encode_hdr_maxsz + \ ++ encode_putfh_maxsz + \ ++ encode_savefh_maxsz + \ ++ encode_create_maxsz + \ ++ encode_getattr_maxsz + \ ++ encode_getfh_maxsz + \ ++ encode_restorefh_maxsz + \ ++ encode_getattr_maxsz ++#define NFS4_dec_create_sz compound_decode_hdr_maxsz + \ ++ decode_putfh_maxsz + \ ++ op_decode_hdr_maxsz + \ ++ decode_create_maxsz + \ ++ decode_getattr_maxsz + \ ++ decode_getfh_maxsz + \ ++ op_decode_hdr_maxsz + \ ++ decode_getattr_maxsz ++#define NFS4_enc_pathconf_sz compound_encode_hdr_maxsz + \ ++ encode_putfh_maxsz + \ ++ encode_getattr_maxsz ++#define NFS4_dec_pathconf_sz compound_decode_hdr_maxsz + \ ++ decode_putfh_maxsz + \ ++ op_decode_hdr_maxsz + 6 ++#define NFS4_enc_statfs_sz compound_encode_hdr_maxsz + \ ++ encode_putfh_maxsz + \ ++ encode_getattr_maxsz ++#define NFS4_dec_statfs_sz compound_decode_hdr_maxsz + \ ++ decode_putfh_maxsz + \ ++ op_decode_hdr_maxsz + 12 + + static struct { + unsigned int mode; +@@ -333,8 +490,7 @@ encode_compound_hdr(struct xdr_stream *x + } + + static int +-encode_attrs(struct xdr_stream *xdr, struct iattr *iap, +- struct nfs_server *server) ++encode_attrs(struct xdr_stream *xdr, struct iattr *iap, struct nfs_server *server) + { + char owner_name[IDMAP_NAMESZ]; + char owner_group[IDMAP_NAMESZ]; +@@ -352,7 +508,7 @@ encode_attrs(struct xdr_stream *xdr, str + * In the worst-case, this would be + * 12(bitmap) + 4(attrlen) + 8(size) + 4(mode) + 4(atime) + 4(mtime) + * = 36 bytes, plus any contribution from variable-length fields +- * such as 
owner/group/acl's. ++ * such as owner/group. + */ + len = 16; + +@@ -392,6 +548,7 @@ encode_attrs(struct xdr_stream *xdr, str + len += 16; + else if (iap->ia_valid & ATTR_MTIME) + len += 4; ++ + RESERVE_SPACE(len); + + /* +@@ -462,13 +619,13 @@ encode_attrs(struct xdr_stream *xdr, str + } + + static int +-encode_access(struct xdr_stream *xdr, struct nfs4_access *access) ++encode_access(struct xdr_stream *xdr, u32 access) + { + uint32_t *p; + + RESERVE_SPACE(8); + WRITE32(OP_ACCESS); +- WRITE32(access->ac_req_access); ++ WRITE32(access); + + return 0; + } +@@ -500,37 +657,36 @@ encode_commit(struct xdr_stream *xdr, st + } + + static int +-encode_create(struct xdr_stream *xdr, struct nfs4_create *create, +- struct nfs_server *server) ++encode_create(struct xdr_stream *xdr, struct nfs4_create_arg *create) + { + uint32_t *p; + + RESERVE_SPACE(8); + WRITE32(OP_CREATE); +- WRITE32(create->cr_ftype); ++ WRITE32(create->ftype); + +- switch (create->cr_ftype) { ++ switch (create->ftype) { + case NF4LNK: +- RESERVE_SPACE(4 + create->cr_textlen); +- WRITE32(create->cr_textlen); +- WRITEMEM(create->cr_text, create->cr_textlen); ++ RESERVE_SPACE(4 + create->u.symlink->len); ++ WRITE32(create->u.symlink->len); ++ WRITEMEM(create->u.symlink->name, create->u.symlink->len); + break; + + case NF4BLK: case NF4CHR: + RESERVE_SPACE(8); +- WRITE32(create->cr_specdata1); +- WRITE32(create->cr_specdata2); ++ WRITE32(create->u.device.specdata1); ++ WRITE32(create->u.device.specdata2); + break; + + default: + break; + } + +- RESERVE_SPACE(4 + create->cr_namelen); +- WRITE32(create->cr_namelen); +- WRITEMEM(create->cr_name, create->cr_namelen); ++ RESERVE_SPACE(4 + create->name->len); ++ WRITE32(create->name->len); ++ WRITEMEM(create->name->name, create->name->len); + +- return encode_attrs(xdr, create->cr_attrs, server); ++ return encode_attrs(xdr, create->attrs, create->server); + } + + static int +@@ -558,11 +714,14 @@ encode_getattr_two(struct xdr_stream *xd + return 0; + } + ++extern 
u32 nfs4_fattr_bitmap[]; ++extern u32 nfs4_statfs_bitmap[]; ++ + static inline int +-encode_getattr(struct xdr_stream *xdr, struct nfs4_getattr *getattr) ++encode_getfattr(struct xdr_stream *xdr) + { +- return encode_getattr_two(xdr, getattr->gt_bmval[0], +- getattr->gt_bmval[1]); ++ return encode_getattr_two(xdr, nfs4_fattr_bitmap[0], ++ nfs4_fattr_bitmap[1]); + } + + /* +@@ -618,14 +777,14 @@ encode_getfh(struct xdr_stream *xdr) + } + + static int +-encode_link(struct xdr_stream *xdr, struct nfs4_link *link) ++encode_link(struct xdr_stream *xdr, struct qstr *name) + { + uint32_t *p; + +- RESERVE_SPACE(8 + link->ln_namelen); ++ RESERVE_SPACE(8 + name->len); + WRITE32(OP_LINK); +- WRITE32(link->ln_namelen); +- WRITEMEM(link->ln_name, link->ln_namelen); ++ WRITE32(name->len); ++ WRITEMEM(name->name, name->len); + + return 0; + } +@@ -705,15 +864,15 @@ encode_locku(struct xdr_stream *xdr, str + } + + static int +-encode_lookup(struct xdr_stream *xdr, struct nfs4_lookup *lookup) ++encode_lookup(struct xdr_stream *xdr, struct qstr *name) + { +- int len = lookup->lo_name->len; ++ int len = name->len; + uint32_t *p; + + RESERVE_SPACE(8 + len); + WRITE32(OP_LOOKUP); + WRITE32(len); +- WRITEMEM(lookup->lo_name->name, len); ++ WRITEMEM(name->name, len); + + return 0; + } +@@ -883,7 +1042,7 @@ encode_read(struct xdr_stream *xdr, stru + } + + static int +-encode_readdir(struct xdr_stream *xdr, struct nfs4_readdir *readdir, struct rpc_rqst *req) ++encode_readdir(struct xdr_stream *xdr, struct nfs4_readdir_arg *readdir, struct rpc_rqst *req) + { + struct rpc_auth *auth = req->rq_task->tk_auth; + int replen; +@@ -891,21 +1050,21 @@ encode_readdir(struct xdr_stream *xdr, s + + RESERVE_SPACE(32+sizeof(nfs4_verifier)); + WRITE32(OP_READDIR); +- WRITE64(readdir->rd_cookie); +- WRITEMEM(readdir->rd_req_verifier.data, sizeof(readdir->rd_req_verifier.data)); +- WRITE32(readdir->rd_count >> 5); /* meaningless "dircount" field */ +- WRITE32(readdir->rd_count); ++ 
WRITE64(readdir->cookie); ++ WRITEMEM(readdir->req_verifier.data, sizeof(readdir->req_verifier.data)); ++ WRITE32(readdir->count >> 5); /* meaningless "dircount" field */ ++ WRITE32(readdir->count); + WRITE32(2); +- WRITE32(readdir->rd_bmval[0]); +- WRITE32(readdir->rd_bmval[1]); ++ WRITE32(FATTR4_WORD0_FILEID); ++ WRITE32(0); + + /* set up reply iovec + * toplevel_status + taglen + rescount + OP_PUTFH + status + * + OP_READDIR + status + verifer(2) = 9 + */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + 9) << 2; +- xdr_inline_pages(&req->rq_rcv_buf, replen, readdir->rd_pages, +- readdir->rd_pgbase, readdir->rd_count); ++ xdr_inline_pages(&req->rq_rcv_buf, replen, readdir->pages, ++ readdir->pgbase, readdir->count); + + return 0; + } +@@ -925,37 +1084,37 @@ encode_readlink(struct xdr_stream *xdr, + * + OP_READLINK + status = 7 + */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + 7) << 2; +- xdr_inline_pages(&req->rq_rcv_buf, replen, readlink->rl_pages, 0, readlink->rl_count); ++ xdr_inline_pages(&req->rq_rcv_buf, replen, readlink->pages, 0, readlink->count); + + return 0; + } + + static int +-encode_remove(struct xdr_stream *xdr, struct nfs4_remove *remove) ++encode_remove(struct xdr_stream *xdr, struct qstr *name) + { + uint32_t *p; + +- RESERVE_SPACE(8 + remove->rm_namelen); ++ RESERVE_SPACE(8 + name->len); + WRITE32(OP_REMOVE); +- WRITE32(remove->rm_namelen); +- WRITEMEM(remove->rm_name, remove->rm_namelen); ++ WRITE32(name->len); ++ WRITEMEM(name->name, name->len); + + return 0; + } + + static int +-encode_rename(struct xdr_stream *xdr, struct nfs4_rename *rename) ++encode_rename(struct xdr_stream *xdr, struct qstr *oldname, struct qstr *newname) + { + uint32_t *p; + +- RESERVE_SPACE(8 + rename->rn_oldnamelen); ++ RESERVE_SPACE(8 + oldname->len); + WRITE32(OP_RENAME); +- WRITE32(rename->rn_oldnamelen); +- WRITEMEM(rename->rn_oldname, rename->rn_oldnamelen); ++ WRITE32(oldname->len); ++ WRITEMEM(oldname->name, oldname->len); + +- RESERVE_SPACE(4 + 
rename->rn_newnamelen); +- WRITE32(rename->rn_newnamelen); +- WRITEMEM(rename->rn_newname, rename->rn_newnamelen); ++ RESERVE_SPACE(4 + newname->len); ++ WRITE32(newname->len); ++ WRITEMEM(newname->name, newname->len); + + return 0; + } +@@ -1011,6 +1170,39 @@ encode_setattr(struct xdr_stream *xdr, s + return 0; + } + ++extern nfs4_stateid zero_stateid; ++ ++#ifdef CONFIG_NFS_V4_ACL ++ ++static int ++encode_setacl(struct xdr_stream *xdr, struct nfs_setaclargs *arg) ++{ ++ uint32_t *p, *attrbuflen; ++ struct nfs4_ace *ace; ++ struct nfs4_acl *acl = arg->acl; ++ ++ RESERVE_SPACE(4+sizeof(zero_stateid.data)); ++ WRITE32(OP_SETATTR); ++ WRITEMEM(zero_stateid.data, sizeof(zero_stateid.data)); ++ RESERVE_SPACE(4*4); ++ WRITE32(1); ++ WRITE32(FATTR4_WORD0_ACL); ++ attrbuflen = p++; ++ WRITE32(acl->naces); ++ list_for_each_entry(ace, &acl->ace_head, l_ace) { ++ RESERVE_SPACE(4*4 + (XDR_QUADLEN(ace->wholen) << 2)); ++ WRITE32(ace->type); ++ WRITE32(ace->flag); ++ WRITE32(ace->access_mask & NFS4_ACE_MASK_ALL); ++ WRITE32(ace->wholen); ++ WRITEMEM(ace->who, ace->wholen); ++ } ++ *attrbuflen = htonl((char *)p - (char *)attrbuflen - 4); ++ return 0; ++} ++ ++#endif /* CONFIG_NFS_V4_ACL */ ++ + static int + encode_setclientid(struct xdr_stream *xdr, struct nfs4_setclientid *setclientid) + { +@@ -1068,312 +1260,566 @@ encode_write(struct xdr_stream *xdr, str + + return 0; + } +- +-/* FIXME: this sucks */ +-static int +-encode_compound(struct xdr_stream *xdr, struct nfs4_compound *cp, struct rpc_rqst *req) +-{ +- struct compound_hdr hdr = { +- .taglen = cp->taglen, +- .tag = cp->tag, +- .nops = cp->req_nops, +- }; +- int i, status = 0; +- +- encode_compound_hdr(xdr, &hdr); +- +- for (i = 0; i < cp->req_nops; i++) { +- switch (cp->ops[i].opnum) { +- case OP_ACCESS: +- status = encode_access(xdr, &cp->ops[i].u.access); +- break; +- case OP_CREATE: +- status = encode_create(xdr, &cp->ops[i].u.create, cp->server); +- break; +- case OP_GETATTR: +- status = encode_getattr(xdr, 
&cp->ops[i].u.getattr); +- break; +- case OP_GETFH: +- status = encode_getfh(xdr); +- break; +- case OP_LINK: +- status = encode_link(xdr, &cp->ops[i].u.link); +- break; +- case OP_LOOKUP: +- status = encode_lookup(xdr, &cp->ops[i].u.lookup); +- break; +- case OP_PUTFH: +- status = encode_putfh(xdr, cp->ops[i].u.putfh.pf_fhandle); +- break; +- case OP_PUTROOTFH: +- status = encode_putrootfh(xdr); +- break; +- case OP_READDIR: +- status = encode_readdir(xdr, &cp->ops[i].u.readdir, req); +- break; +- case OP_READLINK: +- status = encode_readlink(xdr, &cp->ops[i].u.readlink, req); +- break; +- case OP_REMOVE: +- status = encode_remove(xdr, &cp->ops[i].u.remove); +- break; +- case OP_RENAME: +- status = encode_rename(xdr, &cp->ops[i].u.rename); +- break; +- case OP_RESTOREFH: +- status = encode_restorefh(xdr); +- break; +- case OP_SAVEFH: +- status = encode_savefh(xdr); +- break; +- default: +- BUG(); +- } +- if (status) +- return status; +- } +- +- return 0; +-} + /* + * END OF "GENERIC" ENCODE ROUTINES. 
+ */ + +- + /* +- * Encode COMPOUND argument ++ * Encode ACCESS request + */ + static int +-nfs4_xdr_enc_compound(struct rpc_rqst *req, uint32_t *p, struct nfs4_compound *cp) ++nfs4_xdr_enc_access(struct rpc_rqst *req, uint32_t *p, struct nfs4_accessargs *args) + { + struct xdr_stream xdr; ++ struct compound_hdr hdr = { ++ .nops = 3, ++ }; + int status; +- +- xdr_init_encode(&xdr, &req->rq_snd_buf, p); +- status = encode_compound(&xdr, cp, req); +- cp->timestamp = jiffies; +- return status; +-} +-/* +- * Encode a CLOSE request +- */ +-static int +-nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args) +-{ +- struct xdr_stream xdr; +- struct compound_hdr hdr = { +- .nops = 2, +- }; +- int status; + +- xdr_init_encode(&xdr, &req->rq_snd_buf, p); +- encode_compound_hdr(&xdr, &hdr); +- status = encode_putfh(&xdr, args->fh); +- if(status) +- goto out; +- status = encode_close(&xdr, args); ++ xdr_init_encode(&xdr, &req->rq_snd_buf, p); ++ encode_compound_hdr(&xdr, &hdr); ++ status = encode_putfh(&xdr, args->fhandle); ++ if (status) ++ goto out; ++ status = encode_getfattr(&xdr); ++ if (status) ++ goto out; ++ status = encode_access(&xdr, args->req_access); + out: +- return status; ++ return status; + } + + /* +- * Encode an OPEN request ++ * Encode LOOKUP request + */ + static int +-nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_openargs *args) ++nfs4_xdr_enc_lookup(struct rpc_rqst *req, uint32_t *p, struct nfs4_lookupargs *args) + { + struct xdr_stream xdr; + struct compound_hdr hdr = { +- .nops = 7, ++ .nops = 5, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); +- status = encode_putfh(&xdr, args->fh); ++ status = encode_putfh(&xdr, args->dir_fh); + if (status) + goto out; +- status = encode_savefh(&xdr); ++ status = encode_getfattr(&xdr); + if (status) + goto out; +- status = encode_open(&xdr, args); ++ status = encode_lookup(&xdr, args->name); + if (status) + goto out; +- 
status = encode_getattr(&xdr, args->f_getattr); ++ status = encode_getfattr(&xdr); + if (status) + goto out; + status = encode_getfh(&xdr); +- if (status) +- goto out; +- status = encode_restorefh(&xdr); +- if (status) +- goto out; +- status = encode_getattr(&xdr, args->d_getattr); + out: + return status; + } + + /* +- * Encode an OPEN_CONFIRM request ++ * Encode GETROOT_HEAD request + */ + static int +-nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_open_confirmargs *args) ++nfs4_xdr_enc_getroot_head(struct rpc_rqst *req, uint32_t *p, void *args) + { + struct xdr_stream xdr; + struct compound_hdr hdr = { +- .nops = 2, ++ .nops = 3, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); +- status = encode_putfh(&xdr, args->fh); +- if(status) ++ status = encode_putrootfh(&xdr); ++ if (status) + goto out; +- status = encode_open_confirm(&xdr, args); ++ status = encode_getfattr(&xdr); ++ if (status) ++ goto out; ++ status = encode_getfh(&xdr); + out: + return status; + } + + /* +- * Encode an OPEN request ++ * Encode GETROOT_PATH request + */ + static int +-nfs4_xdr_enc_open_reclaim(struct rpc_rqst *req, uint32_t *p, +- struct nfs_open_reclaimargs *args) ++nfs4_xdr_enc_getroot_path(struct rpc_rqst *req, uint32_t *p, struct nfs4_getroot_arg *args) + { + struct xdr_stream xdr; + struct compound_hdr hdr = { +- .nops = 3, ++ .nops = 4, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); +- status = encode_putfh(&xdr, args->fh); ++ status = encode_putfh(&xdr, args->fhandle); + if (status) + goto out; +- status = encode_open_reclaim(&xdr, args); ++ status = encode_lookup(&xdr, args->name); ++ if (status) ++ goto out; ++ status = encode_getfattr(&xdr); + if (status) + goto out; +- status = encode_getattr(&xdr, args->f_getattr); ++ status = encode_getfh(&xdr); + out: + return status; + } + + /* +- * Encode an OPEN_DOWNGRADE request ++ * Encode REMOVE 
request + */ + static int +-nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args) ++nfs4_xdr_enc_remove(struct rpc_rqst *req, uint32_t *p, struct nfs4_remove_arg *args) + { + struct xdr_stream xdr; + struct compound_hdr hdr = { +- .nops = 2, ++ .nops = 3, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); +- status = encode_putfh(&xdr, args->fh); ++ status = encode_putfh(&xdr, args->fhandle); + if (status) + goto out; +- status = encode_open_downgrade(&xdr, args); ++ status = encode_remove(&xdr, args->name); ++ if (status) ++ goto out; ++ status = encode_getfattr(&xdr); + out: + return status; + } + + /* +- * Encode a LOCK request ++ * Encode UNLINK request + */ + static int +-nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) ++nfs4_xdr_enc_unlink(struct rpc_rqst *req, uint32_t *p, struct nfs4_unlink *args) + { + struct xdr_stream xdr; + struct compound_hdr hdr = { +- .nops = 2, ++ .nops = 3, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); +- if(status) ++ if (status) + goto out; +- status = encode_lock(&xdr, args); ++ status = encode_remove(&xdr, args->name); ++ if (status) ++ goto out; ++ status = encode_getfattr(&xdr); + out: + return status; + } + + /* +- * Encode a LOCKT request ++ * Encode RENAME request + */ + static int +-nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) ++nfs4_xdr_enc_rename(struct rpc_rqst *req, uint32_t *p, struct nfs4_rename_arg *args) + { + struct xdr_stream xdr; + struct compound_hdr hdr = { +- .nops = 2, ++ .nops = 7, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); +- status = encode_putfh(&xdr, args->fh); +- if(status) ++ status = encode_putfh(&xdr, args->old_dir); ++ if (status) + goto out; +- status = encode_lockt(&xdr, args); ++ status = 
encode_savefh(&xdr); ++ if (status) ++ goto out; ++ status = encode_putfh(&xdr, args->new_dir); ++ if (status) ++ goto out; ++ status = encode_rename(&xdr, args->old_name, args->new_name); ++ if (status) ++ goto out; ++ status = encode_getfattr(&xdr); ++ if (status) ++ goto out; ++ status = encode_restorefh(&xdr); ++ if (status) ++ goto out; ++ status = encode_getfattr(&xdr); + out: + return status; + } + + /* +- * Encode a LOCKU request ++ * Encode LINK request + */ + static int +-nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) ++nfs4_xdr_enc_link(struct rpc_rqst *req, uint32_t *p, struct nfs4_link_arg *args) + { + struct xdr_stream xdr; + struct compound_hdr hdr = { +- .nops = 2, ++ .nops = 7, + }; + int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); + status = encode_putfh(&xdr, args->fh); +- if(status) ++ if (status) + goto out; +- status = encode_locku(&xdr, args); ++ status = encode_savefh(&xdr); ++ if (status) ++ goto out; ++ status = encode_putfh(&xdr, args->dir_fh); ++ if (status) ++ goto out; ++ status = encode_link(&xdr, args->name); ++ if (status) ++ goto out; ++ status = encode_getfattr(&xdr); ++ if (status) ++ goto out; ++ status = encode_restorefh(&xdr); ++ if (status) ++ goto out; ++ status = encode_getfattr(&xdr); + out: + return status; + } + + /* +- * Encode a READ request ++ * Encode CREATE request + */ + static int +-nfs4_xdr_enc_read(struct rpc_rqst *req, uint32_t *p, struct nfs_readargs *args) ++nfs4_xdr_enc_create(struct rpc_rqst *req, uint32_t *p, struct nfs4_create_arg *args) + { +- struct rpc_auth *auth = req->rq_task->tk_auth; + struct xdr_stream xdr; + struct compound_hdr hdr = { +- .nops = 3, ++ .nops = 7, + }; +- int replen, status; ++ int status; + + xdr_init_encode(&xdr, &req->rq_snd_buf, p); + encode_compound_hdr(&xdr, &hdr); +- status = encode_putfh(&xdr, args->fh); ++ status = encode_putfh(&xdr, args->dir_fh); + if (status) + goto out; +- status = 
encode_read(&xdr, args); ++ status = encode_savefh(&xdr); + if (status) + goto out; +- status = encode_read_getattr(&xdr); +- +- /* set up reply iovec +- * toplevel status + taglen=0 + rescount + OP_PUTFH + status ++ status = encode_create(&xdr, args); ++ if (status) ++ goto out; ++ status = encode_getfattr(&xdr); ++ if (status) ++ goto out; ++ status = encode_getfh(&xdr); ++ if (status) ++ goto out; ++ status = encode_restorefh(&xdr); ++ if (status) ++ goto out; ++ status = encode_getfattr(&xdr); ++out: ++ return status; ++} ++ ++/* ++ * Encode GETATTR request ++ */ ++static int ++nfs4_xdr_enc_getattr(struct rpc_rqst *req, uint32_t *p, struct nfs_fh *fh) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr = { ++ .nops = 2, ++ }; ++ int status; ++ ++ xdr_init_encode(&xdr, &req->rq_snd_buf, p); ++ encode_compound_hdr(&xdr, &hdr); ++ status = encode_putfh(&xdr, fh); ++ if (status) ++ goto out; ++ status = encode_getfattr(&xdr); ++ out: ++ return status; ++} ++ ++/* ++ * Encode a CLOSE request ++ */ ++static int ++nfs4_xdr_enc_close(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr = { ++ .nops = 2, ++ }; ++ int status; ++ ++ xdr_init_encode(&xdr, &req->rq_snd_buf, p); ++ encode_compound_hdr(&xdr, &hdr); ++ status = encode_putfh(&xdr, args->fh); ++ if(status) ++ goto out; ++ status = encode_close(&xdr, args); ++out: ++ return status; ++} ++ ++/* ++ * Encode an OPEN request ++ */ ++static int ++nfs4_xdr_enc_open(struct rpc_rqst *req, uint32_t *p, struct nfs_openargs *args) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr = { ++ .nops = 7, ++ }; ++ int status; ++ ++ xdr_init_encode(&xdr, &req->rq_snd_buf, p); ++ encode_compound_hdr(&xdr, &hdr); ++ status = encode_putfh(&xdr, args->fh); ++ if (status) ++ goto out; ++ status = encode_savefh(&xdr); ++ if (status) ++ goto out; ++ status = encode_open(&xdr, args); ++ if (status) ++ goto out; ++ status = encode_getfattr(&xdr); ++ if (status) ++ 
goto out; ++ status = encode_getfh(&xdr); ++ if (status) ++ goto out; ++ status = encode_restorefh(&xdr); ++ if (status) ++ goto out; ++ status = encode_getfattr(&xdr); ++out: ++ return status; ++} ++ ++/* ++ * Encode an OPEN_CONFIRM request ++ */ ++static int ++nfs4_xdr_enc_open_confirm(struct rpc_rqst *req, uint32_t *p, struct nfs_open_confirmargs *args) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr = { ++ .nops = 2, ++ }; ++ int status; ++ ++ xdr_init_encode(&xdr, &req->rq_snd_buf, p); ++ encode_compound_hdr(&xdr, &hdr); ++ status = encode_putfh(&xdr, args->fh); ++ if(status) ++ goto out; ++ status = encode_open_confirm(&xdr, args); ++out: ++ return status; ++} ++ ++/* ++ * Encode an OPEN request ++ */ ++static int ++nfs4_xdr_enc_open_reclaim(struct rpc_rqst *req, uint32_t *p, ++ struct nfs_open_reclaimargs *args) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr = { ++ .nops = 3, ++ }; ++ int status; ++ ++ xdr_init_encode(&xdr, &req->rq_snd_buf, p); ++ encode_compound_hdr(&xdr, &hdr); ++ status = encode_putfh(&xdr, args->fh); ++ if (status) ++ goto out; ++ status = encode_open_reclaim(&xdr, args); ++ if (status) ++ goto out; ++ status = encode_getfattr(&xdr); ++out: ++ return status; ++} ++ ++/* ++ * Encode an OPEN_DOWNGRADE request ++ */ ++static int ++nfs4_xdr_enc_open_downgrade(struct rpc_rqst *req, uint32_t *p, struct nfs_closeargs *args) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr = { ++ .nops = 2, ++ }; ++ int status; ++ ++ xdr_init_encode(&xdr, &req->rq_snd_buf, p); ++ encode_compound_hdr(&xdr, &hdr); ++ status = encode_putfh(&xdr, args->fh); ++ if (status) ++ goto out; ++ status = encode_open_downgrade(&xdr, args); ++out: ++ return status; ++} ++ ++/* ++ * Encode a LOCK request ++ */ ++static int ++nfs4_xdr_enc_lock(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr = { ++ .nops = 2, ++ }; ++ int status; ++ ++ xdr_init_encode(&xdr, &req->rq_snd_buf, p); ++ 
encode_compound_hdr(&xdr, &hdr); ++ status = encode_putfh(&xdr, args->fh); ++ if(status) ++ goto out; ++ status = encode_lock(&xdr, args); ++out: ++ return status; ++} ++ ++/* ++ * Encode a LOCKT request ++ */ ++static int ++nfs4_xdr_enc_lockt(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr = { ++ .nops = 2, ++ }; ++ int status; ++ ++ xdr_init_encode(&xdr, &req->rq_snd_buf, p); ++ encode_compound_hdr(&xdr, &hdr); ++ status = encode_putfh(&xdr, args->fh); ++ if(status) ++ goto out; ++ status = encode_lockt(&xdr, args); ++out: ++ return status; ++} ++ ++/* ++ * Encode a LOCKU request ++ */ ++static int ++nfs4_xdr_enc_locku(struct rpc_rqst *req, uint32_t *p, struct nfs_lockargs *args) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr = { ++ .nops = 2, ++ }; ++ int status; ++ ++ xdr_init_encode(&xdr, &req->rq_snd_buf, p); ++ encode_compound_hdr(&xdr, &hdr); ++ status = encode_putfh(&xdr, args->fh); ++ if(status) ++ goto out; ++ status = encode_locku(&xdr, args); ++out: ++ return status; ++} ++ ++/* ++ * Encode a READLINK request ++ */ ++static int ++nfs4_xdr_enc_readlink(struct rpc_rqst *req, uint32_t *p, struct nfs4_readlink *args) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr = { ++ .nops = 2, ++ }; ++ int status; ++ ++ xdr_init_encode(&xdr, &req->rq_snd_buf, p); ++ encode_compound_hdr(&xdr, &hdr); ++ status = encode_putfh(&xdr, args->fh); ++ if(status) ++ goto out; ++ status = encode_readlink(&xdr, args, req); ++out: ++ return status; ++} ++ ++/* ++ * Encode a READDIR request ++ */ ++static int ++nfs4_xdr_enc_readdir(struct rpc_rqst *req, uint32_t *p, struct nfs4_readdir_arg *args) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr = { ++ .nops = 2, ++ }; ++ int status; ++ ++ xdr_init_encode(&xdr, &req->rq_snd_buf, p); ++ encode_compound_hdr(&xdr, &hdr); ++ status = encode_putfh(&xdr, args->fh); ++ if(status) ++ goto out; ++ status = encode_readdir(&xdr, args, req); ++out: 
++ return status; ++} ++ ++/* ++ * Encode a READ request ++ */ ++static int ++nfs4_xdr_enc_read(struct rpc_rqst *req, uint32_t *p, struct nfs_readargs *args) ++{ ++ struct rpc_auth *auth = req->rq_task->tk_auth; ++ struct xdr_stream xdr; ++ struct compound_hdr hdr = { ++ .nops = 3, ++ }; ++ int replen, status; ++ ++ xdr_init_encode(&xdr, &req->rq_snd_buf, p); ++ encode_compound_hdr(&xdr, &hdr); ++ status = encode_putfh(&xdr, args->fh); ++ if (status) ++ goto out; ++ status = encode_read(&xdr, args); ++ if (status) ++ goto out; ++ status = encode_read_getattr(&xdr); ++ ++ /* set up reply iovec ++ * toplevel status + taglen=0 + rescount + OP_PUTFH + status + * + OP_READ + status + eof + datalen = 9 + */ + replen = (RPC_REPHDRSIZE + auth->au_rslack + +@@ -1405,12 +1851,62 @@ nfs4_xdr_enc_setattr(struct rpc_rqst *re + status = encode_setattr(&xdr, args, args->server); + if(status) + goto out; +- status = encode_getattr(&xdr, args->attr); ++ status = encode_getfattr(&xdr); ++out: ++ return status; ++} ++ ++#ifdef CONFIG_NFS_V4_ACL ++ ++/* ++ * Encode an SETACL request ++ */ ++static int ++nfs4_xdr_enc_setacl(struct rpc_rqst *req, uint32_t *p, struct nfs_setaclargs *args) ++ ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr = { ++ .nops = 2, ++ }; ++ int status; ++ ++ xdr_init_encode(&xdr, &req->rq_snd_buf, p); ++ encode_compound_hdr(&xdr, &hdr); ++ status = encode_putfh(&xdr, args->fh); ++ if(status) ++ goto out; ++ status = encode_setacl(&xdr, args); + out: + return status; + } + + /* ++ * Encode a GETACL request ++ */ ++static int ++nfs4_xdr_enc_getacl(struct rpc_rqst *req, uint32_t *p,struct nfs_fh *fhandle) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr = { ++ .nops = 2, ++ }; ++ int status; ++ ++ xdr_init_encode(&xdr, &req->rq_snd_buf, p); ++ encode_compound_hdr(&xdr, &hdr); ++ status = encode_putfh(&xdr, fhandle); ++ if (status) ++ goto out; ++ status = encode_getattr_two(&xdr, FATTR4_WORD0_ACL, 0); ++out: ++ return status; ++ ++} ++ ++#endif /* 
CONFIG_NFS_V4_ACL */ ++ ++/* + * Encode a WRITE request + */ + static int +@@ -1487,6 +1983,48 @@ nfs4_xdr_enc_fsinfo(struct rpc_rqst *req + } + + /* ++ * a PATHCONF request ++ */ ++static int ++nfs4_xdr_enc_pathconf(struct rpc_rqst *req, uint32_t *p, struct nfs_fh *fhandle) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr = { ++ .nops = 2, ++ }; ++ int status; ++ ++ xdr_init_encode(&xdr, &req->rq_snd_buf, p); ++ encode_compound_hdr(&xdr, &hdr); ++ status = encode_putfh(&xdr, fhandle); ++ if (!status) ++ status = encode_getattr_one(&xdr,FATTR4_WORD0_MAXLINK | ++ FATTR4_WORD0_MAXNAME ); ++ return status; ++} ++ ++/* ++ * a STATFS request ++ */ ++static int ++nfs4_xdr_enc_statfs(struct rpc_rqst *req, uint32_t *p, struct nfs_fh *fhandle) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr = { ++ .nops = 2, ++ }; ++ int status; ++ ++ xdr_init_encode(&xdr, &req->rq_snd_buf, p); ++ encode_compound_hdr(&xdr, &hdr); ++ status = encode_putfh(&xdr, fhandle); ++ if (!status) ++ status = encode_getattr_two(&xdr,nfs4_statfs_bitmap[0], ++ nfs4_statfs_bitmap[1]); ++ return status; ++} ++ ++/* + * a RENEW request + */ + static int +@@ -1636,7 +2174,7 @@ decode_change_info(struct xdr_stream *xd + } + + static int +-decode_access(struct xdr_stream *xdr, struct nfs4_access *access) ++decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access) + { + uint32_t *p; + uint32_t supp, acc; +@@ -1648,12 +2186,12 @@ decode_access(struct xdr_stream *xdr, st + READ_BUF(8); + READ32(supp); + READ32(acc); +- if ((supp & ~access->ac_req_access) || (acc & ~supp)) { ++ if ((supp & ~access->req_access) || (acc & ~supp)) { + printk(KERN_NOTICE "NFS: server returned bad bits in access call!\n"); + return -EIO; + } +- *access->ac_resp_supported = supp; +- *access->ac_resp_access = acc; ++ *access->resp_supported = supp; ++ *access->resp_access = acc; + return 0; + } + +@@ -1686,7 +2224,7 @@ decode_commit(struct xdr_stream *xdr, st + } + + static int +-decode_create(struct 
xdr_stream *xdr, struct nfs4_create *create) ++decode_create(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) + { + uint32_t *p; + uint32_t bmlen; +@@ -1695,7 +2233,7 @@ decode_create(struct xdr_stream *xdr, st + status = decode_op_hdr(xdr, OP_CREATE); + if (status) + return status; +- if ((status = decode_change_info(xdr, create->cr_cinfo))) ++ if ((status = decode_change_info(xdr, cinfo))) + return status; + READ_BUF(4); + READ32(bmlen); +@@ -1703,17 +2241,144 @@ decode_create(struct xdr_stream *xdr, st + return 0; + } + +-extern uint32_t nfs4_fattr_bitmap[2]; + extern uint32_t nfs4_fsstat_bitmap[2]; +-extern uint32_t nfs4_pathconf_bitmap[2]; + + static int +-decode_getattr(struct xdr_stream *xdr, struct nfs4_getattr *getattr, ++decode_statfs(struct xdr_stream *xdr, struct nfs_fsstat *fsstat) ++{ ++ uint32_t attrlen, bmlen, ++ bmval0 = 0, ++ bmval1 = 0, ++ len = 0; ++ uint32_t *p; ++ int status; ++ ++ status = decode_op_hdr(xdr, OP_GETATTR); ++ if (status) ++ return status; ++ ++ READ_BUF(4); ++ READ32(bmlen); ++ if (bmlen > 2) ++ goto xdr_error; ++ ++ READ_BUF((bmlen << 2) + 4); ++ if (bmlen > 0) ++ READ32(bmval0); ++ if (bmlen > 1) ++ READ32(bmval1); ++ READ32(attrlen); ++ ++ if ((bmval0 & ~nfs4_statfs_bitmap[0]) || ++ (bmval1 & ~nfs4_statfs_bitmap[1])) { ++ dprintk("read_attrs: server returned bad attributes!\n"); ++ goto xdr_error; ++ } ++ ++ if (bmval0 & FATTR4_WORD0_FILES_AVAIL) { ++ READ_BUF(8); ++ len += 8; ++ READ64(fsstat->afiles); ++ dprintk("read_attrs: files_avail=0x%Lx\n", (long long) fsstat->afiles); ++ } ++ if (bmval0 & FATTR4_WORD0_FILES_FREE) { ++ READ_BUF(8); ++ len += 8; ++ READ64(fsstat->ffiles); ++ dprintk("read_attrs: files_free=0x%Lx\n", (long long) fsstat->ffiles); ++ } ++ if (bmval0 & FATTR4_WORD0_FILES_TOTAL) { ++ READ_BUF(8); ++ len += 8; ++ READ64(fsstat->tfiles); ++ dprintk("read_attrs: files_tot=0x%Lx\n", (long long) fsstat->tfiles); ++ } ++ ++ if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) { ++ READ_BUF(8); ++ len += 8; ++ 
READ64(fsstat->abytes); ++ dprintk("read_attrs: savail=0x%Lx\n", (long long) fsstat->abytes); ++ } ++ if (bmval1 & FATTR4_WORD1_SPACE_FREE) { ++ READ_BUF(8); ++ len += 8; ++ READ64(fsstat->fbytes); ++ dprintk("read_attrs: sfree=0x%Lx\n", (long long) fsstat->fbytes); ++ } ++ if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) { ++ READ_BUF(8); ++ len += 8; ++ READ64(fsstat->tbytes); ++ dprintk("read_attrs: stotal=0x%Lx\n", (long long) fsstat->tbytes); ++ } ++ if (len != attrlen) ++ goto xdr_error; ++ ++ DECODE_TAIL; ++} ++ ++static int ++decode_pathconf(struct xdr_stream *xdr, struct nfs_pathconf *pathconf) ++{ ++ uint32_t bmlen, ++ attrlen = 0, ++ bmval0 = 0, ++ bmval1 = 0, ++ len = 0; ++ uint32_t *p; ++ int status; ++ ++ status = decode_op_hdr(xdr, OP_GETATTR); ++ if (status) ++ return status; ++ ++ READ_BUF(4); ++ READ32(bmlen); ++ if ( (bmlen < 1) || (bmlen >2) ) ++ goto xdr_error; ++ READ_BUF((bmlen << 2) + 4); ++ READ32(bmval0); ++ if (bmval0 & ~(FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME)) { ++ goto out_bad_bitmap; ++ } ++ if (bmlen == 2) { ++ READ32(bmval1); ++ if (bmval1 != 0) ++ goto out_bad_bitmap; ++ } ++ ++ READ32(attrlen); ++ if (bmval0 & FATTR4_WORD0_MAXLINK) { ++ READ_BUF(4); ++ len += 4; ++ READ32(pathconf->max_link); ++ dprintk("read_attrs: maxlink=%d\n", pathconf->max_link); ++ } ++ if (bmval0 & FATTR4_WORD0_MAXNAME) { ++ READ_BUF(4); ++ len += 4; ++ READ32(pathconf->max_namelen); ++ dprintk("read_attrs: maxname=%d\n", pathconf->max_namelen); ++ } ++ ++ if (len != attrlen) ++ goto xdr_error; ++ return 0; ++ ++out_bad_bitmap: ++ printk(KERN_NOTICE "%s: server returned bad attribute bitmap\n",__FUNCTION__); ++ return -EIO; ++ ++xdr_error: ++ printk(KERN_NOTICE "xdr error! 
(%s:%d)\n", __FILE__, __LINE__); ++ return -EIO; ++} ++ ++static int ++decode_getfattr(struct xdr_stream *xdr, struct nfs_fattr *nfp, + struct nfs_server *server) + { +- struct nfs_fattr *nfp = getattr->gt_attrs; +- struct nfs_fsstat *fsstat = getattr->gt_fsstat; +- struct nfs_pathconf *pathconf = getattr->gt_pathconf; + uint32_t attrlen, dummy32, bmlen, + bmval0 = 0, + bmval1 = 0, +@@ -1739,25 +2404,25 @@ decode_getattr(struct xdr_stream *xdr, s + READ32(bmval1); + READ32(attrlen); + +- if ((bmval0 & ~getattr->gt_bmval[0]) || +- (bmval1 & ~getattr->gt_bmval[1])) { ++ if ((bmval0 & ~nfs4_fattr_bitmap[0]) || ++ (bmval1 & ~nfs4_fattr_bitmap[1])) { + dprintk("read_attrs: server returned bad attributes!\n"); + goto xdr_error; + } +- if (nfp) { +- nfp->bitmap[0] = bmval0; +- nfp->bitmap[1] = bmval1; +- } ++ ++ BUG_ON(!nfp); ++ ++ nfp->bitmap[0] = bmval0; ++ nfp->bitmap[1] = bmval1; + + /* + * In case the server doesn't return some attributes, + * we initialize them here to some nominal values.. 
+ */ +- if (nfp) { +- nfp->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4; +- nfp->nlink = 1; +- nfp->timestamp = jiffies; +- } ++ nfp->valid = NFS_ATTR_FATTR | NFS_ATTR_FATTR_V3 | NFS_ATTR_FATTR_V4; ++ nfp->nlink = 1; ++ nfp->timestamp = jiffies; ++ + if (bmval0 & FATTR4_WORD0_TYPE) { + READ_BUF(4); + len += 4; +@@ -1797,37 +2462,6 @@ decode_getattr(struct xdr_stream *xdr, s + READ64(nfp->fileid); + dprintk("read_attrs: fileid=%Ld\n", (long long) nfp->fileid); + } +- if (bmval0 & FATTR4_WORD0_FILES_AVAIL) { +- READ_BUF(8); +- len += 8; +- READ64(fsstat->afiles); +- dprintk("read_attrs: files_avail=0x%Lx\n", (long long) fsstat->afiles); +- } +- if (bmval0 & FATTR4_WORD0_FILES_FREE) { +- READ_BUF(8); +- len += 8; +- READ64(fsstat->ffiles); +- dprintk("read_attrs: files_free=0x%Lx\n", (long long) fsstat->ffiles); +- } +- if (bmval0 & FATTR4_WORD0_FILES_TOTAL) { +- READ_BUF(8); +- len += 8; +- READ64(fsstat->tfiles); +- dprintk("read_attrs: files_tot=0x%Lx\n", (long long) fsstat->tfiles); +- } +- if (bmval0 & FATTR4_WORD0_MAXLINK) { +- READ_BUF(4); +- len += 4; +- READ32(pathconf->max_link); +- dprintk("read_attrs: maxlink=%d\n", pathconf->max_link); +- } +- if (bmval0 & FATTR4_WORD0_MAXNAME) { +- READ_BUF(4); +- len += 4; +- READ32(pathconf->max_namelen); +- dprintk("read_attrs: maxname=%d\n", pathconf->max_namelen); +- } +- + if (bmval1 & FATTR4_WORD1_MODE) { + READ_BUF(4); + len += 4; +@@ -1851,9 +2485,11 @@ decode_getattr(struct xdr_stream *xdr, s + } + READ_BUF(dummy32); + len += (XDR_QUADLEN(dummy32) << 2); +- if ((status = nfs_map_name_to_uid(server->nfs4_state, (char *)p, dummy32, +- &nfp->uid)) < 0) { +- dprintk("read_attrs: name-to-uid mapping failed!\n"); ++ status = nfs_map_name_to_uid(server->nfs4_state, (char *)p, ++ dummy32, &nfp->uid); ++ if (status) { ++ dprintk("read_attrs: nfs_map_name_to_uid failed!\n"); ++ /* goto out; */ + nfp->uid = -2; + } + dprintk("read_attrs: uid=%d\n", (int)nfp->uid); +@@ -1868,10 +2504,12 @@ 
decode_getattr(struct xdr_stream *xdr, s + } + READ_BUF(dummy32); + len += (XDR_QUADLEN(dummy32) << 2); +- if ((status = nfs_map_group_to_gid(server->nfs4_state, (char *)p, dummy32, +- &nfp->gid)) < 0) { +- dprintk("read_attrs: group-to-gid mapping failed!\n"); ++ status = nfs_map_group_to_gid(server->nfs4_state, (char *)p, ++ dummy32, &nfp->gid); ++ if (status) { ++ dprintk("read_attrs: gss_get_num failed!\n"); + nfp->gid = -2; ++ /* goto out; */ + } + dprintk("read_attrs: gid=%d\n", (int)nfp->gid); + } +@@ -1882,28 +2520,10 @@ decode_getattr(struct xdr_stream *xdr, s + len += 8; + READ32(major); + READ32(minor); +- nfp->rdev = MKDEV(major, minor); +- if (MAJOR(nfp->rdev) != major || MINOR(nfp->rdev) != minor) +- nfp->rdev = 0; +- dprintk("read_attrs: rdev=%u:%u\n", major, minor); +- } +- if (bmval1 & FATTR4_WORD1_SPACE_AVAIL) { +- READ_BUF(8); +- len += 8; +- READ64(fsstat->abytes); +- dprintk("read_attrs: savail=0x%Lx\n", (long long) fsstat->abytes); +- } +- if (bmval1 & FATTR4_WORD1_SPACE_FREE) { +- READ_BUF(8); +- len += 8; +- READ64(fsstat->fbytes); +- dprintk("read_attrs: sfree=0x%Lx\n", (long long) fsstat->fbytes); +- } +- if (bmval1 & FATTR4_WORD1_SPACE_TOTAL) { +- READ_BUF(8); +- len += 8; +- READ64(fsstat->tbytes); +- dprintk("read_attrs: stotal=0x%Lx\n", (long long) fsstat->tbytes); ++ nfp->rdev = MKDEV(major, minor); ++ if (MAJOR(nfp->rdev) != major || MINOR(nfp->rdev) != minor) ++ nfp->rdev = 0; ++ dprintk("read_attrs: rdev=%u:%u\n", major, minor); + } + if (bmval1 & FATTR4_WORD1_SPACE_USED) { + READ_BUF(8); +@@ -1935,6 +2555,88 @@ decode_getattr(struct xdr_stream *xdr, s + DECODE_TAIL; + } + ++#ifdef CONFIG_NFS_V4_ACL ++ ++static int ++decode_getacl(struct xdr_stream *xdr, struct nfs4_acl **aclp) ++{ ++ uint32_t attrlen, bmlen, ++ bmval0 = 0, ++ bmval1 = 0, ++ len = 0; ++ uint32_t *p; ++ int status; ++ ++ status = decode_op_hdr(xdr, OP_GETATTR); ++ if (status) ++ return status; ++ ++ READ_BUF(4); ++ READ32(bmlen); ++ if (bmlen > 2) ++ goto xdr_error; 
++ ++ READ_BUF((bmlen << 2) + 4); ++ if (bmlen > 0) ++ READ32(bmval0); ++ if (bmlen > 1) ++ READ32(bmval1); ++ READ32(attrlen); ++ ++ if ((bmval0 & ~FATTR4_WORD0_ACL) || (bmval1)) { ++ dprintk("read_attrs: server returned bad attributes!\n"); ++ goto xdr_error; ++ } ++ if (bmval0 & FATTR4_WORD0_ACL) { ++ struct nfs4_acl *acl; ++ struct nfs4_ace ace; ++ int i; ++ u_int nace; ++ ++ if (aclp == NULL) ++ goto xdr_error; /* XXX MARIUS */ ++ ++ READ_BUF(4); len += 4; ++ READ32(nace); ++ ++ if (nace == 0) { ++ *aclp = NULL; ++ goto out_acl; ++ } ++ ++ acl = *aclp = nfs4_acl_new(); ++ if (acl == NULL) { ++ status = -ENOMEM; ++ goto out; ++ } ++ ++ for (i = 0; i < nace; i++) { ++ READ_BUF(16); len += 16; ++ READ32(ace.type); ++ READ32(ace.flag); ++ READ32(ace.access_mask); ++ ace.access_mask &= NFS4_ACE_MASK_ALL; ++ READ32(ace.wholen); ++ READ_BUF(ace.wholen); ++ len += XDR_QUADLEN(ace.wholen) << 2; ++ status = nfs4_acl_add_ace(acl, ace.type, ace.flag, ++ ace.access_mask, (char *)p, ace.wholen); ++ if (status < 0) ++ goto out; ++ p += XDR_QUADLEN(ace.wholen); ++ } ++ } else if (aclp != NULL) ++ *aclp = NULL; ++out_acl: ++ ++ if (len != attrlen) ++ goto xdr_error; ++ ++ DECODE_TAIL; ++} ++ ++#endif /* CONFIG_NFS_V4_ACL */ ++ + static int + decode_change_attr(struct xdr_stream *xdr, uint64_t *change_attr) + { +@@ -2067,6 +2769,77 @@ out_bad_bitmap: + return -EIO; + } + ++static int ++decode_putfh(struct xdr_stream *xdr) ++{ ++ return decode_op_hdr(xdr, OP_PUTFH); ++} ++ ++static int ++decode_setattr(struct xdr_stream *xdr) ++{ ++ uint32_t *p; ++ uint32_t bmlen; ++ int status; ++ ++ ++ status = decode_op_hdr(xdr, OP_SETATTR); ++ if (status) ++ return status; ++ READ_BUF(4); ++ READ32(bmlen); ++ READ_BUF(bmlen << 2); ++ return 0; ++} ++ ++#ifdef CONFIG_NFS_V4_ACL ++ ++/* ++ * Decode SETACL response ++ */ ++static int ++nfs4_xdr_dec_setacl(struct rpc_rqst *rqstp, uint32_t *p, void *res) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr; ++ int status; ++ ++ 
xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); ++ status = decode_compound_hdr(&xdr, &hdr); ++ if (status) ++ goto out; ++ status = decode_putfh(&xdr); ++ if (status) ++ goto out; ++ status = decode_setattr(&xdr); ++out: ++ return status; ++} ++ ++/* ++ * Decode GETACL response ++ */ ++static int ++nfs4_xdr_dec_getacl(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_acl **res) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr; ++ int status; ++ ++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); ++ status = decode_compound_hdr(&xdr, &hdr); ++ if (status) ++ goto out; ++ status = decode_putfh(&xdr); ++ if (status) ++ goto out; ++ status = decode_getacl(&xdr, res); ++ ++out: ++ return status; ++} ++ ++#endif /* CONFIG_NFS_V4_ACL */ + + static int + decode_fsinfo(struct xdr_stream *xdr, struct nfs_fsinfo *fsinfo) +@@ -2137,9 +2910,8 @@ out_bad_bitmap: + } + + static int +-decode_getfh(struct xdr_stream *xdr, struct nfs4_getfh *getfh) ++decode_getfh(struct xdr_stream *xdr, struct nfs_fh *fh) + { +- struct nfs_fh *fh = getfh->gf_fhandle; + uint32_t *p; + uint32_t len; + int status; +@@ -2161,14 +2933,14 @@ decode_getfh(struct xdr_stream *xdr, str + } + + static int +-decode_link(struct xdr_stream *xdr, struct nfs4_link *link) ++decode_link(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) + { + int status; + + status = decode_op_hdr(xdr, OP_LINK); + if (status) + return status; +- return decode_change_info(xdr, link->ln_cinfo); ++ return decode_change_info(xdr, cinfo); + } + + /* +@@ -2296,12 +3068,6 @@ decode_open_downgrade(struct xdr_stream + } + + static int +-decode_putfh(struct xdr_stream *xdr) +-{ +- return decode_op_hdr(xdr, OP_PUTFH); +-} +- +-static int + decode_putrootfh(struct xdr_stream *xdr) + { + return decode_op_hdr(xdr, OP_PUTROOTFH); +@@ -2336,7 +3102,7 @@ decode_read(struct xdr_stream *xdr, stru + } + + static int +-decode_readdir(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readdir *readdir) ++decode_readdir(struct xdr_stream 
*xdr, struct rpc_rqst *req, struct nfs4_readdir_res *readdir) + { + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct page *page = *rcvbuf->pages; +@@ -2350,7 +3116,7 @@ decode_readdir(struct xdr_stream *xdr, s + if (status) + return status; + READ_BUF(8); +- COPYMEM(readdir->rd_resp_verifier.data, 8); ++ COPYMEM(readdir->resp_verifier.data, 8); + + hdrlen = (char *) p - (char *) iov->iov_base; + recvd = req->rq_received - hdrlen; +@@ -2358,9 +3124,9 @@ decode_readdir(struct xdr_stream *xdr, s + pglen = recvd; + xdr_read_pages(xdr, pglen); + +- BUG_ON(pglen + readdir->rd_pgbase > PAGE_CACHE_SIZE); ++ BUG_ON(pglen + readdir->pgbase > PAGE_CACHE_SIZE); + kaddr = p = (uint32_t *) kmap_atomic(page, KM_USER0); +- end = (uint32_t *) ((char *)p + pglen + readdir->rd_pgbase); ++ end = (uint32_t *) ((char *)p + pglen + readdir->pgbase); + entry = p; + for (nr = 0; *p++; nr++) { + if (p + 3 > end) +@@ -2421,7 +3187,7 @@ err_unmap: + } + + static int +-decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req, struct nfs4_readlink *readlink) ++decode_readlink(struct xdr_stream *xdr, struct rpc_rqst *req) + { + struct xdr_buf *rcvbuf = &req->rq_rcv_buf; + struct iovec *iov = rcvbuf->head; +@@ -2469,30 +3235,30 @@ decode_restorefh(struct xdr_stream *xdr) + } + + static int +-decode_remove(struct xdr_stream *xdr, struct nfs4_remove *remove) ++decode_remove(struct xdr_stream *xdr, struct nfs4_change_info *cinfo) + { + int status; + + status = decode_op_hdr(xdr, OP_REMOVE); + if (status) + goto out; +- status = decode_change_info(xdr, remove->rm_cinfo); ++ status = decode_change_info(xdr, cinfo); + out: + return status; + } + + static int +-decode_rename(struct xdr_stream *xdr, struct nfs4_rename *rename) ++decode_rename(struct xdr_stream *xdr, struct nfs4_change_info *old_cinfo, ++ struct nfs4_change_info *new_cinfo) + { + int status; + + status = decode_op_hdr(xdr, OP_RENAME); + if (status) + goto out; +- if ((status = decode_change_info(xdr, rename->rn_src_cinfo))) +- goto 
out; +- if ((status = decode_change_info(xdr, rename->rn_dst_cinfo))) ++ if ((status = decode_change_info(xdr, old_cinfo))) + goto out; ++ status = decode_change_info(xdr, new_cinfo); + out: + return status; + } +@@ -2510,23 +3276,6 @@ decode_savefh(struct xdr_stream *xdr) + } + + static int +-decode_setattr(struct xdr_stream *xdr, struct nfs_setattrres *res) +-{ +- uint32_t *p; +- uint32_t bmlen; +- int status; +- +- +- status = decode_op_hdr(xdr, OP_SETATTR); +- if (status) +- return status; +- READ_BUF(4); +- READ32(bmlen); +- READ_BUF(bmlen << 2); +- return 0; +-} +- +-static int + decode_setclientid(struct xdr_stream *xdr, struct nfs4_client *clp) + { + uint32_t *p; +@@ -2566,158 +3315,348 @@ decode_setclientid(struct xdr_stream *xd + } + + static int +-decode_setclientid_confirm(struct xdr_stream *xdr) ++decode_setclientid_confirm(struct xdr_stream *xdr) ++{ ++ return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM); ++} ++ ++static int ++decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) ++{ ++ uint32_t *p; ++ int status; ++ ++ status = decode_op_hdr(xdr, OP_WRITE); ++ if (status) ++ return status; ++ ++ READ_BUF(16); ++ READ32(res->count); ++ READ32(res->verf->committed); ++ COPYMEM(res->verf->verifier, 8); ++ return 0; ++} ++ ++/* ++ * Decode OPEN_DOWNGRADE response ++ */ ++static int ++nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr; ++ int status; ++ ++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); ++ status = decode_compound_hdr(&xdr, &hdr); ++ if (status) ++ goto out; ++ status = decode_putfh(&xdr); ++ if (status) ++ goto out; ++ status = decode_open_downgrade(&xdr, res); ++out: ++ return status; ++} ++ ++/* ++ * END OF "GENERIC" DECODE ROUTINES. 
++ */ ++ ++/* ++ * Decode ACCESS response ++ */ ++static int ++nfs4_xdr_dec_access(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_accessres *res) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr; ++ int status; ++ ++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); ++ status = decode_compound_hdr(&xdr, &hdr); ++ if (status) ++ goto out; ++ status = decode_putfh(&xdr); ++ if (status) ++ goto out; ++ status = decode_getfattr(&xdr, res->fattr, res->server); ++ if (status) ++ goto out; ++ status = decode_access(&xdr, res); ++out: ++ return status; ++} ++ ++/* ++ * Decode LOOKUP response ++ */ ++static int ++nfs4_xdr_dec_lookup(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_lookupres *res) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr; ++ int status; ++ ++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); ++ status = decode_compound_hdr(&xdr, &hdr); ++ if (status) ++ goto out; ++ status = decode_putfh(&xdr); ++ if (status) ++ goto out; ++ status = decode_getfattr(&xdr, res->dirattr, res->server); ++ if (status) ++ goto out; ++ status = decode_lookup(&xdr); ++ if (status) ++ goto out; ++ status = decode_getfattr(&xdr, res->fattr, res->server); ++ if (status) ++ goto out; ++ status = decode_getfh(&xdr, res->fhandle); ++out: ++ return status; ++} ++ ++/* ++ * Decode GETROOT_HEAD response ++ */ ++static int ++nfs4_xdr_dec_getroot_head(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_getroot_res *res) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr; ++ int status; ++ ++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); ++ status = decode_compound_hdr(&xdr, &hdr); ++ if (status) ++ goto out; ++ status = decode_putrootfh(&xdr); ++ if (status) ++ goto out; ++ status = decode_getfattr(&xdr, res->fattr, res->server); ++ if (status) ++ goto out; ++ status = decode_getfh(&xdr, res->fhandle); ++out: ++ return status; ++} ++ ++/* ++ * Decode GETROOT_PATH response ++ */ ++static int ++nfs4_xdr_dec_getroot_path(struct rpc_rqst *rqstp, uint32_t *p, struct 
nfs4_getroot_res *res) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr; ++ int status; ++ ++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); ++ status = decode_compound_hdr(&xdr, &hdr); ++ if (status) ++ goto out; ++ status = decode_putfh(&xdr); ++ if (status) ++ goto out; ++ status = decode_lookup(&xdr); ++ if (status) ++ goto out; ++ status = decode_getfattr(&xdr, res->fattr, res->server); ++ if (status) ++ goto out; ++ status = decode_getfh(&xdr, res->fhandle); ++out: ++ return status; ++} ++ ++/* ++ * Decode REMOVE response ++ */ ++static int ++nfs4_xdr_dec_remove(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_remove_res *res) + { +- return decode_op_hdr(xdr, OP_SETCLIENTID_CONFIRM); ++ struct xdr_stream xdr; ++ struct compound_hdr hdr; ++ int status; ++ ++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); ++ status = decode_compound_hdr(&xdr, &hdr); ++ if (status) ++ goto out; ++ status = decode_putfh(&xdr); ++ if (status) ++ goto out; ++ status = decode_remove(&xdr, res->dir_cinfo); ++ if (status) ++ goto out; ++ status = decode_getfattr(&xdr, res->dir_attr, res->server); ++out: ++ return status; + } + ++/* ++ * Decode UNLINK response ++ */ + static int +-decode_write(struct xdr_stream *xdr, struct nfs_writeres *res) ++nfs4_xdr_dec_unlink(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_unlink *res) + { +- uint32_t *p; ++ struct xdr_stream xdr; ++ struct compound_hdr hdr; + int status; +- +- status = decode_op_hdr(xdr, OP_WRITE); ++ ++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); ++ status = decode_compound_hdr(&xdr, &hdr); + if (status) +- return status; +- +- READ_BUF(16); +- READ32(res->count); +- READ32(res->verf->committed); +- COPYMEM(res->verf->verifier, 8); +- return 0; ++ goto out; ++ status = decode_putfh(&xdr); ++ if (status) ++ goto out; ++ status = decode_remove(&xdr, &res->cinfo); ++ if (status) ++ goto out; ++ status = decode_getfattr(&xdr, &res->attrs, res->server); ++out: ++ return status; + } + +-/* FIXME: this sucks */ ++/* ++ * 
Decode RENAME response ++ */ + static int +-decode_compound(struct xdr_stream *xdr, struct nfs4_compound *cp, struct rpc_rqst *req) ++nfs4_xdr_dec_rename(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_rename_res *res) + { ++ struct xdr_stream xdr; + struct compound_hdr hdr; +- struct nfs4_op *op; + int status; +- +- status = decode_compound_hdr(xdr, &hdr); ++ ++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); ++ status = decode_compound_hdr(&xdr, &hdr); + if (status) + goto out; +- +- cp->toplevel_status = hdr.status; +- +- /* +- * We need this if our zero-copy I/O is going to work. Rumor has +- * it that the spec will soon mandate it... +- */ +- if (hdr.taglen != cp->taglen) +- dprintk("nfs4: non-conforming server returns tag length mismatch!\n"); +- +- cp->resp_nops = hdr.nops; +- if (hdr.nops > cp->req_nops) { +- dprintk("nfs4: resp_nops > req_nops!\n"); +- goto xdr_error; +- } +- +- op = &cp->ops[0]; +- for (cp->nops = 0; cp->nops < cp->resp_nops; cp->nops++, op++) { +- switch (op->opnum) { +- case OP_ACCESS: +- status = decode_access(xdr, &op->u.access); +- break; +- case OP_CREATE: +- status = decode_create(xdr, &op->u.create); +- break; +- case OP_GETATTR: +- status = decode_getattr(xdr, &op->u.getattr, cp->server); +- break; +- case OP_GETFH: +- status = decode_getfh(xdr, &op->u.getfh); +- break; +- case OP_LINK: +- status = decode_link(xdr, &op->u.link); +- break; +- case OP_LOOKUP: +- status = decode_lookup(xdr); +- break; +- case OP_PUTFH: +- status = decode_putfh(xdr); +- break; +- case OP_PUTROOTFH: +- status = decode_putrootfh(xdr); +- break; +- case OP_READDIR: +- status = decode_readdir(xdr, req, &op->u.readdir); +- break; +- case OP_READLINK: +- status = decode_readlink(xdr, req, &op->u.readlink); +- break; +- case OP_RESTOREFH: +- status = decode_restorefh(xdr); +- break; +- case OP_REMOVE: +- status = decode_remove(xdr, &op->u.remove); +- break; +- case OP_RENAME: +- status = decode_rename(xdr, &op->u.rename); +- break; +- case OP_SAVEFH: +- 
status = decode_savefh(xdr); +- break; +- default: +- BUG(); +- return -EIO; +- } +- if (status) +- break; +- } +- +- DECODE_TAIL; ++ status = decode_putfh(&xdr); ++ if (status) ++ goto out; ++ status = decode_savefh(&xdr); ++ if (status) ++ goto out; ++ status = decode_putfh(&xdr); ++ if (status) ++ goto out; ++ status = decode_rename(&xdr, res->old_cinfo, res->new_cinfo); ++ if (status) ++ goto out; ++ status = decode_getfattr(&xdr, res->new_fattr, res->server); ++ if (status) ++ goto out; ++ status = decode_restorefh(&xdr); ++ if (status) ++ goto out; ++ status = decode_getfattr(&xdr, res->old_fattr, res->server); ++out: ++ return status; + } + + /* +- * Decode OPEN_DOWNGRADE response ++ * Decode LINK response + */ + static int +-nfs4_xdr_dec_open_downgrade(struct rpc_rqst *rqstp, uint32_t *p, struct nfs_closeres *res) ++nfs4_xdr_dec_link(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_link_res *res) + { +- struct xdr_stream xdr; +- struct compound_hdr hdr; +- int status; +- +- xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); +- status = decode_compound_hdr(&xdr, &hdr); +- if (status) +- goto out; +- status = decode_putfh(&xdr); +- if (status) +- goto out; +- status = decode_open_downgrade(&xdr, res); ++ struct xdr_stream xdr; ++ struct compound_hdr hdr; ++ int status; ++ ++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); ++ status = decode_compound_hdr(&xdr, &hdr); ++ if (status) ++ goto out; ++ status = decode_putfh(&xdr); ++ if (status) ++ goto out; ++ status = decode_savefh(&xdr); ++ if (status) ++ goto out; ++ status = decode_putfh(&xdr); ++ if (status) ++ goto out; ++ status = decode_link(&xdr, res->dir_cinfo); ++ if (status) ++ goto out; ++ status = decode_getfattr(&xdr, res->dir_attr, res->server); ++ if (status) ++ goto out; ++ status = decode_restorefh(&xdr); ++ if (status) ++ goto out; ++ status = decode_getfattr(&xdr, res->fattr, res->server); + out: +- return status; ++ return status; + } + + /* +- * END OF "GENERIC" DECODE ROUTINES. 
++ * Decode CREATE response + */ ++static int ++nfs4_xdr_dec_create(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_create_res *res) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr; ++ int status; ++ ++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); ++ status = decode_compound_hdr(&xdr, &hdr); ++ if (status) ++ goto out; ++ status = decode_putfh(&xdr); ++ if (status) ++ goto out; ++ status = decode_savefh(&xdr); ++ if (status) ++ goto out; ++ status = decode_create(&xdr,res->dir_cinfo); ++ if (status) ++ goto out; ++ status = decode_getfattr(&xdr, res->fattr, res->server); ++ if (status) ++ goto out; ++ status = decode_getfh(&xdr, res->fhandle); ++ if (status) ++ goto out; ++ status = decode_restorefh(&xdr); ++ if (status) ++ goto out; ++ status = decode_getfattr(&xdr, res->dir_attr, res->server); ++out: ++ return status; ++} + + /* +- * Decode COMPOUND response ++ * Decode GETATTR response + */ + static int +-nfs4_xdr_dec_compound(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_compound *cp) ++nfs4_xdr_dec_getattr(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_getattr_res *res) + { + struct xdr_stream xdr; ++ struct compound_hdr hdr; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); +- if ((status = decode_compound(&xdr, cp, rqstp))) ++ status = decode_compound_hdr(&xdr, &hdr); ++ if (status) + goto out; +- +- status = 0; +- if (cp->toplevel_status) +- status = -nfs_stat_to_errno(cp->toplevel_status); +- ++ status = decode_putfh(&xdr); ++ if (status) ++ goto out; ++ status = decode_getfattr(&xdr, res->fattr, res->server); + out: + return status; ++ + } + ++ + /* + * Decode CLOSE response + */ +@@ -2748,9 +3687,6 @@ nfs4_xdr_dec_open(struct rpc_rqst *rqstp + { + struct xdr_stream xdr; + struct compound_hdr hdr; +- struct nfs4_getfh gfh = { +- .gf_fhandle = &res->fh, +- }; + int status; + + xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); +@@ -2766,16 +3702,16 @@ nfs4_xdr_dec_open(struct rpc_rqst *rqstp + status = decode_open(&xdr, res); + if 
(status) + goto out; +- status = decode_getattr(&xdr, res->f_getattr, res->server); ++ status = decode_getfattr(&xdr, res->f_attr, res->server); + if (status) + goto out; +- status = decode_getfh(&xdr, &gfh); ++ status = decode_getfh(&xdr, &res->fh); + if (status) + goto out; + status = decode_restorefh(&xdr); + if (status) + goto out; +- status = decode_getattr(&xdr, res->d_getattr, res->server); ++ status = decode_getfattr(&xdr, res->d_attr, res->server); + if (status) + goto out; + out: +@@ -2824,7 +3760,7 @@ nfs4_xdr_dec_open_reclaim(struct rpc_rqs + status = decode_open(&xdr, res); + if (status) + goto out; +- status = decode_getattr(&xdr, res->f_getattr, res->server); ++ status = decode_getfattr(&xdr, res->f_attr, res->server); + out: + return status; + } +@@ -2846,10 +3782,10 @@ nfs4_xdr_dec_setattr(struct rpc_rqst *rq + status = decode_putfh(&xdr); + if (status) + goto out; +- status = decode_setattr(&xdr, res); ++ status = decode_setattr(&xdr); + if (status) + goto out; +- status = decode_getattr(&xdr, res->attr, res->server); ++ status = decode_getfattr(&xdr, res->fattr, res->server); + out: + return status; + } +@@ -2921,6 +3857,50 @@ out: + } + + /* ++ * Decode READLINK response ++ */ ++static int ++nfs4_xdr_dec_readlink(struct rpc_rqst *rqstp, uint32_t *p, void *res) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr; ++ int status; ++ ++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); ++ status = decode_compound_hdr(&xdr, &hdr); ++ if (status) ++ goto out; ++ status = decode_putfh(&xdr); ++ if (status) ++ goto out; ++ status = decode_readlink(&xdr, rqstp); ++out: ++ return status; ++} ++ ++/* ++ * Decode READDIR response ++ */ ++static int ++nfs4_xdr_dec_readdir(struct rpc_rqst *rqstp, uint32_t *p, struct nfs4_readdir_res *res) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr; ++ int status; ++ ++ xdr_init_decode(&xdr, &rqstp->rq_rcv_buf, p); ++ status = decode_compound_hdr(&xdr, &hdr); ++ if (status) ++ goto out; ++ status = 
decode_putfh(&xdr); ++ if (status) ++ goto out; ++ status = decode_readdir(&xdr, rqstp, res); ++out: ++ return status; ++} ++ ++/* + * Decode Read response + */ + static int +@@ -3033,6 +4013,44 @@ nfs4_xdr_dec_fsinfo(struct rpc_rqst *req + } + + /* ++ * PATHCONF request ++ */ ++static int ++nfs4_xdr_dec_pathconf(struct rpc_rqst *req, uint32_t *p, struct nfs_pathconf *pathconf) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr; ++ int status; ++ ++ xdr_init_decode(&xdr, &req->rq_rcv_buf, p); ++ status = decode_compound_hdr(&xdr, &hdr); ++ if (!status) ++ status = decode_putfh(&xdr); ++ if (!status) ++ status = decode_pathconf(&xdr, pathconf); ++ return status; ++} ++ ++/* ++ * STATFS request ++ */ ++static int ++nfs4_xdr_dec_statfs(struct rpc_rqst *req, uint32_t *p, struct nfs_fsstat *fsstat) ++{ ++ struct xdr_stream xdr; ++ struct compound_hdr hdr; ++ int status; ++ ++ xdr_init_decode(&xdr, &req->rq_rcv_buf, p); ++ status = decode_compound_hdr(&xdr, &hdr); ++ if (!status) ++ status = decode_putfh(&xdr); ++ if (!status) ++ status = decode_statfs(&xdr, fsstat); ++ return status; ++} ++ ++/* + * Decode RENEW response + */ + static int +@@ -3201,7 +4219,6 @@ nfs_stat_to_errno(int stat) + } + + struct rpc_procinfo nfs4_procedures[] = { +- PROC(COMPOUND, enc_compound, dec_compound), + PROC(READ, enc_read, dec_read), + PROC(WRITE, enc_write, dec_write), + PROC(COMMIT, enc_commit, dec_commit), +@@ -3218,6 +4235,24 @@ struct rpc_procinfo nfs4_procedures[] = + PROC(LOCK, enc_lock, dec_lock), + PROC(LOCKT, enc_lockt, dec_lockt), + PROC(LOCKU, enc_locku, dec_locku), ++#ifdef CONFIG_NFS_V4_ACL ++ PROC(GETACL, enc_getacl, dec_getacl), ++ PROC(SETACL, enc_setacl, dec_setacl), ++#endif /* CONFIG_NFS_V4_ACL */ ++ PROC(ACCESS, enc_access, dec_access), ++ PROC(GETATTR, enc_getattr, dec_getattr), ++ PROC(LOOKUP, enc_lookup, dec_lookup), ++ PROC(GETROOT_HEAD, enc_getroot_head, dec_getroot_head), ++ PROC(GETROOT_PATH, enc_getroot_path, dec_getroot_path), ++ PROC(REMOVE, 
enc_remove, dec_remove), ++ PROC(RENAME, enc_rename, dec_rename), ++ PROC(LINK, enc_link, dec_link), ++ PROC(CREATE, enc_create, dec_create), ++ PROC(PATHCONF, enc_pathconf, dec_pathconf), ++ PROC(STATFS, enc_statfs, dec_statfs), ++ PROC(UNLINK, enc_unlink, dec_unlink), ++ PROC(READLINK, enc_readlink, dec_readlink), ++ PROC(READDIR, enc_readdir, dec_readdir), + }; + + struct rpc_version nfs_version4 = { +diff -puN fs/nfsd/vfs.c~CITI_NFS4_ALL fs/nfsd/vfs.c +--- linux-2.6.3/fs/nfsd/vfs.c~CITI_NFS4_ALL 2004-02-19 16:47:07.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfsd/vfs.c 2004-02-19 16:47:12.000000000 -0500 +@@ -44,6 +44,16 @@ + #include + #include + #include ++#ifdef CONFIG_NFSD_V4 ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#endif /* CONFIG_NFSD_V4 */ + + #include + +@@ -341,6 +351,204 @@ out_nfserr: + goto out; + } + ++#ifdef CONFIG_NFS_V4_ACL ++ ++static int ++set_nfsv4_acl_one(struct dentry *dentry, struct posix_acl *pacl, char *key) ++{ ++ int len; ++ size_t buflen; ++ char *buf = NULL; ++ int error = 0; ++ struct inode *inode = dentry->d_inode; ++ ++ buflen = posix_acl_xattr_size(pacl->a_count); ++ buf = kmalloc(buflen, GFP_KERNEL); ++ error = -ENOMEM; ++ if (buf == NULL) ++ goto out; ++ ++ len = posix_acl_to_xattr(pacl, buf, buflen); ++ if (len < 0) { ++ error = len; ++ goto out; ++ } ++ ++ error = -EOPNOTSUPP; ++ if (inode->i_op && inode->i_op->setxattr) { ++ down(&inode->i_sem); ++ security_inode_setxattr(dentry, key, buf, len, 0); ++ error = inode->i_op->setxattr(dentry, key, buf, len, 0); ++ if (!error) ++ security_inode_post_setxattr(dentry, key, buf, len, 0); ++ up(&inode->i_sem); ++ } ++out: ++ kfree(buf); ++ return (error); ++} ++ ++static inline int ++nfsd_name_to_uid_wrapper(void *arg, const char *name, size_t len, __u32 *id) ++{ ++ return nfsd_map_name_to_uid((struct svc_rqst *)arg, name, len, id); ++} ++ ++static inline int ++nfsd_name_to_gid_wrapper(void *arg, const char *name, size_t len, __u32 
*id) ++{ ++ return nfsd_map_name_to_gid((struct svc_rqst *)arg, name, len, id); ++} ++ ++static inline int ++nfsd_uid_to_name_wrapper(void *arg, __u32 id, char *name) ++{ ++ return nfsd_map_uid_to_name((struct svc_rqst *)arg, id, name); ++} ++ ++static inline int ++nfsd_gid_to_name_wrapper(void *arg, __u32 id, char *name) ++{ ++ return nfsd_map_gid_to_name((struct svc_rqst *)arg, id, name); ++} ++ ++static struct nfs4_acl_idmapper nfsd_idmapper = { ++ .name2uid = nfsd_name_to_uid_wrapper, ++ .name2gid = nfsd_name_to_gid_wrapper, ++ .uid2name = nfsd_uid_to_name_wrapper, ++ .gid2name = nfsd_gid_to_name_wrapper, ++}; ++ ++ ++int ++nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp, ++ struct nfs4_acl *acl) ++{ ++ int error; ++ struct dentry *dentry; ++ struct inode *inode; ++ struct posix_acl *pacl = NULL, *dpacl = NULL; ++ ++ /* Get inode */ ++ error = fh_verify(rqstp, fhp, 0 /* S_IFREG */, MAY_SATTR); ++ if (error) ++ goto out; ++ ++ dentry = fhp->fh_dentry; ++ inode = dentry->d_inode; ++ ++ error = nfs4_acl_nfsv4_to_posix(&nfsd_idmapper, rqstp, acl, &pacl, &dpacl); ++ if (error < 0) ++ goto out_nfserr; ++ ++ if (pacl) { ++ error = set_nfsv4_acl_one(dentry, pacl, XATTR_NAME_ACL_ACCESS); ++ if (error < 0) ++ goto out_nfserr; ++ } ++ ++ if (dpacl) { ++ error = set_nfsv4_acl_one(dentry, dpacl, XATTR_NAME_ACL_DEFAULT); ++ if (error < 0) ++ goto out_nfserr; ++ } ++ ++ error = nfs_ok; ++ ++out: ++ posix_acl_release(pacl); ++ posix_acl_release(dpacl); ++ return (error); ++out_nfserr: ++ error = nfserrno(error); ++ goto out; ++} ++ ++static struct posix_acl * ++_get_posix_acl(struct dentry *dentry, char *key) ++{ ++ struct inode *inode = dentry->d_inode; ++ char *buf = NULL; ++ int buflen, error = 0; ++ struct posix_acl *pacl = NULL; ++ ++ down(&inode->i_sem); ++ ++ buflen = inode->i_op->getxattr(dentry, key, NULL, 0); ++ if (buflen <= 0) { ++ error = buflen < 0 ? 
buflen : -ENODATA; ++ goto out_sem; ++ } ++ ++ buf = kmalloc(buflen, GFP_KERNEL); ++ if (buf == NULL) { ++ error = -ENOMEM; ++ goto out_sem; ++ } ++ ++ error = -EOPNOTSUPP; ++ if (inode->i_op && inode->i_op->getxattr) { ++ error = security_inode_getxattr(dentry, key); ++ if (error) ++ goto out_sem; ++ error = inode->i_op->getxattr(dentry, key, buf, buflen); ++ } ++ if (error < 0) ++ goto out_sem; ++ ++ error = 0; ++ up(&inode->i_sem); ++ ++ pacl = posix_acl_from_xattr(buf, buflen); ++ out: ++ kfree(buf); ++ return pacl; ++ out_sem: ++ up(&inode->i_sem); ++ pacl = ERR_PTR(error); ++ goto out; ++} ++ ++int ++nfsd4_get_nfs4_acl(struct svc_rqst *rqstp, struct dentry *dentry, struct nfs4_acl **acl) ++{ ++ struct inode *inode = dentry->d_inode; ++ int error = 0; ++ struct posix_acl *pacl = NULL, *dpacl = NULL; ++ ++ pacl = _get_posix_acl(dentry, XATTR_NAME_ACL_ACCESS); ++ if (IS_ERR(pacl) && PTR_ERR(pacl) == -ENODATA) ++ pacl = posix_acl_from_mode(inode->i_mode, GFP_KERNEL); ++ if (IS_ERR(pacl)) { ++ error = PTR_ERR(pacl); ++ pacl = NULL; ++ goto out; ++ } ++ ++ if (S_ISDIR(inode->i_mode)) { ++ dpacl = _get_posix_acl(dentry, XATTR_NAME_ACL_DEFAULT); ++ if (IS_ERR(dpacl) && PTR_ERR(dpacl) == -ENODATA) ++ dpacl = NULL; ++ else if (IS_ERR(dpacl)) { ++ error = PTR_ERR(dpacl); ++ dpacl = NULL; ++ goto out; ++ } ++ } ++ ++ *acl = nfs4_acl_posix_to_nfsv4(&nfsd_idmapper, rqstp, pacl, dpacl); ++ if (IS_ERR(*acl)) { ++ error = PTR_ERR(*acl); ++ *acl = NULL; ++ } ++ out: ++ posix_acl_release(pacl); ++ posix_acl_release(dpacl); ++ return error; ++} ++ ++#endif /* CONFIG_NFS_V4_ACL */ ++ + #ifdef CONFIG_NFSD_V3 + /* + * Check server access rights to a file system object +@@ -458,11 +666,15 @@ nfsd_open(struct svc_rqst *rqstp, struct + int flags = O_RDONLY|O_LARGEFILE, err; + + /* +- * If we get here, then the client has already done an "open", ++ * If we get here, then for regular files, ++ * the client has already done an "open", + * and (hopefully) checked permission - so allow 
OWNER_OVERRIDE + * in case a chmod has now revoked permission. + */ +- err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE); ++ if (type == S_IFDIR) ++ err = fh_verify(rqstp, fhp, type, access); ++ else ++ err = fh_verify(rqstp, fhp, type, access | MAY_OWNER_OVERRIDE); + if (err) + goto out; + +@@ -1494,7 +1706,7 @@ nfsd_readdir(struct svc_rqst *rqstp, str + err = cdp->err; + *offsetp = file.f_pos; + +- if (err == nfserr_eof || err == nfserr_readdir_nospc) ++ if (err == nfserr_eof || err == nfserr_toosmall) + err = nfs_ok; /* can still be found in ->err */ + out_close: + nfsd_close(&file); +diff -puN include/linux/nfsd/nfsd.h~CITI_NFS4_ALL include/linux/nfsd/nfsd.h +--- linux-2.6.3/include/linux/nfsd/nfsd.h~CITI_NFS4_ALL 2004-02-19 16:47:07.000000000 -0500 ++++ linux-2.6.3-bfields/include/linux/nfsd/nfsd.h 2004-02-19 16:47:11.000000000 -0500 +@@ -76,6 +76,11 @@ int nfsd_lookup(struct svc_rqst *, stru + const char *, int, struct svc_fh *); + int nfsd_setattr(struct svc_rqst *, struct svc_fh *, + struct iattr *, int, time_t); ++#ifdef CONFIG_NFSD_V4 ++int nfsd4_set_nfs4_acl(struct svc_rqst *, struct svc_fh *, ++ struct nfs4_acl *); ++int nfsd4_get_nfs4_acl(struct svc_rqst *, struct dentry *, struct nfs4_acl **); ++#endif /* CONFIG_NFSD_V4 */ + int nfsd_create(struct svc_rqst *, struct svc_fh *, + char *name, int len, struct iattr *attrs, + int type, dev_t rdev, struct svc_fh *res); +@@ -190,9 +195,12 @@ void nfsd_lockd_shutdown(void); + #define nfserr_bad_seqid __constant_htonl(NFSERR_BAD_SEQID) + #define nfserr_symlink __constant_htonl(NFSERR_SYMLINK) + #define nfserr_not_same __constant_htonl(NFSERR_NOT_SAME) +-#define nfserr_readdir_nospc __constant_htonl(NFSERR_READDIR_NOSPC) ++#define nfserr_restorefh __constant_htonl(NFSERR_RESTOREFH) ++#define nfserr_attrnotsupp __constant_htonl(NFSERR_ATTRNOTSUPP) + #define nfserr_bad_xdr __constant_htonl(NFSERR_BAD_XDR) + #define nfserr_openmode __constant_htonl(NFSERR_OPENMODE) ++#define nfserr_locks_held 
__constant_htonl(NFSERR_LOCKS_HELD) ++#define nfserr_op_illegal __constant_htonl(NFSERR_OP_ILLEGAL) + + /* error codes for internal use */ + /* if a request fails due to kmalloc failure, it gets dropped. +@@ -247,7 +255,6 @@ static inline int is_fsid(struct svc_fh + + /* + * The following attributes are currently not supported by the NFSv4 server: +- * ACL (will be supported in a forthcoming patch) + * ARCHIVE (deprecated anyway) + * FS_LOCATIONS (will be supported eventually) + * HIDDEN (unlikely to be supported any time soon) +@@ -267,7 +274,7 @@ static inline int is_fsid(struct svc_fh + | FATTR4_WORD0_FILEHANDLE | FATTR4_WORD0_FILEID | FATTR4_WORD0_FILES_AVAIL \ + | FATTR4_WORD0_FILES_FREE | FATTR4_WORD0_FILES_TOTAL | FATTR4_WORD0_HOMOGENEOUS \ + | FATTR4_WORD0_MAXFILESIZE | FATTR4_WORD0_MAXLINK | FATTR4_WORD0_MAXNAME \ +- | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE) ++ | FATTR4_WORD0_MAXREAD | FATTR4_WORD0_MAXWRITE | FATTR4_WORD0_ACL) + + #define NFSD_SUPPORTED_ATTRS_WORD1 \ + (FATTR4_WORD1_MODE | FATTR4_WORD1_NO_TRUNC | FATTR4_WORD1_NUMLINKS \ +@@ -282,7 +289,8 @@ static inline int is_fsid(struct svc_fh + (FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_MODIFY_SET) + + /* These are the only attrs allowed in CREATE/OPEN/SETATTR. 
*/ +-#define NFSD_WRITEABLE_ATTRS_WORD0 FATTR4_WORD0_SIZE ++#define NFSD_WRITEABLE_ATTRS_WORD0 \ ++(FATTR4_WORD0_SIZE | FATTR4_WORD0_ACL ) + #define NFSD_WRITEABLE_ATTRS_WORD1 \ + (FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER | FATTR4_WORD1_OWNER_GROUP \ + | FATTR4_WORD1_TIME_ACCESS_SET | FATTR4_WORD1_TIME_METADATA | FATTR4_WORD1_TIME_MODIFY_SET) +diff -puN net/sunrpc/auth_gss/gss_krb5_crypto.c~CITI_NFS4_ALL net/sunrpc/auth_gss/gss_krb5_crypto.c +--- linux-2.6.3/net/sunrpc/auth_gss/gss_krb5_crypto.c~CITI_NFS4_ALL 2004-02-19 16:47:07.000000000 -0500 ++++ linux-2.6.3-bfields/net/sunrpc/auth_gss/gss_krb5_crypto.c 2004-02-19 16:47:07.000000000 -0500 +@@ -58,14 +58,14 @@ krb5_encrypt( + struct scatterlist sg[1]; + u8 local_iv[16] = {0}; + +- dprintk("RPC: krb5_encrypt: input data:\n"); ++ dprintk("RPC: krb5_encrypt: input data:\n"); + print_hexl((u32 *)in, length, 0); + + if (length % crypto_tfm_alg_blocksize(tfm) != 0) + goto out; + + if (crypto_tfm_alg_ivsize(tfm) > 16) { +- dprintk("RPC: gss_k5encrypt: tfm iv size to large %d\n", ++ dprintk("RPC: gss_k5encrypt: tfm iv size to large %d\n", + crypto_tfm_alg_ivsize(tfm)); + goto out; + } +@@ -80,10 +80,10 @@ krb5_encrypt( + + ret = crypto_cipher_encrypt_iv(tfm, sg, sg, length, local_iv); + +- dprintk("RPC: krb5_encrypt: output data:\n"); ++ dprintk("RPC: krb5_encrypt: output data:\n"); + print_hexl((u32 *)out, length, 0); + out: +- dprintk("krb5_encrypt returns %d\n",ret); ++ dprintk("RPC: krb5_encrypt returns %d\n",ret); + return(ret); + } + +@@ -99,14 +99,14 @@ krb5_decrypt( + struct scatterlist sg[1]; + u8 local_iv[16] = {0}; + +- dprintk("RPC: krb5_decrypt: input data:\n"); ++ dprintk("RPC: krb5_decrypt: input data:\n"); + print_hexl((u32 *)in, length, 0); + + if (length % crypto_tfm_alg_blocksize(tfm) != 0) + goto out; + + if (crypto_tfm_alg_ivsize(tfm) > 16) { +- dprintk("RPC: gss_k5decrypt: tfm iv size to large %d\n", ++ dprintk("RPC: gss_k5decrypt: tfm iv size to large %d\n", + crypto_tfm_alg_ivsize(tfm)); + goto out; 
+ } +@@ -120,10 +120,10 @@ krb5_decrypt( + + ret = crypto_cipher_decrypt_iv(tfm, sg, sg, length, local_iv); + +- dprintk("RPC: krb5_decrypt: output_data:\n"); ++ dprintk("RPC: krb5_decrypt: output_data:\n"); + print_hexl((u32 *)out, length, 0); + out: +- dprintk("gss_k5decrypt returns %d\n",ret); ++ dprintk("RPC: gss_k5decrypt returns %d\n",ret); + return(ret); + } + +@@ -152,7 +152,7 @@ krb5_make_checksum(s32 cksumtype, char * + cksumname = "md5"; + break; + default: +- dprintk("RPC: krb5_make_checksum:" ++ dprintk("RPC: krb5_make_checksum:" + " unsupported checksum %d", cksumtype); + goto out; + } +diff -puN net/sunrpc/auth_gss/gss_krb5_seqnum.c~CITI_NFS4_ALL net/sunrpc/auth_gss/gss_krb5_seqnum.c +--- linux-2.6.3/net/sunrpc/auth_gss/gss_krb5_seqnum.c~CITI_NFS4_ALL 2004-02-19 16:47:07.000000000 -0500 ++++ linux-2.6.3-bfields/net/sunrpc/auth_gss/gss_krb5_seqnum.c 2004-02-19 16:47:07.000000000 -0500 +@@ -70,7 +70,7 @@ krb5_get_seq_num(struct crypto_tfm *key, + s32 code; + unsigned char plain[8]; + +- dprintk("krb5_get_seq_num: \n"); ++ dprintk("RPC: krb5_get_seq_num:\n"); + + if ((code = krb5_decrypt(key, cksum, buf, plain, 8))) + return code; +diff -puN net/sunrpc/auth_gss/gss_pseudoflavors.c~CITI_NFS4_ALL net/sunrpc/auth_gss/gss_pseudoflavors.c +--- linux-2.6.3/net/sunrpc/auth_gss/gss_pseudoflavors.c~CITI_NFS4_ALL 2004-02-19 16:47:07.000000000 -0500 ++++ linux-2.6.3-bfields/net/sunrpc/auth_gss/gss_pseudoflavors.c 2004-02-19 16:47:07.000000000 -0500 +@@ -82,12 +82,13 @@ gss_register_triple(u32 pseudoflavor, st + + spin_lock(®istered_triples_lock); + if (do_lookup_triple_by_pseudoflavor(pseudoflavor)) { +- printk("Registered pseudoflavor %d again\n", pseudoflavor); ++ printk(KERN_WARNING "RPC: Registered pseudoflavor %d again\n", ++ pseudoflavor); + goto err_unlock; + } + list_add(&triple->triples, ®istered_triples); + spin_unlock(®istered_triples_lock); +- dprintk("RPC: registered pseudoflavor %d\n", pseudoflavor); ++ dprintk("RPC: registered pseudoflavor %d\n", 
pseudoflavor); + + return 0; + +@@ -145,7 +146,7 @@ gss_cmp_triples(u32 oid_len, char *oid_d + oid.len = oid_len; + oid.data = oid_data; + +- dprintk("RPC: gss_cmp_triples \n"); ++ dprintk("RPC: gss_cmp_triples\n"); + print_sec_triple(&oid,qop,service); + + spin_lock(®istered_triples_lock); +@@ -158,7 +159,7 @@ gss_cmp_triples(u32 oid_len, char *oid_d + } + } + spin_unlock(®istered_triples_lock); +- dprintk("RPC: gss_cmp_triples return %d\n", pseudoflavor); ++ dprintk("RPC: gss_cmp_triples return %d\n", pseudoflavor); + return pseudoflavor; + } + +@@ -193,8 +194,8 @@ gss_pseudoflavor_to_service(u32 pseudofl + triple = do_lookup_triple_by_pseudoflavor(pseudoflavor); + spin_unlock(®istered_triples_lock); + if (!triple) { +- dprintk("RPC: gss_pseudoflavor_to_service called with" +- " unsupported pseudoflavor %d\n", pseudoflavor); ++ dprintk("RPC: gss_pseudoflavor_to_service called with unsupported pseudoflavor %d\n", ++ pseudoflavor); + return 0; + } + return triple->service; +@@ -211,8 +212,8 @@ gss_pseudoflavor_to_mech(u32 pseudoflavo + if (triple) + mech = gss_mech_get(triple->mech); + else +- dprintk("RPC: gss_pseudoflavor_to_mech called with" +- " unsupported pseudoflavor %d\n", pseudoflavor); ++ dprintk("RPC: gss_pseudoflavor_to_mech called with unsupported pseudoflavor %d\n", ++ pseudoflavor); + return mech; + } + +@@ -223,8 +224,8 @@ gss_pseudoflavor_to_mechOID(u32 pseudofl + + mech = gss_pseudoflavor_to_mech(pseudoflavor); + if (!mech) { +- dprintk("RPC: gss_pseudoflavor_to_mechOID called with" +- " unsupported pseudoflavor %d\n", pseudoflavor); ++ dprintk("RPC: gss_pseudoflavor_to_mechOID called with unsupported pseudoflavor %d\n", ++ pseudoflavor); + return -1; + } + oid->len = mech->gm_oid.len; +diff -puN fs/nfsd/nfs4state.c~CITI_NFS4_ALL fs/nfsd/nfs4state.c +--- linux-2.6.3/fs/nfsd/nfs4state.c~CITI_NFS4_ALL 2004-02-19 16:47:08.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfsd/nfs4state.c 2004-02-19 16:47:15.000000000 -0500 +@@ -43,6 +43,7 @@ + #include + 
#include + #include ++#include + #include + #include + #include +@@ -135,12 +136,16 @@ static void release_file(struct nfs4_fil + * + * client_lru holds client queue ordered by nfs4_client.cl_time + * for lease renewal. ++ * ++ * close_lru holds (open) stateowner queue ordered by nfs4_stateowner.so_time ++ * for last close replay. + */ + static struct list_head conf_id_hashtbl[CLIENT_HASH_SIZE]; + static struct list_head conf_str_hashtbl[CLIENT_HASH_SIZE]; + static struct list_head unconf_str_hashtbl[CLIENT_HASH_SIZE]; + static struct list_head unconf_id_hashtbl[CLIENT_HASH_SIZE]; + static struct list_head client_lru; ++static struct list_head close_lru; + + static inline void + renew_client(struct nfs4_client *clp) +@@ -269,8 +274,7 @@ cmp_clid(clientid_t * cl1, clientid_t * + /* XXX what about NGROUP */ + static int + cmp_creds(struct svc_cred *cr1, struct svc_cred *cr2){ +- return((cr1->cr_uid == cr2->cr_uid) && +- (cr1->cr_gid == cr2->cr_gid)); ++ return(cr1->cr_uid == cr2->cr_uid); + + } + +@@ -772,6 +776,9 @@ alloc_init_open_stateowner(unsigned int + INIT_LIST_HEAD(&sop->so_strhash); + INIT_LIST_HEAD(&sop->so_perclient); + INIT_LIST_HEAD(&sop->so_perfilestate); ++ INIT_LIST_HEAD(&sop->so_perlockowner); /* not used */ ++ INIT_LIST_HEAD(&sop->so_close_lru); ++ sop->so_time = 0; + list_add(&sop->so_idhash, &ownerid_hashtbl[idhashval]); + list_add(&sop->so_strhash, &ownerstr_hashtbl[strhashval]); + list_add(&sop->so_perclient, &clp->cl_perclient); +@@ -790,13 +797,29 @@ alloc_init_open_stateowner(unsigned int + } + + static void ++release_stateid_lockowner(struct nfs4_stateid *open_stp) ++{ ++ struct nfs4_stateowner *lock_sop; ++ ++ while (!list_empty(&open_stp->st_perlockowner)) { ++ lock_sop = list_entry(open_stp->st_perlockowner.next, ++ struct nfs4_stateowner, so_perlockowner); ++ /* list_del(&open_stp->st_perlockowner); */ ++ BUG_ON(lock_sop->so_is_open_owner); ++ release_stateowner(lock_sop); ++ } ++} ++ ++static void + release_stateowner(struct 
nfs4_stateowner *sop) + { + struct nfs4_stateid *stp; + +- list_del_init(&sop->so_idhash); +- list_del_init(&sop->so_strhash); +- list_del_init(&sop->so_perclient); ++ list_del(&sop->so_idhash); ++ list_del(&sop->so_strhash); ++ list_del(&sop->so_perclient); ++ list_del(&sop->so_perlockowner); ++ list_del(&sop->so_close_lru); + del_perclient++; + while (!list_empty(&sop->so_perfilestate)) { + stp = list_entry(sop->so_perfilestate.next, +@@ -815,6 +838,7 @@ init_stateid(struct nfs4_stateid *stp, s + + INIT_LIST_HEAD(&stp->st_hash); + INIT_LIST_HEAD(&stp->st_perfilestate); ++ INIT_LIST_HEAD(&stp->st_perlockowner); + INIT_LIST_HEAD(&stp->st_perfile); + list_add(&stp->st_hash, &stateid_hashtbl[hashval]); + list_add(&stp->st_perfilestate, &sop->so_perfilestate); +@@ -826,24 +850,30 @@ init_stateid(struct nfs4_stateid *stp, s + stp->st_stateid.si_stateownerid = sop->so_id; + stp->st_stateid.si_fileid = fp->fi_id; + stp->st_stateid.si_generation = 0; +- stp->st_share_access = open->op_share_access; +- stp->st_share_deny = open->op_share_deny; ++ stp->st_access_bmap = 0; ++ stp->st_deny_bmap = 0; ++ __set_bit(open->op_share_access, &stp->st_access_bmap); ++ __set_bit(open->op_share_deny, &stp->st_deny_bmap); + } + + static void + release_stateid(struct nfs4_stateid *stp, int flags) { + +- list_del_init(&stp->st_hash); ++ list_del(&stp->st_hash); + list_del_perfile++; +- list_del_init(&stp->st_perfile); +- list_del_init(&stp->st_perfilestate); ++ list_del(&stp->st_perfile); ++ list_del(&stp->st_perfilestate); + if((stp->st_vfs_set) && (flags & OPEN_STATE)) { ++ release_stateid_lockowner(stp); + nfsd_close(&stp->st_vfs_file); + vfsclose++; + dput(stp->st_vfs_file.f_dentry); + mntput(stp->st_vfs_file.f_vfsmnt); ++ } else if ((stp->st_vfs_set) && (flags & LOCK_STATE)) { ++ struct file *filp = &stp->st_vfs_file; ++ ++ locks_remove_posix(filp, (fl_owner_t) stp->st_stateowner); + } +- /* should use a slab cache */ + kfree(stp); + stp = NULL; + } +@@ -852,12 +882,25 @@ static void 
+ release_file(struct nfs4_file *fp) + { + free_file++; +- list_del_init(&fp->fi_hash); ++ list_del(&fp->fi_hash); + iput(fp->fi_inode); + kfree(fp); + } + + void ++move_to_close_lru(struct nfs4_stateowner *sop) ++{ ++ dprintk("NFSD: move_to_close_lru nfs4_stateowner %p\n", sop); ++ /* remove stateowner from all other hash lists except perclient */ ++ list_del_init(&sop->so_idhash); ++ list_del_init(&sop->so_strhash); ++ list_del_init(&sop->so_perlockowner); ++ ++ list_add_tail(&sop->so_close_lru, &close_lru); ++ sop->so_time = get_seconds(); ++} ++ ++void + release_state_owner(struct nfs4_stateid *stp, struct nfs4_stateowner **sopp, + int flag) + { +@@ -866,16 +909,13 @@ release_state_owner(struct nfs4_stateid + + dprintk("NFSD: release_state_owner\n"); + release_stateid(stp, flag); +- /* +- * release unused nfs4_stateowners. +- * XXX will need to be placed on an open_stateid_lru list to be ++ ++ /* place unused nfs4_stateowners on so_close_lru list to be + * released by the laundromat service after the lease period + * to enable us to handle CLOSE replay + */ +- if (sop->so_confirmed && list_empty(&sop->so_perfilestate)) { +- release_stateowner(sop); +- *sopp = NULL; +- } ++ if (sop->so_confirmed && list_empty(&sop->so_perfilestate)) ++ move_to_close_lru(sop); + /* unused nfs4_file's are releseed. XXX slab cache? 
*/ + if (list_empty(&fp->fi_perfile)) { + release_file(fp); +@@ -940,15 +980,46 @@ find_file(unsigned int hashval, struct i + return 0; + } + ++#define TEST_ACCESS(x) ((x > 0 || x < 4)?1:0) ++#define TEST_DENY(x) ((x >= 0 || x < 5)?1:0) ++ ++void ++set_access(unsigned int *access, unsigned long bmap) { ++ int i; ++ ++ *access = 0; ++ for (i = 1; i < 4; i++) { ++ if(test_bit(i, &bmap)) ++ *access |= i; ++ } ++} ++ ++void ++set_deny(unsigned int *deny, unsigned long bmap) { ++ int i; ++ ++ *deny = 0; ++ for (i = 0; i < 4; i++) { ++ if(test_bit(i, &bmap)) ++ *deny |= i ; ++ } ++} ++ + static int + test_share(struct nfs4_stateid *stp, struct nfsd4_open *open) { +- if ((stp->st_share_access & open->op_share_deny) || +- (stp->st_share_deny & open->op_share_access)) { ++ unsigned int access, deny; ++ ++ set_access(&access, stp->st_access_bmap); ++ set_deny(&deny, stp->st_deny_bmap); ++ if ((access & open->op_share_deny) || (deny & open->op_share_access)) + return 0; +- } + return 1; + } + ++/* ++ * Called to check deny when READ with all zero stateid or ++ * WRITE with all zero or all one stateid ++ */ + int + nfs4_share_conflict(struct svc_fh *current_fh, unsigned int deny_type) + { +@@ -965,7 +1036,8 @@ nfs4_share_conflict(struct svc_fh *curre + /* Search for conflicting share reservations */ + list_for_each_safe(pos, next, &fp->fi_perfile) { + stp = list_entry(pos, struct nfs4_stateid, st_perfile); +- if (stp->st_share_deny & deny_type) ++ if (test_bit(deny_type, &stp->st_deny_bmap) || ++ test_bit(NFS4_SHARE_DENY_BOTH, &stp->st_deny_bmap)) + return nfserr_share_denied; + } + } +@@ -1010,6 +1082,8 @@ nfs4_file_downgrade(struct file *filp, u + * notfound: + * verify clientid + * create new owner ++ * ++ * called with nfs4_lock_state() held. 
+ */ + int + nfsd4_process_open1(struct nfsd4_open *open) +@@ -1028,7 +1102,6 @@ nfsd4_process_open1(struct nfsd4_open *o + if (STALE_CLIENTID(&open->op_clientid)) + goto out; + +- nfs4_lock_state(); + strhashval = ownerstr_hashval(clientid->cl_id, open->op_owner); + if (find_openstateowner_str(strhashval, open, &sop)) { + open->op_stateowner = sop; +@@ -1086,10 +1159,11 @@ instantiate_new_owner: + renew: + renew_client(sop->so_client); + out: +- nfs4_unlock_state(); + return status; + } +- ++/* ++ * called with nfs4_lock_state() held. ++ */ + int + nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open *open) + { +@@ -1108,7 +1182,10 @@ nfsd4_process_open2(struct svc_rqst *rqs + + ino = current_fh->fh_dentry->d_inode; + +- nfs4_lock_state(); ++ status = nfserr_inval; ++ if (!TEST_ACCESS(open->op_share_access) || !TEST_DENY(open->op_share_deny)) ++ goto out; ++ + fi_hashval = file_hashval(ino); + if (find_file(fi_hashval, ino, &fp)) { + /* Search for conflicting share reservations */ +@@ -1119,6 +1196,9 @@ nfsd4_process_open2(struct svc_rqst *rqs + stp = stq; + continue; + } ++ /* ignore lock owners */ ++ if (stq->st_stateowner->so_is_open_owner == 0) ++ continue; + if (!test_share(stq,open)) + goto out; + } +@@ -1137,7 +1217,7 @@ nfsd4_process_open2(struct svc_rqst *rqs + GFP_KERNEL)) == NULL) + goto out; + +- if (open->op_share_access && NFS4_SHARE_ACCESS_WRITE) ++ if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE) + flags = MAY_WRITE; + else + flags = MAY_READ; +@@ -1156,15 +1236,18 @@ nfsd4_process_open2(struct svc_rqst *rqs + /* This is an upgrade of an existing OPEN. 
+ * OR the incoming share with the existing + * nfs4_stateid share */ +- int share_access = open->op_share_access; ++ unsigned int share_access; + +- share_access &= ~(stp->st_share_access); ++ set_access(&share_access, stp->st_access_bmap); ++ share_access = ~share_access; ++ share_access &= open->op_share_access; + + /* update the struct file */ + if ((status = nfs4_file_upgrade(&stp->st_vfs_file, share_access))) + goto out; +- stp->st_share_access |= share_access; +- stp->st_share_deny |= open->op_share_deny; ++ /* remember the open */ ++ set_bit(open->op_share_access, &stp->st_access_bmap); ++ set_bit(open->op_share_deny, &stp->st_deny_bmap); + /* bump the stateid */ + update_stateid(&stp->st_stateid); + } +@@ -1194,7 +1277,6 @@ out: + if (!open->op_stateowner->so_confirmed) + open->op_rflags |= NFS4_OPEN_RESULT_CONFIRM; + +- nfs4_unlock_state(); + return status; + out_free: + kfree(stp); +@@ -1250,9 +1332,11 @@ time_t + nfs4_laundromat(void) + { + struct nfs4_client *clp; ++ struct nfs4_stateowner *sop; + struct list_head *pos, *next; + time_t cutoff = get_seconds() - NFSD_LEASE_TIME; +- time_t t, return_val = NFSD_LEASE_TIME; ++ time_t t, clientid_val = NFSD_LEASE_TIME; ++ time_t u, close_val = NFSD_LEASE_TIME; + + nfs4_lock_state(); + +@@ -1261,18 +1345,30 @@ nfs4_laundromat(void) + clp = list_entry(pos, struct nfs4_client, cl_lru); + if (time_after((unsigned long)clp->cl_time, (unsigned long)cutoff)) { + t = clp->cl_time - cutoff; +- if (return_val > t) +- return_val = t; ++ if (clientid_val > t) ++ clientid_val = t; + break; + } + dprintk("NFSD: purging unused client (clientid %08x)\n", + clp->cl_clientid.cl_id); + expire_client(clp); + } +- if (return_val < NFSD_LAUNDROMAT_MINTIMEOUT) +- return_val = NFSD_LAUNDROMAT_MINTIMEOUT; ++ list_for_each_safe(pos, next, &close_lru) { ++ sop = list_entry(pos, struct nfs4_stateowner, so_close_lru); ++ if (time_after((unsigned long)sop->so_time, (unsigned long)cutoff)) { ++ u = sop->so_time - cutoff; ++ if (close_val 
> u) ++ close_val = u; ++ break; ++ } ++ dprintk("NFSD: purging unused open stateowner (so_id %d)\n", ++ sop->so_id); ++ release_stateowner(sop); ++ } ++ if (clientid_val < NFSD_LAUNDROMAT_MINTIMEOUT) ++ clientid_val = NFSD_LAUNDROMAT_MINTIMEOUT; + nfs4_unlock_state(); +- return return_val; ++ return clientid_val; + } + + void +@@ -1285,17 +1381,22 @@ laundromat_main(void *not_used) + schedule_delayed_work(&laundromat_work, t*HZ); + } + +-/* search ownerid_hashtbl[] for stateid owner (stateid->si_stateownerid) */ ++/* search ownerid_hashtbl[] and close_lru for stateid owner ++ * (stateid->si_stateownerid) ++ */ + struct nfs4_stateowner * +-find_openstateowner_id(u32 st_id) { ++find_openstateowner_id(u32 st_id, int flags) { + struct list_head *pos, *next; + struct nfs4_stateowner *local = NULL; +- unsigned int hashval = ownerid_hashval(st_id); + +- list_for_each_safe(pos, next, &ownerid_hashtbl[hashval]) { +- local = list_entry(pos, struct nfs4_stateowner, so_idhash); +- if(local->so_id == st_id) +- return local; ++ dprintk("NFSD: find_openstateowner_id %d\n", st_id); ++ if (flags & CLOSE_STATE) { ++ list_for_each_safe(pos, next, &close_lru) { ++ local = list_entry(pos, struct nfs4_stateowner, ++ so_close_lru); ++ if(local->so_id == st_id) ++ return local; ++ } + } + return NULL; + } +@@ -1303,7 +1404,8 @@ find_openstateowner_id(u32 st_id) { + static inline int + nfs4_check_fh(struct svc_fh *fhp, struct nfs4_stateid *stp) + { +- return (fhp->fh_dentry != stp->st_vfs_file.f_dentry); ++ return (stp->st_vfs_set == 0 || ++ fhp->fh_dentry->d_inode->i_ino != stp->st_vfs_file.f_dentry->d_inode->i_ino); + } + + static int +@@ -1375,7 +1477,7 @@ out: + * Checks for sequence id mutating operations. 
+ */ + int +-nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp) ++nfs4_preprocess_seqid_op(struct svc_fh *current_fh, u32 seqid, stateid_t *stateid, int flags, struct nfs4_stateowner **sopp, struct nfs4_stateid **stpp, clientid_t *lockclid) + { + int status; + struct nfs4_stateid *stp; +@@ -1412,6 +1514,21 @@ nfs4_preprocess_seqid_op(struct svc_fh * + + status = nfserr_bad_stateid; + ++ /* for new lock stateowners, check that the lock->v.new.open_stateid ++ * refers to an open stateowner, and that the lockclid ++ * (nfs4_lock->v.new.clientid) is the same as the ++ * open_stateid->st_stateowner->so_client->clientid ++ */ ++ if (lockclid) { ++ struct nfs4_stateowner *sop = stp->st_stateowner; ++ struct nfs4_client *clp = sop->so_client; ++ ++ if (!sop->so_is_open_owner) ++ goto out; ++ if (!cmp_clid(&clp->cl_clientid, lockclid)) ++ goto out; ++ } ++ + if ((flags & CHECK_FH) && nfs4_check_fh(current_fh, stp)) { + printk("NFSD: preprocess_seqid_op: fh-stateid mismatch!\n"); + goto out; +@@ -1463,24 +1580,30 @@ no_nfs4_stateid: + * starting by trying to look up the stateowner. + * If stateowner is not found - stateid is bad. 
+ */ +- if (!(sop = find_openstateowner_id(stateid->si_stateownerid))) { ++ if (!(sop = find_openstateowner_id(stateid->si_stateownerid, flags))) { + printk("NFSD: preprocess_seqid_op: no stateowner or nfs4_stateid!\n"); + status = nfserr_bad_stateid; + goto out; + } ++ *sopp = sop; + + check_replay: + if (seqid == sop->so_seqid) { + printk("NFSD: preprocess_seqid_op: retransmission?\n"); + /* indicate replay to calling function */ + status = NFSERR_REPLAY_ME; +- } else ++ } else { + printk("NFSD: preprocess_seqid_op: bad seqid (expected %d, got %d\n", sop->so_seqid +1, seqid); + ++ *sopp = NULL; + status = nfserr_bad_seqid; ++ } + goto out; + } + ++/* ++ * nfs4_unlock_state(); called in encode ++ */ + int + nfsd4_open_confirm(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_confirm *oc) + { +@@ -1491,13 +1614,17 @@ nfsd4_open_confirm(struct svc_rqst *rqst + dprintk("NFSD: nfsd4_open_confirm on file %.*s\n", + (int)current_fh->fh_dentry->d_name.len, + current_fh->fh_dentry->d_name.name); ++ ++ if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0))) ++ goto out; ++ + oc->oc_stateowner = NULL; + nfs4_lock_state(); + + if ((status = nfs4_preprocess_seqid_op(current_fh, oc->oc_seqid, + &oc->oc_req_stateid, + CHECK_FH | CONFIRM | OPEN_STATE, +- &oc->oc_stateowner, &stp))) ++ &oc->oc_stateowner, &stp, NULL))) + goto out; + + sop = oc->oc_stateowner; +@@ -1512,49 +1639,89 @@ nfsd4_open_confirm(struct svc_rqst *rqst + stp->st_stateid.si_generation); + status = nfs_ok; + out: +- nfs4_unlock_state(); + return status; + } ++ ++ ++/* ++ * unset all bits in union bitmap (bmap) that ++ * do not exist in share (from successful OPEN_DOWNGRADE) ++ */ ++static void ++reset_union_bmap_access(unsigned long access, unsigned long *bmap) ++{ ++ int i; ++ for (i = 1; i < 4; i++) { ++ if ((i & access) != i) ++ __clear_bit(i, bmap); ++ } ++} ++ ++static void ++reset_union_bmap_deny(unsigned long deny, unsigned long *bmap) ++{ ++ int i; ++ for (i = 0; i < 4; i++) { ++ if 
((i & deny) != i) ++ __clear_bit(i, bmap); ++ } ++} ++ ++/* ++ * nfs4_unlock_state(); called in encode ++ */ ++ + int + nfsd4_open_downgrade(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_open_downgrade *od) + { + int status; + struct nfs4_stateid *stp; ++ unsigned int share_access; + + dprintk("NFSD: nfsd4_open_downgrade on file %.*s\n", + (int)current_fh->fh_dentry->d_name.len, + current_fh->fh_dentry->d_name.name); + ++ od->od_stateowner = NULL; ++ status = nfserr_inval; ++ if (!TEST_ACCESS(od->od_share_access) || !TEST_DENY(od->od_share_deny)) ++ goto out; ++ + nfs4_lock_state(); + if ((status = nfs4_preprocess_seqid_op(current_fh, od->od_seqid, + &od->od_stateid, + CHECK_FH | OPEN_STATE, +- &od->od_stateowner, &stp))) ++ &od->od_stateowner, &stp, NULL))) + goto out; + + status = nfserr_inval; +- if (od->od_share_access & ~stp->st_share_access) { +- dprintk("NFSD:access not a subset current=%08x, desired=%08x\n", +- stp->st_share_access, od->od_share_access); ++ if (!test_bit(od->od_share_access, &stp->st_access_bmap)) { ++ dprintk("NFSD:access not a subset current bitmap: 0x%lx, input access=%08x\n", ++ stp->st_access_bmap, od->od_share_access); + goto out; + } +- if (od->od_share_deny & ~stp->st_share_deny) { +- dprintk("NFSD:deny not a subset current=%08x, desired=%08x\n", +- stp->st_share_deny, od->od_share_deny); ++ if (!test_bit(od->od_share_deny, &stp->st_deny_bmap)) { ++ dprintk("NFSD:deny not a subset current bitmap: 0x%lx, input deny=%08x\n", ++ stp->st_deny_bmap, od->od_share_deny); + goto out; + } ++ set_access(&share_access, stp->st_access_bmap); + nfs4_file_downgrade(&stp->st_vfs_file, +- stp->st_share_access & ~od->od_share_access); +- stp->st_share_access = od->od_share_access; +- stp->st_share_deny = od->od_share_deny; ++ share_access & ~od->od_share_access); ++ ++ reset_union_bmap_access(od->od_share_access, &stp->st_access_bmap); ++ reset_union_bmap_deny(od->od_share_deny, &stp->st_deny_bmap); ++ + 
update_stateid(&stp->st_stateid); + memcpy(&od->od_stateid, &stp->st_stateid, sizeof(stateid_t)); + status = nfs_ok; + out: +- nfs4_unlock_state(); + return status; + } + ++/* ++ * nfs4_unlock_state() called after encode ++ */ + int + nfsd4_close(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_close *close) + { +@@ -1565,11 +1732,13 @@ nfsd4_close(struct svc_rqst *rqstp, stru + (int)current_fh->fh_dentry->d_name.len, + current_fh->fh_dentry->d_name.name); + ++ close->cl_stateowner = NULL; + nfs4_lock_state(); ++ /* check close_lru for replay */ + if ((status = nfs4_preprocess_seqid_op(current_fh, close->cl_seqid, + &close->cl_stateid, +- CHECK_FH | OPEN_STATE, +- &close->cl_stateowner, &stp))) ++ CHECK_FH | OPEN_STATE | CLOSE_STATE, ++ &close->cl_stateowner, &stp, NULL))) + goto out; + /* + * Return success, but first update the stateid. +@@ -1581,7 +1750,6 @@ nfsd4_close(struct svc_rqst *rqstp, stru + /* release_state_owner() calls nfsd_close() if needed */ + release_state_owner(stp, &close->cl_stateowner, OPEN_STATE); + out: +- nfs4_unlock_state(); + return status; + } + +@@ -1717,7 +1885,7 @@ find_lockstateowner_str(unsigned int has + */ + + static struct nfs4_stateowner * +-alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfsd4_lock *lock) { ++alloc_init_lock_stateowner(unsigned int strhashval, struct nfs4_client *clp, struct nfs4_stateid *open_stp, struct nfsd4_lock *lock) { + struct nfs4_stateowner *sop; + struct nfs4_replay *rp; + unsigned int idhashval; +@@ -1729,9 +1897,13 @@ alloc_init_lock_stateowner(unsigned int + INIT_LIST_HEAD(&sop->so_strhash); + INIT_LIST_HEAD(&sop->so_perclient); + INIT_LIST_HEAD(&sop->so_perfilestate); ++ INIT_LIST_HEAD(&sop->so_perlockowner); ++ INIT_LIST_HEAD(&sop->so_close_lru); /* not used */ ++ sop->so_time = 0; + list_add(&sop->so_idhash, &lock_ownerid_hashtbl[idhashval]); + list_add(&sop->so_strhash, &lock_ownerstr_hashtbl[strhashval]); + list_add(&sop->so_perclient, 
&clp->cl_perclient); ++ list_add(&sop->so_perlockowner, &open_stp->st_perlockowner); + add_perclient++; + sop->so_is_open_owner = 0; + sop->so_id = current_ownerid++; +@@ -1755,10 +1927,10 @@ alloc_init_lock_stateid(struct nfs4_stat + if ((stp = kmalloc(sizeof(struct nfs4_stateid), + GFP_KERNEL)) == NULL) + goto out; +- + INIT_LIST_HEAD(&stp->st_hash); + INIT_LIST_HEAD(&stp->st_perfile); + INIT_LIST_HEAD(&stp->st_perfilestate); ++ INIT_LIST_HEAD(&stp->st_perlockowner); /* not used */ + list_add(&stp->st_hash, &lockstateid_hashtbl[hashval]); + list_add(&stp->st_perfile, &fp->fi_perfile); + list_add_perfile++; +@@ -1771,15 +1943,24 @@ alloc_init_lock_stateid(struct nfs4_stat + stp->st_stateid.si_generation = 0; + stp->st_vfs_file = open_stp->st_vfs_file; + stp->st_vfs_set = open_stp->st_vfs_set; +- stp->st_share_access = -1; +- stp->st_share_deny = -1; ++ stp->st_access_bmap = open_stp->st_access_bmap; ++ stp->st_deny_bmap = open_stp->st_deny_bmap; + + out: + return stp; + } + ++int ++check_lock_length(u64 offset, u64 length) ++{ ++ return ((length == 0) || ((length != ~(u64)0) && ++ LOFF_OVERFLOW(offset, length))); ++} ++ + /* + * LOCK operation ++ * ++ * nfs4_unlock_state(); called in encode + */ + int + nfsd4_lock(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_lock *lock) +@@ -1795,6 +1976,9 @@ nfsd4_lock(struct svc_rqst *rqstp, struc + dprintk("NFSD: nfsd4_lock: start=%Ld length=%Ld\n", + lock->lk_offset, lock->lk_length); + ++ if (check_lock_length(lock->lk_offset, lock->lk_length)) ++ return nfserr_inval; ++ + lock->lk_stateowner = NULL; + nfs4_lock_state(); + +@@ -1812,12 +1996,15 @@ nfsd4_lock(struct svc_rqst *rqstp, struc + printk("NFSD: nfsd4_lock: clientid is stale!\n"); + goto out; + } ++ /* does the clientid in the lock owner own the open stateid? 
*/ ++ + /* validate and update open stateid and open seqid */ + status = nfs4_preprocess_seqid_op(current_fh, + lock->lk_new_open_seqid, + &lock->lk_new_open_stateid, + CHECK_FH | OPEN_STATE, +- &open_sop, &open_stp); ++ &open_sop, &open_stp, ++ &lock->v.new.clientid); + if (status) + goto out; + /* create lockowner and lock stateid */ +@@ -1836,8 +2023,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struc + &lock->v.new.clientid, &lock_sop)) + goto out; + status = nfserr_resource; +- if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, +- open_sop->so_client, lock))) ++ if (!(lock->lk_stateowner = alloc_init_lock_stateowner(strhashval, open_sop->so_client, open_stp, lock))) + goto out; + if ((lock_stp = alloc_init_lock_stateid(lock->lk_stateowner, + fp, open_stp)) == NULL) +@@ -1850,7 +2036,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struc + lock->lk_old_lock_seqid, + &lock->lk_old_lock_stateid, + CHECK_FH | LOCK_STATE, +- &lock->lk_stateowner, &lock_stp); ++ &lock->lk_stateowner, &lock_stp, NULL); + if (status) + goto out; + } +@@ -1938,7 +2124,6 @@ out_destroy_new_stateid: + release_state_owner(lock_stp, &lock->lk_stateowner, LOCK_STATE); + } + out: +- nfs4_unlock_state(); + return status; + } + +@@ -1956,6 +2141,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, stru + unsigned int strhashval; + int status; + ++ if (check_lock_length(lockt->lt_offset, lockt->lt_length)) ++ return nfserr_inval; ++ + lockt->lt_stateowner = NULL; + nfs4_lock_state(); + +@@ -1967,6 +2155,8 @@ nfsd4_lockt(struct svc_rqst *rqstp, stru + + if ((status = fh_verify(rqstp, current_fh, S_IFREG, 0))) { + printk("NFSD: nfsd4_lockt: fh_verify() failed!\n"); ++ if (status == nfserr_symlink) ++ status = nfserr_inval; + goto out; + } + +@@ -1989,13 +2179,9 @@ nfsd4_lockt(struct svc_rqst *rqstp, stru + strhashval = lock_ownerstr_hashval(inode, + lockt->lt_clientid.cl_id, lockt->lt_owner); + +- if (find_lockstateowner_str(strhashval, &lockt->lt_owner, ++ find_lockstateowner_str(strhashval, 
&lockt->lt_owner, + &lockt->lt_clientid, +- &lockt->lt_stateowner)) { +- printk("NFSD: nsfd4_lockt: lookup_lockowner() failed!\n"); +- goto out; +- } +- ++ &lockt->lt_stateowner); + sop = lockt->lt_stateowner; + if (sop) { + file_lock.fl_owner = (fl_owner_t) sop; +@@ -2032,7 +2218,10 @@ out: + nfs4_unlock_state(); + return status; + } +- ++ ++/* ++ * nfs4_unlock_state(); called in encode ++ */ + int + nfsd4_locku(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_locku *locku) + { +@@ -2043,13 +2232,18 @@ nfsd4_locku(struct svc_rqst *rqstp, stru + + dprintk("NFSD: nfsd4_locku: start=%Ld length=%Ld\n", + locku->lu_offset, locku->lu_length); ++ ++ if (check_lock_length(locku->lu_offset, locku->lu_length)) ++ return nfserr_inval; ++ ++ locku->lu_stateowner = NULL; + nfs4_lock_state(); + + if ((status = nfs4_preprocess_seqid_op(current_fh, + locku->lu_seqid, + &locku->lu_stateid, + CHECK_FH | LOCK_STATE, +- &locku->lu_stateowner, &stp))) ++ &locku->lu_stateowner, &stp, NULL))) + goto out; + + filp = &stp->st_vfs_file; +@@ -2085,7 +2279,6 @@ nfsd4_locku(struct svc_rqst *rqstp, stru + memcpy(&locku->lu_stateid, &stp->st_stateid, sizeof(stateid_t)); + + out: +- nfs4_unlock_state(); + return status; + + out_nfserr: +@@ -2093,6 +2286,84 @@ out_nfserr: + goto out; + } + ++/* ++ * returns ++ * 1: locks held by lockowner ++ * 0: no locks held by lockowner ++ */ ++static int ++check_for_locks(struct file *filp, struct nfs4_stateowner *lowner) ++{ ++ struct file_lock **flpp; ++ struct inode *inode = filp->f_dentry->d_inode; ++ int status = 0; ++ ++ lock_kernel(); ++ for (flpp = &inode->i_flock; *flpp != NULL; flpp = &(*flpp)->fl_next) { ++ if ((*flpp)->fl_owner == (fl_owner_t)lowner) ++ status = 1; ++ goto out; ++ } ++out: ++ unlock_kernel(); ++ return status; ++} ++ ++int ++nfsd4_release_lockowner(struct svc_rqst *rqstp, struct nfsd4_release_lockowner *rlockowner) ++{ ++ clientid_t *clid = &rlockowner->rl_clientid; ++ struct list_head *pos, *next; ++ struct 
nfs4_stateowner *local = NULL; ++ struct xdr_netobj *owner = &rlockowner->rl_owner; ++ int status, i; ++ ++ dprintk("nfsd4_release_lockowner clientid: (%08x/%08x):\n", ++ clid->cl_boot, clid->cl_id); ++ ++ /* XXX check for lease expiration */ ++ ++ status = nfserr_stale_clientid; ++ if (STALE_CLIENTID(clid)) { ++ printk("NFSD: nfsd4_release_lockowner: clientid is stale!\n"); ++ return status; ++ } ++ ++ nfs4_lock_state(); ++ ++ /* find the lockowner */ ++ status = nfs_ok; ++ for (i=0; i < LOCK_HASH_SIZE; i++) { ++ list_for_each_safe(pos, next, &lock_ownerstr_hashtbl[i]) { ++ local = list_entry(pos, struct nfs4_stateowner, ++ so_strhash); ++ if(cmp_owner_str(local, owner, clid)) ++ break; ++ } ++ } ++ if (local) { ++ struct nfs4_stateid *stp; ++ ++ /* check for any locks held by any stateid associated with the ++ * (lock) stateowner */ ++ status = nfserr_locks_held; ++ list_for_each_safe(pos, next, &local->so_perfilestate) { ++ stp = list_entry(pos, struct nfs4_stateid, ++ st_perfilestate); ++ if(stp->st_vfs_set) { ++ if (check_for_locks(&stp->st_vfs_file, local)) ++ goto out; ++ } ++ } ++ /* no locks held by (lock) stateowner */ ++ status = nfs_ok; ++ release_stateowner(local); ++ } ++out: ++ nfs4_unlock_state(); ++ return status; ++} ++ + /* + * Start and stop routines + */ +@@ -2128,6 +2399,7 @@ nfs4_state_init(void) + memset(&zerostateid, 0, sizeof(stateid_t)); + memset(&onestateid, ~0, sizeof(stateid_t)); + ++ INIT_LIST_HEAD(&close_lru); + INIT_LIST_HEAD(&client_lru); + init_MUTEX(&client_sema); + boot_time = get_seconds(); +diff -puN fs/nfsd/nfs3xdr.c~CITI_NFS4_ALL fs/nfsd/nfs3xdr.c +--- linux-2.6.3/fs/nfsd/nfs3xdr.c~CITI_NFS4_ALL 2004-02-19 16:47:08.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfsd/nfs3xdr.c 2004-02-19 16:47:08.000000000 -0500 +@@ -796,7 +796,7 @@ encode_entry(struct readdir_cd *ccd, con + elen = slen + NFS3_ENTRY_BAGGAGE + + (plus? 
NFS3_ENTRYPLUS_BAGGAGE : 0); + if (cd->buflen < elen) { +- cd->common.err = nfserr_readdir_nospc; ++ cd->common.err = nfserr_toosmall; + return -EINVAL; + } + *p++ = xdr_one; /* mark entry present */ +diff -puN fs/nfsd/nfsxdr.c~CITI_NFS4_ALL fs/nfsd/nfsxdr.c +--- linux-2.6.3/fs/nfsd/nfsxdr.c~CITI_NFS4_ALL 2004-02-19 16:47:08.000000000 -0500 ++++ linux-2.6.3-bfields/fs/nfsd/nfsxdr.c 2004-02-19 16:47:08.000000000 -0500 +@@ -484,7 +484,7 @@ nfssvc_encode_entry(struct readdir_cd *c + + slen = XDR_QUADLEN(namlen); + if ((buflen = cd->buflen - slen - 4) < 0) { +- cd->common.err = nfserr_readdir_nospc; ++ cd->common.err = nfserr_toosmall; + return -EINVAL; + } + *p++ = xdr_one; /* mark entry present */ +diff -puN include/linux/nfs.h~CITI_NFS4_ALL include/linux/nfs.h +--- linux-2.6.3/include/linux/nfs.h~CITI_NFS4_ALL 2004-02-19 16:47:08.000000000 -0500 ++++ linux-2.6.3-bfields/include/linux/nfs.h 2004-02-19 16:47:08.000000000 -0500 +@@ -92,7 +92,7 @@ + NFSERR_NOT_SAME = 10027, /* v4 */ + NFSERR_LOCK_RANGE = 10028, /* v4 */ + NFSERR_SYMLINK = 10029, /* v4 */ +- NFSERR_READDIR_NOSPC = 10030, /* v4 */ ++ NFSERR_RESTOREFH = 10030, /* v4 */ + NFSERR_LEASE_MOVED = 10031, /* v4 */ + NFSERR_ATTRNOTSUPP = 10032, /* v4 */ + NFSERR_NO_GRACE = 10033, /* v4 */ +diff -puN include/linux/nfsd/nfsfh.h~CITI_NFS4_ALL include/linux/nfsd/nfsfh.h +--- linux-2.6.3/include/linux/nfsd/nfsfh.h~CITI_NFS4_ALL 2004-02-19 16:47:10.000000000 -0500 ++++ linux-2.6.3-bfields/include/linux/nfsd/nfsfh.h 2004-02-19 16:47:10.000000000 -0500 +@@ -209,14 +209,6 @@ fh_copy(struct svc_fh *dst, struct svc_f + return dst; + } + +-static __inline__ void +-fh_dup2(struct svc_fh *dst, struct svc_fh *src) +-{ +- fh_put(dst); +- dget(src->fh_dentry); +- *dst = *src; +-} +- + static __inline__ struct svc_fh * + fh_init(struct svc_fh *fhp, int maxsize) + { +diff -puN include/linux/sunrpc/xdr.h~CITI_NFS4_ALL include/linux/sunrpc/xdr.h +--- linux-2.6.3/include/linux/sunrpc/xdr.h~CITI_NFS4_ALL 2004-02-19 16:47:15.000000000 
-0500 ++++ linux-2.6.3-bfields/include/linux/sunrpc/xdr.h 2004-02-19 16:47:15.000000000 -0500 +@@ -225,6 +225,9 @@ xdr_reserve_space(struct xdr_stream *xdr + extern void xdr_write_pages(struct xdr_stream *xdr, struct page **pages, + unsigned int base, unsigned int len); + extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len); ++int read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len); ++int read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj); ++ + + /* + * Initialize an xdr_stream for decoding data. +diff -puN net/sunrpc/xdr.c~CITI_NFS4_ALL net/sunrpc/xdr.c +--- linux-2.6.3/net/sunrpc/xdr.c~CITI_NFS4_ALL 2004-02-19 16:47:15.000000000 -0500 ++++ linux-2.6.3-bfields/net/sunrpc/xdr.c 2004-02-19 16:47:15.000000000 -0500 +@@ -799,7 +799,7 @@ xdr_buf_subsegment(struct xdr_buf *buf, + } + + /* obj is assumed to point to allocated memory of size at least len: */ +-static int ++int + read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len) + { + struct xdr_buf subbuf; +@@ -824,7 +824,7 @@ out: + return status; + } + +-static int ++int + read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj) + { + u32 raw; + +_ diff --git a/lustre/kernel_patches/patches/linux-2.6.3-nfs-intent.patch b/lustre/kernel_patches/patches/linux-2.6.3-nfs-intent.patch new file mode 100644 index 0000000..62a4f04 --- /dev/null +++ b/lustre/kernel_patches/patches/linux-2.6.3-nfs-intent.patch @@ -0,0 +1,76 @@ +Index: linux-2.6.3/fs/nfs/dir.c +=================================================================== +--- linux-2.6.3.orig/fs/nfs/dir.c 2004-02-23 14:36:26.000000000 -0800 ++++ linux-2.6.3/fs/nfs/dir.c 2004-02-23 14:46:49.000000000 -0800 +@@ -782,7 +782,7 @@ + if (nd->flags & LOOKUP_DIRECTORY) + return 0; + /* Are we trying to write to a read only partition? 
*/ +- if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) ++ if (IS_RDONLY(dir) && (nd->intent.it_flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) + return 0; + return 1; + } +@@ -803,7 +803,7 @@ + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + + /* Let vfs_create() deal with O_EXCL */ +- if (nd->intent.open.flags & O_EXCL) ++ if (nd->intent.it_flags & O_EXCL) + goto no_entry; + + /* Open the file on the server */ +@@ -811,7 +811,7 @@ + /* Revalidate parent directory attribute cache */ + nfs_revalidate_inode(NFS_SERVER(dir), dir); + +- if (nd->intent.open.flags & O_CREAT) { ++ if (nd->intent.it_flags & O_CREAT) { + nfs_begin_data_update(dir); + inode = nfs4_atomic_open(dir, dentry, nd); + nfs_end_data_update(dir); +@@ -827,7 +827,7 @@ + break; + /* This turned out not to be a regular file */ + case -ELOOP: +- if (!(nd->intent.open.flags & O_NOFOLLOW)) ++ if (!(nd->intent.it_flags & O_NOFOLLOW)) + goto no_open; + /* case -EISDIR: */ + /* case -EINVAL: */ +@@ -861,7 +861,7 @@ + dir = parent->d_inode; + if (!is_atomic_open(dir, nd)) + goto no_open; +- openflags = nd->intent.open.flags; ++ openflags = nd->intent.it_flags; + if (openflags & O_CREAT) { + /* If this is a negative dentry, just drop it */ + if (!inode) +.old..........pc/linux-2.6.3-nfs-intent/fs/nfs/nfs4proc.c +.new.........fs/nfs/nfs4proc.c +Index: linux-2.6.3/fs/nfs/nfs4proc.c +=================================================================== +--- linux-2.6.3.orig/fs/nfs/nfs4proc.c 2004-02-23 14:36:26.000000000 -0800 ++++ linux-2.6.3/fs/nfs/nfs4proc.c 2004-02-23 14:36:26.000000000 -0800 +@@ -458,17 +458,17 @@ + struct nfs4_state *state; + + if (nd->flags & LOOKUP_CREATE) { +- attr.ia_mode = nd->intent.open.create_mode; ++ attr.ia_mode = nd->intent.it_create_mode; + attr.ia_valid = ATTR_MODE; + if (!IS_POSIXACL(dir)) + attr.ia_mode &= ~current->fs->umask; + } else { + attr.ia_valid = 0; +- BUG_ON(nd->intent.open.flags & O_CREAT); ++ BUG_ON(nd->intent.it_flags & O_CREAT); + } + + cred = 
rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); +- state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred); ++ state = nfs4_do_open(dir, &dentry->d_name, nd->intent.it_flags, &attr, cred); + put_rpccred(cred); + if (IS_ERR(state)) + return (struct inode *)state; diff --git a/lustre/kernel_patches/patches/nfs-cifs-intent-2.6.3-suse.patch b/lustre/kernel_patches/patches/nfs-cifs-intent-2.6.3-suse.patch new file mode 100644 index 0000000..1a3d7f4 --- /dev/null +++ b/lustre/kernel_patches/patches/nfs-cifs-intent-2.6.3-suse.patch @@ -0,0 +1,111 @@ +.old..........pc/linux-2.6.3-nfs-intent-suse/fs/nfs/dir.c +.new.........fs/nfs/dir.c +.old..........pc/linux-2.6.3-nfs-intent-suse/fs/nfs/dir.c +.new.........fs/nfs/dir.c +Index: linux-2.6.3-20/fs/nfs/dir.c +=================================================================== +--- linux-2.6.3-20.orig/fs/nfs/dir.c 2004-03-08 14:23:40.000000000 -0800 ++++ linux-2.6.3-20/fs/nfs/dir.c 2004-03-08 17:07:34.000000000 -0800 +@@ -751,7 +751,7 @@ + if (nd->flags & LOOKUP_DIRECTORY) + return 0; + /* Are we trying to write to a read only partition? 
*/ +- if (IS_RDONLY(dir) && (nd->intent.open.flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) ++ if (IS_RDONLY(dir) && (nd->intent.it_flags & (O_CREAT|O_TRUNC|FMODE_WRITE))) + return 0; + return 1; + } +@@ -772,7 +772,7 @@ + dentry->d_op = NFS_PROTO(dir)->dentry_ops; + + /* Let vfs_create() deal with O_EXCL */ +- if (nd->intent.open.flags & O_EXCL) ++ if (nd->intent.it_flags & O_EXCL) + goto no_entry; + + /* Open the file on the server */ +@@ -788,7 +788,7 @@ + break; + /* This turned out not to be a regular file */ + case -ELOOP: +- if (!(nd->intent.open.flags & O_NOFOLLOW)) ++ if (!(nd->intent.it_flags & O_NOFOLLOW)) + goto no_open; + /* case -EISDIR: */ + /* case -EINVAL: */ +@@ -818,7 +818,7 @@ + parent = dget_parent(dentry); + if (!is_atomic_open(parent->d_inode, nd)) + goto no_open; +- openflags = nd->intent.open.flags; ++ openflags = nd->intent.it_flags; + if (openflags & O_CREAT) { + /* If this is a negative dentry, just drop it */ + if (!inode) +.old..........pc/linux-2.6.3-nfs-intent-suse/fs/nfs/nfs4proc.c +.new.........fs/nfs/nfs4proc.c +Index: linux-2.6.3-20/fs/nfs/nfs4proc.c +=================================================================== +--- linux-2.6.3-20.orig/fs/nfs/nfs4proc.c 2004-03-05 02:07:03.000000000 -0800 ++++ linux-2.6.3-20/fs/nfs/nfs4proc.c 2004-03-08 17:07:34.000000000 -0800 +@@ -778,17 +778,17 @@ + struct nfs4_state *state; + + if (nd->flags & LOOKUP_CREATE) { +- attr.ia_mode = nd->intent.open.create_mode; ++ attr.ia_mode = nd->intent.it_create_mode; + attr.ia_valid = ATTR_MODE; + if (!IS_POSIXACL(dir)) + attr.ia_mode &= ~current->fs->umask; + } else { + attr.ia_valid = 0; +- BUG_ON(nd->intent.open.flags & O_CREAT); ++ BUG_ON(nd->intent.it_flags & O_CREAT); + } + + cred = rpcauth_lookupcred(NFS_SERVER(dir)->client->cl_auth, 0); +- state = nfs4_do_open(dir, &dentry->d_name, nd->intent.open.flags, &attr, cred); ++ state = nfs4_do_open(dir, &dentry->d_name, nd->intent.it_flags, &attr, cred); + put_rpccred(cred); + if (IS_ERR(state)) + return 
(struct inode *)state; +.old..........pc/linux-2.6.3-nfs-intent-suse/fs/cifs/dir.c +.new.........fs/cifs/dir.c +Index: linux-2.6.3-20/fs/cifs/dir.c +=================================================================== +--- linux-2.6.3-20.orig/fs/cifs/dir.c 2004-03-05 02:07:03.000000000 -0800 ++++ linux-2.6.3-20/fs/cifs/dir.c 2004-03-08 17:16:19.000000000 -0800 +@@ -146,18 +146,18 @@ + if(nd) { + cFYI(1,("In create for inode %p dentry->inode %p nd flags = 0x%x for %s",inode, direntry->d_inode, nd->flags,full_path)); + +- if ((nd->intent.open.flags & O_ACCMODE) == O_RDONLY) ++ if ((nd->intent.it_flags & O_ACCMODE) == O_RDONLY) + desiredAccess = GENERIC_READ; +- else if ((nd->intent.open.flags & O_ACCMODE) == O_WRONLY) ++ else if ((nd->intent.it_flags & O_ACCMODE) == O_WRONLY) + desiredAccess = GENERIC_WRITE; +- else if ((nd->intent.open.flags & O_ACCMODE) == O_RDWR) ++ else if ((nd->intent.it_flags & O_ACCMODE) == O_RDWR) + desiredAccess = GENERIC_ALL; + +- if((nd->intent.open.flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) ++ if((nd->intent.it_flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL)) + disposition = FILE_CREATE; +- else if((nd->intent.open.flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) ++ else if((nd->intent.it_flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC)) + disposition = FILE_OVERWRITE_IF; +- else if((nd->intent.open.flags & O_CREAT) == O_CREAT) ++ else if((nd->intent.it_flags & O_CREAT) == O_CREAT) + disposition = FILE_OPEN_IF; + else { + cFYI(1,("Create flag not set in create function")); +@@ -314,7 +314,7 @@ + parent_dir_inode, direntry->d_name.name, direntry)); + + if(nd) { /* BB removeme */ +- cFYI(1,("In lookup nd flags 0x%x open intent flags 0x%x",nd->flags,nd->intent.open.flags)); ++ cFYI(1,("In lookup nd flags 0x%x open intent flags 0x%x",nd->flags,nd->intent.it_flags)); + } /* BB removeme BB */ + /* BB Add check of incoming data - e.g. 
frame not longer than maximum SMB - let server check the namelen BB */ + diff --git a/lustre/kernel_patches/patches/uml-fix-2.6.3.patch b/lustre/kernel_patches/patches/uml-fix-2.6.3.patch new file mode 100644 index 0000000..afd7e45 --- /dev/null +++ b/lustre/kernel_patches/patches/uml-fix-2.6.3.patch @@ -0,0 +1,13 @@ +Index: linux-2.6.3/arch/i386/kernel/sys_i386.c +=================================================================== +--- linux-2.6.3.orig/arch/i386/kernel/sys_i386.c 2004-02-23 14:21:03.000000000 -0800 ++++ linux-2.6.3/arch/i386/kernel/sys_i386.c 2004-02-23 14:24:38.000000000 -0800 +@@ -56,7 +56,7 @@ + } + + down_write(&current->mm->mmap_sem); +- error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); ++ error = do_mmap_pgoff(current->mm, file, addr, len, prot, flags, pgoff); + up_write(&current->mm->mmap_sem); + + if (file) diff --git a/lustre/kernel_patches/patches/uml-patch-2.6.3-rc2-1.patch b/lustre/kernel_patches/patches/uml-patch-2.6.3-rc2-1.patch new file mode 100644 index 0000000..50ccbe5 --- /dev/null +++ b/lustre/kernel_patches/patches/uml-patch-2.6.3-rc2-1.patch @@ -0,0 +1,18844 @@ +diff -Naur a/arch/um/config.release b/arch/um/config.release +--- a/arch/um/config.release 2004-02-11 12:16:48.000000000 -0500 ++++ b/arch/um/config.release 2004-02-11 12:29:03.000000000 -0500 +@@ -228,7 +228,6 @@ + CONFIG_EXT2_FS=y + CONFIG_SYSV_FS=m + CONFIG_UDF_FS=m +-# CONFIG_UDF_RW is not set + CONFIG_UFS_FS=m + # CONFIG_UFS_FS_WRITE is not set + +diff -Naur a/arch/um/defconfig b/arch/um/defconfig +--- a/arch/um/defconfig 2004-02-11 12:16:02.000000000 -0500 ++++ b/arch/um/defconfig 2004-02-11 12:27:57.000000000 -0500 +@@ -3,29 +3,19 @@ + # + CONFIG_USERMODE=y + CONFIG_MMU=y +-CONFIG_SWAP=y + CONFIG_UID16=y + CONFIG_RWSEM_GENERIC_SPINLOCK=y +-CONFIG_CONFIG_LOG_BUF_SHIFT=14 + + # +-# Code maturity level options +-# +-CONFIG_EXPERIMENTAL=y +- +-# +-# General Setup ++# UML-specific options + # + CONFIG_MODE_TT=y + CONFIG_MODE_SKAS=y + CONFIG_NET=y +-CONFIG_SYSVIPC=y
+-CONFIG_BSD_PROCESS_ACCT=y +-CONFIG_SYSCTL=y +-CONFIG_BINFMT_AOUT=y + CONFIG_BINFMT_ELF=y + CONFIG_BINFMT_MISC=y + CONFIG_HOSTFS=y ++CONFIG_HPPFS=y + CONFIG_MCONSOLE=y + CONFIG_MAGIC_SYSRQ=y + # CONFIG_HOST_2G_2G is not set +@@ -36,12 +26,41 @@ + # CONFIG_HIGHMEM is not set + CONFIG_PROC_MM=y + CONFIG_KERNEL_STACK_ORDER=2 ++CONFIG_UML_REAL_TIME_CLOCK=y ++ ++# ++# Code maturity level options ++# ++CONFIG_EXPERIMENTAL=y ++CONFIG_CLEAN_COMPILE=y ++CONFIG_STANDALONE=y ++CONFIG_BROKEN_ON_SMP=y ++ ++# ++# General setup ++# ++CONFIG_SWAP=y ++CONFIG_SYSVIPC=y ++CONFIG_BSD_PROCESS_ACCT=y ++CONFIG_SYSCTL=y ++CONFIG_LOG_BUF_SHIFT=14 ++# CONFIG_IKCONFIG is not set ++# CONFIG_EMBEDDED is not set ++CONFIG_KALLSYMS=y ++CONFIG_FUTEX=y ++CONFIG_EPOLL=y ++CONFIG_IOSCHED_NOOP=y ++CONFIG_IOSCHED_AS=y ++CONFIG_IOSCHED_DEADLINE=y + + # + # Loadable module support + # +-CONFIG_MODULES=y +-# CONFIG_KMOD is not set ++# CONFIG_MODULES is not set ++ ++# ++# Generic Driver Options ++# + + # + # Character Devices +@@ -69,6 +88,7 @@ + # + CONFIG_BLK_DEV_UBD=y + # CONFIG_BLK_DEV_UBD_SYNC is not set ++CONFIG_BLK_DEV_COW_COMMON=y + CONFIG_BLK_DEV_LOOP=y + CONFIG_BLK_DEV_NBD=y + CONFIG_BLK_DEV_RAM=y +@@ -78,7 +98,7 @@ + CONFIG_NETDEVICES=y + + # +-# Network Devices ++# UML Network Devices + # + CONFIG_UML_NET=y + CONFIG_UML_NET_ETHERTAP=y +@@ -88,22 +108,6 @@ + CONFIG_UML_NET_MCAST=y + # CONFIG_UML_NET_PCAP is not set + CONFIG_UML_NET_SLIRP=y +-CONFIG_DUMMY=y +-# CONFIG_BONDING is not set +-# CONFIG_EQUALIZER is not set +-CONFIG_TUN=y +-# CONFIG_ETHERTAP is not set +-CONFIG_PPP=y +-# CONFIG_PPP_MULTILINK is not set +-# CONFIG_PPP_ASYNC is not set +-# CONFIG_PPP_SYNC_TTY is not set +-# CONFIG_PPP_DEFLATE is not set +-# CONFIG_PPP_BSDCOMP is not set +-# CONFIG_PPPOE is not set +-CONFIG_SLIP=y +-# CONFIG_SLIP_COMPRESSED is not set +-# CONFIG_SLIP_SMART is not set +-# CONFIG_SLIP_MODE_SLIP6 is not set + + # + # Networking support +@@ -115,8 +119,6 @@ + CONFIG_PACKET=y + CONFIG_PACKET_MMAP=y + # 
CONFIG_NETLINK_DEV is not set +-# CONFIG_NETFILTER is not set +-# CONFIG_FILTER is not set + CONFIG_UNIX=y + # CONFIG_NET_KEY is not set + CONFIG_INET=y +@@ -130,8 +132,11 @@ + # CONFIG_SYN_COOKIES is not set + # CONFIG_INET_AH is not set + # CONFIG_INET_ESP is not set +-# CONFIG_XFRM_USER is not set ++# CONFIG_INET_IPCOMP is not set + # CONFIG_IPV6 is not set ++# CONFIG_DECNET is not set ++# CONFIG_BRIDGE is not set ++# CONFIG_NETFILTER is not set + + # + # SCTP Configuration (EXPERIMENTAL) +@@ -140,9 +145,9 @@ + # CONFIG_IP_SCTP is not set + # CONFIG_ATM is not set + # CONFIG_VLAN_8021Q is not set +-# CONFIG_LLC is not set +-# CONFIG_DECNET is not set +-# CONFIG_BRIDGE is not set ++# CONFIG_LLC2 is not set ++# CONFIG_IPX is not set ++# CONFIG_ATALK is not set + # CONFIG_X25 is not set + # CONFIG_LAPB is not set + # CONFIG_NET_DIVERT is not set +@@ -160,6 +165,10 @@ + # Network testing + # + # CONFIG_NET_PKTGEN is not set ++CONFIG_DUMMY=y ++# CONFIG_BONDING is not set ++# CONFIG_EQUALIZER is not set ++CONFIG_TUN=y + + # + # Ethernet (10 or 100Mbit) +@@ -171,12 +180,28 @@ + # + + # ++# Ethernet (10000 Mbit) ++# ++CONFIG_PPP=y ++# CONFIG_PPP_MULTILINK is not set ++# CONFIG_PPP_FILTER is not set ++# CONFIG_PPP_ASYNC is not set ++# CONFIG_PPP_SYNC_TTY is not set ++# CONFIG_PPP_DEFLATE is not set ++# CONFIG_PPP_BSDCOMP is not set ++# CONFIG_PPPOE is not set ++CONFIG_SLIP=y ++# CONFIG_SLIP_COMPRESSED is not set ++# CONFIG_SLIP_SMART is not set ++# CONFIG_SLIP_MODE_SLIP6 is not set ++ ++# + # Wireless LAN (non-hamradio) + # + # CONFIG_NET_RADIO is not set + + # +-# Token Ring devices (depends on LLC=y) ++# Token Ring devices + # + # CONFIG_SHAPER is not set + +@@ -186,68 +211,101 @@ + # CONFIG_WAN is not set + + # ++# Amateur Radio support ++# ++# CONFIG_HAMRADIO is not set ++ ++# ++# IrDA (infrared) support ++# ++# CONFIG_IRDA is not set ++ ++# ++# Bluetooth support ++# ++# CONFIG_BT is not set ++ ++# + # File systems + # ++CONFIG_EXT2_FS=y ++# CONFIG_EXT2_FS_XATTR is 
not set ++# CONFIG_EXT3_FS is not set ++# CONFIG_JBD is not set ++CONFIG_REISERFS_FS=y ++# CONFIG_REISERFS_CHECK is not set ++# CONFIG_REISERFS_PROC_INFO is not set ++# CONFIG_JFS_FS is not set ++# CONFIG_XFS_FS is not set ++CONFIG_MINIX_FS=y ++# CONFIG_ROMFS_FS is not set + CONFIG_QUOTA=y + # CONFIG_QFMT_V1 is not set + # CONFIG_QFMT_V2 is not set + CONFIG_QUOTACTL=y +-CONFIG_AUTOFS_FS=m +-CONFIG_AUTOFS4_FS=m +-CONFIG_REISERFS_FS=m +-# CONFIG_REISERFS_CHECK is not set +-# CONFIG_REISERFS_PROC_INFO is not set ++CONFIG_AUTOFS_FS=y ++CONFIG_AUTOFS4_FS=y ++ ++# ++# CD-ROM/DVD Filesystems ++# ++CONFIG_ISO9660_FS=y ++# CONFIG_JOLIET is not set ++# CONFIG_ZISOFS is not set ++# CONFIG_UDF_FS is not set ++ ++# ++# DOS/FAT/NT Filesystems ++# ++CONFIG_FAT_FS=y ++CONFIG_MSDOS_FS=y ++CONFIG_VFAT_FS=y ++# CONFIG_NTFS_FS is not set ++ ++# ++# Pseudo filesystems ++# ++CONFIG_PROC_FS=y ++CONFIG_PROC_KCORE=y ++CONFIG_DEVFS_FS=y ++CONFIG_DEVFS_MOUNT=y ++# CONFIG_DEVFS_DEBUG is not set ++CONFIG_DEVPTS_FS=y ++# CONFIG_DEVPTS_FS_XATTR is not set ++# CONFIG_TMPFS is not set ++# CONFIG_HUGETLB_PAGE is not set ++CONFIG_RAMFS=y ++ ++# ++# Miscellaneous filesystems ++# + # CONFIG_ADFS_FS is not set + # CONFIG_AFFS_FS is not set + # CONFIG_HFS_FS is not set + # CONFIG_BEFS_FS is not set + # CONFIG_BFS_FS is not set +-# CONFIG_EXT3_FS is not set +-# CONFIG_JBD is not set +-CONFIG_FAT_FS=m +-CONFIG_MSDOS_FS=m +-CONFIG_VFAT_FS=m + # CONFIG_EFS_FS is not set + CONFIG_JFFS_FS=y + CONFIG_JFFS_FS_VERBOSE=0 +-CONFIG_JFFS_PROC_FS=y + # CONFIG_JFFS2_FS is not set + # CONFIG_CRAMFS is not set +-# CONFIG_TMPFS is not set +-CONFIG_RAMFS=y +-CONFIG_ISO9660_FS=m +-# CONFIG_JOLIET is not set +-# CONFIG_ZISOFS is not set +-# CONFIG_JFS_FS is not set +-CONFIG_MINIX_FS=m + # CONFIG_VXFS_FS is not set +-# CONFIG_NTFS_FS is not set + # CONFIG_HPFS_FS is not set +-CONFIG_PROC_FS=y +-CONFIG_DEVFS_FS=y +-CONFIG_DEVFS_MOUNT=y +-# CONFIG_DEVFS_DEBUG is not set +-CONFIG_DEVPTS_FS=y + # CONFIG_QNX4FS_FS is not set +-# 
CONFIG_ROMFS_FS is not set +-CONFIG_EXT2_FS=y +-# CONFIG_EXT2_FS_XATTR is not set + # CONFIG_SYSV_FS is not set +-# CONFIG_UDF_FS is not set + # CONFIG_UFS_FS is not set +-# CONFIG_XFS_FS is not set + + # + # Network File Systems + # +-# CONFIG_CODA_FS is not set +-# CONFIG_INTERMEZZO_FS is not set + # CONFIG_NFS_FS is not set + # CONFIG_NFSD is not set + # CONFIG_EXPORTFS is not set +-# CONFIG_CIFS is not set + # CONFIG_SMB_FS is not set ++# CONFIG_CIFS is not set + # CONFIG_NCP_FS is not set ++# CONFIG_CODA_FS is not set ++# CONFIG_INTERMEZZO_FS is not set + # CONFIG_AFS_FS is not set + + # +@@ -317,28 +375,7 @@ + # + # SCSI support + # +-CONFIG_SCSI=y +-CONFIG_GENERIC_ISA_DMA=y +- +-# +-# SCSI support type (disk, tape, CD-ROM) +-# +-CONFIG_BLK_DEV_SD=y +-CONFIG_SD_EXTRA_DEVS=40 +-CONFIG_CHR_DEV_ST=y +-CONFIG_BLK_DEV_SR=y +-CONFIG_BLK_DEV_SR_VENDOR=y +-CONFIG_SR_EXTRA_DEVS=2 +-CONFIG_CHR_DEV_SG=y +- +-# +-# Some SCSI devices (e.g. CD jukebox) support multiple LUNs +-# +-CONFIG_SCSI_DEBUG_QUEUES=y +-CONFIG_SCSI_MULTI_LUN=y +-CONFIG_SCSI_CONSTANTS=y +-CONFIG_SCSI_LOGGING=y +-CONFIG_SCSI_DEBUG=y ++# CONFIG_SCSI is not set + + # + # Multi-device support (RAID and LVM) +@@ -360,6 +397,7 @@ + CONFIG_MTD_BLOCK=y + # CONFIG_FTL is not set + # CONFIG_NFTL is not set ++# CONFIG_INFTL is not set + + # + # RAM/ROM/Flash chip drivers +@@ -374,20 +412,21 @@ + # + # Mapping drivers for chip access + # ++# CONFIG_MTD_COMPLEX_MAPPINGS is not set + + # + # Self-contained MTD device drivers + # + # CONFIG_MTD_SLRAM is not set + # CONFIG_MTD_MTDRAM is not set +-CONFIG_MTD_BLKMTD=m ++CONFIG_MTD_BLKMTD=y + + # + # Disk-On-Chip Device Drivers + # +-# CONFIG_MTD_DOC1000 is not set + # CONFIG_MTD_DOC2000 is not set + # CONFIG_MTD_DOC2001 is not set ++# CONFIG_MTD_DOC2001PLUS is not set + + # + # NAND Flash Device Drivers +diff -Naur a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c +--- a/arch/um/drivers/chan_kern.c 2004-02-11 12:16:50.000000000 -0500 ++++ 
b/arch/um/drivers/chan_kern.c 2004-02-11 12:29:06.000000000 -0500 +@@ -8,6 +8,7 @@ + #include + #include + #include ++#include + #include + #include + #include "chan_kern.h" +@@ -265,6 +266,11 @@ + { + int n = 0; + ++ if(chan == NULL){ ++ CONFIG_CHUNK(str, size, n, "none", 1); ++ return(n); ++ } ++ + CONFIG_CHUNK(str, size, n, chan->ops->type, 0); + + if(chan->dev == NULL){ +@@ -420,7 +426,8 @@ + INIT_LIST_HEAD(chans); + } + +- if((out = strchr(str, ',')) != NULL){ ++ out = strchr(str, ','); ++ if(out != NULL){ + in = str; + *out = '\0'; + out++; +@@ -475,12 +482,15 @@ + goto out; + } + err = chan->ops->read(chan->fd, &c, chan->data); +- if(err > 0) tty_receive_char(tty, c); ++ if(err > 0) ++ tty_receive_char(tty, c); + } while(err > 0); ++ + if(err == 0) reactivate_fd(chan->fd, irq); + if(err == -EIO){ + if(chan->primary){ +- if(tty != NULL) tty_hangup(tty); ++ if(tty != NULL) ++ tty_hangup(tty); + line_disable(dev, irq); + close_chan(chans); + free_chan(chans); +diff -Naur a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c +--- a/arch/um/drivers/chan_user.c 2004-02-11 12:14:14.000000000 -0500 ++++ b/arch/um/drivers/chan_user.c 2004-02-11 12:25:42.000000000 -0500 +@@ -1,5 +1,5 @@ + /* +- * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) ++ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +@@ -7,7 +7,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -24,29 +23,27 @@ + + void generic_close(int fd, void *unused) + { +- close(fd); ++ os_close_file(fd); + } + + int generic_read(int fd, char *c_out, void *unused) + { + int n; + +- n = read(fd, c_out, sizeof(*c_out)); +- if(n < 0){ +- if(errno == EAGAIN) return(0); +- return(-errno); +- } +- else if(n == 0) return(-EIO); +- return(1); ++ n = os_read_file(fd, c_out, sizeof(*c_out)); ++ ++ if(n == -EAGAIN) ++ return(0); ++ else if(n == 0) ++ return(-EIO); ++ return(n); + } + ++/* XXX Trivial wrapper around os_write_file */ ++ + 
int generic_write(int fd, const char *buf, int n, void *unused) + { +- int count; +- +- count = write(fd, buf, n); +- if(count < 0) return(-errno); +- return(count); ++ return(os_write_file(fd, buf, n)); + } + + int generic_console_write(int fd, const char *buf, int n, void *unused) +@@ -68,15 +65,18 @@ + int generic_window_size(int fd, void *unused, unsigned short *rows_out, + unsigned short *cols_out) + { +- struct winsize size; +- int ret = 0; ++ int rows, cols; ++ int ret; ++ ++ ret = os_window_size(fd, &rows, &cols); ++ if(ret < 0) ++ return(ret); ++ ++ ret = ((*rows_out != rows) || (*cols_out != cols)); ++ ++ *rows_out = rows; ++ *cols_out = cols; + +- if(ioctl(fd, TIOCGWINSZ, &size) == 0){ +- ret = ((*rows_out != size.ws_row) || +- (*cols_out != size.ws_col)); +- *rows_out = size.ws_row; +- *cols_out = size.ws_col; +- } + return(ret); + } + +@@ -100,14 +100,16 @@ + struct winch_data *data = arg; + sigset_t sigs; + int pty_fd, pipe_fd; ++ int count, err; + char c = 1; + +- close(data->close_me); ++ os_close_file(data->close_me); + pty_fd = data->pty_fd; + pipe_fd = data->pipe_fd; +- if(write(pipe_fd, &c, sizeof(c)) != sizeof(c)) ++ count = os_write_file(pipe_fd, &c, sizeof(c)); ++ if(count != sizeof(c)) + printk("winch_thread : failed to write synchronization " +- "byte, errno = %d\n", errno); ++ "byte, err = %d\n", -count); + + signal(SIGWINCH, winch_handler); + sigfillset(&sigs); +@@ -123,26 +125,24 @@ + exit(1); + } + +- if(ioctl(pty_fd, TIOCSCTTY, 0) < 0){ +- printk("winch_thread : TIOCSCTTY failed, errno = %d\n", errno); +- exit(1); +- } +- if(tcsetpgrp(pty_fd, os_getpid()) < 0){ +- printk("winch_thread : tcsetpgrp failed, errno = %d\n", errno); ++ err = os_new_tty_pgrp(pty_fd, os_getpid()); ++ if(err < 0){ ++ printk("winch_thread : new_tty_pgrp failed, err = %d\n", -err); + exit(1); + } + +- if(read(pipe_fd, &c, sizeof(c)) != sizeof(c)) ++ count = os_read_file(pipe_fd, &c, sizeof(c)); ++ if(count != sizeof(c)) + printk("winch_thread : failed to read 
synchronization byte, " +- "errno = %d\n", errno); ++ "err = %d\n", -count); + + while(1){ + pause(); + +- if(write(pipe_fd, &c, sizeof(c)) != sizeof(c)){ +- printk("winch_thread : write failed, errno = %d\n", +- errno); +- } ++ count = os_write_file(pipe_fd, &c, sizeof(c)); ++ if(count != sizeof(c)) ++ printk("winch_thread : write failed, err = %d\n", ++ -count); + } + } + +@@ -154,8 +154,8 @@ + char c; + + err = os_pipe(fds, 1, 1); +- if(err){ +- printk("winch_tramp : os_pipe failed, errno = %d\n", -err); ++ if(err < 0){ ++ printk("winch_tramp : os_pipe failed, err = %d\n", -err); + return(err); + } + +@@ -168,12 +168,12 @@ + return(pid); + } + +- close(fds[1]); ++ os_close_file(fds[1]); + *fd_out = fds[0]; +- n = read(fds[0], &c, sizeof(c)); ++ n = os_read_file(fds[0], &c, sizeof(c)); + if(n != sizeof(c)){ + printk("winch_tramp : failed to read synchronization byte\n"); +- printk("read returned %d, errno = %d\n", n, errno); ++ printk("read failed, err = %d\n", -n); + printk("fd %d will not support SIGWINCH\n", fd); + *fd_out = -1; + } +@@ -183,20 +183,24 @@ + void register_winch(int fd, void *device_data) + { + int pid, thread, thread_fd; ++ int count; + char c = 1; + +- if(!isatty(fd)) return; ++ if(!isatty(fd)) ++ return; + + pid = tcgetpgrp(fd); +- if(!CHOOSE_MODE(is_tracer_winch(pid, fd, device_data), 0) && +- (pid == -1)){ ++ if(!CHOOSE_MODE_PROC(is_tracer_winch, is_skas_winch, pid, fd, ++ device_data) && (pid == -1)){ + thread = winch_tramp(fd, device_data, &thread_fd); + if(fd != -1){ + register_winch_irq(thread_fd, fd, thread, device_data); + +- if(write(thread_fd, &c, sizeof(c)) != sizeof(c)) ++ count = os_write_file(thread_fd, &c, sizeof(c)); ++ if(count != sizeof(c)) + printk("register_winch : failed to write " +- "synchronization byte\n"); ++ "synchronization byte, err = %d\n", ++ -count); + } + } + } +diff -Naur a/arch/um/drivers/cow.h b/arch/um/drivers/cow.h +--- a/arch/um/drivers/cow.h 1969-12-31 19:00:00.000000000 -0500 ++++ 
b/arch/um/drivers/cow.h 2004-02-11 12:26:08.000000000 -0500 +@@ -0,0 +1,41 @@ ++#ifndef __COW_H__ ++#define __COW_H__ ++ ++#include ++ ++#if __BYTE_ORDER == __BIG_ENDIAN ++# define ntohll(x) (x) ++# define htonll(x) (x) ++#elif __BYTE_ORDER == __LITTLE_ENDIAN ++# define ntohll(x) bswap_64(x) ++# define htonll(x) bswap_64(x) ++#else ++#error "__BYTE_ORDER not defined" ++#endif ++ ++extern int init_cow_file(int fd, char *cow_file, char *backing_file, ++ int sectorsize, int alignment, int *bitmap_offset_out, ++ unsigned long *bitmap_len_out, int *data_offset_out); ++ ++extern int file_reader(__u64 offset, char *buf, int len, void *arg); ++extern int read_cow_header(int (*reader)(__u64, char *, int, void *), ++ void *arg, __u32 *version_out, ++ char **backing_file_out, time_t *mtime_out, ++ __u64 *size_out, int *sectorsize_out, ++ __u32 *align_out, int *bitmap_offset_out); ++ ++extern int write_cow_header(char *cow_file, int fd, char *backing_file, ++ int sectorsize, int alignment, long long *size); ++ ++extern void cow_sizes(int version, __u64 size, int sectorsize, int align, ++ int bitmap_offset, unsigned long *bitmap_len_out, ++ int *data_offset_out); ++ ++#endif ++ ++/* ++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -Naur a/arch/um/drivers/cow_kern.c b/arch/um/drivers/cow_kern.c +--- a/arch/um/drivers/cow_kern.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/arch/um/drivers/cow_kern.c 2004-02-11 12:29:06.000000000 -0500 +@@ -0,0 +1,630 @@ ++#define COW_MAJOR 60 ++#define MAJOR_NR COW_MAJOR ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "2_5compat.h" ++#include "cow.h" ++#include "ubd_user.h" ++ ++#define COW_SHIFT 4 ++ ++struct cow { ++ int count; ++ char *cow_path; ++ dev_t cow_dev; ++ struct block_device *cow_bdev; ++ char *backing_path; ++ dev_t backing_dev; ++ struct 
block_device *backing_bdev; ++ int sectorsize; ++ unsigned long *bitmap; ++ unsigned long bitmap_len; ++ int bitmap_offset; ++ int data_offset; ++ devfs_handle_t devfs; ++ struct semaphore sem; ++ struct semaphore io_sem; ++ atomic_t working; ++ spinlock_t io_lock; ++ struct buffer_head *bh; ++ struct buffer_head *bhtail; ++ void *end_io; ++}; ++ ++#define DEFAULT_COW { \ ++ .count = 0, \ ++ .cow_path = NULL, \ ++ .cow_dev = 0, \ ++ .backing_path = NULL, \ ++ .backing_dev = 0, \ ++ .bitmap = NULL, \ ++ .bitmap_len = 0, \ ++ .bitmap_offset = 0, \ ++ .data_offset = 0, \ ++ .devfs = NULL, \ ++ .working = ATOMIC_INIT(0), \ ++ .io_lock = SPIN_LOCK_UNLOCKED, \ ++} ++ ++#define MAX_DEV (8) ++#define MAX_MINOR (MAX_DEV << COW_SHIFT) ++ ++struct cow cow_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_COW }; ++ ++/* Not modified by this driver */ ++static int blk_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = BLOCK_SIZE }; ++static int hardsect_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 512 }; ++ ++/* Protected by cow_lock */ ++static int sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 0 }; ++ ++static struct hd_struct cow_part[MAX_MINOR] = ++ { [ 0 ... 
MAX_MINOR - 1 ] = { 0, 0, 0 } }; ++ ++/* Protected by io_request_lock */ ++static request_queue_t *cow_queue; ++ ++static int cow_open(struct inode *inode, struct file *filp); ++static int cow_release(struct inode * inode, struct file * file); ++static int cow_ioctl(struct inode * inode, struct file * file, ++ unsigned int cmd, unsigned long arg); ++static int cow_revalidate(kdev_t rdev); ++ ++static struct block_device_operations cow_blops = { ++ .open = cow_open, ++ .release = cow_release, ++ .ioctl = cow_ioctl, ++ .revalidate = cow_revalidate, ++}; ++ ++/* Initialized in an initcall, and unchanged thereafter */ ++devfs_handle_t cow_dir_handle; ++ ++#define INIT_GENDISK(maj, name, parts, shift, bsizes, max, blops) \ ++{ \ ++ .major = maj, \ ++ .major_name = name, \ ++ .minor_shift = shift, \ ++ .max_p = 1 << shift, \ ++ .part = parts, \ ++ .sizes = bsizes, \ ++ .nr_real = max, \ ++ .real_devices = NULL, \ ++ .next = NULL, \ ++ .fops = blops, \ ++ .de_arr = NULL, \ ++ .flags = 0 \ ++} ++ ++static spinlock_t cow_lock = SPIN_LOCK_UNLOCKED; ++ ++static struct gendisk cow_gendisk = INIT_GENDISK(MAJOR_NR, "cow", cow_part, ++ COW_SHIFT, sizes, MAX_DEV, ++ &cow_blops); ++ ++static int cow_add(int n) ++{ ++ struct cow *dev = &cow_dev[n]; ++ char name[sizeof("nnnnnn\0")]; ++ int err = -ENODEV; ++ ++ if(dev->cow_path == NULL) ++ goto out; ++ ++ sprintf(name, "%d", n); ++ dev->devfs = devfs_register(cow_dir_handle, name, DEVFS_FL_REMOVABLE, ++ MAJOR_NR, n << COW_SHIFT, S_IFBLK | ++ S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP, ++ &cow_blops, NULL); ++ ++ init_MUTEX_LOCKED(&dev->sem); ++ init_MUTEX(&dev->io_sem); ++ ++ return(0); ++ ++ out: ++ return(err); ++} ++ ++/* ++ * Add buffer_head to back of pending list ++ */ ++static void cow_add_bh(struct cow *cow, struct buffer_head *bh) ++{ ++ unsigned long flags; ++ ++ spin_lock_irqsave(&cow->io_lock, flags); ++ if(cow->bhtail != NULL){ ++ cow->bhtail->b_reqnext = bh; ++ cow->bhtail = bh; ++ } ++ else { ++ cow->bh = bh; ++ cow->bhtail 
= bh; ++ } ++ spin_unlock_irqrestore(&cow->io_lock, flags); ++} ++ ++/* ++* Grab first pending buffer ++*/ ++static struct buffer_head *cow_get_bh(struct cow *cow) ++{ ++ struct buffer_head *bh; ++ ++ spin_lock_irq(&cow->io_lock); ++ bh = cow->bh; ++ if(bh != NULL){ ++ if(bh == cow->bhtail) ++ cow->bhtail = NULL; ++ cow->bh = bh->b_reqnext; ++ bh->b_reqnext = NULL; ++ } ++ spin_unlock_irq(&cow->io_lock); ++ ++ return(bh); ++} ++ ++static void cow_handle_bh(struct cow *cow, struct buffer_head *bh, ++ struct buffer_head **cow_bh, int ncow_bh) ++{ ++ int i; ++ ++ if(ncow_bh > 0) ++ ll_rw_block(WRITE, ncow_bh, cow_bh); ++ ++ for(i = 0; i < ncow_bh ; i++){ ++ wait_on_buffer(cow_bh[i]); ++ brelse(cow_bh[i]); ++ } ++ ++ ll_rw_block(WRITE, 1, &bh); ++ brelse(bh); ++} ++ ++static struct buffer_head *cow_new_bh(struct cow *dev, int sector) ++{ ++ struct buffer_head *bh; ++ ++ sector = (dev->bitmap_offset + sector / 8) / dev->sectorsize; ++ bh = getblk(dev->cow_dev, sector, dev->sectorsize); ++ memcpy(bh->b_data, dev->bitmap + sector / (8 * sizeof(dev->bitmap[0])), ++ dev->sectorsize); ++ return(bh); ++} ++ ++/* Copied from loop.c, needed to avoid deadlocking in make_request. 
*/ ++ ++static int cow_thread(void *data) ++{ ++ struct cow *dev = data; ++ struct buffer_head *bh; ++ ++ daemonize(); ++ exit_files(current); ++ ++ sprintf(current->comm, "cow%d", dev - cow_dev); ++ ++ spin_lock_irq(&current->sigmask_lock); ++ sigfillset(&current->blocked); ++ flush_signals(current); ++ spin_unlock_irq(&current->sigmask_lock); ++ ++ atomic_inc(&dev->working); ++ ++ current->policy = SCHED_OTHER; ++ current->nice = -20; ++ ++ current->flags |= PF_NOIO; ++ ++ /* ++ * up sem, we are running ++ */ ++ up(&dev->sem); ++ ++ for(;;){ ++ int start, len, nbh, i, update_bitmap = 0; ++ struct buffer_head *cow_bh[2]; ++ ++ down_interruptible(&dev->io_sem); ++ /* ++ * could be upped because of tear-down, not because of ++ * pending work ++ */ ++ if(!atomic_read(&dev->working)) ++ break; ++ ++ bh = cow_get_bh(dev); ++ if(bh == NULL){ ++ printk(KERN_ERR "cow: missing bh\n"); ++ continue; ++ } ++ ++ start = bh->b_blocknr * bh->b_size / dev->sectorsize; ++ len = bh->b_size / dev->sectorsize; ++ for(i = 0; i < len ; i++){ ++ if(ubd_test_bit(start + i, ++ (unsigned char *) dev->bitmap)) ++ continue; ++ ++ update_bitmap = 1; ++ ubd_set_bit(start + i, (unsigned char *) dev->bitmap); ++ } ++ ++ cow_bh[0] = NULL; ++ cow_bh[1] = NULL; ++ nbh = 0; ++ if(update_bitmap){ ++ cow_bh[0] = cow_new_bh(dev, start); ++ nbh++; ++ if(start / dev->sectorsize != ++ (start + len) / dev->sectorsize){ ++ cow_bh[1] = cow_new_bh(dev, start + len); ++ nbh++; ++ } ++ } ++ ++ bh->b_dev = dev->cow_dev; ++ bh->b_blocknr += dev->data_offset / dev->sectorsize; ++ ++ cow_handle_bh(dev, bh, cow_bh, nbh); ++ ++ /* ++ * upped both for pending work and tear-down, lo_pending ++ * will hit zero then ++ */ ++ if(atomic_dec_and_test(&dev->working)) ++ break; ++ } ++ ++ up(&dev->sem); ++ return(0); ++} ++ ++static int cow_make_request(request_queue_t *q, int rw, struct buffer_head *bh) ++{ ++ struct cow *dev; ++ int n, minor; ++ ++ minor = MINOR(bh->b_rdev); ++ n = minor >> COW_SHIFT; ++ dev = &cow_dev[n]; ++ ++ dev->end_io
= NULL; ++ if(ubd_test_bit(bh->b_rsector, (unsigned char *) dev->bitmap)){ ++ bh->b_rdev = dev->cow_dev; ++ bh->b_rsector += dev->data_offset / dev->sectorsize; ++ } ++ else if(rw == WRITE){ ++ bh->b_dev = dev->cow_dev; ++ bh->b_blocknr += dev->data_offset / dev->sectorsize; ++ ++ cow_add_bh(dev, bh); ++ up(&dev->io_sem); ++ return(0); ++ } ++ else { ++ bh->b_rdev = dev->backing_dev; ++ } ++ ++ return(1); ++} ++ ++int cow_init(void) ++{ ++ int i; ++ ++ cow_dir_handle = devfs_mk_dir (NULL, "cow", NULL); ++ if (devfs_register_blkdev(MAJOR_NR, "cow", &cow_blops)) { ++ printk(KERN_ERR "cow: unable to get major %d\n", MAJOR_NR); ++ return -1; ++ } ++ read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */ ++ blksize_size[MAJOR_NR] = blk_sizes; ++ blk_size[MAJOR_NR] = sizes; ++ INIT_HARDSECT(hardsect_size, MAJOR_NR, hardsect_sizes); ++ ++ cow_queue = BLK_DEFAULT_QUEUE(MAJOR_NR); ++ blk_init_queue(cow_queue, NULL); ++ INIT_ELV(cow_queue, &cow_queue->elevator); ++ blk_queue_make_request(cow_queue, cow_make_request); ++ ++ add_gendisk(&cow_gendisk); ++ ++ for(i=0;i 0){ ++ n = (left > blocksize) ? blocksize : left; ++ ++ bh = bread(dev, block, (n < 512) ? 
512 : n); ++ if(bh == NULL) ++ return(-EIO); ++ ++ n -= offset; ++ memcpy(&buf[cur], bh->b_data + offset, n); ++ block++; ++ left -= n; ++ cur += n; ++ offset = 0; ++ brelse(bh); ++ } ++ ++ return(count); ++} ++ ++static int cow_open(struct inode *inode, struct file *filp) ++{ ++ int (*dev_ioctl)(struct inode *, struct file *, unsigned int, ++ unsigned long); ++ mm_segment_t fs; ++ struct cow *dev; ++ __u64 size; ++ __u32 version, align; ++ time_t mtime; ++ char *backing_file; ++ int n, offset, err = 0; ++ ++ n = DEVICE_NR(inode->i_rdev); ++ if(n >= MAX_DEV) ++ return(-ENODEV); ++ dev = &cow_dev[n]; ++ offset = n << COW_SHIFT; ++ ++ spin_lock(&cow_lock); ++ ++ if(dev->count == 0){ ++ dev->cow_dev = name_to_kdev_t(dev->cow_path); ++ if(dev->cow_dev == 0){ ++ printk(KERN_ERR "cow_open - name_to_kdev_t(\"%s\") " ++ "failed\n", dev->cow_path); ++ err = -ENODEV; ++ } ++ ++ dev->backing_dev = name_to_kdev_t(dev->backing_path); ++ if(dev->backing_dev == 0){ ++ printk(KERN_ERR "cow_open - name_to_kdev_t(\"%s\") " ++ "failed\n", dev->backing_path); ++ err = -ENODEV; ++ } ++ ++ if(err) ++ goto out; ++ ++ dev->cow_bdev = bdget(dev->cow_dev); ++ if(dev->cow_bdev == NULL){ ++ printk(KERN_ERR "cow_open - bdget(\"%s\") failed\n", ++ dev->cow_path); ++ err = -ENOMEM; ++ } ++ dev->backing_bdev = bdget(dev->backing_dev); ++ if(dev->backing_bdev == NULL){ ++ printk(KERN_ERR "cow_open - bdget(\"%s\") failed\n", ++ dev->backing_path); ++ err = -ENOMEM; ++ } ++ ++ if(err) ++ goto out; ++ ++ err = blkdev_get(dev->cow_bdev, FMODE_READ|FMODE_WRITE, 0, ++ BDEV_RAW); ++ if(err){ ++ printk("cow_open - blkdev_get of COW device failed, " ++ "error = %d\n", err); ++ goto out; ++ } ++ ++ err = blkdev_get(dev->backing_bdev, FMODE_READ, 0, BDEV_RAW); ++ if(err){ ++ printk("cow_open - blkdev_get of backing device " ++ "failed, error = %d\n", err); ++ goto out; ++ } ++ ++ err = read_cow_header(reader, &dev->cow_dev, &version, ++ &backing_file, &mtime, &size, ++ &dev->sectorsize, &align, ++ 
&dev->bitmap_offset); ++ if(err){ ++ printk(KERN_ERR "cow_open - read_cow_header failed, " ++ "err = %d\n", err); ++ goto out; ++ } ++ ++ cow_sizes(version, size, dev->sectorsize, align, ++ dev->bitmap_offset, &dev->bitmap_len, ++ &dev->data_offset); ++ dev->bitmap = (void *) vmalloc(dev->bitmap_len); ++ if(dev->bitmap == NULL){ ++ err = -ENOMEM; ++ printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); ++ goto out; ++ } ++ flush_tlb_kernel_vm(); ++ ++ err = reader(dev->bitmap_offset, (char *) dev->bitmap, ++ dev->bitmap_len, &dev->cow_dev); ++ if(err < 0){ ++ printk(KERN_ERR "Failed to read COW bitmap\n"); ++ vfree(dev->bitmap); ++ goto out; ++ } ++ ++ dev_ioctl = dev->backing_bdev->bd_op->ioctl; ++ fs = get_fs(); ++ set_fs(KERNEL_DS); ++ err = (*dev_ioctl)(inode, filp, BLKGETSIZE, ++ (unsigned long) &sizes[offset]); ++ set_fs(fs); ++ if(err){ ++ printk(KERN_ERR "cow_open - BLKGETSIZE failed, " ++ "error = %d\n", err); ++ goto out; ++ } ++ ++ kernel_thread(cow_thread, dev, ++ CLONE_FS | CLONE_FILES | CLONE_SIGHAND); ++ down(&dev->sem); ++ } ++ dev->count++; ++ out: ++ spin_unlock(&cow_lock); ++ return(err); ++} ++ ++static int cow_release(struct inode * inode, struct file * file) ++{ ++ struct cow *dev; ++ int n, err; ++ ++ n = DEVICE_NR(inode->i_rdev); ++ if(n >= MAX_DEV) ++ return(-ENODEV); ++ dev = &cow_dev[n]; ++ ++ spin_lock(&cow_lock); ++ ++ if(--dev->count > 0) ++ goto out; ++ ++ err = blkdev_put(dev->cow_bdev, BDEV_RAW); ++ if(err) ++ printk("cow_release - blkdev_put of cow device failed, " ++ "error = %d\n", err); ++ bdput(dev->cow_bdev); ++ dev->cow_bdev = 0; ++ ++ err = blkdev_put(dev->backing_bdev, BDEV_RAW); ++ if(err) ++ printk("cow_release - blkdev_put of backing device failed, " ++ "error = %d\n", err); ++ bdput(dev->backing_bdev); ++ dev->backing_bdev = 0; ++ ++ out: ++ spin_unlock(&cow_lock); ++ return(0); ++} ++ ++static int cow_ioctl(struct inode * inode, struct file * file, ++ unsigned int cmd, unsigned long arg) ++{ ++ struct cow *dev; ++ int 
(*dev_ioctl)(struct inode *, struct file *, unsigned int, ++ unsigned long); ++ int n; ++ ++ n = DEVICE_NR(inode->i_rdev); ++ if(n >= MAX_DEV) ++ return(-ENODEV); ++ dev = &cow_dev[n]; ++ ++ dev_ioctl = dev->backing_bdev->bd_op->ioctl; ++ return((*dev_ioctl)(inode, file, cmd, arg)); ++} ++ ++static int cow_revalidate(kdev_t rdev) ++{ ++ printk(KERN_ERR "Need to implement cow_revalidate\n"); ++ return(0); ++} ++ ++static int parse_unit(char **ptr) ++{ ++ char *str = *ptr, *end; ++ int n = -1; ++ ++ if(isdigit(*str)) { ++ n = simple_strtoul(str, &end, 0); ++ if(end == str) ++ return(-1); ++ *ptr = end; ++ } ++ else if (('a' <= *str) && (*str <= 'h')) { ++ n = *str - 'a'; ++ str++; ++ *ptr = str; ++ } ++ return(n); ++} ++ ++static int cow_setup(char *str) ++{ ++ struct cow *dev; ++ char *cow_name, *backing_name; ++ int unit; ++ ++ unit = parse_unit(&str); ++ if(unit < 0){ ++ printk(KERN_ERR "cow_setup - Couldn't parse unit number\n"); ++ return(1); ++ } ++ ++ if(*str != '='){ ++ printk(KERN_ERR "cow_setup - Missing '=' after unit " ++ "number\n"); ++ return(1); ++ } ++ str++; ++ ++ cow_name = str; ++ backing_name = strchr(str, ','); ++ if(backing_name == NULL){ ++ printk(KERN_ERR "cow_setup - missing backing device name\n"); ++ return(0); ++ } ++ *backing_name = '\0'; ++ backing_name++; ++ ++ spin_lock(&cow_lock); ++ ++ dev = &cow_dev[unit]; ++ dev->cow_path = cow_name; ++ dev->backing_path = backing_name; ++ ++ spin_unlock(&cow_lock); ++ return(0); ++} ++ ++__setup("cow", cow_setup); ++ ++/* ++ * Overrides for Emacs so that we follow Linus's tabbing style. ++ * Emacs will notice this stuff at the end of the file and automatically ++ * adjust the settings for this buffer only. This must remain at the end ++ * of the file. 
++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -Naur a/arch/um/drivers/cow_sys.h b/arch/um/drivers/cow_sys.h +--- a/arch/um/drivers/cow_sys.h 1969-12-31 19:00:00.000000000 -0500 ++++ b/arch/um/drivers/cow_sys.h 2004-02-11 12:27:42.000000000 -0500 +@@ -0,0 +1,48 @@ ++#ifndef __COW_SYS_H__ ++#define __COW_SYS_H__ ++ ++#include "kern_util.h" ++#include "user_util.h" ++#include "os.h" ++#include "user.h" ++ ++static inline void *cow_malloc(int size) ++{ ++ return(um_kmalloc(size)); ++} ++ ++static inline void cow_free(void *ptr) ++{ ++ kfree(ptr); ++} ++ ++#define cow_printf printk ++ ++static inline char *cow_strdup(char *str) ++{ ++ return(uml_strdup(str)); ++} ++ ++static inline int cow_seek_file(int fd, __u64 offset) ++{ ++ return(os_seek_file(fd, offset)); ++} ++ ++static inline int cow_file_size(char *file, __u64 *size_out) ++{ ++ return(os_file_size(file, size_out)); ++} ++ ++static inline int cow_write_file(int fd, char *buf, int size) ++{ ++ return(os_write_file(fd, buf, size)); ++} ++ ++#endif ++ ++/* ++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -Naur a/arch/um/drivers/cow_user.c b/arch/um/drivers/cow_user.c +--- a/arch/um/drivers/cow_user.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/arch/um/drivers/cow_user.c 2004-02-11 12:27:36.000000000 -0500 +@@ -0,0 +1,375 @@ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "os.h" ++ ++#include "cow.h" ++#include "cow_sys.h" ++ ++#define PATH_LEN_V1 256 ++ ++struct cow_header_v1 { ++ int magic; ++ int version; ++ char backing_file[PATH_LEN_V1]; ++ time_t mtime; ++ __u64 size; ++ int sectorsize; ++}; ++ ++#define PATH_LEN_V2 MAXPATHLEN ++ ++struct cow_header_v2 { ++ unsigned long magic; ++ unsigned long version; ++ char backing_file[PATH_LEN_V2]; ++ 
time_t mtime; ++ __u64 size; ++ int sectorsize; ++}; ++ ++/* Define PATH_LEN_V3 as the usual value of MAXPATHLEN, just hard-code it in ++ * case other systems have different values for MAXPATHLEN ++ */ ++#define PATH_LEN_V3 4096 ++ ++/* Changes from V2 - ++ * PATH_LEN_V3 as described above ++ * Explicitly specify field bit lengths for systems with different ++ * lengths for the usual C types. Not sure whether char or ++ * time_t should be changed, this can be changed later without ++ * breaking compatibility ++ * Add alignment field so that different alignments can be used for the ++ * bitmap and data ++ * Add cow_format field to allow for the possibility of different ways ++ * of specifying the COW blocks. For now, the only value is 0, ++ * for the traditional COW bitmap. ++ * Move the backing_file field to the end of the header. This allows ++ * for the possibility of expanding it into the padding required ++ * by the bitmap alignment. ++ * The bitmap and data portions of the file will be aligned as specified ++ * by the alignment field. This is to allow COW files to be ++ * put on devices with restrictions on access alignments, such as ++ * /dev/raw, with a 512 byte alignment restriction. This also ++ * allows the data to be more aligned more strictly than on ++ * sector boundaries. This is needed for ubd-mmap, which needs ++ * the data to be page aligned. ++ * Fixed (finally!) 
the rounding bug ++ */ ++ ++struct cow_header_v3 { ++ __u32 magic; ++ __u32 version; ++ time_t mtime; ++ __u64 size; ++ __u32 sectorsize; ++ __u32 alignment; ++ __u32 cow_format; ++ char backing_file[PATH_LEN_V3]; ++}; ++ ++/* COW format definitions - for now, we have only the usual COW bitmap */ ++#define COW_BITMAP 0 ++ ++union cow_header { ++ struct cow_header_v1 v1; ++ struct cow_header_v2 v2; ++ struct cow_header_v3 v3; ++}; ++ ++#define COW_MAGIC 0x4f4f4f4d /* MOOO */ ++#define COW_VERSION 3 ++ ++#define DIV_ROUND(x, len) (((x) + (len) - 1) / (len)) ++#define ROUND_UP(x, align) DIV_ROUND(x, align) * (align) ++ ++void cow_sizes(int version, __u64 size, int sectorsize, int align, ++ int bitmap_offset, unsigned long *bitmap_len_out, ++ int *data_offset_out) ++{ ++ if(version < 3){ ++ *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize); ++ ++ *data_offset_out = bitmap_offset + *bitmap_len_out; ++ *data_offset_out = (*data_offset_out + sectorsize - 1) / ++ sectorsize; ++ *data_offset_out *= sectorsize; ++ } ++ else { ++ *bitmap_len_out = DIV_ROUND(size, sectorsize); ++ *bitmap_len_out = DIV_ROUND(*bitmap_len_out, 8); ++ ++ *data_offset_out = bitmap_offset + *bitmap_len_out; ++ *data_offset_out = ROUND_UP(*data_offset_out, align); ++ } ++} ++ ++static int absolutize(char *to, int size, char *from) ++{ ++ char save_cwd[256], *slash; ++ int remaining; ++ ++ if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) { ++ cow_printf("absolutize : unable to get cwd - errno = %d\n", ++ errno); ++ return(-1); ++ } ++ slash = strrchr(from, '/'); ++ if(slash != NULL){ ++ *slash = '\0'; ++ if(chdir(from)){ ++ *slash = '/'; ++ cow_printf("absolutize : Can't cd to '%s' - " ++ "errno = %d\n", from, errno); ++ return(-1); ++ } ++ *slash = '/'; ++ if(getcwd(to, size) == NULL){ ++ cow_printf("absolutize : unable to get cwd of '%s' - " ++ "errno = %d\n", from, errno); ++ return(-1); ++ } ++ remaining = size - strlen(to); ++ if(strlen(slash) + 1 > remaining){ ++ cow_printf("absolutize 
: unable to fit '%s' into %d " ++ "chars\n", from, size); ++ return(-1); ++ } ++ strcat(to, slash); ++ } ++ else { ++ if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){ ++ cow_printf("absolutize : unable to fit '%s' into %d " ++ "chars\n", from, size); ++ return(-1); ++ } ++ strcpy(to, save_cwd); ++ strcat(to, "/"); ++ strcat(to, from); ++ } ++ chdir(save_cwd); ++ return(0); ++} ++ ++int write_cow_header(char *cow_file, int fd, char *backing_file, ++ int sectorsize, int alignment, long long *size) ++{ ++ struct cow_header_v3 *header; ++ unsigned long modtime; ++ int err; ++ ++ err = cow_seek_file(fd, 0); ++ if(err < 0){ ++ cow_printf("write_cow_header - lseek failed, err = %d\n", -err); ++ goto out; ++ } ++ ++ err = -ENOMEM; ++ header = cow_malloc(sizeof(*header)); ++ if(header == NULL){ ++ cow_printf("Failed to allocate COW V3 header\n"); ++ goto out; ++ } ++ header->magic = htonl(COW_MAGIC); ++ header->version = htonl(COW_VERSION); ++ ++ err = -EINVAL; ++ if(strlen(backing_file) > sizeof(header->backing_file) - 1){ ++ cow_printf("Backing file name \"%s\" is too long - names are " ++ "limited to %d characters\n", backing_file, ++ sizeof(header->backing_file) - 1); ++ goto out_free; ++ } ++ ++ if(absolutize(header->backing_file, sizeof(header->backing_file), ++ backing_file)) ++ goto out_free; ++ ++ err = os_file_modtime(header->backing_file, &modtime); ++ if(err < 0){ ++ cow_printf("Backing file '%s' mtime request failed, " ++ "err = %d\n", header->backing_file, -err); ++ goto out_free; ++ } ++ ++ err = cow_file_size(header->backing_file, size); ++ if(err < 0){ ++ cow_printf("Couldn't get size of backing file '%s', " ++ "err = %d\n", header->backing_file, -err); ++ goto out_free; ++ } ++ ++ header->mtime = htonl(modtime); ++ header->size = htonll(*size); ++ header->sectorsize = htonl(sectorsize); ++ header->alignment = htonl(alignment); ++ header->cow_format = COW_BITMAP; ++ ++ err = os_write_file(fd, header, sizeof(*header)); ++ if(err != sizeof(*header)){ ++ 
cow_printf("Write of header to new COW file '%s' failed, " ++ "err = %d\n", cow_file, -err); ++ goto out_free; ++ } ++ err = 0; ++ out_free: ++ cow_free(header); ++ out: ++ return(err); ++} ++ ++int file_reader(__u64 offset, char *buf, int len, void *arg) ++{ ++ int fd = *((int *) arg); ++ ++ return(pread(fd, buf, len, offset)); ++} ++ ++/* XXX Need to sanity-check the values read from the header */ ++ ++int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg, ++ __u32 *version_out, char **backing_file_out, ++ time_t *mtime_out, __u64 *size_out, ++ int *sectorsize_out, __u32 *align_out, ++ int *bitmap_offset_out) ++{ ++ union cow_header *header; ++ char *file; ++ int err, n; ++ unsigned long version, magic; ++ ++ header = cow_malloc(sizeof(*header)); ++ if(header == NULL){ ++ cow_printf("read_cow_header - Failed to allocate header\n"); ++ return(-ENOMEM); ++ } ++ err = -EINVAL; ++ n = (*reader)(0, (char *) header, sizeof(*header), arg); ++ if(n < offsetof(typeof(header->v1), backing_file)){ ++ cow_printf("read_cow_header - short header\n"); ++ goto out; ++ } ++ ++ magic = header->v1.magic; ++ if(magic == COW_MAGIC) { ++ version = header->v1.version; ++ } ++ else if(magic == ntohl(COW_MAGIC)){ ++ version = ntohl(header->v1.version); ++ } ++ /* No error printed because the non-COW case comes through here */ ++ else goto out; ++ ++ *version_out = version; ++ ++ if(version == 1){ ++ if(n < sizeof(header->v1)){ ++ cow_printf("read_cow_header - failed to read V1 " ++ "header\n"); ++ goto out; ++ } ++ *mtime_out = header->v1.mtime; ++ *size_out = header->v1.size; ++ *sectorsize_out = header->v1.sectorsize; ++ *bitmap_offset_out = sizeof(header->v1); ++ *align_out = *sectorsize_out; ++ file = header->v1.backing_file; ++ } ++ else if(version == 2){ ++ if(n < sizeof(header->v2)){ ++ cow_printf("read_cow_header - failed to read V2 " ++ "header\n"); ++ goto out; ++ } ++ *mtime_out = ntohl(header->v2.mtime); ++ *size_out = ntohll(header->v2.size); ++ 
*sectorsize_out = ntohl(header->v2.sectorsize); ++ *bitmap_offset_out = sizeof(header->v2); ++ *align_out = *sectorsize_out; ++ file = header->v2.backing_file; ++ } ++ else if(version == 3){ ++ if(n < sizeof(header->v3)){ ++ cow_printf("read_cow_header - failed to read V2 " ++ "header\n"); ++ goto out; ++ } ++ *mtime_out = ntohl(header->v3.mtime); ++ *size_out = ntohll(header->v3.size); ++ *sectorsize_out = ntohl(header->v3.sectorsize); ++ *align_out = ntohl(header->v3.alignment); ++ *bitmap_offset_out = ROUND_UP(sizeof(header->v3), *align_out); ++ file = header->v3.backing_file; ++ } ++ else { ++ cow_printf("read_cow_header - invalid COW version\n"); ++ goto out; ++ } ++ err = -ENOMEM; ++ *backing_file_out = cow_strdup(file); ++ if(*backing_file_out == NULL){ ++ cow_printf("read_cow_header - failed to allocate backing " ++ "file\n"); ++ goto out; ++ } ++ err = 0; ++ out: ++ cow_free(header); ++ return(err); ++} ++ ++int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize, ++ int alignment, int *bitmap_offset_out, ++ unsigned long *bitmap_len_out, int *data_offset_out) ++{ ++ __u64 size, offset; ++ char zero = 0; ++ int err; ++ ++ err = write_cow_header(cow_file, fd, backing_file, sectorsize, ++ alignment, &size); ++ if(err) ++ goto out; ++ ++ *bitmap_offset_out = ROUND_UP(sizeof(struct cow_header_v3), alignment); ++ cow_sizes(COW_VERSION, size, sectorsize, alignment, *bitmap_offset_out, ++ bitmap_len_out, data_offset_out); ++ ++ offset = *data_offset_out + size - sizeof(zero); ++ err = cow_seek_file(fd, offset); ++ if(err < 0){ ++ cow_printf("cow bitmap lseek failed : err = %d\n", -err); ++ goto out; ++ } ++ ++ /* does not really matter how much we write it is just to set EOF ++ * this also sets the entire COW bitmap ++ * to zero without having to allocate it ++ */ ++ err = cow_write_file(fd, &zero, sizeof(zero)); ++ if(err != sizeof(zero)){ ++ cow_printf("Write of bitmap to new COW file '%s' failed, " ++ "err = %d\n", cow_file, -err); ++ err 
= -EINVAL; ++ goto out; ++ } ++ ++ return(0); ++ ++ out: ++ return(err); ++} ++ ++/* ++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -Naur a/arch/um/drivers/daemon_user.c b/arch/um/drivers/daemon_user.c +--- a/arch/um/drivers/daemon_user.c 2004-02-11 12:15:11.000000000 -0500 ++++ b/arch/um/drivers/daemon_user.c 2004-02-11 12:26:57.000000000 -0500 +@@ -53,7 +53,8 @@ + struct request_v3 req; + int fd, n, err; + +- if((pri->control = socket(AF_UNIX, SOCK_STREAM, 0)) < 0){ ++ pri->control = socket(AF_UNIX, SOCK_STREAM, 0); ++ if(pri->control < 0){ + printk("daemon_open : control socket failed, errno = %d\n", + errno); + return(-errno); +@@ -67,7 +68,8 @@ + goto out; + } + +- if((fd = socket(AF_UNIX, SOCK_DGRAM, 0)) < 0){ ++ fd = socket(AF_UNIX, SOCK_DGRAM, 0); ++ if(fd < 0){ + printk("daemon_open : data socket failed, errno = %d\n", + errno); + err = -errno; +@@ -91,18 +93,18 @@ + req.version = SWITCH_VERSION; + req.type = REQ_NEW_CONTROL; + req.sock = *local_addr; +- n = write(pri->control, &req, sizeof(req)); ++ n = os_write_file(pri->control, &req, sizeof(req)); + if(n != sizeof(req)){ +- printk("daemon_open : control setup request returned %d, " +- "errno = %d\n", n, errno); ++ printk("daemon_open : control setup request failed, err = %d\n", ++ -n); + err = -ENOTCONN; + goto out; + } + +- n = read(pri->control, sun, sizeof(*sun)); ++ n = os_read_file(pri->control, sun, sizeof(*sun)); + if(n != sizeof(*sun)){ +- printk("daemon_open : read of data socket returned %d, " +- "errno = %d\n", n, errno); ++ printk("daemon_open : read of data socket failed, err = %d\n", ++ -n); + err = -ENOTCONN; + goto out_close; + } +@@ -111,9 +113,9 @@ + return(fd); + + out_close: +- close(fd); ++ os_close_file(fd); + out: +- close(pri->control); ++ os_close_file(pri->control); + return(err); + } + +@@ -153,8 +155,8 @@ + { + struct daemon_data *pri = data; + +- close(pri->fd); +- 
close(pri->control); ++ os_close_file(pri->fd); ++ os_close_file(pri->control); + if(pri->data_addr != NULL) kfree(pri->data_addr); + if(pri->ctl_addr != NULL) kfree(pri->ctl_addr); + if(pri->local_addr != NULL) kfree(pri->local_addr); +diff -Naur a/arch/um/drivers/fd.c b/arch/um/drivers/fd.c +--- a/arch/um/drivers/fd.c 2004-02-11 12:16:47.000000000 -0500 ++++ b/arch/um/drivers/fd.c 2004-02-11 12:29:01.000000000 -0500 +@@ -35,7 +35,8 @@ + printk("fd_init : couldn't parse file descriptor '%s'\n", str); + return(NULL); + } +- if((data = um_kmalloc(sizeof(*data))) == NULL) return(NULL); ++ data = um_kmalloc(sizeof(*data)); ++ if(data == NULL) return(NULL); + *data = ((struct fd_chan) { .fd = n, + .raw = opts->raw }); + return(data); +diff -Naur a/arch/um/drivers/harddog_user.c b/arch/um/drivers/harddog_user.c +--- a/arch/um/drivers/harddog_user.c 2004-02-11 12:14:38.000000000 -0500 ++++ b/arch/um/drivers/harddog_user.c 2004-02-11 12:26:41.000000000 -0500 +@@ -27,10 +27,10 @@ + dup2(data->stdin, 0); + dup2(data->stdout, 1); + dup2(data->stdout, 2); +- close(data->stdin); +- close(data->stdout); +- close(data->close_me[0]); +- close(data->close_me[1]); ++ os_close_file(data->stdin); ++ os_close_file(data->stdout); ++ os_close_file(data->close_me[0]); ++ os_close_file(data->close_me[1]); + } + + int start_watchdog(int *in_fd_ret, int *out_fd_ret, char *sock) +@@ -44,15 +44,15 @@ + char **args = NULL; + + err = os_pipe(in_fds, 1, 0); +- if(err){ +- printk("harddog_open - os_pipe failed, errno = %d\n", -err); +- return(err); ++ if(err < 0){ ++ printk("harddog_open - os_pipe failed, err = %d\n", -err); ++ goto out; + } + + err = os_pipe(out_fds, 1, 0); +- if(err){ +- printk("harddog_open - os_pipe failed, errno = %d\n", -err); +- return(err); ++ if(err < 0){ ++ printk("harddog_open - os_pipe failed, err = %d\n", -err); ++ goto out_close_in; + } + + data.stdin = out_fds[0]; +@@ -72,42 +72,47 @@ + + pid = run_helper(pre_exec, &data, args, NULL); + +- close(out_fds[0]); +- 
close(in_fds[1]); ++ os_close_file(out_fds[0]); ++ os_close_file(in_fds[1]); + + if(pid < 0){ + err = -pid; +- printk("harddog_open - run_helper failed, errno = %d\n", err); +- goto out; ++ printk("harddog_open - run_helper failed, errno = %d\n", -err); ++ goto out_close_out; + } + +- n = read(in_fds[0], &c, sizeof(c)); ++ n = os_read_file(in_fds[0], &c, sizeof(c)); + if(n == 0){ + printk("harddog_open - EOF on watchdog pipe\n"); + helper_wait(pid); + err = -EIO; +- goto out; ++ goto out_close_out; + } + else if(n < 0){ + printk("harddog_open - read of watchdog pipe failed, " +- "errno = %d\n", errno); ++ "err = %d\n", -n); + helper_wait(pid); +- err = -errno; +- goto out; ++ err = n; ++ goto out_close_out; + } + *in_fd_ret = in_fds[0]; + *out_fd_ret = out_fds[1]; + return(0); ++ ++ out_close_in: ++ os_close_file(in_fds[0]); ++ os_close_file(in_fds[1]); ++ out_close_out: ++ os_close_file(out_fds[0]); ++ os_close_file(out_fds[1]); + out: +- close(out_fds[1]); +- close(in_fds[0]); + return(err); + } + + void stop_watchdog(int in_fd, int out_fd) + { +- close(in_fd); +- close(out_fd); ++ os_close_file(in_fd); ++ os_close_file(out_fd); + } + + int ping_watchdog(int fd) +@@ -115,11 +120,12 @@ + int n; + char c = '\n'; + +- n = write(fd, &c, sizeof(c)); +- if(n < sizeof(c)){ +- printk("ping_watchdog - write failed, errno = %d\n", +- errno); +- return(-errno); ++ n = os_write_file(fd, &c, sizeof(c)); ++ if(n != sizeof(c)){ ++ printk("ping_watchdog - write failed, err = %d\n", -n); ++ if(n < 0) ++ return(n); ++ return(-EIO); + } + return 1; + +diff -Naur a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c +--- a/arch/um/drivers/hostaudio_kern.c 2004-02-11 12:16:48.000000000 -0500 ++++ b/arch/um/drivers/hostaudio_kern.c 2004-02-11 12:29:03.000000000 -0500 +@@ -5,12 +5,12 @@ + + #include "linux/config.h" + #include "linux/module.h" +-#include "linux/version.h" + #include "linux/init.h" + #include "linux/slab.h" + #include "linux/fs.h" + #include 
"linux/sound.h" + #include "linux/soundcard.h" ++#include "asm/uaccess.h" + #include "kern_util.h" + #include "init.h" + #include "hostaudio.h" +@@ -19,30 +19,39 @@ + char *dsp = HOSTAUDIO_DEV_DSP; + char *mixer = HOSTAUDIO_DEV_MIXER; + ++#define DSP_HELP \ ++" This is used to specify the host dsp device to the hostaudio driver.\n" \ ++" The default is \"" HOSTAUDIO_DEV_DSP "\".\n\n" ++ ++#define MIXER_HELP \ ++" This is used to specify the host mixer device to the hostaudio driver.\n" \ ++" The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n" ++ + #ifndef MODULE + static int set_dsp(char *name, int *add) + { +- dsp = uml_strdup(name); ++ dsp = name; + return(0); + } + +-__uml_setup("dsp=", set_dsp, +-"dsp=\n" +-" This is used to specify the host dsp device to the hostaudio driver.\n" +-" The default is \"" HOSTAUDIO_DEV_DSP "\".\n\n" +-); ++__uml_setup("dsp=", set_dsp, "dsp=\n" DSP_HELP); + + static int set_mixer(char *name, int *add) + { +- mixer = uml_strdup(name); ++ mixer = name; + return(0); + } + +-__uml_setup("mixer=", set_mixer, +-"mixer=\n" +-" This is used to specify the host mixer device to the hostaudio driver.\n" +-" The default is \"" HOSTAUDIO_DEV_MIXER "\".\n\n" +-); ++__uml_setup("mixer=", set_mixer, "mixer=\n" MIXER_HELP); ++ ++#else /*MODULE*/ ++ ++MODULE_PARM(dsp, "s"); ++MODULE_PARM_DESC(dsp, DSP_HELP); ++ ++MODULE_PARM(mixer, "s"); ++MODULE_PARM_DESC(mixer, MIXER_HELP); ++ + #endif + + /* /dev/dsp file operations */ +@@ -51,23 +60,55 @@ + loff_t *ppos) + { + struct hostaudio_state *state = file->private_data; ++ void *kbuf; ++ int err; + + #ifdef DEBUG + printk("hostaudio: read called, count = %d\n", count); + #endif + +- return(hostaudio_read_user(state, buffer, count, ppos)); ++ kbuf = kmalloc(count, GFP_KERNEL); ++ if(kbuf == NULL) ++ return(-ENOMEM); ++ ++ err = hostaudio_read_user(state, kbuf, count, ppos); ++ if(err < 0) ++ goto out; ++ ++ if(copy_to_user(buffer, kbuf, err)) ++ err = -EFAULT; ++ ++ out: ++ kfree(kbuf); ++ return(err); + } + 
+ static ssize_t hostaudio_write(struct file *file, const char *buffer, + size_t count, loff_t *ppos) + { + struct hostaudio_state *state = file->private_data; ++ void *kbuf; ++ int err; + + #ifdef DEBUG + printk("hostaudio: write called, count = %d\n", count); + #endif +- return(hostaudio_write_user(state, buffer, count, ppos)); ++ ++ kbuf = kmalloc(count, GFP_KERNEL); ++ if(kbuf == NULL) ++ return(-ENOMEM); ++ ++ err = -EFAULT; ++ if(copy_from_user(kbuf, buffer, count)) ++ goto out; ++ ++ err = hostaudio_write_user(state, kbuf, count, ppos); ++ if(err < 0) ++ goto out; ++ ++ out: ++ kfree(kbuf); ++ return(err); + } + + static unsigned int hostaudio_poll(struct file *file, +@@ -86,12 +127,43 @@ + unsigned int cmd, unsigned long arg) + { + struct hostaudio_state *state = file->private_data; ++ unsigned long data = 0; ++ int err; + + #ifdef DEBUG + printk("hostaudio: ioctl called, cmd = %u\n", cmd); + #endif ++ switch(cmd){ ++ case SNDCTL_DSP_SPEED: ++ case SNDCTL_DSP_STEREO: ++ case SNDCTL_DSP_GETBLKSIZE: ++ case SNDCTL_DSP_CHANNELS: ++ case SNDCTL_DSP_SUBDIVIDE: ++ case SNDCTL_DSP_SETFRAGMENT: ++ if(get_user(data, (int *) arg)) ++ return(-EFAULT); ++ break; ++ default: ++ break; ++ } ++ ++ err = hostaudio_ioctl_user(state, cmd, (unsigned long) &data); ++ ++ switch(cmd){ ++ case SNDCTL_DSP_SPEED: ++ case SNDCTL_DSP_STEREO: ++ case SNDCTL_DSP_GETBLKSIZE: ++ case SNDCTL_DSP_CHANNELS: ++ case SNDCTL_DSP_SUBDIVIDE: ++ case SNDCTL_DSP_SETFRAGMENT: ++ if(put_user(data, (int *) arg)) ++ return(-EFAULT); ++ break; ++ default: ++ break; ++ } + +- return(hostaudio_ioctl_user(state, cmd, arg)); ++ return(err); + } + + static int hostaudio_open(struct inode *inode, struct file *file) +@@ -225,7 +297,8 @@ + + static int __init hostaudio_init_module(void) + { +- printk(KERN_INFO "UML Audio Relay\n"); ++ printk(KERN_INFO "UML Audio Relay (host dsp = %s, host mixer = %s)\n", ++ dsp, mixer); + + module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1); + 
if(module_data.dev_audio < 0){ +diff -Naur a/arch/um/drivers/hostaudio_user.c b/arch/um/drivers/hostaudio_user.c +--- a/arch/um/drivers/hostaudio_user.c 2004-02-11 12:16:08.000000000 -0500 ++++ b/arch/um/drivers/hostaudio_user.c 2004-02-11 12:28:04.000000000 -0500 +@@ -4,9 +4,6 @@ + */ + + #include +-#include +-#include +-#include + #include + #include + #include "hostaudio.h" +@@ -20,45 +17,31 @@ + ssize_t hostaudio_read_user(struct hostaudio_state *state, char *buffer, + size_t count, loff_t *ppos) + { +- ssize_t ret; +- + #ifdef DEBUG + printk("hostaudio: read_user called, count = %d\n", count); + #endif + +- ret = read(state->fd, buffer, count); +- +- if(ret < 0) return(-errno); +- return(ret); ++ return(os_read_file(state->fd, buffer, count)); + } + + ssize_t hostaudio_write_user(struct hostaudio_state *state, const char *buffer, + size_t count, loff_t *ppos) + { +- ssize_t ret; +- + #ifdef DEBUG + printk("hostaudio: write_user called, count = %d\n", count); + #endif + +- ret = write(state->fd, buffer, count); +- +- if(ret < 0) return(-errno); +- return(ret); ++ return(os_write_file(state->fd, buffer, count)); + } + + int hostaudio_ioctl_user(struct hostaudio_state *state, unsigned int cmd, + unsigned long arg) + { +- int ret; + #ifdef DEBUG + printk("hostaudio: ioctl_user called, cmd = %u\n", cmd); + #endif + +- ret = ioctl(state->fd, cmd, arg); +- +- if(ret < 0) return(-errno); +- return(ret); ++ return(os_ioctl_generic(state->fd, cmd, arg)); + } + + int hostaudio_open_user(struct hostaudio_state *state, int r, int w, char *dsp) +@@ -67,14 +50,15 @@ + printk("hostaudio: open_user called\n"); + #endif + +- state->fd = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0); +- +- if(state->fd >= 0) return(0); ++ state->fd = os_open_file(dsp, of_set_rw(OPENFLAGS(), r, w), 0); + +- printk("hostaudio_open_user failed to open '%s', errno = %d\n", +- dsp, errno); ++ if(state->fd < 0) { ++ printk("hostaudio_open_user failed to open '%s', err = %d\n", ++ dsp, 
-state->fd); ++ return(state->fd); ++ } + +- return(-errno); ++ return(0); + } + + int hostaudio_release_user(struct hostaudio_state *state) +@@ -82,10 +66,10 @@ + #ifdef DEBUG + printk("hostaudio: release called\n"); + #endif +- if(state->fd >= 0){ +- close(state->fd); +- state->fd=-1; +- } ++ if(state->fd >= 0){ ++ os_close_file(state->fd); ++ state->fd = -1; ++ } + + return(0); + } +@@ -95,15 +79,11 @@ + int hostmixer_ioctl_mixdev_user(struct hostmixer_state *state, + unsigned int cmd, unsigned long arg) + { +- int ret; + #ifdef DEBUG + printk("hostmixer: ioctl_user called cmd = %u\n",cmd); + #endif + +- ret = ioctl(state->fd, cmd, arg); +- if(ret < 0) +- return(-errno); +- return(ret); ++ return(os_ioctl_generic(state->fd, cmd, arg)); + } + + int hostmixer_open_mixdev_user(struct hostmixer_state *state, int r, int w, +@@ -115,12 +95,13 @@ + + state->fd = os_open_file(mixer, of_set_rw(OPENFLAGS(), r, w), 0); + +- if(state->fd >= 0) return(0); +- +- printk("hostaudio_open_mixdev_user failed to open '%s', errno = %d\n", +- mixer, errno); ++ if(state->fd < 0) { ++ printk("hostaudio_open_mixdev_user failed to open '%s', " ++ "err = %d\n", mixer, state->fd); ++ return(state->fd); ++ } + +- return(-errno); ++ return(0); + } + + int hostmixer_release_mixdev_user(struct hostmixer_state *state) +@@ -130,7 +111,7 @@ + #endif + + if(state->fd >= 0){ +- close(state->fd); ++ os_close_file(state->fd); + state->fd = -1; + } + +diff -Naur a/arch/um/drivers/line.c b/arch/um/drivers/line.c +--- a/arch/um/drivers/line.c 2004-02-11 12:16:38.000000000 -0500 ++++ b/arch/um/drivers/line.c 2004-02-11 12:28:38.000000000 -0500 +@@ -6,8 +6,8 @@ + #include "linux/sched.h" + #include "linux/slab.h" + #include "linux/list.h" ++#include "linux/interrupt.h" + #include "linux/devfs_fs_kernel.h" +-#include "asm/irq.h" + #include "asm/uaccess.h" + #include "chan_kern.h" + #include "irq_user.h" +@@ -16,38 +16,55 @@ + #include "user_util.h" + #include "kern_util.h" + #include "os.h" ++#include 
"irq_kern.h" + + #define LINE_BUFSIZE 4096 + +-void line_interrupt(int irq, void *data, struct pt_regs *unused) ++static irqreturn_t line_interrupt(int irq, void *data, struct pt_regs *unused) + { + struct line *dev = data; + + if(dev->count > 0) + chan_interrupt(&dev->chan_list, &dev->task, dev->tty, irq, + dev); ++ return IRQ_HANDLED; + } + +-void line_timer_cb(void *arg) ++static void line_timer_cb(void *arg) + { + struct line *dev = arg; + + line_interrupt(dev->driver->read_irq, dev, NULL); + } + +-static void buffer_data(struct line *line, const char *buf, int len) ++static int write_room(struct line *dev) + { +- int end; ++ int n; ++ ++ if(dev->buffer == NULL) return(LINE_BUFSIZE - 1); ++ ++ n = dev->head - dev->tail; ++ if(n <= 0) n = LINE_BUFSIZE + n; ++ return(n - 1); ++} ++ ++static int buffer_data(struct line *line, const char *buf, int len) ++{ ++ int end, room; + + if(line->buffer == NULL){ + line->buffer = kmalloc(LINE_BUFSIZE, GFP_ATOMIC); + if(line->buffer == NULL){ + printk("buffer_data - atomic allocation failed\n"); +- return; ++ return(0); + } + line->head = line->buffer; + line->tail = line->buffer; + } ++ ++ room = write_room(line); ++ len = (len > room) ? 
room : len; ++ + end = line->buffer + LINE_BUFSIZE - line->tail; + if(len < end){ + memcpy(line->tail, buf, len); +@@ -60,6 +77,8 @@ + memcpy(line->buffer, buf, len); + line->tail = line->buffer + len; + } ++ ++ return(len); + } + + static int flush_buffer(struct line *line) +@@ -95,7 +114,7 @@ + struct line *line; + char *new; + unsigned long flags; +- int n, err, i; ++ int n, err, i, ret = 0; + + if(tty->stopped) return 0; + +@@ -104,9 +123,13 @@ + if(new == NULL) + return(0); + n = copy_from_user(new, buf, len); +- if(n == len) +- return(-EFAULT); + buf = new; ++ if(n == len){ ++ len = -EFAULT; ++ goto out_free; ++ } ++ ++ len -= n; + } + + i = tty->index; +@@ -115,41 +138,50 @@ + down(&line->sem); + if(line->head != line->tail){ + local_irq_save(flags); +- buffer_data(line, buf, len); ++ ret += buffer_data(line, buf, len); + err = flush_buffer(line); + local_irq_restore(flags); + if(err <= 0) +- goto out; ++ goto out_up; + } + else { + n = write_chan(&line->chan_list, buf, len, + line->driver->write_irq); + if(n < 0){ +- len = n; +- goto out; ++ ret = n; ++ goto out_up; + } +- if(n < len) +- buffer_data(line, buf + n, len - n); ++ ++ len -= n; ++ ret += n; ++ if(len > 0) ++ ret += buffer_data(line, buf + n, len); + } +- out: ++ out_up: + up(&line->sem); +- return(len); ++ out_free: ++ if(from_user) ++ kfree(buf); ++ return(ret); + } + +-void line_write_interrupt(int irq, void *data, struct pt_regs *unused) ++static irqreturn_t line_write_interrupt(int irq, void *data, ++ struct pt_regs *unused) + { + struct line *dev = data; + struct tty_struct *tty = dev->tty; + int err; + + err = flush_buffer(dev); +- if(err == 0) return; ++ if(err == 0) ++ return(IRQ_NONE); + else if(err < 0){ + dev->head = dev->buffer; + dev->tail = dev->buffer; + } + +- if(tty == NULL) return; ++ if(tty == NULL) ++ return(IRQ_NONE); + + if(test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) && + (tty->ldisc.write_wakeup != NULL)) +@@ -161,21 +193,9 @@ + * writes. 
+ */ + +- if (waitqueue_active(&tty->write_wait)) ++ if(waitqueue_active(&tty->write_wait)) + wake_up_interruptible(&tty->write_wait); +- +-} +- +-int line_write_room(struct tty_struct *tty) +-{ +- struct line *dev = tty->driver_data; +- int n; +- +- if(dev->buffer == NULL) return(LINE_BUFSIZE - 1); +- +- n = dev->head - dev->tail; +- if(n <= 0) n = LINE_BUFSIZE + n; +- return(n - 1); ++ return(IRQ_HANDLED); + } + + int line_setup_irq(int fd, int input, int output, void *data) +@@ -305,7 +325,7 @@ + if(*end != '='){ + printk(KERN_ERR "line_setup failed to parse \"%s\"\n", + init); +- return(1); ++ return(0); + } + init = end; + } +@@ -313,12 +333,12 @@ + if((n >= 0) && (n >= num)){ + printk("line_setup - %d out of range ((0 ... %d) allowed)\n", + n, num); +- return(1); ++ return(0); + } + else if(n >= 0){ + if(lines[n].count > 0){ + printk("line_setup - device %d is open\n", n); +- return(1); ++ return(0); + } + if(lines[n].init_pri <= INIT_ONE){ + lines[n].init_pri = INIT_ONE; +@@ -332,7 +352,7 @@ + else if(!all_allowed){ + printk("line_setup - can't configure all devices from " + "mconsole\n"); +- return(1); ++ return(0); + } + else { + for(i = 0; i < num; i++){ +@@ -346,7 +366,7 @@ + } + } + } +- return(0); ++ return(1); + } + + int line_config(struct line *lines, int num, char *str) +@@ -357,7 +377,7 @@ + printk("line_config - uml_strdup failed\n"); + return(-ENOMEM); + } +- return(line_setup(lines, num, new, 0)); ++ return(!line_setup(lines, num, new, 0)); + } + + int line_get_config(char *name, struct line *lines, int num, char *str, +@@ -369,7 +389,7 @@ + + dev = simple_strtoul(name, &end, 0); + if((*end != '\0') || (end == name)){ +- *error_out = "line_setup failed to parse device number"; ++ *error_out = "line_get_config failed to parse device number"; + return(0); + } + +@@ -379,15 +399,15 @@ + } + + line = &lines[dev]; ++ + down(&line->sem); +- + if(!line->valid) + CONFIG_CHUNK(str, size, n, "none", 1); + else if(line->count == 0) + CONFIG_CHUNK(str, 
size, n, line->init_str, 1); + else n = chan_config_string(&line->chan_list, str, size, error_out); +- + up(&line->sem); ++ + return(n); + } + +@@ -396,7 +416,14 @@ + char config[sizeof("conxxxx=none\0")]; + + sprintf(config, "%s=none", str); +- return(line_setup(lines, num, config, 0)); ++ return(!line_setup(lines, num, config, 0)); ++} ++ ++int line_write_room(struct tty_struct *tty) ++{ ++ struct line *dev = tty->driver_data; ++ ++ return(write_room(dev)); + } + + struct tty_driver *line_register_devfs(struct lines *set, +@@ -412,7 +439,8 @@ + return NULL; + + driver->driver_name = line_driver->name; +- driver->name = line_driver->devfs_name; ++ driver->name = line_driver->device_name; ++ driver->devfs_name = line_driver->devfs_name; + driver->major = line_driver->major; + driver->minor_start = line_driver->minor_start; + driver->type = line_driver->type; +@@ -432,7 +460,7 @@ + + for(i = 0; i < nlines; i++){ + if(!lines[i].valid) +- tty_unregister_devfs(driver, i); ++ tty_unregister_device(driver, i); + } + + mconsole_register_dev(&line_driver->mc); +@@ -465,24 +493,25 @@ + struct line *line; + }; + +-void winch_interrupt(int irq, void *data, struct pt_regs *unused) ++irqreturn_t winch_interrupt(int irq, void *data, struct pt_regs *unused) + { + struct winch *winch = data; + struct tty_struct *tty; + int err; + char c; + +- err = generic_read(winch->fd, &c, NULL); +- if(err < 0){ +- if(err != -EAGAIN){ +- printk("winch_interrupt : read failed, errno = %d\n", +- -err); +- printk("fd %d is losing SIGWINCH support\n", +- winch->tty_fd); +- free_irq(irq, data); +- return; ++ if(winch->fd != -1){ ++ err = generic_read(winch->fd, &c, NULL); ++ if(err < 0){ ++ if(err != -EAGAIN){ ++ printk("winch_interrupt : read failed, " ++ "errno = %d\n", -err); ++ printk("fd %d is losing SIGWINCH support\n", ++ winch->tty_fd); ++ return(IRQ_HANDLED); ++ } ++ goto out; + } +- goto out; + } + tty = winch->line->tty; + if(tty != NULL){ +@@ -492,7 +521,9 @@ + kill_pg(tty->pgrp, 
SIGWINCH, 1); + } + out: +- reactivate_fd(winch->fd, WINCH_IRQ); ++ if(winch->fd != -1) ++ reactivate_fd(winch->fd, WINCH_IRQ); ++ return(IRQ_HANDLED); + } + + DECLARE_MUTEX(winch_handler_sem); +@@ -529,7 +560,10 @@ + + list_for_each(ele, &winch_handlers){ + winch = list_entry(ele, struct winch, list); +- close(winch->fd); ++ if(winch->fd != -1){ ++ deactivate_fd(winch->fd, WINCH_IRQ); ++ os_close_file(winch->fd); ++ } + if(winch->pid != -1) + os_kill_process(winch->pid, 1); + } +diff -Naur a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile +--- a/arch/um/drivers/Makefile 2004-02-11 12:15:52.000000000 -0500 ++++ b/arch/um/drivers/Makefile 2004-02-11 12:27:45.000000000 -0500 +@@ -1,5 +1,5 @@ + # +-# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) ++# Copyright (C) 2000, 2002, 2003 Jeff Dike (jdike@karaya.com) + # Licensed under the GPL + # + +@@ -39,6 +39,8 @@ + obj-$(CONFIG_TTY_CHAN) += tty.o + obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o + obj-$(CONFIG_UML_WATCHDOG) += harddog.o ++obj-$(CONFIG_BLK_DEV_COW) += cow_kern.o ++obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o + + obj-y += stdio_console.o $(CHAN_OBJS) + +@@ -46,18 +48,7 @@ + + USER_OBJS := $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS)) fd.o \ + null.o pty.o tty.o xterm.o +-USER_OBJS := $(foreach file,$(USER_OBJS),arch/um/drivers/$(file)) ++USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) + + $(USER_OBJS) : %.o: %.c + $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< +- +-clean: +- +-modules: +- +-fastdep: +- +-dep: +- +-archmrproper: clean +- +diff -Naur a/arch/um/drivers/mcast_user.c b/arch/um/drivers/mcast_user.c +--- a/arch/um/drivers/mcast_user.c 2004-02-11 12:15:46.000000000 -0500 ++++ b/arch/um/drivers/mcast_user.c 2004-02-11 12:27:37.000000000 -0500 +@@ -23,6 +23,7 @@ + #include "kern_util.h" + #include "user_util.h" + #include "user.h" ++#include "os.h" + + #define MAX_PACKET (ETH_MAX_PACKET + ETH_HEADER_OTHER) + +@@ -62,7 +63,8 @@ + goto out; + } + +- if ((fd = 
socket(AF_INET, SOCK_DGRAM, 0)) < 0){ ++ fd = socket(AF_INET, SOCK_DGRAM, 0); ++ if (fd < 0){ + printk("mcast_open : data socket failed, errno = %d\n", + errno); + fd = -ENOMEM; +@@ -72,7 +74,7 @@ + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &yes, sizeof(yes)) < 0) { + printk("mcast_open: SO_REUSEADDR failed, errno = %d\n", + errno); +- close(fd); ++ os_close_file(fd); + fd = -EINVAL; + goto out; + } +@@ -82,7 +84,7 @@ + sizeof(pri->ttl)) < 0) { + printk("mcast_open: IP_MULTICAST_TTL failed, error = %d\n", + errno); +- close(fd); ++ os_close_file(fd); + fd = -EINVAL; + goto out; + } +@@ -91,7 +93,7 @@ + if (setsockopt(fd, SOL_IP, IP_MULTICAST_LOOP, &yes, sizeof(yes)) < 0) { + printk("mcast_open: IP_MULTICAST_LOOP failed, error = %d\n", + errno); +- close(fd); ++ os_close_file(fd); + fd = -EINVAL; + goto out; + } +@@ -99,7 +101,7 @@ + /* bind socket to mcast address */ + if (bind(fd, (struct sockaddr *) sin, sizeof(*sin)) < 0) { + printk("mcast_open : data bind failed, errno = %d\n", errno); +- close(fd); ++ os_close_file(fd); + fd = -EINVAL; + goto out; + } +@@ -115,7 +117,7 @@ + "interface on the host.\n"); + printk("eth0 should be configured in order to use the " + "multicast transport.\n"); +- close(fd); ++ os_close_file(fd); + fd = -EINVAL; + } + +@@ -137,7 +139,7 @@ + errno); + } + +- close(fd); ++ os_close_file(fd); + } + + int mcast_user_write(int fd, void *buf, int len, struct mcast_data *pri) +diff -Naur a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c +--- a/arch/um/drivers/mconsole_kern.c 2004-02-11 12:14:15.000000000 -0500 ++++ b/arch/um/drivers/mconsole_kern.c 2004-02-11 12:25:42.000000000 -0500 +@@ -1,6 +1,6 @@ + /* + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) +- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) ++ * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +@@ -15,6 +15,9 @@ + #include "linux/sysrq.h" + #include "linux/workqueue.h" + #include "linux/module.h" 
++#include "linux/file.h" ++#include "linux/fs.h" ++#include "linux/namei.h" + #include "linux/proc_fs.h" + #include "asm/irq.h" + #include "asm/uaccess.h" +@@ -27,6 +30,7 @@ + #include "init.h" + #include "os.h" + #include "umid.h" ++#include "irq_kern.h" + + static int do_unlink_socket(struct notifier_block *notifier, + unsigned long what, void *data) +@@ -67,7 +71,7 @@ + + DECLARE_WORK(mconsole_work, mc_work_proc, NULL); + +-void mconsole_interrupt(int irq, void *dev_id, struct pt_regs *regs) ++irqreturn_t mconsole_interrupt(int irq, void *dev_id, struct pt_regs *regs) + { + int fd; + struct mconsole_entry *new; +@@ -75,9 +79,10 @@ + + fd = (int) dev_id; + while (mconsole_get_request(fd, &req)){ +- if(req.cmd->as_interrupt) (*req.cmd->handler)(&req); ++ if(req.cmd->context == MCONSOLE_INTR) ++ (*req.cmd->handler)(&req); + else { +- new = kmalloc(sizeof(req), GFP_ATOMIC); ++ new = kmalloc(sizeof(*new), GFP_ATOMIC); + if(new == NULL) + mconsole_reply(&req, "Out of memory", 1, 0); + else { +@@ -88,6 +93,7 @@ + } + if(!list_empty(&mc_requests)) schedule_work(&mconsole_work); + reactivate_fd(fd, MCONSOLE_IRQ); ++ return(IRQ_HANDLED); + } + + void mconsole_version(struct mc_request *req) +@@ -100,20 +106,110 @@ + mconsole_reply(req, version, 0, 0); + } + ++void mconsole_log(struct mc_request *req) ++{ ++ int len; ++ char *ptr = req->request.data; ++ ++ ptr += strlen("log"); ++ while(isspace(*ptr)) ptr++; ++ ++ len = req->len - (ptr - req->request.data); ++ printk("%.*s", len, ptr); ++ mconsole_reply(req, "", 0, 0); ++} ++ ++void mconsole_proc(struct mc_request *req) ++{ ++ struct nameidata nd; ++ struct file_system_type *proc; ++ struct super_block *super; ++ struct file *file; ++ int n, err; ++ char *ptr = req->request.data, *buf; ++ ++ ptr += strlen("proc"); ++ while(isspace(*ptr)) ptr++; ++ ++ proc = get_fs_type("proc"); ++ if(proc == NULL){ ++ mconsole_reply(req, "procfs not registered", 1, 0); ++ goto out; ++ } ++ ++ super = (*proc->get_sb)(proc, 0, NULL, NULL); 
++ put_filesystem(proc); ++ if(super == NULL){ ++ mconsole_reply(req, "Failed to get procfs superblock", 1, 0); ++ goto out; ++ } ++ up_write(&super->s_umount); ++ ++ nd.dentry = super->s_root; ++ nd.mnt = NULL; ++ nd.flags = O_RDONLY + 1; ++ nd.last_type = LAST_ROOT; ++ ++ err = link_path_walk(ptr, &nd); ++ if(err){ ++ mconsole_reply(req, "Failed to look up file", 1, 0); ++ goto out_kill; ++ } ++ ++ file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); ++ if(IS_ERR(file)){ ++ mconsole_reply(req, "Failed to open file", 1, 0); ++ goto out_kill; ++ } ++ ++ buf = kmalloc(PAGE_SIZE, GFP_KERNEL); ++ if(buf == NULL){ ++ mconsole_reply(req, "Failed to allocate buffer", 1, 0); ++ goto out_fput; ++ } ++ ++ if((file->f_op != NULL) && (file->f_op->read != NULL)){ ++ do { ++ n = (*file->f_op->read)(file, buf, PAGE_SIZE - 1, ++ &file->f_pos); ++ if(n >= 0){ ++ buf[n] = '\0'; ++ mconsole_reply(req, buf, 0, (n > 0)); ++ } ++ else { ++ mconsole_reply(req, "Read of file failed", ++ 1, 0); ++ goto out_free; ++ } ++ } while(n > 0); ++ } ++ else mconsole_reply(req, "", 0, 0); ++ ++ out_free: ++ kfree(buf); ++ out_fput: ++ fput(file); ++ out_kill: ++ deactivate_super(super); ++ out: ; ++} ++ + #define UML_MCONSOLE_HELPTEXT \ +-"Commands: +- version - Get kernel version +- help - Print this message +- halt - Halt UML +- reboot - Reboot UML +- config = - Add a new device to UML; +- same syntax as command line +- config - Query the configuration of a device +- remove - Remove a device from UML +- sysrq - Performs the SysRq action controlled by the letter +- cad - invoke the Ctl-Alt-Del handler +- stop - pause the UML; it will do nothing until it receives a 'go' +- go - continue the UML after a 'stop' ++"Commands: \n\ ++ version - Get kernel version \n\ ++ help - Print this message \n\ ++ halt - Halt UML \n\ ++ reboot - Reboot UML \n\ ++ config = - Add a new device to UML; \n\ ++ same syntax as command line \n\ ++ config - Query the configuration of a device \n\ ++ remove - Remove a device from 
UML \n\ ++ sysrq - Performs the SysRq action controlled by the letter \n\ ++ cad - invoke the Ctl-Alt-Del handler \n\ ++ stop - pause the UML; it will do nothing until it receives a 'go' \n\ ++ go - continue the UML after a 'stop' \n\ ++ log - make UML enter into the kernel log\n\ ++ proc - returns the contents of the UML's /proc/\n\ + " + + void mconsole_help(struct mc_request *req) +@@ -302,7 +398,7 @@ + if(umid_file_name("mconsole", file, sizeof(file))) return(-1); + snprintf(mconsole_socket_name, sizeof(file), "%s", file); + +- sock = create_unix_socket(file, sizeof(file)); ++ sock = os_create_unix_socket(file, sizeof(file), 1); + if (sock < 0){ + printk("Failed to initialize management console\n"); + return(1); +@@ -344,11 +440,16 @@ + if(buf == NULL) + return(-ENOMEM); + +- if(copy_from_user(buf, buffer, count)) +- return(-EFAULT); ++ if(copy_from_user(buf, buffer, count)){ ++ count = -EFAULT; ++ goto out; ++ } ++ + buf[count] = '\0'; + + mconsole_notify(notify_socket, MCONSOLE_USER_NOTIFY, buf, count); ++ out: ++ kfree(buf); + return(count); + } + +diff -Naur a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c +--- a/arch/um/drivers/mconsole_user.c 2004-02-11 12:14:27.000000000 -0500 ++++ b/arch/um/drivers/mconsole_user.c 2004-02-11 12:26:08.000000000 -0500 +@@ -1,6 +1,6 @@ + /* + * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) +- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) ++ * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +@@ -18,16 +18,18 @@ + #include "umid.h" + + static struct mconsole_command commands[] = { +- { "version", mconsole_version, 1 }, +- { "halt", mconsole_halt, 0 }, +- { "reboot", mconsole_reboot, 0 }, +- { "config", mconsole_config, 0 }, +- { "remove", mconsole_remove, 0 }, +- { "sysrq", mconsole_sysrq, 1 }, +- { "help", mconsole_help, 1 }, +- { "cad", mconsole_cad, 1 }, +- { "stop", mconsole_stop, 0 }, +- { "go", mconsole_go, 1 }, ++ { "version", 
mconsole_version, MCONSOLE_INTR }, ++ { "halt", mconsole_halt, MCONSOLE_PROC }, ++ { "reboot", mconsole_reboot, MCONSOLE_PROC }, ++ { "config", mconsole_config, MCONSOLE_PROC }, ++ { "remove", mconsole_remove, MCONSOLE_PROC }, ++ { "sysrq", mconsole_sysrq, MCONSOLE_INTR }, ++ { "help", mconsole_help, MCONSOLE_INTR }, ++ { "cad", mconsole_cad, MCONSOLE_INTR }, ++ { "stop", mconsole_stop, MCONSOLE_PROC }, ++ { "go", mconsole_go, MCONSOLE_INTR }, ++ { "log", mconsole_log, MCONSOLE_INTR }, ++ { "proc", mconsole_proc, MCONSOLE_PROC }, + }; + + /* Initialized in mconsole_init, which is an initcall */ +@@ -139,6 +141,7 @@ + memcpy(reply.data, str, len); + reply.data[len] = '\0'; + total -= len; ++ str += len; + reply.len = len + 1; + + len = sizeof(reply) + reply.len - sizeof(reply.data); +diff -Naur a/arch/um/drivers/mmapper_kern.c b/arch/um/drivers/mmapper_kern.c +--- a/arch/um/drivers/mmapper_kern.c 2004-02-11 12:14:23.000000000 -0500 ++++ b/arch/um/drivers/mmapper_kern.c 2004-02-11 12:26:07.000000000 -0500 +@@ -120,7 +120,10 @@ + printk(KERN_INFO "Mapper v0.1\n"); + + v_buf = (char *) find_iomem("mmapper", &mmapper_size); +- if(mmapper_size == 0) return(0); ++ if(mmapper_size == 0){ ++ printk(KERN_ERR "mmapper_init - find_iomem failed\n"); ++ return(0); ++ } + + p_buf = __pa(v_buf); + +diff -Naur a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c +--- a/arch/um/drivers/net_kern.c 2004-02-11 12:15:23.000000000 -0500 ++++ b/arch/um/drivers/net_kern.c 2004-02-11 12:27:11.000000000 -0500 +@@ -26,6 +26,7 @@ + #include "mconsole_kern.h" + #include "init.h" + #include "irq_user.h" ++#include "irq_kern.h" + + static spinlock_t opened_lock = SPIN_LOCK_UNLOCKED; + LIST_HEAD(opened); +@@ -37,7 +38,8 @@ + struct sk_buff *skb; + + /* If we can't allocate memory, try again next round. 
*/ +- if ((skb = dev_alloc_skb(dev->mtu)) == NULL) { ++ skb = dev_alloc_skb(dev->mtu); ++ if (skb == NULL) { + lp->stats.rx_dropped++; + return 0; + } +@@ -61,14 +63,14 @@ + return pkt_len; + } + +-void uml_net_interrupt(int irq, void *dev_id, struct pt_regs *regs) ++irqreturn_t uml_net_interrupt(int irq, void *dev_id, struct pt_regs *regs) + { + struct net_device *dev = dev_id; + struct uml_net_private *lp = dev->priv; + int err; + + if(!netif_running(dev)) +- return; ++ return(IRQ_NONE); + + spin_lock(&lp->lock); + while((err = uml_net_rx(dev)) > 0) ; +@@ -83,6 +85,7 @@ + + out: + spin_unlock(&lp->lock); ++ return(IRQ_HANDLED); + } + + static int uml_net_open(struct net_device *dev) +@@ -252,37 +255,6 @@ + #endif + } + +-/* +- * default do nothing hard header packet routines for struct net_device init. +- * real ethernet transports will overwrite with real routines. +- */ +-static int uml_net_hard_header(struct sk_buff *skb, struct net_device *dev, +- unsigned short type, void *daddr, void *saddr, unsigned len) +-{ +- return(0); /* no change */ +-} +- +-static int uml_net_rebuild_header(struct sk_buff *skb) +-{ +- return(0); /* ignore */ +-} +- +-static int uml_net_header_cache(struct neighbour *neigh, struct hh_cache *hh) +-{ +- return(-1); /* fail */ +-} +- +-static void uml_net_header_cache_update(struct hh_cache *hh, +- struct net_device *dev, unsigned char * haddr) +-{ +- /* ignore */ +-} +- +-static int uml_net_header_parse(struct sk_buff *skb, unsigned char *haddr) +-{ +- return(0); /* nothing */ +-} +- + static spinlock_t devices_lock = SPIN_LOCK_UNLOCKED; + static struct list_head devices = LIST_HEAD_INIT(devices); + +@@ -292,7 +264,7 @@ + struct uml_net *device; + struct net_device *dev; + struct uml_net_private *lp; +- int err, size; ++ int save, err, size; + + size = transport->private_size + sizeof(struct uml_net_private) + + sizeof(((struct uml_net_private *) 0)->user); +@@ -334,12 +306,6 @@ + snprintf(dev->name, sizeof(dev->name), "eth%d", n); + 
device->dev = dev; + +- dev->hard_header = uml_net_hard_header; +- dev->rebuild_header = uml_net_rebuild_header; +- dev->hard_header_cache = uml_net_header_cache; +- dev->header_cache_update= uml_net_header_cache_update; +- dev->hard_header_parse = uml_net_header_parse; +- + (*transport->kern->init)(dev, init); + + dev->mtu = transport->user->max_packet; +@@ -362,21 +328,29 @@ + return 1; + lp = dev->priv; + +- INIT_LIST_HEAD(&lp->list); +- spin_lock_init(&lp->lock); +- lp->dev = dev; +- lp->fd = -1; +- lp->mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0 }; +- lp->have_mac = device->have_mac; +- lp->protocol = transport->kern->protocol; +- lp->open = transport->user->open; +- lp->close = transport->user->close; +- lp->remove = transport->user->remove; +- lp->read = transport->kern->read; +- lp->write = transport->kern->write; +- lp->add_address = transport->user->add_address; +- lp->delete_address = transport->user->delete_address; +- lp->set_mtu = transport->user->set_mtu; ++ /* lp.user is the first four bytes of the transport data, which ++ * has already been initialized. This structure assignment will ++ * overwrite that, so we make sure that .user gets overwritten with ++ * what it already has. 
++ */ ++ save = lp->user[0]; ++ *lp = ((struct uml_net_private) ++ { .list = LIST_HEAD_INIT(lp->list), ++ .lock = SPIN_LOCK_UNLOCKED, ++ .dev = dev, ++ .fd = -1, ++ .mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0}, ++ .have_mac = device->have_mac, ++ .protocol = transport->kern->protocol, ++ .open = transport->user->open, ++ .close = transport->user->close, ++ .remove = transport->user->remove, ++ .read = transport->kern->read, ++ .write = transport->kern->write, ++ .add_address = transport->user->add_address, ++ .delete_address = transport->user->delete_address, ++ .set_mtu = transport->user->set_mtu, ++ .user = { save } }); + + init_timer(&lp->tl); + lp->tl.function = uml_net_user_timer_expire; +@@ -609,7 +583,8 @@ + unregister_netdev(dev); + + list_del(&device->list); +- free_netdev(device); ++ kfree(device); ++ free_netdev(dev); + return(0); + } + +diff -Naur a/arch/um/drivers/net_user.c b/arch/um/drivers/net_user.c +--- a/arch/um/drivers/net_user.c 2004-02-11 12:14:28.000000000 -0500 ++++ b/arch/um/drivers/net_user.c 2004-02-11 12:26:11.000000000 -0500 +@@ -26,8 +26,7 @@ + if(gate_addr == NULL) return(0); + if(sscanf(gate_addr, "%d.%d.%d.%d", &tap_addr[0], + &tap_addr[1], &tap_addr[2], &tap_addr[3]) != 4){ +- printk("Invalid tap IP address - '%s'\n", +- gate_addr); ++ printk("Invalid tap IP address - '%s'\n", gate_addr); + return(-EINVAL); + } + return(0); +@@ -60,18 +59,18 @@ + } + + *output = '\0'; +- if(read(fd, &remain, sizeof(remain)) != sizeof(remain)){ +- printk("read_output - read of length failed, errno = %d\n", +- errno); ++ n = os_read_file(fd, &remain, sizeof(remain)); ++ if(n != sizeof(remain)){ ++ printk("read_output - read of length failed, err = %d\n", -n); + return; + } + + while(remain != 0){ + n = (remain < len) ? 
remain : len; +- actual = read(fd, output, n); ++ actual = os_read_file(fd, output, n); + if(actual != n){ + printk("read_output - read of data failed, " +- "errno = %d\n", errno); ++ "err = %d\n", -actual); + return; + } + remain -= actual; +@@ -83,13 +82,12 @@ + { + int n; + +- while(((n = read(fd, buf, len)) < 0) && (errno == EINTR)) ; ++ n = os_read_file(fd, buf, len); + +- if(n < 0){ +- if(errno == EAGAIN) return(0); +- return(-errno); +- } +- else if(n == 0) return(-ENOTCONN); ++ if(n == -EAGAIN) ++ return(0); ++ else if(n == 0) ++ return(-ENOTCONN); + return(n); + } + +@@ -112,13 +110,13 @@ + { + int n; + +- while(((n = write(fd, buf, len)) < 0) && (errno == EINTR)) ; +- if(n < 0){ +- if(errno == EAGAIN) return(0); +- return(-errno); +- } +- else if(n == 0) return(-ENOTCONN); +- return(n); ++ n = os_write_file(fd, buf, len); ++ ++ if(n == -EAGAIN) ++ return(0); ++ else if(n == 0) ++ return(-ENOTCONN); ++ return(n); + } + + int net_send(int fd, void *buf, int len) +@@ -157,7 +155,7 @@ + { + struct change_pre_exec_data *data = arg; + +- close(data->close_me); ++ os_close_file(data->close_me); + dup2(data->stdout, 1); + } + +@@ -167,15 +165,15 @@ + struct change_pre_exec_data pe_data; + + err = os_pipe(fds, 1, 0); +- if(err){ +- printk("change_tramp - pipe failed, errno = %d\n", -err); ++ if(err < 0){ ++ printk("change_tramp - pipe failed, err = %d\n", -err); + return(err); + } + pe_data.close_me = fds[0]; + pe_data.stdout = fds[1]; + pid = run_helper(change_pre_exec, &pe_data, argv, NULL); + +- close(fds[1]); ++ os_close_file(fds[1]); + read_output(fds[0], output, output_len); + waitpid(pid, NULL, 0); + return(pid); +diff -Naur a/arch/um/drivers/null.c b/arch/um/drivers/null.c +--- a/arch/um/drivers/null.c 2004-02-11 12:14:21.000000000 -0500 ++++ b/arch/um/drivers/null.c 2004-02-11 12:26:02.000000000 -0500 +@@ -5,7 +5,6 @@ + + #include + #include +-#include + #include "chan_user.h" + #include "os.h" + +diff -Naur a/arch/um/drivers/port_kern.c 
b/arch/um/drivers/port_kern.c +--- a/arch/um/drivers/port_kern.c 2004-02-11 12:14:18.000000000 -0500 ++++ b/arch/um/drivers/port_kern.c 2004-02-11 12:26:00.000000000 -0500 +@@ -6,6 +6,7 @@ + #include "linux/list.h" + #include "linux/sched.h" + #include "linux/slab.h" ++#include "linux/interrupt.h" + #include "linux/irq.h" + #include "linux/spinlock.h" + #include "linux/errno.h" +@@ -14,6 +15,7 @@ + #include "kern_util.h" + #include "kern.h" + #include "irq_user.h" ++#include "irq_kern.h" + #include "port.h" + #include "init.h" + #include "os.h" +@@ -38,21 +40,21 @@ + struct connection { + struct list_head list; + int fd; +- int helper_pid; ++ int helper_pid; + int socket[2]; + int telnetd_pid; + struct port_list *port; + }; + +-static void pipe_interrupt(int irq, void *data, struct pt_regs *regs) ++static irqreturn_t pipe_interrupt(int irq, void *data, struct pt_regs *regs) + { + struct connection *conn = data; + int fd; + +- fd = os_rcv_fd(conn->socket[0], &conn->helper_pid); ++ fd = os_rcv_fd(conn->socket[0], &conn->helper_pid); + if(fd < 0){ + if(fd == -EAGAIN) +- return; ++ return(IRQ_NONE); + + printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n", + -fd); +@@ -65,6 +67,7 @@ + list_add(&conn->list, &conn->port->connections); + + up(&conn->port->sem); ++ return(IRQ_HANDLED); + } + + static int port_accept(struct port_list *port) +@@ -102,8 +105,7 @@ + } + + list_add(&conn->list, &port->pending); +- ret = 1; +- goto out; ++ return(1); + + out_free: + kfree(conn); +@@ -138,12 +140,13 @@ + + DECLARE_WORK(port_work, port_work_proc, NULL); + +-static void port_interrupt(int irq, void *data, struct pt_regs *regs) ++static irqreturn_t port_interrupt(int irq, void *data, struct pt_regs *regs) + { + struct port_list *port = data; + + port->has_connection = 1; + schedule_work(&port_work); ++ return(IRQ_HANDLED); + } + + void *port_data(int port_num) +diff -Naur a/arch/um/drivers/port_user.c b/arch/um/drivers/port_user.c +--- a/arch/um/drivers/port_user.c 2004-02-11 
12:15:59.000000000 -0500 ++++ b/arch/um/drivers/port_user.c 2004-02-11 12:27:52.000000000 -0500 +@@ -47,10 +47,12 @@ + return(NULL); + } + +- if((kern_data = port_data(port)) == NULL) ++ kern_data = port_data(port); ++ if(kern_data == NULL) + return(NULL); + +- if((data = um_kmalloc(sizeof(*data))) == NULL) ++ data = um_kmalloc(sizeof(*data)); ++ if(data == NULL) + goto err; + + *data = ((struct port_chan) { .raw = opts->raw, +@@ -90,7 +92,7 @@ + struct port_chan *data = d; + + port_remove_dev(data->kernel_data); +- close(fd); ++ os_close_file(fd); + } + + int port_console_write(int fd, const char *buf, int n, void *d) +@@ -130,11 +132,15 @@ + goto out; + } + +- if((listen(fd, 1) < 0) || (os_set_fd_block(fd, 0))){ ++ if(listen(fd, 1) < 0){ + err = -errno; + goto out; + } + ++ err = os_set_fd_block(fd, 0); ++ if(err < 0) ++ goto out; ++ + return(fd); + out: + os_close_file(fd); +@@ -153,10 +159,10 @@ + dup2(data->sock_fd, 0); + dup2(data->sock_fd, 1); + dup2(data->sock_fd, 2); +- close(data->sock_fd); ++ os_close_file(data->sock_fd); + dup2(data->pipe_fd, 3); + os_shutdown_socket(3, 1, 0); +- close(data->pipe_fd); ++ os_close_file(data->pipe_fd); + } + + int port_connection(int fd, int *socket, int *pid_out) +@@ -166,11 +172,12 @@ + "/usr/lib/uml/port-helper", NULL }; + struct port_pre_exec_data data; + +- if((new = os_accept_connection(fd)) < 0) +- return(-errno); ++ new = os_accept_connection(fd); ++ if(new < 0) ++ return(new); + + err = os_pipe(socket, 0, 0); +- if(err) ++ if(err < 0) + goto out_close; + + data = ((struct port_pre_exec_data) +@@ -186,11 +193,11 @@ + + out_shutdown: + os_shutdown_socket(socket[0], 1, 1); +- close(socket[0]); ++ os_close_file(socket[0]); + os_shutdown_socket(socket[1], 1, 1); +- close(socket[1]); ++ os_close_file(socket[1]); + out_close: +- close(new); ++ os_close_file(new); + return(err); + } + +diff -Naur a/arch/um/drivers/pty.c b/arch/um/drivers/pty.c +--- a/arch/um/drivers/pty.c 2004-02-11 12:16:37.000000000 -0500 ++++ 
b/arch/um/drivers/pty.c 2004-02-11 12:28:37.000000000 -0500 +@@ -7,12 +7,12 @@ + #include + #include + #include +-#include + #include + #include "chan_user.h" + #include "user.h" + #include "user_util.h" + #include "kern_util.h" ++#include "os.h" + + struct pty_chan { + void (*announce)(char *dev_name, int dev); +@@ -26,7 +26,8 @@ + { + struct pty_chan *data; + +- if((data = um_kmalloc(sizeof(*data))) == NULL) return(NULL); ++ data = um_kmalloc(sizeof(*data)); ++ if(data == NULL) return(NULL); + *data = ((struct pty_chan) { .announce = opts->announce, + .dev = device, + .raw = opts->raw }); +@@ -39,7 +40,8 @@ + char *dev; + int fd; + +- if((fd = get_pty()) < 0){ ++ fd = get_pty(); ++ if(fd < 0){ + printk("open_pts : Failed to open pts\n"); + return(-errno); + } +@@ -57,29 +59,27 @@ + + int getmaster(char *line) + { +- struct stat stb; + char *pty, *bank, *cp; +- int master; ++ int master, err; + + pty = &line[strlen("/dev/ptyp")]; + for (bank = "pqrs"; *bank; bank++) { + line[strlen("/dev/pty")] = *bank; + *pty = '0'; +- if (stat(line, &stb) < 0) ++ if (os_stat_file(line, NULL) < 0) + break; + for (cp = "0123456789abcdef"; *cp; cp++) { + *pty = *cp; +- master = open(line, O_RDWR); ++ master = os_open_file(line, of_rdwr(OPENFLAGS()), 0); + if (master >= 0) { + char *tp = &line[strlen("/dev/")]; +- int ok; + + /* verify slave side is usable */ + *tp = 't'; +- ok = access(line, R_OK|W_OK) == 0; ++ err = os_access(line, OS_ACC_RW_OK); + *tp = 'p'; +- if (ok) return(master); +- (void) close(master); ++ if(err == 0) return(master); ++ (void) os_close_file(master); + } + } + } +diff -Naur a/arch/um/drivers/slip_user.c b/arch/um/drivers/slip_user.c +--- a/arch/um/drivers/slip_user.c 2004-02-11 12:16:37.000000000 -0500 ++++ b/arch/um/drivers/slip_user.c 2004-02-11 12:28:37.000000000 -0500 +@@ -4,11 +4,9 @@ + #include + #include + #include +-#include + #include + #include + #include +-#include + #include + #include "user_util.h" + #include "kern_util.h" +@@ -65,9 +63,9 @@ + 
{ + struct slip_pre_exec_data *data = arg; + +- if(data->stdin != -1) dup2(data->stdin, 0); ++ if(data->stdin >= 0) dup2(data->stdin, 0); + dup2(data->stdout, 1); +- if(data->close_me != -1) close(data->close_me); ++ if(data->close_me >= 0) os_close_file(data->close_me); + } + + static int slip_tramp(char **argv, int fd) +@@ -77,8 +75,8 @@ + int status, pid, fds[2], err, output_len; + + err = os_pipe(fds, 1, 0); +- if(err){ +- printk("slip_tramp : pipe failed, errno = %d\n", -err); ++ if(err < 0){ ++ printk("slip_tramp : pipe failed, err = %d\n", -err); + return(err); + } + +@@ -96,7 +94,7 @@ + printk("slip_tramp : failed to allocate output " + "buffer\n"); + +- close(fds[1]); ++ os_close_file(fds[1]); + read_output(fds[0], output, output_len); + if(output != NULL){ + printk("%s", output); +@@ -105,7 +103,7 @@ + if(waitpid(pid, &status, 0) < 0) err = errno; + else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)){ + printk("'%s' didn't exit with status 0\n", argv[0]); +- err = EINVAL; ++ err = -EINVAL; + } + } + return(err); +@@ -118,15 +116,17 @@ + char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; + char *argv[] = { "uml_net", version_buf, "slip", "up", gate_buf, + NULL }; +- int sfd, mfd, disc, sencap, err; ++ int sfd, mfd, err; + +- if((mfd = get_pty()) < 0){ +- printk("umn : Failed to open pty\n"); +- return(-1); ++ mfd = get_pty(); ++ if(mfd < 0){ ++ printk("umn : Failed to open pty, err = %d\n", -mfd); ++ return(mfd); + } +- if((sfd = os_open_file(ptsname(mfd), of_rdwr(OPENFLAGS()), 0)) < 0){ +- printk("Couldn't open tty for slip line\n"); +- return(-1); ++ sfd = os_open_file(ptsname(mfd), of_rdwr(OPENFLAGS()), 0); ++ if(sfd < 0){ ++ printk("Couldn't open tty for slip line, err = %d\n", -sfd); ++ return(sfd); + } + if(set_up_tty(sfd)) return(-1); + pri->slave = sfd; +@@ -138,28 +138,23 @@ + + err = slip_tramp(argv, sfd); + +- if(err != 0){ +- printk("slip_tramp failed - errno = %d\n", err); +- return(-err); ++ if(err < 0){ ++ printk("slip_tramp failed - err = 
%d\n", -err); ++ return(err); + } +- if(ioctl(pri->slave, SIOCGIFNAME, pri->name) < 0){ +- printk("SIOCGIFNAME failed, errno = %d\n", errno); +- return(-errno); ++ err = os_get_ifname(pri->slave, pri->name); ++ if(err < 0){ ++ printk("get_ifname failed, err = %d\n", -err); ++ return(err); + } + iter_addresses(pri->dev, open_addr, pri->name); + } + else { +- disc = N_SLIP; +- if(ioctl(sfd, TIOCSETD, &disc) < 0){ +- printk("Failed to set slip line discipline - " +- "errno = %d\n", errno); +- return(-errno); +- } +- sencap = 0; +- if(ioctl(sfd, SIOCSIFENCAP, &sencap) < 0){ +- printk("Failed to set slip encapsulation - " +- "errno = %d\n", errno); +- return(-errno); ++ err = os_set_slip(sfd); ++ if(err < 0){ ++ printk("Failed to set slip discipline encapsulation - " ++ "err = %d\n", -err); ++ return(err); + } + } + return(mfd); +@@ -181,9 +176,9 @@ + err = slip_tramp(argv, -1); + + if(err != 0) +- printk("slip_tramp failed - errno = %d\n", err); +- close(fd); +- close(pri->slave); ++ printk("slip_tramp failed - errno = %d\n", -err); ++ os_close_file(fd); ++ os_close_file(pri->slave); + pri->slave = -1; + } + +@@ -243,7 +238,7 @@ + { + struct slip_data *pri = data; + +- if(pri->slave == -1) return; ++ if(pri->slave < 0) return; + open_addr(addr, netmask, pri->name); + } + +@@ -252,7 +247,7 @@ + { + struct slip_data *pri = data; + +- if(pri->slave == -1) return; ++ if(pri->slave < 0) return; + close_addr(addr, netmask, pri->name); + } + +diff -Naur a/arch/um/drivers/slirp_user.c b/arch/um/drivers/slirp_user.c +--- a/arch/um/drivers/slirp_user.c 2004-02-11 12:16:03.000000000 -0500 ++++ b/arch/um/drivers/slirp_user.c 2004-02-11 12:27:58.000000000 -0500 +@@ -4,7 +4,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -48,15 +47,15 @@ + + return(pid); + } +- ++ ++/* XXX This is just a trivial wrapper around os_pipe */ + static int slirp_datachan(int *mfd, int *sfd) + { + int fds[2], err; + + err = os_pipe(fds, 1, 1); +- if(err){ +- 
printk("slirp_datachan: Failed to open pipe, errno = %d\n", +- -err); ++ if(err < 0){ ++ printk("slirp_datachan: Failed to open pipe, err = %d\n", -err); + return(err); + } + +@@ -77,7 +76,7 @@ + pid = slirp_tramp(pri->argw.argv, sfd); + + if(pid < 0){ +- printk("slirp_tramp failed - errno = %d\n", pid); ++ printk("slirp_tramp failed - errno = %d\n", -pid); + os_close_file(sfd); + os_close_file(mfd); + return(pid); +@@ -97,8 +96,8 @@ + struct slirp_data *pri = data; + int status,err; + +- close(fd); +- close(pri->slave); ++ os_close_file(fd); ++ os_close_file(pri->slave); + + pri->slave = -1; + +diff -Naur a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c +--- a/arch/um/drivers/ssl.c 2004-02-11 12:15:28.000000000 -0500 ++++ b/arch/um/drivers/ssl.c 2004-02-11 12:27:17.000000000 -0500 +@@ -10,6 +10,7 @@ + #include "linux/major.h" + #include "linux/mm.h" + #include "linux/init.h" ++#include "linux/console.h" + #include "asm/termbits.h" + #include "asm/irq.h" + #include "line.h" +@@ -53,8 +54,9 @@ + + static struct line_driver driver = { + .name = "UML serial line", +- .devfs_name = "tts/%d", +- .major = TTYAUX_MAJOR, ++ .device_name = "ttS", ++ .devfs_name = "tts/", ++ .major = TTY_MAJOR, + .minor_start = 64, + .type = TTY_DRIVER_TYPE_SERIAL, + .subtype = 0, +@@ -149,6 +151,9 @@ + case TCSETSW: + case TCGETA: + case TIOCMGET: ++ case TCSBRK: ++ case TCSBRKP: ++ case TIOCMSET: + ret = -ENOIOCTLCMD; + break; + default: +@@ -212,6 +217,37 @@ + */ + static int ssl_init_done = 0; + ++static void ssl_console_write(struct console *c, const char *string, ++ unsigned len) ++{ ++ struct line *line = &serial_lines[c->index]; ++ if(ssl_init_done) ++ down(&line->sem); ++ console_write_chan(&line->chan_list, string, len); ++ if(ssl_init_done) ++ up(&line->sem); ++} ++ ++static struct tty_driver *ssl_console_device(struct console *c, int *index) ++{ ++ *index = c->index; ++ return ssl_driver; ++} ++ ++static int ssl_console_setup(struct console *co, char *options) ++{ ++ return(0); 
++} ++ ++static struct console ssl_cons = { ++ name: "ttyS", ++ write: ssl_console_write, ++ device: ssl_console_device, ++ setup: ssl_console_setup, ++ flags: CON_PRINTBUFFER, ++ index: -1, ++}; ++ + int ssl_init(void) + { + char *new_title; +@@ -227,17 +263,18 @@ + new_title = add_xterm_umid(opts.xterm_title); + if(new_title != NULL) opts.xterm_title = new_title; + ++ register_console(&ssl_cons); + ssl_init_done = 1; + return(0); + } + +-__initcall(ssl_init); ++late_initcall(ssl_init); + + static int ssl_chan_setup(char *str) + { +- line_setup(serial_lines, sizeof(serial_lines)/sizeof(serial_lines[0]), +- str, 1); +- return(1); ++ return(line_setup(serial_lines, ++ sizeof(serial_lines)/sizeof(serial_lines[0]), ++ str, 1)); + } + + __setup("ssl", ssl_chan_setup); +diff -Naur a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c +--- a/arch/um/drivers/stdio_console.c 2004-02-11 12:14:32.000000000 -0500 ++++ b/arch/um/drivers/stdio_console.c 2004-02-11 12:26:14.000000000 -0500 +@@ -83,7 +83,8 @@ + + static struct line_driver driver = { + .name = "UML console", +- .devfs_name = "vc/%d", ++ .device_name = "tty", ++ .devfs_name = "vc/", + .major = TTY_MAJOR, + .minor_start = 0, + .type = TTY_DRIVER_TYPE_CONSOLE, +@@ -159,6 +160,15 @@ + + static int con_init_done = 0; + ++static struct tty_operations console_ops = { ++ .open = con_open, ++ .close = con_close, ++ .write = con_write, ++ .chars_in_buffer = chars_in_buffer, ++ .set_termios = set_termios, ++ .write_room = line_write_room, ++}; ++ + int stdio_init(void) + { + char *new_title; +@@ -166,7 +176,8 @@ + printk(KERN_INFO "Initializing stdio console driver\n"); + + console_driver = line_register_devfs(&console_lines, &driver, +- &console_ops, vts, sizeof(vts)/sizeof(vts[0])); ++ &console_ops, vts, ++ sizeof(vts)/sizeof(vts[0])); + + lines_init(vts, sizeof(vts)/sizeof(vts[0])); + +@@ -178,24 +189,19 @@ + return(0); + } + +-__initcall(stdio_init); ++late_initcall(stdio_init); + + static void 
console_write(struct console *console, const char *string, + unsigned len) + { +- if(con_init_done) down(&vts[console->index].sem); +- console_write_chan(&vts[console->index].chan_list, string, len); +- if(con_init_done) up(&vts[console->index].sem); +-} ++ struct line *line = &vts[console->index]; + +-static struct tty_operations console_ops = { +- .open = con_open, +- .close = con_close, +- .write = con_write, +- .chars_in_buffer = chars_in_buffer, +- .set_termios = set_termios, +- .write_room = line_write_room, +-}; ++ if(con_init_done) ++ down(&line->sem); ++ console_write_chan(&line->chan_list, string, len); ++ if(con_init_done) ++ up(&line->sem); ++} + + static struct tty_driver *console_device(struct console *c, int *index) + { +@@ -208,22 +214,28 @@ + return(0); + } + +-static struct console stdiocons = INIT_CONSOLE("tty", console_write, +- console_device, console_setup, +- CON_PRINTBUFFER); ++static struct console stdiocons = { ++ name: "tty", ++ write: console_write, ++ device: console_device, ++ setup: console_setup, ++ flags: CON_PRINTBUFFER, ++ index: -1, ++}; + +-static void __init stdio_console_init(void) ++static int __init stdio_console_init(void) + { + INIT_LIST_HEAD(&vts[0].chan_list); + list_add(&init_console_chan.list, &vts[0].chan_list); + register_console(&stdiocons); ++ return(0); + } ++ + console_initcall(stdio_console_init); + + static int console_chan_setup(char *str) + { +- line_setup(vts, sizeof(vts)/sizeof(vts[0]), str, 1); +- return(1); ++ return(line_setup(vts, sizeof(vts)/sizeof(vts[0]), str, 1)); + } + + __setup("con", console_chan_setup); +diff -Naur a/arch/um/drivers/tty.c b/arch/um/drivers/tty.c +--- a/arch/um/drivers/tty.c 2004-02-11 12:15:02.000000000 -0500 ++++ b/arch/um/drivers/tty.c 2004-02-11 12:26:51.000000000 -0500 +@@ -5,7 +5,6 @@ + + #include + #include +-#include + #include + #include + #include "chan_user.h" +@@ -30,7 +29,8 @@ + } + str++; + +- if((data = um_kmalloc(sizeof(*data))) == NULL) ++ data = 
um_kmalloc(sizeof(*data)); ++ if(data == NULL) + return(NULL); + *data = ((struct tty_chan) { .dev = str, + .raw = opts->raw }); +diff -Naur a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c +--- a/arch/um/drivers/ubd_kern.c 2004-02-11 12:15:25.000000000 -0500 ++++ b/arch/um/drivers/ubd_kern.c 2004-02-11 12:27:12.000000000 -0500 +@@ -8,6 +8,13 @@ + * old style ubd by setting UBD_SHIFT to 0 + * 2002-09-27...2002-10-18 massive tinkering for 2.5 + * partitions have changed in 2.5 ++ * 2003-01-29 more tinkering for 2.5.59-1 ++ * This should now address the sysfs problems and has ++ * the symlink for devfs to allow for booting with ++ * the common /dev/ubd/discX/... names rather than ++ * only /dev/ubdN/discN this version also has lots of ++ * clean ups preparing for ubd-many. ++ * James McMechan + */ + + #define MAJOR_NR UBD_MAJOR +@@ -40,9 +47,12 @@ + #include "mconsole_kern.h" + #include "init.h" + #include "irq_user.h" ++#include "irq_kern.h" + #include "ubd_user.h" + #include "2_5compat.h" + #include "os.h" ++#include "mem.h" ++#include "mem_kern.h" + + static spinlock_t ubd_io_lock = SPIN_LOCK_UNLOCKED; + static spinlock_t ubd_lock = SPIN_LOCK_UNLOCKED; +@@ -56,6 +66,10 @@ + + #define MAX_DEV (8) + ++/* Changed in early boot */ ++static int ubd_do_mmap = 0; ++#define UBD_MMAP_BLOCK_SIZE PAGE_SIZE ++ + static struct block_device_operations ubd_blops = { + .owner = THIS_MODULE, + .open = ubd_open, +@@ -67,7 +81,7 @@ + static request_queue_t *ubd_queue; + + /* Protected by ubd_lock */ +-static int fake_major = 0; ++static int fake_major = MAJOR_NR; + + static struct gendisk *ubd_gendisk[MAX_DEV]; + static struct gendisk *fake_gendisk[MAX_DEV]; +@@ -96,13 +110,19 @@ + + struct ubd { + char *file; +- int is_dir; + int count; + int fd; + __u64 size; + struct openflags boot_openflags; + struct openflags openflags; ++ int no_cow; + struct cow cow; ++ ++ int map_writes; ++ int map_reads; ++ int nomap_writes; ++ int nomap_reads; ++ int write_maps; + }; + + #define 
DEFAULT_COW { \ +@@ -115,21 +135,28 @@ + + #define DEFAULT_UBD { \ + .file = NULL, \ +- .is_dir = 0, \ + .count = 0, \ + .fd = -1, \ + .size = -1, \ + .boot_openflags = OPEN_FLAGS, \ + .openflags = OPEN_FLAGS, \ ++ .no_cow = 0, \ + .cow = DEFAULT_COW, \ ++ .map_writes = 0, \ ++ .map_reads = 0, \ ++ .nomap_writes = 0, \ ++ .nomap_reads = 0, \ ++ .write_maps = 0, \ + } + + struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD }; + + static int ubd0_init(void) + { +- if(ubd_dev[0].file == NULL) +- ubd_dev[0].file = "root_fs"; ++ struct ubd *dev = &ubd_dev[0]; ++ ++ if(dev->file == NULL) ++ dev->file = "root_fs"; + return(0); + } + +@@ -196,19 +223,46 @@ + " Create ide0 entries that map onto ubd devices.\n\n" + ); + ++static int parse_unit(char **ptr) ++{ ++ char *str = *ptr, *end; ++ int n = -1; ++ ++ if(isdigit(*str)) { ++ n = simple_strtoul(str, &end, 0); ++ if(end == str) ++ return(-1); ++ *ptr = end; ++ } ++ else if (('a' <= *str) && (*str <= 'h')) { ++ n = *str - 'a'; ++ str++; ++ *ptr = str; ++ } ++ return(n); ++} ++ + static int ubd_setup_common(char *str, int *index_out) + { ++ struct ubd *dev; + struct openflags flags = global_openflags; + char *backing_file; + int n, err; + + if(index_out) *index_out = -1; +- n = *str++; ++ n = *str; + if(n == '='){ +- static int fake_major_allowed = 1; + char *end; + int major; + ++ str++; ++ if(!strcmp(str, "mmap")){ ++ CHOOSE_MODE(printk("mmap not supported by the ubd " ++ "driver in tt mode\n"), ++ ubd_do_mmap = 1); ++ return(0); ++ } ++ + if(!strcmp(str, "sync")){ + global_openflags.s = 1; + return(0); +@@ -220,20 +274,14 @@ + return(1); + } + +- if(!fake_major_allowed){ +- printk(KERN_ERR "Can't assign a fake major twice\n"); +- return(1); +- } +- + err = 1; + spin_lock(&ubd_lock); +- if(!fake_major_allowed){ ++ if(fake_major != MAJOR_NR){ + printk(KERN_ERR "Can't assign a fake major twice\n"); + goto out1; + } + + fake_major = major; +- fake_major_allowed = 0; + + printk(KERN_INFO "Setting extra ubd 
major number to %d\n", + major); +@@ -243,25 +291,23 @@ + return(err); + } + +- if(n < '0'){ +- printk(KERN_ERR "ubd_setup : index out of range\n"); } +- +- if((n >= '0') && (n <= '9')) n -= '0'; +- else if((n >= 'a') && (n <= 'z')) n -= 'a'; +- else { +- printk(KERN_ERR "ubd_setup : device syntax invalid\n"); ++ n = parse_unit(&str); ++ if(n < 0){ ++ printk(KERN_ERR "ubd_setup : couldn't parse unit number " ++ "'%s'\n", str); + return(1); + } + if(n >= MAX_DEV){ +- printk(KERN_ERR "ubd_setup : index out of range " +- "(%d devices)\n", MAX_DEV); ++ printk(KERN_ERR "ubd_setup : index %d out of range " ++ "(%d devices)\n", n, MAX_DEV); + return(1); + } + + err = 1; + spin_lock(&ubd_lock); + +- if(ubd_dev[n].file != NULL){ ++ dev = &ubd_dev[n]; ++ if(dev->file != NULL){ + printk(KERN_ERR "ubd_setup : device already configured\n"); + goto out2; + } +@@ -276,6 +322,11 @@ + flags.s = 1; + str++; + } ++ if (*str == 'd'){ ++ dev->no_cow = 1; ++ str++; ++ } ++ + if(*str++ != '='){ + printk(KERN_ERR "ubd_setup : Expected '='\n"); + goto out2; +@@ -284,14 +335,17 @@ + err = 0; + backing_file = strchr(str, ','); + if(backing_file){ +- *backing_file = '\0'; +- backing_file++; ++ if(dev->no_cow) ++ printk(KERN_ERR "Can't specify both 'd' and a " ++ "cow file\n"); ++ else { ++ *backing_file = '\0'; ++ backing_file++; ++ } + } +- ubd_dev[n].file = str; +- if(ubd_is_dir(ubd_dev[n].file)) +- ubd_dev[n].is_dir = 1; +- ubd_dev[n].cow.file = backing_file; +- ubd_dev[n].boot_openflags = flags; ++ dev->file = str; ++ dev->cow.file = backing_file; ++ dev->boot_openflags = flags; + out2: + spin_unlock(&ubd_lock); + return(err); +@@ -321,8 +375,7 @@ + static int fakehd_set = 0; + static int fakehd(char *str) + { +- printk(KERN_INFO +- "fakehd : Changing ubd name to \"hd\".\n"); ++ printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n"); + fakehd_set = 1; + return 1; + } +@@ -368,32 +421,42 @@ + { + struct io_thread_req req; + struct request *rq = elv_next_request(ubd_queue); +- int n; 
++ int n, err; + + do_ubd = NULL; + intr_count++; + n = read_ubd_fs(thread_fd, &req, sizeof(req)); + if(n != sizeof(req)){ + printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, " +- "errno = %d\n", os_getpid(), -n); ++ "err = %d\n", os_getpid(), -n); + spin_lock(&ubd_io_lock); + end_request(rq, 0); + spin_unlock(&ubd_io_lock); + return; + } + +- if((req.offset != ((__u64) (rq->sector)) << 9) || +- (req.length != (rq->current_nr_sectors) << 9)) ++ if((req.op != UBD_MMAP) && ++ ((req.offset != ((__u64) (rq->sector)) << 9) || ++ (req.length != (rq->current_nr_sectors) << 9))) + panic("I/O op mismatch"); + ++ if(req.map_fd != -1){ ++ err = physmem_subst_mapping(req.buffer, req.map_fd, ++ req.map_offset, 1); ++ if(err) ++ printk("ubd_handler - physmem_subst_mapping failed, " ++ "err = %d\n", -err); ++ } ++ + ubd_finish(rq, req.error); + reactivate_fd(thread_fd, UBD_IRQ); + do_ubd_request(ubd_queue); + } + +-static void ubd_intr(int irq, void *dev, struct pt_regs *unused) ++static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused) + { + ubd_handler(); ++ return(IRQ_HANDLED); + } + + /* Only changed by ubd_init, which is an initcall. */ +@@ -417,10 +480,14 @@ + + static void ubd_close(struct ubd *dev) + { ++ if(ubd_do_mmap) ++ physmem_forget_descriptor(dev->fd); + os_close_file(dev->fd); + if(dev->cow.file == NULL) + return; + ++ if(ubd_do_mmap) ++ physmem_forget_descriptor(dev->cow.fd); + os_close_file(dev->cow.fd); + vfree(dev->cow.bitmap); + dev->cow.bitmap = NULL; +@@ -429,18 +496,20 @@ + static int ubd_open_dev(struct ubd *dev) + { + struct openflags flags; +- int err, n, create_cow, *create_ptr; ++ char **back_ptr; ++ int err, create_cow, *create_ptr; + ++ dev->openflags = dev->boot_openflags; + create_cow = 0; + create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL; +- dev->fd = open_ubd_file(dev->file, &dev->openflags, &dev->cow.file, ++ back_ptr = dev->no_cow ? 
NULL : &dev->cow.file; ++ dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr, + &dev->cow.bitmap_offset, &dev->cow.bitmap_len, + &dev->cow.data_offset, create_ptr); + + if((dev->fd == -ENOENT) && create_cow){ +- n = dev - ubd_dev; + dev->fd = create_cow_file(dev->file, dev->cow.file, +- dev->openflags, 1 << 9, ++ dev->openflags, 1 << 9, PAGE_SIZE, + &dev->cow.bitmap_offset, + &dev->cow.bitmap_len, + &dev->cow.data_offset); +@@ -455,13 +524,17 @@ + if(dev->cow.file != NULL){ + err = -ENOMEM; + dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len); +- if(dev->cow.bitmap == NULL) goto error; ++ if(dev->cow.bitmap == NULL){ ++ printk(KERN_ERR "Failed to vmalloc COW bitmap\n"); ++ goto error; ++ } + flush_tlb_kernel_vm(); + + err = read_cow_bitmap(dev->fd, dev->cow.bitmap, + dev->cow.bitmap_offset, + dev->cow.bitmap_len); +- if(err) goto error; ++ if(err < 0) ++ goto error; + + flags = dev->openflags; + flags.w = 0; +@@ -481,17 +554,31 @@ + + { + struct gendisk *disk; ++ char from[sizeof("ubd/nnnnn\0")], to[sizeof("discnnnnn/disc\0")]; ++ int err; + + disk = alloc_disk(1 << UBD_SHIFT); +- if (!disk) +- return -ENOMEM; ++ if(disk == NULL) ++ return(-ENOMEM); + + disk->major = major; + disk->first_minor = unit << UBD_SHIFT; + disk->fops = &ubd_blops; + set_capacity(disk, size / 512); +- sprintf(disk->disk_name, "ubd"); +- sprintf(disk->devfs_name, "ubd/disc%d", unit); ++ if(major == MAJOR_NR){ ++ sprintf(disk->disk_name, "ubd%c", 'a' + unit); ++ sprintf(disk->devfs_name, "ubd/disc%d", unit); ++ sprintf(from, "ubd/%d", unit); ++ sprintf(to, "disc%d/disc", unit); ++ err = devfs_mk_symlink(from, to); ++ if(err) ++ printk("ubd_new_disk failed to make link from %s to " ++ "%s, error = %d\n", from, to, err); ++ } ++ else { ++ sprintf(disk->disk_name, "ubd_fake%d", unit); ++ sprintf(disk->devfs_name, "ubd_fake/disc%d", unit); ++ } + + disk->private_data = &ubd_dev[unit]; + disk->queue = ubd_queue; +@@ -506,24 +593,21 @@ + struct ubd *dev = &ubd_dev[n]; + int err; + 
+- if(dev->is_dir) +- return(-EISDIR); +- +- if (!dev->file) ++ if(dev->file == NULL) + return(-ENODEV); + + if (ubd_open_dev(dev)) + return(-ENODEV); + + err = ubd_file_size(dev, &dev->size); +- if(err) ++ if(err < 0) + return(err); + + err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]); + if(err) + return(err); + +- if(fake_major) ++ if(fake_major != MAJOR_NR) + ubd_new_disk(fake_major, dev->size, n, + &fake_gendisk[n]); + +@@ -561,42 +645,42 @@ + return(err); + } + +-static int ubd_get_config(char *dev, char *str, int size, char **error_out) ++static int ubd_get_config(char *name, char *str, int size, char **error_out) + { +- struct ubd *ubd; ++ struct ubd *dev; + char *end; +- int major, n = 0; ++ int n, len = 0; + +- major = simple_strtoul(dev, &end, 0); +- if((*end != '\0') || (end == dev)){ +- *error_out = "ubd_get_config : didn't parse major number"; ++ n = simple_strtoul(name, &end, 0); ++ if((*end != '\0') || (end == name)){ ++ *error_out = "ubd_get_config : didn't parse device number"; + return(-1); + } + +- if((major >= MAX_DEV) || (major < 0)){ +- *error_out = "ubd_get_config : major number out of range"; ++ if((n >= MAX_DEV) || (n < 0)){ ++ *error_out = "ubd_get_config : device number out of range"; + return(-1); + } + +- ubd = &ubd_dev[major]; ++ dev = &ubd_dev[n]; + spin_lock(&ubd_lock); + +- if(ubd->file == NULL){ +- CONFIG_CHUNK(str, size, n, "", 1); ++ if(dev->file == NULL){ ++ CONFIG_CHUNK(str, size, len, "", 1); + goto out; + } + +- CONFIG_CHUNK(str, size, n, ubd->file, 0); ++ CONFIG_CHUNK(str, size, len, dev->file, 0); + +- if(ubd->cow.file != NULL){ +- CONFIG_CHUNK(str, size, n, ",", 0); +- CONFIG_CHUNK(str, size, n, ubd->cow.file, 1); ++ if(dev->cow.file != NULL){ ++ CONFIG_CHUNK(str, size, len, ",", 0); ++ CONFIG_CHUNK(str, size, len, dev->cow.file, 1); + } +- else CONFIG_CHUNK(str, size, n, "", 1); ++ else CONFIG_CHUNK(str, size, len, "", 1); + + out: + spin_unlock(&ubd_lock); +- return(n); ++ return(len); + } + + static int 
ubd_remove(char *str) +@@ -604,11 +688,9 @@ + struct ubd *dev; + int n, err = -ENODEV; + +- if(!isdigit(*str)) +- return(err); /* it should be a number 0-7/a-h */ ++ n = parse_unit(&str); + +- n = *str - '0'; +- if(n >= MAX_DEV) ++ if((n < 0) || (n >= MAX_DEV)) + return(err); + + dev = &ubd_dev[n]; +@@ -669,7 +751,7 @@ + + elevator_init(ubd_queue, &elevator_noop); + +- if (fake_major != 0) { ++ if (fake_major != MAJOR_NR) { + char name[sizeof("ubd_nnn\0")]; + + snprintf(name, sizeof(name), "ubd_%d", fake_major); +@@ -696,6 +778,7 @@ + io_pid = start_io_thread(stack + PAGE_SIZE - sizeof(void *), + &thread_fd); + if(io_pid < 0){ ++ io_pid = -1; + printk(KERN_ERR + "ubd : Failed to start I/O thread (errno = %d) - " + "falling back to synchronous I/O\n", -io_pid); +@@ -703,8 +786,8 @@ + } + err = um_request_irq(UBD_IRQ, thread_fd, IRQ_READ, ubd_intr, + SA_INTERRUPT, "ubd", ubd_dev); +- if(err != 0) printk(KERN_ERR +- "um_request_irq failed - errno = %d\n", -err); ++ if(err != 0) ++ printk(KERN_ERR "um_request_irq failed - errno = %d\n", -err); + return(err); + } + +@@ -714,15 +797,9 @@ + { + struct gendisk *disk = inode->i_bdev->bd_disk; + struct ubd *dev = disk->private_data; +- int err = -EISDIR; +- +- if(dev->is_dir == 1) +- goto out; ++ int err = 0; + +- err = 0; + if(dev->count == 0){ +- dev->openflags = dev->boot_openflags; +- + err = ubd_open_dev(dev); + if(err){ + printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n", +@@ -749,62 +826,156 @@ + return(0); + } + +-void cowify_req(struct io_thread_req *req, struct ubd *dev) ++static void cowify_bitmap(__u64 io_offset, int length, unsigned long *cow_mask, ++ __u64 *cow_offset, unsigned long *bitmap, ++ __u64 bitmap_offset, unsigned long *bitmap_words, ++ __u64 bitmap_len) ++{ ++ __u64 sector = io_offset >> 9; ++ int i, update_bitmap = 0; ++ ++ for(i = 0; i < length >> 9; i++){ ++ if(cow_mask != NULL) ++ ubd_set_bit(i, (unsigned char *) cow_mask); ++ if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) ++ 
continue; ++ ++ update_bitmap = 1; ++ ubd_set_bit(sector + i, (unsigned char *) bitmap); ++ } ++ ++ if(!update_bitmap) ++ return; ++ ++ *cow_offset = sector / (sizeof(unsigned long) * 8); ++ ++ /* This takes care of the case where we're exactly at the end of the ++ * device, and *cow_offset + 1 is off the end. So, just back it up ++ * by one word. Thanks to Lynn Kerby for the fix and James McMechan ++ * for the original diagnosis. ++ */ ++ if(*cow_offset == ((bitmap_len + sizeof(unsigned long) - 1) / ++ sizeof(unsigned long) - 1)) ++ (*cow_offset)--; ++ ++ bitmap_words[0] = bitmap[*cow_offset]; ++ bitmap_words[1] = bitmap[*cow_offset + 1]; ++ ++ *cow_offset *= sizeof(unsigned long); ++ *cow_offset += bitmap_offset; ++} ++ ++static void cowify_req(struct io_thread_req *req, unsigned long *bitmap, ++ __u64 bitmap_offset, __u64 bitmap_len) + { +- int i, update_bitmap, sector = req->offset >> 9; ++ __u64 sector = req->offset >> 9; ++ int i; + + if(req->length > (sizeof(req->sector_mask) * 8) << 9) + panic("Operation too long"); ++ + if(req->op == UBD_READ) { + for(i = 0; i < req->length >> 9; i++){ +- if(ubd_test_bit(sector + i, (unsigned char *) +- dev->cow.bitmap)){ ++ if(ubd_test_bit(sector + i, (unsigned char *) bitmap)) + ubd_set_bit(i, (unsigned char *) + &req->sector_mask); +- } + } +- } +- else { +- update_bitmap = 0; +- for(i = 0; i < req->length >> 9; i++){ +- ubd_set_bit(i, (unsigned char *) +- &req->sector_mask); +- if(!ubd_test_bit(sector + i, (unsigned char *) +- dev->cow.bitmap)) +- update_bitmap = 1; +- ubd_set_bit(sector + i, (unsigned char *) +- dev->cow.bitmap); +- } +- if(update_bitmap){ +- req->cow_offset = sector / (sizeof(unsigned long) * 8); +- req->bitmap_words[0] = +- dev->cow.bitmap[req->cow_offset]; +- req->bitmap_words[1] = +- dev->cow.bitmap[req->cow_offset + 1]; +- req->cow_offset *= sizeof(unsigned long); +- req->cow_offset += dev->cow.bitmap_offset; ++ } ++ else cowify_bitmap(req->offset, req->length, &req->sector_mask, ++ 
&req->cow_offset, bitmap, bitmap_offset, ++ req->bitmap_words, bitmap_len); ++} ++ ++static int mmap_fd(struct request *req, struct ubd *dev, __u64 offset) ++{ ++ __u64 sector; ++ unsigned char *bitmap; ++ int bit, i; ++ ++ /* mmap must have been requested on the command line */ ++ if(!ubd_do_mmap) ++ return(-1); ++ ++ /* The buffer must be page aligned */ ++ if(((unsigned long) req->buffer % UBD_MMAP_BLOCK_SIZE) != 0) ++ return(-1); ++ ++ /* The request must be a page long */ ++ if((req->current_nr_sectors << 9) != PAGE_SIZE) ++ return(-1); ++ ++ if(dev->cow.file == NULL) ++ return(dev->fd); ++ ++ sector = offset >> 9; ++ bitmap = (unsigned char *) dev->cow.bitmap; ++ bit = ubd_test_bit(sector, bitmap); ++ ++ for(i = 1; i < req->current_nr_sectors; i++){ ++ if(ubd_test_bit(sector + i, bitmap) != bit) ++ return(-1); ++ } ++ ++ if(bit || (rq_data_dir(req) == WRITE)) ++ offset += dev->cow.data_offset; ++ ++ /* The data on disk must be page aligned */ ++ if((offset % UBD_MMAP_BLOCK_SIZE) != 0) ++ return(-1); ++ ++ return(bit ? 
dev->fd : dev->cow.fd); ++} ++ ++static int prepare_mmap_request(struct ubd *dev, int fd, __u64 offset, ++ struct request *req, ++ struct io_thread_req *io_req) ++{ ++ int err; ++ ++ if(rq_data_dir(req) == WRITE){ ++ /* Writes are almost no-ops since the new data is already in the ++ * host page cache ++ */ ++ dev->map_writes++; ++ if(dev->cow.file != NULL) ++ cowify_bitmap(io_req->offset, io_req->length, ++ &io_req->sector_mask, &io_req->cow_offset, ++ dev->cow.bitmap, dev->cow.bitmap_offset, ++ io_req->bitmap_words, ++ dev->cow.bitmap_len); ++ } ++ else { ++ int w; ++ ++ if((dev->cow.file != NULL) && (fd == dev->cow.fd)) ++ w = 0; ++ else w = dev->openflags.w; ++ ++ if((dev->cow.file != NULL) && (fd == dev->fd)) ++ offset += dev->cow.data_offset; ++ ++ err = physmem_subst_mapping(req->buffer, fd, offset, w); ++ if(err){ ++ printk("physmem_subst_mapping failed, err = %d\n", ++ -err); ++ return(1); + } ++ dev->map_reads++; + } ++ io_req->op = UBD_MMAP; ++ io_req->buffer = req->buffer; ++ return(0); + } + + static int prepare_request(struct request *req, struct io_thread_req *io_req) + { + struct gendisk *disk = req->rq_disk; + struct ubd *dev = disk->private_data; +- __u64 block; +- int nsect; ++ __u64 offset; ++ int len, fd; + + if(req->rq_status == RQ_INACTIVE) return(1); + +- if(dev->is_dir){ +- strcpy(req->buffer, "HOSTFS:"); +- strcat(req->buffer, dev->file); +- spin_lock(&ubd_io_lock); +- end_request(req, 1); +- spin_unlock(&ubd_io_lock); +- return(1); +- } +- + if((rq_data_dir(req) == WRITE) && !dev->openflags.w){ + printk("Write attempted on readonly ubd device %s\n", + disk->disk_name); +@@ -814,23 +985,49 @@ + return(1); + } + +- block = req->sector; +- nsect = req->current_nr_sectors; ++ offset = ((__u64) req->sector) << 9; ++ len = req->current_nr_sectors << 9; + +- io_req->op = rq_data_dir(req) == READ ? UBD_READ : UBD_WRITE; + io_req->fds[0] = (dev->cow.file != NULL) ? 
dev->cow.fd : dev->fd; + io_req->fds[1] = dev->fd; ++ io_req->map_fd = -1; ++ io_req->cow_offset = -1; ++ io_req->offset = offset; ++ io_req->length = len; ++ io_req->error = 0; ++ io_req->sector_mask = 0; ++ ++ fd = mmap_fd(req, dev, io_req->offset); ++ if(fd > 0){ ++ /* If mmapping is otherwise OK, but the first access to the ++ * page is a write, then it's not mapped in yet. So we have ++ * to write the data to disk first, then we can map the disk ++ * page in and continue normally from there. ++ */ ++ if((rq_data_dir(req) == WRITE) && !is_remapped(req->buffer)){ ++ io_req->map_fd = dev->fd; ++ io_req->map_offset = io_req->offset + ++ dev->cow.data_offset; ++ dev->write_maps++; ++ } ++ else return(prepare_mmap_request(dev, fd, io_req->offset, req, ++ io_req)); ++ } ++ ++ if(rq_data_dir(req) == READ) ++ dev->nomap_reads++; ++ else dev->nomap_writes++; ++ ++ io_req->op = (rq_data_dir(req) == READ) ? UBD_READ : UBD_WRITE; + io_req->offsets[0] = 0; + io_req->offsets[1] = dev->cow.data_offset; +- io_req->offset = ((__u64) block) << 9; +- io_req->length = nsect << 9; + io_req->buffer = req->buffer; + io_req->sectorsize = 1 << 9; +- io_req->sector_mask = 0; +- io_req->cow_offset = -1; +- io_req->error = 0; + +- if(dev->cow.file != NULL) cowify_req(io_req, dev); ++ if(dev->cow.file != NULL) ++ cowify_req(io_req, dev->cow.bitmap, dev->cow.bitmap_offset, ++ dev->cow.bitmap_len); ++ + return(0); + } + +@@ -841,7 +1038,7 @@ + int err, n; + + if(thread_fd == -1){ +- while(!list_empty(&q->queue_head)){ ++ while(!elv_queue_empty(q)){ + req = elv_next_request(q); + err = prepare_request(req, &io_req); + if(!err){ +@@ -851,7 +1048,8 @@ + } + } + else { +- if(do_ubd || list_empty(&q->queue_head)) return; ++ if(do_ubd || elv_queue_empty(q)) ++ return; + req = elv_next_request(q); + err = prepare_request(req, &io_req); + if(!err){ +@@ -885,7 +1083,7 @@ + g.heads = 128; + g.sectors = 32; + g.cylinders = dev->size / (128 * 32 * 512); +- g.start = 2; ++ g.start = 
get_start_sect(inode->i_bdev); + return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0); + + case HDIO_SET_UNMASKINTR: +@@ -935,6 +1133,142 @@ + return(-EINVAL); + } + ++static int ubd_check_remapped(int fd, unsigned long address, int is_write, ++ __u64 offset) ++{ ++ __u64 bitmap_offset; ++ unsigned long new_bitmap[2]; ++ int i, err, n; ++ ++ /* If it's not a write access, we can't do anything about it */ ++ if(!is_write) ++ return(0); ++ ++ /* We have a write */ ++ for(i = 0; i < sizeof(ubd_dev) / sizeof(ubd_dev[0]); i++){ ++ struct ubd *dev = &ubd_dev[i]; ++ ++ if((dev->fd != fd) && (dev->cow.fd != fd)) ++ continue; ++ ++ /* It's a write to a ubd device */ ++ ++ if(!dev->openflags.w){ ++ /* It's a write access on a read-only device - probably ++ * shouldn't happen. If the kernel is trying to change ++ * something with no intention of writing it back out, ++ * then this message will clue us in that this needs ++ * fixing ++ */ ++ printk("Write access to mapped page from readonly ubd " ++ "device %d\n", i); ++ return(0); ++ } ++ ++ /* It's a write to a writeable ubd device - it must be COWed ++ * because, otherwise, the page would have been mapped in ++ * writeable ++ */ ++ ++ if(!dev->cow.file) ++ panic("Write fault on writeable non-COW ubd device %d", ++ i); ++ ++ /* It should also be an access to the backing file since the ++ * COW pages should be mapped in read-write ++ */ ++ ++ if(fd == dev->fd) ++ panic("Write fault on a backing page of ubd " ++ "device %d\n", i); ++ ++ /* So, we do the write, copying the backing data to the COW ++ * file... ++ */ ++ ++ err = os_seek_file(dev->fd, offset + dev->cow.data_offset); ++ if(err < 0) ++ panic("Couldn't seek to %lld in COW file of ubd " ++ "device %d, err = %d", ++ offset + dev->cow.data_offset, i, -err); ++ ++ n = os_write_file(dev->fd, (void *) address, PAGE_SIZE); ++ if(n != PAGE_SIZE) ++ panic("Couldn't copy data to COW file of ubd " ++ "device %d, err = %d", i, -n); ++ ++ /* ... updating the COW bitmap... 
*/ ++ ++ cowify_bitmap(offset, PAGE_SIZE, NULL, &bitmap_offset, ++ dev->cow.bitmap, dev->cow.bitmap_offset, ++ new_bitmap, dev->cow.bitmap_len); ++ ++ err = os_seek_file(dev->fd, bitmap_offset); ++ if(err < 0) ++ panic("Couldn't seek to %lld in COW file of ubd " ++ "device %d, err = %d", bitmap_offset, i, -err); ++ ++ n = os_write_file(dev->fd, new_bitmap, sizeof(new_bitmap)); ++ if(n != sizeof(new_bitmap)) ++ panic("Couldn't update bitmap of ubd device %d, " ++ "err = %d", i, -n); ++ ++ /* Maybe we can map the COW page in, and maybe we can't. If ++ * it is a pre-V3 COW file, we can't, since the alignment will ++ * be wrong. If it is a V3 or later COW file which has been ++ * moved to a system with a larger page size, then maybe we ++ * can't, depending on the exact location of the page. ++ */ ++ ++ offset += dev->cow.data_offset; ++ ++ /* Remove the remapping, putting the original anonymous page ++ * back. If the COW file can be mapped in, that is done. ++ * Otherwise, the COW page is read in. 
++ */ ++ ++ if(!physmem_remove_mapping((void *) address)) ++ panic("Address 0x%lx not remapped by ubd device %d", ++ address, i); ++ if((offset % UBD_MMAP_BLOCK_SIZE) == 0) ++ physmem_subst_mapping((void *) address, dev->fd, ++ offset, 1); ++ else { ++ err = os_seek_file(dev->fd, offset); ++ if(err < 0) ++ panic("Couldn't seek to %lld in COW file of " ++ "ubd device %d, err = %d", offset, i, ++ -err); ++ ++ n = os_read_file(dev->fd, (void *) address, PAGE_SIZE); ++ if(n != PAGE_SIZE) ++ panic("Failed to read page from offset %llx of " ++ "COW file of ubd device %d, err = %d", ++ offset, i, -n); ++ } ++ ++ return(1); ++ } ++ ++ /* It's not a write on a ubd device */ ++ return(0); ++} ++ ++static struct remapper ubd_remapper = { ++ .list = LIST_HEAD_INIT(ubd_remapper.list), ++ .proc = ubd_check_remapped, ++}; ++ ++static int ubd_remapper_setup(void) ++{ ++ if(ubd_do_mmap) ++ register_remapper(&ubd_remapper); ++ ++ return(0); ++} ++ ++__initcall(ubd_remapper_setup); ++ + /* + * Overrides for Emacs so that we follow Linus's tabbing style. 
+ * Emacs will notice this stuff at the end of the file and automatically +diff -Naur a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c +--- a/arch/um/drivers/ubd_user.c 2004-02-11 12:14:32.000000000 -0500 ++++ b/arch/um/drivers/ubd_user.c 2004-02-11 12:26:14.000000000 -0500 +@@ -11,11 +11,8 @@ + #include + #include + #include +-#include + #include +-#include + #include +-#include + #include + #include + #include "asm/types.h" +@@ -24,146 +21,30 @@ + #include "user.h" + #include "ubd_user.h" + #include "os.h" ++#include "cow.h" + + #include + #include +-#if __BYTE_ORDER == __BIG_ENDIAN +-# define ntohll(x) (x) +-# define htonll(x) (x) +-#elif __BYTE_ORDER == __LITTLE_ENDIAN +-# define ntohll(x) bswap_64(x) +-# define htonll(x) bswap_64(x) +-#else +-#error "__BYTE_ORDER not defined" +-#endif +- +-#define PATH_LEN_V1 256 +- +-struct cow_header_v1 { +- int magic; +- int version; +- char backing_file[PATH_LEN_V1]; +- time_t mtime; +- __u64 size; +- int sectorsize; +-}; +- +-#define PATH_LEN_V2 MAXPATHLEN +- +-struct cow_header_v2 { +- unsigned long magic; +- unsigned long version; +- char backing_file[PATH_LEN_V2]; +- time_t mtime; +- __u64 size; +- int sectorsize; +-}; +- +-union cow_header { +- struct cow_header_v1 v1; +- struct cow_header_v2 v2; +-}; +- +-#define COW_MAGIC 0x4f4f4f4d /* MOOO */ +-#define COW_VERSION 2 +- +-static void sizes(__u64 size, int sectorsize, int bitmap_offset, +- unsigned long *bitmap_len_out, int *data_offset_out) +-{ +- *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize); +- +- *data_offset_out = bitmap_offset + *bitmap_len_out; +- *data_offset_out = (*data_offset_out + sectorsize - 1) / sectorsize; +- *data_offset_out *= sectorsize; +-} +- +-static int read_cow_header(int fd, int *magic_out, char **backing_file_out, +- time_t *mtime_out, __u64 *size_out, +- int *sectorsize_out, int *bitmap_offset_out) +-{ +- union cow_header *header; +- char *file; +- int err, n; +- unsigned long version, magic; +- +- header = 
um_kmalloc(sizeof(*header)); +- if(header == NULL){ +- printk("read_cow_header - Failed to allocate header\n"); +- return(-ENOMEM); +- } +- err = -EINVAL; +- n = read(fd, header, sizeof(*header)); +- if(n < offsetof(typeof(header->v1), backing_file)){ +- printk("read_cow_header - short header\n"); +- goto out; +- } +- +- magic = header->v1.magic; +- if(magic == COW_MAGIC) { +- version = header->v1.version; +- } +- else if(magic == ntohl(COW_MAGIC)){ +- version = ntohl(header->v1.version); +- } +- else goto out; +- +- *magic_out = COW_MAGIC; +- +- if(version == 1){ +- if(n < sizeof(header->v1)){ +- printk("read_cow_header - failed to read V1 header\n"); +- goto out; +- } +- *mtime_out = header->v1.mtime; +- *size_out = header->v1.size; +- *sectorsize_out = header->v1.sectorsize; +- *bitmap_offset_out = sizeof(header->v1); +- file = header->v1.backing_file; +- } +- else if(version == 2){ +- if(n < sizeof(header->v2)){ +- printk("read_cow_header - failed to read V2 header\n"); +- goto out; +- } +- *mtime_out = ntohl(header->v2.mtime); +- *size_out = ntohll(header->v2.size); +- *sectorsize_out = ntohl(header->v2.sectorsize); +- *bitmap_offset_out = sizeof(header->v2); +- file = header->v2.backing_file; +- } +- else { +- printk("read_cow_header - invalid COW version\n"); +- goto out; +- } +- err = -ENOMEM; +- *backing_file_out = uml_strdup(file); +- if(*backing_file_out == NULL){ +- printk("read_cow_header - failed to allocate backing file\n"); +- goto out; +- } +- err = 0; +- out: +- kfree(header); +- return(err); +-} + + static int same_backing_files(char *from_cmdline, char *from_cow, char *cow) + { +- struct stat buf1, buf2; ++ struct uml_stat buf1, buf2; ++ int err; + + if(from_cmdline == NULL) return(1); + if(!strcmp(from_cmdline, from_cow)) return(1); + +- if(stat(from_cmdline, &buf1) < 0){ +- printk("Couldn't stat '%s', errno = %d\n", from_cmdline, +- errno); ++ err = os_stat_file(from_cmdline, &buf1); ++ if(err < 0){ ++ printk("Couldn't stat '%s', err = %d\n", 
from_cmdline, -err); + return(1); + } +- if(stat(from_cow, &buf2) < 0){ +- printk("Couldn't stat '%s', errno = %d\n", from_cow, errno); ++ err = os_stat_file(from_cow, &buf2); ++ if(err < 0){ ++ printk("Couldn't stat '%s', err = %d\n", from_cow, -err); + return(1); + } +- if((buf1.st_dev == buf2.st_dev) && (buf1.st_ino == buf2.st_ino)) ++ if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino)) + return(1); + + printk("Backing file mismatch - \"%s\" requested,\n" +@@ -174,20 +55,21 @@ + + static int backing_file_mismatch(char *file, __u64 size, time_t mtime) + { +- struct stat64 buf; ++ unsigned long modtime; + long long actual; + int err; + +- if(stat64(file, &buf) < 0){ +- printk("Failed to stat backing file \"%s\", errno = %d\n", +- file, errno); +- return(-errno); ++ err = os_file_modtime(file, &modtime); ++ if(err < 0){ ++ printk("Failed to get modification time of backing file " ++ "\"%s\", err = %d\n", file, -err); ++ return(err); + } + + err = os_file_size(file, &actual); +- if(err){ ++ if(err < 0){ + printk("Failed to get size of backing file \"%s\", " +- "errno = %d\n", file, -err); ++ "err = %d\n", file, -err); + return(err); + } + +@@ -196,9 +78,9 @@ + "file\n", size, actual); + return(-EINVAL); + } +- if(buf.st_mtime != mtime){ ++ if(modtime != mtime){ + printk("mtime mismatch (%ld vs %ld) of COW header vs backing " +- "file\n", mtime, buf.st_mtime); ++ "file\n", mtime, modtime); + return(-EINVAL); + } + return(0); +@@ -209,124 +91,16 @@ + int err; + + err = os_seek_file(fd, offset); +- if(err != 0) return(-errno); +- err = read(fd, buf, len); +- if(err < 0) return(-errno); +- return(0); +-} ++ if(err < 0) ++ return(err); + +-static int absolutize(char *to, int size, char *from) +-{ +- char save_cwd[256], *slash; +- int remaining; ++ err = os_read_file(fd, buf, len); ++ if(err < 0) ++ return(err); + +- if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) { +- printk("absolutize : unable to get cwd - errno = %d\n", errno); +- return(-1); +- } +- 
slash = strrchr(from, '/'); +- if(slash != NULL){ +- *slash = '\0'; +- if(chdir(from)){ +- *slash = '/'; +- printk("absolutize : Can't cd to '%s' - errno = %d\n", +- from, errno); +- return(-1); +- } +- *slash = '/'; +- if(getcwd(to, size) == NULL){ +- printk("absolutize : unable to get cwd of '%s' - " +- "errno = %d\n", from, errno); +- return(-1); +- } +- remaining = size - strlen(to); +- if(strlen(slash) + 1 > remaining){ +- printk("absolutize : unable to fit '%s' into %d " +- "chars\n", from, size); +- return(-1); +- } +- strcat(to, slash); +- } +- else { +- if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){ +- printk("absolutize : unable to fit '%s' into %d " +- "chars\n", from, size); +- return(-1); +- } +- strcpy(to, save_cwd); +- strcat(to, "/"); +- strcat(to, from); +- } +- chdir(save_cwd); + return(0); + } + +-static int write_cow_header(char *cow_file, int fd, char *backing_file, +- int sectorsize, long long *size) +-{ +- struct cow_header_v2 *header; +- struct stat64 buf; +- int err; +- +- err = os_seek_file(fd, 0); +- if(err != 0){ +- printk("write_cow_header - lseek failed, errno = %d\n", errno); +- return(-errno); +- } +- +- err = -ENOMEM; +- header = um_kmalloc(sizeof(*header)); +- if(header == NULL){ +- printk("Failed to allocate COW V2 header\n"); +- goto out; +- } +- header->magic = htonl(COW_MAGIC); +- header->version = htonl(COW_VERSION); +- +- err = -EINVAL; +- if(strlen(backing_file) > sizeof(header->backing_file) - 1){ +- printk("Backing file name \"%s\" is too long - names are " +- "limited to %d characters\n", backing_file, +- sizeof(header->backing_file) - 1); +- goto out_free; +- } +- +- if(absolutize(header->backing_file, sizeof(header->backing_file), +- backing_file)) +- goto out_free; +- +- err = stat64(header->backing_file, &buf); +- if(err < 0){ +- printk("Stat of backing file '%s' failed, errno = %d\n", +- header->backing_file, errno); +- err = -errno; +- goto out_free; +- } +- +- err = os_file_size(header->backing_file, size); +- 
if(err){ +- printk("Couldn't get size of backing file '%s', errno = %d\n", +- header->backing_file, -*size); +- goto out_free; +- } +- +- header->mtime = htonl(buf.st_mtime); +- header->size = htonll(*size); +- header->sectorsize = htonl(sectorsize); +- +- err = write(fd, header, sizeof(*header)); +- if(err != sizeof(*header)){ +- printk("Write of header to new COW file '%s' failed, " +- "errno = %d\n", cow_file, errno); +- goto out_free; +- } +- err = 0; +- out_free: +- kfree(header); +- out: +- return(err); +-} +- + int open_ubd_file(char *file, struct openflags *openflags, + char **backing_file_out, int *bitmap_offset_out, + unsigned long *bitmap_len_out, int *data_offset_out, +@@ -334,26 +108,36 @@ + { + time_t mtime; + __u64 size; ++ __u32 version, align; + char *backing_file; +- int fd, err, sectorsize, magic, same, mode = 0644; ++ int fd, err, sectorsize, same, mode = 0644; + +- if((fd = os_open_file(file, *openflags, mode)) < 0){ ++ fd = os_open_file(file, *openflags, mode); ++ if(fd < 0){ + if((fd == -ENOENT) && (create_cow_out != NULL)) + *create_cow_out = 1; + if(!openflags->w || + ((errno != EROFS) && (errno != EACCES))) return(-errno); + openflags->w = 0; +- if((fd = os_open_file(file, *openflags, mode)) < 0) ++ fd = os_open_file(file, *openflags, mode); ++ if(fd < 0) + return(fd); + } ++ ++ err = os_lock_file(fd, openflags->w); ++ if(err < 0){ ++ printk("Failed to lock '%s', err = %d\n", file, -err); ++ goto out_close; ++ } ++ + if(backing_file_out == NULL) return(fd); + +- err = read_cow_header(fd, &magic, &backing_file, &mtime, &size, +- &sectorsize, bitmap_offset_out); ++ err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime, ++ &size, &sectorsize, &align, bitmap_offset_out); + if(err && (*backing_file_out != NULL)){ + printk("Failed to read COW header from COW file \"%s\", " +- "errno = %d\n", file, err); +- goto error; ++ "errno = %d\n", file, -err); ++ goto out_close; + } + if(err) return(fd); + +@@ -363,36 +147,33 @@ + + if(!same && 
!backing_file_mismatch(*backing_file_out, size, mtime)){ + printk("Switching backing file to '%s'\n", *backing_file_out); +- err = write_cow_header(file, fd, *backing_file_out, +- sectorsize, &size); ++ err = write_cow_header(file, fd, *backing_file_out, ++ sectorsize, align, &size); + if(err){ +- printk("Switch failed, errno = %d\n", err); ++ printk("Switch failed, errno = %d\n", -err); + return(err); + } + } + else { + *backing_file_out = backing_file; + err = backing_file_mismatch(*backing_file_out, size, mtime); +- if(err) goto error; ++ if(err) goto out_close; + } + +- sizes(size, sectorsize, *bitmap_offset_out, bitmap_len_out, +- data_offset_out); ++ cow_sizes(version, size, sectorsize, align, *bitmap_offset_out, ++ bitmap_len_out, data_offset_out); + + return(fd); +- error: +- close(fd); ++ out_close: ++ os_close_file(fd); + return(err); + } + + int create_cow_file(char *cow_file, char *backing_file, struct openflags flags, +- int sectorsize, int *bitmap_offset_out, ++ int sectorsize, int alignment, int *bitmap_offset_out, + unsigned long *bitmap_len_out, int *data_offset_out) + { +- __u64 blocks; +- long zero; +- int err, fd, i; +- long long size; ++ int err, fd; + + flags.c = 1; + fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL); +@@ -403,57 +184,49 @@ + goto out; + } + +- err = write_cow_header(cow_file, fd, backing_file, sectorsize, &size); +- if(err) goto out_close; +- +- blocks = (size + sectorsize - 1) / sectorsize; +- blocks = (blocks + sizeof(long) * 8 - 1) / (sizeof(long) * 8); +- zero = 0; +- for(i = 0; i < blocks; i++){ +- err = write(fd, &zero, sizeof(zero)); +- if(err != sizeof(zero)){ +- printk("Write of bitmap to new COW file '%s' failed, " +- "errno = %d\n", cow_file, errno); +- goto out_close; +- } +- } +- +- sizes(size, sectorsize, sizeof(struct cow_header_v2), +- bitmap_len_out, data_offset_out); +- *bitmap_offset_out = sizeof(struct cow_header_v2); +- +- return(fd); +- +- out_close: +- close(fd); ++ err = 
init_cow_file(fd, cow_file, backing_file, sectorsize, alignment, ++ bitmap_offset_out, bitmap_len_out, ++ data_offset_out); ++ if(!err) ++ return(fd); ++ os_close_file(fd); + out: + return(err); + } + ++/* XXX Just trivial wrappers around os_read_file and os_write_file */ + int read_ubd_fs(int fd, void *buffer, int len) + { +- int n; +- +- n = read(fd, buffer, len); +- if(n < 0) return(-errno); +- else return(n); ++ return(os_read_file(fd, buffer, len)); + } + + int write_ubd_fs(int fd, char *buffer, int len) + { +- int n; +- +- n = write(fd, buffer, len); +- if(n < 0) return(-errno); +- else return(n); ++ return(os_write_file(fd, buffer, len)); + } + +-int ubd_is_dir(char *file) ++static int update_bitmap(struct io_thread_req *req) + { +- struct stat64 buf; ++ int n; ++ ++ if(req->cow_offset == -1) ++ return(0); ++ ++ n = os_seek_file(req->fds[1], req->cow_offset); ++ if(n < 0){ ++ printk("do_io - bitmap lseek failed : err = %d\n", -n); ++ return(1); ++ } ++ ++ n = os_write_file(req->fds[1], &req->bitmap_words, ++ sizeof(req->bitmap_words)); ++ if(n != sizeof(req->bitmap_words)){ ++ printk("do_io - bitmap update failed, err = %d fd = %d\n", -n, ++ req->fds[1]); ++ return(1); ++ } + +- if(stat64(file, &buf) < 0) return(0); +- return(S_ISDIR(buf.st_mode)); ++ return(0); + } + + void do_io(struct io_thread_req *req) +@@ -461,8 +234,18 @@ + char *buf; + unsigned long len; + int n, nsectors, start, end, bit; ++ int err; + __u64 off; + ++ if(req->op == UBD_MMAP){ ++ /* Touch the page to force the host to do any necessary IO to ++ * get it into memory ++ */ ++ n = *((volatile int *) req->buffer); ++ req->error = update_bitmap(req); ++ return; ++ } ++ + nsectors = req->length / req->sectorsize; + start = 0; + do { +@@ -473,15 +256,14 @@ + &req->sector_mask) == bit)) + end++; + +- if(end != nsectors) +- printk("end != nsectors\n"); + off = req->offset + req->offsets[bit] + + start * req->sectorsize; + len = (end - start) * req->sectorsize; + buf = &req->buffer[start * 
req->sectorsize]; + +- if(os_seek_file(req->fds[bit], off) != 0){ +- printk("do_io - lseek failed : errno = %d\n", errno); ++ err = os_seek_file(req->fds[bit], off); ++ if(err < 0){ ++ printk("do_io - lseek failed : err = %d\n", -err); + req->error = 1; + return; + } +@@ -490,11 +272,10 @@ + do { + buf = &buf[n]; + len -= n; +- n = read(req->fds[bit], buf, len); ++ n = os_read_file(req->fds[bit], buf, len); + if (n < 0) { +- printk("do_io - read returned %d : " +- "errno = %d fd = %d\n", n, +- errno, req->fds[bit]); ++ printk("do_io - read failed, err = %d " ++ "fd = %d\n", -n, req->fds[bit]); + req->error = 1; + return; + } +@@ -502,11 +283,10 @@ + if (n < len) memset(&buf[n], 0, len - n); + } + else { +- n = write(req->fds[bit], buf, len); ++ n = os_write_file(req->fds[bit], buf, len); + if(n != len){ +- printk("do_io - write returned %d : " +- "errno = %d fd = %d\n", n, +- errno, req->fds[bit]); ++ printk("do_io - write failed err = %d " ++ "fd = %d\n", -n, req->fds[bit]); + req->error = 1; + return; + } +@@ -515,24 +295,7 @@ + start = end; + } while(start < nsectors); + +- if(req->cow_offset != -1){ +- if(os_seek_file(req->fds[1], req->cow_offset) != 0){ +- printk("do_io - bitmap lseek failed : errno = %d\n", +- errno); +- req->error = 1; +- return; +- } +- n = write(req->fds[1], &req->bitmap_words, +- sizeof(req->bitmap_words)); +- if(n != sizeof(req->bitmap_words)){ +- printk("do_io - bitmap update returned %d : " +- "errno = %d fd = %d\n", n, errno, req->fds[1]); +- req->error = 1; +- return; +- } +- } +- req->error = 0; +- return; ++ req->error = update_bitmap(req); + } + + /* Changed in start_io_thread, which is serialized by being called only +@@ -550,19 +313,23 @@ + + signal(SIGWINCH, SIG_IGN); + while(1){ +- n = read(kernel_fd, &req, sizeof(req)); +- if(n < 0) printk("io_thread - read returned %d, errno = %d\n", +- n, errno); +- else if(n < sizeof(req)){ +- printk("io_thread - short read : length = %d\n", n); ++ n = os_read_file(kernel_fd, &req, 
sizeof(req)); ++ if(n != sizeof(req)){ ++ if(n < 0) ++ printk("io_thread - read failed, fd = %d, " ++ "err = %d\n", kernel_fd, -n); ++ else { ++ printk("io_thread - short read, fd = %d, " ++ "length = %d\n", kernel_fd, n); ++ } + continue; + } + io_count++; + do_io(&req); +- n = write(kernel_fd, &req, sizeof(req)); ++ n = os_write_file(kernel_fd, &req, sizeof(req)); + if(n != sizeof(req)) +- printk("io_thread - write failed, errno = %d\n", +- errno); ++ printk("io_thread - write failed, fd = %d, err = %d\n", ++ kernel_fd, -n); + } + } + +@@ -571,10 +338,11 @@ + int pid, fds[2], err; + + err = os_pipe(fds, 1, 1); +- if(err){ +- printk("start_io_thread - os_pipe failed, errno = %d\n", -err); +- return(-1); ++ if(err < 0){ ++ printk("start_io_thread - os_pipe failed, err = %d\n", -err); ++ goto out; + } ++ + kernel_fd = fds[0]; + *fd_out = fds[1]; + +@@ -582,32 +350,19 @@ + NULL); + if(pid < 0){ + printk("start_io_thread - clone failed : errno = %d\n", errno); +- return(-errno); ++ goto out_close; + } +- return(pid); +-} +- +-#ifdef notdef +-int start_io_thread(unsigned long sp, int *fd_out) +-{ +- int pid; + +- if((kernel_fd = get_pty()) < 0) return(-1); +- raw(kernel_fd, 0); +- if((*fd_out = open(ptsname(kernel_fd), O_RDWR)) < 0){ +- printk("Couldn't open tty for IO\n"); +- return(-1); +- } +- +- pid = clone(io_thread, (void *) sp, CLONE_FILES | CLONE_VM | SIGCHLD, +- NULL); +- if(pid < 0){ +- printk("start_io_thread - clone failed : errno = %d\n", errno); +- return(-errno); +- } + return(pid); ++ ++ out_close: ++ os_close_file(fds[0]); ++ os_close_file(fds[1]); ++ kernel_fd = -1; ++ *fd_out = -1; ++ out: ++ return(err); + } +-#endif + + /* + * Overrides for Emacs so that we follow Linus's tabbing style. 
+diff -Naur a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c +--- a/arch/um/drivers/xterm.c 2004-02-11 12:14:17.000000000 -0500 ++++ b/arch/um/drivers/xterm.c 2004-02-11 12:26:00.000000000 -0500 +@@ -8,7 +8,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -36,7 +35,8 @@ + { + struct xterm_chan *data; + +- if((data = malloc(sizeof(*data))) == NULL) return(NULL); ++ data = malloc(sizeof(*data)); ++ if(data == NULL) return(NULL); + *data = ((struct xterm_chan) { .pid = -1, + .helper_pid = -1, + .device = device, +@@ -93,7 +93,7 @@ + "/usr/lib/uml/port-helper", "-uml-socket", + file, NULL }; + +- if(access(argv[4], X_OK)) ++ if(os_access(argv[4], OS_ACC_X_OK) < 0) + argv[4] = "port-helper"; + + fd = mkstemp(file); +@@ -106,13 +106,13 @@ + printk("xterm_open : unlink failed, errno = %d\n", errno); + return(-errno); + } +- close(fd); ++ os_close_file(fd); + +- fd = create_unix_socket(file, sizeof(file)); ++ fd = os_create_unix_socket(file, sizeof(file), 1); + if(fd < 0){ + printk("xterm_open : create_unix_socket failed, errno = %d\n", + -fd); +- return(-fd); ++ return(fd); + } + + sprintf(title, data->title, data->device); +@@ -128,15 +128,16 @@ + if(data->direct_rcv) + new = os_rcv_fd(fd, &data->helper_pid); + else { +- if((err = os_set_fd_block(fd, 0)) != 0){ ++ err = os_set_fd_block(fd, 0); ++ if(err < 0){ + printk("xterm_open : failed to set descriptor " +- "non-blocking, errno = %d\n", err); ++ "non-blocking, err = %d\n", -err); + return(err); + } + new = xterm_fd(fd, &data->helper_pid); + } + if(new < 0){ +- printk("xterm_open : os_rcv_fd failed, errno = %d\n", -new); ++ printk("xterm_open : os_rcv_fd failed, err = %d\n", -new); + goto out; + } + +@@ -160,7 +161,7 @@ + if(data->helper_pid != -1) + os_kill_process(data->helper_pid, 0); + data->helper_pid = -1; +- close(fd); ++ os_close_file(fd); + } + + void xterm_free(void *d) +diff -Naur a/arch/um/drivers/xterm_kern.c b/arch/um/drivers/xterm_kern.c +--- 
a/arch/um/drivers/xterm_kern.c 2004-02-11 12:16:10.000000000 -0500 ++++ b/arch/um/drivers/xterm_kern.c 2004-02-11 12:28:20.000000000 -0500 +@@ -5,9 +5,12 @@ + + #include "linux/errno.h" + #include "linux/slab.h" ++#include "linux/signal.h" ++#include "linux/interrupt.h" + #include "asm/semaphore.h" + #include "asm/irq.h" + #include "irq_user.h" ++#include "irq_kern.h" + #include "kern_util.h" + #include "os.h" + #include "xterm.h" +@@ -19,17 +22,18 @@ + int new_fd; + }; + +-static void xterm_interrupt(int irq, void *data, struct pt_regs *regs) ++static irqreturn_t xterm_interrupt(int irq, void *data, struct pt_regs *regs) + { + struct xterm_wait *xterm = data; + int fd; + + fd = os_rcv_fd(xterm->fd, &xterm->pid); + if(fd == -EAGAIN) +- return; ++ return(IRQ_NONE); + + xterm->new_fd = fd; + up(&xterm->sem); ++ return(IRQ_HANDLED); + } + + int xterm_fd(int socket, int *pid_out) +@@ -54,7 +58,8 @@ + if(err){ + printk(KERN_ERR "xterm_fd : failed to get IRQ for xterm, " + "err = %d\n", err); +- return(err); ++ ret = err; ++ goto out; + } + down(&data->sem); + +@@ -62,6 +67,7 @@ + + ret = data->new_fd; + *pid_out = data->pid; ++ out: + kfree(data); + + return(ret); +diff -Naur a/arch/um/dyn.lds.S b/arch/um/dyn.lds.S +--- a/arch/um/dyn.lds.S 2004-02-11 12:15:45.000000000 -0500 ++++ b/arch/um/dyn.lds.S 2004-02-11 12:27:35.000000000 -0500 +@@ -10,12 +10,15 @@ + { + . = START + SIZEOF_HEADERS; + .interp : { *(.interp) } +- . = ALIGN(4096); + __binary_start = .; + . = ALIGN(4096); /* Init code and data */ + _stext = .; + __init_begin = .; +- .text.init : { *(.text.init) } ++ .init.text : { ++ _sinittext = .; ++ *(.init.text) ++ _einittext = .; ++ } + + . = ALIGN(4096); + +@@ -67,7 +70,7 @@ + + #include "asm/common.lds.S" + +- .data.init : { *(.data.init) } ++ init.data : { *(.init.data) } + + /* Ensure the __preinit_array_start label is properly aligned. 
We + could instead move the label definition inside the section, but +diff -Naur a/arch/um/include/2_5compat.h b/arch/um/include/2_5compat.h +--- a/arch/um/include/2_5compat.h 2004-02-11 12:15:23.000000000 -0500 ++++ b/arch/um/include/2_5compat.h 2004-02-11 12:27:10.000000000 -0500 +@@ -6,20 +6,6 @@ + #ifndef __2_5_COMPAT_H__ + #define __2_5_COMPAT_H__ + +-#include "linux/version.h" +- +-#define INIT_CONSOLE(dev_name, write_proc, device_proc, setup_proc, f) { \ +- name : dev_name, \ +- write : write_proc, \ +- read : NULL, \ +- device : device_proc, \ +- setup : setup_proc, \ +- flags : f, \ +- index : -1, \ +- cflag : 0, \ +- next : NULL \ +-} +- + #define INIT_HARDSECT(arr, maj, sizes) + + #define SET_PRI(task) do ; while(0) +diff -Naur a/arch/um/include/irq_kern.h b/arch/um/include/irq_kern.h +--- a/arch/um/include/irq_kern.h 1969-12-31 19:00:00.000000000 -0500 ++++ b/arch/um/include/irq_kern.h 2004-02-11 12:27:15.000000000 -0500 +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) ++ * Licensed under the GPL ++ */ ++ ++#ifndef __IRQ_KERN_H__ ++#define __IRQ_KERN_H__ ++ ++#include "linux/interrupt.h" ++ ++extern int um_request_irq(unsigned int irq, int fd, int type, ++ irqreturn_t (*handler)(int, void *, ++ struct pt_regs *), ++ unsigned long irqflags, const char * devname, ++ void *dev_id); ++ ++#endif ++ ++/* ++ * Overrides for Emacs so that we follow Linus's tabbing style. ++ * Emacs will notice this stuff at the end of the file and automatically ++ * adjust the settings for this buffer only. This must remain at the end ++ * of the file. 
++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -Naur a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h +--- a/arch/um/include/kern_util.h 2004-02-11 12:15:00.000000000 -0500 ++++ b/arch/um/include/kern_util.h 2004-02-11 12:26:51.000000000 -0500 +@@ -63,10 +63,9 @@ + extern void *syscall_sp(void *t); + extern void syscall_trace(void); + extern int hz(void); +-extern void idle_timer(void); ++extern void uml_idle_timer(void); + extern unsigned int do_IRQ(int irq, union uml_pt_regs *regs); + extern int external_pid(void *t); +-extern int pid_to_processor_id(int pid); + extern void boot_timer_handler(int sig); + extern void interrupt_end(void); + extern void initial_thread_cb(void (*proc)(void *), void *arg); +@@ -90,9 +89,7 @@ + extern char *uml_strdup(char *string); + extern void unprotect_kernel_mem(void); + extern void protect_kernel_mem(void); +-extern void set_kmem_end(unsigned long); + extern void uml_cleanup(void); +-extern int pid_to_processor_id(int pid); + extern void set_current(void *t); + extern void lock_signalled_task(void *t); + extern void IPI_handler(int cpu); +@@ -101,7 +98,9 @@ + extern int clear_user_proc(void *buf, int size); + extern int copy_to_user_proc(void *to, void *from, int size); + extern int copy_from_user_proc(void *to, void *from, int size); ++extern int strlen_user_proc(char *str); + extern void bus_handler(int sig, union uml_pt_regs *regs); ++extern void winch(int sig, union uml_pt_regs *regs); + extern long execute_syscall(void *r); + extern int smp_sigio_handler(void); + extern void *get_current(void); +@@ -112,6 +111,8 @@ + extern void free_irq(unsigned int, void *); + extern int um_in_interrupt(void); + extern int cpu(void); ++extern unsigned long long time_stamp(void); ++ + #endif + + /* +diff -Naur a/arch/um/include/line.h b/arch/um/include/line.h +--- a/arch/um/include/line.h 2004-02-11 12:16:27.000000000 -0500 
++++ b/arch/um/include/line.h 2004-02-11 12:28:24.000000000 -0500 +@@ -9,12 +9,14 @@ + #include "linux/list.h" + #include "linux/workqueue.h" + #include "linux/tty.h" ++#include "linux/interrupt.h" + #include "asm/semaphore.h" + #include "chan_user.h" + #include "mconsole_kern.h" + + struct line_driver { + char *name; ++ char *device_name; + char *devfs_name; + short major; + short minor_start; +@@ -67,8 +69,6 @@ + + #define LINES_INIT(n) { num : n } + +-extern void line_interrupt(int irq, void *data, struct pt_regs *unused); +-extern void line_write_interrupt(int irq, void *data, struct pt_regs *unused); + extern void line_close(struct line *lines, struct tty_struct *tty); + extern int line_open(struct line *lines, struct tty_struct *tty, + struct chan_opts *opts); +diff -Naur a/arch/um/include/mconsole.h b/arch/um/include/mconsole.h +--- a/arch/um/include/mconsole.h 2004-02-11 12:15:15.000000000 -0500 ++++ b/arch/um/include/mconsole.h 2004-02-11 12:27:02.000000000 -0500 +@@ -41,11 +41,13 @@ + + struct mc_request; + ++enum mc_context { MCONSOLE_INTR, MCONSOLE_PROC }; ++ + struct mconsole_command + { + char *command; + void (*handler)(struct mc_request *req); +- int as_interrupt; ++ enum mc_context context; + }; + + struct mc_request +@@ -77,6 +79,8 @@ + extern void mconsole_cad(struct mc_request *req); + extern void mconsole_stop(struct mc_request *req); + extern void mconsole_go(struct mc_request *req); ++extern void mconsole_log(struct mc_request *req); ++extern void mconsole_proc(struct mc_request *req); + + extern int mconsole_get_request(int fd, struct mc_request *req); + extern int mconsole_notify(char *sock_name, int type, const void *data, +diff -Naur a/arch/um/include/mem.h b/arch/um/include/mem.h +--- a/arch/um/include/mem.h 2004-02-11 12:17:08.000000000 -0500 ++++ b/arch/um/include/mem.h 2004-02-11 12:29:12.000000000 -0500 +@@ -1,19 +1,18 @@ + /* +- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) ++ * Copyright (C) 2002, 2003 Jeff Dike 
(jdike@addtoit.com) + * Licensed under the GPL + */ + + #ifndef __MEM_H__ + #define __MEM_H__ + +-struct vm_reserved { +- struct list_head list; +- unsigned long start; +- unsigned long end; +-}; ++#include "linux/types.h" + +-extern void set_usable_vm(unsigned long start, unsigned long end); +-extern void set_kmem_end(unsigned long new); ++extern int phys_mapping(unsigned long phys, __u64 *offset_out); ++extern int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w); ++extern int is_remapped(void *virt); ++extern int physmem_remove_mapping(void *virt); ++extern void physmem_forget_descriptor(int fd); + + #endif + +diff -Naur a/arch/um/include/mem_kern.h b/arch/um/include/mem_kern.h +--- a/arch/um/include/mem_kern.h 1969-12-31 19:00:00.000000000 -0500 ++++ b/arch/um/include/mem_kern.h 2004-02-11 12:27:40.000000000 -0500 +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (C) 2003 Jeff Dike (jdike@addtoit.com) ++ * Licensed under the GPL ++ */ ++ ++#ifndef __MEM_KERN_H__ ++#define __MEM_KERN_H__ ++ ++#include "linux/list.h" ++#include "linux/types.h" ++ ++struct remapper { ++ struct list_head list; ++ int (*proc)(int, unsigned long, int, __u64); ++}; ++ ++extern void register_remapper(struct remapper *info); ++ ++#endif ++ ++/* ++ * Overrides for Emacs so that we follow Linus's tabbing style. ++ * Emacs will notice this stuff at the end of the file and automatically ++ * adjust the settings for this buffer only. This must remain at the end ++ * of the file. 
++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -Naur a/arch/um/include/mem_user.h b/arch/um/include/mem_user.h +--- a/arch/um/include/mem_user.h 2004-02-11 12:16:03.000000000 -0500 ++++ b/arch/um/include/mem_user.h 2004-02-11 12:27:57.000000000 -0500 +@@ -32,43 +32,38 @@ + #ifndef _MEM_USER_H + #define _MEM_USER_H + +-struct mem_region { ++struct iomem_region { ++ struct iomem_region *next; + char *driver; +- unsigned long start_pfn; +- unsigned long start; +- unsigned long len; +- void *mem_map; + int fd; ++ int size; ++ unsigned long phys; ++ unsigned long virt; + }; + +-extern struct mem_region *regions[]; +-extern struct mem_region physmem_region; ++extern struct iomem_region *iomem_regions; ++extern int iomem_size; + + #define ROUND_4M(n) ((((unsigned long) (n)) + (1 << 22)) & ~((1 << 22) - 1)) + + extern unsigned long host_task_size; + extern unsigned long task_size; + ++extern void check_devanon(void); + extern int init_mem_user(void); + extern int create_mem_file(unsigned long len); +-extern void setup_range(int fd, char *driver, unsigned long start, +- unsigned long pfn, unsigned long total, int need_vm, +- struct mem_region *region, void *reserved); + extern void setup_memory(void *entry); + extern unsigned long find_iomem(char *driver, unsigned long *len_out); +-extern int init_maps(struct mem_region *region); +-extern int nregions(void); +-extern int reserve_vm(unsigned long start, unsigned long end, void *e); ++extern int init_maps(unsigned long physmem, unsigned long iomem, ++ unsigned long highmem); + extern unsigned long get_vm(unsigned long len); + extern void setup_physmem(unsigned long start, unsigned long usable, +- unsigned long len); +-extern int setup_region(struct mem_region *region, void *entry); ++ unsigned long len, unsigned long highmem); + extern void add_iomem(char *name, int fd, unsigned long size); +-extern struct mem_region 
*phys_region(unsigned long phys); + extern unsigned long phys_offset(unsigned long phys); + extern void unmap_physmem(void); +-extern int map_memory(unsigned long virt, unsigned long phys, +- unsigned long len, int r, int w, int x); ++extern void map_memory(unsigned long virt, unsigned long phys, ++ unsigned long len, int r, int w, int x); + extern int protect_memory(unsigned long addr, unsigned long len, + int r, int w, int x, int must_succeed); + extern unsigned long get_kmem_end(void); +diff -Naur a/arch/um/include/os.h b/arch/um/include/os.h +--- a/arch/um/include/os.h 2004-02-11 12:14:31.000000000 -0500 ++++ b/arch/um/include/os.h 2004-02-11 12:26:12.000000000 -0500 +@@ -17,6 +17,32 @@ + #define OS_TYPE_FIFO 6 + #define OS_TYPE_SOCK 7 + ++/* os_access() flags */ ++#define OS_ACC_F_OK 0 /* Test for existence. */ ++#define OS_ACC_X_OK 1 /* Test for execute permission. */ ++#define OS_ACC_W_OK 2 /* Test for write permission. */ ++#define OS_ACC_R_OK 4 /* Test for read permission. */ ++#define OS_ACC_RW_OK (OS_ACC_W_OK | OS_ACC_R_OK) /* Test for RW permission */ ++ ++/* ++ * types taken from stat_file() in hostfs_user.c ++ * (if they are wrong here, they are wrong there...). 
++ */ ++struct uml_stat { ++ int ust_dev; /* device */ ++ unsigned long long ust_ino; /* inode */ ++ int ust_mode; /* protection */ ++ int ust_nlink; /* number of hard links */ ++ int ust_uid; /* user ID of owner */ ++ int ust_gid; /* group ID of owner */ ++ unsigned long long ust_size; /* total size, in bytes */ ++ int ust_blksize; /* blocksize for filesystem I/O */ ++ unsigned long long ust_blocks; /* number of blocks allocated */ ++ unsigned long ust_atime; /* time of last access */ ++ unsigned long ust_mtime; /* time of last modification */ ++ unsigned long ust_ctime; /* time of last change */ ++}; ++ + struct openflags { + unsigned int r : 1; + unsigned int w : 1; +@@ -84,29 +110,47 @@ + flags.e = 1; + return(flags); + } +- ++ + static inline struct openflags of_cloexec(struct openflags flags) + { + flags.cl = 1; + return(flags); + } + ++extern int os_stat_file(const char *file_name, struct uml_stat *buf); ++extern int os_stat_fd(const int fd, struct uml_stat *buf); ++extern int os_access(const char *file, int mode); ++extern void os_print_error(int error, const char* str); ++extern int os_get_exec_close(int fd, int *close_on_exec); ++extern int os_set_exec_close(int fd, int close_on_exec); ++extern int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg); ++extern int os_window_size(int fd, int *rows, int *cols); ++extern int os_new_tty_pgrp(int fd, int pid); ++extern int os_get_ifname(int fd, char *namebuf); ++extern int os_set_slip(int fd); ++extern int os_set_owner(int fd, int pid); ++extern int os_sigio_async(int master, int slave); ++extern int os_mode_fd(int fd, int mode); ++ + extern int os_seek_file(int fd, __u64 offset); + extern int os_open_file(char *file, struct openflags flags, int mode); + extern int os_read_file(int fd, void *buf, int len); +-extern int os_write_file(int fd, void *buf, int count); ++extern int os_write_file(int fd, const void *buf, int count); + extern int os_file_size(char *file, long long *size_out); ++extern int 
os_file_modtime(char *file, unsigned long *modtime); + extern int os_pipe(int *fd, int stream, int close_on_exec); + extern int os_set_fd_async(int fd, int owner); + extern int os_set_fd_block(int fd, int blocking); + extern int os_accept_connection(int fd); ++extern int os_create_unix_socket(char *file, int len, int close_on_exec); + extern int os_shutdown_socket(int fd, int r, int w); + extern void os_close_file(int fd); + extern int os_rcv_fd(int fd, int *helper_pid_out); +-extern int create_unix_socket(char *file, int len); ++extern int create_unix_socket(char *file, int len, int close_on_exec); + extern int os_connect_socket(char *name); + extern int os_file_type(char *file); + extern int os_file_mode(char *file, struct openflags *mode_out); ++extern int os_lock_file(int fd, int excl); + + extern unsigned long os_process_pc(int pid); + extern int os_process_parent(int pid); +@@ -115,11 +159,12 @@ + extern void os_usr1_process(int pid); + extern int os_getpid(void); + +-extern int os_map_memory(void *virt, int fd, unsigned long off, ++extern int os_map_memory(void *virt, int fd, unsigned long long off, + unsigned long len, int r, int w, int x); + extern int os_protect_memory(void *addr, unsigned long len, + int r, int w, int x); + extern int os_unmap_memory(void *addr, int len); ++extern void os_flush_stdout(void); + + #endif + +diff -Naur a/arch/um/include/skas_ptrace.h b/arch/um/include/skas_ptrace.h +--- a/arch/um/include/skas_ptrace.h 2004-02-11 12:16:37.000000000 -0500 ++++ b/arch/um/include/skas_ptrace.h 2004-02-11 12:28:37.000000000 -0500 +@@ -1,5 +1,5 @@ + /* +- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) ++ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) + * Licensed under the GPL + */ + +diff -Naur a/arch/um/include/sysdep-i386/frame_user.h b/arch/um/include/sysdep-i386/frame_user.h +--- a/arch/um/include/sysdep-i386/frame_user.h 2004-02-11 12:16:44.000000000 -0500 ++++ b/arch/um/include/sysdep-i386/frame_user.h 2004-02-11 
12:29:00.000000000 -0500 +@@ -56,26 +56,26 @@ + * it would have to be __builtin_frame_address(1). + */ + +-static inline unsigned long frame_restorer(void) +-{ +- unsigned long *fp; +- +- fp = __builtin_frame_address(0); +- return((unsigned long) (fp + 1)); +-} ++#define frame_restorer() \ ++({ \ ++ unsigned long *fp; \ ++\ ++ fp = __builtin_frame_address(0); \ ++ ((unsigned long) (fp + 1)); \ ++}) + + /* Similarly, this returns the value of sp when the handler was first + * entered. This is used to calculate the proper sp when delivering + * signals. + */ + +-static inline unsigned long frame_sp(void) +-{ +- unsigned long *fp; +- +- fp = __builtin_frame_address(0); +- return((unsigned long) (fp + 1)); +-} ++#define frame_sp() \ ++({ \ ++ unsigned long *fp; \ ++\ ++ fp = __builtin_frame_address(0); \ ++ ((unsigned long) (fp + 1)); \ ++}) + + #endif + +diff -Naur a/arch/um/include/sysdep-i386/sigcontext.h b/arch/um/include/sysdep-i386/sigcontext.h +--- a/arch/um/include/sysdep-i386/sigcontext.h 2004-02-11 12:16:08.000000000 -0500 ++++ b/arch/um/include/sysdep-i386/sigcontext.h 2004-02-11 12:28:19.000000000 -0500 +@@ -28,8 +28,8 @@ + */ + #define SC_START_SYSCALL(sc) do SC_EAX(sc) = -ENOSYS; while(0) + +-/* These are General Protection and Page Fault */ +-#define SEGV_IS_FIXABLE(trap) ((trap == 13) || (trap == 14)) ++/* This is Page Fault */ ++#define SEGV_IS_FIXABLE(trap) (trap == 14) + + #define SC_SEGV_IS_FIXABLE(sc) (SEGV_IS_FIXABLE(SC_TRAPNO(sc))) + +diff -Naur a/arch/um/include/ubd_user.h b/arch/um/include/ubd_user.h +--- a/arch/um/include/ubd_user.h 2004-02-11 12:15:48.000000000 -0500 ++++ b/arch/um/include/ubd_user.h 2004-02-11 12:27:41.000000000 -0500 +@@ -9,7 +9,7 @@ + + #include "os.h" + +-enum ubd_req { UBD_READ, UBD_WRITE }; ++enum ubd_req { UBD_READ, UBD_WRITE, UBD_MMAP }; + + struct io_thread_req { + enum ubd_req op; +@@ -20,8 +20,10 @@ + char *buffer; + int sectorsize; + unsigned long sector_mask; +- unsigned long cow_offset; ++ unsigned long long 
cow_offset; + unsigned long bitmap_words[2]; ++ int map_fd; ++ unsigned long long map_offset; + int error; + }; + +@@ -31,7 +33,7 @@ + int *create_cow_out); + extern int create_cow_file(char *cow_file, char *backing_file, + struct openflags flags, int sectorsize, +- int *bitmap_offset_out, ++ int alignment, int *bitmap_offset_out, + unsigned long *bitmap_len_out, + int *data_offset_out); + extern int read_cow_bitmap(int fd, void *buf, int offset, int len); +@@ -39,7 +41,6 @@ + extern int write_ubd_fs(int fd, char *buffer, int len); + extern int start_io_thread(unsigned long sp, int *fds_out); + extern void do_io(struct io_thread_req *req); +-extern int ubd_is_dir(char *file); + + static inline int ubd_test_bit(__u64 bit, unsigned char *data) + { +diff -Naur a/arch/um/include/um_uaccess.h b/arch/um/include/um_uaccess.h +--- a/arch/um/include/um_uaccess.h 2004-02-11 12:16:07.000000000 -0500 ++++ b/arch/um/include/um_uaccess.h 2004-02-11 12:28:02.000000000 -0500 +@@ -38,22 +38,73 @@ + from, n)); + } + ++/* ++ * strncpy_from_user: - Copy a NUL terminated string from userspace. ++ * @dst: Destination address, in kernel space. This buffer must be at ++ * least @count bytes long. ++ * @src: Source address, in user space. ++ * @count: Maximum number of bytes to copy, including the trailing NUL. ++ * ++ * Copies a NUL-terminated string from userspace to kernel space. ++ * ++ * On success, returns the length of the string (not including the trailing ++ * NUL). ++ * ++ * If access to userspace fails, returns -EFAULT (some data may have been ++ * copied). ++ * ++ * If @count is smaller than the length of the string, copies @count bytes ++ * and returns @count. ++ */ ++ + static inline int strncpy_from_user(char *dst, const char *src, int count) + { + return(CHOOSE_MODE_PROC(strncpy_from_user_tt, strncpy_from_user_skas, + dst, src, count)); + } + ++/* ++ * __clear_user: - Zero a block of memory in user space, with less checking. ++ * @to: Destination address, in user space. 
++ * @n: Number of bytes to zero. ++ * ++ * Zero a block of memory in user space. Caller must check ++ * the specified block with access_ok() before calling this function. ++ * ++ * Returns number of bytes that could not be cleared. ++ * On success, this will be zero. ++ */ + static inline int __clear_user(void *mem, int len) + { + return(CHOOSE_MODE_PROC(__clear_user_tt, __clear_user_skas, mem, len)); + } + ++/* ++ * clear_user: - Zero a block of memory in user space. ++ * @mem: Destination address, in user space. ++ * @len: Number of bytes to zero. ++ * ++ * Zero a block of memory in user space. ++ * ++ * Returns number of bytes that could not be cleared. ++ * On success, this will be zero. ++ */ + static inline int clear_user(void *mem, int len) + { + return(CHOOSE_MODE_PROC(clear_user_tt, clear_user_skas, mem, len)); + } + ++/* ++ * strnlen_user: - Get the size of a string in user space. ++ * @str: The string to measure. ++ * @len: The maximum valid length ++ * ++ * Get the size of a NUL-terminated string in user space. ++ * ++ * Returns the size of the string INCLUDING the terminating NUL. ++ * On exception, returns 0. ++ * If the string is too long, returns a value greater than @len.
++ */ + static inline int strnlen_user(const void *str, int len) + { + return(CHOOSE_MODE_PROC(strnlen_user_tt, strnlen_user_skas, str, len)); +diff -Naur a/arch/um/include/user.h b/arch/um/include/user.h +--- a/arch/um/include/user.h 2004-02-11 12:14:17.000000000 -0500 ++++ b/arch/um/include/user.h 2004-02-11 12:26:00.000000000 -0500 +@@ -14,6 +14,7 @@ + extern void kfree(void *ptr); + extern int in_aton(char *str); + extern int open_gdb_chan(void); ++extern int strlcpy(char *, const char *, int); + + #endif + +diff -Naur a/arch/um/include/user_util.h b/arch/um/include/user_util.h +--- a/arch/um/include/user_util.h 2004-02-11 12:14:23.000000000 -0500 ++++ b/arch/um/include/user_util.h 2004-02-11 12:26:06.000000000 -0500 +@@ -14,8 +14,6 @@ + extern int unlockpt(int __fd); + extern char *ptsname(int __fd); + +-enum { OP_NONE, OP_EXEC, OP_FORK, OP_TRACE_ON, OP_REBOOT, OP_HALT, OP_CB }; +- + struct cpu_task { + int pid; + void *task; +@@ -59,7 +57,6 @@ + extern void *add_signal_handler(int sig, void (*handler)(int)); + extern int start_fork_tramp(void *arg, unsigned long temp_stack, + int clone_flags, int (*tramp)(void *)); +-extern int clone_and_wait(int (*fn)(void *), void *arg, void *sp, int flags); + extern int linux_main(int argc, char **argv); + extern void set_cmdline(char *cmd); + extern void input_cb(void (*proc)(void *), void *arg, int arg_len); +@@ -86,11 +83,13 @@ + extern int run_kernel_thread(int (*fn)(void *), void *arg, void **jmp_ptr); + extern void write_sigio_workaround(void); + extern void arch_check_bugs(void); ++extern int cpu_feature(char *what, char *buf, int len); + extern int arch_handle_signal(int sig, union uml_pt_regs *regs); + extern int arch_fixup(unsigned long address, void *sc_ptr); + extern void forward_pending_sigio(int target); + extern int can_do_skas(void); +- ++extern void arch_init_thread(void); ++ + #endif + + /* +diff -Naur a/arch/um/Kconfig b/arch/um/Kconfig +--- a/arch/um/Kconfig 2004-02-11 12:15:26.000000000 -0500 ++++ 
b/arch/um/Kconfig 2004-02-11 12:27:15.000000000 -0500 +@@ -61,6 +61,20 @@ + + config NET + bool "Networking support" ++ help ++ Unless you really know what you are doing, you should say Y here. ++ The reason is that some programs need kernel networking support even ++ when running on a stand-alone machine that isn't connected to any ++ other computer. If you are upgrading from an older kernel, you ++ should consider updating your networking tools too because changes ++ in the kernel and the tools often go hand in hand. The tools are ++ contained in the package net-tools, the location and version number ++ of which are given in Documentation/Changes. ++ ++ For a general introduction to Linux networking, it is highly ++ recommended to read the NET-HOWTO, available from ++ <http://www.tldp.org/docs.html#howto>. ++ + + source "fs/Kconfig.binfmt" + +@@ -85,6 +99,19 @@ + If you'd like to be able to work with files stored on the host, + say Y or M here; otherwise say N. + ++config HPPFS ++ tristate "HoneyPot ProcFS" ++ help ++ hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc ++ entries to be overridden, removed, or fabricated from the host. ++ Its purpose is to allow a UML to appear to be a physical machine ++ by removing or changing anything in /proc which gives away the ++ identity of a UML. ++ ++ See http://user-mode-linux.sf.net/hppfs.html for more information. ++ ++ You only need this if you are setting up a UML honeypot. Otherwise, ++ it is safe to say 'N' here. + + config MCONSOLE + bool "Management console" +@@ -105,6 +132,16 @@ + config MAGIC_SYSRQ + bool "Magic SysRq key" + depends on MCONSOLE ++ help ++ If you say Y here, you will have some control over the system even ++ if the system crashes for example during kernel debugging (e.g., you ++ will be able to flush the buffer cache to disk, reboot the system ++ immediately or dump some status information). This is accomplished ++ by pressing various keys while holding SysRq (Alt+PrintScreen).
It ++ also works on a serial console (on PC hardware at least), if you ++ send a BREAK and then within 5 seconds a command keypress. The ++ keys are documented in Documentation/sysrq.txt. Don't say Y ++ unless you really know what this hack does. + + config HOST_2G_2G + bool "2G/2G host address space split" +@@ -160,6 +197,9 @@ + config HIGHMEM + bool "Highmem support" + ++config PROC_MM ++ bool "/proc/mm support" ++ + config KERNEL_STACK_ORDER + int "Kernel stack size order" + default 2 +@@ -168,6 +208,17 @@ + be 1 << order pages. The default is OK unless you're running Valgrind + on UML, in which case, set this to 3. + ++config UML_REAL_TIME_CLOCK ++ bool "Real-time Clock" ++ default y ++ help ++ This option makes UML time deltas match wall clock deltas. This should ++ normally be enabled. The exception would be if you are debugging with ++ UML and spend long times with UML stopped at a breakpoint. In this ++ case, when UML is restarted, it will call the timer enough times to make ++ up for the time spent at the breakpoint. This could result in a ++ noticeable lag. If this is a problem, then disable this option. ++ + endmenu + + source "init/Kconfig" +@@ -240,6 +291,10 @@ + config PT_PROXY + bool "Enable ptrace proxy" + depends on XTERM_CHAN && DEBUG_INFO ++ help ++ This option enables a debugging interface which allows gdb to debug ++ the kernel without needing to actually attach to kernel threads. ++ If you want to do kernel debugging, say Y here; otherwise say N. + + config GPROF + bool "Enable gprof support" +diff -Naur a/arch/um/Kconfig_block b/arch/um/Kconfig_block +--- a/arch/um/Kconfig_block 2004-02-11 12:16:04.000000000 -0500 ++++ b/arch/um/Kconfig_block 2004-02-11 12:28:00.000000000 -0500 +@@ -29,6 +29,20 @@ + wise choice too. In all other cases (for example, if you're just + playing around with User-Mode Linux) you can choose N.
+ ++# Turn this back on when the driver actually works ++# ++#config BLK_DEV_COW ++# tristate "COW block device" ++# help ++# This is a layered driver which sits above two other block devices. ++# One is read-only, and the other is a read-write layer which stores ++# all changes. This provides the illusion that the read-only layer ++# can be mounted read-write and changed. ++ ++config BLK_DEV_COW_COMMON ++ bool ++ default BLK_DEV_COW || BLK_DEV_UBD ++ + config BLK_DEV_LOOP + tristate "Loopback device support" + +diff -Naur a/arch/um/Kconfig_net b/arch/um/Kconfig_net +--- a/arch/um/Kconfig_net 2004-02-11 12:15:54.000000000 -0500 ++++ b/arch/um/Kconfig_net 2004-02-11 12:27:47.000000000 -0500 +@@ -1,5 +1,5 @@ + +-menu "Network Devices" ++menu "UML Network Devices" + depends on NET + + # UML virtual driver +@@ -176,73 +176,5 @@ + + Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp" + +- +-# Below are hardware-independent drivers mirrored from +-# drivers/net/Config.in. It would be nice if Linux +-# had HW independent drivers separated from the other +-# but it does not. 
Until then each non-ISA/PCI arch +-# needs to provide it's own menu of network drivers +-config DUMMY +- tristate "Dummy net driver support" +- +-config BONDING +- tristate "Bonding driver support" +- +-config EQUALIZER +- tristate "EQL (serial line load balancing) support" +- +-config TUN +- tristate "Universal TUN/TAP device driver support" +- +-config ETHERTAP +- tristate "Ethertap network tap (OBSOLETE)" +- depends on EXPERIMENTAL && NETLINK +- +-config PPP +- tristate "PPP (point-to-point protocol) support" +- +-config PPP_MULTILINK +- bool "PPP multilink support (EXPERIMENTAL)" +- depends on PPP && EXPERIMENTAL +- +-config PPP_FILTER +- bool "PPP filtering" +- depends on PPP && FILTER +- +-config PPP_ASYNC +- tristate "PPP support for async serial ports" +- depends on PPP +- +-config PPP_SYNC_TTY +- tristate "PPP support for sync tty ports" +- depends on PPP +- +-config PPP_DEFLATE +- tristate "PPP Deflate compression" +- depends on PPP +- +-config PPP_BSDCOMP +- tristate "PPP BSD-Compress compression" +- depends on PPP +- +-config PPPOE +- tristate "PPP over Ethernet (EXPERIMENTAL)" +- depends on PPP && EXPERIMENTAL +- +-config SLIP +- tristate "SLIP (serial line) support" +- +-config SLIP_COMPRESSED +- bool "CSLIP compressed headers" +- depends on SLIP=y +- +-config SLIP_SMART +- bool "Keepalive and linefill" +- depends on SLIP=y +- +-config SLIP_MODE_SLIP6 +- bool "Six bit SLIP encapsulation" +- depends on SLIP=y +- + endmenu + +diff -Naur a/arch/um/kernel/config.c.in b/arch/um/kernel/config.c.in +--- a/arch/um/kernel/config.c.in 2004-02-11 12:16:10.000000000 -0500 ++++ b/arch/um/kernel/config.c.in 2004-02-11 12:28:20.000000000 -0500 +@@ -7,9 +7,7 @@ + #include + #include "init.h" + +-static __initdata char *config = " +-CONFIG +-"; ++static __initdata char *config = "CONFIG"; + + static int __init print_config(char *line, int *add) + { +diff -Naur a/arch/um/kernel/exec_kern.c b/arch/um/kernel/exec_kern.c +--- a/arch/um/kernel/exec_kern.c 2004-02-11 
12:14:34.000000000 -0500 ++++ b/arch/um/kernel/exec_kern.c 2004-02-11 12:26:17.000000000 -0500 +@@ -32,10 +32,15 @@ + CHOOSE_MODE_PROC(start_thread_tt, start_thread_skas, regs, eip, esp); + } + ++extern void log_exec(char **argv, void *tty); ++ + static int execve1(char *file, char **argv, char **env) + { + int error; + ++#ifdef CONFIG_TTY_LOG ++ log_exec(argv, current->tty); ++#endif + error = do_execve(file, argv, env, &current->thread.regs); + if (error == 0){ + current->ptrace &= ~PT_DTRACE; +diff -Naur a/arch/um/kernel/frame.c b/arch/um/kernel/frame.c +--- a/arch/um/kernel/frame.c 2004-02-11 12:14:57.000000000 -0500 ++++ b/arch/um/kernel/frame.c 2004-02-11 12:26:47.000000000 -0500 +@@ -279,7 +279,7 @@ + struct sc_frame_raw raw_sc; + struct si_frame_raw raw_si; + void *stack, *sigstack; +- unsigned long top, sig_top, base; ++ unsigned long top, base; + + stack = mmap(NULL, PAGE_SIZE, PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +@@ -292,7 +292,6 @@ + } + + top = (unsigned long) stack + PAGE_SIZE - sizeof(void *); +- sig_top = (unsigned long) sigstack + PAGE_SIZE; + + /* Get the sigcontext, no sigrestorer layout */ + raw_sc.restorer = 0; +diff -Naur a/arch/um/kernel/frame_kern.c b/arch/um/kernel/frame_kern.c +--- a/arch/um/kernel/frame_kern.c 2004-02-11 12:17:12.000000000 -0500 ++++ b/arch/um/kernel/frame_kern.c 2004-02-11 12:29:17.000000000 -0500 +@@ -6,7 +6,6 @@ + #include "asm/ptrace.h" + #include "asm/uaccess.h" + #include "asm/signal.h" +-#include "asm/uaccess.h" + #include "asm/ucontext.h" + #include "frame_kern.h" + #include "sigcontext.h" +@@ -29,12 +28,15 @@ + sizeof(restorer))); + } + ++extern int userspace_pid[]; ++ + static int copy_sc_to_user(void *to, void *fp, struct pt_regs *from, + struct arch_frame_data *arch) + { + return(CHOOSE_MODE(copy_sc_to_user_tt(to, fp, UPT_SC(&from->regs), + arch), +- copy_sc_to_user_skas(to, fp, &from->regs, ++ copy_sc_to_user_skas(userspace_pid[0], to, fp, ++ &from->regs, +
current->thread.cr2, + current->thread.err))); + } +diff -Naur a/arch/um/kernel/helper.c b/arch/um/kernel/helper.c +--- a/arch/um/kernel/helper.c 2004-02-11 12:15:26.000000000 -0500 ++++ b/arch/um/kernel/helper.c 2004-02-11 12:27:13.000000000 -0500 +@@ -7,7 +7,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -33,6 +32,7 @@ + { + struct helper_data *data = arg; + char **argv = data->argv; ++ int errval; + + if(helper_pause){ + signal(SIGHUP, helper_hup); +@@ -41,8 +41,9 @@ + if(data->pre_exec != NULL) + (*data->pre_exec)(data->pre_data); + execvp(argv[0], argv); ++ errval = errno; + printk("execvp of '%s' failed - errno = %d\n", argv[0], errno); +- write(data->fd, &errno, sizeof(errno)); ++ os_write_file(data->fd, &errval, sizeof(errval)); + os_kill_process(os_getpid(), 0); + return(0); + } +@@ -59,17 +60,20 @@ + if((stack_out != NULL) && (*stack_out != 0)) + stack = *stack_out; + else stack = alloc_stack(0, um_in_interrupt()); +- if(stack == 0) return(-ENOMEM); ++ if(stack == 0) ++ return(-ENOMEM); + + err = os_pipe(fds, 1, 0); +- if(err){ +- printk("run_helper : pipe failed, errno = %d\n", -err); +- return(err); ++ if(err < 0){ ++ printk("run_helper : pipe failed, err = %d\n", -err); ++ goto out_free; + } +- if(fcntl(fds[1], F_SETFD, 1) != 0){ +- printk("run_helper : setting FD_CLOEXEC failed, errno = %d\n", +- errno); +- return(-errno); ++ ++ err = os_set_exec_close(fds[1], 1); ++ if(err < 0){ ++ printk("run_helper : setting FD_CLOEXEC failed, err = %d\n", ++ -err); ++ goto out_close; + } + + sp = stack + page_size() - sizeof(void *); +@@ -80,23 +84,34 @@ + pid = clone(helper_child, (void *) sp, CLONE_VM | SIGCHLD, &data); + if(pid < 0){ + printk("run_helper : clone failed, errno = %d\n", errno); +- return(-errno); ++ err = -errno; ++ goto out_close; + } +- close(fds[1]); +- n = read(fds[0], &err, sizeof(err)); ++ ++ os_close_file(fds[1]); ++ n = os_read_file(fds[0], &err, sizeof(err)); + if(n < 0){ +- printk("run_helper : 
read on pipe failed, errno = %d\n", +- errno); +- return(-errno); ++ printk("run_helper : read on pipe failed, err = %d\n", -n); ++ err = n; ++ goto out_kill; + } + else if(n != 0){ + waitpid(pid, NULL, 0); +- pid = -err; ++ pid = -errno; + } + + if(stack_out == NULL) free_stack(stack, 0); + else *stack_out = stack; + return(pid); ++ ++ out_kill: ++ os_kill_process(pid, 1); ++ out_close: ++ os_close_file(fds[0]); ++ os_close_file(fds[1]); ++ out_free: ++ free_stack(stack, 0); ++ return(err); + } + + int run_helper_thread(int (*proc)(void *), void *arg, unsigned int flags, +@@ -117,9 +132,11 @@ + } + if(stack_out == NULL){ + pid = waitpid(pid, &status, 0); +- if(pid < 0) ++ if(pid < 0){ + printk("run_helper_thread - wait failed, errno = %d\n", +- pid); ++ errno); ++ pid = -errno; ++ } + if(!WIFEXITED(status) || (WEXITSTATUS(status) != 0)) + printk("run_helper_thread - thread returned status " + "0x%x\n", status); +diff -Naur a/arch/um/kernel/initrd_user.c b/arch/um/kernel/initrd_user.c +--- a/arch/um/kernel/initrd_user.c 2004-02-11 12:14:28.000000000 -0500 ++++ b/arch/um/kernel/initrd_user.c 2004-02-11 12:26:11.000000000 -0500 +@@ -6,7 +6,6 @@ + #include + #include + #include +-#include + #include + + #include "user_util.h" +@@ -19,13 +18,15 @@ + { + int fd, n; + +- if((fd = os_open_file(filename, of_read(OPENFLAGS()), 0)) < 0){ +- printk("Opening '%s' failed - errno = %d\n", filename, errno); ++ fd = os_open_file(filename, of_read(OPENFLAGS()), 0); ++ if(fd < 0){ ++ printk("Opening '%s' failed - err = %d\n", filename, -fd); + return(-1); + } +- if((n = read(fd, buf, size)) != size){ +- printk("Read of %d bytes from '%s' returned %d, errno = %d\n", +- size, filename, n, errno); ++ n = os_read_file(fd, buf, size); ++ if(n != size){ ++ printk("Read of %d bytes from '%s' failed, err = %d\n", size, ++ filename, -n); + return(-1); + } + return(0); +diff -Naur a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c +--- a/arch/um/kernel/init_task.c 2004-02-11 
12:17:10.000000000 -0500 ++++ b/arch/um/kernel/init_task.c 2004-02-11 12:29:16.000000000 -0500 +@@ -8,7 +8,6 @@ + #include "linux/module.h" + #include "linux/sched.h" + #include "linux/init_task.h" +-#include "linux/version.h" + #include "asm/uaccess.h" + #include "asm/pgtable.h" + #include "user_util.h" +@@ -18,7 +17,7 @@ + struct mm_struct init_mm = INIT_MM(init_mm); + static struct files_struct init_files = INIT_FILES; + static struct signal_struct init_signals = INIT_SIGNALS(init_signals); +- ++static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand); + EXPORT_SYMBOL(init_mm); + + /* +@@ -43,26 +42,12 @@ + __attribute__((__section__(".data.init_task"))) = + { INIT_THREAD_INFO(init_task) }; + +-struct task_struct *alloc_task_struct(void) +-{ +- return((struct task_struct *) +- __get_free_pages(GFP_KERNEL, CONFIG_KERNEL_STACK_ORDER)); +-} +- + void unprotect_stack(unsigned long stack) + { + protect_memory(stack, (1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE, + 1, 1, 0, 1); + } + +-void free_task_struct(struct task_struct *task) +-{ +- /* free_pages decrements the page counter and only actually frees +- * the pages if they are now not accessed by anything. +- */ +- free_pages((unsigned long) task, CONFIG_KERNEL_STACK_ORDER); +-} +- + /* + * Overrides for Emacs so that we follow Linus's tabbing style. 
+ * Emacs will notice this stuff at the end of the file and automatically +diff -Naur a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c +--- a/arch/um/kernel/irq.c 2004-02-11 12:16:32.000000000 -0500 ++++ b/arch/um/kernel/irq.c 2004-02-11 12:28:31.000000000 -0500 +@@ -29,6 +29,7 @@ + #include "user_util.h" + #include "kern_util.h" + #include "irq_user.h" ++#include "irq_kern.h" + + static void register_irq_proc (unsigned int irq); + +@@ -83,65 +84,55 @@ + end_none + }; + +-/* Not changed */ +-volatile unsigned long irq_err_count; +- + /* + * Generic, controller-independent functions: + */ + +-int get_irq_list(char *buf) ++int show_interrupts(struct seq_file *p, void *v) + { +- int i, j; +- unsigned long flags; ++ int i = *(loff_t *) v, j; + struct irqaction * action; +- char *p = buf; ++ unsigned long flags; + +- p += sprintf(p, " "); +- for (j=0; jtypename); +- p += sprintf(p, " %s", action->name); ++ seq_printf(p, " %14s", irq_desc[i].handler->typename); ++ seq_printf(p, " %s", action->name); + + for (action=action->next; action; action = action->next) +- p += sprintf(p, ", %s", action->name); +- *p++ = '\n'; +- end: ++ seq_printf(p, ", %s", action->name); ++ ++ seq_putc(p, '\n'); ++skip: + spin_unlock_irqrestore(&irq_desc[i].lock, flags); ++ } else if (i == NR_IRQS) { ++ seq_printf(p, "NMI: "); ++ for (j = 0; j < NR_CPUS; j++) ++ if (cpu_online(j)) ++ seq_printf(p, "%10u ", nmi_count(j)); ++ seq_putc(p, '\n'); + } +- p += sprintf(p, "\n"); +-#ifdef notdef +-#ifdef CONFIG_SMP +- p += sprintf(p, "LOC: "); +- for (j = 0; j < num_online_cpus(); j++) +- p += sprintf(p, "%10u ", +- apic_timer_irqs[cpu_logical_map(j)]); +- p += sprintf(p, "\n"); +-#endif +-#endif +- p += sprintf(p, "ERR: %10lu\n", irq_err_count); +- return p - buf; +-} + +- +-int show_interrupts(struct seq_file *p, void *v) +-{ +- return(0); ++ return 0; + } + + /* +@@ -230,8 +221,11 @@ + + void disable_irq(unsigned int irq) + { ++ irq_desc_t *desc = irq_desc + irq; ++ + disable_irq_nosync(irq); +- 
synchronize_irq(irq); ++ if(desc->action) ++ synchronize_irq(irq); + } + + /** +@@ -252,7 +246,7 @@ + spin_lock_irqsave(&desc->lock, flags); + switch (desc->depth) { + case 1: { +- unsigned int status = desc->status & ~IRQ_DISABLED; ++ unsigned int status = desc->status & IRQ_DISABLED; + desc->status = status; + if ((status & (IRQ_PENDING | IRQ_REPLAY)) == IRQ_PENDING) { + desc->status = status | IRQ_REPLAY; +@@ -282,13 +276,12 @@ + * 0 return value means that this irq is already being + * handled by some other CPU. (or is disabled) + */ +- int cpu = smp_processor_id(); + irq_desc_t *desc = irq_desc + irq; + struct irqaction * action; + unsigned int status; + + irq_enter(); +- kstat_cpu(cpu).irqs[irq]++; ++ kstat_this_cpu.irqs[irq]++; + spin_lock(&desc->lock); + desc->handler->ack(irq); + /* +@@ -385,7 +378,7 @@ + */ + + int request_irq(unsigned int irq, +- void (*handler)(int, void *, struct pt_regs *), ++ irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, + const char * devname, + void *dev_id) +@@ -433,15 +426,19 @@ + EXPORT_SYMBOL(request_irq); + + int um_request_irq(unsigned int irq, int fd, int type, +- void (*handler)(int, void *, struct pt_regs *), ++ irqreturn_t (*handler)(int, void *, struct pt_regs *), + unsigned long irqflags, const char * devname, + void *dev_id) + { +- int retval; ++ int err; + +- retval = request_irq(irq, handler, irqflags, devname, dev_id); +- if(retval) return(retval); +- return(activate_fd(irq, fd, type, dev_id)); ++ err = request_irq(irq, handler, irqflags, devname, dev_id); ++ if(err) ++ return(err); ++ ++ if(fd != -1) ++ err = activate_fd(irq, fd, type, dev_id); ++ return(err); + } + + /* this was setup_x86_irq but it seems pretty generic */ +@@ -474,7 +471,8 @@ + */ + spin_lock_irqsave(&desc->lock,flags); + p = &desc->action; +- if ((old = *p) != NULL) { ++ old = *p; ++ if (old != NULL) { + /* Can't share interrupts unless both agree to */ + if (!(old->flags & new->flags & SA_SHIRQ)) { + 
spin_unlock_irqrestore(&desc->lock,flags); +@@ -586,12 +584,14 @@ + unsigned long count, void *data) + { + int irq = (long) data, full_count = count, err; +- cpumask_t new_value, tmp; ++ cpumask_t new_value; + + if (!irq_desc[irq].handler->set_affinity) + return -EIO; + + err = cpumask_parse(buffer, count, new_value); ++ if(err) ++ return(err); + + #ifdef CONFIG_SMP + /* +@@ -599,9 +599,11 @@ + * way to make the system unusable accidentally :-) At least + * one online CPU still has to be targeted. + */ +- cpus_and(tmp, new_value, cpu_online_map); +- if (cpus_empty(tmp)) ++ { cpumask_t tmp; ++ cpus_and(tmp, new_value, cpu_online_map); ++ if (cpus_empty(tmp)) + return -EINVAL; ++ } + #endif + + irq_affinity[irq] = new_value; +@@ -614,6 +616,7 @@ + int count, int *eof, void *data) + { + int len = cpumask_snprintf(page, count, *(cpumask_t *)data); ++ + if (count - len < 2) + return -EINVAL; + len += sprintf(page + len, "\n"); +diff -Naur a/arch/um/kernel/irq_user.c b/arch/um/kernel/irq_user.c +--- a/arch/um/kernel/irq_user.c 2004-02-11 12:15:17.000000000 -0500 ++++ b/arch/um/kernel/irq_user.c 2004-02-11 12:27:06.000000000 -0500 +@@ -6,7 +6,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -49,7 +48,8 @@ + + if(smp_sigio_handler()) return; + while(1){ +- if((n = poll(pollfds, pollfds_num, 0)) < 0){ ++ n = poll(pollfds, pollfds_num, 0); ++ if(n < 0){ + if(errno == EINTR) continue; + printk("sigio_handler : poll returned %d, " + "errno = %d\n", n, errno); +@@ -366,34 +366,31 @@ + + void forward_ipi(int fd, int pid) + { +- if(fcntl(fd, F_SETOWN, pid) < 0){ +- int save_errno = errno; +- if(fcntl(fd, F_GETOWN, 0) != pid){ +- printk("forward_ipi: F_SETOWN failed, fd = %d, " +- "me = %d, target = %d, errno = %d\n", fd, +- os_getpid(), pid, save_errno); +- } +- } ++ int err; ++ ++ err = os_set_owner(fd, pid); ++ if(err < 0) ++ printk("forward_ipi: set_owner failed, fd = %d, me = %d, " ++ "target = %d, err = %d\n", fd, os_getpid(), pid, -err); 
+ } + + void forward_interrupts(int pid) + { + struct irq_fd *irq; + unsigned long flags; ++ int err; + + flags = irq_lock(); + for(irq=active_fds;irq != NULL;irq = irq->next){ +- if(fcntl(irq->fd, F_SETOWN, pid) < 0){ +- int save_errno = errno; +- if(fcntl(irq->fd, F_GETOWN, 0) != pid){ +- /* XXX Just remove the irq rather than +- * print out an infinite stream of these +- */ +- printk("Failed to forward %d to pid %d, " +- "errno = %d\n", irq->fd, pid, +- save_errno); +- } ++ err = os_set_owner(irq->fd, pid); ++ if(err < 0){ ++ /* XXX Just remove the irq rather than ++ * print out an infinite stream of these ++ */ ++ printk("Failed to forward %d to pid %d, err = %d\n", ++ irq->fd, pid, -err); + } ++ + irq->pid = pid; + } + irq_unlock(flags); +diff -Naur a/arch/um/kernel/ksyms.c b/arch/um/kernel/ksyms.c +--- a/arch/um/kernel/ksyms.c 2004-02-11 12:14:17.000000000 -0500 ++++ b/arch/um/kernel/ksyms.c 2004-02-11 12:26:00.000000000 -0500 +@@ -1,5 +1,5 @@ + /* +- * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com) ++ * Copyright (C) 2001 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +@@ -34,34 +34,63 @@ + EXPORT_SYMBOL(flush_tlb_range); + EXPORT_SYMBOL(host_task_size); + EXPORT_SYMBOL(arch_validate); ++EXPORT_SYMBOL(get_kmem_end); + +-EXPORT_SYMBOL(region_pa); +-EXPORT_SYMBOL(region_va); +-EXPORT_SYMBOL(phys_mem_map); +-EXPORT_SYMBOL(page_mem_map); + EXPORT_SYMBOL(page_to_phys); + EXPORT_SYMBOL(phys_to_page); + EXPORT_SYMBOL(high_physmem); + EXPORT_SYMBOL(empty_zero_page); + EXPORT_SYMBOL(um_virt_to_phys); ++EXPORT_SYMBOL(__virt_to_page); ++EXPORT_SYMBOL(to_phys); ++EXPORT_SYMBOL(to_virt); + EXPORT_SYMBOL(mode_tt); + EXPORT_SYMBOL(handle_page_fault); + ++#ifdef CONFIG_MODE_TT ++EXPORT_SYMBOL(copy_from_user_tt); ++EXPORT_SYMBOL(copy_to_user_tt); ++#endif ++ ++#ifdef CONFIG_MODE_SKAS ++EXPORT_SYMBOL(copy_to_user_skas); ++EXPORT_SYMBOL(copy_from_user_skas); ++#endif ++ ++EXPORT_SYMBOL(os_stat_fd); ++EXPORT_SYMBOL(os_stat_file); 
++EXPORT_SYMBOL(os_access); ++EXPORT_SYMBOL(os_print_error); ++EXPORT_SYMBOL(os_get_exec_close); ++EXPORT_SYMBOL(os_set_exec_close); + EXPORT_SYMBOL(os_getpid); + EXPORT_SYMBOL(os_open_file); + EXPORT_SYMBOL(os_read_file); + EXPORT_SYMBOL(os_write_file); + EXPORT_SYMBOL(os_seek_file); ++EXPORT_SYMBOL(os_lock_file); ++EXPORT_SYMBOL(os_ioctl_generic); + EXPORT_SYMBOL(os_pipe); + EXPORT_SYMBOL(os_file_type); ++EXPORT_SYMBOL(os_file_mode); ++EXPORT_SYMBOL(os_file_size); ++EXPORT_SYMBOL(os_flush_stdout); + EXPORT_SYMBOL(os_close_file); ++EXPORT_SYMBOL(os_set_fd_async); ++EXPORT_SYMBOL(os_set_fd_block); + EXPORT_SYMBOL(helper_wait); + EXPORT_SYMBOL(os_shutdown_socket); ++EXPORT_SYMBOL(os_create_unix_socket); + EXPORT_SYMBOL(os_connect_socket); ++EXPORT_SYMBOL(os_accept_connection); ++EXPORT_SYMBOL(os_rcv_fd); + EXPORT_SYMBOL(run_helper); + EXPORT_SYMBOL(start_thread); + EXPORT_SYMBOL(dump_thread); + ++EXPORT_SYMBOL(do_gettimeofday); ++EXPORT_SYMBOL(do_settimeofday); ++ + /* This is here because UML expands open to sys_open, not to a system + * call instruction. + */ +@@ -90,3 +119,13 @@ + EXPORT_SYMBOL(kmap_atomic_to_page); + #endif + ++/* ++ * Overrides for Emacs so that we follow Linus's tabbing style. ++ * Emacs will notice this stuff at the end of the file and automatically ++ * adjust the settings for this buffer only. This must remain at the end ++ * of the file. 
++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -Naur a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile +--- a/arch/um/kernel/Makefile 2004-02-11 12:16:04.000000000 -0500 ++++ b/arch/um/kernel/Makefile 2004-02-11 12:28:00.000000000 -0500 +@@ -7,11 +7,11 @@ + + obj-y = checksum.o config.o exec_kern.o exitcode.o frame_kern.o frame.o \ + helper.o init_task.o irq.o irq_user.o ksyms.o mem.o mem_user.o \ +- process.o process_kern.o ptrace.o reboot.o resource.o sigio_user.o \ +- sigio_kern.o signal_kern.o signal_user.o smp.o syscall_kern.o \ +- syscall_user.o sysrq.o sys_call_table.o tempfile.o time.o \ +- time_kern.o tlb.o trap_kern.o trap_user.o uaccess_user.o um_arch.o \ +- umid.o user_syms.o user_util.o ++ physmem.o process.o process_kern.o ptrace.o reboot.o resource.o \ ++ sigio_user.o sigio_kern.o signal_kern.o signal_user.o smp.o \ ++ syscall_kern.o syscall_user.o sysrq.o sys_call_table.o tempfile.o \ ++ time.o time_kern.o tlb.o trap_kern.o trap_user.o uaccess_user.o \ ++ um_arch.o umid.o user_syms.o user_util.o + + obj-$(CONFIG_BLK_DEV_INITRD) += initrd_kern.o initrd_user.o + obj-$(CONFIG_GPROF) += gprof_syms.o +@@ -36,31 +36,22 @@ + + CFLAGS_frame.o := $(patsubst -fomit-frame-pointer,,$(USER_CFLAGS)) + +-$(USER_OBJS) : %.o: %.c +- $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< +- + # This has to be separate because it needs be compiled with frame pointers + # regardless of how the rest of the kernel is built. 
+ + $(obj)/frame.o: $(src)/frame.c + $(CC) $(CFLAGS_$(notdir $@)) -c -o $@ $< + +-QUOTE = 'my $$config=`cat $(TOPDIR)/.config`; $$config =~ s/"/\\"/g ; while(<STDIN>) { $$_ =~ s/CONFIG/$$config/; print $$_ }' ++$(USER_OBJS) : %.o: %.c ++ $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< + +-$(obj)/config.c : $(src)/config.c.in $(TOPDIR)/.config +- $(PERL) -e $(QUOTE) < $(src)/config.c.in > $@ ++QUOTE = 'my $$config=`cat $(TOPDIR)/.config`; $$config =~ s/"/\\"/g ; $$config =~ s/\n/\\n"\n"/g ; while(<STDIN>) { $$_ =~ s/CONFIG/$$config/; print $$_ }' + + $(obj)/config.o : $(obj)/config.c + +-clean: +- rm -f config.c +- for dir in $(subdir-y) ; do $(MAKE) -C $$dir clean; done +- +-modules: +- +-fastdep: +- +-dep: +- +-archmrproper: clean ++quiet_cmd_quote = QUOTE $@ ++cmd_quote = $(PERL) -e $(QUOTE) < $< > $@ + ++targets += config.c ++$(obj)/config.c : $(src)/config.c.in $(TOPDIR)/.config FORCE ++ $(call if_changed,quote) +diff -Naur a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c +--- a/arch/um/kernel/mem.c 2004-02-11 12:15:11.000000000 -0500 ++++ b/arch/um/kernel/mem.c 2004-02-11 12:26:57.000000000 -0500 +@@ -1,74 +1,66 @@ + /* +- * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) ++ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +-#include "linux/config.h" +-#include "linux/module.h" +-#include "linux/types.h" ++#include "linux/stddef.h" ++#include "linux/kernel.h" + #include "linux/mm.h" +-#include "linux/fs.h" +-#include "linux/init.h" + #include "linux/bootmem.h" + #include "linux/swap.h" +-#include "linux/slab.h" +-#include "linux/vmalloc.h" + #include "linux/highmem.h" ++#include "linux/gfp.h" + #include "asm/page.h" +-#include "asm/pgtable.h" ++#include "asm/fixmap.h" + #include "asm/pgalloc.h" +-#include "asm/bitops.h" +-#include "asm/uaccess.h" +-#include "asm/tlb.h" + #include "user_util.h" + #include "kern_util.h" +-#include "mem_user.h" +-#include "mem.h" + #include "kern.h" +-#include "init.h" +-#include
"os.h" +-#include "mode_kern.h" ++#include "mem_user.h" + #include "uml_uaccess.h" ++#include "os.h" ++ ++extern char __binary_start; + + /* Changed during early boot */ +-pgd_t swapper_pg_dir[1024]; +-unsigned long high_physmem; +-unsigned long vm_start; +-unsigned long vm_end; +-unsigned long highmem; + unsigned long *empty_zero_page = NULL; + unsigned long *empty_bad_page = NULL; +- +-/* Not modified */ +-const char bad_pmd_string[] = "Bad pmd in pte_alloc: %08lx\n"; +- +-extern char __init_begin, __init_end; +-extern long physmem_size; +- +-/* Not changed by UML */ +-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); +- +-/* Changed during early boot */ ++pgd_t swapper_pg_dir[1024]; ++unsigned long highmem; + int kmalloc_ok = 0; + +-#define NREGIONS (phys_region_index(0xffffffff) - phys_region_index(0x0) + 1) +-struct mem_region *regions[NREGIONS] = { [ 0 ... NREGIONS - 1 ] = NULL }; +-#define REGION_SIZE ((0xffffffff & ~REGION_MASK) + 1) +- +-/* Changed during early boot */ + static unsigned long brk_end; + ++void unmap_physmem(void) ++{ ++ os_unmap_memory((void *) brk_end, uml_reserved - brk_end); ++} ++ + static void map_cb(void *unused) + { + map_memory(brk_end, __pa(brk_end), uml_reserved - brk_end, 1, 1, 0); + } + +-void unmap_physmem(void) ++#ifdef CONFIG_HIGHMEM ++static void setup_highmem(unsigned long highmem_start, ++ unsigned long highmem_len) + { +- os_unmap_memory((void *) brk_end, uml_reserved - brk_end); +-} ++ struct page *page; ++ unsigned long highmem_pfn; ++ int i; + +-extern char __binary_start; ++ highmem_start_page = virt_to_page(highmem_start); ++ ++ highmem_pfn = __pa(highmem_start) >> PAGE_SHIFT; ++ for(i = 0; i < highmem_len >> PAGE_SHIFT; i++){ ++ page = &mem_map[highmem_pfn + i]; ++ ClearPageReserved(page); ++ set_bit(PG_highmem, &page->flags); ++ atomic_set(&page->count, 1); ++ __free_page(page); ++ } ++} ++#endif + + void mem_init(void) + { +@@ -103,50 +95,15 @@ + totalhigh_pages = highmem >> PAGE_SHIFT; + totalram_pages += 
totalhigh_pages; + num_physpages = totalram_pages; +- max_mapnr = totalram_pages; + max_pfn = totalram_pages; + printk(KERN_INFO "Memory: %luk available\n", + (unsigned long) nr_free_pages() << (PAGE_SHIFT-10)); + kmalloc_ok = 1; +-} +- +-/* Changed during early boot */ +-static unsigned long kmem_top = 0; +- +-unsigned long get_kmem_end(void) +-{ +- if(kmem_top == 0) +- kmem_top = CHOOSE_MODE(kmem_end_tt, kmem_end_skas); +- return(kmem_top); +-} +- +-void set_kmem_end(unsigned long new) +-{ +- kmem_top = new; +-} + + #ifdef CONFIG_HIGHMEM +-/* Changed during early boot */ +-pte_t *kmap_pte; +-pgprot_t kmap_prot; +- +-EXPORT_SYMBOL(kmap_prot); +-EXPORT_SYMBOL(kmap_pte); +- +-#define kmap_get_fixmap_pte(vaddr) \ +- pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) +- +-void __init kmap_init(void) +-{ +- unsigned long kmap_vstart; +- +- /* cache the first kmap pte */ +- kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); +- kmap_pte = kmap_get_fixmap_pte(kmap_vstart); +- +- kmap_prot = PAGE_KERNEL; ++ setup_highmem(end_iomem, highmem); ++#endif + } +-#endif /* CONFIG_HIGHMEM */ + + static void __init fixrange_init(unsigned long start, unsigned long end, + pgd_t *pgd_base) +@@ -178,76 +135,24 @@ + } + } + +-int init_maps(struct mem_region *region) +-{ +- struct page *p, *map; +- int i, n, len; +- +- if(region == &physmem_region){ +- region->mem_map = mem_map; +- return(0); +- } +- else if(region->mem_map != NULL) return(0); +- +- n = region->len >> PAGE_SHIFT; +- len = n * sizeof(struct page); +- if(kmalloc_ok){ +- map = kmalloc(len, GFP_KERNEL); +- if(map == NULL) map = vmalloc(len); +- } +- else map = alloc_bootmem_low_pages(len); +- +- if(map == NULL) +- return(-ENOMEM); +- for(i = 0; i < n; i++){ +- p = &map[i]; +- set_page_count(p, 0); +- SetPageReserved(p); +- INIT_LIST_HEAD(&p->list); +- } +- region->mem_map = map; +- return(0); +-} ++#if CONFIG_HIGHMEM ++pte_t *kmap_pte; ++pgprot_t kmap_prot; + +-DECLARE_MUTEX(regions_sem); ++#define 
kmap_get_fixmap_pte(vaddr) \ ++ pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) + +-static int setup_one_range(int fd, char *driver, unsigned long start, +- unsigned long pfn, int len, +- struct mem_region *region) ++void __init kmap_init(void) + { +- int i; +- +- down(&regions_sem); +- for(i = 0; i < NREGIONS; i++){ +- if(regions[i] == NULL) break; +- } +- if(i == NREGIONS){ +- printk("setup_range : no free regions\n"); +- i = -1; +- goto out; +- } +- +- if(fd == -1) +- fd = create_mem_file(len); ++ unsigned long kmap_vstart; + +- if(region == NULL){ +- region = alloc_bootmem_low_pages(sizeof(*region)); +- if(region == NULL) +- panic("Failed to allocating mem_region"); +- } ++ /* cache the first kmap pte */ ++ kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); ++ kmap_pte = kmap_get_fixmap_pte(kmap_vstart); + +- *region = ((struct mem_region) { .driver = driver, +- .start_pfn = pfn, +- .start = start, +- .len = len, +- .fd = fd } ); +- regions[i] = region; +- out: +- up(&regions_sem); +- return(i); ++ kmap_prot = PAGE_KERNEL; + } + +-#ifdef CONFIG_HIGHMEM + static void init_highmem(void) + { + pgd_t *pgd; +@@ -268,63 +173,20 @@ + + kmap_init(); + } +- +-void setup_highmem(unsigned long len) +-{ +- struct mem_region *region; +- struct page *page, *map; +- unsigned long phys; +- int i, cur, index; +- +- phys = physmem_size; +- do { +- cur = min(len, (unsigned long) REGION_SIZE); +- i = setup_one_range(-1, NULL, -1, phys >> PAGE_SHIFT, cur, +- NULL); +- if(i == -1){ +- printk("setup_highmem - setup_one_range failed\n"); +- return; +- } +- region = regions[i]; +- index = phys / PAGE_SIZE; +- region->mem_map = &mem_map[index]; +- +- map = region->mem_map; +- for(i = 0; i < (cur >> PAGE_SHIFT); i++){ +- page = &map[i]; +- ClearPageReserved(page); +- set_bit(PG_highmem, &page->flags); +- atomic_set(&page->count, 1); +- __free_page(page); +- } +- phys += cur; +- len -= cur; +- } while(len > 0); +-} +-#endif ++#endif /* CONFIG_HIGHMEM */ + + void paging_init(void) + { +- 
struct mem_region *region; +- unsigned long zones_size[MAX_NR_ZONES], start, end, vaddr; +- int i, index; ++ unsigned long zones_size[MAX_NR_ZONES], vaddr; ++ int i; + + empty_zero_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); + empty_bad_page = (unsigned long *) alloc_bootmem_low_pages(PAGE_SIZE); + for(i=0;i<sizeof(zones_size)/sizeof(zones_size[0]);i++) + zones_size[i] = 0; +- zones_size[0] = (high_physmem >> PAGE_SHIFT) - +- (uml_physmem >> PAGE_SHIFT); ++ zones_size[0] = (end_iomem >> PAGE_SHIFT) - (uml_physmem >> PAGE_SHIFT); + zones_size[2] = highmem >> PAGE_SHIFT; + free_area_init(zones_size); +- start = phys_region_index(__pa(uml_physmem)); +- end = phys_region_index(__pa(high_physmem - 1)); +- for(i = start; i <= end; i++){ +- region = regions[i]; +- index = (region->start - uml_physmem) / PAGE_SIZE; +- region->mem_map = &mem_map[index]; +- if(i > start) free_bootmem(__pa(region->start), region->len); +- } + + /* + * Fixed mappings, only the page table structure has to be +@@ -335,15 +197,33 @@ + + #ifdef CONFIG_HIGHMEM + init_highmem(); +- setup_highmem(highmem); + #endif + } + +-pte_t __bad_page(void) ++struct page *arch_validate(struct page *page, int mask, int order) + { +- clear_page(empty_bad_page); +- return pte_mkdirty(mk_pte((struct page *) empty_bad_page, +- PAGE_SHARED)); ++ unsigned long addr, zero = 0; ++ int i; ++ ++ again: ++ if(page == NULL) return(page); ++ if(PageHighMem(page)) return(page); ++ ++ addr = (unsigned long) page_address(page); ++ for(i = 0; i < (1 << order); i++){ ++ current->thread.fault_addr = (void *) addr; ++ if(__do_copy_to_user((void *) addr, &zero, ++ sizeof(zero), ++ &current->thread.fault_addr, ++ &current->thread.fault_catcher)){ ++ if(!(mask & __GFP_WAIT)) return(NULL); ++ else break; ++ } ++ addr += PAGE_SIZE; ++ } ++ if(i == (1 << order)) return(page); ++ page = alloc_pages(mask, order); ++ goto again; + } + + /* This can't do anything because nothing in the kernel image can be freed +@@ -401,395 +281,6 @@ + printk("%d pages swap cached\n", cached); + } + +-static int __init uml_mem_setup(char *line, int *add) 
+-{ +- char *retptr; +- physmem_size = memparse(line,&retptr); +- return 0; +-} +-__uml_setup("mem=", uml_mem_setup, +-"mem=<Amount of desired ram>\n" +-" This controls how much \"physical\" memory the kernel allocates\n" +-" for the system. The size is specified as a number followed by\n" +-" one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n" +-" This is not related to the amount of memory in the physical\n" +-" machine. It can be more, and the excess, if it's ever used, will\n" +-" just be swapped out.\n Example: mem=64M\n\n" +-); +- +-struct page *arch_validate(struct page *page, int mask, int order) +-{ +- unsigned long addr, zero = 0; +- int i; +- +- again: +- if(page == NULL) return(page); +- if(PageHighMem(page)) return(page); +- +- addr = (unsigned long) page_address(page); +- for(i = 0; i < (1 << order); i++){ +- current->thread.fault_addr = (void *) addr; +- if(__do_copy_to_user((void *) addr, &zero, +- sizeof(zero), +- &current->thread.fault_addr, +- &current->thread.fault_catcher)){ +- if(!(mask & __GFP_WAIT)) return(NULL); +- else break; +- } +- addr += PAGE_SIZE; +- } +- if(i == (1 << order)) return(page); +- page = alloc_pages(mask, order); +- goto again; +-} +- +-DECLARE_MUTEX(vm_reserved_sem); +-static struct list_head vm_reserved = LIST_HEAD_INIT(vm_reserved); +- +-/* Static structures, linked in to the list in early boot */ +-static struct vm_reserved head = { +- .list = LIST_HEAD_INIT(head.list), +- .start = 0, +- .end = 0xffffffff +-}; +- +-static struct vm_reserved tail = { +- .list = LIST_HEAD_INIT(tail.list), +- .start = 0, +- .end = 0xffffffff +-}; +- +-void set_usable_vm(unsigned long start, unsigned long end) +-{ +- list_add(&head.list, &vm_reserved); +- list_add(&tail.list, &head.list); +- head.end = start; +- tail.start = end; +-} +- +-int reserve_vm(unsigned long start, unsigned long end, void *e) +- +-{ +- struct vm_reserved *entry = e, *reserved, *prev; +- struct list_head *ele; +- int err; +- +- down(&vm_reserved_sem); +- list_for_each(ele, &vm_reserved){ 
+- reserved = list_entry(ele, struct vm_reserved, list); +- if(reserved->start >= end) goto found; +- } +- panic("Reserved vm out of range"); +- found: +- prev = list_entry(ele->prev, struct vm_reserved, list); +- if(prev->end > start) +- panic("Can't reserve vm"); +- if(entry == NULL) +- entry = kmalloc(sizeof(*entry), GFP_KERNEL); +- if(entry == NULL){ +- printk("reserve_vm : Failed to allocate entry\n"); +- err = -ENOMEM; +- goto out; +- } +- *entry = ((struct vm_reserved) +- { .list = LIST_HEAD_INIT(entry->list), +- .start = start, +- .end = end }); +- list_add(&entry->list, &prev->list); +- err = 0; +- out: +- up(&vm_reserved_sem); +- return(0); +-} +- +-unsigned long get_vm(unsigned long len) +-{ +- struct vm_reserved *this, *next; +- struct list_head *ele; +- unsigned long start; +- int err; +- +- down(&vm_reserved_sem); +- list_for_each(ele, &vm_reserved){ +- this = list_entry(ele, struct vm_reserved, list); +- next = list_entry(ele->next, struct vm_reserved, list); +- if((this->start < next->start) && +- (this->end + len + PAGE_SIZE <= next->start)) +- goto found; +- } +- up(&vm_reserved_sem); +- return(0); +- found: +- up(&vm_reserved_sem); +- start = (unsigned long) UML_ROUND_UP(this->end) + PAGE_SIZE; +- err = reserve_vm(start, start + len, NULL); +- if(err) return(0); +- return(start); +-} +- +-int nregions(void) +-{ +- return(NREGIONS); +-} +- +-void setup_range(int fd, char *driver, unsigned long start, unsigned long pfn, +- unsigned long len, int need_vm, struct mem_region *region, +- void *reserved) +-{ +- int i, cur; +- +- do { +- cur = min(len, (unsigned long) REGION_SIZE); +- i = setup_one_range(fd, driver, start, pfn, cur, region); +- region = regions[i]; +- if(need_vm && setup_region(region, reserved)){ +- kfree(region); +- regions[i] = NULL; +- return; +- } +- start += cur; +- if(pfn != -1) pfn += cur; +- len -= cur; +- } while(len > 0); +-} +- +-struct iomem { +- char *name; +- int fd; +- unsigned long size; +-}; +- +-/* iomem regions can 
only be added on the command line at the moment. +- * Locking will be needed when they can be added via mconsole. +- */ +- +-struct iomem iomem_regions[NREGIONS] = { [ 0 ... NREGIONS - 1 ] = +- { .name = NULL, +- .fd = -1, +- .size = 0 } }; +- +-int num_iomem_regions = 0; +- +-void add_iomem(char *name, int fd, unsigned long size) +-{ +- if(num_iomem_regions == sizeof(iomem_regions)/sizeof(iomem_regions[0])) +- return; +- size = (size + PAGE_SIZE - 1) & PAGE_MASK; +- iomem_regions[num_iomem_regions++] = +- ((struct iomem) { .name = name, +- .fd = fd, +- .size = size } ); +-} +- +-int setup_iomem(void) +-{ +- struct iomem *iomem; +- int i; +- +- for(i = 0; i < num_iomem_regions; i++){ +- iomem = &iomem_regions[i]; +- setup_range(iomem->fd, iomem->name, -1, -1, iomem->size, 1, +- NULL, NULL); +- } +- return(0); +-} +- +-__initcall(setup_iomem); +- +-#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) +-#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) +- +-/* Changed during early boot */ +-static struct mem_region physmem_region; +-static struct vm_reserved physmem_reserved; +- +-void setup_physmem(unsigned long start, unsigned long reserve_end, +- unsigned long len) +-{ +- struct mem_region *region = &physmem_region; +- struct vm_reserved *reserved = &physmem_reserved; +- unsigned long cur, pfn = 0; +- int do_free = 1, bootmap_size; +- +- do { +- cur = min(len, (unsigned long) REGION_SIZE); +- if(region == NULL) +- region = alloc_bootmem_low_pages(sizeof(*region)); +- if(reserved == NULL) +- reserved = alloc_bootmem_low_pages(sizeof(*reserved)); +- if((region == NULL) || (reserved == NULL)) +- panic("Couldn't allocate physmem region or vm " +- "reservation\n"); +- setup_range(-1, NULL, start, pfn, cur, 1, region, reserved); +- +- if(do_free){ +- unsigned long reserve = reserve_end - start; +- int pfn = PFN_UP(__pa(reserve_end)); +- int delta = (len - reserve) >> PAGE_SHIFT; +- +- bootmap_size = init_bootmem(pfn, pfn + delta); +- free_bootmem(__pa(reserve_end) + 
bootmap_size, +- cur - bootmap_size - reserve); +- do_free = 0; +- } +- start += cur; +- pfn += cur >> PAGE_SHIFT; +- len -= cur; +- region = NULL; +- reserved = NULL; +- } while(len > 0); +-} +- +-struct mem_region *phys_region(unsigned long phys) +-{ +- unsigned int n = phys_region_index(phys); +- +- if(regions[n] == NULL) +- panic("Physical address in uninitialized region"); +- return(regions[n]); +-} +- +-unsigned long phys_offset(unsigned long phys) +-{ +- return(phys_addr(phys)); +-} +- +-struct page *phys_mem_map(unsigned long phys) +-{ +- return((struct page *) phys_region(phys)->mem_map); +-} +- +-struct page *pte_mem_map(pte_t pte) +-{ +- return(phys_mem_map(pte_val(pte))); +-} +- +-struct mem_region *page_region(struct page *page, int *index_out) +-{ +- int i; +- struct mem_region *region; +- struct page *map; +- +- for(i = 0; i < NREGIONS; i++){ +- region = regions[i]; +- if(region == NULL) continue; +- map = region->mem_map; +- if((page >= map) && (page < &map[region->len >> PAGE_SHIFT])){ +- if(index_out != NULL) *index_out = i; +- return(region); +- } +- } +- panic("No region found for page"); +- return(NULL); +-} +- +-unsigned long page_to_pfn(struct page *page) +-{ +- struct mem_region *region = page_region(page, NULL); +- +- return(region->start_pfn + (page - (struct page *) region->mem_map)); +-} +- +-struct mem_region *pfn_to_region(unsigned long pfn, int *index_out) +-{ +- struct mem_region *region; +- int i; +- +- for(i = 0; i < NREGIONS; i++){ +- region = regions[i]; +- if(region == NULL) +- continue; +- +- if((region->start_pfn <= pfn) && +- (region->start_pfn + (region->len >> PAGE_SHIFT) > pfn)){ +- if(index_out != NULL) +- *index_out = i; +- return(region); +- } +- } +- return(NULL); +-} +- +-struct page *pfn_to_page(unsigned long pfn) +-{ +- struct mem_region *region = pfn_to_region(pfn, NULL); +- struct page *mem_map = (struct page *) region->mem_map; +- +- return(&mem_map[pfn - region->start_pfn]); +-} +- +-unsigned long 
phys_to_pfn(unsigned long p) +-{ +- struct mem_region *region = regions[phys_region_index(p)]; +- +- return(region->start_pfn + (phys_addr(p) >> PAGE_SHIFT)); +-} +- +-unsigned long pfn_to_phys(unsigned long pfn) +-{ +- int n; +- struct mem_region *region = pfn_to_region(pfn, &n); +- +- return(mk_phys((pfn - region->start_pfn) << PAGE_SHIFT, n)); +-} +- +-struct page *page_mem_map(struct page *page) +-{ +- return((struct page *) page_region(page, NULL)->mem_map); +-} +- +-extern unsigned long region_pa(void *virt) +-{ +- struct mem_region *region; +- unsigned long addr = (unsigned long) virt; +- int i; +- +- for(i = 0; i < NREGIONS; i++){ +- region = regions[i]; +- if(region == NULL) continue; +- if((region->start <= addr) && +- (addr <= region->start + region->len)) +- return(mk_phys(addr - region->start, i)); +- } +- panic("region_pa : no region for virtual address"); +- return(0); +-} +- +-extern void *region_va(unsigned long phys) +-{ +- return((void *) (phys_region(phys)->start + phys_addr(phys))); +-} +- +-unsigned long page_to_phys(struct page *page) +-{ +- int n; +- struct mem_region *region = page_region(page, &n); +- struct page *map = region->mem_map; +- return(mk_phys((page - map) << PAGE_SHIFT, n)); +-} +- +-struct page *phys_to_page(unsigned long phys) +-{ +- struct page *mem_map; +- +- mem_map = phys_mem_map(phys); +- return(mem_map + (phys_offset(phys) >> PAGE_SHIFT)); +-} +- +-static int setup_mem_maps(void) +-{ +- struct mem_region *region; +- int i; +- +- for(i = 0; i < NREGIONS; i++){ +- region = regions[i]; +- if((region != NULL) && (region->fd > 0)) init_maps(region); +- } +- return(0); +-} +- +-__initcall(setup_mem_maps); +- + /* + * Allocate and free page tables. 
+ */ +diff -Naur a/arch/um/kernel/mem_user.c b/arch/um/kernel/mem_user.c +--- a/arch/um/kernel/mem_user.c 2004-02-11 12:15:47.000000000 -0500 ++++ b/arch/um/kernel/mem_user.c 2004-02-11 12:27:38.000000000 -0500 +@@ -34,10 +34,9 @@ + #include + #include + #include +-#include + #include + #include +-#include ++#include + #include + #include + #include "kern_util.h" +@@ -47,105 +46,145 @@ + #include "init.h" + #include "os.h" + #include "tempfile.h" ++#include "kern_constants.h" + + extern struct mem_region physmem_region; + + #define TEMPNAME_TEMPLATE "vm_file-XXXXXX" + +-int create_mem_file(unsigned long len) ++static int create_tmp_file(unsigned long len) + { +- int fd; ++ int fd, err; + char zero; + + fd = make_tempfile(TEMPNAME_TEMPLATE, NULL, 1); +- if (fchmod(fd, 0777) < 0){ +- perror("fchmod"); ++ if(fd < 0) { ++ os_print_error(fd, "make_tempfile"); ++ exit(1); ++ } ++ ++ err = os_mode_fd(fd, 0777); ++ if(err < 0){ ++ os_print_error(err, "os_mode_fd"); + exit(1); + } +- if(os_seek_file(fd, len) < 0){ +- perror("lseek"); ++ err = os_seek_file(fd, len); ++ if(err < 0){ ++ os_print_error(err, "os_seek_file"); + exit(1); + } + zero = 0; +- if(write(fd, &zero, 1) != 1){ +- perror("write"); ++ err = os_write_file(fd, &zero, 1); ++ if(err != 1){ ++ os_print_error(err, "os_write_file"); + exit(1); + } +- if(fcntl(fd, F_SETFD, 1) != 0) +- perror("Setting FD_CLOEXEC failed"); ++ + return(fd); + } + +-int setup_region(struct mem_region *region, void *entry) ++static int have_devanon = 0; ++ ++void check_devanon(void) ++{ ++ int fd; ++ ++ printk("Checking for /dev/anon on the host..."); ++ fd = open("/dev/anon", O_RDWR); ++ if(fd < 0){ ++ printk("Not available (open failed with errno %d)\n", errno); ++ return; ++ } ++ ++ printk("OK\n"); ++ have_devanon = 1; ++} ++ ++static int create_anon_file(unsigned long len) + { +- void *loc, *start; +- char *driver; +- int err, offset; +- +- if(region->start != -1){ +- err = reserve_vm(region->start, +- region->start + region->len, 
entry); +- if(err){ +- printk("setup_region : failed to reserve " +- "0x%x - 0x%x for driver '%s'\n", +- region->start, +- region->start + region->len, +- region->driver); +- return(-1); +- } +- } +- else region->start = get_vm(region->len); +- if(region->start == 0){ +- if(region->driver == NULL) driver = "physmem"; +- else driver = region->driver; +- printk("setup_region : failed to find vm for " +- "driver '%s' (length %d)\n", driver, region->len); +- return(-1); +- } +- if(region->start == uml_physmem){ +- start = (void *) uml_reserved; +- offset = uml_reserved - uml_physmem; +- } +- else { +- start = (void *) region->start; +- offset = 0; +- } +- +- loc = mmap(start, region->len - offset, PROT_READ | PROT_WRITE, +- MAP_SHARED | MAP_FIXED, region->fd, offset); +- if(loc != start){ +- perror("Mapping memory"); ++ void *addr; ++ int fd; ++ ++ fd = open("/dev/anon", O_RDWR); ++ if(fd < 0) { ++ os_print_error(fd, "opening /dev/anon"); + exit(1); + } +- return(0); ++ ++ addr = mmap(NULL, len, PROT_READ | PROT_WRITE , MAP_PRIVATE, fd, 0); ++ if(addr == MAP_FAILED){ ++ os_print_error((int) addr, "mapping physmem file"); ++ exit(1); ++ } ++ munmap(addr, len); ++ ++ return(fd); ++} ++ ++int create_mem_file(unsigned long len) ++{ ++ int err, fd; ++ ++ if(have_devanon) ++ fd = create_anon_file(len); ++ else fd = create_tmp_file(len); ++ ++ err = os_set_exec_close(fd, 1); ++ if(err < 0) ++ os_print_error(err, "exec_close"); ++ return(fd); + } + ++struct iomem_region *iomem_regions = NULL; ++int iomem_size = 0; ++ + static int __init parse_iomem(char *str, int *add) + { +- struct stat buf; ++ struct iomem_region *new; ++ struct uml_stat buf; + char *file, *driver; +- int fd; ++ int fd, err; + + driver = str; + file = strchr(str,','); + if(file == NULL){ +- printk("parse_iomem : failed to parse iomem\n"); +- return(1); ++ printf("parse_iomem : failed to parse iomem\n"); ++ goto out; + } + *file = '\0'; + file++; + fd = os_open_file(file, of_rdwr(OPENFLAGS()), 0); + if(fd < 
0){ +- printk("parse_iomem - Couldn't open io file, errno = %d\n", +- errno); +- return(1); +- } +- if(fstat(fd, &buf) < 0) { +- printk("parse_iomem - cannot fstat file, errno = %d\n", errno); +- return(1); ++ os_print_error(fd, "parse_iomem - Couldn't open io file"); ++ goto out; + } +- add_iomem(driver, fd, buf.st_size); ++ ++ err = os_stat_fd(fd, &buf); ++ if(err < 0){ ++ os_print_error(err, "parse_iomem - cannot stat_fd file"); ++ goto out_close; ++ } ++ ++ new = malloc(sizeof(*new)); ++ if(new == NULL){ ++ perror("Couldn't allocate iomem_region struct"); ++ goto out_close; ++ } ++ ++ *new = ((struct iomem_region) { .next = iomem_regions, ++ .driver = driver, ++ .fd = fd, ++ .size = buf.ust_size, ++ .phys = 0, ++ .virt = 0 }); ++ iomem_regions = new; ++ iomem_size += new->size + UM_KERN_PAGE_SIZE; ++ + return(0); ++ out_close: ++ os_close_file(fd); ++ out: ++ return(1); + } + + __uml_setup("iomem=", parse_iomem, +@@ -153,73 +192,20 @@ + " Configure <file> as an IO memory region named <name>.\n\n" + ); + +-#ifdef notdef +-int logging = 0; +-int logging_fd = -1; +- +-int logging_line = 0; +-char logging_buf[256]; +- +-void log(char *fmt, ...) 
+-{ +- va_list ap; +- struct timeval tv; +- struct openflags flags; +- +- if(logging == 0) return; +- if(logging_fd < 0){ +- flags = of_create(of_trunc(of_rdrw(OPENFLAGS()))); +- logging_fd = os_open_file("log", flags, 0644); +- } +- gettimeofday(&tv, NULL); +- sprintf(logging_buf, "%d\t %u.%u ", logging_line++, tv.tv_sec, +- tv.tv_usec); +- va_start(ap, fmt); +- vsprintf(&logging_buf[strlen(logging_buf)], fmt, ap); +- va_end(ap); +- write(logging_fd, logging_buf, strlen(logging_buf)); +-} +-#endif +- +-int map_memory(unsigned long virt, unsigned long phys, unsigned long len, +- int r, int w, int x) +-{ +- struct mem_region *region = phys_region(phys); +- +- return(os_map_memory((void *) virt, region->fd, phys_offset(phys), len, +- r, w, x)); +-} +- + int protect_memory(unsigned long addr, unsigned long len, int r, int w, int x, + int must_succeed) + { +- if(os_protect_memory((void *) addr, len, r, w, x) < 0){ ++ int err; ++ ++ err = os_protect_memory((void *) addr, len, r, w, x); ++ if(err < 0){ + if(must_succeed) +- panic("protect failed, errno = %d", errno); +- else return(-errno); ++ panic("protect failed, err = %d", -err); ++ else return(err); + } + return(0); + } + +-unsigned long find_iomem(char *driver, unsigned long *len_out) +-{ +- struct mem_region *region; +- int i, n; +- +- n = nregions(); +- for(i = 0; i < n; i++){ +- region = regions[i]; +- if(region == NULL) continue; +- if((region->driver != NULL) && +- !strcmp(region->driver, driver)){ +- *len_out = region->len; +- return(region->start); +- } +- } +- *len_out = 0; +- return 0; +-} +- + /* + * Overrides for Emacs so that we follow Linus's tabbing style. 
+ * Emacs will notice this stuff at the end of the file and automatically +diff -Naur a/arch/um/kernel/physmem.c b/arch/um/kernel/physmem.c +--- a/arch/um/kernel/physmem.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/arch/um/kernel/physmem.c 2004-02-11 12:26:07.000000000 -0500 +@@ -0,0 +1,468 @@ ++/* ++ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) ++ * Licensed under the GPL ++ */ ++ ++#include "linux/mm.h" ++#include "linux/ghash.h" ++#include "linux/slab.h" ++#include "linux/vmalloc.h" ++#include "linux/bootmem.h" ++#include "asm/types.h" ++#include "asm/pgtable.h" ++#include "kern_util.h" ++#include "user_util.h" ++#include "mode_kern.h" ++#include "mem.h" ++#include "mem_user.h" ++#include "os.h" ++#include "kern.h" ++#include "init.h" ++ ++#if 0 ++static pgd_t physmem_pgd[PTRS_PER_PGD]; ++ ++static struct phys_desc *lookup_mapping(void *addr) ++{ ++ pgd = &physmem_pgd[pgd_index(addr)]; ++ if(pgd_none(pgd)) ++ return(NULL); ++ ++ pmd = pmd_offset(pgd, addr); ++ if(pmd_none(pmd)) ++ return(NULL); ++ ++ pte = pte_offset_kernel(pmd, addr); ++ return((struct phys_desc *) pte_val(pte)); ++} ++ ++static struct add_mapping(void *addr, struct phys_desc *new) ++{ ++} ++#endif ++ ++#define PHYS_HASHSIZE (8192) ++ ++struct phys_desc; ++ ++DEF_HASH_STRUCTS(virtmem, PHYS_HASHSIZE, struct phys_desc); ++ ++struct phys_desc { ++ struct virtmem_ptrs virt_ptrs; ++ int fd; ++ __u64 offset; ++ void *virt; ++ unsigned long phys; ++ struct list_head list; ++}; ++ ++struct virtmem_table virtmem_hash; ++ ++static int virt_cmp(void *virt1, void *virt2) ++{ ++ return(virt1 != virt2); ++} ++ ++static int virt_hash(void *virt) ++{ ++ unsigned long addr = ((unsigned long) virt) >> PAGE_SHIFT; ++ return(addr % PHYS_HASHSIZE); ++} ++ ++DEF_HASH(static, virtmem, struct phys_desc, virt_ptrs, void *, virt, virt_cmp, ++ virt_hash); ++ ++LIST_HEAD(descriptor_mappings); ++ ++struct desc_mapping { ++ int fd; ++ struct list_head list; ++ struct list_head pages; ++}; ++ ++static struct 
desc_mapping *find_mapping(int fd) ++{ ++ struct desc_mapping *desc; ++ struct list_head *ele; ++ ++ list_for_each(ele, &descriptor_mappings){ ++ desc = list_entry(ele, struct desc_mapping, list); ++ if(desc->fd == fd) ++ return(desc); ++ } ++ ++ return(NULL); ++} ++ ++static struct desc_mapping *descriptor_mapping(int fd) ++{ ++ struct desc_mapping *desc; ++ ++ desc = find_mapping(fd); ++ if(desc != NULL) ++ return(desc); ++ ++ desc = kmalloc(sizeof(*desc), GFP_ATOMIC); ++ if(desc == NULL) ++ return(NULL); ++ ++ *desc = ((struct desc_mapping) ++ { .fd = fd, ++ .list = LIST_HEAD_INIT(desc->list), ++ .pages = LIST_HEAD_INIT(desc->pages) }); ++ list_add(&desc->list, &descriptor_mappings); ++ ++ return(desc); ++} ++ ++int physmem_subst_mapping(void *virt, int fd, __u64 offset, int w) ++{ ++ struct desc_mapping *fd_maps; ++ struct phys_desc *desc; ++ unsigned long phys; ++ int err; ++ ++ fd_maps = descriptor_mapping(fd); ++ if(fd_maps == NULL) ++ return(-ENOMEM); ++ ++ phys = __pa(virt); ++ if(find_virtmem_hash(&virtmem_hash, virt) != NULL) ++ panic("Address 0x%p is already substituted\n", virt); ++ ++ err = -ENOMEM; ++ desc = kmalloc(sizeof(*desc), GFP_ATOMIC); ++ if(desc == NULL) ++ goto out; ++ ++ *desc = ((struct phys_desc) ++ { .virt_ptrs = { NULL, NULL }, ++ .fd = fd, ++ .offset = offset, ++ .virt = virt, ++ .phys = __pa(virt), ++ .list = LIST_HEAD_INIT(desc->list) }); ++ insert_virtmem_hash(&virtmem_hash, desc); ++ ++ list_add(&desc->list, &fd_maps->pages); ++ ++ virt = (void *) ((unsigned long) virt & PAGE_MASK); ++ err = os_map_memory(virt, fd, offset, PAGE_SIZE, 1, w, 0); ++ if(!err) ++ goto out; ++ ++ remove_virtmem_hash(&virtmem_hash, desc); ++ kfree(desc); ++ out: ++ return(err); ++} ++ ++static int physmem_fd = -1; ++ ++static void remove_mapping(struct phys_desc *desc) ++{ ++ void *virt = desc->virt; ++ int err; ++ ++ remove_virtmem_hash(&virtmem_hash, desc); ++ list_del(&desc->list); ++ kfree(desc); ++ ++ err = os_map_memory(virt, physmem_fd, 
__pa(virt), PAGE_SIZE, 1, 1, 0); ++ if(err) ++ panic("Failed to unmap block device page from physical memory, " ++ "errno = %d", -err); ++} ++ ++int physmem_remove_mapping(void *virt) ++{ ++ struct phys_desc *desc; ++ ++ virt = (void *) ((unsigned long) virt & PAGE_MASK); ++ desc = find_virtmem_hash(&virtmem_hash, virt); ++ if(desc == NULL) ++ return(0); ++ ++ remove_mapping(desc); ++ return(1); ++} ++ ++void physmem_forget_descriptor(int fd) ++{ ++ struct desc_mapping *desc; ++ struct phys_desc *page; ++ struct list_head *ele, *next; ++ __u64 offset; ++ void *addr; ++ int err; ++ ++ desc = find_mapping(fd); ++ if(desc == NULL) ++ return; ++ ++ list_for_each_safe(ele, next, &desc->pages){ ++ page = list_entry(ele, struct phys_desc, list); ++ offset = page->offset; ++ addr = page->virt; ++ remove_mapping(page); ++ err = os_seek_file(fd, offset); ++ if(err) ++ panic("physmem_forget_descriptor - failed to seek " ++ "to %lld in fd %d, error = %d\n", ++ offset, fd, -err); ++ err = os_read_file(fd, addr, PAGE_SIZE); ++ if(err < 0) ++ panic("physmem_forget_descriptor - failed to read " ++ "from fd %d to 0x%p, error = %d\n", ++ fd, addr, -err); ++ } ++ ++ list_del(&desc->list); ++ kfree(desc); ++} ++ ++void arch_free_page(struct page *page, int order) ++{ ++ void *virt; ++ int i; ++ ++ for(i = 0; i < (1 << order); i++){ ++ virt = __va(page_to_phys(page + i)); ++ physmem_remove_mapping(virt); ++ } ++} ++ ++int is_remapped(void *virt) ++{ ++ return(find_virtmem_hash(&virtmem_hash, virt) != NULL); ++} ++ ++/* Changed during early boot */ ++unsigned long high_physmem; ++ ++extern unsigned long physmem_size; ++ ++void *to_virt(unsigned long phys) ++{ ++ return((void *) uml_physmem + phys); ++} ++ ++unsigned long to_phys(void *virt) ++{ ++ return(((unsigned long) virt) - uml_physmem); ++} ++ ++int init_maps(unsigned long physmem, unsigned long iomem, unsigned long highmem) ++{ ++ struct page *p, *map; ++ unsigned long phys_len, phys_pages, highmem_len, highmem_pages; ++ unsigned
long iomem_len, iomem_pages, total_len, total_pages; ++ int i; ++ ++ phys_pages = physmem >> PAGE_SHIFT; ++ phys_len = phys_pages * sizeof(struct page); ++ ++ iomem_pages = iomem >> PAGE_SHIFT; ++ iomem_len = iomem_pages * sizeof(struct page); ++ ++ highmem_pages = highmem >> PAGE_SHIFT; ++ highmem_len = highmem_pages * sizeof(struct page); ++ ++ total_pages = phys_pages + iomem_pages + highmem_pages; ++ total_len = phys_len + iomem_len + highmem_len; /* sum the byte lengths, not a page count, so map[] holds total_pages entries */ ++ ++ if(kmalloc_ok){ ++ map = kmalloc(total_len, GFP_KERNEL); ++ if(map == NULL) ++ map = vmalloc(total_len); ++ } ++ else map = alloc_bootmem_low_pages(total_len); ++ ++ if(map == NULL) ++ return(-ENOMEM); ++ ++ for(i = 0; i < total_pages; i++){ ++ p = &map[i]; ++ set_page_count(p, 0); ++ SetPageReserved(p); ++ INIT_LIST_HEAD(&p->list); ++ } ++ ++ mem_map = map; ++ max_mapnr = total_pages; ++ return(0); ++} ++ ++struct page *phys_to_page(const unsigned long phys) ++{ ++ return(&mem_map[phys >> PAGE_SHIFT]); ++} ++ ++struct page *__virt_to_page(const unsigned long virt) ++{ ++ return(&mem_map[__pa(virt) >> PAGE_SHIFT]); ++} ++ ++unsigned long page_to_phys(struct page *page) ++{ ++ return((page - mem_map) << PAGE_SHIFT); ++} ++ ++pte_t mk_pte(struct page *page, pgprot_t pgprot) ++{ ++ pte_t pte; ++ ++ pte_val(pte) = page_to_phys(page) + pgprot_val(pgprot); ++ if(pte_present(pte)) pte_mknewprot(pte_mknewpage(pte)); ++ return(pte); ++} ++ ++/* Changed during early boot */ ++static unsigned long kmem_top = 0; ++ ++unsigned long get_kmem_end(void) ++{ ++ if(kmem_top == 0) ++ kmem_top = CHOOSE_MODE(kmem_end_tt, kmem_end_skas); ++ return(kmem_top); ++} ++ ++void map_memory(unsigned long virt, unsigned long phys, unsigned long len, ++ int r, int w, int x) ++{ ++ __u64 offset; ++ int fd, err; ++ ++ fd = phys_mapping(phys, &offset); ++ err = os_map_memory((void *) virt, fd, offset, len, r, w, x); ++ if(err) ++ panic("map_memory(0x%lx, %d, 0x%llx, %ld, %d, %d, %d) failed, " ++ "err = %d\n", virt, fd, offset, len, r, w, x, 
err); ++} ++ ++#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) ++ ++void setup_physmem(unsigned long start, unsigned long reserve_end, ++ unsigned long len, unsigned long highmem) ++{ ++ unsigned long reserve = reserve_end - start; ++ int pfn = PFN_UP(__pa(reserve_end)); ++ int delta = (len - reserve) >> PAGE_SHIFT; ++ int err, offset, bootmap_size; ++ ++ physmem_fd = create_mem_file(len + highmem); ++ ++ offset = uml_reserved - uml_physmem; ++ err = os_map_memory((void *) uml_reserved, physmem_fd, offset, ++ len - offset, 1, 1, 0); ++ if(err < 0){ ++ os_print_error(err, "Mapping memory"); ++ exit(1); ++ } ++ ++ bootmap_size = init_bootmem(pfn, pfn + delta); ++ free_bootmem(__pa(reserve_end) + bootmap_size, ++ len - bootmap_size - reserve); ++} ++ ++int phys_mapping(unsigned long phys, __u64 *offset_out) ++{ ++ struct phys_desc *desc = find_virtmem_hash(&virtmem_hash, ++ __va(phys & PAGE_MASK)); ++ int fd = -1; ++ ++ if(desc != NULL){ ++ fd = desc->fd; ++ *offset_out = desc->offset; ++ } ++ else if(phys < physmem_size){ ++ fd = physmem_fd; ++ *offset_out = phys; ++ } ++ else if(phys < __pa(end_iomem)){ ++ struct iomem_region *region = iomem_regions; ++ ++ while(region != NULL){ ++ if((phys >= region->phys) && ++ (phys < region->phys + region->size)){ ++ fd = region->fd; ++ *offset_out = phys - region->phys; ++ break; ++ } ++ region = region->next; ++ } ++ } ++ else if(phys < __pa(end_iomem) + highmem){ ++ fd = physmem_fd; ++ *offset_out = phys - iomem_size; ++ } ++ ++ return(fd); ++} ++ ++static int __init uml_mem_setup(char *line, int *add) ++{ ++ char *retptr; ++ physmem_size = memparse(line,&retptr); ++ return 0; ++} ++__uml_setup("mem=", uml_mem_setup, ++"mem=\n" ++" This controls how much \"physical\" memory the kernel allocates\n" ++" for the system. The size is specified as a number followed by\n" ++" one of 'k', 'K', 'm', 'M', which have the obvious meanings.\n" ++" This is not related to the amount of memory in the host. 
It can\n" ++" be more, and the excess, if it's ever used, will just be swapped out.\n" ++" Example: mem=64M\n\n" ++); ++ ++unsigned long find_iomem(char *driver, unsigned long *len_out) ++{ ++ struct iomem_region *region = iomem_regions; ++ ++ for(; region != NULL; region = region->next){ /* step to the next region each pass so a non-matching entry cannot spin forever */ ++ if(!strcmp(region->driver, driver)){ ++ *len_out = region->size; ++ return(region->virt); ++ } ++ } ++ ++ return(0); ++} ++ ++int setup_iomem(void) ++{ ++ struct iomem_region *region = iomem_regions; ++ unsigned long iomem_start = high_physmem + PAGE_SIZE; ++ int err; ++ ++ while(region != NULL){ ++ err = os_map_memory((void *) iomem_start, region->fd, 0, ++ region->size, 1, 1, 0); ++ if(err) ++ printk("Mapping iomem region for driver '%s' failed, " ++ "errno = %d\n", region->driver, -err); ++ else { ++ region->virt = iomem_start; ++ region->phys = __pa(region->virt); ++ } ++ ++ iomem_start += region->size + PAGE_SIZE; ++ region = region->next; ++ } ++ ++ return(0); ++} ++ ++__initcall(setup_iomem); ++ ++/* ++ * Overrides for Emacs so that we follow Linus's tabbing style. ++ * Emacs will notice this stuff at the end of the file and automatically ++ * adjust the settings for this buffer only. This must remain at the end ++ * of the file. ++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -Naur a/arch/um/kernel/process.c b/arch/um/kernel/process.c +--- a/arch/um/kernel/process.c 2004-02-11 12:16:36.000000000 -0500 ++++ b/arch/um/kernel/process.c 2004-02-11 12:28:35.000000000 -0500 +@@ -9,12 +9,10 @@ + #include + #include + #include +-#include + #include + #include + #include + #include +-#include + #include + #include + #include +@@ -58,7 +56,11 @@ + { + int flags = altstack ? SA_ONSTACK : 0; + +- set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags, ++ /* NODEFER is set here because SEGV isn't turned back on when the ++ * handler is ready to receive signals. 
This causes any segfault ++ * during a copy_user to kill the process because the fault is blocked. ++ */ ++ set_handler(SIGSEGV, (__sighandler_t) sig_handler, flags | SA_NODEFER, + SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); + set_handler(SIGTRAP, (__sighandler_t) sig_handler, flags, + SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); +@@ -72,7 +74,6 @@ + SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1); + set_handler(SIGUSR2, (__sighandler_t) sig_handler, + SA_NOMASK | flags, -1); +- (void) CHOOSE_MODE(signal(SIGCHLD, SIG_IGN), (void *) 0); + signal(SIGHUP, SIG_IGN); + + init_irq_signals(altstack); +@@ -123,11 +124,12 @@ + /* Start the process and wait for it to kill itself */ + new_pid = clone(outer_tramp, (void *) sp, clone_flags, &arg); + if(new_pid < 0) return(-errno); +- while((err = waitpid(new_pid, &status, 0) < 0) && (errno == EINTR)) ; ++ while(((err = waitpid(new_pid, &status, 0)) < 0) && (errno == EINTR)) ; + if(err < 0) panic("Waiting for outer trampoline failed - errno = %d", + errno); + if(!WIFSIGNALED(status) || (WTERMSIG(status) != SIGKILL)) +- panic("outer trampoline didn't exit with SIGKILL"); ++ panic("outer trampoline didn't exit with SIGKILL, " ++ "status = %d", status); + + return(arg.pid); + } +@@ -138,7 +140,7 @@ + + os_stop_process(os_getpid()); + +- if(read(fd, &c, sizeof(c)) != sizeof(c)) ++ if(os_read_file(fd, &c, sizeof(c)) != sizeof(c)) + panic("read failed in suspend_new_thread"); + } + +@@ -233,7 +235,7 @@ + int n; + + *jmp_ptr = &buf; +- n = setjmp(buf); ++ n = sigsetjmp(buf, 1); + if(n != 0) + return(n); + (*fn)(arg); +@@ -273,7 +275,7 @@ + stop_ptraced_child(pid, stack, 1); + + printf("Checking for /proc/mm..."); +- if(access("/proc/mm", W_OK)){ ++ if(os_access("/proc/mm", OS_ACC_W_OK) < 0){ + printf("not found\n"); + ret = 0; + } +diff -Naur a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c +--- a/arch/um/kernel/process_kern.c 2004-02-11 12:15:46.000000000 -0500 ++++ b/arch/um/kernel/process_kern.c 
2004-02-11 12:27:37.000000000 -0500 +@@ -16,6 +16,7 @@ + #include "linux/module.h" + #include "linux/init.h" + #include "linux/capability.h" ++#include "linux/spinlock.h" + #include "asm/unistd.h" + #include "asm/mman.h" + #include "asm/segment.h" +@@ -23,7 +24,6 @@ + #include "asm/pgtable.h" + #include "asm/processor.h" + #include "asm/tlbflush.h" +-#include "asm/spinlock.h" + #include "asm/uaccess.h" + #include "asm/user.h" + #include "user_util.h" +@@ -52,17 +52,12 @@ + + struct task_struct *get_task(int pid, int require) + { +- struct task_struct *task, *ret; ++ struct task_struct *ret; + +- ret = NULL; + read_lock(&tasklist_lock); +- for_each_process(task){ +- if(task->pid == pid){ +- ret = task; +- break; +- } +- } ++ ret = find_task_by_pid(pid); + read_unlock(&tasklist_lock); ++ + if(require && (ret == NULL)) panic("get_task couldn't find a task\n"); + return(ret); + } +@@ -95,7 +90,8 @@ + int flags = GFP_KERNEL; + + if(atomic) flags |= GFP_ATOMIC; +- if((page = __get_free_pages(flags, order)) == 0) ++ page = __get_free_pages(flags, order); ++ if(page == 0) + return(0); + stack_protections(page); + return(page); +@@ -103,13 +99,15 @@ + + int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) + { +- struct task_struct *p; ++ int pid; + + current->thread.request.u.thread.proc = fn; + current->thread.request.u.thread.arg = arg; +- p = do_fork(CLONE_VM | flags, 0, NULL, 0, NULL, NULL); +- if(IS_ERR(p)) panic("do_fork failed in kernel_thread"); +- return(p->pid); ++ pid = do_fork(CLONE_VM | CLONE_UNTRACED | flags, 0, NULL, 0, NULL, ++ NULL); ++ if(pid < 0) ++ panic("do_fork failed in kernel_thread, errno = %d", pid); ++ return(pid); + } + + void switch_mm(struct mm_struct *prev, struct mm_struct *next, +@@ -129,7 +127,7 @@ + { external_pid(task), task }); + } + +-void *switch_to(void *prev, void *next, void *last) ++void *_switch_to(void *prev, void *next, void *last) + { + return(CHOOSE_MODE(switch_to_tt(prev, next), + switch_to_skas(prev, 
next))); +@@ -149,7 +147,7 @@ + void exit_thread(void) + { + CHOOSE_MODE(exit_thread_tt(), exit_thread_skas()); +- unprotect_stack((unsigned long) current->thread_info); ++ unprotect_stack((unsigned long) current_thread); + } + + void *get_current(void) +@@ -157,6 +155,10 @@ + return(current); + } + ++void prepare_to_copy(struct task_struct *tsk) ++{ ++} ++ + int copy_thread(int nr, unsigned long clone_flags, unsigned long sp, + unsigned long stack_top, struct task_struct * p, + struct pt_regs *regs) +@@ -190,7 +192,7 @@ + + void default_idle(void) + { +- idle_timer(); ++ uml_idle_timer(); + + atomic_inc(&init_mm.mm_count); + current->mm = &init_mm; +@@ -367,10 +369,15 @@ + return(clear_user(buf, size)); + } + ++int strlen_user_proc(char *str) ++{ ++ return(strlen_user(str)); ++} ++ + int smp_sigio_handler(void) + { + #ifdef CONFIG_SMP +- int cpu = current->thread_info->cpu; ++ int cpu = current_thread->cpu; + IPI_handler(cpu); + if(cpu != 0) + return(1); +@@ -385,7 +392,7 @@ + + int cpu(void) + { +- return(current->thread_info->cpu); ++ return(current_thread->cpu); + } + + /* +diff -Naur a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c +--- a/arch/um/kernel/ptrace.c 2004-02-11 12:14:24.000000000 -0500 ++++ b/arch/um/kernel/ptrace.c 2004-02-11 12:26:07.000000000 -0500 +@@ -311,11 +311,8 @@ + + /* the 0x80 provides a way for the tracing parent to distinguish + between a syscall stop and SIGTRAP delivery */ +- current->exit_code = SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) +- ? 0x80 : 0); +- current->state = TASK_STOPPED; +- notify_parent(current, SIGCHLD); +- schedule(); ++ ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD) ++ ? 
0x80 : 0)); + + /* + * this isn't the same as continuing with a signal, but it will do +diff -Naur a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c +--- a/arch/um/kernel/reboot.c 2004-02-11 12:15:45.000000000 -0500 ++++ b/arch/um/kernel/reboot.c 2004-02-11 12:27:36.000000000 -0500 +@@ -15,6 +15,7 @@ + #ifdef CONFIG_SMP + static void kill_idlers(int me) + { ++#ifdef CONFIG_MODE_TT + struct task_struct *p; + int i; + +@@ -23,6 +24,7 @@ + if((p != NULL) && (p->thread.mode.tt.extern_pid != me)) + os_kill_process(p->thread.mode.tt.extern_pid, 0); + } ++#endif + } + #endif + +diff -Naur a/arch/um/kernel/sigio_kern.c b/arch/um/kernel/sigio_kern.c +--- a/arch/um/kernel/sigio_kern.c 2004-02-11 12:14:33.000000000 -0500 ++++ b/arch/um/kernel/sigio_kern.c 2004-02-11 12:26:17.000000000 -0500 +@@ -6,18 +6,21 @@ + #include "linux/kernel.h" + #include "linux/list.h" + #include "linux/slab.h" +-#include "asm/irq.h" ++#include "linux/signal.h" ++#include "linux/interrupt.h" + #include "init.h" + #include "sigio.h" + #include "irq_user.h" ++#include "irq_kern.h" + + /* Protected by sigio_lock() called from write_sigio_workaround */ + static int sigio_irq_fd = -1; + +-void sigio_interrupt(int irq, void *data, struct pt_regs *unused) ++irqreturn_t sigio_interrupt(int irq, void *data, struct pt_regs *unused) + { + read_sigio_fd(sigio_irq_fd); + reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ); ++ return(IRQ_HANDLED); + } + + int write_sigio_irq(int fd) +diff -Naur a/arch/um/kernel/sigio_user.c b/arch/um/kernel/sigio_user.c +--- a/arch/um/kernel/sigio_user.c 2004-02-11 12:16:48.000000000 -0500 ++++ b/arch/um/kernel/sigio_user.c 2004-02-11 12:29:02.000000000 -0500 +@@ -7,7 +7,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -26,7 +25,7 @@ + int pty_close_sigio = 0; + + /* Used as a flag during SIGIO testing early in boot */ +-static int got_sigio = 0; ++static volatile int got_sigio = 0; + + void __init handler(int sig) + { +@@ -45,7 +44,7 @@ + + 
info->err = 0; + if(openpty(&info->master, &info->slave, NULL, NULL, NULL)) +- info->err = errno; ++ info->err = -errno; + } + + void __init check_one_sigio(void (*proc)(int, int)) +@@ -53,11 +52,11 @@ + struct sigaction old, new; + struct termios tt; + struct openpty_arg pty = { .master = -1, .slave = -1 }; +- int master, slave, flags; ++ int master, slave, err; + + initial_thread_cb(openpty_cb, &pty); + if(pty.err){ +- printk("openpty failed, errno = %d\n", pty.err); ++ printk("openpty failed, errno = %d\n", -pty.err); + return; + } + +@@ -69,23 +68,16 @@ + return; + } + ++ /* XXX These can fail with EINTR */ + if(tcgetattr(master, &tt) < 0) + panic("check_sigio : tcgetattr failed, errno = %d\n", errno); + cfmakeraw(&tt); + if(tcsetattr(master, TCSADRAIN, &tt) < 0) + panic("check_sigio : tcsetattr failed, errno = %d\n", errno); + +- if((flags = fcntl(master, F_GETFL)) < 0) +- panic("tty_fds : fcntl F_GETFL failed, errno = %d\n", errno); +- +- if((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) || +- (fcntl(master, F_SETOWN, os_getpid()) < 0)) +- panic("check_sigio : fcntl F_SETFL or F_SETOWN failed, " +- "errno = %d\n", errno); +- +- if((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0)) +- panic("check_sigio : fcntl F_SETFL failed, errno = %d\n", +- errno); ++ err = os_sigio_async(master, slave); ++ if(err < 0) ++ panic("tty_fds : sigio_async failed, err = %d\n", -err); + + if(sigaction(SIGIO, NULL, &old) < 0) + panic("check_sigio : sigaction 1 failed, errno = %d\n", errno); +@@ -97,8 +89,8 @@ + got_sigio = 0; + (*proc)(master, slave); + +- close(master); +- close(slave); ++ os_close_file(master); ++ os_close_file(slave); + + if(sigaction(SIGIO, &old, NULL) < 0) + panic("check_sigio : sigaction 3 failed, errno = %d\n", errno); +@@ -112,25 +104,25 @@ + printk("Checking that host ptys support output SIGIO..."); + + memset(buf, 0, sizeof(buf)); +- while(write(master, buf, sizeof(buf)) > 0) ; ++ ++ while(os_write_file(master, buf, sizeof(buf)) > 0) ; + 
if(errno != EAGAIN) + panic("check_sigio : write failed, errno = %d\n", errno); +- +- while(((n = read(slave, buf, sizeof(buf))) > 0) && !got_sigio) ; ++ while(((n = os_read_file(slave, buf, sizeof(buf))) > 0) && !got_sigio) ; + + if(got_sigio){ + printk("Yes\n"); + pty_output_sigio = 1; + } +- else if(errno == EAGAIN) printk("No, enabling workaround\n"); +- else panic("check_sigio : read failed, errno = %d\n", errno); ++ else if(n == -EAGAIN) printk("No, enabling workaround\n"); ++ else panic("check_sigio : read failed, err = %d\n", n); + } + + static void tty_close(int master, int slave) + { + printk("Checking that host ptys support SIGIO on close..."); + +- close(slave); ++ os_close_file(slave); + if(got_sigio){ + printk("Yes\n"); + pty_close_sigio = 1; +@@ -140,7 +132,8 @@ + + void __init check_sigio(void) + { +- if(access("/dev/ptmx", R_OK) && access("/dev/ptyp0", R_OK)){ ++ if((os_access("/dev/ptmx", OS_ACC_R_OK) < 0) && ++ (os_access("/dev/ptyp0", OS_ACC_R_OK) < 0)){ + printk("No pseudo-terminals available - skipping pty SIGIO " + "check\n"); + return; +@@ -201,11 +194,10 @@ + p = &fds->poll[i]; + if(p->revents == 0) continue; + if(p->fd == sigio_private[1]){ +- n = read(sigio_private[1], &c, sizeof(c)); ++ n = os_read_file(sigio_private[1], &c, sizeof(c)); + if(n != sizeof(c)) + printk("write_sigio_thread : " +- "read failed, errno = %d\n", +- errno); ++ "read failed, err = %d\n", -n); + tmp = current_poll; + current_poll = next_poll; + next_poll = tmp; +@@ -218,10 +210,10 @@ + (fds->used - i) * sizeof(*fds->poll)); + } + +- n = write(respond_fd, &c, sizeof(c)); ++ n = os_write_file(respond_fd, &c, sizeof(c)); + if(n != sizeof(c)) + printk("write_sigio_thread : write failed, " +- "errno = %d\n", errno); ++ "err = %d\n", -n); + } + } + } +@@ -252,15 +244,15 @@ + char c; + + flags = set_signals(0); +- n = write(sigio_private[0], &c, sizeof(c)); ++ n = os_write_file(sigio_private[0], &c, sizeof(c)); + if(n != sizeof(c)){ +- printk("update_thread : write 
failed, errno = %d\n", errno); ++ printk("update_thread : write failed, err = %d\n", -n); + goto fail; + } + +- n = read(sigio_private[0], &c, sizeof(c)); ++ n = os_read_file(sigio_private[0], &c, sizeof(c)); + if(n != sizeof(c)){ +- printk("update_thread : read failed, errno = %d\n", errno); ++ printk("update_thread : read failed, err = %d\n", -n); + goto fail; + } + +@@ -271,10 +263,10 @@ + if(write_sigio_pid != -1) + os_kill_process(write_sigio_pid, 1); + write_sigio_pid = -1; +- close(sigio_private[0]); +- close(sigio_private[1]); +- close(write_sigio_fds[0]); +- close(write_sigio_fds[1]); ++ os_close_file(sigio_private[0]); ++ os_close_file(sigio_private[1]); ++ os_close_file(write_sigio_fds[0]); ++ os_close_file(write_sigio_fds[1]); + sigio_unlock(); + set_signals(flags); + } +@@ -369,15 +361,15 @@ + goto out; + + err = os_pipe(write_sigio_fds, 1, 1); +- if(err){ ++ if(err < 0){ + printk("write_sigio_workaround - os_pipe 1 failed, " +- "errno = %d\n", -err); ++ "err = %d\n", -err); + goto out; + } + err = os_pipe(sigio_private, 1, 1); +- if(err){ ++ if(err < 0){ + printk("write_sigio_workaround - os_pipe 2 failed, " +- "errno = %d\n", -err); ++ "err = %d\n", -err); + goto out_close1; + } + if(setup_initial_poll(sigio_private[1])) +@@ -399,11 +391,11 @@ + os_kill_process(write_sigio_pid, 1); + write_sigio_pid = -1; + out_close2: +- close(sigio_private[0]); +- close(sigio_private[1]); ++ os_close_file(sigio_private[0]); ++ os_close_file(sigio_private[1]); + out_close1: +- close(write_sigio_fds[0]); +- close(write_sigio_fds[1]); ++ os_close_file(write_sigio_fds[0]); ++ os_close_file(write_sigio_fds[1]); + sigio_unlock(); + } + +@@ -412,10 +404,16 @@ + int n; + char c; + +- n = read(fd, &c, sizeof(c)); ++ n = os_read_file(fd, &c, sizeof(c)); + if(n != sizeof(c)){ +- printk("read_sigio_fd - read failed, errno = %d\n", errno); +- return(-errno); ++ if(n < 0) { ++ printk("read_sigio_fd - read failed, err = %d\n", -n); ++ return(n); ++ } ++ else { ++ 
printk("read_sigio_fd - short read, bytes = %d\n", n); ++ return(-EIO); ++ } + } + return(n); + } +diff -Naur a/arch/um/kernel/signal_kern.c b/arch/um/kernel/signal_kern.c +--- a/arch/um/kernel/signal_kern.c 2004-02-11 12:15:52.000000000 -0500 ++++ b/arch/um/kernel/signal_kern.c 2004-02-11 12:27:44.000000000 -0500 +@@ -36,7 +36,7 @@ + if(sig == SIGSEGV){ + struct k_sigaction *ka; + +- ka = &current->sig->action[SIGSEGV - 1]; ++ ka = &current->sighand->action[SIGSEGV - 1]; + ka->sa.sa_handler = SIG_DFL; + } + force_sig(SIGSEGV, current); +@@ -60,10 +60,10 @@ + int err, ret; + + ret = 0; ++ /* Always make any pending restarted system calls return -EINTR */ ++ current_thread_info()->restart_block.fn = do_no_restart_syscall; + switch(error){ + case -ERESTART_RESTARTBLOCK: +- current_thread_info()->restart_block.fn = +- do_no_restart_syscall; + case -ERESTARTNOHAND: + ret = -EINTR; + break; +@@ -142,7 +142,7 @@ + return(0); + + /* Whee! Actually deliver the signal. */ +- ka = &current->sig->action[sig -1 ]; ++ ka = &current->sighand->action[sig -1 ]; + err = handle_signal(regs, sig, ka, &info, oldset, error); + if(!err) return(1); + +@@ -201,7 +201,7 @@ + } + } + +-int sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize) ++int sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize) + { + sigset_t saveset, newset; + +@@ -227,20 +227,59 @@ + } + } + ++int sys_sigaction(int sig, const struct old_sigaction __user *act, ++ struct old_sigaction __user *oact) ++{ ++ struct k_sigaction new_ka, old_ka; ++ int ret; ++ ++ if (act) { ++ old_sigset_t mask; ++ if (verify_area(VERIFY_READ, act, sizeof(*act)) || ++ __get_user(new_ka.sa.sa_handler, &act->sa_handler) || ++ __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) ++ return -EFAULT; ++ __get_user(new_ka.sa.sa_flags, &act->sa_flags); ++ __get_user(mask, &act->sa_mask); ++ siginitset(&new_ka.sa.sa_mask, mask); ++ } ++ ++ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? 
&old_ka : NULL); ++ ++ if (!ret && oact) { ++ if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || ++ __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || ++ __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) ++ return -EFAULT; ++ __put_user(old_ka.sa.sa_flags, &oact->sa_flags); ++ __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); ++ } ++ ++ return ret; ++} ++ ++int sys_sigaltstack(const stack_t *uss, stack_t *uoss) ++{ ++ return(do_sigaltstack(uss, uoss, PT_REGS_SP(&current->thread.regs))); ++} ++ ++extern int userspace_pid[]; ++ + static int copy_sc_from_user(struct pt_regs *to, void *from, + struct arch_frame_data *arch) + { + int ret; + + ret = CHOOSE_MODE(copy_sc_from_user_tt(UPT_SC(&to->regs), from, arch), +- copy_sc_from_user_skas(&to->regs, from)); ++ copy_sc_from_user_skas(userspace_pid[0], ++ &to->regs, from)); + return(ret); + } + + int sys_sigreturn(struct pt_regs regs) + { +- void *sc = sp_to_sc(PT_REGS_SP(&current->thread.regs)); +- void *mask = sp_to_mask(PT_REGS_SP(&current->thread.regs)); ++ void __user *sc = sp_to_sc(PT_REGS_SP(&current->thread.regs)); ++ void __user *mask = sp_to_mask(PT_REGS_SP(&current->thread.regs)); + int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long); + + spin_lock_irq(&current->sighand->siglock); +@@ -257,8 +296,8 @@ + + int sys_rt_sigreturn(struct pt_regs regs) + { +- struct ucontext *uc = sp_to_uc(PT_REGS_SP(&current->thread.regs)); +- void *fp; ++ unsigned long sp = PT_REGS_SP(&current->thread.regs); ++ struct ucontext __user *uc = sp_to_uc(sp); + int sig_size = _NSIG_WORDS * sizeof(unsigned long); + + spin_lock_irq(&current->sighand->siglock); +@@ -266,7 +305,6 @@ + sigdelsetmask(&current->blocked, ~_BLOCKABLE); + recalc_sigpending(); + spin_unlock_irq(&current->sighand->siglock); +- fp = (void *) (((unsigned long) uc) + sizeof(struct ucontext)); + copy_sc_from_user(&current->thread.regs, &uc->uc_mcontext, + &signal_frame_si.common.arch); + return(PT_REGS_SYSCALL_RET(&current->thread.regs)); +diff -Naur a/arch/um/kernel/skas/include/mode.h b/arch/um/kernel/skas/include/mode.h 
+--- a/arch/um/kernel/skas/include/mode.h 2004-02-11 12:15:48.000000000 -0500 ++++ b/arch/um/kernel/skas/include/mode.h 2004-02-11 12:27:41.000000000 -0500 +@@ -12,14 +12,16 @@ + extern int have_fpx_regs; + + extern void user_time_init_skas(void); +-extern int copy_sc_from_user_skas(union uml_pt_regs *regs, void *from_ptr); +-extern int copy_sc_to_user_skas(void *to_ptr, void *fp, ++extern int copy_sc_from_user_skas(int pid, union uml_pt_regs *regs, ++ void *from_ptr); ++extern int copy_sc_to_user_skas(int pid, void *to_ptr, void *fp, + union uml_pt_regs *regs, + unsigned long fault_addr, int fault_type); + extern void sig_handler_common_skas(int sig, void *sc_ptr); + extern void halt_skas(void); + extern void reboot_skas(void); + extern void kill_off_processes_skas(void); ++extern int is_skas_winch(int pid, int fd, void *data); + + #endif + +diff -Naur a/arch/um/kernel/skas/include/skas.h b/arch/um/kernel/skas/include/skas.h +--- a/arch/um/kernel/skas/include/skas.h 2004-02-11 12:14:37.000000000 -0500 ++++ b/arch/um/kernel/skas/include/skas.h 2004-02-11 12:26:22.000000000 -0500 +@@ -8,7 +8,7 @@ + + #include "sysdep/ptrace.h" + +-extern int userspace_pid; ++extern int userspace_pid[]; + + extern void switch_threads(void *me, void *next); + extern void thread_wait(void *sw, void *fb); +@@ -32,7 +32,7 @@ + extern int new_mm(int from); + extern void save_registers(union uml_pt_regs *regs); + extern void restore_registers(union uml_pt_regs *regs); +-extern void start_userspace(void); ++extern void start_userspace(int cpu); + extern void init_registers(int pid); + + #endif +diff -Naur a/arch/um/kernel/skas/include/uaccess.h b/arch/um/kernel/skas/include/uaccess.h +--- a/arch/um/kernel/skas/include/uaccess.h 2004-02-11 12:15:17.000000000 -0500 ++++ b/arch/um/kernel/skas/include/uaccess.h 2004-02-11 12:27:05.000000000 -0500 +@@ -6,20 +6,12 @@ + #ifndef __SKAS_UACCESS_H + #define __SKAS_UACCESS_H + +-#include "linux/string.h" +-#include "linux/sched.h" +-#include 
"linux/err.h" +-#include "asm/processor.h" +-#include "asm/pgtable.h" + #include "asm/errno.h" +-#include "asm/current.h" +-#include "asm/a.out.h" +-#include "kern_util.h" + + #define access_ok_skas(type, addr, size) \ + ((segment_eq(get_fs(), KERNEL_DS)) || \ + (((unsigned long) (addr) < TASK_SIZE) && \ +- ((unsigned long) (addr) + (size) < TASK_SIZE))) ++ ((unsigned long) (addr) + (size) <= TASK_SIZE))) + + static inline int verify_area_skas(int type, const void * addr, + unsigned long size) +@@ -27,197 +19,12 @@ + return(access_ok_skas(type, addr, size) ? 0 : -EFAULT); + } + +-static inline unsigned long maybe_map(unsigned long virt, int is_write) +-{ +- pte_t pte; +- +- void *phys = um_virt_to_phys(current, virt, &pte); +- int dummy_code; +- +- if(IS_ERR(phys) || (is_write && !pte_write(pte))){ +- if(handle_page_fault(virt, 0, is_write, 0, &dummy_code)) +- return(0); +- phys = um_virt_to_phys(current, virt, NULL); +- } +- return((unsigned long) __va((unsigned long) phys)); +-} +- +-static inline int buffer_op(unsigned long addr, int len, +- int (*op)(unsigned long addr, int len, void *arg), +- void *arg) +-{ +- int size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len); +- int remain = len, n; +- +- n = (*op)(addr, size, arg); +- if(n != 0) +- return(n < 0 ? remain : 0); +- +- addr += size; +- remain -= size; +- if(remain == 0) +- return(0); +- +- while(addr < ((addr + remain) & PAGE_MASK)){ +- n = (*op)(addr, PAGE_SIZE, arg); +- if(n != 0) +- return(n < 0 ? remain : 0); +- +- addr += PAGE_SIZE; +- remain -= PAGE_SIZE; +- } +- if(remain == 0) +- return(0); +- +- n = (*op)(addr, remain, arg); +- if(n != 0) +- return(n < 0 ? 
remain : 0); +- return(0); +-} +- +-static inline int copy_chunk_from_user(unsigned long from, int len, void *arg) +-{ +- unsigned long *to_ptr = arg, to = *to_ptr; +- +- from = maybe_map(from, 0); +- if(from == 0) +- return(-1); +- +- memcpy((void *) to, (void *) from, len); +- *to_ptr += len; +- return(0); +-} +- +-static inline int copy_from_user_skas(void *to, const void *from, int n) +-{ +- if(segment_eq(get_fs(), KERNEL_DS)){ +- memcpy(to, from, n); +- return(0); +- } +- +- return(access_ok_skas(VERIFY_READ, from, n) ? +- buffer_op((unsigned long) from, n, copy_chunk_from_user, &to) : +- n); +-} +- +-static inline int copy_chunk_to_user(unsigned long to, int len, void *arg) +-{ +- unsigned long *from_ptr = arg, from = *from_ptr; +- +- to = maybe_map(to, 1); +- if(to == 0) +- return(-1); +- +- memcpy((void *) to, (void *) from, len); +- *from_ptr += len; +- return(0); +-} +- +-static inline int copy_to_user_skas(void *to, const void *from, int n) +-{ +- if(segment_eq(get_fs(), KERNEL_DS)){ +- memcpy(to, from, n); +- return(0); +- } +- +- return(access_ok_skas(VERIFY_WRITE, to, n) ? 
+- buffer_op((unsigned long) to, n, copy_chunk_to_user, &from) : +- n); +-} +- +-static inline int strncpy_chunk_from_user(unsigned long from, int len, +- void *arg) +-{ +- char **to_ptr = arg, *to = *to_ptr; +- int n; +- +- from = maybe_map(from, 0); +- if(from == 0) +- return(-1); +- +- strncpy(to, (void *) from, len); +- n = strnlen(to, len); +- *to_ptr += n; +- +- if(n < len) +- return(1); +- return(0); +-} +- +-static inline int strncpy_from_user_skas(char *dst, const char *src, int count) +-{ +- int n; +- char *ptr = dst; +- +- if(segment_eq(get_fs(), KERNEL_DS)){ +- strncpy(dst, src, count); +- return(strnlen(dst, count)); +- } +- +- if(!access_ok_skas(VERIFY_READ, src, 1)) +- return(-EFAULT); +- +- n = buffer_op((unsigned long) src, count, strncpy_chunk_from_user, +- &ptr); +- if(n != 0) +- return(-EFAULT); +- return(strnlen(dst, count)); +-} +- +-static inline int clear_chunk(unsigned long addr, int len, void *unused) +-{ +- addr = maybe_map(addr, 1); +- if(addr == 0) +- return(-1); +- +- memset((void *) addr, 0, len); +- return(0); +-} +- +-static inline int __clear_user_skas(void *mem, int len) +-{ +- return(buffer_op((unsigned long) mem, len, clear_chunk, NULL)); +-} +- +-static inline int clear_user_skas(void *mem, int len) +-{ +- if(segment_eq(get_fs(), KERNEL_DS)){ +- memset(mem, 0, len); +- return(0); +- } +- +- return(access_ok_skas(VERIFY_WRITE, mem, len) ? 
+- buffer_op((unsigned long) mem, len, clear_chunk, NULL) : len); +-} +- +-static inline int strnlen_chunk(unsigned long str, int len, void *arg) +-{ +- int *len_ptr = arg, n; +- +- str = maybe_map(str, 0); +- if(str == 0) +- return(-1); +- +- n = strnlen((void *) str, len); +- *len_ptr += n; +- +- if(n < len) +- return(1); +- return(0); +-} +- +-static inline int strnlen_user_skas(const void *str, int len) +-{ +- int count = 0, n; +- +- if(segment_eq(get_fs(), KERNEL_DS)) +- return(strnlen(str, len) + 1); +- +- n = buffer_op((unsigned long) str, len, strnlen_chunk, &count); +- if(n == 0) +- return(count + 1); +- return(-EFAULT); +-} ++extern int copy_from_user_skas(void *to, const void *from, int n); ++extern int copy_to_user_skas(void *to, const void *from, int n); ++extern int strncpy_from_user_skas(char *dst, const char *src, int count); ++extern int __clear_user_skas(void *mem, int len); ++extern int clear_user_skas(void *mem, int len); ++extern int strnlen_user_skas(const void *str, int len); + + #endif + +diff -Naur a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile +--- a/arch/um/kernel/skas/Makefile 2004-02-11 12:14:38.000000000 -0500 ++++ b/arch/um/kernel/skas/Makefile 2004-02-11 12:26:41.000000000 -0500 +@@ -5,20 +5,24 @@ + + obj-y = exec_kern.o exec_user.o mem.o mem_user.o mmu.o process.o \ + process_kern.o syscall_kern.o syscall_user.o time.o tlb.o trap_user.o \ +- sys-$(SUBARCH)/ ++ uaccess.o sys-$(SUBARCH)/ ++ ++host-progs := util/mk_ptregs ++clean-files := include/skas_ptregs.h + + USER_OBJS = $(filter %_user.o,$(obj-y)) process.o time.o + USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) + +-include/skas_ptregs.h : util/mk_ptregs +- util/mk_ptregs > $@ +- +-util/mk_ptregs : +- $(MAKE) -C util ++$(TOPDIR)/arch/um/include/skas_ptregs.h : $(src)/util/mk_ptregs ++ @echo -n ' Generating $@' ++ @$< > $@.tmp ++ @if [ -r $@ ] && cmp -s $@ $@.tmp; then \ ++ echo ' (unchanged)'; \ ++ rm -f $@.tmp; \ ++ else \ ++ echo ' (updated)'; \ ++ mv 
-f $@.tmp $@; \ ++ fi + + $(USER_OBJS) : %.o: %.c + $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< +- +-clean : +- $(MAKE) -C util clean +- $(RM) -f include/skas_ptregs.h +diff -Naur a/arch/um/kernel/skas/mem_user.c b/arch/um/kernel/skas/mem_user.c +--- a/arch/um/kernel/skas/mem_user.c 2004-02-11 12:14:58.000000000 -0500 ++++ b/arch/um/kernel/skas/mem_user.c 2004-02-11 12:26:50.000000000 -0500 +@@ -7,6 +7,7 @@ + #include + #include + #include "mem_user.h" ++#include "mem.h" + #include "user.h" + #include "os.h" + #include "proc_mm.h" +@@ -15,12 +16,12 @@ + int r, int w, int x) + { + struct proc_mm_op map; +- struct mem_region *region; +- int prot, n; ++ __u64 offset; ++ int prot, n, phys_fd; + + prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + (x ? PROT_EXEC : 0); +- region = phys_region(phys); ++ phys_fd = phys_mapping(phys, &offset); + + map = ((struct proc_mm_op) { .op = MM_MMAP, + .u = +@@ -30,12 +31,12 @@ + .prot = prot, + .flags = MAP_SHARED | + MAP_FIXED, +- .fd = region->fd, +- .offset = phys_offset(phys) ++ .fd = phys_fd, ++ .offset = offset + } } } ); + n = os_write_file(fd, &map, sizeof(map)); + if(n != sizeof(map)) +- printk("map : /proc/mm map failed, errno = %d\n", errno); ++ printk("map : /proc/mm map failed, err = %d\n", -n); + } + + int unmap(int fd, void *addr, int len) +@@ -49,8 +50,13 @@ + { .addr = (unsigned long) addr, + .len = len } } } ); + n = os_write_file(fd, &unmap, sizeof(unmap)); +- if((n != 0) && (n != sizeof(unmap))) +- return(-errno); ++ if(n != sizeof(unmap)) { ++ if(n < 0) ++ return(n); ++ else if(n > 0) ++ return(-EIO); ++ } ++ + return(0); + } + +@@ -71,11 +77,15 @@ + .prot = prot } } } ); + + n = os_write_file(fd, &protect, sizeof(protect)); +- if((n != 0) && (n != sizeof(protect))){ ++ if(n != sizeof(protect)) { ++ if(n == 0) return(0); ++ + if(must_succeed) +- panic("protect failed, errno = %d", errno); +- return(-errno); ++ panic("protect failed, err = %d", -n); ++ ++ return(-EIO); + } ++ + return(0); + } + 
+diff -Naur a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c +--- a/arch/um/kernel/skas/mmu.c 2004-02-11 12:17:12.000000000 -0500 ++++ b/arch/um/kernel/skas/mmu.c 2004-02-11 12:29:17.000000000 -0500 +@@ -22,9 +22,11 @@ + else from = -1; + + mm->context.skas.mm_fd = new_mm(from); +- if(mm->context.skas.mm_fd < 0) +- panic("init_new_context_skas - new_mm failed, errno = %d\n", +- mm->context.skas.mm_fd); ++ if(mm->context.skas.mm_fd < 0){ ++ printk("init_new_context_skas - new_mm failed, errno = %d\n", ++ mm->context.skas.mm_fd); ++ return(mm->context.skas.mm_fd); ++ } + + return(0); + } +diff -Naur a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c +--- a/arch/um/kernel/skas/process.c 2004-02-11 12:16:47.000000000 -0500 ++++ b/arch/um/kernel/skas/process.c 2004-02-11 12:29:02.000000000 -0500 +@@ -4,6 +4,7 @@ + */ + + #include ++#include + #include + #include + #include +@@ -24,6 +25,18 @@ + #include "os.h" + #include "proc_mm.h" + #include "skas_ptrace.h" ++#include "chan_user.h" ++ ++int is_skas_winch(int pid, int fd, void *data) ++{ ++ if(pid != getpid()) ++ return(0); ++ ++ register_winch_irq(-1, fd, -1, data); ++ return(1); ++} ++ ++/* These are set once at boot time and not changed thereafter */ + + unsigned long exec_regs[FRAME_SIZE]; + unsigned long exec_fp_regs[HOST_FP_SIZE]; +@@ -48,11 +61,11 @@ + int err, syscall_nr, status; + + syscall_nr = PT_SYSCALL_NR(regs->skas.regs); ++ UPT_SYSCALL_NR(regs) = syscall_nr; + if(syscall_nr < 1){ + relay_signal(SIGTRAP, regs); + return; + } +- UPT_SYSCALL_NR(regs) = syscall_nr; + + err = ptrace(PTRACE_POKEUSER, pid, PT_SYSCALL_NR_OFFSET, __NR_getpid); + if(err < 0) +@@ -72,8 +85,6 @@ + handle_syscall(regs); + } + +-int userspace_pid; +- + static int userspace_tramp(void *arg) + { + init_new_thread_signals(0); +@@ -83,7 +94,11 @@ + return(0); + } + +-void start_userspace(void) ++/* Each element set once, and only accessed by a single processor anyway */ ++#define NR_CPUS 1 ++int userspace_pid[NR_CPUS]; 
++ ++void start_userspace(int cpu) + { + void *stack; + unsigned long sp; +@@ -114,21 +129,21 @@ + if(munmap(stack, PAGE_SIZE) < 0) + panic("start_userspace : munmap failed, errno = %d\n", errno); + +- userspace_pid = pid; ++ userspace_pid[cpu] = pid; + } + + void userspace(union uml_pt_regs *regs) + { +- int err, status, op; ++ int err, status, op, pid = userspace_pid[0]; + + restore_registers(regs); + +- err = ptrace(PTRACE_SYSCALL, userspace_pid, 0, 0); ++ err = ptrace(PTRACE_SYSCALL, pid, 0, 0); + if(err) + panic("userspace - PTRACE_SYSCALL failed, errno = %d\n", + errno); + while(1){ +- err = waitpid(userspace_pid, &status, WUNTRACED); ++ err = waitpid(pid, &status, WUNTRACED); + if(err < 0) + panic("userspace - waitpid failed, errno = %d\n", + errno); +@@ -139,16 +154,17 @@ + if(WIFSTOPPED(status)){ + switch(WSTOPSIG(status)){ + case SIGSEGV: +- handle_segv(userspace_pid); ++ handle_segv(pid); + break; + case SIGTRAP: +- handle_trap(userspace_pid, regs); ++ handle_trap(pid, regs); + break; + case SIGIO: + case SIGVTALRM: + case SIGILL: + case SIGBUS: + case SIGFPE: ++ case SIGWINCH: + user_signal(WSTOPSIG(status), regs); + break; + default: +@@ -162,7 +178,7 @@ + + op = singlestepping_skas() ? 
PTRACE_SINGLESTEP : + PTRACE_SYSCALL; +- err = ptrace(op, userspace_pid, 0, 0); ++ err = ptrace(op, pid, 0, 0); + if(err) + panic("userspace - PTRACE_SYSCALL failed, " + "errno = %d\n", errno); +@@ -177,7 +193,7 @@ + *switch_buf_ptr = &switch_buf; + *fork_buf_ptr = &fork_buf; + +- if(setjmp(fork_buf) == 0) ++ if(sigsetjmp(fork_buf, 1) == 0) + new_thread_proc(stack, handler); + + remove_sigstack(); +@@ -189,16 +205,16 @@ + + *switch_buf = &buf; + fork_buf = fb; +- if(setjmp(buf) == 0) +- longjmp(*fork_buf, 1); ++ if(sigsetjmp(buf, 1) == 0) ++ siglongjmp(*fork_buf, 1); + } + +-static int move_registers(int int_op, int fp_op, union uml_pt_regs *regs, +- unsigned long *fp_regs) ++static int move_registers(int pid, int int_op, int fp_op, ++ union uml_pt_regs *regs, unsigned long *fp_regs) + { +- if(ptrace(int_op, userspace_pid, 0, regs->skas.regs) < 0) ++ if(ptrace(int_op, pid, 0, regs->skas.regs) < 0) + return(-errno); +- if(ptrace(fp_op, userspace_pid, 0, fp_regs) < 0) ++ if(ptrace(fp_op, pid, 0, fp_regs) < 0) + return(-errno); + return(0); + } +@@ -217,10 +233,11 @@ + fp_regs = regs->skas.fp; + } + +- err = move_registers(PTRACE_GETREGS, fp_op, regs, fp_regs); ++ err = move_registers(userspace_pid[0], PTRACE_GETREGS, fp_op, regs, ++ fp_regs); + if(err) + panic("save_registers - saving registers failed, errno = %d\n", +- err); ++ -err); + } + + void restore_registers(union uml_pt_regs *regs) +@@ -237,10 +254,11 @@ + fp_regs = regs->skas.fp; + } + +- err = move_registers(PTRACE_SETREGS, fp_op, regs, fp_regs); ++ err = move_registers(userspace_pid[0], PTRACE_SETREGS, fp_op, regs, ++ fp_regs); + if(err) + panic("restore_registers - saving registers failed, " +- "errno = %d\n", err); ++ "errno = %d\n", -err); + } + + void switch_threads(void *me, void *next) +@@ -248,8 +266,8 @@ + jmp_buf my_buf, **me_ptr = me, *next_buf = next; + + *me_ptr = &my_buf; +- if(setjmp(my_buf) == 0) +- longjmp(*next_buf, 1); ++ if(sigsetjmp(my_buf, 1) == 0) ++ siglongjmp(*next_buf, 1); + } + + 
static jmp_buf initial_jmpbuf; +@@ -265,14 +283,14 @@ + int n; + + *fork_buf_ptr = &initial_jmpbuf; +- n = setjmp(initial_jmpbuf); ++ n = sigsetjmp(initial_jmpbuf, 1); + if(n == 0) + new_thread_proc((void *) stack, new_thread_handler); + else if(n == 1) + remove_sigstack(); + else if(n == 2){ + (*cb_proc)(cb_arg); +- longjmp(*cb_back, 1); ++ siglongjmp(*cb_back, 1); + } + else if(n == 3){ + kmalloc_ok = 0; +@@ -282,7 +300,7 @@ + kmalloc_ok = 0; + return(1); + } +- longjmp(**switch_buf, 1); ++ siglongjmp(**switch_buf, 1); + } + + void remove_sigstack(void) +@@ -304,8 +322,8 @@ + cb_back = &here; + + block_signals(); +- if(setjmp(here) == 0) +- longjmp(initial_jmpbuf, 2); ++ if(sigsetjmp(here, 1) == 0) ++ siglongjmp(initial_jmpbuf, 2); + unblock_signals(); + + cb_proc = NULL; +@@ -316,22 +334,23 @@ + void halt_skas(void) + { + block_signals(); +- longjmp(initial_jmpbuf, 3); ++ siglongjmp(initial_jmpbuf, 3); + } + + void reboot_skas(void) + { + block_signals(); +- longjmp(initial_jmpbuf, 4); ++ siglongjmp(initial_jmpbuf, 4); + } + + int new_mm(int from) + { + struct proc_mm_op copy; +- int n, fd = os_open_file("/proc/mm", of_write(OPENFLAGS()), 0); ++ int n, fd = os_open_file("/proc/mm", ++ of_cloexec(of_write(OPENFLAGS())), 0); + + if(fd < 0) +- return(-errno); ++ return(fd); + + if(from != -1){ + copy = ((struct proc_mm_op) { .op = MM_COPY_SEGMENTS, +@@ -340,8 +359,9 @@ + n = os_write_file(fd, ©, sizeof(copy)); + if(n != sizeof(copy)) + printk("new_mm : /proc/mm copy_segments failed, " +- "errno = %d\n", errno); ++ "err = %d\n", -n); + } ++ + return(fd); + } + +@@ -349,7 +369,8 @@ + { + int err; + +- err = ptrace(PTRACE_SWITCH_MM, userspace_pid, 0, mm_fd); ++#warning need cpu pid in switch_mm_skas ++ err = ptrace(PTRACE_SWITCH_MM, userspace_pid[0], 0, mm_fd); + if(err) + panic("switch_mm_skas - PTRACE_SWITCH_MM failed, errno = %d\n", + errno); +@@ -357,7 +378,8 @@ + + void kill_off_processes_skas(void) + { +- os_kill_process(userspace_pid, 1); ++#warning need to 
loop over userspace_pids in kill_off_processes_skas ++ os_kill_process(userspace_pid[0], 1); + } + + void init_registers(int pid) +diff -Naur a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c +--- a/arch/um/kernel/skas/process_kern.c 2004-02-11 12:14:33.000000000 -0500 ++++ b/arch/um/kernel/skas/process_kern.c 2004-02-11 12:26:15.000000000 -0500 +@@ -61,11 +61,13 @@ + thread_wait(¤t->thread.mode.skas.switch_buf, + current->thread.mode.skas.fork_buf); + +-#ifdef CONFIG_SMP +- schedule_tail(NULL); +-#endif ++ if(current->thread.prev_sched != NULL) ++ schedule_tail(current->thread.prev_sched); + current->thread.prev_sched = NULL; + ++ /* The return value is 1 if the kernel thread execs a process, ++ * 0 if it just exits ++ */ + n = run_kernel_thread(fn, arg, ¤t->thread.exec_buf); + if(n == 1) + userspace(¤t->thread.regs.regs); +@@ -93,9 +95,8 @@ + current->thread.mode.skas.fork_buf); + + force_flush_all(); +-#ifdef CONFIG_SMP +- schedule_tail(current->thread.prev_sched); +-#endif ++ if(current->thread.prev_sched != NULL) ++ schedule_tail(current->thread.prev_sched); + current->thread.prev_sched = NULL; + unblock_signals(); + +@@ -136,7 +137,7 @@ + + void init_idle_skas(void) + { +- cpu_tasks[current->thread_info->cpu].pid = os_getpid(); ++ cpu_tasks[current_thread->cpu].pid = os_getpid(); + default_idle(); + } + +@@ -160,11 +161,11 @@ + + int start_uml_skas(void) + { +- start_userspace(); ++ start_userspace(0); + capture_signal_stack(); ++ uml_idle_timer(); + + init_new_thread_signals(1); +- idle_timer(); + + init_task.thread.request.u.thread.proc = start_kernel_proc; + init_task.thread.request.u.thread.arg = NULL; +@@ -175,12 +176,14 @@ + + int external_pid_skas(struct task_struct *task) + { +- return(userspace_pid); ++#warning Need to look up userspace_pid by cpu ++ return(userspace_pid[0]); + } + + int thread_pid_skas(struct task_struct *task) + { +- return(userspace_pid); ++#warning Need to look up userspace_pid by cpu ++ 
return(userspace_pid[0]); + } + + /* +diff -Naur a/arch/um/kernel/skas/syscall_kern.c b/arch/um/kernel/skas/syscall_kern.c +--- a/arch/um/kernel/skas/syscall_kern.c 2004-02-11 12:15:58.000000000 -0500 ++++ b/arch/um/kernel/skas/syscall_kern.c 2004-02-11 12:27:51.000000000 -0500 +@@ -1,5 +1,5 @@ + /* +- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) ++ * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +diff -Naur a/arch/um/kernel/skas/sys-i386/Makefile b/arch/um/kernel/skas/sys-i386/Makefile +--- a/arch/um/kernel/skas/sys-i386/Makefile 2004-02-11 12:16:38.000000000 -0500 ++++ b/arch/um/kernel/skas/sys-i386/Makefile 2004-02-11 12:28:37.000000000 -0500 +@@ -10,5 +10,3 @@ + + $(USER_OBJS) : %.o: %.c + $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< +- +-clean : +diff -Naur a/arch/um/kernel/skas/sys-i386/sigcontext.c b/arch/um/kernel/skas/sys-i386/sigcontext.c +--- a/arch/um/kernel/skas/sys-i386/sigcontext.c 2004-02-11 12:14:18.000000000 -0500 ++++ b/arch/um/kernel/skas/sys-i386/sigcontext.c 2004-02-11 12:26:01.000000000 -0500 +@@ -12,10 +12,9 @@ + #include "kern_util.h" + #include "user.h" + #include "sigcontext.h" ++#include "mode.h" + +-extern int userspace_pid; +- +-int copy_sc_from_user_skas(union uml_pt_regs *regs, void *from_ptr) ++int copy_sc_from_user_skas(int pid, union uml_pt_regs *regs, void *from_ptr) + { + struct sigcontext sc, *from = from_ptr; + unsigned long fpregs[FP_FRAME_SIZE]; +@@ -41,13 +40,12 @@ + regs->skas.regs[EIP] = sc.eip; + regs->skas.regs[CS] = sc.cs; + regs->skas.regs[EFL] = sc.eflags; +- regs->skas.regs[UESP] = sc.esp_at_signal; + regs->skas.regs[SS] = sc.ss; + regs->skas.fault_addr = sc.cr2; + regs->skas.fault_type = FAULT_WRITE(sc.err); + regs->skas.trap_type = sc.trapno; + +- err = ptrace(PTRACE_SETFPREGS, userspace_pid, 0, fpregs); ++ err = ptrace(PTRACE_SETFPREGS, pid, 0, fpregs); + if(err < 0){ + printk("copy_sc_to_user - PTRACE_SETFPREGS failed, " + "errno = %d\n", errno); +@@ 
-57,8 +55,9 @@ + return(0); + } + +-int copy_sc_to_user_skas(void *to_ptr, void *fp, union uml_pt_regs *regs, +- unsigned long fault_addr, int fault_type) ++int copy_sc_to_user_skas(int pid, void *to_ptr, void *fp, ++ union uml_pt_regs *regs, unsigned long fault_addr, ++ int fault_type) + { + struct sigcontext sc, *to = to_ptr; + struct _fpstate *to_fp; +@@ -86,7 +85,7 @@ + sc.err = TO_SC_ERR(fault_type); + sc.trapno = regs->skas.trap_type; + +- err = ptrace(PTRACE_GETFPREGS, userspace_pid, 0, fpregs); ++ err = ptrace(PTRACE_GETFPREGS, pid, 0, fpregs); + if(err < 0){ + printk("copy_sc_to_user - PTRACE_GETFPREGS failed, " + "errno = %d\n", errno); +diff -Naur a/arch/um/kernel/skas/trap_user.c b/arch/um/kernel/skas/trap_user.c +--- a/arch/um/kernel/skas/trap_user.c 2004-02-11 12:15:17.000000000 -0500 ++++ b/arch/um/kernel/skas/trap_user.c 2004-02-11 12:27:06.000000000 -0500 +@@ -1,5 +1,5 @@ + /* +- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) ++ * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +@@ -41,8 +41,6 @@ + { + struct signal_info *info; + +- if(sig == SIGVTALRM) +- missed_ticks[cpu()]++; + regs->skas.is_user = 1; + regs->skas.fault_addr = 0; + regs->skas.fault_type = 0; +diff -Naur a/arch/um/kernel/skas/uaccess.c b/arch/um/kernel/skas/uaccess.c +--- a/arch/um/kernel/skas/uaccess.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/arch/um/kernel/skas/uaccess.c 2004-02-11 12:28:20.000000000 -0500 +@@ -0,0 +1,219 @@ ++/* ++ * Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) ++ * Licensed under the GPL ++ */ ++ ++#include "linux/stddef.h" ++#include "linux/kernel.h" ++#include "linux/string.h" ++#include "linux/fs.h" ++#include "linux/highmem.h" ++#include "asm/page.h" ++#include "asm/pgtable.h" ++#include "asm/uaccess.h" ++#include "kern_util.h" ++ ++extern void *um_virt_to_phys(struct task_struct *task, unsigned long addr, ++ pte_t *pte_out); ++ ++static unsigned long maybe_map(unsigned long virt, int is_write) ++{ 
++ pte_t pte; ++ int err; ++ ++ void *phys = um_virt_to_phys(current, virt, &pte); ++ int dummy_code; ++ ++ if(IS_ERR(phys) || (is_write && !pte_write(pte))){ ++ err = handle_page_fault(virt, 0, is_write, 0, &dummy_code); ++ if(err) ++ return(0); ++ phys = um_virt_to_phys(current, virt, NULL); ++ } ++ return((unsigned long) phys); ++} ++ ++static int do_op(unsigned long addr, int len, int is_write, ++ int (*op)(unsigned long addr, int len, void *arg), void *arg) ++{ ++ struct page *page; ++ int n; ++ ++ addr = maybe_map(addr, is_write); ++ if(addr == -1) ++ return(-1); ++ ++ page = phys_to_page(addr); ++ addr = (unsigned long) kmap(page) + (addr & ~PAGE_MASK); ++ n = (*op)(addr, len, arg); ++ kunmap(page); ++ ++ return(n); ++} ++ ++static int buffer_op(unsigned long addr, int len, int is_write, ++ int (*op)(unsigned long addr, int len, void *arg), ++ void *arg) ++{ ++ int size = min(PAGE_ALIGN(addr) - addr, (unsigned long) len); ++ int remain = len, n; ++ ++ n = do_op(addr, size, is_write, op, arg); ++ if(n != 0) ++ return(n < 0 ? remain : 0); ++ ++ addr += size; ++ remain -= size; ++ if(remain == 0) ++ return(0); ++ ++ while(addr < ((addr + remain) & PAGE_MASK)){ ++ n = do_op(addr, PAGE_SIZE, is_write, op, arg); ++ if(n != 0) ++ return(n < 0 ? remain : 0); ++ ++ addr += PAGE_SIZE; ++ remain -= PAGE_SIZE; ++ } ++ if(remain == 0) ++ return(0); ++ ++ n = do_op(addr, remain, is_write, op, arg); ++ if(n != 0) ++ return(n < 0 ? remain : 0); ++ return(0); ++} ++ ++static int copy_chunk_from_user(unsigned long from, int len, void *arg) ++{ ++ unsigned long *to_ptr = arg, to = *to_ptr; ++ ++ memcpy((void *) to, (void *) from, len); ++ *to_ptr += len; ++ return(0); ++} ++ ++int copy_from_user_skas(void *to, const void *from, int n) ++{ ++ if(segment_eq(get_fs(), KERNEL_DS)){ ++ memcpy(to, from, n); ++ return(0); ++ } ++ ++ return(access_ok_skas(VERIFY_READ, from, n) ? 
++ buffer_op((unsigned long) from, n, 0, copy_chunk_from_user, &to): ++ n); ++} ++ ++static int copy_chunk_to_user(unsigned long to, int len, void *arg) ++{ ++ unsigned long *from_ptr = arg, from = *from_ptr; ++ ++ memcpy((void *) to, (void *) from, len); ++ *from_ptr += len; ++ return(0); ++} ++ ++int copy_to_user_skas(void *to, const void *from, int n) ++{ ++ if(segment_eq(get_fs(), KERNEL_DS)){ ++ memcpy(to, from, n); ++ return(0); ++ } ++ ++ return(access_ok_skas(VERIFY_WRITE, to, n) ? ++ buffer_op((unsigned long) to, n, 1, copy_chunk_to_user, &from) : ++ n); ++} ++ ++static int strncpy_chunk_from_user(unsigned long from, int len, void *arg) ++{ ++ char **to_ptr = arg, *to = *to_ptr; ++ int n; ++ ++ strncpy(to, (void *) from, len); ++ n = strnlen(to, len); ++ *to_ptr += n; ++ ++ if(n < len) ++ return(1); ++ return(0); ++} ++ ++int strncpy_from_user_skas(char *dst, const char *src, int count) ++{ ++ int n; ++ char *ptr = dst; ++ ++ if(segment_eq(get_fs(), KERNEL_DS)){ ++ strncpy(dst, src, count); ++ return(strnlen(dst, count)); ++ } ++ ++ if(!access_ok_skas(VERIFY_READ, src, 1)) ++ return(-EFAULT); ++ ++ n = buffer_op((unsigned long) src, count, 0, strncpy_chunk_from_user, ++ &ptr); ++ if(n != 0) ++ return(-EFAULT); ++ return(strnlen(dst, count)); ++} ++ ++static int clear_chunk(unsigned long addr, int len, void *unused) ++{ ++ memset((void *) addr, 0, len); ++ return(0); ++} ++ ++int __clear_user_skas(void *mem, int len) ++{ ++ return(buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL)); ++} ++ ++int clear_user_skas(void *mem, int len) ++{ ++ if(segment_eq(get_fs(), KERNEL_DS)){ ++ memset(mem, 0, len); ++ return(0); ++ } ++ ++ return(access_ok_skas(VERIFY_WRITE, mem, len) ? 
++ buffer_op((unsigned long) mem, len, 1, clear_chunk, NULL) : len); ++} ++ ++static int strnlen_chunk(unsigned long str, int len, void *arg) ++{ ++ int *len_ptr = arg, n; ++ ++ n = strnlen((void *) str, len); ++ *len_ptr += n; ++ ++ if(n < len) ++ return(1); ++ return(0); ++} ++ ++int strnlen_user_skas(const void *str, int len) ++{ ++ int count = 0, n; ++ ++ if(segment_eq(get_fs(), KERNEL_DS)) ++ return(strnlen(str, len) + 1); ++ ++ n = buffer_op((unsigned long) str, len, 0, strnlen_chunk, &count); ++ if(n == 0) ++ return(count + 1); ++ return(-EFAULT); ++} ++ ++/* ++ * Overrides for Emacs so that we follow Linus's tabbing style. ++ * Emacs will notice this stuff at the end of the file and automatically ++ * adjust the settings for this buffer only. This must remain at the end ++ * of the file. ++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -Naur a/arch/um/kernel/skas/util/Makefile b/arch/um/kernel/skas/util/Makefile +--- a/arch/um/kernel/skas/util/Makefile 2004-02-11 12:16:34.000000000 -0500 ++++ b/arch/um/kernel/skas/util/Makefile 2004-02-11 12:28:33.000000000 -0500 +@@ -1,10 +1,9 @@ + all: mk_ptregs + + mk_ptregs : mk_ptregs.o +- $(CC) -o mk_ptregs mk_ptregs.o ++ $(HOSTCC) -o mk_ptregs mk_ptregs.o + + mk_ptregs.o : mk_ptregs.c +- $(CC) -c $< ++ $(HOSTCC) -c $< + +-clean : +- $(RM) -f mk_ptregs *.o *~ ++clean-files := mk_ptregs *.o *~ +diff -Naur a/arch/um/kernel/skas/util/mk_ptregs.c b/arch/um/kernel/skas/util/mk_ptregs.c +--- a/arch/um/kernel/skas/util/mk_ptregs.c 2004-02-11 12:15:12.000000000 -0500 ++++ b/arch/um/kernel/skas/util/mk_ptregs.c 2004-02-11 12:27:00.000000000 -0500 +@@ -1,3 +1,4 @@ ++#include + #include + #include + +diff -Naur a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c +--- a/arch/um/kernel/smp.c 2004-02-11 12:14:32.000000000 -0500 ++++ b/arch/um/kernel/smp.c 2004-02-11 12:26:13.000000000 -0500 +@@ -1,9 +1,15 @@ + /* +- * Copyright (C) 2000 
Jeff Dike (jdike@karaya.com) ++ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + + #include "linux/config.h" ++#include "linux/percpu.h" ++#include "asm/pgalloc.h" ++#include "asm/tlb.h" ++ ++/* For some reason, mmu_gathers are referenced when CONFIG_SMP is off. */ ++DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); + + #ifdef CONFIG_SMP + +@@ -23,7 +29,7 @@ + #include "os.h" + + /* CPU online map, set by smp_boot_cpus */ +-unsigned long cpu_online_map = cpumask_of_cpu(0); ++unsigned long cpu_online_map = CPU_MASK_NONE; + + EXPORT_SYMBOL(cpu_online_map); + +@@ -55,7 +61,7 @@ + + void smp_send_reschedule(int cpu) + { +- write(cpu_data[cpu].ipi_pipe[1], "R", 1); ++ os_write_file(cpu_data[cpu].ipi_pipe[1], "R", 1); + num_reschedules_sent++; + } + +@@ -100,35 +106,34 @@ + + printk(KERN_INFO "Stopping all CPUs..."); + for(i = 0; i < num_online_cpus(); i++){ +- if(i == current->thread_info->cpu) ++ if(i == current_thread->cpu) + continue; +- write(cpu_data[i].ipi_pipe[1], "S", 1); ++ os_write_file(cpu_data[i].ipi_pipe[1], "S", 1); + } + printk("done\n"); + } + +-static cpumask_t smp_commenced_mask; +-static cpumask_t smp_callin_map = CPU_MASK_NONE; ++static cpumask_t smp_commenced_mask = CPU_MASK_NONE; ++static cpumask_t cpu_callin_map = CPU_MASK_NONE; + + static int idle_proc(void *cpup) + { + int cpu = (int) cpup, err; + + err = os_pipe(cpu_data[cpu].ipi_pipe, 1, 1); +- if(err) +- panic("CPU#%d failed to create IPI pipe, errno = %d", cpu, +- -err); ++ if(err < 0) ++ panic("CPU#%d failed to create IPI pipe, err = %d", cpu, -err); + + activate_ipi(cpu_data[cpu].ipi_pipe[0], + current->thread.mode.tt.extern_pid); + + wmb(); +- if (cpu_test_and_set(cpu, &smp_callin_map)) { ++ if (cpu_test_and_set(cpu, cpu_callin_map)) { + printk("huh, CPU#%d already present??\n", cpu); + BUG(); + } + +- while (!cpu_isset(cpu, &smp_commenced_mask)) ++ while (!cpu_isset(cpu, smp_commenced_mask)) + cpu_relax(); + + cpu_set(cpu, cpu_online_map); +@@ 
-143,16 +148,20 @@ + + current->thread.request.u.thread.proc = idle_proc; + current->thread.request.u.thread.arg = (void *) cpu; +- new_task = do_fork(CLONE_VM | CLONE_IDLETASK, 0, NULL, 0, NULL, NULL); +- if(IS_ERR(new_task)) panic("do_fork failed in idle_thread"); ++ new_task = copy_process(CLONE_VM | CLONE_IDLETASK, 0, NULL, 0, NULL, ++ NULL); ++ if(IS_ERR(new_task)) ++ panic("copy_process failed in idle_thread, error = %ld", ++ PTR_ERR(new_task)); + + cpu_tasks[cpu] = ((struct cpu_task) + { .pid = new_task->thread.mode.tt.extern_pid, + .task = new_task } ); + idle_threads[cpu] = new_task; +- CHOOSE_MODE(write(new_task->thread.mode.tt.switch_pipe[1], &c, ++ CHOOSE_MODE(os_write_file(new_task->thread.mode.tt.switch_pipe[1], &c, + sizeof(c)), + ({ panic("skas mode doesn't support SMP"); })); ++ wake_up_forked_process(new_task); + return(new_task); + } + +@@ -160,15 +169,17 @@ + { + struct task_struct *idle; + unsigned long waittime; +- int err, cpu; ++ int err, cpu, me = smp_processor_id(); + +- cpu_set(0, cpu_online_map); +- cpu_set(0, smp_callin_map); ++ cpu_clear(me, cpu_online_map); ++ cpu_set(me, cpu_online_map); ++ cpu_set(me, cpu_callin_map); + +- err = os_pipe(cpu_data[0].ipi_pipe, 1, 1); +- if(err) panic("CPU#0 failed to create IPI pipe, errno = %d", -err); ++ err = os_pipe(cpu_data[me].ipi_pipe, 1, 1); ++ if(err < 0) ++ panic("CPU#0 failed to create IPI pipe, errno = %d", -err); + +- activate_ipi(cpu_data[0].ipi_pipe[0], ++ activate_ipi(cpu_data[me].ipi_pipe[0], + current->thread.mode.tt.extern_pid); + + for(cpu = 1; cpu < ncpus; cpu++){ +@@ -180,10 +191,10 @@ + unhash_process(idle); + + waittime = 200000000; +- while (waittime-- && !cpu_isset(cpu, smp_callin_map)) ++ while (waittime-- && !cpu_isset(cpu, cpu_callin_map)) + cpu_relax(); + +- if (cpu_isset(cpu, smp_callin_map)) ++ if (cpu_isset(cpu, cpu_callin_map)) + printk("done\n"); + else printk("failed\n"); + } +@@ -216,7 +227,7 @@ + int fd; + + fd = cpu_data[cpu].ipi_pipe[0]; +- while (read(fd, &c, 
1) == 1) { ++ while (os_read_file(fd, &c, 1) == 1) { + switch (c) { + case 'C': + smp_call_function_slave(cpu); +@@ -273,9 +284,9 @@ + info = _info; + + for (i=0;ithread_info->cpu) && ++ if((i != current_thread->cpu) && + cpu_isset(i, cpu_online_map)) +- write(cpu_data[i].ipi_pipe[1], "C", 1); ++ os_write_file(cpu_data[i].ipi_pipe[1], "C", 1); + + while (atomic_read(&scf_started) != cpus) + barrier(); +diff -Naur a/arch/um/kernel/syscall_kern.c b/arch/um/kernel/syscall_kern.c +--- a/arch/um/kernel/syscall_kern.c 2004-02-11 12:16:08.000000000 -0500 ++++ b/arch/um/kernel/syscall_kern.c 2004-02-11 12:28:04.000000000 -0500 +@@ -1,5 +1,5 @@ + /* +- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) ++ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +@@ -35,39 +35,40 @@ + + long sys_fork(void) + { +- struct task_struct *p; ++ long ret; + + current->thread.forking = 1; +- p = do_fork(SIGCHLD, 0, NULL, 0, NULL, NULL); ++ ret = do_fork(SIGCHLD, 0, NULL, 0, NULL, NULL); + current->thread.forking = 0; +- return(IS_ERR(p) ? PTR_ERR(p) : p->pid); ++ return(ret); + } + +-long sys_clone(unsigned long clone_flags, unsigned long newsp) ++long sys_clone(unsigned long clone_flags, unsigned long newsp, ++ int *parent_tid, int *child_tid) + { +- struct task_struct *p; ++ long ret; + + current->thread.forking = 1; +- p = do_fork(clone_flags, newsp, NULL, 0, NULL, NULL); ++ ret = do_fork(clone_flags, newsp, NULL, 0, parent_tid, child_tid); + current->thread.forking = 0; +- return(IS_ERR(p) ? PTR_ERR(p) : p->pid); ++ return(ret); + } + + long sys_vfork(void) + { +- struct task_struct *p; ++ long ret; + + current->thread.forking = 1; +- p = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0, NULL, NULL); ++ ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0, NULL, ++ NULL); + current->thread.forking = 0; +- return(IS_ERR(p) ? 
PTR_ERR(p) : p->pid); ++ return(ret); + } + + /* common code for old and new mmaps */ +-static inline long do_mmap2( +- unsigned long addr, unsigned long len, +- unsigned long prot, unsigned long flags, +- unsigned long fd, unsigned long pgoff) ++long do_mmap2(struct mm_struct *mm, unsigned long addr, unsigned long len, ++ unsigned long prot, unsigned long flags, unsigned long fd, ++ unsigned long pgoff) + { + int error = -EBADF; + struct file * file = NULL; +@@ -79,9 +80,9 @@ + goto out; + } + +- down_write(¤t->mm->mmap_sem); +- error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff); +- up_write(¤t->mm->mmap_sem); ++ down_write(&mm->mmap_sem); ++ error = do_mmap_pgoff(mm, file, addr, len, prot, flags, pgoff); ++ up_write(&mm->mmap_sem); + + if (file) + fput(file); +@@ -93,7 +94,7 @@ + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) + { +- return do_mmap2(addr, len, prot, flags, fd, pgoff); ++ return do_mmap2(current->mm, addr, len, prot, flags, fd, pgoff); + } + + /* +@@ -120,7 +121,8 @@ + if (offset & ~PAGE_MASK) + goto out; + +- err = do_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT); ++ err = do_mmap2(current->mm, addr, len, prot, flags, fd, ++ offset >> PAGE_SHIFT); + out: + return err; + } +@@ -135,43 +137,12 @@ + + error = do_pipe(fd); + if (!error) { +- if (copy_to_user(fildes, fd, 2*sizeof(int))) ++ if (copy_to_user(fildes, fd, sizeof(fd))) + error = -EFAULT; + } + return error; + } + +-int sys_sigaction(int sig, const struct old_sigaction *act, +- struct old_sigaction *oact) +-{ +- struct k_sigaction new_ka, old_ka; +- int ret; +- +- if (act) { +- old_sigset_t mask; +- if (verify_area(VERIFY_READ, act, sizeof(*act)) || +- __get_user(new_ka.sa.sa_handler, &act->sa_handler) || +- __get_user(new_ka.sa.sa_restorer, &act->sa_restorer)) +- return -EFAULT; +- __get_user(new_ka.sa.sa_flags, &act->sa_flags); +- __get_user(mask, &act->sa_mask); +- siginitset(&new_ka.sa.sa_mask, mask); +- } +- +- ret = do_sigaction(sig, 
act ? &new_ka : NULL, oact ? &old_ka : NULL); +- +- if (!ret && oact) { +- if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) || +- __put_user(old_ka.sa.sa_handler, &oact->sa_handler) || +- __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer)) +- return -EFAULT; +- __put_user(old_ka.sa.sa_flags, &oact->sa_flags); +- __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); +- } +- +- return ret; +-} +- + /* + * sys_ipc() is the de-multiplexer for the SysV IPC calls.. + * +@@ -253,7 +224,7 @@ + return sys_shmctl (first, second, + (struct shmid_ds *) ptr); + default: +- return -EINVAL; ++ return -ENOSYS; + } + } + +@@ -302,11 +273,6 @@ + return error; + } + +-int sys_sigaltstack(const stack_t *uss, stack_t *uoss) +-{ +- return(do_sigaltstack(uss, uoss, PT_REGS_SP(¤t->thread.regs))); +-} +- + long execute_syscall(void *r) + { + return(CHOOSE_MODE_PROC(execute_syscall_tt, execute_syscall_skas, r)); +diff -Naur a/arch/um/kernel/sys_call_table.c b/arch/um/kernel/sys_call_table.c +--- a/arch/um/kernel/sys_call_table.c 2004-02-11 12:16:34.000000000 -0500 ++++ b/arch/um/kernel/sys_call_table.c 2004-02-11 12:28:33.000000000 -0500 +@@ -5,7 +5,6 @@ + + #include "linux/config.h" + #include "linux/unistd.h" +-#include "linux/version.h" + #include "linux/sys.h" + #include "linux/swap.h" + #include "linux/sysctl.h" +@@ -219,15 +218,30 @@ + extern syscall_handler_t sys_gettid; + extern syscall_handler_t sys_readahead; + extern syscall_handler_t sys_tkill; ++extern syscall_handler_t sys_setxattr; ++extern syscall_handler_t sys_lsetxattr; ++extern syscall_handler_t sys_fsetxattr; ++extern syscall_handler_t sys_getxattr; ++extern syscall_handler_t sys_lgetxattr; ++extern syscall_handler_t sys_fgetxattr; ++extern syscall_handler_t sys_listxattr; ++extern syscall_handler_t sys_llistxattr; ++extern syscall_handler_t sys_flistxattr; ++extern syscall_handler_t sys_removexattr; ++extern syscall_handler_t sys_lremovexattr; ++extern syscall_handler_t sys_fremovexattr; + extern syscall_handler_t 
sys_sendfile64; + extern syscall_handler_t sys_futex; + extern syscall_handler_t sys_sched_setaffinity; + extern syscall_handler_t sys_sched_getaffinity; ++extern syscall_handler_t sys_set_thread_area; ++extern syscall_handler_t sys_get_thread_area; + extern syscall_handler_t sys_io_setup; + extern syscall_handler_t sys_io_destroy; + extern syscall_handler_t sys_io_getevents; + extern syscall_handler_t sys_io_submit; + extern syscall_handler_t sys_io_cancel; ++extern syscall_handler_t sys_fadvise64; + extern syscall_handler_t sys_exit_group; + extern syscall_handler_t sys_lookup_dcookie; + extern syscall_handler_t sys_epoll_create; +@@ -235,6 +249,20 @@ + extern syscall_handler_t sys_epoll_wait; + extern syscall_handler_t sys_remap_file_pages; + extern syscall_handler_t sys_set_tid_address; ++extern syscall_handler_t sys_timer_create; ++extern syscall_handler_t sys_timer_settime; ++extern syscall_handler_t sys_timer_gettime; ++extern syscall_handler_t sys_timer_getoverrun; ++extern syscall_handler_t sys_timer_delete; ++extern syscall_handler_t sys_clock_settime; ++extern syscall_handler_t sys_clock_gettime; ++extern syscall_handler_t sys_clock_getres; ++extern syscall_handler_t sys_clock_nanosleep; ++extern syscall_handler_t sys_statfs64; ++extern syscall_handler_t sys_fstatfs64; ++extern syscall_handler_t sys_tgkill; ++extern syscall_handler_t sys_utimes; ++extern syscall_handler_t sys_fadvise64_64; + + #ifdef CONFIG_NFSD + #define NFSSERVCTL sys_nfsservctl +@@ -246,7 +274,7 @@ + extern syscall_handler_t um_time; + extern syscall_handler_t um_stime; + +-#define LAST_GENERIC_SYSCALL __NR_set_tid_address ++#define LAST_GENERIC_SYSCALL __NR_vserver + + #if LAST_GENERIC_SYSCALL > LAST_ARCH_SYSCALL + #define LAST_SYSCALL LAST_GENERIC_SYSCALL +@@ -455,32 +483,37 @@ + [ __NR_stat64 ] = sys_stat64, + [ __NR_lstat64 ] = sys_lstat64, + [ __NR_fstat64 ] = sys_fstat64, +- [ __NR_fcntl64 ] = sys_fcntl64, + [ __NR_getdents64 ] = sys_getdents64, ++ [ __NR_fcntl64 ] = 
sys_fcntl64, ++ [ 223 ] = sys_ni_syscall, + [ __NR_gettid ] = sys_gettid, + [ __NR_readahead ] = sys_readahead, +- [ __NR_setxattr ] = sys_ni_syscall, +- [ __NR_lsetxattr ] = sys_ni_syscall, +- [ __NR_fsetxattr ] = sys_ni_syscall, +- [ __NR_getxattr ] = sys_ni_syscall, +- [ __NR_lgetxattr ] = sys_ni_syscall, +- [ __NR_fgetxattr ] = sys_ni_syscall, +- [ __NR_listxattr ] = sys_ni_syscall, +- [ __NR_llistxattr ] = sys_ni_syscall, +- [ __NR_flistxattr ] = sys_ni_syscall, +- [ __NR_removexattr ] = sys_ni_syscall, +- [ __NR_lremovexattr ] = sys_ni_syscall, +- [ __NR_fremovexattr ] = sys_ni_syscall, ++ [ __NR_setxattr ] = sys_setxattr, ++ [ __NR_lsetxattr ] = sys_lsetxattr, ++ [ __NR_fsetxattr ] = sys_fsetxattr, ++ [ __NR_getxattr ] = sys_getxattr, ++ [ __NR_lgetxattr ] = sys_lgetxattr, ++ [ __NR_fgetxattr ] = sys_fgetxattr, ++ [ __NR_listxattr ] = sys_listxattr, ++ [ __NR_llistxattr ] = sys_llistxattr, ++ [ __NR_flistxattr ] = sys_flistxattr, ++ [ __NR_removexattr ] = sys_removexattr, ++ [ __NR_lremovexattr ] = sys_lremovexattr, ++ [ __NR_fremovexattr ] = sys_fremovexattr, + [ __NR_tkill ] = sys_tkill, + [ __NR_sendfile64 ] = sys_sendfile64, + [ __NR_futex ] = sys_futex, + [ __NR_sched_setaffinity ] = sys_sched_setaffinity, + [ __NR_sched_getaffinity ] = sys_sched_getaffinity, ++ [ __NR_set_thread_area ] = sys_ni_syscall, ++ [ __NR_get_thread_area ] = sys_ni_syscall, + [ __NR_io_setup ] = sys_io_setup, + [ __NR_io_destroy ] = sys_io_destroy, + [ __NR_io_getevents ] = sys_io_getevents, + [ __NR_io_submit ] = sys_io_submit, + [ __NR_io_cancel ] = sys_io_cancel, ++ [ __NR_fadvise64 ] = sys_fadvise64, ++ [ 251 ] = sys_ni_syscall, + [ __NR_exit_group ] = sys_exit_group, + [ __NR_lookup_dcookie ] = sys_lookup_dcookie, + [ __NR_epoll_create ] = sys_epoll_create, +@@ -488,6 +521,21 @@ + [ __NR_epoll_wait ] = sys_epoll_wait, + [ __NR_remap_file_pages ] = sys_remap_file_pages, + [ __NR_set_tid_address ] = sys_set_tid_address, ++ [ __NR_timer_create ] = sys_timer_create, ++ [ 
__NR_timer_settime ] = sys_timer_settime, ++ [ __NR_timer_gettime ] = sys_timer_gettime, ++ [ __NR_timer_getoverrun ] = sys_timer_getoverrun, ++ [ __NR_timer_delete ] = sys_timer_delete, ++ [ __NR_clock_settime ] = sys_clock_settime, ++ [ __NR_clock_gettime ] = sys_clock_gettime, ++ [ __NR_clock_getres ] = sys_clock_getres, ++ [ __NR_clock_nanosleep ] = sys_clock_nanosleep, ++ [ __NR_statfs64 ] = sys_statfs64, ++ [ __NR_fstatfs64 ] = sys_fstatfs64, ++ [ __NR_tgkill ] = sys_tgkill, ++ [ __NR_utimes ] = sys_utimes, ++ [ __NR_fadvise64_64 ] = sys_fadvise64_64, ++ [ __NR_vserver ] = sys_ni_syscall, + + ARCH_SYSCALLS + [ LAST_SYSCALL + 1 ... NR_syscalls ] = +diff -Naur a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c +--- a/arch/um/kernel/sysrq.c 2004-02-11 12:14:56.000000000 -0500 ++++ b/arch/um/kernel/sysrq.c 2004-02-11 12:26:47.000000000 -0500 +@@ -55,6 +55,14 @@ + show_trace((unsigned long *)esp); + } + ++void show_stack(struct task_struct *task, unsigned long *sp) ++{ ++ if(task) ++ show_trace_task(task); ++ else ++ show_trace(sp); ++} ++ + /* + * Overrides for Emacs so that we follow Linus's tabbing style. 
+ * Emacs will notice this stuff at the end of the file and automatically +diff -Naur a/arch/um/kernel/tempfile.c b/arch/um/kernel/tempfile.c +--- a/arch/um/kernel/tempfile.c 2004-02-11 12:15:48.000000000 -0500 ++++ b/arch/um/kernel/tempfile.c 2004-02-11 12:27:41.000000000 -0500 +@@ -28,6 +28,7 @@ + } + if((dir == NULL) || (*dir == '\0')) + dir = "/tmp"; ++ + tempdir = malloc(strlen(dir) + 2); + if(tempdir == NULL){ + fprintf(stderr, "Failed to malloc tempdir, " +@@ -49,7 +50,8 @@ + else + *tempname = 0; + strcat(tempname, template); +- if((fd = mkstemp(tempname)) < 0){ ++ fd = mkstemp(tempname); ++ if(fd < 0){ + fprintf(stderr, "open - cannot create %s: %s\n", tempname, + strerror(errno)); + return -1; +@@ -59,7 +61,8 @@ + return -1; + } + if(out_tempname){ +- if((*out_tempname = strdup(tempname)) == NULL){ ++ *out_tempname = strdup(tempname); ++ if(*out_tempname == NULL){ + perror("strdup"); + return -1; + } +diff -Naur a/arch/um/kernel/time.c b/arch/um/kernel/time.c +--- a/arch/um/kernel/time.c 2004-02-11 12:14:28.000000000 -0500 ++++ b/arch/um/kernel/time.c 2004-02-11 12:26:11.000000000 -0500 +@@ -4,24 +4,33 @@ + */ + + #include ++#include + #include + #include + #include + #include + #include +-#include "linux/module.h" + #include "user_util.h" + #include "kern_util.h" + #include "user.h" + #include "process.h" + #include "signal_user.h" + #include "time_user.h" ++#include "kern_constants.h" ++ ++/* XXX This really needs to be declared and initialized in a kernel file since ++ * it's in ++ */ ++extern struct timespec wall_to_monotonic; + + extern struct timeval xtime; + ++struct timeval local_offset = { 0, 0 }; ++ + void timer(void) + { + gettimeofday(&xtime, NULL); ++ timeradd(&xtime, &local_offset, &xtime); + } + + void set_interval(int timer_type) +@@ -66,7 +75,7 @@ + errno); + } + +-void idle_timer(void) ++void uml_idle_timer(void) + { + if(signal(SIGVTALRM, SIG_IGN) == SIG_ERR) + panic("Couldn't unset SIGVTALRM handler"); +@@ -76,14 +85,56 @@ + 
set_interval(ITIMER_REAL); + } + ++static unsigned long long get_host_hz(void) ++{ ++ char mhzline[16], *end; ++ int ret, mult, mhz, rest, len; ++ ++ ret = cpu_feature("cpu MHz", mhzline, ++ sizeof(mhzline) / sizeof(mhzline[0])); ++ if(!ret) ++ panic ("Could not get host MHZ"); ++ ++ mhz = strtoul(mhzline, &end, 10); ++ ++ /* This business is to parse a floating point number without using ++ * floating types. ++ */ ++ ++ rest = 0; ++ mult = 0; ++ if(*end == '.'){ ++ end++; ++ len = strlen(end); ++ if(len < 6) ++ mult = 6 - len; ++ else if(len > 6) ++ end[6] = '\0'; ++ rest = strtoul(end, NULL, 10); ++ while(mult-- > 0) ++ rest *= 10; ++ } ++ ++ return(1000000 * mhz + rest); ++} ++ ++unsigned long long host_hz = 0; ++ ++extern int do_posix_clock_monotonic_gettime(struct timespec *tp); ++ + void time_init(void) + { ++ struct timespec now; ++ ++ host_hz = get_host_hz(); + if(signal(SIGVTALRM, boot_timer_handler) == SIG_ERR) + panic("Couldn't set SIGVTALRM handler"); + set_interval(ITIMER_VIRTUAL); +-} + +-struct timeval local_offset = { 0, 0 }; ++ do_posix_clock_monotonic_gettime(&now); ++ wall_to_monotonic.tv_sec = -now.tv_sec; ++ wall_to_monotonic.tv_nsec = -now.tv_nsec; ++} + + void do_gettimeofday(struct timeval *tv) + { +@@ -95,15 +146,13 @@ + time_unlock(flags); + } + +-EXPORT_SYMBOL(do_gettimeofday); +- + int do_settimeofday(struct timespec *tv) + { + struct timeval now; + unsigned long flags; + struct timeval tv_in; + +- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC) ++ if ((unsigned long) tv->tv_nsec >= UM_NSEC_PER_SEC) + return -EINVAL; + + tv_in.tv_sec = tv->tv_sec; +@@ -113,9 +162,9 @@ + gettimeofday(&now, NULL); + timersub(&tv_in, &now, &local_offset); + time_unlock(flags); +-} + +-EXPORT_SYMBOL(do_settimeofday); ++ return(0); ++} + + void idle_sleep(int secs) + { +diff -Naur a/arch/um/kernel/time_kern.c b/arch/um/kernel/time_kern.c +--- a/arch/um/kernel/time_kern.c 2004-02-11 12:15:59.000000000 -0500 ++++ b/arch/um/kernel/time_kern.c 2004-02-11 
12:27:52.000000000 -0500 +@@ -30,6 +30,14 @@ + return(HZ); + } + ++/* ++ * Scheduler clock - returns current time in nanosec units. ++ */ ++unsigned long long sched_clock(void) ++{ ++ return (unsigned long long)jiffies_64 * (1000000000 / HZ); ++} ++ + /* Changed at early boot */ + int timer_irq_inited = 0; + +@@ -39,13 +47,47 @@ + */ + int __attribute__ ((__section__ (".unprotected"))) missed_ticks[NR_CPUS]; + ++static int first_tick; ++static unsigned long long prev_tsc; ++static long long delta; /* Deviation per interval */ ++ ++extern unsigned long long host_hz; ++ + void timer_irq(union uml_pt_regs *regs) + { +- int cpu = current->thread_info->cpu, ticks = missed_ticks[cpu]; ++ unsigned long long ticks = 0; ++ ++ if(!timer_irq_inited){ ++ /* This is to ensure that ticks don't pile up when ++ * the timer handler is suspended */ ++ first_tick = 0; ++ return; ++ } ++ ++ if(first_tick){ ++#if defined(CONFIG_UML_REAL_TIME_CLOCK) ++ unsigned long long tsc; ++ /* We've had 1 tick */ ++ tsc = time_stamp(); ++ ++ delta += tsc - prev_tsc; ++ prev_tsc = tsc; ++ ++ ticks += (delta * HZ) / host_hz; ++ delta -= (ticks * host_hz) / HZ; ++#else ++ ticks = 1; ++#endif ++ } ++ else { ++ prev_tsc = time_stamp(); ++ first_tick = 1; ++ } + +- if(!timer_irq_inited) return; +- missed_ticks[cpu] = 0; +- while(ticks--) do_IRQ(TIMER_IRQ, regs); ++ while(ticks > 0){ ++ do_IRQ(TIMER_IRQ, regs); ++ ticks--; ++ } + } + + void boot_timer_handler(int sig) +@@ -58,12 +100,13 @@ + do_timer(&regs); + } + +-void um_timer(int irq, void *dev, struct pt_regs *regs) ++irqreturn_t um_timer(int irq, void *dev, struct pt_regs *regs) + { + do_timer(regs); +- write_seqlock(&xtime_lock); ++ write_seqlock_irq(&xtime_lock); + timer(); +- write_sequnlock(&xtime_lock); ++ write_sequnlock_irq(&xtime_lock); ++ return(IRQ_HANDLED); + } + + long um_time(int * tloc) +@@ -81,12 +124,12 @@ + long um_stime(int * tptr) + { + int value; +- struct timeval new; ++ struct timespec new; + + if (get_user(value, tptr)) + return 
-EFAULT; + new.tv_sec = value; +- new.tv_usec = 0; ++ new.tv_nsec = 0; + do_settimeofday(&new); + return 0; + } +@@ -125,9 +168,11 @@ + void timer_handler(int sig, union uml_pt_regs *regs) + { + #ifdef CONFIG_SMP ++ local_irq_disable(); + update_process_times(user_context(UPT_SP(regs))); ++ local_irq_enable(); + #endif +- if(current->thread_info->cpu == 0) ++ if(current_thread->cpu == 0) + timer_irq(regs); + } + +@@ -136,6 +181,7 @@ + unsigned long time_lock(void) + { + unsigned long flags; ++ + spin_lock_irqsave(&timer_spinlock, flags); + return(flags); + } +@@ -150,8 +196,8 @@ + int err; + + CHOOSE_MODE(user_time_init_tt(), user_time_init_skas()); +- if((err = request_irq(TIMER_IRQ, um_timer, SA_INTERRUPT, "timer", +- NULL)) != 0) ++ err = request_irq(TIMER_IRQ, um_timer, SA_INTERRUPT, "timer", NULL); ++ if(err != 0) + printk(KERN_ERR "timer_init : request_irq failed - " + "errno = %d\n", -err); + timer_irq_inited = 1; +@@ -160,7 +206,6 @@ + + __initcall(timer_init); + +- + /* + * Overrides for Emacs so that we follow Linus's tabbing style. 
+ * Emacs will notice this stuff at the end of the file and automatically +diff -Naur a/arch/um/kernel/trap_kern.c b/arch/um/kernel/trap_kern.c +--- a/arch/um/kernel/trap_kern.c 2004-02-11 12:14:18.000000000 -0500 ++++ b/arch/um/kernel/trap_kern.c 2004-02-11 12:26:00.000000000 -0500 +@@ -16,12 +16,15 @@ + #include "asm/tlbflush.h" + #include "asm/a.out.h" + #include "asm/current.h" ++#include "asm/irq.h" + #include "user_util.h" + #include "kern_util.h" + #include "kern.h" + #include "chan_kern.h" + #include "mconsole_kern.h" + #include "2_5compat.h" ++#include "mem.h" ++#include "mem_kern.h" + + int handle_page_fault(unsigned long address, unsigned long ip, + int is_write, int is_user, int *code_out) +@@ -51,12 +54,12 @@ + if(is_write && !(vma->vm_flags & VM_WRITE)) + goto out; + page = address & PAGE_MASK; +- if(page == (unsigned long) current->thread_info + PAGE_SIZE) ++ if(page == (unsigned long) current_thread + PAGE_SIZE) + panic("Kernel stack overflow"); + pgd = pgd_offset(mm, page); + pmd = pmd_offset(pgd, page); +- survive: + do { ++ survive: + switch (handle_mm_fault(mm, vma, address, is_write)){ + case VM_FAULT_MINOR: + current->min_flt++; +@@ -71,14 +74,20 @@ + err = -ENOMEM; + goto out_of_memory; + default: +- BUG(); ++ if (current->pid == 1) { ++ up_read(&mm->mmap_sem); ++ yield(); ++ down_read(&mm->mmap_sem); ++ goto survive; ++ } ++ goto out; + } + pte = pte_offset_kernel(pmd, page); + } while(!pte_present(*pte)); ++ err = 0; + *pte = pte_mkyoung(*pte); + if(pte_write(*pte)) *pte = pte_mkdirty(*pte); + flush_tlb_page(vma, page); +- err = 0; + out: + up_read(&mm->mmap_sem); + return(err); +@@ -98,6 +107,33 @@ + goto out; + } + ++LIST_HEAD(physmem_remappers); ++ ++void register_remapper(struct remapper *info) ++{ ++ list_add(&info->list, &physmem_remappers); ++} ++ ++static int check_remapped_addr(unsigned long address, int is_write) ++{ ++ struct remapper *remapper; ++ struct list_head *ele; ++ __u64 offset; ++ int fd; ++ ++ fd = 
phys_mapping(__pa(address), &offset); ++ if(fd == -1) ++ return(0); ++ ++ list_for_each(ele, &physmem_remappers){ ++ remapper = list_entry(ele, struct remapper, list); ++ if((*remapper->proc)(fd, address, is_write, offset)) ++ return(1); ++ } ++ ++ return(0); ++} ++ + unsigned long segv(unsigned long address, unsigned long ip, int is_write, + int is_user, void *sc) + { +@@ -109,7 +145,9 @@ + flush_tlb_kernel_vm(); + return(0); + } +- if(current->mm == NULL) ++ else if(check_remapped_addr(address & PAGE_MASK, is_write)) ++ return(0); ++ else if(current->mm == NULL) + panic("Segfault with no mm"); + err = handle_page_fault(address, ip, is_write, is_user, &si.si_code); + +@@ -120,9 +158,8 @@ + current->thread.fault_addr = (void *) address; + do_longjmp(catcher, 1); + } +- else if(current->thread.fault_addr != NULL){ ++ else if(current->thread.fault_addr != NULL) + panic("fault_addr set but no fault catcher"); +- } + else if(arch_fixup(ip, sc)) + return(0); + +@@ -155,8 +192,6 @@ + { + struct siginfo si; + +- printk(KERN_ERR "Unfixable SEGV in '%s' (pid %d) at 0x%lx " +- "(ip 0x%lx)\n", current->comm, current->pid, address, ip); + si.si_signo = SIGSEGV; + si.si_code = SEGV_ACCERR; + si.si_addr = (void *) address; +@@ -180,6 +215,11 @@ + else relay_signal(sig, regs); + } + ++void winch(int sig, union uml_pt_regs *regs) ++{ ++ do_IRQ(WINCH_IRQ, regs); ++} ++ + void trap_init(void) + { + } +diff -Naur a/arch/um/kernel/trap_user.c b/arch/um/kernel/trap_user.c +--- a/arch/um/kernel/trap_user.c 2004-02-11 12:15:23.000000000 -0500 ++++ b/arch/um/kernel/trap_user.c 2004-02-11 12:27:10.000000000 -0500 +@@ -5,11 +5,9 @@ + + #include + #include +-#include + #include + #include + #include +-#include + #include + #include + #include +@@ -82,6 +80,8 @@ + .is_irq = 0 }, + [ SIGILL ] { .handler = relay_signal, + .is_irq = 0 }, ++ [ SIGWINCH ] { .handler = winch, ++ .is_irq = 1 }, + [ SIGBUS ] { .handler = bus_handler, + .is_irq = 0 }, + [ SIGSEGV] { .handler = segv_handler, +@@ -123,7 
+123,7 @@ + { + jmp_buf *buf = b; + +- longjmp(*buf, val); ++ siglongjmp(*buf, val); + } + + /* +diff -Naur a/arch/um/kernel/tt/exec_kern.c b/arch/um/kernel/tt/exec_kern.c +--- a/arch/um/kernel/tt/exec_kern.c 2004-02-11 12:14:28.000000000 -0500 ++++ b/arch/um/kernel/tt/exec_kern.c 2004-02-11 12:26:11.000000000 -0500 +@@ -17,6 +17,7 @@ + #include "mem_user.h" + #include "os.h" + #include "tlb.h" ++#include "mode.h" + + static int exec_tramp(void *sig_stack) + { +@@ -47,17 +48,17 @@ + do_exit(SIGKILL); + } + +- if(current->thread_info->cpu == 0) ++ if(current_thread->cpu == 0) + forward_interrupts(new_pid); + current->thread.request.op = OP_EXEC; + current->thread.request.u.exec.pid = new_pid; +- unprotect_stack((unsigned long) current->thread_info); ++ unprotect_stack((unsigned long) current_thread); + os_usr1_process(os_getpid()); + + enable_timer(); + free_page(stack); + protect_memory(uml_reserved, high_physmem - uml_reserved, 1, 1, 0, 1); +- task_protections((unsigned long) current->thread_info); ++ task_protections((unsigned long) current_thread); + force_flush_all(); + unblock_signals(); + } +diff -Naur a/arch/um/kernel/tt/include/mode.h b/arch/um/kernel/tt/include/mode.h +--- a/arch/um/kernel/tt/include/mode.h 2004-02-11 12:16:01.000000000 -0500 ++++ b/arch/um/kernel/tt/include/mode.h 2004-02-11 12:27:56.000000000 -0500 +@@ -8,6 +8,8 @@ + + #include "sysdep/ptrace.h" + ++enum { OP_NONE, OP_EXEC, OP_FORK, OP_TRACE_ON, OP_REBOOT, OP_HALT, OP_CB }; ++ + extern int tracing_pid; + + extern int tracer(int (*init_proc)(void *), void *sp); +diff -Naur a/arch/um/kernel/tt/include/uaccess.h b/arch/um/kernel/tt/include/uaccess.h +--- a/arch/um/kernel/tt/include/uaccess.h 2004-02-11 12:16:02.000000000 -0500 ++++ b/arch/um/kernel/tt/include/uaccess.h 2004-02-11 12:27:56.000000000 -0500 +@@ -1,5 +1,5 @@ + /* +- * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) ++ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +@@ -43,65 
+43,19 @@ + + extern int __do_copy_from_user(void *to, const void *from, int n, + void **fault_addr, void **fault_catcher); +- +-static inline int copy_from_user_tt(void *to, const void *from, int n) +-{ +- return(access_ok_tt(VERIFY_READ, from, n) ? +- __do_copy_from_user(to, from, n, +- &current->thread.fault_addr, +- &current->thread.fault_catcher) : n); +-} +- +-static inline int copy_to_user_tt(void *to, const void *from, int n) +-{ +- return(access_ok_tt(VERIFY_WRITE, to, n) ? +- __do_copy_to_user(to, from, n, +- &current->thread.fault_addr, +- &current->thread.fault_catcher) : n); +-} +- + extern int __do_strncpy_from_user(char *dst, const char *src, size_t n, + void **fault_addr, void **fault_catcher); +- +-static inline int strncpy_from_user_tt(char *dst, const char *src, int count) +-{ +- int n; +- +- if(!access_ok_tt(VERIFY_READ, src, 1)) return(-EFAULT); +- n = __do_strncpy_from_user(dst, src, count, +- &current->thread.fault_addr, +- &current->thread.fault_catcher); +- if(n < 0) return(-EFAULT); +- return(n); +-} +- + extern int __do_clear_user(void *mem, size_t len, void **fault_addr, + void **fault_catcher); +- +-static inline int __clear_user_tt(void *mem, int len) +-{ +- return(__do_clear_user(mem, len, +- &current->thread.fault_addr, +- &current->thread.fault_catcher)); +-} +- +-static inline int clear_user_tt(void *mem, int len) +-{ +- return(access_ok_tt(VERIFY_WRITE, mem, len) ? 
+- __do_clear_user(mem, len, +- &current->thread.fault_addr, +- &current->thread.fault_catcher) : len); +-} +- + extern int __do_strnlen_user(const char *str, unsigned long n, + void **fault_addr, void **fault_catcher); + +-static inline int strnlen_user_tt(const void *str, int len) +-{ +- return(__do_strnlen_user(str, len, +- &current->thread.fault_addr, +- &current->thread.fault_catcher)); +-} ++extern int copy_from_user_tt(void *to, const void *from, int n); ++extern int copy_to_user_tt(void *to, const void *from, int n); ++extern int strncpy_from_user_tt(char *dst, const char *src, int count); ++extern int __clear_user_tt(void *mem, int len); ++extern int clear_user_tt(void *mem, int len); ++extern int strnlen_user_tt(const void *str, int len); + + #endif + +diff -Naur a/arch/um/kernel/tt/Makefile b/arch/um/kernel/tt/Makefile +--- a/arch/um/kernel/tt/Makefile 2004-02-11 12:15:45.000000000 -0500 ++++ b/arch/um/kernel/tt/Makefile 2004-02-11 12:27:36.000000000 -0500 +@@ -1,5 +1,5 @@ + # +-# Copyright (C) 2002 Jeff Dike (jdike@karaya.com) ++# Copyright (C) 2002 - 2003 Jeff Dike (jdike@addtoit.com) + # Licensed under the GPL + # + +@@ -7,7 +7,7 @@ + + obj-y = exec_kern.o exec_user.o gdb.o ksyms.o mem.o mem_user.o process_kern.o \ + syscall_kern.o syscall_user.o time.o tlb.o tracer.o trap_user.o \ +- uaccess_user.o sys-$(SUBARCH)/ ++ uaccess.o uaccess_user.o sys-$(SUBARCH)/ + + obj-$(CONFIG_PT_PROXY) += gdb_kern.o ptproxy/ + +@@ -27,5 +27,3 @@ + + $(obj)/unmap_fin.o : $(src)/unmap.o + ld -r -o $@ $< -lc -L/usr/lib +- +-clean : +diff -Naur a/arch/um/kernel/tt/mem_user.c b/arch/um/kernel/tt/mem_user.c +--- a/arch/um/kernel/tt/mem_user.c 2004-02-11 12:14:34.000000000 -0500 ++++ b/arch/um/kernel/tt/mem_user.c 2004-02-11 12:26:17.000000000 -0500 +@@ -25,14 +25,13 @@ + size = (unsigned long) segment_end - + (unsigned long) segment_start; + data = create_mem_file(size); +- if((addr = mmap(NULL, size, PROT_WRITE | PROT_READ, +- MAP_SHARED, data, 0)) == MAP_FAILED){ ++ addr = mmap(NULL, size, 
PROT_WRITE | PROT_READ, MAP_SHARED, data, 0); ++ if(addr == MAP_FAILED){ + perror("mapping new data segment"); + exit(1); + } + memcpy(addr, segment_start, size); +- if(switcheroo(data, prot, addr, segment_start, +- size) < 0){ ++ if(switcheroo(data, prot, addr, segment_start, size) < 0){ + printf("switcheroo failed\n"); + exit(1); + } +diff -Naur a/arch/um/kernel/tt/process_kern.c b/arch/um/kernel/tt/process_kern.c +--- a/arch/um/kernel/tt/process_kern.c 2004-02-11 12:16:33.000000000 -0500 ++++ b/arch/um/kernel/tt/process_kern.c 2004-02-11 12:28:32.000000000 -0500 +@@ -62,7 +62,7 @@ + reading = 0; + err = os_write_file(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c)); + if(err != sizeof(c)) +- panic("write of switch_pipe failed, errno = %d", -err); ++ panic("write of switch_pipe failed, err = %d", -err); + + reading = 1; + if((from->state == TASK_ZOMBIE) || (from->state == TASK_DEAD)) +@@ -104,48 +104,72 @@ + + void release_thread_tt(struct task_struct *task) + { +- os_kill_process(task->thread.mode.tt.extern_pid, 0); ++ int pid = task->thread.mode.tt.extern_pid; ++ ++ if(os_getpid() != pid) ++ os_kill_process(pid, 0); + } + + void exit_thread_tt(void) + { +- close(current->thread.mode.tt.switch_pipe[0]); +- close(current->thread.mode.tt.switch_pipe[1]); ++ os_close_file(current->thread.mode.tt.switch_pipe[0]); ++ os_close_file(current->thread.mode.tt.switch_pipe[1]); + } + + void schedule_tail(task_t *prev); + + static void new_thread_handler(int sig) + { ++ unsigned long disable; + int (*fn)(void *); + void *arg; + + fn = current->thread.request.u.thread.proc; + arg = current->thread.request.u.thread.arg; ++ + UPT_SC(&current->thread.regs.regs) = (void *) (&sig + 1); ++ disable = (1 << (SIGVTALRM - 1)) | (1 << (SIGALRM - 1)) | ++ (1 << (SIGIO - 1)) | (1 << (SIGPROF - 1)); ++ SC_SIGMASK(UPT_SC(&current->thread.regs.regs)) &= ~disable; ++ + suspend_new_thread(current->thread.mode.tt.switch_pipe[0]); + +- block_signals(); ++ force_flush_all(); ++ if(current->thread.prev_sched 
!= NULL) ++ schedule_tail(current->thread.prev_sched); ++ current->thread.prev_sched = NULL; ++ + init_new_thread_signals(1); +-#ifdef CONFIG_SMP +- schedule_tail(current->thread.prev_sched); +-#endif + enable_timer(); + free_page(current->thread.temp_stack); + set_cmdline("(kernel thread)"); +- force_flush_all(); + +- current->thread.prev_sched = NULL; + change_sig(SIGUSR1, 1); + change_sig(SIGVTALRM, 1); + change_sig(SIGPROF, 1); +- unblock_signals(); ++ local_irq_enable(); + if(!run_kernel_thread(fn, arg, &current->thread.exec_buf)) + do_exit(0); + } + + static int new_thread_proc(void *stack) + { ++ /* local_irq_disable is needed to block out signals until this thread is ++ * properly scheduled. Otherwise, the tracing thread will get mighty ++ * upset about any signals that arrive before that. ++ * This has the complication that it sets the saved signal mask in ++ * the sigcontext to block signals. This gets restored when this ++ * thread (or a descendant, since they get a copy of this sigcontext) ++ * returns to userspace. ++ * So, this is compensated for elsewhere. ++ * XXX There is still a small window until local_irq_disable() actually ++ * finishes where signals are possible - shouldn't be a problem in ++ * practice since SIGIO hasn't been forwarded here yet, and the ++ * local_irq_disable should finish before a SIGVTALRM has time to be ++ * delivered. ++ */ ++ ++ local_irq_disable(); + init_new_thread_stack(stack, new_thread_handler); + os_usr1_process(os_getpid()); + return(0); +@@ -156,7 +180,7 @@ + * itself with a SIGUSR1. set_user_mode has to be run with SIGUSR1 off, + * so it is blocked before it's called. They are re-enabled on sigreturn + * despite the fact that they were blocked when the SIGUSR1 was issued because +- * copy_thread copies the parent's signcontext, including the signal mask ++ * copy_thread copies the parent's sigcontext, including the signal mask + * onto the signal frame. 
+ */ + +@@ -165,35 +189,32 @@ + UPT_SC(&current->thread.regs.regs) = (void *) (&sig + 1); + suspend_new_thread(current->thread.mode.tt.switch_pipe[0]); + +-#ifdef CONFIG_SMP +- schedule_tail(NULL); +-#endif ++ force_flush_all(); ++ if(current->thread.prev_sched != NULL) ++ schedule_tail(current->thread.prev_sched); ++ current->thread.prev_sched = NULL; ++ + enable_timer(); + change_sig(SIGVTALRM, 1); + local_irq_enable(); +- force_flush_all(); + if(current->mm != current->parent->mm) + protect_memory(uml_reserved, high_physmem - uml_reserved, 1, + 1, 0, 1); +- task_protections((unsigned long) current->thread_info); +- +- current->thread.prev_sched = NULL; ++ task_protections((unsigned long) current_thread); + + free_page(current->thread.temp_stack); ++ local_irq_disable(); + change_sig(SIGUSR1, 0); + set_user_mode(current); + } + +-static int sigusr1 = SIGUSR1; +- + int fork_tramp(void *stack) + { +- int sig = sigusr1; +- + local_irq_disable(); ++ arch_init_thread(); + init_new_thread_stack(stack, finish_fork_handler); + +- kill(os_getpid(), sig); ++ os_usr1_process(os_getpid()); + return(0); + } + +@@ -213,8 +234,8 @@ + } + + err = os_pipe(p->thread.mode.tt.switch_pipe, 1, 1); +- if(err){ +- printk("copy_thread : pipe failed, errno = %d\n", -err); ++ if(err < 0){ ++ printk("copy_thread : pipe failed, err = %d\n", -err); + return(err); + } + +@@ -377,8 +398,8 @@ + + pages = (1 << CONFIG_KERNEL_STACK_ORDER); + +- start = (unsigned long) current->thread_info + PAGE_SIZE; +- end = (unsigned long) current + PAGE_SIZE * pages; ++ start = (unsigned long) current_thread + PAGE_SIZE; ++ end = (unsigned long) current_thread + PAGE_SIZE * pages; + protect_memory(uml_reserved, start - uml_reserved, 1, w, 1, 1); + protect_memory(end, high_physmem - end, 1, w, 1, 1); + +@@ -454,8 +475,9 @@ + + init_task.thread.mode.tt.extern_pid = pid; + err = os_pipe(init_task.thread.mode.tt.switch_pipe, 1, 1); +- if(err) panic("Can't create switch pipe for init_task, errno = %d", +- err); ++ 
if(err) ++ panic("Can't create switch pipe for init_task, errno = %d", ++ -err); + } + + int singlestepping_tt(void *t) +diff -Naur a/arch/um/kernel/tt/ptproxy/Makefile b/arch/um/kernel/tt/ptproxy/Makefile +--- a/arch/um/kernel/tt/ptproxy/Makefile 2004-02-11 12:15:11.000000000 -0500 ++++ b/arch/um/kernel/tt/ptproxy/Makefile 2004-02-11 12:26:57.000000000 -0500 +@@ -9,5 +9,3 @@ + + $(USER_OBJS) : %.o: %.c + $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< +- +-clean: +diff -Naur a/arch/um/kernel/tt/ptproxy/proxy.c b/arch/um/kernel/tt/ptproxy/proxy.c +--- a/arch/um/kernel/tt/ptproxy/proxy.c 2004-02-11 12:15:57.000000000 -0500 ++++ b/arch/um/kernel/tt/ptproxy/proxy.c 2004-02-11 12:27:50.000000000 -0500 +@@ -15,7 +15,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -293,10 +292,10 @@ + } + + char gdb_init_string[] = +-"att 1 +-b panic +-b stop +-handle SIGWINCH nostop noprint pass ++"att 1 \n\ ++b panic \n\ ++b stop \n\ ++handle SIGWINCH nostop noprint pass \n\ + "; + + int start_debugger(char *prog, int startup, int stop, int *fd_out) +@@ -304,7 +303,8 @@ + int slave, child; + + slave = open_gdb_chan(); +- if((child = fork()) == 0){ ++ child = fork(); ++ if(child == 0){ + char *tempname = NULL; + int fd; + +@@ -327,18 +327,19 @@ + exit(1); + #endif + } +- if((fd = make_tempfile("/tmp/gdb_init-XXXXXX", &tempname, 0)) < 0){ +- printk("start_debugger : make_tempfile failed, errno = %d\n", +- errno); ++ fd = make_tempfile("/tmp/gdb_init-XXXXXX", &tempname, 0); ++ if(fd < 0){ ++ printk("start_debugger : make_tempfile failed," ++ "err = %d\n", -fd); + exit(1); + } +- write(fd, gdb_init_string, sizeof(gdb_init_string) - 1); ++ os_write_file(fd, gdb_init_string, sizeof(gdb_init_string) - 1); + if(startup){ + if(stop){ +- write(fd, "b start_kernel\n", ++ os_write_file(fd, "b start_kernel\n", + strlen("b start_kernel\n")); + } +- write(fd, "c\n", strlen("c\n")); ++ os_write_file(fd, "c\n", strlen("c\n")); + } + 
if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){ + printk("start_debugger : PTRACE_TRACEME failed, " +diff -Naur a/arch/um/kernel/tt/ptproxy/sysdep.c b/arch/um/kernel/tt/ptproxy/sysdep.c +--- a/arch/um/kernel/tt/ptproxy/sysdep.c 2004-02-11 12:16:27.000000000 -0500 ++++ b/arch/um/kernel/tt/ptproxy/sysdep.c 2004-02-11 12:28:24.000000000 -0500 +@@ -9,6 +9,7 @@ + #include + #include + #include ++#include + #include + #include + #include +diff -Naur a/arch/um/kernel/tt/ptproxy/wait.c b/arch/um/kernel/tt/ptproxy/wait.c +--- a/arch/um/kernel/tt/ptproxy/wait.c 2004-02-11 12:16:37.000000000 -0500 ++++ b/arch/um/kernel/tt/ptproxy/wait.c 2004-02-11 12:28:37.000000000 -0500 +@@ -56,21 +56,23 @@ + int real_wait_return(struct debugger *debugger) + { + unsigned long ip; +- int err, pid; ++ int pid; + + pid = debugger->pid; ++ + ip = ptrace(PTRACE_PEEKUSER, pid, PT_IP_OFFSET, 0); +- ip = IP_RESTART_SYSCALL(ip); +- err = ptrace(PTRACE_POKEUSER, pid, PT_IP_OFFSET, ip); ++ IP_RESTART_SYSCALL(ip); ++ + if(ptrace(PTRACE_POKEUSER, pid, PT_IP_OFFSET, ip) < 0) + tracer_panic("real_wait_return : Failed to restart system " +- "call, errno = %d\n"); ++ "call, errno = %d\n", errno); ++ + if((ptrace(PTRACE_SYSCALL, debugger->pid, 0, SIGCHLD) < 0) || + (ptrace(PTRACE_SYSCALL, debugger->pid, 0, 0) < 0) || + (ptrace(PTRACE_SYSCALL, debugger->pid, 0, 0) < 0) || + debugger_normal_return(debugger, -1)) + tracer_panic("real_wait_return : gdb failed to wait, " +- "errno = %d\n"); ++ "errno = %d\n", errno); + return(0); + } + +diff -Naur a/arch/um/kernel/tt/syscall_kern.c b/arch/um/kernel/tt/syscall_kern.c +--- a/arch/um/kernel/tt/syscall_kern.c 2004-02-11 12:14:00.000000000 -0500 ++++ b/arch/um/kernel/tt/syscall_kern.c 2004-02-11 12:25:40.000000000 -0500 +@@ -1,5 +1,5 @@ + /* +- * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) ++ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +diff -Naur a/arch/um/kernel/tt/sys-i386/Makefile b/arch/um/kernel/tt/sys-i386/Makefile 
+--- a/arch/um/kernel/tt/sys-i386/Makefile 2004-02-11 12:15:17.000000000 -0500 ++++ b/arch/um/kernel/tt/sys-i386/Makefile 2004-02-11 12:27:05.000000000 -0500 +@@ -10,5 +10,3 @@ + + $(USER_OBJS) : %.o: %.c + $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< +- +-clean : +diff -Naur a/arch/um/kernel/tt/tlb.c b/arch/um/kernel/tt/tlb.c +--- a/arch/um/kernel/tt/tlb.c 2004-02-11 12:14:18.000000000 -0500 ++++ b/arch/um/kernel/tt/tlb.c 2004-02-11 12:26:01.000000000 -0500 +@@ -10,6 +10,7 @@ + #include "asm/page.h" + #include "asm/pgtable.h" + #include "asm/uaccess.h" ++#include "asm/tlbflush.h" + #include "user_util.h" + #include "mem_user.h" + #include "os.h" +diff -Naur a/arch/um/kernel/tt/tracer.c b/arch/um/kernel/tt/tracer.c +--- a/arch/um/kernel/tt/tracer.c 2004-02-11 12:14:16.000000000 -0500 ++++ b/arch/um/kernel/tt/tracer.c 2004-02-11 12:25:59.000000000 -0500 +@@ -39,16 +39,17 @@ + return(0); + + register_winch_irq(tracer_winch[0], fd, -1, data); +- return(0); ++ return(1); + } + + static void tracer_winch_handler(int sig) + { ++ int n; + char c = 1; + +- if(write(tracer_winch[1], &c, sizeof(c)) != sizeof(c)) +- printk("tracer_winch_handler - write failed, errno = %d\n", +- errno); ++ n = os_write_file(tracer_winch[1], &c, sizeof(c)); ++ if(n != sizeof(c)) ++ printk("tracer_winch_handler - write failed, err = %d\n", -n); + } + + /* Called only by the tracing thread during initialization */ +@@ -58,9 +59,8 @@ + int err; + + err = os_pipe(tracer_winch, 1, 1); +- if(err){ +- printk("setup_tracer_winch : os_pipe failed, errno = %d\n", +- -err); ++ if(err < 0){ ++ printk("setup_tracer_winch : os_pipe failed, err = %d\n", -err); + return; + } + signal(SIGWINCH, tracer_winch_handler); +@@ -130,8 +130,8 @@ + case SIGTSTP: + if(ptrace(PTRACE_CONT, pid, 0, sig) < 0) + tracer_panic("sleeping_process_signal : Failed to " +- "continue pid %d, errno = %d\n", pid, +- sig); ++ "continue pid %d, signal = %d, " ++ "errno = %d\n", pid, sig, errno); + break; + + /* This happens 
when the debugger (e.g. strace) is doing system call +@@ -145,7 +145,7 @@ + if(ptrace(PTRACE_SYSCALL, pid, 0, 0) < 0) + tracer_panic("sleeping_process_signal : Failed to " + "PTRACE_SYSCALL pid %d, errno = %d\n", +- pid, sig); ++ pid, errno); + break; + case SIGSTOP: + break; +@@ -218,7 +218,7 @@ + err = attach(debugger_parent); + if(err){ + printf("Failed to attach debugger parent %d, " +- "errno = %d\n", debugger_parent, err); ++ "errno = %d\n", debugger_parent, -err); + debugger_parent = -1; + } + else { +@@ -233,7 +233,8 @@ + } + set_cmdline("(tracing thread)"); + while(1){ +- if((pid = waitpid(-1, &status, WUNTRACED)) <= 0){ ++ pid = waitpid(-1, &status, WUNTRACED); ++ if(pid <= 0){ + if(errno != ECHILD){ + printf("wait failed - errno = %d\n", errno); + } +@@ -401,7 +402,7 @@ + + if(!strcmp(line, "go")) debug_stop = 0; + else if(!strcmp(line, "parent")) debug_parent = 1; +- else printk("Unknown debug option : '%s'\n", line); ++ else printf("Unknown debug option : '%s'\n", line); + + line = next; + } +diff -Naur a/arch/um/kernel/tt/uaccess.c b/arch/um/kernel/tt/uaccess.c +--- a/arch/um/kernel/tt/uaccess.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/arch/um/kernel/tt/uaccess.c 2004-02-11 12:25:43.000000000 -0500 +@@ -0,0 +1,73 @@ ++/* ++ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) ++ * Licensed under the GPL ++ */ ++ ++#include "linux/sched.h" ++#include "asm/uaccess.h" ++ ++int copy_from_user_tt(void *to, const void *from, int n) ++{ ++ if(!access_ok_tt(VERIFY_READ, from, n)) ++ return(n); ++ ++ return(__do_copy_from_user(to, from, n, &current->thread.fault_addr, ++ &current->thread.fault_catcher)); ++} ++ ++int copy_to_user_tt(void *to, const void *from, int n) ++{ ++ if(!access_ok_tt(VERIFY_WRITE, to, n)) ++ return(n); ++ ++ return(__do_copy_to_user(to, from, n, &current->thread.fault_addr, ++ &current->thread.fault_catcher)); ++} ++ ++int strncpy_from_user_tt(char *dst, const char *src, int count) ++{ ++ int n; ++ ++ if(!access_ok_tt(VERIFY_READ, src, 1)) ++ 
return(-EFAULT); ++ ++ n = __do_strncpy_from_user(dst, src, count, ++ &current->thread.fault_addr, ++ &current->thread.fault_catcher); ++ if(n < 0) return(-EFAULT); ++ return(n); ++} ++ ++int __clear_user_tt(void *mem, int len) ++{ ++ return(__do_clear_user(mem, len, ++ &current->thread.fault_addr, ++ &current->thread.fault_catcher)); ++} ++ ++int clear_user_tt(void *mem, int len) ++{ ++ if(!access_ok_tt(VERIFY_WRITE, mem, len)) ++ return(len); ++ ++ return(__do_clear_user(mem, len, &current->thread.fault_addr, ++ &current->thread.fault_catcher)); ++} ++ ++int strnlen_user_tt(const void *str, int len) ++{ ++ return(__do_strnlen_user(str, len, ++ &current->thread.fault_addr, ++ &current->thread.fault_catcher)); ++} ++ ++/* ++ * Overrides for Emacs so that we follow Linus's tabbing style. ++ * Emacs will notice this stuff at the end of the file and automatically ++ * adjust the settings for this buffer only. This must remain at the end ++ * of the file. ++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -Naur a/arch/um/kernel/tt/uaccess_user.c b/arch/um/kernel/tt/uaccess_user.c +--- a/arch/um/kernel/tt/uaccess_user.c 2004-02-11 12:14:38.000000000 -0500 ++++ b/arch/um/kernel/tt/uaccess_user.c 2004-02-11 12:26:42.000000000 -0500 +@@ -8,15 +8,20 @@ + #include + #include "user_util.h" + #include "uml_uaccess.h" ++#include "task.h" ++#include "kern_util.h" + + int __do_copy_from_user(void *to, const void *from, int n, + void **fault_addr, void **fault_catcher) + { ++ struct tt_regs save = TASK_REGS(get_current())->tt; + unsigned long fault; + int faulted; + + fault = __do_user_copy(to, from, n, fault_addr, fault_catcher, + __do_copy, &faulted); ++ TASK_REGS(get_current())->tt = save; ++ + if(!faulted) return(0); + else return(n - (fault - (unsigned long) from)); + } +@@ -29,11 +34,14 @@ + int __do_strncpy_from_user(char *dst, const char *src, unsigned long count, + void **fault_addr, void **fault_catcher) + { ++ struct 
tt_regs save = TASK_REGS(get_current())->tt; + unsigned long fault; + int faulted; + + fault = __do_user_copy(dst, src, count, fault_addr, fault_catcher, + __do_strncpy, &faulted); ++ TASK_REGS(get_current())->tt = save; ++ + if(!faulted) return(strlen(dst)); + else return(-1); + } +@@ -46,11 +54,14 @@ + int __do_clear_user(void *mem, unsigned long len, + void **fault_addr, void **fault_catcher) + { ++ struct tt_regs save = TASK_REGS(get_current())->tt; + unsigned long fault; + int faulted; + + fault = __do_user_copy(mem, NULL, len, fault_addr, fault_catcher, + __do_clear, &faulted); ++ TASK_REGS(get_current())->tt = save; ++ + if(!faulted) return(0); + else return(len - (fault - (unsigned long) mem)); + } +@@ -58,19 +69,20 @@ + int __do_strnlen_user(const char *str, unsigned long n, + void **fault_addr, void **fault_catcher) + { ++ struct tt_regs save = TASK_REGS(get_current())->tt; + int ret; + unsigned long *faddrp = (unsigned long *)fault_addr; + jmp_buf jbuf; + + *fault_catcher = &jbuf; +- if(setjmp(jbuf) == 0){ ++ if(sigsetjmp(jbuf, 1) == 0) + ret = strlen(str) + 1; +- } +- else { +- ret = *faddrp - (unsigned long) str; +- } ++ else ret = *faddrp - (unsigned long) str; ++ + *fault_addr = NULL; + *fault_catcher = NULL; ++ ++ TASK_REGS(get_current())->tt = save; + return ret; + } + +diff -Naur a/arch/um/kernel/tt/unmap.c b/arch/um/kernel/tt/unmap.c +--- a/arch/um/kernel/tt/unmap.c 2004-02-11 12:16:26.000000000 -0500 ++++ b/arch/um/kernel/tt/unmap.c 2004-02-11 12:28:22.000000000 -0500 +@@ -3,10 +3,7 @@ + * Licensed under the GPL + */ + +-#include +-#include + #include +-#include "user.h" + + int switcheroo(int fd, int prot, void *from, void *to, int size) + { +diff -Naur a/arch/um/kernel/tty_log.c b/arch/um/kernel/tty_log.c +--- a/arch/um/kernel/tty_log.c 2004-02-11 12:15:58.000000000 -0500 ++++ b/arch/um/kernel/tty_log.c 2004-02-11 12:27:51.000000000 -0500 +@@ -9,10 +9,10 @@ + #include + #include + #include +-#include + #include + #include "init.h" + #include 
"user.h" ++#include "kern_util.h" + #include "os.h" + + #define TTY_LOG_DIR "./" +@@ -24,29 +24,40 @@ + #define TTY_LOG_OPEN 1 + #define TTY_LOG_CLOSE 2 + #define TTY_LOG_WRITE 3 ++#define TTY_LOG_EXEC 4 ++ ++#define TTY_READ 1 ++#define TTY_WRITE 2 + + struct tty_log_buf { + int what; + unsigned long tty; + int len; ++ int direction; ++ unsigned long sec; ++ unsigned long usec; + }; + +-int open_tty_log(void *tty) ++int open_tty_log(void *tty, void *current_tty) + { + struct timeval tv; + struct tty_log_buf data; + char buf[strlen(tty_log_dir) + sizeof("01234567890-01234567\0")]; + int fd; + ++ gettimeofday(&tv, NULL); + if(tty_log_fd != -1){ +- data = ((struct tty_log_buf) { what : TTY_LOG_OPEN, +- tty : (unsigned long) tty, +- len : 0 }); +- write(tty_log_fd, &data, sizeof(data)); ++ data = ((struct tty_log_buf) { .what = TTY_LOG_OPEN, ++ .tty = (unsigned long) tty, ++ .len = sizeof(current_tty), ++ .direction = 0, ++ .sec = tv.tv_sec, ++ .usec = tv.tv_usec } ); ++ os_write_file(tty_log_fd, &data, sizeof(data)); ++ os_write_file(tty_log_fd, ¤t_tty, data.len); + return(tty_log_fd); + } + +- gettimeofday(&tv, NULL); + sprintf(buf, "%s/%0u-%0u", tty_log_dir, (unsigned int) tv.tv_sec, + (unsigned int) tv.tv_usec); + +@@ -62,30 +73,117 @@ + void close_tty_log(int fd, void *tty) + { + struct tty_log_buf data; ++ struct timeval tv; + + if(tty_log_fd != -1){ +- data = ((struct tty_log_buf) { what : TTY_LOG_CLOSE, +- tty : (unsigned long) tty, +- len : 0 }); +- write(tty_log_fd, &data, sizeof(data)); ++ gettimeofday(&tv, NULL); ++ data = ((struct tty_log_buf) { .what = TTY_LOG_CLOSE, ++ .tty = (unsigned long) tty, ++ .len = 0, ++ .direction = 0, ++ .sec = tv.tv_sec, ++ .usec = tv.tv_usec } ); ++ os_write_file(tty_log_fd, &data, sizeof(data)); + return; + } +- close(fd); ++ os_close_file(fd); + } + +-int write_tty_log(int fd, char *buf, int len, void *tty) ++static int log_chunk(int fd, const char *buf, int len) + { ++ int total = 0, try, missed, n; ++ char chunk[64]; ++ 
++ while(len > 0){ ++ try = (len > sizeof(chunk)) ? sizeof(chunk) : len; ++ missed = copy_from_user_proc(chunk, (char *) buf, try); ++ try -= missed; ++ n = os_write_file(fd, chunk, try); ++ if(n != try) { ++ if(n < 0) ++ return(n); ++ return(-EIO); ++ } ++ if(missed != 0) ++ return(-EFAULT); ++ ++ len -= try; ++ total += try; ++ buf += try; ++ } ++ ++ return(total); ++} ++ ++int write_tty_log(int fd, const char *buf, int len, void *tty, int is_read) ++{ ++ struct timeval tv; + struct tty_log_buf data; ++ int direction; + + if(fd == tty_log_fd){ +- data = ((struct tty_log_buf) { what : TTY_LOG_WRITE, +- tty : (unsigned long) tty, +- len : len }); +- write(tty_log_fd, &data, sizeof(data)); ++ gettimeofday(&tv, NULL); ++ direction = is_read ? TTY_READ : TTY_WRITE; ++ data = ((struct tty_log_buf) { .what = TTY_LOG_WRITE, ++ .tty = (unsigned long) tty, ++ .len = len, ++ .direction = direction, ++ .sec = tv.tv_sec, ++ .usec = tv.tv_usec } ); ++ os_write_file(tty_log_fd, &data, sizeof(data)); + } +- return(write(fd, buf, len)); ++ ++ return(log_chunk(fd, buf, len)); + } + ++void log_exec(char **argv, void *tty) ++{ ++ struct timeval tv; ++ struct tty_log_buf data; ++ char **ptr,*arg; ++ int len; ++ ++ if(tty_log_fd == -1) return; ++ ++ gettimeofday(&tv, NULL); ++ ++ len = 0; ++ for(ptr = argv; ; ptr++){ ++ if(copy_from_user_proc(&arg, ptr, sizeof(arg))) ++ return; ++ if(arg == NULL) break; ++ len += strlen_user_proc(arg); ++ } ++ ++ data = ((struct tty_log_buf) { .what = TTY_LOG_EXEC, ++ .tty = (unsigned long) tty, ++ .len = len, ++ .direction = 0, ++ .sec = tv.tv_sec, ++ .usec = tv.tv_usec } ); ++ os_write_file(tty_log_fd, &data, sizeof(data)); ++ ++ for(ptr = argv; ; ptr++){ ++ if(copy_from_user_proc(&arg, ptr, sizeof(arg))) ++ return; ++ if(arg == NULL) break; ++ log_chunk(tty_log_fd, arg, strlen_user_proc(arg)); ++ } ++} ++ ++extern void register_tty_logger(int (*opener)(void *, void *), ++ int (*writer)(int, const char *, int, ++ void *, int), ++ void (*closer)(int, 
void *)); ++ ++static int register_logger(void) ++{ ++ register_tty_logger(open_tty_log, write_tty_log, close_tty_log); ++ return(0); ++} ++ ++__uml_initcall(register_logger); ++ + static int __init set_tty_log_dir(char *name, int *add) + { + tty_log_dir = name; +@@ -104,7 +202,7 @@ + + tty_log_fd = strtoul(name, &end, 0); + if((*end != '\0') || (end == name)){ +- printk("set_tty_log_fd - strtoul failed on '%s'\n", name); ++ printf("set_tty_log_fd - strtoul failed on '%s'\n", name); + tty_log_fd = -1; + } + return 0; +diff -Naur a/arch/um/kernel/uaccess_user.c b/arch/um/kernel/uaccess_user.c +--- a/arch/um/kernel/uaccess_user.c 2004-02-11 12:16:09.000000000 -0500 ++++ b/arch/um/kernel/uaccess_user.c 2004-02-11 12:28:20.000000000 -0500 +@@ -20,7 +20,7 @@ + + jmp_buf jbuf; + *fault_catcher = &jbuf; +- if(setjmp(jbuf) == 0){ ++ if(sigsetjmp(jbuf, 1) == 0){ + (*op)(to, from, n); + ret = 0; + *faulted_out = 0; +diff -Naur a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c +--- a/arch/um/kernel/um_arch.c 2004-02-11 12:16:29.000000000 -0500 ++++ b/arch/um/kernel/um_arch.c 2004-02-11 12:28:28.000000000 -0500 +@@ -38,13 +38,18 @@ + #include "mode_kern.h" + #include "mode.h" + +-#define DEFAULT_COMMAND_LINE "root=6200" ++#define DEFAULT_COMMAND_LINE "root=98:0" + + struct cpuinfo_um boot_cpu_data = { + .loops_per_jiffy = 0, + .ipi_pipe = { -1, -1 } + }; + ++/* Placeholder to make UML link until the vsyscall stuff is actually ++ * implemented ++ */ ++void *__kernel_vsyscall; ++ + unsigned long thread_saved_pc(struct task_struct *task) + { + return(os_process_pc(CHOOSE_MODE_PROC(thread_pid_tt, thread_pid_skas, +@@ -53,18 +58,22 @@ + + static int show_cpuinfo(struct seq_file *m, void *v) + { +- int index; ++ int index = 0; + +- index = (struct cpuinfo_um *)v - cpu_data; + #ifdef CONFIG_SMP ++ index = (struct cpuinfo_um *) v - cpu_data; + if (!cpu_online(index)) + return 0; + #endif + +- seq_printf(m, "bogomips\t: %lu.%02lu\n", ++ seq_printf(m, "processor\t: %d\n", index); ++ 
seq_printf(m, "vendor_id\t: User Mode Linux\n"); ++ seq_printf(m, "model name\t: UML\n"); ++ seq_printf(m, "mode\t\t: %s\n", CHOOSE_MODE("tt", "skas")); ++ seq_printf(m, "host\t\t: %s\n", host_info); ++ seq_printf(m, "bogomips\t: %lu.%02lu\n\n", + loops_per_jiffy/(500000/HZ), + (loops_per_jiffy/(5000/HZ)) % 100); +- seq_printf(m, "host\t\t: %s\n", host_info); + + return(0); + } +@@ -134,12 +143,12 @@ + if(umid != NULL){ + snprintf(argv1_begin, + (argv1_end - argv1_begin) * sizeof(*ptr), +- "(%s)", umid); ++ "(%s) ", umid); + ptr = &argv1_begin[strlen(argv1_begin)]; + } + else ptr = argv1_begin; + +- snprintf(ptr, (argv1_end - ptr) * sizeof(*ptr), " [%s]", cmd); ++ snprintf(ptr, (argv1_end - ptr) * sizeof(*ptr), "[%s]", cmd); + memset(argv1_begin + strlen(argv1_begin), '\0', + argv1_end - argv1_begin - strlen(argv1_begin)); + #endif +@@ -179,7 +188,7 @@ + static int __init uml_ncpus_setup(char *line, int *add) + { + if (!sscanf(line, "%d", &ncpus)) { +- printk("Couldn't parse [%s]\n", line); ++ printf("Couldn't parse [%s]\n", line); + return -1; + } + +@@ -210,7 +219,7 @@ + + static int __init mode_tt_setup(char *line, int *add) + { +- printk("CONFIG_MODE_TT disabled - 'mode=tt' ignored\n"); ++ printf("CONFIG_MODE_TT disabled - 'mode=tt' ignored\n"); + return(0); + } + +@@ -221,7 +230,7 @@ + + static int __init mode_tt_setup(char *line, int *add) + { +- printk("CONFIG_MODE_SKAS disabled - 'mode=tt' redundant\n"); ++ printf("CONFIG_MODE_SKAS disabled - 'mode=tt' redundant\n"); + return(0); + } + +@@ -291,7 +300,7 @@ + + /* Set during early boot */ + unsigned long brk_start; +-static struct vm_reserved kernel_vm_reserved; ++unsigned long end_iomem; + + #define MIN_VMALLOC (32 * 1024 * 1024) + +@@ -299,7 +308,7 @@ + { + unsigned long avail; + unsigned long virtmem_size, max_physmem; +- unsigned int i, add, err; ++ unsigned int i, add; + + for (i = 1; i < argc; i++){ + if((i == 1) && (argv[i][0] == ' ')) continue; +@@ -328,12 +337,16 @@ + argv1_end = 
&argv[1][strlen(argv[1])]; + #endif + +- set_usable_vm(uml_physmem, get_kmem_end()); +- + highmem = 0; +- max_physmem = get_kmem_end() - uml_physmem - MIN_VMALLOC; +- if(physmem_size > max_physmem){ +- highmem = physmem_size - max_physmem; ++ iomem_size = (iomem_size + PAGE_SIZE - 1) & PAGE_MASK; ++ max_physmem = get_kmem_end() - uml_physmem - iomem_size - MIN_VMALLOC; ++ ++ /* Zones have to begin on a 1 << MAX_ORDER page boundary, ++ * so this makes sure that's true for highmem ++ */ ++ max_physmem &= ~((1 << (PAGE_SHIFT + MAX_ORDER)) - 1); ++ if(physmem_size + iomem_size > max_physmem){ ++ highmem = physmem_size + iomem_size - max_physmem; + physmem_size -= highmem; + #ifndef CONFIG_HIGHMEM + highmem = 0; +@@ -343,11 +356,19 @@ + } + + high_physmem = uml_physmem + physmem_size; +- high_memory = (void *) high_physmem; ++ end_iomem = high_physmem + iomem_size; ++ high_memory = (void *) end_iomem; + + start_vm = VMALLOC_START; + +- setup_physmem(uml_physmem, uml_reserved, physmem_size); ++ setup_physmem(uml_physmem, uml_reserved, physmem_size, highmem); ++ if(init_maps(physmem_size, iomem_size, highmem)){ ++ printf("Failed to allocate mem_map for %ld bytes of physical " ++ "memory and %ld bytes of highmem\n", physmem_size, ++ highmem); ++ exit(1); ++ } ++ + virtmem_size = physmem_size; + avail = get_kmem_end() - start_vm; + if(physmem_size > avail) virtmem_size = avail; +@@ -357,18 +378,13 @@ + printf("Kernel virtual memory size shrunk to %ld bytes\n", + virtmem_size); + +- err = reserve_vm(high_physmem, end_vm, &kernel_vm_reserved); +- if(err){ +- printf("Failed to reserve VM area for kernel VM\n"); +- exit(1); +- } +- + uml_postsetup(); + + init_task.thread.kernel_stack = (unsigned long) &init_thread_info + + 2 * PAGE_SIZE; + + task_protections((unsigned long) &init_thread_info); ++ os_flush_stdout(); + + return(CHOOSE_MODE(start_uml_tt(), start_uml_skas())); + } +@@ -377,7 +393,7 @@ + void *unused2) + { + #ifdef CONFIG_MAGIC_SYSRQ +- handle_sysrq('p', 
¤t->thread.regs, NULL, NULL); ++ handle_sysrq('p', ¤t->thread.regs, NULL); + #endif + machine_halt(); + return(0); +@@ -403,6 +419,11 @@ + arch_check_bugs(); + check_ptrace(); + check_sigio(); ++ check_devanon(); ++} ++ ++void apply_alternatives(void *start, void *end) ++{ + } + + /* +diff -Naur a/arch/um/kernel/umid.c b/arch/um/kernel/umid.c +--- a/arch/um/kernel/umid.c 2004-02-11 12:16:43.000000000 -0500 ++++ b/arch/um/kernel/umid.c 2004-02-11 12:28:42.000000000 -0500 +@@ -5,7 +5,6 @@ + + #include + #include +-#include + #include + #include + #include +@@ -33,18 +32,19 @@ + static int umid_is_random = 1; + static int umid_inited = 0; + +-static int make_umid(void); ++static int make_umid(int (*printer)(const char *fmt, ...)); + +-static int __init set_umid(char *name, int is_random) ++static int __init set_umid(char *name, int is_random, ++ int (*printer)(const char *fmt, ...)) + { + if(umid_inited){ +- printk("Unique machine name can't be set twice\n"); ++ (*printer)("Unique machine name can't be set twice\n"); + return(-1); + } + + if(strlen(name) > UMID_LEN - 1) +- printk("Unique machine name is being truncated to %s " +- "characters\n", UMID_LEN); ++ (*printer)("Unique machine name is being truncated to %s " ++ "characters\n", UMID_LEN); + strlcpy(umid, name, sizeof(umid)); + + umid_is_random = is_random; +@@ -54,7 +54,7 @@ + + static int __init set_umid_arg(char *name, int *add) + { +- return(set_umid(name, 0)); ++ return(set_umid(name, 0, printf)); + } + + __uml_setup("umid=", set_umid_arg, +@@ -67,7 +67,7 @@ + { + int n; + +- if(!umid_inited && make_umid()) return(-1); ++ if(!umid_inited && make_umid(printk)) return(-1); + + n = strlen(uml_dir) + strlen(umid) + strlen(name) + 1; + if(n > len){ +@@ -85,22 +85,23 @@ + { + char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; + char pid[sizeof("nnnnn\0")]; +- int fd; ++ int fd, n; + + if(umid_file_name("pid", file, sizeof(file))) return 0; + + fd = os_open_file(file, 
of_create(of_excl(of_rdwr(OPENFLAGS()))), + 0644); + if(fd < 0){ +- printk("Open of machine pid file \"%s\" failed - " +- "errno = %d\n", file, -fd); ++ printf("Open of machine pid file \"%s\" failed - " ++ "err = %d\n", file, -fd); + return 0; + } + + sprintf(pid, "%d\n", os_getpid()); +- if(write(fd, pid, strlen(pid)) != strlen(pid)) +- printk("Write of pid file failed - errno = %d\n", errno); +- close(fd); ++ n = os_write_file(fd, pid, strlen(pid)); ++ if(n != strlen(pid)) ++ printf("Write of pid file failed - err = %d\n", -n); ++ os_close_file(fd); + return 0; + } + +@@ -111,7 +112,8 @@ + int len; + char file[256]; + +- if((directory = opendir(dir)) == NULL){ ++ directory = opendir(dir); ++ if(directory == NULL){ + printk("actually_do_remove : couldn't open directory '%s', " + "errno = %d\n", dir, errno); + return(1); +@@ -160,22 +162,24 @@ + { + char file[strlen(uml_dir) + UMID_LEN + sizeof("/pid\0")]; + char pid[sizeof("nnnnn\0")], *end; +- int dead, fd, p; ++ int dead, fd, p, n; + + sprintf(file, "%s/pid", dir); + dead = 0; +- if((fd = os_open_file(file, of_read(OPENFLAGS()), 0)) < 0){ ++ fd = os_open_file(file, of_read(OPENFLAGS()), 0); ++ if(fd < 0){ + if(fd != -ENOENT){ + printk("not_dead_yet : couldn't open pid file '%s', " +- "errno = %d\n", file, -fd); ++ "err = %d\n", file, -fd); + return(1); + } + dead = 1; + } + if(fd > 0){ +- if(read(fd, pid, sizeof(pid)) < 0){ ++ n = os_read_file(fd, pid, sizeof(pid)); ++ if(n < 0){ + printk("not_dead_yet : couldn't read pid file '%s', " +- "errno = %d\n", file, errno); ++ "err = %d\n", file, -n); + return(1); + } + p = strtoul(pid, &end, 0); +@@ -197,7 +201,7 @@ + if((strlen(name) > 0) && (name[strlen(name) - 1] != '/')){ + uml_dir = malloc(strlen(name) + 1); + if(uml_dir == NULL){ +- printk("Failed to malloc uml_dir - error = %d\n", ++ printf("Failed to malloc uml_dir - error = %d\n", + errno); + uml_dir = name; + return(0); +@@ -217,7 +221,7 @@ + char *home = getenv("HOME"); + + if(home == NULL){ +- 
printk("make_uml_dir : no value in environment for " ++ printf("make_uml_dir : no value in environment for " + "$HOME\n"); + exit(1); + } +@@ -232,57 +236,59 @@ + dir[len + 1] = '\0'; + } + +- if((uml_dir = malloc(strlen(dir) + 1)) == NULL){ ++ uml_dir = malloc(strlen(dir) + 1); ++ if(uml_dir == NULL){ + printf("make_uml_dir : malloc failed, errno = %d\n", errno); + exit(1); + } + strcpy(uml_dir, dir); + + if((mkdir(uml_dir, 0777) < 0) && (errno != EEXIST)){ +- printk("Failed to mkdir %s - errno = %i\n", uml_dir, errno); ++ printf("Failed to mkdir %s - errno = %i\n", uml_dir, errno); + return(-1); + } + return 0; + } + +-static int __init make_umid(void) ++static int __init make_umid(int (*printer)(const char *fmt, ...)) + { + int fd, err; + char tmp[strlen(uml_dir) + UMID_LEN + 1]; + + strlcpy(tmp, uml_dir, sizeof(tmp)); + +- if(*umid == 0){ ++ if(!umid_inited){ + strcat(tmp, "XXXXXX"); + fd = mkstemp(tmp); + if(fd < 0){ +- printk("make_umid - mkstemp failed, errno = %d\n", +- errno); ++ (*printer)("make_umid - mkstemp failed, errno = %d\n", ++ errno); + return(1); + } + +- close(fd); ++ os_close_file(fd); + /* There's a nice tiny little race between this unlink and + * the mkdir below. It'd be nice if there were a mkstemp + * for directories. 
+ */ + unlink(tmp); +- set_umid(&tmp[strlen(uml_dir)], 1); ++ set_umid(&tmp[strlen(uml_dir)], 1, printer); + } + + sprintf(tmp, "%s%s", uml_dir, umid); + +- if((err = mkdir(tmp, 0777)) < 0){ ++ err = mkdir(tmp, 0777); ++ if(err < 0){ + if(errno == EEXIST){ + if(not_dead_yet(tmp)){ +- printk("umid '%s' is in use\n", umid); ++ (*printer)("umid '%s' is in use\n", umid); + return(-1); + } + err = mkdir(tmp, 0777); + } + } + if(err < 0){ +- printk("Failed to create %s - errno = %d\n", umid, errno); ++ (*printer)("Failed to create %s - errno = %d\n", umid, errno); + return(-1); + } + +@@ -295,7 +301,13 @@ + ); + + __uml_postsetup(make_uml_dir); +-__uml_postsetup(make_umid); ++ ++static int __init make_umid_setup(void) ++{ ++ return(make_umid(printf)); ++} ++ ++__uml_postsetup(make_umid_setup); + __uml_postsetup(create_pid_file); + + /* +diff -Naur a/arch/um/kernel/user_syms.c b/arch/um/kernel/user_syms.c +--- a/arch/um/kernel/user_syms.c 2004-02-11 12:16:42.000000000 -0500 ++++ b/arch/um/kernel/user_syms.c 2004-02-11 12:28:41.000000000 -0500 +@@ -1,7 +1,7 @@ + #include + #include +-#include + #include ++#include + #include + #include + #include +@@ -16,46 +16,61 @@ + * since this includes various user-level headers. + */ + +-struct module_symbol ++/* Had to update this: this changed in late 2.5 to add CRC and other beasts ++ * and was never updated here- 13 Dec 2003-Blaisorblade ++ */ ++ ++/* v850 toolchain uses a `_' prefix for all user symbols */ ++#ifndef MODULE_SYMBOL_PREFIX ++#define MODULE_SYMBOL_PREFIX "" ++#endif ++ ++struct kernel_symbol + { + unsigned long value; + const char *name; + }; + +-/* Indirect stringification. 
*/ +- +-#define __MODULE_STRING_1(x) #x +-#define __MODULE_STRING(x) __MODULE_STRING_1(x) +- +-#if !defined(__AUTOCONF_INCLUDED__) +- +-#define __EXPORT_SYMBOL(sym,str) error config_must_be_included_before_module +-#define EXPORT_SYMBOL(var) error config_must_be_included_before_module +-#define EXPORT_SYMBOL_NOVERS(var) error config_must_be_included_before_module +- +-#elif !defined(UML_CONFIG_MODULES) ++#if !defined(UML_CONFIG_MODULES) ++#define EXPORT_SYMBOL(sym) ++#define EXPORT_SYMBOL_GPL(sym) ++#define EXPORT_SYMBOL_NOVERS(sym) ++ ++#else /*UML_CONFIG_MODULES*/ ++#ifndef __GENKSYMS__ ++#ifdef UML_CONFIG_MODVERSIONS ++/* Mark the CRC weak since genksyms apparently decides not to ++ * generate a checksums for some symbols */ ++#define __CRC_SYMBOL(sym, sec) \ ++ extern void *__crc_##sym __attribute__((weak)); \ ++ static const unsigned long __kcrctab_##sym \ ++ __attribute__((section("__kcrctab" sec), unused)) \ ++ = (unsigned long) &__crc_##sym; ++#else ++#define __CRC_SYMBOL(sym, sec) ++#endif + +-#define __EXPORT_SYMBOL(sym,str) +-#define EXPORT_SYMBOL(var) +-#define EXPORT_SYMBOL_NOVERS(var) ++/* For every exported symbol, place a struct in the __ksymtab section */ ++#define __EXPORT_SYMBOL(sym, sec) \ ++ __CRC_SYMBOL(sym, sec) \ ++ static const char __kstrtab_##sym[] \ ++ __attribute__((section("__ksymtab_strings"))) \ ++ = MODULE_SYMBOL_PREFIX #sym; \ ++ static const struct kernel_symbol __ksymtab_##sym \ ++ __attribute__((section("__ksymtab" sec), unused)) \ ++ = { (unsigned long)&sym, __kstrtab_##sym } + +-#else ++#define EXPORT_SYMBOL(sym) \ ++ __EXPORT_SYMBOL(sym, "") + +-#define __EXPORT_SYMBOL(sym, str) \ +-const char __kstrtab_##sym[] \ +-__attribute__((section(".kstrtab"))) = str; \ +-const struct module_symbol __ksymtab_##sym \ +-__attribute__((section("__ksymtab"))) = \ +-{ (unsigned long)&sym, __kstrtab_##sym } ++#define EXPORT_SYMBOL_GPL(sym) \ ++ __EXPORT_SYMBOL(sym, "_gpl") + +-#if defined(__MODVERSIONS__) || !defined(UML_CONFIG_MODVERSIONS) 
+-#define EXPORT_SYMBOL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(var)) +-#else +-#define EXPORT_SYMBOL(var) __EXPORT_SYMBOL(var, __MODULE_STRING(__VERSIONED_SYMBOL(var))) + #endif + +-#define EXPORT_SYMBOL_NOVERS(var) __EXPORT_SYMBOL(var, __MODULE_STRING(var)) +- ++/* We don't mangle the actual symbol anymore, so no need for ++ * special casing EXPORT_SYMBOL_NOVERS. FIXME: Deprecated */ ++#define EXPORT_SYMBOL_NOVERS(sym) EXPORT_SYMBOL(sym) + #endif + + EXPORT_SYMBOL(__errno_location); +@@ -109,5 +124,18 @@ + + EXPORT_SYMBOL(memset); + EXPORT_SYMBOL(strstr); ++EXPORT_SYMBOL(printf); ++EXPORT_SYMBOL(strlen); + + EXPORT_SYMBOL(find_iomem); ++ ++/* ++ * Overrides for Emacs so that we follow Linus's tabbing style. ++ * Emacs will notice this stuff at the end of the file and automatically ++ * adjust the settings for this buffer only. This must remain at the end ++ * of the file. ++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -Naur a/arch/um/kernel/user_util.c b/arch/um/kernel/user_util.c +--- a/arch/um/kernel/user_util.c 2004-02-11 12:14:27.000000000 -0500 ++++ b/arch/um/kernel/user_util.c 2004-02-11 12:26:09.000000000 -0500 +@@ -5,7 +5,6 @@ + + #include + #include +-#include + #include + #include + #include +@@ -82,7 +81,8 @@ + int status, ret; + + while(1){ +- if(((ret = waitpid(pid, &status, WUNTRACED)) < 0) || ++ ret = waitpid(pid, &status, WUNTRACED); ++ if((ret < 0) || + !WIFSTOPPED(status) || (WSTOPSIG(status) != sig)){ + if(ret < 0){ + if(errno == EINTR) continue; +@@ -119,17 +119,6 @@ + } + } + +-int clone_and_wait(int (*fn)(void *), void *arg, void *sp, int flags) +-{ +- int pid; +- +- pid = clone(fn, sp, flags, arg); +- if(pid < 0) return(-1); +- wait_for_stop(pid, SIGSTOP, PTRACE_CONT, NULL); +- ptrace(PTRACE_CONT, pid, 0, 0); +- return(pid); +-} +- + int raw(int fd, int complain) + { + struct termios tt; +diff -Naur a/arch/um/main.c b/arch/um/main.c 
+--- a/arch/um/main.c 2004-02-11 12:16:28.000000000 -0500 ++++ b/arch/um/main.c 2004-02-11 12:28:27.000000000 -0500 +@@ -8,6 +8,7 @@ + #include + #include + #include ++#include + #include + #include + #include +@@ -123,12 +124,14 @@ + + set_stklim(); + +- if((new_argv = malloc((argc + 1) * sizeof(char *))) == NULL){ ++ new_argv = malloc((argc + 1) * sizeof(char *)); ++ if(new_argv == NULL){ + perror("Mallocing argv"); + exit(1); + } + for(i=0;i $@ ++# Generated files ++define filechk_umlconfig ++ sed 's/ CONFIG/ UML_CONFIG/' ++endef ++ ++$(ARCH_DIR)/include/uml-config.h : $(TOPDIR)/include/linux/autoconf.h ++ $(call filechk,umlconfig) ++ ++filechk_gen_header = $< + + $(ARCH_DIR)/include/task.h : $(ARCH_DIR)/util/mk_task +- $< > $@ ++ $(call filechk,gen_header) + + $(ARCH_DIR)/include/kern_constants.h : $(ARCH_DIR)/util/mk_constants +- $< > $@ ++ $(call filechk,gen_header) + +-$(ARCH_DIR)/util/mk_task : $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h \ +- $(ARCH_DIR)/util FORCE ; ++$(ARCH_DIR)/util/mk_task $(ARCH_DIR)/util/mk_constants : $(ARCH_DIR)/util \ ++ sys_prepare FORCE ; + + $(ARCH_DIR)/util: FORCE +- @$(call descend,$@,) ++ $(Q)$(MAKE) $(build)=$@ + +-export SUBARCH USER_CFLAGS OS ++export SUBARCH USER_CFLAGS OS +diff -Naur a/arch/um/Makefile-i386 b/arch/um/Makefile-i386 +--- a/arch/um/Makefile-i386 2004-02-11 12:16:28.000000000 -0500 ++++ b/arch/um/Makefile-i386 2004-02-11 12:28:27.000000000 -0500 +@@ -16,22 +16,27 @@ + + SYS_HEADERS = $(SYS_DIR)/sc.h $(SYS_DIR)/thread.h + ++sys_prepare: $(SYS_DIR)/sc.h ++ + prepare: $(SYS_HEADERS) + ++filechk_$(SYS_DIR)/sc.h := $(SYS_UTIL_DIR)/mk_sc ++ + $(SYS_DIR)/sc.h: $(SYS_UTIL_DIR)/mk_sc +- $< > $@ ++ $(call filechk,$@) ++ ++filechk_$(SYS_DIR)/thread.h := $(SYS_UTIL_DIR)/mk_thread + + $(SYS_DIR)/thread.h: $(SYS_UTIL_DIR)/mk_thread +- $< > $@ ++ $(call filechk,$@) + +-$(SYS_UTIL_DIR)/mk_sc: FORCE ; +- @$(call descend,$(SYS_UTIL_DIR),$@) ++$(SYS_UTIL_DIR)/mk_sc: scripts/fixdep include/config/MARKER FORCE ; ++ $(Q)$(MAKE) 
$(build)=$(SYS_UTIL_DIR) $@ + +-$(SYS_UTIL_DIR)/mk_thread: $(ARCH_SYMLINKS) $(GEN_HEADERS) FORCE ; +- @$(call descend,$(SYS_UTIL_DIR),$@) ++$(SYS_UTIL_DIR)/mk_thread: $(ARCH_SYMLINKS) $(GEN_HEADERS) sys_prepare FORCE ; ++ $(Q)$(MAKE) $(build)=$(SYS_UTIL_DIR) $@ + + $(SYS_UTIL_DIR): include/asm FORCE +- @$(call descend,$@,) ++ $(Q)$(MAKE) $(build)=$(SYS_UTIL_DIR) + +-sysclean : +- rm -f $(SYS_HEADERS) ++CLEAN_FILES += $(SYS_HEADERS) +diff -Naur a/arch/um/Makefile-skas b/arch/um/Makefile-skas +--- a/arch/um/Makefile-skas 2004-02-11 12:15:22.000000000 -0500 ++++ b/arch/um/Makefile-skas 2004-02-11 12:27:10.000000000 -0500 +@@ -14,7 +14,7 @@ + LINK_SKAS = -Wl,-rpath,/lib + LD_SCRIPT_SKAS = dyn.lds.s + +-GEN_HEADERS += $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h ++GEN_HEADERS += $(TOPDIR)/$(ARCH_DIR)/include/skas_ptregs.h + +-$(ARCH_DIR)/kernel/skas/include/skas_ptregs.h : +- $(MAKE) -C $(ARCH_DIR)/kernel/skas include/skas_ptregs.h ++$(TOPDIR)/$(ARCH_DIR)/include/skas_ptregs.h : ++ $(Q)$(MAKE) $(build)=$(ARCH_DIR)/kernel/skas $@ +diff -Naur a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c +--- a/arch/um/os-Linux/drivers/ethertap_kern.c 2004-02-11 12:14:17.000000000 -0500 ++++ b/arch/um/os-Linux/drivers/ethertap_kern.c 2004-02-11 12:26:00.000000000 -0500 +@@ -8,7 +8,6 @@ + #include "linux/init.h" + #include "linux/netdevice.h" + #include "linux/etherdevice.h" +-#include "linux/init.h" + #include "net_kern.h" + #include "net_user.h" + #include "etap.h" +diff -Naur a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c +--- a/arch/um/os-Linux/drivers/ethertap_user.c 2004-02-11 12:15:29.000000000 -0500 ++++ b/arch/um/os-Linux/drivers/ethertap_user.c 2004-02-11 12:27:17.000000000 -0500 +@@ -8,7 +8,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -42,13 +41,14 @@ + { + struct addr_change change; + void *output; ++ int n; + + change.what = op; + memcpy(change.addr, addr, 
sizeof(change.addr)); + memcpy(change.netmask, netmask, sizeof(change.netmask)); +- if(write(fd, &change, sizeof(change)) != sizeof(change)) +- printk("etap_change - request failed, errno = %d\n", +- errno); ++ n = os_write_file(fd, &change, sizeof(change)); ++ if(n != sizeof(change)) ++ printk("etap_change - request failed, err = %d\n", -n); + output = um_kmalloc(page_size()); + if(output == NULL) + printk("etap_change : Failed to allocate output buffer\n"); +@@ -82,15 +82,15 @@ + struct etap_pre_exec_data *data = arg; + + dup2(data->control_remote, 1); +- close(data->data_me); +- close(data->control_me); ++ os_close_file(data->data_me); ++ os_close_file(data->control_me); + } + + static int etap_tramp(char *dev, char *gate, int control_me, + int control_remote, int data_me, int data_remote) + { + struct etap_pre_exec_data pe_data; +- int pid, status, err; ++ int pid, status, err, n; + char version_buf[sizeof("nnnnn\0")]; + char data_fd_buf[sizeof("nnnnnn\0")]; + char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; +@@ -114,21 +114,21 @@ + pe_data.data_me = data_me; + pid = run_helper(etap_pre_exec, &pe_data, args, NULL); + +- if(pid < 0) err = errno; +- close(data_remote); +- close(control_remote); +- if(read(control_me, &c, sizeof(c)) != sizeof(c)){ +- printk("etap_tramp : read of status failed, errno = %d\n", +- errno); +- return(EINVAL); ++ if(pid < 0) err = pid; ++ os_close_file(data_remote); ++ os_close_file(control_remote); ++ n = os_read_file(control_me, &c, sizeof(c)); ++ if(n != sizeof(c)){ ++ printk("etap_tramp : read of status failed, err = %d\n", -n); ++ return(-EINVAL); + } + if(c != 1){ + printk("etap_tramp : uml_net failed\n"); +- err = EINVAL; +- if(waitpid(pid, &status, 0) < 0) err = errno; +- else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 1)){ ++ err = -EINVAL; ++ if(waitpid(pid, &status, 0) < 0) ++ err = -errno; ++ else if(!WIFEXITED(status) || (WEXITSTATUS(status) != 1)) + printk("uml_net didn't exit with status 1\n"); +- } + } + return(err); + 
} +@@ -143,14 +143,14 @@ + if(err) return(err); + + err = os_pipe(data_fds, 0, 0); +- if(err){ +- printk("data os_pipe failed - errno = %d\n", -err); ++ if(err < 0){ ++ printk("data os_pipe failed - err = %d\n", -err); + return(err); + } + + err = os_pipe(control_fds, 1, 0); +- if(err){ +- printk("control os_pipe failed - errno = %d\n", -err); ++ if(err < 0){ ++ printk("control os_pipe failed - err = %d\n", -err); + return(err); + } + +@@ -167,9 +167,9 @@ + kfree(output); + } + +- if(err != 0){ +- printk("etap_tramp failed - errno = %d\n", err); +- return(-err); ++ if(err < 0){ ++ printk("etap_tramp failed - err = %d\n", -err); ++ return(err); + } + + pri->data_fd = data_fds[0]; +@@ -183,11 +183,11 @@ + struct ethertap_data *pri = data; + + iter_addresses(pri->dev, etap_close_addr, &pri->control_fd); +- close(fd); ++ os_close_file(fd); + os_shutdown_socket(pri->data_fd, 1, 1); +- close(pri->data_fd); ++ os_close_file(pri->data_fd); + pri->data_fd = -1; +- close(pri->control_fd); ++ os_close_file(pri->control_fd); + pri->control_fd = -1; + } + +diff -Naur a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c +--- a/arch/um/os-Linux/drivers/tuntap_user.c 2004-02-11 12:17:08.000000000 -0500 ++++ b/arch/um/os-Linux/drivers/tuntap_user.c 2004-02-11 12:29:13.000000000 -0500 +@@ -8,7 +8,6 @@ + #include + #include + #include +-#include + #include + #include + #include +@@ -61,7 +60,7 @@ + struct tuntap_pre_exec_data *data = arg; + + dup2(data->stdout, 1); +- close(data->close_me); ++ os_close_file(data->close_me); + } + + static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote, +@@ -86,7 +85,7 @@ + + if(pid < 0) return(-pid); + +- close(remote); ++ os_close_file(remote); + + msg.msg_name = NULL; + msg.msg_namelen = 0; +@@ -107,19 +106,19 @@ + if(n < 0){ + printk("tuntap_open_tramp : recvmsg failed - errno = %d\n", + errno); +- return(errno); ++ return(-errno); + } + waitpid(pid, NULL, 0); + + cmsg = CMSG_FIRSTHDR(&msg); + 
if(cmsg == NULL){ + printk("tuntap_open_tramp : didn't receive a message\n"); +- return(EINVAL); ++ return(-EINVAL); + } + if((cmsg->cmsg_level != SOL_SOCKET) || + (cmsg->cmsg_type != SCM_RIGHTS)){ + printk("tuntap_open_tramp : didn't receive a descriptor\n"); +- return(EINVAL); ++ return(-EINVAL); + } + *fd_out = ((int *) CMSG_DATA(cmsg))[0]; + return(0); +@@ -133,27 +132,29 @@ + int err, fds[2], len, used; + + err = tap_open_common(pri->dev, pri->gate_addr); +- if(err) return(err); ++ if(err < 0) ++ return(err); + + if(pri->fixed_config){ +- if((pri->fd = open("/dev/net/tun", O_RDWR)) < 0){ +- printk("Failed to open /dev/net/tun, errno = %d\n", +- errno); +- return(-errno); ++ pri->fd = os_open_file("/dev/net/tun", of_rdwr(OPENFLAGS()), 0); ++ if(pri->fd < 0){ ++ printk("Failed to open /dev/net/tun, err = %d\n", ++ -pri->fd); ++ return(pri->fd); + } + memset(&ifr, 0, sizeof(ifr)); +- ifr.ifr_flags = IFF_TAP; ++ ifr.ifr_flags = IFF_TAP | IFF_NO_PI; + strlcpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name)); + if(ioctl(pri->fd, TUNSETIFF, (void *) &ifr) < 0){ +- printk("TUNSETIFF failed, errno = %d", errno); +- close(pri->fd); ++ printk("TUNSETIFF failed, errno = %d\n", errno); ++ os_close_file(pri->fd); + return(-errno); + } + } + else { + err = os_pipe(fds, 0, 0); +- if(err){ +- printk("tuntap_open : os_pipe failed - errno = %d\n", ++ if(err < 0){ ++ printk("tuntap_open : os_pipe failed - err = %d\n", + -err); + return(err); + } +@@ -166,19 +167,19 @@ + fds[1], buffer, len, &used); + + output = buffer; +- if(err == 0){ +- pri->dev_name = uml_strdup(buffer); +- output += IFNAMSIZ; +- printk(output); +- free_output_buffer(buffer); +- } +- else { +- printk(output); ++ if(err < 0) { ++ printk("%s", output); + free_output_buffer(buffer); +- printk("tuntap_open_tramp failed - errno = %d\n", err); +- return(-err); ++ printk("tuntap_open_tramp failed - err = %d\n", -err); ++ return(err); + } +- close(fds[0]); ++ ++ pri->dev_name = uml_strdup(buffer); ++ output += 
IFNAMSIZ; ++ printk("%s", output); ++ free_output_buffer(buffer); ++ ++ os_close_file(fds[0]); + iter_addresses(pri->dev, open_addr, pri->dev_name); + } + +@@ -191,7 +192,7 @@ + + if(!pri->fixed_config) + iter_addresses(pri->dev, close_addr, pri->dev_name); +- close(fd); ++ os_close_file(fd); + pri->fd = -1; + } + +diff -Naur a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c +--- a/arch/um/os-Linux/file.c 2004-02-11 12:17:07.000000000 -0500 ++++ b/arch/um/os-Linux/file.c 2004-02-11 12:29:09.000000000 -0500 +@@ -8,6 +8,8 @@ + #include + #include + #include ++#include ++#include + #include + #include + #include +@@ -17,33 +19,235 @@ + #include "user.h" + #include "kern_util.h" + +-int os_file_type(char *file) ++static void copy_stat(struct uml_stat *dst, struct stat64 *src) ++{ ++ *dst = ((struct uml_stat) { ++ .ust_dev = src->st_dev, /* device */ ++ .ust_ino = src->st_ino, /* inode */ ++ .ust_mode = src->st_mode, /* protection */ ++ .ust_nlink = src->st_nlink, /* number of hard links */ ++ .ust_uid = src->st_uid, /* user ID of owner */ ++ .ust_gid = src->st_gid, /* group ID of owner */ ++ .ust_size = src->st_size, /* total size, in bytes */ ++ .ust_blksize = src->st_blksize, /* blocksize for filesys I/O */ ++ .ust_blocks = src->st_blocks, /* number of blocks allocated */ ++ .ust_atime = src->st_atime, /* time of last access */ ++ .ust_mtime = src->st_mtime, /* time of last modification */ ++ .ust_ctime = src->st_ctime, /* time of last change */ ++ }); ++} ++ ++int os_stat_fd(const int fd, struct uml_stat *ubuf) ++{ ++ struct stat64 sbuf; ++ int err; ++ ++ do { ++ err = fstat64(fd, &sbuf); ++ } while((err < 0) && (errno == EINTR)) ; ++ ++ if(err < 0) ++ return(-errno); ++ ++ if(ubuf != NULL) ++ copy_stat(ubuf, &sbuf); ++ return(err); ++} ++ ++int os_stat_file(const char *file_name, struct uml_stat *ubuf) ++{ ++ struct stat64 sbuf; ++ int err; ++ ++ do { ++ err = stat64(file_name, &sbuf); ++ } while((err < 0) && (errno == EINTR)) ; ++ ++ if(err < 0) ++ 
return(-errno); ++ ++ if(ubuf != NULL) ++ copy_stat(ubuf, &sbuf); ++ return(err); ++} ++ ++int os_access(const char* file, int mode) ++{ ++ int amode, err; ++ ++ amode=(mode&OS_ACC_R_OK ? R_OK : 0) | (mode&OS_ACC_W_OK ? W_OK : 0) | ++ (mode&OS_ACC_X_OK ? X_OK : 0) | (mode&OS_ACC_F_OK ? F_OK : 0) ; ++ ++ err = access(file, amode); ++ if(err < 0) ++ return(-errno); ++ ++ return(0); ++} ++ ++void os_print_error(int error, const char* str) ++{ ++ errno = error < 0 ? -error : error; ++ ++ perror(str); ++} ++ ++/* FIXME? required only by hostaudio (because it passes ioctls verbatim) */ ++int os_ioctl_generic(int fd, unsigned int cmd, unsigned long arg) ++{ ++ int err; ++ ++ err = ioctl(fd, cmd, arg); ++ if(err < 0) ++ return(-errno); ++ ++ return(err); ++} ++ ++int os_window_size(int fd, int *rows, int *cols) ++{ ++ struct winsize size; ++ ++ if(ioctl(fd, TIOCGWINSZ, &size) < 0) ++ return(-errno); ++ ++ *rows = size.ws_row; ++ *cols = size.ws_col; ++ ++ return(0); ++} ++ ++int os_new_tty_pgrp(int fd, int pid) + { +- struct stat64 buf; ++ if(ioctl(fd, TIOCSCTTY, 0) < 0){ ++ printk("TIOCSCTTY failed, errno = %d\n", errno); ++ return(-errno); ++ } ++ ++ if(tcsetpgrp(fd, pid) < 0){ ++ printk("tcsetpgrp failed, errno = %d\n", errno); ++ return(-errno); ++ } ++ ++ return(0); ++} ++ ++/* FIXME: ensure namebuf in os_get_if_name is big enough */ ++int os_get_ifname(int fd, char* namebuf) ++{ ++ if(ioctl(fd, SIOCGIFNAME, namebuf) < 0) ++ return(-errno); ++ ++ return(0); ++} ++ ++int os_set_slip(int fd) ++{ ++ int disc, sencap; ++ ++ disc = N_SLIP; ++ if(ioctl(fd, TIOCSETD, &disc) < 0){ ++ printk("Failed to set slip line discipline - " ++ "errno = %d\n", errno); ++ return(-errno); ++ } ++ ++ sencap = 0; ++ if(ioctl(fd, SIOCSIFENCAP, &sencap) < 0){ ++ printk("Failed to set slip encapsulation - " ++ "errno = %d\n", errno); ++ return(-errno); ++ } ++ ++ return(0); ++} ++ ++int os_set_owner(int fd, int pid) ++{ ++ if(fcntl(fd, F_SETOWN, pid) < 0){ ++ int save_errno = errno; ++ ++ 
if(fcntl(fd, F_GETOWN, 0) != pid) ++ return(-save_errno); ++ } ++ ++ return(0); ++} ++ ++/* FIXME? moved wholesale from sigio_user.c to get fcntls out of that file */ ++int os_sigio_async(int master, int slave) ++{ ++ int flags; + +- if(stat64(file, &buf) == -1) ++ flags = fcntl(master, F_GETFL); ++ if(flags < 0) { ++ printk("fcntl F_GETFL failed, errno = %d\n", errno); + return(-errno); ++ } ++ ++ if((fcntl(master, F_SETFL, flags | O_NONBLOCK | O_ASYNC) < 0) || ++ (fcntl(master, F_SETOWN, os_getpid()) < 0)){ ++ printk("fcntl F_SETFL or F_SETOWN failed, errno = %d\n", errno); ++ return(-errno); ++ } ++ ++ if((fcntl(slave, F_SETFL, flags | O_NONBLOCK) < 0)){ ++ printk("fcntl F_SETFL failed, errno = %d\n", errno); ++ return(-errno); ++ } + +- if(S_ISDIR(buf.st_mode)) return(OS_TYPE_DIR); +- else if(S_ISLNK(buf.st_mode)) return(OS_TYPE_SYMLINK); +- else if(S_ISCHR(buf.st_mode)) return(OS_TYPE_CHARDEV); +- else if(S_ISBLK(buf.st_mode)) return(OS_TYPE_BLOCKDEV); +- else if(S_ISFIFO(buf.st_mode)) return(OS_TYPE_FIFO); +- else if(S_ISSOCK(buf.st_mode)) return(OS_TYPE_SOCK); ++ return(0); ++} ++ ++int os_mode_fd(int fd, int mode) ++{ ++ int err; ++ ++ do { ++ err = fchmod(fd, mode); ++ } while((err < 0) && (errno==EINTR)) ; ++ ++ if(err < 0) ++ return(-errno); ++ ++ return(0); ++} ++ ++int os_file_type(char *file) ++{ ++ struct uml_stat buf; ++ int err; ++ ++ err = os_stat_file(file, &buf); ++ if(err < 0) ++ return(err); ++ ++ if(S_ISDIR(buf.ust_mode)) return(OS_TYPE_DIR); ++ else if(S_ISLNK(buf.ust_mode)) return(OS_TYPE_SYMLINK); ++ else if(S_ISCHR(buf.ust_mode)) return(OS_TYPE_CHARDEV); ++ else if(S_ISBLK(buf.ust_mode)) return(OS_TYPE_BLOCKDEV); ++ else if(S_ISFIFO(buf.ust_mode)) return(OS_TYPE_FIFO); ++ else if(S_ISSOCK(buf.ust_mode)) return(OS_TYPE_SOCK); + else return(OS_TYPE_FILE); + } + + int os_file_mode(char *file, struct openflags *mode_out) + { ++ int err; ++ + *mode_out = OPENFLAGS(); + +- if(!access(file, W_OK)) *mode_out = of_write(*mode_out); +- else 
if(errno != EACCES) +- return(-errno); ++ err = os_access(file, OS_ACC_W_OK); ++ if((err < 0) && (err != -EACCES)) ++ return(err); + +- if(!access(file, R_OK)) *mode_out = of_read(*mode_out); +- else if(errno != EACCES) +- return(-errno); ++ *mode_out = of_write(*mode_out); ++ ++ err = os_access(file, OS_ACC_R_OK); ++ if((err < 0) && (err != -EACCES)) ++ return(err); ++ ++ *mode_out = of_read(*mode_out); + + return(0); + } +@@ -63,16 +267,14 @@ + if(flags.e) f |= O_EXCL; + + fd = open64(file, f, mode); +- if(fd < 0) return(-errno); +- +- if(flags.cl){ +- if(fcntl(fd, F_SETFD, 1)){ +- close(fd); +- return(-errno); +- } ++ if(fd < 0) ++ return(-errno); ++ ++ if(flags.cl && fcntl(fd, F_SETFD, 1)){ ++ os_close_file(fd); ++ return(-errno); + } + +- return(fd); + return(fd); + } + +@@ -90,7 +292,7 @@ + + err = connect(fd, (struct sockaddr *) &sock, sizeof(sock)); + if(err) +- return(err); ++ return(-errno); + + return(fd); + } +@@ -109,88 +311,162 @@ + return(0); + } + +-int os_read_file(int fd, void *buf, int len) ++static int fault_buffer(void *start, int len, ++ int (*copy_proc)(void *addr, void *buf, int len)) + { +- int n; ++ int page = getpagesize(), i; ++ char c; + +- /* Force buf into memory if it's not already. 
*/ ++ for(i = 0; i < len; i += page){ ++ if((*copy_proc)(start + i, &c, sizeof(c))) ++ return(-EFAULT); ++ } ++ if((len % page) != 0){ ++ if((*copy_proc)(start + len - 1, &c, sizeof(c))) ++ return(-EFAULT); ++ } ++ return(0); ++} + +- /* XXX This fails if buf is kernel memory */ +-#ifdef notdef +- if(copy_to_user_proc(buf, &c, sizeof(c))) +- return(-EFAULT); +-#endif ++static int file_io(int fd, void *buf, int len, ++ int (*io_proc)(int fd, void *buf, int len), ++ int (*copy_user_proc)(void *addr, void *buf, int len)) ++{ ++ int n, err; ++ ++ do { ++ n = (*io_proc)(fd, buf, len); ++ if((n < 0) && (errno == EFAULT)){ ++ err = fault_buffer(buf, len, copy_user_proc); ++ if(err) ++ return(err); ++ n = (*io_proc)(fd, buf, len); ++ } ++ } while((n < 0) && (errno == EINTR)); + +- n = read(fd, buf, len); + if(n < 0) + return(-errno); + return(n); + } + +-int os_write_file(int fd, void *buf, int count) ++int os_read_file(int fd, void *buf, int len) + { +- int n; +- +- /* Force buf into memory if it's not already. 
*/ +- +- /* XXX This fails if buf is kernel memory */ +-#ifdef notdef +- if(copy_to_user_proc(buf, buf, buf[0])) +- return(-EFAULT); +-#endif ++ return(file_io(fd, buf, len, (int (*)(int, void *, int)) read, ++ copy_from_user_proc)); ++} + +- n = write(fd, buf, count); +- if(n < 0) +- return(-errno); +- return(n); ++int os_write_file(int fd, const void *buf, int len) ++{ ++ return(file_io(fd, (void *) buf, len, ++ (int (*)(int, void *, int)) write, copy_to_user_proc)); + } + + int os_file_size(char *file, long long *size_out) + { +- struct stat64 buf; ++ struct uml_stat buf; ++ int err; + +- if(stat64(file, &buf) == -1){ +- printk("Couldn't stat \"%s\" : errno = %d\n", file, errno); +- return(-errno); ++ err = os_stat_file(file, &buf); ++ if(err < 0){ ++ printk("Couldn't stat \"%s\" : err = %d\n", file, -err); ++ return(err); + } +- if(S_ISBLK(buf.st_mode)){ ++ ++ if(S_ISBLK(buf.ust_mode)){ + int fd, blocks; + +- if((fd = open64(file, O_RDONLY)) < 0){ +- printk("Couldn't open \"%s\", errno = %d\n", file, +- errno); +- return(-errno); ++ fd = os_open_file(file, of_read(OPENFLAGS()), 0); ++ if(fd < 0){ ++ printk("Couldn't open \"%s\", errno = %d\n", file, -fd); ++ return(fd); + } + if(ioctl(fd, BLKGETSIZE, &blocks) < 0){ + printk("Couldn't get the block size of \"%s\", " + "errno = %d\n", file, errno); +- close(fd); +- return(-errno); ++ err = -errno; ++ os_close_file(fd); ++ return(err); + } + *size_out = ((long long) blocks) * 512; +- close(fd); ++ os_close_file(fd); + return(0); + } +- *size_out = buf.st_size; ++ *size_out = buf.ust_size; ++ return(0); ++} ++ ++int os_file_modtime(char *file, unsigned long *modtime) ++{ ++ struct uml_stat buf; ++ int err; ++ ++ err = os_stat_file(file, &buf); ++ if(err < 0){ ++ printk("Couldn't stat \"%s\" : err = %d\n", file, -err); ++ return(err); ++ } ++ ++ *modtime = buf.ust_mtime; + return(0); + } + ++int os_get_exec_close(int fd, int* close_on_exec) ++{ ++ int ret; ++ ++ do { ++ ret = fcntl(fd, F_GETFD); ++ } while((ret < 0) 
&& (errno == EINTR)) ; ++ ++ if(ret < 0) ++ return(-errno); ++ ++ *close_on_exec = (ret&FD_CLOEXEC) ? 1 : 0; ++ return(ret); ++} ++ ++int os_set_exec_close(int fd, int close_on_exec) ++{ ++ int flag, err; ++ ++ if(close_on_exec) flag = FD_CLOEXEC; ++ else flag = 0; ++ ++ do { ++ err = fcntl(fd, F_SETFD, flag); ++ } while((err < 0) && (errno == EINTR)) ; ++ ++ if(err < 0) ++ return(-errno); ++ return(err); ++} ++ + int os_pipe(int *fds, int stream, int close_on_exec) + { + int err, type = stream ? SOCK_STREAM : SOCK_DGRAM; + + err = socketpair(AF_UNIX, type, 0, fds); +- if(err) ++ if(err < 0) + return(-errno); + + if(!close_on_exec) + return(0); + +- if((fcntl(fds[0], F_SETFD, 1) < 0) || (fcntl(fds[1], F_SETFD, 1) < 0)) +- printk("os_pipe : Setting FD_CLOEXEC failed, errno = %d", +- errno); ++ err = os_set_exec_close(fds[0], 1); ++ if(err < 0) ++ goto error; ++ ++ err = os_set_exec_close(fds[1], 1); ++ if(err < 0) ++ goto error; + + return(0); ++ ++ error: ++ printk("os_pipe : Setting FD_CLOEXEC failed, err = %d\n", -err); ++ os_close_file(fds[1]); ++ os_close_file(fds[0]); ++ return(err); + } + + int os_set_fd_async(int fd, int owner) +@@ -270,7 +546,7 @@ + return(-EINVAL); + } + err = shutdown(fd, what); +- if(err) ++ if(err < 0) + return(-errno); + return(0); + } +@@ -315,7 +591,7 @@ + return(new); + } + +-int create_unix_socket(char *file, int len) ++int os_create_unix_socket(char *file, int len, int close_on_exec) + { + struct sockaddr_un addr; + int sock, err; +@@ -327,6 +603,13 @@ + return(-errno); + } + ++ if(close_on_exec) { ++ err = os_set_exec_close(sock, 1); ++ if(err < 0) ++ printk("create_unix_socket : close_on_exec failed, " ++ "err = %d", -err); ++ } ++ + addr.sun_family = AF_UNIX; + + /* XXX Be more careful about overflow */ +@@ -334,14 +617,45 @@ + + err = bind(sock, (struct sockaddr *) &addr, sizeof(addr)); + if (err < 0){ +- printk("create_listening_socket - bind failed, errno = %d\n", +- errno); ++ printk("create_listening_socket at '%s' - bind 
failed, " ++ "errno = %d\n", file, errno); + return(-errno); + } + + return(sock); + } + ++void os_flush_stdout(void) ++{ ++ fflush(stdout); ++} ++ ++int os_lock_file(int fd, int excl) ++{ ++ int type = excl ? F_WRLCK : F_RDLCK; ++ struct flock lock = ((struct flock) { .l_type = type, ++ .l_whence = SEEK_SET, ++ .l_start = 0, ++ .l_len = 0 } ); ++ int err, save; ++ ++ err = fcntl(fd, F_SETLK, &lock); ++ if(!err) ++ goto out; ++ ++ save = -errno; ++ err = fcntl(fd, F_GETLK, &lock); ++ if(err){ ++ err = -errno; ++ goto out; ++ } ++ ++ printk("F_SETLK failed, file already locked by pid %d\n", lock.l_pid); ++ err = save; ++ out: ++ return(err); ++} ++ + /* + * Overrides for Emacs so that we follow Linus's tabbing style. + * Emacs will notice this stuff at the end of the file and automatically +diff -Naur a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile +--- a/arch/um/os-Linux/Makefile 2004-02-11 12:15:56.000000000 -0500 ++++ b/arch/um/os-Linux/Makefile 2004-02-11 12:27:48.000000000 -0500 +@@ -9,7 +9,3 @@ + + $(USER_OBJS) : %.o: %.c + $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< +- +-clean : +- +-archmrproper: +diff -Naur a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c +--- a/arch/um/os-Linux/process.c 2004-02-11 12:14:23.000000000 -0500 ++++ b/arch/um/os-Linux/process.c 2004-02-11 12:26:04.000000000 -0500 +@@ -1,5 +1,5 @@ + /* +- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) ++ * Copyright (C) 2002 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +@@ -7,32 +7,37 @@ + #include + #include + #include ++#include + #include + #include + #include "os.h" + #include "user.h" + ++#define ARBITRARY_ADDR -1 ++#define FAILURE_PID -1 ++ + unsigned long os_process_pc(int pid) + { + char proc_stat[sizeof("/proc/#####/stat\0")], buf[256]; + unsigned long pc; +- int fd; ++ int fd, err; + + sprintf(proc_stat, "/proc/%d/stat", pid); + fd = os_open_file(proc_stat, of_read(OPENFLAGS()), 0); + if(fd < 0){ +- printk("os_process_pc - couldn't 
open '%s', errno = %d\n", +- proc_stat, errno); +- return(-1); ++ printk("os_process_pc - couldn't open '%s', err = %d\n", ++ proc_stat, -fd); ++ return(ARBITRARY_ADDR); + } +- if(read(fd, buf, sizeof(buf)) < 0){ +- printk("os_process_pc - couldn't read '%s', errno = %d\n", +- proc_stat, errno); +- close(fd); +- return(-1); ++ err = os_read_file(fd, buf, sizeof(buf)); ++ if(err < 0){ ++ printk("os_process_pc - couldn't read '%s', err = %d\n", ++ proc_stat, -err); ++ os_close_file(fd); ++ return(ARBITRARY_ADDR); + } +- close(fd); +- pc = -1; ++ os_close_file(fd); ++ pc = ARBITRARY_ADDR; + if(sscanf(buf, "%*d %*s %*c %*d %*d %*d %*d %*d %*d %*d %*d " + "%*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d %*d " + "%*d %*d %*d %*d %ld", &pc) != 1){ +@@ -52,22 +57,23 @@ + snprintf(stat, sizeof(stat), "/proc/%d/stat", pid); + fd = os_open_file(stat, of_read(OPENFLAGS()), 0); + if(fd < 0){ +- printk("Couldn't open '%s', errno = %d\n", stat, -fd); +- return(-1); ++ printk("Couldn't open '%s', err = %d\n", stat, -fd); ++ return(FAILURE_PID); + } + +- n = read(fd, data, sizeof(data)); +- close(fd); ++ n = os_read_file(fd, data, sizeof(data)); ++ os_close_file(fd); + + if(n < 0){ +- printk("Couldn't read '%s', errno = %d\n", stat); +- return(-1); ++ printk("Couldn't read '%s', err = %d\n", stat, -n); ++ return(FAILURE_PID); + } + +- parent = -1; ++ parent = FAILURE_PID; + /* XXX This will break if there is a space in the command */ + n = sscanf(data, "%*d %*s %*c %d", &parent); +- if(n != 1) printk("Failed to scan '%s'\n", data); ++ if(n != 1) ++ printk("Failed to scan '%s'\n", data); + + return(parent); + } +@@ -87,7 +93,8 @@ + + void os_usr1_process(int pid) + { +- kill(pid, SIGUSR1); ++ syscall(__NR_tkill, pid, SIGUSR1); ++ /* kill(pid, SIGUSR1); */ + } + + int os_getpid(void) +@@ -95,7 +102,7 @@ + return(getpid()); + } + +-int os_map_memory(void *virt, int fd, unsigned long off, unsigned long len, ++int os_map_memory(void *virt, int fd, unsigned long long off, unsigned 
long len, + int r, int w, int x) + { + void *loc; +@@ -104,8 +111,8 @@ + prot = (r ? PROT_READ : 0) | (w ? PROT_WRITE : 0) | + (x ? PROT_EXEC : 0); + +- loc = mmap((void *) virt, len, prot, MAP_SHARED | MAP_FIXED, +- fd, off); ++ loc = mmap64((void *) virt, len, prot, MAP_SHARED | MAP_FIXED, ++ fd, off); + if(loc == MAP_FAILED) + return(-errno); + return(0); +@@ -126,7 +133,8 @@ + int err; + + err = munmap(addr, len); +- if(err < 0) return(-errno); ++ if(err < 0) ++ return(-errno); + return(0); + } + +diff -Naur a/arch/um/os-Linux/tty.c b/arch/um/os-Linux/tty.c +--- a/arch/um/os-Linux/tty.c 2004-02-11 12:15:53.000000000 -0500 ++++ b/arch/um/os-Linux/tty.c 2004-02-11 12:27:46.000000000 -0500 +@@ -28,10 +28,10 @@ + struct grantpt_info info; + int fd; + +- if((fd = os_open_file("/dev/ptmx", of_rdwr(OPENFLAGS()), 0)) < 0){ +- printk("get_pty : Couldn't open /dev/ptmx - errno = %d\n", +- errno); +- return(-1); ++ fd = os_open_file("/dev/ptmx", of_rdwr(OPENFLAGS()), 0); ++ if(fd < 0){ ++ printk("get_pty : Couldn't open /dev/ptmx - err = %d\n", -fd); ++ return(fd); + } + + info.fd = fd; +@@ -39,7 +39,7 @@ + + if(info.res < 0){ + printk("get_pty : Couldn't grant pty - errno = %d\n", +- info.err); ++ -info.err); + return(-1); + } + if(unlockpt(fd) < 0){ +diff -Naur a/arch/um/sys-i386/bugs.c b/arch/um/sys-i386/bugs.c +--- a/arch/um/sys-i386/bugs.c 2004-02-11 12:16:28.000000000 -0500 ++++ b/arch/um/sys-i386/bugs.c 2004-02-11 12:28:27.000000000 -0500 +@@ -4,20 +4,21 @@ + */ + + #include +-#include + #include + #include + #include ++#include + #include "kern_util.h" + #include "user.h" + #include "sysdep/ptrace.h" + #include "task.h" ++#include "os.h" + + #define MAXTOKEN 64 + + /* Set during early boot */ +-int cpu_has_cmov = 1; +-int cpu_has_xmm = 0; ++int host_has_cmov = 1; ++int host_has_xmm = 0; + + static char token(int fd, char *buf, int len, char stop) + { +@@ -27,13 +28,15 @@ + ptr = buf; + end = &buf[len]; + do { +- n = read(fd, ptr, sizeof(*ptr)); ++ n = 
os_read_file(fd, ptr, sizeof(*ptr)); + c = *ptr++; +- if(n == 0) return(0); +- else if(n != sizeof(*ptr)){ +- printk("Reading /proc/cpuinfo failed, " +- "errno = %d\n", errno); +- return(-errno); ++ if(n != sizeof(*ptr)){ ++ if(n == 0) return(0); ++ printk("Reading /proc/cpuinfo failed, err = %d\n", -n); ++ if(n < 0) ++ return(n); ++ else ++ return(-EIO); + } + } while((c != '\n') && (c != stop) && (ptr < end)); + +@@ -45,45 +48,79 @@ + return(c); + } + +-static int check_cpu_feature(char *feature, int *have_it) ++static int find_cpuinfo_line(int fd, char *key, char *scratch, int len) + { +- char buf[MAXTOKEN], c; +- int fd, len = sizeof(buf)/sizeof(buf[0]), n; +- +- printk("Checking for host processor %s support...", feature); +- fd = open("/proc/cpuinfo", O_RDONLY); +- if(fd < 0){ +- printk("Couldn't open /proc/cpuinfo, errno = %d\n", errno); +- return(0); +- } ++ int n; ++ char c; + +- *have_it = 0; +- buf[len - 1] = '\0'; ++ scratch[len - 1] = '\0'; + while(1){ +- c = token(fd, buf, len - 1, ':'); +- if(c <= 0) goto out; ++ c = token(fd, scratch, len - 1, ':'); ++ if(c <= 0) ++ return(0); + else if(c != ':'){ + printk("Failed to find ':' in /proc/cpuinfo\n"); +- goto out; ++ return(0); + } + +- if(!strncmp(buf, "flags", strlen("flags"))) break; ++ if(!strncmp(scratch, key, strlen(key))) ++ return(1); + + do { +- n = read(fd, &c, sizeof(c)); ++ n = os_read_file(fd, &c, sizeof(c)); + if(n != sizeof(c)){ + printk("Failed to find newline in " +- "/proc/cpuinfo, n = %d, errno = %d\n", +- n, errno); +- goto out; ++ "/proc/cpuinfo, err = %d\n", -n); ++ return(0); + } + } while(c != '\n'); + } ++ return(0); ++} ++ ++int cpu_feature(char *what, char *buf, int len) ++{ ++ int fd, ret = 0; ++ ++ fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0); ++ if(fd < 0){ ++ printk("Couldn't open /proc/cpuinfo, err = %d\n", -fd); ++ return(0); ++ } ++ ++ if(!find_cpuinfo_line(fd, what, buf, len)){ ++ printk("Couldn't find '%s' line in /proc/cpuinfo\n", what); ++ goto 
out_close; ++ } ++ ++ token(fd, buf, len, '\n'); ++ ret = 1; ++ ++ out_close: ++ os_close_file(fd); ++ return(ret); ++} ++ ++static int check_cpu_flag(char *feature, int *have_it) ++{ ++ char buf[MAXTOKEN], c; ++ int fd, len = sizeof(buf)/sizeof(buf[0]); ++ ++ printk("Checking for host processor %s support...", feature); ++ fd = os_open_file("/proc/cpuinfo", of_read(OPENFLAGS()), 0); ++ if(fd < 0){ ++ printk("Couldn't open /proc/cpuinfo, err = %d\n", -fd); ++ return(0); ++ } ++ ++ *have_it = 0; ++ if(!find_cpuinfo_line(fd, "flags", buf, sizeof(buf) / sizeof(buf[0]))) ++ goto out; + + c = token(fd, buf, len - 1, ' '); + if(c < 0) goto out; + else if(c != ' '){ +- printk("Failed to find ':' in /proc/cpuinfo\n"); ++ printk("Failed to find ' ' in /proc/cpuinfo\n"); + goto out; + } + +@@ -100,21 +137,48 @@ + out: + if(*have_it == 0) printk("No\n"); + else if(*have_it == 1) printk("Yes\n"); +- close(fd); ++ os_close_file(fd); + return(1); + } + ++#if 0 /* This doesn't work in tt mode, plus it's causing compilation problems ++ * for some people. 
++ */ ++static void disable_lcall(void) ++{ ++ struct modify_ldt_ldt_s ldt; ++ int err; ++ ++ bzero(&ldt, sizeof(ldt)); ++ ldt.entry_number = 7; ++ ldt.base_addr = 0; ++ ldt.limit = 0; ++ err = modify_ldt(1, &ldt, sizeof(ldt)); ++ if(err) ++ printk("Failed to disable lcall7 - errno = %d\n", errno); ++} ++#endif ++ ++void arch_init_thread(void) ++{ ++#if 0 ++ disable_lcall(); ++#endif ++} ++ + void arch_check_bugs(void) + { + int have_it; + +- if(access("/proc/cpuinfo", R_OK)){ ++ if(os_access("/proc/cpuinfo", OS_ACC_R_OK) < 0){ + printk("/proc/cpuinfo not available - skipping CPU capability " + "checks\n"); + return; + } +- if(check_cpu_feature("cmov", &have_it)) cpu_has_cmov = have_it; +- if(check_cpu_feature("xmm", &have_it)) cpu_has_xmm = have_it; ++ if(check_cpu_flag("cmov", &have_it)) ++ host_has_cmov = have_it; ++ if(check_cpu_flag("xmm", &have_it)) ++ host_has_xmm = have_it; + } + + int arch_handle_signal(int sig, union uml_pt_regs *regs) +@@ -130,18 +194,18 @@ + if((*((char *) ip) != 0x0f) || ((*((char *) (ip + 1)) & 0xf0) != 0x40)) + return(0); + +- if(cpu_has_cmov == 0) ++ if(host_has_cmov == 0) + panic("SIGILL caused by cmov, which this processor doesn't " + "implement, boot a filesystem compiled for older " + "processors"); +- else if(cpu_has_cmov == 1) ++ else if(host_has_cmov == 1) + panic("SIGILL caused by cmov, which this processor claims to " + "implement"); +- else if(cpu_has_cmov == -1) ++ else if(host_has_cmov == -1) + panic("SIGILL caused by cmov, couldn't tell if this processor " + "implements it, boot a filesystem compiled for older " + "processors"); +- else panic("Bad value for cpu_has_cmov (%d)", cpu_has_cmov); ++ else panic("Bad value for host_has_cmov (%d)", host_has_cmov); + return(0); + } + +diff -Naur a/arch/um/sys-i386/extable.c b/arch/um/sys-i386/extable.c +--- a/arch/um/sys-i386/extable.c 2004-02-11 12:16:45.000000000 -0500 ++++ b/arch/um/sys-i386/extable.c 1969-12-31 19:00:00.000000000 -0500 +@@ -1,30 +0,0 @@ +-/* +- * 
linux/arch/i386/mm/extable.c +- */ +- +-#include +-#include +-#include +-#include +- +-/* Simple binary search */ +-const struct exception_table_entry * +-search_extable(const struct exception_table_entry *first, +- const struct exception_table_entry *last, +- unsigned long value) +-{ +- while (first <= last) { +- const struct exception_table_entry *mid; +- long diff; +- +- mid = (last - first) / 2 + first; +- diff = mid->insn - value; +- if (diff == 0) +- return mid; +- else if (diff < 0) +- first = mid+1; +- else +- last = mid-1; +- } +- return NULL; +-} +diff -Naur a/arch/um/sys-i386/fault.c b/arch/um/sys-i386/fault.c +--- a/arch/um/sys-i386/fault.c 2004-02-11 12:15:44.000000000 -0500 ++++ b/arch/um/sys-i386/fault.c 2004-02-11 12:27:19.000000000 -0500 +@@ -1,5 +1,5 @@ + /* +- * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) ++ * Copyright (C) 2002 - 2004 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +@@ -7,16 +7,24 @@ + #include "sysdep/ptrace.h" + #include "sysdep/sigcontext.h" + +-extern unsigned long search_exception_table(unsigned long addr); ++/* These two are from asm-um/uaccess.h and linux/module.h, check them. 
*/ ++struct exception_table_entry ++{ ++ unsigned long insn; ++ unsigned long fixup; ++}; + ++const struct exception_table_entry *search_exception_tables(unsigned long add); ++ ++/* Compare this to arch/i386/mm/extable.c:fixup_exception() */ + int arch_fixup(unsigned long address, void *sc_ptr) + { + struct sigcontext *sc = sc_ptr; +- unsigned long fixup; ++ const struct exception_table_entry *fixup; + + fixup = search_exception_tables(address); + if(fixup != 0){ +- sc->eip = fixup; ++ sc->eip = fixup->fixup; + return(1); + } + return(0); +diff -Naur a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile +--- a/arch/um/sys-i386/Makefile 2004-02-11 12:14:27.000000000 -0500 ++++ b/arch/um/sys-i386/Makefile 2004-02-11 12:26:08.000000000 -0500 +@@ -1,7 +1,8 @@ +-obj-y = bugs.o checksum.o extable.o fault.o ksyms.o ldt.o module.o \ +- ptrace.o ptrace_user.o semaphore.o sigcontext.o syscalls.o sysrq.o ++obj-y = bugs.o checksum.o fault.o ksyms.o ldt.o ptrace.o ptrace_user.o \ ++ semaphore.o sigcontext.o syscalls.o sysrq.o time.o + + obj-$(CONFIG_HIGHMEM) += highmem.o ++obj-$(CONFIG_MODULES) += module.o + + USER_OBJS := bugs.o ptrace_user.o sigcontext.o fault.o + USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) +@@ -9,6 +10,8 @@ + SYMLINKS = semaphore.c highmem.c module.c + SYMLINKS := $(foreach f,$(SYMLINKS),$(src)/$f) + ++clean-files := $(SYMLINKS) ++ + semaphore.c-dir = kernel + highmem.c-dir = mm + module.c-dir = kernel +@@ -24,19 +27,4 @@ + $(SYMLINKS): + $(call make_link,$@) + +-clean: +- $(MAKE) -C util clean +- +-fastdep: +- +-dep: +- +-archmrproper: +- rm -f $(SYMLINKS) +- +-archclean: +- +-archdep: +- +-modules: +- ++subdir- := util +diff -Naur a/arch/um/sys-i386/ptrace_user.c b/arch/um/sys-i386/ptrace_user.c +--- a/arch/um/sys-i386/ptrace_user.c 2004-02-11 12:14:33.000000000 -0500 ++++ b/arch/um/sys-i386/ptrace_user.c 2004-02-11 12:26:15.000000000 -0500 +@@ -39,10 +39,10 @@ + nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]); + for(i = 0; i 
< nregs; i++){ + if((i == 4) || (i == 5)) continue; +- if(ptrace(PTRACE_POKEUSR, pid, &dummy->u_debugreg[i], ++ if(ptrace(PTRACE_POKEUSER, pid, &dummy->u_debugreg[i], + regs[i]) < 0) +- printk("write_debugregs - ptrace failed, " +- "errno = %d\n", errno); ++ printk("write_debugregs - ptrace failed on " ++ "register %d, errno = %d\n", errno); + } + } + +@@ -54,7 +54,7 @@ + dummy = NULL; + nregs = sizeof(dummy->u_debugreg)/sizeof(dummy->u_debugreg[0]); + for(i = 0; i < nregs; i++){ +- regs[i] = ptrace(PTRACE_PEEKUSR, pid, ++ regs[i] = ptrace(PTRACE_PEEKUSER, pid, + &dummy->u_debugreg[i], 0); + } + } +diff -Naur a/arch/um/sys-i386/time.c b/arch/um/sys-i386/time.c +--- a/arch/um/sys-i386/time.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/arch/um/sys-i386/time.c 2004-02-11 12:26:16.000000000 -0500 +@@ -0,0 +1,24 @@ ++/* ++ * sys-i386/time.c ++ * Created 25.9.2002 Sapan Bhatia ++ * ++ */ ++ ++unsigned long long time_stamp(void) ++{ ++ unsigned long low, high; ++ ++ asm("rdtsc" : "=a" (low), "=d" (high)); ++ return((((unsigned long long) high) << 32) + low); ++} ++ ++/* ++ * Overrides for Emacs so that we follow Linus's tabbing style. ++ * Emacs will notice this stuff at the end of the file and automatically ++ * adjust the settings for this buffer only. This must remain at the end ++ * of the file. 
++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -Naur a/arch/um/sys-i386/util/Makefile b/arch/um/sys-i386/util/Makefile +--- a/arch/um/sys-i386/util/Makefile 2004-02-11 12:16:48.000000000 -0500 ++++ b/arch/um/sys-i386/util/Makefile 2004-02-11 12:29:04.000000000 -0500 +@@ -1,15 +1,10 @@ ++host-progs := mk_sc mk_thread ++always := $(host-progs) + +-host-progs := mk_sc +-always := $(host-progs) mk_thread +-targets := mk_thread_kern.o mk_thread_user.o ++mk_thread-objs := mk_thread_kern.o mk_thread_user.o + +-mk_sc-objs := mk_sc.o +- +-$(obj)/mk_thread : $(obj)/mk_thread_kern.o $(obj)/mk_thread_user.o +- $(CC) $(CFLAGS) -o $@ $^ +- +-$(obj)/mk_thread_user.o : $(src)/mk_thread_user.c +- $(CC) $(USER_CFLAGS) -c -o $@ $< ++HOSTCFLAGS_mk_thread_kern.o := $(CFLAGS) ++HOSTCFLAGS_mk_thread_user.o := $(USER_CFLAGS) + + clean : + $(RM) -f $(build-targets) +diff -Naur a/arch/um/sys-i386/util/mk_sc.c b/arch/um/sys-i386/util/mk_sc.c +--- a/arch/um/sys-i386/util/mk_sc.c 2004-02-11 12:16:38.000000000 -0500 ++++ b/arch/um/sys-i386/util/mk_sc.c 2004-02-11 12:28:37.000000000 -0500 +@@ -38,6 +38,7 @@ + SC_OFFSET("SC_ERR", err); + SC_OFFSET("SC_CR2", cr2); + SC_OFFSET("SC_FPSTATE", fpstate); ++ SC_OFFSET("SC_SIGMASK", oldmask); + SC_FP_OFFSET("SC_FP_CW", cw); + SC_FP_OFFSET("SC_FP_SW", sw); + SC_FP_OFFSET("SC_FP_TAG", tag); +diff -Naur a/arch/um/sys-ia64/Makefile b/arch/um/sys-ia64/Makefile +--- a/arch/um/sys-ia64/Makefile 2004-02-11 12:14:16.000000000 -0500 ++++ b/arch/um/sys-ia64/Makefile 2004-02-11 12:25:43.000000000 -0500 +@@ -7,18 +7,5 @@ + $(OBJ): $(OBJS) + rm -f $@ + $(LD) $(LINKFLAGS) --start-group $^ --end-group -o $@ +-clean: +- rm -f $(OBJS) + +-fastdep: +- +-archmrproper: +- +-archclean: +- rm -f link.ld +- @$(MAKEBOOT) clean +- +-archdep: +- @$(MAKEBOOT) dep +- +-modules: ++clean-files := $(OBJS) link.ld +diff -Naur a/arch/um/sys-ppc/Makefile b/arch/um/sys-ppc/Makefile +--- 
a/arch/um/sys-ppc/Makefile 2004-02-11 12:15:23.000000000 -0500 ++++ b/arch/um/sys-ppc/Makefile 2004-02-11 12:27:11.000000000 -0500 +@@ -66,13 +66,4 @@ + $(CC) $(EXTRA_AFLAGS) $(AFLAGS) -D__ASSEMBLY__ -D__UM_PPC__ -c $< -o $*.o + rm -f asm + +-clean: +- rm -f $(OBJS) +- rm -f ppc_defs.h +- rm -f checksum.S semaphore.c mk_defs.c +- +-fastdep: +- +-dep: +- +-modules: ++clean-files := $(OBJS) ppc_defs.h checksum.S semaphore.c mk_defs.c +diff -Naur a/arch/um/uml.lds.S b/arch/um/uml.lds.S +--- a/arch/um/uml.lds.S 2004-02-11 12:15:21.000000000 -0500 ++++ b/arch/um/uml.lds.S 2004-02-11 12:27:08.000000000 -0500 +@@ -9,7 +9,6 @@ + { + . = START + SIZEOF_HEADERS; + +- . = ALIGN(4096); + __binary_start = .; + #ifdef MODE_TT + .thread_private : { +@@ -26,7 +25,11 @@ + . = ALIGN(4096); /* Init code and data */ + _stext = .; + __init_begin = .; +- .text.init : { *(.text.init) } ++ .init.text : { ++ _sinittext = .; ++ *(.init.text) ++ _einittext = .; ++ } + . = ALIGN(4096); + .text : + { +@@ -38,7 +41,7 @@ + + #include "asm/common.lds.S" + +- .data.init : { *(.data.init) } ++ init.data : { *(init.data) } + .data : + { + . 
= ALIGN(KERNEL_STACK_SIZE); /* init_task */ +diff -Naur a/arch/um/util/Makefile b/arch/um/util/Makefile +--- a/arch/um/util/Makefile 2004-02-11 12:17:07.000000000 -0500 ++++ b/arch/um/util/Makefile 2004-02-11 12:29:10.000000000 -0500 +@@ -1,23 +1,8 @@ +-always := mk_task mk_constants +-targets := mk_task_user.o mk_task_kern.o \ +- mk_constants_user.o mk_constants_kern.o ++host-progs := mk_task mk_constants ++always := $(host-progs) + +-$(obj)/mk_task: $(obj)/mk_task_user.o $(obj)/mk_task_kern.o +- $(CC) -o $@ $^ ++mk_task-objs := mk_task_user.o mk_task_kern.o ++mk_constants-objs := mk_constants_user.o mk_constants_kern.o + +-$(obj)/mk_task_user.o: $(src)/mk_task_user.c +- $(CC) -o $@ -c $< +- +-$(obj)/mk_constants : $(obj)/mk_constants_user.o $(obj)/mk_constants_kern.o +- $(CC) -o $@ $^ +- +-$(obj)/mk_constants_user.o : $(src)/mk_constants_user.c +- $(CC) -c $< -o $@ +- +-$(obj)/mk_constants_kern.o : $(src)/mk_constants_kern.c +- $(CC) $(CFLAGS) -c $< -o $@ +- +-clean: +- $(RM) $(build-targets) +- +-archmrproper: ++HOSTCFLAGS_mk_task_kern.o := $(CFLAGS) ++HOSTCFLAGS_mk_constants_kern.o := $(CFLAGS) +diff -Naur a/arch/um/util/mk_constants_kern.c b/arch/um/util/mk_constants_kern.c +--- a/arch/um/util/mk_constants_kern.c 2004-02-11 12:14:19.000000000 -0500 ++++ b/arch/um/util/mk_constants_kern.c 2004-02-11 12:26:02.000000000 -0500 +@@ -1,5 +1,6 @@ + #include "linux/kernel.h" + #include "linux/stringify.h" ++#include "linux/time.h" + #include "asm/page.h" + + extern void print_head(void); +@@ -11,6 +12,7 @@ + { + print_head(); + print_constant_int("UM_KERN_PAGE_SIZE", PAGE_SIZE); ++ + print_constant_str("UM_KERN_EMERG", KERN_EMERG); + print_constant_str("UM_KERN_ALERT", KERN_ALERT); + print_constant_str("UM_KERN_CRIT", KERN_CRIT); +@@ -19,6 +21,8 @@ + print_constant_str("UM_KERN_NOTICE", KERN_NOTICE); + print_constant_str("UM_KERN_INFO", KERN_INFO); + print_constant_str("UM_KERN_DEBUG", KERN_DEBUG); ++ ++ print_constant_int("UM_NSEC_PER_SEC", NSEC_PER_SEC); + 
print_tail(); + return(0); + } +diff -Naur a/drivers/base/Makefile b/drivers/base/Makefile +--- a/drivers/base/Makefile 2004-02-11 12:16:32.000000000 -0500 ++++ b/drivers/base/Makefile 2004-02-11 12:28:31.000000000 -0500 +@@ -2,7 +2,8 @@ + + obj-y := core.o sys.o interface.o bus.o \ + driver.o class.o class_simple.o platform.o \ +- cpu.o firmware.o init.o map.o dmapool.o ++ cpu.o firmware.o init.o map.o + obj-y += power/ + obj-$(CONFIG_FW_LOADER) += firmware_class.o + obj-$(CONFIG_NUMA) += node.o ++obj-$(CONFIG_PCI) += dmapool.o +diff -Naur a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h +--- a/fs/hostfs/hostfs.h 1969-12-31 19:00:00.000000000 -0500 ++++ b/fs/hostfs/hostfs.h 2004-02-11 12:25:42.000000000 -0500 +@@ -0,0 +1,79 @@ ++#ifndef __UM_FS_HOSTFS ++#define __UM_FS_HOSTFS ++ ++#include "os.h" ++ ++/* These are exactly the same definitions as in fs.h, but the names are ++ * changed so that this file can be included in both kernel and user files. ++ */ ++ ++#define HOSTFS_ATTR_MODE 1 ++#define HOSTFS_ATTR_UID 2 ++#define HOSTFS_ATTR_GID 4 ++#define HOSTFS_ATTR_SIZE 8 ++#define HOSTFS_ATTR_ATIME 16 ++#define HOSTFS_ATTR_MTIME 32 ++#define HOSTFS_ATTR_CTIME 64 ++#define HOSTFS_ATTR_ATIME_SET 128 ++#define HOSTFS_ATTR_MTIME_SET 256 ++#define HOSTFS_ATTR_FORCE 512 /* Not a change, but a change it */ ++#define HOSTFS_ATTR_ATTR_FLAG 1024 ++ ++struct hostfs_iattr { ++ unsigned int ia_valid; ++ mode_t ia_mode; ++ uid_t ia_uid; ++ gid_t ia_gid; ++ loff_t ia_size; ++ struct timespec ia_atime; ++ struct timespec ia_mtime; ++ struct timespec ia_ctime; ++ unsigned int ia_attr_flags; ++}; ++ ++extern int stat_file(const char *path, unsigned long long *inode_out, ++ int *mode_out, int *nlink_out, int *uid_out, int *gid_out, ++ unsigned long long *size_out, struct timespec *atime_out, ++ struct timespec *mtime_out, struct timespec *ctime_out, ++ int *blksize_out, unsigned long long *blocks_out); ++extern int access_file(char *path, int r, int w, int x); ++extern int open_file(char 
*path, int r, int w, int append); ++extern int file_type(const char *path, int *rdev); ++extern void *open_dir(char *path, int *err_out); ++extern char *read_dir(void *stream, unsigned long long *pos, ++ unsigned long long *ino_out, int *len_out); ++extern void close_file(void *stream); ++extern void close_dir(void *stream); ++extern int read_file(int fd, unsigned long long *offset, char *buf, int len); ++extern int write_file(int fd, unsigned long long *offset, const char *buf, ++ int len); ++extern int lseek_file(int fd, long long offset, int whence); ++extern int file_create(char *name, int ur, int uw, int ux, int gr, ++ int gw, int gx, int or, int ow, int ox); ++extern int set_attr(const char *file, struct hostfs_iattr *attrs); ++extern int make_symlink(const char *from, const char *to); ++extern int unlink_file(const char *file); ++extern int do_mkdir(const char *file, int mode); ++extern int do_rmdir(const char *file); ++extern int do_mknod(const char *file, int mode, int dev); ++extern int link_file(const char *from, const char *to); ++extern int do_readlink(char *file, char *buf, int size); ++extern int rename_file(char *from, char *to); ++extern int do_statfs(char *root, long *bsize_out, long long *blocks_out, ++ long long *bfree_out, long long *bavail_out, ++ long long *files_out, long long *ffree_out, ++ void *fsid_out, int fsid_size, long *namelen_out, ++ long *spare_out); ++ ++#endif ++ ++/* ++ * Overrides for Emacs so that we follow Linus's tabbing style. ++ * Emacs will notice this stuff at the end of the file and automatically ++ * adjust the settings for this buffer only. This must remain at the end ++ * of the file. 
++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -Naur a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c +--- a/fs/hostfs/hostfs_kern.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/fs/hostfs/hostfs_kern.c 2004-02-11 12:25:59.000000000 -0500 +@@ -0,0 +1,1008 @@ ++/* ++ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com) ++ * Licensed under the GPL ++ * ++ * Ported the filesystem routines to 2.5. ++ * 2003-02-10 Petr Baudis ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "hostfs.h" ++#include "kern_util.h" ++#include "kern.h" ++#include "user_util.h" ++#include "2_5compat.h" ++#include "init.h" ++ ++struct hostfs_inode_info { ++ char *host_filename; ++ int fd; ++ int mode; ++ struct inode vfs_inode; ++}; ++ ++static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode) ++{ ++ return(list_entry(inode, struct hostfs_inode_info, vfs_inode)); ++} ++ ++#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_dentry->d_inode) ++ ++int hostfs_d_delete(struct dentry *dentry) ++{ ++ return(1); ++} ++ ++struct dentry_operations hostfs_dentry_ops = { ++ .d_delete = hostfs_d_delete, ++}; ++ ++/* Changed in hostfs_args before the kernel starts running */ ++static char *root_ino = "/"; ++static int append = 0; ++ ++#define HOSTFS_SUPER_MAGIC 0x00c0ffee ++ ++static struct inode_operations hostfs_iops; ++static struct inode_operations hostfs_dir_iops; ++static struct address_space_operations hostfs_link_aops; ++ ++static int __init hostfs_args(char *options, int *add) ++{ ++ char *ptr; ++ ++ ptr = strchr(options, ','); ++ if(ptr != NULL) ++ *ptr++ = '\0'; ++ if(*options != '\0') ++ root_ino = options; ++ ++ options = ptr; ++ while(options){ ++ ptr = strchr(options, ','); ++ if(ptr != NULL) ++ *ptr++ = '\0'; ++ if(*options != '\0'){ ++ if(!strcmp(options, 
"append")) ++ append = 1; ++ else printf("hostfs_args - unsupported option - %s\n", ++ options); ++ } ++ options = ptr; ++ } ++ return(0); ++} ++ ++__uml_setup("hostfs=", hostfs_args, ++"hostfs=,,...\n" ++" This is used to set hostfs parameters. The root directory argument\n" ++" is used to confine all hostfs mounts to within the specified directory\n" ++" tree on the host. If this isn't specified, then a user inside UML can\n" ++" mount anything on the host that's accessible to the user that's running\n" ++" it.\n" ++" The only flag currently supported is 'append', which specifies that all\n" ++" files opened by hostfs will be opened in append mode.\n\n" ++); ++ ++static char *dentry_name(struct dentry *dentry, int extra) ++{ ++ struct dentry *parent; ++ char *root, *name; ++ int len; ++ ++ len = 0; ++ parent = dentry; ++ while(parent->d_parent != parent){ ++ len += parent->d_name.len + 1; ++ parent = parent->d_parent; ++ } ++ ++ root = HOSTFS_I(parent->d_inode)->host_filename; ++ len += strlen(root); ++ name = kmalloc(len + extra + 1, GFP_KERNEL); ++ if(name == NULL) return(NULL); ++ ++ name[len] = '\0'; ++ parent = dentry; ++ while(parent->d_parent != parent){ ++ len -= parent->d_name.len + 1; ++ name[len] = '/'; ++ strncpy(&name[len + 1], parent->d_name.name, ++ parent->d_name.len); ++ parent = parent->d_parent; ++ } ++ strncpy(name, root, strlen(root)); ++ return(name); ++} ++ ++static char *inode_name(struct inode *ino, int extra) ++{ ++ struct dentry *dentry; ++ ++ dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias); ++ return(dentry_name(dentry, extra)); ++} ++ ++static int read_name(struct inode *ino, char *name) ++{ ++ /* The non-int inode fields are copied into ints by stat_file and ++ * then copied into the inode because passing the actual pointers ++ * in and having them treated as int * breaks on big-endian machines ++ */ ++ int err; ++ int i_mode, i_nlink, i_blksize; ++ unsigned long long i_size; ++ unsigned long long i_ino; ++ unsigned 
long long i_blocks; ++ ++ err = stat_file(name, &i_ino, &i_mode, &i_nlink, &ino->i_uid, ++ &ino->i_gid, &i_size, &ino->i_atime, &ino->i_mtime, ++ &ino->i_ctime, &i_blksize, &i_blocks); ++ if(err) ++ return(err); ++ ++ ino->i_ino = i_ino; ++ ino->i_mode = i_mode; ++ ino->i_nlink = i_nlink; ++ ino->i_size = i_size; ++ ino->i_blksize = i_blksize; ++ ino->i_blocks = i_blocks; ++ if((ino->i_sb->s_dev == ROOT_DEV) && (ino->i_uid == getuid())) ++ ino->i_uid = 0; ++ return(0); ++} ++ ++static char *follow_link(char *link) ++{ ++ int len, n; ++ char *name, *resolved, *end; ++ ++ len = 64; ++ while(1){ ++ n = -ENOMEM; ++ name = kmalloc(len, GFP_KERNEL); ++ if(name == NULL) ++ goto out; ++ ++ n = do_readlink(link, name, len); ++ if(n < len) ++ break; ++ len *= 2; ++ kfree(name); ++ } ++ if(n < 0) ++ goto out_free; ++ ++ if(*name == '/') ++ return(name); ++ ++ end = strrchr(link, '/'); ++ if(end == NULL) ++ return(name); ++ ++ *(end + 1) = '\0'; ++ len = strlen(link) + strlen(name) + 1; ++ ++ resolved = kmalloc(len, GFP_KERNEL); ++ if(resolved == NULL){ ++ n = -ENOMEM; ++ goto out_free; ++ } ++ ++ sprintf(resolved, "%s%s", link, name); ++ kfree(name); ++ kfree(link); ++ return(resolved); ++ ++ out_free: ++ kfree(name); ++ out: ++ return(ERR_PTR(n)); ++} ++ ++static int read_inode(struct inode *ino) ++{ ++ char *name; ++ int err = 0; ++ ++ /* Unfortunately, we are called from iget() when we don't have a dentry ++ * allocated yet. 
++ */ ++ if(list_empty(&ino->i_dentry)) ++ goto out; ++ ++ err = -ENOMEM; ++ name = inode_name(ino, 0); ++ if(name == NULL) ++ goto out; ++ ++ if(file_type(name, NULL) == OS_TYPE_SYMLINK){ ++ name = follow_link(name); ++ if(IS_ERR(name)){ ++ err = PTR_ERR(name); ++ goto out; ++ } ++ } ++ ++ err = read_name(ino, name); ++ kfree(name); ++ out: ++ return(err); ++} ++ ++int hostfs_statfs(struct super_block *sb, struct kstatfs *sf) ++{ ++ /* do_statfs uses struct statfs64 internally, but the linux kernel ++ * struct statfs still has 32-bit versions for most of these fields, ++ * so we convert them here ++ */ ++ int err; ++ long long f_blocks; ++ long long f_bfree; ++ long long f_bavail; ++ long long f_files; ++ long long f_ffree; ++ ++ err = do_statfs(HOSTFS_I(sb->s_root->d_inode)->host_filename, ++ &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files, ++ &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid), ++ &sf->f_namelen, sf->f_spare); ++ if(err) return(err); ++ sf->f_blocks = f_blocks; ++ sf->f_bfree = f_bfree; ++ sf->f_bavail = f_bavail; ++ sf->f_files = f_files; ++ sf->f_ffree = f_ffree; ++ sf->f_type = HOSTFS_SUPER_MAGIC; ++ return(0); ++} ++ ++static struct inode *hostfs_alloc_inode(struct super_block *sb) ++{ ++ struct hostfs_inode_info *hi; ++ ++ hi = kmalloc(sizeof(*hi), GFP_KERNEL); ++ if(hi == NULL) ++ return(NULL); ++ ++ *hi = ((struct hostfs_inode_info) { .host_filename = NULL, ++ .fd = -1, ++ .mode = 0 }); ++ inode_init_once(&hi->vfs_inode); ++ return(&hi->vfs_inode); ++} ++ ++static void hostfs_destroy_inode(struct inode *inode) ++{ ++ if(HOSTFS_I(inode)->host_filename) ++ kfree(HOSTFS_I(inode)->host_filename); ++ ++ if(HOSTFS_I(inode)->fd != -1) ++ close_file(&HOSTFS_I(inode)->fd); ++ ++ kfree(HOSTFS_I(inode)); ++} ++ ++static void hostfs_read_inode(struct inode *inode) ++{ ++ read_inode(inode); ++} ++ ++static struct super_operations hostfs_sbops = { ++ .alloc_inode = hostfs_alloc_inode, ++ .destroy_inode = hostfs_destroy_inode, ++ .read_inode = 
hostfs_read_inode, ++ .statfs = hostfs_statfs, ++}; ++ ++int hostfs_readdir(struct file *file, void *ent, filldir_t filldir) ++{ ++ void *dir; ++ char *name; ++ unsigned long long next, ino; ++ int error, len; ++ ++ name = dentry_name(file->f_dentry, 0); ++ if(name == NULL) return(-ENOMEM); ++ dir = open_dir(name, &error); ++ kfree(name); ++ if(dir == NULL) return(-error); ++ next = file->f_pos; ++ while((name = read_dir(dir, &next, &ino, &len)) != NULL){ ++ error = (*filldir)(ent, name, len, file->f_pos, ++ ino, DT_UNKNOWN); ++ if(error) break; ++ file->f_pos = next; ++ } ++ close_dir(dir); ++ return(0); ++} ++ ++int hostfs_file_open(struct inode *ino, struct file *file) ++{ ++ char *name; ++ int mode = 0, r = 0, w = 0, fd; ++ ++ mode = file->f_mode & (FMODE_READ | FMODE_WRITE); ++ if((mode & HOSTFS_I(ino)->mode) == mode) ++ return(0); ++ ++ /* The file may already have been opened, but with the wrong access, ++ * so this resets things and reopens the file with the new access. ++ */ ++ if(HOSTFS_I(ino)->fd != -1){ ++ close_file(&HOSTFS_I(ino)->fd); ++ HOSTFS_I(ino)->fd = -1; ++ } ++ ++ HOSTFS_I(ino)->mode |= mode; ++ if(HOSTFS_I(ino)->mode & FMODE_READ) ++ r = 1; ++ if(HOSTFS_I(ino)->mode & FMODE_WRITE) ++ w = 1; ++ if(w) ++ r = 1; ++ ++ name = dentry_name(file->f_dentry, 0); ++ if(name == NULL) ++ return(-ENOMEM); ++ ++ fd = open_file(name, r, w, append); ++ kfree(name); ++ if(fd < 0) return(fd); ++ FILE_HOSTFS_I(file)->fd = fd; ++ ++ return(0); ++} ++ ++int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync) ++{ ++ return(0); ++} ++ ++static struct file_operations hostfs_file_fops = { ++ .llseek = generic_file_llseek, ++ .read = generic_file_read, ++ .write = generic_file_write, ++ .mmap = generic_file_mmap, ++ .open = hostfs_file_open, ++ .release = NULL, ++ .fsync = hostfs_fsync, ++}; ++ ++static struct file_operations hostfs_dir_fops = { ++ .readdir = hostfs_readdir, ++ .read = generic_read_dir, ++}; ++ ++int hostfs_writepage(struct page 
*page, struct writeback_control *wbc) ++{ ++ struct address_space *mapping = page->mapping; ++ struct inode *inode = mapping->host; ++ char *buffer; ++ unsigned long long base; ++ int count = PAGE_CACHE_SIZE; ++ int end_index = inode->i_size >> PAGE_CACHE_SHIFT; ++ int err; ++ ++ if (page->index >= end_index) ++ count = inode->i_size & (PAGE_CACHE_SIZE-1); ++ ++ buffer = kmap(page); ++ base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT; ++ ++ err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count); ++ if(err != count){ ++ ClearPageUptodate(page); ++ goto out; ++ } ++ ++ if (base > inode->i_size) ++ inode->i_size = base; ++ ++ if (PageError(page)) ++ ClearPageError(page); ++ err = 0; ++ ++ out: ++ kunmap(page); ++ ++ unlock_page(page); ++ return err; ++} ++ ++int hostfs_readpage(struct file *file, struct page *page) ++{ ++ char *buffer; ++ long long start; ++ int err = 0; ++ ++ start = (long long) page->index << PAGE_CACHE_SHIFT; ++ buffer = kmap(page); ++ err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer, ++ PAGE_CACHE_SIZE); ++ if(err < 0) goto out; ++ ++ memset(&buffer[err], 0, PAGE_CACHE_SIZE - err); ++ ++ flush_dcache_page(page); ++ SetPageUptodate(page); ++ if (PageError(page)) ClearPageError(page); ++ err = 0; ++ out: ++ kunmap(page); ++ unlock_page(page); ++ return(err); ++} ++ ++int hostfs_prepare_write(struct file *file, struct page *page, ++ unsigned int from, unsigned int to) ++{ ++ char *buffer; ++ long long start, tmp; ++ int err; ++ ++ start = (long long) page->index << PAGE_CACHE_SHIFT; ++ buffer = kmap(page); ++ if(from != 0){ ++ tmp = start; ++ err = read_file(FILE_HOSTFS_I(file)->fd, &tmp, buffer, ++ from); ++ if(err < 0) goto out; ++ } ++ if(to != PAGE_CACHE_SIZE){ ++ start += to; ++ err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer + to, ++ PAGE_CACHE_SIZE - to); ++ if(err < 0) goto out; ++ } ++ err = 0; ++ out: ++ kunmap(page); ++ return(err); ++} ++ ++int hostfs_commit_write(struct file *file, struct page *page, 
unsigned from, ++ unsigned to) ++{ ++ struct address_space *mapping = page->mapping; ++ struct inode *inode = mapping->host; ++ char *buffer; ++ long long start; ++ int err = 0; ++ ++ start = (long long) (page->index << PAGE_CACHE_SHIFT) + from; ++ buffer = kmap(page); ++ err = write_file(FILE_HOSTFS_I(file)->fd, &start, buffer + from, ++ to - from); ++ if(err > 0) err = 0; ++ if(!err && (start > inode->i_size)) ++ inode->i_size = start; ++ ++ kunmap(page); ++ return(err); ++} ++ ++static struct address_space_operations hostfs_aops = { ++ .writepage = hostfs_writepage, ++ .readpage = hostfs_readpage, ++/* .set_page_dirty = __set_page_dirty_nobuffers, */ ++ .prepare_write = hostfs_prepare_write, ++ .commit_write = hostfs_commit_write ++}; ++ ++static int init_inode(struct inode *inode, struct dentry *dentry) ++{ ++ char *name; ++ int type, err = -ENOMEM, rdev; ++ ++ if(dentry){ ++ name = dentry_name(dentry, 0); ++ if(name == NULL) ++ goto out; ++ type = file_type(name, &rdev); ++ kfree(name); ++ } ++ else type = OS_TYPE_DIR; ++ ++ err = 0; ++ if(type == OS_TYPE_SYMLINK) ++ inode->i_op = &page_symlink_inode_operations; ++ else if(type == OS_TYPE_DIR) ++ inode->i_op = &hostfs_dir_iops; ++ else inode->i_op = &hostfs_iops; ++ ++ if(type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops; ++ else inode->i_fop = &hostfs_file_fops; ++ ++ if(type == OS_TYPE_SYMLINK) ++ inode->i_mapping->a_ops = &hostfs_link_aops; ++ else inode->i_mapping->a_ops = &hostfs_aops; ++ ++ switch (type) { ++ case OS_TYPE_CHARDEV: ++ init_special_inode(inode, S_IFCHR, rdev); ++ break; ++ case OS_TYPE_BLOCKDEV: ++ init_special_inode(inode, S_IFBLK, rdev); ++ break; ++ case OS_TYPE_FIFO: ++ init_special_inode(inode, S_IFIFO, 0); ++ break; ++ case OS_TYPE_SOCK: ++ init_special_inode(inode, S_IFSOCK, 0); ++ break; ++ } ++ out: ++ return(err); ++} ++ ++int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, ++ struct nameidata *nd) ++{ ++ struct inode *inode; ++ char *name; ++ int error, fd; 
++ ++ error = -ENOMEM; ++ inode = iget(dir->i_sb, 0); ++ if(inode == NULL) goto out; ++ ++ error = init_inode(inode, dentry); ++ if(error) ++ goto out_put; ++ ++ error = -ENOMEM; ++ name = dentry_name(dentry, 0); ++ if(name == NULL) ++ goto out_put; ++ ++ fd = file_create(name, ++ mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR, ++ mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP, ++ mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH); ++ if(fd < 0) ++ error = fd; ++ else error = read_name(inode, name); ++ ++ kfree(name); ++ if(error) ++ goto out_put; ++ ++ HOSTFS_I(inode)->fd = fd; ++ HOSTFS_I(inode)->mode = FMODE_READ | FMODE_WRITE; ++ d_instantiate(dentry, inode); ++ return(0); ++ ++ out_put: ++ iput(inode); ++ out: ++ return(error); ++} ++ ++struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct inode *inode; ++ char *name; ++ int err; ++ ++ err = -ENOMEM; ++ inode = iget(ino->i_sb, 0); ++ if(inode == NULL) ++ goto out; ++ ++ err = init_inode(inode, dentry); ++ if(err) ++ goto out_put; ++ ++ err = -ENOMEM; ++ name = dentry_name(dentry, 0); ++ if(name == NULL) ++ goto out_put; ++ ++ err = read_name(inode, name); ++ kfree(name); ++ if(err == -ENOENT){ ++ iput(inode); ++ inode = NULL; ++ } ++ else if(err) ++ goto out_put; ++ ++ d_add(dentry, inode); ++ dentry->d_op = &hostfs_dentry_ops; ++ return(NULL); ++ ++ out_put: ++ iput(inode); ++ out: ++ return(ERR_PTR(err)); ++} ++ ++static char *inode_dentry_name(struct inode *ino, struct dentry *dentry) ++{ ++ char *file; ++ int len; ++ ++ file = inode_name(ino, dentry->d_name.len + 1); ++ if(file == NULL) return(NULL); ++ strcat(file, "/"); ++ len = strlen(file); ++ strncat(file, dentry->d_name.name, dentry->d_name.len); ++ file[len + dentry->d_name.len] = '\0'; ++ return(file); ++} ++ ++int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from) ++{ ++ char *from_name, *to_name; ++ int err; ++ ++ if((from_name = inode_dentry_name(ino, from)) == NULL) ++ 
return(-ENOMEM); ++ to_name = dentry_name(to, 0); ++ if(to_name == NULL){ ++ kfree(from_name); ++ return(-ENOMEM); ++ } ++ err = link_file(to_name, from_name); ++ kfree(from_name); ++ kfree(to_name); ++ return(err); ++} ++ ++int hostfs_unlink(struct inode *ino, struct dentry *dentry) ++{ ++ char *file; ++ int err; ++ ++ if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); ++ if(append) ++ return(-EPERM); ++ ++ err = unlink_file(file); ++ kfree(file); ++ return(err); ++} ++ ++int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to) ++{ ++ char *file; ++ int err; ++ ++ if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); ++ err = make_symlink(file, to); ++ kfree(file); ++ return(err); ++} ++ ++int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode) ++{ ++ char *file; ++ int err; ++ ++ if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); ++ err = do_mkdir(file, mode); ++ kfree(file); ++ return(err); ++} ++ ++int hostfs_rmdir(struct inode *ino, struct dentry *dentry) ++{ ++ char *file; ++ int err; ++ ++ if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM); ++ err = do_rmdir(file); ++ kfree(file); ++ return(err); ++} ++ ++int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) ++{ ++ struct inode *inode; ++ char *name; ++ int err = -ENOMEM; ++ ++ inode = iget(dir->i_sb, 0); ++ if(inode == NULL) ++ goto out; ++ ++ err = init_inode(inode, dentry); ++ if(err) ++ goto out_put; ++ ++ err = -ENOMEM; ++ name = dentry_name(dentry, 0); ++ if(name == NULL) ++ goto out_put; ++ ++ init_special_inode(inode, mode, dev); ++ err = do_mknod(name, mode, dev); ++ if(err) ++ goto out_free; ++ ++ err = read_name(inode, name); ++ kfree(name); ++ if(err) ++ goto out_put; ++ ++ d_instantiate(dentry, inode); ++ return(0); ++ ++ out_free: ++ kfree(name); ++ out_put: ++ iput(inode); ++ out: ++ return(err); ++} ++ ++int hostfs_rename(struct inode *from_ino, struct dentry *from, 
++ struct inode *to_ino, struct dentry *to) ++{ ++ char *from_name, *to_name; ++ int err; ++ ++ if((from_name = inode_dentry_name(from_ino, from)) == NULL) ++ return(-ENOMEM); ++ if((to_name = inode_dentry_name(to_ino, to)) == NULL){ ++ kfree(from_name); ++ return(-ENOMEM); ++ } ++ err = rename_file(from_name, to_name); ++ kfree(from_name); ++ kfree(to_name); ++ return(err); ++} ++ ++void hostfs_truncate(struct inode *ino) ++{ ++ not_implemented(); ++} ++ ++int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd) ++{ ++ char *name; ++ int r = 0, w = 0, x = 0, err; ++ ++ if(desired & MAY_READ) r = 1; ++ if(desired & MAY_WRITE) w = 1; ++ if(desired & MAY_EXEC) x = 1; ++ name = inode_name(ino, 0); ++ if(name == NULL) return(-ENOMEM); ++ err = access_file(name, r, w, x); ++ kfree(name); ++ if(!err) err = vfs_permission(ino, desired); ++ return(err); ++} ++ ++int hostfs_setattr(struct dentry *dentry, struct iattr *attr) ++{ ++ struct hostfs_iattr attrs; ++ char *name; ++ int err; ++ ++ if(append) ++ attr->ia_valid &= ~ATTR_SIZE; ++ ++ attrs.ia_valid = 0; ++ if(attr->ia_valid & ATTR_MODE){ ++ attrs.ia_valid |= HOSTFS_ATTR_MODE; ++ attrs.ia_mode = attr->ia_mode; ++ } ++ if(attr->ia_valid & ATTR_UID){ ++ if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) && ++ (attr->ia_uid == 0)) ++ attr->ia_uid = getuid(); ++ attrs.ia_valid |= HOSTFS_ATTR_UID; ++ attrs.ia_uid = attr->ia_uid; ++ } ++ if(attr->ia_valid & ATTR_GID){ ++ if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) && ++ (attr->ia_gid == 0)) ++ attr->ia_gid = getuid(); ++ attrs.ia_valid |= HOSTFS_ATTR_GID; ++ attrs.ia_gid = attr->ia_gid; ++ } ++ if(attr->ia_valid & ATTR_SIZE){ ++ attrs.ia_valid |= HOSTFS_ATTR_SIZE; ++ attrs.ia_size = attr->ia_size; ++ } ++ if(attr->ia_valid & ATTR_ATIME){ ++ attrs.ia_valid |= HOSTFS_ATTR_ATIME; ++ attrs.ia_atime = attr->ia_atime; ++ } ++ if(attr->ia_valid & ATTR_MTIME){ ++ attrs.ia_valid |= HOSTFS_ATTR_MTIME; ++ attrs.ia_mtime = attr->ia_mtime; ++ } ++ if(attr->ia_valid & 
ATTR_CTIME){ ++ attrs.ia_valid |= HOSTFS_ATTR_CTIME; ++ attrs.ia_ctime = attr->ia_ctime; ++ } ++ if(attr->ia_valid & ATTR_ATIME_SET){ ++ attrs.ia_valid |= HOSTFS_ATTR_ATIME_SET; ++ } ++ if(attr->ia_valid & ATTR_MTIME_SET){ ++ attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET; ++ } ++ name = dentry_name(dentry, 0); ++ if(name == NULL) return(-ENOMEM); ++ err = set_attr(name, &attrs); ++ kfree(name); ++ if(err) ++ return(err); ++ ++ return(inode_setattr(dentry->d_inode, attr)); ++} ++ ++int hostfs_getattr(struct vfsmount *mnt, struct dentry *dentry, ++ struct kstat *stat) ++{ ++ generic_fillattr(dentry->d_inode, stat); ++ return(0); ++} ++ ++static struct inode_operations hostfs_iops = { ++ .create = hostfs_create, ++ .link = hostfs_link, ++ .unlink = hostfs_unlink, ++ .symlink = hostfs_symlink, ++ .mkdir = hostfs_mkdir, ++ .rmdir = hostfs_rmdir, ++ .mknod = hostfs_mknod, ++ .rename = hostfs_rename, ++ .truncate = hostfs_truncate, ++ .permission = hostfs_permission, ++ .setattr = hostfs_setattr, ++ .getattr = hostfs_getattr, ++}; ++ ++static struct inode_operations hostfs_dir_iops = { ++ .create = hostfs_create, ++ .lookup = hostfs_lookup, ++ .link = hostfs_link, ++ .unlink = hostfs_unlink, ++ .symlink = hostfs_symlink, ++ .mkdir = hostfs_mkdir, ++ .rmdir = hostfs_rmdir, ++ .mknod = hostfs_mknod, ++ .rename = hostfs_rename, ++ .truncate = hostfs_truncate, ++ .permission = hostfs_permission, ++ .setattr = hostfs_setattr, ++ .getattr = hostfs_getattr, ++}; ++ ++int hostfs_link_readpage(struct file *file, struct page *page) ++{ ++ char *buffer, *name; ++ long long start; ++ int err; ++ ++ start = page->index << PAGE_CACHE_SHIFT; ++ buffer = kmap(page); ++ name = inode_name(page->mapping->host, 0); ++ if(name == NULL) return(-ENOMEM); ++ err = do_readlink(name, buffer, PAGE_CACHE_SIZE); ++ kfree(name); ++ if(err == PAGE_CACHE_SIZE) ++ err = -E2BIG; ++ else if(err > 0){ ++ flush_dcache_page(page); ++ SetPageUptodate(page); ++ if (PageError(page)) ClearPageError(page); ++ err = 0; 
++ } ++ kunmap(page); ++ unlock_page(page); ++ return(err); ++} ++ ++static struct address_space_operations hostfs_link_aops = { ++ .readpage = hostfs_link_readpage, ++}; ++ ++static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent) ++{ ++ struct inode *root_inode; ++ char *name, *data = d; ++ int err; ++ ++ sb->s_blocksize = 1024; ++ sb->s_blocksize_bits = 10; ++ sb->s_magic = HOSTFS_SUPER_MAGIC; ++ sb->s_op = &hostfs_sbops; ++ ++ if((data == NULL) || (*data == '\0')) ++ data = root_ino; ++ ++ err = -ENOMEM; ++ name = kmalloc(strlen(data) + 1, GFP_KERNEL); ++ if(name == NULL) ++ goto out; ++ ++ strcpy(name, data); ++ ++ root_inode = iget(sb, 0); ++ if(root_inode == NULL) ++ goto out_free; ++ ++ err = init_inode(root_inode, NULL); ++ if(err) ++ goto out_put; ++ ++ HOSTFS_I(root_inode)->host_filename = name; ++ ++ err = -ENOMEM; ++ sb->s_root = d_alloc_root(root_inode); ++ if(sb->s_root == NULL) ++ goto out_put; ++ ++ err = read_inode(root_inode); ++ if(err) ++ goto out_put; ++ ++ return(0); ++ ++ out_put: ++ iput(root_inode); ++ out_free: ++ kfree(name); ++ out: ++ return(err); ++} ++ ++static struct super_block *hostfs_read_sb(struct file_system_type *type, ++ int flags, const char *dev_name, ++ void *data) ++{ ++ return(get_sb_nodev(type, flags, data, hostfs_fill_sb_common)); ++} ++ ++static struct file_system_type hostfs_type = { ++ .owner = THIS_MODULE, ++ .name = "hostfs", ++ .get_sb = hostfs_read_sb, ++ .kill_sb = kill_anon_super, ++ .fs_flags = 0, ++}; ++ ++static int __init init_hostfs(void) ++{ ++ return(register_filesystem(&hostfs_type)); ++} ++ ++static void __exit exit_hostfs(void) ++{ ++ unregister_filesystem(&hostfs_type); ++} ++ ++module_init(init_hostfs) ++module_exit(exit_hostfs) ++MODULE_LICENSE("GPL"); ++ ++/* ++ * Overrides for Emacs so that we follow Linus's tabbing style. ++ * Emacs will notice this stuff at the end of the file and automatically ++ * adjust the settings for this buffer only. 
This must remain at the end ++ * of the file. ++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -Naur a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c +--- a/fs/hostfs/hostfs_user.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/fs/hostfs/hostfs_user.c 2004-02-11 12:26:11.000000000 -0500 +@@ -0,0 +1,361 @@ ++/* ++ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com) ++ * Licensed under the GPL ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "hostfs.h" ++#include "kern_util.h" ++#include "user.h" ++ ++int stat_file(const char *path, unsigned long long *inode_out, int *mode_out, ++ int *nlink_out, int *uid_out, int *gid_out, ++ unsigned long long *size_out, struct timespec *atime_out, ++ struct timespec *mtime_out, struct timespec *ctime_out, ++ int *blksize_out, unsigned long long *blocks_out) ++{ ++ struct stat64 buf; ++ ++ if(lstat64(path, &buf) < 0) ++ return(-errno); ++ ++ /* See the Makefile for why STAT64_INO_FIELD is passed in ++ * by the build ++ */ ++ if(inode_out != NULL) *inode_out = buf.STAT64_INO_FIELD; ++ if(mode_out != NULL) *mode_out = buf.st_mode; ++ if(nlink_out != NULL) *nlink_out = buf.st_nlink; ++ if(uid_out != NULL) *uid_out = buf.st_uid; ++ if(gid_out != NULL) *gid_out = buf.st_gid; ++ if(size_out != NULL) *size_out = buf.st_size; ++ if(atime_out != NULL) { ++ atime_out->tv_sec = buf.st_atime; ++ atime_out->tv_nsec = 0; ++ } ++ if(mtime_out != NULL) { ++ mtime_out->tv_sec = buf.st_mtime; ++ mtime_out->tv_nsec = 0; ++ } ++ if(ctime_out != NULL) { ++ ctime_out->tv_sec = buf.st_ctime; ++ ctime_out->tv_nsec = 0; ++ } ++ if(blksize_out != NULL) *blksize_out = buf.st_blksize; ++ if(blocks_out != NULL) *blocks_out = buf.st_blocks; ++ return(0); ++} ++ ++int file_type(const char *path, int *rdev) ++{ ++ struct stat64 buf; ++ ++ if(lstat64(path, &buf) < 0) ++ return(-errno); 
++ if(rdev != NULL) ++ *rdev = buf.st_rdev; ++ ++ if(S_ISDIR(buf.st_mode)) return(OS_TYPE_DIR); ++ else if(S_ISLNK(buf.st_mode)) return(OS_TYPE_SYMLINK); ++ else if(S_ISCHR(buf.st_mode)) return(OS_TYPE_CHARDEV); ++ else if(S_ISBLK(buf.st_mode)) return(OS_TYPE_BLOCKDEV); ++ else if(S_ISFIFO(buf.st_mode))return(OS_TYPE_FIFO); ++ else if(S_ISSOCK(buf.st_mode))return(OS_TYPE_SOCK); ++ else return(OS_TYPE_FILE); ++} ++ ++int access_file(char *path, int r, int w, int x) ++{ ++ int mode = 0; ++ ++ if(r) mode = R_OK; ++ if(w) mode |= W_OK; ++ if(x) mode |= X_OK; ++ if(access(path, mode) != 0) return(-errno); ++ else return(0); ++} ++ ++int open_file(char *path, int r, int w, int append) ++{ ++ int mode = 0, fd; ++ ++ if(r && !w) ++ mode = O_RDONLY; ++ else if(!r && w) ++ mode = O_WRONLY; ++ else if(r && w) ++ mode = O_RDWR; ++ else panic("Impossible mode in open_file"); ++ ++ if(append) ++ mode |= O_APPEND; ++ fd = open64(path, mode); ++ if(fd < 0) return(-errno); ++ else return(fd); ++} ++ ++void *open_dir(char *path, int *err_out) ++{ ++ DIR *dir; ++ ++ dir = opendir(path); ++ *err_out = errno; ++ if(dir == NULL) return(NULL); ++ return(dir); ++} ++ ++char *read_dir(void *stream, unsigned long long *pos, ++ unsigned long long *ino_out, int *len_out) ++{ ++ DIR *dir = stream; ++ struct dirent *ent; ++ ++ seekdir(dir, *pos); ++ ent = readdir(dir); ++ if(ent == NULL) return(NULL); ++ *len_out = strlen(ent->d_name); ++ *ino_out = ent->d_ino; ++ *pos = telldir(dir); ++ return(ent->d_name); ++} ++ ++int read_file(int fd, unsigned long long *offset, char *buf, int len) ++{ ++ int n; ++ ++ n = pread64(fd, buf, len, *offset); ++ if(n < 0) return(-errno); ++ *offset += n; ++ return(n); ++} ++ ++int write_file(int fd, unsigned long long *offset, const char *buf, int len) ++{ ++ int n; ++ ++ n = pwrite64(fd, buf, len, *offset); ++ if(n < 0) return(-errno); ++ *offset += n; ++ return(n); ++} ++ ++int lseek_file(int fd, long long offset, int whence) ++{ ++ int ret; ++ ++ ret = 
lseek64(fd, offset, whence); ++ if(ret < 0) return(-errno); ++ return(0); ++} ++ ++void close_file(void *stream) ++{ ++ close(*((int *) stream)); ++} ++ ++void close_dir(void *stream) ++{ ++ closedir(stream); ++} ++ ++int file_create(char *name, int ur, int uw, int ux, int gr, ++ int gw, int gx, int or, int ow, int ox) ++{ ++ int mode, fd; ++ ++ mode = 0; ++ mode |= ur ? S_IRUSR : 0; ++ mode |= uw ? S_IWUSR : 0; ++ mode |= ux ? S_IXUSR : 0; ++ mode |= gr ? S_IRGRP : 0; ++ mode |= gw ? S_IWGRP : 0; ++ mode |= gx ? S_IXGRP : 0; ++ mode |= or ? S_IROTH : 0; ++ mode |= ow ? S_IWOTH : 0; ++ mode |= ox ? S_IXOTH : 0; ++ fd = open64(name, O_CREAT | O_RDWR, mode); ++ if(fd < 0) ++ return(-errno); ++ return(fd); ++} ++ ++int set_attr(const char *file, struct hostfs_iattr *attrs) ++{ ++ struct utimbuf buf; ++ int err, ma; ++ ++ if(attrs->ia_valid & HOSTFS_ATTR_MODE){ ++ if(chmod(file, attrs->ia_mode) != 0) return(-errno); ++ } ++ if(attrs->ia_valid & HOSTFS_ATTR_UID){ ++ if(chown(file, attrs->ia_uid, -1)) return(-errno); ++ } ++ if(attrs->ia_valid & HOSTFS_ATTR_GID){ ++ if(chown(file, -1, attrs->ia_gid)) return(-errno); ++ } ++ if(attrs->ia_valid & HOSTFS_ATTR_SIZE){ ++ if(truncate(file, attrs->ia_size)) return(-errno); ++ } ++ ma = HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET; ++ if((attrs->ia_valid & ma) == ma){ ++ buf.actime = attrs->ia_atime.tv_sec; ++ buf.modtime = attrs->ia_mtime.tv_sec; ++ if(utime(file, &buf) != 0) return(-errno); ++ } ++ else { ++ struct timespec ts; ++ ++ if(attrs->ia_valid & HOSTFS_ATTR_ATIME_SET){ ++ err = stat_file(file, NULL, NULL, NULL, NULL, NULL, ++ NULL, NULL, &ts, NULL, NULL, NULL); ++ if(err != 0) ++ return(err); ++ buf.actime = attrs->ia_atime.tv_sec; ++ buf.modtime = ts.tv_sec; ++ if(utime(file, &buf) != 0) ++ return(-errno); ++ } ++ if(attrs->ia_valid & HOSTFS_ATTR_MTIME_SET){ ++ err = stat_file(file, NULL, NULL, NULL, NULL, NULL, ++ NULL, &ts, NULL, NULL, NULL, NULL); ++ if(err != 0) ++ return(err); ++ buf.actime = ts.tv_sec; ++ 
buf.modtime = attrs->ia_mtime.tv_sec; ++ if(utime(file, &buf) != 0) ++ return(-errno); ++ } ++ } ++ if(attrs->ia_valid & HOSTFS_ATTR_CTIME) ; ++ if(attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)){ ++ err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL, ++ &attrs->ia_atime, &attrs->ia_mtime, NULL, ++ NULL, NULL); ++ if(err != 0) return(err); ++ } ++ return(0); ++} ++ ++int make_symlink(const char *from, const char *to) ++{ ++ int err; ++ ++ err = symlink(to, from); ++ if(err) return(-errno); ++ return(0); ++} ++ ++int unlink_file(const char *file) ++{ ++ int err; ++ ++ err = unlink(file); ++ if(err) return(-errno); ++ return(0); ++} ++ ++int do_mkdir(const char *file, int mode) ++{ ++ int err; ++ ++ err = mkdir(file, mode); ++ if(err) return(-errno); ++ return(0); ++} ++ ++int do_rmdir(const char *file) ++{ ++ int err; ++ ++ err = rmdir(file); ++ if(err) return(-errno); ++ return(0); ++} ++ ++int do_mknod(const char *file, int mode, int dev) ++{ ++ int err; ++ ++ err = mknod(file, mode, dev); ++ if(err) return(-errno); ++ return(0); ++} ++ ++int link_file(const char *to, const char *from) ++{ ++ int err; ++ ++ err = link(to, from); ++ if(err) return(-errno); ++ return(0); ++} ++ ++int do_readlink(char *file, char *buf, int size) ++{ ++ int n; ++ ++ n = readlink(file, buf, size); ++ if(n < 0) ++ return(-errno); ++ if(n < size) ++ buf[n] = '\0'; ++ return(n); ++} ++ ++int rename_file(char *from, char *to) ++{ ++ int err; ++ ++ err = rename(from, to); ++ if(err < 0) return(-errno); ++ return(0); ++} ++ ++int do_statfs(char *root, long *bsize_out, long long *blocks_out, ++ long long *bfree_out, long long *bavail_out, ++ long long *files_out, long long *ffree_out, ++ void *fsid_out, int fsid_size, long *namelen_out, ++ long *spare_out) ++{ ++ struct statfs64 buf; ++ int err; ++ ++ err = statfs64(root, &buf); ++ if(err < 0) return(-errno); ++ *bsize_out = buf.f_bsize; ++ *blocks_out = buf.f_blocks; ++ *bfree_out = buf.f_bfree; ++ *bavail_out = 
buf.f_bavail; ++ *files_out = buf.f_files; ++ *ffree_out = buf.f_ffree; ++ memcpy(fsid_out, &buf.f_fsid, ++ sizeof(buf.f_fsid) > fsid_size ? fsid_size : ++ sizeof(buf.f_fsid)); ++ *namelen_out = buf.f_namelen; ++ spare_out[0] = buf.f_spare[0]; ++ spare_out[1] = buf.f_spare[1]; ++ spare_out[2] = buf.f_spare[2]; ++ spare_out[3] = buf.f_spare[3]; ++ spare_out[4] = buf.f_spare[4]; ++ spare_out[5] = buf.f_spare[5]; ++ return(0); ++} ++ ++/* ++ * Overrides for Emacs so that we follow Linus's tabbing style. ++ * Emacs will notice this stuff at the end of the file and automatically ++ * adjust the settings for this buffer only. This must remain at the end ++ * of the file. ++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -Naur a/fs/hostfs/Makefile b/fs/hostfs/Makefile +--- a/fs/hostfs/Makefile 1969-12-31 19:00:00.000000000 -0500 ++++ b/fs/hostfs/Makefile 2004-02-11 12:25:42.000000000 -0500 +@@ -0,0 +1,26 @@ ++# ++# Copyright (C) 2000 Jeff Dike (jdike@karaya.com) ++# Licensed under the GPL ++# ++ ++# struct stat64 changed the inode field name between 2.2 and 2.4 from st_ino ++# to __st_ino. It stayed in the same place, so as long as the correct name ++# is used, hostfs compiled on 2.2 should work on 2.4 and vice versa. 
++ ++STAT64_INO_FIELD := $(shell grep -q __st_ino /usr/include/bits/stat.h && \ ++ echo __)st_ino ++ ++hostfs-objs := hostfs_kern.o hostfs_user.o ++ ++obj-y = ++obj-$(CONFIG_HOSTFS) += hostfs.o ++ ++SINGLE_OBJS = $(foreach f,$(patsubst %.o,%,$(obj-y) $(obj-m)),$($(f)-objs)) ++ ++USER_OBJS := $(filter %_user.o,$(obj-y) $(obj-m) $(SINGLE_OBJS)) ++USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file)) ++ ++USER_CFLAGS += -DSTAT64_INO_FIELD=$(STAT64_INO_FIELD) ++ ++$(USER_OBJS) : %.o: %.c ++ $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $< +diff -Naur a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c +--- a/fs/hppfs/hppfs_kern.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/fs/hppfs/hppfs_kern.c 2004-02-11 12:27:10.000000000 -0500 +@@ -0,0 +1,811 @@ ++/* ++ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) ++ * Licensed under the GPL ++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include "os.h" ++ ++static int init_inode(struct inode *inode, struct dentry *dentry); ++ ++struct hppfs_data { ++ struct list_head list; ++ char contents[PAGE_SIZE - sizeof(struct list_head)]; ++}; ++ ++struct hppfs_private { ++ struct file proc_file; ++ int host_fd; ++ loff_t len; ++ struct hppfs_data *contents; ++}; ++ ++struct hppfs_inode_info { ++ struct dentry *proc_dentry; ++ struct inode vfs_inode; ++}; ++ ++static inline struct hppfs_inode_info *HPPFS_I(struct inode *inode) ++{ ++ return(list_entry(inode, struct hppfs_inode_info, vfs_inode)); ++} ++ ++#define HPPFS_SUPER_MAGIC 0xb00000ee ++ ++static struct super_operations hppfs_sbops; ++ ++static int is_pid(struct dentry *dentry) ++{ ++ struct super_block *sb; ++ int i; ++ ++ sb = dentry->d_sb; ++ if((sb->s_op != &hppfs_sbops) || (dentry->d_parent != sb->s_root)) ++ return(0); ++ ++ for(i = 0; i < dentry->d_name.len; i++){ ++ if(!isdigit(dentry->d_name.name[i])) ++ return(0); ++ } ++ return(1); ++} ++ ++static char *dentry_name(struct dentry 
*dentry, int extra) ++{ ++ struct dentry *parent; ++ char *root, *name; ++ const char *seg_name; ++ int len, seg_len; ++ ++ len = 0; ++ parent = dentry; ++ while(parent->d_parent != parent){ ++ if(is_pid(parent)) ++ len += strlen("pid") + 1; ++ else len += parent->d_name.len + 1; ++ parent = parent->d_parent; ++ } ++ ++ root = "proc"; ++ len += strlen(root); ++ name = kmalloc(len + extra + 1, GFP_KERNEL); ++ if(name == NULL) return(NULL); ++ ++ name[len] = '\0'; ++ parent = dentry; ++ while(parent->d_parent != parent){ ++ if(is_pid(parent)){ ++ seg_name = "pid"; ++ seg_len = strlen("pid"); ++ } ++ else { ++ seg_name = parent->d_name.name; ++ seg_len = parent->d_name.len; ++ } ++ ++ len -= seg_len + 1; ++ name[len] = '/'; ++ strncpy(&name[len + 1], seg_name, seg_len); ++ parent = parent->d_parent; ++ } ++ strncpy(name, root, strlen(root)); ++ return(name); ++} ++ ++struct dentry_operations hppfs_dentry_ops = { ++}; ++ ++static int file_removed(struct dentry *dentry, const char *file) ++{ ++ char *host_file; ++ int extra, fd; ++ ++ extra = 0; ++ if(file != NULL) extra += strlen(file) + 1; ++ ++ host_file = dentry_name(dentry, extra + strlen("/remove")); ++ if(host_file == NULL){ ++ printk("file_removed : allocation failed\n"); ++ return(-ENOMEM); ++ } ++ ++ if(file != NULL){ ++ strcat(host_file, "/"); ++ strcat(host_file, file); ++ } ++ strcat(host_file, "/remove"); ++ ++ fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); ++ kfree(host_file); ++ if(fd > 0){ ++ os_close_file(fd); ++ return(1); ++ } ++ return(0); ++} ++ ++static void hppfs_read_inode(struct inode *ino) ++{ ++ struct inode *proc_ino; ++ ++ if(HPPFS_I(ino)->proc_dentry == NULL) ++ return; ++ ++ proc_ino = HPPFS_I(ino)->proc_dentry->d_inode; ++ ino->i_uid = proc_ino->i_uid; ++ ino->i_gid = proc_ino->i_gid; ++ ino->i_atime = proc_ino->i_atime; ++ ino->i_mtime = proc_ino->i_mtime; ++ ino->i_ctime = proc_ino->i_ctime; ++ ino->i_ino = proc_ino->i_ino; ++ ino->i_mode = proc_ino->i_mode; ++ ino->i_nlink = 
proc_ino->i_nlink; ++ ino->i_size = proc_ino->i_size; ++ ino->i_blksize = proc_ino->i_blksize; ++ ino->i_blocks = proc_ino->i_blocks; ++} ++ ++static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, ++ struct nameidata *nd) ++{ ++ struct dentry *proc_dentry, *new, *parent; ++ struct inode *inode; ++ int err, deleted; ++ ++ deleted = file_removed(dentry, NULL); ++ if(deleted < 0) ++ return(ERR_PTR(deleted)); ++ else if(deleted) ++ return(ERR_PTR(-ENOENT)); ++ ++ err = -ENOMEM; ++ parent = HPPFS_I(ino)->proc_dentry; ++ down(&parent->d_inode->i_sem); ++ proc_dentry = d_lookup(parent, &dentry->d_name); ++ if(proc_dentry == NULL){ ++ proc_dentry = d_alloc(parent, &dentry->d_name); ++ if(proc_dentry == NULL){ ++ up(&parent->d_inode->i_sem); ++ goto out; ++ } ++ new = (*parent->d_inode->i_op->lookup)(parent->d_inode, ++ proc_dentry, NULL); ++ if(new){ ++ dput(proc_dentry); ++ proc_dentry = new; ++ } ++ } ++ up(&parent->d_inode->i_sem); ++ ++ if(IS_ERR(proc_dentry)) ++ return(proc_dentry); ++ ++ inode = iget(ino->i_sb, 0); ++ if(inode == NULL) ++ goto out_dput; ++ ++ err = init_inode(inode, proc_dentry); ++ if(err) ++ goto out_put; ++ ++ hppfs_read_inode(inode); ++ ++ d_add(dentry, inode); ++ dentry->d_op = &hppfs_dentry_ops; ++ return(NULL); ++ ++ out_put: ++ iput(inode); ++ out_dput: ++ dput(proc_dentry); ++ out: ++ return(ERR_PTR(err)); ++} ++ ++static struct inode_operations hppfs_file_iops = { ++}; ++ ++static ssize_t read_proc(struct file *file, char *buf, ssize_t count, ++ loff_t *ppos, int is_user) ++{ ++ ssize_t (*read)(struct file *, char *, size_t, loff_t *); ++ ssize_t n; ++ ++ read = file->f_dentry->d_inode->i_fop->read; ++ ++ if(!is_user) ++ set_fs(KERNEL_DS); ++ ++ n = (*read)(file, buf, count, &file->f_pos); ++ ++ if(!is_user) ++ set_fs(USER_DS); ++ ++ if(ppos) *ppos = file->f_pos; ++ return(n); ++} ++ ++static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count) ++{ ++ ssize_t n; ++ int cur, err; ++ char *new_buf; ++ ++ n = 
-ENOMEM; ++ new_buf = kmalloc(PAGE_SIZE, GFP_KERNEL); ++ if(new_buf == NULL){ ++ printk("hppfs_read_file : kmalloc failed\n"); ++ goto out; ++ } ++ n = 0; ++ while(count > 0){ ++ cur = min_t(ssize_t, count, PAGE_SIZE); ++ err = os_read_file(fd, new_buf, cur); ++ if(err < 0){ ++ printk("hppfs_read : read failed, errno = %d\n", ++ count); ++ n = err; ++ goto out_free; ++ } ++ else if(err == 0) ++ break; ++ ++ if(copy_to_user(buf, new_buf, err)){ ++ n = -EFAULT; ++ goto out_free; ++ } ++ n += err; ++ count -= err; ++ } ++ out_free: ++ kfree(new_buf); ++ out: ++ return(n); ++} ++ ++static ssize_t hppfs_read(struct file *file, char *buf, size_t count, ++ loff_t *ppos) ++{ ++ struct hppfs_private *hppfs = file->private_data; ++ struct hppfs_data *data; ++ loff_t off; ++ int err; ++ ++ if(hppfs->contents != NULL){ ++ if(*ppos >= hppfs->len) return(0); ++ ++ data = hppfs->contents; ++ off = *ppos; ++ while(off >= sizeof(data->contents)){ ++ data = list_entry(data->list.next, struct hppfs_data, ++ list); ++ off -= sizeof(data->contents); ++ } ++ ++ if(off + count > hppfs->len) ++ count = hppfs->len - off; ++ copy_to_user(buf, &data->contents[off], count); ++ *ppos += count; ++ } ++ else if(hppfs->host_fd != -1){ ++ err = os_seek_file(hppfs->host_fd, *ppos); ++ if(err){ ++ printk("hppfs_read : seek failed, errno = %d\n", err); ++ return(err); ++ } ++ count = hppfs_read_file(hppfs->host_fd, buf, count); ++ if(count > 0) ++ *ppos += count; ++ } ++ else count = read_proc(&hppfs->proc_file, buf, count, ppos, 1); ++ ++ return(count); ++} ++ ++static ssize_t hppfs_write(struct file *file, const char *buf, size_t len, ++ loff_t *ppos) ++{ ++ struct hppfs_private *data = file->private_data; ++ struct file *proc_file = &data->proc_file; ++ ssize_t (*write)(struct file *, const char *, size_t, loff_t *); ++ int err; ++ ++ write = proc_file->f_dentry->d_inode->i_fop->write; ++ ++ proc_file->f_pos = file->f_pos; ++ err = (*write)(proc_file, buf, len, &proc_file->f_pos); ++ file->f_pos = 
proc_file->f_pos; ++ ++ return(err); ++} ++ ++static int open_host_sock(char *host_file, int *filter_out) ++{ ++ char *end; ++ int fd; ++ ++ end = &host_file[strlen(host_file)]; ++ strcpy(end, "/rw"); ++ *filter_out = 1; ++ fd = os_connect_socket(host_file); ++ if(fd > 0) ++ return(fd); ++ ++ strcpy(end, "/r"); ++ *filter_out = 0; ++ fd = os_connect_socket(host_file); ++ return(fd); ++} ++ ++static void free_contents(struct hppfs_data *head) ++{ ++ struct hppfs_data *data; ++ struct list_head *ele, *next; ++ ++ if(head == NULL) return; ++ ++ list_for_each_safe(ele, next, &head->list){ ++ data = list_entry(ele, struct hppfs_data, list); ++ kfree(data); ++ } ++ kfree(head); ++} ++ ++static struct hppfs_data *hppfs_get_data(int fd, int filter, ++ struct file *proc_file, ++ struct file *hppfs_file, ++ loff_t *size_out) ++{ ++ struct hppfs_data *data, *new, *head; ++ int n, err; ++ ++ err = -ENOMEM; ++ data = kmalloc(sizeof(*data), GFP_KERNEL); ++ if(data == NULL){ ++ printk("hppfs_get_data : head allocation failed\n"); ++ goto failed; ++ } ++ ++ INIT_LIST_HEAD(&data->list); ++ ++ head = data; ++ *size_out = 0; ++ ++ if(filter){ ++ while((n = read_proc(proc_file, data->contents, ++ sizeof(data->contents), NULL, 0)) > 0) ++ os_write_file(fd, data->contents, n); ++ err = os_shutdown_socket(fd, 0, 1); ++ if(err){ ++ printk("hppfs_get_data : failed to shut down " ++ "socket\n"); ++ goto failed_free; ++ } ++ } ++ while(1){ ++ n = os_read_file(fd, data->contents, sizeof(data->contents)); ++ if(n < 0){ ++ err = n; ++ printk("hppfs_get_data : read failed, errno = %d\n", ++ err); ++ goto failed_free; ++ } ++ else if(n == 0) ++ break; ++ ++ *size_out += n; ++ ++ if(n < sizeof(data->contents)) ++ break; ++ ++ new = kmalloc(sizeof(*data), GFP_KERNEL); ++ if(new == 0){ ++ printk("hppfs_get_data : data allocation failed\n"); ++ err = -ENOMEM; ++ goto failed_free; ++ } ++ ++ INIT_LIST_HEAD(&new->list); ++ list_add(&new->list, &data->list); ++ data = new; ++ } ++ return(head); ++ ++ 
failed_free: ++ free_contents(head); ++ failed: ++ return(ERR_PTR(err)); ++} ++ ++static struct hppfs_private *hppfs_data(void) ++{ ++ struct hppfs_private *data; ++ ++ data = kmalloc(sizeof(*data), GFP_KERNEL); ++ if(data == NULL) ++ return(data); ++ ++ *data = ((struct hppfs_private ) { .host_fd = -1, ++ .len = -1, ++ .contents = NULL } ); ++ return(data); ++} ++ ++static int file_mode(int fmode) ++{ ++ if(fmode == (FMODE_READ | FMODE_WRITE)) ++ return(O_RDWR); ++ if(fmode == FMODE_READ) ++ return(O_RDONLY); ++ if(fmode == FMODE_WRITE) ++ return(O_WRONLY); ++ return(0); ++} ++ ++static int hppfs_open(struct inode *inode, struct file *file) ++{ ++ struct hppfs_private *data; ++ struct dentry *proc_dentry; ++ char *host_file; ++ int err, fd, type, filter; ++ ++ err = -ENOMEM; ++ data = hppfs_data(); ++ if(data == NULL) ++ goto out; ++ ++ host_file = dentry_name(file->f_dentry, strlen("/rw")); ++ if(host_file == NULL) ++ goto out_free2; ++ ++ proc_dentry = HPPFS_I(inode)->proc_dentry; ++ ++ /* XXX This isn't closed anywhere */ ++ err = open_private_file(&data->proc_file, proc_dentry, ++ file_mode(file->f_mode)); ++ if(err) ++ goto out_free1; ++ ++ type = os_file_type(host_file); ++ if(type == OS_TYPE_FILE){ ++ fd = os_open_file(host_file, of_read(OPENFLAGS()), 0); ++ if(fd >= 0) ++ data->host_fd = fd; ++ else printk("hppfs_open : failed to open '%s', errno = %d\n", ++ host_file, -fd); ++ ++ data->contents = NULL; ++ } ++ else if(type == OS_TYPE_DIR){ ++ fd = open_host_sock(host_file, &filter); ++ if(fd > 0){ ++ data->contents = hppfs_get_data(fd, filter, ++ &data->proc_file, ++ file, &data->len); ++ if(!IS_ERR(data->contents)) ++ data->host_fd = fd; ++ } ++ else printk("hppfs_open : failed to open a socket in " ++ "'%s', errno = %d\n", host_file, -fd); ++ } ++ kfree(host_file); ++ ++ file->private_data = data; ++ return(0); ++ ++ out_free1: ++ kfree(host_file); ++ out_free2: ++ free_contents(data->contents); ++ kfree(data); ++ out: ++ return(err); ++} ++ ++static 
int hppfs_dir_open(struct inode *inode, struct file *file) ++{ ++ struct hppfs_private *data; ++ struct dentry *proc_dentry; ++ int err; ++ ++ err = -ENOMEM; ++ data = hppfs_data(); ++ if(data == NULL) ++ goto out; ++ ++ proc_dentry = HPPFS_I(inode)->proc_dentry; ++ err = open_private_file(&data->proc_file, proc_dentry, ++ file_mode(file->f_mode)); ++ if(err) ++ goto out_free; ++ ++ file->private_data = data; ++ return(0); ++ ++ out_free: ++ kfree(data); ++ out: ++ return(err); ++} ++ ++static loff_t hppfs_llseek(struct file *file, loff_t off, int where) ++{ ++ struct hppfs_private *data = file->private_data; ++ struct file *proc_file = &data->proc_file; ++ loff_t (*llseek)(struct file *, loff_t, int); ++ loff_t ret; ++ ++ llseek = proc_file->f_dentry->d_inode->i_fop->llseek; ++ if(llseek != NULL){ ++ ret = (*llseek)(proc_file, off, where); ++ if(ret < 0) ++ return(ret); ++ } ++ ++ return(default_llseek(file, off, where)); ++} ++ ++static struct file_operations hppfs_file_fops = { ++ .owner = NULL, ++ .llseek = hppfs_llseek, ++ .read = hppfs_read, ++ .write = hppfs_write, ++ .open = hppfs_open, ++}; ++ ++struct hppfs_dirent { ++ void *vfs_dirent; ++ filldir_t filldir; ++ struct dentry *dentry; ++}; ++ ++static int hppfs_filldir(void *d, const char *name, int size, ++ loff_t offset, ino_t inode, unsigned int type) ++{ ++ struct hppfs_dirent *dirent = d; ++ ++ if(file_removed(dirent->dentry, name)) ++ return(0); ++ ++ return((*dirent->filldir)(dirent->vfs_dirent, name, size, offset, ++ inode, type)); ++} ++ ++static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir) ++{ ++ struct hppfs_private *data = file->private_data; ++ struct file *proc_file = &data->proc_file; ++ int (*readdir)(struct file *, void *, filldir_t); ++ struct hppfs_dirent dirent = ((struct hppfs_dirent) ++ { .vfs_dirent = ent, ++ .filldir = filldir, ++ .dentry = file->f_dentry } ); ++ int err; ++ ++ readdir = proc_file->f_dentry->d_inode->i_fop->readdir; ++ ++ proc_file->f_pos = 
file->f_pos; ++ err = (*readdir)(proc_file, &dirent, hppfs_filldir); ++ file->f_pos = proc_file->f_pos; ++ ++ return(err); ++} ++ ++static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync) ++{ ++ return(0); ++} ++ ++static struct file_operations hppfs_dir_fops = { ++ .owner = NULL, ++ .readdir = hppfs_readdir, ++ .open = hppfs_dir_open, ++ .fsync = hppfs_fsync, ++}; ++ ++static int hppfs_statfs(struct super_block *sb, struct kstatfs *sf) ++{ ++ sf->f_blocks = 0; ++ sf->f_bfree = 0; ++ sf->f_bavail = 0; ++ sf->f_files = 0; ++ sf->f_ffree = 0; ++ sf->f_type = HPPFS_SUPER_MAGIC; ++ return(0); ++} ++ ++static struct inode *hppfs_alloc_inode(struct super_block *sb) ++{ ++ struct hppfs_inode_info *hi; ++ ++ hi = kmalloc(sizeof(*hi), GFP_KERNEL); ++ if(hi == NULL) ++ return(NULL); ++ ++ *hi = ((struct hppfs_inode_info) { .proc_dentry = NULL }); ++ inode_init_once(&hi->vfs_inode); ++ return(&hi->vfs_inode); ++} ++ ++void hppfs_delete_inode(struct inode *ino) ++{ ++ clear_inode(ino); ++} ++ ++static void hppfs_destroy_inode(struct inode *inode) ++{ ++ kfree(HPPFS_I(inode)); ++} ++ ++static struct super_operations hppfs_sbops = { ++ .alloc_inode = hppfs_alloc_inode, ++ .destroy_inode = hppfs_destroy_inode, ++ .read_inode = hppfs_read_inode, ++ .delete_inode = hppfs_delete_inode, ++ .statfs = hppfs_statfs, ++}; ++ ++static int hppfs_readlink(struct dentry *dentry, char *buffer, int buflen) ++{ ++ struct file proc_file; ++ struct dentry *proc_dentry; ++ int (*readlink)(struct dentry *, char *, int); ++ int err, n; ++ ++ proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; ++ err = open_private_file(&proc_file, proc_dentry, O_RDONLY); ++ if(err) ++ return(err); ++ ++ readlink = proc_dentry->d_inode->i_op->readlink; ++ n = (*readlink)(proc_dentry, buffer, buflen); ++ ++ close_private_file(&proc_file); ++ ++ return(n); ++} ++ ++static int hppfs_follow_link(struct dentry *dentry, struct nameidata *nd) ++{ ++ struct file proc_file; ++ struct dentry 
*proc_dentry; ++ int (*follow_link)(struct dentry *, struct nameidata *); ++ int err, n; ++ ++ proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry; ++ err = open_private_file(&proc_file, proc_dentry, O_RDONLY); ++ if(err) ++ return(err); ++ ++ follow_link = proc_dentry->d_inode->i_op->follow_link; ++ n = (*follow_link)(proc_dentry, nd); ++ ++ close_private_file(&proc_file); ++ ++ return(n); ++} ++ ++static struct inode_operations hppfs_dir_iops = { ++ .lookup = hppfs_lookup, ++}; ++ ++static struct inode_operations hppfs_link_iops = { ++ .readlink = hppfs_readlink, ++ .follow_link = hppfs_follow_link, ++}; ++ ++static int init_inode(struct inode *inode, struct dentry *dentry) ++{ ++ if(S_ISDIR(dentry->d_inode->i_mode)){ ++ inode->i_op = &hppfs_dir_iops; ++ inode->i_fop = &hppfs_dir_fops; ++ } ++ else if(S_ISLNK(dentry->d_inode->i_mode)){ ++ inode->i_op = &hppfs_link_iops; ++ inode->i_fop = &hppfs_file_fops; ++ } ++ else { ++ inode->i_op = &hppfs_file_iops; ++ inode->i_fop = &hppfs_file_fops; ++ } ++ ++ HPPFS_I(inode)->proc_dentry = dentry; ++ ++ return(0); ++} ++ ++static int hppfs_fill_super(struct super_block *sb, void *d, int silent) ++{ ++ struct inode *root_inode; ++ struct file_system_type *procfs; ++ struct super_block *proc_sb; ++ int err; ++ ++ err = -ENOENT; ++ procfs = get_fs_type("proc"); ++ if(procfs == NULL) ++ goto out; ++ ++ if(list_empty(&procfs->fs_supers)) ++ goto out; ++ ++ proc_sb = list_entry(procfs->fs_supers.next, struct super_block, ++ s_instances); ++ ++ sb->s_blocksize = 1024; ++ sb->s_blocksize_bits = 10; ++ sb->s_magic = HPPFS_SUPER_MAGIC; ++ sb->s_op = &hppfs_sbops; ++ ++ root_inode = iget(sb, 0); ++ if(root_inode == NULL) ++ goto out; ++ ++ err = init_inode(root_inode, proc_sb->s_root); ++ if(err) ++ goto out_put; ++ ++ err = -ENOMEM; ++ sb->s_root = d_alloc_root(root_inode); ++ if(sb->s_root == NULL) ++ goto out_put; ++ ++ hppfs_read_inode(root_inode); ++ ++ return(0); ++ ++ out_put: ++ iput(root_inode); ++ out: ++ return(err); ++} 
++ ++static struct super_block *hppfs_read_super(struct file_system_type *type, ++ int flags, const char *dev_name, ++ void *data) ++{ ++ return(get_sb_nodev(type, flags, data, hppfs_fill_super)); ++} ++ ++static struct file_system_type hppfs_type = { ++ .owner = THIS_MODULE, ++ .name = "hppfs", ++ .get_sb = hppfs_read_super, ++ .kill_sb = kill_anon_super, ++ .fs_flags = 0, ++}; ++ ++static int __init init_hppfs(void) ++{ ++ return(register_filesystem(&hppfs_type)); ++} ++ ++static void __exit exit_hppfs(void) ++{ ++ unregister_filesystem(&hppfs_type); ++} ++ ++module_init(init_hppfs) ++module_exit(exit_hppfs) ++MODULE_LICENSE("GPL"); ++ ++/* ++ * Overrides for Emacs so that we follow Linus's tabbing style. ++ * Emacs will notice this stuff at the end of the file and automatically ++ * adjust the settings for this buffer only. This must remain at the end ++ * of the file. ++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -Naur a/fs/hppfs/Makefile b/fs/hppfs/Makefile +--- a/fs/hppfs/Makefile 1969-12-31 19:00:00.000000000 -0500 ++++ b/fs/hppfs/Makefile 2004-02-11 12:27:35.000000000 -0500 +@@ -0,0 +1,19 @@ ++# ++# Copyright (C) 2002, 2003 Jeff Dike (jdike@karaya.com) ++# Licensed under the GPL ++# ++ ++hppfs-objs := hppfs_kern.o ++ ++obj-y = ++obj-$(CONFIG_HPPFS) += hppfs.o ++ ++clean: ++ ++modules: ++ ++fastdep: ++ ++dep: ++ ++archmrproper: clean +diff -Naur a/fs/Makefile b/fs/Makefile +--- a/fs/Makefile 2004-02-11 12:15:52.000000000 -0500 ++++ b/fs/Makefile 2004-02-11 12:27:45.000000000 -0500 +@@ -91,3 +91,5 @@ + obj-$(CONFIG_XFS_FS) += xfs/ + obj-$(CONFIG_AFS_FS) += afs/ + obj-$(CONFIG_BEFS_FS) += befs/ ++obj-$(CONFIG_HOSTFS) += hostfs/ ++obj-$(CONFIG_HPPFS) += hppfs/ +diff -Naur a/include/asm-um/archparam-i386.h b/include/asm-um/archparam-i386.h +--- a/include/asm-um/archparam-i386.h 2004-02-11 12:16:32.000000000 -0500 ++++ b/include/asm-um/archparam-i386.h 
2004-02-11 12:28:31.000000000 -0500 +@@ -1,5 +1,5 @@ + /* +- * Copyright (C) 2000, 2001 Jeff Dike (jdike@karaya.com) ++ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) + * Licensed under the GPL + */ + +@@ -56,6 +56,83 @@ + pr_reg[16] = PT_REGS_SS(regs); \ + } while(0); + ++#if 0 /* Turn this back on when UML has VSYSCALL working */ ++#define VSYSCALL_BASE (__fix_to_virt(FIX_VSYSCALL)) ++#else ++#define VSYSCALL_BASE NULL ++#endif ++ ++#define VSYSCALL_EHDR ((const struct elfhdr *) VSYSCALL_BASE) ++#define VSYSCALL_ENTRY ((unsigned long) &__kernel_vsyscall) ++extern void *__kernel_vsyscall; ++ ++/* ++ * Architecture-neutral AT_ values in 0-17, leave some room ++ * for more of them, start the x86-specific ones at 32. ++ */ ++#define AT_SYSINFO 32 ++#define AT_SYSINFO_EHDR 33 ++ ++#define ARCH_DLINFO \ ++do { \ ++ NEW_AUX_ENT(AT_SYSINFO, VSYSCALL_ENTRY); \ ++ NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE); \ ++} while (0) ++ ++/* ++ * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out ++ * extra segments containing the vsyscall DSO contents. Dumping its ++ * contents makes post-mortem fully interpretable later without matching up ++ * the same kernel and hardware config to see what PC values meant. ++ * Dumping its extra ELF program headers includes all the other information ++ * a debugger needs to easily find how the vsyscall DSO was being used. 
++ */ ++#define ELF_CORE_EXTRA_PHDRS (VSYSCALL_EHDR->e_phnum) ++#define ELF_CORE_WRITE_EXTRA_PHDRS \ ++do { \ ++ const struct elf_phdr *const vsyscall_phdrs = \ ++ (const struct elf_phdr *) (VSYSCALL_BASE \ ++ + VSYSCALL_EHDR->e_phoff); \ ++ int i; \ ++ Elf32_Off ofs = 0; \ ++ for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \ ++ struct elf_phdr phdr = vsyscall_phdrs[i]; \ ++ if (phdr.p_type == PT_LOAD) { \ ++ ofs = phdr.p_offset = offset; \ ++ offset += phdr.p_filesz; \ ++ } \ ++ else \ ++ phdr.p_offset += ofs; \ ++ phdr.p_paddr = 0; /* match other core phdrs */ \ ++ DUMP_WRITE(&phdr, sizeof(phdr)); \ ++ } \ ++} while (0) ++#define ELF_CORE_WRITE_EXTRA_DATA \ ++do { \ ++ const struct elf_phdr *const vsyscall_phdrs = \ ++ (const struct elf_phdr *) (VSYSCALL_BASE \ ++ + VSYSCALL_EHDR->e_phoff); \ ++ int i; \ ++ for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \ ++ if (vsyscall_phdrs[i].p_type == PT_LOAD) \ ++ DUMP_WRITE((void *) vsyscall_phdrs[i].p_vaddr, \ ++ vsyscall_phdrs[i].p_filesz); \ ++ } \ ++} while (0) ++ ++#define R_386_NONE 0 ++#define R_386_32 1 ++#define R_386_PC32 2 ++#define R_386_GOT32 3 ++#define R_386_PLT32 4 ++#define R_386_COPY 5 ++#define R_386_GLOB_DAT 6 ++#define R_386_JMP_SLOT 7 ++#define R_386_RELATIVE 8 ++#define R_386_GOTOFF 9 ++#define R_386_GOTPC 10 ++#define R_386_NUM 11 ++ + /********* Bits for asm-um/delay.h **********/ + + typedef unsigned long um_udelay_t; +diff -Naur a/include/asm-um/common.lds.S b/include/asm-um/common.lds.S +--- a/include/asm-um/common.lds.S 2004-02-11 12:14:28.000000000 -0500 ++++ b/include/asm-um/common.lds.S 2004-02-11 12:26:11.000000000 -0500 +@@ -1,3 +1,5 @@ ++#include ++ + .fini : { *(.fini) } =0x9090 + _etext = .; + PROVIDE (etext = .); +@@ -13,18 +15,6 @@ + + RODATA + +- __start___ksymtab = .; /* Kernel symbol table */ +- __ksymtab : { *(__ksymtab) } +- __stop___ksymtab = .; +- +- __start___gpl_ksymtab = .; /* Kernel symbol table: GPL-only symbols */ +- __gpl_ksymtab : { *(__gpl_ksymtab) } +- 
__stop___gpl_ksymtab = .; +- +- __start___kallsyms = .; /* All kernel symbols */ +- __kallsyms : { *(__kallsyms) } +- __stop___kallsyms = .; +- + .unprotected : { *(.unprotected) } + . = ALIGN(4096); + PROVIDE (_unprotected_end = .); +@@ -67,11 +57,17 @@ + } + __initcall_end = .; + ++ __con_initcall_start = .; ++ .con_initcall.init : { *(.con_initcall.init) } ++ __con_initcall_end = .; ++ + __uml_initcall_start = .; + .uml.initcall.init : { *(.uml.initcall.init) } + __uml_initcall_end = .; + __init_end = .; + ++ SECURITY_INIT ++ + __exitcall_begin = .; + .exitcall : { *(.exitcall.exit) } + __exitcall_end = .; +@@ -80,7 +76,33 @@ + .uml.exitcall : { *(.uml.exitcall.exit) } + __uml_exitcall_end = .; + +- . = ALIGN(4096); ++ . = ALIGN(4); ++ __alt_instructions = .; ++ .altinstructions : { *(.altinstructions) } ++ __alt_instructions_end = .; ++ .altinstr_replacement : { *(.altinstr_replacement) } ++ /* .exit.text is discard at runtime, not link time, to deal with references ++ from .altinstructions and .eh_frame */ ++ .exit.text : { *(.exit.text) } ++ .exit.data : { *(.exit.data) } ++ ++ __preinit_array_start = .; ++ .preinit_array : { *(.preinit_array) } ++ __preinit_array_end = .; ++ __init_array_start = .; ++ .init_array : { *(.init_array) } ++ __init_array_end = .; ++ __fini_array_start = .; ++ .fini_array : { *(.fini_array) } ++ __fini_array_end = .; ++ ++ . 
= ALIGN(4096); + __initramfs_start = .; + .init.ramfs : { *(.init.ramfs) } + __initramfs_end = .; ++ ++ /* Sections to be discarded */ ++ /DISCARD/ : { ++ *(.exitcall.exit) ++ } ++ +diff -Naur a/include/asm-um/cpufeature.h b/include/asm-um/cpufeature.h +--- a/include/asm-um/cpufeature.h 1969-12-31 19:00:00.000000000 -0500 ++++ b/include/asm-um/cpufeature.h 2004-02-11 12:25:42.000000000 -0500 +@@ -0,0 +1,6 @@ ++#ifndef __UM_CPUFEATURE_H ++#define __UM_CPUFEATURE_H ++ ++#include "asm/arch/cpufeature.h" ++ ++#endif +diff -Naur a/include/asm-um/current.h b/include/asm-um/current.h +--- a/include/asm-um/current.h 2004-02-11 12:14:18.000000000 -0500 ++++ b/include/asm-um/current.h 2004-02-11 12:26:01.000000000 -0500 +@@ -16,8 +16,10 @@ + #define CURRENT_THREAD(dummy) (((unsigned long) &dummy) & \ + (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER)) + +-#define current ({ int dummy; \ +- ((struct thread_info *) CURRENT_THREAD(dummy))->task; }) ++#define current_thread \ ++ ({ int dummy; ((struct thread_info *) CURRENT_THREAD(dummy)); }) ++ ++#define current (current_thread->task) + + #endif /* __ASSEMBLY__ */ + +diff -Naur a/include/asm-um/elf.h b/include/asm-um/elf.h +--- a/include/asm-um/elf.h 2004-02-11 12:16:01.000000000 -0500 ++++ b/include/asm-um/elf.h 2004-02-11 12:27:55.000000000 -0500 +@@ -15,4 +15,17 @@ + + #define USE_ELF_CORE_DUMP + ++#define R_386_NONE 0 ++#define R_386_32 1 ++#define R_386_PC32 2 ++#define R_386_GOT32 3 ++#define R_386_PLT32 4 ++#define R_386_COPY 5 ++#define R_386_GLOB_DAT 6 ++#define R_386_JMP_SLOT 7 ++#define R_386_RELATIVE 8 ++#define R_386_GOTOFF 9 ++#define R_386_GOTPC 10 ++#define R_386_NUM 11 ++ + #endif +diff -Naur a/include/asm-um/fixmap.h b/include/asm-um/fixmap.h +--- a/include/asm-um/fixmap.h 2004-02-11 12:16:42.000000000 -0500 ++++ b/include/asm-um/fixmap.h 2004-02-11 12:28:41.000000000 -0500 +@@ -34,6 +34,7 @@ + FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ + FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, + 
#endif ++ FIX_VSYSCALL, + __end_of_fixed_addresses + }; + +@@ -63,6 +64,13 @@ + #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) + #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) + ++/* ++ * This is the range that is readable by user mode, and things ++ * acting like user mode such as get_user_pages. ++ */ ++#define FIXADDR_USER_START (__fix_to_virt(FIX_VSYSCALL)) ++#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE) ++ + extern void __this_fixmap_does_not_exist(void); + + /* +diff -Naur a/include/asm-um/irq.h b/include/asm-um/irq.h +--- a/include/asm-um/irq.h 2004-02-11 12:17:06.000000000 -0500 ++++ b/include/asm-um/irq.h 2004-02-11 12:29:07.000000000 -0500 +@@ -1,15 +1,6 @@ + #ifndef __UM_IRQ_H + #define __UM_IRQ_H + +-/* The i386 irq.h has a struct task_struct in a prototype without including +- * sched.h. This forward declaration kills the resulting warning. +- */ +-struct task_struct; +- +-#include "asm/ptrace.h" +- +-#undef NR_IRQS +- + #define TIMER_IRQ 0 + #define UMN_IRQ 1 + #define CONSOLE_IRQ 2 +@@ -28,8 +19,4 @@ + #define LAST_IRQ XTERM_IRQ + #define NR_IRQS (LAST_IRQ + 1) + +-extern int um_request_irq(unsigned int irq, int fd, int type, +- void (*handler)(int, void *, struct pt_regs *), +- unsigned long irqflags, const char * devname, +- void *dev_id); + #endif +diff -Naur a/include/asm-um/local.h b/include/asm-um/local.h +--- a/include/asm-um/local.h 1969-12-31 19:00:00.000000000 -0500 ++++ b/include/asm-um/local.h 2004-02-11 12:27:52.000000000 -0500 +@@ -0,0 +1,6 @@ ++#ifndef __UM_LOCAL_H ++#define __UM_LOCAL_H ++ ++#include "asm/arch/local.h" ++ ++#endif +diff -Naur a/include/asm-um/module-generic.h b/include/asm-um/module-generic.h +--- a/include/asm-um/module-generic.h 1969-12-31 19:00:00.000000000 -0500 ++++ b/include/asm-um/module-generic.h 2004-02-11 12:27:42.000000000 -0500 +@@ -0,0 +1,6 @@ ++#ifndef __UM_MODULE_GENERIC_H ++#define __UM_MODULE_GENERIC_H ++ ++#include "asm/arch/module.h" ++ ++#endif 
+diff -Naur a/include/asm-um/module-i386.h b/include/asm-um/module-i386.h +--- a/include/asm-um/module-i386.h 1969-12-31 19:00:00.000000000 -0500 ++++ b/include/asm-um/module-i386.h 2004-02-11 12:27:42.000000000 -0500 +@@ -0,0 +1,13 @@ ++#ifndef __UM_MODULE_I386_H ++#define __UM_MODULE_I386_H ++ ++/* UML is simple */ ++struct mod_arch_specific ++{ ++}; ++ ++#define Elf_Shdr Elf32_Shdr ++#define Elf_Sym Elf32_Sym ++#define Elf_Ehdr Elf32_Ehdr ++ ++#endif +diff -Naur a/include/asm-um/page.h b/include/asm-um/page.h +--- a/include/asm-um/page.h 2004-02-11 12:15:52.000000000 -0500 ++++ b/include/asm-um/page.h 2004-02-11 12:27:45.000000000 -0500 +@@ -1,10 +1,14 @@ ++/* ++ * Copyright (C) 2000 - 2003 Jeff Dike (jdike@addtoit.com) ++ * Licensed under the GPL ++ */ ++ + #ifndef __UM_PAGE_H + #define __UM_PAGE_H + + struct page; + + #include "asm/arch/page.h" +-#include "asm/bug.h" + + #undef __pa + #undef __va +@@ -24,25 +28,36 @@ + + #define __va_space (8*1024*1024) + +-extern unsigned long region_pa(void *virt); +-extern void *region_va(unsigned long phys); +- +-#define __pa(virt) region_pa((void *) (virt)) +-#define __va(phys) region_va((unsigned long) (phys)) +- +-extern unsigned long page_to_pfn(struct page *page); +-extern struct page *pfn_to_page(unsigned long pfn); ++extern unsigned long to_phys(void *virt); ++extern void *to_virt(unsigned long phys); + +-extern struct page *phys_to_page(unsigned long phys); ++#define __pa(virt) to_phys((void *) virt) ++#define __va(phys) to_virt((unsigned long) phys) + +-#define virt_to_page(v) (phys_to_page(__pa(v))) ++#define page_to_pfn(page) ((page) - mem_map) ++#define pfn_to_page(pfn) (mem_map + (pfn)) + +-extern struct page *page_mem_map(struct page *page); ++#define phys_to_pfn(p) ((p) >> PAGE_SHIFT) ++#define pfn_to_phys(pfn) ((pfn) << PAGE_SHIFT) + +-#define pfn_valid(pfn) (page_mem_map(pfn_to_page(pfn)) != NULL) +-#define virt_addr_valid(v) pfn_valid(__pa(v) >> PAGE_SHIFT) ++#define pfn_valid(pfn) ((pfn) < max_mapnr) 
++#define virt_addr_valid(v) pfn_valid(phys_to_pfn(__pa(v))) + + extern struct page *arch_validate(struct page *page, int mask, int order); + #define HAVE_ARCH_VALIDATE + ++extern void arch_free_page(struct page *page, int order); ++#define HAVE_ARCH_FREE_PAGE ++ + #endif ++ ++/* ++ * Overrides for Emacs so that we follow Linus's tabbing style. ++ * Emacs will notice this stuff at the end of the file and automatically ++ * adjust the settings for this buffer only. This must remain at the end ++ * of the file. ++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ +diff -Naur a/include/asm-um/pgtable.h b/include/asm-um/pgtable.h +--- a/include/asm-um/pgtable.h 2004-02-11 12:17:12.000000000 -0500 ++++ b/include/asm-um/pgtable.h 2004-02-11 12:29:17.000000000 -0500 +@@ -12,8 +12,6 @@ + #include "asm/page.h" + #include "asm/fixmap.h" + +-extern pgd_t swapper_pg_dir[1024]; +- + extern void *um_virt_to_phys(struct task_struct *task, unsigned long virt, + pte_t *pte_out); + +@@ -49,6 +47,8 @@ + #define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + ++extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; ++ + /* + * pgd entries used up by user/kernel: + */ +@@ -65,10 +65,10 @@ + * area for the same reason. 
;) + */ + +-extern unsigned long high_physmem; ++extern unsigned long end_iomem; + + #define VMALLOC_OFFSET (__va_space) +-#define VMALLOC_START (((unsigned long) high_physmem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) ++#define VMALLOC_START ((end_iomem + VMALLOC_OFFSET) & ~(VMALLOC_OFFSET-1)) + + #ifdef CONFIG_HIGHMEM + # define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE) +@@ -78,12 +78,13 @@ + + #define _PAGE_PRESENT 0x001 + #define _PAGE_NEWPAGE 0x002 +-#define _PAGE_PROTNONE 0x004 /* If not present */ +-#define _PAGE_RW 0x008 +-#define _PAGE_USER 0x010 +-#define _PAGE_ACCESSED 0x020 +-#define _PAGE_DIRTY 0x040 +-#define _PAGE_NEWPROT 0x080 ++#define _PAGE_NEWPROT 0x004 ++#define _PAGE_FILE 0x008 /* set:pagecache unset:swap */ ++#define _PAGE_PROTNONE 0x010 /* If not present */ ++#define _PAGE_RW 0x020 ++#define _PAGE_USER 0x040 ++#define _PAGE_ACCESSED 0x080 ++#define _PAGE_DIRTY 0x100 + + #define REGION_MASK 0xf0000000 + #define REGION_SHIFT 28 +@@ -143,7 +144,8 @@ + + #define BAD_PAGETABLE __bad_pagetable() + #define BAD_PAGE __bad_page() +-#define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) ++ ++#define ZERO_PAGE(vaddr) virt_to_page(empty_zero_page) + + /* number of bits that fit into a memory pointer */ + #define BITS_PER_PTR (8*sizeof(unsigned long)) +@@ -164,9 +166,6 @@ + + #define pte_clear(xp) do { pte_val(*(xp)) = _PAGE_NEWPAGE; } while (0) + +-#define phys_region_index(x) (((x) & REGION_MASK) >> REGION_SHIFT) +-#define pte_region_index(x) phys_region_index(pte_val(x)) +- + #define pmd_none(x) (!(pmd_val(x) & ~_PAGE_NEWPAGE)) + #define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) + #define pmd_present(x) (pmd_val(x) & _PAGE_PRESENT) +@@ -188,19 +187,25 @@ + + #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) + +-extern struct page *pte_mem_map(pte_t pte); +-extern struct page *phys_mem_map(unsigned long phys); +-extern unsigned long phys_to_pfn(unsigned long p); +-extern unsigned long pfn_to_phys(unsigned long pfn); +- +-#define 
pte_page(x) pfn_to_page(pte_pfn(x)) +-#define pte_address(x) (__va(pte_val(x) & PAGE_MASK)) +-#define mk_phys(a, r) ((a) + (r << REGION_SHIFT)) +-#define phys_addr(p) ((p) & ~REGION_MASK) +-#define phys_page(p) (phys_mem_map(p) + ((phys_addr(p)) >> PAGE_SHIFT)) ++#define pte_page(pte) phys_to_page(pte_val(pte)) ++#define pmd_page(pmd) phys_to_page(pmd_val(pmd) & PAGE_MASK) ++ + #define pte_pfn(x) phys_to_pfn(pte_val(x)) + #define pfn_pte(pfn, prot) __pte(pfn_to_phys(pfn) | pgprot_val(prot)) +-#define pfn_pmd(pfn, prot) __pmd(pfn_to_phys(pfn) | pgprot_val(prot)) ++ ++extern struct page *phys_to_page(const unsigned long phys); ++extern struct page *__virt_to_page(const unsigned long virt); ++#define virt_to_page(addr) __virt_to_page((const unsigned long) addr) ++ ++/* ++ * Bits 0 through 3 are taken ++ */ ++#define PTE_FILE_MAX_BITS 28 ++ ++#define pte_to_pgoff(pte) ((pte).pte_low >> 4) ++ ++#define pgoff_to_pte(off) \ ++ ((pte_t) { ((off) << 4) + _PAGE_FILE }) + + static inline pte_t pte_mknewprot(pte_t pte) + { +@@ -235,6 +240,12 @@ + * The following only work if pte_present() is true. + * Undefined behaviour if not.. + */ ++static inline int pte_user(pte_t pte) ++{ ++ return((pte_val(pte) & _PAGE_USER) && ++ !(pte_val(pte) & _PAGE_PROTNONE)); ++} ++ + static inline int pte_read(pte_t pte) + { + return((pte_val(pte) & _PAGE_USER) && +@@ -252,6 +263,14 @@ + !(pte_val(pte) & _PAGE_PROTNONE)); + } + ++/* ++ * The following only works if pte_present() is not true. ++ */ ++static inline int pte_file(pte_t pte) ++{ ++ return (pte).pte_low & _PAGE_FILE; ++} ++ + static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; } + static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; } + static inline int pte_newpage(pte_t pte) { return pte_val(pte) & _PAGE_NEWPAGE; } +@@ -334,14 +353,7 @@ + * and a page entry and page directory to the page they refer to. 
+ */ + +-#define mk_pte(page, pgprot) \ +-({ \ +- pte_t __pte; \ +- \ +- pte_val(__pte) = page_to_phys(page) + pgprot_val(pgprot);\ +- if(pte_present(__pte)) pte_mknewprot(pte_mknewpage(__pte)); \ +- __pte; \ +-}) ++extern pte_t mk_pte(struct page *page, pgprot_t pgprot); + + static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) + { +@@ -351,17 +363,27 @@ + } + + #define pmd_page_kernel(pmd) ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) +-#define pmd_page(pmd) (phys_mem_map(pmd_val(pmd) & PAGE_MASK) + \ +- ((phys_addr(pmd_val(pmd)) >> PAGE_SHIFT))) + +-/* to find an entry in a page-table-directory. */ ++/* ++ * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] ++ * ++ * this macro returns the index of the entry in the pgd page which would ++ * control the given virtual address ++ */ + #define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) + +-/* to find an entry in a page-table-directory */ ++/* ++ * pgd_offset() returns a (pgd_t *) ++ * pgd_index() is used get the offset into the pgd page's array of pgd_t's; ++ */ + #define pgd_offset(mm, address) \ + ((mm)->pgd + ((address) >> PGDIR_SHIFT)) + +-/* to find an entry in a kernel page-table-directory */ ++ ++/* ++ * a shortcut which implies the use of the kernel's pgd, instead ++ * of a process's ++ */ + #define pgd_offset_k(address) pgd_offset(&init_mm, address) + + #define pmd_index(address) \ +@@ -373,7 +395,12 @@ + return (pmd_t *) dir; + } + +-/* Find an entry in the third-level page table.. 
*/ ++/* ++ * the pte page can be thought of an array like this: pte_t[PTRS_PER_PTE] ++ * ++ * this macro returns the index of the entry in the pte page which would ++ * control the given virtual address ++ */ + #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) + #define pte_offset_kernel(dir, address) \ + ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address)) +@@ -399,11 +426,11 @@ + #define update_mmu_cache(vma,address,pte) do ; while (0) + + /* Encode and de-code a swap entry */ +-#define __swp_type(x) (((x).val >> 3) & 0x7f) +-#define __swp_offset(x) ((x).val >> 10) ++#define __swp_type(x) (((x).val >> 4) & 0x3f) ++#define __swp_offset(x) ((x).val >> 11) + + #define __swp_entry(type, offset) \ +- ((swp_entry_t) { ((type) << 3) | ((offset) << 10) }) ++ ((swp_entry_t) { ((type) << 4) | ((offset) << 11) }) + #define __pte_to_swp_entry(pte) \ + ((swp_entry_t) { pte_val(pte_mkuptodate(pte)) }) + #define __swp_entry_to_pte(x) ((pte_t) { (x).val }) +diff -Naur a/include/asm-um/processor-generic.h b/include/asm-um/processor-generic.h +--- a/include/asm-um/processor-generic.h 2004-02-11 12:14:28.000000000 -0500 ++++ b/include/asm-um/processor-generic.h 2004-02-11 12:26:10.000000000 -0500 +@@ -11,9 +11,7 @@ + struct task_struct; + + #include "linux/config.h" +-#include "linux/signal.h" + #include "asm/ptrace.h" +-#include "asm/siginfo.h" + #include "choose-mode.h" + + struct mm_struct; +@@ -22,23 +20,6 @@ + + #define cpu_relax() do ; while (0) + +-#ifdef CONFIG_MODE_TT +-struct proc_tt_mode { +- int extern_pid; +- int tracing; +- int switch_pipe[2]; +- int singlestep_syscall; +- int vm_seq; +-}; +-#endif +- +-#ifdef CONFIG_MODE_SKAS +-struct proc_skas_mode { +- void *switch_buf; +- void *fork_buf; +-}; +-#endif +- + struct thread_struct { + int forking; + unsigned long kernel_stack; +@@ -46,6 +27,7 @@ + struct pt_regs regs; + unsigned long cr2; + int err; ++ unsigned long trap_no; + void *fault_addr; + void *fault_catcher; + struct task_struct 
*prev_sched; +@@ -54,10 +36,20 @@ + struct arch_thread arch; + union { + #ifdef CONFIG_MODE_TT +- struct proc_tt_mode tt; ++ struct { ++ int extern_pid; ++ int tracing; ++ int switch_pipe[2]; ++ int singlestep_syscall; ++ int vm_seq; ++ } tt; + #endif + #ifdef CONFIG_MODE_SKAS +- struct proc_skas_mode skas; ++ struct { ++ void *switch_buf; ++ void *fork_buf; ++ int mm_count; ++ } skas; + #endif + } mode; + struct { +@@ -101,14 +93,19 @@ + } mm_segment_t; + + extern struct task_struct *alloc_task_struct(void); +-extern void free_task_struct(struct task_struct *task); + + extern void release_thread(struct task_struct *); + extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags); + extern void dump_thread(struct pt_regs *regs, struct user *u); ++extern void prepare_to_copy(struct task_struct *tsk); + + extern unsigned long thread_saved_pc(struct task_struct *t); + ++static inline void mm_copy_segments(struct mm_struct *from_mm, ++ struct mm_struct *new_mm) ++{ ++} ++ + #define init_stack (init_thread_union.stack) + + /* +diff -Naur a/include/asm-um/processor-i386.h b/include/asm-um/processor-i386.h +--- a/include/asm-um/processor-i386.h 2004-02-11 12:14:17.000000000 -0500 ++++ b/include/asm-um/processor-i386.h 2004-02-11 12:26:00.000000000 -0500 +@@ -6,8 +6,8 @@ + #ifndef __UM_PROCESSOR_I386_H + #define __UM_PROCESSOR_I386_H + +-extern int cpu_has_xmm; +-extern int cpu_has_cmov; ++extern int host_has_xmm; ++extern int host_has_cmov; + + struct arch_thread { + unsigned long debugregs[8]; +diff -Naur a/include/asm-um/sections.h b/include/asm-um/sections.h +--- a/include/asm-um/sections.h 1969-12-31 19:00:00.000000000 -0500 ++++ b/include/asm-um/sections.h 2004-02-11 12:27:57.000000000 -0500 +@@ -0,0 +1,7 @@ ++#ifndef _UM_SECTIONS_H ++#define _UM_SECTIONS_H ++ ++/* nothing to see, move along */ ++#include <asm-generic/sections.h> ++ ++#endif +diff -Naur a/include/asm-um/smp.h b/include/asm-um/smp.h +--- a/include/asm-um/smp.h 2004-02-11 12:14:12.000000000 -0500 ++++ 
b/include/asm-um/smp.h 2004-02-11 12:25:41.000000000 -0500 +@@ -10,7 +10,7 @@ + + extern cpumask_t cpu_online_map; + +-#define smp_processor_id() (current->thread_info->cpu) ++#define smp_processor_id() (current_thread->cpu) + #define cpu_logical_map(n) (n) + #define cpu_number_map(n) (n) + #define PROC_CHANGE_PENALTY 15 /* Pick a number, any number */ +diff -Naur a/include/asm-um/smplock.h b/include/asm-um/smplock.h +--- a/include/asm-um/smplock.h 2004-02-11 12:14:27.000000000 -0500 ++++ b/include/asm-um/smplock.h 1969-12-31 19:00:00.000000000 -0500 +@@ -1,6 +0,0 @@ +-#ifndef __UM_SMPLOCK_H +-#define __UM_SMPLOCK_H +- +-#include "asm/arch/smplock.h" +- +-#endif +diff -Naur a/include/asm-um/spinlock.h b/include/asm-um/spinlock.h +--- a/include/asm-um/spinlock.h 2004-02-11 12:16:39.000000000 -0500 ++++ b/include/asm-um/spinlock.h 1969-12-31 19:00:00.000000000 -0500 +@@ -1,10 +0,0 @@ +-#ifndef __UM_SPINLOCK_H +-#define __UM_SPINLOCK_H +- +-#include "linux/config.h" +- +-#ifdef CONFIG_SMP +-#include "asm/arch/spinlock.h" +-#endif +- +-#endif +diff -Naur a/include/asm-um/system-generic.h b/include/asm-um/system-generic.h +--- a/include/asm-um/system-generic.h 2004-02-11 12:17:08.000000000 -0500 ++++ b/include/asm-um/system-generic.h 2004-02-11 12:29:12.000000000 -0500 +@@ -23,8 +23,10 @@ + extern void block_signals(void); + extern void unblock_signals(void); + +-#define local_save_flags(flags) do { (flags) = get_signals(); } while(0) +-#define local_irq_restore(flags) do { set_signals(flags); } while(0) ++#define local_save_flags(flags) do { typecheck(unsigned long, flags); \ ++ (flags) = get_signals(); } while(0) ++#define local_irq_restore(flags) do { typecheck(unsigned long, flags); \ ++ set_signals(flags); } while(0) + + #define local_irq_save(flags) do { local_save_flags(flags); \ + local_irq_disable(); } while(0) +@@ -39,4 +41,7 @@ + (flags == 0); \ + }) + ++extern void *_switch_to(void *prev, void *next, void *last); ++#define switch_to(prev, next, last) prev = 
_switch_to(prev, next, last) ++ + #endif +diff -Naur a/include/asm-um/thread_info.h b/include/asm-um/thread_info.h +--- a/include/asm-um/thread_info.h 2004-02-11 12:14:39.000000000 -0500 ++++ b/include/asm-um/thread_info.h 2004-02-11 12:26:45.000000000 -0500 +@@ -9,6 +9,7 @@ + #ifndef __ASSEMBLY__ + + #include <asm/processor.h> ++#include <asm/types.h> + + struct thread_info { + struct task_struct *task; /* main task structure */ +@@ -43,15 +44,18 @@ + static inline struct thread_info *current_thread_info(void) + { + struct thread_info *ti; +- __asm__("andl %%esp,%0; ":"=r" (ti) : "0" (~16383UL)); ++ unsigned long mask = PAGE_SIZE * ++ (1 << CONFIG_KERNEL_STACK_ORDER) - 1; ++ __asm__("andl %%esp,%0; ":"=r" (ti) : "0" (~mask)); + return ti; + } + + /* thread information allocation */ +-#define THREAD_SIZE (4*PAGE_SIZE) +-#define alloc_thread_info(tsk) ((struct thread_info *) \ +- __get_free_pages(GFP_KERNEL,2)) +-#define free_thread_info(ti) free_pages((unsigned long) (ti), 2) ++#define THREAD_SIZE ((1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE) ++#define alloc_thread_info(tsk) \ ++ ((struct thread_info *) kmalloc(THREAD_SIZE, GFP_KERNEL)) ++#define free_thread_info(ti) kfree(ti) ++ + #define get_thread_info(ti) get_task_struct((ti)->task) + #define put_thread_info(ti) put_task_struct((ti)->task) + +@@ -65,11 +69,13 @@ + #define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling + * TIF_NEED_RESCHED + */ ++#define TIF_RESTART_BLOCK 4 + + #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) + #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) + #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) + #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG) ++#define _TIF_RESTART_BLOCK (1 << TIF_RESTART_BLOCK) + + #endif + +diff -Naur a/include/asm-um/timex.h b/include/asm-um/timex.h +--- a/include/asm-um/timex.h 2004-02-11 12:16:00.000000000 -0500 ++++ b/include/asm-um/timex.h 2004-02-11 12:27:53.000000000 -0500 +@@ -1,8 +1,6 @@ + #ifndef __UM_TIMEX_H + #define __UM_TIMEX_H + +-#include "linux/time.h" +- + 
typedef unsigned long cycles_t; + + #define cacheflush_time (0) +diff -Naur a/include/asm-um/uaccess.h b/include/asm-um/uaccess.h +--- a/include/asm-um/uaccess.h 2004-02-11 12:16:04.000000000 -0500 ++++ b/include/asm-um/uaccess.h 2004-02-11 12:28:00.000000000 -0500 +@@ -6,6 +6,8 @@ + #ifndef __UM_UACCESS_H + #define __UM_UACCESS_H + ++#include "linux/sched.h" ++ + #define VERIFY_READ 0 + #define VERIFY_WRITE 1 + +diff -Naur a/include/asm-um/unistd.h b/include/asm-um/unistd.h +--- a/include/asm-um/unistd.h 2004-02-11 12:16:33.000000000 -0500 ++++ b/include/asm-um/unistd.h 2004-02-11 12:28:32.000000000 -0500 +@@ -33,7 +33,10 @@ + set_fs(KERNEL_DS); \ + ret = sys(args); \ + set_fs(fs); \ +- return ret; ++ if (ret >= 0) \ ++ return ret; \ ++ errno = -(long)ret; \ ++ return -1; + + static inline long open(const char *pathname, int flags, int mode) + { +diff -Naur a/include/linux/gfp.h b/include/linux/gfp.h +--- a/include/linux/gfp.h 2004-02-11 12:14:33.000000000 -0500 ++++ b/include/linux/gfp.h 2004-02-11 12:26:16.000000000 -0500 +@@ -63,6 +63,11 @@ + * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets + * optimized to &contig_page_data at compile-time. + */ ++ ++#ifndef HAVE_ARCH_FREE_PAGE ++static inline void arch_free_page(struct page *page, int order) { } ++#endif ++ + extern struct page * FASTCALL(__alloc_pages(unsigned int, unsigned int, struct zonelist *)); + static inline struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order) + { +diff -Naur a/include/linux/ghash.h b/include/linux/ghash.h +--- a/include/linux/ghash.h 1969-12-31 19:00:00.000000000 -0500 ++++ b/include/linux/ghash.h 2004-02-11 12:26:13.000000000 -0500 +@@ -0,0 +1,236 @@ ++/* ++ * include/linux/ghash.h -- generic hashing with fuzzy retrieval ++ * ++ * (C) 1997 Thomas Schoebel-Theuer ++ * ++ * The algorithms implemented here seem to be a completely new invention, ++ * and I'll publish the fundamentals in a paper. 
++ */ ++ ++#ifndef _GHASH_H ++#define _GHASH_H ++/* HASHSIZE _must_ be a power of two!!! */ ++ ++ ++#define DEF_HASH_FUZZY_STRUCTS(NAME,HASHSIZE,TYPE) \ ++\ ++struct NAME##_table {\ ++ TYPE * hashtable[HASHSIZE];\ ++ TYPE * sorted_list;\ ++ int nr_entries;\ ++};\ ++\ ++struct NAME##_ptrs {\ ++ TYPE * next_hash;\ ++ TYPE * prev_hash;\ ++ TYPE * next_sorted;\ ++ TYPE * prev_sorted;\ ++}; ++ ++#define DEF_HASH_FUZZY(LINKAGE,NAME,HASHSIZE,TYPE,PTRS,KEYTYPE,KEY,KEYCMP,KEYEQ,HASHFN)\ ++\ ++LINKAGE void insert_##NAME##_hash(struct NAME##_table * tbl, TYPE * elem)\ ++{\ ++ int ix = HASHFN(elem->KEY);\ ++ TYPE ** base = &tbl->hashtable[ix];\ ++ TYPE * ptr = *base;\ ++ TYPE * prev = NULL;\ ++\ ++ tbl->nr_entries++;\ ++ while(ptr && KEYCMP(ptr->KEY, elem->KEY)) {\ ++ base = &ptr->PTRS.next_hash;\ ++ prev = ptr;\ ++ ptr = *base;\ ++ }\ ++ elem->PTRS.next_hash = ptr;\ ++ elem->PTRS.prev_hash = prev;\ ++ if(ptr) {\ ++ ptr->PTRS.prev_hash = elem;\ ++ }\ ++ *base = elem;\ ++\ ++ ptr = prev;\ ++ if(!ptr) {\ ++ ptr = tbl->sorted_list;\ ++ prev = NULL;\ ++ } else {\ ++ prev = ptr->PTRS.prev_sorted;\ ++ }\ ++ while(ptr) {\ ++ TYPE * next = ptr->PTRS.next_hash;\ ++ if(next && KEYCMP(next->KEY, elem->KEY)) {\ ++ prev = ptr;\ ++ ptr = next;\ ++ } else if(KEYCMP(ptr->KEY, elem->KEY)) {\ ++ prev = ptr;\ ++ ptr = ptr->PTRS.next_sorted;\ ++ } else\ ++ break;\ ++ }\ ++ elem->PTRS.next_sorted = ptr;\ ++ elem->PTRS.prev_sorted = prev;\ ++ if(ptr) {\ ++ ptr->PTRS.prev_sorted = elem;\ ++ }\ ++ if(prev) {\ ++ prev->PTRS.next_sorted = elem;\ ++ } else {\ ++ tbl->sorted_list = elem;\ ++ }\ ++}\ ++\ ++LINKAGE void remove_##NAME##_hash(struct NAME##_table * tbl, TYPE * elem)\ ++{\ ++ TYPE * next = elem->PTRS.next_hash;\ ++ TYPE * prev = elem->PTRS.prev_hash;\ ++\ ++ tbl->nr_entries--;\ ++ if(next)\ ++ next->PTRS.prev_hash = prev;\ ++ if(prev)\ ++ prev->PTRS.next_hash = next;\ ++ else {\ ++ int ix = HASHFN(elem->KEY);\ ++ tbl->hashtable[ix] = next;\ ++ }\ ++\ ++ next = elem->PTRS.next_sorted;\ ++ prev 
= elem->PTRS.prev_sorted;\ ++ if(next)\ ++ next->PTRS.prev_sorted = prev;\ ++ if(prev)\ ++ prev->PTRS.next_sorted = next;\ ++ else\ ++ tbl->sorted_list = next;\ ++}\ ++\ ++LINKAGE TYPE * find_##NAME##_hash(struct NAME##_table * tbl, KEYTYPE pos)\ ++{\ ++ int ix = hashfn(pos);\ ++ TYPE * ptr = tbl->hashtable[ix];\ ++ while(ptr && KEYCMP(ptr->KEY, pos))\ ++ ptr = ptr->PTRS.next_hash;\ ++ if(ptr && !KEYEQ(ptr->KEY, pos))\ ++ ptr = NULL;\ ++ return ptr;\ ++}\ ++\ ++LINKAGE TYPE * find_##NAME##_hash_fuzzy(struct NAME##_table * tbl, KEYTYPE pos)\ ++{\ ++ int ix;\ ++ int offset;\ ++ TYPE * ptr;\ ++ TYPE * next;\ ++\ ++ ptr = tbl->sorted_list;\ ++ if(!ptr || KEYCMP(pos, ptr->KEY))\ ++ return NULL;\ ++ ix = HASHFN(pos);\ ++ offset = HASHSIZE;\ ++ do {\ ++ offset >>= 1;\ ++ next = tbl->hashtable[(ix+offset) & ((HASHSIZE)-1)];\ ++ if(next && (KEYCMP(next->KEY, pos) || KEYEQ(next->KEY, pos))\ ++ && KEYCMP(ptr->KEY, next->KEY))\ ++ ptr = next;\ ++ } while(offset);\ ++\ ++ for(;;) {\ ++ next = ptr->PTRS.next_hash;\ ++ if(next) {\ ++ if(KEYCMP(next->KEY, pos)) {\ ++ ptr = next;\ ++ continue;\ ++ }\ ++ }\ ++ next = ptr->PTRS.next_sorted;\ ++ if(next && KEYCMP(next->KEY, pos)) {\ ++ ptr = next;\ ++ continue;\ ++ }\ ++ return ptr;\ ++ }\ ++ return NULL;\ ++} ++ ++/* LINKAGE - empty or "static", depending on whether you want the definitions to ++ * be public or not ++ * NAME - a string to stick in names to make this hash table type distinct from ++ * any others ++ * HASHSIZE - number of buckets ++ * TYPE - type of data contained in the buckets - must be a structure, one ++ * field is of type NAME_ptrs, another is the hash key ++ * PTRS - TYPE must contain a field of type NAME_ptrs, PTRS is the name of that ++ * field ++ * KEYTYPE - type of the key field within TYPE ++ * KEY - name of the key field within TYPE ++ * KEYCMP - pointer to function that compares KEYTYPEs to each other - the ++ * prototype is int KEYCMP(KEYTYPE, KEYTYPE), it returns zero for equal, ++ * non-zero for not 
equal ++ * HASHFN - the hash function - the prototype is int HASHFN(KEYTYPE), ++ * it returns a number in the range 0 ... HASHSIZE - 1 ++ * Call DEF_HASH_STRUCTS, define your hash table as a NAME_table, then call ++ * DEF_HASH. ++ */ ++ ++#define DEF_HASH_STRUCTS(NAME,HASHSIZE,TYPE) \ ++\ ++struct NAME##_table {\ ++ TYPE * hashtable[HASHSIZE];\ ++ int nr_entries;\ ++};\ ++\ ++struct NAME##_ptrs {\ ++ TYPE * next_hash;\ ++ TYPE * prev_hash;\ ++}; ++ ++#define DEF_HASH(LINKAGE,NAME,TYPE,PTRS,KEYTYPE,KEY,KEYCMP,HASHFN)\ ++\ ++LINKAGE void insert_##NAME##_hash(struct NAME##_table * tbl, TYPE * elem)\ ++{\ ++ int ix = HASHFN(elem->KEY);\ ++ TYPE ** base = &tbl->hashtable[ix];\ ++ TYPE * ptr = *base;\ ++ TYPE * prev = NULL;\ ++\ ++ tbl->nr_entries++;\ ++ while(ptr && KEYCMP(ptr->KEY, elem->KEY)) {\ ++ base = &ptr->PTRS.next_hash;\ ++ prev = ptr;\ ++ ptr = *base;\ ++ }\ ++ elem->PTRS.next_hash = ptr;\ ++ elem->PTRS.prev_hash = prev;\ ++ if(ptr) {\ ++ ptr->PTRS.prev_hash = elem;\ ++ }\ ++ *base = elem;\ ++}\ ++\ ++LINKAGE void remove_##NAME##_hash(struct NAME##_table * tbl, TYPE * elem)\ ++{\ ++ TYPE * next = elem->PTRS.next_hash;\ ++ TYPE * prev = elem->PTRS.prev_hash;\ ++\ ++ tbl->nr_entries--;\ ++ if(next)\ ++ next->PTRS.prev_hash = prev;\ ++ if(prev)\ ++ prev->PTRS.next_hash = next;\ ++ else {\ ++ int ix = HASHFN(elem->KEY);\ ++ tbl->hashtable[ix] = next;\ ++ }\ ++}\ ++\ ++LINKAGE TYPE * find_##NAME##_hash(struct NAME##_table * tbl, KEYTYPE pos)\ ++{\ ++ int ix = HASHFN(pos);\ ++ TYPE * ptr = tbl->hashtable[ix];\ ++ while(ptr && KEYCMP(ptr->KEY, pos))\ ++ ptr = ptr->PTRS.next_hash;\ ++ return ptr;\ ++} ++ ++#endif +diff -Naur a/include/linux/mm.h b/include/linux/mm.h +--- a/include/linux/mm.h 2004-02-11 12:14:17.000000000 -0500 ++++ b/include/linux/mm.h 2004-02-11 12:26:00.000000000 -0500 +@@ -507,6 +507,9 @@ + return __set_page_dirty_buffers(page); + } + ++extern long do_mprotect(struct mm_struct *mm, unsigned long start, ++ size_t len, unsigned long prot); ++ + /* + 
* On a two-level page table, this ends up being trivial. Thus the + * inlining and the symmetry break with pte_alloc_map() that does all +@@ -537,9 +540,10 @@ + + extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); + +-extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, +- unsigned long len, unsigned long prot, +- unsigned long flag, unsigned long pgoff); ++extern unsigned long do_mmap_pgoff(struct mm_struct *mm, struct file *file, ++ unsigned long addr, unsigned long len, ++ unsigned long prot, unsigned long flag, ++ unsigned long pgoff); + + static inline unsigned long do_mmap(struct file *file, unsigned long addr, + unsigned long len, unsigned long prot, +@@ -549,7 +553,8 @@ + if ((offset + PAGE_ALIGN(len)) < offset) + goto out; + if (!(offset & ~PAGE_MASK)) +- ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); ++ ret = do_mmap_pgoff(current->mm, file, addr, len, prot, flag, ++ offset >> PAGE_SHIFT); + out: + return ret; + } +diff -Naur a/include/linux/proc_mm.h b/include/linux/proc_mm.h +--- a/include/linux/proc_mm.h 1969-12-31 19:00:00.000000000 -0500 ++++ b/include/linux/proc_mm.h 2004-02-11 12:25:40.000000000 -0500 +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) ++ * Licensed under the GPL ++ */ ++ ++#ifndef __PROC_MM_H ++#define __PROC_MM_H ++ ++#include "linux/sched.h" ++ ++#define MM_MMAP 54 ++#define MM_MUNMAP 55 ++#define MM_MPROTECT 56 ++#define MM_COPY_SEGMENTS 57 ++ ++struct mm_mmap { ++ unsigned long addr; ++ unsigned long len; ++ unsigned long prot; ++ unsigned long flags; ++ unsigned long fd; ++ unsigned long offset; ++}; ++ ++struct mm_munmap { ++ unsigned long addr; ++ unsigned long len; ++}; ++ ++struct mm_mprotect { ++ unsigned long addr; ++ unsigned long len; ++ unsigned int prot; ++}; ++ ++struct proc_mm_op { ++ int op; ++ union { ++ struct mm_mmap mmap; ++ struct mm_munmap munmap; ++ struct mm_mprotect 
mprotect; ++ int copy_segments; ++ } u; ++}; ++ ++extern struct mm_struct *proc_mm_get_mm(int fd); ++ ++#endif +diff -Naur a/include/linux/time.h b/include/linux/time.h +--- a/include/linux/time.h 2004-02-11 12:15:59.000000000 -0500 ++++ b/include/linux/time.h 2004-02-11 12:27:52.000000000 -0500 +@@ -41,7 +41,7 @@ + * Have the 32 bit jiffies value wrap 5 minutes after boot + * so jiffies wrap bugs show up earlier. + */ +-#define INITIAL_JIFFIES ((unsigned long)(unsigned int) (-300*HZ)) ++#define INITIAL_JIFFIES ((unsigned long)(0)) + + /* + * Change timeval to jiffies, trying to avoid the +diff -Naur a/mm/Makefile b/mm/Makefile +--- a/mm/Makefile 2004-02-11 12:15:59.000000000 -0500 ++++ b/mm/Makefile 2004-02-11 12:27:53.000000000 -0500 +@@ -12,3 +12,5 @@ + slab.o swap.o truncate.o vmscan.o $(mmu-y) + + obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o ++obj-$(CONFIG_PROC_MM) += proc_mm.o ++ +diff -Naur a/mm/mmap.c b/mm/mmap.c +--- a/mm/mmap.c 2004-02-11 12:15:58.000000000 -0500 ++++ b/mm/mmap.c 2004-02-11 12:27:51.000000000 -0500 +@@ -460,11 +460,11 @@ + * The caller must hold down_write(current->mm->mmap_sem). 
+ */ + +-unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, +- unsigned long len, unsigned long prot, +- unsigned long flags, unsigned long pgoff) ++unsigned long do_mmap_pgoff(struct mm_struct *mm, struct file * file, ++ unsigned long addr, unsigned long len, ++ unsigned long prot, unsigned long flags, ++ unsigned long pgoff) + { +- struct mm_struct * mm = current->mm; + struct vm_area_struct * vma, * prev; + struct inode *inode; + unsigned int vm_flags; +diff -Naur a/mm/mprotect.c b/mm/mprotect.c +--- a/mm/mprotect.c 2004-02-11 12:15:11.000000000 -0500 ++++ b/mm/mprotect.c 2004-02-11 12:26:58.000000000 -0500 +@@ -222,7 +222,8 @@ + } + + asmlinkage long +-sys_mprotect(unsigned long start, size_t len, unsigned long prot) ++do_mprotect(struct mm_struct *mm, unsigned long start, size_t len, ++ unsigned long prot) + { + unsigned long vm_flags, nstart, end, tmp; + struct vm_area_struct * vma, * next, * prev; +@@ -245,9 +246,9 @@ + + vm_flags = calc_vm_prot_bits(prot); + +- down_write(&current->mm->mmap_sem); ++ down_write(&mm->mmap_sem); + +- vma = find_vma_prev(current->mm, start, &prev); ++ vma = find_vma_prev(mm, start, &prev); + error = -ENOMEM; + if (!vma) + goto out; +@@ -326,6 +327,11 @@ + prev->vm_mm->map_count--; + } + out: +- up_write(&current->mm->mmap_sem); ++ up_write(&mm->mmap_sem); + return error; + } ++ ++asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot) ++{ ++ return(do_mprotect(current->mm, start, len, prot)); ++} +diff -Naur a/mm/page_alloc.c b/mm/page_alloc.c +--- a/mm/page_alloc.c 2004-02-11 12:14:18.000000000 -0500 ++++ b/mm/page_alloc.c 2004-02-11 12:26:01.000000000 -0500 +@@ -268,6 +268,8 @@ + LIST_HEAD(list); + int i; + ++ arch_free_page(page, order); ++ + mod_page_state(pgfree, 1 << order); + for (i = 0 ; i < (1 << order) ; ++i) + free_pages_check(__FUNCTION__, page + i); +@@ -449,6 +451,8 @@ + struct per_cpu_pages *pcp; + unsigned long flags; + ++ arch_free_page(page, 0); ++ + kernel_map_pages(page, 1, 0); 
+ inc_page_state(pgfree); + free_pages_check(__FUNCTION__, page); +diff -Naur a/mm/proc_mm.c b/mm/proc_mm.c +--- a/mm/proc_mm.c 1969-12-31 19:00:00.000000000 -0500 ++++ b/mm/proc_mm.c 2004-02-11 12:27:05.000000000 -0500 +@@ -0,0 +1,174 @@ ++/* ++ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com) ++ * Licensed under the GPL ++ */ ++ ++#include "linux/mm.h" ++#include "linux/init.h" ++#include "linux/proc_fs.h" ++#include "linux/proc_mm.h" ++#include "linux/file.h" ++#include "asm/uaccess.h" ++#include "asm/mmu_context.h" ++ ++static struct file_operations proc_mm_fops; ++ ++struct mm_struct *proc_mm_get_mm(int fd) ++{ ++ struct mm_struct *ret = ERR_PTR(-EBADF); ++ struct file *file; ++ ++ file = fget(fd); ++ if (!file) ++ goto out; ++ ++ ret = ERR_PTR(-EINVAL); ++ if(file->f_op != &proc_mm_fops) ++ goto out_fput; ++ ++ ret = file->private_data; ++ out_fput: ++ fput(file); ++ out: ++ return(ret); ++} ++ ++extern long do_mmap2(struct mm_struct *mm, unsigned long addr, ++ unsigned long len, unsigned long prot, ++ unsigned long flags, unsigned long fd, ++ unsigned long pgoff); ++ ++static ssize_t write_proc_mm(struct file *file, const char *buffer, ++ size_t count, loff_t *ppos) ++{ ++ struct mm_struct *mm = file->private_data; ++ struct proc_mm_op req; ++ int n, ret; ++ ++ if(count > sizeof(req)) ++ return(-EINVAL); ++ ++ n = copy_from_user(&req, buffer, count); ++ if(n != 0) ++ return(-EFAULT); ++ ++ ret = count; ++ switch(req.op){ ++ case MM_MMAP: { ++ struct mm_mmap *map = &req.u.mmap; ++ ++ ret = do_mmap2(mm, map->addr, map->len, map->prot, ++ map->flags, map->fd, map->offset >> PAGE_SHIFT); ++ if((ret & ~PAGE_MASK) == 0) ++ ret = count; ++ ++ break; ++ } ++ case MM_MUNMAP: { ++ struct mm_munmap *unmap = &req.u.munmap; ++ ++ down_write(&mm->mmap_sem); ++ ret = do_munmap(mm, unmap->addr, unmap->len); ++ up_write(&mm->mmap_sem); ++ ++ if(ret == 0) ++ ret = count; ++ break; ++ } ++ case MM_MPROTECT: { ++ struct mm_mprotect *protect = &req.u.mprotect; ++ ++ ret = 
do_mprotect(mm, protect->addr, protect->len, ++ protect->prot); ++ if(ret == 0) ++ ret = count; ++ break; ++ } ++ ++ case MM_COPY_SEGMENTS: { ++ struct mm_struct *from = proc_mm_get_mm(req.u.copy_segments); ++ ++ if(IS_ERR(from)){ ++ ret = PTR_ERR(from); ++ break; ++ } ++ ++ mm_copy_segments(from, mm); ++ break; ++ } ++ default: ++ ret = -EINVAL; ++ break; ++ } ++ ++ return(ret); ++} ++ ++static int open_proc_mm(struct inode *inode, struct file *file) ++{ ++ struct mm_struct *mm = mm_alloc(); ++ int ret; ++ ++ ret = -ENOMEM; ++ if(mm == NULL) ++ goto out_mem; ++ ++ ret = init_new_context(current, mm); ++ if(ret) ++ goto out_free; ++ ++ spin_lock(&mmlist_lock); ++ list_add(&mm->mmlist, ¤t->mm->mmlist); ++ mmlist_nr++; ++ spin_unlock(&mmlist_lock); ++ ++ file->private_data = mm; ++ ++ return(0); ++ ++ out_free: ++ mmput(mm); ++ out_mem: ++ return(ret); ++} ++ ++static int release_proc_mm(struct inode *inode, struct file *file) ++{ ++ struct mm_struct *mm = file->private_data; ++ ++ mmput(mm); ++ return(0); ++} ++ ++static struct file_operations proc_mm_fops = { ++ .open = open_proc_mm, ++ .release = release_proc_mm, ++ .write = write_proc_mm, ++}; ++ ++static int make_proc_mm(void) ++{ ++ struct proc_dir_entry *ent; ++ ++ ent = create_proc_entry("mm", 0222, &proc_root); ++ if(ent == NULL){ ++ printk("make_proc_mm : Failed to register /proc/mm\n"); ++ return(0); ++ } ++ ent->proc_fops = &proc_mm_fops; ++ ++ return(0); ++} ++ ++__initcall(make_proc_mm); ++ ++/* ++ * Overrides for Emacs so that we follow Linus's tabbing style. ++ * Emacs will notice this stuff at the end of the file and automatically ++ * adjust the settings for this buffer only. This must remain at the end ++ * of the file. 
++ * --------------------------------------------------------------------------- ++ * Local variables: ++ * c-file-style: "linux" ++ * End: ++ */ diff --git a/lustre/kernel_patches/patches/vfs_intent-2.6.3-suse.patch b/lustre/kernel_patches/patches/vfs_intent-2.6.3-suse.patch new file mode 100644 index 0000000..aaae350 --- /dev/null +++ b/lustre/kernel_patches/patches/vfs_intent-2.6.3-suse.patch @@ -0,0 +1,777 @@ + fs/exec.c | 18 +++++++--- + fs/namei.c | 86 +++++++++++++++++++++++++++++++++++++++++++++---- + fs/namespace.c | 2 + + fs/nfs/dir.c | 4 +- + fs/open.c | 62 +++++++++++++++++++++++------------ + fs/stat.c | 24 ++++++++++--- + include/linux/dcache.h | 3 + + include/linux/fs.h | 8 ++++ + include/linux/namei.h | 56 ++++++++++++++++++++++++++----- + kernel/ksyms.c | 8 ++++ + 10 files changed, 222 insertions(+), 49 deletions(-) + +.old..........pc/vfs_intent-2.6.3-suse/fs/exec.c +.new.........fs/exec.c +Index: linux-2.6.3-20/fs/exec.c +=================================================================== +--- linux-2.6.3-20.orig/fs/exec.c 2004-03-05 02:07:04.000000000 -0800 ++++ linux-2.6.3-20/fs/exec.c 2004-03-08 14:23:40.000000000 -0800 +@@ -121,8 +121,11 @@ + struct file * file; + struct nameidata nd; + int error; ++ intent_init(&nd.intent, IT_OPEN); + +- nd.intent.open.flags = FMODE_READ; ++ error = user_path_walk_it(library, &nd); ++ ++ nd.intent.it_flags = O_RDONLY; + error = __user_walk(library, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); + if (error) + goto out; +@@ -135,7 +138,7 @@ + if (error) + goto exit; + +- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); ++ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent); + error = PTR_ERR(file); + if (IS_ERR(file)) + goto out; +@@ -475,8 +478,9 @@ + int err; + struct file *file; + +- nd.intent.open.flags = FMODE_READ; +- err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_OPEN, &nd); ++ intent_init(&nd.intent, IT_OPEN); ++ nd.intent.it_flags = O_RDONLY; ++ err = path_lookup(name, LOOKUP_FOLLOW, &nd); + file = 
ERR_PTR(err); + + if (!err) { +@@ -489,7 +493,7 @@ + err = -EACCES; + file = ERR_PTR(err); + if (!err) { +- file = dentry_open(nd.dentry, nd.mnt, O_RDONLY); ++ file = dentry_open_it(nd.dentry, nd.mnt, O_RDONLY, &nd.intent); + if (!IS_ERR(file)) { + err = deny_write_access(file); + if (err) { +.old..........pc/vfs_intent-2.6.3-suse/fs/namei.c +.new.........fs/namei.c +Index: linux-2.6.3-20/fs/namei.c +=================================================================== +--- linux-2.6.3-20.orig/fs/namei.c 2004-03-05 02:07:04.000000000 -0800 ++++ linux-2.6.3-20/fs/namei.c 2004-03-08 14:32:24.000000000 -0800 +@@ -269,8 +269,19 @@ + return 0; + } + ++void intent_release(struct lookup_intent *it) ++{ ++ if (!it) ++ return; ++ if (it->it_magic != INTENT_MAGIC) ++ return; ++ if (it->it_op_release) ++ it->it_op_release(it); ++} ++ + void path_release(struct nameidata *nd) + { ++ intent_release(&nd->intent); + dput(nd->dentry); + mntput(nd->mnt); + } +@@ -347,7 +358,10 @@ + { + struct dentry * result; + struct inode *dir = parent->d_inode; ++ int counter = 0; + ++again: ++ counter++; + down(&dir->i_sem); + /* + * First re-do the cached lookup just in case it was created +@@ -386,7 +400,10 @@ + if (result->d_op && result->d_op->d_revalidate) { + if (!result->d_op->d_revalidate(result, nd) && !d_invalidate(result)) { + dput(result); +- result = ERR_PTR(-ENOENT); ++ if (counter > 10) ++ result = ERR_PTR(-ESTALE); ++ if (!IS_ERR(result)) ++ goto again; + } + } + return result; +@@ -563,6 +580,31 @@ + return PTR_ERR(dentry); + } + ++static int revalidate_special(struct nameidata *nd) ++{ ++ struct dentry *dentry = nd->dentry; ++ int err, counter = 0; ++ ++ if (!dentry->d_op || !dentry->d_op->d_revalidate) ++ return 0; ++ revalidate_again: ++ if (!dentry->d_op->d_revalidate(dentry, nd)) { ++ struct dentry *new; ++ if ((err = permission(dentry->d_parent->d_inode, MAY_EXEC,nd))) ++ return err; ++ new = real_lookup(dentry->d_parent, &dentry->d_name, nd); ++ d_invalidate(dentry); ++ 
dput(dentry); ++ nd->dentry = dentry = new; ++ counter++; ++ if (counter < 10) ++ goto revalidate_again; ++ printk("excessive revalidate_it loops\n"); ++ return -ESTALE; ++ } ++ return 0; ++} ++ + /* + * Name resolution. + * +@@ -663,7 +705,9 @@ + + if (inode->i_op->follow_link) { + mntget(next.mnt); ++ nd->flags |= LOOKUP_LINK_NOTLAST; + err = do_follow_link(next.dentry, nd); ++ nd->flags &= ~LOOKUP_LINK_NOTLAST; + dput(next.dentry); + mntput(next.mnt); + if (err) +@@ -702,6 +746,11 @@ + inode = nd->dentry->d_inode; + /* fallthrough */ + case 1: ++ nd->flags |= LOOKUP_LAST; ++ err = revalidate_special(nd); ++ nd->flags &= ~LOOKUP_LAST; ++ if (err) ++ break; + goto return_reval; + } + if (nd->dentry->d_op && nd->dentry->d_op->d_hash) { +@@ -709,7 +758,9 @@ + if (err < 0) + break; + } ++ nd->flags |= LOOKUP_LAST; + err = do_lookup(nd, &this, &next); ++ nd->flags &= ~LOOKUP_LAST; + if (err) + break; + follow_mount(&next.mnt, &next.dentry); +@@ -935,7 +986,7 @@ + } + + /* SMP-safe */ +-struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) ++struct dentry * lookup_one_len_it(const char * name, struct dentry * base, int len, struct nameidata *nd) + { + unsigned long hash; + struct qstr this; +@@ -955,11 +1006,16 @@ + } + this.hash = end_name_hash(hash); + +- return lookup_hash(&this, base); ++ return __lookup_hash(&this, base, nd); + access: + return ERR_PTR(-EACCES); + } + ++struct dentry * lookup_one_len(const char * name, struct dentry * base, int len) ++{ ++ return lookup_one_len_it(name, base, len, NULL); ++} ++ + /* + * namei() + * +@@ -971,7 +1027,7 @@ + * that namei follows links, while lnamei does not. 
+ * SMP-safe + */ +-int fastcall __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) ++int fastcall __user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd) + { + char *tmp = getname(name); + int err = PTR_ERR(tmp); +@@ -983,6 +1039,12 @@ + return err; + } + ++int __user_walk(const char __user *name, unsigned flags, struct nameidata *nd) ++{ ++ intent_init(&nd->intent, IT_LOOKUP); ++ return __user_walk_it(name, flags, nd); ++} ++ + /* + * It's inline, so penalty for filesystems that don't use sticky bit is + * minimal. +@@ -1255,8 +1317,8 @@ + acc_mode |= MAY_APPEND; + + /* Fill in the open() intent data */ +- nd->intent.open.flags = flag; +- nd->intent.open.create_mode = mode; ++ nd->intent.it_flags = flag; ++ nd->intent.it_create_mode = mode; + + /* + * The simplest case - just a plain lookup. +@@ -1271,6 +1333,7 @@ + /* + * Create - we need to know the parent. + */ ++ nd->intent.it_op |= IT_CREAT; + error = path_lookup(pathname, LOOKUP_PARENT|LOOKUP_OPEN|LOOKUP_CREATE, nd); + if (error) + return error; +@@ -1287,7 +1350,9 @@ + dir = nd->dentry; + nd->flags &= ~LOOKUP_PARENT; + down(&dir->d_inode->i_sem); ++ nd->flags |= LOOKUP_LAST; + dentry = __lookup_hash(&nd->last, nd->dentry, nd); ++ nd->flags &= ~LOOKUP_LAST; + + do_last: + error = PTR_ERR(dentry); +@@ -1392,7 +1457,9 @@ + } + dir = nd->dentry; + down(&dir->d_inode->i_sem); ++ nd->flags |= LOOKUP_LAST; + dentry = __lookup_hash(&nd->last, nd->dentry, nd); ++ nd->flags &= ~LOOKUP_LAST; + putname(nd->last.name); + goto do_last; + } +@@ -2154,7 +2221,9 @@ + __vfs_follow_link(struct nameidata *nd, const char *link) + { + int res = 0; ++ struct lookup_intent it = nd->intent; + char *name; ++ + if (IS_ERR(link)) + goto fail; + +@@ -2164,6 +2233,10 @@ + /* weird __emul_prefix() stuff did it */ + goto out; + } ++ ++ intent_init(&nd->intent, it.it_op); ++ nd->intent.it_flags = it.it_flags; ++ nd->intent.it_create_mode = it.it_create_mode; + res = link_path_walk(link, nd); + 
out: + if (current->link_count || res || nd->last_type!=LAST_NORM) +.old..........pc/vfs_intent-2.6.3-suse/fs/namespace.c +.new.........fs/namespace.c +Index: linux-2.6.3-20/fs/namespace.c +=================================================================== +--- linux-2.6.3-20.orig/fs/namespace.c 2004-03-05 02:07:04.000000000 -0800 ++++ linux-2.6.3-20/fs/namespace.c 2004-03-08 14:23:40.000000000 -0800 +@@ -744,6 +744,7 @@ + int retval = 0; + int mnt_flags = 0; + ++ intent_init(&nd.intent, IT_LOOKUP); + /* Discard magic */ + if ((flags & MS_MGC_MSK) == MS_MGC_VAL) + flags &= ~MS_MGC_MSK; +.old..........pc/vfs_intent-2.6.3-suse/fs/open.c +.new.........fs/open.c +Index: linux-2.6.3-20/fs/open.c +=================================================================== +--- linux-2.6.3-20.orig/fs/open.c 2004-03-05 02:07:04.000000000 -0800 ++++ linux-2.6.3-20/fs/open.c 2004-03-08 14:23:40.000000000 -0800 +@@ -202,7 +202,7 @@ + struct nameidata nd; + struct inode * inode; + int error; +- ++ intent_init(&nd.intent, IT_GETATTR); + error = -EINVAL; + if (length < 0) /* sorry, but loff_t says... */ + goto out; +@@ -461,6 +461,7 @@ + int old_fsuid, old_fsgid; + kernel_cap_t old_cap; + int res; ++ intent_init(&nd.intent, IT_GETATTR); + + if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? 
*/ + return -EINVAL; +@@ -492,6 +493,7 @@ + if(!res && (mode & S_IWOTH) && IS_RDONLY(nd.dentry->d_inode) + && !special_file(nd.dentry->d_inode->i_mode)) + res = -EROFS; ++ + path_release(&nd); + } + +@@ -506,6 +508,7 @@ + { + struct nameidata nd; + int error; ++ intent_init(&nd.intent, IT_GETATTR); + + error = __user_walk(filename, LOOKUP_FOLLOW|LOOKUP_DIRECTORY, &nd); + if (error) +@@ -557,6 +560,7 @@ + { + struct nameidata nd; + int error; ++ intent_init(&nd.intent, IT_GETATTR); + + error = __user_walk(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY | LOOKUP_NOALT, &nd); + if (error) +@@ -629,7 +633,7 @@ + error = -EROFS; + if (IS_RDONLY(inode)) + goto dput_and_out; +- ++ + error = -EPERM; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + goto dput_and_out; +@@ -737,27 +741,8 @@ + * for the internal routines (ie open_namei()/follow_link() etc). 00 is + * used by symlinks. + */ +-struct file *filp_open(const char * filename, int flags, int mode) +-{ +- int namei_flags, error; +- struct nameidata nd; +- +- namei_flags = flags; +- if ((namei_flags+1) & O_ACCMODE) +- namei_flags++; +- if (namei_flags & O_TRUNC) +- namei_flags |= 2; +- +- error = open_namei(filename, namei_flags, mode, &nd); +- if (!error) +- return dentry_open(nd.dentry, nd.mnt, flags); +- +- return ERR_PTR(error); +-} +- +-EXPORT_SYMBOL(filp_open); +- +-struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) ++struct file *dentry_open_it(struct dentry *dentry, struct vfsmount *mnt, int flags, ++ struct lookup_intent *it) + { + struct file * f; + struct inode *inode; +@@ -769,6 +754,7 @@ + goto cleanup_dentry; + f->f_flags = flags; + f->f_mode = (flags+1) & O_ACCMODE; ++ f->f_it = it; + inode = dentry->d_inode; + if (f->f_mode & FMODE_WRITE) { + error = get_write_access(inode); +@@ -788,6 +774,7 @@ + error = f->f_op->open(inode,f); + if (error) + goto cleanup_all; ++ intent_release(it); + } + f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC); + +@@ -812,6 +799,7 @@ + 
cleanup_file: + put_filp(f); + cleanup_dentry: ++ intent_release(it); + dput(dentry); + mntput(mnt); + return ERR_PTR(error); +@@ -819,6 +807,36 @@ + + EXPORT_SYMBOL(dentry_open); + ++struct file *filp_open(const char * filename, int flags, int mode) ++{ ++ int namei_flags, error; ++ struct file * temp_filp; ++ struct nameidata nd; ++ intent_init(&nd.intent, IT_OPEN); ++ ++ namei_flags = flags; ++ if ((namei_flags+1) & O_ACCMODE) ++ namei_flags++; ++ if (namei_flags & O_TRUNC) ++ namei_flags |= 2; ++ ++ error = open_namei(filename, namei_flags, mode, &nd); ++ if (!error) { ++ temp_filp = dentry_open_it(nd.dentry, nd.mnt, flags, &nd.intent); ++ return temp_filp; ++ } ++ return ERR_PTR(error); ++} ++ ++ ++struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags) ++{ ++ struct lookup_intent it; ++ intent_init(&it, IT_LOOKUP); ++ ++ return dentry_open_it(dentry, mnt, flags, &it); ++} ++ + /* + * Find an empty file descriptor entry, and mark it busy. + */ +.old..........pc/vfs_intent-2.6.3-suse/fs/stat.c +.new.........fs/stat.c +Index: linux-2.6.3-20/fs/stat.c +=================================================================== +--- linux-2.6.3-20.orig/fs/stat.c 2004-03-05 02:07:04.000000000 -0800 ++++ linux-2.6.3-20/fs/stat.c 2004-03-08 14:23:40.000000000 -0800 +@@ -36,7 +36,7 @@ + + EXPORT_SYMBOL(generic_fillattr); + +-int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) ++int vfs_getattr_it(struct vfsmount *mnt, struct dentry *dentry, struct lookup_intent *it, struct kstat *stat) + { + struct inode *inode = dentry->d_inode; + int retval; +@@ -45,6 +45,8 @@ + if (retval) + return retval; + ++ if (inode->i_op->getattr_it) ++ return inode->i_op->getattr_it(mnt, dentry, it, stat); + if (inode->i_op->getattr) + return inode->i_op->getattr(mnt, dentry, stat); + +@@ -61,14 +63,20 @@ + + EXPORT_SYMBOL(vfs_getattr); + ++int vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) ++{ ++ return 
vfs_getattr_it(mnt, dentry, NULL, stat); ++} ++ + int vfs_stat(char __user *name, struct kstat *stat) + { + struct nameidata nd; + int error; ++ intent_init(&nd.intent, IT_GETATTR); + +- error = user_path_walk(name, &nd); ++ error = user_path_walk_it(name, &nd); + if (!error) { +- error = vfs_getattr(nd.mnt, nd.dentry, stat); ++ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat); + path_release(&nd); + } + return error; +@@ -80,10 +88,11 @@ + { + struct nameidata nd; + int error; ++ intent_init(&nd.intent, IT_GETATTR); + +- error = user_path_walk_link(name, &nd); ++ error = user_path_walk_link_it(name, &nd); + if (!error) { +- error = vfs_getattr(nd.mnt, nd.dentry, stat); ++ error = vfs_getattr_it(nd.mnt, nd.dentry, &nd.intent, stat); + path_release(&nd); + } + return error; +@@ -95,9 +104,12 @@ + { + struct file *f = fget(fd); + int error = -EBADF; ++ struct nameidata nd; ++ intent_init(&nd.intent, IT_GETATTR); + + if (f) { +- error = vfs_getattr(f->f_vfsmnt, f->f_dentry, stat); ++ error = vfs_getattr_it(f->f_vfsmnt, f->f_dentry, &nd.intent, stat); ++ intent_release(&nd.intent); + fput(f); + } + return error; +.old..........pc/vfs_intent-2.6.3-suse/fs/nfs/dir.c +.new.........fs/nfs/dir.c +Index: linux-2.6.3-20/fs/nfs/dir.c +=================================================================== +--- linux-2.6.3-20.orig/fs/nfs/dir.c 2004-03-05 02:07:03.000000000 -0800 ++++ linux-2.6.3-20/fs/nfs/dir.c 2004-03-08 14:23:40.000000000 -0800 +@@ -681,7 +681,7 @@ + return 0; + if (!nd || (nd->flags & LOOKUP_CONTINUE) || !(nd->flags & LOOKUP_CREATE)) + return 0; +- return (nd->intent.open.flags & O_EXCL) != 0; ++ return (nd->intent.it_flags & O_EXCL) != 0; + } + + static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) +@@ -972,7 +972,7 @@ + attr.ia_valid = ATTR_MODE; + + if (nd && (nd->flags & LOOKUP_CREATE)) +- open_flags = nd->intent.open.flags; ++ open_flags = nd->intent.it_flags; + + /* + * The 0 argument passed into the 
create function should one day +.old..........pc/vfs_intent-2.6.3-suse/fs/inode.c +.new.........fs/inode.c +Index: linux-2.6.3-20/fs/inode.c +=================================================================== +--- linux-2.6.3-20.orig/fs/inode.c 2004-03-05 02:07:04.000000000 -0800 ++++ linux-2.6.3-20/fs/inode.c 2004-03-08 14:23:40.000000000 -0800 +@@ -223,6 +223,7 @@ + inodes_stat.nr_unused--; + } + ++EXPORT_SYMBOL(__iget); + /** + * clear_inode - clear an inode + * @inode: inode to clear +.old..........pc/vfs_intent-2.6.3-suse/fs/super.c +.new.........fs/super.c +Index: linux-2.6.3-20/fs/super.c +=================================================================== +--- linux-2.6.3-20.orig/fs/super.c 2004-03-05 02:07:04.000000000 -0800 ++++ linux-2.6.3-20/fs/super.c 2004-03-08 14:23:40.000000000 -0800 +@@ -841,6 +841,8 @@ + return (struct vfsmount *)sb; + } + ++EXPORT_SYMBOL(do_kern_mount); ++ + struct vfsmount *kern_mount(struct file_system_type *type) + { + return do_kern_mount(type->name, 0, type->name, NULL); +.old..........pc/vfs_intent-2.6.3-suse/include/linux/dcache.h +.new.........include/linux/dcache.h +Index: linux-2.6.3-20/include/linux/dcache.h +=================================================================== +--- linux-2.6.3-20.orig/include/linux/dcache.h 2004-03-05 02:07:17.000000000 -0800 ++++ linux-2.6.3-20/include/linux/dcache.h 2004-03-08 14:23:40.000000000 -0800 +@@ -4,6 +4,7 @@ + #ifdef __KERNEL__ + + #include ++#include + #include + #include + #include +@@ -35,6 +36,8 @@ + char name_str[0]; + }; + ++#include ++ + struct dentry_stat_t { + int nr_dentry; + int nr_unused; +.old..........pc/vfs_intent-2.6.3-suse/include/linux/fs.h +.new.........include/linux/fs.h +Index: linux-2.6.3-20/include/linux/fs.h +=================================================================== +--- linux-2.6.3-20.orig/include/linux/fs.h 2004-03-05 02:07:17.000000000 -0800 ++++ linux-2.6.3-20/include/linux/fs.h 2004-03-08 14:23:41.000000000 -0800 +@@ -243,6 +243,8 @@ + 
#define ATTR_ATTR_FLAG 1024 + #define ATTR_KILL_SUID 2048 + #define ATTR_KILL_SGID 4096 ++#define ATTR_RAW 8192 /* file system, not vfs will massage attrs */ ++#define ATTR_FROM_OPEN 16384 /* called from open path, ie O_TRUNC */ + + /* + * This is the Inode Attributes structure, used for notify_change(). It +@@ -409,6 +411,7 @@ + struct block_device *i_bdev; + struct cdev *i_cdev; + int i_cindex; ++ void *i_filterdata; + + unsigned long i_dnotify_mask; /* Directory notify events */ + struct dnotify_struct *i_dnotify; /* for directory notifications */ +@@ -541,6 +544,7 @@ + spinlock_t f_ep_lock; + #endif /* #ifdef CONFIG_EPOLL */ + struct address_space *f_mapping; ++ struct lookup_intent *f_it; + }; + extern spinlock_t files_lock; + #define file_list_lock() spin_lock(&files_lock); +@@ -846,7 +850,9 @@ + void (*truncate) (struct inode *); + int (*permission) (struct inode *, int, struct nameidata *); + int (*setattr) (struct dentry *, struct iattr *); ++ int (*setattr_raw) (struct inode *, struct iattr *); + int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); ++ int (*getattr_it) (struct vfsmount *, struct dentry *, struct lookup_intent *, struct kstat *); + int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); + ssize_t (*getxattr) (struct dentry *, const char *, void *, size_t); + ssize_t (*listxattr) (struct dentry *, char *, size_t); +@@ -1062,6 +1068,7 @@ + extern int unregister_filesystem(struct file_system_type *); + extern struct vfsmount *kern_mount(struct file_system_type *); + extern int may_umount(struct vfsmount *); ++struct vfsmount *do_kern_mount(const char *type, int flags, const char *name, void *data); + extern long do_mount(char *, char *, char *, unsigned long, void *); + + extern int vfs_statfs(struct super_block *, struct kstatfs *); +@@ -1126,6 +1133,7 @@ + extern int do_truncate(struct dentry *, loff_t start); + extern struct file *filp_open(const char *, int, int); + extern struct file * 
dentry_open(struct dentry *, struct vfsmount *, int); ++extern struct file * dentry_open_it(struct dentry *, struct vfsmount *, int, struct lookup_intent *); + extern int filp_close(struct file *, fl_owner_t id); + extern char * getname(const char __user *); + +.old..........pc/vfs_intent-2.6.3-suse/include/linux/namei.h +.new.........include/linux/namei.h +Index: linux-2.6.3-20/include/linux/namei.h +=================================================================== +--- linux-2.6.3-20.orig/include/linux/namei.h 2004-03-05 02:07:18.000000000 -0800 ++++ linux-2.6.3-20/include/linux/namei.h 2004-03-08 14:23:41.000000000 -0800 +@@ -2,25 +2,55 @@ + #define _LINUX_NAMEI_H + + #include ++#include + + struct vfsmount; ++struct nameidata; + +-struct open_intent { +- int flags; +- int create_mode; ++/* intent opcodes */ ++#define IT_OPEN (1) ++#define IT_CREAT (1<<1) ++#define IT_READDIR (1<<2) ++#define IT_GETATTR (1<<3) ++#define IT_LOOKUP (1<<4) ++#define IT_UNLINK (1<<5) ++#define IT_TRUNC (1<<6) ++#define IT_GETXATTR (1<<7) ++ ++struct lustre_intent_data { ++ int it_disposition; ++ int it_status; ++ __u64 it_lock_handle; ++ void *it_data; ++ int it_lock_mode; + }; + ++#define INTENT_MAGIC 0x19620323 ++struct lookup_intent { ++ int it_magic; ++ void (*it_op_release)(struct lookup_intent *); ++ int it_op; ++ int it_flags; ++ int it_create_mode; ++ union { ++ struct lustre_intent_data lustre; ++ } d; ++}; ++ ++static inline void intent_init(struct lookup_intent *it, int op) ++{ ++ memset(it, 0, sizeof(*it)); ++ it->it_magic = INTENT_MAGIC; ++ it->it_op = op; ++} ++ + struct nameidata { + struct dentry *dentry; + struct vfsmount *mnt; + struct qstr last; + unsigned int flags; + int last_type; +- +- /* Intent data */ +- union { +- struct open_intent open; +- } intent; ++ struct lookup_intent intent; + }; + + /* +@@ -41,6 +71,9 @@ + #define LOOKUP_CONTINUE 4 + #define LOOKUP_PARENT 16 + #define LOOKUP_NOALT 32 ++#define LOOKUP_LAST (1<<6) ++#define LOOKUP_LINK_NOTLAST 
(1<<7) ++ + /* + * Intent data + */ +@@ -49,6 +82,12 @@ + #define LOOKUP_ACCESS (0x0400) + + extern int FASTCALL(__user_walk(const char __user *, unsigned, struct nameidata *)); ++extern int FASTCALL(__user_walk_it(const char __user *name, unsigned flags, struct nameidata *nd)); ++#define user_path_walk_it(name,nd) \ ++ __user_walk_it(name, LOOKUP_FOLLOW, nd) ++#define user_path_walk_link_it(name,nd) \ ++ __user_walk_it(name, 0, nd) ++extern void intent_release(struct lookup_intent *); + #define user_path_walk(name,nd) \ + __user_walk(name, LOOKUP_FOLLOW, nd) + #define user_path_walk_link(name,nd) \ +@@ -60,7 +99,6 @@ + + extern struct dentry * lookup_one_len(const char *, struct dentry *, int); + extern struct dentry * lookup_hash(struct qstr *, struct dentry *); +- + extern int follow_down(struct vfsmount **, struct dentry **); + extern int follow_up(struct vfsmount **, struct dentry **); + +.old..........pc/vfs_intent-2.6.3-suse/kernel/exit.c +.new.........kernel/exit.c +Index: linux-2.6.3-20/kernel/exit.c +=================================================================== +--- linux-2.6.3-20.orig/kernel/exit.c 2004-03-05 02:07:17.000000000 -0800 ++++ linux-2.6.3-20/kernel/exit.c 2004-03-08 14:23:41.000000000 -0800 +@@ -258,6 +258,8 @@ + write_unlock_irq(&tasklist_lock); + } + ++EXPORT_SYMBOL(reparent_to_init); ++ + void __set_special_pids(pid_t session, pid_t pgrp) + { + struct task_struct *curr = current; +@@ -427,6 +429,8 @@ + __exit_files(tsk); + } + ++EXPORT_SYMBOL(exit_files); ++ + static inline void __put_fs_struct(struct fs_struct *fs) + { + /* No need to hold fs->lock if we are killing it */ diff --git a/lustre/kernel_patches/patches/vfs_nointent-2.6.3-mm4.patch b/lustre/kernel_patches/patches/vfs_nointent-2.6.3-mm4.patch new file mode 100644 index 0000000..4000785 --- /dev/null +++ b/lustre/kernel_patches/patches/vfs_nointent-2.6.3-mm4.patch @@ -0,0 +1,423 @@ + 0 files changed + +Index: linux-2.6.3-mm4/fs/namei.c 
+=================================================================== +--- linux-2.6.3-mm4.orig/fs/namei.c 2004-03-08 14:46:20.906229088 +0800 ++++ linux-2.6.3-mm4/fs/namei.c 2004-03-08 14:51:27.317647472 +0800 +@@ -1277,7 +1277,7 @@ + if (!error) { + DQUOT_INIT(inode); + +- error = do_truncate(dentry, 0); ++ error = do_truncate(dentry, 0, 1); + } + put_write_access(inode); + if (error) +@@ -1527,6 +1527,7 @@ + char * tmp; + struct dentry * dentry; + struct nameidata nd; ++ intent_init(&nd.intent, IT_LOOKUP); + + if (S_ISDIR(mode)) + return -EPERM; +@@ -1537,6 +1538,15 @@ + error = path_lookup(tmp, LOOKUP_PARENT, &nd); + if (error) + goto out; ++ ++ if (nd.dentry->d_inode->i_op->mknod_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->mknod_raw(&nd, mode, dev); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out2; ++ } ++ + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + +@@ -1563,6 +1573,7 @@ + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); ++out2: + path_release(&nd); + out: + putname(tmp); +@@ -1604,10 +1615,18 @@ + if (!IS_ERR(tmp)) { + struct dentry *dentry; + struct nameidata nd; ++ intent_init(&nd.intent, IT_LOOKUP); + + error = path_lookup(tmp, LOOKUP_PARENT, &nd); + if (error) + goto out; ++ if (nd.dentry->d_inode->i_op->mkdir_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->mkdir_raw(&nd, mode); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out2; ++ } + dentry = lookup_create(&nd, 1); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { +@@ -1617,6 +1636,7 @@ + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); ++out2: + path_release(&nd); + out: + putname(tmp); +@@ -1697,6 +1717,7 @@ + char * name; + struct dentry *dentry; + struct nameidata nd; ++ intent_init(&nd.intent, IT_LOOKUP); + + name = getname(pathname); + if(IS_ERR(name)) +@@ -1717,6 +1738,16 @@ + error = -EBUSY; + goto 
exit1; + } ++ ++ if (nd.dentry->d_inode->i_op->rmdir_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ ++ error = op->rmdir_raw(&nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit1; ++ } ++ + down(&nd.dentry->d_inode->i_sem); + dentry = lookup_hash(&nd.last, nd.dentry); + error = PTR_ERR(dentry); +@@ -1775,6 +1806,7 @@ + struct dentry *dentry; + struct nameidata nd; + struct inode *inode = NULL; ++ intent_init(&nd.intent, IT_LOOKUP); + + name = getname(pathname); + if(IS_ERR(name)) +@@ -1786,6 +1818,13 @@ + error = -EISDIR; + if (nd.last_type != LAST_NORM) + goto exit1; ++ if (nd.dentry->d_inode->i_op->unlink_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->unlink_raw(&nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit1; ++ } + down(&nd.dentry->d_inode->i_sem); + dentry = lookup_hash(&nd.last, nd.dentry); + error = PTR_ERR(dentry); +@@ -1853,10 +1892,18 @@ + if (!IS_ERR(to)) { + struct dentry *dentry; + struct nameidata nd; ++ intent_init(&nd.intent, IT_LOOKUP); + + error = path_lookup(to, LOOKUP_PARENT, &nd); + if (error) + goto out; ++ if (nd.dentry->d_inode->i_op->symlink_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->symlink_raw(&nd, from); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out2; ++ } + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { +@@ -1864,6 +1911,7 @@ + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); ++out2: + path_release(&nd); + out: + putname(to); +@@ -1927,6 +1975,8 @@ + struct nameidata nd, old_nd; + int error; + char * to; ++ intent_init(&nd.intent, IT_LOOKUP); ++ intent_init(&old_nd.intent, IT_LOOKUP); + + to = getname(newname); + if (IS_ERR(to)) +@@ -1941,6 +1991,13 @@ + error = -EXDEV; + if (old_nd.mnt != nd.mnt) + goto out_release; ++ if 
(nd.dentry->d_inode->i_op->link_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->link_raw(&old_nd, &nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out_release; ++ } + new_dentry = lookup_create(&nd, 0); + error = PTR_ERR(new_dentry); + if (!IS_ERR(new_dentry)) { +@@ -1991,7 +2048,7 @@ + * locking]. + */ + int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry) ++ struct inode *new_dir, struct dentry *new_dentry) + { + int error = 0; + struct inode *target; +@@ -2036,7 +2093,7 @@ + } + + int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry) ++ struct inode *new_dir, struct dentry *new_dentry) + { + struct inode *target; + int error; +@@ -2113,6 +2170,8 @@ + struct dentry * old_dentry, *new_dentry; + struct dentry * trap; + struct nameidata oldnd, newnd; ++ intent_init(&oldnd.intent, IT_LOOKUP); ++ intent_init(&newnd.intent, IT_LOOKUP); + + error = path_lookup(oldname, LOOKUP_PARENT, &oldnd); + if (error) +@@ -2135,6 +2194,13 @@ + if (newnd.last_type != LAST_NORM) + goto exit2; + ++ if (old_dir->d_inode->i_op->rename_raw) { ++ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit2; ++ } ++ + trap = lock_rename(new_dir, old_dir); + + old_dentry = lookup_hash(&oldnd.last, old_dir); +@@ -2166,8 +2232,7 @@ + if (new_dentry == trap) + goto exit5; + +- error = vfs_rename(old_dir->d_inode, old_dentry, +- new_dir->d_inode, new_dentry); ++ error = vfs_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, new_dentry); + exit5: + dput(new_dentry); + exit4: +Index: linux-2.6.3-mm4/fs/open.c +=================================================================== +--- linux-2.6.3-mm4.orig/fs/open.c 2004-03-08 14:46:21.050207200 +0800 ++++ 
linux-2.6.3-mm4/fs/open.c 2004-03-08 14:55:01.025158992 +0800 +@@ -180,9 +180,10 @@ + return error; + } + +-int do_truncate(struct dentry *dentry, loff_t length) ++int do_truncate(struct dentry *dentry, loff_t length, int called_from_open) + { + int err; ++ struct inode_operations *op = dentry->d_inode->i_op; + struct iattr newattrs; + + /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ +@@ -193,7 +194,14 @@ + newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; + down(&dentry->d_inode->i_sem); + down_write(&dentry->d_inode->i_alloc_sem); +- err = notify_change(dentry, &newattrs); ++ if (called_from_open) ++ newattrs.ia_valid |= ATTR_FROM_OPEN; ++ if (op->setattr_raw) { ++ newattrs.ia_valid |= ATTR_RAW; ++ newattrs.ia_ctime = CURRENT_TIME; ++ err = op->setattr_raw(dentry->d_inode, &newattrs); ++ } else ++ err = notify_change(dentry, &newattrs); + up_write(&dentry->d_inode->i_alloc_sem); + up(&dentry->d_inode->i_sem); + return err; +@@ -249,7 +257,7 @@ + error = locks_verify_truncate(inode, NULL, length); + if (!error) { + DQUOT_INIT(inode); +- error = do_truncate(nd.dentry, length); ++ error = do_truncate(nd.dentry, length, 0); + } + put_write_access(inode); + +@@ -301,7 +309,7 @@ + + error = locks_verify_truncate(inode, file, length); + if (!error) +- error = do_truncate(dentry, length); ++ error = do_truncate(dentry, length, 0); + out_putf: + fput(file); + out: +@@ -380,9 +388,19 @@ + (error = permission(inode,MAY_WRITE,&nd)) != 0) + goto dput_and_out; + } +- down(&inode->i_sem); +- error = notify_change(nd.dentry, &newattrs); +- up(&inode->i_sem); ++ if (inode->i_op->setattr_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ ++ newattrs.ia_valid |= ATTR_RAW; ++ error = op->setattr_raw(inode, &newattrs); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto dput_and_out; ++ } else { ++ down(&inode->i_sem); ++ error = notify_change(nd.dentry, &newattrs); ++ up(&inode->i_sem); ++ } + 
dput_and_out: + path_release(&nd); + out: +@@ -433,9 +451,19 @@ + (error = permission(inode,MAY_WRITE,&nd)) != 0) + goto dput_and_out; + } +- down(&inode->i_sem); +- error = notify_change(nd.dentry, &newattrs); +- up(&inode->i_sem); ++ if (inode->i_op->setattr_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ ++ newattrs.ia_valid |= ATTR_RAW; ++ error = op->setattr_raw(inode, &newattrs); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto dput_and_out; ++ } else { ++ down(&inode->i_sem); ++ error = notify_change(nd.dentry, &newattrs); ++ up(&inode->i_sem); ++ } + dput_and_out: + path_release(&nd); + out: +@@ -636,6 +664,18 @@ + if (IS_RDONLY(inode)) + goto dput_and_out; + ++ if (inode->i_op->setattr_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ ++ newattrs.ia_mode = mode; ++ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; ++ newattrs.ia_valid |= ATTR_RAW; ++ error = op->setattr_raw(inode, &newattrs); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto dput_and_out; ++ } ++ + error = -EPERM; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + goto dput_and_out; +@@ -669,6 +709,18 @@ + if (IS_RDONLY(inode)) + goto out; + error = -EPERM; ++ if (inode->i_op->setattr_raw) { ++ struct inode_operations *op = dentry->d_inode->i_op; ++ ++ newattrs.ia_uid = user; ++ newattrs.ia_gid = group; ++ newattrs.ia_valid = ATTR_UID | ATTR_GID; ++ newattrs.ia_valid |= ATTR_RAW; ++ error = op->setattr_raw(inode, &newattrs); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ return error; ++ } + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + goto out; + newattrs.ia_valid = ATTR_CTIME; +@@ -682,6 +734,7 @@ + } + if (!S_ISDIR(inode->i_mode)) + newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID; ++ + down(&inode->i_sem); + error = notify_change(dentry, &newattrs); + up(&inode->i_sem); +Index: linux-2.6.3-mm4/fs/exec.c 
+=================================================================== +--- linux-2.6.3-mm4.orig/fs/exec.c 2004-03-08 14:46:20.758251584 +0800 ++++ linux-2.6.3-mm4/fs/exec.c 2004-03-08 14:51:27.454626648 +0800 +@@ -1408,7 +1408,7 @@ + goto close_fail; + if (!file->f_op->write) + goto close_fail; +- if (do_truncate(file->f_dentry, 0) != 0) ++ if (do_truncate(file->f_dentry, 0, 0) != 0) + goto close_fail; + + retval = binfmt->core_dump(signr, regs, file); +Index: linux-2.6.3-mm4/include/linux/fs.h +=================================================================== +--- linux-2.6.3-mm4.orig/include/linux/fs.h 2004-03-08 14:46:21.391155368 +0800 ++++ linux-2.6.3-mm4/include/linux/fs.h 2004-03-08 14:56:25.775275016 +0800 +@@ -843,13 +843,20 @@ + int (*create) (struct inode *,struct dentry *,int, struct nameidata *); + struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); + int (*link) (struct dentry *,struct inode *,struct dentry *); ++ int (*link_raw) (struct nameidata *,struct nameidata *); + int (*unlink) (struct inode *,struct dentry *); ++ int (*unlink_raw) (struct nameidata *); + int (*symlink) (struct inode *,struct dentry *,const char *); ++ int (*symlink_raw) (struct nameidata *,const char *); + int (*mkdir) (struct inode *,struct dentry *,int); ++ int (*mkdir_raw) (struct nameidata *,int); + int (*rmdir) (struct inode *,struct dentry *); ++ int (*rmdir_raw) (struct nameidata *); + int (*mknod) (struct inode *,struct dentry *,int,dev_t); ++ int (*mknod_raw) (struct nameidata *,int,dev_t); + int (*rename) (struct inode *, struct dentry *, + struct inode *, struct dentry *); ++ int (*rename_raw) (struct nameidata *, struct nameidata *); + int (*readlink) (struct dentry *, char __user *,int); + int (*follow_link) (struct dentry *, struct nameidata *); + void (*truncate) (struct inode *); +@@ -1133,7 +1140,7 @@ + + /* fs/open.c */ + +-extern int do_truncate(struct dentry *, loff_t start); ++extern int do_truncate(struct dentry *, loff_t 
start, int called_from_open); + extern struct file *filp_open(const char *, int, int); + extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); + extern struct file * dentry_open_it(struct dentry *, struct vfsmount *, int, struct lookup_intent *); +Index: linux-2.6.3-mm4/net/unix/af_unix.c +=================================================================== +--- linux-2.6.3-mm4.orig/net/unix/af_unix.c 2004-02-26 14:22:03.000000000 +0800 ++++ linux-2.6.3-mm4/net/unix/af_unix.c 2004-03-08 14:51:27.591605824 +0800 +@@ -592,6 +592,7 @@ + int err = 0; + + if (sunname->sun_path[0]) { ++ intent_init(&nd.intent, IT_LOOKUP); + err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd); + if (err) + goto fail; diff --git a/lustre/kernel_patches/patches/vfs_nointent_2.6.3-suse.patch b/lustre/kernel_patches/patches/vfs_nointent_2.6.3-suse.patch new file mode 100644 index 0000000..c72c0b9 --- /dev/null +++ b/lustre/kernel_patches/patches/vfs_nointent_2.6.3-suse.patch @@ -0,0 +1,433 @@ + 0 files changed + +.old..........pc/vfs_nointent_2.6.0-suse/fs/namei.c +.new.........fs/namei.c +Index: linux-2.6.3-20/fs/namei.c +=================================================================== +--- linux-2.6.3-20.orig/fs/namei.c 2004-03-08 14:32:24.000000000 -0800 ++++ linux-2.6.3-20/fs/namei.c 2004-03-08 14:40:01.000000000 -0800 +@@ -1276,7 +1276,7 @@ + if (!error) { + DQUOT_INIT(inode); + +- error = do_truncate(dentry, 0); ++ error = do_truncate(dentry, 0, 1); + } + put_write_access(inode); + if (error) +@@ -1526,6 +1526,7 @@ + char * tmp; + struct dentry * dentry; + struct nameidata nd; ++ intent_init(&nd.intent, IT_LOOKUP); + + if (S_ISDIR(mode)) + return -EPERM; +@@ -1536,6 +1537,15 @@ + error = path_lookup(tmp, LOOKUP_PARENT, &nd); + if (error) + goto out; ++ ++ if (nd.dentry->d_inode->i_op->mknod_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->mknod_raw(&nd, mode, dev); ++ /* the file system wants to use normal vfs path now */ ++ if 
(error != -EOPNOTSUPP) ++ goto out2; ++ } ++ + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + +@@ -1562,6 +1572,7 @@ + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); ++out2: + path_release(&nd); + out: + putname(tmp); +@@ -1603,10 +1614,18 @@ + if (!IS_ERR(tmp)) { + struct dentry *dentry; + struct nameidata nd; ++ intent_init(&nd.intent, IT_LOOKUP); + + error = path_lookup(tmp, LOOKUP_PARENT, &nd); + if (error) + goto out; ++ if (nd.dentry->d_inode->i_op->mkdir_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->mkdir_raw(&nd, mode); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out2; ++ } + dentry = lookup_create(&nd, 1); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { +@@ -1616,6 +1635,7 @@ + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); ++out2: + path_release(&nd); + out: + putname(tmp); +@@ -1696,6 +1716,7 @@ + char * name; + struct dentry *dentry; + struct nameidata nd; ++ intent_init(&nd.intent, IT_LOOKUP); + + name = getname(pathname); + if(IS_ERR(name)) +@@ -1716,6 +1737,16 @@ + error = -EBUSY; + goto exit1; + } ++ ++ if (nd.dentry->d_inode->i_op->rmdir_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ ++ error = op->rmdir_raw(&nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit1; ++ } ++ + down(&nd.dentry->d_inode->i_sem); + dentry = lookup_hash(&nd.last, nd.dentry); + error = PTR_ERR(dentry); +@@ -1774,6 +1805,7 @@ + struct dentry *dentry; + struct nameidata nd; + struct inode *inode = NULL; ++ intent_init(&nd.intent, IT_LOOKUP); + + name = getname(pathname); + if(IS_ERR(name)) +@@ -1785,6 +1817,13 @@ + error = -EISDIR; + if (nd.last_type != LAST_NORM) + goto exit1; ++ if (nd.dentry->d_inode->i_op->unlink_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->unlink_raw(&nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != 
-EOPNOTSUPP) ++ goto exit1; ++ } + down(&nd.dentry->d_inode->i_sem); + dentry = lookup_hash(&nd.last, nd.dentry); + error = PTR_ERR(dentry); +@@ -1852,10 +1891,18 @@ + if (!IS_ERR(to)) { + struct dentry *dentry; + struct nameidata nd; ++ intent_init(&nd.intent, IT_LOOKUP); + + error = path_lookup(to, LOOKUP_PARENT, &nd); + if (error) + goto out; ++ if (nd.dentry->d_inode->i_op->symlink_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->symlink_raw(&nd, from); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out2; ++ } + dentry = lookup_create(&nd, 0); + error = PTR_ERR(dentry); + if (!IS_ERR(dentry)) { +@@ -1863,6 +1910,7 @@ + dput(dentry); + } + up(&nd.dentry->d_inode->i_sem); ++out2: + path_release(&nd); + out: + putname(to); +@@ -1926,6 +1974,8 @@ + struct nameidata nd, old_nd; + int error; + char * to; ++ intent_init(&nd.intent, IT_LOOKUP); ++ intent_init(&old_nd.intent, IT_LOOKUP); + + to = getname(newname); + if (IS_ERR(to)) +@@ -1940,6 +1990,13 @@ + error = -EXDEV; + if (old_nd.mnt != nd.mnt) + goto out_release; ++ if (nd.dentry->d_inode->i_op->link_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ error = op->link_raw(&old_nd, &nd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto out_release; ++ } + new_dentry = lookup_create(&nd, 0); + error = PTR_ERR(new_dentry); + if (!IS_ERR(new_dentry)) { +@@ -1990,7 +2047,7 @@ + * locking]. 
+ */ + int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry) ++ struct inode *new_dir, struct dentry *new_dentry) + { + int error = 0; + struct inode *target; +@@ -2035,7 +2092,7 @@ + } + + int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry, +- struct inode *new_dir, struct dentry *new_dentry) ++ struct inode *new_dir, struct dentry *new_dentry) + { + struct inode *target; + int error; +@@ -2112,6 +2169,8 @@ + struct dentry * old_dentry, *new_dentry; + struct dentry * trap; + struct nameidata oldnd, newnd; ++ intent_init(&oldnd.intent, IT_LOOKUP); ++ intent_init(&newnd.intent, IT_LOOKUP); + + error = path_lookup(oldname, LOOKUP_PARENT, &oldnd); + if (error) +@@ -2134,6 +2193,13 @@ + if (newnd.last_type != LAST_NORM) + goto exit2; + ++ if (old_dir->d_inode->i_op->rename_raw) { ++ error = old_dir->d_inode->i_op->rename_raw(&oldnd, &newnd); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto exit2; ++ } ++ + trap = lock_rename(new_dir, old_dir); + + old_dentry = lookup_hash(&oldnd.last, old_dir); +@@ -2165,8 +2231,7 @@ + if (new_dentry == trap) + goto exit5; + +- error = vfs_rename(old_dir->d_inode, old_dentry, +- new_dir->d_inode, new_dentry); ++ error = vfs_rename(old_dir->d_inode, old_dentry, new_dir->d_inode, new_dentry); + exit5: + dput(new_dentry); + exit4: +.old..........pc/vfs_nointent_2.6.0-suse/fs/open.c +.new.........fs/open.c +Index: linux-2.6.3-20/fs/open.c +=================================================================== +--- linux-2.6.3-20.orig/fs/open.c 2004-03-08 14:23:40.000000000 -0800 ++++ linux-2.6.3-20/fs/open.c 2004-03-08 14:40:01.000000000 -0800 +@@ -180,9 +180,10 @@ + return error; + } + +-int do_truncate(struct dentry *dentry, loff_t length) ++int do_truncate(struct dentry *dentry, loff_t length, int called_from_open) + { + int err; ++ struct inode_operations *op = dentry->d_inode->i_op; + struct iattr newattrs; 
+ + /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */ +@@ -192,7 +193,14 @@ + newattrs.ia_size = length; + newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; + down(&dentry->d_inode->i_sem); +- err = notify_change(dentry, &newattrs); ++ if (called_from_open) ++ newattrs.ia_valid |= ATTR_FROM_OPEN; ++ if (op->setattr_raw) { ++ newattrs.ia_valid |= ATTR_RAW; ++ newattrs.ia_ctime = CURRENT_TIME; ++ err = op->setattr_raw(dentry->d_inode, &newattrs); ++ } else ++ err = notify_change(dentry, &newattrs); + up(&dentry->d_inode->i_sem); + return err; + } +@@ -247,7 +255,7 @@ + error = locks_verify_truncate(inode, NULL, length); + if (!error) { + DQUOT_INIT(inode); +- error = do_truncate(nd.dentry, length); ++ error = do_truncate(nd.dentry, length, 0); + } + put_write_access(inode); + +@@ -299,7 +307,7 @@ + + error = locks_verify_truncate(inode, file, length); + if (!error) +- error = do_truncate(dentry, length); ++ error = do_truncate(dentry, length, 0); + out_putf: + fput(file); + out: +@@ -378,9 +386,19 @@ + (error = permission(inode,MAY_WRITE,&nd)) != 0) + goto dput_and_out; + } +- down(&inode->i_sem); +- error = notify_change(nd.dentry, &newattrs); +- up(&inode->i_sem); ++ if (inode->i_op->setattr_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ ++ newattrs.ia_valid |= ATTR_RAW; ++ error = op->setattr_raw(inode, &newattrs); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto dput_and_out; ++ } else { ++ down(&inode->i_sem); ++ error = notify_change(nd.dentry, &newattrs); ++ up(&inode->i_sem); ++ } + dput_and_out: + path_release(&nd); + out: +@@ -431,9 +449,19 @@ + (error = permission(inode,MAY_WRITE,&nd)) != 0) + goto dput_and_out; + } +- down(&inode->i_sem); +- error = notify_change(nd.dentry, &newattrs); +- up(&inode->i_sem); ++ if (inode->i_op->setattr_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ ++ newattrs.ia_valid |= ATTR_RAW; ++ error = op->setattr_raw(inode, 
&newattrs); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto dput_and_out; ++ } else { ++ down(&inode->i_sem); ++ error = notify_change(nd.dentry, &newattrs); ++ up(&inode->i_sem); ++ } + dput_and_out: + path_release(&nd); + out: +@@ -634,6 +662,18 @@ + if (IS_RDONLY(inode)) + goto dput_and_out; + ++ if (inode->i_op->setattr_raw) { ++ struct inode_operations *op = nd.dentry->d_inode->i_op; ++ ++ newattrs.ia_mode = mode; ++ newattrs.ia_valid = ATTR_MODE | ATTR_CTIME; ++ newattrs.ia_valid |= ATTR_RAW; ++ error = op->setattr_raw(inode, &newattrs); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ goto dput_and_out; ++ } ++ + error = -EPERM; + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + goto dput_and_out; +@@ -667,6 +707,18 @@ + if (IS_RDONLY(inode)) + goto out; + error = -EPERM; ++ if (inode->i_op->setattr_raw) { ++ struct inode_operations *op = dentry->d_inode->i_op; ++ ++ newattrs.ia_uid = user; ++ newattrs.ia_gid = group; ++ newattrs.ia_valid = ATTR_UID | ATTR_GID; ++ newattrs.ia_valid |= ATTR_RAW; ++ error = op->setattr_raw(inode, &newattrs); ++ /* the file system wants to use normal vfs path now */ ++ if (error != -EOPNOTSUPP) ++ return error; ++ } + if (IS_IMMUTABLE(inode) || IS_APPEND(inode)) + goto out; + newattrs.ia_valid = ATTR_CTIME; +@@ -680,6 +732,7 @@ + } + if (!S_ISDIR(inode->i_mode)) + newattrs.ia_valid |= ATTR_KILL_SUID|ATTR_KILL_SGID; ++ + down(&inode->i_sem); + error = notify_change(dentry, &newattrs); + up(&inode->i_sem); +.old..........pc/vfs_nointent_2.6.0-suse/fs/exec.c +.new.........fs/exec.c +Index: linux-2.6.3-20/fs/exec.c +=================================================================== +--- linux-2.6.3-20.orig/fs/exec.c 2004-03-08 14:23:40.000000000 -0800 ++++ linux-2.6.3-20/fs/exec.c 2004-03-08 14:40:01.000000000 -0800 +@@ -1406,7 +1406,7 @@ + goto close_fail; + if (!file->f_op->write) + goto close_fail; +- if (do_truncate(file->f_dentry, 0) != 0) 
++ if (do_truncate(file->f_dentry, 0, 0) != 0) + goto close_fail; + + retval = binfmt->core_dump(signr, regs, file); +.old..........pc/vfs_nointent_2.6.0-suse/include/linux/fs.h +.new.........include/linux/fs.h +Index: linux-2.6.3-20/include/linux/fs.h +=================================================================== +--- linux-2.6.3-20.orig/include/linux/fs.h 2004-03-08 14:23:41.000000000 -0800 ++++ linux-2.6.3-20/include/linux/fs.h 2004-03-08 14:42:10.000000000 -0800 +@@ -838,13 +838,20 @@ + int (*create) (struct inode *,struct dentry *,int, struct nameidata *); + struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); + int (*link) (struct dentry *,struct inode *,struct dentry *); ++ int (*link_raw) (struct nameidata *,struct nameidata *); + int (*unlink) (struct inode *,struct dentry *); ++ int (*unlink_raw) (struct nameidata *); + int (*symlink) (struct inode *,struct dentry *,const char *); ++ int (*symlink_raw) (struct nameidata *,const char *); + int (*mkdir) (struct inode *,struct dentry *,int); ++ int (*mkdir_raw) (struct nameidata *,int); + int (*rmdir) (struct inode *,struct dentry *); ++ int (*rmdir_raw) (struct nameidata *); + int (*mknod) (struct inode *,struct dentry *,int,dev_t); ++ int (*mknod_raw) (struct nameidata *,int,dev_t); + int (*rename) (struct inode *, struct dentry *, + struct inode *, struct dentry *); ++ int (*rename_raw) (struct nameidata *, struct nameidata *); + int (*readlink) (struct dentry *, char __user *,int); + int (*follow_link) (struct dentry *, struct nameidata *); + void (*truncate) (struct inode *); +@@ -1130,7 +1137,7 @@ + + /* fs/open.c */ + +-extern int do_truncate(struct dentry *, loff_t start); ++extern int do_truncate(struct dentry *, loff_t start, int called_from_open); + extern struct file *filp_open(const char *, int, int); + extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); + extern struct file * dentry_open_it(struct dentry *, struct vfsmount *, int, struct 
lookup_intent *); +.old..........pc/vfs_nointent_2.6.0-suse/net/unix/af_unix.c +.new.........net/unix/af_unix.c +Index: linux-2.6.3-20/net/unix/af_unix.c +=================================================================== +--- linux-2.6.3-20.orig/net/unix/af_unix.c 2004-03-05 02:07:04.000000000 -0800 ++++ linux-2.6.3-20/net/unix/af_unix.c 2004-03-08 14:40:01.000000000 -0800 +@@ -592,6 +592,7 @@ + int err = 0; + + if (sunname->sun_path[0]) { ++ intent_init(&nd.intent, IT_LOOKUP); + err = path_lookup(sunname->sun_path, LOOKUP_FOLLOW, &nd); + if (err) + goto fail; diff --git a/lustre/kernel_patches/series/2.6.3-mm4 b/lustre/kernel_patches/series/2.6.3-mm4 new file mode 100644 index 0000000..cc9c683 --- /dev/null +++ b/lustre/kernel_patches/series/2.6.3-mm4 @@ -0,0 +1,17 @@ +lustre_version.patch +vfs_intent-2.6.3.patch +vfs_nointent-2.6.3-mm4.patch +vfs_races_2.5.72_rev1.patch +ext3-wantedi-2.6.3.patch +ext3-san-jdike-2.5.73.patch +iopen-2.6.3-mm4.patch +export-truncate-2.5.63.patch +export_symbols-2.6.0.patch +ext3-map_inode_page-2.6.0.patch +removepage-2.6.0.patch +dev_read_only_2.6.0.patch +kernel_text_address-2.6.3.patch +ext3-init-generation-2.6.0.patch +ext3-ea-in-inode-2.6.0.patch +fs-intent-2.6.3-mm4.patch +export-2.6.3.patch diff --git a/lustre/kernel_patches/series/suse-2.6.3 b/lustre/kernel_patches/series/suse-2.6.3 new file mode 100644 index 0000000..f2c17c0 --- /dev/null +++ b/lustre/kernel_patches/series/suse-2.6.3 @@ -0,0 +1,17 @@ +lustre_version.patch +vfs_intent-2.6.3-suse.patch +vfs_nointent_2.6.3-suse.patch +vfs_races_2.5.72_rev1.patch +ext3-wantedi-2.6.3.patch +ext3-san-jdike-2.5.73.patch +nfs-cifs-intent-2.6.3-suse.patch +iopen-2.6.0-test6.patch +export-truncate-2.5.63.patch +export_symbols-2.6.0.patch +ext3-map_inode_page-2.6.0.patch +removepage-2.6.0.patch +dev_read_only_2.6.0.patch +kernel_text_address-2.6.3.patch +ext3-init-generation-2.6.0.patch +ext3-ea-in-inode-2.6.0.patch +export-2.6.3.patch diff --git 
a/lustre/kernel_patches/series/vanilla-2.6.3-nfs4 b/lustre/kernel_patches/series/vanilla-2.6.3-nfs4 new file mode 100644 index 0000000..9e4ff97 --- /dev/null +++ b/lustre/kernel_patches/series/vanilla-2.6.3-nfs4 @@ -0,0 +1,20 @@ +uml-patch-2.6.3-rc2-1.patch +uml-fix-2.6.3.patch +lustre_version.patch +vfs_intent-2.6.3.patch +vfs_nointent_2.6.0-uml1.patch +vfs_races_2.5.72_rev1.patch +ext3-wantedi-2.6.3.patch +ext3-san-jdike-2.5.73.patch +iopen-2.6.0-test6.patch +export-truncate-2.5.63.patch +export_symbols-2.6.0.patch +ext3-map_inode_page-2.6.0.patch +removepage-2.6.0.patch +dev_read_only_2.6.0.patch +kernel_text_address-2.6.3.patch +ext3-init-generation-2.6.0.patch +ext3-ea-in-inode-2.6.0.patch +linux-2.6.3-CITI_NFS4_ALL.patch +linux-2.6.3-nfs-intent.patch +export-2.6.3.patch diff --git a/lustre/llite/Makefile.am b/lustre/llite/Makefile.am index 2253f66..493d95c 100644 --- a/lustre/llite/Makefile.am +++ b/lustre/llite/Makefile.am @@ -9,8 +9,13 @@ MODULE = llite modulefs_DATA = llite.o EXTRA_PROGRAMS = llite -llite_SOURCES = dcache.c dir.c file.c llite_close.c llite_lib.c llite_nfs.c -llite_SOURCES += lproc_llite.c namei.c rw.c rw24.c super.c super25.c -llite_SOURCES += symlink.c sysctl.c llite_internal.h +COMMON_SRC = dcache.c dir.c file.c llite_close.c llite_lib.c llite_nfs.c rw.c \ + lproc_llite.c namei.c symlink.c sysctl.c llite_internal.h + +if LINUX25 +llite_SOURCES = $(COMMON_SRC) rw26.c super25.c +else +llite_SOURCES = $(COMMON_SRC) rw24.c super.c +endif include $(top_srcdir)/Rules diff --git a/lustre/llite/llite_lib.c b/lustre/llite/llite_lib.c index c9cf119..8ae804e 100644 --- a/lustre/llite/llite_lib.c +++ b/lustre/llite/llite_lib.c @@ -192,8 +192,12 @@ int lustre_common_fill_super(struct super_block *sb, char *mdc, char *osc) GOTO(out_root, err); } +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) +#warning "Please fix this" +#else /* bug 2805 - set VM readahead to zero */ vm_max_readahead = vm_min_readahead = 0; +#endif sb->s_root = d_alloc_root(root); 
RETURN(err); diff --git a/lustre/llite/rw26.c b/lustre/llite/rw26.c index 640cf05..c8c7cbb 100644 --- a/lustre/llite/rw26.c +++ b/lustre/llite/rw26.c @@ -139,7 +139,7 @@ struct address_space_operations ll_aops = { writepage: ll_writepage_26, writepages: generic_writepages, set_page_dirty: __set_page_dirty_nobuffers, - sync_page: ll_sync_page, + sync_page: NULL, prepare_write: ll_prepare_write, commit_write: ll_commit_write, removepage: ll_removepage, diff --git a/lustre/llite/super.c b/lustre/llite/super.c index 80a6938..57ceb3f 100644 --- a/lustre/llite/super.c +++ b/lustre/llite/super.c @@ -34,7 +34,6 @@ #include #include "llite_internal.h" -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) extern struct address_space_operations ll_aops; extern struct address_space_operations ll_dir_aops; @@ -141,4 +140,3 @@ MODULE_LICENSE("GPL"); module_init(init_lustre_lite); module_exit(exit_lustre_lite); -#endif diff --git a/lustre/llite/super25.c b/lustre/llite/super25.c index 233035f..ee340b9 100644 --- a/lustre/llite/super25.c +++ b/lustre/llite/super25.c @@ -34,8 +34,6 @@ #include #include "llite_internal.h" -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)) - struct super_block * ll_get_sb(struct file_system_type *fs_type, int flags, const char *devname, void * data) { @@ -170,4 +168,3 @@ MODULE_LICENSE("GPL"); module_init(init_lustre_lite); module_exit(exit_lustre_lite); -#endif diff --git a/lustre/lvfs/Makefile.am b/lustre/lvfs/Makefile.am index 1fd7dd1..1569d3b 100644 --- a/lustre/lvfs/Makefile.am +++ b/lustre/lvfs/Makefile.am @@ -3,15 +3,6 @@ # This code is issued under the GNU General Public License. 
# See the file COPYING in this distribution DEFS= -MODULE = lvfs - - -if EXTN -FSMOD = fsfilt_extN -else -FSMOD = fsfilt_ext3 -endif - if LIBLUSTRE noinst_LIBRARIES = liblvfs.a @@ -23,11 +14,36 @@ liblvfs_a_CFLAGS = -fPIC #endif else -modulefs_DATA = lvfs.o $(FSMOD).o fsfilt_reiserfs.o -EXTRA_PROGRAMS = lvfs $(FSMOD) fsfilt_reiserfs -lvfs_SOURCES = lvfs_common.c lvfs_linux.c fsfilt.c lvfs_internal.h +MODULE = lvfs + +if EXTN +FSMOD = fsfilt_extN +else +FSMOD = fsfilt_ext3 endif +modulefs_DATA = lvfs.o $(FSMOD).o + +EXTRA_PROGRAMS = lvfs $(FSMOD) +lvfs_SOURCES = lvfs_common.c lvfs_linux.c fsfilt.c lvfs_internal.h +if EXTN +fsfilt_extN_SOURCES = fsfilt_extN.c lvfs_internal.h +else +fsfilt_ext3_SOURCES = fsfilt_ext3.c lvfs_internal.h +endif +endif include $(top_srcdir)/Rules + +if LINUX25 +# workaround for fsfilt_ext3 +$(FSMOD).o: $(FSMOD).c + $(COMPILE) -UKBUILD_MODNAME -DKBUILD_MODNAME=$(FSMOD) -c -o $(FSMOD)_tmp.o $< + rm -f $(FSMOD)_tmp.c + $(LINUX)/scripts/modpost $(LINUX)/vmlinux $(FSMOD)_tmp.o + $(COMPILE) -UKBUILD_MODNAME -UKBUILD_BASENAME -DKBUILD_BASENAME=$(FSMOD) \ + -c $(FSMOD)_tmp.mod.c + $(LD) -m "`$(LD) --help | awk '/supported emulations/ {print $$4}'`" -r \ + -o $(FSMOD).o $(FSMOD)_tmp.o $(FSMOD)_tmp.mod.o +endif diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index 7774eb5..76a5088 100644 --- a/lustre/lvfs/fsfilt_ext3.c +++ b/lustre/lvfs/fsfilt_ext3.c @@ -25,6 +25,8 @@ #define DEBUG_SUBSYSTEM S_FILTER +#include +#include #include #include #include @@ -43,7 +45,6 @@ #include #include #include -#include static kmem_cache_t *fcb_cache; static atomic_t fcb_cache_count = ATOMIC_INIT(0); diff --git a/lustre/lvfs/lvfs_linux.c b/lustre/lvfs/lvfs_linux.c index 7f381d3..1e58081 100644 --- a/lustre/lvfs/lvfs_linux.c +++ b/lustre/lvfs/lvfs_linux.c @@ -65,6 +65,14 @@ int obd_memmax; # define ASSERT_KERNEL_CTXT(msg) do {} while(0) #endif +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) +#define current_ngroups current->group_info->ngroups 
+#define current_groups current->group_info->small_block +#else +#define current_ngroups current->ngroups +#define current_groups current->groups +#endif + /* push / pop to root of obd store */ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, struct obd_ucred *uc) @@ -89,7 +97,7 @@ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, LASSERT(atomic_read(&new_ctx->pwd->d_count)); save->pwd = dget(current->fs->pwd); save->pwdmnt = mntget(current->fs->pwdmnt); - save->ngroups = current->ngroups; + save->ngroups = current_ngroups; LASSERT(save->pwd); LASSERT(save->pwdmnt); @@ -100,18 +108,18 @@ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, save->ouc.ouc_fsuid = current->fsuid; save->ouc.ouc_fsgid = current->fsgid; save->ouc.ouc_cap = current->cap_effective; - save->ouc.ouc_suppgid1 = current->groups[0]; - save->ouc.ouc_suppgid2 = current->groups[1]; + save->ouc.ouc_suppgid1 = current_groups[0]; + save->ouc.ouc_suppgid2 = current_groups[1]; current->fsuid = uc->ouc_fsuid; current->fsgid = uc->ouc_fsgid; current->cap_effective = uc->ouc_cap; - current->ngroups = 0; + current_ngroups = 0; if (uc->ouc_suppgid1 != -1) - current->groups[current->ngroups++] = uc->ouc_suppgid1; + current_groups[current_ngroups++] = uc->ouc_suppgid1; if (uc->ouc_suppgid2 != -1) - current->groups[current->ngroups++] = uc->ouc_suppgid2; + current_groups[current_ngroups++] = uc->ouc_suppgid2; } set_fs(new_ctx->fs); set_fs_pwd(current->fs, new_ctx->pwdmnt, new_ctx->pwd); @@ -160,9 +168,9 @@ void pop_ctxt(struct obd_run_ctxt *saved, struct obd_run_ctxt *new_ctx, current->fsuid = saved->ouc.ouc_fsuid; current->fsgid = saved->ouc.ouc_fsgid; current->cap_effective = saved->ouc.ouc_cap; - current->ngroups = saved->ngroups; - current->groups[0] = saved->ouc.ouc_suppgid1; - current->groups[1] = saved->ouc.ouc_suppgid2; + current_ngroups = saved->ngroups; + current_groups[0] = saved->ouc.ouc_suppgid1; + current_groups[1] = 
saved->ouc.ouc_suppgid2; } /* diff --git a/lustre/mds/mds_fs.c b/lustre/mds/mds_fs.c index 6c69bd4..9bbb11a 100644 --- a/lustre/mds/mds_fs.c +++ b/lustre/mds/mds_fs.c @@ -370,10 +370,9 @@ int mds_fs_setup(struct obd_device *obd, struct vfsmount *mnt) int rc; ENTRY; - - /* Get rid of unneeded supplementary groups */ - current->ngroups = 0; - memset(current->groups, 0, sizeof(current->groups)); + rc = cleanup_group_info(); + if (rc) + RETURN(rc); mds->mds_vfsmnt = mnt; mds->mds_sb = mnt->mnt_root->d_inode->i_sb; diff --git a/lustre/obdclass/Makefile.am b/lustre/obdclass/Makefile.am index 9fc783b..4451952 100644 --- a/lustre/obdclass/Makefile.am +++ b/lustre/obdclass/Makefile.am @@ -37,3 +37,15 @@ lustre_build_version: endif include $(top_srcdir)/Rules + +if LINUX25 +# workaround for llog_test +llog_test.o: llog_test.c + $(COMPILE) -UKBUILD_MODNAME -DKBUILD_MODNAME=llog_test -c -o llog_test_tmp.o $< + rm -f llog_test_tmp.c + $(LINUX)/scripts/modpost $(LINUX)/vmlinux llog_test_tmp.o + $(COMPILE) -UKBUILD_MODNAME -UKBUILD_BASENAME -DKBUILD_BASENAME=llog_test \ + -c llog_test_tmp.mod.c + $(LD) -m "`$(LD) --help | awk '/supported emulations/ {print $$4}'`" -r \ + -o llog_test.o llog_test_tmp.o llog_test_tmp.mod.o +endif diff --git a/lustre/obdfilter/Makefile.am b/lustre/obdfilter/Makefile.am index 18fd5f3..afa74fd 100644 --- a/lustre/obdfilter/Makefile.am +++ b/lustre/obdfilter/Makefile.am @@ -6,7 +6,14 @@ MODULE = obdfilter modulefs_DATA = obdfilter.o EXTRA_PROGRAMS = obdfilter -obdfilter_SOURCES = filter.c filter_io.c filter_log.c filter_san.c \ - filter_io_24.c lproc_obdfilter.c filter_internal.h filter_lvb.c + +COMMON_SRC = filter.c filter_io.c filter_log.c filter_san.c lproc_obdfilter.c \ + filter_internal.h filter_lvb.c + +if LINUX25 +obdfilter_SOURCES = $(COMMON_SRC) filter_io_26.c +else +obdfilter_SOURCES = $(COMMON_SRC) filter_io_24.c +endif include $(top_srcdir)/Rules diff --git a/lustre/obdfilter/filter_io_24.c b/lustre/obdfilter/filter_io_24.c index 
15ac8e9..a3464f3 100644 --- a/lustre/obdfilter/filter_io_24.c +++ b/lustre/obdfilter/filter_io_24.c @@ -29,8 +29,6 @@ #include // XXX kill me soon #include -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - #define DEBUG_SUBSYSTEM S_FILTER #include @@ -345,6 +343,3 @@ cleanup: RETURN(rc); } - -#endif - diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c index 95c96e6..5ce65c7 100644 --- a/lustre/obdfilter/filter_io_26.c +++ b/lustre/obdfilter/filter_io_26.c @@ -97,7 +97,8 @@ static int filter_range_is_mapped(struct inode *inode, obd_size offset, int len) int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, - struct niobuf_local *res, struct obd_trans_info *oti) + struct niobuf_local *res, struct obd_trans_info *oti, + int rc) { struct obd_device *obd = exp->exp_obd; struct obd_run_ctxt saved; @@ -105,7 +106,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, struct fsfilt_objinfo fso; struct iattr iattr = { .ia_valid = ATTR_SIZE, .ia_size = 0, }; struct inode *inode = NULL; - int rc = 0, i, k, cleanup_phase = 0, err; + int i, k, cleanup_phase = 0, err; unsigned long now = jiffies; /* DEBUGGING OST TIMEOUTS */ int blocks_per_page; struct dio_request *dreq; @@ -115,6 +116,9 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, LASSERT(objcount == 1); LASSERT(current->journal_info == NULL); + if (rc != 0) + GOTO(cleanup, rc); + inode = res->dentry->d_inode; blocks_per_page = PAGE_SIZE >> inode->i_blkbits; LASSERT(blocks_per_page <= MAX_BLOCKS_PER_PAGE); diff --git a/lustre/ost/ost_handler.c b/lustre/ost/ost_handler.c index 3771ded..1359623 100644 --- a/lustre/ost/ost_handler.c +++ b/lustre/ost/ost_handler.c @@ -1124,9 +1124,9 @@ static int ost_setup(struct obd_device *obddev, obd_count len, void *buf) int rc; ENTRY; - /* Get rid of unneeded supplementary groups */ - current->ngroups = 0; - memset(current->groups, 
0, sizeof(current->groups)); + rc = cleanup_group_info(); + if (rc) + RETURN(rc); rc = llog_start_commit_thread(); if (rc < 0) diff --git a/lustre/portals/Rules.linux b/lustre/portals/Rules.linux index 93943b7..232a248 100644 --- a/lustre/portals/Rules.linux +++ b/lustre/portals/Rules.linux @@ -3,11 +3,15 @@ if LINUX25 -basename=$(shell echo $< | sed -e 's/\.c//g' | sed -e 's/-//g' | sed -e 's/\.o//g') +basename=$(shell echo $< | sed -e 's/\.c//g' | sed -e 's/-//g' | sed -e 's/\.o//g' | sed -e 's/^.*\///g') AM_CPPFLAGS= -Wstrict-prototypes -Wno-trigraphs -fno-strict-aliasing -fno-common -pipe -mpreferred-stack-boundary=2 -DKBUILD_MODNAME=$(MODULE) -DKBUILD_BASENAME=$(basename) -$(MODULE).o: $($(MODULE)_OBJECTS) - $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $($(MODULE)_OBJECTS) +$(MODULE).o: $($(MODULE)_OBJECTS) $($(MODULE)_DEPENDENCIES) + $(LD) -m $(MOD_LINK) -r -o $(MODULE)_tmp.o $($(MODULE)_OBJECTS) + rm -f $(MODULE)_tmp.c + $(LINUX)/scripts/modpost $(LINUX)/vmlinux $(MODULE)_tmp.o + $(COMPILE) -UKBUILD_BASENAME -DKBUILD_BASENAME=$(MODULE) -c $(MODULE)_tmp.mod.c + $(LD) -m $(MOD_LINK) -r -o $(MODULE).o $(MODULE)_tmp.o $(MODULE)_tmp.mod.o else diff --git a/lustre/portals/archdep.m4 b/lustre/portals/archdep.m4 index 7801957..65cfaff 100644 --- a/lustre/portals/archdep.m4 +++ b/lustre/portals/archdep.m4 @@ -65,7 +65,7 @@ case ${host_cpu} in KCFLAGS='-g -Wall -pipe -Wno-trigraphs -Wstrict-prototypes -fno-strict-aliasing -fno-common ' case ${linux25} in yes ) - KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/include -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/kernel/skas/include -O2 -nostdinc -iwithprefix include -DKBUILD_BASENAME=$(MODULE) -DKBUILD_MODNAME=$(MODULE) ' + KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 
-DMODULE -I$(LINUX)/arch/um/include -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/kernel/skas/include -O2 -nostdinc -iwithprefix include' ;; * ) KCPPFLAGS='-D__KERNEL__ -U__i386__ -Ui386 -DUM_FASTCALL -D__arch_um__ -DSUBARCH="i386" -DNESTING=0 -D_LARGEFILE64_SOURCE -Derrno=kernel_errno -DPATCHLEVEL=4 -DMODULE -I$(LINUX)/arch/um/kernel/tt/include -I$(LINUX)/arch/um/include ' @@ -206,11 +206,10 @@ if test $host_cpu != "lib" ; then AC_MSG_CHECKING(for MODVERSIONS) if egrep -e 'MODVERSIONS.*1' $LINUX/include/linux/autoconf.h >/dev/null 2>&1; then - MFLAGS="-DMODULE -DMODVERSIONS -include $LINUX/include/linux/modversions.h -DEXPORT_SYMTAB" - AC_MSG_RESULT(yes) - else - MFLAGS= - AC_MSG_RESULT(no) + if test $linux25 != "yes"; then + MFLAGS="-DMODULE -DMODVERSIONS -include $LINUX/include/linux/modversions.h -DEXPORT_SYMTAB" + AC_MSG_RESULT(yes) + fi fi fi diff --git a/lustre/portals/knals/socknal/socknal.c b/lustre/portals/knals/socknal/socknal.c index 2c44b43..0dd5d11 100644 --- a/lustre/portals/knals/socknal/socknal.c +++ b/lustre/portals/knals/socknal/socknal.c @@ -196,7 +196,7 @@ ksocknal_bind_irq (unsigned int irq) /* FIXME: Find a better method of setting IRQ affinity... 
*/ - call_usermodehelper (argv[0], argv, envp); + USERMODEHELPER(argv[0], argv, envp); #endif } diff --git a/lustre/portals/knals/socknal/socknal.h b/lustre/portals/knals/socknal/socknal.h index db8c842..17a7e49 100644 --- a/lustre/portals/knals/socknal/socknal.h +++ b/lustre/portals/knals/socknal/socknal.h @@ -60,6 +60,7 @@ #define DEBUG_SUBSYSTEM S_SOCKNAL #include +#include #include #include #include diff --git a/lustre/portals/utils/debug.c b/lustre/portals/utils/debug.c index 01e690f..69880ea 100644 --- a/lustre/portals/utils/debug.c +++ b/lustre/portals/utils/debug.c @@ -43,10 +43,10 @@ #include #include -#define BUG() /* workaround for module.h includes */ #include #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#define BUG() /* workaround for module.h includes */ #include #endif @@ -524,41 +524,43 @@ int jt_dbg_mark_debug_buf(int argc, char **argv) return 0; } +static struct mod_paths { + char *name, *path; +} mod_paths[] = { + {"portals", "lustre/portals/libcfs"}, + {"ksocknal", "lustre/portals/knals/socknal"}, + {"kptlrouter", "lustre/portals/router"}, + {"lvfs", "lustre/lvfs"}, + {"obdclass", "lustre/obdclass"}, + {"llog_test", "lustre/obdclass"}, + {"ptlrpc", "lustre/ptlrpc"}, + {"obdext2", "lustre/obdext2"}, + {"ost", "lustre/ost"}, + {"osc", "lustre/osc"}, + {"mds", "lustre/mds"}, + {"mdc", "lustre/mdc"}, + {"llite", "lustre/llite"}, + {"obdecho", "lustre/obdecho"}, + {"ldlm", "lustre/ldlm"}, + {"obdfilter", "lustre/obdfilter"}, + {"extN", "lustre/extN"}, + {"lov", "lustre/lov"}, + {"fsfilt_ext3", "lustre/lvfs"}, + {"fsfilt_extN", "lustre/lvfs"}, + {"fsfilt_reiserfs", "lustre/lvfs"}, + {"mds_ext2", "lustre/mds"}, + {"mds_ext3", "lustre/mds"}, + {"mds_extN", "lustre/mds"}, + {"ptlbd", "lustre/ptlbd"}, + {"mgmt_svc", "lustre/mgmt"}, + {"mgmt_cli", "lustre/mgmt"}, + {NULL, NULL} +}; + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) int jt_dbg_modules(int argc, char **argv) { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - struct mod_paths { - 
char *name, *path; - } *mp, mod_paths[] = { - {"portals", "lustre/portals/libcfs"}, - {"ksocknal", "lustre/portals/knals/socknal"}, - {"kptlrouter", "lustre/portals/router"}, - {"lvfs", "lustre/lvfs"}, - {"obdclass", "lustre/obdclass"}, - {"llog_test", "lustre/obdclass"}, - {"ptlrpc", "lustre/ptlrpc"}, - {"obdext2", "lustre/obdext2"}, - {"ost", "lustre/ost"}, - {"osc", "lustre/osc"}, - {"mds", "lustre/mds"}, - {"mdc", "lustre/mdc"}, - {"llite", "lustre/llite"}, - {"obdecho", "lustre/obdecho"}, - {"ldlm", "lustre/ldlm"}, - {"obdfilter", "lustre/obdfilter"}, - {"extN", "lustre/extN"}, - {"lov", "lustre/lov"}, - {"fsfilt_ext3", "lustre/lvfs"}, - {"fsfilt_extN", "lustre/lvfs"}, - {"fsfilt_reiserfs", "lustre/lvfs"}, - {"mds_ext2", "lustre/mds"}, - {"mds_ext3", "lustre/mds"}, - {"mds_extN", "lustre/mds"}, - {"ptlbd", "lustre/ptlbd"}, - {"mgmt_svc", "lustre/mgmt"}, - {"mgmt_cli", "lustre/mgmt"}, - {NULL, NULL} - }; + struct mod_paths *mp; char *path = ".."; char *kernel = "linux"; @@ -592,11 +594,49 @@ int jt_dbg_modules(int argc, char **argv) } return 0; +} #else - printf("jt_dbg_module is not yet implemented for Linux 2.5\n"); +int jt_dbg_modules(int argc, char **argv) +{ + struct mod_paths *mp; + char *path = ".."; + char *kernel = "linux"; + const char *proc = "/proc/modules"; + char modname[128], others[128]; + long modaddr; + int rc; + FILE *file; + + if (argc >= 2) + path = argv[1]; + if (argc == 3) + kernel = argv[2]; + if (argc > 3) { + printf("%s [path] [kernel]\n", argv[0]); + return 0; + } + + file = fopen(proc, "r"); + if (!file) { + printf("failed open %s: %s\n", proc, strerror(errno)); + return 0; + } + + while ((rc = fscanf(file, "%s %s %s %s %s %lx\n", + modname, others, others, others, others, &modaddr)) == 6) { + for (mp = mod_paths; mp->name != NULL; mp++) { + if (!strcmp(mp->name, modname)) + break; + } + if (mp->name) { + printf("add-symbol-file %s/%s/%s.o 0x%0lx\n", path, + mp->path, mp->name, modaddr); + } + } + return 0; -#endif /* linux 2.5 */ } 
+#endif /* linux 2.5 */ int jt_dbg_panic(int argc, char **argv) { diff --git a/lustre/scripts/merge1.sh b/lustre/scripts/merge1.sh index d7e0069..9bdc9b5 100755 --- a/lustre/scripts/merge1.sh +++ b/lustre/scripts/merge1.sh @@ -1,7 +1,7 @@ #!/bin/sh -e CONFLICTS=cvs-merge-conflicts -CVS=cvs +CVS="cvs -z3" if [ -f .mergeinfo ] ; then echo ".mergeinfo exists - clean up first" -- 1.8.3.1