#include <linux/portals_compat25.h>
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-# define PGCACHE_WRLOCK(mapping) write_lock(&mapping->page_lock)
-# define PGCACHE_WRUNLOCK(mapping) write_unlock(&mapping->page_lock)
+
+/* XXX our code should be using the 2.6 calls, not the other way around */
+#define TryLockPage(page) TestSetPageLocked(page)
+#define filemap_fdatasync(mapping) filemap_fdatawrite(mapping)
+#define Page_Uptodate(page) PageUptodate(page)
#define KDEVT_INIT(val) { .value = val }
#define ll_vfs_create(a,b,c,d) vfs_create(a,b,c,d)
+#define ll_dev_t dev_t
+
+#include <linux/writeback.h>
+
#else /* 2.4.. */
#define ll_vfs_create(a,b,c,d) vfs_create(a,b,c)
#define ll_permission(a,b,c) permission(a,b)
-# define PGCACHE_WRLOCK(mapping) spin_lock(&pagecache_lock)
-# define PGCACHE_WRUNLOCK(mapping) spin_unlock(&pagecache_lock)
+
+#define ll_dev_t int
+
+static inline void clear_page_dirty(struct page *page)
+{
+ if (PageDirty(page))
+ ClearPageDirty(page);
+}
/* 2.5 uses hlists for some things, like the d_hash. we'll treat them
* as 2.5 and let macros drop back.. */
#endif /* end of 2.4 compat macros */
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-# define filemap_fdatasync(mapping) filemap_fdatawrite(mapping)
-#endif
-
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-# define TryLockPage(page) TestSetPageLocked(page)
-#endif
-
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-# define Page_Uptodate(page) PageUptodate(page)
-#endif
-
#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
#define rb_node_s rb_node
#define rb_root_s rb_root
--- /dev/null
+#
+# Automatically generated make config: don't edit
+#
+CONFIG_USERMODE=y
+CONFIG_MMU=y
+CONFIG_UID16=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+
+#
+# UML-specific options
+#
+CONFIG_MODE_TT=y
+# CONFIG_MODE_SKAS is not set
+CONFIG_NET=y
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_MISC=y
+CONFIG_HOSTFS=y
+# CONFIG_HPPFS is not set
+CONFIG_MCONSOLE=y
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_HOST_2G_2G is not set
+# CONFIG_UML_SMP is not set
+# CONFIG_SMP is not set
+CONFIG_NEST_LEVEL=0
+CONFIG_KERNEL_HALF_GIGS=1
+# CONFIG_HIGHMEM is not set
+# CONFIG_PROC_MM is not set
+CONFIG_KERNEL_STACK_ORDER=2
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+
+#
+# General setup
+#
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_SYSCTL=y
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_EMBEDDED is not set
+CONFIG_KALLSYMS=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+
+#
+# Loadable module support
+#
+# CONFIG_MODULES is not set
+
+#
+# Generic Driver Options
+#
+# CONFIG_FW_LOADER is not set
+
+#
+# Character Devices
+#
+CONFIG_STDIO_CONSOLE=y
+CONFIG_SSL=y
+CONFIG_FD_CHAN=y
+# CONFIG_NULL_CHAN is not set
+CONFIG_PORT_CHAN=y
+CONFIG_PTY_CHAN=y
+CONFIG_TTY_CHAN=y
+CONFIG_XTERM_CHAN=y
+CONFIG_CON_ZERO_CHAN="fd:0,fd:1"
+CONFIG_CON_CHAN="xterm"
+CONFIG_SSL_CHAN="pty"
+CONFIG_UNIX98_PTYS=y
+CONFIG_UNIX98_PTY_COUNT=256
+# CONFIG_WATCHDOG is not set
+# CONFIG_UML_SOUND is not set
+# CONFIG_SOUND is not set
+# CONFIG_HOSTAUDIO is not set
+
+#
+# Block Devices
+#
+CONFIG_BLK_DEV_UBD=y
+# CONFIG_BLK_DEV_UBD_SYNC is not set
+CONFIG_BLK_DEV_COW_COMMON=y
+CONFIG_BLK_DEV_LOOP=y
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_RAM is not set
+# CONFIG_MMAPPER is not set
+CONFIG_NETDEVICES=y
+
+#
+# UML Network Devices
+#
+CONFIG_UML_NET=y
+CONFIG_UML_NET_ETHERTAP=y
+CONFIG_UML_NET_TUNTAP=y
+CONFIG_UML_NET_SLIP=y
+CONFIG_UML_NET_DAEMON=y
+CONFIG_UML_NET_MCAST=y
+# CONFIG_UML_NET_PCAP is not set
+# CONFIG_UML_NET_SLIRP is not set
+
+#
+# Networking support
+#
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+# CONFIG_NETLINK_DEV is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_INET_ECN is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_IPV6 is not set
+# CONFIG_DECNET is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_XFRM_USER is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+CONFIG_IPV6_SCTP__=y
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_LLC is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_FASTROUTE is not set
+# CONFIG_NET_HW_FLOWCONTROL is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+CONFIG_DUMMY=y
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+CONFIG_TUN=y
+# CONFIG_ETHERTAP is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+# CONFIG_NET_ETHERNET is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+
+#
+# Ethernet (10000 Mbit)
+#
+CONFIG_PPP=y
+# CONFIG_PPP_MULTILINK is not set
+# CONFIG_PPP_FILTER is not set
+# CONFIG_PPP_ASYNC is not set
+# CONFIG_PPP_SYNC_TTY is not set
+# CONFIG_PPP_DEFLATE is not set
+# CONFIG_PPP_BSDCOMP is not set
+# CONFIG_PPPOE is not set
+CONFIG_SLIP=y
+# CONFIG_SLIP_COMPRESSED is not set
+# CONFIG_SLIP_SMART is not set
+# CONFIG_SLIP_MODE_SLIP6 is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Token Ring devices (depends on LLC=y)
+#
+# CONFIG_SHAPER is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_FAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_DEVFS_FS=y
+CONFIG_DEVFS_MOUNT=y
+# CONFIG_DEVFS_DEBUG is not set
+CONFIG_DEVPTS_FS=y
+# CONFIG_DEVPTS_FS_XATTR is not set
+CONFIG_TMPFS=y
+CONFIG_RAMFS=y
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+# CONFIG_NFS_FS is not set
+# CONFIG_NFSD is not set
+# CONFIG_EXPORTFS is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_INTERMEZZO_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+
+#
+# Security options
+#
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+# CONFIG_CRYPTO is not set
+
+#
+# Library routines
+#
+# CONFIG_CRC32 is not set
+
+#
+# SCSI support
+#
+# CONFIG_SCSI is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+# CONFIG_MD is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Kernel hacking
+#
+CONFIG_DEBUG_SLAB=y
+# CONFIG_DEBUG_SPINLOCK is not set
+CONFIG_DEBUG_INFO=y
+CONFIG_FRAME_POINTER=y
+CONFIG_PT_PROXY=y
+# CONFIG_GPROF is not set
+# CONFIG_GCOV is not set
--- /dev/null
+ include/linux/dynlocks.h | 33 ++++++++++
+ lib/Makefile | 4 -
+ lib/dynlocks.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 187 insertions(+), 2 deletions(-)
+
+--- /dev/null 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.18-alexey/include/linux/dynlocks.h 2003-09-01 16:33:25.000000000 +0400
+@@ -0,0 +1,33 @@
++#ifndef _LINUX_DYNLOCKS_H
++#define _LINUX_DYNLOCKS_H
++
++#include <linux/list.h>
++#include <linux/wait.h>
++
++struct dynlock_member {
++ struct list_head dl_list; /* link in struct dynlock's dl_list */
++ unsigned long dl_value; /* lock value */
++ int dl_refcount; /* number of users (holders and waiters) */
++ int dl_readers; /* shared holds currently granted */
++ int dl_writers; /* exclusive holds; one pid may take several */
++ int dl_pid; /* pid of the latest holder, 0 when released */
++ wait_queue_head_t dl_wait; /* dynlock_lock() callers sleep here */
++};
++
++/*
++ * lockspace (namespace of locks):
++ * - list of the locks currently in use
++ * - spinlock protecting this list and the counters of its members
++ */
++struct dynlock {
++ struct list_head dl_list; /* struct dynlock_member entries */
++ spinlock_t dl_list_lock; /* taken by dynlock_lock()/dynlock_unlock() */
++};
++
++void dynlock_init(struct dynlock *dl);
++void *dynlock_lock(struct dynlock *dl, unsigned long value, int rw, int gfp);
++void dynlock_unlock(struct dynlock *dl, void *lock);
++
++
++#endif
++
+--- /dev/null 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.18-alexey/lib/dynlocks.c 2003-09-01 16:36:00.000000000 +0400
+@@ -0,0 +1,152 @@
++/*
++ * Dynamic Locks
++ *
++ * struct dynlock is lockspace
++ * one may request lock (exclusive or shared) for some value
++ * in that lockspace
++ *
++ */
++
++#include <linux/dynlocks.h>
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <linux/sched.h>
++
++/*
++ * dynlock_init
++ *
++ * initializes lockspace dl: sets up the spinlock that guards the
++ * list of locks and makes that list empty
++ */
++void dynlock_init(struct dynlock *dl)
++{
++ spin_lock_init(&dl->dl_list_lock);
++ INIT_LIST_HEAD(&dl->dl_list);
++}
++
++/*
++ * dynlock_lock
++ *
++ * acquires the lock for value in lockspace dl: exclusive if rw is
++ * non-zero, shared otherwise. each lock is allocated separately with
++ * kmalloc(), so the caller has to supply the GFP flags.
++ * returns a pointer to the lock, which is intended to be passed to
++ * dynlock_unlock(), or NULL if the allocation fails. may sleep in
++ * wait_event() until conflicting holders have released the lock.
++ */
++void *dynlock_lock(struct dynlock *dl, unsigned long value, int rw, int gfp)
++{
++ struct dynlock_member *nhl = NULL;
++ struct dynlock_member *hl;
++ struct list_head *cur;
++
++repeat:
++ /* find requested lock in lockspace */
++ spin_lock(&dl->dl_list_lock);
++ list_for_each(cur, &dl->dl_list) {
++ hl = list_entry(cur, struct dynlock_member, dl_list);
++ if (hl->dl_value == value) {
++ /* lock is found */
++ if (nhl) {
++ /* someone else inserted this lock
++ * while we were allocating our own,
++ * so we drop the one we allocated
++ */
++ kfree(nhl);
++ nhl = NULL;
++ }
++ hl->dl_refcount++;
++ goto found;
++ }
++ }
++ /* lock not found */
++ if (nhl) {
++ /* we have already allocated a lock. use it */
++ hl = nhl;
++ nhl = NULL;
++ list_add(&hl->dl_list, &dl->dl_list);
++ goto found;
++ }
++ spin_unlock(&dl->dl_list_lock);
++
++ /* lock not found and we haven't allocated a lock yet. allocate it */
++ nhl = kmalloc(sizeof(struct dynlock_member), gfp);
++ if (nhl == NULL)
++ return NULL;
++ nhl->dl_refcount = 1; /* the allocating caller's own reference */
++ nhl->dl_value = value;
++ nhl->dl_readers = 0;
++ nhl->dl_writers = 0;
++ init_waitqueue_head(&nhl->dl_wait);
++
++ /* while the lock was being allocated, someone else may have
++ * allocated it and put it onto the list. check for this situation
++ */
++ goto repeat;
++
++found: /* dl_list_lock is held here */
++ if (rw) {
++ /* exclusive lock: user doesn't want to share the lock at all
++ * NOTE: one process may take the same lock several times
++ * this functionality is useful for rename operations */
++ while ((hl->dl_writers && hl->dl_pid != current->pid) ||
++ hl->dl_readers) {
++ spin_unlock(&dl->dl_list_lock);
++ wait_event(hl->dl_wait,
++ hl->dl_writers == 0 && hl->dl_readers == 0);
++ spin_lock(&dl->dl_list_lock);
++ }
++ hl->dl_writers++;
++ } else {
++ /* shared lock: user does not want to share the lock with a writer */
++ while (hl->dl_writers) {
++ spin_unlock(&dl->dl_list_lock);
++ wait_event(hl->dl_wait, hl->dl_writers == 0);
++ spin_lock(&dl->dl_list_lock);
++ }
++ hl->dl_readers++;
++ }
++ hl->dl_pid = current->pid;
++ spin_unlock(&dl->dl_list_lock);
++
++ return hl;
++}
++
++
++/*
++ * dynlock_unlock
++ *
++ * releases one hold on a lock returned by dynlock_lock() in lockspace dl.
++ * the last reference frees the lock with dl_list_lock still held: once
++ * it is dropped another user may free hl, so hl is not read afterwards.
++ */
++void dynlock_unlock(struct dynlock *dl, void *lock)
++{
++	struct dynlock_member *hl = lock;
++	int wakeup = 0;
++
++	spin_lock(&dl->dl_list_lock);
++	if (hl->dl_writers) {
++		hl->dl_writers--;
++		if (hl->dl_writers == 0)
++			wakeup = 1;
++	} else {
++		hl->dl_readers--;
++		if (hl->dl_readers == 0)
++			wakeup = 1;
++	}
++	if (wakeup) {
++		hl->dl_pid = 0;
++		wake_up(&hl->dl_wait);
++	}
++	if (--(hl->dl_refcount) == 0) {
++		list_del(&hl->dl_list);
++		kfree(hl);
++	}
++	spin_unlock(&dl->dl_list_lock);
++}
++
++EXPORT_SYMBOL(dynlock_init);
++EXPORT_SYMBOL(dynlock_lock);
++EXPORT_SYMBOL(dynlock_unlock);
++
+--- linux-2.4.18/lib/Makefile~dynamic-locks-2.4.18-chaos 2003-08-29 11:57:40.000000000 +0400
++++ linux-2.4.18-alexey/lib/Makefile 2003-09-01 16:35:23.000000000 +0400
+@@ -8,9 +8,9 @@
+
+ L_TARGET := lib.a
+
+-export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o rbtree.o
++export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o rbtree.o dynlocks.o
+
+-obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o bust_spinlocks.o rbtree.o
++obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o bust_spinlocks.o rbtree.o dynlocks.o
+
+ obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
+ obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
+
+_
--- /dev/null
+ include/linux/dynlocks.h | 33 ++++++++++
+ lib/Makefile | 4 -
+ lib/dynlocks.c | 152 +++++++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 187 insertions(+), 2 deletions(-)
+
+Index: linux-2.4.20-rh/include/linux/dynlocks.h
+===================================================================
+--- linux-2.4.20-rh.orig/include/linux/dynlocks.h 2003-09-04 18:25:49.000000000 +0800
++++ linux-2.4.20-rh/include/linux/dynlocks.h 2003-09-04 18:25:49.000000000 +0800
+@@ -0,0 +1,33 @@
++#ifndef _LINUX_DYNLOCKS_H
++#define _LINUX_DYNLOCKS_H
++
++#include <linux/list.h>
++#include <linux/wait.h>
++
++struct dynlock_member {
++ struct list_head dl_list; /* link in struct dynlock's dl_list */
++ unsigned long dl_value; /* lock value */
++ int dl_refcount; /* number of users (holders and waiters) */
++ int dl_readers; /* shared holds currently granted */
++ int dl_writers; /* exclusive holds; one pid may take several */
++ int dl_pid; /* pid of the latest holder, 0 when released */
++ wait_queue_head_t dl_wait; /* dynlock_lock() callers sleep here */
++};
++
++/*
++ * lockspace (namespace of locks):
++ * - list of the locks currently in use
++ * - spinlock protecting this list and the counters of its members
++ */
++struct dynlock {
++ struct list_head dl_list; /* struct dynlock_member entries */
++ spinlock_t dl_list_lock; /* taken by dynlock_lock()/dynlock_unlock() */
++};
++
++void dynlock_init(struct dynlock *dl);
++void *dynlock_lock(struct dynlock *dl, unsigned long value, int rw, int gfp);
++void dynlock_unlock(struct dynlock *dl, void *lock);
++
++
++#endif
++
+Index: linux-2.4.20-rh/lib/dynlocks.c
+===================================================================
+--- linux-2.4.20-rh.orig/lib/dynlocks.c 2003-09-04 18:25:49.000000000 +0800
++++ linux-2.4.20-rh/lib/dynlocks.c 2003-09-04 18:25:49.000000000 +0800
+@@ -0,0 +1,152 @@
++/*
++ * Dynamic Locks
++ *
++ * struct dynlock is lockspace
++ * one may request lock (exclusive or shared) for some value
++ * in that lockspace
++ *
++ */
++
++#include <linux/dynlocks.h>
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <linux/sched.h>
++
++/*
++ * dynlock_init
++ *
++ * initializes lockspace dl: sets up the spinlock that guards the
++ * list of locks and makes that list empty
++ */
++void dynlock_init(struct dynlock *dl)
++{
++ spin_lock_init(&dl->dl_list_lock);
++ INIT_LIST_HEAD(&dl->dl_list);
++}
++
++/*
++ * dynlock_lock
++ *
++ * acquires the lock for value in lockspace dl: exclusive if rw is
++ * non-zero, shared otherwise. each lock is allocated separately with
++ * kmalloc(), so the caller has to supply the GFP flags.
++ * returns a pointer to the lock, which is intended to be passed to
++ * dynlock_unlock(), or NULL if the allocation fails. may sleep in
++ * wait_event() until conflicting holders have released the lock.
++ */
++void *dynlock_lock(struct dynlock *dl, unsigned long value, int rw, int gfp)
++{
++ struct dynlock_member *nhl = NULL;
++ struct dynlock_member *hl;
++ struct list_head *cur;
++
++repeat:
++ /* find requested lock in lockspace */
++ spin_lock(&dl->dl_list_lock);
++ list_for_each(cur, &dl->dl_list) {
++ hl = list_entry(cur, struct dynlock_member, dl_list);
++ if (hl->dl_value == value) {
++ /* lock is found */
++ if (nhl) {
++ /* someone else inserted this lock
++ * while we were allocating our own,
++ * so we drop the one we allocated
++ */
++ kfree(nhl);
++ nhl = NULL;
++ }
++ hl->dl_refcount++;
++ goto found;
++ }
++ }
++ /* lock not found */
++ if (nhl) {
++ /* we have already allocated a lock. use it */
++ hl = nhl;
++ nhl = NULL;
++ list_add(&hl->dl_list, &dl->dl_list);
++ goto found;
++ }
++ spin_unlock(&dl->dl_list_lock);
++
++ /* lock not found and we haven't allocated a lock yet. allocate it */
++ nhl = kmalloc(sizeof(struct dynlock_member), gfp);
++ if (nhl == NULL)
++ return NULL;
++ nhl->dl_refcount = 1; /* the allocating caller's own reference */
++ nhl->dl_value = value;
++ nhl->dl_readers = 0;
++ nhl->dl_writers = 0;
++ init_waitqueue_head(&nhl->dl_wait);
++
++ /* while the lock was being allocated, someone else may have
++ * allocated it and put it onto the list. check for this situation
++ */
++ goto repeat;
++
++found: /* dl_list_lock is held here */
++ if (rw) {
++ /* exclusive lock: user doesn't want to share the lock at all
++ * NOTE: one process may take the same lock several times
++ * this functionality is useful for rename operations */
++ while ((hl->dl_writers && hl->dl_pid != current->pid) ||
++ hl->dl_readers) {
++ spin_unlock(&dl->dl_list_lock);
++ wait_event(hl->dl_wait,
++ hl->dl_writers == 0 && hl->dl_readers == 0);
++ spin_lock(&dl->dl_list_lock);
++ }
++ hl->dl_writers++;
++ } else {
++ /* shared lock: user does not want to share the lock with a writer */
++ while (hl->dl_writers) {
++ spin_unlock(&dl->dl_list_lock);
++ wait_event(hl->dl_wait, hl->dl_writers == 0);
++ spin_lock(&dl->dl_list_lock);
++ }
++ hl->dl_readers++;
++ }
++ hl->dl_pid = current->pid;
++ spin_unlock(&dl->dl_list_lock);
++
++ return hl;
++}
++
++
++/*
++ * dynlock_unlock
++ *
++ * releases one hold on a lock returned by dynlock_lock() in lockspace dl.
++ * the last reference frees the lock with dl_list_lock still held: once
++ * it is dropped another user may free hl, so hl is not read afterwards.
++ */
++void dynlock_unlock(struct dynlock *dl, void *lock)
++{
++	struct dynlock_member *hl = lock;
++	int wakeup = 0;
++
++	spin_lock(&dl->dl_list_lock);
++	if (hl->dl_writers) {
++		hl->dl_writers--;
++		if (hl->dl_writers == 0)
++			wakeup = 1;
++	} else {
++		hl->dl_readers--;
++		if (hl->dl_readers == 0)
++			wakeup = 1;
++	}
++	if (wakeup) {
++		hl->dl_pid = 0;
++		wake_up(&hl->dl_wait);
++	}
++	if (--(hl->dl_refcount) == 0) {
++		list_del(&hl->dl_list);
++		kfree(hl);
++	}
++	spin_unlock(&dl->dl_list_lock);
++}
++
++EXPORT_SYMBOL(dynlock_init);
++EXPORT_SYMBOL(dynlock_lock);
++EXPORT_SYMBOL(dynlock_unlock);
++
+Index: linux-2.4.20-rh/lib/Makefile
+===================================================================
+--- linux-2.4.20-rh.orig/lib/Makefile 2002-11-29 07:53:15.000000000 +0800
++++ linux-2.4.20-rh/lib/Makefile 2003-09-04 18:27:26.000000000 +0800
+@@ -8,10 +8,10 @@
+
+ L_TARGET := lib.a
+
+-export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o rbtree.o
++export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o rbtree.o dynlocks.o
+
+ obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o \
+- bust_spinlocks.o rbtree.o dump_stack.o
++ bust_spinlocks.o rbtree.o dump_stack.o dynlocks.o
+
+ obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
+ obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
--- /dev/null
+ include/linux/ext3_fs.h | 1 +
+ 1 files changed, 1 insertion(+)
+
+--- linux-2.4.18/include/linux/ext3_fs.h~ext-2.4-patch-5 2003-08-29 16:53:18.000000000 +0400
++++ linux-2.4.18-alexey/include/linux/ext3_fs.h 2003-09-01 11:50:37.000000000 +0400
+@@ -344,6 +344,7 @@ struct ext3_inode {
+ #define EXT3_MOUNT_WRITEBACK_DATA 0x0C00 /* No data ordering */
+ #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */
+ #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
++#define EXT3_MOUNT_INDEX 0x4000 /* Enable directory index */
+
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef _LINUX_EXT2_FS_H
+
+_
--- /dev/null
+--- ./fs/ext3/balloc.c.orig Fri Apr 12 10:27:49 2002
++++ ./fs/ext3/balloc.c Tue May 7 15:35:59 2002
+@@ -46,18 +46,18 @@ struct ext3_group_desc * ext3_get_group_
+ unsigned long desc;
+ struct ext3_group_desc * gdp;
+
+- if (block_group >= sb->u.ext3_sb.s_groups_count) {
++ if (block_group >= EXT3_SB(sb)->s_groups_count) {
+ ext3_error (sb, "ext3_get_group_desc",
+ "block_group >= groups_count - "
+ "block_group = %d, groups_count = %lu",
+- block_group, sb->u.ext3_sb.s_groups_count);
++ block_group, EXT3_SB(sb)->s_groups_count);
+
+ return NULL;
+ }
+
+ group_desc = block_group / EXT3_DESC_PER_BLOCK(sb);
+ desc = block_group % EXT3_DESC_PER_BLOCK(sb);
+- if (!sb->u.ext3_sb.s_group_desc[group_desc]) {
++ if (!EXT3_SB(sb)->s_group_desc[group_desc]) {
+ ext3_error (sb, "ext3_get_group_desc",
+ "Group descriptor not loaded - "
+ "block_group = %d, group_desc = %lu, desc = %lu",
+@@ -66,9 +66,9 @@ struct ext3_group_desc * ext3_get_group_
+ }
+
+ gdp = (struct ext3_group_desc *)
+- sb->u.ext3_sb.s_group_desc[group_desc]->b_data;
++ EXT3_SB(sb)->s_group_desc[group_desc]->b_data;
+ if (bh)
+- *bh = sb->u.ext3_sb.s_group_desc[group_desc];
++ *bh = EXT3_SB(sb)->s_group_desc[group_desc];
+ return gdp + desc;
+ }
+
+@@ -104,8 +104,8 @@ static int read_block_bitmap (struct sup
+ * this group. The IO will be retried next time.
+ */
+ error_out:
+- sb->u.ext3_sb.s_block_bitmap_number[bitmap_nr] = block_group;
+- sb->u.ext3_sb.s_block_bitmap[bitmap_nr] = bh;
++ EXT3_SB(sb)->s_block_bitmap_number[bitmap_nr] = block_group;
++ EXT3_SB(sb)->s_block_bitmap[bitmap_nr] = bh;
+ return retval;
+ }
+
+@@ -128,16 +128,17 @@ static int __load_block_bitmap (struct s
+ int i, j, retval = 0;
+ unsigned long block_bitmap_number;
+ struct buffer_head * block_bitmap;
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
+
+- if (block_group >= sb->u.ext3_sb.s_groups_count)
++ if (block_group >= sbi->s_groups_count)
+ ext3_panic (sb, "load_block_bitmap",
+ "block_group >= groups_count - "
+ "block_group = %d, groups_count = %lu",
+- block_group, sb->u.ext3_sb.s_groups_count);
++ block_group, EXT3_SB(sb)->s_groups_count);
+
+- if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED) {
+- if (sb->u.ext3_sb.s_block_bitmap[block_group]) {
+- if (sb->u.ext3_sb.s_block_bitmap_number[block_group] ==
++ if (sbi->s_groups_count <= EXT3_MAX_GROUP_LOADED) {
++ if (sbi->s_block_bitmap[block_group]) {
++ if (sbi->s_block_bitmap_number[block_group] ==
+ block_group)
+ return block_group;
+ ext3_error (sb, "__load_block_bitmap",
+@@ -149,21 +150,20 @@ static int __load_block_bitmap (struct s
+ return block_group;
+ }
+
+- for (i = 0; i < sb->u.ext3_sb.s_loaded_block_bitmaps &&
+- sb->u.ext3_sb.s_block_bitmap_number[i] != block_group; i++)
++ for (i = 0; i < sbi->s_loaded_block_bitmaps &&
++ sbi->s_block_bitmap_number[i] != block_group; i++)
+ ;
+- if (i < sb->u.ext3_sb.s_loaded_block_bitmaps &&
+- sb->u.ext3_sb.s_block_bitmap_number[i] == block_group) {
+- block_bitmap_number = sb->u.ext3_sb.s_block_bitmap_number[i];
+- block_bitmap = sb->u.ext3_sb.s_block_bitmap[i];
++ if (i < sbi->s_loaded_block_bitmaps &&
++ sbi->s_block_bitmap_number[i] == block_group) {
++ block_bitmap_number = sbi->s_block_bitmap_number[i];
++ block_bitmap = sbi->s_block_bitmap[i];
+ for (j = i; j > 0; j--) {
+- sb->u.ext3_sb.s_block_bitmap_number[j] =
+- sb->u.ext3_sb.s_block_bitmap_number[j - 1];
+- sb->u.ext3_sb.s_block_bitmap[j] =
+- sb->u.ext3_sb.s_block_bitmap[j - 1];
++ sbi->s_block_bitmap_number[j] =
++ sbi->s_block_bitmap_number[j - 1];
++ sbi->s_block_bitmap[j] = sbi->s_block_bitmap[j - 1];
+ }
+- sb->u.ext3_sb.s_block_bitmap_number[0] = block_bitmap_number;
+- sb->u.ext3_sb.s_block_bitmap[0] = block_bitmap;
++ sbi->s_block_bitmap_number[0] = block_bitmap_number;
++ sbi->s_block_bitmap[0] = block_bitmap;
+
+ /*
+ * There's still one special case here --- if block_bitmap == 0
+@@ -173,17 +173,14 @@ static int __load_block_bitmap (struct s
+ if (!block_bitmap)
+ retval = read_block_bitmap (sb, block_group, 0);
+ } else {
+- if (sb->u.ext3_sb.s_loaded_block_bitmaps<EXT3_MAX_GROUP_LOADED)
+- sb->u.ext3_sb.s_loaded_block_bitmaps++;
++ if (sbi->s_loaded_block_bitmaps<EXT3_MAX_GROUP_LOADED)
++ sbi->s_loaded_block_bitmaps++;
+ else
+- brelse (sb->u.ext3_sb.s_block_bitmap
+- [EXT3_MAX_GROUP_LOADED - 1]);
+- for (j = sb->u.ext3_sb.s_loaded_block_bitmaps - 1;
+- j > 0; j--) {
+- sb->u.ext3_sb.s_block_bitmap_number[j] =
+- sb->u.ext3_sb.s_block_bitmap_number[j - 1];
+- sb->u.ext3_sb.s_block_bitmap[j] =
+- sb->u.ext3_sb.s_block_bitmap[j - 1];
++ brelse(sbi->s_block_bitmap[EXT3_MAX_GROUP_LOADED - 1]);
++ for (j = sbi->s_loaded_block_bitmaps - 1; j > 0; j--) {
++ sbi->s_block_bitmap_number[j] =
++ sbi->s_block_bitmap_number[j - 1];
++ sbi->s_block_bitmap[j] = sbi->s_block_bitmap[j - 1];
+ }
+ retval = read_block_bitmap (sb, block_group, 0);
+ }
+@@ -206,24 +203,25 @@ static int __load_block_bitmap (struct s
+ static inline int load_block_bitmap (struct super_block * sb,
+ unsigned int block_group)
+ {
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
+ int slot;
+-
++
+ /*
+ * Do the lookup for the slot. First of all, check if we're asking
+ * for the same slot as last time, and did we succeed that last time?
+ */
+- if (sb->u.ext3_sb.s_loaded_block_bitmaps > 0 &&
+- sb->u.ext3_sb.s_block_bitmap_number[0] == block_group &&
+- sb->u.ext3_sb.s_block_bitmap[0]) {
++ if (sbi->s_loaded_block_bitmaps > 0 &&
++ sbi->s_block_bitmap_number[0] == block_group &&
++ sbi->s_block_bitmap[0]) {
+ return 0;
+ }
+ /*
+ * Or can we do a fast lookup based on a loaded group on a filesystem
+ * small enough to be mapped directly into the superblock?
+ */
+- else if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED &&
+- sb->u.ext3_sb.s_block_bitmap_number[block_group]==block_group
+- && sb->u.ext3_sb.s_block_bitmap[block_group]) {
++ else if (sbi->s_groups_count <= EXT3_MAX_GROUP_LOADED &&
++ sbi->s_block_bitmap_number[block_group] == block_group
++ && sbi->s_block_bitmap[block_group]) {
+ slot = block_group;
+ }
+ /*
+@@ -243,7 +241,7 @@ static inline int load_block_bitmap (str
+ * If it's a valid slot, we may still have cached a previous IO error,
+ * in which case the bh in the superblock cache will be zero.
+ */
+- if (!sb->u.ext3_sb.s_block_bitmap[slot])
++ if (!sbi->s_block_bitmap[slot])
+ return -EIO;
+
+ /*
+@@ -275,7 +273,7 @@ void ext3_free_blocks (handle_t *handle,
+ return;
+ }
+ lock_super (sb);
+- es = sb->u.ext3_sb.s_es;
++ es = EXT3_SB(sb)->s_es;
+ if (block < le32_to_cpu(es->s_first_data_block) ||
+ block + count < block ||
+ (block + count) > le32_to_cpu(es->s_blocks_count)) {
+@@ -304,7 +302,7 @@ do_more:
+ if (bitmap_nr < 0)
+ goto error_return;
+
+- bitmap_bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
++ bitmap_bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr];
+ gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
+ if (!gdp)
+ goto error_return;
+@@ -330,8 +328,8 @@ do_more:
+ if (err)
+ goto error_return;
+
+- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
+- err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
++ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
++ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
+ if (err)
+ goto error_return;
+
+@@ -341,7 +339,7 @@
+ if (block == le32_to_cpu(gdp->bg_block_bitmap) ||
+ block == le32_to_cpu(gdp->bg_inode_bitmap) ||
+ in_range(block, le32_to_cpu(gdp->bg_inode_table),
+- sb->u.ext2_sb.s_itb_per_group)) {
++ EXT3_SB(sb)->s_itb_per_group)) {
+ ext3_error(sb, __FUNCTION__,
+ "Freeing block in system zone - block = %lu",
+ block);
+@@ -410,8 +407,8 @@ do_more:
+ if (!err) err = ret;
+
+ /* And the superblock */
+- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "dirtied superblock");
+- ret = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
++ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "dirtied superblock");
++ ret = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+ if (!err) err = ret;
+
+ if (overflow && !err) {
+@@ -564,12 +560,12 @@ int ext3_new_block (handle_t *handle, st
+ }
+
+ lock_super (sb);
+- es = sb->u.ext3_sb.s_es;
++ es = EXT3_SB(sb)->s_es;
+ if (le32_to_cpu(es->s_free_blocks_count) <=
+ le32_to_cpu(es->s_r_blocks_count) &&
+- ((sb->u.ext3_sb.s_resuid != current->fsuid) &&
+- (sb->u.ext3_sb.s_resgid == 0 ||
+- !in_group_p (sb->u.ext3_sb.s_resgid)) &&
++ ((EXT3_SB(sb)->s_resuid != current->fsuid) &&
++ (EXT3_SB(sb)->s_resgid == 0 ||
++ !in_group_p (EXT3_SB(sb)->s_resgid)) &&
+ !capable(CAP_SYS_RESOURCE)))
+ goto out;
+
+@@ -598,7 +595,7 @@ int ext3_new_block (handle_t *handle, st
+ if (bitmap_nr < 0)
+ goto io_error;
+
+- bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
++ bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr];
+
+ ext3_debug ("goal is at %d:%d.\n", i, j);
+
+@@ -621,9 +618,9 @@ int ext3_new_block (handle_t *handle, st
+ * Now search the rest of the groups. We assume that
+ * i and gdp correctly point to the last group visited.
+ */
+- for (k = 0; k < sb->u.ext3_sb.s_groups_count; k++) {
++ for (k = 0; k < EXT3_SB(sb)->s_groups_count; k++) {
+ i++;
+- if (i >= sb->u.ext3_sb.s_groups_count)
++ if (i >= EXT3_SB(sb)->s_groups_count)
+ i = 0;
+ gdp = ext3_get_group_desc (sb, i, &bh2);
+ if (!gdp) {
+@@ -635,7 +632,7 @@ int ext3_new_block (handle_t *handle, st
+ if (bitmap_nr < 0)
+ goto io_error;
+
+- bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
++ bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr];
+ j = find_next_usable_block(-1, bh,
+ EXT3_BLOCKS_PER_GROUP(sb));
+ if (j >= 0)
+@@ -674,8 +671,8 @@ got_block:
+ fatal = ext3_journal_get_write_access(handle, bh2);
+ if (fatal) goto out;
+
+- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
+- fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
++ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
++ fatal = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
+ if (fatal) goto out;
+
+ tmp = j + i * EXT3_BLOCKS_PER_GROUP(sb)
+@@ -796,7 +804,7 @@ got_block:
+ if (!fatal) fatal = err;
+
+ BUFFER_TRACE(bh, "journal_dirty_metadata for superblock");
+- err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
++ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+ if (!fatal) fatal = err;
+
+ sb->s_dirt = 1;
+@@ -829,11 +837,11 @@ unsigned long ext3_count_free_blocks (st
+ int i;
+
+ lock_super (sb);
+- es = sb->u.ext3_sb.s_es;
++ es = EXT3_SB(sb)->s_es;
+ desc_count = 0;
+ bitmap_count = 0;
+ gdp = NULL;
+- for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
++ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
+ gdp = ext3_get_group_desc (sb, i, NULL);
+ if (!gdp)
+ continue;
+@@ -842,7 +850,7 @@ unsigned long ext3_count_free_blocks (st
+ if (bitmap_nr < 0)
+ continue;
+
+- x = ext3_count_free (sb->u.ext3_sb.s_block_bitmap[bitmap_nr],
++ x = ext3_count_free (EXT3_SB(sb)->s_block_bitmap[bitmap_nr],
+ sb->s_blocksize);
+ printk ("group %d: stored = %d, counted = %lu\n",
+ i, le16_to_cpu(gdp->bg_free_blocks_count), x);
+@@ -853,7 +861,7 @@ unsigned long ext3_count_free_blocks (st
+ unlock_super (sb);
+ return bitmap_count;
+ #else
+- return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_blocks_count);
++ return le32_to_cpu(EXT3_SB(sb)->s_es->s_free_blocks_count);
+ #endif
+ }
+
+@@ -862,7 +870,7 @@ static inline int block_in_use (unsigned
+ unsigned char * map)
+ {
+ return ext3_test_bit ((block -
+- le32_to_cpu(sb->u.ext3_sb.s_es->s_first_data_block)) %
++ le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) %
+ EXT3_BLOCKS_PER_GROUP(sb), map);
+ }
+
+@@ -930,11 +938,11 @@ void ext3_check_blocks_bitmap (struct su
+ struct ext3_group_desc * gdp;
+ int i;
+
+- es = sb->u.ext3_sb.s_es;
++ es = EXT3_SB(sb)->s_es;
+ desc_count = 0;
+ bitmap_count = 0;
+ gdp = NULL;
+- for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
++ for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
+ gdp = ext3_get_group_desc (sb, i, NULL);
+ if (!gdp)
+ continue;
+@@ -968,7 +976,7 @@ void ext3_check_blocks_bitmap (struct su
+ "Inode bitmap for group %d is marked free",
+ i);
+
+- for (j = 0; j < sb->u.ext3_sb.s_itb_per_group; j++)
++ for (j = 0; j < EXT3_SB(sb)->s_itb_per_group; j++)
+ if (!block_in_use (le32_to_cpu(gdp->bg_inode_table) + j,
+ sb, bh->b_data))
+ ext3_error (sb, "ext3_check_blocks_bitmap",
+--- ./fs/ext3/dir.c.orig Fri Apr 12 10:27:49 2002
++++ ./fs/ext3/dir.c Tue May 7 14:54:13 2002
+@@ -52,7 +52,7 @@ int ext3_check_dir_entry (const char * f
+ else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
+ error_msg = "directory entry across blocks";
+ else if (le32_to_cpu(de->inode) >
+- le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count))
++ le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count))
+ error_msg = "inode out of bounds";
+
+ if (error_msg != NULL)
+--- ./fs/ext3/ialloc.c.orig Fri Apr 12 10:27:49 2002
++++ ./fs/ext3/ialloc.c Tue May 7 15:39:26 2002
+@@ -73,8 +73,8 @@ static int read_inode_bitmap (struct sup
+ * this group. The IO will be retried next time.
+ */
+ error_out:
+- sb->u.ext3_sb.s_inode_bitmap_number[bitmap_nr] = block_group;
+- sb->u.ext3_sb.s_inode_bitmap[bitmap_nr] = bh;
++ EXT3_SB(sb)->s_inode_bitmap_number[bitmap_nr] = block_group;
++ EXT3_SB(sb)->s_inode_bitmap[bitmap_nr] = bh;
+ return retval;
+ }
+
+@@ -225,7 +225,7 @@ void ext3_free_inode (handle_t *handle,
+ clear_inode (inode);
+
+ lock_super (sb);
+- es = sb->u.ext3_sb.s_es;
++ es = EXT3_SB(sb)->s_es;
+ if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
+ ext3_error (sb, "ext3_free_inode",
+ "reserved or nonexistent inode %lu", ino);
+@@ -237,7 +237,7 @@ void ext3_free_inode (handle_t *handle,
+ if (bitmap_nr < 0)
+ goto error_return;
+
+- bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr];
++ bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr];
+
+ BUFFER_TRACE(bh, "get_write_access");
+ fatal = ext3_journal_get_write_access(handle, bh);
+@@ -255,8 +255,8 @@ void ext3_free_inode (handle_t *handle,
+ fatal = ext3_journal_get_write_access(handle, bh2);
+ if (fatal) goto error_return;
+
+- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get write access");
+- fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
++ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get write access");
++ fatal = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
+ if (fatal) goto error_return;
+
+ if (gdp) {
+@@ -271,9 +271,9 @@ void ext3_free_inode (handle_t *handle,
+ if (!fatal) fatal = err;
+ es->s_free_inodes_count =
+ cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1);
+- BUFFER_TRACE(sb->u.ext3_sb.s_sbh,
++ BUFFER_TRACE(EXT3_SB(sb)->s_sbh,
+ "call ext3_journal_dirty_metadata");
+- err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
++ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+ if (!fatal) fatal = err;
+ }
+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
+@@ -305,6 +305,8 @@ struct inode * ext3_new_inode (handle_t
+ int i, j, avefreei;
+ struct inode * inode;
+ int bitmap_nr;
++ struct ext3_inode_info *ei;
++ struct ext3_sb_info *sbi;
+ struct ext3_group_desc * gdp;
+ struct ext3_group_desc * tmp;
+ struct ext3_super_block * es;
+@@ -318,7 +320,9 @@ struct inode * ext3_new_inode (handle_t
+ inode = new_inode(sb);
+ if (!inode)
+ return ERR_PTR(-ENOMEM);
+- init_rwsem(&inode->u.ext3_i.truncate_sem);
++ sbi = EXT3_SB(sb);
++ ei = EXT3_I(inode);
++ init_rwsem(&ei->truncate_sem);
+
+ lock_super (sb);
+ es = sb->u.ext3_sb.s_es;
+@@ -328,9 +332,9 @@ struct inode * ext3_new_inode (handle_t
+
+ if (S_ISDIR(mode)) {
+ avefreei = le32_to_cpu(es->s_free_inodes_count) /
+- sb->u.ext3_sb.s_groups_count;
++ sbi->s_groups_count;
+ if (!gdp) {
+- for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) {
++ for (j = 0; j < sbi->s_groups_count; j++) {
+ struct buffer_head *temp_buffer;
+ tmp = ext3_get_group_desc (sb, j, &temp_buffer);
+ if (tmp &&
+@@ -350,7 +354,7 @@ repeat:
+ /*
+ * Try to place the inode in its parent directory
+ */
+- i = dir->u.ext3_i.i_block_group;
++ i = EXT3_I(dir)->i_block_group;
+ tmp = ext3_get_group_desc (sb, i, &bh2);
+ if (tmp && le16_to_cpu(tmp->bg_free_inodes_count))
+ gdp = tmp;
+@@ -360,10 +364,10 @@ repeat:
+ * Use a quadratic hash to find a group with a
+ * free inode
+ */
+- for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) {
++ for (j = 1; j < sbi->s_groups_count; j <<= 1) {
+ i += j;
+- if (i >= sb->u.ext3_sb.s_groups_count)
+- i -= sb->u.ext3_sb.s_groups_count;
++ if (i >= sbi->s_groups_count)
++ i -= sbi->s_groups_count;
+ tmp = ext3_get_group_desc (sb, i, &bh2);
+ if (tmp &&
+ le16_to_cpu(tmp->bg_free_inodes_count)) {
+@@ -376,9 +380,9 @@ repeat:
+ /*
+ * That failed: try linear search for a free inode
+ */
+- i = dir->u.ext3_i.i_block_group + 1;
+- for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) {
+- if (++i >= sb->u.ext3_sb.s_groups_count)
++ i = EXT3_I(dir)->i_block_group + 1;
++ for (j = 2; j < sbi->s_groups_count; j++) {
++ if (++i >= sbi->s_groups_count)
+ i = 0;
+ tmp = ext3_get_group_desc (sb, i, &bh2);
+ if (tmp &&
+@@ -399,11 +403,11 @@ repeat:
+ if (bitmap_nr < 0)
+ goto fail;
+
+- bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr];
++ bh = sbi->s_inode_bitmap[bitmap_nr];
+
+ if ((j = ext3_find_first_zero_bit ((unsigned long *) bh->b_data,
+- EXT3_INODES_PER_GROUP(sb))) <
+- EXT3_INODES_PER_GROUP(sb)) {
++ sbi->s_inodes_per_group)) <
++ sbi->s_inodes_per_group) {
+ BUFFER_TRACE(bh, "get_write_access");
+ err = ext3_journal_get_write_access(handle, bh);
+ if (err) goto fail;
+@@ -457,13 +461,13 @@ repeat:
+ err = ext3_journal_dirty_metadata(handle, bh2);
+ if (err) goto fail;
+
+- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
+- err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
++ BUFFER_TRACE(sbi->s_sbh, "get_write_access");
++ err = ext3_journal_get_write_access(handle, sbi->s_sbh);
+ if (err) goto fail;
+ es->s_free_inodes_count =
+ cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1);
+- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "call ext3_journal_dirty_metadata");
+- err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
++ BUFFER_TRACE(sbi->s_sbh, "call ext3_journal_dirty_metadata");
++ err = ext3_journal_dirty_metadata(handle, sbi->s_sbh);
+ sb->s_dirt = 1;
+ if (err) goto fail;
+
+@@ -483,31 +487,31 @@ repeat:
+ inode->i_blksize = PAGE_SIZE;
+ inode->i_blocks = 0;
+ inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+- inode->u.ext3_i.i_flags = dir->u.ext3_i.i_flags & ~EXT3_INDEX_FL;
++ ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL;
+ if (S_ISLNK(mode))
+- inode->u.ext3_i.i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL);
++ ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL);
+ #ifdef EXT3_FRAGMENTS
+- inode->u.ext3_i.i_faddr = 0;
+- inode->u.ext3_i.i_frag_no = 0;
+- inode->u.ext3_i.i_frag_size = 0;
++ ei->i_faddr = 0;
++ ei->i_frag_no = 0;
++ ei->i_frag_size = 0;
+ #endif
+- inode->u.ext3_i.i_file_acl = 0;
+- inode->u.ext3_i.i_dir_acl = 0;
+- inode->u.ext3_i.i_dtime = 0;
+- INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan);
++ ei->i_file_acl = 0;
++ ei->i_dir_acl = 0;
++ ei->i_dtime = 0;
++ INIT_LIST_HEAD(&ei->i_orphan);
+ #ifdef EXT3_PREALLOCATE
+- inode->u.ext3_i.i_prealloc_count = 0;
++ ei->i_prealloc_count = 0;
+ #endif
+- inode->u.ext3_i.i_block_group = i;
++ ei->i_block_group = i;
+
+- if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL)
++ if (ei->i_flags & EXT3_SYNC_FL)
+ inode->i_flags |= S_SYNC;
+ if (IS_SYNC(inode))
+ handle->h_sync = 1;
+ insert_inode_hash(inode);
+- inode->i_generation = sb->u.ext3_sb.s_next_generation++;
++ inode->i_generation = sbi->s_next_generation++;
+
+- inode->u.ext3_i.i_state = EXT3_STATE_NEW;
++ ei->i_state = EXT3_STATE_NEW;
+ err = ext3_mark_inode_dirty(handle, inode);
+ if (err) goto fail;
+
+@@ -585,19 +589,19 @@ struct inode *ext3_orphan_get (struct su
+
+ unsigned long ext3_count_free_inodes (struct super_block * sb)
+ {
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ struct ext3_super_block *es = sbi->s_es;
+ #ifdef EXT3FS_DEBUG
+- struct ext3_super_block * es;
+ unsigned long desc_count, bitmap_count, x;
+ int bitmap_nr;
+ struct ext3_group_desc * gdp;
+ int i;
+
+ lock_super (sb);
+- es = sb->u.ext3_sb.s_es;
+ desc_count = 0;
+ bitmap_count = 0;
+ gdp = NULL;
+- for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
++ for (i = 0; i < sbi->s_groups_count; i++) {
+ gdp = ext3_get_group_desc (sb, i, NULL);
+ if (!gdp)
+ continue;
+@@ -606,8 +610,8 @@ unsigned long ext3_count_free_inodes (st
+ if (bitmap_nr < 0)
+ continue;
+
+- x = ext3_count_free (sb->u.ext3_sb.s_inode_bitmap[bitmap_nr],
+- EXT3_INODES_PER_GROUP(sb) / 8);
++ x = ext3_count_free(sbi->s_inode_bitmap[bitmap_nr],
++ sbi->s_inodes_per_group / 8);
+ printk ("group %d: stored = %d, counted = %lu\n",
+ i, le16_to_cpu(gdp->bg_free_inodes_count), x);
+ bitmap_count += x;
+@@ -617,7 +621,7 @@ unsigned long ext3_count_free_inodes (st
+ unlock_super (sb);
+ return desc_count;
+ #else
+- return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_inodes_count);
++ return le32_to_cpu(es->s_free_inodes_count);
+ #endif
+ }
+
+@@ -626,16 +630,18 @@ unsigned long ext3_count_free_inodes (st
+ void ext3_check_inodes_bitmap (struct super_block * sb)
+ {
+ struct ext3_super_block * es;
++ struct ext3_sb_info *sbi;
+ unsigned long desc_count, bitmap_count, x;
+ int bitmap_nr;
+ struct ext3_group_desc * gdp;
+ int i;
+
+- es = sb->u.ext3_sb.s_es;
++ sbi = EXT3_SB(sb);
++ es = sbi->s_es;
+ desc_count = 0;
+ bitmap_count = 0;
+ gdp = NULL;
+- for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
++ for (i = 0; i < sbi->s_groups_count; i++) {
+ gdp = ext3_get_group_desc (sb, i, NULL);
+ if (!gdp)
+ continue;
+@@ -644,7 +650,7 @@ void ext3_check_inodes_bitmap (struct su
+ if (bitmap_nr < 0)
+ continue;
+
+- x = ext3_count_free (sb->u.ext3_sb.s_inode_bitmap[bitmap_nr],
++ x = ext3_count_free (sbi->s_inode_bitmap[bitmap_nr],
+ EXT3_INODES_PER_GROUP(sb) / 8);
+ if (le16_to_cpu(gdp->bg_free_inodes_count) != x)
+ ext3_error (sb, "ext3_check_inodes_bitmap",
+--- ./fs/ext3/inode.c.orig Fri Apr 12 10:27:49 2002
++++ ./fs/ext3/inode.c Tue May 7 15:41:23 2002
+@@ -196,7 +196,7 @@ void ext3_delete_inode (struct inode * i
+ * (Well, we could do this if we need to, but heck - it works)
+ */
+ ext3_orphan_del(handle, inode);
+- inode->u.ext3_i.i_dtime = CURRENT_TIME;
++ EXT3_I(inode)->i_dtime = CURRENT_TIME;
+
+ /*
+ * One subtle ordering requirement: if anything has gone wrong
+@@ -220,13 +220,14 @@ no_delete:
+ void ext3_discard_prealloc (struct inode * inode)
+ {
+ #ifdef EXT3_PREALLOCATE
++ struct ext3_inode_info *ei = EXT3_I(inode);
+ lock_kernel();
+ /* Writer: ->i_prealloc* */
+- if (inode->u.ext3_i.i_prealloc_count) {
+- unsigned short total = inode->u.ext3_i.i_prealloc_count;
+- unsigned long block = inode->u.ext3_i.i_prealloc_block;
+- inode->u.ext3_i.i_prealloc_count = 0;
+- inode->u.ext3_i.i_prealloc_block = 0;
++ if (ei->i_prealloc_count) {
++ unsigned short total = ei->i_prealloc_count;
++ unsigned long block = ei->i_prealloc_block;
++ ei->i_prealloc_count = 0;
++ ei->i_prealloc_block = 0;
+ /* Writer: end */
+ ext3_free_blocks (inode, block, total);
+ }
+@@ -243,13 +244,15 @@ static int ext3_alloc_block (handle_t *h
+ unsigned long result;
+
+ #ifdef EXT3_PREALLOCATE
++ struct ext3_inode_info *ei = EXT3_I(inode);
++
+ /* Writer: ->i_prealloc* */
+- if (inode->u.ext3_i.i_prealloc_count &&
+- (goal == inode->u.ext3_i.i_prealloc_block ||
+- goal + 1 == inode->u.ext3_i.i_prealloc_block))
++ if (ei->i_prealloc_count &&
++ (goal == ei->i_prealloc_block ||
++ goal + 1 == ei->i_prealloc_block))
+ {
+- result = inode->u.ext3_i.i_prealloc_block++;
+- inode->u.ext3_i.i_prealloc_count--;
++ result = ei->i_prealloc_block++;
++ ei->i_prealloc_count--;
+ /* Writer: end */
+ ext3_debug ("preallocation hit (%lu/%lu).\n",
+ ++alloc_hits, ++alloc_attempts);
+@@ -259,8 +262,8 @@ static int ext3_alloc_block (handle_t *h
+ alloc_hits, ++alloc_attempts);
+ if (S_ISREG(inode->i_mode))
+ result = ext3_new_block (inode, goal,
+- &inode->u.ext3_i.i_prealloc_count,
+- &inode->u.ext3_i.i_prealloc_block, err);
++ &ei->i_prealloc_count,
++ &ei->i_prealloc_block, err);
+ else
+ result = ext3_new_block (inode, goal, 0, 0, err);
+ /*
+@@ -394,7 +397,7 @@ static Indirect *ext3_get_branch(struct
+
+ *err = 0;
+ /* i_data is not going away, no lock needed */
+- add_chain (chain, NULL, inode->u.ext3_i.i_data + *offsets);
++ add_chain (chain, NULL, EXT3_I(inode)->i_data + *offsets);
+ if (!p->key)
+ goto no_block;
+ while (--depth) {
+@@ -437,7 +440,8 @@ no_block:
+
+ static inline unsigned long ext3_find_near(struct inode *inode, Indirect *ind)
+ {
+- u32 *start = ind->bh ? (u32*) ind->bh->b_data : inode->u.ext3_i.i_data;
++ struct ext3_inode_info *ei = EXT3_I(inode);
++ u32 *start = ind->bh ? (u32*) ind->bh->b_data : ei->i_data;
+ u32 *p;
+
+ /* Try to find previous block */
+@@ -453,9 +456,8 @@ static inline unsigned long ext3_find_ne
+ * It is going to be refered from inode itself? OK, just put it into
+ * the same cylinder group then.
+ */
+- return (inode->u.ext3_i.i_block_group *
+- EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
+- le32_to_cpu(inode->i_sb->u.ext3_sb.s_es->s_first_data_block);
++ return (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
++ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block);
+ }
+
+ /**
+@@ -474,14 +477,15 @@
+ static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4],
+ Indirect *partial, unsigned long *goal)
+ {
++ struct ext3_inode_info *ei = EXT3_I(inode);
+ /* Writer: ->i_next_alloc* */
+- if (block == inode->u.ext3_i.i_next_alloc_block + 1) {
+- inode->u.ext3_i.i_next_alloc_block++;
+- inode->u.ext3_i.i_next_alloc_goal++;
++ if (block == ei->i_next_alloc_block + 1) {
++ ei->i_next_alloc_block++;
++ ei->i_next_alloc_goal++;
+ }
+ #ifdef SEARCH_FROM_ZERO
+- inode->u.ext3_i.i_next_alloc_block = 0;
+- inode->u.ext3_i.i_next_alloc_goal = 0;
++ ei->i_next_alloc_block = 0;
++ ei->i_next_alloc_goal = 0;
+ #endif
+ /* Writer: end */
+ /* Reader: pointers, ->i_next_alloc* */
+@@ -490,8 +493,8 @@ static int ext3_find_goal(struct inode *
+ * try the heuristic for sequential allocation,
+ * failing that at least try to get decent locality.
+ */
+- if (block == inode->u.ext3_i.i_next_alloc_block)
+- *goal = inode->u.ext3_i.i_next_alloc_goal;
++ if (block == ei->i_next_alloc_block)
++ *goal = ei->i_next_alloc_goal;
+ if (!*goal)
+ *goal = ext3_find_near(inode, partial);
+ #ifdef SEARCH_FROM_ZERO
+@@ -619,6 +621,7 @@
+ {
+ int i;
+ int err = 0;
++ struct ext3_inode_info *ei = EXT3_I(inode);
+
+ /*
+ * If we're splicing into a [td]indirect block (as opposed to the
+@@ -641,11 +644,11 @@ static int ext3_splice_branch(handle_t *
+ /* That's it */
+
+ *where->p = where->key;
+- inode->u.ext3_i.i_next_alloc_block = block;
+- inode->u.ext3_i.i_next_alloc_goal = le32_to_cpu(where[num-1].key);
++ ei->i_next_alloc_block = block;
++ ei->i_next_alloc_goal = le32_to_cpu(where[num-1].key);
+ #ifdef SEARCH_FROM_ZERO
+- inode->u.ext3_i.i_next_alloc_block = 0;
+- inode->u.ext3_i.i_next_alloc_goal = 0;
++ ei->i_next_alloc_block = 0;
++ ei->i_next_alloc_goal = 0;
+ #endif
+ /* Writer: end */
+
+@@ -729,6 +732,7 @@
+ unsigned long goal;
+ int left;
+ int depth = ext3_block_to_path(inode, iblock, offsets);
++ struct ext3_inode_info *ei = EXT3_I(inode);
+ loff_t new_size;
+
+ J_ASSERT(handle != NULL || create == 0);
+@@ -782,7 +785,7 @@ out:
+ /*
+ * Block out ext3_truncate while we alter the tree
+ */
+- down_read(&inode->u.ext3_i.truncate_sem);
++ down_read(&ei->truncate_sem);
+ err = ext3_alloc_branch(handle, inode, left, goal,
+ offsets+(partial-chain), partial);
+
+@@ -794,7 +797,7 @@ out:
+ if (!err)
+ err = ext3_splice_branch(handle, inode, iblock, chain,
+ partial, left);
+- up_read(&inode->u.ext3_i.truncate_sem);
++ up_read(&ei->truncate_sem);
+ if (err == -EAGAIN)
+ goto changed;
+ if (err)
+@@ -807,8 +810,8 @@ out:
+ * truncate is in progress. It is racy between multiple parallel
+ * instances of get_block, but we have the BKL.
+ */
+- if (new_size > inode->u.ext3_i.i_disksize)
+- inode->u.ext3_i.i_disksize = new_size;
++ if (new_size > ei->i_disksize)
++ ei->i_disksize = new_size;
+
+ bh_result->b_state |= (1UL << BH_New);
+ goto got_it;
+@@ -921,7 +924,7 @@ struct buffer_head *ext3_bread(handle_t
+ struct buffer_head *tmp_bh;
+
+ for (i = 1;
+- inode->u.ext3_i.i_prealloc_count &&
++ EXT3_I(inode)->i_prealloc_count &&
+ i < EXT3_SB(inode->i_sb)->s_es->s_prealloc_dir_blocks;
+ i++) {
+ /*
+@@ -1131,8 +1134,8 @@ static int ext3_commit_write(struct file
+ kunmap(page);
+ }
+ }
+- if (inode->i_size > inode->u.ext3_i.i_disksize) {
+- inode->u.ext3_i.i_disksize = inode->i_size;
++ if (inode->i_size > EXT3_I(inode)->i_disksize) {
++ EXT3_I(inode)->i_disksize = inode->i_size;
+ ret2 = ext3_mark_inode_dirty(handle, inode);
+ if (!ret)
+ ret = ret2;
+@@ -1832,7 +1835,8 @@ static void ext3_free_branches(handle_t
+ void ext3_truncate(struct inode * inode)
+ {
+ handle_t *handle;
+- u32 *i_data = inode->u.ext3_i.i_data;
++ struct ext3_inode_info *ei = EXT3_I(inode);
++ u32 *i_data = EXT3_I(inode)->i_data;
+ int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
+ int offsets[4];
+ Indirect chain[4];
+@@ -1884,13 +1887,13 @@ void ext3_truncate(struct inode * inode)
+ * on-disk inode. We do this via i_disksize, which is the value which
+ * ext3 *really* writes onto the disk inode.
+ */
+- inode->u.ext3_i.i_disksize = inode->i_size;
++ ei->i_disksize = inode->i_size;
+
+ /*
+ * From here we block out all ext3_get_block() callers who want to
+ * modify the block allocation tree.
+ */
+- down_write(&inode->u.ext3_i.truncate_sem);
++ down_write(&ei->truncate_sem);
+
+ if (n == 1) { /* direct blocks */
+ ext3_free_data(handle, inode, NULL, i_data+offsets[0],
+@@ -1954,7 +1957,7 @@ do_indirects:
+ case EXT3_TIND_BLOCK:
+ ;
+ }
+- up_write(&inode->u.ext3_i.truncate_sem);
++ up_write(&ei->truncate_sem);
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ ext3_mark_inode_dirty(handle, inode);
+
+@@ -1983,6 +1986,8 @@ out_stop:
+
+ int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc)
+ {
++ struct super_block *sb = inode->i_sb;
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
+ struct buffer_head *bh = 0;
+ unsigned long block;
+ unsigned long block_group;
+@@ -1997,23 +2010,19 @@ int ext3_get_inode_loc (struct inode *in
+ inode->i_ino != EXT3_JOURNAL_INO &&
+- inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) ||
+- inode->i_ino > le32_to_cpu(
+- inode->i_sb->u.ext3_sb.s_es->s_inodes_count)) {
+- ext3_error (inode->i_sb, "ext3_get_inode_loc",
+- "bad inode number: %lu", inode->i_ino);
++ inode->i_ino < EXT3_FIRST_INO(sb)) ||
++ inode->i_ino > le32_to_cpu(sbi->s_es->s_inodes_count)) {
++ ext3_error (sb, __FUNCTION__, "bad inode #%lu", inode->i_ino);
+ goto bad_inode;
+ }
+- block_group = (inode->i_ino - 1) / EXT3_INODES_PER_GROUP(inode->i_sb);
+- if (block_group >= inode->i_sb->u.ext3_sb.s_groups_count) {
+- ext3_error (inode->i_sb, "ext3_get_inode_loc",
+- "group >= groups count");
++ block_group = (inode->i_ino - 1) / sbi->s_inodes_per_group;
++ if (block_group >= sbi->s_groups_count) {
++ ext3_error(sb, __FUNCTION__, "group >= groups count");
+ goto bad_inode;
+ }
+- group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb);
+- desc = block_group & (EXT3_DESC_PER_BLOCK(inode->i_sb) - 1);
+- bh = inode->i_sb->u.ext3_sb.s_group_desc[group_desc];
++ group_desc = block_group >> sbi->s_desc_per_block_bits;
++ desc = block_group & (sbi->s_desc_per_block - 1);
++ bh = sbi->s_group_desc[group_desc];
+ if (!bh) {
+- ext3_error (inode->i_sb, "ext3_get_inode_loc",
+- "Descriptor not loaded");
++ ext3_error(sb, __FUNCTION__, "Descriptor not loaded");
+ goto bad_inode;
+ }
+
+@@ -2021,17 +2022,17 @@ int ext3_get_inode_loc (struct inode *in
+ /*
+ * Figure out the offset within the block group inode table
+ */
+- offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)) *
+- EXT3_INODE_SIZE(inode->i_sb);
++ offset = ((inode->i_ino - 1) % sbi->s_inodes_per_group) *
++ sbi->s_inode_size;
+ block = le32_to_cpu(gdp[desc].bg_inode_table) +
+- (offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb));
+- if (!(bh = sb_bread(inode->i_sb, block))) {
+- ext3_error (inode->i_sb, "ext3_get_inode_loc",
++ (offset >> EXT3_BLOCK_SIZE_BITS(sb));
++ if (!(bh = sb_bread(sb, block))) {
++ ext3_error (sb, __FUNCTION__,
+ "unable to read inode block - "
+ "inode=%lu, block=%lu", inode->i_ino, block);
+ goto bad_inode;
+ }
+- offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1);
++ offset &= (EXT3_BLOCK_SIZE(sb) - 1);
+
+ iloc->bh = bh;
+ iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset);
+@@ -2047,6 +2048,7 @@ void ext3_read_inode(struct inode * inod
+ {
+ struct ext3_iloc iloc;
+ struct ext3_inode *raw_inode;
++ struct ext3_inode_info *ei = EXT3_I(inode);
+ struct buffer_head *bh;
+ int block;
+
+@@ -2054,7 +2056,7 @@ void ext3_read_inode(struct inode * inod
+ goto bad_inode;
+ bh = iloc.bh;
+ raw_inode = iloc.raw_inode;
+- init_rwsem(&inode->u.ext3_i.truncate_sem);
++ init_rwsem(&ei->truncate_sem);
+ inode->i_mode = le16_to_cpu(raw_inode->i_mode);
+ inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
+ inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+@@ -2067,7 +2069,7 @@ void ext3_read_inode(struct inode * inod
+ inode->i_atime = le32_to_cpu(raw_inode->i_atime);
+ inode->i_ctime = le32_to_cpu(raw_inode->i_ctime);
+ inode->i_mtime = le32_to_cpu(raw_inode->i_mtime);
+- inode->u.ext3_i.i_dtime = le32_to_cpu(raw_inode->i_dtime);
++ ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
+ /* We now have enough fields to check if the inode was active or not.
+ * This is needed because nfsd might try to access dead inodes
+ * the test is that same one that e2fsck uses
+@@ -2075,7 +2077,7 @@ void ext3_read_inode(struct inode * inod
+ */
+ if (inode->i_nlink == 0) {
+ if (inode->i_mode == 0 ||
+- !(inode->i_sb->u.ext3_sb.s_mount_state & EXT3_ORPHAN_FS)) {
++ !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
+ /* this inode is deleted */
+ brelse (bh);
+ goto bad_inode;
+@@ -2090,33 +2092,33 @@ void ext3_read_inode(struct inode * inod
+ * size */
+ inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
+ inode->i_version = ++event;
+- inode->u.ext3_i.i_flags = le32_to_cpu(raw_inode->i_flags);
++ ei->i_flags = le32_to_cpu(raw_inode->i_flags);
+ #ifdef EXT3_FRAGMENTS
+- inode->u.ext3_i.i_faddr = le32_to_cpu(raw_inode->i_faddr);
+- inode->u.ext3_i.i_frag_no = raw_inode->i_frag;
+- inode->u.ext3_i.i_frag_size = raw_inode->i_fsize;
++ ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
++ ei->i_frag_no = raw_inode->i_frag;
++ ei->i_frag_size = raw_inode->i_fsize;
+ #endif
+- inode->u.ext3_i.i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
++ ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
+ if (!S_ISREG(inode->i_mode)) {
+- inode->u.ext3_i.i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
++ ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
+ } else {
+ inode->i_size |=
+ ((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32;
+ }
+- inode->u.ext3_i.i_disksize = inode->i_size;
++ ei->i_disksize = inode->i_size;
+ inode->i_generation = le32_to_cpu(raw_inode->i_generation);
+ #ifdef EXT3_PREALLOCATE
+- inode->u.ext3_i.i_prealloc_count = 0;
++ ei->i_prealloc_count = 0;
+ #endif
+- inode->u.ext3_i.i_block_group = iloc.block_group;
++ ei->i_block_group = iloc.block_group;
+
+ /*
+ * NOTE! The in-memory inode i_data array is in little-endian order
+ * even on big-endian machines: we do NOT byteswap the block numbers!
+ */
+ for (block = 0; block < EXT3_N_BLOCKS; block++)
+- inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block];
+- INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan);
++ ei->i_data[block] = iloc.raw_inode->i_block[block];
++ INIT_LIST_HEAD(&ei->i_orphan);
+
+ brelse (iloc.bh);
+
+@@ -2143,17 +2145,17 @@ void ext3_read_inode(struct inode * inod
+ /* inode->i_attr_flags = 0; unused */
+- if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) {
++ if (ei->i_flags & EXT3_SYNC_FL) {
+ /* inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS; unused */
+ inode->i_flags |= S_SYNC;
+ }
+- if (inode->u.ext3_i.i_flags & EXT3_APPEND_FL) {
++ if (ei->i_flags & EXT3_APPEND_FL) {
+ /* inode->i_attr_flags |= ATTR_FLAG_APPEND; unused */
+ inode->i_flags |= S_APPEND;
+ }
+- if (inode->u.ext3_i.i_flags & EXT3_IMMUTABLE_FL) {
++ if (ei->i_flags & EXT3_IMMUTABLE_FL) {
+ /* inode->i_attr_flags |= ATTR_FLAG_IMMUTABLE; unused */
+ inode->i_flags |= S_IMMUTABLE;
+ }
+- if (inode->u.ext3_i.i_flags & EXT3_NOATIME_FL) {
++ if (ei->i_flags & EXT3_NOATIME_FL) {
+ /* inode->i_attr_flags |= ATTR_FLAG_NOATIME; unused */
+ inode->i_flags |= S_NOATIME;
+ }
+@@ -2175,6 +2177,7 @@ static int ext3_do_update_inode(handle_t
+ struct ext3_iloc *iloc)
+ {
+ struct ext3_inode *raw_inode = iloc->raw_inode;
++ struct ext3_inode_info *ei = EXT3_I(inode);
+ struct buffer_head *bh = iloc->bh;
+ int err = 0, rc, block;
+
+@@ -2192,7 +2195,7 @@ static int ext3_do_update_inode(handle_t
+ * Fix up interoperability with old kernels. Otherwise, old inodes get
+ * re-used with the upper 16 bits of the uid/gid intact
+ */
+- if(!inode->u.ext3_i.i_dtime) {
++ if(!ei->i_dtime) {
+ raw_inode->i_uid_high =
+ cpu_to_le16(high_16_bits(inode->i_uid));
+ raw_inode->i_gid_high =
+@@ -2210,34 +2213,33 @@ static int ext3_do_update_inode(handle_t
+ raw_inode->i_gid_high = 0;
+ }
+ raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
+- raw_inode->i_size = cpu_to_le32(inode->u.ext3_i.i_disksize);
++ raw_inode->i_size = cpu_to_le32(ei->i_disksize);
+ raw_inode->i_atime = cpu_to_le32(inode->i_atime);
+ raw_inode->i_ctime = cpu_to_le32(inode->i_ctime);
+ raw_inode->i_mtime = cpu_to_le32(inode->i_mtime);
+ raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
+- raw_inode->i_dtime = cpu_to_le32(inode->u.ext3_i.i_dtime);
+- raw_inode->i_flags = cpu_to_le32(inode->u.ext3_i.i_flags);
++ raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
++ raw_inode->i_flags = cpu_to_le32(ei->i_flags);
+ #ifdef EXT3_FRAGMENTS
+- raw_inode->i_faddr = cpu_to_le32(inode->u.ext3_i.i_faddr);
+- raw_inode->i_frag = inode->u.ext3_i.i_frag_no;
+- raw_inode->i_fsize = inode->u.ext3_i.i_frag_size;
++ raw_inode->i_faddr = cpu_to_le32(ei->i_faddr);
++ raw_inode->i_frag = ei->i_frag_no;
++ raw_inode->i_fsize = ei->i_frag_size;
+ #else
+ /* If we are not tracking these fields in the in-memory inode,
+ * then preserve them on disk, but still initialise them to zero
+ * for new inodes. */
+- if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) {
++ if (ei->i_state & EXT3_STATE_NEW) {
+ raw_inode->i_faddr = 0;
+ raw_inode->i_frag = 0;
+ raw_inode->i_fsize = 0;
+ }
+ #endif
+- raw_inode->i_file_acl = cpu_to_le32(inode->u.ext3_i.i_file_acl);
++ raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl);
+ if (!S_ISREG(inode->i_mode)) {
+- raw_inode->i_dir_acl = cpu_to_le32(inode->u.ext3_i.i_dir_acl);
++ raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
+ } else {
+- raw_inode->i_size_high =
+- cpu_to_le32(inode->u.ext3_i.i_disksize >> 32);
+- if (inode->u.ext3_i.i_disksize > 0x7fffffffULL) {
++ raw_inode->i_size_high = cpu_to_le32(ei->i_disksize >> 32);
++ if (ei->i_disksize > MAX_NON_LFS) {
+ struct super_block *sb = inode->i_sb;
+ if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
+ EXT3_FEATURE_RO_COMPAT_LARGE_FILE) ||
+@@ -2247,7 +2249,7 @@ static int ext3_do_update_inode(handle_t
+ * created, add a flag to the superblock.
+ */
+ err = ext3_journal_get_write_access(handle,
+- sb->u.ext3_sb.s_sbh);
++ EXT3_SB(sb)->s_sbh);
+ if (err)
+ goto out_brelse;
+ ext3_update_dynamic_rev(sb);
+@@ -2256,7 +2258,7 @@ static int ext3_do_update_inode(handle_t
+ sb->s_dirt = 1;
+ handle->h_sync = 1;
+ err = ext3_journal_dirty_metadata(handle,
+- sb->u.ext3_sb.s_sbh);
++ EXT3_SB(sb)->s_sbh);
+ }
+ }
+ }
+@@ -2265,13 +2267,13 @@ static int ext3_do_update_inode(handle_t
+ raw_inode->i_block[0] =
+ cpu_to_le32(kdev_t_to_nr(inode->i_rdev));
+ else for (block = 0; block < EXT3_N_BLOCKS; block++)
+- raw_inode->i_block[block] = inode->u.ext3_i.i_data[block];
++ raw_inode->i_block[block] = ei->i_data[block];
+
+ BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
+ rc = ext3_journal_dirty_metadata(handle, bh);
+ if (!err)
+ err = rc;
+- EXT3_I(inode)->i_state &= ~EXT3_STATE_NEW;
++ ei->i_state &= ~EXT3_STATE_NEW;
+
+ out_brelse:
+ brelse (bh);
+@@ -2379,7 +2381,7 @@ int ext3_setattr(struct dentry *dentry,
+ }
+
+ error = ext3_orphan_add(handle, inode);
+- inode->u.ext3_i.i_disksize = attr->ia_size;
++ EXT3_I(inode)->i_disksize = attr->ia_size;
+ rc = ext3_mark_inode_dirty(handle, inode);
+ if (!error)
+ error = rc;
+@@ -2622,9 +2624,9 @@ int ext3_change_inode_journal_flag(struc
+ */
+
+ if (val)
+- inode->u.ext3_i.i_flags |= EXT3_JOURNAL_DATA_FL;
++ EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
+ else
+- inode->u.ext3_i.i_flags &= ~EXT3_JOURNAL_DATA_FL;
++ EXT3_I(inode)->i_flags &= ~EXT3_JOURNAL_DATA_FL;
+
+ journal_unlock_updates(journal);
+
+--- ./fs/ext3/ioctl.c.orig Fri Apr 12 10:27:49 2002
++++ ./fs/ext3/ioctl.c Tue May 7 15:20:52 2002
+@@ -18,13 +18,14 @@
+ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
+ unsigned long arg)
+ {
++ struct ext3_inode_info *ei = EXT3_I(inode);
+ unsigned int flags;
+
+ ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg);
+
+ switch (cmd) {
+ case EXT3_IOC_GETFLAGS:
+- flags = inode->u.ext3_i.i_flags & EXT3_FL_USER_VISIBLE;
++ flags = ei->i_flags & EXT3_FL_USER_VISIBLE;
+ return put_user(flags, (int *) arg);
+ case EXT3_IOC_SETFLAGS: {
+ handle_t *handle = NULL;
+@@ -42,7 +42,7 @@ int ext3_ioctl (struct inode * inode, st
+ if (get_user(flags, (int *) arg))
+ return -EFAULT;
+
+- oldflags = inode->u.ext3_i.i_flags;
++ oldflags = ei->i_flags;
+
+ /* The JOURNAL_DATA flag is modifiable only by root */
+ jflag = flags & EXT3_JOURNAL_DATA_FL;
+@@ -79,7 +79,7 @@ int ext3_ioctl (struct inode * inode, st
+
+ flags = flags & EXT3_FL_USER_MODIFIABLE;
+ flags |= oldflags & ~EXT3_FL_USER_MODIFIABLE;
+- inode->u.ext3_i.i_flags = flags;
++ ei->i_flags = flags;
+
+ if (flags & EXT3_SYNC_FL)
+ inode->i_flags |= S_SYNC;
+@@ -155,12 +155,12 @@ flags_err:
+ int ret = 0;
+
+ set_current_state(TASK_INTERRUPTIBLE);
+- add_wait_queue(&sb->u.ext3_sb.ro_wait_queue, &wait);
+- if (timer_pending(&sb->u.ext3_sb.turn_ro_timer)) {
++ add_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
++ if (timer_pending(&EXT3_SB(sb)->turn_ro_timer)) {
+ schedule();
+ ret = 1;
+ }
+- remove_wait_queue(&sb->u.ext3_sb.ro_wait_queue, &wait);
++ remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
+ return ret;
+ }
+ #endif
+--- ./fs/ext3/namei.c.orig Fri Apr 12 10:27:49 2002
++++ ./fs/ext3/namei.c Tue May 7 16:05:51 2002
+@@ -1430,8 +1430,8 @@ int ext3_orphan_add(handle_t *handle, st
+ J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
+
+- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
+- err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
++ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
++ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
+ if (err)
+ goto out_unlock;
+
+@@ -1442,7 +1442,7 @@ int ext3_orphan_add(handle_t *handle, st
+ /* Insert this inode at the head of the on-disk orphan list... */
+ NEXT_ORPHAN(inode) = le32_to_cpu(EXT3_SB(sb)->s_es->s_last_orphan);
+ EXT3_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
+- err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
++ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+ rc = ext3_mark_iloc_dirty(handle, inode, &iloc);
+ if (!err)
+ err = rc;
+@@ -1520,8 +1520,7 @@ int ext3_orphan_del(handle_t *handle, st
+ err = ext3_journal_dirty_metadata(handle, sbi->s_sbh);
+ } else {
+ struct ext3_iloc iloc2;
+- struct inode *i_prev =
+- list_entry(prev, struct inode, u.ext3_i.i_orphan);
++ struct inode *i_prev = orphan_list_entry(prev);
+
+ jbd_debug(4, "orphan inode %lu will point to %lu\n",
+ i_prev->i_ino, ino_next);
+--- ./fs/ext3/super.c.orig Fri Apr 12 10:27:49 2002
++++ ./fs/ext3/super.c Tue May 7 16:05:44 2002
+@@ -121,7 +121,7 @@ static int ext3_error_behaviour(struct s
+ /* If no overrides were specified on the mount, then fall back
+ * to the default behaviour set in the filesystem's superblock
+ * on disk. */
+- switch (le16_to_cpu(sb->u.ext3_sb.s_es->s_errors)) {
++ switch (le16_to_cpu(EXT3_SB(sb)->s_es->s_errors)) {
+ case EXT3_ERRORS_PANIC:
+ return EXT3_ERRORS_PANIC;
+ case EXT3_ERRORS_RO:
+@@ -269,9 +269,9 @@ void ext3_abort (struct super_block * sb
+ return;
+
+ printk (KERN_CRIT "Remounting filesystem read-only\n");
+- sb->u.ext3_sb.s_mount_state |= EXT3_ERROR_FS;
++ EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
+ sb->s_flags |= MS_RDONLY;
+- sb->u.ext3_sb.s_mount_opt |= EXT3_MOUNT_ABORT;
++ EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
+ journal_abort(EXT3_SB(sb)->s_journal, -EIO);
+ }
+
+@@ -377,8 +377,6 @@ static int ext3_blkdev_remove(struct ext3
+ return ret;
+ }
+
+-#define orphan_list_entry(l) list_entry((l), struct inode, u.ext3_i.i_orphan)
+-
+ static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
+ {
+ struct list_head *l;
+@@ -818,7 +818,7 @@ static void ext3_orphan_cleanup (struct
+ sb->s_flags &= ~MS_RDONLY;
+ }
+
+- if (sb->u.ext3_sb.s_mount_state & EXT3_ERROR_FS) {
++ if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
+ if (es->s_last_orphan)
+ jbd_debug(1, "Errors on filesystem, "
+ "clearing orphan list.\n");
+@@ -1463,12 +1463,14 @@ static void ext3_commit_super (struct su
+ struct ext3_super_block * es,
+ int sync)
+ {
++ struct buffer_head *sbh = EXT3_SB(sb)->s_sbh;
++
+ es->s_wtime = cpu_to_le32(CURRENT_TIME);
+- BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "marking dirty");
+- mark_buffer_dirty(sb->u.ext3_sb.s_sbh);
++ BUFFER_TRACE(sbh, "marking dirty");
++ mark_buffer_dirty(sbh);
+ if (sync) {
+- ll_rw_block(WRITE, 1, &sb->u.ext3_sb.s_sbh);
+- wait_on_buffer(sb->u.ext3_sb.s_sbh);
++ ll_rw_block(WRITE, 1, &sbh);
++ wait_on_buffer(sbh);
+ }
+ }
+
+@@ -1519,7 +1521,7 @@ static void ext3_clear_journal_err(struc
+ ext3_warning(sb, __FUNCTION__, "Marking fs in need of "
+ "filesystem check.");
+
+- sb->u.ext3_sb.s_mount_state |= EXT3_ERROR_FS;
++ EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
+ es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
+ ext3_commit_super (sb, es, 1);
+
+--- ./fs/ext3/symlink.c.orig Fri Apr 12 10:27:49 2002
++++ ./fs/ext3/symlink.c Tue May 7 15:25:39 2002
+@@ -23,13 +23,13 @@
+
+ static int ext3_readlink(struct dentry *dentry, char *buffer, int buflen)
+ {
+- char *s = (char *)dentry->d_inode->u.ext3_i.i_data;
+- return vfs_readlink(dentry, buffer, buflen, s);
++ struct ext3_inode_info *ei = EXT3_I(dentry->d_inode);
++ return vfs_readlink(dentry, buffer, buflen, (char *)ei->i_data);
+ }
+
+ static int ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
+ {
+- char *s = (char *)dentry->d_inode->u.ext3_i.i_data;
+- return vfs_follow_link(nd, s);
++ struct ext3_inode_info *ei = EXT3_I(dentry->d_inode);
++ return vfs_follow_link(nd, (char*)ei->i_data);
+ }
+
+--- ./include/linux/ext3_fs.h.orig Tue Apr 16 14:27:25 2002
++++ ./include/linux/ext3_fs.h Tue May 7 16:47:36 2002
+@@ -84,22 +84,25 @@
+ #define EXT3_MIN_BLOCK_SIZE 1024
+ #define EXT3_MAX_BLOCK_SIZE 4096
+ #define EXT3_MIN_BLOCK_LOG_SIZE 10
++
+ #ifdef __KERNEL__
+-# define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize)
+-#else
+-# define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size)
+-#endif
+-#define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32))
+-#ifdef __KERNEL__
+-# define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
+-#else
+-# define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10)
+-#endif
+-#ifdef __KERNEL__
+-#define EXT3_ADDR_PER_BLOCK_BITS(s) ((s)->u.ext3_sb.s_addr_per_block_bits)
+-#define EXT3_INODE_SIZE(s) ((s)->u.ext3_sb.s_inode_size)
+-#define EXT3_FIRST_INO(s) ((s)->u.ext3_sb.s_first_ino)
++#define EXT3_SB(sb) (&((sb)->u.ext3_sb))
++#define EXT3_I(inode) (&((inode)->u.ext3_i))
++
++#define EXT3_BLOCK_SIZE(s) ((s)->s_blocksize)
++#define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
++#define EXT3_ADDR_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_addr_per_block_bits)
++#define EXT3_INODE_SIZE(s) (EXT3_SB(s)->s_inode_size)
++#define EXT3_FIRST_INO(s) (EXT3_SB(s)->s_first_ino)
+ #else
++
++/* Assume that user mode programs are passing in an ext3fs superblock, not
++ * a kernel struct super_block. This will allow us to call the feature-test
++ * macros from user land. */
++#define EXT3_SB(sb) (sb)
++
++#define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size)
++#define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_log_block_size + 10)
+ #define EXT3_INODE_SIZE(s) (((s)->s_rev_level == EXT3_GOOD_OLD_REV) ? \
+ EXT3_GOOD_OLD_INODE_SIZE : \
+ (s)->s_inode_size)
+@@ -108,6 +110,7 @@
+ EXT3_GOOD_OLD_FIRST_INO : \
+ (s)->s_first_ino)
+ #endif
++#define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32))
+
+ /*
+ * Macro-instructions used to manage fragments
+@@ -116,8 +120,8 @@
+ #define EXT3_MAX_FRAG_SIZE 4096
+ #define EXT3_MIN_FRAG_LOG_SIZE 10
+ #ifdef __KERNEL__
+-# define EXT3_FRAG_SIZE(s) ((s)->u.ext3_sb.s_frag_size)
+-# define EXT3_FRAGS_PER_BLOCK(s) ((s)->u.ext3_sb.s_frags_per_block)
++# define EXT3_FRAG_SIZE(s) (EXT3_SB(s)->s_frag_size)
++# define EXT3_FRAGS_PER_BLOCK(s) (EXT3_SB(s)->s_frags_per_block)
+ #else
+ # define EXT3_FRAG_SIZE(s) (EXT3_MIN_FRAG_SIZE << (s)->s_log_frag_size)
+ # define EXT3_FRAGS_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / EXT3_FRAG_SIZE(s))
+@@ -163,15 +167,13 @@
+ /*
+ * Macro-instructions used to manage group descriptors
+ */
++# define EXT3_BLOCKS_PER_GROUP(s) (EXT3_SB(s)->s_blocks_per_group)
++# define EXT3_INODES_PER_GROUP(s) (EXT3_SB(s)->s_inodes_per_group)
+ #ifdef __KERNEL__
+-# define EXT3_BLOCKS_PER_GROUP(s) ((s)->u.ext3_sb.s_blocks_per_group)
+-# define EXT3_DESC_PER_BLOCK(s) ((s)->u.ext3_sb.s_desc_per_block)
+-# define EXT3_INODES_PER_GROUP(s) ((s)->u.ext3_sb.s_inodes_per_group)
+-# define EXT3_DESC_PER_BLOCK_BITS(s) ((s)->u.ext3_sb.s_desc_per_block_bits)
++# define EXT3_DESC_PER_BLOCK(s) (EXT3_SB(s)->s_desc_per_block)
++# define EXT3_DESC_PER_BLOCK_BITS(s) (EXT3_SB(s)->s_desc_per_block_bits)
+ #else
+-# define EXT3_BLOCKS_PER_GROUP(s) ((s)->s_blocks_per_group)
+ # define EXT3_DESC_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_group_desc))
+-# define EXT3_INODES_PER_GROUP(s) ((s)->s_inodes_per_group)
+ #endif
+
+ /*
+@@ -344,7 +347,7 @@
+ #ifndef _LINUX_EXT2_FS_H
+ #define clear_opt(o, opt) o &= ~EXT3_MOUNT_##opt
+ #define set_opt(o, opt) o |= EXT3_MOUNT_##opt
+-#define test_opt(sb, opt) ((sb)->u.ext3_sb.s_mount_opt & \
++#define test_opt(sb, opt) (EXT3_SB(sb)->s_mount_opt & \
+ EXT3_MOUNT_##opt)
+ #else
+ #define EXT2_MOUNT_NOLOAD EXT3_MOUNT_NOLOAD
+@@ -441,17 +443,11 @@
+ /*EC*/ __u32 s_reserved[197]; /* Padding to the end of the block */
+ };
+
+-#ifdef __KERNEL__
+-#define EXT3_SB(sb) (&((sb)->u.ext3_sb))
+-#define EXT3_I(inode) (&((inode)->u.ext3_i))
+-#else
+-/* Assume that user mode programs are passing in an ext3fs superblock, not
+- * a kernel struct super_block. This will allow us to call the feature-test
+- * macros from user land. */
+-#define EXT3_SB(sb) (sb)
+-#endif
+-
+-#define NEXT_ORPHAN(inode) (inode)->u.ext3_i.i_dtime
++#define NEXT_ORPHAN(inode) EXT3_I(inode)->i_dtime
++static inline struct inode *orphan_list_entry(struct list_head *l)
++{
++ return list_entry(l, struct inode, u.ext3_i.i_orphan);
++}
+
+ /*
+ * Codes for operating systems
+--- ./include/linux/ext3_jbd.h.orig Tue May 7 14:44:08 2002
++++ ./include/linux/ext3_jbd.h Tue May 7 14:44:43 2002
+@@ -291,7 +291,7 @@
+ return 1;
+ if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA)
+ return 1;
+- if (inode->u.ext3_i.i_flags & EXT3_JOURNAL_DATA_FL)
++ if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL)
+ return 1;
+ return 0;
+ }
--- /dev/null
+ fs/ext3/namei.c | 2 +-
+ 1 files changed, 1 insertion(+), 1 deletion(-)
+
+diff -puN fs/ext3/namei.c~ext3-compat-2.4.18-chaos fs/ext3/namei.c
+--- linux-2.4.18/fs/ext3/namei.c~ext3-compat-2.4.18-chaos 2003-08-28 20:14:27.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/namei.c 2003-08-28 20:14:27.000000000 +0400
+@@ -830,9 +830,9 @@ static int ext3_rmdir (struct inode * di
+ * recovery. */
+ inode->i_size = 0;
+ ext3_orphan_add(handle, inode);
+- ext3_mark_inode_dirty(handle, inode);
+ dir->i_nlink--;
+ inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
++ ext3_mark_inode_dirty(handle, inode);
+ dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
+ ext3_mark_inode_dirty(handle, dir);
+
+
+_
--- /dev/null
+
+Create a service thread to handle delete and truncate of inodes, to avoid
+long latency while truncating very large files.
+
+
+ fs/ext3/inode.c | 116 ++++++++++++++++++++++
+ fs/ext3/super.c | 231 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_fs.h | 5
+ include/linux/ext3_fs_sb.h | 10 +
+ 4 files changed, 362 insertions(+)
+
+--- linux-2.4.18-18.8.0-l15/fs/ext3/super.c~ext3-delete_thread-2.4.18 Tue Jun 3 17:26:21 2003
++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/super.c Wed Jul 2 23:49:40 2003
+@@ -396,6 +396,220 @@ static void dump_orphan_list(struct supe
+ }
+ }
+
++#ifdef EXT3_DELETE_THREAD
++/*
++ * Delete inodes in a loop until there are no more to be deleted.
++ * Normally, we run in the background doing the deletes and sleeping again,
++ * and clients just add new inodes to be deleted onto the end of the list.
++ * If someone is concerned about free space (e.g. block allocation or similar)
++ * then they can sleep on s_delete_waiter_queue and be woken up when space
++ * has been freed.
++ */
++int ext3_delete_thread(void *data)
++{
++ struct super_block *sb = data;
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ struct task_struct *tsk = current;
++
++ /* Almost like daemonize, but not quite */
++ exit_mm(current);
++ tsk->session = 1;
++ tsk->pgrp = 1;
++ tsk->tty = NULL;
++ exit_files(current);
++ reparent_to_init();
++
++ sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev));
++ sigfillset(&tsk->blocked);
++
++ /*tsk->flags |= PF_KERNTHREAD;*/
++
++ INIT_LIST_HEAD(&sbi->s_delete_list);
++ wake_up(&sbi->s_delete_waiter_queue);
++ ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev));
++
++ /* main loop */
++ for (;;) {
++ wait_event_interruptible(sbi->s_delete_thread_queue,
++ !list_empty(&sbi->s_delete_list) ||
++ !test_opt(sb, ASYNCDEL));
++ ext3_debug("%s woken up: %lu inodes, %lu blocks\n",
++ tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks);
++
++ spin_lock(&sbi->s_delete_lock);
++ if (list_empty(&sbi->s_delete_list)) {
++ clear_opt(sbi->s_mount_opt, ASYNCDEL);
++ memset(&sbi->s_delete_list, 0,
++ sizeof(sbi->s_delete_list));
++ spin_unlock(&sbi->s_delete_lock);
++ ext3_debug("delete thread on %s exiting\n",
++ kdevname(sb->s_dev));
++ wake_up(&sbi->s_delete_waiter_queue);
++ break;
++ }
++
++ while (!list_empty(&sbi->s_delete_list)) {
++ struct inode *inode=list_entry(sbi->s_delete_list.next,
++ struct inode, i_dentry);
++ unsigned long blocks = inode->i_blocks >>
++ (inode->i_blkbits - 9);
++
++ list_del_init(&inode->i_dentry);
++ spin_unlock(&sbi->s_delete_lock);
++ ext3_debug("%s delete ino %lu blk %lu\n",
++ tsk->comm, inode->i_ino, blocks);
++
++ iput(inode);
++
++ spin_lock(&sbi->s_delete_lock);
++ sbi->s_delete_blocks -= blocks;
++ sbi->s_delete_inodes--;
++ }
++ if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) {
++ ext3_warning(sb, __FUNCTION__,
++ "%lu blocks, %lu inodes on list?\n",
++ sbi->s_delete_blocks,sbi->s_delete_inodes);
++ sbi->s_delete_blocks = 0;
++ sbi->s_delete_inodes = 0;
++ }
++ spin_unlock(&sbi->s_delete_lock);
++ wake_up(&sbi->s_delete_waiter_queue);
++ }
++
++ return 0;
++}
++
++static void ext3_start_delete_thread(struct super_block *sb)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++ int rc;
++
++ spin_lock_init(&sbi->s_delete_lock);
++ init_waitqueue_head(&sbi->s_delete_thread_queue);
++ init_waitqueue_head(&sbi->s_delete_waiter_queue);
++
++ if (!test_opt(sb, ASYNCDEL))
++ return;
++
++ rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES);
++ if (rc < 0)
++ printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n",
++ rc);
++ else
++ wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next);
++}
++
++static void ext3_stop_delete_thread(struct ext3_sb_info *sbi)
++{
++ if (sbi->s_delete_list.next == 0) /* thread never started */
++ return;
++
++ clear_opt(sbi->s_mount_opt, ASYNCDEL);
++ wake_up(&sbi->s_delete_thread_queue);
++ wait_event(sbi->s_delete_waiter_queue, list_empty(&sbi->s_delete_list));
++}
++
++/* Instead of playing games with the inode flags, destruction, etc we just
++ * create a new inode locally and put it on a list for the truncate thread.
++ * We need large parts of the inode struct in order to complete the
++ * truncate and unlink, so we may as well just have a real inode to do it.
++ *
++ * If we have any problem deferring the delete, just delete it right away.
++ * If we defer it, we also mark how many blocks it would free, so that we
++ * can keep the statfs data correct, and we know if we should sleep on the
++ * delete thread when we run out of space.
++ */
++static void ext3_delete_inode_thread(struct inode *old_inode)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
++ struct inode *new_inode;
++ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
++
++ if (is_bad_inode(old_inode)) {
++ clear_inode(old_inode);
++ return;
++ }
++
++ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++ goto out_delete;
++
++ /* We may want to delete the inode immediately and not defer it */
++ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS)
++ goto out_delete;
++
++ /* We can't use the delete thread as-is during real orphan recovery,
++ * as we add to the orphan list here, causing ext3_orphan_cleanup()
++ * to loop endlessly. It would be nice to do so, but needs work.
++ */
++ if (oei->i_state & EXT3_STATE_DELETE ||
++ sbi->s_mount_state & EXT3_ORPHAN_FS) {
++ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
++ old_inode->i_ino, blocks);
++ goto out_delete;
++ }
++
++ /* We can iget this inode again here, because our caller has unhashed
++ * old_inode, so new_inode will be in a different inode struct.
++ *
++ * We need to ensure that the i_orphan pointers in the other inodes
++ * point at the new inode copy instead of the old one so the orphan
++ * list doesn't get corrupted when the old orphan inode is freed.
++ */
++ down(&sbi->s_orphan_lock);
++
++ sbi->s_mount_state |= EXT3_ORPHAN_FS;
++ new_inode = iget(old_inode->i_sb, old_inode->i_ino);
++ sbi->s_mount_state &= ~EXT3_ORPHAN_FS;
++ if (is_bad_inode(new_inode)) {
++ printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
++ iput(new_inode);
++ new_inode = NULL;
++ }
++ if (!new_inode) {
++ up(&sbi->s_orphan_lock);
++ ext3_debug("delete inode %lu directly (bad read)\n",
++ old_inode->i_ino);
++ goto out_delete;
++ }
++ J_ASSERT(new_inode != old_inode);
++
++ J_ASSERT(!list_empty(&oei->i_orphan));
++
++ nei = EXT3_I(new_inode);
++ /* Ugh. We need to insert new_inode into the same spot on the list
++ * as old_inode was, to ensure the in-memory orphan list is still
++ * in the same order as the on-disk orphan list (badness otherwise).
++ */
++ nei->i_orphan = oei->i_orphan;
++ nei->i_orphan.next->prev = &nei->i_orphan;
++ nei->i_orphan.prev->next = &nei->i_orphan;
++ nei->i_state |= EXT3_STATE_DELETE;
++ up(&sbi->s_orphan_lock);
++
++ clear_inode(old_inode);
++
++ spin_lock(&sbi->s_delete_lock);
++ J_ASSERT(list_empty(&new_inode->i_dentry));
++ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
++ sbi->s_delete_blocks += blocks;
++ sbi->s_delete_inodes++;
++ spin_unlock(&sbi->s_delete_lock);
++
++ ext3_debug("delete inode %lu (%lu blocks) by thread\n",
++ new_inode->i_ino, blocks);
++
++ wake_up(&sbi->s_delete_thread_queue);
++ return;
++
++out_delete:
++ ext3_delete_inode(old_inode);
++}
++#else
++#define ext3_start_delete_thread(sbi) do {} while(0)
++#define ext3_stop_delete_thread(sbi) do {} while(0)
++#endif /* EXT3_DELETE_THREAD */
++
+ void ext3_put_super (struct super_block * sb)
+ {
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+@@ -403,6 +617,7 @@ void ext3_put_super (struct super_block
+ kdev_t j_dev = sbi->s_journal->j_dev;
+ int i;
+
++ ext3_stop_delete_thread(sbi);
+ ext3_xattr_put_super(sb);
+ journal_destroy(sbi->s_journal);
+ if (!(sb->s_flags & MS_RDONLY)) {
+@@ -451,7 +666,11 @@ static struct super_operations ext3_sops
+ write_inode: ext3_write_inode, /* BKL not held. Don't need */
+ dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */
+ put_inode: ext3_put_inode, /* BKL not held. Don't need */
++#ifdef EXT3_DELETE_THREAD
++ delete_inode: ext3_delete_inode_thread,/* BKL not held. We take it */
++#else
+ delete_inode: ext3_delete_inode, /* BKL not held. We take it */
++#endif
+ put_super: ext3_put_super, /* BKL held */
+ write_super: ext3_write_super, /* BKL held */
+ write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */
+@@ -511,6 +730,14 @@ static int parse_options (char * options
+ this_char = strtok (NULL, ",")) {
+ if ((value = strchr (this_char, '=')) != NULL)
+ *value++ = 0;
++#ifdef EXT3_DELETE_THREAD
++ if (!strcmp(this_char, "asyncdel"))
++ set_opt(*mount_options, ASYNCDEL);
++ else if (!strcmp(this_char, "noasyncdel"))
++ clear_opt(*mount_options, ASYNCDEL);
++ else
++#endif
++
+ if (!strcmp (this_char, "bsddf"))
+ clear_opt (*mount_options, MINIX_DF);
+ else if (!strcmp (this_char, "nouid32")) {
+@@ -1206,6 +1433,7 @@ struct super_block * ext3_read_super (st
+ }
+
+ ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
++ ext3_start_delete_thread(sb);
+ /*
+ * akpm: core read_super() calls in here with the superblock locked.
+ * That deadlocks, because orphan cleanup needs to lock the superblock
+@@ -1648,6 +1876,9 @@ int ext3_remount (struct super_block * s
+ if (!parse_options(data, &tmp, sbi, &tmp, 1))
+ return -EINVAL;
+
++ if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY))
++ ext3_stop_delete_thread(sbi);
++
+ if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
+ ext3_abort(sb, __FUNCTION__, "Abort forced by user");
+
+--- linux/fs/ext3/file.c.orig Fri Jan 17 10:57:31 2003
++++ linux/fs/ext3/file.c Mon Jun 30 13:28:52 2003
+@@ -121,7 +121,11 @@ struct file_operations ext3_file_operati
+ };
+
+ struct inode_operations ext3_file_inode_operations = {
++#ifdef EXT3_DELETE_THREAD
++ truncate: ext3_truncate_thread, /* BKL held */
++#else
+ truncate: ext3_truncate, /* BKL held */
++#endif
+ setattr: ext3_setattr, /* BKL held */
+ };
+
+--- linux-2.4.18-18.8.0-l15/fs/ext3/inode.c~ext3-delete_thread-2.4.18 Wed Jul 2 23:13:58 2003
++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/inode.c Wed Jul 2 23:50:29 2003
+@@ -2004,6 +2004,118 @@ out_stop:
+ ext3_journal_stop(handle, inode);
+ }
+
++#ifdef EXT3_DELETE_THREAD
++/* Move blocks from to-be-truncated inode over to a new inode, and delete
++ * that one from the delete thread instead. This avoids a lot of latency
++ * when truncating large files.
++ *
++ * If we have any problem deferring the truncate, just truncate it right away.
++ * If we defer it, we also mark how many blocks it would free, so that we
++ * can keep the statfs data correct, and we know if we should sleep on the
++ * delete thread when we run out of space.
++ */
++void ext3_truncate_thread(struct inode *old_inode)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
++ struct inode *new_inode;
++ handle_t *handle;
++ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
++
++ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++ goto out_truncate;
++
++ /* XXX This is a temporary limitation for code simplicity.
++ * We could truncate to arbitrary sizes at some later time.
++ */
++ if (old_inode->i_size != 0)
++ goto out_truncate;
++
++ /* We may want to truncate the inode immediately and not defer it */
++ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
++ old_inode->i_size > oei->i_disksize)
++ goto out_truncate;
++
++ /* We can't use the delete thread as-is during real orphan recovery,
++ * as we add to the orphan list here, causing ext3_orphan_cleanup()
++ * to loop endlessly. It would be nice to do so, but needs work.
++ */
++ if (oei->i_state & EXT3_STATE_DELETE ||
++ sbi->s_mount_state & EXT3_ORPHAN_FS) {
++ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
++ old_inode->i_ino, blocks);
++ goto out_truncate;
++ }
++
++ ext3_discard_prealloc(old_inode);
++
++ /* old_inode = 1
++ * new_inode = sb + GDT + ibitmap
++ * orphan list = 1 inode/superblock for add, 2 inodes for del
++ * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
++ */
++ handle = ext3_journal_start(old_inode, 7);
++ if (IS_ERR(handle))
++ goto out_truncate;
++
++ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
++ if (IS_ERR(new_inode)) {
++ ext3_debug("truncate inode %lu directly (no new inodes)\n",
++ old_inode->i_ino);
++ goto out_journal;
++ }
++
++ nei = EXT3_I(new_inode);
++
++ down_write(&oei->truncate_sem);
++ new_inode->i_size = old_inode->i_size;
++ new_inode->i_blocks = old_inode->i_blocks;
++ new_inode->i_uid = old_inode->i_uid;
++ new_inode->i_gid = old_inode->i_gid;
++ new_inode->i_nlink = 0;
++
++ /* FIXME when we do arbitrary truncates */
++ old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0;
++ old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME;
++
++ memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data));
++ memset(oei->i_data, 0, sizeof(oei->i_data));
++
++ nei->i_disksize = oei->i_disksize;
++ nei->i_state |= EXT3_STATE_DELETE;
++ up_write(&oei->truncate_sem);
++
++ if (ext3_orphan_add(handle, new_inode) < 0)
++ goto out_journal;
++
++ if (ext3_orphan_del(handle, old_inode) < 0) {
++ ext3_orphan_del(handle, new_inode);
++ iput(new_inode);
++ goto out_journal;
++ }
++
++ ext3_journal_stop(handle, old_inode);
++
++ spin_lock(&sbi->s_delete_lock);
++ J_ASSERT(list_empty(&new_inode->i_dentry));
++ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
++ sbi->s_delete_blocks += blocks;
++ sbi->s_delete_inodes++;
++ spin_unlock(&sbi->s_delete_lock);
++
++ ext3_debug("delete inode %lu (%lu blocks) by thread\n",
++ new_inode->i_ino, blocks);
++
++ wake_up(&sbi->s_delete_thread_queue);
++ return;
++
++out_journal:
++ ext3_journal_stop(handle, old_inode);
++out_truncate:
++ ext3_truncate(old_inode);
++}
++#endif /* EXT3_DELETE_THREAD */
++
+ /*
+ * ext3_get_inode_loc returns with an extra refcount against the
+ * inode's underlying buffer_head on success.
+--- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs.h~ext3-delete_thread-2.4.18 Tue Jun 3 17:26:20 2003
++++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs.h Wed Jul 2 23:19:09 2003
+@@ -190,6 +190,7 @@ struct ext3_group_desc
+ */
+ #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */
+ #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */
++#define EXT3_STATE_DELETE 0x00000010 /* deferred delete inode */
+
+ /*
+ * ioctl commands
+@@ -317,6 +318,7 @@ struct ext3_inode {
+ #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */
+ #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
+ #define EXT3_MOUNT_INDEX 0x4000 /* Enable directory index */
++#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */
+
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef _LINUX_EXT2_FS_H
+@@ -651,6 +653,9 @@ extern void ext3_discard_prealloc (struc
+ extern void ext3_dirty_inode(struct inode *);
+ extern int ext3_change_inode_journal_flag(struct inode *, int);
+ extern void ext3_truncate (struct inode *);
++#ifdef EXT3_DELETE_THREAD
++extern void ext3_truncate_thread(struct inode *inode);
++#endif
+
+ /* ioctl.c */
+ extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
+--- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.18 Tue Jun 3 17:26:21 2003
++++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs_sb.h Wed Jul 2 23:19:09 2003
+@@ -29,6 +29,8 @@
+
+ #define EXT3_MAX_GROUP_LOADED 32
+
++#define EXT3_DELETE_THREAD
++
+ /*
+ * third extended-fs super-block data in memory
+ */
+@@ -74,6 +76,14 @@ struct ext3_sb_info {
+ struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
+ wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
+ #endif
++#ifdef EXT3_DELETE_THREAD
++ spinlock_t s_delete_lock;
++ struct list_head s_delete_list;
++ unsigned long s_delete_blocks;
++ unsigned long s_delete_inodes;
++ wait_queue_head_t s_delete_thread_queue;
++ wait_queue_head_t s_delete_waiter_queue;
++#endif
+ };
+
+ #endif /* _LINUX_EXT3_FS_SB */
+
+_
--- /dev/null
+ fs/ext3/Makefile | 3
+ fs/ext3/extents.c | 1573 +++++++++++++++++++++++++++++++++++++++++++++
+ fs/ext3/ialloc.c | 4
+ fs/ext3/inode.c | 26
+ fs/ext3/super.c | 9
+ include/linux/ext3_fs.h | 18
+ include/linux/ext3_fs_i.h | 4
+ include/linux/ext3_fs_sb.h | 10
+ 8 files changed, 1641 insertions(+), 6 deletions(-)
+
+diff -puN /dev/null fs/ext3/extents.c
+--- /dev/null 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.18-chaos-alexey/fs/ext3/extents.c 2003-08-25 21:11:58.000000000 +0400
+@@ -0,0 +1,1573 @@
++/*
++ *
++ * linux/fs/ext3/extents.c
++ *
++ * Extents support for EXT3
++ *
++ * 07/08/2003 Alex Tomas <bzzz@tmi.comex.ru>
++ *
++ * TODO:
++ * - ext3*_error() should be used in some situations
++ * - find_goal() [to be tested and improved]
++ * - error handling
++ * - we could leak allocated block in some error cases
++ * - quick search for index/leaf in ext3_ext_find_extent()
++ * - tree reduction
++ * - cache last found extent
++ * - arch-independent
++ */
++
++#include <linux/module.h>
++#include <linux/fs.h>
++#include <linux/time.h>
++#include <linux/ext3_jbd.h>
++#include <linux/jbd.h>
++#include <linux/smp_lock.h>
++#include <linux/highuid.h>
++#include <linux/pagemap.h>
++#include <linux/quotaops.h>
++#include <linux/string.h>
++#include <linux/slab.h>
++#include <linux/locks.h>
++
++/*
++ * with AGRESSIVE_TEST defined, the capacity of index/leaf blocks
++ * becomes very small, so index splits, in-depth growth and
++ * other hard changes happen much more often;
++ * this is for debug purposes only
++ */
++#define AGRESSIVE_TEST_
++
++/*
++ * if EXT_DEBUG is defined you can use the 'extdebug' mount option
++ * to get lots of info about what's going on
++ */
++#define EXT_DEBUG
++#ifdef EXT_DEBUG
++#define ext_debug(inode,fmt,a...) \
++do { \
++ if (test_opt((inode)->i_sb, EXTDEBUG)) \
++ printk(fmt, ##a); \
++} while (0);
++#else
++#define ext_debug(inode,fmt,a...)
++#endif
++
++#define EXT3_ALLOC_NEEDED 2 /* block bitmap + group descriptor */
++
++/*
++ * ext3_inode has i_block array (total 60 bytes)
++ * first 4 bytes are used to store:
++ * - tree depth (0 means there is no tree yet; all extents are in the inode)
++ * - number of alive extents in the inode
++ */
++
++/*
++ * this is the on-disk extent structure
++ * it's used at the bottom of the tree
++ */
++struct ext3_extent {
++ __u32 e_block; /* first logical block extent covers */
++ __u32 e_start; /* first physical block extents lives */
++ __u32 e_num; /* number of blocks covered by extent */
++};
++
++/*
++ * this is the on-disk index structure
++ * it's used at all levels except the bottom
++ */
++struct ext3_extent_idx {
++ __u32 e_block; /* index covers logical blocks from 'block' */
++ __u32 e_leaf; /* pointer to the physical block of the next *
++ * level. leaf or next index could be here */
++};
++
++/*
++ * each block (leaves and indexes), even an inode-stored one, has a header
++ */
++struct ext3_extent_header {
++ __u16 e_num; /* number of valid entries */
++ __u16 e_max; /* capacity of store in entries */
++};
++
++/*
++ * an array of ext3_ext_path contains the path to some extent;
++ * creation/lookup routines use it for traversal/splitting/etc;
++ * truncate uses it to simulate recursive walking
++ */
++struct ext3_ext_path {
++ __u32 p_block;
++ __u16 p_depth;
++ struct ext3_extent *p_ext;
++ struct ext3_extent_idx *p_idx;
++ struct ext3_extent_header *p_hdr;
++ struct buffer_head *p_bh;
++};
++
++#define EXT_FIRST_EXTENT(__hdr__) \
++ ((struct ext3_extent *) (((char *) (__hdr__)) + \
++ sizeof(struct ext3_extent_header)))
++#define EXT_FIRST_INDEX(__hdr__) \
++ ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \
++ sizeof(struct ext3_extent_header)))
++#define EXT_HAS_FREE_INDEX(__path__) \
++ ((__path__)->p_hdr->e_num < (__path__)->p_hdr->e_max)
++#define EXT_LAST_EXTENT(__hdr__) \
++ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->e_num - 1)
++#define EXT_LAST_INDEX(__hdr__) \
++ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->e_num - 1)
++#define EXT_MAX_EXTENT(__hdr__) \
++ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->e_max - 1)
++#define EXT_MAX_INDEX(__hdr__) \
++ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->e_max - 1)
++
++
++#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
++
++/*
++ * could return:
++ * - EROFS
++ * - ENOMEM
++ */
++static int ext3_ext_get_access(handle_t *handle, struct inode *inode,
++ struct ext3_ext_path *path)
++{
++ if (path->p_bh) {
++ /* path points to block */
++ return ext3_journal_get_write_access(handle, path->p_bh);
++ }
++
++ /* path points to leaf/index in inode body */
++ return 0;
++}
++
++/*
++ * could return:
++ * - EROFS
++ * - ENOMEM
++ * - EIO
++ */
++static int ext3_ext_dirty(handle_t *handle, struct inode *inode,
++ struct ext3_ext_path *path)
++{
++ if (path->p_bh) {
++ /* path points to block */
++ return ext3_journal_dirty_metadata(handle, path->p_bh);
++ }
++
++ /* path points to leaf/index in inode body */
++ return ext3_mark_inode_dirty(handle, inode);
++}
++
++static inline int ext3_ext_space_block(struct inode *inode)
++{
++ int size;
++
++ size = (inode->i_sb->s_blocksize - sizeof(struct ext3_extent_header))
++ / sizeof(struct ext3_extent);
++#ifdef AGRESSIVE_TEST
++ size = 6; /* FIXME: for debug, remove this line */
++#endif
++ return size;
++}
++
++static inline int ext3_ext_space_inode(struct inode *inode)
++{
++ int size;
++
++ size = (sizeof(EXT3_I(inode)->i_data) -
++ sizeof(struct ext3_extent_header))
++ / sizeof(struct ext3_extent);
++#ifdef AGRESSIVE_TEST
++ size = 3; /* FIXME: for debug, remove this line */
++#endif
++ return size;
++}
++
++static inline int ext3_ext_space_inode_idx(struct inode *inode)
++{
++ int size;
++
++ size = (sizeof(EXT3_I(inode)->i_data) -
++ sizeof(struct ext3_extent_header))
++ / sizeof(struct ext3_extent_idx);
++#ifdef AGRESSIVE_TEST
++ size = 4; /* FIXME: for debug, remove this line */
++#endif
++ return size;
++}
++
++static void ext3_ext_show_path(struct inode *inode, struct ext3_ext_path *path)
++{
++ int k, l = path->p_depth;
++
++ ext_debug(inode, "path:");
++ for (k = 0; k <= l; k++, path++) {
++ if (path->p_idx) {
++ ext_debug(inode, " %d->%d", path->p_idx->e_block,
++ path->p_idx->e_leaf);
++ } else if (path->p_ext) {
++ ext_debug(inode, " %d:%d:%d",
++ path->p_ext->e_block,
++ path->p_ext->e_start,
++ path->p_ext->e_num);
++ } else
++ ext_debug(inode, " []");
++ }
++ ext_debug(inode, "\n");
++}
++
++static void ext3_ext_show_leaf(struct inode *inode, struct ext3_ext_path *path)
++{
++ int depth = EXT3_I(inode)->i_depth;
++ struct ext3_extent_header *eh = path[depth].p_hdr;
++ struct ext3_extent *ex = EXT_FIRST_EXTENT(eh);
++ int i;
++
++ for (i = 0; i < eh->e_num; i++, ex++) {
++ ext_debug(inode, "%d:%d:%d ",
++ ex->e_block, ex->e_start, ex->e_num);
++ }
++ ext_debug(inode, "\n");
++}
++
++static void ext3_ext_drop_refs(struct inode *inode, struct ext3_ext_path *path)
++{
++ int depth = path->p_depth;
++ int i;
++
++ for (i = 0; i <= depth; i++, path++)
++ if (path->p_bh) {
++ brelse(path->p_bh);
++ path->p_bh = NULL;
++ }
++}
++
++/*
++ * choose a goal block for the block allocator:
++ *  - the last physical block of the extent found in the leaf, if any
++ *  - else the block number of the leaf's buffer, if the leaf has one
++ *  - else a block inside the inode's block group, offset by a value
++ *    derived from the pid of the current task
++ */
++static int ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path)
++{
++	unsigned long group_first;
++	unsigned long shift;
++
++	if (path) {
++		struct ext3_ext_path *leaf = path + path->p_depth;
++
++		/* try to find previous block */
++		if (leaf->p_ext)
++			return leaf->p_ext->e_start + leaf->p_ext->e_num - 1;
++
++		/* the leaf is empty: start from the block holding it */
++		if (leaf->p_bh)
++			return leaf->p_bh->b_blocknr;
++	}
++
++	/* OK. use inode's group */
++	group_first = (EXT3_I(inode)->i_block_group *
++			EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
++		le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block);
++	shift = (current->pid % 16) *
++		(EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16);
++
++	return group_first + shift;
++}
++
++/*
++ * walk the extent tree from the root kept in the inode down to the leaf
++ * that should hold logical block 'block', filling one path entry per level:
++ * path[0] describes the root, path[depth] the leaf
++ *
++ * if 'path' is NULL an array of depth + 2 entries is allocated here (the
++ * extra entry accounts for a possible depth increase) and the caller has to
++ * ext3_ext_drop_refs() and kfree() it; otherwise the caller's array is reused
++ *
++ * returns the path, or ERR_PTR(-ENOMEM) / ERR_PTR(-EIO). on -EIO all buffer
++ * references taken so far are dropped; the array itself is freed only if it
++ * was allocated here, a caller-supplied array stays owned by the caller
++ * (freeing it here made the caller touch and free it a second time)
++ */
++static struct ext3_ext_path *
++ext3_ext_find_extent(struct inode *inode, int block, struct ext3_ext_path *path)
++{
++	struct ext3_inode_info *ei = EXT3_I(inode);
++	struct ext3_extent_header *eh = (void *) ei->i_data;
++	struct ext3_extent_idx *ix;
++	struct buffer_head *bh;
++	struct ext3_extent *ex;
++	int depth, i, k, ppos = 0;
++	int alloc = 0;	/* set when the path array was allocated here */
++
++	/* initialize capacity of leaf in inode for first time */
++	if (eh->e_max == 0)
++		eh->e_max = ext3_ext_space_inode(inode);
++	i = depth = ei->i_depth;
++	EXT_ASSERT(i == 0 || eh->e_num > 0);
++
++	/* account possible depth increase */
++	if (!path) {
++		path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2),
++				GFP_NOFS);
++		if (!path)
++			return ERR_PTR(-ENOMEM);
++		alloc = 1;
++	}
++	memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1));
++
++	/* walk through the tree */
++	while (i) {
++		ext_debug(inode, "depth %d: num %d, max %d\n",
++			  ppos, eh->e_num, eh->e_max);
++		ix = EXT_FIRST_INDEX(eh);
++		if (eh->e_num)
++			path[ppos].p_idx = ix;
++		EXT_ASSERT(eh->e_num <= eh->e_max);
++		/* remember the last index whose e_block is <= block */
++		for (k = 0; k < eh->e_num; k++, ix++) {
++			ext_debug(inode, "index: %d -> %d\n",
++				  ix->e_block, ix->e_leaf);
++			if (block < ix->e_block)
++				break;
++			path[ppos].p_idx = ix;
++		}
++		path[ppos].p_block = path[ppos].p_idx->e_leaf;
++		path[ppos].p_depth = i;
++		path[ppos].p_hdr = eh;
++		path[ppos].p_ext = NULL;
++
++		bh = sb_bread(inode->i_sb, path[ppos].p_block);
++		if (!bh) {
++			ext3_ext_drop_refs(inode, path);
++			if (alloc)
++				kfree(path);
++			return ERR_PTR(-EIO);
++		}
++		eh = (struct ext3_extent_header *) bh->b_data;
++		ppos++;
++		EXT_ASSERT(ppos <= depth);
++		path[ppos].p_bh = bh;
++		i--;
++	}
++
++	path[ppos].p_depth = i;
++	path[ppos].p_hdr = eh;
++	path[ppos].p_ext = NULL;
++
++	/* find extent: the last one whose e_block is <= block */
++	ex = EXT_FIRST_EXTENT(eh);
++	if (eh->e_num)
++		path[ppos].p_ext = ex;
++	EXT_ASSERT(eh->e_num <= eh->e_max);
++	for (k = 0; k < eh->e_num; k++, ex++) {
++		if (block < ex->e_block)
++			break;
++		path[ppos].p_ext = ex;
++	}
++
++	ext3_ext_show_path(inode, path);
++
++	return path;
++}
++
++/*
++ * sanity check for memmove() targets: BUG() if [addr, addr + len) does not
++ * lie inside the block described by curp. the block is s_blocksize bytes
++ * long when curp has a buffer, sizeof(i_data) bytes otherwise
++ * a zero 'len' is always accepted
++ */
++static void ext3_ext_check_boundary(struct inode *inode,
++					struct ext3_ext_path *curp,
++					void *addr, int len)
++{
++	void *limit;
++
++	if (len == 0)
++		return;
++
++	limit = (void *) curp->p_hdr;
++	if (curp->p_bh)
++		limit += inode->i_sb->s_blocksize;
++	else
++		limit += sizeof(EXT3_I(inode)->i_data);
++
++	if (((unsigned long) addr) + len > (unsigned long) limit) {
++		printk("overflow! 0x%p > 0x%p\n", addr + len, limit);
++		BUG();
++	}
++	if ((unsigned long) addr < (unsigned long) curp->p_hdr) {
++		printk("underflow! 0x%p < 0x%p\n", addr, curp->p_hdr);
++		BUG();
++	}
++}
++
++/*
++ * insert new index [logical;ptr] into the index block described by curp.
++ * the entry goes right after curp->p_idx when 'logical' is greater than
++ * curp->p_idx->e_block and right before it otherwise; the entries to the
++ * right are shifted by one slot with memmove() first
++ * returns 0 or the error from ext3_ext_get_access() / ext3_ext_dirty()
++ */
++static int ext3_ext_insert_index(handle_t *handle, struct inode *inode,
++ struct ext3_ext_path *curp, int logical,
++ int ptr)
++{
++ struct ext3_extent_idx *ix;
++ int len, err;
++
++ if ((err = ext3_ext_get_access(handle, inode, curp)))
++ return err;
++
++ EXT_ASSERT(logical != curp->p_idx->e_block);
++ /* distance, in entries, from curp->p_idx to EXT_MAX_INDEX() */
++ len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
++ if (logical > curp->p_idx->e_block) {
++ /* insert after */
++ len = (len - 1) * sizeof(struct ext3_extent_idx);
++ /* nothing to move when p_idx is already the last possible slot */
++ len = len < 0 ? 0 : len;
++ ext_debug(inode, "insert new index %d after: %d. "
++ "move %d from 0x%p to 0x%p\n",
++ logical, ptr, len,
++ (curp->p_idx + 1), (curp->p_idx + 2));
++
++ ext3_ext_check_boundary(inode, curp, curp->p_idx + 2, len);
++ memmove(curp->p_idx + 2, curp->p_idx + 1, len);
++ ix = curp->p_idx + 1;
++ } else {
++ /* insert before */
++ len = len * sizeof(struct ext3_extent_idx);
++ len = len < 0 ? 0 : len;
++ ext_debug(inode, "insert new index %d before: %d. "
++ "move %d from 0x%p to 0x%p\n",
++ logical, ptr, len,
++ curp->p_idx, (curp->p_idx + 1));
++
++ ext3_ext_check_boundary(inode, curp, curp->p_idx + 1, len);
++ memmove(curp->p_idx + 1, curp->p_idx, len);
++ ix = curp->p_idx;
++ }
++
++ /* fill the freed slot and account for it in the header */
++ ix->e_block = logical;
++ ix->e_leaf = ptr;
++ curp->p_hdr->e_num++;
++
++ err = ext3_ext_dirty(handle, inode, curp);
++ ext3_std_error(inode->i_sb, err);
++
++ return err;
++}
++
++/*
++ * routine inserts new subtree into the path, using free index entry
++ * at depth 'at':
++ *  - allocates all needed blocks (new leaf and all intermediate index blocks)
++ *  - makes decision where to split
++ *  - moves remaining extents and index entries (right to the split point)
++ *    into the newly allocated blocks
++ *  - initializes subtree
++ *
++ * the first block is taken from the front of 'newext' (e_start is advanced
++ * and e_num decremented), the others come from ext3_new_block()
++ * returns 0 on success; on error every block recorded in ablocks[] is
++ * freed again and the error is returned
++ */
++static int ext3_ext_split(handle_t *handle, struct inode *inode,
++				struct ext3_ext_path *path,
++				struct ext3_extent *newext, int at)
++{
++	struct buffer_head *bh = NULL;
++	int depth = EXT3_I(inode)->i_depth;
++	struct ext3_extent_header *neh;
++	struct ext3_extent_idx *fidx;
++	struct ext3_extent *ex;
++	int i = at, k, m, a;
++	long newblock, oldblock, border;
++	int *ablocks = NULL; /* array of allocated blocks */
++	int err = 0;
++
++	/* make decision: where to split? */
++	/* FIXME: now decision is simplest: at current extent */
++
++	/* if current leaf will be split, then we should use
++	 * border from split point */
++	if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
++		border = path[depth].p_ext[1].e_block;
++		ext_debug(inode, "leaf will be splitted."
++			  " next leaf starts at %d\n",
++			  (int)border);
++	} else {
++		border = newext->e_block;
++		ext_debug(inode, "leaf will be added."
++			  " next leaf starts at %d\n",
++			  (int)border);
++	}
++
++	/*
++	 * if error occurs, then we break processing
++	 * and turn filesystem read-only. so, index won't
++	 * be inserted and tree will be in consistent
++	 * state. next mount will repair buffers too
++	 */
++
++	/*
++	 * get array to track all allocated blocks
++	 * we need this to handle errors and free blocks
++	 * upon them
++	 */
++	ablocks = kmalloc(sizeof(long) * depth, GFP_NOFS);
++	if (!ablocks)
++		return -ENOMEM;
++	memset(ablocks, 0, sizeof(long) * depth);
++
++	/* allocate all needed blocks */
++	ext_debug(inode, "allocate %d blocks for indexes and leaf\n",
++		  depth - at);
++	ablocks[0] = newext->e_start++;
++	newext->e_num--;
++	for (a = 1; a < depth - at; a++) {
++		newblock = ext3_new_block(handle, inode, newext->e_start,
++					  0, 0, &err);
++		if (newblock == 0)
++			goto cleanup;
++		ablocks[a] = newblock;
++	}
++
++	/* initialize new leaf: it lives in the last block allocated */
++	newblock = ablocks[--a];
++	EXT_ASSERT(newblock);
++	bh = sb_getblk(inode->i_sb, newblock);
++	if (!bh) {
++		err = -EIO;
++		goto cleanup;
++	}
++	lock_buffer(bh);
++
++	if ((err = ext3_journal_get_create_access(handle, bh)))
++		goto cleanup;
++
++	neh = (struct ext3_extent_header *) bh->b_data;
++	neh->e_num = 0;
++	neh->e_max = ext3_ext_space_block(inode);
++	ex = EXT_FIRST_EXTENT(neh);
++
++	/* move remain of path[depth] to the new leaf */
++	EXT_ASSERT(path[depth].p_hdr->e_num ==
++			path[depth].p_hdr->e_max);
++	/* start copy from next extent */
++	/* TODO: we could do it by single memmove */
++	m = 0;
++	path[depth].p_ext++;
++	while (path[depth].p_ext <=
++			EXT_MAX_EXTENT(path[depth].p_hdr)) {
++		ext_debug(inode, "move %d:%d:%d in new leaf\n",
++			  path[depth].p_ext->e_block,
++			  path[depth].p_ext->e_start,
++			  path[depth].p_ext->e_num);
++		memmove(ex++, path[depth].p_ext++,
++			sizeof(struct ext3_extent));
++		neh->e_num++;
++		m++;
++	}
++	mark_buffer_uptodate(bh, 1);
++	unlock_buffer(bh);
++
++	if ((err = ext3_journal_dirty_metadata(handle, bh)))
++		goto cleanup;
++	brelse(bh);
++	bh = NULL;
++
++	/* correct old leaf: it lost the m extents moved above */
++	if (m) {
++		if ((err = ext3_ext_get_access(handle, inode, path)))
++			goto cleanup;
++		path[depth].p_hdr->e_num -= m;
++		if ((err = ext3_ext_dirty(handle, inode, path)))
++			goto cleanup;
++
++	}
++
++	/* create intermediate indexes */
++	k = depth - at - 1;
++	EXT_ASSERT(k >= 0);
++	if (k)
++		ext_debug(inode,
++			  "create %d intermediate indices\n", k);
++	/* insert new index into current index block */
++	/* current depth stored in i var */
++	i = depth - 1;
++	while (k--) {
++		/* the block built on the previous pass becomes the child */
++		oldblock = newblock;
++		newblock = ablocks[--a];
++		bh = sb_getblk(inode->i_sb, newblock);
++		if (!bh) {
++			err = -EIO;
++			goto cleanup;
++		}
++		lock_buffer(bh);
++
++		if ((err = ext3_journal_get_create_access(handle, bh)))
++			goto cleanup;
++
++		neh = (struct ext3_extent_header *) bh->b_data;
++		neh->e_num = 1;
++		neh->e_max = ext3_ext_space_block(inode);
++		fidx = EXT_FIRST_INDEX(neh);
++		fidx->e_block = border;
++		fidx->e_leaf = oldblock;
++
++		ext_debug(inode,
++			  "int.index at %d (block %u): %d -> %d\n",
++			  i, (unsigned) newblock,
++			  (int) border,
++			  (int) oldblock);
++		/* copy indexes */
++		m = 0;
++		path[i].p_idx++;
++		EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) ==
++				EXT_LAST_INDEX(path[i].p_hdr));
++		ext_debug(inode, "cur 0x%p, last 0x%p\n", path[i].p_idx,
++			  EXT_MAX_INDEX(path[i].p_hdr));
++		while (path[i].p_idx <=
++				EXT_MAX_INDEX(path[i].p_hdr)) {
++			ext_debug(inode, "%d: move %d:%d in new index\n",
++				  i, path[i].p_idx->e_block,
++				  path[i].p_idx->e_leaf);
++			memmove(++fidx, path[i].p_idx++,
++				sizeof(struct ext3_extent_idx));
++			neh->e_num++;
++			m++;
++		}
++
++		mark_buffer_uptodate(bh, 1);
++		unlock_buffer(bh);
++
++		if ((err = ext3_journal_dirty_metadata(handle, bh)))
++			goto cleanup;
++		brelse(bh);
++		bh = NULL;
++
++		/* correct old index */
++		if (m) {
++			err = ext3_ext_get_access(handle,inode,path+i);
++			if (err)
++				goto cleanup;
++			path[i].p_hdr->e_num -= m;
++			err = ext3_ext_dirty(handle, inode, path + i);
++			if (err)
++				goto cleanup;
++		}
++
++		i--;
++	}
++
++	/* insert new index */
++	if (!err)
++		err = ext3_ext_insert_index(handle, inode, path + at,
++					    border, newblock);
++
++cleanup:
++	if (bh) {
++		if (buffer_locked(bh))
++			unlock_buffer(bh);
++		brelse(bh);
++	}
++
++	if (err) {
++		/*
++		 * free all allocated blocks in error case. the braces
++		 * matter: without them only the 'continue' belongs to the
++		 * loop and ext3_free_blocks() runs once, on ablocks[depth]
++		 */
++		for (i = 0; i < depth; i++) {
++			if (!ablocks[i])
++				continue;
++			ext3_free_blocks(handle, inode, ablocks[i], 1);
++		}
++	}
++	kfree(ablocks);
++
++	return err;
++}
++
++/*
++ * routine implements tree growing procedure:
++ * - takes a new block from the front of 'newext'
++ * - moves top-level data (index block or leaf) into the new block
++ * - initializes new top-level, creating index that points to the
++ * just created block
++ * - increments i_depth
++ * returns 0 or a negative error; the new block's buffer is released
++ * in both cases
++ */
++static int ext3_ext_grow_indepth(handle_t *handle, struct inode *inode,
++ struct ext3_ext_path *path,
++ struct ext3_extent *newext)
++{
++ struct buffer_head *bh;
++ struct ext3_ext_path *curp = path;
++ struct ext3_extent_header *neh;
++ struct ext3_extent_idx *fidx;
++ int len, err = 0;
++ long newblock;
++
++ /*
++ * use the block already allocated by the caller for the new block:
++ * it is cut off the front of newext
++ */
++ newblock = newext->e_start++;
++ newext->e_num--;
++
++ bh = sb_getblk(inode->i_sb, newblock);
++ if (!bh) {
++ err = -EIO;
++ ext3_std_error(inode->i_sb, err);
++ return err;
++ }
++ lock_buffer(bh);
++
++ if ((err = ext3_journal_get_create_access(handle, bh))) {
++ unlock_buffer(bh);
++ goto out;
++ }
++
++ /* move top-level index/leaf into new block */
++ /* header plus e_max entries, i.e. the whole top-level area */
++ len = sizeof(struct ext3_extent_header) +
++ sizeof(struct ext3_extent) * curp->p_hdr->e_max;
++ EXT_ASSERT(len >= 0 && len < 4096);
++ memmove(bh->b_data, curp->p_hdr, len);
++
++ /* set size of new block */
++ neh = (struct ext3_extent_header *) bh->b_data;
++ neh->e_max = ext3_ext_space_block(inode);
++ mark_buffer_uptodate(bh, 1);
++ unlock_buffer(bh);
++
++ if ((err = ext3_journal_dirty_metadata(handle, bh)))
++ goto out;
++
++ /* create index in new top-level index: num,max,pointer */
++ if ((err = ext3_ext_get_access(handle, inode, curp)))
++ goto out;
++
++ /* the top level now holds a single index pointing at newblock */
++ curp->p_hdr->e_max = ext3_ext_space_inode_idx(inode);
++ curp->p_hdr->e_num = 1;
++ curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
++ curp->p_idx->e_block = EXT_FIRST_EXTENT(path[0].p_hdr)->e_block;
++ curp->p_idx->e_leaf = newblock;
++
++ neh = (struct ext3_extent_header *) EXT3_I(inode)->i_data;
++ fidx = EXT_FIRST_INDEX(neh);
++ ext_debug(inode, "new root: num %d(%d), lblock %d, ptr %d\n",
++ neh->e_num, neh->e_max, fidx->e_block, fidx->e_leaf);
++
++ EXT3_I(inode)->i_depth++;
++ err = ext3_ext_dirty(handle, inode, curp);
++out:
++ brelse(bh);
++
++ return err;
++}
++
++/*
++ * routine finds an index block with a free entry and adds a new leaf
++ * below it via ext3_ext_split(). if no free index is found then it
++ * requests in-depth growing via ext3_ext_grow_indepth()
++ * on success the path is dropped and looked up again for
++ * newext->e_block; if the split/grow consumed the only block of newext,
++ * a fresh block is allocated for it
++ * returns 0 or a negative error
++ */
++static int ext3_ext_create_new_leaf(handle_t *handle, struct inode *inode,
++ struct ext3_ext_path *path,
++ struct ext3_extent *newext)
++{
++ int depth = EXT3_I(inode)->i_depth;
++ struct ext3_ext_path *curp;
++ int i = depth, err = 0;
++ /* remembered as allocation goal, newext->e_start may be advanced below */
++ long newblock = newext->e_start;
++
++ /* walk up to the tree and look for free index entry */
++ curp = path + depth;
++ while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
++ i--;
++ curp--;
++ }
++
++ /* we use already allocated block for index block
++ * so, subsequent data blocks should be contiguous */
++ if (EXT_HAS_FREE_INDEX(curp)) {
++ /* if we found index with free entry, then use that
++ * entry: create all needed subtree and add new leaf */
++ err = ext3_ext_split(handle, inode, path, newext, i);
++ } else {
++ /* tree is full, time to grow in depth */
++ err = ext3_ext_grow_indepth(handle, inode, path, newext);
++ }
++
++ if (!err) {
++ /* refill path */
++ ext3_ext_drop_refs(inode, path);
++ path = ext3_ext_find_extent(inode, newext->e_block, path);
++ if (IS_ERR(path))
++ err = PTR_ERR(path);
++
++ /*
++ * probably we've used some blocks from extent
++ * let's allocate new block for it
++ */
++ if (newext->e_num == 0 && !err) {
++ newext->e_start =
++ ext3_new_block(handle, inode, newblock,
++ 0, 0, &err);
++ newext->e_num = 1;
++ }
++ }
++
++ return err;
++}
++
++/*
++ * returns the logical block the next allocated extent starts at,
++ * or 0xffffffff when there is none
++ * NOTE: the block number taken from an index entry is treated as an
++ * allocated block, so index entries have to be consistent with leaves
++ */
++static inline unsigned ext3_ext_next_allocated_block(struct inode *inode,
++						struct ext3_ext_path *path)
++{
++	struct ext3_ext_path *cur;
++	int leaf, level;
++
++	EXT_ASSERT(path != NULL);
++	leaf = path->p_depth;
++
++	if (leaf == 0 && path->p_ext == NULL)
++		return 0xffffffff;
++
++	/* FIXME: what if index isn't full ?! */
++	for (level = leaf; level >= 0; level--) {
++		cur = path + level;
++		if (level == leaf) {
++			/* leaf: is there an extent right of the found one? */
++			if (cur->p_ext != EXT_LAST_EXTENT(cur->p_hdr))
++				return cur->p_ext[1].e_block;
++			continue;
++		}
++		/* index: is there an entry right of the one we came by? */
++		if (cur->p_idx != EXT_LAST_INDEX(cur->p_hdr))
++			return cur->p_idx[1].e_block;
++	}
++
++	return 0xffffffff;
++}
++
++/*
++ * returns the logical block the next leaf starts at, taken from the
++ * nearest index level that has an entry right of the current one,
++ * or 0xffffffff when there is no such leaf
++ */
++static unsigned ext3_ext_next_leaf_block(struct inode *inode,
++						struct ext3_ext_path *path)
++{
++	int level;
++
++	EXT_ASSERT(path != NULL);
++
++	/* zero-tree has no leaf blocks at all */
++	if (path->p_depth == 0)
++		return 0xffffffff;
++
++	/* start at the index level right above the leaf and go up */
++	for (level = path->p_depth - 1; level >= 0; level--) {
++		if (path[level].p_idx == EXT_LAST_INDEX(path[level].p_hdr))
++			continue;
++		return path[level].p_idx[1].e_block;
++	}
++
++	return 0xffffffff;
++}
++
++/*
++ * if leaf gets modified and modified extent is first in the leaf
++ * then we have to correct all indexes above: the e_block of
++ * path[depth].p_ext is copied into the index entries on the path
++ * nothing is done for a depth-0 tree or when p_ext is not the first
++ * extent of its leaf
++ * returns 0 or the error from ext3_ext_get_access() / ext3_ext_dirty()
++ * TODO: do we need to correct tree in all cases?
++ */
++int ext3_ext_correct_indexes(handle_t *handle, struct inode *inode,
++ struct ext3_ext_path *path)
++{
++ int depth = EXT3_I(inode)->i_depth;
++ struct ext3_extent_header *eh;
++ struct ext3_extent *ex;
++ long border;
++ int k, err = 0;
++
++ eh = path[depth].p_hdr;
++ ex = path[depth].p_ext;
++
++ EXT_ASSERT(ex);
++ EXT_ASSERT(eh);
++
++ if (depth == 0) {
++ /* there is no tree at all */
++ return 0;
++ }
++
++ if (ex != EXT_FIRST_EXTENT(eh)) {
++ /* we correct tree if first leaf got modified only */
++ return 0;
++ }
++
++ /* the index right above the leaf is always updated */
++ k = depth - 1;
++ border = path[depth].p_ext->e_block;
++ if ((err = ext3_ext_get_access(handle, inode, path + k)))
++ return err;
++ path[k].p_idx->e_block = border;
++ if ((err = ext3_ext_dirty(handle, inode, path + k)))
++ return err;
++
++ while (k--) {
++ /* change all left-side indexes */
++ /* stop at the first level not entered through its first index;
++ * level 0 is updated regardless */
++ if (path[k].p_idx != EXT_FIRST_INDEX(path[k].p_hdr)
++ && k != 0)
++ break;
++ if ((err = ext3_ext_get_access(handle, inode, path + k)))
++ break;
++ path[k].p_idx->e_block = border;
++ if ((err = ext3_ext_dirty(handle, inode, path + k)))
++ break;
++ }
++
++ return err;
++}
++
++/*
++ * this routine tries to merge requested extent into the existing
++ * extent or inserts requested extent as new one into the tree,
++ * creating new leaf in no-space case
++ *
++ * 'path' is the result of ext3_ext_find_extent() for newext->e_block and
++ * stays owned by the caller. a path looked up here for the next leaf is
++ * dropped and freed before returning
++ * returns 0 or a negative error
++ */
++int ext3_ext_insert_extent(handle_t *handle, struct inode *inode,
++				struct ext3_ext_path *path,
++				struct ext3_extent *newext)
++{
++	int depth, len;
++	struct ext3_extent_header * eh;
++	struct ext3_extent *ex;
++	struct ext3_extent *nearex; /* nearest extent */
++	struct ext3_ext_path *npath = NULL;
++	int before = 0; /* new extent went in front of the nearest one */
++	int err, err2;
++
++	depth = EXT3_I(inode)->i_depth;
++	if ((ex = path[depth].p_ext)) {
++		/* try to insert block into found extent and return */
++		if (ex->e_block + ex->e_num == newext->e_block &&
++				ex->e_start + ex->e_num == newext->e_start) {
++#ifdef AGRESSIVE_TEST
++			if (ex->e_num >= 2)
++				goto repeat;
++#endif
++			if ((err = ext3_ext_get_access(handle, inode,
++							path + depth)))
++				return err;
++			ext_debug(inode, "append %d block to %d:%d (from %d)\n",
++				  newext->e_num, ex->e_block, ex->e_num,
++				  ex->e_start);
++			ex->e_num += newext->e_num;
++			err = ext3_ext_dirty(handle, inode, path + depth);
++			return err;
++		}
++	}
++
++repeat:
++	/* depth is re-read: creating a new leaf below may have grown the tree */
++	depth = EXT3_I(inode)->i_depth;
++	eh = path[depth].p_hdr;
++	if (eh->e_num == eh->e_max) {
++		/* probably next leaf has space for us? */
++		int next = ext3_ext_next_leaf_block(inode, path);
++		if (next != 0xffffffff) {
++			ext_debug(inode, "next leaf block - %d\n", next);
++			EXT_ASSERT(!npath);
++			npath = ext3_ext_find_extent(inode, next, NULL);
++			if (IS_ERR(npath))
++				return PTR_ERR(npath);
++			EXT_ASSERT(npath->p_depth == path->p_depth);
++			eh = npath[depth].p_hdr;
++			if (eh->e_num < eh->e_max) {
++				ext_debug(inode,
++					  "next leaf has free ext(%d)\n",
++					  eh->e_num);
++				path = npath;
++				goto repeat;
++			}
++			ext_debug(inode, "next leaf hasno free space(%d,%d)\n",
++				  eh->e_num, eh->e_max);
++		}
++		/*
++		 * there is no free space in found leaf
++		 * we're gonna add new leaf in the tree
++		 */
++		err = ext3_ext_create_new_leaf(handle, inode, path, newext);
++		if (err)
++			goto cleanup;
++		goto repeat;
++	}
++
++	nearex = path[depth].p_ext;
++
++	if ((err = ext3_ext_get_access(handle, inode, path + depth)))
++		goto cleanup;
++
++	/* open a slot for the new extent and point path[depth].p_ext at it */
++	if (!nearex) {
++		/* there is no extent in this leaf, create first one */
++		ext_debug(inode, "first extent in the leaf: %d:%d:%d\n",
++			  newext->e_block, newext->e_start,
++			  newext->e_num);
++		eh->e_num++;
++		path[depth].p_ext = EXT_FIRST_EXTENT(eh);
++
++	} else if (newext->e_block > nearex->e_block) {
++		EXT_ASSERT(newext->e_block != nearex->e_block);
++		len = EXT_MAX_EXTENT(eh) - nearex;
++		len = (len - 1) * sizeof(struct ext3_extent);
++		len = len < 0 ? 0 : len;
++		ext_debug(inode, "insert %d:%d:%d after: nearest 0x%p, "
++			  "move %d from 0x%p to 0x%p\n",
++			  newext->e_block, newext->e_start, newext->e_num,
++			  nearex, len, nearex + 1, nearex + 2);
++		ext3_ext_check_boundary(inode, path + depth, nearex + 2, len);
++		memmove(nearex + 2, nearex + 1, len);
++		path[depth].p_ext = nearex + 1;
++		eh->e_num++;
++	} else {
++		EXT_ASSERT(newext->e_block != nearex->e_block);
++		len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent);
++		len = len < 0 ? 0 : len;
++		ext_debug(inode, "insert %d:%d:%d before: nearest 0x%p, "
++			  "move %d from 0x%p to 0x%p\n",
++			  newext->e_block, newext->e_start, newext->e_num,
++			  nearex, len, nearex, nearex + 1);
++		memmove(nearex + 1, nearex, len);
++		path[depth].p_ext = nearex;
++		eh->e_num++;
++		before = 1;
++	}
++
++	/*
++	 * store the new extent before touching the indexes:
++	 * ext3_ext_correct_indexes() takes the border from
++	 * path[depth].p_ext->e_block, which must already be the new value
++	 */
++	nearex = path[depth].p_ext;
++	nearex->e_block = newext->e_block;
++	nearex->e_start = newext->e_start;
++	nearex->e_num = newext->e_num;
++
++	/* time to correct all indexes above */
++	if (before)
++		err = ext3_ext_correct_indexes(handle, inode, path);
++
++	/* the leaf is dirtied in any case, but the first error is reported */
++	err2 = ext3_ext_dirty(handle, inode, path + depth);
++	if (!err)
++		err = err2;
++
++cleanup:
++	if (npath) {
++		ext3_ext_drop_refs(inode, npath);
++		kfree(npath);
++	}
++
++	return err;
++}
++
++/*
++ * map logical block 'iblock' of the inode:
++ *  - if an extent covers it, the physical block is stored in bh_result
++ *  - else, if 'create' is set, a block is allocated, inserted into the
++ *    tree as a one-block extent and bh_result is marked BH_New
++ *  - else bh_result is left unmapped and 0 is returned
++ * the whole operation runs under i_ext_sem
++ * returns 0 or a negative error
++ */
++int ext3_ext_get_block(handle_t *handle, struct inode *inode, long iblock,
++			struct buffer_head *bh_result, int create,
++			int extend_disksize)
++{
++	struct ext3_ext_path *path;
++	struct ext3_extent newex;
++	struct ext3_extent *ex;
++	int goal, newblock, depth, err = 0;
++
++	ext_debug(inode, "block %d requested for inode %u, bh_result 0x%p\n",
++		  (int) iblock, (unsigned) inode->i_ino, bh_result);
++	bh_result->b_state &= ~(1UL << BH_New);
++
++	down(&EXT3_I(inode)->i_ext_sem);
++
++	/* i_depth is modified under i_ext_sem, so sample it under it too */
++	depth = EXT3_I(inode)->i_depth;
++
++	/* find extent for this block */
++	path = ext3_ext_find_extent(inode, iblock, NULL);
++	if (IS_ERR(path)) {
++		err = PTR_ERR(path);
++		/* an error pointer must not reach drop_refs()/kfree() below */
++		path = NULL;
++		goto out2;
++	}
++
++	if ((ex = path[depth].p_ext)) {
++		/* if found extent covers block, simply return it */
++		if (iblock >= ex->e_block && iblock < ex->e_block + ex->e_num) {
++			newblock = iblock - ex->e_block + ex->e_start;
++			ext_debug(inode, "%d fit into %d:%d -> %d\n",
++				  (int) iblock, ex->e_block, ex->e_num,
++				  newblock);
++			goto out;
++		}
++	}
++
++	/*
++	 * we couldn't try to create block if create flag is zero
++	 */
++	if (!create)
++		goto out2;
++
++	/* allocate new block */
++	goal = ext3_ext_find_goal(inode, path);
++	newblock = ext3_new_block(handle, inode, goal, 0, 0, &err);
++	if (!newblock)
++		goto out2;
++	ext_debug(inode, "allocate new block: goal %d, found %d\n",
++		  goal, newblock);
++
++	/* try to insert new extent into found leaf and return */
++	newex.e_block = iblock;
++	newex.e_start = newblock;
++	newex.e_num = 1;
++	err = ext3_ext_insert_extent(handle, inode, path, &newex);
++	if (err)
++		goto out2;
++
++	/* previous routine could use block we allocated */
++	newblock = newex.e_start;
++	bh_result->b_state |= (1UL << BH_New);
++
++out:
++	ext3_ext_show_leaf(inode, path);
++	bh_result->b_dev = inode->i_dev;
++	bh_result->b_blocknr = newblock;
++out2:
++	if (path) {
++		ext3_ext_drop_refs(inode, path);
++		kfree(path);
++	}
++	up(&EXT3_I(inode)->i_ext_sem);
++
++	return err;
++}
++
++/*
++ * returns 1 if current index has to be visited by truncate, 0 when this
++ * index block is finished: either p_idx moved in front of the first
++ * index, or the number of entries did not change since the previous
++ * visit (p_block is used to remember that number)
++ */
++static int ext3_ext_more_to_truncate(struct inode *inode,
++				struct ext3_ext_path *path)
++{
++	struct ext3_extent_header *hdr = path->p_hdr;
++
++	EXT_ASSERT(path->p_idx);
++
++	/* walked past the first entry, or nothing was removed below */
++	if (path->p_idx < EXT_FIRST_INDEX(hdr) ||
++			hdr->e_num == path->p_block)
++		return 0;
++
++	/*
++	 * remember the actual number of indexes to see whether it
++	 * changes by the next iteration
++	 */
++	path->p_block = hdr->e_num;
++
++	return 1;
++}
++
++/*
++ * routine removes index from the index block
++ * it's used in truncate case only. thus all requests are for
++ * last index in the block only
++ * 'path' points at the entry of the emptied block; the routine steps to
++ * the entry above it, decrements that block's e_num and then forgets and
++ * frees the block its p_idx points to
++ * returns 0 or the error from ext3_ext_get_access() / ext3_ext_dirty()
++ */
++int ext3_ext_remove_index(handle_t *handle, struct inode *inode,
++ struct ext3_ext_path *path)
++{
++ struct buffer_head *bh;
++ int err;
++
++ /* free index block */
++ /* work on the parent level from here on */
++ path--;
++ EXT_ASSERT(path->p_hdr->e_num);
++ if ((err = ext3_ext_get_access(handle, inode, path)))
++ return err;
++ path->p_hdr->e_num--;
++ if ((err = ext3_ext_dirty(handle, inode, path)))
++ return err;
++ bh = sb_get_hash_table(inode->i_sb, path->p_idx->e_leaf);
++ ext3_forget(handle, 0, inode, bh, path->p_idx->e_leaf);
++ ext3_free_blocks(handle, inode, path->p_idx->e_leaf, 1);
++
++ ext_debug(inode, "index is empty, remove it, free block %d\n",
++ path->p_idx->e_leaf);
++ return err;
++}
++
++/*
++ * returns 1 if current extent needs to be freed (even partially),
++ * otherwise returns 0: either p_ext moved in front of the first extent
++ * of the leaf, or the extent ends at or before the first block past i_size
++ */
++int ext3_ext_more_leaves_to_truncate(struct inode *inode,
++				struct ext3_ext_path *path)
++{
++	unsigned blocksize = inode->i_sb->s_blocksize;
++	struct ext3_extent *cur = path->p_ext;
++	int first_free;
++
++	EXT_ASSERT(cur);
++
++	/* are there extents left in the current leaf? */
++	if (cur < EXT_FIRST_EXTENT(path->p_hdr))
++		return 0;
++
++	/* i_size rounded up to whole blocks */
++	first_free = (inode->i_size + blocksize-1)
++			>> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
++
++	/* the extent has to be freed unless it ends before that block */
++	return first_free >= cur->e_block + cur->e_num ? 0 : 1;
++}
++
++/*
++ * make sure the handle has more than 'needed' buffer credits: nothing is
++ * done if it already has them, else the handle is extended and, if that
++ * does not succeed, restarted
++ * returns the handle, or ERR_PTR() with the error of ext3_journal_restart()
++ * (the caller checks the result with IS_ERR(), so a failed restart must
++ * not be reported as success)
++ */
++handle_t *ext3_ext_journal_restart(handle_t *handle, int needed)
++{
++	int err;
++
++	if (handle->h_buffer_credits > needed)
++		return handle;
++	if (!ext3_journal_extend(handle, needed))
++		return handle;
++	err = ext3_journal_restart(handle, needed);
++	if (err)
++		return ERR_PTR(err);
++
++	return handle;
++}
++
++/*
++ * this routine calculates max number of blocks to be modified
++ * while freeing 'num' blocks of the extent at path->p_ext and is intended
++ * to be used in truncate path. a NULL path asks for the worst case
++ */
++static int ext3_ext_calc_credits(struct inode *inode,
++				struct ext3_ext_path *path,
++				int num)
++{
++	int whole_path = (EXT3_I(inode)->i_depth * EXT3_ALLOC_NEEDED) + 1;
++
++	/*
++	 * extent couldn't cross group, so we will modify
++	 * single bitmap block and single group descriptor
++	 */
++	int credits = 2;
++
++	/*
++	 * if the first extent of a leaf is freed completely, the leaf
++	 * block gets freed and its pointer removed from the index above.
++	 * that pointer could be the last one in its index block, which
++	 * then has to go too. this way the whole path may be
++	 * modified/freed, plus the root index stored in the inode
++	 */
++	if (!path)
++		return credits + whole_path;
++	if (num == path->p_ext->e_num &&
++			path->p_ext == EXT_FIRST_EXTENT(path->p_hdr))
++		return credits + whole_path;
++
++	return credits;
++}
++
++/*
++ * core of the truncate procedure:
++ * - calculates what part of each extent in the requested leaf
++ * needs to be freed, walking the extents from the last one backwards
++ * - frees and forgets these blocks
++ * - removes the leaf from the index above once it gets empty
++ * returns 0 or a negative error
++ *
++ * TODO: we could optimize and free several extents during
++ * single journal_restart()-journal_restart() cycle
++ */
++static int ext3_ext_truncate_leaf(handle_t *handle,
++ struct inode *inode,
++ struct ext3_ext_path *path,
++ int depth)
++{
++ unsigned blocksize = inode->i_sb->s_blocksize;
++ int last_block;
++ int i, err = 0, sf, num;
++
++ ext_debug(inode, "level %d - leaf\n", depth);
++ /* header not set yet: the leaf lives in the buffer at p_bh */
++ if (!path->p_hdr)
++ path->p_hdr =
++ (struct ext3_extent_header *) path->p_bh->b_data;
++
++ EXT_ASSERT(path->p_hdr->e_num <= path->p_hdr->e_max);
++
++ /* i_size rounded up to whole blocks: first logical block to free */
++ last_block = (inode->i_size + blocksize-1)
++ >> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
++ path->p_ext = EXT_LAST_EXTENT(path->p_hdr);
++ while (ext3_ext_more_leaves_to_truncate(inode, path)) {
++
++ /* what part of extent have to be freed? */
++ sf = last_block > path->p_ext->e_block ?
++ last_block : path->p_ext->e_block;
++
++ /* number of blocks from extent to be freed */
++ num = path->p_ext->e_block + path->p_ext->e_num - sf;
++
++ /* calc first physical block to be freed */
++ sf = path->p_ext->e_start + (sf - path->p_ext->e_block);
++
++ i = ext3_ext_calc_credits(inode, path, num);
++ handle = ext3_ext_journal_restart(handle, i);
++ if (IS_ERR(handle))
++ return PTR_ERR(handle);
++
++ ext_debug(inode, "free extent %d:%d:%d -> free %d:%d\n",
++ path->p_ext->e_block, path->p_ext->e_start,
++ path->p_ext->e_num, sf, num);
++ for (i = 0; i < num; i++) {
++ struct buffer_head *bh =
++ sb_get_hash_table(inode->i_sb, sf + i);
++ ext3_forget(handle, 0, inode, bh, sf + i);
++ }
++ ext3_free_blocks(handle, inode, sf, num);
++
++ /* collect extents usage stats */
++ spin_lock(&EXT3_SB(inode->i_sb)->s_ext_lock);
++ EXT3_SB(inode->i_sb)->s_ext_extents++;
++ EXT3_SB(inode->i_sb)->s_ext_blocks += num;
++ spin_unlock(&EXT3_SB(inode->i_sb)->s_ext_lock);
++
++ /* reduce extent */
++ if ((err = ext3_ext_get_access(handle, inode, path)))
++ return err;
++ path->p_ext->e_num -= num;
++ /* an extent that lost all its blocks leaves the leaf */
++ if (path->p_ext->e_num == 0)
++ path->p_hdr->e_num--;
++ if ((err = ext3_ext_dirty(handle, inode, path)))
++ return err;
++
++ path->p_ext--;
++ }
++
++ /* if this leaf is free, then we should
++ * remove it from index block above */
++ if (path->p_hdr->e_num == 0 && depth > 0)
++ err = ext3_ext_remove_index(handle, inode, path);
++
++ return err;
++}
++
++/*
++ * account the tree depth of an inode in the per-superblock statistics
++ * (min/max depth, sum of depths, number of inodes counted)
++ * only inodes with EXT3_EXTENTS_FL that are truncated to zero size count
++ */
++static void ext3_ext_collect_stats(struct inode *inode)
++{
++	int depth;
++
++	/* skip inodes with old good bitmap; collect on full truncate only */
++	if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) || inode->i_size)
++		return;
++
++	depth = EXT3_I(inode)->i_depth;
++
++	if (EXT3_SB(inode->i_sb)->s_ext_mindepth > depth)
++		EXT3_SB(inode->i_sb)->s_ext_mindepth = depth;
++	if (EXT3_SB(inode->i_sb)->s_ext_maxdepth < depth)
++		EXT3_SB(inode->i_sb)->s_ext_maxdepth = depth;
++
++	EXT3_SB(inode->i_sb)->s_ext_sum += depth;
++	EXT3_SB(inode->i_sb)->s_ext_count++;
++}
++
++/*
++ * truncate the extent tree of the inode to i_size:
++ *  - zeroes the tail of the EOF page when i_size is not block aligned
++ *  - puts the inode on the orphan list and records i_size in i_disksize
++ *  - walks the tree from the rightmost leaf, freeing everything behind
++ *    i_size and removing leaves/index blocks that become empty
++ *  - resets i_depth once the root has no entries left
++ * runs under i_ext_sem; errors are not returned to the caller
++ */
++void ext3_ext_truncate(struct inode * inode)
++{
++	struct address_space *mapping = inode->i_mapping;
++	struct ext3_ext_path *path;
++	struct page * page;
++	handle_t *handle;
++	int i, depth, err = 0;
++
++	down(&EXT3_I(inode)->i_ext_sem);
++	ext3_ext_collect_stats(inode);
++
++	/*
++	 * We have to lock the EOF page here, because lock_page() nests
++	 * outside journal_start().
++	 */
++	if ((inode->i_size & (inode->i_sb->s_blocksize - 1)) == 0) {
++		/* Block boundary? Nothing to do */
++		page = NULL;
++	} else {
++		page = grab_cache_page(mapping,
++				inode->i_size >> PAGE_CACHE_SHIFT);
++		if (!page) {
++			up(&EXT3_I(inode)->i_ext_sem);
++			return;
++		}
++	}
++
++	/*
++	 * probably first extent we're gonna free will be last in block
++	 */
++	i = ext3_ext_calc_credits(inode, NULL, 0);
++	handle = ext3_journal_start(inode, i);
++	if (IS_ERR(handle)) {
++		if (page) {
++			clear_highpage(page);
++			flush_dcache_page(page);
++			unlock_page(page);
++			page_cache_release(page);
++		}
++		up(&EXT3_I(inode)->i_ext_sem);
++		return;
++	}
++
++	if (page)
++		ext3_block_truncate_page(handle, mapping, inode->i_size, page,
++					 inode->i_sb->s_blocksize);
++
++	/*
++	 * TODO: optimization is possible here
++	 * probably we need not scanning at all,
++	 * because page truncation is enough
++	 */
++	if (ext3_orphan_add(handle, inode))
++		goto out_stop;
++
++	/* we have to know where to truncate from in crash case */
++	EXT3_I(inode)->i_disksize = inode->i_size;
++	ext3_mark_inode_dirty(handle, inode);
++
++	/*
++	 * we start scanning from right side freeing all the blocks
++	 * after i_size and walking into the deep
++	 */
++	i = 0;
++	depth = EXT3_I(inode)->i_depth;
++	path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL);
++	/* kmalloc() reports failure with NULL, not with an error pointer */
++	if (!path) {
++		ext3_error(inode->i_sb, "ext3_ext_truncate",
++			   "Can't allocate path array");
++		goto out_stop;
++	}
++	memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1));
++
++	path[i].p_hdr = (struct ext3_extent_header *) EXT3_I(inode)->i_data;
++	while (i >= 0 && err == 0) {
++		if (i == depth) {
++			/* this is leaf block */
++			err = ext3_ext_truncate_leaf(handle, inode,
++						     path + i, i);
++			/* root level have p_bh == NULL, brelse() eats this */
++			brelse(path[i].p_bh);
++			i--;
++			continue;
++		}
++
++		/* this is index block */
++		if (!path[i].p_hdr) {
++			path[i].p_hdr =
++				(struct ext3_extent_header *) path[i].p_bh->b_data;
++			ext_debug(inode, "initialize header\n");
++		}
++
++		EXT_ASSERT(path[i].p_hdr->e_num <= path[i].p_hdr->e_max);
++
++		if (!path[i].p_idx) {
++			/* this level hasn't been touched yet */
++			path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
++			/* e_num + 1 can never match, so the first check
++			 * in ext3_ext_more_to_truncate() goes down */
++			path[i].p_block = path[i].p_hdr->e_num + 1;
++			ext_debug(inode, "init index ptr: hdr 0x%p, num %d\n",
++				  path[i].p_hdr, path[i].p_hdr->e_num);
++		} else {
++			/* we've already been here, look at the next index */
++			path[i].p_idx--;
++		}
++
++		ext_debug(inode, "level %d - index, first 0x%p, cur 0x%p\n",
++			  i, EXT_FIRST_INDEX(path[i].p_hdr),
++			  path[i].p_idx);
++		if (ext3_ext_more_to_truncate(inode, path + i)) {
++			/* go to the next level */
++			ext_debug(inode, "move to level %d (block %d)\n", i+1,
++				  path[i].p_idx->e_leaf);
++			memset(path + i + 1, 0, sizeof(*path));
++			path[i+1].p_bh = sb_bread(inode->i_sb,
++						  path[i].p_idx->e_leaf);
++			if (!path[i+1].p_bh) {
++				/* should we reset i_size? */
++				err = -EIO;
++				break;
++			}
++			i++;
++		} else {
++			/* we finish processing this index, go up */
++			if (path[i].p_hdr->e_num == 0 && i > 0) {
++				/* index is empty, remove it
++				 * handle must be already prepared by the
++				 * truncate_leaf()
++				 */
++				err = ext3_ext_remove_index(handle, inode,
++							    path + i);
++			}
++			/* root level have p_bh == NULL, brelse() eats this */
++			brelse(path[i].p_bh);
++			i--;
++			ext_debug(inode, "return to level %d\n", i);
++		}
++	}
++
++	/*
++	 * the walk ends with i == -1 unless an error stopped it; in that
++	 * case levels 1..i still hold the buffers read on the way down
++	 */
++	for (; i > 0; i--)
++		brelse(path[i].p_bh);
++
++	/* TODO: flexible tree reduction should be here */
++	if (path->p_hdr->e_num == 0) {
++		/*
++		 * truncate to zero freed all the tree
++		 * so, we need to correct i_depth
++		 */
++		EXT3_I(inode)->i_depth = 0;
++		path->p_hdr->e_max = 0;
++		ext3_mark_inode_dirty(handle, inode);
++	}
++
++	kfree(path);
++
++	/* In a multi-transaction truncate, we only make the final
++	 * transaction synchronous */
++	if (IS_SYNC(inode))
++		handle->h_sync = 1;
++
++out_stop:
++	/*
++	 * If this was a simple ftruncate(), and the file will remain alive
++	 * then we need to clear up the orphan record which we created above.
++	 * However, if this was a real unlink then we were called by
++	 * ext3_delete_inode(), and we allow that function to clean up the
++	 * orphan info for us.
++	 */
++	if (inode->i_nlink)
++		ext3_orphan_del(handle, inode);
++
++	up(&EXT3_I(inode)->i_ext_sem);
++	ext3_journal_stop(handle, inode);
++}
++
++/*
++ * this routine calculates the max number of blocks we could modify
++ * in order to allocate num new blocks for an inode
++ */
++int ext3_ext_writepage_trans_blocks(struct inode *inode, int num)
++{
++ struct ext3_inode_info *ei = EXT3_I(inode);
++ int depth = ei->i_depth + 1;
++ int needed;
++
++ /*
++ * the worst case we're expecting is creation of the
++ * new root (growing in depth) with index splitting;
++ * for splitting we have to consider depth + 1 because
++ * previous growing could have increased it
++ */
++
++ /*
++ * growing in depth:
++ * block allocation + new root + old root
++ */
++ needed = EXT3_ALLOC_NEEDED + 2;
++
++ /* index split. we may need to:
++ * allocate intermediate indexes and a new leaf
++ * change two blocks at each level, except the root
++ * modify the root block (inode)
++ */
++ needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1;
++
++ /* caller wants to allocate num blocks: scale the per-block estimate */
++ needed *= num;
++
++#ifdef CONFIG_QUOTA
++ /*
++ * FIXME: real calculation should be here
++ * it depends on the blockmap format of the quota file
++ */
++ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS;
++#endif
++
++ return needed;
++}
++
++/*
++ * called at mount time: reports the EXTENTS option and sets up s_ext_lock
++ */
++void ext3_ext_init(struct super_block *sb)
++{
++ /*
++ * further per-superblock initialization would go here
++ */
++
++ if (test_opt(sb, EXTENTS))
++ printk("EXT3-fs: file extents enabled\n");
++ spin_lock_init(&EXT3_SB(sb)->s_ext_lock);
++}
++
++/*
++ * called at umount time: prints the extent statistics kept in the sb info
++ */
++void ext3_ext_release(struct super_block *sb)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++ /* show collected stats; the test also keeps both divisors non-zero */
++ if (sbi->s_ext_count && sbi->s_ext_extents)
++ printk("EXT3-fs: min depth - %d, max depth - %d, "
++ "ave. depth - %d, ave. blocks/extent - %d\n",
++ sbi->s_ext_mindepth,
++ sbi->s_ext_maxdepth,
++ sbi->s_ext_sum / sbi->s_ext_count,
++ sbi->s_ext_blocks / sbi->s_ext_extents);
++}
++
+diff -puN fs/ext3/ialloc.c~ext3-extents fs/ext3/ialloc.c
+--- linux-2.4.18-chaos/fs/ext3/ialloc.c~ext3-extents 2003-08-25 20:09:59.000000000 +0400
++++ linux-2.4.18-chaos-alexey/fs/ext3/ialloc.c 2003-08-25 21:12:14.000000000 +0400
+@@ -571,6 +571,10 @@ repeat:
+ ei->i_prealloc_count = 0;
+ #endif
+ ei->i_block_group = i;
++ if (test_opt(sb, EXTENTS))
++ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL;
++ ei->i_depth = 0;
++ sema_init(&ei->i_ext_sem, 1);
+
+ if (ei->i_flags & EXT3_SYNC_FL)
+ inode->i_flags |= S_SYNC;
+diff -puN fs/ext3/inode.c~ext3-extents fs/ext3/inode.c
+--- linux-2.4.18-chaos/fs/ext3/inode.c~ext3-extents 2003-08-25 20:09:59.000000000 +0400
++++ linux-2.4.18-chaos-alexey/fs/ext3/inode.c 2003-08-25 20:09:59.000000000 +0400
+@@ -842,6 +842,15 @@ changed:
+ goto reread;
+ }
+
++/* Use the extents mapper for EXT3_EXTENTS_FL inodes, else the regular one. */
++static inline int ext3_get_block_wrap(handle_t *handle, struct inode *inode,
++	long block, struct buffer_head *bh, int create, int extend_disksize)
++{
++	if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
++		return ext3_ext_get_block(handle, inode, block, bh, create, extend_disksize);
++	return ext3_get_block_handle(handle, inode, block, bh, create, extend_disksize);
++}
++
+ /*
+ * The BKL is not held on entry here.
+ */
+@@ -855,7 +864,7 @@ static int ext3_get_block(struct inode *
+ handle = ext3_journal_current_handle();
+ J_ASSERT(handle != 0);
+ }
+- ret = ext3_get_block_handle(handle, inode, iblock,
++ ret = ext3_get_block_wrap(handle, inode, iblock,
+ bh_result, create, 1);
+ return ret;
+ }
+@@ -882,7 +891,7 @@ ext3_direct_io_get_block(struct inode *i
+ }
+ }
+ if (ret == 0)
+- ret = ext3_get_block_handle(handle, inode, iblock,
++ ret = ext3_get_block_wrap(handle, inode, iblock,
+ bh_result, create, 0);
+ if (ret == 0)
+ bh_result->b_size = (1 << inode->i_blkbits);
+@@ -904,7 +913,7 @@ struct buffer_head *ext3_getblk(handle_t
+ dummy.b_state = 0;
+ dummy.b_blocknr = -1000;
+ buffer_trace_init(&dummy.b_history);
+- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1);
++ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1);
+ if (!*errp && buffer_mapped(&dummy)) {
+ struct buffer_head *bh;
+ bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
+@@ -1520,7 +1529,7 @@ ext3_block_truncate_page_prepare(struct
+ * This required during truncate. We need to physically zero the tail end
+ * of that block so it doesn't yield old data if the file is later grown.
+ */
+-static int ext3_block_truncate_page(handle_t *handle,
++int ext3_block_truncate_page(handle_t *handle,
+ struct address_space *mapping, loff_t from,
+ struct page *page, unsigned blocksize)
+ {
+@@ -2040,6 +2049,9 @@ void ext3_truncate(struct inode * inode)
+ */
+ ei->i_disksize = inode->i_size;
+
++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
++ return ext3_ext_truncate(inode);
++
+ /*
+ * From here we block out all ext3_get_block() callers who want to
+ * modify the block allocation tree.
+@@ -2436,6 +2448,8 @@ void ext3_read_inode(struct inode * inod
+ ei->i_prealloc_count = 0;
+ #endif
+ ei->i_block_group = iloc.block_group;
++ ei->i_depth = raw_inode->osd2.linux2.l_i_depth;
++ sema_init(&ei->i_ext_sem, 1);
+
+ /*
+ * NOTE! The in-memory inode i_data array is in little-endian order
+@@ -2556,6 +2570,7 @@ static int ext3_do_update_inode(handle_t
+ raw_inode->i_fsize = 0;
+ }
+ #endif
++ raw_inode->osd2.linux2.l_i_depth = ei->i_depth;
+ raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl);
+ if (!S_ISREG(inode->i_mode)) {
+ raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
+@@ -2759,6 +2774,9 @@ int ext3_writepage_trans_blocks(struct i
+ int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3;
+ int ret;
+
++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
++ return ext3_ext_writepage_trans_blocks(inode, bpp);
++
+ if (ext3_should_journal_data(inode))
+ ret = 3 * (bpp + indirects) + 2;
+ else
+diff -puN fs/ext3/Makefile~ext3-extents fs/ext3/Makefile
+--- linux-2.4.18-chaos/fs/ext3/Makefile~ext3-extents 2003-08-25 20:09:59.000000000 +0400
++++ linux-2.4.18-chaos-alexey/fs/ext3/Makefile 2003-08-25 20:09:59.000000000 +0400
+@@ -12,7 +12,8 @@ O_TARGET := ext3.o
+ export-objs := ext3-exports.o
+
+ obj-y := balloc.o iopen.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+- ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o
++ ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o \
++ extents.o
+ obj-m := $(O_TARGET)
+
+ include $(TOPDIR)/Rules.make
+diff -puN fs/ext3/super.c~ext3-extents fs/ext3/super.c
+--- linux-2.4.18-chaos/fs/ext3/super.c~ext3-extents 2003-08-25 20:09:59.000000000 +0400
++++ linux-2.4.18-chaos-alexey/fs/ext3/super.c 2003-08-25 20:09:59.000000000 +0400
+@@ -619,6 +619,7 @@ void ext3_put_super (struct super_block
+ kdev_t j_dev = sbi->s_journal->j_dev;
+ int i;
+
++ ext3_ext_release(sb);
+ ext3_stop_delete_thread(sbi);
+ ext3_xattr_put_super(sb);
+ journal_destroy(sbi->s_journal);
+@@ -741,6 +742,12 @@ static int parse_options (char * options
+ else
+ #endif
+
++ if (!strcmp (this_char, "extents"))
++ set_opt (sbi->s_mount_opt, EXTENTS);
++ else
++ if (!strcmp (this_char, "extdebug"))
++ set_opt (sbi->s_mount_opt, EXTDEBUG);
++ else
+ if (!strcmp (this_char, "bsddf"))
+ clear_opt (*mount_options, MINIX_DF);
+ else if (!strcmp (this_char, "nouid32")) {
+@@ -1711,6 +1718,8 @@ static int ext3_create_journal(struct su
+ /* Make sure we flush the recovery flag to disk. */
+ ext3_commit_super(sb, es, 1);
+
++ ext3_ext_init(sb);
++
+ return 0;
+ }
+
+diff -puN include/linux/ext3_fs.h~ext3-extents include/linux/ext3_fs.h
+--- linux-2.4.18-chaos/include/linux/ext3_fs.h~ext3-extents 2003-08-25 20:09:59.000000000 +0400
++++ linux-2.4.18-chaos-alexey/include/linux/ext3_fs.h 2003-08-25 21:12:14.000000000 +0400
+@@ -183,6 +183,7 @@ struct ext3_group_desc
+ #define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */
+ #define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */
+ #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */
++#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */
+
+ #define EXT3_FL_USER_VISIBLE 0x00005FFF /* User visible flags */
+ #define EXT3_FL_USER_MODIFIABLE 0x000000FF /* User modifiable flags */
+@@ -243,7 +244,7 @@ struct ext3_inode {
+ struct {
+ __u8 l_i_frag; /* Fragment number */
+ __u8 l_i_fsize; /* Fragment size */
+- __u16 i_pad1;
++ __u16 l_i_depth;
+ __u16 l_i_uid_high; /* these 2 fields */
+ __u16 l_i_gid_high; /* were reserved2[0] */
+ __u32 l_i_reserved2;
+@@ -324,6 +325,8 @@ struct ext3_inode {
+ #define EXT3_MOUNT_IOPEN 0x8000 /* Allow access via iopen */
+ #define EXT3_MOUNT_IOPEN_NOPRIV 0x10000 /* Make iopen world-readable */
+ #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */
++#define EXT3_MOUNT_EXTENTS 0x40000 /* Extents support */
++#define EXT3_MOUNT_EXTDEBUG 0x80000 /* Extents debug */
+
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef _LINUX_EXT2_FS_H
+@@ -663,6 +666,12 @@ extern void ext3_discard_prealloc (struc
+ extern void ext3_dirty_inode(struct inode *);
+ extern int ext3_change_inode_journal_flag(struct inode *, int);
+ extern void ext3_truncate (struct inode *);
++extern int ext3_block_truncate_page(handle_t *handle,
++ struct address_space *mapping, loff_t from,
++ struct page *page, unsigned blocksize);
++extern int ext3_forget(handle_t *handle, int is_metadata,
++ struct inode *inode, struct buffer_head *bh,
++ int blocknr);
+ #ifdef EXT3_DELETE_THREAD
+ extern void ext3_truncate_thread(struct inode *inode);
+ #endif
+@@ -722,6 +731,13 @@ extern struct inode_operations ext3_dir_
+ /* symlink.c */
+ extern struct inode_operations ext3_fast_symlink_inode_operations;
+
++/* extents.c */
++extern int ext3_ext_writepage_trans_blocks(struct inode *, int);
++extern int ext3_ext_get_block(handle_t *, struct inode *, long,
++ struct buffer_head *, int, int);
++extern void ext3_ext_truncate(struct inode *);
++extern void ext3_ext_init(struct super_block *);
++extern void ext3_ext_release(struct super_block *);
+
+ #endif /* __KERNEL__ */
+
+diff -puN include/linux/ext3_fs_i.h~ext3-extents include/linux/ext3_fs_i.h
+--- linux-2.4.18-chaos/include/linux/ext3_fs_i.h~ext3-extents 2003-08-25 20:09:59.000000000 +0400
++++ linux-2.4.18-chaos-alexey/include/linux/ext3_fs_i.h 2003-08-25 20:09:59.000000000 +0400
+@@ -73,6 +73,10 @@ struct ext3_inode_info {
+ * by other means, so we have truncate_sem.
+ */
+ struct rw_semaphore truncate_sem;
++
++ /* extents-related data */
++ struct semaphore i_ext_sem;
++ __u16 i_depth;
+ };
+
+ #endif /* _LINUX_EXT3_FS_I */
+diff -puN include/linux/ext3_fs_sb.h~ext3-extents include/linux/ext3_fs_sb.h
+--- linux-2.4.18-chaos/include/linux/ext3_fs_sb.h~ext3-extents 2003-08-25 20:09:59.000000000 +0400
++++ linux-2.4.18-chaos-alexey/include/linux/ext3_fs_sb.h 2003-08-25 20:09:59.000000000 +0400
+@@ -84,6 +84,16 @@ struct ext3_sb_info {
+ wait_queue_head_t s_delete_thread_queue;
+ wait_queue_head_t s_delete_waiter_queue;
+ #endif
++
++ /* extents */
++ int s_ext_debug;
++ int s_ext_mindepth;
++ int s_ext_maxdepth;
++ int s_ext_sum;
++ int s_ext_count;
++ spinlock_t s_ext_lock;
++ int s_ext_extents;
++ int s_ext_blocks;
+ };
+
+ #endif /* _LINUX_EXT3_FS_SB */
+
+_
--- /dev/null
+ fs/ext3/ialloc.c | 5 +++--
+ fs/ext3/inode.c | 2 +-
+ fs/ext3/namei.c | 38 ++++++++++++++++++++++++++++++++++----
+ include/asm-alpha/fcntl.h | 1 +
+ include/asm-arm/fcntl.h | 1 +
+ include/asm-cris/fcntl.h | 1 +
+ include/asm-i386/fcntl.h | 1 +
+ include/asm-ia64/fcntl.h | 1 +
+ include/asm-m68k/fcntl.h | 1 +
+ include/asm-mips/fcntl.h | 1 +
+ include/asm-mips64/fcntl.h | 1 +
+ include/asm-parisc/fcntl.h | 1 +
+ include/asm-ppc/fcntl.h | 1 +
+ include/asm-s390/fcntl.h | 1 +
+ include/asm-s390x/fcntl.h | 1 +
+ include/asm-sh/fcntl.h | 1 +
+ include/asm-sparc/fcntl.h | 1 +
+ include/asm-sparc64/fcntl.h | 1 +
+ include/linux/ext3_fs.h | 2 +-
+ 19 files changed, 54 insertions(+), 8 deletions(-)
+
+--- linux-2.4.18/fs/ext3/ialloc.c~ext3-extents-oflag-2.4.18-chaos 2003-09-08 23:12:48.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/ialloc.c 2003-09-08 23:12:56.000000000 +0400
+@@ -331,7 +331,8 @@ int ext3_itable_block_used(struct super_
+ */
+ struct inode * ext3_new_inode (handle_t *handle,
+ const struct inode * dir, int mode,
+- unsigned long goal)
++ unsigned long goal,
++ struct lookup_intent *it)
+ {
+ struct super_block * sb;
+ struct buffer_head * bh;
+@@ -573,7 +574,7 @@ repeat:
+ ei->i_prealloc_count = 0;
+ #endif
+ ei->i_block_group = i;
+- if (test_opt(sb, EXTENTS))
++ if (test_opt(sb, EXTENTS) && it && (it->it_flags & O_EXTENTS))
+ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL;
+ ei->i_depth = 0;
+ sema_init(&ei->i_ext_sem, 1);
+--- linux-2.4.18/fs/ext3/namei.c~ext3-extents-oflag-2.4.18-chaos 2003-09-08 23:12:28.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/namei.c 2003-09-08 23:12:56.000000000 +0400
+@@ -1225,7 +1225,36 @@ static int ext3_create (struct inode * d
+ handle->h_sync = 1;
+
+ inode = ext3_new_inode (handle, dir, mode,
+- (unsigned long)dentry->d_fsdata);
++ (unsigned long)dentry->d_fsdata, NULL);
++ err = PTR_ERR(inode);
++ if (!IS_ERR(inode)) {
++ inode->i_op = &ext3_file_inode_operations;
++ inode->i_fop = &ext3_file_operations;
++ inode->i_mapping->a_ops = &ext3_aops;
++ err = ext3_add_nondir(handle, dentry, inode);
++ ext3_mark_inode_dirty(handle, inode);
++ }
++ ext3_journal_stop(handle, dir);
++ return err;
++}
++
++static int ext3_create_it (struct inode * dir, struct dentry * dentry, int mode,
++ struct lookup_intent *it)
++{
++ handle_t *handle;
++ struct inode * inode;
++ int err;
++
++ handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
++ EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
++ if (IS_ERR(handle))
++ return PTR_ERR(handle);
++
++ if (IS_SYNC(dir))
++ handle->h_sync = 1;
++
++ inode = ext3_new_inode (handle, dir, mode,
++ (unsigned long)dentry->d_fsdata, it);
+ err = PTR_ERR(inode);
+ if (!IS_ERR(inode)) {
+ inode->i_op = &ext3_file_inode_operations;
+@@ -1254,7 +1283,7 @@ static int ext3_mknod (struct inode * di
+ handle->h_sync = 1;
+
+ inode = ext3_new_inode (handle, dir, mode,
+- (unsigned long)dentry->d_fsdata);
++ (unsigned long)dentry->d_fsdata, NULL);
+ err = PTR_ERR(inode);
+ if (!IS_ERR(inode)) {
+ init_special_inode(inode, mode, rdev);
+@@ -1285,7 +1314,7 @@ static int ext3_mkdir(struct inode * dir
+ handle->h_sync = 1;
+
+ inode = ext3_new_inode (handle, dir, S_IFDIR | mode,
+- (unsigned long)dentry->d_fsdata);
++ (unsigned long)dentry->d_fsdata, NULL);
+ err = PTR_ERR(inode);
+ if (IS_ERR(inode))
+ goto out_stop;
+@@ -1678,7 +1707,7 @@ static int ext3_symlink (struct inode *
+ handle->h_sync = 1;
+
+ inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO,
+- (unsigned long)dentry->d_fsdata);
++ (unsigned long)dentry->d_fsdata, NULL);
+ err = PTR_ERR(inode);
+ if (IS_ERR(inode))
+ goto out_stop;
+@@ -1882,6 +1911,7 @@ end_rename:
+ * directories can handle most operations...
+ */
+ struct inode_operations ext3_dir_inode_operations = {
++ create_it: ext3_create_it, /* BKL held */
+ create: ext3_create, /* BKL held */
+ lookup: ext3_lookup, /* BKL held */
+ link: ext3_link, /* BKL held */
+--- linux-2.4.18/include/asm-alpha/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:07.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-alpha/fcntl.h 2003-09-08 23:12:56.000000000 +0400
+@@ -22,6 +22,7 @@
+ #define O_LARGEFILE 0400000 /* will be set by the kernel on every open */
+ #define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */
+ #define O_DIRECT 02000000 /* direct disk access - should check with OSF/1 */
++#define O_EXTENTS 04000000 /* create file with extents if possible */
+
+ #define F_DUPFD 0 /* dup */
+ #define F_GETFD 1 /* get close_on_exec */
+--- linux-2.4.18/include/asm-arm/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:07.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-arm/fcntl.h 2003-09-08 23:12:56.000000000 +0400
+@@ -21,6 +21,7 @@
+ #define O_DIRECT 0200000 /* direct disk access hint - currently ignored */
+ #define O_LARGEFILE 0400000
+ #define O_ATOMICLOOKUP 01000000
++#define O_EXTENTS 02000000 /* create file with extents if possible */
+
+ #define F_DUPFD 0 /* dup */
+ #define F_GETFD 1 /* get close_on_exec */
+--- linux-2.4.18/include/asm-cris/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2001-02-09 03:32:44.000000000 +0300
++++ linux-2.4.18-alexey/include/asm-cris/fcntl.h 2003-09-08 23:12:56.000000000 +0400
+@@ -22,6 +22,7 @@
+ #define O_LARGEFILE 0100000
+ #define O_DIRECTORY 0200000 /* must be a directory */
+ #define O_NOFOLLOW 0400000 /* don't follow links */
++#define O_EXTENTS 01000000 /* create file with extents if possible */
+
+ #define F_DUPFD 0 /* dup */
+ #define F_GETFD 1 /* get f_flags */
+--- linux-2.4.18/include/asm-i386/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:09.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-i386/fcntl.h 2003-09-08 23:12:56.000000000 +0400
+@@ -21,6 +21,7 @@
+ #define O_DIRECTORY 0200000 /* must be a directory */
+ #define O_NOFOLLOW 0400000 /* don't follow links */
+ #define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */
++#define O_EXTENTS 02000000 /* create file with extents if possible */
+
+ #define F_DUPFD 0 /* dup */
+ #define F_GETFD 1 /* get close_on_exec */
+--- linux-2.4.18/include/asm-ia64/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:09.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-ia64/fcntl.h 2003-09-08 23:12:56.000000000 +0400
+@@ -29,6 +29,7 @@
+ #define O_DIRECTORY 0200000 /* must be a directory */
+ #define O_NOFOLLOW 0400000 /* don't follow links */
+ #define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */
++#define O_EXTENTS 02000000 /* create file with extents if possible */
+
+ #define F_DUPFD 0 /* dup */
+ #define F_GETFD 1 /* get close_on_exec */
+--- linux-2.4.18/include/asm-m68k/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2000-11-28 05:00:49.000000000 +0300
++++ linux-2.4.18-alexey/include/asm-m68k/fcntl.h 2003-09-08 23:12:56.000000000 +0400
+@@ -20,6 +20,7 @@
+ #define O_NOFOLLOW 0100000 /* don't follow links */
+ #define O_DIRECT 0200000 /* direct disk access hint - currently ignored */
+ #define O_LARGEFILE 0400000
++#define O_EXTENTS 01000000 /* create file with extents if possible */
+
+ #define F_DUPFD 0 /* dup */
+ #define F_GETFD 1 /* get close_on_exec */
+--- linux-2.4.18/include/asm-mips64/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:15.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-mips64/fcntl.h 2003-09-08 23:12:56.000000000 +0400
+@@ -27,6 +27,7 @@
+ #define O_DIRECTORY 0x10000 /* must be a directory */
+ #define O_NOFOLLOW 0x20000 /* don't follow links */
+ #define O_ATOMICLOOKUP 0x40000
++#define O_EXTENTS 0x80000 /* create file with extents if possible */
+
+ #define O_NDELAY O_NONBLOCK
+
+--- linux-2.4.18/include/asm-mips/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:14.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-mips/fcntl.h 2003-09-08 23:12:56.000000000 +0400
+@@ -27,6 +27,7 @@
+ #define O_DIRECTORY 0x10000 /* must be a directory */
+ #define O_NOFOLLOW 0x20000 /* don't follow links */
+ #define O_ATOMICLOOKUP 0x40000
++#define O_EXTENTS 02000000 /* create file with extents if possible */
+
+ #define O_NDELAY O_NONBLOCK
+
+--- linux-2.4.18/include/asm-parisc/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2000-12-05 23:29:39.000000000 +0300
++++ linux-2.4.18-alexey/include/asm-parisc/fcntl.h 2003-09-08 23:12:56.000000000 +0400
+@@ -19,6 +19,7 @@
+ #define O_NOCTTY 00400000 /* not fcntl */
+ #define O_DSYNC 01000000 /* HPUX only */
+ #define O_RSYNC 02000000 /* HPUX only */
++#define O_EXTENTS 04000000 /* create file with extents if possible */
+
+ #define FASYNC 00020000 /* fcntl, for BSD compatibility */
+ #define O_DIRECT 00040000 /* direct disk access hint - currently ignored */
+--- linux-2.4.18/include/asm-ppc/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:15.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-ppc/fcntl.h 2003-09-08 23:12:56.000000000 +0400
+@@ -24,6 +24,7 @@
+ #define O_LARGEFILE 0200000
+ #define O_DIRECT 0400000 /* direct disk access hint */
+ #define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */
++#define O_EXTENTS	02000000	/* create file with extents if possible */
+
+ #define F_DUPFD 0 /* dup */
+ #define F_GETFD 1 /* get close_on_exec */
+--- linux-2.4.18/include/asm-s390/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:15.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-s390/fcntl.h 2003-09-08 23:12:56.000000000 +0400
+@@ -28,6 +28,7 @@
+ #define O_DIRECTORY 0200000 /* must be a directory */
+ #define O_NOFOLLOW 0400000 /* don't follow links */
+ #define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */
++#define O_EXTENTS 02000000 /* create file with extents if possible */
+
+ #define F_DUPFD 0 /* dup */
+ #define F_GETFD 1 /* get close_on_exec */
+--- linux-2.4.18/include/asm-s390x/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:15.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-s390x/fcntl.h 2003-09-08 23:12:56.000000000 +0400
+@@ -28,6 +28,7 @@
+ #define O_DIRECTORY 0200000 /* must be a directory */
+ #define O_NOFOLLOW 0400000 /* don't follow links */
+ #define O_ATOMICLOOKUP 01000000 /* do atomic file lookup */
++#define O_EXTENTS 02000000 /* create file with extents if possible */
+
+ #define F_DUPFD 0 /* dup */
+ #define F_GETFD 1 /* get close_on_exec */
+--- linux-2.4.18/include/asm-sh/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:15.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-sh/fcntl.h 2003-09-08 23:12:56.000000000 +0400
+@@ -21,6 +21,7 @@
+ #define O_DIRECTORY 0200000 /* must be a directory */
+ #define O_NOFOLLOW 0400000 /* don't follow links */
+ #define O_ATOMICLOOKUP 01000000
++#define O_EXTENTS 02000000 /* create file with extents if possible */
+
+ #define F_DUPFD 0 /* dup */
+ #define F_GETFD 1 /* get close_on_exec */
+--- linux-2.4.18/include/asm-sparc64/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:16.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-sparc64/fcntl.h 2003-09-08 23:12:56.000000000 +0400
+@@ -22,6 +22,7 @@
+ #define O_LARGEFILE 0x40000
+ #define O_ATOMICLOOKUP 0x80000 /* do atomic file lookup */
+ #define O_DIRECT 0x100000 /* direct disk access hint */
++#define O_EXTENTS 0x200000 /* create file with extents if possible */
+
+
+ #define F_DUPFD 0 /* dup */
+--- linux-2.4.18/include/asm-sparc/fcntl.h~ext3-extents-oflag-2.4.18-chaos 2003-07-28 17:52:16.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-sparc/fcntl.h 2003-09-08 23:12:56.000000000 +0400
+@@ -22,6 +22,7 @@
+ #define O_LARGEFILE 0x40000
+ #define O_ATOMICLOOKUP 0x80000 /* do atomic file lookup */
+ #define O_DIRECT 0x100000 /* direct disk access hint */
++#define O_EXTENTS 0x200000 /* create file with extents if possible */
+
+ #define F_DUPFD 0 /* dup */
+ #define F_GETFD 1 /* get close_on_exec */
+--- linux-2.4.18/include/linux/ext3_fs.h~ext3-extents-oflag-2.4.18-chaos 2003-09-08 23:12:48.000000000 +0400
++++ linux-2.4.18-alexey/include/linux/ext3_fs.h 2003-09-08 23:12:56.000000000 +0400
+@@ -641,7 +641,7 @@ extern int ext3_sync_file (struct file *
+
+ /* ialloc.c */
+ extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int,
+- unsigned long);
++ unsigned long, struct lookup_intent *);
+ extern void ext3_free_inode (handle_t *, struct inode *);
+ extern struct inode * ext3_orphan_get (struct super_block *, unsigned long);
+ extern unsigned long ext3_count_free_inodes (struct super_block *);
+--- linux-2.4.18/fs/ext3/inode.c~ext3-extents-oflag-2.4.18-chaos 2003-09-08 23:12:48.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/inode.c 2003-09-08 23:13:15.000000000 +0400
+@@ -2204,7 +2204,7 @@ void ext3_truncate_thread(struct inode *
+ if (IS_ERR(handle))
+ goto out_truncate;
+
+- new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode, 0);
++ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode, 0, 0);
+ if (IS_ERR(new_inode)) {
+ ext3_debug("truncate inode %lu directly (no new inodes)\n",
+ old_inode->i_ino);
+
+_
--- /dev/null
+ fs/ext3/inode.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
+ fs/ext3/super.c | 3 +++
+ 2 files changed, 55 insertions(+)
+
+--- linux-2.6.0-test3/fs/ext3/inode.c~ext3-map_inode_page-2.6.0 2003-09-02 14:48:43.000000000 +0400
++++ linux-2.6.0-test3-alexey/fs/ext3/inode.c 2003-09-08 17:50:16.000000000 +0400
+@@ -3129,3 +3129,55 @@ int ext3_prep_san_write(struct inode *in
+ ret = ret2;
+ return ret;
+ }
++
++/*
++ * Map the blocks backing @page: blocks[i] gets the disk block (0 = hole),
++ * created[i] gets 0 (already mapped), -1 (hole left) or 1 (allocated here).
++ * Holes are allocated only if @create; returns 0 or the first error hit.
++ */
++int ext3_map_inode_page(struct inode *inode, struct page *page,
++			unsigned long *blocks, int *created, int create)
++{
++	unsigned int blocks_per_page;
++	unsigned long iblock;
++	struct buffer_head dummy;
++	void *handle;
++	int i, rc = 0, failed = 0, needed_blocks;
++
++	blocks_per_page = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
++	iblock = page->index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
++
++	for (i = 0; i < blocks_per_page; i++, iblock++) {
++		blocks[i] = ext3_bmap(inode->i_mapping, iblock);
++		created[i] = blocks[i] ? 0 : -1;
++		if (blocks[i] == 0)
++			failed++;
++	}
++
++	if (failed == 0 || create == 0)
++		return 0;
++
++	needed_blocks = ext3_writepage_trans_blocks(inode) * failed;
++	handle = ext3_journal_start(inode, needed_blocks);
++	if (IS_ERR(handle))
++		return PTR_ERR(handle);
++
++	iblock = page->index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
++	for (i = 0; i < blocks_per_page; i++, iblock++) {
++		if (blocks[i] != 0)
++			continue;
++		rc = ext3_get_block_handle(handle, inode, iblock, &dummy, 1, 1);
++		if (rc) {
++			printk(KERN_INFO "ext3_map_inode_page: error reading "
++			       "block %ld\n", iblock);
++			goto out;
++		}
++		blocks[i] = dummy.b_blocknr;
++		created[i] = 1;
++	}
++
++ out:
++	ext3_journal_stop(handle);
++	return rc;
++}
++
+--- linux-2.6.0-test3/fs/ext3/super.c~ext3-map_inode_page-2.6.0 2003-09-02 14:48:43.000000000 +0400
++++ linux-2.6.0-test3-alexey/fs/ext3/super.c 2003-09-08 17:48:33.000000000 +0400
+@@ -2094,6 +2094,9 @@ static void __exit exit_ext3_fs(void)
+ int ext3_prep_san_write(struct inode *inode, long *blocks,
+ int nblocks, loff_t newsize);
+ EXPORT_SYMBOL(ext3_prep_san_write);
++int ext3_map_inode_page(struct inode *inode, struct page *page,
++			unsigned long *blocks, int *created, int create);
++EXPORT_SYMBOL(ext3_map_inode_page);
+
+ MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
+ MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
+
+_
--- /dev/null
+ fs/ext3/super.c | 1 -
+ 1 files changed, 1 deletion(-)
+
+--- linux-2.4.18-chaos/fs/ext3/super.c~ext3-no-write-super-chaos 2003-08-24 21:34:53.000000000 +0400
++++ linux-2.4.18-chaos-alexey/fs/ext3/super.c 2003-08-24 21:40:47.000000000 +0400
+@@ -1818,7 +1818,6 @@ void ext3_write_super (struct super_bloc
+ if (down_trylock(&sb->s_lock) == 0)
+ BUG();
+ sb->s_dirt = 0;
+- log_start_commit(EXT3_SB(sb)->s_journal, NULL);
+ }
+
+ static int ext3_sync_fs(struct super_block *sb)
+
+_
--- /dev/null
+
+Index: linux-2.4.20-rh/fs/ext3/inode.c
+===================================================================
+--- linux-2.4.20-rh.orig/fs/ext3/inode.c 2003-09-04 18:01:41.000000000 +0800
++++ linux-2.4.20-rh/fs/ext3/inode.c 2003-09-04 18:18:54.000000000 +0800
+@@ -27,6 +27,7 @@
+ #include <linux/ext3_jbd.h>
+ #include <linux/jbd.h>
+ #include <linux/locks.h>
++#include <linux/iobuf.h>
+ #include <linux/smp_lock.h>
+ #include <linux/highuid.h>
+ #include <linux/quotaops.h>
+@@ -743,9 +744,9 @@
+ * The BKL may not be held on entry here. Be sure to take it early.
+ */
+
+-static int ext3_get_block_handle(handle_t *handle, struct inode *inode,
+- long iblock,
+- struct buffer_head *bh_result, int create)
++static int
++ext3_get_block_handle(handle_t *handle, struct inode *inode, long iblock,
++ struct buffer_head *bh_result, int create, int extend_disksize)
+ {
+ int err = -EIO;
+ int offsets[4];
+@@ -825,15 +826,18 @@
+ if (err)
+ goto cleanup;
+
+- new_size = inode->i_size;
+- /*
+- * This is not racy against ext3_truncate's modification of i_disksize
+- * because VM/VFS ensures that the file cannot be extended while
+- * truncate is in progress. It is racy between multiple parallel
+- * instances of get_block, but we have the BKL.
+- */
+- if (new_size > inode->u.ext3_i.i_disksize)
+- inode->u.ext3_i.i_disksize = new_size;
++ if (extend_disksize) {
++ /*
++ * This is not racy against ext3_truncate's modification of
++ * i_disksize because VM/VFS ensures that the file cannot be
++ * extended while truncate is in progress. It is racy between
++ * multiple parallel instances of get_block, but we have BKL.
++ */
++ struct ext3_inode_info *ei = EXT3_I(inode);
++ new_size = inode->i_size;
++ if (new_size > ei->i_disksize)
++ ei->i_disksize = new_size;
++ }
+
+ bh_result->b_state |= (1UL << BH_New);
+ goto got_it;
+@@ -861,7 +865,38 @@
+ handle = ext3_journal_current_handle();
+ J_ASSERT(handle != 0);
+ }
+- ret = ext3_get_block_handle(handle, inode, iblock, bh_result, create);
++ ret = ext3_get_block_handle(handle, inode, iblock,
++ bh_result, create, 1);
++ return ret;
++}
++
++#define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32)
++
++/*
++ * get_block callback for O_DIRECT: tops up the journal credits of the
++ * running handle when they run low, then maps iblock without touching
++ * i_disksize (ext3_direct_IO() moves it once the transfer is done).
++ */
++static int
++ext3_direct_io_get_block(struct inode *inode, long iblock,
++		struct buffer_head *bh_result, int create)
++{
++	handle_t *handle = journal_current_handle();
++	int ret = 0;
++
++	lock_kernel();
++	if (handle && handle->h_buffer_credits <= EXT3_RESERVE_TRANS_BLOCKS) {
++		/* low on credits; > 0 from extend means it could not grow */
++		ret = ext3_journal_extend(handle, DIO_CREDITS);
++		if (ret > 0)
++			ret = ext3_journal_restart(handle, DIO_CREDITS);
++	}
++	if (ret == 0)
++		ret = ext3_get_block_handle(handle, inode, iblock,
++					bh_result, create, 0);
++	if (ret == 0)
++		bh_result->b_size = (1 << inode->i_blkbits);
++	unlock_kernel();
+ return ret;
+ }
+
+@@ -879,7 +914,7 @@
+ dummy.b_state = 0;
+ dummy.b_blocknr = -1000;
+ buffer_trace_init(&dummy.b_history);
+- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create);
++ *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1);
+ if (!*errp && buffer_mapped(&dummy)) {
+ struct buffer_head *bh;
+ bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
+@@ -1387,6 +1422,67 @@
+ return journal_try_to_free_buffers(journal, page, wait);
+ }
+
++/*
++ * O_DIRECT entry point.  An extending write keeps the inode on the orphan
++ * list during the I/O and only then advances i_size and i_disksize.
++ */
++static int
++ext3_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf,
++		unsigned long blocknr, int blocksize)
++{
++	struct ext3_inode_info *ei = EXT3_I(inode);
++	handle_t *handle = NULL;
++	int ret, err;
++	int orphan = 0;
++	loff_t offset = (loff_t)blocknr << inode->i_blkbits; /* widen first */
++	ssize_t count = iobuf->length;
++
++	if (rw == WRITE) {
++		loff_t final_size = offset + count;
++
++		lock_kernel();
++		handle = ext3_journal_start(inode, DIO_CREDITS);
++		unlock_kernel();
++		if (IS_ERR(handle)) {
++			ret = PTR_ERR(handle);
++			goto out;
++		}
++		if (final_size > inode->i_size) {
++			lock_kernel();
++			ret = ext3_orphan_add(handle, inode);
++			unlock_kernel();
++			if (ret)
++				goto out_stop;
++			orphan = 1;
++			ei->i_disksize = inode->i_size;
++		}
++	}
++
++	ret = generic_direct_IO(rw, inode, iobuf, blocknr,
++				blocksize, ext3_direct_io_get_block);
++
++out_stop:
++	if (handle) {
++		lock_kernel();
++		if (orphan)
++			ext3_orphan_del(handle, inode);
++		if (orphan && ret > 0) {
++			loff_t end = offset + ret;
++			if (end > inode->i_size) {
++				ei->i_disksize = end;
++				inode->i_size = end;
++				/* ret > 0 here: a failure cannot be reported */
++				ext3_mark_inode_dirty(handle, inode);
++			}
++		}
++		err = ext3_journal_stop(handle, inode);
++		if (ret == 0)
++			ret = err;
++		unlock_kernel();
++	}
++out:
++	return ret;
++}
+
+ struct address_space_operations ext3_aops = {
+ readpage: ext3_readpage, /* BKL not held. Don't need */
+@@ -1397,6 +1493,7 @@
+ bmap: ext3_bmap, /* BKL held */
+ flushpage: ext3_flushpage, /* BKL not held. Don't need */
+ releasepage: ext3_releasepage, /* BKL not held. Don't need */
++ direct_IO: ext3_direct_IO, /* BKL not held. Don't need */
+ };
+
+ /*
+@@ -2970,7 +3067,7 @@
+ /* alloc blocks one by one */
+ for (i = 0; i < nblocks; i++) {
+ ret = ext3_get_block_handle(handle, inode, blocks[i],
+- &bh_tmp, 1);
++ &bh_tmp, 1, 1);
+ if (ret)
+ break;
+
+@@ -3030,7 +3127,7 @@
+ if (blocks[i] != 0)
+ continue;
+
+- rc = ext3_get_block_handle(handle, inode, iblock, &dummy, 1);
++ rc = ext3_get_block_handle(handle, inode, iblock, &dummy, 1, 1);
+ if (rc) {
+ printk(KERN_INFO "ext3_map_inode_page: error reading "
+ "block %ld\n", iblock);
--- /dev/null
+ fs/ext3/ialloc.c | 3
+ fs/ext3/inode.c | 3
+ fs/ext3/namei.c | 582 +++++++++++++++++++++++++++++++++++++---------
+ fs/ext3/super.c | 14 +
+ include/linux/ext3_fs.h | 1
+ include/linux/ext3_fs_i.h | 6
+ 6 files changed, 500 insertions(+), 109 deletions(-)
+
+--- linux-2.4.18/fs/ext3/namei.c~ext3-pdirops-2.4.18-chaos 2003-09-01 14:58:06.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/namei.c 2003-09-02 11:46:15.000000000 +0400
+@@ -52,6 +52,9 @@ static struct buffer_head *ext3_append(h
+ {
+ struct buffer_head *bh;
+
++ /* with parallel dir operations all appends
++ * have to be serialized -bzzz */
++ down(&EXT3_I(inode)->i_append_sem);
+ *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
+
+ if ((bh = ext3_bread(handle, inode, *block, 1, err))) {
+@@ -59,6 +62,8 @@ static struct buffer_head *ext3_append(h
+ EXT3_I(inode)->i_disksize = inode->i_size;
+ ext3_journal_get_write_access(handle,bh);
+ }
++ up(&EXT3_I(inode)->i_append_sem);
++
+ return bh;
+ }
+
+@@ -135,6 +140,8 @@ struct dx_frame
+ struct buffer_head *bh;
+ struct dx_entry *entries;
+ struct dx_entry *at;
++ unsigned long leaf;
++ unsigned int curidx;
+ };
+
+ struct dx_map_entry
+@@ -143,6 +150,30 @@ struct dx_map_entry
+ u32 offs;
+ };
+
++/* Busy-wait lock kept in bit BH_DXLock of bh->b_state; both helpers
++ * are empty without CONFIG_SMP. FIXME: this should be reworked using
++ * bb_spin_lock introduced in -mm tree */
++#define BH_DXLock 25
++
++static inline void dx_lock_bh(struct buffer_head volatile *bh)
++{
++#ifdef CONFIG_SMP
++ while (test_and_set_bit(BH_DXLock, &bh->b_state)) { /* bit was already set */
++ while (test_bit(BH_DXLock, &bh->b_state)) /* poll until it clears */
++ cpu_relax();
++ }
++#endif
++}
++
++static inline void dx_unlock_bh(struct buffer_head *bh)
++{
++#ifdef CONFIG_SMP
++ smp_mb__before_clear_bit(); /* barrier ahead of dropping the bit */
++ clear_bit(BH_DXLock, &bh->b_state);
++#endif
++}
++
++
+ #ifdef CONFIG_EXT3_INDEX
+ static inline unsigned dx_get_block (struct dx_entry *entry);
+ static void dx_set_block (struct dx_entry *entry, unsigned value);
+@@ -154,7 +185,7 @@ static void dx_set_count (struct dx_entr
+ static void dx_set_limit (struct dx_entry *entries, unsigned value);
+ static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
+ static unsigned dx_node_limit (struct inode *dir);
+-static struct dx_frame *dx_probe(struct dentry *dentry,
++static struct dx_frame *dx_probe(struct qstr *name,
+ struct inode *dir,
+ struct dx_hash_info *hinfo,
+ struct dx_frame *frame,
+@@ -166,15 +197,18 @@ static void dx_sort_map(struct dx_map_en
+ static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
+ struct dx_map_entry *offsets, int count);
+ static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
+-static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
++static void dx_insert_block (struct inode *, struct dx_frame *, u32, u32, u32);
+ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+ struct dx_frame *frame,
+ struct dx_frame *frames, int *err,
+ __u32 *start_hash);
+ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
+- struct ext3_dir_entry_2 **res_dir, int *err);
++ struct ext3_dir_entry_2 **res_dir, int *err,
++ int rwlock, void **lock);
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+ struct inode *inode);
++static inline void *ext3_lock_htree(struct inode *, unsigned long, int);
++static inline void ext3_unlock_htree(struct inode *, void *);
+
+ /*
+ * Future: use high four bits of block for coalesce-on-delete flags
+@@ -307,6 +341,94 @@ struct stats dx_show_entries(struct dx_h
+ #endif /* DX_DEBUG */
+
+ /*
++ * dx_find_position
++ *
++ * search position of specified hash in index
++ *
++ */
++
++struct dx_entry * dx_find_position(struct dx_entry * entries, u32 hash)
++{
++ struct dx_entry *lo = entries + 1;
++ struct dx_entry *hi = entries + dx_get_count(entries) - 1;
++
++ /* slots before lo hold hashes <= hash, slots after hi hold larger ones */
++ while (lo <= hi) {
++ struct dx_entry *mid = lo + (hi - lo) / 2;
++
++ if (dx_get_hash(mid) > hash)
++ hi = mid - 1;
++ else
++ lo = mid + 1;
++ }
++ /* slot 0 is never compared, so it is the result when no later
++ * slot has a hash <= the one searched for */
++ return lo - 1;
++}
++
++/*
++ * returns 1 if path is unchanged
++ */
++int dx_check_path(struct dx_frame *frame, u32 hash)
++{
++ struct dx_entry *pos;
++ u32 block;
++
++ /* sample the index under the buffer lock, compare afterwards */
++ dx_lock_bh(frame->bh);
++ pos = dx_find_position(frame->entries, hash);
++ block = dx_get_block(pos);
++ dx_unlock_bh(frame->bh);
++
++ return frame->leaf == block;
++}
++
++/*
++ * 0 - changed
++ * 1 - hasn't changed
++ */
++static int
++dx_check_full_path(struct dx_frame *frames, struct dx_hash_info *hinfo)
++{
++ struct dx_entry *p;
++ struct dx_frame *frame = frames;
++ u32 leaf;
++
++ /* first level: re-read the slot for hinfo->hash under the bh lock */
++ dx_lock_bh(frame->bh);
++ p = dx_find_position(frame->entries, hinfo->hash);
++ leaf = dx_get_block(p);
++ dx_unlock_bh(frame->bh);
++
++ if (leaf != frame->leaf) /* differs from the block cached in the frame */
++ return 0;
++
++ /* no second-level frame recorded: nothing more to compare */
++ frame++;
++ if (frame->bh == NULL)
++ return 1;
++
++ /* second level: this lock stays held across the first-level re-check */
++ dx_lock_bh(frame->bh);
++
++ /* the first level may have changed in the meantime, check it again */
++ if (!dx_check_path(frames, hinfo->hash)) {
++ /* path changed */
++ dx_unlock_bh(frame->bh);
++ return 0;
++ }
++
++ p = dx_find_position(frame->entries, hinfo->hash);
++ leaf = dx_get_block(p);
++ dx_unlock_bh(frame->bh);
++
++ if (leaf != frame->leaf)
++ return 0;
++
++ return 1;
++}
++
++/*
+ * Probe for a directory leaf block to search.
+ *
+ * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
+@@ -316,19 +438,20 @@ struct stats dx_show_entries(struct dx_h
+ * back to userspace.
+ */
+ static struct dx_frame *
+-dx_probe(struct dentry *dentry, struct inode *dir,
++dx_probe(struct qstr *name, struct inode *dir,
+ struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
+ {
+- unsigned count, indirect;
+- struct dx_entry *at, *entries, *p, *q, *m;
++ unsigned indirect;
++ struct dx_entry *at, *entries;
+ struct dx_root *root;
+ struct buffer_head *bh;
+ struct dx_frame *frame = frame_in;
+ u32 hash;
++ unsigned int curidx;
+
+ frame->bh = NULL;
+- if (dentry)
+- dir = dentry->d_parent->d_inode;
++ frame[1].bh = NULL;
++
+ if (!(bh = ext3_bread (NULL,dir, 0, 0, err)))
+ goto fail;
+ root = (struct dx_root *) bh->b_data;
+@@ -344,8 +467,8 @@ dx_probe(struct dentry *dentry, struct i
+ }
+ hinfo->hash_version = root->info.hash_version;
+ hinfo->seed = dir->i_sb->u.ext3_sb.s_hash_seed;
+- if (dentry)
+- ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
++ if (name)
++ ext3fs_dirhash(name->name, name->len, hinfo);
+ hash = hinfo->hash;
+
+ if (root->info.unused_flags & 1) {
+@@ -357,7 +480,19 @@ dx_probe(struct dentry *dentry, struct i
+ goto fail;
+ }
+
++repeat:
++ curidx = 0;
++ entries = (struct dx_entry *) (((char *)&root->info) +
++ root->info.info_length);
++ assert(dx_get_limit(entries) == dx_root_limit(dir,
++ root->info.info_length));
++ dxtrace (printk("Look up %x", hash));
++ dx_lock_bh(bh);
++ /* indirect must be initialized under bh lock because
++ * 2nd level creation procedure may change it and dx_probe()
++ * will suggest htree is still single-level -bzzz */
+ if ((indirect = root->info.indirect_levels) > 1) {
++ dx_unlock_bh(bh);
+ ext3_warning(dir->i_sb, __FUNCTION__,
+ "Unimplemented inode hash depth: %#06x",
+ root->info.indirect_levels);
+@@ -365,56 +500,46 @@ dx_probe(struct dentry *dentry, struct i
+ *err = ERR_BAD_DX_DIR;
+ goto fail;
+ }
+-
+- entries = (struct dx_entry *) (((char *)&root->info) +
+- root->info.info_length);
+- assert(dx_get_limit(entries) == dx_root_limit(dir,
+- root->info.info_length));
+- dxtrace (printk("Look up %x", hash));
++
+ while (1)
+ {
+- count = dx_get_count(entries);
+- assert (count && count <= dx_get_limit(entries));
+- p = entries + 1;
+- q = entries + count - 1;
+- while (p <= q)
+- {
+- m = p + (q - p)/2;
+- dxtrace(printk("."));
+- if (dx_get_hash(m) > hash)
+- q = m - 1;
+- else
+- p = m + 1;
+- }
+-
+- if (0) // linear search cross check
+- {
+- unsigned n = count - 1;
+- at = entries;
+- while (n--)
+- {
+- dxtrace(printk(","));
+- if (dx_get_hash(++at) > hash)
+- {
+- at--;
+- break;
+- }
+- }
+- assert (at == p - 1);
+- }
+-
+- at = p - 1;
+- dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
++ at = dx_find_position(entries, hinfo->hash);
++ dxtrace(printk(" %x->%u\n",
++ at == entries? 0: dx_get_hash(at),
++ dx_get_block(at)));
+ frame->bh = bh;
+ frame->entries = entries;
+ frame->at = at;
+- if (!indirect--) return frame;
+- if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
++ frame->curidx = curidx;
++ frame->leaf = dx_get_block(at);
++ if (!indirect--) {
++ dx_unlock_bh(bh);
++ return frame;
++ }
++
++ /* step into next htree level */
++ curidx = dx_get_block(at);
++ dx_unlock_bh(bh);
++ if (!(bh = ext3_bread (NULL,dir, frame->leaf, 0, err)))
+ goto fail2;
++
++ dx_lock_bh(bh);
++ /* splitting may change root index block and move
++ * hash we're looking for into another index block
++ * so, we have to check this situation and repeat
++ * from beginning if path got changed -bzzz */
++ if (!dx_check_path(frame, hash)) {
++ dx_unlock_bh(bh);
++ bh = frame->bh;
++ indirect++;
++ goto repeat;
++ }
++
+ at = entries = ((struct dx_node *) bh->b_data)->entries;
+ assert (dx_get_limit(entries) == dx_node_limit (dir));
+ frame++;
+ }
++ dx_unlock_bh(bh);
+ fail2:
+ while (frame >= frame_in) {
+ brelse(frame->bh);
+@@ -428,8 +553,7 @@ static void dx_release (struct dx_frame
+ {
+ if (frames[0].bh == NULL)
+ return;
+-
+- if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
++ if (frames[1].bh != NULL)
+ brelse(frames[1].bh);
+ brelse(frames[0].bh);
+ }
+@@ -471,8 +595,10 @@ static int ext3_htree_next_block(struct
+ * nodes need to be read.
+ */
+ while (1) {
+- if (++(p->at) < p->entries + dx_get_count(p->entries))
++ if (++(p->at) < p->entries + dx_get_count(p->entries)) {
++ p->leaf = dx_get_block(p->at);
+ break;
++ }
+ if (p == frames)
+ return 0;
+ num_frames++;
+@@ -498,13 +624,17 @@ static int ext3_htree_next_block(struct
+ * block so no check is necessary
+ */
+ while (num_frames--) {
+- if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at),
+- 0, err)))
++ u32 idx;
++
++ idx = p->leaf = dx_get_block(p->at);
++ if (!(bh = ext3_bread(NULL, dir, idx, 0, err)))
+ return -1; /* Failure */
+ p++;
+ brelse (p->bh);
+ p->bh = bh;
+ p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
++ p->curidx = idx;
++ p->leaf = dx_get_block(p->at);
+ }
+ return 1;
+ }
+@@ -544,7 +674,7 @@ int ext3_htree_fill_tree(struct file *di
+ dir = dir_file->f_dentry->d_inode;
+ hinfo.hash = start_hash;
+ hinfo.minor_hash = 0;
+- frame = dx_probe(0, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
++ frame = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
+ if (!frame)
+ return err;
+
+@@ -626,7 +756,8 @@ static int dx_make_map (struct ext3_dir_
+ count++;
+ }
+ /* XXX: do we need to check rec_len == 0 case? -Chris */
+- de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
++ de = (struct ext3_dir_entry_2 *)((char*)de +
++ le16_to_cpu(de->rec_len));
+ }
+ return count;
+ }
+@@ -659,7 +790,8 @@ static void dx_sort_map (struct dx_map_e
+ } while(more);
+ }
+
+-static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
++static void dx_insert_block(struct inode *dir, struct dx_frame *frame,
++ u32 hash, u32 block, u32 idx)
+ {
+ struct dx_entry *entries = frame->entries;
+ struct dx_entry *old = frame->at, *new = old + 1;
+@@ -671,6 +803,7 @@ static void dx_insert_block(struct dx_fr
+ dx_set_hash(new, hash);
+ dx_set_block(new, block);
+ dx_set_count(entries, count + 1);
++
+ }
+ #endif
+
+@@ -753,7 +886,8 @@ static int inline search_dirblock(struct
+
+
+ static struct buffer_head * ext3_find_entry (struct dentry *dentry,
+- struct ext3_dir_entry_2 ** res_dir)
++ struct ext3_dir_entry_2 ** res_dir,
++ int rwlock, void **lock)
+ {
+ struct super_block * sb;
+ struct buffer_head * bh_use[NAMEI_RA_SIZE];
+@@ -769,6 +903,7 @@ static struct buffer_head * ext3_find_en
+ int namelen;
+ const u8 *name;
+ unsigned blocksize;
++ int do_not_use_dx = 0;
+
+ *res_dir = NULL;
+ sb = dir->i_sb;
+@@ -777,9 +912,10 @@ static struct buffer_head * ext3_find_en
+ name = dentry->d_name.name;
+ if (namelen > EXT3_NAME_LEN)
+ return NULL;
++repeat:
+ #ifdef CONFIG_EXT3_INDEX
+ if (is_dx(dir)) {
+- bh = ext3_dx_find_entry(dentry, res_dir, &err);
++ bh = ext3_dx_find_entry(dentry, res_dir, &err, rwlock, lock);
+ /*
+ * On success, or if the error was file not found,
+ * return. Otherwise, fall back to doing a search the
+@@ -788,8 +924,14 @@ static struct buffer_head * ext3_find_en
+ if (bh || (err != ERR_BAD_DX_DIR))
+ return bh;
+ dxtrace(printk("ext3_find_entry: dx failed, falling back\n"));
++ do_not_use_dx = 1;
+ }
+ #endif
++ *lock = ext3_lock_htree(dir, 0, rwlock);
++ if (is_dx(dir) && !do_not_use_dx) {
++ ext3_unlock_htree(dir, *lock);
++ goto repeat;
++ }
+ nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
+ start = EXT3_I(dir)->i_dir_start_lookup;
+ if (start >= nblocks)
+@@ -861,12 +1003,17 @@ cleanup_and_exit:
+ /* Clean up the read-ahead blocks */
+ for (; ra_ptr < ra_max; ra_ptr++)
+ brelse (bh_use[ra_ptr]);
++ if (!ret) {
++ ext3_unlock_htree(dir, *lock);
++ *lock = NULL;
++ }
+ return ret;
+ }
+
+ #ifdef CONFIG_EXT3_INDEX
+ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
+- struct ext3_dir_entry_2 **res_dir, int *err)
++ struct ext3_dir_entry_2 **res_dir, int *err,
++ int rwlock, void **lock)
+ {
+ struct super_block * sb;
+ struct dx_hash_info hinfo;
+@@ -881,11 +1028,22 @@ static struct buffer_head * ext3_dx_find
+ struct inode *dir = dentry->d_parent->d_inode;
+
+ sb = dir->i_sb;
+- if (!(frame = dx_probe (dentry, 0, &hinfo, frames, err)))
++repeat:
++ if (!(frame = dx_probe (&dentry->d_name, dir, &hinfo, frames, err)))
+ return NULL;
++
++ *lock = ext3_lock_htree(dir, frame->leaf, rwlock);
++ /* while locking leaf we just found may get splitted
++ * so, we need another leaf. check this */
++ if (!dx_check_full_path(frames, &hinfo)) {
++ ext3_unlock_htree(dir, *lock);
++ dx_release(frames);
++ goto repeat;
++ }
++
+ hash = hinfo.hash;
+ do {
+- block = dx_get_block(frame->at);
++ block = frame->leaf;
+ if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
+ goto errout;
+ de = (struct ext3_dir_entry_2 *) bh->b_data;
+@@ -919,6 +1077,8 @@ static struct buffer_head * ext3_dx_find
+ *err = -ENOENT;
+ errout:
+ dxtrace(printk("%s not found\n", name));
++ ext3_unlock_htree(dir, *lock);
++ *lock = NULL;
+ dx_release (frames);
+ return NULL;
+ }
+@@ -931,6 +1091,7 @@ static struct dentry *ext3_lookup(struct
+ struct ext3_dir_entry_2 * de;
+ struct buffer_head * bh;
+ struct dentry *alternate = NULL;
++ void *lock = NULL;
+
+ if (dentry->d_name.len > EXT3_NAME_LEN)
+ return ERR_PTR(-ENAMETOOLONG);
+@@ -938,10 +1099,11 @@ static struct dentry *ext3_lookup(struct
+ if (ext3_check_for_iopen(dir, dentry))
+ return NULL;
+
+- bh = ext3_find_entry(dentry, &de);
++ bh = ext3_find_entry(dentry, &de, 0, &lock);
+ inode = NULL;
+ if (bh) {
+ unsigned long ino = le32_to_cpu(de->inode);
++ ext3_unlock_htree(dir, lock);
+ brelse (bh);
+ inode = iget(dir->i_sb, ino);
+
+@@ -984,7 +1146,8 @@ dx_move_dirents(char *from, char *to, st
+ unsigned rec_len = 0;
+
+ while (count--) {
+- struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
++ struct ext3_dir_entry_2 *de =
++ (struct ext3_dir_entry_2 *) (from + map->offs);
+ rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ memcpy (to, de, rec_len);
+ ((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
+@@ -997,7 +1160,8 @@ dx_move_dirents(char *from, char *to, st
+
+ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
+ {
+- struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
++ struct ext3_dir_entry_2 *next, *to, *prev;
++ struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) base;
+ unsigned rec_len = 0;
+
+ prev = to = de;
+@@ -1019,7 +1183,8 @@ static struct ext3_dir_entry_2* dx_pack_
+
+ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
+ struct buffer_head **bh,struct dx_frame *frame,
+- struct dx_hash_info *hinfo, int *error)
++ struct dx_hash_info *hinfo, void **target,
++ int *error)
+ {
+ unsigned blocksize = dir->i_sb->s_blocksize;
+ unsigned count, continued;
+@@ -1066,23 +1231,30 @@ static struct ext3_dir_entry_2 *do_split
+ hash2 = map[split].hash;
+ continued = hash2 == map[split - 1].hash;
+ dxtrace(printk("Split block %i at %x, %i/%i\n",
+- dx_get_block(frame->at), hash2, split, count-split));
+-
++ frame->leaf, hash2, split, count-split));
++
+ /* Fancy dance to stay within two buffers */
+ de2 = dx_move_dirents(data1, data2, map + split, count - split);
+ de = dx_pack_dirents(data1,blocksize);
+ de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
+ de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
+- dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1));
+- dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1));
++ dxtrace(dx_show_leaf(hinfo,(struct ext3_dir_entry_2*) data1, blocksize, 1));
++ dxtrace(dx_show_leaf(hinfo,(struct ext3_dir_entry_2*) data2, blocksize, 1));
+
+ /* Which block gets the new entry? */
++ *target = NULL;
+ if (hinfo->hash >= hash2)
+ {
+ swap(*bh, bh2);
+ de = de2;
+- }
+- dx_insert_block (frame, hash2 + continued, newblock);
++
++ /* entry will be stored into new block
++ * we have to lock it before add_dirent_to_buf */
++ *target = ext3_lock_htree(dir, newblock, 1);
++ }
++ dx_lock_bh(frame->bh);
++ dx_insert_block (dir, frame, hash2 + continued, newblock, frame->curidx);
++ dx_unlock_bh(frame->bh);
+ err = ext3_journal_dirty_metadata (handle, bh2);
+ if (err)
+ goto journal_error;
+@@ -1156,7 +1328,8 @@ static int add_dirent_to_buf(handle_t *h
+ nlen = EXT3_DIR_REC_LEN(de->name_len);
+ rlen = le16_to_cpu(de->rec_len);
+ if (de->inode) {
+- struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
++ struct ext3_dir_entry_2 *de1 =
++ (struct ext3_dir_entry_2 *)((char *)de + nlen);
+ de1->rec_len = cpu_to_le16(rlen - nlen);
+ de->rec_len = cpu_to_le16(nlen);
+ de = de1;
+@@ -1214,7 +1387,8 @@ static int make_indexed_dir(handle_t *ha
+ unsigned blocksize;
+ struct dx_hash_info hinfo;
+ u32 block;
+-
++ void *lock, *new_lock;
++
+ blocksize = dir->i_sb->s_blocksize;
+ dxtrace(printk("Creating index\n"));
+ retval = ext3_journal_get_write_access(handle, bh);
+@@ -1225,7 +1399,6 @@ static int make_indexed_dir(handle_t *ha
+ }
+ root = (struct dx_root *) bh->b_data;
+
+- EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
+ bh2 = ext3_append (handle, dir, &block, &retval);
+ if (!(bh2)) {
+ brelse(bh);
+@@ -1233,6 +1406,8 @@ static int make_indexed_dir(handle_t *ha
+ }
+ data1 = bh2->b_data;
+
++ lock = ext3_lock_htree(dir, block, 1);
++
+ /* The 0th block becomes the root, move the dirents out */
+ de = (struct ext3_dir_entry_2 *) &root->info;
+ len = ((char *) root) + blocksize - (char *) de;
+@@ -1261,13 +1436,25 @@ static int make_indexed_dir(handle_t *ha
+ frame->entries = entries;
+ frame->at = entries;
+ frame->bh = bh;
++ frame->curidx = 0;
++ frame->leaf = 0;
++ frame[1].bh = NULL;
+ bh = bh2;
+- de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
++ de = do_split(handle,dir, &bh, frame, &hinfo, &new_lock, &retval);
+ dx_release (frames);
+ if (!(de))
+- return retval;
++ goto cleanup;
++
++ retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
++cleanup:
++ if (new_lock)
++ ext3_unlock_htree(dir, new_lock);
++ /* we mark directory indexed in order to
++ * avoid races while htree being created -bzzz */
++ EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
++ ext3_unlock_htree(dir, lock);
+
+- return add_dirent_to_buf(handle, dentry, inode, de, bh);
++ return retval;
+ }
+ #endif
+
+@@ -1296,11 +1483,13 @@ static int ext3_add_entry (handle_t *han
+ unsigned blocksize;
+ unsigned nlen, rlen;
+ u32 block, blocks;
++ void *lock;
+
+ sb = dir->i_sb;
+ blocksize = sb->s_blocksize;
+ if (!dentry->d_name.len)
+ return -EINVAL;
++repeat:
+ #ifdef CONFIG_EXT3_INDEX
+ if (is_dx(dir)) {
+ retval = ext3_dx_add_entry(handle, dentry, inode);
+@@ -1311,36 +1500,53 @@ static int ext3_add_entry (handle_t *han
+ ext3_mark_inode_dirty(handle, dir);
+ }
+ #endif
++ lock = ext3_lock_htree(dir, 0, 1);
++ if (is_dx(dir)) {
++ /* we got lock for block 0
++ * probably previous holder of the lock
++ * created htree -bzzz */
++ ext3_unlock_htree(dir, lock);
++ goto repeat;
++ }
++
+ blocks = dir->i_size >> sb->s_blocksize_bits;
+ for (block = 0, offset = 0; block < blocks; block++) {
+ bh = ext3_bread(handle, dir, block, 0, &retval);
+- if(!bh)
++ if(!bh) {
++ ext3_unlock_htree(dir, lock);
+ return retval;
++ }
+ retval = add_dirent_to_buf(handle, dentry, inode, 0, bh);
+- if (retval != -ENOSPC)
++ if (retval != -ENOSPC) {
++ ext3_unlock_htree(dir, lock);
+ return retval;
++ }
+
+ #ifdef CONFIG_EXT3_INDEX
+ if (blocks == 1 && !dx_fallback &&
+- EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
+- return make_indexed_dir(handle, dentry, inode, bh);
++ EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX)) {
++ retval = make_indexed_dir(handle, dentry, inode, bh);
++ ext3_unlock_htree(dir, lock);
++ return retval;
++ }
+ #endif
+ brelse(bh);
+ }
+ bh = ext3_append(handle, dir, &block, &retval);
+- if (!bh)
++ if (!bh) {
++ ext3_unlock_htree(dir, lock);
+ return retval;
++ }
+ de = (struct ext3_dir_entry_2 *) bh->b_data;
+ de->inode = 0;
+ de->rec_len = cpu_to_le16(rlen = blocksize);
+ nlen = 0;
+- return add_dirent_to_buf(handle, dentry, inode, de, bh);
++ retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
++ ext3_unlock_htree(dir, lock);
++ return retval;
+ }
+
+ #ifdef CONFIG_EXT3_INDEX
+-/*
+- * Returns 0 for success, or a negative error value
+- */
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+ struct inode *inode)
+ {
+@@ -1352,15 +1558,28 @@ static int ext3_dx_add_entry(handle_t *h
+ struct super_block * sb = dir->i_sb;
+ struct ext3_dir_entry_2 *de;
+ int err;
+-
+- frame = dx_probe(dentry, 0, &hinfo, frames, &err);
++ int curidx;
++ void *idx_lock, *leaf_lock, *newleaf_lock;
++
++repeat:
++ frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
+ if (!frame)
+ return err;
+- entries = frame->entries;
+- at = frame->at;
+
+- if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
++ /* we're going to change leaf, so lock it first */
++ leaf_lock = ext3_lock_htree(dir, frame->leaf, 1);
++
++ /* while locking leaf we just found may get splitted
++ * so we need to check this */
++ if (!dx_check_full_path(frames, &hinfo)) {
++ ext3_unlock_htree(dir, leaf_lock);
++ dx_release(frames);
++ goto repeat;
++ }
++ if (!(bh = ext3_bread(handle,dir, frame->leaf, 0, &err))) {
++ printk("can't ext3_bread(%d) = %d\n", (int) frame->leaf, err);
+ goto cleanup;
++ }
+
+ BUFFER_TRACE(bh, "get_write_access");
+ err = ext3_journal_get_write_access(handle, bh);
+@@ -1373,6 +1592,35 @@ static int ext3_dx_add_entry(handle_t *h
+ goto cleanup;
+ }
+
++ /* our leaf has no enough space. hence, we have to
++ * split it. so lock index for this leaf first */
++ curidx = frame->curidx;
++ idx_lock = ext3_lock_htree(dir, curidx, 1);
++
++ /* now check did path get changed? */
++ dx_release(frames);
++
++ frame = dx_probe(&dentry->d_name, dentry->d_parent->d_inode,
++ &hinfo, frames, &err);
++ if (!frame) {
++ /* FIXME: error handling here */
++ brelse(bh);
++ ext3_unlock_htree(dir, idx_lock);
++ return err;
++ }
++
++ if (frame->curidx != curidx) {
++ /* path has been changed. we have to drop old lock
++ * and repeat */
++ brelse(bh);
++ ext3_unlock_htree(dir, idx_lock);
++ ext3_unlock_htree(dir, leaf_lock);
++ dx_release(frames);
++ goto repeat;
++ }
++ entries = frame->entries;
++ at = frame->at;
++
+ /* Block full, should compress but for now just split */
+ dxtrace(printk("using %u of %u node entries\n",
+ dx_get_count(entries), dx_get_limit(entries)));
+@@ -1384,7 +1632,8 @@ static int ext3_dx_add_entry(handle_t *h
+ struct dx_entry *entries2;
+ struct dx_node *node2;
+ struct buffer_head *bh2;
+-
++ void *nb_lock;
++
+ if (levels && (dx_get_count(frames->entries) ==
+ dx_get_limit(frames->entries))) {
+ ext3_warning(sb, __FUNCTION__,
+@@ -1395,6 +1644,7 @@ static int ext3_dx_add_entry(handle_t *h
+ bh2 = ext3_append (handle, dir, &newblock, &err);
+ if (!(bh2))
+ goto cleanup;
++ nb_lock = ext3_lock_htree(dir, newblock, 1);
+ node2 = (struct dx_node *)(bh2->b_data);
+ entries2 = node2->entries;
+ node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
+@@ -1406,27 +1656,73 @@ static int ext3_dx_add_entry(handle_t *h
+ if (levels) {
+ unsigned icount1 = icount/2, icount2 = icount - icount1;
+ unsigned hash2 = dx_get_hash(entries + icount1);
++ void *ri_lock;
++
++ /* we have to protect root htree index against
++ * another dx_add_entry() which would want to
++ * split it too -bzzz */
++ ri_lock = ext3_lock_htree(dir, 0, 1);
++
++ /* as root index block blocked we must repeat
++ * searching for current position of our 2nd index -bzzz */
++ dx_lock_bh(frame->bh);
++ frames->at = dx_find_position(frames->entries, hinfo.hash);
++ dx_unlock_bh(frame->bh);
++
+ dxtrace(printk("Split index %i/%i\n", icount1, icount2));
+-
+- BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
++
++ BUFFER_TRACE(frame->bh, "get_write_access");
+ err = ext3_journal_get_write_access(handle,
+ frames[0].bh);
+ if (err)
+ goto journal_error;
+-
++
++ /* copy index into new one */
+ memcpy ((char *) entries2, (char *) (entries + icount1),
+ icount2 * sizeof(struct dx_entry));
+- dx_set_count (entries, icount1);
+ dx_set_count (entries2, icount2);
+ dx_set_limit (entries2, dx_node_limit(dir));
+
+ /* Which index block gets the new entry? */
+ if (at - entries >= icount1) {
++ /* unlock index we won't use */
++ ext3_unlock_htree(dir, idx_lock);
++ idx_lock = nb_lock;
+ frame->at = at = at - entries - icount1 + entries2;
+- frame->entries = entries = entries2;
++ frame->entries = entries2;
++ frame->curidx = curidx = newblock;
+ swap(frame->bh, bh2);
++ } else {
++ /* we'll use old index,so new one may be freed */
++ ext3_unlock_htree(dir, nb_lock);
+ }
+- dx_insert_block (frames + 0, hash2, newblock);
++
++ /* NOTE: very subtle piece of code
++ * competing dx_probe() may find 2nd level index in root
++ * index, then we insert new index here and set new count
++ * in that 2nd level index. so, dx_probe() may see 2nd
++ * level index w/o hash it looks for. the solution is
++ * to check root index after we locked just founded 2nd
++ * level index -bzzz */
++ dx_lock_bh(frames[0].bh);
++ dx_insert_block (dir, frames + 0, hash2, newblock, 0);
++ dx_unlock_bh(frames[0].bh);
++
++ /* now old and new 2nd level index blocks contain
++ * all pointers, so dx_probe() may find it in the both.
++ * it's OK -bzzz */
++
++ dx_lock_bh(frame->bh);
++ dx_set_count(entries, icount1);
++ dx_unlock_bh(frame->bh);
++
++ /* now old 2nd level index block points to first half
++ * of leafs. it's important that dx_probe() must
++ * check root index block for changes under
++ * dx_lock_bh(frame->bh) -bzzz */
++
++ ext3_unlock_htree(dir, ri_lock);
++
+ dxtrace(dx_show_index ("node", frames[1].entries));
+ dxtrace(dx_show_index ("node",
+ ((struct dx_node *) bh2->b_data)->entries));
+@@ -1435,38 +1731,61 @@ static int ext3_dx_add_entry(handle_t *h
+ goto journal_error;
+ brelse (bh2);
+ } else {
++ unsigned long leaf = frame->leaf;
++
+ dxtrace(printk("Creating second level index...\n"));
+ memcpy((char *) entries2, (char *) entries,
+ icount * sizeof(struct dx_entry));
+ dx_set_limit(entries2, dx_node_limit(dir));
+
+ /* Set up root */
++ dx_lock_bh(frames[0].bh);
+ dx_set_count(entries, 1);
+ dx_set_block(entries + 0, newblock);
+ ((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
++ dx_unlock_bh(frames[0].bh);
+
+ /* Add new access path frame */
+ frame = frames + 1;
+ frame->at = at = at - entries + entries2;
+ frame->entries = entries = entries2;
+ frame->bh = bh2;
++ frame->curidx = newblock;
++ frame->leaf = leaf;
+ err = ext3_journal_get_write_access(handle,
+ frame->bh);
+ if (err)
+ goto journal_error;
++
++ /* first level index was root. it's already initialized */
++ /* we may unlock it now */
++ ext3_unlock_htree(dir, idx_lock);
++
++ /* current index is just created 2nd level index */
++ curidx = newblock;
++ idx_lock = nb_lock;
+ }
+ ext3_journal_dirty_metadata(handle, frames[0].bh);
+ }
+- de = do_split(handle, dir, &bh, frame, &hinfo, &err);
++ de = do_split(handle, dir, &bh, frame, &hinfo, &newleaf_lock, &err);
+ if (!de)
+ goto cleanup;
++
++ /* index splitted */
++ ext3_unlock_htree(dir, idx_lock);
++
+ err = add_dirent_to_buf(handle, dentry, inode, de, bh);
++
++ if (newleaf_lock)
++ ext3_unlock_htree(dir, newleaf_lock);
++
+ bh = 0;
+ goto cleanup;
+
+ journal_error:
+ ext3_std_error(dir->i_sb, err);
+ cleanup:
++ ext3_unlock_htree(dir, leaf_lock);
+ if (bh)
+ brelse(bh);
+ dx_release(frames);
+@@ -1899,6 +2218,7 @@ static int ext3_rmdir (struct inode * di
+ struct buffer_head * bh;
+ struct ext3_dir_entry_2 * de;
+ handle_t *handle;
++ void *lock;
+
+ handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
+ if (IS_ERR(handle)) {
+@@ -1906,7 +2226,7 @@ static int ext3_rmdir (struct inode * di
+ }
+
+ retval = -ENOENT;
+- bh = ext3_find_entry (dentry, &de);
++ bh = ext3_find_entry (dentry, &de, 1, &lock);
+ if (!bh)
+ goto end_rmdir;
+
+@@ -1917,14 +2237,19 @@ static int ext3_rmdir (struct inode * di
+ DQUOT_INIT(inode);
+
+ retval = -EIO;
+- if (le32_to_cpu(de->inode) != inode->i_ino)
++ if (le32_to_cpu(de->inode) != inode->i_ino) {
++ ext3_unlock_htree(dir, lock);
+ goto end_rmdir;
++ }
+
+ retval = -ENOTEMPTY;
+- if (!empty_dir (inode))
++ if (!empty_dir (inode)) {
++ ext3_unlock_htree(dir, lock);
+ goto end_rmdir;
++ }
+
+ retval = ext3_delete_entry(handle, dir, de, bh);
++ ext3_unlock_htree(dir, lock);
+ if (retval)
+ goto end_rmdir;
+ if (inode->i_nlink != 2)
+@@ -1957,6 +2282,7 @@ static int ext3_unlink(struct inode * di
+ struct buffer_head * bh;
+ struct ext3_dir_entry_2 * de;
+ handle_t *handle;
++ void *lock;
+
+ handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
+ if (IS_ERR(handle)) {
+@@ -1967,7 +2293,7 @@ static int ext3_unlink(struct inode * di
+ handle->h_sync = 1;
+
+ retval = -ENOENT;
+- bh = ext3_find_entry (dentry, &de);
++ bh = ext3_find_entry (dentry, &de, 1, &lock);
+ if (!bh)
+ goto end_unlink;
+
+@@ -1975,8 +2301,10 @@ static int ext3_unlink(struct inode * di
+ DQUOT_INIT(inode);
+
+ retval = -EIO;
+- if (le32_to_cpu(de->inode) != inode->i_ino)
++ if (le32_to_cpu(de->inode) != inode->i_ino) {
++ ext3_unlock_htree(dir, lock);
+ goto end_unlink;
++ }
+
+ if (!inode->i_nlink) {
+ ext3_warning (inode->i_sb, "ext3_unlink",
+@@ -1985,6 +2313,7 @@ static int ext3_unlink(struct inode * di
+ inode->i_nlink = 1;
+ }
+ retval = ext3_delete_entry(handle, dir, de, bh);
++ ext3_unlock_htree(dir, lock);
+ if (retval)
+ goto end_unlink;
+ dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+@@ -2106,6 +2435,7 @@ static int ext3_rename (struct inode * o
+ struct buffer_head * old_bh, * new_bh, * dir_bh;
+ struct ext3_dir_entry_2 * old_de, * new_de;
+ int retval;
++ void *lock1 = NULL, *lock2 = NULL, *lock3 = NULL;
+
+ old_bh = new_bh = dir_bh = NULL;
+
+@@ -2118,7 +2448,10 @@ static int ext3_rename (struct inode * o
+ if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
+ handle->h_sync = 1;
+
+- old_bh = ext3_find_entry (old_dentry, &old_de);
++ if (old_dentry->d_parent == new_dentry->d_parent)
++ down(&EXT3_I(old_dentry->d_parent->d_inode)->i_rename_sem);
++
++ old_bh = ext3_find_entry (old_dentry, &old_de, 1, &lock1 /* FIXME */);
+ /*
+ * Check for inode number is _not_ due to possible IO errors.
+ * We might rmdir the source, keep it as pwd of some process
+@@ -2131,7 +2464,7 @@ static int ext3_rename (struct inode * o
+ goto end_rename;
+
+ new_inode = new_dentry->d_inode;
+- new_bh = ext3_find_entry (new_dentry, &new_de);
++ new_bh = ext3_find_entry (new_dentry, &new_de, 1, &lock2 /* FIXME */);
+ if (new_bh) {
+ if (!new_inode) {
+ brelse (new_bh);
+@@ -2194,7 +2527,7 @@ static int ext3_rename (struct inode * o
+ struct buffer_head *old_bh2;
+ struct ext3_dir_entry_2 *old_de2;
+
+- old_bh2 = ext3_find_entry(old_dentry, &old_de2);
++ old_bh2 = ext3_find_entry(old_dentry, &old_de2, 1, &lock3 /* FIXME */);
+ if (old_bh2) {
+ retval = ext3_delete_entry(handle, old_dir,
+ old_de2, old_bh2);
+@@ -2237,6 +2570,14 @@ static int ext3_rename (struct inode * o
+ retval = 0;
+
+ end_rename:
++ if (lock1)
++ ext3_unlock_htree(old_dentry->d_parent->d_inode, lock1);
++ if (lock2)
++ ext3_unlock_htree(new_dentry->d_parent->d_inode, lock2);
++ if (lock3)
++ ext3_unlock_htree(old_dentry->d_parent->d_inode, lock3);
++ if (old_dentry->d_parent == new_dentry->d_parent)
++ up(&EXT3_I(old_dentry->d_parent->d_inode)->i_rename_sem);
+ brelse (dir_bh);
+ brelse (old_bh);
+ brelse (new_bh);
+@@ -2245,6 +2586,29 @@ end_rename:
+ }
+
+ /*
++ * these locking primitives are used to protect parts
++ * of dir's htree. protection unit is block: leaf or index
++ */
++static inline void *ext3_lock_htree(struct inode *dir,
++ unsigned long value, int rwlock)
++{
++ void *lock;
++
++ if (!test_opt(dir->i_sb, PDIROPS))
++ return NULL;
++ lock = dynlock_lock(&EXT3_I(dir)->i_htree_lock, value, 1, GFP_KERNEL);
++ return lock;
++}
++
++static inline void ext3_unlock_htree(struct inode *dir,
++ void *lock)
++{
++ if (!test_opt(dir->i_sb, PDIROPS) || !lock)
++ return;
++ dynlock_unlock(&EXT3_I(dir)->i_htree_lock, lock);
++}
++
++/*
+ * directories can handle most operations...
+ */
+ struct inode_operations ext3_dir_inode_operations = {
+--- linux-2.4.18/fs/ext3/super.c~ext3-pdirops-2.4.18-chaos 2003-09-01 16:33:25.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/super.c 2003-09-02 12:46:29.000000000 +0400
+@@ -786,6 +786,8 @@ static int parse_options (char * options
+ return 0;
+ }
+ }
++ else if (!strcmp (this_char, "pdirops"))
++ set_opt (sbi->s_mount_opt, PDIROPS);
+ else if (!strcmp (this_char, "grpid") ||
+ !strcmp (this_char, "bsdgroups"))
+ set_opt (*mount_options, GRPID);
+@@ -812,6 +814,9 @@ static int parse_options (char * options
+ if (want_numeric(value, "sb", sb_block))
+ return 0;
+ }
++ else if (!strcmp (this_char, "pdirops")) {
++ set_opt (sbi->s_mount_opt, PDIROPS);
++ }
+ #ifdef CONFIG_JBD_DEBUG
+ else if (!strcmp (this_char, "ro-after")) {
+ unsigned long v;
+@@ -969,6 +974,10 @@ static int ext3_setup_super(struct super
+ ext3_check_inodes_bitmap (sb);
+ }
+ #endif
++#ifdef S_PDIROPS
++ if (test_opt (sb, PDIROPS))
++ sb->s_flags |= S_PDIROPS;
++#endif
+ setup_ro_after(sb);
+ return res;
+ }
+@@ -1463,6 +1472,11 @@ struct super_block * ext3_read_super (st
+ test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
+ "writeback");
+
++ if (test_opt(sb, PDIROPS)) {
++ printk (KERN_INFO "EXT3-fs: mounted filesystem with parallel dirops\n");
++ sb->s_flags |= S_PDIROPS;
++ }
++
+ return sb;
+
+ failed_mount3:
+--- linux-2.4.18/include/linux/ext3_fs.h~ext3-pdirops-2.4.18-chaos 2003-09-01 14:58:06.000000000 +0400
++++ linux-2.4.18-alexey/include/linux/ext3_fs.h 2003-09-02 11:46:15.000000000 +0400
+@@ -310,6 +310,7 @@ struct ext3_inode {
+ /*
+ * Mount flags
+ */
++#define EXT3_MOUNT_PDIROPS 0x800000/* Parallel dir operations */
+ #define EXT3_MOUNT_CHECK 0x0001 /* Do mount-time checks */
+ #define EXT3_MOUNT_GRPID 0x0004 /* Create files with directory's group */
+ #define EXT3_MOUNT_DEBUG 0x0008 /* Some debugging messages */
+--- linux-2.4.18/include/linux/ext3_fs_i.h~ext3-pdirops-2.4.18-chaos 2003-08-29 11:57:30.000000000 +0400
++++ linux-2.4.18-alexey/include/linux/ext3_fs_i.h 2003-09-02 11:46:15.000000000 +0400
+@@ -17,6 +17,7 @@
+ #define _LINUX_EXT3_FS_I
+
+ #include <linux/rwsem.h>
++#include <linux/dynlocks.h>
+
+ /*
+ * second extended file system inode data in memory
+@@ -73,6 +74,11 @@ struct ext3_inode_info {
+ * by other means, so we have truncate_sem.
+ */
+ struct rw_semaphore truncate_sem;
++
++ /* following fields for parallel directory operations -bzzz */
++ struct dynlock i_htree_lock;
++ struct semaphore i_append_sem;
++ struct semaphore i_rename_sem;
+ };
+
+ #endif /* _LINUX_EXT3_FS_I */
+--- linux-2.4.18/fs/ext3/inode.c~ext3-pdirops-2.4.18-chaos 2003-09-01 16:33:25.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/inode.c 2003-09-02 11:46:15.000000000 +0400
+@@ -2454,6 +2454,9 @@ void ext3_read_inode(struct inode * inod
+ } else if (S_ISDIR(inode->i_mode)) {
+ inode->i_op = &ext3_dir_inode_operations;
+ inode->i_fop = &ext3_dir_operations;
++ dynlock_init(&EXT3_I(inode)->i_htree_lock);
++ sema_init(&EXT3_I(inode)->i_rename_sem, 1);
++ sema_init(&EXT3_I(inode)->i_append_sem, 1);
+ } else if (S_ISLNK(inode->i_mode)) {
+ if (ext3_inode_is_fast_symlink(inode))
+ inode->i_op = &ext3_fast_symlink_inode_operations;
+--- linux-2.4.18/fs/ext3/ialloc.c~ext3-pdirops-2.4.18-chaos 2003-09-01 14:58:05.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/ialloc.c 2003-09-02 11:46:15.000000000 +0400
+@@ -601,6 +601,9 @@ repeat:
+ return ERR_PTR(-EDQUOT);
+ }
+ ext3_debug ("allocating inode %lu\n", inode->i_ino);
++ dynlock_init(&EXT3_I(inode)->i_htree_lock);
++ sema_init(&EXT3_I(inode)->i_rename_sem, 1);
++ sema_init(&EXT3_I(inode)->i_append_sem, 1);
+ return inode;
+
+ fail:
+
+_
--- /dev/null
+ Documentation/filesystems/ext2.txt | 16 ++
+ fs/ext3/Makefile | 2
+ fs/ext3/inode.c | 4
+ fs/ext3/iopen.c | 259 +++++++++++++++++++++++++++++++++++++
+ fs/ext3/iopen.h | 13 +
+ fs/ext3/namei.c | 12 +
+ fs/ext3/super.c | 11 +
+ include/linux/ext3_fs.h | 2
+ 8 files changed, 318 insertions(+), 1 deletion(-)
+
+--- linux-2.4.18-p4smp/Documentation/filesystems/ext2.txt~iopen-2.4.18 2003-07-09 12:17:30.000000000 -0600
++++ linux-2.4.18-p4smp-braam/Documentation/filesystems/ext2.txt 2003-07-09 17:13:02.000000000 -0600
+@@ -35,6 +35,22 @@ resgid=n The group ID which may use th
+
+ sb=n Use alternate superblock at this location.
+
++iopen Makes an invisible pseudo-directory called
++ __iopen__ available in the root directory
++ of the filesystem. Allows open-by-inode-
++ number. i.e., inode 3145 can be accessed
++ via /mntpt/__iopen__/3145
++
++iopen_nopriv This option makes the iopen directory be
++ world-readable. This may be safer since it
++ allows daemons to run as an unprivileged user,
++ however it significantly changes the security
++ model of a Unix filesystem, since previously
++ all files under a mode 700 directory were not
++ generally available even if the
++ permissions on the file itself are
++ world-readable.
++
+ grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2.
+
+
+--- linux-2.4.18-p4smp/fs/ext3/Makefile~iopen-2.4.18 2003-07-09 17:12:12.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/Makefile 2003-07-09 17:13:15.000000000 -0600
+@@ -11,7 +11,7 @@ O_TARGET := ext3.o
+
+ export-objs := super.o inode.o xattr.o ext3-exports.o
+
+-obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
++obj-y := balloc.o iopen.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+ ioctl.o namei.o super.o symlink.o xattr.o hash.o ext3-exports.o
+ obj-m := $(O_TARGET)
+
+--- linux-2.4.18-p4smp/fs/ext3/inode.c~iopen-2.4.18 2003-07-09 17:11:19.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/inode.c 2003-07-09 17:13:02.000000000 -0600
+@@ -31,6 +31,7 @@
+ #include <linux/highuid.h>
+ #include <linux/quotaops.h>
+ #include <linux/module.h>
++#include "iopen.h"
+
+ /*
+ * SEARCH_FROM_ZERO forces each block allocation to search from the start
+@@ -2165,6 +2166,9 @@ void ext3_read_inode(struct inode * inod
+ struct buffer_head *bh;
+ int block;
+
++ if (ext3_iopen_get_inode(inode))
++ return;
++
+ if(ext3_get_inode_loc(inode, &iloc))
+ goto bad_inode;
+ bh = iloc.bh;
+--- /dev/null 2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/fs/ext3/iopen.c 2003-07-09 17:13:02.000000000 -0600
+@@ -0,0 +1,259 @@
++/*
++ * linux/fs/ext3/iopen.c
++ *
++ * Special support for open by inode number
++ *
++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
++ *
++ * This file may be redistributed under the terms of the GNU General
++ * Public License.
++ *
++ *
++ * Invariants:
++ * - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias
++ * for an inode at one time.
++ * - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry
++ * aliases on an inode at the same time.
++ *
++ * If we have any connected dentry aliases for an inode, use one of those
++ * in iopen_lookup(). Otherwise, we instantiate a single NFSD_DISCONNECTED
++ * dentry for this inode, which thereafter will be found by the dcache
++ * when looking up this inode number in __iopen__, so we don't return here
++ * until it is gone.
++ *
++ * If we get an inode via a regular name lookup, then we "rename" the
++ * NFSD_DISCONNECTED dentry to the proper name and parent. This ensures
++ * existing users of the disconnected dentry will continue to use the same
++ * dentry as the connected users, and there will never be both kinds of
++ * dentry aliases at one time.
++ */
++
++#include <linux/sched.h>
++#include <linux/fs.h>
++#include <linux/locks.h>
++#include <linux/ext3_jbd.h>
++#include <linux/jbd.h>
++#include <linux/ext3_fs.h>
++#include <linux/smp_lock.h>
++#include "iopen.h"
++
++#ifndef assert
++#define assert(test) J_ASSERT(test)
++#endif
++
++#define IOPEN_NAME_LEN 32
++
++/*
++ * This implements looking up an inode by number.
++ */
++static struct dentry *iopen_lookup(struct inode *dir, struct dentry *dentry)
++{
++ struct inode *inode;
++ unsigned long ino;
++ struct list_head *lp;
++ struct dentry *alternate;
++ char buf[IOPEN_NAME_LEN];
++
++ if (dentry->d_name.len >= IOPEN_NAME_LEN)
++ return ERR_PTR(-ENAMETOOLONG);
++
++ memcpy(buf, dentry->d_name.name, dentry->d_name.len);
++ buf[dentry->d_name.len] = 0;
++
++ if (strcmp(buf, ".") == 0)
++ ino = dir->i_ino;
++ else if (strcmp(buf, "..") == 0)
++ ino = EXT3_ROOT_INO;
++ else
++ ino = simple_strtoul(buf, 0, 0);
++
++ if ((ino != EXT3_ROOT_INO &&
++ //ino != EXT3_ACL_IDX_INO &&
++ //ino != EXT3_ACL_DATA_INO &&
++ ino < EXT3_FIRST_INO(dir->i_sb)) ||
++ ino > le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count))
++ return ERR_PTR(-ENOENT);
++
++ inode = iget(dir->i_sb, ino);
++ if (!inode)
++ return ERR_PTR(-EACCES);
++ if (is_bad_inode(inode)) {
++ iput(inode);
++ return ERR_PTR(-ENOENT);
++ }
++
++ /* preferably return a connected dentry */
++ spin_lock(&dcache_lock);
++ list_for_each(lp, &inode->i_dentry) {
++ alternate = list_entry(lp, struct dentry, d_alias);
++ assert(!(alternate->d_flags & DCACHE_NFSD_DISCONNECTED));
++ }
++
++ if (!list_empty(&inode->i_dentry)) {
++ alternate = list_entry(inode->i_dentry.next,
++ struct dentry, d_alias);
++ dget_locked(alternate);
++ alternate->d_vfs_flags |= DCACHE_REFERENCED;
++ iput(inode);
++ spin_unlock(&dcache_lock);
++ return alternate;
++ }
++ dentry->d_flags |= DCACHE_NFSD_DISCONNECTED;
++ spin_unlock(&dcache_lock);
++
++ d_add(dentry, inode);
++ return NULL;
++}
++
++#define do_switch(x,y) do { \
++ __typeof__ (x) __tmp = x; \
++ x = y; y = __tmp; } while (0)
++
++static inline void switch_names(struct dentry *dentry, struct dentry *target)
++{
++ const unsigned char *old_name, *new_name;
++
++ memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN);
++ old_name = target->d_name.name;
++ new_name = dentry->d_name.name;
++ if (old_name == target->d_iname)
++ old_name = dentry->d_iname;
++ if (new_name == dentry->d_iname)
++ new_name = target->d_iname;
++ target->d_name.name = new_name;
++ dentry->d_name.name = old_name;
++}
++
++/* This function is spliced into ext3_lookup and does the move of a
++ * disconnected dentry (if it exists) to a connected dentry.
++ */
++struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode)
++{
++ struct dentry *tmp, *goal = NULL;
++ struct list_head *lp;
++
++ /* preferably return a connected dentry */
++ spin_lock(&dcache_lock);
++ /* verify this dentry is really new */
++ assert(!de->d_inode);
++ assert(list_empty(&de->d_subdirs));
++ assert(list_empty(&de->d_alias));
++
++
++ list_for_each(lp, &inode->i_dentry) {
++ tmp = list_entry(lp, struct dentry, d_alias);
++ if (tmp->d_flags & DCACHE_NFSD_DISCONNECTED) {
++ assert(tmp->d_alias.next == &inode->i_dentry);
++ assert(tmp->d_alias.prev == &inode->i_dentry);
++ goal = tmp;
++ dget_locked(goal);
++ break;
++ }
++ }
++
++ if (!goal) {
++ spin_unlock(&dcache_lock);
++ return NULL;
++ }
++
++ /* Move the goal to the de hash queue - like d_move() */
++ goal->d_flags &= ~DCACHE_NFSD_DISCONNECTED;
++ list_del(&goal->d_hash);
++ list_add(&goal->d_hash, &de->d_hash);
++
++ list_del(&goal->d_child);
++ list_del(&de->d_child);
++
++ /* Switch the parents and the names.. */
++ switch_names(goal, de);
++ do_switch(goal->d_parent, de->d_parent);
++ do_switch(goal->d_name.len, de->d_name.len);
++ do_switch(goal->d_name.hash, de->d_name.hash);
++
++ /* And add them back to the (new) parent lists */
++ list_add(&goal->d_child, &goal->d_parent->d_subdirs);
++ list_add(&de->d_child, &de->d_parent->d_subdirs);
++ spin_unlock(&dcache_lock);
++
++ return goal;
++}
++
++/*
++ * These are the special structures for the iopen pseudo directory.
++ */
++
++static struct inode_operations iopen_inode_operations = {
++ lookup: iopen_lookup, /* BKL held */
++};
++
++static struct file_operations iopen_file_operations = {
++ read: generic_read_dir,
++};
++
++static int match_dentry(struct dentry *dentry, const char *name)
++{
++ int len;
++
++ len = strlen(name);
++ if (dentry->d_name.len != len)
++ return 0;
++ if (strncmp(dentry->d_name.name, name, len))
++ return 0;
++ return 1;
++}
++
++/*
++ * This function is spliced into ext3_lookup and returns 1 if the file
++ * name is __iopen__ and dentry has been filled in appropriately.
++ */
++int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry)
++{
++ struct inode *inode;
++
++ if (dir->i_ino != EXT3_ROOT_INO ||
++ !test_opt(dir->i_sb, IOPEN) ||
++ !match_dentry(dentry, "__iopen__"))
++ return 0;
++
++ inode = iget(dir->i_sb, EXT3_BAD_INO);
++
++ if (!inode)
++ return 0;
++ d_add(dentry, inode);
++ return 1;
++}
++
++/*
++ * This function is spliced into read_inode; it returns 1 if inode
++ * number is the one for /__iopen__, in which case the inode is filled
++ * in appropriately. Otherwise, this function returns 0.
++ */
++int ext3_iopen_get_inode(struct inode *inode)
++{
++ if (inode->i_ino != EXT3_BAD_INO)
++ return 0;
++
++ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;
++ if (test_opt(inode->i_sb, IOPEN_NOPRIV))
++ inode->i_mode |= 0777;
++ inode->i_uid = 0;
++ inode->i_gid = 0;
++ inode->i_nlink = 1;
++ inode->i_size = 4096;
++ inode->i_atime = CURRENT_TIME;
++ inode->i_ctime = CURRENT_TIME;
++ inode->i_mtime = CURRENT_TIME;
++ inode->u.ext3_i.i_dtime = 0;
++ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size
++ * (for stat), not the fs block
++ * size */
++ inode->i_blocks = 0;
++ inode->i_version = 1;
++ inode->i_generation = 0;
++
++ inode->i_op = &iopen_inode_operations;
++ inode->i_fop = &iopen_file_operations;
++ inode->i_mapping->a_ops = 0;
++
++ return 1;
++}
+--- /dev/null 2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/fs/ext3/iopen.h 2003-07-09 17:13:02.000000000 -0600
+@@ -0,0 +1,13 @@
++/*
++ * iopen.h
++ *
++ * Special support for opening files by inode number.
++ *
++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
++ *
++ * This file may be redistributed under the terms of the GNU General
++ * Public License.
++ */
++
++extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry);
++extern int ext3_iopen_get_inode(struct inode *inode);
+--- linux-2.4.18-p4smp/fs/ext3/namei.c~iopen-2.4.18 2003-07-09 13:32:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/namei.c 2003-07-09 17:13:02.000000000 -0600
+@@ -34,6 +34,7 @@
+ #include <linux/string.h>
+ #include <linux/locks.h>
+ #include <linux/quotaops.h>
++#include "iopen.h"
+
+ /*
+ * define how far ahead to read directories while searching them.
+@@ -703,16 +704,21 @@ cleanup_and_exit:
+ return NULL;
+ }
+ #endif
++struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode);
+
+ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry)
+ {
+ struct inode * inode;
+ struct ext3_dir_entry_2 * de;
+ struct buffer_head * bh;
++ struct dentry *alternate = NULL;
+
+ if (dentry->d_name.len > EXT3_NAME_LEN)
+ return ERR_PTR(-ENAMETOOLONG);
+
++ if (ext3_check_for_iopen(dir, dentry))
++ return NULL;
++
+ bh = ext3_find_entry(dentry, &de);
+ inode = NULL;
+ if (bh) {
+@@ -723,6 +729,12 @@ static struct dentry *ext3_lookup(struct
+ if (!inode)
+ return ERR_PTR(-EACCES);
+ }
++
++ if (inode && (alternate = iopen_connect_dentry(dentry, inode))) {
++ iput(inode);
++ return alternate;
++ }
++
+ d_add(dentry, inode);
+ return NULL;
+ }
+--- linux-2.4.18-p4smp/fs/ext3/super.c~iopen-2.4.18 2003-07-09 13:32:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/super.c 2003-07-09 17:13:02.000000000 -0600
+@@ -831,6 +831,17 @@ static int parse_options (char * options
+ || !strcmp (this_char, "quota")
+ || !strcmp (this_char, "usrquota"))
+ /* Don't do anything ;-) */ ;
++ else if (!strcmp (this_char, "iopen")) {
++ set_opt (sbi->s_mount_opt, IOPEN);
++ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++ } else if (!strcmp (this_char, "noiopen")) {
++ clear_opt (sbi->s_mount_opt, IOPEN);
++ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++ }
++ else if (!strcmp (this_char, "iopen_nopriv")) {
++ set_opt (sbi->s_mount_opt, IOPEN);
++ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++ }
+ else if (!strcmp (this_char, "journal")) {
+ /* @@@ FIXME */
+ /* Eventually we will want to be able to create
+--- linux-2.4.18-p4smp/include/linux/ext3_fs.h~iopen-2.4.18 2003-07-09 13:32:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/include/linux/ext3_fs.h 2003-07-09 17:13:02.000000000 -0600
+@@ -321,6 +321,8 @@ struct ext3_inode {
+ #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */
+ #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
+ #define EXT3_MOUNT_INDEX 0x4000 /* Enable directory index */
++#define EXT3_MOUNT_IOPEN 0x8000 /* Allow access via iopen */
++#define EXT3_MOUNT_IOPEN_NOPRIV 0x10000 /* Make iopen world-readable */
+ #define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */
+
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+
+_
--- /dev/null
+ Documentation/filesystems/ext2.txt | 16 ++
+ fs/ext3/Makefile | 2
+ fs/ext3/inode.c | 3
+ fs/ext3/iopen.c | 239 +++++++++++++++++++++++++++++++++++++
+ fs/ext3/iopen.h | 15 ++
+ fs/ext3/namei.c | 13 ++
+ fs/ext3/super.c | 11 +
+ include/linux/ext3_fs.h | 2
+ 8 files changed, 300 insertions(+), 1 deletion(-)
+
+--- linux-2.6.0-test1/Documentation/filesystems/ext2.txt~iopen-2.6.0 2002-11-11 06:28:06.000000000 +0300
++++ linux-2.6.0-test1-alexey/Documentation/filesystems/ext2.txt 2003-08-24 13:02:02.000000000 +0400
+@@ -35,6 +35,22 @@ resgid=n The group ID which may use th
+
+ sb=n Use alternate superblock at this location.
+
++iopen Makes an invisible pseudo-directory called
++ __iopen__ available in the root directory
++ of the filesystem. Allows open-by-inode-
++ number. i.e., inode 3145 can be accessed
++ via /mntpt/__iopen__/3145
++
++iopen_nopriv This option makes the iopen directory be
++ world-readable. This may be safer since it
++ allows daemons to run as an unprivileged user,
++ however it significantly changes the security
++ model of a Unix filesystem, since previously
++ all files under a mode 700 directory were not
++ generally available even if the
++ permissions on the file itself are
++ world-readable.
++
+ grpquota,noquota,quota,usrquota Quota options are silently ignored by ext2.
+
+
+--- linux-2.6.0-test1/fs/ext3/inode.c~iopen-2.6.0 2003-08-24 13:00:36.000000000 +0400
++++ linux-2.6.0-test1-alexey/fs/ext3/inode.c 2003-08-24 13:02:02.000000000 +0400
+@@ -37,6 +37,7 @@
+ #include <linux/mpage.h>
+ #include <linux/uio.h>
+ #include "xattr.h"
++#include "iopen.h"
+ #include "acl.h"
+
+ /*
+@@ -2477,6 +2478,8 @@ void ext3_read_inode(struct inode * inod
+ ei->i_acl = EXT3_ACL_NOT_CACHED;
+ ei->i_default_acl = EXT3_ACL_NOT_CACHED;
+ #endif
++ if (ext3_iopen_get_inode(inode))
++ return;
+ if (ext3_get_inode_loc(inode, &iloc, 0))
+ goto bad_inode;
+ bh = iloc.bh;
+--- /dev/null 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.6.0-test1-alexey/fs/ext3/iopen.c 2003-08-24 13:02:02.000000000 +0400
+@@ -0,0 +1,239 @@
++
++
++/*
++ * linux/fs/ext3/iopen.c
++ *
++ * Special support for open by inode number
++ *
++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
++ *
++ * This file may be redistributed under the terms of the GNU General
++ * Public License.
++ */
++
++#include <linux/sched.h>
++#include <linux/fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/jbd.h>
++#include <linux/ext3_fs.h>
++#include <linux/smp_lock.h>
++#include "iopen.h"
++
++#ifndef assert
++#define assert(test) J_ASSERT(test)
++#endif
++
++#define IOPEN_NAME_LEN 32
++
++/*
++ * This implements looking up an inode by number.
++ */
++static struct dentry *iopen_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
++{
++ struct inode * inode;
++ unsigned long ino;
++ struct list_head *lp;
++ struct dentry *alternate;
++ char buf[IOPEN_NAME_LEN];
++
++ if (dentry->d_name.len >= IOPEN_NAME_LEN)
++ return ERR_PTR(-ENAMETOOLONG);
++
++ memcpy(buf, dentry->d_name.name, dentry->d_name.len);
++ buf[dentry->d_name.len] = 0;
++
++ if (strcmp(buf, ".") == 0)
++ ino = dir->i_ino;
++ else if (strcmp(buf, "..") == 0)
++ ino = EXT3_ROOT_INO;
++ else
++ ino = simple_strtoul(buf, 0, 0);
++
++ if ((ino != EXT3_ROOT_INO &&
++ //ino != EXT3_ACL_IDX_INO &&
++ //ino != EXT3_ACL_DATA_INO &&
++ ino < EXT3_FIRST_INO(dir->i_sb)) ||
++ ino > le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count))
++ return ERR_PTR(-ENOENT);
++
++ inode = iget(dir->i_sb, ino);
++ if (!inode)
++ return ERR_PTR(-EACCES);
++ if (is_bad_inode(inode)) {
++ iput(inode);
++ return ERR_PTR(-ENOENT);
++ }
++
++ /* preferably return a connected dentry */
++ spin_lock(&dcache_lock);
++ list_for_each(lp, &inode->i_dentry) {
++ alternate = list_entry(lp, struct dentry, d_alias);
++ assert(!(alternate->d_flags & DCACHE_DISCONNECTED));
++ }
++
++ if (!list_empty(&inode->i_dentry)) {
++ alternate = list_entry(inode->i_dentry.next,
++ struct dentry, d_alias);
++ dget_locked(alternate);
++ alternate->d_vfs_flags |= DCACHE_REFERENCED;
++ iput(inode);
++ spin_unlock(&dcache_lock);
++ return alternate;
++ }
++ dentry->d_flags |= DCACHE_DISCONNECTED;
++ spin_unlock(&dcache_lock);
++
++ d_add(dentry, inode);
++ return NULL;
++}
++
++#define do_switch(x,y) do { \
++ __typeof__ (x) __tmp = x; \
++ x = y; y = __tmp; } while (0)
++
++static inline void switch_names(struct dentry * dentry, struct dentry * target)
++{
++ const unsigned char *old_name, *new_name;
++
++ memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN);
++ old_name = target->d_name.name;
++ new_name = dentry->d_name.name;
++ if (old_name == target->d_iname)
++ old_name = dentry->d_iname;
++ if (new_name == dentry->d_iname)
++ new_name = target->d_iname;
++ target->d_name.name = new_name;
++ dentry->d_name.name = old_name;
++}
++
++
++struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode)
++{
++ struct dentry *tmp, *goal = NULL;
++ struct list_head *lp;
++
++ /* preferably return a connected dentry */
++ spin_lock(&dcache_lock);
++ /* verify this dentry is really new */
++ assert(!de->d_inode);
++ assert(list_empty(&de->d_subdirs));
++ assert(list_empty(&de->d_alias));
++
++
++ list_for_each(lp, &inode->i_dentry) {
++ tmp = list_entry(lp, struct dentry, d_alias);
++ if (tmp->d_flags & DCACHE_DISCONNECTED) {
++ assert(tmp->d_alias.next == &inode->i_dentry);
++ assert(tmp->d_alias.prev == &inode->i_dentry);
++ goal = tmp;
++ dget_locked(goal);
++ break;
++ }
++ }
++
++ if (!goal) {
++ spin_unlock(&dcache_lock);
++ return NULL;
++ }
++
++ /* Move the goal to the de hash queue */
++ goal->d_flags &= ~DCACHE_DISCONNECTED;
++ hlist_add_before(&goal->d_hash, &de->d_hash);
++ hlist_del(&goal->d_hash);
++
++ list_del(&goal->d_child);
++ list_del(&de->d_child);
++
++ /* Switch the parents and the names.. */
++ switch_names(goal, de);
++ do_switch(goal->d_parent, de->d_parent);
++ do_switch(goal->d_name.len, de->d_name.len);
++ do_switch(goal->d_name.hash, de->d_name.hash);
++
++ /* And add them back to the (new) parent lists */
++ list_add(&goal->d_child, &goal->d_parent->d_subdirs);
++ list_add(&de->d_child, &de->d_parent->d_subdirs);
++
++ spin_unlock(&dcache_lock);
++ return goal;
++}
++
++/*
++ * These are the special structures for the iopen pseudo directory.
++ */
++
++static struct inode_operations iopen_inode_operations = {
++ lookup: iopen_lookup, /* BKL held */
++};
++
++static struct file_operations iopen_file_operations = {
++ read: generic_read_dir,
++};
++
++static int match_dentry(struct dentry *dentry, const char *name)
++{
++ int len;
++
++ len = strlen(name);
++ if (dentry->d_name.len != len)
++ return 0;
++ if (strncmp(dentry->d_name.name, name, len))
++ return 0;
++ return 1;
++}
++
++/*
++ * This function is spliced into ext3_lookup and returns 1 if the file
++ * name is __iopen__ and dentry has been filled in appropriately.
++ */
++int ext3_check_for_iopen(struct inode * dir, struct dentry *dentry)
++{
++ struct inode * inode;
++
++ if (dir->i_ino != EXT3_ROOT_INO ||
++ !test_opt(dir->i_sb, IOPEN) ||
++ !match_dentry(dentry, "__iopen__"))
++ return 0;
++
++ inode = iget(dir->i_sb, EXT3_BAD_INO);
++
++ if (!inode)
++ return 0;
++ d_add(dentry, inode);
++ return 1;
++}
++
++/*
++ * This function is spliced into read_inode; it returns 1 if inode
++ * number is the one for /__iopen__, in which case the inode is filled
++ * in appropriately. Otherwise, this function returns 0.
++ */
++int ext3_iopen_get_inode(struct inode * inode)
++{
++ if (inode->i_ino != EXT3_BAD_INO)
++ return 0;
++
++ inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;
++ if (test_opt(inode->i_sb, IOPEN_NOPRIV))
++ inode->i_mode |= 0777;
++ inode->i_uid = 0;
++ inode->i_gid = 0;
++ inode->i_nlink = 1;
++ inode->i_size = 4096;
++ inode->i_atime = CURRENT_TIME;
++ inode->i_ctime = CURRENT_TIME;
++ inode->i_mtime = CURRENT_TIME;
++ EXT3_I(inode)->i_dtime = 0;
++ inode->i_blksize = PAGE_SIZE; /* This is the optimal IO size
++ * (for stat), not the fs block
++ * size */
++ inode->i_blocks = 0;
++ inode->i_version = 1;
++ inode->i_generation = 0;
++
++ inode->i_op = &iopen_inode_operations;
++ inode->i_fop = &iopen_file_operations;
++ inode->i_mapping->a_ops = 0;
++
++ return 1;
++}
+--- /dev/null 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.6.0-test1-alexey/fs/ext3/iopen.h 2003-08-24 13:02:02.000000000 +0400
+@@ -0,0 +1,15 @@
++/*
++ * iopen.h
++ *
++ * Special support for opening files by inode number.
++ *
++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
++ *
++ * This file may be redistributed under the terms of the GNU General
++ * Public License.
++ */
++
++extern int ext3_check_for_iopen(struct inode * dir, struct dentry *dentry);
++extern int ext3_iopen_get_inode(struct inode * inode);
++
++
+--- linux-2.6.0-test1/fs/ext3/Makefile~iopen-2.6.0 2003-08-24 12:58:32.000000000 +0400
++++ linux-2.6.0-test1-alexey/fs/ext3/Makefile 2003-08-24 13:02:40.000000000 +0400
+@@ -5,7 +5,7 @@
+ obj-$(CONFIG_EXT3_FS) += ext3.o
+
+ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+- ioctl.o namei.o super.o symlink.o hash.o
++ iopen.o ioctl.o namei.o super.o symlink.o hash.o
+
+ ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+--- linux-2.6.0-test1/fs/ext3/namei.c~iopen-2.6.0 2003-07-24 15:52:30.000000000 +0400
++++ linux-2.6.0-test1-alexey/fs/ext3/namei.c 2003-08-24 13:02:02.000000000 +0400
+@@ -37,6 +37,7 @@
+ #include <linux/buffer_head.h>
+ #include <linux/smp_lock.h>
+ #include "xattr.h"
++#include "iopen.h"
+ #include "acl.h"
+
+ /*
+@@ -970,15 +971,21 @@ errout:
+ }
+ #endif
+
++struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode);
++
+ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+ {
+ struct inode * inode;
+ struct ext3_dir_entry_2 * de;
+ struct buffer_head * bh;
++ struct dentry *alternate = NULL;
+
+ if (dentry->d_name.len > EXT3_NAME_LEN)
+ return ERR_PTR(-ENAMETOOLONG);
+
++ if (ext3_check_for_iopen(dir, dentry))
++ return NULL;
++
+ bh = ext3_find_entry(dentry, &de);
+ inode = NULL;
+ if (bh) {
+@@ -991,6 +998,12 @@ static struct dentry *ext3_lookup(struct
+ }
+ if (inode)
+ return d_splice_alias(inode, dentry);
++
++ if (inode && (alternate = iopen_connect_dentry(dentry, inode))) {
++ iput(inode);
++ return alternate;
++ }
++
+ d_add(dentry, inode);
+ return NULL;
+ }
+--- linux-2.6.0-test1/fs/ext3/super.c~iopen-2.6.0 2003-08-24 13:00:36.000000000 +0400
++++ linux-2.6.0-test1-alexey/fs/ext3/super.c 2003-08-24 13:02:02.000000000 +0400
+@@ -755,6 +755,17 @@ static int parse_options (char * options
+ || !strcmp (this_char, "quota")
+ || !strcmp (this_char, "usrquota"))
+ /* Don't do anything ;-) */ ;
++ else if (!strcmp (this_char, "iopen")) {
++ set_opt (sbi->s_mount_opt, IOPEN);
++ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++ } else if (!strcmp (this_char, "noiopen")) {
++ clear_opt (sbi->s_mount_opt, IOPEN);
++ clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++ }
++ else if (!strcmp (this_char, "iopen_nopriv")) {
++ set_opt (sbi->s_mount_opt, IOPEN);
++ set_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++ }
+ else if (!strcmp (this_char, "journal")) {
+ /* @@@ FIXME */
+ /* Eventually we will want to be able to create
+--- linux-2.6.0-test1/include/linux/ext3_fs.h~iopen-2.6.0 2003-08-24 12:58:57.000000000 +0400
++++ linux-2.6.0-test1-alexey/include/linux/ext3_fs.h 2003-08-24 13:02:02.000000000 +0400
+@@ -324,6 +324,8 @@ struct ext3_inode {
+ #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
+ #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
+ #define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */
++#define EXT3_MOUNT_IOPEN 0x10000 /* Allow access via iopen */
++#define EXT3_MOUNT_IOPEN_NOPRIV 0x20000 /* Make iopen world-readable */
+
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef _LINUX_EXT2_FS_H
+
+_
--- /dev/null
+ fs/ext3/Makefile | 4
+ fs/ext3/ext3-exports.c | 13
+ fs/ext3/ialloc.c | 2
+ fs/ext3/inode.c | 29 -
+ fs/ext3/namei.c | 8
+ fs/ext3/super.c | 23
+ fs/ext3/xattr.c | 1242 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_fs.h | 46 -
+ include/linux/ext3_jbd.h | 8
+ include/linux/ext3_xattr.h | 155 +++++
+ include/linux/xattr.h | 15
+ 11 files changed, 1494 insertions(+), 51 deletions(-)
+
+--- /dev/null 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.18-alexey/fs/ext3/ext3-exports.c 2003-09-01 14:55:39.000000000 +0400
+@@ -0,0 +1,13 @@
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/ext3_xattr.h>
++
++EXPORT_SYMBOL(ext3_force_commit);
++EXPORT_SYMBOL(ext3_bread);
++EXPORT_SYMBOL(ext3_xattr_register);
++EXPORT_SYMBOL(ext3_xattr_unregister);
++EXPORT_SYMBOL(ext3_xattr_get);
++EXPORT_SYMBOL(ext3_xattr_list);
++EXPORT_SYMBOL(ext3_xattr_set);
+--- linux-2.4.18/fs/ext3/ialloc.c~linux-2.4.18ea-0.8.26-2 2003-07-28 17:52:04.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/ialloc.c 2003-09-01 14:55:39.000000000 +0400
+@@ -17,6 +17,7 @@
+ #include <linux/jbd.h>
+ #include <linux/ext3_fs.h>
+ #include <linux/ext3_jbd.h>
++#include <linux/ext3_xattr.h>
+ #include <linux/stat.h>
+ #include <linux/string.h>
+ #include <linux/locks.h>
+@@ -216,6 +217,7 @@ void ext3_free_inode (handle_t *handle,
+ * as writing the quota to disk may need the lock as well.
+ */
+ DQUOT_INIT(inode);
++ ext3_xattr_drop_inode(handle, inode);
+ DQUOT_FREE_INODE(inode);
+ DQUOT_DROP(inode);
+
+--- linux-2.4.18/fs/ext3/inode.c~linux-2.4.18ea-0.8.26-2 2003-07-28 17:52:04.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/inode.c 2003-09-01 14:55:39.000000000 +0400
+@@ -39,6 +39,18 @@
+ */
+ #undef SEARCH_FROM_ZERO
+
++/*
++ * Test whether an inode is a fast symlink.
++ */
++static inline int ext3_inode_is_fast_symlink(struct inode *inode)
++{
++ int ea_blocks = EXT3_I(inode)->i_file_acl ?
++ (inode->i_sb->s_blocksize >> 9) : 0;
++
++ return (S_ISLNK(inode->i_mode) &&
++ inode->i_blocks - ea_blocks == 0);
++}
++
+ /* The ext3 forget function must perform a revoke if we are freeing data
+ * which has been journaled. Metadata (eg. indirect blocks) must be
+ * revoked in all cases.
+@@ -48,7 +60,7 @@
+ * still needs to be revoked.
+ */
+
+-static int ext3_forget(handle_t *handle, int is_metadata,
++int ext3_forget(handle_t *handle, int is_metadata,
+ struct inode *inode, struct buffer_head *bh,
+ int blocknr)
+ {
+@@ -164,9 +176,7 @@ void ext3_delete_inode (struct inode * i
+ {
+ handle_t *handle;
+
+- if (is_bad_inode(inode) ||
+- inode->i_ino == EXT3_ACL_IDX_INO ||
+- inode->i_ino == EXT3_ACL_DATA_INO)
++ if (is_bad_inode(inode))
+ goto no_delete;
+
+ lock_kernel();
+@@ -1877,6 +1887,8 @@ void ext3_truncate(struct inode * inode)
+ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ S_ISLNK(inode->i_mode)))
+ return;
++ if (ext3_inode_is_fast_symlink(inode))
++ return;
+ if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ return;
+
+@@ -2038,8 +2050,6 @@ int ext3_get_inode_loc (struct inode *in
+ struct ext3_group_desc * gdp;
+
+ if ((inode->i_ino != EXT3_ROOT_INO &&
+- inode->i_ino != EXT3_ACL_IDX_INO &&
+- inode->i_ino != EXT3_ACL_DATA_INO &&
+ inode->i_ino != EXT3_JOURNAL_INO &&
+ inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) ||
+ inode->i_ino > le32_to_cpu(
+@@ -2166,10 +2176,7 @@ void ext3_read_inode(struct inode * inod
+
+ brelse (iloc.bh);
+
+- if (inode->i_ino == EXT3_ACL_IDX_INO ||
+- inode->i_ino == EXT3_ACL_DATA_INO)
+- /* Nothing to do */ ;
+- else if (S_ISREG(inode->i_mode)) {
++ if (S_ISREG(inode->i_mode)) {
+ inode->i_op = &ext3_file_inode_operations;
+ inode->i_fop = &ext3_file_operations;
+ inode->i_mapping->a_ops = &ext3_aops;
+@@ -2177,7 +2184,7 @@ void ext3_read_inode(struct inode * inod
+ inode->i_op = &ext3_dir_inode_operations;
+ inode->i_fop = &ext3_dir_operations;
+ } else if (S_ISLNK(inode->i_mode)) {
+- if (!inode->i_blocks)
++ if (ext3_inode_is_fast_symlink(inode))
+ inode->i_op = &ext3_fast_symlink_inode_operations;
+ else {
+ inode->i_op = &page_symlink_inode_operations;
+--- linux-2.4.18/fs/ext3/Makefile~linux-2.4.18ea-0.8.26-2 2003-08-29 16:53:17.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/Makefile 2003-09-01 14:55:50.000000000 +0400
+@@ -9,10 +9,10 @@
+
+ O_TARGET := ext3.o
+
+-export-objs := super.o inode.o
++export-objs := ext3-exports.o
+
+ obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+- ioctl.o namei.o super.o symlink.o hash.o
++ ioctl.o namei.o super.o symlink.o xattr.o hash.o ext3-exports.o
+ obj-m := $(O_TARGET)
+
+ include $(TOPDIR)/Rules.make
+--- linux-2.4.18/fs/ext3/namei.c~linux-2.4.18ea-0.8.26-2 2003-09-01 11:50:59.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/namei.c 2003-09-01 14:55:39.000000000 +0400
+@@ -29,6 +29,7 @@
+ #include <linux/sched.h>
+ #include <linux/ext3_fs.h>
+ #include <linux/ext3_jbd.h>
++#include <linux/ext3_xattr.h>
+ #include <linux/fcntl.h>
+ #include <linux/stat.h>
+ #include <linux/string.h>
+@@ -1524,6 +1525,7 @@ static int ext3_add_nondir(handle_t *han
+ d_instantiate(dentry, inode);
+ return 0;
+ }
++ ext3_xattr_drop_inode(handle, inode);
+ ext3_dec_count(handle, inode);
+ iput(inode);
+ return err;
+@@ -1612,7 +1614,7 @@ static int ext3_mkdir(struct inode * dir
+ if (IS_SYNC(dir))
+ handle->h_sync = 1;
+
+- inode = ext3_new_inode (handle, dir, S_IFDIR);
++ inode = ext3_new_inode (handle, dir, S_IFDIR | mode);
+ err = PTR_ERR(inode);
+ if (IS_ERR(inode))
+ goto out_stop;
+@@ -1620,7 +1622,6 @@ static int ext3_mkdir(struct inode * dir
+ inode->i_op = &ext3_dir_inode_operations;
+ inode->i_fop = &ext3_dir_operations;
+ inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
+- inode->i_blocks = 0;
+ dir_block = ext3_bread (handle, inode, 0, 1, &err);
+ if (!dir_block) {
+ inode->i_nlink--; /* is this nlink == 0? */
+@@ -1647,9 +1648,6 @@ static int ext3_mkdir(struct inode * dir
+ BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
+ ext3_journal_dirty_metadata(handle, dir_block);
+ brelse (dir_block);
+- inode->i_mode = S_IFDIR | mode;
+- if (dir->i_mode & S_ISGID)
+- inode->i_mode |= S_ISGID;
+ ext3_mark_inode_dirty(handle, inode);
+ err = ext3_add_entry (handle, dentry, inode);
+ if (err) {
+--- linux-2.4.18/fs/ext3/super.c~linux-2.4.18ea-0.8.26-2 2003-08-29 16:53:17.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/super.c 2003-09-01 14:55:39.000000000 +0400
+@@ -24,6 +24,7 @@
+ #include <linux/jbd.h>
+ #include <linux/ext3_fs.h>
+ #include <linux/ext3_jbd.h>
++#include <linux/ext3_xattr.h>
+ #include <linux/slab.h>
+ #include <linux/init.h>
+ #include <linux/locks.h>
+@@ -406,6 +407,7 @@ void ext3_put_super (struct super_block
+ kdev_t j_dev = sbi->s_journal->j_dev;
+ int i;
+
++ ext3_xattr_put_super(sb);
+ journal_destroy(sbi->s_journal);
+ if (!(sb->s_flags & MS_RDONLY)) {
+ EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
+@@ -1743,18 +1745,27 @@ int ext3_statfs (struct super_block * sb
+
+ static DECLARE_FSTYPE_DEV(ext3_fs_type, "ext3", ext3_read_super);
+
+-static int __init init_ext3_fs(void)
++static void exit_ext3_fs(void)
+ {
+- return register_filesystem(&ext3_fs_type);
++ unregister_filesystem(&ext3_fs_type);
++ exit_ext3_xattr_user();
++ exit_ext3_xattr();
+ }
+
+-static void __exit exit_ext3_fs(void)
++static int __init init_ext3_fs(void)
+ {
+- unregister_filesystem(&ext3_fs_type);
++ int error = init_ext3_xattr();
++ if (!error)
++ error = init_ext3_xattr_user();
++ if (!error)
++ error = register_filesystem(&ext3_fs_type);
++ if (!error)
++ return 0;
++
++ exit_ext3_fs();
++ return error;
+ }
+
+-EXPORT_SYMBOL(ext3_force_commit);
+-EXPORT_SYMBOL(ext3_bread);
+
+ MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
+ MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
+--- /dev/null 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.18-alexey/fs/ext3/xattr.c 2003-09-01 14:55:39.000000000 +0400
+@@ -0,0 +1,1242 @@
++/*
++ * linux/fs/ext3/xattr.c
++ *
++ * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
++ *
++ * Fix by Harrison Xing <harrison@mountainviewdata.com>.
++ * Ext3 code with a lot of help from Eric Jarman <ejarman@acm.org>.
++ * Extended attributes for symlinks and special files added per
++ * suggestion of Luka Renko <luka.renko@hermes.si>.
++ */
++
++/*
++ * Extended attributes are stored on disk blocks allocated outside of
++ * any inode. The i_file_acl field is then made to point to this allocated
++ * block. If all extended attributes of an inode are identical, these
++ * inodes may share the same extended attribute block. Such situations
++ * are automatically detected by keeping a cache of recent attribute block
++ * numbers and hashes over the block's contents in memory.
++ *
++ *
++ * Extended attribute block layout:
++ *
++ * +------------------+
++ * | header |
++ * | entry 1 | |
++ * | entry 2 | | growing downwards
++ * | entry 3 | v
++ * | four null bytes |
++ * | . . . |
++ * | value 1 | ^
++ * | value 3 | | growing upwards
++ * | value 2 | |
++ * +------------------+
++ *
++ * The block header is followed by multiple entry descriptors. These entry
++ * descriptors are variable in size, and aligned to EXT3_XATTR_PAD
++ * byte boundaries. The entry descriptors are sorted by attribute name,
++ * so that two extended attribute blocks can be compared efficiently.
++ *
++ * Attribute values are aligned to the end of the block, stored in
++ * no specific order. They are also padded to EXT3_XATTR_PAD byte
++ * boundaries. No additional gaps are left between them.
++ *
++ * Locking strategy
++ * ----------------
++ * The VFS already holds the BKL and the inode->i_sem semaphore when any of
++ * the xattr inode operations are called, so we are guaranteed that only one
++ * process accesses extended attributes of an inode at any time.
++ *
++ * For writing we also grab the ext3_xattr_sem semaphore. This ensures that
++ * only a single process is modifying an extended attribute block, even
++ * if the block is shared among inodes.
++ *
++ * Note for porting to 2.5
++ * -----------------------
++ * The BKL will no longer be held in the xattr inode operations.
++ */
++
++#include <linux/fs.h>
++#include <linux/locks.h>
++#include <linux/slab.h>
++#include <linux/ext3_jbd.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_xattr.h>
++#ifdef CONFIG_EXT3_FS_XATTR_SHARING
++#include <linux/mbcache.h>
++#endif
++#include <linux/quotaops.h>
++#include <asm/semaphore.h>
++#include <linux/compatmac.h>
++#include <linux/module.h>
++
++/* These symbols may be needed by a module. */
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0)
++# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1)
++#endif
++
++#define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data))
++#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr))
++#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1)
++#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
++
++#ifdef EXT3_XATTR_DEBUG
++# define ea_idebug(inode, f...) do { \
++ printk(KERN_DEBUG "inode %s:%ld: ", \
++ kdevname(inode->i_dev), inode->i_ino); \
++ printk(f); \
++ printk("\n"); \
++ } while (0)
++# define ea_bdebug(bh, f...) do { \
++ printk(KERN_DEBUG "block %s:%ld: ", \
++ kdevname(bh->b_dev), bh->b_blocknr); \
++ printk(f); \
++ printk("\n"); \
++ } while (0)
++#else
++# define ea_idebug(f...)
++# define ea_bdebug(f...)
++#endif
++
++static int ext3_xattr_set2(handle_t *, struct inode *, struct buffer_head *,
++ struct ext3_xattr_header *);
++
++#ifdef CONFIG_EXT3_FS_XATTR_SHARING
++
++static int ext3_xattr_cache_insert(struct buffer_head *);
++static struct buffer_head *ext3_xattr_cache_find(struct inode *,
++ struct ext3_xattr_header *);
++static void ext3_xattr_cache_remove(struct buffer_head *);
++static void ext3_xattr_rehash(struct ext3_xattr_header *,
++ struct ext3_xattr_entry *);
++
++static struct mb_cache *ext3_xattr_cache;
++
++#else
++# define ext3_xattr_cache_insert(bh) 0
++# define ext3_xattr_cache_find(inode, header) NULL
++# define ext3_xattr_cache_remove(bh) do {} while(0)
++# define ext3_xattr_rehash(header, entry) do {} while(0)
++#endif
++
++/*
++ * If a file system does not share extended attributes among inodes,
++ * we should not need the ext3_xattr_sem semaphore. However, the
++ * filesystem may still contain shared blocks, so we always take
++ * the lock.
++ */
++
++DECLARE_MUTEX(ext3_xattr_sem);
++
++static inline void
++ext3_xattr_lock(void)
++{
++ down(&ext3_xattr_sem);
++}
++
++static inline void
++ext3_xattr_unlock(void)
++{
++ up(&ext3_xattr_sem);
++}
++
++static inline int
++ext3_xattr_new_block(handle_t *handle, struct inode *inode,
++ int * errp, int force)
++{
++ struct super_block *sb = inode->i_sb;
++ int goal = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) +
++ EXT3_I(inode)->i_block_group * EXT3_BLOCKS_PER_GROUP(sb);
++
++ /* How can we enforce the allocation? */
++ int block = ext3_new_block(handle, inode, goal, 0, 0, errp);
++#ifdef OLD_QUOTAS
++ if (!*errp)
++ inode->i_blocks += inode->i_sb->s_blocksize >> 9;
++#endif
++ return block;
++}
++
++static inline int
++ext3_xattr_quota_alloc(struct inode *inode, int force)
++{
++ /* How can we enforce the allocation? */
++#ifdef OLD_QUOTAS
++ int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1);
++ if (!error)
++ inode->i_blocks += inode->i_sb->s_blocksize >> 9;
++#else
++ int error = DQUOT_ALLOC_BLOCK(inode, 1);
++#endif
++ return error;
++}
++
++#ifdef OLD_QUOTAS
++
++static inline void
++ext3_xattr_quota_free(struct inode *inode)
++{
++ DQUOT_FREE_BLOCK(inode->i_sb, inode, 1);
++ inode->i_blocks -= inode->i_sb->s_blocksize >> 9;
++}
++
++static inline void
++ext3_xattr_free_block(handle_t *handle, struct inode * inode,
++ unsigned long block)
++{
++ ext3_free_blocks(handle, inode, block, 1);
++ inode->i_blocks -= inode->i_sb->s_blocksize >> 9;
++}
++
++#else
++# define ext3_xattr_quota_free(inode) \
++ DQUOT_FREE_BLOCK(inode, 1)
++# define ext3_xattr_free_block(handle, inode, block) \
++ ext3_free_blocks(handle, inode, block, 1)
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18)
++
++static inline struct buffer_head *
++sb_bread(struct super_block *sb, int block)
++{
++ return bread(sb->s_dev, block, sb->s_blocksize);
++}
++
++static inline struct buffer_head *
++sb_getblk(struct super_block *sb, int block)
++{
++ return getblk(sb->s_dev, block, sb->s_blocksize);
++}
++
++#endif
++
++struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX];
++rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED;
++
++int
++ext3_xattr_register(int name_index, struct ext3_xattr_handler *handler)
++{
++ int error = -EINVAL;
++
++ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) {
++ write_lock(&ext3_handler_lock);
++ if (!ext3_xattr_handlers[name_index-1]) {
++ ext3_xattr_handlers[name_index-1] = handler;
++ error = 0;
++ }
++ write_unlock(&ext3_handler_lock);
++ }
++ return error;
++}
++
++void
++ext3_xattr_unregister(int name_index, struct ext3_xattr_handler *handler)
++{
++ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) { /* both bounds must hold before indexing */
++ write_lock(&ext3_handler_lock);
++ ext3_xattr_handlers[name_index-1] = NULL;
++ write_unlock(&ext3_handler_lock);
++ }
++}
++
++static inline const char *
++strcmp_prefix(const char *a, const char *a_prefix)
++{
++ while (*a_prefix && *a == *a_prefix) {
++ a++;
++ a_prefix++;
++ }
++ return *a_prefix ? NULL : a;
++}
++
++/*
++ * Decode the extended attribute name, and translate it into
++ * the name_index and name suffix.
++ */
++static inline struct ext3_xattr_handler *
++ext3_xattr_resolve_name(const char **name)
++{
++ struct ext3_xattr_handler *handler = NULL;
++ int i;
++
++ if (!*name)
++ return NULL;
++ read_lock(&ext3_handler_lock);
++ for (i=0; i<EXT3_XATTR_INDEX_MAX; i++) {
++ if (ext3_xattr_handlers[i]) {
++ const char *n = strcmp_prefix(*name,
++ ext3_xattr_handlers[i]->prefix);
++ if (n) {
++ handler = ext3_xattr_handlers[i];
++ *name = n;
++ break;
++ }
++ }
++ }
++ read_unlock(&ext3_handler_lock);
++ return handler;
++}
++
++static inline struct ext3_xattr_handler *
++ext3_xattr_handler(int name_index)
++{
++ struct ext3_xattr_handler *handler = NULL;
++ if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) {
++ read_lock(&ext3_handler_lock);
++ handler = ext3_xattr_handlers[name_index-1];
++ read_unlock(&ext3_handler_lock);
++ }
++ return handler;
++}
++
++/*
++ * Inode operation getxattr()
++ *
++ * dentry->d_inode->i_sem down
++ * BKL held [before 2.5.x]
++ */
++ssize_t
++ext3_getxattr(struct dentry *dentry, const char *name,
++ void *buffer, size_t size)
++{
++ struct ext3_xattr_handler *handler;
++ struct inode *inode = dentry->d_inode;
++
++ handler = ext3_xattr_resolve_name(&name);
++ if (!handler)
++ return -ENOTSUP;
++ return handler->get(inode, name, buffer, size);
++}
++
++/*
++ * Inode operation listxattr()
++ *
++ * dentry->d_inode->i_sem down
++ * BKL held [before 2.5.x]
++ */
++ssize_t
++ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
++{
++ return ext3_xattr_list(dentry->d_inode, buffer, size);
++}
++
++/*
++ * Inode operation setxattr()
++ *
++ * dentry->d_inode->i_sem down
++ * BKL held [before 2.5.x]
++ */
++int
++ext3_setxattr(struct dentry *dentry, const char *name,
++ void *value, size_t size, int flags)
++{
++ struct ext3_xattr_handler *handler;
++ struct inode *inode = dentry->d_inode;
++
++ if (size == 0)
++ value = ""; /* empty EA, do not remove */
++ handler = ext3_xattr_resolve_name(&name);
++ if (!handler)
++ return -ENOTSUP;
++ return handler->set(inode, name, value, size, flags);
++}
++
++/*
++ * Inode operation removexattr()
++ *
++ * dentry->d_inode->i_sem down
++ * BKL held [before 2.5.x]
++ */
++int
++ext3_removexattr(struct dentry *dentry, const char *name)
++{
++ struct ext3_xattr_handler *handler;
++ struct inode *inode = dentry->d_inode;
++
++ handler = ext3_xattr_resolve_name(&name);
++ if (!handler)
++ return -ENOTSUP;
++ return handler->set(inode, name, NULL, 0, XATTR_REPLACE);
++}
++
++/*
++ * ext3_xattr_get()
++ *
++ * Copy an extended attribute into the buffer
++ * provided, or compute the buffer size required.
++ * Buffer is NULL to compute the size of the buffer required.
++ *
++ * Returns a negative error number on failure, or the number of bytes
++ * used / required on success.
++ */
++int
++ext3_xattr_get(struct inode *inode, int name_index, const char *name,
++ void *buffer, size_t buffer_size)
++{
++ struct buffer_head *bh = NULL;
++ struct ext3_xattr_entry *entry;
++ unsigned int block, size;
++ char *end;
++ int name_len, error;
++
++ ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
++ name_index, name, buffer, (long)buffer_size);
++
++ if (name == NULL)
++ return -EINVAL;
++ if (!EXT3_I(inode)->i_file_acl)
++ return -ENOATTR;
++ block = EXT3_I(inode)->i_file_acl;
++ ea_idebug(inode, "reading block %d", block);
++ bh = sb_bread(inode->i_sb, block);
++ if (!bh)
++ return -EIO;
++ ea_bdebug(bh, "b_count=%d, refcount=%d",
++ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
++ end = bh->b_data + bh->b_size;
++ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
++ HDR(bh)->h_blocks != cpu_to_le32(1)) {
++bad_block: ext3_error(inode->i_sb, "ext3_xattr_get",
++ "inode %ld: bad block %d", inode->i_ino, block);
++ error = -EIO;
++ goto cleanup;
++ }
++ /* find named attribute */
++ name_len = strlen(name);
++
++ error = -ERANGE;
++ if (name_len > 255)
++ goto cleanup;
++ entry = FIRST_ENTRY(bh);
++ while (!IS_LAST_ENTRY(entry)) {
++ struct ext3_xattr_entry *next =
++ EXT3_XATTR_NEXT(entry);
++ if ((char *)next >= end)
++ goto bad_block;
++ if (name_index == entry->e_name_index &&
++ name_len == entry->e_name_len &&
++ memcmp(name, entry->e_name, name_len) == 0)
++ goto found;
++ entry = next;
++ }
++ /* Check the remaining name entries */
++ while (!IS_LAST_ENTRY(entry)) {
++ struct ext3_xattr_entry *next =
++ EXT3_XATTR_NEXT(entry);
++ if ((char *)next >= end)
++ goto bad_block;
++ entry = next;
++ }
++ if (ext3_xattr_cache_insert(bh))
++ ea_idebug(inode, "cache insert failed");
++ error = -ENOATTR;
++ goto cleanup;
++found:
++ /* check the buffer size */
++ if (entry->e_value_block != 0)
++ goto bad_block;
++ size = le32_to_cpu(entry->e_value_size);
++ if (size > inode->i_sb->s_blocksize ||
++ le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
++ goto bad_block;
++
++ if (ext3_xattr_cache_insert(bh))
++ ea_idebug(inode, "cache insert failed");
++ if (buffer) {
++ error = -ERANGE;
++ if (size > buffer_size)
++ goto cleanup;
++ /* return value of attribute */
++ memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
++ size);
++ }
++ error = size;
++
++cleanup:
++ brelse(bh);
++
++ return error;
++}
++
++/*
++ * ext3_xattr_list()
++ *
++ * Copy a list of attribute names into the buffer
++ * provided, or compute the buffer size required.
++ * Buffer is NULL to compute the size of the buffer required.
++ *
++ * Returns a negative error number on failure, or the number of bytes
++ * used / required on success.
++ */
++int
++ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
++{
++ struct buffer_head *bh = NULL;
++ struct ext3_xattr_entry *entry;
++ unsigned int block, size = 0;
++ char *buf, *end;
++ int error;
++
++ ea_idebug(inode, "buffer=%p, buffer_size=%ld",
++ buffer, (long)buffer_size);
++
++ if (!EXT3_I(inode)->i_file_acl)
++ return 0;
++ block = EXT3_I(inode)->i_file_acl;
++ ea_idebug(inode, "reading block %d", block);
++ bh = sb_bread(inode->i_sb, block);
++ if (!bh)
++ return -EIO;
++ ea_bdebug(bh, "b_count=%d, refcount=%d",
++ atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
++ end = bh->b_data + bh->b_size;
++ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
++ HDR(bh)->h_blocks != cpu_to_le32(1)) {
++bad_block: ext3_error(inode->i_sb, "ext3_xattr_list",
++ "inode %ld: bad block %d", inode->i_ino, block);
++ error = -EIO;
++ goto cleanup;
++ }
++ /* compute the size required for the list of attribute names */
++ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
++ entry = EXT3_XATTR_NEXT(entry)) {
++ struct ext3_xattr_handler *handler;
++ struct ext3_xattr_entry *next =
++ EXT3_XATTR_NEXT(entry);
++ if ((char *)next >= end)
++ goto bad_block;
++
++ handler = ext3_xattr_handler(entry->e_name_index);
++ if (handler) {
++ size += handler->list(NULL, inode, entry->e_name,
++ entry->e_name_len) + 1;
++ }
++ }
++
++ if (ext3_xattr_cache_insert(bh))
++ ea_idebug(inode, "cache insert failed");
++ if (!buffer) {
++ error = size;
++ goto cleanup;
++ } else {
++ error = -ERANGE;
++ if (size > buffer_size)
++ goto cleanup;
++ }
++
++ /* list the attribute names */
++ buf = buffer;
++ for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
++ entry = EXT3_XATTR_NEXT(entry)) {
++ struct ext3_xattr_handler *handler;
++
++ handler = ext3_xattr_handler(entry->e_name_index);
++ if (handler) {
++ buf += handler->list(buf, inode, entry->e_name,
++ entry->e_name_len);
++ *buf++ = '\0';
++ }
++ }
++ error = size;
++
++cleanup:
++ brelse(bh);
++
++ return error;
++}
++
++/*
++ * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is
++ * not set, set it.
++ */
++static void ext3_xattr_update_super_block(handle_t *handle,
++ struct super_block *sb)
++{
++ if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR))
++ return;
++
++ lock_super(sb);
++ ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0)
++ EXT3_SB(sb)->s_feature_compat |= EXT3_FEATURE_COMPAT_EXT_ATTR;
++#endif
++ EXT3_SB(sb)->s_es->s_feature_compat |=
++ cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR);
++ sb->s_dirt = 1;
++ ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
++ unlock_super(sb);
++}
++
++/*
++ * ext3_xattr_set()
++ *
++ * Create, replace or remove an extended attribute for this inode. Buffer
++ * is NULL to remove an existing extended attribute, and non-NULL to
++ * either replace an existing extended attribute, or create a new extended
++ * attribute. The flags XATTR_REPLACE and XATTR_CREATE
++ * specify that an extended attribute must exist and must not exist
++ * previous to the call, respectively.
++ *
++ * Returns 0, or a negative error number on failure.
++ */
++int
++ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index,
++ const char *name, void *value, size_t value_len, int flags)
++{
++ struct super_block *sb = inode->i_sb;
++ struct buffer_head *bh = NULL;
++ struct ext3_xattr_header *header = NULL;
++ struct ext3_xattr_entry *here, *last;
++ unsigned int name_len;
++ int min_offs = sb->s_blocksize, not_found = 1, free, error;
++ char *end;
++
++ /*
++ * header -- Points either into bh, or to a temporarily
++ * allocated buffer.
++ * here -- The named entry found, or the place for inserting, within
++ * the block pointed to by header.
++ * last -- Points right after the last named entry within the block
++ * pointed to by header.
++ * min_offs -- The offset of the first value (values are aligned
++ * towards the end of the block).
++ * end -- Points right after the block pointed to by header.
++ */
++
++ ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
++ name_index, name, value, (long)value_len);
++
++ if (IS_RDONLY(inode))
++ return -EROFS;
++ if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
++ return -EPERM;
++ if (value == NULL)
++ value_len = 0;
++ if (name == NULL)
++ return -EINVAL;
++ name_len = strlen(name);
++ if (name_len > 255 || value_len > sb->s_blocksize)
++ return -ERANGE;
++ ext3_xattr_lock();
++
++ if (EXT3_I(inode)->i_file_acl) {
++ /* The inode already has an extended attribute block. */
++ int block = EXT3_I(inode)->i_file_acl;
++
++ bh = sb_bread(sb, block);
++ error = -EIO;
++ if (!bh)
++ goto cleanup;
++ ea_bdebug(bh, "b_count=%d, refcount=%d",
++ atomic_read(&(bh->b_count)),
++ le32_to_cpu(HDR(bh)->h_refcount));
++ header = HDR(bh);
++ end = bh->b_data + bh->b_size;
++ if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
++ header->h_blocks != cpu_to_le32(1)) {
++bad_block: ext3_error(sb, "ext3_xattr_set",
++ "inode %ld: bad block %d", inode->i_ino, block);
++ error = -EIO;
++ goto cleanup;
++ }
++ /* Find the named attribute. */
++ here = FIRST_ENTRY(bh);
++ while (!IS_LAST_ENTRY(here)) {
++ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here);
++ if ((char *)next >= end)
++ goto bad_block;
++ if (!here->e_value_block && here->e_value_size) {
++ int offs = le16_to_cpu(here->e_value_offs);
++ if (offs < min_offs)
++ min_offs = offs;
++ }
++ not_found = name_index - here->e_name_index;
++ if (!not_found)
++ not_found = name_len - here->e_name_len;
++ if (!not_found)
++ not_found = memcmp(name, here->e_name,name_len);
++ if (not_found <= 0)
++ break;
++ here = next;
++ }
++ last = here;
++ /* We still need to compute min_offs and last. */
++ while (!IS_LAST_ENTRY(last)) {
++ struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last);
++ if ((char *)next >= end)
++ goto bad_block;
++ if (!last->e_value_block && last->e_value_size) {
++ int offs = le16_to_cpu(last->e_value_offs);
++ if (offs < min_offs)
++ min_offs = offs;
++ }
++ last = next;
++ }
++
++ /* Check whether we have enough space left. */
++ free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
++ } else {
++ /* We will use a new extended attribute block. */
++ free = sb->s_blocksize -
++ sizeof(struct ext3_xattr_header) - sizeof(__u32);
++ here = last = NULL; /* avoid gcc uninitialized warning. */
++ }
++
++ if (not_found) {
++ /* Request to remove a nonexistent attribute? */
++ error = -ENOATTR;
++ if (flags & XATTR_REPLACE)
++ goto cleanup;
++ error = 0;
++ if (value == NULL)
++ goto cleanup;
++ else
++ free -= EXT3_XATTR_LEN(name_len);
++ } else {
++ /* Request to create an existing attribute? */
++ error = -EEXIST;
++ if (flags & XATTR_CREATE)
++ goto cleanup;
++ if (!here->e_value_block && here->e_value_size) {
++ unsigned int size = le32_to_cpu(here->e_value_size);
++
++ if (le16_to_cpu(here->e_value_offs) + size >
++ sb->s_blocksize || size > sb->s_blocksize)
++ goto bad_block;
++ free += EXT3_XATTR_SIZE(size);
++ }
++ }
++ free -= EXT3_XATTR_SIZE(value_len);
++ error = -ENOSPC;
++ if (free < 0)
++ goto cleanup;
++
++ /* Here we know that we can set the new attribute. */
++
++ if (header) {
++ if (header->h_refcount == cpu_to_le32(1)) {
++ ea_bdebug(bh, "modifying in-place");
++ ext3_xattr_cache_remove(bh);
++ error = ext3_journal_get_write_access(handle, bh);
++ if (error)
++ goto cleanup;
++ } else {
++ int offset;
++
++ ea_bdebug(bh, "cloning");
++ header = kmalloc(bh->b_size, GFP_KERNEL);
++ error = -ENOMEM;
++ if (header == NULL)
++ goto cleanup;
++ memcpy(header, HDR(bh), bh->b_size);
++ header->h_refcount = cpu_to_le32(1);
++ offset = (char *)header - bh->b_data;
++ here = ENTRY((char *)here + offset);
++ last = ENTRY((char *)last + offset);
++ }
++ } else {
++ /* Allocate a buffer where we construct the new block. */
++ header = kmalloc(sb->s_blocksize, GFP_KERNEL);
++ error = -ENOMEM;
++ if (header == NULL)
++ goto cleanup;
++ memset(header, 0, sb->s_blocksize);
++ end = (char *)header + sb->s_blocksize;
++ header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
++ header->h_blocks = header->h_refcount = cpu_to_le32(1);
++ last = here = ENTRY(header+1);
++ }
++
++ if (not_found) {
++ /* Insert the new name. */
++ int size = EXT3_XATTR_LEN(name_len);
++ int rest = (char *)last - (char *)here;
++ memmove((char *)here + size, here, rest);
++ memset(here, 0, size);
++ here->e_name_index = name_index;
++ here->e_name_len = name_len;
++ memcpy(here->e_name, name, name_len);
++ } else {
++ /* Remove the old value. */
++ if (!here->e_value_block && here->e_value_size) {
++ char *first_val = (char *)header + min_offs;
++ int offs = le16_to_cpu(here->e_value_offs);
++ char *val = (char *)header + offs;
++ size_t size = EXT3_XATTR_SIZE(
++ le32_to_cpu(here->e_value_size));
++ memmove(first_val + size, first_val, val - first_val);
++ memset(first_val, 0, size);
++ here->e_value_offs = 0;
++ min_offs += size;
++
++ /* Adjust all value offsets. */
++ last = ENTRY(header+1);
++ while (!IS_LAST_ENTRY(last)) {
++ int o = le16_to_cpu(last->e_value_offs);
++ if (!last->e_value_block && o < offs)
++ last->e_value_offs =
++ cpu_to_le16(o + size);
++ last = EXT3_XATTR_NEXT(last);
++ }
++ }
++ if (value == NULL) {
++ /* Remove this attribute. */
++ if (EXT3_XATTR_NEXT(ENTRY(header+1)) == last) {
++ /* This block is now empty. */
++ error = ext3_xattr_set2(handle, inode, bh,NULL);
++ goto cleanup;
++ } else {
++ /* Remove the old name. */
++ int size = EXT3_XATTR_LEN(name_len);
++ last = ENTRY((char *)last - size);
++ memmove(here, (char*)here + size,
++ (char*)last - (char*)here);
++ memset(last, 0, size);
++ }
++ }
++ }
++
++ if (value != NULL) {
++ /* Insert the new value. */
++ here->e_value_size = cpu_to_le32(value_len);
++ if (value_len) {
++ size_t size = EXT3_XATTR_SIZE(value_len);
++ char *val = (char *)header + min_offs - size;
++ here->e_value_offs =
++ cpu_to_le16((char *)val - (char *)header);
++ memset(val + size - EXT3_XATTR_PAD, 0,
++ EXT3_XATTR_PAD); /* Clear the pad bytes. */
++ memcpy(val, value, value_len);
++ }
++ }
++ ext3_xattr_rehash(header, here);
++
++ error = ext3_xattr_set2(handle, inode, bh, header);
++
++cleanup:
++ brelse(bh);
++ if (!(bh && header == HDR(bh)))
++ kfree(header);
++ ext3_xattr_unlock();
++
++ return error;
++}
++
++/*
++ * Second half of ext3_xattr_set(): Update the file system.
++ */
++static int
++ext3_xattr_set2(handle_t *handle, struct inode *inode,
++ struct buffer_head *old_bh, struct ext3_xattr_header *header)
++{
++ struct super_block *sb = inode->i_sb;
++ struct buffer_head *new_bh = NULL;
++ int error;
++
++ if (header) { /* NULL header: new_bh stays NULL, i_file_acl is cleared */
++ new_bh = ext3_xattr_cache_find(inode, header);
++ if (new_bh) {
++ /*
++ * We found an identical block in the cache.
++ * The old block will be released after updating
++ * the inode.
++ */
++ ea_bdebug(old_bh, "reusing block %ld",
++ new_bh->b_blocknr);
++
++ error = -EDQUOT; /* returned if the quota call below fails */
++ if (ext3_xattr_quota_alloc(inode, 1))
++ goto cleanup;
++
++ error = ext3_journal_get_write_access(handle, new_bh);
++ if (error)
++ goto cleanup;
++ HDR(new_bh)->h_refcount = cpu_to_le32(
++ le32_to_cpu(HDR(new_bh)->h_refcount) + 1);
++ ea_bdebug(new_bh, "refcount now=%d",
++ le32_to_cpu(HDR(new_bh)->h_refcount));
++ } else if (old_bh && header == HDR(old_bh)) {
++ /* Keep this block. */
++ new_bh = old_bh;
++ (void)ext3_xattr_cache_insert(new_bh); /* result deliberately dropped */
++ } else {
++ /* We need to allocate a new block */
++ int force = EXT3_I(inode)->i_file_acl != 0;
++ int block = ext3_xattr_new_block(handle, inode,
++ &error, force);
++ if (error)
++ goto cleanup;
++ ea_idebug(inode, "creating block %d", block);
++
++ new_bh = sb_getblk(sb, block);
++ if (!new_bh) {
++getblk_failed: ext3_xattr_free_block(handle, inode, block);
++ error = -EIO;
++ goto cleanup;
++ }
++ lock_buffer(new_bh);
++ error = ext3_journal_get_create_access(handle, new_bh);
++ if (error) {
++ unlock_buffer(new_bh);
++ goto getblk_failed; /* NOTE(review): error is overwritten with -EIO there */
++ }
++ memcpy(new_bh->b_data, header, new_bh->b_size);
++ mark_buffer_uptodate(new_bh, 1);
++ unlock_buffer(new_bh);
++ (void)ext3_xattr_cache_insert(new_bh); /* result deliberately dropped */
++ ext3_xattr_update_super_block(handle, sb);
++ }
++ error = ext3_journal_dirty_metadata(handle, new_bh);
++ if (error)
++ goto cleanup;
++ }
++
++ /* Update the inode; i_file_acl becomes 0 when no block remains. */
++ EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
++ inode->i_ctime = CURRENT_TIME;
++ ext3_mark_inode_dirty(handle, inode);
++ if (IS_SYNC(inode))
++ handle->h_sync = 1;
++
++ error = 0;
++ if (old_bh && old_bh != new_bh) {
++ /*
++ * If there was an old block, and we are not still using it,
++ * we now release the old block.
++ */
++ unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount);
++
++ error = ext3_journal_get_write_access(handle, old_bh);
++ if (error)
++ goto cleanup;
++ if (refcount == 1) {
++ /* Free the old block. */
++ ea_bdebug(old_bh, "freeing");
++ ext3_xattr_free_block(handle, inode, old_bh->b_blocknr);
++
++ /* ext3_forget() calls bforget() for us, but we
++ let our caller release old_bh, so we need to
++ duplicate the handle before. */
++ get_bh(old_bh);
++ ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr);
++ } else {
++ /* Decrement the refcount only. */
++ refcount--;
++ HDR(old_bh)->h_refcount = cpu_to_le32(refcount);
++ ext3_xattr_quota_free(inode);
++ ext3_journal_dirty_metadata(handle, old_bh); /* NOTE(review): result ignored */
++ ea_bdebug(old_bh, "refcount now=%d", refcount);
++ }
++ }
++
++cleanup:
++ if (old_bh != new_bh) /* old_bh itself is released by the caller */
++ brelse(new_bh);
++
++ return error;
++}
++
++/*
++ * ext3_xattr_drop_inode()
++ *
++ * Free extended attribute resources associated with this inode. This
++ * is called immediately before an inode is freed.
++ */
++void
++ext3_xattr_drop_inode(handle_t *handle, struct inode *inode)
++{
++ struct buffer_head *bh;
++ unsigned int block = EXT3_I(inode)->i_file_acl;
++
++ if (!block) /* i_file_acl == 0: nothing to release */
++ return;
++ ext3_xattr_lock();
++
++ bh = sb_bread(inode->i_sb, block);
++ if (!bh) {
++ ext3_error(inode->i_sb, "ext3_xattr_drop_inode",
++ "inode %ld: block %d read error", inode->i_ino, block);
++ goto cleanup;
++ }
++ ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count)));
++ if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
++ HDR(bh)->h_blocks != cpu_to_le32(1)) {
++ ext3_error(inode->i_sb, "ext3_xattr_drop_inode",
++ "inode %ld: bad block %d", inode->i_ino, block);
++ goto cleanup;
++ }
++ ext3_journal_get_write_access(handle, bh); /* NOTE(review): result ignored */
++ ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
++ if (HDR(bh)->h_refcount == cpu_to_le32(1)) { /* we hold the last reference */
++ ext3_xattr_cache_remove(bh);
++ ext3_xattr_free_block(handle, inode, block);
++ ext3_forget(handle, 1, inode, bh, block);
++ bh = NULL; /* presumably ext3_forget() released bh; makes brelse() below a no-op */
++ } else { /* block is still shared: only drop our reference */
++ HDR(bh)->h_refcount = cpu_to_le32(
++ le32_to_cpu(HDR(bh)->h_refcount) - 1);
++ ext3_journal_dirty_metadata(handle, bh);
++ if (IS_SYNC(inode))
++ handle->h_sync = 1;
++ ext3_xattr_quota_free(inode);
++ }
++ EXT3_I(inode)->i_file_acl = 0; /* skipped on the error paths above */
++
++cleanup:
++ brelse(bh);
++ ext3_xattr_unlock();
++}
++
++/*
++ * ext3_xattr_put_super()
++ *
++ * This is called when a file system is unmounted.
++ */
++void
++ext3_xattr_put_super(struct super_block *sb)
++{
++#ifdef CONFIG_EXT3_FS_XATTR_SHARING
++ mb_cache_shrink(ext3_xattr_cache, sb->s_dev);
++#endif
++}
++
++#ifdef CONFIG_EXT3_FS_XATTR_SHARING
++
++/*
++ * ext3_xattr_cache_insert()
++ *
++ * Create a new entry in the extended attribute cache, and insert
++ * it unless such an entry is already in the cache.
++ *
++ * Returns 0, or a negative error number on failure.
++ */
++static int
++ext3_xattr_cache_insert(struct buffer_head *bh)
++{
++ __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
++ struct mb_cache_entry *ce;
++ int error;
++
++ ce = mb_cache_entry_alloc(ext3_xattr_cache);
++ if (!ce)
++ return -ENOMEM;
++ error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash);
++ if (error) {
++ mb_cache_entry_free(ce);
++ if (error == -EBUSY) {
++ ea_bdebug(bh, "already in cache (%d cache entries)",
++ atomic_read(&ext3_xattr_cache->c_entry_count));
++ error = 0;
++ }
++ } else {
++ ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
++ atomic_read(&ext3_xattr_cache->c_entry_count));
++ mb_cache_entry_release(ce);
++ }
++ return error;
++}
++
++/*
++ * ext3_xattr_cmp()
++ *
++ * Compare two extended attribute blocks for equality.
++ *
++ * Returns 0 if the blocks are equal, 1 if they differ, and
++ * a negative error number on errors.
++ */
++static int
++ext3_xattr_cmp(struct ext3_xattr_header *header1,
++ struct ext3_xattr_header *header2)
++{
++ struct ext3_xattr_entry *entry1, *entry2;
++
++ entry1 = ENTRY(header1+1);
++ entry2 = ENTRY(header2+1);
++ while (!IS_LAST_ENTRY(entry1)) {
++ if (IS_LAST_ENTRY(entry2))
++ return 1;
++ if (entry1->e_hash != entry2->e_hash ||
++ entry1->e_name_len != entry2->e_name_len ||
++ entry1->e_value_size != entry2->e_value_size ||
++ memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
++ return 1;
++ if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
++ return -EIO;
++ if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
++ (char *)header2 + le16_to_cpu(entry2->e_value_offs),
++ le32_to_cpu(entry1->e_value_size)))
++ return 1;
++
++ entry1 = EXT3_XATTR_NEXT(entry1);
++ entry2 = EXT3_XATTR_NEXT(entry2);
++ }
++ if (!IS_LAST_ENTRY(entry2))
++ return 1;
++ return 0;
++}
++
++/*
++ * ext3_xattr_cache_find()
++ *
++ * Find an identical extended attribute block whose reference count is
++ * still below EXT3_XATTR_REFCOUNT_MAX, so that it can be shared.
++ * Returns a pointer to the block found, or NULL if such a block was
++ * not found or an error occurred.
++ */
++static struct buffer_head *
++ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header)
++{
++ __u32 hash = le32_to_cpu(header->h_hash);
++ struct mb_cache_entry *ce;
++
++ if (!header->h_hash)
++ return NULL; /* never share */
++ ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
++ ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, inode->i_dev, hash);
++ while (ce) {
++ struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block);
++
++ if (!bh) {
++ ext3_error(inode->i_sb, "ext3_xattr_cache_find",
++ "inode %ld: block %ld read error",
++ inode->i_ino, ce->e_block);
++ } else if (le32_to_cpu(HDR(bh)->h_refcount) >=
++ EXT3_XATTR_REFCOUNT_MAX) { /* one more ref would exceed the max */
++ ea_idebug(inode, "block %ld refcount %d>=%d",ce->e_block,
++ le32_to_cpu(HDR(bh)->h_refcount),
++ EXT3_XATTR_REFCOUNT_MAX);
++ } else if (!ext3_xattr_cmp(header, HDR(bh))) {
++ ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count)));
++ mb_cache_entry_release(ce);
++ return bh; /* caller now owns the buffer reference */
++ }
++ brelse(bh);
++ ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash);
++ }
++ return NULL;
++}
++
++/*
++ * ext3_xattr_cache_remove()
++ *
++ * Remove the cache entry of a block from the cache. Called when a
++ * block becomes invalid.
++ */
++static void
++ext3_xattr_cache_remove(struct buffer_head *bh)
++{
++ struct mb_cache_entry *ce;
++
++ ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_dev, bh->b_blocknr);
++ if (ce) {
++ ea_bdebug(bh, "removing (%d cache entries remaining)",
++ atomic_read(&ext3_xattr_cache->c_entry_count)-1);
++ mb_cache_entry_free(ce);
++ } else
++ ea_bdebug(bh, "no cache entry");
++}
++
++#define NAME_HASH_SHIFT 5
++#define VALUE_HASH_SHIFT 16
++
++/*
++ * ext3_xattr_hash_entry()
++ *
++ * Compute the hash of an extended attribute.
++ */
++static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header,
++ struct ext3_xattr_entry *entry)
++{
++ __u32 hash = 0;
++ char *name = entry->e_name;
++ int n;
++
++ for (n=0; n < entry->e_name_len; n++) {
++ hash = (hash << NAME_HASH_SHIFT) ^
++ (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
++ *name++;
++ }
++
++ if (entry->e_value_block == 0 && entry->e_value_size != 0) {
++ __u32 *value = (__u32 *)((char *)header +
++ le16_to_cpu(entry->e_value_offs));
++ for (n = (le32_to_cpu(entry->e_value_size) +
++ EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) {
++ hash = (hash << VALUE_HASH_SHIFT) ^
++ (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
++ le32_to_cpu(*value++);
++ }
++ }
++ entry->e_hash = cpu_to_le32(hash);
++}
++
++#undef NAME_HASH_SHIFT
++#undef VALUE_HASH_SHIFT
++
++#define BLOCK_HASH_SHIFT 16
++
++/*
++ * ext3_xattr_rehash()
++ *
++ * Re-compute the extended attribute hash value after an entry has changed.
++ */
++static void ext3_xattr_rehash(struct ext3_xattr_header *header,
++ struct ext3_xattr_entry *entry)
++{
++ struct ext3_xattr_entry *here;
++ __u32 hash = 0;
++
++ ext3_xattr_hash_entry(header, entry);
++ here = ENTRY(header+1);
++ while (!IS_LAST_ENTRY(here)) {
++ if (!here->e_hash) {
++ /* Block is not shared if an entry's hash value == 0 */
++ hash = 0;
++ break;
++ }
++ hash = (hash << BLOCK_HASH_SHIFT) ^
++ (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
++ le32_to_cpu(here->e_hash);
++ here = EXT3_XATTR_NEXT(here);
++ }
++ header->h_hash = cpu_to_le32(hash);
++}
++
++#undef BLOCK_HASH_SHIFT
++
++int __init
++init_ext3_xattr(void)
++{
++ ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL,
++ sizeof(struct mb_cache_entry) +
++ sizeof(struct mb_cache_entry_index), 1, 61);
++ if (!ext3_xattr_cache)
++ return -ENOMEM;
++
++ return 0;
++}
++
++void
++exit_ext3_xattr(void)
++{
++ if (ext3_xattr_cache)
++ mb_cache_destroy(ext3_xattr_cache);
++ ext3_xattr_cache = NULL;
++}
++
++#else /* CONFIG_EXT3_FS_XATTR_SHARING */
++
++int __init
++init_ext3_xattr(void)
++{
++ return 0;
++}
++
++void
++exit_ext3_xattr(void)
++{
++}
++
++#endif /* CONFIG_EXT3_FS_XATTR_SHARING */
+--- linux-2.4.18/include/linux/ext3_fs.h~linux-2.4.18ea-0.8.26-2 2003-09-01 11:51:00.000000000 +0400
++++ linux-2.4.18-alexey/include/linux/ext3_fs.h 2003-09-01 14:55:39.000000000 +0400
+@@ -63,8 +63,6 @@
+ */
+ #define EXT3_BAD_INO 1 /* Bad blocks inode */
+ #define EXT3_ROOT_INO 2 /* Root inode */
+-#define EXT3_ACL_IDX_INO 3 /* ACL inode */
+-#define EXT3_ACL_DATA_INO 4 /* ACL inode */
+ #define EXT3_BOOT_LOADER_INO 5 /* Boot loader inode */
+ #define EXT3_UNDEL_DIR_INO 6 /* Undelete directory inode */
+ #define EXT3_RESIZE_INO 7 /* Reserved group descriptors inode */
+@@ -94,7 +92,6 @@
+ #else
+ # define EXT3_BLOCK_SIZE(s) (EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size)
+ #endif
+-#define EXT3_ACLE_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_acl_entry))
+ #define EXT3_ADDR_PER_BLOCK(s) (EXT3_BLOCK_SIZE(s) / sizeof (__u32))
+ #ifdef __KERNEL__
+ # define EXT3_BLOCK_SIZE_BITS(s) ((s)->s_blocksize_bits)
+@@ -129,28 +126,6 @@
+ #endif
+
+ /*
+- * ACL structures
+- */
+-struct ext3_acl_header /* Header of Access Control Lists */
+-{
+- __u32 aclh_size;
+- __u32 aclh_file_count;
+- __u32 aclh_acle_count;
+- __u32 aclh_first_acle;
+-};
+-
+-struct ext3_acl_entry /* Access Control List Entry */
+-{
+- __u32 acle_size;
+- __u16 acle_perms; /* Access permissions */
+- __u16 acle_type; /* Type of entry */
+- __u16 acle_tag; /* User or group identity */
+- __u16 acle_pad1;
+- __u32 acle_next; /* Pointer on next entry for the */
+- /* same inode or on next free entry */
+-};
+-
+-/*
+ * Structure of a blocks group descriptor
+ */
+ struct ext3_group_desc
+@@ -521,7 +496,7 @@ struct ext3_super_block {
+ #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */
+ #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */
+
+-#define EXT3_FEATURE_COMPAT_SUPP 0
++#define EXT3_FEATURE_COMPAT_SUPP EXT3_FEATURE_COMPAT_EXT_ATTR
+ #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \
+ EXT3_FEATURE_INCOMPAT_RECOVER)
+ #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+@@ -623,6 +598,24 @@ struct dx_hash_info
+ #define HASH_NB_ALWAYS 1
+
+
++/* Defined for extended attributes */
++#define CONFIG_EXT3_FS_XATTR y
++#ifndef ENOATTR
++#define ENOATTR ENODATA /* No such attribute */
++#endif
++#ifndef ENOTSUP
++#define ENOTSUP EOPNOTSUPP /* Operation not supported */
++#endif
++#ifndef XATTR_NAME_MAX
++#define XATTR_NAME_MAX 255 /* # chars in an extended attribute name */
++#define XATTR_SIZE_MAX 65536 /* size of an extended attribute value (64k) */
++#define XATTR_LIST_MAX 65536 /* size of extended attribute namelist (64k) */
++#endif
++#ifndef XATTR_CREATE
++#define XATTR_CREATE 1 /* set value, fail if attr already exists */
++#define XATTR_REPLACE 2 /* set value, fail if attr does not exist */
++#endif
++
+ /*
+ * Describe an inode's exact location on disk and in memory
+ */
+@@ -704,6 +697,7 @@ extern void ext3_check_inodes_bitmap (st
+ extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
+
+ /* inode.c */
++extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
+ extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
+ extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
+
+--- linux-2.4.18/include/linux/ext3_jbd.h~linux-2.4.18ea-0.8.26-2 2003-08-29 16:53:17.000000000 +0400
++++ linux-2.4.18-alexey/include/linux/ext3_jbd.h 2003-09-01 14:55:39.000000000 +0400
+@@ -30,13 +30,19 @@
+
+ #define EXT3_SINGLEDATA_TRANS_BLOCKS 8
+
++/* Extended attributes may touch two data buffers, two bitmap buffers,
++ * and two group summaries. */
++
++#define EXT3_XATTR_TRANS_BLOCKS 8
++
+ /* Define the minimum size for a transaction which modifies data. This
+ * needs to take into account the fact that we may end up modifying two
+ * quota files too (one for the group, one for the user quota). The
+ * superblock only gets updated once, of course, so don't bother
+ * counting that again for the quota updates. */
+
+-#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS - 2)
++#define EXT3_DATA_TRANS_BLOCKS (3 * EXT3_SINGLEDATA_TRANS_BLOCKS + \
++ EXT3_XATTR_TRANS_BLOCKS - 2)
+
+ extern int ext3_writepage_trans_blocks(struct inode *inode);
+
+--- /dev/null 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.18-alexey/include/linux/ext3_xattr.h 2003-09-01 14:55:39.000000000 +0400
+@@ -0,0 +1,155 @@
++/*
++ File: linux/ext3_xattr.h
++
++ On-disk format of extended attributes for the ext3 filesystem.
++
++ (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
++*/
++
++#include <linux/config.h>
++#include <linux/init.h>
++#include <linux/xattr.h>
++
++/* Magic value in attribute blocks */
++#define EXT3_XATTR_MAGIC 0xEA020000
++
++/* Maximum number of references to one attribute block */
++#define EXT3_XATTR_REFCOUNT_MAX 1024
++
++/* Name indexes */
++#define EXT3_XATTR_INDEX_MAX 10
++#define EXT3_XATTR_INDEX_USER 1
++
++struct ext3_xattr_header {
++ __u32 h_magic; /* magic number for identification */
++ __u32 h_refcount; /* reference count */
++ __u32 h_blocks; /* number of disk blocks used */
++ __u32 h_hash; /* hash value of all attributes */
++ __u32 h_reserved[4]; /* zero right now */
++};
++
++struct ext3_xattr_entry {
++ __u8 e_name_len; /* length of name */
++ __u8 e_name_index; /* attribute name index */
++ __u16 e_value_offs; /* offset in disk block of value */
++ __u32 e_value_block; /* disk block attribute is stored on (n/i) */
++ __u32 e_value_size; /* size of attribute value */
++ __u32 e_hash; /* hash value of name and value */
++ char e_name[0]; /* attribute name */
++};
++
++#define EXT3_XATTR_PAD_BITS 2
++#define EXT3_XATTR_PAD (1<<EXT3_XATTR_PAD_BITS)
++#define EXT3_XATTR_ROUND (EXT3_XATTR_PAD-1)
++#define EXT3_XATTR_LEN(name_len) \
++ (((name_len) + EXT3_XATTR_ROUND + \
++ sizeof(struct ext3_xattr_entry)) & ~EXT3_XATTR_ROUND)
++#define EXT3_XATTR_NEXT(entry) \
++ ( (struct ext3_xattr_entry *)( \
++ (char *)(entry) + EXT3_XATTR_LEN((entry)->e_name_len)) )
++#define EXT3_XATTR_SIZE(size) \
++ (((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND)
++
++#ifdef __KERNEL__
++
++# ifdef CONFIG_EXT3_FS_XATTR
++
++struct ext3_xattr_handler {
++ char *prefix;
++ size_t (*list)(char *list, struct inode *inode, const char *name,
++ int name_len);
++ int (*get)(struct inode *inode, const char *name, void *buffer,
++ size_t size);
++ int (*set)(struct inode *inode, const char *name, void *buffer,
++ size_t size, int flags);
++};
++
++extern int ext3_xattr_register(int, struct ext3_xattr_handler *);
++extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *);
++
++extern int ext3_setxattr(struct dentry *, const char *, void *, size_t, int);
++extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t);
++extern ssize_t ext3_listxattr(struct dentry *, char *, size_t);
++extern int ext3_removexattr(struct dentry *, const char *);
++
++extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
++extern int ext3_xattr_list(struct inode *, char *, size_t);
++extern int ext3_xattr_set(handle_t *handle, struct inode *, int, const char *, void *, size_t, int);
++
++extern void ext3_xattr_drop_inode(handle_t *, struct inode *);
++extern void ext3_xattr_put_super(struct super_block *);
++
++extern int init_ext3_xattr(void) __init;
++extern void exit_ext3_xattr(void);
++
++# else /* CONFIG_EXT3_FS_XATTR */
++# define ext3_setxattr NULL
++# define ext3_getxattr NULL
++# define ext3_listxattr NULL
++# define ext3_removexattr NULL
++
++static inline int
++ext3_xattr_get(struct inode *inode, int name_index, const char *name,
++ void *buffer, size_t size)
++{
++ return -ENOTSUP; /* stub: extended attributes are compiled out */
++}
++
++static inline int
++ext3_xattr_list(struct inode *inode, char *buffer, size_t size)
++{
++ return -ENOTSUP; /* stub: extended attributes are compiled out */
++}
++
++static inline int
++ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index,
++ const char *name, void *value, size_t size, int flags)
++{
++ return -ENOTSUP;
++}
++
++static inline void
++ext3_xattr_drop_inode(handle_t *handle, struct inode *inode)
++{
++}
++
++static inline void
++ext3_xattr_put_super(struct super_block *sb)
++{
++}
++
++static inline int
++init_ext3_xattr(void)
++{
++ return 0;
++}
++
++static inline void
++exit_ext3_xattr(void)
++{
++}
++
++# endif /* CONFIG_EXT3_FS_XATTR */
++
++# ifdef CONFIG_EXT3_FS_XATTR_USER
++
++extern int init_ext3_xattr_user(void) __init;
++extern void exit_ext3_xattr_user(void);
++
++# else /* CONFIG_EXT3_FS_XATTR_USER */
++
++static inline int
++init_ext3_xattr_user(void)
++{
++ return 0;
++}
++
++static inline void
++exit_ext3_xattr_user(void)
++{
++}
++
++#endif /* CONFIG_EXT3_FS_XATTR_USER */
++
++#endif /* __KERNEL__ */
++
+--- /dev/null 2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.18-alexey/include/linux/xattr.h 2003-09-01 14:55:39.000000000 +0400
+@@ -0,0 +1,15 @@
++/*
++ File: linux/xattr.h
++
++ Extended attributes handling.
++
++ Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
++ Copyright (C) 2001 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com>
++*/
++#ifndef _LINUX_XATTR_H
++#define _LINUX_XATTR_H
++
++#define XATTR_CREATE 1 /* set value, fail if attr already exists */
++#define XATTR_REPLACE 2 /* set value, fail if attr does not exist */
++
++#endif /* _LINUX_XATTR_H */
+
+_
--- /dev/null
+ include/linux/fs.h | 1 +
+ mm/filemap.c | 3 +++
+ 2 files changed, 4 insertions(+)
+
+--- linux-2.4.20-b_llpmd-l24/include/linux/fs.h~removepage-2.4.20 2003-09-05 11:45:42.000000000 -0700
++++ linux-2.4.20-b_llpmd-l24-zab/include/linux/fs.h 2003-09-05 11:46:25.000000000 -0700
+@@ -402,6 +402,7 @@ struct address_space_operations {
+ int (*releasepage) (struct page *, int);
+ #define KERNEL_HAS_O_DIRECT /* this is for modules out of the kernel */
+ int (*direct_IO)(int, struct inode *, struct kiobuf *, unsigned long, int);
++ void (*removepage)(struct page *); /* called when page gets removed from the inode */
+ };
+
+ struct address_space {
+--- linux-2.4.20-b_llpmd-l24/mm/filemap.c~removepage-2.4.20 2003-09-05 11:45:42.000000000 -0700
++++ linux-2.4.20-b_llpmd-l24-zab/mm/filemap.c 2003-09-05 11:46:25.000000000 -0700
+@@ -95,6 +95,9 @@ static inline void remove_page_from_inod
+ {
+ struct address_space * mapping = page->mapping;
+
++ if (mapping->a_ops->removepage)
++ mapping->a_ops->removepage(page);
++
+ mapping->nrpages--;
+ list_del(&page->list);
+ page->mapping = NULL;
+
+_
--- /dev/null
+ include/linux/fs.h | 1 +
+ mm/filemap.c | 3 +++
+ 2 files changed, 4 insertions(+)
+
+--- linux-2.6.0-test3-l25/include/linux/fs.h~removepage-2.6.0 2003-09-05 15:31:52.000000000 -0700
++++ linux-2.6.0-test3-l25-zab/include/linux/fs.h 2003-09-08 10:47:30.000000000 -0700
+@@ -311,6 +311,7 @@ struct address_space_operations {
+ int (*releasepage) (struct page *, int);
+ int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
+ loff_t offset, unsigned long nr_segs);
++ void (*removepage)(struct page *); /* called when page gets removed from the inode */
+ };
+
+ struct backing_dev_info;
+--- linux-2.6.0-test3-l25/mm/filemap.c~removepage-2.6.0 2003-08-08 21:34:39.000000000 -0700
++++ linux-2.6.0-test3-l25-zab/mm/filemap.c 2003-09-08 10:48:10.000000000 -0700
+@@ -81,6 +81,9 @@ void __remove_from_page_cache(struct pag
+ {
+ struct address_space *mapping = page->mapping;
+
++ if (mapping->a_ops->removepage)
++ mapping->a_ops->removepage(page);
++
+ radix_tree_delete(&mapping->page_tree, page->index);
+ list_del(&page->list);
+ page->mapping = NULL;
+
+_
--- /dev/null
+ include/asm-um/unistd.h | 2 ++
+ 1 files changed, 2 insertions(+)
+
+diff -puN include/asm-um/unistd.h~uml-2.6.0-fix include/asm-um/unistd.h
+--- linux-2.6.0-test3/include/asm-um/unistd.h~uml-2.6.0-fix 2003-09-04 18:39:45.000000000 +0400
++++ linux-2.6.0-test3-alexey/include/asm-um/unistd.h 2003-09-04 18:39:59.000000000 +0400
+@@ -6,8 +6,10 @@
+ #ifndef _UM_UNISTD_H_
+ #define _UM_UNISTD_H_
+
++#ifdef __KERNEL__
+ #include "linux/resource.h"
+ #include "asm/uaccess.h"
++#endif
+
+ extern long sys_open(const char *filename, int flags, int mode);
+ extern long sys_dup(unsigned int fildes);
+
+_
--- /dev/null
+diff -Naur a/arch/um/Kconfig b/arch/um/Kconfig
+--- a/arch/um/Kconfig Fri Aug 15 15:05:57 2003
++++ b/arch/um/Kconfig Fri Aug 15 15:11:53 2003
+@@ -61,6 +61,20 @@
+
+ config NET
+ bool "Networking support"
++ help
++ Unless you really know what you are doing, you should say Y here.
++ The reason is that some programs need kernel networking support even
++ when running on a stand-alone machine that isn't connected to any
++ other computer. If you are upgrading from an older kernel, you
++ should consider updating your networking tools too because changes
++ in the kernel and the tools often go hand in hand. The tools are
++ contained in the package net-tools, the location and version number
++ of which are given in Documentation/Changes.
++
++ For a general introduction to Linux networking, it is highly
++ recommended to read the NET-HOWTO, available from
++ <http://www.tldp.org/docs.html#howto>.
++
+
+ source "fs/Kconfig.binfmt"
+
+@@ -85,6 +99,19 @@
+ If you'd like to be able to work with files stored on the host,
+ say Y or M here; otherwise say N.
+
++config HPPFS
++ tristate "HoneyPot ProcFS"
++ help
++ hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc
++ entries to be overridden, removed, or fabricated from the host.
++ Its purpose is to allow a UML to appear to be a physical machine
++ by removing or changing anything in /proc which gives away the
++ identity of a UML.
++
++ See http://user-mode-linux.sf.net/hppfs.html for more information.
++
++ You only need this if you are setting up a UML honeypot. Otherwise,
++ it is safe to say 'N' here.
+
+ config MCONSOLE
+ bool "Management console"
+@@ -105,6 +132,16 @@
+ config MAGIC_SYSRQ
+ bool "Magic SysRq key"
+ depends on MCONSOLE
++ help
++ If you say Y here, you will have some control over the system even
++ if the system crashes for example during kernel debugging (e.g., you
++ will be able to flush the buffer cache to disk, reboot the system
++ immediately or dump some status information). This is accomplished
++ by pressing various keys while holding SysRq (Alt+PrintScreen). It
++ also works on a serial console (on PC hardware at least), if you
++ send a BREAK and then within 5 seconds a command keypress. The
++ keys are documented in Documentation/sysrq.txt. Don't say Y
++ unless you really know what this hack does.
+
+ config HOST_2G_2G
+ bool "2G/2G host address space split"
+@@ -159,6 +196,9 @@
+ config HIGHMEM
+ bool "Highmem support"
+
++config PROC_MM
++ bool "/proc/mm support"
++
+ config KERNEL_STACK_ORDER
+ int "Kernel stack size order"
+ default 2
+@@ -239,6 +279,10 @@
+ config PT_PROXY
+ bool "Enable ptrace proxy"
+ depends on XTERM_CHAN && DEBUG_INFO
++ help
++ This option enables a debugging interface which allows gdb to debug
++ the kernel without needing to actually attach to kernel threads.
++ If you want to do kernel debugging, say Y here; otherwise say N.
+
+ config GPROF
+ bool "Enable gprof support"
+diff -Naur a/arch/um/Kconfig_block b/arch/um/Kconfig_block
+--- a/arch/um/Kconfig_block Fri Aug 15 15:07:32 2003
++++ b/arch/um/Kconfig_block Fri Aug 15 15:12:56 2003
+@@ -29,6 +29,20 @@
+ wise choice too. In all other cases (for example, if you're just
+ playing around with User-Mode Linux) you can choose N.
+
++# Turn this back on when the driver actually works
++#
++#config BLK_DEV_COW
++# tristate "COW block device"
++# help
++# This is a layered driver which sits above two other block devices.
++# One is read-only, and the other is a read-write layer which stores
++# all changes. This provides the illusion that the read-only layer
++# can be mounted read-write and changed.
++
++config BLK_DEV_COW_COMMON
++ bool
++ default BLK_DEV_COW || BLK_DEV_UBD
++
+ config BLK_DEV_LOOP
+ tristate "Loopback device support"
+
+diff -Naur a/arch/um/Kconfig_net b/arch/um/Kconfig_net
+--- a/arch/um/Kconfig_net Fri Aug 15 15:06:52 2003
++++ b/arch/um/Kconfig_net Fri Aug 15 15:12:43 2003
+@@ -1,5 +1,5 @@
+
+-menu "Network Devices"
++menu "UML Network Devices"
+ depends on NET
+
+ # UML virtual driver
+@@ -176,73 +176,5 @@
+
+ Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
+
+-
+-# Below are hardware-independent drivers mirrored from
+-# drivers/net/Config.in. It would be nice if Linux
+-# had HW independent drivers separated from the other
+-# but it does not. Until then each non-ISA/PCI arch
+-# needs to provide it's own menu of network drivers
+-config DUMMY
+- tristate "Dummy net driver support"
+-
+-config BONDING
+- tristate "Bonding driver support"
+-
+-config EQUALIZER
+- tristate "EQL (serial line load balancing) support"
+-
+-config TUN
+- tristate "Universal TUN/TAP device driver support"
+-
+-config ETHERTAP
+- tristate "Ethertap network tap (OBSOLETE)"
+- depends on EXPERIMENTAL && NETLINK
+-
+-config PPP
+- tristate "PPP (point-to-point protocol) support"
+-
+-config PPP_MULTILINK
+- bool "PPP multilink support (EXPERIMENTAL)"
+- depends on PPP && EXPERIMENTAL
+-
+-config PPP_FILTER
+- bool "PPP filtering"
+- depends on PPP && FILTER
+-
+-config PPP_ASYNC
+- tristate "PPP support for async serial ports"
+- depends on PPP
+-
+-config PPP_SYNC_TTY
+- tristate "PPP support for sync tty ports"
+- depends on PPP
+-
+-config PPP_DEFLATE
+- tristate "PPP Deflate compression"
+- depends on PPP
+-
+-config PPP_BSDCOMP
+- tristate "PPP BSD-Compress compression"
+- depends on PPP
+-
+-config PPPOE
+- tristate "PPP over Ethernet (EXPERIMENTAL)"
+- depends on PPP && EXPERIMENTAL
+-
+-config SLIP
+- tristate "SLIP (serial line) support"
+-
+-config SLIP_COMPRESSED
+- bool "CSLIP compressed headers"
+- depends on SLIP=y
+-
+-config SLIP_SMART
+- bool "Keepalive and linefill"
+- depends on SLIP=y
+-
+-config SLIP_MODE_SLIP6
+- bool "Six bit SLIP encapsulation"
+- depends on SLIP=y
+-
+ endmenu
+
+diff -Naur a/arch/um/Makefile b/arch/um/Makefile
+--- a/arch/um/Makefile Fri Aug 15 15:07:18 2003
++++ b/arch/um/Makefile Fri Aug 15 15:12:45 2003
+@@ -24,15 +24,17 @@
+ # Have to precede the include because the included Makefiles reference them.
+ SYMLINK_HEADERS = include/asm-um/archparam.h include/asm-um/system.h \
+ include/asm-um/sigcontext.h include/asm-um/processor.h \
+- include/asm-um/ptrace.h include/asm-um/arch-signal.h
++ include/asm-um/ptrace.h include/asm-um/arch-signal.h \
++ include/asm-um/module.h
+
+ ARCH_SYMLINKS = include/asm-um/arch $(ARCH_DIR)/include/sysdep $(ARCH_DIR)/os \
+ $(SYMLINK_HEADERS) $(ARCH_DIR)/include/uml-config.h
+
+ GEN_HEADERS += $(ARCH_DIR)/include/task.h $(ARCH_DIR)/include/kern_constants.h
+
+-include $(ARCH_DIR)/Makefile-$(SUBARCH)
+-include $(ARCH_DIR)/Makefile-os-$(OS)
++.PHONY: sys_prepare
++sys_prepare:
++ @:
+
+ MAKEFILE-$(CONFIG_MODE_TT) += Makefile-tt
+ MAKEFILE-$(CONFIG_MODE_SKAS) += Makefile-skas
+@@ -41,6 +43,9 @@
+ include $(addprefix $(ARCH_DIR)/,$(MAKEFILE-y))
+ endif
+
++include $(ARCH_DIR)/Makefile-$(SUBARCH)
++include $(ARCH_DIR)/Makefile-os-$(OS)
++
+ EXTRAVERSION := $(EXTRAVERSION)-1um
+
+ ARCH_INCLUDE = -I$(ARCH_DIR)/include
+@@ -52,14 +57,14 @@
+
+ CFLAGS += $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \
+ -D_LARGEFILE64_SOURCE $(ARCH_INCLUDE) -Derrno=kernel_errno \
+- $(MODE_INCLUDE)
++ -Dsigprocmask=kernel_sigprocmask $(MODE_INCLUDE)
+
+ LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc
+
+ SIZE = (($(CONFIG_NEST_LEVEL) + $(CONFIG_KERNEL_HALF_GIGS)) * 0x20000000)
+
+ ifeq ($(CONFIG_MODE_SKAS), y)
+-$(SYS_HEADERS) : $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h
++$(SYS_HEADERS) : $(TOPDIR)/$(ARCH_DIR)/include/skas_ptregs.h
+ endif
+
+ include/linux/version.h: arch/$(ARCH)/Makefile
+@@ -116,6 +121,7 @@
+
+ USER_CFLAGS := $(patsubst -I%,,$(CFLAGS))
+ USER_CFLAGS := $(patsubst -Derrno=kernel_errno,,$(USER_CFLAGS))
++USER_CFLAGS := $(patsubst -Dsigprocmask=kernel_sigprocmask,,$(USER_CFLAGS))
+ USER_CFLAGS := $(patsubst -D__KERNEL__,,$(USER_CFLAGS)) $(ARCH_INCLUDE) \
+ $(MODE_INCLUDE)
+
+@@ -123,9 +129,10 @@
+ USER_CFLAGS += -D_GNU_SOURCE
+
+ CLEAN_FILES += linux x.i gmon.out $(ARCH_DIR)/uml.lds.s \
+- $(ARCH_DIR)/dyn_link.ld.s $(GEN_HEADERS)
++ $(ARCH_DIR)/dyn_link.ld.s $(ARCH_DIR)/include/uml-config.h \
++ $(GEN_HEADERS)
+
+-$(ARCH_DIR)/main.o: $(ARCH_DIR)/main.c
++$(ARCH_DIR)/main.o: $(ARCH_DIR)/main.c sys_prepare
+ $(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $<
+
+ archmrproper:
+@@ -161,19 +168,23 @@
+ $(ARCH_DIR)/os:
+ cd $(ARCH_DIR) && ln -sf os-$(OS) os
+
+-$(ARCH_DIR)/include/uml-config.h :
++$(ARCH_DIR)/include/uml-config.h : $(TOPDIR)/include/linux/autoconf.h
+ sed 's/ CONFIG/ UML_CONFIG/' $(TOPDIR)/include/linux/autoconf.h > $@
+
++filechk_$(ARCH_DIR)/include/task.h := $(ARCH_DIR)/util/mk_task
++
+ $(ARCH_DIR)/include/task.h : $(ARCH_DIR)/util/mk_task
+- $< > $@
++ $(call filechk,$@)
++
++filechk_$(ARCH_DIR)/include/kern_constants.h := $(ARCH_DIR)/util/mk_constants
+
+ $(ARCH_DIR)/include/kern_constants.h : $(ARCH_DIR)/util/mk_constants
+- $< > $@
++ $(call filechk,$@)
+
+-$(ARCH_DIR)/util/mk_task : $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h \
+- $(ARCH_DIR)/util FORCE ;
++$(ARCH_DIR)/util/mk_task $(ARCH_DIR)/util/mk_constants : $(ARCH_DIR)/util \
++ sys_prepare FORCE ;
+
+ $(ARCH_DIR)/util: FORCE
+- @$(call descend,$@,)
++ $(MAKE) -f scripts/Makefile.build obj=$@
+
+-export SUBARCH USER_CFLAGS OS
++export SUBARCH USER_CFLAGS OS
+diff -Naur a/arch/um/Makefile-i386 b/arch/um/Makefile-i386
+--- a/arch/um/Makefile-i386 Fri Aug 15 15:07:46 2003
++++ b/arch/um/Makefile-i386 Fri Aug 15 15:13:14 2003
+@@ -16,22 +16,28 @@
+
+ SYS_HEADERS = $(SYS_DIR)/sc.h $(SYS_DIR)/thread.h
+
++sys_prepare: $(SYS_DIR)/sc.h
++
+ prepare: $(SYS_HEADERS)
+
++filechk_$(SYS_DIR)/sc.h := $(SYS_UTIL_DIR)/mk_sc
++
+ $(SYS_DIR)/sc.h: $(SYS_UTIL_DIR)/mk_sc
+- $< > $@
++ $(call filechk,$@)
++
++filechk_$(SYS_DIR)/thread.h := $(SYS_UTIL_DIR)/mk_thread
+
+ $(SYS_DIR)/thread.h: $(SYS_UTIL_DIR)/mk_thread
+- $< > $@
++ $(call filechk,$@)
+
+-$(SYS_UTIL_DIR)/mk_sc: FORCE ;
+- @$(call descend,$(SYS_UTIL_DIR),$@)
++$(SYS_UTIL_DIR)/mk_sc: scripts/fixdep include/config/MARKER FORCE ;
++ +@$(call descend,$(SYS_UTIL_DIR),$@)
+
+-$(SYS_UTIL_DIR)/mk_thread: $(ARCH_SYMLINKS) $(GEN_HEADERS) FORCE ;
+- @$(call descend,$(SYS_UTIL_DIR),$@)
++$(SYS_UTIL_DIR)/mk_thread: $(ARCH_SYMLINKS) $(GEN_HEADERS) sys_prepare FORCE ;
++ +@$(call descend,$(SYS_UTIL_DIR),$@)
+
+ $(SYS_UTIL_DIR): include/asm FORCE
+- @$(call descend,$@,)
++ +@$(call descend,$@,)
+
+ sysclean :
+ rm -f $(SYS_HEADERS)
+diff -Naur a/arch/um/Makefile-skas b/arch/um/Makefile-skas
+--- a/arch/um/Makefile-skas Fri Aug 15 15:05:43 2003
++++ b/arch/um/Makefile-skas Fri Aug 15 15:11:52 2003
+@@ -14,7 +14,7 @@
+ LINK_SKAS = -Wl,-rpath,/lib
+ LD_SCRIPT_SKAS = dyn.lds.s
+
+-GEN_HEADERS += $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h
++GEN_HEADERS += $(TOPDIR)/$(ARCH_DIR)/include/skas_ptregs.h
+
+-$(ARCH_DIR)/kernel/skas/include/skas_ptregs.h :
+- $(MAKE) -C $(ARCH_DIR)/kernel/skas include/skas_ptregs.h
++$(TOPDIR)/$(ARCH_DIR)/include/skas_ptregs.h :
++ $(call descend,$(ARCH_DIR)/kernel/skas,$@)
+diff -Naur a/arch/um/config.release b/arch/um/config.release
+--- a/arch/um/config.release Fri Aug 15 15:09:05 2003
++++ b/arch/um/config.release Fri Aug 15 15:13:48 2003
+@@ -228,7 +228,6 @@
+ CONFIG_EXT2_FS=y
+ CONFIG_SYSV_FS=m
+ CONFIG_UDF_FS=m
+-# CONFIG_UDF_RW is not set
+ CONFIG_UFS_FS=m
+ # CONFIG_UFS_FS_WRITE is not set
+
+diff -Naur a/arch/um/defconfig b/arch/um/defconfig
+--- a/arch/um/defconfig Fri Aug 15 15:07:30 2003
++++ b/arch/um/defconfig Fri Aug 15 15:12:54 2003
+@@ -6,7 +6,6 @@
+ CONFIG_SWAP=y
+ CONFIG_UID16=y
+ CONFIG_RWSEM_GENERIC_SPINLOCK=y
+-CONFIG_CONFIG_LOG_BUF_SHIFT=14
+
+ #
+ # Code maturity level options
+@@ -116,7 +115,6 @@
+ CONFIG_PACKET_MMAP=y
+ # CONFIG_NETLINK_DEV is not set
+ # CONFIG_NETFILTER is not set
+-# CONFIG_FILTER is not set
+ CONFIG_UNIX=y
+ # CONFIG_NET_KEY is not set
+ CONFIG_INET=y
+@@ -385,7 +383,6 @@
+ #
+ # Disk-On-Chip Device Drivers
+ #
+-# CONFIG_MTD_DOC1000 is not set
+ # CONFIG_MTD_DOC2000 is not set
+ # CONFIG_MTD_DOC2001 is not set
+
+diff -Naur a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
+--- a/arch/um/drivers/Makefile Fri Aug 15 15:06:42 2003
++++ b/arch/um/drivers/Makefile Fri Aug 15 15:12:40 2003
+@@ -1,5 +1,5 @@
+ #
+-# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com)
++# Copyright (C) 2000, 2002, 2003 Jeff Dike (jdike@karaya.com)
+ # Licensed under the GPL
+ #
+
+@@ -39,6 +39,8 @@
+ obj-$(CONFIG_TTY_CHAN) += tty.o
+ obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o
+ obj-$(CONFIG_UML_WATCHDOG) += harddog.o
++obj-$(CONFIG_BLK_DEV_COW) += cow_kern.o
++obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
+
+ obj-y += stdio_console.o $(CHAN_OBJS)
+
+@@ -46,7 +48,7 @@
+
+ USER_OBJS := $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS)) fd.o \
+ null.o pty.o tty.o xterm.o
+-USER_OBJS := $(foreach file,$(USER_OBJS),arch/um/drivers/$(file))
++USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
+
+ $(USER_OBJS) : %.o: %.c
+ $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $<
+diff -Naur a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
+--- a/arch/um/drivers/chan_kern.c Fri Aug 15 15:09:13 2003
++++ b/arch/um/drivers/chan_kern.c Fri Aug 15 15:13:51 2003
+@@ -8,6 +8,7 @@
+ #include <linux/list.h>
+ #include <linux/slab.h>
+ #include <linux/tty.h>
++#include <linux/string.h>
+ #include <linux/tty_flip.h>
+ #include <asm/irq.h>
+ #include "chan_kern.h"
+diff -Naur a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c
+--- a/arch/um/drivers/chan_user.c Fri Aug 15 15:03:46 2003
++++ b/arch/um/drivers/chan_user.c Fri Aug 15 15:10:09 2003
+@@ -188,8 +188,8 @@
+ if(!isatty(fd)) return;
+
+ pid = tcgetpgrp(fd);
+- if(!CHOOSE_MODE(is_tracer_winch(pid, fd, device_data), 0) &&
+- (pid == -1)){
++ if(!CHOOSE_MODE_PROC(is_tracer_winch, is_skas_winch, pid, fd,
++ device_data) && (pid == -1)){
+ thread = winch_tramp(fd, device_data, &thread_fd);
+ if(fd != -1){
+ register_winch_irq(thread_fd, fd, thread, device_data);
+diff -Naur a/arch/um/drivers/cow.h b/arch/um/drivers/cow.h
+--- a/arch/um/drivers/cow.h Wed Dec 31 19:00:00 1969
++++ b/arch/um/drivers/cow.h Fri Aug 15 15:10:34 2003
+@@ -0,0 +1,40 @@
++#ifndef __COW_H__
++#define __COW_H__
++
++#include <asm/types.h>
++
++#if __BYTE_ORDER == __BIG_ENDIAN
++# define ntohll(x) (x)
++# define htonll(x) (x)
++#elif __BYTE_ORDER == __LITTLE_ENDIAN
++# define ntohll(x) bswap_64(x)
++# define htonll(x) bswap_64(x)
++#else
++#error "__BYTE_ORDER not defined"
++#endif
++
++extern int init_cow_file(int fd, char *cow_file, char *backing_file,
++ int sectorsize, int *bitmap_offset_out,
++ unsigned long *bitmap_len_out, int *data_offset_out);
++
++extern int file_reader(__u64 offset, char *buf, int len, void *arg);
++extern int read_cow_header(int (*reader)(__u64, char *, int, void *),
++ void *arg, __u32 *magic_out,
++ char **backing_file_out, time_t *mtime_out,
++ __u64 *size_out, int *sectorsize_out,
++ int *bitmap_offset_out);
++
++extern int write_cow_header(char *cow_file, int fd, char *backing_file,
++ int sectorsize, long long *size);
++
++extern void cow_sizes(__u64 size, int sectorsize, int bitmap_offset,
++ unsigned long *bitmap_len_out, int *data_offset_out);
++
++#endif
++
++/*
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/drivers/cow_kern.c b/arch/um/drivers/cow_kern.c
+--- a/arch/um/drivers/cow_kern.c Wed Dec 31 19:00:00 1969
++++ b/arch/um/drivers/cow_kern.c Fri Aug 15 15:13:51 2003
+@@ -0,0 +1,628 @@
++#define COW_MAJOR 60
++#define MAJOR_NR COW_MAJOR
++
++#include <linux/stddef.h>
++#include <linux/kernel.h>
++#include <linux/ctype.h>
++#include <linux/stat.h>
++#include <linux/vmalloc.h>
++#include <linux/blkdev.h>
++#include <linux/blk.h>
++#include <linux/fs.h>
++#include <linux/genhd.h>
++#include <linux/devfs_fs.h>
++#include <asm/uaccess.h>
++#include "2_5compat.h"
++#include "cow.h"
++#include "ubd_user.h"
++
++#define COW_SHIFT 4
++
++struct cow {
++ int count;
++ char *cow_path;
++ dev_t cow_dev;
++ struct block_device *cow_bdev;
++ char *backing_path;
++ dev_t backing_dev;
++ struct block_device *backing_bdev;
++ int sectorsize;
++ unsigned long *bitmap;
++ unsigned long bitmap_len;
++ int bitmap_offset;
++ int data_offset;
++ devfs_handle_t devfs;
++ struct semaphore sem;
++ struct semaphore io_sem;
++ atomic_t working;
++ spinlock_t io_lock;
++ struct buffer_head *bh;
++ struct buffer_head *bhtail;
++ void *end_io;
++};
++
++#define DEFAULT_COW { \
++ .count = 0, \
++ .cow_path = NULL, \
++ .cow_dev = 0, \
++ .backing_path = NULL, \
++ .backing_dev = 0, \
++ .bitmap = NULL, \
++ .bitmap_len = 0, \
++ .bitmap_offset = 0, \
++ .data_offset = 0, \
++ .devfs = NULL, \
++ .working = ATOMIC_INIT(0), \
++ .io_lock = SPIN_LOCK_UNLOCKED, \
++}
++
++#define MAX_DEV (8)
++#define MAX_MINOR (MAX_DEV << COW_SHIFT)
++
++struct cow cow_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_COW };
++
++/* Not modified by this driver */
++static int blk_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = BLOCK_SIZE };
++static int hardsect_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 512 };
++
++/* Protected by cow_lock */
++static int sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 0 };
++
++static struct hd_struct cow_part[MAX_MINOR] =
++ { [ 0 ... MAX_MINOR - 1 ] = { 0, 0, 0 } };
++
++/* Protected by io_request_lock */
++static request_queue_t *cow_queue;
++
++static int cow_open(struct inode *inode, struct file *filp);
++static int cow_release(struct inode * inode, struct file * file);
++static int cow_ioctl(struct inode * inode, struct file * file,
++ unsigned int cmd, unsigned long arg);
++static int cow_revalidate(kdev_t rdev);
++
++static struct block_device_operations cow_blops = {
++ .open = cow_open,
++ .release = cow_release,
++ .ioctl = cow_ioctl,
++ .revalidate = cow_revalidate,
++};
++
++/* Initialized in an initcall, and unchanged thereafter */
++devfs_handle_t cow_dir_handle;
++
++#define INIT_GENDISK(maj, name, parts, shift, bsizes, max, blops) \
++{ \
++ .major = maj, \
++ .major_name = name, \
++ .minor_shift = shift, \
++ .max_p = 1 << shift, \
++ .part = parts, \
++ .sizes = bsizes, \
++ .nr_real = max, \
++ .real_devices = NULL, \
++ .next = NULL, \
++ .fops = blops, \
++ .de_arr = NULL, \
++ .flags = 0 \
++}
++
++static spinlock_t cow_lock = SPIN_LOCK_UNLOCKED;
++
++static struct gendisk cow_gendisk = INIT_GENDISK(MAJOR_NR, "cow", cow_part,
++ COW_SHIFT, sizes, MAX_DEV,
++ &cow_blops);
++
++static int cow_add(int n)
++{
++ struct cow *dev = &cow_dev[n];
++ char name[sizeof("nnnnnn\0")];
++ int err = -ENODEV;
++
++ if(dev->cow_path == NULL)
++ goto out;
++
++ sprintf(name, "%d", n);
++ dev->devfs = devfs_register(cow_dir_handle, name, DEVFS_FL_REMOVABLE,
++ MAJOR_NR, n << COW_SHIFT, S_IFBLK |
++ S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP,
++ &cow_blops, NULL);
++
++ init_MUTEX_LOCKED(&dev->sem);
++ init_MUTEX(&dev->io_sem);
++
++ return(0);
++
++out:
++ return(err);
++}
++
++/*
++* Add buffer_head to back of pending list
++*/
++static void cow_add_bh(struct cow *cow, struct buffer_head *bh)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&cow->io_lock, flags);
++ if(cow->bhtail != NULL){
++ cow->bhtail->b_reqnext = bh;
++ cow->bhtail = bh;
++ }
++ else {
++ cow->bh = bh;
++ cow->bhtail = bh;
++ }
++ spin_unlock_irqrestore(&cow->io_lock, flags);
++}
++
++/*
++* Grab first pending buffer
++*/
++static struct buffer_head *cow_get_bh(struct cow *cow)
++{
++ struct buffer_head *bh;
++
++ spin_lock_irq(&cow->io_lock);
++ bh = cow->bh;
++ if(bh != NULL){
++ if(bh == cow->bhtail)
++ cow->bhtail = NULL;
++ cow->bh = bh->b_reqnext;
++ bh->b_reqnext = NULL;
++ }
++ spin_unlock_irq(&cow->io_lock);
++
++ return(bh);
++}
++
++static void cow_handle_bh(struct cow *cow, struct buffer_head *bh,
++ struct buffer_head **cow_bh, int ncow_bh)
++{
++ int i;
++
++ if(ncow_bh > 0)
++ ll_rw_block(WRITE, ncow_bh, cow_bh);
++
++ for(i = 0; i < ncow_bh ; i++){
++ wait_on_buffer(cow_bh[i]);
++ brelse(cow_bh[i]);
++ }
++
++ ll_rw_block(WRITE, 1, &bh);
++ brelse(bh);
++}
++
++static struct buffer_head *cow_new_bh(struct cow *dev, int sector)
++{
++ struct buffer_head *bh;
++
++ sector = (dev->bitmap_offset + sector / 8) / dev->sectorsize;
++ bh = getblk(dev->cow_dev, sector, dev->sectorsize);
++ memcpy(bh->b_data, dev->bitmap + sector / (8 * sizeof(dev->bitmap[0])),
++ dev->sectorsize);
++ return(bh);
++}
++
++/* Per-device worker thread, copied from loop.c; needed to avoid deadlocking in make_request. */
++
++static int cow_thread(void *data)
++{
++ struct cow *dev = data;
++ struct buffer_head *bh;
++
++ daemonize();
++ exit_files(current);
++
++ sprintf(current->comm, "cow%d", dev - cow_dev);
++
++ spin_lock_irq(&current->sigmask_lock);
++ sigfillset(&current->blocked);
++ flush_signals(current);
++ spin_unlock_irq(&current->sigmask_lock);
++
++ atomic_inc(&dev->working);
++
++ current->policy = SCHED_OTHER;
++ current->nice = -20;
++
++ current->flags |= PF_NOIO;
++
++ /*
++ * up sem, we are running - cow_open() waits for this in down(&dev->sem)
++ */
++ up(&dev->sem);
++
++ for(;;){
++ int start, len, nbh, i, update_bitmap = 0;
++ struct buffer_head *cow_bh[2];
++
++ down_interruptible(&dev->io_sem);
++ /*
++ * could be upped because of tear-down, not because of
++ * pending work
++ */
++ if(!atomic_read(&dev->working))
++ break;
++
++ bh = cow_get_bh(dev);
++ if(bh == NULL){
++ printk(KERN_ERR "cow: missing bh\n");
++ continue;
++ }
++
++ start = bh->b_blocknr * bh->b_size / dev->sectorsize;
++ len = bh->b_size / dev->sectorsize;
++ for(i = 0; i < len ; i++){
++ if(ubd_test_bit(start + i,
++ (unsigned char *) dev->bitmap))
++ continue;
++
++ update_bitmap = 1;
++ ubd_set_bit(start + i, (unsigned char *) dev->bitmap);
++ }
++
++ cow_bh[0] = NULL;
++ cow_bh[1] = NULL;
++ nbh = 0;
++ if(update_bitmap){
++ cow_bh[0] = cow_new_bh(dev, start);
++ nbh++;
++ if(start / dev->sectorsize !=
++ (start + len) / dev->sectorsize){
++ cow_bh[1] = cow_new_bh(dev, start + len);
++ nbh++;
++ }
++ }
++
++ bh->b_dev = dev->cow_dev;
++ bh->b_blocknr += dev->data_offset / dev->sectorsize;
++
++ cow_handle_bh(dev, bh, cow_bh, nbh);
++
++ /*
++ * upped both for pending work and tear-down, dev->working
++ * will hit zero then
++ */
++ if(atomic_dec_and_test(&dev->working))
++ break;
++ }
++
++ up(&dev->sem);
++ return(0);
++}
++
++static int cow_make_request(request_queue_t *q, int rw, struct buffer_head *bh)
++{
++ struct cow *dev;
++ int n, minor;
++
++ minor = MINOR(bh->b_rdev);
++ n = minor >> COW_SHIFT;
++ dev = &cow_dev[n];
++
++ dev->end_io = NULL;
++ if(ubd_test_bit(bh->b_rsector, (unsigned char *) dev->bitmap)){
++ bh->b_rdev = dev->cow_dev;
++ bh->b_rsector += dev->data_offset / dev->sectorsize;
++ }
++ else if(rw == WRITE){
++ bh->b_dev = dev->cow_dev;
++ bh->b_blocknr += dev->data_offset / dev->sectorsize;
++
++ cow_add_bh(dev, bh);
++ up(&dev->io_sem);
++ return(0);
++ }
++ else {
++ bh->b_rdev = dev->backing_dev;
++ }
++
++ return(1);
++}
++
++int cow_init(void)
++{
++ int i;
++
++ cow_dir_handle = devfs_mk_dir (NULL, "cow", NULL);
++ if (devfs_register_blkdev(MAJOR_NR, "cow", &cow_blops)) {
++ printk(KERN_ERR "cow: unable to get major %d\n", MAJOR_NR);
++ return -1;
++ }
++ read_ahead[MAJOR_NR] = 8; /* 8 sector (4kB) read-ahead */
++ blksize_size[MAJOR_NR] = blk_sizes;
++ blk_size[MAJOR_NR] = sizes;
++ INIT_HARDSECT(hardsect_size, MAJOR_NR, hardsect_sizes);
++
++ cow_queue = BLK_DEFAULT_QUEUE(MAJOR_NR);
++ blk_init_queue(cow_queue, NULL);
++ INIT_ELV(cow_queue, &cow_queue->elevator);
++ blk_queue_make_request(cow_queue, cow_make_request);
++
++ add_gendisk(&cow_gendisk);
++
++ for(i=0;i<MAX_DEV;i++)
++ cow_add(i);
++
++ return(0);
++}
++
++__initcall(cow_init);
++
++static int reader(__u64 start, char *buf, int count, void *arg)
++{
++ dev_t dev = *((dev_t *) arg);
++ struct buffer_head *bh;
++ __u64 block;
++ int cur, offset, left, n, blocksize = get_hardsect_size(dev);
++
++ if(blocksize == 0)
++ panic("Zero blocksize");
++
++ block = start / blocksize;
++ offset = start % blocksize;
++ left = count;
++ cur = 0;
++ while(left > 0){
++ n = blocksize - offset; /* bytes available in this block */
++
++ bh = bread(dev, block, blocksize); /* block is in blocksize units */
++ if(bh == NULL)
++ return(-EIO);
++
++ n = (n > left) ? left : n; /* never copy more than was asked for */
++ memcpy(&buf[cur], bh->b_data + offset, n);
++ block++;
++ left -= n;
++ cur += n;
++ offset = 0; /* only the first block starts mid-way */
++ brelse(bh);
++ }
++
++ return(count);
++}
++
++static int cow_open(struct inode *inode, struct file *filp)
++{
++ int (*dev_ioctl)(struct inode *, struct file *, unsigned int,
++ unsigned long);
++ mm_segment_t fs;
++ struct cow *dev;
++ __u64 size;
++ __u32 magic;
++ time_t mtime;
++ char *backing_file;
++ int n, offset, err = 0;
++
++ n = DEVICE_NR(inode->i_rdev);
++ if(n >= MAX_DEV)
++ return(-ENODEV);
++ dev = &cow_dev[n];
++ offset = n << COW_SHIFT;
++
++ spin_lock(&cow_lock);
++
++ if(dev->count == 0){
++ dev->cow_dev = name_to_kdev_t(dev->cow_path);
++ if(dev->cow_dev == 0){
++ printk(KERN_ERR "cow_open - name_to_kdev_t(\"%s\") "
++ "failed\n", dev->cow_path);
++ err = -ENODEV;
++ }
++
++ dev->backing_dev = name_to_kdev_t(dev->backing_path);
++ if(dev->backing_dev == 0){
++ printk(KERN_ERR "cow_open - name_to_kdev_t(\"%s\") "
++ "failed\n", dev->backing_path);
++ err = -ENODEV;
++ }
++
++ if(err)
++ goto out;
++
++ dev->cow_bdev = bdget(dev->cow_dev);
++ if(dev->cow_bdev == NULL){
++ printk(KERN_ERR "cow_open - bdget(\"%s\") failed\n",
++ dev->cow_path);
++ err = -ENOMEM;
++ }
++ dev->backing_bdev = bdget(dev->backing_dev);
++ if(dev->backing_bdev == NULL){
++ printk(KERN_ERR "cow_open - bdget(\"%s\") failed\n",
++ dev->backing_path);
++ err = -ENOMEM;
++ }
++
++ if(err)
++ goto out;
++
++ err = blkdev_get(dev->cow_bdev, FMODE_READ|FMODE_WRITE, 0,
++ BDEV_RAW);
++ if(err){
++ printk("cow_open - blkdev_get of COW device failed, "
++ "error = %d\n", err);
++ goto out;
++ }
++
++ err = blkdev_get(dev->backing_bdev, FMODE_READ, 0, BDEV_RAW);
++ if(err){
++ printk("cow_open - blkdev_get of backing device "
++ "failed, error = %d\n", err);
++ goto out;
++ }
++
++ err = read_cow_header(reader, &dev->cow_dev, &magic,
++ &backing_file, &mtime, &size,
++ &dev->sectorsize, &dev->bitmap_offset);
++ if(err){
++ printk(KERN_ERR "cow_open - read_cow_header failed, "
++ "err = %d\n", err);
++ goto out;
++ }
++
++ cow_sizes(size, dev->sectorsize, dev->bitmap_offset,
++ &dev->bitmap_len, &dev->data_offset);
++ dev->bitmap = (void *) vmalloc(dev->bitmap_len);
++ if(dev->bitmap == NULL){
++ err = -ENOMEM;
++ printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
++ goto out;
++ }
++ flush_tlb_kernel_vm();
++
++ err = reader(dev->bitmap_offset, (char *) dev->bitmap,
++ dev->bitmap_len, &dev->cow_dev);
++ if(err < 0){
++ printk(KERN_ERR "Failed to read COW bitmap\n");
++ vfree(dev->bitmap);
++ goto out;
++ }
++
++ dev_ioctl = dev->backing_bdev->bd_op->ioctl;
++ fs = get_fs();
++ set_fs(KERNEL_DS);
++ err = (*dev_ioctl)(inode, filp, BLKGETSIZE,
++ (unsigned long) &sizes[offset]);
++ set_fs(fs);
++ if(err){
++ printk(KERN_ERR "cow_open - BLKGETSIZE failed, "
++ "error = %d\n", err);
++ goto out;
++ }
++
++ kernel_thread(cow_thread, dev,
++ CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
++ down(&dev->sem);
++ }
++ dev->count++;
++out:
++ spin_unlock(&cow_lock);
++ return(err);
++}
++
++static int cow_release(struct inode * inode, struct file * file)
++{
++ struct cow *dev;
++ int n, err;
++
++ n = DEVICE_NR(inode->i_rdev);
++ if(n >= MAX_DEV)
++ return(-ENODEV);
++ dev = &cow_dev[n];
++
++ spin_lock(&cow_lock);
++
++ if(--dev->count > 0)
++ goto out;
++
++ err = blkdev_put(dev->cow_bdev, BDEV_RAW);
++ if(err)
++ printk("cow_release - blkdev_put of cow device failed, "
++ "error = %d\n", err);
++ bdput(dev->cow_bdev);
++ dev->cow_bdev = 0;
++
++ err = blkdev_put(dev->backing_bdev, BDEV_RAW);
++ if(err)
++ printk("cow_release - blkdev_put of backing device failed, "
++ "error = %d\n", err);
++ bdput(dev->backing_bdev);
++ dev->backing_bdev = 0;
++
++out:
++ spin_unlock(&cow_lock);
++ return(0);
++}
++
++static int cow_ioctl(struct inode * inode, struct file * file,
++ unsigned int cmd, unsigned long arg)
++{
++ struct cow *dev;
++ int (*dev_ioctl)(struct inode *, struct file *, unsigned int,
++ unsigned long);
++ int n;
++
++ n = DEVICE_NR(inode->i_rdev);
++ if(n >= MAX_DEV)
++ return(-ENODEV);
++ dev = &cow_dev[n];
++
++ dev_ioctl = dev->backing_bdev->bd_op->ioctl;
++ return((*dev_ioctl)(inode, file, cmd, arg));
++}
++
++static int cow_revalidate(kdev_t rdev)
++{
++ printk(KERN_ERR "Need to implement cow_revalidate\n");
++ return(0);
++}
++
++static int parse_unit(char **ptr)
++{
++ char *str = *ptr, *end;
++ int n = -1;
++
++ if(isdigit(*str)) {
++ n = simple_strtoul(str, &end, 0);
++ if(end == str)
++ return(-1);
++ *ptr = end;
++ }
++ else if (('a' <= *str) && (*str <= 'h')) {
++ n = *str - 'a';
++ str++;
++ *ptr = str;
++ }
++ return(n);
++}
++
++static int cow_setup(char *str)
++{
++ struct cow *dev;
++ char *cow_name, *backing_name;
++ int unit;
++
++ unit = parse_unit(&str);
++ if((unit < 0) || (unit >= MAX_DEV)){ /* unparsable, or past cow_dev[] */
++ printk(KERN_ERR "cow_setup - Couldn't parse unit number\n");
++ return(1);
++ }
++
++ if(*str != '='){
++ printk(KERN_ERR "cow_setup - Missing '=' after unit "
++ "number\n");
++ return(1);
++ }
++ str++;
++
++ cow_name = str;
++ backing_name = strchr(str, ',');
++ if(backing_name == NULL){
++ printk(KERN_ERR "cow_setup - missing backing device name\n");
++ return(0);
++ }
++ *backing_name = '\0'; /* split "cow,backing" in place */
++ backing_name++;
++
++ spin_lock(&cow_lock);
++
++ dev = &cow_dev[unit];
++ dev->cow_path = cow_name; /* points into str, not copied */
++ dev->backing_path = backing_name; /* likewise */
++
++ spin_unlock(&cow_lock);
++ return(0);
++}
++
++__setup("cow", cow_setup);
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/drivers/cow_sys.h b/arch/um/drivers/cow_sys.h
+--- a/arch/um/drivers/cow_sys.h Wed Dec 31 19:00:00 1969
++++ b/arch/um/drivers/cow_sys.h Fri Aug 15 15:12:37 2003
+@@ -0,0 +1,48 @@
++#ifndef __COW_SYS_H__
++#define __COW_SYS_H__
++
++#include "kern_util.h"
++#include "user_util.h"
++#include "os.h"
++#include "user.h"
++
++static inline void *cow_malloc(int size)
++{
++ return(um_kmalloc(size));
++}
++
++static inline void cow_free(void *ptr)
++{
++ kfree(ptr);
++}
++
++#define cow_printf printk
++
++static inline char *cow_strdup(char *str)
++{
++ return(uml_strdup(str));
++}
++
++static inline int cow_seek_file(int fd, __u64 offset)
++{
++ return(os_seek_file(fd, offset));
++}
++
++static inline int cow_file_size(char *file, __u64 *size_out)
++{
++ return(os_file_size(file, size_out));
++}
++
++static inline int cow_write_file(int fd, char *buf, int size)
++{
++ return(os_write_file(fd, buf, size));
++}
++
++#endif
++
++/*
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/drivers/cow_user.c b/arch/um/drivers/cow_user.c
+--- a/arch/um/drivers/cow_user.c Wed Dec 31 19:00:00 1969
++++ b/arch/um/drivers/cow_user.c Fri Aug 15 15:12:34 2003
+@@ -0,0 +1,296 @@
++#include <stddef.h>
++#include <string.h>
++#include <errno.h>
++#include <unistd.h>
++#include <byteswap.h>
++#include <sys/stat.h>
++#include <sys/time.h>
++#include <sys/param.h>
++#include <netinet/in.h>
++
++#include "cow.h"
++#include "cow_sys.h"
++
++#define PATH_LEN_V1 256
++
++struct cow_header_v1 {
++ int magic;
++ int version;
++ char backing_file[PATH_LEN_V1];
++ time_t mtime;
++ __u64 size;
++ int sectorsize;
++};
++
++#define PATH_LEN_V2 MAXPATHLEN
++
++struct cow_header_v2 {
++ unsigned long magic;
++ unsigned long version;
++ char backing_file[PATH_LEN_V2];
++ time_t mtime;
++ __u64 size;
++ int sectorsize;
++};
++
++union cow_header {
++ struct cow_header_v1 v1;
++ struct cow_header_v2 v2;
++};
++
++#define COW_MAGIC 0x4f4f4f4d /* MOOO */
++#define COW_VERSION 2
++
++void cow_sizes(__u64 size, int sectorsize, int bitmap_offset,
++ unsigned long *bitmap_len_out, int *data_offset_out)
++{
++ *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize);
++
++ *data_offset_out = bitmap_offset + *bitmap_len_out;
++ *data_offset_out = (*data_offset_out + sectorsize - 1) / sectorsize;
++ *data_offset_out *= sectorsize;
++}
++
++static int absolutize(char *to, int size, char *from)
++{
++ char save_cwd[256], *slash;
++ int remaining;
++
++ if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) {
++ cow_printf("absolutize : unable to get cwd - errno = %d\n",
++ errno);
++ return(-1);
++ }
++ slash = strrchr(from, '/');
++ if(slash != NULL){
++ *slash = '\0';
++ if(chdir(from)){
++ *slash = '/';
++ cow_printf("absolutize : Can't cd to '%s' - "
++ "errno = %d\n", from, errno);
++ return(-1);
++ }
++ *slash = '/';
++ if(getcwd(to, size) == NULL){
++ cow_printf("absolutize : unable to get cwd of '%s' - "
++ "errno = %d\n", from, errno);
++ return(-1);
++ }
++ remaining = size - strlen(to);
++ if(strlen(slash) + 1 > remaining){
++ cow_printf("absolutize : unable to fit '%s' into %d "
++ "chars\n", from, size);
++ return(-1);
++ }
++ strcat(to, slash);
++ }
++ else {
++ if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){
++ cow_printf("absolutize : unable to fit '%s' into %d "
++ "chars\n", from, size);
++ return(-1);
++ }
++ strcpy(to, save_cwd);
++ strcat(to, "/");
++ strcat(to, from);
++ }
++ chdir(save_cwd);
++ return(0);
++}
++
++int write_cow_header(char *cow_file, int fd, char *backing_file,
++ int sectorsize, long long *size)
++{
++ struct cow_header_v2 *header;
++ struct stat64 buf;
++ int err;
++
++ err = cow_seek_file(fd, 0);
++ if(err != 0){
++ cow_printf("write_cow_header - lseek failed, errno = %d\n",
++ errno);
++ return(-errno);
++ }
++
++ err = -ENOMEM;
++ header = cow_malloc(sizeof(*header));
++ if(header == NULL){
++ cow_printf("Failed to allocate COW V2 header\n");
++ goto out;
++ }
++ header->magic = htonl(COW_MAGIC);
++ header->version = htonl(COW_VERSION);
++
++ err = -EINVAL;
++ if(strlen(backing_file) > sizeof(header->backing_file) - 1){
++ cow_printf("Backing file name \"%s\" is too long - names are "
++ "limited to %d characters\n", backing_file,
++ sizeof(header->backing_file) - 1);
++ goto out_free;
++ }
++
++ if(absolutize(header->backing_file, sizeof(header->backing_file),
++ backing_file))
++ goto out_free;
++
++ err = stat64(header->backing_file, &buf);
++ if(err < 0){
++ cow_printf("Stat of backing file '%s' failed, errno = %d\n",
++ header->backing_file, errno);
++ err = -errno;
++ goto out_free;
++ }
++
++ err = cow_file_size(header->backing_file, size);
++ if(err){
++ cow_printf("Couldn't get size of backing file '%s', "
++ "errno = %d\n", header->backing_file, -*size);
++ goto out_free;
++ }
++
++ header->mtime = htonl(buf.st_mtime);
++ header->size = htonll(*size);
++ header->sectorsize = htonl(sectorsize);
++
++ err = write(fd, header, sizeof(*header));
++ if(err != sizeof(*header)){
++ cow_printf("Write of header to new COW file '%s' failed, "
++ "errno = %d\n", cow_file, errno);
++ goto out_free;
++ }
++ err = 0;
++ out_free:
++ cow_free(header);
++ out:
++ return(err);
++}
++
++int file_reader(__u64 offset, char *buf, int len, void *arg)
++{
++ int fd = *((int *) arg);
++
++ return(pread(fd, buf, len, offset));
++}
++
++int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg,
++ __u32 *magic_out, char **backing_file_out,
++ time_t *mtime_out, __u64 *size_out,
++ int *sectorsize_out, int *bitmap_offset_out)
++{
++ union cow_header *header;
++ char *file;
++ int err, n;
++ unsigned long version, magic;
++
++ header = cow_malloc(sizeof(*header));
++ if(header == NULL){
++ cow_printf("read_cow_header - Failed to allocate header\n");
++ return(-ENOMEM);
++ }
++ err = -EINVAL;
++ n = (*reader)(0, (char *) header, sizeof(*header), arg);
++ if(n < offsetof(typeof(header->v1), backing_file)){
++ cow_printf("read_cow_header - short header\n");
++ goto out;
++ }
++
++ magic = header->v1.magic;
++ if(magic == COW_MAGIC) {
++ version = header->v1.version;
++ }
++ else if(magic == ntohl(COW_MAGIC)){
++ version = ntohl(header->v1.version);
++ }
++ /* No error printed because the non-COW case comes through here */
++ else goto out;
++
++ *magic_out = COW_MAGIC;
++
++ if(version == 1){
++ if(n < sizeof(header->v1)){
++ cow_printf("read_cow_header - failed to read V1 "
++ "header\n");
++ goto out;
++ }
++ *mtime_out = header->v1.mtime;
++ *size_out = header->v1.size;
++ *sectorsize_out = header->v1.sectorsize;
++ *bitmap_offset_out = sizeof(header->v1);
++ file = header->v1.backing_file;
++ }
++ else if(version == 2){
++ if(n < sizeof(header->v2)){
++ cow_printf("read_cow_header - failed to read V2 "
++ "header\n");
++ goto out;
++ }
++ *mtime_out = ntohl(header->v2.mtime);
++ *size_out = ntohll(header->v2.size);
++ *sectorsize_out = ntohl(header->v2.sectorsize);
++ *bitmap_offset_out = sizeof(header->v2);
++ file = header->v2.backing_file;
++ }
++ else {
++ cow_printf("read_cow_header - invalid COW version\n");
++ goto out;
++ }
++ err = -ENOMEM;
++ *backing_file_out = cow_strdup(file);
++ if(*backing_file_out == NULL){
++ cow_printf("read_cow_header - failed to allocate backing "
++ "file\n");
++ goto out;
++ }
++ err = 0;
++ out:
++ cow_free(header);
++ return(err);
++}
++
++int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize,
++ int *bitmap_offset_out, unsigned long *bitmap_len_out,
++ int *data_offset_out)
++{
++ __u64 size, offset;
++ char zero = 0;
++ int err;
++
++ err = write_cow_header(cow_file, fd, backing_file, sectorsize, &size);
++ if(err)
++ goto out;
++
++ cow_sizes(size, sectorsize, sizeof(struct cow_header_v2),
++ bitmap_len_out, data_offset_out);
++ *bitmap_offset_out = sizeof(struct cow_header_v2);
++
++ offset = *data_offset_out + size - sizeof(zero);
++ err = cow_seek_file(fd, offset);
++ if(err != 0){
++ cow_printf("cow bitmap lseek failed : errno = %d\n", errno);
++ goto out;
++ }
++
++ /* does not really matter how much we write it is just to set EOF
++ * this also sets the entire COW bitmap
++ * to zero without having to allocate it
++ */
++ err = cow_write_file(fd, &zero, sizeof(zero));
++ if(err != sizeof(zero)){
++ err = -EINVAL;
++ cow_printf("Write of bitmap to new COW file '%s' failed, "
++ "errno = %d\n", cow_file, errno);
++ goto out;
++ }
++
++ return(0);
++
++ out:
++ return(err);
++}
++
++/*
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c
+--- a/arch/um/drivers/hostaudio_kern.c Fri Aug 15 15:09:05 2003
++++ b/arch/um/drivers/hostaudio_kern.c Fri Aug 15 15:13:48 2003
+@@ -11,6 +11,7 @@
+ #include "linux/fs.h"
+ #include "linux/sound.h"
+ #include "linux/soundcard.h"
++#include "asm/uaccess.h"
+ #include "kern_util.h"
+ #include "init.h"
+ #include "hostaudio.h"
+@@ -22,7 +23,7 @@
+ #ifndef MODULE
+ static int set_dsp(char *name, int *add)
+ {
+- dsp = uml_strdup(name);
++ dsp = name;
+ return(0);
+ }
+
+@@ -34,7 +35,7 @@
+
+ static int set_mixer(char *name, int *add)
+ {
+- mixer = uml_strdup(name);
++ mixer = name;
+ return(0);
+ }
+
+@@ -51,23 +52,55 @@
+ loff_t *ppos)
+ {
+ struct hostaudio_state *state = file->private_data;
++ void *kbuf;
++ int err;
+
+ #ifdef DEBUG
+ printk("hostaudio: read called, count = %d\n", count);
+ #endif
+
+- return(hostaudio_read_user(state, buffer, count, ppos));
++ kbuf = kmalloc(count, GFP_KERNEL);
++ if(kbuf == NULL)
++ return(-ENOMEM);
++
++ err = hostaudio_read_user(state, kbuf, count, ppos);
++ if(err < 0)
++ goto out;
++
++ if(copy_to_user(buffer, kbuf, err))
++ err = -EFAULT;
++
++ out:
++ kfree(kbuf);
++ return(err);
+ }
+
+ static ssize_t hostaudio_write(struct file *file, const char *buffer,
+ size_t count, loff_t *ppos)
+ {
+ struct hostaudio_state *state = file->private_data;
++ void *kbuf;
++ int err;
+
+ #ifdef DEBUG
+ printk("hostaudio: write called, count = %d\n", count);
+ #endif
+- return(hostaudio_write_user(state, buffer, count, ppos));
++
++ kbuf = kmalloc(count, GFP_KERNEL);
++ if(kbuf == NULL)
++ return(-ENOMEM);
++
++ err = -EFAULT;
++ if(copy_from_user(kbuf, buffer, count))
++ goto out;
++
++ err = hostaudio_write_user(state, kbuf, count, ppos);
++ if(err < 0)
++ goto out;
++
++ out:
++ kfree(kbuf);
++ return(err);
+ }
+
+ static unsigned int hostaudio_poll(struct file *file,
+@@ -86,12 +119,43 @@
+ unsigned int cmd, unsigned long arg)
+ {
+ struct hostaudio_state *state = file->private_data;
++ unsigned long data = 0;
++ int err;
+
+ #ifdef DEBUG
+ printk("hostaudio: ioctl called, cmd = %u\n", cmd);
+ #endif
++ switch(cmd){
++ case SNDCTL_DSP_SPEED:
++ case SNDCTL_DSP_STEREO:
++ case SNDCTL_DSP_GETBLKSIZE:
++ case SNDCTL_DSP_CHANNELS:
++ case SNDCTL_DSP_SUBDIVIDE:
++ case SNDCTL_DSP_SETFRAGMENT:
++ if(get_user(data, (int *) arg))
++ return(-EFAULT);
++ break;
++ default:
++ break;
++ }
++
++ err = hostaudio_ioctl_user(state, cmd, (unsigned long) &data);
++
++ switch(cmd){
++ case SNDCTL_DSP_SPEED:
++ case SNDCTL_DSP_STEREO:
++ case SNDCTL_DSP_GETBLKSIZE:
++ case SNDCTL_DSP_CHANNELS:
++ case SNDCTL_DSP_SUBDIVIDE:
++ case SNDCTL_DSP_SETFRAGMENT:
++ if(put_user(data, (int *) arg))
++ return(-EFAULT);
++ break;
++ default:
++ break;
++ }
+
+- return(hostaudio_ioctl_user(state, cmd, arg));
++ return(err);
+ }
+
+ static int hostaudio_open(struct inode *inode, struct file *file)
+@@ -225,7 +289,8 @@
+
+ static int __init hostaudio_init_module(void)
+ {
+- printk(KERN_INFO "UML Audio Relay\n");
++ printk(KERN_INFO "UML Audio Relay (host dsp = %s, host mixer = %s)\n",
++ dsp, mixer);
+
+ module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1);
+ if(module_data.dev_audio < 0){
+diff -Naur a/arch/um/drivers/line.c b/arch/um/drivers/line.c
+--- a/arch/um/drivers/line.c Fri Aug 15 15:08:24 2003
++++ b/arch/um/drivers/line.c Fri Aug 15 15:13:28 2003
+@@ -6,8 +6,8 @@
+ #include "linux/sched.h"
+ #include "linux/slab.h"
+ #include "linux/list.h"
++#include "linux/interrupt.h"
+ #include "linux/devfs_fs_kernel.h"
+-#include "asm/irq.h"
+ #include "asm/uaccess.h"
+ #include "chan_kern.h"
+ #include "irq_user.h"
+@@ -16,16 +16,18 @@
+ #include "user_util.h"
+ #include "kern_util.h"
+ #include "os.h"
++#include "irq_kern.h"
+
+ #define LINE_BUFSIZE 4096
+
+-void line_interrupt(int irq, void *data, struct pt_regs *unused)
++irqreturn_t line_interrupt(int irq, void *data, struct pt_regs *unused)
+ {
+ struct line *dev = data;
+
+ if(dev->count > 0)
+ chan_interrupt(&dev->chan_list, &dev->task, dev->tty, irq,
+ dev);
++ return IRQ_HANDLED;
+ }
+
+ void line_timer_cb(void *arg)
+@@ -136,20 +138,22 @@
+ return(len);
+ }
+
+-void line_write_interrupt(int irq, void *data, struct pt_regs *unused)
++irqreturn_t line_write_interrupt(int irq, void *data, struct pt_regs *unused)
+ {
+ struct line *dev = data;
+ struct tty_struct *tty = dev->tty;
+ int err;
+
+ err = flush_buffer(dev);
+- if(err == 0) return;
++ if(err == 0)
++ return(IRQ_NONE);
+ else if(err < 0){
+ dev->head = dev->buffer;
+ dev->tail = dev->buffer;
+ }
+
+- if(tty == NULL) return;
++ if(tty == NULL)
++ return(IRQ_NONE);
+
+ if(test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) &&
+ (tty->ldisc.write_wakeup != NULL))
+@@ -161,9 +165,9 @@
+ * writes.
+ */
+
+- if (waitqueue_active(&tty->write_wait))
++ if(waitqueue_active(&tty->write_wait))
+ wake_up_interruptible(&tty->write_wait);
+-
++ return(IRQ_HANDLED);
+ }
+
+ int line_write_room(struct tty_struct *tty)
+@@ -369,7 +373,7 @@
+
+ dev = simple_strtoul(name, &end, 0);
+ if((*end != '\0') || (end == name)){
+- *error_out = "line_setup failed to parse device number";
++ *error_out = "line_get_config failed to parse device number";
+ return(0);
+ }
+
+@@ -379,15 +383,15 @@
+ }
+
+ line = &lines[dev];
++
+ down(&line->sem);
+-
+ if(!line->valid)
+ CONFIG_CHUNK(str, size, n, "none", 1);
+ else if(line->count == 0)
+ CONFIG_CHUNK(str, size, n, line->init_str, 1);
+ else n = chan_config_string(&line->chan_list, str, size, error_out);
+-
+ up(&line->sem);
++
+ return(n);
+ }
+
+@@ -412,7 +416,8 @@
+ return NULL;
+
+ driver->driver_name = line_driver->name;
+- driver->name = line_driver->devfs_name;
++ driver->name = line_driver->device_name;
++ driver->devfs_name = line_driver->devfs_name;
+ driver->major = line_driver->major;
+ driver->minor_start = line_driver->minor_start;
+ driver->type = line_driver->type;
+@@ -432,7 +437,7 @@
+
+ for(i = 0; i < nlines; i++){
+ if(!lines[i].valid)
+- tty_unregister_devfs(driver, i);
++ tty_unregister_device(driver, i);
+ }
+
+ mconsole_register_dev(&line_driver->mc);
+@@ -465,24 +470,25 @@
+ struct line *line;
+ };
+
+-void winch_interrupt(int irq, void *data, struct pt_regs *unused)
++irqreturn_t winch_interrupt(int irq, void *data, struct pt_regs *unused)
+ {
+ struct winch *winch = data;
+ struct tty_struct *tty;
+ int err;
+ char c;
+
+- err = generic_read(winch->fd, &c, NULL);
+- if(err < 0){
+- if(err != -EAGAIN){
+- printk("winch_interrupt : read failed, errno = %d\n",
+- -err);
+- printk("fd %d is losing SIGWINCH support\n",
+- winch->tty_fd);
+- free_irq(irq, data);
+- return;
++ if(winch->fd != -1){
++ err = generic_read(winch->fd, &c, NULL);
++ if(err < 0){
++ if(err != -EAGAIN){
++ printk("winch_interrupt : read failed, "
++ "errno = %d\n", -err);
++ printk("fd %d is losing SIGWINCH support\n",
++ winch->tty_fd);
++ return(IRQ_HANDLED);
++ }
++ goto out;
+ }
+- goto out;
+ }
+ tty = winch->line->tty;
+ if(tty != NULL){
+@@ -492,7 +498,9 @@
+ kill_pg(tty->pgrp, SIGWINCH, 1);
+ }
+ out:
+- reactivate_fd(winch->fd, WINCH_IRQ);
++ if(winch->fd != -1)
++ reactivate_fd(winch->fd, WINCH_IRQ);
++ return(IRQ_HANDLED);
+ }
+
+ DECLARE_MUTEX(winch_handler_sem);
+@@ -529,7 +537,10 @@
+
+ list_for_each(ele, &winch_handlers){
+ winch = list_entry(ele, struct winch, list);
+- close(winch->fd);
++ if(winch->fd != -1){
++ deactivate_fd(winch->fd, WINCH_IRQ);
++ close(winch->fd);
++ }
+ if(winch->pid != -1)
+ os_kill_process(winch->pid, 1);
+ }
+diff -Naur a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
+--- a/arch/um/drivers/mconsole_kern.c Fri Aug 15 15:03:47 2003
++++ b/arch/um/drivers/mconsole_kern.c Fri Aug 15 15:10:11 2003
+@@ -27,6 +27,7 @@
+ #include "init.h"
+ #include "os.h"
+ #include "umid.h"
++#include "irq_kern.h"
+
+ static int do_unlink_socket(struct notifier_block *notifier,
+ unsigned long what, void *data)
+@@ -67,7 +68,7 @@
+
+ DECLARE_WORK(mconsole_work, mc_work_proc, NULL);
+
+-void mconsole_interrupt(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t mconsole_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+ {
+ int fd;
+ struct mconsole_entry *new;
+@@ -88,6 +89,7 @@
+ }
+ if(!list_empty(&mc_requests)) schedule_work(&mconsole_work);
+ reactivate_fd(fd, MCONSOLE_IRQ);
++ return(IRQ_HANDLED);
+ }
+
+ void mconsole_version(struct mc_request *req)
+@@ -100,20 +102,34 @@
+ mconsole_reply(req, version, 0, 0);
+ }
+
++void mconsole_log(struct mc_request *req)
++{
++ int len;
++ char *ptr = req->request.data;
++
++ ptr += strlen("log");
++ while(isspace(*ptr)) ptr++;
++
++ len = ptr - req->request.data;
++ printk("%.*s", len, ptr);
++ mconsole_reply(req, "", 0, 0);
++}
++
+ #define UML_MCONSOLE_HELPTEXT \
+-"Commands:
+- version - Get kernel version
+- help - Print this message
+- halt - Halt UML
+- reboot - Reboot UML
+- config <dev>=<config> - Add a new device to UML;
+- same syntax as command line
+- config <dev> - Query the configuration of a device
+- remove <dev> - Remove a device from UML
+- sysrq <letter> - Performs the SysRq action controlled by the letter
+- cad - invoke the Ctl-Alt-Del handler
+- stop - pause the UML; it will do nothing until it receives a 'go'
+- go - continue the UML after a 'stop'
++"Commands: \n\
++ version - Get kernel version \n\
++ help - Print this message \n\
++ halt - Halt UML \n\
++ reboot - Reboot UML \n\
++ config <dev>=<config> - Add a new device to UML; \n\
++ same syntax as command line \n\
++ config <dev> - Query the configuration of a device \n\
++ remove <dev> - Remove a device from UML \n\
++ sysrq <letter> - Performs the SysRq action controlled by the letter \n\
++ cad - invoke the Ctl-Alt-Del handler \n\
++ stop - pause the UML; it will do nothing until it receives a 'go' \n\
++ go - continue the UML after a 'stop' \n\
++ log <string> - make UML enter <string> into the kernel log\n\
+ "
+
+ void mconsole_help(struct mc_request *req)
+@@ -302,7 +318,7 @@
+ if(umid_file_name("mconsole", file, sizeof(file))) return(-1);
+ snprintf(mconsole_socket_name, sizeof(file), "%s", file);
+
+- sock = create_unix_socket(file, sizeof(file));
++ sock = create_unix_socket(file, sizeof(file), 1);
+ if (sock < 0){
+ printk("Failed to initialize management console\n");
+ return(1);
+diff -Naur a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c
+--- a/arch/um/drivers/mconsole_user.c Fri Aug 15 15:04:47 2003
++++ b/arch/um/drivers/mconsole_user.c Fri Aug 15 15:10:35 2003
+@@ -28,6 +28,7 @@
+ { "cad", mconsole_cad, 1 },
+ { "stop", mconsole_stop, 0 },
+ { "go", mconsole_go, 1 },
++ { "log", mconsole_log, 1 },
+ };
+
+ /* Initialized in mconsole_init, which is an initcall */
+@@ -139,6 +140,7 @@
+ memcpy(reply.data, str, len);
+ reply.data[len] = '\0';
+ total -= len;
++ str += len;
+ reply.len = len + 1;
+
+ len = sizeof(reply) + reply.len - sizeof(reply.data);
+diff -Naur a/arch/um/drivers/mmapper_kern.c b/arch/um/drivers/mmapper_kern.c
+--- a/arch/um/drivers/mmapper_kern.c Fri Aug 15 15:04:33 2003
++++ b/arch/um/drivers/mmapper_kern.c Fri Aug 15 15:10:32 2003
+@@ -120,7 +120,10 @@
+ printk(KERN_INFO "Mapper v0.1\n");
+
+ v_buf = (char *) find_iomem("mmapper", &mmapper_size);
+- if(mmapper_size == 0) return(0);
++ if(mmapper_size == 0){
++ printk(KERN_ERR "mmapper_init - find_iomem failed\n");
++ return(0);
++ }
+
+ p_buf = __pa(v_buf);
+
+diff -Naur a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
+--- a/arch/um/drivers/net_kern.c Fri Aug 15 15:05:49 2003
++++ b/arch/um/drivers/net_kern.c Fri Aug 15 15:11:52 2003
+@@ -26,6 +26,7 @@
+ #include "mconsole_kern.h"
+ #include "init.h"
+ #include "irq_user.h"
++#include "irq_kern.h"
+
+ static spinlock_t opened_lock = SPIN_LOCK_UNLOCKED;
+ LIST_HEAD(opened);
+@@ -61,14 +62,14 @@
+ return pkt_len;
+ }
+
+-void uml_net_interrupt(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t uml_net_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+ {
+ struct net_device *dev = dev_id;
+ struct uml_net_private *lp = dev->priv;
+ int err;
+
+ if(!netif_running(dev))
+- return;
++ return(IRQ_NONE);
+
+ spin_lock(&lp->lock);
+ while((err = uml_net_rx(dev)) > 0) ;
+@@ -83,6 +84,7 @@
+
+ out:
+ spin_unlock(&lp->lock);
++ return(IRQ_HANDLED);
+ }
+
+ static int uml_net_open(struct net_device *dev)
+@@ -292,7 +294,7 @@
+ struct uml_net *device;
+ struct net_device *dev;
+ struct uml_net_private *lp;
+- int err, size;
++ int save, err, size;
+
+ size = transport->private_size + sizeof(struct uml_net_private) +
+ sizeof(((struct uml_net_private *) 0)->user);
+@@ -362,21 +364,29 @@
+ return 1;
+ lp = dev->priv;
+
+- INIT_LIST_HEAD(&lp->list);
+- spin_lock_init(&lp->lock);
+- lp->dev = dev;
+- lp->fd = -1;
+- lp->mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0 };
+- lp->have_mac = device->have_mac;
+- lp->protocol = transport->kern->protocol;
+- lp->open = transport->user->open;
+- lp->close = transport->user->close;
+- lp->remove = transport->user->remove;
+- lp->read = transport->kern->read;
+- lp->write = transport->kern->write;
+- lp->add_address = transport->user->add_address;
+- lp->delete_address = transport->user->delete_address;
+- lp->set_mtu = transport->user->set_mtu;
++ /* lp.user is the first four bytes of the transport data, which
++ * has already been initialized. This structure assignment will
++ * overwrite that, so we make sure that .user gets overwritten with
++ * what it already has.
++ */
++ save = lp->user[0];
++ *lp = ((struct uml_net_private)
++ { .list = LIST_HEAD_INIT(lp->list),
++ .lock = SPIN_LOCK_UNLOCKED,
++ .dev = dev,
++ .fd = -1,
++ .mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0},
++ .have_mac = device->have_mac,
++ .protocol = transport->kern->protocol,
++ .open = transport->user->open,
++ .close = transport->user->close,
++ .remove = transport->user->remove,
++ .read = transport->kern->read,
++ .write = transport->kern->write,
++ .add_address = transport->user->add_address,
++ .delete_address = transport->user->delete_address,
++ .set_mtu = transport->user->set_mtu,
++ .user = { save } });
+
+ init_timer(&lp->tl);
+ lp->tl.function = uml_net_user_timer_expire;
+diff -Naur a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c
+--- a/arch/um/drivers/port_kern.c Fri Aug 15 15:04:01 2003
++++ b/arch/um/drivers/port_kern.c Fri Aug 15 15:10:18 2003
+@@ -6,6 +6,7 @@
+ #include "linux/list.h"
+ #include "linux/sched.h"
+ #include "linux/slab.h"
++#include "linux/interrupt.h"
+ #include "linux/irq.h"
+ #include "linux/spinlock.h"
+ #include "linux/errno.h"
+@@ -14,6 +15,7 @@
+ #include "kern_util.h"
+ #include "kern.h"
+ #include "irq_user.h"
++#include "irq_kern.h"
+ #include "port.h"
+ #include "init.h"
+ #include "os.h"
+@@ -44,7 +46,7 @@
+ struct port_list *port;
+ };
+
+-static void pipe_interrupt(int irq, void *data, struct pt_regs *regs)
++static irqreturn_t pipe_interrupt(int irq, void *data, struct pt_regs *regs)
+ {
+ struct connection *conn = data;
+ int fd;
+@@ -52,7 +54,7 @@
+ fd = os_rcv_fd(conn->socket[0], &conn->helper_pid);
+ if(fd < 0){
+ if(fd == -EAGAIN)
+- return;
++ return(IRQ_NONE);
+
+ printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n",
+ -fd);
+@@ -65,6 +67,7 @@
+ list_add(&conn->list, &conn->port->connections);
+
+ up(&conn->port->sem);
++ return(IRQ_HANDLED);
+ }
+
+ static int port_accept(struct port_list *port)
+@@ -138,12 +141,13 @@
+
+ DECLARE_WORK(port_work, port_work_proc, NULL);
+
+-static void port_interrupt(int irq, void *data, struct pt_regs *regs)
++static irqreturn_t port_interrupt(int irq, void *data, struct pt_regs *regs)
+ {
+ struct port_list *port = data;
+
+ port->has_connection = 1;
+ schedule_work(&port_work);
++ return(IRQ_HANDLED);
+ }
+
+ void *port_data(int port_num)
+diff -Naur a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c
+--- a/arch/um/drivers/ssl.c Fri Aug 15 15:06:09 2003
++++ b/arch/um/drivers/ssl.c Fri Aug 15 15:12:30 2003
+@@ -53,8 +53,9 @@
+
+ static struct line_driver driver = {
+ .name = "UML serial line",
+- .devfs_name = "tts/%d",
+- .major = TTYAUX_MAJOR,
++ .device_name = "ttS",
++ .devfs_name = "tts/",
++ .major = TTY_MAJOR,
+ .minor_start = 64,
+ .type = TTY_DRIVER_TYPE_SERIAL,
+ .subtype = 0,
+diff -Naur a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c
+--- a/arch/um/drivers/stdio_console.c Fri Aug 15 15:04:51 2003
++++ b/arch/um/drivers/stdio_console.c Fri Aug 15 15:10:56 2003
+@@ -83,7 +83,8 @@
+
+ static struct line_driver driver = {
+ .name = "UML console",
+- .devfs_name = "vc/%d",
++ .device_name = "tty",
++ .devfs_name = "vc/",
+ .major = TTY_MAJOR,
+ .minor_start = 0,
+ .type = TTY_DRIVER_TYPE_CONSOLE,
+@@ -159,6 +160,15 @@
+
+ static int con_init_done = 0;
+
++static struct tty_operations console_ops = {
++ .open = con_open,
++ .close = con_close,
++ .write = con_write,
++ .chars_in_buffer = chars_in_buffer,
++ .set_termios = set_termios,
++ .write_room = line_write_room,
++};
++
+ int stdio_init(void)
+ {
+ char *new_title;
+@@ -166,7 +176,8 @@
+ printk(KERN_INFO "Initializing stdio console driver\n");
+
+ console_driver = line_register_devfs(&console_lines, &driver,
+- &console_ops, vts, sizeof(vts)/sizeof(vts[0]));
++ &console_ops, vts,
++ sizeof(vts)/sizeof(vts[0]));
+
+ lines_init(vts, sizeof(vts)/sizeof(vts[0]));
+
+@@ -188,15 +199,6 @@
+ if(con_init_done) up(&vts[console->index].sem);
+ }
+
+-static struct tty_operations console_ops = {
+- .open = con_open,
+- .close = con_close,
+- .write = con_write,
+- .chars_in_buffer = chars_in_buffer,
+- .set_termios = set_termios,
+- .write_room = line_write_room,
+-};
+-
+ static struct tty_driver *console_device(struct console *c, int *index)
+ {
+ *index = c->index;
+@@ -212,12 +214,14 @@
+ console_device, console_setup,
+ CON_PRINTBUFFER);
+
+-static void __init stdio_console_init(void)
++static int __init stdio_console_init(void)
+ {
+ INIT_LIST_HEAD(&vts[0].chan_list);
+ list_add(&init_console_chan.list, &vts[0].chan_list);
+ register_console(&stdiocons);
++ return(0);
+ }
++
+ console_initcall(stdio_console_init);
+
+ static int console_chan_setup(char *str)
+diff -Naur a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
+--- a/arch/um/drivers/ubd_kern.c Fri Aug 15 15:05:56 2003
++++ b/arch/um/drivers/ubd_kern.c Fri Aug 15 15:11:53 2003
+@@ -8,6 +8,13 @@
+ * old style ubd by setting UBD_SHIFT to 0
+ * 2002-09-27...2002-10-18 massive tinkering for 2.5
+ * partitions have changed in 2.5
++ * 2003-01-29 more tinkering for 2.5.59-1
++ * This should now address the sysfs problems and has
++ * the symlink for devfs to allow for booting with
++ * the common /dev/ubd/discX/... names rather than
++ * only /dev/ubdN/discN this version also has lots of
++ * clean ups preparing for ubd-many.
++ * James McMechan
+ */
+
+ #define MAJOR_NR UBD_MAJOR
+@@ -40,6 +47,7 @@
+ #include "mconsole_kern.h"
+ #include "init.h"
+ #include "irq_user.h"
++#include "irq_kern.h"
+ #include "ubd_user.h"
+ #include "2_5compat.h"
+ #include "os.h"
+@@ -70,7 +78,7 @@
+ static request_queue_t *ubd_queue;
+
+ /* Protected by ubd_lock */
+-static int fake_major = 0;
++static int fake_major = MAJOR_NR;
+
+ static struct gendisk *ubd_gendisk[MAX_DEV];
+ static struct gendisk *fake_gendisk[MAX_DEV];
+@@ -99,12 +107,12 @@
+
+ struct ubd {
+ char *file;
+- int is_dir;
+ int count;
+ int fd;
+ __u64 size;
+ struct openflags boot_openflags;
+ struct openflags openflags;
++ int no_cow;
+ struct cow cow;
+ };
+
+@@ -118,12 +126,12 @@
+
+ #define DEFAULT_UBD { \
+ .file = NULL, \
+- .is_dir = 0, \
+ .count = 0, \
+ .fd = -1, \
+ .size = -1, \
+ .boot_openflags = OPEN_FLAGS, \
+ .openflags = OPEN_FLAGS, \
++ .no_cow = 0, \
+ .cow = DEFAULT_COW, \
+ }
+
+@@ -131,8 +139,10 @@
+
+ static int ubd0_init(void)
+ {
+- if(ubd_dev[0].file == NULL)
+- ubd_dev[0].file = "root_fs";
++ struct ubd *dev = &ubd_dev[0];
++
++ if(dev->file == NULL)
++ dev->file = "root_fs";
+ return(0);
+ }
+
+@@ -199,19 +209,39 @@
+ " Create ide0 entries that map onto ubd devices.\n\n"
+ );
+
++static int parse_unit(char **ptr)
++{
++ char *str = *ptr, *end;
++ int n = -1;
++
++ if(isdigit(*str)) {
++ n = simple_strtoul(str, &end, 0);
++ if(end == str)
++ return(-1);
++ *ptr = end;
++ }
++ else if (('a' <= *str) && (*str <= 'h')) {
++ n = *str - 'a';
++ str++;
++ *ptr = str;
++ }
++ return(n);
++}
++
+ static int ubd_setup_common(char *str, int *index_out)
+ {
++ struct ubd *dev;
+ struct openflags flags = global_openflags;
+ char *backing_file;
+ int n, err;
+
+ if(index_out) *index_out = -1;
+- n = *str++;
++ n = *str;
+ if(n == '='){
+- static int fake_major_allowed = 1;
+ char *end;
+ int major;
+
++ str++;
+ if(!strcmp(str, "sync")){
+ global_openflags.s = 1;
+ return(0);
+@@ -223,20 +253,14 @@
+ return(1);
+ }
+
+- if(!fake_major_allowed){
+- printk(KERN_ERR "Can't assign a fake major twice\n");
+- return(1);
+- }
+-
+ err = 1;
+ spin_lock(&ubd_lock);
+- if(!fake_major_allowed){
++ if(fake_major != MAJOR_NR){
+ printk(KERN_ERR "Can't assign a fake major twice\n");
+ goto out1;
+ }
+
+ fake_major = major;
+- fake_major_allowed = 0;
+
+ printk(KERN_INFO "Setting extra ubd major number to %d\n",
+ major);
+@@ -246,25 +270,23 @@
+ return(err);
+ }
+
+- if(n < '0'){
+- printk(KERN_ERR "ubd_setup : index out of range\n"); }
+-
+- if((n >= '0') && (n <= '9')) n -= '0';
+- else if((n >= 'a') && (n <= 'z')) n -= 'a';
+- else {
+- printk(KERN_ERR "ubd_setup : device syntax invalid\n");
++ n = parse_unit(&str);
++ if(n < 0){
++ printk(KERN_ERR "ubd_setup : couldn't parse unit number "
++ "'%s'\n", str);
+ return(1);
+ }
+ if(n >= MAX_DEV){
+- printk(KERN_ERR "ubd_setup : index out of range "
+- "(%d devices)\n", MAX_DEV);
++ printk(KERN_ERR "ubd_setup : index %d out of range "
++ "(%d devices)\n", n, MAX_DEV);
+ return(1);
+ }
+
+ err = 1;
+ spin_lock(&ubd_lock);
+
+- if(ubd_dev[n].file != NULL){
++ dev = &ubd_dev[n];
++ if(dev->file != NULL){
+ printk(KERN_ERR "ubd_setup : device already configured\n");
+ goto out2;
+ }
+@@ -279,6 +301,11 @@
+ flags.s = 1;
+ str++;
+ }
++ if (*str == 'd'){
++ dev->no_cow = 1;
++ str++;
++ }
++
+ if(*str++ != '='){
+ printk(KERN_ERR "ubd_setup : Expected '='\n");
+ goto out2;
+@@ -287,14 +314,17 @@
+ err = 0;
+ backing_file = strchr(str, ',');
+ if(backing_file){
+- *backing_file = '\0';
+- backing_file++;
++ if(dev->no_cow)
++ printk(KERN_ERR "Can't specify both 'd' and a "
++ "cow file\n");
++ else {
++ *backing_file = '\0';
++ backing_file++;
++ }
+ }
+- ubd_dev[n].file = str;
+- if(ubd_is_dir(ubd_dev[n].file))
+- ubd_dev[n].is_dir = 1;
+- ubd_dev[n].cow.file = backing_file;
+- ubd_dev[n].boot_openflags = flags;
++ dev->file = str;
++ dev->cow.file = backing_file;
++ dev->boot_openflags = flags;
+ out2:
+ spin_unlock(&ubd_lock);
+ return(err);
+@@ -324,8 +354,7 @@
+ static int fakehd_set = 0;
+ static int fakehd(char *str)
+ {
+- printk(KERN_INFO
+- "fakehd : Changing ubd name to \"hd\".\n");
++ printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
+ fakehd_set = 1;
+ return 1;
+ }
+@@ -394,9 +423,10 @@
+ do_ubd_request(ubd_queue);
+ }
+
+-static void ubd_intr(int irq, void *dev, struct pt_regs *unused)
++static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused)
+ {
+ ubd_handler();
++ return(IRQ_HANDLED);
+ }
+
+ /* Only changed by ubd_init, which is an initcall. */
+@@ -432,16 +462,18 @@
+ static int ubd_open_dev(struct ubd *dev)
+ {
+ struct openflags flags;
+- int err, n, create_cow, *create_ptr;
++ char **back_ptr;
++ int err, create_cow, *create_ptr;
+
++ dev->openflags = dev->boot_openflags;
+ create_cow = 0;
+ create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL;
+- dev->fd = open_ubd_file(dev->file, &dev->openflags, &dev->cow.file,
++ back_ptr = dev->no_cow ? NULL : &dev->cow.file;
++ dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr,
+ &dev->cow.bitmap_offset, &dev->cow.bitmap_len,
+ &dev->cow.data_offset, create_ptr);
+
+ if((dev->fd == -ENOENT) && create_cow){
+- n = dev - ubd_dev;
+ dev->fd = create_cow_file(dev->file, dev->cow.file,
+ dev->openflags, 1 << 9,
+ &dev->cow.bitmap_offset,
+@@ -458,7 +490,10 @@
+ if(dev->cow.file != NULL){
+ err = -ENOMEM;
+ dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len);
+- if(dev->cow.bitmap == NULL) goto error;
++ if(dev->cow.bitmap == NULL){
++ printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
++ goto error;
++ }
+ flush_tlb_kernel_vm();
+
+ err = read_cow_bitmap(dev->fd, dev->cow.bitmap,
+@@ -484,17 +519,31 @@
+
+ {
+ struct gendisk *disk;
++ char from[sizeof("ubd/nnnnn\0")], to[sizeof("discnnnnn/disc\0")];
++ int err;
+
+ disk = alloc_disk(1 << UBD_SHIFT);
+- if (!disk)
+- return -ENOMEM;
++ if(disk == NULL)
++ return(-ENOMEM);
+
+ disk->major = major;
+ disk->first_minor = unit << UBD_SHIFT;
+ disk->fops = &ubd_blops;
+ set_capacity(disk, size / 512);
+- sprintf(disk->disk_name, "ubd");
+- sprintf(disk->devfs_name, "ubd/disc%d", unit);
++ if(major == MAJOR_NR){
++ sprintf(disk->disk_name, "ubd%d", unit);
++ sprintf(disk->devfs_name, "ubd/disc%d", unit);
++ sprintf(from, "ubd/%d", unit);
++ sprintf(to, "disc%d/disc", unit);
++ err = devfs_mk_symlink(from, to);
++ if(err)
++ printk("ubd_new_disk failed to make link from %s to "
++ "%s, error = %d\n", from, to, err);
++ }
++ else {
++ sprintf(disk->disk_name, "ubd_fake%d", unit);
++ sprintf(disk->devfs_name, "ubd_fake/disc%d", unit);
++ }
+
+ disk->private_data = &ubd_dev[unit];
+ disk->queue = ubd_queue;
+@@ -509,10 +558,7 @@
+ struct ubd *dev = &ubd_dev[n];
+ int err;
+
+- if(dev->is_dir)
+- return(-EISDIR);
+-
+- if (!dev->file)
++ if(dev->file == NULL)
+ return(-ENODEV);
+
+ if (ubd_open_dev(dev))
+@@ -526,7 +572,7 @@
+ if(err)
+ return(err);
+
+- if(fake_major)
++ if(fake_major != MAJOR_NR)
+ ubd_new_disk(fake_major, dev->size, n,
+ &fake_gendisk[n]);
+
+@@ -564,42 +610,42 @@
+ return(err);
+ }
+
+-static int ubd_get_config(char *dev, char *str, int size, char **error_out)
++static int ubd_get_config(char *name, char *str, int size, char **error_out)
+ {
+- struct ubd *ubd;
++ struct ubd *dev;
+ char *end;
+- int major, n = 0;
++ int n, len = 0;
+
+- major = simple_strtoul(dev, &end, 0);
+- if((*end != '\0') || (end == dev)){
+- *error_out = "ubd_get_config : didn't parse major number";
++ n = simple_strtoul(name, &end, 0);
++ if((*end != '\0') || (end == name)){
++ *error_out = "ubd_get_config : didn't parse device number";
+ return(-1);
+ }
+
+- if((major >= MAX_DEV) || (major < 0)){
+- *error_out = "ubd_get_config : major number out of range";
++ if((n >= MAX_DEV) || (n < 0)){
++ *error_out = "ubd_get_config : device number out of range";
+ return(-1);
+ }
+
+- ubd = &ubd_dev[major];
++ dev = &ubd_dev[n];
+ spin_lock(&ubd_lock);
+
+- if(ubd->file == NULL){
+- CONFIG_CHUNK(str, size, n, "", 1);
++ if(dev->file == NULL){
++ CONFIG_CHUNK(str, size, len, "", 1);
+ goto out;
+ }
+
+- CONFIG_CHUNK(str, size, n, ubd->file, 0);
++ CONFIG_CHUNK(str, size, len, dev->file, 0);
+
+- if(ubd->cow.file != NULL){
+- CONFIG_CHUNK(str, size, n, ",", 0);
+- CONFIG_CHUNK(str, size, n, ubd->cow.file, 1);
++ if(dev->cow.file != NULL){
++ CONFIG_CHUNK(str, size, len, ",", 0);
++ CONFIG_CHUNK(str, size, len, dev->cow.file, 1);
+ }
+- else CONFIG_CHUNK(str, size, n, "", 1);
++ else CONFIG_CHUNK(str, size, len, "", 1);
+
+ out:
+ spin_unlock(&ubd_lock);
+- return(n);
++ return(len);
+ }
+
+ static int ubd_remove(char *str)
+@@ -607,11 +653,9 @@
+ struct ubd *dev;
+ int n, err = -ENODEV;
+
+- if(!isdigit(*str))
+- return(err); /* it should be a number 0-7/a-h */
++ n = parse_unit(&str);
+
+- n = *str - '0';
+- if(n >= MAX_DEV)
++ if((n < 0) || (n >= MAX_DEV))
+ return(err);
+
+ dev = &ubd_dev[n];
+@@ -672,7 +716,7 @@
+
+ elevator_init(ubd_queue, &elevator_noop);
+
+- if (fake_major != 0) {
++ if (fake_major != MAJOR_NR) {
+ char name[sizeof("ubd_nnn\0")];
+
+ snprintf(name, sizeof(name), "ubd_%d", fake_major);
+@@ -717,15 +761,9 @@
+ {
+ struct gendisk *disk = inode->i_bdev->bd_disk;
+ struct ubd *dev = disk->private_data;
+- int err = -EISDIR;
+-
+- if(dev->is_dir == 1)
+- goto out;
++ int err = 0;
+
+- err = 0;
+ if(dev->count == 0){
+- dev->openflags = dev->boot_openflags;
+-
+ err = ubd_open_dev(dev);
+ if(err){
+ printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
+@@ -799,15 +837,6 @@
+
+ if(req->rq_status == RQ_INACTIVE) return(1);
+
+- if(dev->is_dir){
+- strcpy(req->buffer, "HOSTFS:");
+- strcat(req->buffer, dev->file);
+- spin_lock(&ubd_io_lock);
+- end_request(req, 1);
+- spin_unlock(&ubd_io_lock);
+- return(1);
+- }
+-
+ if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
+ printk("Write attempted on readonly ubd device %s\n",
+ disk->disk_name);
+diff -Naur a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c
+--- a/arch/um/drivers/ubd_user.c Fri Aug 15 15:04:51 2003
++++ b/arch/um/drivers/ubd_user.c Fri Aug 15 15:10:54 2003
+@@ -24,142 +24,24 @@
+ #include "user.h"
+ #include "ubd_user.h"
+ #include "os.h"
++#include "cow.h"
+
+ #include <endian.h>
+ #include <byteswap.h>
+-#if __BYTE_ORDER == __BIG_ENDIAN
+-# define ntohll(x) (x)
+-# define htonll(x) (x)
+-#elif __BYTE_ORDER == __LITTLE_ENDIAN
+-# define ntohll(x) bswap_64(x)
+-# define htonll(x) bswap_64(x)
+-#else
+-#error "__BYTE_ORDER not defined"
+-#endif
+-
+-#define PATH_LEN_V1 256
+-
+-struct cow_header_v1 {
+- int magic;
+- int version;
+- char backing_file[PATH_LEN_V1];
+- time_t mtime;
+- __u64 size;
+- int sectorsize;
+-};
+-
+-#define PATH_LEN_V2 MAXPATHLEN
+-
+-struct cow_header_v2 {
+- unsigned long magic;
+- unsigned long version;
+- char backing_file[PATH_LEN_V2];
+- time_t mtime;
+- __u64 size;
+- int sectorsize;
+-};
+-
+-union cow_header {
+- struct cow_header_v1 v1;
+- struct cow_header_v2 v2;
+-};
+-
+-#define COW_MAGIC 0x4f4f4f4d /* MOOO */
+-#define COW_VERSION 2
+-
+-static void sizes(__u64 size, int sectorsize, int bitmap_offset,
+- unsigned long *bitmap_len_out, int *data_offset_out)
+-{
+- *bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize);
+-
+- *data_offset_out = bitmap_offset + *bitmap_len_out;
+- *data_offset_out = (*data_offset_out + sectorsize - 1) / sectorsize;
+- *data_offset_out *= sectorsize;
+-}
+-
+-static int read_cow_header(int fd, int *magic_out, char **backing_file_out,
+- time_t *mtime_out, __u64 *size_out,
+- int *sectorsize_out, int *bitmap_offset_out)
+-{
+- union cow_header *header;
+- char *file;
+- int err, n;
+- unsigned long version, magic;
+-
+- header = um_kmalloc(sizeof(*header));
+- if(header == NULL){
+- printk("read_cow_header - Failed to allocate header\n");
+- return(-ENOMEM);
+- }
+- err = -EINVAL;
+- n = read(fd, header, sizeof(*header));
+- if(n < offsetof(typeof(header->v1), backing_file)){
+- printk("read_cow_header - short header\n");
+- goto out;
+- }
+-
+- magic = header->v1.magic;
+- if(magic == COW_MAGIC) {
+- version = header->v1.version;
+- }
+- else if(magic == ntohl(COW_MAGIC)){
+- version = ntohl(header->v1.version);
+- }
+- else goto out;
+-
+- *magic_out = COW_MAGIC;
+-
+- if(version == 1){
+- if(n < sizeof(header->v1)){
+- printk("read_cow_header - failed to read V1 header\n");
+- goto out;
+- }
+- *mtime_out = header->v1.mtime;
+- *size_out = header->v1.size;
+- *sectorsize_out = header->v1.sectorsize;
+- *bitmap_offset_out = sizeof(header->v1);
+- file = header->v1.backing_file;
+- }
+- else if(version == 2){
+- if(n < sizeof(header->v2)){
+- printk("read_cow_header - failed to read V2 header\n");
+- goto out;
+- }
+- *mtime_out = ntohl(header->v2.mtime);
+- *size_out = ntohll(header->v2.size);
+- *sectorsize_out = ntohl(header->v2.sectorsize);
+- *bitmap_offset_out = sizeof(header->v2);
+- file = header->v2.backing_file;
+- }
+- else {
+- printk("read_cow_header - invalid COW version\n");
+- goto out;
+- }
+- err = -ENOMEM;
+- *backing_file_out = uml_strdup(file);
+- if(*backing_file_out == NULL){
+- printk("read_cow_header - failed to allocate backing file\n");
+- goto out;
+- }
+- err = 0;
+- out:
+- kfree(header);
+- return(err);
+-}
+
+ static int same_backing_files(char *from_cmdline, char *from_cow, char *cow)
+ {
+- struct stat buf1, buf2;
++ struct stat64 buf1, buf2;
+
+ if(from_cmdline == NULL) return(1);
+ if(!strcmp(from_cmdline, from_cow)) return(1);
+
+- if(stat(from_cmdline, &buf1) < 0){
++ if(stat64(from_cmdline, &buf1) < 0){
+ printk("Couldn't stat '%s', errno = %d\n", from_cmdline,
+ errno);
+ return(1);
+ }
+- if(stat(from_cow, &buf2) < 0){
++ if(stat64(from_cow, &buf2) < 0){
+ printk("Couldn't stat '%s', errno = %d\n", from_cow, errno);
+ return(1);
+ }
+@@ -178,6 +60,7 @@
+ long long actual;
+ int err;
+
++ printk("%ld", htonll(size));
+ if(stat64(file, &buf) < 0){
+ printk("Failed to stat backing file \"%s\", errno = %d\n",
+ file, errno);
+@@ -215,118 +98,6 @@
+ return(0);
+ }
+
+-static int absolutize(char *to, int size, char *from)
+-{
+- char save_cwd[256], *slash;
+- int remaining;
+-
+- if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) {
+- printk("absolutize : unable to get cwd - errno = %d\n", errno);
+- return(-1);
+- }
+- slash = strrchr(from, '/');
+- if(slash != NULL){
+- *slash = '\0';
+- if(chdir(from)){
+- *slash = '/';
+- printk("absolutize : Can't cd to '%s' - errno = %d\n",
+- from, errno);
+- return(-1);
+- }
+- *slash = '/';
+- if(getcwd(to, size) == NULL){
+- printk("absolutize : unable to get cwd of '%s' - "
+- "errno = %d\n", from, errno);
+- return(-1);
+- }
+- remaining = size - strlen(to);
+- if(strlen(slash) + 1 > remaining){
+- printk("absolutize : unable to fit '%s' into %d "
+- "chars\n", from, size);
+- return(-1);
+- }
+- strcat(to, slash);
+- }
+- else {
+- if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){
+- printk("absolutize : unable to fit '%s' into %d "
+- "chars\n", from, size);
+- return(-1);
+- }
+- strcpy(to, save_cwd);
+- strcat(to, "/");
+- strcat(to, from);
+- }
+- chdir(save_cwd);
+- return(0);
+-}
+-
+-static int write_cow_header(char *cow_file, int fd, char *backing_file,
+- int sectorsize, long long *size)
+-{
+- struct cow_header_v2 *header;
+- struct stat64 buf;
+- int err;
+-
+- err = os_seek_file(fd, 0);
+- if(err != 0){
+- printk("write_cow_header - lseek failed, errno = %d\n", errno);
+- return(-errno);
+- }
+-
+- err = -ENOMEM;
+- header = um_kmalloc(sizeof(*header));
+- if(header == NULL){
+- printk("Failed to allocate COW V2 header\n");
+- goto out;
+- }
+- header->magic = htonl(COW_MAGIC);
+- header->version = htonl(COW_VERSION);
+-
+- err = -EINVAL;
+- if(strlen(backing_file) > sizeof(header->backing_file) - 1){
+- printk("Backing file name \"%s\" is too long - names are "
+- "limited to %d characters\n", backing_file,
+- sizeof(header->backing_file) - 1);
+- goto out_free;
+- }
+-
+- if(absolutize(header->backing_file, sizeof(header->backing_file),
+- backing_file))
+- goto out_free;
+-
+- err = stat64(header->backing_file, &buf);
+- if(err < 0){
+- printk("Stat of backing file '%s' failed, errno = %d\n",
+- header->backing_file, errno);
+- err = -errno;
+- goto out_free;
+- }
+-
+- err = os_file_size(header->backing_file, size);
+- if(err){
+- printk("Couldn't get size of backing file '%s', errno = %d\n",
+- header->backing_file, -*size);
+- goto out_free;
+- }
+-
+- header->mtime = htonl(buf.st_mtime);
+- header->size = htonll(*size);
+- header->sectorsize = htonl(sectorsize);
+-
+- err = write(fd, header, sizeof(*header));
+- if(err != sizeof(*header)){
+- printk("Write of header to new COW file '%s' failed, "
+- "errno = %d\n", cow_file, errno);
+- goto out_free;
+- }
+- err = 0;
+- out_free:
+- kfree(header);
+- out:
+- return(err);
+-}
+-
+ int open_ubd_file(char *file, struct openflags *openflags,
+ char **backing_file_out, int *bitmap_offset_out,
+ unsigned long *bitmap_len_out, int *data_offset_out,
+@@ -346,10 +117,17 @@
+ if((fd = os_open_file(file, *openflags, mode)) < 0)
+ return(fd);
+ }
++
++ err = os_lock_file(fd, openflags->w);
++ if(err){
++ printk("Failed to lock '%s', errno = %d\n", file, -err);
++ goto error;
++ }
++
+ if(backing_file_out == NULL) return(fd);
+
+- err = read_cow_header(fd, &magic, &backing_file, &mtime, &size,
+- &sectorsize, bitmap_offset_out);
++ err = read_cow_header(file_reader, &fd, &magic, &backing_file, &mtime,
++ &size, &sectorsize, bitmap_offset_out);
+ if(err && (*backing_file_out != NULL)){
+ printk("Failed to read COW header from COW file \"%s\", "
+ "errno = %d\n", file, err);
+@@ -376,12 +154,12 @@
+ if(err) goto error;
+ }
+
+- sizes(size, sectorsize, *bitmap_offset_out, bitmap_len_out,
+- data_offset_out);
++ cow_sizes(size, sectorsize, *bitmap_offset_out, bitmap_len_out,
++ data_offset_out);
+
+ return(fd);
+ error:
+- close(fd);
++ os_close_file(fd);
+ return(err);
+ }
+
+@@ -389,10 +167,7 @@
+ int sectorsize, int *bitmap_offset_out,
+ unsigned long *bitmap_len_out, int *data_offset_out)
+ {
+- __u64 blocks;
+- long zero;
+- int err, fd, i;
+- long long size;
++ int err, fd;
+
+ flags.c = 1;
+ fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL);
+@@ -403,29 +178,12 @@
+ goto out;
+ }
+
+- err = write_cow_header(cow_file, fd, backing_file, sectorsize, &size);
+- if(err) goto out_close;
+-
+- blocks = (size + sectorsize - 1) / sectorsize;
+- blocks = (blocks + sizeof(long) * 8 - 1) / (sizeof(long) * 8);
+- zero = 0;
+- for(i = 0; i < blocks; i++){
+- err = write(fd, &zero, sizeof(zero));
+- if(err != sizeof(zero)){
+- printk("Write of bitmap to new COW file '%s' failed, "
+- "errno = %d\n", cow_file, errno);
+- goto out_close;
+- }
+- }
+-
+- sizes(size, sectorsize, sizeof(struct cow_header_v2),
+- bitmap_len_out, data_offset_out);
+- *bitmap_offset_out = sizeof(struct cow_header_v2);
+-
+- return(fd);
+-
+- out_close:
+- close(fd);
++ err = init_cow_file(fd, cow_file, backing_file, sectorsize,
++ bitmap_offset_out, bitmap_len_out,
++ data_offset_out);
++ if(!err)
++ return(fd);
++ os_close_file(fd);
+ out:
+ return(err);
+ }
+@@ -448,14 +206,6 @@
+ else return(n);
+ }
+
+-int ubd_is_dir(char *file)
+-{
+- struct stat64 buf;
+-
+- if(stat64(file, &buf) < 0) return(0);
+- return(S_ISDIR(buf.st_mode));
+-}
+-
+ void do_io(struct io_thread_req *req)
+ {
+ char *buf;
+diff -Naur a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c
+--- a/arch/um/drivers/xterm.c Fri Aug 15 15:04:00 2003
++++ b/arch/um/drivers/xterm.c Fri Aug 15 15:10:18 2003
+@@ -108,7 +108,7 @@
+ }
+ close(fd);
+
+- fd = create_unix_socket(file, sizeof(file));
++ fd = create_unix_socket(file, sizeof(file), 1);
+ if(fd < 0){
+ printk("xterm_open : create_unix_socket failed, errno = %d\n",
+ -fd);
+diff -Naur a/arch/um/drivers/xterm_kern.c b/arch/um/drivers/xterm_kern.c
+--- a/arch/um/drivers/xterm_kern.c Fri Aug 15 15:07:37 2003
++++ b/arch/um/drivers/xterm_kern.c Fri Aug 15 15:13:03 2003
+@@ -5,9 +5,12 @@
+
+ #include "linux/errno.h"
+ #include "linux/slab.h"
++#include "linux/signal.h"
++#include "linux/interrupt.h"
+ #include "asm/semaphore.h"
+ #include "asm/irq.h"
+ #include "irq_user.h"
++#include "irq_kern.h"
+ #include "kern_util.h"
+ #include "os.h"
+ #include "xterm.h"
+@@ -19,17 +22,18 @@
+ int new_fd;
+ };
+
+-static void xterm_interrupt(int irq, void *data, struct pt_regs *regs)
++static irqreturn_t xterm_interrupt(int irq, void *data, struct pt_regs *regs)
+ {
+ struct xterm_wait *xterm = data;
+ int fd;
+
+ fd = os_rcv_fd(xterm->fd, &xterm->pid);
+ if(fd == -EAGAIN)
+- return;
++ return(IRQ_NONE);
+
+ xterm->new_fd = fd;
+ up(&xterm->sem);
++ return(IRQ_HANDLED);
+ }
+
+ int xterm_fd(int socket, int *pid_out)
+diff -Naur a/arch/um/dyn.lds.S b/arch/um/dyn.lds.S
+--- a/arch/um/dyn.lds.S Fri Aug 15 15:06:20 2003
++++ b/arch/um/dyn.lds.S Fri Aug 15 15:12:31 2003
+@@ -15,7 +15,11 @@
+ . = ALIGN(4096); /* Init code and data */
+ _stext = .;
+ __init_begin = .;
+- .text.init : { *(.text.init) }
++ .init.text : {
++ _sinittext = .;
++ *(.init.text)
++ _einittext = .;
++ }
+
+ . = ALIGN(4096);
+
+@@ -67,7 +71,7 @@
+
+ #include "asm/common.lds.S"
+
+- .data.init : { *(.data.init) }
++ init.data : { *(.init.data) }
+
+ /* Ensure the __preinit_array_start label is properly aligned. We
+ could instead move the label definition inside the section, but
+diff -Naur a/arch/um/include/irq_kern.h b/arch/um/include/irq_kern.h
+--- a/arch/um/include/irq_kern.h Wed Dec 31 19:00:00 1969
++++ b/arch/um/include/irq_kern.h Fri Aug 15 15:11:53 2003
+@@ -0,0 +1,28 @@
++/*
++ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
++ * Licensed under the GPL
++ */
++
++#ifndef __IRQ_KERN_H__
++#define __IRQ_KERN_H__
++
++#include "linux/interrupt.h"
++
++extern int um_request_irq(unsigned int irq, int fd, int type,
++ irqreturn_t (*handler)(int, void *,
++ struct pt_regs *),
++ unsigned long irqflags, const char * devname,
++ void *dev_id);
++
++#endif
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h
+--- a/arch/um/include/kern_util.h Fri Aug 15 15:05:04 2003
++++ b/arch/um/include/kern_util.h Fri Aug 15 15:11:18 2003
+@@ -63,10 +63,9 @@
+ extern void *syscall_sp(void *t);
+ extern void syscall_trace(void);
+ extern int hz(void);
+-extern void idle_timer(void);
++extern void uml_idle_timer(void);
+ extern unsigned int do_IRQ(int irq, union uml_pt_regs *regs);
+ extern int external_pid(void *t);
+-extern int pid_to_processor_id(int pid);
+ extern void boot_timer_handler(int sig);
+ extern void interrupt_end(void);
+ extern void initial_thread_cb(void (*proc)(void *), void *arg);
+@@ -90,9 +89,7 @@
+ extern char *uml_strdup(char *string);
+ extern void unprotect_kernel_mem(void);
+ extern void protect_kernel_mem(void);
+-extern void set_kmem_end(unsigned long);
+ extern void uml_cleanup(void);
+-extern int pid_to_processor_id(int pid);
+ extern void set_current(void *t);
+ extern void lock_signalled_task(void *t);
+ extern void IPI_handler(int cpu);
+@@ -101,7 +98,9 @@
+ extern int clear_user_proc(void *buf, int size);
+ extern int copy_to_user_proc(void *to, void *from, int size);
+ extern int copy_from_user_proc(void *to, void *from, int size);
++extern int strlen_user_proc(char *str);
+ extern void bus_handler(int sig, union uml_pt_regs *regs);
++extern void winch(int sig, union uml_pt_regs *regs);
+ extern long execute_syscall(void *r);
+ extern int smp_sigio_handler(void);
+ extern void *get_current(void);
+diff -Naur a/arch/um/include/line.h b/arch/um/include/line.h
+--- a/arch/um/include/line.h Fri Aug 15 15:07:40 2003
++++ b/arch/um/include/line.h Fri Aug 15 15:13:11 2003
+@@ -9,12 +9,14 @@
+ #include "linux/list.h"
+ #include "linux/workqueue.h"
+ #include "linux/tty.h"
++#include "linux/interrupt.h"
+ #include "asm/semaphore.h"
+ #include "chan_user.h"
+ #include "mconsole_kern.h"
+
+ struct line_driver {
+ char *name;
++ char *device_name;
+ char *devfs_name;
+ short major;
+ short minor_start;
+@@ -67,8 +69,9 @@
+
+ #define LINES_INIT(n) { num : n }
+
+-extern void line_interrupt(int irq, void *data, struct pt_regs *unused);
+-extern void line_write_interrupt(int irq, void *data, struct pt_regs *unused);
++extern irqreturn_t line_interrupt(int irq, void *data, struct pt_regs *unused);
++extern irqreturn_t line_write_interrupt(int irq, void *data,
++ struct pt_regs *unused);
+ extern void line_close(struct line *lines, struct tty_struct *tty);
+ extern int line_open(struct line *lines, struct tty_struct *tty,
+ struct chan_opts *opts);
+diff -Naur a/arch/um/include/mconsole.h b/arch/um/include/mconsole.h
+--- a/arch/um/include/mconsole.h Fri Aug 15 15:05:26 2003
++++ b/arch/um/include/mconsole.h Fri Aug 15 15:11:43 2003
+@@ -77,6 +77,7 @@
+ extern void mconsole_cad(struct mc_request *req);
+ extern void mconsole_stop(struct mc_request *req);
+ extern void mconsole_go(struct mc_request *req);
++extern void mconsole_log(struct mc_request *req);
+
+ extern int mconsole_get_request(int fd, struct mc_request *req);
+ extern int mconsole_notify(char *sock_name, int type, const void *data,
+diff -Naur a/arch/um/include/mem.h b/arch/um/include/mem.h
+--- a/arch/um/include/mem.h Fri Aug 15 15:09:22 2003
++++ b/arch/um/include/mem.h Fri Aug 15 15:14:01 2003
+@@ -13,7 +13,6 @@
+ };
+
+ extern void set_usable_vm(unsigned long start, unsigned long end);
+-extern void set_kmem_end(unsigned long new);
+
+ #endif
+
+diff -Naur a/arch/um/include/mem_user.h b/arch/um/include/mem_user.h
+--- a/arch/um/include/mem_user.h Fri Aug 15 15:07:31 2003
++++ b/arch/um/include/mem_user.h Fri Aug 15 15:12:54 2003
+@@ -51,9 +51,6 @@
+
+ extern int init_mem_user(void);
+ extern int create_mem_file(unsigned long len);
+-extern void setup_range(int fd, char *driver, unsigned long start,
+- unsigned long pfn, unsigned long total, int need_vm,
+- struct mem_region *region, void *reserved);
+ extern void setup_memory(void *entry);
+ extern unsigned long find_iomem(char *driver, unsigned long *len_out);
+ extern int init_maps(struct mem_region *region);
+diff -Naur a/arch/um/include/os.h b/arch/um/include/os.h
+--- a/arch/um/include/os.h Fri Aug 15 15:04:50 2003
++++ b/arch/um/include/os.h Fri Aug 15 15:10:48 2003
+@@ -103,10 +103,11 @@
+ extern int os_shutdown_socket(int fd, int r, int w);
+ extern void os_close_file(int fd);
+ extern int os_rcv_fd(int fd, int *helper_pid_out);
+-extern int create_unix_socket(char *file, int len);
++extern int create_unix_socket(char *file, int len, int close_on_exec);
+ extern int os_connect_socket(char *name);
+ extern int os_file_type(char *file);
+ extern int os_file_mode(char *file, struct openflags *mode_out);
++extern int os_lock_file(int fd, int excl);
+
+ extern unsigned long os_process_pc(int pid);
+ extern int os_process_parent(int pid);
+@@ -120,6 +121,7 @@
+ extern int os_protect_memory(void *addr, unsigned long len,
+ int r, int w, int x);
+ extern int os_unmap_memory(void *addr, int len);
++extern void os_flush_stdout(void);
+
+ #endif
+
+diff -Naur a/arch/um/include/sysdep-i386/sigcontext.h b/arch/um/include/sysdep-i386/sigcontext.h
+--- a/arch/um/include/sysdep-i386/sigcontext.h Fri Aug 15 15:07:37 2003
++++ b/arch/um/include/sysdep-i386/sigcontext.h Fri Aug 15 15:13:03 2003
+@@ -28,8 +28,8 @@
+ */
+ #define SC_START_SYSCALL(sc) do SC_EAX(sc) = -ENOSYS; while(0)
+
+-/* These are General Protection and Page Fault */
+-#define SEGV_IS_FIXABLE(trap) ((trap == 13) || (trap == 14))
++/* This is Page Fault */
++#define SEGV_IS_FIXABLE(trap) (trap == 14)
+
+ #define SC_SEGV_IS_FIXABLE(sc) (SEGV_IS_FIXABLE(SC_TRAPNO(sc)))
+
+diff -Naur a/arch/um/include/ubd_user.h b/arch/um/include/ubd_user.h
+--- a/arch/um/include/ubd_user.h Fri Aug 15 15:06:34 2003
++++ b/arch/um/include/ubd_user.h Fri Aug 15 15:12:37 2003
+@@ -39,7 +39,6 @@
+ extern int write_ubd_fs(int fd, char *buffer, int len);
+ extern int start_io_thread(unsigned long sp, int *fds_out);
+ extern void do_io(struct io_thread_req *req);
+-extern int ubd_is_dir(char *file);
+
+ static inline int ubd_test_bit(__u64 bit, unsigned char *data)
+ {
+diff -Naur a/arch/um/include/user.h b/arch/um/include/user.h
+--- a/arch/um/include/user.h Fri Aug 15 15:03:58 2003
++++ b/arch/um/include/user.h Fri Aug 15 15:10:14 2003
+@@ -14,7 +14,7 @@
+ extern void kfree(void *ptr);
+ extern int in_aton(char *str);
+ extern int open_gdb_chan(void);
+-
++extern int strlcpy(char *, const char *, int);
+ #endif
+
+ /*
+diff -Naur a/arch/um/include/user_util.h b/arch/um/include/user_util.h
+--- a/arch/um/include/user_util.h Fri Aug 15 15:04:33 2003
++++ b/arch/um/include/user_util.h Fri Aug 15 15:10:32 2003
+@@ -59,7 +59,6 @@
+ extern void *add_signal_handler(int sig, void (*handler)(int));
+ extern int start_fork_tramp(void *arg, unsigned long temp_stack,
+ int clone_flags, int (*tramp)(void *));
+-extern int clone_and_wait(int (*fn)(void *), void *arg, void *sp, int flags);
+ extern int linux_main(int argc, char **argv);
+ extern void set_cmdline(char *cmd);
+ extern void input_cb(void (*proc)(void *), void *arg, int arg_len);
+@@ -90,7 +89,8 @@
+ extern int arch_fixup(unsigned long address, void *sc_ptr);
+ extern void forward_pending_sigio(int target);
+ extern int can_do_skas(void);
+-
++extern void arch_init_thread(void);
++
+ #endif
+
+ /*
+diff -Naur a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
+--- a/arch/um/kernel/Makefile Fri Aug 15 15:07:32 2003
++++ b/arch/um/kernel/Makefile Fri Aug 15 15:12:57 2003
+@@ -19,6 +19,8 @@
+ obj-$(CONFIG_MODE_TT) += tt/
+ obj-$(CONFIG_MODE_SKAS) += skas/
+
++clean-files := config.c
++
+ user-objs-$(CONFIG_TTY_LOG) += tty_log.o
+
+ USER_OBJS := $(filter %_user.o,$(obj-y)) $(user-objs-y) config.o helper.o \
+@@ -43,17 +45,13 @@
+ $(obj)/frame.o: $(src)/frame.c
+ $(CC) $(CFLAGS_$(notdir $@)) -c -o $@ $<
+
+-QUOTE = 'my $$config=`cat $(TOPDIR)/.config`; $$config =~ s/"/\\"/g ; while(<STDIN>) { $$_ =~ s/CONFIG/$$config/; print $$_ }'
++QUOTE = 'my $$config=`cat $(TOPDIR)/.config`; $$config =~ s/"/\\"/g ; $$config =~ s/\n/\\n"\n"/g ; while(<STDIN>) { $$_ =~ s/CONFIG/$$config/; print $$_ }'
+
+ $(obj)/config.c : $(src)/config.c.in $(TOPDIR)/.config
+ $(PERL) -e $(QUOTE) < $(src)/config.c.in > $@
+
+ $(obj)/config.o : $(obj)/config.c
+
+-clean:
+- rm -f config.c
+- for dir in $(subdir-y) ; do $(MAKE) -C $$dir clean; done
+-
+ modules:
+
+ fastdep:
+diff -Naur a/arch/um/kernel/config.c.in b/arch/um/kernel/config.c.in
+--- a/arch/um/kernel/config.c.in Fri Aug 15 15:07:37 2003
++++ b/arch/um/kernel/config.c.in Fri Aug 15 15:13:03 2003
+@@ -7,9 +7,7 @@
+ #include <stdlib.h>
+ #include "init.h"
+
+-static __initdata char *config = "
+-CONFIG
+-";
++static __initdata char *config = "CONFIG";
+
+ static int __init print_config(char *line, int *add)
+ {
+diff -Naur a/arch/um/kernel/exec_kern.c b/arch/um/kernel/exec_kern.c
+--- a/arch/um/kernel/exec_kern.c Fri Aug 15 15:04:54 2003
++++ b/arch/um/kernel/exec_kern.c Fri Aug 15 15:11:03 2003
+@@ -32,10 +32,15 @@
+ CHOOSE_MODE_PROC(start_thread_tt, start_thread_skas, regs, eip, esp);
+ }
+
++extern void log_exec(char **argv, void *tty);
++
+ static int execve1(char *file, char **argv, char **env)
+ {
+ int error;
+
++#ifdef CONFIG_TTY_LOG
++ log_exec(argv, current->tty);
++#endif
+ error = do_execve(file, argv, env, &current->thread.regs);
+ if (error == 0){
+ current->ptrace &= ~PT_DTRACE;
+diff -Naur a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c
+--- a/arch/um/kernel/init_task.c Fri Aug 15 15:09:24 2003
++++ b/arch/um/kernel/init_task.c Fri Aug 15 15:14:04 2003
+@@ -17,6 +17,7 @@
+ struct mm_struct init_mm = INIT_MM(init_mm);
+ static struct files_struct init_files = INIT_FILES;
+ static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
++static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+
+ /*
+ * Initial task structure.
+@@ -38,26 +39,12 @@
+ __attribute__((__section__(".data.init_task"))) =
+ { INIT_THREAD_INFO(init_task) };
+
+-struct task_struct *alloc_task_struct(void)
+-{
+- return((struct task_struct *)
+- __get_free_pages(GFP_KERNEL, CONFIG_KERNEL_STACK_ORDER));
+-}
+-
+ void unprotect_stack(unsigned long stack)
+ {
+ protect_memory(stack, (1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE,
+ 1, 1, 0, 1);
+ }
+
+-void free_task_struct(struct task_struct *task)
+-{
+- /* free_pages decrements the page counter and only actually frees
+- * the pages if they are now not accessed by anything.
+- */
+- free_pages((unsigned long) task, CONFIG_KERNEL_STACK_ORDER);
+-}
+-
+ /*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+diff -Naur a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
+--- a/arch/um/kernel/irq.c Fri Aug 15 15:07:53 2003
++++ b/arch/um/kernel/irq.c Fri Aug 15 15:13:18 2003
+@@ -28,6 +28,7 @@
+ #include "user_util.h"
+ #include "kern_util.h"
+ #include "irq_user.h"
++#include "irq_kern.h"
+
+ static void register_irq_proc (unsigned int irq);
+
+@@ -82,65 +83,52 @@
+ end_none
+ };
+
+-/* Not changed */
+-volatile unsigned long irq_err_count;
+-
+ /*
+ * Generic, controller-independent functions:
+ */
+
+-int get_irq_list(char *buf)
++int show_interrupts(struct seq_file *p, void *v)
+ {
+ int i, j;
+- unsigned long flags;
+ struct irqaction * action;
+- char *p = buf;
++ unsigned long flags;
+
+- p += sprintf(p, " ");
+- for (j=0; j<num_online_cpus(); j++)
+- p += sprintf(p, "CPU%d ",j);
+- *p++ = '\n';
++ seq_printf(p, " ");
++ for (j=0; j<NR_CPUS; j++)
++ if (cpu_online(j))
++ seq_printf(p, "CPU%d ",j);
++ seq_putc(p, '\n');
+
+ for (i = 0 ; i < NR_IRQS ; i++) {
+ spin_lock_irqsave(&irq_desc[i].lock, flags);
+ action = irq_desc[i].action;
+ if (!action)
+- goto end;
+- p += sprintf(p, "%3d: ",i);
++ goto skip;
++ seq_printf(p, "%3d: ",i);
+ #ifndef CONFIG_SMP
+- p += sprintf(p, "%10u ", kstat_irqs(i));
++ seq_printf(p, "%10u ", kstat_irqs(i));
+ #else
+- for (j = 0; j < num_online_cpus(); j++)
+- p += sprintf(p, "%10u ",
+- kstat_cpu(cpu_logical_map(j)).irqs[i]);
++ for (j = 0; j < NR_CPUS; j++)
++ if (cpu_online(j))
++ seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ #endif
+- p += sprintf(p, " %14s", irq_desc[i].handler->typename);
+- p += sprintf(p, " %s", action->name);
++ seq_printf(p, " %14s", irq_desc[i].handler->typename);
++ seq_printf(p, " %s", action->name);
+
+ for (action=action->next; action; action = action->next)
+- p += sprintf(p, ", %s", action->name);
+- *p++ = '\n';
+- end:
++ seq_printf(p, ", %s", action->name);
++
++ seq_putc(p, '\n');
++skip:
+ spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+ }
+- p += sprintf(p, "\n");
+-#ifdef notdef
+-#ifdef CONFIG_SMP
+- p += sprintf(p, "LOC: ");
+- for (j = 0; j < num_online_cpus(); j++)
+- p += sprintf(p, "%10u ",
+- apic_timer_irqs[cpu_logical_map(j)]);
+- p += sprintf(p, "\n");
+-#endif
+-#endif
+- p += sprintf(p, "ERR: %10lu\n", irq_err_count);
+- return p - buf;
+-}
+-
++ seq_printf(p, "NMI: ");
++ for (j = 0; j < NR_CPUS; j++)
++ if (cpu_online(j))
++ seq_printf(p, "%10u ", nmi_count(j));
++ seq_putc(p, '\n');
+
+-int show_interrupts(struct seq_file *p, void *v)
+-{
+- return(0);
++ return 0;
+ }
+
+ /*
+@@ -281,13 +269,12 @@
+ * 0 return value means that this irq is already being
+ * handled by some other CPU. (or is disabled)
+ */
+- int cpu = smp_processor_id();
+ irq_desc_t *desc = irq_desc + irq;
+ struct irqaction * action;
+ unsigned int status;
+
+ irq_enter();
+- kstat_cpu(cpu).irqs[irq]++;
++ kstat_this_cpu.irqs[irq]++;
+ spin_lock(&desc->lock);
+ desc->handler->ack(irq);
+ /*
+@@ -384,7 +371,7 @@
+ */
+
+ int request_irq(unsigned int irq,
+- void (*handler)(int, void *, struct pt_regs *),
++ irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ unsigned long irqflags,
+ const char * devname,
+ void *dev_id)
+@@ -430,15 +417,19 @@
+ }
+
+ int um_request_irq(unsigned int irq, int fd, int type,
+- void (*handler)(int, void *, struct pt_regs *),
++ irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ unsigned long irqflags, const char * devname,
+ void *dev_id)
+ {
+- int retval;
++ int err;
+
+- retval = request_irq(irq, handler, irqflags, devname, dev_id);
+- if(retval) return(retval);
+- return(activate_fd(irq, fd, type, dev_id));
++ err = request_irq(irq, handler, irqflags, devname, dev_id);
++ if(err)
++ return(err);
++
++ if(fd != -1)
++ err = activate_fd(irq, fd, type, dev_id);
++ return(err);
+ }
+
+ /* this was setup_x86_irq but it seems pretty generic */
+diff -Naur a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
+--- a/arch/um/kernel/mem.c Fri Aug 15 15:05:20 2003
++++ b/arch/um/kernel/mem.c Fri Aug 15 15:11:21 2003
+@@ -119,11 +119,6 @@
+ return(kmem_top);
+ }
+
+-void set_kmem_end(unsigned long new)
+-{
+- kmem_top = new;
+-}
+-
+ #ifdef CONFIG_HIGHMEM
+ /* Changed during early boot */
+ pte_t *kmap_pte;
+@@ -218,7 +213,7 @@
+ if(regions[i] == NULL) break;
+ }
+ if(i == NREGIONS){
+- printk("setup_range : no free regions\n");
++ printk("setup_one_range : no free regions\n");
+ i = -1;
+ goto out;
+ }
+@@ -227,7 +222,9 @@
+ fd = create_mem_file(len);
+
+ if(region == NULL){
+- region = alloc_bootmem_low_pages(sizeof(*region));
++ if(kmalloc_ok)
++ region = kmalloc(sizeof(*region), GFP_KERNEL);
++ else region = alloc_bootmem_low_pages(sizeof(*region));
+ if(region == NULL)
+ panic("Failed to allocating mem_region");
+ }
+@@ -528,9 +525,9 @@
+ return(NREGIONS);
+ }
+
+-void setup_range(int fd, char *driver, unsigned long start, unsigned long pfn,
+- unsigned long len, int need_vm, struct mem_region *region,
+- void *reserved)
++static void setup_range(int fd, char *driver, unsigned long start,
++ unsigned long pfn, unsigned long len, int need_vm,
++ struct mem_region *region, void *reserved)
+ {
+ int i, cur;
+
+diff -Naur a/arch/um/kernel/mem_user.c b/arch/um/kernel/mem_user.c
+--- a/arch/um/kernel/mem_user.c Fri Aug 15 15:06:25 2003
++++ b/arch/um/kernel/mem_user.c Fri Aug 15 15:12:36 2003
+@@ -111,6 +111,11 @@
+ offset = 0;
+ }
+
++ if(offset >= region->len){
++ printf("%d bytes of physical memory is insufficient\n",
++ region->len);
++ exit(1);
++ }
+ loc = mmap(start, region->len - offset, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_FIXED, region->fd, offset);
+ if(loc != start){
+@@ -122,26 +127,26 @@
+
+ static int __init parse_iomem(char *str, int *add)
+ {
+- struct stat buf;
++ struct stat64 buf;
+ char *file, *driver;
+ int fd;
+
+ driver = str;
+ file = strchr(str,',');
+ if(file == NULL){
+- printk("parse_iomem : failed to parse iomem\n");
++ printf("parse_iomem : failed to parse iomem\n");
+ return(1);
+ }
+ *file = '\0';
+ file++;
+ fd = os_open_file(file, of_rdwr(OPENFLAGS()), 0);
+ if(fd < 0){
+- printk("parse_iomem - Couldn't open io file, errno = %d\n",
++ printf("parse_iomem - Couldn't open io file, errno = %d\n",
+ errno);
+ return(1);
+ }
+- if(fstat(fd, &buf) < 0) {
+- printk("parse_iomem - cannot fstat file, errno = %d\n", errno);
++ if(fstat64(fd, &buf) < 0) {
++ printf("parse_iomem - cannot fstat file, errno = %d\n", errno);
+ return(1);
+ }
+ add_iomem(driver, fd, buf.st_size);
+diff -Naur a/arch/um/kernel/process.c b/arch/um/kernel/process.c
+--- a/arch/um/kernel/process.c Fri Aug 15 15:08:15 2003
++++ b/arch/um/kernel/process.c Fri Aug 15 15:13:26 2003
+@@ -72,7 +72,6 @@
+ SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+ set_handler(SIGUSR2, (__sighandler_t) sig_handler,
+ SA_NOMASK | flags, -1);
+- (void) CHOOSE_MODE(signal(SIGCHLD, SIG_IGN), (void *) 0);
+ signal(SIGHUP, SIG_IGN);
+
+ init_irq_signals(altstack);
+@@ -127,7 +126,8 @@
+ if(err < 0) panic("Waiting for outer trampoline failed - errno = %d",
+ errno);
+ if(!WIFSIGNALED(status) || (WTERMSIG(status) != SIGKILL))
+- panic("outer trampoline didn't exit with SIGKILL");
++ panic("outer trampoline didn't exit with SIGKILL, "
++ "status = %d", status);
+
+ return(arg.pid);
+ }
+diff -Naur a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c
+--- a/arch/um/kernel/process_kern.c Fri Aug 15 15:06:24 2003
++++ b/arch/um/kernel/process_kern.c Fri Aug 15 15:12:35 2003
+@@ -52,17 +52,12 @@
+
+ struct task_struct *get_task(int pid, int require)
+ {
+- struct task_struct *task, *ret;
++ struct task_struct *ret;
+
+- ret = NULL;
+ read_lock(&tasklist_lock);
+- for_each_process(task){
+- if(task->pid == pid){
+- ret = task;
+- break;
+- }
+- }
++ ret = find_task_by_pid(pid);
+ read_unlock(&tasklist_lock);
++
+ if(require && (ret == NULL)) panic("get_task couldn't find a task\n");
+ return(ret);
+ }
+@@ -103,13 +98,14 @@
+
+ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+ {
+- struct task_struct *p;
++ int pid;
+
+ current->thread.request.u.thread.proc = fn;
+ current->thread.request.u.thread.arg = arg;
+- p = do_fork(CLONE_VM | flags, 0, NULL, 0, NULL, NULL);
+- if(IS_ERR(p)) panic("do_fork failed in kernel_thread");
+- return(p->pid);
++ pid = do_fork(CLONE_VM | flags, 0, NULL, 0, NULL, NULL);
++ if(pid < 0)
++ panic("do_fork failed in kernel_thread, errno = %d", pid);
++ return(pid);
+ }
+
+ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+@@ -157,6 +153,10 @@
+ return(current);
+ }
+
++void prepare_to_copy(struct task_struct *tsk)
++{
++}
++
+ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
+ unsigned long stack_top, struct task_struct * p,
+ struct pt_regs *regs)
+@@ -190,7 +190,7 @@
+
+ void default_idle(void)
+ {
+- idle_timer();
++ uml_idle_timer();
+
+ atomic_inc(&init_mm.mm_count);
+ current->mm = &init_mm;
+@@ -363,6 +363,11 @@
+ return(clear_user(buf, size));
+ }
+
++int strlen_user_proc(char *str)
++{
++ return(strlen_user(str));
++}
++
+ int smp_sigio_handler(void)
+ {
+ #ifdef CONFIG_SMP
+diff -Naur a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
+--- a/arch/um/kernel/ptrace.c Fri Aug 15 15:04:36 2003
++++ b/arch/um/kernel/ptrace.c Fri Aug 15 15:10:33 2003
+@@ -311,11 +311,8 @@
+
+ /* the 0x80 provides a way for the tracing parent to distinguish
+ between a syscall stop and SIGTRAP delivery */
+- current->exit_code = SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+- ? 0x80 : 0);
+- current->state = TASK_STOPPED;
+- notify_parent(current, SIGCHLD);
+- schedule();
++ ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
++ ? 0x80 : 0));
+
+ /*
+ * this isn't the same as continuing with a signal, but it will do
+diff -Naur a/arch/um/kernel/sigio_kern.c b/arch/um/kernel/sigio_kern.c
+--- a/arch/um/kernel/sigio_kern.c Fri Aug 15 15:04:52 2003
++++ b/arch/um/kernel/sigio_kern.c Fri Aug 15 15:10:59 2003
+@@ -6,18 +6,21 @@
+ #include "linux/kernel.h"
+ #include "linux/list.h"
+ #include "linux/slab.h"
+-#include "asm/irq.h"
++#include "linux/signal.h"
++#include "linux/interrupt.h"
+ #include "init.h"
+ #include "sigio.h"
+ #include "irq_user.h"
++#include "irq_kern.h"
+
+ /* Protected by sigio_lock() called from write_sigio_workaround */
+ static int sigio_irq_fd = -1;
+
+-void sigio_interrupt(int irq, void *data, struct pt_regs *unused)
++irqreturn_t sigio_interrupt(int irq, void *data, struct pt_regs *unused)
+ {
+ read_sigio_fd(sigio_irq_fd);
+ reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ);
++ return(IRQ_HANDLED);
+ }
+
+ int write_sigio_irq(int fd)
+diff -Naur a/arch/um/kernel/signal_kern.c b/arch/um/kernel/signal_kern.c
+--- a/arch/um/kernel/signal_kern.c Fri Aug 15 15:06:38 2003
++++ b/arch/um/kernel/signal_kern.c Fri Aug 15 15:12:40 2003
+@@ -36,7 +36,7 @@
+ if(sig == SIGSEGV){
+ struct k_sigaction *ka;
+
+- ka = &current->sig->action[SIGSEGV - 1];
++ ka = &current->sighand->action[SIGSEGV - 1];
+ ka->sa.sa_handler = SIG_DFL;
+ }
+ force_sig(SIGSEGV, current);
+@@ -142,7 +142,7 @@
+ return(0);
+
+ /* Whee! Actually deliver the signal. */
+- ka = &current->sig->action[sig -1 ];
++ ka = &current->sighand->action[sig -1 ];
+ err = handle_signal(regs, sig, ka, &info, oldset, error);
+ if(!err) return(1);
+
+@@ -201,7 +201,7 @@
+ }
+ }
+
+-int sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize)
++int sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize)
+ {
+ sigset_t saveset, newset;
+
+@@ -227,6 +227,42 @@
+ }
+ }
+
++int sys_sigaction(int sig, const struct old_sigaction __user *act,
++ struct old_sigaction __user *oact)
++{
++ struct k_sigaction new_ka, old_ka;
++ int ret;
++
++ if (act) {
++ old_sigset_t mask;
++ if (verify_area(VERIFY_READ, act, sizeof(*act)) ||
++ __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
++ __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
++ return -EFAULT;
++ __get_user(new_ka.sa.sa_flags, &act->sa_flags);
++ __get_user(mask, &act->sa_mask);
++ siginitset(&new_ka.sa.sa_mask, mask);
++ }
++
++ ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
++
++ if (!ret && oact) {
++ if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) ||
++ __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
++ __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
++ return -EFAULT;
++ __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
++ __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
++ }
++
++ return ret;
++}
++
++int sys_sigaltstack(const stack_t *uss, stack_t *uoss)
++{
++ return(do_sigaltstack(uss, uoss, PT_REGS_SP(&current->thread.regs)));
++}
++
+ static int copy_sc_from_user(struct pt_regs *to, void *from,
+ struct arch_frame_data *arch)
+ {
+@@ -239,8 +275,8 @@
+
+ int sys_sigreturn(struct pt_regs regs)
+ {
+- void *sc = sp_to_sc(PT_REGS_SP(&current->thread.regs));
+- void *mask = sp_to_mask(PT_REGS_SP(&current->thread.regs));
++ void __user *sc = sp_to_sc(PT_REGS_SP(&current->thread.regs));
++ void __user *mask = sp_to_mask(PT_REGS_SP(&current->thread.regs));
+ int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long);
+
+ spin_lock_irq(&current->sighand->siglock);
+@@ -257,7 +293,8 @@
+
+ int sys_rt_sigreturn(struct pt_regs regs)
+ {
+- struct ucontext *uc = sp_to_uc(PT_REGS_SP(&current->thread.regs));
++ unsigned long sp = PT_REGS_SP(&current->thread.regs);
++ struct ucontext __user *uc = sp_to_uc(sp);
+ void *fp;
+ int sig_size = _NSIG_WORDS * sizeof(unsigned long);
+
+diff -Naur a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile
+--- a/arch/um/kernel/skas/Makefile Fri Aug 15 15:05:00 2003
++++ b/arch/um/kernel/skas/Makefile Fri Aug 15 15:11:08 2003
+@@ -7,18 +7,22 @@
+ process_kern.o syscall_kern.o syscall_user.o time.o tlb.o trap_user.o \
+ sys-$(SUBARCH)/
+
++host-progs := util/mk_ptregs
++clean-files := include/skas_ptregs.h
++
+ USER_OBJS = $(filter %_user.o,$(obj-y)) process.o time.o
+ USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
+
+-include/skas_ptregs.h : util/mk_ptregs
+- util/mk_ptregs > $@
+-
+-util/mk_ptregs :
+- $(MAKE) -C util
++$(TOPDIR)/arch/um/include/skas_ptregs.h : $(src)/util/mk_ptregs
++ @echo -n ' Generating $@'
++ @$< > $@.tmp
++ @if [ -r $@ ] && cmp -s $@ $@.tmp; then \
++ echo ' (unchanged)'; \
++ rm -f $@.tmp; \
++ else \
++ echo ' (updated)'; \
++ mv -f $@.tmp $@; \
++ fi
+
+ $(USER_OBJS) : %.o: %.c
+ $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $<
+-
+-clean :
+- $(MAKE) -C util clean
+- $(RM) -f include/skas_ptregs.h
+diff -Naur a/arch/um/kernel/skas/include/mode.h b/arch/um/kernel/skas/include/mode.h
+--- a/arch/um/kernel/skas/include/mode.h Fri Aug 15 15:06:34 2003
++++ b/arch/um/kernel/skas/include/mode.h Fri Aug 15 15:12:37 2003
+@@ -20,6 +20,7 @@
+ extern void halt_skas(void);
+ extern void reboot_skas(void);
+ extern void kill_off_processes_skas(void);
++extern int is_skas_winch(int pid, int fd, void *data);
+
+ #endif
+
+diff -Naur a/arch/um/kernel/skas/include/uaccess.h b/arch/um/kernel/skas/include/uaccess.h
+--- a/arch/um/kernel/skas/include/uaccess.h Fri Aug 15 15:05:28 2003
++++ b/arch/um/kernel/skas/include/uaccess.h Fri Aug 15 15:11:44 2003
+@@ -19,7 +19,7 @@
+ #define access_ok_skas(type, addr, size) \
+ ((segment_eq(get_fs(), KERNEL_DS)) || \
+ (((unsigned long) (addr) < TASK_SIZE) && \
+- ((unsigned long) (addr) + (size) < TASK_SIZE)))
++ ((unsigned long) (addr) + (size) <= TASK_SIZE)))
+
+ static inline int verify_area_skas(int type, const void * addr,
+ unsigned long size)
+diff -Naur a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
+--- a/arch/um/kernel/skas/process.c Fri Aug 15 15:08:54 2003
++++ b/arch/um/kernel/skas/process.c Fri Aug 15 15:13:46 2003
+@@ -4,6 +4,7 @@
+ */
+
+ #include <stdlib.h>
++#include <unistd.h>
+ #include <errno.h>
+ #include <signal.h>
+ #include <setjmp.h>
+@@ -24,6 +25,16 @@
+ #include "os.h"
+ #include "proc_mm.h"
+ #include "skas_ptrace.h"
++#include "chan_user.h"
++
++int is_skas_winch(int pid, int fd, void *data)
++{
++ if(pid != getpid())
++ return(0);
++
++ register_winch_irq(-1, fd, -1, data);
++ return(1);
++}
+
+ unsigned long exec_regs[FRAME_SIZE];
+ unsigned long exec_fp_regs[HOST_FP_SIZE];
+@@ -72,8 +83,6 @@
+ handle_syscall(regs);
+ }
+
+-int userspace_pid;
+-
+ static int userspace_tramp(void *arg)
+ {
+ init_new_thread_signals(0);
+@@ -83,6 +92,8 @@
+ return(0);
+ }
+
++int userspace_pid;
++
+ void start_userspace(void)
+ {
+ void *stack;
+@@ -149,6 +160,7 @@
+ case SIGILL:
+ case SIGBUS:
+ case SIGFPE:
++ case SIGWINCH:
+ user_signal(WSTOPSIG(status), regs);
+ break;
+ default:
+@@ -328,7 +340,8 @@
+ int new_mm(int from)
+ {
+ struct proc_mm_op copy;
+- int n, fd = os_open_file("/proc/mm", of_write(OPENFLAGS()), 0);
++ int n, fd = os_open_file("/proc/mm",
++ of_cloexec(of_write(OPENFLAGS())), 0);
+
+ if(fd < 0)
+ return(-errno);
+@@ -342,6 +355,7 @@
+ printk("new_mm : /proc/mm copy_segments failed, "
+ "errno = %d\n", errno);
+ }
++
+ return(fd);
+ }
+
+diff -Naur a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c
+--- a/arch/um/kernel/skas/process_kern.c Fri Aug 15 15:04:51 2003
++++ b/arch/um/kernel/skas/process_kern.c Fri Aug 15 15:10:56 2003
+@@ -61,9 +61,8 @@
+ thread_wait(&current->thread.mode.skas.switch_buf,
+ current->thread.mode.skas.fork_buf);
+
+-#ifdef CONFIG_SMP
+- schedule_tail(NULL);
+-#endif
++ if(current->thread.prev_sched != NULL)
++ schedule_tail(current->thread.prev_sched);
+ current->thread.prev_sched = NULL;
+
+ n = run_kernel_thread(fn, arg, &current->thread.exec_buf);
+@@ -93,9 +92,8 @@
+ current->thread.mode.skas.fork_buf);
+
+ force_flush_all();
+-#ifdef CONFIG_SMP
+- schedule_tail(current->thread.prev_sched);
+-#endif
++ if(current->thread.prev_sched != NULL)
++ schedule_tail(current->thread.prev_sched);
+ current->thread.prev_sched = NULL;
+ unblock_signals();
+
+@@ -164,7 +162,7 @@
+ capture_signal_stack();
+
+ init_new_thread_signals(1);
+- idle_timer();
++ uml_idle_timer();
+
+ init_task.thread.request.u.thread.proc = start_kernel_proc;
+ init_task.thread.request.u.thread.arg = NULL;
+diff -Naur a/arch/um/kernel/skas/util/mk_ptregs.c b/arch/um/kernel/skas/util/mk_ptregs.c
+--- a/arch/um/kernel/skas/util/mk_ptregs.c Fri Aug 15 15:05:20 2003
++++ b/arch/um/kernel/skas/util/mk_ptregs.c Fri Aug 15 15:11:21 2003
+@@ -1,3 +1,4 @@
++#include <stdio.h>
+ #include <asm/ptrace.h>
+ #include <asm/user.h>
+
+diff -Naur a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
+--- a/arch/um/kernel/smp.c Fri Aug 15 15:04:50 2003
++++ b/arch/um/kernel/smp.c Fri Aug 15 15:10:52 2003
+@@ -140,8 +140,10 @@
+
+ current->thread.request.u.thread.proc = idle_proc;
+ current->thread.request.u.thread.arg = (void *) cpu;
+- new_task = do_fork(CLONE_VM | CLONE_IDLETASK, 0, NULL, 0, NULL, NULL);
+- if(IS_ERR(new_task)) panic("do_fork failed in idle_thread");
++ new_task = copy_process(CLONE_VM | CLONE_IDLETASK, 0, NULL, 0, NULL,
++ NULL);
++ if(IS_ERR(new_task))
++ panic("copy_process failed in idle_thread");
+
+ cpu_tasks[cpu] = ((struct cpu_task)
+ { .pid = new_task->thread.mode.tt.extern_pid,
+@@ -150,6 +152,7 @@
+ CHOOSE_MODE(write(new_task->thread.mode.tt.switch_pipe[1], &c,
+ sizeof(c)),
+ ({ panic("skas mode doesn't support SMP"); }));
++ wake_up_forked_process(new_task);
+ return(new_task);
+ }
+
+@@ -254,15 +257,19 @@
+ atomic_inc(&scf_finished);
+ }
+
+-int smp_call_function(void (*_func)(void *info), void *_info, int nonatomic,
+- int wait)
++int smp_call_function_on_cpu(void (*_func)(void *info), void *_info, int wait,
++ unsigned long mask)
+ {
+- int cpus = num_online_cpus() - 1;
+- int i;
+-
+- if (!cpus)
+- return 0;
++ int i, cpu, num_cpus;
+
++ cpu = get_cpu();
++ mask &= ~(1UL << cpu);
++ num_cpus = hweight32(mask);
++ if(num_cpus == 0){
++ put_cpu_no_resched();
++ return(0);
++ }
++
+ spin_lock_bh(&call_lock);
+ atomic_set(&scf_started, 0);
+ atomic_set(&scf_finished, 0);
+@@ -270,19 +277,25 @@
+ info = _info;
+
+ for (i=0;i<NR_CPUS;i++)
+- if((i != current->thread_info->cpu) &&
+- test_bit(i, &cpu_online_map))
++ if(cpu_online(i) && ((1UL << i) & mask))
+ write(cpu_data[i].ipi_pipe[1], "C", 1);
+
+- while (atomic_read(&scf_started) != cpus)
++ while(atomic_read(&scf_started) != num_cpus)
+ barrier();
+
+- if (wait)
+- while (atomic_read(&scf_finished) != cpus)
++ if(wait)
++ while(atomic_read(&scf_finished) != num_cpus)
+ barrier();
+
+ spin_unlock_bh(&call_lock);
+- return 0;
++ put_cpu_no_resched();
++ return(0);
++}
++
++int smp_call_function(void (*_func)(void *info), void *_info, int nonatomic,
++ int wait)
++{
++ return(smp_call_function_on_cpu(_func, _info, wait, cpu_online_map));
+ }
+
+ #endif
+diff -Naur a/arch/um/kernel/sys_call_table.c b/arch/um/kernel/sys_call_table.c
+--- a/arch/um/kernel/sys_call_table.c Fri Aug 15 15:07:57 2003
++++ b/arch/um/kernel/sys_call_table.c Fri Aug 15 15:13:24 2003
+@@ -219,6 +219,18 @@
+ extern syscall_handler_t sys_gettid;
+ extern syscall_handler_t sys_readahead;
+ extern syscall_handler_t sys_tkill;
++extern syscall_handler_t sys_setxattr;
++extern syscall_handler_t sys_lsetxattr;
++extern syscall_handler_t sys_fsetxattr;
++extern syscall_handler_t sys_getxattr;
++extern syscall_handler_t sys_lgetxattr;
++extern syscall_handler_t sys_fgetxattr;
++extern syscall_handler_t sys_listxattr;
++extern syscall_handler_t sys_llistxattr;
++extern syscall_handler_t sys_flistxattr;
++extern syscall_handler_t sys_removexattr;
++extern syscall_handler_t sys_lremovexattr;
++extern syscall_handler_t sys_fremovexattr;
+ extern syscall_handler_t sys_sendfile64;
+ extern syscall_handler_t sys_futex;
+ extern syscall_handler_t sys_sched_setaffinity;
+@@ -235,6 +247,19 @@
+ extern syscall_handler_t sys_epoll_wait;
+ extern syscall_handler_t sys_remap_file_pages;
+ extern syscall_handler_t sys_set_tid_address;
++extern syscall_handler_t sys_timer_create;
++extern syscall_handler_t sys_timer_settime;
++extern syscall_handler_t sys_timer_gettime;
++extern syscall_handler_t sys_timer_getoverrun;
++extern syscall_handler_t sys_timer_delete;
++extern syscall_handler_t sys_clock_settime;
++extern syscall_handler_t sys_clock_gettime;
++extern syscall_handler_t sys_clock_getres;
++extern syscall_handler_t sys_clock_nanosleep;
++extern syscall_handler_t sys_statfs64;
++extern syscall_handler_t sys_fstatfs64;
++extern syscall_handler_t sys_tgkill;
++extern syscall_handler_t sys_utimes;
+
+ #ifdef CONFIG_NFSD
+ #define NFSSERVCTL sys_nfsservctl
+@@ -459,18 +484,18 @@
+ [ __NR_getdents64 ] = sys_getdents64,
+ [ __NR_gettid ] = sys_gettid,
+ [ __NR_readahead ] = sys_readahead,
+- [ __NR_setxattr ] = sys_ni_syscall,
+- [ __NR_lsetxattr ] = sys_ni_syscall,
+- [ __NR_fsetxattr ] = sys_ni_syscall,
+- [ __NR_getxattr ] = sys_ni_syscall,
+- [ __NR_lgetxattr ] = sys_ni_syscall,
+- [ __NR_fgetxattr ] = sys_ni_syscall,
+- [ __NR_listxattr ] = sys_ni_syscall,
+- [ __NR_llistxattr ] = sys_ni_syscall,
+- [ __NR_flistxattr ] = sys_ni_syscall,
+- [ __NR_removexattr ] = sys_ni_syscall,
+- [ __NR_lremovexattr ] = sys_ni_syscall,
+- [ __NR_fremovexattr ] = sys_ni_syscall,
++ [ __NR_setxattr ] = sys_setxattr,
++ [ __NR_lsetxattr ] = sys_lsetxattr,
++ [ __NR_fsetxattr ] = sys_fsetxattr,
++ [ __NR_getxattr ] = sys_getxattr,
++ [ __NR_lgetxattr ] = sys_lgetxattr,
++ [ __NR_fgetxattr ] = sys_fgetxattr,
++ [ __NR_listxattr ] = sys_listxattr,
++ [ __NR_llistxattr ] = sys_llistxattr,
++ [ __NR_flistxattr ] = sys_flistxattr,
++ [ __NR_removexattr ] = sys_removexattr,
++ [ __NR_lremovexattr ] = sys_lremovexattr,
++ [ __NR_fremovexattr ] = sys_fremovexattr,
+ [ __NR_tkill ] = sys_tkill,
+ [ __NR_sendfile64 ] = sys_sendfile64,
+ [ __NR_futex ] = sys_futex,
+@@ -488,6 +513,19 @@
+ [ __NR_epoll_wait ] = sys_epoll_wait,
+ [ __NR_remap_file_pages ] = sys_remap_file_pages,
+ [ __NR_set_tid_address ] = sys_set_tid_address,
++ [ __NR_timer_create ] = sys_timer_create,
++ [ __NR_timer_settime ] = sys_timer_settime,
++ [ __NR_timer_gettime ] = sys_timer_gettime,
++ [ __NR_timer_getoverrun ] = sys_timer_getoverrun,
++ [ __NR_timer_delete ] = sys_timer_delete,
++ [ __NR_clock_settime ] = sys_clock_settime,
++ [ __NR_clock_gettime ] = sys_clock_gettime,
++ [ __NR_clock_getres ] = sys_clock_getres,
++ [ __NR_clock_nanosleep ] = sys_clock_nanosleep,
++ [ __NR_statfs64 ] = sys_statfs64,
++ [ __NR_fstatfs64 ] = sys_fstatfs64,
++ [ __NR_tgkill ] = sys_tgkill,
++ [ __NR_utimes ] = sys_utimes,
+
+ ARCH_SYSCALLS
+ [ LAST_SYSCALL + 1 ... NR_syscalls ] =
+diff -Naur a/arch/um/kernel/syscall_kern.c b/arch/um/kernel/syscall_kern.c
+--- a/arch/um/kernel/syscall_kern.c Fri Aug 15 15:07:37 2003
++++ b/arch/um/kernel/syscall_kern.c Fri Aug 15 15:13:03 2003
+@@ -35,39 +35,40 @@
+
+ long sys_fork(void)
+ {
+- struct task_struct *p;
++ long ret;
+
+ current->thread.forking = 1;
+- p = do_fork(SIGCHLD, 0, NULL, 0, NULL, NULL);
++ ret = do_fork(SIGCHLD, 0, NULL, 0, NULL, NULL);
+ current->thread.forking = 0;
+- return(IS_ERR(p) ? PTR_ERR(p) : p->pid);
++ return(ret);
+ }
+
+-long sys_clone(unsigned long clone_flags, unsigned long newsp)
++long sys_clone(unsigned long clone_flags, unsigned long newsp,
++ int *parent_tid, int *child_tid)
+ {
+- struct task_struct *p;
++ long ret;
+
+ current->thread.forking = 1;
+- p = do_fork(clone_flags, newsp, NULL, 0, NULL, NULL);
++ ret = do_fork(clone_flags, newsp, NULL, 0, parent_tid, child_tid);
+ current->thread.forking = 0;
+- return(IS_ERR(p) ? PTR_ERR(p) : p->pid);
++ return(ret);
+ }
+
+ long sys_vfork(void)
+ {
+- struct task_struct *p;
++ long ret;
+
+ current->thread.forking = 1;
+- p = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0, NULL, NULL);
++ ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0, NULL,
++ NULL);
+ current->thread.forking = 0;
+- return(IS_ERR(p) ? PTR_ERR(p) : p->pid);
++ return(ret);
+ }
+
+ /* common code for old and new mmaps */
+-static inline long do_mmap2(
+- unsigned long addr, unsigned long len,
+- unsigned long prot, unsigned long flags,
+- unsigned long fd, unsigned long pgoff)
++long do_mmap2(struct mm_struct *mm, unsigned long addr, unsigned long len,
++ unsigned long prot, unsigned long flags, unsigned long fd,
++ unsigned long pgoff)
+ {
+ int error = -EBADF;
+ struct file * file = NULL;
+@@ -79,9 +80,9 @@
+ goto out;
+ }
+
+- down_write(&current->mm->mmap_sem);
+- error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+- up_write(&current->mm->mmap_sem);
++ down_write(&mm->mmap_sem);
++ error = do_mmap_pgoff(mm, file, addr, len, prot, flags, pgoff);
++ up_write(&mm->mmap_sem);
+
+ if (file)
+ fput(file);
+@@ -93,7 +94,7 @@
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long pgoff)
+ {
+- return do_mmap2(addr, len, prot, flags, fd, pgoff);
++ return do_mmap2(current->mm, addr, len, prot, flags, fd, pgoff);
+ }
+
+ /*
+@@ -120,7 +121,8 @@
+ if (offset & ~PAGE_MASK)
+ goto out;
+
+- err = do_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT);
++ err = do_mmap2(current->mm, addr, len, prot, flags, fd,
++ offset >> PAGE_SHIFT);
+ out:
+ return err;
+ }
+@@ -141,37 +143,6 @@
+ return error;
+ }
+
+-int sys_sigaction(int sig, const struct old_sigaction *act,
+- struct old_sigaction *oact)
+-{
+- struct k_sigaction new_ka, old_ka;
+- int ret;
+-
+- if (act) {
+- old_sigset_t mask;
+- if (verify_area(VERIFY_READ, act, sizeof(*act)) ||
+- __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
+- __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
+- return -EFAULT;
+- __get_user(new_ka.sa.sa_flags, &act->sa_flags);
+- __get_user(mask, &act->sa_mask);
+- siginitset(&new_ka.sa.sa_mask, mask);
+- }
+-
+- ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+-
+- if (!ret && oact) {
+- if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) ||
+- __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
+- __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
+- return -EFAULT;
+- __put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+- __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+- }
+-
+- return ret;
+-}
+-
+ /*
+ * sys_ipc() is the de-multiplexer for the SysV IPC calls..
+ *
+@@ -253,7 +224,7 @@
+ return sys_shmctl (first, second,
+ (struct shmid_ds *) ptr);
+ default:
+- return -EINVAL;
++ return -ENOSYS;
+ }
+ }
+
+@@ -302,11 +273,6 @@
+ return error;
+ }
+
+-int sys_sigaltstack(const stack_t *uss, stack_t *uoss)
+-{
+- return(do_sigaltstack(uss, uoss, PT_REGS_SP(&current->thread.regs)));
+-}
+-
+ long execute_syscall(void *r)
+ {
+ return(CHOOSE_MODE_PROC(execute_syscall_tt, execute_syscall_skas, r));
+diff -Naur a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
+--- a/arch/um/kernel/sysrq.c Fri Aug 15 15:05:01 2003
++++ b/arch/um/kernel/sysrq.c Fri Aug 15 15:11:13 2003
+@@ -11,6 +11,14 @@
+ #include "sysrq.h"
+ #include "user_util.h"
+
++void show_stack(struct task_struct *task, unsigned long *sp)
++{
++ if(task)
++ show_trace_task(task);
++ else
++ show_trace(sp);
++}
++
+ void show_trace(unsigned long * stack)
+ {
+ int i;
+diff -Naur a/arch/um/kernel/time.c b/arch/um/kernel/time.c
+--- a/arch/um/kernel/time.c Fri Aug 15 15:04:49 2003
++++ b/arch/um/kernel/time.c Fri Aug 15 15:10:46 2003
+@@ -15,12 +15,16 @@
+ #include "process.h"
+ #include "signal_user.h"
+ #include "time_user.h"
++#include "kern_constants.h"
+
+ extern struct timeval xtime;
+
++struct timeval local_offset = { 0, 0 };
++
+ void timer(void)
+ {
+ gettimeofday(&xtime, NULL);
++ timeradd(&xtime, &local_offset, &xtime);
+ }
+
+ void set_interval(int timer_type)
+@@ -65,7 +69,7 @@
+ errno);
+ }
+
+-void idle_timer(void)
++void uml_idle_timer(void)
+ {
+ if(signal(SIGVTALRM, SIG_IGN) == SIG_ERR)
+ panic("Couldn't unset SIGVTALRM handler");
+@@ -82,8 +86,6 @@
+ set_interval(ITIMER_VIRTUAL);
+ }
+
+-struct timeval local_offset = { 0, 0 };
+-
+ void do_gettimeofday(struct timeval *tv)
+ {
+ unsigned long flags;
+@@ -100,7 +102,7 @@
+ unsigned long flags;
+ struct timeval tv_in;
+
+- if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
++ if ((unsigned long) tv->tv_nsec >= UM_NSEC_PER_SEC)
+ return -EINVAL;
+
+ tv_in.tv_sec = tv->tv_sec;
+@@ -110,6 +112,8 @@
+ gettimeofday(&now, NULL);
+ timersub(&tv_in, &now, &local_offset);
+ time_unlock(flags);
++
++ return(0);
+ }
+
+ void idle_sleep(int secs)
+diff -Naur a/arch/um/kernel/time_kern.c b/arch/um/kernel/time_kern.c
+--- a/arch/um/kernel/time_kern.c Fri Aug 15 15:07:19 2003
++++ b/arch/um/kernel/time_kern.c Fri Aug 15 15:12:46 2003
+@@ -55,12 +55,13 @@
+ do_timer(&regs);
+ }
+
+-void um_timer(int irq, void *dev, struct pt_regs *regs)
++irqreturn_t um_timer(int irq, void *dev, struct pt_regs *regs)
+ {
+ do_timer(regs);
+- write_seqlock(&xtime_lock);
++ write_seqlock_irq(&xtime_lock);
+ timer();
+- write_sequnlock(&xtime_lock);
++ write_sequnlock_irq(&xtime_lock);
++ return(IRQ_HANDLED);
+ }
+
+ long um_time(int * tloc)
+@@ -78,12 +79,12 @@
+ long um_stime(int * tptr)
+ {
+ int value;
+- struct timeval new;
++ struct timespec new;
+
+ if (get_user(value, tptr))
+ return -EFAULT;
+ new.tv_sec = value;
+- new.tv_usec = 0;
++ new.tv_nsec = 0;
+ do_settimeofday(&new);
+ return 0;
+ }
+@@ -122,7 +123,9 @@
+ void timer_handler(int sig, union uml_pt_regs *regs)
+ {
+ #ifdef CONFIG_SMP
++ local_irq_disable();
+ update_process_times(user_context(UPT_SP(regs)));
++ local_irq_enable();
+ #endif
+ if(current->thread_info->cpu == 0)
+ timer_irq(regs);
+diff -Naur a/arch/um/kernel/trap_kern.c b/arch/um/kernel/trap_kern.c
+--- a/arch/um/kernel/trap_kern.c Fri Aug 15 15:04:01 2003
++++ b/arch/um/kernel/trap_kern.c Fri Aug 15 15:10:18 2003
+@@ -16,6 +16,7 @@
+ #include "asm/tlbflush.h"
+ #include "asm/a.out.h"
+ #include "asm/current.h"
++#include "asm/irq.h"
+ #include "user_util.h"
+ #include "kern_util.h"
+ #include "kern.h"
+@@ -180,6 +181,11 @@
+ else relay_signal(sig, regs);
+ }
+
++void winch(int sig, union uml_pt_regs *regs)
++{
++ do_IRQ(WINCH_IRQ, regs);
++}
++
+ void trap_init(void)
+ {
+ }
+diff -Naur a/arch/um/kernel/trap_user.c b/arch/um/kernel/trap_user.c
+--- a/arch/um/kernel/trap_user.c Fri Aug 15 15:05:45 2003
++++ b/arch/um/kernel/trap_user.c Fri Aug 15 15:11:52 2003
+@@ -82,6 +82,8 @@
+ .is_irq = 0 },
+ [ SIGILL ] { .handler = relay_signal,
+ .is_irq = 0 },
++ [ SIGWINCH ] { .handler = winch,
++ .is_irq = 1 },
+ [ SIGBUS ] { .handler = bus_handler,
+ .is_irq = 0 },
+ [ SIGSEGV] { .handler = segv_handler,
+diff -Naur a/arch/um/kernel/tt/include/uaccess.h b/arch/um/kernel/tt/include/uaccess.h
+--- a/arch/um/kernel/tt/include/uaccess.h Fri Aug 15 15:07:25 2003
++++ b/arch/um/kernel/tt/include/uaccess.h Fri Aug 15 15:12:52 2003
+@@ -46,18 +46,20 @@
+
+ static inline int copy_from_user_tt(void *to, const void *from, int n)
+ {
+- return(access_ok_tt(VERIFY_READ, from, n) ?
+- __do_copy_from_user(to, from, n,
+- &current->thread.fault_addr,
+- &current->thread.fault_catcher) : n);
++ if(!access_ok_tt(VERIFY_READ, from, n))
++ return(n);
++
++ return(__do_copy_from_user(to, from, n, &current->thread.fault_addr,
++ &current->thread.fault_catcher));
+ }
+
+ static inline int copy_to_user_tt(void *to, const void *from, int n)
+ {
+- return(access_ok_tt(VERIFY_WRITE, to, n) ?
+- __do_copy_to_user(to, from, n,
+- &current->thread.fault_addr,
+- &current->thread.fault_catcher) : n);
++ if(!access_ok_tt(VERIFY_WRITE, to, n))
++ return(n);
++
++ return(__do_copy_to_user(to, from, n, &current->thread.fault_addr,
++ &current->thread.fault_catcher));
+ }
+
+ extern int __do_strncpy_from_user(char *dst, const char *src, size_t n,
+@@ -67,7 +69,9 @@
+ {
+ int n;
+
+- if(!access_ok_tt(VERIFY_READ, src, 1)) return(-EFAULT);
++ if(!access_ok_tt(VERIFY_READ, src, 1))
++ return(-EFAULT);
++
+ n = __do_strncpy_from_user(dst, src, count,
+ &current->thread.fault_addr,
+ &current->thread.fault_catcher);
+@@ -87,10 +91,11 @@
+
+ static inline int clear_user_tt(void *mem, int len)
+ {
+- return(access_ok_tt(VERIFY_WRITE, mem, len) ?
+- __do_clear_user(mem, len,
+- &current->thread.fault_addr,
+- &current->thread.fault_catcher) : len);
++ if(!access_ok_tt(VERIFY_WRITE, mem, len))
++ return(len);
++
++ return(__do_clear_user(mem, len, &current->thread.fault_addr,
++ &current->thread.fault_catcher));
+ }
+
+ extern int __do_strnlen_user(const char *str, unsigned long n,
+diff -Naur a/arch/um/kernel/tt/process_kern.c b/arch/um/kernel/tt/process_kern.c
+--- a/arch/um/kernel/tt/process_kern.c Fri Aug 15 15:07:55 2003
++++ b/arch/um/kernel/tt/process_kern.c Fri Aug 15 15:13:23 2003
+@@ -104,7 +104,10 @@
+
+ void release_thread_tt(struct task_struct *task)
+ {
+- os_kill_process(task->thread.mode.tt.extern_pid, 0);
++ int pid = task->thread.mode.tt.extern_pid;
++
++ if(os_getpid() != pid)
++ os_kill_process(pid, 0);
+ }
+
+ void exit_thread_tt(void)
+@@ -125,27 +128,27 @@
+ UPT_SC(&current->thread.regs.regs) = (void *) (&sig + 1);
+ suspend_new_thread(current->thread.mode.tt.switch_pipe[0]);
+
+- block_signals();
++ force_flush_all();
++ if(current->thread.prev_sched != NULL)
++ schedule_tail(current->thread.prev_sched);
++ current->thread.prev_sched = NULL;
++
+ init_new_thread_signals(1);
+-#ifdef CONFIG_SMP
+- schedule_tail(current->thread.prev_sched);
+-#endif
+ enable_timer();
+ free_page(current->thread.temp_stack);
+ set_cmdline("(kernel thread)");
+- force_flush_all();
+
+- current->thread.prev_sched = NULL;
+ change_sig(SIGUSR1, 1);
+ change_sig(SIGVTALRM, 1);
+ change_sig(SIGPROF, 1);
+- unblock_signals();
++ local_irq_enable();
+ if(!run_kernel_thread(fn, arg, &current->thread.exec_buf))
+ do_exit(0);
+ }
+
+ static int new_thread_proc(void *stack)
+ {
++ local_irq_disable();
+ init_new_thread_stack(stack, new_thread_handler);
+ os_usr1_process(os_getpid());
+ return(0);
+@@ -165,35 +168,32 @@
+ UPT_SC(¤t->thread.regs.regs) = (void *) (&sig + 1);
+ suspend_new_thread(current->thread.mode.tt.switch_pipe[0]);
+
+-#ifdef CONFIG_SMP
+- schedule_tail(NULL);
+-#endif
++ force_flush_all();
++ if(current->thread.prev_sched != NULL)
++ schedule_tail(current->thread.prev_sched);
++ current->thread.prev_sched = NULL;
++
+ enable_timer();
+ change_sig(SIGVTALRM, 1);
+ local_irq_enable();
+- force_flush_all();
+ if(current->mm != current->parent->mm)
+ protect_memory(uml_reserved, high_physmem - uml_reserved, 1,
+ 1, 0, 1);
+ task_protections((unsigned long) current->thread_info);
+
+- current->thread.prev_sched = NULL;
+-
+ free_page(current->thread.temp_stack);
++ local_irq_disable();
+ change_sig(SIGUSR1, 0);
+ set_user_mode(current);
+ }
+
+-static int sigusr1 = SIGUSR1;
+-
+ int fork_tramp(void *stack)
+ {
+- int sig = sigusr1;
+-
+ local_irq_disable();
++ arch_init_thread();
+ init_new_thread_stack(stack, finish_fork_handler);
+
+- kill(os_getpid(), sig);
++ os_usr1_process(os_getpid());
+ return(0);
+ }
+
+diff -Naur a/arch/um/kernel/tt/ptproxy/proxy.c b/arch/um/kernel/tt/ptproxy/proxy.c
+--- a/arch/um/kernel/tt/ptproxy/proxy.c Fri Aug 15 15:07:01 2003
++++ b/arch/um/kernel/tt/ptproxy/proxy.c Fri Aug 15 15:12:44 2003
+@@ -293,10 +293,10 @@
+ }
+
+ char gdb_init_string[] =
+-"att 1
+-b panic
+-b stop
+-handle SIGWINCH nostop noprint pass
++"att 1 \n\
++b panic \n\
++b stop \n\
++handle SIGWINCH nostop noprint pass \n\
+ ";
+
+ int start_debugger(char *prog, int startup, int stop, int *fd_out)
+diff -Naur a/arch/um/kernel/tt/tracer.c b/arch/um/kernel/tt/tracer.c
+--- a/arch/um/kernel/tt/tracer.c Fri Aug 15 15:03:51 2003
++++ b/arch/um/kernel/tt/tracer.c Fri Aug 15 15:10:12 2003
+@@ -39,7 +39,7 @@
+ return(0);
+
+ register_winch_irq(tracer_winch[0], fd, -1, data);
+- return(0);
++ return(1);
+ }
+
+ static void tracer_winch_handler(int sig)
+@@ -401,7 +401,7 @@
+
+ if(!strcmp(line, "go")) debug_stop = 0;
+ else if(!strcmp(line, "parent")) debug_parent = 1;
+- else printk("Unknown debug option : '%s'\n", line);
++ else printf("Unknown debug option : '%s'\n", line);
+
+ line = next;
+ }
+diff -Naur a/arch/um/kernel/tt/uaccess_user.c b/arch/um/kernel/tt/uaccess_user.c
+--- a/arch/um/kernel/tt/uaccess_user.c Fri Aug 15 15:05:00 2003
++++ b/arch/um/kernel/tt/uaccess_user.c Fri Aug 15 15:11:10 2003
+@@ -8,15 +8,20 @@
+ #include <string.h>
+ #include "user_util.h"
+ #include "uml_uaccess.h"
++#include "task.h"
++#include "kern_util.h"
+
+ int __do_copy_from_user(void *to, const void *from, int n,
+ void **fault_addr, void **fault_catcher)
+ {
++ struct tt_regs save = TASK_REGS(get_current())->tt;
+ unsigned long fault;
+ int faulted;
+
+ fault = __do_user_copy(to, from, n, fault_addr, fault_catcher,
+ __do_copy, &faulted);
++ TASK_REGS(get_current())->tt = save;
++
+ if(!faulted) return(0);
+ else return(n - (fault - (unsigned long) from));
+ }
+@@ -29,11 +34,14 @@
+ int __do_strncpy_from_user(char *dst, const char *src, unsigned long count,
+ void **fault_addr, void **fault_catcher)
+ {
++ struct tt_regs save = TASK_REGS(get_current())->tt;
+ unsigned long fault;
+ int faulted;
+
+ fault = __do_user_copy(dst, src, count, fault_addr, fault_catcher,
+ __do_strncpy, &faulted);
++ TASK_REGS(get_current())->tt = save;
++
+ if(!faulted) return(strlen(dst));
+ else return(-1);
+ }
+@@ -46,11 +54,14 @@
+ int __do_clear_user(void *mem, unsigned long len,
+ void **fault_addr, void **fault_catcher)
+ {
++ struct tt_regs save = TASK_REGS(get_current())->tt;
+ unsigned long fault;
+ int faulted;
+
+ fault = __do_user_copy(mem, NULL, len, fault_addr, fault_catcher,
+ __do_clear, &faulted);
++ TASK_REGS(get_current())->tt = save;
++
+ if(!faulted) return(0);
+ else return(len - (fault - (unsigned long) mem));
+ }
+@@ -58,6 +69,7 @@
+ int __do_strnlen_user(const char *str, unsigned long n,
+ void **fault_addr, void **fault_catcher)
+ {
++ struct tt_regs save = TASK_REGS(get_current())->tt;
+ int ret;
+ unsigned long *faddrp = (unsigned long *)fault_addr;
+ jmp_buf jbuf;
+@@ -71,6 +83,8 @@
+ }
+ *fault_addr = NULL;
+ *fault_catcher = NULL;
++
++ TASK_REGS(get_current())->tt = save;
+ return ret;
+ }
+
+diff -Naur a/arch/um/kernel/tty_log.c b/arch/um/kernel/tty_log.c
+--- a/arch/um/kernel/tty_log.c Fri Aug 15 15:07:04 2003
++++ b/arch/um/kernel/tty_log.c Fri Aug 15 15:12:44 2003
+@@ -13,6 +13,7 @@
+ #include <sys/time.h>
+ #include "init.h"
+ #include "user.h"
++#include "kern_util.h"
+ #include "os.h"
+
+ #define TTY_LOG_DIR "./"
+@@ -24,29 +25,40 @@
+ #define TTY_LOG_OPEN 1
+ #define TTY_LOG_CLOSE 2
+ #define TTY_LOG_WRITE 3
++#define TTY_LOG_EXEC 4
++
++#define TTY_READ 1
++#define TTY_WRITE 2
+
+ struct tty_log_buf {
+ int what;
+ unsigned long tty;
+ int len;
++ int direction;
++ unsigned long sec;
++ unsigned long usec;
+ };
+
+-int open_tty_log(void *tty)
++int open_tty_log(void *tty, void *current_tty)
+ {
+ struct timeval tv;
+ struct tty_log_buf data;
+ char buf[strlen(tty_log_dir) + sizeof("01234567890-01234567\0")];
+ int fd;
+
++ gettimeofday(&tv, NULL);
+ if(tty_log_fd != -1){
+- data = ((struct tty_log_buf) { what : TTY_LOG_OPEN,
+- tty : (unsigned long) tty,
+- len : 0 });
++ data = ((struct tty_log_buf) { .what = TTY_LOG_OPEN,
++ .tty = (unsigned long) tty,
++ .len = sizeof(current_tty),
++ .direction = 0,
++ .sec = tv.tv_sec,
++ .usec = tv.tv_usec } );
+ write(tty_log_fd, &data, sizeof(data));
++ write(tty_log_fd, &current_tty, data.len);
+ return(tty_log_fd);
+ }
+
+- gettimeofday(&tv, NULL);
+ sprintf(buf, "%s/%0u-%0u", tty_log_dir, (unsigned int) tv.tv_sec,
+ (unsigned int) tv.tv_usec);
+
+@@ -62,30 +74,114 @@
+ void close_tty_log(int fd, void *tty)
+ {
+ struct tty_log_buf data;
++ struct timeval tv;
+
+ if(tty_log_fd != -1){
+- data = ((struct tty_log_buf) { what : TTY_LOG_CLOSE,
+- tty : (unsigned long) tty,
+- len : 0 });
++ gettimeofday(&tv, NULL);
++ data = ((struct tty_log_buf) { .what = TTY_LOG_CLOSE,
++ .tty = (unsigned long) tty,
++ .len = 0,
++ .direction = 0,
++ .sec = tv.tv_sec,
++ .usec = tv.tv_usec } );
+ write(tty_log_fd, &data, sizeof(data));
+ return;
+ }
+ close(fd);
+ }
+
+-int write_tty_log(int fd, char *buf, int len, void *tty)
++static int log_chunk(int fd, const char *buf, int len)
+ {
++ int total = 0, try, missed, n;
++ char chunk[64];
++
++ while(len > 0){
++ try = (len > sizeof(chunk)) ? sizeof(chunk) : len;
++ missed = copy_from_user_proc(chunk, (char *) buf, try);
++ try -= missed;
++ n = write(fd, chunk, try);
++ if(n != try)
++ return(-errno);
++ if(missed != 0)
++ return(-EFAULT);
++
++ len -= try;
++ total += try;
++ buf += try;
++ }
++
++ return(total);
++}
++
++int write_tty_log(int fd, const char *buf, int len, void *tty, int is_read)
++{
++ struct timeval tv;
+ struct tty_log_buf data;
++ int direction;
+
+ if(fd == tty_log_fd){
+- data = ((struct tty_log_buf) { what : TTY_LOG_WRITE,
+- tty : (unsigned long) tty,
+- len : len });
++ gettimeofday(&tv, NULL);
++ direction = is_read ? TTY_READ : TTY_WRITE;
++ data = ((struct tty_log_buf) { .what = TTY_LOG_WRITE,
++ .tty = (unsigned long) tty,
++ .len = len,
++ .direction = direction,
++ .sec = tv.tv_sec,
++ .usec = tv.tv_usec } );
+ write(tty_log_fd, &data, sizeof(data));
+ }
+- return(write(fd, buf, len));
++
++ return(log_chunk(fd, buf, len));
+ }
+
++void log_exec(char **argv, void *tty)
++{
++ struct timeval tv;
++ struct tty_log_buf data;
++ char **ptr,*arg;
++ int len;
++
++ if(tty_log_fd == -1) return;
++
++ gettimeofday(&tv, NULL);
++
++ len = 0;
++ for(ptr = argv; ; ptr++){
++ if(copy_from_user_proc(&arg, ptr, sizeof(arg)))
++ return;
++ if(arg == NULL) break;
++ len += strlen_user_proc(arg);
++ }
++
++ data = ((struct tty_log_buf) { .what = TTY_LOG_EXEC,
++ .tty = (unsigned long) tty,
++ .len = len,
++ .direction = 0,
++ .sec = tv.tv_sec,
++ .usec = tv.tv_usec } );
++ write(tty_log_fd, &data, sizeof(data));
++
++ for(ptr = argv; ; ptr++){
++ if(copy_from_user_proc(&arg, ptr, sizeof(arg)))
++ return;
++ if(arg == NULL) break;
++ log_chunk(tty_log_fd, arg, strlen_user_proc(arg));
++ }
++}
++
++extern void register_tty_logger(int (*opener)(void *, void *),
++ int (*writer)(int, const char *, int,
++ void *, int),
++ void (*closer)(int, void *));
++
++static int register_logger(void)
++{
++ register_tty_logger(open_tty_log, write_tty_log, close_tty_log);
++ return(0);
++}
++
++__uml_initcall(register_logger);
++
+ static int __init set_tty_log_dir(char *name, int *add)
+ {
+ tty_log_dir = name;
+@@ -104,7 +200,7 @@
+
+ tty_log_fd = strtoul(name, &end, 0);
+ if((*end != '\0') || (end == name)){
+- printk("set_tty_log_fd - strtoul failed on '%s'\n", name);
++ printf("set_tty_log_fd - strtoul failed on '%s'\n", name);
+ tty_log_fd = -1;
+ }
+ return 0;
+diff -Naur a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
+--- a/arch/um/kernel/um_arch.c Fri Aug 15 15:07:48 2003
++++ b/arch/um/kernel/um_arch.c Fri Aug 15 15:13:14 2003
+@@ -38,13 +38,18 @@
+ #include "mode_kern.h"
+ #include "mode.h"
+
+-#define DEFAULT_COMMAND_LINE "root=6200"
++#define DEFAULT_COMMAND_LINE "root=ubd0"
+
+ struct cpuinfo_um boot_cpu_data = {
+ .loops_per_jiffy = 0,
+ .ipi_pipe = { -1, -1 }
+ };
+
++/* Placeholder to make UML link until the vsyscall stuff is actually
++ * implemented
++ */
++void *__kernel_vsyscall;
++
+ unsigned long thread_saved_pc(struct task_struct *task)
+ {
+ return(os_process_pc(CHOOSE_MODE_PROC(thread_pid_tt, thread_pid_skas,
+@@ -61,10 +66,14 @@
+ return 0;
+ #endif
+
+- seq_printf(m, "bogomips\t: %lu.%02lu\n",
++ seq_printf(m, "processor\t: %d\n", index);
++ seq_printf(m, "vendor_id\t: User Mode Linux\n");
++ seq_printf(m, "model name\t: UML\n");
++ seq_printf(m, "mode\t\t: %s\n", CHOOSE_MODE("tt", "skas"));
++ seq_printf(m, "host\t\t: %s\n", host_info);
++ seq_printf(m, "bogomips\t: %lu.%02lu\n\n",
+ loops_per_jiffy/(500000/HZ),
+ (loops_per_jiffy/(5000/HZ)) % 100);
+- seq_printf(m, "host\t\t: %s\n", host_info);
+
+ return(0);
+ }
+@@ -134,12 +143,12 @@
+ if(umid != NULL){
+ snprintf(argv1_begin,
+ (argv1_end - argv1_begin) * sizeof(*ptr),
+- "(%s)", umid);
++ "(%s) ", umid);
+ ptr = &argv1_begin[strlen(argv1_begin)];
+ }
+ else ptr = argv1_begin;
+
+- snprintf(ptr, (argv1_end - ptr) * sizeof(*ptr), " [%s]", cmd);
++ snprintf(ptr, (argv1_end - ptr) * sizeof(*ptr), "[%s]", cmd);
+ memset(argv1_begin + strlen(argv1_begin), '\0',
+ argv1_end - argv1_begin - strlen(argv1_begin));
+ #endif
+@@ -179,7 +188,7 @@
+ static int __init uml_ncpus_setup(char *line, int *add)
+ {
+ if (!sscanf(line, "%d", &ncpus)) {
+- printk("Couldn't parse [%s]\n", line);
++ printf("Couldn't parse [%s]\n", line);
+ return -1;
+ }
+
+@@ -210,7 +219,7 @@
+
+ static int __init mode_tt_setup(char *line, int *add)
+ {
+- printk("CONFIG_MODE_TT disabled - 'mode=tt' ignored\n");
++ printf("CONFIG_MODE_TT disabled - 'mode=tt' ignored\n");
+ return(0);
+ }
+
+@@ -221,7 +230,7 @@
+
+ static int __init mode_tt_setup(char *line, int *add)
+ {
+- printk("CONFIG_MODE_SKAS disabled - 'mode=tt' redundant\n");
++ printf("CONFIG_MODE_SKAS disabled - 'mode=tt' redundant\n");
+ return(0);
+ }
+
+@@ -369,6 +378,7 @@
+ 2 * PAGE_SIZE;
+
+ task_protections((unsigned long) &init_thread_info);
++ os_flush_stdout();
+
+ return(CHOOSE_MODE(start_uml_tt(), start_uml_skas()));
+ }
+diff -Naur a/arch/um/kernel/umid.c b/arch/um/kernel/umid.c
+--- a/arch/um/kernel/umid.c Fri Aug 15 15:08:44 2003
++++ b/arch/um/kernel/umid.c Fri Aug 15 15:13:39 2003
+@@ -33,18 +33,19 @@
+ static int umid_is_random = 1;
+ static int umid_inited = 0;
+
+-static int make_umid(void);
++static int make_umid(int (*printer)(const char *fmt, ...));
+
+-static int __init set_umid(char *name, int is_random)
++static int __init set_umid(char *name, int is_random,
++ int (*printer)(const char *fmt, ...))
+ {
+ if(umid_inited){
+- printk("Unique machine name can't be set twice\n");
++ (*printer)("Unique machine name can't be set twice\n");
+ return(-1);
+ }
+
+ if(strlen(name) > UMID_LEN - 1)
+- printk("Unique machine name is being truncated to %s "
+- "characters\n", UMID_LEN);
++ (*printer)("Unique machine name is being truncated to %d "
++ "characters\n", UMID_LEN);
+ strlcpy(umid, name, sizeof(umid));
+
+ umid_is_random = is_random;
+@@ -54,7 +55,7 @@
+
+ static int __init set_umid_arg(char *name, int *add)
+ {
+- return(set_umid(name, 0));
++ return(set_umid(name, 0, printf));
+ }
+
+ __uml_setup("umid=", set_umid_arg,
+@@ -67,7 +68,7 @@
+ {
+ int n;
+
+- if(!umid_inited && make_umid()) return(-1);
++ if(!umid_inited && make_umid(printk)) return(-1);
+
+ n = strlen(uml_dir) + strlen(umid) + strlen(name) + 1;
+ if(n > len){
+@@ -92,14 +93,14 @@
+ fd = os_open_file(file, of_create(of_excl(of_rdwr(OPENFLAGS()))),
+ 0644);
+ if(fd < 0){
+- printk("Open of machine pid file \"%s\" failed - "
++ printf("Open of machine pid file \"%s\" failed - "
+ "errno = %d\n", file, -fd);
+ return 0;
+ }
+
+ sprintf(pid, "%d\n", os_getpid());
+ if(write(fd, pid, strlen(pid)) != strlen(pid))
+- printk("Write of pid file failed - errno = %d\n", errno);
++ printf("Write of pid file failed - errno = %d\n", errno);
+ close(fd);
+ return 0;
+ }
+@@ -197,7 +198,7 @@
+ if((strlen(name) > 0) && (name[strlen(name) - 1] != '/')){
+ uml_dir = malloc(strlen(name) + 1);
+ if(uml_dir == NULL){
+- printk("Failed to malloc uml_dir - error = %d\n",
++ printf("Failed to malloc uml_dir - error = %d\n",
+ errno);
+ uml_dir = name;
+ return(0);
+@@ -217,7 +218,7 @@
+ char *home = getenv("HOME");
+
+ if(home == NULL){
+- printk("make_uml_dir : no value in environment for "
++ printf("make_uml_dir : no value in environment for "
+ "$HOME\n");
+ exit(1);
+ }
+@@ -239,25 +240,25 @@
+ strcpy(uml_dir, dir);
+
+ if((mkdir(uml_dir, 0777) < 0) && (errno != EEXIST)){
+- printk("Failed to mkdir %s - errno = %i\n", uml_dir, errno);
++ printf("Failed to mkdir %s - errno = %i\n", uml_dir, errno);
+ return(-1);
+ }
+ return 0;
+ }
+
+-static int __init make_umid(void)
++static int __init make_umid(int (*printer)(const char *fmt, ...))
+ {
+ int fd, err;
+ char tmp[strlen(uml_dir) + UMID_LEN + 1];
+
+ strlcpy(tmp, uml_dir, sizeof(tmp));
+
+- if(*umid == 0){
++ if(!umid_inited){
+ strcat(tmp, "XXXXXX");
+ fd = mkstemp(tmp);
+ if(fd < 0){
+- printk("make_umid - mkstemp failed, errno = %d\n",
+- errno);
++ (*printer)("make_umid - mkstemp failed, errno = %d\n",
++ errno);
+ return(1);
+ }
+
+@@ -267,7 +268,7 @@
+ * for directories.
+ */
+ unlink(tmp);
+- set_umid(&tmp[strlen(uml_dir)], 1);
++ set_umid(&tmp[strlen(uml_dir)], 1, printer);
+ }
+
+ sprintf(tmp, "%s%s", uml_dir, umid);
+@@ -275,14 +276,14 @@
+ if((err = mkdir(tmp, 0777)) < 0){
+ if(errno == EEXIST){
+ if(not_dead_yet(tmp)){
+- printk("umid '%s' is in use\n", umid);
++ (*printer)("umid '%s' is in use\n", umid);
+ return(-1);
+ }
+ err = mkdir(tmp, 0777);
+ }
+ }
+ if(err < 0){
+- printk("Failed to create %s - errno = %d\n", umid, errno);
++ (*printer)("Failed to create %s - errno = %d\n", umid, errno);
+ return(-1);
+ }
+
+@@ -295,7 +296,13 @@
+ );
+
+ __uml_postsetup(make_uml_dir);
+-__uml_postsetup(make_umid);
++
++static int __init make_umid_setup(void)
++{
++ return(make_umid(printf));
++}
++
++__uml_postsetup(make_umid_setup);
+ __uml_postsetup(create_pid_file);
+
+ /*
+diff -Naur a/arch/um/kernel/user_util.c b/arch/um/kernel/user_util.c
+--- a/arch/um/kernel/user_util.c Fri Aug 15 15:04:48 2003
++++ b/arch/um/kernel/user_util.c Fri Aug 15 15:10:41 2003
+@@ -119,17 +119,6 @@
+ }
+ }
+
+-int clone_and_wait(int (*fn)(void *), void *arg, void *sp, int flags)
+-{
+- int pid;
+-
+- pid = clone(fn, sp, flags, arg);
+- if(pid < 0) return(-1);
+- wait_for_stop(pid, SIGSTOP, PTRACE_CONT, NULL);
+- ptrace(PTRACE_CONT, pid, 0, 0);
+- return(pid);
+-}
+-
+ int raw(int fd, int complain)
+ {
+ struct termios tt;
+diff -Naur a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c
+--- a/arch/um/os-Linux/drivers/tuntap_user.c Fri Aug 15 15:09:23 2003
++++ b/arch/um/os-Linux/drivers/tuntap_user.c Fri Aug 15 15:14:02 2003
+@@ -142,7 +142,7 @@
+ return(-errno);
+ }
+ memset(&ifr, 0, sizeof(ifr));
+- ifr.ifr_flags = IFF_TAP;
++ ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+ strlcpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name));
+ if(ioctl(pri->fd, TUNSETIFF, (void *) &ifr) < 0){
+ printk("TUNSETIFF failed, errno = %d", errno);
+diff -Naur a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
+--- a/arch/um/os-Linux/file.c Fri Aug 15 15:09:15 2003
++++ b/arch/um/os-Linux/file.c Fri Aug 15 15:13:54 2003
+@@ -315,7 +315,7 @@
+ return(new);
+ }
+
+-int create_unix_socket(char *file, int len)
++int create_unix_socket(char *file, int len, int close_on_exec)
+ {
+ struct sockaddr_un addr;
+ int sock, err;
+@@ -327,6 +327,10 @@
+ return(-errno);
+ }
+
++ if(close_on_exec && fcntl(sock, F_SETFD, 1) < 0)
++ printk("create_unix_socket : Setting FD_CLOEXEC failed, "
++ "errno = %d", errno);
++
+ addr.sun_family = AF_UNIX;
+
+ /* XXX Be more careful about overflow */
+@@ -342,6 +346,37 @@
+ return(sock);
+ }
+
++void os_flush_stdout(void)
++{
++ fflush(stdout);
++}
++
++int os_lock_file(int fd, int excl)
++{
++ int type = excl ? F_WRLCK : F_RDLCK;
++ struct flock lock = ((struct flock) { .l_type = type,
++ .l_whence = SEEK_SET,
++ .l_start = 0,
++ .l_len = 0 } );
++ int err, save;
++
++ err = fcntl(fd, F_SETLK, &lock);
++ if(!err)
++ goto out;
++
++ save = -errno;
++ err = fcntl(fd, F_GETLK, &lock);
++ if(err){
++ err = -errno;
++ goto out;
++ }
++
++ printk("F_SETLK failed, file already locked by pid %d\n", lock.l_pid);
++ err = save;
++ out:
++ return(err);
++}
++
+ /*
+ * Overrides for Emacs so that we follow Linus's tabbing style.
+ * Emacs will notice this stuff at the end of the file and automatically
+diff -Naur a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
+--- a/arch/um/sys-i386/Makefile Fri Aug 15 15:04:47 2003
++++ b/arch/um/sys-i386/Makefile Fri Aug 15 15:10:35 2003
+@@ -1,7 +1,8 @@
+-obj-y = bugs.o checksum.o extable.o fault.o ksyms.o ldt.o module.o \
+- ptrace.o ptrace_user.o semaphore.o sigcontext.o syscalls.o sysrq.o
++obj-y = bugs.o checksum.o extable.o fault.o ksyms.o ldt.o ptrace.o \
++ ptrace_user.o semaphore.o sigcontext.o syscalls.o sysrq.o
+
+ obj-$(CONFIG_HIGHMEM) += highmem.o
++obj-$(CONFIG_MODULES) += module.o
+
+ USER_OBJS := bugs.o ptrace_user.o sigcontext.o fault.o
+ USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
+@@ -9,6 +10,8 @@
+ SYMLINKS = semaphore.c highmem.c module.c
+ SYMLINKS := $(foreach f,$(SYMLINKS),$(src)/$f)
+
++clean-files := $(SYMLINKS)
++
+ semaphore.c-dir = kernel
+ highmem.c-dir = mm
+ module.c-dir = kernel
+@@ -24,8 +27,7 @@
+ $(SYMLINKS):
+ $(call make_link,$@)
+
+-clean:
+- $(MAKE) -C util clean
++subdir- := util
+
+ fastdep:
+
+diff -Naur a/arch/um/sys-i386/bugs.c b/arch/um/sys-i386/bugs.c
+--- a/arch/um/sys-i386/bugs.c Fri Aug 15 15:07:41 2003
++++ b/arch/um/sys-i386/bugs.c Fri Aug 15 15:13:14 2003
+@@ -8,6 +8,7 @@
+ #include <errno.h>
+ #include <string.h>
+ #include <sys/signal.h>
++#include <asm/ldt.h>
+ #include "kern_util.h"
+ #include "user.h"
+ #include "sysdep/ptrace.h"
+@@ -16,8 +17,8 @@
+ #define MAXTOKEN 64
+
+ /* Set during early boot */
+-int cpu_has_cmov = 1;
+-int cpu_has_xmm = 0;
++int host_has_cmov = 1;
++int host_has_xmm = 0;
+
+ static char token(int fd, char *buf, int len, char stop)
+ {
+@@ -104,6 +105,25 @@
+ return(1);
+ }
+
++static void disable_lcall(void)
++{
++ struct modify_ldt_ldt_s ldt;
++ int err;
++
++ bzero(&ldt, sizeof(ldt));
++ ldt.entry_number = 7;
++ ldt.base_addr = 0;
++ ldt.limit = 0;
++ err = modify_ldt(1, &ldt, sizeof(ldt));
++ if(err)
++ printk("Failed to disable lcall7 - errno = %d\n", errno);
++}
++
++void arch_init_thread(void)
++{
++ disable_lcall();
++}
++
+ void arch_check_bugs(void)
+ {
+ int have_it;
+@@ -113,8 +133,8 @@
+ "checks\n");
+ return;
+ }
+- if(check_cpu_feature("cmov", &have_it)) cpu_has_cmov = have_it;
+- if(check_cpu_feature("xmm", &have_it)) cpu_has_xmm = have_it;
++ if(check_cpu_feature("cmov", &have_it)) host_has_cmov = have_it;
++ if(check_cpu_feature("xmm", &have_it)) host_has_xmm = have_it;
+ }
+
+ int arch_handle_signal(int sig, union uml_pt_regs *regs)
+@@ -130,18 +150,18 @@
+ if((*((char *) ip) != 0x0f) || ((*((char *) (ip + 1)) & 0xf0) != 0x40))
+ return(0);
+
+- if(cpu_has_cmov == 0)
++ if(host_has_cmov == 0)
+ panic("SIGILL caused by cmov, which this processor doesn't "
+ "implement, boot a filesystem compiled for older "
+ "processors");
+- else if(cpu_has_cmov == 1)
++ else if(host_has_cmov == 1)
+ panic("SIGILL caused by cmov, which this processor claims to "
+ "implement");
+- else if(cpu_has_cmov == -1)
++ else if(host_has_cmov == -1)
+ panic("SIGILL caused by cmov, couldn't tell if this processor "
+ "implements it, boot a filesystem compiled for older "
+ "processors");
+- else panic("Bad value for cpu_has_cmov (%d)", cpu_has_cmov);
++ else panic("Bad value for host_has_cmov (%d)", host_has_cmov);
+ return(0);
+ }
+
+diff -Naur a/arch/um/uml.lds.S b/arch/um/uml.lds.S
+--- a/arch/um/uml.lds.S Fri Aug 15 15:05:37 2003
++++ b/arch/um/uml.lds.S Fri Aug 15 15:11:48 2003
+@@ -26,7 +26,11 @@
+ . = ALIGN(4096); /* Init code and data */
+ _stext = .;
+ __init_begin = .;
+- .text.init : { *(.text.init) }
++ .init.text : {
++ _sinittext = .;
++ *(.init.text)
++ _einittext = .;
++ }
+ . = ALIGN(4096);
+ .text :
+ {
+@@ -38,7 +42,7 @@
+
+ #include "asm/common.lds.S"
+
+- .data.init : { *(.data.init) }
++ init.data : { *(init.data) }
+ .data :
+ {
+ . = ALIGN(KERNEL_STACK_SIZE); /* init_task */
+diff -Naur a/arch/um/util/mk_constants_kern.c b/arch/um/util/mk_constants_kern.c
+--- a/arch/um/util/mk_constants_kern.c Fri Aug 15 15:04:15 2003
++++ b/arch/um/util/mk_constants_kern.c Fri Aug 15 15:10:27 2003
+@@ -1,5 +1,6 @@
+ #include "linux/kernel.h"
+ #include "linux/stringify.h"
++#include "linux/time.h"
+ #include "asm/page.h"
+
+ extern void print_head(void);
+@@ -11,6 +12,7 @@
+ {
+ print_head();
+ print_constant_int("UM_KERN_PAGE_SIZE", PAGE_SIZE);
++
+ print_constant_str("UM_KERN_EMERG", KERN_EMERG);
+ print_constant_str("UM_KERN_ALERT", KERN_ALERT);
+ print_constant_str("UM_KERN_CRIT", KERN_CRIT);
+@@ -19,6 +21,8 @@
+ print_constant_str("UM_KERN_NOTICE", KERN_NOTICE);
+ print_constant_str("UM_KERN_INFO", KERN_INFO);
+ print_constant_str("UM_KERN_DEBUG", KERN_DEBUG);
++
++ print_constant_int("UM_NSEC_PER_SEC", NSEC_PER_SEC);
+ print_tail();
+ return(0);
+ }
+diff -Naur a/fs/Makefile b/fs/Makefile
+--- a/fs/Makefile Fri Aug 15 15:06:45 2003
++++ b/fs/Makefile Fri Aug 15 15:12:41 2003
+@@ -91,3 +91,5 @@
+ obj-$(CONFIG_XFS_FS) += xfs/
+ obj-$(CONFIG_AFS_FS) += afs/
+ obj-$(CONFIG_BEFS_FS) += befs/
++obj-$(CONFIG_HOSTFS) += hostfs/
++obj-$(CONFIG_HPPFS) += hppfs/
+diff -Naur a/fs/hostfs/Makefile b/fs/hostfs/Makefile
+--- a/fs/hostfs/Makefile Wed Dec 31 19:00:00 1969
++++ b/fs/hostfs/Makefile Fri Aug 15 15:10:07 2003
+@@ -0,0 +1,36 @@
++#
++# Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
++# Licensed under the GPL
++#
++
++# struct stat64 changed the inode field name between 2.2 and 2.4 from st_ino
++# to __st_ino. It stayed in the same place, so as long as the correct name
++# is used, hostfs compiled on 2.2 should work on 2.4 and vice versa.
++
++STAT64_INO_FIELD := $(shell grep -q __st_ino /usr/include/bits/stat.h && \
++ echo __)st_ino
++
++hostfs-objs := hostfs_kern.o hostfs_user.o
++
++obj-y =
++obj-$(CONFIG_HOSTFS) += hostfs.o
++
++SINGLE_OBJS = $(foreach f,$(patsubst %.o,%,$(obj-y) $(obj-m)),$($(f)-objs))
++
++USER_OBJS := $(filter %_user.o,$(obj-y) $(obj-m) $(SINGLE_OBJS))
++USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
++
++USER_CFLAGS += -DSTAT64_INO_FIELD=$(STAT64_INO_FIELD)
++
++$(USER_OBJS) : %.o: %.c
++ $(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $<
++
++clean:
++
++modules:
++
++fastdep:
++
++dep:
++
++archmrproper: clean
+diff -Naur a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
+--- a/fs/hostfs/hostfs.h Wed Dec 31 19:00:00 1969
++++ b/fs/hostfs/hostfs.h Fri Aug 15 15:10:06 2003
+@@ -0,0 +1,79 @@
++#ifndef __UM_FS_HOSTFS
++#define __UM_FS_HOSTFS
++
++#include "os.h"
++
++/* These are exactly the same definitions as in fs.h, but the names are
++ * changed so that this file can be included in both kernel and user files.
++ */
++
++#define HOSTFS_ATTR_MODE 1
++#define HOSTFS_ATTR_UID 2
++#define HOSTFS_ATTR_GID 4
++#define HOSTFS_ATTR_SIZE 8
++#define HOSTFS_ATTR_ATIME 16
++#define HOSTFS_ATTR_MTIME 32
++#define HOSTFS_ATTR_CTIME 64
++#define HOSTFS_ATTR_ATIME_SET 128
++#define HOSTFS_ATTR_MTIME_SET 256
++#define HOSTFS_ATTR_FORCE 512 /* Not a change, but a change it */
++#define HOSTFS_ATTR_ATTR_FLAG 1024
++
++struct hostfs_iattr {
++ unsigned int ia_valid;
++ mode_t ia_mode;
++ uid_t ia_uid;
++ gid_t ia_gid;
++ loff_t ia_size;
++ struct timespec ia_atime;
++ struct timespec ia_mtime;
++ struct timespec ia_ctime;
++ unsigned int ia_attr_flags;
++};
++
++extern int stat_file(const char *path, unsigned long long *inode_out,
++ int *mode_out, int *nlink_out, int *uid_out, int *gid_out,
++ unsigned long long *size_out, struct timespec *atime_out,
++ struct timespec *mtime_out, struct timespec *ctime_out,
++ int *blksize_out, unsigned long long *blocks_out);
++extern int access_file(char *path, int r, int w, int x);
++extern int open_file(char *path, int r, int w, int append);
++extern int file_type(const char *path, int *rdev);
++extern void *open_dir(char *path, int *err_out);
++extern char *read_dir(void *stream, unsigned long long *pos,
++ unsigned long long *ino_out, int *len_out);
++extern void close_file(void *stream);
++extern void close_dir(void *stream);
++extern int read_file(int fd, unsigned long long *offset, char *buf, int len);
++extern int write_file(int fd, unsigned long long *offset, const char *buf,
++ int len);
++extern int lseek_file(int fd, long long offset, int whence);
++extern int file_create(char *name, int ur, int uw, int ux, int gr,
++ int gw, int gx, int or, int ow, int ox);
++extern int set_attr(const char *file, struct hostfs_iattr *attrs);
++extern int make_symlink(const char *from, const char *to);
++extern int unlink_file(const char *file);
++extern int do_mkdir(const char *file, int mode);
++extern int do_rmdir(const char *file);
++extern int do_mknod(const char *file, int mode, int dev);
++extern int link_file(const char *from, const char *to);
++extern int do_readlink(char *file, char *buf, int size);
++extern int rename_file(char *from, char *to);
++extern int do_statfs(char *root, long *bsize_out, long long *blocks_out,
++ long long *bfree_out, long long *bavail_out,
++ long long *files_out, long long *ffree_out,
++ void *fsid_out, int fsid_size, long *namelen_out,
++ long *spare_out);
++
++#endif
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
+--- a/fs/hostfs/hostfs_kern.c Wed Dec 31 19:00:00 1969
++++ b/fs/hostfs/hostfs_kern.c Fri Aug 15 15:10:12 2003
+@@ -0,0 +1,1010 @@
++/*
++ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
++ * Licensed under the GPL
++ *
++ * Ported the filesystem routines to 2.5.
++ * 2003-02-10 Petr Baudis <pasky@ucw.cz>
++ */
++
++#include <linux/stddef.h>
++#include <linux/fs.h>
++#include <linux/version.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/pagemap.h>
++#include <linux/blkdev.h>
++#include <linux/list.h>
++#include <linux/buffer_head.h>
++#include <linux/root_dev.h>
++#include <linux/statfs.h>
++#include <asm/uaccess.h>
++#include "hostfs.h"
++#include "kern_util.h"
++#include "kern.h"
++#include "user_util.h"
++#include "2_5compat.h"
++#include "init.h"
++
++struct hostfs_inode_info {
++ char *host_filename;
++ int fd;
++ int mode;
++ struct inode vfs_inode;
++};
++
++static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
++{
++ return(list_entry(inode, struct hostfs_inode_info, vfs_inode));
++}
++
++#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_dentry->d_inode)
++
++int hostfs_d_delete(struct dentry *dentry)
++{
++ return(1);
++}
++
++struct dentry_operations hostfs_dentry_ops = {
++ .d_delete = hostfs_d_delete,
++};
++
++/* Changed in hostfs_args before the kernel starts running */
++static char *root_ino = "/";
++static int append = 0;
++
++#define HOSTFS_SUPER_MAGIC 0x00c0ffee
++
++static struct inode_operations hostfs_iops;
++static struct inode_operations hostfs_dir_iops;
++static struct address_space_operations hostfs_link_aops;
++
++static int __init hostfs_args(char *options, int *add)
++{
++ char *ptr;
++
++ ptr = strchr(options, ',');
++ if(ptr != NULL)
++ *ptr++ = '\0';
++ if(*options != '\0')
++ root_ino = options;
++
++ options = ptr;
++ while(options){
++ ptr = strchr(options, ',');
++ if(ptr != NULL)
++ *ptr++ = '\0';
++ if(*options != '\0'){
++ if(!strcmp(options, "append"))
++ append = 1;
++ else printf("hostfs_args - unsupported option - %s\n",
++ options);
++ }
++ options = ptr;
++ }
++ return(0);
++}
++
++__uml_setup("hostfs=", hostfs_args,
++"hostfs=<root dir>,<flags>,...\n"
++" This is used to set hostfs parameters. The root directory argument\n"
++" is used to confine all hostfs mounts to within the specified directory\n"
++" tree on the host. If this isn't specified, then a user inside UML can\n"
++" mount anything on the host that's accessible to the user that's running\n"
++" it.\n"
++" The only flag currently supported is 'append', which specifies that all\n"
++" files opened by hostfs will be opened in append mode.\n\n"
++);
++
++static char *dentry_name(struct dentry *dentry, int extra)
++{
++ struct dentry *parent;
++ char *root, *name;
++ int len;
++
++ len = 0;
++ parent = dentry;
++ while(parent->d_parent != parent){
++ len += parent->d_name.len + 1;
++ parent = parent->d_parent;
++ }
++
++ root = HOSTFS_I(parent->d_inode)->host_filename;
++ len += strlen(root);
++ name = kmalloc(len + extra + 1, GFP_KERNEL);
++ if(name == NULL) return(NULL);
++
++ name[len] = '\0';
++ parent = dentry;
++ while(parent->d_parent != parent){
++ len -= parent->d_name.len + 1;
++ name[len] = '/';
++ strncpy(&name[len + 1], parent->d_name.name,
++ parent->d_name.len);
++ parent = parent->d_parent;
++ }
++ strncpy(name, root, strlen(root));
++ return(name);
++}
++
++static char *inode_name(struct inode *ino, int extra)
++{
++ struct dentry *dentry;
++
++ dentry = list_entry(ino->i_dentry.next, struct dentry, d_alias);
++ return(dentry_name(dentry, extra));
++}
++
++static int read_name(struct inode *ino, char *name)
++{
++ /* The non-int inode fields are copied into ints by stat_file and
++ * then copied into the inode because passing the actual pointers
++ * in and having them treated as int * breaks on big-endian machines
++ */
++ int err;
++ int i_mode, i_nlink, i_blksize;
++ unsigned long long i_size;
++ unsigned long long i_ino;
++ unsigned long long i_blocks;
++
++ err = stat_file(name, &i_ino, &i_mode, &i_nlink, &ino->i_uid,
++ &ino->i_gid, &i_size, &ino->i_atime, &ino->i_mtime,
++ &ino->i_ctime, &i_blksize, &i_blocks);
++ if(err)
++ return(err);
++
++ ino->i_ino = i_ino;
++ ino->i_mode = i_mode;
++ ino->i_nlink = i_nlink;
++ ino->i_size = i_size;
++ ino->i_blksize = i_blksize;
++ ino->i_blocks = i_blocks;
++ if((ino->i_sb->s_dev == ROOT_DEV) && (ino->i_uid == getuid()))
++ ino->i_uid = 0;
++ return(0);
++}
++
++static char *follow_link(char *link)
++{
++ int len, n;
++ char *name, *resolved, *end;
++
++ len = 64;
++ while(1){
++ n = -ENOMEM;
++ name = kmalloc(len, GFP_KERNEL);
++ if(name == NULL)
++ goto out;
++
++ n = do_readlink(link, name, len);
++ if(n < len)
++ break;
++ len *= 2;
++ kfree(name);
++ }
++ if(n < 0)
++ goto out_free;
++
++ if(*name == '/')
++ return(name);
++
++ end = strrchr(link, '/');
++ if(end == NULL)
++ return(name);
++
++ *(end + 1) = '\0';
++ len = strlen(link) + strlen(name) + 1;
++
++ resolved = kmalloc(len, GFP_KERNEL);
++ if(resolved == NULL){
++ n = -ENOMEM;
++ goto out_free;
++ }
++
++ sprintf(resolved, "%s%s", link, name);
++ kfree(name);
++ kfree(link);
++ return(resolved);
++
++ out_free:
++ kfree(name);
++ out:
++ return(ERR_PTR(n));
++}
++
++static int read_inode(struct inode *ino)
++{
++ char *name;
++ int err = 0;
++
++ /* Unfortunately, we are called from iget() when we don't have a dentry
++ * allocated yet.
++ */
++ if(list_empty(&ino->i_dentry))
++ goto out;
++
++ err = -ENOMEM;
++ name = inode_name(ino, 0);
++ if(name == NULL)
++ goto out;
++
++ if(file_type(name, NULL) == OS_TYPE_SYMLINK){
++ name = follow_link(name);
++ if(IS_ERR(name)){
++ err = PTR_ERR(name);
++ goto out;
++ }
++ }
++
++ err = read_name(ino, name);
++ kfree(name);
++ out:
++ return(err);
++}
++
++int hostfs_statfs(struct super_block *sb, struct kstatfs *sf)
++{
++ /* do_statfs uses struct statfs64 internally, but the linux kernel
++ * struct statfs still has 32-bit versions for most of these fields,
++ * so we convert them here
++ */
++ int err;
++ long long f_blocks;
++ long long f_bfree;
++ long long f_bavail;
++ long long f_files;
++ long long f_ffree;
++
++ err = do_statfs(HOSTFS_I(sb->s_root->d_inode)->host_filename,
++ &sf->f_bsize, &f_blocks, &f_bfree, &f_bavail, &f_files,
++ &f_ffree, &sf->f_fsid, sizeof(sf->f_fsid),
++ &sf->f_namelen, sf->f_spare);
++ if(err) return(err);
++ sf->f_blocks = f_blocks;
++ sf->f_bfree = f_bfree;
++ sf->f_bavail = f_bavail;
++ sf->f_files = f_files;
++ sf->f_ffree = f_ffree;
++ sf->f_type = HOSTFS_SUPER_MAGIC;
++ return(0);
++}
++
++static struct inode *hostfs_alloc_inode(struct super_block *sb)
++{
++ struct hostfs_inode_info *hi;
++
++ hi = kmalloc(sizeof(*hi), GFP_KERNEL);
++ if(hi == NULL)
++ return(NULL);
++
++ *hi = ((struct hostfs_inode_info) { .host_filename = NULL,
++ .fd = -1,
++ .mode = 0 });
++ inode_init_once(&hi->vfs_inode);
++ return(&hi->vfs_inode);
++}
++
++static void hostfs_destroy_inode(struct inode *inode)
++{
++ if(HOSTFS_I(inode)->host_filename)
++ kfree(HOSTFS_I(inode)->host_filename);
++
++ if(HOSTFS_I(inode)->fd != -1)
++ close_file(&HOSTFS_I(inode)->fd);
++
++ kfree(HOSTFS_I(inode));
++}
++
++static void hostfs_read_inode(struct inode *inode)
++{
++ read_inode(inode);
++}
++
++static struct super_operations hostfs_sbops = {
++ .alloc_inode = hostfs_alloc_inode,
++ .destroy_inode = hostfs_destroy_inode,
++ .read_inode = hostfs_read_inode,
++ .statfs = hostfs_statfs,
++};
++
++int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
++{
++ void *dir;
++ char *name;
++ unsigned long long next, ino;
++ int error, len;
++
++ name = dentry_name(file->f_dentry, 0);
++ if(name == NULL) return(-ENOMEM);
++ dir = open_dir(name, &error);
++ kfree(name);
++ if(dir == NULL) return(-error);
++ next = file->f_pos;
++ while((name = read_dir(dir, &next, &ino, &len)) != NULL){
++ error = (*filldir)(ent, name, len, file->f_pos,
++ ino, DT_UNKNOWN);
++ if(error) break;
++ file->f_pos = next;
++ }
++ close_dir(dir);
++ return(0);
++}
++
++int hostfs_file_open(struct inode *ino, struct file *file)
++{
++ char *name;
++ int mode = 0, r = 0, w = 0, fd;
++
++ mode = file->f_mode & (FMODE_READ | FMODE_WRITE);
++ if((mode & HOSTFS_I(ino)->mode) == mode)
++ return(0);
++
++ /* The file may already have been opened, but with the wrong access,
++ * so this resets things and reopens the file with the new access.
++ */
++ if(HOSTFS_I(ino)->fd != -1){
++ close_file(&HOSTFS_I(ino)->fd);
++ HOSTFS_I(ino)->fd = -1;
++ }
++
++ HOSTFS_I(ino)->mode |= mode;
++ if(HOSTFS_I(ino)->mode & FMODE_READ)
++ r = 1;
++ if(HOSTFS_I(ino)->mode & FMODE_WRITE)
++ w = 1;
++ if(w)
++ r = 1;
++
++ name = dentry_name(file->f_dentry, 0);
++ if(name == NULL)
++ return(-ENOMEM);
++
++ fd = open_file(name, r, w, append);
++ kfree(name);
++ if(fd < 0) return(fd);
++ FILE_HOSTFS_I(file)->fd = fd;
++
++ return(0);
++}
++
++int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync)
++{
++ return(0);
++}
++
++static struct file_operations hostfs_file_fops = {
++ .llseek = generic_file_llseek,
++ .read = generic_file_read,
++ .write = generic_file_write,
++ .mmap = generic_file_mmap,
++ .open = hostfs_file_open,
++ .release = NULL,
++ .fsync = hostfs_fsync,
++};
++
++static struct file_operations hostfs_dir_fops = {
++ .readdir = hostfs_readdir,
++ .read = generic_read_dir,
++};
++
++int hostfs_writepage(struct page *page, struct writeback_control *wbc)
++{
++ struct address_space *mapping = page->mapping;
++ struct inode *inode = mapping->host;
++ char *buffer;
++ unsigned long long base;
++ int count = PAGE_CACHE_SIZE;
++ int end_index = inode->i_size >> PAGE_CACHE_SHIFT;
++ int err;
++
++ if (page->index >= end_index)
++ count = inode->i_size & (PAGE_CACHE_SIZE-1);
++
++ buffer = kmap(page);
++ base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT;
++
++ err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count);
++ if(err != count){
++ ClearPageUptodate(page);
++ goto out;
++ }
++
++ if (base > inode->i_size)
++ inode->i_size = base;
++
++ if (PageError(page))
++ ClearPageError(page);
++ err = 0;
++
++ out:
++ kunmap(page);
++
++ unlock_page(page);
++ return err;
++}
++
++int hostfs_readpage(struct file *file, struct page *page)
++{
++ char *buffer;
++ long long start;
++ int err = 0;
++
++ start = (long long) page->index << PAGE_CACHE_SHIFT;
++ buffer = kmap(page);
++ err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer,
++ PAGE_CACHE_SIZE);
++ if(err < 0) goto out;
++
++ memset(&buffer[err], 0, PAGE_CACHE_SIZE - err);
++
++ flush_dcache_page(page);
++ SetPageUptodate(page);
++ if (PageError(page)) ClearPageError(page);
++ err = 0;
++ out:
++ kunmap(page);
++ unlock_page(page);
++ return(err);
++}
++
++int hostfs_prepare_write(struct file *file, struct page *page,
++ unsigned int from, unsigned int to)
++{
++ char *buffer;
++ long long start, tmp;
++ int err;
++
++ start = (long long) page->index << PAGE_CACHE_SHIFT;
++ buffer = kmap(page);
++ if(from != 0){
++ tmp = start;
++ err = read_file(FILE_HOSTFS_I(file)->fd, &tmp, buffer,
++ from);
++ if(err < 0) goto out;
++ }
++ if(to != PAGE_CACHE_SIZE){
++ start += to;
++ err = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer + to,
++ PAGE_CACHE_SIZE - to);
++ if(err < 0) goto out;
++ }
++ err = 0;
++ out:
++ kunmap(page);
++ return(err);
++}
++
++int hostfs_commit_write(struct file *file, struct page *page, unsigned from,
++ unsigned to)
++{
++ struct address_space *mapping = page->mapping;
++ struct inode *inode = mapping->host;
++ char *buffer;
++ long long start;
++ int err = 0;
++
++ start = ((long long) page->index << PAGE_CACHE_SHIFT) + from;
++ buffer = kmap(page);
++ err = write_file(FILE_HOSTFS_I(file)->fd, &start, buffer + from,
++ to - from);
++ if(err > 0) err = 0;
++ if(!err && (start > inode->i_size))
++ inode->i_size = start;
++
++ kunmap(page);
++ return(err);
++}
++
++static struct address_space_operations hostfs_aops = {
++ .writepage = hostfs_writepage,
++ .readpage = hostfs_readpage,
++/* .set_page_dirty = __set_page_dirty_nobuffers, */
++ .prepare_write = hostfs_prepare_write,
++ .commit_write = hostfs_commit_write
++};
++
++static int init_inode(struct inode *inode, struct dentry *dentry)
++{
++ char *name;
++ int type, err = -ENOMEM, rdev;
++
++ if(dentry){
++ name = dentry_name(dentry, 0);
++ if(name == NULL)
++ goto out;
++ type = file_type(name, &rdev);
++ kfree(name);
++ }
++ else type = OS_TYPE_DIR;
++
++ err = 0;
++ if(type == OS_TYPE_SYMLINK)
++ inode->i_op = &page_symlink_inode_operations;
++ else if(type == OS_TYPE_DIR)
++ inode->i_op = &hostfs_dir_iops;
++ else inode->i_op = &hostfs_iops;
++
++ if(type == OS_TYPE_DIR) inode->i_fop = &hostfs_dir_fops;
++ else inode->i_fop = &hostfs_file_fops;
++
++ if(type == OS_TYPE_SYMLINK)
++ inode->i_mapping->a_ops = &hostfs_link_aops;
++ else inode->i_mapping->a_ops = &hostfs_aops;
++
++ switch (type) {
++ case OS_TYPE_CHARDEV:
++ init_special_inode(inode, S_IFCHR, rdev);
++ break;
++ case OS_TYPE_BLOCKDEV:
++ init_special_inode(inode, S_IFBLK, rdev);
++ break;
++ case OS_TYPE_FIFO:
++ init_special_inode(inode, S_IFIFO, 0);
++ break;
++ case OS_TYPE_SOCK:
++ init_special_inode(inode, S_IFSOCK, 0);
++ break;
++ }
++ out:
++ return(err);
++}
++
++int hostfs_create(struct inode *dir, struct dentry *dentry, int mode,
++ struct nameidata *nd)
++{
++ struct inode *inode;
++ char *name;
++ int error, fd;
++
++ error = -ENOMEM;
++ inode = iget(dir->i_sb, 0);
++ if(inode == NULL) goto out;
++
++ error = init_inode(inode, dentry);
++ if(error)
++ goto out_put;
++
++ error = -ENOMEM;
++ name = dentry_name(dentry, 0);
++ if(name == NULL)
++ goto out_put;
++
++ fd = file_create(name,
++ mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR,
++ mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP,
++ mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH);
++ if(fd < 0)
++ error = fd;
++ else error = read_name(inode, name);
++
++ kfree(name);
++ if(error)
++ goto out_put;
++
++ HOSTFS_I(inode)->fd = fd;
++ HOSTFS_I(inode)->mode = FMODE_READ | FMODE_WRITE;
++ d_instantiate(dentry, inode);
++ return(0);
++
++ out_free:
++ kfree(name);
++ out_put:
++ iput(inode);
++ out:
++ return(error);
++}
++
++struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry,
++ struct nameidata *nd)
++{
++ struct inode *inode;
++ char *name;
++ int err;
++
++ err = -ENOMEM;
++ inode = iget(ino->i_sb, 0);
++ if(inode == NULL)
++ goto out;
++
++ err = init_inode(inode, dentry);
++ if(err)
++ goto out_put;
++
++ err = -ENOMEM;
++ name = dentry_name(dentry, 0);
++ if(name == NULL)
++ goto out_put;
++
++ err = read_name(inode, name);
++ kfree(name);
++ if(err == -ENOENT){
++ iput(inode);
++ inode = NULL;
++ }
++ else if(err)
++ goto out_put;
++
++ d_add(dentry, inode);
++ dentry->d_op = &hostfs_dentry_ops;
++ return(NULL);
++
++ out_put:
++ iput(inode);
++ out:
++ return(ERR_PTR(err));
++}
++
++static char *inode_dentry_name(struct inode *ino, struct dentry *dentry)
++{
++ char *file;
++ int len;
++
++ file = inode_name(ino, dentry->d_name.len + 1);
++ if(file == NULL) return(NULL);
++ strcat(file, "/");
++ len = strlen(file);
++ strncat(file, dentry->d_name.name, dentry->d_name.len);
++ file[len + dentry->d_name.len] = '\0';
++ return(file);
++}
++
++int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from)
++{
++ char *from_name, *to_name;
++ int err;
++
++ if((from_name = inode_dentry_name(ino, from)) == NULL)
++ return(-ENOMEM);
++ to_name = dentry_name(to, 0);
++ if(to_name == NULL){
++ kfree(from_name);
++ return(-ENOMEM);
++ }
++ err = link_file(to_name, from_name);
++ kfree(from_name);
++ kfree(to_name);
++ return(err);
++}
++
++int hostfs_unlink(struct inode *ino, struct dentry *dentry)
++{
++ char *file;
++ int err;
++
++ if(append)
++ return(-EPERM);
++ if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM);
++
++ err = unlink_file(file);
++ kfree(file);
++ return(err);
++}
++
++int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to)
++{
++ char *file;
++ int err;
++
++ if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM);
++ err = make_symlink(file, to);
++ kfree(file);
++ return(err);
++}
++
++int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode)
++{
++ char *file;
++ int err;
++
++ if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM);
++ err = do_mkdir(file, mode);
++ kfree(file);
++ return(err);
++}
++
++int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
++{
++ char *file;
++ int err;
++
++ if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM);
++ err = do_rmdir(file);
++ kfree(file);
++ return(err);
++}
++
++int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
++{
++ struct inode *inode;
++ char *name;
++ int err = -ENOMEM;
++
++ inode = iget(dir->i_sb, 0);
++ if(inode == NULL)
++ goto out;
++
++ err = init_inode(inode, dentry);
++ if(err)
++ goto out_put;
++
++ err = -ENOMEM;
++ name = dentry_name(dentry, 0);
++ if(name == NULL)
++ goto out_put;
++
++ init_special_inode(inode, mode, dev);
++ err = do_mknod(name, mode, dev);
++ if(err)
++ goto out_free;
++
++ err = read_name(inode, name);
++ kfree(name);
++ if(err)
++ goto out_put;
++
++ d_instantiate(dentry, inode);
++ return(0);
++
++ out_free:
++ kfree(name);
++ out_put:
++ iput(inode);
++ out:
++ return(err);
++}
++
++int hostfs_rename(struct inode *from_ino, struct dentry *from,
++ struct inode *to_ino, struct dentry *to)
++{
++ char *from_name, *to_name;
++ int err;
++
++ if((from_name = inode_dentry_name(from_ino, from)) == NULL)
++ return(-ENOMEM);
++ if((to_name = inode_dentry_name(to_ino, to)) == NULL){
++ kfree(from_name);
++ return(-ENOMEM);
++ }
++ err = rename_file(from_name, to_name);
++ kfree(from_name);
++ kfree(to_name);
++ return(err);
++}
++
++void hostfs_truncate(struct inode *ino)
++{
++ not_implemented();
++}
++
++int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd)
++{
++ char *name;
++ int r = 0, w = 0, x = 0, err;
++
++ if(desired & MAY_READ) r = 1;
++ if(desired & MAY_WRITE) w = 1;
++ if(desired & MAY_EXEC) x = 1;
++ name = inode_name(ino, 0);
++ if(name == NULL) return(-ENOMEM);
++ err = access_file(name, r, w, x);
++ kfree(name);
++ if(!err) err = vfs_permission(ino, desired);
++ return(err);
++}
++
++/* Apply the requested attribute changes to the host file, then the inode */
++int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
++{
++	struct hostfs_iattr attrs;
++	char *name;
++	int err;
++
++	if(append)
++		attr->ia_valid &= ~ATTR_SIZE;
++
++	attrs.ia_valid = 0;
++	if(attr->ia_valid & ATTR_MODE){
++		attrs.ia_valid |= HOSTFS_ATTR_MODE;
++		attrs.ia_mode = attr->ia_mode;
++	}
++	if(attr->ia_valid & ATTR_UID){
++		if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) &&
++		   (attr->ia_uid == 0))
++			attr->ia_uid = getuid();
++		attrs.ia_valid |= HOSTFS_ATTR_UID;
++		attrs.ia_uid = attr->ia_uid;
++	}
++	if(attr->ia_valid & ATTR_GID){
++		/* gid 0 on the root hostfs becomes the host process's gid */
++		if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) &&
++		   (attr->ia_gid == 0))
++			attr->ia_gid = getgid();
++		attrs.ia_valid |= HOSTFS_ATTR_GID;
++		attrs.ia_gid = attr->ia_gid;
++	}
++	if(attr->ia_valid & ATTR_SIZE){
++		attrs.ia_valid |= HOSTFS_ATTR_SIZE;
++		attrs.ia_size = attr->ia_size;
++	}
++	if(attr->ia_valid & ATTR_ATIME){
++		attrs.ia_valid |= HOSTFS_ATTR_ATIME;
++		attrs.ia_atime = attr->ia_atime;
++	}
++	if(attr->ia_valid & ATTR_MTIME){
++		attrs.ia_valid |= HOSTFS_ATTR_MTIME;
++		attrs.ia_mtime = attr->ia_mtime;
++	}
++	if(attr->ia_valid & ATTR_CTIME){
++		attrs.ia_valid |= HOSTFS_ATTR_CTIME;
++		attrs.ia_ctime = attr->ia_ctime;
++	}
++	if(attr->ia_valid & ATTR_ATIME_SET)
++		attrs.ia_valid |= HOSTFS_ATTR_ATIME_SET;
++	if(attr->ia_valid & ATTR_MTIME_SET)
++		attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET;
++	name = dentry_name(dentry, 0);
++	if(name == NULL) return(-ENOMEM);
++	err = set_attr(name, &attrs);
++	kfree(name);
++	if(err)
++		return(err);
++
++	return(inode_setattr(dentry->d_inode, attr));
++}
++
++int hostfs_getattr(struct vfsmount *mnt, struct dentry *dentry,
++ struct kstat *stat)
++{
++ generic_fillattr(dentry->d_inode, stat);
++ return(0);
++}
++
++static struct inode_operations hostfs_iops = {
++ .create = hostfs_create,
++ .link = hostfs_link,
++ .unlink = hostfs_unlink,
++ .symlink = hostfs_symlink,
++ .mkdir = hostfs_mkdir,
++ .rmdir = hostfs_rmdir,
++ .mknod = hostfs_mknod,
++ .rename = hostfs_rename,
++ .truncate = hostfs_truncate,
++ .permission = hostfs_permission,
++ .setattr = hostfs_setattr,
++ .getattr = hostfs_getattr,
++};
++
++static struct inode_operations hostfs_dir_iops = {
++ .create = hostfs_create,
++ .lookup = hostfs_lookup,
++ .link = hostfs_link,
++ .unlink = hostfs_unlink,
++ .symlink = hostfs_symlink,
++ .mkdir = hostfs_mkdir,
++ .rmdir = hostfs_rmdir,
++ .mknod = hostfs_mknod,
++ .rename = hostfs_rename,
++ .truncate = hostfs_truncate,
++ .permission = hostfs_permission,
++ .setattr = hostfs_setattr,
++ .getattr = hostfs_getattr,
++};
++
++/* Fill a page with the host symlink target; the page is always unlocked */
++int hostfs_link_readpage(struct file *file, struct page *page)
++{
++	char *buffer, *name;
++	int err = -ENOMEM;
++
++	buffer = kmap(page);
++	name = inode_name(page->mapping->host, 0);
++	if(name == NULL) goto out;	/* still has to kunmap and unlock */
++	err = do_readlink(name, buffer, PAGE_CACHE_SIZE);
++	kfree(name);
++	if(err == PAGE_CACHE_SIZE)
++		err = -E2BIG;
++	else if(err > 0){
++		flush_dcache_page(page);
++		SetPageUptodate(page);
++		if (PageError(page)) ClearPageError(page);
++		err = 0;
++	}
++ out:
++	kunmap(page);
++	unlock_page(page);
++	return(err);
++}
++
++static struct address_space_operations hostfs_link_aops = {
++ .readpage = hostfs_link_readpage,
++};
++
++static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
++{
++ struct inode *root_inode;
++ char *name, *data = d;
++ int err;
++
++ sb->s_blocksize = 1024;
++ sb->s_blocksize_bits = 10;
++ sb->s_magic = HOSTFS_SUPER_MAGIC;
++ sb->s_op = &hostfs_sbops;
++
++ if((data == NULL) || (*data == '\0'))
++ data = root_ino;
++
++ err = -ENOMEM;
++ name = kmalloc(strlen(data) + 1, GFP_KERNEL);
++ if(name == NULL)
++ goto out;
++
++ strcpy(name, data);
++
++ root_inode = iget(sb, 0);
++ if(root_inode == NULL)
++ goto out_free;
++
++ err = init_inode(root_inode, NULL);
++ if(err)
++ goto out_put;
++
++ HOSTFS_I(root_inode)->host_filename = name;
++
++ err = -ENOMEM;
++ sb->s_root = d_alloc_root(root_inode);
++ if(sb->s_root == NULL)
++ goto out_put;
++
++ err = read_inode(root_inode);
++ if(err)
++ goto out_put;
++
++ return(0);
++
++ out_put:
++ iput(root_inode);
++ out_free:
++ kfree(name);
++ out:
++ return(err);
++}
++
++static struct super_block *hostfs_read_sb(struct file_system_type *type,
++ int flags, const char *dev_name,
++ void *data)
++{
++ return(get_sb_nodev(type, flags, data, hostfs_fill_sb_common));
++}
++
++static struct file_system_type hostfs_type = {
++ .owner = THIS_MODULE,
++ .name = "hostfs",
++ .get_sb = hostfs_read_sb,
++ .kill_sb = kill_anon_super,
++ .fs_flags = 0,
++};
++
++static int __init init_hostfs(void)
++{
++ return(register_filesystem(&hostfs_type));
++}
++
++static void __exit exit_hostfs(void)
++{
++ unregister_filesystem(&hostfs_type);
++}
++
++module_init(init_hostfs)
++module_exit(exit_hostfs)
++MODULE_LICENSE("GPL");
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
+--- a/fs/hostfs/hostfs_user.c Wed Dec 31 19:00:00 1969
++++ b/fs/hostfs/hostfs_user.c Fri Aug 15 15:10:43 2003
+@@ -0,0 +1,361 @@
++/*
++ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
++ * Licensed under the GPL
++ */
++
++#include <unistd.h>
++#include <stdio.h>
++#include <fcntl.h>
++#include <dirent.h>
++#include <errno.h>
++#include <utime.h>
++#include <string.h>
++#include <sys/stat.h>
++#include <sys/time.h>
++#include <sys/vfs.h>
++#include "hostfs.h"
++#include "kern_util.h"
++#include "user.h"
++
++int stat_file(const char *path, unsigned long long *inode_out, int *mode_out,
++ int *nlink_out, int *uid_out, int *gid_out,
++ unsigned long long *size_out, struct timespec *atime_out,
++ struct timespec *mtime_out, struct timespec *ctime_out,
++ int *blksize_out, unsigned long long *blocks_out)
++{
++ struct stat64 buf;
++
++ if(lstat64(path, &buf) < 0)
++ return(-errno);
++
++ /* See the Makefile for why STAT64_INO_FIELD is passed in
++ * by the build
++ */
++ if(inode_out != NULL) *inode_out = buf.STAT64_INO_FIELD;
++ if(mode_out != NULL) *mode_out = buf.st_mode;
++ if(nlink_out != NULL) *nlink_out = buf.st_nlink;
++ if(uid_out != NULL) *uid_out = buf.st_uid;
++ if(gid_out != NULL) *gid_out = buf.st_gid;
++ if(size_out != NULL) *size_out = buf.st_size;
++ if(atime_out != NULL) {
++ atime_out->tv_sec = buf.st_atime;
++ atime_out->tv_nsec = 0;
++ }
++ if(mtime_out != NULL) {
++ mtime_out->tv_sec = buf.st_mtime;
++ mtime_out->tv_nsec = 0;
++ }
++ if(ctime_out != NULL) {
++ ctime_out->tv_sec = buf.st_ctime;
++ ctime_out->tv_nsec = 0;
++ }
++ if(blksize_out != NULL) *blksize_out = buf.st_blksize;
++ if(blocks_out != NULL) *blocks_out = buf.st_blocks;
++ return(0);
++}
++
++int file_type(const char *path, int *rdev)
++{
++ struct stat64 buf;
++
++ if(lstat64(path, &buf) < 0)
++ return(-errno);
++ if(rdev != NULL)
++ *rdev = buf.st_rdev;
++
++ if(S_ISDIR(buf.st_mode)) return(OS_TYPE_DIR);
++ else if(S_ISLNK(buf.st_mode)) return(OS_TYPE_SYMLINK);
++ else if(S_ISCHR(buf.st_mode)) return(OS_TYPE_CHARDEV);
++ else if(S_ISBLK(buf.st_mode)) return(OS_TYPE_BLOCKDEV);
++ else if(S_ISFIFO(buf.st_mode))return(OS_TYPE_FIFO);
++ else if(S_ISSOCK(buf.st_mode))return(OS_TYPE_SOCK);
++ else return(OS_TYPE_FILE);
++}
++
++int access_file(char *path, int r, int w, int x)
++{
++ int mode = 0;
++
++ if(r) mode = R_OK;
++ if(w) mode |= W_OK;
++ if(x) mode |= X_OK;
++ if(access(path, mode) != 0) return(-errno);
++ else return(0);
++}
++
++int open_file(char *path, int r, int w, int append)
++{
++ int mode = 0, fd;
++
++ if(r && !w)
++ mode = O_RDONLY;
++ else if(!r && w)
++ mode = O_WRONLY;
++ else if(r && w)
++ mode = O_RDWR;
++ else panic("Impossible mode in open_file");
++
++ if(append)
++ mode |= O_APPEND;
++ fd = open64(path, mode);
++ if(fd < 0) return(-errno);
++ else return(fd);
++}
++
++void *open_dir(char *path, int *err_out)
++{
++	DIR *dir;
++
++	dir = opendir(path);
++	/* errno is only meaningful on failure; report 0 on success */
++	*err_out = (dir == NULL) ? errno : 0;
++	return(dir);
++}
++
++char *read_dir(void *stream, unsigned long long *pos,
++ unsigned long long *ino_out, int *len_out)
++{
++ DIR *dir = stream;
++ struct dirent *ent;
++
++ seekdir(dir, *pos);
++ ent = readdir(dir);
++ if(ent == NULL) return(NULL);
++ *len_out = strlen(ent->d_name);
++ *ino_out = ent->d_ino;
++ *pos = telldir(dir);
++ return(ent->d_name);
++}
++
++int read_file(int fd, unsigned long long *offset, char *buf, int len)
++{
++ int n;
++
++ n = pread64(fd, buf, len, *offset);
++ if(n < 0) return(-errno);
++ *offset += n;
++ return(n);
++}
++
++int write_file(int fd, unsigned long long *offset, const char *buf, int len)
++{
++ int n;
++
++ n = pwrite64(fd, buf, len, *offset);
++ if(n < 0) return(-errno);
++ *offset += n;
++ return(n);
++}
++
++int lseek_file(int fd, long long offset, int whence)
++{
++	long long ret;	/* lseek64() returns a 64-bit offset; int would truncate */
++
++	ret = lseek64(fd, offset, whence);
++	if(ret < 0) return(-errno);
++	return(0);
++}
++
++void close_file(void *stream)
++{
++ close(*((int *) stream));
++}
++
++void close_dir(void *stream)
++{
++ closedir(stream);
++}
++
++int file_create(char *name, int ur, int uw, int ux, int gr,
++ int gw, int gx, int or, int ow, int ox)
++{
++ int mode, fd;
++
++ mode = 0;
++ mode |= ur ? S_IRUSR : 0;
++ mode |= uw ? S_IWUSR : 0;
++ mode |= ux ? S_IXUSR : 0;
++ mode |= gr ? S_IRGRP : 0;
++ mode |= gw ? S_IWGRP : 0;
++ mode |= gx ? S_IXGRP : 0;
++ mode |= or ? S_IROTH : 0;
++ mode |= ow ? S_IWOTH : 0;
++ mode |= ox ? S_IXOTH : 0;
++ fd = open64(name, O_CREAT | O_RDWR, mode);
++ if(fd < 0)
++ return(-errno);
++ return(fd);
++}
++
++int set_attr(const char *file, struct hostfs_iattr *attrs)
++{
++ struct utimbuf buf;
++ int err, ma;
++
++ if(attrs->ia_valid & HOSTFS_ATTR_MODE){
++ if(chmod(file, attrs->ia_mode) != 0) return(-errno);
++ }
++ if(attrs->ia_valid & HOSTFS_ATTR_UID){
++ if(chown(file, attrs->ia_uid, -1)) return(-errno);
++ }
++ if(attrs->ia_valid & HOSTFS_ATTR_GID){
++ if(chown(file, -1, attrs->ia_gid)) return(-errno);
++ }
++ if(attrs->ia_valid & HOSTFS_ATTR_SIZE){
++ if(truncate(file, attrs->ia_size)) return(-errno);
++ }
++ ma = HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET;
++ if((attrs->ia_valid & ma) == ma){
++ buf.actime = attrs->ia_atime.tv_sec;
++ buf.modtime = attrs->ia_mtime.tv_sec;
++ if(utime(file, &buf) != 0) return(-errno);
++ }
++ else {
++ struct timespec ts;
++
++ if(attrs->ia_valid & HOSTFS_ATTR_ATIME_SET){
++ err = stat_file(file, NULL, NULL, NULL, NULL, NULL,
++ NULL, NULL, &ts, NULL, NULL, NULL);
++ if(err != 0)
++ return(err);
++ buf.actime = attrs->ia_atime.tv_sec;
++ buf.modtime = ts.tv_sec;
++ if(utime(file, &buf) != 0)
++ return(-errno);
++ }
++ if(attrs->ia_valid & HOSTFS_ATTR_MTIME_SET){
++ err = stat_file(file, NULL, NULL, NULL, NULL, NULL,
++ NULL, &ts, NULL, NULL, NULL, NULL);
++ if(err != 0)
++ return(err);
++ buf.actime = ts.tv_sec;
++ buf.modtime = attrs->ia_mtime.tv_sec;
++ if(utime(file, &buf) != 0)
++ return(-errno);
++ }
++ }
++ if(attrs->ia_valid & HOSTFS_ATTR_CTIME) ;
++ if(attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)){
++ err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL,
++ &attrs->ia_atime, &attrs->ia_mtime, NULL,
++ NULL, NULL);
++ if(err != 0) return(err);
++ }
++ return(0);
++}
++
++int make_symlink(const char *from, const char *to)
++{
++ int err;
++
++ err = symlink(to, from);
++ if(err) return(-errno);
++ return(0);
++}
++
++int unlink_file(const char *file)
++{
++ int err;
++
++ err = unlink(file);
++ if(err) return(-errno);
++ return(0);
++}
++
++int do_mkdir(const char *file, int mode)
++{
++ int err;
++
++ err = mkdir(file, mode);
++ if(err) return(-errno);
++ return(0);
++}
++
++int do_rmdir(const char *file)
++{
++ int err;
++
++ err = rmdir(file);
++ if(err) return(-errno);
++ return(0);
++}
++
++int do_mknod(const char *file, int mode, int dev)
++{
++ int err;
++
++ err = mknod(file, mode, dev);
++ if(err) return(-errno);
++ return(0);
++}
++
++int link_file(const char *to, const char *from)
++{
++ int err;
++
++ err = link(to, from);
++ if(err) return(-errno);
++ return(0);
++}
++
++int do_readlink(char *file, char *buf, int size)
++{
++ int n;
++
++ n = readlink(file, buf, size);
++ if(n < 0)
++ return(-errno);
++ if(n < size)
++ buf[n] = '\0';
++ return(n);
++}
++
++int rename_file(char *from, char *to)
++{
++ int err;
++
++ err = rename(from, to);
++ if(err < 0) return(-errno);
++ return(0);
++}
++
++int do_statfs(char *root, long *bsize_out, long long *blocks_out,
++ long long *bfree_out, long long *bavail_out,
++ long long *files_out, long long *ffree_out,
++ void *fsid_out, int fsid_size, long *namelen_out,
++ long *spare_out)
++{
++ struct statfs64 buf;
++ int err;
++
++ err = statfs64(root, &buf);
++ if(err < 0) return(-errno);
++ *bsize_out = buf.f_bsize;
++ *blocks_out = buf.f_blocks;
++ *bfree_out = buf.f_bfree;
++ *bavail_out = buf.f_bavail;
++ *files_out = buf.f_files;
++ *ffree_out = buf.f_ffree;
++ memcpy(fsid_out, &buf.f_fsid,
++ sizeof(buf.f_fsid) > fsid_size ? fsid_size :
++ sizeof(buf.f_fsid));
++ *namelen_out = buf.f_namelen;
++ spare_out[0] = buf.f_spare[0];
++ spare_out[1] = buf.f_spare[1];
++ spare_out[2] = buf.f_spare[2];
++ spare_out[3] = buf.f_spare[3];
++ spare_out[4] = buf.f_spare[4];
++ spare_out[5] = buf.f_spare[5];
++ return(0);
++}
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/fs/hppfs/Makefile b/fs/hppfs/Makefile
+--- a/fs/hppfs/Makefile Wed Dec 31 19:00:00 1969
++++ b/fs/hppfs/Makefile Fri Aug 15 15:12:31 2003
+@@ -0,0 +1,19 @@
++#
++# Copyright (C) 2002, 2003 Jeff Dike (jdike@karaya.com)
++# Licensed under the GPL
++#
++
++hppfs-objs := hppfs_kern.o
++
++obj-y =
++obj-$(CONFIG_HPPFS) += hppfs.o
++
++clean:
++
++modules:
++
++fastdep:
++
++dep:
++
++archmrproper: clean
+diff -Naur a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
+--- a/fs/hppfs/hppfs_kern.c Wed Dec 31 19:00:00 1969
++++ b/fs/hppfs/hppfs_kern.c Fri Aug 15 15:11:52 2003
+@@ -0,0 +1,811 @@
++/*
++ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
++ * Licensed under the GPL
++ */
++
++#include <linux/fs.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/list.h>
++#include <linux/kernel.h>
++#include <linux/ctype.h>
++#include <linux/dcache.h>
++#include <linux/statfs.h>
++#include <asm/uaccess.h>
++#include <asm/fcntl.h>
++#include "os.h"
++
++static int init_inode(struct inode *inode, struct dentry *dentry);
++
++struct hppfs_data {
++ struct list_head list;
++ char contents[PAGE_SIZE - sizeof(struct list_head)];
++};
++
++struct hppfs_private {
++ struct file proc_file;
++ int host_fd;
++ loff_t len;
++ struct hppfs_data *contents;
++};
++
++struct hppfs_inode_info {
++ struct dentry *proc_dentry;
++ struct inode vfs_inode;
++};
++
++static inline struct hppfs_inode_info *HPPFS_I(struct inode *inode)
++{
++ return(list_entry(inode, struct hppfs_inode_info, vfs_inode));
++}
++
++#define HPPFS_SUPER_MAGIC 0xb00000ee
++
++static struct super_operations hppfs_sbops;
++
++static int is_pid(struct dentry *dentry)
++{
++ struct super_block *sb;
++ int i;
++
++ sb = dentry->d_sb;
++ if((sb->s_op != &hppfs_sbops) || (dentry->d_parent != sb->s_root))
++ return(0);
++
++ for(i = 0; i < dentry->d_name.len; i++){
++ if(!isdigit(dentry->d_name.name[i]))
++ return(0);
++ }
++ return(1);
++}
++
++static char *dentry_name(struct dentry *dentry, int extra)
++{
++ struct dentry *parent;
++ char *root, *name;
++ const char *seg_name;
++ int len, seg_len;
++
++ len = 0;
++ parent = dentry;
++ while(parent->d_parent != parent){
++ if(is_pid(parent))
++ len += strlen("pid") + 1;
++ else len += parent->d_name.len + 1;
++ parent = parent->d_parent;
++ }
++
++ root = "proc";
++ len += strlen(root);
++ name = kmalloc(len + extra + 1, GFP_KERNEL);
++ if(name == NULL) return(NULL);
++
++ name[len] = '\0';
++ parent = dentry;
++ while(parent->d_parent != parent){
++ if(is_pid(parent)){
++ seg_name = "pid";
++ seg_len = strlen("pid");
++ }
++ else {
++ seg_name = parent->d_name.name;
++ seg_len = parent->d_name.len;
++ }
++
++ len -= seg_len + 1;
++ name[len] = '/';
++ strncpy(&name[len + 1], seg_name, seg_len);
++ parent = parent->d_parent;
++ }
++ strncpy(name, root, strlen(root));
++ return(name);
++}
++
++struct dentry_operations hppfs_dentry_ops = {
++};
++
++/* 1 if the host side has a "remove" entry for dentry[/file], 0 if not */
++static int file_removed(struct dentry *dentry, const char *file)
++{
++	char *host_file;
++	int extra = 0, fd;
++
++	if(file != NULL) extra += strlen(file) + 1;
++
++	host_file = dentry_name(dentry, extra + strlen("/remove"));
++	if(host_file == NULL){
++		printk("file_removed : allocation failed\n");
++		return(-ENOMEM);
++	}
++
++	if(file != NULL){
++		strcat(host_file, "/");
++		strcat(host_file, file);
++	}
++	strcat(host_file, "/remove");
++
++	fd = os_open_file(host_file, of_read(OPENFLAGS()), 0);
++	kfree(host_file);
++	if(fd >= 0){	/* 0 is a valid descriptor; only negatives are errors */
++		os_close_file(fd);
++		return(1);
++	}
++	return(0);
++}
++
++static void hppfs_read_inode(struct inode *ino)
++{
++ struct inode *proc_ino;
++
++ if(HPPFS_I(ino)->proc_dentry == NULL)
++ return;
++
++ proc_ino = HPPFS_I(ino)->proc_dentry->d_inode;
++ ino->i_uid = proc_ino->i_uid;
++ ino->i_gid = proc_ino->i_gid;
++ ino->i_atime = proc_ino->i_atime;
++ ino->i_mtime = proc_ino->i_mtime;
++ ino->i_ctime = proc_ino->i_ctime;
++ ino->i_ino = proc_ino->i_ino;
++ ino->i_mode = proc_ino->i_mode;
++ ino->i_nlink = proc_ino->i_nlink;
++ ino->i_size = proc_ino->i_size;
++ ino->i_blksize = proc_ino->i_blksize;
++ ino->i_blocks = proc_ino->i_blocks;
++}
++
++static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry,
++ struct nameidata *nd)
++{
++ struct dentry *proc_dentry, *new, *parent;
++ struct inode *inode;
++ int err, deleted;
++
++ deleted = file_removed(dentry, NULL);
++ if(deleted < 0)
++ return(ERR_PTR(deleted));
++ else if(deleted)
++ return(ERR_PTR(-ENOENT));
++
++ err = -ENOMEM;
++ parent = HPPFS_I(ino)->proc_dentry;
++ down(&parent->d_inode->i_sem);
++ proc_dentry = d_lookup(parent, &dentry->d_name);
++ if(proc_dentry == NULL){
++ proc_dentry = d_alloc(parent, &dentry->d_name);
++ if(proc_dentry == NULL){
++ up(&parent->d_inode->i_sem);
++ goto out;
++ }
++ new = (*parent->d_inode->i_op->lookup)(parent->d_inode,
++ proc_dentry, NULL);
++ if(new){
++ dput(proc_dentry);
++ proc_dentry = new;
++ }
++ }
++ up(&parent->d_inode->i_sem);
++
++ if(IS_ERR(proc_dentry))
++ return(proc_dentry);
++
++ inode = iget(ino->i_sb, 0);
++ if(inode == NULL)
++ goto out_dput;
++
++ err = init_inode(inode, proc_dentry);
++ if(err)
++ goto out_put;
++
++ hppfs_read_inode(inode);
++
++ d_add(dentry, inode);
++ dentry->d_op = &hppfs_dentry_ops;
++ return(NULL);
++
++ out_put:
++ iput(inode);
++ out_dput:
++ dput(proc_dentry);
++ out:
++ return(ERR_PTR(err));
++}
++
++static struct inode_operations hppfs_file_iops = {
++};
++
++static ssize_t read_proc(struct file *file, char *buf, ssize_t count,
++ loff_t *ppos, int is_user)
++{
++ ssize_t (*read)(struct file *, char *, size_t, loff_t *);
++ ssize_t n;
++
++ read = file->f_dentry->d_inode->i_fop->read;
++
++ if(!is_user)
++ set_fs(KERNEL_DS);
++
++ n = (*read)(file, buf, count, &file->f_pos);
++
++ if(!is_user)
++ set_fs(USER_DS);
++
++ if(ppos) *ppos = file->f_pos;
++ return(n);
++}
++
++/* Copy up to count bytes from host descriptor fd into the user buffer buf */
++static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count)
++{
++	ssize_t n;
++	int cur, err;
++	char *new_buf;
++
++	n = -ENOMEM;
++	new_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
++	if(new_buf == NULL){
++		printk("hppfs_read_file : kmalloc failed\n");
++		goto out;
++	}
++	n = 0;
++	while(count > 0){
++		cur = min_t(ssize_t, count, PAGE_SIZE);
++		err = os_read_file(fd, new_buf, cur);
++		if(err < 0){
++			printk("hppfs_read : read failed, errno = %d\n",
++			       err);
++			n = err;
++			goto out_free;
++		}
++		else if(err == 0) break;
++		if(copy_to_user(buf, new_buf, err)){
++			n = -EFAULT;
++			goto out_free;
++		}
++		buf += err;	/* next chunk goes after this one, not over it */
++		n += err;
++		count -= err;
++	}
++ out_free:
++	kfree(new_buf);
++ out:
++	return(n);
++}
++
++/* Read from cached host data, a host file, or the underlying proc file */
++static ssize_t hppfs_read(struct file *file, char *buf, size_t count,
++			  loff_t *ppos)
++{
++	struct hppfs_private *hppfs = file->private_data;
++	struct hppfs_data *data;
++	loff_t off;
++	int err;
++
++	if(hppfs->contents != NULL){
++		if(*ppos >= hppfs->len) return(0);
++		data = hppfs->contents;
++		off = *ppos;
++		while(off >= sizeof(data->contents)){
++			data = list_entry(data->list.next, struct hppfs_data,
++					  list);
++			off -= sizeof(data->contents);
++		}
++		/* Clamp to EOF and to this chunk; a short read is allowed */
++		count = min_t(loff_t, count, hppfs->len - *ppos);
++		count = min_t(loff_t, count, sizeof(data->contents) - off);
++		if(copy_to_user(buf, &data->contents[off], count))
++			return(-EFAULT);
++		*ppos += count;
++	}
++	else if(hppfs->host_fd != -1){
++		err = os_seek_file(hppfs->host_fd, *ppos);
++		if(err){
++			printk("hppfs_read : seek failed, errno = %d\n", err);
++			return(err);
++		}
++		count = hppfs_read_file(hppfs->host_fd, buf, count);
++		if((ssize_t) count > 0)	/* count is unsigned; errors are < 0 */
++			*ppos += count;
++	}
++	else count = read_proc(&hppfs->proc_file, buf, count, ppos, 1);
++	return(count);
++}
++
++static ssize_t hppfs_write(struct file *file, const char *buf, size_t len,
++ loff_t *ppos)
++{
++ struct hppfs_private *data = file->private_data;
++ struct file *proc_file = &data->proc_file;
++ ssize_t (*write)(struct file *, const char *, size_t, loff_t *);
++ int err;
++
++ write = proc_file->f_dentry->d_inode->i_fop->write;
++
++ proc_file->f_pos = file->f_pos;
++ err = (*write)(proc_file, buf, len, &proc_file->f_pos);
++ file->f_pos = proc_file->f_pos;
++
++ return(err);
++}
++
++static int open_host_sock(char *host_file, int *filter_out)
++{
++ char *end;
++ int fd;
++
++ end = &host_file[strlen(host_file)];
++ strcpy(end, "/rw");
++ *filter_out = 1;
++ fd = os_connect_socket(host_file);
++ if(fd > 0)
++ return(fd);
++
++ strcpy(end, "/r");
++ *filter_out = 0;
++ fd = os_connect_socket(host_file);
++ return(fd);
++}
++
++static void free_contents(struct hppfs_data *head)
++{
++ struct hppfs_data *data;
++ struct list_head *ele, *next;
++
++ if(head == NULL) return;
++
++ list_for_each_safe(ele, next, &head->list){
++ data = list_entry(ele, struct hppfs_data, list);
++ kfree(data);
++ }
++ kfree(head);
++}
++
++static struct hppfs_data *hppfs_get_data(int fd, int filter,
++ struct file *proc_file,
++ struct file *hppfs_file,
++ loff_t *size_out)
++{
++ struct hppfs_data *data, *new, *head;
++ int n, err;
++
++ err = -ENOMEM;
++ data = kmalloc(sizeof(*data), GFP_KERNEL);
++ if(data == NULL){
++ printk("hppfs_get_data : head allocation failed\n");
++ goto failed;
++ }
++
++ INIT_LIST_HEAD(&data->list);
++
++ head = data;
++ *size_out = 0;
++
++ if(filter){
++ while((n = read_proc(proc_file, data->contents,
++ sizeof(data->contents), NULL, 0)) > 0)
++ os_write_file(fd, data->contents, n);
++ err = os_shutdown_socket(fd, 0, 1);
++ if(err){
++ printk("hppfs_get_data : failed to shut down "
++ "socket\n");
++ goto failed_free;
++ }
++ }
++ while(1){
++ n = os_read_file(fd, data->contents, sizeof(data->contents));
++ if(n < 0){
++ err = n;
++ printk("hppfs_get_data : read failed, errno = %d\n",
++ err);
++ goto failed_free;
++ }
++ else if(n == 0)
++ break;
++
++ *size_out += n;
++
++ if(n < sizeof(data->contents))
++ break;
++
++ new = kmalloc(sizeof(*data), GFP_KERNEL);
++ if(new == 0){
++ printk("hppfs_get_data : data allocation failed\n");
++ err = -ENOMEM;
++ goto failed_free;
++ }
++
++ INIT_LIST_HEAD(&new->list);
++ list_add(&new->list, &data->list);
++ data = new;
++ }
++ return(head);
++
++ failed_free:
++ free_contents(head);
++ failed:
++ return(ERR_PTR(err));
++}
++
++static struct hppfs_private *hppfs_data(void)
++{
++ struct hppfs_private *data;
++
++ data = kmalloc(sizeof(*data), GFP_KERNEL);
++ if(data == NULL)
++ return(data);
++
++ *data = ((struct hppfs_private ) { .host_fd = -1,
++ .len = -1,
++ .contents = NULL } );
++ return(data);
++}
++
++static int file_mode(int fmode)
++{
++ if(fmode == (FMODE_READ | FMODE_WRITE))
++ return(O_RDWR);
++ if(fmode == FMODE_READ)
++ return(O_RDONLY);
++ if(fmode == FMODE_WRITE)
++ return(O_WRONLY);
++ return(0);
++}
++
++static int hppfs_open(struct inode *inode, struct file *file)
++{
++	struct hppfs_private *data;
++	struct dentry *proc_dentry;
++	char *host_file;
++	int err, fd, type, filter;
++
++	err = -ENOMEM;
++	data = hppfs_data();
++	if(data == NULL) goto out;
++
++	host_file = dentry_name(file->f_dentry, strlen("/rw"));
++	if(host_file == NULL) goto out_free2;
++
++	proc_dentry = HPPFS_I(inode)->proc_dentry;
++
++	/* XXX This isn't closed anywhere */
++	err = open_private_file(&data->proc_file, proc_dentry,
++				file_mode(file->f_mode));
++	if(err)
++		goto out_free1;
++
++	type = os_file_type(host_file);
++	if(type == OS_TYPE_FILE){
++		fd = os_open_file(host_file, of_read(OPENFLAGS()), 0);
++		if(fd >= 0)
++			data->host_fd = fd;
++		else printk("hppfs_open : failed to open '%s', errno = %d\n",
++			    host_file, -fd);
++	}
++	else if(type == OS_TYPE_DIR){
++		fd = open_host_sock(host_file, &filter);
++		if(fd > 0){
++			data->contents = hppfs_get_data(fd, filter,
++							&data->proc_file,
++							file, &data->len);
++			if(IS_ERR(data->contents)){
++				/* never leave an ERR_PTR for hppfs_read */
++				data->contents = NULL;
++				os_close_file(fd);
++			}
++			else data->host_fd = fd;
++		}
++		else printk("hppfs_open : failed to open a socket in "
++			    "'%s', errno = %d\n", host_file, -fd);
++	}
++	kfree(host_file);
++
++	file->private_data = data;
++	return(0);
++
++ out_free1:
++	kfree(host_file);
++ out_free2:
++	free_contents(data->contents);
++	kfree(data);
++ out:
++	return(err);
++}
++
++static int hppfs_dir_open(struct inode *inode, struct file *file)
++{
++ struct hppfs_private *data;
++ struct dentry *proc_dentry;
++ int err;
++
++ err = -ENOMEM;
++ data = hppfs_data();
++ if(data == NULL)
++ goto out;
++
++ proc_dentry = HPPFS_I(inode)->proc_dentry;
++ err = open_private_file(&data->proc_file, proc_dentry,
++ file_mode(file->f_mode));
++ if(err)
++ goto out_free;
++
++ file->private_data = data;
++ return(0);
++
++ out_free:
++ kfree(data);
++ out:
++ return(err);
++}
++
++static loff_t hppfs_llseek(struct file *file, loff_t off, int where)
++{
++ struct hppfs_private *data = file->private_data;
++ struct file *proc_file = &data->proc_file;
++ loff_t (*llseek)(struct file *, loff_t, int);
++ loff_t ret;
++
++ llseek = proc_file->f_dentry->d_inode->i_fop->llseek;
++ if(llseek != NULL){
++ ret = (*llseek)(proc_file, off, where);
++ if(ret < 0)
++ return(ret);
++ }
++
++ return(default_llseek(file, off, where));
++}
++
++static struct file_operations hppfs_file_fops = {
++ .owner = NULL,
++ .llseek = hppfs_llseek,
++ .read = hppfs_read,
++ .write = hppfs_write,
++ .open = hppfs_open,
++};
++
++struct hppfs_dirent {
++ void *vfs_dirent;
++ filldir_t filldir;
++ struct dentry *dentry;
++};
++
++static int hppfs_filldir(void *d, const char *name, int size,
++ loff_t offset, ino_t inode, unsigned int type)
++{
++ struct hppfs_dirent *dirent = d;
++
++ if(file_removed(dirent->dentry, name))
++ return(0);
++
++ return((*dirent->filldir)(dirent->vfs_dirent, name, size, offset,
++ inode, type));
++}
++
++static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir)
++{
++ struct hppfs_private *data = file->private_data;
++ struct file *proc_file = &data->proc_file;
++ int (*readdir)(struct file *, void *, filldir_t);
++ struct hppfs_dirent dirent = ((struct hppfs_dirent)
++ { .vfs_dirent = ent,
++ .filldir = filldir,
++ .dentry = file->f_dentry } );
++ int err;
++
++ readdir = proc_file->f_dentry->d_inode->i_fop->readdir;
++
++ proc_file->f_pos = file->f_pos;
++ err = (*readdir)(proc_file, &dirent, hppfs_filldir);
++ file->f_pos = proc_file->f_pos;
++
++ return(err);
++}
++
++static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync)
++{
++ return(0);
++}
++
++static struct file_operations hppfs_dir_fops = {
++ .owner = NULL,
++ .readdir = hppfs_readdir,
++ .open = hppfs_dir_open,
++ .fsync = hppfs_fsync,
++};
++
++static int hppfs_statfs(struct super_block *sb, struct kstatfs *sf)
++{
++ sf->f_blocks = 0;
++ sf->f_bfree = 0;
++ sf->f_bavail = 0;
++ sf->f_files = 0;
++ sf->f_ffree = 0;
++ sf->f_type = HPPFS_SUPER_MAGIC;
++ return(0);
++}
++
++static struct inode *hppfs_alloc_inode(struct super_block *sb)
++{
++ struct hppfs_inode_info *hi;
++
++ hi = kmalloc(sizeof(*hi), GFP_KERNEL);
++ if(hi == NULL)
++ return(NULL);
++
++ *hi = ((struct hppfs_inode_info) { .proc_dentry = NULL });
++ inode_init_once(&hi->vfs_inode);
++ return(&hi->vfs_inode);
++}
++
++void hppfs_delete_inode(struct inode *ino)
++{
++ clear_inode(ino);
++}
++
++static void hppfs_destroy_inode(struct inode *inode)
++{
++ kfree(HPPFS_I(inode));
++}
++
++static struct super_operations hppfs_sbops = {
++ .alloc_inode = hppfs_alloc_inode,
++ .destroy_inode = hppfs_destroy_inode,
++ .read_inode = hppfs_read_inode,
++ .delete_inode = hppfs_delete_inode,
++ .statfs = hppfs_statfs,
++};
++
++static int hppfs_readlink(struct dentry *dentry, char *buffer, int buflen)
++{
++ struct file proc_file;
++ struct dentry *proc_dentry;
++ int (*readlink)(struct dentry *, char *, int);
++ int err, n;
++
++ proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
++ err = open_private_file(&proc_file, proc_dentry, O_RDONLY);
++ if(err)
++ return(err);
++
++ readlink = proc_dentry->d_inode->i_op->readlink;
++ n = (*readlink)(proc_dentry, buffer, buflen);
++
++ close_private_file(&proc_file);
++
++ return(n);
++}
++
++static int hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
++{
++ struct file proc_file;
++ struct dentry *proc_dentry;
++ int (*follow_link)(struct dentry *, struct nameidata *);
++ int err, n;
++
++ proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
++ err = open_private_file(&proc_file, proc_dentry, O_RDONLY);
++ if(err)
++ return(err);
++
++ follow_link = proc_dentry->d_inode->i_op->follow_link;
++ n = (*follow_link)(proc_dentry, nd);
++
++ close_private_file(&proc_file);
++
++ return(n);
++}
++
++static struct inode_operations hppfs_dir_iops = {
++ .lookup = hppfs_lookup,
++};
++
++static struct inode_operations hppfs_link_iops = {
++ .readlink = hppfs_readlink,
++ .follow_link = hppfs_follow_link,
++};
++
++static int init_inode(struct inode *inode, struct dentry *dentry)
++{
++ if(S_ISDIR(dentry->d_inode->i_mode)){
++ inode->i_op = &hppfs_dir_iops;
++ inode->i_fop = &hppfs_dir_fops;
++ }
++ else if(S_ISLNK(dentry->d_inode->i_mode)){
++ inode->i_op = &hppfs_link_iops;
++ inode->i_fop = &hppfs_file_fops;
++ }
++ else {
++ inode->i_op = &hppfs_file_iops;
++ inode->i_fop = &hppfs_file_fops;
++ }
++ /* Remember the dentry; hppfs_readlink()/hppfs_follow_link() forward to it */
++ HPPFS_I(inode)->proc_dentry = dentry;
++
++ return(0);
++}
++
++static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
++{
++ struct inode *root_inode;
++ struct file_system_type *procfs;
++ struct super_block *proc_sb;
++ int err;
++
++ err = -ENOENT;
++ procfs = get_fs_type("proc");
++ if(procfs == NULL)
++ goto out;
++
++ if(list_empty(&procfs->fs_supers))
++ goto out;
++ /* Use the first procfs superblock on the list as the backing instance */
++ proc_sb = list_entry(procfs->fs_supers.next, struct super_block,
++ s_instances);
++
++ sb->s_blocksize = 1024;
++ sb->s_blocksize_bits = 10;
++ sb->s_magic = HPPFS_SUPER_MAGIC;
++ sb->s_op = &hppfs_sbops;
++ err = -ENOMEM; /* no root inode: report ENOMEM, not the stale ENOENT */
++ root_inode = iget(sb, 0);
++ if(root_inode == NULL)
++ goto out;
++
++ err = init_inode(root_inode, proc_sb->s_root);
++ if(err)
++ goto out_put;
++
++ err = -ENOMEM;
++ sb->s_root = d_alloc_root(root_inode);
++ if(sb->s_root == NULL)
++ goto out_put;
++
++ hppfs_read_inode(root_inode);
++
++ return(0);
++
++ out_put:
++ iput(root_inode);
++ out:
++ return(err);
++}
++
++static struct super_block *hppfs_read_super(struct file_system_type *type,
++ int flags, const char *dev_name,
++ void *data)
++{
++ return(get_sb_nodev(type, flags, data, hppfs_fill_super));
++}
++
++static struct file_system_type hppfs_type = {
++ .owner = THIS_MODULE,
++ .name = "hppfs",
++ .get_sb = hppfs_read_super,
++ .kill_sb = kill_anon_super,
++ .fs_flags = 0,
++};
++
++static int __init init_hppfs(void)
++{
++ return(register_filesystem(&hppfs_type));
++}
++
++static void __exit exit_hppfs(void)
++{
++ unregister_filesystem(&hppfs_type);
++}
++
++module_init(init_hppfs)
++module_exit(exit_hppfs)
++MODULE_LICENSE("GPL");
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/include/asm-um/archparam-i386.h b/include/asm-um/archparam-i386.h
+--- a/include/asm-um/archparam-i386.h Fri Aug 15 15:07:52 2003
++++ b/include/asm-um/archparam-i386.h Fri Aug 15 15:13:17 2003
+@@ -56,6 +56,65 @@
+ pr_reg[16] = PT_REGS_SS(regs); \
+ } while(0);
+
++#define VSYSCALL_BASE (__fix_to_virt(FIX_VSYSCALL))
++#define VSYSCALL_EHDR ((const struct elfhdr *) VSYSCALL_BASE)
++#define VSYSCALL_ENTRY ((unsigned long) &__kernel_vsyscall)
++extern void *__kernel_vsyscall;
++
++/*
++ * Architecture-neutral AT_ values in 0-17, leave some room
++ * for more of them, start the x86-specific ones at 32.
++ */
++#define AT_SYSINFO 32
++#define AT_SYSINFO_EHDR 33
++
++#define ARCH_DLINFO \
++do { \
++ NEW_AUX_ENT(AT_SYSINFO, VSYSCALL_ENTRY); \
++ NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE); \
++} while (0)
++
++/*
++ * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out
++ * extra segments containing the vsyscall DSO contents. Dumping its
++ * contents makes post-mortem fully interpretable later without matching up
++ * the same kernel and hardware config to see what PC values meant.
++ * Dumping its extra ELF program headers includes all the other information
++ * a debugger needs to easily find how the vsyscall DSO was being used.
++ */
++#define ELF_CORE_EXTRA_PHDRS (VSYSCALL_EHDR->e_phnum)
++#define ELF_CORE_WRITE_EXTRA_PHDRS \
++do { \
++ const struct elf_phdr *const vsyscall_phdrs = \
++ (const struct elf_phdr *) (VSYSCALL_BASE \
++ + VSYSCALL_EHDR->e_phoff); \
++ int i; \
++ Elf32_Off ofs = 0; \
++ for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \
++ struct elf_phdr phdr = vsyscall_phdrs[i]; \
++ if (phdr.p_type == PT_LOAD) { \
++ ofs = phdr.p_offset = offset; \
++ offset += phdr.p_filesz; \
++ } \
++ else \
++ phdr.p_offset += ofs; \
++ phdr.p_paddr = 0; /* match other core phdrs */ \
++ DUMP_WRITE(&phdr, sizeof(phdr)); \
++ } \
++} while (0)
++#define ELF_CORE_WRITE_EXTRA_DATA \
++do { \
++ const struct elf_phdr *const vsyscall_phdrs = \
++ (const struct elf_phdr *) (VSYSCALL_BASE \
++ + VSYSCALL_EHDR->e_phoff); \
++ int i; \
++ for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) { \
++ if (vsyscall_phdrs[i].p_type == PT_LOAD) \
++ DUMP_WRITE((void *) vsyscall_phdrs[i].p_vaddr, \
++ vsyscall_phdrs[i].p_filesz); \
++ } \
++} while (0)
++
+ /********* Bits for asm-um/delay.h **********/
+
+ typedef unsigned long um_udelay_t;
+diff -Naur a/include/asm-um/common.lds.S b/include/asm-um/common.lds.S
+--- a/include/asm-um/common.lds.S Fri Aug 15 15:04:49 2003
++++ b/include/asm-um/common.lds.S Fri Aug 15 15:10:46 2003
+@@ -1,3 +1,5 @@
++#include <asm-generic/vmlinux.lds.h>
++
+ .fini : { *(.fini) } =0x9090
+ _etext = .;
+ PROVIDE (etext = .);
+@@ -67,6 +69,10 @@
+ }
+ __initcall_end = .;
+
++ __con_initcall_start = .;
++ .con_initcall.init : { *(.con_initcall.init) }
++ __con_initcall_end = .;
++
+ __uml_initcall_start = .;
+ .uml.initcall.init : { *(.uml.initcall.init) }
+ __uml_initcall_end = .;
+@@ -80,7 +86,33 @@
+ .uml.exitcall : { *(.uml.exitcall.exit) }
+ __uml_exitcall_end = .;
+
+- . = ALIGN(4096);
++ . = ALIGN(4);
++ __alt_instructions = .;
++ .altinstructions : { *(.altinstructions) }
++ __alt_instructions_end = .;
++ .altinstr_replacement : { *(.altinstr_replacement) }
++ /* .exit.text is discarded at runtime, not link time, to deal with references
++ from .altinstructions and .eh_frame */
++ .exit.text : { *(.exit.text) }
++ .exit.data : { *(.exit.data) }
++
++ __preinit_array_start = .;
++ .preinit_array : { *(.preinit_array) }
++ __preinit_array_end = .;
++ __init_array_start = .;
++ .init_array : { *(.init_array) }
++ __init_array_end = .;
++ __fini_array_start = .;
++ .fini_array : { *(.fini_array) }
++ __fini_array_end = .;
++
++ . = ALIGN(4096);
+ __initramfs_start = .;
+ .init.ramfs : { *(.init.ramfs) }
+ __initramfs_end = .;
++
++ /* Sections to be discarded */
++ /DISCARD/ : {
++ *(.exitcall.exit)
++ }
++
+diff -Naur a/include/asm-um/cpufeature.h b/include/asm-um/cpufeature.h
+--- a/include/asm-um/cpufeature.h Wed Dec 31 19:00:00 1969
++++ b/include/asm-um/cpufeature.h Fri Aug 15 15:10:07 2003
+@@ -0,0 +1,6 @@
++#ifndef __UM_CPUFEATURE_H
++#define __UM_CPUFEATURE_H
++
++#include "asm/arch/cpufeature.h"
++
++#endif
+diff -Naur a/include/asm-um/current.h b/include/asm-um/current.h
+--- a/include/asm-um/current.h Fri Aug 15 15:04:11 2003
++++ b/include/asm-um/current.h Fri Aug 15 15:10:19 2003
+@@ -16,8 +16,10 @@
+ #define CURRENT_THREAD(dummy) (((unsigned long) &dummy) & \
+ (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER))
+
+-#define current ({ int dummy; \
+- ((struct thread_info *) CURRENT_THREAD(dummy))->task; })
++#define current_thread \
++ ({ int dummy; ((struct thread_info *) CURRENT_THREAD(dummy)); })
++
++#define current (current_thread->task)
+
+ #endif /* __ASSEMBLY__ */
+
+diff -Naur a/include/asm-um/fixmap.h b/include/asm-um/fixmap.h
+--- a/include/asm-um/fixmap.h Fri Aug 15 15:08:40 2003
++++ b/include/asm-um/fixmap.h Fri Aug 15 15:13:36 2003
+@@ -34,6 +34,7 @@
+ FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
+ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+ #endif
++ FIX_VSYSCALL,
+ __end_of_fixed_addresses
+ };
+
+@@ -63,6 +64,13 @@
+ #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT))
+ #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
+
++/*
++ * This is the range that is readable by user mode, and things
++ * acting like user mode such as get_user_pages.
++ */
++#define FIXADDR_USER_START (__fix_to_virt(FIX_VSYSCALL))
++#define FIXADDR_USER_END (FIXADDR_USER_START + PAGE_SIZE)
++
+ extern void __this_fixmap_does_not_exist(void);
+
+ /*
+diff -Naur a/include/asm-um/irq.h b/include/asm-um/irq.h
+--- a/include/asm-um/irq.h Fri Aug 15 15:09:15 2003
++++ b/include/asm-um/irq.h Fri Aug 15 15:13:51 2003
+@@ -1,15 +1,6 @@
+ #ifndef __UM_IRQ_H
+ #define __UM_IRQ_H
+
+-/* The i386 irq.h has a struct task_struct in a prototype without including
+- * sched.h. This forward declaration kills the resulting warning.
+- */
+-struct task_struct;
+-
+-#include "asm/ptrace.h"
+-
+-#undef NR_IRQS
+-
+ #define TIMER_IRQ 0
+ #define UMN_IRQ 1
+ #define CONSOLE_IRQ 2
+@@ -28,8 +19,4 @@
+ #define LAST_IRQ XTERM_IRQ
+ #define NR_IRQS (LAST_IRQ + 1)
+
+-extern int um_request_irq(unsigned int irq, int fd, int type,
+- void (*handler)(int, void *, struct pt_regs *),
+- unsigned long irqflags, const char * devname,
+- void *dev_id);
+ #endif
+diff -Naur a/include/asm-um/local.h b/include/asm-um/local.h
+--- a/include/asm-um/local.h Wed Dec 31 19:00:00 1969
++++ b/include/asm-um/local.h Fri Aug 15 15:12:46 2003
+@@ -0,0 +1,6 @@
++#ifndef __UM_LOCAL_H
++#define __UM_LOCAL_H
++
++#include "asm/arch/local.h"
++
++#endif
+diff -Naur a/include/asm-um/module-generic.h b/include/asm-um/module-generic.h
+--- a/include/asm-um/module-generic.h Wed Dec 31 19:00:00 1969
++++ b/include/asm-um/module-generic.h Fri Aug 15 15:12:38 2003
+@@ -0,0 +1,6 @@
++#ifndef __UM_MODULE_GENERIC_H
++#define __UM_MODULE_GENERIC_H
++
++#include "asm/arch/module.h"
++
++#endif
+diff -Naur a/include/asm-um/module-i386.h b/include/asm-um/module-i386.h
+--- a/include/asm-um/module-i386.h Wed Dec 31 19:00:00 1969
++++ b/include/asm-um/module-i386.h Fri Aug 15 15:12:37 2003
+@@ -0,0 +1,13 @@
++#ifndef __UM_MODULE_I386_H
++#define __UM_MODULE_I386_H
++
++/* UML is simple */
++struct mod_arch_specific
++{
++};
++
++#define Elf_Shdr Elf32_Shdr
++#define Elf_Sym Elf32_Sym
++#define Elf_Ehdr Elf32_Ehdr
++
++#endif
+diff -Naur a/include/asm-um/page.h b/include/asm-um/page.h
+--- a/include/asm-um/page.h Fri Aug 15 15:06:42 2003
++++ b/include/asm-um/page.h Fri Aug 15 15:12:40 2003
+@@ -4,7 +4,6 @@
+ struct page;
+
+ #include "asm/arch/page.h"
+-#include "asm/bug.h"
+
+ #undef __pa
+ #undef __va
+diff -Naur a/include/asm-um/pgtable.h b/include/asm-um/pgtable.h
+--- a/include/asm-um/pgtable.h Fri Aug 15 15:09:25 2003
++++ b/include/asm-um/pgtable.h Fri Aug 15 15:14:09 2003
+@@ -79,12 +79,13 @@
+
+ #define _PAGE_PRESENT 0x001
+ #define _PAGE_NEWPAGE 0x002
+-#define _PAGE_PROTNONE 0x004 /* If not present */
+-#define _PAGE_RW 0x008
+-#define _PAGE_USER 0x010
+-#define _PAGE_ACCESSED 0x020
+-#define _PAGE_DIRTY 0x040
+-#define _PAGE_NEWPROT 0x080
++#define _PAGE_NEWPROT 0x004
++#define _PAGE_FILE 0x008 /* set:pagecache unset:swap */
++#define _PAGE_PROTNONE 0x010 /* If not present */
++#define _PAGE_RW 0x020
++#define _PAGE_USER 0x040
++#define _PAGE_ACCESSED 0x080
++#define _PAGE_DIRTY 0x100
+
+ #define REGION_MASK 0xf0000000
+ #define REGION_SHIFT 28
+@@ -203,6 +204,16 @@
+ #define pfn_pte(pfn, prot) __pte(pfn_to_phys(pfn) | pgprot_val(prot))
+ #define pfn_pmd(pfn, prot) __pmd(pfn_to_phys(pfn) | pgprot_val(prot))
+
++/*
++ * Bits 0 through 3 are taken
++ */
++#define PTE_FILE_MAX_BITS 28
++
++#define pte_to_pgoff(pte) ((pte).pte_low >> 4)
++
++#define pgoff_to_pte(off) \
++ ((pte_t) { ((off) << 4) + _PAGE_FILE })
++
+ static inline pte_t pte_mknewprot(pte_t pte)
+ {
+ pte_val(pte) |= _PAGE_NEWPROT;
+@@ -236,6 +247,12 @@
+ * The following only work if pte_present() is true.
+ * Undefined behaviour if not..
+ */
++static inline int pte_user(pte_t pte)
++{
++ return((pte_val(pte) & _PAGE_USER) &&
++ !(pte_val(pte) & _PAGE_PROTNONE));
++}
++
+ static inline int pte_read(pte_t pte)
+ {
+ return((pte_val(pte) & _PAGE_USER) &&
+@@ -253,6 +270,14 @@
+ !(pte_val(pte) & _PAGE_PROTNONE));
+ }
+
++/*
++ * The following only works if pte_present() is not true.
++ */
++static inline int pte_file(pte_t pte)
++{
++ return (pte).pte_low & _PAGE_FILE;
++}
++
+ static inline int pte_dirty(pte_t pte) { return pte_val(pte) & _PAGE_DIRTY; }
+ static inline int pte_young(pte_t pte) { return pte_val(pte) & _PAGE_ACCESSED; }
+ static inline int pte_newpage(pte_t pte) { return pte_val(pte) & _PAGE_NEWPAGE; }
+@@ -355,14 +380,26 @@
+ #define pmd_page(pmd) (phys_mem_map(pmd_val(pmd) & PAGE_MASK) + \
+ ((phys_addr(pmd_val(pmd)) >> PAGE_SHIFT)))
+
+-/* to find an entry in a page-table-directory. */
++/*
++ * the pgd page can be thought of as an array like this: pgd_t[PTRS_PER_PGD]
++ *
++ * this macro returns the index of the entry in the pgd page which would
++ * control the given virtual address
++ */
+ #define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+
+-/* to find an entry in a page-table-directory */
++/*
++ * pgd_offset() returns a (pgd_t *)
++ * pgd_index() is used to get the offset into the pgd page's array of pgd_t's;
++ */
+ #define pgd_offset(mm, address) \
+ ((mm)->pgd + ((address) >> PGDIR_SHIFT))
+
+-/* to find an entry in a kernel page-table-directory */
++
++/*
++ * a shortcut which implies the use of the kernel's pgd, instead
++ * of a process's
++ */
+ #define pgd_offset_k(address) pgd_offset(&init_mm, address)
+
+ #define pmd_index(address) \
+@@ -374,7 +411,12 @@
+ return (pmd_t *) dir;
+ }
+
+-/* Find an entry in the third-level page table.. */
++/*
++ * the pte page can be thought of as an array like this: pte_t[PTRS_PER_PTE]
++ *
++ * this macro returns the index of the entry in the pte page which would
++ * control the given virtual address
++ */
+ #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+ #define pte_offset_kernel(dir, address) \
+ ((pte_t *) pmd_page_kernel(*(dir)) + pte_index(address))
+@@ -400,11 +442,11 @@
+ #define update_mmu_cache(vma,address,pte) do ; while (0)
+
+ /* Encode and de-code a swap entry */
+-#define __swp_type(x) (((x).val >> 3) & 0x7f)
+-#define __swp_offset(x) ((x).val >> 10)
++#define __swp_type(x) (((x).val >> 4) & 0x3f)
++#define __swp_offset(x) ((x).val >> 11)
+
+ #define __swp_entry(type, offset) \
+- ((swp_entry_t) { ((type) << 3) | ((offset) << 10) })
++ ((swp_entry_t) { ((type) << 4) | ((offset) << 11) })
+ #define __pte_to_swp_entry(pte) \
+ ((swp_entry_t) { pte_val(pte_mkuptodate(pte)) })
+ #define __swp_entry_to_pte(x) ((pte_t) { (x).val })
+diff -Naur a/include/asm-um/processor-generic.h b/include/asm-um/processor-generic.h
+--- a/include/asm-um/processor-generic.h Fri Aug 15 15:04:48 2003
++++ b/include/asm-um/processor-generic.h Fri Aug 15 15:10:42 2003
+@@ -11,9 +11,7 @@
+ struct task_struct;
+
+ #include "linux/config.h"
+-#include "linux/signal.h"
+ #include "asm/ptrace.h"
+-#include "asm/siginfo.h"
+ #include "choose-mode.h"
+
+ struct mm_struct;
+@@ -101,14 +99,19 @@
+ } mm_segment_t;
+
+ extern struct task_struct *alloc_task_struct(void);
+-extern void free_task_struct(struct task_struct *task);
+
+ extern void release_thread(struct task_struct *);
+ extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
+ extern void dump_thread(struct pt_regs *regs, struct user *u);
++extern void prepare_to_copy(struct task_struct *tsk);
+
+ extern unsigned long thread_saved_pc(struct task_struct *t);
+
++static inline void mm_copy_segments(struct mm_struct *from_mm,
++ struct mm_struct *new_mm)
++{
++}
++
+ #define init_stack (init_thread_union.stack)
+
+ /*
+diff -Naur a/include/asm-um/processor-i386.h b/include/asm-um/processor-i386.h
+--- a/include/asm-um/processor-i386.h Fri Aug 15 15:04:00 2003
++++ b/include/asm-um/processor-i386.h Fri Aug 15 15:10:18 2003
+@@ -6,8 +6,8 @@
+ #ifndef __UM_PROCESSOR_I386_H
+ #define __UM_PROCESSOR_I386_H
+
+-extern int cpu_has_xmm;
+-extern int cpu_has_cmov;
++extern int host_has_xmm;
++extern int host_has_cmov;
+
+ struct arch_thread {
+ unsigned long debugregs[8];
+diff -Naur a/include/asm-um/sections.h b/include/asm-um/sections.h
+--- a/include/asm-um/sections.h Wed Dec 31 19:00:00 1969
++++ b/include/asm-um/sections.h Fri Aug 15 15:12:54 2003
+@@ -0,0 +1,7 @@
++#ifndef _UM_SECTIONS_H
++#define _UM_SECTIONS_H
++
++/* nothing to see, move along */
++#include <asm-generic/sections.h>
++
++#endif
+diff -Naur a/include/asm-um/smp.h b/include/asm-um/smp.h
+--- a/include/asm-um/smp.h Fri Aug 15 15:03:35 2003
++++ b/include/asm-um/smp.h Fri Aug 15 15:10:04 2003
+@@ -7,9 +7,10 @@
+
+ #include "linux/config.h"
+ #include "linux/bitops.h"
++#include "linux/threads.h"
+ #include "asm/current.h"
+
+-#define smp_processor_id() (current->thread_info->cpu)
++#define smp_processor_id() (current_thread->cpu)
+ #define cpu_logical_map(n) (n)
+ #define cpu_number_map(n) (n)
+ #define PROC_CHANGE_PENALTY 15 /* Pick a number, any number */
+@@ -30,6 +31,13 @@
+ {
+ }
+
++extern inline int any_online_cpu(unsigned int mask)
++{
++ if (mask & cpu_online_map)
++ return __ffs(mask & cpu_online_map);
++ /* No bit of mask is set in cpu_online_map */
++ return -1;
++}
+ #endif
+
+ #endif
+diff -Naur a/include/asm-um/system-generic.h b/include/asm-um/system-generic.h
+--- a/include/asm-um/system-generic.h Fri Aug 15 15:09:22 2003
++++ b/include/asm-um/system-generic.h Fri Aug 15 15:14:01 2003
+@@ -23,8 +23,10 @@
+ extern void block_signals(void);
+ extern void unblock_signals(void);
+
+-#define local_save_flags(flags) do { (flags) = get_signals(); } while(0)
+-#define local_irq_restore(flags) do { set_signals(flags); } while(0)
++#define local_save_flags(flags) do { typecheck(unsigned long, flags); \
++ (flags) = get_signals(); } while(0)
++#define local_irq_restore(flags) do { typecheck(unsigned long, flags); \
++ set_signals(flags); } while(0)
+
+ #define local_irq_save(flags) do { local_save_flags(flags); \
+ local_irq_disable(); } while(0)
+diff -Naur a/include/asm-um/thread_info.h b/include/asm-um/thread_info.h
+--- a/include/asm-um/thread_info.h Fri Aug 15 15:05:00 2003
++++ b/include/asm-um/thread_info.h Fri Aug 15 15:11:11 2003
+@@ -9,6 +9,7 @@
+ #ifndef __ASSEMBLY__
+
+ #include <asm/processor.h>
++#include <asm/types.h>
+
+ struct thread_info {
+ struct task_struct *task; /* main task structure */
+@@ -43,15 +44,18 @@
+ static inline struct thread_info *current_thread_info(void)
+ {
+ struct thread_info *ti;
+- __asm__("andl %%esp,%0; ":"=r" (ti) : "0" (~16383UL));
++ unsigned long mask = PAGE_SIZE *
++ (1 << CONFIG_KERNEL_STACK_ORDER) - 1;
++ __asm__("andl %%esp,%0; ":"=r" (ti) : "0" (~mask));
+ return ti;
+ }
+
+ /* thread information allocation */
+-#define THREAD_SIZE (4*PAGE_SIZE)
+-#define alloc_thread_info(tsk) ((struct thread_info *) \
+- __get_free_pages(GFP_KERNEL,2))
+-#define free_thread_info(ti) free_pages((unsigned long) (ti), 2)
++#define THREAD_SIZE ((1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE)
++#define alloc_thread_info(tsk) \
++ ((struct thread_info *) kmalloc(THREAD_SIZE, GFP_KERNEL))
++#define free_thread_info(ti) kfree(ti)
++
+ #define get_thread_info(ti) get_task_struct((ti)->task)
+ #define put_thread_info(ti) put_task_struct((ti)->task)
+
+@@ -65,11 +69,13 @@
+ #define TIF_POLLING_NRFLAG 3 /* true if poll_idle() is polling
+ * TIF_NEED_RESCHED
+ */
++#define TIF_RESTART_BLOCK 4
+
+ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE)
+ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING)
+ #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED)
+ #define _TIF_POLLING_NRFLAG (1 << TIF_POLLING_NRFLAG)
++#define _TIF_RESTART_BLOCK (1 << TIF_RESTART_BLOCK)
+
+ #endif
+
+diff -Naur a/include/asm-um/timex.h b/include/asm-um/timex.h
+--- a/include/asm-um/timex.h Fri Aug 15 15:07:22 2003
++++ b/include/asm-um/timex.h Fri Aug 15 15:12:48 2003
+@@ -1,8 +1,6 @@
+ #ifndef __UM_TIMEX_H
+ #define __UM_TIMEX_H
+
+-#include "linux/time.h"
+-
+ typedef unsigned long cycles_t;
+
+ #define cacheflush_time (0)
+diff -Naur a/include/linux/mm.h b/include/linux/mm.h
+--- a/include/linux/mm.h Fri Aug 15 15:03:56 2003
++++ b/include/linux/mm.h Fri Aug 15 15:10:14 2003
+@@ -483,6 +483,9 @@
+ return __set_page_dirty_buffers(page);
+ }
+
++extern long do_mprotect(struct mm_struct *mm, unsigned long start,
++ size_t len, unsigned long prot);
++
+ /*
+ * On a two-level page table, this ends up being trivial. Thus the
+ * inlining and the symmetry break with pte_alloc_map() that does all
+@@ -513,9 +516,10 @@
+
+ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+
+-extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+- unsigned long len, unsigned long prot,
+- unsigned long flag, unsigned long pgoff);
++extern unsigned long do_mmap_pgoff(struct mm_struct *mm, struct file *file,
++ unsigned long addr, unsigned long len,
++ unsigned long prot, unsigned long flag,
++ unsigned long pgoff);
+
+ static inline unsigned long do_mmap(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long prot,
+@@ -525,7 +529,8 @@
+ if ((offset + PAGE_ALIGN(len)) < offset)
+ goto out;
+ if (!(offset & ~PAGE_MASK))
+- ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
++ ret = do_mmap_pgoff(current->mm, file, addr, len, prot, flag,
++ offset >> PAGE_SHIFT);
+ out:
+ return ret;
+ }
+diff -Naur a/include/linux/proc_mm.h b/include/linux/proc_mm.h
+--- a/include/linux/proc_mm.h Wed Dec 31 19:00:00 1969
++++ b/include/linux/proc_mm.h Fri Aug 15 15:10:02 2003
+@@ -0,0 +1,48 @@
++/*
++ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
++ * Licensed under the GPL
++ */
++
++#ifndef __PROC_MM_H
++#define __PROC_MM_H
++
++#include "linux/sched.h"
++
++#define MM_MMAP 54
++#define MM_MUNMAP 55
++#define MM_MPROTECT 56
++#define MM_COPY_SEGMENTS 57
++
++struct mm_mmap {
++ unsigned long addr;
++ unsigned long len;
++ unsigned long prot;
++ unsigned long flags;
++ unsigned long fd;
++ unsigned long offset;
++};
++
++struct mm_munmap {
++ unsigned long addr;
++ unsigned long len;
++};
++
++struct mm_mprotect {
++ unsigned long addr;
++ unsigned long len;
++ unsigned int prot;
++};
++
++struct proc_mm_op {
++ int op;
++ union {
++ struct mm_mmap mmap;
++ struct mm_munmap munmap;
++ struct mm_mprotect mprotect;
++ int copy_segments;
++ } u;
++};
++
++extern struct mm_struct *proc_mm_get_mm(int fd);
++
++#endif
+diff -Naur a/mm/Makefile b/mm/Makefile
+--- a/mm/Makefile Fri Aug 15 15:07:22 2003
++++ b/mm/Makefile Fri Aug 15 15:12:48 2003
+@@ -12,3 +12,5 @@
+ slab.o swap.o truncate.o vcache.o vmscan.o $(mmu-y)
+
+ obj-$(CONFIG_SWAP) += page_io.o swap_state.o swapfile.o
++obj-$(CONFIG_PROC_MM) += proc_mm.o
++
+diff -Naur a/mm/memory.c b/mm/memory.c
+--- a/mm/memory.c Fri Aug 15 15:05:37 2003
++++ b/mm/memory.c Fri Aug 15 15:11:48 2003
+@@ -45,6 +45,7 @@
+ #include <linux/pagemap.h>
+ #include <linux/vcache.h>
+ #include <linux/rmap-locking.h>
++#include <linux/init.h>
+
+ #include <asm/pgalloc.h>
+ #include <asm/rmap.h>
+@@ -669,6 +670,24 @@
+ }
+
+
++static struct vm_area_struct fixmap_vma = {
++ /* Catch users - if there are any valid
++ ones, we can make this be "&init_mm" or
++ something. */
++ .vm_mm = NULL,
++ .vm_page_prot = PAGE_READONLY,
++ .vm_flags = VM_READ | VM_EXEC,
++};
++
++static int init_fixmap_vma(void)
++{
++ fixmap_vma.vm_start = FIXADDR_START;
++ fixmap_vma.vm_end = FIXADDR_TOP;
++ return(0);
++}
++
++__initcall(init_fixmap_vma);
++
+ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+ unsigned long start, int len, int write, int force,
+ struct page **pages, struct vm_area_struct **vmas)
+@@ -689,19 +708,8 @@
+
+ vma = find_extend_vma(mm, start);
+
+-#ifdef FIXADDR_USER_START
+- if (!vma &&
+- start >= FIXADDR_USER_START && start < FIXADDR_USER_END) {
+- static struct vm_area_struct fixmap_vma = {
+- /* Catch users - if there are any valid
+- ones, we can make this be "&init_mm" or
+- something. */
+- .vm_mm = NULL,
+- .vm_start = FIXADDR_USER_START,
+- .vm_end = FIXADDR_USER_END,
+- .vm_page_prot = PAGE_READONLY,
+- .vm_flags = VM_READ | VM_EXEC,
+- };
++#ifdef FIXADDR_START
++ if (!vma && start >= FIXADDR_START && start < FIXADDR_TOP) {
+ unsigned long pg = start & PAGE_MASK;
+ pgd_t *pgd;
+ pmd_t *pmd;
+diff -Naur a/mm/mmap.c b/mm/mmap.c
+--- a/mm/mmap.c Fri Aug 15 15:07:18 2003
++++ b/mm/mmap.c Fri Aug 15 15:12:45 2003
+@@ -457,11 +457,11 @@
+ * The caller must hold down_write(current->mm->mmap_sem).
+ */
+
+-unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
+- unsigned long len, unsigned long prot,
+- unsigned long flags, unsigned long pgoff)
++unsigned long do_mmap_pgoff(struct mm_struct *mm, struct file * file,
++ unsigned long addr, unsigned long len,
++ unsigned long prot, unsigned long flags,
++ unsigned long pgoff)
+ {
+- struct mm_struct * mm = current->mm;
+ struct vm_area_struct * vma, * prev;
+ struct inode *inode;
+ unsigned int vm_flags;
+diff -Naur a/mm/mprotect.c b/mm/mprotect.c
+--- a/mm/mprotect.c Fri Aug 15 15:05:20 2003
++++ b/mm/mprotect.c Fri Aug 15 15:11:21 2003
+@@ -222,7 +222,8 @@
+ }
+
+ asmlinkage long
+-sys_mprotect(unsigned long start, size_t len, unsigned long prot)
++do_mprotect(struct mm_struct *mm, unsigned long start, size_t len,
++ unsigned long prot)
+ {
+ unsigned long nstart, end, tmp;
+ struct vm_area_struct * vma, * next, * prev;
+@@ -239,9 +240,9 @@
+ if (end == start)
+ return 0;
+
+- down_write(&current->mm->mmap_sem);
++ down_write(&mm->mmap_sem);
+
+- vma = find_vma_prev(current->mm, start, &prev);
++ vma = find_vma_prev(mm, start, &prev);
+ error = -ENOMEM;
+ if (!vma || vma->vm_start > start)
+ goto out;
+@@ -301,6 +302,11 @@
+ prev->vm_mm->map_count--;
+ }
+ out:
+- up_write(&current->mm->mmap_sem);
++ up_write(&mm->mmap_sem);
+ return error;
+ }
++
++asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot)
++{
++ return(do_mprotect(current->mm, start, len, prot));
++}
+diff -Naur a/mm/proc_mm.c b/mm/proc_mm.c
+--- a/mm/proc_mm.c Wed Dec 31 19:00:00 1969
++++ b/mm/proc_mm.c Fri Aug 15 15:11:44 2003
+@@ -0,0 +1,174 @@
++/*
++ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
++ * Licensed under the GPL
++ */
++
++#include "linux/mm.h"
++#include "linux/init.h"
++#include "linux/proc_fs.h"
++#include "linux/proc_mm.h"
++#include "linux/file.h"
++#include "asm/uaccess.h"
++#include "asm/mmu_context.h"
++
++static struct file_operations proc_mm_fops;
++
++struct mm_struct *proc_mm_get_mm(int fd)
++{
++ struct mm_struct *ret = ERR_PTR(-EBADF);
++ struct file *file;
++
++ file = fget(fd);
++ if (!file)
++ goto out;
++ /* Only files whose f_op is proc_mm_fops carry an mm in private_data */
++ ret = ERR_PTR(-EINVAL);
++ if(file->f_op != &proc_mm_fops)
++ goto out_fput;
++
++ ret = file->private_data; /* NOTE(review): no extra mm reference is taken here */
++ out_fput:
++ fput(file);
++ out:
++ return(ret);
++}
++
++extern long do_mmap2(struct mm_struct *mm, unsigned long addr,
++ unsigned long len, unsigned long prot,
++ unsigned long flags, unsigned long fd,
++ unsigned long pgoff);
++
++static ssize_t write_proc_mm(struct file *file, const char *buffer,
++ size_t count, loff_t *ppos)
++{
++ struct mm_struct *mm = file->private_data;
++ struct proc_mm_op req = { .op = 0 }; /* zeroed: short writes must not read stale stack */
++ int n, ret;
++
++ if(count > sizeof(req))
++ return(-EINVAL);
++ /* Shorter requests are accepted; the unwritten tail of req stays zero */
++ n = copy_from_user(&req, buffer, count);
++ if(n != 0)
++ return(-EFAULT);
++ /* On success every op reports the whole write as consumed */
++ ret = count;
++ switch(req.op){
++ case MM_MMAP: {
++ struct mm_mmap *map = &req.u.mmap;
++
++ ret = do_mmap2(mm, map->addr, map->len, map->prot,
++ map->flags, map->fd, map->offset >> PAGE_SHIFT);
++ if((ret & ~PAGE_MASK) == 0)
++ ret = count;
++
++ break;
++ }
++ case MM_MUNMAP: {
++ struct mm_munmap *unmap = &req.u.munmap;
++
++ down_write(&mm->mmap_sem);
++ ret = do_munmap(mm, unmap->addr, unmap->len);
++ up_write(&mm->mmap_sem);
++
++ if(ret == 0)
++ ret = count;
++ break;
++ }
++ case MM_MPROTECT: {
++ struct mm_mprotect *protect = &req.u.mprotect;
++
++ ret = do_mprotect(mm, protect->addr, protect->len,
++ protect->prot);
++ if(ret == 0)
++ ret = count;
++ break;
++ }
++
++ case MM_COPY_SEGMENTS: {
++ struct mm_struct *from = proc_mm_get_mm(req.u.copy_segments);
++
++ if(IS_ERR(from)){
++ ret = PTR_ERR(from);
++ break;
++ }
++
++ mm_copy_segments(from, mm);
++ break;
++ }
++ default:
++ ret = -EINVAL;
++ break;
++ }
++
++ return(ret);
++}
++
++static int open_proc_mm(struct inode *inode, struct file *file)
++{
++ struct mm_struct *mm = mm_alloc();
++ int ret;
++
++ ret = -ENOMEM;
++ if(mm == NULL)
++ goto out_mem;
++
++ ret = init_new_context(current, mm);
++ if(ret)
++ goto out_free;
++ /* Link the new mm into the mmlist next to the opener's mm */
++ spin_lock(&mmlist_lock);
++ list_add(&mm->mmlist, &current->mm->mmlist);
++ mmlist_nr++;
++ spin_unlock(&mmlist_lock);
++
++ file->private_data = mm;
++
++ return(0);
++
++ out_free:
++ mmput(mm);
++ out_mem:
++ return(ret);
++}
++
++static int release_proc_mm(struct inode *inode, struct file *file)
++{
++ struct mm_struct *mm = file->private_data;
++ /* Drop the mm that open_proc_mm() stored in this file */
++ mmput(mm);
++ return(0);
++}
++
++static struct file_operations proc_mm_fops = {
++ .open = open_proc_mm,
++ .release = release_proc_mm,
++ .write = write_proc_mm,
++};
++
++static int make_proc_mm(void)
++{
++ struct proc_dir_entry *ent;
++
++ ent = create_proc_entry("mm", 0222, &proc_root);
++ if(ent == NULL){
++ printk("make_proc_mm : Failed to register /proc/mm\n");
++ return(0);
++ }
++ ent->proc_fops = &proc_mm_fops;
++ /* Returns 0 on both paths: a registration failure is only logged */
++ return(0);
++}
++
++__initcall(make_proc_mm);
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only. This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
--- /dev/null
+ fs/inode.c | 1
+ fs/namei.c | 66 ++++++++++++++++++++++++++++++++++++++---------------
+ include/linux/fs.h | 11 ++++----
+ 3 files changed, 54 insertions(+), 24 deletions(-)
+
+--- linux-2.4.18/fs/namei.c~vfs-pdirops-2.4.18-chaos 2003-09-01 14:58:03.000000000 +0400
++++ linux-2.4.18-alexey/fs/namei.c 2003-09-01 17:56:10.000000000 +0400
+@@ -101,6 +101,36 @@ void intent_release(struct lookup_intent
+
+ }
+
++static void *lock_dir(struct inode *dir, struct qstr *name)
++{
++ unsigned long hash;
++
++ if (!IS_PDIROPS(dir)) {
++ down(&dir->i_sem);
++ return NULL;
++ }
++
++ /* The fs understands parallel directory operations, so
++ * instead of i_sem we take a lock on the hash of the
++ * requested filename; this keeps operations on the same
++ * name from running at the same time -bzzz */
++
++ /* calculate name hash */
++ hash = full_name_hash(name->name, name->len);
++
++ /* lock this hash */
++ return dynlock_lock(&dir->i_dcache_lock, hash, 1, GFP_ATOMIC);
++}
++
++static void unlock_dir(struct inode *dir, void *lock)
++{
++ if (!IS_PDIROPS(dir)) {
++ up(&dir->i_sem);
++ return;
++ }
++ dynlock_unlock(&dir->i_dcache_lock, lock);
++}
++
+ /* In order to reduce some races, while at the same time doing additional
+ * checking and hopefully speeding things up, we copy filenames to the
+ * kernel data space before using them..
+@@ -302,10 +332,10 @@ static struct dentry *real_lookup(struct
+ {
+ struct dentry * result;
+ struct inode *dir = parent->d_inode;
++ void *lock;
+
+ again:
+-
+- down(&dir->i_sem);
++ lock = lock_dir(dir, name);
+ /*
+ * First re-do the cached lookup just in case it was created
+ * while we waited for the directory semaphore..
+@@ -329,7 +359,7 @@ again:
+ else
+ result = dentry;
+ }
+- up(&dir->i_sem);
++ unlock_dir(dir, lock);
+ return result;
+ }
+
+@@ -337,7 +367,7 @@ again:
+ * Uhhuh! Nasty case: the cache was re-populated while
+ * we waited on the semaphore. Need to revalidate.
+ */
+- up(&dir->i_sem);
++ unlock_dir(dir, lock);
+ if (result->d_op && result->d_op->d_revalidate) {
+ if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) {
+ dput(result);
+@@ -1234,13 +1264,13 @@ struct file *filp_open(const char * path
+ goto exit;
+
+ dir = nd.dentry;
+- down(&dir->d_inode->i_sem);
++ nd.lock = lock_dir(dir->d_inode, &nd.last);
+ dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
+
+ do_last:
+ error = PTR_ERR(dentry);
+ if (IS_ERR(dentry)) {
+- up(&dir->d_inode->i_sem);
++ unlock_dir(dir->d_inode, nd.lock);
+ goto exit;
+ }
+
+@@ -1249,7 +1279,7 @@ do_last:
+ if (!dentry->d_inode) {
+ error = vfs_create_it(dir->d_inode, dentry,
+ mode & ~current->fs->umask, &it);
+- up(&dir->d_inode->i_sem);
++ unlock_dir(dir->d_inode, nd.lock);
+ dput(nd.dentry);
+ nd.dentry = dentry;
+ if (error)
+@@ -1264,7 +1294,7 @@ do_last:
+ /*
+ * It already exists.
+ */
+- up(&dir->d_inode->i_sem);
++ unlock_dir(dir->d_inode, nd.lock);
+
+ error = -EEXIST;
+ if (flag & O_EXCL)
+@@ -1344,7 +1374,7 @@ do_link:
+ goto exit;
+ }
+ dir = nd.dentry;
+- down(&dir->d_inode->i_sem);
++ nd.lock = lock_dir(dir->d_inode, &nd.last);
+ dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
+ putname(nd.last.name);
+ goto do_last;
+@@ -1357,7 +1387,7 @@ static struct dentry *lookup_create(stru
+ {
+ struct dentry *dentry;
+
+- down(&nd->dentry->d_inode->i_sem);
++ nd->lock = lock_dir(nd->dentry->d_inode, &nd->last);
+ dentry = ERR_PTR(-EEXIST);
+ if (nd->last_type != LAST_NORM)
+ goto fail;
+@@ -1446,7 +1476,7 @@ asmlinkage long sys_mknod(const char * f
+ }
+ dput(dentry);
+ }
+- up(&nd.dentry->d_inode->i_sem);
++ unlock_dir(nd.dentry->d_inode, nd.lock);
+ out2:
+ path_release(&nd);
+ out:
+@@ -1509,7 +1539,7 @@ asmlinkage long sys_mkdir(const char * p
+ mode & ~current->fs->umask);
+ dput(dentry);
+ }
+- up(&nd.dentry->d_inode->i_sem);
++ unlock_dir(nd.dentry->d_inode, nd.lock);
+ out2:
+ path_release(&nd);
+ out:
+@@ -1619,14 +1649,14 @@ asmlinkage long sys_rmdir(const char * p
+ if (error != -EOPNOTSUPP)
+ goto exit1;
+ }
+- down(&nd.dentry->d_inode->i_sem);
++ nd.lock = lock_dir(nd.dentry->d_inode, &nd.last);
+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
+ error = PTR_ERR(dentry);
+ if (!IS_ERR(dentry)) {
+ error = vfs_rmdir(nd.dentry->d_inode, dentry);
+ dput(dentry);
+ }
+- up(&nd.dentry->d_inode->i_sem);
++ unlock_dir(nd.dentry->d_inode, nd.lock);
+ exit1:
+ path_release(&nd);
+ exit:
+@@ -1685,7 +1715,7 @@ asmlinkage long sys_unlink(const char *
+ if (error != -EOPNOTSUPP)
+ goto exit1;
+ }
+- down(&nd.dentry->d_inode->i_sem);
++ nd.lock = lock_dir(nd.dentry->d_inode, &nd.last);
+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
+ error = PTR_ERR(dentry);
+ if (!IS_ERR(dentry)) {
+@@ -1696,7 +1726,7 @@ asmlinkage long sys_unlink(const char *
+ exit2:
+ dput(dentry);
+ }
+- up(&nd.dentry->d_inode->i_sem);
++ unlock_dir(nd.dentry->d_inode, nd.lock);
+ exit1:
+ path_release(&nd);
+ exit:
+@@ -1766,7 +1796,7 @@ asmlinkage long sys_symlink(const char *
+ error = vfs_symlink(nd.dentry->d_inode, dentry, from);
+ dput(dentry);
+ }
+- up(&nd.dentry->d_inode->i_sem);
++ unlock_dir(nd.dentry->d_inode, nd.lock);
+ out2:
+ path_release(&nd);
+ out:
+@@ -1858,7 +1888,7 @@ asmlinkage long sys_link(const char * ol
+ error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
+ dput(new_dentry);
+ }
+- up(&nd.dentry->d_inode->i_sem);
++ unlock_dir(nd.dentry->d_inode, nd.lock);
+ out_release:
+ path_release(&nd);
+ out:
+--- linux-2.4.18/include/linux/fs.h~vfs-pdirops-2.4.18-chaos 2003-09-01 14:58:03.000000000 +0400
++++ linux-2.4.18-alexey/include/linux/fs.h 2003-09-01 16:36:16.000000000 +0400
+@@ -21,6 +21,7 @@
+ #include <linux/cache.h>
+ #include <linux/stddef.h>
+ #include <linux/string.h>
++#include <linux/dynlocks.h>
+
+ #include <asm/atomic.h>
+ #include <asm/bitops.h>
+@@ -136,6 +137,7 @@ extern int leases_enable, dir_notify_ena
+ #define S_IMMUTABLE 16 /* Immutable file */
+ #define S_DEAD 32 /* removed, but still open directory */
+ #define S_NOQUOTA 64 /* Inode is not counted to quota */
++#define S_PDIROPS 256 /* Parallel directory operations */
+
+ /*
+ * Note that nosuid etc flags are inode-specific: setting some file-system
+@@ -162,6 +164,7 @@ extern int leases_enable, dir_notify_ena
+ #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE)
+ #define IS_NOATIME(inode) (__IS_FLG(inode, MS_NOATIME) || ((inode)->i_flags & S_NOATIME))
+ #define IS_NODIRATIME(inode) __IS_FLG(inode, MS_NODIRATIME)
++#define IS_PDIROPS(inode) __IS_FLG(inode, S_PDIROPS)
+
+ #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
+
+@@ -490,6 +493,7 @@ struct inode {
+ atomic_t i_writecount;
+ unsigned int i_attr_flags;
+ __u32 i_generation;
++ struct dynlock i_dcache_lock; /* for parallel directory ops */
+ union {
+ struct minix_inode_info minix_i;
+ struct ext2_inode_info ext2_i;
+@@ -713,6 +717,7 @@ struct nameidata {
+ unsigned int flags;
+ int last_type;
+ struct lookup_intent *intent;
++ void *lock;
+ };
+
+ #define DQUOT_USR_ENABLED 0x01 /* User diskquotas enabled */
+@@ -1610,12 +1615,6 @@ static inline struct dentry *get_parent(
+ return dget(dentry->d_parent);
+ }
+
+-static inline void unlock_dir(struct dentry *dir)
+-{
+- up(&dir->d_inode->i_sem);
+- dput(dir);
+-}
+-
+ /*
+ * Whee.. Deadlock country. Happily there are only two VFS
+ * operations that does this..
+--- linux-2.4.18/fs/inode.c~vfs-pdirops-2.4.18-chaos 2003-09-01 14:58:03.000000000 +0400
++++ linux-2.4.18-alexey/fs/inode.c 2003-09-01 16:36:16.000000000 +0400
+@@ -119,6 +119,7 @@ static struct inode *alloc_inode(struct
+ mapping->host = inode;
+ mapping->gfp_mask = GFP_HIGHUSER;
+ inode->i_mapping = mapping;
++ dynlock_init(&inode->i_dcache_lock);
+ }
+ return inode;
+ }
+
+_
--- /dev/null
+ fs/inode.c | 1
+ fs/namei.c | 66 ++++++++++++++++++++++++++++++++++++++---------------
+ include/linux/fs.h | 11 ++++----
+ 3 files changed, 54 insertions(+), 24 deletions(-)
+
+Index: linux-2.4.20-rh/fs/namei.c
+===================================================================
+--- linux-2.4.20-rh.orig/fs/namei.c 2003-09-04 20:58:33.000000000 +0800
++++ linux-2.4.20-rh/fs/namei.c 2003-09-04 21:21:20.000000000 +0800
+@@ -101,6 +101,36 @@
+
+ }
+
++/* Lock @dir for an operation on @name; pass the result to unlock_dir(). */
++static void *lock_dir(struct inode *dir, struct qstr *name)
++{
++	unsigned long hash;
++
++	/* no parallel dirops: serialize on the directory semaphore */
++	if (!IS_PDIROPS(dir)) {
++		down(&dir->i_sem);
++		return NULL;
++	}
++
++	/* OK. fs understands parallel directory operations, so we
++	 * only lock the hash of the requested filename in order to
++	 * prevent concurrent operations on the same name -bzzz */
++	hash = full_name_hash(name->name, name->len);
++
++	/* NOTE(review): GFP_ATOMIC and an unchecked result - confirm that
++	 * dynlock_lock() cannot fail here */
++	return dynlock_lock(&dir->i_dcache_lock, hash, 1, GFP_ATOMIC);
++}
++
++/* Undo lock_dir(): @lock is the value lock_dir() returned for @dir. */
++static void unlock_dir(struct inode *dir, void *lock)
++{
++	if (IS_PDIROPS(dir))
++		dynlock_unlock(&dir->i_dcache_lock, lock);
++	else
++		up(&dir->i_sem);
++}
++
+ /* In order to reduce some races, while at the same time doing additional
+ * checking and hopefully speeding things up, we copy filenames to the
+ * kernel data space before using them..
+@@ -302,10 +332,10 @@
+ {
+ struct dentry * result;
+ struct inode *dir = parent->d_inode;
++ void *lock;
+
+ again:
+-
+- down(&dir->i_sem);
++ lock = lock_dir(dir, name);
+ /*
+ * First re-do the cached lookup just in case it was created
+ * while we waited for the directory semaphore..
+@@ -329,7 +359,7 @@
+ else
+ result = dentry;
+ }
+- up(&dir->i_sem);
++ unlock_dir(dir, lock);
+ return result;
+ }
+
+@@ -337,7 +367,7 @@
+ * Uhhuh! Nasty case: the cache was re-populated while
+ * we waited on the semaphore. Need to revalidate.
+ */
+- up(&dir->i_sem);
++ unlock_dir(dir, lock);
+ if (result->d_op && result->d_op->d_revalidate) {
+ if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) {
+ dput(result);
+@@ -1180,13 +1210,13 @@
+ goto exit;
+
+ dir = nd->dentry;
+- down(&dir->d_inode->i_sem);
++ nd->lock = lock_dir(dir->d_inode, &nd->last);
+ dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+
+ do_last:
+ error = PTR_ERR(dentry);
+ if (IS_ERR(dentry)) {
+- up(&dir->d_inode->i_sem);
++ unlock_dir(dir->d_inode, nd->lock);
+ goto exit;
+ }
+
+@@ -1195,7 +1225,7 @@
+ if (!dentry->d_inode) {
+ error = vfs_create_it(dir->d_inode, dentry,
+ mode & ~current->fs->umask, it);
+- up(&dir->d_inode->i_sem);
++ unlock_dir(dir->d_inode, nd->lock);
+ dput(nd->dentry);
+ nd->dentry = dentry;
+ if (error)
+@@ -1209,7 +1239,7 @@
+ /*
+ * It already exists.
+ */
+- up(&dir->d_inode->i_sem);
++ unlock_dir(dir->d_inode, nd->lock);
+
+ error = -EEXIST;
+ if (flag & O_EXCL)
+@@ -1362,7 +1392,7 @@
+ goto exit;
+ }
+ dir = nd->dentry;
+- down(&dir->d_inode->i_sem);
++ nd->lock = lock_dir(dir->d_inode, &nd->last);
+ dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+ putname(nd->last.name);
+ goto do_last;
+@@ -1380,7 +1410,7 @@
+ {
+ struct dentry *dentry;
+
+- down(&nd->dentry->d_inode->i_sem);
++ nd->lock = lock_dir(nd->dentry->d_inode, &nd->last);
+ dentry = ERR_PTR(-EEXIST);
+ if (nd->last_type != LAST_NORM)
+ goto fail;
+@@ -1469,7 +1499,7 @@
+ }
+ dput(dentry);
+ }
+- up(&nd.dentry->d_inode->i_sem);
++ unlock_dir(nd.dentry->d_inode, nd.lock);
+ out2:
+ path_release(&nd);
+ out:
+@@ -1532,7 +1562,7 @@
+ mode & ~current->fs->umask);
+ dput(dentry);
+ }
+- up(&nd.dentry->d_inode->i_sem);
++ unlock_dir(nd.dentry->d_inode, nd.lock);
+ out2:
+ path_release(&nd);
+ out:
+@@ -1642,14 +1672,14 @@
+ if (error != -EOPNOTSUPP)
+ goto exit1;
+ }
+- down(&nd.dentry->d_inode->i_sem);
++ nd.lock = lock_dir(nd.dentry->d_inode, &nd.last);
+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
+ error = PTR_ERR(dentry);
+ if (!IS_ERR(dentry)) {
+ error = vfs_rmdir(nd.dentry->d_inode, dentry);
+ dput(dentry);
+ }
+- up(&nd.dentry->d_inode->i_sem);
++ unlock_dir(nd.dentry->d_inode, nd.lock);
+ exit1:
+ path_release(&nd);
+ exit:
+@@ -1708,7 +1738,7 @@
+ if (error != -EOPNOTSUPP)
+ goto exit1;
+ }
+- down(&nd.dentry->d_inode->i_sem);
++ nd.lock = lock_dir(nd.dentry->d_inode, &nd.last);
+ dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
+ error = PTR_ERR(dentry);
+ if (!IS_ERR(dentry)) {
+@@ -1719,7 +1749,7 @@
+ exit2:
+ dput(dentry);
+ }
+- up(&nd.dentry->d_inode->i_sem);
++ unlock_dir(nd.dentry->d_inode, nd.lock);
+ exit1:
+ path_release(&nd);
+ exit:
+@@ -1789,7 +1819,7 @@
+ error = vfs_symlink(nd.dentry->d_inode, dentry, from);
+ dput(dentry);
+ }
+- up(&nd.dentry->d_inode->i_sem);
++ unlock_dir(nd.dentry->d_inode, nd.lock);
+ out2:
+ path_release(&nd);
+ out:
+@@ -1881,7 +1911,7 @@
+ error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
+ dput(new_dentry);
+ }
+- up(&nd.dentry->d_inode->i_sem);
++ unlock_dir(nd.dentry->d_inode, nd.lock);
+ out_release:
+ path_release(&nd);
+ out:
+Index: linux-2.4.20-rh/include/linux/fs.h
+===================================================================
+--- linux-2.4.20-rh.orig/include/linux/fs.h 2003-09-04 20:59:14.000000000 +0800
++++ linux-2.4.20-rh/include/linux/fs.h 2003-09-04 21:03:46.000000000 +0800
+@@ -21,6 +21,7 @@
+ #include <linux/cache.h>
+ #include <linux/stddef.h>
+ #include <linux/string.h>
++#include <linux/dynlocks.h>
+
+ #include <asm/atomic.h>
+ #include <asm/bitops.h>
+@@ -136,6 +137,7 @@
+ #define S_IMMUTABLE 16 /* Immutable file */
+ #define S_DEAD 32 /* removed, but still open directory */
+ #define S_NOQUOTA 64 /* Inode is not counted to quota */
++#define S_PDIROPS 256 /* Parallel directory operations */
+
+ /*
+ * Note that nosuid etc flags are inode-specific: setting some file-system
+@@ -162,6 +164,7 @@
+ #define IS_IMMUTABLE(inode) ((inode)->i_flags & S_IMMUTABLE)
+ #define IS_NOATIME(inode) (__IS_FLG(inode, MS_NOATIME) || ((inode)->i_flags & S_NOATIME))
+ #define IS_NODIRATIME(inode) __IS_FLG(inode, MS_NODIRATIME)
++#define IS_PDIROPS(inode) __IS_FLG(inode, S_PDIROPS)
+
+ #define IS_DEADDIR(inode) ((inode)->i_flags & S_DEAD)
+
+@@ -489,6 +492,7 @@
+ atomic_t i_writecount;
+ unsigned int i_attr_flags;
+ __u32 i_generation;
++ struct dynlock i_dcache_lock; /* for parallel directory ops */
+ union {
+ struct minix_inode_info minix_i;
+ struct ext2_inode_info ext2_i;
+@@ -708,6 +712,7 @@
+ unsigned int flags;
+ int last_type;
+ struct lookup_intent *intent;
++ void *lock;
+ };
+
+ /*
+@@ -1621,12 +1626,6 @@
+ return dget(dentry->d_parent);
+ }
+
+-static inline void unlock_dir(struct dentry *dir)
+-{
+- up(&dir->d_inode->i_sem);
+- dput(dir);
+-}
+-
+ /*
+ * Whee.. Deadlock country. Happily there are only two VFS
+ * operations that does this..
+Index: linux-2.4.20-rh/fs/inode.c
+===================================================================
+--- linux-2.4.20-rh.orig/fs/inode.c 2003-09-04 20:58:35.000000000 +0800
++++ linux-2.4.20-rh/fs/inode.c 2003-09-04 21:03:46.000000000 +0800
+@@ -121,6 +121,7 @@
+ mapping->host = inode;
+ mapping->gfp_mask = GFP_HIGHUSER;
+ inode->i_mapping = mapping;
++ dynlock_init(&inode->i_dcache_lock);
+ }
+ return inode;
+ }
--- /dev/null
+include/linux/dynlocks.h
+lib/dynlocks.c
+lib/Makefile
--- /dev/null
+include/linux/ext3_fs.h
--- /dev/null
+fs/ext3/balloc.c
+fs/ext3/balloc.c.orig
+fs/ext3/dir.c
+fs/ext3/dir.c.orig
+fs/ext3/ialloc.c
+fs/ext3/ialloc.c.orig
+fs/ext3/inode.c
+fs/ext3/inode.c.orig
+fs/ext3/ioctl.c
+fs/ext3/ioctl.c.orig
+fs/ext3/namei.c
+fs/ext3/namei.c.orig
+fs/ext3/super.c
+fs/ext3/super.c.orig
+fs/ext3/symlink.c
+fs/ext3/symlink.c.orig
+include/linux/ext3_fs.h
+include/linux/ext3_fs.h.orig
+include/linux/ext3_jbd.h
+include/linux/ext3_jbd.h.orig
--- /dev/null
+fs/ext3/namei.c
--- /dev/null
+fs/ext3/file.c
+fs/ext3/file.c.orig
+fs/ext3/inode.c
+fs/ext3/super.c
+include/linux/ext3_fs.h
+include/linux/ext3_fs_sb.h
--- /dev/null
+fs/ext3/extents.c
+fs/ext3/ialloc.c
+fs/ext3/inode.c
+fs/ext3/Makefile
+fs/ext3/super.c
+include/linux/ext3_fs.h
+include/linux/ext3_fs_i.h
+include/linux/ext3_fs_sb.h
--- /dev/null
+fs/ext3/ialloc.c
+fs/ext3/namei.c
+include/asm-alpha/fcntl.h
+include/asm-arm/fcntl.h
+include/asm-cris/fcntl.h
+include/asm-i386/fcntl.h
+include/asm-ia64/fcntl.h
+include/asm-m68k/fcntl.h
+include/asm-mips64/fcntl.h
+include/asm-mips/fcntl.h
+include/asm-parisc/fcntl.h
+include/asm-ppc/fcntl.h
+include/asm-s390/fcntl.h
+include/asm-s390x/fcntl.h
+include/asm-sh/fcntl.h
+include/asm-sparc64/fcntl.h
+include/asm-sparc/fcntl.h
+include/linux/ext3_fs.h
+fs/ext3/inode.c
--- /dev/null
+fs/ext3/inode.c
+fs/ext3/super.c
--- /dev/null
+fs/ext3/super.c
--- /dev/null
+fs/ext3/namei.c
+fs/ext3/super.c
+include/linux/ext3_fs.h
+include/linux/ext3_fs_i.h
+fs/ext3/inode.c
+fs/ext3/ialloc.c
--- /dev/null
+Documentation/filesystems/ext2.txt
+fs/ext3/inode.c
+fs/ext3/iopen.c
+fs/ext3/iopen.h
+fs/ext3/Makefile
+fs/ext3/namei.c
+fs/ext3/super.c
+include/linux/ext3_fs.h
--- /dev/null
+Documentation/filesystems/ext2.txt
+fs/ext3/inode.c
+fs/ext3/iopen.c
+fs/ext3/iopen.h
+fs/ext3/Makefile
+fs/ext3/namei.c
+fs/ext3/super.c
+include/linux/ext3_fs.h
--- /dev/null
+fs/ext3/ext3-exports.c
+fs/ext3/ialloc.c
+fs/ext3/inode.c
+fs/ext3/Makefile
+fs/ext3/namei.c
+fs/ext3/super.c
+fs/ext3/xattr.c
+include/linux/ext3_fs.h
+include/linux/ext3_jbd.h
+include/linux/ext3_xattr.h
+include/linux/xattr.h
--- /dev/null
+include/linux/fs.h
+mm/filemap.c
--- /dev/null
+include/linux/fs.h
+mm/filemap.c
--- /dev/null
+include/asm-um/unistd.h
--- /dev/null
+arch/um/config.release
+arch/um/defconfig
+arch/um/drivers/chan_kern.c
+arch/um/drivers/chan_user.c
+arch/um/drivers/cow.h
+arch/um/drivers/cow_kern.c
+arch/um/drivers/cow_sys.h
+arch/um/drivers/cow_user.c
+arch/um/drivers/hostaudio_kern.c
+arch/um/drivers/line.c
+arch/um/drivers/Makefile
+arch/um/drivers/mconsole_kern.c
+arch/um/drivers/mconsole_user.c
+arch/um/drivers/mmapper_kern.c
+arch/um/drivers/net_kern.c
+arch/um/drivers/port_kern.c
+arch/um/drivers/ssl.c
+arch/um/drivers/stdio_console.c
+arch/um/drivers/ubd_kern.c
+arch/um/drivers/ubd_user.c
+arch/um/drivers/xterm.c
+arch/um/drivers/xterm_kern.c
+arch/um/dyn.lds.S
+arch/um/include/irq_kern.h
+arch/um/include/kern_util.h
+arch/um/include/line.h
+arch/um/include/mconsole.h
+arch/um/include/mem.h
+arch/um/include/mem_user.h
+arch/um/include/os.h
+arch/um/include/sysdep-i386/sigcontext.h
+arch/um/include/ubd_user.h
+arch/um/include/user.h
+arch/um/include/user_util.h
+arch/um/Kconfig
+arch/um/Kconfig_block
+arch/um/Kconfig_net
+arch/um/kernel/config.c.in
+arch/um/kernel/exec_kern.c
+arch/um/kernel/init_task.c
+arch/um/kernel/irq.c
+arch/um/kernel/Makefile
+arch/um/kernel/mem.c
+arch/um/kernel/mem_user.c
+arch/um/kernel/process.c
+arch/um/kernel/process_kern.c
+arch/um/kernel/ptrace.c
+arch/um/kernel/sigio_kern.c
+arch/um/kernel/signal_kern.c
+arch/um/kernel/skas/include/mode.h
+arch/um/kernel/skas/include/uaccess.h
+arch/um/kernel/skas/Makefile
+arch/um/kernel/skas/process.c
+arch/um/kernel/skas/process_kern.c
+arch/um/kernel/skas/util/mk_ptregs.c
+arch/um/kernel/smp.c
+arch/um/kernel/syscall_kern.c
+arch/um/kernel/sys_call_table.c
+arch/um/kernel/sysrq.c
+arch/um/kernel/time.c
+arch/um/kernel/time_kern.c
+arch/um/kernel/trap_kern.c
+arch/um/kernel/trap_user.c
+arch/um/kernel/tt/include/uaccess.h
+arch/um/kernel/tt/process_kern.c
+arch/um/kernel/tt/ptproxy/proxy.c
+arch/um/kernel/tt/tracer.c
+arch/um/kernel/tt/uaccess_user.c
+arch/um/kernel/tty_log.c
+arch/um/kernel/um_arch.c
+arch/um/kernel/umid.c
+arch/um/kernel/user_util.c
+arch/um/Makefile
+arch/um/Makefile-i386
+arch/um/Makefile-skas
+arch/um/os-Linux/drivers/tuntap_user.c
+arch/um/os-Linux/file.c
+arch/um/sys-i386/bugs.c
+arch/um/sys-i386/Makefile
+arch/um/uml.lds.S
+arch/um/util/mk_constants_kern.c
+fs/hostfs/hostfs.h
+fs/hostfs/hostfs_kern.c
+fs/hostfs/hostfs_user.c
+fs/hostfs/Makefile
+fs/hppfs/hppfs_kern.c
+fs/hppfs/Makefile
+fs/Makefile
+include/asm-um/archparam-i386.h
+include/asm-um/common.lds.S
+include/asm-um/cpufeature.h
+include/asm-um/current.h
+include/asm-um/fixmap.h
+include/asm-um/irq.h
+include/asm-um/local.h
+include/asm-um/module-generic.h
+include/asm-um/module-i386.h
+include/asm-um/page.h
+include/asm-um/pgtable.h
+include/asm-um/processor-generic.h
+include/asm-um/processor-i386.h
+include/asm-um/sections.h
+include/asm-um/smp.h
+include/asm-um/system-generic.h
+include/asm-um/thread_info.h
+include/asm-um/timex.h
+include/linux/mm.h
+include/linux/proc_mm.h
+mm/Makefile
+mm/memory.c
+mm/mmap.c
+mm/mprotect.c
+mm/proc_mm.c
--- /dev/null
+fs/namei.c
+include/linux/fs.h
+fs/inode.c
jbd-dont-account-blocks-twice.patch
jbd-commit-tricks.patch
ext3-o_direct-1-2.4.18-chaos.patch
-ext3-no-write-super.patch
-jbd-ctx_switch.patch
-jbd-get_write_access.patch
+ext3-no-write-super-chaos.patch
+ext3-extents-2.4.18-chaos.patch
+ext3-extents-oflag-2.4.18-chaos.patch
--- /dev/null
+dev_read_only.patch
+exports.patch
+kmem_cache_validate.patch
+lustre_version.patch
+vfs_intent-2.4.18-18-chaos65.patch
+invalidate_show.patch
+iod-rmap-exports.patch
+export-truncate.patch
+ext3-compat-2.4.18-chaos.patch
+ext-2.4-patch-1.patch
+ext-2.4-patch-2.patch
+ext-2.4-patch-3.patch
+ext-2.4-patch-4.patch
+ext-2.4-patch-5.patch
+linux-2.4.18ea-0.8.26-2.patch
+ext3-2.4-ino_t.patch
+ext3-2.4.18-ino_sb_macro-2.patch
+ext3-orphan_lock.patch
+ext3-delete_thread-2.4.18-2.patch
+extN-misc-fixup.patch
+extN-noread.patch
+extN-wantedi.patch
+ext3-san-2.4.20.patch
+extN-2.4.18-ino_sb_fixup.patch
+ext3-map_inode_page_2.4.18.patch
+ext3-error-export.patch
+iopen-2.4.18-2.patch
+jbd-dont-account-blocks-twice.patch
+jbd-commit-tricks.patch
+ext3-o_direct-1-2.4.18-chaos.patch
+ext3-no-write-super-chaos.patch
+dynamic-locks-2.4.18-chaos.patch
+vfs-pdirops-2.4.18-chaos.patch
+ext3-pdirops-2.4.18-chaos.patch
+add_page_private.patch
--- /dev/null
+uml-patch-2.6.0-test3-1.patch
+lustre_build.patch
+lustre_version.patch
+vfs_intent_2.6.0-test1.patch
+vfs_nointent_2.6.0-test1.patch
+vfs_races_2.5.72_rev1.patch
+vfs_mntcwd_2.5.72_rev1.patch
+ext3-san-jdike-2.5.73.patch
+iopen-2.6.0.patch
+export-truncate-2.5.63.patch
+qla2xxx-v8.00.00b1-2.5.73.patch
+uml-2.6.0-fix.patch
+ext3-map_inode_page-2.6.0.patch
+removepage-2.6.0.patch
if (!fd) /* no process opened the file after an mcreate */
RETURN(rc = 0);
+#if 0
/* we might not be able to get a valid handle on this file
* again so we really want to flush our write cache.. */
if (S_ISREG(inode->i_mode) && lsm) {
memcpy(&oa.o_inline, &fd->fd_ost_och, FD_OSTDATA_SIZE);
oa.o_valid |= OBD_MD_FLHANDLE;
- rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL);
+ rc = obd_close(ll_s2obdexp(sbi), &oa, lsm, NULL);
if (rc)
CERROR("inode %lu object close failed: rc = "
"%d\n", lli->lli_st_ino, rc);
}
+#endif
rc2 = llu_mdc_close(&sbi->ll_mdc_conn, inode);
if (rc2 && !rc)
{
struct obd_uuid ll_sb_uuid;
struct lustre_handle ll_mdc_conn;
- struct lustre_handle ll_osc_conn;
+ struct obd_export ll_osc_exp;
obd_id ll_rootino;
int ll_flags;
struct list_head ll_conn_chain;
down(&lli->lli_getattr_sem);
if (!test_bit(LLI_F_DID_GETATTR, &lli->lli_flags)) {
- rc = ll_inode_getattr(inode, lsm, fd ? &fd->fd_ost_och : NULL);
+ rc = ll_inode_getattr(inode, lsm);
if (rc == 0) {
set_bit(LLI_F_DID_GETATTR, &lli->lli_flags);
} else {
dst->o_valid |= (valid & ~OBD_MD_FLID);
}
-int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm,
- char *ostdata)
+static int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm)
{
struct llu_sb_info *sbi = llu_i2sbi(inode);
struct obdo oa;
oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
OBD_MD_FLBLOCKS | OBD_MD_FLMTIME | OBD_MD_FLCTIME;
- if (ostdata != NULL) {
- memcpy(&oa.o_inline, ostdata, FD_OSTDATA_SIZE);
- oa.o_valid |= OBD_MD_FLHANDLE;
- }
-
rc = obd_getattr(&sbi->ll_osc_conn, &oa, lsm);
if (rc)
RETURN(rc);
llu_update_inode(*inop, body, lic.lic_lsm);
if (llu_i2info(*inop)->lli_smd) {
- rc = llu_inode_getattr(*inop, llu_i2info(*inop)->lli_smd, NULL);
+ rc = llu_inode_getattr(*inop, llu_i2info(*inop)->lli_smd);
if (rc)
_sysio_i_gone(*inop);
}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001-2003 Cluster File Systems, Inc.
+ *
+ * This file is part of Lustre, http://www.sf.net/projects/lustre/
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_MDC
+
+#ifdef __KERNEL__
+# include <linux/module.h>
+# include <linux/pagemap.h>
+# include <linux/miscdevice.h>
+# include <linux/init.h>
+#else
+# include <liblustre.h>
+# include <linux/obd_class.h>
+#endif
+
+#include <linux/lustre_mds.h>
+#include <linux/lustre_lite.h>
+#include <linux/lustre_dlm.h>
+#include <linux/lprocfs_status.h>
+#include "mdc_internal.h"
+
+/* Test the disposition bits recorded in @it against @flag.
+ * Returns non-zero when at least one of the bits in @flag is set. */
+int it_disposition(struct lookup_intent *it, int flag)
+{
+        return flag & it->d.lustre.it_disposition;
+}
+EXPORT_SYMBOL(it_disposition);
+
+/* Turn on the bits of @flag in @it's disposition word; bits that are
+ * already set stay set. */
+void it_set_disposition(struct lookup_intent *it, int flag)
+{
+        it->d.lustre.it_disposition = it->d.lustre.it_disposition | flag;
+}
+EXPORT_SYMBOL(it_set_disposition);
+
+/* Fill @data for an MDS operation: the caller context @ctxt, the first fid
+ * @f1, the optional second fid @f2 (zeroed when @f2 is NULL), plus the name,
+ * its length and the create mode. @data, @ctxt and @f1 must be non-NULL. */
+static void mdc_fid2mdc_op_data(struct mdc_op_data *data,
+                                struct ll_uctxt *ctxt,
+                                struct ll_fid *f1,
+                                struct ll_fid *f2,
+                                const char *name,
+                                int namelen,
+                                int mode)
+{
+        LASSERT(data);
+        LASSERT(ctxt);
+        LASSERT(f1);
+
+        /* plain scalar/pointer members */
+        data->create_mode = mode;
+        data->namelen = namelen;
+        data->name = name;
+
+        /* second fid is optional */
+        if (f2 == NULL)
+                memset(&data->fid2, 0, sizeof(data->fid2));
+        else
+                data->fid2 = *f2;
+
+        /* structure copies of the mandatory arguments */
+        data->fid1 = *f1;
+        data->ctxt = *ctxt;
+}
+
+/* Map the operation bits of @it to a lock mode: LCK_PW for IT_CREAT,
+ * LCK_PR for readdir/getattr/open/lookup. Any other intent is a bug. */
+static int it_to_lock_mode(struct lookup_intent *it)
+{
+        /* CREAT needs to be tested before open (both could be set) */
+        if (it->it_op & IT_CREAT)
+                return LCK_PW;
+
+        if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP))
+                return LCK_PR;
+
+        LBUG();
+        RETURN(-EINVAL);
+}
+
+/* Report the status of one phase of an intent operation.
+ *
+ * The phases are checked in the order open, create, lookup, intent
+ * executed; the first one found in @it's disposition decides the result:
+ * d.lustre.it_status is returned if @phase is that phase, 0 otherwise.
+ * If none of the four dispositions is set the state is logged and LBUG()
+ * is hit. */
+int it_open_error(int phase, struct lookup_intent *it)
+{
+        const int phases[] = { DISP_OPEN_OPEN, DISP_OPEN_CREATE,
+                               DISP_LOOKUP_EXECD, DISP_IT_EXECD };
+        unsigned int i;
+
+        for (i = 0; i < sizeof(phases) / sizeof(phases[0]); i++) {
+                if (!it_disposition(it, phases[i]))
+                        continue;
+
+                /* this phase owns it_status; other phases report success */
+                if (phase != phases[i])
+                        return 0;
+                return it->d.lustre.it_status;
+        }
+
+        CERROR("it disp: %X, status: %d\n", it->d.lustre.it_disposition,
+               it->d.lustre.it_status);
+        LBUG();
+        return 0;
+}
+EXPORT_SYMBOL(it_open_error);
+
+/* Store @data in the l_data field of the lock identified by the handle @l.
+ *
+ * @l is treated as a struct lustre_handle; a zero handle is silently
+ * ignored. The lock is looked up with ldlm_handle2lock() and the reference
+ * taken by that lookup is dropped again before returning.
+ *
+ * this must be called on a lockh that is known to have a referenced lock */
+void mdc_set_lock_data(__u64 *l, void *data)
+{
+ struct ldlm_lock *lock;
+ struct lustre_handle *lockh = (struct lustre_handle *)l;
+ ENTRY;
+
+ /* nothing to do for an empty (zero) handle */
+ if (!*l) {
+ EXIT;
+ return;
+ }
+
+ lock = ldlm_handle2lock(lockh);
+
+ LASSERT(lock != NULL);
+ /* l_data is inspected and updated under the namespace lock */
+ l_lock(&lock->l_resource->lr_namespace->ns_lock);
+#if !defined(LIBLUSTRE)
+ /* Kernel builds only: replacing different, already attached data is
+ * reported, and is only tolerated (LASSERT) when the old inode has
+ * I_FREEING set. Both pointers are interpreted as struct inode. */
+ if (lock->l_data && lock->l_data != data) {
+ struct inode *new_inode = data;
+ struct inode *old_inode = lock->l_data;
+ unsigned long state = old_inode->i_state & I_FREEING;
+ CERROR("Found existing inode %p/%lu/%u state %lu in lock: "
+ "setting data to %p/%lu/%u\n", old_inode,
+ old_inode->i_ino, old_inode->i_generation, state,
+ new_inode, new_inode->i_ino, new_inode->i_generation);
+ LASSERT(state);
+ }
+#endif
+ lock->l_data = data;
+ l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+ /* drop the reference obtained from ldlm_handle2lock() */
+ LDLM_LOCK_PUT(lock);
+
+ EXIT;
+}
+EXPORT_SYMBOL(mdc_set_lock_data);
+
+/* Build the resource id { fid->id, fid->generation } and hand it, together
+ * with the iterator @it and @data, to ldlm_change_cbdata() for the namespace
+ * of the obd behind @exp. Always returns 0. */
+int mdc_change_cbdata(struct obd_export *exp, struct ll_fid *fid,
+                      ldlm_iterator_t it, void *data)
+{
+        struct ldlm_res_id res_id = { .name = {0} };
+        struct obd_device *obd;
+        ENTRY;
+
+        res_id.name[0] = fid->id;
+        res_id.name[1] = fid->generation;
+
+        obd = class_exp2obd(exp);
+        ldlm_change_cbdata(obd->obd_namespace, &res_id, it, data);
+
+        EXIT;
+        return 0;
+}
+
+
+
+/* Send an LDLM_ENQUEUE request carrying the intent @it and record the
+ * outcome in @it.
+ *
+ * The request layout depends on it->it_op (open, unlink, getattr/lookup or
+ * readdir); any other operation hits LBUG(). The resource being locked is
+ * { data->fid1.id, data->fid1.generation }. @lockh receives the lock handle
+ * and is zeroed when the enqueue returns ELDLM_LOCK_ABORTED.
+ *
+ * Once the enqueue itself has succeeded, it->d.lustre.it_disposition and
+ * it_status are copied from the reply, it_lock_mode is set to the final lock
+ * mode (0 if the lock was aborted) and it_data points at the request.
+ *
+ * Returns 0 on success, -ENOMEM if the request cannot be allocated, the
+ * (negative) error from ldlm_cli_enqueue(), or -EPROTO when the reply
+ * buffers cannot be unpacked.
+ *
+ * We always reserve enough space in the reply packet for a stripe MD, because
+ * we don't know in advance the file type. */
+int mdc_enqueue(struct obd_export *exp,
+ int lock_type,
+ struct lookup_intent *it,
+ int lock_mode,
+ struct mdc_op_data *data,
+ struct lustre_handle *lockh,
+ char *tgt,
+ int tgtlen,
+ ldlm_completion_callback cb_completion,
+ ldlm_blocking_callback cb_blocking,
+ void *cb_data)
+{
+ struct ptlrpc_request *req;
+ struct obd_device *obddev = class_exp2obd(exp);
+ struct ldlm_res_id res_id =
+ { .name = {data->fid1.id, data->fid1.generation} };
+ /* request buffers 0 and 1 are fixed; 2..4 are filled in per intent */
+ int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
+ int rc, flags = LDLM_FL_HAS_INTENT;
+ /* reply buffers in order; each branch uses a prefix of this array */
+ int repsize[4] = {sizeof(struct ldlm_reply),
+ sizeof(struct mds_body),
+ obddev->u.cli.cl_max_mds_easize,
+ obddev->u.cli.cl_max_mds_cookiesize};
+ struct ldlm_reply *dlm_rep;
+ struct ldlm_intent *lit;
+ struct ldlm_request *lockreq;
+ void *eadata;
+ unsigned long irqflags;
+ int reply_buffers = 0;
+ ENTRY;
+
+// LDLM_DEBUG_NOLOCK("mdsintent=%s,name=%s,dir=%lu",
+// ldlm_it2str(it->it_op), it_name, it_inode->i_ino);
+
+ if (it->it_op & IT_OPEN) {
+ /* opens always create regular files, subject to the umask */
+ it->it_create_mode |= S_IFREG;
+ it->it_create_mode &= ~current->fs->umask;
+
+ /* buffer 4 is EA space; it is filled from the reply below */
+ size[2] = sizeof(struct mds_rec_create);
+ size[3] = data->namelen + 1;
+ size[4] = obddev->u.cli.cl_max_mds_easize;
+ req = ptlrpc_prep_req(class_exp2cliimp(exp), LDLM_ENQUEUE, 5,
+ size, NULL);
+ if (!req)
+ RETURN(-ENOMEM);
+
+ /* only the open branch sets rq_replay */
+ spin_lock_irqsave (&req->rq_lock, irqflags);
+ req->rq_replay = 1;
+ spin_unlock_irqrestore (&req->rq_lock, irqflags);
+
+ /* pack the intent */
+ lit = lustre_msg_buf(req->rq_reqmsg, 1, sizeof (*lit));
+ lit->opc = (__u64)it->it_op;
+
+ /* pack the intended request */
+ mdc_open_pack(req, 2, data, it->it_create_mode, 0,
+ LTIME_S(CURRENT_TIME),
+ it->it_flags, tgt, tgtlen);
+ /* get ready for the reply */
+ reply_buffers = 3;
+ req->rq_replen = lustre_msg_size(3, repsize);
+ } else if (it->it_op & IT_UNLINK) {
+ size[2] = sizeof(struct mds_rec_unlink);
+ size[3] = data->namelen + 1;
+ req = ptlrpc_prep_req(class_exp2cliimp(exp), LDLM_ENQUEUE, 4,
+ size, NULL);
+ if (!req)
+ RETURN(-ENOMEM);
+
+ /* pack the intent */
+ lit = lustre_msg_buf(req->rq_reqmsg, 1, sizeof (*lit));
+ lit->opc = (__u64)it->it_op;
+
+ /* pack the intended request */
+ mdc_unlink_pack(req, 2, data);
+ /* get ready for the reply */
+ reply_buffers = 4;
+ req->rq_replen = lustre_msg_size(4, repsize);
+ } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
+ int valid = OBD_MD_FLNOTOBD | OBD_MD_FLEASIZE;
+ size[2] = sizeof(struct mds_body);
+ size[3] = data->namelen + 1;
+
+ req = ptlrpc_prep_req(class_exp2cliimp(exp), LDLM_ENQUEUE, 4,
+ size, NULL);
+ if (!req)
+ RETURN(-ENOMEM);
+
+ /* pack the intent */
+ lit = lustre_msg_buf(req->rq_reqmsg, 1, sizeof (*lit));
+ lit->opc = (__u64)it->it_op;
+
+ /* pack the intended request */
+ mdc_getattr_pack(req, valid, 2, it->it_flags, data);
+ /* get ready for the reply */
+ reply_buffers = 3;
+ req->rq_replen = lustre_msg_size(3, repsize);
+ } else if (it->it_op == IT_READDIR) {
+ /* readdir sends only the ldlm_request buffer: no intent is
+ * packed and only the ldlm_reply is expected back */
+ req = ptlrpc_prep_req(class_exp2cliimp(exp), LDLM_ENQUEUE, 1,
+ size, NULL);
+ if (!req)
+ RETURN(-ENOMEM);
+
+ /* get ready for the reply */
+ reply_buffers = 1;
+ req->rq_replen = lustre_msg_size(1, repsize);
+ } else {
+ LBUG();
+ RETURN(-EINVAL);
+ }
+
+ /* the enqueue RPC is bracketed by the client's rpc lock */
+ mdc_get_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
+ rc = ldlm_cli_enqueue(exp, req, obddev->obd_namespace, NULL, res_id,
+ lock_type, NULL, 0, lock_mode, &flags,
+ cb_completion, cb_blocking, cb_data, lockh);
+ mdc_put_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
+
+ /* Similarly, if we're going to replay this request, we don't want to
+ * actually get a lock, just perform the intent. */
+ if (req->rq_transno || req->rq_replay) {
+ lockreq = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*lockreq));
+ lockreq->lock_flags |= LDLM_FL_INTENT_ONLY;
+ }
+
+ /* This can go when we're sure that this can never happen */
+ LASSERT(rc != -ENOENT);
+ if (rc == ELDLM_LOCK_ABORTED) {
+ /* no lock was granted: report mode 0 and an empty handle, but
+ * still process the reply below */
+ lock_mode = 0;
+ memset(lockh, 0, sizeof(*lockh));
+ rc = 0;
+ } else if (rc != 0) {
+ CERROR("ldlm_cli_enqueue: %d\n", rc);
+ LASSERT (rc < 0);
+ ptlrpc_req_finished(req);
+ RETURN(rc);
+ } else { /* rc = 0 */
+ struct ldlm_lock *lock = ldlm_handle2lock(lockh);
+ LASSERT(lock);
+
+ /* If the server gave us back a different lock mode, we should
+ * fix up our variables. */
+ if (lock->l_req_mode != lock_mode) {
+ /* move our reference from the old mode to the new one */
+ ldlm_lock_addref(lockh, lock->l_req_mode);
+ ldlm_lock_decref(lockh, lock_mode);
+ lock_mode = lock->l_req_mode;
+ }
+
+ LDLM_LOCK_PUT(lock);
+ }
+
+ dlm_rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*dlm_rep));
+ LASSERT(dlm_rep != NULL); /* checked by ldlm_cli_enqueue() */
+ LASSERT_REPSWABBED(req, 0); /* swabbed by ldlm_cli_enqueue() */
+
+ /* publish the result of the intent; from here on the request is
+ * reachable through it->d.lustre.it_data */
+ it->d.lustre.it_disposition = (int) dlm_rep->lock_policy_res1;
+ it->d.lustre.it_status = (int) dlm_rep->lock_policy_res2;
+ it->d.lustre.it_lock_mode = lock_mode;
+ it->d.lustre.it_data = req;
+
+ /* We know what to expect, so we do any byte flipping required here */
+ LASSERT(reply_buffers == 4 || reply_buffers == 3 || reply_buffers == 1);
+ if (reply_buffers >= 3) {
+ struct mds_body *body;
+
+ body = lustre_swab_repbuf(req, 1, sizeof (*body),
+ lustre_swab_mds_body);
+ if (body == NULL) {
+ /* NOTE(review): req is not finished and the lock is
+ * not dropped on this path; req was stored in
+ * it->d.lustre.it_data above, presumably for the
+ * caller to release - confirm */
+ CERROR ("Can't swab mds_body\n");
+ RETURN (-EPROTO);
+ }
+
+ if ((body->valid & OBD_MD_FLEASIZE) != 0) {
+ void *replayea;
+ /* The eadata is opaque; just check that it is
+ * there. Eventually, obd_unpackmd() will check
+ * the contents */
+ eadata = lustre_swab_repbuf(req, 2, body->eadatasize,
+ NULL);
+ if (eadata == NULL) {
+ /* NOTE(review): same cleanup question as for
+ * the mds_body failure above */
+ CERROR ("Missing/short eadata\n");
+ RETURN (-EPROTO);
+ }
+ if (it->it_op & IT_OPEN) {
+ /* copy the returned EA into request buffer 4
+ * (reserved in the open branch above).
+ * NOTE(review): copies body->eadatasize bytes
+ * into a cl_max_mds_easize buffer; assumes
+ * eadatasize never exceeds it - confirm */
+ replayea = lustre_msg_buf(req->rq_reqmsg, 4,
+ obddev->u.cli.cl_max_mds_easize);
+ LASSERT(replayea);
+ memcpy(replayea, eadata, body->eadatasize);
+ }
+ }
+ }
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(mdc_enqueue);
+
+/*
+ * This long block is all about fixing up the lock and request state
+ * so that it is correct as of the moment _before_ the operation was
+ * applied; that way, the VFS will think that everything is normal and
+ * call Lustre's regular VFS methods.
+ *
+ * If we're performing a creation, that means that unless the creation
+ * failed with EEXIST, we should fake up a negative dentry.
+ *
+ * For everything else, we want the lookup to succeed.
+ *
+ * One additional note: if CREATE or OPEN succeeded, we add an extra
+ * reference to the request because we need to keep it around until
+ * ll_create/ll_open gets called.
+ *
+ * The server will return to us, in it_disposition, an indication of
+ * exactly what d.lustre.it_status refers to.
+ *
+ * If DISP_OPEN_OPEN is set, then d.lustre.it_status refers to the open() call,
+ * otherwise if DISP_OPEN_CREATE is set, then it status is the
+ * creation failure mode. In either case, one of DISP_LOOKUP_NEG or
+ * DISP_LOOKUP_POS will be set, indicating whether the child lookup
+ * was successful.
+ *
+ * Else, if DISP_LOOKUP_EXECD then d.lustre.it_status is the rc of the
+ * child lookup.
+ */
+int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt,
+ struct ll_fid *pfid, const char *name, int len,
+ struct ll_fid *cfid, struct lookup_intent *it, int flags,
+ struct ptlrpc_request **reqp,
+ ldlm_blocking_callback cb_blocking)
+{
+ struct lustre_handle lockh;
+ struct ptlrpc_request *request;
+ int rc = 0;
+ struct mds_body *mds_body;
+ struct lustre_handle old_lock;
+ struct ldlm_lock *lock;
+ ENTRY;
+ LASSERT(it);
+
+ CDEBUG(D_DLMTRACE, "name: %*s in %ld, intent: %s\n", len, name,
+ (unsigned long) pfid->id, ldlm_it2str(it->it_op));
+
+ if (cfid && (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR)) {
+ /* We could just return 1 immediately, but since we should only
+ * be called in revalidate_it if we already have a lock, let's
+ * verify that. */
+ struct ldlm_res_id res_id ={.name = {cfid->id,
+ cfid->generation}};
+ struct lustre_handle lockh;
+ int mode, flags = LDLM_FL_BLOCK_GRANTED;
+
+ mode = LCK_PR;
+ rc = ldlm_lock_match(exp->exp_obd->obd_namespace, flags,
+ &res_id, LDLM_PLAIN, NULL, 0, LCK_PR,
+ &lockh);
+ if (!rc) {
+ mode = LCK_PW;
+ rc = ldlm_lock_match(exp->exp_obd->obd_namespace, flags,
+ &res_id, LDLM_PLAIN, NULL, 0,
+ LCK_PW, &lockh);
+ }
+ if (rc) {
+ memcpy(&it->d.lustre.it_lock_handle, &lockh,
+ sizeof(lockh));
+ it->d.lustre.it_lock_mode = mode;
+ }
+ RETURN(rc);
+ }
+
+ /* This function may be called twice, we only once want to
+ execute the request associated with the intent. If it was
+ done already, we skip past this and use the results. */
+ if (!it_disposition(it, DISP_ENQ_COMPLETE)) {
+ struct mdc_op_data op_data;
+ mdc_fid2mdc_op_data(&op_data, uctxt, pfid, cfid, name, len, 0);
+
+ rc = mdc_enqueue(exp, LDLM_PLAIN, it, it_to_lock_mode(it),
+ &op_data, &lockh, NULL, 0, ldlm_completion_ast,
+ cb_blocking, NULL);
+ if (rc < 0)
+ RETURN(rc);
+ memcpy(&it->d.lustre.it_lock_handle, &lockh, sizeof(lockh));
+ }
+ request = *reqp = it->d.lustre.it_data;
+ LASSERT(request != NULL);
+
+ if (!it_disposition(it, DISP_IT_EXECD)) {
+ /* The server failed before it even started executing the
+ * intent, i.e. because it couldn't unpack the request. */
+ LASSERT(it->d.lustre.it_status != 0);
+ RETURN(it->d.lustre.it_status);
+ }
+ rc = it_open_error(DISP_IT_EXECD, it);
+ if (rc)
+ RETURN(rc);
+
+ mds_body = lustre_msg_buf(request->rq_repmsg, 1, sizeof(*mds_body));
+ LASSERT(mds_body != NULL); /* mdc_enqueue checked */
+ LASSERT_REPSWABBED(request, 1); /* mdc_enqueue swabbed */
+
+ /* If we were revalidating a fid/name pair, mark the intent in
+ * case we fail and get called again from lookup */
+ if (cfid != NULL) {
+ it_set_disposition(it, DISP_ENQ_COMPLETE);
+ /* Also: did we find the same inode? */
+ if (memcmp(cfid, &mds_body->fid1, sizeof(*cfid))) {
+ ptlrpc_request_addref(request);
+ RETURN(-ESTALE);
+ }
+ }
+
+ /* If we're doing an IT_OPEN which did not result in an actual
+ * successful open, then we need to remove the bit which saves
+ * this request for unconditional replay. */
+ if (it->it_op & IT_OPEN) {
+ if (!it_disposition(it, DISP_OPEN_OPEN) ||
+ it->d.lustre.it_status != 0) {
+ unsigned long flags;
+
+ spin_lock_irqsave(&request->rq_lock, flags);
+ request->rq_replay = 0;
+ spin_unlock_irqrestore(&request->rq_lock, flags);
+ }
+ }
+
+ rc = it_open_error(DISP_LOOKUP_EXECD, it);
+ if (rc)
+ RETURN(rc);
+
+ /* keep requests around for the multiple phases of the call
+ * this shows the DISP_XX must guarantee we make it into the call
+ */
+ if (it_disposition(it, DISP_OPEN_CREATE) &&
+ !it_open_error(DISP_OPEN_CREATE, it))
+ ptlrpc_request_addref(request);
+ if (it_disposition(it, DISP_OPEN_OPEN) &&
+ !it_open_error(DISP_OPEN_OPEN, it))
+ ptlrpc_request_addref(request);
+
+ if (it->it_op & IT_CREAT) {
+ /* XXX this belongs in ll_create_iit */
+ } else if (it->it_op == IT_OPEN) {
+ LASSERT(!it_disposition(it, DISP_OPEN_CREATE));
+ } else {
+ LASSERT(it->it_op & (IT_GETATTR | IT_LOOKUP));
+ }
+
+ /* If we already have a matching lock, then cancel the new
+ * one. We have to set the data here instead of in
+ * mdc_enqueue, because we need to use the child's inode as
+ * the l_data to match, and that's not available until
+ * intent_finish has performed the iget().) */
+ lock = ldlm_handle2lock(&lockh);
+ if (lock) {
+ LDLM_DEBUG(lock, "matching against this");
+ LDLM_LOCK_PUT(lock);
+ memcpy(&old_lock, &lockh, sizeof(lockh));
+ if (ldlm_lock_match(NULL, LDLM_FL_BLOCK_GRANTED, NULL,
+ LDLM_PLAIN, NULL, 0, LCK_NL, &old_lock)) {
+ ldlm_lock_decref_and_cancel(&lockh,
+ it->d.lustre.it_lock_mode);
+ memcpy(&lockh, &old_lock, sizeof(old_lock));
+ memcpy(&it->d.lustre.it_lock_handle, &lockh,
+ sizeof(lockh));
+ }
+ }
+ CDEBUG(D_DENTRY, "D_IT dentry %*s intent: %s status %d disp %x rc %d\n",
+ len, name, ldlm_it2str(it->it_op), it->d.lustre.it_status,
+ it->d.lustre.it_disposition, rc);
+
+ RETURN(rc);
+}
+EXPORT_SYMBOL(mdc_intent_lock);
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * linux/fs/obdfilter/filter_io.c
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Author: Peter Braam <braam@clusterfs.com>
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/pagemap.h> // XXX kill me soon
+#include <linux/version.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <linux/iobuf.h>
+
+#include <linux/obd_class.h>
+#include <linux/lustre_fsfilt.h>
+#include "filter_internal.h"
+
+
+/* We should only change the file mtime (and not the ctime, like
+ * update_inode_times() in generic_file_write()) when we only change data. */
+void inode_update_time(struct inode *inode, int ctime_too)
+{
+        time_t stamp = CURRENT_TIME;
+        /* mtime is always refreshed; ctime only when the caller asks */
+        int mtime_stale = (inode->i_mtime != stamp);
+        int ctime_stale = (ctime_too && inode->i_ctime != stamp);
+
+        /* nothing to update: avoid dirtying the inode needlessly */
+        if (!mtime_stale && !ctime_stale)
+                return;
+
+        inode->i_mtime = stamp;
+        if (ctime_too)
+                inode->i_ctime = stamp;
+        mark_inode_dirty_sync(inode);
+}
+
+int ext3_map_inode_page(struct inode *inode, struct page *page,
+ unsigned long *blocks, int *created, int create);
+/*
+ * Map every page of @iobuf to blocks of @inode with ext3_map_inode_page()
+ * (blocks are created when @rw is OBD_BRW_WRITE) and then hand the kiobuf
+ * to brw_kiovec() as a WRITE.
+ *
+ * Returns 0 when brw_kiovec() reports a positive byte count, -EINVAL when
+ * the kiobuf needs more than KIO_MAX_SECTORS blocks, -ENOMEM when the
+ * "created" scratch array cannot be allocated, or the error returned by
+ * lock_kiovec(), ext3_map_inode_page() or brw_kiovec().
+ */
+int filter_direct_io(int rw, struct inode *inode, struct kiobuf *iobuf)
+{
+        int blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
+        int create = (rw == OBD_BRW_WRITE);
+        int blocksize = 1 << inode->i_blkbits;
+        int nblocks, expected;
+        int rc, i, cleanup_phase;
+        unsigned long *b;
+        int *created, *cr;
+        ENTRY;
+
+        nblocks = iobuf->nr_pages * blocks_per_page;
+        if (nblocks > KIO_MAX_SECTORS)
+                RETURN(-EINVAL);
+
+        OBD_ALLOC(created, sizeof(*created) * nblocks);
+        if (created == NULL)
+                RETURN(-ENOMEM);
+        cleanup_phase = 1;              /* created[] must be freed */
+
+        rc = lock_kiovec(1, &iobuf, 1);
+        if (rc < 0)
+                GOTO(cleanup, rc);
+        cleanup_phase = 2;              /* kiovec must be unlocked */
+
+        down(&inode->i_sem);
+        cleanup_phase = 3;              /* i_sem must be released */
+        b = iobuf->blocks;
+        cr = created;
+        for (i = 0; i < iobuf->nr_pages; i++) {
+                rc = ext3_map_inode_page(inode, iobuf->maplist[i], b, cr,
+                                         create);
+                if (rc)
+                        GOTO(cleanup, rc);
+
+                /* each page consumes blocks_per_page slots in both arrays */
+                b += blocks_per_page;
+                cr += blocks_per_page;
+        }
+        up(&inode->i_sem);
+        cleanup_phase = 2;
+
+        rc = brw_kiovec(WRITE, 1, &iobuf, inode->i_dev, iobuf->blocks,
+                        blocksize);
+        CDEBUG(D_INFO, "tried to write %d pages, rc = %d\n",
+               iobuf->nr_pages, rc);
+        expected = blocksize * iobuf->nr_pages * blocks_per_page;
+        if (rc != expected)
+                CERROR("short write? expected %d, wrote %d\n", expected, rc);
+        if (rc > 0)
+                rc = 0;
+
+        EXIT;
+cleanup:
+        /* undo in reverse order of acquisition; cases fall through */
+        switch(cleanup_phase) {
+        case 3:
+                up(&inode->i_sem);
+                /* fall through */
+        case 2:
+                unlock_kiovec(1, &iobuf);
+                /* fall through */
+        case 1:
+                OBD_FREE(created, sizeof(*created) * nblocks);
+                break;
+        default:
+                CERROR("corrupt cleanup_phase (%d)?\n", cleanup_phase);
+                LBUG();
+                break;
+        }
+        return rc;
+}
+
+/*
+ * Commit a bulk write (2.4 kiobuf path): collect the pages of @res in a
+ * kiobuf, start a filesystem transaction, write the pages with
+ * filter_direct_io(), grow i_size if the write extended the object, and
+ * commit.  On every exit path the pages in @res are released and the
+ * dentry reference is dropped; after a successful write the pages are
+ * first flipped into the page cache.
+ *
+ * Returns 0 on success, otherwise the first error encountered (an
+ * fsfilt_commit() error overrides an earlier result).
+ */
+int filter_commitrw_write(struct obd_export *exp, int objcount,
+                          struct obd_ioobj *obj, int niocount,
+                          struct niobuf_local *res,
+                          struct obd_trans_info *oti)
+{
+        struct obd_device *obd = exp->exp_obd;
+        struct iattr iattr = { .ia_valid = ATTR_SIZE, .ia_size = 0, };
+        struct inode *inode = NULL;
+        struct obd_run_ctxt saved;
+        struct fsfilt_objinfo fso;
+        struct niobuf_local *lnb;
+        struct kiobuf *iobuf;
+        unsigned long now = jiffies; /* DEBUGGING OST TIMEOUTS */
+        int rc = 0, err, i, cleanup_phase = 0;
+        ENTRY;
+        LASSERT(oti != NULL);
+        LASSERT(objcount == 1);
+        LASSERT(current->journal_info == NULL);
+
+        rc = alloc_kiovec(1, &iobuf);
+        if (rc)
+                GOTO(cleanup, rc);
+        cleanup_phase = 1;              /* kiovec must be freed */
+
+#if (LINUX_VERSION_CODE == KERNEL_VERSION(2,4,18))
+        iobuf->dovary = 0; /* this prevents corruption, not present in 2.4.20 */
+#endif
+        rc = expand_kiobuf(iobuf, obj->ioo_bufcnt);
+        if (rc)
+                GOTO(cleanup, rc);
+
+        iobuf->offset = 0;
+        iobuf->length = PAGE_SIZE * obj->ioo_bufcnt;
+        iobuf->nr_pages = obj->ioo_bufcnt;
+
+        fso.fso_dentry = res->dentry;
+        fso.fso_bufcnt = obj->ioo_bufcnt;
+        inode = res->dentry->d_inode;
+
+        /* Fill the page map and remember the highest byte written.  We
+         * expect these pages to be in offset order, but we'll be forgiving */
+        for (i = 0; i < obj->ioo_bufcnt; i++) {
+                loff_t end;
+
+                lnb = &res[i];
+                iobuf->maplist[i] = lnb->page;
+                end = lnb->offset + lnb->len;
+                if (end > iattr.ia_size)
+                        iattr.ia_size = end;
+        }
+
+        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        cleanup_phase = 2;              /* context must be popped */
+
+        oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso, niocount, oti);
+        if (IS_ERR(oti->oti_handle)) {
+                rc = PTR_ERR(oti->oti_handle);
+                CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
+                       "error starting transaction: rc = %d\n", rc);
+                oti->oti_handle = NULL;
+                GOTO(cleanup, rc);
+        }
+
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow brw_start %lus\n", (jiffies - now) / HZ);
+
+        rc = filter_direct_io(OBD_BRW_WRITE, inode, iobuf);
+        if (rc == 0) {
+                /* times and size are updated under i_sem */
+                down(&inode->i_sem);
+                inode_update_time(inode, 1);
+                if (iattr.ia_size > inode->i_size) {
+                        CDEBUG(D_INFO, "setting i_size to "LPU64"\n",
+                               iattr.ia_size);
+                        fsfilt_setattr(obd, res->dentry, oti->oti_handle,
+                                       &iattr, 0);
+                }
+                up(&inode->i_sem);
+        }
+
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow direct_io %lus\n", (jiffies - now) / HZ);
+
+        /* the transaction is finished and committed even if the I/O failed */
+        rc = filter_finish_transno(exp, oti, rc);
+        err = fsfilt_commit(obd, inode, oti->oti_handle, obd_sync_filter);
+        if (err)
+                rc = err;
+        if (obd_sync_filter)
+                LASSERT(oti->oti_transno <= obd->obd_last_committed);
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow commitrw commit %lus\n", (jiffies - now) / HZ);
+
+cleanup:
+        /* undo in reverse order of setup; cases fall through */
+        switch (cleanup_phase) {
+        case 2:
+                pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+                LASSERT(current->journal_info == NULL);
+                /* fall through */
+        case 1:
+                free_kiovec(1, &iobuf);
+                /* fall through */
+        case 0:
+                for (i = 0; i < obj->ioo_bufcnt; i++) {
+                        lnb = &res[i];
+                        /* flip_.. gets a ref, while free_page only frees
+                         * when it decrefs to 0 */
+                        if (rc == 0)
+                                flip_into_page_cache(inode, lnb->page);
+                        __free_page(lnb->page);
+                }
+                f_dput(res->dentry);
+        }
+
+        RETURN(rc);
+}
+
+#endif
+
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * linux/fs/obdfilter/filter_io.c
+ *
+ * Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ * Author: Peter Braam <braam@clusterfs.com>
+ * Author: Andreas Dilger <adilger@clusterfs.com>
+ * Author: Phil Schwan <phil@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/pagemap.h> // XXX kill me soon
+#include <linux/version.h>
+
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <linux/obd_class.h>
+#include <linux/lustre_fsfilt.h>
+#include "filter_internal.h"
+
+int ext3_map_inode_page(struct inode *inode, struct page *page,
+ unsigned long *blocks, int *created, int create);
+
+/* 512byte block min */
+#define MAX_BLOCKS_PER_PAGE (PAGE_SIZE / 512)
+struct dio_request { /* state shared between filter_commitrw_write() and dio_complete_routine() for one write */
+ atomic_t numreqs; /* number of reqs being processed */
+ struct bio *bio_list; /* list of completed bios */
+ wait_queue_head_t wait; /* submitter sleeps here until numreqs drops to 0 */
+ int created[MAX_BLOCKS_PER_PAGE]; /* per-page scratch passed to ext3_map_inode_page() */
+ unsigned long blocks[MAX_BLOCKS_PER_PAGE]; /* block numbers of the page being mapped */
+ spinlock_t lock; /* protects bio_list against the completion routine */
+};
+
+/*
+ * bi_end_io handler for the bios submitted by filter_commitrw_write().
+ *
+ * Once the whole bio has completed it is pushed onto dreq->bio_list
+ * (chained through bi_private) so the submitter can bio_put() it later,
+ * and the submitter is woken when the last outstanding bio finishes.
+ *
+ * @bio    the bio being completed; bi_private points to the dio_request
+ * @done   bytes completed by this call (not used here)
+ * @error  completion status (not recorded here)
+ *
+ * Returns 1 while the bio still has bytes outstanding, 0 once it has
+ * been fully processed.
+ */
+static int dio_complete_routine(struct bio *bio, unsigned int done, int error)
+{
+        struct dio_request *dreq = bio->bi_private;
+        unsigned long flags;
+
+        /* With this handler signature the block layer may report a bio in
+         * several partial completions.  Only act on the final one, otherwise
+         * numreqs would be decremented and the bio listed more than once. */
+        if (bio->bi_size)
+                return 1;
+
+        /* may race with other completions, hence the irq-safe lock */
+        spin_lock_irqsave(&dreq->lock, flags);
+        bio->bi_private = dreq->bio_list;
+        dreq->bio_list = bio;
+        spin_unlock_irqrestore(&dreq->lock, flags);
+        if (atomic_dec_and_test(&dreq->numreqs))
+                wake_up(&dreq->wait);
+
+        return 0;
+}
+
+/*
+ * Return 1 when @sector is the sector directly following the data already
+ * described by @bio, 0 otherwise (including when @bio is NULL).
+ */
+static int can_be_merged(struct bio *bio, sector_t sector)
+{
+        int nr_sectors;
+
+        if (bio == NULL)
+                return 0;
+
+        /* bi_size is in bytes; shift by 9 to count 512-byte sectors */
+        nr_sectors = bio->bi_size >> 9;
+        if (bio->bi_sector + nr_sectors == sector)
+                return 1;
+        return 0;
+}
+
+/*
+ * Commit a bulk write (bio path): map each page of @res to disk blocks
+ * with ext3_map_inode_page(), batch contiguous blocks into bios, submit
+ * them and wait for all of them to complete, then update the inode times
+ * and size and commit the transaction.
+ *
+ * On every exit path the pages in @res are released and the dentry
+ * reference is dropped; after a successful write the pages are first
+ * flipped into the page cache.
+ *
+ * @exp       export the write arrived on
+ * @objcount  number of objects, must be 1
+ * @obj       I/O object descriptor; ioo_bufcnt is the number of pages
+ * @niocount  passed through to fsfilt_brw_start()
+ * @res       array of ioo_bufcnt local buffers sharing res->dentry
+ * @oti       transaction info; oti_handle is set here
+ *
+ * Returns 0 on success, -ENOMEM if the request state or a bio cannot be
+ * allocated, or the error from fsfilt_brw_start(), ext3_map_inode_page(),
+ * filter_finish_transno() or fsfilt_commit().
+ */
+int filter_commitrw_write(struct obd_export *exp, int objcount,
+                          struct obd_ioobj *obj, int niocount,
+                          struct niobuf_local *res,
+                          struct obd_trans_info *oti)
+{
+        struct obd_device *obd = exp->exp_obd;
+        struct obd_run_ctxt saved;
+        struct niobuf_local *lnb;
+        struct fsfilt_objinfo fso;
+        struct iattr iattr = { .ia_valid = ATTR_SIZE, .ia_size = 0, };
+        /* needed right away for i_blkbits and by the phase-0 cleanup */
+        struct inode *inode = res->dentry->d_inode;
+        int rc = 0, i, k, cleanup_phase = 0, err;
+        unsigned long now = jiffies; /* DEBUGGING OST TIMEOUTS */
+        int blocks_per_page;
+        struct dio_request *dreq;
+        struct bio *bio = NULL;
+        ENTRY;
+        LASSERT(oti != NULL);
+        LASSERT(objcount == 1);
+        LASSERT(current->journal_info == NULL);
+
+        blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
+        LASSERT(blocks_per_page <= MAX_BLOCKS_PER_PAGE);
+
+        OBD_ALLOC(dreq, sizeof(*dreq));
+        if (dreq == NULL)
+                /* go through cleanup so the pages and dentry are released */
+                GOTO(cleanup, rc = -ENOMEM);
+        dreq->bio_list = NULL;
+        init_waitqueue_head(&dreq->wait);
+        atomic_set(&dreq->numreqs, 0);
+        spin_lock_init(&dreq->lock);
+
+        cleanup_phase = 1;              /* dreq must be freed */
+        fso.fso_dentry = res->dentry;
+        fso.fso_bufcnt = obj->ioo_bufcnt;
+
+        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        cleanup_phase = 2;              /* context must be popped */
+
+        oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso, niocount, oti);
+        if (IS_ERR(oti->oti_handle)) {
+                rc = PTR_ERR(oti->oti_handle);
+                CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
+                       "error starting transaction: rc = %d\n", rc);
+                oti->oti_handle = NULL;
+                GOTO(cleanup, rc);
+        }
+
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow brw_start %lus\n", (jiffies - now) / HZ);
+
+        /* From here on an error must NOT jump straight to cleanup: bios may
+         * already be in flight referencing dreq, and the transaction started
+         * above still has to be finished.  Errors break out of the loop and
+         * take the common wait/commit path with rc set. */
+        for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) {
+                loff_t this_size;
+                sector_t sector;
+                int offs;
+
+                /* get block number for next page */
+                rc = ext3_map_inode_page(inode, lnb->page, dreq->blocks,
+                                         dreq->created, 1);
+                if (rc)
+                        break;
+
+                /* NOTE(review): a freshly allocated bio adds the page at
+                 * offset 0 with lnb->len bytes regardless of k; this looks
+                 * right only when blocks_per_page == 1 -- confirm. */
+                for (k = 0; k < blocks_per_page; k++) {
+                        sector = dreq->blocks[k] *
+                                 (inode->i_sb->s_blocksize >> 9);
+                        offs = k * inode->i_sb->s_blocksize;
+
+                        if (!bio || !can_be_merged(bio, sector) ||
+                            !bio_add_page(bio, lnb->page, lnb->len, offs)) {
+                                if (bio) {
+                                        atomic_inc(&dreq->numreqs);
+                                        submit_bio(WRITE, bio);
+                                        bio = NULL;
+                                }
+                                /* allocate new bio */
+                                bio = bio_alloc(GFP_NOIO, obj->ioo_bufcnt);
+                                if (bio == NULL) {
+                                        rc = -ENOMEM;
+                                        break;
+                                }
+                                bio->bi_bdev = inode->i_sb->s_bdev;
+                                bio->bi_sector = sector;
+                                bio->bi_end_io = dio_complete_routine;
+                                bio->bi_private = dreq;
+
+                                if (!bio_add_page(bio, lnb->page, lnb->len, 0))
+                                        LBUG();
+                        }
+                }
+                if (rc)
+                        break;
+
+                /* We expect these pages to be in offset order, but we'll
+                 * be forgiving */
+                this_size = lnb->offset + lnb->len;
+                if (this_size > iattr.ia_size)
+                        iattr.ia_size = this_size;
+        }
+        if (bio) {
+                if (rc == 0) {
+                        atomic_inc(&dreq->numreqs);
+                        submit_bio(WRITE, bio);
+                } else {
+                        /* never submitted: just drop our reference */
+                        bio_put(bio);
+                }
+                bio = NULL;
+        }
+
+        /* time to wait for I/O completion */
+        wait_event(dreq->wait, atomic_read(&dreq->numreqs) == 0);
+
+        /* free all bios */
+        while (dreq->bio_list) {
+                bio = dreq->bio_list;
+                dreq->bio_list = bio->bi_private;
+                bio_put(bio);
+        }
+
+        if (rc == 0) {
+                down(&inode->i_sem);
+                inode_update_time(inode, 1);
+                if (iattr.ia_size > inode->i_size) {
+                        CDEBUG(D_INFO, "setting i_size to "LPU64"\n",
+                               iattr.ia_size);
+                        fsfilt_setattr(obd, res->dentry, oti->oti_handle,
+                                       &iattr, 0);
+                }
+                up(&inode->i_sem);
+        }
+
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow direct_io %lus\n", (jiffies - now) / HZ);
+
+        /* as in the kiobuf variant, the transaction is finished and
+         * committed whether or not the I/O succeeded */
+        rc = filter_finish_transno(exp, oti, rc);
+        err = fsfilt_commit(obd, inode, oti->oti_handle, obd_sync_filter);
+        if (err)
+                rc = err;
+        if (obd_sync_filter)
+                LASSERT(oti->oti_transno <= obd->obd_last_committed);
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow commitrw commit %lus\n", (jiffies - now) / HZ);
+
+cleanup:
+        /* undo in reverse order of setup; cases fall through */
+        switch (cleanup_phase) {
+        case 2:
+                pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+                LASSERT(current->journal_info == NULL);
+                /* fall through */
+        case 1:
+                OBD_FREE(dreq, sizeof(*dreq));
+                /* fall through */
+        case 0:
+                for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) {
+                        /* flip_.. gets a ref, while free_page only frees
+                         * when it decrefs to 0 */
+                        if (rc == 0)
+                                flip_into_page_cache(inode, lnb->page);
+                        __free_page(lnb->page);
+                }
+                f_dput(res->dentry);
+        }
+
+        RETURN(rc);
+}
--- /dev/null
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001-2003 Cluster File Systems, Inc.
+ * Author Peter Braam <braam@clusterfs.com>
+ *
+ * This file is part of Lustre, http://www.lustre.org.
+ *
+ * Lustre is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ *
+ * Lustre is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Lustre; if not, write to the Free Software
+ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ * For testing and management it is treated as an obd_device,
+ * although * it does not export a full OBD method table (the
+ * requests are coming * in over the wire, so object target modules
+ * do not have a full * method table.)
+ *
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_OSC
+
+#ifdef __KERNEL__
+# include <linux/version.h>
+# include <linux/module.h>
+# include <linux/mm.h>
+# include <linux/highmem.h>
+# include <linux/lustre_dlm.h>
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+# include <linux/workqueue.h>
+# include <linux/smp_lock.h>
+# else
+# include <linux/locks.h>
+# endif
+#else /* __KERNEL__ */
+# include <liblustre.h>
+#endif
+
+#include <linux/kp30.h>
+#include <linux/lustre_mds.h> /* for mds_objid */
+#include <linux/obd_ost.h>
+#include <linux/lustre_commit_confd.h>
+#include <linux/obd_lov.h>
+
+#ifndef __CYGWIN__
+# include <linux/ctype.h>
+# include <linux/init.h>
+#else
+# include <ctype.h>
+#endif
+
+#include <linux/lustre_ha.h>
+#include <linux/obd_support.h> /* for OBD_FAIL_CHECK */
+#include <linux/lustre_lite.h> /* for ll_i2info */
+#include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */
+#include <linux/lprocfs_status.h>
+#include "osc_internal.h"
+
+struct osc_created { /* state of the object pre-creation daemon thread (osccd_main) */
+ wait_queue_head_t osccd_waitq; /* the daemon sleeps on this */
+ wait_queue_head_t osccd_ctl_waitq; /* insmod rmmod sleep on this */
+ spinlock_t osccd_lock; /* taken around queue/work list changes and most osccd_flags updates */
+ int osccd_flags; /* OSCCD_* bits */
+ struct task_struct *osccd_thread; /* reset to NULL when the daemon exits */
+ struct list_head osccd_queue_list_head; /* creators queued by oscc_precreate() */
+ struct list_head osccd_work_list_head; /* creators currently handled by osccd_do_create() */
+};
+
+
+#define OSCCD_STOPPING 0x1
+#define OSCCD_STOPPED 0x2
+#define OSCCD_RUNNING 0x4
+#define OSCCD_KICKED 0x8
+#define OSCCD_PRECREATED 0x10
+
+
+static struct osc_created osc_created;
+
+/*
+ * Return non-zero when at least @count pre-created object ids are still
+ * unused, i.e. oscc_last_id - oscc_next_id >= count (signed comparison).
+ */
+static int oscc_has_objects(struct osc_creator *oscc, int count)
+{
+        __s64 spare;
+
+        /* sample both ids under the creator lock */
+        spin_lock(&oscc->oscc_lock);
+        spare = (__s64)(oscc->oscc_last_id - oscc->oscc_next_id);
+        spin_unlock(&oscc->oscc_lock);
+
+        return (spare >= count);
+}
+
+/*
+ * Ask the creation daemon for more objects unless the creator still has
+ * oscc_kick_barrier ids in reserve.  With @wait set, sleep until at least
+ * one id is available and return oscc_status; otherwise return 0 right
+ * after queueing.  A non-zero l_wait_event() result is returned as is.
+ */
+static int oscc_precreate(struct osc_creator *oscc, struct osc_created *osccd,
+                          int wait)
+{
+        struct l_wait_info lwi = { 0 };
+        int rc;
+        ENTRY;
+
+        if (oscc_has_objects(oscc, oscc->oscc_kick_barrier))
+                RETURN(0);
+
+        /* queue the creator (once) and kick the daemon; daemon lock first,
+         * then creator lock */
+        spin_lock(&osccd->osccd_lock);
+        spin_lock(&oscc->oscc_lock);
+        if (list_empty(&oscc->oscc_list)) {
+                list_add(&oscc->oscc_list, &osccd->osccd_queue_list_head);
+                osccd->osccd_flags |= OSCCD_KICKED;
+                wake_up(&osccd->osccd_waitq);
+        }
+        spin_unlock(&oscc->oscc_lock);
+        spin_unlock(&osccd->osccd_lock);
+
+        if (!wait)
+                RETURN(0);
+
+        /* an MDS using this call may time out on this.  This is a
+         * recovery style wait.
+         */
+        rc = l_wait_event(oscc->oscc_waitq, oscc_has_objects(oscc, 1),
+                          &lwi);
+        if (rc)
+                RETURN(rc);
+
+        spin_lock(&oscc->oscc_lock);
+        rc = oscc->oscc_status;
+        spin_unlock(&oscc->oscc_lock);
+        RETURN(rc);
+}
+
+/*
+ * Hand out the next pre-created object id of this export's creator.
+ *
+ * @exp  export owning the osc_creator
+ * @oa   in: request flags; out: copy of the creator's obdo with o_id set
+ * @ea   in/out: stripe md; when *ea is NULL one is allocated and stored
+ * @oti  not referenced by this function
+ *
+ * Special case: when @oa carries only OBD_FL_DELORPHAN the call is
+ * forwarded to osc_real_create() to remove orphans from oscc_next_id on
+ * up, and the creator's status and last id are reset afterwards.
+ *
+ * Returns 0 on success, a negative value when the stripe md cannot be
+ * allocated, or the osc_real_create() result in the orphan case.
+ */
+int osc_create(struct obd_export *exp, struct obdo *oa,
+               struct lov_stripe_md **ea, struct obd_trans_info *oti)
+{
+        struct lov_stripe_md *lsm;
+        struct osc_creator *oscc = &exp->u.eu_osc_data.oed_oscc;
+        struct osc_created *osccd = oscc->oscc_osccd;
+        int try_again = 1, rc = 0;
+        ENTRY;
+
+        LASSERT(oa);
+        LASSERT(ea);
+
+        /* this is the special case where create removes orphans */
+        if (oa->o_valid == OBD_MD_FLFLAGS &&
+            oa->o_flags == OBD_FL_DELORPHAN) {
+                /* delete from next_id on up */
+                oa->o_valid |= OBD_MD_FLID;
+                oa->o_id = oscc->oscc_next_id;
+                if (oa->o_id == 0)
+                        RETURN(0);
+                rc = osc_real_create(oscc->oscc_exp, oa, ea, NULL);
+
+                spin_lock(&osccd->osccd_lock);
+                spin_lock(&oscc->oscc_lock);
+                oscc->oscc_status = rc;
+                oscc->oscc_last_id = oscc->oscc_next_id - 1;
+                spin_unlock(&oscc->oscc_lock);
+                spin_unlock(&osccd->osccd_lock);
+
+                RETURN(rc);
+        }
+
+        /* The stripe md is only used by the regular path below, so allocate
+         * it after the orphan case; doing it earlier leaked it on both of
+         * the returns above. */
+        lsm = *ea;
+        if (lsm == NULL) {
+                rc = obd_alloc_memmd(exp, &lsm);
+                if (rc < 0)
+                        RETURN(rc);
+        }
+
+        while (try_again) {
+                spin_lock(&oscc->oscc_lock);
+                if (oscc->oscc_last_id >= oscc->oscc_next_id) {
+                        /* an id is available: take it */
+                        memcpy(oa, &oscc->oscc_oa, sizeof(*oa));
+                        oa->o_id = oscc->oscc_next_id;
+                        lsm->lsm_object_id = oscc->oscc_next_id;
+                        *ea = lsm;
+                        oscc->oscc_next_id++;
+                        try_again = 0;
+                }
+                spin_unlock(&oscc->oscc_lock);
+                /* waits for the daemon only when no id could be taken */
+                rc = oscc_precreate(oscc, osccd, try_again);
+        }
+
+        if (rc == 0)
+                CDEBUG(D_INFO, "returning objid "LPU64"\n", lsm->lsm_object_id);
+        else if (*ea == NULL)
+                obd_free_memmd(exp, &lsm);
+        RETURN(rc);
+}
+
+void osccd_do_create(struct osc_created *osccd)
+{ /* Service the creators on osccd's queue list one at a time: move the
+ * creator to the work list, call osc_real_create() for ids up to
+ * oscc_last_id + oscc_grow_count with both locks dropped, then store the
+ * result in oscc_status / oscc_last_id and wake oscc_waitq sleepers. */
+ struct list_head *tmp;
+
+ next:
+ spin_lock(&osccd->osccd_lock);
+ list_for_each (tmp, &osccd->osccd_queue_list_head) { /* body always ends in "goto next", so each pass handles the first entry only */
+ int rc;
+ struct osc_creator *oscc = list_entry(tmp, struct osc_creator,
+ oscc_list);
+ list_del_init(&oscc->oscc_list);
+ list_add(&oscc->oscc_list, &osccd->osccd_work_list_head);
+ spin_lock(&oscc->oscc_lock);
+ oscc->oscc_oa.o_id = oscc->oscc_last_id + oscc->oscc_grow_count;
+ oscc->oscc_oa.o_valid |= OBD_MD_FLID;
+ spin_unlock(&oscc->oscc_lock);
+ spin_unlock(&osccd->osccd_lock); /* no locks are held across osc_real_create() */
+
+ rc = osc_real_create(oscc->oscc_exp, &oscc->oscc_oa,
+ &oscc->oscc_ea, NULL);
+
+ /* This is not used and leaked, so might as well free
+ * it now.*/
+ if (rc == 0 && oscc->oscc_ea != NULL)
+ obd_free_memmd(oscc->oscc_exp, &oscc->oscc_ea);
+
+ spin_lock(&osccd->osccd_lock);
+ spin_lock(&oscc->oscc_lock);
+ list_del_init(&oscc->oscc_list); /* off the work list; may be queued again later */
+ oscc->oscc_status = rc;
+ oscc->oscc_last_id = oscc->oscc_oa.o_id; /* stored whatever rc is */
+ spin_unlock(&oscc->oscc_lock);
+ spin_unlock(&osccd->osccd_lock);
+
+ CDEBUG(D_INFO, "preallocated through id "LPU64" (last used "
+ LPU64")\n", oscc->oscc_last_id, oscc->oscc_next_id);
+ wake_up(&oscc->oscc_waitq);
+ goto next; /* locks were dropped, so restart the walk from the list head */
+ }
+ spin_unlock(&osccd->osccd_lock); /* reached only when the queue list is empty */
+}
+
+/*
+ * Body of the object pre-creation daemon thread.
+ *
+ * Daemonizes, blocks all signals, flags itself OSCCD_RUNNING and then
+ * sleeps on osccd_waitq until it is kicked (runs osccd_do_create()) or
+ * told to stop.  On exit it clears osccd_thread, sets OSCCD_STOPPED and
+ * wakes osccd_ctl_waitq.
+ *
+ * @arg  the struct osc_created to serve
+ *
+ * Always returns 0.
+ */
+static int osccd_main(void *arg)
+{
+        struct osc_created *osccd = (struct osc_created *)arg;
+        unsigned long flags;
+        ENTRY;
+
+        lock_kernel();
+        kportal_daemonize("lustre_created");
+
+        /* block every signal for this thread */
+        SIGNAL_MASK_LOCK(current, flags);
+        sigfillset(&current->blocked);
+        RECALC_SIGPENDING;
+        SIGNAL_MASK_UNLOCK(current, flags);
+
+        unlock_kernel();
+
+        /* Record that the thread is running */
+        osccd->osccd_flags = OSCCD_RUNNING;
+        wake_up(&osccd->osccd_ctl_waitq);
+
+        /* And now, loop forever on requests */
+        while (1) {
+                struct l_wait_info lwi = { 0 };
+
+                l_wait_event(osccd->osccd_waitq,
+                             osccd->osccd_flags & (OSCCD_STOPPING|OSCCD_KICKED),
+                             &lwi);
+
+                spin_lock(&osccd->osccd_lock);
+                if (osccd->osccd_flags & OSCCD_STOPPING) {
+                        spin_unlock(&osccd->osccd_lock);
+                        EXIT;
+                        break;
+                }
+                /* consume the kick before working so a new one is not lost */
+                osccd->osccd_flags &= ~OSCCD_KICKED;
+                spin_unlock(&osccd->osccd_lock);
+                osccd_do_create(osccd);
+        }
+
+        osccd->osccd_thread = NULL;
+        osccd->osccd_flags = OSCCD_STOPPED;
+        wake_up(&osccd->osccd_ctl_waitq);
+        CDEBUG(D_NET, "commit callback daemon exiting %d\n", current->pid);
+        RETURN(0);
+}
+
+/*
+ * Reset the OSC export data of the export behind @exph and set up its
+ * object creator with the default tunables.  Does nothing when the handle
+ * does not resolve to an export.
+ */
+void oscc_init(struct lustre_handle *exph)
+{
+        struct obd_export *exp = class_conn2export(exph);
+        struct osc_export_data *oed;
+        struct osc_creator *oscc;
+
+        if (exp == NULL)
+                return;
+
+        oed = &exp->exp_osc_data;
+        memset(oed, 0, sizeof(*oed));
+
+        oscc = &oed->oed_oscc;
+        INIT_LIST_HEAD(&oscc->oscc_list);
+        init_waitqueue_head(&oscc->oscc_waitq);
+        spin_lock_init(&oscc->oscc_lock);
+        oscc->oscc_exp = exp;
+        oscc->oscc_osccd = &osc_created;
+
+        /* default tunables */
+        oscc->oscc_kick_barrier = 50;
+        oscc->oscc_grow_count = 100;
+        oscc->oscc_initial_create_count = 100;
+
+        /* last_id < next_id: no pre-created ids are available yet */
+        oscc->oscc_next_id = 2;
+        oscc->oscc_last_id = 1;
+        /* XXX the export handle should give the oscc the last object */
+        /* oed->oed_oscc.oscc_last_id = exph->....; */
+}
+
+/*
+ * Initialise the global creation daemon state, start the osccd_main
+ * thread and wait until it reports OSCCD_RUNNING.
+ *
+ * Returns 0 on success or the negative kernel_thread() result.
+ */
+int osccd_setup(void)
+{
+        struct l_wait_info lwi = { 0 };
+        struct osc_created *osccd = &osc_created;
+        int rc;
+        ENTRY;
+
+        /* lists, wait queues and lock must be ready before the thread runs */
+        INIT_LIST_HEAD(&osccd->osccd_queue_list_head);
+        INIT_LIST_HEAD(&osccd->osccd_work_list_head);
+        init_waitqueue_head(&osccd->osccd_ctl_waitq);
+        init_waitqueue_head(&osccd->osccd_waitq);
+        spin_lock_init(&osccd->osccd_lock);
+
+        rc = kernel_thread(osccd_main, osccd,
+                           CLONE_VM | CLONE_FS | CLONE_FILES);
+        if (rc < 0) {
+                CERROR("cannot start thread\n");
+                RETURN(rc);
+        }
+
+        /* the thread wakes osccd_ctl_waitq once it has set OSCCD_RUNNING */
+        l_wait_event(osccd->osccd_ctl_waitq,
+                     osccd->osccd_flags & OSCCD_RUNNING, &lwi);
+        RETURN(0);
+}
+
+/*
+ * Ask the creation daemon to stop and wait until it has flagged itself
+ * OSCCD_STOPPED.  Always returns 0.
+ */
+int osccd_cleanup(void)
+{
+        struct l_wait_info lwi = { 0 };
+        struct osc_created *osccd = &osc_created;
+        ENTRY;
+
+        /* replace all flags with the stop request */
+        spin_lock(&osccd->osccd_lock);
+        osccd->osccd_flags = OSCCD_STOPPING;
+        spin_unlock(&osccd->osccd_lock);
+
+        /* wake the daemon, then sleep until it acknowledges */
+        wake_up(&osccd->osccd_waitq);
+        l_wait_event(osccd->osccd_ctl_waitq,
+                     osccd->osccd_flags & OSCCD_STOPPED, &lwi);
+        RETURN(0);
+}
};
int ptlrpc_expire_one_request(struct ptlrpc_request *req);
-int ptlrpc_check_set(struct ptlrpc_request_set *set);
void ptlrpc_pinger_sending_on_import(struct obd_import *imp);
#endif /* PTLRPC_INTERNAL_H */
{
ptlrpc_exit_portals();
ptlrpc_cleanup_connection();
-#ifdef ENABLE_ORPHANS
llog_cleanup_commit_master(0);
-#endif
}
/* connection.c */
EXPORT_SYMBOL(ptlrpc_prep_set);
EXPORT_SYMBOL(ptlrpc_set_add_req);
+EXPORT_SYMBOL(ptlrpc_set_add_new_req);
EXPORT_SYMBOL(ptlrpc_set_destroy);
+EXPORT_SYMBOL(ptlrpc_set_next_timeout);
+EXPORT_SYMBOL(ptlrpc_check_set);
EXPORT_SYMBOL(ptlrpc_set_wait);
+EXPORT_SYMBOL(ptlrpc_expired_set);
+EXPORT_SYMBOL(ptlrpc_interrupted_set);
/* service.c */
EXPORT_SYMBOL(ptlrpc_init_svc);
EXPORT_SYMBOL(lustre_swab_obd_ioobj);
EXPORT_SYMBOL(lustre_swab_niobuf_remote);
EXPORT_SYMBOL(lustre_swab_ost_body);
+EXPORT_SYMBOL(lustre_swab_ost_last_id);
EXPORT_SYMBOL(lustre_swab_ll_fid);
EXPORT_SYMBOL(lustre_swab_mds_status_req);
EXPORT_SYMBOL(lustre_swab_mds_fileh_body);
--- /dev/null
+#!/bin/sh
+
+set -e
+
+# Skip these tests
+# 3 - bug 1852
+ALWAYS_EXCEPT="3"
+
+LUSTRE=${LUSTRE:-`dirname $0`/..}
+LTESTDIR=${LTESTDIR:-$LUSTRE/../ltest}
+PATH=$LUSTRE/utils:$LUSTRE/tests:$PATH
+
+RLUSTRE=${RLUSTRE:-$LUSTRE}
+RPWD=${RPWD:-$PWD}
+
+XMLCONFIG="`basename $0 .sh`.xml"
+
+. $LUSTRE/tests/test-framework.sh
+
+CHECKSTAT="${CHECKSTAT:-checkstat} -v"
+
+# XXX I wish all this stuff was in some default-config.sh somewhere
+MOUNT=${MOUNT:-/mnt/lustre}
+DIR=${DIR:-$MOUNT}
+MDSDEV=${MDSDEV:-/tmp/mds-`hostname`}
+MDSSIZE=${MDSSIZE:-100000}
+OSTDEV=${OSTDEV:-/tmp/ost-`hostname`}
+OSTSIZE=${OSTSIZE:-100000}
+UPCALL=${UPCALL:-$PWD/replay-single-upcall.sh}
+FSTYPE=${FSTYPE:-ext3}
+TIMEOUT=${TIMEOUT:-5}
+
+STRIPE_BYTES=65536
+STRIPES_PER_OBJ=1
+
+
+# Rebuild $XMLCONFIG from scratch: register the mds, ost and client facets,
+# then add the MDS, the LOV, the failover OST and the client mount point.
+gen_config() {
+    rm -f $XMLCONFIG
+
+    add_facet mds
+    add_facet ost
+    add_facet client --lustre_upcall $UPCALL
+
+    do_lmc --add mds --node mds_facet --mds mds1 \
+        --dev $MDSDEV --size $MDSSIZE
+    do_lmc --add lov --mds mds1 --lov lov1 \
+        --stripe_sz $STRIPE_BYTES --stripe_cnt $STRIPES_PER_OBJ \
+        --stripe_pattern 0
+    do_lmc --add ost --lov lov1 --failover --node ost_facet --ost ost1 \
+        --dev $OSTDEV --size $OSTSIZE
+    do_lmc --add mtpt --node client_facet --path $MOUNT \
+        --mds mds1 --ost lov1
+}
+
+
+build_test_filter
+
+gen_config
+start mds --reformat $MDSLCONFARGS
+start ost --reformat $OSTLCONFARGS
+start client --gdb $CLIENTLCONFARGS
+
+mkdir -p $DIR
+
+# Test 0: set a replay barrier on the OST and fail it with nothing in between.
+test_0() {
+    local target=ost
+
+    replay_barrier $target
+    fail $target
+}
+run_test 0 "empty replay"
+
+test_1() {
+ replay_barrier ost
+ touch $DIR/$tfile
+ fail ost
+ $CHECKSTAT -t file $DIR/$tfile || return 1
+}
+run_test 1 "touch"
+
+# Test 2: write ten tagged files after the replay barrier, fail the OST, then
+# check that every file still contains its own tag.
+test_2() {
+    local target=ost
+
+    replay_barrier $target
+    for i in $(seq 1 10); do
+        echo "tag-$i" > $DIR/$tfile-$i
+    done
+    fail $target
+    for i in $(seq 1 10); do
+        if ! grep -q "tag-$i" $DIR/$tfile-$i; then
+            error "f1c-$i"
+        fi
+    done
+}
+run_test 2 "|x| 10 open(O_CREAT)s"
+
+# Tear the facets down before exiting.  The unconditional "exit 0" used to
+# sit above this section, which made the cleanup unreachable.
+equals_msg test complete, cleaning up
+stop client ${FORCE:=--force} $CLIENTLCONFARGS
+stop ost ${FORCE}
+stop mds ${FORCE} $MDSLCONFARGS --dump cleanup.log
+
+exit 0
+
--- /dev/null
+#!/bin/sh
+
+set -e
+
+init_test_env() {
+ export TESTSUITE=`basename $0 .sh`
+ export XMLCONFIG="${TESTSUITE}.xml"
+ export LTESTDIR=${LTESTDIR:-$LUSTRE/../ltest}
+ export PATH=$LUSTRE/utils:$LUSTRE/tests:$PATH
+
+ export RLUSTRE=${RLUSTRE:-$LUSTRE}
+ export RPWD=${RPWD:-$PWD}
+ export CHECKSTAT="${CHECKSTAT:-checkstat} -v"
+}
+
+# Run lconf for the node "<facet>_facet" described in $XMLCONFIG.
+#   $1    facet name
+#   $2..  extra lconf options, forwarded unchanged
+start() {
+    facet=$1
+    shift
+    # "$@" keeps each forwarded option as one word; the unquoted form
+    # re-split and glob-expanded them.
+    lconf --node ${facet}_facet "$@" $XMLCONFIG
+}
+
+# Run "lconf --cleanup" for the node "<facet>_facet" described in $XMLCONFIG.
+#   $1    facet name
+#   $2..  extra lconf options, forwarded unchanged
+stop() {
+    facet=$1
+    shift
+    # "$@" keeps each forwarded option as one word; the unquoted form
+    # re-split and glob-expanded them.
+    lconf --node ${facet}_facet "$@" --cleanup $XMLCONFIG
+}
+
+replay_barrier() {
+ local dev=$1
+ sync
+ df $MOUNT
+ lctl --device %${dev}1 readonly
+ lctl --device %${dev}1 notransno
+ lctl mark "REPLAY BARRIER"
+}
+
+# Fail over a facet: stop it with --force --failover, start it again, then
+# run df on $MOUNT.  Both the stop and the start pass --nomod.
+#   $1  facet name
+fail() {
+# stop() and start() assign to a non-local "facet".  NOTE(review): with a
+# dynamically scoped "local", that assignment presumably lands on this
+# function's copy, so keep the name as is -- confirm before renaming.
+ local facet=$1
+ stop $facet --force --failover --nomod
+ start $facet --nomod
+ df $MOUNT
+}
+
+do_lmc() {
+ lmc -m ${XMLCONFIG} $@
+}
+
+# Add the node "<facet>_facet" to the configuration, plus a tcp net entry
+# for it with nid localhost.
+#   $1    facet name
+#   $2..  extra options for the "--add node" call, forwarded unchanged
+add_facet() {
+    local facet=$1
+    shift
+    # "$@" keeps each forwarded option as one word; the unquoted form
+    # re-split and glob-expanded them.
+    do_lmc --add node --node ${facet}_facet "$@" --timeout $TIMEOUT
+    do_lmc --add net --node ${facet}_facet --nid localhost --nettype tcp
+}
+
+# Print a failure line prefixed with the suite name and exit with status 1.
+#   $@  words of the failure message
+error() {
+    # Quoted so that glob characters in the message are printed literally
+    # instead of being expanded against the current directory.
+    echo "${TESTSUITE}: **** FAIL:" "$@"
+    exit 1
+}
+
+# Define one flag variable per filtered test so that run_test can look the
+# test up by name: ONLY_<n>=true for each word in $ONLY, EXCEPT_<n>=true for
+# each word in $EXCEPT and $ALWAYS_EXCEPT.
+build_test_filter() {
+    # Tests explicitly requested.
+    for O in $ONLY; do
+        eval "ONLY_${O}=true"
+    done
+
+    # Tests to skip.
+    for E in $EXCEPT $ALWAYS_EXCEPT; do
+        eval "EXCEPT_${E}=true"
+    done
+}
+
+# Helper for basetest: echoes its arguments.  basetest invokes it with IFS
+# set to the lowercase letters and $* is left unquoted here, so
+# (NOTE(review): presumably) the letters act as field separators and do not
+# appear in the output -- confirm with the shell in use.
+_basetest() {
+ echo $*
+}
+
+# Print test name $1 as processed by _basetest under a letters-only IFS;
+# presumably "1a" becomes "1".  run_test stores the result in $base.
+basetest() {
+ IFS=abcdefghijklmnopqrstuvwxyz _basetest $1
+}
+
+run_test() {
+ export base=`basetest $1`
+ if [ ! -z "$ONLY" ]; then
+ testname=ONLY_$1
+ if [ ${!testname}x != x ]; then
+ run_one $1 "$2"
+ return $?
+ fi
+ testname=ONLY_$base
+ if [ ${!testname}x != x ]; then
+ run_one $1 "$2"
+ return $?
+ fi
+ echo -n "."
+ return 0
+ fi
+ testname=EXCEPT_$1
+ if [ ${!testname}x != x ]; then
+ echo "skipping excluded test $1"
+ return 0
+ fi
+ testname=EXCEPT_$base
+ if [ ${!testname}x != x ]; then
+ echo "skipping excluded test $1 (base $base)"
+ return 0
+ fi
+ run_one $1 "$2"
+
+ return $?
+}
+
+EQUALS="======================================================================"
+equals_msg() {
+ msg="$@"
+
+ local suffixlen=$((65 - ${#msg}))
+ printf '===== %s %.*s\n' "$msg" $suffixlen $EQUALS
+}
+
+run_one() {
+ testnum=$1
+ message=$2
+ tfile=f$base
+ tdir=d$base
+
+ # Pretty tests run faster.
+ equals_msg $testnum: $message
+
+ test_${testnum} || error "test_$testnum failed with $?"
+}