From 147e8cdf3794e8b9c60bbf11b945575d6185290d Mon Sep 17 00:00:00 2001
From: phil <phil>
Date: Tue, 9 Sep 2003 03:55:05 +0000
Subject: [PATCH] merge b_llpmd into b_devel.  the major highlights: - new I/O
 backend - new client page cache and llite/lov/osc plumbing - pre-creation of
 OST objects - most of the OBD protocol now revolves around exports, not
 obd_devices

---
 lustre/include/linux/lustre_compat25.h             |   35 +-
 .../kernel_configs/uml_2.6.0_test3.config          |  325 +
 .../patches/dynamic-locks-2.4.18-chaos.patch       |  212 +
 .../patches/dynamic-locks-2.4.20-rh.patch          |  217 +
 .../kernel_patches/patches/ext-2.4-patch-5.patch   |   15 +
 .../patches/ext3-2.4.18-ino_sb_macro-2.patch       | 1461 ++++
 .../patches/ext3-compat-2.4.18-chaos.patch         |   19 +
 .../patches/ext3-delete_thread-2.4.18-2.patch      |  478 ++
 .../patches/ext3-extents-2.4.18-chaos.patch        | 1831 ++++
 .../patches/ext3-extents-oflag-2.4.18-chaos.patch  |  291 +
 .../patches/ext3-map_inode_page-2.6.0.patch        |   76 +
 .../patches/ext3-no-write-super-chaos.patch        |   15 +
 .../patches/ext3-o_direct-1.2.4.20-rh.patch        |  197 +
 .../patches/ext3-pdirops-2.4.18-chaos.patch        | 1238 +++
 lustre/kernel_patches/patches/iopen-2.4.18-2.patch |  422 +
 lustre/kernel_patches/patches/iopen-2.6.0.patch    |  403 +
 .../patches/linux-2.4.18ea-0.8.26-2.patch          | 1775 ++++
 .../kernel_patches/patches/removepage-2.4.20.patch |   28 +
 .../kernel_patches/patches/removepage-2.6.0.patch  |   28 +
 lustre/kernel_patches/patches/uml-2.6.0-fix.patch  |   19 +
 .../patches/uml-patch-2.6.0-test3-1.patch          | 8716 ++++++++++++++++++++
 .../patches/vfs-pdirops-2.4.18-chaos.patch         |  265 +
 .../patches/vfs-pdirops-2.4.20-rh.patch            |  269 +
 .../pc/dynamic-locks-2.4.18-chaos.pc               |    3 +
 lustre/kernel_patches/pc/ext-2.4-patch-5.pc        |    1 +
 .../pc/ext3-2.4.18-ino_sb_macro-2.pc               |   20 +
 .../kernel_patches/pc/ext3-compat-2.4.18-chaos.pc  |    1 +
 .../pc/ext3-delete_thread-2.4.18-2.pc              |    6 +
 .../kernel_patches/pc/ext3-extents-2.4.18-chaos.pc |    8 +
 .../pc/ext3-extents-oflag-2.4.18-chaos.pc          |   19 +
 .../kernel_patches/pc/ext3-map_inode_page-2.6.0.pc |    2 +
 .../kernel_patches/pc/ext3-no-write-super-chaos.pc |    1 +
 .../kernel_patches/pc/ext3-pdirops-2.4.18-chaos.pc |    6 +
 lustre/kernel_patches/pc/iopen-2.4.18-2.pc         |    8 +
 lustre/kernel_patches/pc/iopen-2.6.0.pc            |    8 +
 .../kernel_patches/pc/linux-2.4.18ea-0.8.26-2.pc   |   11 +
 lustre/kernel_patches/pc/removepage-2.4.20.pc      |    2 +
 lustre/kernel_patches/pc/removepage-2.6.0.pc       |    2 +
 lustre/kernel_patches/pc/uml-2.6.0-fix.pc          |    1 +
 .../kernel_patches/pc/uml-patch-2.6.0-test3-1.pc   |  113 +
 .../kernel_patches/pc/vfs-pdirops-2.4.18-chaos.pc  |    3 +
 lustre/kernel_patches/series/chaos-2.4.18          |    6 +-
 lustre/kernel_patches/series/chaos-2.4.18-pdirops  |   35 +
 lustre/kernel_patches/series/uml_2.6.0_test3       |   14 +
 lustre/liblustre/file.c                            |    4 +-
 lustre/liblustre/llite_lib.h                       |    2 +-
 lustre/liblustre/rw.c                              |    2 +-
 lustre/liblustre/super.c                           |   10 +-
 lustre/mdc/mdc_locks.c                             |  550 ++
 lustre/obdfilter/filter_io_24.c                    |  237 +
 lustre/obdfilter/filter_io_26.c                    |  228 +
 lustre/osc/osc_create.c                            |  343 +
 lustre/ptlrpc/ptlrpc_internal.h                    |    1 -
 lustre/ptlrpc/ptlrpc_module.c                      |    8 +-
 lustre/tests/replay-ost-single.sh                  |   90 +
 lustre/tests/test-framework.sh                     |  126 +
 56 files changed, 20171 insertions(+), 35 deletions(-)
 create mode 100644 lustre/kernel_patches/kernel_configs/uml_2.6.0_test3.config
 create mode 100644 lustre/kernel_patches/patches/dynamic-locks-2.4.18-chaos.patch
 create mode 100644 lustre/kernel_patches/patches/dynamic-locks-2.4.20-rh.patch
 create mode 100644 lustre/kernel_patches/patches/ext-2.4-patch-5.patch
 create mode 100644 lustre/kernel_patches/patches/ext3-2.4.18-ino_sb_macro-2.patch
 create mode 100644 lustre/kernel_patches/patches/ext3-compat-2.4.18-chaos.patch
 create mode 100644 lustre/kernel_patches/patches/ext3-delete_thread-2.4.18-2.patch
 create mode 100644 lustre/kernel_patches/patches/ext3-extents-2.4.18-chaos.patch
 create mode 100644 lustre/kernel_patches/patches/ext3-extents-oflag-2.4.18-chaos.patch
 create mode 100644 lustre/kernel_patches/patches/ext3-map_inode_page-2.6.0.patch
 create mode 100644 lustre/kernel_patches/patches/ext3-no-write-super-chaos.patch
 create mode 100644 lustre/kernel_patches/patches/ext3-o_direct-1.2.4.20-rh.patch
 create mode 100644 lustre/kernel_patches/patches/ext3-pdirops-2.4.18-chaos.patch
 create mode 100644 lustre/kernel_patches/patches/iopen-2.4.18-2.patch
 create mode 100644 lustre/kernel_patches/patches/iopen-2.6.0.patch
 create mode 100644 lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26-2.patch
 create mode 100644 lustre/kernel_patches/patches/removepage-2.4.20.patch
 create mode 100644 lustre/kernel_patches/patches/removepage-2.6.0.patch
 create mode 100644 lustre/kernel_patches/patches/uml-2.6.0-fix.patch
 create mode 100644 lustre/kernel_patches/patches/uml-patch-2.6.0-test3-1.patch
 create mode 100644 lustre/kernel_patches/patches/vfs-pdirops-2.4.18-chaos.patch
 create mode 100644 lustre/kernel_patches/patches/vfs-pdirops-2.4.20-rh.patch
 create mode 100644 lustre/kernel_patches/pc/dynamic-locks-2.4.18-chaos.pc
 create mode 100644 lustre/kernel_patches/pc/ext-2.4-patch-5.pc
 create mode 100644 lustre/kernel_patches/pc/ext3-2.4.18-ino_sb_macro-2.pc
 create mode 100644 lustre/kernel_patches/pc/ext3-compat-2.4.18-chaos.pc
 create mode 100644 lustre/kernel_patches/pc/ext3-delete_thread-2.4.18-2.pc
 create mode 100644 lustre/kernel_patches/pc/ext3-extents-2.4.18-chaos.pc
 create mode 100644 lustre/kernel_patches/pc/ext3-extents-oflag-2.4.18-chaos.pc
 create mode 100644 lustre/kernel_patches/pc/ext3-map_inode_page-2.6.0.pc
 create mode 100644 lustre/kernel_patches/pc/ext3-no-write-super-chaos.pc
 create mode 100644 lustre/kernel_patches/pc/ext3-pdirops-2.4.18-chaos.pc
 create mode 100644 lustre/kernel_patches/pc/iopen-2.4.18-2.pc
 create mode 100644 lustre/kernel_patches/pc/iopen-2.6.0.pc
 create mode 100644 lustre/kernel_patches/pc/linux-2.4.18ea-0.8.26-2.pc
 create mode 100644 lustre/kernel_patches/pc/removepage-2.4.20.pc
 create mode 100644 lustre/kernel_patches/pc/removepage-2.6.0.pc
 create mode 100644 lustre/kernel_patches/pc/uml-2.6.0-fix.pc
 create mode 100644 lustre/kernel_patches/pc/uml-patch-2.6.0-test3-1.pc
 create mode 100644 lustre/kernel_patches/pc/vfs-pdirops-2.4.18-chaos.pc
 create mode 100644 lustre/kernel_patches/series/chaos-2.4.18-pdirops
 create mode 100644 lustre/kernel_patches/series/uml_2.6.0_test3
 create mode 100644 lustre/mdc/mdc_locks.c
 create mode 100644 lustre/obdfilter/filter_io_24.c
 create mode 100644 lustre/obdfilter/filter_io_26.c
 create mode 100644 lustre/osc/osc_create.c
 create mode 100755 lustre/tests/replay-ost-single.sh
 create mode 100644 lustre/tests/test-framework.sh
diff --git a/lustre/include/linux/lustre_compat25.h b/lustre/include/linux/lustre_compat25.h
index 96e52c4..1f26364 100644
--- a/lustre/include/linux/lustre_compat25.h
+++ b/lustre/include/linux/lustre_compat25.h
@@ -32,8 +32,11 @@
 #include <linux/portals_compat25.h>
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-# define PGCACHE_WRLOCK(mapping)          write_lock(&mapping->page_lock)
-# define PGCACHE_WRUNLOCK(mapping)        write_unlock(&mapping->page_lock)
+
+/* XXX our code should be using the 2.6 calls, not the other way around */
+#define TryLockPage(page)                TestSetPageLocked(page)
+#define filemap_fdatasync(mapping)       filemap_fdatawrite(mapping)
+#define Page_Uptodate(page)              PageUptodate(page)
 
 #define KDEVT_INIT(val)                 { .value = val }
 
@@ -46,12 +49,22 @@
 
 #define ll_vfs_create(a,b,c,d)              vfs_create(a,b,c,d)
 
+#define ll_dev_t                        dev_t
+
+#include <linux/writeback.h>
+
 #else /* 2.4.. */
 
 #define ll_vfs_create(a,b,c,d)              vfs_create(a,b,c)
 #define ll_permission(a,b,c)                permission(a,b)
-# define PGCACHE_WRLOCK(mapping)          spin_lock(&pagecache_lock)
-# define PGCACHE_WRUNLOCK(mapping)        spin_unlock(&pagecache_lock)
+
+#define ll_dev_t                        int
+
+static inline void clear_page_dirty(struct page *page)
+{
+        if (PageDirty(page))
+                ClearPageDirty(page); 
+}
 
 /* 2.5 uses hlists for some things, like the d_hash.  we'll treat them
  * as 2.5 and let macros drop back.. */
@@ -94,20 +107,6 @@ static inline void __d_drop(struct dentry *dentry)
 
 #endif /* end of 2.4 compat macros */
 
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-# define filemap_fdatasync(mapping)       filemap_fdatawrite(mapping)
-#endif
-
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-# define TryLockPage(page)                TestSetPageLocked(page)
-#endif
-
-
-#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0)
-# define Page_Uptodate(page)              PageUptodate(page)
-#endif
-
 #if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0))
 #define  rb_node_s rb_node
 #define  rb_root_s rb_root
diff --git a/lustre/kernel_patches/kernel_configs/uml_2.6.0_test3.config b/lustre/kernel_patches/kernel_configs/uml_2.6.0_test3.config
new file mode 100644
index 0000000..f933188
--- /dev/null
+++ b/lustre/kernel_patches/kernel_configs/uml_2.6.0_test3.config
@@ -0,0 +1,325 @@
+#
+# Automatically generated make config: don't edit
+#
+CONFIG_USERMODE=y
+CONFIG_MMU=y
+CONFIG_UID16=y
+CONFIG_RWSEM_GENERIC_SPINLOCK=y
+
+#
+# UML-specific options
+#
+CONFIG_MODE_TT=y
+# CONFIG_MODE_SKAS is not set
+CONFIG_NET=y
+CONFIG_BINFMT_ELF=y
+CONFIG_BINFMT_MISC=y
+CONFIG_HOSTFS=y
+# CONFIG_HPPFS is not set
+CONFIG_MCONSOLE=y
+CONFIG_MAGIC_SYSRQ=y
+# CONFIG_HOST_2G_2G is not set
+# CONFIG_UML_SMP is not set
+# CONFIG_SMP is not set
+CONFIG_NEST_LEVEL=0
+CONFIG_KERNEL_HALF_GIGS=1
+# CONFIG_HIGHMEM is not set
+# CONFIG_PROC_MM is not set
+CONFIG_KERNEL_STACK_ORDER=2
+
+#
+# Code maturity level options
+#
+CONFIG_EXPERIMENTAL=y
+
+#
+# General setup
+#
+CONFIG_SWAP=y
+CONFIG_SYSVIPC=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_SYSCTL=y
+CONFIG_LOG_BUF_SHIFT=14
+# CONFIG_EMBEDDED is not set
+CONFIG_KALLSYMS=y
+CONFIG_FUTEX=y
+CONFIG_EPOLL=y
+CONFIG_IOSCHED_AS=y
+CONFIG_IOSCHED_DEADLINE=y
+
+#
+# Loadable module support
+#
+# CONFIG_MODULES is not set
+
+#
+# Generic Driver Options
+#
+# CONFIG_FW_LOADER is not set
+
+#
+# Character Devices
+#
+CONFIG_STDIO_CONSOLE=y
+CONFIG_SSL=y
+CONFIG_FD_CHAN=y
+# CONFIG_NULL_CHAN is not set
+CONFIG_PORT_CHAN=y
+CONFIG_PTY_CHAN=y
+CONFIG_TTY_CHAN=y
+CONFIG_XTERM_CHAN=y
+CONFIG_CON_ZERO_CHAN="fd:0,fd:1"
+CONFIG_CON_CHAN="xterm"
+CONFIG_SSL_CHAN="pty"
+CONFIG_UNIX98_PTYS=y
+CONFIG_UNIX98_PTY_COUNT=256
+# CONFIG_WATCHDOG is not set
+# CONFIG_UML_SOUND is not set
+# CONFIG_SOUND is not set
+# CONFIG_HOSTAUDIO is not set
+
+#
+# Block Devices
+#
+CONFIG_BLK_DEV_UBD=y
+# CONFIG_BLK_DEV_UBD_SYNC is not set
+CONFIG_BLK_DEV_COW_COMMON=y
+CONFIG_BLK_DEV_LOOP=y
+# CONFIG_BLK_DEV_NBD is not set
+# CONFIG_BLK_DEV_RAM is not set
+# CONFIG_MMAPPER is not set
+CONFIG_NETDEVICES=y
+
+#
+# UML Network Devices
+#
+CONFIG_UML_NET=y
+CONFIG_UML_NET_ETHERTAP=y
+CONFIG_UML_NET_TUNTAP=y
+CONFIG_UML_NET_SLIP=y
+CONFIG_UML_NET_DAEMON=y
+CONFIG_UML_NET_MCAST=y
+# CONFIG_UML_NET_PCAP is not set
+# CONFIG_UML_NET_SLIRP is not set
+
+#
+# Networking support
+#
+
+#
+# Networking options
+#
+CONFIG_PACKET=y
+CONFIG_PACKET_MMAP=y
+# CONFIG_NETLINK_DEV is not set
+CONFIG_UNIX=y
+# CONFIG_NET_KEY is not set
+CONFIG_INET=y
+# CONFIG_IP_MULTICAST is not set
+# CONFIG_IP_ADVANCED_ROUTER is not set
+# CONFIG_IP_PNP is not set
+# CONFIG_NET_IPIP is not set
+# CONFIG_NET_IPGRE is not set
+# CONFIG_ARPD is not set
+# CONFIG_INET_ECN is not set
+# CONFIG_SYN_COOKIES is not set
+# CONFIG_INET_AH is not set
+# CONFIG_INET_ESP is not set
+# CONFIG_INET_IPCOMP is not set
+# CONFIG_IPV6 is not set
+# CONFIG_DECNET is not set
+# CONFIG_BRIDGE is not set
+# CONFIG_NETFILTER is not set
+# CONFIG_XFRM_USER is not set
+
+#
+# SCTP Configuration (EXPERIMENTAL)
+#
+CONFIG_IPV6_SCTP__=y
+# CONFIG_IP_SCTP is not set
+# CONFIG_ATM is not set
+# CONFIG_VLAN_8021Q is not set
+# CONFIG_LLC is not set
+# CONFIG_X25 is not set
+# CONFIG_LAPB is not set
+# CONFIG_NET_DIVERT is not set
+# CONFIG_ECONET is not set
+# CONFIG_WAN_ROUTER is not set
+# CONFIG_NET_FASTROUTE is not set
+# CONFIG_NET_HW_FLOWCONTROL is not set
+
+#
+# QoS and/or fair queueing
+#
+# CONFIG_NET_SCHED is not set
+
+#
+# Network testing
+#
+# CONFIG_NET_PKTGEN is not set
+CONFIG_DUMMY=y
+# CONFIG_BONDING is not set
+# CONFIG_EQUALIZER is not set
+CONFIG_TUN=y
+# CONFIG_ETHERTAP is not set
+
+#
+# Ethernet (10 or 100Mbit)
+#
+# CONFIG_NET_ETHERNET is not set
+
+#
+# Ethernet (1000 Mbit)
+#
+
+#
+# Ethernet (10000 Mbit)
+#
+CONFIG_PPP=y
+# CONFIG_PPP_MULTILINK is not set
+# CONFIG_PPP_FILTER is not set
+# CONFIG_PPP_ASYNC is not set
+# CONFIG_PPP_SYNC_TTY is not set
+# CONFIG_PPP_DEFLATE is not set
+# CONFIG_PPP_BSDCOMP is not set
+# CONFIG_PPPOE is not set
+CONFIG_SLIP=y
+# CONFIG_SLIP_COMPRESSED is not set
+# CONFIG_SLIP_SMART is not set
+# CONFIG_SLIP_MODE_SLIP6 is not set
+
+#
+# Wireless LAN (non-hamradio)
+#
+# CONFIG_NET_RADIO is not set
+
+#
+# Token Ring devices (depends on LLC=y)
+#
+# CONFIG_SHAPER is not set
+
+#
+# Wan interfaces
+#
+# CONFIG_WAN is not set
+
+#
+# File systems
+#
+CONFIG_EXT2_FS=y
+# CONFIG_EXT2_FS_XATTR is not set
+CONFIG_EXT3_FS=y
+CONFIG_EXT3_FS_XATTR=y
+# CONFIG_EXT3_FS_POSIX_ACL is not set
+# CONFIG_EXT3_FS_SECURITY is not set
+CONFIG_JBD=y
+# CONFIG_JBD_DEBUG is not set
+CONFIG_FS_MBCACHE=y
+# CONFIG_REISERFS_FS is not set
+# CONFIG_JFS_FS is not set
+# CONFIG_XFS_FS is not set
+# CONFIG_MINIX_FS is not set
+# CONFIG_ROMFS_FS is not set
+# CONFIG_QUOTA is not set
+# CONFIG_AUTOFS_FS is not set
+# CONFIG_AUTOFS4_FS is not set
+
+#
+# CD-ROM/DVD Filesystems
+#
+# CONFIG_ISO9660_FS is not set
+# CONFIG_UDF_FS is not set
+
+#
+# DOS/FAT/NT Filesystems
+#
+# CONFIG_FAT_FS is not set
+# CONFIG_NTFS_FS is not set
+
+#
+# Pseudo filesystems
+#
+CONFIG_PROC_FS=y
+CONFIG_DEVFS_FS=y
+CONFIG_DEVFS_MOUNT=y
+# CONFIG_DEVFS_DEBUG is not set
+CONFIG_DEVPTS_FS=y
+# CONFIG_DEVPTS_FS_XATTR is not set
+CONFIG_TMPFS=y
+CONFIG_RAMFS=y
+
+#
+# Miscellaneous filesystems
+#
+# CONFIG_ADFS_FS is not set
+# CONFIG_AFFS_FS is not set
+# CONFIG_HFS_FS is not set
+# CONFIG_BEFS_FS is not set
+# CONFIG_BFS_FS is not set
+# CONFIG_EFS_FS is not set
+# CONFIG_CRAMFS is not set
+# CONFIG_VXFS_FS is not set
+# CONFIG_HPFS_FS is not set
+# CONFIG_QNX4FS_FS is not set
+# CONFIG_SYSV_FS is not set
+# CONFIG_UFS_FS is not set
+
+#
+# Network File Systems
+#
+# CONFIG_NFS_FS is not set
+# CONFIG_NFSD is not set
+# CONFIG_EXPORTFS is not set
+# CONFIG_SMB_FS is not set
+# CONFIG_CIFS is not set
+# CONFIG_NCP_FS is not set
+# CONFIG_CODA_FS is not set
+# CONFIG_INTERMEZZO_FS is not set
+# CONFIG_AFS_FS is not set
+
+#
+# Partition Types
+#
+# CONFIG_PARTITION_ADVANCED is not set
+CONFIG_MSDOS_PARTITION=y
+
+#
+# Security options
+#
+# CONFIG_SECURITY is not set
+
+#
+# Cryptographic options
+#
+# CONFIG_CRYPTO is not set
+
+#
+# Library routines
+#
+# CONFIG_CRC32 is not set
+
+#
+# SCSI support
+#
+# CONFIG_SCSI is not set
+
+#
+# Multi-device support (RAID and LVM)
+#
+# CONFIG_MD is not set
+
+#
+# Memory Technology Devices (MTD)
+#
+# CONFIG_MTD is not set
+
+#
+# Kernel hacking
+#
+CONFIG_DEBUG_SLAB=y
+# CONFIG_DEBUG_SPINLOCK is not set
+CONFIG_DEBUG_INFO=y
+CONFIG_FRAME_POINTER=y
+CONFIG_PT_PROXY=y
+# CONFIG_GPROF is not set
+# CONFIG_GCOV is not set
diff --git a/lustre/kernel_patches/patches/dynamic-locks-2.4.18-chaos.patch b/lustre/kernel_patches/patches/dynamic-locks-2.4.18-chaos.patch
new file mode 100644
index 0000000..a1cef3e
--- /dev/null
+++ b/lustre/kernel_patches/patches/dynamic-locks-2.4.18-chaos.patch
@@ -0,0 +1,212 @@
+ include/linux/dynlocks.h |   33 ++++++++++
+ lib/Makefile             |    4 -
+ lib/dynlocks.c           |  152 +++++++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 187 insertions(+), 2 deletions(-)
+
+--- /dev/null	2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.18-alexey/include/linux/dynlocks.h	2003-09-01 16:33:25.000000000 +0400
+@@ -0,0 +1,33 @@
++#ifndef _LINUX_DYNLOCKS_H
++#define _LINUX_DYNLOCKS_H
++
++#include <linux/list.h>
++#include <linux/wait.h>
++
++struct dynlock_member {
++	struct list_head	dl_list;
++	unsigned long		dl_value;	/* lock value */
++	int			dl_refcount;	/* number of users */
++	int			dl_readers;
++	int			dl_writers;
++	int			dl_pid;		/* holder of the lock */
++	wait_queue_head_t	dl_wait;
++};
++
++/*
++ * lock's namespace:
++ *   - list of locks
++ *   - lock to protect this list
++ */
++struct dynlock {
++	struct list_head dl_list;
++	spinlock_t dl_list_lock;
++};
++
++void dynlock_init(struct dynlock *dl);
++void *dynlock_lock(struct dynlock *dl, unsigned long value, int rw, int gfp);
++void dynlock_unlock(struct dynlock *dl, void *lock);
++
++
++#endif
++
+--- /dev/null	2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.18-alexey/lib/dynlocks.c	2003-09-01 16:36:00.000000000 +0400
+@@ -0,0 +1,152 @@
++/*
++ * Dynamic Locks
++ *
++ * struct dynlock is lockspace
++ * one may request lock (exclusive or shared) for some value
++ * in that lockspace
++ *
++ */
++
++#include <linux/dynlocks.h>
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <linux/sched.h>
++
++/*
++ * dynlock_init
++ *
++ * initialize lockspace
++ *
++ */
++void dynlock_init(struct dynlock *dl)
++{
++	spin_lock_init(&dl->dl_list_lock);
++	INIT_LIST_HEAD(&dl->dl_list);
++}
++
++/*
++ * dynlock_lock
++ *
++ * acquires a lock (exclusive if rw != 0, shared otherwise) on value in
++ * the specified lockspace. each lock in the lockspace is allocated
++ * separately, so the caller has to specify GFP flags.
++ * the routine returns a pointer to the lock, which is intended to be
++ * passed to dynlock_unlock(), or NULL if the allocation fails
++ *
++ */
++void *dynlock_lock(struct dynlock *dl, unsigned long value, int rw, int gfp)
++{
++	struct dynlock_member *nhl = NULL; 
++	struct dynlock_member *hl; 
++	struct list_head *cur;
++
++repeat:
++	/* find requested lock in lockspace */
++	spin_lock(&dl->dl_list_lock);
++	list_for_each(cur, &dl->dl_list) {
++		hl = list_entry(cur, struct dynlock_member, dl_list);
++		if (hl->dl_value == value) {
++			/* lock is found */
++			if (nhl) {
++				/* someone else just allocated
++				 * lock we didn't find and just created
++				 * so, we drop our lock
++				 */
++				kfree(nhl);
++				nhl = NULL;
++			}
++			hl->dl_refcount++;
++			goto found;
++		}
++	}
++	/* lock not found */
++	if (nhl) {
++		/* we already have allocated lock. use it */
++		hl = nhl;
++		nhl = NULL;
++		list_add(&hl->dl_list, &dl->dl_list);
++		goto found;
++	}
++	spin_unlock(&dl->dl_list_lock);
++	
++	/* lock not found and we haven't allocated lock yet. allocate it */
++	nhl = kmalloc(sizeof(struct dynlock_member), gfp);
++	if (nhl == NULL)
++		return NULL;
++	nhl->dl_refcount = 1;
++	nhl->dl_value = value;
++	nhl->dl_readers = 0;
++	nhl->dl_writers = 0;
++	init_waitqueue_head(&nhl->dl_wait);
++
++	/* while the lock was being allocated, someone else may have
++	 * allocated it and put it onto the list. check for this situation
++	 */
++	goto repeat;
++
++found:
++	if (rw) {
++		/* exclusive lock: user doesn't want to share the lock at all
++		 * NOTE: one process may take the same lock several times
++		 * this functionality is useful for rename operations */
++		while ((hl->dl_writers && hl->dl_pid != current->pid) ||
++				hl->dl_readers) {
++			spin_unlock(&dl->dl_list_lock);
++			wait_event(hl->dl_wait,
++				hl->dl_writers == 0 && hl->dl_readers == 0);
++			spin_lock(&dl->dl_list_lock);
++		}
++		hl->dl_writers++;
++	} else {
++		/* shared lock: user do not want to share lock with writer */
++		while (hl->dl_writers) {
++			spin_unlock(&dl->dl_list_lock);
++			wait_event(hl->dl_wait, hl->dl_writers == 0);
++			spin_lock(&dl->dl_list_lock);
++		}
++		hl->dl_readers++;
++	}
++	hl->dl_pid = current->pid;
++	spin_unlock(&dl->dl_list_lock);
++
++	return hl;
++}
++
++
++/*
++ * dynlock_unlock
++ *
++ * release a lock taken by dynlock_lock(). the caller passes the lockspace
++ * (dl) and the pointer returned by dynlock_lock(); the lock structure is
++ * freed when the last reference to it is dropped
++ */
++void dynlock_unlock(struct dynlock *dl, void *lock)
++{
++	struct dynlock_member *hl = lock;
++	int wakeup;
++	int last;
++
++	spin_lock(&dl->dl_list_lock);
++	if (hl->dl_writers)
++		wakeup = (--hl->dl_writers == 0);
++	else
++		wakeup = (--hl->dl_readers == 0);
++	if (wakeup) {
++		hl->dl_pid = 0;
++		wake_up(&hl->dl_wait);
++	}
++	/* decide under the list lock whether we dropped the last reference:
++	 * once the lock is released another unlocker may free hl, so it
++	 * must not be dereferenced again unless we are the one freeing it */
++	last = (--hl->dl_refcount == 0);
++	if (last)
++		list_del(&hl->dl_list);
++	spin_unlock(&dl->dl_list_lock);
++	if (last)
++		kfree(hl);
++}
++
++EXPORT_SYMBOL(dynlock_init);
++EXPORT_SYMBOL(dynlock_lock);
++EXPORT_SYMBOL(dynlock_unlock);
++
+--- linux-2.4.18/lib/Makefile~dynamic-locks-2.4.18-chaos	2003-08-29 11:57:40.000000000 +0400
++++ linux-2.4.18-alexey/lib/Makefile	2003-09-01 16:35:23.000000000 +0400
+@@ -8,9 +8,9 @@
+ 
+ L_TARGET := lib.a
+ 
+-export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o rbtree.o
++export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o rbtree.o dynlocks.o
+ 
+-obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o bust_spinlocks.o rbtree.o
++obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o bust_spinlocks.o rbtree.o dynlocks.o
+ 
+ obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
+ obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
+
+_
diff --git a/lustre/kernel_patches/patches/dynamic-locks-2.4.20-rh.patch b/lustre/kernel_patches/patches/dynamic-locks-2.4.20-rh.patch
new file mode 100644
index 0000000..59f0a3e
--- /dev/null
+++ b/lustre/kernel_patches/patches/dynamic-locks-2.4.20-rh.patch
@@ -0,0 +1,217 @@
+ include/linux/dynlocks.h |   33 ++++++++++
+ lib/Makefile             |    4 -
+ lib/dynlocks.c           |  152 +++++++++++++++++++++++++++++++++++++++++++++++
+ 3 files changed, 187 insertions(+), 2 deletions(-)
+
+Index: linux-2.4.20-rh/include/linux/dynlocks.h
+===================================================================
+--- linux-2.4.20-rh.orig/include/linux/dynlocks.h	2003-09-04 18:25:49.000000000 +0800
++++ linux-2.4.20-rh/include/linux/dynlocks.h	2003-09-04 18:25:49.000000000 +0800
+@@ -0,0 +1,33 @@
++#ifndef _LINUX_DYNLOCKS_H
++#define _LINUX_DYNLOCKS_H
++
++#include <linux/list.h>
++#include <linux/wait.h>
++
++struct dynlock_member {
++	struct list_head	dl_list;
++	unsigned long		dl_value;	/* lock value */
++	int			dl_refcount;	/* number of users */
++	int			dl_readers;
++	int			dl_writers;
++	int			dl_pid;		/* holder of the lock */
++	wait_queue_head_t	dl_wait;
++};
++
++/*
++ * lock's namespace:
++ *   - list of locks
++ *   - lock to protect this list
++ */
++struct dynlock {
++	struct list_head dl_list;
++	spinlock_t dl_list_lock;
++};
++
++void dynlock_init(struct dynlock *dl);
++void *dynlock_lock(struct dynlock *dl, unsigned long value, int rw, int gfp);
++void dynlock_unlock(struct dynlock *dl, void *lock);
++
++
++#endif
++
+Index: linux-2.4.20-rh/lib/dynlocks.c
+===================================================================
+--- linux-2.4.20-rh.orig/lib/dynlocks.c	2003-09-04 18:25:49.000000000 +0800
++++ linux-2.4.20-rh/lib/dynlocks.c	2003-09-04 18:25:49.000000000 +0800
+@@ -0,0 +1,152 @@
++/*
++ * Dynamic Locks
++ *
++ * struct dynlock is lockspace
++ * one may request lock (exclusive or shared) for some value
++ * in that lockspace
++ *
++ */
++
++#include <linux/dynlocks.h>
++#include <linux/module.h>
++#include <linux/slab.h>
++#include <linux/sched.h>
++
++/*
++ * dynlock_init
++ *
++ * initialize lockspace
++ *
++ */
++void dynlock_init(struct dynlock *dl)
++{
++	spin_lock_init(&dl->dl_list_lock);
++	INIT_LIST_HEAD(&dl->dl_list);
++}
++
++/*
++ * dynlock_lock
++ *
++ * acquires a lock (exclusive if rw != 0, shared otherwise) on value in
++ * the specified lockspace. each lock in the lockspace is allocated
++ * separately, so the caller has to specify GFP flags.
++ * the routine returns a pointer to the lock, which is intended to be
++ * passed to dynlock_unlock(), or NULL if the allocation fails
++ *
++ */
++void *dynlock_lock(struct dynlock *dl, unsigned long value, int rw, int gfp)
++{
++	struct dynlock_member *nhl = NULL; 
++	struct dynlock_member *hl; 
++	struct list_head *cur;
++
++repeat:
++	/* find requested lock in lockspace */
++	spin_lock(&dl->dl_list_lock);
++	list_for_each(cur, &dl->dl_list) {
++		hl = list_entry(cur, struct dynlock_member, dl_list);
++		if (hl->dl_value == value) {
++			/* lock is found */
++			if (nhl) {
++				/* someone else just allocated
++				 * lock we didn't find and just created
++				 * so, we drop our lock
++				 */
++				kfree(nhl);
++				nhl = NULL;
++			}
++			hl->dl_refcount++;
++			goto found;
++		}
++	}
++	/* lock not found */
++	if (nhl) {
++		/* we already have allocated lock. use it */
++		hl = nhl;
++		nhl = NULL;
++		list_add(&hl->dl_list, &dl->dl_list);
++		goto found;
++	}
++	spin_unlock(&dl->dl_list_lock);
++	
++	/* lock not found and we haven't allocated lock yet. allocate it */
++	nhl = kmalloc(sizeof(struct dynlock_member), gfp);
++	if (nhl == NULL)
++		return NULL;
++	nhl->dl_refcount = 1;
++	nhl->dl_value = value;
++	nhl->dl_readers = 0;
++	nhl->dl_writers = 0;
++	init_waitqueue_head(&nhl->dl_wait);
++
++	/* while the lock was being allocated, someone else may have
++	 * allocated it and put it onto the list. check for this situation
++	 */
++	goto repeat;
++
++found:
++	if (rw) {
++		/* exclusive lock: user doesn't want to share the lock at all
++		 * NOTE: one process may take the same lock several times
++		 * this functionality is useful for rename operations */
++		while ((hl->dl_writers && hl->dl_pid != current->pid) ||
++				hl->dl_readers) {
++			spin_unlock(&dl->dl_list_lock);
++			wait_event(hl->dl_wait,
++				hl->dl_writers == 0 && hl->dl_readers == 0);
++			spin_lock(&dl->dl_list_lock);
++		}
++		hl->dl_writers++;
++	} else {
++		/* shared lock: user do not want to share lock with writer */
++		while (hl->dl_writers) {
++			spin_unlock(&dl->dl_list_lock);
++			wait_event(hl->dl_wait, hl->dl_writers == 0);
++			spin_lock(&dl->dl_list_lock);
++		}
++		hl->dl_readers++;
++	}
++	hl->dl_pid = current->pid;
++	spin_unlock(&dl->dl_list_lock);
++
++	return hl;
++}
++
++
++/*
++ * dynlock_unlock
++ *
++ * release a lock taken by dynlock_lock(). the caller passes the lockspace
++ * (dl) and the pointer returned by dynlock_lock(); the lock structure is
++ * freed when the last reference to it is dropped
++ */
++void dynlock_unlock(struct dynlock *dl, void *lock)
++{
++	struct dynlock_member *hl = lock;
++	int wakeup;
++	int last;
++
++	spin_lock(&dl->dl_list_lock);
++	if (hl->dl_writers)
++		wakeup = (--hl->dl_writers == 0);
++	else
++		wakeup = (--hl->dl_readers == 0);
++	if (wakeup) {
++		hl->dl_pid = 0;
++		wake_up(&hl->dl_wait);
++	}
++	/* decide under the list lock whether we dropped the last reference:
++	 * once the lock is released another unlocker may free hl, so it
++	 * must not be dereferenced again unless we are the one freeing it */
++	last = (--hl->dl_refcount == 0);
++	if (last)
++		list_del(&hl->dl_list);
++	spin_unlock(&dl->dl_list_lock);
++	if (last)
++		kfree(hl);
++}
++
++EXPORT_SYMBOL(dynlock_init);
++EXPORT_SYMBOL(dynlock_lock);
++EXPORT_SYMBOL(dynlock_unlock);
++
+Index: linux-2.4.20-rh/lib/Makefile
+===================================================================
+--- linux-2.4.20-rh.orig/lib/Makefile	2002-11-29 07:53:15.000000000 +0800
++++ linux-2.4.20-rh/lib/Makefile	2003-09-04 18:27:26.000000000 +0800
+@@ -8,10 +8,10 @@
+ 
+ L_TARGET := lib.a
+ 
+-export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o rbtree.o
++export-objs := cmdline.o dec_and_lock.o rwsem-spinlock.o rwsem.o rbtree.o dynlocks.o
+ 
+ obj-y := errno.o ctype.o string.o vsprintf.o brlock.o cmdline.o \
+-	 bust_spinlocks.o rbtree.o dump_stack.o
++	 bust_spinlocks.o rbtree.o dump_stack.o dynlocks.o
+ 
+ obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
+ obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
diff --git a/lustre/kernel_patches/patches/ext-2.4-patch-5.patch b/lustre/kernel_patches/patches/ext-2.4-patch-5.patch
new file mode 100644
index 0000000..a65f6ed
--- /dev/null
+++ b/lustre/kernel_patches/patches/ext-2.4-patch-5.patch
@@ -0,0 +1,15 @@
+ include/linux/ext3_fs.h |    1 +
+ 1 files changed, 1 insertion(+)
+
+--- linux-2.4.18/include/linux/ext3_fs.h~ext-2.4-patch-5	2003-08-29 16:53:18.000000000 +0400
++++ linux-2.4.18-alexey/include/linux/ext3_fs.h	2003-09-01 11:50:37.000000000 +0400
+@@ -344,6 +344,7 @@ struct ext3_inode {
+   #define EXT3_MOUNT_WRITEBACK_DATA	0x0C00	/* No data ordering */
+ #define EXT3_MOUNT_UPDATE_JOURNAL	0x1000	/* Update the journal format */
+ #define EXT3_MOUNT_NO_UID32		0x2000  /* Disable 32-bit UIDs */
++#define EXT3_MOUNT_INDEX		0x4000  /* Enable directory index */
+ 
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef _LINUX_EXT2_FS_H
+
+_
diff --git a/lustre/kernel_patches/patches/ext3-2.4.18-ino_sb_macro-2.patch b/lustre/kernel_patches/patches/ext3-2.4.18-ino_sb_macro-2.patch
new file mode 100644
index 0000000..8343e54
--- /dev/null
+++ b/lustre/kernel_patches/patches/ext3-2.4.18-ino_sb_macro-2.patch
@@ -0,0 +1,1461 @@
+--- ./fs/ext3/balloc.c.orig	Fri Apr 12 10:27:49 2002
++++ ./fs/ext3/balloc.c	Tue May  7 15:35:59 2002
+@@ -46,18 +46,18 @@ struct ext3_group_desc * ext3_get_group_
+ 	unsigned long desc;
+ 	struct ext3_group_desc * gdp;
+ 
+-	if (block_group >= sb->u.ext3_sb.s_groups_count) {
++	if (block_group >= EXT3_SB(sb)->s_groups_count) {
+ 		ext3_error (sb, "ext3_get_group_desc",
+ 			    "block_group >= groups_count - "
+ 			    "block_group = %d, groups_count = %lu",
+-			    block_group, sb->u.ext3_sb.s_groups_count);
++			    block_group, EXT3_SB(sb)->s_groups_count);
+ 
+ 		return NULL;
+ 	}
+ 	
+ 	group_desc = block_group / EXT3_DESC_PER_BLOCK(sb);
+ 	desc = block_group % EXT3_DESC_PER_BLOCK(sb);
+-	if (!sb->u.ext3_sb.s_group_desc[group_desc]) {
++	if (!EXT3_SB(sb)->s_group_desc[group_desc]) {
+ 		ext3_error (sb, "ext3_get_group_desc",
+ 			    "Group descriptor not loaded - "
+ 			    "block_group = %d, group_desc = %lu, desc = %lu",
+@@ -66,9 +66,9 @@ struct ext3_group_desc * ext3_get_group_
+ 	}
+ 	
+ 	gdp = (struct ext3_group_desc *) 
+-	      sb->u.ext3_sb.s_group_desc[group_desc]->b_data;
++	      EXT3_SB(sb)->s_group_desc[group_desc]->b_data;
+ 	if (bh)
+-		*bh = sb->u.ext3_sb.s_group_desc[group_desc];
++		*bh = EXT3_SB(sb)->s_group_desc[group_desc];
+ 	return gdp + desc;
+ }
+ 
+@@ -104,8 +104,8 @@ static int read_block_bitmap (struct sup
+ 	 * this group.  The IO will be retried next time.
+ 	 */
+ error_out:
+-	sb->u.ext3_sb.s_block_bitmap_number[bitmap_nr] = block_group;
+-	sb->u.ext3_sb.s_block_bitmap[bitmap_nr] = bh;
++	EXT3_SB(sb)->s_block_bitmap_number[bitmap_nr] = block_group;
++	EXT3_SB(sb)->s_block_bitmap[bitmap_nr] = bh;
+ 	return retval;
+ }
+ 
+@@ -128,16 +128,17 @@ static int __load_block_bitmap (struct s
+ 	int i, j, retval = 0;
+ 	unsigned long block_bitmap_number;
+ 	struct buffer_head * block_bitmap;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
+ 
+-	if (block_group >= sb->u.ext3_sb.s_groups_count)
++	if (block_group >= sbi->s_groups_count)
+ 		ext3_panic (sb, "load_block_bitmap",
+ 			    "block_group >= groups_count - "
+ 			    "block_group = %d, groups_count = %lu",
+-			    block_group, sb->u.ext3_sb.s_groups_count);
++			    block_group, EXT3_SB(sb)->s_groups_count);
+ 
+-	if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED) {
+-		if (sb->u.ext3_sb.s_block_bitmap[block_group]) {
+-			if (sb->u.ext3_sb.s_block_bitmap_number[block_group] ==
++	if (sbi->s_groups_count <= EXT3_MAX_GROUP_LOADED) {
++		if (sbi->s_block_bitmap[block_group]) {
++			if (sbi->s_block_bitmap_number[block_group] ==
+ 			    block_group)
+ 				return block_group;
+ 			ext3_error (sb, "__load_block_bitmap",
+@@ -149,21 +150,20 @@ static int __load_block_bitmap (struct s
+ 		return block_group;
+ 	}
+ 
+-	for (i = 0; i < sb->u.ext3_sb.s_loaded_block_bitmaps &&
+-		    sb->u.ext3_sb.s_block_bitmap_number[i] != block_group; i++)
++	for (i = 0; i < sbi->s_loaded_block_bitmaps &&
++		    sbi->s_block_bitmap_number[i] != block_group; i++)
+ 		;
+-	if (i < sb->u.ext3_sb.s_loaded_block_bitmaps &&
+-  	    sb->u.ext3_sb.s_block_bitmap_number[i] == block_group) {
+-		block_bitmap_number = sb->u.ext3_sb.s_block_bitmap_number[i];
+-		block_bitmap = sb->u.ext3_sb.s_block_bitmap[i];
++	if (i < sbi->s_loaded_block_bitmaps &&
++  	    sbi->s_block_bitmap_number[i] == block_group) {
++		block_bitmap_number = sbi->s_block_bitmap_number[i];
++		block_bitmap = sbi->s_block_bitmap[i];
+ 		for (j = i; j > 0; j--) {
+-			sb->u.ext3_sb.s_block_bitmap_number[j] =
+-				sb->u.ext3_sb.s_block_bitmap_number[j - 1];
+-			sb->u.ext3_sb.s_block_bitmap[j] =
+-				sb->u.ext3_sb.s_block_bitmap[j - 1];
++			sbi->s_block_bitmap_number[j] =
++				sbi->s_block_bitmap_number[j - 1];
++			sbi->s_block_bitmap[j] = sbi->s_block_bitmap[j - 1];
+ 		}
+-		sb->u.ext3_sb.s_block_bitmap_number[0] = block_bitmap_number;
+-		sb->u.ext3_sb.s_block_bitmap[0] = block_bitmap;
++		sbi->s_block_bitmap_number[0] = block_bitmap_number;
++		sbi->s_block_bitmap[0] = block_bitmap;
+ 
+ 		/*
+ 		 * There's still one special case here --- if block_bitmap == 0
+@@ -173,17 +173,14 @@ static int __load_block_bitmap (struct s
+ 		if (!block_bitmap)
+ 			retval = read_block_bitmap (sb, block_group, 0);
+ 	} else {
+-		if (sb->u.ext3_sb.s_loaded_block_bitmaps<EXT3_MAX_GROUP_LOADED)
+-			sb->u.ext3_sb.s_loaded_block_bitmaps++;
++		if (sbi->s_loaded_block_bitmaps<EXT3_MAX_GROUP_LOADED)
++			sbi->s_loaded_block_bitmaps++;
+ 		else
+-			brelse (sb->u.ext3_sb.s_block_bitmap
+-					[EXT3_MAX_GROUP_LOADED - 1]);
+-		for (j = sb->u.ext3_sb.s_loaded_block_bitmaps - 1;
+-					j > 0;  j--) {
+-			sb->u.ext3_sb.s_block_bitmap_number[j] =
+-				sb->u.ext3_sb.s_block_bitmap_number[j - 1];
+-			sb->u.ext3_sb.s_block_bitmap[j] =
+-				sb->u.ext3_sb.s_block_bitmap[j - 1];
++			brelse(sbi->s_block_bitmap[EXT3_MAX_GROUP_LOADED - 1]);
++		for (j = sbi->s_loaded_block_bitmaps - 1; j > 0;  j--) {
++			sbi->s_block_bitmap_number[j] =
++				sbi->s_block_bitmap_number[j - 1];
++			sbi->s_block_bitmap[j] = sbi->s_block_bitmap[j - 1];
+ 		}
+ 		retval = read_block_bitmap (sb, block_group, 0);
+ 	}
+@@ -206,24 +203,25 @@ static int __load_block_bitmap (struct s
+ static inline int load_block_bitmap (struct super_block * sb,
+ 				     unsigned int block_group)
+ {
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
+ 	int slot;
+-	
++
+ 	/*
+ 	 * Do the lookup for the slot.  First of all, check if we're asking
+ 	 * for the same slot as last time, and did we succeed that last time?
+ 	 */
+-	if (sb->u.ext3_sb.s_loaded_block_bitmaps > 0 &&
+-	    sb->u.ext3_sb.s_block_bitmap_number[0] == block_group &&
+-	    sb->u.ext3_sb.s_block_bitmap[0]) {
++	if (sbi->s_loaded_block_bitmaps > 0 &&
++	    sbi->s_block_bitmap_number[0] == block_group &&
++	    sbi->s_block_bitmap[0]) {
+ 		return 0;
+ 	}
+ 	/*
+ 	 * Or can we do a fast lookup based on a loaded group on a filesystem
+ 	 * small enough to be mapped directly into the superblock?
+ 	 */
+-	else if (sb->u.ext3_sb.s_groups_count <= EXT3_MAX_GROUP_LOADED && 
+-		 sb->u.ext3_sb.s_block_bitmap_number[block_group]==block_group
+-			&& sb->u.ext3_sb.s_block_bitmap[block_group]) {
++	else if (sbi->s_groups_count <= EXT3_MAX_GROUP_LOADED &&
++		 sbi->s_block_bitmap_number[block_group] == block_group
++			&& sbi->s_block_bitmap[block_group]) {
+ 		slot = block_group;
+ 	}
+ 	/*
+@@ -243,7 +241,7 @@ static inline int load_block_bitmap (str
+ 	 * If it's a valid slot, we may still have cached a previous IO error,
+ 	 * in which case the bh in the superblock cache will be zero.
+ 	 */
+-	if (!sb->u.ext3_sb.s_block_bitmap[slot])
++	if (!sbi->s_block_bitmap[slot])
+ 		return -EIO;
+ 	
+ 	/*
+@@ -275,7 +273,7 @@ void ext3_free_blocks (handle_t *handle,
+ 		return;
+ 	}
+ 	lock_super (sb);
+-	es = sb->u.ext3_sb.s_es;
++	es = EXT3_SB(sb)->s_es;
+ 	if (block < le32_to_cpu(es->s_first_data_block) ||
+ 	    block + count < block ||
+ 	    (block + count) > le32_to_cpu(es->s_blocks_count)) {
+@@ -304,7 +302,7 @@ do_more:
+ 	if (bitmap_nr < 0)
+ 		goto error_return;
+ 	
+-	bitmap_bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
++	bitmap_bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr];
+ 	gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
+ 	if (!gdp)
+ 		goto error_return;
+@@ -330,8 +328,8 @@ do_more:
+ 	if (err)
+ 		goto error_return;
+ 
+-	BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
+-	err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
++	BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
++	err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
+ 	if (err)
+ 		goto error_return;
+ 
+@@ -341,7 +339,7 @@
+ 		if (block == le32_to_cpu(gdp->bg_block_bitmap) ||
+ 		    block == le32_to_cpu(gdp->bg_inode_bitmap) ||
+ 		    in_range(block, le32_to_cpu(gdp->bg_inode_table),
+-			     sb->u.ext2_sb.s_itb_per_group)) {
++			     EXT3_SB(sb)->s_itb_per_group)) {
+ 			ext3_error(sb, __FUNCTION__,
+ 				   "Freeing block in system zone - block = %lu",
+ 				   block);
+@@ -410,8 +407,8 @@ do_more:
+ 	if (!err) err = ret;
+ 
+ 	/* And the superblock */
+-	BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "dirtied superblock");
+-	ret = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
++	BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "dirtied superblock");
++	ret = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+ 	if (!err) err = ret;
+ 
+ 	if (overflow && !err) {
+@@ -564,12 +560,12 @@ int ext3_new_block (handle_t *handle, st
+ 	}
+ 
+ 	lock_super (sb);
+-	es = sb->u.ext3_sb.s_es;
++	es = EXT3_SB(sb)->s_es;
+ 	if (le32_to_cpu(es->s_free_blocks_count) <=
+ 			le32_to_cpu(es->s_r_blocks_count) &&
+-	    ((sb->u.ext3_sb.s_resuid != current->fsuid) &&
+-	     (sb->u.ext3_sb.s_resgid == 0 ||
+-	      !in_group_p (sb->u.ext3_sb.s_resgid)) && 
++	    ((EXT3_SB(sb)->s_resuid != current->fsuid) &&
++	     (EXT3_SB(sb)->s_resgid == 0 ||
++	      !in_group_p (EXT3_SB(sb)->s_resgid)) &&
+ 	     !capable(CAP_SYS_RESOURCE)))
+ 		goto out;
+ 
+@@ -598,7 +595,7 @@ int ext3_new_block (handle_t *handle, st
+ 		if (bitmap_nr < 0)
+ 			goto io_error;
+ 		
+-		bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
++		bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr];
+ 
+ 		ext3_debug ("goal is at %d:%d.\n", i, j);
+ 
+@@ -621,9 +618,9 @@ int ext3_new_block (handle_t *handle, st
+ 	 * Now search the rest of the groups.  We assume that 
+ 	 * i and gdp correctly point to the last group visited.
+ 	 */
+-	for (k = 0; k < sb->u.ext3_sb.s_groups_count; k++) {
++	for (k = 0; k < EXT3_SB(sb)->s_groups_count; k++) {
+ 		i++;
+-		if (i >= sb->u.ext3_sb.s_groups_count)
++		if (i >= EXT3_SB(sb)->s_groups_count)
+ 			i = 0;
+ 		gdp = ext3_get_group_desc (sb, i, &bh2);
+ 		if (!gdp) {
+@@ -635,7 +632,7 @@ int ext3_new_block (handle_t *handle, st
+ 			if (bitmap_nr < 0)
+ 				goto io_error;
+ 	
+-			bh = sb->u.ext3_sb.s_block_bitmap[bitmap_nr];
++			bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr];
+ 			j = find_next_usable_block(-1, bh, 
+ 						   EXT3_BLOCKS_PER_GROUP(sb));
+ 			if (j >= 0) 
+@@ -674,8 +671,8 @@ got_block:
+ 	fatal = ext3_journal_get_write_access(handle, bh2);
+ 	if (fatal) goto out;
+ 
+-	BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
+-	fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
++	BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
++	fatal = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
+ 	if (fatal) goto out;
+ 
+ 	tmp = j + i * EXT3_BLOCKS_PER_GROUP(sb)
+@@ -796,7 +804,7 @@ got_block:
+ 	if (!fatal) fatal = err;
+ 	
+ 	BUFFER_TRACE(bh, "journal_dirty_metadata for superblock");
+-	err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
++	err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+ 	if (!fatal) fatal = err;
+ 
+ 	sb->s_dirt = 1;
+@@ -829,11 +837,11 @@ unsigned long ext3_count_free_blocks (st
+ 	int i;
+ 	
+ 	lock_super (sb);
+-	es = sb->u.ext3_sb.s_es;
++	es = EXT3_SB(sb)->s_es;
+ 	desc_count = 0;
+ 	bitmap_count = 0;
+ 	gdp = NULL;
+-	for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
++	for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
+ 		gdp = ext3_get_group_desc (sb, i, NULL);
+ 		if (!gdp)
+ 			continue;
+@@ -842,7 +850,7 @@ unsigned long ext3_count_free_blocks (st
+ 		if (bitmap_nr < 0)
+ 			continue;
+ 		
+-		x = ext3_count_free (sb->u.ext3_sb.s_block_bitmap[bitmap_nr],
++		x = ext3_count_free (EXT3_SB(sb)->s_block_bitmap[bitmap_nr],
+ 				     sb->s_blocksize);
+ 		printk ("group %d: stored = %d, counted = %lu\n",
+ 			i, le16_to_cpu(gdp->bg_free_blocks_count), x);
+@@ -853,7 +861,7 @@ unsigned long ext3_count_free_blocks (st
+ 	unlock_super (sb);
+ 	return bitmap_count;
+ #else
+-	return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_blocks_count);
++	return le32_to_cpu(EXT3_SB(sb)->s_es->s_free_blocks_count);
+ #endif
+ }
+ 
+@@ -862,7 +870,7 @@ static inline int block_in_use (unsigned
+ 				unsigned char * map)
+ {
+ 	return ext3_test_bit ((block -
+-		le32_to_cpu(sb->u.ext3_sb.s_es->s_first_data_block)) %
++		le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block)) %
+ 			 EXT3_BLOCKS_PER_GROUP(sb), map);
+ }
+ 
+@@ -930,11 +938,11 @@ void ext3_check_blocks_bitmap (struct su
+ 	struct ext3_group_desc * gdp;
+ 	int i;
+ 
+-	es = sb->u.ext3_sb.s_es;
++	es = EXT3_SB(sb)->s_es;
+ 	desc_count = 0;
+ 	bitmap_count = 0;
+ 	gdp = NULL;
+-	for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
++	for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
+ 		gdp = ext3_get_group_desc (sb, i, NULL);
+ 		if (!gdp)
+ 			continue;
+@@ -968,7 +976,7 @@ void ext3_check_blocks_bitmap (struct su
+ 				    "Inode bitmap for group %d is marked free",
+ 				    i);
+ 
+-		for (j = 0; j < sb->u.ext3_sb.s_itb_per_group; j++)
++		for (j = 0; j < EXT3_SB(sb)->s_itb_per_group; j++)
+ 			if (!block_in_use (le32_to_cpu(gdp->bg_inode_table) + j,
+ 							sb, bh->b_data))
+ 				ext3_error (sb, "ext3_check_blocks_bitmap",
+--- ./fs/ext3/dir.c.orig	Fri Apr 12 10:27:49 2002
++++ ./fs/ext3/dir.c	Tue May  7 14:54:13 2002
+@@ -52,7 +52,7 @@ int ext3_check_dir_entry (const char * f
+ 	else if (((char *) de - bh->b_data) + rlen > dir->i_sb->s_blocksize)
+ 		error_msg = "directory entry across blocks";
+ 	else if (le32_to_cpu(de->inode) >
+-			le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count))
++			le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count))
+ 		error_msg = "inode out of bounds";
+ 
+ 	if (error_msg != NULL)
+--- ./fs/ext3/ialloc.c.orig	Fri Apr 12 10:27:49 2002
++++ ./fs/ext3/ialloc.c	Tue May  7 15:39:26 2002
+@@ -73,8 +73,8 @@ static int read_inode_bitmap (struct sup
+ 	 * this group.  The IO will be retried next time.
+ 	 */
+ error_out:
+-	sb->u.ext3_sb.s_inode_bitmap_number[bitmap_nr] = block_group;
+-	sb->u.ext3_sb.s_inode_bitmap[bitmap_nr] = bh;
++	EXT3_SB(sb)->s_inode_bitmap_number[bitmap_nr] = block_group;
++	EXT3_SB(sb)->s_inode_bitmap[bitmap_nr] = bh;
+ 	return retval;
+ }
+ 
+@@ -225,7 +225,7 @@ void ext3_free_inode (handle_t *handle, 
+ 	clear_inode (inode);
+ 
+ 	lock_super (sb);
+-	es = sb->u.ext3_sb.s_es;
++	es = EXT3_SB(sb)->s_es;
+ 	if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
+ 		ext3_error (sb, "ext3_free_inode",
+ 			    "reserved or nonexistent inode %lu", ino);
+@@ -237,7 +237,7 @@ void ext3_free_inode (handle_t *handle, 
+ 	if (bitmap_nr < 0)
+ 		goto error_return;
+ 
+-	bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr];
++	bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr];
+ 
+ 	BUFFER_TRACE(bh, "get_write_access");
+ 	fatal = ext3_journal_get_write_access(handle, bh);
+@@ -255,8 +255,8 @@ void ext3_free_inode (handle_t *handle, 
+ 		fatal = ext3_journal_get_write_access(handle, bh2);
+ 		if (fatal) goto error_return;
+ 
+-		BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get write access");
+-		fatal = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
++		BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get write access");
++		fatal = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
+ 		if (fatal) goto error_return;
+ 
+ 		if (gdp) {
+@@ -271,9 +271,9 @@ void ext3_free_inode (handle_t *handle, 
+ 		if (!fatal) fatal = err;
+ 		es->s_free_inodes_count =
+ 			cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1);
+-		BUFFER_TRACE(sb->u.ext3_sb.s_sbh,
++		BUFFER_TRACE(EXT3_SB(sb)->s_sbh,
+ 					"call ext3_journal_dirty_metadata");
+-		err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
++		err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+ 		if (!fatal) fatal = err;
+ 	}
+ 	BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
+@@ -305,6 +305,8 @@ struct inode * ext3_new_inode (handle_t 
+ 	int i, j, avefreei;
+ 	struct inode * inode;
+ 	int bitmap_nr;
++	struct ext3_inode_info *ei;
++	struct ext3_sb_info *sbi;
+ 	struct ext3_group_desc * gdp;
+ 	struct ext3_group_desc * tmp;
+ 	struct ext3_super_block * es;
+@@ -318,7 +320,9 @@ struct inode * ext3_new_inode (handle_t 
+ 	inode = new_inode(sb);
+ 	if (!inode)
+ 		return ERR_PTR(-ENOMEM);
+-	init_rwsem(&inode->u.ext3_i.truncate_sem);
++	sbi = EXT3_SB(sb);
++	ei = EXT3_I(inode);
++	init_rwsem(&ei->truncate_sem);
+ 
+ 	lock_super (sb);
+ 	es = sb->u.ext3_sb.s_es;
+@@ -328,9 +332,9 @@ struct inode * ext3_new_inode (handle_t 
+ 
+ 	if (S_ISDIR(mode)) {
+ 		avefreei = le32_to_cpu(es->s_free_inodes_count) /
+-			sb->u.ext3_sb.s_groups_count;
++			sbi->s_groups_count;
+ 		if (!gdp) {
+-			for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) {
++			for (j = 0; j < sbi->s_groups_count; j++) {
+ 				struct buffer_head *temp_buffer;
+ 				tmp = ext3_get_group_desc (sb, j, &temp_buffer);
+ 				if (tmp &&
+@@ -350,7 +354,7 @@ repeat:
+ 		/*
+ 		 * Try to place the inode in its parent directory
+ 		 */
+-		i = dir->u.ext3_i.i_block_group;
++		i = EXT3_I(dir)->i_block_group;
+ 		tmp = ext3_get_group_desc (sb, i, &bh2);
+ 		if (tmp && le16_to_cpu(tmp->bg_free_inodes_count))
+ 			gdp = tmp;
+@@ -360,10 +364,10 @@ repeat:
+ 			 * Use a quadratic hash to find a group with a
+ 			 * free inode
+ 			 */
+-			for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) {
++			for (j = 1; j < sbi->s_groups_count; j <<= 1) {
+ 				i += j;
+-				if (i >= sb->u.ext3_sb.s_groups_count)
+-					i -= sb->u.ext3_sb.s_groups_count;
++				if (i >= sbi->s_groups_count)
++					i -= sbi->s_groups_count;
+ 				tmp = ext3_get_group_desc (sb, i, &bh2);
+ 				if (tmp &&
+ 				    le16_to_cpu(tmp->bg_free_inodes_count)) {
+@@ -376,9 +380,9 @@ repeat:
+ 			/*
+ 			 * That failed: try linear search for a free inode
+ 			 */
+-			i = dir->u.ext3_i.i_block_group + 1;
+-			for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) {
+-				if (++i >= sb->u.ext3_sb.s_groups_count)
++			i = EXT3_I(dir)->i_block_group + 1;
++			for (j = 2; j < sbi->s_groups_count; j++) {
++				if (++i >= sbi->s_groups_count)
+ 					i = 0;
+ 				tmp = ext3_get_group_desc (sb, i, &bh2);
+ 				if (tmp &&
+@@ -399,11 +403,11 @@ repeat:
+ 	if (bitmap_nr < 0)
+ 		goto fail;
+ 
+-	bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr];
++	bh = sbi->s_inode_bitmap[bitmap_nr];
+ 
+ 	if ((j = ext3_find_first_zero_bit ((unsigned long *) bh->b_data,
+-				      EXT3_INODES_PER_GROUP(sb))) <
+-	    EXT3_INODES_PER_GROUP(sb)) {
++				      sbi->s_inodes_per_group)) <
++	    sbi->s_inodes_per_group) {
+ 		BUFFER_TRACE(bh, "get_write_access");
+ 		err = ext3_journal_get_write_access(handle, bh);
+ 		if (err) goto fail;
+@@ -457,13 +461,13 @@ repeat:
+ 	err = ext3_journal_dirty_metadata(handle, bh2);
+ 	if (err) goto fail;
+ 	
+-	BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
+-	err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
++	BUFFER_TRACE(sbi->s_sbh, "get_write_access");
++	err = ext3_journal_get_write_access(handle, sbi->s_sbh);
+ 	if (err) goto fail;
+ 	es->s_free_inodes_count =
+ 		cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1);
+-	BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "call ext3_journal_dirty_metadata");
+-	err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
++	BUFFER_TRACE(sbi->s_sbh, "call ext3_journal_dirty_metadata");
++	err = ext3_journal_dirty_metadata(handle, sbi->s_sbh);
+ 	sb->s_dirt = 1;
+ 	if (err) goto fail;
+ 
+@@ -483,31 +487,31 @@ repeat:
+ 	inode->i_blksize = PAGE_SIZE;
+ 	inode->i_blocks = 0;
+ 	inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
+-	inode->u.ext3_i.i_flags = dir->u.ext3_i.i_flags & ~EXT3_INDEX_FL;
++	ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL;
+ 	if (S_ISLNK(mode))
+-		inode->u.ext3_i.i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL);
++		ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL);
+ #ifdef EXT3_FRAGMENTS
+-	inode->u.ext3_i.i_faddr = 0;
+-	inode->u.ext3_i.i_frag_no = 0;
+-	inode->u.ext3_i.i_frag_size = 0;
++	ei->i_faddr = 0;
++	ei->i_frag_no = 0;
++	ei->i_frag_size = 0;
+ #endif
+-	inode->u.ext3_i.i_file_acl = 0;
+-	inode->u.ext3_i.i_dir_acl = 0;
+-	inode->u.ext3_i.i_dtime = 0;
+-	INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan);
++	ei->i_file_acl = 0;
++	ei->i_dir_acl = 0;
++	ei->i_dtime = 0;
++	INIT_LIST_HEAD(&ei->i_orphan);
+ #ifdef EXT3_PREALLOCATE
+-	inode->u.ext3_i.i_prealloc_count = 0;
++	ei->i_prealloc_count = 0;
+ #endif
+-	inode->u.ext3_i.i_block_group = i;
++	ei->i_block_group = i;
+ 	
+-	if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL)
++	if (ei->i_flags & EXT3_SYNC_FL)
+ 		inode->i_flags |= S_SYNC;
+ 	if (IS_SYNC(inode))
+ 		handle->h_sync = 1;
+ 	insert_inode_hash(inode);
+-	inode->i_generation = sb->u.ext3_sb.s_next_generation++;
++	inode->i_generation = sbi->s_next_generation++;
+ 
+-	inode->u.ext3_i.i_state = EXT3_STATE_NEW;
++	ei->i_state = EXT3_STATE_NEW;
+ 	err = ext3_mark_inode_dirty(handle, inode);
+ 	if (err) goto fail;
+ 	
+@@ -585,19 +589,19 @@ struct inode *ext3_orphan_get (struct su
+ 
+ unsigned long ext3_count_free_inodes (struct super_block * sb)
+ {
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	struct ext3_super_block *es = sbi->s_es;
+ #ifdef EXT3FS_DEBUG
+-	struct ext3_super_block * es;
+ 	unsigned long desc_count, bitmap_count, x;
+ 	int bitmap_nr;
+ 	struct ext3_group_desc * gdp;
+ 	int i;
+ 
+ 	lock_super (sb);
+-	es = sb->u.ext3_sb.s_es;
+ 	desc_count = 0;
+ 	bitmap_count = 0;
+ 	gdp = NULL;
+-	for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
++	for (i = 0; i < sbi->s_groups_count; i++) {
+ 		gdp = ext3_get_group_desc (sb, i, NULL);
+ 		if (!gdp)
+ 			continue;
+@@ -606,8 +610,8 @@ unsigned long ext3_count_free_inodes (st
+ 		if (bitmap_nr < 0)
+ 			continue;
+ 
+-		x = ext3_count_free (sb->u.ext3_sb.s_inode_bitmap[bitmap_nr],
+-				     EXT3_INODES_PER_GROUP(sb) / 8);
++		x = ext3_count_free(sbi->s_inode_bitmap[bitmap_nr],
++				    sbi->s_inodes_per_group / 8);
+ 		printk ("group %d: stored = %d, counted = %lu\n",
+ 			i, le16_to_cpu(gdp->bg_free_inodes_count), x);
+ 		bitmap_count += x;
+@@ -617,7 +621,7 @@ unsigned long ext3_count_free_inodes (st
+ 	unlock_super (sb);
+ 	return desc_count;
+ #else
+-	return le32_to_cpu(sb->u.ext3_sb.s_es->s_free_inodes_count);
++	return le32_to_cpu(es->s_free_inodes_count);
+ #endif
+ }
+ 
+@@ -626,16 +630,18 @@ unsigned long ext3_count_free_inodes (st
+ void ext3_check_inodes_bitmap (struct super_block * sb)
+ {
+ 	struct ext3_super_block * es;
++	struct ext3_sb_info *sbi;
+ 	unsigned long desc_count, bitmap_count, x;
+ 	int bitmap_nr;
+ 	struct ext3_group_desc * gdp;
+ 	int i;
+ 
+-	es = sb->u.ext3_sb.s_es;
++	sbi = EXT3_SB(sb);
++	es = sbi->s_es;
+ 	desc_count = 0;
+ 	bitmap_count = 0;
+ 	gdp = NULL;
+-	for (i = 0; i < sb->u.ext3_sb.s_groups_count; i++) {
++	for (i = 0; i < sbi->s_groups_count; i++) {
+ 		gdp = ext3_get_group_desc (sb, i, NULL);
+ 		if (!gdp)
+ 			continue;
+@@ -644,7 +650,7 @@ void ext3_check_inodes_bitmap (struct su
+ 		if (bitmap_nr < 0)
+ 			continue;
+ 
+-		x = ext3_count_free (sb->u.ext3_sb.s_inode_bitmap[bitmap_nr],
++		x = ext3_count_free (sbi->s_inode_bitmap[bitmap_nr],
+ 				     EXT3_INODES_PER_GROUP(sb) / 8);
+ 		if (le16_to_cpu(gdp->bg_free_inodes_count) != x)
+ 			ext3_error (sb, "ext3_check_inodes_bitmap",
+--- ./fs/ext3/inode.c.orig	Fri Apr 12 10:27:49 2002
++++ ./fs/ext3/inode.c	Tue May  7 15:41:23 2002
+@@ -196,7 +196,7 @@ void ext3_delete_inode (struct inode * i
+ 	 * (Well, we could do this if we need to, but heck - it works)
+ 	 */
+ 	ext3_orphan_del(handle, inode);
+-	inode->u.ext3_i.i_dtime	= CURRENT_TIME;
++	EXT3_I(inode)->i_dtime = CURRENT_TIME;
+ 
+ 	/* 
+ 	 * One subtle ordering requirement: if anything has gone wrong
+@@ -220,13 +220,14 @@ no_delete:
+ void ext3_discard_prealloc (struct inode * inode)
+ {
+ #ifdef EXT3_PREALLOCATE
++	struct ext3_inode_info *ei = EXT3_I(inode);
+ 	lock_kernel();
+ 	/* Writer: ->i_prealloc* */
+-	if (inode->u.ext3_i.i_prealloc_count) {
+-		unsigned short total = inode->u.ext3_i.i_prealloc_count;
+-		unsigned long block = inode->u.ext3_i.i_prealloc_block;
+-		inode->u.ext3_i.i_prealloc_count = 0;
+-		inode->u.ext3_i.i_prealloc_block = 0;
++	if (ei->i_prealloc_count) {
++		unsigned short total = ei->i_prealloc_count;
++		unsigned long block = ei->i_prealloc_block;
++		ei->i_prealloc_count = 0;
++		ei->i_prealloc_block = 0;
+ 		/* Writer: end */
+ 		ext3_free_blocks (inode, block, total);
+ 	}
+@@ -243,13 +244,15 @@ static int ext3_alloc_block (handle_t *h
+ 	unsigned long result;
+ 
+ #ifdef EXT3_PREALLOCATE
++	struct ext3_inode_info *ei = EXT3_I(inode);
++
+ 	/* Writer: ->i_prealloc* */
+-	if (inode->u.ext3_i.i_prealloc_count &&
+-	    (goal == inode->u.ext3_i.i_prealloc_block ||
+-	     goal + 1 == inode->u.ext3_i.i_prealloc_block))
++	if (ei->i_prealloc_count &&
++	    (goal == ei->i_prealloc_block ||
++	     goal + 1 == ei->i_prealloc_block))
+ 	{
+-		result = inode->u.ext3_i.i_prealloc_block++;
+-		inode->u.ext3_i.i_prealloc_count--;
++		result = ei->i_prealloc_block++;
++		ei->i_prealloc_count--;
+ 		/* Writer: end */
+ 		ext3_debug ("preallocation hit (%lu/%lu).\n",
+ 			    ++alloc_hits, ++alloc_attempts);
+@@ -259,8 +262,8 @@ static int ext3_alloc_block (handle_t *h
+ 			    alloc_hits, ++alloc_attempts);
+ 		if (S_ISREG(inode->i_mode))
+ 			result = ext3_new_block (inode, goal, 
+-				 &inode->u.ext3_i.i_prealloc_count,
+-				 &inode->u.ext3_i.i_prealloc_block, err);
++				 &ei->i_prealloc_count,
++				 &ei->i_prealloc_block, err);
+ 		else
+ 			result = ext3_new_block (inode, goal, 0, 0, err);
+ 		/*
+@@ -394,7 +397,7 @@ static Indirect *ext3_get_branch(struct 
+ 
+ 	*err = 0;
+ 	/* i_data is not going away, no lock needed */
+-	add_chain (chain, NULL, inode->u.ext3_i.i_data + *offsets);
++	add_chain (chain, NULL, EXT3_I(inode)->i_data + *offsets);
+ 	if (!p->key)
+ 		goto no_block;
+ 	while (--depth) {
+@@ -437,7 +440,8 @@ no_block:
+ 
+ static inline unsigned long ext3_find_near(struct inode *inode, Indirect *ind)
+ {
+-	u32 *start = ind->bh ? (u32*) ind->bh->b_data : inode->u.ext3_i.i_data;
++	struct ext3_inode_info *ei = EXT3_I(inode);
++	u32 *start = ind->bh ? (u32*) ind->bh->b_data : ei->i_data;
+ 	u32 *p;
+ 
+ 	/* Try to find previous block */
+@@ -453,9 +456,8 @@ static inline unsigned long ext3_find_ne
+ 	 * It is going to be refered from inode itself? OK, just put it into
+ 	 * the same cylinder group then.
+ 	 */
+-	return (inode->u.ext3_i.i_block_group * 
+-		EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
+-	       le32_to_cpu(inode->i_sb->u.ext3_sb.s_es->s_first_data_block);
++	return (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
++	       le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block);
+ }
+ 
+ /**
+@@ -474,14 +477,15 @@
+ static int ext3_find_goal(struct inode *inode, long block, Indirect chain[4],
+ 			  Indirect *partial, unsigned long *goal)
+ {
++	struct ext3_inode_info *ei = EXT3_I(inode);
+ 	/* Writer: ->i_next_alloc* */
+-	if (block == inode->u.ext3_i.i_next_alloc_block + 1) {
+-		inode->u.ext3_i.i_next_alloc_block++;
+-		inode->u.ext3_i.i_next_alloc_goal++;
++	if (block == ei->i_next_alloc_block + 1) {
++		ei->i_next_alloc_block++;
++		ei->i_next_alloc_goal++;
+ 	}
+ #ifdef SEARCH_FROM_ZERO
+-	inode->u.ext3_i.i_next_alloc_block = 0;
+-	inode->u.ext3_i.i_next_alloc_goal = 0;
++	ei->i_next_alloc_block = 0;
++	ei->i_next_alloc_goal = 0;
+ #endif
+ 	/* Writer: end */
+ 	/* Reader: pointers, ->i_next_alloc* */
+@@ -490,8 +493,8 @@ static int ext3_find_goal(struct inode *
+ 		 * try the heuristic for sequential allocation,
+ 		 * failing that at least try to get decent locality.
+ 		 */
+-		if (block == inode->u.ext3_i.i_next_alloc_block)
+-			*goal = inode->u.ext3_i.i_next_alloc_goal;
++		if (block == ei->i_next_alloc_block)
++			*goal = ei->i_next_alloc_goal;
+ 		if (!*goal)
+ 			*goal = ext3_find_near(inode, partial);
+ #ifdef SEARCH_FROM_ZERO
+@@ -619,6 +621,7 @@
+ {
+ 	int i;
+ 	int err = 0;
++	struct ext3_inode_info *ei = EXT3_I(inode);
+ 
+ 	/*
+ 	 * If we're splicing into a [td]indirect block (as opposed to the
+@@ -641,11 +644,11 @@ static int ext3_splice_branch(handle_t *
+ 	/* That's it */
+ 
+ 	*where->p = where->key;
+-	inode->u.ext3_i.i_next_alloc_block = block;
+-	inode->u.ext3_i.i_next_alloc_goal = le32_to_cpu(where[num-1].key);
++	ei->i_next_alloc_block = block;
++	ei->i_next_alloc_goal = le32_to_cpu(where[num-1].key);
+ #ifdef SEARCH_FROM_ZERO
+-	inode->u.ext3_i.i_next_alloc_block = 0;
+-	inode->u.ext3_i.i_next_alloc_goal = 0;
++	ei->i_next_alloc_block = 0;
++	ei->i_next_alloc_goal = 0;
+ #endif
+ 	/* Writer: end */
+ 
+@@ -729,6 +732,7 @@
+ 	unsigned long goal;
+ 	int left;
+ 	int depth = ext3_block_to_path(inode, iblock, offsets);
++	struct ext3_inode_info *ei = EXT3_I(inode);
+ 	loff_t new_size;
+ 
+ 	J_ASSERT(handle != NULL || create == 0);
+@@ -782,7 +785,7 @@ out:
+ 	/*
+ 	 * Block out ext3_truncate while we alter the tree
+ 	 */
+-	down_read(&inode->u.ext3_i.truncate_sem);
++	down_read(&ei->truncate_sem);
+ 	err = ext3_alloc_branch(handle, inode, left, goal,
+ 					offsets+(partial-chain), partial);
+ 
+@@ -794,7 +797,7 @@ out:
+ 	if (!err)
+ 		err = ext3_splice_branch(handle, inode, iblock, chain,
+ 					 partial, left);
+-	up_read(&inode->u.ext3_i.truncate_sem);
++	up_read(&ei->truncate_sem);
+ 	if (err == -EAGAIN)
+ 		goto changed;
+ 	if (err)
+@@ -807,8 +810,8 @@ out:
+ 	 * truncate is in progress.  It is racy between multiple parallel
+ 	 * instances of get_block, but we have the BKL.
+ 	 */
+-	if (new_size > inode->u.ext3_i.i_disksize)
+-		inode->u.ext3_i.i_disksize = new_size;
++	if (new_size > ei->i_disksize)
++		ei->i_disksize = new_size;
+ 
+ 	bh_result->b_state |= (1UL << BH_New);
+ 	goto got_it;
+@@ -921,7 +924,7 @@ struct buffer_head *ext3_bread(handle_t 
+ 		struct buffer_head *tmp_bh;
+ 
+ 		for (i = 1;
+-		     inode->u.ext3_i.i_prealloc_count &&
++		     EXT3_I(inode)->i_prealloc_count &&
+ 		     i < EXT3_SB(inode->i_sb)->s_es->s_prealloc_dir_blocks;
+ 		     i++) {
+ 			/*
+@@ -1131,8 +1134,8 @@ static int ext3_commit_write(struct file
+ 			kunmap(page);
+ 		}
+ 	}
+-	if (inode->i_size > inode->u.ext3_i.i_disksize) {
+-		inode->u.ext3_i.i_disksize = inode->i_size;
++	if (inode->i_size > EXT3_I(inode)->i_disksize) {
++		EXT3_I(inode)->i_disksize = inode->i_size;
+ 		ret2 = ext3_mark_inode_dirty(handle, inode);
+ 		if (!ret) 
+ 			ret = ret2;
+@@ -1832,7 +1835,8 @@ static void ext3_free_branches(handle_t 
+ void ext3_truncate(struct inode * inode)
+ {
+ 	handle_t *handle;
+-	u32 *i_data = inode->u.ext3_i.i_data;
++	struct ext3_inode_info *ei = EXT3_I(inode);
++	u32 *i_data = EXT3_I(inode)->i_data;
+ 	int addr_per_block = EXT3_ADDR_PER_BLOCK(inode->i_sb);
+ 	int offsets[4];
+ 	Indirect chain[4];
+@@ -1884,13 +1887,13 @@ void ext3_truncate(struct inode * inode)
+ 	 * on-disk inode. We do this via i_disksize, which is the value which
+ 	 * ext3 *really* writes onto the disk inode.
+ 	 */
+-	inode->u.ext3_i.i_disksize = inode->i_size;
++	ei->i_disksize = inode->i_size;
+ 
+ 	/*
+ 	 * From here we block out all ext3_get_block() callers who want to
+ 	 * modify the block allocation tree.
+ 	 */
+-	down_write(&inode->u.ext3_i.truncate_sem);
++	down_write(&ei->truncate_sem);
+ 
+ 	if (n == 1) {		/* direct blocks */
+ 		ext3_free_data(handle, inode, NULL, i_data+offsets[0],
+@@ -1954,7 +1957,7 @@ do_indirects:
+ 		case EXT3_TIND_BLOCK:
+ 			;
+ 	}
+-	up_write(&inode->u.ext3_i.truncate_sem);
++	up_write(&ei->truncate_sem);
+ 	inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ 	ext3_mark_inode_dirty(handle, inode);
+ 
+@@ -1983,6 +1986,8 @@ out_stop:
+ 
+ int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc)
+ {
++	struct super_block *sb = inode->i_sb;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
+ 	struct buffer_head *bh = 0;
+ 	unsigned long block;
+ 	unsigned long block_group;
+@@ -1997,23 +2010,19 @@ int ext3_get_inode_loc (struct inode *in
+ 		inode->i_ino != EXT3_JOURNAL_INO &&
+-		inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) ||
+-		inode->i_ino > le32_to_cpu(
+-			inode->i_sb->u.ext3_sb.s_es->s_inodes_count)) {
+-		ext3_error (inode->i_sb, "ext3_get_inode_loc",
+-			    "bad inode number: %lu", inode->i_ino);
++		inode->i_ino < EXT3_FIRST_INO(sb)) ||
++		inode->i_ino > le32_to_cpu(sbi->s_es->s_inodes_count)) {
++		ext3_error (sb, __FUNCTION__, "bad inode #%lu", inode->i_ino);
+ 		goto bad_inode;
+ 	}
+-	block_group = (inode->i_ino - 1) / EXT3_INODES_PER_GROUP(inode->i_sb);
+-	if (block_group >= inode->i_sb->u.ext3_sb.s_groups_count) {
+-		ext3_error (inode->i_sb, "ext3_get_inode_loc",
+-			    "group >= groups count");
++	block_group = (inode->i_ino - 1) / sbi->s_inodes_per_group;
++	if (block_group >= sbi->s_groups_count) {
++		ext3_error(sb, __FUNCTION__, "group >= groups count");
+ 		goto bad_inode;
+ 	}
+-	group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb);
+-	desc = block_group & (EXT3_DESC_PER_BLOCK(inode->i_sb) - 1);
+-	bh = inode->i_sb->u.ext3_sb.s_group_desc[group_desc];
++	group_desc = block_group >> sbi->s_desc_per_block_bits;
++	desc = block_group & (sbi->s_desc_per_block - 1);
++	bh = sbi->s_group_desc[group_desc];
+ 	if (!bh) {
+-		ext3_error (inode->i_sb, "ext3_get_inode_loc",
+-			    "Descriptor not loaded");
++		ext3_error(sb, __FUNCTION__, "Descriptor not loaded");
+ 		goto bad_inode;
+ 	}
+ 
+@@ -2021,17 +2022,17 @@ int ext3_get_inode_loc (struct inode *in
+ 	/*
+ 	 * Figure out the offset within the block group inode table
+ 	 */
+-	offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)) *
+-		EXT3_INODE_SIZE(inode->i_sb);
++	offset = ((inode->i_ino - 1) % sbi->s_inodes_per_group) *
++		sbi->s_inode_size;
+ 	block = le32_to_cpu(gdp[desc].bg_inode_table) +
+-		(offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb));
+-	if (!(bh = sb_bread(inode->i_sb, block))) {
+-		ext3_error (inode->i_sb, "ext3_get_inode_loc",
++		(offset >> EXT3_BLOCK_SIZE_BITS(sb));
++	if (!(bh = sb_bread(sb, block))) {
++		ext3_error (sb, __FUNCTION__,
+ 			    "unable to read inode block - "
+ 			    "inode=%lu, block=%lu", inode->i_ino, block);
+ 		goto bad_inode;
+ 	}
+-	offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1);
++	offset &= (EXT3_BLOCK_SIZE(sb) - 1);
+ 
+ 	iloc->bh = bh;
+ 	iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset);
+@@ -2047,6 +2048,7 @@ void ext3_read_inode(struct inode * inod
+ {
+ 	struct ext3_iloc iloc;
+ 	struct ext3_inode *raw_inode;
++	struct ext3_inode_info *ei = EXT3_I(inode);
+ 	struct buffer_head *bh;
+ 	int block;
+ 	
+@@ -2054,7 +2056,7 @@ void ext3_read_inode(struct inode * inod
+ 		goto bad_inode;
+ 	bh = iloc.bh;
+ 	raw_inode = iloc.raw_inode;
+-	init_rwsem(&inode->u.ext3_i.truncate_sem);
++	init_rwsem(&ei->truncate_sem);
+ 	inode->i_mode = le16_to_cpu(raw_inode->i_mode);
+ 	inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
+ 	inode->i_gid = (gid_t)le16_to_cpu(raw_inode->i_gid_low);
+@@ -2067,7 +2069,7 @@ void ext3_read_inode(struct inode * inod
+ 	inode->i_atime = le32_to_cpu(raw_inode->i_atime);
+ 	inode->i_ctime = le32_to_cpu(raw_inode->i_ctime);
+ 	inode->i_mtime = le32_to_cpu(raw_inode->i_mtime);
+-	inode->u.ext3_i.i_dtime = le32_to_cpu(raw_inode->i_dtime);
++	ei->i_dtime = le32_to_cpu(raw_inode->i_dtime);
+ 	/* We now have enough fields to check if the inode was active or not.
+ 	 * This is needed because nfsd might try to access dead inodes
+ 	 * the test is that same one that e2fsck uses
+@@ -2075,7 +2077,7 @@ void ext3_read_inode(struct inode * inod
+ 	 */
+ 	if (inode->i_nlink == 0) {
+ 		if (inode->i_mode == 0 ||
+-		    !(inode->i_sb->u.ext3_sb.s_mount_state & EXT3_ORPHAN_FS)) {
++		    !(EXT3_SB(inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
+ 			/* this inode is deleted */
+ 			brelse (bh);
+ 			goto bad_inode;
+@@ -2090,33 +2092,33 @@ void ext3_read_inode(struct inode * inod
+ 					 * size */  
+ 	inode->i_blocks = le32_to_cpu(raw_inode->i_blocks);
+ 	inode->i_version = ++event;
+-	inode->u.ext3_i.i_flags = le32_to_cpu(raw_inode->i_flags);
++	ei->i_flags = le32_to_cpu(raw_inode->i_flags);
+ #ifdef EXT3_FRAGMENTS
+-	inode->u.ext3_i.i_faddr = le32_to_cpu(raw_inode->i_faddr);
+-	inode->u.ext3_i.i_frag_no = raw_inode->i_frag;
+-	inode->u.ext3_i.i_frag_size = raw_inode->i_fsize;
++	ei->i_faddr = le32_to_cpu(raw_inode->i_faddr);
++	ei->i_frag_no = raw_inode->i_frag;
++	ei->i_frag_size = raw_inode->i_fsize;
+ #endif
+-	inode->u.ext3_i.i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
++	ei->i_file_acl = le32_to_cpu(raw_inode->i_file_acl);
+ 	if (!S_ISREG(inode->i_mode)) {
+-		inode->u.ext3_i.i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
++		ei->i_dir_acl = le32_to_cpu(raw_inode->i_dir_acl);
+ 	} else {
+ 		inode->i_size |=
+ 			((__u64)le32_to_cpu(raw_inode->i_size_high)) << 32;
+ 	}
+-	inode->u.ext3_i.i_disksize = inode->i_size;
++	ei->i_disksize = inode->i_size;
+ 	inode->i_generation = le32_to_cpu(raw_inode->i_generation);
+ #ifdef EXT3_PREALLOCATE
+-	inode->u.ext3_i.i_prealloc_count = 0;
++	ei->i_prealloc_count = 0;
+ #endif
+-	inode->u.ext3_i.i_block_group = iloc.block_group;
++	ei->i_block_group = iloc.block_group;
+ 
+ 	/*
+ 	 * NOTE! The in-memory inode i_data array is in little-endian order
+ 	 * even on big-endian machines: we do NOT byteswap the block numbers!
+ 	 */
+ 	for (block = 0; block < EXT3_N_BLOCKS; block++)
+-		inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block];
+-	INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan);
++		ei->i_data[block] = iloc.raw_inode->i_block[block];
++	INIT_LIST_HEAD(&ei->i_orphan);
+ 
+ 	brelse (iloc.bh);
+ 
+@@ -2143,17 +2145,17 @@ void ext3_read_inode(struct inode * inod
+ 	/* inode->i_attr_flags = 0;				unused */
+-	if (inode->u.ext3_i.i_flags & EXT3_SYNC_FL) {
++	if (ei->i_flags & EXT3_SYNC_FL) {
+ 		/* inode->i_attr_flags |= ATTR_FLAG_SYNCRONOUS; unused */
+ 		inode->i_flags |= S_SYNC;
+ 	}
+-	if (inode->u.ext3_i.i_flags & EXT3_APPEND_FL) {
++	if (ei->i_flags & EXT3_APPEND_FL) {
+ 		/* inode->i_attr_flags |= ATTR_FLAG_APPEND;	unused */
+ 		inode->i_flags |= S_APPEND;
+ 	}
+-	if (inode->u.ext3_i.i_flags & EXT3_IMMUTABLE_FL) {
++	if (ei->i_flags & EXT3_IMMUTABLE_FL) {
+ 		/* inode->i_attr_flags |= ATTR_FLAG_IMMUTABLE;	unused */
+ 		inode->i_flags |= S_IMMUTABLE;
+ 	}
+-	if (inode->u.ext3_i.i_flags & EXT3_NOATIME_FL) {
++	if (ei->i_flags & EXT3_NOATIME_FL) {
+ 		/* inode->i_attr_flags |= ATTR_FLAG_NOATIME;	unused */
+ 		inode->i_flags |= S_NOATIME;
+ 	}
+@@ -2175,6 +2177,7 @@ static int ext3_do_update_inode(handle_t
+ 				struct ext3_iloc *iloc)
+ {
+ 	struct ext3_inode *raw_inode = iloc->raw_inode;
++	struct ext3_inode_info *ei = EXT3_I(inode);
+ 	struct buffer_head *bh = iloc->bh;
+ 	int err = 0, rc, block;
+ 
+@@ -2192,7 +2195,7 @@ static int ext3_do_update_inode(handle_t
+  * Fix up interoperability with old kernels. Otherwise, old inodes get
+  * re-used with the upper 16 bits of the uid/gid intact
+  */
+-		if(!inode->u.ext3_i.i_dtime) {
++		if(!ei->i_dtime) {
+ 			raw_inode->i_uid_high =
+ 				cpu_to_le16(high_16_bits(inode->i_uid));
+ 			raw_inode->i_gid_high =
+@@ -2210,34 +2213,33 @@ static int ext3_do_update_inode(handle_t
+ 		raw_inode->i_gid_high = 0;
+ 	}
+ 	raw_inode->i_links_count = cpu_to_le16(inode->i_nlink);
+-	raw_inode->i_size = cpu_to_le32(inode->u.ext3_i.i_disksize);
++	raw_inode->i_size = cpu_to_le32(ei->i_disksize);
+ 	raw_inode->i_atime = cpu_to_le32(inode->i_atime);
+ 	raw_inode->i_ctime = cpu_to_le32(inode->i_ctime);
+ 	raw_inode->i_mtime = cpu_to_le32(inode->i_mtime);
+ 	raw_inode->i_blocks = cpu_to_le32(inode->i_blocks);
+-	raw_inode->i_dtime = cpu_to_le32(inode->u.ext3_i.i_dtime);
+-	raw_inode->i_flags = cpu_to_le32(inode->u.ext3_i.i_flags);
++	raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
++	raw_inode->i_flags = cpu_to_le32(ei->i_flags);
+ #ifdef EXT3_FRAGMENTS
+-	raw_inode->i_faddr = cpu_to_le32(inode->u.ext3_i.i_faddr);
+-	raw_inode->i_frag = inode->u.ext3_i.i_frag_no;
+-	raw_inode->i_fsize = inode->u.ext3_i.i_frag_size;
++	raw_inode->i_faddr = cpu_to_le32(ei->i_faddr);
++	raw_inode->i_frag = ei->i_frag_no;
++	raw_inode->i_fsize = ei->i_frag_size;
+ #else
+ 	/* If we are not tracking these fields in the in-memory inode,
+ 	 * then preserve them on disk, but still initialise them to zero
+ 	 * for new inodes. */
+-	if (EXT3_I(inode)->i_state & EXT3_STATE_NEW) {
++	if (ei->i_state & EXT3_STATE_NEW) {
+ 		raw_inode->i_faddr = 0;
+ 		raw_inode->i_frag = 0;
+ 		raw_inode->i_fsize = 0;
+ 	}
+ #endif
+-	raw_inode->i_file_acl = cpu_to_le32(inode->u.ext3_i.i_file_acl);
++	raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl);
+ 	if (!S_ISREG(inode->i_mode)) {
+-		raw_inode->i_dir_acl = cpu_to_le32(inode->u.ext3_i.i_dir_acl);
++		raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
+ 	} else {
+-		raw_inode->i_size_high =
+-			cpu_to_le32(inode->u.ext3_i.i_disksize >> 32);
+-		if (inode->u.ext3_i.i_disksize > 0x7fffffffULL) {
++		raw_inode->i_size_high = cpu_to_le32(ei->i_disksize >> 32);
++		if (ei->i_disksize > MAX_NON_LFS) {
+ 			struct super_block *sb = inode->i_sb;
+ 			if (!EXT3_HAS_RO_COMPAT_FEATURE(sb,
+ 					EXT3_FEATURE_RO_COMPAT_LARGE_FILE) ||
+@@ -2247,7 +2249,7 @@ static int ext3_do_update_inode(handle_t
+ 				* created, add a flag to the superblock.
+ 				*/
+ 				err = ext3_journal_get_write_access(handle,
+-						sb->u.ext3_sb.s_sbh);
++						EXT3_SB(sb)->s_sbh);
+ 				if (err)
+ 					goto out_brelse;
+ 				ext3_update_dynamic_rev(sb);
+@@ -2256,7 +2258,7 @@ static int ext3_do_update_inode(handle_t
+ 				sb->s_dirt = 1;
+ 				handle->h_sync = 1;
+ 				err = ext3_journal_dirty_metadata(handle,
+-						sb->u.ext3_sb.s_sbh);
++						EXT3_SB(sb)->s_sbh);
+ 			}
+ 		}
+ 	}
+@@ -2265,13 +2267,13 @@ static int ext3_do_update_inode(handle_t
+ 		raw_inode->i_block[0] =
+ 			cpu_to_le32(kdev_t_to_nr(inode->i_rdev));
+ 	else for (block = 0; block < EXT3_N_BLOCKS; block++)
+-		raw_inode->i_block[block] = inode->u.ext3_i.i_data[block];
++		raw_inode->i_block[block] = ei->i_data[block];
+ 
+ 	BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
+ 	rc = ext3_journal_dirty_metadata(handle, bh);
+ 	if (!err)
+ 		err = rc;
+-	EXT3_I(inode)->i_state &= ~EXT3_STATE_NEW;
++	ei->i_state &= ~EXT3_STATE_NEW;
+ 
+ out_brelse:
+ 	brelse (bh);
+@@ -2379,7 +2381,7 @@ int ext3_setattr(struct dentry *dentry, 
+ 		}
+ 		
+ 		error = ext3_orphan_add(handle, inode);
+-		inode->u.ext3_i.i_disksize = attr->ia_size;
++		EXT3_I(inode)->i_disksize = attr->ia_size;
+ 		rc = ext3_mark_inode_dirty(handle, inode);
+ 		if (!error)
+ 			error = rc;
+@@ -2622,9 +2624,9 @@ int ext3_change_inode_journal_flag(struc
+ 	 */
+ 
+ 	if (val)
+-		inode->u.ext3_i.i_flags |= EXT3_JOURNAL_DATA_FL;
++		EXT3_I(inode)->i_flags |= EXT3_JOURNAL_DATA_FL;
+ 	else
+-		inode->u.ext3_i.i_flags &= ~EXT3_JOURNAL_DATA_FL;
++		EXT3_I(inode)->i_flags &= ~EXT3_JOURNAL_DATA_FL;
+ 
+ 	journal_unlock_updates(journal);
+ 
+--- ./fs/ext3/ioctl.c.orig	Fri Apr 12 10:27:49 2002
++++ ./fs/ext3/ioctl.c	Tue May  7 15:20:52 2002
+@@ -18,13 +18,14 @@
+ int ext3_ioctl (struct inode * inode, struct file * filp, unsigned int cmd,
+ 		unsigned long arg)
+ {
++	struct ext3_inode_info *ei = EXT3_I(inode);
+ 	unsigned int flags;
+ 
+ 	ext3_debug ("cmd = %u, arg = %lu\n", cmd, arg);
+ 
+ 	switch (cmd) {
+ 	case EXT3_IOC_GETFLAGS:
+-		flags = inode->u.ext3_i.i_flags & EXT3_FL_USER_VISIBLE;
++		flags = ei->i_flags & EXT3_FL_USER_VISIBLE;
+ 		return put_user(flags, (int *) arg);
+ 	case EXT3_IOC_SETFLAGS: {
+ 		handle_t *handle = NULL;
+@@ -42,7 +42,7 @@ int ext3_ioctl (struct inode * inode, st
+ 		if (get_user(flags, (int *) arg))
+ 			return -EFAULT;
+ 
+-		oldflags = inode->u.ext3_i.i_flags;
++		oldflags = ei->i_flags;
+ 
+ 		/* The JOURNAL_DATA flag is modifiable only by root */
+ 		jflag = flags & EXT3_JOURNAL_DATA_FL;
+@@ -79,7 +79,7 @@ int ext3_ioctl (struct inode * inode, st
+ 		
+ 		flags = flags & EXT3_FL_USER_MODIFIABLE;
+ 		flags |= oldflags & ~EXT3_FL_USER_MODIFIABLE;
+-		inode->u.ext3_i.i_flags = flags;
++		ei->i_flags = flags;
+ 
+ 		if (flags & EXT3_SYNC_FL)
+ 			inode->i_flags |= S_SYNC;
+@@ -155,12 +155,12 @@ flags_err:
+ 			int ret = 0;
+ 
+ 			set_current_state(TASK_INTERRUPTIBLE);
+-			add_wait_queue(&sb->u.ext3_sb.ro_wait_queue, &wait);
+-			if (timer_pending(&sb->u.ext3_sb.turn_ro_timer)) {
++			add_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
++			if (timer_pending(&EXT3_SB(sb)->turn_ro_timer)) {
+ 				schedule();
+ 				ret = 1;
+ 			}
+-			remove_wait_queue(&sb->u.ext3_sb.ro_wait_queue, &wait);
++			remove_wait_queue(&EXT3_SB(sb)->ro_wait_queue, &wait);
+ 			return ret;
+ 		}
+ #endif
+--- ./fs/ext3/namei.c.orig	Fri Apr 12 10:27:49 2002
++++ ./fs/ext3/namei.c	Tue May  7 16:05:51 2002
+@@ -1430,8 +1430,8 @@ int ext3_orphan_add(handle_t *handle, st
+ 	J_ASSERT ((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ 		S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
+ 
+-	BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "get_write_access");
+-	err = ext3_journal_get_write_access(handle, sb->u.ext3_sb.s_sbh);
++	BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
++	err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
+ 	if (err)
+ 		goto out_unlock;
+ 	
+@@ -1442,7 +1442,7 @@ int ext3_orphan_add(handle_t *handle, st
+ 	/* Insert this inode at the head of the on-disk orphan list... */
+ 	NEXT_ORPHAN(inode) = le32_to_cpu(EXT3_SB(sb)->s_es->s_last_orphan);
+ 	EXT3_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
+-	err = ext3_journal_dirty_metadata(handle, sb->u.ext3_sb.s_sbh);
++	err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
+ 	rc = ext3_mark_iloc_dirty(handle, inode, &iloc);
+ 	if (!err)
+ 		err = rc;
+@@ -1520,8 +1520,7 @@ int ext3_orphan_del(handle_t *handle, st
+ 		err = ext3_journal_dirty_metadata(handle, sbi->s_sbh);
+ 	} else {
+ 		struct ext3_iloc iloc2;
+-		struct inode *i_prev =
+-			list_entry(prev, struct inode, u.ext3_i.i_orphan);
++		struct inode *i_prev = orphan_list_entry(prev);
+ 
+ 		jbd_debug(4, "orphan inode %lu will point to %lu\n",
+ 			  i_prev->i_ino, ino_next);
+--- ./fs/ext3/super.c.orig	Fri Apr 12 10:27:49 2002
++++ ./fs/ext3/super.c	Tue May  7 16:05:44 2002
+@@ -121,7 +121,7 @@ static int ext3_error_behaviour(struct s
+ 	/* If no overrides were specified on the mount, then fall back
+ 	 * to the default behaviour set in the filesystem's superblock
+ 	 * on disk. */
+-	switch (le16_to_cpu(sb->u.ext3_sb.s_es->s_errors)) {
++	switch (le16_to_cpu(EXT3_SB(sb)->s_es->s_errors)) {
+ 	case EXT3_ERRORS_PANIC:
+ 		return EXT3_ERRORS_PANIC;
+ 	case EXT3_ERRORS_RO:
+@@ -269,9 +269,9 @@ void ext3_abort (struct super_block * sb
+ 		return;
+ 	
+ 	printk (KERN_CRIT "Remounting filesystem read-only\n");
+-	sb->u.ext3_sb.s_mount_state |= EXT3_ERROR_FS;
++	EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
+ 	sb->s_flags |= MS_RDONLY;
+-	sb->u.ext3_sb.s_mount_opt |= EXT3_MOUNT_ABORT;
++	EXT3_SB(sb)->s_mount_opt |= EXT3_MOUNT_ABORT;
+ 	journal_abort(EXT3_SB(sb)->s_journal, -EIO);
+ }
+ 
+@@ -377,8 +377,6 @@ static int ext3_blkdev_remove(struct ext3
+ 	return ret;
+ }
+ 
+-#define orphan_list_entry(l) list_entry((l), struct inode, u.ext3_i.i_orphan)
+-
+ static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
+ {
+ 	struct list_head *l;
+@@ -818,7 +818,7 @@ static void ext3_orphan_cleanup (struct 
+ 		sb->s_flags &= ~MS_RDONLY;
+ 	}
+ 
+-	if (sb->u.ext3_sb.s_mount_state & EXT3_ERROR_FS) {
++	if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
+ 		if (es->s_last_orphan)
+ 			jbd_debug(1, "Errors on filesystem, "
+ 				  "clearing orphan list.\n");
+@@ -1463,12 +1463,14 @@ static void ext3_commit_super (struct su
+ 			       struct ext3_super_block * es,
+ 			       int sync)
+ {
++	struct buffer_head *sbh = EXT3_SB(sb)->s_sbh;
++
+ 	es->s_wtime = cpu_to_le32(CURRENT_TIME);
+-	BUFFER_TRACE(sb->u.ext3_sb.s_sbh, "marking dirty");
+-	mark_buffer_dirty(sb->u.ext3_sb.s_sbh);
++	BUFFER_TRACE(sbh, "marking dirty");
++	mark_buffer_dirty(sbh);
+ 	if (sync) {
+-		ll_rw_block(WRITE, 1, &sb->u.ext3_sb.s_sbh);
+-		wait_on_buffer(sb->u.ext3_sb.s_sbh);
++		ll_rw_block(WRITE, 1, &sbh);
++		wait_on_buffer(sbh);
+ 	}
+ }
+ 
+@@ -1519,7 +1521,7 @@ static void ext3_clear_journal_err(struc
+ 		ext3_warning(sb, __FUNCTION__, "Marking fs in need of "
+ 			     "filesystem check.");
+ 		
+-		sb->u.ext3_sb.s_mount_state |= EXT3_ERROR_FS;
++		EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
+ 		es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
+ 		ext3_commit_super (sb, es, 1);
+ 
+--- ./fs/ext3/symlink.c.orig	Fri Apr 12 10:27:49 2002
++++ ./fs/ext3/symlink.c	Tue May  7 15:25:39 2002
+@@ -23,13 +23,13 @@
+ 
+ static int ext3_readlink(struct dentry *dentry, char *buffer, int buflen)
+ {
+-	char *s = (char *)dentry->d_inode->u.ext3_i.i_data;
+-	return vfs_readlink(dentry, buffer, buflen, s);
++	struct ext3_inode_info *ei = EXT3_I(dentry->d_inode);
++	return vfs_readlink(dentry, buffer, buflen, (char *)ei->i_data);
+ }
+ 
+ static int ext3_follow_link(struct dentry *dentry, struct nameidata *nd)
+ {
+-	char *s = (char *)dentry->d_inode->u.ext3_i.i_data;
+-	return vfs_follow_link(nd, s);
++	struct ext3_inode_info *ei = EXT3_I(dentry->d_inode);
++	return vfs_follow_link(nd, (char*)ei->i_data);
+ }
+ 
+--- ./include/linux/ext3_fs.h.orig	Tue Apr 16 14:27:25 2002
++++ ./include/linux/ext3_fs.h	Tue May  7 16:47:36 2002
+@@ -84,22 +84,25 @@
+ #define EXT3_MIN_BLOCK_SIZE		1024
+ #define	EXT3_MAX_BLOCK_SIZE		4096
+ #define EXT3_MIN_BLOCK_LOG_SIZE		  10
++
+ #ifdef __KERNEL__
+-# define EXT3_BLOCK_SIZE(s)		((s)->s_blocksize)
+-#else
+-# define EXT3_BLOCK_SIZE(s)		(EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size)
+-#endif
+-#define	EXT3_ADDR_PER_BLOCK(s)		(EXT3_BLOCK_SIZE(s) / sizeof (__u32))
+-#ifdef __KERNEL__
+-# define EXT3_BLOCK_SIZE_BITS(s)	((s)->s_blocksize_bits)
+-#else
+-# define EXT3_BLOCK_SIZE_BITS(s)	((s)->s_log_block_size + 10)
+-#endif
+-#ifdef __KERNEL__
+-#define	EXT3_ADDR_PER_BLOCK_BITS(s)	((s)->u.ext3_sb.s_addr_per_block_bits)
+-#define EXT3_INODE_SIZE(s)		((s)->u.ext3_sb.s_inode_size)
+-#define EXT3_FIRST_INO(s)		((s)->u.ext3_sb.s_first_ino)
++#define EXT3_SB(sb)	(&((sb)->u.ext3_sb))
++#define EXT3_I(inode)	(&((inode)->u.ext3_i))
++
++#define EXT3_BLOCK_SIZE(s)		((s)->s_blocksize)
++#define EXT3_BLOCK_SIZE_BITS(s)		((s)->s_blocksize_bits)
++#define	EXT3_ADDR_PER_BLOCK_BITS(s)	(EXT3_SB(s)->s_addr_per_block_bits)
++#define EXT3_INODE_SIZE(s)		(EXT3_SB(s)->s_inode_size)
++#define EXT3_FIRST_INO(s)		(EXT3_SB(s)->s_first_ino)
+ #else
++
++/* Assume that user mode programs are passing in an ext3fs superblock, not
++ * a kernel struct super_block.  This will allow us to call the feature-test
++ * macros from user land. */
++#define EXT3_SB(sb)	(sb)
++
++#define EXT3_BLOCK_SIZE(s)	(EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size)
++#define EXT3_BLOCK_SIZE_BITS(s)	((s)->s_log_block_size + 10)
+ #define EXT3_INODE_SIZE(s)	(((s)->s_rev_level == EXT3_GOOD_OLD_REV) ? \
+ 				 EXT3_GOOD_OLD_INODE_SIZE : \
+ 				 (s)->s_inode_size)
+@@ -108,6 +110,7 @@
+ 				 EXT3_GOOD_OLD_FIRST_INO : \
+ 				 (s)->s_first_ino)
+ #endif
++#define EXT3_ADDR_PER_BLOCK(s)	(EXT3_BLOCK_SIZE(s) / sizeof (__u32))
+ 
+ /*
+  * Macro-instructions used to manage fragments
+@@ -116,8 +120,8 @@
+ #define	EXT3_MAX_FRAG_SIZE		4096
+ #define EXT3_MIN_FRAG_LOG_SIZE		  10
+ #ifdef __KERNEL__
+-# define EXT3_FRAG_SIZE(s)		((s)->u.ext3_sb.s_frag_size)
+-# define EXT3_FRAGS_PER_BLOCK(s)	((s)->u.ext3_sb.s_frags_per_block)
++# define EXT3_FRAG_SIZE(s)		(EXT3_SB(s)->s_frag_size)
++# define EXT3_FRAGS_PER_BLOCK(s)	(EXT3_SB(s)->s_frags_per_block)
+ #else
+ # define EXT3_FRAG_SIZE(s)		(EXT3_MIN_FRAG_SIZE << (s)->s_log_frag_size)
+ # define EXT3_FRAGS_PER_BLOCK(s)	(EXT3_BLOCK_SIZE(s) / EXT3_FRAG_SIZE(s))
+@@ -163,15 +167,13 @@
+ /*
+  * Macro-instructions used to manage group descriptors
+  */
++# define EXT3_BLOCKS_PER_GROUP(s)	(EXT3_SB(s)->s_blocks_per_group)
++# define EXT3_INODES_PER_GROUP(s)	(EXT3_SB(s)->s_inodes_per_group)
+ #ifdef __KERNEL__
+-# define EXT3_BLOCKS_PER_GROUP(s)	((s)->u.ext3_sb.s_blocks_per_group)
+-# define EXT3_DESC_PER_BLOCK(s)		((s)->u.ext3_sb.s_desc_per_block)
+-# define EXT3_INODES_PER_GROUP(s)	((s)->u.ext3_sb.s_inodes_per_group)
+-# define EXT3_DESC_PER_BLOCK_BITS(s)	((s)->u.ext3_sb.s_desc_per_block_bits)
++# define EXT3_DESC_PER_BLOCK(s)		(EXT3_SB(s)->s_desc_per_block)
++# define EXT3_DESC_PER_BLOCK_BITS(s)	(EXT3_SB(s)->s_desc_per_block_bits)
+ #else
+-# define EXT3_BLOCKS_PER_GROUP(s)	((s)->s_blocks_per_group)
+ # define EXT3_DESC_PER_BLOCK(s)		(EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_group_desc))
+-# define EXT3_INODES_PER_GROUP(s)	((s)->s_inodes_per_group)
+ #endif
+ 
+ /*
+@@ -344,7 +347,7 @@
+ #ifndef _LINUX_EXT2_FS_H
+ #define clear_opt(o, opt)		o &= ~EXT3_MOUNT_##opt
+ #define set_opt(o, opt)			o |= EXT3_MOUNT_##opt
+-#define test_opt(sb, opt)		((sb)->u.ext3_sb.s_mount_opt & \
++#define test_opt(sb, opt)		(EXT3_SB(sb)->s_mount_opt & \
+ 					 EXT3_MOUNT_##opt)
+ #else
+ #define EXT2_MOUNT_NOLOAD		EXT3_MOUNT_NOLOAD
+@@ -441,17 +443,11 @@
+ /*EC*/	__u32	s_reserved[197];	/* Padding to the end of the block */
+ };
+ 
+-#ifdef __KERNEL__
+-#define EXT3_SB(sb)	(&((sb)->u.ext3_sb))
+-#define EXT3_I(inode)	(&((inode)->u.ext3_i))
+-#else
+-/* Assume that user mode programs are passing in an ext3fs superblock, not
+- * a kernel struct super_block.  This will allow us to call the feature-test
+- * macros from user land. */
+-#define EXT3_SB(sb)	(sb)
+-#endif
+-
+-#define NEXT_ORPHAN(inode) (inode)->u.ext3_i.i_dtime
++#define NEXT_ORPHAN(inode) EXT3_I(inode)->i_dtime
++static inline struct inode *orphan_list_entry(struct list_head *l)
++{
++	return list_entry(l, struct inode, u.ext3_i.i_orphan);
++}
+ 
+ /*
+  * Codes for operating systems
+--- ./include/linux/ext3_jbd.h.orig	Tue May  7 14:44:08 2002
++++ ./include/linux/ext3_jbd.h	Tue May  7 14:44:43 2002
+@@ -291,7 +291,7 @@
+ 		return 1;
+ 	if (test_opt(inode->i_sb, DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA)
+ 		return 1;
+-	if (inode->u.ext3_i.i_flags & EXT3_JOURNAL_DATA_FL)
++	if (EXT3_I(inode)->i_flags & EXT3_JOURNAL_DATA_FL)
+ 		return 1;
+ 	return 0;
+ }
diff --git a/lustre/kernel_patches/patches/ext3-compat-2.4.18-chaos.patch b/lustre/kernel_patches/patches/ext3-compat-2.4.18-chaos.patch
new file mode 100644
index 0000000..7cd3384
--- /dev/null
+++ b/lustre/kernel_patches/patches/ext3-compat-2.4.18-chaos.patch
@@ -0,0 +1,19 @@
+ fs/ext3/namei.c |    2 +-
+ 1 files changed, 1 insertion(+), 1 deletion(-)
+
+diff -puN fs/ext3/namei.c~ext3-compat-2.4.18-chaos fs/ext3/namei.c
+--- linux-2.4.18/fs/ext3/namei.c~ext3-compat-2.4.18-chaos	2003-08-28 20:14:27.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/namei.c	2003-08-28 20:14:27.000000000 +0400
+@@ -830,9 +830,9 @@ static int ext3_rmdir (struct inode * di
+ 	 * recovery. */
+ 	inode->i_size = 0;
+ 	ext3_orphan_add(handle, inode);
+-	ext3_mark_inode_dirty(handle, inode);
+ 	dir->i_nlink--;
+ 	inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
++	ext3_mark_inode_dirty(handle, inode);
+ 	dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
+ 	ext3_mark_inode_dirty(handle, dir);
+ 
+
+_
diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18-2.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18-2.patch
new file mode 100644
index 0000000..a173981
--- /dev/null
+++ b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.18-2.patch
@@ -0,0 +1,478 @@
+
+Create a service thread to handle delete and truncate of inodes, to avoid
+long latency while truncating very large files.
+
+
+ fs/ext3/inode.c            |  116 ++++++++++++++++++++++
+ fs/ext3/super.c            |  231 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_fs.h    |    5 
+ include/linux/ext3_fs_sb.h |   10 +
+ 4 files changed, 362 insertions(+)
+
+--- linux-2.4.18-18.8.0-l15/fs/ext3/super.c~ext3-delete_thread-2.4.18	Tue Jun  3 17:26:21 2003
++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/super.c	Wed Jul  2 23:49:40 2003
+@@ -396,6 +396,220 @@ static void dump_orphan_list(struct supe
+ 	}
+ }
+ 
++#ifdef EXT3_DELETE_THREAD
++/*
++ * Per-superblock thread that drains sbi->s_delete_list.  It sleeps on
++ * s_delete_thread_queue until an inode is queued (linked via i_dentry) or
++ * the ASYNCDEL option is cleared, then iput()s each queued inode with
++ * s_delete_lock dropped.  s_delete_waiter_queue is woken after every pass,
++ * so anyone concerned about free space can sleep on it.  When woken with
++ * an empty list the thread zeroes the list head, wakes waiters and exits.
++ */
++int ext3_delete_thread(void *data)
++{
++	struct super_block *sb = data;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	struct task_struct *tsk = current;
++
++	/* Almost like daemonize, but not quite */
++	exit_mm(current);
++	tsk->session = 1;
++	tsk->pgrp = 1;
++	tsk->tty = NULL;
++	exit_files(current);
++	reparent_to_init();
++
++	sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev));
++	sigfillset(&tsk->blocked);
++
++	/*tsk->flags |= PF_KERNTHREAD;*/
++
++	INIT_LIST_HEAD(&sbi->s_delete_list);
++	wake_up(&sbi->s_delete_waiter_queue);
++	ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev));
++
++	/* main loop */
++	for (;;) {
++		wait_event_interruptible(sbi->s_delete_thread_queue,
++					 !list_empty(&sbi->s_delete_list) ||
++					 !test_opt(sb, ASYNCDEL));
++		ext3_debug("%s woken up: %lu inodes, %lu blocks\n",
++			   tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks);
++
++		spin_lock(&sbi->s_delete_lock);
++		if (list_empty(&sbi->s_delete_list)) {
++			clear_opt(sbi->s_mount_opt, ASYNCDEL);
++			memset(&sbi->s_delete_list, 0,
++			       sizeof(sbi->s_delete_list));
++			spin_unlock(&sbi->s_delete_lock);
++			ext3_debug("delete thread on %s exiting\n",
++				   kdevname(sb->s_dev));
++			wake_up(&sbi->s_delete_waiter_queue);
++			break;
++		}
++
++		while (!list_empty(&sbi->s_delete_list)) {
++			struct inode *inode=list_entry(sbi->s_delete_list.next,
++						       struct inode, i_dentry);
++			unsigned long blocks = inode->i_blocks >>
++							(inode->i_blkbits - 9);
++
++			list_del_init(&inode->i_dentry);
++			spin_unlock(&sbi->s_delete_lock);
++			ext3_debug("%s delete ino %lu blk %lu\n",
++				   tsk->comm, inode->i_ino, blocks);
++
++			iput(inode);
++
++			spin_lock(&sbi->s_delete_lock);
++			sbi->s_delete_blocks -= blocks;
++			sbi->s_delete_inodes--;
++		}
++		if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) {
++			ext3_warning(sb, __FUNCTION__,
++				     "%lu blocks, %lu inodes on list?",
++				     sbi->s_delete_blocks,sbi->s_delete_inodes);
++			sbi->s_delete_blocks = 0;
++			sbi->s_delete_inodes = 0;
++		}
++		spin_unlock(&sbi->s_delete_lock);
++		wake_up(&sbi->s_delete_waiter_queue);
++	}
++
++	return 0;
++}
++
++static void ext3_start_delete_thread(struct super_block *sb)
++{
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	int rc;
++
++	spin_lock_init(&sbi->s_delete_lock);
++	init_waitqueue_head(&sbi->s_delete_thread_queue);
++	init_waitqueue_head(&sbi->s_delete_waiter_queue);
++
++	if (!test_opt(sb, ASYNCDEL))
++		return;		/* async delete not requested: no thread */
++
++	rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES);
++	if (rc < 0)
++		printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n",
++		       rc);
++	else	/* sleep until the thread has run INIT_LIST_HEAD() on the list */
++		wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next);
++}
++
++static void ext3_stop_delete_thread(struct ext3_sb_info *sbi)
++{
++	if (sbi->s_delete_list.next == 0)	/* never started or exited */
++		return;
++	/* ask the thread to exit; it zeroes the list head when it does */
++	clear_opt(sbi->s_mount_opt, ASYNCDEL);
++	wake_up(&sbi->s_delete_thread_queue);
++	wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next == 0);
++}
++
++/* Instead of playing games with the inode flags, destruction, etc we just
++ * iget() a second in-core copy of the inode and queue it for the delete
++ * thread.  We need large parts of the inode struct in order to complete
++ * the truncate and unlink, so we may as well have a real inode to do it.
++ *
++ * If we have any problem deferring the delete, just delete it right away.
++ * If we defer it, we also mark how many blocks it would free, so that we
++ * can keep the statfs data correct, and we know if we should sleep on the
++ * delete thread when we run out of space.
++ */
++static void ext3_delete_inode_thread(struct inode *old_inode)
++{
++	struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++	struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
++	struct inode *new_inode;
++	unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
++
++	if (is_bad_inode(old_inode)) {
++		clear_inode(old_inode);
++		return;
++	}
++
++	if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++		goto out_delete;
++
++	/* We may want to delete the inode immediately and not defer it */
++	if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS)
++		goto out_delete;
++
++	/* We can't use the delete thread as-is during real orphan recovery,
++	 * as we add to the orphan list here, causing ext3_orphan_cleanup()
++	 * to loop endlessly.  It would be nice to do so, but needs work.
++	 */
++	if (oei->i_state & EXT3_STATE_DELETE ||
++	    sbi->s_mount_state & EXT3_ORPHAN_FS) {
++		ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
++			   old_inode->i_ino, blocks);
++		goto out_delete;
++	}
++
++	/* We can iget this inode again here, because our caller has unhashed
++	 * old_inode, so new_inode will be in a different inode struct; setting
++	 * EXT3_ORPHAN_FS around the iget() keeps ext3_read_inode() from
++	 * rejecting the (presumably i_nlink == 0) inode as already deleted.
++	 * The i_orphan pointers in the neighbouring inodes must then point at
++	 * the new copy so the list isn't corrupted when old_inode is freed.
++	 */
++	down(&sbi->s_orphan_lock);
++
++	sbi->s_mount_state |= EXT3_ORPHAN_FS;
++	new_inode = iget(old_inode->i_sb, old_inode->i_ino);
++	sbi->s_mount_state &= ~EXT3_ORPHAN_FS;
++	if (is_bad_inode(new_inode)) {
++		printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
++		iput(new_inode);
++		new_inode = NULL;
++	}
++	if (!new_inode) {
++		up(&sbi->s_orphan_lock);
++		ext3_debug("delete inode %lu directly (bad read)\n",
++			   old_inode->i_ino);
++		goto out_delete;
++	}
++	J_ASSERT(new_inode != old_inode);
++
++	J_ASSERT(!list_empty(&oei->i_orphan));
++
++	nei = EXT3_I(new_inode);
++	/* Ugh.  We need to insert new_inode into the same spot on the list
++	 * as old_inode was, to ensure the in-memory orphan list is still
++	 * in the same order as the on-disk orphan list (badness otherwise).
++	 */
++	nei->i_orphan = oei->i_orphan;
++	nei->i_orphan.next->prev = &nei->i_orphan;
++	nei->i_orphan.prev->next = &nei->i_orphan;
++	nei->i_state |= EXT3_STATE_DELETE;
++	up(&sbi->s_orphan_lock);
++
++	clear_inode(old_inode);
++
++	spin_lock(&sbi->s_delete_lock);
++	J_ASSERT(list_empty(&new_inode->i_dentry));
++	list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
++	sbi->s_delete_blocks += blocks;
++	sbi->s_delete_inodes++;
++	spin_unlock(&sbi->s_delete_lock);
++
++	ext3_debug("delete inode %lu (%lu blocks) by thread\n",
++		   new_inode->i_ino, blocks);
++
++	wake_up(&sbi->s_delete_thread_queue);
++	return;
++
++out_delete:
++	ext3_delete_inode(old_inode);
++}
++#else
++#define ext3_start_delete_thread(sbi) do {} while(0)
++#define ext3_stop_delete_thread(sbi) do {} while(0)
++#endif /* EXT3_DELETE_THREAD */
++
+ void ext3_put_super (struct super_block * sb)
+ {
+ 	struct ext3_sb_info *sbi = EXT3_SB(sb);
+@@ -403,6 +617,7 @@ void ext3_put_super (struct super_block 
+ 	kdev_t j_dev = sbi->s_journal->j_dev;
+ 	int i;
+ 
++	ext3_stop_delete_thread(sbi);
+ 	ext3_xattr_put_super(sb);
+ 	journal_destroy(sbi->s_journal);
+ 	if (!(sb->s_flags & MS_RDONLY)) {
+@@ -451,7 +666,11 @@ static struct super_operations ext3_sops
+ 	write_inode:	ext3_write_inode,	/* BKL not held.  Don't need */
+ 	dirty_inode:	ext3_dirty_inode,	/* BKL not held.  We take it */
+ 	put_inode:	ext3_put_inode,		/* BKL not held.  Don't need */
++#ifdef EXT3_DELETE_THREAD
++	delete_inode:	ext3_delete_inode_thread,/* BKL not held. We take it */
++#else
+ 	delete_inode:	ext3_delete_inode,	/* BKL not held.  We take it */
++#endif
+ 	put_super:	ext3_put_super,		/* BKL held */
+ 	write_super:	ext3_write_super,	/* BKL held */
+ 	write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */
+@@ -511,6 +730,14 @@ static int parse_options (char * options
+ 	     this_char = strtok (NULL, ",")) {
+ 		if ((value = strchr (this_char, '=')) != NULL)
+ 			*value++ = 0;
++#ifdef EXT3_DELETE_THREAD
++		if (!strcmp(this_char, "asyncdel"))
++			set_opt(*mount_options, ASYNCDEL);
++		else if (!strcmp(this_char, "noasyncdel"))
++			clear_opt(*mount_options, ASYNCDEL);
++		else
++#endif
++
+ 		if (!strcmp (this_char, "bsddf"))
+ 			clear_opt (*mount_options, MINIX_DF);
+ 		else if (!strcmp (this_char, "nouid32")) {
+@@ -1206,6 +1433,7 @@ struct super_block * ext3_read_super (st
+ 	}
+ 
+ 	ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
++	ext3_start_delete_thread(sb);
+ 	/*
+ 	 * akpm: core read_super() calls in here with the superblock locked.
+ 	 * That deadlocks, because orphan cleanup needs to lock the superblock
+@@ -1648,6 +1876,9 @@ int ext3_remount (struct super_block * s
+ 	if (!parse_options(data, &tmp, sbi, &tmp, 1))
+ 		return -EINVAL;
+ 
++	if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY))
++		ext3_stop_delete_thread(sbi);
++
+ 	if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
+ 		ext3_abort(sb, __FUNCTION__, "Abort forced by user");
+ 
+--- linux/fs/ext3/file.c.orig	Fri Jan 17 10:57:31 2003
++++ linux/fs/ext3/file.c	Mon Jun 30 13:28:52 2003
+@@ -121,7 +121,11 @@ struct file_operations ext3_file_operati
+ };
+ 
+ struct inode_operations ext3_file_inode_operations = {
++#ifdef EXT3_DELETE_THREAD
++	truncate:	ext3_truncate_thread,	/* BKL held */
++#else
+ 	truncate:	ext3_truncate,		/* BKL held */
++#endif
+ 	setattr:	ext3_setattr,		/* BKL held */
+ };
+ 
+--- linux-2.4.18-18.8.0-l15/fs/ext3/inode.c~ext3-delete_thread-2.4.18	Wed Jul  2 23:13:58 2003
++++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/inode.c	Wed Jul  2 23:50:29 2003
+@@ -2004,6 +2004,118 @@ out_stop:
+ 	ext3_journal_stop(handle, inode);
+ }
+ 
++#ifdef EXT3_DELETE_THREAD
++/* Move blocks from to-be-truncated inode over to a new inode, and delete
++ * that one from the delete thread instead.  This avoids a lot of latency
++ * when truncating large files.
++ *
++ * If we have any problem deferring the truncate, just truncate it right away.
++ * If we defer it, we also mark how many blocks it would free, so that we
++ * can keep the statfs data correct, and we know if we should sleep on the
++ * delete thread when we run out of space.
++ */
++void ext3_truncate_thread(struct inode *old_inode)
++{
++	struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++	struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
++	struct inode *new_inode;
++	handle_t *handle;
++	unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
++
++	if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++		goto out_truncate;
++
++	/* XXX This is a temporary limitation for code simplicity.
++	 *     We could truncate to arbitrary sizes at some later time.
++	 */
++	if (old_inode->i_size != 0)
++		goto out_truncate;
++
++	/* We may want to truncate the inode immediately and not defer it */
++	if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
++	    old_inode->i_size > oei->i_disksize)
++		goto out_truncate;
++
++	/* We can't use the delete thread as-is during real orphan recovery,
++	 * as we add to the orphan list here, causing ext3_orphan_cleanup()
++	 * to loop endlessly.  It would be nice to do so, but needs work.
++	 */
++	if (oei->i_state & EXT3_STATE_DELETE ||
++	    sbi->s_mount_state & EXT3_ORPHAN_FS) {
++		ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
++			   old_inode->i_ino, blocks);
++		goto out_truncate;
++	}
++
++	ext3_discard_prealloc(old_inode);
++
++	/* old_inode   = 1
++	 * new_inode   = sb + GDT + ibitmap
++	 * orphan list = 1 inode/superblock for add, 2 inodes for del
++	 * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
++	 */
++	handle = ext3_journal_start(old_inode, 7);
++	if (IS_ERR(handle))
++		goto out_truncate;
++
++	new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
++	if (IS_ERR(new_inode)) {
++		ext3_debug("truncate inode %lu directly (no new inodes)\n",
++			   old_inode->i_ino);
++		goto out_journal;
++	}
++
++	nei = EXT3_I(new_inode);
++
++	down_write(&oei->truncate_sem);
++	new_inode->i_size = old_inode->i_size;
++	new_inode->i_blocks = old_inode->i_blocks;
++	new_inode->i_uid = old_inode->i_uid;
++	new_inode->i_gid = old_inode->i_gid;
++	new_inode->i_nlink = 0;
++
++	/* FIXME when we do arbitrary truncates */
++	old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0;
++	old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME;
++
++	memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data));
++	memset(oei->i_data, 0, sizeof(oei->i_data));
++
++	nei->i_disksize = oei->i_disksize;
++	nei->i_state |= EXT3_STATE_DELETE;
++	up_write(&oei->truncate_sem);
++
++	if (ext3_orphan_add(handle, new_inode) < 0)
++		goto out_iput;
++	if (ext3_orphan_del(handle, old_inode) < 0) {
++		ext3_orphan_del(handle, new_inode);
++		goto out_iput;
++	}
++
++	ext3_journal_stop(handle, old_inode);
++
++	spin_lock(&sbi->s_delete_lock);
++	J_ASSERT(list_empty(&new_inode->i_dentry));
++	list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
++	sbi->s_delete_blocks += blocks;
++	sbi->s_delete_inodes++;
++	spin_unlock(&sbi->s_delete_lock);
++
++	ext3_debug("delete inode %lu (%lu blocks) by thread\n",
++		   new_inode->i_ino, blocks);
++
++	wake_up(&sbi->s_delete_thread_queue);
++	return;
++
++out_iput:
++	iput(new_inode);	/* drop our ref; new_inode was never queued */
++out_journal:
++	ext3_journal_stop(handle, old_inode);
++out_truncate:
++	ext3_truncate(old_inode);
++}
++#endif /* EXT3_DELETE_THREAD */
++
+ /* 
+  * ext3_get_inode_loc returns with an extra refcount against the
+  * inode's underlying buffer_head on success. 
+--- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs.h~ext3-delete_thread-2.4.18	Tue Jun  3 17:26:20 2003
++++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs.h	Wed Jul  2 23:19:09 2003
+@@ -190,6 +190,7 @@ struct ext3_group_desc
+  */
+ #define EXT3_STATE_JDATA		0x00000001 /* journaled data exists */
+ #define EXT3_STATE_NEW			0x00000002 /* inode is newly created */
++#define EXT3_STATE_DELETE		0x00000010 /* deferred delete inode */
+ 
+ /*
+  * ioctl commands
+@@ -317,6 +318,7 @@ struct ext3_inode {
+ #define EXT3_MOUNT_UPDATE_JOURNAL	0x1000	/* Update the journal format */
+ #define EXT3_MOUNT_NO_UID32		0x2000  /* Disable 32-bit UIDs */
+ #define EXT3_MOUNT_INDEX		0x4000  /* Enable directory index */
++#define EXT3_MOUNT_ASYNCDEL		0x20000	/* Delayed deletion */
+ 
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef _LINUX_EXT2_FS_H
+@@ -651,6 +653,9 @@ extern void ext3_discard_prealloc (struc
+ extern void ext3_dirty_inode(struct inode *);
+ extern int ext3_change_inode_journal_flag(struct inode *, int);
+ extern void ext3_truncate (struct inode *);
++#ifdef EXT3_DELETE_THREAD
++extern void ext3_truncate_thread(struct inode *inode);
++#endif
+ 
+ /* ioctl.c */
+ extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
+--- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.18	Tue Jun  3 17:26:21 2003
++++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs_sb.h	Wed Jul  2 23:19:09 2003
+@@ -29,6 +29,8 @@
+ 
+ #define EXT3_MAX_GROUP_LOADED	32
+ 
++#define EXT3_DELETE_THREAD
++
+ /*
+  * third extended-fs super-block data in memory
+  */
+@@ -74,6 +76,14 @@ struct ext3_sb_info {
+ 	struct timer_list turn_ro_timer;	/* For turning read-only (crash simulation) */
+ 	wait_queue_head_t ro_wait_queue;	/* For people waiting for the fs to go read-only */
+ #endif
++#ifdef EXT3_DELETE_THREAD
++	spinlock_t s_delete_lock;
++	struct list_head s_delete_list;
++	unsigned long s_delete_blocks;
++	unsigned long s_delete_inodes;
++	wait_queue_head_t s_delete_thread_queue;
++	wait_queue_head_t s_delete_waiter_queue;
++#endif
+ };
+ 
+ #endif	/* _LINUX_EXT3_FS_SB */
+
+_
diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.18-chaos.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.18-chaos.patch
new file mode 100644
index 0000000..d0c315b
--- /dev/null
+++ b/lustre/kernel_patches/patches/ext3-extents-2.4.18-chaos.patch
@@ -0,0 +1,1831 @@
+ fs/ext3/Makefile           |    3 
+ fs/ext3/extents.c          | 1573 +++++++++++++++++++++++++++++++++++++++++++++
+ fs/ext3/ialloc.c           |    4 
+ fs/ext3/inode.c            |   26 
+ fs/ext3/super.c            |    9 
+ include/linux/ext3_fs.h    |   18 
+ include/linux/ext3_fs_i.h  |    4 
+ include/linux/ext3_fs_sb.h |   10 
+ 8 files changed, 1641 insertions(+), 6 deletions(-)
+
+diff -puN /dev/null fs/ext3/extents.c
+--- /dev/null	2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.18-chaos-alexey/fs/ext3/extents.c	2003-08-25 21:11:58.000000000 +0400
+@@ -0,0 +1,1573 @@
++/*
++ *
++ * linux/fs/ext3/extents.c
++ *
++ * Extents support for EXT3
++ *
++ * 07/08/2003    Alex Tomas <bzzz@tmi.comex.ru>
++ * 
++ * TODO:
++ *   - ext3*_error() should be used in some situations
++ *   - find_goal() [to be tested and improved]
++ *   - error handling
++ *   - we could leak allocated block in some error cases
++ *   - quick search for index/leaf in ext3_ext_find_extent()
++ *   - tree reduction
++ *   - cache last found extent
++ *   - arch-independent
++ */
++
++#include <linux/module.h>
++#include <linux/fs.h>
++#include <linux/time.h>
++#include <linux/ext3_jbd.h>
++#include <linux/jbd.h>
++#include <linux/smp_lock.h>
++#include <linux/highuid.h>
++#include <linux/pagemap.h>
++#include <linux/quotaops.h>
++#include <linux/string.h>
++#include <linux/slab.h>
++#include <linux/locks.h>
++
++/*
++ * with AGRESSIVE_TEST defined, the capacity of index/leaf blocks
++ * becomes very small, so index splits, in-depth growing and
++ * other hard changes happen much more often
++ * this is for debug purposes only
++ */
++#define AGRESSIVE_TEST_
++
++/*
++ * if EXT_DEBUG defined you can use 'extdebug' mount option
++ * to get lots of info what's going on
++ */
++#define EXT_DEBUG
++#ifdef EXT_DEBUG
++#define ext_debug(inode,fmt,a...) 		\
++do {						\
++	if (test_opt((inode)->i_sb, EXTDEBUG))	\
++		printk(fmt, ##a);		\
++} while (0);
++#else
++#define ext_debug(inode,fmt,a...)
++#endif
++
++#define EXT3_ALLOC_NEEDED	2	/* block bitmap + group descriptor */
++
++/*
++ * ext3_inode has i_block array (total 60 bytes)
++ * first 4 bytes are used to store:
++ *  - tree depth (0 mean there is no tree yet. all extents in the inode)
++ *  - number of alive extents in the inode
++ */
++
++/*
++ * this is extent on-disk structure
++ * it's used at the bottom of the tree
++ */
++struct ext3_extent {
++	__u32	e_block;	/* first logical block extent covers */
++	__u32	e_start;	/* first physical block extents lives */
++	__u32	e_num;		/* number of blocks covered by extent */
++};
++
++/*
++ * this is the on-disk index structure
++ * it's used at every level of the tree except the bottom
++ */
++struct ext3_extent_idx {
++	__u32	e_block;	/* index covers logical blocks from 'block' */
++	__u32	e_leaf;		/* pointer to the physical block of the next *
++				 * level. leaf or next index could be here */
++};
++
++/*
++ * each block (leaves and indexes), even inode-stored has header
++ */
++struct ext3_extent_header {	
++	__u16	e_num;		/* number of valid entries */
++	__u16	e_max;		/* capacity of store in entries */
++};
++
++/*
++ * array of ext3_ext_path contains path to some extent
++ * creation/lookup routines use it for traversal/splitting/etc
++ * truncate uses it to simulate recursive walking
++ */
++struct ext3_ext_path {
++	__u32				p_block;
++	__u16				p_depth;
++	struct ext3_extent		*p_ext;
++	struct ext3_extent_idx		*p_idx;
++	struct ext3_extent_header	*p_hdr;
++	struct buffer_head		*p_bh;
++};
++
++#define EXT_FIRST_EXTENT(__hdr__) \
++	((struct ext3_extent *) (((char *) (__hdr__)) +		\
++				 sizeof(struct ext3_extent_header)))
++#define EXT_FIRST_INDEX(__hdr__) \
++	((struct ext3_extent_idx *) (((char *) (__hdr__)) +	\
++				     sizeof(struct ext3_extent_header)))
++#define EXT_HAS_FREE_INDEX(__path__) \
++	((__path__)->p_hdr->e_num < (__path__)->p_hdr->e_max)
++#define EXT_LAST_EXTENT(__hdr__) \
++	(EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->e_num - 1)
++#define EXT_LAST_INDEX(__hdr__) \
++	(EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->e_num - 1)
++#define EXT_MAX_EXTENT(__hdr__) \
++	(EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->e_max - 1)
++#define EXT_MAX_INDEX(__hdr__) \
++	(EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->e_max - 1)
++
++
++#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
++
++/*
++ * could return:
++ *  - EROFS
++ *  - ENOMEM
++ */
++static int ext3_ext_get_access(handle_t *handle, struct inode *inode,
++				struct ext3_ext_path *path)
++{
++	if (path->p_bh) {
++		/* path points to block */
++		return ext3_journal_get_write_access(handle, path->p_bh);
++	}
++
++	/* path points to leaf/index in inode body */
++	return 0;
++}
++
++/*
++ * could return:
++ *  - EROFS
++ *  - ENOMEM
++ *  - EIO
++ */
++static int ext3_ext_dirty(handle_t *handle, struct inode *inode,
++				struct ext3_ext_path *path)
++{
++	if (path->p_bh) {
++		/* path points to block */
++		return ext3_journal_dirty_metadata(handle, path->p_bh);
++	}
++
++	/* path points to leaf/index in inode body */
++	return ext3_mark_inode_dirty(handle, inode);
++}
++
++static inline int ext3_ext_space_block(struct inode *inode)
++{
++	int size;
++
++	size = (inode->i_sb->s_blocksize - sizeof(struct ext3_extent_header))
++		/ sizeof(struct ext3_extent);
++#ifdef AGRESSIVE_TEST
++	size = 6; /* FIXME: for debug, remove this line */
++#endif
++	return size;
++}
++
++static inline int ext3_ext_space_inode(struct inode *inode)
++{
++	int size;
++
++	size = (sizeof(EXT3_I(inode)->i_data) -
++			sizeof(struct ext3_extent_header))
++			/ sizeof(struct ext3_extent);
++#ifdef AGRESSIVE_TEST
++	size = 3; /* FIXME: for debug, remove this line */
++#endif
++	return size;
++}
++
++static inline int ext3_ext_space_inode_idx(struct inode *inode)
++{
++	int size;
++
++	size = (sizeof(EXT3_I(inode)->i_data) -
++			sizeof(struct ext3_extent_header))
++			/ sizeof(struct ext3_extent_idx);
++#ifdef AGRESSIVE_TEST
++	size = 4; /* FIXME: for debug, remove this line */
++#endif
++	return size;
++}
++
++static void ext3_ext_show_path(struct inode *inode, struct ext3_ext_path *path)
++{
++	int k, l = path->p_depth;
++
++	ext_debug(inode, "path:");
++	for (k = 0; k <= l; k++, path++) {
++		if (path->p_idx) {
++			ext_debug(inode, "  %d->%d", path->p_idx->e_block,
++					path->p_idx->e_leaf);
++		} else if (path->p_ext) {
++			ext_debug(inode, "  %d:%d:%d",
++					path->p_ext->e_block,
++					path->p_ext->e_start,
++					path->p_ext->e_num);
++		} else
++			ext_debug(inode, "  []");
++	}
++	ext_debug(inode, "\n");
++}
++
++static void ext3_ext_show_leaf(struct inode *inode, struct ext3_ext_path *path)
++{
++	int depth = EXT3_I(inode)->i_depth;
++	struct ext3_extent_header *eh = path[depth].p_hdr;
++	struct ext3_extent *ex = EXT_FIRST_EXTENT(eh);
++	int i;
++
++	for (i = 0; i < eh->e_num; i++, ex++) {
++		ext_debug(inode, "%d:%d:%d ",
++				ex->e_block, ex->e_start, ex->e_num);
++	}
++	ext_debug(inode, "\n");
++}
++
++static void ext3_ext_drop_refs(struct inode *inode, struct ext3_ext_path *path)
++{
++	int depth = path->p_depth;
++	int i;
++
++	for (i = 0; i <= depth; i++, path++)
++		if (path->p_bh) {
++			brelse(path->p_bh);
++			path->p_bh = NULL;
++		}
++}
++
++static int ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path)
++{
++	struct ext3_inode_info *ei = EXT3_I(inode);
++	unsigned long bg_start;
++	unsigned long colour;
++	int depth;
++	
++	if (path) {
++		depth = path->p_depth;
++		/* try to find previous block */
++		if (path[depth].p_ext)
++			return path[depth].p_ext->e_start +
++				path[depth].p_ext->e_num - 1;
++		
++		/* it looks index is empty
++		 * try to find starting from index itself */
++		if (path[depth].p_bh)
++			return path[depth].p_bh->b_blocknr;
++	}
++
++	/* OK. use inode's group */
++	bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
++		le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block);
++	colour = (current->pid % 16) *
++			(EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16);
++	return bg_start + colour;
++}
++
++/* walk from the root kept in the inode down to the leaf for 'block' and
++ * record each level in 'path'. 'path' is allocated here if NULL; on error
++ * it is freed only if allocated here. returns path, ERR_PTR(-ENOMEM/-EIO) */
++static struct ext3_ext_path *
++ext3_ext_find_extent(struct inode *inode, int block, struct ext3_ext_path *path)
++{
++	struct ext3_inode_info *ei = EXT3_I(inode);
++	struct ext3_extent_header *eh = (void *) ei->i_data;
++	struct ext3_extent_idx *ix;
++	struct buffer_head *bh;
++	struct ext3_extent *ex;
++	int depth, i, k, ppos = 0, alloc = 0;
++
++	/* initialize capacity of leaf in inode for first time */
++	if (eh->e_max == 0)
++		eh->e_max = ext3_ext_space_inode(inode);
++	i = depth = ei->i_depth;
++	EXT_ASSERT(i == 0 || eh->e_num > 0);
++	/* account possible depth increase */
++	if (!path) {
++		path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2),
++				GFP_NOFS);
++		if (!path)
++			return ERR_PTR(-ENOMEM);
++		alloc = 1;
++	}
++	memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1));
++
++	/* walk through the tree */
++	while (i) {
++		ext_debug(inode, "depth %d: num %d, max %d\n",
++				ppos, eh->e_num, eh->e_max);
++		ix = EXT_FIRST_INDEX(eh);
++		if (eh->e_num)
++			path[ppos].p_idx = ix;
++		EXT_ASSERT(eh->e_num <= eh->e_max);
++		for (k = 0; k < eh->e_num; k++, ix++) {
++			ext_debug(inode, "index: %d -> %d\n",
++					ix->e_block, ix->e_leaf);
++			if (block < ix->e_block)
++				break;
++			path[ppos].p_idx = ix;
++		}
++		path[ppos].p_block = path[ppos].p_idx->e_leaf;
++		path[ppos].p_depth = i;
++		path[ppos].p_hdr = eh;
++		path[ppos].p_ext = NULL;
++
++		bh = sb_bread(inode->i_sb, path[ppos].p_block);
++		if (!bh) {
++			ext3_ext_drop_refs(inode, path);
++			/* a caller-supplied path still belongs to the caller */
++			if (alloc)
++				kfree(path);
++			return ERR_PTR(-EIO);
++		}
++		eh = (struct ext3_extent_header *) bh->b_data;
++		ppos++;
++		EXT_ASSERT(ppos <= depth);
++		path[ppos].p_bh = bh;
++		i--;
++	}
++
++	path[ppos].p_depth = i;
++	path[ppos].p_hdr = eh;
++	path[ppos].p_ext = NULL;
++	/* find extent */
++	ex = EXT_FIRST_EXTENT(eh);
++	if (eh->e_num)
++		path[ppos].p_ext = ex;
++	EXT_ASSERT(eh->e_num <= eh->e_max);
++	for (k = 0; k < eh->e_num; k++, ex++) {
++		if (block < ex->e_block)
++			break;
++		path[ppos].p_ext = ex;
++	}
++
++	ext3_ext_show_path(inode, path);
++	return path;
++}
++
++static void ext3_ext_check_boundary(struct inode *inode,
++					struct ext3_ext_path *curp,
++					void *addr, int len)
++{
++	void *end;
++
++	if (!len)
++		return;
++	if (curp->p_bh)
++		end = (void *) curp->p_hdr + inode->i_sb->s_blocksize;
++	else
++		end = (void *) curp->p_hdr + sizeof(EXT3_I(inode)->i_data);
++	if (((unsigned long) addr) + len > (unsigned long) end) {
++		printk("overflow! 0x%p > 0x%p\n", addr + len, end);
++		BUG();
++	}
++	if ((unsigned long) addr < (unsigned long) curp->p_hdr) {
++		printk("underflow! 0x%p < 0x%p\n", addr, curp->p_hdr);
++		BUG();
++	}
++}
++
++/*
++ * insert new index [logical;ptr] into the block at curp
++ * it checks where to insert: before curp or after curp
++ */
++static int ext3_ext_insert_index(handle_t *handle, struct inode *inode,
++				struct ext3_ext_path *curp, int logical,
++				int ptr)
++{
++	struct ext3_extent_idx *ix;
++	int len, err;
++
++	if ((err = ext3_ext_get_access(handle, inode, curp)))
++		return err;
++
++	EXT_ASSERT(logical != curp->p_idx->e_block);
++	len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
++	if (logical > curp->p_idx->e_block) {
++		/* insert after */
++		len = (len - 1) * sizeof(struct ext3_extent_idx);
++		len = len < 0 ? 0 : len;
++		ext_debug(inode, "insert new index %d after: %d. "
++				"move %d from 0x%p to 0x%p\n",
++				logical, ptr, len,
++				(curp->p_idx + 1), (curp->p_idx + 2));
++
++		ext3_ext_check_boundary(inode, curp, curp->p_idx + 2, len);
++		memmove(curp->p_idx + 2, curp->p_idx + 1, len);
++		ix = curp->p_idx + 1;
++	} else {
++		/* insert before */
++		len = len * sizeof(struct ext3_extent_idx);
++		len = len < 0 ? 0 : len;
++		ext_debug(inode, "insert new index %d before: %d. "
++				"move %d from 0x%p to 0x%p\n",
++				logical, ptr, len,
++				curp->p_idx, (curp->p_idx + 1));
++
++		ext3_ext_check_boundary(inode, curp, curp->p_idx + 1, len);
++		memmove(curp->p_idx + 1, curp->p_idx, len);
++		ix = curp->p_idx;
++	}
++
++	ix->e_block = logical;
++	ix->e_leaf = ptr;
++	curp->p_hdr->e_num++;
++
++	err = ext3_ext_dirty(handle, inode, curp);
++	ext3_std_error(inode->i_sb, err);
++
++	return err;
++}
++
++/*
++ * routine inserts new subtree into the path, using free index entry at
++ * depth 'at'; returns 0 on success or an error code:
++ *  - allocates all needed blocks (new leaf and all intermediate index blocks)
++ *  - makes decision where to split
++ *  - moves remaining extents and index entries (right to the split point)
++ *    into the newly allocated blocks
++ *  - initialize subtree
++ */
++static int ext3_ext_split(handle_t *handle, struct inode *inode,
++				struct ext3_ext_path *path,
++				struct ext3_extent *newext, int at)
++{
++	struct buffer_head *bh = NULL;
++	int depth = EXT3_I(inode)->i_depth;
++	struct ext3_extent_header *neh;
++	struct ext3_extent_idx *fidx;
++	struct ext3_extent *ex;
++	int i = at, k, m, a;
++	long newblock, oldblock, border;
++	int *ablocks = NULL; /* array of allocated blocks */
++	int err = 0;
++
++	/* make decision: where to split? */
++	/* FIXME: now decision is simplest: at current extent */
++
++	/* if current leaf will be split, then we should use
++	 * border from split point */
++	if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
++		border = path[depth].p_ext[1].e_block;
++		ext_debug(inode, "leaf will be splitted."
++				" next leaf starts at %d\n",
++				(int)border);
++	} else {
++		border = newext->e_block;
++		ext_debug(inode, "leaf will be added."
++				" next leaf starts at %d\n",
++				(int)border);
++	}
++
++	/*
++	 * if error occurs, then we break processing
++	 * and turn filesystem read-only. so, index won't
++	 * be inserted and tree will be in consistent
++	 * state. next mount will repair buffers too
++	 */
++
++	/*
++	 * get array to track all allocated blocks
++	 * we need this to handle errors and free blocks
++	 * upon them
++	 */
++	ablocks = kmalloc(sizeof(long) * depth, GFP_NOFS);
++	if (!ablocks)
++		return -ENOMEM;
++	memset(ablocks, 0, sizeof(long) * depth);
++
++	/* allocate all needed blocks; the first is carved out of newext */
++	ext_debug(inode, "allocate %d blocks for indexes and leaf\n",
++			depth - at);
++	ablocks[0] = newext->e_start++;
++	newext->e_num--;
++	for (a = 1; a < depth - at; a++) {
++		newblock = ext3_new_block(handle, inode, newext->e_start,
++						0, 0, &err);
++		if (newblock == 0)
++			goto cleanup;
++		ablocks[a] = newblock;
++	}
++
++	/* initialize new leaf */
++	newblock = ablocks[--a];
++	EXT_ASSERT(newblock);
++	bh = sb_getblk(inode->i_sb, newblock);
++	if (!bh) {
++		err = -EIO;
++		goto cleanup;
++	}
++	lock_buffer(bh);
++
++	if ((err = ext3_journal_get_create_access(handle, bh)))
++		goto cleanup;
++
++	neh = (struct ext3_extent_header *) bh->b_data;
++	neh->e_num = 0;
++	neh->e_max = ext3_ext_space_block(inode);
++	ex = EXT_FIRST_EXTENT(neh);
++
++	/* move remain of path[depth] to the new leaf */
++	EXT_ASSERT(path[depth].p_hdr->e_num ==
++			path[depth].p_hdr->e_max);
++	/* start copy from next extent */
++	/* TODO: we could do it by single memmove */
++	m = 0;
++	path[depth].p_ext++;
++	while (path[depth].p_ext <=
++			EXT_MAX_EXTENT(path[depth].p_hdr)) {
++		ext_debug(inode, "move %d:%d:%d in new leaf\n",
++				path[depth].p_ext->e_block,
++				path[depth].p_ext->e_start,
++				path[depth].p_ext->e_num);
++		memmove(ex++, path[depth].p_ext++,
++				sizeof(struct ext3_extent));
++		neh->e_num++;
++		m++;
++	}
++	mark_buffer_uptodate(bh, 1);
++	unlock_buffer(bh);
++
++	if ((err = ext3_journal_dirty_metadata(handle, bh)))
++		goto cleanup;
++	brelse(bh);
++	bh = NULL;
++
++	/* correct old leaf: it is path[depth], so journal that entry */
++	if (m) {
++		if ((err = ext3_ext_get_access(handle, inode, path + depth)))
++			goto cleanup;
++		path[depth].p_hdr->e_num -= m;
++		if ((err = ext3_ext_dirty(handle, inode, path + depth)))
++			goto cleanup;
++	}
++
++	/* create intermediate indexes */
++	k = depth - at - 1;
++	EXT_ASSERT(k >= 0);
++	if (k)
++		ext_debug(inode,
++				"create %d intermediate indices\n", k);
++	/* insert new index into current index block */
++	/* current depth stored in i var */
++	i = depth - 1;
++	while (k--) {
++		oldblock = newblock;
++		newblock = ablocks[--a];
++		bh = sb_getblk(inode->i_sb, newblock);
++		if (!bh) {
++			err = -EIO;
++			goto cleanup;
++		}
++		lock_buffer(bh);
++
++		if ((err = ext3_journal_get_create_access(handle, bh)))
++			goto cleanup;
++
++		neh = (struct ext3_extent_header *) bh->b_data;
++		neh->e_num = 1;
++		neh->e_max = ext3_ext_space_block(inode);
++		fidx = EXT_FIRST_INDEX(neh);
++		fidx->e_block = border;
++		fidx->e_leaf = oldblock;
++
++		ext_debug(inode,
++				"int.index at %d (block %u): %d -> %d\n",
++				i, (unsigned) newblock,
++				(int) border,
++				(int) oldblock);
++		/* copy indexes */
++		m = 0;
++		path[i].p_idx++;
++		EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) ==
++				EXT_LAST_INDEX(path[i].p_hdr));
++		ext_debug(inode, "cur 0x%p, last 0x%p\n", path[i].p_idx,
++				EXT_MAX_INDEX(path[i].p_hdr));
++		while (path[i].p_idx <=
++				EXT_MAX_INDEX(path[i].p_hdr)) {
++			ext_debug(inode, "%d: move %d:%d in new index\n",
++					i, path[i].p_idx->e_block,
++					path[i].p_idx->e_leaf);
++			memmove(++fidx, path[i].p_idx++,
++					sizeof(struct ext3_extent_idx));
++			neh->e_num++;
++			m++;
++		}
++
++		mark_buffer_uptodate(bh, 1);
++		unlock_buffer(bh);
++
++		if ((err = ext3_journal_dirty_metadata(handle, bh)))
++			goto cleanup;
++		brelse(bh);
++		bh = NULL;
++
++		/* correct old index */
++		if (m) {
++			err = ext3_ext_get_access(handle,inode,path+i);
++			if (err)
++				goto cleanup;
++			path[i].p_hdr->e_num -= m;
++			err = ext3_ext_dirty(handle, inode, path + i);
++			if (err)
++				goto cleanup;
++		}
++
++		i--;
++	}
++
++	/* insert new index */
++	if (!err)
++		err = ext3_ext_insert_index(handle, inode, path + at,
++						border, newblock);
++
++cleanup:
++	if (bh) {
++		if (buffer_locked(bh))
++			unlock_buffer(bh);
++		brelse(bh);
++	}
++
++	if (err) {
++		/* free all allocated blocks in error case */
++		for (i = 0; i < depth; i++) {
++			if (!ablocks[i])
++				continue;
++			ext3_free_blocks(handle, inode, ablocks[i], 1);
++		}
++	}
++	kfree(ablocks);
++
++	return err;
++}
++
++/*
++ * routine implements tree growing procedure:
++ *  - allocates new block
++ *  - moves top-level data (index block or leaf) into the new block
++ *  - initialize new top-level, creating index that points to the
++ *    just created block
++ */
++static int ext3_ext_grow_indepth(handle_t *handle, struct inode *inode,
++					struct ext3_ext_path *path,
++					struct ext3_extent *newext)
++{
++	struct buffer_head *bh;
++	struct ext3_ext_path *curp = path;
++	struct ext3_extent_header *neh;
++	struct ext3_extent_idx *fidx;
++	int len, err = 0;
++	long newblock;
++
++	/*
++	 * use already allocated by the called block for new root block
++	 */
++	newblock = newext->e_start++;
++	newext->e_num--;
++	
++	bh = sb_getblk(inode->i_sb, newblock);
++	if (!bh) {
++		err = -EIO;
++		ext3_std_error(inode->i_sb, err);
++		return err;
++	}
++	lock_buffer(bh);
++
++	if ((err = ext3_journal_get_create_access(handle, bh))) {
++		unlock_buffer(bh);
++		goto out;	
++	}
++
++	/* move top-level index/leaf into new block */
++	len = sizeof(struct ext3_extent_header) +
++		sizeof(struct ext3_extent) * curp->p_hdr->e_max;
++	EXT_ASSERT(len >= 0 && len < 4096);
++	memmove(bh->b_data, curp->p_hdr, len);
++
++	/* set size of new block */
++	neh = (struct ext3_extent_header *) bh->b_data;
++	neh->e_max = ext3_ext_space_block(inode);
++	mark_buffer_uptodate(bh, 1);
++	unlock_buffer(bh);
++
++	if ((err = ext3_journal_dirty_metadata(handle, bh)))
++		goto out;
++
++	/* create index in new top-level index: num,max,pointer */
++	if ((err = ext3_ext_get_access(handle, inode, curp)))
++		goto out;
++
++	curp->p_hdr->e_max = ext3_ext_space_inode_idx(inode);
++	curp->p_hdr->e_num = 1;
++	curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
++	curp->p_idx->e_block = EXT_FIRST_EXTENT(path[0].p_hdr)->e_block;
++	curp->p_idx->e_leaf = newblock;
++
++	neh = (struct ext3_extent_header *) EXT3_I(inode)->i_data;
++	fidx = EXT_FIRST_INDEX(neh);
++	ext_debug(inode, "new root: num %d(%d), lblock %d, ptr %d\n",
++			neh->e_num, neh->e_max, fidx->e_block, fidx->e_leaf); 
++
++	EXT3_I(inode)->i_depth++;
++	err = ext3_ext_dirty(handle, inode, curp);
++out:
++	brelse(bh);
++
++	return err;
++}
++
++/*
++ * routine finds empty index and adds new leaf. if no free index found
++ * then it requests in-depth growing
++ */
++static int ext3_ext_create_new_leaf(handle_t *handle, struct inode *inode,
++					struct ext3_ext_path *path,
++					struct ext3_extent *newext)
++{
++	int depth = EXT3_I(inode)->i_depth;
++	struct ext3_ext_path *curp;
++	int i = depth, err = 0;
++	long newblock = newext->e_start;
++
++	/* walk up to the tree and look for free index entry */
++	curp = path + depth;
++	while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
++		i--;
++		curp--;
++	}
++
++	/* we use already allocated block for index block
++	 * so, subsequent data blocks should be contigoues */
++	if (EXT_HAS_FREE_INDEX(curp)) {
++		/* if we found index with free entry, then use that
++		 * entry: create all needed subtree and add new leaf */
++		err = ext3_ext_split(handle, inode, path, newext, i);
++	} else {
++		/* tree is full, time to grow in depth */
++		err = ext3_ext_grow_indepth(handle, inode, path, newext);
++	}
++
++	if (!err) {
++		/* refill path */
++		ext3_ext_drop_refs(inode, path);
++		path = ext3_ext_find_extent(inode, newext->e_block, path);
++		if (IS_ERR(path))
++			err = PTR_ERR(path);
++
++		/*
++		 * probably we've used some blocks from extent
++		 * let's allocate new block for it
++		 */
++		if (newext->e_num == 0 && !err) {
++			newext->e_start =
++				ext3_new_block(handle, inode, newblock,
++						0, 0, &err);
++			newext->e_num = 1;
++		}
++	}
++
++	return err;
++}
++
++/*
++ * returns next allocated block or 0xffffffff
++ * NOTE: it considers the block number from an index entry to be an
++ * allocated block. thus, index entries have to be consistent
++ * with leaves
++ */
++static inline unsigned ext3_ext_next_allocated_block(struct inode *inode,
++                                               struct ext3_ext_path *path)
++{
++	int depth;
++
++	EXT_ASSERT(path != NULL);
++	depth = path->p_depth;
++
++	if (depth == 0 && path->p_ext == NULL)
++		return 0xffffffff;
++
++	/* FIXME: what if index isn't full ?! */
++	while (depth >= 0) {
++		if (depth == path->p_depth) {
++			/* leaf */
++			if (path[depth].p_ext !=
++					EXT_LAST_EXTENT(path[depth].p_hdr))
++				return path[depth].p_ext[1].e_block;
++		} else {
++			/* index */
++			if (path[depth].p_idx !=
++					EXT_LAST_INDEX(path[depth].p_hdr))
++				return path[depth].p_idx[1].e_block;
++		}
++		depth--;        
++	}
++
++	return 0xffffffff;
++}
++
++/*
++ * returns first allocated block from next leaf or 0xffffffff
++ */
++static unsigned ext3_ext_next_leaf_block(struct inode *inode,
++                                               struct ext3_ext_path *path)
++{
++	int depth;
++
++	EXT_ASSERT(path != NULL);
++	depth = path->p_depth;
++
++	/* zero-tree has no leaf blocks at all */
++	if (depth == 0)
++		return 0xffffffff;
++
++	/* go to index block */
++	depth--;
++	
++	while (depth >= 0) {
++		if (path[depth].p_idx !=
++				EXT_LAST_INDEX(path[depth].p_hdr))
++			return path[depth].p_idx[1].e_block;
++		depth--;        
++	}
++
++	return 0xffffffff;
++}
++
++/*
++ * if the first extent of a leaf changed, fix the keys of the indexes above
++ * returns 0 or the error from ext3_ext_get_access()/ext3_ext_dirty()
++ * TODO: do we need to correct tree in all cases?
++ */
++int ext3_ext_correct_indexes(handle_t *handle, struct inode *inode,
++				struct ext3_ext_path *path)
++{
++	int depth = EXT3_I(inode)->i_depth;	
++	struct ext3_extent_header *eh;
++	struct ext3_extent *ex;
++	long border;
++	int k, err = 0;
++	
++	eh = path[depth].p_hdr;
++	ex = path[depth].p_ext;
++
++	EXT_ASSERT(ex);
++	EXT_ASSERT(eh);
++	
++	if (depth == 0) {
++		/* there is no tree at all */
++		return 0;
++	}
++	
++	if (ex != EXT_FIRST_EXTENT(eh)) {
++		/* only a change of the leaf's first extent moves its key */
++		return 0;
++	}
++	
++	k = depth - 1;
++	border = path[depth].p_ext->e_block;
++	if ((err = ext3_ext_get_access(handle, inode, path + k)))
++		return err;
++	path[k].p_idx->e_block = border;
++	if ((err = ext3_ext_dirty(handle, inode, path + k)))
++		return err;
++
++	while (k--) {
++		/* level k needs the new key only when the entry we have just
++		 * rewritten at level k + 1 is the first one in its block */
++		if (path[k + 1].p_idx != EXT_FIRST_INDEX(path[k + 1].p_hdr))
++			break;
++		if ((err = ext3_ext_get_access(handle, inode, path + k)))
++			break;
++		path[k].p_idx->e_block = border;
++		if ((err = ext3_ext_dirty(handle, inode, path + k)))
++			break;
++	}
++
++	return err;
++}
++
++/*
++ * appends the requested extent to the found one when it directly follows
++ * it, otherwise inserts it as a new extent into the tree, creating a new
++ * leaf in the no-space case; returns 0 or an error code
++ */
++int ext3_ext_insert_extent(handle_t *handle, struct inode *inode,
++				struct ext3_ext_path *path,
++				struct ext3_extent *newext)
++{
++	int depth, len;
++	struct ext3_extent_header * eh;
++	struct ext3_extent *ex;
++	struct ext3_extent *nearex; /* nearest extent */
++	struct ext3_ext_path *npath = NULL;
++	int err;
++
++	depth = EXT3_I(inode)->i_depth;	
++	if ((ex = path[depth].p_ext)) {
++		/* new extent is adjacent both logically and physically? */
++		if (ex->e_block + ex->e_num == newext->e_block &&
++				ex->e_start + ex->e_num == newext->e_start) {
++#ifdef AGRESSIVE_TEST
++			if (ex->e_num >= 2)
++				goto repeat;
++#endif
++			if ((err = ext3_ext_get_access(handle, inode,
++							path + depth)))
++				return err;
++			ext_debug(inode, "append %d block to %d:%d (from %d)\n",
++					newext->e_num, ex->e_block, ex->e_num,
++					ex->e_start);
++			ex->e_num += newext->e_num;
++			err = ext3_ext_dirty(handle, inode, path + depth);
++			return err;
++		}
++	}
++
++repeat:
++	depth = EXT3_I(inode)->i_depth;	
++	eh = path[depth].p_hdr;
++	if (eh->e_num == eh->e_max) {
++		/* probably next leaf has space for us? */
++		int next = ext3_ext_next_leaf_block(inode, path);
++		if (next != 0xffffffff) {
++			ext_debug(inode, "next leaf block - %d\n", next);
++			EXT_ASSERT(!npath);
++			npath = ext3_ext_find_extent(inode, next, NULL);
++			if (IS_ERR(npath))
++				return PTR_ERR(npath);
++			EXT_ASSERT(npath->p_depth == path->p_depth);
++			eh = npath[depth].p_hdr;
++			if (eh->e_num < eh->e_max) {
++				ext_debug(inode,
++						"next leaf has free ext(%d)\n",
++						eh->e_num);
++				path = npath;
++				goto repeat;
++			}
++			ext_debug(inode, "next leaf hasno free space(%d,%d)\n",
++					eh->e_num, eh->e_max);
++		}
++		/*
++		 * there is no free space in found leaf
++		 * we're gonna add new leaf in the tree
++		 */
++		err = ext3_ext_create_new_leaf(handle, inode, path, newext);
++		if (err)
++			goto cleanup;
++		goto repeat;
++	}
++
++	nearex = path[depth].p_ext;
++
++	if ((err = ext3_ext_get_access(handle, inode, path + depth)))
++		goto cleanup;
++
++	if (!nearex) {
++		/* there is no extent in this leaf, create first one */
++		ext_debug(inode, "first extent in the leaf: %d:%d:%d\n",
++				newext->e_block, newext->e_start,
++				newext->e_num);
++		eh->e_num++;
++		path[depth].p_ext = EXT_FIRST_EXTENT(eh);
++
++	} else if (newext->e_block > nearex->e_block) {
++		EXT_ASSERT(newext->e_block != nearex->e_block);
++		len = EXT_MAX_EXTENT(eh) - nearex;
++		len = (len - 1) * sizeof(struct ext3_extent);
++		len = len < 0 ? 0 : len;
++		ext_debug(inode, "insert %d:%d:%d after: nearest 0x%p, "
++				"move %d from 0x%p to 0x%p\n",
++				newext->e_block, newext->e_start, newext->e_num,
++				nearex, len, nearex + 1, nearex + 2);
++		ext3_ext_check_boundary(inode, path + depth, nearex + 2, len);
++		memmove(nearex + 2, nearex + 1, len);
++		path[depth].p_ext = nearex + 1;
++		eh->e_num++;
++	} else {
++		EXT_ASSERT(newext->e_block != nearex->e_block);
++		len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent);
++		len = len < 0 ? 0 : len;
++		ext_debug(inode, "insert %d:%d:%d before: nearest 0x%p, "
++				"move %d from 0x%p to 0x%p\n",
++				newext->e_block, newext->e_start, newext->e_num,
++				nearex, len, nearex, nearex + 1);
++		memmove(nearex + 1, nearex, len);
++		path[depth].p_ext = nearex;
++		eh->e_num++;
++
++		/* the leaf has a new first key: correct the indexes above */
++		err = ext3_ext_correct_indexes(handle, inode, path);
++	}
++
++	/* on failure leave err alone so that ext3_ext_dirty() can't mask it */
++	if (!err) {
++		nearex = path[depth].p_ext;
++		nearex->e_block = newext->e_block;
++		nearex->e_start = newext->e_start;
++		nearex->e_num = newext->e_num;
++		err = ext3_ext_dirty(handle, inode, path + depth);
++	}
++
++cleanup:
++	if (npath) {
++		ext3_ext_drop_refs(inode, npath);
++		kfree(npath);
++	}
++		
++	return err;
++}
++
++/*
++ * maps logical block iblock into bh_result, allocating it if create is set
++ * NOTE(review): BH_Mapped is never set here -- confirm callers don't need it
++ */
++int ext3_ext_get_block(handle_t *handle, struct inode *inode, long iblock,
++			struct buffer_head *bh_result, int create,
++			int extend_disksize)
++{
++	struct ext3_ext_path *path;
++	int depth;
++	struct ext3_extent newex;
++	struct ext3_extent *ex;
++	int goal, newblock, err = 0;
++
++	ext_debug(inode, "block %d requested for inode %u, bh_result 0x%p\n",
++			(int) iblock, (unsigned) inode->i_ino, bh_result);
++	bh_result->b_state &= ~(1UL << BH_New);
++
++	down(&EXT3_I(inode)->i_ext_sem);
++	depth = EXT3_I(inode)->i_depth;	/* read only with i_ext_sem held */
++	/* find extent for this block */
++	path = ext3_ext_find_extent(inode, iblock, NULL);
++	if (IS_ERR(path)) {
++		err = PTR_ERR(path);
++		goto out3;	/* an ERR_PTR must not be dropped or freed */
++	}
++
++	if ((ex = path[depth].p_ext)) {
++		/* if the found extent covers the block, simply return it */
++		if (iblock >= ex->e_block && iblock < ex->e_block + ex->e_num) {
++			newblock = iblock - ex->e_block + ex->e_start;
++			ext_debug(inode, "%d fit into %d:%d -> %d\n",
++					(int) iblock, ex->e_block, ex->e_num,
++					newblock);
++			goto out;
++		}
++	}
++
++	/* nothing is mapped; without the create flag we must not allocate */
++	if (!create)
++		goto out2;
++
++	/* allocate new block */
++	goal = ext3_ext_find_goal(inode, path);
++	newblock = ext3_new_block(handle, inode, goal, 0, 0, &err);
++	if (!newblock)
++		goto out2;
++	ext_debug(inode, "allocate new block: goal %d, found %d\n",
++			goal, newblock);
++	/* try to insert new extent into found leaf and return */
++	newex.e_block = iblock;
++	newex.e_start = newblock;
++	newex.e_num = 1;
++	err = ext3_ext_insert_extent(handle, inode, path, &newex);
++	if (err)
++		goto out2;
++	/* previous routine could use block we allocated */
++	newblock = newex.e_start;
++	bh_result->b_state |= (1UL << BH_New);
++
++out:
++	ext3_ext_show_leaf(inode, path);
++	bh_result->b_dev = inode->i_dev;
++	bh_result->b_blocknr = newblock;
++out2:
++	ext3_ext_drop_refs(inode, path);
++	kfree(path);
++out3:
++	up(&EXT3_I(inode)->i_ext_sem);
++	return err;	
++}
++
++/*
++ * returns 1 if truncate has to descend into the current index, else 0
++ */
++static int ext3_ext_more_to_truncate(struct inode *inode,
++				struct ext3_ext_path *path)
++{
++	EXT_ASSERT(path->p_idx);
++
++	if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
++		return 0;
++
++	/*
++	 * p_block holds the entry count saved on the previous visit; if it
++	 * is unchanged no entry was removed since then, so we stop here
++	 */
++	if (path->p_hdr->e_num == path->p_block)
++		return 0;
++
++	/*
++	 * save the current number of entries so that the next visit
++	 * can tell whether it has changed
++	 */
++	path->p_block = path->p_hdr->e_num;
++	
++	return 1;
++}
++
++/*
++ * removes the entry that points to the block at 'path' from the index
++ * one level up and frees that block. it's used in the truncate case
++ * only, thus all requests are for the last index in the block
++ */
++int ext3_ext_remove_index(handle_t *handle, struct inode *inode,
++					struct ext3_ext_path *path)
++{
++	struct buffer_head *bh;
++	int err;
++	
++	/* step up to the level above; its p_idx->e_leaf is the block to free */
++	path--;
++	EXT_ASSERT(path->p_hdr->e_num);
++	if ((err = ext3_ext_get_access(handle, inode, path)))
++		return err;
++	path->p_hdr->e_num--;
++	if ((err = ext3_ext_dirty(handle, inode, path)))
++		return err;
++	bh = sb_get_hash_table(inode->i_sb, path->p_idx->e_leaf);
++	ext3_forget(handle, 0, inode, bh, path->p_idx->e_leaf);
++	ext3_free_blocks(handle, inode, path->p_idx->e_leaf, 1);
++
++	ext_debug(inode, "index is empty, remove it, free block %d\n",
++			path->p_idx->e_leaf);
++	return err;
++}
++
++/*
++ * returns 1 if the current extent needs to be freed (even partially),
++ * otherwise returns 0
++ */
++int ext3_ext_more_leaves_to_truncate(struct inode *inode,
++					struct ext3_ext_path *path)
++{
++	unsigned blocksize = inode->i_sb->s_blocksize;
++	struct ext3_extent *ex = path->p_ext;
++	int last_block; 
++
++	EXT_ASSERT(ex);
++
++	/* walked past the first extent: nothing is left in this leaf */
++	if (ex < EXT_FIRST_EXTENT(path->p_hdr))
++		return 0;
++	
++	last_block = (inode->i_size + blocksize-1)
++			>> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
++
++	if (last_block >= ex->e_block + ex->e_num)
++		return 0;
++
++	/* the extent reaches past last_block, so it has to be freed */
++	return 1;
++}
++
++/* gets 'needed' credits for handle; returns ERR_PTR() if the restart fails */
++handle_t *ext3_ext_journal_restart(handle_t *handle, int needed)
++{
++	int err;
++
++	if (handle->h_buffer_credits > needed)
++		return handle;
++	if (!ext3_journal_extend(handle, needed))
++		return handle;
++	err = ext3_journal_restart(handle, needed);
++	return err ? ERR_PTR(err) : handle;
++}
++
++/*
++ * calculates the max number of blocks that may be modified while freeing
++ * num blocks of the extent at path (NULL path: assume the worst case)
++ */
++static int ext3_ext_calc_credits(struct inode *inode,
++					struct ext3_ext_path *path,
++					int num)
++{
++	int depth = EXT3_I(inode)->i_depth;
++	int needed;
++	
++	/*
++	 * an extent can't cross a group boundary, so we will modify a
++	 * single bitmap block and a single group descriptor
++	 */
++	needed = 2;
++
++	/*
++	 * if the whole first extent of a leaf goes away, then we have
++	 * to free the leaf block and remove its pointer from the index
++	 * above. that pointer could be the last one in its index block,
++	 * so we'll have to remove that block too. this way we could
++	 * modify/free the whole path, and the root index (stored in the
++	 * inode) will be modified as well
++	 */
++	if (!path || (num == path->p_ext->e_num &&
++				path->p_ext == EXT_FIRST_EXTENT(path->p_hdr)))
++		needed += (depth * EXT3_ALLOC_NEEDED) + 1;
++
++	return needed;
++}
++
++/*
++ * core of the truncate procedure:
++ * - calculates what part of each extent in the requested leaf
++ *   needs to be freed
++ * - frees and forgets these blocks
++ *
++ * TODO: we could optimize and free several extents during
++ *       single journal_restart()-journal_restart() cycle
++ */
++static int ext3_ext_truncate_leaf(handle_t *handle,
++					struct inode *inode,
++					struct ext3_ext_path *path,
++					int depth)
++{
++	unsigned blocksize = inode->i_sb->s_blocksize;
++	int last_block; 
++	int i, err = 0, sf, num;
++
++	ext_debug(inode, "level %d - leaf\n", depth);
++	if (!path->p_hdr)
++		path->p_hdr =
++			(struct ext3_extent_header *) path->p_bh->b_data;
++
++	EXT_ASSERT(path->p_hdr->e_num <= path->p_hdr->e_max);
++	
++	last_block = (inode->i_size + blocksize-1)
++					>> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
++	path->p_ext = EXT_LAST_EXTENT(path->p_hdr);
++	while (ext3_ext_more_leaves_to_truncate(inode, path)) {
++
++		/* first logical block of this extent that has to be freed */
++		sf = last_block > path->p_ext->e_block ?
++			last_block : path->p_ext->e_block;
++
++		/* number of blocks from extent to be freed */
++		num = path->p_ext->e_block + path->p_ext->e_num - sf;
++
++		/* translate sf into the first physical block to be freed */
++		sf = path->p_ext->e_start + (sf - path->p_ext->e_block);
++
++		i = ext3_ext_calc_credits(inode, path, num);
++		handle = ext3_ext_journal_restart(handle, i);
++		if (IS_ERR(handle))
++			return PTR_ERR(handle);
++		
++		ext_debug(inode, "free extent %d:%d:%d -> free %d:%d\n",
++				path->p_ext->e_block, path->p_ext->e_start,
++				path->p_ext->e_num, sf, num);
++		for (i = 0; i < num; i++) {
++			struct buffer_head *bh =
++				sb_get_hash_table(inode->i_sb, sf + i);
++			ext3_forget(handle, 0, inode, bh, sf + i);
++		}
++		ext3_free_blocks(handle, inode, sf, num);
++
++		/* collect extents usage stats */
++		spin_lock(&EXT3_SB(inode->i_sb)->s_ext_lock);
++		EXT3_SB(inode->i_sb)->s_ext_extents++;
++		EXT3_SB(inode->i_sb)->s_ext_blocks += num;
++		spin_unlock(&EXT3_SB(inode->i_sb)->s_ext_lock);
++
++		/* shrink the extent; drop it from the leaf once it is empty */
++		if ((err = ext3_ext_get_access(handle, inode, path)))
++			return err;
++		path->p_ext->e_num -= num;
++		if (path->p_ext->e_num == 0)
++			path->p_hdr->e_num--;
++		if ((err = ext3_ext_dirty(handle, inode, path)))
++			return err;
++
++		path->p_ext--;
++	}
++	
++	/* if this leaf is free, then we should
++	 * remove it from index block above */
++	if (path->p_hdr->e_num == 0 && depth > 0) 
++		err = ext3_ext_remove_index(handle, inode, path);
++
++	return err;
++}
++
++static void ext3_ext_collect_stats(struct inode *inode)
++{
++	int depth;
++	
++	/* skip inodes that don't have EXT3_EXTENTS_FL set */
++	if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL))
++		return;
++	
++	/* collect on full truncate only */
++	if (inode->i_size)
++		return;
++
++	depth = EXT3_I(inode)->i_depth;
++	if (depth < EXT3_SB(inode->i_sb)->s_ext_mindepth)
++		 EXT3_SB(inode->i_sb)->s_ext_mindepth = depth;
++	if (depth > EXT3_SB(inode->i_sb)->s_ext_maxdepth)
++		 EXT3_SB(inode->i_sb)->s_ext_maxdepth = depth;
++	EXT3_SB(inode->i_sb)->s_ext_sum += depth;
++	EXT3_SB(inode->i_sb)->s_ext_count++;
++	
++}
++
++/*
++ * truncates an extent-mapped inode to i_size: scans the tree from the right
++ * side, freeing blocks beyond i_size and leaf/index blocks that get empty
++ */
++void ext3_ext_truncate(struct inode * inode)
++{
++	struct address_space *mapping = inode->i_mapping;
++	struct ext3_ext_path *path;
++	struct page * page;
++	handle_t *handle;
++	int i, depth, err = 0;
++
++	down(&EXT3_I(inode)->i_ext_sem);
++	ext3_ext_collect_stats(inode);
++
++	/*
++	 * We have to lock the EOF page here, because lock_page() nests
++	 * outside journal_start().
++	 */
++	if ((inode->i_size & (inode->i_sb->s_blocksize - 1)) == 0) {
++		/* Block boundary? Nothing to do */
++		page = NULL;
++	} else {
++		page = grab_cache_page(mapping,
++				inode->i_size >> PAGE_CACHE_SHIFT);
++		if (!page) {
++			up(&EXT3_I(inode)->i_ext_sem);
++			return;
++		}
++	}
++
++	/* probably first extent we're gonna free will be last in block */
++	i = ext3_ext_calc_credits(inode, NULL, 0);
++	handle = ext3_journal_start(inode, i);
++	if (IS_ERR(handle)) {
++		if (page) {
++			clear_highpage(page);
++			flush_dcache_page(page);
++			unlock_page(page);
++			page_cache_release(page);
++		}
++		up(&EXT3_I(inode)->i_ext_sem);
++		return;
++	}
++
++	if (page)
++		ext3_block_truncate_page(handle, mapping, inode->i_size, page,
++						inode->i_sb->s_blocksize);
++
++	/*
++	 * TODO: optimization is possible here: probably we need no scanning
++	 * at all, because page truncation is enough
++	 */
++	if (ext3_orphan_add(handle, inode))
++		goto out_stop;
++
++	/* we have to know where to truncate from in crash case */
++	EXT3_I(inode)->i_disksize = inode->i_size;
++	ext3_mark_inode_dirty(handle, inode);
++
++	/*
++	 * we start scanning from right side freeing all the blocks
++	 * after i_size and walking into the deep
++	 */
++	i = 0;
++	depth = EXT3_I(inode)->i_depth;
++	path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL);
++	if (!path) {
++		ext3_error(inode->i_sb, "ext3_ext_truncate",
++				"Can't allocate path array");
++		goto out_stop;
++	}
++	memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1));
++
++	path[i].p_hdr = (struct ext3_extent_header *) EXT3_I(inode)->i_data;
++	while (i >= 0 && err == 0) {
++		if (i == depth) {
++			/* this is leaf block */
++			err = ext3_ext_truncate_leaf(handle, inode,
++							path + i, i);
++			/* root level have p_bh == NULL, brelse() eats this */
++			brelse(path[i].p_bh);
++			i--;
++			continue;
++		}
++		
++		/* this is index block */
++		if (!path[i].p_hdr) {
++			path[i].p_hdr =
++				(struct ext3_extent_header *) path[i].p_bh->b_data;
++			ext_debug(inode, "initialize header\n");
++		}
++
++		EXT_ASSERT(path[i].p_hdr->e_num <= path[i].p_hdr->e_max);
++		
++		if (!path[i].p_idx) {
++			/* this level hasn't been touched yet */
++			path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
++			path[i].p_block = path[i].p_hdr->e_num + 1;
++			ext_debug(inode, "init index ptr: hdr 0x%p, num %d\n",
++					path[i].p_hdr, path[i].p_hdr->e_num);
++		} else {
++			/* we've already been here, look at the next index */
++			path[i].p_idx--;
++		}
++
++		ext_debug(inode, "level %d - index, first 0x%p, cur 0x%p\n",
++				i, EXT_FIRST_INDEX(path[i].p_hdr),
++				path[i].p_idx);
++		if (ext3_ext_more_to_truncate(inode, path + i)) {
++			/* go to the next level */
++			ext_debug(inode, "move to level %d (block %d)\n", i+1,
++					path[i].p_idx->e_leaf);
++			memset(path + i + 1, 0, sizeof(*path));
++			path[i+1].p_bh = sb_bread(inode->i_sb,
++							path[i].p_idx->e_leaf);
++			if (!path[i+1].p_bh) {
++				/* should we reset i_size? */
++				err = -EIO;
++				break;
++			}
++			i++;
++		} else {
++			/* we finish processing this index, go up */
++			if (path[i].p_hdr->e_num == 0 && i > 0) {
++				/* index is empty, remove it
++				 * handle must be already prepared by the
++				 * truncate_leaf()
++				 */
++				err = ext3_ext_remove_index(handle, inode,
++								path + i);
++			}
++			/* root level have p_bh == NULL, brelse() eats this */
++			brelse(path[i].p_bh);
++			i--;
++			ext_debug(inode, "return to level %d\n", i);
++		}
++	}
++
++	/* TODO: flexible tree reduction should be here */
++	if (path->p_hdr->e_num == 0) {
++		/*
++		 * truncate to zero freed all the tree
++		 * so, we need to correct i_depth
++		 */
++		EXT3_I(inode)->i_depth = 0;
++		path->p_hdr->e_max = 0;
++		ext3_mark_inode_dirty(handle, inode);
++	}
++
++	kfree(path);
++	/* In a multi-transaction truncate, we only make the final
++	 * transaction synchronous */
++	if (IS_SYNC(inode))
++		handle->h_sync = 1;
++
++out_stop:
++	/*
++	 * If this was a simple ftruncate(), and the file will remain alive
++	 * then we need to clear up the orphan record which we created above.
++	 * However, if this was a real unlink then we were called by
++	 * ext3_delete_inode(), and we allow that function to clean up the
++	 * orphan info for us.
++	 */
++	if (inode->i_nlink)
++		ext3_orphan_del(handle, inode);
++
++	up(&EXT3_I(inode)->i_ext_sem);
++	ext3_journal_stop(handle, inode);
++}
++
++/*
++ * this routine calculates the max number of blocks we could modify
++ * in order to allocate num new blocks for an inode
++ */
++int ext3_ext_writepage_trans_blocks(struct inode *inode, int num)
++{
++	struct ext3_inode_info *ei = EXT3_I(inode);
++	int depth = ei->i_depth + 1;
++	int needed;
++	
++	/*
++	 * the worst case we're expecting is creation of the
++	 * new root (growing in depth) with index splitting
++	 * for splitting we have to consider depth + 1 because
++	 * previous growing could increase it
++	 */
++
++	/*
++	 * growing in depth:
++	 * block allocation + new root + old root
++	 */
++	needed = EXT3_ALLOC_NEEDED + 2;
++
++	/* index split. we may need to:
++	 *   allocate intermediate indexes and new leaf
++	 *   change two blocks at each level, but root
++	 *   modify root block (inode)
++	 */
++	needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1;
++
++	/* caller wants to allocate num blocks */
++	needed *= num;
++	
++#ifdef CONFIG_QUOTA
++	/*
++	 * FIXME: real calculation should be here
++	 * it depends on blockmap format of quota file
++	 */
++	needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS;
++#endif
++
++	return needed;
++}
++
++/*
++ * called at mount time: initializes s_ext_lock, reports the extents option
++ */
++void ext3_ext_init(struct super_block *sb)
++{
++	/*
++	 * further initialization could be added here
++	 */
++
++	if (test_opt(sb, EXTENTS))
++		printk("EXT3-fs: file extents enabled\n");
++	spin_lock_init(&EXT3_SB(sb)->s_ext_lock);
++}
++
++/*
++ * called at umount time: prints the collected extents statistics
++ */
++void ext3_ext_release(struct super_block *sb)
++{
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++	/* show collected stats; both counters are divisors, so skip if zero */
++	if (sbi->s_ext_count && sbi->s_ext_extents)
++		printk("EXT3-fs: min depth - %d, max depth - %d, "
++				"ave. depth - %d, ave. blocks/extent - %d\n",
++				sbi->s_ext_mindepth,
++				sbi->s_ext_maxdepth,
++				sbi->s_ext_sum / sbi->s_ext_count,
++				sbi->s_ext_blocks / sbi->s_ext_extents);
++}
++
+diff -puN fs/ext3/ialloc.c~ext3-extents fs/ext3/ialloc.c
+--- linux-2.4.18-chaos/fs/ext3/ialloc.c~ext3-extents	2003-08-25 20:09:59.000000000 +0400
++++ linux-2.4.18-chaos-alexey/fs/ext3/ialloc.c	2003-08-25 21:12:14.000000000 +0400
+@@ -571,6 +571,10 @@ repeat:
+ 	ei->i_prealloc_count = 0;
+ #endif
+ 	ei->i_block_group = i;
++	if (test_opt(sb, EXTENTS))
++		EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL;
++	ei->i_depth = 0;
++	sema_init(&ei->i_ext_sem, 1);
+ 
+ 	if (ei->i_flags & EXT3_SYNC_FL)
+ 		inode->i_flags |= S_SYNC;
+diff -puN fs/ext3/inode.c~ext3-extents fs/ext3/inode.c
+--- linux-2.4.18-chaos/fs/ext3/inode.c~ext3-extents	2003-08-25 20:09:59.000000000 +0400
++++ linux-2.4.18-chaos-alexey/fs/ext3/inode.c	2003-08-25 20:09:59.000000000 +0400
+@@ -842,6 +842,15 @@ changed:
+ 	goto reread;
+ }
+ 
++/* maps a block through the extents code or the block map, by inode flag */
++static inline int ext3_get_block_wrap(handle_t *handle, struct inode *inode,
++	long block, struct buffer_head *bh, int create, int extend_disksize)
++{
++	if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
++		return ext3_ext_get_block(handle, inode, block, bh, create, extend_disksize);
++	return ext3_get_block_handle(handle, inode, block, bh, create, extend_disksize);
++}
++
+ /*
+  * The BKL is not held on entry here.
+  */
+@@ -855,7 +864,7 @@ static int ext3_get_block(struct inode *
+ 		handle = ext3_journal_current_handle();
+ 		J_ASSERT(handle != 0);
+ 	}
+-	ret = ext3_get_block_handle(handle, inode, iblock,
++	ret = ext3_get_block_wrap(handle, inode, iblock,
+ 				bh_result, create, 1);
+ 	return ret;
+ }
+@@ -882,7 +891,7 @@ ext3_direct_io_get_block(struct inode *i
+ 		}
+ 	}
+ 	if (ret == 0)
+-		ret = ext3_get_block_handle(handle, inode, iblock,
++		ret = ext3_get_block_wrap(handle, inode, iblock,
+ 					bh_result, create, 0);
+ 	if (ret == 0)
+ 		bh_result->b_size = (1 << inode->i_blkbits);
+@@ -904,7 +913,7 @@ struct buffer_head *ext3_getblk(handle_t
+ 	dummy.b_state = 0;
+ 	dummy.b_blocknr = -1000;
+ 	buffer_trace_init(&dummy.b_history);
+-	*errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1);
++	*errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1);
+ 	if (!*errp && buffer_mapped(&dummy)) {
+ 		struct buffer_head *bh;
+ 		bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
+@@ -1520,7 +1529,7 @@ ext3_block_truncate_page_prepare(struct 
+  * This required during truncate. We need to physically zero the tail end
+  * of that block so it doesn't yield old data if the file is later grown.
+  */
+-static int ext3_block_truncate_page(handle_t *handle,
++int ext3_block_truncate_page(handle_t *handle,
+ 				    struct address_space *mapping, loff_t from,
+ 				    struct page *page, unsigned blocksize)
+ {
+@@ -2040,6 +2049,9 @@ void ext3_truncate(struct inode * inode)
+ 	 */
+ 	ei->i_disksize = inode->i_size;
+ 
++	if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
++		return ext3_ext_truncate(inode);
++
+ 	/*
+ 	 * From here we block out all ext3_get_block() callers who want to
+ 	 * modify the block allocation tree.
+@@ -2436,6 +2448,8 @@ void ext3_read_inode(struct inode * inod
+ 	ei->i_prealloc_count = 0;
+ #endif
+ 	ei->i_block_group = iloc.block_group;
++	ei->i_depth = raw_inode->osd2.linux2.l_i_depth;
++	sema_init(&ei->i_ext_sem, 1);
+ 
+ 	/*
+ 	 * NOTE! The in-memory inode i_data array is in little-endian order
+@@ -2556,6 +2570,7 @@ static int ext3_do_update_inode(handle_t
+ 		raw_inode->i_fsize = 0;
+ 	}
+ #endif
++ 	raw_inode->osd2.linux2.l_i_depth = ei->i_depth;
+ 	raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl);
+ 	if (!S_ISREG(inode->i_mode)) {
+ 		raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
+@@ -2759,6 +2774,9 @@ int ext3_writepage_trans_blocks(struct i
+ 	int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3;
+ 	int ret;
+ 	
++	if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
++		return ext3_ext_writepage_trans_blocks(inode, bpp);
++
+ 	if (ext3_should_journal_data(inode))
+ 		ret = 3 * (bpp + indirects) + 2;
+ 	else
+diff -puN fs/ext3/Makefile~ext3-extents fs/ext3/Makefile
+--- linux-2.4.18-chaos/fs/ext3/Makefile~ext3-extents	2003-08-25 20:09:59.000000000 +0400
++++ linux-2.4.18-chaos-alexey/fs/ext3/Makefile	2003-08-25 20:09:59.000000000 +0400
+@@ -12,7 +12,8 @@ O_TARGET := ext3.o
+ export-objs :=	ext3-exports.o
+ 
+ obj-y    := balloc.o iopen.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+-		ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o
++		ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o \
++		extents.o
+ obj-m    := $(O_TARGET)
+ 
+ include $(TOPDIR)/Rules.make
+diff -puN fs/ext3/super.c~ext3-extents fs/ext3/super.c
+--- linux-2.4.18-chaos/fs/ext3/super.c~ext3-extents	2003-08-25 20:09:59.000000000 +0400
++++ linux-2.4.18-chaos-alexey/fs/ext3/super.c	2003-08-25 20:09:59.000000000 +0400
+@@ -619,6 +619,7 @@ void ext3_put_super (struct super_block 
+ 	kdev_t j_dev = sbi->s_journal->j_dev;
+ 	int i;
+ 
++	ext3_ext_release(sb);
+ 	ext3_stop_delete_thread(sbi);
+ 	ext3_xattr_put_super(sb);
+ 	journal_destroy(sbi->s_journal);
+@@ -741,6 +742,12 @@ static int parse_options (char * options
+ 		else
+ #endif
+ 
++		if (!strcmp (this_char, "extents"))
++			set_opt (sbi->s_mount_opt, EXTENTS);
++		else
++		if (!strcmp (this_char, "extdebug"))
++			set_opt (sbi->s_mount_opt, EXTDEBUG);
++		else
+ 		if (!strcmp (this_char, "bsddf"))
+ 			clear_opt (*mount_options, MINIX_DF);
+ 		else if (!strcmp (this_char, "nouid32")) {
+@@ -1711,6 +1718,8 @@ static int ext3_create_journal(struct su
+ 	/* Make sure we flush the recovery flag to disk. */
+ 	ext3_commit_super(sb, es, 1);
+ 
++	ext3_ext_init(sb);
++
+ 	return 0;
+ }
+ 
+diff -puN include/linux/ext3_fs.h~ext3-extents include/linux/ext3_fs.h
+--- linux-2.4.18-chaos/include/linux/ext3_fs.h~ext3-extents	2003-08-25 20:09:59.000000000 +0400
++++ linux-2.4.18-chaos-alexey/include/linux/ext3_fs.h	2003-08-25 21:12:14.000000000 +0400
+@@ -183,6 +183,7 @@ struct ext3_group_desc
+ #define EXT3_IMAGIC_FL			0x00002000 /* AFS directory */
+ #define EXT3_JOURNAL_DATA_FL		0x00004000 /* file data should be journaled */
+ #define EXT3_RESERVED_FL		0x80000000 /* reserved for ext3 lib */
++#define EXT3_EXTENTS_FL			0x00080000 /* Inode uses extents */
+ 
+ #define EXT3_FL_USER_VISIBLE		0x00005FFF /* User visible flags */
+ #define EXT3_FL_USER_MODIFIABLE		0x000000FF /* User modifiable flags */
+@@ -243,7 +244,7 @@ struct ext3_inode {
+ 		struct {
+ 			__u8	l_i_frag;	/* Fragment number */
+ 			__u8	l_i_fsize;	/* Fragment size */
+-			__u16	i_pad1;
++			__u16	l_i_depth;
+ 			__u16	l_i_uid_high;	/* these 2 fields    */
+ 			__u16	l_i_gid_high;	/* were reserved2[0] */
+ 			__u32	l_i_reserved2;
+@@ -324,6 +325,8 @@ struct ext3_inode {
+ #define EXT3_MOUNT_IOPEN		0x8000	/* Allow access via iopen */
+ #define EXT3_MOUNT_IOPEN_NOPRIV		0x10000	/* Make iopen world-readable */
+ #define EXT3_MOUNT_ASYNCDEL		0x20000	/* Delayed deletion */
++#define EXT3_MOUNT_EXTENTS		0x40000	/* Extents support */
++#define EXT3_MOUNT_EXTDEBUG		0x80000	/* Extents debug */
+ 
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef _LINUX_EXT2_FS_H
+@@ -663,6 +666,12 @@ extern void ext3_discard_prealloc (struc
+ extern void ext3_dirty_inode(struct inode *);
+ extern int ext3_change_inode_journal_flag(struct inode *, int);
+ extern void ext3_truncate (struct inode *);
++extern int ext3_block_truncate_page(handle_t *handle,
++				    struct address_space *mapping, loff_t from,
++				    struct page *page, unsigned blocksize);
++extern int ext3_forget(handle_t *handle, int is_metadata,
++		       struct inode *inode, struct buffer_head *bh,
++		       int blocknr);
+ #ifdef EXT3_DELETE_THREAD
+ extern void ext3_truncate_thread(struct inode *inode);
+ #endif
+@@ -722,6 +731,13 @@ extern struct inode_operations ext3_dir_
+ /* symlink.c */
+ extern struct inode_operations ext3_fast_symlink_inode_operations;
+ 
++/* extents.c */
++extern int ext3_ext_writepage_trans_blocks(struct inode *, int);
++extern int ext3_ext_get_block(handle_t *, struct inode *, long,
++				struct buffer_head *, int, int);
++extern void ext3_ext_truncate(struct inode *);
++extern void ext3_ext_init(struct super_block *);
++extern void ext3_ext_release(struct super_block *);
+ 
+ #endif	/* __KERNEL__ */
+ 
+diff -puN include/linux/ext3_fs_i.h~ext3-extents include/linux/ext3_fs_i.h
+--- linux-2.4.18-chaos/include/linux/ext3_fs_i.h~ext3-extents	2003-08-25 20:09:59.000000000 +0400
++++ linux-2.4.18-chaos-alexey/include/linux/ext3_fs_i.h	2003-08-25 20:09:59.000000000 +0400
+@@ -73,6 +73,10 @@ struct ext3_inode_info {
+ 	 * by other means, so we have truncate_sem.
+ 	 */
+ 	struct rw_semaphore truncate_sem;
++
++	/* extents-related data */
++	struct semaphore i_ext_sem;
++	__u16 i_depth;
+ };
+ 
+ #endif	/* _LINUX_EXT3_FS_I */
+diff -puN include/linux/ext3_fs_sb.h~ext3-extents include/linux/ext3_fs_sb.h
+--- linux-2.4.18-chaos/include/linux/ext3_fs_sb.h~ext3-extents	2003-08-25 20:09:59.000000000 +0400
++++ linux-2.4.18-chaos-alexey/include/linux/ext3_fs_sb.h	2003-08-25 20:09:59.000000000 +0400
+@@ -84,6 +84,16 @@ struct ext3_sb_info {
+ 	wait_queue_head_t s_delete_thread_queue;
+ 	wait_queue_head_t s_delete_waiter_queue;
+ #endif
++
++	/* extents */
++	int s_ext_debug;
++	int s_ext_mindepth;
++	int s_ext_maxdepth;
++	int s_ext_sum;
++	int s_ext_count;
++	spinlock_t s_ext_lock;
++	int s_ext_extents;
++	int s_ext_blocks;
+ };
+ 
+ #endif	/* _LINUX_EXT3_FS_SB */
+
+_
diff --git a/lustre/kernel_patches/patches/ext3-extents-oflag-2.4.18-chaos.patch b/lustre/kernel_patches/patches/ext3-extents-oflag-2.4.18-chaos.patch
new file mode 100644
index 0000000..c12e397
--- /dev/null
+++ b/lustre/kernel_patches/patches/ext3-extents-oflag-2.4.18-chaos.patch
@@ -0,0 +1,291 @@
+ fs/ext3/ialloc.c            |    5 +++--
+ fs/ext3/inode.c             |    2 +-
+ fs/ext3/namei.c             |   38 ++++++++++++++++++++++++++++++++++----
+ include/asm-alpha/fcntl.h   |    1 +
+ include/asm-arm/fcntl.h     |    1 +
+ include/asm-cris/fcntl.h    |    1 +
+ include/asm-i386/fcntl.h    |    1 +
+ include/asm-ia64/fcntl.h    |    1 +
+ include/asm-m68k/fcntl.h    |    1 +
+ include/asm-mips/fcntl.h    |    1 +
+ include/asm-mips64/fcntl.h  |    1 +
+ include/asm-parisc/fcntl.h  |    1 +
+ include/asm-ppc/fcntl.h     |    1 +
+ include/asm-s390/fcntl.h    |    1 +
+ include/asm-s390x/fcntl.h   |    1 +
+ include/asm-sh/fcntl.h      |    1 +
+ include/asm-sparc/fcntl.h   |    1 +
+ include/asm-sparc64/fcntl.h |    1 +
+ include/linux/ext3_fs.h     |    2 +-
+ 19 files changed, 54 insertions(+), 8 deletions(-)
+
+--- linux-2.4.18/fs/ext3/ialloc.c~ext3-extents-oflag-2.4.18-chaos	2003-09-08 23:12:48.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/ialloc.c	2003-09-08 23:12:56.000000000 +0400
+@@ -331,7 +331,8 @@ int ext3_itable_block_used(struct super_
+  */
+ struct inode * ext3_new_inode (handle_t *handle,
+ 				const struct inode * dir, int mode,
+-				unsigned long goal)
++				unsigned long goal,
++				struct lookup_intent *it)
+ {
+ 	struct super_block * sb;
+ 	struct buffer_head * bh;
+@@ -573,7 +574,7 @@ repeat:
+ 	ei->i_prealloc_count = 0;
+ #endif
+ 	ei->i_block_group = i;
+-	if (test_opt(sb, EXTENTS))
++	if (test_opt(sb, EXTENTS) && it && (it->it_flags & O_EXTENTS))
+ 		EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL;
+ 	ei->i_depth = 0;
+ 	sema_init(&ei->i_ext_sem, 1);
+--- linux-2.4.18/fs/ext3/namei.c~ext3-extents-oflag-2.4.18-chaos	2003-09-08 23:12:28.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/namei.c	2003-09-08 23:12:56.000000000 +0400
+@@ -1225,7 +1225,36 @@ static int ext3_create (struct inode * d
+ 		handle->h_sync = 1;
+ 
+ 	inode = ext3_new_inode (handle, dir, mode,
+-				(unsigned long)dentry->d_fsdata);
++				(unsigned long)dentry->d_fsdata, NULL);
++	err = PTR_ERR(inode);
++	if (!IS_ERR(inode)) {
++		inode->i_op = &ext3_file_inode_operations;
++		inode->i_fop = &ext3_file_operations;
++		inode->i_mapping->a_ops = &ext3_aops;
++		err = ext3_add_nondir(handle, dentry, inode);
++		ext3_mark_inode_dirty(handle, inode);
++	}
++	ext3_journal_stop(handle, dir);
++	return err;
++}
++
++static int ext3_create_it (struct inode * dir, struct dentry * dentry, int mode,
++				struct lookup_intent *it)
++{
++	handle_t *handle; 
++	struct inode * inode;
++	int err;
++
++	handle = ext3_journal_start(dir, EXT3_DATA_TRANS_BLOCKS +
++					EXT3_INDEX_EXTRA_TRANS_BLOCKS + 3);
++	if (IS_ERR(handle))
++		return PTR_ERR(handle);
++
++	if (IS_SYNC(dir))
++		handle->h_sync = 1;
++
++	inode = ext3_new_inode (handle, dir, mode,
++				(unsigned long)dentry->d_fsdata, it);
+ 	err = PTR_ERR(inode);
+ 	if (!IS_ERR(inode)) {
+ 		inode->i_op = &ext3_file_inode_operations;
+@@ -1254,7 +1283,7 @@ static int ext3_mknod (struct inode * di
+ 		handle->h_sync = 1;
+ 
+ 	inode = ext3_new_inode (handle, dir, mode,
+-				(unsigned long)dentry->d_fsdata);
++				(unsigned long)dentry->d_fsdata, NULL);
+ 	err = PTR_ERR(inode);
+ 	if (!IS_ERR(inode)) {
+ 		init_special_inode(inode, mode, rdev);
+@@ -1285,7 +1314,7 @@ static int ext3_mkdir(struct inode * dir
+ 		handle->h_sync = 1;
+ 
+ 	inode = ext3_new_inode (handle, dir, S_IFDIR | mode,
+-				(unsigned long)dentry->d_fsdata);
++				(unsigned long)dentry->d_fsdata, NULL);
+ 	err = PTR_ERR(inode);
+ 	if (IS_ERR(inode))
+ 		goto out_stop;
+@@ -1678,7 +1707,7 @@ static int ext3_symlink (struct inode * 
+ 		handle->h_sync = 1;
+ 
+ 	inode = ext3_new_inode (handle, dir, S_IFLNK|S_IRWXUGO,
+-				(unsigned long)dentry->d_fsdata);
++				(unsigned long)dentry->d_fsdata, NULL);
+ 	err = PTR_ERR(inode);
+ 	if (IS_ERR(inode))
+ 		goto out_stop;
+@@ -1882,6 +1911,7 @@ end_rename:
+  * directories can handle most operations...
+  */
+ struct inode_operations ext3_dir_inode_operations = {
++	create_it:	ext3_create_it,		/* BKL held */
+ 	create:		ext3_create,		/* BKL held */
+ 	lookup:		ext3_lookup,		/* BKL held */
+ 	link:		ext3_link,		/* BKL held */
+--- linux-2.4.18/include/asm-alpha/fcntl.h~ext3-extents-oflag-2.4.18-chaos	2003-07-28 17:52:07.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-alpha/fcntl.h	2003-09-08 23:12:56.000000000 +0400
+@@ -22,6 +22,7 @@
+ #define O_LARGEFILE	0400000 /* will be set by the kernel on every open */
+ #define O_ATOMICLOOKUP	01000000 /* do atomic file lookup */
+ #define O_DIRECT	02000000 /* direct disk access - should check with OSF/1 */
++#define O_EXTENTS	04000000 /* create file with extents if possible */
+ 
+ #define F_DUPFD		0	/* dup */
+ #define F_GETFD		1	/* get close_on_exec */
+--- linux-2.4.18/include/asm-arm/fcntl.h~ext3-extents-oflag-2.4.18-chaos	2003-07-28 17:52:07.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-arm/fcntl.h	2003-09-08 23:12:56.000000000 +0400
+@@ -21,6 +21,7 @@
+ #define O_DIRECT	0200000	/* direct disk access hint - currently ignored */
+ #define O_LARGEFILE	0400000
+ #define O_ATOMICLOOKUP 01000000
++#define O_EXTENTS	02000000 /* create file with extents if possible */
+ 
+ #define F_DUPFD		0	/* dup */
+ #define F_GETFD		1	/* get close_on_exec */
+--- linux-2.4.18/include/asm-cris/fcntl.h~ext3-extents-oflag-2.4.18-chaos	2001-02-09 03:32:44.000000000 +0300
++++ linux-2.4.18-alexey/include/asm-cris/fcntl.h	2003-09-08 23:12:56.000000000 +0400
+@@ -22,6 +22,7 @@
+ #define O_LARGEFILE	0100000
+ #define O_DIRECTORY	0200000	/* must be a directory */
+ #define O_NOFOLLOW	0400000 /* don't follow links */
++#define O_EXTENTS	01000000 /* create file with extents if possible */
+ 
+ #define F_DUPFD		0	/* dup */
+ #define F_GETFD		1	/* get f_flags */
+--- linux-2.4.18/include/asm-i386/fcntl.h~ext3-extents-oflag-2.4.18-chaos	2003-07-28 17:52:09.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-i386/fcntl.h	2003-09-08 23:12:56.000000000 +0400
+@@ -21,6 +21,7 @@
+ #define O_DIRECTORY	0200000	/* must be a directory */
+ #define O_NOFOLLOW	0400000 /* don't follow links */
+ #define O_ATOMICLOOKUP	01000000 /* do atomic file lookup */
++#define O_EXTENTS	02000000 /* create file with extents if possible */
+ 
+ #define F_DUPFD		0	/* dup */
+ #define F_GETFD		1	/* get close_on_exec */
+--- linux-2.4.18/include/asm-ia64/fcntl.h~ext3-extents-oflag-2.4.18-chaos	2003-07-28 17:52:09.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-ia64/fcntl.h	2003-09-08 23:12:56.000000000 +0400
+@@ -29,6 +29,7 @@
+ #define O_DIRECTORY	0200000	/* must be a directory */
+ #define O_NOFOLLOW	0400000 /* don't follow links */
+ #define O_ATOMICLOOKUP  01000000 /* do atomic file lookup */
++#define O_EXTENTS	02000000 /* create file with extents if possible */
+ 
+ #define F_DUPFD		0	/* dup */
+ #define F_GETFD		1	/* get close_on_exec */
+--- linux-2.4.18/include/asm-m68k/fcntl.h~ext3-extents-oflag-2.4.18-chaos	2000-11-28 05:00:49.000000000 +0300
++++ linux-2.4.18-alexey/include/asm-m68k/fcntl.h	2003-09-08 23:12:56.000000000 +0400
+@@ -20,6 +20,7 @@
+ #define O_NOFOLLOW	0100000	/* don't follow links */
+ #define O_DIRECT	0200000	/* direct disk access hint - currently ignored */
+ #define O_LARGEFILE	0400000
++#define O_EXTENTS	01000000 /* create file with extents if possible */
+ 
+ #define F_DUPFD		0	/* dup */
+ #define F_GETFD		1	/* get close_on_exec */
+--- linux-2.4.18/include/asm-mips64/fcntl.h~ext3-extents-oflag-2.4.18-chaos	2003-07-28 17:52:15.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-mips64/fcntl.h	2003-09-08 23:12:56.000000000 +0400
+@@ -27,6 +27,7 @@
+ #define O_DIRECTORY	0x10000	/* must be a directory */
+ #define O_NOFOLLOW	0x20000	/* don't follow links */
+ #define O_ATOMICLOOKUP	0x40000
++#define O_EXTENTS	0x80000 /* create file with extents if possible */
+ 
+ #define O_NDELAY	O_NONBLOCK
+ 
+--- linux-2.4.18/include/asm-mips/fcntl.h~ext3-extents-oflag-2.4.18-chaos	2003-07-28 17:52:14.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-mips/fcntl.h	2003-09-08 23:12:56.000000000 +0400
+@@ -27,6 +27,7 @@
+ #define O_DIRECTORY	0x10000	/* must be a directory */
+ #define O_NOFOLLOW	0x20000	/* don't follow links */
+ #define O_ATOMICLOOKUP	0x40000
++#define O_EXTENTS	02000000 /* create file with extents if possible */
+ 
+ #define O_NDELAY	O_NONBLOCK
+ 
+--- linux-2.4.18/include/asm-parisc/fcntl.h~ext3-extents-oflag-2.4.18-chaos	2000-12-05 23:29:39.000000000 +0300
++++ linux-2.4.18-alexey/include/asm-parisc/fcntl.h	2003-09-08 23:12:56.000000000 +0400
+@@ -19,6 +19,7 @@
+ #define O_NOCTTY	00400000 /* not fcntl */
+ #define O_DSYNC		01000000 /* HPUX only */
+ #define O_RSYNC		02000000 /* HPUX only */
++#define O_EXTENTS	04000000 /* create file with extents if possible */
+ 
+ #define FASYNC		00020000 /* fcntl, for BSD compatibility */
+ #define O_DIRECT	00040000 /* direct disk access hint - currently ignored */
+--- linux-2.4.18/include/asm-ppc/fcntl.h~ext3-extents-oflag-2.4.18-chaos	2003-07-28 17:52:15.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-ppc/fcntl.h	2003-09-08 23:12:56.000000000 +0400
+@@ -24,6 +24,7 @@
+ #define O_LARGEFILE     0200000
+ #define O_DIRECT	0400000	/* direct disk access hint */
+ #define O_ATOMICLOOKUP 01000000	/* do atomic file lookup */
++#define O_EXTENTS	02000000 /* create file with extents if possible */
+ 
+ #define F_DUPFD		0	/* dup */
+ #define F_GETFD		1	/* get close_on_exec */
+--- linux-2.4.18/include/asm-s390/fcntl.h~ext3-extents-oflag-2.4.18-chaos	2003-07-28 17:52:15.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-s390/fcntl.h	2003-09-08 23:12:56.000000000 +0400
+@@ -28,6 +28,7 @@
+ #define O_DIRECTORY	0200000	/* must be a directory */
+ #define O_NOFOLLOW	0400000 /* don't follow links */
+ #define O_ATOMICLOOKUP	01000000 /* do atomic file lookup */
++#define O_EXTENTS	02000000 /* create file with extents if possible */
+ 
+ #define F_DUPFD		0	/* dup */
+ #define F_GETFD		1	/* get close_on_exec */
+--- linux-2.4.18/include/asm-s390x/fcntl.h~ext3-extents-oflag-2.4.18-chaos	2003-07-28 17:52:15.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-s390x/fcntl.h	2003-09-08 23:12:56.000000000 +0400
+@@ -28,6 +28,7 @@
+ #define O_DIRECTORY	0200000	/* must be a directory */
+ #define O_NOFOLLOW	0400000 /* don't follow links */
+ #define O_ATOMICLOOKUP 01000000	/* do atomic file lookup */
++#define O_EXTENTS	02000000 /* create file with extents if possible */
+ 
+ #define F_DUPFD		0	/* dup */
+ #define F_GETFD		1	/* get close_on_exec */
+--- linux-2.4.18/include/asm-sh/fcntl.h~ext3-extents-oflag-2.4.18-chaos	2003-07-28 17:52:15.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-sh/fcntl.h	2003-09-08 23:12:56.000000000 +0400
+@@ -21,6 +21,7 @@
+ #define O_DIRECTORY	0200000	/* must be a directory */
+ #define O_NOFOLLOW	0400000 /* don't follow links */
+ #define O_ATOMICLOOKUP  01000000
++#define O_EXTENTS	02000000 /* create file with extents if possible */
+ 
+ #define F_DUPFD		0	/* dup */
+ #define F_GETFD		1	/* get close_on_exec */
+--- linux-2.4.18/include/asm-sparc64/fcntl.h~ext3-extents-oflag-2.4.18-chaos	2003-07-28 17:52:16.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-sparc64/fcntl.h	2003-09-08 23:12:56.000000000 +0400
+@@ -22,6 +22,7 @@
+ #define O_LARGEFILE	0x40000
+ #define O_ATOMICLOOKUP	0x80000 /* do atomic file lookup */
+ #define O_DIRECT        0x100000 /* direct disk access hint */
++#define O_EXTENTS	0x200000 /* create file with extents if possible */
+ 
+ 
+ #define F_DUPFD		0	/* dup */
+--- linux-2.4.18/include/asm-sparc/fcntl.h~ext3-extents-oflag-2.4.18-chaos	2003-07-28 17:52:16.000000000 +0400
++++ linux-2.4.18-alexey/include/asm-sparc/fcntl.h	2003-09-08 23:12:56.000000000 +0400
+@@ -22,6 +22,7 @@
+ #define O_LARGEFILE	0x40000
+ #define O_ATOMICLOOKUP	0x80000 /* do atomic file lookup */
+ #define O_DIRECT        0x100000 /* direct disk access hint */
++#define O_EXTENTS	0x200000 /* create file with extents if possible */
+ 
+ #define F_DUPFD		0	/* dup */
+ #define F_GETFD		1	/* get close_on_exec */
+--- linux-2.4.18/include/linux/ext3_fs.h~ext3-extents-oflag-2.4.18-chaos	2003-09-08 23:12:48.000000000 +0400
++++ linux-2.4.18-alexey/include/linux/ext3_fs.h	2003-09-08 23:12:56.000000000 +0400
+@@ -641,7 +641,7 @@ extern int ext3_sync_file (struct file *
+ 
+ /* ialloc.c */
+ extern struct inode * ext3_new_inode (handle_t *, const struct inode *, int,
+-				      unsigned long);
++				      unsigned long, struct lookup_intent *);
+ extern void ext3_free_inode (handle_t *, struct inode *);
+ extern struct inode * ext3_orphan_get (struct super_block *, unsigned long);
+ extern unsigned long ext3_count_free_inodes (struct super_block *);
+--- linux-2.4.18/fs/ext3/inode.c~ext3-extents-oflag-2.4.18-chaos	2003-09-08 23:12:48.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/inode.c	2003-09-08 23:13:15.000000000 +0400
+@@ -2204,7 +2204,7 @@ void ext3_truncate_thread(struct inode *
+ 	if (IS_ERR(handle))
+ 		goto out_truncate;
+ 
+-	new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode, 0);
++	new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode, 0, 0);
+ 	if (IS_ERR(new_inode)) {
+ 		ext3_debug("truncate inode %lu directly (no new inodes)\n",
+ 			   old_inode->i_ino);
+
+_
diff --git a/lustre/kernel_patches/patches/ext3-map_inode_page-2.6.0.patch b/lustre/kernel_patches/patches/ext3-map_inode_page-2.6.0.patch
new file mode 100644
index 0000000..4695c4f
--- /dev/null
+++ b/lustre/kernel_patches/patches/ext3-map_inode_page-2.6.0.patch
@@ -0,0 +1,76 @@
+ fs/ext3/inode.c |   52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
+ fs/ext3/super.c |    3 +++
+ 2 files changed, 55 insertions(+)
+
+--- linux-2.6.0-test3/fs/ext3/inode.c~ext3-map_inode_page-2.6.0	2003-09-02 14:48:43.000000000 +0400
++++ linux-2.6.0-test3-alexey/fs/ext3/inode.c	2003-09-08 17:50:16.000000000 +0400
+@@ -3129,3 +3129,55 @@ int ext3_prep_san_write(struct inode *in
+ 		ret = ret2;
+ 	return ret;
+ }
++
++int ext3_map_inode_page(struct inode *inode, struct page *page,
++                        unsigned long *blocks, int *created, int create)
++{
++        unsigned int blocksize, blocks_per_page;
++        unsigned long iblock;
++        struct buffer_head dummy;
++        void *handle;
++        int i, rc = 0, failed = 0, needed_blocks;
++
++        blocksize = inode->i_sb->s_blocksize;
++        blocks_per_page = PAGE_SIZE >> inode->i_sb->s_blocksize_bits;
++        iblock = page->index >> (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
++
++        for (i = 0; i < blocks_per_page; i++, iblock++) {
++                blocks[i] = ext3_bmap(inode->i_mapping, iblock);
++                if (blocks[i] == 0) {
++                        failed++;
++                        created[i] = -1;
++                } else {
++                        created[i] = 0;
++                }
++        }
++
++        if (failed == 0 || create == 0)
++                return 0;
++
++        needed_blocks = ext3_writepage_trans_blocks(inode) * failed;
++        handle = ext3_journal_start(inode, needed_blocks);
++        if (IS_ERR(handle))
++                return PTR_ERR(handle);
++
++        iblock = page->index >> (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
++        for (i = 0; i < blocks_per_page; i++, iblock++) {
++                if (blocks[i] != 0)
++                        continue;
++
++                rc = ext3_get_block_handle(handle, inode, iblock, &dummy, 1, 1);
++                if (rc) {
++                        printk(KERN_INFO "ext3_map_inode_page: error reading "
++                               "block %ld\n", iblock);
++                        goto out;
++                }
++                blocks[i] = dummy.b_blocknr;
++                created[i] = 1;
++        }
++
++ out:
++	ext3_journal_stop(handle);
++        return rc;
++}
++
+--- linux-2.6.0-test3/fs/ext3/super.c~ext3-map_inode_page-2.6.0	2003-09-02 14:48:43.000000000 +0400
++++ linux-2.6.0-test3-alexey/fs/ext3/super.c	2003-09-08 17:48:33.000000000 +0400
+@@ -2094,6 +2094,9 @@ static void __exit exit_ext3_fs(void)
+ int ext3_prep_san_write(struct inode *inode, long *blocks,
+                         int nblocks, loff_t newsize);
+ EXPORT_SYMBOL(ext3_prep_san_write);
++int ext3_map_inode_page(struct inode *inode, struct page *page,
++                        unsigned long *blocks, int *created, int create);
++EXPORT_SYMBOL(ext3_map_inode_page);
+ 
+ MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
+ MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
+
+_
diff --git a/lustre/kernel_patches/patches/ext3-no-write-super-chaos.patch b/lustre/kernel_patches/patches/ext3-no-write-super-chaos.patch
new file mode 100644
index 0000000..37a5d7a
--- /dev/null
+++ b/lustre/kernel_patches/patches/ext3-no-write-super-chaos.patch
@@ -0,0 +1,15 @@
+ fs/ext3/super.c |    1 -
+ 1 files changed, 1 deletion(-)
+
+--- linux-2.4.18-chaos/fs/ext3/super.c~ext3-no-write-super-chaos	2003-08-24 21:34:53.000000000 +0400
++++ linux-2.4.18-chaos-alexey/fs/ext3/super.c	2003-08-24 21:40:47.000000000 +0400
+@@ -1818,7 +1818,6 @@ void ext3_write_super (struct super_bloc
+ 	if (down_trylock(&sb->s_lock) == 0)
+ 		BUG();
+ 	sb->s_dirt = 0;
+-	log_start_commit(EXT3_SB(sb)->s_journal, NULL);
+ }
+ 
+ static int ext3_sync_fs(struct super_block *sb)
+
+_
diff --git a/lustre/kernel_patches/patches/ext3-o_direct-1.2.4.20-rh.patch b/lustre/kernel_patches/patches/ext3-o_direct-1.2.4.20-rh.patch
new file mode 100644
index 0000000..f0b7d7e
--- /dev/null
+++ b/lustre/kernel_patches/patches/ext3-o_direct-1.2.4.20-rh.patch
@@ -0,0 +1,197 @@
+
+Index: linux-2.4.20-rh/fs/ext3/inode.c
+===================================================================
+--- linux-2.4.20-rh.orig/fs/ext3/inode.c	2003-09-04 18:01:41.000000000 +0800
++++ linux-2.4.20-rh/fs/ext3/inode.c	2003-09-04 18:18:54.000000000 +0800
+@@ -27,6 +27,7 @@
+ #include <linux/ext3_jbd.h>
+ #include <linux/jbd.h>
+ #include <linux/locks.h>
++#include <linux/iobuf.h>
+ #include <linux/smp_lock.h>
+ #include <linux/highuid.h>
+ #include <linux/quotaops.h>
+@@ -743,9 +744,9 @@
+  * The BKL may not be held on entry here.  Be sure to take it early.
+  */
+ 
+-static int ext3_get_block_handle(handle_t *handle, struct inode *inode, 
+-				 long iblock,
+-				 struct buffer_head *bh_result, int create)
++static int
++ext3_get_block_handle(handle_t *handle, struct inode *inode, long iblock,
++		struct buffer_head *bh_result, int create, int extend_disksize)
+ {
+ 	int err = -EIO;
+ 	int offsets[4];
+@@ -825,15 +826,18 @@
+ 	if (err)
+ 		goto cleanup;
+ 
+-	new_size = inode->i_size;
+-	/*
+-	 * This is not racy against ext3_truncate's modification of i_disksize
+-	 * because VM/VFS ensures that the file cannot be extended while
+-	 * truncate is in progress.  It is racy between multiple parallel
+-	 * instances of get_block, but we have the BKL.
+-	 */
+-	if (new_size > inode->u.ext3_i.i_disksize)
+-		inode->u.ext3_i.i_disksize = new_size;
++ 	if (extend_disksize) {
++ 		/*
++ 		 * This is not racy against ext3_truncate's modification of
++ 		 * i_disksize because VM/VFS ensures that the file cannot be
++ 		 * extended while truncate is in progress.  It is racy between
++ 		 * multiple parallel instances of get_block, but we have BKL.
++ 		 */
++ 		struct ext3_inode_info *ei = EXT3_I(inode);
++ 		new_size = inode->i_size;
++ 		if (new_size > ei->i_disksize)
++ 			ei->i_disksize = new_size;
++ 	}
+ 
+ 	bh_result->b_state |= (1UL << BH_New);
+ 	goto got_it;
+@@ -861,7 +865,38 @@
+ 		handle = ext3_journal_current_handle();
+ 		J_ASSERT(handle != 0);
+ 	}
+-	ret = ext3_get_block_handle(handle, inode, iblock, bh_result, create);
++	ret = ext3_get_block_handle(handle, inode, iblock,
++				bh_result, create, 1);
++	return ret;
++}
++
++#define DIO_CREDITS (EXT3_RESERVE_TRANS_BLOCKS + 32)
++
++static int
++ext3_direct_io_get_block(struct inode *inode, long iblock,
++		struct buffer_head *bh_result, int create)
++{
++	handle_t *handle = journal_current_handle();
++	int ret = 0;
++
++	lock_kernel();
++	if (handle && handle->h_buffer_credits <= EXT3_RESERVE_TRANS_BLOCKS) {
++		/*
++		 * Getting low on buffer credits.  journal_extend() returns 0
++		 * on success, > 0 if the transaction is full, < 0 on error.
++		 */
++		ret = ext3_journal_extend(handle, DIO_CREDITS);
++		if (ret > 0) {
++			/* Couldn't extend the transaction.  Start a new one */
++			ret = ext3_journal_restart(handle, DIO_CREDITS);
++		}
++	}
++	if (ret == 0)
++		ret = ext3_get_block_handle(handle, inode, iblock,
++					bh_result, create, 0);
++	if (ret == 0)
++		bh_result->b_size = (1 << inode->i_blkbits);
++	unlock_kernel();
+ 	return ret;
+ }
+ 
+@@ -879,7 +914,7 @@
+ 	dummy.b_state = 0;
+ 	dummy.b_blocknr = -1000;
+ 	buffer_trace_init(&dummy.b_history);
+-	*errp = ext3_get_block_handle(handle, inode, block, &dummy, create);
++	*errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1);
+ 	if (!*errp && buffer_mapped(&dummy)) {
+ 		struct buffer_head *bh;
+ 		bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
+@@ -1387,6 +1422,67 @@
+ 	return journal_try_to_free_buffers(journal, page, wait);
+ }
+ 
++static int
++ext3_direct_IO(int rw, struct inode *inode, struct kiobuf *iobuf,
++		unsigned long blocknr, int blocksize)
++{
++	struct ext3_inode_info *ei = EXT3_I(inode);
++	handle_t *handle = NULL;
++	int ret;
++	int orphan = 0;
++	loff_t offset = (loff_t)blocknr << inode->i_blkbits;	/* ugh */
++	ssize_t count = iobuf->length;			/* ditto */
++
++	if (rw == WRITE) {
++		loff_t final_size = offset + count;
++
++		lock_kernel();
++		handle = ext3_journal_start(inode, DIO_CREDITS);
++		unlock_kernel();
++		if (IS_ERR(handle)) {
++			ret = PTR_ERR(handle);
++			goto out;
++		}
++		if (final_size > inode->i_size) {
++			lock_kernel();
++			ret = ext3_orphan_add(handle, inode);
++			unlock_kernel();
++			if (ret)
++				goto out_stop;
++			orphan = 1;
++			ei->i_disksize = inode->i_size;
++		}
++	}
++
++	ret = generic_direct_IO(rw, inode, iobuf, blocknr,
++				blocksize, ext3_direct_io_get_block);
++
++out_stop:
++	if (handle) {
++		int err;
++
++		lock_kernel();
++		if (orphan) 
++			ext3_orphan_del(handle, inode);
++		if (orphan && ret > 0) {
++			loff_t end = offset + ret;
++			if (end > inode->i_size) {
++				ei->i_disksize = end;
++				inode->i_size = end;
++				err = ext3_mark_inode_dirty(handle, inode);
++				if (!ret) 
++					ret = err;
++			}
++		}
++		err = ext3_journal_stop(handle, inode);
++		if (ret == 0)
++			ret = err;
++		unlock_kernel();
++	}
++out:
++	return ret;
++
++}
+ 
+ struct address_space_operations ext3_aops = {
+ 	readpage:	ext3_readpage,		/* BKL not held.  Don't need */
+@@ -1397,6 +1493,7 @@
+ 	bmap:		ext3_bmap,		/* BKL held */
+ 	flushpage:	ext3_flushpage,		/* BKL not held.  Don't need */
+ 	releasepage:	ext3_releasepage,	/* BKL not held.  Don't need */
++	direct_IO:	ext3_direct_IO,		/* BKL not held.  Don't need */
+ };
+ 
+ /*
+@@ -2970,7 +3067,7 @@
+ 	/* alloc blocks one by one */
+ 	for (i = 0; i < nblocks; i++) {
+ 		ret = ext3_get_block_handle(handle, inode, blocks[i],
+-						&bh_tmp, 1);
++						&bh_tmp, 1, 1);
+ 		if (ret)
+ 			break;
+ 
+@@ -3030,7 +3127,7 @@
+                 if (blocks[i] != 0)
+                         continue;
+ 
+-                rc = ext3_get_block_handle(handle, inode, iblock, &dummy, 1);
++                rc = ext3_get_block_handle(handle, inode, iblock, &dummy, 1, 1);
+                 if (rc) {
+                         printk(KERN_INFO "ext3_map_inode_page: error reading "
+                                "block %ld\n", iblock);
diff --git a/lustre/kernel_patches/patches/ext3-pdirops-2.4.18-chaos.patch b/lustre/kernel_patches/patches/ext3-pdirops-2.4.18-chaos.patch
new file mode 100644
index 0000000..f8f514b
--- /dev/null
+++ b/lustre/kernel_patches/patches/ext3-pdirops-2.4.18-chaos.patch
@@ -0,0 +1,1238 @@
+ fs/ext3/ialloc.c          |    3 
+ fs/ext3/inode.c           |    3 
+ fs/ext3/namei.c           |  582 +++++++++++++++++++++++++++++++++++++---------
+ fs/ext3/super.c           |   14 +
+ include/linux/ext3_fs.h   |    1 
+ include/linux/ext3_fs_i.h |    6 
+ 6 files changed, 500 insertions(+), 109 deletions(-)
+
+--- linux-2.4.18/fs/ext3/namei.c~ext3-pdirops-2.4.18-chaos	2003-09-01 14:58:06.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/namei.c	2003-09-02 11:46:15.000000000 +0400
+@@ -52,6 +52,9 @@ static struct buffer_head *ext3_append(h
+ {
+ 	struct buffer_head *bh;
+ 
++	/* with parallel dir operations all appends
++	 * have to be serialized -bzzz */
++	down(&EXT3_I(inode)->i_append_sem);
+ 	*block = inode->i_size >> inode->i_sb->s_blocksize_bits;
+ 
+ 	if ((bh = ext3_bread(handle, inode, *block, 1, err))) {
+@@ -59,6 +62,8 @@ static struct buffer_head *ext3_append(h
+ 		EXT3_I(inode)->i_disksize = inode->i_size;
+ 		ext3_journal_get_write_access(handle,bh);
+ 	}
++	up(&EXT3_I(inode)->i_append_sem);
++	
+ 	return bh;
+ }
+ 
+@@ -135,6 +140,8 @@ struct dx_frame
+ 	struct buffer_head *bh;
+ 	struct dx_entry *entries;
+ 	struct dx_entry *at;
++	unsigned long leaf;
++	unsigned int curidx;
+ };
+ 
+ struct dx_map_entry
+@@ -143,6 +150,30 @@ struct dx_map_entry
+ 	u32 offs;
+ };
+ 
++/* FIXME: this should be reworked using bb_spin_lock
++ * introduced in -mm tree
++ */
++#define BH_DXLock	25
++
++static inline void dx_lock_bh(struct buffer_head volatile *bh)
++{
++#ifdef CONFIG_SMP
++        while (test_and_set_bit(BH_DXLock, &bh->b_state)) {
++                while (test_bit(BH_DXLock, &bh->b_state))
++                        cpu_relax();
++        }
++#endif
++}
++
++static inline void dx_unlock_bh(struct buffer_head *bh)
++{
++#ifdef CONFIG_SMP
++        smp_mb__before_clear_bit();
++        clear_bit(BH_DXLock, &bh->b_state);
++#endif
++}
++
++
+ #ifdef CONFIG_EXT3_INDEX
+ static inline unsigned dx_get_block (struct dx_entry *entry);
+ static void dx_set_block (struct dx_entry *entry, unsigned value);
+@@ -154,7 +185,7 @@ static void dx_set_count (struct dx_entr
+ static void dx_set_limit (struct dx_entry *entries, unsigned value);
+ static unsigned dx_root_limit (struct inode *dir, unsigned infosize);
+ static unsigned dx_node_limit (struct inode *dir);
+-static struct dx_frame *dx_probe(struct dentry *dentry,
++static struct dx_frame *dx_probe(struct qstr *name,
+ 				 struct inode *dir,
+ 				 struct dx_hash_info *hinfo,
+ 				 struct dx_frame *frame,
+@@ -166,15 +197,18 @@ static void dx_sort_map(struct dx_map_en
+ static struct ext3_dir_entry_2 *dx_move_dirents (char *from, char *to,
+ 		struct dx_map_entry *offsets, int count);
+ static struct ext3_dir_entry_2* dx_pack_dirents (char *base, int size);
+-static void dx_insert_block (struct dx_frame *frame, u32 hash, u32 block);
++static void dx_insert_block (struct inode *, struct dx_frame *, u32, u32, u32);
+ static int ext3_htree_next_block(struct inode *dir, __u32 hash,
+ 				 struct dx_frame *frame,
+ 				 struct dx_frame *frames, int *err,
+ 				 __u32 *start_hash);
+ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
+-		       struct ext3_dir_entry_2 **res_dir, int *err);
++		       struct ext3_dir_entry_2 **res_dir, int *err,
++		       int rwlock, void **lock);
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+ 			     struct inode *inode);
++static inline void *ext3_lock_htree(struct inode *, unsigned long, int);
++static inline void ext3_unlock_htree(struct inode *, void *);
+ 
+ /*
+  * Future: use high four bits of block for coalesce-on-delete flags
+@@ -307,6 +341,94 @@ struct stats dx_show_entries(struct dx_h
+ #endif /* DX_DEBUG */
+ 
+ /*
++ * dx_find_position
++ *
++ * search position of specified hash in index
++ *
++ */
++
++struct dx_entry * dx_find_position(struct dx_entry * entries, u32 hash)
++{
++	struct dx_entry *p, *q, *m;
++	int count;
++
++	count = dx_get_count(entries);
++	p = entries + 1;
++	q = entries + count - 1;
++	while (p <= q)
++	{
++		m = p + (q - p)/2;
++		if (dx_get_hash(m) > hash)
++			q = m - 1;
++		else
++			p = m + 1;
++	}
++	return p - 1;
++}
++
++/*
++ * returns 1 if path is unchanged
++ */
++int dx_check_path(struct dx_frame *frame, u32 hash)
++{
++	struct dx_entry *p;
++	int ret = 1;
++
++	dx_lock_bh(frame->bh);
++	p = dx_find_position(frame->entries, hash);
++	if (frame->leaf != dx_get_block(p))
++		ret = 0;
++	dx_unlock_bh(frame->bh);
++	
++	return ret;
++}
++
++/*
++ * 0 - changed
++ * 1 - hasn't changed
++ */
++static int
++dx_check_full_path(struct dx_frame *frames, struct dx_hash_info *hinfo)
++{
++	struct dx_entry *p;
++	struct dx_frame *frame = frames;
++	u32 leaf;
++
++	/* check first level */
++	dx_lock_bh(frame->bh);
++	p = dx_find_position(frame->entries, hinfo->hash);
++	leaf = dx_get_block(p);
++	dx_unlock_bh(frame->bh);
++	
++	if (leaf != frame->leaf) 
++		return 0;
++	
++	/* is there 2nd level? */
++	frame++;
++	if (frame->bh == NULL)
++		return 1;
++
++	/* check second level */
++	dx_lock_bh(frame->bh);
++
++	/* probably 1st level got changed, check it */
++	if (!dx_check_path(frames, hinfo->hash)) {
++		/* path changed */
++		dx_unlock_bh(frame->bh);
++		return 0;
++	}
++
++	p = dx_find_position(frame->entries, hinfo->hash);
++	leaf = dx_get_block(p);
++	dx_unlock_bh(frame->bh);
++	
++	if (leaf != frame->leaf)
++		return 0;
++
++	return 1;
++}
++
++/*
+  * Probe for a directory leaf block to search.
+  *
+  * dx_probe can return ERR_BAD_DX_DIR, which means there was a format
+@@ -316,19 +438,20 @@ struct stats dx_show_entries(struct dx_h
+  * back to userspace.
+  */
+ static struct dx_frame *
+-dx_probe(struct dentry *dentry, struct inode *dir,
++dx_probe(struct qstr *name, struct inode *dir,
+ 	 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err)
+ {
+-	unsigned count, indirect;
+-	struct dx_entry *at, *entries, *p, *q, *m;
++	unsigned indirect;
++	struct dx_entry *at, *entries;
+ 	struct dx_root *root;
+ 	struct buffer_head *bh;
+ 	struct dx_frame *frame = frame_in;
+ 	u32 hash;
++	unsigned int curidx;
+ 
+ 	frame->bh = NULL;
+-	if (dentry)
+-		dir = dentry->d_parent->d_inode;
++	frame[1].bh = NULL;
++
+ 	if (!(bh = ext3_bread (NULL,dir, 0, 0, err)))
+ 		goto fail;
+ 	root = (struct dx_root *) bh->b_data;
+@@ -344,8 +467,8 @@ dx_probe(struct dentry *dentry, struct i
+ 	}
+ 	hinfo->hash_version = root->info.hash_version;
+ 	hinfo->seed = dir->i_sb->u.ext3_sb.s_hash_seed;
+-	if (dentry)
+-		ext3fs_dirhash(dentry->d_name.name, dentry->d_name.len, hinfo);
++	if (name)
++		ext3fs_dirhash(name->name, name->len, hinfo);
+ 	hash = hinfo->hash;
+ 
+ 	if (root->info.unused_flags & 1) {
+@@ -357,7 +480,19 @@ dx_probe(struct dentry *dentry, struct i
+ 		goto fail;
+ 	}
+ 
++repeat:
++	curidx = 0;
++	entries = (struct dx_entry *) (((char *)&root->info) +
++				       root->info.info_length);
++	assert(dx_get_limit(entries) == dx_root_limit(dir,
++						      root->info.info_length));
++	dxtrace (printk("Look up %x", hash));
++	dx_lock_bh(bh);
++	/* indirect must be initialized under bh lock because
++	 * 2nd level creation procedure may change it and dx_probe()
++	 * will suggest htree is still single-level -bzzz */
+ 	if ((indirect = root->info.indirect_levels) > 1) {
++		dx_unlock_bh(bh);
+ 		ext3_warning(dir->i_sb, __FUNCTION__,
+ 			     "Unimplemented inode hash depth: %#06x",
+ 			     root->info.indirect_levels);
+@@ -365,56 +500,46 @@ dx_probe(struct dentry *dentry, struct i
+ 		*err = ERR_BAD_DX_DIR;
+ 		goto fail;
+ 	}
+-
+-	entries = (struct dx_entry *) (((char *)&root->info) +
+-				       root->info.info_length);
+-	assert(dx_get_limit(entries) == dx_root_limit(dir,
+-						      root->info.info_length));
+-	dxtrace (printk("Look up %x", hash));
++	
+ 	while (1)
+ 	{
+-		count = dx_get_count(entries);
+-		assert (count && count <= dx_get_limit(entries));
+-		p = entries + 1;
+-		q = entries + count - 1;
+-		while (p <= q)
+-		{
+-			m = p + (q - p)/2;
+-			dxtrace(printk("."));
+-			if (dx_get_hash(m) > hash)
+-				q = m - 1;
+-			else
+-				p = m + 1;
+-		}
+-
+-		if (0) // linear search cross check
+-		{
+-			unsigned n = count - 1;
+-			at = entries;
+-			while (n--)
+-			{
+-				dxtrace(printk(","));
+-				if (dx_get_hash(++at) > hash)
+-				{
+-					at--;
+-					break;
+-				}
+-			}
+-			assert (at == p - 1);
+-		}
+-
+-		at = p - 1;
+-		dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
++		at = dx_find_position(entries, hinfo->hash);
++		dxtrace(printk(" %x->%u\n",
++				at == entries? 0: dx_get_hash(at),
++				dx_get_block(at)));
+ 		frame->bh = bh;
+ 		frame->entries = entries;
+ 		frame->at = at;
+-		if (!indirect--) return frame;
+-		if (!(bh = ext3_bread (NULL,dir, dx_get_block(at), 0, err)))
++		frame->curidx = curidx;
++		frame->leaf = dx_get_block(at);
++		if (!indirect--) {
++			dx_unlock_bh(bh);
++			return frame;
++		}
++		
++		/* step into next htree level */
++		curidx = dx_get_block(at);
++		dx_unlock_bh(bh);
++		if (!(bh = ext3_bread (NULL,dir, frame->leaf, 0, err)))
+ 			goto fail2;
++		
++		dx_lock_bh(bh);
++		/* splitting may change root index block and move
++		 * hash we're looking for into another index block
++		 * so, we have to check this situation and repeat
++		 * from beginning if path got changed -bzzz */
++		if (!dx_check_path(frame, hash)) {
++			dx_unlock_bh(bh);
++			bh = frame->bh;
++			indirect++;
++			goto repeat;
++		}
++		
+ 		at = entries = ((struct dx_node *) bh->b_data)->entries;
+ 		assert (dx_get_limit(entries) == dx_node_limit (dir));
+ 		frame++;
+ 	}
++	dx_unlock_bh(bh);
+ fail2:
+ 	while (frame >= frame_in) {
+ 		brelse(frame->bh);
+@@ -428,8 +553,7 @@ static void dx_release (struct dx_frame 
+ {
+ 	if (frames[0].bh == NULL)
+ 		return;
+-
+-	if (((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels)
++	if (frames[1].bh != NULL)
+ 		brelse(frames[1].bh);
+ 	brelse(frames[0].bh);
+ }
+@@ -471,8 +595,10 @@ static int ext3_htree_next_block(struct 
+ 	 * nodes need to be read.
+ 	 */
+ 	while (1) {
+-		if (++(p->at) < p->entries + dx_get_count(p->entries))
++		if (++(p->at) < p->entries + dx_get_count(p->entries)) {
++			p->leaf = dx_get_block(p->at);
+ 			break;
++		}
+ 		if (p == frames)
+ 			return 0;
+ 		num_frames++;
+@@ -498,13 +624,17 @@ static int ext3_htree_next_block(struct 
+ 	 * block so no check is necessary
+ 	 */
+ 	while (num_frames--) {
+-		if (!(bh = ext3_bread(NULL, dir, dx_get_block(p->at),
+-				      0, err)))
++		u32 idx;
++		
++		idx = p->leaf = dx_get_block(p->at);
++		if (!(bh = ext3_bread(NULL, dir, idx, 0, err)))
+ 			return -1; /* Failure */
+ 		p++;
+ 		brelse (p->bh);
+ 		p->bh = bh;
+ 		p->at = p->entries = ((struct dx_node *) bh->b_data)->entries;
++		p->curidx = idx;
++		p->leaf = dx_get_block(p->at);
+ 	}
+ 	return 1;
+ }
+@@ -544,7 +674,7 @@ int ext3_htree_fill_tree(struct file *di
+ 	dir = dir_file->f_dentry->d_inode;
+ 	hinfo.hash = start_hash;
+ 	hinfo.minor_hash = 0;
+-	frame = dx_probe(0, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
++	frame = dx_probe(NULL, dir_file->f_dentry->d_inode, &hinfo, frames, &err);
+ 	if (!frame)
+ 		return err;
+ 
+@@ -626,7 +756,8 @@ static int dx_make_map (struct ext3_dir_
+ 			count++;
+ 		}
+ 		/* XXX: do we need to check rec_len == 0 case? -Chris */
+-		de = (struct ext3_dir_entry_2 *) ((char *) de + le16_to_cpu(de->rec_len));
++		de = (struct ext3_dir_entry_2 *)((char*)de +
++				le16_to_cpu(de->rec_len));
+ 	}
+ 	return count;
+ }
+@@ -659,7 +790,8 @@ static void dx_sort_map (struct dx_map_e
+ 	} while(more);
+ }
+ 
+-static void dx_insert_block(struct dx_frame *frame, u32 hash, u32 block)
++static void dx_insert_block(struct inode *dir, struct dx_frame *frame,
++			u32 hash, u32 block, u32 idx)
+ {
+ 	struct dx_entry *entries = frame->entries;
+ 	struct dx_entry *old = frame->at, *new = old + 1;
+@@ -671,6 +803,7 @@ static void dx_insert_block(struct dx_fr
+ 	dx_set_hash(new, hash);
+ 	dx_set_block(new, block);
+ 	dx_set_count(entries, count + 1);
++	
+ }
+ #endif
+ 
+@@ -753,7 +886,8 @@ static int inline search_dirblock(struct
+ 
+ 	
+ static struct buffer_head * ext3_find_entry (struct dentry *dentry,
+-					struct ext3_dir_entry_2 ** res_dir)
++					struct ext3_dir_entry_2 ** res_dir,
++					int rwlock, void **lock)
+ {
+ 	struct super_block * sb;
+ 	struct buffer_head * bh_use[NAMEI_RA_SIZE];
+@@ -769,6 +903,7 @@ static struct buffer_head * ext3_find_en
+ 	int namelen;
+ 	const u8 *name;
+ 	unsigned blocksize;
++	int do_not_use_dx = 0;
+ 
+ 	*res_dir = NULL;
+ 	sb = dir->i_sb;
+@@ -777,9 +912,10 @@ static struct buffer_head * ext3_find_en
+ 	name = dentry->d_name.name;
+ 	if (namelen > EXT3_NAME_LEN)
+ 		return NULL;
++repeat:
+ #ifdef CONFIG_EXT3_INDEX
+ 	if (is_dx(dir)) {
+-		bh = ext3_dx_find_entry(dentry, res_dir, &err);
++		bh = ext3_dx_find_entry(dentry, res_dir, &err, rwlock, lock);
+ 		/*
+ 		 * On success, or if the error was file not found,
+ 		 * return.  Otherwise, fall back to doing a search the
+@@ -788,8 +924,14 @@ static struct buffer_head * ext3_find_en
+ 		if (bh || (err != ERR_BAD_DX_DIR))
+ 			return bh;
+ 		dxtrace(printk("ext3_find_entry: dx failed, falling back\n"));
++		do_not_use_dx = 1;
+ 	}
+ #endif
++	*lock = ext3_lock_htree(dir, 0, rwlock);
++	if (is_dx(dir) && !do_not_use_dx) {
++		ext3_unlock_htree(dir, *lock);
++		goto repeat;
++	}
+ 	nblocks = dir->i_size >> EXT3_BLOCK_SIZE_BITS(sb);
+ 	start = EXT3_I(dir)->i_dir_start_lookup;
+ 	if (start >= nblocks)
+@@ -861,12 +1003,17 @@ cleanup_and_exit:
+ 	/* Clean up the read-ahead blocks */
+ 	for (; ra_ptr < ra_max; ra_ptr++)
+ 		brelse (bh_use[ra_ptr]);
++	if (!ret) {
++		ext3_unlock_htree(dir, *lock);
++		*lock = NULL;
++	}
+ 	return ret;
+ }
+ 
+ #ifdef CONFIG_EXT3_INDEX
+ static struct buffer_head * ext3_dx_find_entry(struct dentry *dentry,
+-		       struct ext3_dir_entry_2 **res_dir, int *err)
++		       struct ext3_dir_entry_2 **res_dir, int *err,
++		       int rwlock, void **lock)
+ {
+ 	struct super_block * sb;
+ 	struct dx_hash_info	hinfo;
+@@ -881,11 +1028,22 @@ static struct buffer_head * ext3_dx_find
+ 	struct inode *dir = dentry->d_parent->d_inode;
+ 	
+ 	sb = dir->i_sb;
+-	if (!(frame = dx_probe (dentry, 0, &hinfo, frames, err)))
++repeat:
++	if (!(frame = dx_probe (&dentry->d_name, dir, &hinfo, frames, err)))
+ 		return NULL;
++	
++	*lock = ext3_lock_htree(dir, frame->leaf, rwlock);
++	/* while we were taking the lock, the leaf we just found may
++	 * have been split, so we may need another leaf. check this */
++	if (!dx_check_full_path(frames, &hinfo)) {
++		ext3_unlock_htree(dir, *lock);
++		dx_release(frames);
++		goto repeat;
++	}
++
+ 	hash = hinfo.hash;
+ 	do {
+-		block = dx_get_block(frame->at);
++		block = frame->leaf;
+ 		if (!(bh = ext3_bread (NULL,dir, block, 0, err)))
+ 			goto errout;
+ 		de = (struct ext3_dir_entry_2 *) bh->b_data;
+@@ -919,6 +1077,8 @@ static struct buffer_head * ext3_dx_find
+ 	*err = -ENOENT;
+ errout:
+ 	dxtrace(printk("%s not found\n", name));
++	ext3_unlock_htree(dir, *lock);
++	*lock = NULL;
+ 	dx_release (frames);
+ 	return NULL;
+ }
+@@ -931,6 +1091,7 @@ static struct dentry *ext3_lookup(struct
+ 	struct ext3_dir_entry_2 * de;
+ 	struct buffer_head * bh;
+ 	struct dentry *alternate = NULL;
++	void *lock = NULL;
+ 
+ 	if (dentry->d_name.len > EXT3_NAME_LEN)
+ 		return ERR_PTR(-ENAMETOOLONG);
+@@ -938,10 +1099,11 @@ static struct dentry *ext3_lookup(struct
+ 	if (ext3_check_for_iopen(dir, dentry))
+ 		return NULL;
+ 
+-	bh = ext3_find_entry(dentry, &de);
++	bh = ext3_find_entry(dentry, &de, 0, &lock);
+ 	inode = NULL;
+ 	if (bh) {
+ 		unsigned long ino = le32_to_cpu(de->inode);
++		ext3_unlock_htree(dir, lock);
+ 		brelse (bh);
+ 		inode = iget(dir->i_sb, ino);
+ 
+@@ -984,7 +1146,8 @@ dx_move_dirents(char *from, char *to, st
+ 	unsigned rec_len = 0;
+ 
+ 	while (count--) {
+-		struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) (from + map->offs);
++		struct ext3_dir_entry_2 *de =
++			(struct ext3_dir_entry_2 *) (from + map->offs);
+ 		rec_len = EXT3_DIR_REC_LEN(de->name_len);
+ 		memcpy (to, de, rec_len);
+ 		((struct ext3_dir_entry_2 *) to)->rec_len = rec_len;
+@@ -997,7 +1160,8 @@ dx_move_dirents(char *from, char *to, st
+ 
+ static struct ext3_dir_entry_2* dx_pack_dirents(char *base, int size)
+ {
+-	struct ext3_dir_entry_2 *next, *to, *prev, *de = (struct ext3_dir_entry_2 *) base;
++	struct ext3_dir_entry_2 *next, *to, *prev;
++	struct ext3_dir_entry_2 *de = (struct ext3_dir_entry_2 *) base;
+ 	unsigned rec_len = 0;
+ 
+ 	prev = to = de;
+@@ -1019,7 +1183,8 @@ static struct ext3_dir_entry_2* dx_pack_
+ 
+ static struct ext3_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
+ 			struct buffer_head **bh,struct dx_frame *frame,
+-			struct dx_hash_info *hinfo, int *error)
++			struct dx_hash_info *hinfo, void **target,
++			int *error)
+ {
+ 	unsigned blocksize = dir->i_sb->s_blocksize;
+ 	unsigned count, continued;
+@@ -1066,23 +1231,30 @@ static struct ext3_dir_entry_2 *do_split
+ 	hash2 = map[split].hash;
+ 	continued = hash2 == map[split - 1].hash;
+ 	dxtrace(printk("Split block %i at %x, %i/%i\n",
+-		dx_get_block(frame->at), hash2, split, count-split));
+-
++		frame->leaf, hash2, split, count-split));
++	
+ 	/* Fancy dance to stay within two buffers */
+ 	de2 = dx_move_dirents(data1, data2, map + split, count - split);
+ 	de = dx_pack_dirents(data1,blocksize);
+ 	de->rec_len = cpu_to_le16(data1 + blocksize - (char *) de);
+ 	de2->rec_len = cpu_to_le16(data2 + blocksize - (char *) de2);
+-	dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data1, blocksize, 1));
+-	dxtrace(dx_show_leaf (hinfo, (struct ext3_dir_entry_2 *) data2, blocksize, 1));
++	dxtrace(dx_show_leaf(hinfo,(struct ext3_dir_entry_2*) data1, blocksize, 1));
++	dxtrace(dx_show_leaf(hinfo,(struct ext3_dir_entry_2*) data2, blocksize, 1));
+ 
+ 	/* Which block gets the new entry? */
++	*target = NULL;
+ 	if (hinfo->hash >= hash2)
+ 	{
+ 		swap(*bh, bh2);
+ 		de = de2;
+-	}
+-	dx_insert_block (frame, hash2 + continued, newblock);
++
++		/* entry will be stored into new block
++		 * we have to lock it before add_dirent_to_buf */
++		*target = ext3_lock_htree(dir, newblock, 1);
++	}
++	dx_lock_bh(frame->bh);
++	dx_insert_block (dir, frame, hash2 + continued, newblock, frame->curidx);
++	dx_unlock_bh(frame->bh);
+ 	err = ext3_journal_dirty_metadata (handle, bh2);
+ 	if (err)
+ 		goto journal_error;
+@@ -1156,7 +1328,8 @@ static int add_dirent_to_buf(handle_t *h
+ 	nlen = EXT3_DIR_REC_LEN(de->name_len);
+ 	rlen = le16_to_cpu(de->rec_len);
+ 	if (de->inode) {
+-		struct ext3_dir_entry_2 *de1 = (struct ext3_dir_entry_2 *)((char *)de + nlen);
++		struct ext3_dir_entry_2 *de1 =
++			(struct ext3_dir_entry_2 *)((char *)de + nlen);
+ 		de1->rec_len = cpu_to_le16(rlen - nlen);
+ 		de->rec_len = cpu_to_le16(nlen);
+ 		de = de1;
+@@ -1214,7 +1387,8 @@ static int make_indexed_dir(handle_t *ha
+ 	unsigned	blocksize;
+ 	struct dx_hash_info hinfo;
+ 	u32		block;
+-		
++	void		*lock, *new_lock;
++
+ 	blocksize =  dir->i_sb->s_blocksize;
+ 	dxtrace(printk("Creating index\n"));
+ 	retval = ext3_journal_get_write_access(handle, bh);
+@@ -1225,7 +1399,6 @@ static int make_indexed_dir(handle_t *ha
+ 	}
+ 	root = (struct dx_root *) bh->b_data;
+ 		
+-	EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
+ 	bh2 = ext3_append (handle, dir, &block, &retval);
+ 	if (!(bh2)) {
+ 		brelse(bh);
+@@ -1233,6 +1406,8 @@ static int make_indexed_dir(handle_t *ha
+ 	}
+ 	data1 = bh2->b_data;
+ 
++	lock = ext3_lock_htree(dir, block, 1);
++
+ 	/* The 0th block becomes the root, move the dirents out */
+ 	de = (struct ext3_dir_entry_2 *) &root->info;
+ 	len = ((char *) root) + blocksize - (char *) de;
+@@ -1261,13 +1436,25 @@ static int make_indexed_dir(handle_t *ha
+ 	frame->entries = entries;
+ 	frame->at = entries;
+ 	frame->bh = bh;
++	frame->curidx = 0;
++	frame->leaf = 0;
++	frame[1].bh = NULL;
+ 	bh = bh2;
+-	de = do_split(handle,dir, &bh, frame, &hinfo, &retval);
++	de = do_split(handle,dir, &bh, frame, &hinfo, &new_lock, &retval);
+ 	dx_release (frames);
+ 	if (!(de))
+-		return retval;
++		goto cleanup;
++
++	retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
++cleanup:
++	if (new_lock)
++		ext3_unlock_htree(dir, new_lock);
++	/* we mark the directory indexed only now, in order to
++	 * avoid races while the htree is being created -bzzz */
++	EXT3_I(dir)->i_flags |= EXT3_INDEX_FL;
++	ext3_unlock_htree(dir, lock);
+ 
+-	return add_dirent_to_buf(handle, dentry, inode, de, bh);
++	return retval;
+ }
+ #endif
+ 
+@@ -1296,11 +1483,13 @@ static int ext3_add_entry (handle_t *han
+ 	unsigned blocksize;
+ 	unsigned nlen, rlen;
+ 	u32 block, blocks;
++	void *lock;
+ 
+ 	sb = dir->i_sb;
+ 	blocksize = sb->s_blocksize;
+ 	if (!dentry->d_name.len)
+ 		return -EINVAL;
++repeat:
+ #ifdef CONFIG_EXT3_INDEX
+ 	if (is_dx(dir)) {
+ 		retval = ext3_dx_add_entry(handle, dentry, inode);
+@@ -1311,36 +1500,53 @@ static int ext3_add_entry (handle_t *han
+ 		ext3_mark_inode_dirty(handle, dir);
+ 	}
+ #endif
++	lock = ext3_lock_htree(dir, 0, 1);
++	if (is_dx(dir)) {
++		/* we got lock for block 0
++		 * probably previous holder of the lock
++		 * created htree -bzzz */
++		ext3_unlock_htree(dir, lock);
++		goto repeat;
++	}
++	
+ 	blocks = dir->i_size >> sb->s_blocksize_bits;
+ 	for (block = 0, offset = 0; block < blocks; block++) {
+ 		bh = ext3_bread(handle, dir, block, 0, &retval);
+-		if(!bh)
++		if(!bh) {
++			ext3_unlock_htree(dir, lock);
+ 			return retval;
++		}
+ 		retval = add_dirent_to_buf(handle, dentry, inode, 0, bh);
+-		if (retval != -ENOSPC)
++		if (retval != -ENOSPC) {
++			ext3_unlock_htree(dir, lock);
+ 			return retval;
++		}
+ 
+ #ifdef CONFIG_EXT3_INDEX
+ 		if (blocks == 1 && !dx_fallback &&
+-		    EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX))
+-			return make_indexed_dir(handle, dentry, inode, bh);
++		    EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_DIR_INDEX)) {
++			retval = make_indexed_dir(handle, dentry, inode, bh);
++			ext3_unlock_htree(dir, lock);
++			return retval;
++		}
+ #endif
+ 		brelse(bh);
+ 	}
+ 	bh = ext3_append(handle, dir, &block, &retval);
+-	if (!bh)
++	if (!bh) {
++		ext3_unlock_htree(dir, lock);
+ 		return retval;
++	}
+ 	de = (struct ext3_dir_entry_2 *) bh->b_data;
+ 	de->inode = 0;
+ 	de->rec_len = cpu_to_le16(rlen = blocksize);
+ 	nlen = 0;
+-	return add_dirent_to_buf(handle, dentry, inode, de, bh);
++	retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
++	ext3_unlock_htree(dir, lock);
++	return retval;
+ }
+ 
+ #ifdef CONFIG_EXT3_INDEX
+-/*
+- * Returns 0 for success, or a negative error value
+- */
+ static int ext3_dx_add_entry(handle_t *handle, struct dentry *dentry,
+ 			     struct inode *inode)
+ {
+@@ -1352,15 +1558,28 @@ static int ext3_dx_add_entry(handle_t *h
+ 	struct super_block * sb = dir->i_sb;
+ 	struct ext3_dir_entry_2 *de;
+ 	int err;
+-
+-	frame = dx_probe(dentry, 0, &hinfo, frames, &err);
++	int curidx;
++	void *idx_lock, *leaf_lock, *newleaf_lock;
++	
++repeat:
++	frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err);
+ 	if (!frame)
+ 		return err;
+-	entries = frame->entries;
+-	at = frame->at;
+ 
+-	if (!(bh = ext3_bread(handle,dir, dx_get_block(frame->at), 0, &err)))
++	/* we're going to change the leaf, so lock it first */
++	leaf_lock = ext3_lock_htree(dir, frame->leaf, 1);
++
++	/* while we were taking the lock, the leaf we just found may
++	 * have been split, so we need to check this */
++	if (!dx_check_full_path(frames, &hinfo)) {
++		ext3_unlock_htree(dir, leaf_lock);
++		dx_release(frames);
++		goto repeat;
++	}
++	if (!(bh = ext3_bread(handle,dir, frame->leaf, 0, &err))) {
++		printk("can't ext3_bread(%d) = %d\n", (int) frame->leaf, err);
+ 		goto cleanup;
++	}
+ 
+ 	BUFFER_TRACE(bh, "get_write_access");
+ 	err = ext3_journal_get_write_access(handle, bh);
+@@ -1373,6 +1592,35 @@ static int ext3_dx_add_entry(handle_t *h
+ 		goto cleanup;
+ 	}
+ 
++	/* our leaf does not have enough space, hence we have to
++	 * split it. so lock the index for this leaf first */
++	curidx = frame->curidx;
++	idx_lock = ext3_lock_htree(dir, curidx, 1);
++
++	/* now check did path get changed? */
++	dx_release(frames);
++
++	frame = dx_probe(&dentry->d_name, dentry->d_parent->d_inode,
++			&hinfo, frames, &err);
++	if (!frame) {
++		/* FIXME: leaf_lock is still held on this error path */
++		brelse(bh);
++		ext3_unlock_htree(dir, idx_lock);
++		return err;
++	}
++	
++	if (frame->curidx != curidx) {
++		/* path has been changed. we have to drop old lock
++		 * and repeat */
++		brelse(bh);
++		ext3_unlock_htree(dir, idx_lock);
++		ext3_unlock_htree(dir, leaf_lock);
++		dx_release(frames);
++		goto repeat;
++	}
++	entries = frame->entries;
++	at = frame->at;
++
+ 	/* Block full, should compress but for now just split */
+ 	dxtrace(printk("using %u of %u node entries\n",
+ 		       dx_get_count(entries), dx_get_limit(entries)));
+@@ -1384,7 +1632,8 @@ static int ext3_dx_add_entry(handle_t *h
+ 		struct dx_entry *entries2;
+ 		struct dx_node *node2;
+ 		struct buffer_head *bh2;
+-
++		void *nb_lock;
++		
+ 		if (levels && (dx_get_count(frames->entries) ==
+ 			       dx_get_limit(frames->entries))) {
+ 			ext3_warning(sb, __FUNCTION__,
+@@ -1395,6 +1644,7 @@ static int ext3_dx_add_entry(handle_t *h
+ 		bh2 = ext3_append (handle, dir, &newblock, &err);
+ 		if (!(bh2))
+ 			goto cleanup;
++		nb_lock = ext3_lock_htree(dir, newblock, 1);
+ 		node2 = (struct dx_node *)(bh2->b_data);
+ 		entries2 = node2->entries;
+ 		node2->fake.rec_len = cpu_to_le16(sb->s_blocksize);
+@@ -1406,27 +1656,73 @@ static int ext3_dx_add_entry(handle_t *h
+ 		if (levels) {
+ 			unsigned icount1 = icount/2, icount2 = icount - icount1;
+ 			unsigned hash2 = dx_get_hash(entries + icount1);
++			void *ri_lock;
++
++			/* we have to protect root htree index against
++			 * another dx_add_entry() which would want to
++			 * split it too -bzzz */
++			ri_lock = ext3_lock_htree(dir, 0, 1);
++
++			/* as root index block blocked we must repeat
++			 * searching for current position of our 2nd index -bzzz */
++			dx_lock_bh(frame->bh);
++			frames->at = dx_find_position(frames->entries, hinfo.hash);
++			dx_unlock_bh(frame->bh);
++			
+ 			dxtrace(printk("Split index %i/%i\n", icount1, icount2));
+-				
+-			BUFFER_TRACE(frame->bh, "get_write_access"); /* index root */
++	
++			BUFFER_TRACE(frame->bh, "get_write_access");
+ 			err = ext3_journal_get_write_access(handle,
+ 							     frames[0].bh);
+ 			if (err)
+ 				goto journal_error;
+-				
++			
++			/* copy index into new one */
+ 			memcpy ((char *) entries2, (char *) (entries + icount1),
+ 				icount2 * sizeof(struct dx_entry));
+-			dx_set_count (entries, icount1);
+ 			dx_set_count (entries2, icount2);
+ 			dx_set_limit (entries2, dx_node_limit(dir));
+ 
+ 			/* Which index block gets the new entry? */
+ 			if (at - entries >= icount1) {
++				/* unlock index we won't use */
++				ext3_unlock_htree(dir, idx_lock);
++				idx_lock = nb_lock;
+ 				frame->at = at = at - entries - icount1 + entries2;
+-				frame->entries = entries = entries2;
++				frame->entries = entries2;
++				frame->curidx = curidx = newblock;
+ 				swap(frame->bh, bh2);
++			} else {
++				/* we'll use old index,so new one may be freed */
++				ext3_unlock_htree(dir, nb_lock);
+ 			}
+-			dx_insert_block (frames + 0, hash2, newblock);
++		
++			/* NOTE: very subtle piece of code
++			 * competing dx_probe() may find 2nd level index in root
++			 * index, then we insert new index here and set new count
++			 * in that 2nd level index. so, dx_probe() may see 2nd
++			 * level index w/o hash it looks for. the solution is
++			 * to check root index after we locked the just-found 2nd
++			 * level index -bzzz */
++			dx_lock_bh(frames[0].bh);
++			dx_insert_block (dir, frames + 0, hash2, newblock, 0);
++			dx_unlock_bh(frames[0].bh);
++			
++			/* now old and new 2nd level index blocks contain
++			 * all pointers, so dx_probe() may find it in both.
++			 * it's OK -bzzz */
++			
++			dx_lock_bh(frame->bh);
++			dx_set_count(entries, icount1);
++			dx_unlock_bh(frame->bh);
++
++			/* now old 2nd level index block points to first half
++			 * of leaves. it's important that dx_probe() must
++			 * check root index block for changes under
++			 * dx_lock_bh(frame->bh) -bzzz */
++
++			ext3_unlock_htree(dir, ri_lock);
++		
+ 			dxtrace(dx_show_index ("node", frames[1].entries));
+ 			dxtrace(dx_show_index ("node",
+ 			       ((struct dx_node *) bh2->b_data)->entries));
+@@ -1435,38 +1731,61 @@ static int ext3_dx_add_entry(handle_t *h
+ 				goto journal_error;
+ 			brelse (bh2);
+ 		} else {
++			unsigned long leaf = frame->leaf;
++
+ 			dxtrace(printk("Creating second level index...\n"));
+ 			memcpy((char *) entries2, (char *) entries,
+ 			       icount * sizeof(struct dx_entry));
+ 			dx_set_limit(entries2, dx_node_limit(dir));
+ 
+ 			/* Set up root */
++ 			dx_lock_bh(frames[0].bh);
+ 			dx_set_count(entries, 1);
+ 			dx_set_block(entries + 0, newblock);
+ 			((struct dx_root *) frames[0].bh->b_data)->info.indirect_levels = 1;
++			dx_unlock_bh(frames[0].bh);
+ 
+ 			/* Add new access path frame */
+ 			frame = frames + 1;
+ 			frame->at = at = at - entries + entries2;
+ 			frame->entries = entries = entries2;
+ 			frame->bh = bh2;
++			frame->curidx = newblock;
++			frame->leaf = leaf;
+ 			err = ext3_journal_get_write_access(handle,
+ 							     frame->bh);
+ 			if (err)
+ 				goto journal_error;
++
++			/* first level index was root. it's already initialized */
++			/* we may unlock it now */
++			ext3_unlock_htree(dir, idx_lock);
++
++			/* current index is just created 2nd level index */
++			curidx = newblock;
++			idx_lock = nb_lock;
+ 		}
+ 		ext3_journal_dirty_metadata(handle, frames[0].bh);
+ 	}
+-	de = do_split(handle, dir, &bh, frame, &hinfo, &err);
++	de = do_split(handle, dir, &bh, frame, &hinfo, &newleaf_lock, &err);
+ 	if (!de)
+ 		goto cleanup;
++
++	/* index has been split */
++	ext3_unlock_htree(dir, idx_lock);
++	
+ 	err = add_dirent_to_buf(handle, dentry, inode, de, bh);
++
++	if (newleaf_lock)
++		ext3_unlock_htree(dir, newleaf_lock);
++	
+ 	bh = 0;
+ 	goto cleanup;
+ 	
+ journal_error:
+ 	ext3_std_error(dir->i_sb, err);
+ cleanup:
++	ext3_unlock_htree(dir, leaf_lock);
+ 	if (bh)
+ 		brelse(bh);
+ 	dx_release(frames);
+@@ -1899,6 +2218,7 @@ static int ext3_rmdir (struct inode * di
+ 	struct buffer_head * bh;
+ 	struct ext3_dir_entry_2 * de;
+ 	handle_t *handle;
++	void *lock;
+ 
+ 	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
+ 	if (IS_ERR(handle)) {
+@@ -1906,7 +2226,7 @@ static int ext3_rmdir (struct inode * di
+ 	}
+ 
+ 	retval = -ENOENT;
+-	bh = ext3_find_entry (dentry, &de);
++	bh = ext3_find_entry (dentry, &de, 1, &lock);
+ 	if (!bh)
+ 		goto end_rmdir;
+ 
+@@ -1917,14 +2237,19 @@ static int ext3_rmdir (struct inode * di
+ 	DQUOT_INIT(inode);
+ 
+ 	retval = -EIO;
+-	if (le32_to_cpu(de->inode) != inode->i_ino)
++	if (le32_to_cpu(de->inode) != inode->i_ino) {
++		ext3_unlock_htree(dir, lock);
+ 		goto end_rmdir;
++	}
+ 
+ 	retval = -ENOTEMPTY;
+-	if (!empty_dir (inode))
++	if (!empty_dir (inode)) {
++		ext3_unlock_htree(dir, lock);
+ 		goto end_rmdir;
++	}
+ 
+ 	retval = ext3_delete_entry(handle, dir, de, bh);
++	ext3_unlock_htree(dir, lock);
+ 	if (retval)
+ 		goto end_rmdir;
+ 	if (inode->i_nlink != 2)
+@@ -1957,6 +2282,7 @@ static int ext3_unlink(struct inode * di
+ 	struct buffer_head * bh;
+ 	struct ext3_dir_entry_2 * de;
+ 	handle_t *handle;
++	void *lock;
+ 
+ 	handle = ext3_journal_start(dir, EXT3_DELETE_TRANS_BLOCKS);
+ 	if (IS_ERR(handle)) {
+@@ -1967,7 +2293,7 @@ static int ext3_unlink(struct inode * di
+ 		handle->h_sync = 1;
+ 
+ 	retval = -ENOENT;
+-	bh = ext3_find_entry (dentry, &de);
++	bh = ext3_find_entry (dentry, &de, 1, &lock);
+ 	if (!bh)
+ 		goto end_unlink;
+ 
+@@ -1975,8 +2301,10 @@ static int ext3_unlink(struct inode * di
+ 	DQUOT_INIT(inode);
+ 
+ 	retval = -EIO;
+-	if (le32_to_cpu(de->inode) != inode->i_ino)
++	if (le32_to_cpu(de->inode) != inode->i_ino) {
++		ext3_unlock_htree(dir, lock);
+ 		goto end_unlink;
++	}
+ 	
+ 	if (!inode->i_nlink) {
+ 		ext3_warning (inode->i_sb, "ext3_unlink",
+@@ -1985,6 +2313,7 @@ static int ext3_unlink(struct inode * di
+ 		inode->i_nlink = 1;
+ 	}
+ 	retval = ext3_delete_entry(handle, dir, de, bh);
++	ext3_unlock_htree(dir, lock);
+ 	if (retval)
+ 		goto end_unlink;
+ 	dir->i_ctime = dir->i_mtime = CURRENT_TIME;
+@@ -2106,6 +2435,7 @@ static int ext3_rename (struct inode * o
+ 	struct buffer_head * old_bh, * new_bh, * dir_bh;
+ 	struct ext3_dir_entry_2 * old_de, * new_de;
+ 	int retval;
++	void *lock1 = NULL, *lock2 = NULL, *lock3 = NULL;
+ 
+ 	old_bh = new_bh = dir_bh = NULL;
+ 
+@@ -2118,7 +2448,10 @@ static int ext3_rename (struct inode * o
+ 	if (IS_SYNC(old_dir) || IS_SYNC(new_dir))
+ 		handle->h_sync = 1;
+ 
+-	old_bh = ext3_find_entry (old_dentry, &old_de);
++	if (old_dentry->d_parent == new_dentry->d_parent)
++		down(&EXT3_I(old_dentry->d_parent->d_inode)->i_rename_sem);
++
++	old_bh = ext3_find_entry (old_dentry, &old_de, 1, &lock1 /* FIXME */);
+ 	/*
+ 	 *  Check for inode number is _not_ due to possible IO errors.
+ 	 *  We might rmdir the source, keep it as pwd of some process
+@@ -2131,7 +2464,7 @@ static int ext3_rename (struct inode * o
+ 		goto end_rename;
+ 
+ 	new_inode = new_dentry->d_inode;
+-	new_bh = ext3_find_entry (new_dentry, &new_de);
++	new_bh = ext3_find_entry (new_dentry, &new_de, 1, &lock2 /* FIXME */);
+ 	if (new_bh) {
+ 		if (!new_inode) {
+ 			brelse (new_bh);
+@@ -2194,7 +2527,7 @@ static int ext3_rename (struct inode * o
+ 		struct buffer_head *old_bh2;
+ 		struct ext3_dir_entry_2 *old_de2;
+ 		
+-		old_bh2 = ext3_find_entry(old_dentry, &old_de2);
++		old_bh2 = ext3_find_entry(old_dentry, &old_de2, 1, &lock3 /* FIXME */);
+ 		if (old_bh2) {
+ 			retval = ext3_delete_entry(handle, old_dir,
+ 						   old_de2, old_bh2);
+@@ -2237,6 +2570,14 @@ static int ext3_rename (struct inode * o
+ 	retval = 0;
+ 
+ end_rename:
++	if (lock1)
++		ext3_unlock_htree(old_dentry->d_parent->d_inode, lock1);
++	if (lock2)
++		ext3_unlock_htree(new_dentry->d_parent->d_inode, lock2);
++	if (lock3)
++		ext3_unlock_htree(old_dentry->d_parent->d_inode, lock3);
++	if (old_dentry->d_parent == new_dentry->d_parent)
++		up(&EXT3_I(old_dentry->d_parent->d_inode)->i_rename_sem);
+ 	brelse (dir_bh);
+ 	brelse (old_bh);
+ 	brelse (new_bh);
+@@ -2245,6 +2586,29 @@ end_rename:
+ }
+ 
+ /*
++ * these locking primitives are used to protect parts
++ * of dir's htree. protection unit is block: leaf or index
++ */
++static inline void *ext3_lock_htree(struct inode *dir,
++					unsigned long value, int rwlock)
++{
++	/* without the pdirops mount option no htree lock is taken */
++	if (!test_opt(dir->i_sb, PDIROPS))
++		return NULL;
++	/* NOTE(review): rwlock is not forwarded here, the third
++	 * argument is always 1 -- confirm this is intended */
++	return dynlock_lock(&EXT3_I(dir)->i_htree_lock, value, 1, GFP_KERNEL);
++}
++
++/* release a lock returned by ext3_lock_htree(); nothing is done when
++ * pdirops is off or when lock is NULL */
++static inline void ext3_unlock_htree(struct inode *dir, void *lock)
++{
++	if (test_opt(dir->i_sb, PDIROPS) && lock)
++		dynlock_unlock(&EXT3_I(dir)->i_htree_lock, lock);
++}
++
++/*
+  * directories can handle most operations...
+  */
+ struct inode_operations ext3_dir_inode_operations = {
+--- linux-2.4.18/fs/ext3/super.c~ext3-pdirops-2.4.18-chaos	2003-09-01 16:33:25.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/super.c	2003-09-02 12:46:29.000000000 +0400
+@@ -786,6 +786,8 @@ static int parse_options (char * options
+ 				return 0;
+ 			}
+ 		}
++		else if (!strcmp (this_char, "pdirops"))
++			set_opt (sbi->s_mount_opt, PDIROPS);
+ 		else if (!strcmp (this_char, "grpid") ||
+ 			 !strcmp (this_char, "bsdgroups"))
+ 			set_opt (*mount_options, GRPID);
+@@ -812,6 +814,9 @@ static int parse_options (char * options
+ 			if (want_numeric(value, "sb", sb_block))
+ 				return 0;
+ 		}
++		else if (!strcmp (this_char, "pdirops")) {
++			set_opt (sbi->s_mount_opt, PDIROPS);
++		}
+ #ifdef CONFIG_JBD_DEBUG
+ 		else if (!strcmp (this_char, "ro-after")) {
+ 			unsigned long v;
+@@ -969,6 +974,10 @@ static int ext3_setup_super(struct super
+ 		ext3_check_inodes_bitmap (sb);
+ 	}
+ #endif
++#ifdef S_PDIROPS
++	if (test_opt (sb, PDIROPS))
++		sb->s_flags |= S_PDIROPS;
++#endif
+ 	setup_ro_after(sb);
+ 	return res;
+ }
+@@ -1463,6 +1472,11 @@ struct super_block * ext3_read_super (st
+ 		test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
+ 		"writeback");
+ 
++	if (test_opt(sb, PDIROPS)) {
++		printk (KERN_INFO "EXT3-fs: mounted filesystem with parallel dirops\n");
++		sb->s_flags |= S_PDIROPS;
++	}
++		
+ 	return sb;
+ 
+ failed_mount3:
+--- linux-2.4.18/include/linux/ext3_fs.h~ext3-pdirops-2.4.18-chaos	2003-09-01 14:58:06.000000000 +0400
++++ linux-2.4.18-alexey/include/linux/ext3_fs.h	2003-09-02 11:46:15.000000000 +0400
+@@ -310,6 +310,7 @@ struct ext3_inode {
+ /*
+  * Mount flags
+  */
++#define EXT3_MOUNT_PDIROPS		0x800000/* Parallel dir operations */
+ #define EXT3_MOUNT_CHECK		0x0001	/* Do mount-time checks */
+ #define EXT3_MOUNT_GRPID		0x0004	/* Create files with directory's group */
+ #define EXT3_MOUNT_DEBUG		0x0008	/* Some debugging messages */
+--- linux-2.4.18/include/linux/ext3_fs_i.h~ext3-pdirops-2.4.18-chaos	2003-08-29 11:57:30.000000000 +0400
++++ linux-2.4.18-alexey/include/linux/ext3_fs_i.h	2003-09-02 11:46:15.000000000 +0400
+@@ -17,6 +17,7 @@
+ #define _LINUX_EXT3_FS_I
+ 
+ #include <linux/rwsem.h>
++#include <linux/dynlocks.h>
+ 
+ /*
+  * second extended file system inode data in memory
+@@ -73,6 +74,11 @@ struct ext3_inode_info {
+ 	 * by other means, so we have truncate_sem.
+ 	 */
+ 	struct rw_semaphore truncate_sem;
++
++	/* following fields for parallel directory operations -bzzz */
++	struct dynlock i_htree_lock;
++	struct semaphore i_append_sem;
++	struct semaphore i_rename_sem;
+ };
+ 
+ #endif	/* _LINUX_EXT3_FS_I */
+--- linux-2.4.18/fs/ext3/inode.c~ext3-pdirops-2.4.18-chaos	2003-09-01 16:33:25.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/inode.c	2003-09-02 11:46:15.000000000 +0400
+@@ -2454,6 +2454,9 @@ void ext3_read_inode(struct inode * inod
+ 	} else if (S_ISDIR(inode->i_mode)) {
+ 		inode->i_op = &ext3_dir_inode_operations;
+ 		inode->i_fop = &ext3_dir_operations;
++		dynlock_init(&EXT3_I(inode)->i_htree_lock);
++		sema_init(&EXT3_I(inode)->i_rename_sem, 1);
++		sema_init(&EXT3_I(inode)->i_append_sem, 1);
+ 	} else if (S_ISLNK(inode->i_mode)) {
+ 		if (ext3_inode_is_fast_symlink(inode))
+ 			inode->i_op = &ext3_fast_symlink_inode_operations;
+--- linux-2.4.18/fs/ext3/ialloc.c~ext3-pdirops-2.4.18-chaos	2003-09-01 14:58:05.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/ialloc.c	2003-09-02 11:46:15.000000000 +0400
+@@ -601,6 +601,9 @@ repeat:
+ 		return ERR_PTR(-EDQUOT);
+ 	}
+ 	ext3_debug ("allocating inode %lu\n", inode->i_ino);
++	dynlock_init(&EXT3_I(inode)->i_htree_lock);
++	sema_init(&EXT3_I(inode)->i_rename_sem, 1);
++	sema_init(&EXT3_I(inode)->i_append_sem, 1);
+ 	return inode;
+ 
+ fail:
+
+_
diff --git a/lustre/kernel_patches/patches/iopen-2.4.18-2.patch b/lustre/kernel_patches/patches/iopen-2.4.18-2.patch
new file mode 100644
index 0000000..3d9a864
--- /dev/null
+++ b/lustre/kernel_patches/patches/iopen-2.4.18-2.patch
@@ -0,0 +1,422 @@
+ Documentation/filesystems/ext2.txt |   16 ++
+ fs/ext3/Makefile                   |    2 
+ fs/ext3/inode.c                    |    4 
+ fs/ext3/iopen.c                    |  259 +++++++++++++++++++++++++++++++++++++
+ fs/ext3/iopen.h                    |   13 +
+ fs/ext3/namei.c                    |   12 +
+ fs/ext3/super.c                    |   11 +
+ include/linux/ext3_fs.h            |    2 
+ 8 files changed, 318 insertions(+), 1 deletion(-)
+
+--- linux-2.4.18-p4smp/Documentation/filesystems/ext2.txt~iopen-2.4.18	2003-07-09 12:17:30.000000000 -0600
++++ linux-2.4.18-p4smp-braam/Documentation/filesystems/ext2.txt	2003-07-09 17:13:02.000000000 -0600
+@@ -35,6 +35,22 @@ resgid=n			The group ID which may use th
+ 
+ sb=n				Use alternate superblock at this location.
+ 
++iopen				Makes an invisible pseudo-directory called 
++				__iopen__ available in the root directory
++				of the filesystem.  Allows open-by-inode-
++				number.  i.e., inode 3145 can be accessed
++				via /mntpt/__iopen__/3145
++
++iopen_nopriv			This option makes the iopen directory be
++				world-readable.  This may be safer since it
++				allows daemons to run as an unprivileged user,
++				however it significantly changes the security
++				model of a Unix filesystem, since previously
++				all files under a mode 700 directory were not
++				generally available even if the
++				permissions on the file itself are
++				world-readable.
++
+ grpquota,noquota,quota,usrquota	Quota options are silently ignored by ext2.
+ 
+ 
+--- linux-2.4.18-p4smp/fs/ext3/Makefile~iopen-2.4.18	2003-07-09 17:12:12.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/Makefile	2003-07-09 17:13:15.000000000 -0600
+@@ -11,7 +11,7 @@ O_TARGET := ext3.o
+ 
+ export-objs :=	super.o inode.o xattr.o ext3-exports.o
+ 
+-obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
++obj-y    := balloc.o iopen.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+ 		ioctl.o namei.o super.o symlink.o xattr.o hash.o ext3-exports.o
+ obj-m    := $(O_TARGET)
+ 
+--- linux-2.4.18-p4smp/fs/ext3/inode.c~iopen-2.4.18	2003-07-09 17:11:19.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/inode.c	2003-07-09 17:13:02.000000000 -0600
+@@ -31,6 +31,7 @@
+ #include <linux/highuid.h>
+ #include <linux/quotaops.h>
+ #include <linux/module.h>
++#include "iopen.h"
+ 
+ /*
+  * SEARCH_FROM_ZERO forces each block allocation to search from the start
+@@ -2165,6 +2166,9 @@ void ext3_read_inode(struct inode * inod
+ 	struct buffer_head *bh;
+ 	int block;
+ 	
++	if (ext3_iopen_get_inode(inode))
++		return;
++	
+ 	if(ext3_get_inode_loc(inode, &iloc))
+ 		goto bad_inode;
+ 	bh = iloc.bh;
+--- /dev/null	2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/fs/ext3/iopen.c	2003-07-09 17:13:02.000000000 -0600
+@@ -0,0 +1,259 @@
++/*
++ * linux/fs/ext3/iopen.c
++ *
++ * Special support for open by inode number
++ *
++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
++ * 
++ * This file may be redistributed under the terms of the GNU General
++ * Public License.
++ *
++ *
++ * Invariants:
++ *   - there is only ever a single DCACHE_NFSD_DISCONNECTED dentry alias
++ *     for an inode at one time.
++ *   - there are never both connected and DCACHE_NFSD_DISCONNECTED dentry
++ *     aliases on an inode at the same time.
++ *
++ * If we have any connected dentry aliases for an inode, use one of those
++ * in iopen_lookup().  Otherwise, we instantiate a single NFSD_DISCONNECTED
++ * dentry for this inode, which thereafter will be found by the dcache
++ * when looking up this inode number in __iopen__, so we don't return here
++ * until it is gone.
++ *
++ * If we get an inode via a regular name lookup, then we "rename" the
++ * NFSD_DISCONNECTED dentry to the proper name and parent.  This ensures
++ * existing users of the disconnected dentry will continue to use the same
++ * dentry as the connected users, and there will never be both kinds of
++ * dentry aliases at one time.
++ */
++
++#include <linux/sched.h>
++#include <linux/fs.h>
++#include <linux/locks.h>
++#include <linux/ext3_jbd.h>
++#include <linux/jbd.h>
++#include <linux/ext3_fs.h>
++#include <linux/smp_lock.h>
++#include "iopen.h"
++
++#ifndef assert
++#define assert(test) J_ASSERT(test)
++#endif
++
++#define IOPEN_NAME_LEN	32
++
++/*
++ * This implements looking up an inode by number.
++ */
++static struct dentry *iopen_lookup(struct inode *dir, struct dentry *dentry)
++{
++	struct inode *inode;
++	unsigned long ino;
++	struct list_head *lp;
++	struct dentry *alternate;
++	char buf[IOPEN_NAME_LEN];
++	
++	if (dentry->d_name.len >= IOPEN_NAME_LEN)
++		return ERR_PTR(-ENAMETOOLONG);
++
++	memcpy(buf, dentry->d_name.name, dentry->d_name.len);
++	buf[dentry->d_name.len] = 0;
++
++	if (strcmp(buf, ".") == 0)
++		ino = dir->i_ino;
++	else if (strcmp(buf, "..") == 0)
++		ino = EXT3_ROOT_INO;
++	else
++		ino = simple_strtoul(buf, 0, 0);
++
++	if ((ino != EXT3_ROOT_INO &&
++	     //ino != EXT3_ACL_IDX_INO &&
++	     //ino != EXT3_ACL_DATA_INO &&
++	     ino < EXT3_FIRST_INO(dir->i_sb)) ||
++	    ino > le32_to_cpu(dir->i_sb->u.ext3_sb.s_es->s_inodes_count))
++		return ERR_PTR(-ENOENT);
++
++	inode = iget(dir->i_sb, ino);
++	if (!inode)
++		return ERR_PTR(-EACCES);
++	if (is_bad_inode(inode)) {
++		iput(inode);
++		return ERR_PTR(-ENOENT);
++	}
++
++	/* preferably return a connected dentry */
++	spin_lock(&dcache_lock);
++	list_for_each(lp, &inode->i_dentry) {
++		alternate = list_entry(lp, struct dentry, d_alias);
++		assert(!(alternate->d_flags & DCACHE_NFSD_DISCONNECTED));
++	}
++
++	if (!list_empty(&inode->i_dentry)) {
++		alternate = list_entry(inode->i_dentry.next, 
++				       struct dentry, d_alias);
++		dget_locked(alternate);
++		alternate->d_vfs_flags |= DCACHE_REFERENCED;
++		iput(inode);
++		spin_unlock(&dcache_lock);
++		return alternate;
++	}
++	dentry->d_flags |= DCACHE_NFSD_DISCONNECTED;
++	spin_unlock(&dcache_lock);
++
++	d_add(dentry, inode);
++	return NULL;
++}
++
++#define do_switch(x,y) do { \
++	__typeof__ (x) __tmp = x; \
++	x = y; y = __tmp; } while (0)
++
++static inline void switch_names(struct dentry *dentry, struct dentry *target)
++{
++	const unsigned char *old_name, *new_name;
++
++	memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN); 
++	old_name = target->d_name.name;
++	new_name = dentry->d_name.name;
++	if (old_name == target->d_iname)
++		old_name = dentry->d_iname;
++	if (new_name == dentry->d_iname)
++		new_name = target->d_iname;
++	target->d_name.name = new_name;
++	dentry->d_name.name = old_name;
++}
++
++/* This function is spliced into ext3_lookup and does the move of a
++ * disconnected dentry (if it exists) to a connected dentry.
++ */
++struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode)
++{
++	struct dentry *tmp, *goal = NULL;
++	struct list_head *lp;
++
++	/* preferably return a connected dentry */
++	spin_lock(&dcache_lock);
++	/* verify this dentry is really new */
++	assert(!de->d_inode);
++	assert(list_empty(&de->d_subdirs));
++	assert(list_empty(&de->d_alias));
++
++
++	list_for_each(lp, &inode->i_dentry) {
++		tmp = list_entry(lp, struct dentry, d_alias);
++		if (tmp->d_flags & DCACHE_NFSD_DISCONNECTED) {
++			assert(tmp->d_alias.next == &inode->i_dentry);
++			assert(tmp->d_alias.prev == &inode->i_dentry);
++			goal = tmp;
++			dget_locked(goal);
++			break;
++		}
++	}
++
++	if (!goal) { 
++		spin_unlock(&dcache_lock);
++		return NULL; 
++	}
++
++	/* Move the goal to the de hash queue - like d_move() */
++	goal->d_flags &= ~DCACHE_NFSD_DISCONNECTED;
++	list_del(&goal->d_hash);
++	list_add(&goal->d_hash, &de->d_hash);
++
++	list_del(&goal->d_child);
++	list_del(&de->d_child);
++
++	/* Switch the parents and the names.. */
++	switch_names(goal, de);
++	do_switch(goal->d_parent, de->d_parent);
++	do_switch(goal->d_name.len, de->d_name.len);
++	do_switch(goal->d_name.hash, de->d_name.hash);
++
++	/* And add them back to the (new) parent lists */
++	list_add(&goal->d_child, &goal->d_parent->d_subdirs);
++	list_add(&de->d_child, &de->d_parent->d_subdirs);
++	spin_unlock(&dcache_lock);
++
++	return goal;
++}
++
++/*
++ * These are the special structures for the iopen pseudo directory.
++ */
++
++static struct inode_operations iopen_inode_operations = {
++	lookup:		iopen_lookup,		/* BKL held */
++};
++
++static struct file_operations iopen_file_operations = {
++	read:		generic_read_dir,
++};
++
++static int match_dentry(struct dentry *dentry, const char *name)
++{
++	int	len;
++
++	len = strlen(name);
++	if (dentry->d_name.len != len)
++		return 0;
++	if (strncmp(dentry->d_name.name, name, len))
++		return 0;
++	return 1;
++}
++
++/*
++ * This function is spliced into ext3_lookup and returns 1 if the file
++ * name is __iopen__ and dentry has been filled in appropriately.
++ */
++int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry)
++{
++	struct inode *inode;
++
++	if (dir->i_ino != EXT3_ROOT_INO ||
++	    !test_opt(dir->i_sb, IOPEN) ||
++	    !match_dentry(dentry, "__iopen__"))
++		return 0;
++
++	inode = iget(dir->i_sb, EXT3_BAD_INO);
++
++	if (!inode) 
++		return 0;
++	d_add(dentry, inode);
++	return 1;
++}
++
++/*
++ * This function is spliced into read_inode; it returns 1 if inode
++ * number is the one for /__iopen__, in which case the inode is filled
++ * in appropriately.  Otherwise, this function returns 0.
++ */
++int ext3_iopen_get_inode(struct inode *inode)
++{
++	if (inode->i_ino != EXT3_BAD_INO)
++		return 0;
++
++	inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;
++	if (test_opt(inode->i_sb, IOPEN_NOPRIV))
++		inode->i_mode |= 0777;
++	inode->i_uid = 0;
++	inode->i_gid = 0;
++	inode->i_nlink = 1;
++	inode->i_size = 4096;
++	inode->i_atime = CURRENT_TIME;
++	inode->i_ctime = CURRENT_TIME;
++	inode->i_mtime = CURRENT_TIME;
++	inode->u.ext3_i.i_dtime = 0;
++	inode->i_blksize = PAGE_SIZE;	/* This is the optimal IO size
++					 * (for stat), not the fs block
++					 * size */  
++	inode->i_blocks = 0;
++	inode->i_version = 1;
++	inode->i_generation = 0;
++
++	inode->i_op = &iopen_inode_operations;
++	inode->i_fop = &iopen_file_operations;
++	inode->i_mapping->a_ops = 0;
++
++	return 1;
++}
+--- /dev/null	2003-01-30 03:24:37.000000000 -0700
++++ linux-2.4.18-p4smp-braam/fs/ext3/iopen.h	2003-07-09 17:13:02.000000000 -0600
+@@ -0,0 +1,13 @@
++/*
++ * iopen.h
++ *
++ * Special support for opening files by inode number.
++ * 
++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
++ * 
++ * This file may be redistributed under the terms of the GNU General
++ * Public License.
++ */
++
++extern int ext3_check_for_iopen(struct inode *dir, struct dentry *dentry);
++extern int ext3_iopen_get_inode(struct inode *inode);
+--- linux-2.4.18-p4smp/fs/ext3/namei.c~iopen-2.4.18	2003-07-09 13:32:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/namei.c	2003-07-09 17:13:02.000000000 -0600
+@@ -34,6 +34,7 @@
+ #include <linux/string.h>
+ #include <linux/locks.h>
+ #include <linux/quotaops.h>
++#include "iopen.h"
+ 
+ /*
+  * define how far ahead to read directories while searching them.
+@@ -703,16 +704,21 @@ cleanup_and_exit:
+ 	return NULL;
+ }
+ #endif
++struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode);
+ 
+ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry)
+ {
+ 	struct inode * inode;
+ 	struct ext3_dir_entry_2 * de;
+ 	struct buffer_head * bh;
++	struct dentry *alternate = NULL;
+ 
+ 	if (dentry->d_name.len > EXT3_NAME_LEN)
+ 		return ERR_PTR(-ENAMETOOLONG);
+ 
++	if (ext3_check_for_iopen(dir, dentry))
++		return NULL;
++
+ 	bh = ext3_find_entry(dentry, &de);
+ 	inode = NULL;
+ 	if (bh) {
+@@ -723,6 +729,12 @@ static struct dentry *ext3_lookup(struct
+ 		if (!inode)
+ 			return ERR_PTR(-EACCES);
+ 	}
++
++	if (inode && (alternate = iopen_connect_dentry(dentry, inode))) {
++		iput(inode);
++		return alternate;
++	}
++
+ 	d_add(dentry, inode);
+ 	return NULL;
+ }
+--- linux-2.4.18-p4smp/fs/ext3/super.c~iopen-2.4.18	2003-07-09 13:32:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/fs/ext3/super.c	2003-07-09 17:13:02.000000000 -0600
+@@ -831,6 +831,17 @@ static int parse_options (char * options
+ 		         || !strcmp (this_char, "quota")
+ 		         || !strcmp (this_char, "usrquota"))
+ 			/* Don't do anything ;-) */ ;
++		else if (!strcmp (this_char, "iopen")) {
++			set_opt (sbi->s_mount_opt, IOPEN);
++			clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++		} else if (!strcmp (this_char, "noiopen")) {
++			clear_opt (sbi->s_mount_opt, IOPEN);
++			clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++		}
++		else if (!strcmp (this_char, "iopen_nopriv")) {
++			set_opt (sbi->s_mount_opt, IOPEN);
++			set_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++		}
+ 		else if (!strcmp (this_char, "journal")) {
+ 			/* @@@ FIXME */
+ 			/* Eventually we will want to be able to create
+--- linux-2.4.18-p4smp/include/linux/ext3_fs.h~iopen-2.4.18	2003-07-09 13:32:38.000000000 -0600
++++ linux-2.4.18-p4smp-braam/include/linux/ext3_fs.h	2003-07-09 17:13:02.000000000 -0600
+@@ -321,6 +321,8 @@ struct ext3_inode {
+ #define EXT3_MOUNT_UPDATE_JOURNAL	0x1000	/* Update the journal format */
+ #define EXT3_MOUNT_NO_UID32		0x2000  /* Disable 32-bit UIDs */
+ #define EXT3_MOUNT_INDEX		0x4000  /* Enable directory index */
++#define EXT3_MOUNT_IOPEN		0x8000	/* Allow access via iopen */
++#define EXT3_MOUNT_IOPEN_NOPRIV		0x10000	/* Make iopen world-readable */
+ #define EXT3_MOUNT_ASYNCDEL		0x20000	/* Delayed deletion */
+ 
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+
+_
diff --git a/lustre/kernel_patches/patches/iopen-2.6.0.patch b/lustre/kernel_patches/patches/iopen-2.6.0.patch
new file mode 100644
index 0000000..af67758
--- /dev/null
+++ b/lustre/kernel_patches/patches/iopen-2.6.0.patch
@@ -0,0 +1,403 @@
+ Documentation/filesystems/ext2.txt |   16 ++
+ fs/ext3/Makefile                   |    2 
+ fs/ext3/inode.c                    |    3 
+ fs/ext3/iopen.c                    |  239 +++++++++++++++++++++++++++++++++++++
+ fs/ext3/iopen.h                    |   15 ++
+ fs/ext3/namei.c                    |   13 ++
+ fs/ext3/super.c                    |   11 +
+ include/linux/ext3_fs.h            |    2 
+ 8 files changed, 300 insertions(+), 1 deletion(-)
+
+--- linux-2.6.0-test1/Documentation/filesystems/ext2.txt~iopen-2.6.0	2002-11-11 06:28:06.000000000 +0300
++++ linux-2.6.0-test1-alexey/Documentation/filesystems/ext2.txt	2003-08-24 13:02:02.000000000 +0400
+@@ -35,6 +35,22 @@ resgid=n			The group ID which may use th
+ 
+ sb=n				Use alternate superblock at this location.
+ 
++iopen				Makes an invisible pseudo-directory called 
++				__iopen__ available in the root directory
++				of the filesystem.  Allows open-by-inode-
++				number.  i.e., inode 3145 can be accessed
++				via /mntpt/__iopen__/3145
++
++iopen_nopriv			This option makes the iopen directory be
++				world-readable.  This may be safer since it
++				allows daemons to run as an unprivileged user,
++				however it significantly changes the security
++				model of a Unix filesystem, since previously
++				all files under a mode 700 directory were not
++				generally available even if the
++				permissions on the file itself are
++				world-readable.
++
+ grpquota,noquota,quota,usrquota	Quota options are silently ignored by ext2.
+ 
+ 
+--- linux-2.6.0-test1/fs/ext3/inode.c~iopen-2.6.0	2003-08-24 13:00:36.000000000 +0400
++++ linux-2.6.0-test1-alexey/fs/ext3/inode.c	2003-08-24 13:02:02.000000000 +0400
+@@ -37,6 +37,7 @@
+ #include <linux/mpage.h>
+ #include <linux/uio.h>
+ #include "xattr.h"
++#include "iopen.h"
+ #include "acl.h"
+ 
+ /*
+@@ -2477,6 +2478,8 @@ void ext3_read_inode(struct inode * inod
+ 	ei->i_acl = EXT3_ACL_NOT_CACHED;
+ 	ei->i_default_acl = EXT3_ACL_NOT_CACHED;
+ #endif
++	if (ext3_iopen_get_inode(inode))
++		return;
+ 	if (ext3_get_inode_loc(inode, &iloc, 0))
+ 		goto bad_inode;
+ 	bh = iloc.bh;
+--- /dev/null	2003-01-30 13:24:37.000000000 +0300
++++ linux-2.6.0-test1-alexey/fs/ext3/iopen.c	2003-08-24 13:02:02.000000000 +0400
+@@ -0,0 +1,239 @@
++
++
++/*
++ * linux/fs/ext3/iopen.c
++ *
++ * Special support for open by inode number
++ *
++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
++ * 
++ * This file may be redistributed under the terms of the GNU General
++ * Public License.
++ */
++
++#include <linux/sched.h>
++#include <linux/fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/jbd.h>
++#include <linux/ext3_fs.h>
++#include <linux/smp_lock.h>
++#include "iopen.h"
++
++#ifndef assert
++#define assert(test) J_ASSERT(test)
++#endif
++
++#define IOPEN_NAME_LEN	32
++
++/*
++ * This implements looking up an inode by number.
++ */
++static struct dentry *iopen_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
++{
++	struct inode * inode;
++	unsigned long ino;
++        struct list_head *lp;
++        struct dentry *alternate;
++	char buf[IOPEN_NAME_LEN];
++	
++	if (dentry->d_name.len >= IOPEN_NAME_LEN)
++		return ERR_PTR(-ENAMETOOLONG);
++
++	memcpy(buf, dentry->d_name.name, dentry->d_name.len);
++	buf[dentry->d_name.len] = 0;
++
++	if (strcmp(buf, ".") == 0)
++		ino = dir->i_ino;
++	else if (strcmp(buf, "..") == 0)
++		ino = EXT3_ROOT_INO;
++	else
++		ino = simple_strtoul(buf, 0, 0);
++
++	if ((ino != EXT3_ROOT_INO &&
++	     //ino != EXT3_ACL_IDX_INO &&
++	     //ino != EXT3_ACL_DATA_INO &&
++	     ino < EXT3_FIRST_INO(dir->i_sb)) ||
++	    ino > le32_to_cpu(EXT3_SB(dir->i_sb)->s_es->s_inodes_count))
++		return ERR_PTR(-ENOENT);
++
++	inode = iget(dir->i_sb, ino);
++	if (!inode)
++		return ERR_PTR(-EACCES);
++	if (is_bad_inode(inode)) {
++		iput(inode);
++		return ERR_PTR(-ENOENT);
++	}
++
++        /* preferably return a connected dentry */
++        spin_lock(&dcache_lock);
++        list_for_each(lp, &inode->i_dentry) {
++                alternate = list_entry(lp, struct dentry, d_alias);
++                assert(!(alternate->d_flags & DCACHE_DISCONNECTED));
++        }
++
++        if (!list_empty(&inode->i_dentry)) {
++                alternate = list_entry(inode->i_dentry.next, 
++                                       struct dentry, d_alias);
++                dget_locked(alternate);
++                alternate->d_vfs_flags |= DCACHE_REFERENCED;
++                iput(inode);
++                spin_unlock(&dcache_lock);
++                return alternate;
++        }
++        dentry->d_flags |= DCACHE_DISCONNECTED;
++        spin_unlock(&dcache_lock);
++
++	d_add(dentry, inode);
++	return NULL;
++}
++
++#define do_switch(x,y) do { \
++	__typeof__ (x) __tmp = x; \
++	x = y; y = __tmp; } while (0)
++
++static inline void switch_names(struct dentry * dentry, struct dentry * target)
++{
++	const unsigned char *old_name, *new_name;
++
++	memcpy(dentry->d_iname, target->d_iname, DNAME_INLINE_LEN); 
++	old_name = target->d_name.name;
++	new_name = dentry->d_name.name;
++	if (old_name == target->d_iname)
++		old_name = dentry->d_iname;
++	if (new_name == dentry->d_iname)
++		new_name = target->d_iname;
++	target->d_name.name = new_name;
++	dentry->d_name.name = old_name;
++}
++
++
++struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode)
++{
++        struct dentry *tmp, *goal = NULL;
++        struct list_head *lp;
++
++        /* preferably return a connected dentry */
++        spin_lock(&dcache_lock);
++        /* verify this dentry is really new */
++        assert(!de->d_inode);
++        assert(list_empty(&de->d_subdirs));
++        assert(list_empty(&de->d_alias));
++
++
++        list_for_each(lp, &inode->i_dentry) {
++                tmp = list_entry(lp, struct dentry, d_alias);
++                if (tmp->d_flags & DCACHE_DISCONNECTED) {
++                        assert(tmp->d_alias.next == &inode->i_dentry);
++                        assert(tmp->d_alias.prev == &inode->i_dentry);
++                        goal = tmp;
++                        dget_locked(goal);
++                        break;
++                }
++        }
++
++        if (!goal) { 
++                spin_unlock(&dcache_lock);
++                return NULL; 
++        }
++
++        /* Move the goal to the de hash queue */
++        goal->d_flags &= ~DCACHE_DISCONNECTED;
++	hlist_add_before(&goal->d_hash, &de->d_hash);
++	hlist_del(&goal->d_hash);
++
++	list_del(&goal->d_child);
++	list_del(&de->d_child);
++
++	/* Switch the parents and the names.. */
++	switch_names(goal, de);
++	do_switch(goal->d_parent, de->d_parent);
++	do_switch(goal->d_name.len, de->d_name.len);
++	do_switch(goal->d_name.hash, de->d_name.hash);
++
++	/* And add them back to the (new) parent lists */
++	list_add(&goal->d_child, &goal->d_parent->d_subdirs);
++	list_add(&de->d_child, &de->d_parent->d_subdirs);
++
++        spin_unlock(&dcache_lock);
++        return goal;
++}
++
++/*
++ * These are the special structures for the iopen pseudo directory.
++ */
++
++static struct inode_operations iopen_inode_operations = {
++	lookup:		iopen_lookup,		/* BKL held */
++};
++
++static struct file_operations iopen_file_operations = {
++	read:		generic_read_dir,
++};
++
++static int match_dentry(struct dentry *dentry, const char *name)
++{
++	int	len;
++
++	len = strlen(name);
++	if (dentry->d_name.len != len)
++		return 0;
++	if (strncmp(dentry->d_name.name, name, len))
++		return 0;
++	return 1;
++}
++
++/*
++ * This function is spliced into ext3_lookup and returns 1 if the file
++ * name is __iopen__ and dentry has been filled in appropriately.
++ */
++int ext3_check_for_iopen(struct inode * dir, struct dentry *dentry)
++{
++	struct inode * inode;
++
++	if (dir->i_ino != EXT3_ROOT_INO ||
++	    !test_opt(dir->i_sb, IOPEN) ||
++	    !match_dentry(dentry, "__iopen__"))
++		return 0;
++
++	inode = iget(dir->i_sb, EXT3_BAD_INO);
++
++	if (!inode) 
++		return 0;
++	d_add(dentry, inode);
++	return 1;
++}
++
++/*
++ * This function is spliced into read_inode; it returns 1 if inode
++ * number is the one for /__iopen__, in which case the inode is filled
++ * in appropriately.  Otherwise, this function returns 0.
++ */
++int ext3_iopen_get_inode(struct inode * inode)
++{
++	if (inode->i_ino != EXT3_BAD_INO)
++		return 0;
++
++	inode->i_mode = S_IFDIR | S_IRUSR | S_IXUSR;
++	if (test_opt(inode->i_sb, IOPEN_NOPRIV))
++		inode->i_mode |= 0777;
++	inode->i_uid = 0;
++	inode->i_gid = 0;
++	inode->i_nlink = 1;
++	inode->i_size = 4096;
++	inode->i_atime = CURRENT_TIME;
++	inode->i_ctime = CURRENT_TIME;
++	inode->i_mtime = CURRENT_TIME;
++	EXT3_I(inode)->i_dtime = 0;
++	inode->i_blksize = PAGE_SIZE;	/* This is the optimal IO size
++					 * (for stat), not the fs block
++					 * size */  
++	inode->i_blocks = 0;
++	inode->i_version = 1;
++	inode->i_generation = 0;
++
++	inode->i_op = &iopen_inode_operations;
++	inode->i_fop = &iopen_file_operations;
++	inode->i_mapping->a_ops = 0;
++
++	return 1;
++}
+--- /dev/null	2003-01-30 13:24:37.000000000 +0300
++++ linux-2.6.0-test1-alexey/fs/ext3/iopen.h	2003-08-24 13:02:02.000000000 +0400
+@@ -0,0 +1,15 @@
++/*
++ * iopen.h
++ *
++ * Special support for opening files by inode number.
++ * 
++ * Copyright (C) 2001 by Theodore Ts'o (tytso@alum.mit.edu).
++ * 
++ * This file may be redistributed under the terms of the GNU General
++ * Public License.
++ */
++
++extern int ext3_check_for_iopen(struct inode * dir, struct dentry *dentry);
++extern int ext3_iopen_get_inode(struct inode * inode);
++
++
+--- linux-2.6.0-test1/fs/ext3/Makefile~iopen-2.6.0	2003-08-24 12:58:32.000000000 +0400
++++ linux-2.6.0-test1-alexey/fs/ext3/Makefile	2003-08-24 13:02:40.000000000 +0400
+@@ -5,7 +5,7 @@
+ obj-$(CONFIG_EXT3_FS) += ext3.o
+ 
+ ext3-y	:= balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+-	   ioctl.o namei.o super.o symlink.o hash.o
++	   iopen.o ioctl.o namei.o super.o symlink.o hash.o
+ 
+ ext3-$(CONFIG_EXT3_FS_XATTR)	 += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+--- linux-2.6.0-test1/fs/ext3/namei.c~iopen-2.6.0	2003-07-24 15:52:30.000000000 +0400
++++ linux-2.6.0-test1-alexey/fs/ext3/namei.c	2003-08-24 13:02:02.000000000 +0400
+@@ -37,6 +37,7 @@
+ #include <linux/buffer_head.h>
+ #include <linux/smp_lock.h>
+ #include "xattr.h"
++#include "iopen.h"
+ #include "acl.h"
+ 
+ /*
+@@ -970,15 +971,21 @@ errout:
+ }
+ #endif
+ 
++struct dentry *iopen_connect_dentry(struct dentry *de, struct inode *inode);
++ 
+ static struct dentry *ext3_lookup(struct inode * dir, struct dentry *dentry, struct nameidata *nd)
+ {
+ 	struct inode * inode;
+ 	struct ext3_dir_entry_2 * de;
+ 	struct buffer_head * bh;
++  	struct dentry *alternate = NULL;
+ 
+ 	if (dentry->d_name.len > EXT3_NAME_LEN)
+ 		return ERR_PTR(-ENAMETOOLONG);
+ 
++ 	if (ext3_check_for_iopen(dir, dentry))
++ 		return NULL;
++
+ 	bh = ext3_find_entry(dentry, &de);
+ 	inode = NULL;
+ 	if (bh) {
+@@ -991,6 +998,12 @@ static struct dentry *ext3_lookup(struct
+ 	}
+ 	if (inode)
+ 		return d_splice_alias(inode, dentry);
++
++	if (inode && (alternate = iopen_connect_dentry(dentry, inode))) {
++		iput(inode);
++		return alternate;
++	}
++
+ 	d_add(dentry, inode);
+ 	return NULL;
+ }
+--- linux-2.6.0-test1/fs/ext3/super.c~iopen-2.6.0	2003-08-24 13:00:36.000000000 +0400
++++ linux-2.6.0-test1-alexey/fs/ext3/super.c	2003-08-24 13:02:02.000000000 +0400
+@@ -755,6 +755,17 @@ static int parse_options (char * options
+ 		         || !strcmp (this_char, "quota")
+ 		         || !strcmp (this_char, "usrquota"))
+ 			/* Don't do anything ;-) */ ;
++		else if (!strcmp (this_char, "iopen")) {
++			set_opt (sbi->s_mount_opt, IOPEN);
++			clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++		} else if (!strcmp (this_char, "noiopen")) {
++			clear_opt (sbi->s_mount_opt, IOPEN);
++			clear_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++		}
++		else if (!strcmp (this_char, "iopen_nopriv")) {
++			set_opt (sbi->s_mount_opt, IOPEN);
++			set_opt (sbi->s_mount_opt, IOPEN_NOPRIV);
++		}
+ 		else if (!strcmp (this_char, "journal")) {
+ 			/* @@@ FIXME */
+ 			/* Eventually we will want to be able to create
+--- linux-2.6.0-test1/include/linux/ext3_fs.h~iopen-2.6.0	2003-08-24 12:58:57.000000000 +0400
++++ linux-2.6.0-test1-alexey/include/linux/ext3_fs.h	2003-08-24 13:02:02.000000000 +0400
+@@ -324,6 +324,8 @@ struct ext3_inode {
+ #define EXT3_MOUNT_NO_UID32		0x2000  /* Disable 32-bit UIDs */
+ #define EXT3_MOUNT_XATTR_USER		0x4000	/* Extended user attributes */
+ #define EXT3_MOUNT_POSIX_ACL		0x8000	/* POSIX Access Control Lists */
++#define EXT3_MOUNT_IOPEN	       0x10000  /* Allow access via iopen */
++#define EXT3_MOUNT_IOPEN_NOPRIV	       0x20000  /* Make iopen world-readable */
+ 
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef _LINUX_EXT2_FS_H
+
+_
diff --git a/lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26-2.patch b/lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26-2.patch
new file mode 100644
index 0000000..c7d06a8
--- /dev/null
+++ b/lustre/kernel_patches/patches/linux-2.4.18ea-0.8.26-2.patch
@@ -0,0 +1,1775 @@
+ fs/ext3/Makefile           |    4 
+ fs/ext3/ext3-exports.c     |   13 
+ fs/ext3/ialloc.c           |    2 
+ fs/ext3/inode.c            |   29 -
+ fs/ext3/namei.c            |    8 
+ fs/ext3/super.c            |   23 
+ fs/ext3/xattr.c            | 1242 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_fs.h    |   46 -
+ include/linux/ext3_jbd.h   |    8 
+ include/linux/ext3_xattr.h |  155 +++++
+ include/linux/xattr.h      |   15 
+ 11 files changed, 1494 insertions(+), 51 deletions(-)
+
+--- /dev/null	2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.18-alexey/fs/ext3/ext3-exports.c	2003-09-01 14:55:39.000000000 +0400
+@@ -0,0 +1,13 @@
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_jbd.h>
++#include <linux/ext3_xattr.h>
++
++EXPORT_SYMBOL(ext3_force_commit);
++EXPORT_SYMBOL(ext3_bread);
++EXPORT_SYMBOL(ext3_xattr_register);
++EXPORT_SYMBOL(ext3_xattr_unregister);
++EXPORT_SYMBOL(ext3_xattr_get);
++EXPORT_SYMBOL(ext3_xattr_list);
++EXPORT_SYMBOL(ext3_xattr_set);
+--- linux-2.4.18/fs/ext3/ialloc.c~linux-2.4.18ea-0.8.26-2	2003-07-28 17:52:04.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/ialloc.c	2003-09-01 14:55:39.000000000 +0400
+@@ -17,6 +17,7 @@
+ #include <linux/jbd.h>
+ #include <linux/ext3_fs.h>
+ #include <linux/ext3_jbd.h>
++#include <linux/ext3_xattr.h>
+ #include <linux/stat.h>
+ #include <linux/string.h>
+ #include <linux/locks.h>
+@@ -216,6 +217,7 @@ void ext3_free_inode (handle_t *handle, 
+ 	 * as writing the quota to disk may need the lock as well.
+ 	 */
+ 	DQUOT_INIT(inode);
++	ext3_xattr_drop_inode(handle, inode);
+ 	DQUOT_FREE_INODE(inode);
+ 	DQUOT_DROP(inode);
+ 
+--- linux-2.4.18/fs/ext3/inode.c~linux-2.4.18ea-0.8.26-2	2003-07-28 17:52:04.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/inode.c	2003-09-01 14:55:39.000000000 +0400
+@@ -39,6 +39,18 @@
+  */
+ #undef SEARCH_FROM_ZERO
+ 
++/*
++ * Test whether an inode is a fast symlink.
++ */
++static inline int ext3_inode_is_fast_symlink(struct inode *inode)
++{
++	int ea_blocks = EXT3_I(inode)->i_file_acl ?
++		(inode->i_sb->s_blocksize >> 9) : 0;
++
++	return (S_ISLNK(inode->i_mode) &&
++		inode->i_blocks - ea_blocks == 0);
++}
++
+ /* The ext3 forget function must perform a revoke if we are freeing data
+  * which has been journaled.  Metadata (eg. indirect blocks) must be
+  * revoked in all cases. 
+@@ -48,7 +60,7 @@
+  * still needs to be revoked.
+  */
+ 
+-static int ext3_forget(handle_t *handle, int is_metadata,
++int ext3_forget(handle_t *handle, int is_metadata,
+ 		       struct inode *inode, struct buffer_head *bh,
+ 		       int blocknr)
+ {
+@@ -164,9 +176,7 @@ void ext3_delete_inode (struct inode * i
+ {
+ 	handle_t *handle;
+ 	
+-	if (is_bad_inode(inode) ||
+-	    inode->i_ino == EXT3_ACL_IDX_INO ||
+-	    inode->i_ino == EXT3_ACL_DATA_INO)
++	if (is_bad_inode(inode))
+ 		goto no_delete;
+ 
+ 	lock_kernel();
+@@ -1877,6 +1887,8 @@ void ext3_truncate(struct inode * inode)
+ 	if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
+ 	    S_ISLNK(inode->i_mode)))
+ 		return;
++	if (ext3_inode_is_fast_symlink(inode))
++		return;
+ 	if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
+ 		return;
+ 
+@@ -2038,8 +2050,6 @@ int ext3_get_inode_loc (struct inode *in
+ 	struct ext3_group_desc * gdp;
+ 		
+ 	if ((inode->i_ino != EXT3_ROOT_INO &&
+-		inode->i_ino != EXT3_ACL_IDX_INO &&
+-		inode->i_ino != EXT3_ACL_DATA_INO &&
+ 		inode->i_ino != EXT3_JOURNAL_INO &&
+ 		inode->i_ino < EXT3_FIRST_INO(inode->i_sb)) ||
+ 		inode->i_ino > le32_to_cpu(
+@@ -2166,10 +2176,7 @@ void ext3_read_inode(struct inode * inod
+ 
+ 	brelse (iloc.bh);
+ 
+-	if (inode->i_ino == EXT3_ACL_IDX_INO ||
+-	    inode->i_ino == EXT3_ACL_DATA_INO)
+-		/* Nothing to do */ ;
+-	else if (S_ISREG(inode->i_mode)) {
++	if (S_ISREG(inode->i_mode)) {
+ 		inode->i_op = &ext3_file_inode_operations;
+ 		inode->i_fop = &ext3_file_operations;
+ 		inode->i_mapping->a_ops = &ext3_aops;
+@@ -2177,7 +2184,7 @@ void ext3_read_inode(struct inode * inod
+ 		inode->i_op = &ext3_dir_inode_operations;
+ 		inode->i_fop = &ext3_dir_operations;
+ 	} else if (S_ISLNK(inode->i_mode)) {
+-		if (!inode->i_blocks)
++		if (ext3_inode_is_fast_symlink(inode))
+ 			inode->i_op = &ext3_fast_symlink_inode_operations;
+ 		else {
+ 			inode->i_op = &page_symlink_inode_operations;
+--- linux-2.4.18/fs/ext3/Makefile~linux-2.4.18ea-0.8.26-2	2003-08-29 16:53:17.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/Makefile	2003-09-01 14:55:50.000000000 +0400
+@@ -9,10 +9,10 @@
+ 
+ O_TARGET := ext3.o
+ 
+-export-objs :=	super.o inode.o
++export-objs :=	ext3-exports.o
+ 
+ obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
+-		ioctl.o namei.o super.o symlink.o hash.o
++		ioctl.o namei.o super.o symlink.o xattr.o hash.o ext3-exports.o
+ obj-m    := $(O_TARGET)
+ 
+ include $(TOPDIR)/Rules.make
+--- linux-2.4.18/fs/ext3/namei.c~linux-2.4.18ea-0.8.26-2	2003-09-01 11:50:59.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/namei.c	2003-09-01 14:55:39.000000000 +0400
+@@ -29,6 +29,7 @@
+ #include <linux/sched.h>
+ #include <linux/ext3_fs.h>
+ #include <linux/ext3_jbd.h>
++#include <linux/ext3_xattr.h>
+ #include <linux/fcntl.h>
+ #include <linux/stat.h>
+ #include <linux/string.h>
+@@ -1524,6 +1525,7 @@ static int ext3_add_nondir(handle_t *han
+ 		d_instantiate(dentry, inode);
+ 		return 0;
+ 	}
++	ext3_xattr_drop_inode(handle, inode);
+ 	ext3_dec_count(handle, inode);
+ 	iput(inode);
+ 	return err;
+@@ -1612,7 +1614,7 @@ static int ext3_mkdir(struct inode * dir
+ 	if (IS_SYNC(dir))
+ 		handle->h_sync = 1;
+ 
+-	inode = ext3_new_inode (handle, dir, S_IFDIR);
++	inode = ext3_new_inode (handle, dir, S_IFDIR | mode);
+ 	err = PTR_ERR(inode);
+ 	if (IS_ERR(inode))
+ 		goto out_stop;
+@@ -1620,7 +1622,6 @@ static int ext3_mkdir(struct inode * dir
+ 	inode->i_op = &ext3_dir_inode_operations;
+ 	inode->i_fop = &ext3_dir_operations;
+ 	inode->i_size = EXT3_I(inode)->i_disksize = inode->i_sb->s_blocksize;
+-	inode->i_blocks = 0;	
+ 	dir_block = ext3_bread (handle, inode, 0, 1, &err);
+ 	if (!dir_block) {
+ 		inode->i_nlink--; /* is this nlink == 0? */
+@@ -1647,9 +1648,6 @@ static int ext3_mkdir(struct inode * dir
+ 	BUFFER_TRACE(dir_block, "call ext3_journal_dirty_metadata");
+ 	ext3_journal_dirty_metadata(handle, dir_block);
+ 	brelse (dir_block);
+-	inode->i_mode = S_IFDIR | mode;
+-	if (dir->i_mode & S_ISGID)
+-		inode->i_mode |= S_ISGID;
+ 	ext3_mark_inode_dirty(handle, inode);
+ 	err = ext3_add_entry (handle, dentry, inode);
+ 	if (err) {
+--- linux-2.4.18/fs/ext3/super.c~linux-2.4.18ea-0.8.26-2	2003-08-29 16:53:17.000000000 +0400
++++ linux-2.4.18-alexey/fs/ext3/super.c	2003-09-01 14:55:39.000000000 +0400
+@@ -24,6 +24,7 @@
+ #include <linux/jbd.h>
+ #include <linux/ext3_fs.h>
+ #include <linux/ext3_jbd.h>
++#include <linux/ext3_xattr.h>
+ #include <linux/slab.h>
+ #include <linux/init.h>
+ #include <linux/locks.h>
+@@ -406,6 +407,7 @@ void ext3_put_super (struct super_block 
+ 	kdev_t j_dev = sbi->s_journal->j_dev;
+ 	int i;
+ 
++	ext3_xattr_put_super(sb);
+ 	journal_destroy(sbi->s_journal);
+ 	if (!(sb->s_flags & MS_RDONLY)) {
+ 		EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
+@@ -1743,18 +1745,27 @@ int ext3_statfs (struct super_block * sb
+ 
+ static DECLARE_FSTYPE_DEV(ext3_fs_type, "ext3", ext3_read_super);
+ 
+-static int __init init_ext3_fs(void)
++static void exit_ext3_fs(void)
+ {
+-        return register_filesystem(&ext3_fs_type);
++	unregister_filesystem(&ext3_fs_type);
++	exit_ext3_xattr_user();
++	exit_ext3_xattr();
+ }
+ 
+-static void __exit exit_ext3_fs(void)
++static int __init init_ext3_fs(void)
+ {
+-	unregister_filesystem(&ext3_fs_type);
++	int error = init_ext3_xattr();
++	if (!error)
++		error = init_ext3_xattr_user();
++	if (!error)
++		error = register_filesystem(&ext3_fs_type);
++	if (!error)
++		return 0;
++
++	exit_ext3_fs();
++	return error;
+ }
+ 
+-EXPORT_SYMBOL(ext3_force_commit);
+-EXPORT_SYMBOL(ext3_bread);
+ 
+ MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
+ MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
+--- /dev/null	2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.18-alexey/fs/ext3/xattr.c	2003-09-01 14:55:39.000000000 +0400
+@@ -0,0 +1,1242 @@
++/*
++ * linux/fs/ext3/xattr.c
++ *
++ * Copyright (C) 2001 by Andreas Gruenbacher, <a.gruenbacher@computer.org>
++ *
++ * Fix by Harrison Xing <harrison@mountainviewdata.com>.
++ * Ext3 code with a lot of help from Eric Jarman <ejarman@acm.org>.
++ * Extended attributes for symlinks and special files added per
++ *  suggestion of Luka Renko <luka.renko@hermes.si>.
++ */
++
++/*
++ * Extended attributes are stored on disk blocks allocated outside of
++ * any inode. The i_file_acl field is then made to point to this allocated
++ * block. If all extended attributes of an inode are identical, these
++ * inodes may share the same extended attribute block. Such situations
++ * are automatically detected by keeping a cache of recent attribute block
++ * numbers and hashes over the block's contents in memory.
++ *
++ *
++ * Extended attribute block layout:
++ *
++ *   +------------------+
++ *   | header           |
++ *   | entry 1          | |
++ *   | entry 2          | | growing downwards
++ *   | entry 3          | v
++ *   | four null bytes  |
++ *   | . . .            |
++ *   | value 1          | ^
++ *   | value 3          | | growing upwards
++ *   | value 2          | |
++ *   +------------------+
++ *
++ * The block header is followed by multiple entry descriptors. These entry
++ * descriptors are variable in size, and aligned to EXT3_XATTR_PAD
++ * byte boundaries. The entry descriptors are sorted by attribute name,
++ * so that two extended attribute blocks can be compared efficiently.
++ *
++ * Attribute values are aligned to the end of the block, stored in
++ * no specific order. They are also padded to EXT3_XATTR_PAD byte
++ * boundaries. No additional gaps are left between them.
++ *
++ * Locking strategy
++ * ----------------
++ * The VFS already holds the BKL and the inode->i_sem semaphore when any of
++ * the xattr inode operations are called, so we are guaranteed that only one
++ * process accesses extended attributes of an inode at any time.
++ *
++ * For writing we also grab the ext3_xattr_sem semaphore. This ensures that
++ * only a single process is modifying an extended attribute block, even
++ * if the block is shared among inodes.
++ *
++ * Note for porting to 2.5
++ * -----------------------
++ * The BKL will no longer be held in the xattr inode operations.
++ */
++
++#include <linux/fs.h>
++#include <linux/locks.h>
++#include <linux/slab.h>
++#include <linux/ext3_jbd.h>
++#include <linux/ext3_fs.h>
++#include <linux/ext3_xattr.h>
++#ifdef CONFIG_EXT3_FS_XATTR_SHARING
++#include <linux/mbcache.h>
++#endif
++#include <linux/quotaops.h>
++#include <asm/semaphore.h>
++#include <linux/compatmac.h>
++#include <linux/module.h>
++
++/* These symbols may be needed by a module. */
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0)
++# define mark_buffer_dirty(bh) mark_buffer_dirty(bh, 1)
++#endif
++
++#define HDR(bh) ((struct ext3_xattr_header *)((bh)->b_data))
++#define ENTRY(ptr) ((struct ext3_xattr_entry *)(ptr))
++#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1)
++#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)
++
++#ifdef EXT3_XATTR_DEBUG
++# define ea_idebug(inode, f...) do { \
++		printk(KERN_DEBUG "inode %s:%ld: ", \
++			kdevname(inode->i_dev), inode->i_ino); \
++		printk(f); \
++		printk("\n"); \
++	} while (0)
++# define ea_bdebug(bh, f...) do { \
++		printk(KERN_DEBUG "block %s:%ld: ", \
++			kdevname(bh->b_dev), bh->b_blocknr); \
++		printk(f); \
++		printk("\n"); \
++	} while (0)
++#else
++# define ea_idebug(f...)
++# define ea_bdebug(f...)
++#endif
++
++static int ext3_xattr_set2(handle_t *, struct inode *, struct buffer_head *,
++			   struct ext3_xattr_header *);
++
++#ifdef CONFIG_EXT3_FS_XATTR_SHARING
++
++static int ext3_xattr_cache_insert(struct buffer_head *);
++static struct buffer_head *ext3_xattr_cache_find(struct inode *,
++						 struct ext3_xattr_header *);
++static void ext3_xattr_cache_remove(struct buffer_head *);
++static void ext3_xattr_rehash(struct ext3_xattr_header *,
++			      struct ext3_xattr_entry *);
++
++static struct mb_cache *ext3_xattr_cache;
++
++#else
++# define ext3_xattr_cache_insert(bh) 0
++# define ext3_xattr_cache_find(inode, header) NULL
++# define ext3_xattr_cache_remove(bh) do {} while(0)
++# define ext3_xattr_rehash(header, entry) do {} while(0)
++#endif
++
++/*
++ * If a file system does not share extended attributes among inodes,
++ * we should not need the ext3_xattr_sem semaphore. However, the
++ * filesystem may still contain shared blocks, so we always take
++ * the lock.
++ */
++
++DECLARE_MUTEX(ext3_xattr_sem);
++
++static inline void
++ext3_xattr_lock(void)
++{
++	down(&ext3_xattr_sem);
++}
++
++static inline void
++ext3_xattr_unlock(void)
++{
++	up(&ext3_xattr_sem);
++}
++
++static inline int
++ext3_xattr_new_block(handle_t *handle, struct inode *inode,
++		     int * errp, int force)
++{
++	struct super_block *sb = inode->i_sb;
++	int goal = le32_to_cpu(EXT3_SB(sb)->s_es->s_first_data_block) +
++		EXT3_I(inode)->i_block_group * EXT3_BLOCKS_PER_GROUP(sb);
++
++	/* How can we enforce the allocation? */
++	int block = ext3_new_block(handle, inode, goal, 0, 0, errp);
++#ifdef OLD_QUOTAS
++	if (!*errp)
++		inode->i_blocks += inode->i_sb->s_blocksize >> 9;
++#endif
++	return block;
++}
++
++static inline int
++ext3_xattr_quota_alloc(struct inode *inode, int force)
++{
++	/* How can we enforce the allocation? */
++#ifdef OLD_QUOTAS
++	int error = DQUOT_ALLOC_BLOCK(inode->i_sb, inode, 1);
++	if (!error)
++		inode->i_blocks += inode->i_sb->s_blocksize >> 9;
++#else
++	int error = DQUOT_ALLOC_BLOCK(inode, 1);
++#endif
++	return error;
++}
++
++#ifdef OLD_QUOTAS
++
++static inline void
++ext3_xattr_quota_free(struct inode *inode)
++{
++	DQUOT_FREE_BLOCK(inode->i_sb, inode, 1);
++	inode->i_blocks -= inode->i_sb->s_blocksize >> 9;
++}
++
++static inline void
++ext3_xattr_free_block(handle_t *handle, struct inode * inode,
++		      unsigned long block)
++{
++	ext3_free_blocks(handle, inode, block, 1);
++	inode->i_blocks -= inode->i_sb->s_blocksize >> 9;
++}
++
++#else
++# define ext3_xattr_quota_free(inode) \
++	DQUOT_FREE_BLOCK(inode, 1)
++# define ext3_xattr_free_block(handle, inode, block) \
++	ext3_free_blocks(handle, inode, block, 1)
++#endif
++
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18)
++
++static inline struct buffer_head *
++sb_bread(struct super_block *sb, int block)
++{
++	return bread(sb->s_dev, block, sb->s_blocksize);
++}
++
++static inline struct buffer_head *
++sb_getblk(struct super_block *sb, int block)
++{
++	return getblk(sb->s_dev, block, sb->s_blocksize);
++}
++
++#endif
++
++struct ext3_xattr_handler *ext3_xattr_handlers[EXT3_XATTR_INDEX_MAX];
++rwlock_t ext3_handler_lock = RW_LOCK_UNLOCKED;
++
++int
++ext3_xattr_register(int name_index, struct ext3_xattr_handler *handler)
++{
++	int error = -EINVAL;
++
++	if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) {
++		write_lock(&ext3_handler_lock);
++		if (!ext3_xattr_handlers[name_index-1]) {
++			ext3_xattr_handlers[name_index-1] = handler;
++			error = 0;
++		}
++		write_unlock(&ext3_handler_lock);
++	}
++	return error;
++}
++
++void
++ext3_xattr_unregister(int name_index, struct ext3_xattr_handler *handler)
++{
++	if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) {
++		write_lock(&ext3_handler_lock);
++		ext3_xattr_handlers[name_index-1] = NULL; /* index is 1-based */
++		write_unlock(&ext3_handler_lock);
++	}
++}
++
++static inline const char *
++strcmp_prefix(const char *a, const char *a_prefix)
++{
++	while (*a_prefix && *a == *a_prefix) {
++		a++;
++		a_prefix++;
++	}
++	return *a_prefix ? NULL : a;
++}
++
++/*
++ * Decode the extended attribute name, and translate it into
++ * the name_index and name suffix.
++ */
++static inline struct ext3_xattr_handler *
++ext3_xattr_resolve_name(const char **name)
++{
++	struct ext3_xattr_handler *handler = NULL;
++	int i;
++
++	if (!*name)
++		return NULL;
++	read_lock(&ext3_handler_lock);
++	for (i=0; i<EXT3_XATTR_INDEX_MAX; i++) {
++		if (ext3_xattr_handlers[i]) {
++			const char *n = strcmp_prefix(*name,
++				ext3_xattr_handlers[i]->prefix);
++			if (n) {
++				handler = ext3_xattr_handlers[i];
++				*name = n;
++				break;
++			}
++		}
++	}
++	read_unlock(&ext3_handler_lock);
++	return handler;
++}
++
++static inline struct ext3_xattr_handler *
++ext3_xattr_handler(int name_index)
++{
++	struct ext3_xattr_handler *handler = NULL;
++	if (name_index > 0 && name_index <= EXT3_XATTR_INDEX_MAX) {
++		read_lock(&ext3_handler_lock);
++		handler = ext3_xattr_handlers[name_index-1];
++		read_unlock(&ext3_handler_lock);
++	}
++	return handler;
++}
++
++/*
++ * Inode operation getxattr()
++ *
++ * dentry->d_inode->i_sem down
++ * BKL held [before 2.5.x]
++ */
++ssize_t
++ext3_getxattr(struct dentry *dentry, const char *name,
++	      void *buffer, size_t size)
++{
++	struct ext3_xattr_handler *handler;
++	struct inode *inode = dentry->d_inode;
++
++	handler = ext3_xattr_resolve_name(&name);
++	if (!handler)
++		return -ENOTSUP;
++	return handler->get(inode, name, buffer, size);
++}
++
++/*
++ * Inode operation listxattr()
++ *
++ * dentry->d_inode->i_sem down
++ * BKL held [before 2.5.x]
++ */
++ssize_t
++ext3_listxattr(struct dentry *dentry, char *buffer, size_t size)
++{
++	return ext3_xattr_list(dentry->d_inode, buffer, size);
++}
++
++/*
++ * Inode operation setxattr()
++ *
++ * dentry->d_inode->i_sem down
++ * BKL held [before 2.5.x]
++ */
++int
++ext3_setxattr(struct dentry *dentry, const char *name,
++	      void *value, size_t size, int flags)
++{
++	struct ext3_xattr_handler *handler;
++	struct inode *inode = dentry->d_inode;
++
++	if (size == 0)
++		value = "";  /* empty EA, do not remove */
++	handler = ext3_xattr_resolve_name(&name);
++	if (!handler)
++		return -ENOTSUP;
++	return handler->set(inode, name, value, size, flags);
++}
++
++/*
++ * Inode operation removexattr()
++ *
++ * dentry->d_inode->i_sem down
++ * BKL held [before 2.5.x]
++ */
++int
++ext3_removexattr(struct dentry *dentry, const char *name)
++{
++	struct ext3_xattr_handler *handler;
++	struct inode *inode = dentry->d_inode;
++
++	handler = ext3_xattr_resolve_name(&name);
++	if (!handler)
++		return -ENOTSUP;
++	return handler->set(inode, name, NULL, 0, XATTR_REPLACE);
++}
++
++/*
++ * ext3_xattr_get()
++ *
++ * Copy an extended attribute into the buffer
++ * provided, or compute the buffer size required.
++ * Buffer is NULL to compute the size of the buffer required.
++ *
++ * Returns a negative error number on failure, or the number of bytes
++ * used / required on success.
++ */
++int
++ext3_xattr_get(struct inode *inode, int name_index, const char *name,
++	       void *buffer, size_t buffer_size)
++{
++	struct buffer_head *bh = NULL;
++	struct ext3_xattr_entry *entry;
++	unsigned int block, size;
++	char *end;
++	int name_len, error;
++
++	ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
++		  name_index, name, buffer, (long)buffer_size);
++
++	if (name == NULL)
++		return -EINVAL;
++	if (!EXT3_I(inode)->i_file_acl)
++		return -ENOATTR;
++	block = EXT3_I(inode)->i_file_acl;
++	ea_idebug(inode, "reading block %d", block);
++	bh = sb_bread(inode->i_sb, block);
++	if (!bh)
++		return -EIO;
++	ea_bdebug(bh, "b_count=%d, refcount=%d",
++		atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
++	end = bh->b_data + bh->b_size;
++	if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
++	    HDR(bh)->h_blocks != cpu_to_le32(1)) {
++bad_block:	ext3_error(inode->i_sb, "ext3_xattr_get",
++			"inode %ld: bad block %d", inode->i_ino, block);
++		error = -EIO;
++		goto cleanup;
++	}
++	/* find named attribute */
++	name_len = strlen(name);
++
++	error = -ERANGE;
++	if (name_len > 255)
++		goto cleanup;
++	entry = FIRST_ENTRY(bh);
++	while (!IS_LAST_ENTRY(entry)) {
++		struct ext3_xattr_entry *next =
++			EXT3_XATTR_NEXT(entry);
++		if ((char *)next >= end)
++			goto bad_block;
++		if (name_index == entry->e_name_index &&
++		    name_len == entry->e_name_len &&
++		    memcmp(name, entry->e_name, name_len) == 0)
++			goto found;
++		entry = next;
++	}
++	/* Check the remaining name entries */
++	while (!IS_LAST_ENTRY(entry)) {
++		struct ext3_xattr_entry *next =
++			EXT3_XATTR_NEXT(entry);
++		if ((char *)next >= end)
++			goto bad_block;
++		entry = next;
++	}
++	if (ext3_xattr_cache_insert(bh))
++		ea_idebug(inode, "cache insert failed");
++	error = -ENOATTR;
++	goto cleanup;
++found:
++	/* check the buffer size */
++	if (entry->e_value_block != 0)
++		goto bad_block;
++	size = le32_to_cpu(entry->e_value_size);
++	if (size > inode->i_sb->s_blocksize ||
++	    le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
++		goto bad_block;
++
++	if (ext3_xattr_cache_insert(bh))
++		ea_idebug(inode, "cache insert failed");
++	if (buffer) {
++		error = -ERANGE;
++		if (size > buffer_size)
++			goto cleanup;
++		/* return value of attribute */
++		memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
++			size);
++	}
++	error = size;
++
++cleanup:
++	brelse(bh);
++
++	return error;
++}
++
++/*
++ * ext3_xattr_list()
++ *
++ * Copy a list of attribute names into the buffer
++ * provided, or compute the buffer size required.
++ * Buffer is NULL to compute the size of the buffer required.
++ *
++ * Returns a negative error number on failure, or the number of bytes
++ * used / required on success.
++ */
++int
++ext3_xattr_list(struct inode *inode, char *buffer, size_t buffer_size)
++{
++	struct buffer_head *bh = NULL;
++	struct ext3_xattr_entry *entry;
++	unsigned int block, size = 0;
++	char *buf, *end;
++	int error;
++
++	ea_idebug(inode, "buffer=%p, buffer_size=%ld",
++		  buffer, (long)buffer_size);
++
++	if (!EXT3_I(inode)->i_file_acl)
++		return 0;
++	block = EXT3_I(inode)->i_file_acl;
++	ea_idebug(inode, "reading block %d", block);
++	bh = sb_bread(inode->i_sb, block);
++	if (!bh)
++		return -EIO;
++	ea_bdebug(bh, "b_count=%d, refcount=%d",
++		atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
++	end = bh->b_data + bh->b_size;
++	if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
++	    HDR(bh)->h_blocks != cpu_to_le32(1)) {
++bad_block:	ext3_error(inode->i_sb, "ext3_xattr_list",
++			"inode %ld: bad block %d", inode->i_ino, block);
++		error = -EIO;
++		goto cleanup;
++	}
++	/* compute the size required for the list of attribute names */
++	for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
++	     entry = EXT3_XATTR_NEXT(entry)) {
++		struct ext3_xattr_handler *handler;
++		struct ext3_xattr_entry *next =
++			EXT3_XATTR_NEXT(entry);
++		if ((char *)next >= end)
++			goto bad_block;
++
++		handler = ext3_xattr_handler(entry->e_name_index);
++		if (handler) {
++			size += handler->list(NULL, inode, entry->e_name,
++					      entry->e_name_len) + 1;
++		}
++	}
++
++	if (ext3_xattr_cache_insert(bh))
++		ea_idebug(inode, "cache insert failed");
++	if (!buffer) {
++		error = size;
++		goto cleanup;
++	} else {
++		error = -ERANGE;
++		if (size > buffer_size)
++			goto cleanup;
++	}
++
++	/* list the attribute names */
++	buf = buffer;
++	for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
++	     entry = EXT3_XATTR_NEXT(entry)) {
++		struct ext3_xattr_handler *handler;
++
++		handler = ext3_xattr_handler(entry->e_name_index);
++		if (handler) {
++			buf += handler->list(buf, inode, entry->e_name,
++					     entry->e_name_len);
++			*buf++ = '\0';
++		}
++	}
++	error = size;
++
++cleanup:
++	brelse(bh);
++
++	return error;
++}
++
++/*
++ * If the EXT3_FEATURE_COMPAT_EXT_ATTR feature of this file system is
++ * not set, set it.
++ */
++static void ext3_xattr_update_super_block(handle_t *handle,
++					  struct super_block *sb)
++{
++	if (EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_EXT_ATTR))
++		return;
++
++	lock_super(sb);
++	ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
++#if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,0)
++	EXT3_SB(sb)->s_feature_compat |= EXT3_FEATURE_COMPAT_EXT_ATTR;
++#endif
++	EXT3_SB(sb)->s_es->s_feature_compat |=
++		cpu_to_le32(EXT3_FEATURE_COMPAT_EXT_ATTR);
++	sb->s_dirt = 1;
++	ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
++	unlock_super(sb);
++}
++
++/*
++ * ext3_xattr_set()
++ *
++ * Create, replace or remove an extended attribute for this inode. Value
++ * is NULL to remove an existing extended attribute, and non-NULL to
++ * either replace an existing extended attribute, or create a new extended
++ * attribute. The flags XATTR_REPLACE and XATTR_CREATE
++ * specify that an extended attribute must exist and must not exist
++ * previous to the call, respectively.
++ *
++ * Returns 0, or a negative error number on failure.
++ */
++int
++ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index,
++	       const char *name, void *value, size_t value_len, int flags)
++{
++	struct super_block *sb = inode->i_sb;
++	struct buffer_head *bh = NULL;
++	struct ext3_xattr_header *header = NULL;
++	struct ext3_xattr_entry *here, *last;
++	unsigned int name_len;
++	int min_offs = sb->s_blocksize, not_found = 1, free, error;
++	char *end;
++	
++	/*
++	 * header -- Points either into bh, or to a temporarily
++	 *           allocated buffer.
++	 * here -- The named entry found, or the place for inserting, within
++	 *         the block pointed to by header.
++	 * last -- Points right after the last named entry within the block
++	 *         pointed to by header.
++	 * min_offs -- The offset of the first value (values are aligned
++	 *             towards the end of the block).
++	 * end -- Points right after the block pointed to by header.
++	 */
++	
++	ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
++		  name_index, name, value, (long)value_len);
++
++	if (IS_RDONLY(inode))
++		return -EROFS;
++	if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
++		return -EPERM;
++	if (value == NULL)
++		value_len = 0;
++	if (name == NULL)
++		return -EINVAL;
++	name_len = strlen(name);
++	if (name_len > 255 || value_len > sb->s_blocksize)
++		return -ERANGE;
++	ext3_xattr_lock();
++
++	if (EXT3_I(inode)->i_file_acl) {
++		/* The inode already has an extended attribute block. */
++		int block = EXT3_I(inode)->i_file_acl;
++
++		bh = sb_bread(sb, block);
++		error = -EIO;
++		if (!bh)
++			goto cleanup;
++		ea_bdebug(bh, "b_count=%d, refcount=%d",
++			atomic_read(&(bh->b_count)),
++			le32_to_cpu(HDR(bh)->h_refcount));
++		header = HDR(bh);
++		end = bh->b_data + bh->b_size;
++		if (header->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
++		    header->h_blocks != cpu_to_le32(1)) {
++bad_block:		ext3_error(sb, "ext3_xattr_set",
++				"inode %ld: bad block %d", inode->i_ino, block);
++			error = -EIO;
++			goto cleanup;
++		}
++		/* Find the named attribute. */
++		here = FIRST_ENTRY(bh);
++		while (!IS_LAST_ENTRY(here)) {
++			struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(here);
++			if ((char *)next >= end)
++				goto bad_block;
++			if (!here->e_value_block && here->e_value_size) {
++				int offs = le16_to_cpu(here->e_value_offs);
++				if (offs < min_offs)
++					min_offs = offs;
++			}
++			not_found = name_index - here->e_name_index;
++			if (!not_found)
++				not_found = name_len - here->e_name_len;
++			if (!not_found)
++				not_found = memcmp(name, here->e_name,name_len);
++			if (not_found <= 0)
++				break;
++			here = next;
++		}
++		last = here;
++		/* We still need to compute min_offs and last. */
++		while (!IS_LAST_ENTRY(last)) {
++			struct ext3_xattr_entry *next = EXT3_XATTR_NEXT(last);
++			if ((char *)next >= end)
++				goto bad_block;
++			if (!last->e_value_block && last->e_value_size) {
++				int offs = le16_to_cpu(last->e_value_offs);
++				if (offs < min_offs)
++					min_offs = offs;
++			}
++			last = next;
++		}
++
++		/* Check whether we have enough space left. */
++		free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
++	} else {
++		/* We will use a new extended attribute block. */
++		free = sb->s_blocksize -
++			sizeof(struct ext3_xattr_header) - sizeof(__u32);
++		here = last = NULL;  /* avoid gcc uninitialized warning. */
++	}
++
++	if (not_found) {
++		/* Request to remove a nonexistent attribute? */
++		error = -ENOATTR;
++		if (flags & XATTR_REPLACE)
++			goto cleanup;
++		error = 0;
++		if (value == NULL)
++			goto cleanup;
++		else
++			free -= EXT3_XATTR_LEN(name_len);
++	} else {
++		/* Request to create an existing attribute? */
++		error = -EEXIST;
++		if (flags & XATTR_CREATE)
++			goto cleanup;
++		if (!here->e_value_block && here->e_value_size) {
++			unsigned int size = le32_to_cpu(here->e_value_size);
++
++			if (le16_to_cpu(here->e_value_offs) + size > 
++			    sb->s_blocksize || size > sb->s_blocksize)
++				goto bad_block;
++			free += EXT3_XATTR_SIZE(size);
++		}
++	}
++	free -= EXT3_XATTR_SIZE(value_len);
++	error = -ENOSPC;
++	if (free < 0)
++		goto cleanup;
++
++	/* Here we know that we can set the new attribute. */
++
++	if (header) {
++		if (header->h_refcount == cpu_to_le32(1)) {
++			ea_bdebug(bh, "modifying in-place");
++			ext3_xattr_cache_remove(bh);
++			error = ext3_journal_get_write_access(handle, bh);
++			if (error)
++				goto cleanup;
++		} else {
++			int offset;
++
++			ea_bdebug(bh, "cloning");
++			header = kmalloc(bh->b_size, GFP_KERNEL);
++			error = -ENOMEM;
++			if (header == NULL)
++				goto cleanup;
++			memcpy(header, HDR(bh), bh->b_size);
++			header->h_refcount = cpu_to_le32(1);
++			offset = (char *)header - bh->b_data;
++			here = ENTRY((char *)here + offset);
++			last = ENTRY((char *)last + offset);
++		}
++	} else {
++		/* Allocate a buffer where we construct the new block. */
++		header = kmalloc(sb->s_blocksize, GFP_KERNEL);
++		error = -ENOMEM;
++		if (header == NULL)
++			goto cleanup;
++		memset(header, 0, sb->s_blocksize);
++		end = (char *)header + sb->s_blocksize;
++		header->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC);
++		header->h_blocks = header->h_refcount = cpu_to_le32(1);
++		last = here = ENTRY(header+1);
++	}
++
++	if (not_found) {
++		/* Insert the new name. */
++		int size = EXT3_XATTR_LEN(name_len);
++		int rest = (char *)last - (char *)here;
++		memmove((char *)here + size, here, rest);
++		memset(here, 0, size);
++		here->e_name_index = name_index;
++		here->e_name_len = name_len;
++		memcpy(here->e_name, name, name_len);
++	} else {
++		/* Remove the old value. */
++		if (!here->e_value_block && here->e_value_size) {
++			char *first_val = (char *)header + min_offs;
++			int offs = le16_to_cpu(here->e_value_offs);
++			char *val = (char *)header + offs;
++			size_t size = EXT3_XATTR_SIZE(
++				le32_to_cpu(here->e_value_size));
++			memmove(first_val + size, first_val, val - first_val);
++			memset(first_val, 0, size);
++			here->e_value_offs = 0;
++			min_offs += size;
++
++			/* Adjust all value offsets. */
++			last = ENTRY(header+1);
++			while (!IS_LAST_ENTRY(last)) {
++				int o = le16_to_cpu(last->e_value_offs);
++				if (!last->e_value_block && o < offs)
++					last->e_value_offs =
++						cpu_to_le16(o + size);
++				last = EXT3_XATTR_NEXT(last);
++			}
++		}
++		if (value == NULL) {
++			/* Remove this attribute. */
++			if (EXT3_XATTR_NEXT(ENTRY(header+1)) == last) {
++				/* This block is now empty. */
++				error = ext3_xattr_set2(handle, inode, bh,NULL);
++				goto cleanup;
++			} else {
++				/* Remove the old name. */
++				int size = EXT3_XATTR_LEN(name_len);
++				last = ENTRY((char *)last - size);
++				memmove(here, (char*)here + size,
++					(char*)last - (char*)here);
++				memset(last, 0, size);
++			}
++		}
++	}
++
++	if (value != NULL) {
++		/* Insert the new value. */
++		here->e_value_size = cpu_to_le32(value_len);
++		if (value_len) {
++			size_t size = EXT3_XATTR_SIZE(value_len);
++			char *val = (char *)header + min_offs - size;
++			here->e_value_offs =
++				cpu_to_le16((char *)val - (char *)header);
++			memset(val + size - EXT3_XATTR_PAD, 0,
++			       EXT3_XATTR_PAD); /* Clear the pad bytes. */
++			memcpy(val, value, value_len);
++		}
++	}
++	ext3_xattr_rehash(header, here);
++
++	error = ext3_xattr_set2(handle, inode, bh, header);
++
++cleanup:
++	brelse(bh);
++	if (!(bh && header == HDR(bh)))
++		kfree(header);
++	ext3_xattr_unlock();
++
++	return error;
++}
++
++/*
++ * Second half of ext3_xattr_set(): switch the inode from old_bh to header.
++ */
++static int
++ext3_xattr_set2(handle_t *handle, struct inode *inode,
++		struct buffer_head *old_bh, struct ext3_xattr_header *header)
++{
++	struct super_block *sb = inode->i_sb;
++	struct buffer_head *new_bh = NULL;
++	int error;
++
++	if (header) { /* NULL header: the inode is left without an xattr block */
++		new_bh = ext3_xattr_cache_find(inode, header);
++		if (new_bh) {
++			/*
++			 * We found an identical block in the cache.
++			 * The old block will be released after updating
++			 * the inode.
++			 */
++			ea_bdebug(old_bh, "reusing block %ld",
++				new_bh->b_blocknr);
++			/* Sharing an existing block still needs a quota charge. */
++			error = -EDQUOT;
++			if (ext3_xattr_quota_alloc(inode, 1))
++				goto cleanup;
++			/* NOTE(review): cleanup does not undo the charge above. */
++			error = ext3_journal_get_write_access(handle, new_bh);
++			if (error)
++				goto cleanup;
++			HDR(new_bh)->h_refcount = cpu_to_le32(
++				le32_to_cpu(HDR(new_bh)->h_refcount) + 1);
++			ea_bdebug(new_bh, "refcount now=%d",
++				le32_to_cpu(HDR(new_bh)->h_refcount));
++		} else if (old_bh && header == HDR(old_bh)) {
++			/* Keep this block. */
++			new_bh = old_bh;
++			(void)ext3_xattr_cache_insert(new_bh);
++		} else {
++			/* We need to allocate a new block */
++			int force = EXT3_I(inode)->i_file_acl != 0;
++			int block = ext3_xattr_new_block(handle, inode,
++							 &error, force);
++			if (error)
++				goto cleanup;
++			ea_idebug(inode, "creating block %d", block);
++
++			new_bh = sb_getblk(sb, block);
++			if (!new_bh) {
++getblk_failed:			ext3_xattr_free_block(handle, inode, block);
++				error = -EIO;
++				goto cleanup;
++			}
++			lock_buffer(new_bh);
++			error = ext3_journal_get_create_access(handle, new_bh);
++			if (error) {
++				unlock_buffer(new_bh);
++				goto getblk_failed;
++			}
++			memcpy(new_bh->b_data, header, new_bh->b_size);
++			mark_buffer_uptodate(new_bh, 1);
++			unlock_buffer(new_bh);
++			(void)ext3_xattr_cache_insert(new_bh);
++			ext3_xattr_update_super_block(handle, sb);
++		}
++		error = ext3_journal_dirty_metadata(handle, new_bh);
++		if (error)
++			goto cleanup;
++	}
++
++	/* Update the inode. */
++	EXT3_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
++	inode->i_ctime = CURRENT_TIME;
++	ext3_mark_inode_dirty(handle, inode);
++	if (IS_SYNC(inode))
++		handle->h_sync = 1;
++
++	error = 0;
++	if (old_bh && old_bh != new_bh) {
++		/*
++		 * If there was an old block, and we are not still using it,
++		 * we now release the old block.
++		*/
++		unsigned int refcount = le32_to_cpu(HDR(old_bh)->h_refcount);
++
++		error = ext3_journal_get_write_access(handle, old_bh);
++		if (error)
++			goto cleanup;
++		if (refcount == 1) {
++			/* Free the old block. */
++			ea_bdebug(old_bh, "freeing");
++			ext3_xattr_free_block(handle, inode, old_bh->b_blocknr);
++
++			/* ext3_forget() calls bforget() for us, but we
++			   let our caller release old_bh, so we need to
++			   duplicate the handle before. */
++			get_bh(old_bh);
++			ext3_forget(handle, 1, inode, old_bh,old_bh->b_blocknr);
++		} else {
++			/* Decrement the refcount only. */
++			refcount--;
++			HDR(old_bh)->h_refcount = cpu_to_le32(refcount);
++			ext3_xattr_quota_free(inode);
++			ext3_journal_dirty_metadata(handle, old_bh);
++			ea_bdebug(old_bh, "refcount now=%d", refcount);
++		}
++	}
++
++cleanup:
++	if (old_bh != new_bh)
++		brelse(new_bh);
++
++	return error;
++}
++
++/*
++ * ext3_xattr_drop_inode()
++ *
++ * Free extended attribute resources associated with this inode. This
++ * is called immediately before an inode is freed.
++ */
++void
++ext3_xattr_drop_inode(handle_t *handle, struct inode *inode)
++{
++	struct buffer_head *bh;
++	unsigned int block = EXT3_I(inode)->i_file_acl;
++
++	if (!block)
++		return;
++	ext3_xattr_lock();
++	bh = sb_bread(inode->i_sb, block);
++	if (!bh) {
++		ext3_error(inode->i_sb, "ext3_xattr_drop_inode",
++			"inode %ld: block %d read error", inode->i_ino, block);
++		goto cleanup;
++	}
++	ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count)));
++	if (HDR(bh)->h_magic != cpu_to_le32(EXT3_XATTR_MAGIC) ||
++	    HDR(bh)->h_blocks != cpu_to_le32(1)) {
++		ext3_error(inode->i_sb, "ext3_xattr_drop_inode",
++			"inode %ld: bad block %d", inode->i_ino, block);
++		goto cleanup;
++	}
++	if (ext3_journal_get_write_access(handle, bh) != 0)
++		goto cleanup;	/* no write access: leave the block untouched */
++	ea_bdebug(bh, "refcount now=%d", le32_to_cpu(HDR(bh)->h_refcount) - 1);
++	if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
++		ext3_xattr_cache_remove(bh);
++		ext3_xattr_free_block(handle, inode, block);
++		ext3_forget(handle, 1, inode, bh, block);
++		bh = NULL;
++	} else {
++		HDR(bh)->h_refcount = cpu_to_le32(
++			le32_to_cpu(HDR(bh)->h_refcount) - 1);
++		ext3_journal_dirty_metadata(handle, bh);
++		if (IS_SYNC(inode))
++			handle->h_sync = 1;
++		ext3_xattr_quota_free(inode);
++	}
++	EXT3_I(inode)->i_file_acl = 0;
++
++cleanup:
++	brelse(bh);
++	ext3_xattr_unlock();
++}
++
++/*
++ * ext3_xattr_put_super()
++ *
++ * Called on unmount: with sharing enabled, shrink the cache for sb->s_dev.
++ */
++void
++ext3_xattr_put_super(struct super_block *sb)
++{
++#ifdef CONFIG_EXT3_FS_XATTR_SHARING
++	mb_cache_shrink(ext3_xattr_cache, sb->s_dev);
++#endif
++}
++
++#ifdef CONFIG_EXT3_FS_XATTR_SHARING
++
++/*
++ * ext3_xattr_cache_insert()
++ *
++ * Create a cache entry for bh (device, block number, h_hash) and insert
++ * it; an -EBUSY ("already in cache") result is treated as success.
++ *
++ * Returns 0, or a negative error number on failure.
++ */
++static int
++ext3_xattr_cache_insert(struct buffer_head *bh)
++{
++	__u32 hash = le32_to_cpu(HDR(bh)->h_hash);
++	struct mb_cache_entry *ce;
++	int error;
++
++	ce = mb_cache_entry_alloc(ext3_xattr_cache);
++	if (!ce)
++		return -ENOMEM;
++	error = mb_cache_entry_insert(ce, bh->b_dev, bh->b_blocknr, &hash);
++	if (error) {
++		mb_cache_entry_free(ce);
++		if (error == -EBUSY) {
++			ea_bdebug(bh, "already in cache (%d cache entries)",
++				atomic_read(&ext3_xattr_cache->c_entry_count));
++			error = 0;
++		}
++	} else {
++		ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
++			  atomic_read(&ext3_xattr_cache->c_entry_count));
++		mb_cache_entry_release(ce);
++	}
++	return error;
++}
++
++/*
++ * ext3_xattr_cmp()
++ *
++ * Compare two extended attribute blocks for equality, entry by entry.
++ *
++ * Returns 0 if the blocks are equal, 1 if they differ, and -EIO if
++ * an entry stores its value outside the block (e_value_block != 0).
++ */
++static int
++ext3_xattr_cmp(struct ext3_xattr_header *header1,
++	       struct ext3_xattr_header *header2)
++{
++	struct ext3_xattr_entry *entry1, *entry2;
++
++	entry1 = ENTRY(header1+1);
++	entry2 = ENTRY(header2+1);
++	while (!IS_LAST_ENTRY(entry1)) {
++		if (IS_LAST_ENTRY(entry2))
++			return 1;
++		/* e_hash covers only name and value, so check the index too. */
++		if (entry1->e_hash != entry2->e_hash ||
++		    entry1->e_name_index != entry2->e_name_index ||
++		    entry1->e_name_len != entry2->e_name_len ||
++		    entry1->e_value_size != entry2->e_value_size ||
++		    memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
++			return 1;
++		if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
++			return -EIO;
++		if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
++			   (char *)header2 + le16_to_cpu(entry2->e_value_offs),
++			   le32_to_cpu(entry1->e_value_size)))
++			return 1;
++
++		entry1 = EXT3_XATTR_NEXT(entry1);
++		entry2 = EXT3_XATTR_NEXT(entry2);
++	}
++	return IS_LAST_ENTRY(entry2) ? 0 : 1;
++}
++
++/*
++ * ext3_xattr_cache_find()
++ *
++ * Find an identical extended attribute block whose h_refcount is still
++ * below EXT3_XATTR_REFCOUNT_MAX, so that one more inode may share it.
++ * Returns a pointer to the block found, or NULL if such a block was
++ * not found or an error occurred.
++ */
++static struct buffer_head *
++ext3_xattr_cache_find(struct inode *inode, struct ext3_xattr_header *header)
++{
++	__u32 hash = le32_to_cpu(header->h_hash);
++	struct mb_cache_entry *ce;
++
++	if (!header->h_hash)
++		return NULL;  /* never share */
++	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
++	ce = mb_cache_entry_find_first(ext3_xattr_cache, 0, inode->i_dev, hash);
++	while (ce) {
++		struct buffer_head *bh = sb_bread(inode->i_sb, ce->e_block);
++
++		if (!bh) {
++			ext3_error(inode->i_sb, "ext3_xattr_cache_find",
++				"inode %ld: block %ld read error",
++				inode->i_ino, ce->e_block);
++		} else if (le32_to_cpu(HDR(bh)->h_refcount) >=
++			   EXT3_XATTR_REFCOUNT_MAX) {
++			ea_idebug(inode, "block %ld refcount %d>=%d",ce->e_block,
++				le32_to_cpu(HDR(bh)->h_refcount),
++				EXT3_XATTR_REFCOUNT_MAX);
++		} else if (!ext3_xattr_cmp(header, HDR(bh))) {
++			ea_bdebug(bh, "b_count=%d",atomic_read(&(bh->b_count)));
++			mb_cache_entry_release(ce);
++			return bh;
++		}
++		brelse(bh);
++		ce = mb_cache_entry_find_next(ce, 0, inode->i_dev, hash);
++	}
++	return NULL;
++}
++
++/*
++ * ext3_xattr_cache_remove()
++ *
++ * Look up the cache entry for bh's device and block number and free it,
++ * if there is one. Called when a block becomes invalid.
++ */
++static void
++ext3_xattr_cache_remove(struct buffer_head *bh)
++{
++	struct mb_cache_entry *ce;
++
++	ce = mb_cache_entry_get(ext3_xattr_cache, bh->b_dev, bh->b_blocknr);
++	if (ce) {
++		ea_bdebug(bh, "removing (%d cache entries remaining)",
++			  atomic_read(&ext3_xattr_cache->c_entry_count)-1);
++		mb_cache_entry_free(ce);
++	} else 
++		ea_bdebug(bh, "no cache entry");
++}
++
++#define NAME_HASH_SHIFT 5
++#define VALUE_HASH_SHIFT 16
++
++/*
++ * ext3_xattr_hash_entry()
++ *
++ * Set e_hash from the entry's name bytes and, if in-block, its value words.
++ */
++static inline void ext3_xattr_hash_entry(struct ext3_xattr_header *header,
++					 struct ext3_xattr_entry *entry)
++{
++	__u32 hash = 0;
++	char *name = entry->e_name;	/* NOTE(review): char sign is per-arch */
++	int n;
++
++	for (n=0; n < entry->e_name_len; n++) {
++		hash = (hash << NAME_HASH_SHIFT) ^
++		       (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
++		       *name++;
++	}
++
++	if (entry->e_value_block == 0 && entry->e_value_size != 0) {
++		__u32 *value = (__u32 *)((char *)header +
++			le16_to_cpu(entry->e_value_offs));
++		for (n = (le32_to_cpu(entry->e_value_size) +
++		     EXT3_XATTR_ROUND) >> EXT3_XATTR_PAD_BITS; n; n--) {
++			hash = (hash << VALUE_HASH_SHIFT) ^
++			       (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
++			       le32_to_cpu(*value++);
++		}
++	}
++	entry->e_hash = cpu_to_le32(hash);
++}
++
++#undef NAME_HASH_SHIFT
++#undef VALUE_HASH_SHIFT
++
++#define BLOCK_HASH_SHIFT 16
++
++/*
++ * ext3_xattr_rehash()
++ *
++ * Re-compute the extended attribute hash value after an entry has changed.
++ */
++static void ext3_xattr_rehash(struct ext3_xattr_header *header,
++			      struct ext3_xattr_entry *entry)
++{
++	struct ext3_xattr_entry *here;
++	__u32 hash = 0;
++	/* Refresh the changed entry first, then fold every e_hash into h_hash. */
++	ext3_xattr_hash_entry(header, entry);
++	here = ENTRY(header+1);
++	while (!IS_LAST_ENTRY(here)) {
++		if (!here->e_hash) {
++			/* Block is not shared if an entry's hash value == 0 */
++			hash = 0;
++			break;
++		}
++		hash = (hash << BLOCK_HASH_SHIFT) ^
++		       (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
++		       le32_to_cpu(here->e_hash);
++		here = EXT3_XATTR_NEXT(here);
++	}
++	header->h_hash = cpu_to_le32(hash);
++}
++
++#undef BLOCK_HASH_SHIFT
++
++int __init
++init_ext3_xattr(void)
++{
++	ext3_xattr_cache = mb_cache_create("ext3_xattr", NULL,
++		sizeof(struct mb_cache_entry) +
++		sizeof(struct mb_cache_entry_index), 1, 61);
++	if (!ext3_xattr_cache)
++		return -ENOMEM;	/* mb_cache_create() returned NULL */
++
++	return 0;
++}
++
++void
++exit_ext3_xattr(void)
++{
++	if (ext3_xattr_cache)	/* may be NULL, e.g. after a failed init */
++		mb_cache_destroy(ext3_xattr_cache);
++	ext3_xattr_cache = NULL;
++}
++
++#else  /* CONFIG_EXT3_FS_XATTR_SHARING */
++
++int __init
++init_ext3_xattr(void)
++{
++	return 0;
++}
++
++void
++exit_ext3_xattr(void)
++{
++}
++
++#endif  /* CONFIG_EXT3_FS_XATTR_SHARING */
+--- linux-2.4.18/include/linux/ext3_fs.h~linux-2.4.18ea-0.8.26-2	2003-09-01 11:51:00.000000000 +0400
++++ linux-2.4.18-alexey/include/linux/ext3_fs.h	2003-09-01 14:55:39.000000000 +0400
+@@ -63,8 +63,6 @@
+  */
+ #define	EXT3_BAD_INO		 1	/* Bad blocks inode */
+ #define EXT3_ROOT_INO		 2	/* Root inode */
+-#define EXT3_ACL_IDX_INO	 3	/* ACL inode */
+-#define EXT3_ACL_DATA_INO	 4	/* ACL inode */
+ #define EXT3_BOOT_LOADER_INO	 5	/* Boot loader inode */
+ #define EXT3_UNDEL_DIR_INO	 6	/* Undelete directory inode */
+ #define EXT3_RESIZE_INO		 7	/* Reserved group descriptors inode */
+@@ -94,7 +92,6 @@
+ #else
+ # define EXT3_BLOCK_SIZE(s)		(EXT3_MIN_BLOCK_SIZE << (s)->s_log_block_size)
+ #endif
+-#define EXT3_ACLE_PER_BLOCK(s)		(EXT3_BLOCK_SIZE(s) / sizeof (struct ext3_acl_entry))
+ #define	EXT3_ADDR_PER_BLOCK(s)		(EXT3_BLOCK_SIZE(s) / sizeof (__u32))
+ #ifdef __KERNEL__
+ # define EXT3_BLOCK_SIZE_BITS(s)	((s)->s_blocksize_bits)
+@@ -129,28 +126,6 @@
+ #endif
+ 
+ /*
+- * ACL structures
+- */
+-struct ext3_acl_header	/* Header of Access Control Lists */
+-{
+-	__u32	aclh_size;
+-	__u32	aclh_file_count;
+-	__u32	aclh_acle_count;
+-	__u32	aclh_first_acle;
+-};
+-
+-struct ext3_acl_entry	/* Access Control List Entry */
+-{
+-	__u32	acle_size;
+-	__u16	acle_perms;	/* Access permissions */
+-	__u16	acle_type;	/* Type of entry */
+-	__u16	acle_tag;	/* User or group identity */
+-	__u16	acle_pad1;
+-	__u32	acle_next;	/* Pointer on next entry for the */
+-					/* same inode or on next free entry */
+-};
+-
+-/*
+  * Structure of a blocks group descriptor
+  */
+ struct ext3_group_desc
+@@ -521,7 +496,7 @@ struct ext3_super_block {
+ #define EXT3_FEATURE_INCOMPAT_RECOVER		0x0004 /* Needs recovery */
+ #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV	0x0008 /* Journal device */
+ 
+-#define EXT3_FEATURE_COMPAT_SUPP	0
++#define EXT3_FEATURE_COMPAT_SUPP	EXT3_FEATURE_COMPAT_EXT_ATTR
+ #define EXT3_FEATURE_INCOMPAT_SUPP	(EXT3_FEATURE_INCOMPAT_FILETYPE| \
+ 					 EXT3_FEATURE_INCOMPAT_RECOVER)
+ #define EXT3_FEATURE_RO_COMPAT_SUPP	(EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+@@ -623,6 +598,24 @@ struct dx_hash_info
+ #define HASH_NB_ALWAYS		1
+ 
+ 
++/* Defined for extended attributes */
++#define CONFIG_EXT3_FS_XATTR y
++#ifndef ENOATTR
++#define ENOATTR ENODATA		/* No such attribute */
++#endif
++#ifndef ENOTSUP
++#define ENOTSUP EOPNOTSUPP	/* Operation not supported */
++#endif
++#ifndef XATTR_NAME_MAX
++#define XATTR_NAME_MAX   255	/* # chars in an extended attribute name */
++#define XATTR_SIZE_MAX 65536	/* size of an extended attribute value (64k) */
++#define XATTR_LIST_MAX 65536	/* size of extended attribute namelist (64k) */
++#endif
++#ifndef XATTR_CREATE
++#define XATTR_CREATE	1	/* set value, fail if attr already exists */
++#define XATTR_REPLACE	2	/* set value, fail if attr does not exist */
++#endif
++
+ /*
+  * Describe an inode's exact location on disk and in memory
+  */
+@@ -704,6 +697,7 @@ extern void ext3_check_inodes_bitmap (st
+ extern unsigned long ext3_count_free (struct buffer_head *, unsigned);
+ 
+ /* inode.c */
++extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int);
+ extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
+ extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
+ 
+--- linux-2.4.18/include/linux/ext3_jbd.h~linux-2.4.18ea-0.8.26-2	2003-08-29 16:53:17.000000000 +0400
++++ linux-2.4.18-alexey/include/linux/ext3_jbd.h	2003-09-01 14:55:39.000000000 +0400
+@@ -30,13 +30,19 @@
+ 
+ #define EXT3_SINGLEDATA_TRANS_BLOCKS	8
+ 
++/* Extended attributes may touch two data buffers, two bitmap buffers,
++ * and two group summaries. */
++
++#define EXT3_XATTR_TRANS_BLOCKS		8
++
+ /* Define the minimum size for a transaction which modifies data.  This
+  * needs to take into account the fact that we may end up modifying two
+  * quota files too (one for the group, one for the user quota).  The
+  * superblock only gets updated once, of course, so don't bother
+  * counting that again for the quota updates. */
+ 
+-#define EXT3_DATA_TRANS_BLOCKS		(3 * EXT3_SINGLEDATA_TRANS_BLOCKS - 2)
++#define EXT3_DATA_TRANS_BLOCKS		(3 * EXT3_SINGLEDATA_TRANS_BLOCKS + \
++					 EXT3_XATTR_TRANS_BLOCKS - 2)
+ 
+ extern int ext3_writepage_trans_blocks(struct inode *inode);
+ 
+--- /dev/null	2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.18-alexey/include/linux/ext3_xattr.h	2003-09-01 14:55:39.000000000 +0400
+@@ -0,0 +1,155 @@
++/*
++  File: linux/ext3_xattr.h
++
++  On-disk format of extended attributes for the ext3 filesystem.
++
++  (C) 2001 Andreas Gruenbacher, <a.gruenbacher@computer.org>
++*/
++
++#include <linux/config.h>
++#include <linux/init.h>
++#include <linux/xattr.h>
++
++/* Magic value in attribute blocks */
++#define EXT3_XATTR_MAGIC		0xEA020000
++
++/* Maximum number of references to one attribute block */
++#define EXT3_XATTR_REFCOUNT_MAX		1024
++
++/* Name indexes */
++#define EXT3_XATTR_INDEX_MAX			10
++#define EXT3_XATTR_INDEX_USER			1
++
++struct ext3_xattr_header {
++	__u32	h_magic;	/* magic number for identification */
++	__u32	h_refcount;	/* reference count */
++	__u32	h_blocks;	/* number of disk blocks used */
++	__u32	h_hash;		/* hash value of all attributes */
++	__u32	h_reserved[4];	/* zero right now */
++};
++
++struct ext3_xattr_entry {
++	__u8	e_name_len;	/* length of name */
++	__u8	e_name_index;	/* attribute name index */
++	__u16	e_value_offs;	/* offset in disk block of value */
++	__u32	e_value_block;	/* disk block attribute is stored on (n/i) */
++	__u32	e_value_size;	/* size of attribute value */
++	__u32	e_hash;		/* hash value of name and value */
++	char	e_name[0];	/* attribute name */
++};
++
++#define EXT3_XATTR_PAD_BITS		2
++#define EXT3_XATTR_PAD		(1<<EXT3_XATTR_PAD_BITS)
++#define EXT3_XATTR_ROUND		(EXT3_XATTR_PAD-1)
++#define EXT3_XATTR_LEN(name_len) \
++	(((name_len) + EXT3_XATTR_ROUND + \
++	sizeof(struct ext3_xattr_entry)) & ~EXT3_XATTR_ROUND)
++#define EXT3_XATTR_NEXT(entry) \
++	( (struct ext3_xattr_entry *)( \
++	  (char *)(entry) + EXT3_XATTR_LEN((entry)->e_name_len)) )
++#define EXT3_XATTR_SIZE(size) \
++	(((size) + EXT3_XATTR_ROUND) & ~EXT3_XATTR_ROUND)
++
++#ifdef __KERNEL__
++
++# ifdef CONFIG_EXT3_FS_XATTR
++
++struct ext3_xattr_handler {
++	char *prefix;
++	size_t (*list)(char *list, struct inode *inode, const char *name,
++		       int name_len);
++	int (*get)(struct inode *inode, const char *name, void *buffer,
++		   size_t size);
++	int (*set)(struct inode *inode, const char *name, void *buffer,
++		   size_t size, int flags);
++};
++
++extern int ext3_xattr_register(int, struct ext3_xattr_handler *);
++extern void ext3_xattr_unregister(int, struct ext3_xattr_handler *);
++
++extern int ext3_setxattr(struct dentry *, const char *, void *, size_t, int);
++extern ssize_t ext3_getxattr(struct dentry *, const char *, void *, size_t);
++extern ssize_t ext3_listxattr(struct dentry *, char *, size_t);
++extern int ext3_removexattr(struct dentry *, const char *);
++
++extern int ext3_xattr_get(struct inode *, int, const char *, void *, size_t);
++extern int ext3_xattr_list(struct inode *, char *, size_t);
++extern int ext3_xattr_set(handle_t *handle, struct inode *, int, const char *, void *, size_t, int);
++
++extern void ext3_xattr_drop_inode(handle_t *, struct inode *);
++extern void ext3_xattr_put_super(struct super_block *);
++
++extern int init_ext3_xattr(void) __init;
++extern void exit_ext3_xattr(void);
++
++# else  /* CONFIG_EXT3_FS_XATTR */
++#  define ext3_setxattr		NULL
++#  define ext3_getxattr		NULL
++#  define ext3_listxattr	NULL
++#  define ext3_removexattr	NULL
++
++static inline int
++ext3_xattr_get(struct inode *inode, int name_index, const char *name,
++	       void *buffer, size_t size)
++{
++	return -ENOTSUP;	/* stub: CONFIG_EXT3_FS_XATTR is not set */
++}
++
++static inline int
++ext3_xattr_list(struct inode *inode, char *buffer, size_t size)
++{
++	return -ENOTSUP;	/* stub: CONFIG_EXT3_FS_XATTR is not set */
++}
++
++static inline int
++ext3_xattr_set(handle_t *handle, struct inode *inode, int name_index,
++	       const char *name, void *value, size_t size, int flags)
++{
++	return -ENOTSUP;
++}
++
++static inline void
++ext3_xattr_drop_inode(handle_t *handle, struct inode *inode)
++{
++}
++
++static inline void
++ext3_xattr_put_super(struct super_block *sb)
++{
++}
++
++static inline int
++init_ext3_xattr(void)
++{
++	return 0;
++}
++
++static inline void
++exit_ext3_xattr(void)
++{
++}
++
++# endif  /* CONFIG_EXT3_FS_XATTR */
++
++# ifdef CONFIG_EXT3_FS_XATTR_USER
++
++extern int init_ext3_xattr_user(void) __init;
++extern void exit_ext3_xattr_user(void);
++
++# else  /* CONFIG_EXT3_FS_XATTR_USER */
++
++static inline int
++init_ext3_xattr_user(void)
++{
++	return 0;
++}
++
++static inline void
++exit_ext3_xattr_user(void)
++{
++}
++
++#endif  /* CONFIG_EXT3_FS_XATTR_USER */
++
++#endif  /* __KERNEL__ */
++
+--- /dev/null	2003-01-30 13:24:37.000000000 +0300
++++ linux-2.4.18-alexey/include/linux/xattr.h	2003-09-01 14:55:39.000000000 +0400
+@@ -0,0 +1,15 @@
++/*
++  File: linux/xattr.h
++
++  Extended attributes handling.
++
++  Copyright (C) 2001 by Andreas Gruenbacher <a.gruenbacher@computer.org>
++  Copyright (C) 2001 SGI - Silicon Graphics, Inc <linux-xfs@oss.sgi.com>
++*/
++#ifndef _LINUX_XATTR_H
++#define _LINUX_XATTR_H
++
++#define XATTR_CREATE	1	/* set value, fail if attr already exists */
++#define XATTR_REPLACE	2	/* set value, fail if attr does not exist */
++
++#endif	/* _LINUX_XATTR_H */
+
+_
diff --git a/lustre/kernel_patches/patches/removepage-2.4.20.patch b/lustre/kernel_patches/patches/removepage-2.4.20.patch
new file mode 100644
index 0000000..cc721e1
--- /dev/null
+++ b/lustre/kernel_patches/patches/removepage-2.4.20.patch
@@ -0,0 +1,28 @@
+ include/linux/fs.h |    1 +
+ mm/filemap.c       |    3 +++
+ 2 files changed, 4 insertions(+)
+
+--- linux-2.4.20-b_llpmd-l24/include/linux/fs.h~removepage-2.4.20	2003-09-05 11:45:42.000000000 -0700
++++ linux-2.4.20-b_llpmd-l24-zab/include/linux/fs.h	2003-09-05 11:46:25.000000000 -0700
+@@ -402,6 +402,7 @@ struct address_space_operations {
+ 	int (*releasepage) (struct page *, int);
+ #define KERNEL_HAS_O_DIRECT /* this is for modules out of the kernel */
+ 	int (*direct_IO)(int, struct inode *, struct kiobuf *, unsigned long, int);
++	void (*removepage)(struct page *); /* called when page gets removed from the inode */
+ };
+ 
+ struct address_space {
+--- linux-2.4.20-b_llpmd-l24/mm/filemap.c~removepage-2.4.20	2003-09-05 11:45:42.000000000 -0700
++++ linux-2.4.20-b_llpmd-l24-zab/mm/filemap.c	2003-09-05 11:46:25.000000000 -0700
+@@ -95,6 +95,9 @@ static inline void remove_page_from_inod
+ {
+ 	struct address_space * mapping = page->mapping;
+ 
++	if (mapping->a_ops->removepage)
++		mapping->a_ops->removepage(page);
++	
+ 	mapping->nrpages--;
+ 	list_del(&page->list);
+ 	page->mapping = NULL;
+
+_
diff --git a/lustre/kernel_patches/patches/removepage-2.6.0.patch b/lustre/kernel_patches/patches/removepage-2.6.0.patch
new file mode 100644
index 0000000..268ca97
--- /dev/null
+++ b/lustre/kernel_patches/patches/removepage-2.6.0.patch
@@ -0,0 +1,28 @@
+ include/linux/fs.h |    1 +
+ mm/filemap.c       |    3 +++
+ 2 files changed, 4 insertions(+)
+
+--- linux-2.6.0-test3-l25/include/linux/fs.h~removepage-2.6.0	2003-09-05 15:31:52.000000000 -0700
++++ linux-2.6.0-test3-l25-zab/include/linux/fs.h	2003-09-08 10:47:30.000000000 -0700
+@@ -311,6 +311,7 @@ struct address_space_operations {
+ 	int (*releasepage) (struct page *, int);
+ 	int (*direct_IO)(int, struct kiocb *, const struct iovec *iov,
+ 			loff_t offset, unsigned long nr_segs);
++	void (*removepage)(struct page *); /* called when page gets removed from the inode */
+ };
+ 
+ struct backing_dev_info;
+--- linux-2.6.0-test3-l25/mm/filemap.c~removepage-2.6.0	2003-08-08 21:34:39.000000000 -0700
++++ linux-2.6.0-test3-l25-zab/mm/filemap.c	2003-09-08 10:48:10.000000000 -0700
+@@ -81,6 +81,9 @@ void __remove_from_page_cache(struct pag
+ {
+ 	struct address_space *mapping = page->mapping;
+ 
++	if (mapping->a_ops->removepage)
++		mapping->a_ops->removepage(page);
++
+ 	radix_tree_delete(&mapping->page_tree, page->index);
+ 	list_del(&page->list);
+ 	page->mapping = NULL;
+
+_
diff --git a/lustre/kernel_patches/patches/uml-2.6.0-fix.patch b/lustre/kernel_patches/patches/uml-2.6.0-fix.patch
new file mode 100644
index 0000000..2910f97
--- /dev/null
+++ b/lustre/kernel_patches/patches/uml-2.6.0-fix.patch
@@ -0,0 +1,19 @@
+ include/asm-um/unistd.h |    2 ++
+ 1 files changed, 2 insertions(+)
+
+diff -puN include/asm-um/unistd.h~uml-2.6.0-fix include/asm-um/unistd.h
+--- linux-2.6.0-test3/include/asm-um/unistd.h~uml-2.6.0-fix	2003-09-04 18:39:45.000000000 +0400
++++ linux-2.6.0-test3-alexey/include/asm-um/unistd.h	2003-09-04 18:39:59.000000000 +0400
+@@ -6,8 +6,10 @@
+ #ifndef _UM_UNISTD_H_
+ #define _UM_UNISTD_H_
+ 
++#ifdef __KERNEL__
+ #include "linux/resource.h"
+ #include "asm/uaccess.h"
++#endif
+ 
+ extern long sys_open(const char *filename, int flags, int mode);
+ extern long sys_dup(unsigned int fildes);
+
+_
diff --git a/lustre/kernel_patches/patches/uml-patch-2.6.0-test3-1.patch b/lustre/kernel_patches/patches/uml-patch-2.6.0-test3-1.patch
new file mode 100644
index 0000000..8ea5a43
--- /dev/null
+++ b/lustre/kernel_patches/patches/uml-patch-2.6.0-test3-1.patch
@@ -0,0 +1,8716 @@
+diff -Naur a/arch/um/Kconfig b/arch/um/Kconfig
+--- a/arch/um/Kconfig	Fri Aug 15 15:05:57 2003
++++ b/arch/um/Kconfig	Fri Aug 15 15:11:53 2003
+@@ -61,6 +61,20 @@
+ 
+ config NET
+ 	bool "Networking support"
++	help
++	Unless you really know what you are doing, you should say Y here.
++	The reason is that some programs need kernel networking support even
++	when running on a stand-alone machine that isn't connected to any
++	other computer. If you are upgrading from an older kernel, you
++	should consider updating your networking tools too because changes
++	in the kernel and the tools often go hand in hand. The tools are
++	contained in the package net-tools, the location and version number
++	of which are given in Documentation/Changes.
++
++	For a general introduction to Linux networking, it is highly
++	recommended to read the NET-HOWTO, available from
++	<http://www.tldp.org/docs.html#howto>.
++
+ 
+ source "fs/Kconfig.binfmt"
+ 
+@@ -85,6 +99,19 @@
+         If you'd like to be able to work with files stored on the host, 
+         say Y or M here; otherwise say N.
+ 
++config HPPFS
++	tristate "HoneyPot ProcFS"
++	help
++	hppfs (HoneyPot ProcFS) is a filesystem which allows UML /proc 
++	entries to be overridden, removed, or fabricated from the host.
++	Its purpose is to allow a UML to appear to be a physical machine
++	by removing or changing anything in /proc which gives away the
++	identity of a UML.
++
++	See http://user-mode-linux.sf.net/hppfs.html for more information.
++
++	You only need this if you are setting up a UML honeypot.  Otherwise,
++	it is safe to say 'N' here.
+ 
+ config MCONSOLE
+ 	bool "Management console"
+@@ -105,6 +132,16 @@
+ config MAGIC_SYSRQ
+ 	bool "Magic SysRq key"
+ 	depends on MCONSOLE
++	help
++	If you say Y here, you will have some control over the system even
++	if the system crashes for example during kernel debugging (e.g., you
++	will be able to flush the buffer cache to disk, reboot the system
++	immediately or dump some status information). This is accomplished
++	by pressing various keys while holding SysRq (Alt+PrintScreen). It
++	also works on a serial console (on PC hardware at least), if you
++	send a BREAK and then within 5 seconds a command keypress. The
++	keys are documented in Documentation/sysrq.txt. Don't say Y
++	unless you really know what this hack does.
+ 
+ config HOST_2G_2G
+ 	bool "2G/2G host address space split"
+@@ -159,6 +196,9 @@
+ config HIGHMEM
+ 	bool "Highmem support"
+ 
++config PROC_MM
++	bool "/proc/mm support"
++
+ config KERNEL_STACK_ORDER
+ 	int "Kernel stack size order"
+ 	default 2
+@@ -239,6 +279,10 @@
+ config PT_PROXY
+ 	bool "Enable ptrace proxy"
+ 	depends on XTERM_CHAN && DEBUG_INFO
++	help
++	This option enables a debugging interface which allows gdb to debug
++	the kernel without needing to actually attach to kernel threads.
++	If you want to do kernel debugging, say Y here; otherwise say N.
+ 
+ config GPROF
+ 	bool "Enable gprof support"
+diff -Naur a/arch/um/Kconfig_block b/arch/um/Kconfig_block
+--- a/arch/um/Kconfig_block	Fri Aug 15 15:07:32 2003
++++ b/arch/um/Kconfig_block	Fri Aug 15 15:12:56 2003
+@@ -29,6 +29,20 @@
+         wise choice too.  In all other cases (for example, if you're just
+         playing around with User-Mode Linux) you can choose N.
+ 
++# Turn this back on when the driver actually works
++#
++#config BLK_DEV_COW
++#	tristate "COW block device"
++#	help
++#	This is a layered driver which sits above two other block devices.
++#	One is read-only, and the other is a read-write layer which stores
++#	all changes.  This provides the illusion that the read-only layer
++#	can be mounted read-write and changed.
++
++config BLK_DEV_COW_COMMON
++	bool
++	default BLK_DEV_COW || BLK_DEV_UBD
++
+ config BLK_DEV_LOOP
+ 	tristate "Loopback device support"
+ 
+diff -Naur a/arch/um/Kconfig_net b/arch/um/Kconfig_net
+--- a/arch/um/Kconfig_net	Fri Aug 15 15:06:52 2003
++++ b/arch/um/Kconfig_net	Fri Aug 15 15:12:43 2003
+@@ -1,5 +1,5 @@
+ 
+-menu "Network Devices"
++menu "UML Network Devices"
+ 	depends on NET
+ 
+ # UML virtual driver
+@@ -176,73 +176,5 @@
+ 	
+         Startup example: "eth0=slirp,FE:FD:01:02:03:04,/usr/local/bin/slirp"
+ 
+-
+-# Below are hardware-independent drivers mirrored from
+-# drivers/net/Config.in. It would be nice if Linux
+-# had HW independent drivers separated from the other
+-# but it does not. Until then each non-ISA/PCI arch
+-# needs to provide it's own menu of network drivers
+-config DUMMY
+-	tristate "Dummy net driver support"
+-
+-config BONDING
+-	tristate "Bonding driver support"
+-
+-config EQUALIZER
+-	tristate "EQL (serial line load balancing) support"
+-
+-config TUN
+-	tristate "Universal TUN/TAP device driver support"
+-
+-config ETHERTAP
+-	tristate "Ethertap network tap (OBSOLETE)"
+-	depends on EXPERIMENTAL && NETLINK
+-
+-config PPP
+-	tristate "PPP (point-to-point protocol) support"
+-
+-config PPP_MULTILINK
+-	bool "PPP multilink support (EXPERIMENTAL)"
+-	depends on PPP && EXPERIMENTAL
+-
+-config PPP_FILTER
+-	bool "PPP filtering"
+-	depends on PPP && FILTER
+-
+-config PPP_ASYNC
+-	tristate "PPP support for async serial ports"
+-	depends on PPP
+-
+-config PPP_SYNC_TTY
+-	tristate "PPP support for sync tty ports"
+-	depends on PPP
+-
+-config PPP_DEFLATE
+-	tristate "PPP Deflate compression"
+-	depends on PPP
+-
+-config PPP_BSDCOMP
+-	tristate "PPP BSD-Compress compression"
+-	depends on PPP
+-
+-config PPPOE
+-	tristate "PPP over Ethernet (EXPERIMENTAL)"
+-	depends on PPP && EXPERIMENTAL
+-
+-config SLIP
+-	tristate "SLIP (serial line) support"
+-
+-config SLIP_COMPRESSED
+-	bool "CSLIP compressed headers"
+-	depends on SLIP=y
+-
+-config SLIP_SMART
+-	bool "Keepalive and linefill"
+-	depends on SLIP=y
+-
+-config SLIP_MODE_SLIP6
+-	bool "Six bit SLIP encapsulation"
+-	depends on SLIP=y
+-
+ endmenu
+ 
+diff -Naur a/arch/um/Makefile b/arch/um/Makefile
+--- a/arch/um/Makefile	Fri Aug 15 15:07:18 2003
++++ b/arch/um/Makefile	Fri Aug 15 15:12:45 2003
+@@ -24,15 +24,17 @@
+ # Have to precede the include because the included Makefiles reference them.
+ SYMLINK_HEADERS = include/asm-um/archparam.h include/asm-um/system.h \
+ 	include/asm-um/sigcontext.h include/asm-um/processor.h \
+-	include/asm-um/ptrace.h include/asm-um/arch-signal.h
++	include/asm-um/ptrace.h include/asm-um/arch-signal.h \
++	include/asm-um/module.h
+ 
+ ARCH_SYMLINKS = include/asm-um/arch $(ARCH_DIR)/include/sysdep $(ARCH_DIR)/os \
+ 	$(SYMLINK_HEADERS) $(ARCH_DIR)/include/uml-config.h
+ 
+ GEN_HEADERS += $(ARCH_DIR)/include/task.h $(ARCH_DIR)/include/kern_constants.h
+ 
+-include $(ARCH_DIR)/Makefile-$(SUBARCH)
+-include $(ARCH_DIR)/Makefile-os-$(OS)
++.PHONY: sys_prepare
++sys_prepare:
++	@:
+ 
+ MAKEFILE-$(CONFIG_MODE_TT) += Makefile-tt
+ MAKEFILE-$(CONFIG_MODE_SKAS) += Makefile-skas
+@@ -41,6 +43,9 @@
+   include $(addprefix $(ARCH_DIR)/,$(MAKEFILE-y))
+ endif
+ 
++include $(ARCH_DIR)/Makefile-$(SUBARCH)
++include $(ARCH_DIR)/Makefile-os-$(OS)
++
+ EXTRAVERSION := $(EXTRAVERSION)-1um
+ 
+ ARCH_INCLUDE = -I$(ARCH_DIR)/include
+@@ -52,14 +57,14 @@
+ 
+ CFLAGS += $(CFLAGS-y) -D__arch_um__ -DSUBARCH=\"$(SUBARCH)\" \
+ 	-D_LARGEFILE64_SOURCE $(ARCH_INCLUDE) -Derrno=kernel_errno \
+-	$(MODE_INCLUDE)
++	-Dsigprocmask=kernel_sigprocmask $(MODE_INCLUDE)
+ 
+ LINK_WRAPS = -Wl,--wrap,malloc -Wl,--wrap,free -Wl,--wrap,calloc
+ 
+ SIZE = (($(CONFIG_NEST_LEVEL) + $(CONFIG_KERNEL_HALF_GIGS)) * 0x20000000)
+ 
+ ifeq ($(CONFIG_MODE_SKAS), y)
+-$(SYS_HEADERS) : $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h
++$(SYS_HEADERS) : $(TOPDIR)/$(ARCH_DIR)/include/skas_ptregs.h
+ endif
+ 
+ include/linux/version.h: arch/$(ARCH)/Makefile
+@@ -116,6 +121,7 @@
+ 
+ USER_CFLAGS := $(patsubst -I%,,$(CFLAGS))
+ USER_CFLAGS := $(patsubst -Derrno=kernel_errno,,$(USER_CFLAGS))
++USER_CFLAGS := $(patsubst -Dsigprocmask=kernel_sigprocmask,,$(USER_CFLAGS))
+ USER_CFLAGS := $(patsubst -D__KERNEL__,,$(USER_CFLAGS)) $(ARCH_INCLUDE) \
+ 	$(MODE_INCLUDE)
+ 
+@@ -123,9 +129,10 @@
+ USER_CFLAGS += -D_GNU_SOURCE
+ 
+ CLEAN_FILES += linux x.i gmon.out $(ARCH_DIR)/uml.lds.s \
+-	$(ARCH_DIR)/dyn_link.ld.s $(GEN_HEADERS)
++	$(ARCH_DIR)/dyn_link.ld.s $(ARCH_DIR)/include/uml-config.h \
++	$(GEN_HEADERS)
+ 
+-$(ARCH_DIR)/main.o: $(ARCH_DIR)/main.c
++$(ARCH_DIR)/main.o: $(ARCH_DIR)/main.c sys_prepare
+ 	$(CC) $(USER_CFLAGS) $(EXTRA_CFLAGS) -c -o $@ $<
+ 
+ archmrproper:
+@@ -161,19 +168,23 @@
+ $(ARCH_DIR)/os:
+ 	cd $(ARCH_DIR) && ln -sf os-$(OS) os
+ 
+-$(ARCH_DIR)/include/uml-config.h :
++$(ARCH_DIR)/include/uml-config.h : $(TOPDIR)/include/linux/autoconf.h
+ 	sed 's/ CONFIG/ UML_CONFIG/' $(TOPDIR)/include/linux/autoconf.h > $@
+ 
++filechk_$(ARCH_DIR)/include/task.h := $(ARCH_DIR)/util/mk_task
++
+ $(ARCH_DIR)/include/task.h : $(ARCH_DIR)/util/mk_task
+-	$< > $@
++	$(call filechk,$@)
++
++filechk_$(ARCH_DIR)/include/kern_constants.h := $(ARCH_DIR)/util/mk_constants
+ 
+ $(ARCH_DIR)/include/kern_constants.h : $(ARCH_DIR)/util/mk_constants
+-	$< > $@
++	$(call filechk,$@)
+ 
+-$(ARCH_DIR)/util/mk_task : $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h \
+-	$(ARCH_DIR)/util FORCE ;
++$(ARCH_DIR)/util/mk_task $(ARCH_DIR)/util/mk_constants : $(ARCH_DIR)/util \
++	sys_prepare FORCE ;
+ 
+ $(ARCH_DIR)/util: FORCE
+-	@$(call descend,$@,)
++	$(MAKE) -f scripts/Makefile.build obj=$@
+ 
+-export SUBARCH USER_CFLAGS OS
++export SUBARCH USER_CFLAGS OS 
+diff -Naur a/arch/um/Makefile-i386 b/arch/um/Makefile-i386
+--- a/arch/um/Makefile-i386	Fri Aug 15 15:07:46 2003
++++ b/arch/um/Makefile-i386	Fri Aug 15 15:13:14 2003
+@@ -16,22 +16,28 @@
+ 
+ SYS_HEADERS = $(SYS_DIR)/sc.h $(SYS_DIR)/thread.h
+ 
++sys_prepare: $(SYS_DIR)/sc.h
++
+ prepare: $(SYS_HEADERS)
+ 
++filechk_$(SYS_DIR)/sc.h := $(SYS_UTIL_DIR)/mk_sc
++
+ $(SYS_DIR)/sc.h: $(SYS_UTIL_DIR)/mk_sc
+-	$< > $@
++	$(call filechk,$@)
++
++filechk_$(SYS_DIR)/thread.h := $(SYS_UTIL_DIR)/mk_thread 
+ 
+ $(SYS_DIR)/thread.h: $(SYS_UTIL_DIR)/mk_thread 
+-	$< > $@
++	$(call filechk,$@)
+ 
+-$(SYS_UTIL_DIR)/mk_sc: FORCE ; 
+-	@$(call descend,$(SYS_UTIL_DIR),$@)
++$(SYS_UTIL_DIR)/mk_sc: scripts/fixdep include/config/MARKER FORCE ; 
++	+@$(call descend,$(SYS_UTIL_DIR),$@)
+ 
+-$(SYS_UTIL_DIR)/mk_thread: $(ARCH_SYMLINKS) $(GEN_HEADERS) FORCE ; 
+-	@$(call descend,$(SYS_UTIL_DIR),$@)
++$(SYS_UTIL_DIR)/mk_thread: $(ARCH_SYMLINKS) $(GEN_HEADERS) sys_prepare FORCE ; 
++	+@$(call descend,$(SYS_UTIL_DIR),$@)
+ 
+ $(SYS_UTIL_DIR): include/asm FORCE
+-	@$(call descend,$@,)
++	+@$(call descend,$@,)
+ 
+ sysclean :
+ 	rm -f $(SYS_HEADERS)
+diff -Naur a/arch/um/Makefile-skas b/arch/um/Makefile-skas
+--- a/arch/um/Makefile-skas	Fri Aug 15 15:05:43 2003
++++ b/arch/um/Makefile-skas	Fri Aug 15 15:11:52 2003
+@@ -14,7 +14,7 @@
+ LINK_SKAS = -Wl,-rpath,/lib 
+ LD_SCRIPT_SKAS = dyn.lds.s
+ 
+-GEN_HEADERS += $(ARCH_DIR)/kernel/skas/include/skas_ptregs.h
++GEN_HEADERS += $(TOPDIR)/$(ARCH_DIR)/include/skas_ptregs.h
+ 
+-$(ARCH_DIR)/kernel/skas/include/skas_ptregs.h :
+-	$(MAKE) -C $(ARCH_DIR)/kernel/skas include/skas_ptregs.h
++$(TOPDIR)/$(ARCH_DIR)/include/skas_ptregs.h :
++	$(call descend,$(ARCH_DIR)/kernel/skas,$@)
+diff -Naur a/arch/um/config.release b/arch/um/config.release
+--- a/arch/um/config.release	Fri Aug 15 15:09:05 2003
++++ b/arch/um/config.release	Fri Aug 15 15:13:48 2003
+@@ -228,7 +228,6 @@
+ CONFIG_EXT2_FS=y
+ CONFIG_SYSV_FS=m
+ CONFIG_UDF_FS=m
+-# CONFIG_UDF_RW is not set
+ CONFIG_UFS_FS=m
+ # CONFIG_UFS_FS_WRITE is not set
+ 
+diff -Naur a/arch/um/defconfig b/arch/um/defconfig
+--- a/arch/um/defconfig	Fri Aug 15 15:07:30 2003
++++ b/arch/um/defconfig	Fri Aug 15 15:12:54 2003
+@@ -6,7 +6,6 @@
+ CONFIG_SWAP=y
+ CONFIG_UID16=y
+ CONFIG_RWSEM_GENERIC_SPINLOCK=y
+-CONFIG_CONFIG_LOG_BUF_SHIFT=14
+ 
+ #
+ # Code maturity level options
+@@ -116,7 +115,6 @@
+ CONFIG_PACKET_MMAP=y
+ # CONFIG_NETLINK_DEV is not set
+ # CONFIG_NETFILTER is not set
+-# CONFIG_FILTER is not set
+ CONFIG_UNIX=y
+ # CONFIG_NET_KEY is not set
+ CONFIG_INET=y
+@@ -385,7 +383,6 @@
+ #
+ # Disk-On-Chip Device Drivers
+ #
+-# CONFIG_MTD_DOC1000 is not set
+ # CONFIG_MTD_DOC2000 is not set
+ # CONFIG_MTD_DOC2001 is not set
+ 
+diff -Naur a/arch/um/drivers/Makefile b/arch/um/drivers/Makefile
+--- a/arch/um/drivers/Makefile	Fri Aug 15 15:06:42 2003
++++ b/arch/um/drivers/Makefile	Fri Aug 15 15:12:40 2003
+@@ -1,5 +1,5 @@
+ # 
+-# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com)
++# Copyright (C) 2000, 2002, 2003 Jeff Dike (jdike@karaya.com)
+ # Licensed under the GPL
+ #
+ 
+@@ -39,6 +39,8 @@
+ obj-$(CONFIG_TTY_CHAN) += tty.o 
+ obj-$(CONFIG_XTERM_CHAN) += xterm.o xterm_kern.o
+ obj-$(CONFIG_UML_WATCHDOG) += harddog.o
++obj-$(CONFIG_BLK_DEV_COW) += cow_kern.o
++obj-$(CONFIG_BLK_DEV_COW_COMMON) += cow_user.o
+ 
+ obj-y += stdio_console.o $(CHAN_OBJS)
+ 
+@@ -46,7 +48,7 @@
+ 
+ USER_OBJS := $(filter %_user.o,$(obj-y) $(obj-m) $(USER_SINGLE_OBJS)) fd.o \
+ 	null.o pty.o tty.o xterm.o
+-USER_OBJS := $(foreach file,$(USER_OBJS),arch/um/drivers/$(file))
++USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
+ 
+ $(USER_OBJS) : %.o: %.c
+ 	$(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $<
+diff -Naur a/arch/um/drivers/chan_kern.c b/arch/um/drivers/chan_kern.c
+--- a/arch/um/drivers/chan_kern.c	Fri Aug 15 15:09:13 2003
++++ b/arch/um/drivers/chan_kern.c	Fri Aug 15 15:13:51 2003
+@@ -8,6 +8,7 @@
+ #include <linux/list.h>
+ #include <linux/slab.h>
+ #include <linux/tty.h>
++#include <linux/string.h>
+ #include <linux/tty_flip.h>
+ #include <asm/irq.h>
+ #include "chan_kern.h"
+diff -Naur a/arch/um/drivers/chan_user.c b/arch/um/drivers/chan_user.c
+--- a/arch/um/drivers/chan_user.c	Fri Aug 15 15:03:46 2003
++++ b/arch/um/drivers/chan_user.c	Fri Aug 15 15:10:09 2003
+@@ -188,8 +188,8 @@
+ 	if(!isatty(fd)) return;
+ 
+ 	pid = tcgetpgrp(fd);
+-	if(!CHOOSE_MODE(is_tracer_winch(pid, fd, device_data), 0) && 
+-	   (pid == -1)){
++	if(!CHOOSE_MODE_PROC(is_tracer_winch, is_skas_winch, pid, fd, 
++			     device_data) && (pid == -1)){
+ 		thread = winch_tramp(fd, device_data, &thread_fd);
+ 		if(fd != -1){
+ 			register_winch_irq(thread_fd, fd, thread, device_data);
+diff -Naur a/arch/um/drivers/cow.h b/arch/um/drivers/cow.h
+--- a/arch/um/drivers/cow.h	Wed Dec 31 19:00:00 1969
++++ b/arch/um/drivers/cow.h	Fri Aug 15 15:10:34 2003
+@@ -0,0 +1,40 @@
++#ifndef __COW_H__
++#define __COW_H__
++
++#include <asm/types.h>
++
++#if __BYTE_ORDER == __BIG_ENDIAN
++# define ntohll(x) (x)
++# define htonll(x) (x)
++#elif __BYTE_ORDER == __LITTLE_ENDIAN
++# define ntohll(x)  bswap_64(x)
++# define htonll(x)  bswap_64(x)
++#else
++#error "__BYTE_ORDER not defined"
++#endif
++
++extern int init_cow_file(int fd, char *cow_file, char *backing_file, 
++			 int sectorsize, int *bitmap_offset_out, 
++			 unsigned long *bitmap_len_out, int *data_offset_out);
++
++extern int file_reader(__u64 offset, char *buf, int len, void *arg);
++extern int read_cow_header(int (*reader)(__u64, char *, int, void *), 
++			   void *arg, __u32 *magic_out, 
++			   char **backing_file_out, time_t *mtime_out, 
++			   __u64 *size_out, int *sectorsize_out, 
++			   int *bitmap_offset_out);
++
++extern int write_cow_header(char *cow_file, int fd, char *backing_file, 
++			    int sectorsize, long long *size);
++
++extern void cow_sizes(__u64 size, int sectorsize, int bitmap_offset, 
++		      unsigned long *bitmap_len_out, int *data_offset_out);
++
++#endif
++
++/*
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/drivers/cow_kern.c b/arch/um/drivers/cow_kern.c
+--- a/arch/um/drivers/cow_kern.c	Wed Dec 31 19:00:00 1969
++++ b/arch/um/drivers/cow_kern.c	Fri Aug 15 15:13:51 2003
+@@ -0,0 +1,628 @@
++#define COW_MAJOR 60
++#define MAJOR_NR COW_MAJOR
++
++#include <linux/stddef.h>
++#include <linux/kernel.h>
++#include <linux/ctype.h>
++#include <linux/stat.h>
++#include <linux/vmalloc.h>
++#include <linux/blkdev.h>
++#include <linux/blk.h>
++#include <linux/fs.h>
++#include <linux/genhd.h>
++#include <linux/devfs_fs.h>
++#include <asm/uaccess.h>
++#include "2_5compat.h"
++#include "cow.h"
++#include "ubd_user.h"
++
++#define COW_SHIFT 4
++
++struct cow {
++	int count;
++	char *cow_path;
++	dev_t cow_dev;
++	struct block_device *cow_bdev;
++	char *backing_path;
++	dev_t backing_dev;
++	struct block_device *backing_bdev;
++	int sectorsize;
++	unsigned long *bitmap;
++	unsigned long bitmap_len;
++	int bitmap_offset;
++	int data_offset;
++	devfs_handle_t devfs;
++	struct semaphore sem;
++	struct semaphore io_sem;
++	atomic_t working;
++	spinlock_t io_lock;
++	struct buffer_head *bh;
++	struct buffer_head *bhtail;
++	void *end_io;
++};
++
++#define DEFAULT_COW { \
++	.count			= 0, \
++	.cow_path		= NULL, \
++	.cow_dev		= 0, \
++	.backing_path		= NULL, \
++	.backing_dev		= 0, \
++        .bitmap			= NULL, \
++	.bitmap_len		= 0, \
++	.bitmap_offset		= 0, \
++        .data_offset		= 0, \
++	.devfs			= NULL, \
++	.working		= ATOMIC_INIT(0), \
++	.io_lock		= SPIN_LOCK_UNLOCKED, \
++}
++
++#define MAX_DEV (8)
++#define MAX_MINOR (MAX_DEV << COW_SHIFT)
++
++struct cow cow_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_COW };
++
++/* Not modified by this driver */
++static int blk_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = BLOCK_SIZE };
++static int hardsect_sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 512 };
++
++/* Protected by cow_lock */
++static int sizes[MAX_MINOR] = { [ 0 ... MAX_MINOR - 1 ] = 0 };
++
++static struct hd_struct	cow_part[MAX_MINOR] =
++	{ [ 0 ... MAX_MINOR - 1 ] = { 0, 0, 0 } };
++
++/* Protected by io_request_lock */
++static request_queue_t *cow_queue;
++
++static int cow_open(struct inode *inode, struct file *filp);
++static int cow_release(struct inode * inode, struct file * file);
++static int cow_ioctl(struct inode * inode, struct file * file,
++		     unsigned int cmd, unsigned long arg);
++static int cow_revalidate(kdev_t rdev);
++
++static struct block_device_operations cow_blops = {
++       .open		= cow_open,
++       .release	= cow_release,
++       .ioctl		= cow_ioctl,
++       .revalidate	= cow_revalidate,
++};
++
++/* Initialized in an initcall, and unchanged thereafter */
++devfs_handle_t cow_dir_handle;
++
++#define INIT_GENDISK(maj, name, parts, shift, bsizes, max, blops) \
++{ \
++	.major 		= maj, \
++	.major_name  	= name, \
++	.minor_shift 	= shift, \
++	.max_p  	= 1 << shift, \
++	.part  		= parts, \
++	.sizes  	= bsizes, \
++	.nr_real  	= max, \
++	.real_devices  	= NULL, \
++	.next  		= NULL, \
++	.fops  		= blops, \
++	.de_arr  	= NULL, \
++	.flags  	= 0 \
++}
++
++static spinlock_t cow_lock = SPIN_LOCK_UNLOCKED;
++
++static struct gendisk cow_gendisk = INIT_GENDISK(MAJOR_NR, "cow", cow_part,
++						 COW_SHIFT, sizes, MAX_DEV, 
++						 &cow_blops);
++
++static int cow_add(int n)
++{
++	struct cow *dev = &cow_dev[n];
++	char name[sizeof("nnnnnn\0")];
++	int err = -ENODEV;
++
++	if(dev->cow_path == NULL)
++		goto out;
++
++	sprintf(name, "%d", n);
++	dev->devfs = devfs_register(cow_dir_handle, name, DEVFS_FL_REMOVABLE,
++				    MAJOR_NR, n << COW_SHIFT, S_IFBLK | 
++				    S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP,
++				    &cow_blops, NULL);
++
++	init_MUTEX_LOCKED(&dev->sem);
++	init_MUTEX(&dev->io_sem);
++
++	return(0);
++
++out:
++	return(err);
++}
++
++/*
++* Add buffer_head to back of pending list
++*/
++static void cow_add_bh(struct cow *cow, struct buffer_head *bh)
++{
++	unsigned long flags;
++
++	spin_lock_irqsave(&cow->io_lock, flags);
++	if(cow->bhtail != NULL){
++		cow->bhtail->b_reqnext = bh;
++		cow->bhtail = bh;
++	}
++	else {
++		cow->bh = bh;
++		cow->bhtail = bh;
++	}
++	spin_unlock_irqrestore(&cow->io_lock, flags);
++}
++
++/*
++* Grab first pending buffer
++*/
++static struct buffer_head *cow_get_bh(struct cow *cow)
++{
++	struct buffer_head *bh;
++
++	spin_lock_irq(&cow->io_lock);
++	bh = cow->bh;
++	if(bh != NULL){
++		if(bh == cow->bhtail)
++			cow->bhtail = NULL;
++		cow->bh = bh->b_reqnext;
++		bh->b_reqnext = NULL;
++	}
++	spin_unlock_irq(&cow->io_lock);
++
++	return(bh);
++}
++
++static void cow_handle_bh(struct cow *cow, struct buffer_head *bh, 
++			  struct buffer_head **cow_bh, int ncow_bh)
++{
++	int i;
++
++	if(ncow_bh > 0)
++		ll_rw_block(WRITE, ncow_bh, cow_bh);
++
++	for(i = 0; i < ncow_bh ; i++){
++		wait_on_buffer(cow_bh[i]);
++		brelse(cow_bh[i]);
++	}
++
++	ll_rw_block(WRITE, 1, &bh);
++	brelse(bh);
++}
++
++static struct buffer_head *cow_new_bh(struct cow *dev, int sector)
++{
++	struct buffer_head *bh;
++
++	sector = (dev->bitmap_offset + sector / 8) / dev->sectorsize;
++	bh = getblk(dev->cow_dev, sector, dev->sectorsize);
++	memcpy(bh->b_data, dev->bitmap + sector / (8 * sizeof(dev->bitmap[0])),
++	       dev->sectorsize);
++	return(bh);
++}
++
++/* Copied from loop.c, needed to avoid deadlocking in make_request. */
++
++static int cow_thread(void *data)
++{
++	struct cow *dev = data;
++	struct buffer_head *bh;
++
++	daemonize();
++	exit_files(current);
++
++	sprintf(current->comm, "cow%d", dev - cow_dev);
++
++	spin_lock_irq(&current->sigmask_lock);
++	sigfillset(&current->blocked);
++	flush_signals(current);
++	spin_unlock_irq(&current->sigmask_lock);
++
++	atomic_inc(&dev->working);
++
++	current->policy = SCHED_OTHER;
++	current->nice = -20;
++
++	current->flags |= PF_NOIO;
++
++	/*
++	 * up sem, we are running (cow_open waits in down(&dev->sem))
++	 */
++	up(&dev->sem);
++
++	for(;;){
++		int start, len, nbh, i, update_bitmap = 0;
++		struct buffer_head *cow_bh[2];
++
++		down_interruptible(&dev->io_sem);
++		/*
++		 * could be upped because of tear-down, not because of
++		 * pending work
++		 */
++		if(!atomic_read(&dev->working))
++			break;
++
++		bh = cow_get_bh(dev);
++		if(bh == NULL){
++			printk(KERN_ERR "cow: missing bh\n");
++			continue;
++		}
++
++		start = bh->b_blocknr * bh->b_size / dev->sectorsize;
++		len = bh->b_size / dev->sectorsize;
++		for(i = 0; i < len ; i++){
++			if(ubd_test_bit(start + i, 
++					(unsigned char *) dev->bitmap))
++				continue;
++
++			update_bitmap = 1;
++			ubd_set_bit(start + i, (unsigned char *) dev->bitmap);
++		}
++
++		cow_bh[0] = NULL;
++		cow_bh[1] = NULL;
++		nbh = 0;
++		if(update_bitmap){
++			cow_bh[0] = cow_new_bh(dev, start);
++			nbh++;
++			if(start / dev->sectorsize != 
++			   (start + len) / dev->sectorsize){
++				cow_bh[1] = cow_new_bh(dev, start + len);
++				nbh++;
++			}
++		}
++		
++		bh->b_dev = dev->cow_dev;
++		bh->b_blocknr += dev->data_offset / dev->sectorsize;
++
++		cow_handle_bh(dev, bh, cow_bh, nbh);
++
++		/*
++		 * upped both for pending work and tear-down, dev->working
++		 * will hit zero then
++		 */
++		if(atomic_dec_and_test(&dev->working))
++			break;
++	}
++
++	up(&dev->sem);
++	return(0);
++}
++
++static int cow_make_request(request_queue_t *q, int rw, struct buffer_head *bh)
++{
++	struct cow *dev;
++	int n, minor;
++
++	minor = MINOR(bh->b_rdev);
++	n = minor >> COW_SHIFT;
++	dev = &cow_dev[n];
++
++	dev->end_io = NULL;
++	if(ubd_test_bit(bh->b_rsector, (unsigned char *) dev->bitmap)){
++		bh->b_rdev = dev->cow_dev;
++		bh->b_rsector += dev->data_offset / dev->sectorsize;
++	}
++	else if(rw == WRITE){
++		bh->b_dev = dev->cow_dev;
++		bh->b_blocknr += dev->data_offset / dev->sectorsize;
++
++		cow_add_bh(dev, bh);
++		up(&dev->io_sem);
++		return(0);
++	}
++	else {
++		bh->b_rdev = dev->backing_dev;
++	}
++
++	return(1);
++}
++
++int cow_init(void)
++{
++	int i;
++
++	cow_dir_handle = devfs_mk_dir (NULL, "cow", NULL);
++	if (devfs_register_blkdev(MAJOR_NR, "cow", &cow_blops)) {
++		printk(KERN_ERR "cow: unable to get major %d\n", MAJOR_NR);
++		return -1;
++	}
++	read_ahead[MAJOR_NR] = 8;		/* 8 sector (4kB) read-ahead */
++	blksize_size[MAJOR_NR] = blk_sizes;
++	blk_size[MAJOR_NR] = sizes;
++	INIT_HARDSECT(hardsect_size, MAJOR_NR, hardsect_sizes);
++
++	cow_queue = BLK_DEFAULT_QUEUE(MAJOR_NR);
++	blk_init_queue(cow_queue, NULL);
++	INIT_ELV(cow_queue, &cow_queue->elevator);
++	blk_queue_make_request(cow_queue, cow_make_request);
++
++       add_gendisk(&cow_gendisk);
++
++	for(i=0;i<MAX_DEV;i++) 
++		cow_add(i);
++
++	return(0);
++}
++
++__initcall(cow_init);
++
++static int reader(__u64 start, char *buf, int count, void *arg)
++{
++	dev_t dev = *((dev_t *) arg);
++	struct buffer_head *bh;
++	__u64 block;
++	int cur, offset, left, n, blocksize = get_hardsect_size(dev);
++
++	if(blocksize == 0)
++		panic("Zero blocksize");
++
++	block = start / blocksize;
++	offset = start % blocksize;
++	left = count;
++	cur = 0;
++	while(left > 0){
++		n = (left > blocksize) ? blocksize : left;
++
++		bh = bread(dev, block, (n < 512) ? 512 : n);
++		if(bh == NULL)
++			return(-EIO);
++
++		n -= offset;
++		memcpy(&buf[cur], bh->b_data + offset, n);
++		block++;
++		left -= n;
++		cur += n;
++		offset = 0;
++		brelse(bh);
++	}
++
++	return(count);
++}
++
++static int cow_open(struct inode *inode, struct file *filp)
++{
++	int (*dev_ioctl)(struct inode *, struct file *, unsigned int, 
++			 unsigned long);
++	mm_segment_t fs;
++	struct cow *dev;
++	__u64 size;
++	__u32 magic;
++	time_t mtime;
++	char *backing_file;
++	int n, offset, err = 0;
++
++	n = DEVICE_NR(inode->i_rdev);
++	if(n >= MAX_DEV)
++		return(-ENODEV);
++	dev = &cow_dev[n];
++	offset = n << COW_SHIFT;
++
++	spin_lock(&cow_lock);
++
++	if(dev->count == 0){
++		dev->cow_dev = name_to_kdev_t(dev->cow_path);
++		if(dev->cow_dev == 0){
++			printk(KERN_ERR "cow_open - name_to_kdev_t(\"%s\") "
++			       "failed\n", dev->cow_path);
++			err = -ENODEV;
++		}
++
++		dev->backing_dev = name_to_kdev_t(dev->backing_path);
++		if(dev->backing_dev == 0){
++			printk(KERN_ERR "cow_open - name_to_kdev_t(\"%s\") "
++			       "failed\n", dev->backing_path);
++			err = -ENODEV;
++		}
++
++		if(err) 
++			goto out;
++
++		dev->cow_bdev = bdget(dev->cow_dev);
++		if(dev->cow_bdev == NULL){
++			printk(KERN_ERR "cow_open - bdget(\"%s\") failed\n", 
++			       dev->cow_path);
++			err = -ENOMEM;
++		}
++		dev->backing_bdev = bdget(dev->backing_dev);
++		if(dev->backing_bdev == NULL){
++			printk(KERN_ERR "cow_open - bdget(\"%s\") failed\n", 
++			       dev->backing_path);
++			err = -ENOMEM;
++		}
++
++		if(err) 
++			goto out;
++
++		err = blkdev_get(dev->cow_bdev, FMODE_READ|FMODE_WRITE, 0, 
++				 BDEV_RAW);
++		if(err){
++			printk("cow_open - blkdev_get of COW device failed, "
++			       "error = %d\n", err);
++			goto out;
++		}
++		
++		err = blkdev_get(dev->backing_bdev, FMODE_READ, 0, BDEV_RAW);
++		if(err){
++			printk("cow_open - blkdev_get of backing device "
++			       "failed, error = %d\n", err);
++			goto out;
++		}
++		
++		err = read_cow_header(reader, &dev->cow_dev, &magic, 
++				      &backing_file, &mtime, &size,
++				      &dev->sectorsize, &dev->bitmap_offset);
++		if(err){
++			printk(KERN_ERR "cow_open - read_cow_header failed, "
++			       "err = %d\n", err);
++			goto out;
++		}
++
++		cow_sizes(size, dev->sectorsize, dev->bitmap_offset, 
++			  &dev->bitmap_len, &dev->data_offset);
++		dev->bitmap = (void *) vmalloc(dev->bitmap_len);
++		if(dev->bitmap == NULL){
++			err = -ENOMEM;
++			printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
++			goto out;
++		}
++		flush_tlb_kernel_vm();
++		
++		err = reader(dev->bitmap_offset, (char *) dev->bitmap, 
++			     dev->bitmap_len, &dev->cow_dev);
++		if(err < 0){
++			printk(KERN_ERR "Failed to read COW bitmap\n");
++			vfree(dev->bitmap);
++			goto out;
++		}
++
++		dev_ioctl = dev->backing_bdev->bd_op->ioctl;
++		fs = get_fs();
++		set_fs(KERNEL_DS);
++		err = (*dev_ioctl)(inode, filp, BLKGETSIZE, 
++				   (unsigned long) &sizes[offset]);
++		set_fs(fs);
++		if(err){
++			printk(KERN_ERR "cow_open - BLKGETSIZE failed, "
++			       "error = %d\n", err);
++			goto out;
++		}
++
++		kernel_thread(cow_thread, dev, 
++			      CLONE_FS | CLONE_FILES | CLONE_SIGHAND);
++		down(&dev->sem);
++	}
++	dev->count++;
++out:
++	spin_unlock(&cow_lock);
++	return(err);
++}
++
++static int cow_release(struct inode * inode, struct file * file)
++{
++	struct cow *dev;
++	int n, err;
++
++	n = DEVICE_NR(inode->i_rdev);
++	if(n >= MAX_DEV)
++		return(-ENODEV);
++	dev = &cow_dev[n];
++
++	spin_lock(&cow_lock);
++
++	if(--dev->count > 0)
++		goto out;
++
++	err = blkdev_put(dev->cow_bdev, BDEV_RAW);
++	if(err)
++		printk("cow_release - blkdev_put of cow device failed, "
++		       "error = %d\n", err);
++	bdput(dev->cow_bdev);
++	dev->cow_bdev = 0;
++
++	err = blkdev_put(dev->backing_bdev, BDEV_RAW);
++	if(err)
++		printk("cow_release - blkdev_put of backing device failed, "
++		       "error = %d\n", err);
++	bdput(dev->backing_bdev);
++	dev->backing_bdev = 0;
++
++out:
++	spin_unlock(&cow_lock);
++	return(0);
++}
++
++static int cow_ioctl(struct inode * inode, struct file * file,
++		     unsigned int cmd, unsigned long arg)
++{
++	struct cow *dev;
++	int (*dev_ioctl)(struct inode *, struct file *, unsigned int, 
++			 unsigned long);
++	int n;
++
++	n = DEVICE_NR(inode->i_rdev);
++	if(n >= MAX_DEV)
++		return(-ENODEV);
++	dev = &cow_dev[n];
++
++	dev_ioctl = dev->backing_bdev->bd_op->ioctl;
++	return((*dev_ioctl)(inode, file, cmd, arg));
++}
++
++static int cow_revalidate(kdev_t rdev)
++{
++	printk(KERN_ERR "Need to implement cow_revalidate\n");
++	return(0);
++}
++
++static int parse_unit(char **ptr)
++{
++	char *str = *ptr, *end;
++	int n = -1;
++
++	if(isdigit(*str)) {
++		n = simple_strtoul(str, &end, 0);
++		if(end == str)
++			return(-1);
++		*ptr = end;
++	}
++	else if (('a' <= *str) && (*str <= 'h')) {
++		n = *str - 'a';
++		str++;
++		*ptr = str;
++	}
++	return(n);
++}
++
++static int cow_setup(char *str)
++{
++	struct cow *dev;
++	char *cow_name, *backing_name;
++	int unit;	/* index into cow_dev[]; must be < MAX_DEV */
++
++	unit = parse_unit(&str);
++	if((unit < 0) || (unit >= MAX_DEV)){
++		printk(KERN_ERR "cow_setup - Couldn't parse unit number\n");
++		return(1);
++	}
++
++	if(*str != '='){
++		printk(KERN_ERR "cow_setup - Missing '=' after unit "
++		       "number\n");
++		return(1);
++	}
++	str++;
++
++	cow_name = str;
++	backing_name = strchr(str, ',');
++	if(backing_name == NULL){
++		printk(KERN_ERR "cow_setup - missing backing device name\n");
++		return(0);
++	}
++	*backing_name = '\0';
++	backing_name++;
++
++	spin_lock(&cow_lock);
++
++	dev = &cow_dev[unit];
++	dev->cow_path = cow_name;
++	dev->backing_path = backing_name;
++	
++	spin_unlock(&cow_lock);
++	return(0);
++}
++
++__setup("cow", cow_setup);
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only.  This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/drivers/cow_sys.h b/arch/um/drivers/cow_sys.h
+--- a/arch/um/drivers/cow_sys.h	Wed Dec 31 19:00:00 1969
++++ b/arch/um/drivers/cow_sys.h	Fri Aug 15 15:12:37 2003
+@@ -0,0 +1,48 @@
++#ifndef __COW_SYS_H__
++#define __COW_SYS_H__
++
++#include "kern_util.h"
++#include "user_util.h"
++#include "os.h"
++#include "user.h"
++
++static inline void *cow_malloc(int size)
++{
++	return(um_kmalloc(size));
++}
++
++static inline void cow_free(void *ptr)
++{
++	kfree(ptr);
++}
++
++#define cow_printf printk
++
++static inline char *cow_strdup(char *str)
++{
++	return(uml_strdup(str));
++}
++
++static inline int cow_seek_file(int fd, __u64 offset)
++{
++	return(os_seek_file(fd, offset));
++}
++
++static inline int cow_file_size(char *file, __u64 *size_out)
++{
++	return(os_file_size(file, size_out));
++}
++
++static inline int cow_write_file(int fd, char *buf, int size)
++{
++	return(os_write_file(fd, buf, size));
++}
++
++#endif
++
++/*
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/drivers/cow_user.c b/arch/um/drivers/cow_user.c
+--- a/arch/um/drivers/cow_user.c	Wed Dec 31 19:00:00 1969
++++ b/arch/um/drivers/cow_user.c	Fri Aug 15 15:12:34 2003
+@@ -0,0 +1,296 @@
++#include <stddef.h>
++#include <string.h>
++#include <errno.h>
++#include <unistd.h>
++#include <byteswap.h>
++#include <sys/stat.h>
++#include <sys/time.h>
++#include <sys/param.h>
++#include <netinet/in.h>
++
++#include "cow.h"
++#include "cow_sys.h"
++
++#define PATH_LEN_V1 256
++
++struct cow_header_v1 {	/* V1 header; the bitmap starts at sizeof(v1) */
++	int magic;		/* matched against COW_MAGIC */
++	int version;		/* 1 selects this layout */
++	char backing_file[PATH_LEN_V1];
++	time_t mtime;		/* read_cow_header uses these as stored, */
++	__u64 size;		/* with no byte order conversion */
++	int sectorsize;
++};
++
++#define PATH_LEN_V2 MAXPATHLEN
++
++struct cow_header_v2 {	/* V2 header, as written by write_cow_header() */
++	unsigned long magic;	/* htonl(COW_MAGIC) */
++	unsigned long version;	/* htonl(COW_VERSION) */
++	char backing_file[PATH_LEN_V2];	/* path produced by absolutize() */
++	time_t mtime;		/* htonl() of the backing file's st_mtime */
++	__u64 size;		/* htonll() of the backing file's size */
++	int sectorsize;		/* htonl() of the sector size */
++};
++
++union cow_header {	/* read buffer that can hold either version */
++	struct cow_header_v1 v1;
++	struct cow_header_v2 v2;
++};
++
++#define COW_MAGIC 0x4f4f4f4d  /* MOOO */
++#define COW_VERSION 2
++
++void cow_sizes(__u64 size, int sectorsize, int bitmap_offset, 
++	       unsigned long *bitmap_len_out, int *data_offset_out)
++{	/* bitmap bytes at one bit per sector - NOTE(review): rounds down */
++	*bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize);
++
++	*data_offset_out = bitmap_offset + *bitmap_len_out;
++	*data_offset_out = (*data_offset_out + sectorsize - 1) / sectorsize;
++	*data_offset_out *= sectorsize;	/* rounded up to a sector boundary */
++}
++
++/* Write the absolute form of 'from' into 'to' (size bytes); 0 or -1 */
++static int absolutize(char *to, int size, char *from)
++{
++	char save_cwd[256], *slash;
++	int err = -1;
++
++	if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) {
++		cow_printf("absolutize : unable to get cwd - errno = %d\n",
++			   errno);
++		return(-1);
++	}
++	slash = strrchr(from, '/');
++	if(slash == NULL){	/* bare name: prepend the current directory */
++		if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){
++			cow_printf("absolutize : unable to fit '%s' into %d "
++			       "chars\n", from, size);
++			return(-1);
++		}
++		strcat(strcpy(to, save_cwd), "/");
++		strcat(to, from);
++		return(0);
++	}
++	*slash = '\0';	/* cd into the directory part and ask for its cwd */
++	if(chdir(from)){
++		*slash = '/';
++		cow_printf("absolutize : Can't cd to '%s' - errno = %d\n",
++			   from, errno);
++		return(-1);
++	}
++	*slash = '/';
++	if(getcwd(to, size) == NULL){
++		cow_printf("absolutize : unable to get cwd of '%s' - "
++			   "errno = %d\n", from, errno);
++		goto out;
++	}
++	if(strlen(slash) + 1 > size - strlen(to)){
++		cow_printf("absolutize : unable to fit '%s' into %d "
++			   "chars\n", from, size);
++		goto out;
++	}
++	strcat(to, slash);
++	err = 0;
++ out:
++	chdir(save_cwd);	/* failure paths must restore the cwd too */
++	return(err);
++}
++
++/* Write a V2 COW header describing 'backing_file' at offset 0 of 'fd' and
++ * return the backing file's size in *size.  Returns 0 or a negative error. */
++int write_cow_header(char *cow_file, int fd, char *backing_file,
++		     int sectorsize, long long *size)
++{
++	struct cow_header_v2 *header;
++	struct stat64 buf;
++	int err;
++
++	err = cow_seek_file(fd, 0);
++	if(err != 0){
++		cow_printf("write_cow_header - lseek failed, errno = %d\n",
++			   errno);
++		return(-errno);
++	}
++
++	header = cow_malloc(sizeof(*header));
++	if(header == NULL){
++		cow_printf("Failed to allocate COW V2 header\n");
++		return(-ENOMEM);
++	}
++	header->magic = htonl(COW_MAGIC);
++	header->version = htonl(COW_VERSION);
++
++	err = -EINVAL;
++	if(strlen(backing_file) > sizeof(header->backing_file) - 1){
++		cow_printf("Backing file name \"%s\" is too long - names are "
++			   "limited to %d characters\n", backing_file,
++			   (int) sizeof(header->backing_file) - 1);
++		goto out_free;
++	}
++
++	if(absolutize(header->backing_file, sizeof(header->backing_file),
++		      backing_file))
++		goto out_free;
++
++	err = stat64(header->backing_file, &buf);
++	if(err < 0){
++		cow_printf("Stat of backing file '%s' failed, errno = %d\n",
++			   header->backing_file, errno);
++		err = -errno;
++		goto out_free;
++	}
++
++	err = cow_file_size(header->backing_file, size);
++	if(err){
++		cow_printf("Couldn't get size of backing file '%s', "
++			   "errno = %d\n", header->backing_file, -err);
++		goto out_free;
++	}
++
++	header->mtime = htonl(buf.st_mtime);
++	header->size = htonll(*size);
++	header->sectorsize = htonl(sectorsize);
++
++	/* A short write is a failure too - never return the byte count */
++	err = 0;
++	if(write(fd, header, sizeof(*header)) != sizeof(*header)){
++		err = -EIO;
++		cow_printf("Write of header to new COW file '%s' failed, "
++			   "errno = %d\n", cow_file, errno);
++	}
++ out_free:
++	cow_free(header);
++	return(err);
++}
++
++int file_reader(__u64 offset, char *buf, int len, void *arg)
++{
++	int fd = *((int *) arg);	/* arg points at the descriptor to read */
++
++	return(pread(fd, buf, len, offset));
++}
++
++/* Parse the V1 or V2 COW header that 'reader' returns for offset 0.  On
++ * success returns 0 with the *_out values set; else -EINVAL or -ENOMEM. */
++int read_cow_header(int (*reader)(__u64, char *, int, void *), void *arg,
++		    __u32 *magic_out, char **backing_file_out,
++		    time_t *mtime_out, __u64 *size_out,
++		    int *sectorsize_out, int *bitmap_offset_out)
++{
++	union cow_header *header;
++	char *file;
++	int err, n;
++	unsigned long version, magic;
++
++	header = cow_malloc(sizeof(*header));
++	if(header == NULL){
++		cow_printf("read_cow_header - Failed to allocate header\n");
++		return(-ENOMEM);
++	}
++	err = -EINVAL;
++	n = (*reader)(0, (char *) header, sizeof(*header), arg);
++	/* compare as ints so a negative (failed) read is caught as short */
++	if(n < (int) offsetof(typeof(header->v1), backing_file)){
++		cow_printf("read_cow_header - short header\n");
++		goto out;
++	}
++
++	magic = header->v1.magic;
++	if(magic == COW_MAGIC)
++		version = header->v1.version;
++	else if(magic == ntohl(COW_MAGIC))
++		version = ntohl(header->v1.version);
++	/* No error printed because the non-COW case comes through here */
++	else goto out;
++
++	*magic_out = COW_MAGIC;
++	if(version == 1){
++		if(n < (int) sizeof(header->v1)){
++			cow_printf("read_cow_header - failed to read V1 "
++				   "header\n");
++			goto out;
++		}
++		*mtime_out = header->v1.mtime;
++		*size_out = header->v1.size;
++		*sectorsize_out = header->v1.sectorsize;
++		*bitmap_offset_out = sizeof(header->v1);
++		file = header->v1.backing_file;
++	}
++	else if(version == 2){
++		if(n < (int) sizeof(header->v2)){
++			cow_printf("read_cow_header - failed to read V2 "
++				   "header\n");
++			goto out;
++		}
++		*mtime_out = ntohl(header->v2.mtime);
++		*size_out = ntohll(header->v2.size);
++		*sectorsize_out = ntohl(header->v2.sectorsize);
++		*bitmap_offset_out = sizeof(header->v2);
++		file = header->v2.backing_file;
++	}
++	else {
++		cow_printf("read_cow_header - invalid COW version\n");
++		goto out;
++	}
++	err = -ENOMEM;
++	*backing_file_out = cow_strdup(file);
++	if(*backing_file_out == NULL){
++		cow_printf("read_cow_header - failed to allocate backing "
++			   "file\n");
++		goto out;
++	}
++	err = 0;
++ out:
++	cow_free(header);
++	return(err);
++}
++
++/* Start a new COW file on 'fd': write the V2 header, report the bitmap
++ * and data layout through the *_out arguments, then extend the file to
++ * its full size.  Returns 0 on success or a non-zero error. */
++int init_cow_file(int fd, char *cow_file, char *backing_file, int sectorsize,
++		  int *bitmap_offset_out, unsigned long *bitmap_len_out,
++		  int *data_offset_out)
++{
++	__u64 size, offset;
++	char zero = 0;
++	int err;
++
++	err = write_cow_header(cow_file, fd, backing_file, sectorsize, &size);
++	if(err)
++		return(err);
++
++	*bitmap_offset_out = sizeof(struct cow_header_v2);
++	cow_sizes(size, sectorsize, *bitmap_offset_out, bitmap_len_out,
++		  data_offset_out);
++
++	/* position of the last byte of the data area */
++	offset = *data_offset_out + size - sizeof(zero);
++	err = cow_seek_file(fd, offset);
++	if(err != 0){
++		cow_printf("cow bitmap lseek failed : errno = %d\n", errno);
++		return(err);
++	}
++
++	/* The amount written does not really matter - the write only sets
++	 * EOF at the file's final size.  As the original note puts it,
++	 * this also sets the entire COW bitmap to zero without having to
++	 * allocate it.
++	 */
++	if(cow_write_file(fd, &zero, sizeof(zero)) != sizeof(zero)){
++		cow_printf("Write of bitmap to new COW file '%s' failed, "
++			   "errno = %d\n", cow_file, errno);
++		return(-EINVAL);
++	}
++
++	return(0);
++}
++
++/*
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/drivers/hostaudio_kern.c b/arch/um/drivers/hostaudio_kern.c
+--- a/arch/um/drivers/hostaudio_kern.c	Fri Aug 15 15:09:05 2003
++++ b/arch/um/drivers/hostaudio_kern.c	Fri Aug 15 15:13:48 2003
+@@ -11,6 +11,7 @@
+ #include "linux/fs.h"
+ #include "linux/sound.h"
+ #include "linux/soundcard.h"
++#include "asm/uaccess.h"
+ #include "kern_util.h"
+ #include "init.h"
+ #include "hostaudio.h"
+@@ -22,7 +23,7 @@
+ #ifndef MODULE
+ static int set_dsp(char *name, int *add)
+ {
+-	dsp = uml_strdup(name);
++	dsp = name;
+ 	return(0);
+ }
+ 
+@@ -34,7 +35,7 @@
+ 
+ static int set_mixer(char *name, int *add)
+ {
+-	mixer = uml_strdup(name);
++	mixer = name;
+ 	return(0);
+ }
+ 
+@@ -51,23 +52,55 @@
+ 			      loff_t *ppos)
+ {
+         struct hostaudio_state *state = file->private_data;
++	void *kbuf;
++	int err;
+ 
+ #ifdef DEBUG
+         printk("hostaudio: read called, count = %d\n", count);
+ #endif
+ 
+-        return(hostaudio_read_user(state, buffer, count, ppos));
++	kbuf = kmalloc(count, GFP_KERNEL);
++	if(kbuf == NULL)
++		return(-ENOMEM);
++
++        err = hostaudio_read_user(state, kbuf, count, ppos);
++	if(err < 0)
++		goto out;
++
++	if(copy_to_user(buffer, kbuf, err))
++		err = -EFAULT;
++
++ out:
++	kfree(kbuf);
++	return(err);
+ }
+ 
+ static ssize_t hostaudio_write(struct file *file, const char *buffer, 
+ 			       size_t count, loff_t *ppos)
+ {
+         struct hostaudio_state *state = file->private_data;
++	void *kbuf;
++	int err;
+ 
+ #ifdef DEBUG
+         printk("hostaudio: write called, count = %d\n", count);
+ #endif
+-        return(hostaudio_write_user(state, buffer, count, ppos));
++
++	kbuf = kmalloc(count, GFP_KERNEL);
++	if(kbuf == NULL)
++		return(-ENOMEM);
++
++	err = -EFAULT;
++	if(copy_from_user(kbuf, buffer, count))
++		goto out;
++
++        err = hostaudio_write_user(state, kbuf, count, ppos);
++	if(err < 0)
++		goto out;
++
++ out:
++	kfree(kbuf);
++	return(err);
+ }
+ 
+ static unsigned int hostaudio_poll(struct file *file, 
+@@ -86,12 +119,43 @@
+ 			   unsigned int cmd, unsigned long arg)
+ {
+         struct hostaudio_state *state = file->private_data;
++	unsigned long data = 0;
++	int err;
+ 
+ #ifdef DEBUG
+         printk("hostaudio: ioctl called, cmd = %u\n", cmd);
+ #endif
++	switch(cmd){
++	case SNDCTL_DSP_SPEED:
++	case SNDCTL_DSP_STEREO:
++	case SNDCTL_DSP_GETBLKSIZE:
++	case SNDCTL_DSP_CHANNELS:
++	case SNDCTL_DSP_SUBDIVIDE:
++	case SNDCTL_DSP_SETFRAGMENT:
++		if(get_user(data, (int *) arg))
++			return(-EFAULT);
++		break;
++	default:
++		break;
++	}
++
++        err = hostaudio_ioctl_user(state, cmd, (unsigned long) &data);
++
++	switch(cmd){
++	case SNDCTL_DSP_SPEED:
++	case SNDCTL_DSP_STEREO:
++	case SNDCTL_DSP_GETBLKSIZE:
++	case SNDCTL_DSP_CHANNELS:
++	case SNDCTL_DSP_SUBDIVIDE:
++	case SNDCTL_DSP_SETFRAGMENT:
++		if(put_user(data, (int *) arg))
++			return(-EFAULT);
++		break;
++	default:
++		break;
++	}
+ 
+-        return(hostaudio_ioctl_user(state, cmd, arg));
++	return(err);
+ }
+ 
+ static int hostaudio_open(struct inode *inode, struct file *file)
+@@ -225,7 +289,8 @@
+ 
+ static int __init hostaudio_init_module(void)
+ {
+-        printk(KERN_INFO "UML Audio Relay\n");
++        printk(KERN_INFO "UML Audio Relay (host dsp = %s, host mixer = %s)\n",
++	       dsp, mixer);
+ 
+ 	module_data.dev_audio = register_sound_dsp(&hostaudio_fops, -1);
+         if(module_data.dev_audio < 0){
+diff -Naur a/arch/um/drivers/line.c b/arch/um/drivers/line.c
+--- a/arch/um/drivers/line.c	Fri Aug 15 15:08:24 2003
++++ b/arch/um/drivers/line.c	Fri Aug 15 15:13:28 2003
+@@ -6,8 +6,8 @@
+ #include "linux/sched.h"
+ #include "linux/slab.h"
+ #include "linux/list.h"
++#include "linux/interrupt.h"
+ #include "linux/devfs_fs_kernel.h"
+-#include "asm/irq.h"
+ #include "asm/uaccess.h"
+ #include "chan_kern.h"
+ #include "irq_user.h"
+@@ -16,16 +16,18 @@
+ #include "user_util.h"
+ #include "kern_util.h"
+ #include "os.h"
++#include "irq_kern.h"
+ 
+ #define LINE_BUFSIZE 4096
+ 
+-void line_interrupt(int irq, void *data, struct pt_regs *unused)
++irqreturn_t line_interrupt(int irq, void *data, struct pt_regs *unused)
+ {
+ 	struct line *dev = data;
+ 
+ 	if(dev->count > 0) 
+ 		chan_interrupt(&dev->chan_list, &dev->task, dev->tty, irq, 
+ 			       dev);
++	return IRQ_HANDLED;
+ }
+ 
+ void line_timer_cb(void *arg)
+@@ -136,20 +138,22 @@
+ 	return(len);
+ }
+ 
+-void line_write_interrupt(int irq, void *data, struct pt_regs *unused)
++irqreturn_t line_write_interrupt(int irq, void *data, struct pt_regs *unused)
+ {
+ 	struct line *dev = data;
+ 	struct tty_struct *tty = dev->tty;
+ 	int err;
+ 
+ 	err = flush_buffer(dev);
+-	if(err == 0) return;
++	if(err == 0) 
++		return(IRQ_NONE);
+ 	else if(err < 0){
+ 		dev->head = dev->buffer;
+ 		dev->tail = dev->buffer;
+ 	}
+ 
+-	if(tty == NULL) return;
++	if(tty == NULL) 
++		return(IRQ_NONE);
+ 
+ 	if(test_bit(TTY_DO_WRITE_WAKEUP, &tty->flags) &&
+ 	   (tty->ldisc.write_wakeup != NULL))
+@@ -161,9 +165,9 @@
+ 	 * writes.
+ 	 */
+ 
+-	if (waitqueue_active(&tty->write_wait))
++	if(waitqueue_active(&tty->write_wait))
+ 		wake_up_interruptible(&tty->write_wait);
+-
++	return(IRQ_HANDLED);
+ }
+ 
+ int line_write_room(struct tty_struct *tty)
+@@ -369,7 +373,7 @@
+ 
+ 	dev = simple_strtoul(name, &end, 0);
+ 	if((*end != '\0') || (end == name)){
+-		*error_out = "line_setup failed to parse device number";
++		*error_out = "line_get_config failed to parse device number";
+ 		return(0);
+ 	}
+ 
+@@ -379,15 +383,15 @@
+ 	}
+ 
+ 	line = &lines[dev];
++
+ 	down(&line->sem);
+-	
+ 	if(!line->valid)
+ 		CONFIG_CHUNK(str, size, n, "none", 1);
+ 	else if(line->count == 0)
+ 		CONFIG_CHUNK(str, size, n, line->init_str, 1);
+ 	else n = chan_config_string(&line->chan_list, str, size, error_out);
+-
+ 	up(&line->sem);
++
+ 	return(n);
+ }
+ 
+@@ -412,7 +416,8 @@
+ 		return NULL;
+ 
+ 	driver->driver_name = line_driver->name;
+-	driver->name = line_driver->devfs_name;
++	driver->name = line_driver->device_name;
++	driver->devfs_name = line_driver->devfs_name;
+ 	driver->major = line_driver->major;
+ 	driver->minor_start = line_driver->minor_start;
+ 	driver->type = line_driver->type;
+@@ -432,7 +437,7 @@
+ 
+ 	for(i = 0; i < nlines; i++){
+ 		if(!lines[i].valid) 
+-			tty_unregister_devfs(driver, i);
++			tty_unregister_device(driver, i);
+ 	}
+ 
+ 	mconsole_register_dev(&line_driver->mc);
+@@ -465,24 +470,25 @@
+ 	struct line *line;
+ };
+ 
+-void winch_interrupt(int irq, void *data, struct pt_regs *unused)
++irqreturn_t winch_interrupt(int irq, void *data, struct pt_regs *unused)
+ {
+ 	struct winch *winch = data;
+ 	struct tty_struct *tty;
+ 	int err;
+ 	char c;
+ 
+-	err = generic_read(winch->fd, &c, NULL);
+-	if(err < 0){
+-		if(err != -EAGAIN){
+-			printk("winch_interrupt : read failed, errno = %d\n", 
+-			       -err);
+-			printk("fd %d is losing SIGWINCH support\n", 
+-			       winch->tty_fd);
+-			free_irq(irq, data);
+-			return;
++	if(winch->fd != -1){
++		err = generic_read(winch->fd, &c, NULL);
++		if(err < 0){
++			if(err != -EAGAIN){
++				printk("winch_interrupt : read failed, "
++				       "errno = %d\n", -err);
++				printk("fd %d is losing SIGWINCH support\n", 
++				       winch->tty_fd);
++				return(IRQ_HANDLED);
++			}
++			goto out;
+ 		}
+-		goto out;
+ 	}
+ 	tty = winch->line->tty;
+ 	if(tty != NULL){
+@@ -492,7 +498,9 @@
+ 		kill_pg(tty->pgrp, SIGWINCH, 1);
+ 	}
+  out:
+-	reactivate_fd(winch->fd, WINCH_IRQ);
++	if(winch->fd != -1)
++		reactivate_fd(winch->fd, WINCH_IRQ);
++	return(IRQ_HANDLED);
+ }
+ 
+ DECLARE_MUTEX(winch_handler_sem);
+@@ -529,7 +537,10 @@
+ 
+ 	list_for_each(ele, &winch_handlers){
+ 		winch = list_entry(ele, struct winch, list);
+-		close(winch->fd);
++		if(winch->fd != -1){
++			deactivate_fd(winch->fd, WINCH_IRQ);
++			close(winch->fd);
++		}
+ 		if(winch->pid != -1) 
+ 			os_kill_process(winch->pid, 1);
+ 	}
+diff -Naur a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c
+--- a/arch/um/drivers/mconsole_kern.c	Fri Aug 15 15:03:47 2003
++++ b/arch/um/drivers/mconsole_kern.c	Fri Aug 15 15:10:11 2003
+@@ -27,6 +27,7 @@
+ #include "init.h"
+ #include "os.h"
+ #include "umid.h"
++#include "irq_kern.h"
+ 
+ static int do_unlink_socket(struct notifier_block *notifier, 
+ 			    unsigned long what, void *data)
+@@ -67,7 +68,7 @@
+ 
+ DECLARE_WORK(mconsole_work, mc_work_proc, NULL);
+ 
+-void mconsole_interrupt(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t mconsole_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+ {
+ 	int fd;
+ 	struct mconsole_entry *new;
+@@ -88,6 +89,7 @@
+ 	}
+ 	if(!list_empty(&mc_requests)) schedule_work(&mconsole_work);
+ 	reactivate_fd(fd, MCONSOLE_IRQ);
++	return(IRQ_HANDLED);
+ }
+ 
+ void mconsole_version(struct mc_request *req)
+@@ -100,20 +102,34 @@
+ 	mconsole_reply(req, version, 0, 0);
+ }
+ 
++void mconsole_log(struct mc_request *req)	/* "log <string>" command */
++{
++	int len;
++	char *ptr = req->request.data;
++
++	ptr += strlen("log");
++	while(isspace(*ptr)) ptr++;
++	/* text length, not its offset; assumes req->len = bytes received */
++	len = req->len - (ptr - req->request.data);
++	printk("%.*s", len, ptr);
++	mconsole_reply(req, "", 0, 0);
++}
++
+ #define UML_MCONSOLE_HELPTEXT \
+-"Commands:
+-    version - Get kernel version
+-    help - Print this message
+-    halt - Halt UML
+-    reboot - Reboot UML
+-    config <dev>=<config> - Add a new device to UML; 
+-	same syntax as command line
+-    config <dev> - Query the configuration of a device
+-    remove <dev> - Remove a device from UML
+-    sysrq <letter> - Performs the SysRq action controlled by the letter
+-    cad - invoke the Ctl-Alt-Del handler
+-    stop - pause the UML; it will do nothing until it receives a 'go'
+-    go - continue the UML after a 'stop'
++"Commands: \n\
++    version - Get kernel version \n\
++    help - Print this message \n\
++    halt - Halt UML \n\
++    reboot - Reboot UML \n\
++    config <dev>=<config> - Add a new device to UML;  \n\
++	same syntax as command line \n\
++    config <dev> - Query the configuration of a device \n\
++    remove <dev> - Remove a device from UML \n\
++    sysrq <letter> - Performs the SysRq action controlled by the letter \n\
++    cad - invoke the Ctl-Alt-Del handler \n\
++    stop - pause the UML; it will do nothing until it receives a 'go' \n\
++    go - continue the UML after a 'stop' \n\
++    log <string> - make UML enter <string> into the kernel log\n\
+ "
+ 
+ void mconsole_help(struct mc_request *req)
+@@ -302,7 +318,7 @@
+ 	if(umid_file_name("mconsole", file, sizeof(file))) return(-1);
+ 	snprintf(mconsole_socket_name, sizeof(file), "%s", file);
+ 
+-	sock = create_unix_socket(file, sizeof(file));
++	sock = create_unix_socket(file, sizeof(file), 1);
+ 	if (sock < 0){
+ 		printk("Failed to initialize management console\n");
+ 		return(1);
+diff -Naur a/arch/um/drivers/mconsole_user.c b/arch/um/drivers/mconsole_user.c
+--- a/arch/um/drivers/mconsole_user.c	Fri Aug 15 15:04:47 2003
++++ b/arch/um/drivers/mconsole_user.c	Fri Aug 15 15:10:35 2003
+@@ -28,6 +28,7 @@
+ 	{ "cad", mconsole_cad, 1 },
+ 	{ "stop", mconsole_stop, 0 },
+ 	{ "go", mconsole_go, 1 },
++	{ "log", mconsole_log, 1 },
+ };
+ 
+ /* Initialized in mconsole_init, which is an initcall */
+@@ -139,6 +140,7 @@
+ 		memcpy(reply.data, str, len);
+ 		reply.data[len] = '\0';
+ 		total -= len;
++ 		str += len;
+ 		reply.len = len + 1;
+ 
+ 		len = sizeof(reply) + reply.len - sizeof(reply.data);
+diff -Naur a/arch/um/drivers/mmapper_kern.c b/arch/um/drivers/mmapper_kern.c
+--- a/arch/um/drivers/mmapper_kern.c	Fri Aug 15 15:04:33 2003
++++ b/arch/um/drivers/mmapper_kern.c	Fri Aug 15 15:10:32 2003
+@@ -120,7 +120,10 @@
+ 	printk(KERN_INFO "Mapper v0.1\n");
+ 
+ 	v_buf = (char *) find_iomem("mmapper", &mmapper_size);
+-	if(mmapper_size == 0) return(0);
++	if(mmapper_size == 0){
++		printk(KERN_ERR "mmapper_init - find_iomem failed\n");
++		return(0);
++	}
+ 
+ 	p_buf = __pa(v_buf);
+ 
+diff -Naur a/arch/um/drivers/net_kern.c b/arch/um/drivers/net_kern.c
+--- a/arch/um/drivers/net_kern.c	Fri Aug 15 15:05:49 2003
++++ b/arch/um/drivers/net_kern.c	Fri Aug 15 15:11:52 2003
+@@ -26,6 +26,7 @@
+ #include "mconsole_kern.h"
+ #include "init.h"
+ #include "irq_user.h"
++#include "irq_kern.h"
+ 
+ static spinlock_t opened_lock = SPIN_LOCK_UNLOCKED;
+ LIST_HEAD(opened);
+@@ -61,14 +62,14 @@
+ 	return pkt_len;
+ }
+ 
+-void uml_net_interrupt(int irq, void *dev_id, struct pt_regs *regs)
++irqreturn_t uml_net_interrupt(int irq, void *dev_id, struct pt_regs *regs)
+ {
+ 	struct net_device *dev = dev_id;
+ 	struct uml_net_private *lp = dev->priv;
+ 	int err;
+ 
+ 	if(!netif_running(dev))
+-		return;
++		return(IRQ_NONE);
+ 
+ 	spin_lock(&lp->lock);
+ 	while((err = uml_net_rx(dev)) > 0) ;
+@@ -83,6 +84,7 @@
+ 
+  out:
+ 	spin_unlock(&lp->lock);
++	return(IRQ_HANDLED);
+ }
+ 
+ static int uml_net_open(struct net_device *dev)
+@@ -292,7 +294,7 @@
+ 	struct uml_net *device;
+ 	struct net_device *dev;
+ 	struct uml_net_private *lp;
+-	int err, size;
++	int save, err, size;
+ 
+ 	size = transport->private_size + sizeof(struct uml_net_private) + 
+ 		sizeof(((struct uml_net_private *) 0)->user);
+@@ -362,21 +364,29 @@
+ 		return 1;
+ 	lp = dev->priv;
+ 
+-	INIT_LIST_HEAD(&lp->list);
+-	spin_lock_init(&lp->lock);
+-	lp->dev = dev;
+-	lp->fd = -1;
+-	lp->mac = { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0 };
+-	lp->have_mac = device->have_mac;
+-	lp->protocol = transport->kern->protocol;
+-	lp->open = transport->user->open;
+-	lp->close = transport->user->close;
+-	lp->remove = transport->user->remove;
+-	lp->read = transport->kern->read;
+-	lp->write = transport->kern->write;
+-	lp->add_address = transport->user->add_address;
+-	lp->delete_address = transport->user->delete_address;
+-	lp->set_mtu = transport->user->set_mtu;
++	/* lp.user is the first four bytes of the transport data, which
++	 * has already been initialized.  This structure assignment will
++	 * overwrite that, so we make sure that .user gets overwritten with
++	 * what it already has.
++	 */
++	save = lp->user[0];
++	*lp = ((struct uml_net_private) 
++		{ .list  		= LIST_HEAD_INIT(lp->list),
++		  .lock 		= SPIN_LOCK_UNLOCKED,
++		  .dev 			= dev,
++		  .fd 			= -1,
++		  .mac 			= { 0xfe, 0xfd, 0x0, 0x0, 0x0, 0x0},
++		  .have_mac 		= device->have_mac,
++		  .protocol 		= transport->kern->protocol,
++		  .open 		= transport->user->open,
++		  .close 		= transport->user->close,
++		  .remove 		= transport->user->remove,
++		  .read 		= transport->kern->read,
++		  .write 		= transport->kern->write,
++		  .add_address 		= transport->user->add_address,
++		  .delete_address  	= transport->user->delete_address,
++		  .set_mtu 		= transport->user->set_mtu,
++		  .user  		= { save } });
+ 
+ 	init_timer(&lp->tl);
+ 	lp->tl.function = uml_net_user_timer_expire;
+diff -Naur a/arch/um/drivers/port_kern.c b/arch/um/drivers/port_kern.c
+--- a/arch/um/drivers/port_kern.c	Fri Aug 15 15:04:01 2003
++++ b/arch/um/drivers/port_kern.c	Fri Aug 15 15:10:18 2003
+@@ -6,6 +6,7 @@
+ #include "linux/list.h"
+ #include "linux/sched.h"
+ #include "linux/slab.h"
++#include "linux/interrupt.h"
+ #include "linux/irq.h"
+ #include "linux/spinlock.h"
+ #include "linux/errno.h"
+@@ -14,6 +15,7 @@
+ #include "kern_util.h"
+ #include "kern.h"
+ #include "irq_user.h"
++#include "irq_kern.h"
+ #include "port.h"
+ #include "init.h"
+ #include "os.h"
+@@ -44,7 +46,7 @@
+ 	struct port_list *port;
+ };
+ 
+-static void pipe_interrupt(int irq, void *data, struct pt_regs *regs)
++static irqreturn_t pipe_interrupt(int irq, void *data, struct pt_regs *regs)
+ {
+ 	struct connection *conn = data;
+ 	int fd;
+@@ -52,7 +54,7 @@
+  	fd = os_rcv_fd(conn->socket[0], &conn->helper_pid);
+ 	if(fd < 0){
+ 		if(fd == -EAGAIN)
+-			return;
++			return(IRQ_NONE);
+ 
+ 		printk(KERN_ERR "pipe_interrupt : os_rcv_fd returned %d\n", 
+ 		       -fd);
+@@ -65,6 +67,7 @@
+ 	list_add(&conn->list, &conn->port->connections);
+ 
+ 	up(&conn->port->sem);
++	return(IRQ_HANDLED);
+ }
+ 
+ static int port_accept(struct port_list *port)
+@@ -138,12 +141,13 @@
+ 
+ DECLARE_WORK(port_work, port_work_proc, NULL);
+ 
+-static void port_interrupt(int irq, void *data, struct pt_regs *regs)
++static irqreturn_t port_interrupt(int irq, void *data, struct pt_regs *regs)
+ {
+ 	struct port_list *port = data;
+ 
+ 	port->has_connection = 1;
+ 	schedule_work(&port_work);
++	return(IRQ_HANDLED);
+ } 
+ 
+ void *port_data(int port_num)
+diff -Naur a/arch/um/drivers/ssl.c b/arch/um/drivers/ssl.c
+--- a/arch/um/drivers/ssl.c	Fri Aug 15 15:06:09 2003
++++ b/arch/um/drivers/ssl.c	Fri Aug 15 15:12:30 2003
+@@ -53,8 +53,9 @@
+ 
+ static struct line_driver driver = {
+ 	.name 			= "UML serial line",
+-	.devfs_name 		= "tts/%d",
+-	.major 			= TTYAUX_MAJOR,
++	.device_name 		= "ttS",
++	.devfs_name 		= "tts/",
++	.major 			= TTY_MAJOR,
+ 	.minor_start 		= 64,
+ 	.type 		 	= TTY_DRIVER_TYPE_SERIAL,
+ 	.subtype 	 	= 0,
+diff -Naur a/arch/um/drivers/stdio_console.c b/arch/um/drivers/stdio_console.c
+--- a/arch/um/drivers/stdio_console.c	Fri Aug 15 15:04:51 2003
++++ b/arch/um/drivers/stdio_console.c	Fri Aug 15 15:10:56 2003
+@@ -83,7 +83,8 @@
+ 
+ static struct line_driver driver = {
+ 	.name 			= "UML console",
+-	.devfs_name 		= "vc/%d",
++	.device_name 		= "tty",
++	.devfs_name 		= "vc/",
+ 	.major 			= TTY_MAJOR,
+ 	.minor_start 		= 0,
+ 	.type 		 	= TTY_DRIVER_TYPE_CONSOLE,
+@@ -159,6 +160,15 @@
+ 
+ static int con_init_done = 0;
+ 
++static struct tty_operations console_ops = {	/* used by stdio_init() */
++	.open 	 		= con_open,
++	.close 	 		= con_close,
++	.write 	 		= con_write,
++	.chars_in_buffer 	= chars_in_buffer,
++	.set_termios 		= set_termios,
++	.write_room		= line_write_room,
++};
++
+ int stdio_init(void)
+ {
+ 	char *new_title;
+@@ -166,7 +176,8 @@
+ 	printk(KERN_INFO "Initializing stdio console driver\n");
+ 
+ 	console_driver = line_register_devfs(&console_lines, &driver,
+-				&console_ops, vts, sizeof(vts)/sizeof(vts[0]));
++					     &console_ops, vts,
++					     sizeof(vts)/sizeof(vts[0]));
+ 
+ 	lines_init(vts, sizeof(vts)/sizeof(vts[0]));
+ 
+@@ -188,15 +199,6 @@
+ 	if(con_init_done) up(&vts[console->index].sem);
+ }
+ 
+-static struct tty_operations console_ops = {
+-	.open 	 		= con_open,
+-	.close 	 		= con_close,
+-	.write 	 		= con_write,
+-	.chars_in_buffer 	= chars_in_buffer,
+-	.set_termios 		= set_termios,
+-	.write_room		= line_write_room,
+-};
+-
+ static struct tty_driver *console_device(struct console *c, int *index)
+ {
+ 	*index = c->index;
+@@ -212,12 +214,14 @@
+ 					       console_device, console_setup,
+ 					       CON_PRINTBUFFER);
+ 
+-static void __init stdio_console_init(void)
++static int __init stdio_console_init(void)
+ {
+ 	INIT_LIST_HEAD(&vts[0].chan_list);
+ 	list_add(&init_console_chan.list, &vts[0].chan_list);
+ 	register_console(&stdiocons);
++	return(0);
+ }
++
+ console_initcall(stdio_console_init);
+ 
+ static int console_chan_setup(char *str)
+diff -Naur a/arch/um/drivers/ubd_kern.c b/arch/um/drivers/ubd_kern.c
+--- a/arch/um/drivers/ubd_kern.c	Fri Aug 15 15:05:56 2003
++++ b/arch/um/drivers/ubd_kern.c	Fri Aug 15 15:11:53 2003
+@@ -8,6 +8,13 @@
+  * old style ubd by setting UBD_SHIFT to 0
+  * 2002-09-27...2002-10-18 massive tinkering for 2.5
+  * partitions have changed in 2.5
++ * 2003-01-29 more tinkering for 2.5.59-1
++ * This should now address the sysfs problems and has
++ * the symlink for devfs to allow for booting with
++ * the common /dev/ubd/discX/... names rather than
++ * only /dev/ubdN/discN this version also has lots of
++ * clean ups preparing for ubd-many.
++ * James McMechan
+  */
+ 
+ #define MAJOR_NR UBD_MAJOR
+@@ -40,6 +47,7 @@
+ #include "mconsole_kern.h"
+ #include "init.h"
+ #include "irq_user.h"
++#include "irq_kern.h"
+ #include "ubd_user.h"
+ #include "2_5compat.h"
+ #include "os.h"
+@@ -70,7 +78,7 @@
+ static request_queue_t *ubd_queue;
+ 
+ /* Protected by ubd_lock */
+-static int fake_major = 0;
++static int fake_major = MAJOR_NR;
+ 
+ static struct gendisk *ubd_gendisk[MAX_DEV];
+ static struct gendisk *fake_gendisk[MAX_DEV];
+@@ -99,12 +107,12 @@
+ 
+ struct ubd {
+ 	char *file;
+-	int is_dir;
+ 	int count;
+ 	int fd;
+ 	__u64 size;
+ 	struct openflags boot_openflags;
+ 	struct openflags openflags;
++	int no_cow;
+ 	struct cow cow;
+ };
+ 
+@@ -118,12 +126,12 @@
+ 
+ #define DEFAULT_UBD { \
+ 	.file = 		NULL, \
+-	.is_dir =		0, \
+ 	.count =		0, \
+ 	.fd =			-1, \
+ 	.size =			-1, \
+ 	.boot_openflags =	OPEN_FLAGS, \
+ 	.openflags =		OPEN_FLAGS, \
++        .no_cow =               0, \
+         .cow =			DEFAULT_COW, \
+ }
+ 
+@@ -131,8 +139,10 @@
+ 
+ static int ubd0_init(void)
+ {
+-	if(ubd_dev[0].file == NULL)
+-		ubd_dev[0].file = "root_fs";
++	struct ubd *dev = &ubd_dev[0];
++
++	if(dev->file == NULL)
++		dev->file = "root_fs";
+ 	return(0);
+ }
+ 
+@@ -199,19 +209,39 @@
+ "    Create ide0 entries that map onto ubd devices.\n\n"
+ );
+ 
++static int parse_unit(char **ptr)	/* returns unit or -1; advances *ptr */
++{
++	char *str = *ptr, *end;
++	int n = -1;	/* returned unchanged when no unit is recognized */
++
++	if(isdigit(*str)) {	/* numeric unit; base chosen by simple_strtoul */
++		n = simple_strtoul(str, &end, 0);
++		if(end == str)
++			return(-1);
++		*ptr = end;
++	}
++	else if (('a' <= *str) && (*str <= 'h')) {	/* 'a'..'h' are 0..7 */
++		n = *str - 'a';
++		str++;
++		*ptr = str;
++	}
++	return(n);
++}
++
+ static int ubd_setup_common(char *str, int *index_out)
+ {
++	struct ubd *dev;
+ 	struct openflags flags = global_openflags;
+ 	char *backing_file;
+ 	int n, err;
+ 
+ 	if(index_out) *index_out = -1;
+-	n = *str++;
++	n = *str;
+ 	if(n == '='){
+-		static int fake_major_allowed = 1;
+ 		char *end;
+ 		int major;
+ 
++		str++;
+ 		if(!strcmp(str, "sync")){
+ 			global_openflags.s = 1;
+ 			return(0);
+@@ -223,20 +253,14 @@
+ 			return(1);
+ 		}
+ 
+-		if(!fake_major_allowed){
+-			printk(KERN_ERR "Can't assign a fake major twice\n");
+-			return(1);
+-		}
+-
+ 		err = 1;
+  		spin_lock(&ubd_lock);
+- 		if(!fake_major_allowed){
++ 		if(fake_major != MAJOR_NR){
+  			printk(KERN_ERR "Can't assign a fake major twice\n");
+  			goto out1;
+  		}
+  
+  		fake_major = major;
+-		fake_major_allowed = 0;
+ 
+ 		printk(KERN_INFO "Setting extra ubd major number to %d\n",
+ 		       major);
+@@ -246,25 +270,23 @@
+ 		return(err);
+ 	}
+ 
+-	if(n < '0'){
+-		printk(KERN_ERR "ubd_setup : index out of range\n"); }
+-
+-	if((n >= '0') && (n <= '9')) n -= '0';
+-	else if((n >= 'a') && (n <= 'z')) n -= 'a';
+-	else {
+-		printk(KERN_ERR "ubd_setup : device syntax invalid\n");
++	n = parse_unit(&str);
++	if(n < 0){
++		printk(KERN_ERR "ubd_setup : couldn't parse unit number "
++		       "'%s'\n", str);
+ 		return(1);
+ 	}
+ 	if(n >= MAX_DEV){
+-		printk(KERN_ERR "ubd_setup : index out of range "
+-		       "(%d devices)\n", MAX_DEV);	
++		printk(KERN_ERR "ubd_setup : index %d out of range "
++		       "(%d devices)\n", n, MAX_DEV);
+ 		return(1);
+ 	}
+ 
+ 	err = 1;
+ 	spin_lock(&ubd_lock);
+ 
+-	if(ubd_dev[n].file != NULL){
++	dev = &ubd_dev[n];
++	if(dev->file != NULL){
+ 		printk(KERN_ERR "ubd_setup : device already configured\n");
+ 		goto out2;
+ 	}
+@@ -279,6 +301,11 @@
+ 		flags.s = 1;
+ 		str++;
+ 	}
++	if (*str == 'd'){
++		dev->no_cow = 1;
++		str++;
++	}
++
+ 	if(*str++ != '='){
+ 		printk(KERN_ERR "ubd_setup : Expected '='\n");
+ 		goto out2;
+@@ -287,14 +314,17 @@
+ 	err = 0;
+ 	backing_file = strchr(str, ',');
+ 	if(backing_file){
+-		*backing_file = '\0';
+-		backing_file++;
++		if(dev->no_cow)
++			printk(KERN_ERR "Can't specify both 'd' and a "
++			       "cow file\n");
++		else {
++			*backing_file = '\0';
++			backing_file++;
++		}
+ 	}
+-	ubd_dev[n].file = str;
+-	if(ubd_is_dir(ubd_dev[n].file))
+-		ubd_dev[n].is_dir = 1;
+-	ubd_dev[n].cow.file = backing_file;
+-	ubd_dev[n].boot_openflags = flags;
++	dev->file = str;
++	dev->cow.file = backing_file;
++	dev->boot_openflags = flags;
+  out2:
+ 	spin_unlock(&ubd_lock);
+ 	return(err);
+@@ -324,8 +354,7 @@
+ static int fakehd_set = 0;
+ static int fakehd(char *str)
+ {
+-	printk(KERN_INFO 
+-	       "fakehd : Changing ubd name to \"hd\".\n");
++	printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
+ 	fakehd_set = 1;
+ 	return 1;
+ }
+@@ -394,9 +423,10 @@
+ 	do_ubd_request(ubd_queue);
+ }
+ 
+-static void ubd_intr(int irq, void *dev, struct pt_regs *unused)
++static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused)
+ {
+ 	ubd_handler();
++	return(IRQ_HANDLED);
+ }
+ 
+ /* Only changed by ubd_init, which is an initcall. */
+@@ -432,16 +462,18 @@
+ static int ubd_open_dev(struct ubd *dev)
+ {
+ 	struct openflags flags;
+-	int err, n, create_cow, *create_ptr;
++	char **back_ptr;
++	int err, create_cow, *create_ptr;
+ 
++	dev->openflags = dev->boot_openflags;
+ 	create_cow = 0;
+ 	create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL;
+-	dev->fd = open_ubd_file(dev->file, &dev->openflags, &dev->cow.file,
++	back_ptr = dev->no_cow ? NULL : &dev->cow.file;
++	dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr,
+ 				&dev->cow.bitmap_offset, &dev->cow.bitmap_len, 
+ 				&dev->cow.data_offset, create_ptr);
+ 
+ 	if((dev->fd == -ENOENT) && create_cow){
+-		n = dev - ubd_dev;
+ 		dev->fd = create_cow_file(dev->file, dev->cow.file, 
+ 					  dev->openflags, 1 << 9,
+ 					  &dev->cow.bitmap_offset, 
+@@ -458,7 +490,10 @@
+ 	if(dev->cow.file != NULL){
+ 		err = -ENOMEM;
+ 		dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len);
+-		if(dev->cow.bitmap == NULL) goto error;
++		if(dev->cow.bitmap == NULL){
++			printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
++			goto error;
++		}
+ 		flush_tlb_kernel_vm();
+ 
+ 		err = read_cow_bitmap(dev->fd, dev->cow.bitmap, 
+@@ -484,17 +519,31 @@
+ 			
+ {
+ 	struct gendisk *disk;
++	char from[sizeof("ubd/nnnnn\0")], to[sizeof("discnnnnn/disc\0")];
++	int err;
+ 
+ 	disk = alloc_disk(1 << UBD_SHIFT);
+-	if (!disk)
+-		return -ENOMEM;
++	if(disk == NULL)
++		return(-ENOMEM);
+ 
+ 	disk->major = major;
+ 	disk->first_minor = unit << UBD_SHIFT;
+ 	disk->fops = &ubd_blops;
+ 	set_capacity(disk, size / 512);
+-	sprintf(disk->disk_name, "ubd");
+-	sprintf(disk->devfs_name, "ubd/disc%d", unit);
++	if(major == MAJOR_NR){
++		sprintf(disk->disk_name, "ubd%d", unit);
++		sprintf(disk->devfs_name, "ubd/disc%d", unit);
++		sprintf(from, "ubd/%d", unit);
++		sprintf(to, "disc%d/disc", unit);
++		err = devfs_mk_symlink(from, to);
++		if(err)
++			printk("ubd_new_disk failed to make link from %s to "
++			       "%s, error = %d\n", from, to, err);
++	}
++	else {
++		sprintf(disk->disk_name, "ubd_fake%d", unit);
++		sprintf(disk->devfs_name, "ubd_fake/disc%d", unit);
++	}
+ 
+ 	disk->private_data = &ubd_dev[unit];
+ 	disk->queue = ubd_queue;
+@@ -509,10 +558,7 @@
+ 	struct ubd *dev = &ubd_dev[n];
+ 	int err;
+ 
+-	if(dev->is_dir)
+-		return(-EISDIR);
+-
+-	if (!dev->file)
++	if(dev->file == NULL)
+ 		return(-ENODEV);
+ 
+ 	if (ubd_open_dev(dev))
+@@ -526,7 +572,7 @@
+ 	if(err) 
+ 		return(err);
+  
+-	if(fake_major)
++	if(fake_major != MAJOR_NR)
+ 		ubd_new_disk(fake_major, dev->size, n, 
+ 			     &fake_gendisk[n]);
+ 
+@@ -564,42 +610,42 @@
+ 	return(err);
+ }
+ 
+-static int ubd_get_config(char *dev, char *str, int size, char **error_out)
++static int ubd_get_config(char *name, char *str, int size, char **error_out)
+ {
+-	struct ubd *ubd;
++	struct ubd *dev;
+ 	char *end;
+-	int major, n = 0;
++	int n, len = 0;
+ 
+-	major = simple_strtoul(dev, &end, 0);
+-	if((*end != '\0') || (end == dev)){
+-		*error_out = "ubd_get_config : didn't parse major number";
++	n = simple_strtoul(name, &end, 0);
++	if((*end != '\0') || (end == name)){
++		*error_out = "ubd_get_config : didn't parse device number";
+ 		return(-1);
+ 	}
+ 
+-	if((major >= MAX_DEV) || (major < 0)){
+-		*error_out = "ubd_get_config : major number out of range";
++	if((n >= MAX_DEV) || (n < 0)){
++		*error_out = "ubd_get_config : device number out of range";
+ 		return(-1);
+ 	}
+ 
+-	ubd = &ubd_dev[major];
++	dev = &ubd_dev[n];
+ 	spin_lock(&ubd_lock);
+ 
+-	if(ubd->file == NULL){
+-		CONFIG_CHUNK(str, size, n, "", 1);
++	if(dev->file == NULL){
++		CONFIG_CHUNK(str, size, len, "", 1);
+ 		goto out;
+ 	}
+ 
+-	CONFIG_CHUNK(str, size, n, ubd->file, 0);
++	CONFIG_CHUNK(str, size, len, dev->file, 0);
+ 
+-	if(ubd->cow.file != NULL){
+-		CONFIG_CHUNK(str, size, n, ",", 0);
+-		CONFIG_CHUNK(str, size, n, ubd->cow.file, 1);
++	if(dev->cow.file != NULL){
++		CONFIG_CHUNK(str, size, len, ",", 0);
++		CONFIG_CHUNK(str, size, len, dev->cow.file, 1);
+ 	}
+-	else CONFIG_CHUNK(str, size, n, "", 1);
++	else CONFIG_CHUNK(str, size, len, "", 1);
+ 
+  out:
+ 	spin_unlock(&ubd_lock);
+-	return(n);
++	return(len);
+ }
+ 
+ static int ubd_remove(char *str)
+@@ -607,11 +653,9 @@
+ 	struct ubd *dev;
+ 	int n, err = -ENODEV;
+ 
+-	if(!isdigit(*str))
+-		return(err);	/* it should be a number 0-7/a-h */
++	n = parse_unit(&str);
+ 
+-	n = *str - '0';
+-	if(n >= MAX_DEV) 
++	if((n < 0) || (n >= MAX_DEV))
+ 		return(err);
+ 
+ 	dev = &ubd_dev[n];
+@@ -672,7 +716,7 @@
+ 		
+ 	elevator_init(ubd_queue, &elevator_noop);
+ 
+-	if (fake_major != 0) {
++	if (fake_major != MAJOR_NR) {
+ 		char name[sizeof("ubd_nnn\0")];
+ 
+ 		snprintf(name, sizeof(name), "ubd_%d", fake_major);
+@@ -717,15 +761,9 @@
+ {
+ 	struct gendisk *disk = inode->i_bdev->bd_disk;
+ 	struct ubd *dev = disk->private_data;
+-	int err = -EISDIR;
+-
+-	if(dev->is_dir == 1)
+-		goto out;
++	int err = 0;
+ 
+-	err = 0;
+ 	if(dev->count == 0){
+-		dev->openflags = dev->boot_openflags;
+-
+ 		err = ubd_open_dev(dev);
+ 		if(err){
+ 			printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
+@@ -799,15 +837,6 @@
+ 
+ 	if(req->rq_status == RQ_INACTIVE) return(1);
+ 
+-	if(dev->is_dir){
+-		strcpy(req->buffer, "HOSTFS:");
+-		strcat(req->buffer, dev->file);
+- 		spin_lock(&ubd_io_lock);
+-		end_request(req, 1);
+- 		spin_unlock(&ubd_io_lock);
+-		return(1);
+-	}
+-
+ 	if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
+ 		printk("Write attempted on readonly ubd device %s\n", 
+ 		       disk->disk_name);
+diff -Naur a/arch/um/drivers/ubd_user.c b/arch/um/drivers/ubd_user.c
+--- a/arch/um/drivers/ubd_user.c	Fri Aug 15 15:04:51 2003
++++ b/arch/um/drivers/ubd_user.c	Fri Aug 15 15:10:54 2003
+@@ -24,142 +24,24 @@
+ #include "user.h"
+ #include "ubd_user.h"
+ #include "os.h"
++#include "cow.h"
+ 
+ #include <endian.h>
+ #include <byteswap.h>
+-#if __BYTE_ORDER == __BIG_ENDIAN
+-# define ntohll(x) (x)
+-# define htonll(x) (x)
+-#elif __BYTE_ORDER == __LITTLE_ENDIAN
+-# define ntohll(x)  bswap_64(x)
+-# define htonll(x)  bswap_64(x)
+-#else
+-#error "__BYTE_ORDER not defined"
+-#endif
+-
+-#define PATH_LEN_V1 256
+-
+-struct cow_header_v1 {
+-	int magic;
+-	int version;
+-	char backing_file[PATH_LEN_V1];
+-	time_t mtime;
+-	__u64 size;
+-	int sectorsize;
+-};
+-
+-#define PATH_LEN_V2 MAXPATHLEN
+-
+-struct cow_header_v2 {
+-	unsigned long magic;
+-	unsigned long version;
+-	char backing_file[PATH_LEN_V2];
+-	time_t mtime;
+-	__u64 size;
+-	int sectorsize;
+-};
+-
+-union cow_header {
+-	struct cow_header_v1 v1;
+-	struct cow_header_v2 v2;
+-};
+-
+-#define COW_MAGIC 0x4f4f4f4d  /* MOOO */
+-#define COW_VERSION 2
+-
+-static void sizes(__u64 size, int sectorsize, int bitmap_offset, 
+-		  unsigned long *bitmap_len_out, int *data_offset_out)
+-{
+-	*bitmap_len_out = (size + sectorsize - 1) / (8 * sectorsize);
+-
+-	*data_offset_out = bitmap_offset + *bitmap_len_out;
+-	*data_offset_out = (*data_offset_out + sectorsize - 1) / sectorsize;
+-	*data_offset_out *= sectorsize;
+-}
+-
+-static int read_cow_header(int fd, int *magic_out, char **backing_file_out, 
+-			   time_t *mtime_out, __u64 *size_out, 
+-			   int *sectorsize_out, int *bitmap_offset_out)
+-{
+-	union cow_header *header;
+-	char *file;
+-	int err, n;
+-	unsigned long version, magic;
+-
+-	header = um_kmalloc(sizeof(*header));
+-	if(header == NULL){
+-		printk("read_cow_header - Failed to allocate header\n");
+-		return(-ENOMEM);
+-	}
+-	err = -EINVAL;
+-	n = read(fd, header, sizeof(*header));
+-	if(n < offsetof(typeof(header->v1), backing_file)){
+-		printk("read_cow_header - short header\n");
+-		goto out;
+-	}
+-
+-	magic = header->v1.magic;
+-	if(magic == COW_MAGIC) {
+-		version = header->v1.version;
+-	}
+-	else if(magic == ntohl(COW_MAGIC)){
+-		version = ntohl(header->v1.version);
+-	}
+-	else goto out;
+-
+-	*magic_out = COW_MAGIC;
+-
+-	if(version == 1){
+-		if(n < sizeof(header->v1)){
+-			printk("read_cow_header - failed to read V1 header\n");
+-			goto out;
+-		}
+-		*mtime_out = header->v1.mtime;
+-		*size_out = header->v1.size;
+-		*sectorsize_out = header->v1.sectorsize;
+-		*bitmap_offset_out = sizeof(header->v1);
+-		file = header->v1.backing_file;
+-	}
+-	else if(version == 2){
+-		if(n < sizeof(header->v2)){
+-			printk("read_cow_header - failed to read V2 header\n");
+-			goto out;
+-		}
+-		*mtime_out = ntohl(header->v2.mtime);
+-		*size_out = ntohll(header->v2.size);
+-		*sectorsize_out = ntohl(header->v2.sectorsize);
+-		*bitmap_offset_out = sizeof(header->v2);
+-		file = header->v2.backing_file;
+-	}
+-	else {
+-		printk("read_cow_header - invalid COW version\n");
+-		goto out;
+-	}
+-	err = -ENOMEM;
+-	*backing_file_out = uml_strdup(file);
+-	if(*backing_file_out == NULL){
+-		printk("read_cow_header - failed to allocate backing file\n");
+-		goto out;
+-	}
+-	err = 0;
+- out:
+-	kfree(header);
+-	return(err);
+-}
+ 
+ static int same_backing_files(char *from_cmdline, char *from_cow, char *cow)
+ {
+-	struct stat buf1, buf2;
++	struct stat64 buf1, buf2;
+ 
+ 	if(from_cmdline == NULL) return(1);
+ 	if(!strcmp(from_cmdline, from_cow)) return(1);
+ 
+-	if(stat(from_cmdline, &buf1) < 0){
++	if(stat64(from_cmdline, &buf1) < 0){
+ 		printk("Couldn't stat '%s', errno = %d\n", from_cmdline, 
+ 		       errno);
+ 		return(1);
+ 	}
+-	if(stat(from_cow, &buf2) < 0){
++	if(stat64(from_cow, &buf2) < 0){
+ 		printk("Couldn't stat '%s', errno = %d\n", from_cow, errno);
+ 		return(1);
+ 	}
+@@ -178,6 +60,7 @@
+ 	long long actual;
+ 	int err;
+ 
++	printk("%llu", (unsigned long long) htonll(size));
+   	if(stat64(file, &buf) < 0){
+ 		printk("Failed to stat backing file \"%s\", errno = %d\n",
+ 		       file, errno);
+@@ -215,118 +98,6 @@
+ 	return(0);
+ }
+ 
+-static int absolutize(char *to, int size, char *from)
+-{
+-	char save_cwd[256], *slash;
+-	int remaining;
+-
+-	if(getcwd(save_cwd, sizeof(save_cwd)) == NULL) {
+-		printk("absolutize : unable to get cwd - errno = %d\n", errno);
+-		return(-1);
+-	}
+-	slash = strrchr(from, '/');
+-	if(slash != NULL){
+-		*slash = '\0';
+-		if(chdir(from)){
+-			*slash = '/';
+-			printk("absolutize : Can't cd to '%s' - errno = %d\n",
+-			       from, errno);
+-			return(-1);
+-		}
+-		*slash = '/';
+-		if(getcwd(to, size) == NULL){
+-			printk("absolutize : unable to get cwd of '%s' - "
+-			       "errno = %d\n", from, errno);
+-			return(-1);
+-		}
+-		remaining = size - strlen(to);
+-		if(strlen(slash) + 1 > remaining){
+-			printk("absolutize : unable to fit '%s' into %d "
+-			       "chars\n", from, size);
+-			return(-1);
+-		}
+-		strcat(to, slash);
+-	}
+-	else {
+-		if(strlen(save_cwd) + 1 + strlen(from) + 1 > size){
+-			printk("absolutize : unable to fit '%s' into %d "
+-			       "chars\n", from, size);
+-			return(-1);
+-		}
+-		strcpy(to, save_cwd);
+-		strcat(to, "/");
+-		strcat(to, from);
+-	}
+-	chdir(save_cwd);
+-	return(0);
+-}
+-
+-static int write_cow_header(char *cow_file, int fd, char *backing_file, 
+-			    int sectorsize, long long *size)
+-{
+-        struct cow_header_v2 *header;
+-	struct stat64 buf;
+-	int err;
+-
+-	err = os_seek_file(fd, 0);
+-	if(err != 0){
+-		printk("write_cow_header - lseek failed, errno = %d\n", errno);
+-		return(-errno);
+-	}
+-
+-	err = -ENOMEM;
+-	header = um_kmalloc(sizeof(*header));
+-	if(header == NULL){
+-		printk("Failed to allocate COW V2 header\n");
+-		goto out;
+-	}
+-	header->magic = htonl(COW_MAGIC);
+-	header->version = htonl(COW_VERSION);
+-
+-	err = -EINVAL;
+-	if(strlen(backing_file) > sizeof(header->backing_file) - 1){
+-		printk("Backing file name \"%s\" is too long - names are "
+-		       "limited to %d characters\n", backing_file, 
+-		       sizeof(header->backing_file) - 1);
+-		goto out_free;
+-	}
+-
+-	if(absolutize(header->backing_file, sizeof(header->backing_file), 
+-		      backing_file))
+-		goto out_free;
+-
+-	err = stat64(header->backing_file, &buf);
+-	if(err < 0){
+-		printk("Stat of backing file '%s' failed, errno = %d\n",
+-		       header->backing_file, errno);
+-		err = -errno;
+-		goto out_free;
+-	}
+-
+-	err = os_file_size(header->backing_file, size);
+-	if(err){
+-		printk("Couldn't get size of backing file '%s', errno = %d\n",
+-		       header->backing_file, -*size);
+-		goto out_free;
+-	}
+-
+-	header->mtime = htonl(buf.st_mtime);
+-	header->size = htonll(*size);
+-	header->sectorsize = htonl(sectorsize);
+-
+-	err = write(fd, header, sizeof(*header));
+-	if(err != sizeof(*header)){
+-		printk("Write of header to new COW file '%s' failed, "
+-		       "errno = %d\n", cow_file, errno);
+-		goto out_free;
+-	}
+-	err = 0;
+- out_free:
+-	kfree(header);
+- out:
+-	return(err);
+-}
+-
+ int open_ubd_file(char *file, struct openflags *openflags, 
+ 		  char **backing_file_out, int *bitmap_offset_out, 
+ 		  unsigned long *bitmap_len_out, int *data_offset_out, 
+@@ -346,10 +117,17 @@
+                 if((fd = os_open_file(file, *openflags, mode)) < 0) 
+ 			return(fd);
+         }
++
++	err = os_lock_file(fd, openflags->w);
++	if(err){
++		printk("Failed to lock '%s', errno = %d\n", file, -err);
++		goto error;
++	}
++	
+ 	if(backing_file_out == NULL) return(fd);
+ 
+-	err = read_cow_header(fd, &magic, &backing_file, &mtime, &size, 
+-			      &sectorsize, bitmap_offset_out);
++	err = read_cow_header(file_reader, &fd, &magic, &backing_file, &mtime, 
++			      &size, &sectorsize, bitmap_offset_out);
+ 	if(err && (*backing_file_out != NULL)){
+ 		printk("Failed to read COW header from COW file \"%s\", "
+ 		       "errno = %d\n", file, err);
+@@ -376,12 +154,12 @@
+ 		if(err) goto error;
+ 	}
+ 
+-	sizes(size, sectorsize, *bitmap_offset_out, bitmap_len_out, 
+-	      data_offset_out);
++	cow_sizes(size, sectorsize, *bitmap_offset_out, bitmap_len_out, 
++		  data_offset_out);
+ 
+         return(fd);
+  error:
+-	close(fd);
++	os_close_file(fd);
+ 	return(err);
+ }
+ 
+@@ -389,10 +167,7 @@
+ 		    int sectorsize, int *bitmap_offset_out, 
+ 		    unsigned long *bitmap_len_out, int *data_offset_out)
+ {
+-	__u64 blocks;
+-	long zero;
+-	int err, fd, i;
+-	long long size;
++	int err, fd;
+ 
+ 	flags.c = 1;
+ 	fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL);
+@@ -403,29 +178,12 @@
+ 		goto out;
+ 	}
+ 
+-	err = write_cow_header(cow_file, fd, backing_file, sectorsize, &size);
+-	if(err) goto out_close;
+-
+-	blocks = (size + sectorsize - 1) / sectorsize;
+-	blocks = (blocks + sizeof(long) * 8 - 1) / (sizeof(long) * 8);
+-	zero = 0;
+-	for(i = 0; i < blocks; i++){
+-		err = write(fd, &zero, sizeof(zero));
+-		if(err != sizeof(zero)){
+-			printk("Write of bitmap to new COW file '%s' failed, "
+-			       "errno = %d\n", cow_file, errno);
+-			goto out_close;
+-		}
+-	}
+-
+-	sizes(size, sectorsize, sizeof(struct cow_header_v2), 
+-	      bitmap_len_out, data_offset_out);
+-	*bitmap_offset_out = sizeof(struct cow_header_v2);
+-
+-	return(fd);
+-
+- out_close:
+-	close(fd);
++	err = init_cow_file(fd, cow_file, backing_file, sectorsize, 
++			    bitmap_offset_out, bitmap_len_out, 
++			    data_offset_out);
++	if(!err)
++		return(fd);
++	os_close_file(fd);
+  out:
+ 	return(err);
+ }
+@@ -448,14 +206,6 @@
+ 	else return(n);
+ }
+ 
+-int ubd_is_dir(char *file)
+-{
+-	struct stat64 buf;
+-
+-	if(stat64(file, &buf) < 0) return(0);
+-	return(S_ISDIR(buf.st_mode));
+-}
+-
+ void do_io(struct io_thread_req *req)
+ {
+ 	char *buf;
+diff -Naur a/arch/um/drivers/xterm.c b/arch/um/drivers/xterm.c
+--- a/arch/um/drivers/xterm.c	Fri Aug 15 15:04:00 2003
++++ b/arch/um/drivers/xterm.c	Fri Aug 15 15:10:18 2003
+@@ -108,7 +108,7 @@
+ 	}
+ 	close(fd);
+ 
+-	fd = create_unix_socket(file, sizeof(file));
++	fd = create_unix_socket(file, sizeof(file), 1);
+ 	if(fd < 0){
+ 		printk("xterm_open : create_unix_socket failed, errno = %d\n", 
+ 		       -fd);
+diff -Naur a/arch/um/drivers/xterm_kern.c b/arch/um/drivers/xterm_kern.c
+--- a/arch/um/drivers/xterm_kern.c	Fri Aug 15 15:07:37 2003
++++ b/arch/um/drivers/xterm_kern.c	Fri Aug 15 15:13:03 2003
+@@ -5,9 +5,12 @@
+ 
+ #include "linux/errno.h"
+ #include "linux/slab.h"
++#include "linux/signal.h"
++#include "linux/interrupt.h"
+ #include "asm/semaphore.h"
+ #include "asm/irq.h"
+ #include "irq_user.h"
++#include "irq_kern.h"
+ #include "kern_util.h"
+ #include "os.h"
+ #include "xterm.h"
+@@ -19,17 +22,18 @@
+ 	int new_fd;
+ };
+ 
+-static void xterm_interrupt(int irq, void *data, struct pt_regs *regs)
++static irqreturn_t xterm_interrupt(int irq, void *data, struct pt_regs *regs)
+ {
+ 	struct xterm_wait *xterm = data;
+ 	int fd;
+ 
+ 	fd = os_rcv_fd(xterm->fd, &xterm->pid);
+ 	if(fd == -EAGAIN)
+-		return;
++		return(IRQ_NONE);
+ 
+ 	xterm->new_fd = fd;
+ 	up(&xterm->sem);
++	return(IRQ_HANDLED);
+ }
+ 
+ int xterm_fd(int socket, int *pid_out)
+diff -Naur a/arch/um/dyn.lds.S b/arch/um/dyn.lds.S
+--- a/arch/um/dyn.lds.S	Fri Aug 15 15:06:20 2003
++++ b/arch/um/dyn.lds.S	Fri Aug 15 15:12:31 2003
+@@ -15,7 +15,11 @@
+   . = ALIGN(4096);		/* Init code and data */
+   _stext = .;
+   __init_begin = .;
+-  .text.init : { *(.text.init) }
++  .init.text : { 
++	_sinittext = .;
++	*(.init.text)
++	_einittext = .;
++  }
+ 
+   . = ALIGN(4096);
+ 
+@@ -67,7 +71,7 @@
+ 
+   #include "asm/common.lds.S"
+ 
+-  .data.init : { *(.data.init) }
++  .init.data : { *(.init.data) }
+ 
+   /* Ensure the __preinit_array_start label is properly aligned.  We
+      could instead move the label definition inside the section, but
+diff -Naur a/arch/um/include/irq_kern.h b/arch/um/include/irq_kern.h
+--- a/arch/um/include/irq_kern.h	Wed Dec 31 19:00:00 1969
++++ b/arch/um/include/irq_kern.h	Fri Aug 15 15:11:53 2003
+@@ -0,0 +1,28 @@
++/* 
++ * Copyright (C) 2001, 2002 Jeff Dike (jdike@karaya.com)
++ * Licensed under the GPL
++ */
++
++#ifndef __IRQ_KERN_H__
++#define __IRQ_KERN_H__
++
++#include "linux/interrupt.h"
++
++extern int um_request_irq(unsigned int irq, int fd, int type,
++			  irqreturn_t (*handler)(int, void *, 
++						 struct pt_regs *),
++			  unsigned long irqflags,  const char * devname,
++			  void *dev_id);
++
++#endif
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only.  This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/arch/um/include/kern_util.h b/arch/um/include/kern_util.h
+--- a/arch/um/include/kern_util.h	Fri Aug 15 15:05:04 2003
++++ b/arch/um/include/kern_util.h	Fri Aug 15 15:11:18 2003
+@@ -63,10 +63,9 @@
+ extern void *syscall_sp(void *t);
+ extern void syscall_trace(void);
+ extern int hz(void);
+-extern void idle_timer(void);
++extern void uml_idle_timer(void);
+ extern unsigned int do_IRQ(int irq, union uml_pt_regs *regs);
+ extern int external_pid(void *t);
+-extern int pid_to_processor_id(int pid);
+ extern void boot_timer_handler(int sig);
+ extern void interrupt_end(void);
+ extern void initial_thread_cb(void (*proc)(void *), void *arg);
+@@ -90,9 +89,7 @@
+ extern char *uml_strdup(char *string);
+ extern void unprotect_kernel_mem(void);
+ extern void protect_kernel_mem(void);
+-extern void set_kmem_end(unsigned long);
+ extern void uml_cleanup(void);
+-extern int pid_to_processor_id(int pid);
+ extern void set_current(void *t);
+ extern void lock_signalled_task(void *t);
+ extern void IPI_handler(int cpu);
+@@ -101,7 +98,9 @@
+ extern int clear_user_proc(void *buf, int size);
+ extern int copy_to_user_proc(void *to, void *from, int size);
+ extern int copy_from_user_proc(void *to, void *from, int size);
++extern int strlen_user_proc(char *str);
+ extern void bus_handler(int sig, union uml_pt_regs *regs);
++extern void winch(int sig, union uml_pt_regs *regs);
+ extern long execute_syscall(void *r);
+ extern int smp_sigio_handler(void);
+ extern void *get_current(void);
+diff -Naur a/arch/um/include/line.h b/arch/um/include/line.h
+--- a/arch/um/include/line.h	Fri Aug 15 15:07:40 2003
++++ b/arch/um/include/line.h	Fri Aug 15 15:13:11 2003
+@@ -9,12 +9,14 @@
+ #include "linux/list.h"
+ #include "linux/workqueue.h"
+ #include "linux/tty.h"
++#include "linux/interrupt.h"
+ #include "asm/semaphore.h"
+ #include "chan_user.h"
+ #include "mconsole_kern.h"
+ 
+ struct line_driver {
+ 	char *name;
++	char *device_name;
+ 	char *devfs_name;
+ 	short major;
+ 	short minor_start;
+@@ -67,8 +69,9 @@
+ 
+ #define LINES_INIT(n) {  num :		n }
+ 
+-extern void line_interrupt(int irq, void *data, struct pt_regs *unused);
+-extern void line_write_interrupt(int irq, void *data, struct pt_regs *unused);
++extern irqreturn_t line_interrupt(int irq, void *data, struct pt_regs *unused);
++extern irqreturn_t line_write_interrupt(int irq, void *data, 
++					struct pt_regs *unused);
+ extern void line_close(struct line *lines, struct tty_struct *tty);
+ extern int line_open(struct line *lines, struct tty_struct *tty, 
+ 		     struct chan_opts *opts);
+diff -Naur a/arch/um/include/mconsole.h b/arch/um/include/mconsole.h
+--- a/arch/um/include/mconsole.h	Fri Aug 15 15:05:26 2003
++++ b/arch/um/include/mconsole.h	Fri Aug 15 15:11:43 2003
+@@ -77,6 +77,7 @@
+ extern void mconsole_cad(struct mc_request *req);
+ extern void mconsole_stop(struct mc_request *req);
+ extern void mconsole_go(struct mc_request *req);
++extern void mconsole_log(struct mc_request *req);
+ 
+ extern int mconsole_get_request(int fd, struct mc_request *req);
+ extern int mconsole_notify(char *sock_name, int type, const void *data, 
+diff -Naur a/arch/um/include/mem.h b/arch/um/include/mem.h
+--- a/arch/um/include/mem.h	Fri Aug 15 15:09:22 2003
++++ b/arch/um/include/mem.h	Fri Aug 15 15:14:01 2003
+@@ -13,7 +13,6 @@
+ };
+ 
+ extern void set_usable_vm(unsigned long start, unsigned long end);
+-extern void set_kmem_end(unsigned long new);
+ 
+ #endif
+ 
+diff -Naur a/arch/um/include/mem_user.h b/arch/um/include/mem_user.h
+--- a/arch/um/include/mem_user.h	Fri Aug 15 15:07:31 2003
++++ b/arch/um/include/mem_user.h	Fri Aug 15 15:12:54 2003
+@@ -51,9 +51,6 @@
+ 
+ extern int init_mem_user(void);
+ extern int create_mem_file(unsigned long len);
+-extern void setup_range(int fd, char *driver, unsigned long start,
+-			unsigned long pfn, unsigned long total, int need_vm, 
+-			struct mem_region *region, void *reserved);
+ extern void setup_memory(void *entry);
+ extern unsigned long find_iomem(char *driver, unsigned long *len_out);
+ extern int init_maps(struct mem_region *region);
+diff -Naur a/arch/um/include/os.h b/arch/um/include/os.h
+--- a/arch/um/include/os.h	Fri Aug 15 15:04:50 2003
++++ b/arch/um/include/os.h	Fri Aug 15 15:10:48 2003
+@@ -103,10 +103,11 @@
+ extern int os_shutdown_socket(int fd, int r, int w);
+ extern void os_close_file(int fd);
+ extern int os_rcv_fd(int fd, int *helper_pid_out);
+-extern int create_unix_socket(char *file, int len);
++extern int create_unix_socket(char *file, int len, int close_on_exec);
+ extern int os_connect_socket(char *name);
+ extern int os_file_type(char *file);
+ extern int os_file_mode(char *file, struct openflags *mode_out);
++extern int os_lock_file(int fd, int excl);
+ 
+ extern unsigned long os_process_pc(int pid);
+ extern int os_process_parent(int pid);
+@@ -120,6 +121,7 @@
+ extern int os_protect_memory(void *addr, unsigned long len, 
+ 			     int r, int w, int x);
+ extern int os_unmap_memory(void *addr, int len);
++extern void os_flush_stdout(void);
+ 
+ #endif
+ 
+diff -Naur a/arch/um/include/sysdep-i386/sigcontext.h b/arch/um/include/sysdep-i386/sigcontext.h
+--- a/arch/um/include/sysdep-i386/sigcontext.h	Fri Aug 15 15:07:37 2003
++++ b/arch/um/include/sysdep-i386/sigcontext.h	Fri Aug 15 15:13:03 2003
+@@ -28,8 +28,8 @@
+  */
+ #define SC_START_SYSCALL(sc) do SC_EAX(sc) = -ENOSYS; while(0)
+ 
+-/* These are General Protection and Page Fault */
+-#define SEGV_IS_FIXABLE(trap) ((trap == 13) || (trap == 14))
++/* This is Page Fault */
++#define SEGV_IS_FIXABLE(trap) (trap == 14)
+ 
+ #define SC_SEGV_IS_FIXABLE(sc) (SEGV_IS_FIXABLE(SC_TRAPNO(sc)))
+ 
+diff -Naur a/arch/um/include/ubd_user.h b/arch/um/include/ubd_user.h
+--- a/arch/um/include/ubd_user.h	Fri Aug 15 15:06:34 2003
++++ b/arch/um/include/ubd_user.h	Fri Aug 15 15:12:37 2003
+@@ -39,7 +39,6 @@
+ extern int write_ubd_fs(int fd, char *buffer, int len);
+ extern int start_io_thread(unsigned long sp, int *fds_out);
+ extern void do_io(struct io_thread_req *req);
+-extern int ubd_is_dir(char *file);
+ 
+ static inline int ubd_test_bit(__u64 bit, unsigned char *data)
+ {
+diff -Naur a/arch/um/include/user.h b/arch/um/include/user.h
+--- a/arch/um/include/user.h	Fri Aug 15 15:03:58 2003
++++ b/arch/um/include/user.h	Fri Aug 15 15:10:14 2003
+@@ -14,7 +14,7 @@
+ extern void kfree(void *ptr);
+ extern int in_aton(char *str);
+ extern int open_gdb_chan(void);
+-
++extern int strlcpy(char *, const char *, int);
+ #endif
+ 
+ /*
+diff -Naur a/arch/um/include/user_util.h b/arch/um/include/user_util.h
+--- a/arch/um/include/user_util.h	Fri Aug 15 15:04:33 2003
++++ b/arch/um/include/user_util.h	Fri Aug 15 15:10:32 2003
+@@ -59,7 +59,6 @@
+ extern void *add_signal_handler(int sig, void (*handler)(int));
+ extern int start_fork_tramp(void *arg, unsigned long temp_stack, 
+ 			    int clone_flags, int (*tramp)(void *));
+-extern int clone_and_wait(int (*fn)(void *), void *arg, void *sp, int flags);
+ extern int linux_main(int argc, char **argv);
+ extern void set_cmdline(char *cmd);
+ extern void input_cb(void (*proc)(void *), void *arg, int arg_len);
+@@ -90,7 +89,8 @@
+ extern int arch_fixup(unsigned long address, void *sc_ptr);
+ extern void forward_pending_sigio(int target);
+ extern int can_do_skas(void);
+- 
++extern void arch_init_thread(void);
++
+ #endif
+ 
+ /*
+diff -Naur a/arch/um/kernel/Makefile b/arch/um/kernel/Makefile
+--- a/arch/um/kernel/Makefile	Fri Aug 15 15:07:32 2003
++++ b/arch/um/kernel/Makefile	Fri Aug 15 15:12:57 2003
+@@ -19,6 +19,8 @@
+ obj-$(CONFIG_MODE_TT) += tt/
+ obj-$(CONFIG_MODE_SKAS) += skas/
+ 
++clean-files	:= config.c
++
+ user-objs-$(CONFIG_TTY_LOG) += tty_log.o
+ 
+ USER_OBJS := $(filter %_user.o,$(obj-y))  $(user-objs-y) config.o helper.o \
+@@ -43,17 +45,13 @@
+ $(obj)/frame.o: $(src)/frame.c
+ 	$(CC) $(CFLAGS_$(notdir $@)) -c -o $@ $<
+ 
+-QUOTE = 'my $$config=`cat $(TOPDIR)/.config`; $$config =~ s/"/\\"/g ; while(<STDIN>) { $$_ =~ s/CONFIG/$$config/; print $$_ }'
++QUOTE = 'my $$config=`cat $(TOPDIR)/.config`; $$config =~ s/"/\\"/g ; $$config =~ s/\n/\\n"\n"/g ; while(<STDIN>) { $$_ =~ s/CONFIG/$$config/; print $$_ }'
+ 
+ $(obj)/config.c : $(src)/config.c.in $(TOPDIR)/.config
+ 	$(PERL) -e $(QUOTE) < $(src)/config.c.in > $@
+ 
+ $(obj)/config.o : $(obj)/config.c
+ 
+-clean:
+-	rm -f config.c
+-	for dir in $(subdir-y) ; do $(MAKE) -C $$dir clean; done
+-
+ modules:
+ 
+ fastdep:
+diff -Naur a/arch/um/kernel/config.c.in b/arch/um/kernel/config.c.in
+--- a/arch/um/kernel/config.c.in	Fri Aug 15 15:07:37 2003
++++ b/arch/um/kernel/config.c.in	Fri Aug 15 15:13:03 2003
+@@ -7,9 +7,7 @@
+ #include <stdlib.h>
+ #include "init.h"
+ 
+-static __initdata char *config = "
+-CONFIG
+-";
++static __initdata char *config = "CONFIG";
+ 
+ static int __init print_config(char *line, int *add)
+ {
+diff -Naur a/arch/um/kernel/exec_kern.c b/arch/um/kernel/exec_kern.c
+--- a/arch/um/kernel/exec_kern.c	Fri Aug 15 15:04:54 2003
++++ b/arch/um/kernel/exec_kern.c	Fri Aug 15 15:11:03 2003
+@@ -32,10 +32,15 @@
+ 	CHOOSE_MODE_PROC(start_thread_tt, start_thread_skas, regs, eip, esp);
+ }
+ 
++extern void log_exec(char **argv, void *tty);
++
+ static int execve1(char *file, char **argv, char **env)
+ {
+         int error;
+ 
++#ifdef CONFIG_TTY_LOG
++	log_exec(argv, current->tty);
++#endif
+         error = do_execve(file, argv, env, &current->thread.regs);
+         if (error == 0){
+                 current->ptrace &= ~PT_DTRACE;
+diff -Naur a/arch/um/kernel/init_task.c b/arch/um/kernel/init_task.c
+--- a/arch/um/kernel/init_task.c	Fri Aug 15 15:09:24 2003
++++ b/arch/um/kernel/init_task.c	Fri Aug 15 15:14:04 2003
+@@ -17,6 +17,7 @@
+ struct mm_struct init_mm = INIT_MM(init_mm);
+ static struct files_struct init_files = INIT_FILES;
+ static struct signal_struct init_signals = INIT_SIGNALS(init_signals);
++static struct sighand_struct init_sighand = INIT_SIGHAND(init_sighand);
+ 
+ /*
+  * Initial task structure.
+@@ -38,26 +39,12 @@
+ __attribute__((__section__(".data.init_task"))) = 
+ { INIT_THREAD_INFO(init_task) };
+ 
+-struct task_struct *alloc_task_struct(void)
+-{
+-	return((struct task_struct *) 
+-	       __get_free_pages(GFP_KERNEL, CONFIG_KERNEL_STACK_ORDER));
+-}
+-
+ void unprotect_stack(unsigned long stack)
+ {
+ 	protect_memory(stack, (1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE, 
+ 		       1, 1, 0, 1);
+ }
+ 
+-void free_task_struct(struct task_struct *task)
+-{
+-	/* free_pages decrements the page counter and only actually frees
+-	 * the pages if they are now not accessed by anything.
+-	 */
+-	free_pages((unsigned long) task, CONFIG_KERNEL_STACK_ORDER);
+-}
+-
+ /*
+  * Overrides for Emacs so that we follow Linus's tabbing style.
+  * Emacs will notice this stuff at the end of the file and automatically
+diff -Naur a/arch/um/kernel/irq.c b/arch/um/kernel/irq.c
+--- a/arch/um/kernel/irq.c	Fri Aug 15 15:07:53 2003
++++ b/arch/um/kernel/irq.c	Fri Aug 15 15:13:18 2003
+@@ -28,6 +28,7 @@
+ #include "user_util.h"
+ #include "kern_util.h"
+ #include "irq_user.h"
++#include "irq_kern.h"
+ 
+ static void register_irq_proc (unsigned int irq);
+ 
+@@ -82,65 +83,52 @@
+ 	end_none
+ };
+ 
+-/* Not changed */
+-volatile unsigned long irq_err_count;
+-
+ /*
+  * Generic, controller-independent functions:
+  */
+ 
+-int get_irq_list(char *buf)
++int show_interrupts(struct seq_file *p, void *v)
+ {
+ 	int i, j;
+-	unsigned long flags;
+ 	struct irqaction * action;
+-	char *p = buf;
++	unsigned long flags;
+ 
+-	p += sprintf(p, "           ");
+-	for (j=0; j<num_online_cpus(); j++)
+-		p += sprintf(p, "CPU%d       ",j);
+-	*p++ = '\n';
++	seq_printf(p, "           ");
++	for (j=0; j<NR_CPUS; j++)
++		if (cpu_online(j))
++			seq_printf(p, "CPU%d       ",j);
++	seq_putc(p, '\n');
+ 
+ 	for (i = 0 ; i < NR_IRQS ; i++) {
+ 		spin_lock_irqsave(&irq_desc[i].lock, flags);
+ 		action = irq_desc[i].action;
+ 		if (!action) 
+-			goto end;
+-		p += sprintf(p, "%3d: ",i);
++			goto skip;
++		seq_printf(p, "%3d: ",i);
+ #ifndef CONFIG_SMP
+-		p += sprintf(p, "%10u ", kstat_irqs(i));
++		seq_printf(p, "%10u ", kstat_irqs(i));
+ #else
+-		for (j = 0; j < num_online_cpus(); j++)
+-			p += sprintf(p, "%10u ",
+-				kstat_cpu(cpu_logical_map(j)).irqs[i]);
++		for (j = 0; j < NR_CPUS; j++)
++			if (cpu_online(j))
++				seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+ #endif
+-		p += sprintf(p, " %14s", irq_desc[i].handler->typename);
+-		p += sprintf(p, "  %s", action->name);
++		seq_printf(p, " %14s", irq_desc[i].handler->typename);
++		seq_printf(p, "  %s", action->name);
+ 
+ 		for (action=action->next; action; action = action->next)
+-			p += sprintf(p, ", %s", action->name);
+-		*p++ = '\n';
+-	end:
++			seq_printf(p, ", %s", action->name);
++
++		seq_putc(p, '\n');
++skip:
+ 		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
+ 	}
+-	p += sprintf(p, "\n");
+-#ifdef notdef
+-#ifdef CONFIG_SMP
+-	p += sprintf(p, "LOC: ");
+-	for (j = 0; j < num_online_cpus(); j++)
+-		p += sprintf(p, "%10u ",
+-			apic_timer_irqs[cpu_logical_map(j)]);
+-	p += sprintf(p, "\n");
+-#endif
+-#endif
+-	p += sprintf(p, "ERR: %10lu\n", irq_err_count);
+-	return p - buf;
+-}
+-
++	seq_printf(p, "NMI: ");
++	for (j = 0; j < NR_CPUS; j++)
++		if (cpu_online(j))
++			seq_printf(p, "%10u ", nmi_count(j));
++	seq_putc(p, '\n');
+ 
+-int show_interrupts(struct seq_file *p, void *v)
+-{
+-	return(0);
++	return 0;
+ }
+ 
+ /*
+@@ -281,13 +269,12 @@
+ 	 * 0 return value means that this irq is already being
+ 	 * handled by some other CPU. (or is disabled)
+ 	 */
+-	int cpu = smp_processor_id();
+ 	irq_desc_t *desc = irq_desc + irq;
+ 	struct irqaction * action;
+ 	unsigned int status;
+ 
+ 	irq_enter();
+-	kstat_cpu(cpu).irqs[irq]++;
++	kstat_this_cpu.irqs[irq]++;
+ 	spin_lock(&desc->lock);
+ 	desc->handler->ack(irq);
+ 	/*
+@@ -384,7 +371,7 @@
+  */
+  
+ int request_irq(unsigned int irq,
+-		void (*handler)(int, void *, struct pt_regs *),
++		irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ 		unsigned long irqflags, 
+ 		const char * devname,
+ 		void *dev_id)
+@@ -430,15 +417,19 @@
+ }
+ 
+ int um_request_irq(unsigned int irq, int fd, int type,
+-		   void (*handler)(int, void *, struct pt_regs *),
++		   irqreturn_t (*handler)(int, void *, struct pt_regs *),
+ 		   unsigned long irqflags, const char * devname,
+ 		   void *dev_id)
+ {
+-	int retval;
++	int err;
+ 
+-	retval = request_irq(irq, handler, irqflags, devname, dev_id);
+-	if(retval) return(retval);
+-	return(activate_fd(irq, fd, type, dev_id));
++	err = request_irq(irq, handler, irqflags, devname, dev_id);
++	if(err) 
++		return(err);
++
++	if(fd != -1)
++		err = activate_fd(irq, fd, type, dev_id);
++	return(err);
+ }
+ 
+ /* this was setup_x86_irq but it seems pretty generic */
+diff -Naur a/arch/um/kernel/mem.c b/arch/um/kernel/mem.c
+--- a/arch/um/kernel/mem.c	Fri Aug 15 15:05:20 2003
++++ b/arch/um/kernel/mem.c	Fri Aug 15 15:11:21 2003
+@@ -119,11 +119,6 @@
+ 	return(kmem_top);
+ }
+ 
+-void set_kmem_end(unsigned long new)
+-{
+-	kmem_top = new;
+-}
+-
+ #ifdef CONFIG_HIGHMEM
+ /* Changed during early boot */
+ pte_t *kmap_pte;
+@@ -218,7 +213,7 @@
+ 		if(regions[i] == NULL) break;		
+ 	}
+ 	if(i == NREGIONS){
+-		printk("setup_range : no free regions\n");
++		printk("setup_one_range : no free regions\n");
+ 		i = -1;
+ 		goto out;
+ 	}
+@@ -227,7 +222,9 @@
+ 		fd = create_mem_file(len);
+ 
+ 	if(region == NULL){
+-		region = alloc_bootmem_low_pages(sizeof(*region));
++		if(kmalloc_ok)
++			region = kmalloc(sizeof(*region), GFP_KERNEL);
++		else region = alloc_bootmem_low_pages(sizeof(*region));
+ 		if(region == NULL)
+ 			panic("Failed to allocating mem_region");
+ 	}
+@@ -528,9 +525,9 @@
+ 	return(NREGIONS);
+ }
+ 
+-void setup_range(int fd, char *driver, unsigned long start, unsigned long pfn,
+-		 unsigned long len, int need_vm, struct mem_region *region, 
+-		 void *reserved)
++static void setup_range(int fd, char *driver, unsigned long start, 
++			unsigned long pfn, unsigned long len, int need_vm, 
++			struct mem_region *region, void *reserved)
+ {
+ 	int i, cur;
+ 
+diff -Naur a/arch/um/kernel/mem_user.c b/arch/um/kernel/mem_user.c
+--- a/arch/um/kernel/mem_user.c	Fri Aug 15 15:06:25 2003
++++ b/arch/um/kernel/mem_user.c	Fri Aug 15 15:12:36 2003
+@@ -111,6 +111,11 @@
+ 		offset = 0;
+ 	}
+ 
++	if(offset >= region->len){
++		printf("%d bytes of physical memory is insufficient\n",
++		       region->len);
++		exit(1);
++	}
+ 	loc = mmap(start, region->len - offset, PROT_READ | PROT_WRITE, 
+ 		   MAP_SHARED | MAP_FIXED, region->fd, offset);
+ 	if(loc != start){
+@@ -122,26 +127,26 @@
+ 
+ static int __init parse_iomem(char *str, int *add)
+ {
+-	struct stat buf;
++	struct stat64 buf;
+ 	char *file, *driver;
+ 	int fd;
+ 
+ 	driver = str;
+ 	file = strchr(str,',');
+ 	if(file == NULL){
+-		printk("parse_iomem : failed to parse iomem\n");
++		printf("parse_iomem : failed to parse iomem\n");
+ 		return(1);
+ 	}
+ 	*file = '\0';
+ 	file++;
+ 	fd = os_open_file(file, of_rdwr(OPENFLAGS()), 0);
+ 	if(fd < 0){
+-		printk("parse_iomem - Couldn't open io file, errno = %d\n", 
++		printf("parse_iomem - Couldn't open io file, errno = %d\n", 
+ 		       errno);
+ 		return(1);
+ 	}
+-	if(fstat(fd, &buf) < 0) {
+-		printk("parse_iomem - cannot fstat file, errno = %d\n", errno);
++	if(fstat64(fd, &buf) < 0) {
++		printf("parse_iomem - cannot fstat file, errno = %d\n", errno);
+ 		return(1);
+ 	}
+ 	add_iomem(driver, fd, buf.st_size);
+diff -Naur a/arch/um/kernel/process.c b/arch/um/kernel/process.c
+--- a/arch/um/kernel/process.c	Fri Aug 15 15:08:15 2003
++++ b/arch/um/kernel/process.c	Fri Aug 15 15:13:26 2003
+@@ -72,7 +72,6 @@
+ 		    SIGUSR1, SIGIO, SIGWINCH, SIGALRM, SIGVTALRM, -1);
+ 	set_handler(SIGUSR2, (__sighandler_t) sig_handler, 
+ 		    SA_NOMASK | flags, -1);
+-	(void) CHOOSE_MODE(signal(SIGCHLD, SIG_IGN), (void *) 0);
+ 	signal(SIGHUP, SIG_IGN);
+ 
+ 	init_irq_signals(altstack);
+@@ -127,7 +126,8 @@
+ 	if(err < 0) panic("Waiting for outer trampoline failed - errno = %d", 
+ 			  errno);
+ 	if(!WIFSIGNALED(status) || (WTERMSIG(status) != SIGKILL))
+-		panic("outer trampoline didn't exit with SIGKILL");
++		panic("outer trampoline didn't exit with SIGKILL, "
++		      "status = %d", status);
+ 
+ 	return(arg.pid);
+ }
+diff -Naur a/arch/um/kernel/process_kern.c b/arch/um/kernel/process_kern.c
+--- a/arch/um/kernel/process_kern.c	Fri Aug 15 15:06:24 2003
++++ b/arch/um/kernel/process_kern.c	Fri Aug 15 15:12:35 2003
+@@ -52,17 +52,12 @@
+ 
+ struct task_struct *get_task(int pid, int require)
+ {
+-        struct task_struct *task, *ret;
++        struct task_struct *ret;
+ 
+-        ret = NULL;
+         read_lock(&tasklist_lock);
+-        for_each_process(task){
+-                if(task->pid == pid){
+-                        ret = task;
+-                        break;
+-                }
+-        }
++	ret = find_task_by_pid(pid);
+         read_unlock(&tasklist_lock);
++
+         if(require && (ret == NULL)) panic("get_task couldn't find a task\n");
+         return(ret);
+ }
+@@ -103,13 +98,14 @@
+ 
+ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
+ {
+-	struct task_struct *p;
++	int pid;
+ 
+ 	current->thread.request.u.thread.proc = fn;
+ 	current->thread.request.u.thread.arg = arg;
+-	p = do_fork(CLONE_VM | flags, 0, NULL, 0, NULL, NULL);
+-	if(IS_ERR(p)) panic("do_fork failed in kernel_thread");
+-	return(p->pid);
++	pid = do_fork(CLONE_VM | flags, 0, NULL, 0, NULL, NULL);
++	if(pid < 0)
++		panic("do_fork failed in kernel_thread, errno = %d", pid);
++	return(pid);
+ }
+ 
+ void switch_mm(struct mm_struct *prev, struct mm_struct *next, 
+@@ -157,6 +153,10 @@
+ 	return(current);
+ }
+ 
++void prepare_to_copy(struct task_struct *tsk)
++{
++}
++
+ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
+ 		unsigned long stack_top, struct task_struct * p, 
+ 		struct pt_regs *regs)
+@@ -190,7 +190,7 @@
+ 
+ void default_idle(void)
+ {
+-	idle_timer();
++	uml_idle_timer();
+ 
+ 	atomic_inc(&init_mm.mm_count);
+ 	current->mm = &init_mm;
+@@ -363,6 +363,11 @@
+ 	return(clear_user(buf, size));
+ }
+ 
++int strlen_user_proc(char *str)
++{
++	return(strlen_user(str));
++}
++
+ int smp_sigio_handler(void)
+ {
+ #ifdef CONFIG_SMP
+diff -Naur a/arch/um/kernel/ptrace.c b/arch/um/kernel/ptrace.c
+--- a/arch/um/kernel/ptrace.c	Fri Aug 15 15:04:36 2003
++++ b/arch/um/kernel/ptrace.c	Fri Aug 15 15:10:33 2003
+@@ -311,11 +311,8 @@
+ 
+ 	/* the 0x80 provides a way for the tracing parent to distinguish
+ 	   between a syscall stop and SIGTRAP delivery */
+- 	current->exit_code = SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
+- 					? 0x80 : 0);
+-	current->state = TASK_STOPPED;
+-	notify_parent(current, SIGCHLD);
+-	schedule();
++	ptrace_notify(SIGTRAP | ((current->ptrace & PT_TRACESYSGOOD)
++				 ? 0x80 : 0));
+ 
+ 	/*
+ 	 * this isn't the same as continuing with a signal, but it will do
+diff -Naur a/arch/um/kernel/sigio_kern.c b/arch/um/kernel/sigio_kern.c
+--- a/arch/um/kernel/sigio_kern.c	Fri Aug 15 15:04:52 2003
++++ b/arch/um/kernel/sigio_kern.c	Fri Aug 15 15:10:59 2003
+@@ -6,18 +6,21 @@
+ #include "linux/kernel.h"
+ #include "linux/list.h"
+ #include "linux/slab.h"
+-#include "asm/irq.h"
++#include "linux/signal.h"
++#include "linux/interrupt.h"
+ #include "init.h"
+ #include "sigio.h"
+ #include "irq_user.h"
++#include "irq_kern.h"
+ 
+ /* Protected by sigio_lock() called from write_sigio_workaround */
+ static int sigio_irq_fd = -1;
+ 
+-void sigio_interrupt(int irq, void *data, struct pt_regs *unused)
++irqreturn_t sigio_interrupt(int irq, void *data, struct pt_regs *unused)
+ {
+ 	read_sigio_fd(sigio_irq_fd);
+ 	reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ);
++	return(IRQ_HANDLED);
+ }
+ 
+ int write_sigio_irq(int fd)
+diff -Naur a/arch/um/kernel/signal_kern.c b/arch/um/kernel/signal_kern.c
+--- a/arch/um/kernel/signal_kern.c	Fri Aug 15 15:06:38 2003
++++ b/arch/um/kernel/signal_kern.c	Fri Aug 15 15:12:40 2003
+@@ -36,7 +36,7 @@
+ 	if(sig == SIGSEGV){
+ 		struct k_sigaction *ka;
+ 
+-		ka = &current->sig->action[SIGSEGV - 1];
++		ka = &current->sighand->action[SIGSEGV - 1];
+ 		ka->sa.sa_handler = SIG_DFL;
+ 	}
+ 	force_sig(SIGSEGV, current);
+@@ -142,7 +142,7 @@
+ 		return(0);
+ 
+ 	/* Whee!  Actually deliver the signal.  */
+-	ka = &current->sig->action[sig -1 ];
++	ka = &current->sighand->action[sig -1 ];
+ 	err = handle_signal(regs, sig, ka, &info, oldset, error);
+ 	if(!err) return(1);
+ 
+@@ -201,7 +201,7 @@
+ 	}
+ }
+ 
+-int sys_rt_sigsuspend(sigset_t *unewset, size_t sigsetsize)
++int sys_rt_sigsuspend(sigset_t __user *unewset, size_t sigsetsize)
+ {
+ 	sigset_t saveset, newset;
+ 
+@@ -227,6 +227,42 @@
+ 	}
+ }
+ 
++int sys_sigaction(int sig, const struct old_sigaction __user *act,
++			 struct old_sigaction __user *oact)
++{
++	struct k_sigaction new_ka, old_ka;
++	int ret;
++
++	if (act) {
++		old_sigset_t mask;
++		if (verify_area(VERIFY_READ, act, sizeof(*act)) ||
++		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
++		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
++			return -EFAULT;
++		__get_user(new_ka.sa.sa_flags, &act->sa_flags);
++		__get_user(mask, &act->sa_mask);
++		siginitset(&new_ka.sa.sa_mask, mask);
++	}
++
++	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
++
++	if (!ret && oact) {
++		if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) ||
++		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
++		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
++			return -EFAULT;
++		__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
++		__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
++	}
++
++	return ret;
++}
++
++int sys_sigaltstack(const stack_t *uss, stack_t *uoss)
++{
++	return(do_sigaltstack(uss, uoss, PT_REGS_SP(&current->thread.regs)));
++}
++
+ static int copy_sc_from_user(struct pt_regs *to, void *from, 
+ 			     struct arch_frame_data *arch)
+ {
+@@ -239,8 +275,8 @@
+ 
+ int sys_sigreturn(struct pt_regs regs)
+ {
+-	void *sc = sp_to_sc(PT_REGS_SP(&current->thread.regs));
+-	void *mask = sp_to_mask(PT_REGS_SP(&current->thread.regs));
++	void __user *sc = sp_to_sc(PT_REGS_SP(&current->thread.regs));
++	void __user *mask = sp_to_mask(PT_REGS_SP(&current->thread.regs));
+ 	int sig_size = (_NSIG_WORDS - 1) * sizeof(unsigned long);
+ 
+ 	spin_lock_irq(&current->sighand->siglock);
+@@ -257,7 +293,8 @@
+ 
+ int sys_rt_sigreturn(struct pt_regs regs)
+ {
+-	struct ucontext *uc = sp_to_uc(PT_REGS_SP(&current->thread.regs));
++	unsigned long sp = PT_REGS_SP(&current->thread.regs);
++	struct ucontext __user *uc = sp_to_uc(sp);
+ 	void *fp;
+ 	int sig_size = _NSIG_WORDS * sizeof(unsigned long);
+ 
+diff -Naur a/arch/um/kernel/skas/Makefile b/arch/um/kernel/skas/Makefile
+--- a/arch/um/kernel/skas/Makefile	Fri Aug 15 15:05:00 2003
++++ b/arch/um/kernel/skas/Makefile	Fri Aug 15 15:11:08 2003
+@@ -7,18 +7,22 @@
+ 	process_kern.o syscall_kern.o syscall_user.o time.o tlb.o trap_user.o \
+ 	sys-$(SUBARCH)/
+ 
++host-progs	:= util/mk_ptregs
++clean-files	:= include/skas_ptregs.h
++
+ USER_OBJS = $(filter %_user.o,$(obj-y)) process.o time.o
+ USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
+ 
+-include/skas_ptregs.h : util/mk_ptregs
+-	util/mk_ptregs > $@
+-
+-util/mk_ptregs :
+-	$(MAKE) -C util
++$(TOPDIR)/arch/um/include/skas_ptregs.h : $(src)/util/mk_ptregs
++	@echo -n '  Generating $@'
++	@$< > $@.tmp
++	@if [ -r $@ ] && cmp -s $@ $@.tmp; then \
++		echo ' (unchanged)'; \
++		rm -f $@.tmp; \
++	else \
++		echo ' (updated)'; \
++		mv -f $@.tmp $@; \
++	fi
+ 
+ $(USER_OBJS) : %.o: %.c
+ 	$(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $<
+-
+-clean :
+-	$(MAKE) -C util clean
+-	$(RM) -f include/skas_ptregs.h
+diff -Naur a/arch/um/kernel/skas/include/mode.h b/arch/um/kernel/skas/include/mode.h
+--- a/arch/um/kernel/skas/include/mode.h	Fri Aug 15 15:06:34 2003
++++ b/arch/um/kernel/skas/include/mode.h	Fri Aug 15 15:12:37 2003
+@@ -20,6 +20,7 @@
+ extern void halt_skas(void);
+ extern void reboot_skas(void);
+ extern void kill_off_processes_skas(void);
++extern int is_skas_winch(int pid, int fd, void *data);
+ 
+ #endif
+ 
+diff -Naur a/arch/um/kernel/skas/include/uaccess.h b/arch/um/kernel/skas/include/uaccess.h
+--- a/arch/um/kernel/skas/include/uaccess.h	Fri Aug 15 15:05:28 2003
++++ b/arch/um/kernel/skas/include/uaccess.h	Fri Aug 15 15:11:44 2003
+@@ -19,7 +19,7 @@
+ #define access_ok_skas(type, addr, size) \
+ 	((segment_eq(get_fs(), KERNEL_DS)) || \
+ 	 (((unsigned long) (addr) < TASK_SIZE) && \
+-	  ((unsigned long) (addr) + (size) < TASK_SIZE)))
++	  ((unsigned long) (addr) + (size) <= TASK_SIZE)))
+ 
+ static inline int verify_area_skas(int type, const void * addr, 
+ 				   unsigned long size)
+diff -Naur a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c
+--- a/arch/um/kernel/skas/process.c	Fri Aug 15 15:08:54 2003
++++ b/arch/um/kernel/skas/process.c	Fri Aug 15 15:13:46 2003
+@@ -4,6 +4,7 @@
+  */
+ 
+ #include <stdlib.h>
++#include <unistd.h>
+ #include <errno.h>
+ #include <signal.h>
+ #include <setjmp.h>
+@@ -24,6 +25,16 @@
+ #include "os.h"
+ #include "proc_mm.h"
+ #include "skas_ptrace.h"
++#include "chan_user.h"
++
++int is_skas_winch(int pid, int fd, void *data)
++{
++	if(pid != getpid())
++		return(0);
++
++	register_winch_irq(-1, fd, -1, data);
++	return(1);
++}
+ 
+ unsigned long exec_regs[FRAME_SIZE];
+ unsigned long exec_fp_regs[HOST_FP_SIZE];
+@@ -72,8 +83,6 @@
+ 	handle_syscall(regs);
+ }
+ 
+-int userspace_pid;
+-
+ static int userspace_tramp(void *arg)
+ {
+ 	init_new_thread_signals(0);
+@@ -83,6 +92,8 @@
+ 	return(0);
+ }
+ 
++int userspace_pid;
++
+ void start_userspace(void)
+ {
+ 	void *stack;
+@@ -149,6 +160,7 @@
+ 			case SIGILL:
+ 			case SIGBUS:
+ 			case SIGFPE:
++			case SIGWINCH:
+ 				user_signal(WSTOPSIG(status), regs);
+ 				break;
+ 			default:
+@@ -328,7 +340,8 @@
+ int new_mm(int from)
+ {
+ 	struct proc_mm_op copy;
+-	int n, fd = os_open_file("/proc/mm", of_write(OPENFLAGS()), 0);
++	int n, fd = os_open_file("/proc/mm", 
++				 of_cloexec(of_write(OPENFLAGS())), 0);
+ 
+ 	if(fd < 0)
+ 		return(-errno);
+@@ -342,6 +355,7 @@
+ 			printk("new_mm : /proc/mm copy_segments failed, "
+ 			       "errno = %d\n", errno);
+ 	}
++
+ 	return(fd);
+ }
+ 
+diff -Naur a/arch/um/kernel/skas/process_kern.c b/arch/um/kernel/skas/process_kern.c
+--- a/arch/um/kernel/skas/process_kern.c	Fri Aug 15 15:04:51 2003
++++ b/arch/um/kernel/skas/process_kern.c	Fri Aug 15 15:10:56 2003
+@@ -61,9 +61,8 @@
+ 	thread_wait(&current->thread.mode.skas.switch_buf, 
+ 		    current->thread.mode.skas.fork_buf);
+ 
+-#ifdef CONFIG_SMP
+-	schedule_tail(NULL);
+-#endif
++	if(current->thread.prev_sched != NULL)
++		schedule_tail(current->thread.prev_sched);
+ 	current->thread.prev_sched = NULL;
+ 
+ 	n = run_kernel_thread(fn, arg, &current->thread.exec_buf);
+@@ -93,9 +92,8 @@
+ 		    current->thread.mode.skas.fork_buf);
+   	
+ 	force_flush_all();
+-#ifdef CONFIG_SMP
+-	schedule_tail(current->thread.prev_sched);
+-#endif
++	if(current->thread.prev_sched != NULL)
++		schedule_tail(current->thread.prev_sched);
+ 	current->thread.prev_sched = NULL;
+ 	unblock_signals();
+ 
+@@ -164,7 +162,7 @@
+ 	capture_signal_stack();
+ 
+ 	init_new_thread_signals(1);
+-	idle_timer();
++	uml_idle_timer();
+ 
+ 	init_task.thread.request.u.thread.proc = start_kernel_proc;
+ 	init_task.thread.request.u.thread.arg = NULL;
+diff -Naur a/arch/um/kernel/skas/util/mk_ptregs.c b/arch/um/kernel/skas/util/mk_ptregs.c
+--- a/arch/um/kernel/skas/util/mk_ptregs.c	Fri Aug 15 15:05:20 2003
++++ b/arch/um/kernel/skas/util/mk_ptregs.c	Fri Aug 15 15:11:21 2003
+@@ -1,3 +1,4 @@
++#include <stdio.h>
+ #include <asm/ptrace.h>
+ #include <asm/user.h>
+ 
+diff -Naur a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
+--- a/arch/um/kernel/smp.c	Fri Aug 15 15:04:50 2003
++++ b/arch/um/kernel/smp.c	Fri Aug 15 15:10:52 2003
+@@ -140,8 +140,10 @@
+ 
+         current->thread.request.u.thread.proc = idle_proc;
+         current->thread.request.u.thread.arg = (void *) cpu;
+-	new_task = do_fork(CLONE_VM | CLONE_IDLETASK, 0, NULL, 0, NULL, NULL);
+-	if(IS_ERR(new_task)) panic("do_fork failed in idle_thread");
++	new_task = copy_process(CLONE_VM | CLONE_IDLETASK, 0, NULL, 0, NULL, 
++				NULL);
++	if(IS_ERR(new_task)) 
++		panic("copy_process failed in idle_thread");
+ 
+ 	cpu_tasks[cpu] = ((struct cpu_task) 
+ 		          { .pid = 	new_task->thread.mode.tt.extern_pid,
+@@ -150,6 +152,7 @@
+ 	CHOOSE_MODE(write(new_task->thread.mode.tt.switch_pipe[1], &c, 
+ 			  sizeof(c)),
+ 		    ({ panic("skas mode doesn't support SMP"); }));
++	wake_up_forked_process(new_task);
+ 	return(new_task);
+ }
+ 
+@@ -254,15 +257,19 @@
+ 	atomic_inc(&scf_finished);
+ }
+ 
+-int smp_call_function(void (*_func)(void *info), void *_info, int nonatomic, 
+-		      int wait)
++int smp_call_function_on_cpu(void (*_func)(void *info), void *_info, int wait,
++				unsigned long mask)
+ {
+-	int cpus = num_online_cpus() - 1;
+-	int i;
+-
+-	if (!cpus)
+-		return 0;
++	int i, cpu, num_cpus;
+ 
++	cpu = get_cpu();
++	mask &= ~(1UL << cpu);
++	num_cpus = hweight32(mask);
++	if(num_cpus == 0){
++		put_cpu_no_resched();
++		return(0);
++	}
++	
+ 	spin_lock_bh(&call_lock);
+ 	atomic_set(&scf_started, 0);
+ 	atomic_set(&scf_finished, 0);
+@@ -270,19 +277,25 @@
+ 	info = _info;
+ 
+ 	for (i=0;i<NR_CPUS;i++)
+-		if((i != current->thread_info->cpu) && 
+-		   test_bit(i, &cpu_online_map))
++		if(cpu_online(i) && ((1UL << i) & mask))
+ 			write(cpu_data[i].ipi_pipe[1], "C", 1);
+ 
+-	while (atomic_read(&scf_started) != cpus)
++	while(atomic_read(&scf_started) != num_cpus)
+ 		barrier();
+ 
+-	if (wait)
+-		while (atomic_read(&scf_finished) != cpus)
++	if(wait)
++		while(atomic_read(&scf_finished) != num_cpus)
+ 			barrier();
+ 
+ 	spin_unlock_bh(&call_lock);
+-	return 0;
++	put_cpu_no_resched();
++	return(0);
++}
++
++int smp_call_function(void (*_func)(void *info), void *_info, int nonatomic, 
++		      int wait)
++{
++	return(smp_call_function_on_cpu(_func, _info, wait, cpu_online_map));
+ }
+ 
+ #endif
+diff -Naur a/arch/um/kernel/sys_call_table.c b/arch/um/kernel/sys_call_table.c
+--- a/arch/um/kernel/sys_call_table.c	Fri Aug 15 15:07:57 2003
++++ b/arch/um/kernel/sys_call_table.c	Fri Aug 15 15:13:24 2003
+@@ -219,6 +219,18 @@
+ extern syscall_handler_t sys_gettid;
+ extern syscall_handler_t sys_readahead;
+ extern syscall_handler_t sys_tkill;
++extern syscall_handler_t sys_setxattr;
++extern syscall_handler_t sys_lsetxattr;
++extern syscall_handler_t sys_fsetxattr;
++extern syscall_handler_t sys_getxattr;
++extern syscall_handler_t sys_lgetxattr;
++extern syscall_handler_t sys_fgetxattr;
++extern syscall_handler_t sys_listxattr;
++extern syscall_handler_t sys_llistxattr;
++extern syscall_handler_t sys_flistxattr;
++extern syscall_handler_t sys_removexattr;
++extern syscall_handler_t sys_lremovexattr;
++extern syscall_handler_t sys_fremovexattr;
+ extern syscall_handler_t sys_sendfile64;
+ extern syscall_handler_t sys_futex;
+ extern syscall_handler_t sys_sched_setaffinity;
+@@ -235,6 +247,19 @@
+ extern syscall_handler_t sys_epoll_wait;
+ extern syscall_handler_t sys_remap_file_pages;
+ extern syscall_handler_t sys_set_tid_address;
++extern syscall_handler_t sys_timer_create;
++extern syscall_handler_t sys_timer_settime;
++extern syscall_handler_t sys_timer_gettime;
++extern syscall_handler_t sys_timer_getoverrun;
++extern syscall_handler_t sys_timer_delete;
++extern syscall_handler_t sys_clock_settime;
++extern syscall_handler_t sys_clock_gettime;
++extern syscall_handler_t sys_clock_getres;
++extern syscall_handler_t sys_clock_nanosleep;
++extern syscall_handler_t sys_statfs64;
++extern syscall_handler_t sys_fstatfs64;
++extern syscall_handler_t sys_tgkill;
++extern syscall_handler_t sys_utimes;
+ 
+ #ifdef CONFIG_NFSD
+ #define NFSSERVCTL sys_nfsservctl
+@@ -459,18 +484,18 @@
+ 	[ __NR_getdents64 ] = sys_getdents64,
+ 	[ __NR_gettid ] = sys_gettid,
+ 	[ __NR_readahead ] = sys_readahead,
+-	[ __NR_setxattr ] = sys_ni_syscall,
+-	[ __NR_lsetxattr ] = sys_ni_syscall,
+-	[ __NR_fsetxattr ] = sys_ni_syscall,
+-	[ __NR_getxattr ] = sys_ni_syscall,
+-	[ __NR_lgetxattr ] = sys_ni_syscall,
+-	[ __NR_fgetxattr ] = sys_ni_syscall,
+-	[ __NR_listxattr ] = sys_ni_syscall,
+-	[ __NR_llistxattr ] = sys_ni_syscall,
+-	[ __NR_flistxattr ] = sys_ni_syscall,
+-	[ __NR_removexattr ] = sys_ni_syscall,
+-	[ __NR_lremovexattr ] = sys_ni_syscall,
+-	[ __NR_fremovexattr ] = sys_ni_syscall,
++	[ __NR_setxattr ] = sys_setxattr,
++	[ __NR_lsetxattr ] = sys_lsetxattr,
++	[ __NR_fsetxattr ] = sys_fsetxattr,
++	[ __NR_getxattr ] = sys_getxattr,
++	[ __NR_lgetxattr ] = sys_lgetxattr,
++	[ __NR_fgetxattr ] = sys_fgetxattr,
++	[ __NR_listxattr ] = sys_listxattr,
++	[ __NR_llistxattr ] = sys_llistxattr,
++	[ __NR_flistxattr ] = sys_flistxattr,
++	[ __NR_removexattr ] = sys_removexattr,
++	[ __NR_lremovexattr ] = sys_lremovexattr,
++	[ __NR_fremovexattr ] = sys_fremovexattr,
+ 	[ __NR_tkill ] = sys_tkill,
+ 	[ __NR_sendfile64 ] = sys_sendfile64,
+ 	[ __NR_futex ] = sys_futex,
+@@ -488,6 +513,19 @@
+ 	[ __NR_epoll_wait ] = sys_epoll_wait,
+         [ __NR_remap_file_pages ] = sys_remap_file_pages,
+         [ __NR_set_tid_address ] = sys_set_tid_address,
++	[ __NR_timer_create ] = sys_timer_create,
++	[ __NR_timer_settime ] = sys_timer_settime,
++	[ __NR_timer_gettime ] = sys_timer_gettime,
++	[ __NR_timer_getoverrun ] = sys_timer_getoverrun,
++	[ __NR_timer_delete ] = sys_timer_delete,
++	[ __NR_clock_settime ] = sys_clock_settime,
++	[ __NR_clock_gettime ] = sys_clock_gettime,
++	[ __NR_clock_getres ] = sys_clock_getres,
++	[ __NR_clock_nanosleep ] = sys_clock_nanosleep,
++	[ __NR_statfs64 ] = sys_statfs64,
++	[ __NR_fstatfs64 ] = sys_fstatfs64,
++	[ __NR_tgkill ] = sys_tgkill,
++	[ __NR_utimes ] = sys_utimes,
+ 
+ 	ARCH_SYSCALLS
+ 	[ LAST_SYSCALL + 1 ... NR_syscalls ] = 
+diff -Naur a/arch/um/kernel/syscall_kern.c b/arch/um/kernel/syscall_kern.c
+--- a/arch/um/kernel/syscall_kern.c	Fri Aug 15 15:07:37 2003
++++ b/arch/um/kernel/syscall_kern.c	Fri Aug 15 15:13:03 2003
+@@ -35,39 +35,40 @@
+ 
+ long sys_fork(void)
+ {
+-	struct task_struct *p;
++	long ret;
+ 
+ 	current->thread.forking = 1;
+-        p = do_fork(SIGCHLD, 0, NULL, 0, NULL, NULL);
++        ret = do_fork(SIGCHLD, 0, NULL, 0, NULL, NULL);
+ 	current->thread.forking = 0;
+-	return(IS_ERR(p) ? PTR_ERR(p) : p->pid);
++	return(ret);
+ }
+ 
+-long sys_clone(unsigned long clone_flags, unsigned long newsp)
++long sys_clone(unsigned long clone_flags, unsigned long newsp, 
++	       int *parent_tid, int *child_tid)
+ {
+-	struct task_struct *p;
++	long ret;
+ 
+ 	current->thread.forking = 1;
+-	p = do_fork(clone_flags, newsp, NULL, 0, NULL, NULL);
++	ret = do_fork(clone_flags, newsp, NULL, 0, parent_tid, child_tid);
+ 	current->thread.forking = 0;
+-	return(IS_ERR(p) ? PTR_ERR(p) : p->pid);
++	return(ret);
+ }
+ 
+ long sys_vfork(void)
+ {
+-	struct task_struct *p;
++	long ret;
+ 
+ 	current->thread.forking = 1;
+-	p = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0, NULL, NULL);
++	ret = do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, NULL, 0, NULL, 
++		      NULL);
+ 	current->thread.forking = 0;
+-	return(IS_ERR(p) ? PTR_ERR(p) : p->pid);
++	return(ret);
+ }
+ 
+ /* common code for old and new mmaps */
+-static inline long do_mmap2(
+-	unsigned long addr, unsigned long len,
+-	unsigned long prot, unsigned long flags,
+-	unsigned long fd, unsigned long pgoff)
++long do_mmap2(struct mm_struct *mm, unsigned long addr, unsigned long len,
++	      unsigned long prot, unsigned long flags, unsigned long fd,
++	      unsigned long pgoff)
+ {
+ 	int error = -EBADF;
+ 	struct file * file = NULL;
+@@ -79,9 +80,9 @@
+ 			goto out;
+ 	}
+ 
+-	down_write(&current->mm->mmap_sem);
+-	error = do_mmap_pgoff(file, addr, len, prot, flags, pgoff);
+-	up_write(&current->mm->mmap_sem);
++	down_write(&mm->mmap_sem);
++	error = do_mmap_pgoff(mm, file, addr, len, prot, flags, pgoff);
++	up_write(&mm->mmap_sem);
+ 
+ 	if (file)
+ 		fput(file);
+@@ -93,7 +94,7 @@
+ 	       unsigned long prot, unsigned long flags,
+ 	       unsigned long fd, unsigned long pgoff)
+ {
+-	return do_mmap2(addr, len, prot, flags, fd, pgoff);
++	return do_mmap2(current->mm, addr, len, prot, flags, fd, pgoff);
+ }
+ 
+ /*
+@@ -120,7 +121,8 @@
+ 	if (offset & ~PAGE_MASK)
+ 		goto out;
+ 
+-	err = do_mmap2(addr, len, prot, flags, fd, offset >> PAGE_SHIFT);
++	err = do_mmap2(current->mm, addr, len, prot, flags, fd, 
++		       offset >> PAGE_SHIFT);
+  out:
+ 	return err;
+ }
+@@ -141,37 +143,6 @@
+         return error;
+ }
+ 
+-int sys_sigaction(int sig, const struct old_sigaction *act,
+-			 struct old_sigaction *oact)
+-{
+-	struct k_sigaction new_ka, old_ka;
+-	int ret;
+-
+-	if (act) {
+-		old_sigset_t mask;
+-		if (verify_area(VERIFY_READ, act, sizeof(*act)) ||
+-		    __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
+-		    __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
+-			return -EFAULT;
+-		__get_user(new_ka.sa.sa_flags, &act->sa_flags);
+-		__get_user(mask, &act->sa_mask);
+-		siginitset(&new_ka.sa.sa_mask, mask);
+-	}
+-
+-	ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
+-
+-	if (!ret && oact) {
+-		if (verify_area(VERIFY_WRITE, oact, sizeof(*oact)) ||
+-		    __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
+-		    __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
+-			return -EFAULT;
+-		__put_user(old_ka.sa.sa_flags, &oact->sa_flags);
+-		__put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
+-	}
+-
+-	return ret;
+-}
+-
+ /*
+  * sys_ipc() is the de-multiplexer for the SysV IPC calls..
+  *
+@@ -253,7 +224,7 @@
+ 		return sys_shmctl (first, second,
+ 				   (struct shmid_ds *) ptr);
+ 	default:
+-		return -EINVAL;
++		return -ENOSYS;
+ 	}
+ }
+ 
+@@ -302,11 +273,6 @@
+ 	return error;
+ }
+ 
+-int sys_sigaltstack(const stack_t *uss, stack_t *uoss)
+-{
+-	return(do_sigaltstack(uss, uoss, PT_REGS_SP(&current->thread.regs)));
+-}
+-
+ long execute_syscall(void *r)
+ {
+ 	return(CHOOSE_MODE_PROC(execute_syscall_tt, execute_syscall_skas, r));
+diff -Naur a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c
+--- a/arch/um/kernel/sysrq.c	Fri Aug 15 15:05:01 2003
++++ b/arch/um/kernel/sysrq.c	Fri Aug 15 15:11:13 2003
+@@ -11,6 +11,14 @@
+ #include "sysrq.h"
+ #include "user_util.h"
+ 
++void show_stack(struct task_struct *task, unsigned long *sp)
++{
++	if(task)
++		show_trace_task(task);
++	else
++		show_trace(sp);
++}
++
+ void show_trace(unsigned long * stack)
+ {
+         int i;
+diff -Naur a/arch/um/kernel/time.c b/arch/um/kernel/time.c
+--- a/arch/um/kernel/time.c	Fri Aug 15 15:04:49 2003
++++ b/arch/um/kernel/time.c	Fri Aug 15 15:10:46 2003
+@@ -15,12 +15,16 @@
+ #include "process.h"
+ #include "signal_user.h"
+ #include "time_user.h"
++#include "kern_constants.h"
+ 
+ extern struct timeval xtime;
+ 
++struct timeval local_offset = { 0, 0 };
++
+ void timer(void)
+ {
+ 	gettimeofday(&xtime, NULL);
++	timeradd(&xtime, &local_offset, &xtime);
+ }
+ 
+ void set_interval(int timer_type)
+@@ -65,7 +69,7 @@
+ 		       errno);
+ }
+ 
+-void idle_timer(void)
++void uml_idle_timer(void)
+ {
+ 	if(signal(SIGVTALRM, SIG_IGN) == SIG_ERR)
+ 		panic("Couldn't unset SIGVTALRM handler");
+@@ -82,8 +86,6 @@
+ 	set_interval(ITIMER_VIRTUAL);
+ }
+ 
+-struct timeval local_offset = { 0, 0 };
+-
+ void do_gettimeofday(struct timeval *tv)
+ {
+ 	unsigned long flags;
+@@ -100,7 +102,7 @@
+ 	unsigned long flags;
+ 	struct timeval tv_in;
+ 
+-	if ((unsigned long)tv->tv_nsec >= NSEC_PER_SEC)
++	if ((unsigned long) tv->tv_nsec >= UM_NSEC_PER_SEC)
+ 		return -EINVAL;
+ 
+ 	tv_in.tv_sec = tv->tv_sec;
+@@ -110,6 +112,8 @@
+ 	gettimeofday(&now, NULL);
+ 	timersub(&tv_in, &now, &local_offset);
+ 	time_unlock(flags);
++
++	return(0);
+ }
+ 
+ void idle_sleep(int secs)
+diff -Naur a/arch/um/kernel/time_kern.c b/arch/um/kernel/time_kern.c
+--- a/arch/um/kernel/time_kern.c	Fri Aug 15 15:07:19 2003
++++ b/arch/um/kernel/time_kern.c	Fri Aug 15 15:12:46 2003
+@@ -55,12 +55,13 @@
+ 	do_timer(&regs);
+ }
+ 
+-void um_timer(int irq, void *dev, struct pt_regs *regs)
++irqreturn_t um_timer(int irq, void *dev, struct pt_regs *regs)
+ {
+ 	do_timer(regs);
+-	write_seqlock(&xtime_lock);
++	write_seqlock_irq(&xtime_lock);
+ 	timer();
+-	write_sequnlock(&xtime_lock);
++	write_sequnlock_irq(&xtime_lock);
++	return(IRQ_HANDLED);
+ }
+ 
+ long um_time(int * tloc)
+@@ -78,12 +79,12 @@
+ long um_stime(int * tptr)
+ {
+ 	int value;
+-	struct timeval new;
++	struct timespec new;
+ 
+ 	if (get_user(value, tptr))
+                 return -EFAULT;
+ 	new.tv_sec = value;
+-	new.tv_usec = 0;
++	new.tv_nsec = 0;
+ 	do_settimeofday(&new);
+ 	return 0;
+ }
+@@ -122,7 +123,9 @@
+ void timer_handler(int sig, union uml_pt_regs *regs)
+ {
+ #ifdef CONFIG_SMP
++	local_irq_disable();
+ 	update_process_times(user_context(UPT_SP(regs)));
++	local_irq_enable();
+ #endif
+ 	if(current->thread_info->cpu == 0)
+ 		timer_irq(regs);
+diff -Naur a/arch/um/kernel/trap_kern.c b/arch/um/kernel/trap_kern.c
+--- a/arch/um/kernel/trap_kern.c	Fri Aug 15 15:04:01 2003
++++ b/arch/um/kernel/trap_kern.c	Fri Aug 15 15:10:18 2003
+@@ -16,6 +16,7 @@
+ #include "asm/tlbflush.h"
+ #include "asm/a.out.h"
+ #include "asm/current.h"
++#include "asm/irq.h"
+ #include "user_util.h"
+ #include "kern_util.h"
+ #include "kern.h"
+@@ -180,6 +181,11 @@
+ 	else relay_signal(sig, regs);
+ }
+ 
++void winch(int sig, union uml_pt_regs *regs)
++{
++	do_IRQ(WINCH_IRQ, regs);
++}
++
+ void trap_init(void)
+ {
+ }
+diff -Naur a/arch/um/kernel/trap_user.c b/arch/um/kernel/trap_user.c
+--- a/arch/um/kernel/trap_user.c	Fri Aug 15 15:05:45 2003
++++ b/arch/um/kernel/trap_user.c	Fri Aug 15 15:11:52 2003
+@@ -82,6 +82,8 @@
+ 		     .is_irq 		= 0 },
+ 	[ SIGILL ] { .handler 		= relay_signal,
+ 		     .is_irq 		= 0 },
++	[ SIGWINCH ] { .handler		= winch,
++		       .is_irq		= 1 },
+ 	[ SIGBUS ] { .handler 		= bus_handler,
+ 		     .is_irq 		= 0 },
+ 	[ SIGSEGV] { .handler 		= segv_handler,
+diff -Naur a/arch/um/kernel/tt/include/uaccess.h b/arch/um/kernel/tt/include/uaccess.h
+--- a/arch/um/kernel/tt/include/uaccess.h	Fri Aug 15 15:07:25 2003
++++ b/arch/um/kernel/tt/include/uaccess.h	Fri Aug 15 15:12:52 2003
+@@ -46,18 +46,20 @@
+ 
+ static inline int copy_from_user_tt(void *to, const void *from, int n)
+ {
+-	return(access_ok_tt(VERIFY_READ, from, n) ?
+-	       __do_copy_from_user(to, from, n, 
+-				   &current->thread.fault_addr,
+-				   &current->thread.fault_catcher) : n);
++	if(!access_ok_tt(VERIFY_READ, from, n)) 
++		return(n);
++
++	return(__do_copy_from_user(to, from, n, &current->thread.fault_addr,
++				   &current->thread.fault_catcher));
+ }
+ 
+ static inline int copy_to_user_tt(void *to, const void *from, int n)
+ {
+-	return(access_ok_tt(VERIFY_WRITE, to, n) ?
+-	       __do_copy_to_user(to, from, n, 
+-				   &current->thread.fault_addr,
+-				   &current->thread.fault_catcher) : n);
++	if(!access_ok_tt(VERIFY_WRITE, to, n))
++		return(n);
++		
++	return(__do_copy_to_user(to, from, n, &current->thread.fault_addr,
++				 &current->thread.fault_catcher));
+ }
+ 
+ extern int __do_strncpy_from_user(char *dst, const char *src, size_t n,
+@@ -67,7 +69,9 @@
+ {
+ 	int n;
+ 
+-	if(!access_ok_tt(VERIFY_READ, src, 1)) return(-EFAULT);
++	if(!access_ok_tt(VERIFY_READ, src, 1)) 
++		return(-EFAULT);
++
+ 	n = __do_strncpy_from_user(dst, src, count, 
+ 				   &current->thread.fault_addr,
+ 				   &current->thread.fault_catcher);
+@@ -87,10 +91,11 @@
+ 
+ static inline int clear_user_tt(void *mem, int len)
+ {
+-	return(access_ok_tt(VERIFY_WRITE, mem, len) ? 
+-	       __do_clear_user(mem, len, 
+-			       &current->thread.fault_addr,
+-			       &current->thread.fault_catcher) : len);
++	if(!access_ok_tt(VERIFY_WRITE, mem, len))
++		return(len);
++
++	return(__do_clear_user(mem, len, &current->thread.fault_addr,
++			       &current->thread.fault_catcher));
+ }
+ 
+ extern int __do_strnlen_user(const char *str, unsigned long n,
+diff -Naur a/arch/um/kernel/tt/process_kern.c b/arch/um/kernel/tt/process_kern.c
+--- a/arch/um/kernel/tt/process_kern.c	Fri Aug 15 15:07:55 2003
++++ b/arch/um/kernel/tt/process_kern.c	Fri Aug 15 15:13:23 2003
+@@ -104,7 +104,10 @@
+ 
+ void release_thread_tt(struct task_struct *task)
+ {
+-	os_kill_process(task->thread.mode.tt.extern_pid, 0);
++	int pid = task->thread.mode.tt.extern_pid;
++
++	if(os_getpid() != pid)
++		os_kill_process(pid, 0);
+ }
+ 
+ void exit_thread_tt(void)
+@@ -125,27 +128,27 @@
+ 	UPT_SC(&current->thread.regs.regs) = (void *) (&sig + 1);
+ 	suspend_new_thread(current->thread.mode.tt.switch_pipe[0]);
+ 
+-	block_signals();
++	force_flush_all();
++	if(current->thread.prev_sched != NULL)
++		schedule_tail(current->thread.prev_sched);
++	current->thread.prev_sched = NULL;
++
+ 	init_new_thread_signals(1);
+-#ifdef CONFIG_SMP
+-	schedule_tail(current->thread.prev_sched);
+-#endif
+ 	enable_timer();
+ 	free_page(current->thread.temp_stack);
+ 	set_cmdline("(kernel thread)");
+-	force_flush_all();
+ 
+-	current->thread.prev_sched = NULL;
+ 	change_sig(SIGUSR1, 1);
+ 	change_sig(SIGVTALRM, 1);
+ 	change_sig(SIGPROF, 1);
+-	unblock_signals();
++	local_irq_enable();
+ 	if(!run_kernel_thread(fn, arg, &current->thread.exec_buf))
+ 		do_exit(0);
+ }
+ 
+ static int new_thread_proc(void *stack)
+ {
++	local_irq_disable();
+ 	init_new_thread_stack(stack, new_thread_handler);
+ 	os_usr1_process(os_getpid());
+ 	return(0);
+@@ -165,35 +168,32 @@
+  	UPT_SC(&current->thread.regs.regs) = (void *) (&sig + 1);
+ 	suspend_new_thread(current->thread.mode.tt.switch_pipe[0]);
+ 
+-#ifdef CONFIG_SMP	
+-	schedule_tail(NULL);
+-#endif
++	force_flush_all();
++	if(current->thread.prev_sched != NULL)
++		schedule_tail(current->thread.prev_sched);
++	current->thread.prev_sched = NULL;
++
+ 	enable_timer();
+ 	change_sig(SIGVTALRM, 1);
+ 	local_irq_enable();
+-	force_flush_all();
+ 	if(current->mm != current->parent->mm)
+ 		protect_memory(uml_reserved, high_physmem - uml_reserved, 1, 
+ 			       1, 0, 1);
+ 	task_protections((unsigned long) current->thread_info);
+ 
+-	current->thread.prev_sched = NULL;
+-
+ 	free_page(current->thread.temp_stack);
++	local_irq_disable();
+ 	change_sig(SIGUSR1, 0);
+ 	set_user_mode(current);
+ }
+ 
+-static int sigusr1 = SIGUSR1;
+-
+ int fork_tramp(void *stack)
+ {
+-	int sig = sigusr1;
+-
+ 	local_irq_disable();
++	arch_init_thread();
+ 	init_new_thread_stack(stack, finish_fork_handler);
+ 
+-	kill(os_getpid(), sig);
++	os_usr1_process(os_getpid());
+ 	return(0);
+ }
+ 
+diff -Naur a/arch/um/kernel/tt/ptproxy/proxy.c b/arch/um/kernel/tt/ptproxy/proxy.c
+--- a/arch/um/kernel/tt/ptproxy/proxy.c	Fri Aug 15 15:07:01 2003
++++ b/arch/um/kernel/tt/ptproxy/proxy.c	Fri Aug 15 15:12:44 2003
+@@ -293,10 +293,10 @@
+ }
+ 
+ char gdb_init_string[] = 
+-"att 1
+-b panic
+-b stop
+-handle SIGWINCH nostop noprint pass
++"att 1 \n\
++b panic \n\
++b stop \n\
++handle SIGWINCH nostop noprint pass \n\
+ ";
+ 
+ int start_debugger(char *prog, int startup, int stop, int *fd_out)
+diff -Naur a/arch/um/kernel/tt/tracer.c b/arch/um/kernel/tt/tracer.c
+--- a/arch/um/kernel/tt/tracer.c	Fri Aug 15 15:03:51 2003
++++ b/arch/um/kernel/tt/tracer.c	Fri Aug 15 15:10:12 2003
+@@ -39,7 +39,7 @@
+ 		return(0);
+ 
+ 	register_winch_irq(tracer_winch[0], fd, -1, data);
+-	return(0);
++	return(1);
+ }
+ 
+ static void tracer_winch_handler(int sig)
+@@ -401,7 +401,7 @@
+ 		
+ 		if(!strcmp(line, "go"))	debug_stop = 0;
+ 		else if(!strcmp(line, "parent")) debug_parent = 1;
+-		else printk("Unknown debug option : '%s'\n", line);
++		else printf("Unknown debug option : '%s'\n", line);
+ 
+ 		line = next;
+ 	}
+diff -Naur a/arch/um/kernel/tt/uaccess_user.c b/arch/um/kernel/tt/uaccess_user.c
+--- a/arch/um/kernel/tt/uaccess_user.c	Fri Aug 15 15:05:00 2003
++++ b/arch/um/kernel/tt/uaccess_user.c	Fri Aug 15 15:11:10 2003
+@@ -8,15 +8,20 @@
+ #include <string.h>
+ #include "user_util.h"
+ #include "uml_uaccess.h"
++#include "task.h"
++#include "kern_util.h"
+ 
+ int __do_copy_from_user(void *to, const void *from, int n,
+ 			void **fault_addr, void **fault_catcher)
+ {
++	struct tt_regs save = TASK_REGS(get_current())->tt;
+ 	unsigned long fault;
+ 	int faulted;
+ 
+ 	fault = __do_user_copy(to, from, n, fault_addr, fault_catcher,
+ 			       __do_copy, &faulted);
++	TASK_REGS(get_current())->tt = save;
++
+ 	if(!faulted) return(0);
+ 	else return(n - (fault - (unsigned long) from));
+ }
+@@ -29,11 +34,14 @@
+ int __do_strncpy_from_user(char *dst, const char *src, unsigned long count,
+ 			   void **fault_addr, void **fault_catcher)
+ {
++	struct tt_regs save = TASK_REGS(get_current())->tt;
+ 	unsigned long fault;
+ 	int faulted;
+ 
+ 	fault = __do_user_copy(dst, src, count, fault_addr, fault_catcher,
+ 			       __do_strncpy, &faulted);
++	TASK_REGS(get_current())->tt = save;
++
+ 	if(!faulted) return(strlen(dst));
+ 	else return(-1);
+ }
+@@ -46,11 +54,14 @@
+ int __do_clear_user(void *mem, unsigned long len,
+ 		    void **fault_addr, void **fault_catcher)
+ {
++	struct tt_regs save = TASK_REGS(get_current())->tt;
+ 	unsigned long fault;
+ 	int faulted;
+ 
+ 	fault = __do_user_copy(mem, NULL, len, fault_addr, fault_catcher,
+ 			       __do_clear, &faulted);
++	TASK_REGS(get_current())->tt = save;
++
+ 	if(!faulted) return(0);
+ 	else return(len - (fault - (unsigned long) mem));
+ }
+@@ -58,6 +69,7 @@
+ int __do_strnlen_user(const char *str, unsigned long n,
+ 		      void **fault_addr, void **fault_catcher)
+ {
++	struct tt_regs save = TASK_REGS(get_current())->tt;
+ 	int ret;
+ 	unsigned long *faddrp = (unsigned long *)fault_addr;
+ 	jmp_buf jbuf;
+@@ -71,6 +83,8 @@
+ 	}
+ 	*fault_addr = NULL;
+ 	*fault_catcher = NULL;
++
++	TASK_REGS(get_current())->tt = save;
+ 	return ret;
+ }
+ 
+diff -Naur a/arch/um/kernel/tty_log.c b/arch/um/kernel/tty_log.c
+--- a/arch/um/kernel/tty_log.c	Fri Aug 15 15:07:04 2003
++++ b/arch/um/kernel/tty_log.c	Fri Aug 15 15:12:44 2003
+@@ -13,6 +13,7 @@
+ #include <sys/time.h>
+ #include "init.h"
+ #include "user.h"
++#include "kern_util.h"
+ #include "os.h"
+ 
+ #define TTY_LOG_DIR "./"
+@@ -24,29 +25,40 @@
+ #define TTY_LOG_OPEN 1
+ #define TTY_LOG_CLOSE 2
+ #define TTY_LOG_WRITE 3
++#define TTY_LOG_EXEC 4
++
++#define TTY_READ 1
++#define TTY_WRITE 2
+ 
+ struct tty_log_buf {
+ 	int what;
+ 	unsigned long tty;
+ 	int len;
++	int direction;
++	unsigned long sec;
++	unsigned long usec;
+ };
+ 
+-int open_tty_log(void *tty)
++int open_tty_log(void *tty, void *current_tty)
+ {
+ 	struct timeval tv;
+ 	struct tty_log_buf data;
+ 	char buf[strlen(tty_log_dir) + sizeof("01234567890-01234567\0")];
+ 	int fd;
+ 
++	gettimeofday(&tv, NULL);
+ 	if(tty_log_fd != -1){
+-		data = ((struct tty_log_buf) { what :	TTY_LOG_OPEN,
+-					       tty : (unsigned long) tty,
+-					       len : 0 });
++		data = ((struct tty_log_buf) { .what 	= TTY_LOG_OPEN,
++					       .tty  = (unsigned long) tty,
++					       .len  = sizeof(current_tty),
++					       .direction = 0,
++					       .sec = tv.tv_sec,
++					       .usec = tv.tv_usec } );
+ 		write(tty_log_fd, &data, sizeof(data));
++		write(tty_log_fd, &current_tty, data.len);
+ 		return(tty_log_fd);
+ 	}
+ 
+-	gettimeofday(&tv, NULL);
+ 	sprintf(buf, "%s/%0u-%0u", tty_log_dir, (unsigned int) tv.tv_sec, 
+  		(unsigned int) tv.tv_usec);
+ 
+@@ -62,30 +74,114 @@
+ void close_tty_log(int fd, void *tty)
+ {
+ 	struct tty_log_buf data;
++	struct timeval tv;
+ 
+ 	if(tty_log_fd != -1){
+-		data = ((struct tty_log_buf) { what :	TTY_LOG_CLOSE,
+-					       tty : (unsigned long) tty,
+-					       len : 0 });
++		gettimeofday(&tv, NULL);
++		data = ((struct tty_log_buf) { .what 	= TTY_LOG_CLOSE,
++					       .tty  = (unsigned long) tty,
++					       .len  = 0,
++					       .direction = 0,
++					       .sec = tv.tv_sec,
++					       .usec = tv.tv_usec } );
+ 		write(tty_log_fd, &data, sizeof(data));
+ 		return;
+ 	}
+ 	close(fd);
+ }
+ 
+-int write_tty_log(int fd, char *buf, int len, void *tty)
++static int log_chunk(int fd, const char *buf, int len)
+ {
++	int total = 0, try, missed, n;
++	char chunk[64];
++
++	while(len > 0){
++		try = (len > sizeof(chunk)) ? sizeof(chunk) : len;
++		missed = copy_from_user_proc(chunk, (char *) buf, try);
++		try -= missed;
++		n = write(fd, chunk, try);
++		if(n != try)	/* a short write does not set errno */
++			return((n < 0) ? -errno : -EIO);
++		if(missed != 0)
++			return(-EFAULT);
++
++		len -= try;
++		total += try;
++		buf += try;
++	}
++
++	return(total);
++}
++
++int write_tty_log(int fd, const char *buf, int len, void *tty, int is_read)
++{
++	struct timeval tv;
+ 	struct tty_log_buf data;
++	int direction;
+ 
+ 	if(fd == tty_log_fd){
+-		data = ((struct tty_log_buf) { what :	TTY_LOG_WRITE,
+-					       tty : (unsigned long) tty,
+-					       len : len });
++		gettimeofday(&tv, NULL);
++		direction = is_read ? TTY_READ : TTY_WRITE;
++		data = ((struct tty_log_buf) { .what 	= TTY_LOG_WRITE,
++					       .tty  = (unsigned long) tty,
++					       .len  = len,
++					       .direction = direction,
++					       .sec = tv.tv_sec,
++					       .usec = tv.tv_usec } );
+ 		write(tty_log_fd, &data, sizeof(data));
+ 	}
+-	return(write(fd, buf, len));
++
++	return(log_chunk(fd, buf, len));
+ }
+ 
++void log_exec(char **argv, void *tty)
++{
++	struct timeval tv;
++	struct tty_log_buf data;
++	char **ptr,*arg;
++	int len;
++	
++	if(tty_log_fd == -1) return;
++
++	gettimeofday(&tv, NULL);
++
++	len = 0;
++	for(ptr = argv; ; ptr++){
++		if(copy_from_user_proc(&arg, ptr, sizeof(arg)))
++			return;
++		if(arg == NULL) break;
++		len += strlen_user_proc(arg);
++	}
++
++	data = ((struct tty_log_buf) { .what 	= TTY_LOG_EXEC,
++				       .tty  = (unsigned long) tty,
++				       .len  = len,
++				       .direction = 0,
++				       .sec = tv.tv_sec,
++				       .usec = tv.tv_usec } );
++	write(tty_log_fd, &data, sizeof(data));
++
++	for(ptr = argv; ; ptr++){
++		if(copy_from_user_proc(&arg, ptr, sizeof(arg)))
++			return;
++		if(arg == NULL) break;
++		log_chunk(tty_log_fd, arg, strlen_user_proc(arg));
++	}
++}
++
++extern void register_tty_logger(int (*opener)(void *, void *),
++				int (*writer)(int, const char *, int, 
++					      void *, int),
++				void (*closer)(int, void *));
++
++static int register_logger(void)
++{
++	register_tty_logger(open_tty_log, write_tty_log, close_tty_log);
++	return(0);
++}
++
++__uml_initcall(register_logger);
++
+ static int __init set_tty_log_dir(char *name, int *add)
+ {
+ 	tty_log_dir = name;
+@@ -104,7 +200,7 @@
+ 
+ 	tty_log_fd = strtoul(name, &end, 0);
+ 	if((*end != '\0') || (end == name)){
+-		printk("set_tty_log_fd - strtoul failed on '%s'\n", name);
++		printf("set_tty_log_fd - strtoul failed on '%s'\n", name);
+ 		tty_log_fd = -1;
+ 	}
+ 	return 0;
+diff -Naur a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c
+--- a/arch/um/kernel/um_arch.c	Fri Aug 15 15:07:48 2003
++++ b/arch/um/kernel/um_arch.c	Fri Aug 15 15:13:14 2003
+@@ -38,13 +38,18 @@
+ #include "mode_kern.h"
+ #include "mode.h"
+ 
+-#define DEFAULT_COMMAND_LINE "root=6200"
++#define DEFAULT_COMMAND_LINE "root=ubd0"
+ 
+ struct cpuinfo_um boot_cpu_data = { 
+ 	.loops_per_jiffy	= 0,
+ 	.ipi_pipe		= { -1, -1 }
+ };
+ 
++/* Placeholder to make UML link until the vsyscall stuff is actually 
++ * implemented
++ */
++void *__kernel_vsyscall;
++
+ unsigned long thread_saved_pc(struct task_struct *task)
+ {
+ 	return(os_process_pc(CHOOSE_MODE_PROC(thread_pid_tt, thread_pid_skas,
+@@ -61,10 +66,14 @@
+ 		return 0;
+ #endif
+ 
+-	seq_printf(m, "bogomips\t: %lu.%02lu\n",
++	seq_printf(m, "processor\t: %d\n", index);
++	seq_printf(m, "vendor_id\t: User Mode Linux\n");
++	seq_printf(m, "model name\t: UML\n");
++	seq_printf(m, "mode\t\t: %s\n", CHOOSE_MODE("tt", "skas"));
++	seq_printf(m, "host\t\t: %s\n", host_info);
++	seq_printf(m, "bogomips\t: %lu.%02lu\n\n",
+ 		   loops_per_jiffy/(500000/HZ),
+ 		   (loops_per_jiffy/(5000/HZ)) % 100);
+-	seq_printf(m, "host\t\t: %s\n", host_info);
+ 
+ 	return(0);
+ }
+@@ -134,12 +143,12 @@
+ 	if(umid != NULL){
+ 		snprintf(argv1_begin, 
+ 			 (argv1_end - argv1_begin) * sizeof(*ptr), 
+-			 "(%s)", umid);
++			 "(%s) ", umid);
+ 		ptr = &argv1_begin[strlen(argv1_begin)];
+ 	}
+ 	else ptr = argv1_begin;
+ 
+-	snprintf(ptr, (argv1_end - ptr) * sizeof(*ptr), " [%s]", cmd);
++	snprintf(ptr, (argv1_end - ptr) * sizeof(*ptr), "[%s]", cmd);
+ 	memset(argv1_begin + strlen(argv1_begin), '\0', 
+ 	       argv1_end - argv1_begin - strlen(argv1_begin));
+ #endif
+@@ -179,7 +188,7 @@
+ static int __init uml_ncpus_setup(char *line, int *add)
+ {
+        if (!sscanf(line, "%d", &ncpus)) {
+-               printk("Couldn't parse [%s]\n", line);
++               printf("Couldn't parse [%s]\n", line);
+                return -1;
+        }
+ 
+@@ -210,7 +219,7 @@
+ 
+ static int __init mode_tt_setup(char *line, int *add)
+ {
+-	printk("CONFIG_MODE_TT disabled - 'mode=tt' ignored\n");
++	printf("CONFIG_MODE_TT disabled - 'mode=tt' ignored\n");
+ 	return(0);
+ }
+ 
+@@ -221,7 +230,7 @@
+ 
+ static int __init mode_tt_setup(char *line, int *add)
+ {
+-	printk("CONFIG_MODE_SKAS disabled - 'mode=tt' redundant\n");
++	printf("CONFIG_MODE_SKAS disabled - 'mode=tt' redundant\n");
+ 	return(0);
+ }
+ 
+@@ -369,6 +378,7 @@
+ 		2 * PAGE_SIZE;
+ 
+ 	task_protections((unsigned long) &init_thread_info);
++	os_flush_stdout();
+ 
+ 	return(CHOOSE_MODE(start_uml_tt(), start_uml_skas()));
+ }
+diff -Naur a/arch/um/kernel/umid.c b/arch/um/kernel/umid.c
+--- a/arch/um/kernel/umid.c	Fri Aug 15 15:08:44 2003
++++ b/arch/um/kernel/umid.c	Fri Aug 15 15:13:39 2003
+@@ -33,18 +33,19 @@
+ static int umid_is_random = 1;
+ static int umid_inited = 0;
+ 
+-static int make_umid(void);
++static int make_umid(int (*printer)(const char *fmt, ...));
+ 
+-static int __init set_umid(char *name, int is_random)
++static int __init set_umid(char *name, int is_random, 
++			   int (*printer)(const char *fmt, ...))
+ {
+ 	if(umid_inited){
+-		printk("Unique machine name can't be set twice\n");
++		(*printer)("Unique machine name can't be set twice\n");
+ 		return(-1);
+ 	}
+ 
+ 	if(strlen(name) > UMID_LEN - 1)
+-		printk("Unique machine name is being truncated to %s "
+-		       "characters\n", UMID_LEN);
++		(*printer)("Unique machine name is being truncated to %d "
++			   "characters\n", UMID_LEN); /* integer, so %d */
+ 	strlcpy(umid, name, sizeof(umid));
+ 
+ 	umid_is_random = is_random;
+@@ -54,7 +55,7 @@
+ 
+ static int __init set_umid_arg(char *name, int *add)
+ {
+-	return(set_umid(name, 0));
++	return(set_umid(name, 0, printf));
+ }
+ 
+ __uml_setup("umid=", set_umid_arg,
+@@ -67,7 +68,7 @@
+ {
+ 	int n;
+ 
+-	if(!umid_inited && make_umid()) return(-1);
++	if(!umid_inited && make_umid(printk)) return(-1);
+ 
+ 	n = strlen(uml_dir) + strlen(umid) + strlen(name) + 1;
+ 	if(n > len){
+@@ -92,14 +93,14 @@
+ 	fd = os_open_file(file, of_create(of_excl(of_rdwr(OPENFLAGS()))), 
+ 			  0644);
+ 	if(fd < 0){
+-		printk("Open of machine pid file \"%s\" failed - "
++		printf("Open of machine pid file \"%s\" failed - "
+ 		       "errno = %d\n", file, -fd);
+ 		return 0;
+ 	}
+ 
+ 	sprintf(pid, "%d\n", os_getpid());
+ 	if(write(fd, pid, strlen(pid)) != strlen(pid))
+-		printk("Write of pid file failed - errno = %d\n", errno);
++		printf("Write of pid file failed - errno = %d\n", errno);
+ 	close(fd);
+ 	return 0;
+ }
+@@ -197,7 +198,7 @@
+ 	if((strlen(name) > 0) && (name[strlen(name) - 1] != '/')){
+ 		uml_dir = malloc(strlen(name) + 1);
+ 		if(uml_dir == NULL){
+-			printk("Failed to malloc uml_dir - error = %d\n",
++			printf("Failed to malloc uml_dir - error = %d\n",
+ 			       errno);
+ 			uml_dir = name;
+ 			return(0);
+@@ -217,7 +218,7 @@
+ 		char *home = getenv("HOME");
+ 
+ 		if(home == NULL){
+-			printk("make_uml_dir : no value in environment for "
++			printf("make_uml_dir : no value in environment for "
+ 			       "$HOME\n");
+ 			exit(1);
+ 		}
+@@ -239,25 +240,25 @@
+ 	strcpy(uml_dir, dir);
+ 	
+ 	if((mkdir(uml_dir, 0777) < 0) && (errno != EEXIST)){
+-	        printk("Failed to mkdir %s - errno = %i\n", uml_dir, errno);
++	        printf("Failed to mkdir %s - errno = %i\n", uml_dir, errno);
+ 		return(-1);
+ 	}
+ 	return 0;
+ }
+ 
+-static int __init make_umid(void)
++static int __init make_umid(int (*printer)(const char *fmt, ...))
+ {
+ 	int fd, err;
+ 	char tmp[strlen(uml_dir) + UMID_LEN + 1];
+ 
+ 	strlcpy(tmp, uml_dir, sizeof(tmp));
+ 
+-	if(*umid == 0){
++	if(!umid_inited){
+ 		strcat(tmp, "XXXXXX");
+ 		fd = mkstemp(tmp);
+ 		if(fd < 0){
+-			printk("make_umid - mkstemp failed, errno = %d\n",
+-			       errno);
++			(*printer)("make_umid - mkstemp failed, errno = %d\n",
++				   errno);
+ 			return(1);
+ 		}
+ 
+@@ -267,7 +268,7 @@
+ 		 * for directories.
+ 		 */
+ 		unlink(tmp);
+-		set_umid(&tmp[strlen(uml_dir)], 1);
++		set_umid(&tmp[strlen(uml_dir)], 1, printer);
+ 	}
+ 	
+ 	sprintf(tmp, "%s%s", uml_dir, umid);
+@@ -275,14 +276,14 @@
+ 	if((err = mkdir(tmp, 0777)) < 0){
+ 		if(errno == EEXIST){
+ 			if(not_dead_yet(tmp)){
+-				printk("umid '%s' is in use\n", umid);
++				(*printer)("umid '%s' is in use\n", umid);
+ 				return(-1);
+ 			}
+ 			err = mkdir(tmp, 0777);
+ 		}
+ 	}
+ 	if(err < 0){
+-		printk("Failed to create %s - errno = %d\n", umid, errno);
++		(*printer)("Failed to create %s - errno = %d\n", umid, errno);
+ 		return(-1);
+ 	}
+ 
+@@ -295,7 +296,13 @@
+ );
+ 
+ __uml_postsetup(make_uml_dir);
+-__uml_postsetup(make_umid);
++
++static int __init make_umid_setup(void)
++{
++	return(make_umid(printf));
++}
++
++__uml_postsetup(make_umid_setup);
+ __uml_postsetup(create_pid_file);
+ 
+ /*
+diff -Naur a/arch/um/kernel/user_util.c b/arch/um/kernel/user_util.c
+--- a/arch/um/kernel/user_util.c	Fri Aug 15 15:04:48 2003
++++ b/arch/um/kernel/user_util.c	Fri Aug 15 15:10:41 2003
+@@ -119,17 +119,6 @@
+ 	}
+ }
+ 
+-int clone_and_wait(int (*fn)(void *), void *arg, void *sp, int flags)
+-{
+-	int pid;
+-
+-	pid = clone(fn, sp, flags, arg);
+- 	if(pid < 0) return(-1);
+-	wait_for_stop(pid, SIGSTOP, PTRACE_CONT, NULL);
+-	ptrace(PTRACE_CONT, pid, 0, 0);
+-	return(pid);
+-}
+-
+ int raw(int fd, int complain)
+ {
+ 	struct termios tt;
+diff -Naur a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c
+--- a/arch/um/os-Linux/drivers/tuntap_user.c	Fri Aug 15 15:09:23 2003
++++ b/arch/um/os-Linux/drivers/tuntap_user.c	Fri Aug 15 15:14:02 2003
+@@ -142,7 +142,7 @@
+ 			return(-errno);
+ 		}
+ 		memset(&ifr, 0, sizeof(ifr));
+-		ifr.ifr_flags = IFF_TAP;
++		ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+ 		strlcpy(ifr.ifr_name, pri->dev_name, sizeof(ifr.ifr_name));
+ 		if(ioctl(pri->fd, TUNSETIFF, (void *) &ifr) < 0){
+ 			printk("TUNSETIFF failed, errno = %d", errno);
+diff -Naur a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
+--- a/arch/um/os-Linux/file.c	Fri Aug 15 15:09:15 2003
++++ b/arch/um/os-Linux/file.c	Fri Aug 15 15:13:54 2003
+@@ -315,7 +315,7 @@
+ 	return(new);
+ }
+ 
+-int create_unix_socket(char *file, int len)
++int create_unix_socket(char *file, int len, int close_on_exec)
+ {
+ 	struct sockaddr_un addr;
+ 	int sock, err;
+@@ -327,6 +327,10 @@
+ 		return(-errno);
+ 	}
+ 
++	if(close_on_exec && fcntl(sock, F_SETFD, FD_CLOEXEC) < 0)
++		printk("create_unix_socket : Setting FD_CLOEXEC failed, "
++		       "errno = %d\n", errno);
++
+ 	addr.sun_family = AF_UNIX;
+ 
+ 	/* XXX Be more careful about overflow */
+@@ -342,6 +346,37 @@
+ 	return(sock);
+ }
+ 
++void os_flush_stdout(void)
++{
++	fflush(stdout);
++}
++
++int os_lock_file(int fd, int excl)
++{
++	int type = excl ? F_WRLCK : F_RDLCK;
++	struct flock lock = ((struct flock) { .l_type	= type,
++					      .l_whence	= SEEK_SET,
++					      .l_start	= 0,
++					      .l_len	= 0 } );
++	int err, save;
++
++	err = fcntl(fd, F_SETLK, &lock);
++	if(!err)
++		goto out;
++
++	save = -errno;
++	err = fcntl(fd, F_GETLK, &lock);
++	if(err){
++		err = -errno;
++		goto out;
++	}
++	
++	printk("F_SETLK failed, file already locked by pid %d\n", lock.l_pid);
++	err = save;
++ out:
++	return(err);
++}
++
+ /*
+  * Overrides for Emacs so that we follow Linus's tabbing style.
+  * Emacs will notice this stuff at the end of the file and automatically
+diff -Naur a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile
+--- a/arch/um/sys-i386/Makefile	Fri Aug 15 15:04:47 2003
++++ b/arch/um/sys-i386/Makefile	Fri Aug 15 15:10:35 2003
+@@ -1,7 +1,8 @@
+-obj-y = bugs.o checksum.o extable.o fault.o ksyms.o ldt.o module.o \
+-	ptrace.o ptrace_user.o semaphore.o sigcontext.o syscalls.o sysrq.o
++obj-y = bugs.o checksum.o extable.o fault.o ksyms.o ldt.o ptrace.o \
++	ptrace_user.o semaphore.o sigcontext.o syscalls.o sysrq.o
+ 
+ obj-$(CONFIG_HIGHMEM) += highmem.o
++obj-$(CONFIG_MODULES) += module.o
+ 
+ USER_OBJS := bugs.o ptrace_user.o sigcontext.o fault.o
+ USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
+@@ -9,6 +10,8 @@
+ SYMLINKS = semaphore.c highmem.c module.c
+ SYMLINKS := $(foreach f,$(SYMLINKS),$(src)/$f)
+ 
++clean-files := $(SYMLINKS)
++
+ semaphore.c-dir = kernel
+ highmem.c-dir = mm
+ module.c-dir = kernel
+@@ -24,8 +27,7 @@
+ $(SYMLINKS): 
+ 	$(call make_link,$@)
+ 
+-clean:
+-	$(MAKE) -C util clean
++subdir- := util
+ 
+ fastdep:
+ 
+diff -Naur a/arch/um/sys-i386/bugs.c b/arch/um/sys-i386/bugs.c
+--- a/arch/um/sys-i386/bugs.c	Fri Aug 15 15:07:41 2003
++++ b/arch/um/sys-i386/bugs.c	Fri Aug 15 15:13:14 2003
+@@ -8,6 +8,7 @@
+ #include <errno.h>
+ #include <string.h>
+ #include <sys/signal.h>
++#include <asm/ldt.h>
+ #include "kern_util.h"
+ #include "user.h"
+ #include "sysdep/ptrace.h"
+@@ -16,8 +17,8 @@
+ #define MAXTOKEN 64
+ 
+ /* Set during early boot */
+-int cpu_has_cmov = 1;
+-int cpu_has_xmm = 0;
++int host_has_cmov = 1;
++int host_has_xmm = 0;
+ 
+ static char token(int fd, char *buf, int len, char stop)
+ {
+@@ -104,6 +105,25 @@
+ 	return(1);
+ }
+ 
++static void disable_lcall(void)
++{
++	struct modify_ldt_ldt_s ldt;
++	int err;
++
++	bzero(&ldt, sizeof(ldt));
++	ldt.entry_number = 7;
++	ldt.base_addr = 0;
++	ldt.limit = 0;
++	err = modify_ldt(1, &ldt, sizeof(ldt));
++	if(err)
++		printk("Failed to disable lcall7 - errno = %d\n", errno);
++}
++
++void arch_init_thread(void)
++{
++	disable_lcall();
++}
++
+ void arch_check_bugs(void)
+ {
+ 	int have_it;
+@@ -113,8 +133,8 @@
+ 		       "checks\n");
+ 		return;
+ 	}
+-	if(check_cpu_feature("cmov", &have_it)) cpu_has_cmov = have_it;
+-	if(check_cpu_feature("xmm", &have_it)) cpu_has_xmm = have_it;
++	if(check_cpu_feature("cmov", &have_it)) host_has_cmov = have_it;
++	if(check_cpu_feature("xmm", &have_it)) host_has_xmm = have_it;
+ }
+ 
+ int arch_handle_signal(int sig, union uml_pt_regs *regs)
+@@ -130,18 +150,18 @@
+ 	if((*((char *) ip) != 0x0f) || ((*((char *) (ip + 1)) & 0xf0) != 0x40))
+ 		return(0);
+ 
+-	if(cpu_has_cmov == 0)
++	if(host_has_cmov == 0)
+ 		panic("SIGILL caused by cmov, which this processor doesn't "
+ 		      "implement, boot a filesystem compiled for older "
+ 		      "processors");
+-	else if(cpu_has_cmov == 1)
++	else if(host_has_cmov == 1)
+ 		panic("SIGILL caused by cmov, which this processor claims to "
+ 		      "implement");
+-	else if(cpu_has_cmov == -1)
++	else if(host_has_cmov == -1)
+ 		panic("SIGILL caused by cmov, couldn't tell if this processor "
+ 		      "implements it, boot a filesystem compiled for older "
+ 		      "processors");
+-	else panic("Bad value for cpu_has_cmov (%d)", cpu_has_cmov);
++	else panic("Bad value for host_has_cmov (%d)", host_has_cmov);
+ 	return(0);
+ }
+ 
+diff -Naur a/arch/um/uml.lds.S b/arch/um/uml.lds.S
+--- a/arch/um/uml.lds.S	Fri Aug 15 15:05:37 2003
++++ b/arch/um/uml.lds.S	Fri Aug 15 15:11:48 2003
+@@ -26,7 +26,11 @@
+   . = ALIGN(4096);		/* Init code and data */
+   _stext = .;
+   __init_begin = .;
+-  .text.init : { *(.text.init) }
++  .init.text : { 
++	_sinittext = .;
++	*(.init.text)
++	_einittext = .;
++  }
+   . = ALIGN(4096);
+   .text      :
+   {
+@@ -38,7 +42,7 @@
+ 
+   #include "asm/common.lds.S"
+ 
+-  .data.init : { *(.data.init) }
++  init.data : { *(init.data) }
+   .data    :
+   {
+     . = ALIGN(KERNEL_STACK_SIZE);		/* init_task */
+diff -Naur a/arch/um/util/mk_constants_kern.c b/arch/um/util/mk_constants_kern.c
+--- a/arch/um/util/mk_constants_kern.c	Fri Aug 15 15:04:15 2003
++++ b/arch/um/util/mk_constants_kern.c	Fri Aug 15 15:10:27 2003
+@@ -1,5 +1,6 @@
+ #include "linux/kernel.h"
+ #include "linux/stringify.h"
++#include "linux/time.h"
+ #include "asm/page.h"
+ 
+ extern void print_head(void);
+@@ -11,6 +12,7 @@
+ {
+   print_head();
+   print_constant_int("UM_KERN_PAGE_SIZE", PAGE_SIZE);
++
+   print_constant_str("UM_KERN_EMERG", KERN_EMERG);
+   print_constant_str("UM_KERN_ALERT", KERN_ALERT);
+   print_constant_str("UM_KERN_CRIT", KERN_CRIT);
+@@ -19,6 +21,8 @@
+   print_constant_str("UM_KERN_NOTICE", KERN_NOTICE);
+   print_constant_str("UM_KERN_INFO", KERN_INFO);
+   print_constant_str("UM_KERN_DEBUG", KERN_DEBUG);
++
++  print_constant_int("UM_NSEC_PER_SEC", NSEC_PER_SEC);
+   print_tail();
+   return(0);
+ }
+diff -Naur a/fs/Makefile b/fs/Makefile
+--- a/fs/Makefile	Fri Aug 15 15:06:45 2003
++++ b/fs/Makefile	Fri Aug 15 15:12:41 2003
+@@ -91,3 +91,5 @@
+ obj-$(CONFIG_XFS_FS)		+= xfs/
+ obj-$(CONFIG_AFS_FS)		+= afs/
+ obj-$(CONFIG_BEFS_FS)		+= befs/
++obj-$(CONFIG_HOSTFS)		+= hostfs/
++obj-$(CONFIG_HPPFS)		+= hppfs/
+diff -Naur a/fs/hostfs/Makefile b/fs/hostfs/Makefile
+--- a/fs/hostfs/Makefile	Wed Dec 31 19:00:00 1969
++++ b/fs/hostfs/Makefile	Fri Aug 15 15:10:07 2003
+@@ -0,0 +1,36 @@
++# 
++# Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
++# Licensed under the GPL
++#
++
++# struct stat64 changed the inode field name between 2.2 and 2.4 from st_ino
++# to __st_ino.  It stayed in the same place, so as long as the correct name
++# is used, hostfs compiled on 2.2 should work on 2.4 and vice versa.
++
++STAT64_INO_FIELD := $(shell grep -q __st_ino /usr/include/bits/stat.h && \
++				echo __)st_ino
++
++hostfs-objs := hostfs_kern.o hostfs_user.o
++
++obj-y = 
++obj-$(CONFIG_HOSTFS) += hostfs.o
++
++SINGLE_OBJS = $(foreach f,$(patsubst %.o,%,$(obj-y) $(obj-m)),$($(f)-objs))
++
++USER_OBJS := $(filter %_user.o,$(obj-y) $(obj-m) $(SINGLE_OBJS))
++USER_OBJS := $(foreach file,$(USER_OBJS),$(obj)/$(file))
++
++USER_CFLAGS += -DSTAT64_INO_FIELD=$(STAT64_INO_FIELD)
++
++$(USER_OBJS) : %.o: %.c
++	$(CC) $(CFLAGS_$(notdir $@)) $(USER_CFLAGS) -c -o $@ $<
++
++clean:
++
++modules:
++
++fastdep:
++
++dep:
++
++archmrproper: clean
+diff -Naur a/fs/hostfs/hostfs.h b/fs/hostfs/hostfs.h
+--- a/fs/hostfs/hostfs.h	Wed Dec 31 19:00:00 1969
++++ b/fs/hostfs/hostfs.h	Fri Aug 15 15:10:06 2003
+@@ -0,0 +1,79 @@
++#ifndef __UM_FS_HOSTFS
++#define __UM_FS_HOSTFS
++
++#include "os.h"
++
++/* These are exactly the same definitions as in fs.h, but the names are 
++ * changed so that this file can be included in both kernel and user files.
++ */
++
++#define HOSTFS_ATTR_MODE	1
++#define HOSTFS_ATTR_UID 	2
++#define HOSTFS_ATTR_GID 	4
++#define HOSTFS_ATTR_SIZE	8
++#define HOSTFS_ATTR_ATIME	16
++#define HOSTFS_ATTR_MTIME	32
++#define HOSTFS_ATTR_CTIME	64
++#define HOSTFS_ATTR_ATIME_SET	128
++#define HOSTFS_ATTR_MTIME_SET	256
++#define HOSTFS_ATTR_FORCE	512	/* Not a change, but a change it */
++#define HOSTFS_ATTR_ATTR_FLAG	1024
++
++struct hostfs_iattr {
++	unsigned int	ia_valid;
++	mode_t		ia_mode;
++	uid_t		ia_uid;
++	gid_t		ia_gid;
++	loff_t		ia_size;
++	struct timespec	ia_atime;
++	struct timespec	ia_mtime;
++	struct timespec	ia_ctime;
++	unsigned int	ia_attr_flags;
++};
++
++extern int stat_file(const char *path, unsigned long long *inode_out, 
++		     int *mode_out, int *nlink_out, int *uid_out, int *gid_out,
++		     unsigned long long *size_out, struct timespec *atime_out, 
++		     struct timespec *mtime_out, struct timespec *ctime_out, 
++		     int *blksize_out, unsigned long long *blocks_out);
++extern int access_file(char *path, int r, int w, int x);
++extern int open_file(char *path, int r, int w, int append);
++extern int file_type(const char *path, int *rdev);
++extern void *open_dir(char *path, int *err_out);
++extern char *read_dir(void *stream, unsigned long long *pos, 
++		      unsigned long long *ino_out, int *len_out);
++extern void close_file(void *stream);
++extern void close_dir(void *stream);
++extern int read_file(int fd, unsigned long long *offset, char *buf, int len);
++extern int write_file(int fd, unsigned long long *offset, const char *buf,
++		      int len);
++extern int lseek_file(int fd, long long offset, int whence);
++extern int file_create(char *name, int ur, int uw, int ux, int gr, 
++		       int gw, int gx, int or, int ow, int ox);
++extern int set_attr(const char *file, struct hostfs_iattr *attrs);
++extern int make_symlink(const char *from, const char *to);
++extern int unlink_file(const char *file);
++extern int do_mkdir(const char *file, int mode);
++extern int do_rmdir(const char *file);
++extern int do_mknod(const char *file, int mode, int dev);
++extern int link_file(const char *from, const char *to);
++extern int do_readlink(char *file, char *buf, int size);
++extern int rename_file(char *from, char *to);
++extern int do_statfs(char *root, long *bsize_out, long long *blocks_out, 
++		     long long *bfree_out, long long *bavail_out, 
++		     long long *files_out, long long *ffree_out, 
++		     void *fsid_out, int fsid_size, long *namelen_out, 
++		     long *spare_out);
++
++#endif
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only.  This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
+--- a/fs/hostfs/hostfs_kern.c	Wed Dec 31 19:00:00 1969
++++ b/fs/hostfs/hostfs_kern.c	Fri Aug 15 15:10:12 2003
+@@ -0,0 +1,1010 @@
++/* 
++ * Copyright (C) 2000, 2001, 2002 Jeff Dike (jdike@karaya.com)
++ * Licensed under the GPL
++ *
++ * Ported the filesystem routines to 2.5.
++ * 2003-02-10 Petr Baudis <pasky@ucw.cz>
++ */
++
++#include <linux/stddef.h>
++#include <linux/fs.h>
++#include <linux/version.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/pagemap.h>
++#include <linux/blkdev.h>
++#include <linux/list.h>
++#include <linux/buffer_head.h>
++#include <linux/root_dev.h>
++#include <linux/statfs.h>
++#include <asm/uaccess.h>
++#include "hostfs.h"
++#include "kern_util.h"
++#include "kern.h"
++#include "user_util.h"
++#include "2_5compat.h"
++#include "init.h"
++
++struct hostfs_inode_info {
++	char *host_filename;
++	int fd;
++	int mode;
++	struct inode vfs_inode;
++};
++
++static inline struct hostfs_inode_info *HOSTFS_I(struct inode *inode)
++{
++	return(list_entry(inode, struct hostfs_inode_info, vfs_inode));
++}
++
++#define FILE_HOSTFS_I(file) HOSTFS_I((file)->f_dentry->d_inode)
++
++int hostfs_d_delete(struct dentry *dentry)
++{
++	return(1);
++}
++
++struct dentry_operations hostfs_dentry_ops = {
++	.d_delete		= hostfs_d_delete,
++};
++
++/* Changed in hostfs_args before the kernel starts running */
++static char *root_ino = "/";
++static int append = 0;
++
++#define HOSTFS_SUPER_MAGIC 0x00c0ffee
++
++static struct inode_operations hostfs_iops;
++static struct inode_operations hostfs_dir_iops;
++static struct address_space_operations hostfs_link_aops;
++
++static int __init hostfs_args(char *options, int *add)
++{
++	char *next;
++
++	/* First comma-separated field is the root dir, the rest are flags */
++	next = strchr(options, ',');
++	if(next != NULL)
++		*next++ = '\0';
++	if(*options != '\0')
++		root_ino = options;
++
++	for(options = next; options != NULL; options = next){
++		next = strchr(options, ',');
++		if(next != NULL)
++			*next++ = '\0';
++		if(*options == '\0')
++			continue;	/* empty field, as in "a,,b" */
++
++		if(strcmp(options, "append") == 0)
++			append = 1;
++		else printf("hostfs_args - unsupported option - %s\n",
++			    options);
++	}
++	return(0);
++}
++
++__uml_setup("hostfs=", hostfs_args,
++"hostfs=<root dir>,<flags>,...\n"
++"    This is used to set hostfs parameters.  The root directory argument\n"
++"    is used to confine all hostfs mounts to within the specified directory\n"
++"    tree on the host.  If this isn't specified, then a user inside UML can\n"
++"    mount anything on the host that's accessible to the user that's running\n"
++"    it.\n"
++"    The only flag currently supported is 'append', which specifies that all\n"
++"    files opened by hostfs will be opened in append mode.\n\n"
++);
++
++/* Build the host path for dentry in a kmalloc'd buffer that has 'extra'
++ * spare bytes after the terminating NUL.  Returns NULL if kmalloc fails.
++ */
++static char *dentry_name(struct dentry *dentry, int extra)
++{
++	struct dentry *d;
++	char *root, *name;
++	int len = 0;
++
++	/* Total length of the "/component" pieces up to the fs root */
++	for(d = dentry; d->d_parent != d; d = d->d_parent)
++		len += d->d_name.len + 1;
++
++	/* d is now the root dentry, whose inode carries the host path */
++	root = HOSTFS_I(d->d_inode)->host_filename;
++	len += strlen(root);
++	name = kmalloc(len + extra + 1, GFP_KERNEL);
++	if(name == NULL)
++		return(NULL);
++
++	/* Fill the buffer in from the end, leaf component first */
++	name[len] = '\0';
++	for(d = dentry; d->d_parent != d; d = d->d_parent){
++		len -= d->d_name.len + 1;
++		name[len] = '/';
++		strncpy(&name[len + 1], d->d_name.name, d->d_name.len);
++	}
++	strncpy(name, root, strlen(root));
++	return(name);
++}
++
++/* Host path of an inode, built from the first dentry on its alias list */
++static char *inode_name(struct inode *ino, int extra)
++{
++	struct list_head *first = ino->i_dentry.next;
++
++	return(dentry_name(list_entry(first, struct dentry, d_alias), extra));
++}
++
++static int read_name(struct inode *ino, char *name)
++{
++	/* stat_file fills in locals of the widths it expects for the
++	 * non-int inode fields, and they are copied into the inode
++	 * afterwards - passing the inode's own fields and having them
++	 * treated as int * breaks on big-endian machines
++	 */
++	unsigned long long host_ino, size, blocks;
++	int mode, nlink, blksize;
++	int err;
++
++	err = stat_file(name, &host_ino, &mode, &nlink, &ino->i_uid,
++			&ino->i_gid, &size, &ino->i_atime, &ino->i_mtime,
++			&ino->i_ctime, &blksize, &blocks);
++	if(err)
++		return(err);
++
++	ino->i_ino = host_ino;
++	ino->i_mode = mode;
++	ino->i_nlink = nlink;
++	ino->i_size = size;
++	ino->i_blksize = blksize;
++	ino->i_blocks = blocks;
++	/* On a hostfs root device, files of the host user show up as uid 0 */
++	if((ino->i_sb->s_dev == ROOT_DEV) && (ino->i_uid == getuid()))
++		ino->i_uid = 0;
++	return(0);
++}
++
++/* Resolve the symlink at host path 'link' (a kmalloc'd string).  Returns a
++ * kmalloc'd path or an ERR_PTR; 'link' is consumed (freed) in every case.
++ */
++static char *follow_link(char *link)
++{
++	int len, n;
++	char *name, *resolved, *end;
++
++	len = 64;
++	while(1){
++		resolved = ERR_PTR(-ENOMEM);
++		name = kmalloc(len, GFP_KERNEL);
++		if(name == NULL)
++			goto out;
++
++		n = do_readlink(link, name, len);
++		if(n < len)
++			break;
++		len *= 2;
++		kfree(name);
++	}
++	if(n < 0){
++		resolved = ERR_PTR(n);
++		goto out_free;
++	}
++
++	/* Absolute targets, and links with no directory part, go back as is */
++	resolved = name;
++	end = strrchr(link, '/');
++	if((*name == '/') || (end == NULL))
++		goto out;
++
++	*(end + 1) = '\0';
++	len = strlen(link) + strlen(name) + 1;
++	resolved = kmalloc(len, GFP_KERNEL);
++	if(resolved == NULL){
++		resolved = ERR_PTR(-ENOMEM);
++		goto out_free;
++	}
++
++	sprintf(resolved, "%s%s", link, name);
++ out_free:
++	kfree(name);
++ out:
++	kfree(link);
++	return(resolved);
++}
++
++/* Refresh ino's attributes from the host file it names.  Returns 0 on
++ * success (or when there is no name to stat yet) and -errno on failure.
++ */
++static int read_inode(struct inode *ino)
++{
++	char *path;
++	int err;
++
++	/* Unfortunately, we are called from iget() when we don't have a dentry
++	 * allocated yet.
++	 */
++	if(list_empty(&ino->i_dentry))
++		return(0);
++
++	path = inode_name(ino, 0);
++	if(path == NULL)
++		return(-ENOMEM);
++
++	/* For a symlink, stat the path it resolves to instead */
++	if(file_type(path, NULL) == OS_TYPE_SYMLINK){
++		path = follow_link(path);
++		if(IS_ERR(path))
++			return(PTR_ERR(path));
++	}
++
++	err = read_name(ino, path);
++	kfree(path);
++	return(err);
++}
++
++int hostfs_statfs(struct super_block *sb, struct kstatfs *sf)
++{
++	/* do_statfs uses struct statfs64 internally, but the linux kernel
++	 * struct statfs still has 32-bit versions for most of these fields,
++	 * so the wide values are collected in locals and converted here
++	 */
++	char *root = HOSTFS_I(sb->s_root->d_inode)->host_filename;
++	long long blocks, bfree, bavail, files, ffree;
++	int err;
++
++	err = do_statfs(root, &sf->f_bsize, &blocks, &bfree, &bavail, &files,
++			&ffree, &sf->f_fsid, sizeof(sf->f_fsid),
++			&sf->f_namelen, sf->f_spare);
++	if(err)
++		return(err);
++
++	sf->f_blocks = blocks;
++	sf->f_bfree = bfree;
++	sf->f_bavail = bavail;
++	sf->f_files = files;
++	sf->f_ffree = ffree;
++	sf->f_type = HOSTFS_SUPER_MAGIC;
++
++	return(0);
++}
++
++static struct inode *hostfs_alloc_inode(struct super_block *sb)
++{
++	struct hostfs_inode_info *hi;
++
++	hi = kmalloc(sizeof(*hi), GFP_KERNEL);
++	if(hi == NULL) 
++		return(NULL);
++	/* Start out with no host path and no open host fd */
++	*hi = ((struct hostfs_inode_info) { .host_filename	= NULL,
++					    .fd			= -1,
++					    .mode		= 0 });
++	inode_init_once(&hi->vfs_inode);
++	return(&hi->vfs_inode);
++}
++
++static void hostfs_destroy_inode(struct inode *inode)
++{
++	struct hostfs_inode_info *info = HOSTFS_I(inode);
++
++	if(info->host_filename)
++		kfree(info->host_filename);
++	if(info->fd != -1)
++		close_file(&info->fd);	/* drop the host fd, if one is open */
++	kfree(info);			/* the VFS inode is embedded in info */
++}
++
++static void hostfs_read_inode(struct inode *inode)
++{
++	read_inode(inode);	/* void hook: read_inode's result is dropped */
++}
++
++static struct super_operations hostfs_sbops = { 
++	.alloc_inode	= hostfs_alloc_inode,
++	.destroy_inode	= hostfs_destroy_inode,
++	.read_inode	= hostfs_read_inode,
++	.statfs		= hostfs_statfs,
++};
++
++int hostfs_readdir(struct file *file, void *ent, filldir_t filldir)
++{
++	unsigned long long pos, host_ino;
++	char *path, *entry;
++	void *dir;
++	int err, len;
++
++	path = dentry_name(file->f_dentry, 0);
++	if(path == NULL) return(-ENOMEM);
++	dir = open_dir(path, &err);
++	kfree(path);
++	if(dir == NULL) return(-err);
++	for(pos = file->f_pos; ; file->f_pos = pos){
++		entry = read_dir(dir, &pos, &host_ino, &len);
++		if(entry == NULL) break;
++		err = (*filldir)(ent, entry, len, file->f_pos, host_ino,
++				 DT_UNKNOWN);
++		if(err) break;	/* f_pos stays at the entry not taken */
++	}
++	close_dir(dir);
++	return(0);
++}
++
++int hostfs_file_open(struct inode *ino, struct file *file)
++{
++	char *name;
++	int mode, r, w, fd;
++
++	mode = file->f_mode & (FMODE_READ | FMODE_WRITE);
++	if((mode & HOSTFS_I(ino)->mode) == mode)
++		return(0);
++
++	/* The file may already have been opened, but with the wrong access,
++	 * so this resets things and reopens the file with the new access.
++	 * The recorded mode is cleared until the reopen succeeds, so that a
++	 * failure can't leave access recorded that no host fd backs.
++	 */
++	if(HOSTFS_I(ino)->fd != -1){
++		close_file(&HOSTFS_I(ino)->fd);
++		HOSTFS_I(ino)->fd = -1;
++	}
++	mode |= HOSTFS_I(ino)->mode;
++	HOSTFS_I(ino)->mode = 0;
++
++	w = (mode & FMODE_WRITE) != 0;
++	r = w || ((mode & FMODE_READ) != 0);	/* writers can read too */
++
++	name = dentry_name(file->f_dentry, 0);
++	if(name == NULL)
++		return(-ENOMEM);
++
++	fd = open_file(name, r, w, append);
++	kfree(name);
++	if(fd < 0)
++		return(fd);
++	FILE_HOSTFS_I(file)->fd = fd;
++	HOSTFS_I(ino)->mode = mode;
++	return(0);
++}
++
++int hostfs_fsync(struct file *file, struct dentry *dentry, int datasync)
++{
++	return(0);	/* no-op: reports success without touching the host */
++}
++
++static struct file_operations hostfs_file_fops = {
++	.llseek		= generic_file_llseek,
++	.read		= generic_file_read,
++	.write		= generic_file_write,
++	.mmap		= generic_file_mmap,
++	.open		= hostfs_file_open,
++	.release	= NULL,
++	.fsync		= hostfs_fsync,
++};
++
++static struct file_operations hostfs_dir_fops = {
++	.readdir	= hostfs_readdir,
++	.read		= generic_read_dir,
++};
++
++int hostfs_writepage(struct page *page, struct writeback_control *wbc)
++{
++	struct address_space *mapping = page->mapping;
++	struct inode *inode = mapping->host;
++	char *buffer;
++	unsigned long long base;
++	int count = PAGE_CACHE_SIZE;
++	int end_index = inode->i_size >> PAGE_CACHE_SHIFT;
++	int err;
++	/* From the page holding EOF on, only i_size's tail bytes are written */
++	if (page->index >= end_index)
++		count = inode->i_size & (PAGE_CACHE_SIZE-1);
++
++	buffer = kmap(page);
++	base = ((unsigned long long) page->index) << PAGE_CACHE_SHIFT;
++	/* write_file advances base by the number of bytes it wrote */
++	err = write_file(HOSTFS_I(inode)->fd, &base, buffer, count);
++	if(err != count){	/* NOTE(review): a short write returns err > 0 */
++		ClearPageUptodate(page);
++		goto out;
++	}
++
++	if (base > inode->i_size)
++		inode->i_size = base;
++
++	if (PageError(page))
++		ClearPageError(page);	
++	err = 0;
++
++ out:	
++	kunmap(page);
++
++	unlock_page(page);
++	return err; 
++}
++
++int hostfs_readpage(struct file *file, struct page *page)
++{
++	long long offset = (long long) page->index << PAGE_CACHE_SHIFT;
++	char *data = kmap(page);
++	int n, ret;
++
++	n = read_file(FILE_HOSTFS_I(file)->fd, &offset, data, PAGE_CACHE_SIZE);
++	ret = n;
++	if(n >= 0){
++		/* Zero whatever a short read left unfilled */
++		memset(&data[n], 0, PAGE_CACHE_SIZE - n);
++		flush_dcache_page(page);
++		SetPageUptodate(page);
++		if(PageError(page))
++			ClearPageError(page);
++		ret = 0;
++	}
++
++	/* The page is unmapped and unlocked on success and failure alike */
++	kunmap(page);
++	unlock_page(page);
++	return(ret);
++}
++
++int hostfs_prepare_write(struct file *file, struct page *page, 
++			 unsigned int from, unsigned int to)
++{
++	long long page_start = (long long) page->index << PAGE_CACHE_SHIFT;
++	long long head = page_start, tail = page_start + to;
++	char *data = kmap(page);
++	int fd = FILE_HOSTFS_I(file)->fd;
++	int err = 0;
++
++	/* The bytes on either side of [from, to) are read in from the host
++	 * file; the range itself is left alone.
++	 */
++	if(from != 0)
++		err = read_file(fd, &head, data, from);
++	if(err < 0)
++		goto out;
++
++	if(to != PAGE_CACHE_SIZE)
++		err = read_file(fd, &tail, data + to, PAGE_CACHE_SIZE - to);
++	if(err < 0)
++		goto out;
++	err = 0;
++ out:
++	kunmap(page);
++	return(err);
++}
++
++int hostfs_commit_write(struct file *file, struct page *page, unsigned from,
++		 unsigned to)
++{
++	struct address_space *mapping = page->mapping;
++	struct inode *inode = mapping->host;
++	char *buffer;
++	long long start;
++	int err = 0;
++
++	/* Widen before shifting (as hostfs_readpage does) so 4G+ can't wrap */
++	start = ((long long) page->index << PAGE_CACHE_SHIFT) + from;
++	buffer = kmap(page);
++	err = write_file(FILE_HOSTFS_I(file)->fd, &start, buffer + from, 
++			 to - from);
++	if(err > 0) err = 0;
++	if(!err && (start > inode->i_size))	/* start is now the end offset */
++		inode->i_size = start;
++	kunmap(page);
++	return(err);
++}
++
++static struct address_space_operations hostfs_aops = {
++	.writepage 	= hostfs_writepage,
++	.readpage	= hostfs_readpage,
++/* 	.set_page_dirty = __set_page_dirty_nobuffers, */
++	.prepare_write	= hostfs_prepare_write,
++	.commit_write	= hostfs_commit_write
++};
++
++static int init_inode(struct inode *inode, struct dentry *dentry)
++{
++	int type = OS_TYPE_DIR, rdev;	/* no dentry: treated as a directory */
++	char *name;
++
++	if(dentry != NULL){
++		name = dentry_name(dentry, 0);
++		if(name == NULL)
++			return(-ENOMEM);
++		type = file_type(name, &rdev);
++		kfree(name);
++	}
++	/* Operation tables: symlinks and directories get their own sets */
++	switch(type){
++	case OS_TYPE_SYMLINK:
++		inode->i_op = &page_symlink_inode_operations;
++		inode->i_fop = &hostfs_file_fops;
++		inode->i_mapping->a_ops = &hostfs_link_aops;
++		break;
++	case OS_TYPE_DIR:
++		inode->i_op = &hostfs_dir_iops;
++		inode->i_fop = &hostfs_dir_fops;
++		inode->i_mapping->a_ops = &hostfs_aops;
++		break;
++	default:
++		inode->i_op = &hostfs_iops;
++		inode->i_fop = &hostfs_file_fops;
++		inode->i_mapping->a_ops = &hostfs_aops;
++	}
++	/* Device nodes, fifos and sockets go through init_special_inode */
++	switch(type){
++	case OS_TYPE_CHARDEV:
++		init_special_inode(inode, S_IFCHR, rdev);
++		break;
++	case OS_TYPE_BLOCKDEV:
++		init_special_inode(inode, S_IFBLK, rdev);
++		break;
++	case OS_TYPE_FIFO:
++		init_special_inode(inode, S_IFIFO, 0);
++		break;
++	case OS_TYPE_SOCK:
++		init_special_inode(inode, S_IFSOCK, 0);
++	}
++	return(0);
++}
++
++/* Create the file on the host and instantiate dentry with a new inode that
++ * keeps the creating host fd, opened read-write.  Returns 0 or -errno. */
++int hostfs_create(struct inode *dir, struct dentry *dentry, int mode, 
++                 struct nameidata *nd)
++{
++	struct inode *inode;
++	char *name;
++	int error, fd;
++
++	error = -ENOMEM;
++	inode = iget(dir->i_sb, 0);
++	if(inode == NULL) goto out;
++
++	error = init_inode(inode, dentry);
++	if(error) goto out_put;
++
++	error = -ENOMEM;
++	name = dentry_name(dentry, 0);
++	if(name == NULL) goto out_put;
++
++	fd = file_create(name, 
++			 mode & S_IRUSR, mode & S_IWUSR, mode & S_IXUSR, 
++			 mode & S_IRGRP, mode & S_IWGRP, mode & S_IXGRP, 
++			 mode & S_IROTH, mode & S_IWOTH, mode & S_IXOTH);
++	if(fd < 0) 
++		error = fd;
++	else error = read_name(inode, name);
++
++	kfree(name);
++	if(error)
++		goto out_close;
++
++	HOSTFS_I(inode)->fd = fd;
++	HOSTFS_I(inode)->mode = FMODE_READ | FMODE_WRITE;
++	d_instantiate(dentry, inode);
++	return(0);
++
++ out_close:
++	if(fd >= 0) close_file(&fd);	/* created, but the stat failed */
++ out_put:
++	iput(inode);
++ out:
++	return(error);
++}
++
++struct dentry *hostfs_lookup(struct inode *ino, struct dentry *dentry, 
++                            struct nameidata *nd)
++{
++	struct inode *inode;
++	char *name;
++	int err;
++
++	inode = iget(ino->i_sb, 0);
++	if(inode == NULL)
++		return(ERR_PTR(-ENOMEM));
++
++	err = init_inode(inode, dentry);
++	if(err)
++		goto out_put;
++
++	name = dentry_name(dentry, 0);
++	if(name == NULL){
++		err = -ENOMEM;
++		goto out_put;
++	}
++
++	err = read_name(inode, name);
++	kfree(name);
++	if(err && (err != -ENOENT))
++		goto out_put;
++
++	/* A name missing on the host is added with no inode attached */
++	if(err == -ENOENT){
++		iput(inode);
++		inode = NULL;
++	}
++	d_add(dentry, inode);
++	dentry->d_op = &hostfs_dentry_ops;
++	return(NULL);
++
++ out_put:
++	iput(inode);
++	return(ERR_PTR(err));
++}
++
++/* kmalloc'd host path of the entry that dentry names inside directory ino */
++static char *inode_dentry_name(struct inode *ino, struct dentry *dentry)
++{
++	char *path, *end;
++
++	path = inode_name(ino, dentry->d_name.len + 1);
++	if(path == NULL) return(NULL);
++	strcat(path, "/");
++	end = path + strlen(path);
++	strncat(path, dentry->d_name.name, dentry->d_name.len);
++	end[dentry->d_name.len] = '\0';
++	return(path);
++}
++
++int hostfs_link(struct dentry *to, struct inode *ino, struct dentry *from)
++{
++	char *from_name, *to_name;
++	int err = -ENOMEM;
++
++	from_name = inode_dentry_name(ino, from);
++	if(from_name == NULL)
++		return(err);
++
++	to_name = dentry_name(to, 0);
++	if(to_name != NULL){
++		err = link_file(to_name, from_name);
++		kfree(to_name);
++	}
++	kfree(from_name);
++	return(err);
++}
++
++int hostfs_unlink(struct inode *ino, struct dentry *dentry)
++{
++	char *file;
++	int err;
++
++	/* Refuse in append mode before the name is built, so it can't leak */
++	if(append)
++		return(-EPERM);
++	if((file = inode_dentry_name(ino, dentry)) == NULL) return(-ENOMEM);
++	err = unlink_file(file);
++	kfree(file);
++	return(err);
++}
++
++int hostfs_symlink(struct inode *ino, struct dentry *dentry, const char *to)
++{
++	char *path = inode_dentry_name(ino, dentry);
++	int err = -ENOMEM;	/* reported when the path can't be built */
++	if(path != NULL){
++		err = make_symlink(path, to);
++		kfree(path);
++	}
++	return(err);
++}
++
++int hostfs_mkdir(struct inode *ino, struct dentry *dentry, int mode)
++{
++	char *path = inode_dentry_name(ino, dentry);
++	int err = -ENOMEM;	/* reported when the path can't be built */
++	if(path != NULL){
++		err = do_mkdir(path, mode);
++		kfree(path);
++	}
++	return(err);
++}
++
++int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
++{
++	char *path = inode_dentry_name(ino, dentry);
++	int err = -ENOMEM;	/* reported when the path can't be built */
++	if(path != NULL){
++		err = do_rmdir(path);
++		kfree(path);
++	}
++	return(err);
++}
++
++int hostfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
++{
++	struct inode *inode;
++	char *name;
++	int err = -ENOMEM;
++ 
++	inode = iget(dir->i_sb, 0);
++	if(inode == NULL) 
++		goto out;
++
++	err = init_inode(inode, dentry);
++	if(err) 
++		goto out_put;
++
++	err = -ENOMEM;
++	name = dentry_name(dentry, 0);
++	if(name == NULL)
++		goto out_put;
++	/* Set the inode up as the special file, then make the node on the host */
++	init_special_inode(inode, mode, dev);
++	err = do_mknod(name, mode, dev);
++	if(err)
++		goto out_free;
++	/* Pick up the attributes the host reports for the new node */
++	err = read_name(inode, name);
++	kfree(name);
++	if(err)
++		goto out_put;
++
++	d_instantiate(dentry, inode);
++	return(0);
++
++ out_free:
++	kfree(name);
++ out_put:
++	iput(inode);
++ out:
++	return(err);
++}
++
++int hostfs_rename(struct inode *from_ino, struct dentry *from,
++		  struct inode *to_ino, struct dentry *to)
++{
++	char *old_path, *new_path;
++	int err = -ENOMEM;
++
++	old_path = inode_dentry_name(from_ino, from);
++	if(old_path == NULL)
++		return(err);
++	new_path = inode_dentry_name(to_ino, to);
++	if(new_path != NULL){
++		err = rename_file(old_path, new_path);
++		kfree(new_path);
++	}
++	kfree(old_path);
++	return(err);
++}
++
++void hostfs_truncate(struct inode *ino)
++{
++	not_implemented();	/* stub; sizes are set via hostfs_setattr */
++}
++
++int hostfs_permission(struct inode *ino, int desired, struct nameidata *nd)
++{
++	char *name;
++	int err;
++
++	name = inode_name(ino, 0);
++	if(name == NULL) return(-ENOMEM);
++	/* The host is asked first; vfs_permission only runs if it agrees */
++	err = access_file(name, (desired & MAY_READ) != 0,
++			  (desired & MAY_WRITE) != 0, (desired & MAY_EXEC) != 0);
++	kfree(name);
++	if(err)
++		return(err);
++	return(vfs_permission(ino, desired));
++}
++
++int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
++{
++	struct hostfs_iattr attrs;
++	char *name;
++	int err;
++	/* In append mode, requests to change the size are silently dropped */
++	if(append) 
++		attr->ia_valid &= ~ATTR_SIZE;
++
++	attrs.ia_valid = 0;
++	if(attr->ia_valid & ATTR_MODE){
++		attrs.ia_valid |= HOSTFS_ATTR_MODE;
++		attrs.ia_mode = attr->ia_mode;
++	}
++	if(attr->ia_valid & ATTR_UID){
++		if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) && 
++		   (attr->ia_uid == 0))
++			attr->ia_uid = getuid();
++		attrs.ia_valid |= HOSTFS_ATTR_UID;
++		attrs.ia_uid = attr->ia_uid;
++	}
++	if(attr->ia_valid & ATTR_GID){
++		if((dentry->d_inode->i_sb->s_dev == ROOT_DEV) && 
++		   (attr->ia_gid == 0))
++			attr->ia_gid = getgid();	/* group id, not the uid */
++		attrs.ia_valid |= HOSTFS_ATTR_GID;
++		attrs.ia_gid = attr->ia_gid;
++	}
++	if(attr->ia_valid & ATTR_SIZE){
++		attrs.ia_valid |= HOSTFS_ATTR_SIZE;
++		attrs.ia_size = attr->ia_size;
++	}
++	if(attr->ia_valid & ATTR_ATIME){
++		attrs.ia_valid |= HOSTFS_ATTR_ATIME;
++		attrs.ia_atime = attr->ia_atime;
++	}
++	if(attr->ia_valid & ATTR_MTIME){
++		attrs.ia_valid |= HOSTFS_ATTR_MTIME;
++		attrs.ia_mtime = attr->ia_mtime;
++	}
++	if(attr->ia_valid & ATTR_CTIME){
++		attrs.ia_valid |= HOSTFS_ATTR_CTIME;
++		attrs.ia_ctime = attr->ia_ctime;
++	}
++	if(attr->ia_valid & ATTR_ATIME_SET){
++		attrs.ia_valid |= HOSTFS_ATTR_ATIME_SET;
++	}
++	if(attr->ia_valid & ATTR_MTIME_SET){
++		attrs.ia_valid |= HOSTFS_ATTR_MTIME_SET;
++	}
++	name = dentry_name(dentry, 0);
++	if(name == NULL) return(-ENOMEM);
++	err = set_attr(name, &attrs);
++	kfree(name);
++	if(err)
++		return(err);
++
++	return(inode_setattr(dentry->d_inode, attr));
++}
++
++int hostfs_getattr(struct vfsmount *mnt, struct dentry *dentry, 
++	   struct kstat *stat)
++{
++	generic_fillattr(dentry->d_inode, stat); /* in-core inode; no host stat */
++	return(0);
++}
++
++static struct inode_operations hostfs_iops = {
++	.create		= hostfs_create,
++	.link		= hostfs_link,
++	.unlink		= hostfs_unlink,
++	.symlink	= hostfs_symlink,
++	.mkdir		= hostfs_mkdir,
++	.rmdir		= hostfs_rmdir,
++	.mknod		= hostfs_mknod,
++	.rename		= hostfs_rename,
++	.truncate	= hostfs_truncate,
++	.permission	= hostfs_permission,
++	.setattr	= hostfs_setattr,
++	.getattr	= hostfs_getattr,
++};
++
++static struct inode_operations hostfs_dir_iops = {
++	.create		= hostfs_create,
++	.lookup		= hostfs_lookup,
++	.link		= hostfs_link,
++	.unlink		= hostfs_unlink,
++	.symlink	= hostfs_symlink,
++	.mkdir		= hostfs_mkdir,
++	.rmdir		= hostfs_rmdir,
++	.mknod		= hostfs_mknod,
++	.rename		= hostfs_rename,
++	.truncate	= hostfs_truncate,
++	.permission	= hostfs_permission,
++	.setattr	= hostfs_setattr,
++	.getattr	= hostfs_getattr,
++};
++
++int hostfs_link_readpage(struct file *file, struct page *page)
++{
++	char *buffer, *name;
++	int err = -ENOMEM;
++
++	/* Get the name before mapping, so failing leaves no kmap to undo */
++	name = inode_name(page->mapping->host, 0);
++	if(name == NULL) goto out;
++	buffer = kmap(page);
++	err = do_readlink(name, buffer, PAGE_CACHE_SIZE);
++	kfree(name);
++	if(err == PAGE_CACHE_SIZE)
++		err = -E2BIG;
++	else if(err > 0){
++		flush_dcache_page(page);
++		SetPageUptodate(page);
++		if (PageError(page)) ClearPageError(page);
++		err = 0;
++	}
++	kunmap(page);
++ out:
++	unlock_page(page);	/* on errors too, as hostfs_readpage does */
++	return(err);
++}
++
++static struct address_space_operations hostfs_link_aops = {
++	.readpage	= hostfs_link_readpage,
++};
++
++static int hostfs_fill_sb_common(struct super_block *sb, void *d, int silent)
++{
++	struct inode *root_inode;
++	char *name, *data = d;
++	int err;
++
++	sb->s_blocksize = 1024;
++	sb->s_blocksize_bits = 10;
++	sb->s_magic = HOSTFS_SUPER_MAGIC;
++	sb->s_op = &hostfs_sbops;
++
++	if((data == NULL) || (*data == '\0')) 
++		data = root_ino;
++
++	err = -ENOMEM;
++	name = kmalloc(strlen(data) + 1, GFP_KERNEL);
++	if(name == NULL) 
++		goto out;
++
++	strcpy(name, data);
++
++	root_inode = iget(sb, 0);
++	if(root_inode == NULL)
++		goto out_free;
++
++	/* The inode owns name from here on; hostfs_destroy_inode frees it */
++	HOSTFS_I(root_inode)->host_filename = name;
++
++	err = init_inode(root_inode, NULL);
++	if(err)
++		goto out_put;
++
++	err = -ENOMEM;
++	sb->s_root = d_alloc_root(root_inode);
++	if(sb->s_root == NULL)
++		goto out_put;
++	err = read_inode(root_inode);
++	if(err)		/* NOTE(review): s_root still points at root_inode here */
++		goto out_put;
++	return(0);
++
++ out_put:
++	iput(root_inode);	/* name belongs to the inode - no kfree here */
++	return(err);
++ out_free:
++	kfree(name);
++ out:
++	return(err);
++}
++
++static struct super_block *hostfs_read_sb(struct file_system_type *type,
++					     int flags, const char *dev_name,
++					     void *data)
++{
++	return(get_sb_nodev(type, flags, data, hostfs_fill_sb_common));
++}
++
++static struct file_system_type hostfs_type = {
++	.owner 		= THIS_MODULE,
++	.name 		= "hostfs",
++	.get_sb 	= hostfs_read_sb,
++	.kill_sb	= kill_anon_super,
++	.fs_flags 	= 0,
++};
++
++static int __init init_hostfs(void)
++{
++	return(register_filesystem(&hostfs_type));
++}
++
++static void __exit exit_hostfs(void)
++{
++	unregister_filesystem(&hostfs_type);
++}
++
++module_init(init_hostfs)
++module_exit(exit_hostfs)
++MODULE_LICENSE("GPL");
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only.  This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/fs/hostfs/hostfs_user.c b/fs/hostfs/hostfs_user.c
+--- a/fs/hostfs/hostfs_user.c	Wed Dec 31 19:00:00 1969
++++ b/fs/hostfs/hostfs_user.c	Fri Aug 15 15:10:43 2003
+@@ -0,0 +1,361 @@
++/* 
++ * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
++ * Licensed under the GPL
++ */
++
++#include <unistd.h>
++#include <stdio.h>
++#include <fcntl.h>
++#include <dirent.h>
++#include <errno.h>
++#include <utime.h>
++#include <string.h>
++#include <sys/stat.h>
++#include <sys/time.h>
++#include <sys/vfs.h>
++#include "hostfs.h"
++#include "kern_util.h"
++#include "user.h"
++
++int stat_file(const char *path, unsigned long long *inode_out, int *mode_out,
++	      int *nlink_out, int *uid_out, int *gid_out, 
++	      unsigned long long *size_out, struct timespec *atime_out,
++	      struct timespec *mtime_out, struct timespec *ctime_out,
++	      int *blksize_out, unsigned long long *blocks_out)
++{
++	struct stat64 buf;
++	/* lstat64, so a symlink itself is examined; NULL outputs are skipped */
++	if(lstat64(path, &buf) < 0) 
++		return(-errno);
++
++	/* See the Makefile for why STAT64_INO_FIELD is passed in
++	 * by the build
++	 */
++	if(inode_out != NULL) *inode_out = buf.STAT64_INO_FIELD;
++	if(mode_out != NULL) *mode_out = buf.st_mode;
++	if(nlink_out != NULL) *nlink_out = buf.st_nlink;
++	if(uid_out != NULL) *uid_out = buf.st_uid;
++	if(gid_out != NULL) *gid_out = buf.st_gid;
++	if(size_out != NULL) *size_out = buf.st_size;
++	if(atime_out != NULL) {
++		atime_out->tv_sec = buf.st_atime;
++		atime_out->tv_nsec = 0;	/* only whole seconds are copied */
++	}
++	if(mtime_out != NULL) {
++		mtime_out->tv_sec = buf.st_mtime;
++		mtime_out->tv_nsec = 0;
++	}
++	if(ctime_out != NULL) {
++		ctime_out->tv_sec = buf.st_ctime;
++		ctime_out->tv_nsec = 0;
++	}
++	if(blksize_out != NULL) *blksize_out = buf.st_blksize;
++	if(blocks_out != NULL) *blocks_out = buf.st_blocks;
++	return(0);
++}
++
++int file_type(const char *path, int *rdev)
++{
++	struct stat64 st;
++
++	/* lstat64: a symlink is reported as such, not as its target */
++	if(lstat64(path, &st) < 0)
++		return(-errno);
++	if(rdev != NULL) *rdev = st.st_rdev;
++
++	if(S_ISDIR(st.st_mode)) return(OS_TYPE_DIR);
++	if(S_ISLNK(st.st_mode)) return(OS_TYPE_SYMLINK);
++	if(S_ISCHR(st.st_mode)) return(OS_TYPE_CHARDEV);
++	if(S_ISBLK(st.st_mode)) return(OS_TYPE_BLOCKDEV);
++	if(S_ISFIFO(st.st_mode)) return(OS_TYPE_FIFO);
++	if(S_ISSOCK(st.st_mode)) return(OS_TYPE_SOCK);
++	return(OS_TYPE_FILE);
++}
++
++int access_file(char *path, int r, int w, int x)
++{
++	int mask;
++
++	/* Translate the three flags into an access() mask */
++	mask = (r ? R_OK : 0) | (w ? W_OK : 0) | (x ? X_OK : 0);
++	if(access(path, mask) != 0)
++		return(-errno);
++	return(0);
++}
++
++int open_file(char *path, int r, int w, int append)
++{
++	int flags = 0, fd;
++
++	if(r && w)
++		flags = O_RDWR;
++	else if(r)
++		flags = O_RDONLY;
++	else if(w)
++		flags = O_WRONLY;
++	else panic("Impossible mode in open_file");
++
++	if(append)
++		flags |= O_APPEND;
++	fd = open64(path, flags);
++	if(fd < 0) return(-errno);
++	return(fd);
++}
++
++void *open_dir(char *path, int *err_out)
++{
++	DIR *stream = opendir(path);
++
++	/* errno is passed back unconditionally; hostfs_readdir looks at it
++	 * only when NULL is returned */
++	*err_out = errno;
++	return(stream);
++}
++
++char *read_dir(void *stream, unsigned long long *pos, 
++	       unsigned long long *ino_out, int *len_out)
++{
++	DIR *dir = stream;
++	struct dirent *ent;
++	/* Resume at *pos, return one entry's name, store the next position */
++	seekdir(dir, *pos);
++	ent = readdir(dir);
++	if(ent == NULL) return(NULL);
++	*len_out = strlen(ent->d_name);
++	*ino_out = ent->d_ino;
++	*pos = telldir(dir);
++	return(ent->d_name);
++}
++
++int read_file(int fd, unsigned long long *offset, char *buf, int len)
++{
++	int got = pread64(fd, buf, len, *offset);
++
++	if(got < 0) return(-errno);
++	/* Step the caller's offset past what was actually read */
++	*offset += got;
++	return(got);
++}
++
++int write_file(int fd, unsigned long long *offset, const char *buf, int len)
++{
++	int done = pwrite64(fd, buf, len, *offset);
++
++	if(done < 0) return(-errno);
++	/* Step the caller's offset past what was actually written */
++	*offset += done;
++	return(done);
++}
++
++int lseek_file(int fd, long long offset, int whence)
++{
++	/* 64 bits wide: a valid offset of 2G or more must not look negative */
++	long long ret = lseek64(fd, offset, whence);
++
++	if(ret < 0) return(-errno);
++	return(0);
++}
++
++void close_file(void *stream)
++{
++	close(*((int *) stream));	/* stream points at the int host fd */
++}
++
++void close_dir(void *stream)
++{
++	closedir(stream);
++}
++
++int file_create(char *name, int ur, int uw, int ux, int gr, 
++		int gw, int gx, int or, int ow, int ox)
++{
++	int mode = 0, fd;
++
++	/* Rebuild the permission bits from the nine truth values */
++	if(ur) mode |= S_IRUSR;
++	if(uw) mode |= S_IWUSR;
++	if(ux) mode |= S_IXUSR;
++	if(gr) mode |= S_IRGRP;
++	if(gw) mode |= S_IWGRP;
++	if(gx) mode |= S_IXGRP;
++	if(or) mode |= S_IROTH;
++	if(ow) mode |= S_IWOTH;
++	if(ox) mode |= S_IXOTH;
++	fd = open64(name, O_CREAT | O_RDWR, mode);
++	if(fd >= 0)
++		return(fd);
++	return(-errno);
++}
++
++int set_attr(const char *file, struct hostfs_iattr *attrs)
++{
++	struct utimbuf buf;
++	int err, ma;
++	/* Apply each attribute flagged in ia_valid; first failure is returned */
++	if(attrs->ia_valid & HOSTFS_ATTR_MODE){
++		if(chmod(file, attrs->ia_mode) != 0) return(-errno);
++	}
++	if(attrs->ia_valid & HOSTFS_ATTR_UID){
++		if(chown(file, attrs->ia_uid, -1)) return(-errno);
++	}
++	if(attrs->ia_valid & HOSTFS_ATTR_GID){
++		if(chown(file, -1, attrs->ia_gid)) return(-errno);
++	}
++	if(attrs->ia_valid & HOSTFS_ATTR_SIZE){
++		if(truncate(file, attrs->ia_size)) return(-errno);
++	}
++	ma = HOSTFS_ATTR_ATIME_SET | HOSTFS_ATTR_MTIME_SET;
++	if((attrs->ia_valid & ma) == ma){
++		buf.actime = attrs->ia_atime.tv_sec;
++		buf.modtime = attrs->ia_mtime.tv_sec;
++		if(utime(file, &buf) != 0) return(-errno);
++	}
++	else {	/* at most one of the two times is being set explicitly */
++		struct timespec ts;
++
++		if(attrs->ia_valid & HOSTFS_ATTR_ATIME_SET){
++			err = stat_file(file, NULL, NULL, NULL, NULL, NULL, 
++					NULL, NULL, &ts, NULL, NULL, NULL);
++			if(err != 0) 
++				return(err);
++			buf.actime = attrs->ia_atime.tv_sec;
++			buf.modtime = ts.tv_sec;	/* keep the current mtime */
++			if(utime(file, &buf) != 0) 
++				return(-errno);
++		}
++		if(attrs->ia_valid & HOSTFS_ATTR_MTIME_SET){
++			err = stat_file(file, NULL, NULL, NULL, NULL, NULL, 
++					NULL, &ts, NULL, NULL, NULL, NULL);
++			if(err != 0) 
++				return(err);
++			buf.actime = ts.tv_sec;		/* keep the current atime */
++			buf.modtime = attrs->ia_mtime.tv_sec;
++			if(utime(file, &buf) != 0) 
++				return(-errno);
++		}
++	}
++	if(attrs->ia_valid & HOSTFS_ATTR_CTIME) ;	/* nothing is done for ctime */
++	if(attrs->ia_valid & (HOSTFS_ATTR_ATIME | HOSTFS_ATTR_MTIME)){
++		err = stat_file(file, NULL, NULL, NULL, NULL, NULL, NULL, 
++				&attrs->ia_atime, &attrs->ia_mtime, NULL, 
++				NULL, NULL);	/* report the host's times back */
++		if(err != 0) return(err);
++	}
++	return(0);
++}
++
++/* Make 'from' a symlink on the host pointing at 'to'; 0 or -errno */
++int make_symlink(const char *from, const char *to)
++{
++	if(symlink(to, from) != 0)
++		return(-errno);
++
++	return(0);
++}
++
++/* unlink() the host file; 0 or -errno */
++int unlink_file(const char *file)
++{
++	if(unlink(file) != 0)
++		return(-errno);
++
++	return(0);
++}
++
++/* mkdir() on the host with the given mode; 0 or -errno */
++int do_mkdir(const char *file, int mode)
++{
++	if(mkdir(file, mode) != 0)
++		return(-errno);
++
++	return(0);
++}
++
++/* rmdir() on the host; 0 or -errno */
++int do_rmdir(const char *file)
++{
++	if(rmdir(file) != 0)
++		return(-errno);
++
++	return(0);
++}
++
++/* mknod() on the host with the given mode and device; 0 or -errno */
++int do_mknod(const char *file, int mode, int dev)
++{
++	if(mknod(file, mode, dev) != 0)
++		return(-errno);
++
++	return(0);
++}
++
++/* Hard-link: 'to' is the existing host path, 'from' the new name */
++int link_file(const char *to, const char *from)
++{
++	if(link(to, from) != 0)
++		return(-errno);
++
++	return(0);
++}
++
++int do_readlink(char *file, char *buf, int size)
++{
++	int got = readlink(file, buf, size);
++
++	if(got < 0)
++		return(-errno);
++	/* The result is NUL-terminated here only when there is room for it */
++	if(got < size)
++		buf[got] = '\0';
++	return(got);
++}
++
++/* rename() on the host; 0 or -errno */
++int rename_file(char *from, char *to)
++{
++	if(rename(from, to) < 0)
++		return(-errno);
++
++	return(0);
++}
++
++int do_statfs(char *root, long *bsize_out, long long *blocks_out, 
++	      long long *bfree_out, long long *bavail_out, 
++	      long long *files_out, long long *ffree_out,
++	      void *fsid_out, int fsid_size, long *namelen_out, 
++	      long *spare_out)
++{
++	struct statfs64 st;
++	int i;
++
++	if(statfs64(root, &st) < 0)
++		return(-errno);
++
++	*bsize_out = st.f_bsize;
++	*blocks_out = st.f_blocks;
++	*bfree_out = st.f_bfree;
++	*bavail_out = st.f_bavail;
++	*files_out = st.f_files;
++	*ffree_out = st.f_ffree;
++
++	/* Copy no more of the fsid than the caller's buffer can take */
++	memcpy(fsid_out, &st.f_fsid,
++	       sizeof(st.f_fsid) > fsid_size ? fsid_size :
++	       sizeof(st.f_fsid));
++	*namelen_out = st.f_namelen;
++	/* Six spare words are copied one for one */
++	for(i = 0; i < 6; i++)
++		spare_out[i] = st.f_spare[i];
++	return(0);
++}
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only.  This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/fs/hppfs/Makefile b/fs/hppfs/Makefile
+--- a/fs/hppfs/Makefile	Wed Dec 31 19:00:00 1969
++++ b/fs/hppfs/Makefile	Fri Aug 15 15:12:31 2003
+@@ -0,0 +1,19 @@
++# 
++# Copyright (C) 2002, 2003 Jeff Dike (jdike@karaya.com)
++# Licensed under the GPL
++#
++
++hppfs-objs := hppfs_kern.o
++
++obj-y = 
++obj-$(CONFIG_HPPFS) += hppfs.o
++
++clean:
++
++modules:
++
++fastdep:
++
++dep:
++
++archmrproper: clean
+diff -Naur a/fs/hppfs/hppfs_kern.c b/fs/hppfs/hppfs_kern.c
+--- a/fs/hppfs/hppfs_kern.c	Wed Dec 31 19:00:00 1969
++++ b/fs/hppfs/hppfs_kern.c	Fri Aug 15 15:11:52 2003
+@@ -0,0 +1,811 @@
++/* 
++ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
++ * Licensed under the GPL
++ */
++
++#include <linux/fs.h>
++#include <linux/module.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/list.h>
++#include <linux/kernel.h>
++#include <linux/ctype.h>
++#include <linux/dcache.h>
++#include <linux/statfs.h>
++#include <asm/uaccess.h>
++#include <asm/fcntl.h>
++#include "os.h"
++
++static int init_inode(struct inode *inode, struct dentry *dentry);
++
++struct hppfs_data {
++	struct list_head list;
++	char contents[PAGE_SIZE - sizeof(struct list_head)]; /* rest of a page */
++};
++
++struct hppfs_private {
++	struct file proc_file;
++	int host_fd;
++	loff_t len;
++	struct hppfs_data *contents;
++};
++
++struct hppfs_inode_info {
++        struct dentry *proc_dentry;
++	struct inode vfs_inode;
++};
++
++static inline struct hppfs_inode_info *HPPFS_I(struct inode *inode)
++{
++	return(list_entry(inode, struct hppfs_inode_info, vfs_inode));
++}
++
++#define HPPFS_SUPER_MAGIC 0xb00000ee
++
++static struct super_operations hppfs_sbops;
++
++static int is_pid(struct dentry *dentry)
++{
++	struct super_block *sb;
++	int i;
++
++	sb = dentry->d_sb;
++	if((sb->s_op != &hppfs_sbops) || (dentry->d_parent != sb->s_root))
++		return(0);
++
++	for(i = 0; i < dentry->d_name.len; i++){
++		if(!isdigit(dentry->d_name.name[i]))
++			return(0);
++	}
++	return(1);
++}
++
++static char *dentry_name(struct dentry *dentry, int extra)
++{
++	struct dentry *parent;
++	char *root, *name;
++	const char *seg_name;
++	int len, seg_len;
++
++	len = 0;
++	parent = dentry;
++	while(parent->d_parent != parent){
++		if(is_pid(parent))
++			len += strlen("pid") + 1;
++		else len += parent->d_name.len + 1;
++		parent = parent->d_parent;
++	}
++	
++	root = "proc";
++	len += strlen(root);
++	name = kmalloc(len + extra + 1, GFP_KERNEL);
++	if(name == NULL) return(NULL);
++
++	name[len] = '\0';
++	parent = dentry;
++	while(parent->d_parent != parent){
++		if(is_pid(parent)){
++			seg_name = "pid";
++			seg_len = strlen("pid");
++		}
++		else {
++			seg_name = parent->d_name.name;
++			seg_len = parent->d_name.len;
++		}
++
++		len -= seg_len + 1;
++		name[len] = '/';
++		strncpy(&name[len + 1], seg_name, seg_len);
++		parent = parent->d_parent;
++	}
++	strncpy(name, root, strlen(root));
++	return(name);
++}
++
++struct dentry_operations hppfs_dentry_ops = {
++};
++
++static int file_removed(struct dentry *dentry, const char *file)
++{
++	char *host_file;	/* "proc/<path>[/<file>]/remove" */
++	int extra, fd;
++
++	extra = 0;
++	if(file != NULL) extra += strlen(file) + 1;	/* '/' + file */
++
++	host_file = dentry_name(dentry, extra + strlen("/remove"));
++	if(host_file == NULL){
++		printk("file_removed : allocation failed\n");
++		return(-ENOMEM);
++	}
++
++	if(file != NULL){
++		strcat(host_file, "/");
++		strcat(host_file, file);
++	}
++	strcat(host_file, "/remove");
++
++	fd = os_open_file(host_file, of_read(OPENFLAGS()), 0);
++	kfree(host_file);
++	if(fd >= 0){	/* 0 is a valid descriptor, as hppfs_open assumes */
++		os_close_file(fd);
++		return(1);	/* marker file could be opened */
++	}
++	return(0);
++}
++
++static void hppfs_read_inode(struct inode *ino)
++{
++	struct inode *proc_ino;
++
++	if(HPPFS_I(ino)->proc_dentry == NULL)
++		return;
++
++	proc_ino = HPPFS_I(ino)->proc_dentry->d_inode;
++	ino->i_uid = proc_ino->i_uid;
++	ino->i_gid = proc_ino->i_gid;
++	ino->i_atime = proc_ino->i_atime;
++	ino->i_mtime = proc_ino->i_mtime;
++	ino->i_ctime = proc_ino->i_ctime;
++	ino->i_ino = proc_ino->i_ino;
++	ino->i_mode = proc_ino->i_mode;
++	ino->i_nlink = proc_ino->i_nlink;
++	ino->i_size = proc_ino->i_size;
++	ino->i_blksize = proc_ino->i_blksize;
++	ino->i_blocks = proc_ino->i_blocks;
++}
++
++static struct dentry *hppfs_lookup(struct inode *ino, struct dentry *dentry, 
++                                  struct nameidata *nd)
++{
++	struct dentry *proc_dentry, *new, *parent;
++	struct inode *inode;
++	int err, deleted;
++
++	deleted = file_removed(dentry, NULL);
++	if(deleted < 0)
++		return(ERR_PTR(deleted));
++	else if(deleted)
++		return(ERR_PTR(-ENOENT));
++
++	err = -ENOMEM;
++	parent = HPPFS_I(ino)->proc_dentry;
++	down(&parent->d_inode->i_sem);
++	proc_dentry = d_lookup(parent, &dentry->d_name);
++	if(proc_dentry == NULL){
++		proc_dentry = d_alloc(parent, &dentry->d_name);
++		if(proc_dentry == NULL){
++			up(&parent->d_inode->i_sem);
++			goto out;
++		}
++		new = (*parent->d_inode->i_op->lookup)(parent->d_inode, 
++						       proc_dentry, NULL);
++		if(new){
++			dput(proc_dentry);
++			proc_dentry = new;
++		}
++	}
++	up(&parent->d_inode->i_sem);
++
++	if(IS_ERR(proc_dentry))
++		return(proc_dentry);
++
++	inode = iget(ino->i_sb, 0);
++	if(inode == NULL) 
++		goto out_dput;
++
++	err = init_inode(inode, proc_dentry);
++	if(err) 
++		goto out_put;
++	
++	hppfs_read_inode(inode);
++
++ 	d_add(dentry, inode);
++	dentry->d_op = &hppfs_dentry_ops;
++	return(NULL);
++
++ out_put:
++	iput(inode);
++ out_dput:
++	dput(proc_dentry);
++ out:
++	return(ERR_PTR(err));
++}
++
++static struct inode_operations hppfs_file_iops = {
++};
++
++static ssize_t read_proc(struct file *file, char *buf, ssize_t count, 
++			 loff_t *ppos, int is_user)
++{
++	ssize_t (*read)(struct file *, char *, size_t, loff_t *);
++	mm_segment_t old_fs = get_fs();	/* caller's limit, restored below */
++	ssize_t n;
++
++	read = file->f_dentry->d_inode->i_fop->read;
++	if(!is_user)
++		set_fs(KERNEL_DS);	/* buf is a kernel pointer */
++
++	n = (*read)(file, buf, count, &file->f_pos);
++
++	if(!is_user)
++		set_fs(old_fs);	/* do not assume the caller ran with USER_DS */
++
++	if(ppos) *ppos = file->f_pos;
++	return(n);
++}
++
++static ssize_t hppfs_read_file(int fd, char *buf, ssize_t count)
++{
++	ssize_t n;
++	int cur, err;
++	char *new_buf;	/* kernel bounce buffer for host reads */
++
++	n = -ENOMEM;
++	new_buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
++	if(new_buf == NULL){
++		printk("hppfs_read_file : kmalloc failed\n");
++		goto out;
++	}
++	n = 0;
++	while(count > 0){
++		cur = min_t(ssize_t, count, PAGE_SIZE);
++		err = os_read_file(fd, new_buf, cur);
++		if(err < 0){
++			printk("hppfs_read : read failed, errno = %d\n", err);
++			n = err;
++			goto out_free;
++		}
++		else if(err == 0)
++			break;
++
++		if(copy_to_user(buf, new_buf, err)){
++			n = -EFAULT;
++			goto out_free;
++		}
++		buf += err;	/* later chunks must not overwrite this one */
++		n += err;
++		count -= err;
++	}
++ out_free:
++	kfree(new_buf);
++ out:
++	return(n);	/* bytes copied to buf, or a negative error */
++}
++
++static ssize_t hppfs_read(struct file *file, char *buf, size_t count, 
++			  loff_t *ppos)
++{
++	struct hppfs_private *hppfs = file->private_data;
++	struct hppfs_data *data;
++	loff_t off;
++	ssize_t n;	/* signed, so helper errors are not mistaken for sizes */
++	int err;
++
++	if(hppfs->contents != NULL){
++		if(*ppos >= hppfs->len) return(0);
++		data = hppfs->contents;
++		off = *ppos;
++		while(off >= sizeof(data->contents)){
++			data = list_entry(data->list.next, struct hppfs_data,
++					  list);
++			off -= sizeof(data->contents);
++		}
++		/* Stop at EOF and at the end of this chunk (short read) */
++		n = min_t(loff_t, count, hppfs->len - *ppos);
++		n = min_t(loff_t, n, sizeof(data->contents) - off);
++		if(copy_to_user(buf, &data->contents[off], n))
++			return(-EFAULT);
++		*ppos += n;
++	}
++	else if(hppfs->host_fd != -1){
++		err = os_seek_file(hppfs->host_fd, *ppos);
++		if(err){
++			printk("hppfs_read : seek failed, errno = %d\n", err);
++			return(err);
++		}
++		n = hppfs_read_file(hppfs->host_fd, buf, count);
++		if(n > 0) *ppos += n;	/* errors leave the offset alone */
++	}
++	else n = read_proc(&hppfs->proc_file, buf, count, ppos, 1);
++
++	return(n);
++}
++
++static ssize_t hppfs_write(struct file *file, const char *buf, size_t len, 
++			   loff_t *ppos)
++{
++	struct hppfs_private *data = file->private_data;
++	struct file *proc_file = &data->proc_file;
++	ssize_t (*write)(struct file *, const char *, size_t, loff_t *);
++	int err;
++
++	write = proc_file->f_dentry->d_inode->i_fop->write;
++
++	proc_file->f_pos = file->f_pos;
++	err = (*write)(proc_file, buf, len, &proc_file->f_pos);
++	file->f_pos = proc_file->f_pos;
++
++	return(err);
++}
++
++static int open_host_sock(char *host_file, int *filter_out)
++{
++	char *end;
++	int fd;
++
++	end = &host_file[strlen(host_file)];
++	strcpy(end, "/rw");
++	*filter_out = 1;
++	fd = os_connect_socket(host_file);
++	if(fd > 0)
++		return(fd);
++
++	strcpy(end, "/r");
++	*filter_out = 0;
++	fd = os_connect_socket(host_file);
++	return(fd);
++}
++
++static void free_contents(struct hppfs_data *head)
++{
++	struct hppfs_data *data;
++	struct list_head *ele, *next;
++
++	if(head == NULL) return;
++
++	list_for_each_safe(ele, next, &head->list){
++		data = list_entry(ele, struct hppfs_data, list);
++		kfree(data);
++	}
++	kfree(head);
++}
++
++static struct hppfs_data *hppfs_get_data(int fd, int filter, 
++					 struct file *proc_file, 
++					 struct file *hppfs_file, 
++					 loff_t *size_out)
++{
++	struct hppfs_data *data, *new, *head;
++	int n, err;
++
++	err = -ENOMEM;
++	data = kmalloc(sizeof(*data), GFP_KERNEL);
++	if(data == NULL){
++		printk("hppfs_get_data : head allocation failed\n");
++		goto failed;
++	}
++
++	INIT_LIST_HEAD(&data->list);
++
++	head = data;
++	*size_out = 0;
++
++	if(filter){
++		while((n = read_proc(proc_file, data->contents,
++				     sizeof(data->contents), NULL, 0)) > 0)
++			os_write_file(fd, data->contents, n);
++		err = os_shutdown_socket(fd, 0, 1);
++		if(err){
++			printk("hppfs_get_data : failed to shut down "
++			       "socket\n");
++			goto failed_free;
++		}
++	}
++	while(1){
++		n = os_read_file(fd, data->contents, sizeof(data->contents));
++		if(n < 0){
++			err = n;
++			printk("hppfs_get_data : read failed, errno = %d\n",
++			       err);
++			goto failed_free;
++		}
++		else if(n == 0)
++			break;
++
++		*size_out += n;
++
++		if(n < sizeof(data->contents))
++			break;
++
++		new = kmalloc(sizeof(*data), GFP_KERNEL);
++		if(new == 0){
++			printk("hppfs_get_data : data allocation failed\n");
++			err = -ENOMEM;
++			goto failed_free;
++		}
++	
++		INIT_LIST_HEAD(&new->list);
++		list_add(&new->list, &data->list);
++		data = new;
++	}
++	return(head);
++
++ failed_free:
++	free_contents(head);
++ failed:		
++	return(ERR_PTR(err));
++}
++
++static struct hppfs_private *hppfs_data(void)
++{
++	struct hppfs_private *data;
++
++	data = kmalloc(sizeof(*data), GFP_KERNEL);
++	if(data == NULL)
++		return(data);
++
++	*data = ((struct hppfs_private ) { .host_fd  		= -1,
++					   .len  		= -1,
++					   .contents 		= NULL } );
++	return(data);
++}
++
++static int file_mode(int fmode)
++{
++	if(fmode == (FMODE_READ | FMODE_WRITE))
++		return(O_RDWR);
++	if(fmode == FMODE_READ)
++		return(O_RDONLY);
++	if(fmode == FMODE_WRITE)
++		return(O_WRONLY);
++	return(0);
++}
++
++static int hppfs_open(struct inode *inode, struct file *file)
++{
++	struct hppfs_private *data;
++	struct dentry *proc_dentry;
++	char *host_file;
++	int err, fd, type, filter;
++
++	err = -ENOMEM;
++	data = hppfs_data();
++	if(data == NULL)
++		goto out;
++
++	host_file = dentry_name(file->f_dentry, strlen("/rw"));
++	if(host_file == NULL)
++		goto out_free2;
++
++	proc_dentry = HPPFS_I(inode)->proc_dentry;
++
++	/* XXX This isn't closed anywhere */
++	err = open_private_file(&data->proc_file, proc_dentry, 
++				file_mode(file->f_mode));
++	if(err)
++		goto out_free1;
++
++	type = os_file_type(host_file);
++	if(type == OS_TYPE_FILE){
++		fd = os_open_file(host_file, of_read(OPENFLAGS()), 0);
++		if(fd >= 0) 
++			data->host_fd = fd;
++		else printk("hppfs_open : failed to open '%s', errno = %d\n",
++			    host_file, -fd);
++		data->contents = NULL;
++	}
++	else if(type == OS_TYPE_DIR){
++		fd = open_host_sock(host_file, &filter);
++		if(fd > 0){
++			data->contents = hppfs_get_data(fd, filter, 
++					&data->proc_file, file, &data->len);
++			/* hppfs_read only tests contents against NULL */
++			if(IS_ERR(data->contents))
++				data->contents = NULL;
++			else data->host_fd = fd;
++		}
++		else printk("hppfs_open : failed to open a socket in "
++			    "'%s', errno = %d\n", host_file, -fd);
++	}
++	kfree(host_file);
++
++	file->private_data = data;
++	return(0);
++
++ out_free1:
++	kfree(host_file);
++ out_free2:
++	free_contents(data->contents);
++	kfree(data);
++ out:
++	return(err);
++}
++
++static int hppfs_dir_open(struct inode *inode, struct file *file)
++{
++	struct hppfs_private *data;
++	struct dentry *proc_dentry;
++	int err;
++
++	err = -ENOMEM;
++	data = hppfs_data();
++	if(data == NULL)
++		goto out;
++
++	proc_dentry = HPPFS_I(inode)->proc_dentry;
++	err = open_private_file(&data->proc_file, proc_dentry, 
++				file_mode(file->f_mode));
++	if(err)
++		goto out_free;
++
++	file->private_data = data;
++	return(0);
++
++ out_free:
++	kfree(data);
++ out:
++	return(err);
++}
++
++static loff_t hppfs_llseek(struct file *file, loff_t off, int where)
++{
++	struct hppfs_private *data = file->private_data;
++	struct file *proc_file = &data->proc_file;
++	loff_t (*llseek)(struct file *, loff_t, int);
++	loff_t ret;
++
++	llseek = proc_file->f_dentry->d_inode->i_fop->llseek;
++	if(llseek != NULL){
++		ret = (*llseek)(proc_file, off, where);
++		if(ret < 0)
++			return(ret);
++	}
++
++	return(default_llseek(file, off, where));
++}
++
++static struct file_operations hppfs_file_fops = {
++	.owner		= NULL,
++	.llseek		= hppfs_llseek,
++	.read		= hppfs_read,
++	.write		= hppfs_write,
++	.open		= hppfs_open,
++};
++
++struct hppfs_dirent {
++	void *vfs_dirent;
++	filldir_t filldir;
++	struct dentry *dentry;
++};
++
++static int hppfs_filldir(void *d, const char *name, int size, 
++			 loff_t offset, ino_t inode, unsigned int type)
++{
++	struct hppfs_dirent *dirent = d;
++
++	if(file_removed(dirent->dentry, name))
++		return(0);
++
++	return((*dirent->filldir)(dirent->vfs_dirent, name, size, offset, 
++				  inode, type));
++}
++
++static int hppfs_readdir(struct file *file, void *ent, filldir_t filldir)
++{
++	struct hppfs_private *data = file->private_data;
++	struct file *proc_file = &data->proc_file;
++	int (*readdir)(struct file *, void *, filldir_t);
++	struct hppfs_dirent dirent = ((struct hppfs_dirent)
++		                      { .vfs_dirent  	= ent,
++					.filldir 	= filldir,
++					.dentry  	= file->f_dentry } );
++	int err;
++
++	readdir = proc_file->f_dentry->d_inode->i_fop->readdir;
++
++	proc_file->f_pos = file->f_pos;
++	err = (*readdir)(proc_file, &dirent, hppfs_filldir);
++	file->f_pos = proc_file->f_pos;
++
++	return(err);
++}
++
++static int hppfs_fsync(struct file *file, struct dentry *dentry, int datasync)
++{
++	return(0);
++}
++
++static struct file_operations hppfs_dir_fops = {
++	.owner		= NULL,
++	.readdir	= hppfs_readdir,
++	.open		= hppfs_dir_open,
++	.fsync		= hppfs_fsync,
++};
++
++static int hppfs_statfs(struct super_block *sb, struct kstatfs *sf)
++{
++	sf->f_blocks = 0;
++	sf->f_bfree = 0;
++	sf->f_bavail = 0;
++	sf->f_files = 0;
++	sf->f_ffree = 0;
++	sf->f_type = HPPFS_SUPER_MAGIC;
++	return(0);
++}
++
++static struct inode *hppfs_alloc_inode(struct super_block *sb)
++{
++	struct hppfs_inode_info *hi;
++
++	hi = kmalloc(sizeof(*hi), GFP_KERNEL);
++	if(hi == NULL) 
++		return(NULL);
++
++	*hi = ((struct hppfs_inode_info) { .proc_dentry	= NULL });
++	inode_init_once(&hi->vfs_inode);
++	return(&hi->vfs_inode);
++}
++
++void hppfs_delete_inode(struct inode *ino)
++{
++	clear_inode(ino);
++}
++
++static void hppfs_destroy_inode(struct inode *inode)
++{
++	kfree(HPPFS_I(inode));
++}
++
++static struct super_operations hppfs_sbops = { 
++	.alloc_inode	= hppfs_alloc_inode,
++	.destroy_inode	= hppfs_destroy_inode,
++	.read_inode	= hppfs_read_inode,
++	.delete_inode	= hppfs_delete_inode,
++	.statfs		= hppfs_statfs,
++};
++
++static int hppfs_readlink(struct dentry *dentry, char *buffer, int buflen)
++{
++	struct file proc_file;
++	struct dentry *proc_dentry;
++	int (*readlink)(struct dentry *, char *, int);
++	int err, n;
++
++	proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
++	err = open_private_file(&proc_file, proc_dentry, O_RDONLY);
++	if(err) 
++		return(err);
++
++	readlink = proc_dentry->d_inode->i_op->readlink;
++	n = (*readlink)(proc_dentry, buffer, buflen);
++
++	close_private_file(&proc_file);
++	
++	return(n);
++}
++
++static int hppfs_follow_link(struct dentry *dentry, struct nameidata *nd)
++{
++	struct file proc_file;
++	struct dentry *proc_dentry;
++	int (*follow_link)(struct dentry *, struct nameidata *);
++	int err, n;
++
++	proc_dentry = HPPFS_I(dentry->d_inode)->proc_dentry;
++	err = open_private_file(&proc_file, proc_dentry, O_RDONLY);
++	if(err) 
++		return(err);
++
++	follow_link = proc_dentry->d_inode->i_op->follow_link;
++	n = (*follow_link)(proc_dentry, nd);
++
++	close_private_file(&proc_file);
++	
++	return(n);
++}
++
++static struct inode_operations hppfs_dir_iops = {
++	.lookup		= hppfs_lookup,
++};
++
++static struct inode_operations hppfs_link_iops = {
++	.readlink	= hppfs_readlink,
++	.follow_link	= hppfs_follow_link,
++};
++
++static int init_inode(struct inode *inode, struct dentry *dentry)
++{
++	if(S_ISDIR(dentry->d_inode->i_mode)){
++		inode->i_op = &hppfs_dir_iops;
++		inode->i_fop = &hppfs_dir_fops;
++	}
++	else if(S_ISLNK(dentry->d_inode->i_mode)){
++		inode->i_op = &hppfs_link_iops;
++		inode->i_fop = &hppfs_file_fops;
++	}
++	else {
++		inode->i_op = &hppfs_file_iops;
++		inode->i_fop = &hppfs_file_fops;
++	}
++
++	HPPFS_I(inode)->proc_dentry = dentry;
++
++	return(0);
++}
++
++static int hppfs_fill_super(struct super_block *sb, void *d, int silent)
++{
++	struct inode *root_inode;
++	struct file_system_type *procfs;
++	struct super_block *proc_sb;
++	int err;
++
++	err = -ENOENT;
++	procfs = get_fs_type("proc");
++	if(procfs == NULL) 
++		goto out;
++
++	if(list_empty(&procfs->fs_supers))
++		goto out;
++
++	proc_sb = list_entry(procfs->fs_supers.next, struct super_block,
++			     s_instances);
++	
++	sb->s_blocksize = 1024;
++	sb->s_blocksize_bits = 10;
++	sb->s_magic = HPPFS_SUPER_MAGIC;
++	sb->s_op = &hppfs_sbops;
++
++	root_inode = iget(sb, 0);
++	if(root_inode == NULL)
++		goto out;
++
++	err = init_inode(root_inode, proc_sb->s_root);
++	if(err)
++		goto out_put;
++
++	err = -ENOMEM;
++	sb->s_root = d_alloc_root(root_inode);
++	if(sb->s_root == NULL)
++		goto out_put;
++
++	hppfs_read_inode(root_inode);
++
++	return(0);
++
++ out_put:
++	iput(root_inode);
++ out:
++	return(err);
++}
++
++static struct super_block *hppfs_read_super(struct file_system_type *type,
++					     int flags, const char *dev_name,
++					     void *data)
++{
++	return(get_sb_nodev(type, flags, data, hppfs_fill_super));
++}
++
++static struct file_system_type hppfs_type = {
++	.owner 		= THIS_MODULE,
++	.name 		= "hppfs",
++	.get_sb 	= hppfs_read_super,
++	.kill_sb	= kill_anon_super,
++	.fs_flags 	= 0,
++};
++
++static int __init init_hppfs(void)
++{
++	return(register_filesystem(&hppfs_type));
++}
++
++static void __exit exit_hppfs(void)
++{
++	unregister_filesystem(&hppfs_type);
++}
++
++module_init(init_hppfs)
++module_exit(exit_hppfs)
++MODULE_LICENSE("GPL");
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only.  This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
+diff -Naur a/include/asm-um/archparam-i386.h b/include/asm-um/archparam-i386.h
+--- a/include/asm-um/archparam-i386.h	Fri Aug 15 15:07:52 2003
++++ b/include/asm-um/archparam-i386.h	Fri Aug 15 15:13:17 2003
+@@ -56,6 +56,65 @@
+ 	pr_reg[16] = PT_REGS_SS(regs);		\
+ } while(0);
+ 
++#define VSYSCALL_BASE	(__fix_to_virt(FIX_VSYSCALL))
++#define VSYSCALL_EHDR	((const struct elfhdr *) VSYSCALL_BASE)
++#define VSYSCALL_ENTRY	((unsigned long) &__kernel_vsyscall)
++extern void *__kernel_vsyscall;
++
++/*
++ * Architecture-neutral AT_ values in 0-17, leave some room
++ * for more of them, start the x86-specific ones at 32.
++ */
++#define AT_SYSINFO		32
++#define AT_SYSINFO_EHDR		33
++
++#define ARCH_DLINFO						\
++do {								\
++		NEW_AUX_ENT(AT_SYSINFO,	VSYSCALL_ENTRY);	\
++		NEW_AUX_ENT(AT_SYSINFO_EHDR, VSYSCALL_BASE);	\
++} while (0)
++
++/*
++ * These macros parameterize elf_core_dump in fs/binfmt_elf.c to write out
++ * extra segments containing the vsyscall DSO contents.  Dumping its
++ * contents makes post-mortem fully interpretable later without matching up
++ * the same kernel and hardware config to see what PC values meant.
++ * Dumping its extra ELF program headers includes all the other information
++ * a debugger needs to easily find how the vsyscall DSO was being used.
++ */
++#define ELF_CORE_EXTRA_PHDRS		(VSYSCALL_EHDR->e_phnum)
++#define ELF_CORE_WRITE_EXTRA_PHDRS					      \
++do {									      \
++	const struct elf_phdr *const vsyscall_phdrs =			      \
++		(const struct elf_phdr *) (VSYSCALL_BASE		      \
++					   + VSYSCALL_EHDR->e_phoff);	      \
++	int i;								      \
++	Elf32_Off ofs = 0;						      \
++	for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) {			      \
++		struct elf_phdr phdr = vsyscall_phdrs[i];		      \
++		if (phdr.p_type == PT_LOAD) {				      \
++			ofs = phdr.p_offset = offset;			      \
++			offset += phdr.p_filesz;			      \
++		}							      \
++		else							      \
++			phdr.p_offset += ofs;				      \
++		phdr.p_paddr = 0; /* match other core phdrs */		      \
++		DUMP_WRITE(&phdr, sizeof(phdr));			      \
++	}								      \
++} while (0)
++#define ELF_CORE_WRITE_EXTRA_DATA					      \
++do {									      \
++	const struct elf_phdr *const vsyscall_phdrs =			      \
++		(const struct elf_phdr *) (VSYSCALL_BASE		      \
++					   + VSYSCALL_EHDR->e_phoff);	      \
++	int i;								      \
++	for (i = 0; i < VSYSCALL_EHDR->e_phnum; ++i) {			      \
++		if (vsyscall_phdrs[i].p_type == PT_LOAD)		      \
++			DUMP_WRITE((void *) vsyscall_phdrs[i].p_vaddr,	      \
++				   vsyscall_phdrs[i].p_filesz);		      \
++	}								      \
++} while (0)
++
+ /********* Bits for asm-um/delay.h **********/
+ 
+ typedef unsigned long um_udelay_t;
+diff -Naur a/include/asm-um/common.lds.S b/include/asm-um/common.lds.S
+--- a/include/asm-um/common.lds.S	Fri Aug 15 15:04:49 2003
++++ b/include/asm-um/common.lds.S	Fri Aug 15 15:10:46 2003
+@@ -1,3 +1,5 @@
++#include <asm-generic/vmlinux.lds.h>
++
+   .fini      : { *(.fini)    } =0x9090
+   _etext = .;
+   PROVIDE (etext = .);
+@@ -67,6 +69,10 @@
+   }
+   __initcall_end = .;
+ 
++  __con_initcall_start = .;
++  .con_initcall.init : { *(.con_initcall.init) }
++  __con_initcall_end = .;
++
+   __uml_initcall_start = .;
+   .uml.initcall.init : { *(.uml.initcall.init) }
+   __uml_initcall_end = .;
+@@ -80,7 +86,33 @@
+   .uml.exitcall : { *(.uml.exitcall.exit) }
+   __uml_exitcall_end = .;
+ 
+-  . = ALIGN(4096);
++  . = ALIGN(4);
++  __alt_instructions = .;
++  .altinstructions : { *(.altinstructions) } 
++  __alt_instructions_end = .; 
++  .altinstr_replacement : { *(.altinstr_replacement) } 
++  /* .exit.text is discarded at runtime, not link time, to deal with references
++     from .altinstructions and .eh_frame */
++  .exit.text : { *(.exit.text) }
++  .exit.data : { *(.exit.data) }
++ 
++  __preinit_array_start = .;
++  .preinit_array : { *(.preinit_array) }
++  __preinit_array_end = .;
++  __init_array_start = .;
++  .init_array : { *(.init_array) }
++  __init_array_end = .;
++  __fini_array_start = .;
++  .fini_array : { *(.fini_array) }
++  __fini_array_end = .;
++
++   . = ALIGN(4096);
+   __initramfs_start = .;
+   .init.ramfs : { *(.init.ramfs) }
+   __initramfs_end = .;
++
++  /* Sections to be discarded */
++  /DISCARD/ : {
++ 	*(.exitcall.exit)
++  }
++ 
+diff -Naur a/include/asm-um/cpufeature.h b/include/asm-um/cpufeature.h
+--- a/include/asm-um/cpufeature.h	Wed Dec 31 19:00:00 1969
++++ b/include/asm-um/cpufeature.h	Fri Aug 15 15:10:07 2003
+@@ -0,0 +1,6 @@
++#ifndef __UM_CPUFEATURE_H
++#define __UM_CPUFEATURE_H
++
++#include "asm/arch/cpufeature.h"
++
++#endif
+diff -Naur a/include/asm-um/current.h b/include/asm-um/current.h
+--- a/include/asm-um/current.h	Fri Aug 15 15:04:11 2003
++++ b/include/asm-um/current.h	Fri Aug 15 15:10:19 2003
+@@ -16,8 +16,10 @@
+ #define CURRENT_THREAD(dummy) (((unsigned long) &dummy) & \
+ 			        (PAGE_MASK << CONFIG_KERNEL_STACK_ORDER))
+ 
+-#define current ({ int dummy; \
+-                   ((struct thread_info *) CURRENT_THREAD(dummy))->task; })
++#define current_thread \
++	({ int dummy; ((struct thread_info *) CURRENT_THREAD(dummy)); })
++
++#define current (current_thread->task)
+ 
+ #endif /* __ASSEMBLY__ */
+ 
+diff -Naur a/include/asm-um/fixmap.h b/include/asm-um/fixmap.h
+--- a/include/asm-um/fixmap.h	Fri Aug 15 15:08:40 2003
++++ b/include/asm-um/fixmap.h	Fri Aug 15 15:13:36 2003
+@@ -34,6 +34,7 @@
+ 	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
+ 	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
+ #endif
++	FIX_VSYSCALL,
+ 	__end_of_fixed_addresses
+ };
+ 
+@@ -63,6 +64,13 @@
+ #define __fix_to_virt(x)	(FIXADDR_TOP - ((x) << PAGE_SHIFT))
+ #define __virt_to_fix(x)      ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT)
+ 
++/*
++ * This is the range that is readable by user mode, and things
++ * acting like user mode such as get_user_pages.
++ */
++#define FIXADDR_USER_START	(__fix_to_virt(FIX_VSYSCALL))
++#define FIXADDR_USER_END	(FIXADDR_USER_START + PAGE_SIZE)
++
+ extern void __this_fixmap_does_not_exist(void);
+ 
+ /*
+diff -Naur a/include/asm-um/irq.h b/include/asm-um/irq.h
+--- a/include/asm-um/irq.h	Fri Aug 15 15:09:15 2003
++++ b/include/asm-um/irq.h	Fri Aug 15 15:13:51 2003
+@@ -1,15 +1,6 @@
+ #ifndef __UM_IRQ_H
+ #define __UM_IRQ_H
+ 
+-/* The i386 irq.h has a struct task_struct in a prototype without including
+- * sched.h.  This forward declaration kills the resulting warning.
+- */
+-struct task_struct;
+-
+-#include "asm/ptrace.h"
+-
+-#undef NR_IRQS
+-
+ #define TIMER_IRQ		0
+ #define UMN_IRQ			1
+ #define CONSOLE_IRQ		2
+@@ -28,8 +19,4 @@
+ #define LAST_IRQ XTERM_IRQ
+ #define NR_IRQS (LAST_IRQ + 1)
+ 
+-extern int um_request_irq(unsigned int irq, int fd, int type,
+-			  void (*handler)(int, void *, struct pt_regs *),
+-			  unsigned long irqflags,  const char * devname,
+-			  void *dev_id);
+ #endif
+diff -Naur a/include/asm-um/local.h b/include/asm-um/local.h
+--- a/include/asm-um/local.h	Wed Dec 31 19:00:00 1969
++++ b/include/asm-um/local.h	Fri Aug 15 15:12:46 2003
+@@ -0,0 +1,6 @@
++#ifndef __UM_LOCAL_H
++#define __UM_LOCAL_H
++
++#include "asm/arch/local.h"
++
++#endif
+diff -Naur a/include/asm-um/module-generic.h b/include/asm-um/module-generic.h
+--- a/include/asm-um/module-generic.h	Wed Dec 31 19:00:00 1969
++++ b/include/asm-um/module-generic.h	Fri Aug 15 15:12:38 2003
+@@ -0,0 +1,6 @@
++#ifndef __UM_MODULE_GENERIC_H
++#define __UM_MODULE_GENERIC_H
++
++#include "asm/arch/module.h"
++
++#endif
+diff -Naur a/include/asm-um/module-i386.h b/include/asm-um/module-i386.h
+--- a/include/asm-um/module-i386.h	Wed Dec 31 19:00:00 1969
++++ b/include/asm-um/module-i386.h	Fri Aug 15 15:12:37 2003
+@@ -0,0 +1,13 @@
++#ifndef __UM_MODULE_I386_H
++#define __UM_MODULE_I386_H
++
++/* UML is simple */
++struct mod_arch_specific
++{
++};
++
++#define Elf_Shdr Elf32_Shdr
++#define Elf_Sym Elf32_Sym
++#define Elf_Ehdr Elf32_Ehdr
++
++#endif
+diff -Naur a/include/asm-um/page.h b/include/asm-um/page.h
+--- a/include/asm-um/page.h	Fri Aug 15 15:06:42 2003
++++ b/include/asm-um/page.h	Fri Aug 15 15:12:40 2003
+@@ -4,7 +4,6 @@
+ struct page;
+ 
+ #include "asm/arch/page.h"
+-#include "asm/bug.h"
+ 
+ #undef __pa
+ #undef __va
+diff -Naur a/include/asm-um/pgtable.h b/include/asm-um/pgtable.h
+--- a/include/asm-um/pgtable.h	Fri Aug 15 15:09:25 2003
++++ b/include/asm-um/pgtable.h	Fri Aug 15 15:14:09 2003
+@@ -79,12 +79,13 @@
+ 
+ #define _PAGE_PRESENT	0x001
+ #define _PAGE_NEWPAGE	0x002
+-#define _PAGE_PROTNONE	0x004	/* If not present */
+-#define _PAGE_RW	0x008
+-#define _PAGE_USER	0x010
+-#define _PAGE_ACCESSED	0x020
+-#define _PAGE_DIRTY	0x040
+-#define _PAGE_NEWPROT   0x080
++#define _PAGE_NEWPROT   0x004
++#define _PAGE_FILE	0x008   /* set:pagecache unset:swap */
++#define _PAGE_PROTNONE	0x010	/* If not present */
++#define _PAGE_RW	0x020
++#define _PAGE_USER	0x040
++#define _PAGE_ACCESSED	0x080
++#define _PAGE_DIRTY	0x100
+ 
+ #define REGION_MASK	0xf0000000
+ #define REGION_SHIFT	28
+@@ -203,6 +204,16 @@
+ #define pfn_pte(pfn, prot) __pte(pfn_to_phys(pfn) | pgprot_val(prot))
+ #define pfn_pmd(pfn, prot) __pmd(pfn_to_phys(pfn) | pgprot_val(prot))
+ 
++/*
++ * Bits 0 through 3 are taken
++ */
++#define PTE_FILE_MAX_BITS	28
++
++#define pte_to_pgoff(pte) ((pte).pte_low >> 4)
++
++#define pgoff_to_pte(off) \
++	((pte_t) { ((off) << 4) + _PAGE_FILE })
++
+ static inline pte_t pte_mknewprot(pte_t pte)
+ {
+  	pte_val(pte) |= _PAGE_NEWPROT;
+@@ -236,6 +247,12 @@
+  * The following only work if pte_present() is true.
+  * Undefined behaviour if not..
+  */
++static inline int pte_user(pte_t pte)
++{ 
++	return((pte_val(pte) & _PAGE_USER) && 
++	       !(pte_val(pte) & _PAGE_PROTNONE));
++}
++
+ static inline int pte_read(pte_t pte)
+ { 
+ 	return((pte_val(pte) & _PAGE_USER) && 
+@@ -253,6 +270,14 @@
+ 	       !(pte_val(pte) & _PAGE_PROTNONE));
+ }
+ 
++/*
++ * The following only works if pte_present() is not true.
++ */
++static inline int pte_file(pte_t pte)
++{ 
++	return (pte).pte_low & _PAGE_FILE; 
++}
++
+ static inline int pte_dirty(pte_t pte)	{ return pte_val(pte) & _PAGE_DIRTY; }
+ static inline int pte_young(pte_t pte)	{ return pte_val(pte) & _PAGE_ACCESSED; }
+ static inline int pte_newpage(pte_t pte) { return pte_val(pte) & _PAGE_NEWPAGE; }
+@@ -355,14 +380,26 @@
+ #define pmd_page(pmd) (phys_mem_map(pmd_val(pmd) & PAGE_MASK) + \
+ 		       ((phys_addr(pmd_val(pmd)) >> PAGE_SHIFT)))
+ 
+-/* to find an entry in a page-table-directory. */
++/*
++ * the pgd page can be thought of as an array like this: pgd_t[PTRS_PER_PGD]
++ *
++ * this macro returns the index of the entry in the pgd page which would
++ * control the given virtual address
++ */
+ #define pgd_index(address) ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+ 
+-/* to find an entry in a page-table-directory */
++/*
++ * pgd_offset() returns a (pgd_t *)
++ * pgd_index() is used to get the offset into the pgd page's array of pgd_t's;
++ */
+ #define pgd_offset(mm, address) \
+ ((mm)->pgd + ((address) >> PGDIR_SHIFT))
+ 
+-/* to find an entry in a kernel page-table-directory */
++
++/*
++ * a shortcut which implies the use of the kernel's pgd, instead
++ * of a process's
++ */
+ #define pgd_offset_k(address) pgd_offset(&init_mm, address)
+ 
+ #define pmd_index(address) \
+@@ -374,7 +411,12 @@
+ 	return (pmd_t *) dir;
+ }
+ 
+-/* Find an entry in the third-level page table.. */ 
++/*
++ * the pte page can be thought of as an array like this: pte_t[PTRS_PER_PTE]
++ *
++ * this macro returns the index of the entry in the pte page which would
++ * control the given virtual address
++ */
+ #define pte_index(address) (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+ #define pte_offset_kernel(dir, address) \
+ 	((pte_t *) pmd_page_kernel(*(dir)) +  pte_index(address))
+@@ -400,11 +442,11 @@
+ #define update_mmu_cache(vma,address,pte) do ; while (0)
+ 
+ /* Encode and de-code a swap entry */
+-#define __swp_type(x)			(((x).val >> 3) & 0x7f)
+-#define __swp_offset(x)			((x).val >> 10)
++#define __swp_type(x)			(((x).val >> 4) & 0x3f)
++#define __swp_offset(x)			((x).val >> 11)
+ 
+ #define __swp_entry(type, offset) \
+-	((swp_entry_t) { ((type) << 3) | ((offset) << 10) })
++	((swp_entry_t) { ((type) << 4) | ((offset) << 11) })
+ #define __pte_to_swp_entry(pte) \
+ 	((swp_entry_t) { pte_val(pte_mkuptodate(pte)) })
+ #define __swp_entry_to_pte(x)		((pte_t) { (x).val })
+diff -Naur a/include/asm-um/processor-generic.h b/include/asm-um/processor-generic.h
+--- a/include/asm-um/processor-generic.h	Fri Aug 15 15:04:48 2003
++++ b/include/asm-um/processor-generic.h	Fri Aug 15 15:10:42 2003
+@@ -11,9 +11,7 @@
+ struct task_struct;
+ 
+ #include "linux/config.h"
+-#include "linux/signal.h"
+ #include "asm/ptrace.h"
+-#include "asm/siginfo.h"
+ #include "choose-mode.h"
+ 
+ struct mm_struct;
+@@ -101,14 +99,19 @@
+ } mm_segment_t;
+ 
+ extern struct task_struct *alloc_task_struct(void);
+-extern void free_task_struct(struct task_struct *task);
+ 
+ extern void release_thread(struct task_struct *);
+ extern int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags);
+ extern void dump_thread(struct pt_regs *regs, struct user *u);
++extern void prepare_to_copy(struct task_struct *tsk);
+ 
+ extern unsigned long thread_saved_pc(struct task_struct *t);
+ 
++static inline void mm_copy_segments(struct mm_struct *from_mm, 
++				    struct mm_struct *new_mm)
++{
++}
++
+ #define init_stack	(init_thread_union.stack)
+ 
+ /*
+diff -Naur a/include/asm-um/processor-i386.h b/include/asm-um/processor-i386.h
+--- a/include/asm-um/processor-i386.h	Fri Aug 15 15:04:00 2003
++++ b/include/asm-um/processor-i386.h	Fri Aug 15 15:10:18 2003
+@@ -6,8 +6,8 @@
+ #ifndef __UM_PROCESSOR_I386_H
+ #define __UM_PROCESSOR_I386_H
+ 
+-extern int cpu_has_xmm;
+-extern int cpu_has_cmov;
++extern int host_has_xmm;
++extern int host_has_cmov;
+ 
+ struct arch_thread {
+ 	unsigned long debugregs[8];
+diff -Naur a/include/asm-um/sections.h b/include/asm-um/sections.h
+--- a/include/asm-um/sections.h	Wed Dec 31 19:00:00 1969
++++ b/include/asm-um/sections.h	Fri Aug 15 15:12:54 2003
+@@ -0,0 +1,7 @@
++#ifndef _UM_SECTIONS_H
++#define _UM_SECTIONS_H
++
++/* nothing to see, move along */
++#include <asm-generic/sections.h>
++
++#endif
+diff -Naur a/include/asm-um/smp.h b/include/asm-um/smp.h
+--- a/include/asm-um/smp.h	Fri Aug 15 15:03:35 2003
++++ b/include/asm-um/smp.h	Fri Aug 15 15:10:04 2003
+@@ -7,9 +7,10 @@
+ 
+ #include "linux/config.h"
+ #include "linux/bitops.h"
++#include "linux/threads.h"
+ #include "asm/current.h"
+ 
+-#define smp_processor_id() (current->thread_info->cpu)
++#define smp_processor_id() (current_thread->cpu)
+ #define cpu_logical_map(n) (n)
+ #define cpu_number_map(n) (n)
+ #define PROC_CHANGE_PENALTY	15 /* Pick a number, any number */
+@@ -30,6 +31,13 @@
+ {
+ }
+ 
++extern inline int any_online_cpu(unsigned int mask)
++{
++        if (mask & cpu_online_map)
++                return __ffs(mask & cpu_online_map);
++
++        return -1;
++}
+ #endif
+ 
+ #endif
+diff -Naur a/include/asm-um/system-generic.h b/include/asm-um/system-generic.h
+--- a/include/asm-um/system-generic.h	Fri Aug 15 15:09:22 2003
++++ b/include/asm-um/system-generic.h	Fri Aug 15 15:14:01 2003
+@@ -23,8 +23,10 @@
+ extern void block_signals(void);
+ extern void unblock_signals(void);
+ 
+-#define local_save_flags(flags) do { (flags) = get_signals(); } while(0)
+-#define local_irq_restore(flags) do { set_signals(flags); } while(0)
++#define local_save_flags(flags) do { typecheck(unsigned long, flags); \
++				     (flags) = get_signals(); } while(0)
++#define local_irq_restore(flags) do { typecheck(unsigned long, flags); \
++				      set_signals(flags); } while(0)
+ 
+ #define local_irq_save(flags) do { local_save_flags(flags); \
+                                    local_irq_disable(); } while(0)
+diff -Naur a/include/asm-um/thread_info.h b/include/asm-um/thread_info.h
+--- a/include/asm-um/thread_info.h	Fri Aug 15 15:05:00 2003
++++ b/include/asm-um/thread_info.h	Fri Aug 15 15:11:11 2003
+@@ -9,6 +9,7 @@
+ #ifndef __ASSEMBLY__
+ 
+ #include <asm/processor.h>
++#include <asm/types.h>
+ 
+ struct thread_info {
+ 	struct task_struct	*task;		/* main task structure */
+@@ -43,15 +44,18 @@
+ static inline struct thread_info *current_thread_info(void)
+ {
+ 	struct thread_info *ti;
+-	__asm__("andl %%esp,%0; ":"=r" (ti) : "0" (~16383UL));
++	unsigned long mask = PAGE_SIZE * 
++		(1 << CONFIG_KERNEL_STACK_ORDER) - 1;
++	__asm__("andl %%esp,%0; ":"=r" (ti) : "0" (~mask));
+ 	return ti;
+ }
+ 
+ /* thread information allocation */
+-#define THREAD_SIZE (4*PAGE_SIZE)
+-#define alloc_thread_info(tsk) ((struct thread_info *) \
+-	__get_free_pages(GFP_KERNEL,2))
+-#define free_thread_info(ti) free_pages((unsigned long) (ti), 2)
++#define THREAD_SIZE ((1 << CONFIG_KERNEL_STACK_ORDER) * PAGE_SIZE)
++#define alloc_thread_info(tsk) \
++	((struct thread_info *) kmalloc(THREAD_SIZE, GFP_KERNEL))
++#define free_thread_info(ti) kfree(ti)
++	
+ #define get_thread_info(ti) get_task_struct((ti)->task)
+ #define put_thread_info(ti) put_task_struct((ti)->task)
+ 
+@@ -65,11 +69,13 @@
+ #define TIF_POLLING_NRFLAG      3       /* true if poll_idle() is polling 
+ 					 * TIF_NEED_RESCHED 
+ 					 */
++#define TIF_RESTART_BLOCK 	4
+ 
+ #define _TIF_SYSCALL_TRACE	(1 << TIF_SYSCALL_TRACE)
+ #define _TIF_SIGPENDING		(1 << TIF_SIGPENDING)
+ #define _TIF_NEED_RESCHED	(1 << TIF_NEED_RESCHED)
+ #define _TIF_POLLING_NRFLAG     (1 << TIF_POLLING_NRFLAG)
++#define _TIF_RESTART_BLOCK	(1 << TIF_RESTART_BLOCK)
+ 
+ #endif
+ 
+diff -Naur a/include/asm-um/timex.h b/include/asm-um/timex.h
+--- a/include/asm-um/timex.h	Fri Aug 15 15:07:22 2003
++++ b/include/asm-um/timex.h	Fri Aug 15 15:12:48 2003
+@@ -1,8 +1,6 @@
+ #ifndef __UM_TIMEX_H
+ #define __UM_TIMEX_H
+ 
+-#include "linux/time.h"
+-
+ typedef unsigned long cycles_t;
+ 
+ #define cacheflush_time (0)
+diff -Naur a/include/linux/mm.h b/include/linux/mm.h
+--- a/include/linux/mm.h	Fri Aug 15 15:03:56 2003
++++ b/include/linux/mm.h	Fri Aug 15 15:10:14 2003
+@@ -483,6 +483,9 @@
+ 	return __set_page_dirty_buffers(page);
+ }
+ 
++extern long do_mprotect(struct mm_struct *mm, unsigned long start, 
++			size_t len, unsigned long prot);
++
+ /*
+  * On a two-level page table, this ends up being trivial. Thus the
+  * inlining and the symmetry break with pte_alloc_map() that does all
+@@ -513,9 +516,10 @@
+ 
+ extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
+ 
+-extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+-	unsigned long len, unsigned long prot,
+-	unsigned long flag, unsigned long pgoff);
++extern unsigned long do_mmap_pgoff(struct mm_struct *mm, struct file *file, 
++				   unsigned long addr, unsigned long len,
++				   unsigned long prot, unsigned long flag,
++				   unsigned long pgoff);
+ 
+ static inline unsigned long do_mmap(struct file *file, unsigned long addr,
+ 	unsigned long len, unsigned long prot,
+@@ -525,7 +529,8 @@
+ 	if ((offset + PAGE_ALIGN(len)) < offset)
+ 		goto out;
+ 	if (!(offset & ~PAGE_MASK))
+-		ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT);
++		ret = do_mmap_pgoff(current->mm, file, addr, len, prot, flag, 
++				    offset >> PAGE_SHIFT);
+ out:
+ 	return ret;
+ }
+diff -Naur a/include/linux/proc_mm.h b/include/linux/proc_mm.h
+--- a/include/linux/proc_mm.h	Wed Dec 31 19:00:00 1969
++++ b/include/linux/proc_mm.h	Fri Aug 15 15:10:02 2003
+@@ -0,0 +1,48 @@
++/* 
++ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
++ * Licensed under the GPL
++ */
++
++#ifndef __PROC_MM_H
++#define __PROC_MM_H
++
++#include "linux/sched.h"
++
++#define MM_MMAP 54
++#define MM_MUNMAP 55
++#define MM_MPROTECT 56
++#define MM_COPY_SEGMENTS 57
++
++struct mm_mmap {
++	unsigned long addr;
++	unsigned long len;
++	unsigned long prot;
++	unsigned long flags;
++	unsigned long fd;
++	unsigned long offset;
++};
++
++struct mm_munmap {
++	unsigned long addr;
++	unsigned long len;	
++};
++
++struct mm_mprotect {
++	unsigned long addr;
++	unsigned long len;
++        unsigned int prot;
++};
++
++struct proc_mm_op {
++	int op;
++	union {
++		struct mm_mmap mmap;
++		struct mm_munmap munmap;
++	        struct mm_mprotect mprotect;
++		int copy_segments;
++	} u;
++};
++
++extern struct mm_struct *proc_mm_get_mm(int fd);
++
++#endif
+diff -Naur a/mm/Makefile b/mm/Makefile
+--- a/mm/Makefile	Fri Aug 15 15:07:22 2003
++++ b/mm/Makefile	Fri Aug 15 15:12:48 2003
+@@ -12,3 +12,5 @@
+ 			   slab.o swap.o truncate.o vcache.o vmscan.o $(mmu-y)
+ 
+ obj-$(CONFIG_SWAP)	+= page_io.o swap_state.o swapfile.o
++obj-$(CONFIG_PROC_MM)	+= proc_mm.o
++
+diff -Naur a/mm/memory.c b/mm/memory.c
+--- a/mm/memory.c	Fri Aug 15 15:05:37 2003
++++ b/mm/memory.c	Fri Aug 15 15:11:48 2003
+@@ -45,6 +45,7 @@
+ #include <linux/pagemap.h>
+ #include <linux/vcache.h>
+ #include <linux/rmap-locking.h>
++#include <linux/init.h>
+ 
+ #include <asm/pgalloc.h>
+ #include <asm/rmap.h>
+@@ -669,6 +670,24 @@
+ }
+ 
+ 
++static struct vm_area_struct fixmap_vma = {
++	/* Catch users - if there are any valid
++	   ones, we can make this be "&init_mm" or
++	   something.  */
++	.vm_mm = NULL,
++	.vm_page_prot = PAGE_READONLY,
++	.vm_flags = VM_READ | VM_EXEC,
++};
++
++static int init_fixmap_vma(void)
++{
++	fixmap_vma.vm_start = FIXADDR_START;
++	fixmap_vma.vm_end = FIXADDR_TOP;
++	return(0);
++}
++
++__initcall(init_fixmap_vma);
++
+ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
+ 		unsigned long start, int len, int write, int force,
+ 		struct page **pages, struct vm_area_struct **vmas)
+@@ -689,19 +708,8 @@
+ 
+ 		vma = find_extend_vma(mm, start);
+ 
+-#ifdef FIXADDR_USER_START
+-		if (!vma &&
+-		    start >= FIXADDR_USER_START && start < FIXADDR_USER_END) {
+-			static struct vm_area_struct fixmap_vma = {
+-				/* Catch users - if there are any valid
+-				   ones, we can make this be "&init_mm" or
+-				   something.  */
+-				.vm_mm = NULL,
+-				.vm_start = FIXADDR_USER_START,
+-				.vm_end = FIXADDR_USER_END,
+-				.vm_page_prot = PAGE_READONLY,
+-				.vm_flags = VM_READ | VM_EXEC,
+-			};
++#ifdef FIXADDR_START
++		if (!vma && start >= FIXADDR_START && start < FIXADDR_TOP) {
+ 			unsigned long pg = start & PAGE_MASK;
+ 			pgd_t *pgd;
+ 			pmd_t *pmd;
+diff -Naur a/mm/mmap.c b/mm/mmap.c
+--- a/mm/mmap.c	Fri Aug 15 15:07:18 2003
++++ b/mm/mmap.c	Fri Aug 15 15:12:45 2003
+@@ -457,11 +457,11 @@
+  * The caller must hold down_write(current->mm->mmap_sem).
+  */
+ 
+-unsigned long do_mmap_pgoff(struct file * file, unsigned long addr,
+-			unsigned long len, unsigned long prot,
+-			unsigned long flags, unsigned long pgoff)
++unsigned long do_mmap_pgoff(struct mm_struct *mm, struct file * file, 
++			    unsigned long addr, unsigned long len,
++			    unsigned long prot, unsigned long flags,
++			    unsigned long pgoff)
+ {
+-	struct mm_struct * mm = current->mm;
+ 	struct vm_area_struct * vma, * prev;
+ 	struct inode *inode;
+ 	unsigned int vm_flags;
+diff -Naur a/mm/mprotect.c b/mm/mprotect.c
+--- a/mm/mprotect.c	Fri Aug 15 15:05:20 2003
++++ b/mm/mprotect.c	Fri Aug 15 15:11:21 2003
+@@ -222,7 +222,8 @@
+ }
+ 
+ asmlinkage long
+-sys_mprotect(unsigned long start, size_t len, unsigned long prot)
++do_mprotect(struct mm_struct *mm, unsigned long start, size_t len, 
++	     unsigned long prot)
+ {
+ 	unsigned long nstart, end, tmp;
+ 	struct vm_area_struct * vma, * next, * prev;
+@@ -239,9 +240,9 @@
+ 	if (end == start)
+ 		return 0;
+ 
+-	down_write(&current->mm->mmap_sem);
++	down_write(&mm->mmap_sem);
+ 
+-	vma = find_vma_prev(current->mm, start, &prev);
++	vma = find_vma_prev(mm, start, &prev);
+ 	error = -ENOMEM;
+ 	if (!vma || vma->vm_start > start)
+ 		goto out;
+@@ -301,6 +302,11 @@
+ 		prev->vm_mm->map_count--;
+ 	}
+ out:
+-	up_write(&current->mm->mmap_sem);
++	up_write(&mm->mmap_sem);
+ 	return error;
+ }
++
++asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot)
++{
++        return(do_mprotect(current->mm, start, len, prot));
++}
+diff -Naur a/mm/proc_mm.c b/mm/proc_mm.c
+--- a/mm/proc_mm.c	Wed Dec 31 19:00:00 1969
++++ b/mm/proc_mm.c	Fri Aug 15 15:11:44 2003
+@@ -0,0 +1,174 @@
++/* 
++ * Copyright (C) 2002 Jeff Dike (jdike@karaya.com)
++ * Licensed under the GPL
++ */
++
++#include "linux/mm.h"
++#include "linux/init.h"
++#include "linux/proc_fs.h"
++#include "linux/proc_mm.h"
++#include "linux/file.h"
++#include "asm/uaccess.h"
++#include "asm/mmu_context.h"
++
++static struct file_operations proc_mm_fops;
++
++struct mm_struct *proc_mm_get_mm(int fd)	/* map a /proc/mm fd to its mm */
++{
++	struct mm_struct *ret = ERR_PTR(-EBADF);	/* result if fd is not open */
++	struct file *file;
++
++	file = fget(fd);
++	if (!file)
++		goto out;
++
++	ret = ERR_PTR(-EINVAL);		/* result if fd is not a proc_mm file */
++	if(file->f_op != &proc_mm_fops)
++		goto out_fput;
++
++	ret = file->private_data;	/* the mm stored by open_proc_mm() */
++ out_fput:
++	fput(file);	/* NOTE(review): file ref dropped, no mm ref taken here */
++ out:
++	return(ret);
++}
++
++extern long do_mmap2(struct mm_struct *mm, unsigned long addr, 
++		     unsigned long len, unsigned long prot, 
++		     unsigned long flags, unsigned long fd,
++		     unsigned long pgoff);
++
++static ssize_t write_proc_mm(struct file *file, const char *buffer,
++			     size_t count, loff_t *ppos)
++{
++	struct mm_struct *mm = file->private_data;	/* set by open_proc_mm() */
++	struct proc_mm_op req = { 0 };	/* zeroed: short writes must not expose stack garbage */
++	int n, ret;
++
++	if(count > sizeof(req))		/* never copy past the end of req */
++		return(-EINVAL);
++
++	n = copy_from_user(&req, buffer, count);
++	if(n != 0)
++		return(-EFAULT);
++
++	ret = count;	/* success reports the whole request as consumed */
++	switch(req.op){
++	case MM_MMAP: {
++		struct mm_mmap *map = &req.u.mmap;
++
++		ret = do_mmap2(mm, map->addr, map->len, map->prot, 
++			       map->flags, map->fd, map->offset >> PAGE_SHIFT);
++		if((ret & ~PAGE_MASK) == 0)	/* page-aligned result: success */
++			ret = count;
++	
++		break;
++	}
++	case MM_MUNMAP: {
++		struct mm_munmap *unmap = &req.u.munmap;
++
++		down_write(&mm->mmap_sem);	/* do_munmap() is called under mmap_sem */
++		ret = do_munmap(mm, unmap->addr, unmap->len);
++		up_write(&mm->mmap_sem);
++
++		if(ret == 0)
++			ret = count;
++		break;
++	}
++	case MM_MPROTECT: {
++		struct mm_mprotect *protect = &req.u.mprotect;
++
++		ret = do_mprotect(mm, protect->addr, protect->len, 
++				  protect->prot);
++		if(ret == 0)
++			ret = count;
++		break;
++	}
++
++	case MM_COPY_SEGMENTS: {
++		struct mm_struct *from = proc_mm_get_mm(req.u.copy_segments);
++
++		if(IS_ERR(from)){	/* copy_segments was not a usable proc_mm fd */
++			ret = PTR_ERR(from);
++			break;
++		}
++
++		mm_copy_segments(from, mm);	/* ret is still count here */
++		break;
++	}
++	default:
++		ret = -EINVAL;		/* unknown op code */
++		break;
++	}
++
++	return(ret);
++}
++
++static int open_proc_mm(struct inode *inode, struct file *file)
++{
++	struct mm_struct *mm = mm_alloc();	/* new mm; mmput() in release_proc_mm() */
++	int ret;
++
++	ret = -ENOMEM;
++	if(mm == NULL)
++		goto out_mem;
++
++	ret = init_new_context(current, mm);
++	if(ret)
++		goto out_free;
++
++	spin_lock(&mmlist_lock);
++	list_add(&mm->mmlist, &current->mm->mmlist);	/* link next to the opener's mm */
++	mmlist_nr++;
++	spin_unlock(&mmlist_lock);
++
++	file->private_data = mm;	/* read back by write/release/proc_mm_get_mm() */
++
++	return(0);
++
++ out_free:
++	mmput(mm);
++ out_mem:
++	return(ret);
++}
++
++static int release_proc_mm(struct inode *inode, struct file *file)
++{
++	struct mm_struct *mm = file->private_data;
++
++	mmput(mm);
++	return(0);
++}
++
++static struct file_operations proc_mm_fops = {
++	.open		= open_proc_mm,
++	.release	= release_proc_mm,
++	.write		= write_proc_mm,
++};
++
++static int make_proc_mm(void)
++{
++	struct proc_dir_entry *ent;
++
++	ent = create_proc_entry("mm", 0222, &proc_root);	/* write-only /proc/mm */
++	if(ent == NULL){
++		printk("make_proc_mm : Failed to register /proc/mm\n");
++		return(0);	/* failure is only logged; the initcall still returns 0 */
++	}
++	ent->proc_fops = &proc_mm_fops;
++
++	return(0);
++}
++
++__initcall(make_proc_mm);
++
++/*
++ * Overrides for Emacs so that we follow Linus's tabbing style.
++ * Emacs will notice this stuff at the end of the file and automatically
++ * adjust the settings for this buffer only.  This must remain at the end
++ * of the file.
++ * ---------------------------------------------------------------------------
++ * Local variables:
++ * c-file-style: "linux"
++ * End:
++ */
diff --git a/lustre/kernel_patches/patches/vfs-pdirops-2.4.18-chaos.patch b/lustre/kernel_patches/patches/vfs-pdirops-2.4.18-chaos.patch
new file mode 100644
index 0000000..a9cc225
--- /dev/null
+++ b/lustre/kernel_patches/patches/vfs-pdirops-2.4.18-chaos.patch
@@ -0,0 +1,265 @@
+ fs/inode.c         |    1 
+ fs/namei.c         |   66 ++++++++++++++++++++++++++++++++++++++---------------
+ include/linux/fs.h |   11 ++++----
+ 3 files changed, 54 insertions(+), 24 deletions(-)
+
+--- linux-2.4.18/fs/namei.c~vfs-pdirops-2.4.18-chaos	2003-09-01 14:58:03.000000000 +0400
++++ linux-2.4.18-alexey/fs/namei.c	2003-09-01 17:56:10.000000000 +0400
+@@ -101,6 +101,36 @@ void intent_release(struct lookup_intent
+ 
+ }
+ 
++static void *lock_dir(struct inode *dir, struct qstr *name)
++{
++	unsigned long hash;
++	
++	if (!IS_PDIROPS(dir)) {
++		down(&dir->i_sem);	/* no pdirops: lock the whole directory */
++		return NULL;		/* no dynlock handle; unlock_dir() ignores it */
++	}
++
++	/* OK, the fs understands parallel directory operations,
++	 * so we acquire a lock on the hash of the requested
++	 * filename in order to prevent any other operation on
++	 * the same name at the same time -bzzz */
++
++	/* calculate name hash */
++	hash = full_name_hash(name->name, name->len);
++
++	/* lock this hash; the returned handle must be passed to unlock_dir() */
++	return dynlock_lock(&dir->i_dcache_lock, hash, 1, GFP_ATOMIC);
++}
++
++static void unlock_dir(struct inode *dir, void *lock)
++{
++	if (!IS_PDIROPS(dir)) {		/* lock_dir() took i_sem; 'lock' is unused */
++		up(&dir->i_sem);
++		return;
++	}
++	dynlock_unlock(&dir->i_dcache_lock, lock);	/* drop the per-hash lock */
++}
++
+ /* In order to reduce some races, while at the same time doing additional
+  * checking and hopefully speeding things up, we copy filenames to the
+  * kernel data space before using them..
+@@ -302,10 +332,10 @@ static struct dentry *real_lookup(struct
+ {
+ 	struct dentry * result;
+ 	struct inode *dir = parent->d_inode;
++	void *lock;
+ 
+ again:
+-
+-	down(&dir->i_sem);
++	lock = lock_dir(dir, name);
+ 	/*
+ 	 * First re-do the cached lookup just in case it was created
+ 	 * while we waited for the directory semaphore..
+@@ -329,7 +359,7 @@ again:
+ 			else
+ 				result = dentry;
+ 		}
+-		up(&dir->i_sem);
++		unlock_dir(dir, lock);
+ 		return result;
+ 	}
+ 
+@@ -337,7 +367,7 @@ again:
+ 	 * Uhhuh! Nasty case: the cache was re-populated while
+ 	 * we waited on the semaphore. Need to revalidate.
+ 	 */
+-	up(&dir->i_sem);
++	unlock_dir(dir, lock);
+ 	if (result->d_op && result->d_op->d_revalidate) {
+ 		if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) {
+ 			dput(result);
+@@ -1234,13 +1264,13 @@ struct file *filp_open(const char * path
+ 		goto exit;
+ 
+ 	dir = nd.dentry;
+-	down(&dir->d_inode->i_sem);
++	nd.lock = lock_dir(dir->d_inode, &nd.last);
+ 	dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
+ 
+ do_last:
+ 	error = PTR_ERR(dentry);
+ 	if (IS_ERR(dentry)) {
+-		up(&dir->d_inode->i_sem);
++		unlock_dir(dir->d_inode, nd.lock);
+ 		goto exit;
+ 	}
+ 
+@@ -1249,7 +1279,7 @@ do_last:
+ 	if (!dentry->d_inode) {
+ 		error = vfs_create_it(dir->d_inode, dentry,
+ 				   mode & ~current->fs->umask, &it);
+-		up(&dir->d_inode->i_sem);
++		unlock_dir(dir->d_inode, nd.lock);
+ 		dput(nd.dentry);
+ 		nd.dentry = dentry;
+ 		if (error)
+@@ -1264,7 +1294,7 @@ do_last:
+ 	/*
+ 	 * It already exists.
+ 	 */
+-	up(&dir->d_inode->i_sem);
++	unlock_dir(dir->d_inode, nd.lock);
+ 
+ 	error = -EEXIST;
+ 	if (flag & O_EXCL)
+@@ -1344,7 +1374,7 @@ do_link:
+ 		goto exit;
+ 	}
+ 	dir = nd.dentry;
+-	down(&dir->d_inode->i_sem);
++	nd.lock = lock_dir(dir->d_inode, &nd.last);
+ 	dentry = lookup_hash_it(&nd.last, nd.dentry, &it);
+ 	putname(nd.last.name);
+ 	goto do_last;
+@@ -1357,7 +1387,7 @@ static struct dentry *lookup_create(stru
+ {
+ 	struct dentry *dentry;
+ 
+-	down(&nd->dentry->d_inode->i_sem);
++	nd->lock = lock_dir(nd->dentry->d_inode, &nd->last);
+ 	dentry = ERR_PTR(-EEXIST);
+ 	if (nd->last_type != LAST_NORM)
+ 		goto fail;
+@@ -1446,7 +1476,7 @@ asmlinkage long sys_mknod(const char * f
+ 		}
+ 		dput(dentry);
+ 	}
+-	up(&nd.dentry->d_inode->i_sem);
++	unlock_dir(nd.dentry->d_inode, nd.lock);
+ out2:
+ 	path_release(&nd);
+ out:
+@@ -1509,7 +1539,7 @@ asmlinkage long sys_mkdir(const char * p
+ 					  mode & ~current->fs->umask);
+ 			dput(dentry);
+ 		}
+-		up(&nd.dentry->d_inode->i_sem);
++		unlock_dir(nd.dentry->d_inode, nd.lock);
+ out2:
+ 		path_release(&nd);
+ out:
+@@ -1619,14 +1649,14 @@ asmlinkage long sys_rmdir(const char * p
+  		if (error != -EOPNOTSUPP)
+  			goto exit1;
+  	}
+-	down(&nd.dentry->d_inode->i_sem);
++	nd.lock = lock_dir(nd.dentry->d_inode, &nd.last);
+ 	dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
+ 	error = PTR_ERR(dentry);
+ 	if (!IS_ERR(dentry)) {
+ 		error = vfs_rmdir(nd.dentry->d_inode, dentry);
+ 		dput(dentry);
+ 	}
+-	up(&nd.dentry->d_inode->i_sem);
++	unlock_dir(nd.dentry->d_inode, nd.lock);
+ exit1:
+ 	path_release(&nd);
+ exit:
+@@ -1685,7 +1715,7 @@ asmlinkage long sys_unlink(const char * 
+  		if (error != -EOPNOTSUPP)
+  			goto exit1;
+  	}
+-	down(&nd.dentry->d_inode->i_sem);
++	nd.lock = lock_dir(nd.dentry->d_inode, &nd.last);
+ 	dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
+ 	error = PTR_ERR(dentry);
+ 	if (!IS_ERR(dentry)) {
+@@ -1696,7 +1726,7 @@ asmlinkage long sys_unlink(const char * 
+ 	exit2:
+ 		dput(dentry);
+ 	}
+-	up(&nd.dentry->d_inode->i_sem);
++	unlock_dir(nd.dentry->d_inode, nd.lock);
+ exit1:
+ 	path_release(&nd);
+ exit:
+@@ -1766,7 +1796,7 @@ asmlinkage long sys_symlink(const char *
+ 			error = vfs_symlink(nd.dentry->d_inode, dentry, from);
+ 			dput(dentry);
+ 		}
+-		up(&nd.dentry->d_inode->i_sem);
++		unlock_dir(nd.dentry->d_inode, nd.lock);
+ 	out2:
+ 		path_release(&nd);
+ 	out:
+@@ -1858,7 +1888,7 @@ asmlinkage long sys_link(const char * ol
+ 			error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
+ 			dput(new_dentry);
+ 		}
+-		up(&nd.dentry->d_inode->i_sem);
++		unlock_dir(nd.dentry->d_inode, nd.lock);
+ out_release:
+ 		path_release(&nd);
+ out:
+--- linux-2.4.18/include/linux/fs.h~vfs-pdirops-2.4.18-chaos	2003-09-01 14:58:03.000000000 +0400
++++ linux-2.4.18-alexey/include/linux/fs.h	2003-09-01 16:36:16.000000000 +0400
+@@ -21,6 +21,7 @@
+ #include <linux/cache.h>
+ #include <linux/stddef.h>
+ #include <linux/string.h>
++#include <linux/dynlocks.h>
+ 
+ #include <asm/atomic.h>
+ #include <asm/bitops.h>
+@@ -136,6 +137,7 @@ extern int leases_enable, dir_notify_ena
+ #define S_IMMUTABLE	16	/* Immutable file */
+ #define S_DEAD		32	/* removed, but still open directory */
+ #define S_NOQUOTA	64	/* Inode is not counted to quota */
++#define S_PDIROPS	256	/* Parallel directory operations */
+ 
+ /*
+  * Note that nosuid etc flags are inode-specific: setting some file-system
+@@ -162,6 +164,7 @@ extern int leases_enable, dir_notify_ena
+ #define IS_IMMUTABLE(inode)	((inode)->i_flags & S_IMMUTABLE)
+ #define IS_NOATIME(inode)	(__IS_FLG(inode, MS_NOATIME) || ((inode)->i_flags & S_NOATIME))
+ #define IS_NODIRATIME(inode)	__IS_FLG(inode, MS_NODIRATIME)
++#define IS_PDIROPS(inode)	__IS_FLG(inode, S_PDIROPS)
+ 
+ #define IS_DEADDIR(inode)	((inode)->i_flags & S_DEAD)
+ 
+@@ -490,6 +493,7 @@ struct inode {
+ 	atomic_t		i_writecount;
+ 	unsigned int		i_attr_flags;
+ 	__u32			i_generation;
++	struct dynlock		i_dcache_lock;	/* for parallel directory ops */
+ 	union {
+ 		struct minix_inode_info		minix_i;
+ 		struct ext2_inode_info		ext2_i;
+@@ -713,6 +717,7 @@ struct nameidata {
+ 	unsigned int flags;
+ 	int last_type;
+ 	struct lookup_intent *intent;
++	void *lock;
+ };
+ 
+ #define DQUOT_USR_ENABLED	0x01		/* User diskquotas enabled */
+@@ -1610,12 +1615,6 @@ static inline struct dentry *get_parent(
+ 	return dget(dentry->d_parent);
+ }
+ 
+-static inline void unlock_dir(struct dentry *dir)
+-{
+-	up(&dir->d_inode->i_sem);
+-	dput(dir);
+-}
+-
+ /*
+  * Whee.. Deadlock country. Happily there are only two VFS
+  * operations that does this..
+--- linux-2.4.18/fs/inode.c~vfs-pdirops-2.4.18-chaos	2003-09-01 14:58:03.000000000 +0400
++++ linux-2.4.18-alexey/fs/inode.c	2003-09-01 16:36:16.000000000 +0400
+@@ -119,6 +119,7 @@ static struct inode *alloc_inode(struct 
+ 		mapping->host = inode;
+ 		mapping->gfp_mask = GFP_HIGHUSER;
+ 		inode->i_mapping = mapping;
++		dynlock_init(&inode->i_dcache_lock);
+ 	}
+ 	return inode;
+ }
+
+_
diff --git a/lustre/kernel_patches/patches/vfs-pdirops-2.4.20-rh.patch b/lustre/kernel_patches/patches/vfs-pdirops-2.4.20-rh.patch
new file mode 100644
index 0000000..c9228a8
--- /dev/null
+++ b/lustre/kernel_patches/patches/vfs-pdirops-2.4.20-rh.patch
@@ -0,0 +1,269 @@
+ fs/inode.c         |    1 
+ fs/namei.c         |   66 ++++++++++++++++++++++++++++++++++++++---------------
+ include/linux/fs.h |   11 ++++----
+ 3 files changed, 54 insertions(+), 24 deletions(-)
+
+Index: linux-2.4.20-rh/fs/namei.c
+===================================================================
+--- linux-2.4.20-rh.orig/fs/namei.c	2003-09-04 20:58:33.000000000 +0800
++++ linux-2.4.20-rh/fs/namei.c	2003-09-04 21:21:20.000000000 +0800
+@@ -101,6 +101,36 @@
+ 
+ }
+ 
++static void *lock_dir(struct inode *dir, struct qstr *name)
++{
++	unsigned long hash;
++	
++	if (!IS_PDIROPS(dir)) {
++		down(&dir->i_sem);	/* no pdirops: lock the whole directory */
++		return NULL;		/* no dynlock handle; unlock_dir() ignores it */
++	}
++
++	/* OK, the fs understands parallel directory operations,
++	 * so we acquire a lock on the hash of the requested
++	 * filename in order to prevent any other operation on
++	 * the same name at the same time -bzzz */
++
++	/* calculate name hash */
++	hash = full_name_hash(name->name, name->len);
++
++	/* lock this hash; the returned handle must be passed to unlock_dir() */
++	return dynlock_lock(&dir->i_dcache_lock, hash, 1, GFP_ATOMIC);
++}
++
++static void unlock_dir(struct inode *dir, void *lock)
++{
++	if (!IS_PDIROPS(dir)) {		/* lock_dir() took i_sem; 'lock' is unused */
++		up(&dir->i_sem);
++		return;
++	}
++	dynlock_unlock(&dir->i_dcache_lock, lock);	/* drop the per-hash lock */
++}
++
+ /* In order to reduce some races, while at the same time doing additional
+  * checking and hopefully speeding things up, we copy filenames to the
+  * kernel data space before using them..
+@@ -302,10 +332,10 @@
+ {
+ 	struct dentry * result;
+ 	struct inode *dir = parent->d_inode;
++	void *lock;
+ 
+ again:
+-
+-	down(&dir->i_sem);
++	lock = lock_dir(dir, name);
+ 	/*
+ 	 * First re-do the cached lookup just in case it was created
+ 	 * while we waited for the directory semaphore..
+@@ -329,7 +359,7 @@
+ 			else
+ 				result = dentry;
+ 		}
+-		up(&dir->i_sem);
++		unlock_dir(dir, lock);
+ 		return result;
+ 	}
+ 
+@@ -337,7 +367,7 @@
+ 	 * Uhhuh! Nasty case: the cache was re-populated while
+ 	 * we waited on the semaphore. Need to revalidate.
+ 	 */
+-	up(&dir->i_sem);
++	unlock_dir(dir, lock);
+ 	if (result->d_op && result->d_op->d_revalidate) {
+ 		if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) {
+ 			dput(result);
+@@ -1180,13 +1210,13 @@
+ 		goto exit;
+ 
+ 	dir = nd->dentry;
+-	down(&dir->d_inode->i_sem);
++	nd->lock = lock_dir(dir->d_inode, &nd->last);
+ 	dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+ 
+ do_last:
+ 	error = PTR_ERR(dentry);
+ 	if (IS_ERR(dentry)) {
+-		up(&dir->d_inode->i_sem);
++		unlock_dir(dir->d_inode, nd->lock);
+ 		goto exit;
+ 	}
+ 
+@@ -1195,7 +1225,7 @@
+ 	if (!dentry->d_inode) {
+ 		error = vfs_create_it(dir->d_inode, dentry,
+ 				   mode & ~current->fs->umask, it);
+-		up(&dir->d_inode->i_sem);
++		unlock_dir(dir->d_inode, nd->lock);		
+ 		dput(nd->dentry);
+ 		nd->dentry = dentry;
+ 		if (error)
+@@ -1209,7 +1239,7 @@
+ 	/*
+ 	 * It already exists.
+ 	 */
+-	up(&dir->d_inode->i_sem);
++	unlock_dir(dir->d_inode, nd->lock);
+ 
+ 	error = -EEXIST;
+ 	if (flag & O_EXCL)
+@@ -1362,7 +1392,7 @@
+ 		goto exit;
+ 	}
+ 	dir = nd->dentry;
+-	down(&dir->d_inode->i_sem);
++	nd->lock = lock_dir(dir->d_inode, &nd->last);
+ 	dentry = lookup_hash_it(&nd->last, nd->dentry, it);
+ 	putname(nd->last.name);
+ 	goto do_last;
+@@ -1380,7 +1410,7 @@
+ {
+ 	struct dentry *dentry;
+ 
+-	down(&nd->dentry->d_inode->i_sem);
++	nd->lock = lock_dir(nd->dentry->d_inode, &nd->last);
+ 	dentry = ERR_PTR(-EEXIST);
+ 	if (nd->last_type != LAST_NORM)
+ 		goto fail;
+@@ -1469,7 +1499,7 @@
+ 		}
+ 		dput(dentry);
+ 	}
+-	up(&nd.dentry->d_inode->i_sem);
++	unlock_dir(nd.dentry->d_inode, nd.lock);
+ out2:
+ 	path_release(&nd);
+ out:
+@@ -1532,7 +1562,7 @@
+ 					  mode & ~current->fs->umask);
+ 			dput(dentry);
+ 		}
+-		up(&nd.dentry->d_inode->i_sem);
++		unlock_dir(nd.dentry->d_inode, nd.lock);
+ out2:
+ 		path_release(&nd);
+ out:
+@@ -1642,14 +1672,14 @@
+  		if (error != -EOPNOTSUPP)
+  			goto exit1;
+  	}
+-	down(&nd.dentry->d_inode->i_sem);
++	nd.lock = lock_dir(nd.dentry->d_inode, &nd.last);
+ 	dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
+ 	error = PTR_ERR(dentry);
+ 	if (!IS_ERR(dentry)) {
+ 		error = vfs_rmdir(nd.dentry->d_inode, dentry);
+ 		dput(dentry);
+ 	}
+-	up(&nd.dentry->d_inode->i_sem);
++	unlock_dir(nd.dentry->d_inode, nd.lock);
+ exit1:
+ 	path_release(&nd);
+ exit:
+@@ -1708,7 +1738,7 @@
+  		if (error != -EOPNOTSUPP)
+  			goto exit1;
+  	}
+-	down(&nd.dentry->d_inode->i_sem);
++	nd.lock = lock_dir(nd.dentry->d_inode, &nd.last);
+ 	dentry = lookup_hash_it(&nd.last, nd.dentry, NULL);
+ 	error = PTR_ERR(dentry);
+ 	if (!IS_ERR(dentry)) {
+@@ -1719,7 +1749,7 @@
+ 	exit2:
+ 		dput(dentry);
+ 	}
+-	up(&nd.dentry->d_inode->i_sem);
++	unlock_dir(nd.dentry->d_inode, nd.lock);
+ exit1:
+ 	path_release(&nd);
+ exit:
+@@ -1789,7 +1819,7 @@
+ 			error = vfs_symlink(nd.dentry->d_inode, dentry, from);
+ 			dput(dentry);
+ 		}
+-		up(&nd.dentry->d_inode->i_sem);
++		unlock_dir(nd.dentry->d_inode, nd.lock);
+ 	out2:
+ 		path_release(&nd);
+ 	out:
+@@ -1881,7 +1911,7 @@
+ 			error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
+ 			dput(new_dentry);
+ 		}
+-		up(&nd.dentry->d_inode->i_sem);
++		unlock_dir(nd.dentry->d_inode, nd.lock);
+ out_release:
+ 		path_release(&nd);
+ out:
+Index: linux-2.4.20-rh/include/linux/fs.h
+===================================================================
+--- linux-2.4.20-rh.orig/include/linux/fs.h	2003-09-04 20:59:14.000000000 +0800
++++ linux-2.4.20-rh/include/linux/fs.h	2003-09-04 21:03:46.000000000 +0800
+@@ -21,6 +21,7 @@
+ #include <linux/cache.h>
+ #include <linux/stddef.h>
+ #include <linux/string.h>
++#include <linux/dynlocks.h>
+ 
+ #include <asm/atomic.h>
+ #include <asm/bitops.h>
+@@ -136,6 +137,7 @@
+ #define S_IMMUTABLE	16	/* Immutable file */
+ #define S_DEAD		32	/* removed, but still open directory */
+ #define S_NOQUOTA	64	/* Inode is not counted to quota */
++#define S_PDIROPS	256	/* Parallel directory operations */
+ 
+ /*
+  * Note that nosuid etc flags are inode-specific: setting some file-system
+@@ -162,6 +164,7 @@
+ #define IS_IMMUTABLE(inode)	((inode)->i_flags & S_IMMUTABLE)
+ #define IS_NOATIME(inode)	(__IS_FLG(inode, MS_NOATIME) || ((inode)->i_flags & S_NOATIME))
+ #define IS_NODIRATIME(inode)	__IS_FLG(inode, MS_NODIRATIME)
++#define IS_PDIROPS(inode)	__IS_FLG(inode, S_PDIROPS)
+ 
+ #define IS_DEADDIR(inode)	((inode)->i_flags & S_DEAD)
+ 
+@@ -489,6 +492,7 @@
+ 	atomic_t		i_writecount;
+ 	unsigned int		i_attr_flags;
+ 	__u32			i_generation;
++	struct dynlock		i_dcache_lock;	/* for parallel directory ops */
+ 	union {
+ 		struct minix_inode_info		minix_i;
+ 		struct ext2_inode_info		ext2_i;
+@@ -708,6 +712,7 @@
+ 	unsigned int flags;
+ 	int last_type;
+ 	struct lookup_intent *intent;
++	void *lock;
+ };
+ 
+ /*
+@@ -1621,12 +1626,6 @@
+ 	return dget(dentry->d_parent);
+ }
+ 
+-static inline void unlock_dir(struct dentry *dir)
+-{
+-	up(&dir->d_inode->i_sem);
+-	dput(dir);
+-}
+-
+ /*
+  * Whee.. Deadlock country. Happily there are only two VFS
+  * operations that does this..
+Index: linux-2.4.20-rh/fs/inode.c
+===================================================================
+--- linux-2.4.20-rh.orig/fs/inode.c	2003-09-04 20:58:35.000000000 +0800
++++ linux-2.4.20-rh/fs/inode.c	2003-09-04 21:03:46.000000000 +0800
+@@ -121,6 +121,7 @@
+ 		mapping->host = inode;
+ 		mapping->gfp_mask = GFP_HIGHUSER;
+ 		inode->i_mapping = mapping;
++		dynlock_init(&inode->i_dcache_lock);
+ 	}
+ 	return inode;
+ }
diff --git a/lustre/kernel_patches/pc/dynamic-locks-2.4.18-chaos.pc b/lustre/kernel_patches/pc/dynamic-locks-2.4.18-chaos.pc
new file mode 100644
index 0000000..b626dcf
--- /dev/null
+++ b/lustre/kernel_patches/pc/dynamic-locks-2.4.18-chaos.pc
@@ -0,0 +1,3 @@
+include/linux/dynlocks.h
+lib/dynlocks.c
+lib/Makefile
diff --git a/lustre/kernel_patches/pc/ext-2.4-patch-5.pc b/lustre/kernel_patches/pc/ext-2.4-patch-5.pc
new file mode 100644
index 0000000..7191405
--- /dev/null
+++ b/lustre/kernel_patches/pc/ext-2.4-patch-5.pc
@@ -0,0 +1 @@
+include/linux/ext3_fs.h
diff --git a/lustre/kernel_patches/pc/ext3-2.4.18-ino_sb_macro-2.pc b/lustre/kernel_patches/pc/ext3-2.4.18-ino_sb_macro-2.pc
new file mode 100644
index 0000000..bd89204
--- /dev/null
+++ b/lustre/kernel_patches/pc/ext3-2.4.18-ino_sb_macro-2.pc
@@ -0,0 +1,20 @@
+fs/ext3/balloc.c
+fs/ext3/balloc.c.orig
+fs/ext3/dir.c
+fs/ext3/dir.c.orig
+fs/ext3/ialloc.c
+fs/ext3/ialloc.c.orig
+fs/ext3/inode.c
+fs/ext3/inode.c.orig
+fs/ext3/ioctl.c
+fs/ext3/ioctl.c.orig
+fs/ext3/namei.c
+fs/ext3/namei.c.orig
+fs/ext3/super.c
+fs/ext3/super.c.orig
+fs/ext3/symlink.c
+fs/ext3/symlink.c.orig
+include/linux/ext3_fs.h
+include/linux/ext3_fs.h.orig
+include/linux/ext3_jbd.h
+include/linux/ext3_jbd.h.orig
diff --git a/lustre/kernel_patches/pc/ext3-compat-2.4.18-chaos.pc b/lustre/kernel_patches/pc/ext3-compat-2.4.18-chaos.pc
new file mode 100644
index 0000000..9b16759
--- /dev/null
+++ b/lustre/kernel_patches/pc/ext3-compat-2.4.18-chaos.pc
@@ -0,0 +1 @@
+fs/ext3/namei.c
diff --git a/lustre/kernel_patches/pc/ext3-delete_thread-2.4.18-2.pc b/lustre/kernel_patches/pc/ext3-delete_thread-2.4.18-2.pc
new file mode 100644
index 0000000..42243c8
--- /dev/null
+++ b/lustre/kernel_patches/pc/ext3-delete_thread-2.4.18-2.pc
@@ -0,0 +1,6 @@
+fs/ext3/file.c
+fs/ext3/file.c.orig
+fs/ext3/inode.c
+fs/ext3/super.c
+include/linux/ext3_fs.h
+include/linux/ext3_fs_sb.h
diff --git a/lustre/kernel_patches/pc/ext3-extents-2.4.18-chaos.pc b/lustre/kernel_patches/pc/ext3-extents-2.4.18-chaos.pc
new file mode 100644
index 0000000..f408025
--- /dev/null
+++ b/lustre/kernel_patches/pc/ext3-extents-2.4.18-chaos.pc
@@ -0,0 +1,8 @@
+fs/ext3/extents.c
+fs/ext3/ialloc.c
+fs/ext3/inode.c
+fs/ext3/Makefile
+fs/ext3/super.c
+include/linux/ext3_fs.h
+include/linux/ext3_fs_i.h
+include/linux/ext3_fs_sb.h
diff --git a/lustre/kernel_patches/pc/ext3-extents-oflag-2.4.18-chaos.pc b/lustre/kernel_patches/pc/ext3-extents-oflag-2.4.18-chaos.pc
new file mode 100644
index 0000000..56c1739
--- /dev/null
+++ b/lustre/kernel_patches/pc/ext3-extents-oflag-2.4.18-chaos.pc
@@ -0,0 +1,19 @@
+fs/ext3/ialloc.c
+fs/ext3/namei.c
+include/asm-alpha/fcntl.h
+include/asm-arm/fcntl.h
+include/asm-cris/fcntl.h
+include/asm-i386/fcntl.h
+include/asm-ia64/fcntl.h
+include/asm-m68k/fcntl.h
+include/asm-mips64/fcntl.h
+include/asm-mips/fcntl.h
+include/asm-parisc/fcntl.h
+include/asm-ppc/fcntl.h
+include/asm-s390/fcntl.h
+include/asm-s390x/fcntl.h
+include/asm-sh/fcntl.h
+include/asm-sparc64/fcntl.h
+include/asm-sparc/fcntl.h
+include/linux/ext3_fs.h
+fs/ext3/inode.c
diff --git a/lustre/kernel_patches/pc/ext3-map_inode_page-2.6.0.pc b/lustre/kernel_patches/pc/ext3-map_inode_page-2.6.0.pc
new file mode 100644
index 0000000..231df0e
--- /dev/null
+++ b/lustre/kernel_patches/pc/ext3-map_inode_page-2.6.0.pc
@@ -0,0 +1,2 @@
+fs/ext3/inode.c
+fs/ext3/super.c
diff --git a/lustre/kernel_patches/pc/ext3-no-write-super-chaos.pc b/lustre/kernel_patches/pc/ext3-no-write-super-chaos.pc
new file mode 100644
index 0000000..08795de
--- /dev/null
+++ b/lustre/kernel_patches/pc/ext3-no-write-super-chaos.pc
@@ -0,0 +1 @@
+fs/ext3/super.c
diff --git a/lustre/kernel_patches/pc/ext3-pdirops-2.4.18-chaos.pc b/lustre/kernel_patches/pc/ext3-pdirops-2.4.18-chaos.pc
new file mode 100644
index 0000000..2ad2584
--- /dev/null
+++ b/lustre/kernel_patches/pc/ext3-pdirops-2.4.18-chaos.pc
@@ -0,0 +1,6 @@
+fs/ext3/namei.c
+fs/ext3/super.c
+include/linux/ext3_fs.h
+include/linux/ext3_fs_i.h
+fs/ext3/inode.c
+fs/ext3/ialloc.c
diff --git a/lustre/kernel_patches/pc/iopen-2.4.18-2.pc b/lustre/kernel_patches/pc/iopen-2.4.18-2.pc
new file mode 100644
index 0000000..308490e
--- /dev/null
+++ b/lustre/kernel_patches/pc/iopen-2.4.18-2.pc
@@ -0,0 +1,8 @@
+Documentation/filesystems/ext2.txt
+fs/ext3/inode.c
+fs/ext3/iopen.c
+fs/ext3/iopen.h
+fs/ext3/Makefile
+fs/ext3/namei.c
+fs/ext3/super.c
+include/linux/ext3_fs.h
diff --git a/lustre/kernel_patches/pc/iopen-2.6.0.pc b/lustre/kernel_patches/pc/iopen-2.6.0.pc
new file mode 100644
index 0000000..308490e
--- /dev/null
+++ b/lustre/kernel_patches/pc/iopen-2.6.0.pc
@@ -0,0 +1,8 @@
+Documentation/filesystems/ext2.txt
+fs/ext3/inode.c
+fs/ext3/iopen.c
+fs/ext3/iopen.h
+fs/ext3/Makefile
+fs/ext3/namei.c
+fs/ext3/super.c
+include/linux/ext3_fs.h
diff --git a/lustre/kernel_patches/pc/linux-2.4.18ea-0.8.26-2.pc b/lustre/kernel_patches/pc/linux-2.4.18ea-0.8.26-2.pc
new file mode 100644
index 0000000..1078cb4
--- /dev/null
+++ b/lustre/kernel_patches/pc/linux-2.4.18ea-0.8.26-2.pc
@@ -0,0 +1,11 @@
+fs/ext3/ext3-exports.c
+fs/ext3/ialloc.c
+fs/ext3/inode.c
+fs/ext3/Makefile
+fs/ext3/namei.c
+fs/ext3/super.c
+fs/ext3/xattr.c
+include/linux/ext3_fs.h
+include/linux/ext3_jbd.h
+include/linux/ext3_xattr.h
+include/linux/xattr.h
diff --git a/lustre/kernel_patches/pc/removepage-2.4.20.pc b/lustre/kernel_patches/pc/removepage-2.4.20.pc
new file mode 100644
index 0000000..c659e15
--- /dev/null
+++ b/lustre/kernel_patches/pc/removepage-2.4.20.pc
@@ -0,0 +1,2 @@
+include/linux/fs.h
+mm/filemap.c
diff --git a/lustre/kernel_patches/pc/removepage-2.6.0.pc b/lustre/kernel_patches/pc/removepage-2.6.0.pc
new file mode 100644
index 0000000..c659e15
--- /dev/null
+++ b/lustre/kernel_patches/pc/removepage-2.6.0.pc
@@ -0,0 +1,2 @@
+include/linux/fs.h
+mm/filemap.c
diff --git a/lustre/kernel_patches/pc/uml-2.6.0-fix.pc b/lustre/kernel_patches/pc/uml-2.6.0-fix.pc
new file mode 100644
index 0000000..980e3ee
--- /dev/null
+++ b/lustre/kernel_patches/pc/uml-2.6.0-fix.pc
@@ -0,0 +1 @@
+include/asm-um/unistd.h
diff --git a/lustre/kernel_patches/pc/uml-patch-2.6.0-test3-1.pc b/lustre/kernel_patches/pc/uml-patch-2.6.0-test3-1.pc
new file mode 100644
index 0000000..9a32c9a
--- /dev/null
+++ b/lustre/kernel_patches/pc/uml-patch-2.6.0-test3-1.pc
@@ -0,0 +1,113 @@
+arch/um/config.release
+arch/um/defconfig
+arch/um/drivers/chan_kern.c
+arch/um/drivers/chan_user.c
+arch/um/drivers/cow.h
+arch/um/drivers/cow_kern.c
+arch/um/drivers/cow_sys.h
+arch/um/drivers/cow_user.c
+arch/um/drivers/hostaudio_kern.c
+arch/um/drivers/line.c
+arch/um/drivers/Makefile
+arch/um/drivers/mconsole_kern.c
+arch/um/drivers/mconsole_user.c
+arch/um/drivers/mmapper_kern.c
+arch/um/drivers/net_kern.c
+arch/um/drivers/port_kern.c
+arch/um/drivers/ssl.c
+arch/um/drivers/stdio_console.c
+arch/um/drivers/ubd_kern.c
+arch/um/drivers/ubd_user.c
+arch/um/drivers/xterm.c
+arch/um/drivers/xterm_kern.c
+arch/um/dyn.lds.S
+arch/um/include/irq_kern.h
+arch/um/include/kern_util.h
+arch/um/include/line.h
+arch/um/include/mconsole.h
+arch/um/include/mem.h
+arch/um/include/mem_user.h
+arch/um/include/os.h
+arch/um/include/sysdep-i386/sigcontext.h
+arch/um/include/ubd_user.h
+arch/um/include/user.h
+arch/um/include/user_util.h
+arch/um/Kconfig
+arch/um/Kconfig_block
+arch/um/Kconfig_net
+arch/um/kernel/config.c.in
+arch/um/kernel/exec_kern.c
+arch/um/kernel/init_task.c
+arch/um/kernel/irq.c
+arch/um/kernel/Makefile
+arch/um/kernel/mem.c
+arch/um/kernel/mem_user.c
+arch/um/kernel/process.c
+arch/um/kernel/process_kern.c
+arch/um/kernel/ptrace.c
+arch/um/kernel/sigio_kern.c
+arch/um/kernel/signal_kern.c
+arch/um/kernel/skas/include/mode.h
+arch/um/kernel/skas/include/uaccess.h
+arch/um/kernel/skas/Makefile
+arch/um/kernel/skas/process.c
+arch/um/kernel/skas/process_kern.c
+arch/um/kernel/skas/util/mk_ptregs.c
+arch/um/kernel/smp.c
+arch/um/kernel/syscall_kern.c
+arch/um/kernel/sys_call_table.c
+arch/um/kernel/sysrq.c
+arch/um/kernel/time.c
+arch/um/kernel/time_kern.c
+arch/um/kernel/trap_kern.c
+arch/um/kernel/trap_user.c
+arch/um/kernel/tt/include/uaccess.h
+arch/um/kernel/tt/process_kern.c
+arch/um/kernel/tt/ptproxy/proxy.c
+arch/um/kernel/tt/tracer.c
+arch/um/kernel/tt/uaccess_user.c
+arch/um/kernel/tty_log.c
+arch/um/kernel/um_arch.c
+arch/um/kernel/umid.c
+arch/um/kernel/user_util.c
+arch/um/Makefile
+arch/um/Makefile-i386
+arch/um/Makefile-skas
+arch/um/os-Linux/drivers/tuntap_user.c
+arch/um/os-Linux/file.c
+arch/um/sys-i386/bugs.c
+arch/um/sys-i386/Makefile
+arch/um/uml.lds.S
+arch/um/util/mk_constants_kern.c
+fs/hostfs/hostfs.h
+fs/hostfs/hostfs_kern.c
+fs/hostfs/hostfs_user.c
+fs/hostfs/Makefile
+fs/hppfs/hppfs_kern.c
+fs/hppfs/Makefile
+fs/Makefile
+include/asm-um/archparam-i386.h
+include/asm-um/common.lds.S
+include/asm-um/cpufeature.h
+include/asm-um/current.h
+include/asm-um/fixmap.h
+include/asm-um/irq.h
+include/asm-um/local.h
+include/asm-um/module-generic.h
+include/asm-um/module-i386.h
+include/asm-um/page.h
+include/asm-um/pgtable.h
+include/asm-um/processor-generic.h
+include/asm-um/processor-i386.h
+include/asm-um/sections.h
+include/asm-um/smp.h
+include/asm-um/system-generic.h
+include/asm-um/thread_info.h
+include/asm-um/timex.h
+include/linux/mm.h
+include/linux/proc_mm.h
+mm/Makefile
+mm/memory.c
+mm/mmap.c
+mm/mprotect.c
+mm/proc_mm.c
diff --git a/lustre/kernel_patches/pc/vfs-pdirops-2.4.18-chaos.pc b/lustre/kernel_patches/pc/vfs-pdirops-2.4.18-chaos.pc
new file mode 100644
index 0000000..f244b84
--- /dev/null
+++ b/lustre/kernel_patches/pc/vfs-pdirops-2.4.18-chaos.pc
@@ -0,0 +1,3 @@
+fs/namei.c
+include/linux/fs.h
+fs/inode.c
diff --git a/lustre/kernel_patches/series/chaos-2.4.18 b/lustre/kernel_patches/series/chaos-2.4.18
index 8a36dc0..5ecead5 100644
--- a/lustre/kernel_patches/series/chaos-2.4.18
+++ b/lustre/kernel_patches/series/chaos-2.4.18
@@ -23,6 +23,6 @@ iopen-2.4.18.patch
 jbd-dont-account-blocks-twice.patch
 jbd-commit-tricks.patch
 ext3-o_direct-1-2.4.18-chaos.patch
-ext3-no-write-super.patch
-jbd-ctx_switch.patch
-jbd-get_write_access.patch
+ext3-no-write-super-chaos.patch
+ext3-extents-2.4.18-chaos.patch
+ext3-extents-oflag-2.4.18-chaos.patch
diff --git a/lustre/kernel_patches/series/chaos-2.4.18-pdirops b/lustre/kernel_patches/series/chaos-2.4.18-pdirops
new file mode 100644
index 0000000..d4545e2
--- /dev/null
+++ b/lustre/kernel_patches/series/chaos-2.4.18-pdirops
@@ -0,0 +1,35 @@
+dev_read_only.patch
+exports.patch
+kmem_cache_validate.patch
+lustre_version.patch
+vfs_intent-2.4.18-18-chaos65.patch
+invalidate_show.patch
+iod-rmap-exports.patch
+export-truncate.patch
+ext3-compat-2.4.18-chaos.patch
+ext-2.4-patch-1.patch
+ext-2.4-patch-2.patch
+ext-2.4-patch-3.patch
+ext-2.4-patch-4.patch
+ext-2.4-patch-5.patch
+linux-2.4.18ea-0.8.26-2.patch
+ext3-2.4-ino_t.patch
+ext3-2.4.18-ino_sb_macro-2.patch
+ext3-orphan_lock.patch
+ext3-delete_thread-2.4.18-2.patch
+extN-misc-fixup.patch
+extN-noread.patch
+extN-wantedi.patch
+ext3-san-2.4.20.patch
+extN-2.4.18-ino_sb_fixup.patch
+ext3-map_inode_page_2.4.18.patch
+ext3-error-export.patch
+iopen-2.4.18-2.patch
+jbd-dont-account-blocks-twice.patch
+jbd-commit-tricks.patch
+ext3-o_direct-1-2.4.18-chaos.patch
+ext3-no-write-super-chaos.patch
+dynamic-locks-2.4.18-chaos.patch
+vfs-pdirops-2.4.18-chaos.patch
+ext3-pdirops-2.4.18-chaos.patch
+add_page_private.patch
diff --git a/lustre/kernel_patches/series/uml_2.6.0_test3 b/lustre/kernel_patches/series/uml_2.6.0_test3
new file mode 100644
index 0000000..7b89a36
--- /dev/null
+++ b/lustre/kernel_patches/series/uml_2.6.0_test3
@@ -0,0 +1,14 @@
+uml-patch-2.6.0-test3-1.patch
+lustre_build.patch
+lustre_version.patch
+vfs_intent_2.6.0-test1.patch
+vfs_nointent_2.6.0-test1.patch
+vfs_races_2.5.72_rev1.patch
+vfs_mntcwd_2.5.72_rev1.patch
+ext3-san-jdike-2.5.73.patch
+iopen-2.6.0.patch
+export-truncate-2.5.63.patch
+qla2xxx-v8.00.00b1-2.5.73.patch
+uml-2.6.0-fix.patch
+ext3-map_inode_page-2.6.0.patch
+removepage-2.6.0.patch
diff --git a/lustre/liblustre/file.c b/lustre/liblustre/file.c
index 88af047..5d4c927 100644
--- a/lustre/liblustre/file.c
+++ b/lustre/liblustre/file.c
@@ -470,6 +470,7 @@ static int llu_file_release(struct inode *inode)
         if (!fd) /* no process opened the file after an mcreate */
                 RETURN(rc = 0);
 
+#if 0
         /* we might not be able to get a valid handle on this file
          * again so we really want to flush our write cache.. */
         if (S_ISREG(inode->i_mode) && lsm) {
@@ -481,11 +482,12 @@ static int llu_file_release(struct inode *inode)
                 memcpy(&oa.o_inline, &fd->fd_ost_och, FD_OSTDATA_SIZE);
                 oa.o_valid |= OBD_MD_FLHANDLE;
 
-                rc = obd_close(&sbi->ll_osc_conn, &oa, lsm, NULL);
+                rc = obd_close(ll_s2obdexp(sbi), &oa, lsm, NULL);
                 if (rc)
                         CERROR("inode %lu object close failed: rc = "
                                "%d\n", lli->lli_st_ino, rc);
 	}
+#endif
 
         rc2 = llu_mdc_close(&sbi->ll_mdc_conn, inode);
         if (rc2 && !rc)
diff --git a/lustre/liblustre/llite_lib.h b/lustre/liblustre/llite_lib.h
index ce2e23b..977dbca 100644
--- a/lustre/liblustre/llite_lib.h
+++ b/lustre/liblustre/llite_lib.h
@@ -20,7 +20,7 @@ struct llu_sb_info
 {
         struct obd_uuid         ll_sb_uuid;
         struct lustre_handle    ll_mdc_conn;
-        struct lustre_handle    ll_osc_conn;
+        struct obd_export    	ll_osc_exp;
         obd_id                  ll_rootino;
         int                     ll_flags;
         struct list_head        ll_conn_chain;
diff --git a/lustre/liblustre/rw.c b/lustre/liblustre/rw.c
index 847b1d0..c5df187 100644
--- a/lustre/liblustre/rw.c
+++ b/lustre/liblustre/rw.c
@@ -74,7 +74,7 @@ int llu_extent_lock(struct ll_file_data *fd, struct inode *inode,
         down(&lli->lli_getattr_sem);
 
         if (!test_bit(LLI_F_DID_GETATTR, &lli->lli_flags)) {
-                rc = ll_inode_getattr(inode, lsm, fd ? &fd->fd_ost_och : NULL);
+                rc = ll_inode_getattr(inode, lsm);
                 if (rc == 0) {
                         set_bit(LLI_F_DID_GETATTR, &lli->lli_flags);
                 } else {
diff --git a/lustre/liblustre/super.c b/lustre/liblustre/super.c
index 0939352..a51be12 100644
--- a/lustre/liblustre/super.c
+++ b/lustre/liblustre/super.c
@@ -170,8 +170,7 @@ void obdo_from_inode(struct obdo *dst, struct inode *src, obd_flag valid)
         dst->o_valid |= (valid & ~OBD_MD_FLID);
 }
 
-int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm,
-                      char *ostdata)
+static int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm)
 {
         struct llu_sb_info *sbi = llu_i2sbi(inode);
         struct obdo oa;
@@ -187,11 +186,6 @@ int llu_inode_getattr(struct inode *inode, struct lov_stripe_md *lsm,
         oa.o_valid = OBD_MD_FLID | OBD_MD_FLTYPE | OBD_MD_FLSIZE |
                 OBD_MD_FLBLOCKS | OBD_MD_FLMTIME | OBD_MD_FLCTIME;
 
-        if (ostdata != NULL) {
-                memcpy(&oa.o_inline, ostdata, FD_OSTDATA_SIZE);
-                oa.o_valid |= OBD_MD_FLHANDLE;
-        }
-
         rc = obd_getattr(&sbi->ll_osc_conn, &oa, lsm);
         if (rc)
                 RETURN(rc);
@@ -327,7 +321,7 @@ static int llu_iop_lookup(struct pnode *pnode,
         llu_update_inode(*inop, body, lic.lic_lsm);
 
         if (llu_i2info(*inop)->lli_smd) {
-                rc = llu_inode_getattr(*inop, llu_i2info(*inop)->lli_smd, NULL);
+                rc = llu_inode_getattr(*inop, llu_i2info(*inop)->lli_smd);
                 if (rc)
                         _sysio_i_gone(*inop);
         }
diff --git a/lustre/mdc/mdc_locks.c b/lustre/mdc/mdc_locks.c
new file mode 100644
index 0000000..2e63dc7
--- /dev/null
+++ b/lustre/mdc/mdc_locks.c
@@ -0,0 +1,550 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ * Copyright (C) 2001-2003 Cluster File Systems, Inc.
+ *
+ *   This file is part of Lustre, http://www.sf.net/projects/lustre/
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_MDC
+
+#ifdef __KERNEL__
+# include <linux/module.h>
+# include <linux/pagemap.h>
+# include <linux/miscdevice.h>
+# include <linux/init.h>
+#else
+# include <liblustre.h>
+# include <linux/obd_class.h>
+#endif
+
+#include <linux/lustre_mds.h>
+#include <linux/lustre_lite.h>
+#include <linux/lustre_dlm.h>
+#include <linux/lprocfs_status.h>
+#include "mdc_internal.h"
+/* Return the bits of @flag that are set in the intent's disposition mask. */
+int it_disposition(struct lookup_intent *it, int flag)
+{
+        return it->d.lustre.it_disposition & flag;
+}
+EXPORT_SYMBOL(it_disposition);
+/* Set the bit(s) given in @flag in the intent's disposition mask. */
+void it_set_disposition(struct lookup_intent *it, int flag)
+{
+        it->d.lustre.it_disposition |= flag;
+}
+EXPORT_SYMBOL(it_set_disposition);
+
+/* Populate the mdc_op_data @data: copy the caller context @ctxt and the
+ * fid @f1, copy @f2 (or zero fid2 when @f2 is NULL), and record the name
+ * pointer, its length and the create mode. */
+static void mdc_fid2mdc_op_data(struct mdc_op_data *data,
+                                struct ll_uctxt *ctxt,
+                                struct ll_fid *f1, struct ll_fid *f2,
+                                const char *name, int namelen, int mode)
+{
+        LASSERT(data);
+        LASSERT(ctxt);
+        LASSERT(f1);
+
+        data->ctxt = *ctxt;
+        data->fid1 = *f1;
+        if (f2 == NULL)
+                memset(&data->fid2, 0, sizeof(data->fid2));
+        else
+                data->fid2 = *f2;
+        data->name = name;
+        data->namelen = namelen;
+        data->create_mode = mode;
+}
+
+/* Lock mode for an intent: PW for create, PR for read-type ops; else LBUG. */
+static int it_to_lock_mode(struct lookup_intent *it)
+{
+        /* CREAT needs to be tested before open (both could be set) */
+        if (it->it_op & IT_CREAT)
+                return LCK_PW;
+        else if (it->it_op & (IT_READDIR | IT_GETATTR | IT_OPEN | IT_LOOKUP))
+                return LCK_PR;
+        LBUG();
+        return -EINVAL; /* no ENTRY above, so no RETURN() here */
+}
+
+/*
+ * Report the status of one phase of an intent's execution.
+ *
+ * @phase: the DISP_* bit the caller is asking about
+ * @it:    intent whose d.lustre.it_disposition / it_status are examined
+ *
+ * The disposition bits are tested in the order DISP_OPEN_OPEN,
+ * DISP_OPEN_CREATE, DISP_LOOKUP_EXECD, DISP_IT_EXECD; d.lustre.it_status
+ * is attributed to the first one found set.  If @phase is that bit the
+ * status is returned, for any other @phase the result is 0.
+ *
+ * A disposition with none of the four bits set is logged and hits LBUG().
+ */
+int it_open_error(int phase, struct lookup_intent *it)
+{
+        int status = it->d.lustre.it_status;
+
+        /* the first disposition bit found set owns it_status */
+        if (it_disposition(it, DISP_OPEN_OPEN))
+                return phase == DISP_OPEN_OPEN ? status : 0;
+
+        if (it_disposition(it, DISP_OPEN_CREATE))
+                return phase == DISP_OPEN_CREATE ? status : 0;
+
+        if (it_disposition(it, DISP_LOOKUP_EXECD))
+                return phase == DISP_LOOKUP_EXECD ? status : 0;
+
+        if (it_disposition(it, DISP_IT_EXECD))
+                return phase == DISP_IT_EXECD ? status : 0;
+
+        CERROR("it disp: %X, status: %d\n", it->d.lustre.it_disposition,
+               status);
+        LBUG();
+        return 0;
+}
+EXPORT_SYMBOL(it_open_error);
+
+/* Set lock->l_data = @data; @l must be a handle holding a referenced lock. */
+void mdc_set_lock_data(__u64 *l, void *data)
+{
+        struct ldlm_lock *lock;
+        struct lustre_handle *lockh = (struct lustre_handle *)l;
+        ENTRY;
+        /* nothing to do for a zero handle */
+        if (!*l) {
+                EXIT;
+                return;
+        }
+
+        lock = ldlm_handle2lock(lockh);
+        /* presumably takes the reference dropped by LDLM_LOCK_PUT() below */
+        LASSERT(lock != NULL);
+        l_lock(&lock->l_resource->lr_namespace->ns_lock);
+#if !defined(LIBLUSTRE)
+        if (lock->l_data && lock->l_data != data) {
+                struct inode *new_inode = data;
+                struct inode *old_inode = lock->l_data;
+                unsigned long state = old_inode->i_state & I_FREEING;
+                CERROR("Found existing inode %p/%lu/%u state %lu in lock: "
+                       "setting data to %p/%lu/%u\n", old_inode,
+                       old_inode->i_ino, old_inode->i_generation, state,
+                       new_inode, new_inode->i_ino, new_inode->i_generation);
+                LASSERT(state); /* old inode must be I_FREEING */
+        }
+#endif /* !LIBLUSTRE */
+        lock->l_data = data;
+        l_unlock(&lock->l_resource->lr_namespace->ns_lock);
+        LDLM_LOCK_PUT(lock);
+
+        EXIT;
+}
+EXPORT_SYMBOL(mdc_set_lock_data);
+
+/* Hand @it and @data to ldlm_change_cbdata() for the resource named by @fid
+ * (id, generation) in the namespace of @exp's device.  Always returns 0. */
+int mdc_change_cbdata(struct obd_export *exp, struct ll_fid *fid,
+                      ldlm_iterator_t it, void *data)
+{
+        struct ldlm_res_id res_id = { .name = {fid->id, fid->generation} };
+        struct obd_device *obd;
+        ENTRY;
+
+        obd = class_exp2obd(exp);
+        ldlm_change_cbdata(obd->obd_namespace, &res_id, it, data);
+        EXIT;
+        return 0;
+}
+
+/* Send an LDLM_ENQUEUE carrying the intent @it for the resource named by
+ * data->fid1 and store the reply's disposition and status, the lock mode
+ * and the request itself in it->d.lustre.  We always reserve enough space in
+ * the reply for a stripe MD, because we don't know in advance the file type. */
+int mdc_enqueue(struct obd_export *exp,
+                int lock_type,
+                struct lookup_intent *it,
+                int lock_mode,
+                struct mdc_op_data *data,
+                struct lustre_handle *lockh,
+                char *tgt,
+                int tgtlen,
+                ldlm_completion_callback cb_completion,
+                ldlm_blocking_callback cb_blocking,
+                void *cb_data)
+{
+        struct ptlrpc_request *req;
+        struct obd_device *obddev = class_exp2obd(exp);
+        struct ldlm_res_id res_id =
+                { .name = {data->fid1.id, data->fid1.generation} };
+        int size[6] = {sizeof(struct ldlm_request), sizeof(struct ldlm_intent)};
+        int rc, flags = LDLM_FL_HAS_INTENT;
+        int repsize[4] = {sizeof(struct ldlm_reply),
+                          sizeof(struct mds_body),
+                          obddev->u.cli.cl_max_mds_easize,
+                          obddev->u.cli.cl_max_mds_cookiesize};
+        struct ldlm_reply *dlm_rep;
+        struct ldlm_intent *lit;
+        struct ldlm_request *lockreq;
+        void *eadata;
+        unsigned long irqflags;
+        int   reply_buffers = 0; /* reply buffers sized into rq_replen */
+        ENTRY;
+
+//        LDLM_DEBUG_NOLOCK("mdsintent=%s,name=%s,dir=%lu",
+//                          ldlm_it2str(it->it_op), it_name, it_inode->i_ino);
+
+        if (it->it_op & IT_OPEN) {
+                it->it_create_mode |= S_IFREG;
+                it->it_create_mode &= ~current->fs->umask;
+
+                size[2] = sizeof(struct mds_rec_create);
+                size[3] = data->namelen + 1;
+                size[4] = obddev->u.cli.cl_max_mds_easize;
+                req = ptlrpc_prep_req(class_exp2cliimp(exp), LDLM_ENQUEUE, 5,
+                                      size, NULL);
+                if (!req)
+                        RETURN(-ENOMEM);
+
+                spin_lock_irqsave (&req->rq_lock, irqflags);
+                req->rq_replay = 1;
+                spin_unlock_irqrestore (&req->rq_lock, irqflags);
+
+                /* pack the intent */
+                lit = lustre_msg_buf(req->rq_reqmsg, 1, sizeof (*lit));
+                lit->opc = (__u64)it->it_op;
+
+                /* pack the intended request */
+                mdc_open_pack(req, 2, data, it->it_create_mode, 0, 
+                              LTIME_S(CURRENT_TIME),
+                              it->it_flags, tgt, tgtlen);
+                /* get ready for the reply */
+                reply_buffers = 3;
+                req->rq_replen = lustre_msg_size(3, repsize);
+        } else if (it->it_op & IT_UNLINK) {
+                size[2] = sizeof(struct mds_rec_unlink);
+                size[3] = data->namelen + 1;
+                req = ptlrpc_prep_req(class_exp2cliimp(exp), LDLM_ENQUEUE, 4,
+                                      size, NULL);
+                if (!req)
+                        RETURN(-ENOMEM);
+
+                /* pack the intent */
+                lit = lustre_msg_buf(req->rq_reqmsg, 1, sizeof (*lit));
+                lit->opc = (__u64)it->it_op;
+
+                /* pack the intended request */
+                mdc_unlink_pack(req, 2, data);
+                /* get ready for the reply */
+                reply_buffers = 4;
+                req->rq_replen = lustre_msg_size(4, repsize);
+        } else if (it->it_op & (IT_GETATTR | IT_LOOKUP)) {
+                int valid = OBD_MD_FLNOTOBD | OBD_MD_FLEASIZE;
+                size[2] = sizeof(struct mds_body);
+                size[3] = data->namelen + 1;
+
+                req = ptlrpc_prep_req(class_exp2cliimp(exp), LDLM_ENQUEUE, 4,
+                                      size, NULL);
+                if (!req)
+                        RETURN(-ENOMEM);
+
+                /* pack the intent */
+                lit = lustre_msg_buf(req->rq_reqmsg, 1, sizeof (*lit));
+                lit->opc = (__u64)it->it_op;
+
+                /* pack the intended request */
+                mdc_getattr_pack(req, valid, 2, it->it_flags, data);
+                /* get ready for the reply */
+                reply_buffers = 3;
+                req->rq_replen = lustre_msg_size(3, repsize);
+        } else if (it->it_op == IT_READDIR) {
+                req = ptlrpc_prep_req(class_exp2cliimp(exp), LDLM_ENQUEUE, 1,
+                                      size, NULL);
+                if (!req)
+                        RETURN(-ENOMEM);
+
+                /* get ready for the reply */
+                reply_buffers = 1;
+                req->rq_replen = lustre_msg_size(1, repsize);
+        }  else {
+                LBUG();
+                RETURN(-EINVAL);
+        }
+
+        mdc_get_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
+        rc = ldlm_cli_enqueue(exp, req, obddev->obd_namespace, NULL, res_id,
+                              lock_type, NULL, 0, lock_mode, &flags,
+                              cb_completion, cb_blocking, cb_data, lockh);
+        mdc_put_rpc_lock(obddev->u.cli.cl_rpc_lock, it);
+
+        /* If we're going to replay this request, we don't want to
+         * actually get a lock, just perform the intent. */
+        if (req->rq_transno || req->rq_replay) {
+                lockreq = lustre_msg_buf(req->rq_reqmsg, 0, sizeof (*lockreq));
+                lockreq->lock_flags |= LDLM_FL_INTENT_ONLY;
+        }
+
+        /* This can go when we're sure that this can never happen */
+        LASSERT(rc != -ENOENT);
+        if (rc == ELDLM_LOCK_ABORTED) {
+                lock_mode = 0;
+                memset(lockh, 0, sizeof(*lockh));
+                rc = 0;
+        } else if (rc != 0) {
+                CERROR("ldlm_cli_enqueue: %d\n", rc);
+                LASSERT (rc < 0);
+                ptlrpc_req_finished(req);
+                RETURN(rc);
+        } else { /* rc = 0 */
+                struct ldlm_lock *lock = ldlm_handle2lock(lockh);
+                LASSERT(lock);
+
+                /* If the server gave us back a different lock mode, we should
+                 * fix up our variables. */
+                if (lock->l_req_mode != lock_mode) {
+                        ldlm_lock_addref(lockh, lock->l_req_mode);
+                        ldlm_lock_decref(lockh, lock_mode);
+                        lock_mode = lock->l_req_mode;
+                }
+
+                LDLM_LOCK_PUT(lock);
+        }
+
+        dlm_rep = lustre_msg_buf(req->rq_repmsg, 0, sizeof (*dlm_rep));
+        LASSERT(dlm_rep != NULL);           /* checked by ldlm_cli_enqueue() */
+        LASSERT_REPSWABBED(req, 0);         /* swabbed by ldlm_cli_enqueue() */
+
+        it->d.lustre.it_disposition = (int) dlm_rep->lock_policy_res1;
+        it->d.lustre.it_status = (int) dlm_rep->lock_policy_res2;
+        it->d.lustre.it_lock_mode = lock_mode;
+        it->d.lustre.it_data = req;
+
+        /* We know what to expect, so we do any byte flipping required here */
+        LASSERT(reply_buffers == 4 || reply_buffers == 3 || reply_buffers == 1);
+        if (reply_buffers >= 3) {
+                struct mds_body *body;
+                /* NOTE(review): -EPROTO exits keep req/lock refs - confirm */
+                body = lustre_swab_repbuf(req, 1, sizeof (*body),
+                                           lustre_swab_mds_body);
+                if (body == NULL) {
+                        CERROR ("Can't swab mds_body\n");
+                        RETURN (-EPROTO);
+                }
+
+                if ((body->valid & OBD_MD_FLEASIZE) != 0) {
+                        void *replayea;
+                        /* The eadata is opaque; just check that it is
+                         * there.  Eventually, obd_unpackmd() will check
+                         * the contents */
+                        eadata = lustre_swab_repbuf(req, 2, body->eadatasize,
+                                                    NULL);
+                        if (eadata == NULL) {
+                                CERROR ("Missing/short eadata\n");
+                                RETURN (-EPROTO);
+                        }
+                        if (it->it_op & IT_OPEN) {
+                                replayea = lustre_msg_buf(req->rq_reqmsg, 4, 
+                                                          obddev->u.cli.cl_max_mds_easize);
+                                LASSERT(replayea);
+                                memcpy(replayea, eadata, body->eadatasize);
+                        }
+                }
+        }
+
+        RETURN(rc);
+}
+EXPORT_SYMBOL(mdc_enqueue);
+
+/* 
+ * This long block is all about fixing up the lock and request state
+ * so that it is correct as of the moment _before_ the operation was
+ * applied; that way, the VFS will think that everything is normal and
+ * call Lustre's regular VFS methods.
+ *
+ * If we're performing a creation, that means that unless the creation
+ * failed with EEXIST, we should fake up a negative dentry.
+ *
+ * For everything else, we want the lookup to succeed.
+ *
+ * One additional note: if CREATE or OPEN succeeded, we add an extra
+ * reference to the request because we need to keep it around until
+ * ll_create/ll_open gets called.
+ *
+ * The server will return to us, in it_disposition, an indication of
+ * exactly what d.lustre.it_status refers to.
+ *
+ * If DISP_OPEN_OPEN is set, then d.lustre.it_status refers to the open() call,
+ * otherwise if DISP_OPEN_CREATE is set, then it status is the
+ * creation failure mode.  In either case, one of DISP_LOOKUP_NEG or
+ * DISP_LOOKUP_POS will be set, indicating whether the child lookup
+ * was successful.
+ *
+ * Else, if DISP_LOOKUP_EXECD then d.lustre.it_status is the rc of the
+ * child lookup.
+ */
+int mdc_intent_lock(struct obd_export *exp, struct ll_uctxt *uctxt,
+                    struct ll_fid *pfid, const char *name, int len,
+                    struct ll_fid *cfid, struct lookup_intent *it, int flags,
+                    struct ptlrpc_request **reqp,
+                    ldlm_blocking_callback cb_blocking)
+{
+        struct lustre_handle lockh;
+        struct ptlrpc_request *request;
+        int rc = 0;
+        struct mds_body *mds_body;
+        struct lustre_handle old_lock;
+        struct ldlm_lock *lock;
+        ENTRY;
+        LASSERT(it);
+
+        CDEBUG(D_DLMTRACE, "name: %*s in %ld, intent: %s\n", len, name,
+               (unsigned long) pfid->id, ldlm_it2str(it->it_op));
+
+        if (cfid && (it->it_op == IT_LOOKUP || it->it_op == IT_GETATTR)) {
+                /* We could just return 1 immediately, but since we should only
+                 * be called in revalidate_it if we already have a lock, let's
+                 * verify that. */
+                struct ldlm_res_id res_id ={.name = {cfid->id, 
+                                                     cfid->generation}};
+                struct lustre_handle lockh;
+                int mode, flags = LDLM_FL_BLOCK_GRANTED;
+
+                mode = LCK_PR;
+                rc = ldlm_lock_match(exp->exp_obd->obd_namespace, flags,
+                                     &res_id, LDLM_PLAIN, NULL, 0, LCK_PR,
+                                     &lockh);
+                if (!rc) {
+                        mode = LCK_PW;
+                        rc = ldlm_lock_match(exp->exp_obd->obd_namespace, flags,
+                                             &res_id, LDLM_PLAIN, NULL, 0,
+                                             LCK_PW, &lockh);
+                }
+                if (rc) {
+                        memcpy(&it->d.lustre.it_lock_handle, &lockh, 
+                               sizeof(lockh));
+                        it->d.lustre.it_lock_mode = mode;
+                }
+                RETURN(rc);
+        }
+
+        /* This function may be called twice, but we want to execute
+           the request associated with the intent only once. If it was
+           done already, we skip past this and use the results. */
+        if (!it_disposition(it, DISP_ENQ_COMPLETE)) {
+                struct mdc_op_data op_data;
+                mdc_fid2mdc_op_data(&op_data, uctxt, pfid, cfid, name, len, 0);
+
+                rc = mdc_enqueue(exp, LDLM_PLAIN, it, it_to_lock_mode(it),
+                                 &op_data, &lockh, NULL, 0, ldlm_completion_ast,
+                                 cb_blocking, NULL);
+                if (rc < 0)
+                        RETURN(rc);
+                memcpy(&it->d.lustre.it_lock_handle, &lockh, sizeof(lockh));
+        }
+        request = *reqp = it->d.lustre.it_data;
+        LASSERT(request != NULL);
+
+        if (!it_disposition(it, DISP_IT_EXECD)) {
+                /* The server failed before it even started executing the
+                 * intent, i.e. because it couldn't unpack the request. */
+                LASSERT(it->d.lustre.it_status != 0);
+                RETURN(it->d.lustre.it_status);
+        }
+        rc = it_open_error(DISP_IT_EXECD, it);
+        if (rc)
+                RETURN(rc);
+
+        mds_body = lustre_msg_buf(request->rq_repmsg, 1, sizeof(*mds_body));
+        LASSERT(mds_body != NULL);           /* mdc_enqueue checked */
+        LASSERT_REPSWABBED(request, 1); /* mdc_enqueue swabbed */
+
+        /* If we were revalidating a fid/name pair, mark the intent in
+         * case we fail and get called again from lookup */
+        if (cfid != NULL) {
+                it_set_disposition(it, DISP_ENQ_COMPLETE);
+                /* Also: did we find the same inode? */
+                if (memcmp(cfid, &mds_body->fid1, sizeof(*cfid))) {
+                        ptlrpc_request_addref(request);
+                        RETURN(-ESTALE);
+                }
+        }
+
+        /* If we're doing an IT_OPEN which did not result in an actual
+         * successful open, then we need to remove the bit which saves
+         * this request for unconditional replay. */
+        if (it->it_op & IT_OPEN) {
+                if (!it_disposition(it, DISP_OPEN_OPEN) ||
+                    it->d.lustre.it_status != 0) {
+                        unsigned long flags;
+
+                        spin_lock_irqsave(&request->rq_lock, flags);
+                        request->rq_replay = 0;
+                        spin_unlock_irqrestore(&request->rq_lock, flags);
+                }
+        }
+
+        rc = it_open_error(DISP_LOOKUP_EXECD, it);
+        if (rc)
+                RETURN(rc);
+
+        /* keep requests around for the multiple phases of the call
+         * this shows the DISP_XX must guarantee we make it into the call
+         */
+        if (it_disposition(it, DISP_OPEN_CREATE) &&
+            !it_open_error(DISP_OPEN_CREATE, it))
+                ptlrpc_request_addref(request);
+        if (it_disposition(it, DISP_OPEN_OPEN) &&
+            !it_open_error(DISP_OPEN_OPEN, it))
+                ptlrpc_request_addref(request);
+
+        if (it->it_op & IT_CREAT) {
+                /* XXX this belongs in ll_create_iit */
+        } else if (it->it_op == IT_OPEN) {
+                LASSERT(!it_disposition(it, DISP_OPEN_CREATE));
+        } else {
+                LASSERT(it->it_op & (IT_GETATTR | IT_LOOKUP));
+        }
+
+        /* If we already have a matching lock, then cancel the new
+         * one.  We have to set the data here instead of in
+         * mdc_enqueue, because we need to use the child's inode as
+         * the l_data to match, and that's not available until
+         * intent_finish has performed the iget().) */
+        lock = ldlm_handle2lock(&lockh);
+        if (lock) {
+                LDLM_DEBUG(lock, "matching against this");
+                LDLM_LOCK_PUT(lock);
+                memcpy(&old_lock, &lockh, sizeof(lockh));
+                if (ldlm_lock_match(NULL, LDLM_FL_BLOCK_GRANTED, NULL,
+                                    LDLM_PLAIN, NULL, 0, LCK_NL, &old_lock)) {
+                        ldlm_lock_decref_and_cancel(&lockh,
+                                                    it->d.lustre.it_lock_mode);
+                        memcpy(&lockh, &old_lock, sizeof(old_lock));
+                        memcpy(&it->d.lustre.it_lock_handle, &lockh,
+                               sizeof(lockh));
+                }
+        }
+        CDEBUG(D_DENTRY, "D_IT dentry %*s intent: %s status %d disp %x rc %d\n",
+               len, name, ldlm_it2str(it->it_op), it->d.lustre.it_status,
+               it->d.lustre.it_disposition, rc);
+
+        RETURN(rc);
+}
+EXPORT_SYMBOL(mdc_intent_lock);
diff --git a/lustre/obdfilter/filter_io_24.c b/lustre/obdfilter/filter_io_24.c
new file mode 100644
index 0000000..a109ef6
--- /dev/null
+++ b/lustre/obdfilter/filter_io_24.c
@@ -0,0 +1,237 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  linux/fs/obdfilter/filter_io.c
+ *
+ *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ *   Author: Peter Braam <braam@clusterfs.com>
+ *   Author: Andreas Dilger <adilger@clusterfs.com>
+ *   Author: Phil Schwan <phil@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/pagemap.h> // XXX kill me soon
+#include <linux/version.h>
+
+#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
+
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <linux/iobuf.h>
+
+#include <linux/obd_class.h>
+#include <linux/lustre_fsfilt.h>
+#include "filter_internal.h"
+
+
+/* Bump i_mtime -- and i_ctime only if ctime_too, since a pure data change
+ * should not touch ctime -- dirtying the inode only when a stamp moves. */
+void inode_update_time(struct inode *inode, int ctime_too)
+{
+        time_t stamp = CURRENT_TIME;
+        if (inode->i_mtime != stamp || (ctime_too && inode->i_ctime != stamp)) {
+                inode->i_mtime = stamp;
+                if (ctime_too)
+                        inode->i_ctime = stamp;
+                mark_inode_dirty_sync(inode);
+        }
+}
+
+int ext3_map_inode_page(struct inode *inode, struct page *page,
+                        unsigned long *blocks, int *created, int create);
+/* Map @iobuf's pages to @inode blocks with ext3_map_inode_page(), then write
+ * them via brw_kiovec().  Returns 0, -EIO on a short write, or -errno. */
+int filter_direct_io(int rw, struct inode *inode, struct kiobuf *iobuf)
+{
+        unsigned long *b;
+        int rc, i, create = (rw == OBD_BRW_WRITE), blocks_per_page, *created;
+        int *cr, cleanup_phase, expected;
+        ENTRY;
+
+        blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
+        if (iobuf->nr_pages * blocks_per_page > KIO_MAX_SECTORS)
+                RETURN(-EINVAL);
+
+        OBD_ALLOC(created, sizeof(*created) * iobuf->nr_pages*blocks_per_page);
+        if (created == NULL)
+                RETURN(-ENOMEM);
+        cleanup_phase = 1;
+
+        rc = lock_kiovec(1, &iobuf, 1);
+        if (rc < 0)
+                GOTO(cleanup, rc);
+        cleanup_phase = 2;
+
+        down(&inode->i_sem);
+        cleanup_phase = 3;
+        for (i = 0, cr = created, b = iobuf->blocks; i < iobuf->nr_pages; i++){
+                rc = ext3_map_inode_page(inode, iobuf->maplist[i], b, cr,
+                                         create);
+                if (rc)
+                        GOTO(cleanup, rc);
+                b += blocks_per_page;
+                cr += blocks_per_page;
+        }
+        up(&inode->i_sem);
+        cleanup_phase = 2;
+
+        expected = (1 << inode->i_blkbits) * iobuf->nr_pages * blocks_per_page;
+        rc = brw_kiovec(WRITE, 1, &iobuf, inode->i_dev, iobuf->blocks,
+                        1 << inode->i_blkbits);
+        CDEBUG(D_INFO, "tried to write %d pages, rc = %d\n",
+               iobuf->nr_pages, rc);
+        if (rc != expected) {
+                CERROR("short write?  expected %d, wrote %d\n", expected, rc);
+                rc = rc < 0 ? rc : -EIO; /* a partial write is a failure too */
+        } else {
+                rc = 0;
+        }
+
+        EXIT;
+cleanup:
+        switch(cleanup_phase) {
+                case 3:
+                        up(&inode->i_sem);
+                case 2:
+                        unlock_kiovec(1, &iobuf);
+                case 1:
+                        OBD_FREE(created, sizeof(*created) *
+                                          iobuf->nr_pages*blocks_per_page);
+                        break;
+                default:
+                        CERROR("corrupt cleanup_phase (%d)?\n", cleanup_phase);
+                        LBUG();
+                        break;
+        }
+        return rc;
+}
+
+/* Write the pages described by @res to the object's inode with direct I/O
+ * inside a journal transaction, extend i_size to cover them, commit, then
+ * release the pages and the dentry.  Returns 0 or a negative errno. */
+int filter_commitrw_write(struct obd_export *exp, int objcount,
+                          struct obd_ioobj *obj, int niocount,
+                          struct niobuf_local *res, struct obd_trans_info *oti)
+{
+        struct obd_device *obd = exp->exp_obd;
+        struct obd_run_ctxt saved;
+        struct niobuf_local *lnb;
+        struct fsfilt_objinfo fso;
+        struct iattr iattr = { .ia_valid = ATTR_SIZE, .ia_size = 0, };
+        struct kiobuf *iobuf;
+        struct inode *inode = NULL;
+        int rc = 0, i, cleanup_phase = 0, err;
+        unsigned long now = jiffies; /* DEBUGGING OST TIMEOUTS */
+        ENTRY;
+        LASSERT(oti != NULL);
+        LASSERT(objcount == 1);
+        LASSERT(current->journal_info == NULL);
+
+        rc = alloc_kiovec(1, &iobuf);
+        if (rc)
+                GOTO(cleanup, rc);
+        cleanup_phase = 1;
+
+#if (LINUX_VERSION_CODE == KERNEL_VERSION(2,4,18))
+        iobuf->dovary = 0; /* this prevents corruption, not present in 2.4.20 */
+#endif
+        rc = expand_kiobuf(iobuf, obj->ioo_bufcnt);
+        if (rc)
+                GOTO(cleanup, rc);
+
+        iobuf->offset = 0;
+        iobuf->length = PAGE_SIZE * obj->ioo_bufcnt;
+        iobuf->nr_pages = obj->ioo_bufcnt;
+
+        fso.fso_dentry = res->dentry;
+        fso.fso_bufcnt = obj->ioo_bufcnt;
+        inode = res->dentry->d_inode;
+
+        for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) {
+                loff_t this_size;
+                iobuf->maplist[i] = lnb->page;
+                /* pages should be in offset order, but we'll be forgiving */
+                this_size = lnb->offset + lnb->len;
+                if (this_size > iattr.ia_size)
+                        iattr.ia_size = this_size;
+        }
+
+        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        cleanup_phase = 2;
+
+        oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso, niocount, oti);
+        if (IS_ERR(oti->oti_handle)) {
+                rc = PTR_ERR(oti->oti_handle);
+                CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
+                       "error starting transaction: rc = %d\n", rc);
+                oti->oti_handle = NULL;
+                GOTO(cleanup, rc);
+        }
+
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow brw_start %lus\n", (jiffies - now) / HZ);
+
+        rc = filter_direct_io(OBD_BRW_WRITE, inode, iobuf);
+        if (rc == 0) {
+                down(&inode->i_sem);
+                inode_update_time(inode, 1);
+                if (iattr.ia_size > inode->i_size) {
+                        CDEBUG(D_INFO, "setting i_size to "LPU64"\n",
+                               iattr.ia_size);
+                        rc = fsfilt_setattr(obd, res->dentry, oti->oti_handle,
+                                            &iattr, 0); /* keep any failure */
+                }
+                up(&inode->i_sem);
+        }
+
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow direct_io %lus\n", (jiffies - now) / HZ);
+
+        rc = filter_finish_transno(exp, oti, rc);
+        err = fsfilt_commit(obd, inode, oti->oti_handle, obd_sync_filter);
+        if (err)
+                rc = err;
+        if (obd_sync_filter)
+                LASSERT(oti->oti_transno <= obd->obd_last_committed);
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow commitrw commit %lus\n", (jiffies - now) / HZ);
+
+cleanup:
+        switch (cleanup_phase) {
+        case 2:
+                pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+                LASSERT(current->journal_info == NULL);
+        case 1:
+                free_kiovec(1, &iobuf);
+        case 0:
+                for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) {
+                        /* flip_.. gets a ref, while free_page only frees
+                         * when it decrefs to 0 */
+                        if (rc == 0)
+                                flip_into_page_cache(inode, lnb->page);
+                        __free_page(lnb->page);
+                }
+                f_dput(res->dentry);
+        }
+
+        RETURN(rc);
+}
+
+#endif
+
diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c
new file mode 100644
index 0000000..ec9957a
--- /dev/null
+++ b/lustre/obdfilter/filter_io_26.c
@@ -0,0 +1,228 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  linux/fs/obdfilter/filter_io.c
+ *
+ *  Copyright (c) 2001-2003 Cluster File Systems, Inc.
+ *   Author: Peter Braam <braam@clusterfs.com>
+ *   Author: Andreas Dilger <adilger@clusterfs.com>
+ *   Author: Phil Schwan <phil@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ */
+
+#include <linux/config.h>
+#include <linux/module.h>
+#include <linux/pagemap.h> // XXX kill me soon
+#include <linux/version.h>
+
+#define DEBUG_SUBSYSTEM S_FILTER
+
+#include <linux/obd_class.h>
+#include <linux/lustre_fsfilt.h>
+#include "filter_internal.h"
+
+int ext3_map_inode_page(struct inode *inode, struct page *page,
+                        unsigned long *blocks, int *created, int create);
+
+/* Upper bound on blocks per page, taking 512 bytes as the minimum block size */
+#define MAX_BLOCKS_PER_PAGE (PAGE_SIZE / 512)
+struct dio_request {
+        atomic_t numreqs;       /* number of submitted bios not yet completed */
+        struct bio *bio_list;   /* completed bios, chained through bi_private */
+        wait_queue_head_t wait; /* submitter sleeps here until numreqs hits 0 */
+	int created[MAX_BLOCKS_PER_PAGE]; /* 'created' output for one page */
+	unsigned long blocks[MAX_BLOCKS_PER_PAGE]; /* block numbers of one page */
+        spinlock_t lock;        /* taken while a bio is pushed onto bio_list */
+};
+/* bi_end_io hook: chain a finished bio on dreq->bio_list, wake the waiter */
+static int dio_complete_routine(struct bio *bio, unsigned int done, int error)
+{
+        struct dio_request *dreq = bio->bi_private;
+        unsigned long flags;
+        if (bio->bi_size)       /* partial completion, we'll be called again */
+                return 1;
+        spin_lock_irqsave(&dreq->lock, flags);
+        bio->bi_private = dreq->bio_list; /* bi_private becomes the next link */
+        dreq->bio_list = bio;
+        spin_unlock_irqrestore(&dreq->lock, flags);
+        if (atomic_dec_and_test(&dreq->numreqs))
+                wake_up(&dreq->wait);
+        return 0; /* NOTE(review): @error is dropped, the waiter never sees it */
+}
+
+/* 1 if @sector directly follows the 512-byte sectors already in @bio, else 0 */
+static int can_be_merged(struct bio *bio, sector_t sector)
+{
+        int nr_sectors;
+
+        if (bio == NULL)
+                return 0;
+        nr_sectors = bio->bi_size >> 9;
+        return (bio->bi_sector + nr_sectors == sector);
+}
+
+/* Write the pages in @res to the object's inode with bios, inside a journal
+ * transaction: map each page to disk blocks, submit the I/O, wait for it,
+ * extend i_size, commit, then drop the pages and the dentry. */
+int filter_commitrw_write(struct obd_export *exp, int objcount,
+                          struct obd_ioobj *obj, int niocount,
+                          struct niobuf_local *res, struct obd_trans_info *oti)
+{
+        struct obd_device *obd = exp->exp_obd;
+        struct obd_run_ctxt saved;
+        struct niobuf_local *lnb;
+        struct fsfilt_objinfo fso;
+        struct iattr iattr = { .ia_valid = ATTR_SIZE, .ia_size = 0, };
+        struct inode *inode = NULL;
+        int rc = 0, i, k, cleanup_phase = 0, err;
+        unsigned long now = jiffies; /* DEBUGGING OST TIMEOUTS */
+        int blocks_per_page;
+        struct dio_request *dreq;
+        struct bio *bio = NULL;
+        ENTRY;
+        LASSERT(oti != NULL);
+        LASSERT(objcount == 1);
+        LASSERT(current->journal_info == NULL);
+
+        OBD_ALLOC(dreq, sizeof(*dreq));
+        if (dreq == NULL)
+                GOTO(cleanup, rc = -ENOMEM); /* still release pages, dentry */
+        dreq->bio_list = NULL;
+        init_waitqueue_head(&dreq->wait);
+        atomic_set(&dreq->numreqs, 0);
+        spin_lock_init(&dreq->lock);
+
+        cleanup_phase = 1;
+        fso.fso_dentry = res->dentry;
+        fso.fso_bufcnt = obj->ioo_bufcnt;
+        inode = res->dentry->d_inode;
+        /* needs the inode, so it cannot be computed any earlier */
+        blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
+        LASSERT(blocks_per_page <= MAX_BLOCKS_PER_PAGE);
+
+        push_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+        cleanup_phase = 2;
+
+        oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso, niocount, oti);
+        if (IS_ERR(oti->oti_handle)) {
+                rc = PTR_ERR(oti->oti_handle);
+                CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR,
+                       "error starting transaction: rc = %d\n", rc);
+                oti->oti_handle = NULL;
+                GOTO(cleanup, rc);
+        }
+
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow brw_start %lus\n", (jiffies - now) / HZ);
+
+        for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) {
+                loff_t this_size;
+                sector_t sector;
+                int offs;
+
+                /* get block number for next page */
+                rc = ext3_map_inode_page(inode, lnb->page, dreq->blocks,
+                                         dreq->created, 1);
+                if (rc) /* bios may be in flight: still wait and commit */
+                        break;
+
+                for (k = 0; k < blocks_per_page; k++) {
+                        sector = dreq->blocks[k] * (inode->i_sb->s_blocksize >> 9);
+                        offs = k * inode->i_sb->s_blocksize;
+                        if (!bio || !can_be_merged(bio, sector) ||
+                            !bio_add_page(bio, lnb->page, lnb->len, offs)) {
+                                if (bio) {
+                                        atomic_inc(&dreq->numreqs);
+                                        submit_bio(WRITE, bio);
+                                }
+                                bio = bio_alloc(GFP_NOIO, obj->ioo_bufcnt);
+                                if (bio == NULL)
+                                        GOTO(wait_io, rc = -ENOMEM);
+                                bio->bi_bdev = inode->i_sb->s_bdev;
+                                bio->bi_sector = sector;
+                                bio->bi_end_io = dio_complete_routine;
+                                bio->bi_private = dreq;
+                                if (!bio_add_page(bio, lnb->page, lnb->len, 0))
+                                        LBUG();
+                        }
+                }
+
+                /* pages should be in offset order, but we'll be forgiving */
+                this_size = lnb->offset + lnb->len;
+                if (this_size > iattr.ia_size)
+                        iattr.ia_size = this_size;
+        }
+        if (bio) {
+                atomic_inc(&dreq->numreqs);
+                submit_bio(WRITE, bio);
+        }
+
+wait_io:
+        /* time to wait for I/O completion */
+        wait_event(dreq->wait, atomic_read(&dreq->numreqs) == 0);
+
+        /* free all bios */
+        while (dreq->bio_list) {
+                bio = dreq->bio_list;
+                dreq->bio_list = bio->bi_private;
+                bio_put(bio);
+        }
+
+        if (rc == 0) {
+                down(&inode->i_sem);
+                inode_update_time(inode, 1);
+                if (iattr.ia_size > inode->i_size) {
+                        CDEBUG(D_INFO, "setting i_size to "LPU64"\n",
+                               iattr.ia_size);
+                        rc = fsfilt_setattr(obd, res->dentry, oti->oti_handle,
+                                            &iattr, 0); /* keep any failure */
+                }
+                up(&inode->i_sem);
+        }
+
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow direct_io %lus\n", (jiffies - now) / HZ);
+
+        rc = filter_finish_transno(exp, oti, rc);
+        err = fsfilt_commit(obd, inode, oti->oti_handle, obd_sync_filter);
+        if (err)
+                rc = err;
+        if (obd_sync_filter)
+                LASSERT(oti->oti_transno <= obd->obd_last_committed);
+        if (time_after(jiffies, now + 15 * HZ))
+                CERROR("slow commitrw commit %lus\n", (jiffies - now) / HZ);
+
+cleanup:
+        switch (cleanup_phase) {
+        case 2:
+                pop_ctxt(&saved, &obd->u.filter.fo_ctxt, NULL);
+                LASSERT(current->journal_info == NULL);
+        case 1:
+                OBD_FREE(dreq, sizeof(*dreq));
+        case 0:
+                for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) {
+                        /* flip_.. gets a ref, while free_page only frees
+                         * when it decrefs to 0 */
+                        if (rc == 0)
+                                flip_into_page_cache(inode, lnb->page);
+                        __free_page(lnb->page);
+                }
+                f_dput(res->dentry);
+        }
+
+        RETURN(rc);
+}
diff --git a/lustre/osc/osc_create.c b/lustre/osc/osc_create.c
new file mode 100644
index 0000000..3fb9d08
--- /dev/null
+++ b/lustre/osc/osc_create.c
@@ -0,0 +1,343 @@
+/* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
+ * vim:expandtab:shiftwidth=8:tabstop=8:
+ *
+ *  Copyright (C) 2001-2003 Cluster File Systems, Inc.
+ *   Author Peter Braam <braam@clusterfs.com>
+ *
+ *   This file is part of Lustre, http://www.lustre.org.
+ *
+ *   Lustre is free software; you can redistribute it and/or
+ *   modify it under the terms of version 2 of the GNU General Public
+ *   License as published by the Free Software Foundation.
+ *
+ *   Lustre is distributed in the hope that it will be useful,
+ *   but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ *   GNU General Public License for more details.
+ *
+ *   You should have received a copy of the GNU General Public License
+ *   along with Lustre; if not, write to the Free Software
+ *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ *
+ *  For testing and management it is treated as an obd_device,
+ *  although it does not export a full OBD method table (the
+ *  requests are coming in over the wire, so object target modules
+ *  do not have a full method table.)
+ *
+ */
+
+#ifndef EXPORT_SYMTAB
+# define EXPORT_SYMTAB
+#endif
+#define DEBUG_SUBSYSTEM S_OSC
+
+#ifdef __KERNEL__
+# include <linux/version.h>
+# include <linux/module.h>
+# include <linux/mm.h>
+# include <linux/highmem.h>
+# include <linux/lustre_dlm.h>
+# if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0))
+#  include <linux/workqueue.h>
+#  include <linux/smp_lock.h>
+# else
+#  include <linux/locks.h>
+# endif
+#else /* __KERNEL__ */
+# include <liblustre.h>
+#endif
+
+#include <linux/kp30.h>
+#include <linux/lustre_mds.h> /* for mds_objid */
+#include <linux/obd_ost.h>
+#include <linux/lustre_commit_confd.h>
+#include <linux/obd_lov.h>
+
+#ifndef  __CYGWIN__
+# include <linux/ctype.h>
+# include <linux/init.h>
+#else
+# include <ctype.h>
+#endif
+
+#include <linux/lustre_ha.h>
+#include <linux/obd_support.h> /* for OBD_FAIL_CHECK */
+#include <linux/lustre_lite.h> /* for ll_i2info */
+#include <portals/lib-types.h> /* for PTL_MD_MAX_IOV */
+#include <linux/lprocfs_status.h>
+#include "osc_internal.h"
+
+struct osc_created {
+        wait_queue_head_t osccd_waitq;       /* the daemon sleeps on this */
+        wait_queue_head_t osccd_ctl_waitq;   /* insmod rmmod sleep on this */
+        spinlock_t osccd_lock;               /* held around list changes */
+        int osccd_flags;                     /* OSCCD_* bits, see below */
+        struct task_struct *osccd_thread;    /* cleared by the daemon on exit */
+        struct list_head osccd_queue_list_head; /* creators waiting for work */
+        struct list_head osccd_work_list_head;  /* creators being serviced */
+};
+
+/* bits of osccd_flags */
+#define OSCCD_STOPPING          0x1     /* osccd_cleanup() asked for exit */
+#define OSCCD_STOPPED           0x2     /* the daemon has left its loop */
+#define OSCCD_RUNNING           0x4     /* the daemon has started */
+#define OSCCD_KICKED            0x8     /* a creator was queued for work */
+#define OSCCD_PRECREATED         0x10   /* unused in the code visible here */
+
+/* the one daemon state that oscc_init() hands to every creator */
+static struct osc_created osc_created;
+/* Nonzero when last_id - next_id, read under oscc_lock, is at least @count */
+static int oscc_has_objects(struct osc_creator *oscc, int count)
+{
+        __s64 spare;
+        spin_lock(&oscc->oscc_lock);
+        spare = (__s64)(oscc->oscc_last_id - oscc->oscc_next_id);
+        spin_unlock(&oscc->oscc_lock);
+        return (spare >= count);
+}
+/* Queue @oscc for the create daemon when low on ids; if @wait, block for one */
+static int oscc_precreate(struct osc_creator *oscc, struct osc_created *osccd,
+                          int wait)
+{
+        int rc = 0;
+        struct l_wait_info lwi = { 0 };
+        ENTRY;
+
+        if (oscc_has_objects(oscc, oscc->oscc_kick_barrier)) /* enough left */
+                RETURN(0);
+
+        spin_lock(&osccd->osccd_lock);
+        spin_lock(&oscc->oscc_lock);
+        if (list_empty(&oscc->oscc_list)) { /* not on a daemon list yet */
+                list_add(&oscc->oscc_list, &osccd->osccd_queue_list_head);
+                osccd->osccd_flags |= OSCCD_KICKED;
+                wake_up(&osccd->osccd_waitq);
+        }
+        spin_unlock(&oscc->oscc_lock);
+        spin_unlock(&osccd->osccd_lock);
+
+        /* With @wait set, sleep until at least one object is available.  An
+         * MDS using this call may time out on this; it is a recovery style
+         * wait. */
+        if (wait)
+                rc = l_wait_event(oscc->oscc_waitq, oscc_has_objects(oscc, 1),
+                                  &lwi);
+        if (rc || !wait)
+                RETURN(rc);
+
+        spin_lock(&oscc->oscc_lock);
+        rc = oscc->oscc_status; /* rc stored after the last osc_real_create() */
+        spin_unlock(&oscc->oscc_lock);
+        RETURN(rc);
+}
+
+/* Hand out the next precreated object id in @oa and *@ea; an @oa carrying
+ * only OBD_FL_DELORPHAN instead asks the OST to remove orphan objects. */
+int osc_create(struct obd_export *exp, struct obdo *oa,
+               struct lov_stripe_md **ea, struct obd_trans_info *oti)
+{
+        struct lov_stripe_md *lsm;
+        struct osc_creator *oscc = &exp->u.eu_osc_data.oed_oscc;
+        struct osc_created *osccd = oscc->oscc_osccd;
+        int try_again = 1, rc = 0;
+        ENTRY;
+
+        LASSERT(oa);
+        LASSERT(ea);
+
+        /* this is the special case where create removes orphans */
+        if (oa->o_valid == OBD_MD_FLFLAGS &&
+            oa->o_flags == OBD_FL_DELORPHAN) {
+                /* delete from next_id on up */
+                oa->o_valid |= OBD_MD_FLID;
+                oa->o_id = oscc->oscc_next_id;
+                if (oa->o_id == 0)
+                        RETURN(0);
+                rc = osc_real_create(oscc->oscc_exp, oa, ea, NULL);
+                spin_lock(&osccd->osccd_lock);
+                spin_lock(&oscc->oscc_lock);
+                oscc->oscc_status = rc;
+                oscc->oscc_last_id = oscc->oscc_next_id - 1;
+                spin_unlock(&oscc->oscc_lock);
+                spin_unlock(&osccd->osccd_lock);
+                RETURN(rc);
+        }
+
+        lsm = *ea; /* allocated this late: the orphan path never uses lsm */
+        if (lsm == NULL) {
+                rc = obd_alloc_memmd(exp, &lsm);
+                if (rc < 0)
+                        RETURN(rc);
+        }
+
+        while (try_again) {
+                spin_lock(&oscc->oscc_lock);
+                if (oscc->oscc_last_id >= oscc->oscc_next_id) {
+                        memcpy(oa, &oscc->oscc_oa, sizeof(*oa));
+                        oa->o_id = oscc->oscc_next_id;
+                        lsm->lsm_object_id = oscc->oscc_next_id;
+                        *ea = lsm;
+                        oscc->oscc_next_id++;
+                        try_again = 0;
+                }
+                spin_unlock(&oscc->oscc_lock);
+                rc = oscc_precreate(oscc, osccd, try_again);
+        }
+
+        if (rc == 0)
+                CDEBUG(D_INFO, "returning objid "LPU64"\n", lsm->lsm_object_id);
+        else if (*ea == NULL)
+                obd_free_memmd(exp, &lsm);
+        RETURN(rc);
+}
+/* Run one create RPC for each creator on the queue list and wake its waiters */
+void osccd_do_create(struct osc_created *osccd)
+{
+        struct list_head *tmp;
+
+ next:
+        spin_lock(&osccd->osccd_lock);
+        list_for_each (tmp, &osccd->osccd_queue_list_head) {
+                int rc;
+                struct osc_creator *oscc = list_entry(tmp, struct osc_creator,
+                                                      oscc_list);
+                list_del_init(&oscc->oscc_list);
+                list_add(&oscc->oscc_list, &osccd->osccd_work_list_head);
+                spin_lock(&oscc->oscc_lock);
+		oscc->oscc_oa.o_id = oscc->oscc_last_id + oscc->oscc_grow_count;
+		oscc->oscc_oa.o_valid |= OBD_MD_FLID;
+                spin_unlock(&oscc->oscc_lock);
+                spin_unlock(&osccd->osccd_lock);
+                /* both locks are dropped across the call made with that o_id */
+                rc = osc_real_create(oscc->oscc_exp, &oscc->oscc_oa,
+                                     &oscc->oscc_ea, NULL);
+
+                /* The stripe md left in oscc_ea is never used afterwards, so
+                 * free it now rather than leak it. */
+                if (rc == 0 && oscc->oscc_ea != NULL) 
+                        obd_free_memmd(oscc->oscc_exp, &oscc->oscc_ea);
+                /* NOTE(review): last_id is advanced even when rc != 0 */
+                spin_lock(&osccd->osccd_lock);
+                spin_lock(&oscc->oscc_lock);
+                list_del_init(&oscc->oscc_list);
+                oscc->oscc_status = rc;
+                oscc->oscc_last_id = oscc->oscc_oa.o_id;
+                spin_unlock(&oscc->oscc_lock);
+                spin_unlock(&osccd->osccd_lock);
+
+                CDEBUG(D_INFO, "preallocated through id "LPU64" (last used "
+                       LPU64")\n", oscc->oscc_last_id, oscc->oscc_next_id);
+                wake_up(&oscc->oscc_waitq);
+                goto next; /* list changed while unlocked: rescan from head */
+        }
+        spin_unlock(&osccd->osccd_lock);
+}
+/* Daemon thread body: service kicks via osccd_do_create() until told to stop */
+static int osccd_main(void *arg)
+{
+        struct osc_created *osccd = (struct osc_created *)arg;
+        unsigned long flags;
+        ENTRY;
+
+        lock_kernel();
+        kportal_daemonize("lustre_created");
+
+        SIGNAL_MASK_LOCK(current, flags);
+        sigfillset(&current->blocked); /* block every signal */
+        RECALC_SIGPENDING;
+        SIGNAL_MASK_UNLOCK(current, flags);
+
+        unlock_kernel();
+
+        /* Record that the thread is running; osccd_setup() waits for this */
+        osccd->osccd_flags =  OSCCD_RUNNING;
+        wake_up(&osccd->osccd_ctl_waitq);
+
+        /* And now, loop on requests until OSCCD_STOPPING is seen */
+        while (1) {
+                struct l_wait_info lwi = { 0 };
+                l_wait_event(osccd->osccd_waitq,
+                             osccd->osccd_flags & (OSCCD_STOPPING|OSCCD_KICKED),
+                             &lwi);
+
+                spin_lock(&osccd->osccd_lock);
+                if (osccd->osccd_flags & OSCCD_STOPPING) {
+                        spin_unlock(&osccd->osccd_lock);
+                        EXIT;
+                        break;
+                }
+                osccd->osccd_flags &= ~OSCCD_KICKED; /* consume the kick */
+                spin_unlock(&osccd->osccd_lock);
+                osccd_do_create(osccd);
+        }
+        /* NOTE(review): the message below seems copied from another daemon */
+        osccd->osccd_thread = NULL;
+        osccd->osccd_flags = OSCCD_STOPPED; /* osccd_cleanup() waits for this */
+        wake_up(&osccd->osccd_ctl_waitq);
+        CDEBUG(D_NET, "commit callback daemon exiting %d\n", current->pid);
+        RETURN(0);
+}
+
+/* Zero the OSC export data found through @exph and give its object creator
+ * the default settings.  Does nothing when no export matches the handle. */
+void oscc_init(struct lustre_handle *exph)
+{
+        struct obd_export *exp = class_conn2export(exph);
+        struct osc_creator *oscc;
+
+        if (exp == NULL)
+                return;
+        memset(&exp->exp_osc_data, 0, sizeof(exp->exp_osc_data));
+        oscc = &exp->exp_osc_data.oed_oscc;
+        INIT_LIST_HEAD(&oscc->oscc_list);
+        init_waitqueue_head(&oscc->oscc_waitq);
+        spin_lock_init(&oscc->oscc_lock);
+        oscc->oscc_exp = exp;
+        oscc->oscc_osccd = &osc_created;
+        oscc->oscc_kick_barrier = 50;
+        oscc->oscc_grow_count = 100;
+        oscc->oscc_initial_create_count = 100;
+        /* XXX the export handle should give the oscc the last object */
+        /* oscc->oscc_last_id = exph->....; */
+        oscc->oscc_next_id = 2;
+        oscc->oscc_last_id = 1;
+}
+
+/* Init the create daemon state, start its thread, wait for OSCCD_RUNNING */
+int osccd_setup(void)
+{
+        struct l_wait_info lwi = { 0 };
+        struct osc_created *osccd = &osc_created;
+        int pid;
+        ENTRY;
+
+        spin_lock_init(&osccd->osccd_lock);
+        init_waitqueue_head(&osccd->osccd_waitq);
+        init_waitqueue_head(&osccd->osccd_ctl_waitq);
+        INIT_LIST_HEAD(&osccd->osccd_work_list_head);
+        INIT_LIST_HEAD(&osccd->osccd_queue_list_head);
+        pid = kernel_thread(osccd_main, osccd, CLONE_VM|CLONE_FS|CLONE_FILES);
+        if (pid < 0) {
+                CERROR("cannot start thread\n");
+                RETURN(pid);
+        }
+        l_wait_event(osccd->osccd_ctl_waitq, osccd->osccd_flags & OSCCD_RUNNING,
+                     &lwi);
+        RETURN(0);
+}
+
+/* Flag the create daemon to stop, wake it, and wait for OSCCD_STOPPED */
+int osccd_cleanup(void)
+{
+        struct l_wait_info lwi = { 0 };
+        struct osc_created *created = &osc_created;
+        ENTRY;
+
+        spin_lock(&created->osccd_lock);
+        created->osccd_flags = OSCCD_STOPPING;
+        spin_unlock(&created->osccd_lock);
+        wake_up(&created->osccd_waitq);
+        l_wait_event(created->osccd_ctl_waitq,
+                     created->osccd_flags & OSCCD_STOPPED, &lwi);
+        RETURN(0);
+}
diff --git a/lustre/ptlrpc/ptlrpc_internal.h b/lustre/ptlrpc/ptlrpc_internal.h
index a84a29c..db70ea7 100644
--- a/lustre/ptlrpc/ptlrpc_internal.h
+++ b/lustre/ptlrpc/ptlrpc_internal.h
@@ -96,7 +96,6 @@ enum {
 };
 
 int ptlrpc_expire_one_request(struct ptlrpc_request *req);
-int ptlrpc_check_set(struct ptlrpc_request_set *set);
 
 void ptlrpc_pinger_sending_on_import(struct obd_import *imp);
 #endif /* PTLRPC_INTERNAL_H */
diff --git a/lustre/ptlrpc/ptlrpc_module.c b/lustre/ptlrpc/ptlrpc_module.c
index d33670f..094de0b 100644
--- a/lustre/ptlrpc/ptlrpc_module.c
+++ b/lustre/ptlrpc/ptlrpc_module.c
@@ -120,9 +120,7 @@ static void __exit ptlrpc_exit(void)
 {
         ptlrpc_exit_portals();
         ptlrpc_cleanup_connection();
-#ifdef ENABLE_ORPHANS
         llog_cleanup_commit_master(0);
-#endif
 }
 
 /* connection.c */
@@ -170,8 +168,13 @@ EXPORT_SYMBOL(ptlrpc_next_xid);
 
 EXPORT_SYMBOL(ptlrpc_prep_set);
 EXPORT_SYMBOL(ptlrpc_set_add_req);
+EXPORT_SYMBOL(ptlrpc_set_add_new_req);
 EXPORT_SYMBOL(ptlrpc_set_destroy);
+EXPORT_SYMBOL(ptlrpc_set_next_timeout);
+EXPORT_SYMBOL(ptlrpc_check_set);
 EXPORT_SYMBOL(ptlrpc_set_wait);
+EXPORT_SYMBOL(ptlrpc_expired_set);
+EXPORT_SYMBOL(ptlrpc_interrupted_set);
 
 /* service.c */
 EXPORT_SYMBOL(ptlrpc_init_svc);
@@ -192,6 +195,7 @@ EXPORT_SYMBOL(lustre_swab_obd_statfs);
 EXPORT_SYMBOL(lustre_swab_obd_ioobj);
 EXPORT_SYMBOL(lustre_swab_niobuf_remote);
 EXPORT_SYMBOL(lustre_swab_ost_body);
+EXPORT_SYMBOL(lustre_swab_ost_last_id);
 EXPORT_SYMBOL(lustre_swab_ll_fid);
 EXPORT_SYMBOL(lustre_swab_mds_status_req);
 EXPORT_SYMBOL(lustre_swab_mds_fileh_body);
diff --git a/lustre/tests/replay-ost-single.sh b/lustre/tests/replay-ost-single.sh
new file mode 100755
index 0000000..eabee0a
--- /dev/null
+++ b/lustre/tests/replay-ost-single.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+# OST failover/replay tests.  Needs bash: the sourced test-framework.sh uses `local` and ${!name}.
+set -e
+
+# Skip these tests
+# 3 - bug 1852
+ALWAYS_EXCEPT="3"
+
+LUSTRE=${LUSTRE:-$(dirname "$0")/..}
+LTESTDIR=${LTESTDIR:-$LUSTRE/../ltest}
+PATH=$LUSTRE/utils:$LUSTRE/tests:$PATH
+
+RLUSTRE=${RLUSTRE:-$LUSTRE}
+RPWD=${RPWD:-$PWD}
+
+XMLCONFIG="$(basename "$0" .sh).xml"
+
+. "$LUSTRE/tests/test-framework.sh"
+
+CHECKSTAT="${CHECKSTAT:-checkstat} -v"
+
+# XXX I wish all this stuff was in some default-config.sh somewhere
+MOUNT=${MOUNT:-/mnt/lustre}
+DIR=${DIR:-$MOUNT}
+MDSDEV=${MDSDEV:-/tmp/mds-$(hostname)}
+MDSSIZE=${MDSSIZE:-100000}
+OSTDEV=${OSTDEV:-/tmp/ost-$(hostname)}
+OSTSIZE=${OSTSIZE:-100000}
+UPCALL=${UPCALL:-$PWD/replay-single-upcall.sh}
+FSTYPE=${FSTYPE:-ext3}
+TIMEOUT=${TIMEOUT:-5}
+
+STRIPE_BYTES=65536
+STRIPES_PER_OBJ=1
+
+
+gen_config() {  # rebuild $XMLCONFIG from scratch: mds/ost/client nodes, mds1, lov1, ost1 and the client mount point
+    rm -f -- "$XMLCONFIG"
+    add_facet mds
+    add_facet ost
+    add_facet client --lustre_upcall "$UPCALL"
+    do_lmc --add mds --node mds_facet --mds mds1 --dev "$MDSDEV" --size "$MDSSIZE"
+    do_lmc --add lov --mds mds1 --lov lov1 --stripe_sz "$STRIPE_BYTES" --stripe_cnt "$STRIPES_PER_OBJ" --stripe_pattern 0
+    do_lmc --add ost --lov lov1 --failover --node ost_facet --ost ost1 --dev "$OSTDEV" --size "$OSTSIZE"
+    do_lmc --add mtpt --node client_facet --path "$MOUNT" --mds mds1 --ost lov1
+}
+
+
+build_test_filter  # turns $ONLY / $EXCEPT / $ALWAYS_EXCEPT into ONLY_<n> / EXCEPT_<n> flag variables
+
+gen_config
+start mds --reformat $MDSLCONFARGS  # unquoted: each word of the *LCONFARGS variables becomes a separate argument
+start ost --reformat $OSTLCONFARGS
+start client --gdb $CLIENTLCONFARGS
+
+mkdir -p $DIR
+
+test_0() {  # set a replay barrier on the OST, then fail it over with nothing done in between
+    replay_barrier ost
+    fail ost  # last command: its exit status is the test's result
+}
+run_test 0 "empty replay"
+
+test_1() {  # touch a file after the barrier, fail over the OST, then require "checkstat -t file" to pass on it
+    replay_barrier ost
+    touch $DIR/$tfile
+    fail ost
+    $CHECKSTAT -t file $DIR/$tfile || return 1  # $CHECKSTAT holds "<checkstat> -v", so it must split into words
+}
+run_test 1 "touch"
+
+test_2() {  # write 10 tagged files after the barrier, fail over the OST, then check every file still holds its tag
+    replay_barrier ost
+    for i in $(seq 10); do
+        echo "tag-$i" > "$DIR/$tfile-$i"
+    done
+    fail ost
+    for i in $(seq 10); do
+        grep -q "tag-$i" "$DIR/$tfile-$i" || error "f1c-$i"
+    done
+}
+run_test 2 "|x| 10 open(O_CREAT)s"
+
+exit 0  # NOTE(review): unconditional exit -- the cleanup below is never reached; confirm this is intended
+
+equals_msg test complete, cleaning up
+stop client ${FORCE:=--force} $CLIENTLCONFARGS  # ${FORCE:=...} also assigns FORCE when unset, for the two stops below
+stop ost ${FORCE}
+stop mds ${FORCE} $MDSLCONFARGS --dump cleanup.log
+
diff --git a/lustre/tests/test-framework.sh b/lustre/tests/test-framework.sh
new file mode 100644
index 0000000..3ba7402
--- /dev/null
+++ b/lustre/tests/test-framework.sh
@@ -0,0 +1,126 @@
+#!/bin/bash
+# Helper functions sourced by the test scripts.  Needs bash: uses `local` and ${!name} indirection.
+set -e
+
+init_test_env() {  # export suite name, XML config name, ltest dir, tool PATH, remote dirs and checkstat command; needs $LUSTRE
+    TESTSUITE=$(basename "$0" .sh)  # assigned apart from `export` so export's status cannot mask a basename failure
+    XMLCONFIG="${TESTSUITE}.xml"
+    LTESTDIR=${LTESTDIR:-$LUSTRE/../ltest}
+    PATH=$LUSTRE/utils:$LUSTRE/tests:$PATH
+    RLUSTRE=${RLUSTRE:-$LUSTRE}
+    RPWD=${RPWD:-$PWD}
+    CHECKSTAT="${CHECKSTAT:-checkstat} -v"
+    export TESTSUITE XMLCONFIG LTESTDIR PATH RLUSTRE RPWD CHECKSTAT
+}
+
+start() {  # start <facet> [lconf options...]: run lconf for node <facet>_facet on $XMLCONFIG
+    local facet=$1
+    shift
+    lconf --node "${facet}_facet" "$@" "$XMLCONFIG"  # "$@" forwards each option as one intact word
+}
+
+stop() {  # stop <facet> [lconf options...]: run lconf --cleanup for node <facet>_facet on $XMLCONFIG
+    local facet=$1
+    shift
+    lconf --node "${facet}_facet" "$@" --cleanup "$XMLCONFIG"  # "$@" forwards each option as one intact word
+}
+
+replay_barrier() {  # replay_barrier <dev>: sync, df the mount, then send lctl "readonly" and "notransno" to device %<dev>1
+    local dev=$1
+    sync
+    df $MOUNT
+    lctl --device %${dev}1 readonly  # device name is <dev> plus a hard-coded "1" (ost -> %ost1)
+    lctl --device %${dev}1 notransno
+    lctl mark "REPLAY BARRIER"
+}
+
+fail() {  # fail <facet>: force-stop the facet with --failover, start it again, then df the client mount
+    local facet=$1
+    stop $facet --force --failover --nomod
+    start $facet --nomod
+    df $MOUNT  # last command, so fail's exit status is df's
+}
+
+do_lmc() {  # do_lmc <lmc options...>: run "lmc -m $XMLCONFIG" with the given options, each kept as one word
+    lmc -m "${XMLCONFIG}" "$@"
+}
+
+add_facet() {  # add_facet <facet> [node options...]: add node <facet>_facet (with --timeout $TIMEOUT) and its tcp net on localhost
+    local facet=$1
+    shift
+    do_lmc --add node --node "${facet}_facet" "$@" --timeout "$TIMEOUT"
+    do_lmc --add net --node "${facet}_facet" --nid localhost --nettype tcp
+}
+
+error() {  # error <message...>: print "<suite>: **** FAIL: <message>" and exit the script with status 1
+    echo "${TESTSUITE:-$(basename "$0" .sh)}: **** FAIL:" "$@"  # falls back to the script name when TESTSUITE is unset
+    exit 1
+}
+
+build_test_filter() {  # set ONLY_<name>=true for each word of $ONLY, EXCEPT_<name>=true for each of $EXCEPT $ALWAYS_EXCEPT
+        for O in $ONLY; do
+            printf -v "ONLY_${O}" '%s' true  # printf -v assigns by name without eval-ing the word as code
+        done
+        for E in $EXCEPT $ALWAYS_EXCEPT; do
+            printf -v "EXCEPT_${E}" '%s' true
+        done
+}
+
+_basetest() {  # helper for basetest: $* is left unquoted so it is re-split on whatever IFS the caller set
+    echo $*
+}
+
+basetest() {  # basetest <name>: print <name> with lowercase letters acting as field separators, e.g. "1a" -> "1"
+    IFS=abcdefghijklmnopqrstuvwxyz _basetest $1  # in bash's default mode this IFS override lasts for this call only
+}
+
+run_test() {
+        export base=`basetest $1`
+        if [ ! -z "$ONLY" ]; then
+                 testname=ONLY_$1
+                 if [ ${!testname}x != x ]; then
+                     run_one $1 "$2"
+                     return $?
+                 fi
+                 testname=ONLY_$base
+                 if [ ${!testname}x != x ]; then
+                     run_one $1 "$2"
+                     return $?
+                 fi
+                 echo -n "."
+                 return 0
+        fi
+        testname=EXCEPT_$1
+        if [ ${!testname}x != x ]; then
+                 echo "skipping excluded test $1"
+                 return 0
+        fi
+        testname=EXCEPT_$base
+        if [ ${!testname}x != x ]; then
+                 echo "skipping excluded test $1 (base $base)"
+                 return 0
+        fi
+        run_one $1 "$2"
+
+        return $?
+}
+
+EQUALS="======================================================================"
+equals_msg() {  # equals_msg <words...>: print "===== <words> =====..." with '=' padding that shrinks as the message grows
+   local msg="$*"
+   local suffixlen=$((65 - ${#msg}))
+   [ "$suffixlen" -ge 0 ] || suffixlen=0  # a negative precision would print the whole $EQUALS run instead of none
+   printf '===== %s %.*s\n' "$msg" "$suffixlen" "$EQUALS"
+}
+
+run_one() {  # run_one <testnum> <message>: print a banner, run test_<testnum>, and abort through error() if it fails
+    testnum=$1
+    message=$2
+    tfile=f$base  # left global: test bodies use $tfile for their file names
+    tdir=d$base
+
+    # Pretty tests run faster.
+    equals_msg "$testnum: $message"
+    # On the left of `||` the test runs with `set -e` ignored, so only its final exit status is checked.
+    "test_${testnum}" || error "test_$testnum failed with $?"
+}
-- 
1.8.3.1