--- /dev/null
+diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
+--- origin/fs/ext3/super.c 2003-05-04 17:23:52.000000000 +0400
++++ linux/fs/ext3/super.c 2003-05-04 17:09:20.000000000 +0400
+@@ -398,6 +398,210 @@ static void dump_orphan_list(struct supe
+ }
+ }
+
++#ifdef EXT3_DELETE_THREAD
++/*
++ * Delete inodes in a loop until there are no more to be deleted.
++ * Normally, we run in the background doing the deletes and sleeping again,
++ * and clients just add new inodes to be deleted onto the end of the list.
++ * If someone is concerned about free space (e.g. block allocation or similar)
++ * then they can sleep on s_delete_waiter_queue and be woken up when space
++ * has been freed.
++ */
++int ext3_delete_thread(void *data)
++{
++	struct super_block *sb = data;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	struct task_struct *tsk = current;
++
++	/* Almost like daemonize, but not quite */
++	exit_mm(current);
++	tsk->session = 1;
++	tsk->pgrp = 1;
++	tsk->tty = NULL;
++	exit_files(current);
++	reparent_to_init();
++
++	sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev));
++	sigfillset(&tsk->blocked);
++
++	/*tsk->flags |= PF_KERNTHREAD;*/
++	/* A non-NULL list head is what ext3_start_delete_thread() waits for. */
++	INIT_LIST_HEAD(&sbi->s_delete_list);
++	wake_up(&sbi->s_delete_waiter_queue);
++	ext3_debug("EXT3-fs: delete thread on %s started\n",
++		   kdevname(sb->s_dev));
++
++	/* main loop: sleep until woken, then drain the delete list */
++	for (;;) {
++		sleep_on(&sbi->s_delete_thread_queue);
++		ext3_debug("%s woken up: %lu inodes, %lu blocks\n",
++			   tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks);
++		/* Woken with an empty list: zero the list head and exit. */
++		spin_lock(&sbi->s_delete_lock);
++		if (list_empty(&sbi->s_delete_list)) {
++			memset(&sbi->s_delete_list, 0,
++			       sizeof(sbi->s_delete_list));
++			spin_unlock(&sbi->s_delete_lock);
++			ext3_debug("ext3 delete thread on %s exiting\n",
++				   kdevname(sb->s_dev));
++			wake_up(&sbi->s_delete_waiter_queue);
++			break;
++		}
++
++		while (!list_empty(&sbi->s_delete_list)) {
++			struct inode *inode = list_entry(sbi->s_delete_list.next,
++							 struct inode, i_dentry);
++			unsigned long blocks = inode->i_blocks >>
++						(inode->i_blkbits - 9);
++
++			list_del_init(&inode->i_dentry);
++			spin_unlock(&sbi->s_delete_lock);
++			ext3_debug("%s delete ino %lu blk %lu\n",
++				   tsk->comm, inode->i_ino, blocks);
++			/* The lock is dropped across iput() and retaken below. */
++			iput(inode);
++
++			spin_lock(&sbi->s_delete_lock);
++			sbi->s_delete_blocks -= blocks;
++			sbi->s_delete_inodes--;
++		}
++		if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0)
++			ext3_warning(sb, __FUNCTION__,
++				     "%lu blocks, %lu inodes on list?\n",
++				     sbi->s_delete_blocks,sbi->s_delete_inodes);
++		sbi->s_delete_blocks = 0;
++		sbi->s_delete_inodes = 0;
++		spin_unlock(&sbi->s_delete_lock);
++		wake_up(&sbi->s_delete_waiter_queue);
++	}
++
++	return 0;
++}
++
++static void ext3_start_delete_thread(struct super_block *sb)
++{
++	struct ext3_sb_info *info = EXT3_SB(sb);
++	int pid;
++
++	/* Reset all delete-thread state before the thread can look at it. */
++	info->s_delete_inodes = 0;
++	info->s_delete_blocks = 0;
++	memset(&info->s_delete_list, 0, sizeof(info->s_delete_list));
++	spin_lock_init(&info->s_delete_lock);
++	init_waitqueue_head(&info->s_delete_waiter_queue);
++	init_waitqueue_head(&info->s_delete_thread_queue);
++	pid = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES);
++	if (pid >= 0)	/* started: wait until it has set up s_delete_list */
++		wait_event(info->s_delete_waiter_queue, info->s_delete_list.next);
++	else
++		printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n",
++		       pid);
++}
++
++static void ext3_stop_delete_thread(struct ext3_sb_info *sbi)
++{
++ wake_up(&sbi->s_delete_thread_queue); /* wake the delete thread sleeping in its main loop */
++ wait_event(sbi->s_delete_waiter_queue, list_empty(&sbi->s_delete_list)); /* wait for an empty delete list; NOTE(review): the thread zeroes the list head on exit, after which list_empty() is false -- confirm this cannot hang */
++}
++
++/* Instead of playing games with the inode flags, destruction, etc., we just
++ * create a new inode locally and put it on a list for the delete thread.
++ * We need large parts of the inode struct in order to complete the
++ * truncate and unlink, so we may as well just have a real inode to do it.
++ *
++ * If we have any problem deferring the delete, just delete it right away.
++ * If we defer it, we also mark how many blocks it would free, so that we
++ * can keep the statfs data correct, and we know if we should sleep on the
++ * delete thread when we run out of space.
++ *
++ * In 2.5 this can be done much more cleanly by just registering a "drop"
++ * method in the super_operations struct.
++ */
++static void ext3_delete_inode_thread(struct inode *old_inode)
++{
++	struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++	struct inode *new_inode;
++	unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
++
++	if (is_bad_inode(old_inode)) {
++		clear_inode(old_inode);
++		return;
++	}
++
++	if (!test_opt (old_inode->i_sb, ASYNCDEL)) {
++		ext3_delete_inode(old_inode);
++		return;
++	}
++
++	/* We may want to delete the inode immediately and not defer it */
++	if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
++	    !sbi->s_delete_list.next) {
++		ext3_delete_inode(old_inode);
++		return;
++	}
++	/* An inode that was already queued once carries EXT3_STATE_DELETE. */
++	if (EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) {
++		ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
++			   old_inode->i_ino, blocks);
++		ext3_delete_inode(old_inode);
++		return;
++	}
++
++	/* We can iget this inode again here, because our caller has unhashed
++	 * old_inode, so new_inode will be in a different inode struct.
++	 *
++	 * We need to ensure that the i_orphan pointers in the other inodes
++	 * point at the new inode copy instead of the old one so the orphan
++	 * list doesn't get corrupted when the old orphan inode is freed.
++	 */
++	down(&sbi->s_orphan_lock);
++	/* NOTE(review): EXT3_ORPHAN_FS presumably lets iget() load it -- confirm */
++	EXT3_SB(old_inode->i_sb)->s_mount_state |= EXT3_ORPHAN_FS;
++	new_inode = iget(old_inode->i_sb, old_inode->i_ino);
++	EXT3_SB(old_inode->i_sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
++	if (new_inode && is_bad_inode(new_inode)) {	/* NULL is handled below */
++		printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
++		iput(new_inode);
++		new_inode = NULL;
++	}
++	if (!new_inode) {
++		up(&sbi->s_orphan_lock);
++		ext3_debug("delete inode %lu directly (bad read)\n",
++			   old_inode->i_ino);
++		ext3_delete_inode(old_inode);
++		return;
++	}
++	J_ASSERT(new_inode != old_inode);
++
++	J_ASSERT(!list_empty(&EXT3_I(old_inode)->i_orphan));
++	/* Ugh.  We need to insert new_inode into the same spot on the list
++	 * as old_inode was, to ensure the in-memory orphan list is still
++	 * in the same order as the on-disk orphan list (badness otherwise).
++	 */
++	EXT3_I(new_inode)->i_orphan = EXT3_I(old_inode)->i_orphan;
++	EXT3_I(new_inode)->i_orphan.next->prev = &EXT3_I(new_inode)->i_orphan;
++	EXT3_I(new_inode)->i_orphan.prev->next = &EXT3_I(new_inode)->i_orphan;
++	EXT3_I(new_inode)->i_state |= EXT3_STATE_DELETE;
++	up(&sbi->s_orphan_lock);
++
++	clear_inode(old_inode);
++
++	ext3_debug("delete inode %lu (%lu blocks) by thread\n",
++		   new_inode->i_ino, blocks);
++	spin_lock(&sbi->s_delete_lock);
++	J_ASSERT(list_empty(&new_inode->i_dentry));
++	list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
++	sbi->s_delete_blocks += blocks;
++	sbi->s_delete_inodes++;
++	spin_unlock(&sbi->s_delete_lock);
++
++	wake_up(&sbi->s_delete_thread_queue);
++}
++#else
++#define ext3_start_delete_thread(sbi) do {} while(0) /* no-op when EXT3_DELETE_THREAD is not defined */
++#define ext3_stop_delete_thread(sbi) do {} while(0) /* no-op when EXT3_DELETE_THREAD is not defined */
++#endif /* EXT3_DELETE_THREAD */
++
+ void ext3_put_super (struct super_block * sb)
+ {
+ struct ext3_sb_info *sbi = EXT3_SB(sb);
+@@ -405,6 +611,7 @@ void ext3_put_super (struct super_block
+ kdev_t j_dev = sbi->s_journal->j_dev;
+ int i;
+
++ ext3_stop_delete_thread(sbi);
+ ext3_xattr_put_super(sb);
+ journal_destroy(sbi->s_journal);
+ if (!(sb->s_flags & MS_RDONLY)) {
+@@ -453,7 +660,11 @@ static struct super_operations ext3_sops
+ write_inode: ext3_write_inode, /* BKL not held. Don't need */
+ dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */
+ put_inode: ext3_put_inode, /* BKL not held. Don't need */
++#ifdef EXT3_DELETE_THREAD
++ delete_inode: ext3_delete_inode_thread,/* BKL not held. We take it */
++#else
+ delete_inode: ext3_delete_inode, /* BKL not held. We take it */
++#endif
+ put_super: ext3_put_super, /* BKL held */
+ write_super: ext3_write_super, /* BKL held */
+ write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */
+@@ -514,6 +725,11 @@ static int parse_options (char * options
+ this_char = strtok (NULL, ",")) {
+ if ((value = strchr (this_char, '=')) != NULL)
+ *value++ = 0;
++#ifdef EXT3_DELETE_THREAD
++ if (!strcmp(this_char, "asyncdel"))
++ set_opt(*mount_options, ASYNCDEL);
++ else
++#endif
+ #ifdef CONFIG_EXT3_FS_XATTR_USER
+ if (!strcmp (this_char, "user_xattr"))
+ set_opt (*mount_options, XATTR_USER);
+@@ -1220,6 +1436,7 @@ struct super_block * ext3_read_super (st
+ }
+
+ ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
++ ext3_start_delete_thread(sb);
+ /*
+ * akpm: core read_super() calls in here with the superblock locked.
+ * That deadlocks, because orphan cleanup needs to lock the superblock
+diff -puNr origin/include/linux/ext3_fs.h linux/include/linux/ext3_fs.h
+--- origin/include/linux/ext3_fs.h 2003-05-04 17:22:49.000000000 +0400
++++ linux/include/linux/ext3_fs.h 2003-05-04 15:06:10.000000000 +0400
+@@ -193,6 +193,7 @@ struct ext3_group_desc
+ */
+ #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */
+ #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */
++#define EXT3_STATE_DELETE 0x00000010 /* deferred delete inode */
+
+ /*
+ * ioctl commands
+@@ -321,6 +322,7 @@ struct ext3_inode {
+ #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */
+ #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
+ #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
++#define EXT3_MOUNT_ASYNCDEL 0x10000 /* Delayed deletion */
+
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef _LINUX_EXT2_FS_H
+diff -puNr origin/include/linux/ext3_fs_sb.h linux/include/linux/ext3_fs_sb.h
+--- origin/include/linux/ext3_fs_sb.h 2003-05-04 17:23:52.000000000 +0400
++++ linux/include/linux/ext3_fs_sb.h 2003-05-04 11:37:04.000000000 +0400
+@@ -29,6 +29,8 @@
+
+ #define EXT3_MAX_GROUP_LOADED 8
+
++#define EXT3_DELETE_THREAD
++
+ /*
+ * third extended-fs super-block data in memory
+ */
+@@ -76,6 +78,14 @@ struct ext3_sb_info {
+ struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
+ wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
+ #endif
++#ifdef EXT3_DELETE_THREAD
++ spinlock_t s_delete_lock;
++ struct list_head s_delete_list;
++ unsigned long s_delete_blocks;
++ unsigned long s_delete_inodes;
++ wait_queue_head_t s_delete_thread_queue;
++ wait_queue_head_t s_delete_waiter_queue;
++#endif
+ };
+
+ #endif /* _LINUX_EXT3_FS_SB */
--- /dev/null
+ fs/ext3/ialloc.c | 47 ++++++++++++++++++++++-
+ fs/ext3/inode.c | 96 +++++++++++++++++++++++++++++++++++++-----------
+ include/linux/ext3_fs.h | 2 +
+ 3 files changed, 121 insertions(+), 24 deletions(-)
+
+--- linux-2.4.20/fs/ext3/ialloc.c~extN-noread-2.4.20 2003-05-04 16:41:22.000000000 +0800
++++ linux-2.4.20-root/fs/ext3/ialloc.c 2003-05-04 17:19:29.000000000 +0800
+@@ -289,6 +289,37 @@ error_return:
+ }
+
+ /*
++ * @block_group: block group of inode
++ * @offset: relative offset of inode within @block_group
++ *
++ * Check whether any of the inodes in this disk block are in use.
++ *
++ * Caller must be holding superblock lock (group/bitmap read lock in future).
++ */
++int ext3_itable_block_used(struct super_block *sb, unsigned int block_group,
++			   int offset)
++{
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	int slot = load_inode_bitmap(sb, block_group);
++	unsigned long cur, stop;
++	int per_block;
++	char *bits;
++
++	if (slot < 0)
++		return 1;	/* bitmap not loaded: report the block as in use */
++
++	per_block = sb->s_blocksize / sbi->s_inode_size;
++	cur = offset & ~(per_block - 1);	/* first inode in the same block */
++	stop = cur + per_block;
++	bits = sbi->s_inode_bitmap[slot]->b_data;
++	for (; cur < stop; cur++)
++		if (cur != offset && ext3_test_bit(cur, bits))
++			return 1;	/* a neighbouring inode is allocated */
++
++	return 0;
++}
++
++/*
+ * There are two policies for allocating an inode. If the new inode is
+ * a directory, then a forward search is made for a block group with both
+ * free space and a low directory-to-inode ratio; if that fails, then of
+@@ -310,6 +341,7 @@ struct inode * ext3_new_inode (handle_t
+ struct ext3_group_desc * gdp;
+ struct ext3_group_desc * tmp;
+ struct ext3_super_block * es;
++ struct ext3_iloc iloc;
+ int err = 0;
+
+ /* Cannot create files in a deleted directory */
+@@ -510,8 +542,19 @@ repeat:
+ inode->i_generation = sb->u.ext3_sb.s_next_generation++;
+
+ inode->u.ext3_i.i_state = EXT3_STATE_NEW;
+- err = ext3_mark_inode_dirty(handle, inode);
+- if (err) goto fail;
++	err = ext3_get_inode_loc_new(inode, &iloc, 1);	/* new=1: read may be skipped */
++	if (err) goto fail;
++	BUFFER_TRACE(iloc.bh, "get_write_access");	/* iloc is a struct, not a pointer */
++	err = ext3_journal_get_write_access(handle, iloc.bh);
++	if (err) {
++		brelse(iloc.bh);	/* drop the reference taken by get_inode_loc */
++		iloc.bh = NULL;
++		goto fail;
++	}
++	err = ext3_mark_iloc_dirty(handle, inode, &iloc);
++	if (err) goto fail;
++
++
+
+ unlock_super (sb);
+ if(DQUOT_ALLOC_INODE(inode)) {
+--- linux-2.4.20/fs/ext3/inode.c~extN-noread-2.4.20 2003-05-04 16:41:26.000000000 +0800
++++ linux-2.4.20-root/fs/ext3/inode.c 2003-05-04 17:22:49.000000000 +0800
+@@ -2013,14 +2013,19 @@ out_stop:
+ ext3_journal_stop(handle, inode);
+ }
+
+-/*
+- * ext3_get_inode_loc returns with an extra refcount against the
+- * inode's underlying buffer_head on success.
+- */
+-
+-int ext3_get_inode_loc (struct inode *inode, struct ext3_iloc *iloc)
++#define NUM_INODE_PREREAD 16
++
++/*
++ * ext3_get_inode_loc returns with an extra refcount against the inode's
++ * underlying buffer_head on success. If this is for a new inode allocation
++ * (new is non-zero) then we may be able to optimize away the read if there
++ * are no other in-use inodes in this inode table block. If we need to do
++ * a read, then read in a whole chunk of blocks to avoid blocking again soon
++ * if we are doing lots of creates/updates.
++ */
++int ext3_get_inode_loc_new(struct inode *inode, struct ext3_iloc *iloc, int new)
+ {
+- struct buffer_head *bh = 0;
++ struct buffer_head *bh[NUM_INODE_PREREAD];
+ unsigned long block;
+ unsigned long block_group;
+ unsigned long group_desc;
+@@ -2045,31 +2050,73 @@ int ext3_get_inode_loc (struct inode *in
+ }
+ group_desc = block_group >> EXT3_DESC_PER_BLOCK_BITS(inode->i_sb);
+ desc = block_group & (EXT3_DESC_PER_BLOCK(inode->i_sb) - 1);
+- bh = inode->i_sb->u.ext3_sb.s_group_desc[group_desc];
+- if (!bh) {
++ if (!(inode->i_sb->u.ext3_sb.s_group_desc[group_desc])) {
+ ext3_error (inode->i_sb, "ext3_get_inode_loc",
+ "Descriptor not loaded");
+ goto bad_inode;
+ }
+
+- gdp = (struct ext3_group_desc *) bh->b_data;
++ gdp = (struct ext3_group_desc *)(inode->i_sb->u.ext3_sb.s_group_desc[group_desc]->b_data);
+ /*
+ * Figure out the offset within the block group inode table
+ */
+- offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb)) *
+- EXT3_INODE_SIZE(inode->i_sb);
++ offset = ((inode->i_ino - 1) % EXT3_INODES_PER_GROUP(inode->i_sb));
++
+ block = le32_to_cpu(gdp[desc].bg_inode_table) +
+- (offset >> EXT3_BLOCK_SIZE_BITS(inode->i_sb));
+- if (!(bh = sb_bread(inode->i_sb, block))) {
+- ext3_error (inode->i_sb, "ext3_get_inode_loc",
+- "unable to read inode block - "
+- "inode=%lu, block=%lu", inode->i_ino, block);
+- goto bad_inode;
+- }
+- offset &= (EXT3_BLOCK_SIZE(inode->i_sb) - 1);
++ (offset * EXT3_INODE_SIZE(inode->i_sb) >> EXT3_BLOCK_SIZE_BITS(inode->i_sb));
+
+- iloc->bh = bh;
+- iloc->raw_inode = (struct ext3_inode *) (bh->b_data + offset);
++	bh[0] = sb_getblk(inode->i_sb, block);
++	if (buffer_uptodate(bh[0]))
++		goto done;
++
++	/* If we don't really need to read this block, and it isn't already
++	 * in memory, then we just zero it out.  Otherwise, we keep the
++	 * current block contents (deleted inode data) for posterity.
++	 */
++	if (new && !ext3_itable_block_used(inode->i_sb, block_group, offset)) {
++		lock_buffer(bh[0]);
++		memset(bh[0]->b_data, 0, bh[0]->b_size);
++		mark_buffer_uptodate(bh[0], 1);
++		unlock_buffer(bh[0]);
++	} else {
++		unsigned long block_end, itable_end;
++		int count = 1;	/* bh[0] is already held */
++
++		itable_end = le32_to_cpu(gdp[desc].bg_inode_table) +
++				inode->i_sb->u.ext3_sb.s_itb_per_group;
++		block_end = block + NUM_INODE_PREREAD;
++		if (block_end > itable_end)
++			block_end = itable_end;
++		/* bh[0] covers 'block'; start at the next one to stay in bh[] */
++		for (++block; block < block_end; block++) {
++			bh[count] = sb_getblk(inode->i_sb, block);
++			if (count && (buffer_uptodate(bh[count]) ||
++				      buffer_locked(bh[count]))) {
++				__brelse(bh[count]);
++			} else
++				count++;
++		}
++
++		ll_rw_block(READ, count, bh);
++		/* Release all but the block we actually need (bh[0]) */
++		while (--count > 0)
++			__brelse(bh[count]);
++
++		wait_on_buffer(bh[0]);
++		if (!buffer_uptodate(bh[0])) {
++			ext3_error(inode->i_sb, __FUNCTION__,
++				   "unable to read inode block - "
++				   "inode=%lu, block=%lu", inode->i_ino,
++				   bh[0]->b_blocknr);
++			brelse(bh[0]);	/* drop the sb_getblk() reference */
++			goto bad_inode;
++		}
++	}
++ done:
++	offset = (offset * EXT3_INODE_SIZE(inode->i_sb)) & (EXT3_BLOCK_SIZE(inode->i_sb) - 1);
++
++	iloc->bh = bh[0];
++	iloc->raw_inode = (struct ext3_inode *)(bh[0]->b_data + offset);
+ iloc->block_group = block_group;
+
+ return 0;
+@@ -2078,6 +2125,11 @@ int ext3_get_inode_loc (struct inode *in
+ return -EIO;
+ }
+
++int ext3_get_inode_loc(struct inode *inode, struct ext3_iloc *iloc)
++{
++ return ext3_get_inode_loc_new(inode, iloc, 0); /* new == 0: not a fresh allocation, so the zero-fill shortcut is never taken */
++}
++
+ void ext3_read_inode(struct inode * inode)
+ {
+ struct ext3_iloc iloc;
+--- linux-2.4.20/include/linux/ext3_fs.h~extN-noread-2.4.20 2003-05-04 16:41:22.000000000 +0800
++++ linux-2.4.20-root/include/linux/ext3_fs.h 2003-05-04 17:19:29.000000000 +0800
+@@ -683,6 +683,8 @@ extern int ext3_forget(handle_t *, int,
+ extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *);
+ extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *);
+
++extern int ext3_itable_block_used(struct super_block *sb, unsigned int, int);
++extern int ext3_get_inode_loc_new(struct inode *, struct ext3_iloc *, int);
+ extern int ext3_get_inode_loc (struct inode *, struct ext3_iloc *);
+ extern void ext3_read_inode (struct inode *);
+ extern void ext3_write_inode (struct inode *, int);
+
+_