-diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
---- origin/fs/ext3/super.c 2003-05-04 17:23:52.000000000 +0400
-+++ linux/fs/ext3/super.c 2003-05-04 17:09:20.000000000 +0400
-@@ -398,6 +398,219 @@ static void dump_orphan_list(struct supe
+ fs/ext3/file.c | 4
+ fs/ext3/inode.c | 116 ++++++++++++++++++++++
+ fs/ext3/super.c | 230 +++++++++++++++++++++++++++++++++++++++++++++
+ include/linux/ext3_fs.h | 5
+ include/linux/ext3_fs_sb.h | 10 +
+ 5 files changed, 365 insertions(+)
+
+Index: linux-2.4.20/fs/ext3/super.c
+===================================================================
+--- linux-2.4.20.orig/fs/ext3/super.c 2004-01-12 20:13:37.000000000 +0300
++++ linux-2.4.20/fs/ext3/super.c 2004-01-13 16:59:54.000000000 +0300
+@@ -48,6 +48,8 @@
+ static void ext3_clear_journal_err(struct super_block * sb,
+ struct ext3_super_block * es);
+
++static int ext3_sync_fs(struct super_block * sb);
++
+ #ifdef CONFIG_JBD_DEBUG
+ int journal_no_write[2];
+
+@@ -398,6 +400,221 @@
}
}
+
+ clear_opt(sbi->s_mount_opt, ASYNCDEL);
+ wake_up(&sbi->s_delete_thread_queue);
-+ wait_event(sbi->s_delete_waiter_queue, list_empty(&sbi->s_delete_list));
++ wait_event(sbi->s_delete_waiter_queue,
++ sbi->s_delete_list.next == 0 && sbi->s_delete_inodes == 0);
+}
+
+/* Instead of playing games with the inode flags, destruction, etc we just
+ * If we have any problem deferring the delete, just delete it right away.
+ * If we defer it, we also mark how many blocks it would free, so that we
+ * can keep the statfs data correct, and we know if we should sleep on the
-+ * truncate thread when we run out of space.
-+ *
-+ * In 2.5 this can be done much more cleanly by just registering a "drop"
-+ * method in the super_operations struct.
++ * delete thread when we run out of space.
+ */
+static void ext3_delete_inode_thread(struct inode *old_inode)
+{
+ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
+ struct inode *new_inode;
+ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
+
+ return;
+ }
+
-+ if (!test_opt(old_inode->i_sb, ASYNCDEL)) {
-+ ext3_delete_inode(old_inode);
-+ return;
-+ }
++ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++ goto out_delete;
+
+ /* We may want to delete the inode immediately and not defer it */
-+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
-+ !sbi->s_delete_list.next) {
-+ ext3_delete_inode(old_inode);
-+ return;
-+ }
++ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS)
++ goto out_delete;
+
-+ if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) ||
-+ (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
++ /* We can't use the delete thread as-is during real orphan recovery,
++ * as we add to the orphan list here, causing ext3_orphan_cleanup()
++ * to loop endlessly. It would be nice to do so, but needs work.
++ */
++ if (oei->i_state & EXT3_STATE_DELETE ||
++ sbi->s_mount_state & EXT3_ORPHAN_FS) {
+ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
+ old_inode->i_ino, blocks);
-+ ext3_delete_inode(old_inode);
-+ return;
++ goto out_delete;
+ }
+
+ /* We can iget this inode again here, because our caller has unhashed
+ */
+ down(&sbi->s_orphan_lock);
+
-+ EXT3_SB(old_inode->i_sb)->s_mount_state |= EXT3_ORPHAN_FS;
++ sbi->s_mount_state |= EXT3_ORPHAN_FS;
+ new_inode = iget(old_inode->i_sb, old_inode->i_ino);
-+ EXT3_SB(old_inode->i_sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
++ sbi->s_mount_state &= ~EXT3_ORPHAN_FS;
+ if (is_bad_inode(new_inode)) {
+ printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
+ iput(new_inode);
+ up(&sbi->s_orphan_lock);
+ ext3_debug("delete inode %lu directly (bad read)\n",
+ old_inode->i_ino);
-+ ext3_delete_inode(old_inode);
-+ return;
++ goto out_delete;
+ }
+ J_ASSERT(new_inode != old_inode);
+
-+ J_ASSERT(!list_empty(&EXT3_I(old_inode)->i_orphan));
++ J_ASSERT(!list_empty(&oei->i_orphan));
++
++ nei = EXT3_I(new_inode);
+ /* Ugh. We need to insert new_inode into the same spot on the list
+ * as old_inode was, to ensure the in-memory orphan list is still
+ * in the same order as the on-disk orphan list (badness otherwise).
+ */
-+ EXT3_I(new_inode)->i_orphan = EXT3_I(old_inode)->i_orphan;
-+ EXT3_I(new_inode)->i_orphan.next->prev = &EXT3_I(new_inode)->i_orphan;
-+ EXT3_I(new_inode)->i_orphan.prev->next = &EXT3_I(new_inode)->i_orphan;
-+ EXT3_I(new_inode)->i_state |= EXT3_STATE_DELETE;
++ nei->i_orphan = oei->i_orphan;
++ nei->i_orphan.next->prev = &nei->i_orphan;
++ nei->i_orphan.prev->next = &nei->i_orphan;
++ nei->i_state |= EXT3_STATE_DELETE;
+ up(&sbi->s_orphan_lock);
+
+ clear_inode(old_inode);
+ new_inode->i_ino, blocks);
+
+ wake_up(&sbi->s_delete_thread_queue);
++ return;
++
++out_delete:
++ ext3_delete_inode(old_inode);
+}
+#else
+#define ext3_start_delete_thread(sbi) do {} while(0)
void ext3_put_super (struct super_block * sb)
{
struct ext3_sb_info *sbi = EXT3_SB(sb);
-@@ -405,6 +611,7 @@ void ext3_put_super (struct super_block
+@@ -405,6 +622,7 @@
kdev_t j_dev = sbi->s_journal->j_dev;
int i;
-+ ext3_stop_delete_thread(sbi);
++ J_ASSERT(sbi->s_delete_inodes == 0);
ext3_xattr_put_super(sb);
journal_destroy(sbi->s_journal);
if (!(sb->s_flags & MS_RDONLY)) {
-@@ -453,7 +660,11 @@ static struct super_operations ext3_sops
+@@ -453,9 +671,14 @@
write_inode: ext3_write_inode, /* BKL not held. Don't need */
dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */
put_inode: ext3_put_inode, /* BKL not held. Don't need */
+#endif
put_super: ext3_put_super, /* BKL held */
write_super: ext3_write_super, /* BKL held */
++ sync_fs: ext3_sync_fs,
write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */
-@@ -514,6 +725,13 @@ static int parse_options (char * options
- this_char = strtok (NULL, ",")) {
- if ((value = strchr (this_char, '=')) != NULL)
- *value++ = 0;
+ unlockfs: ext3_unlockfs, /* BKL not held. We take it */
+ statfs: ext3_statfs, /* BKL held */
+@@ -521,6 +744,13 @@
+ clear_opt (*mount_options, XATTR_USER);
+ else
+ #endif
+#ifdef EXT3_DELETE_THREAD
+ if (!strcmp(this_char, "asyncdel"))
+ set_opt(*mount_options, ASYNCDEL);
+ clear_opt(*mount_options, ASYNCDEL);
+ else
+#endif
- #ifdef CONFIG_EXT3_FS_XATTR_USER
- if (!strcmp (this_char, "user_xattr"))
- set_opt (*mount_options, XATTR_USER);
-@@ -1220,6 +1436,7 @@ struct super_block * ext3_read_super (st
+ if (!strcmp (this_char, "bsddf"))
+ clear_opt (*mount_options, MINIX_DF);
+ else if (!strcmp (this_char, "nouid32")) {
+@@ -1220,6 +1450,7 @@
}
ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
/*
* akpm: core read_super() calls in here with the superblock locked.
* That deadlocks, because orphan cleanup needs to lock the superblock
-@@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s
+@@ -1625,6 +1856,21 @@
+ }
+ }
+
++static int ext3_sync_fs(struct super_block *sb)
++{
++ tid_t target;
++ /* sync_fs hook: stop the delete thread on umount, then commit the journal. */
++ if (atomic_read(&sb->s_active) == 0) {
++ /* no active refs left, i.e. fs is being umounted: stop delete thread */
++ ext3_stop_delete_thread(EXT3_SB(sb));
++ }
++ /* Clear the dirty flag, start a journal commit and wait for it to finish. */
++ sb->s_dirt = 0;
++ target = log_start_commit(EXT3_SB(sb)->s_journal, NULL);
++ log_wait_commit(EXT3_SB(sb)->s_journal, target);
++ return 0; /* always reports success */
++}
++
+ /*
+ * LVM calls this function before a (read-only) snapshot is created. This
+ * gives us a chance to flush the journal completely and mark the fs clean.
+@@ -1682,6 +1928,9 @@
if (!parse_options(data, &tmp, sbi, &tmp, 1))
return -EINVAL;
if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
ext3_abort(sb, __FUNCTION__, "Abort forced by user");
-diff -puNr origin/include/linux/ext3_fs.h linux/include/linux/ext3_fs.h
---- origin/include/linux/ext3_fs.h 2003-05-04 17:22:49.000000000 +0400
-+++ linux/include/linux/ext3_fs.h 2003-05-04 15:06:10.000000000 +0400
-@@ -193,6 +193,7 @@ struct ext3_group_desc
+Index: linux-2.4.20/fs/ext3/inode.c
+===================================================================
+--- linux-2.4.20.orig/fs/ext3/inode.c 2004-01-12 20:13:37.000000000 +0300
++++ linux-2.4.20/fs/ext3/inode.c 2004-01-13 16:55:45.000000000 +0300
+@@ -2552,6 +2552,118 @@
+ return err;
+ }
+
++#ifdef EXT3_DELETE_THREAD
++/* Move blocks from to-be-truncated inode over to a new inode, and delete
++ * that one from the delete thread instead. This avoids a lot of latency
++ * when truncating large files. Only truncates to size 0 are deferred.
++ *
++ * If we have any problem deferring the truncate, just truncate it right away.
++ * If we defer it, we also mark how many blocks it would free, so that we
++ * can keep the statfs data correct, and we know if we should sleep on the
++ * delete thread when we run out of space.
++ */
++void ext3_truncate_thread(struct inode *old_inode)
++{
++ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
++ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
++ struct inode *new_inode;
++ handle_t *handle;
++ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
++
++ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
++ goto out_truncate;
++
++ /* XXX This is a temporary limitation for code simplicity.
++ * We could truncate to arbitrary sizes at some later time.
++ */
++ if (old_inode->i_size != 0)
++ goto out_truncate;
++
++ /* We may want to truncate the inode immediately and not defer it */
++ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
++ old_inode->i_size > oei->i_disksize)
++ goto out_truncate;
++
++ /* We can't use the delete thread as-is during real orphan recovery,
++ * as we add to the orphan list here, causing ext3_orphan_cleanup()
++ * to loop endlessly. It would be nice to do so, but needs work.
++ */
++ if (oei->i_state & EXT3_STATE_DELETE ||
++ sbi->s_mount_state & EXT3_ORPHAN_FS) {
++ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
++ old_inode->i_ino, blocks);
++ goto out_truncate;
++ }
++
++ ext3_discard_prealloc(old_inode);
++
++ /* old_inode = 1
++ * new_inode = sb + GDT + ibitmap
++ * orphan list = 1 inode/superblock for add, 2 inodes for del
++ * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
++ */
++ handle = ext3_journal_start(old_inode, 7);
++ if (IS_ERR(handle))
++ goto out_truncate;
++
++ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
++ if (IS_ERR(new_inode)) {
++ ext3_debug("truncate inode %lu directly (no new inodes)\n",
++ old_inode->i_ino);
++ goto out_journal;
++ }
++
++ nei = EXT3_I(new_inode);
++
++ down_write(&oei->truncate_sem);
++ new_inode->i_size = old_inode->i_size;
++ new_inode->i_blocks = old_inode->i_blocks;
++ new_inode->i_uid = old_inode->i_uid;
++ new_inode->i_gid = old_inode->i_gid;
++ new_inode->i_nlink = 0;
++
++ /* FIXME when we do arbitrary truncates */
++ old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0;
++ old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME;
++
++ memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data));
++ memset(oei->i_data, 0, sizeof(oei->i_data));
++
++ nei->i_disksize = oei->i_disksize;
++ nei->i_state |= EXT3_STATE_DELETE;
++ up_write(&oei->truncate_sem);
++ /* new_inode now holds the block pointers: failures below must iput() it */
++ if (ext3_orphan_add(handle, new_inode) < 0)
++ goto out_iput;
++
++ if (ext3_orphan_del(handle, old_inode) < 0) {
++ ext3_orphan_del(handle, new_inode);
++ goto out_iput;
++ }
++
++ ext3_journal_stop(handle, old_inode);
++
++ spin_lock(&sbi->s_delete_lock);
++ J_ASSERT(list_empty(&new_inode->i_dentry));
++ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
++ sbi->s_delete_blocks += blocks;
++ sbi->s_delete_inodes++;
++ spin_unlock(&sbi->s_delete_lock);
++
++ ext3_debug("delete inode %lu (%lu blocks) by thread\n",
++ new_inode->i_ino, blocks);
++ wake_up(&sbi->s_delete_thread_queue);
++ return;
++
++out_iput:
++ iput(new_inode); /* drop our ref (i_nlink is 0) instead of leaking it */
++out_journal:
++ ext3_journal_stop(handle, old_inode);
++out_truncate:
++ ext3_truncate(old_inode);
++}
++#endif /* EXT3_DELETE_THREAD */
++
+ /*
+ * On success, We end up with an outstanding reference count against
+ * iloc->bh. This _must_ be cleaned up later.
+Index: linux-2.4.20/fs/ext3/file.c
+===================================================================
+--- linux-2.4.20.orig/fs/ext3/file.c 2004-01-12 20:13:36.000000000 +0300
++++ linux-2.4.20/fs/ext3/file.c 2004-01-13 16:55:45.000000000 +0300
+@@ -125,7 +125,11 @@
+ };
+
+ struct inode_operations ext3_file_inode_operations = {
++#ifdef EXT3_DELETE_THREAD
++ truncate: ext3_truncate_thread, /* BKL held */
++#else
+ truncate: ext3_truncate, /* BKL held */
++#endif
+ setattr: ext3_setattr, /* BKL held */
+ setxattr: ext3_setxattr, /* BKL held */
+ getxattr: ext3_getxattr, /* BKL held */
+Index: linux-2.4.20/fs/buffer.c
+===================================================================
+--- linux-2.4.20.orig/fs/buffer.c 2003-05-16 05:29:12.000000000 +0400
++++ linux-2.4.20/fs/buffer.c 2004-01-13 16:55:45.000000000 +0300
+@@ -328,6 +328,8 @@
+ if (sb->s_dirt && sb->s_op && sb->s_op->write_super)
+ sb->s_op->write_super(sb);
+ unlock_super(sb);
++ if (sb->s_op && sb->s_op->sync_fs)
++ sb->s_op->sync_fs(sb);
+ unlock_kernel();
+
+ return sync_buffers(dev, 1);
+Index: linux-2.4.20/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.4.20.orig/include/linux/ext3_fs.h 2004-01-12 20:13:37.000000000 +0300
++++ linux-2.4.20/include/linux/ext3_fs.h 2004-01-13 16:55:45.000000000 +0300
+@@ -193,6 +193,7 @@
*/
#define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */
#define EXT3_STATE_NEW 0x00000002 /* inode is newly created */
/*
* ioctl commands
-@@ -321,6 +322,7 @@ struct ext3_inode {
+@@ -320,6 +321,7 @@
#define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */
#define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
#define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
-+#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */
++#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */
/* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
#ifndef _LINUX_EXT2_FS_H
-diff -puNr origin/include/linux/ext3_fs_sb.h linux/include/linux/ext3_fs_sb.h
---- origin/include/linux/ext3_fs_sb.h 2003-05-04 17:23:52.000000000 +0400
-+++ linux/include/linux/ext3_fs_sb.h 2003-05-04 11:37:04.000000000 +0400
+@@ -696,6 +698,9 @@
+ extern void ext3_dirty_inode(struct inode *);
+ extern int ext3_change_inode_journal_flag(struct inode *, int);
+ extern void ext3_truncate (struct inode *);
++#ifdef EXT3_DELETE_THREAD
++extern void ext3_truncate_thread(struct inode *inode);
++#endif
+
+ /* ioctl.c */
+ extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
+Index: linux-2.4.20/include/linux/ext3_fs_sb.h
+===================================================================
+--- linux-2.4.20.orig/include/linux/ext3_fs_sb.h 2004-01-12 20:13:37.000000000 +0300
++++ linux-2.4.20/include/linux/ext3_fs_sb.h 2004-01-13 16:55:45.000000000 +0300
@@ -29,6 +29,8 @@
#define EXT3_MAX_GROUP_LOADED 8
/*
* third extended-fs super-block data in memory
*/
-@@ -76,6 +78,14 @@ struct ext3_sb_info {
+@@ -76,6 +78,14 @@
struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
#endif
};
#endif /* _LINUX_EXT3_FS_SB */
+Index: linux-2.4.20/include/linux/fs.h
+===================================================================
+--- linux-2.4.20.orig/include/linux/fs.h 2004-01-12 20:13:36.000000000 +0300
++++ linux-2.4.20/include/linux/fs.h 2004-01-13 16:55:45.000000000 +0300
+@@ -917,6 +917,7 @@
+ void (*delete_inode) (struct inode *);
+ void (*put_super) (struct super_block *);
+ void (*write_super) (struct super_block *);
++ int (*sync_fs) (struct super_block *);
+ void (*write_super_lockfs) (struct super_block *);
+ void (*unlockfs) (struct super_block *);
+ int (*statfs) (struct super_block *, struct statfs *);