X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fkernel_patches%2Fpatches%2Fext3-delete_thread-2.4.21-chaos.patch;fp=lustre%2Fkernel_patches%2Fpatches%2Fext3-delete_thread-2.4.21-chaos.patch;h=0000000000000000000000000000000000000000;hb=191061ee668400324f4505cf498f1ee2d57e4962;hp=2fc365d1e27ba567c7a805633622338a98ae4e7f;hpb=86f8aaddd53ca75bdf3f4d5c1c92536be2da1415;p=fs%2Flustre-release.git diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.21-chaos.patch deleted file mode 100644 index 2fc365d..0000000 --- a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.21-chaos.patch +++ /dev/null @@ -1,496 +0,0 @@ - fs/ext3/file.c | 4 - fs/ext3/inode.c | 116 ++++++++++++++++++++++ - fs/ext3/super.c | 230 +++++++++++++++++++++++++++++++++++++++++++++ - include/linux/ext3_fs.h | 5 - include/linux/ext3_fs_sb.h | 10 + - 5 files changed, 365 insertions(+) - -Index: linux-2.4.21-chaos/fs/ext3/super.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/super.c 2004-01-12 19:20:07.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/super.c 2004-01-13 17:25:49.000000000 +0300 -@@ -425,6 +425,221 @@ - } - } - -+#ifdef EXT3_DELETE_THREAD -+/* -+ * Delete inodes in a loop until there are no more to be deleted. -+ * Normally, we run in the background doing the deletes and sleeping again, -+ * and clients just add new inodes to be deleted onto the end of the list. -+ * If someone is concerned about free space (e.g. block allocation or similar) -+ * then they can sleep on s_delete_waiter_queue and be woken up when space -+ * has been freed. -+ */ -+int ext3_delete_thread(void *data) -+{ -+ struct super_block *sb = data; -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ struct task_struct *tsk = current; -+ -+ /* Almost like daemonize, but not quite */ -+ exit_mm(current); -+ tsk->session = 1; -+ tsk->pgrp = 1; -+ tsk->tty = NULL; -+ exit_files(current); -+ reparent_to_init(); -+ -+ sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev)); -+ sigfillset(&tsk->blocked); -+ -+ /*tsk->flags |= PF_KERNTHREAD;*/ -+ -+ INIT_LIST_HEAD(&sbi->s_delete_list); -+ wake_up(&sbi->s_delete_waiter_queue); -+ ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev)); -+ -+ /* main loop */ -+ for (;;) { -+ wait_event_interruptible(sbi->s_delete_thread_queue, -+ !list_empty(&sbi->s_delete_list) || -+ !test_opt(sb, ASYNCDEL)); -+ ext3_debug("%s woken up: %lu inodes, %lu blocks\n", -+ tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks); -+ -+ spin_lock(&sbi->s_delete_lock); -+ if (list_empty(&sbi->s_delete_list)) { -+ clear_opt(sbi->s_mount_opt, ASYNCDEL); -+ memset(&sbi->s_delete_list, 0, -+ sizeof(sbi->s_delete_list)); -+ spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("delete thread on %s exiting\n", -+ kdevname(sb->s_dev)); -+ wake_up(&sbi->s_delete_waiter_queue); -+ break; -+ } -+ -+ while (!list_empty(&sbi->s_delete_list)) { -+ struct inode *inode=list_entry(sbi->s_delete_list.next, -+ struct inode, i_dentry); -+ unsigned long blocks = inode->i_blocks >> -+ (inode->i_blkbits - 9); -+ -+ list_del_init(&inode->i_dentry); -+ spin_unlock(&sbi->s_delete_lock); -+ ext3_debug("%s delete ino %lu blk %lu\n", -+ tsk->comm, inode->i_ino, blocks); -+ -+ iput(inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ sbi->s_delete_blocks -= blocks; -+ sbi->s_delete_inodes--; -+ } -+ if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) { -+ ext3_warning(sb, __FUNCTION__, -+ "%lu blocks, %lu inodes on list?\n", -+ sbi->s_delete_blocks,sbi->s_delete_inodes); -+ sbi->s_delete_blocks = 0; -+ sbi->s_delete_inodes = 0; -+ } -+ spin_unlock(&sbi->s_delete_lock); -+ wake_up(&sbi->s_delete_waiter_queue); -+ } -+ -+ return 0; -+} -+ -+static void ext3_start_delete_thread(struct super_block *sb) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(sb); -+ int rc; -+ -+ spin_lock_init(&sbi->s_delete_lock); -+ init_waitqueue_head(&sbi->s_delete_thread_queue); -+ init_waitqueue_head(&sbi->s_delete_waiter_queue); -+ -+ if (!test_opt(sb, ASYNCDEL)) -+ return; -+ -+ rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES); -+ if (rc < 0) -+ printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n", -+ rc); -+ else -+ wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next); -+} -+ -+static void ext3_stop_delete_thread(struct ext3_sb_info *sbi) -+{ -+ if (sbi->s_delete_list.next == 0) /* thread never started */ -+ return; -+ -+ clear_opt(sbi->s_mount_opt, ASYNCDEL); -+ wake_up(&sbi->s_delete_thread_queue); -+ wait_event(sbi->s_delete_waiter_queue, -+ sbi->s_delete_list.next == 0 && sbi->s_delete_inodes == 0); -+} -+ -+/* Instead of playing games with the inode flags, destruction, etc we just -+ * create a new inode locally and put it on a list for the truncate thread. -+ * We need large parts of the inode struct in order to complete the -+ * truncate and unlink, so we may as well just have a real inode to do it. -+ * -+ * If we have any problem deferring the delete, just delete it right away. -+ * If we defer it, we also mark how many blocks it would free, so that we -+ * can keep the statfs data correct, and we know if we should sleep on the -+ * delete thread when we run out of space. -+ */ -+static void ext3_delete_inode_thread(struct inode *old_inode) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); -+ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode); -+ struct inode *new_inode; -+ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); -+ -+ if (is_bad_inode(old_inode)) { -+ clear_inode(old_inode); -+ return; -+ } -+ -+ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next) -+ goto out_delete; -+ -+ /* We may want to delete the inode immediately and not defer it */ -+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS) -+ goto out_delete; -+ -+ /* We can't use the delete thread as-is during real orphan recovery, -+ * as we add to the orphan list here, causing ext3_orphan_cleanup() -+ * to loop endlessly. It would be nice to do so, but needs work. -+ */ -+ if (oei->i_state & EXT3_STATE_DELETE || -+ sbi->s_mount_state & EXT3_ORPHAN_FS) { -+ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", -+ old_inode->i_ino, blocks); -+ goto out_delete; -+ } -+ -+ /* We can iget this inode again here, because our caller has unhashed -+ * old_inode, so new_inode will be in a different inode struct. -+ * -+ * We need to ensure that the i_orphan pointers in the other inodes -+ * point at the new inode copy instead of the old one so the orphan -+ * list doesn't get corrupted when the old orphan inode is freed. -+ */ -+ down(&sbi->s_orphan_lock); -+ -+ sbi->s_mount_state |= EXT3_ORPHAN_FS; -+ new_inode = iget(old_inode->i_sb, old_inode->i_ino); -+ sbi->s_mount_state &= ~EXT3_ORPHAN_FS; -+ if (is_bad_inode(new_inode)) { -+ printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino); -+ iput(new_inode); -+ new_inode = NULL; -+ } -+ if (!new_inode) { -+ up(&sbi->s_orphan_lock); -+ ext3_debug("delete inode %lu directly (bad read)\n", -+ old_inode->i_ino); -+ goto out_delete; -+ } -+ J_ASSERT(new_inode != old_inode); -+ -+ J_ASSERT(!list_empty(&oei->i_orphan)); -+ -+ nei = EXT3_I(new_inode); -+ /* Ugh. We need to insert new_inode into the same spot on the list -+ * as old_inode was, to ensure the in-memory orphan list is still -+ * in the same order as the on-disk orphan list (badness otherwise). -+ */ -+ nei->i_orphan = oei->i_orphan; -+ nei->i_orphan.next->prev = &nei->i_orphan; -+ nei->i_orphan.prev->next = &nei->i_orphan; -+ nei->i_state |= EXT3_STATE_DELETE; -+ up(&sbi->s_orphan_lock); -+ -+ clear_inode(old_inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ J_ASSERT(list_empty(&new_inode->i_dentry)); -+ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); -+ sbi->s_delete_blocks += blocks; -+ sbi->s_delete_inodes++; -+ spin_unlock(&sbi->s_delete_lock); -+ -+ ext3_debug("delete inode %lu (%lu blocks) by thread\n", -+ new_inode->i_ino, blocks); -+ -+ wake_up(&sbi->s_delete_thread_queue); -+ return; -+ -+out_delete: -+ ext3_delete_inode(old_inode); -+} -+#else -+#define ext3_start_delete_thread(sbi) do {} while(0) -+#define ext3_stop_delete_thread(sbi) do {} while(0) -+#endif /* EXT3_DELETE_THREAD */ -+ - void ext3_put_super (struct super_block * sb) - { - struct ext3_sb_info *sbi = EXT3_SB(sb); -@@ -432,6 +647,7 @@ - kdev_t j_dev = sbi->s_journal->j_dev; - int i; - -+ J_ASSERT(sbi->s_delete_inodes == 0); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -501,7 +717,11 @@ - write_inode: ext3_write_inode, /* BKL not held. Don't need */ - dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */ - put_inode: ext3_put_inode, /* BKL not held. Don't need */ -+#ifdef EXT3_DELETE_THREAD -+ delete_inode: ext3_delete_inode_thread,/* BKL not held. We take it */ -+#else - delete_inode: ext3_delete_inode, /* BKL not held. We take it */ -+#endif - put_super: ext3_put_super, /* BKL held */ - write_super: ext3_write_super, /* BKL held */ - sync_fs: ext3_sync_fs, -@@ -579,6 +799,13 @@ - *mount_flags &= ~MS_POSIXACL; - else - #endif -+#ifdef EXT3_DELETE_THREAD -+ if (!strcmp(this_char, "asyncdel")) -+ set_opt(*mount_options, ASYNCDEL); -+ else if (!strcmp(this_char, "noasyncdel")) -+ clear_opt(*mount_options, ASYNCDEL); -+ else -+#endif - if (!strcmp (this_char, "bsddf")) - clear_opt (*mount_options, MINIX_DF); - else if (!strcmp (this_char, "nouid32")) { -@@ -1283,6 +1510,7 @@ - } - - ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); -+ ext3_start_delete_thread(sb); - /* - * akpm: core read_super() calls in here with the superblock locked. - * That deadlocks, because orphan cleanup needs to lock the superblock -@@ -1676,7 +1904,12 @@ - static int ext3_sync_fs(struct super_block *sb) - { - tid_t target; -- -+ -+ if (atomic_read(&sb->s_active) == 0) { -+ /* fs is being umounted: time to stop delete thread */ -+ ext3_stop_delete_thread(EXT3_SB(sb)); -+ } -+ - sb->s_dirt = 0; - target = log_start_commit(EXT3_SB(sb)->s_journal, NULL); - log_wait_commit(EXT3_SB(sb)->s_journal, target); -@@ -1746,6 +1979,9 @@ - if (!parse_options(data, &tmp, sbi, &mount_flags, &tmp, 1)) - return -EINVAL; - -+ if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY)) -+ ext3_stop_delete_thread(sbi); -+ - if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) - ext3_abort(sb, __FUNCTION__, "Abort forced by user"); - -Index: linux-2.4.21-chaos/fs/ext3/inode.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/inode.c 2004-01-12 19:20:06.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/inode.c 2004-01-12 19:20:07.000000000 +0300 -@@ -2179,6 +2179,118 @@ - return; /* AKPM: return what? */ - } - -+#ifdef EXT3_DELETE_THREAD -+/* Move blocks from to-be-truncated inode over to a new inode, and delete -+ * that one from the delete thread instead. This avoids a lot of latency -+ * when truncating large files. -+ * -+ * If we have any problem deferring the truncate, just truncate it right away. -+ * If we defer it, we also mark how many blocks it would free, so that we -+ * can keep the statfs data correct, and we know if we should sleep on the -+ * delete thread when we run out of space. -+ */ -+void ext3_truncate_thread(struct inode *old_inode) -+{ -+ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); -+ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode); -+ struct inode *new_inode; -+ handle_t *handle; -+ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); -+ -+ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next) -+ goto out_truncate; -+ -+ /* XXX This is a temporary limitation for code simplicity. -+ * We could truncate to arbitrary sizes at some later time. -+ */ -+ if (old_inode->i_size != 0) -+ goto out_truncate; -+ -+ /* We may want to truncate the inode immediately and not defer it */ -+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS || -+ old_inode->i_size > oei->i_disksize) -+ goto out_truncate; -+ -+ /* We can't use the delete thread as-is during real orphan recovery, -+ * as we add to the orphan list here, causing ext3_orphan_cleanup() -+ * to loop endlessly. It would be nice to do so, but needs work. -+ */ -+ if (oei->i_state & EXT3_STATE_DELETE || -+ sbi->s_mount_state & EXT3_ORPHAN_FS) { -+ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", -+ old_inode->i_ino, blocks); -+ goto out_truncate; -+ } -+ -+ ext3_discard_prealloc(old_inode); -+ -+ /* old_inode = 1 -+ * new_inode = sb + GDT + ibitmap -+ * orphan list = 1 inode/superblock for add, 2 inodes for del -+ * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS -+ */ -+ handle = ext3_journal_start(old_inode, 7); -+ if (IS_ERR(handle)) -+ goto out_truncate; -+ -+ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode); -+ if (IS_ERR(new_inode)) { -+ ext3_debug("truncate inode %lu directly (no new inodes)\n", -+ old_inode->i_ino); -+ goto out_journal; -+ } -+ -+ nei = EXT3_I(new_inode); -+ -+ down_write(&oei->truncate_sem); -+ new_inode->i_size = old_inode->i_size; -+ new_inode->i_blocks = old_inode->i_blocks; -+ new_inode->i_uid = old_inode->i_uid; -+ new_inode->i_gid = old_inode->i_gid; -+ new_inode->i_nlink = 0; -+ -+ /* FIXME when we do arbitrary truncates */ -+ old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0; -+ old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME; -+ -+ memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data)); -+ memset(oei->i_data, 0, sizeof(oei->i_data)); -+ -+ nei->i_disksize = oei->i_disksize; -+ nei->i_state |= EXT3_STATE_DELETE; -+ up_write(&oei->truncate_sem); -+ -+ if (ext3_orphan_add(handle, new_inode) < 0) -+ goto out_journal; -+ -+ if (ext3_orphan_del(handle, old_inode) < 0) { -+ ext3_orphan_del(handle, new_inode); -+ iput(new_inode); -+ goto out_journal; -+ } -+ -+ ext3_journal_stop(handle, old_inode); -+ -+ spin_lock(&sbi->s_delete_lock); -+ J_ASSERT(list_empty(&new_inode->i_dentry)); -+ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); -+ sbi->s_delete_blocks += blocks; -+ sbi->s_delete_inodes++; -+ spin_unlock(&sbi->s_delete_lock); -+ -+ ext3_debug("delete inode %lu (%lu blocks) by thread\n", -+ new_inode->i_ino, blocks); -+ -+ wake_up(&sbi->s_delete_thread_queue); -+ return; -+ -+out_journal: -+ ext3_journal_stop(handle, old_inode); -+out_truncate: -+ ext3_truncate(old_inode); -+} -+#endif /* EXT3_DELETE_THREAD */ -+ - /* - * ext3_get_inode_loc returns with an extra refcount against the - * inode's underlying buffer_head on success. -Index: linux-2.4.21-chaos/fs/ext3/file.c -=================================================================== ---- linux-2.4.21-chaos.orig/fs/ext3/file.c 2004-01-12 19:20:06.000000000 +0300 -+++ linux-2.4.21-chaos/fs/ext3/file.c 2004-01-12 19:20:07.000000000 +0300 -@@ -132,7 +132,11 @@ - }; - - struct inode_operations ext3_file_inode_operations = { -+#ifdef EXT3_DELETE_THREAD -+ truncate: ext3_truncate_thread, /* BKL held */ -+#else - truncate: ext3_truncate, /* BKL held */ -+#endif - setattr: ext3_setattr, /* BKL held */ - setxattr: ext3_setxattr, /* BKL held */ - getxattr: ext3_getxattr, /* BKL held */ -Index: linux-2.4.21-chaos/include/linux/ext3_fs.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/ext3_fs.h 2004-01-12 19:20:06.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/ext3_fs.h 2004-01-12 19:20:07.000000000 +0300 -@@ -195,6 +195,7 @@ - */ - #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ - #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */ -+#define EXT3_STATE_DELETE 0x00000010 /* deferred delete inode */ - - /* - * ioctl commands -@@ -323,6 +324,7 @@ - #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ - #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ - #define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */ -+#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef _LINUX_EXT2_FS_H -@@ -693,6 +695,9 @@ - extern int ext3_change_inode_journal_flag(struct inode *, int); - extern void ext3_truncate (struct inode *); - extern void ext3_set_inode_flags(struct inode *); -+#ifdef EXT3_DELETE_THREAD -+extern void ext3_truncate_thread(struct inode *inode); -+#endif - - /* ioctl.c */ - extern int ext3_ioctl (struct inode *, struct file *, unsigned int, -Index: linux-2.4.21-chaos/include/linux/ext3_fs_sb.h -=================================================================== ---- linux-2.4.21-chaos.orig/include/linux/ext3_fs_sb.h 2004-01-12 19:20:07.000000000 +0300 -+++ linux-2.4.21-chaos/include/linux/ext3_fs_sb.h 2004-01-12 20:53:51.000000000 +0300 -@@ -29,6 +29,8 @@ - - #define EXT3_MAX_GROUP_LOADED 32 - -+#define EXT3_DELETE_THREAD -+ - /* - * third extended-fs super-block data in memory - */ -@@ -76,6 +78,14 @@ - struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ - wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ - #endif -+#ifdef EXT3_DELETE_THREAD -+ spinlock_t s_delete_lock; -+ struct list_head s_delete_list; -+ unsigned long s_delete_blocks; -+ unsigned long s_delete_inodes; -+ wait_queue_head_t s_delete_thread_queue; -+ wait_queue_head_t s_delete_waiter_queue; -+#endif - }; - - #endif /* _LINUX_EXT3_FS_SB */