X-Git-Url: https://git.whamcloud.com/?a=blobdiff_plain;f=lustre%2Fkernel_patches%2Fpatches%2Fext3-delete_thread-2.4.20.patch;h=a8816ec42a7615ad9270f3b2c98d7a36911396d4;hb=887646a546deb11ad8046ab6ed072691fb2c513b;hp=34c51588fe208fb2ef31036b2ebfdc86023ab502;hpb=067c526a4e5592095a3335478ec2580535ed2be4;p=fs%2Flustre-release.git diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch index 34c5158..a8816ec 100644 --- a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch +++ b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20.patch @@ -1,7 +1,13 @@ -diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c ---- origin/fs/ext3/super.c 2003-05-04 17:23:52.000000000 +0400 -+++ linux/fs/ext3/super.c 2003-05-04 17:09:20.000000000 +0400 -@@ -398,6 +398,219 @@ static void dump_orphan_list(struct supe + fs/ext3/file.c | 4 + fs/ext3/inode.c | 116 ++++++++++++++++++++++ + fs/ext3/super.c | 230 +++++++++++++++++++++++++++++++++++++++++++++ + include/linux/ext3_fs.h | 5 + include/linux/ext3_fs_sb.h | 10 + + 5 files changed, 365 insertions(+) + +--- linux/fs/ext3/super.c~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:32 2003 ++++ linux-mmonroe/fs/ext3/super.c Thu Jul 10 14:11:33 2003 +@@ -400,6 +400,220 @@ static void dump_orphan_list(struct supe } } @@ -126,14 +132,12 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c + * If we have any problem deferring the delete, just delete it right away. + * If we defer it, we also mark how many blocks it would free, so that we + * can keep the statfs data correct, and we know if we should sleep on the -+ * truncate thread when we run out of space. -+ * -+ * In 2.5 this can be done much more cleanly by just registering a "drop" -+ * method in the super_operations struct. ++ * delete thread when we run out of space. + */ +static void ext3_delete_inode_thread(struct inode *old_inode) +{ + struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); ++ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode); + struct inode *new_inode; + unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); + @@ -142,24 +146,22 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c + return; + } + -+ if (!test_opt(old_inode->i_sb, ASYNCDEL)) { -+ ext3_delete_inode(old_inode); -+ return; -+ } ++ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next) ++ goto out_delete; + + /* We may want to delete the inode immediately and not defer it */ -+ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS || -+ !sbi->s_delete_list.next) { -+ ext3_delete_inode(old_inode); -+ return; -+ } ++ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS) ++ goto out_delete; + -+ if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) || -+ (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) { ++ /* We can't use the delete thread as-is during real orphan recovery, ++ * as we add to the orphan list here, causing ext3_orphan_cleanup() ++ * to loop endlessly. It would be nice to do so, but needs work. ++ */ ++ if (oei->i_state & EXT3_STATE_DELETE || ++ sbi->s_mount_state & EXT3_ORPHAN_FS) { + ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", + old_inode->i_ino, blocks); -+ ext3_delete_inode(old_inode); -+ return; ++ goto out_delete; + } + + /* We can iget this inode again here, because our caller has unhashed @@ -171,9 +173,9 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c + */ + down(&sbi->s_orphan_lock); + -+ EXT3_SB(old_inode->i_sb)->s_mount_state |= EXT3_ORPHAN_FS; ++ sbi->s_mount_state |= EXT3_ORPHAN_FS; + new_inode = iget(old_inode->i_sb, old_inode->i_ino); -+ EXT3_SB(old_inode->i_sb)->s_mount_state &= ~EXT3_ORPHAN_FS; ++ sbi->s_mount_state &= ~EXT3_ORPHAN_FS; + if (is_bad_inode(new_inode)) { + printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino); + iput(new_inode); @@ -183,20 +185,21 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c + up(&sbi->s_orphan_lock); + ext3_debug("delete inode %lu directly (bad read)\n", + old_inode->i_ino); -+ ext3_delete_inode(old_inode); -+ return; ++ goto out_delete; + } + J_ASSERT(new_inode != old_inode); + -+ J_ASSERT(!list_empty(&EXT3_I(old_inode)->i_orphan)); ++ J_ASSERT(!list_empty(&oei->i_orphan)); ++ ++ nei = EXT3_I(new_inode); + /* Ugh. We need to insert new_inode into the same spot on the list + * as old_inode was, to ensure the in-memory orphan list is still + * in the same order as the on-disk orphan list (badness otherwise). + */ -+ EXT3_I(new_inode)->i_orphan = EXT3_I(old_inode)->i_orphan; -+ EXT3_I(new_inode)->i_orphan.next->prev = &EXT3_I(new_inode)->i_orphan; -+ EXT3_I(new_inode)->i_orphan.prev->next = &EXT3_I(new_inode)->i_orphan; -+ EXT3_I(new_inode)->i_state |= EXT3_STATE_DELETE; ++ nei->i_orphan = oei->i_orphan; ++ nei->i_orphan.next->prev = &nei->i_orphan; ++ nei->i_orphan.prev->next = &nei->i_orphan; ++ nei->i_state |= EXT3_STATE_DELETE; + up(&sbi->s_orphan_lock); + + clear_inode(old_inode); @@ -212,6 +215,10 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c + new_inode->i_ino, blocks); + + wake_up(&sbi->s_delete_thread_queue); ++ return; ++ ++out_delete: ++ ext3_delete_inode(old_inode); +} +#else +#define ext3_start_delete_thread(sbi) do {} while(0) @@ -221,7 +228,7 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c void ext3_put_super (struct super_block * sb) { struct ext3_sb_info *sbi = EXT3_SB(sb); -@@ -405,6 +611,7 @@ void ext3_put_super (struct super_block +@@ -407,6 +621,7 @@ void ext3_put_super (struct super_block kdev_t j_dev = sbi->s_journal->j_dev; int i; @@ -229,7 +236,7 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c ext3_xattr_put_super(sb); journal_destroy(sbi->s_journal); if (!(sb->s_flags & MS_RDONLY)) { -@@ -453,7 +660,11 @@ static struct super_operations ext3_sops +@@ -455,7 +670,11 @@ static struct super_operations ext3_sops write_inode: ext3_write_inode, /* BKL not held. Don't need */ dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */ put_inode: ext3_put_inode, /* BKL not held. Don't need */ @@ -240,11 +247,11 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c +#endif put_super: ext3_put_super, /* BKL held */ write_super: ext3_write_super, /* BKL held */ - write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */ -@@ -514,6 +725,13 @@ static int parse_options (char * options - this_char = strtok (NULL, ",")) { - if ((value = strchr (this_char, '=')) != NULL) - *value++ = 0; + sync_fs: ext3_sync_fs, +@@ -524,6 +743,13 @@ static int parse_options (char * options + clear_opt (*mount_options, XATTR_USER); + else + #endif +#ifdef EXT3_DELETE_THREAD + if (!strcmp(this_char, "asyncdel")) + set_opt(*mount_options, ASYNCDEL); @@ -252,10 +259,10 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c + clear_opt(*mount_options, ASYNCDEL); + else +#endif - #ifdef CONFIG_EXT3_FS_XATTR_USER - if (!strcmp (this_char, "user_xattr")) - set_opt (*mount_options, XATTR_USER); -@@ -1220,6 +1436,7 @@ struct super_block * ext3_read_super (st + if (!strcmp (this_char, "bsddf")) + clear_opt (*mount_options, MINIX_DF); + else if (!strcmp (this_char, "nouid32")) { +@@ -1223,6 +1449,7 @@ struct super_block * ext3_read_super (st } ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); @@ -263,7 +270,7 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c /* * akpm: core read_super() calls in here with the superblock locked. * That deadlocks, because orphan cleanup needs to lock the superblock -@@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s +@@ -1678,6 +1905,9 @@ int ext3_remount (struct super_block * s if (!parse_options(data, &tmp, sbi, &tmp, 1)) return -EINVAL; @@ -273,9 +280,143 @@ diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) ext3_abort(sb, __FUNCTION__, "Abort forced by user"); -diff -puNr origin/include/linux/ext3_fs.h linux/include/linux/ext3_fs.h ---- origin/include/linux/ext3_fs.h 2003-05-04 17:22:49.000000000 +0400 -+++ linux/include/linux/ext3_fs.h 2003-05-04 15:06:10.000000000 +0400 +--- linux/fs/ext3/inode.c~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:29 2003 ++++ linux-mmonroe/fs/ext3/inode.c Thu Jul 10 14:11:33 2003 +@@ -2013,6 +2013,118 @@ out_stop: + ext3_journal_stop(handle, inode); + } + ++#ifdef EXT3_DELETE_THREAD ++/* Move blocks from to-be-truncated inode over to a new inode, and delete ++ * that one from the delete thread instead. This avoids a lot of latency ++ * when truncating large files. ++ * ++ * If we have any problem deferring the truncate, just truncate it right away. ++ * If we defer it, we also mark how many blocks it would free, so that we ++ * can keep the statfs data correct, and we know if we should sleep on the ++ * delete thread when we run out of space. ++ */ ++void ext3_truncate_thread(struct inode *old_inode) ++{ ++ struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb); ++ struct ext3_inode_info *nei, *oei = EXT3_I(old_inode); ++ struct inode *new_inode; ++ handle_t *handle; ++ unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9); ++ ++ if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next) ++ goto out_truncate; ++ ++ /* XXX This is a temporary limitation for code simplicity. ++ * We could truncate to arbitrary sizes at some later time. ++ */ ++ if (old_inode->i_size != 0) ++ goto out_truncate; ++ ++ /* We may want to truncate the inode immediately and not defer it */ ++ if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS || ++ old_inode->i_size > oei->i_disksize) ++ goto out_truncate; ++ ++ /* We can't use the delete thread as-is during real orphan recovery, ++ * as we add to the orphan list here, causing ext3_orphan_cleanup() ++ * to loop endlessly. It would be nice to do so, but needs work. ++ */ ++ if (oei->i_state & EXT3_STATE_DELETE || ++ sbi->s_mount_state & EXT3_ORPHAN_FS) { ++ ext3_debug("doing deferred inode %lu delete (%lu blocks)\n", ++ old_inode->i_ino, blocks); ++ goto out_truncate; ++ } ++ ++ ext3_discard_prealloc(old_inode); ++ ++ /* old_inode = 1 ++ * new_inode = sb + GDT + ibitmap ++ * orphan list = 1 inode/superblock for add, 2 inodes for del ++ * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS ++ */ ++ handle = ext3_journal_start(old_inode, 7); ++ if (IS_ERR(handle)) ++ goto out_truncate; ++ ++ new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode); ++ if (IS_ERR(new_inode)) { ++ ext3_debug("truncate inode %lu directly (no new inodes)\n", ++ old_inode->i_ino); ++ goto out_journal; ++ } ++ ++ nei = EXT3_I(new_inode); ++ ++ down_write(&oei->truncate_sem); ++ new_inode->i_size = old_inode->i_size; ++ new_inode->i_blocks = old_inode->i_blocks; ++ new_inode->i_uid = old_inode->i_uid; ++ new_inode->i_gid = old_inode->i_gid; ++ new_inode->i_nlink = 0; ++ ++ /* FIXME when we do arbitrary truncates */ ++ old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0; ++ old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME; ++ ++ memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data)); ++ memset(oei->i_data, 0, sizeof(oei->i_data)); ++ ++ nei->i_disksize = oei->i_disksize; ++ nei->i_state |= EXT3_STATE_DELETE; ++ up_write(&oei->truncate_sem); ++ ++ if (ext3_orphan_add(handle, new_inode) < 0) ++ goto out_journal; ++ ++ if (ext3_orphan_del(handle, old_inode) < 0) { ++ ext3_orphan_del(handle, new_inode); ++ iput(new_inode); ++ goto out_journal; ++ } ++ ++ ext3_journal_stop(handle, old_inode); ++ ++ spin_lock(&sbi->s_delete_lock); ++ J_ASSERT(list_empty(&new_inode->i_dentry)); ++ list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list); ++ sbi->s_delete_blocks += blocks; ++ sbi->s_delete_inodes++; ++ spin_unlock(&sbi->s_delete_lock); ++ ++ ext3_debug("delete inode %lu (%lu blocks) by thread\n", ++ new_inode->i_ino, blocks); ++ ++ wake_up(&sbi->s_delete_thread_queue); ++ return; ++ ++out_journal: ++ ext3_journal_stop(handle, old_inode); ++out_truncate: ++ ext3_truncate(old_inode); ++} ++#endif /* EXT3_DELETE_THREAD */ ++ + /* + * ext3_get_inode_loc returns with an extra refcount against the + * inode's underlying buffer_head on success. +--- linux/fs/ext3/file.c~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:21 2003 ++++ linux-mmonroe/fs/ext3/file.c Thu Jul 10 14:12:17 2003 +@@ -125,7 +125,11 @@ struct file_operations ext3_file_operati + }; + + struct inode_operations ext3_file_inode_operations = { ++#ifdef EXT3_DELETE_THREAD ++ truncate: ext3_truncate_thread, /* BKL held */ ++#else + truncate: ext3_truncate, /* BKL held */ ++#endif + setattr: ext3_setattr, /* BKL held */ + setxattr: ext3_setxattr, /* BKL held */ + getxattr: ext3_getxattr, /* BKL held */ +--- linux/include/linux/ext3_fs.h~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:26 2003 ++++ linux-mmonroe/include/linux/ext3_fs.h Thu Jul 10 14:11:33 2003 @@ -193,6 +193,7 @@ struct ext3_group_desc */ #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ @@ -284,17 +425,26 @@ diff -puNr origin/include/linux/ext3_fs.h linux/include/linux/ext3_fs.h /* * ioctl commands -@@ -321,6 +322,7 @@ struct ext3_inode { +@@ -320,6 +321,7 @@ struct ext3_inode { #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */ #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */ #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */ -+#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ ++#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef _LINUX_EXT2_FS_H -diff -puNr origin/include/linux/ext3_fs_sb.h linux/include/linux/ext3_fs_sb.h ---- origin/include/linux/ext3_fs_sb.h 2003-05-04 17:23:52.000000000 +0400 -+++ linux/include/linux/ext3_fs_sb.h 2003-05-04 11:37:04.000000000 +0400 +@@ -694,6 +696,9 @@ extern void ext3_discard_prealloc (struc + extern void ext3_dirty_inode(struct inode *); + extern int ext3_change_inode_journal_flag(struct inode *, int); + extern void ext3_truncate (struct inode *); ++#ifdef EXT3_DELETE_THREAD ++extern void ext3_truncate_thread(struct inode *inode); ++#endif + + /* ioctl.c */ + extern int ext3_ioctl (struct inode *, struct file *, unsigned int, +--- linux/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:32 2003 ++++ linux-mmonroe/include/linux/ext3_fs_sb.h Thu Jul 10 14:11:33 2003 @@ -29,6 +29,8 @@ #define EXT3_MAX_GROUP_LOADED 8 @@ -319,3 +469,5 @@ diff -puNr origin/include/linux/ext3_fs_sb.h linux/include/linux/ext3_fs_sb.h }; #endif /* _LINUX_EXT3_FS_SB */ + +_