Whamcloud - gitweb
- landing of b_hd_cleanup_merge to HEAD.
[fs/lustre-release.git] / lustre / kernel_patches / patches / ext3-delete_thread-2.4.21-chaos.patch
diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.21-chaos.patch
deleted file mode 100644 (file)
index 2fc365d..0000000
+++ /dev/null
@@ -1,496 +0,0 @@
- fs/ext3/file.c             |    4 
- fs/ext3/inode.c            |  116 ++++++++++++++++++++++
- fs/ext3/super.c            |  230 +++++++++++++++++++++++++++++++++++++++++++++
- include/linux/ext3_fs.h    |    5 
- include/linux/ext3_fs_sb.h |   10 +
- 5 files changed, 365 insertions(+)
-
-Index: linux-2.4.21-chaos/fs/ext3/super.c
-===================================================================
---- linux-2.4.21-chaos.orig/fs/ext3/super.c    2004-01-12 19:20:07.000000000 +0300
-+++ linux-2.4.21-chaos/fs/ext3/super.c 2004-01-13 17:25:49.000000000 +0300
-@@ -425,6 +425,221 @@
-       }
- }
-+#ifdef EXT3_DELETE_THREAD
-+/*
-+ * Per-superblock delete thread; @data is the struct super_block.  Sleeps on
-+ * s_delete_thread_queue, then iput()s every inode queued on s_delete_list
-+ * (clients append to the tail).  Anyone concerned about free space (e.g.
-+ * block allocation) can sleep on s_delete_waiter_queue and is woken after
-+ * each batch.  Returns 0 once woken with an empty list, after clearing
-+ * ASYNCDEL and zeroing s_delete_list to signal that the thread has exited.
-+ */
-+int ext3_delete_thread(void *data)
-+{
-+      struct super_block *sb = data;
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      struct task_struct *tsk = current;
-+
-+      /* Almost like daemonize, but not quite */
-+      exit_mm(current);
-+      tsk->session = 1;
-+      tsk->pgrp = 1;
-+      tsk->tty = NULL;
-+      exit_files(current);
-+      reparent_to_init();
-+
-+      sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev));
-+      sigfillset(&tsk->blocked);      /* block every signal */
-+
-+      /*tsk->flags |= PF_KERNTHREAD;*/
-+
-+      INIT_LIST_HEAD(&sbi->s_delete_list);    /* non-NULL ->next == "running" */
-+      wake_up(&sbi->s_delete_waiter_queue);   /* releases ext3_start_delete_thread() */
-+      ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev));
-+
-+      /* main loop */
-+      for (;;) {
-+              wait_event_interruptible(sbi->s_delete_thread_queue,
-+                                       !list_empty(&sbi->s_delete_list) ||
-+                                       !test_opt(sb, ASYNCDEL));
-+              ext3_debug("%s woken up: %lu inodes, %lu blocks\n",
-+                         tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks);
-+
-+              spin_lock(&sbi->s_delete_lock);
-+              if (list_empty(&sbi->s_delete_list)) {  /* nothing queued: exit */
-+                      clear_opt(sbi->s_mount_opt, ASYNCDEL);
-+                      memset(&sbi->s_delete_list, 0,  /* ->next == 0: "not running" */
-+                             sizeof(sbi->s_delete_list));
-+                      spin_unlock(&sbi->s_delete_lock);
-+                      ext3_debug("delete thread on %s exiting\n",
-+                                 kdevname(sb->s_dev));
-+                      wake_up(&sbi->s_delete_waiter_queue);
-+                      break;
-+              }
-+
-+              while (!list_empty(&sbi->s_delete_list)) {
-+                      struct inode *inode=list_entry(sbi->s_delete_list.next,
-+                                                     struct inode, i_dentry);
-+                      unsigned long blocks = inode->i_blocks >>
-+                                                      (inode->i_blkbits - 9);
-+
-+                      list_del_init(&inode->i_dentry); /* i_dentry is the list link */
-+                      spin_unlock(&sbi->s_delete_lock); /* iput() runs unlocked */
-+                      ext3_debug("%s delete ino %lu blk %lu\n",
-+                                 tsk->comm, inode->i_ino, blocks);
-+
-+                      iput(inode);    /* drop the reference the list was holding */
-+
-+                      spin_lock(&sbi->s_delete_lock);
-+                      sbi->s_delete_blocks -= blocks;
-+                      sbi->s_delete_inodes--;
-+              }
-+              if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) {
-+                      ext3_warning(sb, __FUNCTION__,
-+                                   "%lu blocks, %lu inodes on list?\n",
-+                                   sbi->s_delete_blocks,sbi->s_delete_inodes);
-+                      sbi->s_delete_blocks = 0;       /* resync with the empty list */
-+                      sbi->s_delete_inodes = 0;
-+              }
-+              spin_unlock(&sbi->s_delete_lock);
-+              wake_up(&sbi->s_delete_waiter_queue);   /* batch done: notify waiters */
-+      }
-+
-+      return 0;
-+}
-+/* Set up delete-thread state in sbi; spawn the thread only if ASYNCDEL is set */
-+static void ext3_start_delete_thread(struct super_block *sb)
-+{
-+      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      int rc;
-+
-+      spin_lock_init(&sbi->s_delete_lock);
-+      init_waitqueue_head(&sbi->s_delete_thread_queue);
-+      init_waitqueue_head(&sbi->s_delete_waiter_queue);
-+
-+      if (!test_opt(sb, ASYNCDEL))
-+              return;
-+      /* NOTE(review): assumes s_delete_list starts zeroed (next == NULL) - confirm */
-+      rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES);
-+      if (rc < 0)
-+              printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n",
-+                     rc);
-+      else    /* block until the thread has initialised s_delete_list */
-+              wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next);
-+}
-+/* Ask the delete thread to exit, then wait until it has drained its list */
-+static void ext3_stop_delete_thread(struct ext3_sb_info *sbi)
-+{
-+      if (sbi->s_delete_list.next == 0)       /* not running: never started or already exited */
-+              return;
-+
-+      clear_opt(sbi->s_mount_opt, ASYNCDEL);  /* the thread's exit condition */
-+      wake_up(&sbi->s_delete_thread_queue);
-+      wait_event(sbi->s_delete_waiter_queue,  /* thread zeroes the list on exit */
-+                      sbi->s_delete_list.next == 0 && sbi->s_delete_inodes == 0);
-+}
-+
-+/* delete_inode hook: hand @old_inode's deletion over to the delete thread.
-+ * Instead of playing games with the inode flags, destruction, etc we just
-+ * iget a second in-memory copy of the inode and put it on a list for the
-+ * delete thread.  We need large parts of the inode struct to complete the
-+ * truncate and unlink, so we may as well just have a real inode to do it.
-+ *
-+ * If we have any problem deferring the delete, just delete it right away.
-+ * If we defer it, we also mark how many blocks it would free, so that we
-+ * can keep the statfs data correct, and we know if we should sleep on the
-+ * delete thread when we run out of space. */
-+static void ext3_delete_inode_thread(struct inode *old_inode)
-+{
-+      struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
-+      struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
-+      struct inode *new_inode;
-+      unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
-+
-+      if (is_bad_inode(old_inode)) {
-+              clear_inode(old_inode);
-+              return;
-+      }
-+      /* Asyncdel off, or no delete thread running: delete synchronously */
-+      if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
-+              goto out_delete;
-+
-+      /* We may want to delete the inode immediately and not defer it */
-+      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS)
-+              goto out_delete;
-+
-+      /* We can't use the delete thread as-is during real orphan recovery,
-+       * as we add to the orphan list here, causing ext3_orphan_cleanup()
-+       * to loop endlessly.  It would be nice to do so, but needs work.
-+       */
-+      if (oei->i_state & EXT3_STATE_DELETE ||
-+          sbi->s_mount_state & EXT3_ORPHAN_FS) {
-+              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
-+                         old_inode->i_ino, blocks);
-+              goto out_delete;
-+      }
-+
-+      /* We can iget this inode again here, because our caller has unhashed
-+       * old_inode, so new_inode will be in a different inode struct.
-+       *
-+       * We need to ensure that the i_orphan pointers in the other inodes
-+       * point at the new inode copy instead of the old one so the orphan
-+       * list doesn't get corrupted when the old orphan inode is freed.
-+       */
-+      down(&sbi->s_orphan_lock);
-+      /* NOTE(review): ORPHAN_FS presumably lets iget read an unlinked inode - confirm */
-+      sbi->s_mount_state |= EXT3_ORPHAN_FS;
-+      new_inode = iget(old_inode->i_sb, old_inode->i_ino);
-+      sbi->s_mount_state &= ~EXT3_ORPHAN_FS;
-+      if (new_inode && is_bad_inode(new_inode)) {     /* iget() may return NULL */
-+              printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
-+              iput(new_inode);
-+              new_inode = NULL;
-+      }
-+      if (!new_inode) {       /* allocation failure or bad inode: do not defer */
-+              up(&sbi->s_orphan_lock);
-+              ext3_debug("delete inode %lu directly (bad read)\n",
-+                         old_inode->i_ino);
-+              goto out_delete;
-+      }
-+      J_ASSERT(new_inode != old_inode);
-+
-+      J_ASSERT(!list_empty(&oei->i_orphan));
-+
-+      nei = EXT3_I(new_inode);
-+      /* Ugh.  We need to insert new_inode into the same spot on the list
-+       * as old_inode was, to ensure the in-memory orphan list is still
-+       * in the same order as the on-disk orphan list (badness otherwise).
-+       */
-+      nei->i_orphan = oei->i_orphan;
-+      nei->i_orphan.next->prev = &nei->i_orphan;
-+      nei->i_orphan.prev->next = &nei->i_orphan;
-+      nei->i_state |= EXT3_STATE_DELETE;      /* next delete of this copy is direct */
-+      up(&sbi->s_orphan_lock);
-+
-+      clear_inode(old_inode);         /* old copy is finished; new_inode carries on */
-+
-+      spin_lock(&sbi->s_delete_lock);
-+      J_ASSERT(list_empty(&new_inode->i_dentry));     /* i_dentry is the list link */
-+      list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
-+      sbi->s_delete_blocks += blocks;
-+      sbi->s_delete_inodes++;
-+      spin_unlock(&sbi->s_delete_lock);
-+
-+      ext3_debug("delete inode %lu (%lu blocks) by thread\n",
-+                 new_inode->i_ino, blocks);
-+
-+      wake_up(&sbi->s_delete_thread_queue);
-+      return;
-+
-+out_delete:
-+      ext3_delete_inode(old_inode);
-+}
-+#else /* !EXT3_DELETE_THREAD: no-op stubs so the call sites need no #ifdef */
-+#define ext3_start_delete_thread(sbi) do {} while(0)
-+#define ext3_stop_delete_thread(sbi) do {} while(0)
-+#endif /* EXT3_DELETE_THREAD */
-+
- void ext3_put_super (struct super_block * sb)
- {
-       struct ext3_sb_info *sbi = EXT3_SB(sb);
-@@ -432,6 +647,7 @@
-       kdev_t j_dev = sbi->s_journal->j_dev;
-       int i;
-+      J_ASSERT(sbi->s_delete_inodes == 0);
-       ext3_xattr_put_super(sb);
-       journal_destroy(sbi->s_journal);
-       if (!(sb->s_flags & MS_RDONLY)) {
-@@ -501,7 +717,11 @@
-       write_inode:    ext3_write_inode,       /* BKL not held.  Don't need */
-       dirty_inode:    ext3_dirty_inode,       /* BKL not held.  We take it */
-       put_inode:      ext3_put_inode,         /* BKL not held.  Don't need */
-+#ifdef EXT3_DELETE_THREAD
-+      delete_inode:   ext3_delete_inode_thread,/* BKL not held. We take it */
-+#else
-       delete_inode:   ext3_delete_inode,      /* BKL not held.  We take it */
-+#endif
-       put_super:      ext3_put_super,         /* BKL held */
-       write_super:    ext3_write_super,       /* BKL held */
-       sync_fs:        ext3_sync_fs,
-@@ -579,6 +799,13 @@
-                       *mount_flags &= ~MS_POSIXACL;
-               else
- #endif
-+#ifdef EXT3_DELETE_THREAD
-+              if (!strcmp(this_char, "asyncdel"))
-+                      set_opt(*mount_options, ASYNCDEL);
-+              else if (!strcmp(this_char, "noasyncdel"))
-+                      clear_opt(*mount_options, ASYNCDEL);
-+              else
-+#endif
-               if (!strcmp (this_char, "bsddf"))
-                       clear_opt (*mount_options, MINIX_DF);
-               else if (!strcmp (this_char, "nouid32")) {
-@@ -1283,6 +1510,7 @@
-       }
-       ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
-+      ext3_start_delete_thread(sb);
-       /*
-        * akpm: core read_super() calls in here with the superblock locked.
-        * That deadlocks, because orphan cleanup needs to lock the superblock
-@@ -1676,7 +1904,12 @@
- static int ext3_sync_fs(struct super_block *sb)
- {
-       tid_t target;
--      
-+
-+      if (atomic_read(&sb->s_active) == 0) {
-+              /* fs is being umounted: time to stop delete thread */
-+              ext3_stop_delete_thread(EXT3_SB(sb));
-+      }
-+
-       sb->s_dirt = 0;
-       target = log_start_commit(EXT3_SB(sb)->s_journal, NULL);
-       log_wait_commit(EXT3_SB(sb)->s_journal, target);
-@@ -1746,6 +1979,9 @@
-       if (!parse_options(data, &tmp, sbi, &mount_flags, &tmp, 1))
-               return -EINVAL;
-+      if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY))
-+              ext3_stop_delete_thread(sbi);
-+
-       if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
-               ext3_abort(sb, __FUNCTION__, "Abort forced by user");
-Index: linux-2.4.21-chaos/fs/ext3/inode.c
-===================================================================
---- linux-2.4.21-chaos.orig/fs/ext3/inode.c    2004-01-12 19:20:06.000000000 +0300
-+++ linux-2.4.21-chaos/fs/ext3/inode.c 2004-01-12 19:20:07.000000000 +0300
-@@ -2179,6 +2179,118 @@
-       return;         /* AKPM: return what? */
- }
-+#ifdef EXT3_DELETE_THREAD
-+/* Move blocks from to-be-truncated inode over to a new inode, and delete
-+ * that one from the delete thread instead.  This avoids a lot of latency
-+ * when truncating large files.  Only truncates to size 0 are deferred.
-+ *
-+ * If we have any problem deferring the truncate, drop the new inode (if one
-+ * was set up) and truncate right away.  If we defer it, we also mark how
-+ * many blocks it would free, so that we can keep the statfs data correct,
-+ * and we know if we should sleep on the delete thread when out of space.
-+ */
-+void ext3_truncate_thread(struct inode *old_inode)
-+{
-+      struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
-+      struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
-+      struct inode *new_inode;
-+      handle_t *handle;
-+      unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
-+
-+      if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
-+              goto out_truncate;
-+
-+      /* XXX This is a temporary limitation for code simplicity.
-+       *     We could truncate to arbitrary sizes at some later time.
-+       */
-+      if (old_inode->i_size != 0)
-+              goto out_truncate;
-+
-+      /* We may want to truncate the inode immediately and not defer it */
-+      if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
-+          old_inode->i_size > oei->i_disksize)
-+              goto out_truncate;
-+
-+      /* We can't use the delete thread as-is during real orphan recovery,
-+       * as we add to the orphan list here, causing ext3_orphan_cleanup()
-+       * to loop endlessly.  It would be nice to do so, but needs work.
-+       */
-+      if (oei->i_state & EXT3_STATE_DELETE ||
-+          sbi->s_mount_state & EXT3_ORPHAN_FS) {
-+              ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
-+                         old_inode->i_ino, blocks);
-+              goto out_truncate;
-+      }
-+
-+      ext3_discard_prealloc(old_inode);
-+
-+      /* old_inode   = 1
-+       * new_inode   = sb + GDT + ibitmap
-+       * orphan list = 1 inode/superblock for add, 2 inodes for del
-+       * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
-+       */
-+      handle = ext3_journal_start(old_inode, 7); /* NOTE(review): check 7 vs. list above */
-+      if (IS_ERR(handle))
-+              goto out_truncate;
-+
-+      new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
-+      if (IS_ERR(new_inode)) {        /* nothing allocated, so nothing to iput */
-+              ext3_debug("truncate inode %lu directly (no new inodes)\n",
-+                         old_inode->i_ino);
-+              goto out_journal;
-+      }
-+
-+      nei = EXT3_I(new_inode);
-+
-+      down_write(&oei->truncate_sem); /* move size, owner and block map under the sem */
-+      new_inode->i_size = old_inode->i_size;
-+      new_inode->i_blocks = old_inode->i_blocks;
-+      new_inode->i_uid = old_inode->i_uid;
-+      new_inode->i_gid = old_inode->i_gid;
-+      new_inode->i_nlink = 0;         /* unlinked: exists only to be deleted */
-+
-+      /* FIXME when we do arbitrary truncates */
-+      old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0;
-+      old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME;
-+
-+      memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data)); /* hand over the block map */
-+      memset(oei->i_data, 0, sizeof(oei->i_data));    /* old_inode now maps no blocks */
-+
-+      nei->i_disksize = oei->i_disksize;
-+      nei->i_state |= EXT3_STATE_DELETE;
-+      up_write(&oei->truncate_sem);
-+
-+      if (ext3_orphan_add(handle, new_inode) < 0)
-+              goto out_iput;  /* new_inode already owns the blocks: must not leak it */
-+      if (ext3_orphan_del(handle, old_inode) < 0) {
-+              ext3_orphan_del(handle, new_inode);
-+              goto out_iput;
-+      }
-+
-+      ext3_journal_stop(handle, old_inode);
-+
-+      spin_lock(&sbi->s_delete_lock);
-+      J_ASSERT(list_empty(&new_inode->i_dentry));     /* i_dentry is the list link */
-+      list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
-+      sbi->s_delete_blocks += blocks;
-+      sbi->s_delete_inodes++;
-+      spin_unlock(&sbi->s_delete_lock);
-+
-+      ext3_debug("delete inode %lu (%lu blocks) by thread\n",
-+                 new_inode->i_ino, blocks);
-+
-+      wake_up(&sbi->s_delete_thread_queue);
-+      return;
-+
-+out_iput:      /* i_nlink is 0, so dropping our reference is expected to delete it now */
-+      iput(new_inode);
-+out_journal:
-+      ext3_journal_stop(handle, old_inode);
-+out_truncate:
-+      ext3_truncate(old_inode);
-+}
-+#endif /* EXT3_DELETE_THREAD */
-+
- /* 
-  * ext3_get_inode_loc returns with an extra refcount against the
-  * inode's underlying buffer_head on success. 
-Index: linux-2.4.21-chaos/fs/ext3/file.c
-===================================================================
---- linux-2.4.21-chaos.orig/fs/ext3/file.c     2004-01-12 19:20:06.000000000 +0300
-+++ linux-2.4.21-chaos/fs/ext3/file.c  2004-01-12 19:20:07.000000000 +0300
-@@ -132,7 +132,11 @@
- };
- struct inode_operations ext3_file_inode_operations = {
-+#ifdef EXT3_DELETE_THREAD
-+      truncate:       ext3_truncate_thread,   /* BKL held */
-+#else
-       truncate:       ext3_truncate,          /* BKL held */
-+#endif
-       setattr:        ext3_setattr,           /* BKL held */
-       setxattr:       ext3_setxattr,          /* BKL held */
-       getxattr:       ext3_getxattr,          /* BKL held */
-Index: linux-2.4.21-chaos/include/linux/ext3_fs.h
-===================================================================
---- linux-2.4.21-chaos.orig/include/linux/ext3_fs.h    2004-01-12 19:20:06.000000000 +0300
-+++ linux-2.4.21-chaos/include/linux/ext3_fs.h 2004-01-12 19:20:07.000000000 +0300
-@@ -195,6 +195,7 @@
-  */
- #define EXT3_STATE_JDATA              0x00000001 /* journaled data exists */
- #define EXT3_STATE_NEW                        0x00000002 /* inode is newly created */
-+#define EXT3_STATE_DELETE             0x00000010 /* deferred delete inode */
- /*
-  * ioctl commands
-@@ -323,6 +324,7 @@
- #define EXT3_MOUNT_NO_UID32           0x2000  /* Disable 32-bit UIDs */
- #define EXT3_MOUNT_XATTR_USER         0x4000  /* Extended user attributes */
- #define EXT3_MOUNT_POSIX_ACL          0x8000  /* POSIX Access Control Lists */
-+#define EXT3_MOUNT_ASYNCDEL           0x20000 /* Delayed deletion */
- /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
- #ifndef _LINUX_EXT2_FS_H
-@@ -693,6 +695,9 @@
- extern int ext3_change_inode_journal_flag(struct inode *, int);
- extern void ext3_truncate (struct inode *);
- extern void ext3_set_inode_flags(struct inode *);
-+#ifdef EXT3_DELETE_THREAD
-+extern void ext3_truncate_thread(struct inode *inode);
-+#endif
- /* ioctl.c */
- extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
-Index: linux-2.4.21-chaos/include/linux/ext3_fs_sb.h
-===================================================================
---- linux-2.4.21-chaos.orig/include/linux/ext3_fs_sb.h 2004-01-12 19:20:07.000000000 +0300
-+++ linux-2.4.21-chaos/include/linux/ext3_fs_sb.h      2004-01-12 20:53:51.000000000 +0300
-@@ -29,6 +29,8 @@
- #define EXT3_MAX_GROUP_LOADED 32
-+#define EXT3_DELETE_THREAD
-+
- /*
-  * third extended-fs super-block data in memory
-  */
-@@ -76,6 +78,14 @@
-       struct timer_list turn_ro_timer;        /* For turning read-only (crash simulation) */
-       wait_queue_head_t ro_wait_queue;        /* For people waiting for the fs to go read-only */
- #endif
-+#ifdef EXT3_DELETE_THREAD
-+      spinlock_t s_delete_lock;
-+      struct list_head s_delete_list;
-+      unsigned long s_delete_blocks;
-+      unsigned long s_delete_inodes;
-+      wait_queue_head_t s_delete_thread_queue;
-+      wait_queue_head_t s_delete_waiter_queue;
-+#endif
- };
- #endif        /* _LINUX_EXT3_FS_SB */