Whamcloud - gitweb
merge b_devel into HEAD (20030626 merge tag) for 0.7.1
[fs/lustre-release.git] / lustre / kernel_patches / patches / ext3-delete_thread-2.4.20.patch
1 diff -puNr origin/fs/ext3/super.c linux/fs/ext3/super.c
2 --- origin/fs/ext3/super.c      2003-05-04 17:23:52.000000000 +0400
3 +++ linux/fs/ext3/super.c       2003-05-04 17:09:20.000000000 +0400
4 @@ -398,6 +398,219 @@ static void dump_orphan_list(struct supe
5         }
6  }
7  
8 +#ifdef EXT3_DELETE_THREAD
9 +/*
10 + * Kernel thread body: drain sbi->s_delete_list for the super_block in @data,
11 + * iput()ing each queued inode, then sleep on s_delete_thread_queue until more
12 + * inodes are queued or ASYNCDEL is cleared.  s_delete_waiter_queue is woken
13 + * after startup, after each drain pass (so callers concerned about free space
14 + * can sleep on it) and on exit.  Exits, returning 0, when woken with an empty
15 + * list; it then clears ASYNCDEL and zeroes s_delete_list (so .next == NULL).
16 + */
17 +int ext3_delete_thread(void *data)
18 +{
19 +       struct super_block *sb = data;
20 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
21 +       struct task_struct *tsk = current;
22 +
23 +       /* Almost like daemonize, but not quite: drop mm and files, detach tty */
24 +       exit_mm(current);
25 +       tsk->session = 1;
26 +       tsk->pgrp = 1;
27 +       tsk->tty = NULL;
28 +       exit_files(current);
29 +       reparent_to_init();
30 +
31 +       sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev));
32 +       sigfillset(&tsk->blocked);      /* block every signal */
33 +
34 +       /*tsk->flags |= PF_KERNTHREAD;*/
35 +
36 +       INIT_LIST_HEAD(&sbi->s_delete_list);    /* non-NULL .next marks "thread running" */
37 +       wake_up(&sbi->s_delete_waiter_queue);   /* lets ext3_start_delete_thread() return */
38 +       ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev));
39 +
40 +       /* main loop: sleep, drain the list, wake waiters; leave when woken idle */
41 +       for (;;) {
42 +               wait_event_interruptible(sbi->s_delete_thread_queue,
43 +                                        !list_empty(&sbi->s_delete_list) ||
44 +                                        !test_opt(sb, ASYNCDEL));
45 +               ext3_debug("%s woken up: %lu inodes, %lu blocks\n",
46 +                          tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks);
47 +
48 +               spin_lock(&sbi->s_delete_lock);
49 +               if (list_empty(&sbi->s_delete_list)) {  /* nothing queued: shut down */
50 +                       clear_opt(sbi->s_mount_opt, ASYNCDEL);
51 +                       memset(&sbi->s_delete_list, 0,
52 +                              sizeof(sbi->s_delete_list));     /* .next = NULL again */
53 +                       spin_unlock(&sbi->s_delete_lock);
54 +                       ext3_debug("delete thread on %s exiting\n",
55 +                                  kdevname(sb->s_dev));
56 +                       wake_up(&sbi->s_delete_waiter_queue);
57 +                       break;
58 +               }
59 +
60 +               while (!list_empty(&sbi->s_delete_list)) {
61 +                       struct inode *inode=list_entry(sbi->s_delete_list.next,
62 +                                                      struct inode, i_dentry); /* queued via i_dentry */
63 +                       unsigned long blocks = inode->i_blocks >>
64 +                                                       (inode->i_blkbits - 9); /* presumably 512-byte units -> fs blocks */
65 +
66 +                       list_del_init(&inode->i_dentry);
67 +                       spin_unlock(&sbi->s_delete_lock);       /* lock is not held across iput() */
68 +                       ext3_debug("%s delete ino %lu blk %lu\n",
69 +                                  tsk->comm, inode->i_ino, blocks);
70 +
71 +                       iput(inode);    /* drops the ref taken by iget() in ext3_delete_inode_thread() */
72 +
73 +                       spin_lock(&sbi->s_delete_lock);
74 +                       sbi->s_delete_blocks -= blocks;
75 +                       sbi->s_delete_inodes--;
76 +               }
77 +               if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) {   /* list is empty, so both should be 0 */
78 +                       ext3_warning(sb, __FUNCTION__,
79 +                                    "%lu blocks, %lu inodes on list?\n",
80 +                                    sbi->s_delete_blocks,sbi->s_delete_inodes);
81 +                       sbi->s_delete_blocks = 0;
82 +                       sbi->s_delete_inodes = 0;
83 +               }
84 +               spin_unlock(&sbi->s_delete_lock);
85 +               wake_up(&sbi->s_delete_waiter_queue);   /* tell waiters a drain pass finished */
86 +       }
87 +
88 +       return 0;
89 +}
90 +
91 +static void ext3_start_delete_thread(struct super_block *sb)
92 +{
93 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
94 +       int rc;
95 +       /* Set up the delete lock and wait queues; spawn the thread if ASYNCDEL is set. */
96 +       spin_lock_init(&sbi->s_delete_lock);
97 +       init_waitqueue_head(&sbi->s_delete_thread_queue);
98 +       init_waitqueue_head(&sbi->s_delete_waiter_queue);
99 +       /* The lock and queues above are initialised even when async delete is off. */
100 +       if (!test_opt(sb, ASYNCDEL))
101 +               return;
102 +
103 +       rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES);
104 +       if (rc < 0)     /* NOTE(review): ASYNCDEL stays set here; deletes fall back via !s_delete_list.next */
105 +               printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n",
106 +                      rc);
107 +       else    /* wait for the thread's INIT_LIST_HEAD; assumes .next was NULL before - TODO confirm */
108 +               wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next);
109 +}
110 +
111 +static void ext3_stop_delete_thread(struct ext3_sb_info *sbi)
112 +{
113 +       if (sbi->s_delete_list.next == NULL)    /* thread never started, or already gone */
114 +               return;
115 +       /* The thread zeroes s_delete_list as it exits; wait for that, not list_empty(). */
116 +       clear_opt(sbi->s_mount_opt, ASYNCDEL);
117 +       wake_up(&sbi->s_delete_thread_queue);
118 +       wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next == NULL);
119 +}
120 +
121 +/* delete_inode super operation used when EXT3_DELETE_THREAD is defined.
122 + * Instead of playing games with the inode flags, destruction, etc. we iget()
123 + * a second in-core copy of the inode and queue it on sbi->s_delete_list for
124 + * the delete thread; old_inode itself is released with clear_inode().
125 + *
126 + * If we have any problem deferring the delete, just delete it right away via
127 + * ext3_delete_inode().  If we defer it, we also add the blocks it would free
128 + * to s_delete_blocks, so that the statfs data can be kept correct and we know
129 + * if we should sleep on the delete thread when we run out of space.
130 + *
131 + * In 2.5 this can be done much more cleanly by just registering a "drop"
132 + * method in the super_operations struct.
133 + */
134 +static void ext3_delete_inode_thread(struct inode *old_inode)
135 +{
136 +       struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
137 +       struct inode *new_inode;
138 +       unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
139 +
140 +       if (is_bad_inode(old_inode)) {
141 +               clear_inode(old_inode);
142 +               return;
143 +       }
144 +
145 +       if (!test_opt(old_inode->i_sb, ASYNCDEL)) {     /* async delete disabled */
146 +               ext3_delete_inode(old_inode);
147 +               return;
148 +       }
149 +
150 +       /* Delete now, not deferred: sync inode, <= EXT3_NDIR_BLOCKS blocks, or no thread */
151 +       if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
152 +           !sbi->s_delete_list.next) {
153 +               ext3_delete_inode(old_inode);
154 +               return;
155 +       }
156 +
157 +       if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) || /* already a deferred copy */
158 +           (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
159 +               ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
160 +                          old_inode->i_ino, blocks);
161 +               ext3_delete_inode(old_inode);
162 +               return;
163 +       }
164 +
165 +       /* We can iget this inode again here, because our caller has unhashed
166 +        * old_inode, so new_inode will be in a different inode struct.
167 +        *
168 +        * We need to ensure that the i_orphan pointers in the other inodes
169 +        * point at the new inode copy instead of the old one so the orphan
170 +        * list doesn't get corrupted when the old orphan inode is freed.
171 +        */
172 +       down(&sbi->s_orphan_lock);
173 +
174 +       EXT3_SB(old_inode->i_sb)->s_mount_state |= EXT3_ORPHAN_FS;      /* NOTE(review): presumably lets iget() read an unlinked inode - confirm */
175 +       new_inode = iget(old_inode->i_sb, old_inode->i_ino);
176 +       EXT3_SB(old_inode->i_sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
177 +       if (is_bad_inode(new_inode)) {
178 +               printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
179 +               iput(new_inode);
180 +               new_inode = NULL;
181 +       }
182 +       if (!new_inode) {       /* could not get a second copy: fall back to a direct delete */
183 +               up(&sbi->s_orphan_lock);
184 +               ext3_debug("delete inode %lu directly (bad read)\n",
185 +                          old_inode->i_ino);
186 +               ext3_delete_inode(old_inode);
187 +               return;
188 +       }
189 +       J_ASSERT(new_inode != old_inode);
190 +
191 +       J_ASSERT(!list_empty(&EXT3_I(old_inode)->i_orphan));
192 +       /* Ugh.  We need to insert new_inode into the same spot on the list
193 +        * as old_inode was, to ensure the in-memory orphan list is still
194 +        * in the same order as the on-disk orphan list (badness otherwise).
195 +        */
196 +       EXT3_I(new_inode)->i_orphan = EXT3_I(old_inode)->i_orphan;      /* copy old's prev/next links */
197 +       EXT3_I(new_inode)->i_orphan.next->prev = &EXT3_I(new_inode)->i_orphan;
198 +       EXT3_I(new_inode)->i_orphan.prev->next = &EXT3_I(new_inode)->i_orphan;
199 +       EXT3_I(new_inode)->i_state |= EXT3_STATE_DELETE;        /* next delete_inode call on it deletes for real */
200 +       up(&sbi->s_orphan_lock);
201 +
202 +       clear_inode(old_inode);
203 +
204 +       spin_lock(&sbi->s_delete_lock);
205 +       J_ASSERT(list_empty(&new_inode->i_dentry));
206 +       list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);       /* i_dentry doubles as the delete-list link */
207 +       sbi->s_delete_blocks += blocks;
208 +       sbi->s_delete_inodes++;
209 +       spin_unlock(&sbi->s_delete_lock);
210 +
211 +       ext3_debug("delete inode %lu (%lu blocks) by thread\n",
212 +                  new_inode->i_ino, blocks);
213 +
214 +       wake_up(&sbi->s_delete_thread_queue);
215 +}
216 +#else /* !EXT3_DELETE_THREAD: starting and stopping the thread become no-ops */
217 +#define ext3_start_delete_thread(sbi) do {} while(0)
218 +#define ext3_stop_delete_thread(sbi) do {} while(0)
219 +#endif /* EXT3_DELETE_THREAD */
220 +
221  void ext3_put_super (struct super_block * sb)
222  {
223         struct ext3_sb_info *sbi = EXT3_SB(sb);
224 @@ -405,6 +611,7 @@ void ext3_put_super (struct super_block 
225         kdev_t j_dev = sbi->s_journal->j_dev;
226         int i;
227  
228 +       ext3_stop_delete_thread(sbi);
229         ext3_xattr_put_super(sb);
230         journal_destroy(sbi->s_journal);
231         if (!(sb->s_flags & MS_RDONLY)) {
232 @@ -453,7 +660,11 @@ static struct super_operations ext3_sops
233         write_inode:    ext3_write_inode,       /* BKL not held.  Don't need */
234         dirty_inode:    ext3_dirty_inode,       /* BKL not held.  We take it */
235         put_inode:      ext3_put_inode,         /* BKL not held.  Don't need */
236 +#ifdef EXT3_DELETE_THREAD
237 +       delete_inode:   ext3_delete_inode_thread,/* BKL not held. We take it */
238 +#else
239         delete_inode:   ext3_delete_inode,      /* BKL not held.  We take it */
240 +#endif
241         put_super:      ext3_put_super,         /* BKL held */
242         write_super:    ext3_write_super,       /* BKL held */
243         write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */
244 @@ -514,6 +725,13 @@ static int parse_options (char * options
245              this_char = strtok (NULL, ",")) {
246                 if ((value = strchr (this_char, '=')) != NULL)
247                         *value++ = 0;
248 +#ifdef EXT3_DELETE_THREAD
249 +               if (!strcmp(this_char, "asyncdel"))
250 +                       set_opt(*mount_options, ASYNCDEL);
251 +               else if (!strcmp(this_char, "noasyncdel"))
252 +                       clear_opt(*mount_options, ASYNCDEL);
253 +               else
254 +#endif
255  #ifdef CONFIG_EXT3_FS_XATTR_USER
256                 if (!strcmp (this_char, "user_xattr"))
257                         set_opt (*mount_options, XATTR_USER);
258 @@ -1220,6 +1436,7 @@ struct super_block * ext3_read_super (st
259         }
260  
261         ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
262 +       ext3_start_delete_thread(sb);
263         /*
264          * akpm: core read_super() calls in here with the superblock locked.
265          * That deadlocks, because orphan cleanup needs to lock the superblock
266 @@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s
267         if (!parse_options(data, &tmp, sbi, &tmp, 1))
268                 return -EINVAL;
269  
270 +       if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY))
271 +               ext3_stop_delete_thread(sbi);
272 +
273         if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
274                 ext3_abort(sb, __FUNCTION__, "Abort forced by user");
275  
276 diff -puNr origin/include/linux/ext3_fs.h linux/include/linux/ext3_fs.h
277 --- origin/include/linux/ext3_fs.h      2003-05-04 17:22:49.000000000 +0400
278 +++ linux/include/linux/ext3_fs.h       2003-05-04 15:06:10.000000000 +0400
279 @@ -193,6 +193,7 @@ struct ext3_group_desc
280   */
281  #define EXT3_STATE_JDATA               0x00000001 /* journaled data exists */
282  #define EXT3_STATE_NEW                 0x00000002 /* inode is newly created */
283 +#define EXT3_STATE_DELETE              0x00000010 /* deferred delete inode */
284  
285  /*
286   * ioctl commands
287 @@ -321,6 +322,7 @@ struct ext3_inode {
288  #define EXT3_MOUNT_UPDATE_JOURNAL      0x1000  /* Update the journal format */
289  #define EXT3_MOUNT_NO_UID32            0x2000  /* Disable 32-bit UIDs */
290  #define EXT3_MOUNT_XATTR_USER          0x4000  /* Extended user attributes */
291 +#define EXT3_MOUNT_ASYNCDEL            0x20000 /* Delayed deletion */
292  
293  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
294  #ifndef _LINUX_EXT2_FS_H
295 diff -puNr origin/include/linux/ext3_fs_sb.h linux/include/linux/ext3_fs_sb.h
296 --- origin/include/linux/ext3_fs_sb.h   2003-05-04 17:23:52.000000000 +0400
297 +++ linux/include/linux/ext3_fs_sb.h    2003-05-04 11:37:04.000000000 +0400
298 @@ -29,6 +29,8 @@
299  
300  #define EXT3_MAX_GROUP_LOADED  8
301  
302 +#define EXT3_DELETE_THREAD
303 +
304  /*
305   * third extended-fs super-block data in memory
306   */
307 @@ -76,6 +78,14 @@ struct ext3_sb_info {
308         struct timer_list turn_ro_timer;        /* For turning read-only (crash simulation) */
309         wait_queue_head_t ro_wait_queue;        /* For people waiting for the fs to go read-only */
310  #endif
311 +#ifdef EXT3_DELETE_THREAD
312 +       spinlock_t s_delete_lock;
313 +       struct list_head s_delete_list;
314 +       unsigned long s_delete_blocks;
315 +       unsigned long s_delete_inodes;
316 +       wait_queue_head_t s_delete_thread_queue;
317 +       wait_queue_head_t s_delete_waiter_queue;
318 +#endif
319  };
320  
321  #endif /* _LINUX_EXT3_FS_SB */