Whamcloud - gitweb
e01fecae45038a3f32cfd259716ad130a3991c43
[fs/lustre-release.git] / lustre / kernel_patches / patches / ext3-delete_thread-2.4.18.patch
1  fs/ext3/super.c            |  229 +++++++++++++++++++++++++++++++++++++++++++++
2  include/linux/ext3_fs.h    |    2 
3  include/linux/ext3_fs_sb.h |   10 +
4  3 files changed, 241 insertions(+)
5
6 --- linux-2.4.18-18.8.0-l15/fs/ext3/super.c~ext3-delete_thread-2.4.18   Tue Jun  3 17:26:21 2003
7 +++ linux-2.4.18-18.8.0-l15-adilger/fs/ext3/super.c     Wed Jun 18 11:59:14 2003
8 @@ -396,6 +396,219 @@ static void dump_orphan_list(struct supe
9         }
10  }
11  
12 +#ifdef EXT3_DELETE_THREAD
13 +/*
14 + * Kernel thread body: delete inodes in a loop until there are none left.
15 + * Normally, we run in the background doing the deletes and sleeping again,
16 + * and clients just add new inodes to be deleted onto the end of the list.
17 + * If someone is concerned about free space (e.g. block allocation or similar)
18 + * then they can sleep on s_delete_waiter_queue and be woken up when space
19 + * has been freed.  Waking with an empty list makes the thread exit.
20 + */
21 +int ext3_delete_thread(void *data)
22 +{
23 +       struct super_block *sb = data;
24 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
25 +       struct task_struct *tsk = current;
26 +
27 +       /* Almost like daemonize, but not quite */
28 +       exit_mm(current);
29 +       tsk->session = 1;
30 +       tsk->pgrp = 1;
31 +       tsk->tty = NULL;
32 +       exit_files(current);
33 +       reparent_to_init();
34 +
35 +       sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev));
36 +       sigfillset(&tsk->blocked);
37 +
38 +       /*tsk->flags |= PF_KERNTHREAD;*/
39 +
40 +       INIT_LIST_HEAD(&sbi->s_delete_list);    /* non-NULL .next: started */
41 +       wake_up(&sbi->s_delete_waiter_queue);   /* starter waits on .next */
42 +       ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev));
43 +
44 +       /* main loop: one drain pass per wakeup; an empty list means exit */
45 +       for (;;) {
46 +               wait_event_interruptible(sbi->s_delete_thread_queue,
47 +                                        !list_empty(&sbi->s_delete_list) ||
48 +                                        !test_opt(sb, ASYNCDEL));
49 +               ext3_debug("%s woken up: %lu inodes, %lu blocks\n",
50 +                          tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks);
51 +
52 +               spin_lock(&sbi->s_delete_lock);
53 +               if (list_empty(&sbi->s_delete_list)) {
54 +                       clear_opt(sbi->s_mount_opt, ASYNCDEL);
55 +                       memset(&sbi->s_delete_list, 0,
56 +                              sizeof(sbi->s_delete_list)); /* .next = NULL */
57 +                       spin_unlock(&sbi->s_delete_lock);
58 +                       ext3_debug("delete thread on %s exiting\n",
59 +                                  kdevname(sb->s_dev));
60 +                       wake_up(&sbi->s_delete_waiter_queue);
61 +                       break;
62 +               }
63 +               /* lock is dropped around iput(), retaken for the counters */
64 +               while (!list_empty(&sbi->s_delete_list)) {
65 +                       struct inode *inode=list_entry(sbi->s_delete_list.next,
66 +                                                      struct inode, i_dentry);
67 +                       unsigned long blocks = inode->i_blocks >>
68 +                                                       (inode->i_blkbits - 9);
69 +
70 +                       list_del_init(&inode->i_dentry);
71 +                       spin_unlock(&sbi->s_delete_lock);
72 +                       ext3_debug("%s delete ino %lu blk %lu\n",
73 +                                  tsk->comm, inode->i_ino, blocks);
74 +
75 +                       iput(inode);
76 +
77 +                       spin_lock(&sbi->s_delete_lock);
78 +                       sbi->s_delete_blocks -= blocks;
79 +                       sbi->s_delete_inodes--;
80 +               }
81 +               if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) {
82 +                       ext3_warning(sb, __FUNCTION__,
83 +                                    "%lu blocks, %lu inodes on list?\n",
84 +                                    sbi->s_delete_blocks,sbi->s_delete_inodes);
85 +                       sbi->s_delete_blocks = 0;
86 +                       sbi->s_delete_inodes = 0;
87 +               }
88 +               spin_unlock(&sbi->s_delete_lock);
89 +               wake_up(&sbi->s_delete_waiter_queue);   /* pass finished */
90 +       }
91 +
92 +       return 0;
93 +}
94 +
95 +static void ext3_start_delete_thread(struct super_block *sb)
96 +{
97 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
98 +       int pid;
99 +
100 +       /* The lock and both wait queues are set up unconditionally. */
101 +       spin_lock_init(&sbi->s_delete_lock);
102 +       init_waitqueue_head(&sbi->s_delete_thread_queue);
103 +       init_waitqueue_head(&sbi->s_delete_waiter_queue);
104 +       if (!test_opt(sb, ASYNCDEL))    /* asyncdel not requested */
105 +               return;
106 +
107 +       pid = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES);
108 +       if (pid >= 0) { /* sleep until the thread has set up s_delete_list */
109 +               wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next);
110 +               return;
111 +       }
112 +       printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n", pid);
113 +}
114 +
115 +static void ext3_stop_delete_thread(struct ext3_sb_info *sbi)
116 +{
117 +       if (sbi->s_delete_list.next == 0)       /* no thread is running */
118 +               return;
119 +       /* The thread zeroes the list head as it exits, so wait for that. */
120 +       clear_opt(sbi->s_mount_opt, ASYNCDEL);
121 +       wake_up(&sbi->s_delete_thread_queue);
122 +       wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next == 0);
123 +}
124 +
125 +/* delete_inode method installed when EXT3_DELETE_THREAD is defined.
126 + * Instead of playing games with the inode flags, destruction, etc we just
127 + * iget() a second in-core inode and put it on a list for the delete thread.
128 + * We need large parts of the inode struct in order to complete the
129 + * truncate and unlink, so we may as well just have a real inode to do it.
130 + * If we have any problem deferring the delete, just delete it right away.
131 + * If we defer it, we also mark how many blocks it would free, so that we
132 + * can keep the statfs data correct, and we know if we should sleep on the
133 + * delete thread when we run out of space.
134 + *
135 + * In 2.5 this can be done much more cleanly by just registering a "drop"
136 + * method in the super_operations struct.
137 + */
138 +static void ext3_delete_inode_thread(struct inode *old_inode)
139 +{
140 +       struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
141 +       struct inode *new_inode;
142 +       unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
143 +
144 +       if (is_bad_inode(old_inode)) {
145 +               clear_inode(old_inode);
146 +               return;
147 +       }
148 +
149 +       if (!test_opt(old_inode->i_sb, ASYNCDEL)) {
150 +               ext3_delete_inode(old_inode);
151 +               return;
152 +       }
153 +
154 +       /* Sync inode, few blocks, or no thread running: delete right now */
155 +       if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
156 +           !sbi->s_delete_list.next) {
157 +               ext3_delete_inode(old_inode);
158 +               return;
159 +       }
160 +       /* EXT3_STATE_DELETE marks a copy we queued earlier: really delete it */
161 +       if ((EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) ||
162 +           (EXT3_SB(old_inode->i_sb)->s_mount_state & EXT3_ORPHAN_FS)) {
163 +               ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
164 +                          old_inode->i_ino, blocks);
165 +               ext3_delete_inode(old_inode);
166 +               return;
167 +       }
168 +
169 +       /* We can iget this inode again here, because our caller has unhashed
170 +        * old_inode, so new_inode will be in a different inode struct.
171 +        *
172 +        * We need to ensure that the i_orphan pointers in the other inodes
173 +        * point at the new inode copy instead of the old one so the orphan
174 +        * list doesn't get corrupted when the old orphan inode is freed.
175 +        */
176 +       down(&sbi->s_orphan_lock);
177 +       /* NOTE(review): presumably lets iget() load an unlinked inode */
178 +       EXT3_SB(old_inode->i_sb)->s_mount_state |= EXT3_ORPHAN_FS;
179 +       new_inode = iget(old_inode->i_sb, old_inode->i_ino);
180 +       EXT3_SB(old_inode->i_sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
181 +       if (is_bad_inode(new_inode)) {
182 +               printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
183 +               iput(new_inode);
184 +               new_inode = NULL;
185 +       }
186 +       if (!new_inode) {
187 +               up(&sbi->s_orphan_lock);
188 +               ext3_debug("delete inode %lu directly (bad read)\n",
189 +                          old_inode->i_ino);
190 +               ext3_delete_inode(old_inode);
191 +               return;
192 +       }
193 +       J_ASSERT(new_inode != old_inode);
194 +
195 +       J_ASSERT(!list_empty(&EXT3_I(old_inode)->i_orphan));
196 +       /* Ugh.  We need to insert new_inode into the same spot on the list
197 +        * as old_inode was, to ensure the in-memory orphan list is still
198 +        * in the same order as the on-disk orphan list (badness otherwise).
199 +        */
200 +       EXT3_I(new_inode)->i_orphan = EXT3_I(old_inode)->i_orphan;
201 +       EXT3_I(new_inode)->i_orphan.next->prev = &EXT3_I(new_inode)->i_orphan;
202 +       EXT3_I(new_inode)->i_orphan.prev->next = &EXT3_I(new_inode)->i_orphan;
203 +       EXT3_I(new_inode)->i_state |= EXT3_STATE_DELETE;
204 +       up(&sbi->s_orphan_lock);
205 +       /* new_inode now holds old_inode's orphan-list slot; drop the old one */
206 +       clear_inode(old_inode);
207 +
208 +       spin_lock(&sbi->s_delete_lock);
209 +       J_ASSERT(list_empty(&new_inode->i_dentry));
210 +       list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
211 +       sbi->s_delete_blocks += blocks;
212 +       sbi->s_delete_inodes++;
213 +       spin_unlock(&sbi->s_delete_lock);
214 +
215 +       ext3_debug("delete inode %lu (%lu blocks) by thread\n",
216 +                  new_inode->i_ino, blocks);
217 +
218 +       wake_up(&sbi->s_delete_thread_queue);
219 +}
220 +#else
221 +#define ext3_start_delete_thread(sbi) do {} while(0) /* no-op stub */
222 +#define ext3_stop_delete_thread(sbi) do {} while(0) /* no-op stub */
223 +#endif /* EXT3_DELETE_THREAD */
224 +
225  void ext3_put_super (struct super_block * sb)
226  {
227         struct ext3_sb_info *sbi = EXT3_SB(sb);
228 @@ -403,6 +615,7 @@ void ext3_put_super (struct super_block 
229         kdev_t j_dev = sbi->s_journal->j_dev;
230         int i;
231  
232 +       ext3_stop_delete_thread(sbi);
233         ext3_xattr_put_super(sb);
234         journal_destroy(sbi->s_journal);
235         if (!(sb->s_flags & MS_RDONLY)) {
236 @@ -451,7 +664,11 @@ static struct super_operations ext3_sops
237         write_inode:    ext3_write_inode,       /* BKL not held.  Don't need */
238         dirty_inode:    ext3_dirty_inode,       /* BKL not held.  We take it */
239         put_inode:      ext3_put_inode,         /* BKL not held.  Don't need */
240 +#ifdef EXT3_DELETE_THREAD
241 +       delete_inode:   ext3_delete_inode_thread,/* BKL not held. We take it */
242 +#else
243         delete_inode:   ext3_delete_inode,      /* BKL not held.  We take it */
244 +#endif
245         put_super:      ext3_put_super,         /* BKL held */
246         write_super:    ext3_write_super,       /* BKL held */
247         write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */
248 @@ -511,6 +728,14 @@ static int parse_options (char * options
249              this_char = strtok (NULL, ",")) {
250                 if ((value = strchr (this_char, '=')) != NULL)
251                         *value++ = 0;
252 +#ifdef EXT3_DELETE_THREAD
253 +               if (!strcmp(this_char, "asyncdel"))
254 +                       set_opt(*mount_options, ASYNCDEL);
255 +               else if (!strcmp(this_char, "noasyncdel"))
256 +                       clear_opt(*mount_options, ASYNCDEL);
257 +               else
258 +#endif
259 +
260                 if (!strcmp (this_char, "bsddf"))
261                         clear_opt (*mount_options, MINIX_DF);
262                 else if (!strcmp (this_char, "nouid32")) {
263 @@ -1206,6 +1431,7 @@ struct super_block * ext3_read_super (st
264         }
265  
266         ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
267 +       ext3_start_delete_thread(sb);
268         /*
269          * akpm: core read_super() calls in here with the superblock locked.
270          * That deadlocks, because orphan cleanup needs to lock the superblock
271 @@ -1648,6 +1874,9 @@ int ext3_remount (struct super_block * s
272         if (!parse_options(data, &tmp, sbi, &tmp, 1))
273                 return -EINVAL;
274  
275 +       if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY))
276 +               ext3_stop_delete_thread(sbi);
277 +
278         if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
279                 ext3_abort(sb, __FUNCTION__, "Abort forced by user");
280  
281 --- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs.h~ext3-delete_thread-2.4.18   Tue Jun  3 17:26:20 2003
282 +++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs.h     Tue Jun 17 12:36:56 2003
283 @@ -190,6 +190,7 @@ struct ext3_group_desc
284   */
285  #define EXT3_STATE_JDATA               0x00000001 /* journaled data exists */
286  #define EXT3_STATE_NEW                 0x00000002 /* inode is newly created */
287 +#define EXT3_STATE_DELETE              0x00000010 /* deferred delete inode */
288  
289  /*
290   * ioctl commands
291 @@ -317,6 +318,7 @@ struct ext3_inode {
292  #define EXT3_MOUNT_UPDATE_JOURNAL      0x1000  /* Update the journal format */
293  #define EXT3_MOUNT_NO_UID32            0x2000  /* Disable 32-bit UIDs */
294  #define EXT3_MOUNT_INDEX               0x4000  /* Enable directory index */
295 +#define EXT3_MOUNT_ASYNCDEL            0x20000 /* Delayed deletion */
296  
297  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
298  #ifndef _LINUX_EXT2_FS_H
299 --- linux-2.4.18-18.8.0-l15/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.18        Tue Jun  3 17:26:21 2003
300 +++ linux-2.4.18-18.8.0-l15-adilger/include/linux/ext3_fs_sb.h  Tue Jun 17 12:36:56 2003
301 @@ -29,6 +29,8 @@
302  
303  #define EXT3_MAX_GROUP_LOADED  32
304  
305 +#define EXT3_DELETE_THREAD
306 +
307  /*
308   * third extended-fs super-block data in memory
309   */
310 @@ -74,6 +76,14 @@ struct ext3_sb_info {
311         struct timer_list turn_ro_timer;        /* For turning read-only (crash simulation) */
312         wait_queue_head_t ro_wait_queue;        /* For people waiting for the fs to go read-only */
313  #endif
314 +#ifdef EXT3_DELETE_THREAD
315 +       spinlock_t s_delete_lock;
316 +       struct list_head s_delete_list;
317 +       unsigned long s_delete_blocks;
318 +       unsigned long s_delete_inodes;
319 +       wait_queue_head_t s_delete_thread_queue;
320 +       wait_queue_head_t s_delete_waiter_queue;
321 +#endif
322  };
323  
324  #endif /* _LINUX_EXT3_FS_SB */
325
326 _