2 fs/ext3/inode.c | 116 ++++++++++++++++++++++
3 fs/ext3/super.c | 230 +++++++++++++++++++++++++++++++++++++++++++++
4 include/linux/ext3_fs.h | 5
5 include/linux/ext3_fs_sb.h | 10 +
6 5 files changed, 365 insertions(+)
8 --- linux/fs/ext3/super.c~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:32 2003
9 +++ linux-mmonroe/fs/ext3/super.c Thu Jul 10 14:11:33 2003
10 @@ -400,6 +400,220 @@ static void dump_orphan_list(struct supe
14 +#ifdef EXT3_DELETE_THREAD
16 + * Delete inodes in a loop until there are no more to be deleted.
17 + * Normally, we run in the background doing the deletes and sleeping again,
18 + * and clients just add new inodes to be deleted onto the end of the list.
19 + * If someone is concerned about free space (e.g. block allocation or similar)
20 + * then they can sleep on s_delete_waiter_queue and be woken up when space has been freed.
23 +int ext3_delete_thread(void *data)
25 + struct super_block *sb = data;
26 + struct ext3_sb_info *sbi = EXT3_SB(sb);
27 + struct task_struct *tsk = current;
29 + /* Almost like daemonize, but not quite */
34 + exit_files(current);
37 + sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev));
38 + sigfillset(&tsk->blocked);
40 + /*tsk->flags |= PF_KERNTHREAD;*/
42 + INIT_LIST_HEAD(&sbi->s_delete_list);
43 + wake_up(&sbi->s_delete_waiter_queue);
44 + ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev));
48 + wait_event_interruptible(sbi->s_delete_thread_queue,
49 + !list_empty(&sbi->s_delete_list) ||
50 + !test_opt(sb, ASYNCDEL));
51 + ext3_debug("%s woken up: %lu inodes, %lu blocks\n",
52 + tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks);
54 + spin_lock(&sbi->s_delete_lock);
55 + if (list_empty(&sbi->s_delete_list)) {
56 + clear_opt(sbi->s_mount_opt, ASYNCDEL);
57 + memset(&sbi->s_delete_list, 0,
58 + sizeof(sbi->s_delete_list));
59 + spin_unlock(&sbi->s_delete_lock);
60 + ext3_debug("delete thread on %s exiting\n",
61 + kdevname(sb->s_dev));
62 + wake_up(&sbi->s_delete_waiter_queue);
66 + while (!list_empty(&sbi->s_delete_list)) {
67 + struct inode *inode=list_entry(sbi->s_delete_list.next,
68 + struct inode, i_dentry);
69 + unsigned long blocks = inode->i_blocks >>
70 + (inode->i_blkbits - 9);
72 + list_del_init(&inode->i_dentry);
73 + spin_unlock(&sbi->s_delete_lock);
74 + ext3_debug("%s delete ino %lu blk %lu\n",
75 + tsk->comm, inode->i_ino, blocks);
79 + spin_lock(&sbi->s_delete_lock);
80 + sbi->s_delete_blocks -= blocks;
81 + sbi->s_delete_inodes--;
83 + if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) {
84 + ext3_warning(sb, __FUNCTION__,
85 + "%lu blocks, %lu inodes on list?\n",
86 + sbi->s_delete_blocks,sbi->s_delete_inodes);
87 + sbi->s_delete_blocks = 0;
88 + sbi->s_delete_inodes = 0;
90 + spin_unlock(&sbi->s_delete_lock);
91 + wake_up(&sbi->s_delete_waiter_queue);
97 +static void ext3_start_delete_thread(struct super_block *sb)
99 + struct ext3_sb_info *sbi = EXT3_SB(sb);
102 + spin_lock_init(&sbi->s_delete_lock);
103 + init_waitqueue_head(&sbi->s_delete_thread_queue);
104 + init_waitqueue_head(&sbi->s_delete_waiter_queue);
106 + if (!test_opt(sb, ASYNCDEL))
109 + rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES);
111 + printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n",
114 + wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next);
117 +static void ext3_stop_delete_thread(struct ext3_sb_info *sbi)
119 + if (sbi->s_delete_list.next == 0) /* thread never started */
122 + clear_opt(sbi->s_mount_opt, ASYNCDEL);
123 + wake_up(&sbi->s_delete_thread_queue);
124 + wait_event(sbi->s_delete_waiter_queue, list_empty(&sbi->s_delete_list));
127 +/* Instead of playing games with the inode flags, destruction, etc., we just
128 + * create a new inode locally and put it on a list for the delete thread.
129 + * We need large parts of the inode struct in order to complete the
130 + * truncate and unlink, so we may as well just have a real inode to do it.
132 + * If we have any problem deferring the delete, just delete it right away.
133 + * If we defer it, we also mark how many blocks it would free, so that we
134 + * can keep the statfs data correct, and we know if we should sleep on the
135 + * delete thread when we run out of space.
137 +static void ext3_delete_inode_thread(struct inode *old_inode)
139 + struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
140 + struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
141 + struct inode *new_inode;
142 + unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
144 + if (is_bad_inode(old_inode)) {
145 + clear_inode(old_inode);
149 + if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
152 + /* We may want to delete the inode immediately and not defer it */
153 + if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS)
156 + /* We can't use the delete thread as-is during real orphan recovery,
157 + * as we add to the orphan list here, causing ext3_orphan_cleanup()
158 + * to loop endlessly. It would be nice to do so, but needs work.
160 + if (oei->i_state & EXT3_STATE_DELETE ||
161 + sbi->s_mount_state & EXT3_ORPHAN_FS) {
162 + ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
163 + old_inode->i_ino, blocks);
167 + /* We can iget this inode again here, because our caller has unhashed
168 + * old_inode, so new_inode will be in a different inode struct.
170 + * We need to ensure that the i_orphan pointers in the other inodes
171 + * point at the new inode copy instead of the old one so the orphan
172 + * list doesn't get corrupted when the old orphan inode is freed.
174 + down(&sbi->s_orphan_lock);
176 + sbi->s_mount_state |= EXT3_ORPHAN_FS;
177 + new_inode = iget(old_inode->i_sb, old_inode->i_ino);
178 + sbi->s_mount_state &= ~EXT3_ORPHAN_FS;
179 + if (is_bad_inode(new_inode)) {
180 + printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
185 + up(&sbi->s_orphan_lock);
186 + ext3_debug("delete inode %lu directly (bad read)\n",
190 + J_ASSERT(new_inode != old_inode);
192 + J_ASSERT(!list_empty(&oei->i_orphan));
194 + nei = EXT3_I(new_inode);
195 + /* Ugh. We need to insert new_inode into the same spot on the list
196 + * as old_inode was, to ensure the in-memory orphan list is still
197 + * in the same order as the on-disk orphan list (badness otherwise).
199 + nei->i_orphan = oei->i_orphan;
200 + nei->i_orphan.next->prev = &nei->i_orphan;
201 + nei->i_orphan.prev->next = &nei->i_orphan;
202 + nei->i_state |= EXT3_STATE_DELETE;
203 + up(&sbi->s_orphan_lock);
205 + clear_inode(old_inode);
207 + spin_lock(&sbi->s_delete_lock);
208 + J_ASSERT(list_empty(&new_inode->i_dentry));
209 + list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
210 + sbi->s_delete_blocks += blocks;
211 + sbi->s_delete_inodes++;
212 + spin_unlock(&sbi->s_delete_lock);
214 + ext3_debug("delete inode %lu (%lu blocks) by thread\n",
215 + new_inode->i_ino, blocks);
217 + wake_up(&sbi->s_delete_thread_queue);
221 + ext3_delete_inode(old_inode);
224 +#define ext3_start_delete_thread(sbi) do {} while(0)
225 +#define ext3_stop_delete_thread(sbi) do {} while(0)
226 +#endif /* EXT3_DELETE_THREAD */
228 void ext3_put_super (struct super_block * sb)
230 struct ext3_sb_info *sbi = EXT3_SB(sb);
231 @@ -407,6 +621,7 @@ void ext3_put_super (struct super_block
232 kdev_t j_dev = sbi->s_journal->j_dev;
235 + ext3_stop_delete_thread(sbi);
236 ext3_xattr_put_super(sb);
237 journal_destroy(sbi->s_journal);
238 if (!(sb->s_flags & MS_RDONLY)) {
239 @@ -455,7 +670,11 @@ static struct super_operations ext3_sops
240 write_inode: ext3_write_inode, /* BKL not held. Don't need */
241 dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */
242 put_inode: ext3_put_inode, /* BKL not held. Don't need */
243 +#ifdef EXT3_DELETE_THREAD
244 + delete_inode: ext3_delete_inode_thread,/* BKL not held. We take it */
246 delete_inode: ext3_delete_inode, /* BKL not held. We take it */
248 put_super: ext3_put_super, /* BKL held */
249 write_super: ext3_write_super, /* BKL held */
250 sync_fs: ext3_sync_fs,
251 @@ -524,6 +743,13 @@ static int parse_options (char * options
252 clear_opt (*mount_options, XATTR_USER);
255 +#ifdef EXT3_DELETE_THREAD
256 + if (!strcmp(this_char, "asyncdel"))
257 + set_opt(*mount_options, ASYNCDEL);
258 + else if (!strcmp(this_char, "noasyncdel"))
259 + clear_opt(*mount_options, ASYNCDEL);
262 if (!strcmp (this_char, "bsddf"))
263 clear_opt (*mount_options, MINIX_DF);
264 else if (!strcmp (this_char, "nouid32")) {
265 @@ -1223,6 +1449,7 @@ struct super_block * ext3_read_super (st
268 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
269 + ext3_start_delete_thread(sb);
271 * akpm: core read_super() calls in here with the superblock locked.
272 * That deadlocks, because orphan cleanup needs to lock the superblock
273 @@ -1678,6 +1905,9 @@ int ext3_remount (struct super_block * s
274 if (!parse_options(data, &tmp, sbi, &tmp, 1))
277 + if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY))
278 + ext3_stop_delete_thread(sbi);
280 if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
281 ext3_abort(sb, __FUNCTION__, "Abort forced by user");
283 --- linux/fs/ext3/inode.c~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:29 2003
284 +++ linux-mmonroe/fs/ext3/inode.c Thu Jul 10 14:11:33 2003
285 @@ -2013,6 +2013,118 @@ out_stop:
286 ext3_journal_stop(handle, inode);
289 +#ifdef EXT3_DELETE_THREAD
290 +/* Move blocks from to-be-truncated inode over to a new inode, and delete
291 + * that one from the delete thread instead. This avoids a lot of latency
292 + * when truncating large files.
294 + * If we have any problem deferring the truncate, just truncate it right away.
295 + * If we defer it, we also mark how many blocks it would free, so that we
296 + * can keep the statfs data correct, and we know if we should sleep on the
297 + * delete thread when we run out of space.
299 +void ext3_truncate_thread(struct inode *old_inode)
301 + struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
302 + struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
303 + struct inode *new_inode;
305 + unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
307 + if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
310 + /* XXX This is a temporary limitation for code simplicity.
311 + * We could truncate to arbitrary sizes at some later time.
313 + if (old_inode->i_size != 0)
316 + /* We may want to truncate the inode immediately and not defer it */
317 + if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
318 + old_inode->i_size > oei->i_disksize)
321 + /* We can't use the delete thread as-is during real orphan recovery,
322 + * as we add to the orphan list here, causing ext3_orphan_cleanup()
323 + * to loop endlessly. It would be nice to do so, but needs work.
325 + if (oei->i_state & EXT3_STATE_DELETE ||
326 + sbi->s_mount_state & EXT3_ORPHAN_FS) {
327 + ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
328 + old_inode->i_ino, blocks);
332 + ext3_discard_prealloc(old_inode);
335 + * new_inode = sb + GDT + ibitmap
336 + * orphan list = 1 inode/superblock for add, 2 inodes for del
337 + * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
339 + handle = ext3_journal_start(old_inode, 7);
340 + if (IS_ERR(handle))
343 + new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
344 + if (IS_ERR(new_inode)) {
345 + ext3_debug("truncate inode %lu directly (no new inodes)\n",
350 + nei = EXT3_I(new_inode);
352 + down_write(&oei->truncate_sem);
353 + new_inode->i_size = old_inode->i_size;
354 + new_inode->i_blocks = old_inode->i_blocks;
355 + new_inode->i_uid = old_inode->i_uid;
356 + new_inode->i_gid = old_inode->i_gid;
357 + new_inode->i_nlink = 0;
359 + /* FIXME when we do arbitrary truncates */
360 + old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0;
361 + old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME;
363 + memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data));
364 + memset(oei->i_data, 0, sizeof(oei->i_data));
366 + nei->i_disksize = oei->i_disksize;
367 + nei->i_state |= EXT3_STATE_DELETE;
368 + up_write(&oei->truncate_sem);
370 + if (ext3_orphan_add(handle, new_inode) < 0)
373 + if (ext3_orphan_del(handle, old_inode) < 0) {
374 + ext3_orphan_del(handle, new_inode);
379 + ext3_journal_stop(handle, old_inode);
381 + spin_lock(&sbi->s_delete_lock);
382 + J_ASSERT(list_empty(&new_inode->i_dentry));
383 + list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
384 + sbi->s_delete_blocks += blocks;
385 + sbi->s_delete_inodes++;
386 + spin_unlock(&sbi->s_delete_lock);
388 + ext3_debug("delete inode %lu (%lu blocks) by thread\n",
389 + new_inode->i_ino, blocks);
391 + wake_up(&sbi->s_delete_thread_queue);
395 + ext3_journal_stop(handle, old_inode);
397 + ext3_truncate(old_inode);
399 +#endif /* EXT3_DELETE_THREAD */
402 * ext3_get_inode_loc returns with an extra refcount against the
403 * inode's underlying buffer_head on success.
404 --- linux/fs/ext3/file.c~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:21 2003
405 +++ linux-mmonroe/fs/ext3/file.c Thu Jul 10 14:12:17 2003
406 @@ -125,7 +125,11 @@ struct file_operations ext3_file_operati
409 struct inode_operations ext3_file_inode_operations = {
410 +#ifdef EXT3_DELETE_THREAD
411 + truncate: ext3_truncate_thread, /* BKL held */
413 truncate: ext3_truncate, /* BKL held */
415 setattr: ext3_setattr, /* BKL held */
416 setxattr: ext3_setxattr, /* BKL held */
417 getxattr: ext3_getxattr, /* BKL held */
418 --- linux/include/linux/ext3_fs.h~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:26 2003
419 +++ linux-mmonroe/include/linux/ext3_fs.h Thu Jul 10 14:11:33 2003
420 @@ -193,6 +193,7 @@ struct ext3_group_desc
422 #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */
423 #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */
424 +#define EXT3_STATE_DELETE 0x00000010 /* deferred delete inode */
428 @@ -320,6 +321,7 @@ struct ext3_inode {
429 #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */
430 #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
431 #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
432 +#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */
434 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
435 #ifndef _LINUX_EXT2_FS_H
436 @@ -694,6 +696,9 @@ extern void ext3_discard_prealloc (struc
437 extern void ext3_dirty_inode(struct inode *);
438 extern int ext3_change_inode_journal_flag(struct inode *, int);
439 extern void ext3_truncate (struct inode *);
440 +#ifdef EXT3_DELETE_THREAD
441 +extern void ext3_truncate_thread(struct inode *inode);
445 extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
446 --- linux/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.20 Thu Jul 10 14:11:32 2003
447 +++ linux-mmonroe/include/linux/ext3_fs_sb.h Thu Jul 10 14:11:33 2003
450 #define EXT3_MAX_GROUP_LOADED 8
452 +#define EXT3_DELETE_THREAD
455 * third extended-fs super-block data in memory
457 @@ -76,6 +78,14 @@ struct ext3_sb_info {
458 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
459 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
461 +#ifdef EXT3_DELETE_THREAD
462 + spinlock_t s_delete_lock;
463 + struct list_head s_delete_list;
464 + unsigned long s_delete_blocks;
465 + unsigned long s_delete_inodes;
466 + wait_queue_head_t s_delete_thread_queue;
467 + wait_queue_head_t s_delete_waiter_queue;
471 #endif /* _LINUX_EXT3_FS_SB */