2 fs/ext3/inode.c | 116 ++++++++++++++++++++++
3 fs/ext3/super.c | 230 +++++++++++++++++++++++++++++++++++++++++++++
4 include/linux/ext3_fs.h | 5
5 include/linux/ext3_fs_sb.h | 10 +
6 5 files changed, 365 insertions(+)
8 Index: linux-2.4.20/fs/ext3/super.c
9 ===================================================================
10 --- linux-2.4.20.orig/fs/ext3/super.c 2004-01-12 20:13:37.000000000 +0300
11 +++ linux-2.4.20/fs/ext3/super.c 2004-01-13 16:59:54.000000000 +0300
13 static void ext3_clear_journal_err(struct super_block * sb,
14 struct ext3_super_block * es);
16 +static int ext3_sync_fs(struct super_block * sb);
18 #ifdef CONFIG_JBD_DEBUG
19 int journal_no_write[2];
25 +#ifdef EXT3_DELETE_THREAD
27 + * Delete inodes in a loop until there are no more to be deleted.
28 + * Normally, we run in the background doing the deletes and sleeping again,
29 + * and clients just add new inodes to be deleted onto the end of the list.
30 + * If someone is concerned about free space (e.g. block allocation or similar)
31 + * then they can sleep on s_delete_waiter_queue and be woken up when space
34 +int ext3_delete_thread(void *data)
36 + struct super_block *sb = data;
37 + struct ext3_sb_info *sbi = EXT3_SB(sb);
38 + struct task_struct *tsk = current;
40 + /* Almost like daemonize, but not quite */
45 + exit_files(current);
48 + sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev));
49 + sigfillset(&tsk->blocked);
51 + /*tsk->flags |= PF_KERNTHREAD;*/
53 + INIT_LIST_HEAD(&sbi->s_delete_list);
54 + wake_up(&sbi->s_delete_waiter_queue);
55 + ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev));
59 + wait_event_interruptible(sbi->s_delete_thread_queue,
60 + !list_empty(&sbi->s_delete_list) ||
61 + !test_opt(sb, ASYNCDEL));
62 + ext3_debug("%s woken up: %lu inodes, %lu blocks\n",
63 + tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks);
65 + spin_lock(&sbi->s_delete_lock);
66 + if (list_empty(&sbi->s_delete_list)) {
67 + clear_opt(sbi->s_mount_opt, ASYNCDEL);
68 + memset(&sbi->s_delete_list, 0,
69 + sizeof(sbi->s_delete_list));
70 + spin_unlock(&sbi->s_delete_lock);
71 + ext3_debug("delete thread on %s exiting\n",
72 + kdevname(sb->s_dev));
73 + wake_up(&sbi->s_delete_waiter_queue);
77 + while (!list_empty(&sbi->s_delete_list)) {
78 + struct inode *inode=list_entry(sbi->s_delete_list.next,
79 + struct inode, i_dentry);
80 + unsigned long blocks = inode->i_blocks >>
81 + (inode->i_blkbits - 9);
83 + list_del_init(&inode->i_dentry);
84 + spin_unlock(&sbi->s_delete_lock);
85 + ext3_debug("%s delete ino %lu blk %lu\n",
86 + tsk->comm, inode->i_ino, blocks);
90 + spin_lock(&sbi->s_delete_lock);
91 + sbi->s_delete_blocks -= blocks;
92 + sbi->s_delete_inodes--;
94 + if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) {
95 + ext3_warning(sb, __FUNCTION__,
96 + "%lu blocks, %lu inodes on list?\n",
97 + sbi->s_delete_blocks,sbi->s_delete_inodes);
98 + sbi->s_delete_blocks = 0;
99 + sbi->s_delete_inodes = 0;
101 + spin_unlock(&sbi->s_delete_lock);
102 + wake_up(&sbi->s_delete_waiter_queue);
108 +static void ext3_start_delete_thread(struct super_block *sb)
110 + struct ext3_sb_info *sbi = EXT3_SB(sb);
113 + spin_lock_init(&sbi->s_delete_lock);
114 + init_waitqueue_head(&sbi->s_delete_thread_queue);
115 + init_waitqueue_head(&sbi->s_delete_waiter_queue);
117 + if (!test_opt(sb, ASYNCDEL))
120 + rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES);
122 + printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n",
125 + wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next);
128 +static void ext3_stop_delete_thread(struct ext3_sb_info *sbi)
130 + if (sbi->s_delete_list.next == 0) /* thread never started */
133 + clear_opt(sbi->s_mount_opt, ASYNCDEL);
134 + wake_up(&sbi->s_delete_thread_queue);
135 + wait_event(sbi->s_delete_waiter_queue,
136 + sbi->s_delete_list.next == 0 && sbi->s_delete_inodes == 0);
139 +/* Instead of playing games with the inode flags, destruction, etc., we just
140 + * create a new inode locally and put it on a list for the truncate thread.
141 + * We need large parts of the inode struct in order to complete the
142 + * truncate and unlink, so we may as well just have a real inode to do it.
144 + * If we have any problem deferring the delete, just delete it right away.
145 + * If we defer it, we also mark how many blocks it would free, so that we
146 + * can keep the statfs data correct, and we know if we should sleep on the
147 + * delete thread when we run out of space.
149 +static void ext3_delete_inode_thread(struct inode *old_inode)
151 + struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
152 + struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
153 + struct inode *new_inode;
154 + unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
156 + if (is_bad_inode(old_inode)) {
157 + clear_inode(old_inode);
161 + if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
164 + /* We may want to delete the inode immediately and not defer it */
165 + if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS)
168 + /* We can't use the delete thread as-is during real orphan recovery,
169 + * as we add to the orphan list here, causing ext3_orphan_cleanup()
170 + * to loop endlessly. It would be nice to do so, but needs work.
172 + if (oei->i_state & EXT3_STATE_DELETE ||
173 + sbi->s_mount_state & EXT3_ORPHAN_FS) {
174 + ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
175 + old_inode->i_ino, blocks);
179 + /* We can iget this inode again here, because our caller has unhashed
180 + * old_inode, so new_inode will be in a different inode struct.
182 + * We need to ensure that the i_orphan pointers in the other inodes
183 + * point at the new inode copy instead of the old one so the orphan
184 + * list doesn't get corrupted when the old orphan inode is freed.
186 + down(&sbi->s_orphan_lock);
188 + sbi->s_mount_state |= EXT3_ORPHAN_FS;
189 + new_inode = iget(old_inode->i_sb, old_inode->i_ino);
190 + sbi->s_mount_state &= ~EXT3_ORPHAN_FS;
191 + if (is_bad_inode(new_inode)) {
192 + printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
197 + up(&sbi->s_orphan_lock);
198 + ext3_debug("delete inode %lu directly (bad read)\n",
202 + J_ASSERT(new_inode != old_inode);
204 + J_ASSERT(!list_empty(&oei->i_orphan));
206 + nei = EXT3_I(new_inode);
207 + /* Ugh. We need to insert new_inode into the same spot on the list
208 + * as old_inode was, to ensure the in-memory orphan list is still
209 + * in the same order as the on-disk orphan list (badness otherwise).
211 + nei->i_orphan = oei->i_orphan;
212 + nei->i_orphan.next->prev = &nei->i_orphan;
213 + nei->i_orphan.prev->next = &nei->i_orphan;
214 + nei->i_state |= EXT3_STATE_DELETE;
215 + up(&sbi->s_orphan_lock);
217 + clear_inode(old_inode);
219 + spin_lock(&sbi->s_delete_lock);
220 + J_ASSERT(list_empty(&new_inode->i_dentry));
221 + list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
222 + sbi->s_delete_blocks += blocks;
223 + sbi->s_delete_inodes++;
224 + spin_unlock(&sbi->s_delete_lock);
226 + ext3_debug("delete inode %lu (%lu blocks) by thread\n",
227 + new_inode->i_ino, blocks);
229 + wake_up(&sbi->s_delete_thread_queue);
233 + ext3_delete_inode(old_inode);
236 +#define ext3_start_delete_thread(sbi) do {} while(0)
237 +#define ext3_stop_delete_thread(sbi) do {} while(0)
238 +#endif /* EXT3_DELETE_THREAD */
240 void ext3_put_super (struct super_block * sb)
242 struct ext3_sb_info *sbi = EXT3_SB(sb);
244 kdev_t j_dev = sbi->s_journal->j_dev;
247 + J_ASSERT(sbi->s_delete_inodes == 0);
248 ext3_xattr_put_super(sb);
249 journal_destroy(sbi->s_journal);
250 if (!(sb->s_flags & MS_RDONLY)) {
252 write_inode: ext3_write_inode, /* BKL not held. Don't need */
253 dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */
254 put_inode: ext3_put_inode, /* BKL not held. Don't need */
255 +#ifdef EXT3_DELETE_THREAD
256 + delete_inode: ext3_delete_inode_thread,/* BKL not held. We take it */
258 delete_inode: ext3_delete_inode, /* BKL not held. We take it */
260 put_super: ext3_put_super, /* BKL held */
261 write_super: ext3_write_super, /* BKL held */
262 + sync_fs: ext3_sync_fs,
263 write_super_lockfs: ext3_write_super_lockfs, /* BKL not held. Take it */
264 unlockfs: ext3_unlockfs, /* BKL not held. We take it */
265 statfs: ext3_statfs, /* BKL held */
267 clear_opt (*mount_options, XATTR_USER);
270 +#ifdef EXT3_DELETE_THREAD
271 + if (!strcmp(this_char, "asyncdel"))
272 + set_opt(*mount_options, ASYNCDEL);
273 + else if (!strcmp(this_char, "noasyncdel"))
274 + clear_opt(*mount_options, ASYNCDEL);
277 if (!strcmp (this_char, "bsddf"))
278 clear_opt (*mount_options, MINIX_DF);
279 else if (!strcmp (this_char, "nouid32")) {
280 @@ -1220,6 +1450,7 @@
283 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
284 + ext3_start_delete_thread(sb);
286 * akpm: core read_super() calls in here with the superblock locked.
287 * That deadlocks, because orphan cleanup needs to lock the superblock
288 @@ -1625,6 +1856,21 @@
292 +static int ext3_sync_fs(struct super_block *sb)
296 + if (atomic_read(&sb->s_active) == 0) {
297 + /* fs is being unmounted: time to stop delete thread */
298 + ext3_stop_delete_thread(EXT3_SB(sb));
302 + target = log_start_commit(EXT3_SB(sb)->s_journal, NULL);
303 + log_wait_commit(EXT3_SB(sb)->s_journal, target);
308 * LVM calls this function before a (read-only) snapshot is created. This
309 * gives us a chance to flush the journal completely and mark the fs clean.
310 @@ -1682,6 +1928,9 @@
311 if (!parse_options(data, &tmp, sbi, &tmp, 1))
314 + if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY))
315 + ext3_stop_delete_thread(sbi);
317 if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
318 ext3_abort(sb, __FUNCTION__, "Abort forced by user");
320 Index: linux-2.4.20/fs/ext3/inode.c
321 ===================================================================
322 --- linux-2.4.20.orig/fs/ext3/inode.c 2004-01-12 20:13:37.000000000 +0300
323 +++ linux-2.4.20/fs/ext3/inode.c 2004-01-13 16:55:45.000000000 +0300
324 @@ -2552,6 +2552,118 @@
328 +#ifdef EXT3_DELETE_THREAD
329 +/* Move blocks from to-be-truncated inode over to a new inode, and delete
330 + * that one from the delete thread instead. This avoids a lot of latency
331 + * when truncating large files.
333 + * If we have any problem deferring the truncate, just truncate it right away.
334 + * If we defer it, we also mark how many blocks it would free, so that we
335 + * can keep the statfs data correct, and we know if we should sleep on the
336 + * delete thread when we run out of space.
338 +void ext3_truncate_thread(struct inode *old_inode)
340 + struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
341 + struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
342 + struct inode *new_inode;
344 + unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
346 + if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
349 + /* XXX This is a temporary limitation for code simplicity.
350 + * We could truncate to arbitrary sizes at some later time.
352 + if (old_inode->i_size != 0)
355 + /* We may want to truncate the inode immediately and not defer it */
356 + if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
357 + old_inode->i_size > oei->i_disksize)
360 + /* We can't use the delete thread as-is during real orphan recovery,
361 + * as we add to the orphan list here, causing ext3_orphan_cleanup()
362 + * to loop endlessly. It would be nice to do so, but needs work.
364 + if (oei->i_state & EXT3_STATE_DELETE ||
365 + sbi->s_mount_state & EXT3_ORPHAN_FS) {
366 + ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
367 + old_inode->i_ino, blocks);
371 + ext3_discard_prealloc(old_inode);
374 + * new_inode = sb + GDT + ibitmap
375 + * orphan list = 1 inode/superblock for add, 2 inodes for del
376 + * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
378 + handle = ext3_journal_start(old_inode, 7);
379 + if (IS_ERR(handle))
382 + new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
383 + if (IS_ERR(new_inode)) {
384 + ext3_debug("truncate inode %lu directly (no new inodes)\n",
389 + nei = EXT3_I(new_inode);
391 + down_write(&oei->truncate_sem);
392 + new_inode->i_size = old_inode->i_size;
393 + new_inode->i_blocks = old_inode->i_blocks;
394 + new_inode->i_uid = old_inode->i_uid;
395 + new_inode->i_gid = old_inode->i_gid;
396 + new_inode->i_nlink = 0;
398 + /* FIXME when we do arbitrary truncates */
399 + old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0;
400 + old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME;
402 + memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data));
403 + memset(oei->i_data, 0, sizeof(oei->i_data));
405 + nei->i_disksize = oei->i_disksize;
406 + nei->i_state |= EXT3_STATE_DELETE;
407 + up_write(&oei->truncate_sem);
409 + if (ext3_orphan_add(handle, new_inode) < 0)
412 + if (ext3_orphan_del(handle, old_inode) < 0) {
413 + ext3_orphan_del(handle, new_inode);
418 + ext3_journal_stop(handle, old_inode);
420 + spin_lock(&sbi->s_delete_lock);
421 + J_ASSERT(list_empty(&new_inode->i_dentry));
422 + list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
423 + sbi->s_delete_blocks += blocks;
424 + sbi->s_delete_inodes++;
425 + spin_unlock(&sbi->s_delete_lock);
427 + ext3_debug("delete inode %lu (%lu blocks) by thread\n",
428 + new_inode->i_ino, blocks);
430 + wake_up(&sbi->s_delete_thread_queue);
434 + ext3_journal_stop(handle, old_inode);
436 + ext3_truncate(old_inode);
438 +#endif /* EXT3_DELETE_THREAD */
441 * On success, We end up with an outstanding reference count against
442 * iloc->bh. This _must_ be cleaned up later.
443 Index: linux-2.4.20/fs/ext3/file.c
444 ===================================================================
445 --- linux-2.4.20.orig/fs/ext3/file.c 2004-01-12 20:13:36.000000000 +0300
446 +++ linux-2.4.20/fs/ext3/file.c 2004-01-13 16:55:45.000000000 +0300
450 struct inode_operations ext3_file_inode_operations = {
451 +#ifdef EXT3_DELETE_THREAD
452 + truncate: ext3_truncate_thread, /* BKL held */
454 truncate: ext3_truncate, /* BKL held */
456 setattr: ext3_setattr, /* BKL held */
457 setxattr: ext3_setxattr, /* BKL held */
458 getxattr: ext3_getxattr, /* BKL held */
459 Index: linux-2.4.20/fs/buffer.c
460 ===================================================================
461 --- linux-2.4.20.orig/fs/buffer.c 2003-05-16 05:29:12.000000000 +0400
462 +++ linux-2.4.20/fs/buffer.c 2004-01-13 16:55:45.000000000 +0300
464 if (sb->s_dirt && sb->s_op && sb->s_op->write_super)
465 sb->s_op->write_super(sb);
467 + if (sb->s_op && sb->s_op->sync_fs)
468 + sb->s_op->sync_fs(sb);
471 return sync_buffers(dev, 1);
472 Index: linux-2.4.20/include/linux/ext3_fs.h
473 ===================================================================
474 --- linux-2.4.20.orig/include/linux/ext3_fs.h 2004-01-12 20:13:37.000000000 +0300
475 +++ linux-2.4.20/include/linux/ext3_fs.h 2004-01-13 16:55:45.000000000 +0300
478 #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */
479 #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */
480 +#define EXT3_STATE_DELETE 0x00000010 /* deferred delete inode */
485 #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */
486 #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
487 #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
488 +#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */
490 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
491 #ifndef _LINUX_EXT2_FS_H
493 extern void ext3_dirty_inode(struct inode *);
494 extern int ext3_change_inode_journal_flag(struct inode *, int);
495 extern void ext3_truncate (struct inode *);
496 +#ifdef EXT3_DELETE_THREAD
497 +extern void ext3_truncate_thread(struct inode *inode);
501 extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
502 Index: linux-2.4.20/include/linux/ext3_fs_sb.h
503 ===================================================================
504 --- linux-2.4.20.orig/include/linux/ext3_fs_sb.h 2004-01-12 20:13:37.000000000 +0300
505 +++ linux-2.4.20/include/linux/ext3_fs_sb.h 2004-01-13 16:55:45.000000000 +0300
508 #define EXT3_MAX_GROUP_LOADED 8
510 +#define EXT3_DELETE_THREAD
513 * third extended-fs super-block data in memory
516 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
517 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
519 +#ifdef EXT3_DELETE_THREAD
520 + spinlock_t s_delete_lock; /* held around s_delete_list and the two counters below */
521 + struct list_head s_delete_list; /* inodes queued for the delete thread, linked via i_dentry; next == NULL means no thread is running */
522 + unsigned long s_delete_blocks; /* blocks accounted to the inodes currently queued */
523 + unsigned long s_delete_inodes; /* number of inodes currently queued */
524 + wait_queue_head_t s_delete_thread_queue; /* delete thread sleeps here; woken when work is queued or the thread is stopped */
525 + wait_queue_head_t s_delete_waiter_queue; /* woken by the delete thread on startup, after draining the list, and on exit */
529 #endif /* _LINUX_EXT3_FS_SB */
530 Index: linux-2.4.20/include/linux/fs.h
531 ===================================================================
532 --- linux-2.4.20.orig/include/linux/fs.h 2004-01-12 20:13:36.000000000 +0300
533 +++ linux-2.4.20/include/linux/fs.h 2004-01-13 16:55:45.000000000 +0300
535 void (*delete_inode) (struct inode *);
536 void (*put_super) (struct super_block *);
537 void (*write_super) (struct super_block *);
538 + int (*sync_fs) (struct super_block *);
539 void (*write_super_lockfs) (struct super_block *);
540 void (*unlockfs) (struct super_block *);
541 int (*statfs) (struct super_block *, struct statfs *);