2 fs/ext3/inode.c | 116 ++++++++++++++++++++++
3 fs/ext3/super.c | 230 +++++++++++++++++++++++++++++++++++++++++++++
4 include/linux/ext3_fs.h | 5
5 include/linux/ext3_fs_sb.h | 10 +
6 5 files changed, 365 insertions(+)
8 Index: linux-2.4.24/fs/ext3/super.c
9 ===================================================================
10 --- linux-2.4.24.orig/fs/ext3/super.c 2004-01-12 20:36:31.000000000 +0300
11 +++ linux-2.4.24/fs/ext3/super.c 2004-01-13 16:27:43.000000000 +0300
16 +#ifdef EXT3_DELETE_THREAD
18 + * Delete inodes in a loop until there are no more to be deleted.
19 + * Normally, we run in the background doing the deletes and sleeping again,
20 + * and clients just add new inodes to be deleted onto the end of the list.
21 + * If someone is concerned about free space (e.g. block allocation or similar)
22 + * then they can sleep on s_delete_waiter_queue and be woken up when space
25 +int ext3_delete_thread(void *data)
27 + struct super_block *sb = data;
28 + struct ext3_sb_info *sbi = EXT3_SB(sb);
29 + struct task_struct *tsk = current;
30 + struct inode *inode;
31 + unsigned long blocks;
33 + /* Almost like daemonize, but not quite */
38 + exit_files(current);
41 + sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev));
42 + sigfillset(&tsk->blocked);
44 + /*tsk->flags |= PF_KERNTHREAD;*/
46 + INIT_LIST_HEAD(&sbi->s_delete_list);
47 + wake_up(&sbi->s_delete_waiter_queue);
48 + ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev));
52 + wait_event_interruptible(sbi->s_delete_thread_queue,
53 + !list_empty(&sbi->s_delete_list) ||
54 + !test_opt(sb, ASYNCDEL));
55 + ext3_debug("%s woken up: %lu inodes, %lu blocks\n",
56 + tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks);
58 + spin_lock(&sbi->s_delete_lock);
59 + if (list_empty(&sbi->s_delete_list)) {
60 + clear_opt(sbi->s_mount_opt, ASYNCDEL);
61 + memset(&sbi->s_delete_list, 0,
62 + sizeof(sbi->s_delete_list));
63 + spin_unlock(&sbi->s_delete_lock);
64 + ext3_debug("delete thread on %s exiting\n",
65 + kdevname(sb->s_dev));
66 + wake_up(&sbi->s_delete_waiter_queue);
70 + while (!list_empty(&sbi->s_delete_list)) {
71 + inode = list_entry(sbi->s_delete_list.next,
72 + struct inode, i_devices);
73 + blocks = inode->i_blocks >> (inode->i_blkbits - 9);
75 + list_del_init(&inode->i_devices);
76 + spin_unlock(&sbi->s_delete_lock);
77 + ext3_debug("%s delete ino %lu blk %lu\n",
78 + tsk->comm, inode->i_ino, blocks);
80 + J_ASSERT(EXT3_I(inode)->i_state & EXT3_STATE_DELETE);
81 + J_ASSERT(inode->i_nlink == 1);
85 + spin_lock(&sbi->s_delete_lock);
86 + sbi->s_delete_blocks -= blocks;
87 + sbi->s_delete_inodes--;
89 + if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) {
90 + ext3_warning(sb, __FUNCTION__,
91 + "%lu blocks, %lu inodes on list?\n",
92 + sbi->s_delete_blocks,sbi->s_delete_inodes);
93 + sbi->s_delete_blocks = 0;
94 + sbi->s_delete_inodes = 0;
96 + spin_unlock(&sbi->s_delete_lock);
97 + wake_up(&sbi->s_delete_waiter_queue);
103 +static void ext3_start_delete_thread(struct super_block *sb)
105 + struct ext3_sb_info *sbi = EXT3_SB(sb);
108 + spin_lock_init(&sbi->s_delete_lock);
109 + init_waitqueue_head(&sbi->s_delete_thread_queue);
110 + init_waitqueue_head(&sbi->s_delete_waiter_queue);
112 + if (!test_opt(sb, ASYNCDEL))
115 + rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES);
117 + printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n",
120 + wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next);
123 +static void ext3_stop_delete_thread(struct ext3_sb_info *sbi)
125 + if (sbi->s_delete_list.next == 0) /* thread never started */
128 + clear_opt(sbi->s_mount_opt, ASYNCDEL);
129 + wake_up(&sbi->s_delete_thread_queue);
130 + wait_event(sbi->s_delete_waiter_queue,
131 + sbi->s_delete_list.next == 0 && sbi->s_delete_inodes == 0);
134 +#define ext3_start_delete_thread(sbi) do {} while(0)
135 +#define ext3_stop_delete_thread(sbi) do {} while(0)
136 +#endif /* EXT3_DELETE_THREAD */
138 void ext3_put_super (struct super_block * sb)
140 struct ext3_sb_info *sbi = EXT3_SB(sb);
142 kdev_t j_dev = sbi->s_journal->j_dev;
145 + J_ASSERT(sbi->s_delete_inodes == 0);
146 ext3_xattr_put_super(sb);
147 journal_destroy(sbi->s_journal);
148 if (!(sb->s_flags & MS_RDONLY)) {
150 clear_opt (*mount_options, XATTR_USER);
153 +#ifdef EXT3_DELETE_THREAD
154 + if (!strcmp(this_char, "asyncdel"))
155 + set_opt(*mount_options, ASYNCDEL);
156 + else if (!strcmp(this_char, "noasyncdel"))
157 + clear_opt(*mount_options, ASYNCDEL);
160 if (!strcmp (this_char, "bsddf"))
161 clear_opt (*mount_options, MINIX_DF);
162 else if (!strcmp (this_char, "nouid32")) {
163 @@ -1227,6 +1357,7 @@
166 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
167 + ext3_start_delete_thread(sb);
169 * akpm: core read_super() calls in here with the superblock locked.
170 * That deadlocks, because orphan cleanup needs to lock the superblock
171 @@ -1618,7 +1749,12 @@
172 static int ext3_sync_fs(struct super_block *sb)
177 + if (atomic_read(&sb->s_active) == 0) {
178 + /* fs is being unmounted: time to stop the delete thread */
179 + ext3_stop_delete_thread(EXT3_SB(sb));
183 target = log_start_commit(EXT3_SB(sb)->s_journal, NULL);
184 log_wait_commit(EXT3_SB(sb)->s_journal, target);
185 @@ -1682,6 +1818,9 @@
186 if (!parse_options(data, &tmp, sbi, &tmp, 1))
189 + if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY))
190 + ext3_stop_delete_thread(sbi);
192 if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
193 ext3_abort(sb, __FUNCTION__, "Abort forced by user");
195 Index: linux-2.4.24/fs/ext3/inode.c
196 ===================================================================
197 --- linux-2.4.24.orig/fs/ext3/inode.c 2004-01-12 20:36:31.000000000 +0300
198 +++ linux-2.4.24/fs/ext3/inode.c 2004-01-12 20:36:32.000000000 +0300
199 @@ -2551,6 +2551,118 @@
203 +#ifdef EXT3_DELETE_THREAD
204 +/* Move the blocks of a to-be-truncated inode over to a new inode, and delete
205 + * the new inode from the delete thread instead. This avoids a lot of latency
206 + * when truncating large files.
208 + * If we have any problem deferring the truncate, just truncate it right away.
209 + * If we defer it, we also mark how many blocks it would free, so that we
210 + * can keep the statfs data correct, and we know if we should sleep on the
211 + * delete thread when we run out of space.
213 +void ext3_truncate_thread(struct inode *old_inode)
215 + struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
216 + struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
217 + struct inode *new_inode;
219 + unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
221 + if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
224 + /* XXX Only truncates to zero length are deferred; this is a temporary
225 + * limitation for code simplicity. We could truncate to arbitrary sizes later.
227 + if (old_inode->i_size != 0)
230 + /* We may want to truncate the inode immediately and not defer it */
231 + if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
232 + old_inode->i_size > oei->i_disksize)
235 + /* We can't use the delete thread as-is during real orphan recovery,
236 + * as we add to the orphan list here, causing ext3_orphan_cleanup()
237 + * to loop endlessly. It would be nice to do so, but needs work.
239 + if (oei->i_state & EXT3_STATE_DELETE ||
240 + sbi->s_mount_state & EXT3_ORPHAN_FS) {
241 + ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
242 + old_inode->i_ino, blocks);
246 + ext3_discard_prealloc(old_inode);
249 + * new_inode = sb + GDT + ibitmap
250 + * orphan list = 1 inode/superblock for add, 2 inodes for del
251 + * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
253 + handle = ext3_journal_start(old_inode, 7);
254 + if (IS_ERR(handle))
257 + new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
258 + if (IS_ERR(new_inode)) {
259 + ext3_debug("truncate inode %lu directly (no new inodes)\n",
264 + nei = EXT3_I(new_inode);
266 + down_write(&oei->truncate_sem);
267 + new_inode->i_size = old_inode->i_size;
268 + new_inode->i_blocks = old_inode->i_blocks;
269 + new_inode->i_uid = old_inode->i_uid;
270 + new_inode->i_gid = old_inode->i_gid;
271 + new_inode->i_nlink = 1;
273 + /* FIXME when we do arbitrary truncates */
274 + old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0;
275 + old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME;
277 + memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data));
278 + memset(oei->i_data, 0, sizeof(oei->i_data));
280 + nei->i_disksize = oei->i_disksize;
281 + nei->i_state |= EXT3_STATE_DELETE;
282 + up_write(&oei->truncate_sem);
284 + if (ext3_orphan_add(handle, new_inode) < 0)
287 + if (ext3_orphan_del(handle, old_inode) < 0) {
288 + ext3_orphan_del(handle, new_inode);
293 + ext3_journal_stop(handle, old_inode);
295 + spin_lock(&sbi->s_delete_lock);
296 + J_ASSERT(list_empty(&new_inode->i_devices));
297 + list_add_tail(&new_inode->i_devices, &sbi->s_delete_list);
298 + sbi->s_delete_blocks += blocks;
299 + sbi->s_delete_inodes++;
300 + spin_unlock(&sbi->s_delete_lock);
302 + ext3_debug("delete inode %lu (%lu blocks) by thread\n",
303 + new_inode->i_ino, blocks);
305 + wake_up(&sbi->s_delete_thread_queue);
309 + ext3_journal_stop(handle, old_inode);
311 + ext3_truncate(old_inode);
313 +#endif /* EXT3_DELETE_THREAD */
316 * On success, We end up with an outstanding reference count against
317 * iloc->bh. This _must_ be cleaned up later.
318 Index: linux-2.4.24/fs/ext3/file.c
319 ===================================================================
320 --- linux-2.4.24.orig/fs/ext3/file.c 2004-01-12 20:36:29.000000000 +0300
321 +++ linux-2.4.24/fs/ext3/file.c 2004-01-12 20:36:32.000000000 +0300
325 struct inode_operations ext3_file_inode_operations = {
326 +#ifdef EXT3_DELETE_THREAD
327 + truncate: ext3_truncate_thread, /* BKL held */
329 truncate: ext3_truncate, /* BKL held */
331 setattr: ext3_setattr, /* BKL held */
332 setxattr: ext3_setxattr, /* BKL held */
333 getxattr: ext3_getxattr, /* BKL held */
334 Index: linux-2.4.24/fs/ext3/namei.c
335 ===================================================================
336 --- linux-2.4.24.orig/fs/ext3/namei.c 2004-01-12 20:36:31.000000000 +0300
337 +++ linux-2.4.24/fs/ext3/namei.c 2004-01-12 20:36:32.000000000 +0300
338 @@ -1936,6 +1936,36 @@
342 +static int ext3_try_to_delay_deletion(struct inode *inode)
344 + struct ext3_sb_info *sbi = EXT3_SB(inode->i_sb);
345 + struct ext3_inode_info *ei = EXT3_I(inode);
346 + unsigned long blocks;
348 + if (!test_opt(inode->i_sb, ASYNCDEL))
351 + /* We may want to delete the inode immediately and not defer it */
352 + blocks = inode->i_blocks >> (inode->i_blkbits - 9);
353 + if (IS_SYNC(inode) || blocks <= EXT3_NDIR_BLOCKS)
356 + inode->i_nlink = 1;
357 + atomic_inc(&inode->i_count);
358 + ei->i_state |= EXT3_STATE_DELETE;
360 + spin_lock(&sbi->s_delete_lock);
361 + J_ASSERT(list_empty(&inode->i_devices));
362 + list_add_tail(&inode->i_devices, &sbi->s_delete_list);
363 + sbi->s_delete_blocks += blocks;
364 + sbi->s_delete_inodes++;
365 + spin_unlock(&sbi->s_delete_lock);
367 + wake_up(&sbi->s_delete_thread_queue);
372 static int ext3_unlink(struct inode * dir, struct dentry *dentry)
375 @@ -1977,8 +2007,10 @@
376 ext3_update_dx_flag(dir);
377 ext3_mark_inode_dirty(handle, dir);
379 - if (!inode->i_nlink)
380 + if (!inode->i_nlink) {
381 + ext3_try_to_delay_deletion(inode);
382 ext3_orphan_add(handle, inode);
384 inode->i_ctime = dir->i_ctime;
385 ext3_mark_inode_dirty(handle, inode);
387 Index: linux-2.4.24/include/linux/ext3_fs.h
388 ===================================================================
389 --- linux-2.4.24.orig/include/linux/ext3_fs.h 2004-01-12 20:36:31.000000000 +0300
390 +++ linux-2.4.24/include/linux/ext3_fs.h 2004-01-12 20:36:32.000000000 +0300
393 #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */
394 #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */
395 +#define EXT3_STATE_DELETE 0x00000010 /* deferred delete inode */
400 #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */
401 #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
402 #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
403 +#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */
405 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
406 #ifndef _LINUX_EXT2_FS_H
408 extern void ext3_dirty_inode(struct inode *);
409 extern int ext3_change_inode_journal_flag(struct inode *, int);
410 extern void ext3_truncate (struct inode *);
411 +#ifdef EXT3_DELETE_THREAD
412 +extern void ext3_truncate_thread(struct inode *inode);
414 extern void ext3_set_inode_flags(struct inode *);
417 Index: linux-2.4.24/include/linux/ext3_fs_sb.h
418 ===================================================================
419 --- linux-2.4.24.orig/include/linux/ext3_fs_sb.h 2004-01-12 20:36:31.000000000 +0300
420 +++ linux-2.4.24/include/linux/ext3_fs_sb.h 2004-01-12 20:36:32.000000000 +0300
423 #define EXT3_MAX_GROUP_LOADED 8
425 +#define EXT3_DELETE_THREAD
428 * third extended-fs super-block data in memory
431 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
432 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
434 +#ifdef EXT3_DELETE_THREAD
435 + spinlock_t s_delete_lock;
436 + struct list_head s_delete_list;
437 + unsigned long s_delete_blocks;
438 + unsigned long s_delete_inodes;
439 + wait_queue_head_t s_delete_thread_queue;
440 + wait_queue_head_t s_delete_waiter_queue;
444 #endif /* _LINUX_EXT3_FS_SB */