1 Index: linux-2.4.29/fs/ext3/super.c
2 ===================================================================
3 --- linux-2.4.29.orig/fs/ext3/super.c 2005-05-03 15:53:33.047533872 +0300
4 +++ linux-2.4.29/fs/ext3/super.c 2005-05-03 15:54:47.192262160 +0300
9 +#ifdef EXT3_DELETE_THREAD
11 + * Delete inodes in a loop until there are no more to be deleted.
12 + * Normally, we run in the background doing the deletes and sleeping again,
13 + * and clients just add new inodes to be deleted onto the end of the list.
14 + * If someone is concerned about free space (e.g. block allocation or similar)
15 + * then they can sleep on s_delete_waiter_queue and be woken up when space has been freed.
18 +int ext3_delete_thread(void *data)
20 + struct super_block *sb = data;
21 + struct ext3_sb_info *sbi = EXT3_SB(sb);
22 + struct task_struct *tsk = current;
24 + /* Almost like daemonize, but not quite */
29 + exit_files(current);
32 + sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev));
33 + sigfillset(&tsk->blocked);
35 + /*tsk->flags |= PF_KERNTHREAD;*/
37 + INIT_LIST_HEAD(&sbi->s_delete_list);
38 + wake_up(&sbi->s_delete_waiter_queue);
39 + ext3_debug("delete thread on %s started\n", kdevname(sb->s_dev));
43 + wait_event_interruptible(sbi->s_delete_thread_queue,
44 + !list_empty(&sbi->s_delete_list) ||
45 + !test_opt(sb, ASYNCDEL));
46 + ext3_debug("%s woken up: %lu inodes, %lu blocks\n",
47 + tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks);
49 + spin_lock(&sbi->s_delete_lock);
50 + if (list_empty(&sbi->s_delete_list)) {
51 + clear_opt(sbi->s_mount_opt, ASYNCDEL);
52 + memset(&sbi->s_delete_list, 0,
53 + sizeof(sbi->s_delete_list));
54 + spin_unlock(&sbi->s_delete_lock);
55 + ext3_debug("delete thread on %s exiting\n",
56 + kdevname(sb->s_dev));
57 + wake_up(&sbi->s_delete_waiter_queue);
61 + while (!list_empty(&sbi->s_delete_list)) {
62 + struct inode *inode=list_entry(sbi->s_delete_list.next,
63 + struct inode, i_devices);
64 + unsigned long blocks = inode->i_blocks >>
65 + (inode->i_blkbits - 9);
67 + list_del_init(&inode->i_devices);
68 + spin_unlock(&sbi->s_delete_lock);
69 + ext3_debug("%s delete ino %lu blk %lu\n",
70 + tsk->comm, inode->i_ino, blocks);
72 + J_ASSERT(EXT3_I(inode)->i_state & EXT3_STATE_DELETE);
73 + J_ASSERT(inode->i_nlink == 1);
77 + spin_lock(&sbi->s_delete_lock);
78 + sbi->s_delete_blocks -= blocks;
79 + sbi->s_delete_inodes--;
81 + if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0) {
82 + ext3_warning(sb, __FUNCTION__,
83 + "%lu blocks, %lu inodes on list?\n",
84 + sbi->s_delete_blocks,sbi->s_delete_inodes);
85 + sbi->s_delete_blocks = 0;
86 + sbi->s_delete_inodes = 0;
88 + spin_unlock(&sbi->s_delete_lock);
89 + wake_up(&sbi->s_delete_waiter_queue);
95 +static void ext3_start_delete_thread(struct super_block *sb)
97 + struct ext3_sb_info *sbi = EXT3_SB(sb);
100 + spin_lock_init(&sbi->s_delete_lock);
101 + init_waitqueue_head(&sbi->s_delete_thread_queue);
102 + init_waitqueue_head(&sbi->s_delete_waiter_queue);
104 + if (!test_opt(sb, ASYNCDEL))
107 + rc = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES);
109 + printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n",
112 + wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next);
115 +static void ext3_stop_delete_thread(struct ext3_sb_info *sbi)
117 + if (sbi->s_delete_list.next == 0) /* thread never started */
120 + clear_opt(sbi->s_mount_opt, ASYNCDEL);
121 + wake_up(&sbi->s_delete_thread_queue);
122 + wait_event(sbi->s_delete_waiter_queue,
123 + sbi->s_delete_list.next == 0 && sbi->s_delete_inodes == 0);
126 +#define ext3_start_delete_thread(sbi) do {} while(0)
127 +#define ext3_stop_delete_thread(sbi) do {} while(0)
128 +#endif /* EXT3_DELETE_THREAD */
130 void ext3_put_super (struct super_block * sb)
132 struct ext3_sb_info *sbi = EXT3_SB(sb);
134 kdev_t j_dev = sbi->s_journal->j_dev;
137 +#ifdef EXT3_DELETE_THREAD
138 + J_ASSERT(sbi->s_delete_inodes == 0);
140 ext3_xattr_put_super(sb);
141 journal_destroy(sbi->s_journal);
142 if (!(sb->s_flags & MS_RDONLY)) {
144 clear_opt (*mount_options, XATTR_USER);
147 +#ifdef EXT3_DELETE_THREAD
148 + if (!strcmp(this_char, "asyncdel"))
149 + set_opt(*mount_options, ASYNCDEL);
150 + else if (!strcmp(this_char, "noasyncdel"))
151 + clear_opt(*mount_options, ASYNCDEL);
154 if (!strcmp (this_char, "bsddf"))
155 clear_opt (*mount_options, MINIX_DF);
156 else if (!strcmp (this_char, "nouid32")) {
157 @@ -1244,6 +1375,7 @@
160 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
161 + ext3_start_delete_thread(sb);
162 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
163 ext3_orphan_cleanup(sb, es);
164 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
165 @@ -1626,7 +1758,12 @@
166 static int ext3_sync_fs(struct super_block *sb)
171 + if (atomic_read(&sb->s_active) == 0) {
172 + /* fs is being umounted: time to stop delete thread */
173 + ext3_stop_delete_thread(EXT3_SB(sb));
177 target = log_start_commit(EXT3_SB(sb)->s_journal, NULL);
178 log_wait_commit(EXT3_SB(sb)->s_journal, target);
179 @@ -1690,6 +1827,9 @@
180 if (!parse_options(data, &tmp, sbi, &tmp, 1))
183 + if (!test_opt(sb, ASYNCDEL) || (*flags & MS_RDONLY))
184 + ext3_stop_delete_thread(sbi);
186 if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
187 ext3_abort(sb, __FUNCTION__, "Abort forced by user");
189 Index: linux-2.4.29/fs/ext3/inode.c
190 ===================================================================
191 --- linux-2.4.29.orig/fs/ext3/inode.c 2005-05-03 15:53:36.555000656 +0300
192 +++ linux-2.4.29/fs/ext3/inode.c 2005-05-03 15:53:56.901907456 +0300
193 @@ -2562,6 +2562,118 @@
197 +#ifdef EXT3_DELETE_THREAD
198 +/* Move blocks from the to-be-truncated inode over to a new inode, and delete
199 + * that new inode from the delete thread instead. This avoids a lot of latency
200 + * when truncating large files.
202 + * If we have any problem deferring the truncate, just truncate it right away.
203 + * If we defer it, we also mark how many blocks it would free, so that we
204 + * can keep the statfs data correct, and we know if we should sleep on the
205 + * delete thread when we run out of space.
207 +void ext3_truncate_thread(struct inode *old_inode)
209 + struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
210 + struct ext3_inode_info *nei, *oei = EXT3_I(old_inode);
211 + struct inode *new_inode;
213 + unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
215 + if (!test_opt(old_inode->i_sb, ASYNCDEL) || !sbi->s_delete_list.next)
218 + /* XXX This is a temporary limitation for code simplicity.
219 + * We could truncate to arbitrary sizes at some later time.
221 + if (old_inode->i_size != 0)
224 + /* We may want to truncate the inode immediately and not defer it */
225 + if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
226 + old_inode->i_size > oei->i_disksize)
229 + /* We can't use the delete thread as-is during real orphan recovery,
230 + * as we add to the orphan list here, causing ext3_orphan_cleanup()
231 + * to loop endlessly. It would be nice to do so, but needs work.
233 + if (oei->i_state & EXT3_STATE_DELETE ||
234 + sbi->s_mount_state & EXT3_ORPHAN_FS) {
235 + ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
236 + old_inode->i_ino, blocks);
240 + ext3_discard_prealloc(old_inode);
243 + * new_inode = sb + GDT + ibitmap
244 + * orphan list = 1 inode/superblock for add, 2 inodes for del
245 + * quota files = 2 * EXT3_SINGLEDATA_TRANS_BLOCKS
247 + handle = ext3_journal_start(old_inode, 7);
248 + if (IS_ERR(handle))
251 + new_inode = ext3_new_inode(handle, old_inode, old_inode->i_mode);
252 + if (IS_ERR(new_inode)) {
253 + ext3_debug("truncate inode %lu directly (no new inodes)\n",
258 + nei = EXT3_I(new_inode);
260 + down_write(&oei->truncate_sem);
261 + new_inode->i_size = old_inode->i_size;
262 + new_inode->i_blocks = old_inode->i_blocks;
263 + new_inode->i_uid = old_inode->i_uid;
264 + new_inode->i_gid = old_inode->i_gid;
265 + new_inode->i_nlink = 1;
267 + /* FIXME when we do arbitrary truncates */
268 + old_inode->i_blocks = oei->i_file_acl ? old_inode->i_blksize / 512 : 0;
269 + old_inode->i_mtime = old_inode->i_ctime = CURRENT_TIME;
271 + memcpy(nei->i_data, oei->i_data, sizeof(nei->i_data));
272 + memset(oei->i_data, 0, sizeof(oei->i_data));
274 + nei->i_disksize = oei->i_disksize;
275 + nei->i_state |= EXT3_STATE_DELETE;
276 + up_write(&oei->truncate_sem);
278 + if (ext3_orphan_add(handle, new_inode) < 0)
281 + if (ext3_orphan_del(handle, old_inode) < 0) {
282 + ext3_orphan_del(handle, new_inode);
287 + ext3_journal_stop(handle, old_inode);
289 + spin_lock(&sbi->s_delete_lock);
290 + J_ASSERT(list_empty(&new_inode->i_devices));
291 + list_add_tail(&new_inode->i_devices, &sbi->s_delete_list);
292 + sbi->s_delete_blocks += blocks;
293 + sbi->s_delete_inodes++;
294 + spin_unlock(&sbi->s_delete_lock);
296 + ext3_debug("delete inode %lu (%lu blocks) by thread\n",
297 + new_inode->i_ino, blocks);
299 + wake_up(&sbi->s_delete_thread_queue);
303 + ext3_journal_stop(handle, old_inode);
305 + ext3_truncate(old_inode);
307 +#endif /* EXT3_DELETE_THREAD */
310 * On success, We end up with an outstanding reference count against
311 * iloc->bh. This _must_ be cleaned up later.
312 Index: linux-2.4.29/fs/ext3/file.c
313 ===================================================================
314 --- linux-2.4.29.orig/fs/ext3/file.c 2005-04-07 19:31:00.000000000 +0300
315 +++ linux-2.4.29/fs/ext3/file.c 2005-05-03 15:53:56.902907304 +0300
319 struct inode_operations ext3_file_inode_operations = {
320 +#ifdef EXT3_DELETE_THREAD
321 + truncate: ext3_truncate_thread, /* BKL held */
323 truncate: ext3_truncate, /* BKL held */
325 setattr: ext3_setattr, /* BKL held */
326 setxattr: ext3_setxattr, /* BKL held */
327 getxattr: ext3_getxattr, /* BKL held */
328 Index: linux-2.4.29/fs/ext3/namei.c
329 ===================================================================
330 --- linux-2.4.29.orig/fs/ext3/namei.c 2005-05-03 15:53:33.044534328 +0300
331 +++ linux-2.4.29/fs/ext3/namei.c 2005-05-03 15:53:56.905906848 +0300
336 +#ifdef EXT3_DELETE_THREAD
337 +static int ext3_try_to_delay_deletion(struct inode *inode)
339 + struct ext3_sb_info *sbi = EXT3_SB(inode->i_sb);
340 + struct ext3_inode_info *ei = EXT3_I(inode);
341 + unsigned long blocks;
343 + if (!test_opt(inode->i_sb, ASYNCDEL))
346 + /* We may want to delete the inode immediately and not defer it */
347 + blocks = inode->i_blocks >> (inode->i_blkbits - 9);
348 + if (IS_SYNC(inode) || blocks <= EXT3_NDIR_BLOCKS)
351 + inode->i_nlink = 1;
352 + atomic_inc(&inode->i_count);
353 + ei->i_state |= EXT3_STATE_DELETE;
355 + spin_lock(&sbi->s_delete_lock);
356 + J_ASSERT(list_empty(&inode->i_devices));
357 + list_add_tail(&inode->i_devices, &sbi->s_delete_list);
358 + sbi->s_delete_blocks += blocks;
359 + sbi->s_delete_inodes++;
360 + spin_unlock(&sbi->s_delete_lock);
362 + wake_up(&sbi->s_delete_thread_queue);
367 +#define ext3_try_to_delay_deletion(inode) do {} while (0)
370 static int ext3_unlink(struct inode * dir, struct dentry *dentry)
374 dir->u.ext3_i.i_flags &= ~EXT3_INDEX_FL;
375 ext3_mark_inode_dirty(handle, dir);
377 - if (!inode->i_nlink)
378 + if (!inode->i_nlink) {
379 + ext3_try_to_delay_deletion(inode);
380 ext3_orphan_add(handle, inode);
382 inode->i_ctime = dir->i_ctime;
383 ext3_mark_inode_dirty(handle, inode);
385 Index: linux-2.4.29/include/linux/ext3_fs.h
386 ===================================================================
387 --- linux-2.4.29.orig/include/linux/ext3_fs.h 2005-05-03 15:53:37.124914016 +0300
388 +++ linux-2.4.29/include/linux/ext3_fs.h 2005-05-03 15:53:56.907906544 +0300
391 #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */
392 #define EXT3_STATE_NEW 0x00000002 /* inode is newly created */
393 +#define EXT3_STATE_DELETE 0x00000010 /* deferred delete inode */
398 #define EXT3_MOUNT_UPDATE_JOURNAL 0x1000 /* Update the journal format */
399 #define EXT3_MOUNT_NO_UID32 0x2000 /* Disable 32-bit UIDs */
400 #define EXT3_MOUNT_XATTR_USER 0x4000 /* Extended user attributes */
401 +#define EXT3_MOUNT_ASYNCDEL 0x20000 /* Delayed deletion */
403 /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
404 #ifndef _LINUX_EXT2_FS_H
406 extern void ext3_dirty_inode(struct inode *);
407 extern int ext3_change_inode_journal_flag(struct inode *, int);
408 extern void ext3_truncate (struct inode *);
409 +#ifdef EXT3_DELETE_THREAD
410 +extern void ext3_truncate_thread(struct inode *inode);
412 extern void ext3_set_inode_flags(struct inode *);
415 Index: linux-2.4.29/include/linux/ext3_fs_sb.h
416 ===================================================================
417 --- linux-2.4.29.orig/include/linux/ext3_fs_sb.h 2005-05-03 15:53:33.048533720 +0300
418 +++ linux-2.4.29/include/linux/ext3_fs_sb.h 2005-05-03 15:53:56.909906240 +0300
421 #define EXT3_MAX_GROUP_LOADED 8
423 +#define EXT3_DELETE_THREAD
426 * third extended-fs super-block data in memory
429 struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */
430 wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */
432 +#ifdef EXT3_DELETE_THREAD
433 + spinlock_t s_delete_lock;
434 + struct list_head s_delete_list;
435 + unsigned long s_delete_blocks;
436 + unsigned long s_delete_inodes;
437 + wait_queue_head_t s_delete_thread_queue;
438 + wait_queue_head_t s_delete_waiter_queue;
442 #endif /* _LINUX_EXT3_FS_SB */