Whamcloud - gitweb
- merge 0.7rc1 from b_devel to HEAD (20030612 merge point)
[fs/lustre-release.git] / lustre / kernel_patches / patches / ext3-delete_thread-2.4.18.patch
1  0 files changed
2
3 --- linux-2.4.18-chaos52/fs/ext3/super.c~ext3-delete_thread-2.4.18      2003-06-01 03:24:13.000000000 +0800
4 +++ linux-2.4.18-chaos52-root/fs/ext3/super.c   2003-06-03 17:01:49.000000000 +0800
5 @@ -398,6 +398,210 @@ static void dump_orphan_list(struct supe
6         }
7  }
8  
9 +#ifdef EXT3_DELETE_THREAD
10 +/*
11 + * Kernel thread that deletes the inodes clients queue on s_delete_list.
12 + * Each wakeup on s_delete_thread_queue drains the list with iput(), then
13 + * wakes s_delete_waiter_queue so that anyone concerned about free space
14 + * (e.g. block allocation) can sleep there until space has been freed.
15 + * A wakeup that finds the list empty zeroes the list head and exits.
16 + * NOTE(review): sleep_on() may miss a wake_up() sent while we are running.
17 + */
18 +int ext3_delete_thread(void *data)
19 +{
20 +       struct super_block *sb = data;
21 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
22 +       struct task_struct *tsk = current;
23 +
24 +       /* Drop mm, tty and files: almost like daemonize, but not quite */
25 +       exit_mm(current);
26 +       tsk->session = 1;
27 +       tsk->pgrp = 1;
28 +       tsk->tty = NULL;
29 +       exit_files(current);
30 +       reparent_to_init();
31 +
32 +       sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev));
33 +       sigfillset(&tsk->blocked);
34 +
35 +       /*tsk->flags |= PF_KERNTHREAD;*/
36 +
37 +       INIT_LIST_HEAD(&sbi->s_delete_list);
38 +       wake_up(&sbi->s_delete_waiter_queue);
39 +       ext3_debug("EXT3-fs: delete thread on %s started\n",
40 +              kdevname(sb->s_dev));
41 +
42 +       /* main loop: one pass over s_delete_list per wakeup */
43 +       for (;;) {
44 +               sleep_on(&sbi->s_delete_thread_queue);
45 +               ext3_debug("%s woken up: %lu inodes, %lu blocks\n",
46 +                          tsk->comm,sbi->s_delete_inodes,sbi->s_delete_blocks);
47 +
48 +               spin_lock(&sbi->s_delete_lock);
49 +               if (list_empty(&sbi->s_delete_list)) {
50 +                       memset(&sbi->s_delete_list, 0,
51 +                              sizeof(sbi->s_delete_list));
52 +                       spin_unlock(&sbi->s_delete_lock);
53 +                       ext3_debug("ext3 delete thread on %s exiting\n",
54 +                              kdevname(sb->s_dev));
55 +                       wake_up(&sbi->s_delete_waiter_queue);
56 +                       break;
57 +               }
58 +
59 +               while (!list_empty(&sbi->s_delete_list)) {
60 +                       struct inode *inode=list_entry(sbi->s_delete_list.next,
61 +                                                      struct inode, i_dentry);
62 +                       unsigned long blocks = inode->i_blocks >>
63 +                                                       (inode->i_blkbits - 9);
64 +
65 +                       list_del_init(&inode->i_dentry);
66 +                       spin_unlock(&sbi->s_delete_lock);
67 +                       ext3_debug("%s delete ino %lu blk %lu\n",
68 +                                  tsk->comm, inode->i_ino, blocks);
69 +
70 +                       iput(inode);
71 +
72 +                       spin_lock(&sbi->s_delete_lock);
73 +                       sbi->s_delete_blocks -= blocks;
74 +                       sbi->s_delete_inodes--;
75 +               }
76 +               if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0)
77 +                       ext3_warning(sb, __FUNCTION__,
78 +                                    "%lu blocks, %lu inodes on list?\n",
79 +                                    sbi->s_delete_blocks,sbi->s_delete_inodes);
80 +               sbi->s_delete_blocks = 0;
81 +               sbi->s_delete_inodes = 0;
82 +               spin_unlock(&sbi->s_delete_lock);
83 +               wake_up(&sbi->s_delete_waiter_queue);
84 +       }
85 +
86 +       return 0;
87 +}
88 +
89 +/* Reset delete state (zeroed list == no thread yet) and start the thread. */
90 +static void ext3_start_delete_thread(struct super_block *sb)
91 +{
92 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
93 +       int pid;
94 +
95 +       memset(&sbi->s_delete_list, 0, sizeof(sbi->s_delete_list));
96 +       sbi->s_delete_inodes = sbi->s_delete_blocks = 0;
97 +       spin_lock_init(&sbi->s_delete_lock);
98 +       init_waitqueue_head(&sbi->s_delete_waiter_queue);
99 +       init_waitqueue_head(&sbi->s_delete_thread_queue);
100 +
101 +       pid = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES);
102 +       if (pid >= 0) { /* wait until the thread has set up the list head */
103 +               wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next);
104 +               return;
105 +       }
106 +       printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n", pid);
107 +}
108 +/* Wake the delete thread and wait for its exit (it zeroes s_delete_list). */
109 +static void ext3_stop_delete_thread(struct ext3_sb_info *sbi)
110 +{
111 +       wake_up(&sbi->s_delete_thread_queue);   /* exits if list is empty */
112 +       wait_event(sbi->s_delete_waiter_queue, !sbi->s_delete_list.next);
113 +}
114 +
115 +/* Instead of playing games with the inode flags, destruction, etc. we just
116 + * iget() a second in-core copy of the inode and queue it for the delete
117 + * thread.  We need large parts of the inode struct in order to complete the
118 + * truncate and unlink, so we may as well just have a real inode to do it.
119 + *
120 + * If we have any problem deferring the delete (no asyncdel mount option,
121 + * sync or small inode, no thread, failed iget), just delete it right away.
122 + * If we defer it, we also record how many blocks it would free in
123 + * s_delete_blocks, so that the statfs data can be kept correct and we know
124 + * if we should sleep on the delete thread when we run out of space.
125 + * In 2.5 this can be done much more cleanly by just registering a "drop"
126 + * method in the super_operations struct.
127 + */
128 +static void ext3_delete_inode_thread(struct inode *old_inode)
129 +{
130 +       struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
131 +       struct inode *new_inode;
132 +       unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
133 +
134 +       if (is_bad_inode(old_inode)) {
135 +               clear_inode(old_inode);
136 +               return;
137 +       }
138 +
139 +       if (!test_opt (old_inode->i_sb, ASYNCDEL)) {
140 +               ext3_delete_inode(old_inode);
141 +               return;
142 +       }
143 +
144 +       /* Sync inodes, small files, or no running thread: delete right now */
145 +       if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
146 +           !sbi->s_delete_list.next) {
147 +               ext3_delete_inode(old_inode);
148 +               return;
149 +       }
150 +
151 +       if (EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) {
152 +               ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
153 +                          old_inode->i_ino, blocks);
154 +               ext3_delete_inode(old_inode);
155 +               return;
156 +       }
157 +
158 +       /* We can iget this inode again here, because our caller has unhashed
159 +        * old_inode, so new_inode will be in a different inode struct.
160 +        * iget() can return NULL, so test that before is_bad_inode().
161 +        * We need to ensure that the i_orphan pointers in the other inodes
162 +        * point at the new inode copy instead of the old one so the orphan
163 +        * list doesn't get corrupted when the old orphan inode is freed.
164 +        */
165 +       down(&sbi->s_orphan_lock);
166 +
167 +       sbi->s_mount_state |= EXT3_ORPHAN_FS;
168 +       new_inode = iget(old_inode->i_sb, old_inode->i_ino);
169 +       sbi->s_mount_state &= ~EXT3_ORPHAN_FS;
170 +       if (new_inode && is_bad_inode(new_inode)) {
171 +               printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
172 +               iput(new_inode);
173 +               new_inode = NULL;
174 +       }
175 +       if (!new_inode) {
176 +               up(&sbi->s_orphan_lock);
177 +               ext3_debug("delete inode %lu directly (bad read)\n",
178 +                          old_inode->i_ino);
179 +               ext3_delete_inode(old_inode);
180 +               return;
181 +       }
182 +       J_ASSERT(new_inode != old_inode);
183 +
184 +       J_ASSERT(!list_empty(&EXT3_I(old_inode)->i_orphan));
185 +       /* Ugh.  We need to insert new_inode into the same spot on the list
186 +        * as old_inode was, to ensure the in-memory orphan list is still
187 +        * in the same order as the on-disk orphan list (badness otherwise).
188 +        */
189 +       EXT3_I(new_inode)->i_orphan = EXT3_I(old_inode)->i_orphan;
190 +       EXT3_I(new_inode)->i_orphan.next->prev = &EXT3_I(new_inode)->i_orphan;
191 +       EXT3_I(new_inode)->i_orphan.prev->next = &EXT3_I(new_inode)->i_orphan;
192 +       EXT3_I(new_inode)->i_state |= EXT3_STATE_DELETE;
193 +       up(&sbi->s_orphan_lock);
194 +
195 +       clear_inode(old_inode);
196 +
197 +       ext3_debug("delete inode %lu (%lu blocks) by thread\n",
198 +                  new_inode->i_ino, blocks);
199 +       spin_lock(&sbi->s_delete_lock);
200 +       J_ASSERT(list_empty(&new_inode->i_dentry));
201 +       list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
202 +       sbi->s_delete_blocks += blocks;
203 +       sbi->s_delete_inodes++;
204 +       spin_unlock(&sbi->s_delete_lock);
205 +
206 +       wake_up(&sbi->s_delete_thread_queue);
207 +}
208 +#else /* !EXT3_DELETE_THREAD: thread start/stop compile away to no-ops */
209 +#define ext3_start_delete_thread(sbi) do {} while(0)
210 +#define ext3_stop_delete_thread(sbi) do {} while(0)
211 +#endif /* EXT3_DELETE_THREAD */
212 +
213  void ext3_put_super (struct super_block * sb)
214  {
215         struct ext3_sb_info *sbi = EXT3_SB(sb);
216 @@ -405,6 +609,7 @@ void ext3_put_super (struct super_block 
217         kdev_t j_dev = sbi->s_journal->j_dev;
218         int i;
219  
220 +       ext3_stop_delete_thread(sbi);
221         ext3_xattr_put_super(sb);
222         journal_destroy(sbi->s_journal);
223         if (!(sb->s_flags & MS_RDONLY)) {
224 @@ -453,7 +658,11 @@ static struct super_operations ext3_sops
225         write_inode:    ext3_write_inode,       /* BKL not held.  Don't need */
226         dirty_inode:    ext3_dirty_inode,       /* BKL not held.  We take it */
227         put_inode:      ext3_put_inode,         /* BKL not held.  Don't need */
228 +#ifdef EXT3_DELETE_THREAD
229 +       delete_inode:   ext3_delete_inode_thread,/* BKL not held. We take it */
230 +#else
231         delete_inode:   ext3_delete_inode,      /* BKL not held.  We take it */
232 +#endif
233         put_super:      ext3_put_super,         /* BKL held */
234         write_super:    ext3_write_super,       /* BKL held */
235         sync_fs:        ext3_sync_fs,
236 @@ -514,6 +723,12 @@ static int parse_options (char * options
237              this_char = strtok (NULL, ",")) {
238                 if ((value = strchr (this_char, '=')) != NULL)
239                         *value++ = 0;
240 +#ifdef EXT3_DELETE_THREAD
241 +               if (!strcmp(this_char, "asyncdel"))
242 +                       set_opt(*mount_options, ASYNCDEL);
243 +               else
244 +#endif
245 +
246                 if (!strcmp (this_char, "bsddf"))
247                         clear_opt (*mount_options, MINIX_DF);
248                 else if (!strcmp (this_char, "nouid32")) {
249 @@ -1209,6 +1424,7 @@ struct super_block * ext3_read_super (st
250         }
251  
252         ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
253 +       ext3_start_delete_thread(sb);
254         /*
255          * akpm: core read_super() calls in here with the superblock locked.
256          * That deadlocks, because orphan cleanup needs to lock the superblock
257 --- linux-2.4.18-chaos52/include/linux/ext3_fs.h~ext3-delete_thread-2.4.18      2003-06-01 03:24:11.000000000 +0800
258 +++ linux-2.4.18-chaos52-root/include/linux/ext3_fs.h   2003-06-03 17:03:28.000000000 +0800
259 @@ -190,6 +190,7 @@ struct ext3_group_desc
260   */
261  #define EXT3_STATE_JDATA               0x00000001 /* journaled data exists */
262  #define EXT3_STATE_NEW                 0x00000002 /* inode is newly created */
263 +#define EXT3_STATE_DELETE              0x00000010 /* deferred delete inode */
264  
265  /*
266   * ioctl commands
267 @@ -317,6 +318,7 @@ struct ext3_inode {
268  #define EXT3_MOUNT_UPDATE_JOURNAL      0x1000  /* Update the journal format */
269  #define EXT3_MOUNT_NO_UID32            0x2000  /* Disable 32-bit UIDs */
270  #define EXT3_MOUNT_INDEX               0x4000  /* Enable directory index */
271 +#define EXT3_MOUNT_ASYNCDEL            0x20000 /* Delayed deletion */
272  
273  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
274  #ifndef _LINUX_EXT2_FS_H
275 --- linux-2.4.18-chaos52/include/linux/ext3_fs_sb.h~ext3-delete_thread-2.4.18   2003-06-01 03:24:13.000000000 +0800
276 +++ linux-2.4.18-chaos52-root/include/linux/ext3_fs_sb.h        2003-06-03 16:59:24.000000000 +0800
277 @@ -29,6 +29,8 @@
278  
279  #define EXT3_MAX_GROUP_LOADED  32
280  
281 +#define EXT3_DELETE_THREAD
282 +
283  /*
284   * third extended-fs super-block data in memory
285   */
286 @@ -74,6 +76,14 @@ struct ext3_sb_info {
287         struct timer_list turn_ro_timer;        /* For turning read-only (crash simulation) */
288         wait_queue_head_t ro_wait_queue;        /* For people waiting for the fs to go read-only */
289  #endif
290 +#ifdef EXT3_DELETE_THREAD
291 +       spinlock_t s_delete_lock;
292 +       struct list_head s_delete_list;
293 +       unsigned long s_delete_blocks;
294 +       unsigned long s_delete_inodes;
295 +       wait_queue_head_t s_delete_thread_queue;
296 +       wait_queue_head_t s_delete_waiter_queue;
297 +#endif
298  };
299  
300  #endif /* _LINUX_EXT3_FS_SB */
301
302 _