Whamcloud - gitweb
merge b_devel into HEAD, which will become 0.7.3
[fs/lustre-release.git] / lustre / kernel_patches / patches / extN-delete_thread.patch
1  0 files changed
2
3 --- linux-2.4.18-p4smp-61chaos/include/linux/ext3_fs.h~extN-delete_thread       2003-05-29 10:19:15.000000000 +0800
4 +++ linux-2.4.18-p4smp-61chaos-root/include/linux/ext3_fs.h     2003-05-29 10:50:04.000000000 +0800
5 @@ -190,6 +190,7 @@ struct ext3_group_desc
6   */
7  #define EXT3_STATE_JDATA               0x00000001 /* journaled data exists */
8  #define EXT3_STATE_NEW                 0x00000002 /* inode is newly created */
9 +#define EXT3_STATE_DELETE              0x00000010 /* deferred delete inode */
10  
11  /*
12   * ioctl commands
13 --- linux-2.4.18-p4smp-61chaos/include/linux/ext3_fs_sb.h~extN-delete_thread    2003-05-29 10:19:15.000000000 +0800
14 +++ linux-2.4.18-p4smp-61chaos-root/include/linux/ext3_fs_sb.h  2003-05-29 10:50:04.000000000 +0800
15 @@ -29,6 +29,8 @@
16  
17  #define EXT3_MAX_GROUP_LOADED  32
18  
19 +#define EXT3_DELETE_THREAD      /* compile in the deferred-delete thread */
20 +
21  /*
22   * third extended-fs super-block data in memory
23   */
24 @@ -74,6 +76,14 @@ struct ext3_sb_info {
25         struct timer_list turn_ro_timer;        /* For turning read-only (crash simulation) */
26         wait_queue_head_t ro_wait_queue;        /* For people waiting for the fs to go read-only */
27  #endif
28 +#ifdef EXT3_DELETE_THREAD
29 +       spinlock_t s_delete_lock;
30 +       struct list_head s_delete_list;
31 +       unsigned long s_delete_blocks;
32 +       unsigned long s_delete_inodes;
33 +       wait_queue_head_t s_delete_thread_queue;
34 +       wait_queue_head_t s_delete_waiter_queue;
35 +#endif
36  };
37  
38  #endif /* _LINUX_EXT3_FS_SB */
39 --- linux-2.4.18-p4smp-61chaos/fs/ext3/super.c~extN-delete_thread       2003-05-29 10:19:15.000000000 +0800
40 +++ linux-2.4.18-p4smp-61chaos-root/fs/ext3/super.c     2003-05-29 10:50:04.000000000 +0800
41 @@ -398,6 +398,207 @@ static void dump_orphan_list(struct supe
42         }
43  }
44  
45 +#ifdef EXT3_DELETE_THREAD
46 +/*
47 + * Background thread doing the final iput() of deferred-delete inodes.  Each
48 + * wakeup on s_delete_thread_queue drains s_delete_list; a wakeup that finds
49 + * the list already empty makes the thread zero the list head and exit.
50 + * s_delete_waiter_queue is woken after every drain, so anyone concerned
51 + * about free space (e.g. block allocation) can sleep there for it.
52 + * NOTE(review): sleep_on() can miss a wake_up() sent while we are draining.
53 + */
54 +int ext3_delete_thread(void *data)
55 +{
56 +       struct super_block *sb = data;
57 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
58 +       struct task_struct *tsk = current;
59 +
60 +       /* Almost like daemonize, but not quite */
61 +       exit_mm(current);
62 +       tsk->session = 1;
63 +       tsk->pgrp = 1;
64 +       tsk->tty = NULL;
65 +       exit_files(current);
66 +       reparent_to_init();
67 +
68 +       sprintf(tsk->comm, "kdelext3-%s", kdevname(sb->s_dev));
69 +       sigfillset(&tsk->blocked);
70 +
71 +       tsk->flags |= PF_KERNTHREAD;
72 +
73 +       INIT_LIST_HEAD(&sbi->s_delete_list);
74 +       wake_up(&sbi->s_delete_waiter_queue);
75 +       printk(KERN_INFO "EXT3-fs: delete thread on %s started\n",
76 +              kdevname(sb->s_dev));
77 +
78 +       /* main loop */
79 +       for (;;) {
80 +               sleep_on(&sbi->s_delete_thread_queue);
81 +               printk(KERN_DEBUG "%s woken up: %lu inodes, %lu blocks\n",
82 +                      tsk->comm, sbi->s_delete_inodes, sbi->s_delete_blocks);
83 +
84 +               spin_lock(&sbi->s_delete_lock);
85 +               if (list_empty(&sbi->s_delete_list)) {
86 +                       memset(&sbi->s_delete_list, 0,
87 +                              sizeof(sbi->s_delete_list));
88 +                       spin_unlock(&sbi->s_delete_lock);
89 +                       printk(KERN_DEBUG "ext3 delete thread on %s exiting\n",
90 +                              kdevname(sb->s_dev));
91 +                       wake_up(&sbi->s_delete_waiter_queue);
92 +                       break;
93 +               }
94 +
95 +               while (!list_empty(&sbi->s_delete_list)) {
96 +                       struct inode *inode=list_entry(sbi->s_delete_list.next,
97 +                                                      struct inode, i_dentry);
98 +                       unsigned long blocks = inode->i_blocks >>
99 +                                                       (inode->i_blkbits - 9);
100 +
101 +                       list_del_init(&inode->i_dentry);
102 +                       spin_unlock(&sbi->s_delete_lock);
103 +                       printk(KERN_DEBUG "%s delete ino %lu blk %lu\n",
104 +                                  tsk->comm, inode->i_ino, blocks);
105 +
106 +                       iput(inode);
107 +
108 +                       spin_lock(&sbi->s_delete_lock);
109 +                       sbi->s_delete_blocks -= blocks;
110 +                       sbi->s_delete_inodes--;
111 +               }
112 +               if (sbi->s_delete_blocks != 0 || sbi->s_delete_inodes != 0)
113 +                       printk(KERN_WARNING
114 +                              "%lu blocks and %lu inodes left on list?\n",
115 +                              sbi->s_delete_blocks, sbi->s_delete_inodes);
116 +               sbi->s_delete_blocks = 0;
117 +               sbi->s_delete_inodes = 0;
118 +               spin_unlock(&sbi->s_delete_lock);
119 +               wake_up(&sbi->s_delete_waiter_queue);
120 +       }
121 +
122 +       return 0;
123 +}
124 +
125 +/* Reset the deferred-delete state in the sb info and spawn the thread. */
126 +static void ext3_start_delete_thread(struct super_block *sb)
127 +{
128 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
129 +       int pid;
130 +
131 +       sbi->s_delete_blocks = sbi->s_delete_inodes = 0;
132 +       init_waitqueue_head(&sbi->s_delete_waiter_queue);
133 +       init_waitqueue_head(&sbi->s_delete_thread_queue);
134 +       memset(&sbi->s_delete_list, 0, sizeof(sbi->s_delete_list));
135 +       spin_lock_init(&sbi->s_delete_lock);
136 +       pid = kernel_thread(ext3_delete_thread, sb, CLONE_VM | CLONE_FILES);
137 +       if (pid >= 0)   /* wait until the thread has set up s_delete_list */
138 +               wait_event(sbi->s_delete_waiter_queue, sbi->s_delete_list.next);
139 +       else
140 +               printk(KERN_ERR "EXT3-fs: cannot start delete thread: rc %d\n",
141 +                      pid);
142 +}
143 +
144 +/*
145 + * Stop the delete thread (unmount).  It exits, zeroing s_delete_list, only
146 + * when woken with an empty list, and sleep_on() can miss a wakeup, so keep
147 + * kicking it until the list head is zeroed (also true if it never started).
148 + */
149 +static void ext3_stop_delete_thread(struct ext3_sb_info *sbi)
150 +{
151 +       while (sbi->s_delete_list.next != NULL) {
152 +               wake_up(&sbi->s_delete_thread_queue);
153 +               sleep_on_timeout(&sbi->s_delete_waiter_queue, HZ / 10);
154 +       }
155 +}
156 +
157 +/*
158 + * delete_inode hook: defer large deletes to the delete thread.  The caller
159 + * has unhashed old_inode, so iget() gives us a second in-core copy; it takes
160 + * old_inode's place on the orphan list and is queued on s_delete_list (via
161 + * i_dentry), and the thread's final iput() does the real delete.  We also
162 + * count the blocks it will free in s_delete_blocks.  If we cannot or need
163 + * not defer, the inode is deleted right away with ext3_delete_inode().
164 + * In 2.5 a "drop" method in super_operations would do this more cleanly.
165 + */
166 +static void ext3_delete_inode_thread(struct inode *old_inode)
167 +{
168 +       struct ext3_sb_info *sbi = EXT3_SB(old_inode->i_sb);
169 +       struct inode *new_inode;
170 +       unsigned long blocks = old_inode->i_blocks >> (old_inode->i_blkbits-9);
171 +
172 +       if (is_bad_inode(old_inode)) {
173 +               clear_inode(old_inode);
174 +               return;
175 +       }
176 +
177 +       /* We may want to delete the inode immediately and not defer it */
178 +       if (IS_SYNC(old_inode) || blocks <= EXT3_NDIR_BLOCKS ||
179 +           !sbi->s_delete_list.next) {
180 +               ext3_delete_inode(old_inode);
181 +               return;
182 +       }
183 +
184 +       if (EXT3_I(old_inode)->i_state & EXT3_STATE_DELETE) {
185 +               ext3_debug("doing deferred inode %lu delete (%lu blocks)\n",
186 +                          old_inode->i_ino, blocks);
187 +               ext3_delete_inode(old_inode);
188 +               return;
189 +       }
190 +
191 +       /* We can iget this inode again here, because our caller has unhashed
192 +        * old_inode, so new_inode will be in a different inode struct.
193 +        *
194 +        * We need to ensure that the i_orphan pointers in the other inodes
195 +        * point at the new inode copy instead of the old one so the orphan
196 +        * list doesn't get corrupted when the old orphan inode is freed.
197 +        */
198 +       down(&sbi->s_orphan_lock);
199 +
200 +       EXT3_SB(old_inode->i_sb)->s_mount_state |= EXT3_ORPHAN_FS;
201 +       new_inode = iget(old_inode->i_sb, old_inode->i_ino);
202 +       EXT3_SB(old_inode->i_sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
203 +       if (new_inode && is_bad_inode(new_inode)) {
204 +               printk(KERN_WARNING "read bad inode %lu\n", old_inode->i_ino);
205 +               iput(new_inode);
206 +               new_inode = NULL;
207 +       }
208 +       if (!new_inode) {
209 +               up(&sbi->s_orphan_lock);
210 +               ext3_debug("delete inode %lu directly (bad read)\n",
211 +                          old_inode->i_ino);
212 +               ext3_delete_inode(old_inode);
213 +               return;
214 +       }
215 +       J_ASSERT(new_inode != old_inode);
216 +
217 +       J_ASSERT(!list_empty(&EXT3_I(old_inode)->i_orphan));
218 +       /* Ugh.  We need to insert new_inode into the same spot on the list
219 +        * as old_inode was, to ensure the in-memory orphan list is still
220 +        * the same as the on-disk orphan list.
221 +        */
222 +       EXT3_I(new_inode)->i_orphan = EXT3_I(old_inode)->i_orphan;
223 +       EXT3_I(new_inode)->i_orphan.next->prev = &EXT3_I(new_inode)->i_orphan;
224 +       EXT3_I(new_inode)->i_orphan.prev->next = &EXT3_I(new_inode)->i_orphan;
225 +       EXT3_I(new_inode)->i_state |= EXT3_STATE_DELETE;
226 +       up(&sbi->s_orphan_lock);
227 +
228 +       clear_inode(old_inode);
229 +
230 +       printk(KERN_DEBUG "delete inode %lu (%lu blocks) by thread\n",
231 +              new_inode->i_ino, blocks);
232 +       spin_lock(&sbi->s_delete_lock);
233 +       J_ASSERT(list_empty(&new_inode->i_dentry));
234 +       list_add_tail(&new_inode->i_dentry, &sbi->s_delete_list);
235 +       sbi->s_delete_blocks += blocks;
236 +       sbi->s_delete_inodes++;
237 +       spin_unlock(&sbi->s_delete_lock);
238 +
239 +       wake_up(&sbi->s_delete_thread_queue);
240 +}
241 +#else /* !EXT3_DELETE_THREAD: no thread to start or stop */
242 +#define ext3_start_delete_thread(sbi) do {} while(0)
243 +#define ext3_stop_delete_thread(sbi) do {} while(0)
244 +#endif /* EXT3_DELETE_THREAD */
245 +
246  void ext3_put_super (struct super_block * sb)
247  {
248         struct ext3_sb_info *sbi = EXT3_SB(sb);
249 @@ -405,6 +606,7 @@ void ext3_put_super (struct super_block 
250         kdev_t j_dev = sbi->s_journal->j_dev;
251         int i;
252  
253 +       ext3_stop_delete_thread(sbi);
254         ext3_xattr_put_super(sb);
255         journal_destroy(sbi->s_journal);
256         if (!(sb->s_flags & MS_RDONLY)) {
257 @@ -453,7 +655,11 @@ static struct super_operations ext3_sops
258         write_inode:    ext3_write_inode,       /* BKL not held.  Don't need */
259         dirty_inode:    ext3_dirty_inode,       /* BKL not held.  We take it */
260         put_inode:      ext3_put_inode,         /* BKL not held.  Don't need */
261 +#ifdef EXT3_DELETE_THREAD
262 +       delete_inode:   ext3_delete_inode_thread,/* BKL not held. We take it */
263 +#else
264         delete_inode:   ext3_delete_inode,      /* BKL not held.  We take it */
265 +#endif
266         put_super:      ext3_put_super,         /* BKL held */
267         write_super:    ext3_write_super,       /* BKL held */
268         sync_fs:        ext3_sync_fs,
269 @@ -1209,6 +1415,7 @@ struct super_block * ext3_read_super (st
270         }
271  
272         ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
273 +       ext3_start_delete_thread(sb);
274         /*
275          * akpm: core read_super() calls in here with the superblock locked.
276          * That deadlocks, because orphan cleanup needs to lock the superblock
277
278 _