Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / kernel_patches / patches / ext3-mballoc3-rhel4.patch
1 Index: linux-2.6.9-full/include/linux/ext3_fs_i.h
2 ===================================================================
3 --- linux-2.6.9-full.orig/include/linux/ext3_fs_i.h     2007-03-28 01:29:38.000000000 +0400
4 +++ linux-2.6.9-full/include/linux/ext3_fs_i.h  2007-03-28 15:45:41.000000000 +0400
5 @@ -130,6 +130,10 @@ struct ext3_inode_info {
6         struct inode vfs_inode;
7  
8         __u32 i_cached_extent[4];
9 +
10 +       /* mballoc */
11 +       struct list_head i_prealloc_list;
12 +       spinlock_t i_prealloc_lock;
13  };
14  
15  #endif /* _LINUX_EXT3_FS_I */
16 Index: linux-2.6.9-full/include/linux/ext3_fs_sb.h
17 ===================================================================
18 --- linux-2.6.9-full.orig/include/linux/ext3_fs_sb.h    2007-03-28 15:42:16.000000000 +0400
19 +++ linux-2.6.9-full/include/linux/ext3_fs_sb.h 2007-03-28 15:45:41.000000000 +0400
20 @@ -23,9 +23,16 @@
21  #define EXT_INCLUDE
22  #include <linux/blockgroup_lock.h>
23  #include <linux/percpu_counter.h>
24 +#include <linux/list.h>
25  #endif
26  #endif
27  #include <linux/rbtree.h>
28 +#include <linux/proc_fs.h>
29 +
30 +struct ext3_buddy_group_blocks;
31 +struct ext3_locality_group;
32 +struct ext3_mb_history;
33 +#define EXT3_BB_MAX_BLOCKS
34  
35  /*
36   * third extended-fs super-block data in memory
37 Index: linux-2.6.9-full/include/linux/ext3_fs.h
38 ===================================================================
39 --- linux-2.6.9-full.orig/include/linux/ext3_fs.h       2007-03-28 15:45:07.000000000 +0400
40 +++ linux-2.6.9-full/include/linux/ext3_fs.h    2007-03-28 15:45:41.000000000 +0400
41 @@ -389,6 +389,7 @@ struct ext3_inode {
42  #define EXT3_MOUNT_IOPEN_NOPRIV                0x100000/* Make iopen world-readable */
43  #define EXT3_MOUNT_EXTENTS             0x200000/* Extents support */
44  #define EXT3_MOUNT_EXTDEBUG            0x400000/* Extents debug */
45 +#define EXT3_MOUNT_MBALLOC             0x800000/* Buddy allocation support */
46  
47  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
48  #ifndef clear_opt
49 @@ -749,8 +750,9 @@ struct dir_private_info {
50  extern int ext3_bg_has_super(struct super_block *sb, int group);
51  extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
52  extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
53 +extern int ext3_new_block_old (handle_t *, struct inode *, unsigned long, int *);
54  extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
55 -                             unsigned long);
56 +                             unsigned long, int);
57  extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
58                                  unsigned long, unsigned long, int *);
59  extern unsigned long ext3_count_free_blocks (struct super_block *);
60 Index: linux-2.6.9-full/fs/ext3/super.c
61 ===================================================================
62 --- linux-2.6.9-full.orig/fs/ext3/super.c       2007-03-28 15:42:16.000000000 +0400
63 +++ linux-2.6.9-full/fs/ext3/super.c    2007-03-28 15:45:41.000000000 +0400
64 @@ -600,6 +600,7 @@ enum {
65         Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
66         Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
67         Opt_extents, Opt_noextents, Opt_extdebug,
68 +       Opt_mballoc, Opt_nomballoc, Opt_stripe,
69  };
70  
71  static match_table_t tokens = {
72 @@ -653,6 +654,9 @@ static match_table_t tokens = {
73         {Opt_noextents, "noextents"},
74         {Opt_extdebug, "extdebug"},
75         {Opt_barrier, "barrier=%u"},
76 +       {Opt_mballoc, "mballoc"},
77 +       {Opt_nomballoc, "nomballoc"},
78 +       {Opt_stripe, "stripe=%u"},
79         {Opt_err, NULL},
80         {Opt_resize, "resize"},
81  };
82 @@ -965,6 +969,19 @@ clear_qf_name:
83                 case Opt_extdebug:
84                         set_opt (sbi->s_mount_opt, EXTDEBUG);
85                         break;
86 +               case Opt_mballoc:
87 +                       set_opt(sbi->s_mount_opt, MBALLOC);
88 +                       break;
89 +               case Opt_nomballoc:
90 +                       clear_opt(sbi->s_mount_opt, MBALLOC);
91 +                       break;
92 +               case Opt_stripe:
93 +                       if (match_int(&args[0], &option))
94 +                               return 0;
95 +                       if (option < 0)
96 +                               return 0;
97 +                       sbi->s_stripe = option;
98 +                       break;
99                 default:
100                         printk (KERN_ERR
101                                 "EXT3-fs: Unrecognized mount option \"%s\" "
102 @@ -1654,6 +1671,7 @@ static int ext3_fill_super (struct super
103                 ext3_count_dirs(sb));
104  
105         ext3_ext_init(sb);
106 +       ext3_mb_init(sb, needs_recovery);
107  
108         return 0;
109  
110 Index: linux-2.6.9-full/fs/ext3/extents.c
111 ===================================================================
112 --- linux-2.6.9-full.orig/fs/ext3/extents.c     2007-03-28 01:29:41.000000000 +0400
113 +++ linux-2.6.9-full/fs/ext3/extents.c  2007-03-28 15:45:41.000000000 +0400
114 @@ -779,7 +779,7 @@ cleanup:
115                 for (i = 0; i < depth; i++) {
116                         if (!ablocks[i])
117                                 continue;
118 -                       ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
119 +                       ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
120                 }
121         }
122         kfree(ablocks);
123 @@ -1586,7 +1586,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
124                   path->p_idx->ei_leaf);
125         bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
126         ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
127 -       ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
128 +       ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1);
129         return err;
130  }
131  
132 @@ -2071,10 +2071,12 @@ ext3_remove_blocks(struct ext3_extents_t
133         int needed = ext3_remove_blocks_credits(tree, ex, from, to);
134         handle_t *handle = ext3_journal_start(tree->inode, needed);
135         struct buffer_head *bh;
136 -       int i;
137 +       int i, metadata = 0;
138  
139         if (IS_ERR(handle))
140                 return PTR_ERR(handle);
141 +       if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode))
142 +               metadata = 1;
143         if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
144                 /* tail removal */
145                 unsigned long num, start;
146 @@ -2086,7 +2088,7 @@ ext3_remove_blocks(struct ext3_extents_t
147                         bh = sb_find_get_block(tree->inode->i_sb, start + i);
148                         ext3_forget(handle, 0, tree->inode, bh, start + i);
149                 }
150 -               ext3_free_blocks(handle, tree->inode, start, num);
151 +               ext3_free_blocks(handle, tree->inode, start, num, metadata);
152         } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
153                 printk("strange request: removal %lu-%lu from %u:%u\n",
154                        from, to, ex->ee_block, ex->ee_len);
155 @@ -2177,11 +2179,8 @@ int ext3_ext_get_block(handle_t *handle,
156         struct ext3_extent *ex;
157         int goal, newblock, err = 0, depth;
158         struct ext3_extents_tree tree;
159 -       unsigned long next;
160 -       int allocated = 0;
161 -
162 -       /* until we have multiblock allocation */
163 -       max_blocks = 1;
164 +       unsigned long allocated = 0;
165 +       struct ext3_allocation_request ar;
166  
167         clear_buffer_new(bh_result);
168         ext3_init_tree_desc(&tree, inode);
169 @@ -2253,18 +2252,33 @@ int ext3_ext_get_block(handle_t *handle,
170                 goto out2;
171         }
172  
173 +       /* find neighbouring allocated blocks */
174 +       ar.lleft = iblock;
175 +       err = ext3_ext_search_left(&tree, path, &ar.lleft, &ar.pleft);
176 +       if (err)
177 +               goto out2;
178 +       ar.lright = iblock;
179 +       err = ext3_ext_search_right(&tree, path, &ar.lright, &ar.pright);
180 +       if (err)
181 +               goto out2;
182 +
183         /* find next allocated block so that we know how many
184          * blocks we can allocate without ovelapping next extent */
185 -       EXT_ASSERT(iblock >= ex->ee_block + ex->ee_len);
186 -       next = ext3_ext_next_allocated_block(path);
187 -       EXT_ASSERT(next > iblock);
188 -       allocated = next - iblock;
189 +       EXT_ASSERT(ar.pright == 0 || ar.lright > iblock);
190 +       if (ar.pright == 0)
191 +               allocated = EXT_MAX_BLOCK - iblock;
192 +       else
193 +               allocated = ar.lright - iblock;
194         if (allocated > max_blocks)
195                 allocated = max_blocks;
196  
197         /* allocate new block */
198 -       goal = ext3_ext_find_goal(inode, path, iblock);
199 -       newblock = ext3_new_block(handle, inode, goal, &err);
200 +       ar.inode = inode;
201 +       ar.goal = ext3_ext_find_goal(inode, path, iblock);
202 +       ar.logical = iblock;
203 +       ar.len = allocated;
204 +       ar.flags = EXT3_MB_HINT_DATA;
205 +       newblock = ext3_mb_new_blocks(handle, &ar, &err);
206         if (!newblock)
207                 goto out2;
208         ext_debug(&tree, "allocate new block: goal %d, found %d\n",
209 @@ -2274,11 +2288,14 @@ int ext3_ext_get_block(handle_t *handle,
210         newex.ee_block = iblock;
211         newex.ee_start = newblock;
212         newex.ee_start_hi = 0;
213 -       newex.ee_len = 1;
214 +       newex.ee_len = ar.len;
215         err = ext3_ext_insert_extent(handle, &tree, path, &newex);
216         if (err) {
217                 /* free data blocks we just allocated */
218 -               ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len);
219 +               /* not a good idea to call discard here directly,
220 +                * but otherwise we'd need to call it on every free() */
221 +               ext3_mb_discard_inode_preallocations(inode);
222 +               ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len, 0);
223                 goto out2;
224         }
225         
226 @@ -2287,6 +2304,7 @@ int ext3_ext_get_block(handle_t *handle,
227  
228         /* previous routine could use block we allocated */
229         newblock = newex.ee_start;
230 +       allocated = newex.ee_len;
231         set_buffer_new(bh_result);
232  
233         ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len,
234 @@ -2339,6 +2357,9 @@ void ext3_ext_truncate(struct inode * in
235         down(&EXT3_I(inode)->truncate_sem);
236         ext3_ext_invalidate_cache(&tree);
237  
238 +       /* it's important to discard preallocations under truncate_sem */
239 +       ext3_mb_discard_inode_preallocations(inode);
240 +
241         /* 
242          * TODO: optimization is possible here
243          * probably we need not scaning at all,
244 Index: linux-2.6.9-full/fs/ext3/Makefile
245 ===================================================================
246 --- linux-2.6.9-full.orig/fs/ext3/Makefile      2007-03-28 01:29:38.000000000 +0400
247 +++ linux-2.6.9-full/fs/ext3/Makefile   2007-03-28 15:45:41.000000000 +0400
248 @@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
249  
250  ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
251            ioctl.o namei.o super.o symlink.o hash.o resize.o \
252 -          extents.o
253 +          extents.o mballoc.o
254  
255  ext3-$(CONFIG_EXT3_FS_XATTR)    += xattr.o xattr_user.o xattr_trusted.o
256  ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
257 Index: linux-2.6.9-full/fs/ext3/xattr.c
258 ===================================================================
259 --- linux-2.6.9-full.orig/fs/ext3/xattr.c       2006-05-18 23:57:04.000000000 +0400
260 +++ linux-2.6.9-full/fs/ext3/xattr.c    2007-03-28 15:45:41.000000000 +0400
261 @@ -1281,7 +1281,7 @@ ext3_xattr_set_handle2(handle_t *handle,
262                         new_bh = sb_getblk(sb, block);
263                         if (!new_bh) {
264  getblk_failed:
265 -                               ext3_free_blocks(handle, inode, block, 1);
266 +                               ext3_free_blocks(handle, inode, block, 1, 1);
267                                 error = -EIO;
268                                 goto cleanup;
269                         }
270 @@ -1328,7 +1328,7 @@ getblk_failed:
271                         if (ce)
272                                 mb_cache_entry_free(ce);
273                         ea_bdebug(old_bh, "freeing");
274 -                       ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
275 +                       ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1);
276  
277                         /* ext3_forget() calls bforget() for us, but we
278                            let our caller release old_bh, so we need to
279 @@ -1427,7 +1427,7 @@ ext3_xattr_delete_inode(handle_t *handle
280         if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
281                 if (ce)
282                         mb_cache_entry_free(ce);
283 -               ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
284 +               ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1);
285                 get_bh(bh);
286                 ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
287         } else {
288 Index: linux-2.6.9-full/fs/ext3/balloc.c
289 ===================================================================
290 --- linux-2.6.9-full.orig/fs/ext3/balloc.c      2006-03-10 18:20:03.000000000 +0300
291 +++ linux-2.6.9-full/fs/ext3/balloc.c   2007-03-28 15:45:41.000000000 +0400
292 @@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
293   *
294   * Return buffer_head on success or NULL in case of failure.
295   */
296 -static struct buffer_head *
297 +struct buffer_head *
298  read_block_bitmap(struct super_block *sb, unsigned int block_group)
299  {
300         struct ext3_group_desc * desc;
301 @@ -267,6 +267,8 @@ void ext3_discard_reservation(struct ino
302         struct reserve_window_node *rsv = &ei->i_rsv_window;
303         spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock;
304  
305 +       ext3_mb_discard_inode_preallocations(inode);
306 +
307         if (!rsv_is_empty(&rsv->rsv_window)) {
308                 spin_lock(rsv_lock);
309                 if (!rsv_is_empty(&rsv->rsv_window))
310 @@ -451,21 +453,25 @@ error_return:
311         return;
312  }
313  
314 -/* Free given blocks, update quota and i_blocks field */
315 -void ext3_free_blocks(handle_t *handle, struct inode *inode,
316 -                       unsigned long block, unsigned long count)
317 +void ext3_free_blocks(handle_t *handle, struct inode * inode,
318 +               unsigned long block, unsigned long count, int metadata)
319  {
320 -       struct super_block * sb;
321 -       int dquot_freed_blocks;
322 +       struct super_block *sb;
323 +       int freed;
324 +
325 +       /* this isn't the right place to decide whether a block is metadata;
326 +        * inode.c/extents.c know better, but for safety ... */
327 +       if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
328 +                       ext3_should_journal_data(inode))
329 +               metadata = 1;
330  
331         sb = inode->i_sb;
332 -       if (!sb) {
333 -               printk ("ext3_free_blocks: nonexistent device");
334 -               return;
335 -       }
336 -       ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
337 -       if (dquot_freed_blocks)
338 -               DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
339 +       if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info)
340 +               ext3_free_blocks_sb(handle, sb, block, count, &freed);
341 +       else
342 +               ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed);
343 +       if (freed)
344 +               DQUOT_FREE_BLOCK(inode, freed);
345         return;
346  }
347  
348 @@ -1131,7 +1137,7 @@ int ext3_should_retry_alloc(struct super
349   * bitmap, and then for any free bit if that fails.
350   * This function also updates quota and i_blocks field.
351   */
352 -int ext3_new_block(handle_t *handle, struct inode *inode,
353 +int ext3_new_block_old(handle_t *handle, struct inode *inode,
354                         unsigned long goal, int *errp)
355  {
356         struct buffer_head *bitmap_bh = NULL;
357 Index: linux-2.6.9-full/fs/ext3/inode.c
358 ===================================================================
359 --- linux-2.6.9-full.orig/fs/ext3/inode.c       2007-03-28 01:29:39.000000000 +0400
360 +++ linux-2.6.9-full/fs/ext3/inode.c    2007-03-28 15:45:41.000000000 +0400
361 @@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h
362                 ext3_journal_forget(handle, branch[i].bh);
363         }
364         for (i = 0; i < keys; i++)
365 -               ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
366 +               ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 0);
367         return err;
368  }
369  
370 @@ -673,7 +673,7 @@ err_out:
371         if (err == -EAGAIN)
372                 for (i = 0; i < num; i++)
373                         ext3_free_blocks(handle, inode, 
374 -                                        le32_to_cpu(where[i].key), 1);
375 +                                        le32_to_cpu(where[i].key), 1, 0);
376         return err;
377  }
378  
379 @@ -1834,7 +1834,7 @@ ext3_clear_blocks(handle_t *handle, stru
380                 }
381         }
382  
383 -       ext3_free_blocks(handle, inode, block_to_free, count);
384 +       ext3_free_blocks(handle, inode, block_to_free, count, 0);
385  }
386  
387  /**
388 @@ -2007,7 +2007,7 @@ static void ext3_free_branches(handle_t 
389                                 ext3_journal_test_restart(handle, inode);
390                         }
391  
392 -                       ext3_free_blocks(handle, inode, nr, 1);
393 +                       ext3_free_blocks(handle, inode, nr, 1, 1);
394  
395                         if (parent_bh) {
396                                 /*