Whamcloud - gitweb
b=20298 (Merge head ldiskfs and b1_8 ldiskfs)
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / ext3-mballoc3-sles10.patch
1 Index: linux-2.6.16.46-0.14/include/linux/ext3_fs_i.h
2 ===================================================================
3 --- linux-2.6.16.46-0.14.orig/include/linux/ext3_fs_i.h
4 +++ linux-2.6.16.46-0.14/include/linux/ext3_fs_i.h
5 @@ -135,6 +135,10 @@ struct ext3_inode_info {
6         struct inode vfs_inode;
7  
8         __u32 i_cached_extent[4];
9 +
10 +       /* mballoc */
11 +       struct list_head i_prealloc_list;
12 +       spinlock_t i_prealloc_lock;
13  };
14  
15  #endif /* _LINUX_EXT3_FS_I */
16 Index: linux-2.6.16.46-0.14/include/linux/ext3_fs_sb.h
17 ===================================================================
18 --- linux-2.6.16.46-0.14.orig/include/linux/ext3_fs_sb.h
19 +++ linux-2.6.16.46-0.14/include/linux/ext3_fs_sb.h
20 @@ -21,8 +21,15 @@
21  #include <linux/wait.h>
22  #include <linux/blockgroup_lock.h>
23  #include <linux/percpu_counter.h>
24 +#include <linux/list.h>
25  #endif
26  #include <linux/rbtree.h>
27 +#include <linux/proc_fs.h>
28 +
29 +struct ext3_buddy_group_blocks;
30 +struct ext3_locality_group;
31 +struct ext3_mb_history;
32 +#define EXT3_BB_MAX_BLOCKS
33  
34  /*
35   * third extended-fs super-block data in memory
36 Index: linux-2.6.16.46-0.14/include/linux/ext3_fs.h
37 ===================================================================
38 --- linux-2.6.16.46-0.14.orig/include/linux/ext3_fs.h
39 +++ linux-2.6.16.46-0.14/include/linux/ext3_fs.h
40 @@ -407,6 +407,7 @@ struct ext3_inode {
41  #define EXT3_MOUNT_IOPEN_NOPRIV                0x800000/* Make iopen world-readable */
42  #define EXT3_MOUNT_EXTENTS             0x1000000/* Extents support */
43  #define EXT3_MOUNT_EXTDEBUG            0x2000000/* Extents debug */
44 +#define EXT3_MOUNT_MBALLOC             0x4000000/* Buddy allocation support */
45  
46  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
47  #ifndef clear_opt
48 @@ -784,8 +785,9 @@ struct dir_private_info {
49  extern int ext3_bg_has_super(struct super_block *sb, int group);
50  extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
51  extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
52 +extern int ext3_new_block_old (handle_t *, struct inode *, unsigned long, int *);
53  extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
54 -                             unsigned long);
55 +                             unsigned long, int);
56  extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
57                                  unsigned long, unsigned long, int *);
58  extern unsigned long ext3_count_free_blocks (struct super_block *);
59 Index: linux-2.6.16.46-0.14/fs/ext3/super.c
60 ===================================================================
61 --- linux-2.6.16.46-0.14.orig/fs/ext3/super.c
62 +++ linux-2.6.16.46-0.14/fs/ext3/super.c
63 @@ -685,6 +685,7 @@ enum {
64         Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
65         Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
66         Opt_extents, Opt_noextents, Opt_extdebug,
67 +       Opt_mballoc, Opt_nomballoc, Opt_stripe,
68         Opt_grpquota
69  };
70  
71 @@ -740,6 +741,9 @@ static match_table_t tokens = {
72         {Opt_noextents, "noextents"},
73         {Opt_extdebug, "extdebug"},
74         {Opt_barrier, "barrier=%u"},
75 +       {Opt_mballoc, "mballoc"},
76 +       {Opt_nomballoc, "nomballoc"},
77 +       {Opt_stripe, "stripe=%u"},
78         {Opt_err, NULL},
79         {Opt_resize, "resize"},
80  };
81 @@ -1089,6 +1093,19 @@ clear_qf_name:
82                 case Opt_extdebug:
83                         set_opt (sbi->s_mount_opt, EXTDEBUG);
84                         break;
85 +               case Opt_mballoc:
86 +                       set_opt(sbi->s_mount_opt, MBALLOC);
87 +                       break;
88 +               case Opt_nomballoc:
89 +                       clear_opt(sbi->s_mount_opt, MBALLOC);
90 +                       break;
91 +               case Opt_stripe:
92 +                       if (match_int(&args[0], &option))
93 +                               return 0;
94 +                       if (option < 0)
95 +                               return 0;
96 +                       sbi->s_stripe = option;
97 +                       break;
98                 default:
99                         printk (KERN_ERR
100                                 "EXT3-fs: Unrecognized mount option \"%s\" "
101 @@ -1820,6 +1837,7 @@ static int ext3_fill_super (struct super
102                 ext3_count_dirs(sb));
103  
104         ext3_ext_init(sb);
105 +       ext3_mb_init(sb, needs_recovery);
106         lock_kernel();
107         return 0;
108  
109 Index: linux-2.6.16.46-0.14/fs/ext3/extents.c
110 ===================================================================
111 --- linux-2.6.16.46-0.14.orig/fs/ext3/extents.c
112 +++ linux-2.6.16.46-0.14/fs/ext3/extents.c
113 @@ -819,7 +819,7 @@ cleanup:
114                 for (i = 0; i < depth; i++) {
115                         if (!ablocks[i])
116                                 continue;
117 -                       ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
118 +                       ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
119                 }
120         }
121         kfree(ablocks);
122 @@ -1629,7 +1629,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
123                   path->p_idx->ei_leaf);
124         bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
125         ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
126 -       ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
127 +       ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1);
128         return err;
129  }
130  
131 @@ -2128,10 +2128,12 @@ ext3_remove_blocks(struct ext3_extents_t
132         int needed = ext3_remove_blocks_credits(tree, ex, from, to);
133         handle_t *handle = ext3_journal_start(tree->inode, needed);
134         struct buffer_head *bh;
135 -       int i;
136 +       int i, metadata = 0;
137  
138         if (IS_ERR(handle))
139                 return PTR_ERR(handle);
140 +       if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode))
141 +               metadata = 1;
142         if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
143                 /* tail removal */
144                 unsigned long num, start;
145 @@ -2143,7 +2145,7 @@ ext3_remove_blocks(struct ext3_extents_t
146                         bh = sb_find_get_block(tree->inode->i_sb, start + i);
147                         ext3_forget(handle, 0, tree->inode, bh, start + i);
148                 }
149 -               ext3_free_blocks(handle, tree->inode, start, num);
150 +               ext3_free_blocks(handle, tree->inode, start, num, metadata);
151         } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
152                 printk("strange request: removal %lu-%lu from %u:%u\n",
153                        from, to, ex->ee_block, ex->ee_len);
154 @@ -2234,11 +2236,8 @@ int ext3_ext_get_block(handle_t *handle,
155         struct ext3_extent *ex;
156         int goal, newblock, err = 0, depth;
157         struct ext3_extents_tree tree;
158 -       unsigned long next;
159 -       int allocated = 0;
160 -
161 -       /* until we have multiblock allocation */
162 -       max_blocks = 1;
163 +       unsigned long allocated = 0;
164 +       struct ext3_allocation_request ar;
165  
166         clear_buffer_new(bh_result);
167         ext3_init_tree_desc(&tree, inode);
168 @@ -2310,18 +2309,36 @@ int ext3_ext_get_block(handle_t *handle,
169                 goto out2;
170         }
171  
172 +       /* find neighbouring allocated blocks */
173 +       ar.lleft = iblock;
174 +       err = ext3_ext_search_left(&tree, path, &ar.lleft, &ar.pleft);
175 +       if (err)
176 +               goto out2;
177 +       ar.lright = iblock;
178 +       err = ext3_ext_search_right(&tree, path, &ar.lright, &ar.pright);
179 +       if (err)
180 +               goto out2;
181 +
182         /* find next allocated block so that we know how many
183          * blocks we can allocate without ovelapping next extent */
184 -       EXT_ASSERT(iblock >= ex->ee_block + ex->ee_len);
185 -       next = ext3_ext_next_allocated_block(path);
186 -       EXT_ASSERT(next > iblock);
187 -       allocated = next - iblock;
188 +       EXT_ASSERT(ar.pright == 0 || ar.lright > iblock);
189 +       if (ar.pright == 0)
190 +               allocated = EXT_MAX_BLOCK - iblock;
191 +       else
192 +               allocated = ar.lright - iblock;
193         if (allocated > max_blocks)
194                 allocated = max_blocks;
195  
196         /* allocate new block */
197 -       goal = ext3_ext_find_goal(inode, path, iblock);
198 -       newblock = ext3_new_block(handle, inode, goal, &err);
199 +       ar.inode = inode;
200 +       ar.goal = ext3_ext_find_goal(inode, path, iblock);
201 +       ar.logical = iblock;
202 +       ar.len = allocated;
203 +       if (S_ISREG(inode->i_mode))
204 +               ar.flags = EXT3_MB_HINT_DATA;
205 +       else
206 +               ar.flags = 0;
207 +       newblock = ext3_mb_new_blocks(handle, &ar, &err);
208         if (!newblock)
209                 goto out2;
210         ext_debug(&tree, "allocate new block: goal %d, found %d\n",
211 @@ -2331,11 +2348,14 @@ int ext3_ext_get_block(handle_t *handle,
212         newex.ee_block = iblock;
213         newex.ee_start = newblock;
214         newex.ee_start_hi = 0;
215 -       newex.ee_len = 1;
216 +       newex.ee_len = ar.len;
217         err = ext3_ext_insert_extent(handle, &tree, path, &newex);
218         if (err) {
219                 /* free data blocks we just allocated */
220 -               ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len);
221 +               /* calling discard directly here is not ideal, but
222 +                * otherwise we'd need to call it on every free() */
223 +               ext3_mb_discard_inode_preallocations(inode);
224 +               ext3_free_blocks(handle, inode, newex.ee_start, newex.ee_len, 0);
225                 goto out2;
226         }
227         
228 @@ -2344,6 +2364,7 @@ int ext3_ext_get_block(handle_t *handle,
229  
230         /* previous routine could use block we allocated */
231         newblock = newex.ee_start;
232 +       allocated = newex.ee_len;
233         set_buffer_new(bh_result);
234  
235         ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len,
236 @@ -2396,6 +2417,9 @@ void ext3_ext_truncate(struct inode * in
237         down(&EXT3_I(inode)->truncate_sem);
238         ext3_ext_invalidate_cache(&tree);
239  
240 +       /* it's important to discard preallocations under truncate_sem */
241 +       ext3_mb_discard_inode_preallocations(inode);
242 +
243         /* 
244          * TODO: optimization is possible here
245          * probably we need not scaning at all,
246 Index: linux-2.6.18.8/fs/ext3/Makefile
247 ===================================================================
248 --- linux-2.6.18.8.orig/fs/ext3/Makefile
249 +++ linux-2.6.18.8/fs/ext3/Makefile
250 @@ -5,7 +5,7 @@
251  obj-$(CONFIG_EXT3_FS) += ext3.o
252  
253  ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
254 -          ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o
255 +          ioctl.o namei.o super.o symlink.o hash.o resize.o extents.o mballoc.o
256  
257  ext3-$(CONFIG_EXT3_FS_XATTR)    += xattr.o xattr_user.o xattr_trusted.o
258  ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
259 Index: linux-2.6.16.46-0.14/fs/ext3/xattr.c
260 ===================================================================
261 --- linux-2.6.16.46-0.14.orig/fs/ext3/xattr.c
262 +++ linux-2.6.16.46-0.14/fs/ext3/xattr.c
263 @@ -484,7 +484,7 @@ ext3_xattr_release_block(handle_t *handl
264                 ea_bdebug(bh, "refcount now=0; freeing");
265                 if (ce)
266                         mb_cache_entry_free(ce);
267 -               ext3_free_blocks(handle, inode, bh->b_blocknr, 1);
268 +               ext3_free_blocks(handle, inode, bh->b_blocknr, 1, 1);
269                 get_bh(bh);
270                 ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
271         } else {
272 @@ -804,7 +804,7 @@ inserted:
273                         new_bh = sb_getblk(sb, block);
274                         if (!new_bh) {
275  getblk_failed:
276 -                               ext3_free_blocks(handle, inode, block, 1);
277 +                               ext3_free_blocks(handle, inode, block, 1, 1);
278                                 error = -EIO;
279                                 goto cleanup;
280                         }
281 Index: linux-2.6.16.46-0.14/fs/ext3/balloc.c
282 ===================================================================
283 --- linux-2.6.16.46-0.14.orig/fs/ext3/balloc.c
284 +++ linux-2.6.16.46-0.14/fs/ext3/balloc.c
285 @@ -80,7 +80,7 @@ struct ext3_group_desc * ext3_get_group_
286   *
287   * Return buffer_head on success or NULL in case of failure.
288   */
289 -static struct buffer_head *
290 +struct buffer_head *
291  read_block_bitmap(struct super_block *sb, unsigned int block_group)
292  {
293         struct ext3_group_desc * desc;
294 @@ -296,6 +296,8 @@ void ext3_discard_reservation(struct ino
295         struct ext3_reserve_window_node *rsv;
296         spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock;
297  
298 +       ext3_mb_discard_inode_preallocations(inode);
299 +
300         if (!block_i)
301                 return;
302  
303 @@ -491,21 +493,25 @@ error_return:
304         return;
305  }
306  
307 -/* Free given blocks, update quota and i_blocks field */
308 -void ext3_free_blocks(handle_t *handle, struct inode *inode,
309 -                       unsigned long block, unsigned long count)
310 +void ext3_free_blocks(handle_t *handle, struct inode * inode,
311 +               unsigned long block, unsigned long count, int metadata)
312  {
313 -       struct super_block * sb;
314 -       int dquot_freed_blocks;
315 +       struct super_block *sb;
316 +       int freed;
317 +
318 +       /* this isn't the right place to decide whether the block is
319 +        * metadata; inode.c/extents.c know better, but for safety ... */
320 +       if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode) ||
321 +                       ext3_should_journal_data(inode))
322 +               metadata = 1;
323  
324         sb = inode->i_sb;
325 -       if (!sb) {
326 -               printk ("ext3_free_blocks: nonexistent device");
327 -               return;
328 -       }
329 -       ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
330 -       if (dquot_freed_blocks)
331 -               DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
332 +       if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info)
333 +               ext3_free_blocks_sb(handle, sb, block, count, &freed);
334 +       else
335 +               ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed);
336 +       if (freed)
337 +               DQUOT_FREE_BLOCK(inode, freed);
338         return;
339  }
340  
341 @@ -1154,7 +1160,7 @@ int ext3_should_retry_alloc(struct super
342   * bitmap, and then for any free bit if that fails.
343   * This function also updates quota and i_blocks field.
344   */
345 -int ext3_new_block(handle_t *handle, struct inode *inode,
346 +int ext3_new_block_old(handle_t *handle, struct inode *inode,
347                         unsigned long goal, int *errp)
348  {
349         struct buffer_head *bitmap_bh = NULL;
350 Index: linux-2.6.16.46-0.14/fs/ext3/inode.c
351 ===================================================================
352 --- linux-2.6.16.46-0.14.orig/fs/ext3/inode.c
353 +++ linux-2.6.16.46-0.14/fs/ext3/inode.c
354 @@ -569,7 +569,7 @@ static int ext3_alloc_branch(handle_t *h
355                 ext3_journal_forget(handle, branch[i].bh);
356         }
357         for (i = 0; i < keys; i++)
358 -               ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
359 +               ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 0);
360         return err;
361  }
362  
363 @@ -1866,7 +1866,7 @@ ext3_clear_blocks(handle_t *handle, stru
364                 }
365         }
366  
367 -       ext3_free_blocks(handle, inode, block_to_free, count);
368 +       ext3_free_blocks(handle, inode, block_to_free, count, 0);
369  }
370  
371  /**
372 @@ -2039,7 +2039,7 @@ static void ext3_free_branches(handle_t 
373                                 ext3_journal_test_restart(handle, inode);
374                         }
375  
376 -                       ext3_free_blocks(handle, inode, nr, 1);
377 +                       ext3_free_blocks(handle, inode, nr, 1, 1);
378  
379                         if (parent_bh) {
380                                 /*