Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / kernel_patches / patches / ext3-mballoc2-2.6.7.patch
1 Index: linux-2.6.7/fs/ext3/mballoc.c
2 ===================================================================
3 --- linux-2.6.7.orig/fs/ext3/mballoc.c  2003-01-30 13:24:37.000000000 +0300
4 +++ linux-2.6.7/fs/ext3/mballoc.c       2004-09-06 12:51:42.000000000 +0400
5 @@ -0,0 +1,1428 @@
6 +/*
7 + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
8 + * Written by Alex Tomas <alex@clusterfs.com>
9 + *
10 + * This program is free software; you can redistribute it and/or modify
11 + * it under the terms of the GNU General Public License version 2 as
12 + * published by the Free Software Foundation.
13 + *
14 + * This program is distributed in the hope that it will be useful,
15 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 + * GNU General Public License for more details.
18 + *
19 + * You should have received a copy of the GNU General Public License
20 + * along with this program; if not, write to the Free Software
21 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
22 + */
23 +
24 +
25 +/*
26 + * mballoc.c contains the multiblocks allocation routines
27 + */
28 +
29 +#include <linux/config.h>
30 +#include <linux/time.h>
31 +#include <linux/fs.h>
32 +#include <linux/namei.h>
33 +#include <linux/jbd.h>
34 +#include <linux/ext3_fs.h>
35 +#include <linux/ext3_jbd.h>
36 +#include <linux/quotaops.h>
37 +#include <linux/buffer_head.h>
38 +#include <linux/module.h>
39 +
40 +/*
41 + * TODO:
42 + *   - do not scan from the beginning, try to remember first free block
43 + *   - mb_mark_used_* may allocate chunk right after splitting buddy
44 + *   - special flag to advise allocator to look for requested + N blocks
45 + *     this may improve interaction between extents and mballoc
46 + */
47 +
48 +/*
49 + * with AGGRESSIVE_CHECK defined the allocator runs consistency checks over
50 + * its structures. these checks slow things down a lot
51 + */
52 +#define AGGRESSIVE_CHECK__
53 +
54 +/* define MB_DEBUG (the trailing "__" below keeps it undefined) to make
55 + * mb_debug() print through printk(); otherwise it expands to nothing */
56 +#define MB_DEBUG__
57 +#ifdef MB_DEBUG
58 +#define mb_debug(fmt,a...)     printk(fmt, ##a)
59 +#else
60 +#define mb_debug(fmt,a...)
61 +#endif
62 +
63 +/*
64 + * where to save buddies structures between umount/mount (clean case only)
65 + */
66 +#define EXT3_BUDDY_FILE                ".buddy"
67 +
68 +/*
69 + * max. number of chunks to be tracked in ext3_free_extent struct
70 + */
71 +#define MB_ARR_SIZE    32
72 +
73 +struct ext3_allocation_context {
74 +       struct super_block *ac_sb;
75 +
76 +       /* search goals */
77 +       int ac_g_group;         /* group the search starts in */
78 +       int ac_g_start;         /* goal block, relative to that group */
79 +       int ac_g_len;           /* number of blocks wanted */
80 +       int ac_g_flags;         /* bit 0: try the goal block itself first */
81 +       
82 +       /* the best found extent */
83 +       int ac_b_group;
84 +       int ac_b_start;         /* relative to ac_b_group */
85 +       int ac_b_len;
86 +       
87 +       /* number of iterations done. we have to track to limit searching */
88 +       int ac_repeats;
89 +       int ac_groups_scanned;
90 +       int ac_status;          /* 0 or one of AC_STATUS_* below */
91 +};
92 +
93 +#define AC_STATUS_CONTINUE     1
94 +#define AC_STATUS_FOUND                2       /* ac_b_* describe allocated blocks */
95 +
96 +
97 +struct ext3_buddy {
98 +       void *bd_bitmap;                /* bd_bh->b_data: the order-0 bitmap */
99 +       void *bd_buddy;                 /* bd_bh2->b_data: bitmaps of orders >= 1 */
100 +       int bd_blkbits;                 /* copy of sb->s_blocksize_bits */
101 +       struct buffer_head *bd_bh;
102 +       struct buffer_head *bd_bh2;
103 +       struct ext3_buddy_group_blocks *bd_bd;  /* this group's s_buddy_blocks entry */
104 +       struct super_block *bd_sb;
105 +};
106 +
107 +struct ext3_free_extent {
108 +       int fe_start;                   /* first block, relative to the group */
109 +       int fe_len;                     /* length in blocks */
110 +       unsigned char fe_orders[MB_ARR_SIZE];   /* order of each chunk, in block order */
111 +       unsigned char fe_nums;          /* entries used in fe_orders */
112 +       unsigned char fe_back;          /* nonzero: allocate from the end of the extent */
113 +};
114 +
115 +/* true when first <= b <= first + len - 1; evaluates b and first twice */
116 +#define in_range(b, first, len)        ((b) >= (first) && (b) <= (first) + (len) - 1)
117 +
118 +int ext3_create (struct inode *, struct dentry *, int, struct nameidata *);
119 +struct buffer_head * read_block_bitmap(struct super_block *, unsigned int);
120 +void ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, unsigned long);
121 +int ext3_new_block_old(handle_t *, struct inode *, unsigned long, u32 *, u32 *, int *);
122 +int ext3_mb_reserve_blocks(struct super_block *, int);
123 +void ext3_mb_release_blocks(struct super_block *, int);
124 +void ext3_mb_poll_new_transaction(struct super_block *, handle_t *);
125 +void ext3_mb_free_committed_blocks(struct super_block *);
126 +
127 +#define mb_correct_addr_and_bit(bit,addr)      \
128 +{                                              \
129 +       if ((unsigned) addr & 1) {              \
130 +               bit += 8;                       \
131 +               addr--;                         \
132 +       }                                       \
133 +       if ((unsigned) addr & 2) {              \
134 +               bit += 16;                      \
135 +               addr--;                         \
136 +               addr--;                         \
137 +       }                                       \
138 +}
139 +
140 +static inline int mb_test_bit(int bit, void *addr)
141 +{
142 +       mb_correct_addr_and_bit(bit,addr);      /* realign addr, rebias bit */
143 +       return test_bit(bit, addr);
144 +}
145 +
146 +static inline void mb_set_bit(int bit, void *addr)
147 +{
148 +       mb_correct_addr_and_bit(bit,addr);      /* realign addr, rebias bit */
149 +       set_bit(bit, addr);
150 +}
151 +
152 +static inline void mb_clear_bit(int bit, void *addr)
153 +{
154 +       mb_correct_addr_and_bit(bit,addr);      /* realign addr, rebias bit */
155 +       clear_bit(bit, addr);
156 +}
157 +
158 +/* return the bitmap that tracks free chunks of 2^order blocks and store the
159 + * number of bits it holds in *max; NULL if order > bd_blkbits + 1 */
160 +static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max)
161 +{
162 +       void *level = e3b->bd_buddy;
163 +       int ord;
164 +
165 +       J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
166 +       J_ASSERT(max != NULL);
167 +       if (order > e3b->bd_blkbits + 1)
168 +               return NULL;
169 +
170 +       /* order 0 is the block bitmap itself: one bit per block */
171 +       *max = 1 << (e3b->bd_blkbits + 3);
172 +       if (order == 0)
173 +               return e3b->bd_bitmap;
174 +
175 +       /* order 1 opens the buddy block; each next order has half the bits */
176 +       *max >>= 1;
177 +       for (ord = 1; ord < order; ord++) {
178 +               level += 1 << (e3b->bd_blkbits - ord);
179 +               *max >>= 1;
180 +       }
181 +       return level;
182 +}
183 +
184 +/* load the bitmap and buddy blocks of @group into @e3b; returns 0, or -EIO
185 + * after releasing whatever was obtained (bd_bh and bd_bh2 are then NULL) */
186 +static int ext3_mb_load_desc(struct super_block *sb, int group,
187 +                               struct ext3_buddy *e3b)
188 +{
189 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
190 +
191 +       J_ASSERT(sbi->s_buddy_blocks[group].bb_bitmap);
192 +       J_ASSERT(sbi->s_buddy_blocks[group].bb_buddy);
193 +       e3b->bd_bh = e3b->bd_bh2 = NULL;        /* "out" brelse()s both heads */
194 +       /* load bitmap */
195 +       e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_bitmap);
196 +       if (e3b->bd_bh == NULL) {
197 +               ext3_error(sb, "ext3_mb_load_desc",
198 +                               "can't get block for buddy bitmap\n");
199 +               goto out;
200 +       }
201 +       if (!buffer_uptodate(e3b->bd_bh)) {
202 +               ll_rw_block(READ, 1, &e3b->bd_bh);
203 +               wait_on_buffer(e3b->bd_bh);
204 +       }
205 +       J_ASSERT(buffer_uptodate(e3b->bd_bh));
206 +
207 +       /* load buddy */
208 +       e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_buddy);
209 +       if (e3b->bd_bh2 == NULL) {
210 +               ext3_error(sb, "ext3_mb_load_desc",
211 +                               "can't get block for buddy bitmap\n");
212 +               goto out;
213 +       }
214 +       if (!buffer_uptodate(e3b->bd_bh2)) {
215 +               ll_rw_block(READ, 1, &e3b->bd_bh2);
216 +               wait_on_buffer(e3b->bd_bh2);
217 +       }
218 +       J_ASSERT(buffer_uptodate(e3b->bd_bh2));
219 +
220 +       e3b->bd_bitmap = e3b->bd_bh->b_data;
221 +       e3b->bd_buddy = e3b->bd_bh2->b_data;
222 +       e3b->bd_blkbits = sb->s_blocksize_bits;
223 +       e3b->bd_bd = sbi->s_buddy_blocks + group;
224 +       e3b->bd_sb = sb;
225 +       return 0;
226 +out:
227 +       brelse(e3b->bd_bh);
228 +       brelse(e3b->bd_bh2);
229 +       e3b->bd_bh = e3b->bd_bh2 = NULL;
230 +       return -EIO;
231 +}
232 +
233 +static void ext3_mb_dirty_buddy(struct ext3_buddy *e3b)
234 +{
235 +       mark_buffer_dirty(e3b->bd_bh);          /* bitmap block */
236 +       mark_buffer_dirty(e3b->bd_bh2);         /* buddy block */
237 +}
238 +
239 +static void ext3_mb_release_desc(struct ext3_buddy *e3b)
240 +{
241 +       brelse(e3b->bd_bh);     /* drop the references taken by ext3_mb_load_desc() */
242 +       brelse(e3b->bd_bh2);
243 +}
244 +
245 +#ifdef AGGRESSIVE_CHECK
246 +static void mb_check_buddy(struct ext3_buddy *e3b)
247 +{
248 +       int order = e3b->bd_blkbits + 1;
249 +       int max, max2, i, j, k, count;
250 +       void *buddy, *buddy2;
251 +       /* debug only: J_ASSERT that buddy levels, bitmap and bb_counters agree */
252 +       if (!test_opt(e3b->bd_sb, MBALLOC))
253 +               return;
254 +
255 +       while (order > 1) {
256 +               buddy = mb_find_buddy(e3b, order, &max);
257 +               J_ASSERT(buddy);
258 +               buddy2 = mb_find_buddy(e3b, order - 1, &max2);
259 +               J_ASSERT(buddy2);
260 +               J_ASSERT(buddy != buddy2);
261 +               J_ASSERT(max * 2 == max2);
262 +
263 +               count = 0;
264 +               for (i = 0; i < max; i++) {
265 +                       /* clear bit: the chunk is not free as a whole at this order */
266 +                       if (!mb_test_bit(i, buddy)) {
267 +                               /* only single bit in buddy2 may be 1 */
268 +                               if (mb_test_bit(i << 1, buddy2))
269 +                                       J_ASSERT(!mb_test_bit((i<<1)+1, buddy2));
270 +                               else if (mb_test_bit((i << 1) + 1, buddy2))
271 +                                       J_ASSERT(!mb_test_bit(i << 1, buddy2));
272 +                               continue;
273 +                       }
274 +
275 +                       /* both bits in buddy2 must be 0 */
276 +                       J_ASSERT(!mb_test_bit(i << 1, buddy2));
277 +                       J_ASSERT(!mb_test_bit((i << 1) + 1, buddy2));
278 +                       /* and every block the chunk covers must be set in the bitmap */
279 +                       for (j = 0; j < (1 << order); j++) {
280 +                               k = (i * (1 << order)) + j;
281 +                               J_ASSERT(mb_test_bit(k, e3b->bd_bitmap));
282 +                       }
283 +                       count++;
284 +               }
285 +               J_ASSERT(e3b->bd_bd->bb_counters[order] == count);
286 +               order--;
287 +       }
288 +
289 +       buddy = mb_find_buddy(e3b, 0, &max);
290 +       for (i = 0; i < max; i++) {
291 +               if (mb_test_bit(i, buddy))
292 +                       continue;
293 +               /* check used bits only */
294 +               for (j = 0; j < e3b->bd_blkbits + 1; j++) {
295 +                       buddy2 = mb_find_buddy(e3b, j, &max2);
296 +                       k = i >> j;
297 +                       J_ASSERT(k < max2);
298 +                       J_ASSERT(!mb_test_bit(k, buddy2));
299 +               }
300 +       }
301 +}
302 +#else
303 +#define mb_check_buddy(e3b)
304 +#endif
305 +
306 +static inline void
307 +ext3_lock_group(struct super_block *sb, int group)
308 +{
309 +       spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock); /* per-group lock */
310 +}
311 +
312 +static inline void
313 +ext3_unlock_group(struct super_block *sb, int group)
314 +{
315 +       spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock); /* per-group lock */
316 +}
317 +
318 +/* order (>= 1) of the free buddy chunk holding @block, 0 if there is none */
319 +static int mb_find_order_for_block(struct ext3_buddy *e3b, int block)
320 +{
321 +       int order, top = e3b->bd_blkbits + 1;
322 +       void *bb = e3b->bd_buddy;
323 +
324 +       J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
325 +       J_ASSERT(block < (1 << (e3b->bd_blkbits + 3)));
326 +
327 +       for (order = 1; order <= top; order++) {
328 +               block >>= 1;
329 +               if (mb_test_bit(block, bb))
330 +                       return order;   /* part of a free chunk of this order */
331 +               /* step to the next level; skipped after the last one, where
332 +                * the shift count would be negative (undefined behaviour) */
333 +               if (order < top)
334 +                       bb += 1 << (e3b->bd_blkbits - order);
335 +       }
336 +       return 0;
337 +}
338 +
339 +/* clear bits [cur, cur + len) of bitmap bm */
340 +static inline void mb_clear_bits(void *bm, int cur, int len)
341 +{
342 +       int end = cur + len;
343 +
344 +       while (cur < end) {
345 +               if ((cur & 31) != 0 || end - cur < 32) {
346 +                       /* unaligned head or short tail: one bit at a time */
347 +                       mb_clear_bit(cur, bm);
348 +                       cur++;
349 +                       continue;
350 +               }
351 +               /* 32-bit aligned with a full word left: wipe it in one store */
352 +               *(__u32 *) (bm + (cur >> 3)) = 0;
353 +               cur += 32;
354 +       }
355 +}
356 +
357 +/* set bits [cur, cur + len) of bitmap bm */
358 +static inline void mb_set_bits(void *bm, int cur, int len)
359 +{
360 +       int end = cur + len;
361 +
362 +       while (cur < end) {
363 +               if ((cur & 31) != 0 || end - cur < 32) {
364 +                       /* unaligned head or short tail: one bit at a time */
365 +                       mb_set_bit(cur, bm);
366 +                       cur++;
367 +                       continue;
368 +               }
369 +               /* 32-bit aligned with a full word left: fill it in one store */
370 +               *(__u32 *) (bm + (cur >> 3)) = 0xffffffff;
371 +               cur += 32;
372 +       }
373 +}
374 +
375 +static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count)
376 +{
377 +       int block, max, order;
378 +       void *buddy, *buddy2;
379 +       /* free blocks [first, first + count) one by one, merging buddies upward */
380 +       mb_check_buddy(e3b);
381 +       while (count-- > 0) {
382 +               block = first++;
383 +               order = 0;
384 +               /* a set bit means free, so the block must be clear (in use) now */
385 +               J_ASSERT(!mb_test_bit(block, e3b->bd_bitmap));
386 +               mb_set_bit(block, e3b->bd_bitmap);
387 +               e3b->bd_bd->bb_counters[order]++;
388 +
389 +               /* start of the buddy */
390 +               buddy = mb_find_buddy(e3b, order, &max);
391 +
392 +               do {
393 +                       block &= ~1UL;
394 +                       if (!mb_test_bit(block, buddy) ||
395 +                                       !mb_test_bit(block + 1, buddy))
396 +                               break;
397 +
398 +                       /* both the buddies are free, try to coalesce them */
399 +                       buddy2 = mb_find_buddy(e3b, order + 1, &max);
400 +                       /* NULL: there is no higher order to merge into */
401 +                       if (!buddy2)
402 +                               break;
403 +
404 +                       if (order > 0) {
405 +                               /* for special purposes, we don't clear
406 +                                * free bits in bitmap */
407 +                               mb_clear_bit(block, buddy);
408 +                               mb_clear_bit(block + 1, buddy);
409 +                       }
410 +                       e3b->bd_bd->bb_counters[order]--;
411 +                       e3b->bd_bd->bb_counters[order]--;
412 +
413 +                       block = block >> 1;
414 +                       order++;
415 +                       e3b->bd_bd->bb_counters[order]++;
416 +
417 +                       mb_set_bit(block, buddy2);
418 +                       buddy = buddy2;
419 +               } while (1);
420 +       }
421 +       mb_check_buddy(e3b);
422 +
423 +       return 0;
424 +}
425 +
426 +/* build in @out the tail of @in: whole chunks taken from its end until at
427 + * least @needed blocks are covered, flagged fe_back.  returns 1 on
428 + * success, 0 (with @out empty) if @in has < 2 chunks or < @needed blocks */
429 +int mb_make_backward_extent(struct ext3_free_extent *in,
430 +                               struct ext3_free_extent *out, int needed)
431 +{
432 +       int i;
433 +
434 +       J_ASSERT(in);
435 +       J_ASSERT(out);
436 +       J_ASSERT(in->fe_nums < MB_ARR_SIZE);
437 +
438 +       out->fe_len = 0;
439 +       out->fe_start = in->fe_start + in->fe_len;
440 +       out->fe_nums = 0;
441 +
442 +       /* for single-chunk extent we need not back order
443 +        * also, if an extent doesn't fill needed space
444 +        * then it makes no sense to try back order because
445 +        * if we select this extent then it'll be used as is */
446 +       if (in->fe_nums < 2 || in->fe_len < needed)
447 +               return 0;
448 +       /* walk the chunks from the last one back, growing out downwards */
449 +       i = in->fe_nums - 1;
450 +       while (i >= 0 && out->fe_len < needed) {
451 +               out->fe_len += (1 << in->fe_orders[i]);
452 +               out->fe_start -= (1 << in->fe_orders[i]);
453 +               i--;
454 +       }
455 +       /* FIXME: in some situation fe_orders may be too small to hold
456 +        * all the buddies */
457 +       J_ASSERT(out->fe_len >= needed);
458 +       
459 +       for (i++; i < in->fe_nums; i++)
460 +               out->fe_orders[out->fe_nums++] = in->fe_orders[i];
461 +       J_ASSERT(out->fe_nums < MB_ARR_SIZE);
462 +       out->fe_back = 1;
463 +
464 +       return 1;
465 +}
466 +
467 +/* Starting at bit @block of the order-@order bitmap, gather into @ex the run
468 + * of free chunks that begins there; stop once @needed blocks are covered or
469 + * the next chunk alone could hold @needed.  Returns ex->fe_len, 0 if not free */
470 +int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
471 +                       int needed, struct ext3_free_extent *ex)
472 +{
473 +       int space = needed;
474 +       int next, max, ord;
475 +       void *buddy;
476 +
477 +       J_ASSERT(ex != NULL);
478 +
479 +       /* start from a fully defined empty extent: the nofree exit reads fe_start */
480 +       ex->fe_nums = 0;
481 +       ex->fe_len = 0;
482 +       ex->fe_start = 0;
483 +       ex->fe_back = 0;
484 +
485 +       buddy = mb_find_buddy(e3b, order, &max);
486 +       J_ASSERT(buddy);
487 +       J_ASSERT(block < max);
488 +       if (!mb_test_bit(block, buddy))
489 +               goto nofree;
490 +
491 +       if (order == 0) {
492 +               /* find actual order */
493 +               order = mb_find_order_for_block(e3b, block);
494 +               block = block >> order;
495 +       }
496 +
497 +       ex->fe_orders[ex->fe_nums++] = order;
498 +       ex->fe_len = 1 << order;
499 +       ex->fe_start = block << order;
500 +
501 +       while ((space = space - (1 << order)) > 0) {
502 +               buddy = mb_find_buddy(e3b, order, &max);
503 +               J_ASSERT(buddy);
504 +               if (block + 1 >= max)
505 +                       break;
506 +
507 +               next = (block + 1) * (1 << order);
508 +               if (!mb_test_bit(next, e3b->bd_bitmap))
509 +                       break;
510 +               ord = mb_find_order_for_block(e3b, next);
511 +               if ((1 << ord) >= needed) {
512 +                       /* we dont want to coalesce with self-enough buddies */
513 +                       break;
514 +               }
515 +               order = ord;
516 +               block = next >> order;
517 +               ex->fe_len += 1 << order;
518 +               if (ex->fe_nums < MB_ARR_SIZE)
519 +                       ex->fe_orders[ex->fe_nums++] = order;
520 +       }
521 +
522 +nofree:
523 +       J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3)));
524 +       return ex->fe_len;
525 +}
526 +
527 +/* mark the last @len blocks of @ex used, working from its end towards its
528 + * start and splitting buddy chunks where needed.  always returns 0 */
529 +static int mb_mark_used_backward(struct ext3_buddy *e3b,
530 +                                       struct ext3_free_extent *ex, int len)
531 +{
532 +       int start, len0 = len;
533 +       int ord, mlen, max, cur;
534 +       void *buddy;
535 +
536 +       start = ex->fe_start + ex->fe_len - 1;
537 +       while (len) {
538 +               ord = mb_find_order_for_block(e3b, start);
539 +               if (((start >> ord) << ord) == (start - (1 << ord) + 1) &&
540 +                               len >= (1 << ord)) {
541 +                       /* the whole chunk may be allocated at once! */
542 +                       mlen = 1 << ord;
543 +                       buddy = mb_find_buddy(e3b, ord, &max);
544 +                       J_ASSERT((start >> ord) < max);
545 +                       mb_clear_bit(start >> ord, buddy);
546 +                       e3b->bd_bd->bb_counters[ord]--;
547 +                       start -= mlen;
548 +                       len -= mlen;
549 +                       J_ASSERT(len >= 0);
550 +                       /* start is -1 once the chunk at block 0 was taken last */
551 +                       J_ASSERT(len == 0 || start >= 0);
552 +                       continue;
553 +               }
554 +
555 +               /* we have to split large buddy */
556 +               J_ASSERT(ord > 0);
557 +               buddy = mb_find_buddy(e3b, ord, &max);
558 +               mb_clear_bit(start >> ord, buddy);
559 +               e3b->bd_bd->bb_counters[ord]--;
560 +               ord--;
561 +               cur = (start >> ord) & ~1U;
562 +               buddy = mb_find_buddy(e3b, ord, &max);
563 +               mb_set_bit(cur, buddy);
564 +               mb_set_bit(cur + 1, buddy);
565 +               e3b->bd_bd->bb_counters[ord]++;
566 +               e3b->bd_bd->bb_counters[ord]++;
567 +       }
568 +
569 +       /* now drop all the bits in bitmap */
570 +       mb_clear_bits(e3b->bd_bitmap, ex->fe_start + ex->fe_len - len0, len0);
571 +       mb_check_buddy(e3b);
572 +       return 0;
573 +}
574 +
575 +static int mb_mark_used_forward(struct ext3_buddy *e3b,
576 +                               struct ext3_free_extent *ex, int len)
577 +{
578 +       int start = ex->fe_start, len0 = len;
579 +       int ord, mlen, max, cur;
580 +       void *buddy;
581 +       /* mark the first len blocks of ex used, walking up from fe_start */
582 +       while (len) {
583 +               ord = mb_find_order_for_block(e3b, start);
584 +               /* chunk starts exactly at start and fits in what is left? */
585 +               if (((start >> ord) << ord) == start && len >= (1 << ord)) {
586 +                       /* the whole chunk may be allocated at once! */
587 +                       mlen = 1 << ord;
588 +                       buddy = mb_find_buddy(e3b, ord, &max);
589 +                       J_ASSERT((start >> ord) < max);
590 +                       mb_clear_bit(start >> ord, buddy);
591 +                       e3b->bd_bd->bb_counters[ord]--;
592 +                       start += mlen;
593 +                       len -= mlen;
594 +                       J_ASSERT(len >= 0);
595 +                       continue;
596 +               }
597 +
598 +               /* we have to split large buddy */
599 +               J_ASSERT(ord > 0);
600 +               buddy = mb_find_buddy(e3b, ord, &max);
601 +               mb_clear_bit(start >> ord, buddy);
602 +               e3b->bd_bd->bb_counters[ord]--;
603 +               /* both halves become free chunks one order down; the loop retries */
604 +               ord--;
605 +               cur = (start >> ord) & ~1U;
606 +               buddy = mb_find_buddy(e3b, ord, &max);
607 +               mb_set_bit(cur, buddy);
608 +               mb_set_bit(cur + 1, buddy);
609 +               e3b->bd_bd->bb_counters[ord]++;
610 +               e3b->bd_bd->bb_counters[ord]++;
611 +       }
612 +
613 +       /* now drop all the bits in bitmap */
614 +       mb_clear_bits(e3b->bd_bitmap, ex->fe_start, len0);
615 +
616 +       mb_check_buddy(e3b);
617 +
618 +       return 0;
619 +}
620 +
621 +/* mark @len blocks of @ex used: from its start when fe_back == 0,
622 + * otherwise from its end.  returns what the chosen helper returns */
623 +int inline mb_mark_used(struct ext3_buddy *e3b,
624 +                       struct ext3_free_extent *ex, int len)
625 +{
626 +       J_ASSERT(ex);
627 +
628 +       if (ex->fe_back != 0)
629 +               return mb_mark_used_backward(e3b, ex, len);
630 +
631 +       return mb_mark_used_forward(e3b, ex, len);
632 +}
633 +
634 +/*
635 + * Try to allocate ac->ac_g_len blocks for @ac from group @group, whose
636 + * buddy data is loaded in @e3b.  If the goal flag (ac_g_flags & 1) is set
637 + * and this is the goal group, a free goal block wins: the free extent
638 + * found at it is taken even if shorter than requested; a busy goal block
639 + * clears the flag.  Otherwise the bitmap is scanned for a long enough
640 + * extent.  On success the blocks are marked used in @e3b, ac_b_* describe
641 + * them and ac_status becomes AC_STATUS_FOUND.  Always returns 0.
642 + */
643 +int ext3_mb_new_in_group(struct ext3_allocation_context *ac,
644 +                               struct ext3_buddy *e3b, int group)
645 +{
646 +       struct super_block *sb = ac->ac_sb;
647 +       struct ext3_free_extent curex;
648 +       int max, i;
649 +
650 +       if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) {
651 +               /* someone asks for space at this specified block
652 +                * probably he wants to merge it into existing extent */
653 +               if (mb_test_bit(ac->ac_g_start, e3b->bd_bitmap)) {
654 +                       /* good. at least one block is free */
655 +                       mb_find_extent(e3b, 0, ac->ac_g_start,
656 +                                               ac->ac_g_len, &curex);
657 +                       max = min(curex.fe_len, ac->ac_g_len);
658 +                       mb_mark_used(e3b, &curex, max);
659 +
660 +                       ac->ac_b_group = group;
661 +                       ac->ac_b_start = curex.fe_start;
662 +                       ac->ac_b_len = max;
663 +                       ac->ac_status = AC_STATUS_FOUND;
664 +                       return 0;
665 +               }
666 +               /* don't try to find goal anymore */
667 +               ac->ac_g_flags &= ~1;
668 +       }
669 +
670 +       i = 0;
671 +       while (1) {
672 +               i = find_next_bit(e3b->bd_bitmap, sb->s_blocksize * 8, i);
673 +               if (i >= sb->s_blocksize * 8)
674 +                       break;
675 +
676 +               max = mb_find_extent(e3b, 0, i, ac->ac_g_len, &curex);
677 +               if (max >= ac->ac_g_len) {
678 +                       max = min(curex.fe_len, ac->ac_g_len);
679 +                       mb_mark_used(e3b, &curex, max);
680 +
681 +                       ac->ac_b_group = group;
682 +                       ac->ac_b_start = curex.fe_start;
683 +                       ac->ac_b_len = max;
684 +                       ac->ac_status = AC_STATUS_FOUND;
685 +                       break;
686 +               }
687 +               i += max;
688 +       }
689 +
690 +       return 0;
691 +}
692 +
693 +/* decide whether @group is worth scanning for @ac at criteria level @cr
694 + * (0 strictest .. 2 accepts any group with free blocks); returns 1 or 0 */
695 +int mb_good_group(struct ext3_allocation_context *ac, int group, int cr)
696 +{
697 +       struct ext3_group_desc *desc;
698 +       int nfree;
699 +
700 +       desc = ext3_get_group_desc(ac->ac_sb, group, NULL);
701 +       if (desc == NULL)
702 +               return 0;
703 +       nfree = le16_to_cpu(desc->bg_free_blocks_count);
704 +       if (nfree == 0)
705 +               return 0;
706 +
707 +       /* someone wants this block very much */
708 +       if ((ac->ac_g_flags & 1) && ac->ac_g_group == group)
709 +               return 1;
710 +       /* FIXME: I'd like to take fragmentation into account here */
711 +       switch (cr) {
712 +       case 0:
713 +               return nfree >= (ac->ac_g_len >> 1);
714 +       case 1:
715 +               return nfree >= (ac->ac_g_len >> 2);
716 +       case 2:
717 +               return 1;
718 +       default:
719 +               BUG();
720 +       }
721 +       return 0;
722 +}
723 +
724 +int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
725 +                       unsigned long goal, int *len, int flags, int *errp)
726 +{
727 +       struct buffer_head *bitmap_bh = NULL;
728 +       struct ext3_allocation_context ac;
729 +       int i, group, block, cr, err = 0;
730 +       struct ext3_group_desc *gdp;
731 +       struct ext3_super_block *es;
732 +       struct buffer_head *gdp_bh;
733 +       struct ext3_sb_info *sbi;
734 +       struct super_block *sb;
735 +       struct ext3_buddy e3b;
736 +
737 +       J_ASSERT(len != NULL);
738 +       J_ASSERT(*len > 0);
739 +
740 +       sb = inode->i_sb;
741 +       if (!sb) {
742 +               printk("ext3_mb_new_nblocks: nonexistent device");
743 +               return 0;
744 +       }
745 +
746 +       if (!test_opt(sb, MBALLOC)) {
747 +               static int ext3_mballoc_warning = 0;
748 +               if (ext3_mballoc_warning == 0) {
749 +                       printk(KERN_ERR "EXT3-fs: multiblock request with "
750 +                               "mballoc disabled!\n");
751 +                       ext3_mballoc_warning++;
752 +               }
753 +               *len = 1;
754 +               err = ext3_new_block_old(handle, inode, goal, NULL,NULL, errp);
755 +               return err;
756 +       }
757 +
758 +       ext3_mb_poll_new_transaction(sb, handle);
759 +
760 +       sbi = EXT3_SB(sb);
761 +       es = EXT3_SB(sb)->s_es;
762 +
763 +       if (!(flags & 2)) {
764 +               /* someone asks for non-reserved blocks */
765 +               BUG_ON(*len > 1);
766 +               err = ext3_mb_reserve_blocks(sb, 1);
767 +               if (err) {
768 +                       *errp = err;
769 +                       return 0;
770 +               }
771 +       }
772 +
773 +       /*
774 +        * Check quota for allocation of this blocks.
775 +        */
776 +       while (*len && DQUOT_ALLOC_BLOCK(inode, *len))
777 +               *len -= 1;
778 +       if (*len == 0) {
779 +               *errp = -EDQUOT;
780 +               block = 0;
781 +               goto out;
782 +       }
783 +
784 +       /* start searching from the goal */
785 +       if (goal < le32_to_cpu(es->s_first_data_block) ||
786 +           goal >= le32_to_cpu(es->s_blocks_count))
787 +               goal = le32_to_cpu(es->s_first_data_block);
788 +       group = (goal - le32_to_cpu(es->s_first_data_block)) /
789 +                       EXT3_BLOCKS_PER_GROUP(sb);
790 +       block = ((goal - le32_to_cpu(es->s_first_data_block)) %
791 +                       EXT3_BLOCKS_PER_GROUP(sb));
792 +
793 +       /* set up allocation goals */
794 +       ac.ac_b_group = ac.ac_b_start = ac.ac_b_len = 0;
795 +       ac.ac_status = 0;
796 +       ac.ac_groups_scanned = 0;
797 +       ac.ac_sb = inode->i_sb;
798 +       ac.ac_g_group = group;
799 +       ac.ac_g_start = block;
800 +       ac.ac_g_len = *len;
801 +       ac.ac_g_flags = flags;
802 +
803 +       /* loop over the groups */
804 +       for (cr = 0; cr < 3 && ac.ac_status != AC_STATUS_FOUND; cr++) {
805 +               for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) {
806 +                       if (group == EXT3_SB(sb)->s_groups_count)
807 +                               group = 0;
808 +
809 +                       /* check is group good for our criteries */
810 +                       if (!mb_good_group(&ac, group, cr))
811 +                               continue;
812 +
813 +                       err = ext3_mb_load_desc(ac.ac_sb, group, &e3b);
814 +                       if (err)
815 +                               goto out_err;
816 +
817 +                       ext3_lock_group(sb, group);
818 +                       if (!mb_good_group(&ac, group, cr)) {
819 +                               /* someone did allocation from this group */
820 +                               ext3_unlock_group(sb, group);
821 +                               ext3_mb_release_desc(&e3b);
822 +                               continue;
823 +                       }
824 +
825 +                       err = ext3_mb_new_in_group(&ac, &e3b, group);
826 +                       ext3_unlock_group(sb, group);
827 +                       if (ac.ac_status == AC_STATUS_FOUND)
828 +                               ext3_mb_dirty_buddy(&e3b);
829 +                       ext3_mb_release_desc(&e3b);
830 +                       if (err)
831 +                               goto out_err;
832 +                       if (ac.ac_status == AC_STATUS_FOUND)
833 +                               break;
834 +               }
835 +       }
836 +
837 +       if (ac.ac_status != AC_STATUS_FOUND) {
838 +               /* unfortunately, we can't satisfy this request */
839 +               J_ASSERT(ac.ac_b_len == 0);
840 +               DQUOT_FREE_BLOCK(inode, *len);
841 +               *errp = -ENOSPC;
842 +               block = 0;
843 +               goto out;
844 +       }
845 +
846 +       /* good news - free block(s) have been found. now it's time
847 +        * to mark block(s) in good old journaled bitmap */
848 +       block = ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
849 +                       + ac.ac_b_start + le32_to_cpu(es->s_first_data_block);
850 +
851 +       /* we made a desicion, now mark found blocks in good old
852 +        * bitmap to be journaled */
853 +
854 +       ext3_debug("using block group %d(%d)\n",
855 +                       ac.ac_b_group.group, gdp->bg_free_blocks_count);
856 +
857 +       bitmap_bh = read_block_bitmap(sb, ac.ac_b_group);
858 +       if (!bitmap_bh) {
859 +               *errp = -EIO;
860 +               goto out_err;
861 +       }
862 +
863 +       err = ext3_journal_get_write_access(handle, bitmap_bh);
864 +       if (err) {
865 +               *errp = err;
866 +               goto out_err;
867 +       }
868 +
869 +       gdp = ext3_get_group_desc(sb, ac.ac_b_group, &gdp_bh);
870 +       if (!gdp) {
871 +               *errp = -EIO;
872 +               goto out_err;
873 +       }
874 +       
875 +       err = ext3_journal_get_write_access(handle, gdp_bh);
876 +       if (err)
877 +               goto out_err;
878 +
879 +       block = ac.ac_b_start + ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
880 +                               + le32_to_cpu(es->s_first_data_block);
881 +
882 +       if (block == le32_to_cpu(gdp->bg_block_bitmap) ||
883 +           block == le32_to_cpu(gdp->bg_inode_bitmap) ||
884 +           in_range(block, le32_to_cpu(gdp->bg_inode_table),
885 +                     EXT3_SB(sb)->s_itb_per_group))
886 +               ext3_error(sb, "ext3_new_block",
887 +                           "Allocating block in system zone - "
888 +                           "block = %u", block);
889 +#if 0
890 +       for (i = 0; i < ac.ac_b_len; i++)
891 +               J_ASSERT(!mb_test_bit(ac.ac_b_start + i, bitmap_bh->b_data));
892 +#endif
893 +       mb_set_bits(bitmap_bh->b_data, ac.ac_b_start, ac.ac_b_len);
894 +
895 +       ext3_lock_group(sb, ac.ac_b_group);
896 +       gdp->bg_free_blocks_count =
897 +                       cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 
898 +                                       ac.ac_b_len);
899 +       ext3_unlock_group(sb, ac.ac_b_group);
900 +       percpu_counter_mod(&sbi->s_freeblocks_counter, -ac.ac_b_len);
901 +
902 +       err = ext3_journal_dirty_metadata(handle, bitmap_bh);
903 +       if (err)
904 +               goto out_err;
905 +       err = ext3_journal_dirty_metadata(handle, gdp_bh);
906 +       if (err)
907 +               goto out_err;
908 +
909 +       sb->s_dirt = 1;
910 +       *errp = 0;
911 +       brelse(bitmap_bh);
912 +
913 +       /* drop non-allocated, but dquote'd blocks */
914 +       J_ASSERT(*len >= ac.ac_b_len);
915 +       DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_len);
916 +
917 +       *len = ac.ac_b_len;
918 +       J_ASSERT(block != 0);
919 +       goto out;
920 +
921 +out_err:
922 +       /* if we've already allocated something, roll it back */
923 +       if (ac.ac_status == AC_STATUS_FOUND) {
924 +               /* FIXME: free blocks here */
925 +       }
926 +
927 +       DQUOT_FREE_BLOCK(inode, *len);
928 +       brelse(bitmap_bh);
929 +       *errp = err;
930 +       block = 0;
931 +out:
932 +       if (!(flags & 2)) {
933 +               /* block wasn't reserved before and we reserved it
934 +                * at the beginning of allocation. it doesn't matter
935 +                * whether we allocated anything or we failed: time
936 +                * to release reservation. NOTE: because I expect
937 +                * any multiblock request from delayed allocation
938 +                * path only, here is single block always */
939 +               ext3_mb_release_blocks(sb, 1);
940 +       }
941 +       return block;
942 +}
943 +/* Rebuild a group's buddy data from its block bitmap; 0, -EIO or ext3_mb_load_desc() error */
944 +int ext3_mb_generate_buddy(struct super_block *sb, int group)
945 +{
946 +       struct buffer_head *bh;
947 +       int i, err, count = 0;
948 +       struct ext3_buddy e3b;
949 +       
950 +       err = ext3_mb_load_desc(sb, group, &e3b);
951 +       if (err)
952 +               goto out;
953 +       memset(e3b.bd_bh->b_data, 0, sb->s_blocksize);
954 +       memset(e3b.bd_bh2->b_data, 0, sb->s_blocksize);
955 +       /* both buddy buffers are zeroed now; free blocks get re-added from the bitmap */
956 +       bh = read_block_bitmap(sb, group);
957 +       if (bh == NULL) {
958 +               err = -EIO; 
959 +               goto out2;
960 +       }
961 +
962 +       /* loop over the blocks, and create buddies for free ones (clear bits) */
963 +       for (i = 0; i < sb->s_blocksize * 8; i++) {
964 +               if (!mb_test_bit(i, (void *) bh->b_data)) {
965 +                       mb_free_blocks(&e3b, i, 1);
966 +                       count++; /* incremented only; never read in this function */
967 +               }
968 +       }
969 +       brelse(bh);
970 +       mb_check_buddy(&e3b);
971 +       ext3_mb_dirty_buddy(&e3b);
972 +
973 +out2:
974 +       ext3_mb_release_desc(&e3b);
975 +out:
976 +       return err;
977 +}
978 +
979 +EXPORT_SYMBOL(ext3_mb_new_blocks);
980 +/* journal credits requested for each handle started in ext3_mb_init_backend() */
981 +#define MB_CREDITS     \
982 +       (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS +   \
983 +               2 * EXT3_QUOTA_INIT_BLOCKS)
984 +/* Find or create the buddy file in the root dir and record its two blocks per group */
985 +int ext3_mb_init_backend(struct super_block *sb)
986 +{
987 +       struct inode *root = sb->s_root->d_inode;
988 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
989 +       struct dentry *db;
990 +       tid_t target;
991 +       int err, i;
992 +
993 +       sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks) *
994 +                                       sbi->s_groups_count, GFP_KERNEL);
995 +       if (sbi->s_buddy_blocks == NULL) {
996 +               printk("can't allocate mem for buddy maps\n");
997 +               return -ENOMEM;
998 +       }
999 +       memset(sbi->s_buddy_blocks, 0,
1000 +               sizeof(struct ext3_buddy_group_blocks) * sbi->s_groups_count);
1001 +       sbi->s_buddy = NULL;
1002 +       /* root->i_sem is held from here until the out: label */
1003 +       down(&root->i_sem);
1004 +       db = lookup_one_len(EXT3_BUDDY_FILE, sb->s_root,
1005 +                               strlen(EXT3_BUDDY_FILE));
1006 +       if (IS_ERR(db)) {
1007 +               err = PTR_ERR(db);
1008 +               printk("can't lookup buddy file: %d\n", err);
1009 +               goto out;
1010 +       }
1011 +       /* file already exists: grab its inode and go map its blocks */
1012 +       if (db->d_inode != NULL) {
1013 +               sbi->s_buddy = igrab(db->d_inode);
1014 +               goto map;
1015 +       }
1016 +       /* no inode behind the dentry yet: create a regular file */
1017 +       err = ext3_create(root, db, S_IFREG, NULL);
1018 +       if (err) {
1019 +               printk("error while creation buddy file: %d\n", err);
1020 +       } else {
1021 +               sbi->s_buddy = igrab(db->d_inode);
1022 +       }
1023 +       /* NOTE(review): after a failed ext3_create() s_buddy is still NULL here - confirm */
1024 +map:
1025 +       for (i = 0; i < sbi->s_groups_count; i++) {
1026 +               struct buffer_head *bh = NULL;
1027 +               handle_t *handle;
1028 +
1029 +               handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS);
1030 +               if (IS_ERR(handle)) {
1031 +                       err = PTR_ERR(handle);
1032 +                       goto out2;
1033 +               }
1034 +               /* group i uses file blocks 2*i (bitmap) and 2*i+1 (buddy) */
1035 +               /* allocate block for bitmap */
1036 +               bh = ext3_getblk(handle, sbi->s_buddy, i * 2, 1, &err);
1037 +               if (bh == NULL) {
1038 +                       printk("can't get block for buddy bitmap: %d\n", err);
1039 +                       goto out2; /* NOTE(review): handle is not stopped on this path */
1040 +               }
1041 +               sbi->s_buddy_blocks[i].bb_bitmap = bh->b_blocknr;
1042 +               brelse(bh);
1043 +
1044 +               /* allocate block for buddy */
1045 +               bh = ext3_getblk(handle, sbi->s_buddy, i * 2 + 1, 1, &err);
1046 +               if (bh == NULL) {
1047 +                       printk("can't get block for buddy: %d\n", err);
1048 +                       goto out2; /* NOTE(review): handle is not stopped on this path */
1049 +               }
1050 +               sbi->s_buddy_blocks[i].bb_buddy = bh->b_blocknr;
1051 +               brelse(bh);
1052 +               ext3_journal_stop(handle);
1053 +               spin_lock_init(&sbi->s_buddy_blocks[i].bb_lock);
1054 +               sbi->s_buddy_blocks[i].bb_md_cur = NULL;
1055 +               sbi->s_buddy_blocks[i].bb_tid = 0;
1056 +       }
1057 +       /* start a commit; if journal_start_commit() returns nonzero, wait for tid 'target' */
1058 +       if (journal_start_commit(sbi->s_journal, &target))
1059 +               log_wait_commit(sbi->s_journal, target);
1060 +       /* NOTE(review): nothing below frees s_buddy_blocks or puts s_buddy on failure */
1061 +out2:
1062 +       dput(db);
1063 +out:
1064 +       up(&root->i_sem);
1065 +       return err;
1066 +}
1067 +/* Tear down mballoc state: release pending frees, the per-group array and the buddy inode */
1068 +int ext3_mb_release(struct super_block *sb)
1069 +{
1070 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1071 +       /* no-op when the MBALLOC option is not set */
1072 +       if (!test_opt(sb, MBALLOC))
1073 +               return 0;
1074 +
1075 +       /* move the closed and active lists onto the committed list and release it all */
1076 +       spin_lock(&sbi->s_md_lock);
1077 +       list_splice_init(&sbi->s_closed_transaction,
1078 +                       &sbi->s_committed_transaction);
1079 +       list_splice_init(&sbi->s_active_transaction,
1080 +                       &sbi->s_committed_transaction);
1081 +       spin_unlock(&sbi->s_md_lock);
1082 +       ext3_mb_free_committed_blocks(sb);
1083 +
1084 +       if (sbi->s_buddy_blocks)
1085 +               kfree(sbi->s_buddy_blocks);
1086 +       if (sbi->s_buddy)
1087 +               iput(sbi->s_buddy);
1088 +       if (sbi->s_blocks_reserved) /* leftover reservation is only reported */
1089 +               printk("ext3-fs: %ld blocks being reserved at umount!\n",
1090 +                               sbi->s_blocks_reserved);
1091 +       return 0;
1092 +}
1093 +/* Set up mballoc if the MBALLOC mount option is set; returns 0 in every case */
1094 +int ext3_mb_init(struct super_block *sb)
1095 +{
1096 +       struct ext3_super_block *es;
1097 +       int i;
1098 +
1099 +       if (!test_opt(sb, MBALLOC))
1100 +               return 0;
1101 +
1102 +       /* init file for buddy data; MBALLOC stays cleared until setup below is done */
1103 +       clear_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
1104 +       ext3_mb_init_backend(sb); /* NOTE(review): return value ignored */
1105 +
1106 +       es = EXT3_SB(sb)->s_es; /* es is not used below */
1107 +       for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
1108 +               ext3_mb_generate_buddy(sb, i); /* NOTE(review): return value ignored */
1109 +       spin_lock_init(&EXT3_SB(sb)->s_reserve_lock);
1110 +       spin_lock_init(&EXT3_SB(sb)->s_md_lock);
1111 +       INIT_LIST_HEAD(&EXT3_SB(sb)->s_active_transaction);
1112 +       INIT_LIST_HEAD(&EXT3_SB(sb)->s_closed_transaction);
1113 +       INIT_LIST_HEAD(&EXT3_SB(sb)->s_committed_transaction);
1114 +       set_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
1115 +       printk("EXT3-fs: mballoc enabled\n");
1116 +       return 0;
1117 +}
1118 +/* Put blocks queued on sbi->s_committed_transaction back into the buddy data */
1119 +void ext3_mb_free_committed_blocks(struct super_block *sb)
1120 +{
1121 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1122 +       int err, i, count = 0, count2 = 0;
1123 +       struct ext3_free_metadata *md;
1124 +       struct ext3_buddy e3b;
1125 +       /* unlocked check; the list is checked again under s_md_lock in the loop */
1126 +       if (list_empty(&sbi->s_committed_transaction))
1127 +               return;
1128 +
1129 +       /* there are committed blocks still to be freed */
1130 +       do {
1131 +               /* get next array of blocks */
1132 +               md = NULL;
1133 +               spin_lock(&sbi->s_md_lock);
1134 +               if (!list_empty(&sbi->s_committed_transaction)) {
1135 +                       md = list_entry(sbi->s_committed_transaction.next,
1136 +                                       struct ext3_free_metadata, list);
1137 +                       list_del(&md->list);
1138 +               }
1139 +               spin_unlock(&sbi->s_md_lock);
1140 +
1141 +               if (md == NULL)
1142 +                       break;
1143 +
1144 +               mb_debug("gonna free %u blocks in group %u (0x%p):",
1145 +                               md->num, md->group, md);
1146 +               /* a failure to load the group's buddy is treated as fatal (BUG_ON) */
1147 +               err = ext3_mb_load_desc(sb, md->group, &e3b);
1148 +               BUG_ON(err != 0);
1149 +
1150 +               /* there are blocks to put in buddy to make them really free */
1151 +               count += md->num;
1152 +               count2++;
1153 +               ext3_lock_group(sb, md->group);
1154 +               for (i = 0; i < md->num; i++) {
1155 +                       mb_debug(" %u", md->blocks[i]);
1156 +                       mb_free_blocks(&e3b, md->blocks[i], 1);
1157 +               }
1158 +               mb_debug("\n");
1159 +               ext3_unlock_group(sb, md->group);
1160 +               /* after kfree() md is only tested in while (md), never dereferenced */
1161 +               kfree(md);
1162 +               ext3_mb_dirty_buddy(&e3b);
1163 +               ext3_mb_release_desc(&e3b);
1164 +
1165 +       } while (md);
1166 +       mb_debug("freed %u blocks in %u structures\n", count, count2);
1167 +}
1168 +/* When handle belongs to a new transaction, age the free lists and release committed blocks */
1169 +void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle)
1170 +{
1171 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1172 +       /* unlocked check; it is repeated under s_md_lock below */
1173 +       if (sbi->s_last_transaction == handle->h_transaction->t_tid)
1174 +               return;
1175 +
1176 +       /* new transaction! time to close the last one and free blocks for
1177 +        * the committed transaction. we know that only one transaction can
1178 +        * be active, so the previous transaction may still be being logged,
1179 +        * and the transaction before the previous one is known to be already
1180 +        * logged. this means that now we may free blocks freed in all
1181 +        * transactions before the previous one. hope I'm clear enough ... */
1182 +
1183 +       spin_lock(&sbi->s_md_lock);
1184 +       if (sbi->s_last_transaction != handle->h_transaction->t_tid) {
1185 +               mb_debug("new transaction %lu, old %lu\n",
1186 +                               (unsigned long) handle->h_transaction->t_tid,
1187 +                               (unsigned long) sbi->s_last_transaction);
1188 +               list_splice_init(&sbi->s_closed_transaction,
1189 +                                       &sbi->s_committed_transaction);
1190 +               list_splice_init(&sbi->s_active_transaction,
1191 +                                       &sbi->s_closed_transaction);
1192 +               sbi->s_last_transaction = handle->h_transaction->t_tid;
1193 +       }
1194 +       spin_unlock(&sbi->s_md_lock);
1195 +       /* called on every tid mismatch seen above, even if the locked re-check found none */
1196 +       ext3_mb_free_committed_blocks(sb);
1197 +}
1198 +/* Record count blocks from block in the group's pending-free container; 0 or -ENOMEM */
1199 +int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b,
1200 +                               int group, int block, int count)
1201 +{
1202 +       struct ext3_buddy_group_blocks *db = e3b->bd_bd;
1203 +       struct super_block *sb = e3b->bd_sb;
1204 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1205 +       struct ext3_free_metadata *md;
1206 +       int i;
1207 +
1208 +       ext3_lock_group(sb, group);
1209 +       for (i = 0; i < count; i++) {
1210 +               md = db->bb_md_cur;
1211 +               if (md && db->bb_tid != handle->h_transaction->t_tid) {
1212 +                       db->bb_md_cur = NULL;
1213 +                       md = NULL;
1214 +               }
1215 +               /* no container for this transaction: allocate one with the group lock dropped */
1216 +               if (md == NULL) {
1217 +                       ext3_unlock_group(sb, group);
1218 +                       md = kmalloc(sizeof(*md), GFP_KERNEL); /* NOTE(review): GFP_KERNEL under a handle - confirm */
1219 +                       if (md == NULL)
1220 +                               return -ENOMEM; /* group lock is not held here */
1221 +                       md->num = 0;
1222 +                       md->group = group;
1223 +                       /* lock was dropped: use the new md only if bb_md_cur is still NULL */
1224 +                       ext3_lock_group(sb, group);
1225 +                       if (db->bb_md_cur == NULL) {
1226 +                               spin_lock(&sbi->s_md_lock);
1227 +                               list_add(&md->list, &sbi->s_active_transaction);
1228 +                               spin_unlock(&sbi->s_md_lock);
1229 +                               db->bb_md_cur = md;
1230 +                               db->bb_tid = handle->h_transaction->t_tid;
1231 +                               mb_debug("new md 0x%p for group %u\n",
1232 +                                                       md, md->group);
1233 +                       } else {
1234 +                               kfree(md);
1235 +                               md = db->bb_md_cur;
1236 +                       }
1237 +               }
1238 +
1239 +               BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS);
1240 +               md->blocks[md->num] = block + i;
1241 +               md->num++;
1242 +               if (md->num == EXT3_BB_MAX_BLOCKS) {
1243 +                       /* container is full; it was put on the sb list at creation, so just drop it as current */
1244 +                       db->bb_md_cur = NULL;
1245 +               }
1246 +       }
1247 +       ext3_unlock_group(sb, group);
1248 +       return 0;
1249 +}
1250 +/* mballoc free path: per group, update buddy data (or defer metadata) and clear bitmap bits */
1251 +void ext3_mb_free_blocks(handle_t *handle, struct inode *inode,
1252 +                       unsigned long block, unsigned long count, int metadata)
1253 +{
1254 +       struct buffer_head *bitmap_bh = NULL;
1255 +       struct ext3_group_desc *gdp;
1256 +       struct ext3_super_block *es;
1257 +       unsigned long bit, overflow;
1258 +       struct buffer_head *gd_bh;
1259 +       unsigned long block_group;
1260 +       struct ext3_sb_info *sbi;
1261 +       struct super_block *sb;
1262 +       struct ext3_buddy e3b;
1263 +       int err = 0, ret;
1264 +
1265 +       sb = inode->i_sb;
1266 +       if (!sb) {
1267 +               printk ("ext3_free_blocks: nonexistent device");
1268 +               return;
1269 +       }
1270 +
1271 +       ext3_mb_poll_new_transaction(sb, handle);
1272 +       /* validate the range: inside the data zone and no wrap-around of block + count */
1273 +       sbi = EXT3_SB(sb);
1274 +       es = EXT3_SB(sb)->s_es;
1275 +       if (block < le32_to_cpu(es->s_first_data_block) ||
1276 +           block + count < block ||
1277 +           block + count > le32_to_cpu(es->s_blocks_count)) {
1278 +               ext3_error (sb, "ext3_free_blocks",
1279 +                           "Freeing blocks not in datazone - "
1280 +                           "block = %lu, count = %lu", block, count);
1281 +               goto error_return;
1282 +       }
1283 +
1284 +       ext3_debug("freeing block %lu\n", block);
1285 +       /* one pass per block group; 'overflow' carries what spills into the next group */
1286 +do_more:
1287 +       overflow = 0;
1288 +       block_group = (block - le32_to_cpu(es->s_first_data_block)) /
1289 +                     EXT3_BLOCKS_PER_GROUP(sb);
1290 +       bit = (block - le32_to_cpu(es->s_first_data_block)) %
1291 +                     EXT3_BLOCKS_PER_GROUP(sb);
1292 +       /*
1293 +        * Check to see if we are freeing blocks across a group
1294 +        * boundary.
1295 +        */
1296 +       if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
1297 +               overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
1298 +               count -= overflow;
1299 +       }
1300 +       brelse(bitmap_bh); /* NULL on the first pass, the previous group's bitmap afterwards */
1301 +       bitmap_bh = read_block_bitmap(sb, block_group);
1302 +       if (!bitmap_bh)
1303 +               goto error_return;
1304 +       gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
1305 +       if (!gdp)
1306 +               goto error_return;
1307 +       /* touching bitmaps or the inode table is reported, but there is no early exit here */
1308 +       if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
1309 +           in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
1310 +           in_range (block, le32_to_cpu(gdp->bg_inode_table),
1311 +                     EXT3_SB(sb)->s_itb_per_group) ||
1312 +           in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
1313 +                     EXT3_SB(sb)->s_itb_per_group))
1314 +               ext3_error (sb, "ext3_free_blocks",
1315 +                           "Freeing blocks in system zones - "
1316 +                           "Block = %lu, count = %lu",
1317 +                           block, count);
1318 +
1319 +       BUFFER_TRACE(bitmap_bh, "getting write access");
1320 +       err = ext3_journal_get_write_access(handle, bitmap_bh);
1321 +       if (err)
1322 +               goto error_return;
1323 +
1324 +       /*
1325 +        * We are about to modify some metadata.  Call the journal APIs
1326 +        * to unshare ->b_data if a currently-committing transaction is
1327 +        * using it
1328 +        */
1329 +       BUFFER_TRACE(gd_bh, "get_write_access");
1330 +       err = ext3_journal_get_write_access(handle, gd_bh);
1331 +       if (err)
1332 +               goto error_return;
1333 +
1334 +       err = ext3_mb_load_desc(sb, block_group, &e3b);
1335 +       if (err)
1336 +               goto error_return;
1337 +
1338 +       if (metadata) {
1339 +               /* blocks being freed are metadata. these blocks shouldn't
1340 +                * be used until this transaction is committed */
1341 +               ext3_mb_free_metadata(handle, &e3b, block_group, bit, count); /* NOTE(review): result ignored */
1342 +       } else { 
1343 +               ext3_lock_group(sb, block_group);
1344 +               mb_free_blocks(&e3b, bit, count);
1345 +               gdp->bg_free_blocks_count =
1346 +                       cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
1347 +               ext3_unlock_group(sb, block_group);
1348 +               percpu_counter_mod(&sbi->s_freeblocks_counter, count);
1349 +       }
1350 +       /* NOTE(review): only the non-metadata branch updates the free-block counters - confirm */
1351 +       ext3_mb_dirty_buddy(&e3b);
1352 +       ext3_mb_release_desc(&e3b);
1353 +
1354 +       /* FIXME: undo logic will be implemented later and another way */
1355 +       mb_clear_bits(bitmap_bh->b_data, bit, count);
1356 +       DQUOT_FREE_BLOCK(inode, count);
1357 +
1358 +       /* We dirtied the bitmap block */
1359 +       BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
1360 +       err = ext3_journal_dirty_metadata(handle, bitmap_bh);
1361 +
1362 +       /* And the group descriptor block */
1363 +       BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
1364 +       ret = ext3_journal_dirty_metadata(handle, gd_bh);
1365 +       if (!err) err = ret;
1366 +       /* blocks left past the group boundary: go around again for the next group */
1367 +       if (overflow && !err) {
1368 +               block += count;
1369 +               count = overflow;
1370 +               goto do_more;
1371 +       }
1372 +       sb->s_dirt = 1;
1373 +error_return:
1374 +       brelse(bitmap_bh);
1375 +       ext3_std_error(sb, err);
1376 +       return;
1377 +}
1378 +/* Reserve blocks if (free - already reserved) covers the request; returns 0 or -ENOSPC */
1379 +int ext3_mb_reserve_blocks(struct super_block *sb, int blocks)
1380 +{
1381 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1382 +       int free, ret = -ENOSPC;
1383 +
1384 +       BUG_ON(blocks < 0);
1385 +       spin_lock(&sbi->s_reserve_lock);
1386 +       free = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
1387 +       if (blocks <= free - sbi->s_blocks_reserved) {
1388 +               sbi->s_blocks_reserved += blocks;
1389 +               ret = 0;
1390 +       }
1391 +       spin_unlock(&sbi->s_reserve_lock);
1392 +       return ret;
1393 +}
1394 +/* Drop a reservation of blocks; warns and clamps s_blocks_reserved to 0 if it goes negative */
1395 +void ext3_mb_release_blocks(struct super_block *sb, int blocks)
1396 +{
1397 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1398 +
1399 +       BUG_ON(blocks < 0);
1400 +       spin_lock(&sbi->s_reserve_lock);
1401 +       sbi->s_blocks_reserved -= blocks;
1402 +       WARN_ON(sbi->s_blocks_reserved < 0);
1403 +       if (sbi->s_blocks_reserved < 0)
1404 +               sbi->s_blocks_reserved = 0;
1405 +       spin_unlock(&sbi->s_reserve_lock);
1406 +}
1407 +/* Allocate one block: through mballoc when MBALLOC is set, else through ext3_new_block_old() */
1408 +int ext3_new_block(handle_t *handle, struct inode *inode,
1409 +                       unsigned long goal, u32 *pc, u32 *pb, int *errp)
1410 +{
1411 +       int ret, len;
1412 +       /* pc and pb are only passed to the old allocator */
1413 +       if (!test_opt(inode->i_sb, MBALLOC)) {
1414 +               ret = ext3_new_block_old(handle, inode, goal, pc, pb, errp);
1415 +               goto out;
1416 +       }
1417 +       len = 1; /* request exactly one block */
1418 +       ret = ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp);
1419 +out:
1420 +       return ret;
1421 +}
1422 +
1423 +/* Free blocks via the old path or mballoc; the metadata flag is only passed to mballoc */
1424 +void ext3_free_blocks(handle_t *handle, struct inode * inode,
1425 +                       unsigned long block, unsigned long count, int metadata)
1426 +{
1427 +       if (!test_opt(inode->i_sb, MBALLOC))
1428 +               ext3_free_blocks_old(handle, inode, block, count);
1429 +       else
1430 +               ext3_mb_free_blocks(handle, inode, block, count, metadata);
1431 +       return;
1432 +}
1433 +
1434 Index: linux-2.6.7/fs/ext3/super.c
1435 ===================================================================
1436 --- linux-2.6.7.orig/fs/ext3/super.c    2004-09-03 08:46:59.000000000 +0400
1437 +++ linux-2.6.7/fs/ext3/super.c 2004-09-03 08:46:59.000000000 +0400
1438 @@ -392,6 +392,7 @@
1439         struct ext3_super_block *es = sbi->s_es;
1440         int i;
1441  
1442 +       ext3_mb_release(sb);
1443         ext3_ext_release(sb);
1444         ext3_xattr_put_super(sb);
1445         journal_destroy(sbi->s_journal);
1446 @@ -594,7 +595,7 @@
1447         Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
1448         Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
1449         Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
1450 -       Opt_ignore, Opt_err, Opt_extents, Opt_extdebug
1451 +       Opt_ignore, Opt_err, Opt_extents, Opt_extdebug, Opt_mballoc,
1452  };
1453  
1454  static match_table_t tokens = {
1455 @@ -644,6 +645,7 @@
1456         {Opt_iopen_nopriv,  "iopen_nopriv"},
1457         {Opt_extents, "extents"},
1458         {Opt_extdebug, "extdebug"},
1459 +       {Opt_mballoc, "mballoc"},
1460         {Opt_err, NULL}
1461  };
1462  
1463 @@ -929,6 +931,9 @@
1464                 case Opt_extdebug:
1465                         set_opt (sbi->s_mount_opt, EXTDEBUG);
1466                         break;
1467 +               case Opt_mballoc:
1468 +                       set_opt (sbi->s_mount_opt, MBALLOC);
1469 +                       break;
1470                 default:
1471                         printk (KERN_ERR
1472                                 "EXT3-fs: Unrecognized mount option \"%s\" "
1473 @@ -1602,7 +1607,8 @@
1474                 ext3_count_dirs(sb));
1475  
1476         ext3_ext_init(sb);
1477
1478 +       ext3_mb_init(sb);
1479 +
1480         return 0;
1481  
1482  failed_mount3:
1483 Index: linux-2.6.7/fs/ext3/Makefile
1484 ===================================================================
1485 --- linux-2.6.7.orig/fs/ext3/Makefile   2004-09-03 08:46:59.000000000 +0400
1486 +++ linux-2.6.7/fs/ext3/Makefile        2004-09-03 08:46:59.000000000 +0400
1487 @@ -5,7 +5,7 @@
1488  obj-$(CONFIG_EXT3_FS) += ext3.o
1489  
1490  ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
1491 -          ioctl.o namei.o super.o symlink.o hash.o extents.o
1492 +          ioctl.o namei.o super.o symlink.o hash.o extents.o mballoc.o
1493  
1494  ext3-$(CONFIG_EXT3_FS_XATTR)    += xattr.o xattr_user.o xattr_trusted.o
1495  ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
1496 Index: linux-2.6.7/fs/ext3/balloc.c
1497 ===================================================================
1498 --- linux-2.6.7.orig/fs/ext3/balloc.c   2004-08-26 17:11:16.000000000 +0400
1499 +++ linux-2.6.7/fs/ext3/balloc.c        2004-09-03 08:46:59.000000000 +0400
1500 @@ -78,7 +78,7 @@
1501   *
1502   * Return buffer_head on success or NULL in case of failure.
1503   */
1504 -static struct buffer_head *
1505 +struct buffer_head *
1506  read_block_bitmap(struct super_block *sb, unsigned int block_group)
1507  {
1508         struct ext3_group_desc * desc;
1509 @@ -98,8 +98,8 @@
1510  }
1511  
1512  /* Free given blocks, update quota and i_blocks field */
1513 -void ext3_free_blocks (handle_t *handle, struct inode * inode,
1514 -                       unsigned long block, unsigned long count)
1515 +void ext3_free_blocks_old (handle_t *handle, struct inode * inode,
1516 +                               unsigned long block, unsigned long count)
1517  {
1518         struct buffer_head *bitmap_bh = NULL;
1519         struct buffer_head *gd_bh;
1520 @@ -474,8 +474,8 @@
1521   * This function also updates quota and i_blocks field.
1522   */
1523  int
1524 -ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal,
1525 -               u32 *prealloc_count, u32 *prealloc_block, int *errp)
1526 +ext3_new_block_old(handle_t *handle, struct inode *inode, unsigned long goal,
1527 +                       u32 *prealloc_count, u32 *prealloc_block, int *errp)
1528  {
1529         struct buffer_head *bitmap_bh = NULL;   /* bh */
1530         struct buffer_head *gdp_bh;             /* bh2 */
1531 Index: linux-2.6.7/fs/ext3/namei.c
1532 ===================================================================
1533 --- linux-2.6.7.orig/fs/ext3/namei.c    2004-09-03 08:46:59.000000000 +0400
1534 +++ linux-2.6.7/fs/ext3/namei.c 2004-09-03 08:46:59.000000000 +0400
1535 @@ -1640,7 +1640,7 @@
1536   * If the create succeeds, we fill in the inode information
1537   * with d_instantiate(). 
1538   */
1539 -static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
1540 +int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
1541                 struct nameidata *nd)
1542  {
1543         handle_t *handle; 
1544 Index: linux-2.6.7/fs/ext3/inode.c
1545 ===================================================================
1546 --- linux-2.6.7.orig/fs/ext3/inode.c    2004-09-03 08:46:59.000000000 +0400
1547 +++ linux-2.6.7/fs/ext3/inode.c 2004-09-03 08:46:59.000000000 +0400
1548 @@ -254,7 +254,7 @@
1549                 ei->i_prealloc_count = 0;
1550                 ei->i_prealloc_block = 0;
1551                 /* Writer: end */
1552 -               ext3_free_blocks (inode, block, total);
1553 +               ext3_free_blocks (inode, block, total, 1);
1554         }
1555  #endif
1556  }
1557 @@ -633,7 +633,7 @@
1558                 ext3_journal_forget(handle, branch[i].bh);
1559         }
1560         for (i = 0; i < keys; i++)
1561 -               ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
1562 +               ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
1563         return err;
1564  }
1565  
1566 @@ -734,7 +734,7 @@
1567         if (err == -EAGAIN)
1568                 for (i = 0; i < num; i++)
1569                         ext3_free_blocks(handle, inode, 
1570 -                                        le32_to_cpu(where[i].key), 1);
1571 +                                        le32_to_cpu(where[i].key), 1, 1);
1572         return err;
1573  }
1574  
1575 @@ -1911,7 +1911,7 @@
1576                 }
1577         }
1578  
1579 -       ext3_free_blocks(handle, inode, block_to_free, count);
1580 +       ext3_free_blocks(handle, inode, block_to_free, count, 1);
1581  }
1582  
1583  /**
1584 @@ -2082,7 +2082,7 @@
1585                                 ext3_journal_test_restart(handle, inode);
1586                         }
1587  
1588 -                       ext3_free_blocks(handle, inode, nr, 1);
1589 +                       ext3_free_blocks(handle, inode, nr, 1, 1);
1590  
1591                         if (parent_bh) {
1592                                 /*
1593 Index: linux-2.6.7/fs/ext3/extents.c
1594 ===================================================================
1595 --- linux-2.6.7.orig/fs/ext3/extents.c  2004-09-03 08:46:59.000000000 +0400
1596 +++ linux-2.6.7/fs/ext3/extents.c       2004-09-03 08:46:59.000000000 +0400
1597 @@ -740,7 +740,7 @@
1598                 for (i = 0; i < depth; i++) {
1599                         if (!ablocks[i])
1600                                 continue;
1601 -                       ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
1602 +                       ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
1603                 }
1604         }
1605         kfree(ablocks);
1606 @@ -1388,7 +1388,7 @@
1607                         path->p_idx->ei_leaf);
1608         bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
1609         ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
1610 -       ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
1611 +       ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1);
1612         return err;
1613  }
1614  
1615 @@ -1876,10 +1876,12 @@
1616         int needed = ext3_remove_blocks_credits(tree, ex, from, to);
1617         handle_t *handle = ext3_journal_start(tree->inode, needed);
1618         struct buffer_head *bh;
1619 -       int i;
1620 +       int i, metadata = 0;
1621  
1622         if (IS_ERR(handle))
1623                 return PTR_ERR(handle);
1624 +       if (S_ISDIR(tree->inode->i_mode))
1625 +               metadata = 1;
1626         if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
1627                 /* tail removal */
1628                 unsigned long num, start;
1629 @@ -1891,7 +1893,7 @@
1630                         bh = sb_find_get_block(tree->inode->i_sb, start + i);
1631                         ext3_forget(handle, 0, tree->inode, bh, start + i);
1632                 }
1633 -               ext3_free_blocks(handle, tree->inode, start, num);
1634 +               ext3_free_blocks(handle, tree->inode, start, num, metadata);
1635         } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
1636                 printk("strange request: removal %lu-%lu from %u:%u\n",
1637                         from, to, ex->ee_block, ex->ee_len);
1638 Index: linux-2.6.7/fs/ext3/xattr.c
1639 ===================================================================
1640 --- linux-2.6.7.orig/fs/ext3/xattr.c    2004-09-03 08:46:59.000000000 +0400
1641 +++ linux-2.6.7/fs/ext3/xattr.c 2004-09-03 08:46:59.000000000 +0400
1642 @@ -1366,7 +1366,7 @@
1643                         new_bh = sb_getblk(sb, block);
1644                         if (!new_bh) {
1645  getblk_failed:
1646 -                               ext3_free_blocks(handle, inode, block, 1);
1647 +                               ext3_free_blocks(handle, inode, block, 1, 1);
1648                                 error = -EIO;
1649                                 goto cleanup;
1650                         }
1651 @@ -1408,7 +1408,7 @@
1652                 if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
1653                         /* Free the old block. */
1654                         ea_bdebug(old_bh, "freeing");
1655 -                       ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
1656 +                       ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1);
1657  
1658                         /* ext3_forget() calls bforget() for us, but we
1659                            let our caller release old_bh, so we need to
1660 @@ -1497,7 +1497,7 @@
1661         lock_buffer(bh);
1662         if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
1663                 ext3_xattr_cache_remove(bh);
1664 -               ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
1665 +               ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1);
1666                 get_bh(bh);
1667                 ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
1668         } else {
1669 Index: linux-2.6.7/include/linux/ext3_fs.h
1670 ===================================================================
1671 --- linux-2.6.7.orig/include/linux/ext3_fs.h    2004-09-03 08:46:59.000000000 +0400
1672 +++ linux-2.6.7/include/linux/ext3_fs.h 2004-09-03 08:47:35.000000000 +0400
1673 @@ -57,6 +57,8 @@
1674  #define ext3_debug(f, a...)    do {} while (0)
1675  #endif
1676  
1677 +#define EXT3_MULTIBLOCK_ALLOCATOR      1
1678 +
1679  /*
1680   * Special inodes numbers
1681   */
1682 @@ -335,6 +337,7 @@
1683  #define EXT3_MOUNT_IOPEN_NOPRIV                0x80000 /* Make iopen world-readable */
1684  #define EXT3_MOUNT_EXTENTS             0x10000 /* Extents support */
1685  #define EXT3_MOUNT_EXTDEBUG            0x20000 /* Extents debug */
1686 +#define EXT3_MOUNT_MBALLOC             0x100000/* Buddy allocation support */
1687  
1688  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
1689  #ifndef clear_opt
1690 @@ -695,7 +698,7 @@
1691  extern int ext3_new_block (handle_t *, struct inode *, unsigned long,
1692                                             __u32 *, __u32 *, int *);
1693  extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
1694 -                             unsigned long);
1695 +                             unsigned long, int);
1696  extern unsigned long ext3_count_free_blocks (struct super_block *);
1697  extern void ext3_check_blocks_bitmap (struct super_block *);
1698  extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
1699 Index: linux-2.6.7/include/linux/ext3_fs_sb.h
1700 ===================================================================
1701 --- linux-2.6.7.orig/include/linux/ext3_fs_sb.h 2004-09-03 08:46:59.000000000 +0400
1702 +++ linux-2.6.7/include/linux/ext3_fs_sb.h      2004-09-03 08:46:59.000000000 +0400
1703 @@ -23,9 +23,29 @@
1704  #define EXT_INCLUDE
1705  #include <linux/blockgroup_lock.h>
1706  #include <linux/percpu_counter.h>
1707 +#include <linux/list.h>
1708  #endif
1709  #endif
1710  
1711 +#define EXT3_BB_MAX_BLOCKS     30 /* capacity of ext3_free_metadata.blocks[] */
1712 +struct ext3_free_metadata { /* presumably one batch of freed metadata blocks for a single group -- confirm in mballoc.c */
1713 +       unsigned short group; /* NOTE(review): presumably the block group number; 16 bits cannot hold values above 65535 -- confirm */
1714 +       unsigned short num; /* presumably the count of valid entries in blocks[] -- confirm */
1715 +       unsigned short blocks[EXT3_BB_MAX_BLOCKS]; /* 16-bit entries; presumably group-relative block offsets -- confirm */
1716 +       struct list_head list; /* list linkage; presumably onto one of the s_*_transaction lists in the sb info -- confirm */
1717 +};
1718 +
1719 +#define EXT3_BB_MAX_ORDER      14 /* number of slots in bb_counters[] */
1720 +
1721 +struct ext3_buddy_group_blocks { /* presumably per-group buddy allocator state, reached via s_buddy_blocks -- confirm */
1722 +       sector_t        bb_bitmap; /* presumably the location of this group's bitmap block -- confirm */
1723 +       sector_t        bb_buddy; /* presumably the location of this group's buddy block -- confirm */
1724 +       spinlock_t      bb_lock; /* which fields this lock protects is not visible here -- TODO confirm */
1725 +       unsigned        bb_counters[EXT3_BB_MAX_ORDER]; /* one counter per order; presumably free-chunk counts -- confirm */
1726 +       struct ext3_free_metadata *bb_md_cur; /* presumably the ext3_free_metadata batch currently being filled -- confirm */
1727 +       unsigned long bb_tid; /* presumably the transaction id associated with bb_md_cur -- confirm */
1728 +};
1729 +
1730  /*
1731   * third extended-fs super-block data in memory
1732   */
1733 @@ -76,6 +96,17 @@
1734         char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
1735         int s_jquota_fmt;                       /* Format of quota to use */
1736  #endif
1737 +
1738 +       /* for buddy allocator */
1739 +       struct ext3_buddy_group_blocks *s_buddy_blocks;
1740 +       struct inode *s_buddy;
1741 +       long s_blocks_reserved;
1742 +       spinlock_t s_reserve_lock;
1743 +       struct list_head s_active_transaction;
1744 +       struct list_head s_closed_transaction;
1745 +       struct list_head s_committed_transaction;
1746 +       spinlock_t s_md_lock;
1747 +       tid_t s_last_transaction;
1748  };
1749  
1750  #endif /* _LINUX_EXT3_FS_SB */