Whamcloud - gitweb
land b1_4_smallfix on b1_4(20050202_1817)
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / ext3-mballoc2-2.6-suse.patch
1 Index: linux-2.6.5-sles9/fs/ext3/mballoc.c
2 ===================================================================
3 --- linux-2.6.5-sles9.orig/fs/ext3/mballoc.c    2003-01-30 13:24:37.000000000 +0300
4 +++ linux-2.6.5-sles9/fs/ext3/mballoc.c 2004-11-09 02:34:25.181340632 +0300
5 @@ -0,0 +1,1441 @@
6 +/*
7 + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com
8 + * Written by Alex Tomas <alex@clusterfs.com>
9 + *
10 + * This program is free software; you can redistribute it and/or modify
11 + * it under the terms of the GNU General Public License version 2 as
12 + * published by the Free Software Foundation.
13 + *
14 + * This program is distributed in the hope that it will be useful,
15 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 + * GNU General Public License for more details.
18 + *
19 + * You should have received a copy of the GNU General Public Licens
20 + * along with this program; if not, write to the Free Software
21 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-
22 + */
23 +
24 +
25 +/*
26 + * mballoc.c contains the multiblocks allocation routines
27 + */
28 +
29 +#include <linux/config.h>
30 +#include <linux/time.h>
31 +#include <linux/fs.h>
32 +#include <linux/namei.h>
33 +#include <linux/jbd.h>
34 +#include <linux/ext3_fs.h>
35 +#include <linux/ext3_jbd.h>
36 +#include <linux/quotaops.h>
37 +#include <linux/buffer_head.h>
38 +#include <linux/module.h>
39 +
40 +/*
41 + * TODO:
42 + *   - do not scan from the beginning, try to remember first free block
43 + *   - mb_mark_used_* may allocate chunk right after splitting buddy
44 + *   - special flag to advise allocator to look for requested + N blocks
45 + *     this may improve interaction between extents and mballoc
46 + */
47 +
48 +/*
49 + * with AGGRESSIVE_CHECK the allocator runs consistency checks over
50 + * its structures. these checks slow things down a lot
51 + */
52 +#define AGGRESSIVE_CHECK__
53 +
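54 +/* define MB_DEBUG (without the trailing underscores) to have mb_debug()
55 + * print through printk; otherwise mb_debug() expands to nothing */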
56 +#define MB_DEBUG__
57 +#ifdef MB_DEBUG
58 +#define mb_debug(fmt,a...)     printk(fmt, ##a)
59 +#else
60 +#define mb_debug(fmt,a...)
61 +#endif
62 +
63 +/*
64 + * where to save buddies structures between umount/mount (clean case only)
65 + */
66 +#define EXT3_BUDDY_FILE                ".buddy"
67 +
68 +/*
69 + * max. number of chunks to be tracked in ext3_free_extent struct
70 + */
71 +#define MB_ARR_SIZE    32
72 +/* state of one multi-block allocation request */
73 +struct ext3_allocation_context {
74 +       struct super_block *ac_sb;
75 +
76 +       /* search goals */
77 +       int ac_g_group;         /* group the search starts from */
78 +       int ac_g_start;         /* goal block, relative to ac_g_group */
79 +       int ac_g_len;           /* number of blocks asked for */
80 +       int ac_g_flags;         /* caller's flags; bit 0x1: try goal block first */
81 +       
82 +       /* the best found extent */
83 +       int ac_b_group;
84 +       int ac_b_start;         /* relative to ac_b_group */
85 +       int ac_b_len;
86 +       
87 +       /* number of iterations done. we have to track to limit searching */
88 +       int ac_repeats;
89 +       int ac_groups_scanned;
90 +       int ac_status;          /* AC_STATUS_* below, 0 until set */
91 +};
92 +
93 +#define AC_STATUS_CONTINUE     1
94 +#define AC_STATUS_FOUND                2       /* ac_b_* describe allocated blocks */
95 +
96 +/* handle on one group's buddy data, filled in by ext3_mb_load_desc() */
97 +struct ext3_buddy {
98 +       void *bd_bitmap;        /* bd_bh->b_data: order-0 map, set bit = free block */
99 +       void *bd_buddy;         /* bd_bh2->b_data: maps for orders 1 and up */
100 +       int bd_blkbits;        /* sb->s_blocksize_bits */
101 +       struct buffer_head *bd_bh;
102 +       struct buffer_head *bd_bh2;
103 +       struct ext3_buddy_group_blocks *bd_bd; /* per-group counters and lock */
104 +       struct super_block *bd_sb;
105 +};
106 +/* run of free blocks assembled by mb_find_extent() from adjacent chunks */
107 +struct ext3_free_extent {
108 +       int fe_start;           /* first block, group-relative */
109 +       int fe_len;             /* length in blocks */
110 +       unsigned char fe_orders[MB_ARR_SIZE];   /* order of each chunk, in block order */
111 +       unsigned char fe_nums;  /* entries used in fe_orders */
112 +       unsigned char fe_back;  /* nonzero: mb_mark_used() allocates from the tail */
113 +};
114 +/* true when b is inside [first, first + len - 1]; b and first are evaluated twice */
115 +#define in_range(b, first, len)        ((b) >= (first) && (b) <= (first) + (len) - 1)
116 +
117 +
118 +int ext3_create (struct inode *, struct dentry *, int, struct nameidata *);
119 +struct buffer_head * read_block_bitmap(struct super_block *, unsigned int);
120 +void ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, unsigned long);
121 +int ext3_new_block_old(handle_t *, struct inode *, unsigned long, int *);
122 +int ext3_mb_reserve_blocks(struct super_block *, int);
123 +void ext3_mb_poll_new_transaction(struct super_block *, handle_t *);
124 +void ext3_mb_free_committed_blocks(struct super_block *);
125 +
126 +#define mb_correct_addr_and_bit(bit,addr)      \
127 +{                                              \
128 +       if ((unsigned long)addr & 1) {          \
129 +               bit += 8;                       \
130 +               addr--;                         \
131 +       }                                       \
132 +       if ((unsigned long)addr & 2) {          \
133 +               bit += 16;                      \
134 +               addr--;                         \
135 +               addr--;                         \
136 +       }                                       \
137 +}
138 +/* test_bit() on an address first aligned by mb_correct_addr_and_bit() */
139 +static inline int mb_test_bit(int bit, void *addr)
140 +{
141 +       mb_correct_addr_and_bit(bit,addr);
142 +       return test_bit(bit, addr);
143 +}
144 +/* set_bit() on an address first aligned by mb_correct_addr_and_bit() */
145 +static inline void mb_set_bit(int bit, void *addr)
146 +{
147 +       mb_correct_addr_and_bit(bit,addr);
148 +       set_bit(bit, addr);
149 +}
150 +/* clear_bit() on an address first aligned by mb_correct_addr_and_bit() */
151 +static inline void mb_clear_bit(int bit, void *addr)
152 +{
153 +       mb_correct_addr_and_bit(bit,addr);
154 +       clear_bit(bit, addr);
155 +}
156 +/*
    + * Locate the bitmap of a given order.  Order 0 is the block bitmap; orders
    + * 1 .. bd_blkbits + 1 sit back to back in the buddy block, each half the
    + * size of the one before.  Returns the start of the map and stores its
    + * length in bits in *max, or returns NULL (with *max untouched) when order
    + * is above bd_blkbits + 1.
    + */
157 +static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max)
158 +{
159 +       void *map;
160 +       int nbits, lvl;
161 +
162 +       J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
163 +       J_ASSERT(max != NULL);
164 +       if (order > e3b->bd_blkbits + 1)
165 +               return NULL;
166 +
167 +       /* the block bitmap carries one bit for every block */
168 +       nbits = 1 << (e3b->bd_blkbits + 3);
169 +       if (order == 0) {
170 +               *max = nbits;
171 +               return e3b->bd_bitmap;
172 +       }
173 +
174 +       /* skip the maps of all lower orders, halving the size each time */
175 +       map = e3b->bd_buddy;
176 +       for (lvl = 1, nbits >>= 1; lvl < order; lvl++, nbits >>= 1)
177 +               map += 1 << (e3b->bd_blkbits - lvl);
178 +
179 +       *max = nbits;
180 +       return map;
181 +}
182 +/*
    + * Attach the buddy data of @group to @e3b: get the group's bitmap and buddy
    + * blocks through the buffer cache, read them in if they are not up to date
    + * and point the bd_* fields at them.
    + *
    + * Returns 0 with a reference held on both buffers (drop them with
    + * ext3_mb_release_desc()), or -EIO with no reference held and
    + * bd_bh/bd_bh2 set to NULL.
    + */
183 +static int ext3_mb_load_desc(struct super_block *sb, int group,
184 +                               struct ext3_buddy *e3b)
185 +{
186 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
187 +
188 +       J_ASSERT(sbi->s_buddy_blocks[group]->bb_bitmap);
189 +       J_ASSERT(sbi->s_buddy_blocks[group]->bb_buddy);
190 +
    +       /* e3b may come in uninitialized, and the error path releases
    +        * both buffers: make sure neither pointer is garbage */
    +       e3b->bd_bh = NULL;
    +       e3b->bd_bh2 = NULL;
    +
191 +       /* load bitmap */
192 +       e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group]->bb_bitmap);
193 +       if (e3b->bd_bh == NULL) {
194 +               ext3_error(sb, "ext3_mb_load_desc",
195 +                               "can't get block for buddy bitmap\n");
196 +               goto out;
197 +       }
198 +       if (!buffer_uptodate(e3b->bd_bh)) {
199 +               ll_rw_block(READ, 1, &e3b->bd_bh);
200 +               wait_on_buffer(e3b->bd_bh);
201 +       }
202 +       if (!buffer_uptodate(e3b->bd_bh)) {
    +               /* the read failed: report it instead of asserting */
    +               ext3_error(sb, "ext3_mb_load_desc",
    +                               "can't read buddy bitmap\n");
    +               goto out;
    +       }
203 +
204 +       /* load buddy */
205 +       e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group]->bb_buddy);
206 +       if (e3b->bd_bh2 == NULL) {
207 +               ext3_error(sb, "ext3_mb_load_desc",
208 +                               "can't get block for buddy bitmap\n");
209 +               goto out;
210 +       }
211 +       if (!buffer_uptodate(e3b->bd_bh2)) {
212 +               ll_rw_block(READ, 1, &e3b->bd_bh2);
213 +               wait_on_buffer(e3b->bd_bh2);
214 +       }
215 +       if (!buffer_uptodate(e3b->bd_bh2)) {
    +               /* the read failed: report it instead of asserting */
    +               ext3_error(sb, "ext3_mb_load_desc",
    +                               "can't read buddy\n");
    +               goto out;
    +       }
216 +
217 +       e3b->bd_bitmap = e3b->bd_bh->b_data;
218 +       e3b->bd_buddy = e3b->bd_bh2->b_data;
219 +       e3b->bd_blkbits = sb->s_blocksize_bits;
220 +       e3b->bd_bd = sbi->s_buddy_blocks[group];
221 +       e3b->bd_sb = sb;
222 +
223 +       return 0;
224 +out:
    +       /* brelse() accepts NULL, so this is safe at any failure point */
225 +       brelse(e3b->bd_bh);
226 +       brelse(e3b->bd_bh2);
227 +       e3b->bd_bh = NULL;
228 +       e3b->bd_bh2 = NULL;
229 +       return -EIO;
230 +}
231 +/* mark both buffers behind a loaded ext3_buddy dirty */
232 +static void ext3_mb_dirty_buddy(struct ext3_buddy *e3b)
233 +{
234 +       mark_buffer_dirty(e3b->bd_bh);
235 +       mark_buffer_dirty(e3b->bd_bh2);
236 +}
237 +/* drop the two buffer references taken by ext3_mb_load_desc() */
238 +static void ext3_mb_release_desc(struct ext3_buddy *e3b)
239 +{
240 +       brelse(e3b->bd_bh);
241 +       brelse(e3b->bd_bh2);
242 +}
243 +/*
    + * Consistency check of one group's buddy data.  Compiled in only when
    + * AGGRESSIVE_CHECK is defined, otherwise mb_check_buddy() is an empty
    + * macro.  Does nothing unless the MBALLOC option is set; trips J_ASSERT on
    + * the first inconsistency.
    + *
    + * For every order from bd_blkbits + 1 down to 2: a set bit must have both
    + * of its halves clear one order down and all of its blocks set in the
    + * block bitmap, and the number of set bits must equal bb_counters[order].
    + * Then, for every clear bit of the block bitmap, the covering bit must be
    + * clear at each order from 0 to bd_blkbits.
    + */
244 +#ifdef AGGRESSIVE_CHECK
245 +static void mb_check_buddy(struct ext3_buddy *e3b)
246 +{
247 +       int order = e3b->bd_blkbits + 1;
248 +       int max, max2, i, j, k, count;
249 +       void *buddy, *buddy2;
250 +
251 +       if (!test_opt(e3b->bd_sb, MBALLOC))
252 +               return;
253 +
254 +       while (order > 1) {
255 +               buddy = mb_find_buddy(e3b, order, &max);
256 +               J_ASSERT(buddy);
257 +               buddy2 = mb_find_buddy(e3b, order - 1, &max2);
258 +               J_ASSERT(buddy2);
259 +               J_ASSERT(buddy != buddy2);
260 +               J_ASSERT(max * 2 == max2);
261 +
262 +               count = 0;
263 +               for (i = 0; i < max; i++) {
264 +
265 +                       if (!mb_test_bit(i, buddy)) {
266 +                               /* only single bit in buddy2 may be 1 */
267 +                               if (mb_test_bit(i << 1, buddy2))
268 +                                       J_ASSERT(!mb_test_bit((i<<1)+1, buddy2));
269 +                               else if (mb_test_bit((i << 1) + 1, buddy2))
270 +                                       J_ASSERT(!mb_test_bit(i << 1, buddy2));
271 +                               continue;
272 +                       }
273 +
274 +                       /* both bits in buddy2 must be 0 */
275 +                       J_ASSERT(!mb_test_bit(i << 1, buddy2));
276 +                       J_ASSERT(!mb_test_bit((i << 1) + 1, buddy2));
277 +
278 +                       for (j = 0; j < (1 << order); j++) {
279 +                               k = (i * (1 << order)) + j;
280 +                               J_ASSERT(mb_test_bit(k, e3b->bd_bitmap));
281 +                       }
282 +                       count++;
283 +               }
284 +               J_ASSERT(e3b->bd_bd->bb_counters[order] == count);
285 +               order--;
286 +       }
287 +
288 +       buddy = mb_find_buddy(e3b, 0, &max);
289 +       for (i = 0; i < max; i++) {
290 +               if (mb_test_bit(i, buddy))
291 +                       continue;
292 +               /* check used bits only */
293 +               for (j = 0; j < e3b->bd_blkbits + 1; j++) {
294 +                       buddy2 = mb_find_buddy(e3b, j, &max2);
295 +                       k = i >> j;
296 +                       J_ASSERT(k < max2);
297 +                       J_ASSERT(!mb_test_bit(k, buddy2));
298 +               }
299 +       }
300 +}
301 +#else
302 +#define mb_check_buddy(e3b)
303 +#endif
304 +/* take the bb_lock spinlock of the given group's buddy data */
305 +static inline void
306 +ext3_lock_group(struct super_block *sb, int group)
307 +{
308 +       spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group]->bb_lock);
309 +}
310 +/* release the bb_lock spinlock taken by ext3_lock_group() */
311 +static inline void
312 +ext3_unlock_group(struct super_block *sb, int group)
313 +{
314 +       spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group]->bb_lock);
315 +}
316 +/*
    + * Find the order of the free chunk that covers a block: returns the first
    + * order, counting up from 1, whose map has the block's bit set, or 0 when
    + * no order from 1 to bd_blkbits + 1 does.  The block bitmap itself is not
    + * consulted.
    + */
317 +static int mb_find_order_for_block(struct ext3_buddy *e3b, int block)
318 +{
319 +       int order = 1;
320 +       void *bb;
321 +
322 +       J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
323 +       J_ASSERT(block < (1 << (e3b->bd_blkbits + 3)));
324 +
325 +       bb = e3b->bd_buddy;
326 +       while (order <= e3b->bd_blkbits + 1) {
327 +               block = block >> 1;
328 +               if (mb_test_bit(block, bb)) {
329 +                       /* this block is part of buddy of order 'order' */
330 +                       return order;
331 +               }
    +               /* the last order has no successor; stepping past it
    +                * would shift by a negative count */
    +               if (order == e3b->bd_blkbits + 1)
    +                       break;
332 +               bb += 1 << (e3b->bd_blkbits - order);
333 +               order++;
334 +       }
335 +       return 0;
336 +}
337 +/*
    + * Clear len bits of bm starting at bit cur.  Runs of 32 bits that begin on
    + * a multiple of 32 are zeroed with one plain __u32 store, everything else
    + * goes bit by bit through mb_clear_bit().
    + * NOTE(review): the word store presumes bm itself is 4-byte aligned -
    + * confirm against callers.
    + */
338 +static inline void mb_clear_bits(void *bm, int cur, int len)
339 +{
340 +       __u32 *addr;
341 +
342 +       len = cur + len;        /* from here on len is the end bit, exclusive */
343 +       while (cur < len) {
344 +               if ((cur & 31) == 0 && (len - cur) >= 32) {
345 +                       /* fast path: clear whole word at once */
346 +                       addr = bm + (cur >> 3);
347 +                       *addr = 0;
348 +                       cur += 32;
349 +                       continue;
350 +               }
351 +               mb_clear_bit(cur, bm);
352 +               cur++;
353 +       }
354 +}
355 +/*
    + * Set len bits of bm starting at bit cur.  Runs of 32 bits that begin on
    + * a multiple of 32 are filled with one plain __u32 store, everything else
    + * goes bit by bit through mb_set_bit().
    + * NOTE(review): the word store presumes bm itself is 4-byte aligned -
    + * confirm against callers.
    + */
356 +static inline void mb_set_bits(void *bm, int cur, int len)
357 +{
358 +       __u32 *addr;
359 +
360 +       len = cur + len;        /* from here on len is the end bit, exclusive */
361 +       while (cur < len) {
362 +               if ((cur & 31) == 0 && (len - cur) >= 32) {
363 +                       /* fast path: set whole word at once */
364 +                       addr = bm + (cur >> 3);
365 +                       *addr = 0xffffffff;
366 +                       cur += 32;
367 +                       continue;
368 +               }
369 +               mb_set_bit(cur, bm);
370 +               cur++;
371 +       }
372 +}
373 +/*
    + * Return count blocks starting at first (group-relative) to the buddy
    + * data.  Each block gets its bit set in the block bitmap and is then
    + * merged upwards for as long as its neighbour of the same order is free
    + * too; bb_counters[] is kept in step.  A block whose bit is already set
    + * trips J_ASSERT.  No locking is done here.  Always returns 0.
    + */
374 +static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count)
375 +{
376 +       int block, max, order;
377 +       void *buddy, *buddy2;
378 +
379 +       mb_check_buddy(e3b);
380 +       while (count-- > 0) {
381 +               block = first++;
382 +               order = 0;
383 +
384 +               J_ASSERT(!mb_test_bit(block, e3b->bd_bitmap));
385 +               mb_set_bit(block, e3b->bd_bitmap);
386 +               e3b->bd_bd->bb_counters[order]++;
387 +
388 +               /* start of the buddy */
389 +               buddy = mb_find_buddy(e3b, order, &max);
390 +
391 +               do {
392 +                       block &= ~1UL;  /* even member of the pair */
393 +                       if (!mb_test_bit(block, buddy) ||
394 +                                       !mb_test_bit(block + 1, buddy))
395 +                               break;
396 +
397 +                       /* both the buddies are free, try to coalesce them */
398 +                       buddy2 = mb_find_buddy(e3b, order + 1, &max);
399 +
400 +                       if (!buddy2)    /* already at the highest order */
401 +                               break;
402 +
403 +                       if (order > 0) {
404 +                               /* for special purposes, we don't clear
405 +                                * free bits in bitmap */
406 +                               mb_clear_bit(block, buddy);
407 +                               mb_clear_bit(block + 1, buddy);
408 +                       }
409 +                       e3b->bd_bd->bb_counters[order]--;
410 +                       e3b->bd_bd->bb_counters[order]--;
411 +
412 +                       block = block >> 1;
413 +                       order++;
414 +                       e3b->bd_bd->bb_counters[order]++;
415 +
416 +                       mb_set_bit(block, buddy2);
417 +                       buddy = buddy2;
418 +               } while (1);
419 +       }
420 +       mb_check_buddy(e3b);
421 +
422 +       return 0;
423 +}
424 +
425 +/*
    + * Build in out the tail of in: chunks are taken from the end of
    + * in->fe_orders until they add up to at least needed blocks, and
    + * out->fe_back is set so that the blocks get allocated from the end.
    + *
426 + * returns 1 if out extent is enough to fill needed space
    + * returns 0, leaving out empty (fe_len 0, fe_nums 0, fe_back untouched),
    + * when in has fewer than two chunks or is shorter than needed
    + *
    + * NOTE(review): mb_find_extent() can leave fe_nums == MB_ARR_SIZE, which
    + * the first fe_nums assertion below rejects - confirm callers avoid that.
427 + */
428 +int mb_make_backward_extent(struct ext3_free_extent *in,
429 +                               struct ext3_free_extent *out, int needed)
430 +{
431 +       int i;
432 +
433 +       J_ASSERT(in);
434 +       J_ASSERT(out);
435 +       J_ASSERT(in->fe_nums < MB_ARR_SIZE);
436 +
437 +       out->fe_len = 0;
438 +       out->fe_start = in->fe_start + in->fe_len;
439 +       out->fe_nums = 0;
440 +
441 +       /* for single-chunk extent we need not back order
442 +        * also, if an extent doesn't fill needed space
443 +        * then it makes no sense to try back order because
444 +        * if we select this extent then it'll be used as is */
445 +       if (in->fe_nums < 2 || in->fe_len < needed)
446 +               return 0;
447 +
448 +       i = in->fe_nums - 1;
449 +       while (i >= 0 && out->fe_len < needed) {
450 +               out->fe_len += (1 << in->fe_orders[i]);
451 +               out->fe_start -= (1 << in->fe_orders[i]);
452 +               i--;
453 +       }
454 +       /* FIXME: in some situation fe_orders may be too small to hold
455 +        * all the buddies */
456 +       J_ASSERT(out->fe_len >= needed);
457 +       
458 +       for (i++; i < in->fe_nums; i++)
459 +               out->fe_orders[out->fe_nums++] = in->fe_orders[i];
460 +       J_ASSERT(out->fe_nums < MB_ARR_SIZE);
461 +       out->fe_back = 1;
462 +
463 +       return 1;
464 +}
465 +/*
    + * Collect the free extent that contains block at the given order.
    + *
    + * With order 0 the lookup is widened to the free chunk covering the
    + * block, so the extent may start before block.  Following free chunks are
    + * appended until needed blocks are gathered, a used block or the end of
    + * the map is hit, or the next chunk alone would satisfy needed.  Only the
    + * first MB_ARR_SIZE chunk orders are recorded in ex->fe_orders.
    + *
    + * Returns ex->fe_len; 0 means the bit for block is clear at that order,
    + * in which case ex has fe_nums 0, fe_len 0, fe_start 0.
    + */
466 +int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
467 +                       int needed, struct ext3_free_extent *ex)
468 +{
469 +       int space = needed;
470 +       int next, max, ord;
471 +       void *buddy;
472 +
473 +       J_ASSERT(ex != NULL);
474 +
475 +       ex->fe_nums = 0;
476 +       ex->fe_len = 0;
    +       /* the nofree exit below reads fe_start: keep it defined even
    +        * when the block turns out not to be free */
    +       ex->fe_start = 0;
477 +       
478 +       buddy = mb_find_buddy(e3b, order, &max);
479 +       J_ASSERT(buddy);
480 +       J_ASSERT(block < max);
481 +       if (!mb_test_bit(block, buddy))
482 +               goto nofree;
483 +
484 +       if (order == 0) {
485 +               /* find actual order */
486 +               order = mb_find_order_for_block(e3b, block);
487 +               block = block >> order;
488 +       }
489 +
490 +       ex->fe_orders[ex->fe_nums++] = order;
491 +       ex->fe_len = 1 << order;
492 +       ex->fe_start = block << order;
493 +       ex->fe_back = 0;
494 +
495 +       while ((space = space - (1 << order)) > 0) {
496 +
497 +               buddy = mb_find_buddy(e3b, order, &max);
498 +               J_ASSERT(buddy);
499 +
500 +               if (block + 1 >= max)
501 +                       break;
502 +
503 +               next = (block + 1) * (1 << order);
504 +               if (!mb_test_bit(next, e3b->bd_bitmap))
505 +                       break;
506 +
507 +               ord = mb_find_order_for_block(e3b, next);
508 +
509 +               if ((1 << ord) >= needed) {
510 +                       /* we dont want to coalesce with self-enough buddies */
511 +                       break;
512 +               }
513 +               order = ord;
514 +               block = next >> order;
515 +               ex->fe_len += 1 << order;
516 +
517 +               if (ex->fe_nums < MB_ARR_SIZE)
518 +                       ex->fe_orders[ex->fe_nums++] = order;
519 +       }
520 +
521 +nofree:
522 +       J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3)));
523 +       return ex->fe_len;
524 +}
525 +/*
    + * Take the last len blocks of ex out of the buddy data, working from the
    + * end of the extent towards its start.  Whole chunks are dropped from
    + * their order's map when they end at the current position and fit into
    + * what is left; any other chunk is split into its two halves one order
    + * down and the step is retried.  Finally the blocks' bits are cleared in
    + * the block bitmap.  bb_counters[] is kept in step.  Always returns 0.
    + */
526 +static int mb_mark_used_backward(struct ext3_buddy *e3b,
527 +                                       struct ext3_free_extent *ex, int len)
528 +{
529 +       int start, len0 = len;
530 +       int ord, mlen, max, cur;
531 +       void *buddy;
532 +
    +       /* start always names the last block still to be taken */
533 +       start = ex->fe_start + ex->fe_len - 1;
534 +       while (len) {
535 +               ord = mb_find_order_for_block(e3b, start);
536 +               if (((start >> ord) << ord) == (start - (1 << ord) + 1) &&
537 +                               len >= (1 << ord)) {
538 +                       /* the whole chunk may be allocated at once! */
539 +                       mlen = 1 << ord;
540 +                       buddy = mb_find_buddy(e3b, ord, &max);
541 +                       J_ASSERT((start >> ord) < max);
542 +                       mb_clear_bit(start >> ord, buddy);
543 +                       e3b->bd_bd->bb_counters[ord]--;
544 +                       start -= mlen;
545 +                       len -= mlen;
546 +                       J_ASSERT(len >= 0);
    +                       /* start is -1 once a chunk beginning at block 0
    +                        * was taken; fine as long as nothing is left */
547 +                       J_ASSERT(start >= 0 || len == 0);
548 +                       continue;
549 +               }
550 +
551 +               /* we have to split large buddy */
552 +               J_ASSERT(ord > 0);
553 +               buddy = mb_find_buddy(e3b, ord, &max);
554 +               mb_clear_bit(start >> ord, buddy);
555 +               e3b->bd_bd->bb_counters[ord]--;
556 +
557 +               ord--;
558 +               cur = (start >> ord) & ~1U;
559 +               buddy = mb_find_buddy(e3b, ord, &max);
560 +               mb_set_bit(cur, buddy);
561 +               mb_set_bit(cur + 1, buddy);
562 +               e3b->bd_bd->bb_counters[ord]++;
563 +               e3b->bd_bd->bb_counters[ord]++;
564 +       }
565 +
566 +       /* now drop all the bits in bitmap */
567 +       mb_clear_bits(e3b->bd_bitmap, ex->fe_start + ex->fe_len - len0, len0);
568 +
569 +       mb_check_buddy(e3b);
570 +
571 +       return 0;
572 +}
573 +/*
    + * Take the first len blocks of ex out of the buddy data.  Whole chunks are
    + * dropped from their order's map when they start at the current position
    + * and fit into what is left; any other chunk is split into its two halves
    + * one order down and the step is retried.  Finally the blocks' bits are
    + * cleared in the block bitmap.  bb_counters[] is kept in step.  Always
    + * returns 0.
    + */
574 +static int mb_mark_used_forward(struct ext3_buddy *e3b,
575 +                               struct ext3_free_extent *ex, int len)
576 +{
577 +       int start = ex->fe_start, len0 = len;
578 +       int ord, mlen, max, cur;
579 +       void *buddy;
580 +
581 +       while (len) {
582 +               ord = mb_find_order_for_block(e3b, start);
583 +
584 +               if (((start >> ord) << ord) == start && len >= (1 << ord)) {
585 +                       /* the whole chunk may be allocated at once! */
586 +                       mlen = 1 << ord;
587 +                       buddy = mb_find_buddy(e3b, ord, &max);
588 +                       J_ASSERT((start >> ord) < max);
589 +                       mb_clear_bit(start >> ord, buddy);
590 +                       e3b->bd_bd->bb_counters[ord]--;
591 +                       start += mlen;
592 +                       len -= mlen;
593 +                       J_ASSERT(len >= 0);
594 +                       continue;
595 +               }
596 +
597 +               /* we have to split large buddy */
598 +               J_ASSERT(ord > 0);
599 +               buddy = mb_find_buddy(e3b, ord, &max);
600 +               mb_clear_bit(start >> ord, buddy);
601 +               e3b->bd_bd->bb_counters[ord]--;
602 +
603 +               ord--;
604 +               cur = (start >> ord) & ~1U;     /* even half of the split chunk */
605 +               buddy = mb_find_buddy(e3b, ord, &max);
606 +               mb_set_bit(cur, buddy);
607 +               mb_set_bit(cur + 1, buddy);
608 +               e3b->bd_bd->bb_counters[ord]++;
609 +               e3b->bd_bd->bb_counters[ord]++;
610 +       }
611 +
612 +       /* now drop all the bits in bitmap */
613 +       mb_clear_bits(e3b->bd_bitmap, ex->fe_start, len0);
614 +
615 +       mb_check_buddy(e3b);
616 +
617 +       return 0;
618 +}
619 +/* mark the first or the last len blocks of ex used, as ex->fe_back says */
620 +int inline mb_mark_used(struct ext3_buddy *e3b,
621 +                       struct ext3_free_extent *ex, int len)
622 +{
623 +       J_ASSERT(ex);
624 +
625 +       /* extents made by mb_make_backward_extent() carry a nonzero
626 +        * fe_back and are consumed starting from their last block;
627 +        * all others are consumed from their first block */
628 +       if (ex->fe_back != 0)
629 +               return mb_mark_used_backward(e3b, ex, len);
630 +       return mb_mark_used_forward(e3b, ex, len);
631 +}
632 +/*
    + * Try to satisfy ac from one group whose buddy data is loaded in e3b.
    + *
    + * If bit 0x1 of ac_g_flags is set and this is the goal group, the extent
    + * containing the goal block is used when that block is free, even if it
    + * is shorter than ac_g_len; otherwise the flag bit is dropped.  After that
    + * the block bitmap is scanned from bit 0 for the first extent of at least
    + * ac_g_len blocks.
    + *
    + * On success the blocks are already marked used in e3b, ac_b_* describe
    + * them and ac_status is AC_STATUS_FOUND.  Always returns 0.
    + * NOTE(review): gorder is computed but never read.
    + */
633 +int ext3_mb_new_in_group(struct ext3_allocation_context *ac,
634 +                               struct ext3_buddy *e3b, int group)
635 +{
636 +       struct super_block *sb = ac->ac_sb;
637 +       int err, gorder, max, i;
638 +       struct ext3_free_extent curex;
639 +
640 +       /* let's know order of allocation */
641 +       gorder = 0;
642 +       while (ac->ac_g_len > (1 << gorder))
643 +               gorder++;
644 +
645 +       if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) {
646 +               /* someone asks for space at this specified block
647 +                * probably he wants to merge it into existing extent */
648 +               if (mb_test_bit(ac->ac_g_start, e3b->bd_bitmap)) {
649 +                       /* good. at least one block is free */
650 +                       max = mb_find_extent(e3b, 0, ac->ac_g_start,
651 +                                               ac->ac_g_len, &curex);
652 +                       max = min(curex.fe_len, ac->ac_g_len);
653 +                       mb_mark_used(e3b, &curex, max);
654 +                       
655 +                       ac->ac_b_group = group;
656 +                       ac->ac_b_start = curex.fe_start;
657 +                       ac->ac_b_len = max;
658 +                       ac->ac_status = AC_STATUS_FOUND;
659 +                       err = 0;
660 +                       goto out;
661 +               }
662 +               /* don't try to find goal anymore */
663 +               ac->ac_g_flags &= ~1;
664 +       }
665 +
666 +       i = 0;
667 +       while (1) {
668 +               i = find_next_bit(e3b->bd_bitmap, sb->s_blocksize * 8, i);
669 +               if (i >= sb->s_blocksize * 8)
670 +                       break;
671 +
672 +               max = mb_find_extent(e3b, 0, i, ac->ac_g_len, &curex);
673 +               if (max >= ac->ac_g_len) {
674 +                       max = min(curex.fe_len, ac->ac_g_len);
675 +                       mb_mark_used(e3b, &curex, max);
676 +                       
677 +                       ac->ac_b_group = group;
678 +                       ac->ac_b_start = curex.fe_start;
679 +                       ac->ac_b_len = max;
680 +                       ac->ac_status = AC_STATUS_FOUND;
681 +                       break;
682 +               }
683 +               i += max;       /* too short: resume the scan behind this extent */
684 +       }
685 +
686 +       return 0;
687 +
688 +out:
689 +       return err;
690 +}
691 +/*
    + * Decide whether a group is worth scanning for ac at criterion cr.
    + * Returns 0 when the group descriptor cannot be obtained or reports no
    + * free blocks, 1 for the goal group while bit 0x1 of ac_g_flags is set.
    + * Otherwise cr 0 wants at least ac_g_len / 2 free blocks, cr 1 at least
    + * ac_g_len / 4, cr 2 accepts the group; any other cr is a BUG().
    + */
692 +int mb_good_group(struct ext3_allocation_context *ac, int group, int cr)
693 +{
694 +       struct ext3_group_desc *desc;
695 +       int nfree;
696 +
697 +       desc = ext3_get_group_desc(ac->ac_sb, group, NULL);
698 +       if (desc == NULL)
699 +               return 0;
700 +
701 +       nfree = le16_to_cpu(desc->bg_free_blocks_count);
702 +       if (nfree == 0)
703 +               return 0;
704 +
705 +       /* the goal block lives here and the caller insists on it */
706 +       if (ac->ac_g_group == group && (ac->ac_g_flags & 1))
707 +               return 1;
708 +
709 +       /* FIXME: I'd like to take fragmentation into account here */
710 +       switch (cr) {
711 +       case 0:
712 +               return nfree >= ac->ac_g_len >> 1;
713 +       case 1:
714 +               return nfree >= ac->ac_g_len >> 2;
715 +       case 2:
716 +               return 1;
717 +       default:
718 +               BUG();
719 +       }
720 +       return 0;
721 +}
722 +
723 +int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
724 +                      unsigned long goal, int *len, int flags, int *errp)
725 +{
726 +       struct buffer_head *bitmap_bh = NULL;
727 +       struct ext3_allocation_context ac;
728 +       int i, group, block, cr, err = 0;
729 +       struct ext3_group_desc *gdp;
730 +       struct ext3_super_block *es;
731 +       struct buffer_head *gdp_bh;
732 +       struct ext3_sb_info *sbi;
733 +       struct super_block *sb;
734 +       struct ext3_buddy e3b;
735 +
736 +       J_ASSERT(len != NULL);
737 +       J_ASSERT(*len > 0);
738 +
739 +       sb = inode->i_sb;
740 +       if (!sb) {
741 +               printk("ext3_mb_new_nblocks: nonexistent device");
742 +               return 0;
743 +       }
744 +
745 +       if (!test_opt(sb, MBALLOC)) {
746 +               static int ext3_mballoc_warning = 0;
747 +               if (ext3_mballoc_warning == 0) {
748 +                       printk(KERN_ERR "EXT3-fs: multiblock request with "
749 +                               "mballoc disabled!\n");
750 +                       ext3_mballoc_warning++;
751 +               }
752 +               *len = 1;
753 +               err = ext3_new_block_old(handle, inode, goal, errp);
754 +               return err;
755 +       }
756 +
757 +       ext3_mb_poll_new_transaction(sb, handle);
758 +
759 +       sbi = EXT3_SB(sb);
760 +       es = EXT3_SB(sb)->s_es;
761 +
762 +       if (!(flags & 2)) {
763 +               /* someone asks for non-reserved blocks */
764 +               BUG_ON(*len > 1);
765 +               err = ext3_mb_reserve_blocks(sb, 1);
766 +               if (err) {
767 +                       *errp = err;
768 +                       return 0;
769 +               }
770 +       }
771 +
772 +       /*
773 +        * Check quota for allocation of this blocks.
774 +        */
775 +       while (*len && DQUOT_ALLOC_BLOCK(inode, *len))
776 +               *len -= 1;
777 +       if (*len == 0) {
778 +               *errp = -EDQUOT;
779 +               block = 0;
780 +               goto out;
781 +       }
782 +
783 +       /* start searching from the goal */
784 +       if (goal < le32_to_cpu(es->s_first_data_block) ||
785 +           goal >= le32_to_cpu(es->s_blocks_count))
786 +               goal = le32_to_cpu(es->s_first_data_block);
787 +       group = (goal - le32_to_cpu(es->s_first_data_block)) /
788 +                       EXT3_BLOCKS_PER_GROUP(sb);
789 +       block = ((goal - le32_to_cpu(es->s_first_data_block)) %
790 +                       EXT3_BLOCKS_PER_GROUP(sb));
791 +
792 +       /* set up allocation goals */
793 +       ac.ac_b_group = ac.ac_b_start = ac.ac_b_len = 0;
794 +       ac.ac_status = 0;
795 +       ac.ac_groups_scanned = 0;
796 +       ac.ac_sb = inode->i_sb;
797 +       ac.ac_g_group = group;
798 +       ac.ac_g_start = block;
799 +       ac.ac_g_len = *len;
800 +       ac.ac_g_flags = flags;
801 +
802 +       /* loop over the groups */
803 +       for (cr = 0; cr < 3 && ac.ac_status != AC_STATUS_FOUND; cr++) {
804 +               for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) {
805 +                       if (group == EXT3_SB(sb)->s_groups_count)
806 +                               group = 0;
807 +
808 +                       /* check is group good for our criteries */
809 +                       if (!mb_good_group(&ac, group, cr))
810 +                               continue;
811 +
812 +                       err = ext3_mb_load_desc(ac.ac_sb, group, &e3b);
813 +                       if (err)
814 +                               goto out_err;
815 +
816 +                       ext3_lock_group(sb, group);
817 +                       if (!mb_good_group(&ac, group, cr)) {
818 +                               /* someone did allocation from this group */
819 +                               ext3_unlock_group(sb, group);
820 +                               ext3_mb_release_desc(&e3b);
821 +                               continue;
822 +                       }
823 +
824 +                       err = ext3_mb_new_in_group(&ac, &e3b, group);
825 +                       ext3_unlock_group(sb, group);
826 +                       if (ac.ac_status == AC_STATUS_FOUND)
827 +                               ext3_mb_dirty_buddy(&e3b);
828 +                       ext3_mb_release_desc(&e3b);
829 +                       if (err)
830 +                               goto out_err;
831 +                       if (ac.ac_status == AC_STATUS_FOUND)
832 +                               break;
833 +               }
834 +       }
835 +
836 +       if (ac.ac_status != AC_STATUS_FOUND) {
837 +               /* unfortunately, we can't satisfy this request */
838 +               J_ASSERT(ac.ac_b_len == 0);
839 +               DQUOT_FREE_BLOCK(inode, *len);
840 +               *errp = -ENOSPC;
841 +               block = 0;
842 +               goto out;
843 +       }
844 +
845 +       /* good news - free block(s) have been found. now it's time
846 +        * to mark block(s) in good old journaled bitmap */
847 +       block = ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
848 +                       + ac.ac_b_start + le32_to_cpu(es->s_first_data_block);
849 +
850 +       /* we made a desicion, now mark found blocks in good old
851 +        * bitmap to be journaled */
852 +
853 +       ext3_debug("using block group %d(%d)\n",
854 +                       ac.ac_b_group.group, gdp->bg_free_blocks_count);
855 +
856 +       bitmap_bh = read_block_bitmap(sb, ac.ac_b_group);
857 +       if (!bitmap_bh) {
858 +               *errp = -EIO;
859 +               goto out_err;
860 +       }
861 +
862 +       err = ext3_journal_get_write_access(handle, bitmap_bh);
863 +       if (err) {
864 +               *errp = err;
865 +               goto out_err;
866 +       }
867 +
868 +       gdp = ext3_get_group_desc(sb, ac.ac_b_group, &gdp_bh);
869 +       if (!gdp) {
870 +               *errp = -EIO;
871 +               goto out_err;
872 +       }
873 +       
874 +       err = ext3_journal_get_write_access(handle, gdp_bh);
875 +       if (err)
876 +               goto out_err;
877 +
878 +       block = ac.ac_b_start + ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
879 +                               + le32_to_cpu(es->s_first_data_block);
880 +
881 +       if (block == le32_to_cpu(gdp->bg_block_bitmap) ||
882 +           block == le32_to_cpu(gdp->bg_inode_bitmap) ||
883 +           in_range(block, le32_to_cpu(gdp->bg_inode_table),
884 +                     EXT3_SB(sb)->s_itb_per_group))
885 +               ext3_error(sb, "ext3_new_block",
886 +                           "Allocating block in system zone - "
887 +                           "block = %u", block);
888 +#if 0
889 +       for (i = 0; i < ac.ac_b_len; i++)
890 +               J_ASSERT(!mb_test_bit(ac.ac_b_start + i, bitmap_bh->b_data));
891 +#endif
892 +       mb_set_bits(bitmap_bh->b_data, ac.ac_b_start, ac.ac_b_len);
893 +
894 +       ext3_lock_group(sb, ac.ac_b_group);
895 +       gdp->bg_free_blocks_count =
896 +                       cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 
897 +                                       ac.ac_b_len);
898 +       ext3_unlock_group(sb, ac.ac_b_group);
899 +       percpu_counter_mod(&sbi->s_freeblocks_counter, -ac.ac_b_len);
900 +
901 +       err = ext3_journal_dirty_metadata(handle, bitmap_bh);
902 +       if (err)
903 +               goto out_err;
904 +       err = ext3_journal_dirty_metadata(handle, gdp_bh);
905 +       if (err)
906 +               goto out_err;
907 +
908 +       sb->s_dirt = 1;
909 +       *errp = 0;
910 +       brelse(bitmap_bh);
911 +
912 +       /* drop non-allocated, but dquote'd blocks */
913 +       J_ASSERT(*len >= ac.ac_b_len);
914 +       DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_len);
915 +
916 +       *len = ac.ac_b_len;
917 +       J_ASSERT(block != 0);
918 +       goto out;
919 +
920 +out_err:
921 +       /* if we've already allocated something, roll it back */
922 +       if (ac.ac_status == AC_STATUS_FOUND) {
923 +               /* FIXME: free blocks here */
924 +       }
925 +
926 +       DQUOT_FREE_BLOCK(inode, *len);
927 +       brelse(bitmap_bh);
928 +       *errp = err;
929 +       block = 0;
930 +out:
931 +       if (!(flags & 2)) {
932 +               /* block wasn't reserved before and we reserved it
933 +                * at the beginning of allocation. it doesn't matter
934 +                * whether we allocated anything or we failed: time
935 +                * to release reservation. NOTE: because I expect
936 +                * any multiblock request from delayed allocation
937 +                * path only, here is single block always */
938 +               ext3_mb_release_blocks(sb, 1);
939 +       }
940 +       return block;
941 +}
942 +
943 +/* Rebuild the buddy data of @group from its block bitmap: the two buffers
944 + * loaded by ext3_mb_load_desc() are zeroed, then every clear bitmap bit is
945 + * passed to mb_free_blocks().  Returns 0, -EIO or the load_desc error. */
946 +int ext3_mb_generate_buddy(struct super_block *sb, int group)
947 +{
948 +       struct buffer_head *bh;
949 +       struct ext3_buddy e3b;
950 +       int i, err;
951 +
952 +       err = ext3_mb_load_desc(sb, group, &e3b);
953 +       if (err)
954 +               goto out;
955 +       memset(e3b.bd_bh->b_data, 0, sb->s_blocksize);
956 +       memset(e3b.bd_bh2->b_data, 0, sb->s_blocksize);
957 +
958 +       bh = read_block_bitmap(sb, group);
959 +       if (bh == NULL) {
960 +               err = -EIO;
961 +               goto out2;
962 +       }
963 +
964 +       /* loop over the blocks, and create buddies for free ones */
965 +       for (i = 0; i < sb->s_blocksize * 8; i++)
966 +               if (!mb_test_bit(i, (void *) bh->b_data))
967 +                       mb_free_blocks(&e3b, i, 1);
968 +       brelse(bh);
969 +       mb_check_buddy(&e3b);
970 +       ext3_mb_dirty_buddy(&e3b);
971 +
972 +out2:
973 +       ext3_mb_release_desc(&e3b);
974 +out:
975 +       return err;
976 +}
977 +
978 +EXPORT_SYMBOL(ext3_mb_new_blocks);
979 +/* block count passed to ext3_journal_start() in ext3_mb_init_backend() */
980 +#define MB_CREDITS     \
981 +       (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS +   \
982 +               2 * EXT3_SINGLEDATA_TRANS_BLOCKS)
983 +
984 +/* Find or create the buddy file in the root directory and get a bitmap block
985 + * and a buddy block in it for every group; their block numbers are cached in
986 + * sbi->s_buddy_blocks[].  Returns a negative errno on failure. */
987 +int ext3_mb_init_backend(struct super_block *sb)
988 +{
989 +       struct inode *root = sb->s_root->d_inode;
990 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
991 +       struct dentry *db;
992 +       tid_t target;
993 +       int err, i;
994 +
995 +       sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks *) *
996 +                                       sbi->s_groups_count, GFP_KERNEL);
997 +       if (sbi->s_buddy_blocks == NULL) {
998 +               printk("EXT3-fs: can't allocate mem for buddy maps\n");
999 +               return -ENOMEM;
1000 +       }
1001 +       memset(sbi->s_buddy_blocks, 0,
1002 +               sizeof(struct ext3_buddy_group_blocks *) * sbi->s_groups_count);
1003 +       sbi->s_buddy = NULL;
1004 +
1005 +       down(&root->i_sem);
1006 +       db = lookup_one_len(EXT3_BUDDY_FILE, sb->s_root,
1007 +                               strlen(EXT3_BUDDY_FILE));
1008 +       if (IS_ERR(db)) {
1009 +               err = PTR_ERR(db);
1010 +               printk("EXT3-fs: can't lookup buddy file: %d\n", err);
1011 +               goto out;
1012 +       }
1013 +
1014 +       if (db->d_inode == NULL) {
1015 +               /* no buddy file yet; the code below cannot work without one */
1016 +               err = ext3_create(root, db, S_IFREG, NULL);
1017 +               if (err) {
1018 +                       printk("error while creation buddy file: %d\n", err);
1019 +                       goto out2;
1020 +               }
1021 +       }
1022 +       sbi->s_buddy = igrab(db->d_inode);
1023 +
1024 +       for (i = 0; i < sbi->s_groups_count; i++) {
1025 +               struct buffer_head *bh = NULL;
1026 +               handle_t *handle;
1027 +
1028 +               sbi->s_buddy_blocks[i] =
1029 +                       kmalloc(sizeof(struct ext3_buddy_group_blocks),
1030 +                                       GFP_KERNEL);
1031 +               if (sbi->s_buddy_blocks[i] == NULL) {
1032 +                       printk("EXT3-fs: can't allocate mem for buddy\n");
1033 +                       err = -ENOMEM;
1034 +                       goto out2;
1035 +               }
1036 +
1037 +               handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS);
1038 +               if (IS_ERR(handle)) {
1039 +                       err = PTR_ERR(handle);
1040 +                       goto out2;
1041 +               }
1042 +               /* allocate block for bitmap; stop the handle on failure too */
1043 +               bh = ext3_getblk(handle, sbi->s_buddy, i * 2, 1, &err);
1044 +               if (bh == NULL) {
1045 +                       printk("can't get block for buddy bitmap: %d\n", err);
1046 +                       ext3_journal_stop(handle);
1047 +                       goto out2;
1048 +               }
1049 +               sbi->s_buddy_blocks[i]->bb_bitmap = bh->b_blocknr;
1050 +               brelse(bh);
1051 +               /* allocate block for buddy */
1052 +               bh = ext3_getblk(handle, sbi->s_buddy, i * 2 + 1, 1, &err);
1053 +               if (bh == NULL) {
1054 +                       printk("can't get block for buddy: %d\n", err);
1055 +                       ext3_journal_stop(handle);
1056 +                       goto out2;
1057 +               }
1058 +               sbi->s_buddy_blocks[i]->bb_buddy = bh->b_blocknr;
1059 +               brelse(bh);
1060 +               ext3_journal_stop(handle);
1061 +               spin_lock_init(&sbi->s_buddy_blocks[i]->bb_lock);
1062 +               sbi->s_buddy_blocks[i]->bb_md_cur = NULL;
1063 +               sbi->s_buddy_blocks[i]->bb_tid = 0;
1064 +       }
1065 +
1066 +       if (journal_start_commit(sbi->s_journal, &target))
1067 +               log_wait_commit(sbi->s_journal, target);
1068 +
1069 +out2:
1070 +       dput(db);
1071 +out:
1072 +       up(&root->i_sem);
1073 +       return err;
1074 +}
1075 +
1076 +/* Free all mballoc state; does nothing if MBALLOC is off.  Returns 0. */
1077 +int ext3_mb_release(struct super_block *sb)
1078 +{
1079 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1080 +       struct list_head *committed = &sbi->s_committed_transaction;
1081 +       int group;
1082 +
1083 +       if (!test_opt(sb, MBALLOC))
1084 +               return 0;
1085 +
1086 +       /* whatever is still queued as closed or active is freed right now */
1087 +       spin_lock(&sbi->s_md_lock);
1088 +       list_splice_init(&sbi->s_closed_transaction, committed);
1089 +       list_splice_init(&sbi->s_active_transaction, committed);
1090 +       spin_unlock(&sbi->s_md_lock);
1091 +       ext3_mb_free_committed_blocks(sb);
1092 +
1093 +       if (sbi->s_buddy_blocks != NULL) {
1094 +               for (group = 0; group < sbi->s_groups_count; group++)
1095 +                       if (sbi->s_buddy_blocks[group] != NULL)
1096 +                               kfree(sbi->s_buddy_blocks[group]);
1097 +               kfree(sbi->s_buddy_blocks);
1098 +       }
1099 +       if (sbi->s_buddy != NULL)
1100 +               iput(sbi->s_buddy);
1101 +       if (sbi->s_blocks_reserved != 0)
1102 +               printk("ext3-fs: %ld blocks being reserved at umount!\n",
1103 +                               sbi->s_blocks_reserved);
1104 +       return 0;
1105 +}
1106 +
1107 +/* Enable mballoc if requested and fully set up, else leave it off; returns 0 */
1108 +int ext3_mb_init(struct super_block *sb)
1109 +{
1110 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1111 +       int i;
1112 +
1113 +       if (!test_opt(sb, MBALLOC))
1114 +               return 0;
1115 +       clear_opt(sbi->s_mount_opt, MBALLOC);
1116 +       if (ext3_mb_init_backend(sb))
1117 +               return 0;
1118 +       spin_lock_init(&sbi->s_reserve_lock);
1119 +       spin_lock_init(&sbi->s_md_lock);
1120 +       INIT_LIST_HEAD(&sbi->s_active_transaction);
1121 +       INIT_LIST_HEAD(&sbi->s_closed_transaction);
1122 +       INIT_LIST_HEAD(&sbi->s_committed_transaction);
1123 +       for (i = 0; i < sbi->s_groups_count; i++)
1124 +               if (ext3_mb_generate_buddy(sb, i)) {
1125 +                       printk("EXT3-fs: can't generate buddy data\n");
1126 +                       return 0;
1127 +               }
1128 +       set_opt(sbi->s_mount_opt, MBALLOC);
1129 +       printk("EXT3-fs: mballoc enabled\n");
1130 +       return 0;
1131 +}
1132 +
1133 +/* Put the blocks recorded on sbi->s_committed_transaction back into the
1134 + * buddy data, one ext3_free_metadata container at a time, and free each
1135 + * container once its blocks have been handed to mb_free_blocks(). */
1136 +void ext3_mb_free_committed_blocks(struct super_block *sb)
1137 +{
1138 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1139 +       struct ext3_free_metadata *md;
1140 +       struct ext3_buddy e3b;
1141 +       int nblocks = 0, ncontainers = 0;
1142 +       int err, idx;
1143 +
1144 +       if (list_empty(&sbi->s_committed_transaction))
1145 +               return;
1146 +
1147 +       for (;;) {
1148 +               /* detach the next container under the list lock */
1149 +               spin_lock(&sbi->s_md_lock);
1150 +               if (list_empty(&sbi->s_committed_transaction)) {
1151 +                       spin_unlock(&sbi->s_md_lock);
1152 +                       break;
1153 +               }
1154 +               md = list_entry(sbi->s_committed_transaction.next,
1155 +                               struct ext3_free_metadata, list);
1156 +               list_del(&md->list);
1157 +               spin_unlock(&sbi->s_md_lock);
1158 +
1159 +               mb_debug("gonna free %u blocks in group %u (0x%p):",
1160 +                               md->num, md->group, md);
1161 +
1162 +               err = ext3_mb_load_desc(sb, md->group, &e3b);
1163 +               BUG_ON(err != 0);
1164 +
1165 +               /* put the blocks into the buddy to make them really free */
1166 +               nblocks += md->num;
1167 +               ncontainers++;
1168 +               ext3_lock_group(sb, md->group);
1169 +               for (idx = 0; idx < md->num; idx++) {
1170 +                       mb_debug(" %u", md->blocks[idx]);
1171 +                       mb_free_blocks(&e3b, md->blocks[idx], 1);
1172 +               }
1173 +               mb_debug("\n");
1174 +               ext3_unlock_group(sb, md->group);
1175 +
1176 +               kfree(md);
1177 +               ext3_mb_dirty_buddy(&e3b);
1178 +               ext3_mb_release_desc(&e3b);
1179 +       }
1180 +       mb_debug("freed %u blocks in %u structures\n", nblocks, ncontainers);
1181 +}
1182 +
1183 +/* Notice a change of the running transaction and age the free lists */
1184 +void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle)
1185 +{
1186 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1187 +       tid_t tid = handle->h_transaction->t_tid;
1188 +
1189 +       /* check without the lock first: still the transaction seen last? */
1190 +       if (sbi->s_last_transaction == tid)
1191 +               return;
1192 +
1193 +       /* A new transaction has started, so age the lists by one step.
1194 +        * Only one transaction can be active at a time, hence the previous
1195 +        * one may still be being logged, while the one before it is known
1196 +        * to be logged already: blocks freed in any transaction older than
1197 +        * the previous one may really be freed now. */
1198 +       spin_lock(&sbi->s_md_lock);
1199 +       if (sbi->s_last_transaction != tid) {
1200 +               mb_debug("new transaction %lu, old %lu\n", (unsigned long) tid,
1201 +                               (unsigned long) sbi->s_last_transaction);
1202 +               list_splice_init(&sbi->s_closed_transaction,
1203 +                                       &sbi->s_committed_transaction);
1204 +               list_splice_init(&sbi->s_active_transaction,
1205 +                                       &sbi->s_closed_transaction);
1206 +               sbi->s_last_transaction = tid;
1207 +       }
1208 +       spin_unlock(&sbi->s_md_lock);
1209 +
1210 +       ext3_mb_free_committed_blocks(sb);
1211 +}
1212 +/* Queue @count blocks of @group, starting at @block, in per-transaction free containers; returns 0 or -ENOMEM */
1213 +int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b,
1214 +                               int group, int block, int count)
1215 +{
1216 +       struct ext3_buddy_group_blocks *db = e3b->bd_bd;
1217 +       struct super_block *sb = e3b->bd_sb;
1218 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1219 +       struct ext3_free_metadata *md;
1220 +       int i;
1221 +       /* in this function db->bb_md_cur and db->bb_tid are only accessed with the group lock held */
1222 +       ext3_lock_group(sb, group);
1223 +       for (i = 0; i < count; i++) {
1224 +               md = db->bb_md_cur;
1225 +               if (md && db->bb_tid != handle->h_transaction->t_tid) {
1226 +                       db->bb_md_cur = NULL;
1227 +                       md = NULL;
1228 +               }
1229 +               /* no current container for this transaction: allocate a new one */
1230 +               if (md == NULL) {
1231 +                       ext3_unlock_group(sb, group);
1232 +                       md = kmalloc(sizeof(*md), GFP_KERNEL);
1233 +                       if (md == NULL)
1234 +                               return -ENOMEM;
1235 +                       md->num = 0;
1236 +                       md->group = group;
1237 +                       /* the lock was dropped for kmalloc(): bb_md_cur may have been set meanwhile */
1238 +                       ext3_lock_group(sb, group);
1239 +                       if (db->bb_md_cur == NULL) {
1240 +                               spin_lock(&sbi->s_md_lock);
1241 +                               list_add(&md->list, &sbi->s_active_transaction);
1242 +                               spin_unlock(&sbi->s_md_lock);
1243 +                               db->bb_md_cur = md;
1244 +                               db->bb_tid = handle->h_transaction->t_tid;
1245 +                               mb_debug("new md 0x%p for group %u\n",
1246 +                                                       md, md->group);
1247 +                       } else {
1248 +                               kfree(md);
1249 +                               md = db->bb_md_cur;
1250 +                       }
1251 +               }
1252 +               /* store the block number; a container that became full is detached from db */
1253 +               BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS);
1254 +               md->blocks[md->num] = block + i;
1255 +               md->num++;
1256 +               if (md->num == EXT3_BB_MAX_BLOCKS) {
1257 +                       /* no more space, put full container on a sb's list */
1258 +                       db->bb_md_cur = NULL;
1259 +               }
1260 +       }
1261 +       ext3_unlock_group(sb, group);
1262 +       return 0;
1263 +}
1264 +/* mballoc flavour of ext3_free_blocks(): frees [block, block+count) group by group; @metadata selects ext3_mb_free_metadata() */
1265 +void ext3_mb_free_blocks(handle_t *handle, struct inode *inode,
1266 +                       unsigned long block, unsigned long count, int metadata)
1267 +{
1268 +       struct buffer_head *bitmap_bh = NULL;
1269 +       struct ext3_group_desc *gdp;
1270 +       struct ext3_super_block *es;
1271 +       unsigned long bit, overflow;
1272 +       struct buffer_head *gd_bh;
1273 +       unsigned long block_group;
1274 +       struct ext3_sb_info *sbi;
1275 +       struct super_block *sb;
1276 +       struct ext3_buddy e3b;
1277 +       int err = 0, ret;
1278 +
1279 +       sb = inode->i_sb;
1280 +       if (!sb) {
1281 +               printk ("ext3_free_blocks: nonexistent device");
1282 +               return;
1283 +       }
1284 +
1285 +       ext3_mb_poll_new_transaction(sb, handle);
1286 +
1287 +       sbi = EXT3_SB(sb);
1288 +       es = EXT3_SB(sb)->s_es;
1289 +       if (block < le32_to_cpu(es->s_first_data_block) ||
1290 +           block + count < block ||
1291 +           block + count > le32_to_cpu(es->s_blocks_count)) {
1292 +               ext3_error (sb, "ext3_free_blocks",
1293 +                           "Freeing blocks not in datazone - "
1294 +                           "block = %lu, count = %lu", block, count);
1295 +               goto error_return;
1296 +       }
1297 +
1298 +       ext3_debug("freeing block %lu\n", block);
1299 +       /* one pass per block group: the part of the range beyond this group is kept in overflow */
1300 +do_more:
1301 +       overflow = 0;
1302 +       block_group = (block - le32_to_cpu(es->s_first_data_block)) /
1303 +                     EXT3_BLOCKS_PER_GROUP(sb);
1304 +       bit = (block - le32_to_cpu(es->s_first_data_block)) %
1305 +                     EXT3_BLOCKS_PER_GROUP(sb);
1306 +       /*
1307 +        * Check to see if we are freeing blocks across a group
1308 +        * boundary.
1309 +        */
1310 +       if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
1311 +               overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
1312 +               count -= overflow;
1313 +       }
1314 +       brelse(bitmap_bh);
1315 +       bitmap_bh = read_block_bitmap(sb, block_group);
1316 +       if (!bitmap_bh)
1317 +               goto error_return;
1318 +       gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
1319 +       if (!gdp)
1320 +               goto error_return;
1321 +
1322 +       if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
1323 +           in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
1324 +           in_range (block, le32_to_cpu(gdp->bg_inode_table),
1325 +                     EXT3_SB(sb)->s_itb_per_group) ||
1326 +           in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
1327 +                     EXT3_SB(sb)->s_itb_per_group))
1328 +               ext3_error (sb, "ext3_free_blocks",
1329 +                           "Freeing blocks in system zones - "
1330 +                           "Block = %lu, count = %lu",
1331 +                           block, count);
1332 +
1333 +       BUFFER_TRACE(bitmap_bh, "getting write access");
1334 +       err = ext3_journal_get_write_access(handle, bitmap_bh);
1335 +       if (err)
1336 +               goto error_return;
1337 +
1338 +       /*
1339 +        * We are about to modify some metadata.  Call the journal APIs
1340 +        * to unshare ->b_data if a currently-committing transaction is
1341 +        * using it
1342 +        */
1343 +       BUFFER_TRACE(gd_bh, "get_write_access");
1344 +       err = ext3_journal_get_write_access(handle, gd_bh);
1345 +       if (err)
1346 +               goto error_return;
1347 +
1348 +       err = ext3_mb_load_desc(sb, block_group, &e3b);
1349 +       if (err)
1350 +               goto error_return;
1351 +       /* NOTE(review): only the !metadata branch updates bg_free_blocks_count and s_freeblocks_counter - confirm */
1352 +       if (metadata) {
1353 +               /* blocks being freed are metadata. these blocks shouldn't
1354 +                * be used until this transaction is committed */
1355 +               ext3_mb_free_metadata(handle, &e3b, block_group, bit, count);
1356 +       } else { 
1357 +               ext3_lock_group(sb, block_group);
1358 +               mb_free_blocks(&e3b, bit, count);
1359 +               gdp->bg_free_blocks_count =
1360 +                       cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
1361 +               ext3_unlock_group(sb, block_group);
1362 +               percpu_counter_mod(&sbi->s_freeblocks_counter, count);
1363 +       }
1364 +       /* NOTE(review): the return value of ext3_mb_free_metadata() above is not checked */
1365 +       ext3_mb_dirty_buddy(&e3b);
1366 +       ext3_mb_release_desc(&e3b);
1367 +
1368 +       /* FIXME: undo logic will be implemented later and another way */
1369 +       mb_clear_bits(bitmap_bh->b_data, bit, count);
1370 +       DQUOT_FREE_BLOCK(inode, count);
1371 +
1372 +       /* We dirtied the bitmap block */
1373 +       BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
1374 +       err = ext3_journal_dirty_metadata(handle, bitmap_bh);
1375 +
1376 +       /* And the group descriptor block */
1377 +       BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
1378 +       ret = ext3_journal_dirty_metadata(handle, gd_bh);
1379 +       if (!err) err = ret;
1380 +       /* continue with the part of the range that lies in the next group */
1381 +       if (overflow && !err) {
1382 +               block += count;
1383 +               count = overflow;
1384 +               goto do_more;
1385 +       }
1386 +       sb->s_dirt = 1;
1387 +error_return:
1388 +       brelse(bitmap_bh);
1389 +       ext3_std_error(sb, err);
1390 +       return;
1391 +}
1392 +/* Try to reserve @blocks blocks: 0 on success, -ENOSPC if too few are free */
1393 +int ext3_mb_reserve_blocks(struct super_block *sb, int blocks)
1394 +{
1395 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1396 +       int avail, rc = 0;
1397 +
1398 +       BUG_ON(blocks < 0);
1399 +       spin_lock(&sbi->s_reserve_lock);
1400 +       avail = percpu_counter_read_positive(&sbi->s_freeblocks_counter);
1401 +       if (blocks > avail - sbi->s_blocks_reserved)
1402 +               rc = -ENOSPC;
1403 +       else
1404 +               sbi->s_blocks_reserved += blocks;
1405 +       spin_unlock(&sbi->s_reserve_lock);
1406 +       return rc;
1407 +}
1408 +/* Subtract @blocks from sbi->s_blocks_reserved under s_reserve_lock; a negative result is warned about and reset to 0 */
1409 +void ext3_mb_release_blocks(struct super_block *sb, int blocks)
1410 +{
1411 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1412 +
1413 +       BUG_ON(blocks < 0);
1414 +       spin_lock(&sbi->s_reserve_lock);
1415 +       sbi->s_blocks_reserved -= blocks;
1416 +       WARN_ON(sbi->s_blocks_reserved < 0);
1417 +       if (sbi->s_blocks_reserved < 0)
1418 +               sbi->s_blocks_reserved = 0;
1419 +       spin_unlock(&sbi->s_reserve_lock);
1420 +}
1421 +
1422 +/* Allocate a single block: the request goes to ext3_mb_new_blocks() with a
1423 + * length of one when the MBALLOC option is set, and to ext3_new_block_old()
1424 + * otherwise.  The chosen allocator's result is returned as an int. */
1425 +int ext3_new_block(handle_t *handle, struct inode *inode,
1426 +                       unsigned long goal, int *errp)
1427 +{
1428 +       int len = 1;
1429 +
1430 +       if (!test_opt(inode->i_sb, MBALLOC))
1431 +               return ext3_new_block_old(handle, inode, goal, errp);
1432 +
1433 +       /* multiblock allocator, asked for exactly one block */
1434 +       return ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp);
1435 +}
1436 +
1437 +/* Free blocks via mballoc when MBALLOC is set (only that path uses @metadata) */
1438 +void ext3_free_blocks(handle_t *handle, struct inode * inode,
1439 +                       unsigned long block, unsigned long count, int metadata)
1440 +{
1441 +       if (test_opt(inode->i_sb, MBALLOC)) {
1442 +               ext3_mb_free_blocks(handle, inode, block, count, metadata);
1443 +               return;
1444 +       }
1445 +       ext3_free_blocks_old(handle, inode, block, count);
1446 +}
1447 Index: linux-2.6.5-sles9/fs/ext3/super.c
1448 ===================================================================
1449 --- linux-2.6.5-sles9.orig/fs/ext3/super.c      2004-11-09 02:23:21.597220752 +0300
1450 +++ linux-2.6.5-sles9/fs/ext3/super.c   2004-11-09 02:26:12.572228600 +0300
1451 @@ -389,6 +389,7 @@
1452         struct ext3_super_block *es = sbi->s_es;
1453         int i;
1454  
1455 +       ext3_mb_release(sb);
1456         ext3_ext_release(sb);
1457         ext3_xattr_put_super(sb);
1458         journal_destroy(sbi->s_journal);
1459 @@ -542,7 +543,7 @@
1460         Opt_commit, Opt_journal_update, Opt_journal_inum,
1461         Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1462         Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
1463 -       Opt_err, Opt_extents, Opt_extdebug
1464 +       Opt_err, Opt_extents, Opt_extdebug, Opt_mballoc,
1465  };
1466  
1467  static match_table_t tokens = {
1468 @@ -589,6 +590,7 @@
1469         {Opt_iopen_nopriv, "iopen_nopriv"},
1470         {Opt_extents, "extents"},
1471         {Opt_extdebug, "extdebug"},
1472 +       {Opt_mballoc, "mballoc"},
1473         {Opt_err, NULL}
1474  };
1475  
1476 @@ -810,6 +812,9 @@
1477                 case Opt_extdebug:
1478                         set_opt (sbi->s_mount_opt, EXTDEBUG);
1479                         break;
1480 +               case Opt_mballoc:
1481 +                       set_opt (sbi->s_mount_opt, MBALLOC);
1482 +                       break;
1483                 default:
1484                         printk (KERN_ERR
1485                                 "EXT3-fs: Unrecognized mount option \"%s\" "
1486 @@ -1463,7 +1468,8 @@
1487                 ext3_count_dirs(sb));
1488  
1489         ext3_ext_init(sb);
1490
1491 +       ext3_mb_init(sb);
1492 +
1493         return 0;
1494  
1495  failed_mount3:
1496 Index: linux-2.6.5-sles9/fs/ext3/Makefile
1497 ===================================================================
1498 --- linux-2.6.5-sles9.orig/fs/ext3/Makefile     2004-11-09 02:23:21.593221360 +0300
1499 +++ linux-2.6.5-sles9/fs/ext3/Makefile  2004-11-09 02:26:12.572228600 +0300
1500 @@ -5,7 +5,7 @@
1501  obj-$(CONFIG_EXT3_FS) += ext3.o
1502  
1503  ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
1504 -          ioctl.o namei.o super.o symlink.o hash.o extents.o
1505 +          ioctl.o namei.o super.o symlink.o hash.o extents.o mballoc.o
1506  
1507  ext3-$(CONFIG_EXT3_FS_XATTR)    += xattr.o xattr_user.o xattr_trusted.o
1508  ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
1509 Index: linux-2.6.5-sles9/fs/ext3/balloc.c
1510 ===================================================================
1511 --- linux-2.6.5-sles9.orig/fs/ext3/balloc.c     2004-11-03 08:36:51.000000000 +0300
1512 +++ linux-2.6.5-sles9/fs/ext3/balloc.c  2004-11-09 02:26:53.078070776 +0300
1513 @@ -78,7 +78,7 @@
1514   *
1515   * Return buffer_head on success or NULL in case of failure.
1516   */
1517 -static struct buffer_head *
1518 +struct buffer_head *
1519  read_block_bitmap(struct super_block *sb, unsigned int block_group)
1520  {
1521         struct ext3_group_desc * desc;
1522 @@ -274,7 +274,7 @@
1523  }
1524  
1525  /* Free given blocks, update quota and i_blocks field */
1526 -void ext3_free_blocks(handle_t *handle, struct inode *inode,
1527 +void ext3_free_blocks_old(handle_t *handle, struct inode *inode,
1528                         unsigned long block, unsigned long count)
1529  {
1530         struct buffer_head *bitmap_bh = NULL;
1531 @@ -1142,7 +1142,7 @@
1532   * bitmap, and then for any free bit if that fails.
1533   * This function also updates quota and i_blocks field.
1534   */
1535 -int ext3_new_block(handle_t *handle, struct inode *inode,
1536 +int ext3_new_block_old(handle_t *handle, struct inode *inode,
1537                         unsigned long goal, int *errp)
1538  {
1539         struct buffer_head *bitmap_bh = NULL;
1540 Index: linux-2.6.5-sles9/fs/ext3/namei.c
1541 ===================================================================
1542 --- linux-2.6.5-sles9.orig/fs/ext3/namei.c      2004-11-09 02:18:27.616912552 +0300
1543 +++ linux-2.6.5-sles9/fs/ext3/namei.c   2004-11-09 02:26:12.580227384 +0300
1544 @@ -1640,7 +1640,7 @@
1545   * If the create succeeds, we fill in the inode information
1546   * with d_instantiate(). 
1547   */
1548 -static int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
1549 +int ext3_create (struct inode * dir, struct dentry * dentry, int mode,
1550                 struct nameidata *nd)
1551  {
1552         handle_t *handle; 
1553 Index: linux-2.6.5-sles9/fs/ext3/inode.c
1554 ===================================================================
1555 --- linux-2.6.5-sles9.orig/fs/ext3/inode.c      2004-11-09 02:23:21.592221512 +0300
1556 +++ linux-2.6.5-sles9/fs/ext3/inode.c   2004-11-09 02:26:12.587226320 +0300
1557 @@ -572,7 +572,7 @@
1558                 ext3_journal_forget(handle, branch[i].bh);
1559         }
1560         for (i = 0; i < keys; i++)
1561 -               ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
1562 +               ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
1563         return err;
1564  }
1565  
1566 @@ -673,7 +673,7 @@
1567         if (err == -EAGAIN)
1568                 for (i = 0; i < num; i++)
1569                         ext3_free_blocks(handle, inode, 
1570 -                                        le32_to_cpu(where[i].key), 1);
1571 +                                        le32_to_cpu(where[i].key), 1, 1);
1572         return err;
1573  }
1574  
1575 @@ -1829,7 +1829,7 @@
1576                 }
1577         }
1578  
1579 -       ext3_free_blocks(handle, inode, block_to_free, count);
1580 +       ext3_free_blocks(handle, inode, block_to_free, count, 1);
1581  }
1582  
1583  /**
1584 @@ -2000,7 +2000,7 @@
1585                                 ext3_journal_test_restart(handle, inode);
1586                         }
1587  
1588 -                       ext3_free_blocks(handle, inode, nr, 1);
1589 +                       ext3_free_blocks(handle, inode, nr, 1, 1);
1590  
1591                         if (parent_bh) {
1592                                 /*
1593 Index: linux-2.6.5-sles9/fs/ext3/extents.c
1594 ===================================================================
1595 --- linux-2.6.5-sles9.orig/fs/ext3/extents.c    2004-11-09 02:25:56.143726112 +0300
1596 +++ linux-2.6.5-sles9/fs/ext3/extents.c 2004-11-09 02:26:12.591225712 +0300
1597 @@ -740,7 +740,7 @@
1598                 for (i = 0; i < depth; i++) {
1599                         if (!ablocks[i])
1600                                 continue;
1601 -                       ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
1602 +                       ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
1603                 }
1604         }
1605         kfree(ablocks);
1606 @@ -1391,7 +1391,7 @@
1607                         path->p_idx->ei_leaf);
1608         bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
1609         ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
1610 -       ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
1611 +       ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1);
1612         return err;
1613  }
1614  
1615 @@ -1879,10 +1879,12 @@
1616         int needed = ext3_remove_blocks_credits(tree, ex, from, to);
1617         handle_t *handle = ext3_journal_start(tree->inode, needed);
1618         struct buffer_head *bh;
1619 -       int i;
1620 +       int i, metadata = 0;
1621  
1622         if (IS_ERR(handle))
1623                 return PTR_ERR(handle);
1624 +       if (S_ISDIR(tree->inode->i_mode))
1625 +               metadata = 1;
1626         if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
1627                 /* tail removal */
1628                 unsigned long num, start;
1629 @@ -1894,7 +1896,7 @@
1630                         bh = sb_find_get_block(tree->inode->i_sb, start + i);
1631                         ext3_forget(handle, 0, tree->inode, bh, start + i);
1632                 }
1633 -               ext3_free_blocks(handle, tree->inode, start, num);
1634 +               ext3_free_blocks(handle, tree->inode, start, num, metadata);
1635         } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
1636                 printk("strange request: removal %lu-%lu from %u:%u\n",
1637                         from, to, ex->ee_block, ex->ee_len);
1638 Index: linux-2.6.5-sles9/fs/ext3/xattr.c
1639 ===================================================================
1640 --- linux-2.6.5-sles9.orig/fs/ext3/xattr.c      2004-11-09 02:22:55.777146000 +0300
1641 +++ linux-2.6.5-sles9/fs/ext3/xattr.c   2004-11-09 02:26:12.593225408 +0300
1642 @@ -1366,7 +1366,7 @@
1643                         new_bh = sb_getblk(sb, block);
1644                         if (!new_bh) {
1645  getblk_failed:
1646 -                               ext3_free_blocks(handle, inode, block, 1);
1647 +                               ext3_free_blocks(handle, inode, block, 1, 1);
1648                                 error = -EIO;
1649                                 goto cleanup;
1650                         }
1651 @@ -1408,7 +1408,7 @@
1652                 if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
1653                         /* Free the old block. */
1654                         ea_bdebug(old_bh, "freeing");
1655 -                       ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1);
1656 +                       ext3_free_blocks(handle, inode, old_bh->b_blocknr, 1, 1);
1657  
1658                         /* ext3_forget() calls bforget() for us, but we
1659                            let our caller release old_bh, so we need to
1660 @@ -1504,7 +1504,7 @@
1661         lock_buffer(bh);
1662         if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
1663                 ext3_xattr_cache_remove(bh);
1664 -               ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1);
1665 +               ext3_free_blocks(handle, inode, EXT3_I(inode)->i_file_acl, 1, 1);
1666                 get_bh(bh);
1667                 ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
1668         } else {
1669 Index: linux-2.6.5-sles9/include/linux/ext3_fs.h
1670 ===================================================================
1671 --- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h      2004-11-09 02:25:17.238640584 +0300
1672 +++ linux-2.6.5-sles9/include/linux/ext3_fs.h   2004-11-09 02:26:12.596224952 +0300
1673 @@ -57,6 +57,8 @@
1674  #define ext3_debug(f, a...)    do {} while (0)
1675  #endif
1676  
1677 +#define EXT3_MULTIBLOCK_ALLOCATOR      1
1678 +
1679  /*
1680   * Special inodes numbers
1681   */
1682 @@ -339,6 +341,7 @@
1683  #define EXT3_MOUNT_IOPEN_NOPRIV                0x80000 /* Make iopen world-readable */
1684  #define EXT3_MOUNT_EXTENTS             0x100000/* Extents support */
1685  #define EXT3_MOUNT_EXTDEBUG            0x200000/* Extents debug */
1686 +#define EXT3_MOUNT_MBALLOC             0x400000/* Buddy allocation support */
1687  
1688  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
1689  #ifndef clear_opt
1690 @@ -698,7 +701,7 @@
1691  extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
1692  extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
1693  extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
1694 -                             unsigned long);
1695 +                             unsigned long, int);
1696  extern unsigned long ext3_count_free_blocks (struct super_block *);
1697  extern void ext3_check_blocks_bitmap (struct super_block *);
1698  extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
1699 @@ -743,6 +746,13 @@
1700  extern int ext3_ioctl (struct inode *, struct file *, unsigned int,
1701                        unsigned long);
1702  
1703 +/* mballoc.c */
1704 +extern int ext3_mb_init(struct super_block *sb);
1705 +extern int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
1706 +                             unsigned long goal,int *len, int flags,int *errp); /* len/errp are pointers: presumably in/out block count and error out-param -- TODO confirm in mballoc.c */
1707 +extern int ext3_mb_release(struct super_block *sb);
1708 +extern void ext3_mb_release_blocks(struct super_block *, int); /* NOTE(review): parameters unnamed; the int is presumably a block count -- confirm */
1709 +
1710  /* namei.c */
1711  extern int ext3_orphan_add(handle_t *, struct inode *);
1712  extern int ext3_orphan_del(handle_t *, struct inode *);
1713 Index: linux-2.6.5-sles9/include/linux/ext3_fs_sb.h
1714 ===================================================================
1715 --- linux-2.6.5-sles9.orig/include/linux/ext3_fs_sb.h   2004-11-09 02:20:51.598024096 +0300
1716 +++ linux-2.6.5-sles9/include/linux/ext3_fs_sb.h        2004-11-09 02:28:18.753046200 +0300
1717 @@ -23,10 +23,30 @@
1718  #define EXT_INCLUDE
1719  #include <linux/blockgroup_lock.h>
1720  #include <linux/percpu_counter.h>
1721 +#include <linux/list.h>
1722  #endif
1723  #endif
1724  #include <linux/rbtree.h>
1725  
1726 +#define EXT3_BB_MAX_BLOCKS     30 /* capacity of ext3_free_metadata.blocks[] */
1727 +struct ext3_free_metadata { /* NOTE(review): presumably a batch of freed metadata blocks for one group -- confirm in mballoc.c */
1728 +       unsigned short group; /* NOTE(review): unsigned short -- confirm group numbers always fit */
1729 +       unsigned short num; /* presumably the number of valid entries in blocks[] -- TODO confirm */
1730 +       unsigned short blocks[EXT3_BB_MAX_BLOCKS]; /* presumably block offsets within the group -- TODO confirm */
1731 +       struct list_head list; /* list linkage; the list head it hangs off is not visible here */
1732 +};
1733 +
1734 +#define EXT3_BB_MAX_ORDER      14 /* length of ext3_buddy_group_blocks.bb_counters[] */
1735 +
1736 +struct ext3_buddy_group_blocks { /* presumably one instance per block group -- TODO confirm in mballoc.c */
1737 +       sector_t        bb_bitmap; /* a sector_t location; presumably of the group's bitmap copy -- TODO confirm */
1738 +       sector_t        bb_buddy; /* a sector_t location; presumably of the group's buddy data -- TODO confirm */
1739 +       spinlock_t      bb_lock; /* NOTE(review): which fields this lock guards is not visible here */
1740 +       unsigned        bb_counters[EXT3_BB_MAX_ORDER]; /* one counter per order; presumably free-chunk counts -- TODO confirm */
1741 +       struct ext3_free_metadata *bb_md_cur; /* presumably the ext3_free_metadata batch currently being filled -- TODO confirm */
1742 +       unsigned long bb_tid; /* NOTE(review): unsigned long, while the sb field s_last_transaction is tid_t -- confirm the types agree */
1743 +};
1744 +
1745  /*
1746   * third extended-fs super-block data in memory
1747   */
1748 @@ -78,6 +98,17 @@
1749         struct timer_list turn_ro_timer;        /* For turning read-only (crash simulation) */
1750         wait_queue_head_t ro_wait_queue;        /* For people waiting for the fs to go read-only */
1751  #endif
1752 +
1753 +       /* for buddy allocator */
1754 +       struct ext3_buddy_group_blocks **s_buddy_blocks;
1755 +       struct inode *s_buddy;
1756 +       long s_blocks_reserved;
1757 +       spinlock_t s_reserve_lock;
1758 +       struct list_head s_active_transaction;
1759 +       struct list_head s_closed_transaction;
1760 +       struct list_head s_committed_transaction;
1761 +       spinlock_t s_md_lock;
1762 +       tid_t s_last_transaction;
1763  };
1764  
1765  #endif /* _LINUX_EXT3_FS_SB */