Whamcloud - gitweb
file lbnal_cb.c was initially added on branch b1_4.
[fs/lustre-release.git] / lustre / kernel_patches / patches / ext3-mballoc2-2.4.24.patch
1 Index: linux-2.4.24/fs/ext3/mballoc.c
2 ===================================================================
3 --- linux-2.4.24.orig/fs/ext3/mballoc.c 2003-01-30 13:24:37.000000000 +0300
4 +++ linux-2.4.24/fs/ext3/mballoc.c      2004-08-06 04:50:53.000000000 +0400
5 @@ -0,0 +1,1399 @@
6 +/*
7 + * Copyright (c) 2004, Cluster File Systems, Inc, info@clusterfs.com
8 + * Written by Alex Tomas <alex@clusterfs.com>
9 + *
10 + * This program is free software; you can redistribute it and/or modify
11 + * it under the terms of the GNU General Public License version 2 as
12 + * published by the Free Software Foundation.
13 + *
14 + * This program is distributed in the hope that it will be useful,
15 + * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17 + * GNU General Public License for more details.
18 + *
19 + * You should have received a copy of the GNU General Public Licens
20 + * along with this program; if not, write to the Free Software
21 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-
22 + */
23 +
24 +
25 +/*
26 + * mballoc.c contains the multiblocks allocation routines
27 + */
28 +
29 +#include <linux/config.h>
30 +#include <linux/time.h>
31 +#include <linux/fs.h>
32 +#include <linux/locks.h>
33 +#include <linux/jbd.h>
34 +#include <linux/slab.h>
35 +#include <linux/ext3_fs.h>
36 +#include <linux/ext3_jbd.h>
37 +#include <linux/quotaops.h>
38 +
39 +/*
40 + * TODO:
41 + *   - do not scan from the beginning, try to remember first free block
42 + *   - mb_mark_used_* may allocate chunk right after splitting buddy
43 + *   - special flag to advice allocator to look for requested + N blocks
44 + *     this may improve interaction between extents and mballoc
45 + */
46 +
47 +/*
48 + * with AGRESSIVE_CHECK allocator runs consistency checks over
49 + * structures. this checks slow things down a lot
50 + */
51 +#define AGGRESSIVE_CHECK__
52 +
53 +/*
54 + */
55 +#define MB_DEBUG__
56 +#ifdef MB_DEBUG
57 +#define mb_debug(fmt,a...)     printk(fmt, ##a)
58 +#else
59 +#define mb_debug(fmt,a...)
60 +#endif
61 +
62 +/*
63 + * where to save buddies structures beetween umount/mount (clean case only)
64 + */
65 +#define EXT3_BUDDY_FILE                ".buddy"
66 +
67 +/*
68 + * max. number of chunks to be tracked in ext3_free_extent struct
69 + */
70 +#define MB_ARR_SIZE    32
71 +
72 +struct ext3_allocation_context {
73 +       struct super_block *ac_sb;
74 +
75 +       /* search goals */
76 +       int ac_g_group;
77 +       int ac_g_start;
78 +       int ac_g_len;
79 +       int ac_g_flags;
80 +       
81 +       /* the best found extent */
82 +       int ac_b_group;
83 +       int ac_b_start;
84 +       int ac_b_len;
85 +       
86 +       /* number of iterations done. we have to track to limit searching */
87 +       int ac_repeats;
88 +       int ac_groups_scanned;
89 +       int ac_status;
90 +};
91 +
92 +#define AC_STATUS_CONTINUE     1
93 +#define AC_STATUS_FOUND                2
94 +
95 +
96 +struct ext3_buddy {
97 +       void *bd_bitmap;
98 +       void *bd_buddy;
99 +       int bd_blkbits;
100 +       struct buffer_head *bd_bh;
101 +       struct buffer_head *bd_bh2;
102 +       struct ext3_buddy_group_blocks *bd_bd;
103 +       struct super_block *bd_sb;
104 +};
105 +
106 +struct ext3_free_extent {
107 +       int fe_start;
108 +       int fe_len;
109 +       unsigned char fe_orders[MB_ARR_SIZE];
110 +       unsigned char fe_nums;
111 +       unsigned char fe_back;
112 +};
113 +
114 +#define in_range(b, first, len)        ((b) >= (first) && (b) <= (first) + (len) - 1)
115 +
116 +
117 +int ext3_create (struct inode *, struct dentry *, int, struct nameidata *);
118 +void ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, unsigned long);
119 +int ext3_new_block_old(handle_t *, struct inode *, unsigned long, u32 *, u32 *, int *);
120 +int ext3_mb_reserve_blocks(struct super_block *, int);
121 +void ext3_mb_release_blocks(struct super_block *, int);
122 +void ext3_mb_poll_new_transaction(struct super_block *, handle_t *);
123 +void ext3_mb_free_committed_blocks(struct super_block *);
124 +int load_block_bitmap (struct super_block *, unsigned int);
125 +
126 +struct buffer_head * 
127 +read_block_bitmap_bh(struct super_block *sb, unsigned int block_group)
128 +{
129 +       struct buffer_head *bh;
130 +       int bitmap_nr;
131 +
132 +       bitmap_nr = load_block_bitmap(sb, block_group);
133 +       if (bitmap_nr < 0)
134 +               return NULL;
135 +       
136 +       bh = EXT3_SB(sb)->s_block_bitmap[bitmap_nr];
137 +       return bh;
138 +}
139 +
140 +static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max)
141 +{
142 +       int i = 1;
143 +       void *bb;
144 +
145 +       J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
146 +       J_ASSERT(max != NULL);
147 +
148 +       if (order > e3b->bd_blkbits + 1)
149 +               return NULL;
150 +
151 +       /* at order 0 we see each particular block */
152 +       *max = 1 << (e3b->bd_blkbits + 3);
153 +       if (order == 0)
154 +               return e3b->bd_bitmap;
155 +
156 +       bb = e3b->bd_buddy;
157 +       *max = *max >> 1;
158 +       while (i < order) {
159 +               bb += 1 << (e3b->bd_blkbits - i);
160 +               i++;
161 +               *max = *max >> 1;
162 +       }
163 +       return bb;
164 +}
165 +
166 +static int ext3_mb_load_desc(struct super_block *sb, int group,
167 +                               struct ext3_buddy *e3b)
168 +{
169 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
170 +
171 +       J_ASSERT(sbi->s_buddy_blocks[group].bb_bitmap);
172 +       J_ASSERT(sbi->s_buddy_blocks[group].bb_buddy);
173 +
174 +       /* load bitmap */
175 +       e3b->bd_bh = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_bitmap);
176 +       if (e3b->bd_bh == NULL) {
177 +               ext3_error(sb, "ext3_mb_load_desc",
178 +                               "can't get block for buddy bitmap\n");
179 +               goto out;
180 +       }
181 +       if (!buffer_uptodate(e3b->bd_bh)) {
182 +               ll_rw_block(READ, 1, &e3b->bd_bh);
183 +               wait_on_buffer(e3b->bd_bh);
184 +       }
185 +       J_ASSERT(buffer_uptodate(e3b->bd_bh));
186 +
187 +       /* load buddy */
188 +       e3b->bd_bh2 = sb_getblk(sb, sbi->s_buddy_blocks[group].bb_buddy);
189 +       if (e3b->bd_bh2 == NULL) {
190 +               ext3_error(sb, "ext3_mb_load_desc",
191 +                               "can't get block for buddy bitmap\n");
192 +               goto out;
193 +       }
194 +       if (!buffer_uptodate(e3b->bd_bh2)) {
195 +               ll_rw_block(READ, 1, &e3b->bd_bh2);
196 +               wait_on_buffer(e3b->bd_bh2);
197 +       }
198 +       J_ASSERT(buffer_uptodate(e3b->bd_bh2));
199 +
200 +       e3b->bd_bitmap = e3b->bd_bh->b_data;
201 +       e3b->bd_buddy = e3b->bd_bh2->b_data;
202 +       e3b->bd_blkbits = sb->s_blocksize_bits;
203 +       e3b->bd_bd = sbi->s_buddy_blocks + group;
204 +       e3b->bd_sb = sb;
205 +
206 +       return 0;
207 +out:
208 +       brelse(e3b->bd_bh);
209 +       brelse(e3b->bd_bh2);
210 +       e3b->bd_bh = NULL;
211 +       e3b->bd_bh2 = NULL;
212 +       return -EIO;
213 +}
214 +
215 +static void ext3_mb_dirty_buddy(struct ext3_buddy *e3b)
216 +{
217 +       mark_buffer_dirty(e3b->bd_bh);
218 +       mark_buffer_dirty(e3b->bd_bh2);
219 +}
220 +
221 +static void ext3_mb_release_desc(struct ext3_buddy *e3b)
222 +{
223 +       brelse(e3b->bd_bh);
224 +       brelse(e3b->bd_bh2);
225 +}
226 +
227 +#ifdef AGGRESSIVE_CHECK
228 +static void mb_check_buddy(struct ext3_buddy *e3b)
229 +{
230 +       int order = e3b->bd_blkbits + 1;
231 +       int max, max2, i, j, k, count;
232 +       void *buddy, *buddy2;
233 +
234 +       if (!test_opt(e3b->bd_sb, MBALLOC))
235 +               return;
236 +
237 +       while (order > 1) {
238 +               buddy = mb_find_buddy(e3b, order, &max);
239 +               J_ASSERT(buddy);
240 +               buddy2 = mb_find_buddy(e3b, order - 1, &max2);
241 +               J_ASSERT(buddy2);
242 +               J_ASSERT(buddy != buddy2);
243 +               J_ASSERT(max * 2 == max2);
244 +
245 +               count = 0;
246 +               for (i = 0; i < max; i++) {
247 +
248 +                       if (!test_bit(i, buddy)) {
249 +                               /* only single bit in buddy2 may be 1 */
250 +                               if (test_bit(i << 1, buddy2))
251 +                                       J_ASSERT(!test_bit((i<<1)+1, buddy2));
252 +                               else if (test_bit((i << 1) + 1, buddy2))
253 +                                       J_ASSERT(!test_bit(i << 1, buddy2));
254 +                               continue;
255 +                       }
256 +
257 +                       /* both bits in buddy2 must be 0 */
258 +                       J_ASSERT(!test_bit(i << 1, buddy2));
259 +                       J_ASSERT(!test_bit((i << 1) + 1, buddy2));
260 +
261 +                       for (j = 0; j < (1 << order); j++) {
262 +                               k = (i * (1 << order)) + j;
263 +                               J_ASSERT(test_bit(k, e3b->bd_bitmap));
264 +                       }
265 +                       count++;
266 +               }
267 +               J_ASSERT(e3b->bd_bd->bb_counters[order] == count);
268 +               order--;
269 +       }
270 +
271 +       buddy = mb_find_buddy(e3b, 0, &max);
272 +       for (i = 0; i < max; i++) {
273 +               if (test_bit(i, buddy))
274 +                       continue;
275 +               /* check used bits only */
276 +               for (j = 0; j < e3b->bd_blkbits + 1; j++) {
277 +                       buddy2 = mb_find_buddy(e3b, j, &max2);
278 +                       k = i >> j;
279 +                       J_ASSERT(k < max2);
280 +                       J_ASSERT(!test_bit(k, buddy2));
281 +               }
282 +       }
283 +}
284 +#else
285 +#define mb_check_buddy(e3b)
286 +#endif
287 +
288 +static inline void
289 +ext3_lock_group(struct super_block *sb, int group)
290 +{
291 +       spin_lock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock);
292 +}
293 +
294 +static inline void
295 +ext3_unlock_group(struct super_block *sb, int group)
296 +{
297 +       spin_unlock(&EXT3_SB(sb)->s_buddy_blocks[group].bb_lock);
298 +}
299 +
300 +static int mb_find_order_for_block(struct ext3_buddy *e3b, int block)
301 +{
302 +       int order = 1;
303 +       void *bb;
304 +
305 +       J_ASSERT(e3b->bd_bitmap != e3b->bd_buddy);
306 +       J_ASSERT(block < (1 << (e3b->bd_blkbits + 3)));
307 +
308 +       bb = e3b->bd_buddy;
309 +       while (order <= e3b->bd_blkbits + 1) {
310 +               block = block >> 1;
311 +               if (test_bit(block, bb)) {
312 +                       /* this block is part of buddy of order 'order' */
313 +                       return order;
314 +               }
315 +               bb += 1 << (e3b->bd_blkbits - order);
316 +               order++;
317 +       }
318 +       return 0;
319 +}
320 +
321 +static inline void mb_clear_bits(void *bm, int cur, int len)
322 +{
323 +       __u32 *addr;
324 +
325 +       len = cur + len;
326 +       while (cur < len) {
327 +               if ((cur & 31) == 0 && (len - cur) >= 32) {
328 +                       /* fast path: clear whole word at once */
329 +                       addr = bm + (cur >> 3);
330 +                       *addr = 0;
331 +                       cur += 32;
332 +                       continue;
333 +               }
334 +               clear_bit(cur, bm);
335 +               cur++;
336 +       }
337 +}
338 +
339 +static inline void mb_set_bits(void *bm, int cur, int len)
340 +{
341 +       __u32 *addr;
342 +
343 +       len = cur + len;
344 +       while (cur < len) {
345 +               if ((cur & 31) == 0 && (len - cur) >= 32) {
346 +                       /* fast path: clear whole word at once */
347 +                       addr = bm + (cur >> 3);
348 +                       *addr = 0xffffffff;
349 +                       cur += 32;
350 +                       continue;
351 +               }
352 +               set_bit(cur, bm);
353 +               cur++;
354 +       }
355 +}
356 +
357 +static int mb_free_blocks(struct ext3_buddy *e3b, int first, int count)
358 +{
359 +       int block, max, order;
360 +       void *buddy, *buddy2;
361 +
362 +       mb_check_buddy(e3b);
363 +       while (count-- > 0) {
364 +               block = first++;
365 +               order = 0;
366 +
367 +               J_ASSERT(!test_bit(block, e3b->bd_bitmap));
368 +               set_bit(block, e3b->bd_bitmap);
369 +               e3b->bd_bd->bb_counters[order]++;
370 +
371 +               /* start of the buddy */
372 +               buddy = mb_find_buddy(e3b, order, &max);
373 +
374 +               do {
375 +                       block &= ~1UL;
376 +                       if (!test_bit(block, buddy) ||
377 +                                       !test_bit(block + 1, buddy))
378 +                               break;
379 +
380 +                       /* both the buddies are free, try to coalesce them */
381 +                       buddy2 = mb_find_buddy(e3b, order + 1, &max);
382 +
383 +                       if (!buddy2)
384 +                               break;
385 +
386 +                       if (order > 0) {
387 +                               /* for special purposes, we don't clear
388 +                                * free bits in bitmap */
389 +                               clear_bit(block, buddy);
390 +                               clear_bit(block + 1, buddy);
391 +                       }
392 +                       e3b->bd_bd->bb_counters[order]--;
393 +                       e3b->bd_bd->bb_counters[order]--;
394 +
395 +                       block = block >> 1;
396 +                       order++;
397 +                       e3b->bd_bd->bb_counters[order]++;
398 +
399 +                       set_bit(block, buddy2);
400 +                       buddy = buddy2;
401 +               } while (1);
402 +       }
403 +       mb_check_buddy(e3b);
404 +
405 +       return 0;
406 +}
407 +
408 +/*
409 + * returns 1 if out extent is enough to fill needed space
410 + */
411 +int mb_make_backward_extent(struct ext3_free_extent *in,
412 +                               struct ext3_free_extent *out, int needed)
413 +{
414 +       int i;
415 +
416 +       J_ASSERT(in);
417 +       J_ASSERT(out);
418 +       J_ASSERT(in->fe_nums < MB_ARR_SIZE);
419 +
420 +       out->fe_len = 0;
421 +       out->fe_start = in->fe_start + in->fe_len;
422 +       out->fe_nums = 0;
423 +
424 +       /* for single-chunk extent we need not back order
425 +        * also, if an extent doesn't fill needed space
426 +        * then it makes no sense to try back order becase
427 +        * if we select this extent then it'll be use as is */
428 +       if (in->fe_nums < 2 || in->fe_len < needed)
429 +               return 0;
430 +
431 +       i = in->fe_nums - 1;
432 +       while (i >= 0 && out->fe_len < needed) {
433 +               out->fe_len += (1 << in->fe_orders[i]);
434 +               out->fe_start -= (1 << in->fe_orders[i]);
435 +               i--;
436 +       }
437 +       /* FIXME: in some situation fe_orders may be too small to hold
438 +        * all the buddies */
439 +       J_ASSERT(out->fe_len >= needed);
440 +       
441 +       for (i++; i < in->fe_nums; i++)
442 +               out->fe_orders[out->fe_nums++] = in->fe_orders[i];
443 +       J_ASSERT(out->fe_nums < MB_ARR_SIZE);
444 +       out->fe_back = 1;
445 +
446 +       return 1;
447 +}
448 +
449 +int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
450 +                       int needed, struct ext3_free_extent *ex)
451 +{
452 +       int space = needed;
453 +       int next, max, ord;
454 +       void *buddy;
455 +
456 +       J_ASSERT(ex != NULL);
457 +
458 +       ex->fe_nums = 0;
459 +       ex->fe_len = 0;
460 +       
461 +       buddy = mb_find_buddy(e3b, order, &max);
462 +       J_ASSERT(buddy);
463 +       J_ASSERT(block < max);
464 +       if (!test_bit(block, buddy))
465 +               goto nofree;
466 +
467 +       if (order == 0) {
468 +               /* find actual order */
469 +               order = mb_find_order_for_block(e3b, block);
470 +               block = block >> order;
471 +       }
472 +
473 +       ex->fe_orders[ex->fe_nums++] = order;
474 +       ex->fe_len = 1 << order;
475 +       ex->fe_start = block << order;
476 +       ex->fe_back = 0;
477 +
478 +       while ((space = space - (1 << order)) > 0) {
479 +
480 +               buddy = mb_find_buddy(e3b, order, &max);
481 +               J_ASSERT(buddy);
482 +
483 +               if (block + 1 >= max)
484 +                       break;
485 +
486 +               next = (block + 1) * (1 << order);
487 +               if (!test_bit(next, e3b->bd_bitmap))
488 +                       break;
489 +
490 +               ord = mb_find_order_for_block(e3b, next);
491 +
492 +               if ((1 << ord) >= needed) {
493 +                       /* we dont want to coalesce with self-enough buddies */
494 +                       break;
495 +               }
496 +               order = ord;
497 +               block = next >> order;
498 +               ex->fe_len += 1 << order;
499 +
500 +               if (ex->fe_nums < MB_ARR_SIZE)
501 +                       ex->fe_orders[ex->fe_nums++] = order;
502 +       }
503 +
504 +nofree:
505 +       J_ASSERT(ex->fe_start + ex->fe_len <= (1 << (e3b->bd_blkbits + 3)));
506 +       return ex->fe_len;
507 +}
508 +
509 +static int mb_mark_used_backward(struct ext3_buddy *e3b,
510 +                                       struct ext3_free_extent *ex, int len)
511 +{
512 +       int start = ex->fe_start, len0 = len;
513 +       int ord, mlen, max, cur;
514 +       void *buddy;
515 +
516 +       start = ex->fe_start + ex->fe_len - 1;
517 +       while (len) {
518 +               ord = mb_find_order_for_block(e3b, start);
519 +               if (((start >> ord) << ord) == (start - (1 << ord) + 1) &&
520 +                               len >= (1 << ord)) {
521 +                       /* the whole chunk may be allocated at once! */
522 +                       mlen = 1 << ord;
523 +                       buddy = mb_find_buddy(e3b, ord, &max);
524 +                       J_ASSERT((start >> ord) < max);
525 +                       clear_bit(start >> ord, buddy);
526 +                       e3b->bd_bd->bb_counters[ord]--;
527 +                       start -= mlen;
528 +                       len -= mlen;
529 +                       J_ASSERT(len >= 0);
530 +                       J_ASSERT(start >= 0);
531 +                       continue;
532 +               }
533 +
534 +               /* we have to split large buddy */
535 +               J_ASSERT(ord > 0);
536 +               buddy = mb_find_buddy(e3b, ord, &max);
537 +               clear_bit(start >> ord, buddy);
538 +               e3b->bd_bd->bb_counters[ord]--;
539 +
540 +               ord--;
541 +               cur = (start >> ord) & ~1U;
542 +               buddy = mb_find_buddy(e3b, ord, &max);
543 +               set_bit(cur, buddy);
544 +               set_bit(cur + 1, buddy);
545 +               e3b->bd_bd->bb_counters[ord]++;
546 +               e3b->bd_bd->bb_counters[ord]++;
547 +       }
548 +
549 +       /* now drop all the bits in bitmap */
550 +       mb_clear_bits(e3b->bd_bitmap, ex->fe_start + ex->fe_len - len0, len0);
551 +
552 +       mb_check_buddy(e3b);
553 +
554 +       return 0;
555 +}
556 +
557 +static int mb_mark_used_forward(struct ext3_buddy *e3b,
558 +                               struct ext3_free_extent *ex, int len)
559 +{
560 +       int start = ex->fe_start, len0 = len;
561 +       int ord, mlen, max, cur;
562 +       void *buddy;
563 +
564 +       while (len) {
565 +               ord = mb_find_order_for_block(e3b, start);
566 +
567 +               if (((start >> ord) << ord) == start && len >= (1 << ord)) {
568 +                       /* the whole chunk may be allocated at once! */
569 +                       mlen = 1 << ord;
570 +                       buddy = mb_find_buddy(e3b, ord, &max);
571 +                       J_ASSERT((start >> ord) < max);
572 +                       clear_bit(start >> ord, buddy);
573 +                       e3b->bd_bd->bb_counters[ord]--;
574 +                       start += mlen;
575 +                       len -= mlen;
576 +                       J_ASSERT(len >= 0);
577 +                       continue;
578 +               }
579 +
580 +               /* we have to split large buddy */
581 +               J_ASSERT(ord > 0);
582 +               buddy = mb_find_buddy(e3b, ord, &max);
583 +               clear_bit(start >> ord, buddy);
584 +               e3b->bd_bd->bb_counters[ord]--;
585 +
586 +               ord--;
587 +               cur = (start >> ord) & ~1U;
588 +               buddy = mb_find_buddy(e3b, ord, &max);
589 +               set_bit(cur, buddy);
590 +               set_bit(cur + 1, buddy);
591 +               e3b->bd_bd->bb_counters[ord]++;
592 +               e3b->bd_bd->bb_counters[ord]++;
593 +       }
594 +
595 +       /* now drop all the bits in bitmap */
596 +       mb_clear_bits(e3b->bd_bitmap, ex->fe_start, len0);
597 +
598 +       mb_check_buddy(e3b);
599 +
600 +       return 0;
601 +}
602 +
603 +int inline mb_mark_used(struct ext3_buddy *e3b,
604 +                       struct ext3_free_extent *ex, int len)
605 +{
606 +       int err;
607 +
608 +       J_ASSERT(ex);
609 +       if (ex->fe_back == 0)
610 +               err = mb_mark_used_forward(e3b, ex, len);
611 +       else
612 +               err = mb_mark_used_backward(e3b, ex, len);
613 +       return err;
614 +}
615 +
616 +int ext3_mb_new_in_group(struct ext3_allocation_context *ac,
617 +                               struct ext3_buddy *e3b, int group)
618 +{
619 +       struct super_block *sb = ac->ac_sb;
620 +       int err, gorder, max, i;
621 +       struct ext3_free_extent curex;
622 +
623 +       /* let's know order of allocation */
624 +       gorder = 0;
625 +       while (ac->ac_g_len > (1 << gorder))
626 +               gorder++;
627 +
628 +       if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) {
629 +               /* someone asks for space at this specified block
630 +                * probably he wants to merge it into existing extent */
631 +               if (test_bit(ac->ac_g_start, e3b->bd_bitmap)) {
632 +                       /* good. at least one block is free */
633 +                       max = mb_find_extent(e3b, 0, ac->ac_g_start,
634 +                                               ac->ac_g_len, &curex);
635 +                       max = min(curex.fe_len, ac->ac_g_len);
636 +                       mb_mark_used(e3b, &curex, max);
637 +                       
638 +                       ac->ac_b_group = group;
639 +                       ac->ac_b_start = curex.fe_start;
640 +                       ac->ac_b_len = max;
641 +                       ac->ac_status = AC_STATUS_FOUND;
642 +                       err = 0;
643 +                       goto out;
644 +               }
645 +               /* don't try to find goal anymore */
646 +               ac->ac_g_flags &= ~1;
647 +       }
648 +
649 +       i = 0;
650 +       while (1) {
651 +               i = find_next_bit(e3b->bd_bitmap, sb->s_blocksize * 8, i);
652 +               if (i >= sb->s_blocksize * 8)
653 +                       break;
654 +
655 +               max = mb_find_extent(e3b, 0, i, ac->ac_g_len, &curex);
656 +               if (max >= ac->ac_g_len) {
657 +                       max = min(curex.fe_len, ac->ac_g_len);
658 +                       mb_mark_used(e3b, &curex, max);
659 +                       
660 +                       ac->ac_b_group = group;
661 +                       ac->ac_b_start = curex.fe_start;
662 +                       ac->ac_b_len = max;
663 +                       ac->ac_status = AC_STATUS_FOUND;
664 +                       break;
665 +               }
666 +               i += max;
667 +       }
668 +
669 +       return 0;
670 +
671 +out:
672 +       return err;
673 +}
674 +
675 +int mb_good_group(struct ext3_allocation_context *ac, int group, int cr)
676 +{
677 +       struct ext3_group_desc *gdp;
678 +       int free_blocks;
679 +
680 +       gdp = ext3_get_group_desc(ac->ac_sb, group, NULL);
681 +       if (!gdp)
682 +               return 0;
683 +       free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
684 +       if (free_blocks == 0)
685 +               return 0;
686 +
687 +       /* someone wants this block very much */
688 +       if ((ac->ac_g_flags & 1) && ac->ac_g_group == group)
689 +               return 1;
690 +
691 +       /* FIXME: I'd like to take fragmentation into account here */
692 +       if (cr == 0) {
693 +               if (free_blocks >= ac->ac_g_len >> 1)
694 +                       return 1;
695 +       } else if (cr == 1) {
696 +               if (free_blocks >= ac->ac_g_len >> 2)
697 +                       return 1;
698 +       } else if (cr == 2) {
699 +               return 1;
700 +       } else {
701 +               BUG();
702 +       }
703 +       return 0;
704 +}
705 +
706 +int ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
707 +                       unsigned long goal, int *len, int flags, int *errp)
708 +{
709 +       struct buffer_head *bitmap_bh = NULL;
710 +       struct ext3_allocation_context ac;
711 +       int i, group, block, cr, err = 0;
712 +       struct ext3_group_desc *gdp;
713 +       struct ext3_super_block *es;
714 +       struct buffer_head *gdp_bh;
715 +       struct ext3_sb_info *sbi;
716 +       struct super_block *sb;
717 +       struct ext3_buddy e3b;
718 +
719 +       J_ASSERT(len != NULL);
720 +       J_ASSERT(*len > 0);
721 +
722 +       sb = inode->i_sb;
723 +       if (!sb) {
724 +               printk("ext3_mb_new_nblocks: nonexistent device");
725 +               return 0;
726 +       }
727 +
728 +       ext3_mb_poll_new_transaction(sb, handle);
729 +
730 +       sbi = EXT3_SB(sb);
731 +       es = EXT3_SB(sb)->s_es;
732 +
733 +       if (!(flags & 2)) {
734 +               /* someone asks for non-reserved blocks */
735 +               BUG_ON(*len > 1);
736 +               err = ext3_mb_reserve_blocks(sb, 1);
737 +               if (err) {
738 +                       *errp = err;
739 +                       return 0;
740 +               }
741 +       }
742 +
743 +       /*
744 +        * Check quota for allocation of this blocks.
745 +        */
746 +       while (*len && DQUOT_ALLOC_BLOCK(inode, *len))
747 +               *len -= 1;
748 +       if (*len == 0) {
749 +               *errp = -EDQUOT;
750 +               block = 0;
751 +               goto out;
752 +       }
753 +
754 +       /* start searching from the goal */
755 +       if (goal < le32_to_cpu(es->s_first_data_block) ||
756 +           goal >= le32_to_cpu(es->s_blocks_count))
757 +               goal = le32_to_cpu(es->s_first_data_block);
758 +       group = (goal - le32_to_cpu(es->s_first_data_block)) /
759 +                       EXT3_BLOCKS_PER_GROUP(sb);
760 +       block = ((goal - le32_to_cpu(es->s_first_data_block)) %
761 +                       EXT3_BLOCKS_PER_GROUP(sb));
762 +
763 +       /* set up allocation goals */
764 +       ac.ac_b_group = ac.ac_b_start = ac.ac_b_len = 0;
765 +       ac.ac_status = 0;
766 +       ac.ac_groups_scanned = 0;
767 +       ac.ac_sb = inode->i_sb;
768 +       ac.ac_g_group = group;
769 +       ac.ac_g_start = block;
770 +       ac.ac_g_len = *len;
771 +       ac.ac_g_flags = flags;
772 +
773 +       /* loop over the groups */
774 +       for (cr = 0; cr < 3 && ac.ac_status != AC_STATUS_FOUND; cr++) {
775 +               for (i = 0; i < EXT3_SB(sb)->s_groups_count; group++, i++) {
776 +                       if (group == EXT3_SB(sb)->s_groups_count)
777 +                               group = 0;
778 +
779 +                       /* check is group good for our criteries */
780 +                       if (!mb_good_group(&ac, group, cr))
781 +                               continue;
782 +
783 +                       err = ext3_mb_load_desc(ac.ac_sb, group, &e3b);
784 +                       if (err)
785 +                               goto out_err;
786 +
787 +                       ext3_lock_group(sb, group);
788 +                       if (!mb_good_group(&ac, group, cr)) {
789 +                               /* someone did allocation from this group */
790 +                               ext3_unlock_group(sb, group);
791 +                               ext3_mb_release_desc(&e3b);
792 +                               continue;
793 +                       }
794 +
795 +                       err = ext3_mb_new_in_group(&ac, &e3b, group);
796 +                       ext3_unlock_group(sb, group);
797 +                       if (ac.ac_status == AC_STATUS_FOUND)
798 +                               ext3_mb_dirty_buddy(&e3b);
799 +                       ext3_mb_release_desc(&e3b);
800 +                       if (err)
801 +                               goto out_err;
802 +                       if (ac.ac_status == AC_STATUS_FOUND)
803 +                               break;
804 +               }
805 +       }
806 +
807 +       if (ac.ac_status != AC_STATUS_FOUND) {
808 +               /* unfortunately, we can't satisfy this request */
809 +               J_ASSERT(ac.ac_b_len == 0);
810 +               DQUOT_FREE_BLOCK(inode, *len);
811 +               *errp = -ENOSPC;
812 +               block = 0;
813 +               goto out;
814 +       }
815 +
816 +       /* good news - free block(s) have been found. now it's time
817 +        * to mark block(s) in good old journaled bitmap */
818 +       block = ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
819 +                       + ac.ac_b_start + le32_to_cpu(es->s_first_data_block);
820 +
821 +       /* we made a desicion, now mark found blocks in good old
822 +        * bitmap to be journaled */
823 +
824 +       ext3_debug("using block group %d(%d)\n",
825 +                       ac.ac_b_group.group, gdp->bg_free_blocks_count);
826 +
827 +       bitmap_bh = read_block_bitmap_bh(sb, ac.ac_b_group);
828 +       if (!bitmap_bh) {
829 +               *errp = -EIO;
830 +               goto out_err;
831 +       }
832 +
833 +       err = ext3_journal_get_write_access(handle, bitmap_bh);
834 +       if (err) {
835 +               *errp = err;
836 +               goto out_err;
837 +       }
838 +
839 +       gdp = ext3_get_group_desc(sb, ac.ac_b_group, &gdp_bh);
840 +       if (!gdp) {
841 +               *errp = -EIO;
842 +               goto out_err;
843 +       }
844 +       
845 +       err = ext3_journal_get_write_access(handle, gdp_bh);
846 +       if (err)
847 +               goto out_err;
848 +
849 +       block = ac.ac_b_start + ac.ac_b_group * EXT3_BLOCKS_PER_GROUP(sb)
850 +                               + le32_to_cpu(es->s_first_data_block);
851 +
852 +       if (block == le32_to_cpu(gdp->bg_block_bitmap) ||
853 +           block == le32_to_cpu(gdp->bg_inode_bitmap) ||
854 +           in_range(block, le32_to_cpu(gdp->bg_inode_table),
855 +                     EXT3_SB(sb)->s_itb_per_group))
856 +               ext3_error(sb, "ext3_new_block",
857 +                           "Allocating block in system zone - "
858 +                           "block = %u", block);
859 +#if 0
860 +       for (i = 0; i < ac.ac_b_len; i++)
861 +               J_ASSERT(!test_bit(ac.ac_b_start + i, bitmap_bh->b_data));
862 +#endif
863 +       mb_set_bits(bitmap_bh->b_data, ac.ac_b_start, ac.ac_b_len);
864 +
865 +       ext3_lock_group(sb, ac.ac_b_group);
866 +       gdp->bg_free_blocks_count =
867 +                       cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 
868 +                                       ac.ac_b_len);
869 +       ext3_unlock_group(sb, ac.ac_b_group);
870 +       spin_lock(&sbi->s_md_lock);
871 +       es->s_free_blocks_count =
872 +               cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) - ac.ac_b_len);
873 +       spin_unlock(&sbi->s_md_lock);
874 +
875 +       err = ext3_journal_dirty_metadata(handle, bitmap_bh);
876 +       if (err)
877 +               goto out_err;
878 +       err = ext3_journal_dirty_metadata(handle, gdp_bh);
879 +       if (err)
880 +               goto out_err;
881 +
882 +       sb->s_dirt = 1;
883 +       *errp = 0;
884 +
885 +       /* drop non-allocated, but dquote'd blocks */
886 +       J_ASSERT(*len >= ac.ac_b_len);
887 +       DQUOT_FREE_BLOCK(inode, *len - ac.ac_b_len);
888 +
889 +       *len = ac.ac_b_len;
890 +       J_ASSERT(block != 0);
891 +       goto out;
892 +
893 +out_err:
894 +       /* if we've already allocated something, roll it back */
895 +       if (ac.ac_status == AC_STATUS_FOUND) {
896 +               /* FIXME: free blocks here */
897 +       }
898 +
899 +       DQUOT_FREE_BLOCK(inode, *len);
900 +       *errp = err;
901 +       block = 0;
902 +out:
903 +       if (!(flags & 2)) {
904 +               /* block wasn't reserved before and we reserved it
905 +                * at the beginning of allocation. it doesn't matter
906 +                * whether we allocated anything or we failed: time
907 +                * to release reservation. NOTE: because I expect
908 +                * any multiblock request from delayed allocation
909 +                * path only, here is single block always */
910 +               ext3_mb_release_blocks(sb, 1);
911 +       }
912 +       return block;
913 +}
914 +
915 +int ext3_mb_generate_buddy(struct super_block *sb, int group)
916 +{
917 +       struct buffer_head *bh;
918 +       int i, err, count = 0;
919 +       struct ext3_buddy e3b;
920 +       
921 +       err = ext3_mb_load_desc(sb, group, &e3b);
922 +       if (err)
923 +               goto out;
924 +       memset(e3b.bd_bh->b_data, 0, sb->s_blocksize);
925 +       memset(e3b.bd_bh2->b_data, 0, sb->s_blocksize);
926 +
927 +       bh = read_block_bitmap_bh(sb, group);
928 +       if (bh == NULL) {
929 +               err = -EIO; 
930 +               goto out2;
931 +       }
932 +
933 +       /* loop over the blocks, nad create buddies for free ones */
934 +       for (i = 0; i < sb->s_blocksize * 8; i++) {
935 +               if (!test_bit(i, (void *) bh->b_data)) {
936 +                       mb_free_blocks(&e3b, i, 1);
937 +                       count++;
938 +               }
939 +       }
940 +       mb_check_buddy(&e3b);
941 +       ext3_mb_dirty_buddy(&e3b);
942 +
943 +out2:
944 +       ext3_mb_release_desc(&e3b);
945 +out:
946 +       return err;
947 +}
948 +
949 +#define MB_CREDITS     \
950 +       (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS)
951 +
952 +int ext3_mb_init_backend(struct super_block *sb)
953 +{
954 +       struct inode *root = sb->s_root->d_inode;
955 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
956 +       struct dentry *db;
957 +       tid_t target;
958 +       int err, i;
959 +
960 +       sbi->s_buddy_blocks = kmalloc(sizeof(struct ext3_buddy_group_blocks) *
961 +                                       sbi->s_groups_count, GFP_KERNEL);
962 +       if (sbi->s_buddy_blocks == NULL) {
963 +               printk("can't allocate mem for buddy maps\n");
964 +               return -ENOMEM;
965 +       }
966 +       memset(sbi->s_buddy_blocks, 0,
967 +               sizeof(struct ext3_buddy_group_blocks) * sbi->s_groups_count);
968 +       sbi->s_buddy = NULL;
969 +
970 +       down(&root->i_sem);
971 +       db = lookup_one_len(EXT3_BUDDY_FILE, sb->s_root,
972 +                               strlen(EXT3_BUDDY_FILE));
973 +       if (IS_ERR(db)) {
974 +               err = PTR_ERR(db);
975 +               printk("can't lookup buddy file: %d\n", err);
976 +               goto out;
977 +       }
978 +
979 +       if (db->d_inode != NULL) {
980 +               sbi->s_buddy = igrab(db->d_inode);
981 +               goto map;
982 +       }
983 +
984 +       err = ext3_create(root, db, S_IFREG, NULL);
985 +       if (err) {
986 +               printk("error while creation buddy file: %d\n", err);
987 +       } else {
988 +               sbi->s_buddy = igrab(db->d_inode);
989 +       }
990 +
991 +map:
992 +       for (i = 0; i < sbi->s_groups_count; i++) {
993 +               struct buffer_head *bh = NULL;
994 +               handle_t *handle;
995 +
996 +               handle = ext3_journal_start(sbi->s_buddy, MB_CREDITS);
997 +               if (IS_ERR(handle)) {
998 +                       err = PTR_ERR(handle);
999 +                       goto out2;
1000 +               }
1001 +               
1002 +               /* allocate block for bitmap */
1003 +               bh = ext3_getblk(handle, sbi->s_buddy, i * 2, 1, &err);
1004 +               if (bh == NULL) {
1005 +                       printk("can't get block for buddy bitmap: %d\n", err);
1006 +                       goto out2;
1007 +               }
1008 +               sbi->s_buddy_blocks[i].bb_bitmap = bh->b_blocknr;
1009 +               brelse(bh);
1010 +
1011 +               /* allocate block for buddy */
1012 +               bh = ext3_getblk(handle, sbi->s_buddy, i * 2 + 1, 1, &err);
1013 +               if (bh == NULL) {
1014 +                       printk("can't get block for buddy: %d\n", err);
1015 +                       goto out2;
1016 +               }
1017 +               sbi->s_buddy_blocks[i].bb_buddy = bh->b_blocknr;
1018 +               brelse(bh);
1019 +               ext3_journal_stop(handle, sbi->s_buddy);
1020 +               spin_lock_init(&sbi->s_buddy_blocks[i].bb_lock);
1021 +               sbi->s_buddy_blocks[i].bb_md_cur = NULL;
1022 +               sbi->s_buddy_blocks[i].bb_tid = 0;
1023 +       }
1024 +
1025 +       if ((target = log_start_commit(sbi->s_journal, NULL)))
1026 +               log_wait_commit(sbi->s_journal, target);
1027 +
1028 +out2:
1029 +       dput(db);
1030 +out:
1031 +       up(&root->i_sem);
1032 +       return err;
1033 +}
1034 +
1035 +int ext3_mb_release(struct super_block *sb)
1036 +{
1037 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1038 +       
1039 +       if (!test_opt(sb, MBALLOC))
1040 +               return 0;
1041 +
1042 +       /* release freed, non-committed blocks */
1043 +       spin_lock(&sbi->s_md_lock);
1044 +       list_splice_init(&sbi->s_closed_transaction,
1045 +                       &sbi->s_committed_transaction);
1046 +       list_splice_init(&sbi->s_active_transaction,
1047 +                       &sbi->s_committed_transaction);
1048 +       spin_unlock(&sbi->s_md_lock);
1049 +       ext3_mb_free_committed_blocks(sb);
1050 +
1051 +       if (sbi->s_buddy_blocks)
1052 +               kfree(sbi->s_buddy_blocks);
1053 +       if (sbi->s_buddy)
1054 +               iput(sbi->s_buddy);
1055 +       if (sbi->s_blocks_reserved)
1056 +               printk("ext3-fs: %ld blocks being reserved at umount!\n",
1057 +                               sbi->s_blocks_reserved);
1058 +       return 0;
1059 +}
1060 +
1061 +int ext3_mb_init(struct super_block *sb)
1062 +{
1063 +       struct ext3_super_block *es;
1064 +       int i;
1065 +
1066 +       if (!test_opt(sb, MBALLOC))
1067 +               return 0;
1068 +
1069 +       /* init file for buddy data */
1070 +       clear_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
1071 +       ext3_mb_init_backend(sb);
1072 +
1073 +       es = EXT3_SB(sb)->s_es;
1074 +       for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
1075 +               ext3_mb_generate_buddy(sb, i);
1076 +       spin_lock_init(&EXT3_SB(sb)->s_reserve_lock);
1077 +       spin_lock_init(&EXT3_SB(sb)->s_md_lock);
1078 +       INIT_LIST_HEAD(&EXT3_SB(sb)->s_active_transaction);
1079 +       INIT_LIST_HEAD(&EXT3_SB(sb)->s_closed_transaction);
1080 +       INIT_LIST_HEAD(&EXT3_SB(sb)->s_committed_transaction);
1081 +       set_opt(EXT3_SB(sb)->s_mount_opt, MBALLOC);
1082 +       printk("EXT3-fs: mballoc enabled\n");
1083 +       return 0;
1084 +}
1085 +
1086 +void ext3_mb_free_committed_blocks(struct super_block *sb)
1087 +{
1088 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1089 +       int err, i, count = 0, count2 = 0;
1090 +       struct ext3_free_metadata *md;
1091 +       struct ext3_buddy e3b;
1092 +
1093 +       if (list_empty(&sbi->s_committed_transaction))
1094 +               return;
1095 +
1096 +       /* there is committed blocks to be freed yet */
1097 +       do {
1098 +               /* get next array of blocks */
1099 +               md = NULL;
1100 +               spin_lock(&sbi->s_md_lock);
1101 +               if (!list_empty(&sbi->s_committed_transaction)) {
1102 +                       md = list_entry(sbi->s_committed_transaction.next,
1103 +                                       struct ext3_free_metadata, list);
1104 +                       list_del(&md->list);
1105 +               }
1106 +               spin_unlock(&sbi->s_md_lock);
1107 +
1108 +               if (md == NULL)
1109 +                       break;
1110 +
1111 +               mb_debug("gonna free %u blocks in group %u (0x%p):",
1112 +                               md->num, md->group, md);
1113 +
1114 +               err = ext3_mb_load_desc(sb, md->group, &e3b);
1115 +               BUG_ON(err != 0);
1116 +
1117 +               /* there are blocks to put in buddy to make them really free */
1118 +               count += md->num;
1119 +               count2++;
1120 +               ext3_lock_group(sb, md->group);
1121 +               for (i = 0; i < md->num; i++) {
1122 +                       mb_debug(" %u", md->blocks[i]);
1123 +                       mb_free_blocks(&e3b, md->blocks[i], 1);
1124 +               }
1125 +               mb_debug("\n");
1126 +               ext3_unlock_group(sb, md->group);
1127 +
1128 +               kfree(md);
1129 +               ext3_mb_dirty_buddy(&e3b);
1130 +               ext3_mb_release_desc(&e3b);
1131 +
1132 +       } while (md);
1133 +       mb_debug("freed %u blocks in %u structures\n", count, count2);
1134 +}
1135 +
1136 +void ext3_mb_poll_new_transaction(struct super_block *sb, handle_t *handle)
1137 +{
1138 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1139 +
1140 +       if (sbi->s_last_transaction == handle->h_transaction->t_tid)
1141 +               return;
1142 +
1143 +       /* new transaction! time to close last one and free blocks for
1144 +        * committed transaction. we know that only transaction can be
1145 +        * active, so previos transaction can be being logged and we
1146 +        * know that transaction before previous is known to be alreade
1147 +        * logged. this means that now we may free blocks freed in all
1148 +        * transactions before previous one. hope I'm clear enough ... */
1149 +
1150 +       spin_lock(&sbi->s_md_lock);
1151 +       if (sbi->s_last_transaction != handle->h_transaction->t_tid) {
1152 +               mb_debug("new transaction %lu, old %lu\n",
1153 +                               (unsigned long) handle->h_transaction->t_tid,
1154 +                               (unsigned long) sbi->s_last_transaction);
1155 +               list_splice_init(&sbi->s_closed_transaction,
1156 +                                       &sbi->s_committed_transaction);
1157 +               list_splice_init(&sbi->s_active_transaction,
1158 +                                       &sbi->s_closed_transaction);
1159 +               sbi->s_last_transaction = handle->h_transaction->t_tid;
1160 +       }
1161 +       spin_unlock(&sbi->s_md_lock);
1162 +
1163 +       ext3_mb_free_committed_blocks(sb);
1164 +}
1165 +
1166 +int ext3_mb_free_metadata(handle_t *handle, struct ext3_buddy *e3b,
1167 +                               int group, int block, int count)
1168 +{
1169 +       struct ext3_buddy_group_blocks *db = e3b->bd_bd;
1170 +       struct super_block *sb = e3b->bd_sb;
1171 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1172 +       struct ext3_free_metadata *md;
1173 +       int i;
1174 +
1175 +       ext3_lock_group(sb, group);
1176 +       for (i = 0; i < count; i++) {
1177 +               md = db->bb_md_cur;
1178 +               if (md && db->bb_tid != handle->h_transaction->t_tid) {
1179 +                       db->bb_md_cur = NULL;
1180 +                       md = NULL;
1181 +               }
1182 +
1183 +               if (md == NULL) {
1184 +                       ext3_unlock_group(sb, group);
1185 +                       md = kmalloc(sizeof(*md), GFP_KERNEL);
1186 +                       if (md == NULL)
1187 +                               return -ENOMEM;
1188 +                       md->num = 0;
1189 +                       md->group = group;
1190 +
1191 +                       ext3_lock_group(sb, group);
1192 +                       if (db->bb_md_cur == NULL) {
1193 +                               spin_lock(&sbi->s_md_lock);
1194 +                               list_add(&md->list, &sbi->s_active_transaction);
1195 +                               spin_unlock(&sbi->s_md_lock);
1196 +                               db->bb_md_cur = md;
1197 +                               db->bb_tid = handle->h_transaction->t_tid;
1198 +                               mb_debug("new md 0x%p for group %u\n",
1199 +                                                       md, md->group);
1200 +                       } else {
1201 +                               kfree(md);
1202 +                               md = db->bb_md_cur;
1203 +                       }
1204 +               }
1205 +
1206 +               BUG_ON(md->num >= EXT3_BB_MAX_BLOCKS);
1207 +               md->blocks[md->num] = block + i;
1208 +               md->num++;
1209 +               if (md->num == EXT3_BB_MAX_BLOCKS) {
1210 +                       /* no more space, put full container on a sb's list */
1211 +                       db->bb_md_cur = NULL;
1212 +               }
1213 +       }
1214 +       ext3_unlock_group(sb, group);
1215 +       return 0;
1216 +}
1217 +
1218 +void ext3_mb_free_blocks(handle_t *handle, struct inode *inode,
1219 +                       unsigned long block, unsigned long count, int metadata)
1220 +{
1221 +       struct buffer_head *bitmap_bh = NULL;
1222 +       struct ext3_group_desc *gdp;
1223 +       struct ext3_super_block *es;
1224 +       unsigned long bit, overflow;
1225 +       struct buffer_head *gd_bh;
1226 +       unsigned long block_group;
1227 +       struct ext3_sb_info *sbi;
1228 +       struct super_block *sb;
1229 +       struct ext3_buddy e3b;
1230 +       int err = 0, ret;
1231 +
1232 +       sb = inode->i_sb;
1233 +       if (!sb) {
1234 +               printk ("ext3_free_blocks: nonexistent device");
1235 +               return;
1236 +       }
1237 +
1238 +       ext3_mb_poll_new_transaction(sb, handle);
1239 +
1240 +       sbi = EXT3_SB(sb);
1241 +       es = EXT3_SB(sb)->s_es;
1242 +       if (block < le32_to_cpu(es->s_first_data_block) ||
1243 +           block + count < block ||
1244 +           block + count > le32_to_cpu(es->s_blocks_count)) {
1245 +               ext3_error (sb, "ext3_free_blocks",
1246 +                           "Freeing blocks not in datazone - "
1247 +                           "block = %lu, count = %lu", block, count);
1248 +               goto error_return;
1249 +       }
1250 +
1251 +       ext3_debug("freeing block %lu\n", block);
1252 +
1253 +do_more:
1254 +       overflow = 0;
1255 +       block_group = (block - le32_to_cpu(es->s_first_data_block)) /
1256 +                     EXT3_BLOCKS_PER_GROUP(sb);
1257 +       bit = (block - le32_to_cpu(es->s_first_data_block)) %
1258 +                     EXT3_BLOCKS_PER_GROUP(sb);
1259 +       /*
1260 +        * Check to see if we are freeing blocks across a group
1261 +        * boundary.
1262 +        */
1263 +       if (bit + count > EXT3_BLOCKS_PER_GROUP(sb)) {
1264 +               overflow = bit + count - EXT3_BLOCKS_PER_GROUP(sb);
1265 +               count -= overflow;
1266 +       }
1267 +       bitmap_bh = read_block_bitmap_bh(sb, block_group);
1268 +       if (!bitmap_bh)
1269 +               goto error_return;
1270 +       gdp = ext3_get_group_desc (sb, block_group, &gd_bh);
1271 +       if (!gdp)
1272 +               goto error_return;
1273 +
1274 +       if (in_range (le32_to_cpu(gdp->bg_block_bitmap), block, count) ||
1275 +           in_range (le32_to_cpu(gdp->bg_inode_bitmap), block, count) ||
1276 +           in_range (block, le32_to_cpu(gdp->bg_inode_table),
1277 +                     EXT3_SB(sb)->s_itb_per_group) ||
1278 +           in_range (block + count - 1, le32_to_cpu(gdp->bg_inode_table),
1279 +                     EXT3_SB(sb)->s_itb_per_group))
1280 +               ext3_error (sb, "ext3_free_blocks",
1281 +                           "Freeing blocks in system zones - "
1282 +                           "Block = %lu, count = %lu",
1283 +                           block, count);
1284 +
1285 +       BUFFER_TRACE(bitmap_bh, "getting write access");
1286 +       err = ext3_journal_get_write_access(handle, bitmap_bh);
1287 +       if (err)
1288 +               goto error_return;
1289 +
1290 +       /*
1291 +        * We are about to modify some metadata.  Call the journal APIs
1292 +        * to unshare ->b_data if a currently-committing transaction is
1293 +        * using it
1294 +        */
1295 +       BUFFER_TRACE(gd_bh, "get_write_access");
1296 +       err = ext3_journal_get_write_access(handle, gd_bh);
1297 +       if (err)
1298 +               goto error_return;
1299 +
1300 +       err = ext3_mb_load_desc(sb, block_group, &e3b);
1301 +       if (err)
1302 +               goto error_return;
1303 +
1304 +       if (metadata) {
1305 +               /* blocks being freed are metadata. these blocks shouldn't
1306 +                * be used until this transaction is committed */
1307 +               ext3_mb_free_metadata(handle, &e3b, block_group, bit, count);
1308 +       } else { 
1309 +               ext3_lock_group(sb, block_group);
1310 +               mb_free_blocks(&e3b, bit, count);
1311 +               gdp->bg_free_blocks_count =
1312 +                       cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
1313 +               ext3_unlock_group(sb, block_group);
1314 +               spin_lock(&sbi->s_md_lock);
1315 +               es->s_free_blocks_count =
1316 +                       cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) + count);
1317 +               spin_unlock(&sbi->s_md_lock);
1318 +       }
1319 +       
1320 +       ext3_mb_dirty_buddy(&e3b);
1321 +       ext3_mb_release_desc(&e3b);
1322 +
1323 +       /* FIXME: undo logic will be implemented later and another way */
1324 +       mb_clear_bits(bitmap_bh->b_data, bit, count);
1325 +       DQUOT_FREE_BLOCK(inode, count);
1326 +
1327 +       /* We dirtied the bitmap block */
1328 +       BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
1329 +       err = ext3_journal_dirty_metadata(handle, bitmap_bh);
1330 +
1331 +       /* And the group descriptor block */
1332 +       BUFFER_TRACE(gd_bh, "dirtied group descriptor block");
1333 +       ret = ext3_journal_dirty_metadata(handle, gd_bh);
1334 +       if (!err) err = ret;
1335 +
1336 +       if (overflow && !err) {
1337 +               block += count;
1338 +               count = overflow;
1339 +               goto do_more;
1340 +       }
1341 +       sb->s_dirt = 1;
1342 +error_return:
1343 +       ext3_std_error(sb, err);
1344 +       return;
1345 +}
1346 +
1347 +int ext3_mb_reserve_blocks(struct super_block *sb, int blocks)
1348 +{
1349 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1350 +       struct ext3_super_block *es;
1351 +       int free, ret = -ENOSPC;
1352 +
1353 +       BUG_ON(blocks < 0);
1354 +       es = EXT3_SB(sb)->s_es;
1355 +       spin_lock(&sbi->s_reserve_lock);
1356 +       free = le32_to_cpu(es->s_free_blocks_count);
1357 +       if (blocks <= free - sbi->s_blocks_reserved) {
1358 +               sbi->s_blocks_reserved += blocks;
1359 +               ret = 0;
1360 +       }
1361 +       spin_unlock(&sbi->s_reserve_lock);
1362 +       return ret;
1363 +}
1364 +
1365 +void ext3_mb_release_blocks(struct super_block *sb, int blocks)
1366 +{
1367 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1368 +
1369 +       BUG_ON(blocks < 0);
1370 +       spin_lock(&sbi->s_reserve_lock);
1371 +       sbi->s_blocks_reserved -= blocks;
1372 +       if (sbi->s_blocks_reserved < 0)
1373 +               printk("EXT3-fs: reserve leak %ld\n", sbi->s_blocks_reserved);
1374 +       if (sbi->s_blocks_reserved < 0)
1375 +               sbi->s_blocks_reserved = 0;
1376 +       spin_unlock(&sbi->s_reserve_lock);
1377 +}
1378 +
1379 +int ext3_new_block(handle_t *handle, struct inode *inode,
1380 +                       unsigned long goal, u32 *pc, u32 *pb, int *errp)
1381 +{
1382 +       int ret, len;
1383 +
1384 +       if (!test_opt(inode->i_sb, MBALLOC)) {
1385 +               ret = ext3_new_block_old(handle, inode, goal, pc, pb, errp);
1386 +               goto out;
1387 +       }
1388 +       len = 1;
1389 +       ret = ext3_mb_new_blocks(handle, inode, goal, &len, 0, errp);
1390 +out:
1391 +       return ret;
1392 +}
1393 +
1394 +
1395 +void ext3_free_blocks(handle_t *handle, struct inode * inode,
1396 +                       unsigned long block, unsigned long count, int metadata)
1397 +{
1398 +       if (!test_opt(inode->i_sb, MBALLOC))
1399 +               ext3_free_blocks_old(handle, inode, block, count);
1400 +       else
1401 +               ext3_mb_free_blocks(handle, inode, block, count, metadata);
1402 +       return;
1403 +}
1404 +
1405 Index: linux-2.4.24/fs/ext3/super.c
1406 ===================================================================
1407 --- linux-2.4.24.orig/fs/ext3/super.c   2004-08-06 03:59:09.000000000 +0400
1408 +++ linux-2.4.24/fs/ext3/super.c        2004-08-06 03:59:09.000000000 +0400
1409 @@ -529,6 +529,7 @@
1410         kdev_t j_dev = sbi->s_journal->j_dev;
1411         int i;
1412  
1413 +       ext3_mb_release(sb);
1414         J_ASSERT(sbi->s_delete_inodes == 0);
1415         ext3_ext_release(sb);
1416         ext3_xattr_put_super(sb);
1417 @@ -781,6 +782,8 @@
1418                         else if (want_numeric(value, "journal", inum))
1419                                 return 0;
1420                 }
1421 +               else if (!strcmp (this_char, "mballoc"))
1422 +                       set_opt (*mount_options, MBALLOC);
1423                 else if (!strcmp (this_char, "noload"))
1424                         set_opt (*mount_options, NOLOAD);
1425                 else if (!strcmp (this_char, "data")) {
1426 @@ -1406,6 +1409,7 @@
1427                 "writeback");
1428  
1429         ext3_ext_init(sb);
1430 +       ext3_mb_init(sb);
1431  
1432         if (test_opt(sb, PDIROPS)) {
1433                 printk (KERN_INFO "EXT3-fs: mounted filesystem with parallel dirops\n");
1434 Index: linux-2.4.24/fs/ext3/Makefile
1435 ===================================================================
1436 --- linux-2.4.24.orig/fs/ext3/Makefile  2004-08-06 03:59:07.000000000 +0400
1437 +++ linux-2.4.24/fs/ext3/Makefile       2004-08-06 03:59:09.000000000 +0400
1438 @@ -13,7 +13,7 @@
1439  
1440  obj-y    := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
1441                 ioctl.o namei.o super.o symlink.o hash.o ext3-exports.o \
1442 -               xattr_trusted.o extents.o extents-in-ea.o
1443 +               xattr_trusted.o extents.o extents-in-ea.o mballoc.o
1444  export-objs += extents.o
1445  
1446  obj-m    := $(O_TARGET)
1447 Index: linux-2.4.24/fs/ext3/balloc.c
1448 ===================================================================
1449 --- linux-2.4.24.orig/fs/ext3/balloc.c  2004-08-06 00:42:23.000000000 +0400
1450 +++ linux-2.4.24/fs/ext3/balloc.c       2004-08-06 03:59:09.000000000 +0400
1451 @@ -203,8 +203,7 @@
1452   * differentiating between a group for which we have never performed a bitmap
1453   * IO request, and a group for which the last bitmap read request failed.
1454   */
1455 -static inline int load_block_bitmap (struct super_block * sb,
1456 -                                    unsigned int block_group)
1457 +int load_block_bitmap (struct super_block * sb, unsigned int block_group)
1458  {
1459         int slot;
1460         
1461 @@ -253,8 +252,8 @@
1462  }
1463  
1464  /* Free given blocks, update quota and i_blocks field */
1465 -void ext3_free_blocks (handle_t *handle, struct inode * inode,
1466 -                       unsigned long block, unsigned long count)
1467 +void ext3_free_blocks_old (handle_t *handle, struct inode * inode,
1468 +                               unsigned long block, unsigned long count)
1469  {
1470         struct buffer_head *bitmap_bh;
1471         struct buffer_head *gd_bh;
1472 @@ -528,9 +527,9 @@
1473   * bitmap, and then for any free bit if that fails.
1474   * This function also updates quota and i_blocks field.
1475   */
1476 -int ext3_new_block (handle_t *handle, struct inode * inode,
1477 -               unsigned long goal, u32 * prealloc_count,
1478 -               u32 * prealloc_block, int * errp)
1479 +int ext3_new_block_old (handle_t *handle, struct inode * inode,
1480 +                       unsigned long goal, u32 * prealloc_count,
1481 +                       u32 * prealloc_block, int * errp)
1482  {
1483         struct buffer_head * bh, *bhtmp;
1484         struct buffer_head * bh2;
1485 Index: linux-2.4.24/fs/ext3/namei.c
1486 ===================================================================
1487 --- linux-2.4.24.orig/fs/ext3/namei.c   2004-08-06 03:59:09.000000000 +0400
1488 +++ linux-2.4.24/fs/ext3/namei.c        2004-08-06 03:59:09.000000000 +0400
1489 @@ -1944,7 +1944,7 @@
1490   * If the create succeeds, we fill in the inode information
1491   * with d_instantiate(). 
1492   */
1493 -static int ext3_create (struct inode * dir, struct dentry * dentry, int mode)
1494 +int ext3_create (struct inode * dir, struct dentry * dentry, int mode)
1495  {
1496         handle_t *handle; 
1497         struct inode * inode;
1498 Index: linux-2.4.24/fs/ext3/inode.c
1499 ===================================================================
1500 --- linux-2.4.24.orig/fs/ext3/inode.c   2004-08-06 03:59:09.000000000 +0400
1501 +++ linux-2.4.24/fs/ext3/inode.c        2004-08-06 03:59:09.000000000 +0400
1502 @@ -254,7 +254,7 @@
1503                 inode->u.ext3_i.i_prealloc_count = 0;
1504                 inode->u.ext3_i.i_prealloc_block = 0;
1505                 /* Writer: end */
1506 -               ext3_free_blocks (inode, block, total);
1507 +               ext3_free_blocks (inode, block, total, 1);
1508         }
1509         unlock_kernel();
1510  #endif
1511 @@ -618,7 +618,7 @@
1512                 ext3_journal_forget(handle, branch[i].bh);
1513         }
1514         for (i = 0; i < keys; i++)
1515 -               ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
1516 +               ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
1517         return err;
1518  }
1519  
1520 @@ -722,7 +722,7 @@
1521         if (err == -EAGAIN)
1522                 for (i = 0; i < num; i++)
1523                         ext3_free_blocks(handle, inode, 
1524 -                                        le32_to_cpu(where[i].key), 1);
1525 +                                        le32_to_cpu(where[i].key), 1, 1);
1526         return err;
1527  }
1528  
1529 @@ -1650,7 +1650,7 @@
1530                 }
1531         }
1532  
1533 -       ext3_free_blocks(handle, inode, block_to_free, count);
1534 +       ext3_free_blocks(handle, inode, block_to_free, count, 1);
1535  }
1536  
1537  /**
1538 @@ -1821,7 +1821,7 @@
1539                                 ext3_journal_test_restart(handle, inode);
1540                         }
1541  
1542 -                       ext3_free_blocks(handle, inode, nr, 1);
1543 +                       ext3_free_blocks(handle, inode, nr, 1, 1);
1544  
1545                         if (parent_bh) {
1546                                 /*
1547 Index: linux-2.4.24/fs/ext3/extents.c
1548 ===================================================================
1549 --- linux-2.4.24.orig/fs/ext3/extents.c 2004-08-06 03:59:01.000000000 +0400
1550 +++ linux-2.4.24/fs/ext3/extents.c      2004-08-06 03:59:09.000000000 +0400
1551 @@ -741,7 +741,7 @@
1552                 for (i = 0; i < depth; i++) {
1553                         if (!ablocks[i])
1554                                 continue;
1555 -                       ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
1556 +                       ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
1557                 }
1558         }
1559         kfree(ablocks);
1560 @@ -1385,7 +1385,7 @@
1561                         path->p_idx->e_leaf);
1562         bh = sb_get_hash_table(tree->inode->i_sb, path->p_idx->e_leaf);
1563         ext3_forget(handle, 1, tree->inode, bh, path->p_idx->e_leaf);
1564 -       ext3_free_blocks(handle, tree->inode, path->p_idx->e_leaf, 1);
1565 +       ext3_free_blocks(handle, tree->inode, path->p_idx->e_leaf, 1, 1);
1566         return err;
1567  }
1568  
1569 @@ -1842,10 +1842,12 @@
1570         int needed = ext3_remove_blocks_credits(tree, ex, from, to);
1571         handle_t *handle = ext3_journal_start(tree->inode, needed);
1572         struct buffer_head *bh;
1573 -       int i;
1574 +       int i, metadata = 0;
1575  
1576         if (IS_ERR(handle))
1577                 return PTR_ERR(handle);
1578 +       if (S_ISDIR(tree->inode->i_mode))
1579 +               metadata = 1;
1580         if (from >= ex->e_block && to == ex->e_block + ex->e_num - 1) {
1581                 /* tail removal */
1582                 unsigned long num, start;
1583 @@ -1857,7 +1859,7 @@
1584                         bh = sb_get_hash_table(tree->inode->i_sb, start + i);
1585                         ext3_forget(handle, 0, tree->inode, bh, start + i);
1586                 }
1587 -               ext3_free_blocks(handle, tree->inode, start, num);
1588 +               ext3_free_blocks(handle, tree->inode, start, num, metadata);
1589         } else if (from == ex->e_block && to <= ex->e_block + ex->e_num - 1) {
1590                 printk("strange request: removal %lu-%lu from %u:%u\n",
1591                         from, to, ex->e_block, ex->e_num);
1592 Index: linux-2.4.24/fs/ext3/xattr.c
1593 ===================================================================
1594 --- linux-2.4.24.orig/fs/ext3/xattr.c   2004-08-06 03:59:08.000000000 +0400
1595 +++ linux-2.4.24/fs/ext3/xattr.c        2004-08-06 03:59:09.000000000 +0400
1596 @@ -174,7 +174,7 @@
1597  ext3_xattr_free_block(handle_t *handle, struct inode * inode,
1598                       unsigned long block)
1599  {
1600 -       ext3_free_blocks(handle, inode, block, 1);
1601 +       ext3_free_blocks(handle, inode, block, 1, 1);
1602         inode->i_blocks -= inode->i_sb->s_blocksize >> 9;
1603  }
1604  
1605 @@ -182,7 +182,7 @@
1606  # define ext3_xattr_quota_free(inode) \
1607         DQUOT_FREE_BLOCK(inode, 1)
1608  # define ext3_xattr_free_block(handle, inode, block) \
1609 -       ext3_free_blocks(handle, inode, block, 1)
1610 +       ext3_free_blocks(handle, inode, block, 1, 1)
1611  #endif
1612  
1613  #if LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18)
1614 Index: linux-2.4.24/include/linux/ext3_fs.h
1615 ===================================================================
1616 --- linux-2.4.24.orig/include/linux/ext3_fs.h   2004-08-06 03:59:09.000000000 +0400
1617 +++ linux-2.4.24/include/linux/ext3_fs.h        2004-08-06 03:59:09.000000000 +0400
1618 @@ -343,6 +343,7 @@
1619  #define EXT3_MOUNT_ASYNCDEL            0x20000 /* Delayed deletion */
1620  #define EXT3_MOUNT_EXTENTS             0x40000 /* Extents support */
1621  #define EXT3_MOUNT_EXTDEBUG            0x80000 /* Extents debug */
1622 +#define EXT3_MOUNT_MBALLOC             0x100000/* buddy allocation support */
1623  
1624  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
1625  #ifndef _LINUX_EXT2_FS_H
1626 @@ -677,7 +678,7 @@
1627  extern int ext3_new_block (handle_t *, struct inode *, unsigned long,
1628                                             __u32 *, __u32 *, int *);
1629  extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
1630 -                             unsigned long);
1631 +                             unsigned long, int);
1632  extern unsigned long ext3_count_free_blocks (struct super_block *);
1633  extern void ext3_check_blocks_bitmap (struct super_block *);
1634  extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb,
1635 Index: linux-2.4.24/include/linux/ext3_fs_sb.h
1636 ===================================================================
1637 --- linux-2.4.24.orig/include/linux/ext3_fs_sb.h        2004-08-06 03:59:09.000000000 +0400
1638 +++ linux-2.4.24/include/linux/ext3_fs_sb.h     2004-08-06 04:01:55.000000000 +0400
1639 @@ -19,6 +19,7 @@
1640  #ifdef __KERNEL__
1641  #include <linux/timer.h>
1642  #include <linux/wait.h>
1643 +#include <linux/list.h>
1644  #endif
1645  
1646  /*
1647 @@ -31,6 +32,25 @@
1648  
1649  #define EXT3_DELETE_THREAD
1650  
1651 +#define EXT3_BB_MAX_BLOCKS     30
1652 +struct ext3_free_metadata {
1653 +       unsigned short group;
1654 +       unsigned short num;
1655 +       unsigned short blocks[EXT3_BB_MAX_BLOCKS];
1656 +       struct list_head list;
1657 +};
1658 +
1659 +#define EXT3_BB_MAX_ORDER      14
1660 +
1661 +struct ext3_buddy_group_blocks {
1662 +       unsigned long   bb_bitmap;
1663 +       unsigned long   bb_buddy;
1664 +       spinlock_t      bb_lock;
1665 +       unsigned        bb_counters[EXT3_BB_MAX_ORDER];
1666 +       struct ext3_free_metadata *bb_md_cur;
1667 +       unsigned long bb_tid;
1668 +};
1669 +
1670  /*
1671   * third extended-fs super-block data in memory
1672   */
1673 @@ -87,6 +107,17 @@
1674         wait_queue_head_t s_delete_waiter_queue;
1675  #endif
1676         u32 s_mdsnum;
1677 +
1678 +       /* for buddy allocator */
1679 +       struct ext3_buddy_group_blocks *s_buddy_blocks;
1680 +       struct inode *s_buddy;
1681 +       long s_blocks_reserved;
1682 +       spinlock_t s_reserve_lock;
1683 +       struct list_head s_active_transaction;
1684 +       struct list_head s_closed_transaction;
1685 +       struct list_head s_committed_transaction;
1686 +       spinlock_t s_md_lock;
1687 +       unsigned int s_last_transaction;
1688  };
1689  
1690  #endif /* _LINUX_EXT3_FS_SB */
1691 Index: linux-2.4.24/include/asm-i386/bitops.h
1692 ===================================================================
1693 --- linux-2.4.24.orig/include/asm-i386/bitops.h 2004-08-06 01:43:20.000000000 +0400
1694 +++ linux-2.4.24/include/asm-i386/bitops.h      2004-08-06 03:59:09.000000000 +0400
1695 @@ -352,6 +352,67 @@
1696  }
1697  
1698  /**
1699 + * find_first_bit - find the first set bit in a memory region
1700 + * @addr: The address to start the search at
1701 + * @size: The maximum size to search
1702 + *
1703 + * Returns the bit-number of the first set bit, not the number of the byte
1704 + * containing a bit.
1705 + */
1706 +static __inline__ int find_first_bit(const unsigned long *addr, unsigned size)
1707 +{
1708 +       int d0, d1;
1709 +       int res;
1710 +
1711 +       /* This looks at memory. Mark it volatile to tell gcc not to move it around */
1712 +       __asm__ __volatile__(
1713 +               "xorl %%eax,%%eax\n\t"
1714 +               "repe; scasl\n\t"
1715 +               "jz 1f\n\t"
1716 +               "leal -4(%%edi),%%edi\n\t"
1717 +               "bsfl (%%edi),%%eax\n"
1718 +               "1:\tsubl %%ebx,%%edi\n\t"
1719 +               "shll $3,%%edi\n\t"
1720 +               "addl %%edi,%%eax"
1721 +               :"=a" (res), "=&c" (d0), "=&D" (d1)
1722 +               :"1" ((size + 31) >> 5), "2" (addr), "b" (addr) : "memory");
1723 +       return res;
1724 +}
1725 +
1726 +/**
1727 + * find_next_bit - find the first set bit in a memory region
1728 + * @addr: The address to base the search on
1729 + * @offset: The bitnumber to start searching at
1730 + * @size: The maximum size to search
1731 + */
1732 +static __inline__ int find_next_bit(const unsigned long *addr, int size, int offset)
1733 +{
1734 +       const unsigned long *p = addr + (offset >> 5);
1735 +       int set = 0, bit = offset & 31, res;
1736 +
1737 +       if (bit) {
1738 +               /*
1739 +                * Look for nonzero in the first 32 bits:
1740 +                */
1741 +               __asm__("bsfl %1,%0\n\t"
1742 +                       "jne 1f\n\t"
1743 +                       "movl $32, %0\n"
1744 +                       "1:"
1745 +                       : "=r" (set)
1746 +                       : "r" (*p >> bit));
1747 +               if (set < (32 - bit))
1748 +                       return set + offset;
1749 +               set = 32 - bit;
1750 +               p++;
1751 +       }
1752 +       /*
1753 +        * No set bit yet, search remaining full words for a bit
1754 +        */
1755 +       res = find_first_bit (p, size - 32 * (p - addr));
1756 +       return (offset + set + res);
1757 +}
1758 +
1759 +/**
1760   * hweightN - returns the hamming weight of a N-bit word
1761   * @x: the word to weigh
1762   *