Whamcloud - gitweb
Branch HEAD
author adilger <adilger>
Wed, 14 Feb 2007 04:56:39 +0000 (04:56 +0000)
committer adilger <adilger>
Wed, 14 Feb 2007 04:56:39 +0000 (04:56 +0000)
Fix 2.6.18 ext3 mballoc, extents patches for several omissions.
Reorder patch hunks to match other patches to facilitate comparisons.
b=10090  ldiskfs corruption under memory pressure, coverity fixes
b=6191   mballoc fails on x86_64 > 2TB
b=10634  mballoc using wrong find_next_bit() on big endian systems
b=6449   don't print "too long searching" message to console

Compile tested.

ldiskfs/kernel_patches/patches/ext3-extents-2.6.18-vanilla.patch
ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.18-vanilla.patch
lustre/kernel_patches/patches/ext3-extents-2.6.18-vanilla.patch
lustre/kernel_patches/patches/ext3-mballoc2-2.6.18-vanilla.patch

index f2988a2..8bd7acb 100644 (file)
@@ -2538,26 +2538,30 @@ Index: linux-stage/fs/ext3/super.c
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
        Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
        Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
-+      Opt_extents, Opt_extdebug,
++      Opt_extents, Opt_noextents, Opt_extdebug,
        Opt_grpquota
  };
  
-@@ -690,6 +694,8 @@ static match_table_t tokens = {
+@@ -690,6 +694,9 @@ static match_table_t tokens = {
        {Opt_iopen, "iopen"},
        {Opt_noiopen, "noiopen"},
        {Opt_iopen_nopriv, "iopen_nopriv"},
 +      {Opt_extents, "extents"},
++      {Opt_noextents, "noextents"},
 +      {Opt_extdebug, "extdebug"},
        {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL},
        {Opt_resize, "resize"},
-@@ -1035,6 +1041,12 @@ clear_qf_name:
+@@ -1035,6 +1041,15 @@ clear_qf_name:
                case Opt_bh:
                        clear_opt(sbi->s_mount_opt, NOBH);
                        break;
 +              case Opt_extents:
 +                      set_opt (sbi->s_mount_opt, EXTENTS);
 +                      break;
++              case Opt_noextents:
++                      clear_opt (sbi->s_mount_opt, EXTENTS);
++                      break;
 +              case Opt_extdebug:
 +                      set_opt (sbi->s_mount_opt, EXTDEBUG);
 +                      break;
index 20fa78a..d83625a 100644 (file)
@@ -1,8 +1,400 @@
+Index: linux-stage/include/linux/ext3_fs.h
+===================================================================
+--- linux-stage.orig/include/linux/ext3_fs.h   2006-07-16 02:29:43.000000000 +0800
++++ linux-stage/include/linux/ext3_fs.h        2006-07-16 02:29:49.000000000 +0800
+@@ -53,6 +53,14 @@
+ #define ext3_debug(f, a...)   do {} while (0)
+ #endif
++#define EXT3_MULTIBLOCK_ALLOCATOR     1
++
++#define EXT3_MB_HINT_MERGE            1
++#define EXT3_MB_HINT_RESERVED         2
++#define EXT3_MB_HINT_METADATA         4
++#define EXT3_MB_HINT_FIRST            8
++#define EXT3_MB_HINT_BEST             16
++
+ /*
+  * Special inodes numbers
+  */
+@@ -379,6 +387,7 @@ struct ext3_inode {
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x800000/* Make iopen world-readable */
+ #define EXT3_MOUNT_EXTENTS            0x1000000/* Extents support */
+ #define EXT3_MOUNT_EXTDEBUG           0x2000000/* Extents debug */
++#define EXT3_MOUNT_MBALLOC            0x4000000/* Buddy allocation support */
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef clear_opt
+@@ -405,6 +413,14 @@
+ #define ext3_find_first_zero_bit      ext2_find_first_zero_bit
+ #define ext3_find_next_zero_bit               ext2_find_next_zero_bit
++#ifndef ext2_find_next_le_bit
++#ifdef __LITTLE_ENDIAN
++#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off))
++#else
++#error "mballoc needs a patch for big-endian systems - CFS bug 10634"
++#endif        /* __LITTLE_ENDIAN */
++#endif        /* !ext2_find_next_le_bit */
++
+ /*
+  * Maximal mount counts between two filesystem checks
+  */
+@@ -749,12 +758,12 @@ ext3_group_first_block_no(struct super_b
+ /* balloc.c */
+ extern int ext3_bg_has_super(struct super_block *sb, int group);
+ extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
+-extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode,
++extern ext3_fsblk_t ext3_new_block_old(handle_t *handle, struct inode *inode,
+                       ext3_fsblk_t goal, int *errp);
+ extern ext3_fsblk_t ext3_new_blocks (handle_t *handle, struct inode *inode,
+                       ext3_fsblk_t goal, unsigned long *count, int *errp);
+ extern void ext3_free_blocks (handle_t *handle, struct inode *inode,
+-                      ext3_fsblk_t block, unsigned long count);
++                      ext3_fsblk_t block, unsigned long count, int metadata);
+ extern void ext3_free_blocks_sb (handle_t *handle, struct super_block *sb,
+                                ext3_fsblk_t block, unsigned long count,
+                               unsigned long *pdquot_freed_blocks);
+@@ -881,6 +890,21 @@ extern void ext3_extents_initialize_bloc
+ extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
+                         unsigned int cmd, unsigned long arg);
++/* mballoc.c */
++extern long ext3_mb_stats;
++extern long ext3_mb_max_to_scan;
++extern int ext3_mb_init(struct super_block *sb, int needs_recovery);
++extern int ext3_mb_release(struct super_block *sb);
++extern ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode,
++                                 ext3_fsblk_t goal, int *errp);
++extern ext3_fsblk_t ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
++                                     ext3_fsblk_t goal, int *len, int flags,
++                                     int *errp);
++extern int ext3_mb_reserve_blocks(struct super_block *sb, int);
++extern void ext3_mb_release_blocks(struct super_block *sb, int);
++int __init init_ext3_proc(void);
++void exit_ext3_proc(void);
++
+ #endif        /* __KERNEL__ */
+ /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
+Index: linux-stage/include/linux/ext3_fs_sb.h
+===================================================================
+--- linux-stage.orig/include/linux/ext3_fs_sb.h        2006-07-16 02:29:43.000000000 +0800
++++ linux-stage/include/linux/ext3_fs_sb.h     2006-07-16 02:29:49.000000000 +0800
+@@ -21,8 +21,14 @@
+ #include <linux/wait.h>
+ #include <linux/blockgroup_lock.h>
+ #include <linux/percpu_counter.h>
++#include <linux/list.h>
+ #endif
+ #include <linux/rbtree.h>
++#include <linux/proc_fs.h>
++
++struct ext3_buddy_group_blocks;
++struct ext3_mb_history;
++#define EXT3_BB_MAX_BLOCKS
+ /*
+  * third extended-fs super-block data in memory
+@@ -78,6 +84,43 @@ struct ext3_sb_info {
+       char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
+       int s_jquota_fmt;                       /* Format of quota to use */
+ #endif
++
++      /* for buddy allocator */
++      struct ext3_group_info ***s_group_info;
++      struct inode *s_buddy_cache;
++      long s_blocks_reserved;
++      spinlock_t s_reserve_lock;
++      struct list_head s_active_transaction;
++      struct list_head s_closed_transaction;
++      struct list_head s_committed_transaction;
++      spinlock_t s_md_lock;
++      tid_t s_last_transaction;
++      int s_mb_factor;
++      unsigned short *s_mb_offsets, *s_mb_maxs;
++      unsigned long s_stripe;
++
++      /* history to debug policy */
++      struct ext3_mb_history *s_mb_history;
++      int s_mb_history_cur;
++      int s_mb_history_max;
++      struct proc_dir_entry *s_mb_proc;
++      spinlock_t s_mb_history_lock;
++
++      /* stats for buddy allocator */
++      atomic_t s_bal_reqs;    /* number of reqs with len > 1 */
++      atomic_t s_bal_success; /* we found long enough chunks */
++      atomic_t s_bal_allocated;       /* in blocks */
++      atomic_t s_bal_ex_scanned;      /* total extents scanned */
++      atomic_t s_bal_goals;   /* goal hits */
++      atomic_t s_bal_breaks;  /* too long searches */
++      atomic_t s_bal_2orders; /* 2^order hits */
++      spinlock_t s_bal_lock;
++      unsigned long s_mb_buddies_generated;
++      unsigned long long s_mb_generation_time;
+ };
++
++#define EXT3_GROUP_INFO(sb, group)                                       \
++      EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \
++                               [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)]
+ #endif        /* _LINUX_EXT3_FS_SB */
+Index: linux-stage/fs/ext3/super.c
+===================================================================
+--- linux-stage.orig/fs/ext3/super.c   2006-07-16 02:29:43.000000000 +0800
++++ linux-stage/fs/ext3/super.c        2006-07-16 02:29:49.000000000 +0800
+@@ -391,6 +391,7 @@ static void ext3_put_super (struct super
+       struct ext3_super_block *es = sbi->s_es;
+       int i;
++      ext3_mb_release(sb);
+       ext3_ext_release(sb);
+       ext3_xattr_put_super(sb);
+       journal_destroy(sbi->s_journal);
+@@ -642,6 +643,7 @@ enum {
+       Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
+       Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+       Opt_extents, Opt_noextents, Opt_extdebug,
++      Opt_mballoc, Opt_nomballoc, Opt_stripe,
+       Opt_grpquota
+ };
+@@ -696,6 +697,9 @@ static match_table_t tokens = {
+       {Opt_extents, "extents"},
+       {Opt_noextents, "noextents"},
+       {Opt_extdebug, "extdebug"},
++      {Opt_mballoc, "mballoc"},
++      {Opt_nomballoc, "nomballoc"},
++      {Opt_stripe, "stripe=%u"},
+       {Opt_barrier, "barrier=%u"},
+       {Opt_err, NULL},
+       {Opt_resize, "resize"},
+@@ -1047,6 +1049,19 @@ clear_qf_name:
+               case Opt_extdebug:
+                       set_opt (sbi->s_mount_opt, EXTDEBUG);
+                       break;
++              case Opt_mballoc:
++                      set_opt (sbi->s_mount_opt, MBALLOC);
++                      break;
++              case Opt_nomballoc:
++                      clear_opt(sbi->s_mount_opt, MBALLOC);
++                      break;
++              case Opt_stripe:
++                      if (match_int(&args[0], &option))
++                              return 0;
++                      if (option < 0)
++                              return 0;
++                      sbi->s_stripe = option;
++                      break;
+               default:
+                       printk (KERN_ERR
+                               "EXT3-fs: Unrecognized mount option \"%s\" "
+@@ -1773,6 +1778,7 @@ static int ext3_fill_super (struct super
+               "writeback");
+       ext3_ext_init(sb);
++      ext3_mb_init(sb, needs_recovery);
+       lock_kernel();
+       return 0;
+@@ -2712,7 +2718,13 @@ static struct file_system_type ext3_fs_t
+ static int __init init_ext3_fs(void)
+ {
+-      int err = init_ext3_xattr();
++      int err;
++
++      err = init_ext3_proc();
++      if (err)
++              return err;
++
++      err = init_ext3_xattr();
+       if (err)
+               return err;
+       err = init_inodecache();
+@@ -2734,6 +2746,7 @@ static void __exit exit_ext3_fs(void)
+       unregister_filesystem(&ext3_fs_type);
+       destroy_inodecache();
+       exit_ext3_xattr();
++      exit_ext3_proc();
+ }
+ int ext3_prep_san_write(struct inode *inode, long *blocks,
+Index: linux-stage/fs/ext3/extents.c
+===================================================================
+--- linux-stage.orig/fs/ext3/extents.c 2006-07-16 02:29:43.000000000 +0800
++++ linux-stage/fs/ext3/extents.c      2006-07-16 02:29:49.000000000 +0800
+@@ -771,7 +771,7 @@ cleanup:
+               for (i = 0; i < depth; i++) {
+                       if (!ablocks[i])
+                               continue;
+-                      ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
++                      ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
+               }
+       }
+       kfree(ablocks);
+@@ -1428,7 +1428,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+                 path->p_idx->ei_leaf);
+       bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
+       ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
+-      ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
++      ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1);
+       return err;
+ }
+@@ -1913,10 +1913,12 @@ ext3_remove_blocks(struct ext3_extents_t
+       int needed = ext3_remove_blocks_credits(tree, ex, from, to);
+       handle_t *handle = ext3_journal_start(tree->inode, needed);
+       struct buffer_head *bh;
+-      int i;
++      int i, metadata = 0;
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
++      if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode))
++              metadata = 1;
+       if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
+               /* tail removal */
+               unsigned long num, start;
+@@ -1928,7 +1930,7 @@ ext3_remove_blocks(struct ext3_extents_t
+                       bh = sb_find_get_block(tree->inode->i_sb, start + i);
+                       ext3_forget(handle, 0, tree->inode, bh, start + i);
+               }
+-              ext3_free_blocks(handle, tree->inode, start, num);
++              ext3_free_blocks(handle, tree->inode, start, num, metadata);
+       } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
+               printk("strange request: removal %lu-%lu from %u:%u\n",
+                      from, to, ex->ee_block, ex->ee_len);
+Index: linux-stage/fs/ext3/inode.c
+===================================================================
+--- linux-stage.orig/fs/ext3/inode.c   2006-07-16 02:29:43.000000000 +0800
++++ linux-stage/fs/ext3/inode.c        2006-07-16 02:29:49.000000000 +0800
+@@ -562,7 +562,7 @@ static int ext3_alloc_blocks(handle_t *h
+       return ret;
+ failed_out:
+       for (i = 0; i <index; i++)
+-              ext3_free_blocks(handle, inode, new_blocks[i], 1);
++              ext3_free_blocks(handle, inode, new_blocks[i], 1, 1);
+       return ret;
+ }
+@@ -661,9 +661,9 @@ failed:
+               ext3_journal_forget(handle, branch[i].bh);
+       }
+       for (i = 0; i <indirect_blks; i++)
+-              ext3_free_blocks(handle, inode, new_blocks[i], 1);
++              ext3_free_blocks(handle, inode, new_blocks[i], 1, 1);
+-      ext3_free_blocks(handle, inode, new_blocks[i], num);
++      ext3_free_blocks(handle, inode, new_blocks[i], num, 1);
+       return err;
+ }
+@@ -760,9 +760,9 @@ err_out:
+       for (i = 1; i <= num; i++) {
+               BUFFER_TRACE(where[i].bh, "call journal_forget");
+               ext3_journal_forget(handle, where[i].bh);
+-              ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1);
++              ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1,1);
+       }
+-      ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks);
++      ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 1);
+       return err;
+ }
+@@ -2007,7 +2007,7 @@ static void ext3_clear_blocks(handle_t *
+               }
+       }
+-      ext3_free_blocks(handle, inode, block_to_free, count);
++      ext3_free_blocks(handle, inode, block_to_free, count, 1);
+ }
+ /**
+@@ -2180,7 +2180,7 @@ static void ext3_free_branches(handle_t 
+                               ext3_journal_test_restart(handle, inode);
+                       }
+-                      ext3_free_blocks(handle, inode, nr, 1);
++                      ext3_free_blocks(handle, inode, nr, 1, 1);
+                       if (parent_bh) {
+                               /*
+Index: linux-stage/fs/ext3/balloc.c
+===================================================================
+--- linux-stage.orig/fs/ext3/balloc.c  2006-07-16 02:29:43.000000000 +0800
++++ linux-stage/fs/ext3/balloc.c       2006-07-16 02:33:13.000000000 +0800
+@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
+  *
+  * Return buffer_head on success or NULL in case of failure.
+  */
+-static struct buffer_head *
++struct buffer_head *
+ read_block_bitmap(struct super_block *sb, unsigned int block_group)
+ {
+       struct ext3_group_desc * desc;
+@@ -490,24 +490,6 @@ error_return:
+       return;
+ }
+-/* Free given blocks, update quota and i_blocks field */
+-void ext3_free_blocks(handle_t *handle, struct inode *inode,
+-                      ext3_fsblk_t block, unsigned long count)
+-{
+-      struct super_block * sb;
+-      unsigned long dquot_freed_blocks;
+-
+-      sb = inode->i_sb;
+-      if (!sb) {
+-              printk ("ext3_free_blocks: nonexistent device");
+-              return;
+-      }
+-      ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
+-      if (dquot_freed_blocks)
+-              DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
+-      return;
+-}
+-
+ /*
+  * For ext3 allocations, we must not reuse any blocks which are
+  * allocated in the bitmap buffer's "last committed data" copy.  This
+@@ -1463,7 +1445,7 @@ out:
+       return 0;
+ }
+-ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode,
++ext3_fsblk_t ext3_new_block_old(handle_t *handle, struct inode *inode,
+                       ext3_fsblk_t goal, int *errp)
+ {
+       unsigned long count = 1;
+Index: linux-stage/fs/ext3/xattr.c
+===================================================================
+--- linux-stage.orig/fs/ext3/xattr.c   2006-07-16 02:29:43.000000000 +0800
++++ linux-stage/fs/ext3/xattr.c        2006-07-16 02:29:49.000000000 +0800
+@@ -484,7 +484,7 @@ ext3_xattr_release_block(handle_t *handl
+               ea_bdebug(bh, "refcount now=0; freeing");
+               if (ce)
+                       mb_cache_entry_free(ce);
+-              ext3_free_blocks(handle, inode, bh->b_blocknr, 1);
++              ext3_free_blocks(handle, inode, bh->b_blocknr, 1, 1);
+               get_bh(bh);
+               ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
+       } else {
+@@ -805,7 +805,7 @@ inserted:
+                       new_bh = sb_getblk(sb, block);
+                       if (!new_bh) {
+ getblk_failed:
+-                              ext3_free_blocks(handle, inode, block, 1);
++                              ext3_free_blocks(handle, inode, block, 1, 1);
+                               error = -EIO;
+                               goto cleanup;
+                       }
 Index: linux-stage/fs/ext3/mballoc.c
 ===================================================================
 --- /dev/null  1970-01-01 00:00:00.000000000 +0000
 +++ linux-stage/fs/ext3/mballoc.c      2006-07-16 02:29:49.000000000 +0800
-@@ -0,0 +1,2434 @@
+@@ -0,0 +1,2727 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -91,6 +483,11 @@ Index: linux-stage/fs/ext3/mballoc.c
 +
 +long ext3_mb_stats = 1;
 +
++/*
++ * for which requests use 2^N search using buddies
++ */
++long ext3_mb_order2_reqs = 8;
++
 +#ifdef EXT3_BB_MAX_BLOCKS
 +#undef EXT3_BB_MAX_BLOCKS
 +#endif
@@ -147,6 +544,9 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      __u8 ac_repeats;
 +      __u8 ac_2order;         /* if request is to allocate 2^N blocks and
 +                               * N > 0, the field stores N, otherwise 0 */
++
++      struct page *ac_buddy_page;
++      struct page *ac_bitmap_page;
 +};
 +
 +#define AC_STATUS_CONTINUE    1
@@ -156,6 +556,8 @@ Index: linux-stage/fs/ext3/mballoc.c
 +struct ext3_mb_history {
 +      struct ext3_free_extent goal;   /* goal allocation */
 +      struct ext3_free_extent result; /* result allocation */
++      unsigned pid;
++      unsigned ino;
 +      __u16 found;    /* how many extents have been found */
 +      __u16 groups;   /* how many groups have been scanned */
 +      __u16 tail;     /* what tail broke some buddy */
@@ -178,9 +580,9 @@ Index: linux-stage/fs/ext3/mballoc.c
 +#define EXT3_MB_BUDDY(e3b)    ((e3b)->bd_buddy)
 +
 +#ifndef EXT3_MB_HISTORY
-+#define ext3_mb_store_history(sb,ac)
++#define ext3_mb_store_history(sb,ino,ac)
 +#else
-+static void ext3_mb_store_history(struct super_block *,
++static void ext3_mb_store_history(struct super_block *, unsigned ino,
 +                              struct ext3_allocation_context *ac);
 +#endif
 +
@@ -190,7 +592,6 @@ Index: linux-stage/fs/ext3/mballoc.c
 +
 +int ext3_create (struct inode *, struct dentry *, int, struct nameidata *);
 +struct buffer_head * read_block_bitmap(struct super_block *, unsigned int);
-+int ext3_new_block_old(handle_t *, struct inode *, unsigned long, int *);
 +int ext3_mb_reserve_blocks(struct super_block *, int);
 +void ext3_mb_release_blocks(struct super_block *, int);
 +void ext3_mb_poll_new_transaction(struct super_block *, handle_t *);
@@ -424,8 +825,9 @@ Index: linux-stage/fs/ext3/mballoc.c
 +
 +static void
 +ext3_mb_generate_buddy(struct super_block *sb, void *buddy, void *bitmap,
-+                      struct ext3_group_info *grp)
++                      int group)
 +{
++      struct ext3_group_info *grp = EXT3_GROUP_INFO(sb, group);
 +      unsigned short max = EXT3_BLOCKS_PER_GROUP(sb);
 +      unsigned short i = 0, first, len;
 +      unsigned free = 0, fragments = 0;
@@ -436,7 +838,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      while (i < max) {
 +              fragments++;
 +              first = i;
-+              i = find_next_bit(bitmap, max, i);
++              i = ext2_find_next_le_bit(bitmap, max, i);
 +              len = i - first;
 +              free += len;
 +              if (len > 1) 
@@ -452,8 +854,8 @@ Index: linux-stage/fs/ext3/mballoc.c
 +       * others waits for init completion on page lock */
 +      clear_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, &grp->bb_state);
 +      if (free != grp->bb_free) {
-+              printk("EXT3-fs: %u blocks in bitmap, %u in group descriptor\n",
-+                      free, grp->bb_free);
++              printk("EXT3-fs: group %u: %u blocks in bitmap, %u in gd\n",
++                      group, free, grp->bb_free);
 +              grp->bb_free = free;
 +      }
 +
@@ -534,7 +936,10 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      for (i = 0; i < groups_per_page && bh[i]; i++)
 +              wait_on_buffer(bh[i]);
 +
-+      /* XXX: I/O error handling here */
++      err = -EIO;
++      for (i = 0; i < groups_per_page && bh[i]; i++)
++              if (!buffer_uptodate(bh[i]))
++                      goto out;
 +
 +      first_block = page->index * blocks_per_page;
 +      for (i = 0; i < blocks_per_page; i++) {
@@ -552,11 +957,10 @@ Index: linux-stage/fs/ext3/mballoc.c
 +                      mb_debug("put buddy for group %u in page %lu/%x\n",
 +                              group, page->index, i * blocksize);
 +                      memset(data, 0xff, blocksize);
-+                      EXT3_SB(sb)->s_group_info[group]->bb_fragments = 0;
-+                      memset(EXT3_SB(sb)->s_group_info[group]->bb_counters, 0,
++                      EXT3_GROUP_INFO(sb, group)->bb_fragments = 0;
++                      memset(EXT3_GROUP_INFO(sb, group)->bb_counters, 0,
 +                              sizeof(unsigned short)*(sb->s_blocksize_bits+2));
-+                      ext3_mb_generate_buddy(sb, data, bitmap,
-+                                              EXT3_SB(sb)->s_group_info[group]);
++                      ext3_mb_generate_buddy(sb, data, bitmap, group);
 +              } else {
 +                      /* this is block of bitmap */
 +                      mb_debug("put bitmap for group %u in page %lu/%x\n",
@@ -567,10 +971,12 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      SetPageUptodate(page);
 +
 +out:
-+      for (i = 0; i < groups_per_page && bh[i]; i++)
-+              brelse(bh[i]);
-+      if (bh && bh != &bhs)
-+              kfree(bh);
++      if (bh) {
++              for (i = 0; i < groups_per_page && bh[i]; i++)
++                      brelse(bh[i]);
++              if (bh != &bhs)
++                      kfree(bh);
++      }
 +      return err;
 +}
 +
@@ -587,7 +993,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
 +
 +      e3b->bd_blkbits = sb->s_blocksize_bits;
-+      e3b->bd_info = sbi->s_group_info[group];
++      e3b->bd_info = EXT3_GROUP_INFO(sb, group);
 +      e3b->bd_sb = sb;
 +      e3b->bd_group = group;
 +      e3b->bd_buddy_page = NULL;
@@ -597,12 +1003,15 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      pnum = block / blocks_per_page;
 +      poff = block % blocks_per_page;
 +
++      /* we could use find_or_create_page(), but it locks page
++       * what we'd like to avoid in fast path ... */
 +      page = find_get_page(inode->i_mapping, pnum);
 +      if (page == NULL || !PageUptodate(page)) {
 +              if (page)
 +                      page_cache_release(page);
 +              page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
 +              if (page) {
++                      BUG_ON(page->mapping != inode->i_mapping);
 +                      if (!PageUptodate(page))
 +                              ext3_mb_init_cache(page);
 +                      unlock_page(page);
@@ -624,6 +1033,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +                      page_cache_release(page);
 +              page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
 +              if (page) {
++                      BUG_ON(page->mapping != inode->i_mapping);
 +                      if (!PageUptodate(page))
 +                              ext3_mb_init_cache(page);
 +                      unlock_page(page);
@@ -663,14 +1073,14 @@ Index: linux-stage/fs/ext3/mballoc.c
 +ext3_lock_group(struct super_block *sb, int group)
 +{
 +      bit_spin_lock(EXT3_GROUP_INFO_LOCKED_BIT,
-+                      &EXT3_SB(sb)->s_group_info[group]->bb_state);
++                    &EXT3_GROUP_INFO(sb, group)->bb_state);
 +}
 +
 +static inline void
 +ext3_unlock_group(struct super_block *sb, int group)
 +{
 +      bit_spin_unlock(EXT3_GROUP_INFO_LOCKED_BIT,
-+                      &EXT3_SB(sb)->s_group_info[group]->bb_state);
++                      &EXT3_GROUP_INFO(sb, group)->bb_state);
 +}
 +
 +static int mb_find_order_for_block(struct ext3_buddy *e3b, int block)
@@ -800,7 +1210,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +static int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
 +                              int needed, struct ext3_free_extent *ex)
 +{
-+      int next, max, ord;
++      int next = block, max, ord;
 +      void *buddy;
 +
 +      J_ASSERT(ex != NULL);
@@ -825,6 +1235,11 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      ex->fe_start = block << order;
 +      ex->fe_group = e3b->bd_group;
 +
++      /* calc difference from given start */
++      next = next - ex->fe_start;
++      ex->fe_len -= next;
++      ex->fe_start += next;
++
 +      while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) {
 +
 +              if (block + 1 >= max)
@@ -928,6 +1343,13 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      ac->ac_status = AC_STATUS_FOUND;
 +      ac->ac_tail = ret & 0xffff;
 +      ac->ac_buddy = ret >> 16;
++
++      /* hold in-core structures until allocated
++       * blocks are marked non-free in on-disk bitmap */
++      ac->ac_buddy_page = e3b->bd_buddy_page;
++      page_cache_get(e3b->bd_buddy_page);
++      ac->ac_bitmap_page = e3b->bd_bitmap_page;
++      page_cache_get(e3b->bd_bitmap_page);
 +}
 +
 +/*
@@ -963,7 +1385,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      }
 +
 +      /*
-+       * Let's check whether the chuck is good enough
++       * Let's check whether the chunk is good enough
 +       */
 +      if (ex->fe_len == gex->fe_len) {
 +              *bex = *ex;
@@ -1037,6 +1459,8 @@ Index: linux-stage/fs/ext3/mballoc.c
 +                              struct ext3_buddy *e3b)
 +{
 +      int group = ac->ac_g_ex.fe_group, max, err;
++      struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb);
++      struct ext3_super_block *es = sbi->s_es;
 +      struct ext3_free_extent ex;
 +
 +      err = ext3_mb_load_buddy(ac->ac_sb, group, e3b);
@@ -1047,7 +1471,25 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start,
 +                              ac->ac_g_ex.fe_len, &ex);
 +      
-+      if (max > 0) {
++      if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
++              unsigned long start;
++              start = (e3b->bd_group * EXT3_BLOCKS_PER_GROUP(ac->ac_sb) +
++                      ex.fe_start + le32_to_cpu(es->s_first_data_block));
++              if (start % sbi->s_stripe == 0) {
++                      ac->ac_found++;
++                      ac->ac_b_ex = ex;
++                      ext3_mb_use_best_found(ac, e3b);
++              }
++      } else if (max >= ac->ac_g_ex.fe_len) {
++              J_ASSERT(ex.fe_len > 0);
++              J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
++              J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
++              ac->ac_found++;
++              ac->ac_b_ex = ex;
++              ext3_mb_use_best_found(ac, e3b);
++      } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) {
++              /* Sometimes, caller may want to merge even small
++               * number of blocks to an existing extent */
 +              J_ASSERT(ex.fe_len > 0);
 +              J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
 +              J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
@@ -1075,7 +1517,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      int i, k, max;
 +
 +      J_ASSERT(ac->ac_2order > 0);
-+      for (i = ac->ac_2order; i < sb->s_blocksize_bits + 1; i++) {
++      for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
 +              if (grp->bb_counters[i] == 0)
 +                      continue;
 +
@@ -1140,11 +1582,46 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      }
 +}
 +
++/*
++ * This is a special case for storages like raid5
++ * we try to find stripe-aligned chunks for stripe-size requests
++ */
++static void ext3_mb_scan_aligned(struct ext3_allocation_context *ac,
++                               struct ext3_buddy *e3b)
++{
++      struct super_block *sb = ac->ac_sb;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      void *bitmap = EXT3_MB_BITMAP(e3b);
++      struct ext3_free_extent ex;
++      unsigned long i, max;
++
++      J_ASSERT(sbi->s_stripe != 0);
++
++      /* find first stripe-aligned block */
++      i = e3b->bd_group * EXT3_BLOCKS_PER_GROUP(sb)
++              + le32_to_cpu(sbi->s_es->s_first_data_block);
++      i = ((i + sbi->s_stripe - 1) / sbi->s_stripe) * sbi->s_stripe;
++      i = (i - le32_to_cpu(sbi->s_es->s_first_data_block))
++                      % EXT3_BLOCKS_PER_GROUP(sb);
++
++      while (i < sb->s_blocksize * 8) {
++              if (!mb_test_bit(i, bitmap)) {
++                      max = mb_find_extent(e3b, 0, i, sbi->s_stripe, &ex);
++                      if (max >= sbi->s_stripe) {
++                              ac->ac_found++;
++                              ac->ac_b_ex = ex;
++                              ext3_mb_use_best_found(ac, e3b);
++                              break;
++                      }
++              }
++              i += sbi->s_stripe;
++      }
++}
++
 +static int ext3_mb_good_group(struct ext3_allocation_context *ac,
 +                              int group, int cr)
 +{
-+      struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb);
-+      struct ext3_group_info *grp = sbi->s_group_info[group];
++      struct ext3_group_info *grp = EXT3_GROUP_INFO(ac->ac_sb, group);
 +      unsigned free, fragments, i, bits;
 +
 +      J_ASSERT(cr >= 0 && cr < 4);
@@ -1161,15 +1638,18 @@ Index: linux-stage/fs/ext3/mballoc.c
 +              case 0:
 +                      J_ASSERT(ac->ac_2order != 0);
 +                      bits = ac->ac_sb->s_blocksize_bits + 1;
-+                      for (i = ac->ac_2order; i < bits; i++)
++                      for (i = ac->ac_2order; i <= bits; i++)
 +                              if (grp->bb_counters[i] > 0)
 +                                      return 1;
++                      break;
 +              case 1:
 +                      if ((free / fragments) >= ac->ac_g_ex.fe_len)
 +                              return 1;
++                      break;
 +              case 2:
 +                      if (free >= ac->ac_g_ex.fe_len)
 +                              return 1;
++                      break;
 +              case 3:
 +                      return 1;
 +              default:
@@ -1234,6 +1714,9 @@ Index: linux-stage/fs/ext3/mballoc.c
 +              }
 +      }
 +
++      ac.ac_buddy_page = NULL;
++      ac.ac_bitmap_page = NULL;
++
 +      /*
 +       * Check quota for allocation of this blocks.
 +       */
@@ -1270,23 +1753,27 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      ac.ac_2order = 0;
 +      ac.ac_criteria = 0;
 +
++      if (*len == 1 && sbi->s_stripe) {
++              /* looks like a metadata, let's use a dirty hack for raid5
++               * move all metadata in first groups in hope to hit cached
++               * sectors and thus avoid read-modify cycles in raid5 */
++              ac.ac_g_ex.fe_group = group = 0;
++      }
++
 +      /* probably, the request is for 2^8+ blocks (1/2/3/... MB) */
 +      i = ffs(*len);
-+      if (i >= 8) {
++      if (i >= ext3_mb_order2_reqs) {
 +              i--;
 +              if ((*len & (~(1 << i))) == 0)
 +                      ac.ac_2order = i;
 +      }
 +
-+      /* Sometimes, caller may want to merge even small
-+       * number of blocks to an existing extent */
-+      if (ac.ac_flags & EXT3_MB_HINT_MERGE) {
-+              err = ext3_mb_find_by_goal(&ac, &e3b);
-+              if (err)
-+                      goto out_err;
-+              if (ac.ac_status == AC_STATUS_FOUND)
-+                      goto found;
-+      }
++      /* first, try the goal */
++      err = ext3_mb_find_by_goal(&ac, &e3b);
++      if (err)
++              goto out_err;
++      if (ac.ac_status == AC_STATUS_FOUND)
++              goto found;
 +
 +      /* Let's just scan groups to find more-less suitable blocks */
 +      cr = ac.ac_2order ? 0 : 1;
@@ -1297,7 +1784,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +                      if (group == EXT3_SB(sb)->s_groups_count)
 +                              group = 0;
 +
-+                      if (EXT3_MB_GRP_NEED_INIT(sbi->s_group_info[group])) {
++                      if (EXT3_MB_GRP_NEED_INIT(EXT3_GROUP_INFO(sb, group))) {
 +                              /* we need full data about the group
 +                               * to make a good selection */
 +                              err = ext3_mb_load_buddy(ac.ac_sb, group, &e3b);
@@ -1325,6 +1812,8 @@ Index: linux-stage/fs/ext3/mballoc.c
 +                      ac.ac_groups_scanned++;
 +                      if (cr == 0)
 +                              ext3_mb_simple_scan_group(&ac, &e3b);
++                      else if (cr == 1 && *len == sbi->s_stripe)
++                              ext3_mb_scan_aligned(&ac, &e3b);
 +                      else
 +                              ext3_mb_complex_scan_group(&ac, &e3b);
 +
@@ -1332,8 +1821,6 @@ Index: linux-stage/fs/ext3/mballoc.c
 +
 +                      ext3_mb_release_desc(&e3b);
 +
-+                      if (err)
-+                              goto out_err;
 +                      if (ac.ac_status != AC_STATUS_CONTINUE)
 +                              break;
 +              }
@@ -1347,7 +1834,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +               */
 +
 +              /*if (ac.ac_found > ext3_mb_max_to_scan)
-+                      printk(KERN_ERR "EXT3-fs: too long searching at "
++                      printk(KERN_DEBUG "EXT3-fs: too long searching at "
 +                              "%u (%d/%d)\n", cr, ac.ac_b_ex.fe_len,
 +                              ac.ac_g_ex.fe_len);*/
 +              ext3_mb_try_best_found(&ac, &e3b);
@@ -1356,7 +1843,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +                       * Someone more lucky has already allocated it.
 +                       * The only thing we can do is just take first
 +                       * found block(s)
-+                      printk(KERN_ERR "EXT3-fs: and someone won our chunk\n");
++                      printk(KERN_DEBUG "EXT3-fs: someone won our chunk\n");
 +                       */
 +                      ac.ac_b_ex.fe_group = 0;
 +                      ac.ac_b_ex.fe_start = 0;
@@ -1376,17 +1863,16 @@ Index: linux-stage/fs/ext3/mballoc.c
 +              *errp = -ENOSPC;
 +              block = 0;
 +#if 1
-+              printk(KERN_ERR "EXT3-fs: cant allocate: status %d, flags %d\n",
++              printk(KERN_ERR "EXT3-fs: can't allocate: status %d flags %d\n",
 +                      ac.ac_status, ac.ac_flags);
-+              printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d, cr %d\n",
++              printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d cr %d\n",
 +                      ac.ac_g_ex.fe_len, ac.ac_b_ex.fe_group,
 +                      ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len, cr);
 +              printk(KERN_ERR "EXT3-fs: %lu block reserved, %d found\n",
 +                      sbi->s_blocks_reserved, ac.ac_found);
 +              printk("EXT3-fs: groups: ");
 +              for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
-+                      printk("%d: %d ", i,
-+                              sbi->s_group_info[i]->bb_free);
++                      printk("%d: %d ", i, EXT3_GROUP_INFO(sb, i)->bb_free);
 +              printk("\n");
 +#endif
 +              goto out;
@@ -1484,6 +1970,11 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      *errp = err;
 +      block = 0;
 +out:
++      if (ac.ac_buddy_page)
++              page_cache_release(ac.ac_buddy_page);
++      if (ac.ac_bitmap_page)
++              page_cache_release(ac.ac_bitmap_page);
++
 +      if (!(flags & EXT3_MB_HINT_RESERVED)) {
 +              /* block wasn't reserved before and we reserved it
 +               * at the beginning of allocation. it doesn't matter
@@ -1507,7 +1998,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +                      atomic_inc(&sbi->s_bal_breaks);
 +      }
 +
-+      ext3_mb_store_history(sb, &ac);
++      ext3_mb_store_history(sb, inode->i_ino, &ac);
 +
 +      return block;
 +}
@@ -1572,9 +2063,9 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      char buf[20], buf2[20];
 +
 +      if (v == SEQ_START_TOKEN) {
-+              seq_printf(seq, "%-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
-+                       "goal", "result", "found", "grps", "cr", "merge",
-+                       "tail", "broken");
++              seq_printf(seq, "%-5s %-8s %-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
++                       "pid", "inode", "goal", "result", "found", "grps", "cr",
++                       "merge", "tail", "broken");
 +              return 0;
 +      }
 +
@@ -1582,9 +2073,9 @@ Index: linux-stage/fs/ext3/mballoc.c
 +              hs->goal.fe_start, hs->goal.fe_len);
 +      sprintf(buf2, "%u/%u/%u", hs->result.fe_group,
 +              hs->result.fe_start, hs->result.fe_len);
-+      seq_printf(seq, "%-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n", buf,
-+                      buf2, hs->found, hs->groups, hs->cr, 
-+                      hs->merged ? "M" : "", hs->tail,
++      seq_printf(seq, "%-5u %-8u %-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n",
++                      hs->pid, hs->ino, buf, buf2, hs->found, hs->groups,
++                      hs->cr, hs->merged ? "M" : "", hs->tail,
 +                      hs->buddy ? 1 << hs->buddy : 0);
 +      return 0;
 +}
@@ -1652,12 +2143,107 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      .release        = ext3_mb_seq_history_release,
 +};
 +
++/*
++ * seq_file ->start() for "mb_groups".  A position outside
++ * [0, s_groups_count) ends the sequence with NULL; otherwise the returned
++ * token is the group number plus one, which keeps the token for group 0
++ * distinct from the NULL end marker.
++ */
++static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
++{
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      long token;
++
++      if (*pos >= 0 && *pos < sbi->s_groups_count) {
++              token = *pos + 1;
++              return (void *) token;
++      }
++
++      return NULL;
++}
++
++/*
++ * seq_file ->next() for "mb_groups": advance *pos by one and return the
++ * token for the new position (group number plus one), or NULL once the
++ * position falls outside [0, s_groups_count).  @v, the previous token,
++ * is not used.
++ */
++static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      long group;
++
++      /* *pos is advanced even when the new position ends the sequence */
++      ++*pos;
++      if (*pos < 0 || *pos >= sbi->s_groups_count)
++              return NULL;
++      group = *pos + 1;
++      return (void *) group;
++}
++
++/*
++ * seq_file ->show() for "mb_groups": print one line of statistics for a
++ * single group.  @v is the iterator token, i.e. the group number plus
++ * one.  A column header is printed in front of group 0, and groups whose
++ * info still has the NEED_INIT state produce no output.
++ */
++static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v)
++{
++      struct super_block *sb = seq->private;
++      long group = (long) v - 1;
++      long bytes, order;
++      /* local snapshot: group info followed by room for its counters */
++      struct sg {
++              struct ext3_group_info info;
++              unsigned short counters[16];
++      } sg;
++
++      if (group == 0)
++              seq_printf(seq, "#%-5s: %-5s %-5s %-5s [ %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
++                       "group", "free", "frags", "first", "2^0", "2^1", "2^2",
++                       "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10",
++                       "2^11", "2^12", "2^13");
++
++      /* size of the info header plus s_blocksize_bits + 2 counters */
++      bytes = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
++              sizeof(struct ext3_group_info);
++
++      /* copy under the group lock, print from the copy afterwards */
++      ext3_lock_group(sb, group);
++      memcpy(&sg, EXT3_GROUP_INFO(sb, group), bytes);
++      ext3_unlock_group(sb, group);
++
++      if (EXT3_MB_GRP_NEED_INIT(&sg.info))
++              return 0;
++
++      seq_printf(seq, "#%-5lu: %-5u %-5u %-5u [", group, sg.info.bb_free,
++                      sg.info.bb_fragments, sg.info.bb_first_free);
++      /* always 14 columns; orders above s_blocksize_bits + 1 print as 0 */
++      for (order = 0; order <= 13; order++)
++              seq_printf(seq, " %-5u", order <= sb->s_blocksize_bits + 1 ?
++                              sg.info.bb_counters[order] : 0);
++      seq_printf(seq, " ]\n");
++
++      return 0;
++}
++
++/* seq_file ->stop() for "mb_groups"; the body is empty, nothing is released */
++static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v)
++{
++}
++
++/* iterator callbacks behind the "mb_groups" seq_file */
++static struct seq_operations ext3_mb_seq_groups_ops = {
++      .start  = ext3_mb_seq_groups_start,
++      .stop   = ext3_mb_seq_groups_stop,
++      .next   = ext3_mb_seq_groups_next,
++      .show   = ext3_mb_seq_groups_show,
++};
++
++/*
++ * open() for the "mb_groups" proc file: set up the seq_file and, on
++ * success, store the super_block taken from the proc entry's ->data in
++ * seq_file->private for the iterator callbacks.  Returns the seq_open()
++ * result.
++ */
++static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file)
++{
++      struct super_block *sb = PDE(inode)->data;
++      struct seq_file *m;
++      int rc;
++
++      rc = seq_open(file, &ext3_mb_seq_groups_ops);
++      if (rc != 0)
++              return rc;
++
++      m = (struct seq_file *)file->private_data;
++      m->private = sb;
++      return 0;
++}
++
++/*
++ * File operations for the "mb_groups" proc file: a custom open that binds
++ * the super_block, with read/llseek/release left to the seq_file helpers.
++ */
++static struct file_operations ext3_mb_seq_groups_fops = {
++      .owner          = THIS_MODULE,
++      .open           = ext3_mb_seq_groups_open,
++      .read           = seq_read,
++      .llseek         = seq_lseek,
++      .release        = seq_release,
++};
++
 +static void ext3_mb_history_release(struct super_block *sb)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      char name[64];
 +
 +      snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name));
++      remove_proc_entry("mb_groups", sbi->s_mb_proc);
 +      remove_proc_entry("mb_history", sbi->s_mb_proc);
 +      remove_proc_entry(name, proc_root_ext3);
 +
@@ -1680,6 +2266,11 @@ Index: linux-stage/fs/ext3/mballoc.c
 +                      p->proc_fops = &ext3_mb_seq_history_fops;
 +                      p->data = sb;
 +              }
++              p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc);
++              if (p) {
++                      p->proc_fops = &ext3_mb_seq_groups_fops;
++                      p->data = sb;
++              }
 +      }
 +
 +      sbi->s_mb_history_max = 1000;
@@ -1692,7 +2283,8 @@ Index: linux-stage/fs/ext3/mballoc.c
 +}
 +
 +static void
-+ext3_mb_store_history(struct super_block *sb, struct ext3_allocation_context *ac)
++ext3_mb_store_history(struct super_block *sb, unsigned ino,
++                      struct ext3_allocation_context *ac)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      struct ext3_mb_history h;
@@ -1700,6 +2292,8 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      if (likely(sbi->s_mb_history == NULL))
 +              return;
 +
++      h.pid = current->pid;
++      h.ino = ino;
 +      h.goal = ac->ac_g_ex;
 +      h.result = ac->ac_b_ex;
 +      h.found = ac->ac_found;
@@ -1727,21 +2321,40 @@ Index: linux-stage/fs/ext3/mballoc.c
 +int ext3_mb_init_backend(struct super_block *sb)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      int i, len;
-+
-+      len = sizeof(struct ext3_buddy_group_blocks *) * sbi->s_groups_count;
-+      sbi->s_group_info = kmalloc(len, GFP_KERNEL);
++      int i, j, len, metalen;
++      int num_meta_group_infos =
++              (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) >>
++                      EXT3_DESC_PER_BLOCK_BITS(sb);
++      struct ext3_group_info **meta_group_info;
++
++      /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
++       * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
++       * So a two level scheme suffices for now. */
++      sbi->s_group_info = kmalloc(sizeof(*sbi->s_group_info) *
++                                  num_meta_group_infos, GFP_KERNEL);
 +      if (sbi->s_group_info == NULL) {
-+              printk(KERN_ERR "EXT3-fs: can't allocate mem for buddy\n");
++              printk(KERN_ERR "EXT3-fs: can't allocate buddy meta group\n");
 +              return -ENOMEM;
 +      }
-+      memset(sbi->s_group_info, 0, len);
-+
 +      sbi->s_buddy_cache = new_inode(sb);
 +      if (sbi->s_buddy_cache == NULL) {
 +              printk(KERN_ERR "EXT3-fs: can't get new inode\n");
-+              kfree(sbi->s_group_info);
-+              return -ENOMEM;
++              goto err_freesgi;
++      }
++
++      metalen = sizeof(*meta_group_info) << EXT3_DESC_PER_BLOCK_BITS(sb);
++      for (i = 0; i < num_meta_group_infos; i++) {
++              if ((i + 1) == num_meta_group_infos)
++                      metalen = sizeof(*meta_group_info) *
++                              (sbi->s_groups_count -
++                                      (i << EXT3_DESC_PER_BLOCK_BITS(sb)));
++              meta_group_info = kmalloc(metalen, GFP_KERNEL);
++              if (meta_group_info == NULL) {
++                      printk(KERN_ERR "EXT3-fs: can't allocate mem for a "
++                             "buddy group\n");
++                      goto err_freemeta;
++              }
++              sbi->s_group_info[i] = meta_group_info;
 +      }
 +
 +      /* 
@@ -1753,30 +2366,42 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      for (i = 0; i < sbi->s_groups_count; i++) {
 +              struct ext3_group_desc * desc;
 +
-+              sbi->s_group_info[i] = kmalloc(len, GFP_KERNEL);
-+              if (sbi->s_group_info[i] == NULL) {
-+                      printk(KERN_ERR "EXT3-fs: cant allocate mem for buddy\n");
-+                      goto err_out;
++              meta_group_info =
++                      sbi->s_group_info[i >> EXT3_DESC_PER_BLOCK_BITS(sb)];
++              j = i & (EXT3_DESC_PER_BLOCK(sb) - 1);
++
++              meta_group_info[j] = kmalloc(len, GFP_KERNEL);
++              if (meta_group_info[j] == NULL) {
++                      printk(KERN_ERR "EXT3-fs: can't allocate buddy mem\n");
++                      i--;
++                      goto err_freebuddy;
 +              }
 +              desc = ext3_get_group_desc(sb, i, NULL);
 +              if (desc == NULL) {
-+                      printk(KERN_ERR "EXT3-fs: cant read descriptor %u\n", i);
-+                      goto err_out;
++                      printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i);
++                      goto err_freebuddy;
 +              }
-+              memset(sbi->s_group_info[i], 0, len);
++              memset(meta_group_info[j], 0, len);
 +              set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT,
-+                      &sbi->s_group_info[i]->bb_state);
-+              sbi->s_group_info[i]->bb_free =
++                      &meta_group_info[j]->bb_state);
++              meta_group_info[j]->bb_free =
 +                      le16_to_cpu(desc->bg_free_blocks_count);
 +      }
 +
 +      return 0;
 +
-+err_out:
++err_freebuddy:
++      while (i >= 0) {
++              kfree(EXT3_GROUP_INFO(sb, i));
++              i--;
++      }
++      i = num_meta_group_infos;
++err_freemeta:
 +      while (--i >= 0)
 +              kfree(sbi->s_group_info[i]);
 +      iput(sbi->s_buddy_cache);
-+
++err_freesgi:
++      kfree(sbi->s_group_info);
 +      return -ENOMEM;
 +}
 +
@@ -1818,7 +2443,6 @@ Index: linux-stage/fs/ext3/mballoc.c
 +              max = max >> 1;
 +              i++;
 +      } while (i <= sb->s_blocksize_bits + 1);
-+      
 +
 +      /* init file for buddy data */
 +      if ((i = ext3_mb_init_backend(sb))) {
@@ -1855,7 +2479,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +int ext3_mb_release(struct super_block *sb)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      int i;
++      int i, num_meta_group_infos;
 +      
 +      if (!test_opt(sb, MBALLOC))
 +              return 0;
@@ -1870,11 +2494,13 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      ext3_mb_free_committed_blocks(sb);
 +
 +      if (sbi->s_group_info) {
-+              for (i = 0; i < sbi->s_groups_count; i++) {
-+                      if (sbi->s_group_info[i] == NULL)
-+                              continue;
++              for (i = 0; i < sbi->s_groups_count; i++)
++                      kfree(EXT3_GROUP_INFO(sb, i));
++              num_meta_group_infos = (sbi->s_groups_count +
++                      EXT3_DESC_PER_BLOCK(sb) - 1) >>
++                      EXT3_DESC_PER_BLOCK_BITS(sb);
++              for (i = 0; i < num_meta_group_infos; i++)
 +                      kfree(sbi->s_group_info[i]);
-+              }
 +              kfree(sbi->s_group_info);
 +      }
 +      if (sbi->s_mb_offsets)
@@ -1936,6 +2562,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +                              md->num, md->group, md);
 +
 +              err = ext3_mb_load_buddy(sb, md->group, &e3b);
++              /* we expect to find existing buddy because it's pinned */
 +              BUG_ON(err != 0);
 +
 +              /* there are blocks to put in buddy to make them really free */
@@ -2234,7 +2861,6 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      return ret;
 +}
 +
-+
 +void ext3_free_blocks(handle_t *handle, struct inode * inode,
 +                      unsigned long block, unsigned long count, int metadata)
 +{
@@ -2242,7 +2868,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      int freed;
 +
 +      sb = inode->i_sb;
-+      if (!test_opt(sb, MBALLOC))
++      if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info)
 +              ext3_free_blocks_sb(handle, sb, block, count, &freed);
 +      else
 +              ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed);
@@ -2255,6 +2881,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +#define EXT3_MB_STATS_NAME        "mb_stats"
 +#define EXT3_MB_MAX_TO_SCAN_NAME  "mb_max_to_scan"
 +#define EXT3_MB_MIN_TO_SCAN_NAME  "mb_min_to_scan"
++#define EXT3_MB_ORDER2_REQ       "mb_order2_req"
 +
 +static int ext3_mb_stats_read(char *page, char **start, off_t off,
 +              int count, int *eof, void *data)
@@ -2276,7 +2903,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      char str[32];
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_STATS_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2310,7 +2937,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      long value;
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_MAX_TO_SCAN_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2349,7 +2976,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      long value;
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2367,260 +2994,134 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      return count;
 +}
 +
-+int __init init_ext3_proc(void)
++/*
++ * read_proc-style handler that reports ext3_mb_order2_reqs.  It always sets
++ * *eof; a non-zero @off returns 0 bytes, otherwise the value is formatted
++ * as a decimal number plus newline into @page, *start is pointed at @page
++ * and the formatted length is returned.  @count and @data are not used.
++ */
++static int ext3_mb_order2_req_read(char *page, char **start, off_t off,
++                                 int count, int *eof, void *data)
 +{
-+      struct proc_dir_entry *proc_ext3_mb_stats;
-+      struct proc_dir_entry *proc_ext3_mb_max_to_scan;
-+      struct proc_dir_entry *proc_ext3_mb_min_to_scan;
-+
-+      proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs);
-+      if (proc_root_ext3 == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n", EXT3_ROOT);
-+              return -EIO;
-+      }
-+
-+      /* Initialize EXT3_MB_STATS_NAME */
-+      proc_ext3_mb_stats = create_proc_entry(EXT3_MB_STATS_NAME,
-+                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
-+      if (proc_ext3_mb_stats == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
-+                              EXT3_MB_STATS_NAME);
-+              remove_proc_entry(EXT3_ROOT, proc_root_fs);
-+              return -EIO;
-+      }
++      int len;
 +
-+      proc_ext3_mb_stats->data = NULL;
-+      proc_ext3_mb_stats->read_proc  = ext3_mb_stats_read;
-+      proc_ext3_mb_stats->write_proc = ext3_mb_stats_write;
++      /* the whole value is produced by the read at offset 0 */
++      *eof = 1;
++      if (off != 0)
++              return 0;
 +
-+      /* Initialize EXT3_MAX_TO_SCAN_NAME */
-+      proc_ext3_mb_max_to_scan = create_proc_entry(
-+                      EXT3_MB_MAX_TO_SCAN_NAME,
-+                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
-+      if (proc_ext3_mb_max_to_scan == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
-+                              EXT3_MB_MAX_TO_SCAN_NAME);
-+              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
-+              remove_proc_entry(EXT3_ROOT, proc_root_fs);
-+              return -EIO;
-+      }
++      len = sprintf(page, "%ld\n", ext3_mb_order2_reqs);
++      *start = page;
++      return len;
++}
 +
-+      proc_ext3_mb_max_to_scan->data = NULL;
-+      proc_ext3_mb_max_to_scan->read_proc  = ext3_mb_max_to_scan_read;
-+      proc_ext3_mb_max_to_scan->write_proc = ext3_mb_max_to_scan_write;
++/*
++ * write_proc-style handler that updates ext3_mb_order2_reqs from a numeric
++ * string supplied by user space.
++ *
++ * Returns @count on success, -EOVERFLOW when the input does not fit the
++ * local buffer, -EFAULT when it cannot be copied from user space and
++ * -ERANGE when the parsed value is not strictly positive.  @file and
++ * @data are not used.
++ */
++static int ext3_mb_order2_req_write(struct file *file, const char *buffer,
++                                  unsigned long count, void *data)
++{
++      char str[32];
++      long value;
 +
-+      /* Initialize EXT3_MIN_TO_SCAN_NAME */
-+      proc_ext3_mb_min_to_scan = create_proc_entry(
-+                      EXT3_MB_MIN_TO_SCAN_NAME,
-+                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
-+      if (proc_ext3_mb_min_to_scan == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
-+                              EXT3_MB_MIN_TO_SCAN_NAME);
-+              remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
-+              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
-+              remove_proc_entry(EXT3_ROOT, proc_root_fs);
-+              return -EIO;
++      if (count >= sizeof(str)) {
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
++                     EXT3_MB_ORDER2_REQ, (int)sizeof(str));
++              return -EOVERFLOW;
 +      }
 +
-+      proc_ext3_mb_min_to_scan->data = NULL;
-+      proc_ext3_mb_min_to_scan->read_proc  = ext3_mb_min_to_scan_read;
-+      proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write;
-+
-+      return 0;
-+}
-+
-+void exit_ext3_proc(void)
-+{
-+      remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
-+      remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
-+      remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
-+      remove_proc_entry(EXT3_ROOT, proc_root_fs);
-+}
++      if (copy_from_user(str, buffer, count))
++              return -EFAULT;
++      /* count < sizeof(str) was checked above, so this stays in bounds;
++       * without it the parser would run into uninitialized stack bytes */
++      str[count] = '\0';
 +
-Index: linux-stage/fs/ext3/extents.c
-===================================================================
---- linux-stage.orig/fs/ext3/extents.c 2006-07-16 02:29:43.000000000 +0800
-+++ linux-stage/fs/ext3/extents.c      2006-07-16 02:29:49.000000000 +0800
-@@ -771,7 +771,7 @@ cleanup:
-               for (i = 0; i < depth; i++) {
-                       if (!ablocks[i])
-                               continue;
--                      ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
-+                      ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
-               }
-       }
-       kfree(ablocks);
-@@ -1428,7 +1428,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
-                 path->p_idx->ei_leaf);
-       bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
-       ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
--      ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
-+      ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1);
-       return err;
- }
-@@ -1913,10 +1913,12 @@ ext3_remove_blocks(struct ext3_extents_t
-       int needed = ext3_remove_blocks_credits(tree, ex, from, to);
-       handle_t *handle = ext3_journal_start(tree->inode, needed);
-       struct buffer_head *bh;
--      int i;
-+      int i, metadata = 0;
-       if (IS_ERR(handle))
-               return PTR_ERR(handle);
-+      if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode))
-+              metadata = 1;
-       if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
-               /* tail removal */
-               unsigned long num, start;
-@@ -1928,7 +1930,7 @@ ext3_remove_blocks(struct ext3_extents_t
-                       bh = sb_find_get_block(tree->inode->i_sb, start + i);
-                       ext3_forget(handle, 0, tree->inode, bh, start + i);
-               }
--              ext3_free_blocks(handle, tree->inode, start, num);
-+              ext3_free_blocks(handle, tree->inode, start, num, metadata);
-       } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
-               printk("strange request: removal %lu-%lu from %u:%u\n",
-                      from, to, ex->ee_block, ex->ee_len);
-Index: linux-stage/fs/ext3/xattr.c
-===================================================================
---- linux-stage.orig/fs/ext3/xattr.c   2006-07-16 02:29:43.000000000 +0800
-+++ linux-stage/fs/ext3/xattr.c        2006-07-16 02:29:49.000000000 +0800
-@@ -484,7 +484,7 @@ ext3_xattr_release_block(handle_t *handl
-               ea_bdebug(bh, "refcount now=0; freeing");
-               if (ce)
-                       mb_cache_entry_free(ce);
--              ext3_free_blocks(handle, inode, bh->b_blocknr, 1);
-+              ext3_free_blocks(handle, inode, bh->b_blocknr, 1, 1);
-               get_bh(bh);
-               ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
-       } else {
-@@ -805,7 +805,7 @@ inserted:
-                       new_bh = sb_getblk(sb, block);
-                       if (!new_bh) {
- getblk_failed:
--                              ext3_free_blocks(handle, inode, block, 1);
-+                              ext3_free_blocks(handle, inode, block, 1, 1);
-                               error = -EIO;
-                               goto cleanup;
-                       }
-Index: linux-stage/fs/ext3/balloc.c
-===================================================================
---- linux-stage.orig/fs/ext3/balloc.c  2006-07-16 02:29:43.000000000 +0800
-+++ linux-stage/fs/ext3/balloc.c       2006-07-16 02:33:13.000000000 +0800
-@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
-  *
-  * Return buffer_head on success or NULL in case of failure.
-  */
--static struct buffer_head *
-+struct buffer_head *
- read_block_bitmap(struct super_block *sb, unsigned int block_group)
- {
-       struct ext3_group_desc * desc;
-@@ -490,24 +490,6 @@ error_return:
-       return;
- }
--/* Free given blocks, update quota and i_blocks field */
--void ext3_free_blocks(handle_t *handle, struct inode *inode,
--                      ext3_fsblk_t block, unsigned long count)
--{
--      struct super_block * sb;
--      unsigned long dquot_freed_blocks;
--
--      sb = inode->i_sb;
--      if (!sb) {
--              printk ("ext3_free_blocks: nonexistent device");
--              return;
--      }
--      ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
--      if (dquot_freed_blocks)
--              DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
--      return;
--}
--
- /*
-  * For ext3 allocations, we must not reuse any blocks which are
-  * allocated in the bitmap buffer's "last committed data" copy.  This
-@@ -1463,7 +1445,7 @@ out:
-       return 0;
- }
--ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode,
-+ext3_fsblk_t ext3_new_block_old(handle_t *handle, struct inode *inode,
-                       ext3_fsblk_t goal, int *errp)
- {
-       unsigned long count = 1;
-Index: linux-stage/fs/ext3/super.c
-===================================================================
---- linux-stage.orig/fs/ext3/super.c   2006-07-16 02:29:43.000000000 +0800
-+++ linux-stage/fs/ext3/super.c        2006-07-16 02:29:49.000000000 +0800
-@@ -391,6 +391,7 @@ static void ext3_put_super (struct super
-       struct ext3_super_block *es = sbi->s_es;
-       int i;
-+      ext3_mb_release(sb);
-       ext3_ext_release(sb);
-       ext3_xattr_put_super(sb);
-       journal_destroy(sbi->s_journal);
-@@ -641,7 +642,7 @@ enum {
-       Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
-       Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
-       Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
--      Opt_extents, Opt_extdebug,
-+      Opt_extents, Opt_extdebug, Opt_mballoc,
-       Opt_grpquota
- };
-@@ -696,6 +697,7 @@ static match_table_t tokens = {
-       {Opt_iopen_nopriv, "iopen_nopriv"},
-       {Opt_extents, "extents"},
-       {Opt_extdebug, "extdebug"},
-+      {Opt_mballoc, "mballoc"},
-       {Opt_barrier, "barrier=%u"},
-       {Opt_err, NULL},
-       {Opt_resize, "resize"},
-@@ -1047,6 +1049,9 @@ clear_qf_name:
-               case Opt_extdebug:
-                       set_opt (sbi->s_mount_opt, EXTDEBUG);
-                       break;
-+              case Opt_mballoc:
-+                      set_opt (sbi->s_mount_opt, MBALLOC);
-+                      break;
-               default:
-                       printk (KERN_ERR
-                               "EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1773,6 +1778,7 @@ static int ext3_fill_super (struct super
-               "writeback");
-       ext3_ext_init(sb);
-+      ext3_mb_init(sb, needs_recovery);
-       lock_kernel();
-       return 0;
-@@ -2712,7 +2718,13 @@ static struct file_system_type ext3_fs_t
- static int __init init_ext3_fs(void)
- {
--      int err = init_ext3_xattr();
-+      int err;
++      /* the order threshold must be strictly positive */
++      value = simple_strtol(str, NULL, 0);
++      if (value <= 0)
++              return -ERANGE;
 +
-+      err = init_ext3_proc();
-+      if (err)
-+              return err;
++      ext3_mb_order2_reqs = value;
 +
-+      err = init_ext3_xattr();
-       if (err)
-               return err;
-       err = init_inodecache();
-@@ -2734,6 +2746,7 @@ static void __exit exit_ext3_fs(void)
-       unregister_filesystem(&ext3_fs_type);
-       destroy_inodecache();
-       exit_ext3_xattr();
-+      exit_ext3_proc();
- }
- int ext3_prep_san_write(struct inode *inode, long *blocks,
++      return count;
++}
++/* Create EXT3_ROOT under proc_root_fs plus its four entries; 0 or -EIO. */
++int __init init_ext3_proc(void)
++{
++      struct proc_dir_entry *proc_ext3_mb_stats;
++      struct proc_dir_entry *proc_ext3_mb_max_to_scan;
++      struct proc_dir_entry *proc_ext3_mb_min_to_scan;
++      struct proc_dir_entry *proc_ext3_mb_order2_req;
++
++      proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs);
++      if (proc_root_ext3 == NULL) {
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n", EXT3_ROOT);
++              return -EIO;
++      }
++
++      /* EXT3_MB_STATS_NAME entry; on failure remove EXT3_ROOT again */
++      proc_ext3_mb_stats = create_proc_entry(EXT3_MB_STATS_NAME,
++                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
++      if (proc_ext3_mb_stats == NULL) {
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
++                              EXT3_MB_STATS_NAME);
++              remove_proc_entry(EXT3_ROOT, proc_root_fs);
++              return -EIO;
++      }
++
++      proc_ext3_mb_stats->data = NULL;
++      proc_ext3_mb_stats->read_proc  = ext3_mb_stats_read;
++      proc_ext3_mb_stats->write_proc = ext3_mb_stats_write;
++
++      /* EXT3_MB_MAX_TO_SCAN_NAME entry; on failure undo everything above */
++      proc_ext3_mb_max_to_scan = create_proc_entry(
++                      EXT3_MB_MAX_TO_SCAN_NAME,
++                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
++      if (proc_ext3_mb_max_to_scan == NULL) {
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
++                              EXT3_MB_MAX_TO_SCAN_NAME);
++              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_ROOT, proc_root_fs);
++              return -EIO;
++      }
++
++      proc_ext3_mb_max_to_scan->data = NULL;
++      proc_ext3_mb_max_to_scan->read_proc  = ext3_mb_max_to_scan_read;
++      proc_ext3_mb_max_to_scan->write_proc = ext3_mb_max_to_scan_write;
++
++      /* EXT3_MB_MIN_TO_SCAN_NAME entry; on failure undo everything above */
++      proc_ext3_mb_min_to_scan = create_proc_entry(
++                      EXT3_MB_MIN_TO_SCAN_NAME,
++                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
++      if (proc_ext3_mb_min_to_scan == NULL) {
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
++                              EXT3_MB_MIN_TO_SCAN_NAME);
++              remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_ROOT, proc_root_fs);
++              return -EIO;
++      }
++
++      proc_ext3_mb_min_to_scan->data = NULL;
++      proc_ext3_mb_min_to_scan->read_proc  = ext3_mb_min_to_scan_read;
++      proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write;
++
++      /* EXT3_MB_ORDER2_REQ entry; on failure undo everything above */
++      proc_ext3_mb_order2_req = create_proc_entry(
++                      EXT3_MB_ORDER2_REQ,
++                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
++      if (proc_ext3_mb_order2_req == NULL) {
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
++                              EXT3_MB_ORDER2_REQ);
++              remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_ROOT, proc_root_fs);
++              return -EIO;
++      }
++
++      proc_ext3_mb_order2_req->data = NULL;
++      proc_ext3_mb_order2_req->read_proc  = ext3_mb_order2_req_read;
++      proc_ext3_mb_order2_req->write_proc = ext3_mb_order2_req_write;
++
++      return 0;
++}
++/* Remove the four proc entries, then EXT3_ROOT itself from proc_root_fs. */
++void exit_ext3_proc(void)
++{
++      remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
++      remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
++      remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++      remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3);
++      remove_proc_entry(EXT3_ROOT, proc_root_fs);
++}
 Index: linux-stage/fs/ext3/Makefile
 ===================================================================
 --- linux-stage.orig/fs/ext3/Makefile  2006-07-16 02:29:43.000000000 +0800
@@ -2634,177 +3135,3 @@ Index: linux-stage/fs/ext3/Makefile
  
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
  ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
-Index: linux-stage/include/linux/ext3_fs.h
-===================================================================
---- linux-stage.orig/include/linux/ext3_fs.h   2006-07-16 02:29:43.000000000 +0800
-+++ linux-stage/include/linux/ext3_fs.h        2006-07-16 02:29:49.000000000 +0800
-@@ -53,6 +53,14 @@
- #define ext3_debug(f, a...)   do {} while (0)
- #endif
-+#define EXT3_MULTIBLOCK_ALLOCATOR     1
-+
-+#define EXT3_MB_HINT_MERGE            1
-+#define EXT3_MB_HINT_RESERVED         2
-+#define EXT3_MB_HINT_METADATA         4
-+#define EXT3_MB_HINT_FIRST            8
-+#define EXT3_MB_HINT_BEST             16
-+
- /*
-  * Special inodes numbers
-  */
-@@ -379,6 +387,7 @@ struct ext3_inode {
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x800000/* Make iopen world-readable */
- #define EXT3_MOUNT_EXTENTS            0x1000000/* Extents support */
- #define EXT3_MOUNT_EXTDEBUG           0x2000000/* Extents debug */
-+#define EXT3_MOUNT_MBALLOC            0x4000000/* Buddy allocation support */
- /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
- #ifndef clear_opt
-@@ -749,12 +758,12 @@ ext3_group_first_block_no(struct super_b
- /* balloc.c */
- extern int ext3_bg_has_super(struct super_block *sb, int group);
- extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
--extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode,
--                      ext3_fsblk_t goal, int *errp);
-+//extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode,
-+//                    ext3_fsblk_t goal, int *errp);
- extern ext3_fsblk_t ext3_new_blocks (handle_t *handle, struct inode *inode,
-                       ext3_fsblk_t goal, unsigned long *count, int *errp);
- extern void ext3_free_blocks (handle_t *handle, struct inode *inode,
--                      ext3_fsblk_t block, unsigned long count);
-+                      ext3_fsblk_t block, unsigned long count, int metadata);
- extern void ext3_free_blocks_sb (handle_t *handle, struct super_block *sb,
-                                ext3_fsblk_t block, unsigned long count,
-                               unsigned long *pdquot_freed_blocks);
-@@ -881,6 +890,17 @@ extern void ext3_extents_initialize_bloc
- extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
-                         unsigned int cmd, unsigned long arg);
-+/* mballoc.c */
-+extern long ext3_mb_stats;
-+extern long ext3_mb_max_to_scan;
-+extern int ext3_mb_init(struct super_block *, int);
-+extern int ext3_mb_release(struct super_block *);
-+extern int ext3_mb_new_blocks(handle_t *, struct inode *, unsigned long, int *, int, int *);
-+extern int ext3_mb_reserve_blocks(struct super_block *, int);
-+extern void ext3_mb_release_blocks(struct super_block *, int);
-+int __init init_ext3_proc(void);
-+void exit_ext3_proc(void);
-+
- #endif        /* __KERNEL__ */
- /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
-Index: linux-stage/include/linux/ext3_fs_sb.h
-===================================================================
---- linux-stage.orig/include/linux/ext3_fs_sb.h        2006-07-16 02:29:43.000000000 +0800
-+++ linux-stage/include/linux/ext3_fs_sb.h     2006-07-16 02:29:49.000000000 +0800
-@@ -21,8 +21,14 @@
- #include <linux/wait.h>
- #include <linux/blockgroup_lock.h>
- #include <linux/percpu_counter.h>
-+#include <linux/list.h>
- #endif
- #include <linux/rbtree.h>
-+#include <linux/proc_fs.h>
-+
-+struct ext3_buddy_group_blocks;
-+struct ext3_mb_history;
-+#define EXT3_BB_MAX_BLOCKS
- /*
-  * third extended-fs super-block data in memory
-@@ -78,6 +84,38 @@ struct ext3_sb_info {
-       char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
-       int s_jquota_fmt;                       /* Format of quota to use */
- #endif
-+
-+      /* for buddy allocator */
-+      struct ext3_group_info **s_group_info;
-+      struct inode *s_buddy_cache;
-+      long s_blocks_reserved;
-+      spinlock_t s_reserve_lock;
-+      struct list_head s_active_transaction;
-+      struct list_head s_closed_transaction;
-+      struct list_head s_committed_transaction;
-+      spinlock_t s_md_lock;
-+      tid_t s_last_transaction;
-+      int s_mb_factor;
-+      unsigned short *s_mb_offsets, *s_mb_maxs;
-+
-+      /* history to debug policy */
-+      struct ext3_mb_history *s_mb_history;
-+      int s_mb_history_cur;
-+      int s_mb_history_max;
-+      struct proc_dir_entry *s_mb_proc;
-+      spinlock_t s_mb_history_lock;
-+
-+      /* stats for buddy allocator */
-+      atomic_t s_bal_reqs;    /* number of reqs with len > 1 */
-+      atomic_t s_bal_success; /* we found long enough chunks */
-+      atomic_t s_bal_allocated;       /* in blocks */
-+      atomic_t s_bal_ex_scanned;      /* total extents scanned */
-+      atomic_t s_bal_goals;   /* goal hits */
-+      atomic_t s_bal_breaks;  /* too long searches */
-+      atomic_t s_bal_2orders; /* 2^order hits */
-+      spinlock_t s_bal_lock;
-+      unsigned long s_mb_buddies_generated;
-+      unsigned long long s_mb_generation_time;
- };
- #endif        /* _LINUX_EXT3_FS_SB */
-Index: linux-stage/fs/ext3/inode.c
-===================================================================
---- linux-stage.orig/fs/ext3/inode.c   2006-07-16 02:29:43.000000000 +0800
-+++ linux-stage/fs/ext3/inode.c        2006-07-16 02:29:49.000000000 +0800
-@@ -562,7 +562,7 @@ static int ext3_alloc_blocks(handle_t *h
-       return ret;
- failed_out:
-       for (i = 0; i <index; i++)
--              ext3_free_blocks(handle, inode, new_blocks[i], 1);
-+              ext3_free_blocks(handle, inode, new_blocks[i], 1, 1);
-       return ret;
- }
-@@ -661,9 +661,9 @@ failed:
-               ext3_journal_forget(handle, branch[i].bh);
-       }
-       for (i = 0; i <indirect_blks; i++)
--              ext3_free_blocks(handle, inode, new_blocks[i], 1);
-+              ext3_free_blocks(handle, inode, new_blocks[i], 1, 1);
--      ext3_free_blocks(handle, inode, new_blocks[i], num);
-+      ext3_free_blocks(handle, inode, new_blocks[i], num, 1);
-       return err;
- }
-@@ -760,9 +760,9 @@ err_out:
-       for (i = 1; i <= num; i++) {
-               BUFFER_TRACE(where[i].bh, "call journal_forget");
-               ext3_journal_forget(handle, where[i].bh);
--              ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1);
-+              ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1,1);
-       }
--      ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks);
-+      ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 1);
-       return err;
- }
-@@ -2007,7 +2007,7 @@ static void ext3_clear_blocks(handle_t *
-               }
-       }
--      ext3_free_blocks(handle, inode, block_to_free, count);
-+      ext3_free_blocks(handle, inode, block_to_free, count, 1);
- }
- /**
-@@ -2180,7 +2180,7 @@ static void ext3_free_branches(handle_t 
-                               ext3_journal_test_restart(handle, inode);
-                       }
--                      ext3_free_blocks(handle, inode, nr, 1);
-+                      ext3_free_blocks(handle, inode, nr, 1, 1);
-                       if (parent_bh) {
-                               /*
index f2988a2..8bd7acb 100644 (file)
@@ -2538,26 +2538,30 @@ Index: linux-stage/fs/ext3/super.c
        Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
        Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
        Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
-+      Opt_extents, Opt_extdebug,
++      Opt_extents, Opt_noextents, Opt_extdebug,
        Opt_grpquota
  };
  
-@@ -690,6 +694,8 @@ static match_table_t tokens = {
+@@ -690,6 +694,9 @@ static match_table_t tokens = {
        {Opt_iopen, "iopen"},
        {Opt_noiopen, "noiopen"},
        {Opt_iopen_nopriv, "iopen_nopriv"},
 +      {Opt_extents, "extents"},
++      {Opt_noextents, "noextents"},
 +      {Opt_extdebug, "extdebug"},
        {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL},
        {Opt_resize, "resize"},
-@@ -1035,6 +1041,12 @@ clear_qf_name:
+@@ -1035,6 +1041,15 @@ clear_qf_name:
                case Opt_bh:
                        clear_opt(sbi->s_mount_opt, NOBH);
                        break;
 +              case Opt_extents:
 +                      set_opt (sbi->s_mount_opt, EXTENTS);
 +                      break;
++              case Opt_noextents:
++                      clear_opt (sbi->s_mount_opt, EXTENTS);
++                      break;
 +              case Opt_extdebug:
 +                      set_opt (sbi->s_mount_opt, EXTDEBUG);
 +                      break;
index 20fa78a..d83625a 100644 (file)
@@ -1,8 +1,400 @@
+Index: linux-stage/include/linux/ext3_fs.h
+===================================================================
+--- linux-stage.orig/include/linux/ext3_fs.h   2006-07-16 02:29:43.000000000 +0800
++++ linux-stage/include/linux/ext3_fs.h        2006-07-16 02:29:49.000000000 +0800
+@@ -53,6 +53,14 @@
+ #define ext3_debug(f, a...)   do {} while (0)
+ #endif
++#define EXT3_MULTIBLOCK_ALLOCATOR     1
++
++#define EXT3_MB_HINT_MERGE            1
++#define EXT3_MB_HINT_RESERVED         2
++#define EXT3_MB_HINT_METADATA         4
++#define EXT3_MB_HINT_FIRST            8
++#define EXT3_MB_HINT_BEST             16
++
+ /*
+  * Special inodes numbers
+  */
+@@ -379,6 +387,7 @@ struct ext3_inode {
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x800000/* Make iopen world-readable */
+ #define EXT3_MOUNT_EXTENTS            0x1000000/* Extents support */
+ #define EXT3_MOUNT_EXTDEBUG           0x2000000/* Extents debug */
++#define EXT3_MOUNT_MBALLOC            0x4000000/* Buddy allocation support */
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef clear_opt
+@@ -405,6 +413,14 @@
+ #define ext3_find_first_zero_bit      ext2_find_first_zero_bit
+ #define ext3_find_next_zero_bit               ext2_find_next_zero_bit
++#ifndef ext2_find_next_le_bit
++#ifdef __LITTLE_ENDIAN
++#define ext2_find_next_le_bit(addr, size, off) find_next_bit((addr), (size), (off))
++#else
++#error "mballoc needs a patch for big-endian systems - CFS bug 10634"
++#endif        /* __LITTLE_ENDIAN */
++#endif        /* !ext2_find_next_le_bit */
++
+ /*
+  * Maximal mount counts between two filesystem checks
+  */
+@@ -749,12 +758,12 @@ ext3_group_first_block_no(struct super_b
+ /* balloc.c */
+ extern int ext3_bg_has_super(struct super_block *sb, int group);
+ extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
+-extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode,
++extern ext3_fsblk_t ext3_new_block_old(handle_t *handle, struct inode *inode,
+                       ext3_fsblk_t goal, int *errp);
+ extern ext3_fsblk_t ext3_new_blocks (handle_t *handle, struct inode *inode,
+                       ext3_fsblk_t goal, unsigned long *count, int *errp);
+ extern void ext3_free_blocks (handle_t *handle, struct inode *inode,
+-                      ext3_fsblk_t block, unsigned long count);
++                      ext3_fsblk_t block, unsigned long count, int metadata);
+ extern void ext3_free_blocks_sb (handle_t *handle, struct super_block *sb,
+                                ext3_fsblk_t block, unsigned long count,
+                               unsigned long *pdquot_freed_blocks);
+@@ -881,6 +890,21 @@ extern void ext3_extents_initialize_bloc
+ extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
+                         unsigned int cmd, unsigned long arg);
++/* mballoc.c */
++extern long ext3_mb_stats;
++extern long ext3_mb_max_to_scan;
++extern int ext3_mb_init(struct super_block *sb, int needs_recovery);
++extern int ext3_mb_release(struct super_block *sb);
++extern ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode,
++                                 ext3_fsblk_t goal, int *errp);
++extern ext3_fsblk_t ext3_mb_new_blocks(handle_t *handle, struct inode *inode,
++                                     ext3_fsblk_t goal, int *len, int flags,
++                                     int *errp);
++extern int ext3_mb_reserve_blocks(struct super_block *sb, int);
++extern void ext3_mb_release_blocks(struct super_block *sb, int);
++int __init init_ext3_proc(void);
++void exit_ext3_proc(void);
++
+ #endif        /* __KERNEL__ */
+ /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
+Index: linux-stage/include/linux/ext3_fs_sb.h
+===================================================================
+--- linux-stage.orig/include/linux/ext3_fs_sb.h        2006-07-16 02:29:43.000000000 +0800
++++ linux-stage/include/linux/ext3_fs_sb.h     2006-07-16 02:29:49.000000000 +0800
+@@ -21,8 +21,14 @@
+ #include <linux/wait.h>
+ #include <linux/blockgroup_lock.h>
+ #include <linux/percpu_counter.h>
++#include <linux/list.h>
+ #endif
+ #include <linux/rbtree.h>
++#include <linux/proc_fs.h>
++
++struct ext3_buddy_group_blocks;
++struct ext3_mb_history;
++#define EXT3_BB_MAX_BLOCKS
+ /*
+  * third extended-fs super-block data in memory
+@@ -78,6 +84,43 @@ struct ext3_sb_info {
+       char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
+       int s_jquota_fmt;                       /* Format of quota to use */
+ #endif
++
++      /* for buddy allocator */
++      struct ext3_group_info ***s_group_info;
++      struct inode *s_buddy_cache;
++      long s_blocks_reserved;
++      spinlock_t s_reserve_lock;
++      struct list_head s_active_transaction;
++      struct list_head s_closed_transaction;
++      struct list_head s_committed_transaction;
++      spinlock_t s_md_lock;
++      tid_t s_last_transaction;
++      int s_mb_factor;
++      unsigned short *s_mb_offsets, *s_mb_maxs;
++      unsigned long s_stripe;
++
++      /* history to debug policy */
++      struct ext3_mb_history *s_mb_history;
++      int s_mb_history_cur;
++      int s_mb_history_max;
++      struct proc_dir_entry *s_mb_proc;
++      spinlock_t s_mb_history_lock;
++
++      /* stats for buddy allocator */
++      atomic_t s_bal_reqs;    /* number of reqs with len > 1 */
++      atomic_t s_bal_success; /* we found long enough chunks */
++      atomic_t s_bal_allocated;       /* in blocks */
++      atomic_t s_bal_ex_scanned;      /* total extents scanned */
++      atomic_t s_bal_goals;   /* goal hits */
++      atomic_t s_bal_breaks;  /* too long searches */
++      atomic_t s_bal_2orders; /* 2^order hits */
++      spinlock_t s_bal_lock;
++      unsigned long s_mb_buddies_generated;
++      unsigned long long s_mb_generation_time;
+ };
++/* ext3_group_info pointer for a group, via the two-level s_group_info table */
++#define EXT3_GROUP_INFO(sb, group)                                       \
++      EXT3_SB(sb)->s_group_info[(group) >> EXT3_DESC_PER_BLOCK_BITS(sb)] \
++                               [(group) & (EXT3_DESC_PER_BLOCK(sb) - 1)]
+ #endif        /* _LINUX_EXT3_FS_SB */
+Index: linux-stage/fs/ext3/super.c
+===================================================================
+--- linux-stage.orig/fs/ext3/super.c   2006-07-16 02:29:43.000000000 +0800
++++ linux-stage/fs/ext3/super.c        2006-07-16 02:29:49.000000000 +0800
+@@ -391,6 +391,7 @@ static void ext3_put_super (struct super
+       struct ext3_super_block *es = sbi->s_es;
+       int i;
++      ext3_mb_release(sb);
+       ext3_ext_release(sb);
+       ext3_xattr_put_super(sb);
+       journal_destroy(sbi->s_journal);
+@@ -642,6 +643,7 @@ enum {
+       Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
+       Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
+       Opt_extents, Opt_noextents, Opt_extdebug,
++      Opt_mballoc, Opt_nomballoc, Opt_stripe,
+       Opt_grpquota
+ };
+@@ -696,6 +697,9 @@ static match_table_t tokens = {
+       {Opt_extents, "extents"},
+       {Opt_noextents, "noextents"},
+       {Opt_extdebug, "extdebug"},
++      {Opt_mballoc, "mballoc"},
++      {Opt_nomballoc, "nomballoc"},
++      {Opt_stripe, "stripe=%u"},
+       {Opt_barrier, "barrier=%u"},
+       {Opt_err, NULL},
+       {Opt_resize, "resize"},
+@@ -1047,6 +1049,19 @@ clear_qf_name:
+               case Opt_extdebug:
+                       set_opt (sbi->s_mount_opt, EXTDEBUG);
+                       break;
++              case Opt_mballoc:
++                      set_opt (sbi->s_mount_opt, MBALLOC);
++                      break;
++              case Opt_nomballoc:
++                      clear_opt(sbi->s_mount_opt, MBALLOC);
++                      break;
++              case Opt_stripe:
++                      if (match_int(&args[0], &option))
++                              return 0;
++                      if (option < 0)
++                              return 0;
++                      sbi->s_stripe = option;
++                      break;
+               default:
+                       printk (KERN_ERR
+                               "EXT3-fs: Unrecognized mount option \"%s\" "
+@@ -1773,6 +1778,7 @@ static int ext3_fill_super (struct super
+               "writeback");
+       ext3_ext_init(sb);
++      ext3_mb_init(sb, needs_recovery);
+       lock_kernel();
+       return 0;
+@@ -2712,7 +2718,13 @@ static struct file_system_type ext3_fs_t
+ static int __init init_ext3_fs(void)
+ {
+-      int err = init_ext3_xattr();
++      int err;
++
++      err = init_ext3_proc();
++      if (err)
++              return err;
++
++      err = init_ext3_xattr();
+       if (err)
+               return err;
+       err = init_inodecache();
+@@ -2734,6 +2746,7 @@ static void __exit exit_ext3_fs(void)
+       unregister_filesystem(&ext3_fs_type);
+       destroy_inodecache();
+       exit_ext3_xattr();
++      exit_ext3_proc();
+ }
+ int ext3_prep_san_write(struct inode *inode, long *blocks,
+Index: linux-stage/fs/ext3/extents.c
+===================================================================
+--- linux-stage.orig/fs/ext3/extents.c 2006-07-16 02:29:43.000000000 +0800
++++ linux-stage/fs/ext3/extents.c      2006-07-16 02:29:49.000000000 +0800
+@@ -771,7 +771,7 @@ cleanup:
+               for (i = 0; i < depth; i++) {
+                       if (!ablocks[i])
+                               continue;
+-                      ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
++                      ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
+               }
+       }
+       kfree(ablocks);
+@@ -1428,7 +1428,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+                 path->p_idx->ei_leaf);
+       bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
+       ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
+-      ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
++      ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1);
+       return err;
+ }
+@@ -1913,10 +1913,12 @@ ext3_remove_blocks(struct ext3_extents_t
+       int needed = ext3_remove_blocks_credits(tree, ex, from, to);
+       handle_t *handle = ext3_journal_start(tree->inode, needed);
+       struct buffer_head *bh;
+-      int i;
++      int i, metadata = 0;
+       if (IS_ERR(handle))
+               return PTR_ERR(handle);
++      if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode))
++              metadata = 1;
+       if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
+               /* tail removal */
+               unsigned long num, start;
+@@ -1928,7 +1930,7 @@ ext3_remove_blocks(struct ext3_extents_t
+                       bh = sb_find_get_block(tree->inode->i_sb, start + i);
+                       ext3_forget(handle, 0, tree->inode, bh, start + i);
+               }
+-              ext3_free_blocks(handle, tree->inode, start, num);
++              ext3_free_blocks(handle, tree->inode, start, num, metadata);
+       } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
+               printk("strange request: removal %lu-%lu from %u:%u\n",
+                      from, to, ex->ee_block, ex->ee_len);
+Index: linux-stage/fs/ext3/inode.c
+===================================================================
+--- linux-stage.orig/fs/ext3/inode.c   2006-07-16 02:29:43.000000000 +0800
++++ linux-stage/fs/ext3/inode.c        2006-07-16 02:29:49.000000000 +0800
+@@ -562,7 +562,7 @@ static int ext3_alloc_blocks(handle_t *h
+       return ret;
+ failed_out:
+       for (i = 0; i <index; i++)
+-              ext3_free_blocks(handle, inode, new_blocks[i], 1);
++              ext3_free_blocks(handle, inode, new_blocks[i], 1, 1);
+       return ret;
+ }
+@@ -661,9 +661,9 @@ failed:
+               ext3_journal_forget(handle, branch[i].bh);
+       }
+       for (i = 0; i <indirect_blks; i++)
+-              ext3_free_blocks(handle, inode, new_blocks[i], 1);
++              ext3_free_blocks(handle, inode, new_blocks[i], 1, 1);
+-      ext3_free_blocks(handle, inode, new_blocks[i], num);
++      ext3_free_blocks(handle, inode, new_blocks[i], num, 1);
+       return err;
+ }
+@@ -760,9 +760,9 @@ err_out:
+       for (i = 1; i <= num; i++) {
+               BUFFER_TRACE(where[i].bh, "call journal_forget");
+               ext3_journal_forget(handle, where[i].bh);
+-              ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1);
++              ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1,1);
+       }
+-      ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks);
++      ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 1);
+       return err;
+ }
+@@ -2007,7 +2007,7 @@ static void ext3_clear_blocks(handle_t *
+               }
+       }
+-      ext3_free_blocks(handle, inode, block_to_free, count);
++      ext3_free_blocks(handle, inode, block_to_free, count, 1);
+ }
+ /**
+@@ -2180,7 +2180,7 @@ static void ext3_free_branches(handle_t 
+                               ext3_journal_test_restart(handle, inode);
+                       }
+-                      ext3_free_blocks(handle, inode, nr, 1);
++                      ext3_free_blocks(handle, inode, nr, 1, 1);
+                       if (parent_bh) {
+                               /*
+Index: linux-stage/fs/ext3/balloc.c
+===================================================================
+--- linux-stage.orig/fs/ext3/balloc.c  2006-07-16 02:29:43.000000000 +0800
++++ linux-stage/fs/ext3/balloc.c       2006-07-16 02:33:13.000000000 +0800
+@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
+  *
+  * Return buffer_head on success or NULL in case of failure.
+  */
+-static struct buffer_head *
++struct buffer_head *
+ read_block_bitmap(struct super_block *sb, unsigned int block_group)
+ {
+       struct ext3_group_desc * desc;
+@@ -490,24 +490,6 @@ error_return:
+       return;
+ }
+-/* Free given blocks, update quota and i_blocks field */
+-void ext3_free_blocks(handle_t *handle, struct inode *inode,
+-                      ext3_fsblk_t block, unsigned long count)
+-{
+-      struct super_block * sb;
+-      unsigned long dquot_freed_blocks;
+-
+-      sb = inode->i_sb;
+-      if (!sb) {
+-              printk ("ext3_free_blocks: nonexistent device");
+-              return;
+-      }
+-      ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
+-      if (dquot_freed_blocks)
+-              DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
+-      return;
+-}
+-
+ /*
+  * For ext3 allocations, we must not reuse any blocks which are
+  * allocated in the bitmap buffer's "last committed data" copy.  This
+@@ -1463,7 +1445,7 @@ out:
+       return 0;
+ }
+-ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode,
++ext3_fsblk_t ext3_new_block_old(handle_t *handle, struct inode *inode,
+                       ext3_fsblk_t goal, int *errp)
+ {
+       unsigned long count = 1;
+Index: linux-stage/fs/ext3/xattr.c
+===================================================================
+--- linux-stage.orig/fs/ext3/xattr.c   2006-07-16 02:29:43.000000000 +0800
++++ linux-stage/fs/ext3/xattr.c        2006-07-16 02:29:49.000000000 +0800
+@@ -484,7 +484,7 @@ ext3_xattr_release_block(handle_t *handl
+               ea_bdebug(bh, "refcount now=0; freeing");
+               if (ce)
+                       mb_cache_entry_free(ce);
+-              ext3_free_blocks(handle, inode, bh->b_blocknr, 1);
++              ext3_free_blocks(handle, inode, bh->b_blocknr, 1, 1);
+               get_bh(bh);
+               ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
+       } else {
+@@ -805,7 +805,7 @@ inserted:
+                       new_bh = sb_getblk(sb, block);
+                       if (!new_bh) {
+ getblk_failed:
+-                              ext3_free_blocks(handle, inode, block, 1);
++                              ext3_free_blocks(handle, inode, block, 1, 1);
+                               error = -EIO;
+                               goto cleanup;
+                       }
 Index: linux-stage/fs/ext3/mballoc.c
 ===================================================================
 --- /dev/null  1970-01-01 00:00:00.000000000 +0000
 +++ linux-stage/fs/ext3/mballoc.c      2006-07-16 02:29:49.000000000 +0800
-@@ -0,0 +1,2434 @@
+@@ -0,0 +1,2727 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -91,6 +483,11 @@ Index: linux-stage/fs/ext3/mballoc.c
 +
 +long ext3_mb_stats = 1;
 +
++/*
++ * for which requests use 2^N search using buddies
++ */
++long ext3_mb_order2_reqs = 8;
++
 +#ifdef EXT3_BB_MAX_BLOCKS
 +#undef EXT3_BB_MAX_BLOCKS
 +#endif
@@ -147,6 +544,9 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      __u8 ac_repeats;
 +      __u8 ac_2order;         /* if request is to allocate 2^N blocks and
 +                               * N > 0, the field stores N, otherwise 0 */
++
++      struct page *ac_buddy_page;
++      struct page *ac_bitmap_page;
 +};
 +
 +#define AC_STATUS_CONTINUE    1
@@ -156,6 +556,8 @@ Index: linux-stage/fs/ext3/mballoc.c
 +struct ext3_mb_history {
 +      struct ext3_free_extent goal;   /* goal allocation */
 +      struct ext3_free_extent result; /* result allocation */
++      unsigned pid;
++      unsigned ino;
 +      __u16 found;    /* how many extents have been found */
 +      __u16 groups;   /* how many groups have been scanned */
 +      __u16 tail;     /* what tail broke some buddy */
@@ -178,9 +580,9 @@ Index: linux-stage/fs/ext3/mballoc.c
 +#define EXT3_MB_BUDDY(e3b)    ((e3b)->bd_buddy)
 +
 +#ifndef EXT3_MB_HISTORY
-+#define ext3_mb_store_history(sb,ac)
++#define ext3_mb_store_history(sb,ino,ac)
 +#else
-+static void ext3_mb_store_history(struct super_block *,
++static void ext3_mb_store_history(struct super_block *, unsigned ino,
 +                              struct ext3_allocation_context *ac);
 +#endif
 +
@@ -190,7 +592,6 @@ Index: linux-stage/fs/ext3/mballoc.c
 +
 +int ext3_create (struct inode *, struct dentry *, int, struct nameidata *);
 +struct buffer_head * read_block_bitmap(struct super_block *, unsigned int);
-+int ext3_new_block_old(handle_t *, struct inode *, unsigned long, int *);
 +int ext3_mb_reserve_blocks(struct super_block *, int);
 +void ext3_mb_release_blocks(struct super_block *, int);
 +void ext3_mb_poll_new_transaction(struct super_block *, handle_t *);
@@ -424,8 +825,9 @@ Index: linux-stage/fs/ext3/mballoc.c
 +
 +static void
 +ext3_mb_generate_buddy(struct super_block *sb, void *buddy, void *bitmap,
-+                      struct ext3_group_info *grp)
++                      int group)
 +{
++      struct ext3_group_info *grp = EXT3_GROUP_INFO(sb, group);
 +      unsigned short max = EXT3_BLOCKS_PER_GROUP(sb);
 +      unsigned short i = 0, first, len;
 +      unsigned free = 0, fragments = 0;
@@ -436,7 +838,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      while (i < max) {
 +              fragments++;
 +              first = i;
-+              i = find_next_bit(bitmap, max, i);
++              i = ext2_find_next_le_bit(bitmap, max, i);
 +              len = i - first;
 +              free += len;
 +              if (len > 1) 
@@ -452,8 +854,8 @@ Index: linux-stage/fs/ext3/mballoc.c
 +       * others waits for init completion on page lock */
 +      clear_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, &grp->bb_state);
 +      if (free != grp->bb_free) {
-+              printk("EXT3-fs: %u blocks in bitmap, %u in group descriptor\n",
-+                      free, grp->bb_free);
++              printk("EXT3-fs: group %u: %u blocks in bitmap, %u in gd\n",
++                      group, free, grp->bb_free);
 +              grp->bb_free = free;
 +      }
 +
@@ -534,7 +936,10 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      for (i = 0; i < groups_per_page && bh[i]; i++)
 +              wait_on_buffer(bh[i]);
 +
-+      /* XXX: I/O error handling here */
++      err = -EIO;
++      for (i = 0; i < groups_per_page && bh[i]; i++)
++              if (!buffer_uptodate(bh[i]))
++                      goto out;
 +
 +      first_block = page->index * blocks_per_page;
 +      for (i = 0; i < blocks_per_page; i++) {
@@ -552,11 +957,10 @@ Index: linux-stage/fs/ext3/mballoc.c
 +                      mb_debug("put buddy for group %u in page %lu/%x\n",
 +                              group, page->index, i * blocksize);
 +                      memset(data, 0xff, blocksize);
-+                      EXT3_SB(sb)->s_group_info[group]->bb_fragments = 0;
-+                      memset(EXT3_SB(sb)->s_group_info[group]->bb_counters, 0,
++                      EXT3_GROUP_INFO(sb, group)->bb_fragments = 0;
++                      memset(EXT3_GROUP_INFO(sb, group)->bb_counters, 0,
 +                              sizeof(unsigned short)*(sb->s_blocksize_bits+2));
-+                      ext3_mb_generate_buddy(sb, data, bitmap,
-+                                              EXT3_SB(sb)->s_group_info[group]);
++                      ext3_mb_generate_buddy(sb, data, bitmap, group);
 +              } else {
 +                      /* this is block of bitmap */
 +                      mb_debug("put bitmap for group %u in page %lu/%x\n",
@@ -567,10 +971,12 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      SetPageUptodate(page);
 +
 +out:
-+      for (i = 0; i < groups_per_page && bh[i]; i++)
-+              brelse(bh[i]);
-+      if (bh && bh != &bhs)
-+              kfree(bh);
++      if (bh) {
++              for (i = 0; i < groups_per_page && bh[i]; i++)
++                      brelse(bh[i]);
++              if (bh != &bhs)
++                      kfree(bh);
++      }
 +      return err;
 +}
 +
@@ -587,7 +993,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
 +
 +      e3b->bd_blkbits = sb->s_blocksize_bits;
-+      e3b->bd_info = sbi->s_group_info[group];
++      e3b->bd_info = EXT3_GROUP_INFO(sb, group);
 +      e3b->bd_sb = sb;
 +      e3b->bd_group = group;
 +      e3b->bd_buddy_page = NULL;
@@ -597,12 +1003,15 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      pnum = block / blocks_per_page;
 +      poff = block % blocks_per_page;
 +
++      /* we could use find_or_create_page(), but it locks the page,
++       * which we'd like to avoid in the fast path ... */
 +      page = find_get_page(inode->i_mapping, pnum);
 +      if (page == NULL || !PageUptodate(page)) {
 +              if (page)
 +                      page_cache_release(page);
 +              page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
 +              if (page) {
++                      BUG_ON(page->mapping != inode->i_mapping);
 +                      if (!PageUptodate(page))
 +                              ext3_mb_init_cache(page);
 +                      unlock_page(page);
@@ -624,6 +1033,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +                      page_cache_release(page);
 +              page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
 +              if (page) {
++                      BUG_ON(page->mapping != inode->i_mapping);
 +                      if (!PageUptodate(page))
 +                              ext3_mb_init_cache(page);
 +                      unlock_page(page);
@@ -663,14 +1073,14 @@ Index: linux-stage/fs/ext3/mballoc.c
 +ext3_lock_group(struct super_block *sb, int group)
 +{
 +      bit_spin_lock(EXT3_GROUP_INFO_LOCKED_BIT,
-+                      &EXT3_SB(sb)->s_group_info[group]->bb_state);
++                    &EXT3_GROUP_INFO(sb, group)->bb_state);
 +}
 +
 +static inline void
 +ext3_unlock_group(struct super_block *sb, int group)
 +{
 +      bit_spin_unlock(EXT3_GROUP_INFO_LOCKED_BIT,
-+                      &EXT3_SB(sb)->s_group_info[group]->bb_state);
++                      &EXT3_GROUP_INFO(sb, group)->bb_state);
 +}
 +
 +static int mb_find_order_for_block(struct ext3_buddy *e3b, int block)
@@ -800,7 +1210,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +static int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
 +                              int needed, struct ext3_free_extent *ex)
 +{
-+      int next, max, ord;
++      int next = block, max, ord;
 +      void *buddy;
 +
 +      J_ASSERT(ex != NULL);
@@ -825,6 +1235,11 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      ex->fe_start = block << order;
 +      ex->fe_group = e3b->bd_group;
 +
++      /* calc difference from given start */
++      next = next - ex->fe_start;
++      ex->fe_len -= next;
++      ex->fe_start += next;
++
 +      while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) {
 +
 +              if (block + 1 >= max)
@@ -928,6 +1343,13 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      ac->ac_status = AC_STATUS_FOUND;
 +      ac->ac_tail = ret & 0xffff;
 +      ac->ac_buddy = ret >> 16;
++
++      /* hold in-core structures until allocated
++       * blocks are marked non-free in on-disk bitmap */
++      ac->ac_buddy_page = e3b->bd_buddy_page;
++      page_cache_get(e3b->bd_buddy_page);
++      ac->ac_bitmap_page = e3b->bd_bitmap_page;
++      page_cache_get(e3b->bd_bitmap_page);
 +}
 +
 +/*
@@ -963,7 +1385,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      }
 +
 +      /*
-+       * Let's check whether the chuck is good enough
++       * Let's check whether the chunk is good enough
 +       */
 +      if (ex->fe_len == gex->fe_len) {
 +              *bex = *ex;
@@ -1037,6 +1459,8 @@ Index: linux-stage/fs/ext3/mballoc.c
 +                              struct ext3_buddy *e3b)
 +{
 +      int group = ac->ac_g_ex.fe_group, max, err;
++      struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb);
++      struct ext3_super_block *es = sbi->s_es;
 +      struct ext3_free_extent ex;
 +
 +      err = ext3_mb_load_buddy(ac->ac_sb, group, e3b);
@@ -1047,7 +1471,25 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start,
 +                              ac->ac_g_ex.fe_len, &ex);
 +      
-+      if (max > 0) {
++      if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
++              unsigned long start;
++              start = (e3b->bd_group * EXT3_BLOCKS_PER_GROUP(ac->ac_sb) +
++                      ex.fe_start + le32_to_cpu(es->s_first_data_block));
++              if (start % sbi->s_stripe == 0) {
++                      ac->ac_found++;
++                      ac->ac_b_ex = ex;
++                      ext3_mb_use_best_found(ac, e3b);
++              }
++      } else if (max >= ac->ac_g_ex.fe_len) {
++              J_ASSERT(ex.fe_len > 0);
++              J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
++              J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
++              ac->ac_found++;
++              ac->ac_b_ex = ex;
++              ext3_mb_use_best_found(ac, e3b);
++      } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) {
++              /* Sometimes, caller may want to merge even small
++               * number of blocks to an existing extent */
 +              J_ASSERT(ex.fe_len > 0);
 +              J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
 +              J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
@@ -1075,7 +1517,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      int i, k, max;
 +
 +      J_ASSERT(ac->ac_2order > 0);
-+      for (i = ac->ac_2order; i < sb->s_blocksize_bits + 1; i++) {
++      for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
 +              if (grp->bb_counters[i] == 0)
 +                      continue;
 +
@@ -1140,11 +1582,46 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      }
 +}
 +
++/*
++ * This is a special case for storage like RAID5:
++ * we try to find stripe-aligned chunks for stripe-size requests
++ */
++static void ext3_mb_scan_aligned(struct ext3_allocation_context *ac,
++                               struct ext3_buddy *e3b)
++{
++      struct super_block *sb = ac->ac_sb;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      void *bitmap = EXT3_MB_BITMAP(e3b);
++      struct ext3_free_extent ex;
++      unsigned long i, max;
++
++      J_ASSERT(sbi->s_stripe != 0);
++
++      /* find first stripe-aligned block (as a group-relative offset) */
++      i = e3b->bd_group * EXT3_BLOCKS_PER_GROUP(sb)
++              + le32_to_cpu(sbi->s_es->s_first_data_block);
++      i = ((i + sbi->s_stripe - 1) / sbi->s_stripe) * sbi->s_stripe;
++      i = (i - le32_to_cpu(sbi->s_es->s_first_data_block))
++                      % EXT3_BLOCKS_PER_GROUP(sb);
++
++      while (i < sb->s_blocksize * 8) {
++              if (!mb_test_bit(i, bitmap)) {
++                      max = mb_find_extent(e3b, 0, i, sbi->s_stripe, &ex);
++                      if (max >= sbi->s_stripe) {
++                              ac->ac_found++;
++                              ac->ac_b_ex = ex;
++                              ext3_mb_use_best_found(ac, e3b);
++                              break;
++                      }
++              }
++              i += sbi->s_stripe;     /* step to the next aligned offset */
++      }
++}
++
 +static int ext3_mb_good_group(struct ext3_allocation_context *ac,
 +                              int group, int cr)
 +{
-+      struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb);
-+      struct ext3_group_info *grp = sbi->s_group_info[group];
++      struct ext3_group_info *grp = EXT3_GROUP_INFO(ac->ac_sb, group);
 +      unsigned free, fragments, i, bits;
 +
 +      J_ASSERT(cr >= 0 && cr < 4);
@@ -1161,15 +1638,18 @@ Index: linux-stage/fs/ext3/mballoc.c
 +              case 0:
 +                      J_ASSERT(ac->ac_2order != 0);
 +                      bits = ac->ac_sb->s_blocksize_bits + 1;
-+                      for (i = ac->ac_2order; i < bits; i++)
++                      for (i = ac->ac_2order; i <= bits; i++)
 +                              if (grp->bb_counters[i] > 0)
 +                                      return 1;
++                      break;
 +              case 1:
 +                      if ((free / fragments) >= ac->ac_g_ex.fe_len)
 +                              return 1;
++                      break;
 +              case 2:
 +                      if (free >= ac->ac_g_ex.fe_len)
 +                              return 1;
++                      break;
 +              case 3:
 +                      return 1;
 +              default:
@@ -1234,6 +1714,9 @@ Index: linux-stage/fs/ext3/mballoc.c
 +              }
 +      }
 +
++      ac.ac_buddy_page = NULL;
++      ac.ac_bitmap_page = NULL;
++
 +      /*
 +       * Check quota for allocation of this blocks.
 +       */
@@ -1270,23 +1753,27 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      ac.ac_2order = 0;
 +      ac.ac_criteria = 0;
 +
++      if (*len == 1 && sbi->s_stripe) {
++              /* looks like metadata; use a dirty hack for raid5:
++               * move all metadata into the first groups in the hope of
++               * hitting cached sectors, avoiding raid5 read-modify cycles */
++              ac.ac_g_ex.fe_group = group = 0;
++      }
++
 +      /* probably, the request is for 2^8+ blocks (1/2/3/... MB) */
 +      i = ffs(*len);
-+      if (i >= 8) {
++      if (i >= ext3_mb_order2_reqs) {
 +              i--;
 +              if ((*len & (~(1 << i))) == 0)
 +                      ac.ac_2order = i;
 +      }
 +
-+      /* Sometimes, caller may want to merge even small
-+       * number of blocks to an existing extent */
-+      if (ac.ac_flags & EXT3_MB_HINT_MERGE) {
-+              err = ext3_mb_find_by_goal(&ac, &e3b);
-+              if (err)
-+                      goto out_err;
-+              if (ac.ac_status == AC_STATUS_FOUND)
-+                      goto found;
-+      }
++      /* first, try the goal */
++      err = ext3_mb_find_by_goal(&ac, &e3b);
++      if (err)
++              goto out_err;
++      if (ac.ac_status == AC_STATUS_FOUND)
++              goto found;
 +
 +      /* Let's just scan groups to find more-less suitable blocks */
 +      cr = ac.ac_2order ? 0 : 1;
@@ -1297,7 +1784,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +                      if (group == EXT3_SB(sb)->s_groups_count)
 +                              group = 0;
 +
-+                      if (EXT3_MB_GRP_NEED_INIT(sbi->s_group_info[group])) {
++                      if (EXT3_MB_GRP_NEED_INIT(EXT3_GROUP_INFO(sb, group))) {
 +                              /* we need full data about the group
 +                               * to make a good selection */
 +                              err = ext3_mb_load_buddy(ac.ac_sb, group, &e3b);
@@ -1325,6 +1812,8 @@ Index: linux-stage/fs/ext3/mballoc.c
 +                      ac.ac_groups_scanned++;
 +                      if (cr == 0)
 +                              ext3_mb_simple_scan_group(&ac, &e3b);
++                      else if (cr == 1 && *len == sbi->s_stripe)
++                              ext3_mb_scan_aligned(&ac, &e3b);
 +                      else
 +                              ext3_mb_complex_scan_group(&ac, &e3b);
 +
@@ -1332,8 +1821,6 @@ Index: linux-stage/fs/ext3/mballoc.c
 +
 +                      ext3_mb_release_desc(&e3b);
 +
-+                      if (err)
-+                              goto out_err;
 +                      if (ac.ac_status != AC_STATUS_CONTINUE)
 +                              break;
 +              }
@@ -1347,7 +1834,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +               */
 +
 +              /*if (ac.ac_found > ext3_mb_max_to_scan)
-+                      printk(KERN_ERR "EXT3-fs: too long searching at "
++                      printk(KERN_DEBUG "EXT3-fs: too long searching at "
 +                              "%u (%d/%d)\n", cr, ac.ac_b_ex.fe_len,
 +                              ac.ac_g_ex.fe_len);*/
 +              ext3_mb_try_best_found(&ac, &e3b);
@@ -1356,7 +1843,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +                       * Someone more lucky has already allocated it.
 +                       * The only thing we can do is just take first
 +                       * found block(s)
-+                      printk(KERN_ERR "EXT3-fs: and someone won our chunk\n");
++                      printk(KERN_DEBUG "EXT3-fs: someone won our chunk\n");
 +                       */
 +                      ac.ac_b_ex.fe_group = 0;
 +                      ac.ac_b_ex.fe_start = 0;
@@ -1376,17 +1863,16 @@ Index: linux-stage/fs/ext3/mballoc.c
 +              *errp = -ENOSPC;
 +              block = 0;
 +#if 1
-+              printk(KERN_ERR "EXT3-fs: cant allocate: status %d, flags %d\n",
++              printk(KERN_ERR "EXT3-fs: can't allocate: status %d flags %d\n",
 +                      ac.ac_status, ac.ac_flags);
-+              printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d, cr %d\n",
++              printk(KERN_ERR "EXT3-fs: goal %d, best found %d/%d/%d cr %d\n",
 +                      ac.ac_g_ex.fe_len, ac.ac_b_ex.fe_group,
 +                      ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len, cr);
 +              printk(KERN_ERR "EXT3-fs: %lu block reserved, %d found\n",
 +                      sbi->s_blocks_reserved, ac.ac_found);
 +              printk("EXT3-fs: groups: ");
 +              for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++)
-+                      printk("%d: %d ", i,
-+                              sbi->s_group_info[i]->bb_free);
++                      printk("%d: %d ", i, EXT3_GROUP_INFO(sb, i)->bb_free);
 +              printk("\n");
 +#endif
 +              goto out;
@@ -1484,6 +1970,11 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      *errp = err;
 +      block = 0;
 +out:
++      if (ac.ac_buddy_page)
++              page_cache_release(ac.ac_buddy_page);
++      if (ac.ac_bitmap_page)
++              page_cache_release(ac.ac_bitmap_page);
++
 +      if (!(flags & EXT3_MB_HINT_RESERVED)) {
 +              /* block wasn't reserved before and we reserved it
 +               * at the beginning of allocation. it doesn't matter
@@ -1507,7 +1998,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +                      atomic_inc(&sbi->s_bal_breaks);
 +      }
 +
-+      ext3_mb_store_history(sb, &ac);
++      ext3_mb_store_history(sb, inode->i_ino, &ac);
 +
 +      return block;
 +}
@@ -1572,9 +2063,9 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      char buf[20], buf2[20];
 +
 +      if (v == SEQ_START_TOKEN) {
-+              seq_printf(seq, "%-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
-+                       "goal", "result", "found", "grps", "cr", "merge",
-+                       "tail", "broken");
++              seq_printf(seq, "%-5s %-8s %-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
++                       "pid", "inode", "goal", "result", "found", "grps", "cr",
++                       "merge", "tail", "broken");
 +              return 0;
 +      }
 +
@@ -1582,9 +2073,9 @@ Index: linux-stage/fs/ext3/mballoc.c
 +              hs->goal.fe_start, hs->goal.fe_len);
 +      sprintf(buf2, "%u/%u/%u", hs->result.fe_group,
 +              hs->result.fe_start, hs->result.fe_len);
-+      seq_printf(seq, "%-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n", buf,
-+                      buf2, hs->found, hs->groups, hs->cr, 
-+                      hs->merged ? "M" : "", hs->tail,
++      seq_printf(seq, "%-5u %-8u %-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n",
++                      hs->pid, hs->ino, buf, buf2, hs->found, hs->groups,
++                      hs->cr, hs->merged ? "M" : "", hs->tail,
 +                      hs->buddy ? 1 << hs->buddy : 0);
 +      return 0;
 +}
@@ -1652,12 +2143,107 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      .release        = ext3_mb_seq_history_release,
 +};
 +
++static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
++{
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      long group;
++
++      if (*pos < 0 || *pos >= sbi->s_groups_count)
++              return NULL;
++
++      group = *pos + 1;       /* biased by 1 so group 0 is not NULL */
++      return (void *) group;
++}
++
++static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      long group;
++
++      ++*pos;
++      if (*pos < 0 || *pos >= sbi->s_groups_count)
++              return NULL;
++      group = *pos + 1;       /* biased by 1 so group 0 is not NULL */
++      return (void *) group;
++}
++
++static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v)
++{
++      struct super_block *sb = seq->private;
++      long group = (long) v, i;
++      struct sg {
++              struct ext3_group_info info;
++              unsigned short counters[16];
++      } sg;
++
++      group--;        /* the iterator token is the group number plus one */
++      if (group == 0)
++              seq_printf(seq, "#%-5s: %-5s %-5s %-5s [ %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
++                       "group", "free", "frags", "first", "2^0", "2^1", "2^2",
++                       "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10",
++                       "2^11", "2^12", "2^13");
++
++      i = min_t(long, sizeof(sg), sizeof(struct ext3_group_info) +
++              (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]));
++      ext3_lock_group(sb, group);
++      memcpy(&sg, EXT3_GROUP_INFO(sb, group), i);     /* bounded by sizeof(sg) */
++      ext3_unlock_group(sb, group);
++
++      if (EXT3_MB_GRP_NEED_INIT(&sg.info))
++              return 0;
++
++      seq_printf(seq, "#%-5lu: %-5u %-5u %-5u [", group, sg.info.bb_free,
++                      sg.info.bb_fragments, sg.info.bb_first_free);
++      for (i = 0; i <= 13; i++)
++              seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
++                              sg.info.bb_counters[i] : 0);
++      seq_printf(seq, " ]\n");
++
++      return 0;
++}
++
++static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v)
++{
++}
++
++static struct seq_operations ext3_mb_seq_groups_ops = {
++      .start  = ext3_mb_seq_groups_start,
++      .next   = ext3_mb_seq_groups_next,
++      .stop   = ext3_mb_seq_groups_stop,
++      .show   = ext3_mb_seq_groups_show,
++};
++
++static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file)
++{
++      struct super_block *sb = PDE(inode)->data;
++      int rc;
++
++      rc = seq_open(file, &ext3_mb_seq_groups_ops);
++      if (rc == 0) {
++              struct seq_file *m = (struct seq_file *)file->private_data;
++              m->private = sb;        /* read back as seq->private */
++      }
++      return rc;
++
++}
++
++static struct file_operations ext3_mb_seq_groups_fops = {
++      .owner          = THIS_MODULE,
++      .open           = ext3_mb_seq_groups_open,
++      .read           = seq_read,
++      .llseek         = seq_lseek,
++      .release        = seq_release,
++};
++
 +static void ext3_mb_history_release(struct super_block *sb)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      char name[64];
 +
 +      snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name));
++      remove_proc_entry("mb_groups", sbi->s_mb_proc);
 +      remove_proc_entry("mb_history", sbi->s_mb_proc);
 +      remove_proc_entry(name, proc_root_ext3);
 +
@@ -1680,6 +2266,11 @@ Index: linux-stage/fs/ext3/mballoc.c
 +                      p->proc_fops = &ext3_mb_seq_history_fops;
 +                      p->data = sb;
 +              }
++              p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc);
++              if (p) {
++                      p->proc_fops = &ext3_mb_seq_groups_fops;
++                      p->data = sb;
++              }
 +      }
 +
 +      sbi->s_mb_history_max = 1000;
@@ -1692,7 +2283,8 @@ Index: linux-stage/fs/ext3/mballoc.c
 +}
 +
 +static void
-+ext3_mb_store_history(struct super_block *sb, struct ext3_allocation_context *ac)
++ext3_mb_store_history(struct super_block *sb, unsigned ino,
++                      struct ext3_allocation_context *ac)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      struct ext3_mb_history h;
@@ -1700,6 +2292,8 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      if (likely(sbi->s_mb_history == NULL))
 +              return;
 +
++      h.pid = current->pid;
++      h.ino = ino;
 +      h.goal = ac->ac_g_ex;
 +      h.result = ac->ac_b_ex;
 +      h.found = ac->ac_found;
@@ -1727,21 +2321,40 @@ Index: linux-stage/fs/ext3/mballoc.c
 +int ext3_mb_init_backend(struct super_block *sb)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      int i, len;
-+
-+      len = sizeof(struct ext3_buddy_group_blocks *) * sbi->s_groups_count;
-+      sbi->s_group_info = kmalloc(len, GFP_KERNEL);
++      int i, j, len, metalen;
++      int num_meta_group_infos =
++              (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) >>
++                      EXT3_DESC_PER_BLOCK_BITS(sb);
++      struct ext3_group_info **meta_group_info;
++
++      /* An 8TB filesystem with 64-bit pointers requires a 4096 byte
++       * kmalloc. A 128kb malloc should suffice for a 256TB filesystem.
++       * So a two level scheme suffices for now. */
++      sbi->s_group_info = kmalloc(sizeof(*sbi->s_group_info) *
++                                  num_meta_group_infos, GFP_KERNEL);
 +      if (sbi->s_group_info == NULL) {
-+              printk(KERN_ERR "EXT3-fs: can't allocate mem for buddy\n");
++              printk(KERN_ERR "EXT3-fs: can't allocate buddy meta group\n");
 +              return -ENOMEM;
 +      }
-+      memset(sbi->s_group_info, 0, len);
-+
 +      sbi->s_buddy_cache = new_inode(sb);
 +      if (sbi->s_buddy_cache == NULL) {
 +              printk(KERN_ERR "EXT3-fs: can't get new inode\n");
-+              kfree(sbi->s_group_info);
-+              return -ENOMEM;
++              goto err_freesgi;
++      }
++
++      metalen = sizeof(*meta_group_info) << EXT3_DESC_PER_BLOCK_BITS(sb);
++      for (i = 0; i < num_meta_group_infos; i++) {
++              if ((i + 1) == num_meta_group_infos)
++                      metalen = sizeof(*meta_group_info) *
++                              (sbi->s_groups_count -
++                                      (i << EXT3_DESC_PER_BLOCK_BITS(sb)));
++              meta_group_info = kmalloc(metalen, GFP_KERNEL);
++              if (meta_group_info == NULL) {
++                      printk(KERN_ERR "EXT3-fs: can't allocate mem for a "
++                             "buddy group\n");
++                      goto err_freemeta;
++              }
++              sbi->s_group_info[i] = meta_group_info;
 +      }
 +
 +      /* 
@@ -1753,30 +2366,42 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      for (i = 0; i < sbi->s_groups_count; i++) {
 +              struct ext3_group_desc * desc;
 +
-+              sbi->s_group_info[i] = kmalloc(len, GFP_KERNEL);
-+              if (sbi->s_group_info[i] == NULL) {
-+                      printk(KERN_ERR "EXT3-fs: cant allocate mem for buddy\n");
-+                      goto err_out;
++              meta_group_info =
++                      sbi->s_group_info[i >> EXT3_DESC_PER_BLOCK_BITS(sb)];
++              j = i & (EXT3_DESC_PER_BLOCK(sb) - 1);
++
++              meta_group_info[j] = kmalloc(len, GFP_KERNEL);
++              if (meta_group_info[j] == NULL) {
++                      printk(KERN_ERR "EXT3-fs: can't allocate buddy mem\n");
++                      i--;
++                      goto err_freebuddy;
 +              }
 +              desc = ext3_get_group_desc(sb, i, NULL);
 +              if (desc == NULL) {
-+                      printk(KERN_ERR "EXT3-fs: cant read descriptor %u\n", i);
-+                      goto err_out;
++                      printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i);
++                      goto err_freebuddy;
 +              }
-+              memset(sbi->s_group_info[i], 0, len);
++              memset(meta_group_info[j], 0, len);
 +              set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT,
-+                      &sbi->s_group_info[i]->bb_state);
-+              sbi->s_group_info[i]->bb_free =
++                      &meta_group_info[j]->bb_state);
++              meta_group_info[j]->bb_free =
 +                      le16_to_cpu(desc->bg_free_blocks_count);
 +      }
 +
 +      return 0;
 +
-+err_out:
++err_freebuddy:
++      while (i >= 0) {
++              kfree(EXT3_GROUP_INFO(sb, i));
++              i--;
++      }
++      i = num_meta_group_infos;
++err_freemeta:
 +      while (--i >= 0)
 +              kfree(sbi->s_group_info[i]);
 +      iput(sbi->s_buddy_cache);
-+
++err_freesgi:
++      kfree(sbi->s_group_info);
 +      return -ENOMEM;
 +}
 +
@@ -1818,7 +2443,6 @@ Index: linux-stage/fs/ext3/mballoc.c
 +              max = max >> 1;
 +              i++;
 +      } while (i <= sb->s_blocksize_bits + 1);
-+      
 +
 +      /* init file for buddy data */
 +      if ((i = ext3_mb_init_backend(sb))) {
@@ -1855,7 +2479,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +int ext3_mb_release(struct super_block *sb)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(sb);
-+      int i;
++      int i, num_meta_group_infos;
 +      
 +      if (!test_opt(sb, MBALLOC))
 +              return 0;
@@ -1870,11 +2494,13 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      ext3_mb_free_committed_blocks(sb);
 +
 +      if (sbi->s_group_info) {
-+              for (i = 0; i < sbi->s_groups_count; i++) {
-+                      if (sbi->s_group_info[i] == NULL)
-+                              continue;
++              for (i = 0; i < sbi->s_groups_count; i++)
++                      kfree(EXT3_GROUP_INFO(sb, i));
++              num_meta_group_infos = (sbi->s_groups_count +
++                      EXT3_DESC_PER_BLOCK(sb) - 1) >>
++                      EXT3_DESC_PER_BLOCK_BITS(sb);
++              for (i = 0; i < num_meta_group_infos; i++)
 +                      kfree(sbi->s_group_info[i]);
-+              }
 +              kfree(sbi->s_group_info);
 +      }
 +      if (sbi->s_mb_offsets)
@@ -1936,6 +2562,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +                              md->num, md->group, md);
 +
 +              err = ext3_mb_load_buddy(sb, md->group, &e3b);
++              /* we expect to find existing buddy because it's pinned */
 +              BUG_ON(err != 0);
 +
 +              /* there are blocks to put in buddy to make them really free */
@@ -2234,7 +2861,6 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      return ret;
 +}
 +
-+
 +void ext3_free_blocks(handle_t *handle, struct inode * inode,
 +                      unsigned long block, unsigned long count, int metadata)
 +{
@@ -2242,7 +2868,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      int freed;
 +
 +      sb = inode->i_sb;
-+      if (!test_opt(sb, MBALLOC))
++      if (!test_opt(sb, MBALLOC) || !EXT3_SB(sb)->s_group_info)
 +              ext3_free_blocks_sb(handle, sb, block, count, &freed);
 +      else
 +              ext3_mb_free_blocks(handle, inode, block, count, metadata, &freed);
@@ -2255,6 +2881,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +#define EXT3_MB_STATS_NAME        "mb_stats"
 +#define EXT3_MB_MAX_TO_SCAN_NAME  "mb_max_to_scan"
 +#define EXT3_MB_MIN_TO_SCAN_NAME  "mb_min_to_scan"
++#define EXT3_MB_ORDER2_REQ       "mb_order2_req"
 +
 +static int ext3_mb_stats_read(char *page, char **start, off_t off,
 +              int count, int *eof, void *data)
@@ -2276,7 +2903,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      char str[32];
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_STATS_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2310,7 +2937,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      long value;
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_MAX_TO_SCAN_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2349,7 +2976,7 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      long value;
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string to long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2367,260 +2994,134 @@ Index: linux-stage/fs/ext3/mballoc.c
 +      return count;
 +}
 +
-+int __init init_ext3_proc(void)
++static int ext3_mb_order2_req_read(char *page, char **start, off_t off,
++                                 int count, int *eof, void *data)
 +{
-+      struct proc_dir_entry *proc_ext3_mb_stats;
-+      struct proc_dir_entry *proc_ext3_mb_max_to_scan;
-+      struct proc_dir_entry *proc_ext3_mb_min_to_scan;
-+
-+      proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs);
-+      if (proc_root_ext3 == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n", EXT3_ROOT);
-+              return -EIO;
-+      }
-+
-+      /* Initialize EXT3_MB_STATS_NAME */
-+      proc_ext3_mb_stats = create_proc_entry(EXT3_MB_STATS_NAME,
-+                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
-+      if (proc_ext3_mb_stats == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
-+                              EXT3_MB_STATS_NAME);
-+              remove_proc_entry(EXT3_ROOT, proc_root_fs);
-+              return -EIO;
-+      }
++      int len;
 +
-+      proc_ext3_mb_stats->data = NULL;
-+      proc_ext3_mb_stats->read_proc  = ext3_mb_stats_read;
-+      proc_ext3_mb_stats->write_proc = ext3_mb_stats_write;
++      *eof = 1;
++      if (off != 0)
++              return 0;
 +
-+      /* Initialize EXT3_MAX_TO_SCAN_NAME */
-+      proc_ext3_mb_max_to_scan = create_proc_entry(
-+                      EXT3_MB_MAX_TO_SCAN_NAME,
-+                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
-+      if (proc_ext3_mb_max_to_scan == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
-+                              EXT3_MB_MAX_TO_SCAN_NAME);
-+              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
-+              remove_proc_entry(EXT3_ROOT, proc_root_fs);
-+              return -EIO;
-+      }
++      len = sprintf(page, "%ld\n", ext3_mb_order2_reqs);
++      *start = page;
++      return len;
++}
 +
-+      proc_ext3_mb_max_to_scan->data = NULL;
-+      proc_ext3_mb_max_to_scan->read_proc  = ext3_mb_max_to_scan_read;
-+      proc_ext3_mb_max_to_scan->write_proc = ext3_mb_max_to_scan_write;
++static int ext3_mb_order2_req_write(struct file *file, const char *buffer,
++                                  unsigned long count, void *data)
++{
++      char str[32];
++      long value;
 +
-+      /* Initialize EXT3_MIN_TO_SCAN_NAME */
-+      proc_ext3_mb_min_to_scan = create_proc_entry(
-+                      EXT3_MB_MIN_TO_SCAN_NAME,
-+                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
-+      if (proc_ext3_mb_min_to_scan == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
-+                              EXT3_MB_MIN_TO_SCAN_NAME);
-+              remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
-+              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
-+              remove_proc_entry(EXT3_ROOT, proc_root_fs);
-+              return -EIO;
++      if (count >= sizeof(str)) {
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
++                     EXT3_MB_ORDER2_REQ, (int)sizeof(str));
++              return -EOVERFLOW;
 +      }
 +
-+      proc_ext3_mb_min_to_scan->data = NULL;
-+      proc_ext3_mb_min_to_scan->read_proc  = ext3_mb_min_to_scan_read;
-+      proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write;
-+
-+      return 0;
-+}
-+
-+void exit_ext3_proc(void)
-+{
-+      remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
-+      remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
-+      remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
-+      remove_proc_entry(EXT3_ROOT, proc_root_fs);
-+}
++      if (copy_from_user(str, buffer, count))
++              return -EFAULT;
 +
-Index: linux-stage/fs/ext3/extents.c
-===================================================================
---- linux-stage.orig/fs/ext3/extents.c 2006-07-16 02:29:43.000000000 +0800
-+++ linux-stage/fs/ext3/extents.c      2006-07-16 02:29:49.000000000 +0800
-@@ -771,7 +771,7 @@ cleanup:
-               for (i = 0; i < depth; i++) {
-                       if (!ablocks[i])
-                               continue;
--                      ext3_free_blocks(handle, tree->inode, ablocks[i], 1);
-+                      ext3_free_blocks(handle, tree->inode, ablocks[i], 1, 1);
-               }
-       }
-       kfree(ablocks);
-@@ -1428,7 +1428,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
-                 path->p_idx->ei_leaf);
-       bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
-       ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
--      ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1);
-+      ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1, 1);
-       return err;
- }
-@@ -1913,10 +1913,12 @@ ext3_remove_blocks(struct ext3_extents_t
-       int needed = ext3_remove_blocks_credits(tree, ex, from, to);
-       handle_t *handle = ext3_journal_start(tree->inode, needed);
-       struct buffer_head *bh;
--      int i;
-+      int i, metadata = 0;
-       if (IS_ERR(handle))
-               return PTR_ERR(handle);
-+      if (S_ISDIR(tree->inode->i_mode) || S_ISLNK(tree->inode->i_mode))
-+              metadata = 1;
-       if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
-               /* tail removal */
-               unsigned long num, start;
-@@ -1928,7 +1930,7 @@ ext3_remove_blocks(struct ext3_extents_t
-                       bh = sb_find_get_block(tree->inode->i_sb, start + i);
-                       ext3_forget(handle, 0, tree->inode, bh, start + i);
-               }
--              ext3_free_blocks(handle, tree->inode, start, num);
-+              ext3_free_blocks(handle, tree->inode, start, num, metadata);
-       } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
-               printk("strange request: removal %lu-%lu from %u:%u\n",
-                      from, to, ex->ee_block, ex->ee_len);
-Index: linux-stage/fs/ext3/xattr.c
-===================================================================
---- linux-stage.orig/fs/ext3/xattr.c   2006-07-16 02:29:43.000000000 +0800
-+++ linux-stage/fs/ext3/xattr.c        2006-07-16 02:29:49.000000000 +0800
-@@ -484,7 +484,7 @@ ext3_xattr_release_block(handle_t *handl
-               ea_bdebug(bh, "refcount now=0; freeing");
-               if (ce)
-                       mb_cache_entry_free(ce);
--              ext3_free_blocks(handle, inode, bh->b_blocknr, 1);
-+              ext3_free_blocks(handle, inode, bh->b_blocknr, 1, 1);
-               get_bh(bh);
-               ext3_forget(handle, 1, inode, bh, bh->b_blocknr);
-       } else {
-@@ -805,7 +805,7 @@ inserted:
-                       new_bh = sb_getblk(sb, block);
-                       if (!new_bh) {
- getblk_failed:
--                              ext3_free_blocks(handle, inode, block, 1);
-+                              ext3_free_blocks(handle, inode, block, 1, 1);
-                               error = -EIO;
-                               goto cleanup;
-                       }
-Index: linux-stage/fs/ext3/balloc.c
-===================================================================
---- linux-stage.orig/fs/ext3/balloc.c  2006-07-16 02:29:43.000000000 +0800
-+++ linux-stage/fs/ext3/balloc.c       2006-07-16 02:33:13.000000000 +0800
-@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
-  *
-  * Return buffer_head on success or NULL in case of failure.
-  */
--static struct buffer_head *
-+struct buffer_head *
- read_block_bitmap(struct super_block *sb, unsigned int block_group)
- {
-       struct ext3_group_desc * desc;
-@@ -490,24 +490,6 @@ error_return:
-       return;
- }
--/* Free given blocks, update quota and i_blocks field */
--void ext3_free_blocks(handle_t *handle, struct inode *inode,
--                      ext3_fsblk_t block, unsigned long count)
--{
--      struct super_block * sb;
--      unsigned long dquot_freed_blocks;
--
--      sb = inode->i_sb;
--      if (!sb) {
--              printk ("ext3_free_blocks: nonexistent device");
--              return;
--      }
--      ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
--      if (dquot_freed_blocks)
--              DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
--      return;
--}
--
- /*
-  * For ext3 allocations, we must not reuse any blocks which are
-  * allocated in the bitmap buffer's "last committed data" copy.  This
-@@ -1463,7 +1445,7 @@ out:
-       return 0;
- }
--ext3_fsblk_t ext3_new_block(handle_t *handle, struct inode *inode,
-+ext3_fsblk_t ext3_new_block_old(handle_t *handle, struct inode *inode,
-                       ext3_fsblk_t goal, int *errp)
- {
-       unsigned long count = 1;
-Index: linux-stage/fs/ext3/super.c
-===================================================================
---- linux-stage.orig/fs/ext3/super.c   2006-07-16 02:29:43.000000000 +0800
-+++ linux-stage/fs/ext3/super.c        2006-07-16 02:29:49.000000000 +0800
-@@ -391,6 +391,7 @@ static void ext3_put_super (struct super
-       struct ext3_super_block *es = sbi->s_es;
-       int i;
-+      ext3_mb_release(sb);
-       ext3_ext_release(sb);
-       ext3_xattr_put_super(sb);
-       journal_destroy(sbi->s_journal);
-@@ -641,7 +642,7 @@ enum {
-       Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota,
-       Opt_ignore, Opt_barrier, Opt_err, Opt_resize, Opt_usrquota,
-       Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
--      Opt_extents, Opt_extdebug,
-+      Opt_extents, Opt_extdebug, Opt_mballoc,
-       Opt_grpquota
- };
-@@ -696,6 +697,7 @@ static match_table_t tokens = {
-       {Opt_iopen_nopriv, "iopen_nopriv"},
-       {Opt_extents, "extents"},
-       {Opt_extdebug, "extdebug"},
-+      {Opt_mballoc, "mballoc"},
-       {Opt_barrier, "barrier=%u"},
-       {Opt_err, NULL},
-       {Opt_resize, "resize"},
-@@ -1047,6 +1049,9 @@ clear_qf_name:
-               case Opt_extdebug:
-                       set_opt (sbi->s_mount_opt, EXTDEBUG);
-                       break;
-+              case Opt_mballoc:
-+                      set_opt (sbi->s_mount_opt, MBALLOC);
-+                      break;
-               default:
-                       printk (KERN_ERR
-                               "EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1773,6 +1778,7 @@ static int ext3_fill_super (struct super
-               "writeback");
-       ext3_ext_init(sb);
-+      ext3_mb_init(sb, needs_recovery);
-       lock_kernel();
-       return 0;
-@@ -2712,7 +2718,13 @@ static struct file_system_type ext3_fs_t
- static int __init init_ext3_fs(void)
- {
--      int err = init_ext3_xattr();
-+      int err;
++      /* Accept only positive values; zero or negative input is rejected */
++      value = simple_strtol(str, NULL, 0);
++      if (value <= 0)
++              return -ERANGE;
 +
-+      err = init_ext3_proc();
-+      if (err)
-+              return err;
++      ext3_mb_order2_reqs = value;    
 +
-+      err = init_ext3_xattr();
-       if (err)
-               return err;
-       err = init_inodecache();
-@@ -2734,6 +2746,7 @@ static void __exit exit_ext3_fs(void)
-       unregister_filesystem(&ext3_fs_type);
-       destroy_inodecache();
-       exit_ext3_xattr();
-+      exit_ext3_proc();
- }
- int ext3_prep_san_write(struct inode *inode, long *blocks,
++      return count;
++}
++
++int __init init_ext3_proc(void)
++{
++      struct proc_dir_entry *proc_ext3_mb_stats;
++      struct proc_dir_entry *proc_ext3_mb_max_to_scan;
++      struct proc_dir_entry *proc_ext3_mb_min_to_scan;
++      struct proc_dir_entry *proc_ext3_mb_order2_req;
++
++      proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs);
++      if (proc_root_ext3 == NULL) {
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n", EXT3_ROOT);
++              return -EIO;
++      }
++
++      /* Initialize EXT3_MB_STATS_NAME */
++      proc_ext3_mb_stats = create_proc_entry(EXT3_MB_STATS_NAME,
++                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
++      if (proc_ext3_mb_stats == NULL) {
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
++                              EXT3_MB_STATS_NAME);
++              remove_proc_entry(EXT3_ROOT, proc_root_fs);
++              return -EIO;
++      }
++
++      proc_ext3_mb_stats->data = NULL;
++      proc_ext3_mb_stats->read_proc  = ext3_mb_stats_read;
++      proc_ext3_mb_stats->write_proc = ext3_mb_stats_write;
++
++      /* Initialize EXT3_MB_MAX_TO_SCAN_NAME */
++      proc_ext3_mb_max_to_scan = create_proc_entry(
++                      EXT3_MB_MAX_TO_SCAN_NAME,
++                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
++      if (proc_ext3_mb_max_to_scan == NULL) {
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
++                              EXT3_MB_MAX_TO_SCAN_NAME);
++              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_ROOT, proc_root_fs);
++              return -EIO;
++      }
++
++      proc_ext3_mb_max_to_scan->data = NULL;
++      proc_ext3_mb_max_to_scan->read_proc  = ext3_mb_max_to_scan_read;
++      proc_ext3_mb_max_to_scan->write_proc = ext3_mb_max_to_scan_write;
++
++      /* Initialize EXT3_MB_MIN_TO_SCAN_NAME */
++      proc_ext3_mb_min_to_scan = create_proc_entry(
++                      EXT3_MB_MIN_TO_SCAN_NAME,
++                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
++      if (proc_ext3_mb_min_to_scan == NULL) {
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
++                              EXT3_MB_MIN_TO_SCAN_NAME);
++              remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_ROOT, proc_root_fs);
++              return -EIO;
++      }
++
++      proc_ext3_mb_min_to_scan->data = NULL;
++      proc_ext3_mb_min_to_scan->read_proc  = ext3_mb_min_to_scan_read;
++      proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write;
++
++      /* Initialize EXT3_MB_ORDER2_REQ */
++      proc_ext3_mb_order2_req = create_proc_entry(
++                      EXT3_MB_ORDER2_REQ,
++                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
++      if (proc_ext3_mb_order2_req == NULL) {
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
++                              EXT3_MB_ORDER2_REQ);
++              remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_ROOT, proc_root_fs);
++              return -EIO;
++      }
++
++      proc_ext3_mb_order2_req->data = NULL;
++      proc_ext3_mb_order2_req->read_proc  = ext3_mb_order2_req_read;
++      proc_ext3_mb_order2_req->write_proc = ext3_mb_order2_req_write;
++
++      return 0;
++}
++
++void exit_ext3_proc(void)
++{
++      remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
++      remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
++      remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++      remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3);
++      remove_proc_entry(EXT3_ROOT, proc_root_fs);
++}
 Index: linux-stage/fs/ext3/Makefile
 ===================================================================
 --- linux-stage.orig/fs/ext3/Makefile  2006-07-16 02:29:43.000000000 +0800
@@ -2634,177 +3135,3 @@ Index: linux-stage/fs/ext3/Makefile
  
  ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
  ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
-Index: linux-stage/include/linux/ext3_fs.h
-===================================================================
---- linux-stage.orig/include/linux/ext3_fs.h   2006-07-16 02:29:43.000000000 +0800
-+++ linux-stage/include/linux/ext3_fs.h        2006-07-16 02:29:49.000000000 +0800
-@@ -53,6 +53,14 @@
- #define ext3_debug(f, a...)   do {} while (0)
- #endif
-+#define EXT3_MULTIBLOCK_ALLOCATOR     1
-+
-+#define EXT3_MB_HINT_MERGE            1
-+#define EXT3_MB_HINT_RESERVED         2
-+#define EXT3_MB_HINT_METADATA         4
-+#define EXT3_MB_HINT_FIRST            8
-+#define EXT3_MB_HINT_BEST             16
-+
- /*
-  * Special inodes numbers
-  */
-@@ -379,6 +387,7 @@ struct ext3_inode {
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x800000/* Make iopen world-readable */
- #define EXT3_MOUNT_EXTENTS            0x1000000/* Extents support */
- #define EXT3_MOUNT_EXTDEBUG           0x2000000/* Extents debug */
-+#define EXT3_MOUNT_MBALLOC            0x4000000/* Buddy allocation support */
- /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
- #ifndef clear_opt
-@@ -749,12 +758,12 @@ ext3_group_first_block_no(struct super_b
- /* balloc.c */
- extern int ext3_bg_has_super(struct super_block *sb, int group);
- extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
--extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode,
--                      ext3_fsblk_t goal, int *errp);
-+//extern ext3_fsblk_t ext3_new_block (handle_t *handle, struct inode *inode,
-+//                    ext3_fsblk_t goal, int *errp);
- extern ext3_fsblk_t ext3_new_blocks (handle_t *handle, struct inode *inode,
-                       ext3_fsblk_t goal, unsigned long *count, int *errp);
- extern void ext3_free_blocks (handle_t *handle, struct inode *inode,
--                      ext3_fsblk_t block, unsigned long count);
-+                      ext3_fsblk_t block, unsigned long count, int metadata);
- extern void ext3_free_blocks_sb (handle_t *handle, struct super_block *sb,
-                                ext3_fsblk_t block, unsigned long count,
-                               unsigned long *pdquot_freed_blocks);
-@@ -881,6 +890,17 @@ extern void ext3_extents_initialize_bloc
- extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
-                         unsigned int cmd, unsigned long arg);
-+/* mballoc.c */
-+extern long ext3_mb_stats;
-+extern long ext3_mb_max_to_scan;
-+extern int ext3_mb_init(struct super_block *, int);
-+extern int ext3_mb_release(struct super_block *);
-+extern int ext3_mb_new_blocks(handle_t *, struct inode *, unsigned long, int *, int, int *);
-+extern int ext3_mb_reserve_blocks(struct super_block *, int);
-+extern void ext3_mb_release_blocks(struct super_block *, int);
-+int __init init_ext3_proc(void);
-+void exit_ext3_proc(void);
-+
- #endif        /* __KERNEL__ */
- /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
-Index: linux-stage/include/linux/ext3_fs_sb.h
-===================================================================
---- linux-stage.orig/include/linux/ext3_fs_sb.h        2006-07-16 02:29:43.000000000 +0800
-+++ linux-stage/include/linux/ext3_fs_sb.h     2006-07-16 02:29:49.000000000 +0800
-@@ -21,8 +21,14 @@
- #include <linux/wait.h>
- #include <linux/blockgroup_lock.h>
- #include <linux/percpu_counter.h>
-+#include <linux/list.h>
- #endif
- #include <linux/rbtree.h>
-+#include <linux/proc_fs.h>
-+
-+struct ext3_buddy_group_blocks;
-+struct ext3_mb_history;
-+#define EXT3_BB_MAX_BLOCKS
- /*
-  * third extended-fs super-block data in memory
-@@ -78,6 +84,38 @@ struct ext3_sb_info {
-       char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
-       int s_jquota_fmt;                       /* Format of quota to use */
- #endif
-+
-+      /* for buddy allocator */
-+      struct ext3_group_info **s_group_info;
-+      struct inode *s_buddy_cache;
-+      long s_blocks_reserved;
-+      spinlock_t s_reserve_lock;
-+      struct list_head s_active_transaction;
-+      struct list_head s_closed_transaction;
-+      struct list_head s_committed_transaction;
-+      spinlock_t s_md_lock;
-+      tid_t s_last_transaction;
-+      int s_mb_factor;
-+      unsigned short *s_mb_offsets, *s_mb_maxs;
-+
-+      /* history to debug policy */
-+      struct ext3_mb_history *s_mb_history;
-+      int s_mb_history_cur;
-+      int s_mb_history_max;
-+      struct proc_dir_entry *s_mb_proc;
-+      spinlock_t s_mb_history_lock;
-+
-+      /* stats for buddy allocator */
-+      atomic_t s_bal_reqs;    /* number of reqs with len > 1 */
-+      atomic_t s_bal_success; /* we found long enough chunks */
-+      atomic_t s_bal_allocated;       /* in blocks */
-+      atomic_t s_bal_ex_scanned;      /* total extents scanned */
-+      atomic_t s_bal_goals;   /* goal hits */
-+      atomic_t s_bal_breaks;  /* too long searches */
-+      atomic_t s_bal_2orders; /* 2^order hits */
-+      spinlock_t s_bal_lock;
-+      unsigned long s_mb_buddies_generated;
-+      unsigned long long s_mb_generation_time;
- };
- #endif        /* _LINUX_EXT3_FS_SB */
-Index: linux-stage/fs/ext3/inode.c
-===================================================================
---- linux-stage.orig/fs/ext3/inode.c   2006-07-16 02:29:43.000000000 +0800
-+++ linux-stage/fs/ext3/inode.c        2006-07-16 02:29:49.000000000 +0800
-@@ -562,7 +562,7 @@ static int ext3_alloc_blocks(handle_t *h
-       return ret;
- failed_out:
-       for (i = 0; i <index; i++)
--              ext3_free_blocks(handle, inode, new_blocks[i], 1);
-+              ext3_free_blocks(handle, inode, new_blocks[i], 1, 1);
-       return ret;
- }
-@@ -661,9 +661,9 @@ failed:
-               ext3_journal_forget(handle, branch[i].bh);
-       }
-       for (i = 0; i <indirect_blks; i++)
--              ext3_free_blocks(handle, inode, new_blocks[i], 1);
-+              ext3_free_blocks(handle, inode, new_blocks[i], 1, 1);
--      ext3_free_blocks(handle, inode, new_blocks[i], num);
-+      ext3_free_blocks(handle, inode, new_blocks[i], num, 1);
-       return err;
- }
-@@ -760,9 +760,9 @@ err_out:
-       for (i = 1; i <= num; i++) {
-               BUFFER_TRACE(where[i].bh, "call journal_forget");
-               ext3_journal_forget(handle, where[i].bh);
--              ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1);
-+              ext3_free_blocks(handle,inode,le32_to_cpu(where[i-1].key),1,1);
-       }
--      ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks);
-+      ext3_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 1);
-       return err;
- }
-@@ -2007,7 +2007,7 @@ static void ext3_clear_blocks(handle_t *
-               }
-       }
--      ext3_free_blocks(handle, inode, block_to_free, count);
-+      ext3_free_blocks(handle, inode, block_to_free, count, 1);
- }
- /**
-@@ -2180,7 +2180,7 @@ static void ext3_free_branches(handle_t 
-                               ext3_journal_test_restart(handle, inode);
-                       }
--                      ext3_free_blocks(handle, inode, nr, 1);
-+                      ext3_free_blocks(handle, inode, nr, 1, 1);
-                       if (parent_bh) {
-                               /*