Whamcloud - gitweb
Merge b1_5 from b1_4 (20060524_0846)
author scjody <scjody>
Tue, 30 May 2006 02:02:30 +0000 (02:02 +0000)
committer scjody <scjody>
Tue, 30 May 2006 02:02:30 +0000 (02:02 +0000)
53 files changed:
ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch
ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.12.patch
ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch
lustre/ChangeLog
lustre/autoMakefile.am
lustre/autoconf/lustre-core.m4
lustre/contrib/.cvsignore [new file with mode: 0644]
lustre/contrib/Makefile.am [new file with mode: 0644]
lustre/contrib/README [new file with mode: 0644]
lustre/doc/llverdev.txt [new file with mode: 0644]
lustre/doc/llverfs.txt [new file with mode: 0644]
lustre/include/linux/lustre_compat25.h
lustre/include/linux/lustre_lite.h
lustre/include/linux/lustre_types.h
lustre/include/lustre/lustre_idl.h
lustre/include/lustre_net.h
lustre/include/obd.h
lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-ia64-smp.config
lustre/kernel_patches/kernel_configs/kernel-2.6.9-2.6-rhel4-ia64.config
lustre/kernel_patches/patches/ext3-external-journal-2.6.9.patch [new file with mode: 0644]
lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch
lustre/kernel_patches/patches/ext3-mballoc2-2.6.12.patch
lustre/kernel_patches/patches/ext3-mballoc2-2.6.9-rhel4.patch
lustre/llite/dcache.c
lustre/llite/file.c
lustre/llite/llite_lib.c
lustre/llite/rw.c
lustre/lvfs/fsfilt_ext3.c
lustre/mdc/mdc_internal.h
lustre/mdc/mdc_lib.c
lustre/mdc/mdc_locks.c
lustre/mdc/mdc_request.c
lustre/mds/handler.c
lustre/mds/mds_lib.c
lustre/mds/mds_log.c
lustre/mds/mds_open.c
lustre/mds/mds_reint.c
lustre/obdclass/llog_ioctl.c
lustre/obdfilter/filter.c
lustre/obdfilter/filter_io_24.c
lustre/obdfilter/filter_io_26.c
lustre/ost/ost_handler.c
lustre/ptlrpc/pack_generic.c
lustre/tests/acceptance-small.sh
lustre/tests/conf-sanity.sh
lustre/tests/test-framework.sh
lustre/utils/.cvsignore
lustre/utils/Makefile.am
lustre/utils/lconf
lustre/utils/llverdev.c
lustre/utils/llverfs.c
lustre/utils/wirecheck.c
lustre/utils/wiretest.c

index 2a64875..33dc268 100644 (file)
@@ -1,7 +1,7 @@
-Index: linux-2.6.5-7.201/include/linux/ext3_fs.h
+Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h
 ===================================================================
---- linux-2.6.5-7.201.orig/include/linux/ext3_fs.h     2005-12-17 02:53:30.000000000 +0300
-+++ linux-2.6.5-7.201/include/linux/ext3_fs.h  2005-12-17 03:13:38.000000000 +0300
+--- linux-2.6.5-7.252-full.orig/include/linux/ext3_fs.h        2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/include/linux/ext3_fs.h     2006-04-26 23:40:28.000000000 +0400
 @@ -57,6 +57,14 @@ struct statfs;
  #define ext3_debug(f, a...)   do {} while (0)
  #endif
@@ -54,10 +54,10 @@ Index: linux-2.6.5-7.201/include/linux/ext3_fs.h
  #endif        /* __KERNEL__ */
  
  #define EXT3_IOC_CREATE_INUM                  _IOW('f', 5, long)
-Index: linux-2.6.5-7.201/include/linux/ext3_fs_sb.h
+Index: linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h
 ===================================================================
---- linux-2.6.5-7.201.orig/include/linux/ext3_fs_sb.h  2005-12-17 02:53:25.000000000 +0300
-+++ linux-2.6.5-7.201/include/linux/ext3_fs_sb.h       2005-12-17 03:10:23.000000000 +0300
+--- linux-2.6.5-7.252-full.orig/include/linux/ext3_fs_sb.h     2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h  2006-04-26 23:40:28.000000000 +0400
 @@ -23,9 +23,15 @@
  #define EXT_INCLUDE
  #include <linux/blockgroup_lock.h>
@@ -113,10 +113,10 @@ Index: linux-2.6.5-7.201/include/linux/ext3_fs_sb.h
  };
  
  #endif        /* _LINUX_EXT3_FS_SB */
-Index: linux-2.6.5-7.201/fs/ext3/super.c
+Index: linux-2.6.5-7.252-full/fs/ext3/super.c
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/super.c     2005-12-17 02:53:30.000000000 +0300
-+++ linux-2.6.5-7.201/fs/ext3/super.c  2005-12-17 03:10:23.000000000 +0300
+--- linux-2.6.5-7.252-full.orig/fs/ext3/super.c        2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/super.c     2006-04-26 23:40:28.000000000 +0400
 @@ -389,6 +389,7 @@ void ext3_put_super (struct super_block 
        struct ext3_super_block *es = sbi->s_es;
        int i;
@@ -125,7 +125,7 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c
        ext3_ext_release(sb);
        ext3_xattr_put_super(sb);
        journal_destroy(sbi->s_journal);
-@@ -543,7 +544,7 @@ enum {
+@@ -545,7 +546,7 @@ enum {
        Opt_ignore, Opt_barrier,
        Opt_err,
        Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
@@ -134,7 +134,7 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c
  };
  
  static match_table_t tokens = {
-@@ -590,6 +591,7 @@ static match_table_t tokens = {
+@@ -591,6 +592,7 @@ static match_table_t tokens = {
        {Opt_iopen_nopriv, "iopen_nopriv"},
        {Opt_extents, "extents"},
        {Opt_extdebug, "extdebug"},
@@ -142,7 +142,7 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c
        {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL}
  };
-@@ -811,6 +813,9 @@ static int parse_options (char * options
+@@ -813,6 +815,9 @@ static int parse_options (char * options
                case Opt_extdebug:
                        set_opt (sbi->s_mount_opt, EXTDEBUG);
                        break;
@@ -152,7 +152,7 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c
                default:
                        printk (KERN_ERR
                                "EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1464,6 +1469,7 @@ static int ext3_fill_super (struct super
+@@ -1466,6 +1471,7 @@ static int ext3_fill_super (struct super
                ext3_count_dirs(sb));
  
        ext3_ext_init(sb);
@@ -160,7 +160,7 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c
  
        return 0;
  
-@@ -2112,7 +2118,13 @@ static struct file_system_type ext3_fs_t
+@@ -2114,7 +2120,13 @@ static struct file_system_type ext3_fs_t
  
  static int __init init_ext3_fs(void)
  {
@@ -175,7 +175,7 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c
        if (err)
                return err;
        err = init_inodecache();
-@@ -2141,6 +2153,7 @@ static void __exit exit_ext3_fs(void)
+@@ -2143,6 +2155,7 @@ static void __exit exit_ext3_fs(void)
        unregister_filesystem(&ext3_fs_type);
        destroy_inodecache();
        exit_ext3_xattr();
@@ -183,11 +183,11 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c
  }
  
  int ext3_prep_san_write(struct inode *inode, long *blocks,
-Index: linux-2.6.5-7.201/fs/ext3/extents.c
+Index: linux-2.6.5-7.252-full/fs/ext3/extents.c
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/extents.c   2005-12-17 02:53:29.000000000 +0300
-+++ linux-2.6.5-7.201/fs/ext3/extents.c        2005-12-17 03:10:23.000000000 +0300
-@@ -771,7 +771,7 @@ cleanup:
+--- linux-2.6.5-7.252-full.orig/fs/ext3/extents.c      2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/extents.c   2006-04-26 23:40:28.000000000 +0400
+@@ -777,7 +777,7 @@ cleanup:
                for (i = 0; i < depth; i++) {
                        if (!ablocks[i])
                                continue;
@@ -196,7 +196,7 @@ Index: linux-2.6.5-7.201/fs/ext3/extents.c
                }
        }
        kfree(ablocks);
-@@ -1428,7 +1428,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
                  path->p_idx->ei_leaf);
        bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
        ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
@@ -205,7 +205,7 @@ Index: linux-2.6.5-7.201/fs/ext3/extents.c
        return err;
  }
  
-@@ -1913,10 +1913,12 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t
        int needed = ext3_remove_blocks_credits(tree, ex, from, to);
        handle_t *handle = ext3_journal_start(tree->inode, needed);
        struct buffer_head *bh;
@@ -219,7 +219,7 @@ Index: linux-2.6.5-7.201/fs/ext3/extents.c
        if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
                /* tail removal */
                unsigned long num, start;
-@@ -1928,7 +1930,7 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t
                        bh = sb_find_get_block(tree->inode->i_sb, start + i);
                        ext3_forget(handle, 0, tree->inode, bh, start + i);
                }
@@ -228,11 +228,11 @@ Index: linux-2.6.5-7.201/fs/ext3/extents.c
        } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
                printk("strange request: removal %lu-%lu from %u:%u\n",
                       from, to, ex->ee_block, ex->ee_len);
-Index: linux-2.6.5-7.201/fs/ext3/inode.c
+Index: linux-2.6.5-7.252-full/fs/ext3/inode.c
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/inode.c     2005-12-17 02:53:30.000000000 +0300
-+++ linux-2.6.5-7.201/fs/ext3/inode.c  2005-12-17 03:10:23.000000000 +0300
-@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h
+--- linux-2.6.5-7.252-full.orig/fs/ext3/inode.c        2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/inode.c     2006-04-26 23:40:28.000000000 +0400
+@@ -574,7 +574,7 @@ static int ext3_alloc_branch(handle_t *h
                ext3_journal_forget(handle, branch[i].bh);
        }
        for (i = 0; i < keys; i++)
@@ -241,7 +241,7 @@ Index: linux-2.6.5-7.201/fs/ext3/inode.c
        return err;
  }
  
-@@ -673,7 +673,7 @@ err_out:
+@@ -675,7 +675,7 @@ err_out:
        if (err == -EAGAIN)
                for (i = 0; i < num; i++)
                        ext3_free_blocks(handle, inode, 
@@ -250,7 +250,7 @@ Index: linux-2.6.5-7.201/fs/ext3/inode.c
        return err;
  }
  
-@@ -1835,7 +1835,7 @@ ext3_clear_blocks(handle_t *handle, stru
+@@ -1837,7 +1837,7 @@ ext3_clear_blocks(handle_t *handle, stru
                }
        }
  
@@ -259,7 +259,7 @@ Index: linux-2.6.5-7.201/fs/ext3/inode.c
  }
  
  /**
-@@ -2006,7 +2006,7 @@ static void ext3_free_branches(handle_t 
+@@ -2008,7 +2008,7 @@ static void ext3_free_branches(handle_t 
                                ext3_journal_test_restart(handle, inode);
                        }
  
@@ -268,10 +268,10 @@ Index: linux-2.6.5-7.201/fs/ext3/inode.c
  
                        if (parent_bh) {
                                /*
-Index: linux-2.6.5-7.201/fs/ext3/balloc.c
+Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/balloc.c    2005-10-11 00:12:45.000000000 +0400
-+++ linux-2.6.5-7.201/fs/ext3/balloc.c 2005-12-17 03:10:23.000000000 +0300
+--- linux-2.6.5-7.252-full.orig/fs/ext3/balloc.c       2006-02-14 15:26:58.000000000 +0300
++++ linux-2.6.5-7.252-full/fs/ext3/balloc.c    2006-04-26 23:40:28.000000000 +0400
 @@ -78,7 +78,7 @@ struct ext3_group_desc * ext3_get_group_
   *
   * Return buffer_head on success or NULL in case of failure.
@@ -299,10 +299,10 @@ Index: linux-2.6.5-7.201/fs/ext3/balloc.c
                        unsigned long goal, int *errp)
  {
        struct buffer_head *bitmap_bh = NULL;
-Index: linux-2.6.5-7.201/fs/ext3/xattr.c
+Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/xattr.c     2005-12-17 02:53:26.000000000 +0300
-+++ linux-2.6.5-7.201/fs/ext3/xattr.c  2005-12-17 03:10:41.000000000 +0300
+--- linux-2.6.5-7.252-full.orig/fs/ext3/xattr.c        2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/xattr.c     2006-04-26 23:40:28.000000000 +0400
 @@ -1371,7 +1371,7 @@ ext3_xattr_set_handle2(handle_t *handle,
                        new_bh = sb_getblk(sb, block);
                        if (!new_bh) {
@@ -330,11 +330,11 @@ Index: linux-2.6.5-7.201/fs/ext3/xattr.c
                get_bh(bh);
                ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
        } else {
-Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
+Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/mballoc.c   2005-12-09 13:08:53.191437750 +0300
-+++ linux-2.6.5-7.201/fs/ext3/mballoc.c        2005-12-17 03:15:04.000000000 +0300
-@@ -0,0 +1,2430 @@
+--- linux-2.6.5-7.252-full.orig/fs/ext3/mballoc.c      2006-04-22 17:31:47.543334750 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/mballoc.c   2006-04-26 23:42:45.000000000 +0400
+@@ -0,0 +1,2616 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -423,6 +423,12 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +
 +long ext3_mb_stats = 1;
 +
++/*
++ * for which requests use 2^N search using buddies
++ */
++long ext3_mb_order2_reqs = 8;
++
++
 +#ifdef EXT3_BB_MAX_BLOCKS
 +#undef EXT3_BB_MAX_BLOCKS
 +#endif
@@ -488,6 +494,8 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +struct ext3_mb_history {
 +      struct ext3_free_extent goal;   /* goal allocation */
 +      struct ext3_free_extent result; /* result allocation */
++      unsigned pid;
++      unsigned ino;
 +      __u16 found;    /* how many extents have been found */
 +      __u16 groups;   /* how many groups have been scanned */
 +      __u16 tail;     /* what tail broke some buddy */
@@ -510,9 +518,9 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +#define EXT3_MB_BUDDY(e3b)    ((e3b)->bd_buddy)
 +
 +#ifndef EXT3_MB_HISTORY
-+#define ext3_mb_store_history(sb,ac)
++#define ext3_mb_store_history(sb,ino,ac)
 +#else
-+static void ext3_mb_store_history(struct super_block *,
++static void ext3_mb_store_history(struct super_block *, unsigned ino,
 +                              struct ext3_allocation_context *ac);
 +#endif
 +
@@ -1134,7 +1142,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +static int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
 +                              int needed, struct ext3_free_extent *ex)
 +{
-+      int next, max, ord;
++      int next = block, max, ord;
 +      void *buddy;
 +
 +      J_ASSERT(ex != NULL);
@@ -1159,6 +1167,11 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      ex->fe_start = block << order;
 +      ex->fe_group = e3b->bd_group;
 +
++      /* calc difference from given start */
++      next = next - ex->fe_start;
++      ex->fe_len -= next;
++      ex->fe_start += next;
++
 +      while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) {
 +
 +              if (block + 1 >= max)
@@ -1381,7 +1394,16 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start,
 +                              ac->ac_g_ex.fe_len, &ex);
 +      
-+      if (max > 0) {
++      if (max >= ac->ac_g_ex.fe_len) {
++              J_ASSERT(ex.fe_len > 0);
++              J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
++              J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
++              ac->ac_found++;
++              ac->ac_b_ex = ex;
++              ext3_mb_use_best_found(ac, e3b);
++      } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) {
++              /* Sometimes, caller may want to merge even small
++               * number of blocks to an existing extent */
 +              J_ASSERT(ex.fe_len > 0);
 +              J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
 +              J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
@@ -1409,7 +1431,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      int i, k, max;
 +
 +      J_ASSERT(ac->ac_2order > 0);
-+      for (i = ac->ac_2order; i < sb->s_blocksize_bits + 1; i++) {
++      for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
 +              if (grp->bb_counters[i] == 0)
 +                      continue;
 +
@@ -1495,15 +1517,18 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +              case 0:
 +                      J_ASSERT(ac->ac_2order != 0);
 +                      bits = ac->ac_sb->s_blocksize_bits + 1;
-+                      for (i = ac->ac_2order; i < bits; i++)
++                      for (i = ac->ac_2order; i <= bits; i++)
 +                              if (grp->bb_counters[i] > 0)
 +                                      return 1;
++                      break;
 +              case 1:
 +                      if ((free / fragments) >= ac->ac_g_ex.fe_len)
 +                              return 1;
++                      break;
 +              case 2:
 +                      if (free >= ac->ac_g_ex.fe_len)
 +                              return 1;
++                      break;
 +              case 3:
 +                      return 1;
 +              default:
@@ -1606,21 +1631,18 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +
 +      /* probably, the request is for 2^8+ blocks (1/2/3/... MB) */
 +      i = ffs(*len);
-+      if (i >= 8) {
++      if (i >= ext3_mb_order2_reqs) {
 +              i--;
 +              if ((*len & (~(1 << i))) == 0)
 +                      ac.ac_2order = i;
 +      }
 +
-+      /* Sometimes, caller may want to merge even small
-+       * number of blocks to an existing extent */
-+      if (ac.ac_flags & EXT3_MB_HINT_MERGE) {
-+              err = ext3_mb_find_by_goal(&ac, &e3b);
-+              if (err)
-+                      goto out_err;
-+              if (ac.ac_status == AC_STATUS_FOUND)
-+                      goto found;
-+      }
++      /* first, try the goal */
++      err = ext3_mb_find_by_goal(&ac, &e3b);
++      if (err)
++              goto out_err;
++      if (ac.ac_status == AC_STATUS_FOUND)
++              goto found;
 +
 +      /* Let's just scan groups to find more-less suitable blocks */
 +      cr = ac.ac_2order ? 0 : 1;
@@ -1839,7 +1861,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +                      atomic_inc(&sbi->s_bal_breaks);
 +      }
 +
-+      ext3_mb_store_history(sb, &ac);
++      ext3_mb_store_history(sb, inode->i_ino, &ac);
 +
 +      return block;
 +}
@@ -1904,9 +1926,9 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      char buf[20], buf2[20];
 +
 +      if (v == SEQ_START_TOKEN) {
-+              seq_printf(seq, "%-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
-+                       "goal", "result", "found", "grps", "cr", "merge",
-+                       "tail", "broken");
++              seq_printf(seq, "%-5s %-8s %-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
++                       "pid", "inode", "goal", "result", "found", "grps", "cr",
++                       "merge", "tail", "broken");
 +              return 0;
 +      }
 +
@@ -1914,9 +1936,9 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +              hs->goal.fe_start, hs->goal.fe_len);
 +      sprintf(buf2, "%u/%u/%u", hs->result.fe_group,
 +              hs->result.fe_start, hs->result.fe_len);
-+      seq_printf(seq, "%-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n", buf,
-+                      buf2, hs->found, hs->groups, hs->cr, 
-+                      hs->merged ? "M" : "", hs->tail,
++      seq_printf(seq, "%-5u %-8u %-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n",
++                      hs->pid, hs->ino, buf, buf2, hs->found, hs->groups,
++                      hs->cr, hs->merged ? "M" : "", hs->tail,
 +                      hs->buddy ? 1 << hs->buddy : 0);
 +      return 0;
 +}
@@ -1980,12 +2002,108 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      .release        = ext3_mb_seq_history_release,
 +};
 +
++static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
++{
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      int group;
++
++      if (*pos < 0 || *pos >= sbi->s_groups_count)
++              return NULL;
++
++      group = *pos + 1;
++      return (void *) group;
++}
++
++static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      int group;
++
++      ++*pos;
++      if (*pos < 0 || *pos >= sbi->s_groups_count)
++              return NULL;
++      group = *pos + 1;
++      return (void *) group;;
++}
++
++static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v)
++{
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      int group = (int) v, i;
++      struct sg {
++              struct ext3_group_info info;
++              unsigned short counters[16];
++      } sg;
++
++      group--;
++      if (group == 0)
++              seq_printf(seq, "#%-5s: %-5s %-5s %-5s [ %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
++                       "group", "free", "frags", "first", "2^0", "2^1", "2^2", 
++                       "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10",
++                       "2^11", "2^12", "2^13");
++
++      i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
++              sizeof(struct ext3_group_info);
++      ext3_lock_group(sb, group);
++      memcpy(&sg, sbi->s_group_info[group], i);
++      ext3_unlock_group(sb, group);
++
++      if (EXT3_MB_GRP_NEED_INIT(&sg.info))
++              return 0;
++
++      seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
++                      sg.info.bb_fragments, sg.info.bb_first_free);
++      for (i = 0; i <= 13; i++)
++              seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
++                              sg.info.bb_counters[i] : 0);
++      seq_printf(seq, " ]\n");
++
++      return 0;
++}
++
++static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v)
++{
++}
++
++static struct seq_operations ext3_mb_seq_groups_ops = {
++      .start  = ext3_mb_seq_groups_start,
++      .next   = ext3_mb_seq_groups_next,
++      .stop   = ext3_mb_seq_groups_stop,
++      .show   = ext3_mb_seq_groups_show,
++};
++
++static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file)
++{
++      struct super_block *sb = PDE(inode)->data;
++      int rc;
++
++      rc = seq_open(file, &ext3_mb_seq_groups_ops);
++      if (rc == 0) {
++              struct seq_file *m = (struct seq_file *)file->private_data;
++              m->private = sb;
++      }
++      return rc;
++
++}
++
++static struct file_operations ext3_mb_seq_groups_fops = {
++      .owner          = THIS_MODULE,
++      .open           = ext3_mb_seq_groups_open,
++      .read           = seq_read,
++      .llseek         = seq_lseek,
++      .release        = seq_release,
++};
++
 +static void ext3_mb_history_release(struct super_block *sb)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      char name[64];
 +
 +      snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name));
++      remove_proc_entry("mb_groups", sbi->s_mb_proc);
 +      remove_proc_entry("mb_history", sbi->s_mb_proc);
 +      remove_proc_entry(name, proc_root_ext3);
 +
@@ -2008,6 +2126,11 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +                      p->proc_fops = &ext3_mb_seq_history_fops;
 +                      p->data = sb;
 +              }
++              p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc);
++              if (p) {
++                      p->proc_fops = &ext3_mb_seq_groups_fops;
++                      p->data = sb;
++              }
 +      }
 +
 +      sbi->s_mb_history_max = 1000;
@@ -2020,7 +2143,8 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +}
 +
 +static void
-+ext3_mb_store_history(struct super_block *sb, struct ext3_allocation_context *ac)
++ext3_mb_store_history(struct super_block *sb, unsigned ino,
++                      struct ext3_allocation_context *ac)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      struct ext3_mb_history h;
@@ -2028,6 +2152,8 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      if (likely(sbi->s_mb_history == NULL))
 +              return;
 +
++      h.pid = current->pid;
++      h.ino = ino;
 +      h.goal = ac->ac_g_ex;
 +      h.result = ac->ac_b_ex;
 +      h.found = ac->ac_found;
@@ -2584,6 +2710,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +#define EXT3_MB_STATS_NAME        "mb_stats"
 +#define EXT3_MB_MAX_TO_SCAN_NAME  "mb_max_to_scan"
 +#define EXT3_MB_MIN_TO_SCAN_NAME  "mb_min_to_scan"
++#define EXT3_MB_ORDER2_REQ      "mb_order2_req"
 +
 +static int ext3_mb_stats_read(char *page, char **start, off_t off,
 +              int count, int *eof, void *data)
@@ -2671,6 +2798,45 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      return len;
 +}
 +
++static int ext3_mb_order2_req_write(struct file *file, const char *buffer,
++              unsigned long count, void *data)
++{
++      char str[32];
++      long value;
++
++      if (count >= sizeof(str)) {
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
++                     EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str));
++              return -EOVERFLOW;
++      }
++
++      if (copy_from_user(str, buffer, count))
++              return -EFAULT;
++
++      /* Only set to 0 or 1 respectively; zero->0; non-zero->1 */
++      value = simple_strtol(str, NULL, 0);
++      if (value <= 0)
++              return -ERANGE;
++
++      ext3_mb_order2_reqs = value;    
++
++      return count;
++}
++
++static int ext3_mb_order2_req_read(char *page, char **start, off_t off,
++              int count, int *eof, void *data)
++{
++      int len;
++
++      *eof = 1;
++      if (off != 0)
++              return 0;
++
++      len = sprintf(page, "%ld\n", ext3_mb_order2_reqs);
++      *start = page;
++      return len;
++}
++
 +static int ext3_mb_min_to_scan_write(struct file *file, const char *buffer,
 +              unsigned long count, void *data)
 +{
@@ -2701,6 +2867,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      struct proc_dir_entry *proc_ext3_mb_stats;
 +      struct proc_dir_entry *proc_ext3_mb_max_to_scan;
 +      struct proc_dir_entry *proc_ext3_mb_min_to_scan;
++      struct proc_dir_entry *proc_ext3_mb_order2_req;
 +
 +      proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs);
 +      if (proc_root_ext3 == NULL) {
@@ -2755,6 +2922,24 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      proc_ext3_mb_min_to_scan->read_proc  = ext3_mb_min_to_scan_read;
 +      proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write;
 +
++      /* Initialize EXT3_ORDER2_REQ */
++      proc_ext3_mb_order2_req = create_proc_entry(
++                      EXT3_MB_ORDER2_REQ,
++                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
++      if (proc_ext3_mb_order2_req == NULL) {
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
++                              EXT3_MB_ORDER2_REQ);
++              remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_ROOT, proc_root_fs);
++              return -EIO;
++      }
++
++      proc_ext3_mb_order2_req->data = NULL;
++      proc_ext3_mb_order2_req->read_proc  = ext3_mb_order2_req_read;
++      proc_ext3_mb_order2_req->write_proc = ext3_mb_order2_req_write;
++
 +      return 0;
 +}
 +
@@ -2763,13 +2948,14 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
 +      remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
 +      remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++      remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3);
 +      remove_proc_entry(EXT3_ROOT, proc_root_fs);
 +}
-Index: linux-2.6.5-7.201/fs/ext3/Makefile
+Index: linux-2.6.5-7.252-full/fs/ext3/Makefile
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/Makefile    2005-12-17 02:53:30.000000000 +0300
-+++ linux-2.6.5-7.201/fs/ext3/Makefile 2005-12-17 03:10:23.000000000 +0300
-@@ -6,7 +6,7 @@
+--- linux-2.6.5-7.252-full.orig/fs/ext3/Makefile       2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/Makefile    2006-04-26 23:40:28.000000000 +0400
+@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
  
  ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
           ioctl.o namei.o super.o symlink.o hash.o \
index 70f4f8a..0297609 100644 (file)
@@ -1,7 +1,7 @@
-Index: linux-2.6.12.6/include/linux/ext3_fs.h
+Index: linux-2.6.12.6-bull/include/linux/ext3_fs.h
 ===================================================================
---- linux-2.6.12.6.orig/include/linux/ext3_fs.h        2005-12-17 02:17:16.000000000 +0300
-+++ linux-2.6.12.6/include/linux/ext3_fs.h     2005-12-17 02:21:21.000000000 +0300
+--- linux-2.6.12.6-bull.orig/include/linux/ext3_fs.h   2006-04-29 20:39:09.000000000 +0400
++++ linux-2.6.12.6-bull/include/linux/ext3_fs.h        2006-04-29 20:39:10.000000000 +0400
 @@ -57,6 +57,14 @@ struct statfs;
  #define ext3_debug(f, a...)   do {} while (0)
  #endif
@@ -52,10 +52,10 @@ Index: linux-2.6.12.6/include/linux/ext3_fs.h
  #endif        /* __KERNEL__ */
  
  /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
-Index: linux-2.6.12.6/include/linux/ext3_fs_sb.h
+Index: linux-2.6.12.6-bull/include/linux/ext3_fs_sb.h
 ===================================================================
---- linux-2.6.12.6.orig/include/linux/ext3_fs_sb.h     2005-08-29 20:55:27.000000000 +0400
-+++ linux-2.6.12.6/include/linux/ext3_fs_sb.h  2005-12-17 02:21:21.000000000 +0300
+--- linux-2.6.12.6-bull.orig/include/linux/ext3_fs_sb.h        2005-08-29 20:55:27.000000000 +0400
++++ linux-2.6.12.6-bull/include/linux/ext3_fs_sb.h     2006-04-29 20:39:10.000000000 +0400
 @@ -21,8 +21,14 @@
  #include <linux/wait.h>
  #include <linux/blockgroup_lock.h>
@@ -110,10 +110,10 @@ Index: linux-2.6.12.6/include/linux/ext3_fs_sb.h
  };
  
  #endif        /* _LINUX_EXT3_FS_SB */
-Index: linux-2.6.12.6/fs/ext3/super.c
+Index: linux-2.6.12.6-bull/fs/ext3/super.c
 ===================================================================
---- linux-2.6.12.6.orig/fs/ext3/super.c        2005-12-17 02:17:16.000000000 +0300
-+++ linux-2.6.12.6/fs/ext3/super.c     2005-12-17 02:21:21.000000000 +0300
+--- linux-2.6.12.6-bull.orig/fs/ext3/super.c   2006-04-29 20:39:09.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/super.c        2006-04-29 20:39:10.000000000 +0400
 @@ -387,6 +387,7 @@ static void ext3_put_super (struct super
        struct ext3_super_block *es = sbi->s_es;
        int i;
@@ -131,7 +131,7 @@ Index: linux-2.6.12.6/fs/ext3/super.c
  };
  
  static match_table_t tokens = {
-@@ -649,6 +651,7 @@ static match_table_t tokens = {
+@@ -650,6 +651,7 @@ static match_table_t tokens = {
        {Opt_iopen_nopriv, "iopen_nopriv"},
        {Opt_extents, "extents"},
        {Opt_extdebug, "extdebug"},
@@ -139,7 +139,7 @@ Index: linux-2.6.12.6/fs/ext3/super.c
        {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL},
        {Opt_resize, "resize"},
-@@ -964,6 +967,9 @@ clear_qf_name:
+@@ -965,6 +967,9 @@ clear_qf_name:
                case Opt_extdebug:
                        set_opt (sbi->s_mount_opt, EXTDEBUG);
                        break;
@@ -149,7 +149,7 @@ Index: linux-2.6.12.6/fs/ext3/super.c
                default:
                        printk (KERN_ERR
                                "EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1669,6 +1675,7 @@ static int ext3_fill_super (struct super
+@@ -1670,6 +1675,7 @@ static int ext3_fill_super (struct super
                ext3_count_dirs(sb));
  
        ext3_ext_init(sb);
@@ -157,7 +157,7 @@ Index: linux-2.6.12.6/fs/ext3/super.c
        lock_kernel();
        return 0;
  
-@@ -2548,7 +2555,13 @@ static struct file_system_type ext3_fs_t
+@@ -2549,7 +2555,13 @@ static struct file_system_type ext3_fs_t
  
  static int __init init_ext3_fs(void)
  {
@@ -172,7 +172,7 @@ Index: linux-2.6.12.6/fs/ext3/super.c
        if (err)
                return err;
        err = init_inodecache();
-@@ -2570,6 +2583,7 @@ static void __exit exit_ext3_fs(void)
+@@ -2571,6 +2583,7 @@ static void __exit exit_ext3_fs(void)
        unregister_filesystem(&ext3_fs_type);
        destroy_inodecache();
        exit_ext3_xattr();
@@ -180,11 +180,11 @@ Index: linux-2.6.12.6/fs/ext3/super.c
  }
  
  int ext3_prep_san_write(struct inode *inode, long *blocks,
-Index: linux-2.6.12.6/fs/ext3/extents.c
+Index: linux-2.6.12.6-bull/fs/ext3/extents.c
 ===================================================================
---- linux-2.6.12.6.orig/fs/ext3/extents.c      2005-12-17 02:17:16.000000000 +0300
-+++ linux-2.6.12.6/fs/ext3/extents.c   2005-12-17 02:21:21.000000000 +0300
-@@ -771,7 +771,7 @@ cleanup:
+--- linux-2.6.12.6-bull.orig/fs/ext3/extents.c 2006-04-29 20:39:09.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/extents.c      2006-04-29 20:39:10.000000000 +0400
+@@ -777,7 +777,7 @@ cleanup:
                for (i = 0; i < depth; i++) {
                        if (!ablocks[i])
                                continue;
@@ -193,7 +193,7 @@ Index: linux-2.6.12.6/fs/ext3/extents.c
                }
        }
        kfree(ablocks);
-@@ -1428,7 +1428,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
                  path->p_idx->ei_leaf);
        bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
        ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
@@ -202,7 +202,7 @@ Index: linux-2.6.12.6/fs/ext3/extents.c
        return err;
  }
  
-@@ -1913,10 +1913,12 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t
        int needed = ext3_remove_blocks_credits(tree, ex, from, to);
        handle_t *handle = ext3_journal_start(tree->inode, needed);
        struct buffer_head *bh;
@@ -216,7 +216,7 @@ Index: linux-2.6.12.6/fs/ext3/extents.c
        if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
                /* tail removal */
                unsigned long num, start;
-@@ -1928,7 +1930,7 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t
                        bh = sb_find_get_block(tree->inode->i_sb, start + i);
                        ext3_forget(handle, 0, tree->inode, bh, start + i);
                }
@@ -225,10 +225,10 @@ Index: linux-2.6.12.6/fs/ext3/extents.c
        } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
                printk("strange request: removal %lu-%lu from %u:%u\n",
                       from, to, ex->ee_block, ex->ee_len);
-Index: linux-2.6.12.6/fs/ext3/inode.c
+Index: linux-2.6.12.6-bull/fs/ext3/inode.c
 ===================================================================
---- linux-2.6.12.6.orig/fs/ext3/inode.c        2005-12-17 02:17:16.000000000 +0300
-+++ linux-2.6.12.6/fs/ext3/inode.c     2005-12-17 02:21:21.000000000 +0300
+--- linux-2.6.12.6-bull.orig/fs/ext3/inode.c   2006-04-29 20:39:09.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/inode.c        2006-04-29 20:39:10.000000000 +0400
 @@ -564,7 +564,7 @@ static int ext3_alloc_branch(handle_t *h
                ext3_journal_forget(handle, branch[i].bh);
        }
@@ -256,10 +256,10 @@ Index: linux-2.6.12.6/fs/ext3/inode.c
  
                        if (parent_bh) {
                                /*
-Index: linux-2.6.12.6/fs/ext3/balloc.c
+Index: linux-2.6.12.6-bull/fs/ext3/balloc.c
 ===================================================================
---- linux-2.6.12.6.orig/fs/ext3/balloc.c       2005-08-29 20:55:27.000000000 +0400
-+++ linux-2.6.12.6/fs/ext3/balloc.c    2005-12-17 02:21:21.000000000 +0300
+--- linux-2.6.12.6-bull.orig/fs/ext3/balloc.c  2005-08-29 20:55:27.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/balloc.c       2006-04-29 20:39:10.000000000 +0400
 @@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
   *
   * Return buffer_head on success or NULL in case of failure.
@@ -303,10 +303,10 @@ Index: linux-2.6.12.6/fs/ext3/balloc.c
                        unsigned long goal, int *errp)
  {
        struct buffer_head *bitmap_bh = NULL;
-Index: linux-2.6.12.6/fs/ext3/xattr.c
+Index: linux-2.6.12.6-bull/fs/ext3/xattr.c
 ===================================================================
---- linux-2.6.12.6.orig/fs/ext3/xattr.c        2005-08-29 20:55:27.000000000 +0400
-+++ linux-2.6.12.6/fs/ext3/xattr.c     2005-12-17 02:21:33.000000000 +0300
+--- linux-2.6.12.6-bull.orig/fs/ext3/xattr.c   2005-08-29 20:55:27.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/xattr.c        2006-04-29 20:39:10.000000000 +0400
 @@ -484,7 +484,7 @@ ext3_xattr_release_block(handle_t *handl
                ea_bdebug(bh, "refcount now=0; freeing");
                if (ce)
@@ -325,11 +325,11 @@ Index: linux-2.6.12.6/fs/ext3/xattr.c
                                error = -EIO;
                                goto cleanup;
                        }
-Index: linux-2.6.12.6/fs/ext3/mballoc.c
+Index: linux-2.6.12.6-bull/fs/ext3/mballoc.c
 ===================================================================
---- linux-2.6.12.6.orig/fs/ext3/mballoc.c      2005-12-09 13:08:53.191437750 +0300
-+++ linux-2.6.12.6/fs/ext3/mballoc.c   2005-12-17 02:21:21.000000000 +0300
-@@ -0,0 +1,2429 @@
+--- linux-2.6.12.6-bull.orig/fs/ext3/mballoc.c 2006-04-22 17:31:47.543334750 +0400
++++ linux-2.6.12.6-bull/fs/ext3/mballoc.c      2006-04-30 01:24:11.000000000 +0400
+@@ -0,0 +1,2615 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -418,6 +418,12 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +
 +long ext3_mb_stats = 1;
 +
++/*
++ * power-of-two requests with ffs(len) >= this use the 2^N buddy search
++ */
++long ext3_mb_order2_reqs = 8;
++
++
 +#ifdef EXT3_BB_MAX_BLOCKS
 +#undef EXT3_BB_MAX_BLOCKS
 +#endif
@@ -483,6 +489,8 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +struct ext3_mb_history {
 +      struct ext3_free_extent goal;   /* goal allocation */
 +      struct ext3_free_extent result; /* result allocation */
++      unsigned pid;
++      unsigned ino;
 +      __u16 found;    /* how many extents have been found */
 +      __u16 groups;   /* how many groups have been scanned */
 +      __u16 tail;     /* what tail broke some buddy */
@@ -505,9 +513,9 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +#define EXT3_MB_BUDDY(e3b)    ((e3b)->bd_buddy)
 +
 +#ifndef EXT3_MB_HISTORY
-+#define ext3_mb_store_history(sb,ac)
++#define ext3_mb_store_history(sb,ino,ac)
 +#else
-+static void ext3_mb_store_history(struct super_block *,
++static void ext3_mb_store_history(struct super_block *, unsigned ino,
 +                              struct ext3_allocation_context *ac);
 +#endif
 +
@@ -1129,7 +1137,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +static int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
 +                              int needed, struct ext3_free_extent *ex)
 +{
-+      int next, max, ord;
++      int next = block, max, ord;
 +      void *buddy;
 +
 +      J_ASSERT(ex != NULL);
@@ -1154,6 +1162,11 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      ex->fe_start = block << order;
 +      ex->fe_group = e3b->bd_group;
 +
++      /* calc difference from given start */
++      next = next - ex->fe_start;
++      ex->fe_len -= next;
++      ex->fe_start += next;
++
 +      while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) {
 +
 +              if (block + 1 >= max)
@@ -1376,7 +1389,16 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start,
 +                              ac->ac_g_ex.fe_len, &ex);
 +      
-+      if (max > 0) {
++      if (max >= ac->ac_g_ex.fe_len) {
++              J_ASSERT(ex.fe_len > 0);
++              J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
++              J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
++              ac->ac_found++;
++              ac->ac_b_ex = ex;
++              ext3_mb_use_best_found(ac, e3b);
++      } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) {
++              /* Sometimes, caller may want to merge even small
++               * number of blocks to an existing extent */
 +              J_ASSERT(ex.fe_len > 0);
 +              J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
 +              J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
@@ -1404,7 +1426,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      int i, k, max;
 +
 +      J_ASSERT(ac->ac_2order > 0);
-+      for (i = ac->ac_2order; i < sb->s_blocksize_bits + 1; i++) {
++      for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
 +              if (grp->bb_counters[i] == 0)
 +                      continue;
 +
@@ -1490,15 +1512,18 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +              case 0:
 +                      J_ASSERT(ac->ac_2order != 0);
 +                      bits = ac->ac_sb->s_blocksize_bits + 1;
-+                      for (i = ac->ac_2order; i < bits; i++)
++                      for (i = ac->ac_2order; i <= bits; i++)
 +                              if (grp->bb_counters[i] > 0)
 +                                      return 1;
++                      break;
 +              case 1:
 +                      if ((free / fragments) >= ac->ac_g_ex.fe_len)
 +                              return 1;
++                      break;
 +              case 2:
 +                      if (free >= ac->ac_g_ex.fe_len)
 +                              return 1;
++                      break;
 +              case 3:
 +                      return 1;
 +              default:
@@ -1601,21 +1626,18 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +
 +      /* probably, the request is for 2^8+ blocks (1/2/3/... MB) */
 +      i = ffs(*len);
-+      if (i >= 8) {
++      if (i >= ext3_mb_order2_reqs) {
 +              i--;
 +              if ((*len & (~(1 << i))) == 0)
 +                      ac.ac_2order = i;
 +      }
 +
-+      /* Sometimes, caller may want to merge even small
-+       * number of blocks to an existing extent */
-+      if (ac.ac_flags & EXT3_MB_HINT_MERGE) {
-+              err = ext3_mb_find_by_goal(&ac, &e3b);
-+              if (err)
-+                      goto out_err;
-+              if (ac.ac_status == AC_STATUS_FOUND)
-+                      goto found;
-+      }
++      /* first, try the goal */
++      err = ext3_mb_find_by_goal(&ac, &e3b);
++      if (err)
++              goto out_err;
++      if (ac.ac_status == AC_STATUS_FOUND)
++              goto found;
 +
 +      /* Let's just scan groups to find more-less suitable blocks */
 +      cr = ac.ac_2order ? 0 : 1;
@@ -1834,7 +1856,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +                      atomic_inc(&sbi->s_bal_breaks);
 +      }
 +
-+      ext3_mb_store_history(sb, &ac);
++      ext3_mb_store_history(sb, inode->i_ino, &ac);
 +
 +      return block;
 +}
@@ -1899,9 +1921,9 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      char buf[20], buf2[20];
 +
 +      if (v == SEQ_START_TOKEN) {
-+              seq_printf(seq, "%-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
-+                       "goal", "result", "found", "grps", "cr", "merge",
-+                       "tail", "broken");
++              seq_printf(seq, "%-5s %-8s %-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
++                       "pid", "inode", "goal", "result", "found", "grps", "cr",
++                       "merge", "tail", "broken");
 +              return 0;
 +      }
 +
@@ -1909,9 +1931,9 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +              hs->goal.fe_start, hs->goal.fe_len);
 +      sprintf(buf2, "%u/%u/%u", hs->result.fe_group,
 +              hs->result.fe_start, hs->result.fe_len);
-+      seq_printf(seq, "%-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n", buf,
-+                      buf2, hs->found, hs->groups, hs->cr, 
-+                      hs->merged ? "M" : "", hs->tail,
++      seq_printf(seq, "%-5u %-8u %-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n",
++                      hs->pid, hs->ino, buf, buf2, hs->found, hs->groups,
++                      hs->cr, hs->merged ? "M" : "", hs->tail,
 +                      hs->buddy ? 1 << hs->buddy : 0);
 +      return 0;
 +}
@@ -1975,12 +1997,108 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      .release        = ext3_mb_seq_history_release,
 +};
 +
++static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
++{
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      long group;     /* long, not int: cast to a pointer without resizing */
++
++      if (*pos < 0 || *pos >= sbi->s_groups_count)
++              return NULL;
++      /* cookie is group number + 1, so that group 0 is not a NULL pointer */
++      group = *pos + 1;
++      return (void *) group;
++}
++
++static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      long group;     /* long, not int: cast to a pointer without resizing */
++
++      ++*pos;
++      if (*pos < 0 || *pos >= sbi->s_groups_count)
++              return NULL;    /* advanced past the last group */
++      group = *pos + 1;       /* +1 keeps group 0 distinct from NULL */
++      return (void *) group;
++}
++
++static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v)
++{
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      int group = (int)(long) v, i;   /* via long: no pointer truncation */
++      struct sg {
++              struct ext3_group_info info;
++              unsigned short counters[16];    /* presumably backs info.bb_counters[] - confirm */
++      } sg;
++
++      group--;        /* undo the +1 applied when the cookie was built */
++      if (group == 0)
++              seq_printf(seq, "#%-5s: %-5s %-5s %-5s [ %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
++                       "group", "free", "frags", "first", "2^0", "2^1", "2^2",
++                       "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10",
++                       "2^11", "2^12", "2^13");
++
++      i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
++              sizeof(struct ext3_group_info);
++      ext3_lock_group(sb, group);
++      memcpy(&sg, sbi->s_group_info[group], i);       /* snapshot under the group lock */
++      ext3_unlock_group(sb, group);
++
++      if (EXT3_MB_GRP_NEED_INIT(&sg.info))
++              return 0;       /* groups flagged NEED_INIT print nothing */
++
++      seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
++                      sg.info.bb_fragments, sg.info.bb_first_free);
++      for (i = 0; i <= 13; i++)       /* 14 columns, matching the 2^0..2^13 header */
++              seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
++                              sg.info.bb_counters[i] : 0);
++      seq_printf(seq, " ]\n");
++
++      return 0;
++}
++
++static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v)
++{      /* intentionally empty: start/next acquire nothing to release */
++}
++
++static struct seq_operations ext3_mb_seq_groups_ops = {
++      .start  = ext3_mb_seq_groups_start,     /* *pos -> group cookie */
++      .next   = ext3_mb_seq_groups_next,      /* advance *pos by one group */
++      .stop   = ext3_mb_seq_groups_stop,      /* no-op */
++      .show   = ext3_mb_seq_groups_show,      /* header for group 0, then one line per printed group */
++};
++
++static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file)
++{
++      struct super_block *sb = PDE(inode)->data;
++      int rc;
++
++      rc = seq_open(file, &ext3_mb_seq_groups_ops);
++      if (rc)
++              return rc;
++
++      /* hand the superblock to the iterator callbacks via seq->private */
++      ((struct seq_file *)file->private_data)->private = sb;
++      return 0;
++}
++
++static struct file_operations ext3_mb_seq_groups_fops = {
++      .owner          = THIS_MODULE,
++      .open           = ext3_mb_seq_groups_open,      /* seq_open() + stash sb */
++      .read           = seq_read,
++      .llseek         = seq_lseek,
++      .release        = seq_release,
++};
++
 +static void ext3_mb_history_release(struct super_block *sb)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      char name[64];
 +
 +      snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name));
++      remove_proc_entry("mb_groups", sbi->s_mb_proc);
 +      remove_proc_entry("mb_history", sbi->s_mb_proc);
 +      remove_proc_entry(name, proc_root_ext3);
 +
@@ -2003,6 +2121,11 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +                      p->proc_fops = &ext3_mb_seq_history_fops;
 +                      p->data = sb;
 +              }
++              p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc);
++              if (p) {
++                      p->proc_fops = &ext3_mb_seq_groups_fops;
++                      p->data = sb;
++              }
 +      }
 +
 +      sbi->s_mb_history_max = 1000;
@@ -2015,7 +2138,8 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +}
 +
 +static void
-+ext3_mb_store_history(struct super_block *sb, struct ext3_allocation_context *ac)
++ext3_mb_store_history(struct super_block *sb, unsigned ino,
++                      struct ext3_allocation_context *ac)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      struct ext3_mb_history h;
@@ -2023,6 +2147,8 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      if (likely(sbi->s_mb_history == NULL))
 +              return;
 +
++      h.pid = current->pid;
++      h.ino = ino;
 +      h.goal = ac->ac_g_ex;
 +      h.result = ac->ac_b_ex;
 +      h.found = ac->ac_found;
@@ -2578,6 +2704,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +#define EXT3_MB_STATS_NAME        "mb_stats"
 +#define EXT3_MB_MAX_TO_SCAN_NAME  "mb_max_to_scan"
 +#define EXT3_MB_MIN_TO_SCAN_NAME  "mb_min_to_scan"
++#define EXT3_MB_ORDER2_REQ      "mb_order2_req"
 +
 +static int ext3_mb_stats_read(char *page, char **start, off_t off,
 +              int count, int *eof, void *data)
@@ -2665,6 +2792,45 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      return len;
 +}
 +
++static int ext3_mb_order2_req_write(struct file *file, const char *buffer,
++              unsigned long count, void *data)
++{
++      char str[32];
++      long value;
++
++      if (count >= sizeof(str)) {
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
++                     EXT3_MB_ORDER2_REQ, (unsigned)sizeof(str));
++              return -EOVERFLOW;
++      }
++
++      if (copy_from_user(str, buffer, count))
++              return -EFAULT;
++      str[count] = '\0';      /* count < sizeof(str); parser needs a NUL */
++
++      /* Only strictly positive values are accepted; anything else is -ERANGE */
++      value = simple_strtol(str, NULL, 0);
++      if (value <= 0)
++              return -ERANGE;
++
++      ext3_mb_order2_reqs = value;
++      return count;
++}
++
++static int ext3_mb_order2_req_read(char *page, char **start, off_t off,
++              int count, int *eof, void *data)
++{
++      int written;
++
++      /* always report EOF; a non-zero offset returns no data */
++      *eof = 1;
++      if (off)
++              return 0;
++      written = sprintf(page, "%ld\n", ext3_mb_order2_reqs);
++      *start = page;
++      return written;
++}
++
 +static int ext3_mb_min_to_scan_write(struct file *file, const char *buffer,
 +              unsigned long count, void *data)
 +{
@@ -2695,6 +2861,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      struct proc_dir_entry *proc_ext3_mb_stats;
 +      struct proc_dir_entry *proc_ext3_mb_max_to_scan;
 +      struct proc_dir_entry *proc_ext3_mb_min_to_scan;
++      struct proc_dir_entry *proc_ext3_mb_order2_req;
 +
 +      proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs);
 +      if (proc_root_ext3 == NULL) {
@@ -2749,6 +2916,24 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      proc_ext3_mb_min_to_scan->read_proc  = ext3_mb_min_to_scan_read;
 +      proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write;
 +
++      /* Initialize EXT3_MB_ORDER2_REQ */
++      proc_ext3_mb_order2_req = create_proc_entry(
++                      EXT3_MB_ORDER2_REQ,
++                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
++      if (proc_ext3_mb_order2_req == NULL) {
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
++                              EXT3_MB_ORDER2_REQ);
++              remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_ROOT, proc_root_fs);
++              return -EIO;
++      }
++
++      proc_ext3_mb_order2_req->data = NULL;
++      proc_ext3_mb_order2_req->read_proc  = ext3_mb_order2_req_read;
++      proc_ext3_mb_order2_req->write_proc = ext3_mb_order2_req_write;
++
 +      return 0;
 +}
 +
@@ -2757,13 +2942,14 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
 +      remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
 +      remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++      remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3);
 +      remove_proc_entry(EXT3_ROOT, proc_root_fs);
 +}
-Index: linux-2.6.12.6/fs/ext3/Makefile
+Index: linux-2.6.12.6-bull/fs/ext3/Makefile
 ===================================================================
---- linux-2.6.12.6.orig/fs/ext3/Makefile       2005-12-17 02:17:16.000000000 +0300
-+++ linux-2.6.12.6/fs/ext3/Makefile    2005-12-17 02:21:21.000000000 +0300
-@@ -6,7 +6,7 @@
+--- linux-2.6.12.6-bull.orig/fs/ext3/Makefile  2006-04-29 20:39:09.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/Makefile       2006-04-29 20:39:10.000000000 +0400
+@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
  
  ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
           ioctl.o namei.o super.o symlink.o hash.o resize.o \
index 01e7387..ced267d 100644 (file)
@@ -1,61 +1,7 @@
-Index: linux-2.6.9-full/include/linux/ext3_fs.h
-===================================================================
---- linux-2.6.9-full.orig/include/linux/ext3_fs.h      2005-12-16 23:16:41.000000000 +0300
-+++ linux-2.6.9-full/include/linux/ext3_fs.h   2005-12-16 23:16:42.000000000 +0300
-@@ -57,6 +57,14 @@ struct statfs;
- #define ext3_debug(f, a...)   do {} while (0)
- #endif
-+#define EXT3_MULTIBLOCK_ALLOCATOR     1
-+
-+#define EXT3_MB_HINT_MERGE            1
-+#define EXT3_MB_HINT_RESERVED         2
-+#define EXT3_MB_HINT_METADATA         4
-+#define EXT3_MB_HINT_FIRST            8
-+#define EXT3_MB_HINT_BEST             16
-+
- /*
-  * Special inodes numbers
-  */
-@@ -365,6 +373,7 @@ struct ext3_inode {
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
- #define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
- #define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
-+#define EXT3_MOUNT_MBALLOC            0x800000/* Buddy allocation support */
- /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
- #ifndef clear_opt
-@@ -726,7 +735,7 @@ extern int ext3_bg_has_super(struct supe
- extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
- extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
- extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
--                            unsigned long);
-+                            unsigned long, int);
- extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
-                                unsigned long, unsigned long, int *);
- extern unsigned long ext3_count_free_blocks (struct super_block *);
-@@ -857,6 +866,17 @@ extern void ext3_extents_initialize_bloc
- extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
-                         unsigned int cmd, unsigned long arg);
-+/* mballoc.c */
-+extern long ext3_mb_stats;
-+extern long ext3_mb_max_to_scan;
-+extern int ext3_mb_init(struct super_block *, int);
-+extern int ext3_mb_release(struct super_block *);
-+extern int ext3_mb_new_blocks(handle_t *, struct inode *, unsigned long, int *, int, int *);
-+extern int ext3_mb_reserve_blocks(struct super_block *, int);
-+extern void ext3_mb_release_blocks(struct super_block *, int);
-+int __init init_ext3_proc(void);
-+void exit_ext3_proc(void);
-+
- #endif        /* __KERNEL__ */
- /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
 Index: linux-2.6.9-full/include/linux/ext3_fs_sb.h
 ===================================================================
---- linux-2.6.9-full.orig/include/linux/ext3_fs_sb.h   2005-12-16 23:16:39.000000000 +0300
-+++ linux-2.6.9-full/include/linux/ext3_fs_sb.h        2005-12-16 23:16:42.000000000 +0300
+--- linux-2.6.9-full.orig/include/linux/ext3_fs_sb.h   2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/include/linux/ext3_fs_sb.h        2006-05-22 21:45:08.000000000 +0400
 @@ -23,9 +23,15 @@
  #define EXT_INCLUDE
  #include <linux/blockgroup_lock.h>
@@ -72,7 +18,7 @@ Index: linux-2.6.9-full/include/linux/ext3_fs_sb.h
  
  /*
   * third extended-fs super-block data in memory
-@@ -81,6 +87,38 @@ struct ext3_sb_info {
+@@ -81,6 +87,39 @@ struct ext3_sb_info {
        char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
        int s_jquota_fmt;                       /* Format of quota to use */
  #endif
@@ -89,6 +35,7 @@ Index: linux-2.6.9-full/include/linux/ext3_fs_sb.h
 +      tid_t s_last_transaction;
 +      int s_mb_factor;
 +      unsigned short *s_mb_offsets, *s_mb_maxs;
++      unsigned long s_stripe;
 +
 +      /* history to debug policy */
 +      struct ext3_mb_history *s_mb_history;
@@ -111,10 +58,64 @@ Index: linux-2.6.9-full/include/linux/ext3_fs_sb.h
  };
  
  #endif        /* _LINUX_EXT3_FS_SB */
+Index: linux-2.6.9-full/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.9-full.orig/include/linux/ext3_fs.h      2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/include/linux/ext3_fs.h   2006-05-22 21:44:37.000000000 +0400
+@@ -57,6 +57,14 @@ struct statfs;
+ #define ext3_debug(f, a...)   do {} while (0)
+ #endif
++#define EXT3_MULTIBLOCK_ALLOCATOR     1
++
++#define EXT3_MB_HINT_MERGE            1
++#define EXT3_MB_HINT_RESERVED         2
++#define EXT3_MB_HINT_METADATA         4
++#define EXT3_MB_HINT_FIRST            8
++#define EXT3_MB_HINT_BEST             16
++
+ /*
+  * Special inodes numbers
+  */
+@@ -365,6 +373,7 @@ struct ext3_inode {
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
+ #define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
+ #define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
++#define EXT3_MOUNT_MBALLOC            0x800000/* Buddy allocation support */
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef clear_opt
+@@ -726,7 +735,7 @@ extern int ext3_bg_has_super(struct supe
+ extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
+ extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
+ extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
+-                            unsigned long);
++                            unsigned long, int);
+ extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
+                                unsigned long, unsigned long, int *);
+ extern unsigned long ext3_count_free_blocks (struct super_block *);
+@@ -857,6 +866,17 @@ extern void ext3_extents_initialize_bloc
+ extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
+                         unsigned int cmd, unsigned long arg);
++/* mballoc.c */
++extern long ext3_mb_stats;
++extern long ext3_mb_max_to_scan;
++extern int ext3_mb_init(struct super_block *, int);
++extern int ext3_mb_release(struct super_block *);
++extern int ext3_mb_new_blocks(handle_t *, struct inode *, unsigned long, int *, int, int *);
++extern int ext3_mb_reserve_blocks(struct super_block *, int);
++extern void ext3_mb_release_blocks(struct super_block *, int);
++int __init init_ext3_proc(void);
++void exit_ext3_proc(void);
++
+ #endif        /* __KERNEL__ */
+ /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
 Index: linux-2.6.9-full/fs/ext3/super.c
 ===================================================================
---- linux-2.6.9-full.orig/fs/ext3/super.c      2005-12-16 23:16:41.000000000 +0300
-+++ linux-2.6.9-full/fs/ext3/super.c   2005-12-16 23:16:42.000000000 +0300
+--- linux-2.6.9-full.orig/fs/ext3/super.c      2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/super.c   2006-05-22 21:52:54.000000000 +0400
 @@ -394,6 +394,7 @@ void ext3_put_super (struct super_block 
        struct ext3_super_block *es = sbi->s_es;
        int i;
@@ -128,29 +129,37 @@ Index: linux-2.6.9-full/fs/ext3/super.c
        Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
        Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
 -      Opt_extents, Opt_extdebug,
-+      Opt_extents, Opt_extdebug, Opt_mballoc,
++      Opt_extents, Opt_extdebug, Opt_mballoc, Opt_stripe
  };
  
  static match_table_t tokens = {
-@@ -647,6 +649,7 @@ static match_table_t tokens = {
+@@ -648,6 +649,8 @@ static match_table_t tokens = {
        {Opt_iopen_nopriv, "iopen_nopriv"},
        {Opt_extents, "extents"},
        {Opt_extdebug, "extdebug"},
 +      {Opt_mballoc, "mballoc"},
++      {Opt_stripe, "stripe=%u"},
        {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL},
        {Opt_resize, "resize"},
-@@ -957,6 +960,9 @@ clear_qf_name:
+@@ -958,6 +961,16 @@ clear_qf_name:
                case Opt_extdebug:
                        set_opt (sbi->s_mount_opt, EXTDEBUG);
                        break;
 +              case Opt_mballoc:
 +                      set_opt (sbi->s_mount_opt, MBALLOC);
 +                      break;
++              case Opt_stripe:
++                      if (match_int(&args[0], &option))
++                              return 0;
++                      if (option < 0)
++                              return 0;
++                      sbi->s_stripe = option;
++                      break;
                default:
                        printk (KERN_ERR
                                "EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1646,6 +1652,7 @@ static int ext3_fill_super (struct super
+@@ -1647,6 +1660,7 @@ static int ext3_fill_super (struct super
                ext3_count_dirs(sb));
  
        ext3_ext_init(sb);
@@ -158,7 +167,7 @@ Index: linux-2.6.9-full/fs/ext3/super.c
  
        return 0;
  
-@@ -2428,7 +2435,13 @@ static struct file_system_type ext3_fs_t
+@@ -2429,7 +2443,13 @@ static struct file_system_type ext3_fs_t
  
  static int __init init_ext3_fs(void)
  {
@@ -173,7 +182,7 @@ Index: linux-2.6.9-full/fs/ext3/super.c
        if (err)
                return err;
        err = init_inodecache();
-@@ -2450,6 +2463,7 @@ static void __exit exit_ext3_fs(void)
+@@ -2451,6 +2471,7 @@ static void __exit exit_ext3_fs(void)
        unregister_filesystem(&ext3_fs_type);
        destroy_inodecache();
        exit_ext3_xattr();
@@ -183,9 +192,9 @@ Index: linux-2.6.9-full/fs/ext3/super.c
  int ext3_prep_san_write(struct inode *inode, long *blocks,
 Index: linux-2.6.9-full/fs/ext3/extents.c
 ===================================================================
---- linux-2.6.9-full.orig/fs/ext3/extents.c    2005-12-16 23:16:41.000000000 +0300
-+++ linux-2.6.9-full/fs/ext3/extents.c 2005-12-16 23:16:42.000000000 +0300
-@@ -771,7 +771,7 @@ cleanup:
+--- linux-2.6.9-full.orig/fs/ext3/extents.c    2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/extents.c 2006-05-22 21:44:37.000000000 +0400
+@@ -777,7 +777,7 @@ cleanup:
                for (i = 0; i < depth; i++) {
                        if (!ablocks[i])
                                continue;
@@ -194,7 +203,7 @@ Index: linux-2.6.9-full/fs/ext3/extents.c
                }
        }
        kfree(ablocks);
-@@ -1428,7 +1428,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
                  path->p_idx->ei_leaf);
        bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
        ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
@@ -203,7 +212,7 @@ Index: linux-2.6.9-full/fs/ext3/extents.c
        return err;
  }
  
-@@ -1913,10 +1913,12 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t
        int needed = ext3_remove_blocks_credits(tree, ex, from, to);
        handle_t *handle = ext3_journal_start(tree->inode, needed);
        struct buffer_head *bh;
@@ -217,7 +226,7 @@ Index: linux-2.6.9-full/fs/ext3/extents.c
        if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
                /* tail removal */
                unsigned long num, start;
-@@ -1928,7 +1930,7 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t
                        bh = sb_find_get_block(tree->inode->i_sb, start + i);
                        ext3_forget(handle, 0, tree->inode, bh, start + i);
                }
@@ -226,97 +235,23 @@ Index: linux-2.6.9-full/fs/ext3/extents.c
        } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
                printk("strange request: removal %lu-%lu from %u:%u\n",
                       from, to, ex->ee_block, ex->ee_len);
-Index: linux-2.6.9-full/fs/ext3/inode.c
+Index: linux-2.6.9-full/fs/ext3/Makefile
 ===================================================================
---- linux-2.6.9-full.orig/fs/ext3/inode.c      2005-12-16 23:16:41.000000000 +0300
-+++ linux-2.6.9-full/fs/ext3/inode.c   2005-12-16 23:16:42.000000000 +0300
-@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h
-               ext3_journal_forget(handle, branch[i].bh);
-       }
-       for (i = 0; i < keys; i++)
--              ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
-+              ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
-       return err;
- }
-@@ -673,7 +673,7 @@ err_out:
-       if (err == -EAGAIN)
-               for (i = 0; i < num; i++)
-                       ext3_free_blocks(handle, inode, 
--                                       le32_to_cpu(where[i].key), 1);
-+                                       le32_to_cpu(where[i].key), 1, 1);
-       return err;
- }
-@@ -1831,7 +1831,7 @@ ext3_clear_blocks(handle_t *handle, stru
-               }
-       }
+--- linux-2.6.9-full.orig/fs/ext3/Makefile     2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/Makefile  2006-05-22 21:44:37.000000000 +0400
+@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
  
--      ext3_free_blocks(handle, inode, block_to_free, count);
-+      ext3_free_blocks(handle, inode, block_to_free, count, 1);
- }
- /**
-@@ -2004,7 +2004,7 @@ static void ext3_free_branches(handle_t 
-                               ext3_journal_test_restart(handle, inode);
-                       }
--                      ext3_free_blocks(handle, inode, nr, 1);
-+                      ext3_free_blocks(handle, inode, nr, 1, 1);
-                       if (parent_bh) {
-                               /*
-Index: linux-2.6.9-full/fs/ext3/balloc.c
-===================================================================
---- linux-2.6.9-full.orig/fs/ext3/balloc.c     2005-10-27 21:44:24.000000000 +0400
-+++ linux-2.6.9-full/fs/ext3/balloc.c  2005-12-16 23:16:42.000000000 +0300
-@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
-  *
-  * Return buffer_head on success or NULL in case of failure.
-  */
--static struct buffer_head *
-+struct buffer_head *
- read_block_bitmap(struct super_block *sb, unsigned int block_group)
- {
-       struct ext3_group_desc * desc;
-@@ -450,24 +450,6 @@ error_return:
-       return;
- }
+ ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+          ioctl.o namei.o super.o symlink.o hash.o resize.o \
+-         extents.o
++         extents.o mballoc.o
  
--/* Free given blocks, update quota and i_blocks field */
--void ext3_free_blocks(handle_t *handle, struct inode *inode,
--                      unsigned long block, unsigned long count)
--{
--      struct super_block * sb;
--      int dquot_freed_blocks;
--
--      sb = inode->i_sb;
--      if (!sb) {
--              printk ("ext3_free_blocks: nonexistent device");
--              return;
--      }
--      ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
--      if (dquot_freed_blocks)
--              DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
--      return;
--}
--
- /*
-  * For ext3 allocations, we must not reuse any blocks which are
-  * allocated in the bitmap buffer's "last committed data" copy.  This
-@@ -1140,7 +1122,7 @@ int ext3_should_retry_alloc(struct super
-  * bitmap, and then for any free bit if that fails.
-  * This function also updates quota and i_blocks field.
-  */
--int ext3_new_block(handle_t *handle, struct inode *inode,
-+int ext3_new_block_old(handle_t *handle, struct inode *inode,
-                       unsigned long goal, int *errp)
- {
-       struct buffer_head *bitmap_bh = NULL;
+ ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
 Index: linux-2.6.9-full/fs/ext3/xattr.c
 ===================================================================
---- linux-2.6.9-full.orig/fs/ext3/xattr.c      2005-12-16 23:16:40.000000000 +0300
-+++ linux-2.6.9-full/fs/ext3/xattr.c   2005-12-16 23:16:42.000000000 +0300
+--- linux-2.6.9-full.orig/fs/ext3/xattr.c      2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/xattr.c   2006-05-22 21:44:37.000000000 +0400
 @@ -1281,7 +1281,7 @@ ext3_xattr_set_handle2(handle_t *handle,
                        new_bh = sb_getblk(sb, block);
                        if (!new_bh) {
@@ -346,9 +281,9 @@ Index: linux-2.6.9-full/fs/ext3/xattr.c
        } else {
 Index: linux-2.6.9-full/fs/ext3/mballoc.c
 ===================================================================
---- linux-2.6.9-full.orig/fs/ext3/mballoc.c    2005-12-16 17:46:19.148560250 +0300
-+++ linux-2.6.9-full/fs/ext3/mballoc.c 2005-12-17 00:10:15.000000000 +0300
-@@ -0,0 +1,2429 @@
+--- linux-2.6.9-full.orig/fs/ext3/mballoc.c    2006-05-12 23:14:51.200000000 +0400
++++ linux-2.6.9-full/fs/ext3/mballoc.c 2006-05-22 21:51:30.000000000 +0400
+@@ -0,0 +1,2671 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -437,6 +372,12 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +
 +long ext3_mb_stats = 1;
 +
++/*
++ * power-of-two requests with ffs(len) >= this value use 2^N buddy search
++ */
++long ext3_mb_order2_reqs = 8;
++
++
 +#ifdef EXT3_BB_MAX_BLOCKS
 +#undef EXT3_BB_MAX_BLOCKS
 +#endif
@@ -502,6 +443,8 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +struct ext3_mb_history {
 +      struct ext3_free_extent goal;   /* goal allocation */
 +      struct ext3_free_extent result; /* result allocation */
++      unsigned pid;
++      unsigned ino;
 +      __u16 found;    /* how many extents have been found */
 +      __u16 groups;   /* how many groups have been scanned */
 +      __u16 tail;     /* what tail broke some buddy */
@@ -524,9 +467,9 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +#define EXT3_MB_BUDDY(e3b)    ((e3b)->bd_buddy)
 +
 +#ifndef EXT3_MB_HISTORY
-+#define ext3_mb_store_history(sb,ac)
++#define ext3_mb_store_history(sb,ino,ac)
 +#else
-+static void ext3_mb_store_history(struct super_block *,
++static void ext3_mb_store_history(struct super_block *, unsigned ino,
 +                              struct ext3_allocation_context *ac);
 +#endif
 +
@@ -1148,7 +1091,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +static int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
 +                              int needed, struct ext3_free_extent *ex)
 +{
-+      int next, max, ord;
++      int next = block, max, ord;
 +      void *buddy;
 +
 +      J_ASSERT(ex != NULL);
@@ -1173,6 +1116,11 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      ex->fe_start = block << order;
 +      ex->fe_group = e3b->bd_group;
 +
++      /* calc difference from given start */
++      next = next - ex->fe_start;
++      ex->fe_len -= next;
++      ex->fe_start += next;
++
 +      while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) {
 +
 +              if (block + 1 >= max)
@@ -1385,6 +1333,8 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +                              struct ext3_buddy *e3b)
 +{
 +      int group = ac->ac_g_ex.fe_group, max, err;
++      struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb);
++      struct ext3_super_block *es = sbi->s_es;
 +      struct ext3_free_extent ex;
 +
 +      err = ext3_mb_load_buddy(ac->ac_sb, group, e3b);
@@ -1394,8 +1344,26 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      ext3_lock_group(ac->ac_sb, group);
 +      max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start,
 +                              ac->ac_g_ex.fe_len, &ex);
-+      
-+      if (max > 0) {
++
++      if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
++              unsigned long start;
++              start = (e3b->bd_group * EXT3_BLOCKS_PER_GROUP(ac->ac_sb) +
++                              ex.fe_start + le32_to_cpu(es->s_first_data_block));
++              if (start % sbi->s_stripe == 0) {
++                      ac->ac_found++;
++                      ac->ac_b_ex = ex;
++                      ext3_mb_use_best_found(ac, e3b);
++              }
++      } else if (max >= ac->ac_g_ex.fe_len) {
++              J_ASSERT(ex.fe_len > 0);
++              J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
++              J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
++              ac->ac_found++;
++              ac->ac_b_ex = ex;
++              ext3_mb_use_best_found(ac, e3b);
++      } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) {
++              /* Sometimes, caller may want to merge even small
++               * number of blocks to an existing extent */
 +              J_ASSERT(ex.fe_len > 0);
 +              J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
 +              J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
@@ -1423,7 +1391,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      int i, k, max;
 +
 +      J_ASSERT(ac->ac_2order > 0);
-+      for (i = ac->ac_2order; i < sb->s_blocksize_bits + 1; i++) {
++      for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
 +              if (grp->bb_counters[i] == 0)
 +                      continue;
 +
@@ -1488,6 +1456,42 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      }
 +}
 +
++/*
++ * Special case for striped storage such as raid5: scan this group for a free
++ * extent of at least s_stripe blocks that starts at a stripe-aligned block.
++ */
++static void ext3_mb_scan_aligned(struct ext3_allocation_context *ac,
++                                      struct ext3_buddy *e3b)
++{
++      struct super_block *sb = ac->ac_sb;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      void *bitmap = EXT3_MB_BITMAP(e3b);
++      struct ext3_free_extent ex;
++      unsigned long i, max;
++
++      J_ASSERT(sbi->s_stripe != 0);   /* s_stripe is used as a divisor below */
++
++      /* group-relative offset of the first stripe-aligned block in this group */
++      i = e3b->bd_group * EXT3_BLOCKS_PER_GROUP(sb)
++              + le32_to_cpu(sbi->s_es->s_first_data_block);
++      i = ((i + sbi->s_stripe - 1) / sbi->s_stripe) * sbi->s_stripe;
++      i = (i - le32_to_cpu(sbi->s_es->s_first_data_block))
++                      % EXT3_BLOCKS_PER_GROUP(sb);
++
++      while (i < sb->s_blocksize * 8) {
++              if (!mb_test_bit(i, bitmap)) {  /* bit clear: presumably block i is free */
++                      max = mb_find_extent(e3b, 0, i, sbi->s_stripe, &ex);
++                      if (max >= sbi->s_stripe) {     /* a whole stripe fits here */
++                              ac->ac_found++;
++                              ac->ac_b_ex = ex;
++                              ext3_mb_use_best_found(ac, e3b);
++                              break;
++                      }
++              }
++              i += sbi->s_stripe;     /* only stripe-aligned offsets are probed */
++      }
++}
++
 +static int ext3_mb_good_group(struct ext3_allocation_context *ac,
 +                              int group, int cr)
 +{
@@ -1509,15 +1513,18 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +              case 0:
 +                      J_ASSERT(ac->ac_2order != 0);
 +                      bits = ac->ac_sb->s_blocksize_bits + 1;
-+                      for (i = ac->ac_2order; i < bits; i++)
++                      for (i = ac->ac_2order; i <= bits; i++)
 +                              if (grp->bb_counters[i] > 0)
 +                                      return 1;
++                      break;
 +              case 1:
 +                      if ((free / fragments) >= ac->ac_g_ex.fe_len)
 +                              return 1;
++                      break;
 +              case 2:
 +                      if (free >= ac->ac_g_ex.fe_len)
 +                              return 1;
++                      break;
 +              case 3:
 +                      return 1;
 +              default:
@@ -1618,23 +1625,27 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      ac.ac_2order = 0;
 +      ac.ac_criteria = 0;
 +
++      if (*len == 1 && sbi->s_stripe) {
++              /* looks like a metadata, let's use a dirty hack for raid5
++               * move all metadata in first groups in hope to hit cached
++               * sectors and thus avoid read-modify cycles in raid5 */
++              ac.ac_g_ex.fe_group = group = 0;
++      }
++
 +      /* probably, the request is for 2^8+ blocks (1/2/3/... MB) */
 +      i = ffs(*len);
-+      if (i >= 8) {
++      if (i >= ext3_mb_order2_reqs) {
 +              i--;
 +              if ((*len & (~(1 << i))) == 0)
 +                      ac.ac_2order = i;
 +      }
 +
-+      /* Sometimes, caller may want to merge even small
-+       * number of blocks to an existing extent */
-+      if (ac.ac_flags & EXT3_MB_HINT_MERGE) {
-+              err = ext3_mb_find_by_goal(&ac, &e3b);
-+              if (err)
-+                      goto out_err;
-+              if (ac.ac_status == AC_STATUS_FOUND)
-+                      goto found;
-+      }
++      /* first, try the goal */
++      err = ext3_mb_find_by_goal(&ac, &e3b);
++      if (err)
++              goto out_err;
++      if (ac.ac_status == AC_STATUS_FOUND)
++              goto found;
 +
 +      /* Let's just scan groups to find more-less suitable blocks */
 +      cr = ac.ac_2order ? 0 : 1;
@@ -1673,6 +1684,8 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +                      ac.ac_groups_scanned++;
 +                      if (cr == 0)
 +                              ext3_mb_simple_scan_group(&ac, &e3b);
++                      else if (cr == 1 && *len == sbi->s_stripe) 
++                              ext3_mb_scan_aligned(&ac, &e3b);
 +                      else
 +                              ext3_mb_complex_scan_group(&ac, &e3b);
 +
@@ -1853,7 +1866,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +                      atomic_inc(&sbi->s_bal_breaks);
 +      }
 +
-+      ext3_mb_store_history(sb, &ac);
++      ext3_mb_store_history(sb, inode->i_ino, &ac);
 +
 +      return block;
 +}
@@ -1918,9 +1931,9 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      char buf[20], buf2[20];
 +
 +      if (v == SEQ_START_TOKEN) {
-+              seq_printf(seq, "%-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
-+                       "goal", "result", "found", "grps", "cr", "merge",
-+                       "tail", "broken");
++              seq_printf(seq, "%-5s %-8s %-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
++                       "pid", "inode", "goal", "result", "found", "grps", "cr",
++                       "merge", "tail", "broken");
 +              return 0;
 +      }
 +
@@ -1928,9 +1941,9 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +              hs->goal.fe_start, hs->goal.fe_len);
 +      sprintf(buf2, "%u/%u/%u", hs->result.fe_group,
 +              hs->result.fe_start, hs->result.fe_len);
-+      seq_printf(seq, "%-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n", buf,
-+                      buf2, hs->found, hs->groups, hs->cr, 
-+                      hs->merged ? "M" : "", hs->tail,
++      seq_printf(seq, "%-5u %-8u %-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n",
++                      hs->pid, hs->ino, buf, buf2, hs->found, hs->groups,
++                      hs->cr, hs->merged ? "M" : "", hs->tail,
 +                      hs->buddy ? 1 << hs->buddy : 0);
 +      return 0;
 +}
@@ -1994,12 +2007,108 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      .release        = ext3_mb_seq_history_release,
 +};
 +
++static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
++{      /* seq_file ->start: turn position *pos into a group-number cookie */
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      int group;
++
++      if (*pos < 0 || *pos >= sbi->s_groups_count)
++              return NULL;    /* position is outside the group range */
++
++      group = *pos + 1;       /* +1 so that group 0 is not encoded as NULL */
++      return (void *)(long) group;    /* via long: no int/pointer size mismatch */
++}
++
++static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
++{      /* seq_file ->next: advance *pos and return the next group cookie */
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      int group;
++
++      ++*pos;
++      if (*pos < 0 || *pos >= sbi->s_groups_count)
++              return NULL;    /* walked past the last group */
++      group = *pos + 1;       /* +1 so that group 0 is not encoded as NULL */
++      return (void *)(long) group;    /* via long: no int/pointer size mismatch */
++}
++
++static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v)
++{      /* seq_file ->show: print one line of allocator counters for a group */
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      int group = (int)(long) v, i;   /* via long: no pointer/int size mismatch */
++      struct sg {
++              struct ext3_group_info info;
++              unsigned short counters[16];    /* room for the counters copied below */
++      } sg;
++
++      group--;        /* undo the +1 bias applied by ->start/->next */
++      if (group == 0) /* column header is emitted once, ahead of group 0 */
++              seq_printf(seq, "#%-5s: %-5s %-5s %-5s [ %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
++                       "group", "free", "frags", "first", "2^0", "2^1", "2^2", 
++                       "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10",
++                       "2^11", "2^12", "2^13");
++
++      i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
++              sizeof(struct ext3_group_info);
++      ext3_lock_group(sb, group);     /* snapshot info + counters under the group lock */
++      memcpy(&sg, sbi->s_group_info[group], i);
++      ext3_unlock_group(sb, group);
++
++      if (EXT3_MB_GRP_NEED_INIT(&sg.info))
++              return 0;       /* print nothing for a group flagged NEED_INIT */
++
++      seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
++                      sg.info.bb_fragments, sg.info.bb_first_free);
++      for (i = 0; i <= 13; i++)       /* 14 columns, matching the 2^0..2^13 header */
++              seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
++                              sg.info.bb_counters[i] : 0);
++      seq_printf(seq, " ]\n");
++
++      return 0;
++}
++
++static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v)
++{      /* seq_file ->stop: intentionally empty, nothing is released here */
++}
++
++static struct seq_operations ext3_mb_seq_groups_ops = { /* iterator passed to seq_open() */
++      .start  = ext3_mb_seq_groups_start,
++      .next   = ext3_mb_seq_groups_next,
++      .stop   = ext3_mb_seq_groups_stop,
++      .show   = ext3_mb_seq_groups_show,
++};
++
++static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file)
++{      /* open handler: attach the group iterator and hand it the super block */
++      struct super_block *sb = PDE(inode)->data;      /* sb taken from the proc entry's data */
++      int rc;
++
++      rc = seq_open(file, &ext3_mb_seq_groups_ops);
++      if (rc == 0) {
++              struct seq_file *m = (struct seq_file *)file->private_data;
++              m->private = sb;        /* read back as seq->private by the iterator callbacks */
++      }
++      return rc;
++
++}
++
++static struct file_operations ext3_mb_seq_groups_fops = {       /* file ops of the "mb_groups" proc entry */
++      .owner          = THIS_MODULE,
++      .open           = ext3_mb_seq_groups_open,
++      .read           = seq_read,
++      .llseek         = seq_lseek,
++      .release        = seq_release,
++};
++
 +static void ext3_mb_history_release(struct super_block *sb)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      char name[64];
 +
 +      snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name));
++      remove_proc_entry("mb_groups", sbi->s_mb_proc);
 +      remove_proc_entry("mb_history", sbi->s_mb_proc);
 +      remove_proc_entry(name, proc_root_ext3);
 +
@@ -2022,6 +2131,11 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +                      p->proc_fops = &ext3_mb_seq_history_fops;
 +                      p->data = sb;
 +              }
++              p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc);
++              if (p) {
++                      p->proc_fops = &ext3_mb_seq_groups_fops;
++                      p->data = sb;
++              }
 +      }
 +
 +      sbi->s_mb_history_max = 1000;
@@ -2034,7 +2148,8 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +}
 +
 +static void
-+ext3_mb_store_history(struct super_block *sb, struct ext3_allocation_context *ac)
++ext3_mb_store_history(struct super_block *sb, unsigned ino,
++                      struct ext3_allocation_context *ac)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      struct ext3_mb_history h;
@@ -2042,6 +2157,8 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      if (likely(sbi->s_mb_history == NULL))
 +              return;
 +
++      h.pid = current->pid;
++      h.ino = ino;
 +      h.goal = ac->ac_g_ex;
 +      h.result = ac->ac_b_ex;
 +      h.found = ac->ac_found;
@@ -2597,6 +2714,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +#define EXT3_MB_STATS_NAME        "mb_stats"
 +#define EXT3_MB_MAX_TO_SCAN_NAME  "mb_max_to_scan"
 +#define EXT3_MB_MIN_TO_SCAN_NAME  "mb_min_to_scan"
++#define EXT3_MB_ORDER2_REQ      "mb_order2_req"
 +
 +static int ext3_mb_stats_read(char *page, char **start, off_t off,
 +              int count, int *eof, void *data)
@@ -2684,6 +2802,45 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      return len;
 +}
 +
++static int ext3_mb_order2_req_write(struct file *file, const char *buffer,
++              unsigned long count, void *data)
++{      /* proc write handler: parse a positive integer into ext3_mb_order2_reqs */
++      char str[32];
++      long value;
++
++      if (count >= sizeof(str)) {     /* also leaves room for the terminator below */
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
++                     EXT3_MB_ORDER2_REQ, (int)sizeof(str));
++              return -EOVERFLOW;
++      }
++
++      if (copy_from_user(str, buffer, count))
++              return -EFAULT;
++
++      str[count] = '\0';      /* copy_from_user() does not NUL-terminate */
++      value = simple_strtol(str, NULL, 0);
++      if (value <= 0)
++              return -ERANGE; /* only positive thresholds are accepted */
++
++      ext3_mb_order2_reqs = value;
++
++      return count;
++}
++
++static int ext3_mb_order2_req_read(char *page, char **start, off_t off,
++              int count, int *eof, void *data)
++{      /* proc read handler: report ext3_mb_order2_reqs as one decimal line */
++      int len;
++
++      *eof = 1;       /* the whole value is produced by a single read */
++      if (off != 0)
++              return 0;       /* nothing more to return at a non-zero offset */
++
++      len = sprintf(page, "%ld\n", ext3_mb_order2_reqs);
++      *start = page;
++      return len;
++}
++
 +static int ext3_mb_min_to_scan_write(struct file *file, const char *buffer,
 +              unsigned long count, void *data)
 +{
@@ -2691,7 +2848,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      long value;
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string too long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2714,10 +2871,11 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      struct proc_dir_entry *proc_ext3_mb_stats;
 +      struct proc_dir_entry *proc_ext3_mb_max_to_scan;
 +      struct proc_dir_entry *proc_ext3_mb_min_to_scan;
++      struct proc_dir_entry *proc_ext3_mb_order2_req;
 +
 +      proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs);
 +      if (proc_root_ext3 == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n", EXT3_ROOT);
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n", EXT3_ROOT);
 +              return -EIO;
 +      }
 +
@@ -2725,7 +2883,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      proc_ext3_mb_stats = create_proc_entry(EXT3_MB_STATS_NAME,
 +                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
 +      if (proc_ext3_mb_stats == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
 +                              EXT3_MB_STATS_NAME);
 +              remove_proc_entry(EXT3_ROOT, proc_root_fs);
 +              return -EIO;
@@ -2740,7 +2898,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +                      EXT3_MB_MAX_TO_SCAN_NAME,
 +                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
 +      if (proc_ext3_mb_max_to_scan == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
 +                              EXT3_MB_MAX_TO_SCAN_NAME);
 +              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
 +              remove_proc_entry(EXT3_ROOT, proc_root_fs);
@@ -2756,7 +2914,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +                      EXT3_MB_MIN_TO_SCAN_NAME,
 +                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
 +      if (proc_ext3_mb_min_to_scan == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
 +                              EXT3_MB_MIN_TO_SCAN_NAME);
 +              remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
 +              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
@@ -2768,6 +2926,24 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      proc_ext3_mb_min_to_scan->read_proc  = ext3_mb_min_to_scan_read;
 +      proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write;
 +
++      /* Initialize EXT3_ORDER2_REQ */
++      proc_ext3_mb_order2_req = create_proc_entry(
++                      EXT3_MB_ORDER2_REQ,
++                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
++      if (proc_ext3_mb_order2_req == NULL) {
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
++                              EXT3_MB_ORDER2_REQ);
++              remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_ROOT, proc_root_fs);
++              return -EIO;
++      }
++
++      proc_ext3_mb_order2_req->data = NULL;
++      proc_ext3_mb_order2_req->read_proc  = ext3_mb_order2_req_read;
++      proc_ext3_mb_order2_req->write_proc = ext3_mb_order2_req_write;
++
 +      return 0;
 +}
 +
@@ -2776,18 +2952,93 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
 +      remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
 +      remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++      remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3);
 +      remove_proc_entry(EXT3_ROOT, proc_root_fs);
 +}
-Index: linux-2.6.9-full/fs/ext3/Makefile
+Index: linux-2.6.9-full/fs/ext3/balloc.c
 ===================================================================
---- linux-2.6.9-full.orig/fs/ext3/Makefile     2005-12-16 23:16:41.000000000 +0300
-+++ linux-2.6.9-full/fs/ext3/Makefile  2005-12-16 23:16:42.000000000 +0300
-@@ -6,7 +6,7 @@
+--- linux-2.6.9-full.orig/fs/ext3/balloc.c     2006-03-10 18:20:03.000000000 +0300
++++ linux-2.6.9-full/fs/ext3/balloc.c  2006-05-22 21:44:37.000000000 +0400
+@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
+  *
+  * Return buffer_head on success or NULL in case of failure.
+  */
+-static struct buffer_head *
++struct buffer_head *
+ read_block_bitmap(struct super_block *sb, unsigned int block_group)
+ {
+       struct ext3_group_desc * desc;
+@@ -451,24 +451,6 @@ error_return:
+       return;
+ }
  
- ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
-          ioctl.o namei.o super.o symlink.o hash.o resize.o \
--         extents.o
-+         extents.o mballoc.o
+-/* Free given blocks, update quota and i_blocks field */
+-void ext3_free_blocks(handle_t *handle, struct inode *inode,
+-                      unsigned long block, unsigned long count)
+-{
+-      struct super_block * sb;
+-      int dquot_freed_blocks;
+-
+-      sb = inode->i_sb;
+-      if (!sb) {
+-              printk ("ext3_free_blocks: nonexistent device");
+-              return;
+-      }
+-      ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
+-      if (dquot_freed_blocks)
+-              DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
+-      return;
+-}
+-
+ /*
+  * For ext3 allocations, we must not reuse any blocks which are
+  * allocated in the bitmap buffer's "last committed data" copy.  This
+@@ -1131,7 +1113,7 @@ int ext3_should_retry_alloc(struct super
+  * bitmap, and then for any free bit if that fails.
+  * This function also updates quota and i_blocks field.
+  */
+-int ext3_new_block(handle_t *handle, struct inode *inode,
++int ext3_new_block_old(handle_t *handle, struct inode *inode,
+                       unsigned long goal, int *errp)
+ {
+       struct buffer_head *bitmap_bh = NULL;
+Index: linux-2.6.9-full/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/inode.c      2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/inode.c   2006-05-22 21:44:37.000000000 +0400
+@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h
+               ext3_journal_forget(handle, branch[i].bh);
+       }
+       for (i = 0; i < keys; i++)
+-              ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
++              ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
+       return err;
+ }
  
- ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
- ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+@@ -673,7 +673,7 @@ err_out:
+       if (err == -EAGAIN)
+               for (i = 0; i < num; i++)
+                       ext3_free_blocks(handle, inode, 
+-                                       le32_to_cpu(where[i].key), 1);
++                                       le32_to_cpu(where[i].key), 1, 1);
+       return err;
+ }
+@@ -1831,7 +1831,7 @@ ext3_clear_blocks(handle_t *handle, stru
+               }
+       }
+-      ext3_free_blocks(handle, inode, block_to_free, count);
++      ext3_free_blocks(handle, inode, block_to_free, count, 1);
+ }
+ /**
+@@ -2004,7 +2004,7 @@ static void ext3_free_branches(handle_t 
+                               ext3_journal_test_restart(handle, inode);
+                       }
+-                      ext3_free_blocks(handle, inode, nr, 1);
++                      ext3_free_blocks(handle, inode, nr, 1, 1);
+                       if (parent_bh) {
+                               /*
index 2b01d3e..b630b73 100644 (file)
@@ -287,6 +287,27 @@ Description: Cache open negative dentries on client when possible.
 Details    : Guard negative dentries with UPDATE lock on parent dir, drop
             negative dentries on lock revocation.
 
+Severity   : minor
+Frequency  : Always
+Bugzilla   : 10510
+Description: Remounting a client read-only wasn't possible with a zconf mount
+Details    : It wasn't possible to remount a client read-only with llmount.
+
+Severity   : enhancement
+Description: Include MPICH 1.2.6 Lustre ADIO interface patch
+Details    : In lustre/contrib/ or /usr/share/lustre in RPM a patch for
+            MPICH is included to add Lustre-specific ADIO interfaces.
+            This is based closely on the UFS ADIO layer and only differs
+            in file creation, in order to allow the OST striping to be set.
+            This is user-contributed code and not supported by CFS.
+
+Severity   : minor
+Frequency  : Always
+Bugzilla   : 9486
+Description: extended inode attributes work improperly when the client runs a
+             2.4 kernel and the server a 2.6 kernel, or the other way around.
+Details    : Introduce kernel-independent values for these flags.
+
 
 ------------------------------------------------------------------------------
 
@@ -816,6 +837,14 @@ Details    : If a client is repeatedly creating and unlinking files it
             client node to run out of memory.  Instead flush old inodes
             from client cache that have the same inode number as a new inode.
 
+Severity   : minor
+Frequency  : SLES9 2.6.5 kernel and long filenames only
+Bugzilla   : 9969, 10379
+Description: utime reports stale NFS file handle
+Details    : SLES9 uses out-of-dentry names in some cases, which confused
+            the lustre dentry revalidation.  Change it to always use the
+            in-dentry qstr.
+
 Severity   : major
 Frequency  : rare, unless heavy write-truncate concurrency is continuous
 Bugzilla   : 4180, 6984, 7171, 9963, 9331
index be4dae8..0b39704 100644 (file)
@@ -5,8 +5,9 @@
 
 AUTOMAKE_OPTIONS = foreign
 
+# also update lustre/autoconf/lustre-core.m4 AC_CONFIG_FILES
 ALWAYS_SUBDIRS := include lvfs obdclass ldlm ptlrpc osc lov obdecho \
-       mgc doc utils tests conf scripts autoconf
+       mgc doc utils tests conf scripts autoconf contrib
 
 SERVER_SUBDIRS := ldiskfs obdfilter ost mds mgs
 
index d695f43..f9ef1fb 100644 (file)
@@ -602,7 +602,7 @@ AC_DEFUN([LC_CONFIGURE],
 [LC_CONFIG_OBD_BUFFER_SIZE
 
 # include/liblustre.h
-AC_CHECK_HEADERS([asm/page.h sys/user.h sys/vfs.h stdint.h])
+AC_CHECK_HEADERS([asm/page.h sys/user.h sys/vfs.h stdint.h blkid/blkid.h])
 
 # include/lustre/lustre_user.h
 # See note there re: __ASM_X86_64_PROCESSOR_H
@@ -650,6 +650,7 @@ AM_CONDITIONAL(MPITESTS, test x$enable_mpitests = xyes, Build MPI Tests)
 AM_CONDITIONAL(CLIENT, test x$enable_client = xyes)
 AM_CONDITIONAL(SERVER, test x$enable_server = xyes)
 AM_CONDITIONAL(QUOTA, test x$enable_quota = xyes)
+AM_CONDITIONAL(BLKID, test x$ac_cv_header_blkid_blkid_h = xyes)
 ])
 
 #
@@ -662,6 +663,7 @@ AC_DEFUN([LC_CONFIG_FILES],
 lustre/Makefile
 lustre/autoMakefile
 lustre/autoconf/Makefile
+lustre/contrib/Makefile
 lustre/conf/Makefile
 lustre/doc/Makefile
 lustre/include/Makefile
diff --git a/lustre/contrib/.cvsignore b/lustre/contrib/.cvsignore
new file mode 100644 (file)
index 0000000..282522d
--- /dev/null
@@ -0,0 +1,2 @@
+Makefile
+Makefile.in
diff --git a/lustre/contrib/Makefile.am b/lustre/contrib/Makefile.am
new file mode 100644 (file)
index 0000000..5a8e66c
--- /dev/null
@@ -0,0 +1,5 @@
+# Contributions Makefile
+
+EXTRA_DIST = mpich-*.patch
+pkgdata_DATA = $(EXTRA_DIST)
+
diff --git a/lustre/contrib/README b/lustre/contrib/README
new file mode 100644 (file)
index 0000000..73270f3
--- /dev/null
@@ -0,0 +1,2 @@
+The files in this directory are user-contributed and are not supported by
+CFS in any way.
diff --git a/lustre/doc/llverdev.txt b/lustre/doc/llverdev.txt
new file mode 100644 (file)
index 0000000..dd0e150
--- /dev/null
@@ -0,0 +1,48 @@
+BLOCK DEVICE VERIFICATION TOOL. ( bdevt )
+==========================================
+
+Building tool: 
+       To build this tool you just need to invoke make at command prompt.
+       e.g.  $ make 
+       
+       this will compile the sources and build bdevt in this directory.
+
+Usage: 
+Syntax:
+
+./bdevt [OPTION]... <device-name> ... 
+
+[OPTION]
+        -t {seconds} for --timestamp,  set test time (default=current time())
+        -o {offset}  for --offset, offset in kB of start of test (default=0)
+        -r run test in read (verify) mode
+        -w run test in write (test-pattern) mode (default=r&w)
+        -v for verbose
+        -p for --partial, for partial check (1GB steps)
+        -l for --long, full check (default 4k)
+        -c for --chunksize, IO chunk size (default=1048576)
+        -f for --force, force test to run without confirmation
+       --help to display help.
+
+Guidelines for using this tool:
+       It is expected that the bdevt tool will be run on large devices (TB),
+so it is always better to run bdevt in verbose mode, so that one can easily
+restart device testing from the point at which it had stopped.
+For example:
+
+       [root@tucker bdevt]# ./bdevt -v -f -w --timestamp=1009839028 /dev/hda5
+       Number of sectors: 49158837, this makes 23.441 GB
+       Timestamp: 1009839028
+       Current write offset:        5078016 kB
+
+If for some reason somebody interrupts execution at this point, one can
+easily restart the test from the same point by passing the offset
+displayed by verbose mode, as shown below.
+
+       [root@tucker bdevt]# ./bdevt -v -f -w --offset=5078016 --timestamp=1009839028 /dev/hda5
+       Number of sectors: 49158837, this makes 23.441 GB
+       Timestamp: 1009839028
+       Current write offset:        9726208 kB
+One can use similar things for read only and read write modes also.
+
diff --git a/lustre/doc/llverfs.txt b/lustre/doc/llverfs.txt
new file mode 100644 (file)
index 0000000..0321d75
--- /dev/null
@@ -0,0 +1,48 @@
+FILESYSTEM VERIFICATION TOOL. ( ext3vt )
+==========================================
+
+Building tool: 
+       To build this tool you just need to invoke make at command prompt.
+       e.g.  $ make 
+       
+       this will compile the sources and build ext3vt in this directory.
+
+Usage: 
+Syntax:
+
+./ext3vt [OPTION]... <filesystem path> ... 
+
+[OPTION]
+        -t {seconds} for --timestamp,  set test time(default=current time())
+        -o {fileOffset}  for --fileOffset, full path of file from which tests should start
+        -r run test in read (verify) mode
+        -w run test in write (test-pattern) mode (default=r&w)
+        -v for verbose
+        -p for --partial, for partial check (1MB files)
+        -l for --long, full check (4GB file with 4k blocks)
+        -c for --chunksize, IO chunk size (default=1048576)
+        -h display this help and exit
+        --help display this help and exit
+
+Guidelines for using this tool:
+       It is expected that the ext3vt tool will be run on large
+filesystems (TB), so it is always better to run ext3vt in verbose mode,
+so that one can easily restart testing from the point at which it
+had stopped.
+For example:
+       
+       [root@Matrix ext3vt]# ./ext3vt -v -f -w --timestamp=1145009417 /mnt/store/
+       Timestamp: 1145009417
+       write File name: /mnt/store/dir00004/file005
+       
+If for some reason somebody interrupts execution at this point, one can
+easily restart the test from the same point by passing the file path
+displayed by verbose mode as the file offset, as shown below.
+
+       [root@tucker ext3vt]# ./ext3vt -v -f -w --fileOffset=/mnt/store/dir00004/file005 
+                            --timestamp=1145009417 /mnt/store/
+       Timestamp: 1145009417
+       write File name: /mnt/store/dir00008/file007
+       write complete
+       [root@tucker ext3vt]#
+One can use similar things for read only and read write modes also.
index ec22d4a..72cc96f 100644 (file)
@@ -242,7 +242,7 @@ typedef long sector_t;
 static inline void clear_page_dirty(struct page *page)
 {
         if (PageDirty(page))
-                ClearPageDirty(page); 
+                ClearPageDirty(page);
 }
 
 static inline int clear_page_dirty_for_io(struct page *page)
index 17f4546..4e4ae90 100644 (file)
@@ -19,8 +19,6 @@
 
 #include <linux/fs.h>
 #include <linux/dcache.h>
-#include <linux/jbd.h>
-#include <linux/ext3_fs.h>
 #include <linux/proc_fs.h>
 
 #include <obd_class.h>
index 8f724c8..f99051b 100644 (file)
 #endif
 
 #if (!defined(_LINUX_TYPES_H) && !defined(_BLKID_TYPES_H) && \
-     !defined(_EXT2_TYPES_H) && !defined(_I386_TYPES_H) && \
-     !defined(_X86_64_TYPES_H))
+       !defined(_EXT2_TYPES_H) && !defined(_I386_TYPES_H)) && \
+       !defined(_ASM_IA64_TYPES_H) && !defined(_X86_64_TYPES_H) && \
+       !defined(_PPC_TYPES_H) && !defined(_PPC64_TYPES_H)
+       /* yuck, would be nicer with _ASM_TYPES_H */
 
 typedef unsigned short umode_t;
 /*
index e339db0..56f88e3 100644 (file)
@@ -255,6 +255,7 @@ extern void lustre_swab_ptlrpc_body(struct ptlrpc_body *pb);
 #define OBD_CONNECT_ATTRFID   0x4000ULL /* Server supports GetAttr By Fid */
 #define OBD_CONNECT_NODEVOH   0x8000ULL /* No open handle for special nodes */
 #define OBD_CONNECT_EMPTY 0x80000000ULL /* fake: these are empty connect flags*/
+#define OBD_CONNECT_RMT_CLIENT 0x10000ULL /* Remote client */
 
 /* also update obd_connect_names[] for lprocfs_rd_connect_flags() */
 
@@ -665,6 +666,48 @@ struct mds_status_req {
 extern void lustre_swab_mds_status_req (struct mds_status_req *r);
 
 #define MDS_BFLAG_UNCOMMITTED_WRITES   0x1
+#define MDS_BFLAG_EXT_FLAGS     0x80000000 /* == EXT3_RESERVED_FL */
+
+/* these should be identical to their EXT3_*_FL counterparts, and are
+ * redefined here only to avoid dragging in ext3_fs.h */
+#define MDS_SYNC_FL             0x00000008 /* Synchronous updates */
+#define MDS_IMMUTABLE_FL        0x00000010 /* Immutable file */
+#define MDS_APPEND_FL           0x00000020 /* writes to file may only append */
+#define MDS_NOATIME_FL          0x00000080 /* do not update atime */
+#define MDS_DIRSYNC_FL          0x00010000 /* dirsync behaviour (dir only) */
+
+#ifdef __KERNEL__
+/* If MDS_BFLAG_EXT_FLAGS is set it means we requested EXT3_*_FL inode flags
+ * and we need to decode these into local S_* flags in the inode.  Otherwise
+ * we pass flags straight through (see bug 9486). */
+static inline int ll_ext_to_inode_flags(int flags)
+{
+        return (flags & MDS_BFLAG_EXT_FLAGS) ?
+               (((flags & MDS_SYNC_FL)      ? S_SYNC      : 0) |
+                ((flags & MDS_NOATIME_FL)   ? S_NOATIME   : 0) |
+                ((flags & MDS_APPEND_FL)    ? S_APPEND    : 0) |
+#if defined(S_DIRSYNC)
+                ((flags & MDS_DIRSYNC_FL)   ? S_DIRSYNC   : 0) |
+#endif
+                ((flags & MDS_IMMUTABLE_FL) ? S_IMMUTABLE : 0)) :
+               (flags & ~MDS_BFLAG_EXT_FLAGS);
+}
+
+/* If MDS_BFLAG_EXT_FLAGS is set it means we requested EXT3_*_FL inode flags
+ * and we pass these straight through.  Otherwise we need to convert from
+ * S_* flags to their EXT3_*_FL equivalents (see bug 9486). */
+static inline int ll_inode_to_ext_flags(int oflags, int iflags)
+{
+        return (oflags & MDS_BFLAG_EXT_FLAGS) ? (oflags & ~MDS_BFLAG_EXT_FLAGS):
+               (((iflags & S_SYNC)      ? MDS_SYNC_FL      : 0) |
+                ((iflags & S_NOATIME)   ? MDS_NOATIME_FL   : 0) |
+                ((iflags & S_APPEND)    ? MDS_APPEND_FL    : 0) |
+#if defined(S_DIRSYNC)
+                ((iflags & S_DIRSYNC)   ? MDS_DIRSYNC_FL   : 0) |
+#endif
+                ((iflags & S_IMMUTABLE) ? MDS_IMMUTABLE_FL : 0));
+}
+#endif
 
 struct mds_body {
         struct ll_fid  fid1;
index e885c06..5b03428 100644 (file)
@@ -409,9 +409,12 @@ CDEB_TYPE(level, "@@@ " fmt                                                    \
        REQ_FLAGS_FMT"/%x/%x rc %d/%d\n" , ## args, req, req->rq_xid,           \
        req->rq_transno,                                                        \
        req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : -1,               \
-       req->rq_import ? obd2cli_tgt(req->rq_import->imp_obd) : "<?>",          \
+       req->rq_import ? obd2cli_tgt(req->rq_import->imp_obd) :                 \
+          req->rq_export ? (char*)req->rq_export->exp_client_uuid.uuid : "<?>",\
        req->rq_import ?                                                        \
-          (char *)req->rq_import->imp_connection->c_remote_uuid.uuid : "<?>",  \
+          (char *)req->rq_import->imp_connection->c_remote_uuid.uuid :         \
+          req->rq_export ?                                                     \
+          (char *)req->rq_export->exp_connection->c_remote_uuid.uuid :  "<?>", \
        (req->rq_import && req->rq_import->imp_client) ?                        \
            req->rq_import->imp_client->cli_request_portal : -1,                \
        req->rq_reqlen, req->rq_replen,                                         \
index f042b56..5c5a96f 100644 (file)
@@ -23,7 +23,7 @@
 #define IOC_MDC_TYPE         'i'
 #define IOC_MDC_MIN_NR       20
 /* Moved to lustre_user.h
-#define IOC_MDC_LOOKUP       _IOWR(IOC_MDC_TYPE, 20, struct obd_device *)
+#define IOC_MDC_LOOKUP       _IOWR(IOC_MDC_TYPE, 20, struct obd_ioctl_data *)
 #define IOC_MDC_GETSTRIPE    _IOWR(IOC_MDC_TYPE, 21, struct lov_mds_md *) */
 #define IOC_MDC_MAX_NR       50
 
@@ -35,9 +35,9 @@
 /* this is really local to the OSC */
 struct loi_oap_pages {
         struct list_head        lop_pending;
-        int                     lop_num_pending;
         struct list_head        lop_urgent;
         struct list_head        lop_pending_group;
+        int                     lop_num_pending;
 };
 
 struct osc_async_rc {
index 46499d4..4b2b028 100644 (file)
@@ -98,6 +98,7 @@ CONFIG_PTRACK=y
 #
 CONFIG_EFI_VARS=y
 CONFIG_EFI_PCDP=y
+CONFIG_DELL_RBU=m
 CONFIG_BINFMT_ELF=y
 CONFIG_BINFMT_MISC=y
 
@@ -307,6 +308,8 @@ CONFIG_SCSI_LOGGING=y
 CONFIG_SCSI_SPI_ATTRS=m
 CONFIG_SCSI_FC_ATTRS=m
 CONFIG_SCSI_ISCSI_ATTRS=m
+CONFIG_SAS_CLASS=m
+# CONFIG_SAS_DEBUG is not set
 
 #
 # SCSI low-level drivers
@@ -321,6 +324,8 @@ CONFIG_AIC7XXX_RESET_DELAY_MS=15000
 # CONFIG_AIC7XXX_DEBUG_ENABLE is not set
 CONFIG_AIC7XXX_DEBUG_MASK=0
 # CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
+CONFIG_SCSI_AIC94XX=m
+# CONFIG_AIC94XX_DEBUG is not set
 CONFIG_SCSI_AIC7XXX_OLD=m
 CONFIG_SCSI_AIC79XX=m
 CONFIG_AIC79XX_CMDS_PER_DEVICE=4
@@ -332,6 +337,7 @@ CONFIG_AIC79XX_DEBUG_MASK=0
 CONFIG_MEGARAID_NEWGEN=y
 CONFIG_MEGARAID_MM=m
 CONFIG_MEGARAID_MAILBOX=m
+CONFIG_MEGARAID_SAS=m
 CONFIG_SCSI_SATA=y
 CONFIG_SCSI_SATA_AHCI=m
 CONFIG_SCSI_SATA_SVW=m
@@ -408,10 +414,14 @@ CONFIG_DM_MULTIPATH_EMC=m
 #
 # Fusion MPT device support
 #
-CONFIG_FUSION=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
 CONFIG_FUSION_MAX_SGE=40
 CONFIG_FUSION_CTL=m
 CONFIG_FUSION_LAN=m
+CONFIG_FUSION_OLD_MODULE_COMPAT=m
 
 #
 # IEEE 1394 (FireWire) support
@@ -830,9 +840,11 @@ CONFIG_NS83820=m
 # CONFIG_YELLOWFIN is not set
 CONFIG_R8169=m
 CONFIG_R8169_NAPI=y
+CONFIG_SKY2=m
 CONFIG_SK98LIN=m
 CONFIG_VIA_VELOCITY=m
 CONFIG_TIGON3=m
+CONFIG_BNX2=m
 
 #
 # Ethernet (10000 Mbit)
@@ -1070,6 +1082,12 @@ CONFIG_ISDN_CAPI_CAPIDRV=m
 # Active AVM cards
 #
 CONFIG_CAPI_AVM=y
+CONFIG_ISDN_DRV_AVMB1_B1PCI=m
+CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y
+CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m
+CONFIG_ISDN_DRV_AVMB1_AVM_CS=m
+CONFIG_ISDN_DRV_AVMB1_T1PCI=m
+CONFIG_ISDN_DRV_AVMB1_C4=m
 
 #
 # Active Eicon DIVA Server cards
@@ -1175,6 +1193,7 @@ CONFIG_SERIAL_8250_RSA=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
 CONFIG_SERIAL_SGI_L1_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_CRASH is not set
@@ -1242,7 +1261,8 @@ CONFIG_DRM_MGA=m
 CONFIG_RAW_DRIVER=y
 # CONFIG_HPET is not set
 CONFIG_MAX_RAW_DEVS=8192
-# CONFIG_MMTIMER is not set
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_MMTIMER=m
 
 #
 # I2C support
@@ -1698,6 +1718,25 @@ CONFIG_USB_SPEEDTOUCH=m
 # CONFIG_USB_GADGET is not set
 
 #
+# InfiniBand support
+#
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_MTHCA=m
+# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
+CONFIG_INFINIBAND_IPOIB=m
+# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
+CONFIG_INFINIBAND_SDP=m
+# CONFIG_INFINIBAND_SDP_DEBUG is not set
+CONFIG_INFINIBAND_SRP=m
+
+#
+# EDAC - error detection and reporting (RAS)
+#
+# CONFIG_EDAC is not set
+
+#
 # File systems
 #
 CONFIG_EXT2_FS=y
@@ -1792,6 +1831,7 @@ CONFIG_NFSD_TCP=y
 CONFIG_LOCKD=m
 CONFIG_LOCKD_V4=y
 CONFIG_EXPORTFS=m
+CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=m
 CONFIG_SUNRPC_GSS=m
 CONFIG_RPCSEC_GSS_KRB5=m
index 92aa946..97749c9 100644 (file)
@@ -98,6 +98,7 @@ CONFIG_PTRACK=y
 #
 CONFIG_EFI_VARS=y
 CONFIG_EFI_PCDP=y
+CONFIG_DELL_RBU=m
 CONFIG_BINFMT_ELF=y
 CONFIG_BINFMT_MISC=y
 
@@ -307,6 +308,8 @@ CONFIG_SCSI_LOGGING=y
 CONFIG_SCSI_SPI_ATTRS=m
 CONFIG_SCSI_FC_ATTRS=m
 CONFIG_SCSI_ISCSI_ATTRS=m
+CONFIG_SAS_CLASS=m
+# CONFIG_SAS_DEBUG is not set
 
 #
 # SCSI low-level drivers
@@ -321,6 +324,8 @@ CONFIG_AIC7XXX_RESET_DELAY_MS=15000
 # CONFIG_AIC7XXX_DEBUG_ENABLE is not set
 CONFIG_AIC7XXX_DEBUG_MASK=0
 # CONFIG_AIC7XXX_REG_PRETTY_PRINT is not set
+CONFIG_SCSI_AIC94XX=m
+# CONFIG_AIC94XX_DEBUG is not set
 CONFIG_SCSI_AIC7XXX_OLD=m
 CONFIG_SCSI_AIC79XX=m
 CONFIG_AIC79XX_CMDS_PER_DEVICE=4
@@ -332,6 +337,7 @@ CONFIG_AIC79XX_DEBUG_MASK=0
 CONFIG_MEGARAID_NEWGEN=y
 CONFIG_MEGARAID_MM=m
 CONFIG_MEGARAID_MAILBOX=m
+CONFIG_MEGARAID_SAS=m
 CONFIG_SCSI_SATA=y
 CONFIG_SCSI_SATA_AHCI=m
 CONFIG_SCSI_SATA_SVW=m
@@ -408,10 +414,14 @@ CONFIG_DM_MULTIPATH_EMC=m
 #
 # Fusion MPT device support
 #
-CONFIG_FUSION=m
+CONFIG_FUSION=y
+CONFIG_FUSION_SPI=m
+CONFIG_FUSION_FC=m
+CONFIG_FUSION_SAS=m
 CONFIG_FUSION_MAX_SGE=40
 CONFIG_FUSION_CTL=m
 CONFIG_FUSION_LAN=m
+CONFIG_FUSION_OLD_MODULE_COMPAT=m
 
 #
 # IEEE 1394 (FireWire) support
@@ -830,9 +840,11 @@ CONFIG_NS83820=m
 # CONFIG_YELLOWFIN is not set
 CONFIG_R8169=m
 CONFIG_R8169_NAPI=y
+CONFIG_SKY2=m
 CONFIG_SK98LIN=m
 CONFIG_VIA_VELOCITY=m
 CONFIG_TIGON3=m
+CONFIG_BNX2=m
 
 #
 # Ethernet (10000 Mbit)
@@ -1070,6 +1082,12 @@ CONFIG_ISDN_CAPI_CAPIDRV=m
 # Active AVM cards
 #
 CONFIG_CAPI_AVM=y
+CONFIG_ISDN_DRV_AVMB1_B1PCI=m
+CONFIG_ISDN_DRV_AVMB1_B1PCIV4=y
+CONFIG_ISDN_DRV_AVMB1_B1PCMCIA=m
+CONFIG_ISDN_DRV_AVMB1_AVM_CS=m
+CONFIG_ISDN_DRV_AVMB1_T1PCI=m
+CONFIG_ISDN_DRV_AVMB1_C4=m
 
 #
 # Active Eicon DIVA Server cards
@@ -1175,6 +1193,7 @@ CONFIG_SERIAL_8250_RSA=y
 CONFIG_SERIAL_CORE=y
 CONFIG_SERIAL_CORE_CONSOLE=y
 CONFIG_SERIAL_SGI_L1_CONSOLE=y
+# CONFIG_SERIAL_JSM is not set
 CONFIG_UNIX98_PTYS=y
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_CRASH is not set
@@ -1242,7 +1261,8 @@ CONFIG_DRM_MGA=m
 CONFIG_RAW_DRIVER=y
 # CONFIG_HPET is not set
 CONFIG_MAX_RAW_DEVS=8192
-# CONFIG_MMTIMER is not set
+CONFIG_HANGCHECK_TIMER=m
+CONFIG_MMTIMER=m
 
 #
 # I2C support
@@ -1698,6 +1718,25 @@ CONFIG_USB_SPEEDTOUCH=m
 # CONFIG_USB_GADGET is not set
 
 #
+# InfiniBand support
+#
+CONFIG_INFINIBAND=m
+CONFIG_INFINIBAND_USER_MAD=m
+CONFIG_INFINIBAND_USER_ACCESS=m
+CONFIG_INFINIBAND_MTHCA=m
+# CONFIG_INFINIBAND_MTHCA_DEBUG is not set
+CONFIG_INFINIBAND_IPOIB=m
+# CONFIG_INFINIBAND_IPOIB_DEBUG is not set
+CONFIG_INFINIBAND_SDP=m
+# CONFIG_INFINIBAND_SDP_DEBUG is not set
+CONFIG_INFINIBAND_SRP=m
+
+#
+# EDAC - error detection and reporting (RAS)
+#
+# CONFIG_EDAC is not set
+
+#
 # File systems
 #
 CONFIG_EXT2_FS=y
@@ -1792,6 +1831,7 @@ CONFIG_NFSD_TCP=y
 CONFIG_LOCKD=m
 CONFIG_LOCKD_V4=y
 CONFIG_EXPORTFS=m
+CONFIG_NFS_COMMON=y
 CONFIG_SUNRPC=m
 CONFIG_SUNRPC_GSS=m
 CONFIG_RPCSEC_GSS_KRB5=m
diff --git a/lustre/kernel_patches/patches/ext3-external-journal-2.6.9.patch b/lustre/kernel_patches/patches/ext3-external-journal-2.6.9.patch
new file mode 100644 (file)
index 0000000..7cc86f2
--- /dev/null
@@ -0,0 +1,150 @@
+Signed-off-by: Johann Lombardi <johann.lombardi@bull.net>
+
+Index: linux-2.6.9-full/fs/ext3/super.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/super.c      2006-05-20 01:14:14.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/super.c   2006-05-20 01:17:10.000000000 +0400
+@@ -39,7 +39,8 @@
+ #include "xattr.h"
+ #include "acl.h"
+-static int ext3_load_journal(struct super_block *, struct ext3_super_block *);
++static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
++                           unsigned long journal_devnum);
+ static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
+                              int);
+ static void ext3_commit_super (struct super_block * sb,
+@@ -591,7 +592,7 @@ enum {
+       Opt_nouid32, Opt_check, Opt_nocheck, Opt_debug, Opt_oldalloc, Opt_orlov,
+       Opt_user_xattr, Opt_nouser_xattr, Opt_acl, Opt_noacl,
+       Opt_reservation, Opt_noreservation, Opt_noload,
+-      Opt_commit, Opt_journal_update, Opt_journal_inum,
++      Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
+       Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
+       Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
+       Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0,
+@@ -630,6 +631,7 @@ static match_table_t tokens = {
+       {Opt_commit, "commit=%u"},
+       {Opt_journal_update, "journal=update"},
+       {Opt_journal_inum, "journal=%u"},
++      {Opt_journal_dev, "journal_dev=%u"},
+       {Opt_abort, "abort"},
+       {Opt_data_journal, "data=journal"},
+       {Opt_data_ordered, "data=ordered"},
+@@ -675,8 +677,9 @@ static unsigned long get_sb_block(void *
+       return sb_block;
+ }
+-static int parse_options (char * options, struct super_block *sb,
+-                        unsigned long * inum, unsigned long *n_blocks_count, int is_remount)
++static int parse_options (char *options, struct super_block *sb,
++                        unsigned long *inum, unsigned long *journal_devnum, 
++                        unsigned long *n_blocks_count, int is_remount)
+ {
+       struct ext3_sb_info *sbi = EXT3_SB(sb);
+       char * p;
+@@ -816,6 +819,16 @@ static int parse_options (char * options
+                               return 0;
+                       *inum = option;
+                       break;
++              case Opt_journal_dev:
++                      if (is_remount) {
++                              printk(KERN_ERR "EXT3-fs: cannot specify "
++                                     "journal on remount\n");
++                              return 0;
++                      }
++                      if (match_int(&args[0], &option))
++                              return 0;
++                      *journal_devnum = option;
++                      break;
+               case Opt_noload:
+                       set_opt (sbi->s_mount_opt, NOLOAD);
+                       break;
+@@ -1278,6 +1291,7 @@ static int ext3_fill_super (struct super
+       unsigned long logic_sb_block;
+       unsigned long offset = 0;
+       unsigned long journal_inum = 0;
++      unsigned long journal_devnum = 0;
+       unsigned long def_mount_opts;
+       struct inode *root;
+       int blocksize;
+@@ -1361,7 +1375,8 @@ static int ext3_fill_super (struct super
+       set_opt(sbi->s_mount_opt, RESERVATION);
+-      if (!parse_options ((char *) data, sb, &journal_inum, NULL, 0))
++      if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum, 
++                          NULL, 0))
+               goto failed_mount;
+       set_sb_time_gran(sb, 1000000000U);
+@@ -1567,7 +1582,7 @@ static int ext3_fill_super (struct super
+        */
+       if (!test_opt(sb, NOLOAD) &&
+           EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
+-              if (ext3_load_journal(sb, es))
++              if (ext3_load_journal(sb, es, journal_devnum))
+                       goto failed_mount2;
+       } else if (journal_inum) {
+               if (ext3_create_journal(sb, es, journal_inum))
+@@ -1831,15 +1846,24 @@ out_bdev:
+       return NULL;
+ }
+-static int ext3_load_journal(struct super_block * sb,
+-                           struct ext3_super_block * es)
++static int ext3_load_journal(struct super_block *sb,
++                           struct ext3_super_block *es,
++                           unsigned long journal_devnum)
+ {
+       journal_t *journal;
+       int journal_inum = le32_to_cpu(es->s_journal_inum);
+-      dev_t journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
++      dev_t journal_dev;
+       int err = 0;
+       int really_read_only;
++      if (journal_devnum &&
++          journal_devnum != le32_to_cpu(es->s_journal_dev)) {
++              printk(KERN_INFO "EXT3-fs: external journal device major/minor "
++                      "numbers have changed\n");
++              journal_dev = new_decode_dev(journal_devnum);
++      } else
++              journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
++
+       really_read_only = bdev_read_only(sb->s_bdev);
+       /*
+@@ -1898,6 +1922,16 @@ static int ext3_load_journal(struct supe
+       EXT3_SB(sb)->s_journal = journal;
+       ext3_clear_journal_err(sb, es);
++
++      if (journal_devnum &&
++          journal_devnum != le32_to_cpu(es->s_journal_dev)) {
++              es->s_journal_dev = cpu_to_le32(journal_devnum);
++              sb->s_dirt = 1;
++
++              /* Make sure we flush the recovery flag to disk. */
++              ext3_commit_super(sb, es, 1);
++      }
++
+       return 0;
+ }
+@@ -2105,13 +2139,13 @@ int ext3_remount (struct super_block * s
+ {
+       struct ext3_super_block * es;
+       struct ext3_sb_info *sbi = EXT3_SB(sb);
+-      unsigned long tmp;
++      unsigned long tmp1, tmp2;
+       unsigned long n_blocks_count = 0;
+       /*
+        * Allow the "check" option to be passed as a remount option.
+        */
+-      if (!parse_options(data, sb, &tmp, &n_blocks_count, 1))
++      if (!parse_options(data, sb, &tmp1, &tmp2, &n_blocks_count, 1))
+               return -EINVAL;
+       if (sbi->s_mount_opt & EXT3_MOUNT_ABORT)
index 2a64875..33dc268 100644 (file)
@@ -1,7 +1,7 @@
-Index: linux-2.6.5-7.201/include/linux/ext3_fs.h
+Index: linux-2.6.5-7.252-full/include/linux/ext3_fs.h
 ===================================================================
---- linux-2.6.5-7.201.orig/include/linux/ext3_fs.h     2005-12-17 02:53:30.000000000 +0300
-+++ linux-2.6.5-7.201/include/linux/ext3_fs.h  2005-12-17 03:13:38.000000000 +0300
+--- linux-2.6.5-7.252-full.orig/include/linux/ext3_fs.h        2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/include/linux/ext3_fs.h     2006-04-26 23:40:28.000000000 +0400
 @@ -57,6 +57,14 @@ struct statfs;
  #define ext3_debug(f, a...)   do {} while (0)
  #endif
@@ -54,10 +54,10 @@ Index: linux-2.6.5-7.201/include/linux/ext3_fs.h
  #endif        /* __KERNEL__ */
  
  #define EXT3_IOC_CREATE_INUM                  _IOW('f', 5, long)
-Index: linux-2.6.5-7.201/include/linux/ext3_fs_sb.h
+Index: linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h
 ===================================================================
---- linux-2.6.5-7.201.orig/include/linux/ext3_fs_sb.h  2005-12-17 02:53:25.000000000 +0300
-+++ linux-2.6.5-7.201/include/linux/ext3_fs_sb.h       2005-12-17 03:10:23.000000000 +0300
+--- linux-2.6.5-7.252-full.orig/include/linux/ext3_fs_sb.h     2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/include/linux/ext3_fs_sb.h  2006-04-26 23:40:28.000000000 +0400
 @@ -23,9 +23,15 @@
  #define EXT_INCLUDE
  #include <linux/blockgroup_lock.h>
@@ -113,10 +113,10 @@ Index: linux-2.6.5-7.201/include/linux/ext3_fs_sb.h
  };
  
  #endif        /* _LINUX_EXT3_FS_SB */
-Index: linux-2.6.5-7.201/fs/ext3/super.c
+Index: linux-2.6.5-7.252-full/fs/ext3/super.c
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/super.c     2005-12-17 02:53:30.000000000 +0300
-+++ linux-2.6.5-7.201/fs/ext3/super.c  2005-12-17 03:10:23.000000000 +0300
+--- linux-2.6.5-7.252-full.orig/fs/ext3/super.c        2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/super.c     2006-04-26 23:40:28.000000000 +0400
 @@ -389,6 +389,7 @@ void ext3_put_super (struct super_block 
        struct ext3_super_block *es = sbi->s_es;
        int i;
@@ -125,7 +125,7 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c
        ext3_ext_release(sb);
        ext3_xattr_put_super(sb);
        journal_destroy(sbi->s_journal);
-@@ -543,7 +544,7 @@ enum {
+@@ -545,7 +546,7 @@ enum {
        Opt_ignore, Opt_barrier,
        Opt_err,
        Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
@@ -134,7 +134,7 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c
  };
  
  static match_table_t tokens = {
-@@ -590,6 +591,7 @@ static match_table_t tokens = {
+@@ -591,6 +592,7 @@ static match_table_t tokens = {
        {Opt_iopen_nopriv, "iopen_nopriv"},
        {Opt_extents, "extents"},
        {Opt_extdebug, "extdebug"},
@@ -142,7 +142,7 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c
        {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL}
  };
-@@ -811,6 +813,9 @@ static int parse_options (char * options
+@@ -813,6 +815,9 @@ static int parse_options (char * options
                case Opt_extdebug:
                        set_opt (sbi->s_mount_opt, EXTDEBUG);
                        break;
@@ -152,7 +152,7 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c
                default:
                        printk (KERN_ERR
                                "EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1464,6 +1469,7 @@ static int ext3_fill_super (struct super
+@@ -1466,6 +1471,7 @@ static int ext3_fill_super (struct super
                ext3_count_dirs(sb));
  
        ext3_ext_init(sb);
@@ -160,7 +160,7 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c
  
        return 0;
  
-@@ -2112,7 +2118,13 @@ static struct file_system_type ext3_fs_t
+@@ -2114,7 +2120,13 @@ static struct file_system_type ext3_fs_t
  
  static int __init init_ext3_fs(void)
  {
@@ -175,7 +175,7 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c
        if (err)
                return err;
        err = init_inodecache();
-@@ -2141,6 +2153,7 @@ static void __exit exit_ext3_fs(void)
+@@ -2143,6 +2155,7 @@ static void __exit exit_ext3_fs(void)
        unregister_filesystem(&ext3_fs_type);
        destroy_inodecache();
        exit_ext3_xattr();
@@ -183,11 +183,11 @@ Index: linux-2.6.5-7.201/fs/ext3/super.c
  }
  
  int ext3_prep_san_write(struct inode *inode, long *blocks,
-Index: linux-2.6.5-7.201/fs/ext3/extents.c
+Index: linux-2.6.5-7.252-full/fs/ext3/extents.c
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/extents.c   2005-12-17 02:53:29.000000000 +0300
-+++ linux-2.6.5-7.201/fs/ext3/extents.c        2005-12-17 03:10:23.000000000 +0300
-@@ -771,7 +771,7 @@ cleanup:
+--- linux-2.6.5-7.252-full.orig/fs/ext3/extents.c      2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/extents.c   2006-04-26 23:40:28.000000000 +0400
+@@ -777,7 +777,7 @@ cleanup:
                for (i = 0; i < depth; i++) {
                        if (!ablocks[i])
                                continue;
@@ -196,7 +196,7 @@ Index: linux-2.6.5-7.201/fs/ext3/extents.c
                }
        }
        kfree(ablocks);
-@@ -1428,7 +1428,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
                  path->p_idx->ei_leaf);
        bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
        ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
@@ -205,7 +205,7 @@ Index: linux-2.6.5-7.201/fs/ext3/extents.c
        return err;
  }
  
-@@ -1913,10 +1913,12 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t
        int needed = ext3_remove_blocks_credits(tree, ex, from, to);
        handle_t *handle = ext3_journal_start(tree->inode, needed);
        struct buffer_head *bh;
@@ -219,7 +219,7 @@ Index: linux-2.6.5-7.201/fs/ext3/extents.c
        if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
                /* tail removal */
                unsigned long num, start;
-@@ -1928,7 +1930,7 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t
                        bh = sb_find_get_block(tree->inode->i_sb, start + i);
                        ext3_forget(handle, 0, tree->inode, bh, start + i);
                }
@@ -228,11 +228,11 @@ Index: linux-2.6.5-7.201/fs/ext3/extents.c
        } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
                printk("strange request: removal %lu-%lu from %u:%u\n",
                       from, to, ex->ee_block, ex->ee_len);
-Index: linux-2.6.5-7.201/fs/ext3/inode.c
+Index: linux-2.6.5-7.252-full/fs/ext3/inode.c
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/inode.c     2005-12-17 02:53:30.000000000 +0300
-+++ linux-2.6.5-7.201/fs/ext3/inode.c  2005-12-17 03:10:23.000000000 +0300
-@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h
+--- linux-2.6.5-7.252-full.orig/fs/ext3/inode.c        2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/inode.c     2006-04-26 23:40:28.000000000 +0400
+@@ -574,7 +574,7 @@ static int ext3_alloc_branch(handle_t *h
                ext3_journal_forget(handle, branch[i].bh);
        }
        for (i = 0; i < keys; i++)
@@ -241,7 +241,7 @@ Index: linux-2.6.5-7.201/fs/ext3/inode.c
        return err;
  }
  
-@@ -673,7 +673,7 @@ err_out:
+@@ -675,7 +675,7 @@ err_out:
        if (err == -EAGAIN)
                for (i = 0; i < num; i++)
                        ext3_free_blocks(handle, inode, 
@@ -250,7 +250,7 @@ Index: linux-2.6.5-7.201/fs/ext3/inode.c
        return err;
  }
  
-@@ -1835,7 +1835,7 @@ ext3_clear_blocks(handle_t *handle, stru
+@@ -1837,7 +1837,7 @@ ext3_clear_blocks(handle_t *handle, stru
                }
        }
  
@@ -259,7 +259,7 @@ Index: linux-2.6.5-7.201/fs/ext3/inode.c
  }
  
  /**
-@@ -2006,7 +2006,7 @@ static void ext3_free_branches(handle_t 
+@@ -2008,7 +2008,7 @@ static void ext3_free_branches(handle_t 
                                ext3_journal_test_restart(handle, inode);
                        }
  
@@ -268,10 +268,10 @@ Index: linux-2.6.5-7.201/fs/ext3/inode.c
  
                        if (parent_bh) {
                                /*
-Index: linux-2.6.5-7.201/fs/ext3/balloc.c
+Index: linux-2.6.5-7.252-full/fs/ext3/balloc.c
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/balloc.c    2005-10-11 00:12:45.000000000 +0400
-+++ linux-2.6.5-7.201/fs/ext3/balloc.c 2005-12-17 03:10:23.000000000 +0300
+--- linux-2.6.5-7.252-full.orig/fs/ext3/balloc.c       2006-02-14 15:26:58.000000000 +0300
++++ linux-2.6.5-7.252-full/fs/ext3/balloc.c    2006-04-26 23:40:28.000000000 +0400
 @@ -78,7 +78,7 @@ struct ext3_group_desc * ext3_get_group_
   *
   * Return buffer_head on success or NULL in case of failure.
@@ -299,10 +299,10 @@ Index: linux-2.6.5-7.201/fs/ext3/balloc.c
                        unsigned long goal, int *errp)
  {
        struct buffer_head *bitmap_bh = NULL;
-Index: linux-2.6.5-7.201/fs/ext3/xattr.c
+Index: linux-2.6.5-7.252-full/fs/ext3/xattr.c
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/xattr.c     2005-12-17 02:53:26.000000000 +0300
-+++ linux-2.6.5-7.201/fs/ext3/xattr.c  2005-12-17 03:10:41.000000000 +0300
+--- linux-2.6.5-7.252-full.orig/fs/ext3/xattr.c        2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/xattr.c     2006-04-26 23:40:28.000000000 +0400
 @@ -1371,7 +1371,7 @@ ext3_xattr_set_handle2(handle_t *handle,
                        new_bh = sb_getblk(sb, block);
                        if (!new_bh) {
@@ -330,11 +330,11 @@ Index: linux-2.6.5-7.201/fs/ext3/xattr.c
                get_bh(bh);
                ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl);
        } else {
-Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
+Index: linux-2.6.5-7.252-full/fs/ext3/mballoc.c
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/mballoc.c   2005-12-09 13:08:53.191437750 +0300
-+++ linux-2.6.5-7.201/fs/ext3/mballoc.c        2005-12-17 03:15:04.000000000 +0300
-@@ -0,0 +1,2430 @@
+--- linux-2.6.5-7.252-full.orig/fs/ext3/mballoc.c      2006-04-22 17:31:47.543334750 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/mballoc.c   2006-04-26 23:42:45.000000000 +0400
+@@ -0,0 +1,2616 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -423,6 +423,12 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +
 +long ext3_mb_stats = 1;
 +
++/*
++ * Threshold compared against ffs(request length) by the allocator: only
++ * when ffs(len) >= this value and len is an exact power of two is the
++ * request's order recorded (ac_2order) for the 2^N buddy-counter search.
++ * Can be changed at run time through the "mb_order2_req" proc entry.
++ */
++long ext3_mb_order2_reqs = 8;
++
++
 +#ifdef EXT3_BB_MAX_BLOCKS
 +#undef EXT3_BB_MAX_BLOCKS
 +#endif
@@ -488,6 +494,8 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +struct ext3_mb_history {
 +      struct ext3_free_extent goal;   /* goal allocation */
 +      struct ext3_free_extent result; /* result allocation */
++      unsigned pid;
++      unsigned ino;
 +      __u16 found;    /* how many extents have been found */
 +      __u16 groups;   /* how many groups have been scanned */
 +      __u16 tail;     /* what tail broke some buddy */
@@ -510,9 +518,9 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +#define EXT3_MB_BUDDY(e3b)    ((e3b)->bd_buddy)
 +
 +#ifndef EXT3_MB_HISTORY
-+#define ext3_mb_store_history(sb,ac)
++#define ext3_mb_store_history(sb,ino,ac)
 +#else
-+static void ext3_mb_store_history(struct super_block *,
++static void ext3_mb_store_history(struct super_block *, unsigned ino,
 +                              struct ext3_allocation_context *ac);
 +#endif
 +
@@ -1134,7 +1142,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +static int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
 +                              int needed, struct ext3_free_extent *ex)
 +{
-+      int next, max, ord;
++      int next = block, max, ord;
 +      void *buddy;
 +
 +      J_ASSERT(ex != NULL);
@@ -1159,6 +1167,11 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      ex->fe_start = block << order;
 +      ex->fe_group = e3b->bd_group;
 +
++      /* calc difference from given start */
++      next = next - ex->fe_start;
++      ex->fe_len -= next;
++      ex->fe_start += next;
++
 +      while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) {
 +
 +              if (block + 1 >= max)
@@ -1381,7 +1394,16 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start,
 +                              ac->ac_g_ex.fe_len, &ex);
 +      
-+      if (max > 0) {
++      if (max >= ac->ac_g_ex.fe_len) {
++              J_ASSERT(ex.fe_len > 0);
++              J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
++              J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
++              ac->ac_found++;
++              ac->ac_b_ex = ex;
++              ext3_mb_use_best_found(ac, e3b);
++      } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) {
++              /* Sometimes, caller may want to merge even small
++               * number of blocks to an existing extent */
 +              J_ASSERT(ex.fe_len > 0);
 +              J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
 +              J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
@@ -1409,7 +1431,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      int i, k, max;
 +
 +      J_ASSERT(ac->ac_2order > 0);
-+      for (i = ac->ac_2order; i < sb->s_blocksize_bits + 1; i++) {
++      for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
 +              if (grp->bb_counters[i] == 0)
 +                      continue;
 +
@@ -1495,15 +1517,18 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +              case 0:
 +                      J_ASSERT(ac->ac_2order != 0);
 +                      bits = ac->ac_sb->s_blocksize_bits + 1;
-+                      for (i = ac->ac_2order; i < bits; i++)
++                      for (i = ac->ac_2order; i <= bits; i++)
 +                              if (grp->bb_counters[i] > 0)
 +                                      return 1;
++                      break;
 +              case 1:
 +                      if ((free / fragments) >= ac->ac_g_ex.fe_len)
 +                              return 1;
++                      break;
 +              case 2:
 +                      if (free >= ac->ac_g_ex.fe_len)
 +                              return 1;
++                      break;
 +              case 3:
 +                      return 1;
 +              default:
@@ -1606,21 +1631,18 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +
 +      /* probably, the request is for 2^8+ blocks (1/2/3/... MB) */
 +      i = ffs(*len);
-+      if (i >= 8) {
++      if (i >= ext3_mb_order2_reqs) {
 +              i--;
 +              if ((*len & (~(1 << i))) == 0)
 +                      ac.ac_2order = i;
 +      }
 +
-+      /* Sometimes, caller may want to merge even small
-+       * number of blocks to an existing extent */
-+      if (ac.ac_flags & EXT3_MB_HINT_MERGE) {
-+              err = ext3_mb_find_by_goal(&ac, &e3b);
-+              if (err)
-+                      goto out_err;
-+              if (ac.ac_status == AC_STATUS_FOUND)
-+                      goto found;
-+      }
++      /* first, try the goal */
++      err = ext3_mb_find_by_goal(&ac, &e3b);
++      if (err)
++              goto out_err;
++      if (ac.ac_status == AC_STATUS_FOUND)
++              goto found;
 +
 +      /* Let's just scan groups to find more-less suitable blocks */
 +      cr = ac.ac_2order ? 0 : 1;
@@ -1839,7 +1861,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +                      atomic_inc(&sbi->s_bal_breaks);
 +      }
 +
-+      ext3_mb_store_history(sb, &ac);
++      ext3_mb_store_history(sb, inode->i_ino, &ac);
 +
 +      return block;
 +}
@@ -1904,9 +1926,9 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      char buf[20], buf2[20];
 +
 +      if (v == SEQ_START_TOKEN) {
-+              seq_printf(seq, "%-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
-+                       "goal", "result", "found", "grps", "cr", "merge",
-+                       "tail", "broken");
++              seq_printf(seq, "%-5s %-8s %-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
++                       "pid", "inode", "goal", "result", "found", "grps", "cr",
++                       "merge", "tail", "broken");
 +              return 0;
 +      }
 +
@@ -1914,9 +1936,9 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +              hs->goal.fe_start, hs->goal.fe_len);
 +      sprintf(buf2, "%u/%u/%u", hs->result.fe_group,
 +              hs->result.fe_start, hs->result.fe_len);
-+      seq_printf(seq, "%-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n", buf,
-+                      buf2, hs->found, hs->groups, hs->cr, 
-+                      hs->merged ? "M" : "", hs->tail,
++      seq_printf(seq, "%-5u %-8u %-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n",
++                      hs->pid, hs->ino, buf, buf2, hs->found, hs->groups,
++                      hs->cr, hs->merged ? "M" : "", hs->tail,
 +                      hs->buddy ? 1 << hs->buddy : 0);
 +      return 0;
 +}
@@ -1980,12 +2002,108 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      .release        = ext3_mb_seq_history_release,
 +};
 +
++/*
++ * seq_file ->start for "mb_groups": map the file position to a group.
++ *
++ * Returns NULL when *pos lies outside [0, s_groups_count).  Otherwise the
++ * group number is returned biased by one and encoded in the pointer value
++ * itself, so that group 0 is distinguishable from the NULL return;
++ * ext3_mb_seq_groups_show() removes the bias again.
++ */
++static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
++{
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      int group;
++
++      if (*pos < 0 || *pos >= sbi->s_groups_count)
++              return NULL;
++
++      group = *pos + 1;
++      /* go through long: int and pointer differ in size on 64-bit */
++      return (void *) (long) group;
++}
++
++/*
++ * seq_file ->next for "mb_groups": advance *pos by one and return the
++ * matching group.
++ *
++ * Returns NULL when the new position lies outside [0, s_groups_count).
++ * Otherwise returns the group number biased by one and encoded in the
++ * pointer value, the same encoding ext3_mb_seq_groups_show() decodes.
++ * @v (the previous element) is not used.
++ */
++static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      int group;
++
++      ++*pos;
++      if (*pos < 0 || *pos >= sbi->s_groups_count)
++              return NULL;
++      group = *pos + 1;
++      /* go through long: int and pointer differ in size on 64-bit */
++      return (void *) (long) group;
++}
++
++/*
++ * seq_file ->show for "mb_groups": print one line of buddy statistics for
++ * the group encoded in @v (group number + 1, as produced by ->start and
++ * ->next).
++ *
++ * The column header is printed before group 0.  The group info is copied
++ * to the stack under the group lock and formatted afterwards.  Groups for
++ * which EXT3_MB_GRP_NEED_INIT() is true produce no output.
++ * Always returns 0.
++ */
++static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v)
++{
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      /* via long: avoids a size-mismatched pointer->int cast on 64-bit */
++      int group = (int) (long) v, i;
++      /*
++       * Stack snapshot: the info header followed by room for 16 counters.
++       * NOTE(review): presumably bb_counters[] trails the header so that
++       * counters[] provides its storage - confirm against the struct.
++       */
++      struct sg {
++              struct ext3_group_info info;
++              unsigned short counters[16];
++      } sg;
++
++      /* undo the +1 bias applied by ->start/->next */
++      group--;
++      if (group == 0)
++              seq_printf(seq, "#%-5s: %-5s %-5s %-5s [ %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
++                       "group", "free", "frags", "first", "2^0", "2^1", "2^2",
++                       "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10",
++                       "2^11", "2^12", "2^13");
++
++      /* header plus (s_blocksize_bits + 2) counters */
++      i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
++              sizeof(struct ext3_group_info);
++      /* never copy more than the on-stack snapshot can hold */
++      if (i > (int) sizeof(sg))
++              i = sizeof(sg);
++      ext3_lock_group(sb, group);
++      memcpy(&sg, sbi->s_group_info[group], i);
++      ext3_unlock_group(sb, group);
++
++      if (EXT3_MB_GRP_NEED_INIT(&sg.info))
++              return 0;
++
++      seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
++                      sg.info.bb_fragments, sg.info.bb_first_free);
++      /* always 14 columns; orders above s_blocksize_bits + 1 print as 0 */
++      for (i = 0; i <= 13; i++)
++              seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
++                              sg.info.bb_counters[i] : 0);
++      seq_printf(seq, " ]\n");
++
++      return 0;
++}
++
++/*
++ * seq_file ->stop for "mb_groups".  Intentionally empty: ->start and
++ * ->next only compute an integer and take no lock or reference that
++ * would have to be dropped here.
++ */
++static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v)
++{
++}
++
++/* seq_file iterator callbacks for "mb_groups"; one element per group */
++static struct seq_operations ext3_mb_seq_groups_ops = {
++      .start  = ext3_mb_seq_groups_start,
++      .next   = ext3_mb_seq_groups_next,
++      .stop   = ext3_mb_seq_groups_stop,
++      .show   = ext3_mb_seq_groups_show,
++};
++
++/*
++ * ->open for the "mb_groups" proc file: set up a seq_file that iterates
++ * with ext3_mb_seq_groups_ops and hand it the super block stored in the
++ * proc entry's ->data.  Returns the result of seq_open().
++ */
++static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file)
++{
++      struct super_block *sb = PDE(inode)->data;
++      struct seq_file *m;
++      int rc = seq_open(file, &ext3_mb_seq_groups_ops);
++
++      if (rc != 0)
++              return rc;
++
++      /* seq_open() left the seq_file in file->private_data */
++      m = (struct seq_file *)file->private_data;
++      m->private = sb;
++      return 0;
++}
++
++/*
++ * File operations for the "mb_groups" proc file: a custom ->open that
++ * attaches the super block, with reading, seeking and release delegated
++ * to the generic seq_file helpers.
++ */
++static struct file_operations ext3_mb_seq_groups_fops = {
++      .owner          = THIS_MODULE,
++      .open           = ext3_mb_seq_groups_open,
++      .read           = seq_read,
++      .llseek         = seq_lseek,
++      .release        = seq_release,
++};
++
 +static void ext3_mb_history_release(struct super_block *sb)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      char name[64];
 +
 +      snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name));
++      remove_proc_entry("mb_groups", sbi->s_mb_proc);
 +      remove_proc_entry("mb_history", sbi->s_mb_proc);
 +      remove_proc_entry(name, proc_root_ext3);
 +
@@ -2008,6 +2126,11 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +                      p->proc_fops = &ext3_mb_seq_history_fops;
 +                      p->data = sb;
 +              }
++              p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc);
++              if (p) {
++                      p->proc_fops = &ext3_mb_seq_groups_fops;
++                      p->data = sb;
++              }
 +      }
 +
 +      sbi->s_mb_history_max = 1000;
@@ -2020,7 +2143,8 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +}
 +
 +static void
-+ext3_mb_store_history(struct super_block *sb, struct ext3_allocation_context *ac)
++ext3_mb_store_history(struct super_block *sb, unsigned ino,
++                      struct ext3_allocation_context *ac)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      struct ext3_mb_history h;
@@ -2028,6 +2152,8 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      if (likely(sbi->s_mb_history == NULL))
 +              return;
 +
++      h.pid = current->pid;
++      h.ino = ino;
 +      h.goal = ac->ac_g_ex;
 +      h.result = ac->ac_b_ex;
 +      h.found = ac->ac_found;
@@ -2584,6 +2710,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +#define EXT3_MB_STATS_NAME        "mb_stats"
 +#define EXT3_MB_MAX_TO_SCAN_NAME  "mb_max_to_scan"
 +#define EXT3_MB_MIN_TO_SCAN_NAME  "mb_min_to_scan"
++#define EXT3_MB_ORDER2_REQ      "mb_order2_req"
 +
 +static int ext3_mb_stats_read(char *page, char **start, off_t off,
 +              int count, int *eof, void *data)
@@ -2671,6 +2798,45 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      return len;
 +}
 +
++/*
++ * proc write handler for "mb_order2_req": parse a number written from
++ * user space and store it in ext3_mb_order2_reqs.
++ *
++ * Returns @count on success, -EOVERFLOW when the input does not fit the
++ * local buffer, -EFAULT when it cannot be copied from user space and
++ * -ERANGE when the parsed value is not strictly positive.
++ */
++static int ext3_mb_order2_req_write(struct file *file, const char *buffer,
++              unsigned long count, void *data)
++{
++      char str[32];
++      long value;
++
++      if (count >= sizeof(str)) {
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
++                     EXT3_MB_ORDER2_REQ, (unsigned)sizeof(str));
++              return -EOVERFLOW;
++      }
++
++      if (copy_from_user(str, buffer, count))
++              return -EFAULT;
++
++      /*
++       * Terminate the copy: count < sizeof(str) was checked above, and
++       * without this simple_strtol() could keep parsing whatever bytes
++       * happen to follow the user data on the stack.
++       */
++      str[count] = '\0';
++
++      /* only strictly positive thresholds are accepted */
++      value = simple_strtol(str, NULL, 0);
++      if (value <= 0)
++              return -ERANGE;
++
++      ext3_mb_order2_reqs = value;
++
++      return count;
++}
++
++/*
++ * proc read handler for "mb_order2_req": format ext3_mb_order2_reqs as a
++ * decimal number followed by a newline into @page.
++ *
++ * Always sets *eof.  Returns the number of characters written for a read
++ * at offset 0 and 0 for any other offset.
++ */
++static int ext3_mb_order2_req_read(char *page, char **start, off_t off,
++              int count, int *eof, void *data)
++{
++      /* the whole value is delivered by the first read */
++      *eof = 1;
++
++      if (off == 0) {
++              *start = page;
++              return sprintf(page, "%ld\n", ext3_mb_order2_reqs);
++      }
++
++      return 0;
++}
++
 +static int ext3_mb_min_to_scan_write(struct file *file, const char *buffer,
 +              unsigned long count, void *data)
 +{
@@ -2701,6 +2867,7 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      struct proc_dir_entry *proc_ext3_mb_stats;
 +      struct proc_dir_entry *proc_ext3_mb_max_to_scan;
 +      struct proc_dir_entry *proc_ext3_mb_min_to_scan;
++      struct proc_dir_entry *proc_ext3_mb_order2_req;
 +
 +      proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs);
 +      if (proc_root_ext3 == NULL) {
@@ -2755,6 +2922,24 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      proc_ext3_mb_min_to_scan->read_proc  = ext3_mb_min_to_scan_read;
 +      proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write;
 +
++      /* Initialize EXT3_ORDER2_REQ */
++      proc_ext3_mb_order2_req = create_proc_entry(
++                      EXT3_MB_ORDER2_REQ,
++                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
++      if (proc_ext3_mb_order2_req == NULL) {
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
++                              EXT3_MB_ORDER2_REQ);
++              remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_ROOT, proc_root_fs);
++              return -EIO;
++      }
++
++      proc_ext3_mb_order2_req->data = NULL;
++      proc_ext3_mb_order2_req->read_proc  = ext3_mb_order2_req_read;
++      proc_ext3_mb_order2_req->write_proc = ext3_mb_order2_req_write;
++
 +      return 0;
 +}
 +
@@ -2763,13 +2948,14 @@ Index: linux-2.6.5-7.201/fs/ext3/mballoc.c
 +      remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
 +      remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
 +      remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++      remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3);
 +      remove_proc_entry(EXT3_ROOT, proc_root_fs);
 +}
-Index: linux-2.6.5-7.201/fs/ext3/Makefile
+Index: linux-2.6.5-7.252-full/fs/ext3/Makefile
 ===================================================================
---- linux-2.6.5-7.201.orig/fs/ext3/Makefile    2005-12-17 02:53:30.000000000 +0300
-+++ linux-2.6.5-7.201/fs/ext3/Makefile 2005-12-17 03:10:23.000000000 +0300
-@@ -6,7 +6,7 @@
+--- linux-2.6.5-7.252-full.orig/fs/ext3/Makefile       2006-04-25 17:42:19.000000000 +0400
++++ linux-2.6.5-7.252-full/fs/ext3/Makefile    2006-04-26 23:40:28.000000000 +0400
+@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
  
  ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
           ioctl.o namei.o super.o symlink.o hash.o \
index 70f4f8a..0297609 100644 (file)
@@ -1,7 +1,7 @@
-Index: linux-2.6.12.6/include/linux/ext3_fs.h
+Index: linux-2.6.12.6-bull/include/linux/ext3_fs.h
 ===================================================================
---- linux-2.6.12.6.orig/include/linux/ext3_fs.h        2005-12-17 02:17:16.000000000 +0300
-+++ linux-2.6.12.6/include/linux/ext3_fs.h     2005-12-17 02:21:21.000000000 +0300
+--- linux-2.6.12.6-bull.orig/include/linux/ext3_fs.h   2006-04-29 20:39:09.000000000 +0400
++++ linux-2.6.12.6-bull/include/linux/ext3_fs.h        2006-04-29 20:39:10.000000000 +0400
 @@ -57,6 +57,14 @@ struct statfs;
  #define ext3_debug(f, a...)   do {} while (0)
  #endif
@@ -52,10 +52,10 @@ Index: linux-2.6.12.6/include/linux/ext3_fs.h
  #endif        /* __KERNEL__ */
  
  /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
-Index: linux-2.6.12.6/include/linux/ext3_fs_sb.h
+Index: linux-2.6.12.6-bull/include/linux/ext3_fs_sb.h
 ===================================================================
---- linux-2.6.12.6.orig/include/linux/ext3_fs_sb.h     2005-08-29 20:55:27.000000000 +0400
-+++ linux-2.6.12.6/include/linux/ext3_fs_sb.h  2005-12-17 02:21:21.000000000 +0300
+--- linux-2.6.12.6-bull.orig/include/linux/ext3_fs_sb.h        2005-08-29 20:55:27.000000000 +0400
++++ linux-2.6.12.6-bull/include/linux/ext3_fs_sb.h     2006-04-29 20:39:10.000000000 +0400
 @@ -21,8 +21,14 @@
  #include <linux/wait.h>
  #include <linux/blockgroup_lock.h>
@@ -110,10 +110,10 @@ Index: linux-2.6.12.6/include/linux/ext3_fs_sb.h
  };
  
  #endif        /* _LINUX_EXT3_FS_SB */
-Index: linux-2.6.12.6/fs/ext3/super.c
+Index: linux-2.6.12.6-bull/fs/ext3/super.c
 ===================================================================
---- linux-2.6.12.6.orig/fs/ext3/super.c        2005-12-17 02:17:16.000000000 +0300
-+++ linux-2.6.12.6/fs/ext3/super.c     2005-12-17 02:21:21.000000000 +0300
+--- linux-2.6.12.6-bull.orig/fs/ext3/super.c   2006-04-29 20:39:09.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/super.c        2006-04-29 20:39:10.000000000 +0400
 @@ -387,6 +387,7 @@ static void ext3_put_super (struct super
        struct ext3_super_block *es = sbi->s_es;
        int i;
@@ -131,7 +131,7 @@ Index: linux-2.6.12.6/fs/ext3/super.c
  };
  
  static match_table_t tokens = {
-@@ -649,6 +651,7 @@ static match_table_t tokens = {
+@@ -650,6 +651,7 @@ static match_table_t tokens = {
        {Opt_iopen_nopriv, "iopen_nopriv"},
        {Opt_extents, "extents"},
        {Opt_extdebug, "extdebug"},
@@ -139,7 +139,7 @@ Index: linux-2.6.12.6/fs/ext3/super.c
        {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL},
        {Opt_resize, "resize"},
-@@ -964,6 +967,9 @@ clear_qf_name:
+@@ -965,6 +967,9 @@ clear_qf_name:
                case Opt_extdebug:
                        set_opt (sbi->s_mount_opt, EXTDEBUG);
                        break;
@@ -149,7 +149,7 @@ Index: linux-2.6.12.6/fs/ext3/super.c
                default:
                        printk (KERN_ERR
                                "EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1669,6 +1675,7 @@ static int ext3_fill_super (struct super
+@@ -1670,6 +1675,7 @@ static int ext3_fill_super (struct super
                ext3_count_dirs(sb));
  
        ext3_ext_init(sb);
@@ -157,7 +157,7 @@ Index: linux-2.6.12.6/fs/ext3/super.c
        lock_kernel();
        return 0;
  
-@@ -2548,7 +2555,13 @@ static struct file_system_type ext3_fs_t
+@@ -2549,7 +2555,13 @@ static struct file_system_type ext3_fs_t
  
  static int __init init_ext3_fs(void)
  {
@@ -172,7 +172,7 @@ Index: linux-2.6.12.6/fs/ext3/super.c
        if (err)
                return err;
        err = init_inodecache();
-@@ -2570,6 +2583,7 @@ static void __exit exit_ext3_fs(void)
+@@ -2571,6 +2583,7 @@ static void __exit exit_ext3_fs(void)
        unregister_filesystem(&ext3_fs_type);
        destroy_inodecache();
        exit_ext3_xattr();
@@ -180,11 +180,11 @@ Index: linux-2.6.12.6/fs/ext3/super.c
  }
  
  int ext3_prep_san_write(struct inode *inode, long *blocks,
-Index: linux-2.6.12.6/fs/ext3/extents.c
+Index: linux-2.6.12.6-bull/fs/ext3/extents.c
 ===================================================================
---- linux-2.6.12.6.orig/fs/ext3/extents.c      2005-12-17 02:17:16.000000000 +0300
-+++ linux-2.6.12.6/fs/ext3/extents.c   2005-12-17 02:21:21.000000000 +0300
-@@ -771,7 +771,7 @@ cleanup:
+--- linux-2.6.12.6-bull.orig/fs/ext3/extents.c 2006-04-29 20:39:09.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/extents.c      2006-04-29 20:39:10.000000000 +0400
+@@ -777,7 +777,7 @@ cleanup:
                for (i = 0; i < depth; i++) {
                        if (!ablocks[i])
                                continue;
@@ -193,7 +193,7 @@ Index: linux-2.6.12.6/fs/ext3/extents.c
                }
        }
        kfree(ablocks);
-@@ -1428,7 +1428,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
                  path->p_idx->ei_leaf);
        bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
        ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
@@ -202,7 +202,7 @@ Index: linux-2.6.12.6/fs/ext3/extents.c
        return err;
  }
  
-@@ -1913,10 +1913,12 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t
        int needed = ext3_remove_blocks_credits(tree, ex, from, to);
        handle_t *handle = ext3_journal_start(tree->inode, needed);
        struct buffer_head *bh;
@@ -216,7 +216,7 @@ Index: linux-2.6.12.6/fs/ext3/extents.c
        if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
                /* tail removal */
                unsigned long num, start;
-@@ -1928,7 +1930,7 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t
                        bh = sb_find_get_block(tree->inode->i_sb, start + i);
                        ext3_forget(handle, 0, tree->inode, bh, start + i);
                }
@@ -225,10 +225,10 @@ Index: linux-2.6.12.6/fs/ext3/extents.c
        } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
                printk("strange request: removal %lu-%lu from %u:%u\n",
                       from, to, ex->ee_block, ex->ee_len);
-Index: linux-2.6.12.6/fs/ext3/inode.c
+Index: linux-2.6.12.6-bull/fs/ext3/inode.c
 ===================================================================
---- linux-2.6.12.6.orig/fs/ext3/inode.c        2005-12-17 02:17:16.000000000 +0300
-+++ linux-2.6.12.6/fs/ext3/inode.c     2005-12-17 02:21:21.000000000 +0300
+--- linux-2.6.12.6-bull.orig/fs/ext3/inode.c   2006-04-29 20:39:09.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/inode.c        2006-04-29 20:39:10.000000000 +0400
 @@ -564,7 +564,7 @@ static int ext3_alloc_branch(handle_t *h
                ext3_journal_forget(handle, branch[i].bh);
        }
@@ -256,10 +256,10 @@ Index: linux-2.6.12.6/fs/ext3/inode.c
  
                        if (parent_bh) {
                                /*
-Index: linux-2.6.12.6/fs/ext3/balloc.c
+Index: linux-2.6.12.6-bull/fs/ext3/balloc.c
 ===================================================================
---- linux-2.6.12.6.orig/fs/ext3/balloc.c       2005-08-29 20:55:27.000000000 +0400
-+++ linux-2.6.12.6/fs/ext3/balloc.c    2005-12-17 02:21:21.000000000 +0300
+--- linux-2.6.12.6-bull.orig/fs/ext3/balloc.c  2005-08-29 20:55:27.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/balloc.c       2006-04-29 20:39:10.000000000 +0400
 @@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
   *
   * Return buffer_head on success or NULL in case of failure.
@@ -303,10 +303,10 @@ Index: linux-2.6.12.6/fs/ext3/balloc.c
                        unsigned long goal, int *errp)
  {
        struct buffer_head *bitmap_bh = NULL;
-Index: linux-2.6.12.6/fs/ext3/xattr.c
+Index: linux-2.6.12.6-bull/fs/ext3/xattr.c
 ===================================================================
---- linux-2.6.12.6.orig/fs/ext3/xattr.c        2005-08-29 20:55:27.000000000 +0400
-+++ linux-2.6.12.6/fs/ext3/xattr.c     2005-12-17 02:21:33.000000000 +0300
+--- linux-2.6.12.6-bull.orig/fs/ext3/xattr.c   2005-08-29 20:55:27.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/xattr.c        2006-04-29 20:39:10.000000000 +0400
 @@ -484,7 +484,7 @@ ext3_xattr_release_block(handle_t *handl
                ea_bdebug(bh, "refcount now=0; freeing");
                if (ce)
@@ -325,11 +325,11 @@ Index: linux-2.6.12.6/fs/ext3/xattr.c
                                error = -EIO;
                                goto cleanup;
                        }
-Index: linux-2.6.12.6/fs/ext3/mballoc.c
+Index: linux-2.6.12.6-bull/fs/ext3/mballoc.c
 ===================================================================
---- linux-2.6.12.6.orig/fs/ext3/mballoc.c      2005-12-09 13:08:53.191437750 +0300
-+++ linux-2.6.12.6/fs/ext3/mballoc.c   2005-12-17 02:21:21.000000000 +0300
-@@ -0,0 +1,2429 @@
+--- linux-2.6.12.6-bull.orig/fs/ext3/mballoc.c 2006-04-22 17:31:47.543334750 +0400
++++ linux-2.6.12.6-bull/fs/ext3/mballoc.c      2006-04-30 01:24:11.000000000 +0400
+@@ -0,0 +1,2615 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -418,6 +418,12 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +
 +long ext3_mb_stats = 1;
 +
++/*
++ * Threshold compared against ffs(request length) by the allocator: only
++ * when ffs(len) >= this value and len is an exact power of two is the
++ * request's order recorded (ac_2order) for the 2^N buddy-counter search.
++ */
++long ext3_mb_order2_reqs = 8;
++
++
 +#ifdef EXT3_BB_MAX_BLOCKS
 +#undef EXT3_BB_MAX_BLOCKS
 +#endif
@@ -483,6 +489,8 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +struct ext3_mb_history {
 +      struct ext3_free_extent goal;   /* goal allocation */
 +      struct ext3_free_extent result; /* result allocation */
++      unsigned pid;
++      unsigned ino;
 +      __u16 found;    /* how many extents have been found */
 +      __u16 groups;   /* how many groups have been scanned */
 +      __u16 tail;     /* what tail broke some buddy */
@@ -505,9 +513,9 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +#define EXT3_MB_BUDDY(e3b)    ((e3b)->bd_buddy)
 +
 +#ifndef EXT3_MB_HISTORY
-+#define ext3_mb_store_history(sb,ac)
++#define ext3_mb_store_history(sb,ino,ac)
 +#else
-+static void ext3_mb_store_history(struct super_block *,
++static void ext3_mb_store_history(struct super_block *, unsigned ino,
 +                              struct ext3_allocation_context *ac);
 +#endif
 +
@@ -1129,7 +1137,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +static int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
 +                              int needed, struct ext3_free_extent *ex)
 +{
-+      int next, max, ord;
++      int next = block, max, ord;
 +      void *buddy;
 +
 +      J_ASSERT(ex != NULL);
@@ -1154,6 +1162,11 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      ex->fe_start = block << order;
 +      ex->fe_group = e3b->bd_group;
 +
++      /* calc difference from given start */
++      next = next - ex->fe_start;
++      ex->fe_len -= next;
++      ex->fe_start += next;
++
 +      while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) {
 +
 +              if (block + 1 >= max)
@@ -1376,7 +1389,16 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start,
 +                              ac->ac_g_ex.fe_len, &ex);
 +      
-+      if (max > 0) {
++      if (max >= ac->ac_g_ex.fe_len) {
++              J_ASSERT(ex.fe_len > 0);
++              J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
++              J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
++              ac->ac_found++;
++              ac->ac_b_ex = ex;
++              ext3_mb_use_best_found(ac, e3b);
++      } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) {
++              /* Sometimes, caller may want to merge even small
++               * number of blocks to an existing extent */
 +              J_ASSERT(ex.fe_len > 0);
 +              J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
 +              J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
@@ -1404,7 +1426,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      int i, k, max;
 +
 +      J_ASSERT(ac->ac_2order > 0);
-+      for (i = ac->ac_2order; i < sb->s_blocksize_bits + 1; i++) {
++      for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
 +              if (grp->bb_counters[i] == 0)
 +                      continue;
 +
@@ -1490,15 +1512,18 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +              case 0:
 +                      J_ASSERT(ac->ac_2order != 0);
 +                      bits = ac->ac_sb->s_blocksize_bits + 1;
-+                      for (i = ac->ac_2order; i < bits; i++)
++                      for (i = ac->ac_2order; i <= bits; i++)
 +                              if (grp->bb_counters[i] > 0)
 +                                      return 1;
++                      break;
 +              case 1:
 +                      if ((free / fragments) >= ac->ac_g_ex.fe_len)
 +                              return 1;
++                      break;
 +              case 2:
 +                      if (free >= ac->ac_g_ex.fe_len)
 +                              return 1;
++                      break;
 +              case 3:
 +                      return 1;
 +              default:
@@ -1601,21 +1626,18 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +
 +      /* probably, the request is for 2^8+ blocks (1/2/3/... MB) */
 +      i = ffs(*len);
-+      if (i >= 8) {
++      if (i >= ext3_mb_order2_reqs) {
 +              i--;
 +              if ((*len & (~(1 << i))) == 0)
 +                      ac.ac_2order = i;
 +      }
 +
-+      /* Sometimes, caller may want to merge even small
-+       * number of blocks to an existing extent */
-+      if (ac.ac_flags & EXT3_MB_HINT_MERGE) {
-+              err = ext3_mb_find_by_goal(&ac, &e3b);
-+              if (err)
-+                      goto out_err;
-+              if (ac.ac_status == AC_STATUS_FOUND)
-+                      goto found;
-+      }
++      /* first, try the goal */
++      err = ext3_mb_find_by_goal(&ac, &e3b);
++      if (err)
++              goto out_err;
++      if (ac.ac_status == AC_STATUS_FOUND)
++              goto found;
 +
 +      /* Let's just scan groups to find more-less suitable blocks */
 +      cr = ac.ac_2order ? 0 : 1;
@@ -1834,7 +1856,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +                      atomic_inc(&sbi->s_bal_breaks);
 +      }
 +
-+      ext3_mb_store_history(sb, &ac);
++      ext3_mb_store_history(sb, inode->i_ino, &ac);
 +
 +      return block;
 +}
@@ -1899,9 +1921,9 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      char buf[20], buf2[20];
 +
 +      if (v == SEQ_START_TOKEN) {
-+              seq_printf(seq, "%-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
-+                       "goal", "result", "found", "grps", "cr", "merge",
-+                       "tail", "broken");
++              seq_printf(seq, "%-5s %-8s %-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
++                       "pid", "inode", "goal", "result", "found", "grps", "cr",
++                       "merge", "tail", "broken");
 +              return 0;
 +      }
 +
@@ -1909,9 +1931,9 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +              hs->goal.fe_start, hs->goal.fe_len);
 +      sprintf(buf2, "%u/%u/%u", hs->result.fe_group,
 +              hs->result.fe_start, hs->result.fe_len);
-+      seq_printf(seq, "%-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n", buf,
-+                      buf2, hs->found, hs->groups, hs->cr, 
-+                      hs->merged ? "M" : "", hs->tail,
++      seq_printf(seq, "%-5u %-8u %-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n",
++                      hs->pid, hs->ino, buf, buf2, hs->found, hs->groups,
++                      hs->cr, hs->merged ? "M" : "", hs->tail,
 +                      hs->buddy ? 1 << hs->buddy : 0);
 +      return 0;
 +}
@@ -1975,12 +1997,108 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      .release        = ext3_mb_seq_history_release,
 +};
 +
++/*
++ * seq_file ->start for mb_groups: translate the file position into an
++ * iterator token.
++ *
++ * The token is the group number plus one, so that group 0 is not encoded
++ * as a NULL pointer (NULL is what ends the sequence).  Returns NULL when
++ * *pos lies outside [0, s_groups_count).
++ */
++static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
++{
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      long group;
++
++      if (*pos < 0 || *pos >= sbi->s_groups_count)
++              return NULL;
++
++      /* use a pointer-width integer so the cast is exact on 64-bit too */
++      group = *pos + 1;
++      return (void *) group;
++}
++
++/*
++ * seq_file ->next for mb_groups: advance *pos by one and return the token
++ * (group number plus one) for the new position, or NULL once *pos is
++ * outside [0, s_groups_count).  The previous token v is not used.
++ */
++static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      long group;
++
++      ++*pos;
++      if (*pos < 0 || *pos >= sbi->s_groups_count)
++              return NULL;
++
++      /* use a pointer-width integer so the cast is exact on 64-bit too */
++      group = *pos + 1;
++      return (void *) group;
++}
++
++/*
++ * seq_file ->show for mb_groups: print one line for the group encoded in
++ * v (group number plus one) with the columns named in the header: free,
++ * frags, first and the counters 2^0 .. 2^13.  The header itself is printed
++ * in front of group 0.  Groups for which EXT3_MB_GRP_NEED_INIT() is true
++ * produce no line.  Always returns 0.
++ */
++static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v)
++{
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      /* the token was built from a long; read it back the same way */
++      int group = (long) v, i;
++      /*
++       * On-stack snapshot: the group info followed by room for 16 counters.
++       * NOTE(review): this relies on bb_counters being the trailing member
++       * of struct ext3_group_info - confirm against its definition.
++       */
++      struct sg {
++              struct ext3_group_info info;
++              unsigned short counters[16];
++      } sg;
++
++      group--;
++      if (group == 0)
++              seq_printf(seq, "#%-5s: %-5s %-5s %-5s [ %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
++                       "group", "free", "frags", "first", "2^0", "2^1", "2^2",
++                       "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10",
++                       "2^11", "2^12", "2^13");
++
++      i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
++              sizeof(struct ext3_group_info);
++      /* never copy more than the on-stack snapshot can hold */
++      if (i > sizeof(sg))
++              i = sizeof(sg);
++      ext3_lock_group(sb, group);
++      memcpy(&sg, sbi->s_group_info[group], i);
++      ext3_unlock_group(sb, group);
++
++      if (EXT3_MB_GRP_NEED_INIT(&sg.info))
++              return 0;
++
++      seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
++                      sg.info.bb_fragments, sg.info.bb_first_free);
++      /* 14 columns to match the header; orders above blocksize_bits + 1
++       * were not copied and are printed as 0 */
++      for (i = 0; i <= 13; i++)
++              seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
++                              sg.info.bb_counters[i] : 0);
++      seq_printf(seq, " ]\n");
++
++      return 0;
++}
++
++static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v)
++{ /* intentionally empty: ->start and ->next above acquire nothing to release */
++}
++
++/* seq_file iterator callbacks behind the mb_groups proc file */
++static struct seq_operations ext3_mb_seq_groups_ops = {
++      .start  = ext3_mb_seq_groups_start,
++      .show   = ext3_mb_seq_groups_show,
++      .next   = ext3_mb_seq_groups_next,
++      .stop   = ext3_mb_seq_groups_stop,
++};
++
++/*
++ * open() for mb_groups: set up the seq_file and store the super block kept
++ * in the proc entry's ->data into seq->private, where the iterator
++ * callbacks read it.  Returns the seq_open() result.
++ */
++static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file)
++{
++      struct super_block *sb = PDE(inode)->data;
++      struct seq_file *m;
++      int rc;
++
++      rc = seq_open(file, &ext3_mb_seq_groups_ops);
++      if (rc != 0)
++              return rc;
++
++      m = (struct seq_file *)file->private_data;
++      m->private = sb;
++      return 0;
++}
++
++/* file operations of mb_groups; read, llseek and release are seq_file's */
++static struct file_operations ext3_mb_seq_groups_fops = {
++      .owner          = THIS_MODULE,
++      .open           = ext3_mb_seq_groups_open,
++      .release        = seq_release,
++      .read           = seq_read,
++      .llseek         = seq_lseek,
++};
++
 +static void ext3_mb_history_release(struct super_block *sb)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      char name[64];
 +
 +      snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name));
++      remove_proc_entry("mb_groups", sbi->s_mb_proc);
 +      remove_proc_entry("mb_history", sbi->s_mb_proc);
 +      remove_proc_entry(name, proc_root_ext3);
 +
@@ -2003,6 +2121,11 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +                      p->proc_fops = &ext3_mb_seq_history_fops;
 +                      p->data = sb;
 +              }
++              p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc);
++              if (p) {
++                      p->proc_fops = &ext3_mb_seq_groups_fops;
++                      p->data = sb;
++              }
 +      }
 +
 +      sbi->s_mb_history_max = 1000;
@@ -2015,7 +2138,8 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +}
 +
 +static void
-+ext3_mb_store_history(struct super_block *sb, struct ext3_allocation_context *ac)
++ext3_mb_store_history(struct super_block *sb, unsigned ino,
++                      struct ext3_allocation_context *ac)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      struct ext3_mb_history h;
@@ -2023,6 +2147,8 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      if (likely(sbi->s_mb_history == NULL))
 +              return;
 +
++      h.pid = current->pid;
++      h.ino = ino;
 +      h.goal = ac->ac_g_ex;
 +      h.result = ac->ac_b_ex;
 +      h.found = ac->ac_found;
@@ -2578,6 +2704,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +#define EXT3_MB_STATS_NAME        "mb_stats"
 +#define EXT3_MB_MAX_TO_SCAN_NAME  "mb_max_to_scan"
 +#define EXT3_MB_MIN_TO_SCAN_NAME  "mb_min_to_scan"
++#define EXT3_MB_ORDER2_REQ      "mb_order2_req"
 +
 +static int ext3_mb_stats_read(char *page, char **start, off_t off,
 +              int count, int *eof, void *data)
@@ -2665,6 +2792,45 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      return len;
 +}
 +
++/*
++ * /proc write handler for mb_order2_req: parse an integer from the user
++ * buffer and store it in ext3_mb_order2_reqs.
++ *
++ * Returns count on success, -EOVERFLOW when the input does not fit the
++ * local buffer, -EFAULT when it cannot be copied from user space and
++ * -ERANGE when the parsed value is not strictly positive.
++ */
++static int ext3_mb_order2_req_write(struct file *file, const char *buffer,
++              unsigned long count, void *data)
++{
++      char str[32];
++      long value;
++
++      if (count >= sizeof(str)) {
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
++                     EXT3_MB_ORDER2_REQ, (int)sizeof(str));
++              return -EOVERFLOW;
++      }
++
++      if (copy_from_user(str, buffer, count))
++              return -EFAULT;
++      /* the copy is not NUL-terminated; count < sizeof(str) was checked */
++      str[count] = '\0';
++
++      /* only strictly positive values are accepted */
++      value = simple_strtol(str, NULL, 0);
++      if (value <= 0)
++              return -ERANGE;
++
++      ext3_mb_order2_reqs = value;
++
++      return count;
++}
++
++/*
++ * /proc read handler for mb_order2_req: the whole value is produced by the
++ * read at offset 0, so *eof is always set and any other offset yields 0
++ * bytes.  Returns the number of bytes written to page.
++ */
++static int ext3_mb_order2_req_read(char *page, char **start, off_t off,
++              int count, int *eof, void *data)
++{
++      *eof = 1;
++      if (off)
++              return 0;
++
++      *start = page;
++      return sprintf(page, "%ld\n", ext3_mb_order2_reqs);
++}
++
 +static int ext3_mb_min_to_scan_write(struct file *file, const char *buffer,
 +              unsigned long count, void *data)
 +{
@@ -2695,6 +2861,7 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      struct proc_dir_entry *proc_ext3_mb_stats;
 +      struct proc_dir_entry *proc_ext3_mb_max_to_scan;
 +      struct proc_dir_entry *proc_ext3_mb_min_to_scan;
++      struct proc_dir_entry *proc_ext3_mb_order2_req;
 +
 +      proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs);
 +      if (proc_root_ext3 == NULL) {
@@ -2749,6 +2916,24 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      proc_ext3_mb_min_to_scan->read_proc  = ext3_mb_min_to_scan_read;
 +      proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write;
 +
++      /* Initialize EXT3_MB_ORDER2_REQ */
++      proc_ext3_mb_order2_req = create_proc_entry(
++                      EXT3_MB_ORDER2_REQ,
++                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
++      if (proc_ext3_mb_order2_req == NULL) {
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
++                              EXT3_MB_ORDER2_REQ);
++              remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_ROOT, proc_root_fs);
++              return -EIO;
++      }
++
++      proc_ext3_mb_order2_req->data = NULL;
++      proc_ext3_mb_order2_req->read_proc  = ext3_mb_order2_req_read;
++      proc_ext3_mb_order2_req->write_proc = ext3_mb_order2_req_write;
++
 +      return 0;
 +}
 +
@@ -2757,13 +2942,14 @@ Index: linux-2.6.12.6/fs/ext3/mballoc.c
 +      remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
 +      remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
 +      remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++      remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3);
 +      remove_proc_entry(EXT3_ROOT, proc_root_fs);
 +}
-Index: linux-2.6.12.6/fs/ext3/Makefile
+Index: linux-2.6.12.6-bull/fs/ext3/Makefile
 ===================================================================
---- linux-2.6.12.6.orig/fs/ext3/Makefile       2005-12-17 02:17:16.000000000 +0300
-+++ linux-2.6.12.6/fs/ext3/Makefile    2005-12-17 02:21:21.000000000 +0300
-@@ -6,7 +6,7 @@
+--- linux-2.6.12.6-bull.orig/fs/ext3/Makefile  2006-04-29 20:39:09.000000000 +0400
++++ linux-2.6.12.6-bull/fs/ext3/Makefile       2006-04-29 20:39:10.000000000 +0400
+@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
  
  ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
           ioctl.o namei.o super.o symlink.o hash.o resize.o \
index 01e7387..ced267d 100644 (file)
@@ -1,61 +1,7 @@
-Index: linux-2.6.9-full/include/linux/ext3_fs.h
-===================================================================
---- linux-2.6.9-full.orig/include/linux/ext3_fs.h      2005-12-16 23:16:41.000000000 +0300
-+++ linux-2.6.9-full/include/linux/ext3_fs.h   2005-12-16 23:16:42.000000000 +0300
-@@ -57,6 +57,14 @@ struct statfs;
- #define ext3_debug(f, a...)   do {} while (0)
- #endif
-+#define EXT3_MULTIBLOCK_ALLOCATOR     1
-+
-+#define EXT3_MB_HINT_MERGE            1
-+#define EXT3_MB_HINT_RESERVED         2
-+#define EXT3_MB_HINT_METADATA         4
-+#define EXT3_MB_HINT_FIRST            8
-+#define EXT3_MB_HINT_BEST             16
-+
- /*
-  * Special inodes numbers
-  */
-@@ -365,6 +373,7 @@ struct ext3_inode {
- #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
- #define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
- #define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
-+#define EXT3_MOUNT_MBALLOC            0x800000/* Buddy allocation support */
- /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
- #ifndef clear_opt
-@@ -726,7 +735,7 @@ extern int ext3_bg_has_super(struct supe
- extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
- extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
- extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
--                            unsigned long);
-+                            unsigned long, int);
- extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
-                                unsigned long, unsigned long, int *);
- extern unsigned long ext3_count_free_blocks (struct super_block *);
-@@ -857,6 +866,17 @@ extern void ext3_extents_initialize_bloc
- extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
-                         unsigned int cmd, unsigned long arg);
-+/* mballoc.c */
-+extern long ext3_mb_stats;
-+extern long ext3_mb_max_to_scan;
-+extern int ext3_mb_init(struct super_block *, int);
-+extern int ext3_mb_release(struct super_block *);
-+extern int ext3_mb_new_blocks(handle_t *, struct inode *, unsigned long, int *, int, int *);
-+extern int ext3_mb_reserve_blocks(struct super_block *, int);
-+extern void ext3_mb_release_blocks(struct super_block *, int);
-+int __init init_ext3_proc(void);
-+void exit_ext3_proc(void);
-+
- #endif        /* __KERNEL__ */
- /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
 Index: linux-2.6.9-full/include/linux/ext3_fs_sb.h
 ===================================================================
---- linux-2.6.9-full.orig/include/linux/ext3_fs_sb.h   2005-12-16 23:16:39.000000000 +0300
-+++ linux-2.6.9-full/include/linux/ext3_fs_sb.h        2005-12-16 23:16:42.000000000 +0300
+--- linux-2.6.9-full.orig/include/linux/ext3_fs_sb.h   2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/include/linux/ext3_fs_sb.h        2006-05-22 21:45:08.000000000 +0400
 @@ -23,9 +23,15 @@
  #define EXT_INCLUDE
  #include <linux/blockgroup_lock.h>
@@ -72,7 +18,7 @@ Index: linux-2.6.9-full/include/linux/ext3_fs_sb.h
  
  /*
   * third extended-fs super-block data in memory
-@@ -81,6 +87,38 @@ struct ext3_sb_info {
+@@ -81,6 +87,39 @@ struct ext3_sb_info {
        char *s_qf_names[MAXQUOTAS];            /* Names of quota files with journalled quota */
        int s_jquota_fmt;                       /* Format of quota to use */
  #endif
@@ -89,6 +35,7 @@ Index: linux-2.6.9-full/include/linux/ext3_fs_sb.h
 +      tid_t s_last_transaction;
 +      int s_mb_factor;
 +      unsigned short *s_mb_offsets, *s_mb_maxs;
++      unsigned long s_stripe;
 +
 +      /* history to debug policy */
 +      struct ext3_mb_history *s_mb_history;
@@ -111,10 +58,64 @@ Index: linux-2.6.9-full/include/linux/ext3_fs_sb.h
  };
  
  #endif        /* _LINUX_EXT3_FS_SB */
+Index: linux-2.6.9-full/include/linux/ext3_fs.h
+===================================================================
+--- linux-2.6.9-full.orig/include/linux/ext3_fs.h      2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/include/linux/ext3_fs.h   2006-05-22 21:44:37.000000000 +0400
+@@ -57,6 +57,14 @@ struct statfs;
+ #define ext3_debug(f, a...)   do {} while (0)
+ #endif
++#define EXT3_MULTIBLOCK_ALLOCATOR     1
++
++#define EXT3_MB_HINT_MERGE            1
++#define EXT3_MB_HINT_RESERVED         2
++#define EXT3_MB_HINT_METADATA         4
++#define EXT3_MB_HINT_FIRST            8
++#define EXT3_MB_HINT_BEST             16
++
+ /*
+  * Special inodes numbers
+  */
+@@ -365,6 +373,7 @@ struct ext3_inode {
+ #define EXT3_MOUNT_IOPEN_NOPRIV               0x100000/* Make iopen world-readable */
+ #define EXT3_MOUNT_EXTENTS            0x200000/* Extents support */
+ #define EXT3_MOUNT_EXTDEBUG           0x400000/* Extents debug */
++#define EXT3_MOUNT_MBALLOC            0x800000/* Buddy allocation support */
+ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
+ #ifndef clear_opt
+@@ -726,7 +735,7 @@ extern int ext3_bg_has_super(struct supe
+ extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group);
+ extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *);
+ extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long,
+-                            unsigned long);
++                            unsigned long, int);
+ extern void ext3_free_blocks_sb (handle_t *, struct super_block *,
+                                unsigned long, unsigned long, int *);
+ extern unsigned long ext3_count_free_blocks (struct super_block *);
+@@ -857,6 +866,17 @@ extern void ext3_extents_initialize_bloc
+ extern int ext3_ext_ioctl(struct inode *inode, struct file *filp,
+                         unsigned int cmd, unsigned long arg);
++/* mballoc.c */
++extern long ext3_mb_stats;
++extern long ext3_mb_max_to_scan;
++extern int ext3_mb_init(struct super_block *, int);
++extern int ext3_mb_release(struct super_block *);
++extern int ext3_mb_new_blocks(handle_t *, struct inode *, unsigned long, int *, int, int *);
++extern int ext3_mb_reserve_blocks(struct super_block *, int);
++extern void ext3_mb_release_blocks(struct super_block *, int);
++int __init init_ext3_proc(void);
++void exit_ext3_proc(void);
++
+ #endif        /* __KERNEL__ */
+ /* EXT3_IOC_CREATE_INUM at bottom of file (visible to kernel and user). */
 Index: linux-2.6.9-full/fs/ext3/super.c
 ===================================================================
---- linux-2.6.9-full.orig/fs/ext3/super.c      2005-12-16 23:16:41.000000000 +0300
-+++ linux-2.6.9-full/fs/ext3/super.c   2005-12-16 23:16:42.000000000 +0300
+--- linux-2.6.9-full.orig/fs/ext3/super.c      2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/super.c   2006-05-22 21:52:54.000000000 +0400
 @@ -394,6 +394,7 @@ void ext3_put_super (struct super_block 
        struct ext3_super_block *es = sbi->s_es;
        int i;
@@ -128,29 +129,37 @@ Index: linux-2.6.9-full/fs/ext3/super.c
        Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
        Opt_iopen, Opt_noiopen, Opt_iopen_nopriv,
 -      Opt_extents, Opt_extdebug,
-+      Opt_extents, Opt_extdebug, Opt_mballoc,
++      Opt_extents, Opt_extdebug, Opt_mballoc, Opt_stripe
  };
  
  static match_table_t tokens = {
-@@ -647,6 +649,7 @@ static match_table_t tokens = {
+@@ -648,6 +649,8 @@ static match_table_t tokens = {
        {Opt_iopen_nopriv, "iopen_nopriv"},
        {Opt_extents, "extents"},
        {Opt_extdebug, "extdebug"},
 +      {Opt_mballoc, "mballoc"},
++      {Opt_stripe, "stripe=%u"},
        {Opt_barrier, "barrier=%u"},
        {Opt_err, NULL},
        {Opt_resize, "resize"},
-@@ -957,6 +960,9 @@ clear_qf_name:
+@@ -958,6 +961,16 @@ clear_qf_name:
                case Opt_extdebug:
                        set_opt (sbi->s_mount_opt, EXTDEBUG);
                        break;
 +              case Opt_mballoc:
 +                      set_opt (sbi->s_mount_opt, MBALLOC);
 +                      break;
++              case Opt_stripe:
++                      if (match_int(&args[0], &option))
++                              return 0;
++                      if (option < 0)
++                              return 0;
++                      sbi->s_stripe = option;
++                      break;
                default:
                        printk (KERN_ERR
                                "EXT3-fs: Unrecognized mount option \"%s\" "
-@@ -1646,6 +1652,7 @@ static int ext3_fill_super (struct super
+@@ -1647,6 +1660,7 @@ static int ext3_fill_super (struct super
                ext3_count_dirs(sb));
  
        ext3_ext_init(sb);
@@ -158,7 +167,7 @@ Index: linux-2.6.9-full/fs/ext3/super.c
  
        return 0;
  
-@@ -2428,7 +2435,13 @@ static struct file_system_type ext3_fs_t
+@@ -2429,7 +2443,13 @@ static struct file_system_type ext3_fs_t
  
  static int __init init_ext3_fs(void)
  {
@@ -173,7 +182,7 @@ Index: linux-2.6.9-full/fs/ext3/super.c
        if (err)
                return err;
        err = init_inodecache();
-@@ -2450,6 +2463,7 @@ static void __exit exit_ext3_fs(void)
+@@ -2451,6 +2471,7 @@ static void __exit exit_ext3_fs(void)
        unregister_filesystem(&ext3_fs_type);
        destroy_inodecache();
        exit_ext3_xattr();
@@ -183,9 +192,9 @@ Index: linux-2.6.9-full/fs/ext3/super.c
  int ext3_prep_san_write(struct inode *inode, long *blocks,
 Index: linux-2.6.9-full/fs/ext3/extents.c
 ===================================================================
---- linux-2.6.9-full.orig/fs/ext3/extents.c    2005-12-16 23:16:41.000000000 +0300
-+++ linux-2.6.9-full/fs/ext3/extents.c 2005-12-16 23:16:42.000000000 +0300
-@@ -771,7 +771,7 @@ cleanup:
+--- linux-2.6.9-full.orig/fs/ext3/extents.c    2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/extents.c 2006-05-22 21:44:37.000000000 +0400
+@@ -777,7 +777,7 @@ cleanup:
                for (i = 0; i < depth; i++) {
                        if (!ablocks[i])
                                continue;
@@ -194,7 +203,7 @@ Index: linux-2.6.9-full/fs/ext3/extents.c
                }
        }
        kfree(ablocks);
-@@ -1428,7 +1428,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
+@@ -1434,7 +1434,7 @@ int ext3_ext_rm_idx(handle_t *handle, st
                  path->p_idx->ei_leaf);
        bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf);
        ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf);
@@ -203,7 +212,7 @@ Index: linux-2.6.9-full/fs/ext3/extents.c
        return err;
  }
  
-@@ -1913,10 +1913,12 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1919,10 +1919,12 @@ ext3_remove_blocks(struct ext3_extents_t
        int needed = ext3_remove_blocks_credits(tree, ex, from, to);
        handle_t *handle = ext3_journal_start(tree->inode, needed);
        struct buffer_head *bh;
@@ -217,7 +226,7 @@ Index: linux-2.6.9-full/fs/ext3/extents.c
        if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) {
                /* tail removal */
                unsigned long num, start;
-@@ -1928,7 +1930,7 @@ ext3_remove_blocks(struct ext3_extents_t
+@@ -1934,7 +1936,7 @@ ext3_remove_blocks(struct ext3_extents_t
                        bh = sb_find_get_block(tree->inode->i_sb, start + i);
                        ext3_forget(handle, 0, tree->inode, bh, start + i);
                }
@@ -226,97 +235,23 @@ Index: linux-2.6.9-full/fs/ext3/extents.c
        } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) {
                printk("strange request: removal %lu-%lu from %u:%u\n",
                       from, to, ex->ee_block, ex->ee_len);
-Index: linux-2.6.9-full/fs/ext3/inode.c
+Index: linux-2.6.9-full/fs/ext3/Makefile
 ===================================================================
---- linux-2.6.9-full.orig/fs/ext3/inode.c      2005-12-16 23:16:41.000000000 +0300
-+++ linux-2.6.9-full/fs/ext3/inode.c   2005-12-16 23:16:42.000000000 +0300
-@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h
-               ext3_journal_forget(handle, branch[i].bh);
-       }
-       for (i = 0; i < keys; i++)
--              ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
-+              ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
-       return err;
- }
-@@ -673,7 +673,7 @@ err_out:
-       if (err == -EAGAIN)
-               for (i = 0; i < num; i++)
-                       ext3_free_blocks(handle, inode, 
--                                       le32_to_cpu(where[i].key), 1);
-+                                       le32_to_cpu(where[i].key), 1, 1);
-       return err;
- }
-@@ -1831,7 +1831,7 @@ ext3_clear_blocks(handle_t *handle, stru
-               }
-       }
+--- linux-2.6.9-full.orig/fs/ext3/Makefile     2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/Makefile  2006-05-22 21:44:37.000000000 +0400
+@@ -6,7 +6,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o
  
--      ext3_free_blocks(handle, inode, block_to_free, count);
-+      ext3_free_blocks(handle, inode, block_to_free, count, 1);
- }
- /**
-@@ -2004,7 +2004,7 @@ static void ext3_free_branches(handle_t 
-                               ext3_journal_test_restart(handle, inode);
-                       }
--                      ext3_free_blocks(handle, inode, nr, 1);
-+                      ext3_free_blocks(handle, inode, nr, 1, 1);
-                       if (parent_bh) {
-                               /*
-Index: linux-2.6.9-full/fs/ext3/balloc.c
-===================================================================
---- linux-2.6.9-full.orig/fs/ext3/balloc.c     2005-10-27 21:44:24.000000000 +0400
-+++ linux-2.6.9-full/fs/ext3/balloc.c  2005-12-16 23:16:42.000000000 +0300
-@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
-  *
-  * Return buffer_head on success or NULL in case of failure.
-  */
--static struct buffer_head *
-+struct buffer_head *
- read_block_bitmap(struct super_block *sb, unsigned int block_group)
- {
-       struct ext3_group_desc * desc;
-@@ -450,24 +450,6 @@ error_return:
-       return;
- }
+ ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
+          ioctl.o namei.o super.o symlink.o hash.o resize.o \
+-         extents.o
++         extents.o mballoc.o
  
--/* Free given blocks, update quota and i_blocks field */
--void ext3_free_blocks(handle_t *handle, struct inode *inode,
--                      unsigned long block, unsigned long count)
--{
--      struct super_block * sb;
--      int dquot_freed_blocks;
--
--      sb = inode->i_sb;
--      if (!sb) {
--              printk ("ext3_free_blocks: nonexistent device");
--              return;
--      }
--      ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
--      if (dquot_freed_blocks)
--              DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
--      return;
--}
--
- /*
-  * For ext3 allocations, we must not reuse any blocks which are
-  * allocated in the bitmap buffer's "last committed data" copy.  This
-@@ -1140,7 +1122,7 @@ int ext3_should_retry_alloc(struct super
-  * bitmap, and then for any free bit if that fails.
-  * This function also updates quota and i_blocks field.
-  */
--int ext3_new_block(handle_t *handle, struct inode *inode,
-+int ext3_new_block_old(handle_t *handle, struct inode *inode,
-                       unsigned long goal, int *errp)
- {
-       struct buffer_head *bitmap_bh = NULL;
+ ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
+ ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
 Index: linux-2.6.9-full/fs/ext3/xattr.c
 ===================================================================
---- linux-2.6.9-full.orig/fs/ext3/xattr.c      2005-12-16 23:16:40.000000000 +0300
-+++ linux-2.6.9-full/fs/ext3/xattr.c   2005-12-16 23:16:42.000000000 +0300
+--- linux-2.6.9-full.orig/fs/ext3/xattr.c      2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/xattr.c   2006-05-22 21:44:37.000000000 +0400
 @@ -1281,7 +1281,7 @@ ext3_xattr_set_handle2(handle_t *handle,
                        new_bh = sb_getblk(sb, block);
                        if (!new_bh) {
@@ -346,9 +281,9 @@ Index: linux-2.6.9-full/fs/ext3/xattr.c
        } else {
 Index: linux-2.6.9-full/fs/ext3/mballoc.c
 ===================================================================
---- linux-2.6.9-full.orig/fs/ext3/mballoc.c    2005-12-16 17:46:19.148560250 +0300
-+++ linux-2.6.9-full/fs/ext3/mballoc.c 2005-12-17 00:10:15.000000000 +0300
-@@ -0,0 +1,2429 @@
+--- linux-2.6.9-full.orig/fs/ext3/mballoc.c    2006-05-12 23:14:51.200000000 +0400
++++ linux-2.6.9-full/fs/ext3/mballoc.c 2006-05-22 21:51:30.000000000 +0400
+@@ -0,0 +1,2671 @@
 +/*
 + * Copyright (c) 2003-2005, Cluster File Systems, Inc, info@clusterfs.com
 + * Written by Alex Tomas <alex@clusterfs.com>
@@ -437,6 +372,12 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +
 +long ext3_mb_stats = 1;
 +
++/*
++ * Threshold for the 2^N buddy search: it is tried for requests whose
++ * length is a power of two with ffs(length) >= this value.
++ */
++long ext3_mb_order2_reqs = 8;
++
++
 +#ifdef EXT3_BB_MAX_BLOCKS
 +#undef EXT3_BB_MAX_BLOCKS
 +#endif
@@ -502,6 +443,8 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +struct ext3_mb_history {
 +      struct ext3_free_extent goal;   /* goal allocation */
 +      struct ext3_free_extent result; /* result allocation */
++      unsigned pid;
++      unsigned ino;
 +      __u16 found;    /* how many extents have been found */
 +      __u16 groups;   /* how many groups have been scanned */
 +      __u16 tail;     /* what tail broke some buddy */
@@ -524,9 +467,9 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +#define EXT3_MB_BUDDY(e3b)    ((e3b)->bd_buddy)
 +
 +#ifndef EXT3_MB_HISTORY
-+#define ext3_mb_store_history(sb,ac)
++#define ext3_mb_store_history(sb,ino,ac)
 +#else
-+static void ext3_mb_store_history(struct super_block *,
++static void ext3_mb_store_history(struct super_block *, unsigned ino,
 +                              struct ext3_allocation_context *ac);
 +#endif
 +
@@ -1148,7 +1091,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +static int mb_find_extent(struct ext3_buddy *e3b, int order, int block,
 +                              int needed, struct ext3_free_extent *ex)
 +{
-+      int next, max, ord;
++      int next = block, max, ord;
 +      void *buddy;
 +
 +      J_ASSERT(ex != NULL);
@@ -1173,6 +1116,11 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      ex->fe_start = block << order;
 +      ex->fe_group = e3b->bd_group;
 +
++      /* calc difference from given start */
++      next = next - ex->fe_start;
++      ex->fe_len -= next;
++      ex->fe_start += next;
++
 +      while (needed > ex->fe_len && (buddy = mb_find_buddy(e3b, order, &max))) {
 +
 +              if (block + 1 >= max)
@@ -1385,6 +1333,8 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +                              struct ext3_buddy *e3b)
 +{
 +      int group = ac->ac_g_ex.fe_group, max, err;
++      struct ext3_sb_info *sbi = EXT3_SB(ac->ac_sb);
++      struct ext3_super_block *es = sbi->s_es;
 +      struct ext3_free_extent ex;
 +
 +      err = ext3_mb_load_buddy(ac->ac_sb, group, e3b);
@@ -1394,8 +1344,26 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      ext3_lock_group(ac->ac_sb, group);
 +      max = mb_find_extent(e3b, 0, ac->ac_g_ex.fe_start,
 +                              ac->ac_g_ex.fe_len, &ex);
-+      
-+      if (max > 0) {
++
++      if (max >= ac->ac_g_ex.fe_len && ac->ac_g_ex.fe_len == sbi->s_stripe) {
++              unsigned long start;
++              start = (e3b->bd_group * EXT3_BLOCKS_PER_GROUP(ac->ac_sb) +
++                              ex.fe_start + le32_to_cpu(es->s_first_data_block));
++              if (start % sbi->s_stripe == 0) {
++                      ac->ac_found++;
++                      ac->ac_b_ex = ex;
++                      ext3_mb_use_best_found(ac, e3b);
++              }
++      } else if (max >= ac->ac_g_ex.fe_len) {
++              J_ASSERT(ex.fe_len > 0);
++              J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
++              J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
++              ac->ac_found++;
++              ac->ac_b_ex = ex;
++              ext3_mb_use_best_found(ac, e3b);
++      } else if (max > 0 && (ac->ac_flags & EXT3_MB_HINT_MERGE)) {
++              /* Sometimes, caller may want to merge even small
++               * number of blocks to an existing extent */
 +              J_ASSERT(ex.fe_len > 0);
 +              J_ASSERT(ex.fe_group == ac->ac_g_ex.fe_group);
 +              J_ASSERT(ex.fe_start == ac->ac_g_ex.fe_start);
@@ -1423,7 +1391,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      int i, k, max;
 +
 +      J_ASSERT(ac->ac_2order > 0);
-+      for (i = ac->ac_2order; i < sb->s_blocksize_bits + 1; i++) {
++      for (i = ac->ac_2order; i <= sb->s_blocksize_bits + 1; i++) {
 +              if (grp->bb_counters[i] == 0)
 +                      continue;
 +
@@ -1488,6 +1456,42 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      }
 +}
 +
++/*
++ * Special case for storage such as raid5: for a stripe-sized request, walk
++ * the group in stripe-sized steps and take the first stripe-aligned free chunk
++ */
++static void ext3_mb_scan_aligned(struct ext3_allocation_context *ac,
++                                      struct ext3_buddy *e3b)
++{
++      struct super_block *sb = ac->ac_sb;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      void *bitmap = EXT3_MB_BITMAP(e3b);
++      struct ext3_free_extent ex;
++      unsigned long i, max;
++
++      J_ASSERT(sbi->s_stripe != 0);
++
++      /* group-relative offset of the first stripe-aligned block */
++      i = e3b->bd_group * EXT3_BLOCKS_PER_GROUP(sb)
++              + le32_to_cpu(sbi->s_es->s_first_data_block);
++      i = ((i + sbi->s_stripe - 1) / sbi->s_stripe) * sbi->s_stripe;
++      i = (i - le32_to_cpu(sbi->s_es->s_first_data_block))
++                      % EXT3_BLOCKS_PER_GROUP(sb);
++
++      while (i < sb->s_blocksize * 8) {
++              if (!mb_test_bit(i, bitmap)) {
++                      max = mb_find_extent(e3b, 0, i, sbi->s_stripe, &ex);
++                      if (max >= sbi->s_stripe) {
++                              ac->ac_found++;
++                              ac->ac_b_ex = ex;
++                              ext3_mb_use_best_found(ac, e3b);
++                              break;
++                      }
++              }
++              i += sbi->s_stripe;
++      }
++}
++
 +static int ext3_mb_good_group(struct ext3_allocation_context *ac,
 +                              int group, int cr)
 +{
@@ -1509,15 +1513,18 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +              case 0:
 +                      J_ASSERT(ac->ac_2order != 0);
 +                      bits = ac->ac_sb->s_blocksize_bits + 1;
-+                      for (i = ac->ac_2order; i < bits; i++)
++                      for (i = ac->ac_2order; i <= bits; i++)
 +                              if (grp->bb_counters[i] > 0)
 +                                      return 1;
++                      break;
 +              case 1:
 +                      if ((free / fragments) >= ac->ac_g_ex.fe_len)
 +                              return 1;
++                      break;
 +              case 2:
 +                      if (free >= ac->ac_g_ex.fe_len)
 +                              return 1;
++                      break;
 +              case 3:
 +                      return 1;
 +              default:
@@ -1618,23 +1625,27 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      ac.ac_2order = 0;
 +      ac.ac_criteria = 0;
 +
++      if (*len == 1 && sbi->s_stripe) {
++              /* looks like a metadata, let's use a dirty hack for raid5
++               * move all metadata in first groups in hope to hit cached
++               * sectors and thus avoid read-modify cycles in raid5 */
++              ac.ac_g_ex.fe_group = group = 0;
++      }
++
 +      /* probably, the request is for 2^8+ blocks (1/2/3/... MB) */
 +      i = ffs(*len);
-+      if (i >= 8) {
++      if (i >= ext3_mb_order2_reqs) {
 +              i--;
 +              if ((*len & (~(1 << i))) == 0)
 +                      ac.ac_2order = i;
 +      }
 +
-+      /* Sometimes, caller may want to merge even small
-+       * number of blocks to an existing extent */
-+      if (ac.ac_flags & EXT3_MB_HINT_MERGE) {
-+              err = ext3_mb_find_by_goal(&ac, &e3b);
-+              if (err)
-+                      goto out_err;
-+              if (ac.ac_status == AC_STATUS_FOUND)
-+                      goto found;
-+      }
++      /* first, try the goal */
++      err = ext3_mb_find_by_goal(&ac, &e3b);
++      if (err)
++              goto out_err;
++      if (ac.ac_status == AC_STATUS_FOUND)
++              goto found;
 +
 +      /* Let's just scan groups to find more-less suitable blocks */
 +      cr = ac.ac_2order ? 0 : 1;
@@ -1673,6 +1684,8 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +                      ac.ac_groups_scanned++;
 +                      if (cr == 0)
 +                              ext3_mb_simple_scan_group(&ac, &e3b);
++                      else if (cr == 1 && *len == sbi->s_stripe) 
++                              ext3_mb_scan_aligned(&ac, &e3b);
 +                      else
 +                              ext3_mb_complex_scan_group(&ac, &e3b);
 +
@@ -1853,7 +1866,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +                      atomic_inc(&sbi->s_bal_breaks);
 +      }
 +
-+      ext3_mb_store_history(sb, &ac);
++      ext3_mb_store_history(sb, inode->i_ino, &ac);
 +
 +      return block;
 +}
@@ -1918,9 +1931,9 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      char buf[20], buf2[20];
 +
 +      if (v == SEQ_START_TOKEN) {
-+              seq_printf(seq, "%-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
-+                       "goal", "result", "found", "grps", "cr", "merge",
-+                       "tail", "broken");
++              seq_printf(seq, "%-5s %-8s %-17s %-17s %-5s %-5s %-2s %-5s %-5s %-6s\n",
++                       "pid", "inode", "goal", "result", "found", "grps", "cr",
++                       "merge", "tail", "broken");
 +              return 0;
 +      }
 +
@@ -1928,9 +1941,9 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +              hs->goal.fe_start, hs->goal.fe_len);
 +      sprintf(buf2, "%u/%u/%u", hs->result.fe_group,
 +              hs->result.fe_start, hs->result.fe_len);
-+      seq_printf(seq, "%-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n", buf,
-+                      buf2, hs->found, hs->groups, hs->cr, 
-+                      hs->merged ? "M" : "", hs->tail,
++      seq_printf(seq, "%-5u %-8u %-17s %-17s %-5u %-5u %-2u %-5s %-5u %-6u\n",
++                      hs->pid, hs->ino, buf, buf2, hs->found, hs->groups,
++                      hs->cr, hs->merged ? "M" : "", hs->tail,
 +                      hs->buddy ? 1 << hs->buddy : 0);
 +      return 0;
 +}
@@ -1994,12 +2007,108 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      .release        = ext3_mb_seq_history_release,
 +};
 +
++static void *ext3_mb_seq_groups_start(struct seq_file *seq, loff_t *pos)
++{
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      int group;
++
++      if (*pos < 0 || *pos >= sbi->s_groups_count)
++              return NULL;
++      /* cookie is group + 1; cast via long to match the pointer width */
++      group = *pos + 1;
++      return (void *)(long) group;
++}
++
++static void *ext3_mb_seq_groups_next(struct seq_file *seq, void *v, loff_t *pos)
++{
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      int group;
++      /* step to the next position; the returned cookie is group + 1 */
++      ++*pos;
++      if (*pos < 0 || *pos >= sbi->s_groups_count)
++              return NULL;
++      group = *pos + 1;
++      return (void *)(long) group;
++}
++
++static int ext3_mb_seq_groups_show(struct seq_file *seq, void *v)
++{
++      struct super_block *sb = seq->private;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      int group = (int)(long) v, i;
++      struct sg {
++              struct ext3_group_info info;
++              unsigned short counters[16];
++      } sg;
++      /* the iterator cookie is group + 1; the header goes before group 0 */
++      group--;
++      if (group == 0)
++              seq_printf(seq, "#%-5s: %-5s %-5s %-5s [ %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
++                       "group", "free", "frags", "first", "2^0", "2^1", "2^2",
++                       "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10",
++                       "2^11", "2^12", "2^13");
++      /* copy the group info plus its counters while holding the group lock */
++      i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
++              sizeof(struct ext3_group_info);
++      ext3_lock_group(sb, group);
++      memcpy(&sg, sbi->s_group_info[group], i);
++      ext3_unlock_group(sb, group);
++      /* print nothing for groups where EXT3_MB_GRP_NEED_INIT() is true */
++      if (EXT3_MB_GRP_NEED_INIT(&sg.info))
++              return 0;
++
++      seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
++                      sg.info.bb_fragments, sg.info.bb_first_free);
++      for (i = 0; i <= 13; i++)
++              seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
++                              sg.info.bb_counters[i] : 0);
++      seq_printf(seq, " ]\n");
++
++      return 0;
++}
++
++static void ext3_mb_seq_groups_stop(struct seq_file *seq, void *v)
++{
++}
++
++static struct seq_operations ext3_mb_seq_groups_ops = {
++      .start  = ext3_mb_seq_groups_start,
++      .next   = ext3_mb_seq_groups_next,
++      .stop   = ext3_mb_seq_groups_stop,
++      .show   = ext3_mb_seq_groups_show,
++};
++
++static int ext3_mb_seq_groups_open(struct inode *inode, struct file *file)
++{
++      struct super_block *sb = PDE(inode)->data;
++      int rc;
++
++      rc = seq_open(file, &ext3_mb_seq_groups_ops);
++      if (rc == 0) {
++              struct seq_file *m = (struct seq_file *)file->private_data;
++              m->private = sb;
++      }
++      return rc;
++
++}
++
++static struct file_operations ext3_mb_seq_groups_fops = {
++      .owner          = THIS_MODULE,
++      .open           = ext3_mb_seq_groups_open,
++      .read           = seq_read,
++      .llseek         = seq_lseek,
++      .release        = seq_release,
++};
++
 +static void ext3_mb_history_release(struct super_block *sb)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      char name[64];
 +
 +      snprintf(name, sizeof(name) - 1, "%s", bdevname(sb->s_bdev, name));
++      remove_proc_entry("mb_groups", sbi->s_mb_proc);
 +      remove_proc_entry("mb_history", sbi->s_mb_proc);
 +      remove_proc_entry(name, proc_root_ext3);
 +
@@ -2022,6 +2131,11 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +                      p->proc_fops = &ext3_mb_seq_history_fops;
 +                      p->data = sb;
 +              }
++              p = create_proc_entry("mb_groups", S_IRUGO, sbi->s_mb_proc);
++              if (p) {
++                      p->proc_fops = &ext3_mb_seq_groups_fops;
++                      p->data = sb;
++              }
 +      }
 +
 +      sbi->s_mb_history_max = 1000;
@@ -2034,7 +2148,8 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +}
 +
 +static void
-+ext3_mb_store_history(struct super_block *sb, struct ext3_allocation_context *ac)
++ext3_mb_store_history(struct super_block *sb, unsigned ino,
++                      struct ext3_allocation_context *ac)
 +{
 +      struct ext3_sb_info *sbi = EXT3_SB(sb);
 +      struct ext3_mb_history h;
@@ -2042,6 +2157,8 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      if (likely(sbi->s_mb_history == NULL))
 +              return;
 +
++      h.pid = current->pid;
++      h.ino = ino;
 +      h.goal = ac->ac_g_ex;
 +      h.result = ac->ac_b_ex;
 +      h.found = ac->ac_found;
@@ -2597,6 +2714,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +#define EXT3_MB_STATS_NAME        "mb_stats"
 +#define EXT3_MB_MAX_TO_SCAN_NAME  "mb_max_to_scan"
 +#define EXT3_MB_MIN_TO_SCAN_NAME  "mb_min_to_scan"
++#define EXT3_MB_ORDER2_REQ      "mb_order2_req"
 +
 +static int ext3_mb_stats_read(char *page, char **start, off_t off,
 +              int count, int *eof, void *data)
@@ -2684,6 +2802,45 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      return len;
 +}
 +
++static int ext3_mb_order2_req_write(struct file *file, const char *buffer,
++              unsigned long count, void *data)
++{
++      char str[32];
++      long value;
++
++      if (count >= sizeof(str)) {
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
++                     EXT3_MB_ORDER2_REQ, (int)sizeof(str));
++              return -EOVERFLOW;
++      }
++
++      if (copy_from_user(str, buffer, count))
++              return -EFAULT;
++      str[count] = '\0';      /* count < sizeof(str) was checked above */
++
++      /* Only strictly positive values are accepted; stored as given */
++      value = simple_strtol(str, NULL, 0);
++      if (value <= 0)
++              return -ERANGE;
++
++      ext3_mb_order2_reqs = value;
++      return count;
++}
++
++static int ext3_mb_order2_req_read(char *page, char **start, off_t off,
++              int count, int *eof, void *data)
++{
++      int len;
++
++      *eof = 1;
++      if (off != 0)
++              return 0;
++
++      len = sprintf(page, "%ld\n", ext3_mb_order2_reqs);
++      *start = page;
++      return len;
++}
++
 +static int ext3_mb_min_to_scan_write(struct file *file, const char *buffer,
 +              unsigned long count, void *data)
 +{
@@ -2691,7 +2848,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      long value;
 +
 +      if (count >= sizeof(str)) {
-+              printk(KERN_ERR "EXT3: %s string too long, max %u bytes\n",
++              printk(KERN_ERR "EXT3-fs: %s string too long, max %u bytes\n",
 +                     EXT3_MB_MIN_TO_SCAN_NAME, (int)sizeof(str));
 +              return -EOVERFLOW;
 +      }
@@ -2714,10 +2871,11 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      struct proc_dir_entry *proc_ext3_mb_stats;
 +      struct proc_dir_entry *proc_ext3_mb_max_to_scan;
 +      struct proc_dir_entry *proc_ext3_mb_min_to_scan;
++      struct proc_dir_entry *proc_ext3_mb_order2_req;
 +
 +      proc_root_ext3 = proc_mkdir(EXT3_ROOT, proc_root_fs);
 +      if (proc_root_ext3 == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n", EXT3_ROOT);
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n", EXT3_ROOT);
 +              return -EIO;
 +      }
 +
@@ -2725,7 +2883,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      proc_ext3_mb_stats = create_proc_entry(EXT3_MB_STATS_NAME,
 +                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
 +      if (proc_ext3_mb_stats == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
 +                              EXT3_MB_STATS_NAME);
 +              remove_proc_entry(EXT3_ROOT, proc_root_fs);
 +              return -EIO;
@@ -2740,7 +2898,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +                      EXT3_MB_MAX_TO_SCAN_NAME,
 +                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
 +      if (proc_ext3_mb_max_to_scan == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
 +                              EXT3_MB_MAX_TO_SCAN_NAME);
 +              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
 +              remove_proc_entry(EXT3_ROOT, proc_root_fs);
@@ -2756,7 +2914,7 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +                      EXT3_MB_MIN_TO_SCAN_NAME,
 +                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
 +      if (proc_ext3_mb_min_to_scan == NULL) {
-+              printk(KERN_ERR "EXT3: Unable to create %s\n",
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
 +                              EXT3_MB_MIN_TO_SCAN_NAME);
 +              remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
 +              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
@@ -2768,6 +2926,24 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      proc_ext3_mb_min_to_scan->read_proc  = ext3_mb_min_to_scan_read;
 +      proc_ext3_mb_min_to_scan->write_proc = ext3_mb_min_to_scan_write;
 +
++      /* Initialize EXT3_ORDER2_REQ */
++      proc_ext3_mb_order2_req = create_proc_entry(
++                      EXT3_MB_ORDER2_REQ,
++                      S_IFREG | S_IRUGO | S_IWUSR, proc_root_ext3);
++      if (proc_ext3_mb_order2_req == NULL) {
++              printk(KERN_ERR "EXT3-fs: Unable to create %s\n",
++                              EXT3_MB_ORDER2_REQ);
++              remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
++              remove_proc_entry(EXT3_ROOT, proc_root_fs);
++              return -EIO;
++      }
++
++      proc_ext3_mb_order2_req->data = NULL;
++      proc_ext3_mb_order2_req->read_proc  = ext3_mb_order2_req_read;
++      proc_ext3_mb_order2_req->write_proc = ext3_mb_order2_req_write;
++
 +      return 0;
 +}
 +
@@ -2776,18 +2952,93 @@ Index: linux-2.6.9-full/fs/ext3/mballoc.c
 +      remove_proc_entry(EXT3_MB_STATS_NAME, proc_root_ext3);
 +      remove_proc_entry(EXT3_MB_MAX_TO_SCAN_NAME, proc_root_ext3);
 +      remove_proc_entry(EXT3_MB_MIN_TO_SCAN_NAME, proc_root_ext3);
++      remove_proc_entry(EXT3_MB_ORDER2_REQ, proc_root_ext3);
 +      remove_proc_entry(EXT3_ROOT, proc_root_fs);
 +}
-Index: linux-2.6.9-full/fs/ext3/Makefile
+Index: linux-2.6.9-full/fs/ext3/balloc.c
 ===================================================================
---- linux-2.6.9-full.orig/fs/ext3/Makefile     2005-12-16 23:16:41.000000000 +0300
-+++ linux-2.6.9-full/fs/ext3/Makefile  2005-12-16 23:16:42.000000000 +0300
-@@ -6,7 +6,7 @@
+--- linux-2.6.9-full.orig/fs/ext3/balloc.c     2006-03-10 18:20:03.000000000 +0300
++++ linux-2.6.9-full/fs/ext3/balloc.c  2006-05-22 21:44:37.000000000 +0400
+@@ -79,7 +79,7 @@ struct ext3_group_desc * ext3_get_group_
+  *
+  * Return buffer_head on success or NULL in case of failure.
+  */
+-static struct buffer_head *
++struct buffer_head *
+ read_block_bitmap(struct super_block *sb, unsigned int block_group)
+ {
+       struct ext3_group_desc * desc;
+@@ -451,24 +451,6 @@ error_return:
+       return;
+ }
  
- ext3-y        := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \
-          ioctl.o namei.o super.o symlink.o hash.o resize.o \
--         extents.o
-+         extents.o mballoc.o
+-/* Free given blocks, update quota and i_blocks field */
+-void ext3_free_blocks(handle_t *handle, struct inode *inode,
+-                      unsigned long block, unsigned long count)
+-{
+-      struct super_block * sb;
+-      int dquot_freed_blocks;
+-
+-      sb = inode->i_sb;
+-      if (!sb) {
+-              printk ("ext3_free_blocks: nonexistent device");
+-              return;
+-      }
+-      ext3_free_blocks_sb(handle, sb, block, count, &dquot_freed_blocks);
+-      if (dquot_freed_blocks)
+-              DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
+-      return;
+-}
+-
+ /*
+  * For ext3 allocations, we must not reuse any blocks which are
+  * allocated in the bitmap buffer's "last committed data" copy.  This
+@@ -1131,7 +1113,7 @@ int ext3_should_retry_alloc(struct super
+  * bitmap, and then for any free bit if that fails.
+  * This function also updates quota and i_blocks field.
+  */
+-int ext3_new_block(handle_t *handle, struct inode *inode,
++int ext3_new_block_old(handle_t *handle, struct inode *inode,
+                       unsigned long goal, int *errp)
+ {
+       struct buffer_head *bitmap_bh = NULL;
+Index: linux-2.6.9-full/fs/ext3/inode.c
+===================================================================
+--- linux-2.6.9-full.orig/fs/ext3/inode.c      2006-05-18 23:57:04.000000000 +0400
++++ linux-2.6.9-full/fs/ext3/inode.c   2006-05-22 21:44:37.000000000 +0400
+@@ -572,7 +572,7 @@ static int ext3_alloc_branch(handle_t *h
+               ext3_journal_forget(handle, branch[i].bh);
+       }
+       for (i = 0; i < keys; i++)
+-              ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1);
++              ext3_free_blocks(handle, inode, le32_to_cpu(branch[i].key), 1, 1);
+       return err;
+ }
  
- ext3-$(CONFIG_EXT3_FS_XATTR)   += xattr.o xattr_user.o xattr_trusted.o
- ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o
+@@ -673,7 +673,7 @@ err_out:
+       if (err == -EAGAIN)
+               for (i = 0; i < num; i++)
+                       ext3_free_blocks(handle, inode, 
+-                                       le32_to_cpu(where[i].key), 1);
++                                       le32_to_cpu(where[i].key), 1, 1);
+       return err;
+ }
+@@ -1831,7 +1831,7 @@ ext3_clear_blocks(handle_t *handle, stru
+               }
+       }
+-      ext3_free_blocks(handle, inode, block_to_free, count);
++      ext3_free_blocks(handle, inode, block_to_free, count, 1);
+ }
+ /**
+@@ -2004,7 +2004,7 @@ static void ext3_free_branches(handle_t 
+                               ext3_journal_test_restart(handle, inode);
+                       }
+-                      ext3_free_blocks(handle, inode, nr, 1);
++                      ext3_free_blocks(handle, inode, nr, 1, 1);
+                       if (parent_bh) {
+                               /*
index 529f5a7..ebdf8e8 100644 (file)
@@ -166,13 +166,13 @@ int ll_drop_dentry(struct dentry *dentry)
                 dput(dentry);
                 spin_lock(&dcache_lock);
                 return 1;
-        } 
-        
+        }
+
         if (!(dentry->d_flags & DCACHE_LUSTRE_INVALID)) {
 #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0))
                 struct inode *inode = dentry->d_inode;
 #endif
-               CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
+                CDEBUG(D_DENTRY, "unhashing dentry %.*s (%p) parent %p "
                        "inode %p refc %d\n", dentry->d_name.len,
                        dentry->d_name.name, dentry, dentry->d_parent,
                        dentry->d_inode, atomic_read(&dentry->d_count));
index 9a5a294..9e2724c 100644 (file)
@@ -1552,8 +1552,7 @@ static int join_sanity_check(struct inode *head, struct inode *tail)
                 RETURN(-EINVAL);
         }
         if (head->i_size % JOIN_FILE_ALIGN) {
-                CERROR("hsize" LPU64 " must be times of 64K\n",
-                        head->i_size);
+                CERROR("hsize %llu must be times of 64K\n", head->i_size);
                 RETURN(-EINVAL);
         }
         RETURN(0);
index f0cce54..4b49579 100644 (file)
@@ -1521,7 +1521,7 @@ void ll_update_inode(struct inode *inode, struct lustre_md *md)
         if (body->valid & OBD_MD_FLGID)
                 inode->i_gid = body->gid;
         if (body->valid & OBD_MD_FLFLAGS)
-                inode->i_flags = body->flags;
+                inode->i_flags = ll_ext_to_inode_flags(body->flags);
         if (body->valid & OBD_MD_FLNLINK)
                 inode->i_nlink = body->nlink;
         if (body->valid & OBD_MD_FLGENER)
@@ -1630,14 +1630,10 @@ int ll_iocontrol(struct inode *inode, struct file *file,
                 body = lustre_msg_buf(req->rq_repmsg, REPLY_REC_OFF,
                                       sizeof(*body));
 
-                if (body->flags & S_APPEND)
-                        flags |= EXT3_APPEND_FL;
-                if (body->flags & S_IMMUTABLE)
-                        flags |= EXT3_IMMUTABLE_FL;
-                if (body->flags & S_NOATIME)
-                        flags |= EXT3_NOATIME_FL;
-
-                ptlrpc_req_finished(req);
+                /* We want to return EXT3_*_FL flags to the caller via this
+                 * ioctl.  An older MDS may be sending S_* flags, fix it up. */
+                flags = ll_inode_to_ext_flags(body->flags, body->flags);
+                ptlrpc_req_finished (req);
 
                 RETURN(put_user(flags, (int *)arg));
         }
@@ -1682,19 +1678,8 @@ int ll_iocontrol(struct inode *inode, struct file *file,
                         RETURN(rc);
                 }
 
-                if (flags & EXT3_APPEND_FL)
-                        inode->i_flags |= S_APPEND;
-                else
-                        inode->i_flags &= ~S_APPEND;
-                if (flags & EXT3_IMMUTABLE_FL)
-                        inode->i_flags |= S_IMMUTABLE;
-                else
-                        inode->i_flags &= ~S_IMMUTABLE;
-                if (flags & EXT3_NOATIME_FL)
-                        inode->i_flags |= S_NOATIME;
-                else
-                        inode->i_flags &= ~S_NOATIME;
-
+                inode->i_flags = ll_ext_to_inode_flags(flags |
+                                                       MDS_BFLAG_EXT_FLAGS);
                 RETURN(0);
         }
         default:
index 49e6407..49083db 100644 (file)
@@ -1257,7 +1257,7 @@ static void ras_update(struct ll_sb_info *sbi, struct inode *inode,
 
                 kms_pages = (inode->i_size + PAGE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
-                CDEBUG(D_READA, "kmsp %llu mwp %lu mp %lu\n", kms_pages,
+                CDEBUG(D_READA, "kmsp "LPU64" mwp %lu mp %lu\n", kms_pages,
                        ra->ra_max_read_ahead_whole_pages, ra->ra_max_pages);
 
                 if (kms_pages &&
index 0a418a0..f6e3f67 100644 (file)
@@ -509,6 +509,12 @@ static int fsfilt_ext3_iocontrol(struct inode * inode, struct file *file,
                 RETURN(-EPERM);
         }
 
+        /* FIXME: Can't do this because of nested transaction deadlock */
+        if (cmd == EXT3_IOC_SETFLAGS && (*(int *)arg) & EXT3_JOURNAL_DATA_FL) {
+                CERROR("can't set data journal flag on file\n");
+                RETURN(-EPERM);
+        }
+
         if (inode->i_fop->ioctl)
                 rc = inode->i_fop->ioctl(inode, file, cmd, arg);
         else
index 09351eb..1012381 100644 (file)
@@ -8,7 +8,7 @@
 
 #include <lustre_mds.h>
 void mdc_pack_req_body(struct ptlrpc_request *req, int offset,
-                       __u64 valid, struct ll_fid *fid, int ea_size);
+                       __u64 valid, struct ll_fid *fid, int ea_size, int flags);
 void mdc_pack_rep_body(struct ptlrpc_request *);
 void mdc_readdir_pack(struct ptlrpc_request *req, int offset, __u64 pg_off,
                      __u32 size, struct ll_fid *mdc_fid);
index 9ebb767..eb503c4 100644 (file)
@@ -63,7 +63,7 @@ static void mdc_pack_body(struct mds_body *b)
 }
 
 void mdc_pack_req_body(struct ptlrpc_request *req, int offset,
-                       __u64 valid, struct ll_fid *fid, int ea_size)
+                       __u64 valid, struct ll_fid *fid, int ea_size, int flags)
 {
         struct mds_body *b = lustre_msg_buf(req->rq_reqmsg, offset, sizeof(*b));
 
@@ -71,6 +71,7 @@ void mdc_pack_req_body(struct ptlrpc_request *req, int offset,
                 b->fid1 = *fid;
         b->valid = valid;
         b->eadatasize = ea_size;
+        b->flags = flags;
         mdc_pack_body(b);
 }
 
@@ -297,7 +298,7 @@ void mdc_getattr_pack(struct ptlrpc_request *req, int offset, int valid,
         b->fsgid = current->fsgid;
         b->capability = current->cap_effective;
         b->valid = valid;
-        b->flags = flags;
+        b->flags = flags | MDS_BFLAG_EXT_FLAGS;
         b->suppgid = data->suppgids[0];
 
         b->fid1 = data->fid1;
index 6dbf7f5..2b010a9 100644 (file)
@@ -57,7 +57,6 @@ void it_clear_disposition(struct lookup_intent *it, int flag)
 {
         it->d.lustre.it_disposition &= ~flag;
 }
-
 EXPORT_SYMBOL(it_clear_disposition);
 
 static int it_to_lock_mode(struct lookup_intent *it)
@@ -568,9 +567,9 @@ int mdc_intent_lock(struct obd_export *exp, struct mdc_op_data *op_data,
                    owner/group/acls are under lookup lock, we need both 
                    ibits for GETATTR. */
                 policy.l_inodebits.bits = (it->it_op == IT_GETATTR) ?
-                        MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP : 
+                        MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP :
                         MDS_INODELOCK_LOOKUP;
-                
+
                 rc = ldlm_lock_match(exp->exp_obd->obd_namespace,
                                      LDLM_FL_BLOCK_GRANTED, &res_id,
                                      LDLM_IBITS, &policy, LCK_CR, &lockh);
@@ -578,13 +577,13 @@ int mdc_intent_lock(struct obd_export *exp, struct mdc_op_data *op_data,
                         mode = LCK_CW;
                         rc = ldlm_lock_match(exp->exp_obd->obd_namespace,
                                              LDLM_FL_BLOCK_GRANTED, &res_id,
-                                             LDLM_IBITS, &policy, LCK_CW, &lockh);
+                                             LDLM_IBITS, &policy,LCK_CW,&lockh);
                 }
                 if (!rc) {
                         mode = LCK_PR;
                         rc = ldlm_lock_match(exp->exp_obd->obd_namespace,
                                              LDLM_FL_BLOCK_GRANTED, &res_id,
-                                             LDLM_IBITS, &policy, LCK_PR, &lockh);
+                                             LDLM_IBITS, &policy,LCK_PR,&lockh);
                 }
                 if (rc) {
                         memcpy(&it->d.lustre.it_lock_handle, &lockh,
@@ -658,7 +657,8 @@ int mdc_intent_lock(struct obd_export *exp, struct mdc_op_data *op_data,
         if (op_data->fid2.id && (it->it_op != IT_GETATTR)) {
                 it_set_disposition(it, DISP_ENQ_COMPLETE);
                 /* Also: did we find the same inode? */
-                if (memcmp(&op_data->fid2, &mds_body->fid1, sizeof(op_data->fid2)))
+                if (memcmp(&op_data->fid2, &mds_body->fid1,
+                           sizeof(op_data->fid2)))
                         RETURN(-ESTALE);
         }
 
index 131263a..bd979ac 100644 (file)
@@ -66,7 +66,7 @@ static int send_getstatus(struct obd_import *imp, struct ll_fid *rootfid,
         req->rq_send_state = level;
         ptlrpc_req_set_repsize(req, 2, size);
 
-        mdc_pack_req_body(req, REQ_REC_OFF, 0, NULL, 0);
+        mdc_pack_req_body(req, REQ_REC_OFF, 0, NULL, 0, 0);
         lustre_msg_add_flags(req->rq_reqmsg, msg_flags);
         rc = ptlrpc_queue_wait(req);
 
@@ -178,7 +178,8 @@ int mdc_getattr(struct obd_export *exp, struct ll_fid *fid,
         if (!req)
                 GOTO(out, rc = -ENOMEM);
 
-        mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, ea_size);
+        mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, ea_size,
+                          MDS_BFLAG_EXT_FLAGS/*request "new" flags(bug 9486)*/);
 
         /* currently only root inode will call us with FLACL */
         if (valid & OBD_MD_FLACL)
@@ -208,7 +209,8 @@ int mdc_getattr_name(struct obd_export *exp, struct ll_fid *fid,
         if (!req)
                 GOTO(out, rc = -ENOMEM);
 
-        mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, ea_size);
+        mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, ea_size,
+                          MDS_BFLAG_EXT_FLAGS/*request "new" flags(bug 9486)*/);
  
         LASSERT(strnlen(filename, namelen) == namelen - 1);
         memcpy(lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF + 1, namelen),
@@ -231,8 +233,8 @@ int mdc_xattr_common(struct obd_export *exp, struct ll_fid *fid,
                      int flags, struct ptlrpc_request **request)
 {
         struct ptlrpc_request *req;
-        struct mds_body *body;
-        int size[4] = { sizeof(struct ptlrpc_body), sizeof(*body) };
+        int size[4] = { sizeof(struct ptlrpc_body), sizeof(struct mds_body) };
+        // int size[3] = {sizeof(struct mds_body)}, bufcnt = 1;
         int rc, xattr_namelen = 0, bufcnt = 2, offset;
         void *tmp;
         ENTRY;
@@ -252,9 +254,7 @@ int mdc_xattr_common(struct obd_export *exp, struct ll_fid *fid,
                 GOTO(out, rc = -ENOMEM);
 
         /* request data */
-        mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, output_size);
-        body = lustre_msg_buf(req->rq_reqmsg, REQ_REC_OFF, sizeof(*body));
-        body->flags = flags;
+        mdc_pack_req_body(req, REQ_REC_OFF, valid, fid, output_size, flags);
 
         offset = REQ_REC_OFF + 1;
 
@@ -269,6 +269,7 @@ int mdc_xattr_common(struct obd_export *exp, struct ll_fid *fid,
 
         /* reply buffers */
         if (opcode == MDS_GETXATTR) {
+                size[0] = sizeof(struct mds_body);
                 bufcnt = 2;
         } else {
                 bufcnt = 1;
@@ -291,7 +292,8 @@ int mdc_xattr_common(struct obd_export *exp, struct ll_fid *fid,
                 GOTO(err_out, rc);
 
         if (opcode == MDS_GETXATTR) {
-                body = lustre_swab_repbuf(req, REPLY_REC_OFF, sizeof(*body),
+                struct mds_body * body = lustre_swab_repbuf(req, REPLY_REC_OFF,
+                                          sizeof(*body),
                                           lustre_swab_mds_body);
                 if (body == NULL) {
                         CERROR ("Can't unpack mds_body\n");
@@ -1060,7 +1062,7 @@ int mdc_sync(struct obd_export *exp, struct ll_fid *fid,
         if (!req)
                 RETURN(rc = -ENOMEM);
 
-        mdc_pack_req_body(req, REQ_REC_OFF, 0, fid, 0);
+        mdc_pack_req_body(req, REQ_REC_OFF, 0, fid, 0, 0);
 
         ptlrpc_req_set_repsize(req, 2, size);
 
index 9a88e5c..44628ad 100644 (file)
@@ -633,6 +633,7 @@ static int mds_getattr_internal(struct obd_device *obd, struct dentry *dentry,
         LASSERT(body != NULL);                 /* caller prepped reply */
 
         mds_pack_inode2fid(&body->fid1, inode);
+        body->flags = reqbody->flags; /* copy MDS_BFLAG_EXT_FLAGS if present */
         mds_pack_inode2body(body, inode);
         reply_off++;
 
@@ -673,6 +674,16 @@ static int mds_getattr_internal(struct obd_device *obd, struct dentry *dentry,
                         rc = 0;
                 }
                 reply_off++;
+        } else if (reqbody->valid == OBD_MD_FLFLAGS &&
+                   reqbody->flags & MDS_BFLAG_EXT_FLAGS) {
+                int flags;
+
+                /* We only return the full set of flags on ioctl, otherwise we
+                 * get enough flags from the inode in mds_pack_inode2body(). */
+                rc = fsfilt_iocontrol(obd, inode, NULL, EXT3_IOC_GETFLAGS,
+                                      (long)&flags);
+                if (rc == 0)
+                        body->flags = flags | MDS_BFLAG_EXT_FLAGS;
         }
 
         if (reqbody->valid & OBD_MD_FLMODEASIZE) {
index 40e37b0..17cf471 100644 (file)
@@ -80,7 +80,7 @@ void mds_pack_inode2body(struct mds_body *b, struct inode *inode)
         b->blocks = inode->i_blocks;
         b->uid = inode->i_uid;
         b->gid = inode->i_gid;
-        b->flags = inode->i_flags;
+        b->flags = ll_inode_to_ext_flags(b->flags, inode->i_flags);
         b->rdev = inode->i_rdev;
         /* Return the correct link count for orphan inodes */
         b->nlink = mds_inode_is_orphan(inode) ? 0 : inode->i_nlink;
index 43a63e3..3b181b4 100644 (file)
@@ -101,7 +101,7 @@ int mds_log_op_unlink(struct obd_device *obd, struct inode *inode,
         if (IS_ERR(mds->mds_osc_obd))
                 RETURN(PTR_ERR(mds->mds_osc_obd));
 
-        rc = obd_unpackmd(mds->mds_osc_exp,  &lsm, lmm, lmm_size);
+        rc = obd_unpackmd(mds->mds_osc_exp, &lsm, lmm, lmm_size);
         if (rc < 0)
                 RETURN(rc);
         rc = obd_checkmd(mds->mds_osc_exp, obd->obd_self_export, lsm);
index a291ace..f0bd52e 100644 (file)
@@ -748,8 +748,9 @@ static int mds_finish_open(struct ptlrpc_request *req, struct dentry *dchild,
                 mds_lov_update_objids(obd, ids);
                 OBD_FREE(ids, sizeof(*ids) * mds->mds_lov_desc.ld_tgt_count);
         }
-        if (rc)
+        if (rc) /* coverity[deadcode] */
                 mds_mfd_unlink(mfd, 1);
+
         mds_mfd_put(mfd);
         RETURN(rc);
 }
index 891cd2a..4bd1670 100644 (file)
@@ -605,10 +605,12 @@ static int mds_reint_setattr(struct mds_update_record *rec, int offset,
                  * values specified) then delete default striping from dir. */
                 if (S_ISDIR(inode->i_mode) &&
                     ((lum->lmm_stripe_size == 0 &&
-                      lum->lmm_stripe_offset == (typeof(lum->lmm_stripe_offset))(-1) &&
+                      lum->lmm_stripe_offset ==
+                      (typeof(lum->lmm_stripe_offset))(-1) &&
                       lum->lmm_stripe_count == 0) ||
                     /* lmm_stripe_size == -1 is deprecated in 1.4.6 */
-                    lum->lmm_stripe_size == (typeof(lum->lmm_stripe_size))(-1))){
+                    lum->lmm_stripe_size ==
+                    (typeof(lum->lmm_stripe_size))(-1))){
                         rc = fsfilt_set_md(obd, inode, handle, NULL, 0, "lov");
                         if (rc)
                                 GOTO(cleanup, rc);
index 2e9c52f..bc8afc6 100644 (file)
@@ -121,7 +121,7 @@ static int llog_check_cb(struct llog_handle *handle, struct llog_rec_hdr *rec,
                         RETURN(-EOPNOTSUPP);
                 rc = llog_cat_id2handle(handle, &log_handle, &lir->lid_id);
                 if (rc) {
-                        CDEBUG(D_IOCTL, 
+                        CDEBUG(D_IOCTL,
                                "cannot find log #"LPX64"#"LPX64"#%08x\n",
                                lir->lid_id.lgl_oid, lir->lid_id.lgl_ogr,
                                lir->lid_id.lgl_ogen);
index 8ec0231..8a6e6ba 100644 (file)
@@ -1775,7 +1775,7 @@ static int filter_connect_internal(struct obd_export *exp,
                 spin_unlock(&exp->exp_obd->obd_osfs_lock);
 
                 CDEBUG(D_CACHE, "%s: cli %s/%p ocd_grant: %d want: "
-                       "%lld left: %lld\n", exp->exp_obd->obd_name,
+                       LPU64" left: "LPU64"\n", exp->exp_obd->obd_name,
                        exp->exp_client_uuid.uuid, exp,
                        data->ocd_grant, want, left);
         }
index ae83fb9..6766190 100644 (file)
@@ -467,7 +467,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount,
                 CERROR("Failure to commit OST transaction (%d)?\n", err);
                 rc = err;
         }
-        if (obd->obd_replayable && !err)
+        if (obd->obd_replayable && !rc)
                 LASSERTF(oti->oti_transno <= obd->obd_last_committed,
                          "oti_transno "LPU64" last_committed "LPU64"\n",
                          oti->oti_transno, obd->obd_last_committed);
index b9975fc..5c57e80 100644 (file)
@@ -390,7 +390,7 @@ static int filter_clear_page_cache(struct inode *inode,
         rc = generic_osync_inode(inode, inode->i_mapping,
                                  OSYNC_DATA|OSYNC_METADATA);
          */
-        down(&inode->i_sem);
+        LOCK_INODE_MUTEX(inode);
         current->flags |= PF_SYNCWRITE;
         rc = filemap_fdatawrite(inode->i_mapping);
         rc2 = sync_mapping_buffers(inode->i_mapping);
@@ -398,7 +398,7 @@ static int filter_clear_page_cache(struct inode *inode,
                 rc = rc2;
         rc2 = filemap_fdatawait(inode->i_mapping);
         current->flags &= ~PF_SYNCWRITE;
-        up(&inode->i_sem);
+        UNLOCK_INODE_MUTEX(inode);
         if (rc == 0)
                 rc = rc2;
         if (rc != 0)
@@ -655,10 +655,12 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa,
         fsfilt_check_slow(now, obd_timeout, "direct_io");
 
         err = fsfilt_commit_wait(obd, inode, wait_handle);
-        if (err)
+        if (err) {
+                CERROR("Failure to commit OST transaction (%d)?\n", err);
                 rc = err;
+        }
 
-        if (obd->obd_replayable && !err)
+        if (obd->obd_replayable && !rc)
                 LASSERTF(oti->oti_transno <= obd->obd_last_committed,
                          "oti_transno "LPU64" last_committed "LPU64"\n",
                          oti->oti_transno, obd->obd_last_committed);
index b9ac3d9..49cf171 100644 (file)
@@ -648,7 +648,7 @@ static void ost_prolong_locks(struct obd_export *exp, struct obd_ioobj *obj,
         opd.opd_policy.l_extent.end = (nb[nrbufs - 1].offset +
                                        nb[nrbufs - 1].len - 1) | ~CFS_PAGE_MASK;
 
-        CDEBUG(D_DLMTRACE, "refresh locks: "LPU64"/"LPU64" ("LPU64"->"LPU64")\n",
+        CDEBUG(D_DLMTRACE,"refresh locks: "LPU64"/"LPU64" ("LPU64"->"LPU64")\n",
                res_id.name[0], res_id.name[1], opd.opd_policy.l_extent.start,
                opd.opd_policy.l_extent.end);
         ldlm_resource_iterate(exp->exp_obd->obd_namespace, &res_id,
@@ -853,7 +853,8 @@ static int ost_brw_read(struct ptlrpc_request *req, struct obd_trans_info *oti)
                         ptlrpc_rs_decref(req->rq_reply_state);
                         req->rq_reply_state = NULL;
                 }
-                CWARN("%s: ignoring bulk IO comm error with %s@%s id %s\n",
+                CWARN("%s: ignoring bulk IO comm error with %s@%s id %s - "
+                      "client will retry\n",
                       req->rq_export->exp_obd->obd_name,
                       req->rq_export->exp_client_uuid.uuid,
                       req->rq_export->exp_connection->c_remote_uuid.uuid,
@@ -1094,7 +1095,8 @@ static int ost_brw_write(struct ptlrpc_request *req, struct obd_trans_info *oti)
                         ptlrpc_rs_decref(req->rq_reply_state);
                         req->rq_reply_state = NULL;
                 }
-                CWARN("%s: ignoring bulk IO comm error with %s@%s id %s\n",
+                CWARN("%s: ignoring bulk IO comm error with %s@%s id %s - "
+                      "client will retry\n",
                       req->rq_export->exp_obd->obd_name,
                       req->rq_export->exp_client_uuid.uuid,
                       req->rq_export->exp_connection->c_remote_uuid.uuid,
index 14483fa..c518279 100644 (file)
@@ -2192,8 +2192,12 @@ void lustre_assert_wire_constants(void)
         CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL);
         CLASSERT(OBD_CONNECT_ATTRFID == 0x4000ULL);
         CLASSERT(OBD_CONNECT_NODEVOH == 0x8000ULL);
+        CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x10000ULL);
         /* Sizes and Offsets */
 
+        /* Checks for struct obd_uuid */
+        LASSERTF((int)sizeof(struct obd_uuid) == 40, " found %lld\n",
+                 (long long)(int)sizeof(struct obd_uuid));
 
         /* Checks for struct lustre_handle */
         LASSERTF((int)sizeof(struct lustre_handle) == 8, " found %lld\n",
index e10817b..6270a63 100755 (executable)
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 # script which _must_ complete successfully (at minimum) before checkins to
 # the CVS HEAD are allowed.
 set -vxe
@@ -6,13 +6,13 @@ set -vxe
 PATH=`dirname $0`/../utils:$PATH
 
 [ "$CONFIGS" ] || CONFIGS="local"  #"local lov"
-[ "$MAX_THREADS" ] || MAX_THREADS=10
+[ "$MAX_THREADS" ] || MAX_THREADS=20
+RAMKB=`awk '/MemTotal:/ { print $2 }' /proc/meminfo`
 if [ -z "$THREADS" ]; then
-       KB=`awk '/MemTotal:/ { print $2 }' /proc/meminfo`
-       THREADS=`expr $KB / 16384`
+       THREADS=$((RAMKB / 16384))
        [ $THREADS -gt $MAX_THREADS ] && THREADS=$MAX_THREADS
 fi
-[ "$SIZE" ] || SIZE=40960
+[ "$SIZE" ] || SIZE=$((RAMKB * 2))
 [ "$RSIZE" ] || RSIZE=512
 [ "$UID" ] || UID=1000
 [ "$MOUNT" ] || MOUNT=/mnt/lustre
@@ -53,7 +53,7 @@ for NAME in $CONFIGS; do
        if [ "$DBENCH" != "no" ]; then
                mount_client $MOUNT
                SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'`
-               DB_THREADS=`expr $SPACE / 50000`
+               DB_THREADS=$((SPACE / 50000))
                [ $THREADS -lt $DB_THREADS ] && DB_THREADS=$THREADS
 
                $DEBUG_OFF
@@ -74,18 +74,22 @@ for NAME in $CONFIGS; do
        chown $UID $MOUNT && chmod 700 $MOUNT
        if [ "$BONNIE" != "no" ]; then
                mount_client $MOUNT
+               SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'`
+               [ $SPACE -lt $SIZE ] && SIZE=$((SPACE * 3 / 4))
                $DEBUG_OFF
-               bonnie++ -f -r 0 -s $(($SIZE / 1024)) -n 10 -u $UID -d $MOUNT
+               bonnie++ -f -r 0 -s $((SIZE / 1024)) -n 10 -u $UID -d $MOUNT
                $DEBUG_ON
                $CLEANUP
                $SETUP
        fi
 
-       IOZONE_OPTS="-i 0 -i 1 -i 2 -e -+d -r $RSIZE -s $SIZE"
-       IOZFILE="-f $MOUNT/iozone"
        export O_DIRECT
        if [ "$IOZONE" != "no" ]; then
                mount_client $MOUNT
+               SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'`
+               [ $SPACE -lt $SIZE ] && SIZE=$((SPACE * 3 / 4))
+               IOZONE_OPTS="-i 0 -i 1 -i 2 -e -+d -r $RSIZE -s $SIZE"
+               IOZFILE="-f $MOUNT/iozone"
                $DEBUG_OFF
                iozone $IOZONE_OPTS $IOZFILE
                $DEBUG_ON
@@ -109,16 +113,16 @@ for NAME in $CONFIGS; do
                fi
 
                SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'`
-               IOZ_THREADS=`expr $SPACE / \( $SIZE + $SIZE / 512 \)`
+               IOZ_THREADS=$((SPACE / SIZE * 2 / 3 ))
                [ $THREADS -lt $IOZ_THREADS ] && IOZ_THREADS=$THREADS
-               IOZVER=`iozone -v|awk '/Revision:/ {print $3}'|tr -d .`
+               IOZVER=`iozone -v | awk '/Revision:/ {print $3}' | tr -d .`
                if [ "$IOZ_THREADS" -gt 1 -a "$IOZVER" -ge 3145 ]; then
                        $DEBUG_OFF
                        THREAD=1
                        IOZFILE="-F "
                        while [ $THREAD -le $IOZ_THREADS ]; do
                                IOZFILE="$IOZFILE $MOUNT/iozone.$THREAD"
-                               THREAD=`expr $THREAD + 1`
+                               THREAD=$((THREAD + 1))
                        done
                        iozone $IOZONE_OPTS -t $IOZ_THREADS $IOZFILE
                        $DEBUG_ON
@@ -132,6 +136,8 @@ for NAME in $CONFIGS; do
 
        if [ "$FSX" != "no" ]; then
                mount | grep $MOUNT || $SETUP
+               SPACE=`df -P $MOUNT | tail -n 1 | awk '{ print $4 }'`
+               [ $SPACE -lt $SIZE ] && SIZE=$((SPACE * 3 / 4))
                $DEBUG_OFF
                ./fsx -c 50 -p 1000 -P $TMP -l $SIZE \
                        -N $(($COUNT * 100)) $MOUNT/fsxfile
index 9d79511..df0416d 100644 (file)
@@ -85,6 +85,15 @@ mount_client() {
        zconf_mount `hostname` $MOUNTPATH  || return 96
 }
 
+# remount $2 with "remount,$1"; MOUNTOPT is restored even on failure (rc 96)
+remount_client() {
+       local SAVEMOUNTOPT=$MOUNTOPT MOUNTPATH=$2
+       MOUNTOPT="remount,$1"
+       echo "remount '$1' lustre on ${MOUNTPATH}....."
+       zconf_mount `hostname` $MOUNTPATH || { MOUNTOPT=$SAVEMOUNTOPT; return 96; }
+       MOUNTOPT=$SAVEMOUNTOPT
+}
+
 umount_client() {
        local MOUNTPATH=$1
        echo "umount lustre on ${MOUNTPATH}....."
@@ -648,10 +657,10 @@ test_16() {
         fi
 
         echo "change the mode of $MDSDEV/OBJECTS,LOGS,PENDING to 555"
-        do_facet mds "[ -d $TMPMTPT ] || mkdir -p $TMPMTPT;
-                      mount -o loop -t ext3 $MDSDEV $TMPMTPT || return \$?;
-                      chmod 555 $TMPMTPT/{OBJECTS,LOGS,PENDING} || return \$?;
-                      umount -d $TMPMTPT || return \$?" || return $?
+        do_facet mds "mkdir -p $TMPMTPT &&
+                      mount -o loop -t ext3 $MDSDEV $TMPMTPT &&
+                      chmod 555 $TMPMTPT/{OBJECTS,LOGS,PENDING} &&
+                      umount $TMPMTPT" || return $?
 
         echo "mount Lustre to change the mode of OBJECTS/LOGS/PENDING, then umount Lustre"
        setup
@@ -828,4 +837,25 @@ run_test 22 "interrupt client during recovery mount delay"
 umount_client $MOUNT   
 cleanup_nocli
 
+test_20() {
+       # first format the ost/mdt
+       start_ost
+       start_mds
+       mount_client $MOUNT
+       check_mount || return 43
+       rm -f $DIR/$tfile
+       remount_client ro $MOUNT || return 44
+       touch $DIR/$tfile && echo "$DIR/$tfile created incorrectly" && return 45
+       [ -e $DIR/$tfile ] && echo "$DIR/$tfile exists incorrectly" && return 46
+       remount_client rw $MOUNT || return 47
+       touch $DIR/$tfile
+       [ ! -f $DIR/$tfile ] && echo "$DIR/$tfile missing" && return 48
+       MCNT=`grep -c $MOUNT /etc/mtab || true` # grep -c exits 1 on a count of 0
+       [ "${MCNT:-0}" -ne 1 ] && echo "$MOUNT in /etc/mtab $MCNT times" && return 49
+       umount_client $MOUNT
+       stop_mds
+       stop_ost
+}
+run_test 20 "remount ro,rw mounts work and doesn't break /etc/mtab"
+
 equals_msg "Done"
index d59031c..1cbeee4 100644 (file)
@@ -218,6 +218,35 @@ reboot_facet() {
     fi
 }
 
+# verify clean shutdown: exit 205/204 on dmesg destruct/leak, return 202/203/0
+cleanup_check() {
+    BUSY=`dmesg | grep -i destruct || true`
+    if [ "$BUSY" ]; then
+        echo "$BUSY" 1>&2
+        [ -e $TMP/debug ] && mv $TMP/debug $TMP/debug-busy.`date +%s`
+        exit 205
+    fi
+    LEAK_LUSTRE=`dmesg | tail -n 30 | grep "obd mem.*leaked" || true`
+    LEAK_PORTALS=`dmesg | tail -n 20 | grep "Portals memory leaked" || true`
+    if [ "$LEAK_LUSTRE" -o "$LEAK_PORTALS" ]; then
+        echo "$0: $LEAK_LUSTRE" 1>&2
+        echo "$0: $LEAK_PORTALS" 1>&2
+        echo "$0: Memory leak(s) detected..." 1>&2
+        [ -e $TMP/debug ] && mv $TMP/debug $TMP/debug-leak.`date +%s`
+        exit 204
+    fi
+    # any line printed by "lctl dl" means a device is still configured
+    [ "`lctl dl 2> /dev/null | wc -l`" -gt 0 ] && lctl dl && \
+        echo "$0: lustre didn't clean up..." 1>&2 && return 202 || true
+    # the lnet/libcfs modules should have been unloaded by now
+    if [ "`/sbin/lsmod 2>&1 | egrep 'lnet|libcfs'`" ]; then
+        echo "$0: modules still loaded..." 1>&2
+        /sbin/lsmod 1>&2
+        return 203
+    fi
+    return 0
+}
+
 wait_for_host() {
    HOST=$1
    check_network "$HOST" 900
index d730e27..f1f7030 100644 (file)
@@ -28,3 +28,5 @@ wiretest
 llog_reader
 .*.cmd
 .*.d
+llverfs
+llverdev
index 6fd7f84..6b4089b 100644 (file)
@@ -16,7 +16,7 @@ if UTILS
 rootsbin_PROGRAMS = mount.lustre
 sbin_PROGRAMS = lctl obdio obdbarrier lload wirecheck wiretest \
        mount_lustre mkfs_lustre mkfs.lustre \
-       tunefs_lustre tunefs.lustre l_getgroups llverfs llverdev
+       tunefs_lustre tunefs.lustre l_getgroups llverfs llverdev
 bin_PROGRAMS = lfs llog_reader
 lib_LIBRARIES = liblustreapi.a
 sbin_SCRIPTS = $(sbin_scripts)
@@ -37,7 +37,11 @@ lload_DEPENDENCIES := $(LIBPTLCTL)
 lload_SOURCES = lload.c 
 
 llverfs_LDADD := -lext2fs -le2p
+if BLKID
 llverdev_LDADD := -lext2fs -lblkid
+else
+llverdev_LDADD := -lext2fs
+endif
 
 liblustreapi_a_SOURCES = liblustreapi.c
 
index 296f600..fb99a5f 100755 (executable)
@@ -1032,6 +1032,10 @@ class kmod:
                     run('/sbin/rmmod kiiblnd')
                 if mod_loaded("kviblnd"):
                     run('/sbin/rmmod kviblnd')
+                if mod_loaded("kciblnd"):
+                    run('/sbin/rmmod kciblnd')
+                if mod_loaded("ko2iblnd"):
+                    run('/sbin/rmmod ko2iblnd')
                 if mod_loaded("kralnd"):
                     run('/sbin/rmmod kralnd')
                 if mod_loaded("kptllnd"):
index a00db8e..3494a04 100644 (file)
  * pattern in bulk.
  */
 
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#ifndef LUSTRE_UTILS
+#define LUSTRE_UTILS
+#endif
+#ifndef _LARGEFILE64_SOURCE
+#define _LARGEFILE64_SOURCE
+#endif
+#ifndef _FILE_OFFSET_BITS
+#define _FILE_OFFSET_BITS 64
+#endif
+
 #include <features.h>
 #include <stdlib.h>
 #include <stdio.h>
@@ -35,7 +48,6 @@
 #include <sys/time.h>
 #include <gnu/stubs.h>
 #include <ext2fs/ext2fs.h>
-#include <blkid/blkid.h>
 
 #define ONE_MB (1024 * 1024)
 #define ONE_GB (1024 * 1024 * 1024)
@@ -128,20 +140,53 @@ static int open_dev(const char *devname, int mode)
        return (fd);
 }
 
+#ifdef HAVE_BLKID_BLKID_H
+#include <blkid/blkid.h>
+#endif
 /*
  * sizeof_dev: Returns size of device in bytes
  */
-static unsigned long long sizeof_dev(int fd)
+static loff_t sizeof_dev(int fd)
 {
-       blkid_loff_t numbytes = 0;
+       loff_t numbytes;
 
+#ifdef HAVE_BLKID_BLKID_H
        numbytes = blkid_get_dev_size(fd);
        if (numbytes <= 0) {
                fprintf(stderr, "%s: blkid_get_dev_size(%s) failed",
                        progname, devname);
                return 1;
        }
+       goto out;
+#else
+# if defined BLKGETSIZE64      /* in sys/mount.h */
+       if (ioctl(fd, BLKGETSIZE64, &numbytes) >= 0)
+               goto out;
+# endif
+# if defined BLKGETSIZE                /* in sys/mount.h */
+       {
+               unsigned long sectors;
+
+               if (ioctl(fd, BLKGETSIZE, &sectors) >= 0) {
+                       numbytes = (loff_t)sectors << 9;
+                       goto out;
+               }
+       }
+# endif
+       {
+               struct stat statbuf;
+
+               if (fstat(fd, &statbuf) == 0 && S_ISREG(statbuf.st_mode)) {
+                       numbytes = statbuf.st_size;
+                       goto out;
+               }
+       }
+       fprintf(stderr, "%s: unable to determine size of %s\n",
+                       progname, devname);
+       return 0;
+#endif
 
+out:
        if (verbose)
                printf("%s: %s is %llu bytes (%g GB) in size\n",
                       progname, devname,
@@ -155,7 +200,7 @@ static unsigned long long sizeof_dev(int fd)
  * Returns 0 if test offset and timestamp is correct otherwise 1.
  */
 int verify_chunk(char *chunk_buf, size_t chunksize,
-                loff_t chunk_off, time_t time_st)
+                unsigned long long chunk_off, time_t time_st)
 {
        struct block_data *bd;
        char *chunk_end;
@@ -225,8 +270,8 @@ void show_rate(char *op, unsigned long long offset, unsigned long long *count)
  * write_chunk: write the chunk_buf on the device. The number of write
  * operations are based on the parameters write_end, offset, and chunksize.
  */
-int write_chunks(loff_t offset, loff_t write_end, char *chunk_buf,
-                size_t chunksize, time_t time_st)
+int write_chunks(unsigned long long offset, unsigned long long write_end,
+                char *chunk_buf, size_t chunksize, time_t time_st)
 {
        unsigned long long stride, count = 0;
 
@@ -281,8 +326,8 @@ int write_chunks(loff_t offset, loff_t write_end, char *chunk_buf,
  * read_chunk: reads the chunk_buf from the device. The number of read
  * operations are based on the parameters read_end, offset, and chunksize.
  */
-int read_chunks(loff_t offset, loff_t read_end, char *chunk_buf,
-               size_t chunksize, time_t time_st)
+int read_chunks(unsigned long long offset, unsigned long long read_end,
+               char *chunk_buf, size_t chunksize, time_t time_st)
 {
        unsigned long long stride, count = 0;
 
index 77e54dd..c25fa78 100644 (file)
  * that the data in each file is correct.
  */
 
+#ifndef _GNU_SOURCE
 #define _GNU_SOURCE
+#endif
+#ifndef LUSTRE_UTILS
+#define LUSTRE_UTILS
+#endif
+#ifndef _LARGEFILE64_SOURCE
+#define _LARGEFILE64_SOURCE
+#endif
+#ifndef _FILE_OFFSET_BITS
+#define _FILE_OFFSET_BITS 64
+#endif
 
 #include <features.h>
 #include <stdlib.h>
@@ -48,9 +59,9 @@
 
 /* Structure for writing test pattern */
 struct block_data {
-       loff_t  bd_offset;
-       time_t  bd_time;
-       ino_t   bd_inode;
+       unsigned long long bd_offset;
+       unsigned long long bd_time;
+       unsigned long long bd_inode;
 };
 static char *progname;             /* name by which this program was run. */
 static unsigned verbose = 1;       /* prints offset in kB, operation rate */
@@ -60,7 +71,7 @@ char *testdir;                            /* name of device to be tested. */
 static unsigned full = 1;          /* flag to full check */
 static int errno_local;                    /* local copy of errno */
 static unsigned long num_files;     /* Total number of files for read/write */
-static loff_t file_size;           /* Size of each file */
+static loff_t file_size = 4*ONE_GB; /* Size of each file */
 static unsigned files_in_dir = 32;  /* number of files in each directioy */
 static unsigned num_dirs = 30000;   /* total number of directories */
 const int dirmode = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH;
@@ -128,8 +139,9 @@ static int open_file(const char *file, int flag)
  * Verify_chunk: Verifies test pattern in each 4kB (BLOCKSIZE) is correct.
  * Returns 0 if test offset and timestamp is correct otherwise 1.
  */
-int verify_chunk(char *chunk_buf, size_t chunksize, loff_t chunk_off,
-                time_t time_st, ino_t inode_st, char *file)
+int verify_chunk(char *chunk_buf, size_t chunksize,unsigned long long chunk_off,
+                unsigned long long time_st, unsigned long long inode_st,
+                char *file)
 {
        struct block_data *bd;
        char *chunk_end;
@@ -142,9 +154,9 @@ int verify_chunk(char *chunk_buf, size_t chunksize, loff_t chunk_off,
                    (bd->bd_inode == inode_st))
                        continue;
                fprintf(stderr,"\n%s: verify %s failed offset/timestamp/inode "
-                       "%llu/%lu/%lu: found %llu/%lu/%lu instead\n", progname,
-                       file, chunk_off, time_st, inode_st, bd->bd_offset,
-                       bd->bd_time, bd->bd_inode);
+                       "%llu/%llu/%llu: found %llu/%llu/%llu instead\n",
+                       progname, file, chunk_off, time_st, inode_st,
+                       bd->bd_offset, bd->bd_time, bd->bd_inode);
                return 1;
        }
        return 0;
@@ -175,8 +187,8 @@ void fill_chunk(char *chunk_buf, size_t chunksize, loff_t chunk_off,
  * write_chunk: write the chunk_buf on the device. The number of write
  * operations are based on the parameters write_end, offset, and chunksize.
  */
-int write_chunks(int fd, loff_t offset, loff_t write_end, char *chunk_buf,
-                size_t chunksize, time_t time_st,
+int write_chunks(int fd, unsigned long long offset,unsigned long long write_end,
+                char *chunk_buf, size_t chunksize, time_t time_st,
                 ino_t inode_st, const char *file)
 {
        unsigned long long stride;
@@ -226,8 +238,9 @@ int write_chunks(int fd, loff_t offset, loff_t write_end, char *chunk_buf,
  * read_chunk: reads the chunk_buf from the device. The number of read
  * operations are based on the parameters read_end, offset, and chunksize.
  */
-int read_chunks(int fd, loff_t offset, loff_t read_end, char *chunk_buf,
-               size_t chunksize, time_t time_st, ino_t inode_st, char *file)
+int read_chunks(int fd, unsigned long long offset, unsigned long long read_end,
+               char *chunk_buf, size_t chunksize, time_t time_st,
+               ino_t inode_st, char *file)
 {
        unsigned long long stride;
 
@@ -497,7 +510,6 @@ int main(int argc, char **argv)
                usage(1);
                return -1;
        }
-       file_size = 4 * ONE_GB;
        if (!readoption && !writeoption) {
                readoption = 1;
                writeoption = 1;
index 4b3adc1..8ee65e4 100644 (file)
@@ -1117,6 +1117,7 @@ main(int argc, char **argv)
         CHECK_CDEFINE(OBD_CONNECT_JOIN);
         CHECK_CDEFINE(OBD_CONNECT_ATTRFID);
         CHECK_CDEFINE(OBD_CONNECT_NODEVOH);
+        CHECK_CDEFINE(OBD_CONNECT_RMT_CLIENT);
 
         COMMENT("Sizes and Offsets");
         BLANK_LINE();
index 0ce8377..3b2701d 100644 (file)
@@ -243,8 +243,12 @@ void lustre_assert_wire_constants(void)
         CLASSERT(OBD_CONNECT_JOIN == 0x2000ULL);
         CLASSERT(OBD_CONNECT_ATTRFID == 0x4000ULL);
         CLASSERT(OBD_CONNECT_NODEVOH == 0x8000ULL);
+        CLASSERT(OBD_CONNECT_RMT_CLIENT == 0x10000ULL);
         /* Sizes and Offsets */
 
+        /* Checks for struct obd_uuid */
+        LASSERTF((int)sizeof(struct obd_uuid) == 40, " found %lld\n",
+                 (long long)(int)sizeof(struct obd_uuid));
 
         /* Checks for struct lustre_handle */
         LASSERTF((int)sizeof(struct lustre_handle) == 8, " found %lld\n",