Whamcloud - gitweb
Branch HEAD
authoradilger <adilger>
Tue, 20 Mar 2007 06:39:53 +0000 (06:39 +0000)
committeradilger <adilger>
Tue, 20 Mar 2007 06:39:53 +0000 (06:39 +0000)
Untested patch for handling uninitialized block groups.
Adding to CVS to avoid loss, not currently in any patch series.
b=11269

lustre/kernel_patches/patches/ext3-uninit-2.6.9.patch [new file with mode: 0644]

diff --git a/lustre/kernel_patches/patches/ext3-uninit-2.6.9.patch b/lustre/kernel_patches/patches/ext3-uninit-2.6.9.patch
new file mode 100644 (file)
index 0000000..f66d713
--- /dev/null
@@ -0,0 +1,779 @@
+Keep a high water mark of used inodes for each group to improve e2fsck time.
+Block and inode bitmaps can be uninitialized on disk via a flag in the
+group descriptor to avoid reading or scanning them at e2fsck time.
+A checksum of each group descriptor is used to ensure that corruption in
+the group descriptor's bit flags does not cause incorrect operation.
+
+Index: linux-stage/fs/ext3/balloc.c
+===================================================================
+--- linux-stage.orig/fs/ext3/balloc.c  2007-03-14 04:44:12.000000000 -0400
++++ linux-stage/fs/ext3/balloc.c       2007-03-14 16:23:48.000000000 -0400
+@@ -73,6 +73,95 @@ struct ext3_group_desc * ext3_get_group_
+       return gdp + desc;
+ }
++/* Free block count of @block_group once its block bitmap is initialized:
++ * blocks per group less sb/gdt, both bitmaps and the inode table.  @gdp is
++ * unused.  Last and first groups are always initialized. */
++unsigned long ext3_free_blocks_after_init(struct super_block *sb,
++                                        int block_group,
++                                        struct ext3_group_desc *gdp)
++{
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++      unsigned long blks = EXT3_BLOCKS_PER_GROUP(sb);
++      int has_sb = ext3_bg_has_super(sb, block_group);
++
++      /* Account for sb, gdt */
++      if (has_sb)
++              blks--;
++
++      if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) ||
++          block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg)) {
++              if (has_sb) {
++                      blks -= le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
++                      blks -= ext3_bg_num_gdb(sb, block_group);
++              }
++      } else { /* For META_BG BLOCK GROUPS */
++              int group_rel = (block_group -
++                               le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
++                              EXT3_DESC_PER_BLOCK(sb);
++              if (group_rel == 0 || group_rel == 1 ||
++                  (group_rel == EXT3_DESC_PER_BLOCK(sb) - 1))
++                      blks--;
++      }
++
++      /* Account for bitmaps and inode table */
++      blks -= sbi->s_itb_per_group + 2;
++      return blks;
++}
++
++/* Build the in-memory block bitmap of a BLOCK_UNINIT group in the locked
++ * buffer @bh; only metadata blocks (sb, gdt, bitmaps, itable) are in use. */
++void ext3_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
++                          int block_group, struct ext3_group_desc *gdp)
++{
++      unsigned long startblk;
++      int bit, bit_max;
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++      J_ASSERT_BH(bh, buffer_locked(bh));
++
++      /* If the checksum is bad, then just mark all bits in use for safety */
++      if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) {
++              ext3_error(sb, __FUNCTION__, "Checksum bad for group %u\n",
++                         block_group);
++              gdp->bg_free_blocks_count = 0;
++              gdp->bg_free_inodes_count = 0;
++              memset(bh->b_data, 0xff, sb->s_blocksize);
++              return;
++      }
++      memset(bh->b_data, 0, bh->b_size);
++
++      /* Set bits for sb, gdt */
++      startblk = block_group * EXT3_BLOCKS_PER_GROUP(sb) +
++              le32_to_cpu(sbi->s_es->s_first_data_block);
++
++      bit = 0;
++      bit_max = ext3_bg_has_super(sb, block_group);
++
++      if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) ||
++          block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg)) {
++              if (bit_max) {
++                      bit_max += ext3_bg_num_gdb(sb, block_group);
++                      bit_max +=le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
++              }
++      } else { /* For META_BG_BLOCK_GROUPS */
++              int group_rel = (block_group -
++                               le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
++                              EXT3_DESC_PER_BLOCK(sb);
++              if (group_rel == 0 || group_rel == 1 ||
++                  (group_rel == EXT3_DESC_PER_BLOCK(sb) - 1))
++                      bit_max += 1;
++      }
++      for (; bit < bit_max; bit++)
++              ext3_set_bit(bit, bh->b_data);
++
++      /* Set bits for bitmaps and inode table */
++      ext3_set_bit(le32_to_cpu(gdp->bg_block_bitmap) - startblk, bh->b_data);
++      ext3_set_bit(le32_to_cpu(gdp->bg_inode_bitmap) - startblk, bh->b_data);
++      bit = le32_to_cpu(gdp->bg_inode_table) - startblk;
++      for (bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++)
++              ext3_set_bit(bit, bh->b_data);
++}
++
+ /*
+  * Read the bitmap for a given block_group, reading into the specified 
+  * slot in the superblock's bitmap cache.
+@@ -88,7 +170,19 @@ read_block_bitmap(struct super_block *sb
+       desc = ext3_get_group_desc (sb, block_group, NULL);
+       if (!desc)
+               goto error_out;
+-      bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
++      if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) {
++              bh = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap));
++              if (!buffer_uptodate(bh)) {
++                      lock_buffer(bh);
++                      if (!buffer_uptodate(bh)) {
++                              ext3_init_block_bitmap(sb, bh,block_group,desc);
++                              set_buffer_uptodate(bh);
++                      }
++                      unlock_buffer(bh);
++              }
++      } else {
++              bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
++      }
+       if (!bh)
+               ext3_error (sb, "read_block_bitmap",
+                           "Cannot read block bitmap - "
+@@ -427,6 +520,7 @@ do_more:
+       gdp->bg_free_blocks_count =
+               cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) +
+                       *pdquot_freed_blocks);
++      gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp);
+       spin_unlock(sb_bgl_lock(sbi, block_group));
+       percpu_counter_mod(&sbi->s_freeblocks_counter, count);
+@@ -1175,7 +1270,7 @@ int ext3_new_block_old(handle_t *handle,
+       goal_group = group_no;
+ retry:
+-      free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
++      free_blocks = EXT3_BG_BLOCKS_FREE(sb, group_no, gdp);
+       if (free_blocks > 0) {
+               ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
+                               EXT3_BLOCKS_PER_GROUP(sb));
+@@ -1206,7 +1304,7 @@ retry:
+                       *errp = -EIO;
+                       goto out;
+               }
+-              free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
++              free_blocks = EXT3_BG_BLOCKS_FREE(sb, group_no, gdp);
+               if (free_blocks <= 0)
+                       continue;
+@@ -1306,10 +1407,17 @@ allocated:
+                       ret_block, goal_hits, goal_attempts);
+       spin_lock(sb_bgl_lock(sbi, group_no));
++      free_blocks = 0;
++      if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) {
++              gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT);
++              free_blocks = ext3_free_blocks_after_init(sb, group_no, gdp);
++              gdp->bg_free_blocks_count = cpu_to_le16(free_blocks);
++      }
+       gdp->bg_free_blocks_count =
+                       cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
++      gdp->bg_checksum = ext3_group_desc_csum(sbi, group_no, gdp);
+       spin_unlock(sb_bgl_lock(sbi, group_no));
+       percpu_counter_mod(&sbi->s_freeblocks_counter, -1);
+       BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
+       err = ext3_journal_dirty_metadata(handle, gdp_bh);
+@@ -1379,7 +1479,7 @@ unsigned long ext3_count_free_blocks(str
+               gdp = ext3_get_group_desc(sb, i, NULL);
+               if (!gdp)
+                       continue;
+-              desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
++              desc_count += EXT3_BG_BLOCKS_FREE(sb, i, gdp);
+               brelse(bitmap_bh);
+               bitmap_bh = read_block_bitmap(sb, i);
+               if (bitmap_bh == NULL)
+@@ -1387,7 +1487,7 @@ unsigned long ext3_count_free_blocks(str
+               x = ext3_count_free(bitmap_bh, sb->s_blocksize);
+               printk("group %d: stored = %d, counted = %lu\n",
+-                      i, le16_to_cpu(gdp->bg_free_blocks_count), x);
++                      i, EXT3_BG_BLOCKS_FREE(sb, i, gdp), x);
+               bitmap_count += x;
+       }
+       brelse(bitmap_bh);
+@@ -1403,7 +1503,7 @@ unsigned long ext3_count_free_blocks(str
+               gdp = ext3_get_group_desc(sb, i, NULL);
+               if (!gdp)
+                       continue;
+-              desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
++              desc_count += EXT3_BG_BLOCKS_FREE(sb, i, gdp);
+       }
+       return desc_count;
+@@ -1590,7 +1587,7 @@ void ext3_check_blocks_bitmap (struct su
+               gdp = ext3_get_group_desc (sb, i, NULL);
+               if (!gdp)
+                       continue;
+-              desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
++              desc_count += EXT3_BG_BLOCKS_FREE(sb, i, gdp);
+               brelse(bitmap_bh);
+               bitmap_bh = read_block_bitmap(sb, i);
+               if (bitmap_bh == NULL)
+@@ -1628,11 +1625,11 @@ void ext3_check_blocks_bitmap (struct su
+                                           "group %d is marked free", j, i);
+               x = ext3_count_free(bitmap_bh, sb->s_blocksize);
+-              if (le16_to_cpu(gdp->bg_free_blocks_count) != x)
++              if (EXT3_BG_BLOCKS_FREE(sb, i, gdp) != x)
+                       ext3_error (sb, "ext3_check_blocks_bitmap",
+                                   "Wrong free blocks count for group %d, "
+                                   "stored = %d, counted = %lu", i,
+-                                  le16_to_cpu(gdp->bg_free_blocks_count), x);
++                                  EXT3_BG_BLOCKS_FREE(sb, i, gdp), x);
+               bitmap_count += x;
+       }
+       brelse(bitmap_bh);
+Index: linux-stage/fs/ext3/ialloc.c
+--- linux-stage.orig/fs/ext3/ialloc.c  2007-03-14 04:44:13.000000000 -0400
++++ linux-stage/fs/ext3/ialloc.c       2007-03-14 16:13:29.000000000 -0400
+@@ -43,6 +43,47 @@
+  * the free blocks count in the block.
+  */
++/* Mark the tail of @bitmap in use.  ext3_set_bit() is only needed up to the
++ * next byte boundary (so bit order within the byte is right); the whole
++ * bytes after that are filled with memset() as there are no other users. */
++static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
++{
++      int bit = start_bit;
++
++      if (start_bit >= end_bit)
++              return;
++
++      ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
++      while (bit < ((start_bit + 7) & ~7UL)) {
++              ext3_set_bit(bit, bitmap);
++              bit++;
++      }
++      if (bit < end_bit)
++              memset(bitmap + (bit >> 3), 0xff, (end_bit - bit) >> 3);
++}
++
++/* Fill locked buffer @bh with the inode bitmap of an INODE_UNINIT group */
++void ext3_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
++                          int block_group, struct ext3_group_desc *gdp)
++{
++      struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++      J_ASSERT_BH(bh, buffer_locked(bh));
++      /* If the checksum is bad, then just mark all bits in use for safety */
++      if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) {
++              ext3_error(sb, __FUNCTION__, "Checksum bad for group %u\n",
++                         block_group);
++              memset(bh->b_data, 0xff, sb->s_blocksize);
++              gdp->bg_free_blocks_count = 0;
++              gdp->bg_free_inodes_count = 0;
++              return;
++      }
++
++      /* Clear the inode bits, then mark the bits after the last inode used */
++      memset(bh->b_data, 0, (EXT3_INODES_PER_GROUP(sb) + 7) / 8);
++      mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb),
++                      bh->b_data);
++}
+ /*
+  * Read the inode allocation bitmap for a given block_group, reading
+@@ -59,8 +103,21 @@ read_inode_bitmap(struct super_block * s
+       desc = ext3_get_group_desc(sb, block_group, NULL);
+       if (!desc)
+               goto error_out;
+-
+-      bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
++      if (desc->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) {
++              bh = sb_getblk(sb, le32_to_cpu(desc->bg_inode_bitmap));
++              if (!buffer_uptodate(bh)) {
++                      /* ext3_init_inode_bitmap() asserts bh is locked */
++                      lock_buffer(bh);
++                      if (!buffer_uptodate(bh)) {
++                              ext3_init_inode_bitmap(sb, bh, block_group,
++                                                     desc);
++                              set_buffer_uptodate(bh);
++                      }
++                      unlock_buffer(bh);
++              }
++      } else {
++              bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
++      }
+       if (!bh)
+               ext3_error(sb, "read_inode_bitmap",
+                           "Cannot read inode bitmap - "
+@@ -169,6 +175,8 @@ void ext3_free_inode (handle_t *handle, 
+                       if (is_directory)
+                               gdp->bg_used_dirs_count = cpu_to_le16(
+                                 le16_to_cpu(gdp->bg_used_dirs_count) - 1);
++                      gdp->bg_checksum = ext3_group_desc_csum(sbi,block_group,
++                                                              gdp);
+                       spin_unlock(sb_bgl_lock(sbi, block_group));
+                       percpu_counter_inc(&sbi->s_freeinodes_counter);
+                       if (is_directory)
+@@ -202,8 +210,8 @@ error_return:
+ static int find_group_dir(struct super_block *sb, struct inode *parent)
+ {
+       int ngroups = EXT3_SB(sb)->s_groups_count;
+-      int freei, avefreei;
+-      struct ext3_group_desc *desc, *best_desc = NULL;
++      int freei, avefreei, freeb, best_freeb = 0;
++      struct ext3_group_desc *desc;
+       struct buffer_head *bh;
+       int group, best_group = -1;
+@@ -212,15 +220,14 @@ static int find_group_dir(struct super_b
+       for (group = 0; group < ngroups; group++) {
+               desc = ext3_get_group_desc (sb, group, &bh);
+-              if (!desc || !desc->bg_free_inodes_count)
++              if (!desc || (freei = EXT3_BG_INODES_FREE(sb, group, desc)) ==0)
+                       continue;
+-              if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
++              if (freei < avefreei)
+                       continue;
+-              if (!best_desc || 
+-                  (le16_to_cpu(desc->bg_free_blocks_count) >
+-                   le16_to_cpu(best_desc->bg_free_blocks_count))) {
++              freeb = EXT3_BG_BLOCKS_PNLT(sb, group, desc);
++              if (freeb > best_freeb) {
+                       best_group = group;
+-                      best_desc = desc;
++                      best_freeb = freeb;
+               }
+       }
+       return best_group;
+@@ -284,14 +294,15 @@ static int find_group_orlov(struct super
+               parent_group = (unsigned)group % ngroups;
+               for (i = 0; i < ngroups; i++) {
+                       group = (parent_group + i) % ngroups;
+-                      desc = ext3_get_group_desc (sb, group, &bh);
+-                      if (!desc || !desc->bg_free_inodes_count)
++                      desc = ext3_get_group_desc(sb, group, &bh);
++                      if (!desc ||
++                          (freei = EXT3_BG_INODES_FREE(sb, group, desc)) == 0)
+                               continue;
+                       if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
+                               continue;
+-                      if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
++                      if (freei < avefreei)
+                               continue;
+-                      if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
++                      if (EXT3_BG_BLOCKS_PNLT(sb, group, desc) < avefreeb)
+                               continue;
+                       best_group = group;
+                       best_ndir = le16_to_cpu(desc->bg_used_dirs_count);
+@@ -318,13 +330,13 @@ static int find_group_orlov(struct super
+       for (i = 0; i < ngroups; i++) {
+               group = (parent_group + i) % ngroups;
+               desc = ext3_get_group_desc (sb, group, &bh);
+-              if (!desc || !desc->bg_free_inodes_count)
++              if (!desc || (freei = EXT3_BG_INODES_FREE(sb, group, desc)) ==0)
+                       continue;
+               if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
+                       continue;
+-              if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
++              if (freei < min_inodes)
+                       continue;
+-              if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
++              if (EXT3_BG_BLOCKS_PNLT(sb, group, desc) < min_blocks)
+                       continue;
+               return group;
+       }
+@@ -333,9 +347,9 @@ fallback:
+       for (i = 0; i < ngroups; i++) {
+               group = (parent_group + i) % ngroups;
+               desc = ext3_get_group_desc (sb, group, &bh);
+-              if (!desc || !desc->bg_free_inodes_count)
++              if (!desc || (freei = EXT3_BG_INODES_FREE(sb, group, desc)) ==0)
+                       continue;
+-              if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
++              if (freei >= avefreei)
+                       return group;
+       }
+@@ -362,6 +377,7 @@ static int find_group_other(struct super
+       int group, i;
+       int best_group = -1;
+       int avefreeb, freeb, best_group_freeb = 0;
++      int freei;
+       /*
+        * Try to place the inode in its parent directory
+@@ -380,8 +396,8 @@ static int find_group_other(struct super
+        */
+       group = parent_group;
+       desc = ext3_get_group_desc (sb, group, &bh);
+-      if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
+-          (!S_ISREG(mode) || le16_to_cpu(desc->bg_free_blocks_count)))
++      if (desc && EXT3_BG_INODES_FREE(sb, group, desc) &&
++          (!S_ISREG(mode) || EXT3_BG_BLOCKS_PNLT(sb, group, desc)))
+               return group;
+       avefreeb = le32_to_cpu(sbi->s_es->s_free_blocks_count) / ngroups;
+       /*
+@@ -392,11 +408,11 @@ static int find_group_other(struct super
+               if (group >= ngroups)
+                       group -= ngroups;
+               desc = ext3_get_group_desc (sb, group, &bh);
+-              if (!desc || !desc->bg_free_inodes_count)
++              if (!desc || EXT3_BG_INODES_FREE(sb, group, desc) == 0)
+                       continue;
+               if (!S_ISREG(mode))
+                       return group;
+-              if (le16_to_cpu(desc->bg_free_blocks_count) >= avefreeb)
++              if (EXT3_BG_BLOCKS_PNLT(sb, group, desc) >= avefreeb)
+                       return group;
+       }
+@@ -413,9 +431,10 @@ static int find_group_other(struct super
+               if (++group >= ngroups)
+                       group = 0;
+               desc = ext3_get_group_desc (sb, group, &bh);
+-              if (!desc || !desc->bg_free_inodes_count)
++              if (!desc ||
++                  (freei = EXT3_BG_INODES_FREE(sb, group, desc)) == 0)
+                       continue;
+-              freeb = le16_to_cpu(desc->bg_free_blocks_count);
++              freeb = EXT3_BG_BLOCKS_PNLT(sb, group, desc);
+               if (freeb > best_group_freeb) {
+                       best_group_freeb = freeb;
+                       best_group = group;
+@@ -453,6 +472,7 @@ struct inode *ext3_new_inode(handle_t *h
+       int err = 0;
+       struct inode *ret;
+       int i;
++      int unused_flag = 0;
+       /* Cannot create files in a deleted directory */
+       if (!dir || !dir->i_nlink)
+@@ -581,18 +601,34 @@ got:
+       err = ext3_journal_get_write_access(handle, bh2);
+       if (err) goto fail;
+       spin_lock(sb_bgl_lock(sbi, group));
+-      gdp->bg_free_inodes_count =
+-              cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
++      if (gdp->bg_free_inodes_count == 0) {
++              if (gdp->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) {
++                      gdp->bg_itable_unused =
++                             cpu_to_le16(le32_to_cpu(es->s_inodes_per_group));
++                      gdp->bg_flags &= cpu_to_le16(~EXT3_BG_INODE_UNINIT);
++              }
++              /* If we didn't allocate from free initialized inodes,
++               * then we allocated from uninitialized inodes. In this
++               * case initialize one inode. */
++              gdp->bg_itable_unused =
++                      cpu_to_le16(le16_to_cpu(gdp->bg_itable_unused) - 1);
++              unused_flag = 1;
++      } else {
++              gdp->bg_free_inodes_count =
++                      cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) -1);
++      }
+       if (S_ISDIR(mode)) {
+               gdp->bg_used_dirs_count =
+                       cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
+       }
++      gdp->bg_checksum = ext3_group_desc_csum(sbi, group, gdp);
+       spin_unlock(sb_bgl_lock(sbi, group));
+       BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
+       err = ext3_journal_dirty_metadata(handle, bh2);
+       if (err) goto fail;
+-      percpu_counter_dec(&sbi->s_freeinodes_counter);
++      if (!unused_flag)
++              percpu_counter_dec(&sbi->s_freeinodes_counter);
+       if (S_ISDIR(mode))
+               percpu_counter_inc(&sbi->s_dirs_counter);
+       sb->s_dirt = 1;
+Index: linux-stage/fs/ext3/mballoc.c
+===================================================================
+--- linux-stage.orig/fs/ext3/mballoc.c 2007-03-14 04:44:12.000000000 -0400
++++ linux-stage/fs/ext3/mballoc.c      2007-03-14 16:13:29.000000000 -0400
+@@ -107,6 +107,7 @@ struct ext3_group_info {
+       unsigned long   bb_state;
+       unsigned long   bb_tid;
+       struct ext3_free_metadata *bb_md_cur;
++      struct ext3_group_desc *bb_gdp;
+       unsigned short  bb_first_free;
+       unsigned short  bb_free;
+       unsigned short  bb_fragments;
+@@ -511,10 +512,7 @@ static int ext3_mb_init_cache(struct pag
+               if (first_group + i >= EXT3_SB(sb)->s_groups_count)
+                       break;
+-              err = -EIO;
+-              desc = ext3_get_group_desc(sb, first_group + i, NULL);
+-              if (desc == NULL)
+-                      goto out;
++              desc = EXT3_GROUP_INFO(sb, first_group + i)->bb_gdp;
+               err = -ENOMEM;
+               bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap));
+@@ -529,7 +530,12 @@ static int ext3_mb_init_cache(struct pag
+                       unlock_buffer(bh[i]);
+                       continue;
+               }
+-
++              if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) {
++                      ext3_init_block_bitmap(sb, bh[i], first_group + i,desc);
++                      set_buffer_uptodate(bh[i]);
++                      unlock_buffer(bh[i]);
++                      continue;
++              }
+               get_bh(bh[i]);
+               bh[i]->b_end_io = end_buffer_read_sync;
+               submit_bh(READ, bh[i]);
+@@ -1246,6 +1244,10 @@ static int ext3_mb_good_group(struct ext
+       switch (cr) {
+               case 0:
+                       J_ASSERT(ac->ac_2order != 0);
++                      /* If this group is uninitialized, skip it initially */
++                      if (grp->bb_gdp->bg_flags &
++                          cpu_to_le16(EXT3_BG_BLOCK_UNINIT))
++                              return 0;
+                       bits = ac->ac_sb->s_blocksize_bits + 1;
+                       for (i = ac->ac_2order; i <= bits; i++)
+                               if (grp->bb_counters[i] > 0)
+@@ -1421,7 +1421,8 @@ repeat:
+                       }
+                       ac.ac_groups_scanned++;
+-                      if (cr == 0)
++                      if (cr == 0 || (e3b.bd_info->bb_gdp->bg_flags &
++                                      cpu_to_le16(EXT3_BG_BLOCK_UNINIT)))
+                               ext3_mb_simple_scan_group(&ac, &e3b);
+                       else if (cr == 1 && *len == sbi->s_stripe)
+                               ext3_mb_scan_aligned(&ac, &e3b);
+@@ -1500,7 +1499,8 @@ found:
+        * bitmap to be journaled */
+       ext3_debug("using block group %d(%d)\n",
+-                      ac.ac_b_group.group, gdp->bg_free_blocks_count);
++                 ac.ac_b_group.group,
++                 EXT3_BG_BLOCKS_FREE(sb, ac.ac_b_group.group, gdp));
+       bitmap_bh = read_block_bitmap(sb, ac.ac_b_ex.fe_group);
+       if (!bitmap_bh) {
+@@ -1537,9 +1543,17 @@ found:
+       mb_set_bits(bitmap_bh->b_data, ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len);
+       spin_lock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group));
++      if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) {
++              gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT);
++              gdp->bg_free_blocks_count =
++                      cpu_to_le16(ext3_free_blocks_after_init(sb,
++                                                          ac.ac_b_ex.fe_group,
++                                                          gdp));
++      }
+       gdp->bg_free_blocks_count =
+                       cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
+                                       - ac.ac_b_ex.fe_len);
++      gdp->bg_checksum = ext3_group_desc_csum(sbi, ac.ac_b_ex.fe_group, gdp);
+       spin_unlock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group));
+       percpu_counter_mod(&sbi->s_freeblocks_counter, - ac.ac_b_ex.fe_len);
+@@ -1992,16 +1991,16 @@ int ext3_mb_init_backend(struct super_bl
+                       i--;
+                       goto err_freebuddy;
+               }
++              memset(meta_group_info[j], 0, len);
+               desc = ext3_get_group_desc(sb, i, NULL);
++              meta_group_info[j]->bb_gdp = desc;
+               if (desc == NULL) {
+                       printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i);
+                       goto err_freebuddy;
+               }
+-              memset(meta_group_info[j], 0, len);
+               set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT,
+                       &meta_group_info[j]->bb_state);
+-              meta_group_info[j]->bb_free =
+-                      le16_to_cpu(desc->bg_free_blocks_count);
++              meta_group_info[j]->bb_free = EXT3_BG_BLOCKS_FREE(sb, i, desc);
+       }
+       return 0;
+@@ -2397,6 +2410,7 @@ do_more:
+       spin_lock(sb_bgl_lock(sbi, block_group));
+       gdp->bg_free_blocks_count =
+               cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
++      gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp);
+       spin_unlock(sb_bgl_lock(sbi, block_group));
+       percpu_counter_mod(&sbi->s_freeblocks_counter, count);
+Index: linux-stage/fs/ext3/resize.c
+===================================================================
+--- linux-stage.orig/fs/ext3/resize.c  2007-03-14 04:44:13.000000000 -0400
++++ linux-stage/fs/ext3/resize.c       2007-03-14 16:13:29.000000000 -0400
+@@ -807,6 +807,9 @@ int ext3_group_add(struct super_block *s
+       gdp->bg_inode_table = cpu_to_le32(input->inode_table);
+       gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
+       gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb));
++      spin_lock(sb_bgl_lock(sbi, input->group)); /* not strictly needed */
++      gdp->bg_checksum = ext3_group_desc_csum(sbi, input->group, gdp);
++      spin_unlock(sb_bgl_lock(sbi, input->group));
+       /*
+        * Make the new blocks and inodes valid next.  We do this before
+Index: linux-stage/fs/ext3/super.c
+===================================================================
+--- linux-stage.orig/fs/ext3/super.c   2007-03-14 04:44:13.000000000 -0400
++++ linux-stage/fs/ext3/super.c        2007-03-14 16:13:29.000000000 -0400
+@@ -1084,6 +1084,56 @@ static int ext3_setup_super(struct super
+       return res;
+ }
++/* Fold @len bytes starting at @buf into the running 16-bit checksum @crc
++ * and return the updated value. */
++static __u16 crc16(__u16 crc, const u8 *buf, size_t len)
++{
++      for (; len > 0; len--, buf++) {
++              __u16 x = crc ^ *buf;
++
++              x ^= (u8)x >> 4;
++              crc = x ^ (((u8)x ^ ((u8)x << 1)) << 4);
++      }
++      return crc;
++}
++
++/* Compute the checksum of group descriptor @gdp for group @block_group.
++ * Returns 0 when the GDT_CSUM ro-compat feature flag is not set, otherwise
++ * the little-endian crc16 over the sb UUID, the little-endian group number
++ * and the descriptor bytes preceding bg_checksum. */
++__u16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 block_group,
++                         struct ext3_group_desc *gdp)
++{
++      int csum_off = offsetof(struct ext3_group_desc, bg_checksum);
++      __le32 le_group = cpu_to_le32(block_group);
++      __u16 crc;
++
++#ifdef CONFIG_SMP
++      J_ASSERT(spin_is_locked(sb_bgl_lock(sbi, block_group)));
++#endif
++      if (!(sbi->s_es->s_feature_ro_compat &
++            cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)))
++              return cpu_to_le16(0);
++
++      crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
++      crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
++      crc = crc16(crc, (__u8 *)gdp, csum_off);
++      /* XXX handle s_desc_size: a larger ext4-style descriptor would also
++       * need the bytes following bg_checksum folded into the crc */
++      BUG_ON(csum_off + sizeof(gdp->bg_checksum) != sizeof(*gdp));
++
++      return cpu_to_le16(crc);
++}
++
++/* Return 1 if @gdp->bg_checksum equals the checksum recomputed for
++ * @block_group, 0 otherwise.  NOTE(review): ext3_group_desc_csum() asserts
++ * the group's bgl spinlock is held on SMP - confirm every caller holds it. */
++int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 block_group,
++                              struct ext3_group_desc *gdp)
++{
++      return gdp->bg_checksum == ext3_group_desc_csum(sbi, block_group, gdp);
++}
++
+ /* Called at mount-time, super-block is locked */
+ static int ext3_check_descriptors (struct super_block * sb)
+ {
+@@ -1133,6 +1179,13 @@ static int ext3_check_descriptors (struc
+                                       le32_to_cpu(gdp->bg_inode_table));
+                       return 0;
+               }
++              if (!ext3_group_desc_csum_verify(sbi, i, gdp)) {
++                      ext3_error(sb, __FUNCTION__,
++                                 "Checksum for group %d failed (%u != %u)\n",
++                                 i, ext3_group_desc_csum(sbi, i, gdp),
++                                 gdp->bg_checksum);
++                      return 0;
++              }
+               block += EXT3_BLOCKS_PER_GROUP(sb);
+               gdp++;
+       }
+Index: linux-stage/include/linux/ext3_fs.h
+===================================================================
+--- linux-stage.orig/include/linux/ext3_fs.h   2007-03-14 16:29:47.000000000 -0400
++++ linux-stage/include/linux/ext3_fs.h        2007-03-14 16:37:23.000000000 -0400
+@@ -118,6 +118,26 @@ struct statfs;
+                                (s)->s_first_ino)
+ #endif
++/* Free inode and block counts of a group (gdp is evaluated more than once).
++ * Value is approximate for the blocks count until it is initialized. */
++#define EXT3_BG_INODES_FREE(sb,gr,gdp) ((gdp)->bg_flags &                    \
++                                     cpu_to_le16(EXT3_BG_INODE_UNINIT) ?     \
++                                     EXT3_INODES_PER_GROUP(sb) :             \
++                                     le16_to_cpu((gdp)->bg_itable_unused) +  \
++                                     le16_to_cpu((gdp)->bg_free_inodes_count))
++#define EXT3_BG_BLOCKS_FREE(sb,gr,gdp) ((gdp)->bg_flags &                    \
++                                     cpu_to_le16(EXT3_BG_BLOCK_UNINIT) ?     \
++                                     ext3_free_blocks_after_init(sb,gr,gdp) :\
++                                     le16_to_cpu((gdp)->bg_free_blocks_count))
++
++/* When looking for groups to allocate from, we reduce the free blocks count
++ * to avoid always picking uninitialized groups.  Essentially, we don't start
++ * allocating from uninitialized groups until other groups are 1/2 full. */
++#define EXT3_BG_BLOCKS_PNLT(sb,gr,gdp) ((gdp)->bg_flags &                    \
++                                     cpu_to_le16(EXT3_BG_BLOCK_UNINIT) ?     \
++                                     EXT3_BLOCKS_PER_GROUP(sb) / 2 :         \
++                                     le16_to_cpu((gdp)->bg_free_blocks_count))
++
+ /*
+  * Macro-instructions used to manage fragments
+  */
+@@ -138,16 +148,22 @@ struct statfs;
+  */
+ struct ext3_group_desc
+ {
+-      __le32  bg_block_bitmap;                /* Blocks bitmap block */
+-      __le32  bg_inode_bitmap;                /* Inodes bitmap block */
++      __le32  bg_block_bitmap;        /* Blocks bitmap block */
++      __le32  bg_inode_bitmap;        /* Inodes bitmap block */
+       __le32  bg_inode_table;         /* Inodes table block */
+       __le16  bg_free_blocks_count;   /* Free blocks count */
+       __le16  bg_free_inodes_count;   /* Free inodes count */
+       __le16  bg_used_dirs_count;     /* Directories count */
+-      __u16   bg_pad;
+-      __le32  bg_reserved[3];
++      __le16  bg_flags;               /* EXT3_BG_flags (UNINIT, etc) */
++      __le32  bg_reserved[2];         /* Likely block/inode bitmap checksum */
++      __le16  bg_itable_unused;       /* Unused inodes count */
++      __le16  bg_checksum;            /* crc16(sb_uuid+group+desc) */
+ };
++#define EXT3_BG_INODE_UNINIT  0x0001 /* Inode table/bitmap not in use */
++#define EXT3_BG_BLOCK_UNINIT  0x0002 /* Block bitmap not in use */
++#define EXT3_BG_BLOCK_WRITTEN 0x0004 /* On-disk structures were overwritten */
++
+ /*
+  * Macro-instructions used to manage group descriptors
+  */
+@@ -564,6 +581,7 @@ static inline struct ext3_inode_info *EX
+ #define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER   0x0001
+ #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE     0x0002
+ #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR      0x0004
++#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM               0x0010
+ #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK      0x0020
+ #define EXT3_FEATURE_INCOMPAT_COMPRESSION     0x0001
+@@ -580,6 +598,7 @@ static inline struct ext3_inode_info *EX
+                                        EXT3_FEATURE_INCOMPAT_EXTENTS)
+ #define EXT3_FEATURE_RO_COMPAT_SUPP   (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
+                                        EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \
++                                       EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
+                                        EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \
+                                        EXT3_FEATURE_RO_COMPAT_BTREE_DIR)
+@@ -841,6 +860,16 @@ extern void ext3_unlockfs (struct super_
+ extern void ext3_commit_super (struct super_block *, struct ext3_super_block *, int);
+ extern int ext3_remount (struct super_block *, int *, char *);
+ extern int ext3_statfs (struct super_block *, struct kstatfs *);
++extern __u16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 group,
++                                struct ext3_group_desc *gdp);
++extern int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 group,
++                                     struct ext3_group_desc *gdp);
++extern unsigned long ext3_free_blocks_after_init(struct super_block *sb,
++                                               int block_group,
++                                               struct ext3_group_desc *gdp);
++extern void ext3_init_block_bitmap(struct super_block *sb,
++                                 struct buffer_head *bh, int group,
++                                 struct ext3_group_desc *desc);
+ #define ext3_std_error(sb, errno)                             \
+ do {                                                          \
+
+%diffstat
+ fs/ext3/balloc.c             |  116 +++++++++++++++++++++++++++++++++++++++++--
+ fs/ext3/ialloc.c             |   85 ++++++++++++++++++++++---------
+ fs/ext3/mballoc.c            |   18 +++++-
+ fs/ext3/resize.c             |    1 
+ fs/ext3/super.c              |   53 +++++++++++++++++++
+ include/linux/ext3_fs.h      |   36 +++++++++++--
+ include/linux/ext3_fs.h.orig |   23 +++++++-
+ 7 files changed, 294 insertions(+), 38 deletions(-)
+