From 1efc338624e38461f826e521f3094b8a8e6d7d8c Mon Sep 17 00:00:00 2001 From: adilger Date: Tue, 20 Mar 2007 06:39:53 +0000 Subject: [PATCH] Branch HEAD Untested patch for handling uninitialized block groups. Adding to CVS to avoid loss, not currently in any patch series. b=11269 --- .../kernel_patches/patches/ext3-uninit-2.6.9.patch | 779 +++++++++++++++++++++ 1 file changed, 779 insertions(+) create mode 100644 lustre/kernel_patches/patches/ext3-uninit-2.6.9.patch diff --git a/lustre/kernel_patches/patches/ext3-uninit-2.6.9.patch b/lustre/kernel_patches/patches/ext3-uninit-2.6.9.patch new file mode 100644 index 0000000..f66d713 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-uninit-2.6.9.patch @@ -0,0 +1,779 @@ +Keep a high water mark of used inodes for each group to improve e2fsck time. +Block and inode bitmaps can be uninitialized on disk via a flag in the +group descriptor to avoid reading or scanning them at e2fsck time. +A checksum of each group descriptor is used to ensure that corruption in +the group descriptor's bit flags does not cause incorrect operation. 
+
+Index: linux-stage/fs/ext3/balloc.c
+===================================================================
+--- linux-stage.orig/fs/ext3/balloc.c	2007-03-14 04:44:12.000000000 -0400
++++ linux-stage/fs/ext3/balloc.c	2007-03-14 16:23:48.000000000 -0400
+@@ -73,6 +73,121 @@ struct ext3_group_desc * ext3_get_group_
+ 	return gdp + desc;
+ }
+ 
++/*
++ * Return the number of blocks that become allocatable in @block_group once
++ * its uninitialized block bitmap is set up: the blocks of a full group minus
++ * the superblock and group descriptor copies, the two bitmap blocks and the
++ * inode table.  @gdp is not consulted here.
++ *
++ * The first and last groups are expected to be always initialized, so a
++ * full EXT3_BLOCKS_PER_GROUP() is assumed (NOTE(review): confirm for last).
++ */
++unsigned long ext3_free_blocks_after_init(struct super_block *sb,
++					  int block_group,
++					  struct ext3_group_desc *gdp)
++{
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	unsigned long blks;
++	int has_sb;
++
++	/* EXT3_BLOCKS_PER_GROUP() is already in CPU byte order */
++	blks = EXT3_BLOCKS_PER_GROUP(sb);
++	/* Account for sb, gdt */
++	has_sb = ext3_bg_has_super(sb, block_group);
++	if (has_sb)
++		blks--;
++
++	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) ||
++	    block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg)) {
++		if (has_sb) {
++			blks -= le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
++			blks -= ext3_bg_num_gdb(sb, block_group);
++		}
++	} else { /* For META_BG BLOCK GROUPS */
++		int group_rel = (block_group -
++				 le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
++				EXT3_DESC_PER_BLOCK(sb);
++		if (group_rel == 0 || group_rel == 1 ||
++		    (group_rel == EXT3_DESC_PER_BLOCK(sb) - 1))
++			blks--;
++	}
++
++	/* Account for bitmaps and inode table */
++	blks -= sbi->s_itb_per_group + 2;
++	return blks;
++}
++
++/*
++ * Initialize the in-memory copy of an uninitialized block bitmap.
++ *
++ * @bh must be locked by the caller.  The bitmap is cleared and the bits
++ * for the group's own metadata (superblock and descriptor copies, both
++ * bitmaps, inode table) are set.  If the group descriptor checksum does
++ * not verify, every block is marked in use and the free counts in @gdp
++ * are zeroed so that nothing is allocated from the group.
++ */
++void ext3_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
++			    int block_group, struct ext3_group_desc *gdp)
++{
++	unsigned long startblk;
++	int bit, bit_max;
++	int csum_ok;
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++
++	J_ASSERT_BH(bh, buffer_locked(bh));
++
++	/* ext3_group_desc_csum() asserts that the group lock is held */
++	spin_lock(sb_bgl_lock(sbi, block_group));
++	csum_ok = ext3_group_desc_csum_verify(sbi, block_group, gdp);
++	if (!csum_ok) {
++		gdp->bg_free_blocks_count = 0;
++		gdp->bg_free_inodes_count = 0;
++	}
++	spin_unlock(sb_bgl_lock(sbi, block_group));
++
++	/* If the checksum is bad, then just mark all bits in use for safety */
++	if (!csum_ok) {
++		ext3_error(sb, __FUNCTION__, "Checksum bad for group %u\n",
++			   block_group);
++		memset(bh->b_data, 0xff, sb->s_blocksize);
++		return;
++	}
++	memset(bh->b_data, 0, bh->b_size);
++
++	/* Set bits for sb, gdt */
++	startblk = block_group * EXT3_BLOCKS_PER_GROUP(sb) +
++		le32_to_cpu(sbi->s_es->s_first_data_block);
++
++	bit = 0;
++	bit_max = ext3_bg_has_super(sb, block_group);
++
++	if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) ||
++	    block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg)) {
++		if (bit_max) {
++			bit_max += ext3_bg_num_gdb(sb, block_group);
++			bit_max +=le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
++		}
++	} else { /* For META_BG_BLOCK_GROUPS */
++		int group_rel = (block_group -
++				 le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
++				EXT3_DESC_PER_BLOCK(sb);
++		if (group_rel == 0 || group_rel == 1 ||
++		    (group_rel == EXT3_DESC_PER_BLOCK(sb) - 1))
++			bit_max += 1;
++	}
++	for (; bit < bit_max; bit++)
++		ext3_set_bit(bit, bh->b_data);
++
++	/* Set bits for bitmaps and inode table */
++	ext3_set_bit(le32_to_cpu(gdp->bg_block_bitmap) - startblk, bh->b_data);
++	ext3_set_bit(le32_to_cpu(gdp->bg_inode_bitmap) - startblk, bh->b_data);
++	/* the inode table spans s_itb_per_group blocks from its own start */
++	bit = le32_to_cpu(gdp->bg_inode_table) - startblk;
++	bit_max = bit + sbi->s_itb_per_group;
++	for (; bit < bit_max; bit++)
++		ext3_set_bit(bit, bh->b_data);
++}
++
+ /*
+  * Read the bitmap for a given block_group, reading into the specified
+  * slot in the superblock's bitmap cache.
+@@ -88,7 +170,19 @@ read_block_bitmap(struct super_block *sb + desc = ext3_get_group_desc (sb, block_group, NULL); + if (!desc) + goto error_out; +- bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); ++ if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { ++ bh = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); ++ if (!buffer_uptodate(bh)) { ++ lock_buffer(bh); ++ if (!buffer_uptodate(bh)) { ++ ext3_init_block_bitmap(sb, bh,block_group,desc); ++ set_buffer_uptodate(bh); ++ } ++ unlock_buffer(bh); ++ } ++ } else { ++ bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); ++ } + if (!bh) + ext3_error (sb, "read_block_bitmap", + "Cannot read block bitmap - " +@@ -427,6 +520,7 @@ do_more: + gdp->bg_free_blocks_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + + *pdquot_freed_blocks); ++ gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp); + spin_unlock(sb_bgl_lock(sbi, block_group)); + percpu_counter_mod(&sbi->s_freeblocks_counter, count); + +@@ -1175,7 +1270,7 @@ int ext3_new_block_old(handle_t *handle, + + goal_group = group_no; + retry: +- free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); ++ free_blocks = EXT3_BG_BLOCKS_FREE(sb, group_no, gdp); + if (free_blocks > 0) { + ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) % + EXT3_BLOCKS_PER_GROUP(sb)); +@@ -1206,7 +1304,7 @@ retry: + *errp = -EIO; + goto out; + } +- free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); ++ free_blocks = EXT3_BG_BLOCKS_FREE(sb, group_no, gdp); + if (free_blocks <= 0) + continue; + +@@ -1306,10 +1407,17 @@ allocated: + ret_block, goal_hits, goal_attempts); + + spin_lock(sb_bgl_lock(sbi, group_no)); ++ free_blocks = 0; ++ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { ++ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); ++ free_blocks = ext3_free_blocks_after_init(sb, group_no, gdp); ++ gdp->bg_free_blocks_count = cpu_to_le16(free_blocks); ++ } + gdp->bg_free_blocks_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 
1); ++ gdp->bg_checksum = ext3_group_desc_csum(sbi, group_no, gdp); + spin_unlock(sb_bgl_lock(sbi, group_no)); +- percpu_counter_mod(&sbi->s_freeblocks_counter, -1); ++ percpu_counter_mod(&sbi->s_freeblocks_counter, free_blocks); + + BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); + err = ext3_journal_dirty_metadata(handle, gdp_bh); +@@ -1379,7 +1479,7 @@ unsigned long ext3_count_free_blocks(str + gdp = ext3_get_group_desc(sb, i, NULL); + if (!gdp) + continue; +- desc_count += le16_to_cpu(gdp->bg_free_blocks_count); ++ desc_count += EXT3_BG_BLOCKS_FREE(sb, i, gdp); + brelse(bitmap_bh); + bitmap_bh = read_block_bitmap(sb, i); + if (bitmap_bh == NULL) +@@ -1387,7 +1487,7 @@ unsigned long ext3_count_free_blocks(str + + x = ext3_count_free(bitmap_bh, sb->s_blocksize); + printk("group %d: stored = %d, counted = %lu\n", +- i, le16_to_cpu(gdp->bg_free_blocks_count), x); ++ i, EXT3_BG_BLOCKS_FREE(sb, i, gdp), x); + bitmap_count += x; + } + brelse(bitmap_bh); +@@ -1403,7 +1503,7 @@ unsigned long ext3_count_free_blocks(str + gdp = ext3_get_group_desc(sb, i, NULL); + if (!gdp) + continue; +- desc_count += le16_to_cpu(gdp->bg_free_blocks_count); ++ desc_count += EXT3_BG_BLOCKS_FREE(sb, i, gdp); + } + + return desc_count; +@@ -1590,7 +1587,7 @@ void ext3_check_blocks_bitmap (struct su + gdp = ext3_get_group_desc (sb, i, NULL); + if (!gdp) + continue; +- desc_count += le16_to_cpu(gdp->bg_free_blocks_count); ++ desc_count += EXT3_BG_BLOCKS_FREE(sb, i, gdp); + brelse(bitmap_bh); + bitmap_bh = read_block_bitmap(sb, i); + if (bitmap_bh == NULL) +@@ -1628,11 +1625,11 @@ void ext3_check_blocks_bitmap (struct su + "group %d is marked free", j, i); + + x = ext3_count_free(bitmap_bh, sb->s_blocksize); +- if (le16_to_cpu(gdp->bg_free_blocks_count) != x) ++ if (EXT3_BG_BLOCKS_FREE(sb, i, gdp) != x) + ext3_error (sb, "ext3_check_blocks_bitmap", + "Wrong free blocks count for group %d, " + "stored = %d, counted = %lu", i, +- le16_to_cpu(gdp->bg_free_blocks_count), x); ++ 
EXT3_BG_BLOCKS_FREE(sb, i, gdp), x);
+ 			bitmap_count += x;
+ 		}
+ 		brelse(bitmap_bh);
+Index: linux-stage/fs/ext3/ialloc.c
+--- linux-stage.orig/fs/ext3/ialloc.c	2007-03-14 04:44:13.000000000 -0400
++++ linux-stage/fs/ext3/ialloc.c	2007-03-14 16:13:29.000000000 -0400
+@@ -43,6 +43,65 @@
+  * the free blocks count in the block.
+  */
+ 
++/*
++ * Mark the tail of @bitmap, from @start_bit towards @end_bit, as in use.
++ *
++ * To avoid calling the atomic setbit hundreds or thousands of times, we only
++ * need to use it within a single byte (to ensure we get endianness right).
++ * We can use memset for the rest of the bitmap as there are no other users.
++ */
++static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
++{
++	int i;
++
++	if (start_bit >= end_bit)
++		return;
++
++	ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
++	for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
++		ext3_set_bit(i, bitmap);
++	if (i < end_bit)
++		memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
++}
++
++/*
++ * Initialize the in-memory copy of an uninitialized inode bitmap.
++ *
++ * @bh must be locked by the caller.  All inodes of the group are marked
++ * free and the bits from EXT3_INODES_PER_GROUP() up to
++ * EXT3_BLOCKS_PER_GROUP() are marked in use.  If the group descriptor
++ * checksum does not verify, every bit is set and the free counts in @gdp
++ * are zeroed instead.
++ */
++void ext3_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
++			    int block_group, struct ext3_group_desc *gdp)
++{
++	struct ext3_sb_info *sbi = EXT3_SB(sb);
++	int csum_ok;
++
++	J_ASSERT_BH(bh, buffer_locked(bh));
++
++	/* ext3_group_desc_csum() asserts that the group lock is held */
++	spin_lock(sb_bgl_lock(sbi, block_group));
++	csum_ok = ext3_group_desc_csum_verify(sbi, block_group, gdp);
++	if (!csum_ok) {
++		gdp->bg_free_blocks_count = 0;
++		gdp->bg_free_inodes_count = 0;
++	}
++	spin_unlock(sb_bgl_lock(sbi, block_group));
++
++	/* If the checksum is bad, then just mark all bits in use for safety */
++	if (!csum_ok) {
++		ext3_error(sb, __FUNCTION__, "Checksum bad for group %u\n",
++			   block_group);
++		memset(bh->b_data, 0xff, sb->s_blocksize);
++		return;
++	}
++
++	/* one bit per inode, rounded up to whole bytes */
++	memset(bh->b_data, 0, (EXT3_INODES_PER_GROUP(sb) + 7) / 8);
++	mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb),
++			bh->b_data);
++}
+ 
+ /*
+  * Read the inode allocation bitmap for a given block_group, reading
+@@ -59,8 +103,22 @@ read_inode_bitmap(struct super_block * s
+ 	desc = ext3_get_group_desc(sb, block_group, NULL);
+ 	if (!desc)
+ 		goto error_out;
+-
+-	bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
++	if (desc->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) {
++		bh = sb_getblk(sb, le32_to_cpu(desc->bg_inode_bitmap));
++		/* bh may be NULL; the !bh check below reports that */
++		if (bh && !buffer_uptodate(bh)) {
++			/* ext3_init_inode_bitmap() asserts a locked buffer */
++			lock_buffer(bh);
++			if (!buffer_uptodate(bh)) {
++				ext3_init_inode_bitmap(sb, bh, block_group,
++						       desc);
++				set_buffer_uptodate(bh);
++			}
++			unlock_buffer(bh);
++		}
++	} else {
++		bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
++	}
+ 	if (!bh)
+ 		ext3_error(sb, "read_inode_bitmap",
+ 			    "Cannot read inode bitmap - "
+@@ -169,6 +175,8 @@ void ext3_free_inode (handle_t *handle,
+ 			if (is_directory)
+ 				gdp->bg_used_dirs_count = cpu_to_le16(
+ 				  le16_to_cpu(gdp->bg_used_dirs_count) - 1);
++			gdp->bg_checksum = ext3_group_desc_csum(sbi,block_group,
++								gdp);
+ 			spin_unlock(sb_bgl_lock(sbi, block_group));
+ 			percpu_counter_inc(&sbi->s_freeinodes_counter);
+ 			if (is_directory)
+@@ -202,8 +210,8 @@ error_return:
+ static int find_group_dir(struct super_block *sb, struct inode *parent)
+ {
+ 	int ngroups = EXT3_SB(sb)->s_groups_count;
+-	int freei, avefreei;
+-	struct ext3_group_desc *desc, *best_desc = NULL;
++	int freei, avefreei, freeb, best_freeb = 0;
++	struct ext3_group_desc *desc;
+ 	struct buffer_head *bh;
+ 	int group, best_group = -1;
+ 
+@@ -212,15 +220,14 @@ static int find_group_dir(struct super_b
+ 
+ 	for (group = 0; group < ngroups; group++) {
+ 		desc = ext3_get_group_desc (sb, group, &bh);
+-		if (!desc || !desc->bg_free_inodes_count)
++		if (!desc || (freei = EXT3_BG_INODES_FREE(sb, group, desc)) ==0)
+ 			continue;
+-		if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
++		if (freei < avefreei)
+ 			continue;
+-		if (!best_desc ||
+-		    (le16_to_cpu(desc->bg_free_blocks_count) >
+-		     le16_to_cpu(best_desc->bg_free_blocks_count))) {
++		freeb = EXT3_BG_BLOCKS_PNLT(sb, group, desc);
++		if (freeb > best_freeb) {
+ 			best_group = group;
+-			best_desc = desc;
++			best_freeb = freeb;
+ 		}
+ 	}
+ 	return best_group;
+@@ -284,14 +294,15 @@ static int find_group_orlov(struct super
+ 	parent_group = (unsigned)group % ngroups;
+ 	for (i = 0; i < ngroups; i++) {
+ 		group = (parent_group + i) %
ngroups; +- desc = ext3_get_group_desc (sb, group, &bh); +- if (!desc || !desc->bg_free_inodes_count) ++ desc = ext3_get_group_desc(sb, group, &bh); ++ if (!desc || ++ (freei = EXT3_BG_INODES_FREE(sb, groups, desc)) ==0) + continue; + if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir) + continue; +- if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) ++ if (freei < avefreei) + continue; +- if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb) ++ if (EXT3_BG_BLOCKS_PNLT(sb, group, desc) < avefreeb) + continue; + best_group = group; + best_ndir = le16_to_cpu(desc->bg_used_dirs_count); +@@ -318,13 +330,13 @@ static int find_group_orlov(struct super + for (i = 0; i < ngroups; i++) { + group = (parent_group + i) % ngroups; + desc = ext3_get_group_desc (sb, group, &bh); +- if (!desc || !desc->bg_free_inodes_count) ++ if (!desc || (freei = EXT3_BG_INODES_FREE(sb, group, desc)) ==0) + continue; + if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) + continue; +- if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes) ++ if (freei < min_inodes) + continue; +- if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks) ++ if (EXT3_BG_BLOCKS_PNLT(sb, group, desc) < min_blocks) + continue; + return group; + } +@@ -333,9 +347,9 @@ fallback: + for (i = 0; i < ngroups; i++) { + group = (parent_group + i) % ngroups; + desc = ext3_get_group_desc (sb, group, &bh); +- if (!desc || !desc->bg_free_inodes_count) ++ if (!desc || (freei = EXT3_BG_INODES_FREE(sb, group, desc)) ==0) + continue; +- if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) ++ if (freei >= avefreei) + return group; + } + +@@ -362,6 +377,7 @@ static int find_group_other(struct super + int group, i; + int best_group = -1; + int avefreeb, freeb, best_group_freeb = 0; ++ int freei; + + /* + * Try to place the inode in its parent directory +@@ -380,8 +396,8 @@ static int find_group_other(struct super + */ + group = parent_group; + desc = ext3_get_group_desc (sb, group, &bh); +- if (desc && 
le16_to_cpu(desc->bg_free_inodes_count) && +- (!S_ISREG(mode) || le16_to_cpu(desc->bg_free_blocks_count))) ++ if (desc && EXT3_BG_INODES_FREE(sb, group, desc) && ++ (!S_ISREG(mode) || EXT3_BG_BLOCKS_PNLT(sb, group, desc))) + return group; + avefreeb = le32_to_cpu(sbi->s_es->s_free_blocks_count) / ngroups; + /* +@@ -392,11 +408,11 @@ static int find_group_other(struct super + if (group >= ngroups) + group -= ngroups; + desc = ext3_get_group_desc (sb, group, &bh); +- if (!desc || !desc->bg_free_inodes_count) ++ if (!desc || EXT3_BG_INODES_FREE(sb, group, desc) == 0) + continue; + if (!S_ISREG(mode)) + return group; +- if (le16_to_cpu(desc->bg_free_blocks_count) >= avefreeb) ++ if (EXT3_BG_BLOCKS_PNLT(sb, group, desc) >= avefreeb) + return group; + } + +@@ -413,9 +431,10 @@ static int find_group_other(struct super + if (++group >= ngroups) + group = 0; + desc = ext3_get_group_desc (sb, group, &bh); +- if (!desc || !desc->bg_free_inodes_count) ++ freei = EXT3_BG_INODES_FREE(sb, group, desc); ++ if (!desc || !freei) + continue; +- freeb = le16_to_cpu(desc->bg_free_blocks_count); ++ freeb = EXT3_BG_BLOCKS_PNLT(sb, group, desc); + if (freeb > best_group_freeb) { + best_group_freeb = freeb; + best_group = group; +@@ -453,6 +472,7 @@ struct inode *ext3_new_inode(handle_t *h + int err = 0; + struct inode *ret; + int i; ++ int unused_flag = 0; + + /* Cannot create files in a deleted directory */ + if (!dir || !dir->i_nlink) +@@ -581,18 +601,34 @@ got: + err = ext3_journal_get_write_access(handle, bh2); + if (err) goto fail; + spin_lock(sb_bgl_lock(sbi, group)); +- gdp->bg_free_inodes_count = +- cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); ++ if (gdp->bg_free_inodes_count == 0) { ++ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) { ++ gdp->bg_itable_unused = ++ cpu_to_le16(le32_to_cpu(es->s_inodes_per_group)); ++ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_INODE_UNINIT); ++ } ++ /* If we didn't allocate from free initialized inodes, ++ * then we allocated from 
uninitialized inodes. In this ++ * case initialize one inode. */ ++ gdp->bg_itable_unused = ++ cpu_to_le16(le16_to_cpu(gdp->bg_itable_unused) - 1); ++ unused_flag = 1; ++ } else { ++ gdp->bg_free_inodes_count = ++ cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) -1); ++ } + if (S_ISDIR(mode)) { + gdp->bg_used_dirs_count = + cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); + } ++ gdp->bg_checksum = ext3_group_desc_csum(sbi, group, gdp); + spin_unlock(sb_bgl_lock(sbi, group)); + BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); + err = ext3_journal_dirty_metadata(handle, bh2); + if (err) goto fail; + +- percpu_counter_dec(&sbi->s_freeinodes_counter); ++ if (!unused_flag) ++ percpu_counter_dec(&sbi->s_freeinodes_counter); + if (S_ISDIR(mode)) + percpu_counter_inc(&sbi->s_dirs_counter); + sb->s_dirt = 1; +Index: linux-stage/fs/ext3/mballoc.c +=================================================================== +--- linux-stage.orig/fs/ext3/mballoc.c 2007-03-14 04:44:12.000000000 -0400 ++++ linux-stage/fs/ext3/mballoc.c 2007-03-14 16:13:29.000000000 -0400 +@@ -107,6 +107,7 @@ struct ext3_group_info { + unsigned long bb_state; + unsigned long bb_tid; + struct ext3_free_metadata *bb_md_cur; ++ struct ext3_group_desc *bb_gdp; + unsigned short bb_first_free; + unsigned short bb_free; + unsigned short bb_fragments; +@@ -511,10 +512,7 @@ static int ext3_mb_init_cache(struct pag + if (first_group + i >= EXT3_SB(sb)->s_groups_count) + break; + +- err = -EIO; +- desc = ext3_get_group_desc(sb, first_group + i, NULL); +- if (desc == NULL) +- goto out; ++ desc = EXT3_GROUP_INFO(sb, first_group + i)->bb_gdp; + + err = -ENOMEM; + bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); +@@ -529,7 +530,12 @@ static int ext3_mb_init_cache(struct pag + unlock_buffer(bh[i]); + continue; + } +- ++ if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { ++ ext3_init_block_bitmap(sb, bh[i], first_group + i,desc); ++ set_buffer_uptodate(bh[i]); ++ unlock_buffer(bh[i]); ++ 
continue; ++ } + get_bh(bh[i]); + bh[i]->b_end_io = end_buffer_read_sync; + submit_bh(READ, bh[i]); +@@ -1246,6 +1244,10 @@ static int ext3_mb_good_group(struct ext + switch (cr) { + case 0: + J_ASSERT(ac->ac_2order != 0); ++ /* If this group is uninitialized, skip it initially */ ++ if (grp->bb_gdp->bg_flags & ++ cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) ++ return 0; + bits = ac->ac_sb->s_blocksize_bits + 1; + for (i = ac->ac_2order; i <= bits; i++) + if (grp->bb_counters[i] > 0) +@@ -1421,7 +1421,8 @@ repeat: + } + + ac.ac_groups_scanned++; +- if (cr == 0) ++ if (cr == 0 || (e3b.bd_info->bb_gdp->bg_flags & ++ cpu_to_le16(EXT3_BG_BLOCK_UNINIT))) + ext3_mb_simple_scan_group(&ac, &e3b); + else if (cr == 1 && *len == sbi->s_stripe) + ext3_mb_scan_aligned(&ac, &e3b); +@@ -1500,7 +1499,8 @@ found: + * bitmap to be journaled */ + + ext3_debug("using block group %d(%d)\n", +- ac.ac_b_group.group, gdp->bg_free_blocks_count); ++ ac.ac_b_group.group, ++ EXT3_BG_BLOCKS_FREE(sb, ac.ac_b_group.group, gdp)); + + bitmap_bh = read_block_bitmap(sb, ac.ac_b_ex.fe_group); + if (!bitmap_bh) { +@@ -1537,9 +1543,17 @@ found: + mb_set_bits(bitmap_bh->b_data, ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len); + + spin_lock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group)); ++ if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { ++ gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); ++ gdp->bg_free_blocks_count = ++ cpu_to_le16(ext3_free_blocks_after_init(sb, ++ ac.ac_b_ex.fe_group, ++ gdp)); ++ } + gdp->bg_free_blocks_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + - ac.ac_b_ex.fe_len); ++ gdp->bg_checksum = ext3_group_desc_csum(sbi, ac.ac_b_ex.fe_group, gdp); + spin_unlock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group)); + percpu_counter_mod(&sbi->s_freeblocks_counter, - ac.ac_b_ex.fe_len); + +@@ -1992,16 +1991,16 @@ int ext3_mb_init_backend(struct super_bl + i--; + goto err_freebuddy; + } ++ memset(meta_group_info[j], 0, len); + desc = ext3_get_group_desc(sb, i, NULL); ++ meta_group_info[j]->bb_gdp = 
desc; + if (desc == NULL) { + printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i); + goto err_freebuddy; + } +- memset(meta_group_info[j], 0, len); + set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, + &meta_group_info[j]->bb_state); +- meta_group_info[j]->bb_free = +- le16_to_cpu(desc->bg_free_blocks_count); ++ meta_group_info[j]->bb_free = EXT3_BG_BLOCKS_FREE(sb, j, desc); + } + + return 0; +@@ -2397,6 +2410,7 @@ do_more: + spin_lock(sb_bgl_lock(sbi, block_group)); + gdp->bg_free_blocks_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); ++ gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp); + spin_unlock(sb_bgl_lock(sbi, block_group)); + percpu_counter_mod(&sbi->s_freeblocks_counter, count); + +Index: linux-stage/fs/ext3/resize.c +=================================================================== +--- linux-stage.orig/fs/ext3/resize.c 2007-03-14 04:44:13.000000000 -0400 ++++ linux-stage/fs/ext3/resize.c 2007-03-14 16:13:29.000000000 -0400 +@@ -807,6 +807,9 @@ int ext3_group_add(struct super_block *s + gdp->bg_inode_table = cpu_to_le32(input->inode_table); + gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); + gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb)); ++ spin_lock(sb_bgl_lock(sbi, input->group)); /* not strictly needed */ ++ gdp->bg_checksum = ext3_group_desc_csum(sbi, input->group, gdp); ++ spin_unlock(sb_bgl_lock(sbi, input->group)); + + /* + * Make the new blocks and inodes valid next. 
We do this before
+Index: linux-stage/fs/ext3/super.c
+===================================================================
+--- linux-stage.orig/fs/ext3/super.c	2007-03-14 04:44:13.000000000 -0400
++++ linux-stage/fs/ext3/super.c	2007-03-14 16:13:29.000000000 -0400
+@@ -1084,6 +1084,77 @@ static int ext3_setup_super(struct super
+ 	return res;
+ }
+ 
++/*
++ * Fold @len bytes at @buf into the running 16-bit checksum @crc and return
++ * the updated value.
++ *
++ * NOTE(review): this is a hand-rolled update step rather than lib/crc16.c;
++ * confirm it matches what e2fsprogs computes for the same bytes.
++ */
++static __u16 crc16(__u16 crc, const u8 *buf, size_t len)
++{
++	__u16 tmp;
++
++	while (len--) {
++		crc ^= *buf++;
++		crc ^= (u8)crc >> 4;
++		tmp = (u8)crc;
++		crc ^= (tmp ^ (tmp << 1)) << 4;
++	}
++	return crc;
++}
++
++/*
++ * Compute the checksum of group descriptor @gdp of group @block_group.
++ *
++ * The checksum covers the superblock UUID, the little-endian group number
++ * and the descriptor up to (but not including) bg_checksum.  It is returned
++ * in on-disk (little-endian) form; 0 is returned when the GDT_CSUM feature
++ * is not enabled.  On SMP the caller must hold sb_bgl_lock() of the group.
++ */
++__u16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 block_group,
++			   struct ext3_group_desc *gdp)
++{
++	__u16 crc = 0;
++
++#ifdef CONFIG_SMP
++	J_ASSERT(spin_is_locked(sb_bgl_lock(sbi, block_group)));
++#endif
++	if (sbi->s_es->s_feature_ro_compat &
++	    cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
++		int offset = offsetof(struct ext3_group_desc, bg_checksum);
++		__le32 le_group = cpu_to_le32(block_group);
++
++		crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
++		crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
++		crc = crc16(crc, (__u8 *)gdp, offset);
++		offset += sizeof(gdp->bg_checksum); /* skip checksum */
++		BUG_ON(offset != sizeof(*gdp)); /* XXX handle s_desc_size */
++		/*
++		 * TODO: for the larger struct ext4_group_desc, also checksum
++		 * the bytes from offset up to s_es->s_desc_size.
++		 */
++	}
++
++	return cpu_to_le16(crc);
++}
++
++/*
++ * Check the stored checksum of @gdp.  Returns 1 if it matches, or if the
++ * GDT_CSUM feature is not enabled (the field then carries no checksum and
++ * must not be compared), and 0 on mismatch.  On SMP the caller must hold
++ * sb_bgl_lock() of the group.
++ */
++int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 block_group,
++				struct ext3_group_desc *gdp)
++{
++	if (!(sbi->s_es->s_feature_ro_compat &
++	      cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)))
++		return 1;
++
++	return gdp->bg_checksum == ext3_group_desc_csum(sbi, block_group, gdp);
++}
++
+ /* Called at mount-time, super-block is locked */
+ static int ext3_check_descriptors (struct super_block * sb)
+ {
+@@ -1133,6 +1179,23 @@ static int ext3_check_descriptors (struc
+ 				    le32_to_cpu(gdp->bg_inode_table));
+ 			return 0;
+ 		}
++		{
++			__u16 csum;
++			int csum_ok;
++
++			/* the csum helpers assert the group lock is held */
++			spin_lock(sb_bgl_lock(sbi, i));
++			csum = ext3_group_desc_csum(sbi, i, gdp);
++			csum_ok = ext3_group_desc_csum_verify(sbi, i, gdp);
++			spin_unlock(sb_bgl_lock(sbi, i));
++			if (!csum_ok) {
++				ext3_error(sb, __FUNCTION__,
++					   "Checksum for group %d failed (%u != %u)\n",
++					   i, le16_to_cpu(csum),
++					   le16_to_cpu(gdp->bg_checksum));
++				return 0;
++			}
++		}
+ 		block += EXT3_BLOCKS_PER_GROUP(sb);
+ 		gdp++;
+ 	}
+Index: linux-stage/include/linux/ext3_fs.h
+===================================================================
+--- linux-stage.orig/include/linux/ext3_fs.h	2007-03-14 16:29:47.000000000 -0400
++++ linux-stage/include/linux/ext3_fs.h	2007-03-14 16:37:23.000000000 -0400
+@@ -118,6 +118,26 @@ struct statfs;
+ 					 (s)->s_first_ino)
+ #endif
+ 
++/* Macro-instructions used to calculate Free inodes and blocks count.
++ * Value is approximate for the blocks count until it is initialized. */
++#define EXT3_BG_INODES_FREE(sb,gr,gdp) ((gdp)->bg_flags &		\
++					 cpu_to_le16(EXT3_BG_INODE_UNINIT) ? \
++					 EXT3_INODES_PER_GROUP(sb) :	\
++					 le16_to_cpu((gdp)->bg_itable_unused) + \
++					 le16_to_cpu((gdp)->bg_free_inodes_count))
++#define EXT3_BG_BLOCKS_FREE(sb,gr,gdp) ((gdp)->bg_flags &		\
++					 cpu_to_le16(EXT3_BG_BLOCK_UNINIT) ? \
++					 ext3_free_blocks_after_init(sb,gr,gdp) :\
++					 le16_to_cpu((gdp)->bg_free_blocks_count))
++
++/* When looking for groups to allocate from, we reduce the free blocks count
++ * to avoid always picking uninitialized groups.
Essentially, we don't start ++ * allocating from uninitialized groups until other groups are 1/2 full. */ ++#define EXT3_BG_BLOCKS_PNLT(sb,gr,gdp) ((gdp)->bg_flags & \ ++ cpu_to_le16(EXT3_BG_BLOCK_UNINIT) ? \ ++ EXT3_BLOCKS_PER_GROUP(sb) / 2 : \ ++ le16_to_cpu((gdp)->bg_free_blocks_count)) ++ + /* + * Macro-instructions used to manage fragments + */ +@@ -138,16 +148,22 @@ struct statfs; + */ + struct ext3_group_desc + { +- __le32 bg_block_bitmap; /* Blocks bitmap block */ +- __le32 bg_inode_bitmap; /* Inodes bitmap block */ ++ __le32 bg_block_bitmap; /* Blocks bitmap block */ ++ __le32 bg_inode_bitmap; /* Inodes bitmap block */ + __le32 bg_inode_table; /* Inodes table block */ + __le16 bg_free_blocks_count; /* Free blocks count */ + __le16 bg_free_inodes_count; /* Free inodes count */ + __le16 bg_used_dirs_count; /* Directories count */ +- __u16 bg_pad; +- __le32 bg_reserved[3]; ++ __le16 bg_flags; /* EXT3_BG_flags (UNINIT, etc) */ ++ __le32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ ++ __le16 bg_itable_unused; /* Unused inodes count */ ++ __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ + }; + ++#define EXT3_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ ++#define EXT3_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ ++#define EXT3_BG_BLOCK_WRITTEN 0x0004 /* On-disk structures were overwritten */ ++ + /* + * Macro-instructions used to manage group descriptors + */ +@@ -564,6 +581,7 @@ static inline struct ext3_inode_info *EX + #define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 + #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 + #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 ++#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 + #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 + + #define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 +@@ -580,6 +598,7 @@ static inline struct ext3_inode_info *EX + EXT3_FEATURE_INCOMPAT_EXTENTS) + #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT3_FEATURE_RO_COMPAT_LARGE_FILE| 
\ ++ EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ + EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \ + EXT3_FEATURE_RO_COMPAT_BTREE_DIR) + +@@ -841,6 +860,16 @@ extern void ext3_unlockfs (struct super_ + extern void ext3_commit_super (struct super_block *, struct ext3_super_block *, int); + extern int ext3_remount (struct super_block *, int *, char *); + extern int ext3_statfs (struct super_block *, struct kstatfs *); ++extern __u16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 group, ++ struct ext3_group_desc *gdp); ++extern int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 group, ++ struct ext3_group_desc *gdp); ++extern unsigned long ext3_free_blocks_after_init(struct super_block *sb, ++ int block_group, ++ struct ext3_group_desc *gdp); ++extern void ext3_init_block_bitmap(struct super_block *sb, ++ struct buffer_head *bh, int group, ++ struct ext3_group_desc *desc); + + #define ext3_std_error(sb, errno) \ + do { \ + +%diffstat + fs/ext3/balloc.c | 116 +++++++++++++++++++++++++++++++++++++++++-- + fs/ext3/ialloc.c | 85 ++++++++++++++++++++++--------- + fs/ext3/mballoc.c | 18 +++++- + fs/ext3/resize.c | 1 + fs/ext3/super.c | 53 +++++++++++++++++++ + include/linux/ext3_fs.h | 36 +++++++++++-- + include/linux/ext3_fs.h.orig | 23 +++++++- + 7 files changed, 294 insertions(+), 38 deletions(-) + -- 1.8.3.1