Keep a high water mark of used inodes for each group to improve e2fsck time. Block and inode bitmaps can be uninitialized on disk via a flag in the group descriptor to avoid reading or scanning them at e2fsck time. A checksum of each group descriptor is used to ensure that corruption in the group descriptor's bit flags does not cause incorrect operation. Index: linux-stage/fs/ext3/balloc.c =================================================================== --- linux-stage.orig/fs/ext3/balloc.c 2007-03-14 04:44:12.000000000 -0400 +++ linux-stage/fs/ext3/balloc.c 2007-03-14 16:23:48.000000000 -0400 @@ -73,6 +73,95 @@ struct ext3_group_desc * ext3_get_group_ return gdp + desc; } +unsigned long ext3_free_blocks_after_init(struct super_block *sb, + int block_group, + struct ext3_group_desc *gdp) +{ + struct ext3_sb_info *sbi = EXT3_SB(sb); + unsigned long blks; + int has_sb; + + /* Last and first groups are always initialized */ + blks = EXT3_BLOCKS_PER_GROUP(sb); /* already in CPU byte order */ + /* Account for sb, gdt */ + has_sb = ext3_bg_has_super(sb, block_group); + if (has_sb) + blks--; + + if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || + block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg)) { + if (has_sb) { + blks -= le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); + blks -= ext3_bg_num_gdb(sb, block_group); + } + } else { /* For META_BG BLOCK GROUPS */ + int group_rel = (block_group - + le32_to_cpu(sbi->s_es->s_first_meta_bg)) % + EXT3_DESC_PER_BLOCK(sb); + if (group_rel == 0 || group_rel == 1 || + (group_rel == EXT3_DESC_PER_BLOCK(sb) - 1)) + blks--; + } + + /* Account for bitmaps and inode table */ + blks -= sbi->s_itb_per_group + 2; + return blks; +} + +/* Initializes an uninitialized block bitmap */ +void ext3_init_block_bitmap(struct super_block *sb, struct buffer_head *bh, + int block_group, struct ext3_group_desc *gdp) +{ + unsigned long startblk; + int bit, bit_max; + struct ext3_sb_info *sbi = EXT3_SB(sb); + + J_ASSERT_BH(bh, buffer_locked(bh)); 
+ + /* If the checksum is bad, then just mark all bits in use for safety */ + if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) { + ext3_error(sb, __FUNCTION__, "Checksum bad for group %u\n", + block_group); + gdp->bg_free_blocks_count = 0; + gdp->bg_free_inodes_count = 0; + memset(bh->b_data, 0xff, sb->s_blocksize); + return; + } + memset(bh->b_data, 0, bh->b_size); + + /* Set bits for sb, gdt */ + startblk = block_group * EXT3_BLOCKS_PER_GROUP(sb) + + le32_to_cpu(sbi->s_es->s_first_data_block); + + bit = 0; + bit_max = ext3_bg_has_super(sb, block_group); + + if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) || + block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg)) { + if (bit_max) { + bit_max += ext3_bg_num_gdb(sb, block_group); + bit_max +=le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks); + } + } else { /* For META_BG_BLOCK_GROUPS */ + int group_rel = (block_group - + le32_to_cpu(sbi->s_es->s_first_meta_bg)) % + EXT3_DESC_PER_BLOCK(sb); + if (group_rel == 0 || group_rel == 1 || + (group_rel == EXT3_DESC_PER_BLOCK(sb) - 1)) + bit_max += 1; + } + for (; bit < bit_max; bit++) + ext3_set_bit(bit, bh->b_data); + + /* Set bits for bitmaps and inode table */ + ext3_set_bit(le32_to_cpu(gdp->bg_block_bitmap) - startblk, bh->b_data); + ext3_set_bit(le32_to_cpu(gdp->bg_inode_bitmap) - startblk, bh->b_data); + bit = le32_to_cpu(gdp->bg_inode_table) - startblk; + bit_max = bit + sbi->s_itb_per_group; /* end is relative to table start */ + for (; bit < bit_max; bit++) + ext3_set_bit(bit, bh->b_data); +} + /* * Read the bitmap for a given block_group, reading into the specified * slot in the superblock's bitmap cache. 
@@ -88,7 +170,19 @@ read_block_bitmap(struct super_block *sb desc = ext3_get_group_desc (sb, block_group, NULL); if (!desc) goto error_out; - bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); + if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { + bh = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); + if (!buffer_uptodate(bh)) { + lock_buffer(bh); + if (!buffer_uptodate(bh)) { + ext3_init_block_bitmap(sb, bh,block_group,desc); + set_buffer_uptodate(bh); + } + unlock_buffer(bh); + } + } else { + bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap)); + } if (!bh) ext3_error (sb, "read_block_bitmap", "Cannot read block bitmap - " @@ -427,6 +520,7 @@ do_more: gdp->bg_free_blocks_count = cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + *pdquot_freed_blocks); + gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_mod(&sbi->s_freeblocks_counter, count); @@ -1175,7 +1270,7 @@ int ext3_new_block_old(handle_t *handle, goal_group = group_no; retry: - free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); + free_blocks = EXT3_BG_BLOCKS_FREE(sb, group_no, gdp); if (free_blocks > 0) { ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) % EXT3_BLOCKS_PER_GROUP(sb)); @@ -1206,7 +1304,7 @@ retry: *errp = -EIO; goto out; } - free_blocks = le16_to_cpu(gdp->bg_free_blocks_count); + free_blocks = EXT3_BG_BLOCKS_FREE(sb, group_no, gdp); if (free_blocks <= 0) continue; @@ -1306,10 +1407,17 @@ allocated: ret_block, goal_hits, goal_attempts); spin_lock(sb_bgl_lock(sbi, group_no)); + free_blocks = 0; + if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); + free_blocks = ext3_free_blocks_after_init(sb, group_no, gdp); + gdp->bg_free_blocks_count = cpu_to_le16(free_blocks); + } gdp->bg_free_blocks_count = cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1); + gdp->bg_checksum = ext3_group_desc_csum(sbi, group_no, gdp); 
spin_unlock(sb_bgl_lock(sbi, group_no)); percpu_counter_mod(&sbi->s_freeblocks_counter, -1); BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor"); err = ext3_journal_dirty_metadata(handle, gdp_bh); @@ -1379,7 +1479,7 @@ unsigned long ext3_count_free_blocks(str gdp = ext3_get_group_desc(sb, i, NULL); if (!gdp) continue; - desc_count += le16_to_cpu(gdp->bg_free_blocks_count); + desc_count += EXT3_BG_BLOCKS_FREE(sb, i, gdp); brelse(bitmap_bh); bitmap_bh = read_block_bitmap(sb, i); if (bitmap_bh == NULL) @@ -1387,7 +1487,7 @@ unsigned long ext3_count_free_blocks(str x = ext3_count_free(bitmap_bh, sb->s_blocksize); printk("group %d: stored = %d, counted = %lu\n", - i, le16_to_cpu(gdp->bg_free_blocks_count), x); + i, (int)EXT3_BG_BLOCKS_FREE(sb, i, gdp), x); bitmap_count += x; } brelse(bitmap_bh); @@ -1403,7 +1503,7 @@ unsigned long ext3_count_free_blocks(str gdp = ext3_get_group_desc(sb, i, NULL); if (!gdp) continue; - desc_count += le16_to_cpu(gdp->bg_free_blocks_count); + desc_count += EXT3_BG_BLOCKS_FREE(sb, i, gdp); } return desc_count; @@ -1590,7 +1587,7 @@ void ext3_check_blocks_bitmap (struct su gdp = ext3_get_group_desc (sb, i, NULL); if (!gdp) continue; - desc_count += le16_to_cpu(gdp->bg_free_blocks_count); + desc_count += EXT3_BG_BLOCKS_FREE(sb, i, gdp); brelse(bitmap_bh); bitmap_bh = read_block_bitmap(sb, i); if (bitmap_bh == NULL) @@ -1628,11 +1625,11 @@ void ext3_check_blocks_bitmap (struct su "group %d is marked free", j, i); x = ext3_count_free(bitmap_bh, sb->s_blocksize); - if (le16_to_cpu(gdp->bg_free_blocks_count) != x) + if (EXT3_BG_BLOCKS_FREE(sb, i, gdp) != x) ext3_error (sb, "ext3_check_blocks_bitmap", "Wrong free blocks count for group %d, " "stored = %d, counted = %lu", i, - le16_to_cpu(gdp->bg_free_blocks_count), x); + (int)EXT3_BG_BLOCKS_FREE(sb, i, gdp), x); bitmap_count += x; } brelse(bitmap_bh); Index: linux-stage/fs/ext3/ialloc.c --- linux-stage.orig/fs/ext3/ialloc.c 
2007-03-14 04:44:13.000000000 -0400 +++ linux-stage/fs/ext3/ialloc.c 2007-03-14 16:13:29.000000000 -0400 @@ -43,6 +43,47 @@ * the free blocks count in the block. */ +/* + * To avoid calling the atomic setbit hundreds or thousands of times, we only + * need to use it within a single byte (to ensure we get endianness right). + * We can use memset for the rest of the bitmap as there are no other users. + */ +static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap) +{ + int i; + + if (start_bit >= end_bit) + return; + + ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit); + for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) + ext3_set_bit(i, bitmap); + if (i < end_bit) + memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); +} + +/* Initializes an uninitialized inode bitmap */ +void ext3_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh, + int block_group, struct ext3_group_desc *gdp) +{ + struct ext3_sb_info *sbi = EXT3_SB(sb); + + J_ASSERT_BH(bh, buffer_locked(bh)); + + /* If the checksum is bad, then just mark all bits in use for safety */ + if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) { + ext3_error(sb, __FUNCTION__, "Checksum bad for group %u\n", + block_group); + memset(bh->b_data, 0xff, sb->s_blocksize); + gdp->bg_free_blocks_count = 0; + gdp->bg_free_inodes_count = 0; + return; + } + + memset(bh->b_data, 0, (EXT3_INODES_PER_GROUP(sb) + 7) / 8); + mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb), + bh->b_data); +} /* * Read the inode allocation bitmap for a given block_group, reading @@ -59,8 +103,19 @@ read_inode_bitmap(struct super_block * s desc = ext3_get_group_desc(sb, block_group, NULL); if (!desc) goto error_out; - - bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); + if (desc->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) { + bh = sb_getblk(sb, le32_to_cpu(desc->bg_inode_bitmap)); + if (!buffer_uptodate(bh)) { + lock_buffer(bh); /* init asserts buffer_locked() */ + if (!buffer_uptodate(bh)) { + ext3_init_inode_bitmap(sb, bh, block_group, desc); + 
set_buffer_uptodate(bh); + } + unlock_buffer(bh); + } + } else { + bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap)); + } if (!bh) ext3_error(sb, "read_inode_bitmap", "Cannot read inode bitmap - " @@ -169,6 +175,8 @@ void ext3_free_inode (handle_t *handle, if (is_directory) gdp->bg_used_dirs_count = cpu_to_le16( le16_to_cpu(gdp->bg_used_dirs_count) - 1); + gdp->bg_checksum = ext3_group_desc_csum(sbi,block_group, + gdp); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_inc(&sbi->s_freeinodes_counter); if (is_directory) @@ -202,8 +210,8 @@ error_return: static int find_group_dir(struct super_block *sb, struct inode *parent) { int ngroups = EXT3_SB(sb)->s_groups_count; - int freei, avefreei; - struct ext3_group_desc *desc, *best_desc = NULL; + int freei, avefreei, freeb, best_freeb = 0; + struct ext3_group_desc *desc; struct buffer_head *bh; int group, best_group = -1; @@ -212,15 +220,14 @@ static int find_group_dir(struct super_b for (group = 0; group < ngroups; group++) { desc = ext3_get_group_desc (sb, group, &bh); - if (!desc || !desc->bg_free_inodes_count) + if (!desc || (freei = EXT3_BG_INODES_FREE(sb, group, desc)) ==0) continue; - if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei) + if (freei < avefreei) continue; - if (!best_desc || - (le16_to_cpu(desc->bg_free_blocks_count) > - le16_to_cpu(best_desc->bg_free_blocks_count))) { + freeb = EXT3_BG_BLOCKS_PNLT(sb, group, desc); + if (freeb > best_freeb) { best_group = group; - best_desc = desc; + best_freeb = freeb; } } return best_group; @@ -284,14 +294,15 @@ static int find_group_orlov(struct super parent_group = (unsigned)group % ngroups; for (i = 0; i < ngroups; i++) { group = (parent_group + i) % ngroups; - desc = ext3_get_group_desc (sb, group, &bh); - if (!desc || !desc->bg_free_inodes_count) + desc = ext3_get_group_desc(sb, group, &bh); + if (!desc || + (freei = EXT3_BG_INODES_FREE(sb, group, desc)) ==0) continue; if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir) continue; - if 
(le16_to_cpu(desc->bg_free_inodes_count) < avefreei) + if (freei < avefreei) continue; - if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb) + if (EXT3_BG_BLOCKS_PNLT(sb, group, desc) < avefreeb) continue; best_group = group; best_ndir = le16_to_cpu(desc->bg_used_dirs_count); @@ -318,13 +330,13 @@ static int find_group_orlov(struct super for (i = 0; i < ngroups; i++) { group = (parent_group + i) % ngroups; desc = ext3_get_group_desc (sb, group, &bh); - if (!desc || !desc->bg_free_inodes_count) + if (!desc || (freei = EXT3_BG_INODES_FREE(sb, group, desc)) ==0) continue; if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs) continue; - if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes) + if (freei < min_inodes) continue; - if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks) + if (EXT3_BG_BLOCKS_PNLT(sb, group, desc) < min_blocks) continue; return group; } @@ -333,9 +347,9 @@ fallback: for (i = 0; i < ngroups; i++) { group = (parent_group + i) % ngroups; desc = ext3_get_group_desc (sb, group, &bh); - if (!desc || !desc->bg_free_inodes_count) + if (!desc || (freei = EXT3_BG_INODES_FREE(sb, group, desc)) ==0) continue; - if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei) + if (freei >= avefreei) return group; } @@ -362,6 +377,7 @@ static int find_group_other(struct super int group, i; int best_group = -1; int avefreeb, freeb, best_group_freeb = 0; + int freei; /* * Try to place the inode in its parent directory @@ -380,8 +396,8 @@ static int find_group_other(struct super */ group = parent_group; desc = ext3_get_group_desc (sb, group, &bh); - if (desc && le16_to_cpu(desc->bg_free_inodes_count) && - (!S_ISREG(mode) || le16_to_cpu(desc->bg_free_blocks_count))) + if (desc && EXT3_BG_INODES_FREE(sb, group, desc) && + (!S_ISREG(mode) || EXT3_BG_BLOCKS_PNLT(sb, group, desc))) return group; avefreeb = le32_to_cpu(sbi->s_es->s_free_blocks_count) / ngroups; /* @@ -392,11 +408,11 @@ static int find_group_other(struct super if (group >= ngroups) group -= 
ngroups; desc = ext3_get_group_desc (sb, group, &bh); - if (!desc || !desc->bg_free_inodes_count) + if (!desc || EXT3_BG_INODES_FREE(sb, group, desc) == 0) continue; if (!S_ISREG(mode)) return group; - if (le16_to_cpu(desc->bg_free_blocks_count) >= avefreeb) + if (EXT3_BG_BLOCKS_PNLT(sb, group, desc) >= avefreeb) return group; } @@ -413,9 +431,10 @@ static int find_group_other(struct super if (++group >= ngroups) group = 0; desc = ext3_get_group_desc (sb, group, &bh); - if (!desc || !desc->bg_free_inodes_count) + if (!desc || /* test desc before the macro dereferences it */ + (freei = EXT3_BG_INODES_FREE(sb, group, desc)) == 0) continue; - freeb = le16_to_cpu(desc->bg_free_blocks_count); + freeb = EXT3_BG_BLOCKS_PNLT(sb, group, desc); if (freeb > best_group_freeb) { best_group_freeb = freeb; best_group = group; @@ -453,6 +472,7 @@ struct inode *ext3_new_inode(handle_t *h int err = 0; struct inode *ret; int i; + int unused_flag = 0; /* Cannot create files in a deleted directory */ if (!dir || !dir->i_nlink) @@ -581,18 +601,34 @@ got: err = ext3_journal_get_write_access(handle, bh2); if (err) goto fail; spin_lock(sb_bgl_lock(sbi, group)); - gdp->bg_free_inodes_count = - cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1); + if (gdp->bg_free_inodes_count == 0) { + if (gdp->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) { + gdp->bg_itable_unused = + cpu_to_le16(le32_to_cpu(es->s_inodes_per_group)); + gdp->bg_flags &= cpu_to_le16(~EXT3_BG_INODE_UNINIT); + } + /* If we didn't allocate from free initialized inodes, + * then we allocated from uninitialized inodes. In this + * case initialize one inode. 
*/ + gdp->bg_itable_unused = + cpu_to_le16(le16_to_cpu(gdp->bg_itable_unused) - 1); + unused_flag = 1; + } else { + gdp->bg_free_inodes_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) -1); + } if (S_ISDIR(mode)) { gdp->bg_used_dirs_count = cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1); } + gdp->bg_checksum = ext3_group_desc_csum(sbi, group, gdp); spin_unlock(sb_bgl_lock(sbi, group)); BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata"); err = ext3_journal_dirty_metadata(handle, bh2); if (err) goto fail; - percpu_counter_dec(&sbi->s_freeinodes_counter); + if (!unused_flag) + percpu_counter_dec(&sbi->s_freeinodes_counter); if (S_ISDIR(mode)) percpu_counter_inc(&sbi->s_dirs_counter); sb->s_dirt = 1; Index: linux-stage/fs/ext3/mballoc.c =================================================================== --- linux-stage.orig/fs/ext3/mballoc.c 2007-03-14 04:44:12.000000000 -0400 +++ linux-stage/fs/ext3/mballoc.c 2007-03-14 16:13:29.000000000 -0400 @@ -107,6 +107,7 @@ struct ext3_group_info { unsigned long bb_state; unsigned long bb_tid; struct ext3_free_metadata *bb_md_cur; + struct ext3_group_desc *bb_gdp; unsigned short bb_first_free; unsigned short bb_free; unsigned short bb_fragments; @@ -511,10 +512,7 @@ static int ext3_mb_init_cache(struct pag if (first_group + i >= EXT3_SB(sb)->s_groups_count) break; - err = -EIO; - desc = ext3_get_group_desc(sb, first_group + i, NULL); - if (desc == NULL) - goto out; + desc = EXT3_GROUP_INFO(sb, first_group + i)->bb_gdp; err = -ENOMEM; bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap)); @@ -529,7 +530,12 @@ static int ext3_mb_init_cache(struct pag unlock_buffer(bh[i]); continue; } - + if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { + ext3_init_block_bitmap(sb, bh[i], first_group + i,desc); + set_buffer_uptodate(bh[i]); + unlock_buffer(bh[i]); + continue; + } get_bh(bh[i]); bh[i]->b_end_io = end_buffer_read_sync; submit_bh(READ, bh[i]); @@ -1246,6 +1244,10 @@ static int 
ext3_mb_good_group(struct ext switch (cr) { case 0: J_ASSERT(ac->ac_2order != 0); + /* If this group is uninitialized, skip it initially */ + if (grp->bb_gdp->bg_flags & + cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) + return 0; bits = ac->ac_sb->s_blocksize_bits + 1; for (i = ac->ac_2order; i <= bits; i++) if (grp->bb_counters[i] > 0) @@ -1421,7 +1421,8 @@ repeat: } ac.ac_groups_scanned++; - if (cr == 0) + if (cr == 0 || (e3b.bd_info->bb_gdp->bg_flags & + cpu_to_le16(EXT3_BG_BLOCK_UNINIT))) ext3_mb_simple_scan_group(&ac, &e3b); else if (cr == 1 && *len == sbi->s_stripe) ext3_mb_scan_aligned(&ac, &e3b); @@ -1500,7 +1499,8 @@ found: * bitmap to be journaled */ ext3_debug("using block group %d(%d)\n", - ac.ac_b_group.group, gdp->bg_free_blocks_count); + ac.ac_b_group.group, + EXT3_BG_BLOCKS_FREE(sb, ac.ac_b_group.group, gdp)); bitmap_bh = read_block_bitmap(sb, ac.ac_b_ex.fe_group); if (!bitmap_bh) { @@ -1537,9 +1543,17 @@ found: mb_set_bits(bitmap_bh->b_data, ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len); spin_lock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group)); + if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); + gdp->bg_free_blocks_count = + cpu_to_le16(ext3_free_blocks_after_init(sb, + ac.ac_b_ex.fe_group, + gdp)); + } gdp->bg_free_blocks_count = cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - ac.ac_b_ex.fe_len); + gdp->bg_checksum = ext3_group_desc_csum(sbi, ac.ac_b_ex.fe_group, gdp); spin_unlock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group)); percpu_counter_mod(&sbi->s_freeblocks_counter, - ac.ac_b_ex.fe_len); @@ -1992,16 +1991,16 @@ int ext3_mb_init_backend(struct super_bl i--; goto err_freebuddy; } + memset(meta_group_info[j], 0, len); desc = ext3_get_group_desc(sb, i, NULL); + meta_group_info[j]->bb_gdp = desc; if (desc == NULL) { printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i); goto err_freebuddy; } - memset(meta_group_info[j], 0, len); set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT, &meta_group_info[j]->bb_state); - 
meta_group_info[j]->bb_free = - le16_to_cpu(desc->bg_free_blocks_count); + meta_group_info[j]->bb_free = EXT3_BG_BLOCKS_FREE(sb, i, desc); } return 0; @@ -2397,6 +2410,7 @@ do_more: spin_lock(sb_bgl_lock(sbi, block_group)); gdp->bg_free_blocks_count = cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); + gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp); spin_unlock(sb_bgl_lock(sbi, block_group)); percpu_counter_mod(&sbi->s_freeblocks_counter, count); Index: linux-stage/fs/ext3/resize.c =================================================================== --- linux-stage.orig/fs/ext3/resize.c 2007-03-14 04:44:13.000000000 -0400 +++ linux-stage/fs/ext3/resize.c 2007-03-14 16:13:29.000000000 -0400 @@ -807,6 +807,9 @@ int ext3_group_add(struct super_block *s gdp->bg_inode_table = cpu_to_le32(input->inode_table); gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count); gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb)); + spin_lock(sb_bgl_lock(sbi, input->group)); /* not strictly needed */ + gdp->bg_checksum = ext3_group_desc_csum(sbi, input->group, gdp); + spin_unlock(sb_bgl_lock(sbi, input->group)); /* * Make the new blocks and inodes valid next. 
We do this before Index: linux-stage/fs/ext3/super.c =================================================================== --- linux-stage.orig/fs/ext3/super.c 2007-03-14 04:44:13.000000000 -0400 +++ linux-stage/fs/ext3/super.c 2007-03-14 16:13:29.000000000 -0400 @@ -1084,6 +1084,56 @@ static int ext3_setup_super(struct super return res; } +static __u16 crc16(__u16 crc, const u8 *buf, size_t len) +{ + __u16 tmp; + + while (len--) { + crc ^= *buf++; + crc ^= (u8)crc >> 4; + tmp = (u8)crc; + crc ^= (tmp ^ (tmp << 1)) << 4; + } + return crc; +} + +__u16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 block_group, + struct ext3_group_desc *gdp) +{ + __u16 crc = 0; + + /* Callers that update *gdp hold sb_bgl_lock(); the read-only users + * (mount-time descriptor check, bitmap init) do not, so the lock + * cannot be asserted here without BUG()ing on SMP. */ + if (sbi->s_es->s_feature_ro_compat & + cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) { + int offset = offsetof(struct ext3_group_desc, bg_checksum); + + block_group = cpu_to_le32(block_group); + crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); + crc = crc16(crc, (__u8 *)&block_group, sizeof(block_group)); + crc = crc16(crc, (__u8 *)gdp, offset); + offset += sizeof(gdp->bg_checksum); /* skip checksum */ + BUG_ON(offset != sizeof(*gdp)); /* XXX handle s_desc_size */ + /* for checksum of struct ext4_group_desc do the rest... 
+ if (offset < sbi->s_es->s_desc_size) { + crc = crc16(crc, (__u8 *)gdp + offset, + sbi->s_es->s_desc_size - offset); + */ + } + + return cpu_to_le16(crc); +} + +int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 block_group, + struct ext3_group_desc *gdp) +{ + if (gdp->bg_checksum != ext3_group_desc_csum(sbi, block_group, gdp)) + return 0; + + return 1; +} + /* Called at mount-time, super-block is locked */ static int ext3_check_descriptors (struct super_block * sb) { @@ -1133,6 +1179,13 @@ static int ext3_check_descriptors (struc le32_to_cpu(gdp->bg_inode_table)); return 0; } + if (!ext3_group_desc_csum_verify(sbi, i, gdp)) { + ext3_error(sb, __FUNCTION__, + "Checksum for group %d failed (%u != %u)\n", + i, ext3_group_desc_csum(sbi, i, gdp), + gdp->bg_checksum); + return 0; + } block += EXT3_BLOCKS_PER_GROUP(sb); gdp++; } Index: linux-stage/include/linux/ext3_fs.h =================================================================== --- linux-stage.orig/include/linux/ext3_fs.h 2007-03-14 16:29:47.000000000 -0400 +++ linux-stage/include/linux/ext3_fs.h 2007-03-14 16:37:23.000000000 -0400 @@ -118,6 +118,26 @@ struct statfs; (s)->s_first_ino) #endif +/* Macro-instructions used to calculate Free inodes and blocks count. + * Value is approximate for the blocks count until it is initialized. */ +#define EXT3_BG_INODES_FREE(sb,gr,gdp) ((gdp)->bg_flags & \ + cpu_to_le16(EXT3_BG_INODE_UNINIT) ? \ + EXT3_INODES_PER_GROUP(sb) : \ + le16_to_cpu((gdp)->bg_itable_unused) + \ + le16_to_cpu((gdp)->bg_free_inodes_count)) +#define EXT3_BG_BLOCKS_FREE(sb,gr,gdp) ((gdp)->bg_flags & \ + cpu_to_le16(EXT3_BG_BLOCK_UNINIT) ? \ + ext3_free_blocks_after_init(sb,gr,gdp) :\ + le16_to_cpu((gdp)->bg_free_blocks_count)) + +/* When looking for groups to allocate from, we reduce the free blocks count + * to avoid always picking uninitialized groups. Essentially, we don't start + * allocating from uninitialized groups until other groups are 1/2 full. 
*/ +#define EXT3_BG_BLOCKS_PNLT(sb,gr,gdp) ((gdp)->bg_flags & \ + cpu_to_le16(EXT3_BG_BLOCK_UNINIT) ? \ + EXT3_BLOCKS_PER_GROUP(sb) / 2 : \ + le16_to_cpu((gdp)->bg_free_blocks_count)) + /* * Macro-instructions used to manage fragments */ @@ -138,16 +148,22 @@ struct statfs; */ struct ext3_group_desc { - __le32 bg_block_bitmap; /* Blocks bitmap block */ - __le32 bg_inode_bitmap; /* Inodes bitmap block */ + __le32 bg_block_bitmap; /* Blocks bitmap block */ + __le32 bg_inode_bitmap; /* Inodes bitmap block */ __le32 bg_inode_table; /* Inodes table block */ __le16 bg_free_blocks_count; /* Free blocks count */ __le16 bg_free_inodes_count; /* Free inodes count */ __le16 bg_used_dirs_count; /* Directories count */ - __u16 bg_pad; - __le32 bg_reserved[3]; + __le16 bg_flags; /* EXT3_BG_flags (UNINIT, etc) */ + __le32 bg_reserved[2]; /* Likely block/inode bitmap checksum */ + __le16 bg_itable_unused; /* Unused inodes count */ + __le16 bg_checksum; /* crc16(sb_uuid+group+desc) */ }; +#define EXT3_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ +#define EXT3_BG_BLOCK_UNINIT 0x0002 /* Block bitmap not in use */ +#define EXT3_BG_BLOCK_WRITTEN 0x0004 /* On-disk structures were overwritten */ + /* * Macro-instructions used to manage group descriptors */ @@ -564,6 +581,7 @@ static inline struct ext3_inode_info *EX #define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER 0x0001 #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE 0x0002 #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR 0x0004 +#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM 0x0010 #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK 0x0020 #define EXT3_FEATURE_INCOMPAT_COMPRESSION 0x0001 @@ -580,6 +598,7 @@ static inline struct ext3_inode_info *EX EXT3_FEATURE_INCOMPAT_EXTENTS) #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \ EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \ EXT3_FEATURE_RO_COMPAT_BTREE_DIR) @@ -841,6 +860,16 @@ extern void ext3_unlockfs (struct super_ 
extern void ext3_commit_super (struct super_block *, struct ext3_super_block *, int); extern int ext3_remount (struct super_block *, int *, char *); extern int ext3_statfs (struct super_block *, struct kstatfs *); +extern __u16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 group, + struct ext3_group_desc *gdp); +extern int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 group, + struct ext3_group_desc *gdp); +extern unsigned long ext3_free_blocks_after_init(struct super_block *sb, + int block_group, + struct ext3_group_desc *gdp); +extern void ext3_init_block_bitmap(struct super_block *sb, + struct buffer_head *bh, int group, + struct ext3_group_desc *desc); #define ext3_std_error(sb, errno) \ do { \ %diffstat fs/ext3/balloc.c | 116 +++++++++++++++++++++++++++++++++++++++++-- fs/ext3/ialloc.c | 85 ++++++++++++++++++++++--------- fs/ext3/mballoc.c | 18 +++++- fs/ext3/resize.c | 1 fs/ext3/super.c | 53 +++++++++++++++++++ include/linux/ext3_fs.h | 36 +++++++++++-- include/linux/ext3_fs.h.orig | 23 +++++++- 7 files changed, 294 insertions(+), 38 deletions(-)