Index: linux-2.6.27.21-0.1/fs/ext4/mballoc.c =================================================================== --- linux-2.6.27.21-0.1.orig/fs/ext4/mballoc.c +++ linux-2.6.27.21-0.1/fs/ext4/mballoc.c @@ -333,7 +333,7 @@ static struct kmem_cache *ext4_pspace_cachep; static struct kmem_cache *ext4_ac_cachep; static struct kmem_cache *ext4_free_ext_cachep; -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, +static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, ext4_group_t group); static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, ext4_group_t group); @@ -672,7 +672,7 @@ static void ext4_mb_mark_free_simple(str } } -static void ext4_mb_generate_buddy(struct super_block *sb, +static int ext4_mb_generate_buddy(struct super_block *sb, void *buddy, void *bitmap, ext4_group_t group) { struct ext4_group_info *grp = ext4_get_group_info(sb, group); @@ -704,14 +704,13 @@ static void ext4_mb_generate_buddy(struc grp->bb_fragments = fragments; if (free != grp->bb_free) { - ext4_grp_locked_error(sb, group, __func__, - "EXT4-fs: group %u: %u blocks in bitmap, %u in gd\n", - group, free, grp->bb_free); - /* - * If we intent to continue, we consider group descritor - * corrupt and update bb_free using bitmap value - */ - grp->bb_free = free; + struct ext4_group_desc *gdp; + gdp = ext4_get_group_desc (sb, group, NULL); + ext4_grp_locked_error(sb, group, __func__, + "group %u: %u blocks in bitmap, %u in bb, " + "%u in gd\n", group, free, grp->bb_free, + ext4_free_blks_count(sb, gdp)); + return -EIO; } clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state)); @@ -721,6 +720,8 @@ static void ext4_mb_generate_buddy(struc EXT4_SB(sb)->s_mb_buddies_generated++; EXT4_SB(sb)->s_mb_generation_time += period; spin_unlock(&EXT4_SB(sb)->s_bal_lock); + + return 0; } /* The buddy information is attached the buddy cache inode @@ -850,7 +851,7 @@ static int ext4_mb_init_cache(struct pag first_block = page->index * blocks_per_page; /* init the page */ memset(page_address(page), 0xff, PAGE_CACHE_SIZE); - for (i = 0; i < blocks_per_page; i++) { + for (i = 0; i < blocks_per_page && err == 0; i++) { int group; struct ext4_group_info *grinfo; @@ -884,7 +885,7 @@ static int ext4_mb_init_cache(struct pag * incore got set to the group block bitmap below */ ext4_lock_group(sb, group); - ext4_mb_generate_buddy(sb, data, incore, group); + err = ext4_mb_generate_buddy(sb, data, incore, group); ext4_unlock_group(sb, group); incore = NULL; } else { @@ -898,7 +899,7 @@ static int ext4_mb_init_cache(struct pag memcpy(data, bitmap, blocksize); /* mark all preallocated blks used in in-core bitmap */ - ext4_mb_generate_from_pa(sb, data, group); + err = ext4_mb_generate_from_pa(sb, data, group); ext4_mb_generate_from_freelist(sb, data, group); ext4_unlock_group(sb, group); @@ -908,6 +909,7 @@ static int ext4_mb_init_cache(struct pag incore = data; } } + if (likely(err == 0)) SetPageUptodate(page); out: @@ -2217,7 +2219,10 @@ static int ext4_mb_seq_history_show(stru hs->result.fe_start, hs->result.fe_len); seq_printf(seq, "%-5u %-8u %-23s free\n", hs->pid, hs->ino, buf2); + } else { + seq_printf(seq, "unknown op %d\n", hs->op); } + return 0; } @@ -2345,9 +2350,11 @@ static void *ext4_mb_seq_groups_next(str static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) { struct super_block *sb = seq->private; + struct ext4_group_desc *gdp; ext4_group_t group = (ext4_group_t) ((unsigned long) v); int i; int err; + unsigned free = 0; struct ext4_buddy e4b; struct sg { struct ext4_group_info info; @@ -2356,10 +2363,10 @@ static int ext4_mb_seq_groups_show(struc group--; if (group == 0) - seq_printf(seq, "#%-5s: %-5s %-5s %-5s " + seq_printf(seq, "#%-5s: %-5s %-5s %-5s %-5s %-5s" "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s " "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n", - "group", "free", "frags", "first", + "group", "free", "frags", "first", "first", "pa", "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13"); @@ -2371,12 +2378,18 @@ static int ext4_mb_seq_groups_show(struc seq_printf(seq, "#%-5u: I/O error\n", group); return 0; } + + gdp = ext4_get_group_desc(sb, group, NULL); + if (gdp != NULL) + free = ext4_free_blks_count(sb, gdp); + ext4_lock_group(sb, group); memcpy(&sg, ext4_get_group_info(sb, group), i); ext4_unlock_group(sb, group); ext4_mb_release_desc(&e4b); - seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free, + seq_printf(seq, "#%-5u: %-5u %-5u %-5u %-5u [", group, + sg.info.bb_free, free, sg.info.bb_fragments, sg.info.bb_first_free); for (i = 0; i <= 13; i++) seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ? @@ -2474,6 +2487,7 @@ ext4_mb_store_history(struct ext4_alloca h.tail = ac->ac_tail; h.buddy = ac->ac_buddy; h.merged = 0; + h.cr = ac->ac_criteria; if (ac->ac_op == EXT4_MB_HISTORY_ALLOC) { if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start && ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group) @@ -3695,22 +3709,67 @@ static void ext4_mb_generate_from_freeli } /* + * check free blocks in bitmap match free block in group descriptor + * do this before taking preallocated blocks into account to be able + * to detect on-disk corruptions. The group lock should be hold by the + * caller. + */ +int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap, + struct ext4_group_desc *gdp, int group) +{ + unsigned short max = EXT4_BLOCKS_PER_GROUP(sb); + unsigned short i, first, free = 0; + + i = mb_find_next_zero_bit(bitmap, max, 0); + + while (i < max) { + first = i; + i = mb_find_next_bit(bitmap, max, i); + if (i > max) + i = max; + free += i - first; + if (i < max) + i = mb_find_next_zero_bit(bitmap, max, i); + } + + if (free != ext4_free_blks_count(sb, gdp)) { + ext4_error(sb, __FUNCTION__, "on-disk bitmap for group %d" + "corrupted: %u blocks free in bitmap, %u - in gd\n", + group, free, ext4_free_blks_count(sb, gdp)); + return -EIO; + } + return 0; +} + +/* * the function goes through all preallocation in this group and marks them * used in in-core bitmap. buddy must be generated from this bitmap * Need to be called with ext4 group lock (ext4_lock_group) */ -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, +static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, ext4_group_t group) { struct ext4_group_info *grp = ext4_get_group_info(sb, group); struct ext4_prealloc_space *pa; + struct ext4_group_desc *gdp; struct list_head *cur; ext4_group_t groupnr; ext4_grpblk_t start; int preallocated = 0; int count = 0; + int skip = 0; + int err; int len; + gdp = ext4_get_group_desc (sb, group, NULL); + if (gdp == NULL) + return -EIO; + + /* before applying preallocations, check bitmap consistency */ + err = ext4_mb_check_ondisk_bitmap(sb, bitmap, gdp, group); + if (err) + return err; + /* all form of preallocation discards first load group, * so the only competing code is preallocation use. * we don't need any locking here @@ -3720,8 +3778,10 @@ static void ext4_mb_generate_from_pa(str &groupnr, &start); len = pa->pa_len; spin_unlock(&pa->pa_lock); - if (unlikely(len == 0)) + if (unlikely(len == 0)) { + skip++; continue; + } BUG_ON(groupnr != group); mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group), bitmap, start, len); @@ -3729,6 +3789,7 @@ static void ext4_mb_generate_from_pa(str count++; } mb_debug("prellocated %u for group %u\n", preallocated, group); + return 0; } static void ext4_mb_pa_callback(struct rcu_head *head) @@ -3978,6 +4039,7 @@ ext4_mb_release_inode_pa(struct ext4_bud ac->ac_sb = sb; ac->ac_inode = pa->pa_inode; ac->ac_op = EXT4_MB_HISTORY_DISCARD; + ac->ac_o_ex.fe_len = 1; } while (bit < end) { @@ -4260,7 +4322,7 @@ repeat: __release(e4b->alloc_semp); ext4_error(sb, __func__, "Error in loading buddy " "information for %u\n", group); - continue; + return; } bitmap_bh = ext4_read_block_bitmap(sb, group); Index: linux-2.6.27.21-0.1/fs/ext4/mballoc.h =================================================================== --- linux-2.6.27.21-0.1.orig/fs/ext4/mballoc.h +++ linux-2.6.27.21-0.1/fs/ext4/mballoc.h @@ -92,7 +92,7 @@ /* * for which requests use 2^N search using buddies */ -#define MB_DEFAULT_ORDER2_REQS 2 +#define MB_DEFAULT_ORDER2_REQS 8 /* * default group prealloc size 512 blocks @@ -219,7 +219,7 @@ struct ext4_mb_history { __u16 tail; /* what tail broke some buddy */ __u16 buddy; /* buddy the tail ^^^ broke */ __u16 flags; - __u8 cr:3; /* which phase the result extent was found at */ + __u8 cr:8; /* which phase the result extent was found at */ __u8 op:4; __u8 merged:1; };