Index: linux-3.10.0-123.el7.x86_64/fs/ext4/ext4.h =================================================================== --- linux-3.10.0-123.el7.x86_64.orig/fs/ext4/ext4.h +++ linux-3.10.0-123.el7.x86_64/fs/ext4/ext4.h @@ -2391,6 +2391,7 @@ struct ext4_group_info { ext4_grpblk_t bb_fragments; /* nr of freespace fragments */ ext4_grpblk_t bb_largest_free_order;/* order of largest frag in BG */ struct list_head bb_prealloc_list; + unsigned long bb_prealloc_nr; #ifdef DOUBLE_CHECK void *bb_bitmap; #endif Index: linux-3.10.0-123.el7.x86_64/fs/ext4/mballoc.c =================================================================== --- linux-3.10.0-123.el7.x86_64.orig/fs/ext4/mballoc.c +++ linux-3.10.0-123.el7.x86_64/fs/ext4/mballoc.c @@ -362,7 +362,7 @@ static const char *ext4_groupinfo_slab_n "ext4_groupinfo_64k", "ext4_groupinfo_128k" }; -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, +static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, ext4_group_t group); static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap, ext4_group_t group); @@ -718,7 +718,7 @@ mb_set_largest_free_order(struct super_b } static noinline_for_stack -void ext4_mb_generate_buddy(struct super_block *sb, +int ext4_mb_generate_buddy(struct super_block *sb, void *buddy, void *bitmap, ext4_group_t group) { struct ext4_group_info *grp = ext4_get_group_info(sb, group); @@ -750,19 +750,13 @@ void ext4_mb_generate_buddy(struct super grp->bb_fragments = fragments; if (free != grp->bb_free) { - ext4_grp_locked_error(sb, group, 0, 0, - "block bitmap and bg descriptor " - "inconsistent: %u vs %u free clusters", - free, grp->bb_free); - /* - * If we intend to continue, we consider group descriptor - * corrupt and update bb_free using bitmap value - */ - grp->bb_free = free; - if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) - percpu_counter_sub(&sbi->s_freeclusters_counter, - grp->bb_free); - set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state); + struct ext4_group_desc *gdp; + gdp = ext4_get_group_desc(sb, group, NULL); + ext4_error(sb, "group %lu: %u blocks in bitmap, %u in bb, " + "%u in gd, %lu pa's\n", (long unsigned int)group, + free, grp->bb_free, ext4_free_group_clusters(sb, gdp), + grp->bb_prealloc_nr); + return -EIO; } mb_set_largest_free_order(sb, grp); @@ -768,6 +767,8 @@ void ext4_mb_generate_buddy(struct super EXT4_SB(sb)->s_mb_buddies_generated++; EXT4_SB(sb)->s_mb_generation_time += period; spin_unlock(&EXT4_SB(sb)->s_bal_lock); + + return 0; } static void mb_regenerate_buddy(struct ext4_buddy *e4b) @@ -883,7 +884,7 @@ static int ext4_mb_init_cache(struct pag } first_block = page->index * blocks_per_page; - for (i = 0; i < blocks_per_page; i++) { + for (i = 0; i < blocks_per_page && err == 0; i++) { group = (first_block + i) >> 1; if (group >= ngroups) break; @@ -922,7 +923,7 @@ static int ext4_mb_init_cache(struct pag ext4_lock_group(sb, group); /* init the buddy */ memset(data, 0xff, blocksize); - ext4_mb_generate_buddy(sb, data, incore, group); + err = ext4_mb_generate_buddy(sb, data, incore, group); ext4_unlock_group(sb, group); incore = NULL; } else { @@ -937,7 +938,7 @@ static int ext4_mb_init_cache(struct pag memcpy(data, bitmap, blocksize); /* mark all preallocated blks used in in-core bitmap */ - ext4_mb_generate_from_pa(sb, data, group); + err = ext4_mb_generate_from_pa(sb, data, group); ext4_mb_generate_from_freelist(sb, data, group); ext4_unlock_group(sb, group); @@ -947,7 +948,8 @@ static int ext4_mb_init_cache(struct pag incore = data; } } - SetPageUptodate(page); + if (likely(err == 0)) + SetPageUptodate(page); out: if (bh) { @@ -2224,9 +2226,11 @@ static void *ext4_mb_seq_groups_next(str static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v) { struct super_block *sb = seq->private; + struct ext4_group_desc *gdp; ext4_group_t group = (ext4_group_t) ((unsigned long) v); int i; int err, buddy_loaded = 0; + int free = 0; struct ext4_buddy e4b; struct ext4_group_info *grinfo; struct sg { @@ -2236,10 +2240,10 @@ static int ext4_mb_seq_groups_show(struc group--; if (group == 0) - seq_printf(seq, "#%-5s: %-5s %-5s %-5s " + seq_printf(seq, "#%-5s: %-5s %-5s %-5s %-5s %-5s" "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s " "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n", - "group", "free", "frags", "first", + "group", "bfree", "gfree", "frags", "first", "pa", "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6", "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13"); @@ -2256,13 +2260,19 @@ static int ext4_mb_seq_groups_show(struc buddy_loaded = 1; } + gdp = ext4_get_group_desc(sb, group, NULL); + if (gdp != NULL) + free = ext4_free_group_clusters(sb, gdp); + memcpy(&sg, ext4_get_group_info(sb, group), i); if (buddy_loaded) ext4_mb_unload_buddy(&e4b); - seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free, - sg.info.bb_fragments, sg.info.bb_first_free); + seq_printf(seq, "#%-5lu: %-5u %-5u %-5u %-5u %-5lu [", + (long unsigned int)group, sg.info.bb_free, free, + sg.info.bb_fragments, sg.info.bb_first_free, + sg.info.bb_prealloc_nr); for (i = 0; i <= 13; i++) seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ? sg.info.bb_counters[i] : 0); @@ -3507,22 +3517,71 @@ static void ext4_mb_generate_from_freeli } /* + * check free blocks in bitmap match free block in group descriptor + * do this before taking preallocated blocks into account to be able + * to detect on-disk corruptions. The group lock should be hold by the + * caller. + */ +int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap, + struct ext4_group_desc *gdp, int group) +{ + unsigned short max = EXT4_CLUSTERS_PER_GROUP(sb); + unsigned short i, first, free = 0; + unsigned short free_in_gdp = ext4_free_group_clusters(sb, gdp); + + if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) + return 0; + + i = mb_find_next_zero_bit(bitmap, max, 0); + + while (i < max) { + first = i; + i = mb_find_next_bit(bitmap, max, i); + if (i > max) + i = max; + free += i - first; + if (i < max) + i = mb_find_next_zero_bit(bitmap, max, i); + } + + if (free != free_in_gdp) { + ext4_error(sb, "on-disk bitmap for group %d" + "corrupted: %u blocks free in bitmap, %u - in gd\n", + group, free, free_in_gdp); + return -EIO; + } + return 0; +} + +/* * the function goes through all preallocation in this group and marks them * used in in-core bitmap. buddy must be generated from this bitmap * Need to be called with ext4 group lock held */ static noinline_for_stack -void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, +int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap, ext4_group_t group) { struct ext4_group_info *grp = ext4_get_group_info(sb, group); struct ext4_prealloc_space *pa; + struct ext4_group_desc *gdp; struct list_head *cur; ext4_group_t groupnr; ext4_grpblk_t start; int preallocated = 0; + int skip = 0, count = 0; + int err; int len; + gdp = ext4_get_group_desc(sb, group, NULL); + if (gdp == NULL) + return -EIO; + + /* before applying preallocations, check bitmap consistency */ + err = ext4_mb_check_ondisk_bitmap(sb, bitmap, gdp, group); + if (err) + return err; + /* all form of preallocation discards first load group, * so the only competing code is preallocation use. * we don't need any locking here @@ -3538,13 +3593,23 @@ void ext4_mb_generate_from_pa(struct sup &groupnr, &start); len = pa->pa_len; spin_unlock(&pa->pa_lock); - if (unlikely(len == 0)) + if (unlikely(len == 0)) { + skip++; continue; + } BUG_ON(groupnr != group); ext4_set_bits(bitmap, start, len); preallocated += len; + count++; + } + if (count + skip != grp->bb_prealloc_nr) { + ext4_error(sb, "lost preallocations: " + "count %d, bb_prealloc_nr %lu, skip %d\n", + count, grp->bb_prealloc_nr, skip); + return -EIO; } mb_debug(1, "prellocated %u for group %u\n", preallocated, group); + return 0; } static void ext4_mb_pa_callback(struct rcu_head *head) @@ -3603,6 +3668,7 @@ static void ext4_mb_put_pa(struct ext4_a */ ext4_lock_group(sb, grp); list_del(&pa->pa_group_list); + ext4_get_group_info(sb, grp)->bb_prealloc_nr--; ext4_unlock_group(sb, grp); spin_lock(pa->pa_obj_lock); @@ -3697,6 +3763,7 @@ ext4_mb_new_inode_pa(struct ext4_allocat ext4_lock_group(sb, ac->ac_b_ex.fe_group); list_add(&pa->pa_group_list, &grp->bb_prealloc_list); + grp->bb_prealloc_nr++; ext4_unlock_group(sb, ac->ac_b_ex.fe_group); spin_lock(pa->pa_obj_lock); @@ -3758,6 +3825,7 @@ ext4_mb_new_group_pa(struct ext4_allocat ext4_lock_group(sb, ac->ac_b_ex.fe_group); list_add(&pa->pa_group_list, &grp->bb_prealloc_list); + grp->bb_prealloc_nr++; ext4_unlock_group(sb, ac->ac_b_ex.fe_group); /* @@ -3927,6 +3995,8 @@ repeat: spin_unlock(&pa->pa_lock); + BUG_ON(grp->bb_prealloc_nr == 0); + grp->bb_prealloc_nr--; list_del(&pa->pa_group_list); list_add(&pa->u.pa_tmp_list, &list); } @@ -4056,7 +4126,7 @@ repeat: if (err) { ext4_error(sb, "Error loading buddy information for %u", group); - continue; + return; } bitmap_bh = ext4_read_block_bitmap(sb, group); @@ -4068,6 +4138,8 @@ repeat: } ext4_lock_group(sb, group); + BUG_ON(e4b.bd_info->bb_prealloc_nr == 0); + e4b.bd_info->bb_prealloc_nr--; list_del(&pa->pa_group_list); ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa); ext4_unlock_group(sb, group); @@ -4328,6 +4400,7 @@ ext4_mb_discard_lg_preallocations(struct } ext4_lock_group(sb, group); list_del(&pa->pa_group_list); + ext4_get_group_info(sb, group)->bb_prealloc_nr--; ext4_mb_release_group_pa(&e4b, pa); ext4_unlock_group(sb, group); Index: linux-3.10.0-123.el7.x86_64/fs/ext4/mballoc.h =================================================================== --- linux-3.10.0-123.el7.x86_64.orig/fs/ext4/mballoc.h +++ linux-3.10.0-123.el7.x86_64/fs/ext4/mballoc.h @@ -82,7 +82,7 @@ extern ushort ext4_mballoc_debug; /* * for which requests use 2^N search using buddies */ -#define MB_DEFAULT_ORDER2_REQS 2 +#define MB_DEFAULT_ORDER2_REQS 8 /* * default group prealloc size 512 blocks