From c35b8db46fb2cc045bd4451a852ce2abbfdb9c58 Mon Sep 17 00:00:00 2001 From: adilger Date: Tue, 14 Apr 2009 04:40:46 +0000 Subject: [PATCH] Branch b1_8 Description: fix racy locking of mballoc block bitmaps causing BUG Details : The locking of the mballoc buddy bitmap and the in-memory block bitmap was using two different spin locks in some cases. This made it possible to incorrectly access the mballoc bitmap while another process was modifying it, causing a sanity assertion to fail. While no on-disk corruption was reported, there was some risk of this happening. b=18810 i=alex --- ldiskfs/ChangeLog | 52 +++++++++++++++-- .../patches/ext3-fiemap-2.6-rhel5.patch | 2 +- .../patches/ext3-mballoc3-core.patch | 66 ++++++++++++---------- .../patches/ext3-uninit-2.6-sles10.patch | 2 +- .../patches/ext3-uninit-2.6-suse.patch | 2 +- .../patches/ext3-uninit-2.6.18.patch | 2 +- .../patches/ext3-uninit-2.6.22-vanilla.patch | 4 +- .../kernel_patches/patches/ext3-uninit-2.6.9.patch | 4 +- .../kernel_patches/series/ldiskfs-2.6-rhel5.series | 1 - .../series/ldiskfs-2.6-sles10.series | 1 - 10 files changed, 91 insertions(+), 45 deletions(-) diff --git a/ldiskfs/ChangeLog b/ldiskfs/ChangeLog index 2faad16..b2624b8 100644 --- a/ldiskfs/ChangeLog +++ b/ldiskfs/ChangeLog @@ -1,17 +1,48 @@ tbd Sun Microsystems, Inc. * version 3.0.8 -Severity : -Bugzilla : -Description: -Details : +Severity : minor +Bugzilla : 16114 +Description: minor fixes and cleanups +Details : use EXT_UNSET_BLOCK to avoid confusion with EXT_MAX_BLOCK. + Initialize 'ix' variable in extents patch to stop compiler warning. + +Severity : feature +Bugzilla : 17942 +Description: update FIEMAP ioctl to match upstream kernel version +Details : the FIEMAP block-mapping ioctl had a prototype version in + ldiskfs 3.0.7 but this release updates it to match the + interface in the upstream kernel, with a new ioctl number. + +Severity : normal +Frequency : only if MMP is active and detects filesystem is in use +Bugzilla : 18173 +Description: if MMP startup fails, an oops is triggered +Details : if ldiskfs mounting doesn't succeed the error handling doesn't + clean up the MMP data correctly, causing an oops. + +------------------------------------------------------------------------------- + +2009-04-06 Sun Microsystems, Inc. + * version 3.0.7.1 + +Severity : major +Frequency : rare +Bugzilla : 18810 +Description: fix racy locking of mballoc block bitmaps causing BUG +Details : The locking of the mballoc buddy bitmap and the in-memory + block bitmap was using two different spin locks in some + cases. This made it possible to incorrectly access the + mballoc bitmap while another process was modifying it, + causing a sanity assertion to fail. While no on-disk corruption + was reported, there was some risk of this happening. ------------------------------------------------------------------------------- -2009-01-15 Sun Microsystems, Inc. +2009-02-07 Sun Microsystems, Inc. * version 3.0.7 -Severity : normal +Severity : enhancement Bugzilla : 16498 Description: Get RAID stripe size from superblock Details : RAID striping parameters are now saved in the superblock itself, @@ -19,6 +50,7 @@ Details : RAID striping parameters are now saved in the superblock itself, a mount option each time. Severity : major +Frequency : only if server is running on unsupported big-endian machine Bugzilla : 16438 Description: Disable big-endian ldiskfs server support. Details : The ldiskfs code is not tested on big-endian machines, and @@ -29,6 +61,14 @@ Details : The ldiskfs code is not tested on big-endian machines, and possible to mount with the "bigendian_extents" option to force the mount. +Severity : major +Frequency : only with software RAID-5 +Bugzilla : 17895 +Description: MMP block reads may fail to detect another user of the filesystem +Details : with software RAID it is possible that the RAID driver will + cache the MMP block and not refetch it from disk. Force the + read to invalidate the RAID page cache and go directly to disk. + ------------------------------------------------------------------------------- 2008-08-31 Sun Microsystems, Inc. diff --git a/ldiskfs/kernel_patches/patches/ext3-fiemap-2.6-rhel5.patch b/ldiskfs/kernel_patches/patches/ext3-fiemap-2.6-rhel5.patch index 0d87c57..d4c0e28 100644 --- a/ldiskfs/kernel_patches/patches/ext3-fiemap-2.6-rhel5.patch +++ b/ldiskfs/kernel_patches/patches/ext3-fiemap-2.6-rhel5.patch @@ -412,7 +412,7 @@ Index: linux-2.6.18-92.1.22/fs/ext3/fiemap.h +/* + * FIEMAP ioctl infrastructure. + * -+ * Copyright 2008 Sun Microsystems, Inc ++ * Copyright 2008 Sun Microsystems, Inc. + * + * Author: Kalpak Shah + * Andreas Dilger diff --git a/ldiskfs/kernel_patches/patches/ext3-mballoc3-core.patch b/ldiskfs/kernel_patches/patches/ext3-mballoc3-core.patch index fa8b4ae..197e8cc 100644 --- a/ldiskfs/kernel_patches/patches/ext3-mballoc3-core.patch +++ b/ldiskfs/kernel_patches/patches/ext3-mballoc3-core.patch @@ -288,10 +288,10 @@ Index: linux-2.6.22.19/fs/ext3/mballoc.c =================================================================== --- /dev/null +++ linux-2.6.22.19/fs/ext3/mballoc.c -@@ -0,0 +1,4475 @@ +@@ -0,0 +1,4483 @@ +/* -+ * Copyright 2008 Sun Microsystems, Inc. -+ * Written by Alex Tomas ++ * Copyright 2009 Sun Microsystems, Inc. ++ * Written by Alex Zhuravlev + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as @@ -1456,7 +1456,10 @@ Index: linux-2.6.22.19/fs/ext3/mballoc.c + cur += 32; + continue; + } -+ mb_clear_bit_atomic(lock, cur, bm); ++ if (lock) ++ mb_clear_bit_atomic(lock, cur, bm); ++ else ++ mb_clear_bit(cur, bm); + cur++; + } +} @@ -1474,7 +1477,10 @@ Index: linux-2.6.22.19/fs/ext3/mballoc.c + cur += 32; + continue; + } -+ mb_set_bit_atomic(lock, cur, bm); ++ if (lock) ++ mb_set_bit_atomic(lock, cur, bm); ++ else ++ mb_set_bit(cur, bm); + cur++; + } +} @@ -1628,6 +1634,7 @@ Index: linux-2.6.22.19/fs/ext3/mballoc.c + BUG_ON(start + len > (e3b->bd_sb->s_blocksize << 3)); + BUG_ON(e3b->bd_group != ex->fe_group); + BUG_ON(!ext3_is_group_locked(e3b->bd_sb, e3b->bd_group)); ++ spin_lock(sb_bgl_lock(EXT3_SB(e3b->bd_sb), ex->fe_group)); + mb_check_buddy(e3b); + mb_mark_used_double(e3b, start, len); + @@ -1681,9 +1688,9 @@ Index: linux-2.6.22.19/fs/ext3/mballoc.c + e3b->bd_info->bb_counters[ord]++; + } + -+ mb_set_bits(sb_bgl_lock(EXT3_SB(e3b->bd_sb), ex->fe_group), -+ EXT3_MB_BITMAP(e3b), ex->fe_start, len0); ++ mb_set_bits(NULL, EXT3_MB_BITMAP(e3b), ex->fe_start, len0); + mb_check_buddy(e3b); ++ spin_unlock(sb_bgl_lock(EXT3_SB(e3b->bd_sb), ex->fe_group)); + + return ret; +} @@ -3244,6 +3251,8 @@ Index: linux-2.6.22.19/fs/ext3/mballoc.c + ext3_error(sb, __FUNCTION__, + "Allocating block in system zone - block = %lu", + (unsigned long) block); ++ ext3_lock_group(sb, ac->ac_b_ex.fe_group); ++ spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); +#ifdef AGGRESSIVE_CHECK + { + int i; @@ -3253,15 +3262,15 @@ Index: linux-2.6.22.19/fs/ext3/mballoc.c + } + } +#endif -+ mb_set_bits(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group), bitmap_bh->b_data, ++ mb_set_bits(NULL, bitmap_bh->b_data, + ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); + -+ spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); + gdp->bg_free_blocks_count = + cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + - ac->ac_b_ex.fe_len); + spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); + percpu_counter_mod(&sbi->s_freeblocks_counter, - ac->ac_b_ex.fe_len); ++ ext3_unlock_group(sb, ac->ac_b_ex.fe_group); + + err = ext3_journal_dirty_metadata(handle, bitmap_bh); + if (err) @@ -3613,6 +3622,7 @@ Index: linux-2.6.22.19/fs/ext3/mballoc.c + unsigned short max = EXT3_BLOCKS_PER_GROUP(sb); + unsigned short i, first, free = 0; + ++ spin_lock(sb_bgl_lock(EXT3_SB(sb), group)); + i = mb_find_next_zero_bit(bitmap, max, 0); + + while (i < max) { @@ -3626,11 +3636,13 @@ Index: linux-2.6.22.19/fs/ext3/mballoc.c + } + + if (free != le16_to_cpu(gdp->bg_free_blocks_count)) { ++ spin_unlock(sb_bgl_lock(EXT3_SB(sb), group)); + ext3_error(sb, __FUNCTION__, "on-disk bitmap for group %d" + "corrupted: %u blocks free in bitmap, %u - in gd\n", + group, free, le16_to_cpu(gdp->bg_free_blocks_count)); + return -EIO; + } ++ spin_unlock(sb_bgl_lock(EXT3_SB(sb), group)); + return 0; +} + @@ -4566,7 +4578,6 @@ Index: linux-2.6.22.19/fs/ext3/mballoc.c + BUG_ON(e3b->bd_bitmap_page == NULL); + BUG_ON(e3b->bd_buddy_page == NULL); + -+ ext3_lock_group(sb, group); + for (i = 0; i < count; i++) { + md = db->bb_md_cur; + if (md && db->bb_tid != handle->h_transaction->t_tid) { @@ -4611,7 +4622,6 @@ Index: linux-2.6.22.19/fs/ext3/mballoc.c + db->bb_md_cur = NULL; + } + } -+ ext3_unlock_group(sb, group); + return 0; +} + @@ -4704,6 +4714,8 @@ Index: linux-2.6.22.19/fs/ext3/mballoc.c + if (err) + goto error_return; + ++ ext3_lock_group(sb, block_group); ++ spin_lock(sb_bgl_lock(sbi, block_group)); +#ifdef AGGRESSIVE_CHECK + { + int i; @@ -4711,35 +4723,31 @@ Index: linux-2.6.22.19/fs/ext3/mballoc.c + BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data)); + } +#endif -+ mb_clear_bits(sb_bgl_lock(sbi, block_group), bitmap_bh->b_data, bit, -+ count); -+ -+ /* We dirtied the bitmap block */ -+ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); -+ err = ext3_journal_dirty_metadata(handle, bitmap_bh); -+ -+ ac.ac_b_ex.fe_group = block_group; -+ ac.ac_b_ex.fe_start = bit; -+ ac.ac_b_ex.fe_len = count; -+ ext3_mb_store_history(&ac); ++ mb_clear_bits(NULL, bitmap_bh->b_data, bit, count); ++ gdp->bg_free_blocks_count = ++ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); ++ spin_unlock(sb_bgl_lock(sbi, block_group)); ++ percpu_counter_mod(&sbi->s_freeblocks_counter, count); + + if (metadata) { + /* blocks being freed are metadata. these blocks shouldn't + * be used until this transaction is committed */ + ext3_mb_free_metadata(handle, &e3b, block_group, bit, count); + } else { -+ ext3_lock_group(sb, block_group); + err = mb_free_blocks(inode, &e3b, bit, count); + ext3_mb_return_to_preallocation(inode, &e3b, block, count); -+ ext3_unlock_group(sb, block_group); + BUG_ON(err != 0); + } ++ ext3_unlock_group(sb, block_group); + -+ spin_lock(sb_bgl_lock(sbi, block_group)); -+ gdp->bg_free_blocks_count = -+ cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count); -+ spin_unlock(sb_bgl_lock(sbi, block_group)); -+ percpu_counter_mod(&sbi->s_freeblocks_counter, count); ++ ac.ac_b_ex.fe_group = block_group; ++ ac.ac_b_ex.fe_start = bit; ++ ac.ac_b_ex.fe_len = count; ++ ext3_mb_store_history(&ac); ++ ++ /* We dirtied the bitmap block */ ++ BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); ++ err = ext3_journal_dirty_metadata(handle, bitmap_bh); + + ext3_mb_release_desc(&e3b); + diff --git a/ldiskfs/kernel_patches/patches/ext3-uninit-2.6-sles10.patch b/ldiskfs/kernel_patches/patches/ext3-uninit-2.6-sles10.patch index 11f1ac0..d001841 100644 --- a/ldiskfs/kernel_patches/patches/ext3-uninit-2.6-sles10.patch +++ b/ldiskfs/kernel_patches/patches/ext3-uninit-2.6-sles10.patch @@ -535,9 +535,9 @@ Index: linux-2.6.16.60-0.27/fs/ext3/mballoc.c &meta_group_info[j]->bb_state); @@ -2945,9 +2957,17 @@ int ext3_mb_mark_diskspace_used(struct e + mb_set_bits(NULL, bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); - spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); + if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); + gdp->bg_free_blocks_count = diff --git a/ldiskfs/kernel_patches/patches/ext3-uninit-2.6-suse.patch b/ldiskfs/kernel_patches/patches/ext3-uninit-2.6-suse.patch index 9d15162..6bef8a5 100644 --- a/ldiskfs/kernel_patches/patches/ext3-uninit-2.6-suse.patch +++ b/ldiskfs/kernel_patches/patches/ext3-uninit-2.6-suse.patch @@ -498,9 +498,9 @@ Index: linux-2.6.5-7.311/fs/ext3/mballoc.c &meta_group_info[j]->bb_state); @@ -2945,9 +2957,17 @@ int ext3_mb_mark_diskspace_used(struct e + mb_set_bits(NULL, bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); - spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); + if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); + gdp->bg_free_blocks_count = diff --git a/ldiskfs/kernel_patches/patches/ext3-uninit-2.6.18.patch b/ldiskfs/kernel_patches/patches/ext3-uninit-2.6.18.patch index dc506e7..88dc179 100644 --- a/ldiskfs/kernel_patches/patches/ext3-uninit-2.6.18.patch +++ b/ldiskfs/kernel_patches/patches/ext3-uninit-2.6.18.patch @@ -535,9 +535,9 @@ Index: linux-2.6.18-53.1.14/fs/ext3/mballoc.c &meta_group_info[j]->bb_state); @@ -2943,9 +2955,17 @@ int ext3_mb_mark_diskspace_used(struct e + mb_set_bits(NULL, bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); - spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); + if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); + gdp->bg_free_blocks_count = diff --git a/ldiskfs/kernel_patches/patches/ext3-uninit-2.6.22-vanilla.patch b/ldiskfs/kernel_patches/patches/ext3-uninit-2.6.22-vanilla.patch index c4fa59e..069f1ba 100644 --- a/ldiskfs/kernel_patches/patches/ext3-uninit-2.6.22-vanilla.patch +++ b/ldiskfs/kernel_patches/patches/ext3-uninit-2.6.22-vanilla.patch @@ -535,9 +535,9 @@ Index: linux-2.6.22.14/fs/ext3/mballoc.c &meta_group_info[j]->bb_state); @@ -2945,9 +2957,17 @@ int ext3_mb_mark_diskspace_used(struct e - ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); + mb_set_bits(NULL, bitmap_bh->b_data, + ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); - spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); + if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); + gdp->bg_free_blocks_count = diff --git a/ldiskfs/kernel_patches/patches/ext3-uninit-2.6.9.patch b/ldiskfs/kernel_patches/patches/ext3-uninit-2.6.9.patch index aa7effc..eb9a75d 100644 --- a/ldiskfs/kernel_patches/patches/ext3-uninit-2.6.9.patch +++ b/ldiskfs/kernel_patches/patches/ext3-uninit-2.6.9.patch @@ -224,7 +224,7 @@ Index: linux-2.6.9-67.0.15/fs/ext3/group.h +/* + * linux/fs/ext3/group.h + * -+ * Copyright 2007 Sun Microsystems, Inc. ++ * Copyright 2008 Sun Microsystems, Inc. + * + * Author: Andreas Dilger + */ @@ -535,9 +535,9 @@ Index: linux-2.6.9-67.0.15/fs/ext3/mballoc.c &meta_group_info[j]->bb_state); @@ -2945,9 +2957,17 @@ int ext3_mb_mark_diskspace_used(struct e + mb_set_bits(NULL, bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len); - spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group)); + if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) { + gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT); + gdp->bg_free_blocks_count = diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5.series index 3ccbb49..330b52e 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5.series @@ -26,4 +26,3 @@ ext3-xattr-no-update-ctime-2.6.22-vanilla.patch ext3-journal-chksum-2.6.18-vanilla.patch ext3-get-raid-stripe-from-sb.patch ext3-big-endian-check-2.6-rhel5.patch -ext3-fix-race-mb-clear-bits.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles10.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles10.series index 9d88798..c6934bb 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles10.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-sles10.series @@ -31,4 +31,3 @@ ext3-check-bad-inode.patch ext3-journal-chksum-2.6.18-vanilla.patch ext3-get-raid-stripe-from-sb.patch ext3-big-endian-check-2.6-sles10.patch -ext3-fix-race-mb-clear-bits.patch -- 1.8.3.1