From 48452fbe583cf365d3c1f5be3c4272d30e198781 Mon Sep 17 00:00:00 2001 From: Bobi Jam Date: Thu, 27 Oct 2011 09:51:39 +0800 Subject: [PATCH] LU-508 ldiskfs: fix race in ext4_ext_walk_space() we should not access on-disk data (e.g. path->p_ext->*) with no locking. to be fixed in mainline ext4 as well. Port from: ORI-291 Author: Alex Zhuravlev Signed-off-by: Bobi Jam Change-Id: Ic9dfcc9a90766c79c0da5ca54e9fbb2f917865a6 Reviewed-on: http://review.whamcloud.com/1618 Tested-by: Hudson Tested-by: Maloo Reviewed-by: Alex Zhuravlev Reviewed-by: Mikhail Pershin Reviewed-by: Oleg Drokin --- .../ext4-store-tree-generation-at-find.patch | 66 ++++++++++++++++++++++ .../series/ldiskfs-2.6-rhel5-ext4.series | 1 + .../kernel_patches/series/ldiskfs-2.6-rhel6.series | 1 + lustre/lvfs/fsfilt_ext3.c | 29 +++------- 4 files changed, 77 insertions(+), 20 deletions(-) create mode 100644 ldiskfs/kernel_patches/patches/ext4-store-tree-generation-at-find.patch diff --git a/ldiskfs/kernel_patches/patches/ext4-store-tree-generation-at-find.patch b/ldiskfs/kernel_patches/patches/ext4-store-tree-generation-at-find.patch new file mode 100644 index 0000000..9c87a92 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/ext4-store-tree-generation-at-find.patch @@ -0,0 +1,66 @@ +Index: linux-stage/fs/ext4/ext4_extents.h +=================================================================== +--- linux-stage.orig/fs/ext4/ext4_extents.h ++++ linux-stage/fs/ext4/ext4_extents.h +@@ -113,6 +113,7 @@ struct ext4_extent_header { + * Truncate uses it to simulate recursive walking. + */ + struct ext4_ext_path { ++ unsigned long p_generation; + ext4_fsblk_t p_block; + __u16 p_depth; + struct ext4_extent *p_ext; +Index: linux-stage/fs/ext4/extents.c +=================================================================== +--- linux-stage.orig/fs/ext4/extents.c ++++ linux-stage/fs/ext4/extents.c +@@ -1855,7 +1855,7 @@ int ext4_ext_walk_space(struct inode *in + { + struct ext4_ext_path *path = NULL; + struct ext4_ext_cache cbex; +- struct ext4_extent *ex; ++ struct ext4_extent _ex, *ex; + ext4_lblk_t next, start = 0, end = 0; + ext4_lblk_t last = block + num; + int depth, exists, err = 0; +@@ -1868,21 +1868,29 @@ int ext4_ext_walk_space(struct inode *in + /* find extent for this block */ + down_read(&EXT4_I(inode)->i_data_sem); + path = ext4_ext_find_extent(inode, block, path); +- up_read(&EXT4_I(inode)->i_data_sem); + if (IS_ERR(path)) { ++ up_read(&EXT4_I(inode)->i_data_sem); + err = PTR_ERR(path); + path = NULL; + break; + } + ++ path[0].p_generation = EXT4_I(inode)->i_ext_generation; ++ + depth = ext_depth(inode); + if (unlikely(path[depth].p_hdr == NULL)) { ++ up_read(&EXT4_I(inode)->i_data_sem); + EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth); + err = -EIO; + break; + } +- ex = path[depth].p_ext; ++ ex = NULL; ++ if (path[depth].p_ext) { ++ _ex = *path[depth].p_ext; ++ ex = &_ex; ++ } + next = ext4_ext_next_allocated_block(path); ++ up_read(&EXT4_I(inode)->i_data_sem); + + exists = 0; + if (!ex) { +@@ -1936,7 +1944,7 @@ int ext4_ext_walk_space(struct inode *in + err = -EIO; + break; + } +- err = func(inode, path, &cbex, ex, cbdata); ++ err = func(inode, path, &cbex, NULL, cbdata); + ext4_ext_drop_refs(path); + + if (err < 0) diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series index ccd906e..a73e039 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series @@ -36,3 +36,4 @@ ext4-export-64bit-name-hash.patch ext4-vmalloc-rhel5.patch ext4-mballoc-group_check-rhel5.patch ext4-journal-callback-rhel5.patch +ext4-store-tree-generation-at-find.patch diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series index 1b1b133..4b4318b 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series @@ -32,3 +32,4 @@ ext4-nocmtime-2.6-rhel5.patch ext4-export-64bit-name-hash.patch ext4-vmalloc-rhel6.patch ext4-journal-callback.patch +ext4-store-tree-generation-at-find.patch diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index af1f719..54b16ae 100644 --- a/lustre/lvfs/fsfilt_ext3.c +++ b/lustre/lvfs/fsfilt_ext3.c @@ -983,19 +983,18 @@ static int ext3_ext_new_extent_cb(struct ext3_ext_base *base, #endif struct inode *inode = ext3_ext_base2inode(base); struct ext3_extent nex; -#if defined(HAVE_EXT4_LDISKFS) - struct ext4_ext_path *tmppath = NULL; - struct ext4_extent *tmpex; -#endif unsigned long pblock; unsigned long tgen; - int err, i, depth; + int err, i; unsigned long count; handle_t *handle; - i = depth = EXT_DEPTH(base); - EXT_ASSERT(i == path->p_depth); + i = EXT_DEPTH(base); EXT_ASSERT(path[i].p_hdr); + LASSERTF(i == path->p_depth || + EXT_GENERATION(base) != path[0].p_generation, + "base vs path extent depth:%d != %d, generation:%lu == %lu\n", + i, path->p_depth, EXT_GENERATION(base), path[0].p_generation); if (cex->ec_type == EXT3_EXT_CACHE_EXTENT) { err = EXT_CONTINUE; @@ -1040,21 +1039,13 @@ static int ext3_ext_new_extent_cb(struct ext3_ext_base *base, #if defined(HAVE_EXT4_LDISKFS) /* In 2.6.32 kernel, ext4_ext_walk_space()'s callback func is not - * protected by i_data_sem, we need revalidate extent to be created */ + * protected by i_data_sem as whole. so we patch it to store + * generation to path and now verify the tree hasn't changed */ down_write((&EXT4_I(inode)->i_data_sem)); /* validate extent, make sure the extent tree does not changed */ - tmppath = ext4_ext_find_extent(inode, cex->ec_block, NULL); - if (IS_ERR(tmppath)) { - up_write(&EXT4_I(inode)->i_data_sem); - ext3_journal_stop(handle); - return PTR_ERR(tmppath); - } - tmpex = tmppath[depth].p_ext; - if (tmpex != ex) { + if (EXT_GENERATION(base) != path[0].p_generation) { /* cex is invalid, try again */ - ext4_ext_drop_refs(tmppath); - kfree(tmppath); up_write(&EXT4_I(inode)->i_data_sem); ext3_journal_stop(handle); return EXT_REPEAT; @@ -1096,8 +1087,6 @@ static int ext3_ext_new_extent_cb(struct ext3_ext_base *base, out: #if defined(HAVE_EXT4_LDISKFS) - ext4_ext_drop_refs(tmppath); - kfree(tmppath); up_write((&EXT4_I(inode)->i_data_sem)); #endif ext3_journal_stop(handle); -- 1.8.3.1