Whamcloud - gitweb
LU-508 ldiskfs: fix race in ext4_ext_walk_space()
authorBobi Jam <bobijam@whamcloud.com>
Thu, 27 Oct 2011 01:51:39 +0000 (09:51 +0800)
committerOleg Drokin <green@whamcloud.com>
Thu, 24 Nov 2011 21:43:12 +0000 (16:43 -0500)
we should not access on-disk data (e.g. path->p_ext->*) with no
locking.

to be fixed in mainline ext4 as well.

Port from: ORI-291
Author: Alex Zhuravlev <bzzz@whamcloud.com>
Signed-off-by: Bobi Jam <bobijam@whamcloud.com>
Change-Id: Ic9dfcc9a90766c79c0da5ca54e9fbb2f917865a6
Reviewed-on: http://review.whamcloud.com/1618
Tested-by: Hudson
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Alex Zhuravlev <bzzz@whamcloud.com>
Reviewed-by: Mikhail Pershin <tappro@whamcloud.com>
Reviewed-by: Oleg Drokin <green@whamcloud.com>
ldiskfs/kernel_patches/patches/ext4-store-tree-generation-at-find.patch [new file with mode: 0644]
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel5-ext4.series
ldiskfs/kernel_patches/series/ldiskfs-2.6-rhel6.series
lustre/lvfs/fsfilt_ext3.c

diff --git a/ldiskfs/kernel_patches/patches/ext4-store-tree-generation-at-find.patch b/ldiskfs/kernel_patches/patches/ext4-store-tree-generation-at-find.patch
new file mode 100644 (file)
index 0000000..9c87a92
--- /dev/null
@@ -0,0 +1,66 @@
+Index: linux-stage/fs/ext4/ext4_extents.h
+===================================================================
+--- linux-stage.orig/fs/ext4/ext4_extents.h
++++ linux-stage/fs/ext4/ext4_extents.h
+@@ -113,6 +113,7 @@ struct ext4_extent_header {
+  * Truncate uses it to simulate recursive walking.
+  */
+ struct ext4_ext_path {
++      unsigned long                   p_generation;
+       ext4_fsblk_t                    p_block;
+       __u16                           p_depth;
+       struct ext4_extent              *p_ext;
+Index: linux-stage/fs/ext4/extents.c
+===================================================================
+--- linux-stage.orig/fs/ext4/extents.c
++++ linux-stage/fs/ext4/extents.c
+@@ -1855,7 +1855,7 @@ int ext4_ext_walk_space(struct inode *in
+ {
+       struct ext4_ext_path *path = NULL;
+       struct ext4_ext_cache cbex;
+-      struct ext4_extent *ex;
++      struct ext4_extent _ex, *ex;
+       ext4_lblk_t next, start = 0, end = 0;
+       ext4_lblk_t last = block + num;
+       int depth, exists, err = 0;
+@@ -1868,21 +1868,29 @@ int ext4_ext_walk_space(struct inode *in
+               /* find extent for this block */
+               down_read(&EXT4_I(inode)->i_data_sem);
+               path = ext4_ext_find_extent(inode, block, path);
+-              up_read(&EXT4_I(inode)->i_data_sem);
+               if (IS_ERR(path)) {
++                      up_read(&EXT4_I(inode)->i_data_sem);
+                       err = PTR_ERR(path);
+                       path = NULL;
+                       break;
+               }
++              path[0].p_generation = EXT4_I(inode)->i_ext_generation;
++
+               depth = ext_depth(inode);
+               if (unlikely(path[depth].p_hdr == NULL)) {
++                      up_read(&EXT4_I(inode)->i_data_sem);
+                       EXT4_ERROR_INODE(inode, "path[%d].p_hdr == NULL", depth);
+                       err = -EIO;
+                       break;
+               }
+-              ex = path[depth].p_ext;
++              ex = NULL;
++              if (path[depth].p_ext) {
++                      _ex = *path[depth].p_ext;
++                      ex = &_ex;
++              }
+               next = ext4_ext_next_allocated_block(path);
++              up_read(&EXT4_I(inode)->i_data_sem);
+               exists = 0;
+               if (!ex) {
+@@ -1936,7 +1944,7 @@ int ext4_ext_walk_space(struct inode *in
+                       err = -EIO;
+                       break;
+               }
+-              err = func(inode, path, &cbex, ex, cbdata);
++              err = func(inode, path, &cbex, NULL, cbdata);
+               ext4_ext_drop_refs(path);
+               if (err < 0)
index ccd906e..a73e039 100644 (file)
@@ -36,3 +36,4 @@ ext4-export-64bit-name-hash.patch
 ext4-vmalloc-rhel5.patch
 ext4-mballoc-group_check-rhel5.patch
 ext4-journal-callback-rhel5.patch
 ext4-vmalloc-rhel5.patch
 ext4-mballoc-group_check-rhel5.patch
 ext4-journal-callback-rhel5.patch
+ext4-store-tree-generation-at-find.patch
index 1b1b133..4b4318b 100644 (file)
@@ -32,3 +32,4 @@ ext4-nocmtime-2.6-rhel5.patch
 ext4-export-64bit-name-hash.patch
 ext4-vmalloc-rhel6.patch
 ext4-journal-callback.patch
 ext4-export-64bit-name-hash.patch
 ext4-vmalloc-rhel6.patch
 ext4-journal-callback.patch
+ext4-store-tree-generation-at-find.patch
index af1f719..54b16ae 100644 (file)
@@ -983,19 +983,18 @@ static int ext3_ext_new_extent_cb(struct ext3_ext_base *base,
 #endif
         struct inode *inode = ext3_ext_base2inode(base);
         struct ext3_extent nex;
 #endif
         struct inode *inode = ext3_ext_base2inode(base);
         struct ext3_extent nex;
-#if defined(HAVE_EXT4_LDISKFS)
-        struct ext4_ext_path *tmppath = NULL;
-        struct ext4_extent *tmpex;
-#endif
         unsigned long pblock;
         unsigned long tgen;
         unsigned long pblock;
         unsigned long tgen;
-        int err, i, depth;
+        int err, i;
         unsigned long count;
         handle_t *handle;
 
         unsigned long count;
         handle_t *handle;
 
-        i = depth = EXT_DEPTH(base);
-        EXT_ASSERT(i == path->p_depth);
+        i = EXT_DEPTH(base);
         EXT_ASSERT(path[i].p_hdr);
         EXT_ASSERT(path[i].p_hdr);
+        LASSERTF(i == path->p_depth ||
+                 EXT_GENERATION(base) != path[0].p_generation,
+                 "base vs path extent depth:%d != %d, generation:%lu == %lu\n",
+                 i, path->p_depth, EXT_GENERATION(base), path[0].p_generation);
 
         if (cex->ec_type == EXT3_EXT_CACHE_EXTENT) {
                 err = EXT_CONTINUE;
 
         if (cex->ec_type == EXT3_EXT_CACHE_EXTENT) {
                 err = EXT_CONTINUE;
@@ -1040,21 +1039,13 @@ static int ext3_ext_new_extent_cb(struct ext3_ext_base *base,
 
 #if defined(HAVE_EXT4_LDISKFS)
         /* In 2.6.32 kernel, ext4_ext_walk_space()'s callback func is not
 
 #if defined(HAVE_EXT4_LDISKFS)
         /* In 2.6.32 kernel, ext4_ext_walk_space()'s callback func is not
-         * protected by i_data_sem, we need revalidate extent to be created */
+         * protected by i_data_sem as whole. so we patch it to store
+        * generation to path and now verify the tree hasn't changed */
         down_write((&EXT4_I(inode)->i_data_sem));
 
         /* validate extent, make sure the extent tree does not changed */
         down_write((&EXT4_I(inode)->i_data_sem));
 
         /* validate extent, make sure the extent tree does not changed */
-        tmppath = ext4_ext_find_extent(inode, cex->ec_block, NULL);
-        if (IS_ERR(tmppath)) {
-                up_write(&EXT4_I(inode)->i_data_sem);
-                ext3_journal_stop(handle);
-                return PTR_ERR(tmppath);
-        }
-        tmpex = tmppath[depth].p_ext;
-        if (tmpex != ex) {
+       if (EXT_GENERATION(base) != path[0].p_generation) {
                 /* cex is invalid, try again */
                 /* cex is invalid, try again */
-                ext4_ext_drop_refs(tmppath);
-                kfree(tmppath);
                 up_write(&EXT4_I(inode)->i_data_sem);
                 ext3_journal_stop(handle);
                 return EXT_REPEAT;
                 up_write(&EXT4_I(inode)->i_data_sem);
                 ext3_journal_stop(handle);
                 return EXT_REPEAT;
@@ -1096,8 +1087,6 @@ static int ext3_ext_new_extent_cb(struct ext3_ext_base *base,
 
 out:
 #if defined(HAVE_EXT4_LDISKFS)
 
 out:
 #if defined(HAVE_EXT4_LDISKFS)
-        ext4_ext_drop_refs(tmppath);
-        kfree(tmppath);
         up_write((&EXT4_I(inode)->i_data_sem));
 #endif
         ext3_journal_stop(handle);
         up_write((&EXT4_I(inode)->i_data_sem));
 #endif
         ext3_journal_stop(handle);