Whamcloud - gitweb
LU-1026 ldiskfs: make bitmaps corruption not fatal
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / sles11sp2 / ext4-corrupted-inode-block-bitmaps-handling-patches.patch
index ca1a648..2f4a2a8 100644 (file)
@@ -1,68 +1,70 @@
----
- fs/ext4/balloc.c  |   14 ++++++++------
- fs/ext4/ext4.h    |    6 ++++++
- fs/ext4/ialloc.c  |   37 +++++++++++++++++++++++++++----------
- fs/ext4/mballoc.c |   22 +++++++++++++++++++++-
- 4 files changed, 62 insertions(+), 17 deletions(-)
-
 diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
-index db5b7a3..c631a37 100644
+index db5b7a3..332078f 100644
 --- a/fs/ext4/balloc.c
 +++ b/fs/ext4/balloc.c
-@@ -92,6 +92,7 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
-       ext4_group_t ngroups = ext4_get_groups_count(sb);
-       unsigned free_blocks, group_blocks;
-       struct ext4_sb_info *sbi = EXT4_SB(sb);
-+      struct ext4_group_info *grp;
-       if (bh) {
-               J_ASSERT_BH(bh, buffer_locked(bh));
-@@ -99,12 +100,11 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
+@@ -99,12 +99,11 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
                /* If checksum is bad mark all blocks used to prevent allocation
                 * essentially implementing a per-group read-only flag. */
                if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
 -                      ext4_error(sb, "Checksum bad for group %u",
-+                      ext4_warning(sb, "Checksum bad for group %u",
++                      ext4_corrupted_block_group(sb, block_group,
++                                      EXT4_GROUP_INFO_BBITMAP_CORRUPT |
++                                      EXT4_GROUP_INFO_IBITMAP_CORRUPT,
++                                      "Checksum bad for group %u",
                                        block_group);
 -                      ext4_free_blks_set(sb, gdp, 0);
 -                      ext4_free_inodes_set(sb, gdp, 0);
 -                      ext4_itable_unused_set(sb, gdp, 0);
 -                      memset(bh->b_data, 0xff, sb->s_blocksize);
-+                      grp = ext4_get_group_info(sb, block_group);
-+                      set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
-+                      set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
                        return 0;
                }
                memset(bh->b_data, 0, sb->s_blocksize);
-@@ -242,6 +242,7 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
-       ext4_grpblk_t next_zero_bit;
-       ext4_fsblk_t bitmap_blk;
-       ext4_fsblk_t group_first_block;
-+      struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
-       if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) {
-               /* with FLEX_BG, the inode/block bitmaps and itable
-@@ -279,8 +280,9 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
+@@ -279,8 +278,10 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
                return 1;
  
  err_out:
 -      ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu",
-+      ext4_warning(sb, "Invalid block bitmap - block_group = %d, block = %llu",
-                       block_group, bitmap_blk);
-+      set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
+-                      block_group, bitmap_blk);
++      ext4_corrupted_block_group(sb, block_group,
++                                 EXT4_GROUP_INFO_BBITMAP_CORRUPT,
++                                 "Invalid block bitmap - block_group = %d, block = %llu",
++                                 block_group, bitmap_blk);
        return 0;
  }
  /**
 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
-index a18fd36..40ad66f 100644
+index a18fd36..949d7be 100644
 --- a/fs/ext4/ext4.h
 +++ b/fs/ext4/ext4.h
-@@ -2257,9 +2257,15 @@ struct ext4_group_info {
+@@ -78,6 +78,18 @@ typedef __u32 ext4_lblk_t;
+ /* data type for block group number */
+ typedef unsigned int ext4_group_t;
++void __ext4_corrupted_block_group(struct super_block *sb,
++                                ext4_group_t group, unsigned int flags,
++                                const char *function,
++                                unsigned int line);
++/*
++ * ext4_corrupted_block_group() - report a corrupted block group.
++ *
++ * First emits a warning through __ext4_warning() using the caller's
++ * __func__/__LINE__ and the printf-style arguments given in @fmt, then
++ * calls __ext4_corrupted_block_group() with @sb, @group, @flags and the
++ * same __func__/__LINE__.  @flags is expected to be a mask of the
++ * EXT4_GROUP_INFO_*_CORRUPT values.
++ */
++#define ext4_corrupted_block_group(sb, group, flags, fmt...)          \
++      do {                                                            \
++              __ext4_warning(sb, __func__, __LINE__, ## fmt);         \
++              __ext4_corrupted_block_group(sb, group, flags,          \
++                      __func__, __LINE__);                            \
++      } while (0)
++
+ /*
+  * Flags used in mballoc's allocation_context flags field.
+  *
+@@ -2257,9 +2269,19 @@ struct ext4_group_info {
  
  #define EXT4_GROUP_INFO_NEED_INIT_BIT         0
  #define EXT4_GROUP_INFO_WAS_TRIMMED_BIT               1
 +#define EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT   2
++#define EXT4_GROUP_INFO_BBITMAP_CORRUPT               \
++              (1 << EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT)
 +#define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT   3
++#define EXT4_GROUP_INFO_IBITMAP_CORRUPT               \
++              (1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT)
  
  #define EXT4_MB_GRP_NEED_INIT(grp)    \
        (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
@@ -74,17 +76,10 @@ index a18fd36..40ad66f 100644
  #define EXT4_MB_GRP_WAS_TRIMMED(grp)  \
        (test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
 diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
-index 532dcaa..c9e6b19 100644
+index 532dcaa..6082e54 100644
 --- a/fs/ext4/ialloc.c
 +++ b/fs/ext4/ialloc.c
-@@ -72,17 +72,17 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
-                                      struct ext4_group_desc *gdp)
- {
-       struct ext4_sb_info *sbi = EXT4_SB(sb);
-+      struct ext4_group_info *grp;
-       J_ASSERT_BH(bh, buffer_locked(bh));
+@@ -78,11 +78,10 @@ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
        /* If checksum is bad mark all blocks and inodes use to prevent
         * allocation, essentially implementing a per-group read-only flag. */
        if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
@@ -93,14 +88,14 @@ index 532dcaa..c9e6b19 100644
 -              ext4_free_inodes_set(sb, gdp, 0);
 -              ext4_itable_unused_set(sb, gdp, 0);
 -              memset(bh->b_data, 0xff, sb->s_blocksize);
-+              ext4_warning(sb, "Checksum bad for group %u", block_group);
-+              grp = ext4_get_group_info(sb, block_group);
-+              set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
-+              set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
++              ext4_corrupted_block_group(sb, block_group,
++                              EXT4_GROUP_INFO_BBITMAP_CORRUPT |
++                              EXT4_GROUP_INFO_IBITMAP_CORRUPT,
++                              "Checksum bad for group %u", block_group);
                return 0;
        }
  
-@@ -195,6 +195,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
+@@ -195,6 +194,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
        struct ext4_super_block *es;
        struct ext4_sb_info *sbi;
        int fatal = 0, err, count, cleared;
@@ -108,7 +103,7 @@ index 532dcaa..c9e6b19 100644
  
        if (atomic_read(&inode->i_count) > 1) {
                printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
-@@ -238,7 +239,9 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
+@@ -238,7 +238,9 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
        block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
        bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
        bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
@@ -119,20 +114,22 @@ index 532dcaa..c9e6b19 100644
                goto error_return;
  
        BUFFER_TRACE(bitmap_bh, "get_write_access");
-@@ -286,8 +289,10 @@ out:
+@@ -286,8 +288,12 @@ out:
                if (!fatal)
                        fatal = err;
                ext4_mark_super_dirty(sb);
 -      } else
 -              ext4_error(sb, "bit already cleared for inode %lu", ino);
 +      } else {
-+              ext4_warning(sb, "bit already cleared for inode %lu", ino);
-+              set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
++              ext4_corrupted_block_group(sb, block_group,
++                              EXT4_GROUP_INFO_IBITMAP_CORRUPT,
++                              "bit already cleared for inode %lu",
++                              ino);
 +      }
  
  error_return:
        brelse(bitmap_bh);
-@@ -820,6 +825,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
+@@ -820,6 +826,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
        int free = 0;
        static int once = 1;
        ext4_group_t flex_group;
@@ -140,7 +137,7 @@ index 532dcaa..c9e6b19 100644
  
        /* Cannot create files in a deleted directory */
        if (!dir || !dir->i_nlink)
-@@ -879,10 +885,21 @@ got_group:
+@@ -879,10 +886,21 @@ got_group:
                if (!gdp)
                        goto fail;
  
@@ -165,10 +162,45 @@ index 532dcaa..c9e6b19 100644
  repeat_in_this_group:
                ino = ext4_find_next_zero_bit((unsigned long *)
 diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
-index 1d77581..d19d1ba 100644
+index 1d77581..4d6558e 100644
 --- a/fs/ext4/mballoc.c
 +++ b/fs/ext4/mballoc.c
-@@ -1322,6 +1322,10 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
+@@ -740,7 +740,6 @@ int ext4_mb_generate_buddy(struct super_block *sb,
+                                     "%u blocks in bitmap, %u in bb, %u in gd",
+                                     free, grp->bb_free,
+                                     ext4_free_blks_count(sb, gdp));
+-
+               /*
+                * If we intent to continue, we consider group descritor
+                * corrupt and update bb_free using bitmap value
+@@ -1124,7 +1123,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+       int block;
+       int pnum;
+       int poff;
+-      struct page *page;
++      struct page *page = NULL;
+       int ret;
+       struct ext4_group_info *grp;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+@@ -1149,7 +1148,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+                */
+               ret = ext4_mb_init_group(sb, group);
+               if (ret)
+-                      return ret;
++                      goto err;
+       }
+       /*
+@@ -1233,6 +1232,8 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+       return 0;
+ err:
++      ext4_warning(sb, "Error in loading buddy information for %u",
++                      group);
+       if (page)
+               page_cache_release(page);
+       if (e4b->bd_bitmap_page)
+@@ -1322,6 +1323,10 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
  
        BUG_ON(first + count > (sb->s_blocksize << 3));
        assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
@@ -179,20 +211,7 @@ index 1d77581..d19d1ba 100644
        mb_check_buddy(e4b);
        mb_free_blocks_double(inode, e4b, first, count);
  
-@@ -1353,7 +1357,11 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
-                                             inode ? inode->i_ino : 0,
-                                             blocknr,
-                                             "freeing already freed block "
--                                            "(bit %u)", block);
-+                                            "(bit %u); block bitmap corrupt",
-+                                            block);
-+                      /* Mark the block group as corrupt. */
-+                      set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
-+                              &e4b->bd_info->bb_state);
-               }
-               mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
-               e4b->bd_info->bb_counters[order]++;
-@@ -1729,6 +1737,11 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
+@@ -1729,6 +1734,11 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
        if (err)
                return err;
  
@@ -204,7 +223,7 @@ index 1d77581..d19d1ba 100644
        ext4_lock_group(ac->ac_sb, group);
        max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start,
                             ac->ac_g_ex.fe_len, &ex);
-@@ -1940,6 +1953,9 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
+@@ -1940,6 +1950,9 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
  
        BUG_ON(cr < 0 || cr >= 4);
  
@@ -214,7 +233,106 @@ index 1d77581..d19d1ba 100644
        /* We only do this if the grp has never been initialized */
        if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
                int ret = ext4_mb_init_group(ac->ac_sb, group);
-@@ -4781,6 +4797,10 @@ do_more:
+@@ -3458,9 +3471,11 @@ int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
+       }
+       if (free != ext4_free_blks_count(sb, gdp)) {
+-              ext4_error(sb, "on-disk bitmap for group %d"
+-                      "corrupted: %u blocks free in bitmap, %u - in gd\n",
+-                      group, free, ext4_free_blks_count(sb, gdp));
++              ext4_corrupted_block_group(sb, group,
++                              EXT4_GROUP_INFO_BBITMAP_CORRUPT,
++                              "on-disk bitmap for group %d corrupted: %u blocks free in bitmap, %u - in gd\n",
++                              group, free,
++                              ext4_free_blks_count(sb, gdp));
+               return -EIO;
+       }
+       return 0;
+@@ -3813,17 +3828,9 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
+       /* "free < pa->pa_free" means we maybe double alloc the same blocks,
+        * otherwise maybe leave some free blocks unavailable, no need to BUG.*/
+       if ((free > pa->pa_free && !pa->pa_error) || (free < pa->pa_free)) {
+-              ext4_error(sb, "pa free mismatch: [pa %p] "
+-                              "[phy %lu] [logic %lu] [len %u] [free %u] "
+-                              "[error %u] [inode %lu] [freed %u]", pa,
+-                              (unsigned long)pa->pa_pstart,
+-                              (unsigned long)pa->pa_lstart,
+-                              (unsigned)pa->pa_len, (unsigned)pa->pa_free,
+-                              (unsigned)pa->pa_error, pa->pa_inode->i_ino,
+-                              free);
+               ext4_grp_locked_error(sb, group, 0, 0,
+-                                      "free %u, pa_free %u",
+-                                      free, pa->pa_free);
++                                    "free %u, pa_free %u",
++                                    free, pa->pa_free);
+               /*
+                * pa is already deleted so we use the value obtained
+                * from the bitmap and continue.
+@@ -3883,14 +3890,11 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
+               return 0;
+       bitmap_bh = ext4_read_block_bitmap(sb, group);
+-      if (bitmap_bh == NULL) {
+-              ext4_error(sb, "Error reading block bitmap for %u", group);
++      if (bitmap_bh == NULL)
+               return 0;
+-      }
+       err = ext4_mb_load_buddy(sb, group, &e4b);
+       if (err) {
+-              ext4_error(sb, "Error loading buddy information for %u", group);
+               put_bh(bitmap_bh);
+               return 0;
+       }
+@@ -4054,16 +4058,11 @@ repeat:
+               ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
+               err = ext4_mb_load_buddy(sb, group, &e4b);
+-              if (err) {
+-                      ext4_error(sb, "Error loading buddy information for %u",
+-                                      group);
++              if (err)
+                       return;
+-              }
+               bitmap_bh = ext4_read_block_bitmap(sb, group);
+               if (bitmap_bh == NULL) {
+-                      ext4_error(sb, "Error reading block bitmap for %u",
+-                                      group);
+                       ext4_mb_unload_buddy(&e4b);
+                       continue;
+               }
+@@ -4324,11 +4323,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
+       list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
+               ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
+-              if (ext4_mb_load_buddy(sb, group, &e4b)) {
+-                      ext4_error(sb, "Error loading buddy information for %u",
+-                                      group);
++              if (ext4_mb_load_buddy(sb, group, &e4b))
+                       continue;
+-              }
+               ext4_lock_group(sb, group);
+               list_del(&pa->pa_group_list);
+               ext4_get_group_info(sb, group)->bb_prealloc_nr--;
+@@ -4585,7 +4581,7 @@ repeat:
+                       * been updated or not when fail case. So can
+                       * not revert pa_free back, just mark pa_error*/
+                       pa->pa_error++;
+-                      ext4_error(sb,
++                      ext4_corrupted_block_group(sb, 0, 0,
+                               "Updating bitmap error: [err %d] "
+                               "[pa %p] [phy %lu] [logic %lu] "
+                               "[len %u] [free %u] [error %u] "
+@@ -4731,6 +4727,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
+       struct ext4_buddy e4b;
+       int err = 0;
+       int ret;
++      int skip_error = 0;
+       if (bh) {
+               if (block)
+@@ -4781,6 +4778,10 @@ do_more:
        overflow = 0;
        ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
  
@@ -225,6 +343,96 @@ index 1d77581..d19d1ba 100644
        /*
         * Check to see if we are freeing blocks across a group
         * boundary.
--- 
-1.7.1
-
+@@ -4837,8 +4838,10 @@ do_more:
+       trace_ext4_mballoc_free(sb, inode, block_group, bit, count);
+       err = ext4_mb_load_buddy(sb, block_group, &e4b);
+-      if (err)
++      if (err) {
++              skip_error = 1;
+               goto error_return;
++      }
+       if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
+               struct ext4_free_data *new_entry;
+@@ -4905,8 +4908,9 @@ do_more:
+ error_return:
+       if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
+               dquot_free_block(inode, freed);
++      if (!skip_error)
++              ext4_std_error(sb, err);
+       brelse(bitmap_bh);
+-      ext4_std_error(sb, err);
+       return;
+ }
+@@ -4991,7 +4995,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
+       err = ext4_mb_load_buddy(sb, block_group, &e4b);
+       if (err)
+-              goto error_return;
++              goto error_brelse;
+       /*
+        * need to update group_info->bb_free and bitmap
+@@ -5026,8 +5030,9 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
+               err = ret;
+ error_return:
+-      brelse(bitmap_bh);
+       ext4_std_error(sb, err);
++error_brelse:
++      brelse(bitmap_bh);
+       return;
+ }
+@@ -5094,11 +5099,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
+       int ret;
+       ret = ext4_mb_load_buddy(sb, group, &e4b);
+-      if (ret) {
+-              ext4_error(sb, "Error in loading buddy "
+-                              "information for %u", group);
++      if (ret)
+               return ret;
+-      }
+       bitmap = e4b.bd_bitmap;
+       ext4_lock_group(sb, group);
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index a21e903..2fca810 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -666,6 +666,32 @@ void __ext4_warning(struct super_block *sb, const char *function,
+       va_end(args);
+ }
++/*
++ * Mark the bitmaps of block group @group selected by @flags as corrupted.
++ *
++ * @flags is a mask of EXT4_GROUP_INFO_BBITMAP_CORRUPT and
++ * EXT4_GROUP_INFO_IBITMAP_CORRUPT.  For each flag that is set and whose
++ * state bit is not yet set in the group info, the matching free counters
++ * in the group descriptor are zeroed (when the descriptor is available)
++ * and the state bit is set in grp->bb_state.  @function and @line are
++ * handed to save_error_info() in every case.
++ */
++void __ext4_corrupted_block_group(struct super_block *sb, ext4_group_t group,
++                                unsigned int flags, const char *function,
++                                unsigned int line)
++{
++      struct ext4_group_info *grp = ext4_get_group_info(sb, group);
++      struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
++
++      if (flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT &&
++          !EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) {
++              /*
++               * The descriptor lookup can fail; guard it the same way
++               * the inode bitmap branch below does, but still flag
++               * the group as corrupted.
++               */
++              if (gdp)
++                      ext4_free_blks_set(sb, gdp, 0);
++              set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
++                      &grp->bb_state);
++      }
++
++      if (flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT &&
++          !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
++              if (gdp) {
++                      ext4_free_inodes_set(sb, gdp, 0);
++                      ext4_itable_unused_set(sb, gdp, 0);
++              }
++              set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT,
++                      &grp->bb_state);
++      }
++      save_error_info(sb, function, line);
++}
++
+ void __ext4_grp_locked_error(const char *function, unsigned int line,
+                            struct super_block *sb, ext4_group_t grp,
+                            unsigned long ino, ext4_fsblk_t block,