Whamcloud - gitweb
b=16893
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / ext4-mballoc-extra-checks-rhel5.patch
diff --git a/ldiskfs/kernel_patches/patches/ext4-mballoc-extra-checks-rhel5.patch b/ldiskfs/kernel_patches/patches/ext4-mballoc-extra-checks-rhel5.patch
new file mode 100644 (file)
index 0000000..b8cdada
--- /dev/null
@@ -0,0 +1,330 @@
+Index: linux-2.6.18.i686/fs/ext4/mballoc.c
+===================================================================
+--- linux-2.6.18.i686.orig/fs/ext4/mballoc.c
++++ linux-2.6.18.i686/fs/ext4/mballoc.c
+@@ -660,7 +660,7 @@ static void ext4_mb_mark_free_simple(str
+       }
+ }
+-static void ext4_mb_generate_buddy(struct super_block *sb,
++static int ext4_mb_generate_buddy(struct super_block *sb,
+                               void *buddy, void *bitmap, ext4_group_t group)
+ {
+       struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+@@ -692,14 +692,14 @@ static void ext4_mb_generate_buddy(struc
+       grp->bb_fragments = fragments;
+       if (free != grp->bb_free) {
+-              ext4_error(sb, __func__,
+-                      "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
+-                      group, free, grp->bb_free);
+-              /*
+-               * If we intent to continue, we consider group descritor
+-               * corrupt and update bb_free using bitmap value
+-               */
+-              grp->bb_free = free;
++              struct ext4_group_desc *gdp;
++              gdp = ext4_get_group_desc (sb, group, NULL);
++              ext4_error(sb, __FUNCTION__,
++                      "group %lu: %u blocks in bitmap, %u in bb, "
++                      "%u in gd, %lu pa's\n", group, free, grp->bb_free,
++                      le16_to_cpu(gdp->bg_free_blocks_count),
++                      grp->bb_prealloc_nr);
++              return -EIO;
+       }
+       clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
+@@ -709,6 +709,8 @@ static void ext4_mb_generate_buddy(struc
+       EXT4_SB(sb)->s_mb_buddies_generated++;
+       EXT4_SB(sb)->s_mb_generation_time += period;
+       spin_unlock(&EXT4_SB(sb)->s_bal_lock);
++
++      return 0;
+ }
+ /* The buddy information is attached the buddy cache inode
+@@ -814,7 +816,7 @@ static int ext4_mb_init_cache(struct pag
+       err = 0;
+       first_block = page->index * blocks_per_page;
+-      for (i = 0; i < blocks_per_page; i++) {
++      for (i = 0; i < blocks_per_page && err == 0; i++) {
+               int group;
+               struct ext4_group_info *grinfo;
+@@ -848,7 +850,7 @@ static int ext4_mb_init_cache(struct pag
+                       /*
+                        * incore got set to the group block bitmap below
+                        */
+-                      ext4_mb_generate_buddy(sb, data, incore, group);
++                      err = ext4_mb_generate_buddy(sb, data, incore, group);
+                       incore = NULL;
+               } else {
+                       /* this is block of bitmap */
+@@ -861,7 +863,7 @@ static int ext4_mb_init_cache(struct pag
+                       memcpy(data, bitmap, blocksize);
+                       /* mark all preallocated blks used in in-core bitmap */
+-                      ext4_mb_generate_from_pa(sb, data, group);
++                      err = ext4_mb_generate_from_pa(sb, data, group);
+                       ext4_unlock_group(sb, group);
+                       /* set incore so that the buddy information can be
+@@ -870,6 +872,7 @@ static int ext4_mb_init_cache(struct pag
+                       incore = data;
+               }
+       }
++      if (likely(err == 0))
+       SetPageUptodate(page);
+ out:
+@@ -1964,7 +1967,10 @@ static int ext4_mb_seq_history_show(stru
+                       hs->result.fe_start, hs->result.fe_len);
+               seq_printf(seq, "%-5u %-8u %-23s free\n",
+                               hs->pid, hs->ino, buf2);
++      } else {
++              seq_printf(seq, "unknown op %d\n", hs->op);
+       }
++
+       return 0;
+ }
+@@ -2092,9 +2098,11 @@ static void *ext4_mb_seq_groups_next(str
+ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
+ {
+       struct super_block *sb = seq->private;
++      struct ext4_group_desc *gdp;
+       long group = (long) v;
+       int i;
+       int err;
++      int free = 0;
+       struct ext4_buddy e4b;
+       struct sg {
+               struct ext4_group_info info;
+@@ -2103,10 +2111,10 @@ static int ext4_mb_seq_groups_show(struc
+       group--;
+       if (group == 0)
+-              seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
++              seq_printf(seq, "#%-5s: %-5s %-5s %-5s %-5s %-5s"
+                               "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
+                                 "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
+-                         "group", "free", "frags", "first",
++                         "group", "free", "frags", "first", "first", "pa",
+                          "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
+                          "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
+@@ -2117,13 +2125,20 @@ static int ext4_mb_seq_groups_show(struc
+               seq_printf(seq, "#%-5lu: I/O error\n", group);
+               return 0;
+       }
++
++      gdp = ext4_get_group_desc(sb, group, NULL);
++      if (gdp != NULL)
++              free = le16_to_cpu(gdp->bg_free_blocks_count);
++
+       ext4_lock_group(sb, group);
+       memcpy(&sg, ext4_get_group_info(sb, group), i);
+       ext4_unlock_group(sb, group);
+       ext4_mb_release_desc(&e4b);
+-      seq_printf(seq, "#%-5lu: %-5u %-5u %-5u [", group, sg.info.bb_free,
+-                      sg.info.bb_fragments, sg.info.bb_first_free);
++      seq_printf(seq, "#%-5lu: %-5u %-5u %-5u %-5u %-5lu [", group,
++                      sg.info.bb_free, free,
++                      sg.info.bb_fragments, sg.info.bb_first_free,
++                      sg.info.bb_prealloc_nr);
+       for (i = 0; i <= 13; i++)
+               seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
+                               sg.info.bb_counters[i] : 0);
+@@ -2226,6 +2241,7 @@ ext4_mb_store_history(struct ext4_alloca
+       h.tail = ac->ac_tail;
+       h.buddy = ac->ac_buddy;
+       h.merged = 0;
++      h.cr = ac->ac_criteria;
+       if (ac->ac_op == EXT4_MB_HISTORY_ALLOC) {
+               if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
+                               ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
+@@ -3531,22 +3547,66 @@ ext4_mb_use_preallocated(struct ext4_all
+ }
+ /*
++ * check free blocks in bitmap match free block in group descriptor
++ * do this before taking preallocated blocks into account to be able
++ * to detect on-disk corruptions
++ */
++int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
++                              struct ext4_group_desc *gdp, int group)
++{
++      unsigned short max = EXT4_BLOCKS_PER_GROUP(sb);
++      unsigned short i, first, free = 0;
++
++      i = mb_find_next_zero_bit(bitmap, max, 0);
++
++      while (i < max) {
++              first = i;
++              i = find_next_bit(bitmap, max, i);
++              if (i > max)
++                      i = max;
++              free += i - first;
++              if (i < max)
++                      i = mb_find_next_zero_bit(bitmap, max, i);
++      }
++
++      if (free != le16_to_cpu(gdp->bg_free_blocks_count)) {
++              ext4_error(sb, __FUNCTION__, "on-disk bitmap for group %d"
++                      "corrupted: %u blocks free in bitmap, %u - in gd\n",
++                      group, free, le16_to_cpu(gdp->bg_free_blocks_count));
++              return -EIO;
++      }
++      return 0;
++}
++
++/*
+  * the function goes through all preallocation in this group and marks them
+  * used in in-core bitmap. buddy must be generated from this bitmap
+  * Need to be called with ext4 group lock (ext4_lock_group)
+  */
+-static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
++static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+                                       ext4_group_t group)
+ {
+       struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+       struct ext4_prealloc_space *pa;
++      struct ext4_group_desc *gdp;
+       struct list_head *cur;
+       ext4_group_t groupnr;
+       ext4_grpblk_t start;
+       int preallocated = 0;
+       int count = 0;
++      int skip = 0;
++      int err;
+       int len;
++      gdp = ext4_get_group_desc (sb, group, NULL);
++      if (gdp == NULL)
++              return -EIO;
++
++      /* before applying preallocations, check bitmap consistency */
++      err = ext4_mb_check_ondisk_bitmap(sb, bitmap, gdp, group);
++      if (err)
++              return err;
++
+       /* all form of preallocation discards first load group,
+        * so the only competing code is preallocation use.
+        * we don't need any locking here
+@@ -3562,15 +3622,24 @@ static void ext4_mb_generate_from_pa(str
+                                            &groupnr, &start);
+               len = pa->pa_len;
+               spin_unlock(&pa->pa_lock);
+-              if (unlikely(len == 0))
++              if (unlikely(len == 0)) {
++                      skip++;
+                       continue;
++              }
+               BUG_ON(groupnr != group);
+               mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
+                                               bitmap, start, len);
+               preallocated += len;
+               count++;
+       }
++      if (count + skip != grp->bb_prealloc_nr) {
++              ext4_error(sb, __FUNCTION__, "lost preallocations: "
++                         "count %d, bb_prealloc_nr %lu, skip %d\n",
++                         count, grp->bb_prealloc_nr, skip);
++              return -EIO;
++      }
+       mb_debug("prellocated %u for group %lu\n", preallocated, group);
++      return 0;
+ }
+ static void ext4_mb_pa_callback(struct rcu_head *head)
+@@ -3621,6 +3690,7 @@ static void ext4_mb_put_pa(struct ext4_a
+        */
+       ext4_lock_group(sb, grp);
+       list_del(&pa->pa_group_list);
++      ext4_get_group_info(sb, grp)->bb_prealloc_nr--;
+       ext4_unlock_group(sb, grp);
+       spin_lock(pa->pa_obj_lock);
+@@ -3709,6 +3779,7 @@ ext4_mb_new_inode_pa(struct ext4_allocat
+       ext4_lock_group(sb, ac->ac_b_ex.fe_group);
+       list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
++      grp->bb_prealloc_nr++;
+       ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
+       spin_lock(pa->pa_obj_lock);
+@@ -3768,6 +3839,7 @@ ext4_mb_new_group_pa(struct ext4_allocat
+       ext4_lock_group(sb, ac->ac_b_ex.fe_group);
+       list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
++      grp->bb_prealloc_nr++;
+       ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
+       /*
+@@ -3820,6 +3892,7 @@ ext4_mb_release_inode_pa(struct ext4_bud
+               ac->ac_sb = sb;
+               ac->ac_inode = pa->pa_inode;
+               ac->ac_op = EXT4_MB_HISTORY_DISCARD;
++              ac->ac_o_ex.fe_len = 1;
+       }
+       while (bit < end) {
+@@ -3964,6 +4037,8 @@ repeat:
+               spin_unlock(&pa->pa_lock);
++              BUG_ON(grp->bb_prealloc_nr == 0);
++              grp->bb_prealloc_nr--;
+               list_del(&pa->pa_group_list);
+               list_add(&pa->u.pa_tmp_list, &list);
+       }
+@@ -4099,7 +4174,7 @@ repeat:
+               if (err) {
+                       ext4_error(sb, __func__, "Error in loading buddy "
+                                       "information for %lu\n", group);
+-                      continue;
++                      return;
+               }
+               bitmap_bh = ext4_read_block_bitmap(sb, group);
+@@ -4111,6 +4186,8 @@ repeat:
+               }
+               ext4_lock_group(sb, group);
++              BUG_ON(e4b.bd_info->bb_prealloc_nr == 0);
++              e4b.bd_info->bb_prealloc_nr--;
+               list_del(&pa->pa_group_list);
+               ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
+               ext4_unlock_group(sb, group);
+Index: linux-2.6.18.i686/fs/ext4/mballoc.h
+===================================================================
+--- linux-2.6.18.i686.orig/fs/ext4/mballoc.h
++++ linux-2.6.18.i686/fs/ext4/mballoc.h
+@@ -119,6 +119,7 @@ struct ext4_group_info {
+       unsigned short  bb_free;
+       unsigned short  bb_fragments;
+       struct          list_head bb_prealloc_list;
++      unsigned long   bb_prealloc_nr;
+ #ifdef DOUBLE_CHECK
+       void            *bb_bitmap;
+ #endif
+@@ -228,7 +229,7 @@ struct ext4_mb_history {
+       __u16 tail;     /* what tail broke some buddy */
+       __u16 buddy;    /* buddy the tail ^^^ broke */
+       __u16 flags;
+-      __u8 cr:3;      /* which phase the result extent was found at */
++      __u8 cr:8;      /* which phase the result extent was found at */
+       __u8 op:4;
+       __u8 merged:1;
+ };
+@@ -259,7 +260,7 @@ static void ext4_mb_store_history(struct
+ struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
+-static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
++static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
+                                       ext4_group_t group);
+ static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
+ static void ext4_mb_free_committed_blocks(struct super_block *);