Whamcloud - gitweb
LU-9410 ldiskfs: no check mb bitmap if flex_bg enabled
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / sles11sp2 / ext4-mballoc-extra-checks.patch
1 ---
2  fs/ext4/ext4.h    |    1 
3  fs/ext4/mballoc.c |  109 ++++++++++++++++++++++++++++++++++++++++++++++--------
4  fs/ext4/mballoc.h |    2 -
5  3 files changed, 96 insertions(+), 16 deletions(-)
6
7 --- a/fs/ext4/ext4.h
8 +++ b/fs/ext4/ext4.h
9 @@ -2098,6 +2098,7 @@ struct ext4_group_info {
10         ext4_grpblk_t   bb_fragments;   /* nr of freespace fragments */
11         ext4_grpblk_t   bb_largest_free_order;/* order of largest frag in BG */
12         struct          list_head bb_prealloc_list;
13 +       unsigned long   bb_prealloc_nr;
14  #ifdef DOUBLE_CHECK
15         void            *bb_bitmap;
16  #endif
17 --- a/fs/ext4/mballoc.c
18 +++ b/fs/ext4/mballoc.c
19 @@ -352,7 +352,7 @@ static const char *ext4_groupinfo_slab_n
20         "ext4_groupinfo_64k", "ext4_groupinfo_128k"
21  };
22  
23 -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
24 +static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
25                                         ext4_group_t group);
26  static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
27                                                 ext4_group_t group);
28 @@ -702,7 +702,7 @@ mb_set_largest_free_order(struct super_b
29  }
30  
31  static noinline_for_stack
32 -void ext4_mb_generate_buddy(struct super_block *sb,
33 +int ext4_mb_generate_buddy(struct super_block *sb,
34                                 void *buddy, void *bitmap, ext4_group_t group)
35  {
36         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
37 @@ -734,14 +734,19 @@ void ext4_mb_generate_buddy(struct super
38         grp->bb_fragments = fragments;
39  
40         if (free != grp->bb_free) {
41 +               struct ext4_group_desc *gdp;
42 +               gdp = ext4_get_group_desc (sb, group, NULL);
43                 ext4_grp_locked_error(sb, group, 0, 0,
44 -                                     "%u blocks in bitmap, %u in gd",
45 -                                     free, grp->bb_free);
46 +                                     "%u blocks in bitmap, %u in bb, %u in gd",
47 +                                     free, grp->bb_free,
48 +                                     ext4_free_blks_count(sb, gdp));
49 +
50                 /*
51                  * If we intent to continue, we consider group descritor
52                  * corrupt and update bb_free using bitmap value
53                  */
54                 grp->bb_free = free;
55 +               return -EIO;
56         }
57         mb_set_largest_free_order(sb, grp);
58  
59 @@ -752,6 +757,8 @@ void ext4_mb_generate_buddy(struct super
60         EXT4_SB(sb)->s_mb_buddies_generated++;
61         EXT4_SB(sb)->s_mb_generation_time += period;
62         spin_unlock(&EXT4_SB(sb)->s_bal_lock);
63 +
64 +       return 0;
65  }
66  
67  /* The buddy information is attached the buddy cache inode
68 @@ -898,7 +905,7 @@ static int ext4_mb_init_cache(struct pag
69  
70         err = 0;
71         first_block = page->index * blocks_per_page;
72 -       for (i = 0; i < blocks_per_page; i++) {
73 +       for (i = 0; i < blocks_per_page && err == 0; i++) {
74                 int group;
75  
76                 group = (first_block + i) >> 1;
77 @@ -939,7 +946,7 @@ static int ext4_mb_init_cache(struct pag
78                         ext4_lock_group(sb, group);
79                         /* init the buddy */
80                         memset(data, 0xff, blocksize);
81 -                       ext4_mb_generate_buddy(sb, data, incore, group);
82 +                       err = ext4_mb_generate_buddy(sb, data, incore, group);
83                         ext4_unlock_group(sb, group);
84                         incore = NULL;
85                 } else {
86 @@ -954,7 +961,7 @@ static int ext4_mb_init_cache(struct pag
87                         memcpy(data, bitmap, blocksize);
88  
89                         /* mark all preallocated blks used in in-core bitmap */
90 -                       ext4_mb_generate_from_pa(sb, data, group);
91 +                       err = ext4_mb_generate_from_pa(sb, data, group);
92                         ext4_mb_generate_from_freelist(sb, data, group);
93                         ext4_unlock_group(sb, group);
94  
95 @@ -964,7 +971,8 @@ static int ext4_mb_init_cache(struct pag
96                         incore = data;
97                 }
98         }
99 -       SetPageUptodate(page);
100 +       if (likely(err == 0))
101 +               SetPageUptodate(page);
102  
103  out:
104         if (bh) {
105 @@ -2148,9 +2156,11 @@ static void *ext4_mb_seq_groups_next(str
106  static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
107  {
108         struct super_block *sb = seq->private;
109 +       struct ext4_group_desc *gdp;
110         ext4_group_t group = (ext4_group_t) ((unsigned long) v);
111         int i;
112         int err;
113 +       int free = 0;
114         struct ext4_buddy e4b;
115         struct sg {
116                 struct ext4_group_info info;
117 @@ -2159,10 +2169,10 @@ static int ext4_mb_seq_groups_show(struc
118  
119         group--;
120         if (group == 0)
121 -               seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
122 +               seq_printf(seq, "#%-5s: %-5s %-5s %-5s %-5s %-5s"
123                                 "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
124                                   "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
125 -                          "group", "free", "frags", "first",
126 +                          "group", "free", "free", "frags", "first", "pa",
127                            "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
128                            "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
129  
130 @@ -2173,13 +2183,20 @@ static int ext4_mb_seq_groups_show(struc
131                 seq_printf(seq, "#%-5u: I/O error\n", group);
132                 return 0;
133         }
134 +
135 +       gdp = ext4_get_group_desc(sb, group, NULL);
136 +       if (gdp != NULL)
137 +               free = ext4_free_blks_count(sb, gdp);
138 +
139         ext4_lock_group(sb, group);
140         memcpy(&sg, ext4_get_group_info(sb, group), i);
141         ext4_unlock_group(sb, group);
142         ext4_mb_unload_buddy(&e4b);
143  
144 -       seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
145 -                       sg.info.bb_fragments, sg.info.bb_first_free);
146 +       seq_printf(seq, "#%-5lu: %-5u %-5u %-5u %-5u %-5lu [",
147 +                       (long unsigned int)group, sg.info.bb_free, free,
148 +                       sg.info.bb_fragments, sg.info.bb_first_free,
149 +                       sg.info.bb_prealloc_nr);
150         for (i = 0; i <= 13; i++)
151                 seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
152                                 sg.info.bb_counters[i] : 0);
153 @@ -3417,23 +3434,72 @@ static void ext4_mb_generate_from_freeli
154  }
155  
156  /*
157 + * Count the free (zero) bits in the on-disk block bitmap and compare with
158 + * the free block count in the group descriptor.  Done before preallocated
159 + * blocks are applied so on-disk corruption is detectable.  Returns 0 if
160 + * consistent, -EIO if not.  The group lock must be held by the caller.
161 + */
162 +int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
163 +                               struct ext4_group_desc *gdp, int group)
164 +{
165 +       unsigned int max = EXT4_BLOCKS_PER_GROUP(sb); /* wide: no truncation */
166 +       unsigned int i, first, free = 0;
167 +       unsigned int free_in_gdp = ext4_free_blks_count(sb, gdp);
168 +
169 +       if (free_in_gdp == 0 && (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))
170 +               return 0; /* BLOCK_UNINIT group with no free blocks: skip check */
171 +
172 +       i = mb_find_next_zero_bit(bitmap, max, 0);
173 +
174 +       while (i < max) { /* each pass accounts one run of zero bits */
175 +               first = i;
176 +               i = mb_find_next_bit(bitmap, max, i);
177 +               if (i > max) /* clamp in case the search overshoots max */
178 +                       i = max;
179 +               free += i - first;
180 +               if (i < max)
181 +                       i = mb_find_next_zero_bit(bitmap, max, i);
182 +       }
183 +
184 +       if (free != free_in_gdp) {
185 +               ext4_error(sb, "on-disk bitmap for group %d "
186 +                       "corrupted: %u blocks free in bitmap, %u - in gd\n",
187 +                       group, free, free_in_gdp);
188 +               return -EIO;
189 +       }
190 +       return 0;
191 +}
192 +
193 +/*
194   * the function goes through all preallocation in this group and marks them
195   * used in in-core bitmap. buddy must be generated from this bitmap
196   * Need to be called with ext4 group lock held
197   */
198  static noinline_for_stack
199 -void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
200 +int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
201                                         ext4_group_t group)
202  {
203         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
204         struct ext4_prealloc_space *pa;
205 +       struct ext4_group_desc *gdp;
206         struct list_head *cur;
207         ext4_group_t groupnr;
208         ext4_grpblk_t start;
209         int preallocated = 0;
210         int count = 0;
211 +       int skip = 0;
212 +       int err;
213         int len;
214  
215 +       gdp = ext4_get_group_desc (sb, group, NULL);
216 +       if (gdp == NULL)
217 +               return -EIO;
218 +
219 +       /* before applying preallocations, check bitmap consistency */
220 +       err = ext4_mb_check_ondisk_bitmap(sb, bitmap, gdp, group);
221 +       if (err)
222 +               return err;
223 +
224         /* all form of preallocation discards first load group,
225          * so the only competing code is preallocation use.
226          * we don't need any locking here
227 @@ -3449,14 +3511,23 @@ void ext4_mb_generate_from_pa(struct sup
228                                              &groupnr, &start);
229                 len = pa->pa_len;
230                 spin_unlock(&pa->pa_lock);
231 -               if (unlikely(len == 0))
232 +               if (unlikely(len == 0)) {
233 +                       skip++;
234                         continue;
235 +               }
236                 BUG_ON(groupnr != group);
237                 mb_set_bits(bitmap, start, len);
238                 preallocated += len;
239                 count++;
240         }
241 +       if (count + skip != grp->bb_prealloc_nr) {
242 +               ext4_error(sb, "lost preallocations: "
243 +                          "count %d, bb_prealloc_nr %lu, skip %d\n",
244 +                          count, grp->bb_prealloc_nr, skip);
245 +               return -EIO;
246 +       }
247         mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
248 +       return 0;
249  }
250  
251  static void ext4_mb_pa_callback(struct rcu_head *head)
252 @@ -3515,6 +3586,7 @@ static void ext4_mb_put_pa(struct ext4_a
253          */
254         ext4_lock_group(sb, grp);
255         list_del(&pa->pa_group_list);
256 +       ext4_get_group_info(sb, grp)->bb_prealloc_nr--;
257         ext4_unlock_group(sb, grp);
258  
259         spin_lock(pa->pa_obj_lock);
260 @@ -3606,6 +3678,7 @@ ext4_mb_new_inode_pa(struct ext4_allocat
261  
262         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
263         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
264 +       grp->bb_prealloc_nr++;
265         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
266  
267         spin_lock(pa->pa_obj_lock);
268 @@ -3667,6 +3740,7 @@ ext4_mb_new_group_pa(struct ext4_allocat
269  
270         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
271         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
272 +       grp->bb_prealloc_nr++;
273         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
274  
275         /*
276 @@ -3835,6 +3909,8 @@ repeat:
277  
278                 spin_unlock(&pa->pa_lock);
279  
280 +               BUG_ON(grp->bb_prealloc_nr == 0);
281 +               grp->bb_prealloc_nr--;
282                 list_del(&pa->pa_group_list);
283                 list_add(&pa->u.pa_tmp_list, &list);
284         }
285 @@ -3968,7 +4044,7 @@ repeat:
286                 if (err) {
287                         ext4_error(sb, "Error loading buddy information for %u",
288                                         group);
289 -                       continue;
290 +                       return;
291                 }
292  
293                 bitmap_bh = ext4_read_block_bitmap(sb, group);
294 @@ -3980,6 +4056,8 @@ repeat:
295                 }
296  
297                 ext4_lock_group(sb, group);
298 +               BUG_ON(e4b.bd_info->bb_prealloc_nr == 0);
299 +               e4b.bd_info->bb_prealloc_nr--;
300                 list_del(&pa->pa_group_list);
301                 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
302                 ext4_unlock_group(sb, group);
303 @@ -4240,6 +4318,7 @@ ext4_mb_discard_lg_preallocations(struct
304                 }
305                 ext4_lock_group(sb, group);
306                 list_del(&pa->pa_group_list);
307 +               ext4_get_group_info(sb, group)->bb_prealloc_nr--;
308                 ext4_mb_release_group_pa(&e4b, pa);
309                 ext4_unlock_group(sb, group);
310
311 --- a/fs/ext4/mballoc.h
312 +++ b/fs/ext4/mballoc.h
313 @@ -87,7 +87,7 @@ extern u8 mb_enable_debug;
314  /*
315   * for which requests use 2^N search using buddies
316   */
317 -#define MB_DEFAULT_ORDER2_REQS         2
318 +#define MB_DEFAULT_ORDER2_REQS         8
319  
320  /*
321   * default group prealloc size 512 blocks