Whamcloud - gitweb
LU-17599 ldiskfs: restore ldiskfs patch attribution
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / suse15 / ext4-mballoc-extra-checks.patch
1 commit f2f28f1d09c0a00b3fc569422f881931d857fac9
2 Author:     Alex Zhuravlev <alex.zhuravlev@sun.com>
3 AuthorDate: Tue Oct 28 17:59:09 2008 +0000
4 Subject: ext4: detect on-disk corruption of block bitmap
5
6 Detect on-disk corruption of the block bitmap and improve checking
7 of preallocated blocks.
8
9 Bugzilla-ID: b=16680
10 Signed-off-by: Alex Zhuravlev <alex.zhuravlev@sun.com>
11 Reviewed-by: Kalpak Shah <kalpak.shah@sun.com>
12 Signed-off-by: Andreas Dilger <andreas.dilger@sun.com>
13 ---
14  fs/ext4/ext4.h    |    1 
15  fs/ext4/mballoc.c |  123 ++++++++++++++++++++++++++++++++++++++++++------------
16  fs/ext4/mballoc.h |    2 
17  3 files changed, 99 insertions(+), 27 deletions(-)
18
19 --- a/fs/ext4/ext4.h
20 +++ b/fs/ext4/ext4.h
21 @@ -2911,6 +2911,7 @@ struct ext4_group_info {
22         ext4_grpblk_t   bb_fragments;   /* nr of freespace fragments */
23         ext4_grpblk_t   bb_largest_free_order;/* order of largest frag in BG */
24         struct          list_head bb_prealloc_list;
25 +       unsigned long   bb_prealloc_nr;
26  #ifdef DOUBLE_CHECK
27         void            *bb_bitmap;
28  #endif
29 --- a/fs/ext4/mballoc.c
30 +++ b/fs/ext4/mballoc.c
31 @@ -364,7 +364,7 @@ static const char * const ext4_groupinfo
32         "ext4_groupinfo_64k", "ext4_groupinfo_128k"
33  };
34  
35 -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
36 +static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
37                                         ext4_group_t group);
38  static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
39                                                 ext4_group_t group);
40 @@ -720,11 +720,10 @@ mb_set_largest_free_order(struct super_b
41  }
42  
43  static noinline_for_stack
44 -void ext4_mb_generate_buddy(struct super_block *sb,
45 +int ext4_mb_generate_buddy(struct super_block *sb,
46                                 void *buddy, void *bitmap, ext4_group_t group)
47  {
48         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
49 -       struct ext4_sb_info *sbi = EXT4_SB(sb);
50         ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
51         ext4_grpblk_t i = 0;
52         ext4_grpblk_t first;
53 @@ -753,19 +752,13 @@ void ext4_mb_generate_buddy(struct super
54         grp->bb_fragments = fragments;
55  
56         if (free != grp->bb_free) {
57 -               ext4_grp_locked_error(sb, group, 0, 0,
58 -                                     "block bitmap and bg descriptor "
59 -                                     "inconsistent: %u vs %u free clusters",
60 -                                     free, grp->bb_free);
61 -               /*
62 -                * If we intend to continue, we consider group descriptor
63 -                * corrupt and update bb_free using bitmap value
64 -                */
65 -               grp->bb_free = free;
66 -               if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
67 -                       percpu_counter_sub(&sbi->s_freeclusters_counter,
68 -                                          grp->bb_free);
69 -               set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
70 +               struct ext4_group_desc *gdp;
71 +               gdp = ext4_get_group_desc(sb, group, NULL);
72 +               ext4_error(sb, "group %lu: %u blocks in bitmap, %u in bb, "
73 +                       "%u in gd, %lu pa's\n", (long unsigned int)group,
74 +                       free, grp->bb_free, ext4_free_group_clusters(sb, gdp),
75 +                       grp->bb_prealloc_nr);
76 +               return -EIO;
77         }
78         mb_set_largest_free_order(sb, grp);
79  
80 @@ -776,6 +769,8 @@ void ext4_mb_generate_buddy(struct super
81         EXT4_SB(sb)->s_mb_buddies_generated++;
82         EXT4_SB(sb)->s_mb_generation_time += period;
83         spin_unlock(&EXT4_SB(sb)->s_bal_lock);
84 +
85 +       return 0;
86  }
87  
88  static void mb_regenerate_buddy(struct ext4_buddy *e4b)
89 @@ -896,7 +891,7 @@ static int ext4_mb_init_cache(struct pag
90         }
91  
92         first_block = page->index * blocks_per_page;
93 -       for (i = 0; i < blocks_per_page; i++) {
94 +       for (i = 0; i < blocks_per_page && err == 0; i++) {
95                 group = (first_block + i) >> 1;
96                 if (group >= ngroups)
97                         break;
98 @@ -940,7 +935,7 @@ static int ext4_mb_init_cache(struct pag
99                         ext4_lock_group(sb, group);
100                         /* init the buddy */
101                         memset(data, 0xff, blocksize);
102 -                       ext4_mb_generate_buddy(sb, data, incore, group);
103 +                       err = ext4_mb_generate_buddy(sb, data, incore, group);
104                         ext4_unlock_group(sb, group);
105                         incore = NULL;
106                 } else {
107 @@ -955,7 +950,7 @@ static int ext4_mb_init_cache(struct pag
108                         memcpy(data, bitmap, blocksize);
109  
110                         /* mark all preallocated blks used in in-core bitmap */
111 -                       ext4_mb_generate_from_pa(sb, data, group);
112 +                       err = ext4_mb_generate_from_pa(sb, data, group);
113                         ext4_mb_generate_from_freelist(sb, data, group);
114                         ext4_unlock_group(sb, group);
115  
116 @@ -965,7 +960,8 @@ static int ext4_mb_init_cache(struct pag
117                         incore = data;
118                 }
119         }
120 -       SetPageUptodate(page);
121 +       if (likely(err == 0))
122 +               SetPageUptodate(page);
123  
124  out:
125         if (bh) {
126 @@ -2314,9 +2310,11 @@ static void *ext4_mb_seq_groups_next(str
127  static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
128  {
129         struct super_block *sb = seq->private;
130 +       struct ext4_group_desc *gdp;
131         ext4_group_t group = (ext4_group_t) ((unsigned long) v);
132         int i;
133         int err, buddy_loaded = 0;
134 +       int free = 0;
135         struct ext4_buddy e4b;
136         struct ext4_group_info *grinfo;
137         struct sg {
138 @@ -2326,7 +2324,7 @@ static int ext4_mb_seq_groups_show(struc
139  
140         group--;
141         if (group == 0)
142 -               seq_puts(seq, "#group: free  frags first ["
143 +               seq_puts(seq, "#group: bfree gfree frags first pa    ["
144                               " 2^0   2^1   2^2   2^3   2^4   2^5   2^6  "
145                               " 2^7   2^8   2^9   2^10  2^11  2^12  2^13  ]\n");
146  
147 @@ -2343,13 +2341,19 @@ static int ext4_mb_seq_groups_show(struc
148                 buddy_loaded = 1;
149         }
150  
151 +       gdp = ext4_get_group_desc(sb, group, NULL);
152 +       if (gdp != NULL)
153 +               free = ext4_free_group_clusters(sb, gdp);
154 +
155         memcpy(&sg, ext4_get_group_info(sb, group), i);
156  
157         if (buddy_loaded)
158                 ext4_mb_unload_buddy(&e4b);
159  
160 -       seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
161 -                       sg.info.bb_fragments, sg.info.bb_first_free);
162 +       seq_printf(seq, "#%-5lu: %-5u %-5u %-5u %-5u %-5lu [",
163 +                       (long unsigned int)group, sg.info.bb_free, free,
164 +                       sg.info.bb_fragments, sg.info.bb_first_free,
165 +                       sg.info.bb_prealloc_nr);
166         for (i = 0; i <= 13; i++)
167                 seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
168                                 sg.info.bb_counters[i] : 0);
169 @@ -3617,22 +3621,71 @@ static void ext4_mb_generate_from_freeli
170  }
171  
172  /*
173 + * check that free blocks in bitmap match free blocks in group descriptor
174 + * do this before taking preallocated blocks into account to be able
175 + * to detect on-disk corruptions. The group lock should be held by the
176 + * caller.
177 + */
178 +int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
179 +                               struct ext4_group_desc *gdp, int group)
180 +{
181 +       unsigned int max = EXT4_CLUSTERS_PER_GROUP(sb); /* can be > 65535 */
182 +       unsigned int i, first, free = 0;
183 +       unsigned int free_in_gdp = ext4_free_group_clusters(sb, gdp);
184 +
185 +       if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
186 +               return 0;       /* bitmap flagged uninit: not checked */
187 +       /* sum the lengths of all runs of zero (free) bits in the bitmap */
188 +       i = mb_find_next_zero_bit(bitmap, max, 0);
189 +
190 +       while (i < max) {
191 +               first = i;
192 +               i = mb_find_next_bit(bitmap, max, i);
193 +               if (i > max)
194 +                       i = max;
195 +               free += i - first;
196 +               if (i < max)
197 +                       i = mb_find_next_zero_bit(bitmap, max, i);
198 +       }
199 +
200 +       if (free != free_in_gdp) {
201 +               ext4_error(sb, "on-disk bitmap for group %d "
202 +                       "corrupted: %u blocks free in bitmap, %u - in gd\n",
203 +                       group, free, free_in_gdp);
204 +               return -EIO;
205 +       }
206 +       return 0;
207 +}
208 +
209 +/*
210   * the function goes through all preallocation in this group and marks them
211   * used in in-core bitmap. buddy must be generated from this bitmap
212   * Need to be called with ext4 group lock held
213   */
214  static noinline_for_stack
215 -void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
216 +int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
217                                         ext4_group_t group)
218  {
219         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
220         struct ext4_prealloc_space *pa;
221 +       struct ext4_group_desc *gdp;
222         struct list_head *cur;
223         ext4_group_t groupnr;
224         ext4_grpblk_t start;
225         int preallocated = 0;
226 +       int skip = 0, count = 0;
227 +       int err;
228         int len;
229  
230 +       gdp = ext4_get_group_desc(sb, group, NULL);
231 +       if (gdp == NULL)
232 +               return -EIO;
233 +
234 +       /* before applying preallocations, check bitmap consistency */
235 +       err = ext4_mb_check_ondisk_bitmap(sb, bitmap, gdp, group);
236 +       if (err)
237 +               return err;
238 +
239         /* all form of preallocation discards first load group,
240          * so the only competing code is preallocation use.
241          * we don't need any locking here
242 @@ -3648,13 +3701,23 @@ void ext4_mb_generate_from_pa(struct sup
243                                              &groupnr, &start);
244                 len = pa->pa_len;
245                 spin_unlock(&pa->pa_lock);
246 -               if (unlikely(len == 0))
247 +               if (unlikely(len == 0)) {
248 +                       skip++;
249                         continue;
250 +               }
251                 BUG_ON(groupnr != group);
252                 ext4_set_bits(bitmap, start, len);
253                 preallocated += len;
254 +               count++;
255 +       }
256 +       if (count + skip != grp->bb_prealloc_nr) {
257 +               ext4_error(sb, "lost preallocations: "
258 +                          "count %d, bb_prealloc_nr %lu, skip %d\n",
259 +                          count, grp->bb_prealloc_nr, skip);
260 +               return -EIO;
261         }
262         mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
263 +       return 0;
264  }
265  
266  static void ext4_mb_pa_callback(struct rcu_head *head)
267 @@ -3718,6 +3781,7 @@ static void ext4_mb_put_pa(struct ext4_a
268          */
269         ext4_lock_group(sb, grp);
270         list_del(&pa->pa_group_list);
271 +       ext4_get_group_info(sb, grp)->bb_prealloc_nr--;
272         ext4_unlock_group(sb, grp);
273  
274         spin_lock(pa->pa_obj_lock);
275 @@ -3812,6 +3876,7 @@ ext4_mb_new_inode_pa(struct ext4_allocat
276  
277         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
278         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
279 +       grp->bb_prealloc_nr++;
280         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
281  
282         spin_lock(pa->pa_obj_lock);
283 @@ -3873,6 +3938,7 @@ ext4_mb_new_group_pa(struct ext4_allocat
284  
285         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
286         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
287 +       grp->bb_prealloc_nr++;
288         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
289  
290         /*
291 @@ -4045,6 +4111,8 @@ repeat:
292  
293                 spin_unlock(&pa->pa_lock);
294  
295 +               BUG_ON(grp->bb_prealloc_nr == 0);
296 +               grp->bb_prealloc_nr--;
297                 list_del(&pa->pa_group_list);
298                 list_add(&pa->u.pa_tmp_list, &list);
299         }
300 @@ -4175,7 +4243,7 @@ repeat:
301                 if (err) {
302                         ext4_error(sb, "Error %d loading buddy information for %u",
303                                    err, group);
304 -                       continue;
305 +                       return;
306                 }
307  
308                 bitmap_bh = ext4_read_block_bitmap(sb, group);
309 @@ -4188,6 +4256,8 @@ repeat:
310                 }
311  
312                 ext4_lock_group(sb, group);
313 +               BUG_ON(e4b.bd_info->bb_prealloc_nr == 0);
314 +               e4b.bd_info->bb_prealloc_nr--;
315                 list_del(&pa->pa_group_list);
316                 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
317                 ext4_unlock_group(sb, group);
318 @@ -4450,6 +4520,7 @@ ext4_mb_discard_lg_preallocations(struct
319                 }
320                 ext4_lock_group(sb, group);
321                 list_del(&pa->pa_group_list);
322 +               ext4_get_group_info(sb, group)->bb_prealloc_nr--;
323                 ext4_mb_release_group_pa(&e4b, pa);
324                 ext4_unlock_group(sb, group);
325  
326 --- a/fs/ext4/mballoc.h
327 +++ b/fs/ext4/mballoc.h
328 @@ -69,7 +69,7 @@ do {                                                                  \
329  /*
330   * for which requests use 2^N search using buddies
331   */
332 -#define MB_DEFAULT_ORDER2_REQS         2
333 +#define MB_DEFAULT_ORDER2_REQS         8
334  
335  /*
336   * default group prealloc size 512 blocks