Whamcloud - gitweb
LU-14499 lnet: Revert "LU-13368 lnet: discard the callback"
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / ubuntu18 / ext4-mballoc-extra-checks.patch
1 commit f2f28f1d09c0a00b3fc569422f881931d857fac9
2 Author:     Alex Zhuravlev <alex.zhuravlev@sun.com>
3 AuthorDate: Tue Oct 28 17:59:09 2008 +0000
4 Subject: ext4: detect on-disk corruption of block bitmap
5
6 Detect on-disk corruption of the block bitmap and improve checking of
7 preallocated blocks.
8
9 Bugzilla-ID: b=16680
10 Signed-off-by: Alex Zhuravlev <alex.zhuravlev@sun.com>
11 Reviewed-by: Kalpak Shah <kalpak.shah@sun.com>
12 Signed-off-by: Andreas Dilger <andreas.dilger@sun.com>
13
14 Index: linux-4.15.0/fs/ext4/ext4.h
15 ===================================================================
16 --- linux-4.15.0.orig/fs/ext4/ext4.h
17 +++ linux-4.15.0/fs/ext4/ext4.h
18 @@ -2874,6 +2874,7 @@ struct ext4_group_info {
19         ext4_grpblk_t   bb_fragments;   /* nr of freespace fragments */
20         ext4_grpblk_t   bb_largest_free_order;/* order of largest frag in BG */
21         struct          list_head bb_prealloc_list;
22 +       unsigned long   bb_prealloc_nr;
23  #ifdef DOUBLE_CHECK
24         void            *bb_bitmap;
25  #endif
26 Index: linux-4.15.0/fs/ext4/mballoc.c
27 ===================================================================
28 --- linux-4.15.0.orig/fs/ext4/mballoc.c
29 +++ linux-4.15.0/fs/ext4/mballoc.c
30 @@ -363,7 +363,7 @@ static const char * const ext4_groupinfo
31         "ext4_groupinfo_64k", "ext4_groupinfo_128k"
32  };
33  
34 -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
35 +static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
36                                         ext4_group_t group);
37  static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
38                                                 ext4_group_t group);
39 @@ -717,7 +717,7 @@ mb_set_largest_free_order(struct super_b
40  }
41  
42  static noinline_for_stack
43 -void ext4_mb_generate_buddy(struct super_block *sb,
44 +int ext4_mb_generate_buddy(struct super_block *sb,
45                                 void *buddy, void *bitmap, ext4_group_t group)
46  {
47         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
48 @@ -749,19 +749,13 @@ void ext4_mb_generate_buddy(struct super
49         grp->bb_fragments = fragments;
50  
51         if (free != grp->bb_free) {
52 -               ext4_grp_locked_error(sb, group, 0, 0,
53 -                                     "block bitmap and bg descriptor "
54 -                                     "inconsistent: %u vs %u free clusters",
55 -                                     free, grp->bb_free);
56 -               /*
57 -                * If we intend to continue, we consider group descriptor
58 -                * corrupt and update bb_free using bitmap value
59 -                */
60 -               grp->bb_free = free;
61 -               if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
62 -                       percpu_counter_sub(&sbi->s_freeclusters_counter,
63 -                                          grp->bb_free);
64 -               set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
65 +               struct ext4_group_desc *gdp;
66 +               gdp = ext4_get_group_desc(sb, group, NULL);
67 +               ext4_error(sb, "group %lu: %u blocks in bitmap, %u in bb, "
68 +                       "%u in gd, %lu pa's\n", (long unsigned int)group,
69 +                       free, grp->bb_free, ext4_free_group_clusters(sb, gdp),
70 +                       grp->bb_prealloc_nr);
71 +               return -EIO;
72         }
73         mb_set_largest_free_order(sb, grp);
74  
75 @@ -772,6 +766,8 @@ void ext4_mb_generate_buddy(struct super
76         EXT4_SB(sb)->s_mb_buddies_generated++;
77         EXT4_SB(sb)->s_mb_generation_time += period;
78         spin_unlock(&EXT4_SB(sb)->s_bal_lock);
79 +
80 +       return 0;
81  }
82  
83  static void mb_regenerate_buddy(struct ext4_buddy *e4b)
84 @@ -892,7 +888,7 @@ static int ext4_mb_init_cache(struct pag
85         }
86  
87         first_block = page->index * blocks_per_page;
88 -       for (i = 0; i < blocks_per_page; i++) {
89 +       for (i = 0; i < blocks_per_page && err == 0; i++) {
90                 group = (first_block + i) >> 1;
91                 if (group >= ngroups)
92                         break;
93 @@ -936,7 +932,7 @@ static int ext4_mb_init_cache(struct pag
94                         ext4_lock_group(sb, group);
95                         /* init the buddy */
96                         memset(data, 0xff, blocksize);
97 -                       ext4_mb_generate_buddy(sb, data, incore, group);
98 +                       err = ext4_mb_generate_buddy(sb, data, incore, group);
99                         ext4_unlock_group(sb, group);
100                         incore = NULL;
101                 } else {
102 @@ -951,7 +947,7 @@ static int ext4_mb_init_cache(struct pag
103                         memcpy(data, bitmap, blocksize);
104  
105                         /* mark all preallocated blks used in in-core bitmap */
106 -                       ext4_mb_generate_from_pa(sb, data, group);
107 +                       err = ext4_mb_generate_from_pa(sb, data, group);
108                         ext4_mb_generate_from_freelist(sb, data, group);
109                         ext4_unlock_group(sb, group);
110  
111 @@ -961,7 +957,8 @@ static int ext4_mb_init_cache(struct pag
112                         incore = data;
113                 }
114         }
115 -       SetPageUptodate(page);
116 +       if (likely(err == 0))
117 +               SetPageUptodate(page);
118  
119  out:
120         if (bh) {
121 @@ -2309,9 +2306,11 @@ static void *ext4_mb_seq_groups_next(str
122  static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
123  {
124         struct super_block *sb = seq->private;
125 +       struct ext4_group_desc *gdp;
126         ext4_group_t group = (ext4_group_t) ((unsigned long) v);
127         int i;
128         int err, buddy_loaded = 0;
129 +       int free = 0;
130         struct ext4_buddy e4b;
131         struct ext4_group_info *grinfo;
132         unsigned char blocksize_bits = min_t(unsigned char,
133 @@ -2324,7 +2323,7 @@ static int ext4_mb_seq_groups_show(struc
134  
135         group--;
136         if (group == 0)
137 -               seq_puts(seq, "#group: free  frags first ["
138 +               seq_puts(seq, "#group: bfree gfree frags first pa    ["
139                               " 2^0   2^1   2^2   2^3   2^4   2^5   2^6  "
140                               " 2^7   2^8   2^9   2^10  2^11  2^12  2^13  ]\n");
141  
142 @@ -2342,13 +2341,19 @@ static int ext4_mb_seq_groups_show(struc
143                 buddy_loaded = 1;
144         }
145  
146 +       gdp = ext4_get_group_desc(sb, group, NULL);
147 +       if (gdp != NULL)
148 +               free = ext4_free_group_clusters(sb, gdp);
149 +
150         memcpy(&sg, ext4_get_group_info(sb, group), i);
151  
152         if (buddy_loaded)
153                 ext4_mb_unload_buddy(&e4b);
154  
155 -       seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
156 -                       sg.info.bb_fragments, sg.info.bb_first_free);
157 +       seq_printf(seq, "#%-5lu: %-5u %-5u %-5u %-5u %-5lu [",
158 +                       (long unsigned int)group, sg.info.bb_free, free,
159 +                       sg.info.bb_fragments, sg.info.bb_first_free,
160 +                       sg.info.bb_prealloc_nr);
161         for (i = 0; i <= 13; i++)
162                 seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
163                                 sg.info.bb_counters[i] : 0);
164 @@ -3656,22 +3661,71 @@ static void ext4_mb_generate_from_freeli
165  }
166  
167  /*
168 + * Check that the free clusters in the on-disk @bitmap match the free count
169 + * in the group descriptor @gdp. Do this before taking preallocated blocks
170 + * into account to be able to detect on-disk corruption. Returns 0 on a match,
171 + * -EIO on a mismatch. The group lock should be held by the caller.
172 + */
173 +int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
174 +                               struct ext4_group_desc *gdp, int group)
175 +{
176 +       unsigned int max = EXT4_CLUSTERS_PER_GROUP(sb);
177 +       unsigned int i, first, free = 0;
178 +       unsigned int free_in_gdp = ext4_free_group_clusters(sb, gdp);
179 +
180 +       if (free_in_gdp == 0 &&
181 +           (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))
182 +               return 0;
183 +
184 +       i = mb_find_next_zero_bit(bitmap, max, 0);
185 +       while (i < max) {
186 +               first = i;      /* start of a run of free (zero) bits */
187 +               i = mb_find_next_bit(bitmap, max, i);
188 +               if (i > max)
189 +                       i = max;
190 +               free += i - first;
191 +               if (i < max)
192 +                       i = mb_find_next_zero_bit(bitmap, max, i);
193 +       }
194 +
195 +       if (free != free_in_gdp) {
196 +               ext4_error(sb, "on-disk bitmap for group %d "
197 +                       "corrupted: %u blocks free in bitmap, %u - in gd\n",
198 +                       group, free, free_in_gdp);
199 +               return -EIO;
200 +       }
201 +       return 0;
202 +}
203 +
204 +/*
205   * the function goes through all preallocation in this group and marks them
206   * used in in-core bitmap. buddy must be generated from this bitmap
207   * Need to be called with ext4 group lock held
208   */
209  static noinline_for_stack
210 -void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
211 +int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
212                                         ext4_group_t group)
213  {
214         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
215         struct ext4_prealloc_space *pa;
216 +       struct ext4_group_desc *gdp;
217         struct list_head *cur;
218         ext4_group_t groupnr;
219         ext4_grpblk_t start;
220         int preallocated = 0;
221 +       int skip = 0, count = 0;
222 +       int err;
223         int len;
224  
225 +       gdp = ext4_get_group_desc(sb, group, NULL);
226 +       if (gdp == NULL)
227 +               return -EIO;
228 +
229 +       /* before applying preallocations, check bitmap consistency */
230 +       err = ext4_mb_check_ondisk_bitmap(sb, bitmap, gdp, group);
231 +       if (err)
232 +               return err;
233 +
234         /* all form of preallocation discards first load group,
235          * so the only competing code is preallocation use.
236          * we don't need any locking here
237 @@ -3687,13 +3741,23 @@ void ext4_mb_generate_from_pa(struct sup
238                                              &groupnr, &start);
239                 len = pa->pa_len;
240                 spin_unlock(&pa->pa_lock);
241 -               if (unlikely(len == 0))
242 +               if (unlikely(len == 0)) {
243 +                       skip++;
244                         continue;
245 +               }
246                 BUG_ON(groupnr != group);
247                 ext4_set_bits(bitmap, start, len);
248                 preallocated += len;
249 +               count++;
250 +       }
251 +       if (count + skip != grp->bb_prealloc_nr) {
252 +               ext4_error(sb, "lost preallocations: "
253 +                          "count %d, bb_prealloc_nr %lu, skip %d\n",
254 +                          count, grp->bb_prealloc_nr, skip);
255 +               return -EIO;
256         }
257         mb_debug(1, "preallocated %u for group %u\n", preallocated, group);
258 +       return 0;
259  }
260  
261  static void ext4_mb_pa_callback(struct rcu_head *head)
262 @@ -3757,6 +3821,7 @@ static void ext4_mb_put_pa(struct ext4_a
263          */
264         ext4_lock_group(sb, grp);
265         list_del(&pa->pa_group_list);
266 +       ext4_get_group_info(sb, grp)->bb_prealloc_nr--;
267         ext4_unlock_group(sb, grp);
268  
269         spin_lock(pa->pa_obj_lock);
270 @@ -3851,6 +3916,7 @@ ext4_mb_new_inode_pa(struct ext4_allocat
271  
272         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
273         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
274 +       grp->bb_prealloc_nr++;
275         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
276  
277         spin_lock(pa->pa_obj_lock);
278 @@ -3912,6 +3978,7 @@ ext4_mb_new_group_pa(struct ext4_allocat
279  
280         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
281         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
282 +       grp->bb_prealloc_nr++;
283         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
284  
285         /*
286 @@ -4084,6 +4151,8 @@ repeat:
287  
288                 spin_unlock(&pa->pa_lock);
289  
290 +               BUG_ON(grp->bb_prealloc_nr == 0);
291 +               grp->bb_prealloc_nr--;
292                 list_del(&pa->pa_group_list);
293                 list_add(&pa->u.pa_tmp_list, &list);
294         }
295 @@ -4214,7 +4283,7 @@ repeat:
296                 if (err) {
297                         ext4_error(sb, "Error %d loading buddy information for %u",
298                                    err, group);
299 -                       continue;
300 +                       return;
301                 }
302  
303                 bitmap_bh = ext4_read_block_bitmap(sb, group);
304 @@ -4227,6 +4296,8 @@ repeat:
305                 }
306  
307                 ext4_lock_group(sb, group);
308 +               BUG_ON(e4b.bd_info->bb_prealloc_nr == 0);
309 +               e4b.bd_info->bb_prealloc_nr--;
310                 list_del(&pa->pa_group_list);
311                 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
312                 ext4_unlock_group(sb, group);
313 @@ -4489,6 +4560,7 @@ ext4_mb_discard_lg_preallocations(struct
314                 }
315                 ext4_lock_group(sb, group);
316                 list_del(&pa->pa_group_list);
317 +               ext4_get_group_info(sb, group)->bb_prealloc_nr--;
318                 ext4_mb_release_group_pa(&e4b, pa);
319                 ext4_unlock_group(sb, group);
320  
321 Index: linux-4.15.0/fs/ext4/mballoc.h
322 ===================================================================
323 --- linux-4.15.0.orig/fs/ext4/mballoc.h
324 +++ linux-4.15.0/fs/ext4/mballoc.h
325 @@ -70,7 +70,7 @@ do {                                                                  \
326  /*
327   * for which requests use 2^N search using buddies
328   */
329 -#define MB_DEFAULT_ORDER2_REQS         2
330 +#define MB_DEFAULT_ORDER2_REQS         8
331  
332  /*
333   * default group prealloc size 512 blocks