Whamcloud - gitweb
LU-11310 ldiskfs: Repair support for SUSE 15 GA and SP1
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / suse15 / ext4-mballoc-extra-checks.patch
1 ---
2  fs/ext4/ext4.h    |    1 
3  fs/ext4/mballoc.c |  123 ++++++++++++++++++++++++++++++++++++++++++------------
4  fs/ext4/mballoc.h |    2 
5  3 files changed, 99 insertions(+), 27 deletions(-)
6
7 --- a/fs/ext4/ext4.h
8 +++ b/fs/ext4/ext4.h
9 @@ -2911,6 +2911,7 @@ struct ext4_group_info {
10         ext4_grpblk_t   bb_fragments;   /* nr of freespace fragments */
11         ext4_grpblk_t   bb_largest_free_order;/* order of largest frag in BG */
12         struct          list_head bb_prealloc_list;
13 +       unsigned long   bb_prealloc_nr;
14  #ifdef DOUBLE_CHECK
15         void            *bb_bitmap;
16  #endif
17 --- a/fs/ext4/mballoc.c
18 +++ b/fs/ext4/mballoc.c
19 @@ -364,7 +364,7 @@ static const char * const ext4_groupinfo
20         "ext4_groupinfo_64k", "ext4_groupinfo_128k"
21  };
22  
23 -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
24 +static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
25                                         ext4_group_t group);
26  static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
27                                                 ext4_group_t group);
28 @@ -720,11 +720,10 @@ mb_set_largest_free_order(struct super_b
29  }
30  
31  static noinline_for_stack
32 -void ext4_mb_generate_buddy(struct super_block *sb,
33 +int ext4_mb_generate_buddy(struct super_block *sb,
34                                 void *buddy, void *bitmap, ext4_group_t group)
35  {
36         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
37 -       struct ext4_sb_info *sbi = EXT4_SB(sb);
38         ext4_grpblk_t max = EXT4_CLUSTERS_PER_GROUP(sb);
39         ext4_grpblk_t i = 0;
40         ext4_grpblk_t first;
41 @@ -753,19 +752,13 @@ void ext4_mb_generate_buddy(struct super
42         grp->bb_fragments = fragments;
43  
44         if (free != grp->bb_free) {
45 -               ext4_grp_locked_error(sb, group, 0, 0,
46 -                                     "block bitmap and bg descriptor "
47 -                                     "inconsistent: %u vs %u free clusters",
48 -                                     free, grp->bb_free);
49 -               /*
50 -                * If we intend to continue, we consider group descriptor
51 -                * corrupt and update bb_free using bitmap value
52 -                */
53 -               grp->bb_free = free;
54 -               if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
55 -                       percpu_counter_sub(&sbi->s_freeclusters_counter,
56 -                                          grp->bb_free);
57 -               set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
58 +               struct ext4_group_desc *gdp;
59 +               gdp = ext4_get_group_desc(sb, group, NULL);
60 +               ext4_error(sb, "group %lu: %u blocks in bitmap, %u in bb, "
61 +                       "%u in gd, %lu pa's\n", (long unsigned int)group,
62 +                       free, grp->bb_free, ext4_free_group_clusters(sb, gdp),
63 +                       grp->bb_prealloc_nr);
64 +               return -EIO;
65         }
66         mb_set_largest_free_order(sb, grp);
67  
68 @@ -776,6 +769,8 @@ void ext4_mb_generate_buddy(struct super
69         EXT4_SB(sb)->s_mb_buddies_generated++;
70         EXT4_SB(sb)->s_mb_generation_time += period;
71         spin_unlock(&EXT4_SB(sb)->s_bal_lock);
72 +
73 +       return 0;
74  }
75  
76  static void mb_regenerate_buddy(struct ext4_buddy *e4b)
77 @@ -896,7 +891,7 @@ static int ext4_mb_init_cache(struct pag
78         }
79  
80         first_block = page->index * blocks_per_page;
81 -       for (i = 0; i < blocks_per_page; i++) {
82 +       for (i = 0; i < blocks_per_page && err == 0; i++) {
83                 group = (first_block + i) >> 1;
84                 if (group >= ngroups)
85                         break;
86 @@ -940,7 +935,7 @@ static int ext4_mb_init_cache(struct pag
87                         ext4_lock_group(sb, group);
88                         /* init the buddy */
89                         memset(data, 0xff, blocksize);
90 -                       ext4_mb_generate_buddy(sb, data, incore, group);
91 +                       err = ext4_mb_generate_buddy(sb, data, incore, group);
92                         ext4_unlock_group(sb, group);
93                         incore = NULL;
94                 } else {
95 @@ -955,7 +950,7 @@ static int ext4_mb_init_cache(struct pag
96                         memcpy(data, bitmap, blocksize);
97  
98                         /* mark all preallocated blks used in in-core bitmap */
99 -                       ext4_mb_generate_from_pa(sb, data, group);
100 +                       err = ext4_mb_generate_from_pa(sb, data, group);
101                         ext4_mb_generate_from_freelist(sb, data, group);
102                         ext4_unlock_group(sb, group);
103  
104 @@ -965,7 +960,8 @@ static int ext4_mb_init_cache(struct pag
105                         incore = data;
106                 }
107         }
108 -       SetPageUptodate(page);
109 +       if (likely(err == 0))
110 +               SetPageUptodate(page);
111  
112  out:
113         if (bh) {
114 @@ -2314,9 +2310,11 @@ static void *ext4_mb_seq_groups_next(str
115  static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
116  {
117         struct super_block *sb = seq->private;
118 +       struct ext4_group_desc *gdp;
119         ext4_group_t group = (ext4_group_t) ((unsigned long) v);
120         int i;
121         int err, buddy_loaded = 0;
122 +       int free = 0;
123         struct ext4_buddy e4b;
124         struct ext4_group_info *grinfo;
125         struct sg {
126 @@ -2326,7 +2324,7 @@ static int ext4_mb_seq_groups_show(struc
127  
128         group--;
129         if (group == 0)
130 -               seq_puts(seq, "#group: free  frags first ["
131 +               seq_puts(seq, "#group: bfree gfree frags first pa    ["
132                               " 2^0   2^1   2^2   2^3   2^4   2^5   2^6  "
133                               " 2^7   2^8   2^9   2^10  2^11  2^12  2^13  ]\n");
134  
135 @@ -2343,13 +2341,19 @@ static int ext4_mb_seq_groups_show(struc
136                 buddy_loaded = 1;
137         }
138  
139 +       gdp = ext4_get_group_desc(sb, group, NULL);
140 +       if (gdp != NULL)
141 +               free = ext4_free_group_clusters(sb, gdp);
142 +
143         memcpy(&sg, ext4_get_group_info(sb, group), i);
144  
145         if (buddy_loaded)
146                 ext4_mb_unload_buddy(&e4b);
147  
148 -       seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
149 -                       sg.info.bb_fragments, sg.info.bb_first_free);
150 +       seq_printf(seq, "#%-5lu: %-5u %-5u %-5u %-5u %-5lu [",
151 +                       (long unsigned int)group, sg.info.bb_free, free,
152 +                       sg.info.bb_fragments, sg.info.bb_first_free,
153 +                       sg.info.bb_prealloc_nr);
154         for (i = 0; i <= 13; i++)
155                 seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
156                                 sg.info.bb_counters[i] : 0);
157 @@ -3617,22 +3621,71 @@ static void ext4_mb_generate_from_freeli
158  }
159  
160  /*
161 + * Check that the free clusters counted in the bitmap match the group
162 + * descriptor. Done before preallocated blocks are taken into account so
163 + * that on-disk corruption can be detected. Returns 0 on a match (or for a
164 + * BLOCK_UNINIT group), -EIO otherwise. The caller should hold the group lock.
165 + */
166 +int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
167 +                               struct ext4_group_desc *gdp, int group)
168 +{
169 +       unsigned int max = EXT4_CLUSTERS_PER_GROUP(sb);
170 +       unsigned int i, first, free = 0;
171 +       unsigned int free_in_gdp = ext4_free_group_clusters(sb, gdp);
172 +
173 +       if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
174 +               return 0;
175 +
176 +       i = mb_find_next_zero_bit(bitmap, max, 0);
177 +
178 +       while (i < max) {
179 +               first = i;
180 +               i = mb_find_next_bit(bitmap, max, i);
181 +               if (i > max)
182 +                       i = max;
183 +               free += i - first;
184 +               if (i < max)
185 +                       i = mb_find_next_zero_bit(bitmap, max, i);
186 +       }
187 +
188 +       if (free != free_in_gdp) {
189 +               ext4_error(sb, "on-disk bitmap for group %d "
190 +                       "corrupted: %u blocks free in bitmap, %u - in gd\n",
191 +                       group, free, free_in_gdp);
192 +               return -EIO;
193 +       }
194 +       return 0;
195 +}
196 +
197 +/*
198   * the function goes through all preallocation in this group and marks them
199   * used in in-core bitmap. buddy must be generated from this bitmap
200   * Need to be called with ext4 group lock held
201   */
202  static noinline_for_stack
203 -void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
204 +int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
205                                         ext4_group_t group)
206  {
207         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
208         struct ext4_prealloc_space *pa;
209 +       struct ext4_group_desc *gdp;
210         struct list_head *cur;
211         ext4_group_t groupnr;
212         ext4_grpblk_t start;
213         int preallocated = 0;
214 +       int skip = 0, count = 0;
215 +       int err;
216         int len;
217  
218 +       gdp = ext4_get_group_desc(sb, group, NULL);
219 +       if (gdp == NULL)
220 +               return -EIO;
221 +
222 +       /* before applying preallocations, check bitmap consistency */
223 +       err = ext4_mb_check_ondisk_bitmap(sb, bitmap, gdp, group);
224 +       if (err)
225 +               return err;
226 +
227         /* all form of preallocation discards first load group,
228          * so the only competing code is preallocation use.
229          * we don't need any locking here
230 @@ -3648,13 +3701,23 @@ void ext4_mb_generate_from_pa(struct sup
231                                              &groupnr, &start);
232                 len = pa->pa_len;
233                 spin_unlock(&pa->pa_lock);
234 -               if (unlikely(len == 0))
235 +               if (unlikely(len == 0)) {
236 +                       skip++;
237                         continue;
238 +               }
239                 BUG_ON(groupnr != group);
240                 ext4_set_bits(bitmap, start, len);
241                 preallocated += len;
242 +               count++;
243 +       }
244 +       if (count + skip != grp->bb_prealloc_nr) {
245 +               ext4_error(sb, "lost preallocations: "
246 +                          "count %d, bb_prealloc_nr %lu, skip %d\n",
247 +                          count, grp->bb_prealloc_nr, skip);
248 +               return -EIO;
249         }
250         mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
251 +       return 0;
252  }
253  
254  static void ext4_mb_pa_callback(struct rcu_head *head)
255 @@ -3718,6 +3781,7 @@ static void ext4_mb_put_pa(struct ext4_a
256          */
257         ext4_lock_group(sb, grp);
258         list_del(&pa->pa_group_list);
259 +       ext4_get_group_info(sb, grp)->bb_prealloc_nr--;
260         ext4_unlock_group(sb, grp);
261  
262         spin_lock(pa->pa_obj_lock);
263 @@ -3812,6 +3876,7 @@ ext4_mb_new_inode_pa(struct ext4_allocat
264  
265         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
266         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
267 +       grp->bb_prealloc_nr++;
268         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
269  
270         spin_lock(pa->pa_obj_lock);
271 @@ -3873,6 +3938,7 @@ ext4_mb_new_group_pa(struct ext4_allocat
272  
273         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
274         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
275 +       grp->bb_prealloc_nr++;
276         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
277  
278         /*
279 @@ -4045,6 +4111,8 @@ repeat:
280  
281                 spin_unlock(&pa->pa_lock);
282  
283 +               BUG_ON(grp->bb_prealloc_nr == 0);
284 +               grp->bb_prealloc_nr--;
285                 list_del(&pa->pa_group_list);
286                 list_add(&pa->u.pa_tmp_list, &list);
287         }
288 @@ -4175,7 +4243,7 @@ repeat:
289                 if (err) {
290                         ext4_error(sb, "Error %d loading buddy information for %u",
291                                    err, group);
292 -                       continue;
293 +                       return;
294                 }
295  
296                 bitmap_bh = ext4_read_block_bitmap(sb, group);
297 @@ -4188,6 +4256,8 @@ repeat:
298                 }
299  
300                 ext4_lock_group(sb, group);
301 +               BUG_ON(e4b.bd_info->bb_prealloc_nr == 0);
302 +               e4b.bd_info->bb_prealloc_nr--;
303                 list_del(&pa->pa_group_list);
304                 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
305                 ext4_unlock_group(sb, group);
306 @@ -4450,6 +4520,7 @@ ext4_mb_discard_lg_preallocations(struct
307                 }
308                 ext4_lock_group(sb, group);
309                 list_del(&pa->pa_group_list);
310 +               ext4_get_group_info(sb, group)->bb_prealloc_nr--;
311                 ext4_mb_release_group_pa(&e4b, pa);
312                 ext4_unlock_group(sb, group);
313  
314 --- a/fs/ext4/mballoc.h
315 +++ b/fs/ext4/mballoc.h
316 @@ -69,7 +69,7 @@ do {                                                                  \
317  /*
318   * for which requests use 2^N search using buddies
319   */
320 -#define MB_DEFAULT_ORDER2_REQS         2
321 +#define MB_DEFAULT_ORDER2_REQS         8
322  
323  /*
324   * default group prealloc size 512 blocks