Whamcloud - gitweb
LU-10837 ldiskfs: skip bitmap check if block bitmap is uninitialized
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / rhel7 / ext4-mballoc-extra-checks.patch
1 Index: linux-3.10.0-123.el7.x86_64/fs/ext4/ext4.h
2 ===================================================================
3 --- linux-3.10.0-123.el7.x86_64.orig/fs/ext4/ext4.h
4 +++ linux-3.10.0-123.el7.x86_64/fs/ext4/ext4.h
5 @@ -2391,6 +2391,7 @@ struct ext4_group_info {
6         ext4_grpblk_t   bb_fragments;   /* nr of freespace fragments */
7         ext4_grpblk_t   bb_largest_free_order;/* order of largest frag in BG */
8         struct          list_head bb_prealloc_list;
9 +       unsigned long   bb_prealloc_nr;
10  #ifdef DOUBLE_CHECK
11         void            *bb_bitmap;
12  #endif
13 Index: linux-3.10.0-123.el7.x86_64/fs/ext4/mballoc.c
14 ===================================================================
15 --- linux-3.10.0-123.el7.x86_64.orig/fs/ext4/mballoc.c
16 +++ linux-3.10.0-123.el7.x86_64/fs/ext4/mballoc.c
17 @@ -362,7 +362,7 @@ static const char *ext4_groupinfo_slab_n
18         "ext4_groupinfo_64k", "ext4_groupinfo_128k"
19  };
20  
21 -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
22 +static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
23                                         ext4_group_t group);
24  static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
25                                                 ext4_group_t group);
26 @@ -718,7 +718,7 @@ mb_set_largest_free_order(struct super_b
27  }
28  
29  static noinline_for_stack
30 -void ext4_mb_generate_buddy(struct super_block *sb,
31 +int ext4_mb_generate_buddy(struct super_block *sb,
32                                 void *buddy, void *bitmap, ext4_group_t group)
33  {
34         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
35 @@ -750,19 +750,13 @@ void ext4_mb_generate_buddy(struct super
36         grp->bb_fragments = fragments;
37  
38         if (free != grp->bb_free) {
39 -               ext4_grp_locked_error(sb, group, 0, 0,
40 -                                     "block bitmap and bg descriptor "
41 -                                     "inconsistent: %u vs %u free clusters",
42 -                                     free, grp->bb_free);
43 -               /*
44 -                * If we intend to continue, we consider group descriptor
45 -                * corrupt and update bb_free using bitmap value
46 -                */
47 -               grp->bb_free = free;
48 -               if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
49 -                       percpu_counter_sub(&sbi->s_freeclusters_counter,
50 -                                          grp->bb_free);
51 -               set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
52 +               struct ext4_group_desc *gdp;
53 +               gdp = ext4_get_group_desc(sb, group, NULL);
54 +               ext4_error(sb, "group %lu: %u blocks in bitmap, %u in bb, "
55 +                       "%u in gd, %lu pa's\n", (long unsigned int)group,
56 +                       free, grp->bb_free, ext4_free_group_clusters(sb, gdp),
57 +                       grp->bb_prealloc_nr);
58 +               return -EIO;
59         }
60         mb_set_largest_free_order(sb, grp);
61  
62 @@ -768,6 +767,8 @@ void ext4_mb_generate_buddy(struct super
63         EXT4_SB(sb)->s_mb_buddies_generated++;
64         EXT4_SB(sb)->s_mb_generation_time += period;
65         spin_unlock(&EXT4_SB(sb)->s_bal_lock);
66 +
67 +       return 0;
68  }
69  
70  static void mb_regenerate_buddy(struct ext4_buddy *e4b)
71 @@ -883,7 +884,7 @@ static int ext4_mb_init_cache(struct pag
72         }
73  
74         first_block = page->index * blocks_per_page;
75 -       for (i = 0; i < blocks_per_page; i++) {
76 +       for (i = 0; i < blocks_per_page && err == 0; i++) {
77                 group = (first_block + i) >> 1;
78                 if (group >= ngroups)
79                         break;
80 @@ -922,7 +923,7 @@ static int ext4_mb_init_cache(struct pag
81                         ext4_lock_group(sb, group);
82                         /* init the buddy */
83                         memset(data, 0xff, blocksize);
84 -                       ext4_mb_generate_buddy(sb, data, incore, group);
85 +                       err = ext4_mb_generate_buddy(sb, data, incore, group);
86                         ext4_unlock_group(sb, group);
87                         incore = NULL;
88                 } else {
89 @@ -937,7 +938,7 @@ static int ext4_mb_init_cache(struct pag
90                         memcpy(data, bitmap, blocksize);
91  
92                         /* mark all preallocated blks used in in-core bitmap */
93 -                       ext4_mb_generate_from_pa(sb, data, group);
94 +                       err = ext4_mb_generate_from_pa(sb, data, group);
95                         ext4_mb_generate_from_freelist(sb, data, group);
96                         ext4_unlock_group(sb, group);
97  
98 @@ -947,7 +948,8 @@ static int ext4_mb_init_cache(struct pag
99                         incore = data;
100                 }
101         }
102 -       SetPageUptodate(page);
103 +       if (likely(err == 0))
104 +               SetPageUptodate(page);
105  
106  out:
107         if (bh) {
108 @@ -2224,9 +2226,11 @@ static void *ext4_mb_seq_groups_next(str
109  static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
110  {
111         struct super_block *sb = seq->private;
112 +       struct ext4_group_desc *gdp;
113         ext4_group_t group = (ext4_group_t) ((unsigned long) v);
114         int i;
115         int err, buddy_loaded = 0;
116 +       int free = 0;
117         struct ext4_buddy e4b;
118         struct ext4_group_info *grinfo;
119         struct sg {
120 @@ -2236,10 +2240,10 @@ static int ext4_mb_seq_groups_show(struc
121  
122         group--;
123         if (group == 0)
124 -               seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
125 +               seq_printf(seq, "#%-5s: %-5s %-5s %-5s %-5s %-5s"
126                                 "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
127                                   "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
128 -                          "group", "free", "frags", "first",
129 +                          "group", "bfree", "gfree", "frags", "first", "pa",
130                            "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
131                            "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
132  
133 @@ -2256,13 +2260,19 @@ static int ext4_mb_seq_groups_show(struc
134                 buddy_loaded = 1;
135         }
136  
137 +       gdp = ext4_get_group_desc(sb, group, NULL);
138 +       if (gdp != NULL)
139 +               free = ext4_free_group_clusters(sb, gdp);
140 +
141         memcpy(&sg, ext4_get_group_info(sb, group), i);
142  
143         if (buddy_loaded)
144                 ext4_mb_unload_buddy(&e4b);
145  
146 -       seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
147 -                       sg.info.bb_fragments, sg.info.bb_first_free);
148 +       seq_printf(seq, "#%-5lu: %-5u %-5u %-5u %-5u %-5lu [",
149 +                       (long unsigned int)group, sg.info.bb_free, free,
150 +                       sg.info.bb_fragments, sg.info.bb_first_free,
151 +                       sg.info.bb_prealloc_nr);
152         for (i = 0; i <= 13; i++)
153                 seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
154                                 sg.info.bb_counters[i] : 0);
155 @@ -3507,22 +3517,71 @@ static void ext4_mb_generate_from_freeli
156  }
157  
158  /*
159 + * check that free blocks in bitmap match free blocks in group descriptor;
160 + * do this before taking preallocated blocks into account to be able
161 + * to detect on-disk corruptions. The group lock should be held by the
162 + * caller.
163 + */
164 +int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
165 +                               struct ext4_group_desc *gdp, int group)
166 +{
167 +       unsigned int max = EXT4_CLUSTERS_PER_GROUP(sb);
168 +       unsigned int i, first, free = 0; /* 32-bit: u16 could wrap */
169 +       unsigned int free_in_gdp = ext4_free_group_clusters(sb, gdp);
170 +
171 +       if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
172 +               return 0; /* uninitialized bitmap: nothing to verify */
173 +
174 +       i = mb_find_next_zero_bit(bitmap, max, 0);
175 +       /* add up the length of every run of zero (free) bits */
176 +       while (i < max) {
177 +               first = i;
178 +               i = mb_find_next_bit(bitmap, max, i);
179 +               if (i > max)
180 +                       i = max;
181 +               free += i - first;
182 +               if (i < max)
183 +                       i = mb_find_next_zero_bit(bitmap, max, i);
184 +       }
185 +
186 +       if (free != free_in_gdp) {
187 +               ext4_error(sb, "on-disk bitmap for group %d "
188 +                       "corrupted: %u blocks free in bitmap, %u - in gd\n",
189 +                       group, free, free_in_gdp);
190 +               return -EIO;
191 +       }
192 +       return 0;
193 +}
194 +
195 +/*
196   * the function goes through all preallocation in this group and marks them
197   * used in in-core bitmap. buddy must be generated from this bitmap
198   * Need to be called with ext4 group lock held
199   */
200  static noinline_for_stack
201 -void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
202 +int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
203                                         ext4_group_t group)
204  {
205         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
206         struct ext4_prealloc_space *pa;
207 +       struct ext4_group_desc *gdp;
208         struct list_head *cur;
209         ext4_group_t groupnr;
210         ext4_grpblk_t start;
211         int preallocated = 0;
212 +       int skip = 0, count = 0;
213 +       int err;
214         int len;
215  
216 +       gdp = ext4_get_group_desc(sb, group, NULL);
217 +       if (gdp == NULL)
218 +               return -EIO;
219 +
220 +       /* before applying preallocations, check bitmap consistency */
221 +       err = ext4_mb_check_ondisk_bitmap(sb, bitmap, gdp, group);
222 +       if (err)
223 +               return err;
224 +
225         /* all form of preallocation discards first load group,
226          * so the only competing code is preallocation use.
227          * we don't need any locking here
228 @@ -3538,13 +3593,23 @@ void ext4_mb_generate_from_pa(struct sup
229                                              &groupnr, &start);
230                 len = pa->pa_len;
231                 spin_unlock(&pa->pa_lock);
232 -               if (unlikely(len == 0))
233 +               if (unlikely(len == 0)) {
234 +                       skip++;
235                         continue;
236 +               }
237                 BUG_ON(groupnr != group);
238                 ext4_set_bits(bitmap, start, len);
239                 preallocated += len;
240 +               count++;
241 +       }
242 +       if (count + skip != grp->bb_prealloc_nr) {
243 +               ext4_error(sb, "lost preallocations: "
244 +                          "count %d, bb_prealloc_nr %lu, skip %d\n",
245 +                          count, grp->bb_prealloc_nr, skip);
246 +               return -EIO;
247         }
248         mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
249 +       return 0;
250  }
251  
252  static void ext4_mb_pa_callback(struct rcu_head *head)
253 @@ -3603,6 +3668,7 @@ static void ext4_mb_put_pa(struct ext4_a
254          */
255         ext4_lock_group(sb, grp);
256         list_del(&pa->pa_group_list);
257 +       ext4_get_group_info(sb, grp)->bb_prealloc_nr--;
258         ext4_unlock_group(sb, grp);
259  
260         spin_lock(pa->pa_obj_lock);
261 @@ -3697,6 +3763,7 @@ ext4_mb_new_inode_pa(struct ext4_allocat
262  
263         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
264         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
265 +       grp->bb_prealloc_nr++;
266         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
267  
268         spin_lock(pa->pa_obj_lock);
269 @@ -3758,6 +3825,7 @@ ext4_mb_new_group_pa(struct ext4_allocat
270  
271         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
272         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
273 +       grp->bb_prealloc_nr++;
274         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
275  
276         /*
277 @@ -3927,6 +3995,8 @@ repeat:
278  
279                 spin_unlock(&pa->pa_lock);
280  
281 +               BUG_ON(grp->bb_prealloc_nr == 0);
282 +               grp->bb_prealloc_nr--;
283                 list_del(&pa->pa_group_list);
284                 list_add(&pa->u.pa_tmp_list, &list);
285         }
286 @@ -4056,7 +4126,7 @@ repeat:
287                 if (err) {
288                         ext4_error(sb, "Error loading buddy information for %u",
289                                         group);
290 -                       continue;
291 +                       return;
292                 }
293  
294                 bitmap_bh = ext4_read_block_bitmap(sb, group);
295 @@ -4068,6 +4138,8 @@ repeat:
296                 }
297  
298                 ext4_lock_group(sb, group);
299 +               BUG_ON(e4b.bd_info->bb_prealloc_nr == 0);
300 +               e4b.bd_info->bb_prealloc_nr--;
301                 list_del(&pa->pa_group_list);
302                 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
303                 ext4_unlock_group(sb, group);
304 @@ -4328,6 +4400,7 @@ ext4_mb_discard_lg_preallocations(struct
305                 }
306                 ext4_lock_group(sb, group);
307                 list_del(&pa->pa_group_list);
308 +               ext4_get_group_info(sb, group)->bb_prealloc_nr--;
309                 ext4_mb_release_group_pa(&e4b, pa);
310                 ext4_unlock_group(sb, group);
311  
312 Index: linux-3.10.0-123.el7.x86_64/fs/ext4/mballoc.h
313 ===================================================================
314 --- linux-3.10.0-123.el7.x86_64.orig/fs/ext4/mballoc.h
315 +++ linux-3.10.0-123.el7.x86_64/fs/ext4/mballoc.h
316 @@ -82,7 +82,7 @@ extern ushort ext4_mballoc_debug;
317  /*
318   * for which requests use 2^N search using buddies
319   */
320 -#define MB_DEFAULT_ORDER2_REQS         2
321 +#define MB_DEFAULT_ORDER2_REQS         8
322  
323  /*
324   * default group prealloc size 512 blocks