Whamcloud - gitweb
LU-13783 ldiskfs: Add support for mainline 5.8 kernel
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / linux-5.8 / ext4-mballoc-extra-checks.patch
1 ---
2  fs/ext4/ext4.h    |    1 
3  fs/ext4/mballoc.c |  103 +++++++++++++++++++++++++++++++++++++++++++++++-------
4  fs/ext4/mballoc.h |    2 -
5  3 files changed, 93 insertions(+), 13 deletions(-)
6
7 --- a/fs/ext4/ext4.h
8 +++ b/fs/ext4/ext4.h
9 @@ -3156,6 +3156,7 @@ struct ext4_group_info {
10         ext4_grpblk_t   bb_fragments;   /* nr of freespace fragments */
11         ext4_grpblk_t   bb_largest_free_order;/* order of largest frag in BG */
12         struct          list_head bb_prealloc_list;
13 +       unsigned long   bb_prealloc_nr;
14  #ifdef DOUBLE_CHECK
15         void            *bb_bitmap;
16  #endif
17 --- a/fs/ext4/mballoc.c
18 +++ b/fs/ext4/mballoc.c
19 @@ -345,7 +345,7 @@ static const char * const ext4_groupinfo
20         "ext4_groupinfo_64k", "ext4_groupinfo_128k"
21  };
22  
23 -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
24 +static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
25                                         ext4_group_t group);
26  static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
27                                                 ext4_group_t group);
28 @@ -769,7 +769,7 @@ mb_set_largest_free_order(struct super_b
29  }
30  
31  static noinline_for_stack
32 -void ext4_mb_generate_buddy(struct super_block *sb,
33 +int ext4_mb_generate_buddy(struct super_block *sb,
34                                 void *buddy, void *bitmap, ext4_group_t group)
35  {
36         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
37 @@ -813,6 +813,7 @@ void ext4_mb_generate_buddy(struct super
38                 grp->bb_free = free;
39                 ext4_mark_group_bitmap_corrupted(sb, group,
40                                         EXT4_GROUP_INFO_BBITMAP_CORRUPT);
41 +               return -EIO;
42         }
43         mb_set_largest_free_order(sb, grp);
44  
45 @@ -823,6 +824,8 @@ void ext4_mb_generate_buddy(struct super
46         sbi->s_mb_buddies_generated++;
47         sbi->s_mb_generation_time += period;
48         spin_unlock(&sbi->s_bal_lock);
49 +
50 +       return 0;
51  }
52  
53  static void mb_regenerate_buddy(struct ext4_buddy *e4b)
54 @@ -943,7 +946,7 @@ static int ext4_mb_init_cache(struct pag
55         }
56  
57         first_block = page->index * blocks_per_page;
58 -       for (i = 0; i < blocks_per_page; i++) {
59 +       for (i = 0; i < blocks_per_page && err == 0; i++) {
60                 group = (first_block + i) >> 1;
61                 if (group >= ngroups)
62                         break;
63 @@ -987,7 +990,7 @@ static int ext4_mb_init_cache(struct pag
64                         ext4_lock_group(sb, group);
65                         /* init the buddy */
66                         memset(data, 0xff, blocksize);
67 -                       ext4_mb_generate_buddy(sb, data, incore, group);
68 +                       err = ext4_mb_generate_buddy(sb, data, incore, group);
69                         ext4_unlock_group(sb, group);
70                         incore = NULL;
71                 } else {
72 @@ -1002,7 +1005,7 @@ static int ext4_mb_init_cache(struct pag
73                         memcpy(data, bitmap, blocksize);
74  
75                         /* mark all preallocated blks used in in-core bitmap */
76 -                       ext4_mb_generate_from_pa(sb, data, group);
77 +                       err = ext4_mb_generate_from_pa(sb, data, group);
78                         ext4_mb_generate_from_freelist(sb, data, group);
79                         ext4_unlock_group(sb, group);
80  
81 @@ -1012,7 +1015,8 @@ static int ext4_mb_init_cache(struct pag
82                         incore = data;
83                 }
84         }
85 -       SetPageUptodate(page);
86 +       if (likely(err == 0))
87 +               SetPageUptodate(page);
88  
89  out:
90         if (bh) {
91 @@ -2396,9 +2400,11 @@ static void *ext4_mb_seq_groups_next(str
92  static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
93  {
94         struct super_block *sb = PDE_DATA(file_inode(seq->file));
95 +       struct ext4_group_desc *gdp;
96         ext4_group_t group = (ext4_group_t) ((unsigned long) v);
97         int i;
98         int err, buddy_loaded = 0;
99 +       int free = 0;
100         struct ext4_buddy e4b;
101         struct ext4_group_info *grinfo;
102         unsigned char blocksize_bits = min_t(unsigned char,
103 @@ -2411,7 +2417,7 @@ static int ext4_mb_seq_groups_show(struc
104  
105         group--;
106         if (group == 0)
107 -               seq_puts(seq, "#group: free  frags first ["
108 +               seq_puts(seq, "#group: bfree gfree frags first pa    ["
109                               " 2^0   2^1   2^2   2^3   2^4   2^5   2^6  "
110                               " 2^7   2^8   2^9   2^10  2^11  2^12  2^13  ]\n");
111  
112 @@ -2429,13 +2435,19 @@ static int ext4_mb_seq_groups_show(struc
113                 buddy_loaded = 1;
114         }
115  
116 +       gdp = ext4_get_group_desc(sb, group, NULL);
117 +       if (gdp != NULL)
118 +               free = ext4_free_group_clusters(sb, gdp);
119 +
120         memcpy(&sg, ext4_get_group_info(sb, group), i);
121  
122         if (buddy_loaded)
123                 ext4_mb_unload_buddy(&e4b);
124  
125 -       seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
126 -                       sg.info.bb_fragments, sg.info.bb_first_free);
127 +       seq_printf(seq, "#%-5lu: %-5u %-5u %-5u %-5u %-5lu [",
128 +                       (long unsigned int)group, sg.info.bb_free, free,
129 +                       sg.info.bb_fragments, sg.info.bb_first_free,
130 +                       sg.info.bb_prealloc_nr);
131         for (i = 0; i <= 13; i++)
132                 seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
133                                 sg.info.bb_counters[i] : 0);
134 @@ -3742,22 +3754,71 @@ static void ext4_mb_generate_from_freeli
135  }
136  
137  /*
138 + * check that the free blocks in the bitmap match the free block count in
139 + * the group descriptor. do this before taking preallocated blocks into
140 + * account to be able to detect on-disk corruptions. The group lock should
141 + * be held by the caller.
142 + */
143 +int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
144 +                               struct ext4_group_desc *gdp, int group)
145 +{
146 +       unsigned int max = EXT4_CLUSTERS_PER_GROUP(sb); /* may exceed 65535 */
147 +       unsigned int i, first, free = 0;
148 +       unsigned int free_in_gdp = ext4_free_group_clusters(sb, gdp);
149 +
150 +       if (free_in_gdp == 0 && gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT))
151 +               return 0;
152 +
153 +       /* add up the length of every run of zero (free) bits below max */
154 +       i = mb_find_next_zero_bit(bitmap, max, 0);
155 +       while (i < max) {
156 +               first = i;
157 +               i = mb_find_next_bit(bitmap, max, i);
158 +               if (i > max)
159 +                       i = max;
160 +               free += i - first;
161 +               if (i < max)
162 +                       i = mb_find_next_zero_bit(bitmap, max, i);
163 +       }
164 +
165 +       if (free != free_in_gdp) {
166 +               ext4_error(sb, "on-disk bitmap for group %d "
167 +                       "corrupted: %u blocks free in bitmap, %u - in gd\n",
168 +                       group, free, free_in_gdp);
169 +               return -EIO;
170 +       }
171 +       return 0;
172 +}
173 +
174 +/*
175   * the function goes through all preallocation in this group and marks them
176   * used in in-core bitmap. buddy must be generated from this bitmap
177   * Need to be called with ext4 group lock held
178   */
179  static noinline_for_stack
180 -void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
181 +int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
182                                         ext4_group_t group)
183  {
184         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
185         struct ext4_prealloc_space *pa;
186 +       struct ext4_group_desc *gdp;
187         struct list_head *cur;
188         ext4_group_t groupnr;
189         ext4_grpblk_t start;
190         int preallocated = 0;
191 +       int skip = 0, count = 0;
192 +       int err;
193         int len;
194  
195 +       gdp = ext4_get_group_desc(sb, group, NULL);
196 +       if (gdp == NULL)
197 +               return -EIO;
198 +
199 +       /* before applying preallocations, check bitmap consistency */
200 +       err = ext4_mb_check_ondisk_bitmap(sb, bitmap, gdp, group);
201 +       if (err)
202 +               return err;
203 +
204         /* all form of preallocation discards first load group,
205          * so the only competing code is preallocation use.
206          * we don't need any locking here
207 @@ -3773,13 +3834,23 @@ void ext4_mb_generate_from_pa(struct sup
208                                              &groupnr, &start);
209                 len = pa->pa_len;
210                 spin_unlock(&pa->pa_lock);
211 -               if (unlikely(len == 0))
212 +               if (unlikely(len == 0)) {
213 +                       skip++;
214                         continue;
215 +               }
216                 BUG_ON(groupnr != group);
217                 ext4_set_bits(bitmap, start, len);
218                 preallocated += len;
219 +               count++;
220 +       }
221 +       if (count + skip != grp->bb_prealloc_nr) {
222 +               ext4_error(sb, "lost preallocations: "
223 +                          "count %d, bb_prealloc_nr %lu, skip %d\n",
224 +                          count, grp->bb_prealloc_nr, skip);
225 +               return -EIO;
226         }
227         mb_debug(sb, "preallocated %d for group %u\n", preallocated, group);
228 +       return 0;
229  }
230  
231  static void ext4_mb_pa_callback(struct rcu_head *head)
232 @@ -3843,6 +3914,7 @@ static void ext4_mb_put_pa(struct ext4_a
233          */
234         ext4_lock_group(sb, grp);
235         list_del(&pa->pa_group_list);
236 +       ext4_get_group_info(sb, grp)->bb_prealloc_nr--;
237         ext4_unlock_group(sb, grp);
238  
239         spin_lock(pa->pa_obj_lock);
240 @@ -3934,6 +4006,7 @@ ext4_mb_new_inode_pa(struct ext4_allocat
241         pa->pa_inode = ac->ac_inode;
242  
243         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
244 +       grp->bb_prealloc_nr++;
245  
246         spin_lock(pa->pa_obj_lock);
247         list_add_rcu(&pa->pa_inode_list, &ei->i_prealloc_list);
248 @@ -3988,6 +4061,7 @@ ext4_mb_new_group_pa(struct ext4_allocat
249         pa->pa_inode = NULL;
250  
251         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
252 +       grp->bb_prealloc_nr++;
253  
254         /*
255          * We will later add the new pa to the right bucket
256 @@ -4155,6 +4229,8 @@ repeat:
257  
258                 spin_unlock(&pa->pa_lock);
259  
260 +               BUG_ON(grp->bb_prealloc_nr == 0);
261 +               grp->bb_prealloc_nr--;
262                 list_del(&pa->pa_group_list);
263                 list_add(&pa->u.pa_tmp_list, &list);
264         }
265 @@ -4291,7 +4367,7 @@ repeat:
266                 if (err) {
267                         ext4_error_err(sb, -err, "Error %d loading buddy information for %u",
268                                        err, group);
269 -                       continue;
270 +                       return;
271                 }
272  
273                 bitmap_bh = ext4_read_block_bitmap(sb, group);
274 @@ -4304,6 +4380,8 @@ repeat:
275                 }
276  
277                 ext4_lock_group(sb, group);
278 +               BUG_ON(e4b.bd_info->bb_prealloc_nr == 0);
279 +               e4b.bd_info->bb_prealloc_nr--;
280                 list_del(&pa->pa_group_list);
281                 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
282                 ext4_unlock_group(sb, group);
283 @@ -4598,6 +4676,7 @@ ext4_mb_discard_lg_preallocations(struct
284                 }
285                 ext4_lock_group(sb, group);
286                 list_del(&pa->pa_group_list);
287 +               ext4_get_group_info(sb, group)->bb_prealloc_nr--;
288                 ext4_mb_release_group_pa(&e4b, pa);
289                 ext4_unlock_group(sb, group);
290  
291 --- a/fs/ext4/mballoc.h
292 +++ b/fs/ext4/mballoc.h
293 @@ -66,7 +66,7 @@
294  /*
295   * for which requests use 2^N search using buddies
296   */
297 -#define MB_DEFAULT_ORDER2_REQS         2
298 +#define MB_DEFAULT_ORDER2_REQS         8
299  
300  /*
301   * default group prealloc size 512 blocks