Whamcloud - gitweb
LU-11838 ldiskfs: add rhel8 server support
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / rhel8 / ext4-mballoc-extra-checks.patch
1 Index: linux-4.18.0-80.1.2.el8_0/fs/ext4/ext4.h
2 ===================================================================
3 --- linux-4.18.0-80.1.2.el8_0.orig/fs/ext4/ext4.h
4 +++ linux-4.18.0-80.1.2.el8_0/fs/ext4/ext4.h
5 @@ -2881,6 +2881,7 @@ struct ext4_group_info {
6         ext4_grpblk_t   bb_fragments;   /* nr of freespace fragments */
7         ext4_grpblk_t   bb_largest_free_order;/* order of largest frag in BG */
8         struct          list_head bb_prealloc_list;
9 +       unsigned long   bb_prealloc_nr;
10  #ifdef DOUBLE_CHECK
11         void            *bb_bitmap;
12  #endif
13 Index: linux-4.18.0-80.1.2.el8_0/fs/ext4/mballoc.c
14 ===================================================================
15 --- linux-4.18.0-80.1.2.el8_0.orig/fs/ext4/mballoc.c
16 +++ linux-4.18.0-80.1.2.el8_0/fs/ext4/mballoc.c
17 @@ -352,7 +352,7 @@ static const char * const ext4_groupinfo
18         "ext4_groupinfo_64k", "ext4_groupinfo_128k"
19  };
20  
21 -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
22 +static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
23                                         ext4_group_t group);
24  static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
25                                                 ext4_group_t group);
26 @@ -708,7 +708,7 @@ mb_set_largest_free_order(struct super_b
27  }
28  
29  static noinline_for_stack
30 -void ext4_mb_generate_buddy(struct super_block *sb,
31 +int ext4_mb_generate_buddy(struct super_block *sb,
32                                 void *buddy, void *bitmap, ext4_group_t group)
33  {
34         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
35 @@ -752,6 +752,7 @@ void ext4_mb_generate_buddy(struct super
36                 grp->bb_free = free;
37                 ext4_mark_group_bitmap_corrupted(sb, group,
38                                         EXT4_GROUP_INFO_BBITMAP_CORRUPT);
39 +               return -EIO;
40         }
41         mb_set_largest_free_order(sb, grp);
42  
43 @@ -762,6 +763,8 @@ void ext4_mb_generate_buddy(struct super
44         sbi->s_mb_buddies_generated++;
45         sbi->s_mb_generation_time += period;
46         spin_unlock(&sbi->s_bal_lock);
47 +
48 +       return 0;
49  }
50  
51  static void mb_regenerate_buddy(struct ext4_buddy *e4b)
52 @@ -882,7 +885,7 @@ static int ext4_mb_init_cache(struct pag
53         }
54  
55         first_block = page->index * blocks_per_page;
56 -       for (i = 0; i < blocks_per_page; i++) {
57 +       for (i = 0; i < blocks_per_page && err == 0; i++) {
58                 group = (first_block + i) >> 1;
59                 if (group >= ngroups)
60                         break;
61 @@ -926,7 +929,7 @@ static int ext4_mb_init_cache(struct pag
62                         ext4_lock_group(sb, group);
63                         /* init the buddy */
64                         memset(data, 0xff, blocksize);
65 -                       ext4_mb_generate_buddy(sb, data, incore, group);
66 +                       err = ext4_mb_generate_buddy(sb, data, incore, group);
67                         ext4_unlock_group(sb, group);
68                         incore = NULL;
69                 } else {
70 @@ -941,7 +944,7 @@ static int ext4_mb_init_cache(struct pag
71                         memcpy(data, bitmap, blocksize);
72  
73                         /* mark all preallocated blks used in in-core bitmap */
74 -                       ext4_mb_generate_from_pa(sb, data, group);
75 +                       err = ext4_mb_generate_from_pa(sb, data, group);
76                         ext4_mb_generate_from_freelist(sb, data, group);
77                         ext4_unlock_group(sb, group);
78  
79 @@ -951,7 +954,8 @@ static int ext4_mb_init_cache(struct pag
80                         incore = data;
81                 }
82         }
83 -       SetPageUptodate(page);
84 +       if (likely(err == 0))
85 +               SetPageUptodate(page);
86  
87  out:
88         if (bh) {
89 @@ -2280,9 +2284,11 @@ static void *ext4_mb_seq_groups_next(str
90  static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
91  {
92         struct super_block *sb = PDE_DATA(file_inode(seq->file));
93 +       struct ext4_group_desc *gdp;
94         ext4_group_t group = (ext4_group_t) ((unsigned long) v);
95         int i;
96         int err, buddy_loaded = 0;
97 +       int free = 0;
98         struct ext4_buddy e4b;
99         struct ext4_group_info *grinfo;
100         unsigned char blocksize_bits = min_t(unsigned char,
101 @@ -2295,7 +2301,7 @@ static int ext4_mb_seq_groups_show(struc
102  
103         group--;
104         if (group == 0)
105 -               seq_puts(seq, "#group: free  frags first ["
106 +               seq_puts(seq, "#group: bfree gfree frags first pa    ["
107                               " 2^0   2^1   2^2   2^3   2^4   2^5   2^6  "
108                               " 2^7   2^8   2^9   2^10  2^11  2^12  2^13  ]\n");
109  
110 @@ -2313,13 +2319,19 @@ static int ext4_mb_seq_groups_show(struc
111                 buddy_loaded = 1;
112         }
113  
114 +       gdp = ext4_get_group_desc(sb, group, NULL);
115 +       if (gdp != NULL)
116 +               free = ext4_free_group_clusters(sb, gdp);
117 +
118         memcpy(&sg, ext4_get_group_info(sb, group), i);
119  
120         if (buddy_loaded)
121                 ext4_mb_unload_buddy(&e4b);
122  
123 -       seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
124 -                       sg.info.bb_fragments, sg.info.bb_first_free);
125 +       seq_printf(seq, "#%-5lu: %-5u %-5u %-5u %-5u %-5lu [",
126 +                       (long unsigned int)group, sg.info.bb_free, free,
127 +                       sg.info.bb_fragments, sg.info.bb_first_free,
128 +                       sg.info.bb_prealloc_nr);
129         for (i = 0; i <= 13; i++)
130                 seq_printf(seq, " %-5u", i <= blocksize_bits + 1 ?
131                                 sg.info.bb_counters[i] : 0);
132 @@ -3614,22 +3626,71 @@ static void ext4_mb_generate_from_freeli
133  }
134  
135  /*
136 + * check that the free blocks in the bitmap match the free blocks in the
137 + * group descriptor. Do this before taking preallocated blocks into account
138 + * to be able to detect on-disk corruption. The group lock should be held
139 + * by the caller.
140 + */
141 +int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
142 +                               struct ext4_group_desc *gdp, int group)
143 +{
144 +       /* 32-bit counters so values above 65535 are not truncated */
145 +       unsigned int max = EXT4_CLUSTERS_PER_GROUP(sb);
146 +       unsigned int i, first, free = 0;
147 +       unsigned int free_in_gdp = ext4_free_group_clusters(sb, gdp);
148 +
149 +       /* nothing to compare: gd reports 0 free and bitmap is BLOCK_UNINIT */
150 +       if (free_in_gdp == 0 && (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))
151 +               return 0;
152 +
153 +       /* sum the lengths of all runs of zero (free) bits below max */
154 +       i = mb_find_next_zero_bit(bitmap, max, 0);
155 +       while (i < max) {
156 +               first = i;
157 +               i = min_t(unsigned int, mb_find_next_bit(bitmap, max, i), max);
158 +               free += i - first;
159 +               if (i < max)
160 +                       i = mb_find_next_zero_bit(bitmap, max, i);
161 +       }
162 +
163 +       if (free != free_in_gdp) {
164 +               ext4_error(sb, "on-disk bitmap for group %d "
165 +                       "corrupted: %u blocks free in bitmap, %u - in gd\n",
166 +                       group, free, free_in_gdp);
167 +               return -EIO;
168 +       }
169 +       return 0;
170 +}
171 +
172 +/*
173   * the function goes through all preallocation in this group and marks them
174   * used in in-core bitmap. buddy must be generated from this bitmap
175   * Need to be called with ext4 group lock held
176   */
177  static noinline_for_stack
178 -void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
179 +int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
180                                         ext4_group_t group)
181  {
182         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
183         struct ext4_prealloc_space *pa;
184 +       struct ext4_group_desc *gdp;
185         struct list_head *cur;
186         ext4_group_t groupnr;
187         ext4_grpblk_t start;
188         int preallocated = 0;
189 +       int skip = 0, count = 0;
190 +       int err;
191         int len;
192  
193 +       gdp = ext4_get_group_desc(sb, group, NULL);
194 +       if (gdp == NULL)
195 +               return -EIO;
196 +
197 +       /* before applying preallocations, check bitmap consistency */
198 +       err = ext4_mb_check_ondisk_bitmap(sb, bitmap, gdp, group);
199 +       if (err)
200 +               return err;
201 +
202         /* all form of preallocation discards first load group,
203          * so the only competing code is preallocation use.
204          * we don't need any locking here
205 @@ -3645,13 +3706,23 @@ void ext4_mb_generate_from_pa(struct sup
206                                              &groupnr, &start);
207                 len = pa->pa_len;
208                 spin_unlock(&pa->pa_lock);
209 -               if (unlikely(len == 0))
210 +               if (unlikely(len == 0)) {
211 +                       skip++;
212                         continue;
213 +               }
214                 BUG_ON(groupnr != group);
215                 ext4_set_bits(bitmap, start, len);
216                 preallocated += len;
217 +               count++;
218 +       }
219 +       if (count + skip != grp->bb_prealloc_nr) {
220 +               ext4_error(sb, "lost preallocations: "
221 +                          "count %d, bb_prealloc_nr %lu, skip %d\n",
222 +                          count, grp->bb_prealloc_nr, skip);
223 +               return -EIO;
224         }
225         mb_debug(1, "preallocated %u for group %u\n", preallocated, group);
226 +       return 0;
227  }
228  
229  static void ext4_mb_pa_callback(struct rcu_head *head)
230 @@ -3715,6 +3786,7 @@ static void ext4_mb_put_pa(struct ext4_a
231          */
232         ext4_lock_group(sb, grp);
233         list_del(&pa->pa_group_list);
234 +       ext4_get_group_info(sb, grp)->bb_prealloc_nr--;
235         ext4_unlock_group(sb, grp);
236  
237         spin_lock(pa->pa_obj_lock);
238 @@ -3809,6 +3881,7 @@ ext4_mb_new_inode_pa(struct ext4_allocat
239  
240         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
241         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
242 +       grp->bb_prealloc_nr++;
243         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
244  
245         spin_lock(pa->pa_obj_lock);
246 @@ -3870,6 +3943,7 @@ ext4_mb_new_group_pa(struct ext4_allocat
247  
248         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
249         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
250 +       grp->bb_prealloc_nr++;
251         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
252  
253         /*
254 @@ -4041,6 +4115,8 @@ repeat:
255  
256                 spin_unlock(&pa->pa_lock);
257  
258 +               BUG_ON(grp->bb_prealloc_nr == 0);
259 +               grp->bb_prealloc_nr--;
260                 list_del(&pa->pa_group_list);
261                 list_add(&pa->u.pa_tmp_list, &list);
262         }
263 @@ -4171,7 +4247,7 @@ repeat:
264                 if (err) {
265                         ext4_error(sb, "Error %d loading buddy information for %u",
266                                    err, group);
267 -                       continue;
268 +                       return;
269                 }
270  
271                 bitmap_bh = ext4_read_block_bitmap(sb, group);
272 @@ -4184,6 +4260,8 @@ repeat:
273                 }
274  
275                 ext4_lock_group(sb, group);
276 +               BUG_ON(e4b.bd_info->bb_prealloc_nr == 0);
277 +               e4b.bd_info->bb_prealloc_nr--;
278                 list_del(&pa->pa_group_list);
279                 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
280                 ext4_unlock_group(sb, group);
281 @@ -4446,6 +4524,7 @@ ext4_mb_discard_lg_preallocations(struct
282                 }
283                 ext4_lock_group(sb, group);
284                 list_del(&pa->pa_group_list);
285 +               ext4_get_group_info(sb, group)->bb_prealloc_nr--;
286                 ext4_mb_release_group_pa(&e4b, pa);
287                 ext4_unlock_group(sb, group);
288  
289 Index: linux-4.18.0-80.1.2.el8_0/fs/ext4/mballoc.h
290 ===================================================================
291 --- linux-4.18.0-80.1.2.el8_0.orig/fs/ext4/mballoc.h
292 +++ linux-4.18.0-80.1.2.el8_0/fs/ext4/mballoc.h
293 @@ -70,7 +70,7 @@ do {                                                                  \
294  /*
295   * for which requests use 2^N search using buddies
296   */
297 -#define MB_DEFAULT_ORDER2_REQS         2
298 +#define MB_DEFAULT_ORDER2_REQS         8
299  
300  /*
301   * default group prealloc size 512 blocks