Whamcloud - gitweb
LU-1026 ldiskfs: make bitmaps corruption not fatal
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / rhel6.6 / ext4-corrupted-inode-block-bitmaps-handling-patches.patch
1 diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
2 index 61aeacb..026c89f 100644
3 --- a/fs/ext4/balloc.c
4 +++ b/fs/ext4/balloc.c
5 @@ -97,12 +97,11 @@ unsigned ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
6                 /* If checksum is bad mark all blocks used to prevent allocation
7                  * essentially implementing a per-group read-only flag. */
8                 if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
9 -                       ext4_error(sb, "Checksum bad for group %u",
10 +                       ext4_corrupted_block_group(sb, block_group,
11 +                                       EXT4_GROUP_INFO_BBITMAP_CORRUPT |
12 +                                       EXT4_GROUP_INFO_IBITMAP_CORRUPT,
13 +                                       "Checksum bad for group %u",
14                                         block_group);
15 -                       ext4_free_blks_set(sb, gdp, 0);
16 -                       ext4_free_inodes_set(sb, gdp, 0);
17 -                       ext4_itable_unused_set(sb, gdp, 0);
18 -                       memset(bh->b_data, 0xff, sb->s_blocksize);
19                         return 0;
20                 }
21                 memset(bh->b_data, 0, sb->s_blocksize);
22 @@ -277,7 +276,9 @@ static int ext4_valid_block_bitmap(struct super_block *sb,
23                 return 1;
24  
25  err_out:
26 -       ext4_error(sb, "Invalid block bitmap - block_group = %d, block = %llu",
27 +       ext4_corrupted_block_group(sb, block_group,
28 +                       EXT4_GROUP_INFO_BBITMAP_CORRUPT,
29 +                       "Invalid block bitmap - block_group = %d, block = %llu",
30                         block_group, bitmap_blk);
31         return 0;
32  }
33 diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
34 index 4ed330c..938487a 100644
35 --- a/fs/ext4/ext4.h
36 +++ b/fs/ext4/ext4.h
37 @@ -75,8 +75,17 @@ typedef __u32 ext4_lblk_t;
38  /* data type for block group number */
39  typedef unsigned int ext4_group_t;
40  
41 +void __ext4_corrupted_block_group(struct super_block *sb,
42 +                                 ext4_group_t group, unsigned int flags);
43 +/* Warn with the caller's name, then mark @group corrupt per @flags. */
44 +#define ext4_corrupted_block_group(sb, group, flags, fmt...)           \
45 +       do {                                                            \
46 +               __ext4_warning(sb, __func__, ## fmt);                   \
47 +               __ext4_corrupted_block_group(sb, group, flags);         \
48 +       } while (0)
49 +
50  /*
51 - * Flags used in mballoc's allocation_context flags field.  
52 + * Flags used in mballoc's allocation_context flags field.
53   *
54   * Also used to show what's going on for debugging purposes when the
55   * flag field is exported via the traceport interface
56 @@ -2203,9 +2212,19 @@ struct ext4_group_info {
57  
58  #define EXT4_GROUP_INFO_NEED_INIT_BIT          0
59  #define EXT4_GROUP_INFO_WAS_TRIMMED_BIT                1
60 +#define EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT    2 /* bb_state bit index */
61 +#define EXT4_GROUP_INFO_BBITMAP_CORRUPT        \
62 +       (1 << EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT) /* mask for flags args */
63 +#define EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT    3 /* bb_state bit index */
64 +#define EXT4_GROUP_INFO_IBITMAP_CORRUPT        \
65 +       (1 << EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT) /* mask for flags args */
66  
67  #define EXT4_MB_GRP_NEED_INIT(grp)     \
68         (test_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &((grp)->bb_state)))
69 +#define EXT4_MB_GRP_BBITMAP_CORRUPT(grp)       \
70 +       (test_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &((grp)->bb_state)))
71 +#define EXT4_MB_GRP_IBITMAP_CORRUPT(grp)       \
72 +       (test_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &((grp)->bb_state)))
73  
74  #define EXT4_MB_GRP_WAS_TRIMMED(grp)   \
75         (test_bit(EXT4_GROUP_INFO_WAS_TRIMMED_BIT, &((grp)->bb_state)))
76 diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
77 index f3509ba..8894963 100644
78 --- a/fs/ext4/ialloc.c
79 +++ b/fs/ext4/ialloc.c
80 @@ -76,11 +76,10 @@ unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
81         /* If checksum is bad mark all blocks and inodes use to prevent
82          * allocation, essentially implementing a per-group read-only flag. */
83         if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
84 -               ext4_error(sb, "Checksum bad for group %u", block_group);
85 -               ext4_free_blks_set(sb, gdp, 0);
86 -               ext4_free_inodes_set(sb, gdp, 0);
87 -               ext4_itable_unused_set(sb, gdp, 0);
88 -               memset(bh->b_data, 0xff, sb->s_blocksize);
89 +               ext4_corrupted_block_group(sb, block_group,
90 +                               EXT4_GROUP_INFO_BBITMAP_CORRUPT |
91 +                               EXT4_GROUP_INFO_IBITMAP_CORRUPT,
92 +                               "Checksum bad for group %u", block_group);
93                 return 0;
94         }
95  
96 @@ -192,6 +191,7 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
97         struct ext4_super_block *es;
98         struct ext4_sb_info *sbi;
99         int fatal = 0, err, count, cleared;
100 +       struct ext4_group_info *grp;
101  
102         if (atomic_read(&inode->i_count) > 1) {
103                 printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
104 @@ -235,7 +235,9 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
105         block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
106         bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
107         bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
108 -       if (!bitmap_bh)
109 +       /* Don't bother if the inode bitmap is corrupt. */
110 +       grp = ext4_get_group_info(sb, block_group);
111 +       if (unlikely(EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) || !bitmap_bh)
112                 goto error_return;
113  
114         BUFFER_TRACE(bitmap_bh, "get_write_access");
115 @@ -247,9 +249,12 @@ void ext4_free_inode(handle_t *handle, struct inode *inode)
116         ext4_lock_group(sb, block_group);
117         cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
118         ext4_unlock_group(sb, block_group);
119 -       if (!cleared)
120 -               ext4_error(sb, "bit already cleared for inode %lu", ino);
121 -       else {
122 +       if (!cleared) {
123 +               ext4_corrupted_block_group(sb, block_group,
124 +                                          EXT4_GROUP_INFO_IBITMAP_CORRUPT,
125 +                                          "bit already cleared for inode %lu",
126 +                                          ino);
127 +       } else {
128                 gdp = ext4_get_group_desc(sb, block_group, &bh2);
129  
130                 BUFFER_TRACE(bh2, "get_write_access");
131 @@ -825,6 +830,7 @@ struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
132         int free = 0;
133         static int once = 1;
134         ext4_group_t flex_group;
135 +       struct ext4_group_info *grp;
136  
137         /* Cannot create files in a deleted directory */
138         if (!dir || !dir->i_nlink)
139 @@ -884,10 +890,21 @@ got_group:
140                 if (!gdp)
141                         goto fail;
142  
143 +               grp = ext4_get_group_info(sb, group);
144 +               /* Skip groups with already-known suspicious inode tables */
145 +               if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
146 +                       if (++group == ngroups)
147 +                               group = 0;
148 +                       continue;
149 +               }
150                 brelse(inode_bitmap_bh);
151                 inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
152 -               if (!inode_bitmap_bh)
153 -                       goto fail;
154 +               /* Skip groups with suspicious inode tables */
155 +               if (EXT4_MB_GRP_IBITMAP_CORRUPT(grp) || !inode_bitmap_bh) {
156 +                       if (++group == ngroups)
157 +                               group = 0;
158 +                       continue;
159 +               }
160  
161  repeat_in_this_group:
162                 ino = ext4_find_next_zero_bit((unsigned long *)
163 diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
164 index efcf909..06cd929 100644
165 --- a/fs/ext4/mballoc.c
166 +++ b/fs/ext4/mballoc.c
167 @@ -715,10 +715,12 @@ int ext4_mb_generate_buddy(struct super_block *sb,
168         if (free != grp->bb_free) {
169                 struct ext4_group_desc *gdp;
170                 gdp = ext4_get_group_desc (sb, group, NULL);
171 -               ext4_error(sb, "group %lu: %u blocks in bitmap, %u in bb, "
172 -                       "%u in gd, %lu pa's\n", (long unsigned int)group,
173 -                       free, grp->bb_free, ext4_free_blks_count(sb, gdp),
174 -                       grp->bb_prealloc_nr);
175 +               ext4_corrupted_block_group(sb, group,
176 +                               EXT4_GROUP_INFO_BBITMAP_CORRUPT,
177 +                               "group %lu: %u blocks in bitmap, %u in bb, %u in gd, %lu pa's\n",
178 +                               (long unsigned int)group, free, grp->bb_free,
179 +                               ext4_free_blks_count(sb, gdp),
180 +                               grp->bb_prealloc_nr);
181                 return -EIO;
182         }
183         mb_set_largest_free_order(sb, grp);
184 @@ -1120,7 +1122,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
185                  */
186                 ret = ext4_mb_init_group(sb, group);
187                 if (ret)
188 -                       return ret;
189 +                       goto err;
190         }
191  
192         /*
193 @@ -1204,6 +1206,8 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
194         return 0;
195  
196  err:
197 +       ext4_warning(sb, "Error in loading buddy information for %u",
198 +                    group);
199         if (e4b->bd_bitmap_page)
200                 page_cache_release(e4b->bd_bitmap_page);
201         if (e4b->bd_buddy_page)
202 @@ -1291,6 +1295,10 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
203  
204         BUG_ON(first + count > (sb->s_blocksize << 3));
205         assert_spin_locked(ext4_group_lock_ptr(sb, e4b->bd_group));
206 +       /* Don't bother if the block group is corrupt. */
207 +       if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info)))
208 +               return;
209 +
210         mb_check_buddy(e4b);
211         mb_free_blocks_double(inode, e4b, first, count);
212  
213 @@ -1321,9 +1329,12 @@ static void mb_free_blocks(struct inode *inode, struct ext4_buddy *e4b,
214                             le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block);
215                         ext4_grp_locked_error(sb, e4b->bd_group,
216                                    __func__, "double-free of inode"
217 -                                  " %lu's block %llu(bit %u in group %u)",
218 +                                  " %lu's block %llu(bit %u in group %u) block bitmap corrupt",
219                                    inode ? inode->i_ino : 0, blocknr, block,
220                                    e4b->bd_group);
221 +                       /* Mark the block group as corrupt. */
222 +                       set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT,
223 +                               &e4b->bd_info->bb_state);
224                 }
225                 mb_clear_bit(block, EXT4_MB_BITMAP(e4b));
226                 e4b->bd_info->bb_counters[order]++;
227 @@ -1700,6 +1711,11 @@ int ext4_mb_find_by_goal(struct ext4_allocation_context *ac,
228         if (err)
229                 return err;
230  
231 +       if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(e4b->bd_info))) {
232 +               ext4_mb_release_desc(e4b);
233 +               return 0;
234 +       }
235 +
236         ext4_lock_group(ac->ac_sb, group);
237         max = mb_find_extent(e4b, 0, ac->ac_g_ex.fe_start,
238                              ac->ac_g_ex.fe_len, &ex);
239 @@ -1912,6 +1928,9 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
240  
241         BUG_ON(cr < 0 || cr >= 4);
242  
243 +       if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(grp)))
244 +               return 0;
245 +
246         /* We only do this if the grp has never been initialized */
247         if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
248                 int ret = ext4_mb_init_group(ac->ac_sb, group);
249 @@ -3382,9 +3401,10 @@ int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
250         }
251  
252         if (free != ext4_free_blks_count(sb, gdp)) {
253 -               ext4_error(sb, "on-disk bitmap for group %d"
254 -                       "corrupted: %u blocks free in bitmap, %u - in gd\n",
255 -                       group, free, ext4_free_blks_count(sb, gdp));
256 +               ext4_corrupted_block_group(sb, group,
257 +                               EXT4_GROUP_INFO_BBITMAP_CORRUPT,
258 +                               "on-disk bitmap for group %d corrupted: %u blocks free in bitmap, %u - in gd\n",
259 +                               group, free, ext4_free_blks_count(sb, gdp));
260                 return -EIO;
261         }
262         return 0;
263 @@ -3753,14 +3773,6 @@ ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
264         /* "free < pa->pa_free" means we maybe double alloc the same blocks,
265          * otherwise maybe leave some free blocks unavailable, no need to BUG.*/
266         if ((free > pa->pa_free && !pa->pa_error) || (free < pa->pa_free)) {
267 -               ext4_error(sb, "pa free mismatch: [pa %p] "
268 -                               "[phy %lu] [logic %lu] [len %u] [free %u] "
269 -                               "[error %u] [inode %lu] [freed %u]", pa,
270 -                               (unsigned long)pa->pa_pstart,
271 -                               (unsigned long)pa->pa_lstart,
272 -                               (unsigned)pa->pa_len, (unsigned)pa->pa_free,
273 -                               (unsigned)pa->pa_error, pa->pa_inode->i_ino,
274 -                               free);
275                 ext4_grp_locked_error(sb, group,
276                                 __func__, "free %u, pa_free %u",
277                                 free, pa->pa_free);
278 @@ -3834,14 +3846,11 @@ ext4_mb_discard_group_preallocations(struct super_block *sb,
279                 return 0;
280  
281         bitmap_bh = ext4_read_block_bitmap(sb, group);
282 -       if (bitmap_bh == NULL) {
283 -               ext4_error(sb, "Error reading block bitmap for %u", group);
284 +       if (bitmap_bh == NULL)
285                 return 0;
286 -       }
287  
288         err = ext4_mb_load_buddy(sb, group, &e4b);
289         if (err) {
290 -               ext4_error(sb, "Error loading buddy information for %u", group);
291                 put_bh(bitmap_bh);
292                 return 0;
293         }
294 @@ -4015,16 +4024,11 @@ repeat:
295                 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
296  
297                 err = ext4_mb_load_buddy(sb, group, &e4b);
298 -               if (err) {
299 -                       ext4_error(sb, "Error loading buddy information for %u",
300 -                                       group);
301 +               if (err)
302                         return;
303 -               }
304  
305                 bitmap_bh = ext4_read_block_bitmap(sb, group);
306                 if (bitmap_bh == NULL) {
307 -                       ext4_error(sb, "Error reading block bitmap for %u",
308 -                                       group);
309                         ext4_mb_release_desc(&e4b);
310                         continue;
311                 }
312 @@ -4299,11 +4303,8 @@ ext4_mb_discard_lg_preallocations(struct super_block *sb,
313         list_for_each_entry_safe(pa, tmp, &discard_list, u.pa_tmp_list) {
314  
315                 ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, NULL);
316 -               if (ext4_mb_load_buddy(sb, group, &e4b)) {
317 -                       ext4_error(sb, "Error loading buddy information for %u",
318 -                                       group);
319 +               if (ext4_mb_load_buddy(sb, group, &e4b))
320                         continue;
321 -               }
322                 ext4_lock_group(sb, group);
323                 list_del(&pa->pa_group_list);
324                 ext4_get_group_info(sb, group)->bb_prealloc_nr--;
325 @@ -4565,7 +4566,7 @@ repeat:
326                          * been updated or not when fail case. So can
327                          * not revert pa_free back, just mark pa_error*/
328                         pa->pa_error++;
329 -                       ext4_error(sb,
330 +                       ext4_corrupted_block_group(sb, 0, 0,
331                                 "Updating bitmap error: [err %d] "
332                                 "[pa %p] [phy %lu] [logic %lu] "
333                                 "[len %u] [free %u] [error %u] "
334 @@ -4710,6 +4711,7 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
335         struct ext4_sb_info *sbi;
336         struct ext4_buddy e4b;
337         int err = 0;
338 +       int skip_error = 0;
339         int ret;
340  
341         /*
342 @@ -4746,6 +4748,10 @@ do_more:
343         overflow = 0;
344         ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
345  
346 +       if (unlikely(EXT4_MB_GRP_BBITMAP_CORRUPT(
347 +                       ext4_get_group_info(sb, block_group))))
348 +               return; /* NOTE(review): skips error_return's ac free? */
349 +
350         /*
351          * Check to see if we are freeing blocks across a group
352          * boundary.
353 @@ -4807,8 +4813,10 @@ do_more:
354         }
355  
356         err = ext4_mb_load_buddy(sb, block_group, &e4b);
357 -       if (err)
358 +       if (err) {
359 +               skip_error = 1;
360                 goto error_return;
361 +       }
362         if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
363                 struct ext4_free_data *new_entry;
364                 /*
365 @@ -4876,10 +4884,10 @@ error_return:
366         if (freed && !(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
367                 vfs_dq_free_block(inode, freed);
368         brelse(bitmap_bh);
369 -       ext4_std_error(sb, err);
370 +       if (!skip_error)
371 +               ext4_std_error(sb, err);
372         if (ac)
373                 kmem_cache_free(ext4_ac_cachep, ac);
374 -       return;
375  }
376  
377  /**
378 @@ -4970,7 +4978,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
379  
380         err = ext4_mb_load_buddy(sb, block_group, &e4b);
381         if (err)
382 -               goto error_return;
383 +               goto error_brelse;
384  
385         /*
386          * need to update group_info->bb_free and bitmap
387 @@ -5006,9 +5014,9 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
388         sb->s_dirt = 1;
389  
390  error_return:
391 -       brelse(bitmap_bh);
392         ext4_std_error(sb, err);
393 -       return;
394 +error_brelse:
395 +       brelse(bitmap_bh);
396  }
397  
398  /**
399 @@ -5078,11 +5086,8 @@ ext4_trim_all_free(struct super_block *sb, ext4_group_t group,
400         trace_ext4_trim_all_free(sb, group, start, max);
401  
402         ret = ext4_mb_load_buddy(sb, group, &e4b);
403 -       if (ret) {
404 -               ext4_error(sb, "Error in loading buddy "
405 -                               "information for %u", group);
406 +       if (ret)
407                 return ret;
408 -       }
409         bitmap = e4b.bd_bitmap;
410  
411         ext4_lock_group(sb, group);
412 diff --git a/fs/ext4/super.c b/fs/ext4/super.c
413 index 31ee33b..f02a632 100644
414 --- a/fs/ext4/super.c
415 +++ b/fs/ext4/super.c
416 @@ -562,6 +562,34 @@ void __ext4_warning(struct super_block *sb, const char *function,
417         va_end(args);
418  }
419  
420 +/* Flag @group's bitmaps corrupt per @flags and put the fs in error state. */
421 +void __ext4_corrupted_block_group(struct super_block *sb, ext4_group_t group,
422 +                                 unsigned int flags)
423 +{
424 +       struct ext4_sb_info *sbi = EXT4_SB(sb);
425 +       struct ext4_group_info *grp = ext4_get_group_info(sb, group);
426 +       struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
427 +
428 +       if ((flags & EXT4_GROUP_INFO_BBITMAP_CORRUPT) &&
429 +           !EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) {
430 +               if (gdp)        /* may be NULL; the bit is still set */
431 +                       ext4_free_blks_set(sb, gdp, 0);
432 +               set_bit(EXT4_GROUP_INFO_BBITMAP_CORRUPT_BIT, &grp->bb_state);
433 +       }
434 +
435 +       if ((flags & EXT4_GROUP_INFO_IBITMAP_CORRUPT) &&
436 +           !EXT4_MB_GRP_IBITMAP_CORRUPT(grp)) {
437 +               if (gdp) {
438 +                       ext4_free_inodes_set(sb, gdp, 0);
439 +                       ext4_itable_unused_set(sb, gdp, 0);
440 +               }
441 +               set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
442 +       }
443 +       sbi->s_mount_state |= EXT4_ERROR_FS;
444 +       sbi->s_es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
445 +       ext4_commit_super(sb, 1);
446 +}
447 +
448  void ext4_grp_locked_error(struct super_block *sb, ext4_group_t grp,
449                            const char *function, const char *fmt, ...)
450  __releases(bitlock)