Whamcloud - gitweb
Branch b1_8
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / ext4-mballoc-extra-checks-rhel5.patch
1 Index: linux-2.6.18.i686/fs/ext4/mballoc.c
2 ===================================================================
3 --- linux-2.6.18.i686.orig/fs/ext4/mballoc.c
4 +++ linux-2.6.18.i686/fs/ext4/mballoc.c
5 @@ -660,7 +660,7 @@ static void ext4_mb_mark_free_simple(str
6         }
7  }
8  
9 -static void ext4_mb_generate_buddy(struct super_block *sb,
10 +static int ext4_mb_generate_buddy(struct super_block *sb,
11                                 void *buddy, void *bitmap, ext4_group_t group)
12  {
13         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
14 @@ -692,14 +692,14 @@ static void ext4_mb_generate_buddy(struc
15         grp->bb_fragments = fragments;
16  
17         if (free != grp->bb_free) {
18 -               ext4_error(sb, __func__,
19 -                       "EXT4-fs: group %lu: %u blocks in bitmap, %u in gd\n",
20 -                       group, free, grp->bb_free);
21 -               /*
22 -                * If we intent to continue, we consider group descritor
23 -                * corrupt and update bb_free using bitmap value
24 -                */
25 -               grp->bb_free = free;
26 +               struct ext4_group_desc *gdp;    /* may be NULL */
27 +               gdp = ext4_get_group_desc(sb, group, NULL);
28 +               ext4_error(sb, __FUNCTION__,
29 +                       "group %lu: %u blocks in bitmap, %u in bb, "
30 +                       "%u in gd, %lu pa's\n", group, free, grp->bb_free,
31 +                       gdp ? le16_to_cpu(gdp->bg_free_blocks_count) : 0,
32 +                       grp->bb_prealloc_nr);
33 +               return -EIO;
34         }
35  
36         clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
37 @@ -709,6 +709,8 @@ static void ext4_mb_generate_buddy(struc
38         EXT4_SB(sb)->s_mb_buddies_generated++;
39         EXT4_SB(sb)->s_mb_generation_time += period;
40         spin_unlock(&EXT4_SB(sb)->s_bal_lock);
41 +
42 +       return 0;
43  }
44  
45  /* The buddy information is attached the buddy cache inode
46 @@ -814,7 +816,7 @@ static int ext4_mb_init_cache(struct pag
47  
48         err = 0;
49         first_block = page->index * blocks_per_page;
50 -       for (i = 0; i < blocks_per_page; i++) {
51 +       for (i = 0; i < blocks_per_page && err == 0; i++) {
52                 int group;
53                 struct ext4_group_info *grinfo;
54  
55 @@ -848,7 +850,7 @@ static int ext4_mb_init_cache(struct pag
56                         /*
57                          * incore got set to the group block bitmap below
58                          */
59 -                       ext4_mb_generate_buddy(sb, data, incore, group);
60 +                       err = ext4_mb_generate_buddy(sb, data, incore, group);
61                         incore = NULL;
62                 } else {
63                         /* this is block of bitmap */
64 @@ -861,7 +863,7 @@ static int ext4_mb_init_cache(struct pag
65                         memcpy(data, bitmap, blocksize);
66  
67                         /* mark all preallocated blks used in in-core bitmap */
68 -                       ext4_mb_generate_from_pa(sb, data, group);
69 +                       err = ext4_mb_generate_from_pa(sb, data, group);
70                         ext4_unlock_group(sb, group);
71  
72                         /* set incore so that the buddy information can be
73 @@ -870,6 +872,7 @@ static int ext4_mb_init_cache(struct pag
74                         incore = data;
75                 }
76         }
77 +       if (likely(err == 0))
78         SetPageUptodate(page);
79  
80  out:
81 @@ -1964,7 +1967,10 @@ static int ext4_mb_seq_history_show(stru
82                         hs->result.fe_start, hs->result.fe_len);
83                 seq_printf(seq, "%-5u %-8u %-23s free\n",
84                                 hs->pid, hs->ino, buf2);
85 +       } else {
86 +               seq_printf(seq, "unknown op %d\n", hs->op);
87         }
88 +
89         return 0;
90  }
91  
92 @@ -2092,9 +2098,11 @@ static void *ext4_mb_seq_groups_next(str
93  static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
94  {
95         struct super_block *sb = seq->private;
96 +       struct ext4_group_desc *gdp;
97         long group = (long) v;
98         int i;
99         int err;
100 +       int free = 0;
101         struct ext4_buddy e4b;
102         struct sg {
103                 struct ext4_group_info info;
104 @@ -2103,10 +2111,10 @@ static int ext4_mb_seq_groups_show(struc
105  
106         group--;
107         if (group == 0)
108 -               seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
109 +               seq_printf(seq, "#%-5s: %-5s %-5s %-5s %-5s %-5s "
110                                 "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
111                                   "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
112 -                          "group", "free", "frags", "first",
113 +                          "group", "free", "gfree", "frags", "first", "pa",
114                            "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
115                            "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
116  
117 @@ -2117,13 +2125,20 @@ static int ext4_mb_seq_groups_show(struc
118                 seq_printf(seq, "#%-5lu: I/O error\n", group);
119                 return 0;
120         }
121 +
122 +       gdp = ext4_get_group_desc(sb, group, NULL);
123 +       if (gdp != NULL)
124 +               free = le16_to_cpu(gdp->bg_free_blocks_count);
125 +
126         ext4_lock_group(sb, group);
127         memcpy(&sg, ext4_get_group_info(sb, group), i);
128         ext4_unlock_group(sb, group);
129         ext4_mb_release_desc(&e4b);
130  
131 -       seq_printf(seq, "#%-5lu: %-5u %-5u %-5u [", group, sg.info.bb_free,
132 -                       sg.info.bb_fragments, sg.info.bb_first_free);
133 +       seq_printf(seq, "#%-5lu: %-5u %-5u %-5u %-5u %-5lu [", group,
134 +                       sg.info.bb_free, free,
135 +                       sg.info.bb_fragments, sg.info.bb_first_free,
136 +                       sg.info.bb_prealloc_nr);
137         for (i = 0; i <= 13; i++)
138                 seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
139                                 sg.info.bb_counters[i] : 0);
140 @@ -2226,6 +2241,7 @@ ext4_mb_store_history(struct ext4_alloca
141         h.tail = ac->ac_tail;
142         h.buddy = ac->ac_buddy;
143         h.merged = 0;
144 +       h.cr = ac->ac_criteria;
145         if (ac->ac_op == EXT4_MB_HISTORY_ALLOC) {
146                 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
147                                 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
148 @@ -3531,22 +3547,66 @@ ext4_mb_use_preallocated(struct ext4_all
149  }
150  
151  /*
152 + * check free blocks in bitmap match free block in group descriptor
153 + * do this before taking preallocated blocks into account to be able
154 + * to detect on-disk corruptions
155 + */
156 +int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
157 +                               struct ext4_group_desc *gdp, int group)
158 +{
159 +       unsigned short max = EXT4_BLOCKS_PER_GROUP(sb);
160 +       unsigned short i, first, free = 0;
161 +
162 +       i = mb_find_next_zero_bit(bitmap, max, 0);
163 +
164 +       while (i < max) {
165 +               first = i;
166 +               i = find_next_bit(bitmap, max, i);
167 +               if (i > max)
168 +                       i = max;
169 +               free += i - first;
170 +               if (i < max)
171 +                       i = mb_find_next_zero_bit(bitmap, max, i);
172 +       }
173 +
174 +       if (free != le16_to_cpu(gdp->bg_free_blocks_count)) {
175 +               ext4_error(sb, __FUNCTION__, "on-disk bitmap for group %d"
176 +                       "corrupted: %u blocks free in bitmap, %u - in gd\n",
177 +                       group, free, le16_to_cpu(gdp->bg_free_blocks_count));
178 +               return -EIO;
179 +       }
180 +       return 0;
181 +}
182 +
183 +/*
184   * the function goes through all preallocation in this group and marks them
185   * used in in-core bitmap. buddy must be generated from this bitmap
186   * Need to be called with ext4 group lock (ext4_lock_group)
187   */
188 -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
189 +static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
190                                         ext4_group_t group)
191  {
192         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
193         struct ext4_prealloc_space *pa;
194 +       struct ext4_group_desc *gdp;
195         struct list_head *cur;
196         ext4_group_t groupnr;
197         ext4_grpblk_t start;
198         int preallocated = 0;
199         int count = 0;
200 +       int skip = 0;
201 +       int err;
202         int len;
203  
204 +       gdp = ext4_get_group_desc (sb, group, NULL);
205 +       if (gdp == NULL)
206 +               return -EIO;
207 +
208 +       /* before applying preallocations, check bitmap consistency */
209 +       err = ext4_mb_check_ondisk_bitmap(sb, bitmap, gdp, group);
210 +       if (err)
211 +               return err;
212 +
213         /* all form of preallocation discards first load group,
214          * so the only competing code is preallocation use.
215          * we don't need any locking here
216 @@ -3562,15 +3622,24 @@ static void ext4_mb_generate_from_pa(str
217                                              &groupnr, &start);
218                 len = pa->pa_len;
219                 spin_unlock(&pa->pa_lock);
220 -               if (unlikely(len == 0))
221 +               if (unlikely(len == 0)) {
222 +                       skip++;
223                         continue;
224 +               }
225                 BUG_ON(groupnr != group);
226                 mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
227                                                 bitmap, start, len);
228                 preallocated += len;
229                 count++;
230         }
231 +       if (count + skip != grp->bb_prealloc_nr) {
232 +               ext4_error(sb, __FUNCTION__, "lost preallocations: "
233 +                          "count %d, bb_prealloc_nr %lu, skip %d\n",
234 +                          count, grp->bb_prealloc_nr, skip);
235 +               return -EIO;
236 +       }
237         mb_debug("prellocated %u for group %lu\n", preallocated, group);
238 +       return 0;
239  }
240  
241  static void ext4_mb_pa_callback(struct rcu_head *head)
242 @@ -3621,6 +3690,7 @@ static void ext4_mb_put_pa(struct ext4_a
243          */
244         ext4_lock_group(sb, grp);
245         list_del(&pa->pa_group_list);
246 +       ext4_get_group_info(sb, grp)->bb_prealloc_nr--;
247         ext4_unlock_group(sb, grp);
248  
249         spin_lock(pa->pa_obj_lock);
250 @@ -3709,6 +3779,7 @@ ext4_mb_new_inode_pa(struct ext4_allocat
251  
252         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
253         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
254 +       grp->bb_prealloc_nr++;
255         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
256  
257         spin_lock(pa->pa_obj_lock);
258 @@ -3768,6 +3839,7 @@ ext4_mb_new_group_pa(struct ext4_allocat
259  
260         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
261         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
262 +       grp->bb_prealloc_nr++;
263         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
264  
265         /*
266 @@ -3820,6 +3892,7 @@ ext4_mb_release_inode_pa(struct ext4_bud
267                 ac->ac_sb = sb;
268                 ac->ac_inode = pa->pa_inode;
269                 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
270 +               ac->ac_o_ex.fe_len = 1;
271         }
272  
273         while (bit < end) {
274 @@ -3964,6 +4037,8 @@ repeat:
275  
276                 spin_unlock(&pa->pa_lock);
277  
278 +               BUG_ON(grp->bb_prealloc_nr == 0);
279 +               grp->bb_prealloc_nr--;
280                 list_del(&pa->pa_group_list);
281                 list_add(&pa->u.pa_tmp_list, &list);
282         }
283 @@ -4099,7 +4174,7 @@ repeat:
284                 if (err) {
285                         ext4_error(sb, __func__, "Error in loading buddy "
286                                         "information for %lu\n", group);
287 -                       continue;
288 +                       return;
289                 }
290  
291                 bitmap_bh = ext4_read_block_bitmap(sb, group);
292 @@ -4111,6 +4186,8 @@ repeat:
293                 }
294  
295                 ext4_lock_group(sb, group);
296 +               BUG_ON(e4b.bd_info->bb_prealloc_nr == 0);
297 +               e4b.bd_info->bb_prealloc_nr--;
298                 list_del(&pa->pa_group_list);
299                 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
300                 ext4_unlock_group(sb, group);
301 Index: linux-2.6.18.i686/fs/ext4/mballoc.h
302 ===================================================================
303 --- linux-2.6.18.i686.orig/fs/ext4/mballoc.h
304 +++ linux-2.6.18.i686/fs/ext4/mballoc.h
305 @@ -119,6 +119,7 @@ struct ext4_group_info {
306         unsigned short  bb_free;
307         unsigned short  bb_fragments;
308         struct          list_head bb_prealloc_list;
309 +       unsigned long   bb_prealloc_nr;
310  #ifdef DOUBLE_CHECK
311         void            *bb_bitmap;
312  #endif
313 @@ -228,7 +229,7 @@ struct ext4_mb_history {
314         __u16 tail;     /* what tail broke some buddy */
315         __u16 buddy;    /* buddy the tail ^^^ broke */
316         __u16 flags;
317 -       __u8 cr:3;      /* which phase the result extent was found at */
318 +       __u8 cr:8;      /* which phase the result extent was found at */
319         __u8 op:4;
320         __u8 merged:1;
321  };
322 @@ -259,7 +260,7 @@ static void ext4_mb_store_history(struct
323  
324  struct buffer_head *read_block_bitmap(struct super_block *, ext4_group_t);
325  
326 -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
327 +static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
328                                         ext4_group_t group);
329  static void ext4_mb_poll_new_transaction(struct super_block *, handle_t *);
330  static void ext4_mb_free_committed_blocks(struct super_block *);