Whamcloud - gitweb
d0ef621443b940935aa0c8d42849796f7044d009
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / rhel6.3 / ext4-mballoc-extra-checks.patch
1 Index: linux-stage/fs/ext4/ext4.h
2 ===================================================================
3 --- linux-stage.orig/fs/ext4/ext4.h     2011-03-14 16:18:28.300241437 +0800
4 +++ linux-stage/fs/ext4/ext4.h  2011-03-14 16:33:17.056087375 +0800
5 @@ -1770,6 +1770,7 @@
6         ext4_grpblk_t   bb_free;        /* total free blocks */
7         ext4_grpblk_t   bb_fragments;   /* nr of freespace fragments */
8         struct          list_head bb_prealloc_list;
9 +       unsigned long   bb_prealloc_nr;
10  #ifdef DOUBLE_CHECK
11         void            *bb_bitmap;
12  #endif
13 Index: linux-stage/fs/ext4/mballoc.c
14 ===================================================================
15 --- linux-stage.orig/fs/ext4/mballoc.c  2011-03-14 16:18:28.336242149 +0800
16 +++ linux-stage/fs/ext4/mballoc.c       2011-03-14 16:33:27.072292006 +0800
17 @@ -337,7 +337,7 @@
18  static struct kmem_cache *ext4_pspace_cachep;
19  static struct kmem_cache *ext4_ac_cachep;
20  static struct kmem_cache *ext4_free_ext_cachep;
21 -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
22 +static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
23                                         ext4_group_t group);
24  static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
25                                                 ext4_group_t group);
26 @@ -659,7 +659,7 @@
27  }
28  
29  static noinline_for_stack
30 -void ext4_mb_generate_buddy(struct super_block *sb,
31 +int ext4_mb_generate_buddy(struct super_block *sb,
32                                 void *buddy, void *bitmap, ext4_group_t group)
33  {
34         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
35 @@ -691,14 +691,13 @@
36         grp->bb_fragments = fragments;
37  
38         if (free != grp->bb_free) {
39 -               ext4_grp_locked_error(sb, group,  __func__,
40 -                       "EXT4-fs: group %u: %u blocks in bitmap, %u in gd",
41 -                       group, free, grp->bb_free);
42 -               /*
43 -                * If we intent to continue, we consider group descritor
44 -                * corrupt and update bb_free using bitmap value
45 -                */
46 -               grp->bb_free = free;
47 +               struct ext4_group_desc *gdp;
48 +               gdp = ext4_get_group_desc (sb, group, NULL);
49 +               ext4_error(sb, "group %lu: %u blocks in bitmap, %u in bb, "
50 +                       "%u in gd, %lu pa's\n", (long unsigned int)group,
51 +                       free, grp->bb_free, ext4_free_blks_count(sb, gdp),
52 +                       grp->bb_prealloc_nr);
53 +               return -EIO;
54         }
55  
56         clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
57 @@ -708,6 +707,8 @@
58         EXT4_SB(sb)->s_mb_buddies_generated++;
59         EXT4_SB(sb)->s_mb_generation_time += period;
60         spin_unlock(&EXT4_SB(sb)->s_bal_lock);
61 +
62 +       return 0;
63  }
64  
65  /* The buddy information is attached the buddy cache inode
66 @@ -839,7 +840,7 @@
67  
68         err = 0;
69         first_block = page->index * blocks_per_page;
70 -       for (i = 0; i < blocks_per_page; i++) {
71 +       for (i = 0; i < blocks_per_page && err == 0; i++) {
72                 int group;
73                 struct ext4_group_info *grinfo;
74  
75 @@ -874,7 +875,7 @@
76                         ext4_lock_group(sb, group);
77                         /* init the buddy */
78                         memset(data, 0xff, blocksize);
79 -                       ext4_mb_generate_buddy(sb, data, incore, group);
80 +                       err = ext4_mb_generate_buddy(sb, data, incore, group);
81                         ext4_unlock_group(sb, group);
82                         incore = NULL;
83                 } else {
84 @@ -888,7 +889,7 @@
85                         memcpy(data, bitmap, blocksize);
86  
87                         /* mark all preallocated blks used in in-core bitmap */
88 -                       ext4_mb_generate_from_pa(sb, data, group);
89 +                       err = ext4_mb_generate_from_pa(sb, data, group);
90                         ext4_mb_generate_from_freelist(sb, data, group);
91                         ext4_unlock_group(sb, group);
92  
93 @@ -898,7 +899,8 @@
94                         incore = data;
95                 }
96         }
97 -       SetPageUptodate(page);
98 +       if (likely(err == 0))
99 +               SetPageUptodate(page);
100  
101  out:
102         if (bh) {
103 @@ -2142,9 +2144,11 @@
104  static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
105  {
106         struct super_block *sb = seq->private;
107 +       struct ext4_group_desc *gdp;
108         ext4_group_t group = (ext4_group_t) ((unsigned long) v);
109         int i;
110         int err;
111 +       int free = 0;
112         struct ext4_buddy e4b;
113         struct sg {
114                 struct ext4_group_info info;
115 @@ -2153,10 +2157,10 @@
116  
117         group--;
118         if (group == 0)
119 -               seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
120 +               seq_printf(seq, "#%-5s: %-5s %-5s %-5s %-5s %-5s"
121                                 "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
122                                   "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
123 -                          "group", "free", "frags", "first",
124 +                          "group", "free", "free", "frags", "first", "pa",
125                            "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
126                            "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
127  
128 @@ -2167,13 +2171,20 @@
129                 seq_printf(seq, "#%-5u: I/O error\n", group);
130                 return 0;
131         }
132 +
133 +       gdp = ext4_get_group_desc(sb, group, NULL);
134 +       if (gdp != NULL)
135 +               free = ext4_free_blks_count(sb, gdp);
136 +
137         ext4_lock_group(sb, group);
138         memcpy(&sg, ext4_get_group_info(sb, group), i);
139         ext4_unlock_group(sb, group);
140         ext4_mb_release_desc(&e4b);
141  
142 -       seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
143 -                       sg.info.bb_fragments, sg.info.bb_first_free);
144 +       seq_printf(seq, "#%-5lu: %-5u %-5u %-5u %-5u %-5lu [",
145 +                       (long unsigned int)group, sg.info.bb_free, free,
146 +                       sg.info.bb_fragments, sg.info.bb_first_free,
147 +                       sg.info.bb_prealloc_nr);
148         for (i = 0; i <= 13; i++)
149                 seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
150                                 sg.info.bb_counters[i] : 0);
151 @@ -3354,23 +3365,72 @@
152  }
153  
154  /*
155 + * Check that the free blocks in the bitmap match the free block count
156 + * in the group descriptor. Do this before taking preallocated blocks into
157 + * account so that on-disk corruption can be detected. The group lock must
158 + * be held by the caller.
159 + */
160 +int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
161 +                               struct ext4_group_desc *gdp, int group)
162 +{
163 +       unsigned int max = EXT4_BLOCKS_PER_GROUP(sb);
164 +       unsigned int i, first, free = 0;
165 +       unsigned int free_in_gdp = ext4_free_blks_count(sb, gdp);
166 +
167 +       if (free_in_gdp == 0 && (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)))
168 +               return 0;       /* uninit group, gd shows no free blocks */
169 +
170 +       i = mb_find_next_zero_bit(bitmap, max, 0);
171 +       /* accumulate the length of each run of zero (free) bits */
172 +       while (i < max) {
173 +               first = i;
174 +               i = mb_find_next_bit(bitmap, max, i);
175 +               if (i > max)
176 +                       i = max;
177 +               free += i - first;
178 +               if (i < max)
179 +                       i = mb_find_next_zero_bit(bitmap, max, i);
180 +       }
181 +
182 +       if (free != free_in_gdp) {
183 +               ext4_error(sb, "on-disk bitmap for group %d "
184 +                       "corrupted: %u blocks free in bitmap, %u - in gd\n",
185 +                       group, free, free_in_gdp);
186 +               return -EIO;
187 +       }
188 +       return 0;
189 +}
190 +
191 +/*
192   * the function goes through all preallocation in this group and marks them
193   * used in in-core bitmap. buddy must be generated from this bitmap
194   * Need to be called with ext4 group lock held
195   */
196  static noinline_for_stack
197 -void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
198 +int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
199                                         ext4_group_t group)
200  {
201         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
202         struct ext4_prealloc_space *pa;
203 +       struct ext4_group_desc *gdp;
204         struct list_head *cur;
205         ext4_group_t groupnr;
206         ext4_grpblk_t start;
207         int preallocated = 0;
208         int count = 0;
209 +       int skip = 0;
210 +       int err;
211         int len;
212  
213 +       gdp = ext4_get_group_desc (sb, group, NULL);
214 +       if (gdp == NULL)
215 +               return -EIO;
216 +
217 +       /* before applying preallocations, check bitmap consistency */
218 +       err = ext4_mb_check_ondisk_bitmap(sb, bitmap, gdp, group);
219 +       if (err)
220 +               return err;
221 +
222         /* all form of preallocation discards first load group,
223          * so the only competing code is preallocation use.
224          * we don't need any locking here
225 @@ -3386,14 +3442,23 @@
226                                              &groupnr, &start);
227                 len = pa->pa_len;
228                 spin_unlock(&pa->pa_lock);
229 -               if (unlikely(len == 0))
230 +               if (unlikely(len == 0)) {
231 +                       skip++;
232                         continue;
233 +               }
234                 BUG_ON(groupnr != group);
235                 mb_set_bits(bitmap, start, len);
236                 preallocated += len;
237                 count++;
238         }
239 +       if (count + skip != grp->bb_prealloc_nr) {
240 +               ext4_error(sb, "lost preallocations: "
241 +                          "count %d, bb_prealloc_nr %lu, skip %d\n",
242 +                          count, grp->bb_prealloc_nr, skip);
243 +               return -EIO;
244 +       }
245         mb_debug(1, "prellocated %u for group %u\n", preallocated, group);
246 +       return 0;
247  }
248  
249  static void ext4_mb_pa_callback(struct rcu_head *head)
250 @@ -3452,6 +3517,7 @@
251          */
252         ext4_lock_group(sb, grp);
253         list_del(&pa->pa_group_list);
254 +       ext4_get_group_info(sb, grp)->bb_prealloc_nr--;
255         ext4_unlock_group(sb, grp);
256  
257         spin_lock(pa->pa_obj_lock);
258 @@ -3543,6 +3609,7 @@
259  
260         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
261         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
262 +       grp->bb_prealloc_nr++;
263         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
264  
265         spin_lock(pa->pa_obj_lock);
266 @@ -3604,6 +3671,7 @@
267  
268         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
269         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
270 +       grp->bb_prealloc_nr++;
271         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
272  
273         /*
274 @@ -3802,6 +3870,8 @@
275  
276                 spin_unlock(&pa->pa_lock);
277  
278 +               BUG_ON(grp->bb_prealloc_nr == 0);
279 +               grp->bb_prealloc_nr--;
280                 list_del(&pa->pa_group_list);
281                 list_add(&pa->u.pa_tmp_list, &list);
282         }
283 @@ -3942,7 +4012,7 @@
284                 if (err) {
285                         ext4_error(sb, "Error loading buddy information for %u",
286                                         group);
287 -                       continue;
288 +                       return;
289                 }
290  
291                 bitmap_bh = ext4_read_block_bitmap(sb, group);
292 @@ -3954,6 +4024,8 @@
293                 }
294  
295                 ext4_lock_group(sb, group);
296 +               BUG_ON(e4b.bd_info->bb_prealloc_nr == 0);
297 +               e4b.bd_info->bb_prealloc_nr--;
298                 list_del(&pa->pa_group_list);
299                 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
300                 ext4_unlock_group(sb, group);
301 @@ -4227,6 +4299,7 @@
302                 }
303                 ext4_lock_group(sb, group);
304                 list_del(&pa->pa_group_list);
305 +               ext4_get_group_info(sb, group)->bb_prealloc_nr--;
306                 ext4_mb_release_group_pa(&e4b, pa, ac);
307                 ext4_unlock_group(sb, group);
308  
309 Index: linux-stage/fs/ext4/mballoc.h
310 ===================================================================
311 --- linux-stage.orig/fs/ext4/mballoc.h  2011-03-14 16:18:26.670209322 +0800
312 +++ linux-stage/fs/ext4/mballoc.h       2011-03-14 16:32:50.859552482 +0800
313 @@ -88,7 +88,7 @@
314  /*
315   * for which requests use 2^N search using buddies
316   */
317 -#define MB_DEFAULT_ORDER2_REQS         2
318 +#define MB_DEFAULT_ORDER2_REQS         8
319  
320  /*
321   * default group prealloc size 512 blocks