Whamcloud - gitweb
b=21671 Eliminate a warning for the ldiskfs module
[fs/lustre-release.git] / ldiskfs / kernel_patches / patches / ext4-mballoc-extra-checks-rhel5.patch
1 diff -rupN linux-2.6.18-128.1.6_1//fs/ext4/mballoc.c linux-2.6.18-128.1.6_2//fs/ext4/mballoc.c
2 --- linux-2.6.18-128.1.6_1//fs/ext4/mballoc.c
3 +++ linux-2.6.18-128.1.6_2//fs/ext4/mballoc.c
4 @@ -360,8 +360,8 @@ static void ext4_mb_mark_free_simple(str
5  static struct kmem_cache *ext4_pspace_cachep;
6  static struct kmem_cache *ext4_ac_cachep;
7  static struct kmem_cache *ext4_free_ext_cachep;
8 -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
9 -                                       ext4_group_t group);
10 +static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
11 +                                       ext4_group_t group);
12  static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
13                                                  ext4_group_t group);
14  
15 @@ -660,7 +660,7 @@ static void ext4_mb_mark_free_simple(str
16         }
17  }
18  
19 -static void ext4_mb_generate_buddy(struct super_block *sb,
20 +static int ext4_mb_generate_buddy(struct super_block *sb,
21                                 void *buddy, void *bitmap, ext4_group_t group)
22  {
23         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
24 @@ -692,14 +692,14 @@ static void ext4_mb_generate_buddy(struc
25         grp->bb_fragments = fragments;
26  
27         if (free != grp->bb_free) {
28 -               ext4_grp_locked_error(sb, group,  __func__,
29 -                       "EXT4-fs: group %u: %u blocks in bitmap, %u in gd",
30 -                       group, free, grp->bb_free);
31 -               /*
32 -                * If we intent to continue, we consider group descritor
33 -                * corrupt and update bb_free using bitmap value
34 -                */
35 -               grp->bb_free = free;
36 +               struct ext4_group_desc *gdp;
37 +               gdp = ext4_get_group_desc (sb, group, NULL);
38 +               ext4_error(sb, __FUNCTION__,
39 +                       "group %lu: %u blocks in bitmap, %u in bb, "
40 +                       "%u in gd, %lu pa's\n", (long unsigned int)group,
41 +                       free, grp->bb_free, ext4_free_blks_count(sb, gdp),
42 +                       grp->bb_prealloc_nr);
43 +               return -EIO;
44         }
45  
46         clear_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
47 @@ -709,6 +709,8 @@ static void ext4_mb_generate_buddy(struc
48         EXT4_SB(sb)->s_mb_buddies_generated++;
49         EXT4_SB(sb)->s_mb_generation_time += period;
50         spin_unlock(&EXT4_SB(sb)->s_bal_lock);
51 +
52 +       return 0;
53  }
54  
55  /* The buddy information is attached the buddy cache inode
56 @@ -814,7 +816,7 @@ static int ext4_mb_init_cache(struct pag
57         first_block = page->index * blocks_per_page;
58         /* init the page  */
59         memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
60 -       for (i = 0; i < blocks_per_page; i++) {
61 +       for (i = 0; i < blocks_per_page && err == 0; i++) {
62                 int group;
63                 struct ext4_group_info *grinfo;
64  
65 @@ -848,7 +850,7 @@ static int ext4_mb_init_cache(struct pag
66                          * incore got set to the group block bitmap below
67                          */
68                         ext4_lock_group(sb, group);
69 -                       ext4_mb_generate_buddy(sb, data, incore, group);
70 +                       err = ext4_mb_generate_buddy(sb, data, incore, group);
71                         ext4_unlock_group(sb, group);
72                         incore = NULL;
73                 } else {
74 @@ -861,7 +863,7 @@ static int ext4_mb_init_cache(struct pag
75                         memcpy(data, bitmap, blocksize);
76  
77                         /* mark all preallocated blks used in in-core bitmap */
78 -                       ext4_mb_generate_from_pa(sb, data, group);
79 +                       err = ext4_mb_generate_from_pa(sb, data, group);
80                         ext4_mb_generate_from_freelist(sb, data, group);
81                         ext4_unlock_group(sb, group);
82  
83 @@ -870,6 +872,7 @@ static int ext4_mb_init_cache(struct pag
84                         incore = data;
85                 }
86         }
87 +       if (likely(err == 0))
88         SetPageUptodate(page);
89  
90  out:
91 @@ -1964,7 +1967,10 @@ static int ext4_mb_seq_history_show(stru
92                         hs->result.fe_start, hs->result.fe_len);
93                 seq_printf(seq, "%-5u %-8u %-23s free\n",
94                                 hs->pid, hs->ino, buf2);
95 +       } else {
96 +               seq_printf(seq, "unknown op %d\n", hs->op);
97         }
98 +
99         return 0;
100  }
101  
102 @@ -2092,9 +2098,11 @@ static void *ext4_mb_seq_groups_next(str
103  static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
104  {
105         struct super_block *sb = seq->private;
106 +       struct ext4_group_desc *gdp;
107         ext4_group_t group = (ext4_group_t) ((unsigned long) v);
108         int i;
109         int err;
110 +       int free = 0;
111         struct ext4_buddy e4b;
112         struct sg {
113                 struct ext4_group_info info;
114 @@ -2103,10 +2111,10 @@ static int ext4_mb_seq_groups_show(struc
115  
116         group--;
117         if (group == 0)
118 -               seq_printf(seq, "#%-5s: %-5s %-5s %-5s "
119 +               seq_printf(seq, "#%-5s: %-5s %-5s %-5s %-5s %-5s "
120                                 "[ %-5s %-5s %-5s %-5s %-5s %-5s %-5s "
121                                   "%-5s %-5s %-5s %-5s %-5s %-5s %-5s ]\n",
122 -                          "group", "free", "frags", "first",
123 +                          "group", "free", "free", "frags", "first", "pa",
124                            "2^0", "2^1", "2^2", "2^3", "2^4", "2^5", "2^6",
125                            "2^7", "2^8", "2^9", "2^10", "2^11", "2^12", "2^13");
126  
127 @@ -2117,13 +2125,20 @@ static int ext4_mb_seq_groups_show(struc
128                 seq_printf(seq, "#%-5lu: I/O error\n", group);
129                 return 0;
130         }
131 +
132 +       gdp = ext4_get_group_desc(sb, group, NULL);
133 +       if (gdp != NULL)
134 +               free = ext4_free_blks_count(sb, gdp);
135 +
136         ext4_lock_group(sb, group);
137         memcpy(&sg, ext4_get_group_info(sb, group), i);
138         ext4_unlock_group(sb, group);
139         ext4_mb_release_desc(&e4b);
140  
141 -       seq_printf(seq, "#%-5u: %-5u %-5u %-5u [", group, sg.info.bb_free,
142 -                       sg.info.bb_fragments, sg.info.bb_first_free);
143 +       seq_printf(seq, "#%-5lu: %-5u %-5u %-5u %-5u %-5lu [",
144 +                       (long unsigned int)group, sg.info.bb_free, free,
145 +                       sg.info.bb_fragments, sg.info.bb_first_free,
146 +                       sg.info.bb_prealloc_nr);
147         for (i = 0; i <= 13; i++)
148                 seq_printf(seq, " %-5u", i <= sb->s_blocksize_bits + 1 ?
149                                 sg.info.bb_counters[i] : 0);
150 @@ -2226,6 +2241,7 @@ ext4_mb_store_history(struct ext4_alloca
151         h.tail = ac->ac_tail;
152         h.buddy = ac->ac_buddy;
153         h.merged = 0;
154 +       h.cr = ac->ac_criteria;
155         if (ac->ac_op == EXT4_MB_HISTORY_ALLOC) {
156                 if (ac->ac_g_ex.fe_start == ac->ac_b_ex.fe_start &&
157                                 ac->ac_g_ex.fe_group == ac->ac_b_ex.fe_group)
158 @@ -3539,22 +3555,67 @@ ext4_mb_use_preallocated(struct ext4_all
159  }
160  
161  /*
162 + * Check that the number of free blocks in the on-disk bitmap matches the
163 + * free count in the group descriptor.  Must run before preallocated blocks
164 + * are marked in the bitmap so that on-disk corruption can be detected.
165 + * The caller must hold the group lock.  Returns 0 on match, -EIO otherwise.
166 + */
167 +int ext4_mb_check_ondisk_bitmap(struct super_block *sb, void *bitmap,
168 +                               struct ext4_group_desc *gdp, int group)
169 +{
170 +       unsigned int max = EXT4_BLOCKS_PER_GROUP(sb);
171 +       unsigned int i, first, free = 0; /* unsigned int to match %u below */
172 +
173 +       i = mb_find_next_zero_bit(bitmap, max, 0);
174 +       /* i is the start of a run of zero (free) bits; add up each run */
175 +       while (i < max) {
176 +               first = i;
177 +               i = mb_find_next_bit(bitmap, max, i);
178 +               if (i > max)
179 +                       i = max;
180 +               free += i - first;
181 +               if (i < max)
182 +                       i = mb_find_next_zero_bit(bitmap, max, i);
183 +       }
184 +
185 +       if (free != ext4_free_blks_count(sb, gdp)) {
186 +               ext4_error(sb, __FUNCTION__, "on-disk bitmap for group %d "
187 +                       "corrupted: %u blocks free in bitmap, %u - in gd\n",
188 +                       group, free, ext4_free_blks_count(sb, gdp));
189 +               return -EIO;
190 +       }
191 +       return 0;
192 +}
193 +
194 +/*
195   * the function goes through all preallocation in this group and marks them
196   * used in in-core bitmap. buddy must be generated from this bitmap
197   * Need to be called with ext4 group lock (ext4_lock_group)
198   */
199 -static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
200 +static int ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
201                                         ext4_group_t group)
202  {
203         struct ext4_group_info *grp = ext4_get_group_info(sb, group);
204         struct ext4_prealloc_space *pa;
205 +       struct ext4_group_desc *gdp;
206         struct list_head *cur;
207         ext4_group_t groupnr;
208         ext4_grpblk_t start;
209         int preallocated = 0;
210         int count = 0;
211 +       int skip = 0;
212 +       int err;
213         int len;
214  
215 +       gdp = ext4_get_group_desc (sb, group, NULL);
216 +       if (gdp == NULL)
217 +               return -EIO;
218 +
219 +       /* before applying preallocations, check bitmap consistency */
220 +       err = ext4_mb_check_ondisk_bitmap(sb, bitmap, gdp, group);
221 +       if (err)
222 +               return err;
223 +
224         /* all form of preallocation discards first load group,
225          * so the only competing code is preallocation use.
226          * we don't need any locking here
227 @@ -3570,15 +3631,24 @@ static void ext4_mb_generate_from_pa(str
228                                              &groupnr, &start);
229                 len = pa->pa_len;
230                 spin_unlock(&pa->pa_lock);
231 -               if (unlikely(len == 0))
232 +               if (unlikely(len == 0)) {
233 +                       skip++;
234                         continue;
235 +               }
236                 BUG_ON(groupnr != group);
237                 mb_set_bits(sb_bgl_lock(EXT4_SB(sb), group),
238                                                 bitmap, start, len);
239                 preallocated += len;
240                 count++;
241         }
242 +       if (count + skip != grp->bb_prealloc_nr) {
243 +               ext4_error(sb, __FUNCTION__, "lost preallocations: "
244 +                          "count %d, bb_prealloc_nr %lu, skip %d\n",
245 +                          count, grp->bb_prealloc_nr, skip);
246 +               return -EIO;
247 +       }
248         mb_debug("prellocated %u for group %u\n", preallocated, group);
249 +       return 0;
250  }
251  
252  static void ext4_mb_pa_callback(struct rcu_head *head)
253 @@ -3629,6 +3699,7 @@ static void ext4_mb_put_pa(struct ext4_a
254          */
255         ext4_lock_group(sb, grp);
256         list_del(&pa->pa_group_list);
257 +       ext4_get_group_info(sb, grp)->bb_prealloc_nr--;
258         ext4_unlock_group(sb, grp);
259  
260         spin_lock(pa->pa_obj_lock);
261 @@ -3717,6 +3788,7 @@ ext4_mb_new_inode_pa(struct ext4_allocat
262  
263         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
264         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
265 +       grp->bb_prealloc_nr++;
266         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
267  
268         spin_lock(pa->pa_obj_lock);
269 @@ -3776,6 +3848,7 @@ ext4_mb_new_group_pa(struct ext4_allocat
270  
271         ext4_lock_group(sb, ac->ac_b_ex.fe_group);
272         list_add(&pa->pa_group_list, &grp->bb_prealloc_list);
273 +       grp->bb_prealloc_nr++;
274         ext4_unlock_group(sb, ac->ac_b_ex.fe_group);
275  
276         /*
277 @@ -3828,6 +3901,7 @@ ext4_mb_release_inode_pa(struct ext4_bud
278                 ac->ac_sb = sb;
279                 ac->ac_inode = pa->pa_inode;
280                 ac->ac_op = EXT4_MB_HISTORY_DISCARD;
281 +               ac->ac_o_ex.fe_len = 1;
282         }
283  
284         while (bit < end) {
285 @@ -3972,6 +4046,8 @@ repeat:
286  
287                 spin_unlock(&pa->pa_lock);
288  
289 +               BUG_ON(grp->bb_prealloc_nr == 0);
290 +               grp->bb_prealloc_nr--;
291                 list_del(&pa->pa_group_list);
292                 list_add(&pa->u.pa_tmp_list, &list);
293         }
294 @@ -4107,7 +4183,7 @@ repeat:
295                 if (err) {
296                         ext4_error(sb, __func__, "Error in loading buddy "
297                                         "information for %u", group);
298 -                       continue;
299 +                       return;
300                 }
301  
302                 bitmap_bh = ext4_read_block_bitmap(sb, group);
303 @@ -4119,6 +4195,8 @@ repeat:
304                 }
305  
306                 ext4_lock_group(sb, group);
307 +               BUG_ON(e4b.bd_info->bb_prealloc_nr == 0);
308 +               e4b.bd_info->bb_prealloc_nr--;
309                 list_del(&pa->pa_group_list);
310                 ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
311                 ext4_unlock_group(sb, group);
312 @@ -4394,6 +4472,7 @@ ext4_mb_discard_lg_preallocations(struct
313                 }
314                 ext4_lock_group(sb, group);
315                 list_del(&pa->pa_group_list);
316 +               ext4_get_group_info(sb, group)->bb_prealloc_nr--;
317                 ext4_mb_release_group_pa(&e4b, pa, ac);
318                 ext4_unlock_group(sb, group);
319  
320 diff -rupN linux-2.6.18-128.1.6/fs/ext4/ext4.h
321 --- linux-2.6.18-128.1.6.orig/fs/ext4/ext4.h
322 +++ linux-2.6.18-128.1.6/fs/ext4/ext4.h
323 @@ -119,6 +119,7 @@ struct ext4_group_info {
324         unsigned short  bb_free;
325         unsigned short  bb_fragments;
326         struct          list_head bb_prealloc_list;
327 +       unsigned long   bb_prealloc_nr;
328  #ifdef DOUBLE_CHECK
329         void            *bb_bitmap;
330  #endif
331 Index: linux-2.6.18-128.1.6/fs/ext4/mballoc.h
332 ===================================================================
333 --- linux-2.6.18-128.1.6.orig/fs/ext4/mballoc.h
334 +++ linux-2.6.18-128.1.6/fs/ext4/mballoc.h
335 @@ -228,7 +229,7 @@ struct ext4_mb_history {
336         __u16 tail;     /* what tail broke some buddy */
337         __u16 buddy;    /* buddy the tail ^^^ broke */
338         __u16 flags;
339 -       __u8 cr:3;      /* which phase the result extent was found at */
340 +       __u8 cr:8;      /* which phase the result extent was found at */
341         __u8 op:4;
342         __u8 merged:1;
343  };