Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / kernel_patches / patches / ext3-uninit-2.6.9.patch
1 Keep a high water mark of used inodes for each group to improve e2fsck time.
2 Block and inode bitmaps can be uninitialized on disk via a flag in the
3 group descriptor to avoid reading or scanning them at e2fsck time.
4 A checksum of each group descriptor is used to ensure that corruption in
5 the group descriptor's bit flags does not cause incorrect operation.
6
7 Index: linux-stage/fs/ext3/balloc.c
8 ===================================================================
9 --- linux-stage.orig/fs/ext3/balloc.c   2007-03-14 04:44:12.000000000 -0400
10 +++ linux-stage/fs/ext3/balloc.c        2007-03-14 16:23:48.000000000 -0400
11 @@ -73,6 +73,95 @@ struct ext3_group_desc * ext3_get_group_
12         return gdp + desc;
13  }
14  
15 +/* Return the number of blocks that will be free in @block_group once its
16 + * uninitialized block bitmap has been set up.  @gdp is currently unused. */
17 +unsigned long ext3_free_blocks_after_init(struct super_block *sb,
18 +                       int block_group, struct ext3_group_desc *gdp)
19 +{
20 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
21 +       /* EXT3_BLOCKS_PER_GROUP() is used CPU-endian elsewhere: no byte swap */
22 +       unsigned long blks = EXT3_BLOCKS_PER_GROUP(sb);
23 +       int has_sb;
24 +
25 +       /* Account for sb, gdt */
26 +       has_sb = ext3_bg_has_super(sb, block_group);
27 +       if (has_sb)
28 +               blks--;
29 +
30 +       if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) ||
31 +           block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg)) {
32 +               if (has_sb) {
33 +                       blks -= le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
34 +                       blks -= ext3_bg_num_gdb(sb, block_group);
35 +               }
36 +       } else { /* For META_BG BLOCK GROUPS */
37 +               int group_rel = (block_group -
38 +                                le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
39 +                               EXT3_DESC_PER_BLOCK(sb);
40 +               if (group_rel == 0 || group_rel == 1 ||
41 +                   (group_rel == EXT3_DESC_PER_BLOCK(sb) - 1))
42 +                       blks--;
43 +       }
44 +
45 +       /* Account for bitmaps and inode table */
46 +       blks -= sbi->s_itb_per_group + 2;
47 +       return blks;
48 +}
49 +
50 +/* Build the initial block bitmap for @block_group in @bh (must be locked) */
51 +void ext3_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
52 +                           int block_group, struct ext3_group_desc *gdp)
53 +{
54 +       unsigned long startblk;
55 +       int bit, bit_max;
56 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
57 +
58 +       J_ASSERT_BH(bh, buffer_locked(bh));
59 +
60 +       /* If the checksum is bad, then just mark all bits in use for safety */
61 +       if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) {
62 +               ext3_error(sb, __FUNCTION__, "Checksum bad for group %u\n",
63 +                          block_group);
64 +               gdp->bg_free_blocks_count = 0;
65 +               gdp->bg_free_inodes_count = 0;
66 +               memset(bh->b_data, 0xff, sb->s_blocksize);
67 +               return;
68 +       }
69 +       memset(bh->b_data, 0, bh->b_size);
70 +
71 +       /* Set bits for sb, gdt */
72 +       startblk = block_group * EXT3_BLOCKS_PER_GROUP(sb) +
73 +               le32_to_cpu(sbi->s_es->s_first_data_block);
74 +
75 +       bit = 0;
76 +       bit_max = ext3_bg_has_super(sb, block_group);
77 +
78 +       if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) ||
79 +           block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg)) {
80 +               if (bit_max) {
81 +                       bit_max += ext3_bg_num_gdb(sb, block_group);
82 +                       bit_max +=le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
83 +               }
84 +       } else { /* For META_BG_BLOCK_GROUPS */
85 +               int group_rel = (block_group -
86 +                                le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
87 +                               EXT3_DESC_PER_BLOCK(sb);
88 +               if (group_rel == 0 || group_rel == 1 ||
89 +                   (group_rel == EXT3_DESC_PER_BLOCK(sb) - 1))
90 +                       bit_max += 1;
91 +       }
92 +       for (; bit < bit_max; bit++)
93 +               ext3_set_bit(bit, bh->b_data);
94 +
95 +       /* Set bits for bitmaps and inode table */
96 +       ext3_set_bit(le32_to_cpu(gdp->bg_block_bitmap) - startblk, bh->b_data);
97 +       ext3_set_bit(le32_to_cpu(gdp->bg_inode_bitmap) - startblk, bh->b_data);
98 +       /* The table's end is relative to its own start, not to the gdt end */
99 +       bit = le32_to_cpu(gdp->bg_inode_table) - startblk;
100 +       for (bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++)
101 +               ext3_set_bit(bit, bh->b_data);
102 +}
103 +
104  /*
105   * Read the bitmap for a given block_group, reading into the specified 
106   * slot in the superblock's bitmap cache.
107 @@ -88,7 +170,19 @@ read_block_bitmap(struct super_block *sb
108         desc = ext3_get_group_desc (sb, block_group, NULL);
109         if (!desc)
110                 goto error_out;
111 -       bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
112 +       if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) {
113 +               bh = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap));
114 +               if (!buffer_uptodate(bh)) {
115 +                       lock_buffer(bh);
116 +                       if (!buffer_uptodate(bh)) {
117 +                               ext3_init_block_bitmap(sb, bh,block_group,desc);
118 +                               set_buffer_uptodate(bh);
119 +                       }
120 +                       unlock_buffer(bh);
121 +               }
122 +       } else {
123 +               bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
124 +       }
125         if (!bh)
126                 ext3_error (sb, "read_block_bitmap",
127                             "Cannot read block bitmap - "
128 @@ -427,6 +520,7 @@ do_more:
129         gdp->bg_free_blocks_count =
130                 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) +
131                         *pdquot_freed_blocks);
132 +       gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp);
133         spin_unlock(sb_bgl_lock(sbi, block_group));
134         percpu_counter_mod(&sbi->s_freeblocks_counter, count);
135  
136 @@ -1175,7 +1270,7 @@ int ext3_new_block_old(handle_t *handle,
137  
138         goal_group = group_no;
139  retry:
140 -       free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
141 +       free_blocks = EXT3_BG_BLOCKS_FREE(sb, group_no, gdp);
142         if (free_blocks > 0) {
143                 ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
144                                 EXT3_BLOCKS_PER_GROUP(sb));
145 @@ -1206,7 +1304,7 @@ retry:
146                         *errp = -EIO;
147                         goto out;
148                 }
149 -               free_blocks = le16_to_cpu(gdp->bg_free_blocks_count);
150 +               free_blocks = EXT3_BG_BLOCKS_FREE(sb, group_no, gdp);
151                 if (free_blocks <= 0)
152                         continue;
153  
154 @@ -1306,10 +1407,17 @@ allocated:
155                         ret_block, goal_hits, goal_attempts);
156  
157         spin_lock(sb_bgl_lock(sbi, group_no));
158 +       free_blocks = 0;
159 +       if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) {
160 +               gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT);
161 +               free_blocks = ext3_free_blocks_after_init(sb, group_no, gdp);
162 +               gdp->bg_free_blocks_count = cpu_to_le16(free_blocks);
163 +       }
164         gdp->bg_free_blocks_count =
165                         cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
166 +       gdp->bg_checksum = ext3_group_desc_csum(sbi, group_no, gdp);
167         spin_unlock(sb_bgl_lock(sbi, group_no));
168 -       percpu_counter_mod(&sbi->s_freeblocks_counter, -1);
169 +       percpu_counter_mod(&sbi->s_freeblocks_counter, -1); /* 1 block used */
170  
171         BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
172         err = ext3_journal_dirty_metadata(handle, gdp_bh);
173 @@ -1379,7 +1479,7 @@ unsigned long ext3_count_free_blocks(str
174                 gdp = ext3_get_group_desc(sb, i, NULL);
175                 if (!gdp)
176                         continue;
177 -               desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
178 +               desc_count += EXT3_BG_BLOCKS_FREE(sb, i, gdp);
179                 brelse(bitmap_bh);
180                 bitmap_bh = read_block_bitmap(sb, i);
181                 if (bitmap_bh == NULL)
182 @@ -1387,7 +1487,7 @@ unsigned long ext3_count_free_blocks(str
183  
184                 x = ext3_count_free(bitmap_bh, sb->s_blocksize);
185                 printk("group %d: stored = %d, counted = %lu\n",
186 -                       i, le16_to_cpu(gdp->bg_free_blocks_count), x);
187 +                       i, EXT3_BG_BLOCKS_FREE(sb, i, gdp), x);
188                 bitmap_count += x;
189         }
190         brelse(bitmap_bh);
191 @@ -1403,7 +1503,7 @@ unsigned long ext3_count_free_blocks(str
192                 gdp = ext3_get_group_desc(sb, i, NULL);
193                 if (!gdp)
194                         continue;
195 -               desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
196 +               desc_count += EXT3_BG_BLOCKS_FREE(sb, i, gdp);
197         }
198  
199         return desc_count;
200 @@ -1590,7 +1587,7 @@ void ext3_check_blocks_bitmap (struct su
201                 gdp = ext3_get_group_desc (sb, i, NULL);
202                 if (!gdp)
203                         continue;
204 -               desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
205 +               desc_count += EXT3_BG_BLOCKS_FREE(sb, i, gdp);
206                 brelse(bitmap_bh);
207                 bitmap_bh = read_block_bitmap(sb, i);
208                 if (bitmap_bh == NULL)
209 @@ -1628,11 +1625,11 @@ void ext3_check_blocks_bitmap (struct su
210                                             "group %d is marked free", j, i);
211  
212                 x = ext3_count_free(bitmap_bh, sb->s_blocksize);
213 -               if (le16_to_cpu(gdp->bg_free_blocks_count) != x)
214 +               if (EXT3_BG_BLOCKS_FREE(sb, i, gdp) != x)
215                         ext3_error (sb, "ext3_check_blocks_bitmap",
216                                     "Wrong free blocks count for group %d, "
217                                     "stored = %d, counted = %lu", i,
218 -                                   le16_to_cpu(gdp->bg_free_blocks_count), x);
219 +                                   EXT3_BG_BLOCKS_FREE(sb, i, gdp), x);
220                 bitmap_count += x;
221         }
222         brelse(bitmap_bh);
223 Index: linux-stage/fs/ext3/ialloc.c
224 --- linux-stage.orig/fs/ext3/ialloc.c   2007-03-14 04:44:13.000000000 -0400
225 +++ linux-stage/fs/ext3/ialloc.c        2007-03-14 16:13:29.000000000 -0400
226 @@ -43,6 +43,47 @@
227   * the free blocks count in the block.
228   */
229  
230 +/*
231 + * Mark bits from start_bit up to end_bit as used.  The atomic setbit is only
232 + * called up to the next byte boundary (to ensure we get endianness right);
233 + * the following whole bytes are set with memset as there are no other users.
234 + */
235 +static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
236 +{
237 +       unsigned long aligned = (start_bit + 7) & ~7UL; /* next byte boundary */
238 +       int bit;
239 +
240 +       if (start_bit >= end_bit)
241 +               return;
242 +       ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
243 +       for (bit = start_bit; bit < aligned; bit++)
244 +               ext3_set_bit(bit, bitmap);
245 +       if (bit < end_bit)
246 +               memset(bitmap + (bit >> 3), 0xff, (end_bit - bit) >> 3);
247 +}
248 +
249 +/* Build the initial inode bitmap for @block_group in @bh (must be locked) */
250 +void ext3_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
251 +                           int block_group, struct ext3_group_desc *gdp)
252 +{
253 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
254 +
255 +       J_ASSERT_BH(bh, buffer_locked(bh));
256 +
257 +       /* If the checksum is bad, then just mark all bits in use for safety */
258 +       if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) {
259 +               ext3_error(sb, __FUNCTION__, "Checksum bad for group %u\n",
260 +                          block_group);
261 +               memset(bh->b_data, 0xff, sb->s_blocksize);
262 +               gdp->bg_free_blocks_count = 0;
263 +               gdp->bg_free_inodes_count = 0;
264 +               return;
265 +       }
266 +       /* Clear the bytes covering real inodes: (count + 7) / 8 rounds up */
267 +       memset(bh->b_data, 0, (EXT3_INODES_PER_GROUP(sb) + 7) / 8);
268 +       mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb),
269 +                       bh->b_data);
270 +}
271  
272  /*
273   * Read the inode allocation bitmap for a given block_group, reading
274 @@ -59,8 +103,15 @@ read_inode_bitmap(struct super_block * s
275         desc = ext3_get_group_desc(sb, block_group, NULL);
276         if (!desc)
277                 goto error_out;
278 -
279 -       bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
280 +       if (!(desc->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT))) {
281 +               bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
282 +       } else if ((bh = sb_getblk(sb, le32_to_cpu(desc->bg_inode_bitmap)))) {
283 +               lock_buffer(bh); /* init asserts buffer_locked(bh) */
284 +               if (!buffer_uptodate(bh))
285 +                       ext3_init_inode_bitmap(sb, bh, block_group, desc);
286 +               set_buffer_uptodate(bh);
287 +               unlock_buffer(bh);
288 +       }
289         if (!bh)
290                 ext3_error(sb, "read_inode_bitmap",
291                             "Cannot read inode bitmap - "
292 @@ -169,6 +175,8 @@ void ext3_free_inode (handle_t *handle, 
293                         if (is_directory)
294                                 gdp->bg_used_dirs_count = cpu_to_le16(
295                                   le16_to_cpu(gdp->bg_used_dirs_count) - 1);
296 +                       gdp->bg_checksum = ext3_group_desc_csum(sbi,block_group,
297 +                                                               gdp);
298                         spin_unlock(sb_bgl_lock(sbi, block_group));
299                         percpu_counter_inc(&sbi->s_freeinodes_counter);
300                         if (is_directory)
301 @@ -202,8 +210,8 @@ error_return:
302  static int find_group_dir(struct super_block *sb, struct inode *parent)
303  {
304         int ngroups = EXT3_SB(sb)->s_groups_count;
305 -       int freei, avefreei;
306 -       struct ext3_group_desc *desc, *best_desc = NULL;
307 +       int freei, avefreei, freeb, best_freeb = 0;
308 +       struct ext3_group_desc *desc;
309         struct buffer_head *bh;
310         int group, best_group = -1;
311  
312 @@ -212,15 +220,14 @@ static int find_group_dir(struct super_b
313  
314         for (group = 0; group < ngroups; group++) {
315                 desc = ext3_get_group_desc (sb, group, &bh);
316 -               if (!desc || !desc->bg_free_inodes_count)
317 +               if (!desc || (freei = EXT3_BG_INODES_FREE(sb, group, desc)) ==0)
318                         continue;
319 -               if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
320 +               if (freei < avefreei)
321                         continue;
322 -               if (!best_desc || 
323 -                   (le16_to_cpu(desc->bg_free_blocks_count) >
324 -                    le16_to_cpu(best_desc->bg_free_blocks_count))) {
325 +               freeb = EXT3_BG_BLOCKS_PNLT(sb, group, desc);
326 +               if (freeb > best_freeb) {
327                         best_group = group;
328 -                       best_desc = desc;
329 +                       best_freeb = freeb;
330                 }
331         }
332         return best_group;
333 @@ -284,14 +294,15 @@ static int find_group_orlov(struct super
334                 parent_group = (unsigned)group % ngroups;
335                 for (i = 0; i < ngroups; i++) {
336                         group = (parent_group + i) % ngroups;
337 -                       desc = ext3_get_group_desc (sb, group, &bh);
338 -                       if (!desc || !desc->bg_free_inodes_count)
339 +                       desc = ext3_get_group_desc(sb, group, &bh);
340 +                       if (!desc ||
341 +                           (freei = EXT3_BG_INODES_FREE(sb, group, desc)) ==0)
342                                 continue;
343                         if (le16_to_cpu(desc->bg_used_dirs_count) >= best_ndir)
344                                 continue;
345 -                       if (le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
346 +                       if (freei < avefreei)
347                                 continue;
348 -                       if (le16_to_cpu(desc->bg_free_blocks_count) < avefreeb)
349 +                       if (EXT3_BG_BLOCKS_PNLT(sb, group, desc) < avefreeb)
350                                 continue;
351                         best_group = group;
352                         best_ndir = le16_to_cpu(desc->bg_used_dirs_count);
353 @@ -318,13 +330,13 @@ static int find_group_orlov(struct super
354         for (i = 0; i < ngroups; i++) {
355                 group = (parent_group + i) % ngroups;
356                 desc = ext3_get_group_desc (sb, group, &bh);
357 -               if (!desc || !desc->bg_free_inodes_count)
358 +               if (!desc || (freei = EXT3_BG_INODES_FREE(sb, group, desc)) ==0)
359                         continue;
360                 if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
361                         continue;
362 -               if (le16_to_cpu(desc->bg_free_inodes_count) < min_inodes)
363 +               if (freei < min_inodes)
364                         continue;
365 -               if (le16_to_cpu(desc->bg_free_blocks_count) < min_blocks)
366 +               if (EXT3_BG_BLOCKS_PNLT(sb, group, desc) < min_blocks)
367                         continue;
368                 return group;
369         }
370 @@ -333,9 +347,9 @@ fallback:
371         for (i = 0; i < ngroups; i++) {
372                 group = (parent_group + i) % ngroups;
373                 desc = ext3_get_group_desc (sb, group, &bh);
374 -               if (!desc || !desc->bg_free_inodes_count)
375 +               if (!desc || (freei = EXT3_BG_INODES_FREE(sb, group, desc)) ==0)
376                         continue;
377 -               if (le16_to_cpu(desc->bg_free_inodes_count) >= avefreei)
378 +               if (freei >= avefreei)
379                         return group;
380         }
381  
382 @@ -362,6 +377,7 @@ static int find_group_other(struct super
383         int group, i;
384         int best_group = -1;
385         int avefreeb, freeb, best_group_freeb = 0;
386 +       int freei;
387  
388         /*
389          * Try to place the inode in its parent directory
390 @@ -380,8 +396,8 @@ static int find_group_other(struct super
391          */
392         group = parent_group;
393         desc = ext3_get_group_desc (sb, group, &bh);
394 -       if (desc && le16_to_cpu(desc->bg_free_inodes_count) &&
395 -           (!S_ISREG(mode) || le16_to_cpu(desc->bg_free_blocks_count)))
396 +       if (desc && EXT3_BG_INODES_FREE(sb, group, desc) &&
397 +           (!S_ISREG(mode) || EXT3_BG_BLOCKS_PNLT(sb, group, desc)))
398                 return group;
399         avefreeb = le32_to_cpu(sbi->s_es->s_free_blocks_count) / ngroups;
400         /*
401 @@ -392,11 +408,11 @@ static int find_group_other(struct super
402                 if (group >= ngroups)
403                         group -= ngroups;
404                 desc = ext3_get_group_desc (sb, group, &bh);
405 -               if (!desc || !desc->bg_free_inodes_count)
406 +               if (!desc || EXT3_BG_INODES_FREE(sb, group, desc) == 0)
407                         continue;
408                 if (!S_ISREG(mode))
409                         return group;
410 -               if (le16_to_cpu(desc->bg_free_blocks_count) >= avefreeb)
411 +               if (EXT3_BG_BLOCKS_PNLT(sb, group, desc) >= avefreeb)
412                         return group;
413         }
414  
415 @@ -413,9 +431,10 @@ static int find_group_other(struct super
416                 if (++group >= ngroups)
417                         group = 0;
418                 desc = ext3_get_group_desc (sb, group, &bh);
419 -               if (!desc || !desc->bg_free_inodes_count)
420 +               if (!desc ||    /* test desc before the macro dereferences it */
421 +                   (freei = EXT3_BG_INODES_FREE(sb, group, desc)) == 0)
422                         continue;
423 -               freeb = le16_to_cpu(desc->bg_free_blocks_count);
424 +               freeb = EXT3_BG_BLOCKS_PNLT(sb, group, desc);
425                 if (freeb > best_group_freeb) {
426                         best_group_freeb = freeb;
427                         best_group = group;
428 @@ -453,6 +472,7 @@ struct inode *ext3_new_inode(handle_t *h
429         int err = 0;
430         struct inode *ret;
431         int i;
432 +       int unused_flag = 0;
433  
434         /* Cannot create files in a deleted directory */
435         if (!dir || !dir->i_nlink)
436 @@ -581,18 +601,34 @@ got:
437         err = ext3_journal_get_write_access(handle, bh2);
438         if (err) goto fail;
439         spin_lock(sb_bgl_lock(sbi, group));
440 -       gdp->bg_free_inodes_count =
441 -               cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
442 +       if (gdp->bg_free_inodes_count == 0) {
443 +               if (gdp->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) {
444 +                       gdp->bg_itable_unused =
445 +                              cpu_to_le16(le32_to_cpu(es->s_inodes_per_group));
446 +                       gdp->bg_flags &= cpu_to_le16(~EXT3_BG_INODE_UNINIT);
447 +               }
448 +               /* If we didn't allocate from free initialized inodes,
449 +                * then we allocated from uninitialized inodes. In this
450 +                * case initialize one inode. */
451 +               gdp->bg_itable_unused =
452 +                       cpu_to_le16(le16_to_cpu(gdp->bg_itable_unused) - 1);
453 +               unused_flag = 1;
454 +       } else {
455 +               gdp->bg_free_inodes_count =
456 +                       cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) -1);
457 +       }
458         if (S_ISDIR(mode)) {
459                 gdp->bg_used_dirs_count =
460                         cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
461         }
462 +       gdp->bg_checksum = ext3_group_desc_csum(sbi, group, gdp);
463         spin_unlock(sb_bgl_lock(sbi, group));
464         BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
465         err = ext3_journal_dirty_metadata(handle, bh2);
466         if (err) goto fail;
467  
468 -       percpu_counter_dec(&sbi->s_freeinodes_counter);
469 +       if (!unused_flag)
470 +               percpu_counter_dec(&sbi->s_freeinodes_counter);
471         if (S_ISDIR(mode))
472                 percpu_counter_inc(&sbi->s_dirs_counter);
473         sb->s_dirt = 1;
474 Index: linux-stage/fs/ext3/mballoc.c
475 ===================================================================
476 --- linux-stage.orig/fs/ext3/mballoc.c  2007-03-14 04:44:12.000000000 -0400
477 +++ linux-stage/fs/ext3/mballoc.c       2007-03-14 16:13:29.000000000 -0400
478 @@ -107,6 +107,7 @@ struct ext3_group_info {
479         unsigned long   bb_state;
480         unsigned long   bb_tid;
481         struct ext3_free_metadata *bb_md_cur;
482 +       struct ext3_group_desc *bb_gdp;
483         unsigned short  bb_first_free;
484         unsigned short  bb_free;
485         unsigned short  bb_fragments;
486 @@ -511,10 +512,7 @@ static int ext3_mb_init_cache(struct pag
487                 if (first_group + i >= EXT3_SB(sb)->s_groups_count)
488                         break;
489  
490 -               err = -EIO;
491 -               desc = ext3_get_group_desc(sb, first_group + i, NULL);
492 -               if (desc == NULL)
493 -                       goto out;
494 +               desc = EXT3_GROUP_INFO(sb, first_group + i)->bb_gdp;
495  
496                 err = -ENOMEM;
497                 bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap));
498 @@ -529,7 +530,12 @@ static int ext3_mb_init_cache(struct pag
499                         unlock_buffer(bh[i]);
500                         continue;
501                 }
502 -
503 +               if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) {
504 +                       ext3_init_block_bitmap(sb, bh[i], first_group + i,desc);
505 +                       set_buffer_uptodate(bh[i]);
506 +                       unlock_buffer(bh[i]);
507 +                       continue;
508 +               }
509                 get_bh(bh[i]);
510                 bh[i]->b_end_io = end_buffer_read_sync;
511                 submit_bh(READ, bh[i]);
512 @@ -1246,6 +1244,10 @@ static int ext3_mb_good_group(struct ext
513         switch (cr) {
514                 case 0:
515                         J_ASSERT(ac->ac_2order != 0);
516 +                       /* If this group is uninitialized, skip it initially */
517 +                       if (grp->bb_gdp->bg_flags &
518 +                           cpu_to_le16(EXT3_BG_BLOCK_UNINIT))
519 +                               return 0;
520                         bits = ac->ac_sb->s_blocksize_bits + 1;
521                         for (i = ac->ac_2order; i <= bits; i++)
522                                 if (grp->bb_counters[i] > 0)
523 @@ -1421,7 +1421,8 @@ repeat:
524                         }
525  
526                         ac.ac_groups_scanned++;
527 -                       if (cr == 0)
528 +                       if (cr == 0 || (e3b.bd_info->bb_gdp->bg_flags &
529 +                                       cpu_to_le16(EXT3_BG_BLOCK_UNINIT)))
530                                 ext3_mb_simple_scan_group(&ac, &e3b);
531                         else if (cr == 1 && *len == sbi->s_stripe)
532                                 ext3_mb_scan_aligned(&ac, &e3b);
533 @@ -1500,7 +1499,8 @@ found:
534          * bitmap to be journaled */
535  
536         ext3_debug("using block group %d(%d)\n",
537 -                       ac.ac_b_group.group, gdp->bg_free_blocks_count);
538 +                  ac.ac_b_group.group,
539 +                  EXT3_BG_BLOCKS_FREE(sb, ac.ac_b_group.group, gdp));
540  
541         bitmap_bh = read_block_bitmap(sb, ac.ac_b_ex.fe_group);
542         if (!bitmap_bh) {
543 @@ -1537,9 +1543,17 @@ found:
544         mb_set_bits(bitmap_bh->b_data, ac.ac_b_ex.fe_start, ac.ac_b_ex.fe_len);
545  
546         spin_lock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group));
547 +       if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) {
548 +               gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT);
549 +               gdp->bg_free_blocks_count =
550 +                       cpu_to_le16(ext3_free_blocks_after_init(sb,
551 +                                                           ac.ac_b_ex.fe_group,
552 +                                                           gdp));
553 +       }
554         gdp->bg_free_blocks_count =
555                         cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
556                                         - ac.ac_b_ex.fe_len);
557 +       gdp->bg_checksum = ext3_group_desc_csum(sbi, ac.ac_b_ex.fe_group, gdp);
558         spin_unlock(sb_bgl_lock(sbi, ac.ac_b_ex.fe_group));
559         percpu_counter_mod(&sbi->s_freeblocks_counter, - ac.ac_b_ex.fe_len);
560  
561 @@ -1992,16 +1991,16 @@ int ext3_mb_init_backend(struct super_bl
562                         i--;
563                         goto err_freebuddy;
564                 }
565 +               memset(meta_group_info[j], 0, len);
566                 desc = ext3_get_group_desc(sb, i, NULL);
567 +               meta_group_info[j]->bb_gdp = desc;
568                 if (desc == NULL) {
569                         printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i);
570                         goto err_freebuddy;
571                 }
572 -               memset(meta_group_info[j], 0, len);
573                 set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT,
574                         &meta_group_info[j]->bb_state);
575 -               meta_group_info[j]->bb_free =
576 -                       le16_to_cpu(desc->bg_free_blocks_count);
577 +               meta_group_info[j]->bb_free = EXT3_BG_BLOCKS_FREE(sb, i, desc);
578         }
579  
580         return 0;
581 @@ -2397,6 +2410,7 @@ do_more:
582         spin_lock(sb_bgl_lock(sbi, block_group));
583         gdp->bg_free_blocks_count =
584                 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
585 +       gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp);
586         spin_unlock(sb_bgl_lock(sbi, block_group));
587         percpu_counter_mod(&sbi->s_freeblocks_counter, count);
588  
589 Index: linux-stage/fs/ext3/resize.c
590 ===================================================================
591 --- linux-stage.orig/fs/ext3/resize.c   2007-03-14 04:44:13.000000000 -0400
592 +++ linux-stage/fs/ext3/resize.c        2007-03-14 16:13:29.000000000 -0400
593 @@ -807,6 +807,9 @@ int ext3_group_add(struct super_block *s
594         gdp->bg_inode_table = cpu_to_le32(input->inode_table);
595         gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
596         gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb));
597 +       spin_lock(sb_bgl_lock(sbi, input->group)); /* not strictly needed */
598 +       gdp->bg_checksum = ext3_group_desc_csum(sbi, input->group, gdp);
599 +       spin_unlock(sb_bgl_lock(sbi, input->group));
600  
601         /*
602          * Make the new blocks and inodes valid next.  We do this before
603 Index: linux-stage/fs/ext3/super.c
604 ===================================================================
605 --- linux-stage.orig/fs/ext3/super.c    2007-03-14 04:44:13.000000000 -0400
606 +++ linux-stage/fs/ext3/super.c 2007-03-14 16:13:29.000000000 -0400
607 @@ -1084,6 +1084,56 @@ static int ext3_setup_super(struct super
608         return res;
609  }
610  
611 +/* Fold @len bytes from @buf into the running 16-bit checksum @crc. */
612 +static __u16 crc16(__u16 crc, const u8 *buf, size_t len)
613 +{
614 +       const u8 *end = buf + len;
615 +
616 +       for (; buf != end; buf++) {
617 +               crc ^= *buf;
618 +               crc ^= (u8)crc >> 4;
619 +               crc ^= ((u8)crc ^ ((u8)crc << 1)) << 4;
620 +       }
621 +       return crc;
622 +}
623 +
624 +/* Compute the little-endian CRC16 of @gdp (seeded with the fs UUID and the
625 + * group number); returns 0 when the GDT_CSUM feature is not enabled.  No
626 + * lock is asserted here: mount-time and bitmap-init callers verify unlocked. */
627 +__u16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 block_group,
628 +                          struct ext3_group_desc *gdp)
629 +{
630 +       __u16 crc = 0;
631 +
632 +       if (sbi->s_es->s_feature_ro_compat &
633 +           cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
634 +               int offset = offsetof(struct ext3_group_desc, bg_checksum);
635 +
636 +               block_group = cpu_to_le32(block_group);
637 +               crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
638 +               crc = crc16(crc, (__u8 *)&block_group, sizeof(block_group));
639 +               crc = crc16(crc, (__u8 *)gdp, offset);
640 +               offset += sizeof(gdp->bg_checksum); /* skip checksum */
641 +               BUG_ON(offset != sizeof(*gdp)); /* XXX handle s_desc_size */
642 +               /* for checksum of struct ext4_group_desc do the rest...
643 +               if (offset < sbi->s_es->s_desc_size) {
644 +                       crc = crc16(crc, (__u8 *)gdp + offset,
645 +                                   sbi->s_es->s_desc_size - offset);
646 +                */
647 +       }
648 +
649 +       return cpu_to_le16(crc);
650 +}
651 +
652 +/* Return 1 if the checksum stored in @gdp matches the computed one, else 0 */
653 +int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 block_group,
654 +                               struct ext3_group_desc *gdp)
655 +{
656 +       __u16 expected = ext3_group_desc_csum(sbi, block_group, gdp);
657 +
658 +       return gdp->bg_checksum == expected;
659 +}
660 +
661  /* Called at mount-time, super-block is locked */
662  static int ext3_check_descriptors (struct super_block * sb)
663  {
664 @@ -1133,6 +1179,13 @@ static int ext3_check_descriptors (struc
665                                         le32_to_cpu(gdp->bg_inode_table));
666                         return 0;
667                 }
668 +               if (!ext3_group_desc_csum_verify(sbi, i, gdp)) {
669 +                       ext3_error(sb, __FUNCTION__, /* report in CPU byte order */
670 +                               "Checksum for group %d failed (%u != %u)\n", i,
671 +                               le16_to_cpu(ext3_group_desc_csum(sbi, i, gdp)),
672 +                               le16_to_cpu(gdp->bg_checksum));
673 +                       return 0;
674 +               }
675                 block += EXT3_BLOCKS_PER_GROUP(sb);
676                 gdp++;
677         }
678 Index: linux-stage/include/linux/ext3_fs.h
679 ===================================================================
680 --- linux-stage.orig/include/linux/ext3_fs.h    2007-03-14 16:29:47.000000000 -0400
681 +++ linux-stage/include/linux/ext3_fs.h 2007-03-14 16:37:23.000000000 -0400
682 @@ -118,6 +118,26 @@ struct statfs;
683                                  (s)->s_first_ino)
684  #endif
685  
686 +/* Macro-instructions used to calculate the free inode and block counts of a
687 + * group.  The block count is approximate while BLOCK_UNINIT is still set. */
688 +#define EXT3_BG_INODES_FREE(sb,gr,gdp) ((gdp)->bg_flags &                     \
689 +                                      cpu_to_le16(EXT3_BG_INODE_UNINIT) ?     \
690 +                                      EXT3_INODES_PER_GROUP(sb) :             \
691 +                                      le16_to_cpu((gdp)->bg_itable_unused) +  \
692 +                                      le16_to_cpu((gdp)->bg_free_inodes_count))
693 +#define EXT3_BG_BLOCKS_FREE(sb,gr,gdp) ((gdp)->bg_flags &                     \
694 +                                      cpu_to_le16(EXT3_BG_BLOCK_UNINIT) ?     \
695 +                                      ext3_free_blocks_after_init(sb,gr,gdp) :\
696 +                                      le16_to_cpu((gdp)->bg_free_blocks_count))
697 +
698 +/* When looking for groups to allocate from, an uninitialized group is scored
699 + * as having only half of its blocks free, to avoid always picking such groups:
700 + * we don't start allocating from them until other groups are 1/2 full. */
701 +#define EXT3_BG_BLOCKS_PNLT(sb,gr,gdp) ((gdp)->bg_flags &                     \
702 +                                      cpu_to_le16(EXT3_BG_BLOCK_UNINIT) ?     \
703 +                                      EXT3_BLOCKS_PER_GROUP(sb) / 2 :         \
704 +                                      le16_to_cpu((gdp)->bg_free_blocks_count))
705 +
706  /*
707   * Macro-instructions used to manage fragments
708   */
709 @@ -138,16 +148,22 @@ struct statfs;
710   */
711  struct ext3_group_desc
712  {
713 -       __le32  bg_block_bitmap;                /* Blocks bitmap block */
714 -       __le32  bg_inode_bitmap;                /* Inodes bitmap block */
715 +       __le32  bg_block_bitmap;        /* Blocks bitmap block */
716 +       __le32  bg_inode_bitmap;        /* Inodes bitmap block */
717         __le32  bg_inode_table;         /* Inodes table block */
718         __le16  bg_free_blocks_count;   /* Free blocks count */
719         __le16  bg_free_inodes_count;   /* Free inodes count */
720         __le16  bg_used_dirs_count;     /* Directories count */
721 -       __u16   bg_pad;
722 -       __le32  bg_reserved[3];
723 +       __le16  bg_flags;               /* EXT3_BG_* flags (UNINIT, etc) */
724 +       __le32  bg_reserved[2];         /* Likely block/inode bitmap checksum */
725 +       __le16  bg_itable_unused;       /* Unused inodes count */
726 +       __le16  bg_checksum;            /* crc16(sb_uuid+group+desc before it) */
727  };
728  
729 +#define EXT3_BG_INODE_UNINIT   0x0001 /* Inode table/bitmap not in use */
730 +#define EXT3_BG_BLOCK_UNINIT   0x0002 /* Block bitmap not in use */
731 +#define EXT3_BG_BLOCK_WRITTEN  0x0004 /* On-disk structures were overwritten */
732 +
733  /*
734   * Macro-instructions used to manage group descriptors
735   */
736 @@ -564,6 +581,7 @@ static inline struct ext3_inode_info *EX
737  #define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER    0x0001
738  #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE      0x0002
739  #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR       0x0004
740 +#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM                0x0010
741  #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK       0x0020
742  
743  #define EXT3_FEATURE_INCOMPAT_COMPRESSION      0x0001
744 @@ -580,6 +598,7 @@ static inline struct ext3_inode_info *EX
745                                          EXT3_FEATURE_INCOMPAT_EXTENTS)
746  #define EXT3_FEATURE_RO_COMPAT_SUPP    (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
747                                          EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \
748 +                                        EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
749                                          EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \
750                                          EXT3_FEATURE_RO_COMPAT_BTREE_DIR)
751  
752 @@ -841,6 +860,16 @@ extern void ext3_unlockfs (struct super_
753  extern void ext3_commit_super (struct super_block *, struct ext3_super_block *, int);
754  extern int ext3_remount (struct super_block *, int *, char *);
755  extern int ext3_statfs (struct super_block *, struct kstatfs *);
756 +extern __u16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 group,
757 +                                 struct ext3_group_desc *gdp);
758 +extern int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 group,
759 +                                      struct ext3_group_desc *gdp);
760 +extern unsigned long ext3_free_blocks_after_init(struct super_block *sb,
761 +                                                int block_group,
762 +                                                struct ext3_group_desc *gdp);
763 +extern void ext3_init_block_bitmap(struct super_block *sb,
764 +                                  struct buffer_head *bh, int group,
765 +                                  struct ext3_group_desc *desc);
766  
767  #define ext3_std_error(sb, errno)                              \
768  do {                                                           \
769
770 %diffstat
771  fs/ext3/balloc.c             |  116 +++++++++++++++++++++++++++++++++++++++++--
772  fs/ext3/ialloc.c             |   85 ++++++++++++++++++++++---------
773  fs/ext3/mballoc.c            |   18 +++++-
774  fs/ext3/resize.c             |    1 
775  fs/ext3/super.c              |   53 +++++++++++++++++++
776  include/linux/ext3_fs.h      |   36 +++++++++++--
777  include/linux/ext3_fs.h.orig |   23 +++++++-
778  7 files changed, 294 insertions(+), 38 deletions(-)
779