Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / kernel_patches / patches / ext3-uninit-2.6-suse.patch
1 Add support for the uninit_groups feature to the kernel.
2
3 Keep a high water mark of used inodes for each group to improve e2fsck time.
4 Block and inode bitmaps can be uninitialized on disk via a flag in the
5 group descriptor to avoid reading or scanning them at e2fsck time.
6 A checksum of each group descriptor is used to ensure that corruption in
7 the group descriptor's bit flags does not cause incorrect operation.
8
9 Index: linux-2.6.5-7.283-full/include/linux/ext3_fs.h
10 ===================================================================
11 --- linux-2.6.5-7.283-full.orig/include/linux/ext3_fs.h 2007-03-28 17:33:05.000000000 +0400
12 +++ linux-2.6.5-7.283-full/include/linux/ext3_fs.h      2007-03-28 18:33:35.000000000 +0400
13 @@ -153,16 +153,22 @@ struct ext3_allocation_request {
14   */
15  struct ext3_group_desc
16  {
17 -       __u32   bg_block_bitmap;                /* Blocks bitmap block */
18 -       __u32   bg_inode_bitmap;                /* Inodes bitmap block */
19 +       __u32   bg_block_bitmap;        /* Blocks bitmap block */
20 +       __u32   bg_inode_bitmap;        /* Inodes bitmap block */
21         __u32   bg_inode_table;         /* Inodes table block */
22         __u16   bg_free_blocks_count;   /* Free blocks count */
23         __u16   bg_free_inodes_count;   /* Free inodes count */
24         __u16   bg_used_dirs_count;     /* Directories count */
25 -       __u16   bg_pad;
26 -       __u32   bg_reserved[3];
27 +       __u16   bg_flags;               /* EXT3_BG_flags (UNINIT, etc) */
28 +       __u32   bg_reserved[2];         /* Likely block/inode bitmap checksum */
29 +       __u16   bg_itable_unused;       /* Unused inodes count */
30 +       __u16   bg_checksum;            /* crc16(sb_uuid+group+desc) */
31  };
32  
33 +#define EXT3_BG_INODE_UNINIT   0x0001  /* Inode table/bitmap not in use */
34 +#define EXT3_BG_BLOCK_UNINIT   0x0002  /* Block bitmap not in use */
35 +#define EXT3_BG_INODE_ZEROED   0x0004  /* On-disk itable initialized to zero */
36 +
37  /*
38   * Macro-instructions used to manage group descriptors
39   */
40 @@ -458,7 +464,7 @@ struct ext3_super_block {
41          */
42         __u8    s_prealloc_blocks;      /* Nr of blocks to try to preallocate*/
43         __u8    s_prealloc_dir_blocks;  /* Nr to preallocate for dirs */
44 -       __u16   s_padding1;
45 +       __u16   s_reserved_gdt_blocks;  /* Per group desc for online growth */
46         /*
47          * Journaling support valid if EXT3_FEATURE_COMPAT_HAS_JOURNAL set.
48          */
49 @@ -546,6 +552,7 @@ static inline struct ext3_inode_info *EX
50  #define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER    0x0001
51  #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE      0x0002
52  #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR       0x0004
53 +#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM                0x0010
54  #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK       0x0020
55  
56  #define EXT3_FEATURE_INCOMPAT_COMPRESSION      0x0001
57 @@ -562,6 +569,7 @@ static inline struct ext3_inode_info *EX
58                                          EXT3_FEATURE_INCOMPAT_EXTENTS)
59  #define EXT3_FEATURE_RO_COMPAT_SUPP    (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
60                                          EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \
61 +                                        EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
62                                          EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \
63                                          EXT3_FEATURE_RO_COMPAT_BTREE_DIR)
64  
65 Index: linux-2.6.5-7.283-full/fs/ext3/super.c
66 ===================================================================
67 --- linux-2.6.5-7.283-full.orig/fs/ext3/super.c 2007-03-28 17:33:05.000000000 +0400
68 +++ linux-2.6.5-7.283-full/fs/ext3/super.c      2007-03-28 18:33:35.000000000 +0400
69 @@ -36,6 +36,7 @@
70  #include <linux/quotaops.h>
71  #include "xattr.h"
72  #include "acl.h"
73 +#include "group.h"
74  
75  static int ext3_load_journal(struct super_block *, struct ext3_super_block *);
76  static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
77 @@ -996,6 +997,90 @@ static int ext3_setup_super(struct super
78         return res;
79  }
80  
81 +#if !defined(CONFIG_CRC16) && !defined(CONFIG_CRC16_MODULE)
82 +/** Fallback CRC-16 table, compiled only when neither CONFIG_CRC16 nor CONFIG_CRC16_MODULE is defined. The poly is 0x8005 (x^16 + x^15 + x^2 + 1) */
83 +__u16 const crc16_table[256] = {
84 +       0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241,
85 +       0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440,
86 +       0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40,
87 +       0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841,
88 +       0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40,
89 +       0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41,
90 +       0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641,
91 +       0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040,
92 +       0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240,
93 +       0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441,
94 +       0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41,
95 +       0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840,
96 +       0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41,
97 +       0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40,
98 +       0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640,
99 +       0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041,
100 +       0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240,
101 +       0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441,
102 +       0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41,
103 +       0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840,
104 +       0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41,
105 +       0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40,
106 +       0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640,
107 +       0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041,
108 +       0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241,
109 +       0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440,
110 +       0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40,
111 +       0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841,
112 +       0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40,
113 +       0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41,
114 +       0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641,
115 +       0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040
116 +};
117 +
118 +static inline __u16 crc16_byte(__u16 crc, const __u8 data)
119 +{
120 +       return (crc >> 8) ^ crc16_table[(crc ^ data) & 0xff]; /* fold one byte into the running CRC via the table */
121 +}
122 +
123 +__u16 crc16(__u16 crc, __u8 const *buffer, size_t len)
124 +{
125 +       while (len--)
126 +               crc = crc16_byte(crc, *buffer++); /* consume the buffer one byte at a time */
127 +       return crc; /* with len == 0 the seed is returned unchanged */
128 +}
129 +#endif /* !CONFIG_CRC16 && !CONFIG_CRC16_MODULE */
130 +
131 +__le16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 block_group,
132 +                           struct ext3_group_desc *gdp)
133 +{
134 +       __u16 crc = 0; /* stays 0 when the GDT_CSUM feature bit is not set */
135 +
136 +       if (sbi->s_es->s_feature_ro_compat &
137 +           cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
138 +               int offset = offsetof(struct ext3_group_desc, bg_checksum);
139 +               __le32 le_group = cpu_to_le32(block_group);
140 +
141 +               crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); /* seed ~0, then the superblock UUID */
142 +               crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group)); /* then the little-endian group number */
143 +               crc = crc16(crc, (__u8 *)gdp, offset); /* then the descriptor up to bg_checksum */
144 +               offset += sizeof(gdp->bg_checksum); /* step over the bg_checksum field itself */
145 +               BUG_ON(offset != sizeof(*gdp)); /* XXX s_desc_size is not handled: bg_checksum must be the last field */
146 +               /* To checksum a larger struct ext4_group_desc, also cover the tail:
147 +               if (offset < sbi->s_es->s_desc_size) {
148 +                       crc = crc16(crc, (__u8 *)gdp + offset,
149 +                                   sbi->s_es->s_desc_size - offset);
150 +               } */
151 +       }
152 +
153 +       return cpu_to_le16(crc); /* returned in on-disk (little-endian) byte order */
154 +}
155 +
156 +int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 block_group,
157 +                               struct ext3_group_desc *gdp)
158 +{
159 +       __le16 expected = ext3_group_desc_csum(sbi, block_group, gdp);
160 +
161 +       /* 1 when the stored checksum equals the recomputed one, 0 otherwise */
162 +       return gdp->bg_checksum == expected;
163 +}
164 +
165  static int ext3_check_descriptors (struct super_block * sb)
166  {
167         struct ext3_sb_info *sbi = EXT3_SB(sb);
168 @@ -1044,6 +1129,13 @@ static int ext3_check_descriptors (struc
169                                         le32_to_cpu(gdp->bg_inode_table));
170                         return 0;
171                 }
172 +               if (!ext3_group_desc_csum_verify(sbi, i, gdp)) {
173 +                       ext3_error(sb, __FUNCTION__,
174 +                                  "Checksum for group %d failed (%u!=%u)\n", i,
175 +                                  le16_to_cpu(ext3_group_desc_csum(sbi,i,gdp)),
176 +                                  le16_to_cpu(gdp->bg_checksum));
177 +                       return 0;
178 +               }
179                 block += EXT3_BLOCKS_PER_GROUP(sb);
180                 gdp++;
181         }
182 Index: linux-2.6.5-7.283-full/fs/ext3/group.h
183 ===================================================================
184 --- linux-2.6.5-7.283-full.orig/fs/ext3/group.h 2007-02-13 18:39:59.640066087 +0300
185 +++ linux-2.6.5-7.283-full/fs/ext3/group.h      2007-03-28 18:33:35.000000000 +0400
186 @@ -0,0 +1,29 @@
187 +/*
188 + *  linux/fs/ext3/group.h
189 + *
190 + * Copyright 2008 Sun Microsystems, Inc.
191 + *
192 + * Author: Andreas Dilger <adilger@clusterfs.com>
193 + */
194 +
195 +#ifndef _LINUX_EXT3_GROUP_H
196 +#define _LINUX_EXT3_GROUP_H
197 +#if defined(CONFIG_CRC16) || defined(CONFIG_CRC16_MODULE)
198 +#include <linux/crc16.h>
199 +#endif /* CONFIG_CRC16 || CONFIG_CRC16_MODULE */
200 +
201 +extern __le16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 group,
202 +                                  struct ext3_group_desc *gdp);
203 +extern int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 group,
204 +                                      struct ext3_group_desc *gdp);
205 +struct buffer_head *read_block_bitmap(struct super_block *sb,
206 +                                     unsigned int block_group);
207 +extern unsigned ext3_init_block_bitmap(struct super_block *sb,
208 +                                      struct buffer_head *bh, int group,
209 +                                      struct ext3_group_desc *desc);
210 +#define ext3_free_blocks_after_init(sb, group, desc)                   \
211 +               ext3_init_block_bitmap(sb, NULL, group, desc)
212 +extern unsigned ext3_init_inode_bitmap(struct super_block *sb,
213 +                                      struct buffer_head *bh, int group,
214 +                                      struct ext3_group_desc *desc);
215 +#endif /* _LINUX_EXT3_GROUP_H */
216 Index: linux-2.6.5-7.283-full/fs/ext3/ialloc.c
217 ===================================================================
218 --- linux-2.6.5-7.283-full.orig/fs/ext3/ialloc.c        2007-03-28 17:33:03.000000000 +0400
219 +++ linux-2.6.5-7.283-full/fs/ext3/ialloc.c     2007-03-28 18:33:35.000000000 +0400
220 @@ -28,6 +28,7 @@
221  
222  #include "xattr.h"
223  #include "acl.h"
224 +#include "group.h"
225  
226  /*
227   * ialloc.c contains the inodes allocation and deallocation routines
228 @@ -43,6 +44,52 @@
229   * the free blocks count in the block.
230   */
231  
232 +/*
233 + * Set the bitmap bits from start_bit up to end_bit.  To avoid calling the
234 + * atomic setbit hundreds or thousands of times, we only use it up to the next
235 + * byte boundary (to get endianness right) and memset the remaining bytes.
236 + */
237 +static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
238 +{
239 +       int i;
240 +
241 +       if (start_bit >= end_bit)
242 +               return; /* empty range: nothing to mark */
243 +
244 +       ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
245 +       for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++) /* runs to the byte boundary; end_bit is not consulted here */
246 +               ext3_set_bit(i, bitmap);
247 +       if (i < end_bit)
248 +               memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3); /* whole bytes only: the shift drops any remainder bits */
249 +}
250 +
251 +/* Initializes an uninitialized inode bitmap in the locked buffer bh; returns EXT3_INODES_PER_GROUP(sb), or 0 if the group descriptor checksum is bad */
252 +unsigned ext3_init_inode_bitmap(struct super_block *sb,
253 +                               struct buffer_head *bh, int block_group,
254 +                               struct ext3_group_desc *gdp)
255 +{
256 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
257 +
258 +       J_ASSERT_BH(bh, buffer_locked(bh));
259 +
260 +       /* If checksum is bad mark all blocks and inodes in use to prevent
261 +        * allocation, essentially implementing a per-group read-only flag. */
262 +       if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) {
263 +               ext3_error(sb, __FUNCTION__, "Checksum bad for group %u\n",
264 +                          block_group);
265 +               gdp->bg_free_blocks_count = 0;
266 +               gdp->bg_free_inodes_count = 0;
267 +               gdp->bg_itable_unused = 0;
268 +               memset(bh->b_data, 0xff, sb->s_blocksize);
269 +               return 0;
270 +       }
271 +
272 +       memset(bh->b_data, 0, (EXT3_INODES_PER_GROUP(sb) + 7) / 8); /* all inodes of the group start out free */
273 +       mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
274 +                       bh->b_data); /* pad to the end of the bitmap block, which need not equal blocks-per-group */
275 +
276 +       return EXT3_INODES_PER_GROUP(sb);
277 +}
278  
279  /*
280   * Read the inode allocation bitmap for a given block_group, reading
281 @@ -59,8 +106,19 @@ read_inode_bitmap(struct super_block * s
282         desc = ext3_get_group_desc(sb, block_group, NULL);
283         if (!desc)
284                 goto error_out;
285 -
286 -       bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
287 +       if (desc->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) {
288 +               bh = sb_getblk(sb, le32_to_cpu(desc->bg_inode_bitmap));
289 +               if (!buffer_uptodate(bh)) {
290 +                       lock_buffer(bh);
291 +                       if (!buffer_uptodate(bh)) {
292 +                               ext3_init_inode_bitmap(sb, bh,block_group,desc);
293 +                               set_buffer_uptodate(bh);
294 +                       }
295 +                       unlock_buffer(bh);
296 +               }
297 +       } else {
298 +               bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
299 +       }
300         if (!bh)
301                 ext3_error(sb, "read_inode_bitmap",
302                             "Cannot read inode bitmap - "
303 @@ -168,6 +226,8 @@ void ext3_free_inode (handle_t *handle, 
304                         if (is_directory)
305                                 gdp->bg_used_dirs_count = cpu_to_le16(
306                                   le16_to_cpu(gdp->bg_used_dirs_count) - 1);
307 +                       gdp->bg_checksum = ext3_group_desc_csum(sbi,block_group,
308 +                                                               gdp);
309                         spin_unlock(sb_bgl_lock(sbi, block_group));
310                         percpu_counter_inc(&sbi->s_freeinodes_counter);
311                         if (is_directory)
312 @@ -454,7 +514,7 @@ struct inode *ext3_new_inode(handle_t *h
313         struct ext3_sb_info *sbi;
314         int err = 0;
315         struct inode *ret;
316 -       int i;
317 +       int i, free = 0;
318  
319         /* Cannot create files in a deleted directory */
320         if (!dir || !dir->i_nlink)
321 @@ -570,11 +630,13 @@ repeat_in_this_group:
322         goto out;
323  
324  got:
325 -       ino += group * EXT3_INODES_PER_GROUP(sb) + 1;
326 -       if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
327 -               ext3_error (sb, "ext3_new_inode",
328 -                           "reserved inode or inode > inodes count - "
329 -                           "block_group = %d, inode=%lu", group, ino);
330 +       ino++;
331 +       if ((group == 0 && ino < EXT3_FIRST_INO(sb)) ||
332 +           ino > EXT3_INODES_PER_GROUP(sb)) {
333 +               ext3_error(sb, __FUNCTION__,
334 +                          "reserved inode or inode > inodes count - "
335 +                          "block_group = %d, inode=%lu", group,
336 +                          ino + group * EXT3_INODES_PER_GROUP(sb));
337                 err = -EIO;
338                 goto fail;
339         }
340 @@ -582,13 +644,65 @@ got:
341         BUFFER_TRACE(bh2, "get_write_access");
342         err = ext3_journal_get_write_access(handle, bh2);
343         if (err) goto fail;
344 +
345 +       /* We may have to initialize the block bitmap if it isn't already */
346 +       if (EXT3_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
347 +           gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) {
348 +               struct buffer_head *block_bh = read_block_bitmap(sb, group);
349 +
350 +               BUFFER_TRACE(block_bh, "get block bitmap access");
351 +               err = ext3_journal_get_write_access(handle, block_bh);
352 +               if (err) {
353 +                       brelse(block_bh);
354 +                       goto fail;
355 +               }
356 +
357 +               free = 0;
358 +               spin_lock(sb_bgl_lock(sbi, group));
359 +               /* recheck and clear flag under lock if we still need to */
360 +               if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) {
361 +                       gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT);
362 +                       free = ext3_free_blocks_after_init(sb, group, gdp);
363 +                       gdp->bg_free_blocks_count = cpu_to_le16(free);
364 +               }
365 +               spin_unlock(sb_bgl_lock(sbi, group));
366 +
367 +               /* Don't need to dirty bitmap block if we didn't change it */
368 +               if (free) {
369 +                       BUFFER_TRACE(block_bh, "dirty block bitmap");
370 +                       err = ext3_journal_dirty_metadata(handle, block_bh);
371 +               }
372 +
373 +               brelse(block_bh);
374 +               if (err)
375 +                       goto fail;
376 +       }
377 +
378         spin_lock(sb_bgl_lock(sbi, group));
379 +       /* If we didn't allocate from within the initialized part of the inode
380 +        * table then we need to initialize up to this inode. */
381 +       if (EXT3_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
382 +               if (gdp->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) {
383 +                       gdp->bg_flags &= cpu_to_le16(~EXT3_BG_INODE_UNINIT);
384 +                       free = EXT3_INODES_PER_GROUP(sb);
385 +               } else {
386 +                       free = EXT3_INODES_PER_GROUP(sb) -
387 +                               le16_to_cpu(gdp->bg_itable_unused);
388 +               }
389 +
390 +               if (ino > free) {
391 +                       gdp->bg_itable_unused =
392 +                               cpu_to_le16(EXT3_INODES_PER_GROUP(sb) - ino);
393 +               }
394 +       }
395 +
396         gdp->bg_free_inodes_count =
397                 cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
398         if (S_ISDIR(mode)) {
399                 gdp->bg_used_dirs_count =
400                         cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
401         }
402 +       gdp->bg_checksum = ext3_group_desc_csum(sbi, group, gdp);
403         spin_unlock(sb_bgl_lock(sbi, group));
404         BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
405         err = ext3_journal_dirty_metadata(handle, bh2);
406 @@ -610,7 +724,7 @@ got:
407                 inode->i_gid = current->fsgid;
408         inode->i_mode = mode;
409  
410 -       inode->i_ino = ino;
411 +       inode->i_ino = ino + group * EXT3_INODES_PER_GROUP(sb);
412         /* This is the optimal IO size (for stat), not the fs block size */
413         inode->i_blksize = PAGE_SIZE;
414         inode->i_blocks = 0;
415 Index: linux-2.6.5-7.283-full/fs/ext3/mballoc.c
416 ===================================================================
417 --- linux-2.6.5-7.283-full.orig/fs/ext3/mballoc.c       2007-03-28 15:46:00.000000000 +0400
418 +++ linux-2.6.5-7.283-full/fs/ext3/mballoc.c    2007-03-28 18:33:35.000000000 +0400
419 @@ -36,6 +36,8 @@
420  #include <linux/seq_file.h>
421  #include <linux/version.h>
422  
423 +#include "group.h"
424 +
425  /*
426   * MUSTDO:
427   *   - test ext3_ext_search_left() and ext3_ext_search_right()
428 @@ -323,6 +325,7 @@ struct ext3_group_info {
429         unsigned long   bb_state;
430         unsigned long   bb_tid;
431         struct ext3_free_metadata *bb_md_cur;
432 +       struct ext3_group_desc *bb_gdp;
433         unsigned short  bb_first_free;
434         unsigned short  bb_free;
435         unsigned short  bb_fragments;
436 @@ -928,10 +931,7 @@ static int ext3_mb_init_cache(struct pag
437                 if (first_group + i >= EXT3_SB(sb)->s_groups_count)
438                         break;
439  
440 -               err = -EIO;
441 -               desc = ext3_get_group_desc(sb, first_group + i, NULL);
442 -               if (desc == NULL)
443 -                       goto out;
444 +               desc = EXT3_GROUP_INFO(sb, first_group + i)->bb_gdp;
445  
446                 err = -ENOMEM;
447                 bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap));
448 @@ -946,7 +946,12 @@ static int ext3_mb_init_cache(struct pag
449                         unlock_buffer(bh[i]);
450                         continue;
451                 }
452 -
453 +               if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) {
454 +                       ext3_init_block_bitmap(sb, bh[i], first_group + i,desc);
455 +                       set_buffer_uptodate(bh[i]);
456 +                       unlock_buffer(bh[i]);
457 +                       continue;
458 +               }
459                 get_bh(bh[i]);
460                 bh[i]->b_end_io = end_buffer_read_sync;
461                 submit_bh(READ, bh[i]);
462 @@ -1703,6 +1708,10 @@ static int ext3_mb_good_group(struct ext
463         switch (cr) {
464                 case 0:
465                         BUG_ON(ac->ac_2order == 0);
466 +                       /* If this group is uninitialized, skip it initially */
467 +                       if (grp->bb_gdp->bg_flags &
468 +                           cpu_to_le16(EXT3_BG_BLOCK_UNINIT))
469 +                               return 0;
470                         bits = ac->ac_sb->s_blocksize_bits + 1;
471                         for (i = ac->ac_2order; i <= bits; i++)
472                                 if (grp->bb_counters[i] > 0)
473 @@ -1796,7 +1805,9 @@ repeat:
474                         }
475  
476                         ac->ac_groups_scanned++;
477 -                       if (cr == 0)
478 +                       if (cr == 0 || (e3b.bd_info->bb_gdp->bg_flags &
479 +                                       cpu_to_le16(EXT3_BG_BLOCK_UNINIT) &&
480 +                                       ac->ac_2order != 0))
481                                 ext3_mb_simple_scan_group(ac, &e3b);
482                         else if (cr == 1 && ac->ac_g_ex.fe_len == sbi->s_stripe)
483                                 ext3_mb_scan_aligned(ac, &e3b);
484 @@ -2267,12 +2278,13 @@ int ext3_mb_init_backend(struct super_bl
485                         i--;
486                         goto err_freebuddy;
487                 }
488 +               memset(meta_group_info[j], 0, len);
489                 desc = ext3_get_group_desc(sb, i, NULL);
490 +               meta_group_info[j]->bb_gdp = desc;
491                 if (desc == NULL) {
492                         printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i);
493                         goto err_freebuddy;
494                 }
495 -               memset(meta_group_info[j], 0, len);
496                 set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT,
497                         &meta_group_info[j]->bb_state);
498  
499 @@ -2936,9 +2948,17 @@ int ext3_mb_mark_diskspace_used(struct e
500         mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
501  
502         spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
503 +       if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) {
504 +               gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT);
505 +               gdp->bg_free_blocks_count =
506 +                       cpu_to_le16(ext3_free_blocks_after_init(sb,
507 +                                                           ac->ac_b_ex.fe_group,
508 +                                                           gdp));
509 +       }
510         gdp->bg_free_blocks_count =
511                 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
512                                 - ac->ac_b_ex.fe_len);
513 +       gdp->bg_checksum = ext3_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
514         spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
515         percpu_counter_mod(&sbi->s_freeblocks_counter, - ac->ac_b_ex.fe_len);
516  
517 @@ -4303,6 +4323,7 @@ do_more:
518         spin_lock(sb_bgl_lock(sbi, block_group));
519         gdp->bg_free_blocks_count =
520                 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
521 +       gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp);
522         spin_unlock(sb_bgl_lock(sbi, block_group));
523         percpu_counter_mod(&sbi->s_freeblocks_counter, count);
524  
525 Index: linux-2.6.5-7.283-full/fs/ext3/balloc.c
526 ===================================================================
527 --- linux-2.6.5-7.283-full.orig/fs/ext3/balloc.c        2007-03-28 17:33:02.000000000 +0400
528 +++ linux-2.6.5-7.283-full/fs/ext3/balloc.c     2007-03-28 18:33:35.000000000 +0400
529 @@ -20,6 +20,7 @@
530  #include <linux/quotaops.h>
531  #include <linux/buffer_head.h>
532  
533 +#include "group.h"
534  /*
535   * balloc.c contains the blocks allocation and deallocation routines
536   */
537 @@ -72,6 +73,75 @@ struct ext3_group_desc * ext3_get_group_
538         return gdp + desc;
539  }
540  
541 +/* Initializes an uninitialized block bitmap if bh is given (bh may be NULL to
542 + * only count), and returns the number of blocks free in the group. */
543 +unsigned ext3_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
544 +                               int block_group, struct ext3_group_desc *gdp)
545 +{
546 +       unsigned long start;
547 +       int bit, bit_max;
548 +       unsigned free_blocks;
549 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
550 +
551 +       if (bh) {
552 +               J_ASSERT_BH(bh, buffer_locked(bh));
553 +
554 +               /* If checksum is bad mark all blocks in use to prevent allocation,
555 +                * essentially implementing a per-group read-only flag. */
556 +               if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) {
557 +                       ext3_error(sb, __FUNCTION__,
558 +                                  "Checksum bad for group %u\n", block_group);
559 +                       gdp->bg_free_blocks_count = 0;
560 +                       gdp->bg_free_inodes_count = 0;
561 +                       gdp->bg_itable_unused = 0;
562 +                       memset(bh->b_data, 0xff, sb->s_blocksize);
563 +                       return 0;
564 +               }
565 +               memset(bh->b_data, 0, sb->s_blocksize);
566 +       }
567 +
568 +       /* Check for superblock and gdt backups in this group */
569 +       bit_max = ext3_bg_has_super(sb, block_group);
570 +
571 +       if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) ||
572 +           block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
573 +                         sbi->s_desc_per_block) {
574 +               if (bit_max) {
575 +                       bit_max += ext3_bg_num_gdb(sb, block_group);
576 +                       bit_max +=le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
577 +               }
578 +       } else { /* For META_BG_BLOCK_GROUPS */
579 +               int group_rel = (block_group -
580 +                                le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
581 +                               EXT3_DESC_PER_BLOCK(sb);
582 +               if (group_rel == 0 || group_rel == 1 ||
583 +                   (group_rel == EXT3_DESC_PER_BLOCK(sb) - 1))
584 +                       bit_max += 1;
585 +       }
586 +
587 +       /* Last and first groups are always initialized */
588 +       free_blocks = EXT3_BLOCKS_PER_GROUP(sb) - bit_max;
589 +
590 +       if (bh) {
591 +               for (bit = 0; bit < bit_max; bit++)
592 +                       ext3_set_bit(bit, bh->b_data);
593 +
594 +               start = block_group * EXT3_BLOCKS_PER_GROUP(sb) +
595 +                       le32_to_cpu(sbi->s_es->s_first_data_block);
596 +
597 +               /* Set bits for block and inode bitmaps, and inode table */
598 +               ext3_set_bit(le32_to_cpu(gdp->bg_block_bitmap) - start,
599 +                            bh->b_data);
600 +               ext3_set_bit(le32_to_cpu(gdp->bg_inode_bitmap) - start,
601 +                            bh->b_data);
602 +               for (bit = le32_to_cpu(gdp->bg_inode_table) - start,
603 +                    bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++)
604 +                       ext3_set_bit(bit, bh->b_data);
605 +       }
606 +
607 +       return free_blocks - sbi->s_itb_per_group - 2; /* less the inode table and the two bitmap blocks */
608 +}
609 +
610  /*
611   * Read the bitmap for a given block_group, reading into the specified 
612   * slot in the superblock's bitmap cache.
613 @@ -87,7 +157,19 @@ read_block_bitmap(struct super_block *sb
614         desc = ext3_get_group_desc (sb, block_group, NULL);
615         if (!desc)
616                 goto error_out;
617 -       bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
618 +       if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) {
619 +               bh = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap));
620 +               if (!buffer_uptodate(bh)) {
621 +                       lock_buffer(bh);
622 +                       if (!buffer_uptodate(bh)) {
623 +                               ext3_init_block_bitmap(sb, bh,block_group,desc);
624 +                               set_buffer_uptodate(bh);
625 +                       }
626 +                       unlock_buffer(bh);
627 +               }
628 +       } else {
629 +               bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
630 +       }
631         if (!bh)
632                 ext3_error (sb, "read_block_bitmap",
633                             "Cannot read block bitmap - "
634 @@ -432,6 +514,7 @@ do_more:
635         gdp->bg_free_blocks_count =
636                 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) +
637                         dquot_freed_blocks);
638 +       gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp);
639         spin_unlock(sb_bgl_lock(sbi, block_group));
640         percpu_counter_mod(&sbi->s_freeblocks_counter, count);
641  
642 @@ -1372,8 +1455,11 @@ allocated:
643                         ret_block, goal_hits, goal_attempts);
644  
645         spin_lock(sb_bgl_lock(sbi, group_no));
646 +       if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT))
647 +               gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT);
648         gdp->bg_free_blocks_count =
649                         cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
650 +       gdp->bg_checksum = ext3_group_desc_csum(sbi, group_no, gdp);
651         spin_unlock(sb_bgl_lock(sbi, group_no));
652         percpu_counter_mod(&sbi->s_freeblocks_counter, -1);
653