Whamcloud - gitweb
Branch HEAD
[fs/lustre-release.git] / lustre / kernel_patches / patches / ext3-uninit-2.6-sles10.patch
1 Add support for the uninit_groups feature to the kernel.
2
3 Keep a high water mark of used inodes for each group to improve e2fsck time.
4 Block and inode bitmaps can be uninitialized on disk via a flag in the
5 group descriptor to avoid reading or scanning them at e2fsck time.
6 A checksum of each group descriptor is used to ensure that corruption in
7 the group descriptor's bit flags does not cause incorrect operation.
8
9 Index: linux-2.6.16.27-0.9-full/include/linux/ext3_fs.h
10 ===================================================================
11 --- linux-2.6.16.27-0.9-full.orig/include/linux/ext3_fs.h       2007-03-28 18:20:16.000000000 +0400
12 +++ linux-2.6.16.27-0.9-full/include/linux/ext3_fs.h    2007-03-28 18:30:06.000000000 +0400
13 @@ -153,16 +153,22 @@ struct ext3_allocation_request {
14   */
15  struct ext3_group_desc
16  {
17 -       __le32  bg_block_bitmap;                /* Blocks bitmap block */
18 -       __le32  bg_inode_bitmap;                /* Inodes bitmap block */
19 +       __le32  bg_block_bitmap;        /* Blocks bitmap block */
20 +       __le32  bg_inode_bitmap;        /* Inodes bitmap block */
21         __le32  bg_inode_table;         /* Inodes table block */
22         __le16  bg_free_blocks_count;   /* Free blocks count */
23         __le16  bg_free_inodes_count;   /* Free inodes count */
24         __le16  bg_used_dirs_count;     /* Directories count */
25 -       __u16   bg_pad;
26 -       __le32  bg_reserved[3];
27 +       __le16  bg_flags;               /* EXT3_BG_flags (UNINIT, etc) */
28 +       __le32  bg_reserved[2];         /* Likely block/inode bitmap checksum */
29 +       __le16  bg_itable_unused;       /* Unused inodes count */
30 +       __le16  bg_checksum;            /* crc16(sb_uuid+group+desc) */
31  };
32  
33 +#define EXT3_BG_INODE_UNINIT   0x0001 /* Inode table/bitmap not in use */
34 +#define EXT3_BG_BLOCK_UNINIT   0x0002 /* Block bitmap not in use */
35 +#define EXT3_BG_INODE_ZEROED   0x0004 /* On-disk itable initialized to zero */
36 +
37  /*
38   * Macro-instructions used to manage group descriptors
39   */
40 @@ -590,6 +596,7 @@ static inline struct ext3_inode_info *EX
41  #define EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER    0x0001
42  #define EXT3_FEATURE_RO_COMPAT_LARGE_FILE      0x0002
43  #define EXT3_FEATURE_RO_COMPAT_BTREE_DIR       0x0004
44 +#define EXT4_FEATURE_RO_COMPAT_GDT_CSUM                0x0010
45  #define EXT4_FEATURE_RO_COMPAT_DIR_NLINK       0x0020
46  
47  #define EXT3_FEATURE_INCOMPAT_COMPRESSION      0x0001
48 @@ -606,6 +613,7 @@ static inline struct ext3_inode_info *EX
49                                          EXT3_FEATURE_INCOMPAT_EXTENTS)
50  #define EXT3_FEATURE_RO_COMPAT_SUPP    (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \
51                                          EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \
52 +                                        EXT4_FEATURE_RO_COMPAT_GDT_CSUM| \
53                                          EXT4_FEATURE_RO_COMPAT_DIR_NLINK| \
54                                          EXT3_FEATURE_RO_COMPAT_BTREE_DIR)
55  
56 Index: linux-2.6.16.27-0.9-full/fs/ext3/resize.c
57 ===================================================================
58 --- linux-2.6.16.27-0.9-full.orig/fs/ext3/resize.c      2007-03-13 02:56:52.000000000 +0300
59 +++ linux-2.6.16.27-0.9-full/fs/ext3/resize.c   2007-03-28 18:30:06.000000000 +0400
60 @@ -19,6 +19,7 @@
61  #include <linux/errno.h>
62  #include <linux/slab.h>
63  
64 +#include "group.h"
65  
66  #define outside(b, first, last)        ((b) < (first) || (b) >= (last))
67  #define inside(b, first, last) ((b) >= (first) && (b) < (last))
68 @@ -818,6 +819,7 @@ int ext3_group_add(struct super_block *s
69         gdp->bg_inode_table = cpu_to_le32(input->inode_table);
70         gdp->bg_free_blocks_count = cpu_to_le16(input->free_blocks_count);
71         gdp->bg_free_inodes_count = cpu_to_le16(EXT3_INODES_PER_GROUP(sb));
72 +       gdp->bg_checksum = ext3_group_desc_csum(sbi, input->group, gdp);
73  
74         /*
75          * Make the new blocks and inodes valid next.  We do this before
76 Index: linux-2.6.16.27-0.9-full/fs/ext3/super.c
77 ===================================================================
78 --- linux-2.6.16.27-0.9-full.orig/fs/ext3/super.c       2007-03-28 18:25:51.000000000 +0400
79 +++ linux-2.6.16.27-0.9-full/fs/ext3/super.c    2007-03-28 18:30:06.000000000 +0400
80 @@ -42,6 +42,7 @@
81  #include "xattr.h"
82  #include "acl.h"
83  #include "namei.h"
84 +#include "group.h"
85  
86  static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
87                              unsigned long journal_devnum);
88 @@ -1221,6 +1222,90 @@ static int ext3_setup_super(struct super
89         return res;
90  }
91  
92 +#if !defined(CONFIG_CRC16) && !defined(CONFIG_CRC16_MODULE)
93 +/** CRC table for the CRC-16. The poly is 0x8005 (x^16 + x^15 + x^2 + 1) */
94 +__u16 const crc16_table[256] = {
95 +       0x0000, 0xC0C1, 0xC181, 0x0140, 0xC301, 0x03C0, 0x0280, 0xC241,
96 +       0xC601, 0x06C0, 0x0780, 0xC741, 0x0500, 0xC5C1, 0xC481, 0x0440,
97 +       0xCC01, 0x0CC0, 0x0D80, 0xCD41, 0x0F00, 0xCFC1, 0xCE81, 0x0E40,
98 +       0x0A00, 0xCAC1, 0xCB81, 0x0B40, 0xC901, 0x09C0, 0x0880, 0xC841,
99 +       0xD801, 0x18C0, 0x1980, 0xD941, 0x1B00, 0xDBC1, 0xDA81, 0x1A40,
100 +       0x1E00, 0xDEC1, 0xDF81, 0x1F40, 0xDD01, 0x1DC0, 0x1C80, 0xDC41,
101 +       0x1400, 0xD4C1, 0xD581, 0x1540, 0xD701, 0x17C0, 0x1680, 0xD641,
102 +       0xD201, 0x12C0, 0x1380, 0xD341, 0x1100, 0xD1C1, 0xD081, 0x1040,
103 +       0xF001, 0x30C0, 0x3180, 0xF141, 0x3300, 0xF3C1, 0xF281, 0x3240,
104 +       0x3600, 0xF6C1, 0xF781, 0x3740, 0xF501, 0x35C0, 0x3480, 0xF441,
105 +       0x3C00, 0xFCC1, 0xFD81, 0x3D40, 0xFF01, 0x3FC0, 0x3E80, 0xFE41,
106 +       0xFA01, 0x3AC0, 0x3B80, 0xFB41, 0x3900, 0xF9C1, 0xF881, 0x3840,
107 +       0x2800, 0xE8C1, 0xE981, 0x2940, 0xEB01, 0x2BC0, 0x2A80, 0xEA41,
108 +       0xEE01, 0x2EC0, 0x2F80, 0xEF41, 0x2D00, 0xEDC1, 0xEC81, 0x2C40,
109 +       0xE401, 0x24C0, 0x2580, 0xE541, 0x2700, 0xE7C1, 0xE681, 0x2640,
110 +       0x2200, 0xE2C1, 0xE381, 0x2340, 0xE101, 0x21C0, 0x2080, 0xE041,
111 +       0xA001, 0x60C0, 0x6180, 0xA141, 0x6300, 0xA3C1, 0xA281, 0x6240,
112 +       0x6600, 0xA6C1, 0xA781, 0x6740, 0xA501, 0x65C0, 0x6480, 0xA441,
113 +       0x6C00, 0xACC1, 0xAD81, 0x6D40, 0xAF01, 0x6FC0, 0x6E80, 0xAE41,
114 +       0xAA01, 0x6AC0, 0x6B80, 0xAB41, 0x6900, 0xA9C1, 0xA881, 0x6840,
115 +       0x7800, 0xB8C1, 0xB981, 0x7940, 0xBB01, 0x7BC0, 0x7A80, 0xBA41,
116 +       0xBE01, 0x7EC0, 0x7F80, 0xBF41, 0x7D00, 0xBDC1, 0xBC81, 0x7C40,
117 +       0xB401, 0x74C0, 0x7580, 0xB541, 0x7700, 0xB7C1, 0xB681, 0x7640,
118 +       0x7200, 0xB2C1, 0xB381, 0x7340, 0xB101, 0x71C0, 0x7080, 0xB041,
119 +       0x5000, 0x90C1, 0x9181, 0x5140, 0x9301, 0x53C0, 0x5280, 0x9241,
120 +       0x9601, 0x56C0, 0x5780, 0x9741, 0x5500, 0x95C1, 0x9481, 0x5440,
121 +       0x9C01, 0x5CC0, 0x5D80, 0x9D41, 0x5F00, 0x9FC1, 0x9E81, 0x5E40,
122 +       0x5A00, 0x9AC1, 0x9B81, 0x5B40, 0x9901, 0x59C0, 0x5880, 0x9841,
123 +       0x8801, 0x48C0, 0x4980, 0x8941, 0x4B00, 0x8BC1, 0x8A81, 0x4A40,
124 +       0x4E00, 0x8EC1, 0x8F81, 0x4F40, 0x8D01, 0x4DC0, 0x4C80, 0x8C41,
125 +       0x4400, 0x84C1, 0x8581, 0x4540, 0x8701, 0x47C0, 0x4680, 0x8641,
126 +       0x8201, 0x42C0, 0x4380, 0x8341, 0x4100, 0x81C1, 0x8081, 0x4040
127 +};
128 +
129 +static inline __u16 crc16_byte(__u16 crc, const __u8 data)
130 +{
131 +       return (crc >> 8) ^ crc16_table[(crc ^ data) & 0xff];
132 +}
133 +
134 +__u16 crc16(__u16 crc, __u8 const *buffer, size_t len)
135 +{
136 +       while (len--)
137 +               crc = crc16_byte(crc, *buffer++);
138 +       return crc;
139 +}
140 +#endif
141 +
142 +__le16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 block_group,
143 +                           struct ext3_group_desc *gdp)
144 +{
145 +       __u16 crc = 0;
146 +
147 +       if (sbi->s_es->s_feature_ro_compat &
148 +           cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
149 +               int offset = offsetof(struct ext3_group_desc, bg_checksum);
150 +               __le32 le_group = cpu_to_le32(block_group);
151 +
152 +               crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
153 +               crc = crc16(crc, (__u8 *)&le_group, sizeof(le_group));
154 +               crc = crc16(crc, (__u8 *)gdp, offset);
155 +               offset += sizeof(gdp->bg_checksum); /* skip checksum */
156 +               BUG_ON(offset != sizeof(*gdp)); /* XXX handle s_desc_size */
157 +               /* for checksum of struct ext4_group_desc do the rest...
158 +               if (offset < sbi->s_es->s_desc_size) {
159 +                       crc = crc16(crc, (__u8 *)gdp + offset,
160 +                                   sbi->s_es->s_desc_size - offset);
161 +                */
162 +       }
163 +
164 +       return cpu_to_le16(crc);
165 +}
166 +
167 +int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 block_group,
168 +                               struct ext3_group_desc *gdp)
169 +{
170 +       if (gdp->bg_checksum != ext3_group_desc_csum(sbi, block_group, gdp))
171 +               return 0;
172 +
173 +       return 1;
174 +}
175 +
176  /* Called at mount-time, super-block is locked */
177  static int ext3_check_descriptors (struct super_block * sb)
178  {
179 @@ -1270,6 +1355,13 @@ static int ext3_check_descriptors (struc
180                                         le32_to_cpu(gdp->bg_inode_table));
181                         return 0;
182                 }
183 +               if (!ext3_group_desc_csum_verify(sbi, i, gdp)) {
184 +                       ext3_error(sb, __FUNCTION__,
185 +                                  "Checksum for group %d failed (%u!=%u)\n", i,
186 +                                  le16_to_cpu(ext3_group_desc_csum(sbi,i,gdp)),
187 +                                  le16_to_cpu(gdp->bg_checksum));
188 +                       return 0;
189 +               }
190                 block += EXT3_BLOCKS_PER_GROUP(sb);
191                 gdp++;
192         }
193 Index: linux-2.6.16.27-0.9-full/fs/ext3/group.h
194 ===================================================================
195 --- linux-2.6.16.27-0.9-full.orig/fs/ext3/group.h       2007-02-13 18:39:59.640066087 +0300
196 +++ linux-2.6.16.27-0.9-full/fs/ext3/group.h    2007-03-28 18:30:06.000000000 +0400
197 @@ -0,0 +1,29 @@
198 +/*
199 + *  linux/fs/ext3/group.h
200 + *
201 + * Copyright (C) 2007 Cluster File Systems, Inc
202 + *
203 + * Author: Andreas Dilger <adilger@clusterfs.com>
204 + */
205 +
206 +#ifndef _LINUX_EXT3_GROUP_H
207 +#define _LINUX_EXT3_GROUP_H
208 +#if defined(CONFIG_CRC16) || defined(CONFIG_CRC16_MODULE)
209 +#include <linux/crc16.h>
210 +#endif
211 +
212 +extern __le16 ext3_group_desc_csum(struct ext3_sb_info *sbi, __u32 group,
213 +                                  struct ext3_group_desc *gdp);
214 +extern int ext3_group_desc_csum_verify(struct ext3_sb_info *sbi, __u32 group,
215 +                                      struct ext3_group_desc *gdp);
216 +struct buffer_head *read_block_bitmap(struct super_block *sb,
217 +                                     unsigned int block_group);
218 +extern unsigned ext3_init_block_bitmap(struct super_block *sb,
219 +                                      struct buffer_head *bh, int group,
220 +                                      struct ext3_group_desc *desc);
221 +#define ext3_free_blocks_after_init(sb, group, desc)                   \
222 +               ext3_init_block_bitmap(sb, NULL, group, desc)
223 +extern unsigned ext3_init_inode_bitmap(struct super_block *sb,
224 +                                      struct buffer_head *bh, int group,
225 +                                      struct ext3_group_desc *desc);
226 +#endif /* _LINUX_EXT3_GROUP_H */
227 Index: linux-2.6.16.27-0.9-full/fs/ext3/ialloc.c
228 ===================================================================
229 --- linux-2.6.16.27-0.9-full.orig/fs/ext3/ialloc.c      2007-03-28 18:20:17.000000000 +0400
230 +++ linux-2.6.16.27-0.9-full/fs/ext3/ialloc.c   2007-03-28 18:30:06.000000000 +0400
231 @@ -28,6 +28,7 @@
232  
233  #include "xattr.h"
234  #include "acl.h"
235 +#include "group.h"
236  
237  /*
238   * ialloc.c contains the inodes allocation and deallocation routines
239 @@ -43,6 +44,52 @@
240   * the free blocks count in the block.
241   */
242  
243 +/*
244 + * To avoid calling the atomic setbit hundreds or thousands of times, we only
245 + * need to use it within a single byte (to ensure we get endianness right).
246 + * We can use memset for the rest of the bitmap as there are no other users.
247 + */
248 +static void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
249 +{
250 +       int i;
251 +
252 +       if (start_bit >= end_bit)
253 +               return;
254 +
255 +       ext3_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);
256 +       for (i = start_bit; i < ((start_bit + 7) & ~7UL); i++)
257 +               ext3_set_bit(i, bitmap);
258 +       if (i < end_bit)
259 +               memset(bitmap + (i >> 3), 0xff, (end_bit - i) >> 3);
260 +}
261 +
262 +/* Initializes an uninitialized inode bitmap */
263 +unsigned ext3_init_inode_bitmap(struct super_block *sb,
264 +                               struct buffer_head *bh, int block_group,
265 +                               struct ext3_group_desc *gdp)
266 +{
267 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
268 +
269 +       J_ASSERT_BH(bh, buffer_locked(bh));
270 +
271 +       /* If checksum is bad, mark all blocks and inodes in use to prevent
272 +        * allocation, essentially implementing a per-group read-only flag. */
273 +       if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) {
274 +               ext3_error(sb, __FUNCTION__, "Checksum bad for group %u\n",
275 +                          block_group);
276 +               gdp->bg_free_blocks_count = 0;
277 +               gdp->bg_free_inodes_count = 0;
278 +               gdp->bg_itable_unused = 0;
279 +               memset(bh->b_data, 0xff, sb->s_blocksize);
280 +               return 0;
281 +       }
282 +
283 +       memset(bh->b_data, 0, (EXT3_INODES_PER_GROUP(sb) + 7) / 8);
284 +       mark_bitmap_end(EXT3_INODES_PER_GROUP(sb), EXT3_BLOCKS_PER_GROUP(sb),
285 +                       bh->b_data);
286 +
287 +       return EXT3_INODES_PER_GROUP(sb);
288 +}
289  
290  /*
291   * Read the inode allocation bitmap for a given block_group, reading
292 @@ -59,8 +106,19 @@ read_inode_bitmap(struct super_block * s
293         desc = ext3_get_group_desc(sb, block_group, NULL);
294         if (!desc)
295                 goto error_out;
296 -
297 -       bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
298 +       if (desc->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) {
299 +               bh = sb_getblk(sb, le32_to_cpu(desc->bg_inode_bitmap));
300 +               if (!buffer_uptodate(bh)) {
301 +                       lock_buffer(bh);
302 +                       if (!buffer_uptodate(bh)) {
303 +                               ext3_init_inode_bitmap(sb, bh,block_group,desc);
304 +                               set_buffer_uptodate(bh);
305 +                       }
306 +                       unlock_buffer(bh);
307 +               }
308 +       } else {
309 +               bh = sb_bread(sb, le32_to_cpu(desc->bg_inode_bitmap));
310 +       }
311         if (!bh)
312                 ext3_error(sb, "read_inode_bitmap",
313                             "Cannot read inode bitmap - "
314 @@ -169,6 +227,8 @@ void ext3_free_inode (handle_t *handle, 
315                         if (is_directory)
316                                 gdp->bg_used_dirs_count = cpu_to_le16(
317                                   le16_to_cpu(gdp->bg_used_dirs_count) - 1);
318 +                       gdp->bg_checksum = ext3_group_desc_csum(sbi,block_group,
319 +                                                               gdp);
320                         spin_unlock(sb_bgl_lock(sbi, block_group));
321                         percpu_counter_inc(&sbi->s_freeinodes_counter);
322                         if (is_directory)
323 @@ -453,7 +513,7 @@ struct inode *ext3_new_inode(handle_t *h
324         struct ext3_sb_info *sbi;
325         int err = 0;
326         struct inode *ret;
327 -       int i;
328 +       int i, free = 0;
329  
330         /* Cannot create files in a deleted directory */
331         if (!dir || !dir->i_nlink)
332 @@ -570,11 +630,13 @@ repeat_in_this_group:
333         goto out;
334  
335  got:
336 -       ino += group * EXT3_INODES_PER_GROUP(sb) + 1;
337 -       if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
338 -               ext3_error (sb, "ext3_new_inode",
339 -                           "reserved inode or inode > inodes count - "
340 -                           "block_group = %d, inode=%lu", group, ino);
341 +       ino++;
342 +       if ((group == 0 && ino < EXT3_FIRST_INO(sb)) ||
343 +           ino > EXT3_INODES_PER_GROUP(sb)) {
344 +               ext3_error(sb, __FUNCTION__,
345 +                          "reserved inode or inode > inodes count - "
346 +                          "block_group = %d, inode=%lu", group,
347 +                          ino + group * EXT3_INODES_PER_GROUP(sb));
348                 err = -EIO;
349                 goto fail;
350         }
351 @@ -582,13 +644,65 @@ got:
352         BUFFER_TRACE(bh2, "get_write_access");
353         err = ext3_journal_get_write_access(handle, bh2);
354         if (err) goto fail;
355 +
356 +       /* We may have to initialize the block bitmap if it isn't already */
357 +       if (EXT3_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
358 +           gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) {
359 +               struct buffer_head *block_bh = read_block_bitmap(sb, group);
360 +
361 +               BUFFER_TRACE(block_bh, "get block bitmap access");
362 +               err = ext3_journal_get_write_access(handle, block_bh);
363 +               if (err) {
364 +                       brelse(block_bh);
365 +                       goto fail;
366 +               }
367 +
368 +               free = 0;
369 +               spin_lock(sb_bgl_lock(sbi, group));
370 +               /* recheck and clear flag under lock if we still need to */
371 +               if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) {
372 +                       gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT);
373 +                       free = ext3_free_blocks_after_init(sb, group, gdp);
374 +                       gdp->bg_free_blocks_count = cpu_to_le16(free);
375 +               }
376 +               spin_unlock(sb_bgl_lock(sbi, group));
377 +
378 +               /* Don't need to dirty bitmap block if we didn't change it */
379 +               if (free) {
380 +                       BUFFER_TRACE(block_bh, "dirty block bitmap");
381 +                       err = ext3_journal_dirty_metadata(handle, block_bh);
382 +               }
383 +
384 +               brelse(block_bh);
385 +               if (err)
386 +                       goto fail;
387 +       }
388 +
389         spin_lock(sb_bgl_lock(sbi, group));
390 +       /* If we didn't allocate from within the initialized part of the inode
391 +        * table then we need to initialize up to this inode. */
392 +       if (EXT3_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {
393 +               if (gdp->bg_flags & cpu_to_le16(EXT3_BG_INODE_UNINIT)) {
394 +                       gdp->bg_flags &= cpu_to_le16(~EXT3_BG_INODE_UNINIT);
395 +                       free = EXT3_INODES_PER_GROUP(sb);
396 +               } else {
397 +                       free = EXT3_INODES_PER_GROUP(sb) -
398 +                               le16_to_cpu(gdp->bg_itable_unused);
399 +               }
400 +
401 +               if (ino > free) {
402 +                       gdp->bg_itable_unused =
403 +                               cpu_to_le16(EXT3_INODES_PER_GROUP(sb) - ino);
404 +               }
405 +       }
406 +
407         gdp->bg_free_inodes_count =
408                 cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
409         if (S_ISDIR(mode)) {
410                 gdp->bg_used_dirs_count =
411                         cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
412         }
413 +       gdp->bg_checksum = ext3_group_desc_csum(sbi, group, gdp);
414         spin_unlock(sb_bgl_lock(sbi, group));
415         BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
416         err = ext3_journal_dirty_metadata(handle, bh2);
417 @@ -610,7 +724,7 @@ got:
418                 inode->i_gid = current->fsgid;
419         inode->i_mode = mode;
420  
421 -       inode->i_ino = ino;
422 +       inode->i_ino = ino + group * EXT3_INODES_PER_GROUP(sb);
423         /* This is the optimal IO size (for stat), not the fs block size */
424         inode->i_blksize = PAGE_SIZE;
425         inode->i_blocks = 0;
426 Index: linux-2.6.16.27-0.9-full/fs/ext3/mballoc.c
427 ===================================================================
428 --- linux-2.6.16.27-0.9-full.orig/fs/ext3/mballoc.c     2007-03-28 16:03:19.000000000 +0400
429 +++ linux-2.6.16.27-0.9-full/fs/ext3/mballoc.c  2007-03-28 18:30:36.000000000 +0400
430 @@ -36,6 +36,8 @@
431  #include <linux/seq_file.h>
432  #include <linux/version.h>
433  
434 +#include "group.h"
435 +
436  /*
437   * MUSTDO:
438   *   - test ext3_ext_search_left() and ext3_ext_search_right()
439 @@ -323,6 +325,7 @@ struct ext3_group_info {
440         unsigned long   bb_state;
441         unsigned long   bb_tid;
442         struct ext3_free_metadata *bb_md_cur;
443 +       struct ext3_group_desc *bb_gdp;
444         unsigned short  bb_first_free;
445         unsigned short  bb_free;
446         unsigned short  bb_fragments;
447 @@ -928,10 +931,7 @@ static int ext3_mb_init_cache(struct pag
448                 if (first_group + i >= EXT3_SB(sb)->s_groups_count)
449                         break;
450  
451 -               err = -EIO;
452 -               desc = ext3_get_group_desc(sb, first_group + i, NULL);
453 -               if (desc == NULL)
454 -                       goto out;
455 +               desc = EXT3_GROUP_INFO(sb, first_group + i)->bb_gdp;
456  
457                 err = -ENOMEM;
458                 bh[i] = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap));
459 @@ -946,7 +946,12 @@ static int ext3_mb_init_cache(struct pag
460                         unlock_buffer(bh[i]);
461                         continue;
462                 }
463 -
464 +               if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) {
465 +                       ext3_init_block_bitmap(sb, bh[i], first_group + i,desc);
466 +                       set_buffer_uptodate(bh[i]);
467 +                       unlock_buffer(bh[i]);
468 +                       continue;
469 +               }
470                 get_bh(bh[i]);
471                 bh[i]->b_end_io = end_buffer_read_sync;
472                 submit_bh(READ, bh[i]);
473 @@ -1703,6 +1708,10 @@ static int ext3_mb_good_group(struct ext
474         switch (cr) {
475                 case 0:
476                         BUG_ON(ac->ac_2order == 0);
477 +                       /* If this group is uninitialized, skip it initially */
478 +                       if (grp->bb_gdp->bg_flags &
479 +                           cpu_to_le16(EXT3_BG_BLOCK_UNINIT))
480 +                               return 0;
481                         bits = ac->ac_sb->s_blocksize_bits + 1;
482                         for (i = ac->ac_2order; i <= bits; i++)
483                                 if (grp->bb_counters[i] > 0)
484 @@ -1796,7 +1805,9 @@ repeat:
485                         }
486  
487                         ac->ac_groups_scanned++;
488 -                       if (cr == 0)
489 +                       if (cr == 0 || (e3b.bd_info->bb_gdp->bg_flags &
490 +                                       cpu_to_le16(EXT3_BG_BLOCK_UNINIT) &&
491 +                                       ac->ac_2order != 0))
492                                 ext3_mb_simple_scan_group(ac, &e3b);
493                         else if (cr == 1 && ac->ac_g_ex.fe_len == sbi->s_stripe)
494                                 ext3_mb_scan_aligned(ac, &e3b);
495 @@ -2267,12 +2278,13 @@ int ext3_mb_init_backend(struct super_bl
496                         i--;
497                         goto err_freebuddy;
498                 }
499 +               memset(meta_group_info[j], 0, len);
500                 desc = ext3_get_group_desc(sb, i, NULL);
501 +               meta_group_info[j]->bb_gdp = desc;
502                 if (desc == NULL) {
503                         printk(KERN_ERR"EXT3-fs: can't read descriptor %u\n",i);
504                         goto err_freebuddy;
505                 }
506 -               memset(meta_group_info[j], 0, len);
507                 set_bit(EXT3_GROUP_INFO_NEED_INIT_BIT,
508                         &meta_group_info[j]->bb_state);
509  
510 @@ -2936,9 +2948,17 @@ int ext3_mb_mark_diskspace_used(struct e
511         mb_set_bits(bitmap_bh->b_data, ac->ac_b_ex.fe_start, ac->ac_b_ex.fe_len);
512  
513         spin_lock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
514 +       if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) {
515 +               gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT);
516 +               gdp->bg_free_blocks_count =
517 +                       cpu_to_le16(ext3_free_blocks_after_init(sb,
518 +                                                           ac->ac_b_ex.fe_group,
519 +                                                           gdp));
520 +       }
521         gdp->bg_free_blocks_count =
522                 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)
523                                 - ac->ac_b_ex.fe_len);
524 +       gdp->bg_checksum = ext3_group_desc_csum(sbi, ac->ac_b_ex.fe_group, gdp);
525         spin_unlock(sb_bgl_lock(sbi, ac->ac_b_ex.fe_group));
526         percpu_counter_mod(&sbi->s_freeblocks_counter, - ac->ac_b_ex.fe_len);
527  
528 @@ -4303,6 +4323,7 @@ do_more:
529         spin_lock(sb_bgl_lock(sbi, block_group));
530         gdp->bg_free_blocks_count =
531                 cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + count);
532 +       gdp->bg_checksum = ext3_group_desc_csum(sbi, block_group, gdp);
533         spin_unlock(sb_bgl_lock(sbi, block_group));
534         percpu_counter_mod(&sbi->s_freeblocks_counter, count);
535  
536 Index: linux-2.6.16.27-0.9-full/fs/ext3/balloc.c
537 ===================================================================
538 --- linux-2.6.16.27-0.9-full.orig/fs/ext3/balloc.c      2007-03-28 16:03:20.000000000 +0400
539 +++ linux-2.6.16.27-0.9-full/fs/ext3/balloc.c   2007-03-28 18:30:06.000000000 +0400
540 @@ -21,6 +21,7 @@
541  #include <linux/quotaops.h>
542  #include <linux/buffer_head.h>
543  
544 +#include "group.h"
545  /*
546   * balloc.c contains the blocks allocation and deallocation routines
547   */
548 @@ -74,6 +75,75 @@ struct ext3_group_desc * ext3_get_group_
549         return desc + offset;
550  }
551  
552 +/* Initializes an uninitialized block bitmap if given, and returns the
553 + * number of blocks free in the group. */
554 +unsigned ext3_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
555 +                               int block_group, struct ext3_group_desc *gdp)
556 +{
557 +       unsigned long start;
558 +       int bit, bit_max;
559 +       unsigned free_blocks;
560 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
561 +
562 +       if (bh) {
563 +               J_ASSERT_BH(bh, buffer_locked(bh));
564 +
565 +               /* On bad checksum mark all blocks in use to prevent allocation,
566 +                * essentially implementing a per-group read-only flag. */
567 +               if (!ext3_group_desc_csum_verify(sbi, block_group, gdp)) {
568 +                       ext3_error(sb, __FUNCTION__,
569 +                                  "Checksum bad for group %u\n", block_group);
570 +                       gdp->bg_free_blocks_count = 0;
571 +                       gdp->bg_free_inodes_count = 0;
572 +                       gdp->bg_itable_unused = 0;
573 +                       memset(bh->b_data, 0xff, sb->s_blocksize);
574 +                       return 0;
575 +               }
576 +               memset(bh->b_data, 0, sb->s_blocksize);
577 +       }
578 +
579 +       /* Check for superblock and gdt backups in this group */
580 +       bit_max = ext3_bg_has_super(sb, block_group);
581 +
582 +       if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) ||
583 +           block_group < le32_to_cpu(sbi->s_es->s_first_meta_bg) *
584 +                         sbi->s_desc_per_block) {
585 +               if (bit_max) {
586 +                       bit_max += ext3_bg_num_gdb(sb, block_group);
587 +                       bit_max +=le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks);
588 +               }
589 +       } else { /* For META_BG_BLOCK_GROUPS */
590 +               int group_rel = (block_group -
591 +                                le32_to_cpu(sbi->s_es->s_first_meta_bg)) %
592 +                               EXT3_DESC_PER_BLOCK(sb);
593 +               if (group_rel == 0 || group_rel == 1 ||
594 +                   (group_rel == EXT3_DESC_PER_BLOCK(sb) - 1))
595 +                       bit_max += 1;
596 +       }
597 +
598 +       /* Last and first groups are always initialized */
599 +       free_blocks = EXT3_BLOCKS_PER_GROUP(sb) - bit_max;
600 +
601 +       if (bh) {
602 +               for (bit = 0; bit < bit_max; bit++)
603 +                       ext3_set_bit(bit, bh->b_data);
604 +
605 +               start = block_group * EXT3_BLOCKS_PER_GROUP(sb) +
606 +                       le32_to_cpu(sbi->s_es->s_first_data_block);
607 +
608 +               /* Set bits for block and inode bitmaps, and inode table */
609 +               ext3_set_bit(le32_to_cpu(gdp->bg_block_bitmap) - start,
610 +                            bh->b_data);
611 +               ext3_set_bit(le32_to_cpu(gdp->bg_inode_bitmap) - start,
612 +                            bh->b_data);
613 +               for (bit = le32_to_cpu(gdp->bg_inode_table) - start,
614 +                    bit_max = bit + sbi->s_itb_per_group; bit < bit_max; bit++)
615 +                       ext3_set_bit(bit, bh->b_data);
616 +       }
617 +
618 +       return free_blocks - sbi->s_itb_per_group - 2;
619 +}
620 +
621  /*
622   * Read the bitmap for a given block_group, reading into the specified 
623   * slot in the superblock's bitmap cache.
624 @@ -89,7 +159,19 @@ read_block_bitmap(struct super_block *sb
625         desc = ext3_get_group_desc (sb, block_group, NULL);
626         if (!desc)
627                 goto error_out;
628 -       bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
629 +       if (desc->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT)) {
630 +               bh = sb_getblk(sb, le32_to_cpu(desc->bg_block_bitmap));
631 +               if (!buffer_uptodate(bh)) {
632 +                       lock_buffer(bh);
633 +                       if (!buffer_uptodate(bh)) {
634 +                               ext3_init_block_bitmap(sb, bh,block_group,desc);
635 +                               set_buffer_uptodate(bh);
636 +                       }
637 +                       unlock_buffer(bh);
638 +               }
639 +       } else {
640 +               bh = sb_bread(sb, le32_to_cpu(desc->bg_block_bitmap));
641 +       }
642         if (!bh)
643                 ext3_error (sb, "read_block_bitmap",
644                             "Cannot read block bitmap - "
645 @@ -468,6 +550,7 @@ do_more:
646         desc->bg_free_blocks_count =
647                 cpu_to_le16(le16_to_cpu(desc->bg_free_blocks_count) +
648                         group_freed);
649 +       desc->bg_checksum = ext3_group_desc_csum(sbi, block_group, desc);
650         spin_unlock(sb_bgl_lock(sbi, block_group));
651         percpu_counter_mod(&sbi->s_freeblocks_counter, count);
652  
653 @@ -1378,8 +1461,11 @@ allocated:
654                         ret_block, goal_hits, goal_attempts);
655  
656         spin_lock(sb_bgl_lock(sbi, group_no));
657 +       if (gdp->bg_flags & cpu_to_le16(EXT3_BG_BLOCK_UNINIT))
658 +               gdp->bg_flags &= cpu_to_le16(~EXT3_BG_BLOCK_UNINIT);
659         gdp->bg_free_blocks_count =
660                         cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
661 +       gdp->bg_checksum = ext3_group_desc_csum(sbi, group_no, gdp);
662         spin_unlock(sb_bgl_lock(sbi, group_no));
663         percpu_counter_mod(&sbi->s_freeblocks_counter, -1);
664  
665
666 %diffstat
667  fs/ext3/balloc.c        |   88 +++++++++++++++++++++++++++++
668  fs/ext3/group.h         |   38 ++++++++++++
669  fs/ext3/ialloc.c        |  144 +++++++++++++++++++++++++++++++++++++++++++-----
670  fs/ext3/mballoc.c       |   35 +++++++++--
671  fs/ext3/resize.c        |    2 
672  fs/ext3/super.c         |   92 ++++++++++++++++++++++++++++++
673  include/linux/ext3_fs.h |   16 ++++-
674  7 files changed, 388 insertions(+), 27 deletions(-)