Whamcloud - gitweb
- landing of b_hd_cleanup_merge to HEAD.
[fs/lustre-release.git] / lustre / kernel_patches / patches / ext3-inode-reuse-2.4.24.patch
1 Index: linux-2.4.20/fs/ext3/ialloc.c
2 ===================================================================
3 --- linux-2.4.20.orig/fs/ext3/ialloc.c  2003-10-25 00:37:13.000000000 +0400
4 +++ linux-2.4.20/fs/ext3/ialloc.c       2003-10-29 20:33:33.000000000 +0300
5 @@ -241,11 +241,16 @@
6  
7         bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr];
8  
9 -       BUFFER_TRACE(bh, "get_write_access");
10 -       fatal = ext3_journal_get_write_access(handle, bh);
11 +       BUFFER_TRACE(bh, "get_undo_access");
12 +       fatal = ext3_journal_get_undo_access(handle, bh);
13         if (fatal)
14                 goto error_return;
15  
16 +       /* keep the inode from being reused in the same transaction -bzzz */
17 +       BUFFER_TRACE(bh, "clear in b_committed_data");
18 +       J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data != NULL);
19 +       ext3_set_bit(bit, bh2jh(bh)->b_committed_data);
20 +
21         /* Ok, now we can actually update the inode bitmaps.. */
22         if (!ext3_clear_bit (bit, bh->b_data))
23                 ext3_error (sb, "ext3_free_inode",
24 @@ -319,6 +324,131 @@
25         return 0;
26  }
27  
28 +/*
29 + * Return 1 when bit nr is clear in bh->b_data and, if the buffer has a
30 + * b_committed_data copy attached, clear there as well; otherwise 0.
31 + */
32 +static int ext3_test_allocatable(int nr, struct buffer_head *bh)
33 +{
34 +       if (ext3_test_bit(nr, bh->b_data))
35 +               return 0;
36 +       if (!buffer_jbd(bh) || !bh2jh(bh)->b_committed_data)
37 +               return 1;
38 +       return !ext3_test_bit(nr, bh2jh(bh)->b_committed_data);
39 +}
40 +
41 +/* Scan all groups: a group whose free-inode count is nonzero and >= the
42 + * average replaces *gdp when *gdp is NULL or has fewer free blocks. */
43 +int ext3_find_group_dir(const struct inode *dir,
44 +                               struct ext3_group_desc **gdp,
45 +                               struct buffer_head **bh)
46 +{
47 +       struct super_block *sb = dir->i_sb;
48 +       struct ext3_super_block *es = EXT3_SB(sb)->s_es;
49 +       struct ext3_group_desc *desc;
50 +       struct buffer_head *desc_bh;
51 +       int best = 0, group, avefreei;
52 +
53 +       avefreei = le32_to_cpu(es->s_free_inodes_count) /
54 +                       sb->u.ext3_sb.s_groups_count;
55 +       for (group = 0; group < sb->u.ext3_sb.s_groups_count; group++) {
56 +               desc = ext3_get_group_desc(sb, group, &desc_bh);
57 +               if (!desc || !le16_to_cpu(desc->bg_free_inodes_count) ||
58 +                   le16_to_cpu(desc->bg_free_inodes_count) < avefreei)
59 +                       continue;
60 +               if (*gdp && le16_to_cpu(desc->bg_free_blocks_count) <=
61 +                               le16_to_cpu((*gdp)->bg_free_blocks_count))
62 +                       continue;
63 +               best = group;
64 +               *gdp = desc;
65 +               *bh = desc_bh;
66 +       }
67 +       return best;    /* stays 0 if no group replaced *gdp */
68 +}
69 +
70 +/*
71 + * Non-directory policy: prefer the parent directory's group, then probe
72 + * groups at doubling strides, then walk further groups one by one.
73 + * Stores the chosen descriptor in *gdp (left untouched if nothing is
74 + * found) and returns the group index at which the search stopped.
75 + */
76 +int ext3_find_group_other(const struct inode *dir,
77 +                               struct ext3_group_desc **gdp,
78 +                               struct buffer_head **bh)
79 +{
80 +       struct super_block *sb = dir->i_sb;
81 +       struct ext3_group_desc *desc;
82 +       int group, step;
83 +
84 +       /* First choice: the group that holds the parent directory. */
85 +       group = dir->u.ext3_i.i_block_group;
86 +       desc = ext3_get_group_desc(sb, group, bh);
87 +       if (desc && le16_to_cpu(desc->bg_free_inodes_count)) {
88 +               *gdp = desc;
89 +               return group;
90 +       }
91 +
92 +       /* Second choice: hop ahead by 1, 2, 4, ... groups, wrapping at the end. */
93 +       for (step = 1; step < sb->u.ext3_sb.s_groups_count; step <<= 1) {
94 +               group += step;
95 +               if (group >= sb->u.ext3_sb.s_groups_count)
96 +                       group -= sb->u.ext3_sb.s_groups_count;
97 +               desc = ext3_get_group_desc(sb, group, bh);
98 +               if (desc && le16_to_cpu(desc->bg_free_inodes_count)) {
99 +                       *gdp = desc;
100 +                       break;
101 +               }
102 +       }
103 +       if (*gdp)
104 +               return group;
105 +
106 +       /* Last resort: linear scan, first probe two past the parent's group. */
107 +       group = dir->u.ext3_i.i_block_group + 1;
108 +       for (step = 2; step < sb->u.ext3_sb.s_groups_count; step++) {
109 +               if (++group >= sb->u.ext3_sb.s_groups_count)
110 +                       group = 0;
111 +               desc = ext3_get_group_desc(sb, group, bh);
112 +               if (desc && le16_to_cpu(desc->bg_free_inodes_count)) {
113 +                       *gdp = desc;
114 +                       break;
115 +               }
116 +       }
117 +       return group;
118 +}
119 +
120 +/* Choose a block group: directories use one search, other modes another. */
121 +static int ext3_find_group(const struct inode *dir, int mode,
122 +                               struct ext3_group_desc **gdp,
123 +                               struct buffer_head **bh)
124 +{
125 +       return S_ISDIR(mode) ? ext3_find_group_dir(dir, gdp, bh)
126 +                            : ext3_find_group_other(dir, gdp, bh);
127 +}
128 +
129 +/* Return a bit that ext3_test_allocatable() accepts in bh, or -1 if none. */
130 +static int ext3_find_usable_inode(struct super_block *sb,
131 +                                       struct buffer_head *bh)
132 +{
133 +       int limit = EXT3_INODES_PER_GROUP(sb);
134 +       int start, bit;
135 +
136 +       for (start = 0; start < limit; ) {
137 +               bit = ext3_find_next_zero_bit((unsigned long *) bh->b_data,
138 +                                             limit, start);
139 +               if (bit >= limit)
140 +                       break;
141 +               if (ext3_test_allocatable(bit, bh))
142 +                       return bit;
143 +
144 +               /* rejected: resume at the next bit clear in the committed copy */
145 +               J_ASSERT_BH(bh, bh2jh(bh)->b_committed_data);
146 +               start = ext3_find_next_zero_bit(
147 +                       (unsigned long *) bh2jh(bh)->b_committed_data,
148 +                       limit, bit);
149 +       }
150 +       return -1;
151 +}
152 +
153  /*
154   * There are two policies for allocating an inode.  If the new inode is
155   * a directory, then a forward search is made for a block group with both
156 @@ -336,7 +466,7 @@
157         struct super_block * sb;
158         struct buffer_head * bh;
159         struct buffer_head * bh2;
160 -       int i, j, avefreei;
161 +       int i, j, k;
162         struct inode * inode;
163         int bitmap_nr;
164         struct ext3_group_desc * gdp;
165 @@ -371,11 +501,12 @@
166  
167                 bh = EXT3_SB(sb)->s_inode_bitmap[bitmap_nr];
168  
169 -               BUFFER_TRACE(bh, "get_write_access");
170 -               err = ext3_journal_get_write_access(handle, bh);
171 +               BUFFER_TRACE(bh, "get_undo_access");
172 +               err = ext3_journal_get_undo_access(handle, bh);
173                 if (err) goto fail;
174  
175 -               if (ext3_set_bit(j, bh->b_data)) {
176 +               if (!ext3_test_allocatable(j, bh) ||
177 +                               ext3_set_bit(j, bh->b_data)) {
178                         printk(KERN_ERR "goal inode %lu unavailable\n", goal);
179                         /* Oh well, we tried. */
180                         goto repeat;
181 @@ -393,119 +524,70 @@
182  
183  repeat:
184         gdp = NULL;
185 -       i = 0;
186 -
187 -       if (S_ISDIR(mode)) {
188 -               avefreei = le32_to_cpu(es->s_free_inodes_count) /
189 -                       sb->u.ext3_sb.s_groups_count;
190 -               if (!gdp) {
191 -                       for (j = 0; j < sb->u.ext3_sb.s_groups_count; j++) {
192 -                               struct buffer_head *temp_buffer;
193 -                               tmp = ext3_get_group_desc (sb, j, &temp_buffer);
194 -                               if (tmp &&
195 -                                   le16_to_cpu(tmp->bg_free_inodes_count) &&
196 -                                   le16_to_cpu(tmp->bg_free_inodes_count) >=
197 -                                                       avefreei) {
198 -                                       if (!gdp || (le16_to_cpu(tmp->bg_free_blocks_count) >
199 -                                               le16_to_cpu(gdp->bg_free_blocks_count))) {
200 -                                               i = j;
201 -                                               gdp = tmp;
202 -                                               bh2 = temp_buffer;
203 -                                       }
204 -                               }
205 -                       }
206 -               }
207 -       } else {
208 -               /*
209 -                * Try to place the inode in its parent directory
210 -                */
211 -               i = dir->u.ext3_i.i_block_group;
212 -               tmp = ext3_get_group_desc (sb, i, &bh2);
213 -               if (tmp && le16_to_cpu(tmp->bg_free_inodes_count))
214 -                       gdp = tmp;
215 -               else
216 -               {
217 -                       /*
218 -                        * Use a quadratic hash to find a group with a
219 -                        * free inode
220 -                        */
221 -                       for (j = 1; j < sb->u.ext3_sb.s_groups_count; j <<= 1) {
222 -                               i += j;
223 -                               if (i >= sb->u.ext3_sb.s_groups_count)
224 -                                       i -= sb->u.ext3_sb.s_groups_count;
225 -                               tmp = ext3_get_group_desc (sb, i, &bh2);
226 -                               if (tmp &&
227 -                                   le16_to_cpu(tmp->bg_free_inodes_count)) {
228 -                                       gdp = tmp;
229 -                                       break;
230 -                               }
231 -                       }
232 -               }
233 -               if (!gdp) {
234 -                       /*
235 -                        * That failed: try linear search for a free inode
236 -                        */
237 -                       i = dir->u.ext3_i.i_block_group + 1;
238 -                       for (j = 2; j < sb->u.ext3_sb.s_groups_count; j++) {
239 -                               if (++i >= sb->u.ext3_sb.s_groups_count)
240 -                                       i = 0;
241 -                               tmp = ext3_get_group_desc (sb, i, &bh2);
242 -                               if (tmp &&
243 -                                   le16_to_cpu(tmp->bg_free_inodes_count)) {
244 -                                       gdp = tmp;
245 -                                       break;
246 -                               }
247 -                       }
248 -               }
249 -       }
250  
251 +       /* choose group */
252 +       i = ext3_find_group(dir, mode, &gdp, &bh2);
253         err = -ENOSPC;
254         if (!gdp)
255                 goto out;
256 -
257 +       
258         err = -EIO;
259 -       bitmap_nr = load_inode_bitmap (sb, i);
260 +       bitmap_nr = load_inode_bitmap(sb, i);
261         if (bitmap_nr < 0)
262                 goto fail;
263 -
264         bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr];
265  
266 -       if ((j = ext3_find_first_zero_bit ((unsigned long *) bh->b_data,
267 -                                     EXT3_INODES_PER_GROUP(sb))) <
268 -           EXT3_INODES_PER_GROUP(sb)) {
269 -               BUFFER_TRACE(bh, "get_write_access");
270 -               err = ext3_journal_get_write_access(handle, bh);
271 -               if (err) goto fail;
272 -               
273 -               if (ext3_set_bit (j, bh->b_data)) {
274 -                       ext3_error (sb, "ext3_new_inode",
275 -                                     "bit already set for inode %d", j);
276 -                       goto repeat;
277 -               }
278 -               BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
279 -               err = ext3_journal_dirty_metadata(handle, bh);
280 -               if (err) goto fail;
281 -       } else {
282 -               if (le16_to_cpu(gdp->bg_free_inodes_count) != 0) {
283 -                       ext3_error (sb, "ext3_new_inode",
284 -                                   "Free inodes count corrupted in group %d",
285 -                                   i);
286 -                       /* Is it really ENOSPC? */
287 -                       err = -ENOSPC;
288 -                       if (sb->s_flags & MS_RDONLY)
289 -                               goto fail;
290 -
291 -                       BUFFER_TRACE(bh2, "get_write_access");
292 -                       err = ext3_journal_get_write_access(handle, bh2);
293 -                       if (err) goto fail;
294 -                       gdp->bg_free_inodes_count = 0;
295 -                       BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
296 -                       err = ext3_journal_dirty_metadata(handle, bh2);
297 -                       if (err) goto fail;
298 +       /* try to allocate in selected group */
299 +       j = ext3_find_usable_inode(sb, bh);
300 +       err = -ENOSPC;
301 +       if (j >= 0)
302 +               goto found_free;
303 +
304 +       /* can't allocate here: try each of the remaining groups */
305 +       k = i;
306 +       err = -EIO;
307 +       for (i = i + 1; i != k; i++) {
308 +               if (i >= sb->u.ext3_sb.s_groups_count)
309 +                       i = 0;
310 +               tmp = ext3_get_group_desc(sb, i, &bh2);
311 +               if (le16_to_cpu(tmp->bg_free_inodes_count) == 0)
312 +                       continue;
313 +
314 +               bitmap_nr = load_inode_bitmap(sb, i);
315 +               if (bitmap_nr < 0)
316 +                       goto fail;
317 +               bh = sb->u.ext3_sb.s_inode_bitmap[bitmap_nr];
318 +
319 +               /* try to allocate in selected group */
320 +               if ((j = ext3_find_usable_inode(sb, bh)) >= 0) {
321 +                       gdp = tmp;
322 +                       break;
323                 }
324 -               goto repeat;
325         }
326 +       err = -ENOSPC;
327 +       if (!gdp)
328 +               goto out;
329 +
330 + found_free:
331 +       BUFFER_TRACE(bh, "get_undo_access");
332 +       err = ext3_journal_get_undo_access(handle, bh);
333 +       if (err)
334 +               goto fail;
335 +
336 +       if (ext3_set_bit(j, bh->b_data)) {
337 +               ext3_error (sb, "ext3_new_inode",
338 +                               "bit already set for inode %d", j);
339 +               goto fail; 
340 +       }
341 +       BUFFER_TRACE(bh, "call ext3_journal_dirty_metadata");
342 +       err = ext3_journal_dirty_metadata(handle, bh);
343 +       if (err)
344 +               goto fail;
345 +       
346   have_bit_and_group:
347 +       if (buffer_jbd(bh) && bh2jh(bh)->b_committed_data)
348 +               J_ASSERT_BH(bh, !ext3_test_bit(j, bh2jh(bh)->b_committed_data));
349 +
350         j += i * EXT3_INODES_PER_GROUP(sb) + 1;
351         if (j < EXT3_FIRST_INO(sb) || j > le32_to_cpu(es->s_inodes_count)) {
352                 ext3_error (sb, "ext3_new_inode",