b=3119
[fs/lustre-release.git] / lustre / kernel_patches / patches / ext3-extents-2.4.18-chaos.patch
1  fs/ext3/Makefile           |    3 
2  fs/ext3/extents.c          | 1615 +++++++++++++++++++++++++++++++++++++++++++++
3  fs/ext3/ialloc.c           |    4 
4  fs/ext3/inode.c            |   30 
5  fs/ext3/super.c            |    8 
6  include/linux/ext3_fs.h    |   18 
7  include/linux/ext3_fs_i.h  |    4 
8  include/linux/ext3_fs_sb.h |   10 
9  8 files changed, 1684 insertions(+), 8 deletions(-)
10
11 Index: linux-2.4.18-chaos/fs/ext3/extents.c
12 ===================================================================
13 --- linux-2.4.18-chaos.orig/fs/ext3/extents.c   2003-01-30 13:24:37.000000000 +0300
14 +++ linux-2.4.18-chaos/fs/ext3/extents.c        2004-01-13 16:11:00.000000000 +0300
15 @@ -0,0 +1,1614 @@
16 +/*
17 + *
18 + * linux/fs/ext3/extents.c
19 + *
20 + * Extents support for EXT3
21 + *
22 + * 07/08/2003    Alex Tomas <bzzz@tmi.comex.ru>
23 + * 
24 + * TODO:
25 + *   - ext3*_error() should be used in some situations
26 + *   - find_goal() [to be tested and improved]
27 + *   - error handling
28 + *   - we could leak allocated block in some error cases
29 + *   - quick search for index/leaf in ext3_ext_find_extent()
30 + *   - tree reduction
31 + *   - cache last found extent
32 + *   - arch-independent
33 + */
34 +
35 +#include <linux/module.h>
36 +#include <linux/fs.h>
37 +#include <linux/time.h>
38 +#include <linux/ext3_jbd.h>
39 +#include <linux/jbd.h>
40 +#include <linux/smp_lock.h>
41 +#include <linux/highuid.h>
42 +#include <linux/pagemap.h>
43 +#include <linux/quotaops.h>
44 +#include <linux/string.h>
45 +#include <linux/slab.h>
46 +#include <linux/locks.h>
47 +
48 +/*
49 + * with AGRESSIVE_TEST defined, the capacity of index/leaf blocks
50 + * becomes very small, so index splits, in-depth growing and
51 + * other hard changes happen much more often
52 + * this is for debug purposes only
53 + */
54 +#define AGRESSIVE_TEST_
55 +
56 +/*
57 + * if EXT_DEBUG is defined you can use the 'extdebug' mount option
58 + * to get lots of info about what's going on
59 + */
60 +#define EXT_DEBUG
61 +#ifdef EXT_DEBUG
62 +#define ext_debug(inode,fmt,a...)              \
63 +do {                                           \
64 +       if (test_opt((inode)->i_sb, EXTDEBUG))  \
65 +               printk(fmt, ##a);               \
66 +} while (0)
67 +#else
68 +#define ext_debug(inode,fmt,a...)
69 +#endif
70 +
71 +#define EXT3_ALLOC_NEEDED      2       /* block bitmap + group descriptor */
72 +
73 +/*
74 + * ext3_inode has i_block array (total 60 bytes)
75 + * first 4 bytes are used to store:
76 + *  - tree depth (0 means there is no tree yet; all extents are in the inode)
77 + *  - number of alive extents in the inode
78 + */
79 +
80 +/*
81 + * this is extent on-disk structure
82 + * it's used at the bottom of the tree
83 + */
84 +struct ext3_extent {
85 +       __u32   e_block;        /* first logical block extent covers */
86 +       __u32   e_start;        /* first physical block the extent lives at */
87 +       __u32   e_num;          /* number of blocks covered by extent */
88 +};
89 +
90 +/*
91 + * this is index on-disk structure
92 + * it's used at all levels but the bottom one
93 + */
94 +struct ext3_extent_idx {
95 +       __u32   e_block;        /* index covers logical blocks from 'block' */
96 +       __u32   e_leaf;         /* pointer to the physical block of the next *
97 +                                * level. leaf or next index could be here */
98 +};
99 +
100 +/*
101 + * each block (leaf or index), even the inode-stored one, has a header
102 + */
103 +struct ext3_extent_header {    
104 +       __u16   e_num;          /* number of valid entries */
105 +       __u16   e_max;          /* capacity of store in entries */
106 +};
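For illustration only (not part of the patch): the capacities implied by these on-disk structures can be checked with a small user-space program. The struct definitions below are copies of the ones above with <stdint.h> types standing in for __u16/__u32; the 60-byte i_block size and the 4096-byte filesystem block size are assumptions taken from the comments in this file.

#include <stdint.h>
#include <stdio.h>

struct ext3_extent        { uint32_t e_block, e_start, e_num; }; /* 12 bytes */
struct ext3_extent_idx    { uint32_t e_block, e_leaf; };         /*  8 bytes */
struct ext3_extent_header { uint16_t e_num, e_max; };            /*  4 bytes */

int main(void)
{
        size_t block = 4096;     /* assumed filesystem block size */
        size_t inode_body = 60;  /* size of the i_block array, per the comment above */
        size_t hdr = sizeof(struct ext3_extent_header);

        printf("extents per block:     %zu\n", (block - hdr) / sizeof(struct ext3_extent));          /* 341 */
        printf("indexes per block:     %zu\n", (block - hdr) / sizeof(struct ext3_extent_idx));      /* 511 */
        printf("extents in inode body: %zu\n", (inode_body - hdr) / sizeof(struct ext3_extent));     /* 4 */
        printf("indexes in inode body: %zu\n", (inode_body - hdr) / sizeof(struct ext3_extent_idx)); /* 7 */
        return 0;
}

These are the same values that ext3_ext_space_block(), ext3_ext_space_inode() and ext3_ext_space_inode_idx() below compute when AGRESSIVE_TEST is not enabled.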
107 +
108 +/*
109 + * array of ext3_ext_path contains path to some extent
110 + * creation/lookup routines use it for traversal/splitting/etc
111 + * truncate uses it to simulate recursive walking
112 + */
113 +struct ext3_ext_path {
114 +       __u32                           p_block;
115 +       __u16                           p_depth;
116 +       struct ext3_extent              *p_ext;
117 +       struct ext3_extent_idx          *p_idx;
118 +       struct ext3_extent_header       *p_hdr;
119 +       struct buffer_head              *p_bh;
120 +};
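As an orienting note (an editorial illustration, not from the patch): after ext3_ext_find_extent() below fills this array, path[] holds one element per tree level. For a tree with i_depth == 1, path[0] describes the root index stored in the inode body (p_bh == NULL, p_idx pointing at the index entry that was followed) and path[1] describes the leaf block read from disk (p_bh set, p_ext pointing at the extent closest to the requested logical block, or NULL if the leaf is empty).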
121 +
122 +#define EXT_FIRST_EXTENT(__hdr__) \
123 +       ((struct ext3_extent *) (((char *) (__hdr__)) +         \
124 +                                sizeof(struct ext3_extent_header)))
125 +#define EXT_FIRST_INDEX(__hdr__) \
126 +       ((struct ext3_extent_idx *) (((char *) (__hdr__)) +     \
127 +                                    sizeof(struct ext3_extent_header)))
128 +#define EXT_HAS_FREE_INDEX(__path__) \
129 +       ((__path__)->p_hdr->e_num < (__path__)->p_hdr->e_max)
130 +#define EXT_LAST_EXTENT(__hdr__) \
131 +       (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->e_num - 1)
132 +#define EXT_LAST_INDEX(__hdr__) \
133 +       (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->e_num - 1)
134 +#define EXT_MAX_EXTENT(__hdr__) \
135 +       (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->e_max - 1)
136 +#define EXT_MAX_INDEX(__hdr__) \
137 +       (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->e_max - 1)
138 +
139 +
140 +#define EXT_ASSERT(__x__) do { if (!(__x__)) BUG(); } while (0)
141 +
142 +/*
143 + * could return:
144 + *  - EROFS
145 + *  - ENOMEM
146 + */
147 +static int ext3_ext_get_access(handle_t *handle, struct inode *inode,
148 +                               struct ext3_ext_path *path)
149 +{
150 +       if (path->p_bh) {
151 +               /* path points to block */
152 +               return ext3_journal_get_write_access(handle, path->p_bh);
153 +       }
154 +
155 +       /* path points to leaf/index in inode body */
156 +       return 0;
157 +}
158 +
159 +/*
160 + * could return:
161 + *  - EROFS
162 + *  - ENOMEM
163 + *  - EIO
164 + */
165 +static int ext3_ext_dirty(handle_t *handle, struct inode *inode,
166 +                               struct ext3_ext_path *path)
167 +{
168 +       if (path->p_bh) {
169 +               /* path points to block */
170 +               return ext3_journal_dirty_metadata(handle, path->p_bh);
171 +       }
172 +
173 +       /* path points to leaf/index in inode body */
174 +       return ext3_mark_inode_dirty(handle, inode);
175 +}
176 +
177 +static inline int ext3_ext_space_block(struct inode *inode)
178 +{
179 +       int size;
180 +
181 +       size = (inode->i_sb->s_blocksize - sizeof(struct ext3_extent_header))
182 +               / sizeof(struct ext3_extent);
183 +#ifdef AGRESSIVE_TEST
184 +       size = 6; /* FIXME: for debug, remove this line */
185 +#endif
186 +       return size;
187 +}
188 +
189 +static inline int ext3_ext_space_inode(struct inode *inode)
190 +{
191 +       int size;
192 +
193 +       size = (sizeof(EXT3_I(inode)->i_data) -
194 +                       sizeof(struct ext3_extent_header))
195 +                       / sizeof(struct ext3_extent);
196 +#ifdef AGRESSIVE_TEST
197 +       size = 3; /* FIXME: for debug, remove this line */
198 +#endif
199 +       return size;
200 +}
201 +
202 +static inline int ext3_ext_space_inode_idx(struct inode *inode)
203 +{
204 +       int size;
205 +
206 +       size = (sizeof(EXT3_I(inode)->i_data) -
207 +                       sizeof(struct ext3_extent_header))
208 +                       / sizeof(struct ext3_extent_idx);
209 +#ifdef AGRESSIVE_TEST
210 +       size = 4; /* FIXME: for debug, remove this line */
211 +#endif
212 +       return size;
213 +}
214 +
215 +static void ext3_ext_show_path(struct inode *inode, struct ext3_ext_path *path)
216 +{
217 +       int k, l = path->p_depth;
218 +
219 +       ext_debug(inode, "path:");
220 +       for (k = 0; k <= l; k++, path++) {
221 +               if (path->p_idx) {
222 +                       ext_debug(inode, "  %d->%d", path->p_idx->e_block,
223 +                                       path->p_idx->e_leaf);
224 +               } else if (path->p_ext) {
225 +                       ext_debug(inode, "  %d:%d:%d",
226 +                                       path->p_ext->e_block,
227 +                                       path->p_ext->e_start,
228 +                                       path->p_ext->e_num);
229 +               } else
230 +                       ext_debug(inode, "  []");
231 +       }
232 +       ext_debug(inode, "\n");
233 +}
234 +
235 +static void ext3_ext_show_leaf(struct inode *inode, struct ext3_ext_path *path)
236 +{
237 +       int depth = EXT3_I(inode)->i_depth;
238 +       struct ext3_extent_header *eh = path[depth].p_hdr;
239 +       struct ext3_extent *ex = EXT_FIRST_EXTENT(eh);
240 +       int i;
241 +
242 +       for (i = 0; i < eh->e_num; i++, ex++) {
243 +               ext_debug(inode, "%d:%d:%d ",
244 +                               ex->e_block, ex->e_start, ex->e_num);
245 +       }
246 +       ext_debug(inode, "\n");
247 +}
248 +
249 +static void ext3_ext_drop_refs(struct inode *inode, struct ext3_ext_path *path)
250 +{
251 +       int depth = path->p_depth;
252 +       int i;
253 +
254 +       for (i = 0; i <= depth; i++, path++)
255 +               if (path->p_bh) {
256 +                       brelse(path->p_bh);
257 +                       path->p_bh = NULL;
258 +               }
259 +}
260 +
261 +static int ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path)
262 +{
263 +       struct ext3_inode_info *ei = EXT3_I(inode);
264 +       unsigned long bg_start;
265 +       unsigned long colour;
266 +       int depth;
267 +       
268 +       if (path) {
269 +               depth = path->p_depth;
270 +               /* try to find previous block */
271 +               if (path[depth].p_ext)
272 +                       return path[depth].p_ext->e_start +
273 +                               path[depth].p_ext->e_num - 1;
274 +               
275 +               /* it looks like the index is empty
276 +                * try to find a goal starting from the index block itself */
277 +               if (path[depth].p_bh)
278 +                       return path[depth].p_bh->b_blocknr;
279 +       }
280 +
281 +       /* OK. use inode's group */
282 +       bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
283 +               le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block);
284 +       colour = (current->pid % 16) *
285 +                       (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16);
286 +       return bg_start + colour;
287 +}
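A hedged worked example of the group-based fallback above (all numbers hypothetical): with 4096-byte blocks a group typically holds 8 * 4096 = 32768 blocks, so for an inode in block group 3, s_first_data_block == 0 and a process with pid % 16 == 5, bg_start = 3 * 32768 = 98304, colour = 5 * (32768 / 16) = 10240, and the returned goal is 98304 + 10240 = 108544.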
288 +
289 +static struct ext3_ext_path *
290 +ext3_ext_find_extent(struct inode *inode, int block, struct ext3_ext_path *path)
291 +{
292 +       struct ext3_inode_info *ei = EXT3_I(inode);
293 +       struct ext3_extent_header *eh = (void *) ei->i_data;
294 +       struct ext3_extent_idx *ix;
295 +       struct buffer_head *bh;
296 +       struct ext3_extent *ex;
297 +       int depth, i, k, ppos = 0, prev = 0;
298 +       
299 +       eh = (struct ext3_extent_header *) ei->i_data;
300 +
301 +       /* initialize capacity of leaf in inode for first time */
302 +       if (eh->e_max == 0)
303 +               eh->e_max = ext3_ext_space_inode(inode);
304 +       i = depth = ei->i_depth;
305 +       EXT_ASSERT(i == 0 || eh->e_num > 0);
306 +       
307 +       /* account possible depth increase */
308 +       if (!path) {
309 +               path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2),
310 +                               GFP_NOFS);
311 +               if (!path)
312 +                       return ERR_PTR(-ENOMEM);
313 +       }
314 +       memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1));
315 +
316 +       /* walk through the tree */
317 +       while (i) {
318 +               ext_debug(inode, "depth %d: num %d, max %d\n",
319 +                               ppos, eh->e_num, eh->e_max);
320 +               ix = EXT_FIRST_INDEX(eh);
321 +               if (eh->e_num) {
322 +                       EXT_ASSERT(prev == 0 || ix->e_block == prev);
323 +                       path[ppos].p_idx = ix;
324 +               }
325 +               EXT_ASSERT(eh->e_num <= eh->e_max);
326 +               for (k = 0; k < eh->e_num; k++, ix++) {
327 +                       ext_debug(inode, "index: %d -> %d\n",
328 +                                       ix->e_block, ix->e_leaf);
329 +                       EXT_ASSERT((k == 0 && prev <= (int)ix->e_block) ||
330 +                                       (k > 0 && prev < (int)ix->e_block));
331 +                       if (block < ix->e_block)
332 +                               break;
333 +                       prev = ix->e_block;
334 +                       path[ppos].p_idx = ix;
335 +               }
336 +               path[ppos].p_block = path[ppos].p_idx->e_leaf;
337 +               path[ppos].p_depth = i;
338 +               path[ppos].p_hdr = eh;
339 +               path[ppos].p_ext = NULL;
340 +
341 +               bh = sb_bread(inode->i_sb, path[ppos].p_block);
342 +               if (!bh) {
343 +                       ext3_ext_drop_refs(inode, path);
344 +                       kfree(path);
345 +                       return ERR_PTR(-EIO);
346 +               }
347 +               eh = (struct ext3_extent_header *) bh->b_data;
348 +               ppos++;
349 +               EXT_ASSERT(ppos <= depth);
350 +               path[ppos].p_bh = bh;
351 +               i--;
352 +       }
353 +
354 +       path[ppos].p_depth = i;
355 +       path[ppos].p_hdr = eh;
356 +       path[ppos].p_ext = NULL;
357 +       
358 +       /* find extent */
359 +       ex = EXT_FIRST_EXTENT(eh);
360 +       if (eh->e_num)
361 +               path[ppos].p_ext = ex;
362 +       EXT_ASSERT(eh->e_num <= eh->e_max);
363 +       for (k = 0; k < eh->e_num; k++, ex++) {
364 +               EXT_ASSERT(ex->e_num < EXT3_BLOCKS_PER_GROUP(inode->i_sb));
365 +               EXT_ASSERT((k == 0 && prev <= (int)ex->e_block) ||
366 +                               (k > 0 && prev < (int)ex->e_block));
367 +               if (block < ex->e_block) 
368 +                       break;
369 +               prev = ex->e_block;
370 +               path[ppos].p_ext = ex;
371 +       }
372 +
373 +       ext3_ext_show_path(inode, path);
374 +
375 +       return path;
376 +}
377 +
378 +static void ext3_ext_check_boundary(struct inode *inode,
379 +                                       struct ext3_ext_path *curp,
380 +                                       void *addr, int len)
381 +{
382 +       void *end;
383 +
384 +       if (!len)
385 +               return;
386 +       if (curp->p_bh)
387 +               end = (void *) curp->p_hdr + inode->i_sb->s_blocksize;
388 +       else
389 +               end = (void *) curp->p_hdr + sizeof(EXT3_I(inode)->i_data);
390 +       if (((unsigned long) addr) + len > (unsigned long) end) {
391 +               printk("overflow! 0x%p > 0x%p\n", addr + len, end);
392 +               BUG();
393 +       }
394 +       if ((unsigned long) addr < (unsigned long) curp->p_hdr) {
395 +               printk("underflow! 0x%p < 0x%p\n", addr, curp->p_hdr);
396 +               BUG();
397 +       }
398 +}
399 +
400 +/*
401 + * insert new index [logical;ptr] into the block at curp
402 + * it checks where to insert: before curp or after curp
403 + */
404 +static int ext3_ext_insert_index(handle_t *handle, struct inode *inode,
405 +                               struct ext3_ext_path *curp, int logical,
406 +                               int ptr)
407 +{
408 +       struct ext3_extent_idx *ix;
409 +       int len, err;
410 +
411 +       if ((err = ext3_ext_get_access(handle, inode, curp)))
412 +               return err;
413 +
414 +       EXT_ASSERT(logical != curp->p_idx->e_block);
415 +       len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
416 +       if (logical > curp->p_idx->e_block) {
417 +               /* insert after */
418 +               len = (len - 1) * sizeof(struct ext3_extent_idx);
419 +               len = len < 0 ? 0 : len;
420 +               ext_debug(inode, "insert new index %d after: %d. "
421 +                               "move %d from 0x%p to 0x%p\n",
422 +                               logical, ptr, len,
423 +                               (curp->p_idx + 1), (curp->p_idx + 2));
424 +
425 +               ext3_ext_check_boundary(inode, curp, curp->p_idx + 2, len);
426 +               memmove(curp->p_idx + 2, curp->p_idx + 1, len);
427 +               ix = curp->p_idx + 1;
428 +       } else {
429 +               /* insert before */
430 +               len = len * sizeof(struct ext3_extent_idx);
431 +               len = len < 0 ? 0 : len;
432 +               ext_debug(inode, "insert new index %d before: %d. "
433 +                               "move %d from 0x%p to 0x%p\n",
434 +                               logical, ptr, len,
435 +                               curp->p_idx, (curp->p_idx + 1));
436 +
437 +               ext3_ext_check_boundary(inode, curp, curp->p_idx + 1, len);
438 +               memmove(curp->p_idx + 1, curp->p_idx, len);
439 +               ix = curp->p_idx;
440 +       }
441 +
442 +       ix->e_block = logical;
443 +       ix->e_leaf = ptr;
444 +       curp->p_hdr->e_num++;
445 +
446 +       err = ext3_ext_dirty(handle, inode, curp);
447 +       ext3_std_error(inode->i_sb, err);
448 +
449 +       return err;
450 +}
451 +
452 +/*
453 + * routine inserts new subtree into the path, using free index entry
454 + * at depth 'at':
455 + *  - allocates all needed blocks (new leaf and all intermediate index blocks)
456 + *  - makes a decision where to split
457 + *  - moves remaining extents and index entries (to the right of the split point)
458 + *    into the newly allocated blocks
459 + *  - initializes the subtree
460 + */
461 +static int ext3_ext_split(handle_t *handle, struct inode *inode,
462 +                               struct ext3_ext_path *path,
463 +                               struct ext3_extent *newext, int at)
464 +{
465 +       struct buffer_head *bh = NULL;
466 +       int depth = EXT3_I(inode)->i_depth;
467 +       struct ext3_extent_header *neh;
468 +       struct ext3_extent_idx *fidx;
469 +       struct ext3_extent *ex;
470 +       int i = at, k, m, a;
471 +       long newblock, oldblock, border;
472 +       int *ablocks = NULL; /* array of allocated blocks */
473 +       int err = 0;
474 +
475 +       /* make decision: where to split? */
476 +       /* FIXME: for now the decision is the simplest one: split at the current extent */
477 +
478 +       /* if the current leaf will be split, then we should use
479 +        * the border from the split point */
480 +       
481 +       if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
482 +               border = path[depth].p_ext[1].e_block;
483 +               ext_debug(inode, "leaf will be split."
484 +                               " next leaf starts at %d\n",
485 +                               (int)border);
486 +       } else {
487 +               border = newext->e_block;
488 +               ext_debug(inode, "leaf will be added."
489 +                               " next leaf starts at %d\n",
490 +                               (int)border);
491 +       }
492 +
493 +       /* 
494 +        * if error occurs, then we break processing
495 +        * and turn filesystem read-only. so, index won't
496 +        * be inserted and tree will be in consistent
497 +        * state. next mount will repair buffers too
498 +        */
499 +
500 +       /*
501 +        * get array to track all allocated blocks
502 +        * we need this to handle errors and free blocks
503 +        * upon them
504 +        */
505 +       ablocks = kmalloc(sizeof(long) * depth, GFP_NOFS);
506 +       if (!ablocks)
507 +               return -ENOMEM;
508 +       memset(ablocks, 0, sizeof(long) * depth);
509 +
510 +       /* allocate all needed blocks */
511 +       ext_debug(inode, "allocate %d blocks for indexes/leaf\n", depth - at);
512 +       newblock = 0; /* FIXME: something more sophisticated needed here */ 
513 +       for (a = 0; newext->e_num > 0 && a < depth - at; a++) {
514 +               newblock = ablocks[a] = newext->e_start++;
515 +               newext->e_num--;
516 +       }
517 +       for (; a < depth - at; a++) {
518 +               newblock = ext3_new_block(handle, inode,
519 +                                               newblock + 1, 0, 0, &err);
520 +               if (newblock == 0)
521 +                       goto cleanup;
522 +               ablocks[a] = newblock;
523 +       }
524 +
525 +       /* initialize new leaf */
526 +       newblock = ablocks[--a];
527 +       EXT_ASSERT(newblock);
528 +       bh = sb_getblk(inode->i_sb, newblock);
529 +       if (!bh) {
530 +               err = -EIO;
531 +               goto cleanup;
532 +       }
533 +       lock_buffer(bh);
534 +
535 +       if ((err = ext3_journal_get_create_access(handle, bh)))
536 +               goto cleanup;
537 +
538 +       neh = (struct ext3_extent_header *) bh->b_data;
539 +       neh->e_num = 0;
540 +       neh->e_max = ext3_ext_space_block(inode);
541 +       ex = EXT_FIRST_EXTENT(neh);
542 +
543 +       /* move the remainder of path[depth] to the new leaf */
544 +       EXT_ASSERT(path[depth].p_hdr->e_num ==
545 +                       path[depth].p_hdr->e_max);
546 +       /* start copy from next extent */
547 +       /* TODO: we could do it by single memmove */
548 +       m = 0;
549 +       path[depth].p_ext++;
550 +       while (path[depth].p_ext <=
551 +                       EXT_MAX_EXTENT(path[depth].p_hdr)) {
552 +               ext_debug(inode, "move %d:%d:%d in new leaf\n",
553 +                               path[depth].p_ext->e_block,
554 +                               path[depth].p_ext->e_start,
555 +                               path[depth].p_ext->e_num);
556 +               memmove(ex++, path[depth].p_ext++,
557 +                               sizeof(struct ext3_extent));
558 +               neh->e_num++;
559 +               m++;
560 +       }
561 +       mark_buffer_uptodate(bh, 1);
562 +       unlock_buffer(bh);
563 +
564 +       if ((err = ext3_journal_dirty_metadata(handle, bh)))
565 +               goto cleanup;   
566 +       brelse(bh);
567 +       bh = NULL;
568 +
569 +       /* correct old leaf */
570 +       if (m) {
571 +               if ((err = ext3_ext_get_access(handle, inode, path)))
572 +                       goto cleanup;
573 +               path[depth].p_hdr->e_num -= m;
574 +               if ((err = ext3_ext_dirty(handle, inode, path)))
575 +                       goto cleanup;
576 +               
577 +       }
578 +
579 +       /* create intermediate indexes */
580 +       k = depth - at - 1;
581 +       EXT_ASSERT(k >= 0);
582 +       if (k)
583 +               ext_debug(inode,
584 +                               "create %d intermediate indices\n", k);
585 +       /* insert new index into current index block */
586 +       /* current depth stored in i var */
587 +       i = depth - 1;
588 +       while (k--) {
589 +               oldblock = newblock;
590 +               newblock = ablocks[--a];
591 +               bh = sb_getblk(inode->i_sb, newblock);
592 +               if (!bh) {
593 +                       err = -EIO;
594 +                       goto cleanup;
595 +               }
596 +               lock_buffer(bh);
597 +
598 +               if ((err = ext3_journal_get_create_access(handle, bh)))
599 +                       goto cleanup;
600 +
601 +               neh = (struct ext3_extent_header *) bh->b_data;
602 +               neh->e_num = 1;
603 +               neh->e_max = ext3_ext_space_block(inode);
604 +               fidx = EXT_FIRST_INDEX(neh);
605 +               fidx->e_block = border;
606 +               fidx->e_leaf = oldblock;
607 +
608 +               ext_debug(inode,
609 +                               "int.index at %d (block %u): %d -> %d\n",
610 +                               i, (unsigned) newblock,
611 +                               (int) border,
612 +                               (int) oldblock);
613 +               /* copy indexes */
614 +               m = 0;
615 +               path[i].p_idx++;
616 +               ext_debug(inode, "cur 0x%p, last 0x%p\n", path[i].p_idx,
617 +                               EXT_MAX_INDEX(path[i].p_hdr));
618 +               EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) ==
619 +                               EXT_LAST_INDEX(path[i].p_hdr));
620 +               while (path[i].p_idx <=
621 +                               EXT_MAX_INDEX(path[i].p_hdr)) {
622 +                       ext_debug(inode, "%d: move %d:%d in new index\n",
623 +                                       i, path[i].p_idx->e_block,
624 +                                       path[i].p_idx->e_leaf);
625 +                       memmove(++fidx, path[i].p_idx++,
626 +                                       sizeof(struct ext3_extent_idx));
627 +                       neh->e_num++;
628 +                       m++;
629 +               }
630 +
631 +               mark_buffer_uptodate(bh, 1);
632 +               unlock_buffer(bh);
633 +
634 +               if ((err = ext3_journal_dirty_metadata(handle, bh)))
635 +                       goto cleanup;
636 +               brelse(bh);
637 +               bh = NULL;
638 +
639 +               /* correct old index */
640 +               if (m) {
641 +                       err = ext3_ext_get_access(handle,inode,path+i);
642 +                       if (err)
643 +                               goto cleanup;
644 +                       path[i].p_hdr->e_num -= m;
645 +                       err = ext3_ext_dirty(handle, inode, path + i);
646 +                       if (err)
647 +                               goto cleanup;
648 +               }
649 +
650 +               i--;
651 +       }
652 +
653 +       /* insert new index */
654 +       if (!err)
655 +               err = ext3_ext_insert_index(handle, inode, path + at,
656 +                                               border, newblock);
657 +
658 +cleanup:
659 +       if (bh) {
660 +               if (buffer_locked(bh))
661 +                       unlock_buffer(bh);
662 +               brelse(bh);
663 +       }
664 +
665 +       if (err) {
666 +               /* free all allocated blocks in error case */
667 +               for (i = 0; i < depth; i++)
668 +                       if (ablocks[i])
669 +                               ext3_free_blocks(handle, inode,
670 +                                                ablocks[i], 1);
671 +       }
672 +       kfree(ablocks);
673 +
674 +       return err;
675 +}
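A hedged illustration of the split just implemented, using the AGRESSIVE_TEST leaf capacity of 6 from ext3_ext_space_block(): suppose i_depth == 1, the leaf is full (e_num == e_max == 6) and path[1].p_ext points at its 4th extent. border is taken from the 5th extent's e_block, the 5th and 6th extents are moved into the newly allocated leaf (m == 2), and the old leaf's e_num drops to 4. Assuming the root index still has a free entry (so at == 0), k = depth - at - 1 == 0 and no intermediate index blocks are created; ext3_ext_insert_index() simply adds the entry [border -> new leaf block] to the root index, after which ext3_ext_insert_extent() retries and finds room for the new extent.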
676 +
677 +/*
678 + * routine implements tree growing procedure:
679 + *  - allocates new block
680 + *  - moves top-level data (index block or leaf) into the new block
681 + *  - initialize new top-level, creating index that points to the
682 + *    just created block
683 + */
684 +static int ext3_ext_grow_indepth(handle_t *handle, struct inode *inode,
685 +                                       struct ext3_ext_path *path,
686 +                                       struct ext3_extent *newext)
687 +{
688 +       struct buffer_head *bh;
689 +       struct ext3_ext_path *curp = path;
690 +       struct ext3_extent_header *neh;
691 +       struct ext3_extent_idx *fidx;
692 +       int len, err = 0;
693 +       long newblock;
694 +
695 +       /*
696 +        * use the block already allocated by the caller for the new root block
697 +        */
698 +       newblock = newext->e_start++;
699 +       if (newext->e_num == 0) {
700 +               /* 
701 +                * FIXME: if this may happen, then we have to handle
702 +                * possible error and free allocated block
703 +                */
704 +               printk("grow_indepth with zero blocks\n");
705 +               newblock = ext3_new_block(handle, inode,
706 +                                               newblock, 0, 0, &err);
707 +       } else
708 +               newext->e_num--;
709 +       
710 +       bh = sb_getblk(inode->i_sb, newblock);
711 +       if (!bh) {
712 +               err = -EIO;
713 +               ext3_std_error(inode->i_sb, err);
714 +               return err;
715 +       }
716 +       lock_buffer(bh);
717 +
718 +       if ((err = ext3_journal_get_create_access(handle, bh))) {
719 +               unlock_buffer(bh);
720 +               goto out;       
721 +       }
722 +
723 +       /* move top-level index/leaf into new block */
724 +       len = sizeof(struct ext3_extent_header) +
725 +               sizeof(struct ext3_extent) * curp->p_hdr->e_max;
726 +       EXT_ASSERT(len >= 0 && len < 4096);
727 +       memmove(bh->b_data, curp->p_hdr, len);
728 +
729 +       /* set size of new block */
730 +       neh = (struct ext3_extent_header *) bh->b_data;
731 +       neh->e_max = ext3_ext_space_block(inode);
732 +       mark_buffer_uptodate(bh, 1);
733 +       unlock_buffer(bh);
734 +
735 +       if ((err = ext3_journal_dirty_metadata(handle, bh)))
736 +               goto out;
737 +
738 +       /* create index in new top-level index: num,max,pointer */
739 +       if ((err = ext3_ext_get_access(handle, inode, curp)))
740 +               goto out;
741 +
742 +       curp->p_hdr->e_max = ext3_ext_space_inode_idx(inode);
743 +       curp->p_hdr->e_num = 1;
744 +       curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
745 +       curp->p_idx->e_block = EXT_FIRST_EXTENT(path[0].p_hdr)->e_block;
746 +       curp->p_idx->e_leaf = newblock;
747 +
748 +       neh = (struct ext3_extent_header *) EXT3_I(inode)->i_data;
749 +       fidx = EXT_FIRST_INDEX(neh);
750 +       ext_debug(inode, "new root: num %d(%d), lblock %d, ptr %d\n",
751 +                       neh->e_num, neh->e_max, fidx->e_block, fidx->e_leaf); 
752 +
753 +       EXT3_I(inode)->i_depth++;
754 +       err = ext3_ext_dirty(handle, inode, curp);
755 +out:
756 +       brelse(bh);
757 +
758 +       return err;
759 +}
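To sketch in-depth growing (an illustration, not part of the patch text): for a depth-0 tree the whole inode-stored leaf (the 4-byte header plus up to 4 extents) is copied into the block supplied by the caller through newext, that block's header is given the full per-block capacity, and the root in the inode is rewritten as an index with e_num == 1 whose single entry carries the logical block of the first copied extent and points at the new block; i_depth then becomes 1.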
760 +
761 +/*
762 + * routine finds a free index entry and adds a new leaf. if no free entry is found
763 + * then it requests in-depth growing
764 + */
765 +static int ext3_ext_create_new_leaf(handle_t *handle, struct inode *inode,
766 +                                       struct ext3_ext_path *path,
767 +                                       struct ext3_extent *newext)
768 +{
769 +       long newblock = newext->e_start;
770 +       struct ext3_ext_path *curp;
771 +       int depth, i, err = 0;
772 +
773 +repeat:
774 +       i = depth = EXT3_I(inode)->i_depth;
775 +       
776 +       /* walk up the tree and look for a free index entry */
777 +       curp = path + depth;
778 +       while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
779 +               i--;
780 +               curp--;
781 +       }
782 +
783 +       /* we use an already allocated block for the index block,
784 +        * so subsequent data blocks should be contiguous */
785 +       if (EXT_HAS_FREE_INDEX(curp)) {
786 +               /* if we found index with free entry, then use that
787 +                * entry: create all needed subtree and add new leaf */
788 +               err = ext3_ext_split(handle, inode, path, newext, i);
789 +
790 +               /* refill path */
791 +               ext3_ext_drop_refs(inode, path);
792 +               path = ext3_ext_find_extent(inode, newext->e_block, path);
793 +               if (IS_ERR(path))
794 +                       err = PTR_ERR(path);
795 +       } else {
796 +               /* tree is full, time to grow in depth */
797 +               err = ext3_ext_grow_indepth(handle, inode, path, newext);
798 +
799 +               /* refill path */
800 +               ext3_ext_drop_refs(inode, path);
801 +               path = ext3_ext_find_extent(inode, newext->e_block, path);
802 +               if (IS_ERR(path))
803 +                       err = PTR_ERR(path);
804 +       
805 +               /*
806 +                * only the first grow (depth 0 -> 1) produces free space,
807 +                * in all other cases we have to split the grown tree
808 +                */
809 +               depth = EXT3_I(inode)->i_depth;
810 +               if (path[depth].p_hdr->e_num == path[depth].p_hdr->e_max) {
811 +                       /* now we need split */
812 +                       goto repeat;
813 +               }
814 +       }
815 +
816 +       if (err)
817 +               return err;
818 +
819 +       /*
820 +        * probably we've used some blocks from extent
821 +        * let's allocate new block for it
822 +        */
823 +       if (newext->e_num == 0 && !err) {
824 +               newext->e_start =
825 +                       ext3_new_block(handle, inode, newblock,
826 +                                       0, 0, &err);
827 +               if (newext->e_start != 0)
828 +                       newext->e_num = 1;
829 +       }
830 +
831 +       return err;
832 +}
833 +
834 +/*
835 + * returns next allocated block or 0xffffffff
836 + * NOTE: it considers the block number from an index entry as an
837 + * allocated block. thus, index entries have to be consistent
838 + * with the leaves
839 + */
840 +static inline unsigned ext3_ext_next_allocated_block(struct inode *inode,
841 +                                               struct ext3_ext_path *path)
842 +{
843 +       int depth;
844 +
845 +       EXT_ASSERT(path != NULL);
846 +       depth = path->p_depth;
847 +
848 +       if (depth == 0 && path->p_ext == NULL)
849 +               return 0xffffffff;
850 +
851 +       /* FIXME: what if index isn't full ?! */
852 +       while (depth >= 0) {
853 +               if (depth == path->p_depth) {
854 +                       /* leaf */
855 +                       if (path[depth].p_ext !=
856 +                                       EXT_LAST_EXTENT(path[depth].p_hdr))
857 +                               return path[depth].p_ext[1].e_block;
858 +               } else {
859 +                       /* index */
860 +                       if (path[depth].p_idx !=
861 +                                       EXT_LAST_INDEX(path[depth].p_hdr))
862 +                               return path[depth].p_idx[1].e_block;
863 +               }
864 +               depth--;        
865 +       }
866 +
867 +       return 0xffffffff;
868 +}
869 +
870 +/*
871 + * returns first allocated block from next leaf or 0xffffffff
872 + */
873 +static unsigned ext3_ext_next_leaf_block(struct inode *inode,
874 +                                               struct ext3_ext_path *path)
875 +{
876 +       int depth;
877 +
878 +       EXT_ASSERT(path != NULL);
879 +       depth = path->p_depth;
880 +
881 +       /* a zero-depth tree has no leaf blocks at all */
882 +       if (depth == 0)
883 +               return 0xffffffff;
884 +
885 +       /* go to index block */
886 +       depth--;
887 +       
888 +       while (depth >= 0) {
889 +               if (path[depth].p_idx !=
890 +                               EXT_LAST_INDEX(path[depth].p_hdr))
891 +                       return path[depth].p_idx[1].e_block;
892 +               depth--;        
893 +       }
894 +
895 +       return 0xffffffff;
896 +}
897 +
898 +/*
899 + * if leaf gets modified and modified extent is first in the leaf
900 + * then we have to correct all indexes above
901 + * TODO: do we need to correct tree in all cases?
902 + */
903 +int ext3_ext_correct_indexes(handle_t *handle, struct inode *inode,
904 +                               struct ext3_ext_path *path)
905 +{
906 +       int depth = EXT3_I(inode)->i_depth;     
907 +       struct ext3_extent_header *eh;
908 +       struct ext3_extent *ex;
909 +       long border;
910 +       int k, err = 0;
911 +       
912 +       eh = path[depth].p_hdr;
913 +       ex = path[depth].p_ext;
914 +
915 +       EXT_ASSERT(ex);
916 +       EXT_ASSERT(eh);
917 +       
918 +       if (depth == 0) {
919 +               /* there is no tree at all */
920 +               return 0;
921 +       }
922 +       
923 +       if (ex != EXT_FIRST_EXTENT(eh)) {
924 +               /* we correct the tree only if the first extent in the leaf got modified */
925 +               return 0;
926 +       }
927 +       
928 +       /*
929 +        * TODO: we need correction if border is smaller than the current one
930 +        */
931 +       k = depth - 1;
932 +       border = path[depth].p_ext->e_block;
933 +       if ((err = ext3_ext_get_access(handle, inode, path + k)))
934 +               return err;
935 +       path[k].p_idx->e_block = border;
936 +       if ((err = ext3_ext_dirty(handle, inode, path + k)))
937 +               return err;
938 +
939 +       while (k--) {
940 +               /* change all left-side indexes */
941 +               if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
942 +                       break;
943 +               if ((err = ext3_ext_get_access(handle, inode, path + k)))
944 +                       break;
945 +               path[k].p_idx->e_block = border;
946 +               if ((err = ext3_ext_dirty(handle, inode, path + k)))
947 +                       break;
948 +       }
949 +
950 +       return err;
951 +}
952 +
953 +/*
954 + * this routine tries to merge the requested extent into the existing
955 + * extent or inserts requested extent as new one into the tree,
956 + * creating new leaf in no-space case
957 + */
958 +int ext3_ext_insert_extent(handle_t *handle, struct inode *inode,
959 +                               struct ext3_ext_path *path,
960 +                               struct ext3_extent *newext)
961 +{
962 +       int depth, len;
963 +       struct ext3_extent_header * eh;
964 +       struct ext3_extent *ex;
965 +       struct ext3_extent *nearex; /* nearest extent */
966 +       struct ext3_ext_path *npath = NULL;
967 +       int err;
968 +
969 +       depth = EXT3_I(inode)->i_depth; 
970 +       if ((ex = path[depth].p_ext)) {
971 +               /* try to insert block into found extent and return */
972 +               if (ex->e_block + ex->e_num == newext->e_block &&
973 +                               ex->e_start + ex->e_num == newext->e_start) {
974 +#ifdef AGRESSIVE_TEST
975 +                       if (ex->e_num >= 2)
976 +                               goto repeat;
977 +#endif
978 +                       if ((err = ext3_ext_get_access(handle, inode,
979 +                                                       path + depth)))
980 +                               return err;
981 +                       ext_debug(inode, "append %d block to %d:%d (from %d)\n",
982 +                                       newext->e_num, ex->e_block, ex->e_num,
983 +                                       ex->e_start);
984 +                       ex->e_num += newext->e_num;
985 +                       err = ext3_ext_dirty(handle, inode, path + depth);
986 +                       return err;
987 +               }
988 +       }
989 +
990 +repeat:
991 +       depth = EXT3_I(inode)->i_depth; 
992 +       eh = path[depth].p_hdr;
993 +       if (eh->e_num == eh->e_max) {
994 +               /* probably next leaf has space for us? */
995 +               int next = ext3_ext_next_leaf_block(inode, path);
996 +               if (next != 0xffffffff) {
997 +                       ext_debug(inode, "next leaf block - %d\n", next);
998 +                       EXT_ASSERT(!npath);
999 +                       npath = ext3_ext_find_extent(inode, next, NULL);
1000 +                       if (IS_ERR(npath))
1001 +                               return PTR_ERR(npath);
1002 +                       EXT_ASSERT(npath->p_depth == path->p_depth);
1003 +                       eh = npath[depth].p_hdr;
1004 +                       if (eh->e_num < eh->e_max) {
1005 +                               ext_debug(inode,
1006 +                                               "next leaf has free ext(%d)\n",
1007 +                                               eh->e_num);
1008 +                               path = npath;
1009 +                               goto repeat;
1010 +                       }
1011 +                       ext_debug(inode, "next leaf has no free space (%d,%d)\n",
1012 +                                       eh->e_num, eh->e_max);
1013 +               }
1014 +               /*
1015 +                * there is no free space in found leaf
1016 +                * we're gonna add new leaf in the tree
1017 +                */
1018 +               err = ext3_ext_create_new_leaf(handle, inode, path, newext);
1019 +               if (err)
1020 +                       goto cleanup;
1021 +               goto repeat;
1022 +       }
1023 +
1024 +       nearex = path[depth].p_ext;
1025 +
1026 +       if ((err = ext3_ext_get_access(handle, inode, path + depth)))
1027 +               goto cleanup;
1028 +
1029 +       if (!nearex) {
1030 +               /* there is no extent in this leaf, create first one */
1031 +               ext_debug(inode, "first extent in the leaf: %d:%d:%d\n",
1032 +                               newext->e_block, newext->e_start,
1033 +                               newext->e_num);
1034 +               path[depth].p_ext = EXT_FIRST_EXTENT(eh);
1035 +       } else if (newext->e_block > nearex->e_block) {
1036 +               EXT_ASSERT(newext->e_block != nearex->e_block);
1037 +               len = EXT_MAX_EXTENT(eh) - nearex;
1038 +               len = (len - 1) * sizeof(struct ext3_extent);
1039 +               len = len < 0 ? 0 : len;
1040 +               ext_debug(inode, "insert %d:%d:%d after: nearest 0x%p, "
1041 +                               "move %d from 0x%p to 0x%p\n",
1042 +                               newext->e_block, newext->e_start, newext->e_num,
1043 +                               nearex, len, nearex + 1, nearex + 2);
1044 +               ext3_ext_check_boundary(inode, path + depth, nearex + 2, len);
1045 +               memmove(nearex + 2, nearex + 1, len);
1046 +               path[depth].p_ext = nearex + 1;
1047 +       } else {
1048 +               EXT_ASSERT(newext->e_block != nearex->e_block);
1049 +               len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent);
1050 +               len = len < 0 ? 0 : len;
1051 +               ext_debug(inode, "insert %d:%d:%d before: nearest 0x%p, "
1052 +                               "move %d from 0x%p to 0x%p\n",
1053 +                               newext->e_block, newext->e_start, newext->e_num,
1054 +                               nearex, len, nearex + 1, nearex + 2);
1055 +               
1056 +               memmove(nearex + 1, nearex, len);
1057 +               path[depth].p_ext = nearex;
1058 +       }
1059 +
1060 +       if (!err) {
1061 +               eh->e_num++;
1062 +               nearex = path[depth].p_ext;
1063 +               nearex->e_block = newext->e_block;
1064 +               nearex->e_start = newext->e_start;
1065 +               nearex->e_num = newext->e_num;
1066 +               EXT_ASSERT(nearex->e_num < EXT3_BLOCKS_PER_GROUP(inode->i_sb) &&
1067 +                               nearex->e_num > 0);
1068 +
1069 +               /* time to correct all indexes above */
1070 +               err = ext3_ext_correct_indexes(handle, inode, path);
1071 +       }
1072 +
1073 +       err = ext3_ext_dirty(handle, inode, path + depth);
1074 +
1075 +cleanup:
1076 +       if (npath) {
1077 +               ext3_ext_drop_refs(inode, npath);
1078 +               kfree(npath);
1079 +       }
1080 +               
1081 +       return err;
1082 +}
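A small worked example of the merge test at the top of this routine (hypothetical numbers): an existing extent {e_block = 10, e_start = 500, e_num = 3} maps logical blocks 10..12 to physical blocks 500..502. A new extent {e_block = 13, e_start = 503, e_num = 1} satisfies both 10 + 3 == 13 and 500 + 3 == 503, so it is absorbed by bumping e_num to 4 and dirtying the leaf; no new entry is inserted.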
1083 +
1084 +int ext3_ext_get_block(handle_t *handle, struct inode *inode, long iblock,
1085 +                       struct buffer_head *bh_result, int create,
1086 +                       int extend_disksize)
1087 +{
1088 +       struct ext3_ext_path *path;
1089 +       int depth = EXT3_I(inode)->i_depth;
1090 +       struct ext3_extent newex;
1091 +       struct ext3_extent *ex;
1092 +       int goal, newblock, err = 0;
1093 +
1094 +       ext_debug(inode, "block %d requested for inode %u, bh_result 0x%p\n",
1095 +                       (int) iblock, (unsigned) inode->i_ino, bh_result);
1096 +       bh_result->b_state &= ~(1UL << BH_New);
1097 +
1098 +       down(&EXT3_I(inode)->i_ext_sem);
1099 +
1100 +       /* find extent for this block */
1101 +       path = ext3_ext_find_extent(inode, iblock, NULL);
1102 +       if (IS_ERR(path)) {
1103 +               up(&EXT3_I(inode)->i_ext_sem);
1104 +               return PTR_ERR(path);
1105 +       }
1106 +
1107 +       if ((ex = path[depth].p_ext)) {
1108 +               /* if the found extent covers the block, simply return it */
1109 +               if (iblock >= ex->e_block && iblock < ex->e_block + ex->e_num) {
1110 +                       newblock = iblock - ex->e_block + ex->e_start;
1111 +                       ext_debug(inode, "%d fit into %d:%d -> %d\n",
1112 +                                       (int) iblock, ex->e_block, ex->e_num,
1113 +                                       newblock);
1114 +                       goto out;
1115 +               }
1116 +       }
1117 +
1118 +       /*
1119 +        * we must not allocate a new block if the create flag is zero
1120 +        */
1121 +       if (!create) 
1122 +               goto out2;
1123 +
1124 +       /* allocate new block */
1125 +       goal = ext3_ext_find_goal(inode, path);
1126 +       newblock = ext3_new_block(handle, inode, goal, 0, 0, &err);
1127 +       if (!newblock)
1128 +               goto out2;
1129 +       ext_debug(inode, "allocate new block: goal %d, found %d\n",
1130 +                       goal, newblock);
1131 +
1132 +       /* try to insert new extent into found leaf and return */
1133 +       newex.e_block = iblock;
1134 +       newex.e_start = newblock;
1135 +       newex.e_num = 1;
1136 +       err = ext3_ext_insert_extent(handle, inode, path, &newex);
1137 +       if (err)
1138 +               goto out2;
1139 +       
1140 +       /* previous routine could use block we allocated */
1141 +       newblock = newex.e_start;
1142 +       bh_result->b_state |= (1UL << BH_New);
1143 +
1144 +out:
1145 +       ext3_ext_show_leaf(inode, path);
1146 +       bh_result->b_dev = inode->i_dev;
1147 +       bh_result->b_blocknr = newblock;
1148 +       bh_result->b_state |= (1UL << BH_Mapped);
1149 +out2:
1150 +       ext3_ext_drop_refs(inode, path);
1151 +       kfree(path);
1152 +       up(&EXT3_I(inode)->i_ext_sem);
1153 +
1154 +       return err;     
1155 +}
1156 +
1157 +/*
1158 + * returns 1 if the current index has to be freed (even partially)
1159 + */
1160 +static int ext3_ext_more_to_truncate(struct inode *inode,
1161 +                               struct ext3_ext_path *path)
1162 +{
1163 +       EXT_ASSERT(path->p_idx);
1164 +
1165 +       if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
1166 +               return 0;
1167 +
1168 +       /*
1169 +        * if truncate on a deeper level happened, it wasn't partial,
1170 +        * so we have to consider the current index for truncation
1171 +        */
1172 +       if (path->p_hdr->e_num == path->p_block)
1173 +               return 0;
1174 +
1175 +       /*
1176 +        * store the actual number of indexes so we can tell whether it
1177 +        * got changed at the next iteration
1178 +        */
1179 +       path->p_block = path->p_hdr->e_num;
1180 +       
1181 +       return 1;
1182 +}
1183 +
1184 +/*
1185 + * routine removes index from the index block
1186 + * it's used in truncate case only. thus all requests are for
1187 + * last index in the block only
1188 + */
1189 +int ext3_ext_remove_index(handle_t *handle, struct inode *inode,
1190 +                                       struct ext3_ext_path *path)
1191 +{
1192 +       struct buffer_head *bh;
1193 +       int err;
1194 +       
1195 +       /* free index block */
1196 +       path--;
1197 +       EXT_ASSERT(path->p_hdr->e_num);
1198 +       if ((err = ext3_ext_get_access(handle, inode, path)))
1199 +               return err;
1200 +       path->p_hdr->e_num--;
1201 +       if ((err = ext3_ext_dirty(handle, inode, path)))
1202 +               return err;
1203 +       bh = sb_get_hash_table(inode->i_sb, path->p_idx->e_leaf);
1204 +       ext3_forget(handle, 0, inode, bh, path->p_idx->e_leaf);
1205 +       ext3_free_blocks(handle, inode, path->p_idx->e_leaf, 1);
1206 +
1207 +       ext_debug(inode, "index is empty, remove it, free block %d\n",
1208 +                       path->p_idx->e_leaf);
1209 +       return err;
1210 +}
1211 +
1212 +/*
1213 + * returns 1 if the current extent needs to be freed (even partially),
1214 + * otherwise returns 0
1215 + */
1216 +int ext3_ext_more_leaves_to_truncate(struct inode *inode,
1217 +                                       struct ext3_ext_path *path)
1218 +{
1219 +       unsigned blocksize = inode->i_sb->s_blocksize;
1220 +       struct ext3_extent *ex = path->p_ext;
1221 +       int last_block; 
1222 +
1223 +       EXT_ASSERT(ex);
1224 +
1225 +       /* is there anything left in the current leaf? */
1226 +       if (ex < EXT_FIRST_EXTENT(path->p_hdr))
1227 +               return 0;
1228 +       
1229 +       last_block = (inode->i_size + blocksize-1)
1230 +                       >> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
1231 +
1232 +       if (last_block >= ex->e_block + ex->e_num)
1233 +               return 0;
1234 +
1235 +       /* seems this extent has to be freed */
1236 +       return 1;
1237 +}
1238 +
1239 +handle_t *ext3_ext_journal_restart(handle_t *handle, int needed)
1240 +{
1241 +       int err;
1242 +
1243 +       if (handle->h_buffer_credits > needed)
1244 +               return handle;
1245 +       if (!ext3_journal_extend(handle, needed))
1246 +               return handle;
1247 +       err = ext3_journal_restart(handle, needed);
1248 +       
1249 +       return handle;
1250 +}
1251 +
1252 +/*
1253 + * this routine calculates the max number of blocks to be modified
1254 + * while freeing extent and is intended to be used in truncate path
1255 + */
1256 +static int ext3_ext_calc_credits(struct inode *inode,
1257 +                                       struct ext3_ext_path *path,
1258 +                                       int num)
1259 +{
1260 +       int depth = EXT3_I(inode)->i_depth;
1261 +       int needed;
1262 +       
1263 +       /*
1264 +        * an extent can't cross a group boundary, so we will modify
1265 +        * a single bitmap block and a single group descriptor
1266 +        */
1267 +       needed = 3;
1268 +
1269 +       /*
1270 +        * if this is last extent in a leaf, then we have to
1271 +        * free leaf block and remove pointer from index above.
1272 +        * that pointer could be last in index block, so we'll
1273 +        * have to remove it too. this way we could modify/free
1274 +        * the whole path + root index (inode stored) will be
1275 +        * modified
1276 +        */
1277 +       if (!path || (num == path->p_ext->e_num &&
1278 +                               path->p_ext == EXT_FIRST_EXTENT(path->p_hdr)))
1279 +               needed += (depth * (EXT3_ALLOC_NEEDED + 1)) + 1;
1280 +
1281 +       return needed;
1282 +}
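A worked estimate (illustrative): the fixed cost is 3 credits, presumably the bitmap block, the group descriptor and the block holding the extent. If the range being freed covers the whole first extent of its leaf in a depth-2 tree, the entire path plus the inode-stored root may be modified, so needed = 3 + 2 * (EXT3_ALLOC_NEEDED + 1) + 1 = 3 + 2 * 3 + 1 = 10.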
1283 +
1284 +/*
1285 + * core of the truncate procedure:
1286 + * - calculates what part of each extent in the requested leaf
1287 + *   needs to be freed
1288 + * - frees and forgets these blocks
1289 + *
1290 + * TODO: we could optimize and free several extents during
1291 + *       single journal_restart()-journal_restart() cycle
1292 + */
1293 +static int ext3_ext_truncate_leaf(handle_t *handle,
1294 +                                       struct inode *inode,
1295 +                                       struct ext3_ext_path *path,
1296 +                                       int depth)
1297 +{
1298 +       unsigned blocksize = inode->i_sb->s_blocksize;
1299 +       int last_block; 
1300 +       int i, err = 0, sf, num;
1301 +
1302 +       ext_debug(inode, "level %d - leaf\n", depth);
1303 +       if (!path->p_hdr)
1304 +               path->p_hdr =
1305 +                       (struct ext3_extent_header *) path->p_bh->b_data;
1306 +
1307 +       EXT_ASSERT(path->p_hdr->e_num <= path->p_hdr->e_max);
1308 +       
1309 +       last_block = (inode->i_size + blocksize-1)
1310 +                                       >> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
1311 +       path->p_ext = EXT_LAST_EXTENT(path->p_hdr);
1312 +       while (ext3_ext_more_leaves_to_truncate(inode, path)) {
1313 +
1314 +               /* what part of the extent has to be freed? */
1315 +               sf = last_block > path->p_ext->e_block ?
1316 +                       last_block : path->p_ext->e_block;
1317 +
1318 +               /* number of blocks from extent to be freed */
1319 +               num = path->p_ext->e_block + path->p_ext->e_num - sf;
1320 +
1321 +               /* calc the first physical block to be freed */
1322 +               sf = path->p_ext->e_start + (sf - path->p_ext->e_block);
1323 +
1324 +               i = ext3_ext_calc_credits(inode, path, num);
1325 +               handle = ext3_ext_journal_restart(handle, i);
1326 +               if (IS_ERR(handle))
1327 +                       return PTR_ERR(handle);
1328 +               
1329 +               ext_debug(inode, "free extent %d:%d:%d -> free %d:%d\n",
1330 +                               path->p_ext->e_block, path->p_ext->e_start,
1331 +                               path->p_ext->e_num, sf, num);
1332 +               for (i = 0; i < num; i++) {
1333 +                       struct buffer_head *bh =
1334 +                               sb_get_hash_table(inode->i_sb, sf + i);
1335 +                       ext3_forget(handle, 0, inode, bh, sf + i);
1336 +               }
1337 +               ext3_free_blocks(handle, inode, sf, num);
1338 +
1339 +               /* collect extents usage stats */
1340 +               spin_lock(&EXT3_SB(inode->i_sb)->s_ext_lock);
1341 +               EXT3_SB(inode->i_sb)->s_ext_extents++;
1342 +               EXT3_SB(inode->i_sb)->s_ext_blocks += num;
1343 +               spin_unlock(&EXT3_SB(inode->i_sb)->s_ext_lock);
1344 +
1345 +               /* reduce extent */
1346 +               if ((err = ext3_ext_get_access(handle, inode, path)))
1347 +                       return err;
1348 +               path->p_ext->e_num -= num;
1349 +               if (path->p_ext->e_num == 0)
1350 +                       path->p_hdr->e_num--;
1351 +               if ((err = ext3_ext_dirty(handle, inode, path)))
1352 +                       return err;
1353 +
1354 +               path->p_ext--;
1355 +       }
1356 +       
1357 +       /* if this leaf is free, then we should
1358 +        * remove it from index block above */
1359 +       if (path->p_hdr->e_num == 0 && depth > 0) 
1360 +               err = ext3_ext_remove_index(handle, inode, path);
1361 +
1362 +       return err;
1363 +}
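To make the arithmetic above concrete (hypothetical values): with 4096-byte blocks and a new i_size of 409600, last_block = 100. For an extent {e_block = 96, e_start = 5000, e_num = 8} covering logical blocks 96..103, sf starts as 100, num = 96 + 8 - 100 = 4 and the first physical block to free is 5000 + (100 - 96) = 5004, so physical blocks 5004..5007 are forgotten and freed and the extent is reduced to e_num = 4.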
1364 +
1365 +static void ext3_ext_collect_stats(struct inode *inode)
1366 +{
1367 +       int depth;
1368 +       
1369 +       /* skip inodes that still use the good old block map */
1370 +       if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL))
1371 +               return;
1372 +       
1373 +       /* collect on full truncate only */
1374 +       if (inode->i_size)
1375 +               return;
1376 +
1377 +       depth = EXT3_I(inode)->i_depth;
1378 +       if (depth < EXT3_SB(inode->i_sb)->s_ext_mindepth)
1379 +                EXT3_SB(inode->i_sb)->s_ext_mindepth = depth;
1380 +       if (depth > EXT3_SB(inode->i_sb)->s_ext_maxdepth)
1381 +                EXT3_SB(inode->i_sb)->s_ext_maxdepth = depth;
1382 +       EXT3_SB(inode->i_sb)->s_ext_sum += depth;
1383 +       EXT3_SB(inode->i_sb)->s_ext_count++;
1384 +       
1385 +}
1386 +
1387 +void ext3_ext_truncate(struct inode * inode)
1388 +{
1389 +       struct address_space *mapping = inode->i_mapping;
1390 +       struct ext3_ext_path *path;
1391 +       struct page * page;
1392 +       handle_t *handle;
1393 +       int i, depth, err = 0;
1394 +
1395 +       ext3_ext_collect_stats(inode);
1396 +
1397 +       /*
1398 +        * We have to lock the EOF page here, because lock_page() nests
1399 +        * outside journal_start().
1400 +        */
1401 +       if ((inode->i_size & (inode->i_sb->s_blocksize - 1)) == 0) {
1402 +               /* Block boundary? Nothing to do */
1403 +               page = NULL;
1404 +       } else {
1405 +               page = grab_cache_page(mapping,
1406 +                               inode->i_size >> PAGE_CACHE_SHIFT);
1407 +               if (!page)
1408 +                       return;
1409 +       }
1410 +
1411 +       /*
1412 +        * probably first extent we're gonna free will be last in block
1413 +        */
1414 +       i = ext3_ext_calc_credits(inode, NULL, 0);
1415 +       handle = ext3_journal_start(inode, i);
1416 +       if (IS_ERR(handle)) {
1417 +               if (page) {
1418 +                       clear_highpage(page);
1419 +                       flush_dcache_page(page);
1420 +                       unlock_page(page);
1421 +                       page_cache_release(page);
1422 +               }
1423 +               return;
1424 +       }
1425 +
1426 +       if (page)
1427 +               ext3_block_truncate_page(handle, mapping, inode->i_size, page,
1428 +                                               inode->i_sb->s_blocksize);
1429 +
1430 +       down(&EXT3_I(inode)->i_ext_sem);
1431 +
1432 +       /* 
1433 +        * TODO: optimization is possible here
1434 +        * probably we don't need to scan at all,
1435 +        * because page truncation is enough
1436 +        */
1437 +       if (ext3_orphan_add(handle, inode))
1438 +               goto out_stop;
1439 +
1440 +       /* we have to know where to truncate from in case of a crash */
1441 +       EXT3_I(inode)->i_disksize = inode->i_size;
1442 +       ext3_mark_inode_dirty(handle, inode);
1443 +
1444 +       /*
1445 +        * we start scanning from the right side, freeing all the blocks
1446 +        * after i_size and walking down into the tree
1447 +        */
1448 +       i = 0;
1449 +       depth = EXT3_I(inode)->i_depth;
1450 +       path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL);
1451 +       if (path == NULL) {
1452 +               ext3_error(inode->i_sb, "ext3_ext_truncate",
1453 +                               "Can't allocate path array");
1454 +               goto out_stop;
1455 +       }
1456 +       memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1));
1457 +
1458 +       path[i].p_hdr = (struct ext3_extent_header *) EXT3_I(inode)->i_data;
1459 +       while (i >= 0 && err == 0) {
1460 +               if (i == depth) {
1461 +                       /* this is leaf block */
1462 +                       err = ext3_ext_truncate_leaf(handle, inode,
1463 +                                                       path + i, i);
1464 +                       /* the root level has p_bh == NULL, brelse() handles that */
1465 +                       brelse(path[i].p_bh);
1466 +                       i--;
1467 +                       continue;
1468 +               }
1469 +               
1470 +               /* this is index block */
1471 +               if (!path[i].p_hdr) {
1472 +                       path[i].p_hdr =
1473 +                               (struct ext3_extent_header *) path[i].p_bh->b_data;
1474 +                       ext_debug(inode, "initialize header\n");
1475 +               }
1476 +
1477 +               EXT_ASSERT(path[i].p_hdr->e_num <= path[i].p_hdr->e_max);
1478 +               
1479 +               if (!path[i].p_idx) {
1480 +                       /* this level hasn't been touched yet */
1481 +                       path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
1482 +                       path[i].p_block = path[i].p_hdr->e_num + 1;
1483 +                       ext_debug(inode, "init index ptr: hdr 0x%p, num %d\n",
1484 +                                       path[i].p_hdr, path[i].p_hdr->e_num);
1485 +               } else {
1486 +                       /* we've already been here, look at the next index */
1487 +                       path[i].p_idx--;
1488 +               }
1489 +
1490 +               ext_debug(inode, "level %d - index, first 0x%p, cur 0x%p\n",
1491 +                               i, EXT_FIRST_INDEX(path[i].p_hdr),
1492 +                               path[i].p_idx);
1493 +               if (ext3_ext_more_to_truncate(inode, path + i)) {
1494 +                       /* go to the next level */
1495 +                       ext_debug(inode, "move to level %d (block %d)\n", i+1,
1496 +                                       path[i].p_idx->e_leaf);
1497 +                       memset(path + i + 1, 0, sizeof(*path));
1498 +                       path[i+1].p_bh = sb_bread(inode->i_sb,
1499 +                                                       path[i].p_idx->e_leaf);
1500 +                       if (!path[i+1].p_bh) {
1501 +                               /* should we reset i_size? */
1502 +                               err = -EIO;
1503 +                               break;
1504 +                       }
1505 +                       i++;
1506 +               } else {
1507 +                       /* we finish processing this index, go up */
1508 +                       if (path[i].p_hdr->e_num == 0 && i > 0) {
1509 +                               /* index is empty, remove it
1510 +                                * handle must be already prepared by the
1511 +                                * truncate_leaf()
1512 +                                */
1513 +                               err = ext3_ext_remove_index(handle, inode,
1514 +                                                               path + i);
1515 +                       }
1516 +                       /* the root level has p_bh == NULL, brelse() handles that */
1517 +                       brelse(path[i].p_bh);
1518 +                       i--;
1519 +                       ext_debug(inode, "return to level %d\n", i);
1520 +               }
1521 +       }
1522 +
1523 +       /* TODO: flexible tree reduction should be here */
1524 +       if (path->p_hdr->e_num == 0) {
1525 +               /*
1526 +                * truncating to zero freed the whole tree,
1527 +                * so we need to reset i_depth
1528 +                */
1529 +               EXT3_I(inode)->i_depth = 0;
1530 +               path->p_hdr->e_max = 0;
1531 +               ext3_mark_inode_dirty(handle, inode);
1532 +       }
1533 +
1534 +       kfree(path);
1535 +
1536 +       /* In a multi-transaction truncate, we only make the final
1537 +        * transaction synchronous */
1538 +       if (IS_SYNC(inode))
1539 +               handle->h_sync = 1;
1540 +
1541 +out_stop:
1542 +       /*
1543 +        * If this was a simple ftruncate(), and the file will remain alive
1544 +        * then we need to clear up the orphan record which we created above.
1545 +        * However, if this was a real unlink then we were called by
1546 +        * ext3_delete_inode(), and we allow that function to clean up the
1547 +        * orphan info for us.
1548 +        */
1549 +       if (inode->i_nlink)
1550 +               ext3_orphan_del(handle, inode);
1551 +
1552 +       up(&EXT3_I(inode)->i_ext_sem);
1553 +       ext3_journal_stop(handle, inode);
1554 +}
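
The loop in ext3_ext_truncate() above is a non-recursive, right-to-left depth-first walk over the extent tree: path[] holds one slot per level, the walk descends while the current index block still has entries to visit, and pops back up once a level is exhausted. A standalone, simplified sketch of that control flow follows; the toy node type, the helpers and the sample tree are purely illustrative (not from this patch), and journaling, i_size handling and buffer management are left out.

#include <stdio.h>

#define MAX_DEPTH 4

struct node {
        int nentries;           /* live entries at this level              */
        struct node *child[8];  /* used only by index levels               */
};

struct walk {
        struct node *hdr;       /* mirrors path[i].p_hdr                   */
        int idx;                /* mirrors path[i].p_idx (entry position)  */
};

static void truncate_walk(struct node *root, int depth)
{
        struct walk path[MAX_DEPTH + 1];
        int i = 0;

        path[0].hdr = root;
        path[0].idx = -1;

        while (i >= 0) {
                if (i == depth) {
                        /* leaf level: free its extents, then go back up   */
                        printf("free leaf with %d extents\n",
                               path[i].hdr->nentries);
                        i--;
                        continue;
                }
                if (path[i].idx < 0)            /* first visit: rightmost   */
                        path[i].idx = path[i].hdr->nentries - 1;
                else                            /* revisit: one entry left  */
                        path[i].idx--;

                if (path[i].idx >= 0) {         /* descend into that child  */
                        path[i + 1].hdr = path[i].hdr->child[path[i].idx];
                        path[i + 1].idx = -1;
                        i++;
                } else {                        /* level exhausted: go up   */
                        i--;
                }
        }
}

int main(void)
{
        struct node leaf1 = { 3, { 0 } };
        struct node leaf2 = { 5, { 0 } };
        struct node root  = { 2, { &leaf1, &leaf2 } };

        truncate_walk(&root, 1);        /* frees leaf2 first, then leaf1 */
        return 0;
}

The real code additionally journals every change, reads each child block with sb_bread() (failing the walk with -EIO if a read fails), and removes emptied index blocks on the way back up.
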
1555 +
1556 +/*
1557 + * this routine calculates the max number of blocks we could modify
1558 + * in order to allocate a new block for an inode
1559 + */
1560 +int ext3_ext_writepage_trans_blocks(struct inode *inode, int num)
1561 +{
1562 +       struct ext3_inode_info *ei = EXT3_I(inode);
1563 +       int depth = ei->i_depth + 1;
1564 +       int needed;
1565 +       
1566 +       /*
1567 +        * the worst case we expect is creation of a
1568 +        * new root (growing in depth) plus an index split;
1569 +        * for the split we have to consider depth + 1 because
1570 +        * a previous grow could have increased it
1571 +        */
1572 +
1573 +       /* 
1574 +        * growing in depth:
1575 +        * block allocation + new root + old root
1576 +        */
1577 +       needed = EXT3_ALLOC_NEEDED + 2;
1578 +
1579 +       /* index split. we may need to:
1580 +        *   allocate intermediate indexes and a new leaf,
1581 +        *   change two blocks at each level except the root,
1582 +        *   modify the root block (inode)
1583 +        */
1584 +       needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1;
1585 +
1586 +       /* the caller wants to allocate num blocks */
1587 +       needed *= num;
1588 +       
1589 +#ifdef CONFIG_QUOTA
1590 +       /* 
1591 +        * FIXME: the real calculation should go here;
1592 +        * it depends on the block map format of the quota file
1593 +        */
1594 +       needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS;
1595 +#endif
1596 +
1597 +       return needed;
1598 +}
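
Ignoring the CONFIG_QUOTA term, the estimate above works out to 5 + 4 * (i_depth + 1) journal credits per requested block, since EXT3_ALLOC_NEEDED is 2 in this file. A standalone sketch of the same arithmetic (the function and constant names here are illustrative, not from the patch):

#include <stdio.h>

#define ALLOC_NEEDED 2          /* mirrors EXT3_ALLOC_NEEDED: bitmap + group desc */

/* same formula as ext3_ext_writepage_trans_blocks(), quota term omitted */
static int ext_trans_blocks(int i_depth, int num)
{
        int depth = i_depth + 1;        /* a preceding grow may deepen the tree */
        int needed;

        needed = ALLOC_NEEDED + 2;                       /* grow: new root + old root   */
        needed += depth * ALLOC_NEEDED + 2 * depth + 1;  /* split: new blocks + updates */
        return needed * num;
}

int main(void)
{
        printf("%d\n", ext_trans_blocks(0, 1)); /* flat inode, 1 block    ->  9 */
        printf("%d\n", ext_trans_blocks(1, 1)); /* 1-level tree, 1 block  -> 13 */
        printf("%d\n", ext_trans_blocks(1, 4)); /* 1-level tree, 4 blocks -> 52 */
        return 0;
}

The real function then adds 2 * EXT3_SINGLEDATA_TRANS_BLOCKS when quota is configured, as a placeholder for the quota file's own block map updates.
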
1599 +
1600 +/*
1601 + * called at mount time
1602 + */
1603 +void ext3_ext_init(struct super_block *sb)
1604 +{
1605 +       /*
1606 +        * any future initialization would go here
1607 +        */
1608 +
1609 +       if (test_opt(sb, EXTENTS))
1610 +               printk("EXT3-fs: file extents enabled\n");
1611 +       spin_lock_init(&EXT3_SB(sb)->s_ext_lock);
1612 +}
1613 +
1614 +/*
1615 + * called at umount time
1616 + */
1617 +void ext3_ext_release(struct super_block *sb)
1618 +{
1619 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1620 +
1621 +       /* show collected stats */
1622 +       if (sbi->s_ext_count && sbi->s_ext_extents)
1623 +               printk("EXT3-fs: min depth - %d, max depth - %d, "
1624 +                               "ave. depth - %d, ave. blocks/extent - %d\n",
1625 +                               sbi->s_ext_mindepth,
1626 +                               sbi->s_ext_maxdepth,
1627 +                               sbi->s_ext_sum / sbi->s_ext_count,
1628 +                               sbi->s_ext_blocks / sbi->s_ext_extents);
1629 +}
1630 Index: linux-2.4.18-chaos/fs/ext3/ialloc.c
1631 ===================================================================
1632 --- linux-2.4.18-chaos.orig/fs/ext3/ialloc.c    2004-01-13 16:10:23.000000000 +0300
1633 +++ linux-2.4.18-chaos/fs/ext3/ialloc.c 2004-01-13 16:11:00.000000000 +0300
1634 @@ -573,6 +573,10 @@
1635         ei->i_prealloc_count = 0;
1636  #endif
1637         ei->i_block_group = i;
1638 +       if (test_opt(sb, EXTENTS))
1639 +               EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL;
1640 +       ei->i_depth = 0;
1641 +       sema_init(&ei->i_ext_sem, 1);
1642  
1643         if (ei->i_flags & EXT3_SYNC_FL)
1644                 inode->i_flags |= S_SYNC;
1645 Index: linux-2.4.18-chaos/fs/ext3/inode.c
1646 ===================================================================
1647 --- linux-2.4.18-chaos.orig/fs/ext3/inode.c     2004-01-13 16:10:23.000000000 +0300
1648 +++ linux-2.4.18-chaos/fs/ext3/inode.c  2004-01-13 16:11:00.000000000 +0300
1649 @@ -842,6 +842,15 @@
1650         goto reread;
1651  }
1652  
1653 +static inline int
1654 +ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block,
1655 +               struct buffer_head *bh, int create, int extend_disksize)
1656 +{
1657 +       if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
1658 +               return ext3_ext_get_block(handle, inode, block, bh, create, extend_disksize);
1659 +       return ext3_get_block_handle(handle, inode, block, bh, create, extend_disksize);
1660 +}
1661 +
1662  /*
1663   * The BKL is not held on entry here.
1664   */
1665 @@ -855,7 +864,7 @@
1666                 handle = ext3_journal_current_handle();
1667                 J_ASSERT(handle != 0);
1668         }
1669 -       ret = ext3_get_block_handle(handle, inode, iblock,
1670 +       ret = ext3_get_block_wrap(handle, inode, iblock,
1671                                 bh_result, create, 1);
1672         return ret;
1673  }
1674 @@ -882,7 +891,7 @@
1675                 }
1676         }
1677         if (ret == 0)
1678 -               ret = ext3_get_block_handle(handle, inode, iblock,
1679 +               ret = ext3_get_block_wrap(handle, inode, iblock,
1680                                         bh_result, create, 0);
1681         if (ret == 0)
1682                 bh_result->b_size = (1 << inode->i_blkbits);
1683 @@ -904,7 +913,7 @@
1684         dummy.b_state = 0;
1685         dummy.b_blocknr = -1000;
1686         buffer_trace_init(&dummy.b_history);
1687 -       *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1);
1688 +       *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1);
1689         if (!*errp && buffer_mapped(&dummy)) {
1690                 struct buffer_head *bh;
1691                 bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
1692 @@ -1520,7 +1529,7 @@
1693   * This required during truncate. We need to physically zero the tail end
1694   * of that block so it doesn't yield old data if the file is later grown.
1695   */
1696 -static int ext3_block_truncate_page(handle_t *handle,
1697 +int ext3_block_truncate_page(handle_t *handle,
1698                                     struct address_space *mapping, loff_t from,
1699                                     struct page *page, unsigned blocksize)
1700  {
1701 @@ -1998,6 +2007,9 @@
1702  
1703         ext3_discard_prealloc(inode);
1704  
1705 +       if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
1706 +               return ext3_ext_truncate(inode);
1707 +
1708         blocksize = inode->i_sb->s_blocksize;
1709         last_block = (inode->i_size + blocksize-1)
1710                                         >> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
1711 @@ -2436,6 +2448,8 @@
1712         ei->i_prealloc_count = 0;
1713  #endif
1714         ei->i_block_group = iloc.block_group;
1715 +       ei->i_depth = raw_inode->osd2.linux2.l_i_depth;
1716 +       sema_init(&ei->i_ext_sem, 1);
1717  
1718         /*
1719          * NOTE! The in-memory inode i_data array is in little-endian order
1720 @@ -2556,6 +2570,7 @@
1721                 raw_inode->i_fsize = 0;
1722         }
1723  #endif
1724 +       raw_inode->osd2.linux2.l_i_depth = ei->i_depth;
1725         raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl);
1726         if (!S_ISREG(inode->i_mode)) {
1727                 raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
1728 @@ -2759,6 +2774,9 @@
1729         int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3;
1730         int ret;
1731         
1732 +       if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
1733 +               return ext3_ext_writepage_trans_blocks(inode, bpp);
1734 +
1735         if (ext3_should_journal_data(inode))
1736                 ret = 3 * (bpp + indirects) + 2;
1737         else
1738 @@ -3082,7 +3100,7 @@
1739  
1740         /* alloc blocks one by one */
1741         for (i = 0; i < nblocks; i++) {
1742 -               ret = ext3_get_block_handle(handle, inode, blocks[i],
1743 +               ret = ext3_get_block_wrap(handle, inode, blocks[i],
1744                                                 &bh_tmp, 1, 1);
1745                 if (ret)
1746                         break;
1747 @@ -3158,7 +3176,7 @@
1748                  if (blocks[i] != 0)
1749                          continue;
1750  
1751 -                rc = ext3_get_block_handle(handle, inode, iblock, &bh, 1, 1);
1752 +                rc = ext3_get_block_wrap(handle, inode, iblock, &bh, 1, 1);
1753                  if (rc) {
1754                          printk(KERN_INFO "ext3_map_inode_page: error %d "
1755                                 "allocating block %ld\n", rc, iblock);
1756 Index: linux-2.4.18-chaos/fs/ext3/Makefile
1757 ===================================================================
1758 --- linux-2.4.18-chaos.orig/fs/ext3/Makefile    2004-01-13 16:10:23.000000000 +0300
1759 +++ linux-2.4.18-chaos/fs/ext3/Makefile 2004-01-13 16:11:00.000000000 +0300
1760 @@ -12,7 +12,8 @@
1761  export-objs := ext3-exports.o
1762  
1763  obj-y    := balloc.o iopen.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
1764 -               ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o
1765 +               ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o \
1766 +               extents.o
1767  obj-m    := $(O_TARGET)
1768  
1769  include $(TOPDIR)/Rules.make
1770 Index: linux-2.4.18-chaos/fs/ext3/super.c
1771 ===================================================================
1772 --- linux-2.4.18-chaos.orig/fs/ext3/super.c     2004-01-13 16:10:23.000000000 +0300
1773 +++ linux-2.4.18-chaos/fs/ext3/super.c  2004-01-13 16:11:23.000000000 +0300
1774 @@ -622,6 +622,7 @@
1775  
1776         J_ASSERT(sbi->s_delete_inodes == 0);
1777  
1778 +       ext3_ext_release(sb);
1779         ext3_xattr_put_super(sb);
1780         journal_destroy(sbi->s_journal);
1781         if (!(sb->s_flags & MS_RDONLY)) {
1782 @@ -743,6 +744,12 @@
1783                 else
1784  #endif
1785  
1786 +               if (!strcmp (this_char, "extents"))
1787 +                       set_opt (sbi->s_mount_opt, EXTENTS);
1788 +               else
1789 +               if (!strcmp (this_char, "extdebug"))
1790 +                       set_opt (sbi->s_mount_opt, EXTDEBUG);
1791 +               else
1792                 if (!strcmp (this_char, "bsddf"))
1793                         clear_opt (*mount_options, MINIX_DF);
1794                 else if (!strcmp (this_char, "nouid32")) {
1795 @@ -1470,6 +1477,7 @@
1796                 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
1797                 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
1798                 "writeback");
1799 +       ext3_ext_init(sb);
1800  
1801         return sb;
1802  
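Both new behaviours are opt-in at mount time: the "extents" option parsed above makes newly created inodes carry EXT3_EXTENTS_FL, and "extdebug" additionally enables the ext_debug() tracing, e.g. something like "mount -t ext3 -o extents,extdebug /dev/hda1 /mnt" (device and mount point are illustrative). Inodes created without the option keep using the indirect block map.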
1803 Index: linux-2.4.18-chaos/include/linux/ext3_fs.h
1804 ===================================================================
1805 --- linux-2.4.18-chaos.orig/include/linux/ext3_fs.h     2004-01-13 16:10:23.000000000 +0300
1806 +++ linux-2.4.18-chaos/include/linux/ext3_fs.h  2004-01-13 16:11:00.000000000 +0300
1807 @@ -183,6 +183,7 @@
1808  #define EXT3_IMAGIC_FL                 0x00002000 /* AFS directory */
1809  #define EXT3_JOURNAL_DATA_FL           0x00004000 /* file data should be journaled */
1810  #define EXT3_RESERVED_FL               0x80000000 /* reserved for ext3 lib */
1811 +#define EXT3_EXTENTS_FL                        0x00080000 /* Inode uses extents */
1812  
1813  #define EXT3_FL_USER_VISIBLE           0x00005FFF /* User visible flags */
1814  #define EXT3_FL_USER_MODIFIABLE                0x000000FF /* User modifiable flags */
1815 @@ -243,7 +244,7 @@
1816                 struct {
1817                         __u8    l_i_frag;       /* Fragment number */
1818                         __u8    l_i_fsize;      /* Fragment size */
1819 -                       __u16   i_pad1;
1820 +                       __u16   l_i_depth;
1821                         __u16   l_i_uid_high;   /* these 2 fields    */
1822                         __u16   l_i_gid_high;   /* were reserved2[0] */
1823                         __u32   l_i_reserved2;
1824 @@ -324,6 +325,8 @@
1825  #define EXT3_MOUNT_IOPEN               0x8000  /* Allow access via iopen */
1826  #define EXT3_MOUNT_IOPEN_NOPRIV                0x10000 /* Make iopen world-readable */
1827  #define EXT3_MOUNT_ASYNCDEL            0x20000 /* Delayed deletion */
1828 +#define EXT3_MOUNT_EXTENTS             0x40000 /* Extents support */
1829 +#define EXT3_MOUNT_EXTDEBUG            0x80000 /* Extents debug */
1830  
1831  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
1832  #ifndef _LINUX_EXT2_FS_H
1833 @@ -663,6 +666,12 @@
1834  extern void ext3_dirty_inode(struct inode *);
1835  extern int ext3_change_inode_journal_flag(struct inode *, int);
1836  extern void ext3_truncate (struct inode *);
1837 +extern int ext3_block_truncate_page(handle_t *handle,
1838 +                                   struct address_space *mapping, loff_t from,
1839 +                                   struct page *page, unsigned blocksize);
1840 +extern int ext3_forget(handle_t *handle, int is_metadata,
1841 +                      struct inode *inode, struct buffer_head *bh,
1842 +                      int blocknr);
1843  #ifdef EXT3_DELETE_THREAD
1844  extern void ext3_truncate_thread(struct inode *inode);
1845  #endif
1846 @@ -722,6 +731,13 @@
1847  /* symlink.c */
1848  extern struct inode_operations ext3_fast_symlink_inode_operations;
1849  
1850 +/* extents.c */
1851 +extern int ext3_ext_writepage_trans_blocks(struct inode *, int);
1852 +extern int ext3_ext_get_block(handle_t *, struct inode *, long,
1853 +                               struct buffer_head *, int, int);
1854 +extern void ext3_ext_truncate(struct inode *);
1855 +extern void ext3_ext_init(struct super_block *);
1856 +extern void ext3_ext_release(struct super_block *);
1857  
1858  #endif /* __KERNEL__ */
1859  
1860 Index: linux-2.4.18-chaos/include/linux/ext3_fs_i.h
1861 ===================================================================
1862 --- linux-2.4.18-chaos.orig/include/linux/ext3_fs_i.h   2001-11-22 22:46:19.000000000 +0300
1863 +++ linux-2.4.18-chaos/include/linux/ext3_fs_i.h        2004-01-13 16:11:00.000000000 +0300
1864 @@ -73,6 +73,10 @@
1865          * by other means, so we have truncate_sem.
1866          */
1867         struct rw_semaphore truncate_sem;
1868 +
1869 +       /* extents-related data */
1870 +       struct semaphore i_ext_sem;
1871 +       __u16 i_depth;
1872  };
1873  
1874  #endif /* _LINUX_EXT3_FS_I */
1875 Index: linux-2.4.18-chaos/include/linux/ext3_fs_sb.h
1876 ===================================================================
1877 --- linux-2.4.18-chaos.orig/include/linux/ext3_fs_sb.h  2004-01-13 16:10:21.000000000 +0300
1878 +++ linux-2.4.18-chaos/include/linux/ext3_fs_sb.h       2004-01-13 16:11:00.000000000 +0300
1879 @@ -84,6 +84,16 @@
1880         wait_queue_head_t s_delete_thread_queue;
1881         wait_queue_head_t s_delete_waiter_queue;
1882  #endif
1883 +
1884 +       /* extents */
1885 +       int s_ext_debug;
1886 +       int s_ext_mindepth;
1887 +       int s_ext_maxdepth;
1888 +       int s_ext_sum;
1889 +       int s_ext_count;
1890 +       spinlock_t s_ext_lock;
1891 +       int s_ext_extents;
1892 +       int s_ext_blocks;
1893  };
1894  
1895  #endif /* _LINUX_EXT3_FS_SB */