[fs/lustre-release.git] / lustre / kernel_patches / patches / ext3-extents-2.4.18-chaos.patch
1  fs/ext3/Makefile           |    3 
2  fs/ext3/extents.c          | 1615 +++++++++++++++++++++++++++++++++++++++++++++
3  fs/ext3/ialloc.c           |    4 
4  fs/ext3/inode.c            |   30 
5  fs/ext3/super.c            |    8 
6  include/linux/ext3_fs.h    |   18 
7  include/linux/ext3_fs_i.h  |    4 
8  include/linux/ext3_fs_sb.h |   10 
9  8 files changed, 1684 insertions(+), 8 deletions(-)
10
11 --- /dev/null   2003-01-30 13:24:37.000000000 +0300
12 +++ linux-2.4.18-chaos-alexey/fs/ext3/extents.c 2003-09-23 18:08:59.000000000 +0400
13 +/*
   + * linux/fs/ext3/extents.c
14 + *
15 + * Extents support for EXT3
16 + *
17 + * 07/08/2003    Alex Tomas <bzzz@tmi.comex.ru>
18 + * 
19 + * TODO:
20 + *   - ext3*_error() should be used in some situations
21 + *   - find_goal() [to be tested and improved]
22 + *   - error handling
23 + *   - we could leak allocated block in some error cases
24 + *   - quick search for index/leaf in ext3_ext_find_extent()
25 + *   - tree reduction
26 + *   - cache last found extent
27 + *   - arch-independent
28 + */
29 +
30 +#include <linux/module.h>
31 +#include <linux/fs.h>
32 +#include <linux/time.h>
33 +#include <linux/ext3_jbd.h>
34 +#include <linux/jbd.h>
35 +#include <linux/smp_lock.h>
36 +#include <linux/highuid.h>
37 +#include <linux/pagemap.h>
38 +#include <linux/quotaops.h>
39 +#include <linux/string.h>
40 +#include <linux/slab.h>
41 +#include <linux/locks.h>
42 +
43 +/*
44 + * with AGRESSIVE_TEST defined, the capacity of index/leaf blocks
45 + * becomes very small, so index splits, in-depth growing and
46 + * other hard changes happen much more often.
47 + * this is for debug purposes only
48 + */
49 +#define AGRESSIVE_TEST_
50 +
51 +/*
52 + * if EXT_DEBUG is defined you can use the 'extdebug' mount option
53 + * to get lots of info about what's going on
54 + */
55 +#define EXT_DEBUG
56 +#ifdef EXT_DEBUG
57 +#define ext_debug(inode,fmt,a...)              \
58 +do {                                           \
59 +       if (test_opt((inode)->i_sb, EXTDEBUG))  \
60 +               printk(fmt, ##a);               \
61 +} while (0)
62 +#else
63 +#define ext_debug(inode,fmt,a...)
64 +#endif
65 +
66 +#define EXT3_ALLOC_NEEDED      2       /* block bitmap + group descriptor */
67 +
68 +/*
69 + * ext3_inode has i_block array (total 60 bytes)
70 + * first 4 bytes are used to store:
71 + *  - tree depth (0 means there is no tree yet. all extents are in the inode)
72 + *  - number of alive extents in the inode
73 + */
74 +
75 +/*
76 + * this is extent on-disk structure
77 + * it's used at the bottom of the tree
78 + */
79 +struct ext3_extent {
80 +       __u32   e_block;        /* first logical block extent covers */
81 +       __u32   e_start;        /* first physical block the extent starts at */
82 +       __u32   e_num;          /* number of blocks covered by extent */
83 +};
84 +
85 +/*
86 + * this is index on-disk structure
87 + * it's used at all levels except the bottom
88 + */
89 +struct ext3_extent_idx {
90 +       __u32   e_block;        /* index covers logical blocks from 'block' */
91 +       __u32   e_leaf;         /* pointer to the physical block of the next *
92 +                                * level. a leaf or next index could be here */
93 +};
94 +
95 +/*
96 + * every block (leaf and index), even the inode-stored one, has a header
97 + */
98 +struct ext3_extent_header {    
99 +       __u16   e_num;          /* number of valid entries */
100 +       __u16   e_max;          /* capacity of the store, in entries */
101 +};
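
For orientation, the sizes work out to a 4-byte header, 12-byte extents and 8-byte index entries, so the 60-byte i_block area of the inode holds 4 inline extents (or 7 index entries) and a 4 KB block holds 341 extents. A small user-space sketch (not part of the patch) that mirrors the ext3_ext_space_*() helpers further down:

    #include <stdio.h>
    #include <stdint.h>

    struct hdr { uint16_t e_num, e_max; };             /* 4 bytes  */
    struct ext { uint32_t e_block, e_start, e_num; };  /* 12 bytes */
    struct idx { uint32_t e_block, e_leaf; };          /* 8 bytes  */

    int main(void)
    {
            int i_data = 60;   /* 15 * 4 bytes of i_block in the ext3 inode */
            int blk = 4096;    /* a common block size */

            printf("inline extents: %zu\n", (i_data - sizeof(struct hdr)) / sizeof(struct ext)); /* 4   */
            printf("inline indexes: %zu\n", (i_data - sizeof(struct hdr)) / sizeof(struct idx)); /* 7   */
            printf("extents per 4k: %zu\n", (blk - sizeof(struct hdr)) / sizeof(struct ext));    /* 341 */
            return 0;
    }
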
102 +
103 +/*
104 + * an array of ext3_ext_path describes the path to some extent
105 + * creation/lookup routines use it for traversal/splitting/etc
106 + * truncate uses it to simulate recursive walking
107 + */
108 +struct ext3_ext_path {
109 +       __u32                           p_block;
110 +       __u16                           p_depth;
111 +       struct ext3_extent              *p_ext;
112 +       struct ext3_extent_idx          *p_idx;
113 +       struct ext3_extent_header       *p_hdr;
114 +       struct buffer_head              *p_bh;
115 +};
116 +
117 +#define EXT_FIRST_EXTENT(__hdr__) \
118 +       ((struct ext3_extent *) (((char *) (__hdr__)) +         \
119 +                                sizeof(struct ext3_extent_header)))
120 +#define EXT_FIRST_INDEX(__hdr__) \
121 +       ((struct ext3_extent_idx *) (((char *) (__hdr__)) +     \
122 +                                    sizeof(struct ext3_extent_header)))
123 +#define EXT_HAS_FREE_INDEX(__path__) \
124 +       ((__path__)->p_hdr->e_num < (__path__)->p_hdr->e_max)
125 +#define EXT_LAST_EXTENT(__hdr__) \
126 +       (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->e_num - 1)
127 +#define EXT_LAST_INDEX(__hdr__) \
128 +       (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->e_num - 1)
129 +#define EXT_MAX_EXTENT(__hdr__) \
130 +       (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->e_max - 1)
131 +#define EXT_MAX_INDEX(__hdr__) \
132 +       (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->e_max - 1)
133 +
134 +
135 +#define EXT_ASSERT(__x__) if (!(__x__)) BUG();
136 +
137 +/*
138 + * could return:
139 + *  - EROFS
140 + *  - ENOMEM
141 + */
142 +static int ext3_ext_get_access(handle_t *handle, struct inode *inode,
143 +                               struct ext3_ext_path *path)
144 +{
145 +       if (path->p_bh) {
146 +               /* path points to block */
147 +               return ext3_journal_get_write_access(handle, path->p_bh);
148 +       }
149 +
150 +       /* path points to leaf/index in inode body */
151 +       return 0;
152 +}
153 +
154 +/*
155 + * could return:
156 + *  - EROFS
157 + *  - ENOMEM
158 + *  - EIO
159 + */
160 +static int ext3_ext_dirty(handle_t *handle, struct inode *inode,
161 +                               struct ext3_ext_path *path)
162 +{
163 +       if (path->p_bh) {
164 +               /* path points to block */
165 +               return ext3_journal_dirty_metadata(handle, path->p_bh);
166 +       }
167 +
168 +       /* path points to leaf/index in inode body */
169 +       return ext3_mark_inode_dirty(handle, inode);
170 +}
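
These two helpers bracket every metadata update in this file: take journal write access, modify the header or entries in place, then log the change. A minimal illustrative sketch of that pattern (hypothetical caller, following the same shape the real routines below use):

    /* illustrative only: the journalled-modify pattern used throughout */
    static int example_grow_extent(handle_t *handle, struct inode *inode,
                                   struct ext3_ext_path *path)
    {
            int err;

            if ((err = ext3_ext_get_access(handle, inode, path)))
                    return err;                  /* get write access first */
            path->p_ext->e_num++;                /* modify the entry in place */
            return ext3_ext_dirty(handle, inode, path); /* then log it */
    }
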
171 +
172 +static inline int ext3_ext_space_block(struct inode *inode)
173 +{
174 +       int size;
175 +
176 +       size = (inode->i_sb->s_blocksize - sizeof(struct ext3_extent_header))
177 +               / sizeof(struct ext3_extent);
178 +#ifdef AGRESSIVE_TEST
179 +       size = 6; /* FIXME: for debug, remove this line */
180 +#endif
181 +       return size;
182 +}
183 +
184 +static inline int ext3_ext_space_inode(struct inode *inode)
185 +{
186 +       int size;
187 +
188 +       size = (sizeof(EXT3_I(inode)->i_data) -
189 +                       sizeof(struct ext3_extent_header))
190 +                       / sizeof(struct ext3_extent);
191 +#ifdef AGRESSIVE_TEST
192 +       size = 3; /* FIXME: for debug, remove this line */
193 +#endif
194 +       return size;
195 +}
196 +
197 +static inline int ext3_ext_space_inode_idx(struct inode *inode)
198 +{
199 +       int size;
200 +
201 +       size = (sizeof(EXT3_I(inode)->i_data) -
202 +                       sizeof(struct ext3_extent_header))
203 +                       / sizeof(struct ext3_extent_idx);
204 +#ifdef AGRESSIVE_TEST
205 +       size = 4; /* FIXME: for debug, remove this line */
206 +#endif
207 +       return size;
208 +}
209 +
210 +static void ext3_ext_show_path(struct inode *inode, struct ext3_ext_path *path)
211 +{
212 +       int k, l = path->p_depth;
213 +
214 +       ext_debug(inode, "path:");
215 +       for (k = 0; k <= l; k++, path++) {
216 +               if (path->p_idx) {
217 +                       ext_debug(inode, "  %d->%d", path->p_idx->e_block,
218 +                                       path->p_idx->e_leaf);
219 +               } else if (path->p_ext) {
220 +                       ext_debug(inode, "  %d:%d:%d",
221 +                                       path->p_ext->e_block,
222 +                                       path->p_ext->e_start,
223 +                                       path->p_ext->e_num);
224 +               } else
225 +                       ext_debug(inode, "  []");
226 +       }
227 +       ext_debug(inode, "\n");
228 +}
229 +
230 +static void ext3_ext_show_leaf(struct inode *inode, struct ext3_ext_path *path)
231 +{
232 +       int depth = EXT3_I(inode)->i_depth;
233 +       struct ext3_extent_header *eh = path[depth].p_hdr;
234 +       struct ext3_extent *ex = EXT_FIRST_EXTENT(eh);
235 +       int i;
236 +
237 +       for (i = 0; i < eh->e_num; i++, ex++) {
238 +               ext_debug(inode, "%d:%d:%d ",
239 +                               ex->e_block, ex->e_start, ex->e_num);
240 +       }
241 +       ext_debug(inode, "\n");
242 +}
243 +
244 +static void ext3_ext_drop_refs(struct inode *inode, struct ext3_ext_path *path)
245 +{
246 +       int depth = path->p_depth;
247 +       int i;
248 +
249 +       for (i = 0; i <= depth; i++, path++)
250 +               if (path->p_bh) {
251 +                       brelse(path->p_bh);
252 +                       path->p_bh = NULL;
253 +               }
254 +}
255 +
256 +static int ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path)
257 +{
258 +       struct ext3_inode_info *ei = EXT3_I(inode);
259 +       unsigned long bg_start;
260 +       unsigned long colour;
261 +       int depth;
262 +       
263 +       if (path) {
264 +               depth = path->p_depth;
265 +               /* try to find previous block */
266 +               if (path[depth].p_ext)
267 +                       return path[depth].p_ext->e_start +
268 +                               path[depth].p_ext->e_num - 1;
269 +               
270 +               /* it looks like the index is empty;
271 +                * try to find a goal starting from the index block itself */
272 +               if (path[depth].p_bh)
273 +                       return path[depth].p_bh->b_blocknr;
274 +       }
275 +
276 +       /* OK. use inode's group */
277 +       bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) +
278 +               le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block);
279 +       colour = (current->pid % 16) *
280 +                       (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16);
281 +       return bg_start + colour;
282 +}
283 +
284 +static struct ext3_ext_path *
285 +ext3_ext_find_extent(struct inode *inode, int block, struct ext3_ext_path *path)
286 +{
287 +       struct ext3_inode_info *ei = EXT3_I(inode);
288 +       struct ext3_extent_header *eh = (void *) ei->i_data;
289 +       struct ext3_extent_idx *ix;
290 +       struct buffer_head *bh;
291 +       struct ext3_extent *ex;
292 +       int depth, i, k, ppos = 0, prev = 0;
293 +       
294 +       eh = (struct ext3_extent_header *) ei->i_data;
295 +
296 +       /* initialize capacity of leaf in inode for first time */
297 +       if (eh->e_max == 0)
298 +               eh->e_max = ext3_ext_space_inode(inode);
299 +       i = depth = ei->i_depth;
300 +       EXT_ASSERT(i == 0 || eh->e_num > 0);
301 +       
302 +       /* account possible depth increase */
303 +       if (!path) {
304 +               path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2),
305 +                               GFP_NOFS);
306 +               if (!path)
307 +                       return ERR_PTR(-ENOMEM);
308 +       }
309 +       memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1));
310 +
311 +       /* walk through the tree */
312 +       while (i) {
313 +               ext_debug(inode, "depth %d: num %d, max %d\n",
314 +                               ppos, eh->e_num, eh->e_max);
315 +               ix = EXT_FIRST_INDEX(eh);
316 +               if (eh->e_num) {
317 +                       EXT_ASSERT(prev == 0 || ix->e_block == prev);
318 +                       path[ppos].p_idx = ix;
319 +               }
320 +               EXT_ASSERT(eh->e_num <= eh->e_max);
321 +               for (k = 0; k < eh->e_num; k++, ix++) {
322 +                       ext_debug(inode, "index: %d -> %d\n",
323 +                                       ix->e_block, ix->e_leaf);
324 +                       EXT_ASSERT((k == 0 && prev <= (int)ix->e_block) ||
325 +                                       (k > 0 && prev < (int)ix->e_block));
326 +                       if (block < ix->e_block)
327 +                               break;
328 +                       prev = ix->e_block;
329 +                       path[ppos].p_idx = ix;
330 +               }
331 +               path[ppos].p_block = path[ppos].p_idx->e_leaf;
332 +               path[ppos].p_depth = i;
333 +               path[ppos].p_hdr = eh;
334 +               path[ppos].p_ext = NULL;
335 +
336 +               bh = sb_bread(inode->i_sb, path[ppos].p_block);
337 +               if (!bh) {
338 +                       ext3_ext_drop_refs(inode, path);
339 +                       kfree(path);
340 +                       return ERR_PTR(-EIO);
341 +               }
342 +               eh = (struct ext3_extent_header *) bh->b_data;
343 +               ppos++;
344 +               EXT_ASSERT(ppos <= depth);
345 +               path[ppos].p_bh = bh;
346 +               i--;
347 +       }
348 +
349 +       path[ppos].p_depth = i;
350 +       path[ppos].p_hdr = eh;
351 +       path[ppos].p_ext = NULL;
352 +       
353 +       /* find extent */
354 +       ex = EXT_FIRST_EXTENT(eh);
355 +       if (eh->e_num)
356 +               path[ppos].p_ext = ex;
357 +       EXT_ASSERT(eh->e_num <= eh->e_max);
358 +       for (k = 0; k < eh->e_num; k++, ex++) {
359 +               EXT_ASSERT(ex->e_num < EXT3_BLOCKS_PER_GROUP(inode->i_sb));
360 +               EXT_ASSERT((k == 0 && prev <= (int)ex->e_block) ||
361 +                               (k > 0 && prev < (int)ex->e_block));
362 +               if (block < ex->e_block) 
363 +                       break;
364 +               prev = ex->e_block;
365 +               path[ppos].p_ext = ex;
366 +       }
367 +
368 +       ext3_ext_show_path(inode, path);
369 +
370 +       return path;
371 +}
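
Callers own the returned path array and have to release it themselves; ext3_ext_get_block() further down follows exactly this shape. A trimmed sketch of the lookup/release cycle:

    /* illustrative caller pattern (see ext3_ext_get_block() below) */
    path = ext3_ext_find_extent(inode, iblock, NULL);
    if (IS_ERR(path))
            return PTR_ERR(path);
    /* ... inspect path[depth].p_ext, insert a new extent, etc ... */
    ext3_ext_drop_refs(inode, path);  /* brelse() every buffer in the path */
    kfree(path);                      /* the array itself was kmalloc'ed   */
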
372 +
373 +static void ext3_ext_check_boundary(struct inode *inode,
374 +                                       struct ext3_ext_path *curp,
375 +                                       void *addr, int len)
376 +{
377 +       void *end;
378 +
379 +       if (!len)
380 +               return;
381 +       if (curp->p_bh)
382 +               end = (void *) curp->p_hdr + inode->i_sb->s_blocksize;
383 +       else
384 +               end = (void *) curp->p_hdr + sizeof(EXT3_I(inode)->i_data);
385 +       if (((unsigned long) addr) + len > (unsigned long) end) {
386 +               printk("overflow! 0x%p > 0x%p\n", addr + len, end);
387 +               BUG();
388 +       }
389 +       if ((unsigned long) addr < (unsigned long) curp->p_hdr) {
390 +               printk("underflow! 0x%p < 0x%p\n", addr, curp->p_hdr);
391 +               BUG();
392 +       }
393 +}
394 +
395 +/*
396 + * insert new index [logical;ptr] into the block at curp
397 + * it checks where to insert: before curp or after curp
398 + */
399 +static int ext3_ext_insert_index(handle_t *handle, struct inode *inode,
400 +                               struct ext3_ext_path *curp, int logical,
401 +                               int ptr)
402 +{
403 +       struct ext3_extent_idx *ix;
404 +       int len, err;
405 +
406 +       if ((err = ext3_ext_get_access(handle, inode, curp)))
407 +               return err;
408 +
409 +       EXT_ASSERT(logical != curp->p_idx->e_block);
410 +       len = EXT_MAX_INDEX(curp->p_hdr) - curp->p_idx;
411 +       if (logical > curp->p_idx->e_block) {
412 +               /* insert after */
413 +               len = (len - 1) * sizeof(struct ext3_extent_idx);
414 +               len = len < 0 ? 0 : len;
415 +               ext_debug(inode, "insert new index %d after: %d. "
416 +                               "move %d from 0x%p to 0x%p\n",
417 +                               logical, ptr, len,
418 +                               (curp->p_idx + 1), (curp->p_idx + 2));
419 +
420 +               ext3_ext_check_boundary(inode, curp, curp->p_idx + 2, len);
421 +               memmove(curp->p_idx + 2, curp->p_idx + 1, len);
422 +               ix = curp->p_idx + 1;
423 +       } else {
424 +               /* insert before */
425 +               len = len * sizeof(struct ext3_extent_idx);
426 +               len = len < 0 ? 0 : len;
427 +               ext_debug(inode, "insert new index %d before: %d. "
428 +                               "move %d from 0x%p to 0x%p\n",
429 +                               logical, ptr, len,
430 +                               curp->p_idx, (curp->p_idx + 1));
431 +
432 +               ext3_ext_check_boundary(inode, curp, curp->p_idx + 1, len);
433 +               memmove(curp->p_idx + 1, curp->p_idx, len);
434 +               ix = curp->p_idx;
435 +       }
436 +
437 +       ix->e_block = logical;
438 +       ix->e_leaf = ptr;
439 +       curp->p_hdr->e_num++;
440 +
441 +       err = ext3_ext_dirty(handle, inode, curp);
442 +       ext3_std_error(inode->i_sb, err);
443 +
444 +       return err;
445 +}
446 +
447 +/*
448 + * routine inserts new subtree into the path, using free index entry
449 + * at depth 'at':
450 + *  - allocates all needed blocks (new leaf and all intermediate index blocks)
451 + *  - makes decision where to split
452 + *  - moves remaining extents and index entries (right to the split point)
453 + *    into the newly allocated blocks
454 + *  - initializes the subtree
455 + */
456 +static int ext3_ext_split(handle_t *handle, struct inode *inode,
457 +                               struct ext3_ext_path *path,
458 +                               struct ext3_extent *newext, int at)
459 +{
460 +       struct buffer_head *bh = NULL;
461 +       int depth = EXT3_I(inode)->i_depth;
462 +       struct ext3_extent_header *neh;
463 +       struct ext3_extent_idx *fidx;
464 +       struct ext3_extent *ex;
465 +       int i = at, k, m, a;
466 +       long newblock, oldblock, border;
467 +       int *ablocks = NULL; /* array of allocated blocks */
468 +       int err = 0;
469 +
470 +       /* make decision: where to split? */
471 +       /* FIXME: for now the decision is the simplest: split at the current extent */
472 +
473 +       /* if the current leaf is going to be split, then we should use
474 +        * the border from the split point */
475 +       
476 +       if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) {
477 +               border = path[depth].p_ext[1].e_block;
478 +               ext_debug(inode, "leaf will be split."
479 +                               " next leaf starts at %d\n",
480 +                               (int)border);
481 +       } else {
482 +               border = newext->e_block;
483 +               ext_debug(inode, "leaf will be added."
484 +                               " next leaf starts at %d\n",
485 +                               (int)border);
486 +       }
487 +
488 +       /* 
489 +        * if an error occurs, then we break processing
490 +        * and turn the filesystem read-only. so, the index won't
491 +        * be inserted and the tree will stay in a consistent
492 +        * state. the next mount will repair the buffers too
493 +        */
494 +
495 +       /*
496 +        * get an array to track all allocated blocks;
497 +        * we need this to handle errors and free those
498 +        * blocks upon failure
499 +        */
500 +       ablocks = kmalloc(sizeof(long) * depth, GFP_NOFS);
501 +       if (!ablocks)
502 +               return -ENOMEM;
503 +       memset(ablocks, 0, sizeof(long) * depth);
504 +
505 +       /* allocate all needed blocks */
506 +       ext_debug(inode, "allocate %d blocks for indexes/leaf\n", depth - at);
507 +       newblock = 0; /* FIXME: something more sophisticated needed here */ 
508 +       for (a = 0; newext->e_num > 0 && a < depth - at; a++) {
509 +               newblock = ablocks[a] = newext->e_start++;
510 +               newext->e_num--;
511 +       }
512 +       for (; a < depth - at; a++) {
513 +               newblock = ext3_new_block(handle, inode,
514 +                                               newblock + 1, 0, 0, &err);
515 +               if (newblock == 0)
516 +                       goto cleanup;
517 +               ablocks[a] = newblock;
518 +       }
519 +
520 +       /* initialize new leaf */
521 +       newblock = ablocks[--a];
522 +       EXT_ASSERT(newblock);
523 +       bh = sb_getblk(inode->i_sb, newblock);
524 +       if (!bh) {
525 +               err = -EIO;
526 +               goto cleanup;
527 +       }
528 +       lock_buffer(bh);
529 +
530 +       if ((err = ext3_journal_get_create_access(handle, bh)))
531 +               goto cleanup;
532 +
533 +       neh = (struct ext3_extent_header *) bh->b_data;
534 +       neh->e_num = 0;
535 +       neh->e_max = ext3_ext_space_block(inode);
536 +       ex = EXT_FIRST_EXTENT(neh);
537 +
538 +       /* move the remainder of path[depth] to the new leaf */
539 +       EXT_ASSERT(path[depth].p_hdr->e_num ==
540 +                       path[depth].p_hdr->e_max);
541 +       /* start copy from next extent */
542 +       /* TODO: we could do it by single memmove */
543 +       m = 0;
544 +       path[depth].p_ext++;
545 +       while (path[depth].p_ext <=
546 +                       EXT_MAX_EXTENT(path[depth].p_hdr)) {
547 +               ext_debug(inode, "move %d:%d:%d in new leaf\n",
548 +                               path[depth].p_ext->e_block,
549 +                               path[depth].p_ext->e_start,
550 +                               path[depth].p_ext->e_num);
551 +               memmove(ex++, path[depth].p_ext++,
552 +                               sizeof(struct ext3_extent));
553 +               neh->e_num++;
554 +               m++;
555 +       }
556 +       mark_buffer_uptodate(bh, 1);
557 +       unlock_buffer(bh);
558 +
559 +       if ((err = ext3_journal_dirty_metadata(handle, bh)))
560 +               goto cleanup;   
561 +       brelse(bh);
562 +       bh = NULL;
563 +
564 +       /* correct old leaf */
565 +       if (m) {
566 +               if ((err = ext3_ext_get_access(handle, inode, path)))
567 +                       goto cleanup;
568 +               path[depth].p_hdr->e_num -= m;
569 +               if ((err = ext3_ext_dirty(handle, inode, path)))
570 +                       goto cleanup;
571 +               
572 +       }
573 +
574 +       /* create intermediate indexes */
575 +       k = depth - at - 1;
576 +       EXT_ASSERT(k >= 0);
577 +       if (k)
578 +               ext_debug(inode,
579 +                               "create %d intermediate indices\n", k);
580 +       /* insert new index into current index block */
581 +       /* current depth stored in i var */
582 +       i = depth - 1;
583 +       while (k--) {
584 +               oldblock = newblock;
585 +               newblock = ablocks[--a];
586 +               bh = sb_getblk(inode->i_sb, newblock);
587 +               if (!bh) {
588 +                       err = -EIO;
589 +                       goto cleanup;
590 +               }
591 +               lock_buffer(bh);
592 +
593 +               if ((err = ext3_journal_get_create_access(handle, bh)))
594 +                       goto cleanup;
595 +
596 +               neh = (struct ext3_extent_header *) bh->b_data;
597 +               neh->e_num = 1;
598 +               neh->e_max = ext3_ext_space_block(inode);
599 +               fidx = EXT_FIRST_INDEX(neh);
600 +               fidx->e_block = border;
601 +               fidx->e_leaf = oldblock;
602 +
603 +               ext_debug(inode,
604 +                               "int.index at %d (block %u): %d -> %d\n",
605 +                               i, (unsigned) newblock,
606 +                               (int) border,
607 +                               (int) oldblock);
608 +               /* copy indexes */
609 +               m = 0;
610 +               path[i].p_idx++;
611 +               ext_debug(inode, "cur 0x%p, last 0x%p\n", path[i].p_idx,
612 +                               EXT_MAX_INDEX(path[i].p_hdr));
613 +               EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) ==
614 +                               EXT_LAST_INDEX(path[i].p_hdr));
615 +               while (path[i].p_idx <=
616 +                               EXT_MAX_INDEX(path[i].p_hdr)) {
617 +                       ext_debug(inode, "%d: move %d:%d in new index\n",
618 +                                       i, path[i].p_idx->e_block,
619 +                                       path[i].p_idx->e_leaf);
620 +                       memmove(++fidx, path[i].p_idx++,
621 +                                       sizeof(struct ext3_extent_idx));
622 +                       neh->e_num++;
623 +                       m++;
624 +               }
625 +
626 +               mark_buffer_uptodate(bh, 1);
627 +               unlock_buffer(bh);
628 +
629 +               if ((err = ext3_journal_dirty_metadata(handle, bh)))
630 +                       goto cleanup;
631 +               brelse(bh);
632 +               bh = NULL;
633 +
634 +               /* correct old index */
635 +               if (m) {
636 +                       err = ext3_ext_get_access(handle,inode,path+i);
637 +                       if (err)
638 +                               goto cleanup;
639 +                       path[i].p_hdr->e_num -= m;
640 +                       err = ext3_ext_dirty(handle, inode, path + i);
641 +                       if (err)
642 +                               goto cleanup;
643 +               }
644 +
645 +               i--;
646 +       }
647 +
648 +       /* insert new index */
649 +       if (!err)
650 +               err = ext3_ext_insert_index(handle, inode, path + at,
651 +                                               border, newblock);
652 +
653 +cleanup:
654 +       if (bh) {
655 +               if (buffer_locked(bh))
656 +                       unlock_buffer(bh);
657 +               brelse(bh);
658 +       }
659 +
660 +       if (err) {
661 +               /* free all allocated blocks in error case */
662 +               for (i = 0; i < depth; i++) {
663 +                       if (!ablocks[i])
664 +                               continue;
665 +                       ext3_free_blocks(handle, inode, ablocks[i], 1);
    +               }
666 +       }
667 +       kfree(ablocks);
668 +
669 +       return err;
670 +}
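
To make the allocation count concrete: with tree depth 3 and a free index slot found at depth at = 1, the routine allocates depth - at = 2 blocks and inserts the new index entry into the existing depth-1 block (a rough worked instance, ignoring the AGRESSIVE_TEST sizes):

    depth = 3, at = 1                   /* free slot in the depth-1 index block */
    blocks allocated = depth - at = 2
            ablocks[0] -> new leaf for the extents right of the split point
            ablocks[1] -> new intermediate index block (k = depth - at - 1 = 1)
    border = e_block of the first entry moved right, recorded in the new index
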
671 +
672 +/*
673 + * routine implements tree growing procedure:
674 + *  - allocates new block
675 + *  - moves top-level data (index block or leaf) into the new block
676 + *  - initialize new top-level, creating index that points to the
677 + *    just created block
678 + */
679 +static int ext3_ext_grow_indepth(handle_t *handle, struct inode *inode,
680 +                                       struct ext3_ext_path *path,
681 +                                       struct ext3_extent *newext)
682 +{
683 +       struct buffer_head *bh;
684 +       struct ext3_ext_path *curp = path;
685 +       struct ext3_extent_header *neh;
686 +       struct ext3_extent_idx *fidx;
687 +       int len, err = 0;
688 +       long newblock;
689 +
690 +       /*
691 +        * use the block already allocated by the caller for the new root block
692 +        */
693 +       newblock = newext->e_start++;
694 +       if (newext->e_num == 0) {
695 +               /* 
696 +                * FIXME: if this may happen, then we have to handle
697 +                * possible error and free allocated block
698 +                */
699 +               printk("grow_indepth with zero blocks\n");
700 +               newblock = ext3_new_block(handle, inode,
701 +                                               newblock, 0, 0, &err);
702 +       } else
703 +               newext->e_num--;
704 +       
705 +       bh = sb_getblk(inode->i_sb, newblock);
706 +       if (!bh) {
707 +               err = -EIO;
708 +               ext3_std_error(inode->i_sb, err);
709 +               return err;
710 +       }
711 +       lock_buffer(bh);
712 +
713 +       if ((err = ext3_journal_get_create_access(handle, bh))) {
714 +               unlock_buffer(bh);
715 +               goto out;       
716 +       }
717 +
718 +       /* move top-level index/leaf into new block */
719 +       len = sizeof(struct ext3_extent_header) +
720 +               sizeof(struct ext3_extent) * curp->p_hdr->e_max;
721 +       EXT_ASSERT(len >= 0 && len < 4096);
722 +       memmove(bh->b_data, curp->p_hdr, len);
723 +
724 +       /* set size of new block */
725 +       neh = (struct ext3_extent_header *) bh->b_data;
726 +       neh->e_max = ext3_ext_space_block(inode);
727 +       mark_buffer_uptodate(bh, 1);
728 +       unlock_buffer(bh);
729 +
730 +       if ((err = ext3_journal_dirty_metadata(handle, bh)))
731 +               goto out;
732 +
733 +       /* create index in new top-level index: num,max,pointer */
734 +       if ((err = ext3_ext_get_access(handle, inode, curp)))
735 +               goto out;
736 +
737 +       curp->p_hdr->e_max = ext3_ext_space_inode_idx(inode);
738 +       curp->p_hdr->e_num = 1;
739 +       curp->p_idx = EXT_FIRST_INDEX(curp->p_hdr);
740 +       curp->p_idx->e_block = EXT_FIRST_EXTENT(path[0].p_hdr)->e_block;
741 +       curp->p_idx->e_leaf = newblock;
742 +
743 +       neh = (struct ext3_extent_header *) EXT3_I(inode)->i_data;
744 +       fidx = EXT_FIRST_INDEX(neh);
745 +       ext_debug(inode, "new root: num %d(%d), lblock %d, ptr %d\n",
746 +                       neh->e_num, neh->e_max, fidx->e_block, fidx->e_leaf); 
747 +
748 +       EXT3_I(inode)->i_depth++;
749 +       err = ext3_ext_dirty(handle, inode, curp);
750 +out:
751 +       brelse(bh);
752 +
753 +       return err;
754 +}
755 +
756 +/*
757 + * routine finds empty index and adds new leaf. if no free index found
758 + * then it requests in-depth growing
759 + */
760 +static int ext3_ext_create_new_leaf(handle_t *handle, struct inode *inode,
761 +                                       struct ext3_ext_path *path,
762 +                                       struct ext3_extent *newext)
763 +{
764 +       long newblock = newext->e_start;
765 +       struct ext3_ext_path *curp;
766 +       int depth, i, err = 0;
767 +
768 +repeat:
769 +       i = depth = EXT3_I(inode)->i_depth;
770 +       
771 +       /* walk up the tree and look for a free index entry */
772 +       curp = path + depth;
773 +       while (i > 0 && !EXT_HAS_FREE_INDEX(curp)) {
774 +               i--;
775 +               curp--;
776 +       }
777 +
778 +       /* we use the already allocated block for the index block,
779 +        * so subsequent data blocks should be contiguous */
780 +       if (EXT_HAS_FREE_INDEX(curp)) {
781 +               /* if we found index with free entry, then use that
782 +                * entry: create all needed subtree and add new leaf */
783 +               err = ext3_ext_split(handle, inode, path, newext, i);
784 +
785 +               /* refill path */
786 +               ext3_ext_drop_refs(inode, path);
787 +               path = ext3_ext_find_extent(inode, newext->e_block, path);
788 +               if (IS_ERR(path))
789 +                       err = PTR_ERR(path);
790 +       } else {
791 +               /* tree is full, time to grow in depth */
792 +               err = ext3_ext_grow_indepth(handle, inode, path, newext);
793 +
794 +               /* refill path */
795 +               ext3_ext_drop_refs(inode, path);
796 +               path = ext3_ext_find_extent(inode, newext->e_block, path);
797 +               if (IS_ERR(path))
798 +                       err = PTR_ERR(path);
799 +       
800 +               /*
801 +                * only the first grow (depth 0 -> 1) produces free space;
802 +                * in all other cases we have to split the grown tree
803 +                */
804 +               depth = EXT3_I(inode)->i_depth;
805 +               if (path[depth].p_hdr->e_num == path[depth].p_hdr->e_max) {
806 +                       /* now we need split */
807 +                       goto repeat;
808 +               }
809 +       }
810 +
811 +       if (err)
812 +               return err;
813 +
814 +       /*
815 +        * we've probably used some blocks from the extent;
816 +        * let's allocate a new block for it
817 +        */
818 +       if (newext->e_num == 0 && !err) {
819 +               newext->e_start =
820 +                       ext3_new_block(handle, inode, newblock,
821 +                                       0, 0, &err);
822 +               if (newext->e_start != 0)
823 +                       newext->e_num = 1;
824 +       }
825 +
826 +       return 0;
827 +}
828 +
829 +/*
830 + * returns next allocated block or 0xffffffff
831 + * NOTE: it considers the block number from an index entry as an
832 + * allocated block. thus, index entries have to be consistent
833 + * with the leaves
834 + */
835 +static inline unsigned ext3_ext_next_allocated_block(struct inode *inode,
836 +                                               struct ext3_ext_path *path)
837 +{
838 +       int depth;
839 +
840 +       EXT_ASSERT(path != NULL);
841 +       depth = path->p_depth;
842 +
843 +       if (depth == 0 && path->p_ext == NULL)
844 +               return 0xffffffff;
845 +
846 +       /* FIXME: what if index isn't full ?! */
847 +       while (depth >= 0) {
848 +               if (depth == path->p_depth) {
849 +                       /* leaf */
850 +                       if (path[depth].p_ext !=
851 +                                       EXT_LAST_EXTENT(path[depth].p_hdr))
852 +                               return path[depth].p_ext[1].e_block;
853 +               } else {
854 +                       /* index */
855 +                       if (path[depth].p_idx !=
856 +                                       EXT_LAST_INDEX(path[depth].p_hdr))
857 +                               return path[depth].p_idx[1].e_block;
858 +               }
859 +               depth--;        
860 +       }
861 +
862 +       return 0xffffffff;
863 +}
864 +
865 +/*
866 + * returns first allocated block from next leaf or 0xffffffff
867 + */
868 +static unsigned ext3_ext_next_leaf_block(struct inode *inode,
869 +                                               struct ext3_ext_path *path)
870 +{
871 +       int depth;
872 +
873 +       EXT_ASSERT(path != NULL);
874 +       depth = path->p_depth;
875 +
876 +       /* zero-tree has no leaf blocks at all */
877 +       if (depth == 0)
878 +               return 0xffffffff;
879 +
880 +       /* go to index block */
881 +       depth--;
882 +       
883 +       while (depth >= 0) {
884 +               if (path[depth].p_idx !=
885 +                               EXT_LAST_INDEX(path[depth].p_hdr))
886 +                       return path[depth].p_idx[1].e_block;
887 +               depth--;        
888 +       }
889 +
890 +       return 0xffffffff;
891 +}
892 +
893 +/*
894 + * if a leaf gets modified and the modified extent is the first in the leaf,
895 + * then we have to correct all indexes above
896 + * TODO: do we need to correct tree in all cases?
897 + */
898 +int ext3_ext_correct_indexes(handle_t *handle, struct inode *inode,
899 +                               struct ext3_ext_path *path)
900 +{
901 +       int depth = EXT3_I(inode)->i_depth;     
902 +       struct ext3_extent_header *eh;
903 +       struct ext3_extent *ex;
904 +       long border;
905 +       int k, err = 0;
906 +       
907 +       eh = path[depth].p_hdr;
908 +       ex = path[depth].p_ext;
909 +
910 +       EXT_ASSERT(ex);
911 +       EXT_ASSERT(eh);
912 +       
913 +       if (depth == 0) {
914 +               /* there is no tree at all */
915 +               return 0;
916 +       }
917 +       
918 +       if (ex != EXT_FIRST_EXTENT(eh)) {
919 +               /* we correct the tree only if the first extent in the leaf got modified */
920 +               return 0;
921 +       }
922 +       
923 +       /*
924 +        * TODO: we need correction if the border is smaller than the current one
925 +        */
926 +       k = depth - 1;
927 +       border = path[depth].p_ext->e_block;
928 +       if ((err = ext3_ext_get_access(handle, inode, path + k)))
929 +               return err;
930 +       path[k].p_idx->e_block = border;
931 +       if ((err = ext3_ext_dirty(handle, inode, path + k)))
932 +               return err;
933 +
934 +       while (k--) {
935 +               /* change all left-side indexes */
936 +               if (path[k+1].p_idx != EXT_FIRST_INDEX(path[k+1].p_hdr))
937 +                       break;
938 +               if ((err = ext3_ext_get_access(handle, inode, path + k)))
939 +                       break;
940 +               path[k].p_idx->e_block = border;
941 +               if ((err = ext3_ext_dirty(handle, inode, path + k)))
942 +                       break;
943 +       }
944 +
945 +       return err;
946 +}
947 +
948 +/*
949 + * this routine tries to merge the requested extent into the existing
950 + * extent or inserts requested extent as new one into the tree,
951 + * creating new leaf in no-space case
952 + */
953 +int ext3_ext_insert_extent(handle_t *handle, struct inode *inode,
954 +                               struct ext3_ext_path *path,
955 +                               struct ext3_extent *newext)
956 +{
957 +       int depth, len;
958 +       struct ext3_extent_header * eh;
959 +       struct ext3_extent *ex;
960 +       struct ext3_extent *nearex; /* nearest extent */
961 +       struct ext3_ext_path *npath = NULL;
962 +       int err;
963 +
964 +       depth = EXT3_I(inode)->i_depth; 
965 +       if ((ex = path[depth].p_ext)) {
966 +               /* try to insert block into found extent and return */
967 +               if (ex->e_block + ex->e_num == newext->e_block &&
968 +                               ex->e_start + ex->e_num == newext->e_start) {
969 +#ifdef AGRESSIVE_TEST
970 +                       if (ex->e_num >= 2)
971 +                               goto repeat;
972 +#endif
973 +                       if ((err = ext3_ext_get_access(handle, inode,
974 +                                                       path + depth)))
975 +                               return err;
976 +                       ext_debug(inode, "append %d block to %d:%d (from %d)\n",
977 +                                       newext->e_num, ex->e_block, ex->e_num,
978 +                                       ex->e_start);
979 +                       ex->e_num += newext->e_num;
980 +                       err = ext3_ext_dirty(handle, inode, path + depth);
981 +                       return err;
982 +               }
983 +       }
984 +
985 +repeat:
986 +       depth = EXT3_I(inode)->i_depth; 
987 +       eh = path[depth].p_hdr;
988 +       if (eh->e_num == eh->e_max) {
989 +               /* probably next leaf has space for us? */
990 +               int next = ext3_ext_next_leaf_block(inode, path);
991 +               if (next != 0xffffffff) {
992 +                       ext_debug(inode, "next leaf block - %d\n", next);
993 +                       EXT_ASSERT(!npath);
994 +                       npath = ext3_ext_find_extent(inode, next, NULL);
995 +                       if (IS_ERR(npath))
996 +                               return PTR_ERR(npath);
997 +                       EXT_ASSERT(npath->p_depth == path->p_depth);
998 +                       eh = npath[depth].p_hdr;
999 +                       if (eh->e_num < eh->e_max) {
1000 +                               ext_debug(inode,
1001 +                                               "next leaf has free ext(%d)\n",
1002 +                                               eh->e_num);
1003 +                               path = npath;
1004 +                               goto repeat;
1005 +                       }
1006 +                       ext_debug(inode, "next leaf has no free space (%d,%d)\n",
1007 +                                       eh->e_num, eh->e_max);
1008 +               }
1009 +               /*
1010 +                * there is no free space in the found leaf;
1011 +                * we're gonna add a new leaf to the tree
1012 +                */
1013 +               err = ext3_ext_create_new_leaf(handle, inode, path, newext);
1014 +               if (err)
1015 +                       goto cleanup;
1016 +               goto repeat;
1017 +       }
1018 +
1019 +       nearex = path[depth].p_ext;
1020 +
1021 +       if ((err = ext3_ext_get_access(handle, inode, path + depth)))
1022 +               goto cleanup;
1023 +
1024 +       if (!nearex) {
1025 +               /* there is no extent in this leaf, create first one */
1026 +               ext_debug(inode, "first extent in the leaf: %d:%d:%d\n",
1027 +                               newext->e_block, newext->e_start,
1028 +                               newext->e_num);
1029 +               path[depth].p_ext = EXT_FIRST_EXTENT(eh);
1030 +       } else if (newext->e_block > nearex->e_block) {
1031 +               EXT_ASSERT(newext->e_block != nearex->e_block);
1032 +               len = EXT_MAX_EXTENT(eh) - nearex;
1033 +               len = (len - 1) * sizeof(struct ext3_extent);
1034 +               len = len < 0 ? 0 : len;
1035 +               ext_debug(inode, "insert %d:%d:%d after: nearest 0x%p, "
1036 +                               "move %d from 0x%p to 0x%p\n",
1037 +                               newext->e_block, newext->e_start, newext->e_num,
1038 +                               nearex, len, nearex + 1, nearex + 2);
1039 +               ext3_ext_check_boundary(inode, path + depth, nearex + 2, len);
1040 +               memmove(nearex + 2, nearex + 1, len);
1041 +               path[depth].p_ext = nearex + 1;
1042 +       } else {
1043 +               EXT_ASSERT(newext->e_block != nearex->e_block);
1044 +               len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent);
1045 +               len = len < 0 ? 0 : len;
1046 +               ext_debug(inode, "insert %d:%d:%d before: nearest 0x%p, "
1047 +                               "move %d from 0x%p to 0x%p\n",
1048 +                               newext->e_block, newext->e_start, newext->e_num,
1049 +                               nearex, len, nearex + 1, nearex + 2);
1050 +               
1051 +               memmove(nearex + 1, nearex, len);
1052 +               path[depth].p_ext = nearex;
1053 +       }
1054 +
1055 +       if (!err) {
1056 +               eh->e_num++;
1057 +               nearex = path[depth].p_ext;
1058 +               nearex->e_block = newext->e_block;
1059 +               nearex->e_start = newext->e_start;
1060 +               nearex->e_num = newext->e_num;
1061 +               EXT_ASSERT(nearex->e_num < EXT3_BLOCKS_PER_GROUP(inode->i_sb) &&
1062 +                               nearex->e_num > 0);
1063 +
1064 +               /* time to correct all indexes above */
1065 +               err = ext3_ext_correct_indexes(handle, inode, path);
1066 +       }
1067 +
1068 +       err = ext3_ext_dirty(handle, inode, path + depth);
1069 +
1070 +cleanup:
1071 +       if (npath) {
1072 +               ext3_ext_drop_refs(inode, npath);
1073 +               kfree(npath);
1074 +       }
1075 +               
1076 +       return err;
1077 +}
1078 +
1079 +int ext3_ext_get_block(handle_t *handle, struct inode *inode, long iblock,
1080 +                       struct buffer_head *bh_result, int create,
1081 +                       int extend_disksize)
1082 +{
1083 +       struct ext3_ext_path *path;
1084 +       int depth = EXT3_I(inode)->i_depth;
1085 +       struct ext3_extent newex;
1086 +       struct ext3_extent *ex;
1087 +       int goal, newblock, err = 0;
1088 +
1089 +       ext_debug(inode, "block %d requested for inode %u, bh_result 0x%p\n",
1090 +                       (int) iblock, (unsigned) inode->i_ino, bh_result);
1091 +       bh_result->b_state &= ~(1UL << BH_New);
1092 +
1093 +       down(&EXT3_I(inode)->i_ext_sem);
1094 +
1095 +       /* find extent for this block */
1096 +       path = ext3_ext_find_extent(inode, iblock, NULL);
1097 +       if (IS_ERR(path)) {
1098 +               err = PTR_ERR(path);
1099 +               goto out2;
1100 +       }
1101 +
1102 +       if ((ex = path[depth].p_ext)) {
1103 +               /* if the found extent covers the block, simply return it */
1104 +               if (iblock >= ex->e_block && iblock < ex->e_block + ex->e_num) {
1105 +                       newblock = iblock - ex->e_block + ex->e_start;
1106 +                       ext_debug(inode, "%d fit into %d:%d -> %d\n",
1107 +                                       (int) iblock, ex->e_block, ex->e_num,
1108 +                                       newblock);
1109 +                       goto out;
1110 +               }
1111 +       }
1112 +
1113 +       /*
1114 +        * we must not try to allocate a block if the create flag is zero
1115 +        */
1116 +       if (!create) 
1117 +               goto out2;
1118 +
1119 +       /* allocate new block */
1120 +       goal = ext3_ext_find_goal(inode, path);
1121 +       newblock = ext3_new_block(handle, inode, goal, 0, 0, &err);
1122 +       if (!newblock)
1123 +               goto out2;
1124 +       ext_debug(inode, "allocate new block: goal %d, found %d\n",
1125 +                       goal, newblock);
1126 +
1127 +       /* try to insert new extent into found leaf and return */
1128 +       newex.e_block = iblock;
1129 +       newex.e_start = newblock;
1130 +       newex.e_num = 1;
1131 +       err = ext3_ext_insert_extent(handle, inode, path, &newex);
1132 +       if (err)
1133 +               goto out2;
1134 +       
1135 +       /* the previous routine could have used the block we allocated */
1136 +       newblock = newex.e_start;
1137 +       bh_result->b_state |= (1UL << BH_New);
1138 +
1139 +out:
1140 +       ext3_ext_show_leaf(inode, path);
1141 +       bh_result->b_dev = inode->i_dev;
1142 +       bh_result->b_blocknr = newblock;
1143 +       bh_result->b_state |= (1UL << BH_Mapped);
1144 +out2:
1145 +       ext3_ext_drop_refs(inode, path);
1146 +       kfree(path);
1147 +       up(&EXT3_I(inode)->i_ext_sem);
1148 +
1149 +       return err;     
1150 +}
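
The logical-to-physical translation above is a single addition: newblock = iblock - e_block + e_start. For example, with a hypothetical extent {e_block = 100, e_start = 5000, e_num = 8}, a request for iblock 103 maps to physical block 5003. A quick user-space check, not part of the patch:

    #include <assert.h>

    int main(void)
    {
            unsigned e_block = 100, e_start = 5000, e_num = 8;  /* hypothetical extent */
            unsigned iblock = 103;

            assert(iblock >= e_block && iblock < e_block + e_num);  /* extent covers it */
            assert(iblock - e_block + e_start == 5003);             /* the mapped block */
            return 0;
    }
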
1151 +
1152 +/*
1153 + * returns 1 if the current index has to be freed (even partially)
1154 + */
1155 +static int ext3_ext_more_to_truncate(struct inode *inode,
1156 +                               struct ext3_ext_path *path)
1157 +{
1158 +       EXT_ASSERT(path->p_idx);
1159 +
1160 +       if (path->p_idx < EXT_FIRST_INDEX(path->p_hdr))
1161 +               return 0;
1162 +
1163 +       /*
1164 +        * if truncate on a deeper level happened, it wasn't partial,
1165 +        * so we have to consider the current index for truncation
1166 +        */
1167 +       if (path->p_hdr->e_num == path->p_block)
1168 +               return 0;
1169 +
1170 +       /*
1171 +        * record the actual number of indexes so we know whether this
1172 +        * number has changed at the next iteration
1173 +        */
1174 +       path->p_block = path->p_hdr->e_num;
1175 +       
1176 +       return 1;
1177 +}
1178 +
1179 +/*
1180 + * routine removes index from the index block
1181 + * it's used in truncate case only. thus all requests are for
1182 + * last index in the block only
1183 + */
1184 +int ext3_ext_remove_index(handle_t *handle, struct inode *inode,
1185 +                                       struct ext3_ext_path *path)
1186 +{
1187 +       struct buffer_head *bh;
1188 +       int err;
1189 +       
1190 +       /* free index block */
1191 +       path--;
1192 +       EXT_ASSERT(path->p_hdr->e_num);
1193 +       if ((err = ext3_ext_get_access(handle, inode, path)))
1194 +               return err;
1195 +       path->p_hdr->e_num--;
1196 +       if ((err = ext3_ext_dirty(handle, inode, path)))
1197 +               return err;
1198 +       bh = sb_get_hash_table(inode->i_sb, path->p_idx->e_leaf);
1199 +       ext3_forget(handle, 0, inode, bh, path->p_idx->e_leaf);
1200 +       ext3_free_blocks(handle, inode, path->p_idx->e_leaf, 1);
1201 +
1202 +       ext_debug(inode, "index is empty, remove it, free block %d\n",
1203 +                       path->p_idx->e_leaf);
1204 +       return err;
1205 +}
1206 +
1207 +/*
1208 + * returns 1 if the current extent needs to be freed (even partially),
1209 + * otherwise returns 0
1210 + */
1211 +int ext3_ext_more_leaves_to_truncate(struct inode *inode,
1212 +                                       struct ext3_ext_path *path)
1213 +{
1214 +       unsigned blocksize = inode->i_sb->s_blocksize;
1215 +       struct ext3_extent *ex = path->p_ext;
1216 +       int last_block; 
1217 +
1218 +       EXT_ASSERT(ex);
1219 +
1220 +       /* is there anything left in the current leaf? */
1221 +       if (ex < EXT_FIRST_EXTENT(path->p_hdr))
1222 +               return 0;
1223 +       
1224 +       last_block = (inode->i_size + blocksize-1)
1225 +                       >> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
1226 +
1227 +       if (last_block >= ex->e_block + ex->e_num)
1228 +               return 0;
1229 +
1230 +       /* seems this extent has to be freed */
1231 +       return 1;
1232 +}
1233 +
1234 +handle_t *ext3_ext_journal_restart(handle_t *handle, int needed)
1235 +{
1236 +       int err;
1237 +
1238 +       if (handle->h_buffer_credits > needed)
1239 +               return handle;
1240 +       if (!ext3_journal_extend(handle, needed))
1241 +               return handle;
1242 +       err = ext3_journal_restart(handle, needed);
1243 +       
1244 +       return handle;
1245 +}
1246 +
1247 +/*
1248 + * this routine calculates the max number of blocks to be modified
1249 + * while freeing an extent and is intended to be used in the truncate path
1250 + */
1251 +static int ext3_ext_calc_credits(struct inode *inode,
1252 +                                       struct ext3_ext_path *path,
1253 +                                       int num)
1254 +{
1255 +       int depth = EXT3_I(inode)->i_depth;
1256 +       int needed;
1257 +       
1258 +       /*
1259 +        * an extent can't cross a group boundary, so we will modify a
1260 +        * single bitmap block and a single group descriptor
1261 +        */
1262 +       needed = 3;
1263 +
1264 +       /*
1265 +        * if this is last extent in a leaf, then we have to
1266 +        * free leaf block and remove pointer from index above.
1267 +        * that pointer could be last in index block, so we'll
1268 +        * have to remove it too. this way we could modify/free
1269 +        * the whole path; the root index (stored in the inode) will
1270 +        * be modified as well
1271 +        */
1272 +       if (!path || (num == path->p_ext->e_num &&
1273 +                               path->p_ext == EXT_FIRST_EXTENT(path->p_hdr)))
1274 +               needed += (depth * (EXT3_ALLOC_NEEDED + 1)) + 1;
1275 +
1276 +       return needed;
1277 +}
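
As a worked instance of the estimate above, assuming EXT3_ALLOC_NEEDED = 2 (as defined at the top of this file): freeing the whole first extent of a leaf in a depth-2 tree costs

    needed = 3 + depth * (EXT3_ALLOC_NEEDED + 1) + 1
           = 3 + 2 * 3 + 1
           = 10 journal credits
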
1278 +
1279 +/*
1280 + * core of the truncate procedure:
1281 + * - calculates what part of each extent in the requested leaf
1282 + *   needs to be freed
1283 + * - frees and forgets these blocks
1284 + *
1285 + * TODO: we could optimize and free several extents during
1286 + *       single journal_restart()-journal_restart() cycle
1287 + */
1288 +static int ext3_ext_truncate_leaf(handle_t *handle,
1289 +                                       struct inode *inode,
1290 +                                       struct ext3_ext_path *path,
1291 +                                       int depth)
1292 +{
1293 +       unsigned blocksize = inode->i_sb->s_blocksize;
1294 +       int last_block; 
1295 +       int i, err = 0, sf, num;
1296 +
1297 +       ext_debug(inode, "level %d - leaf\n", depth);
1298 +       if (!path->p_hdr)
1299 +               path->p_hdr =
1300 +                       (struct ext3_extent_header *) path->p_bh->b_data;
1301 +
1302 +       EXT_ASSERT(path->p_hdr->e_num <= path->p_hdr->e_max);
1303 +       
1304 +       last_block = (inode->i_size + blocksize-1)
1305 +                                       >> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
1306 +       path->p_ext = EXT_LAST_EXTENT(path->p_hdr);
1307 +       while (ext3_ext_more_leaves_to_truncate(inode, path)) {
1308 +
1309 +               /* what part of the extent has to be freed? */
1310 +               sf = last_block > path->p_ext->e_block ?
1311 +                       last_block : path->p_ext->e_block;
1312 +
1313 +               /* number of blocks from extent to be freed */
1314 +               num = path->p_ext->e_block + path->p_ext->e_num - sf;
1315 +
1316 +               /* calc the first physical block to be freed */
1317 +               sf = path->p_ext->e_start + (sf - path->p_ext->e_block);
1318 +
1319 +               i = ext3_ext_calc_credits(inode, path, num);
1320 +               handle = ext3_ext_journal_restart(handle, i);
1321 +               if (IS_ERR(handle))
1322 +                       return PTR_ERR(handle);
1323 +               
1324 +               ext_debug(inode, "free extent %d:%d:%d -> free %d:%d\n",
1325 +                               path->p_ext->e_block, path->p_ext->e_start,
1326 +                               path->p_ext->e_num, sf, num);
1327 +               for (i = 0; i < num; i++) {
1328 +                       struct buffer_head *bh =
1329 +                               sb_get_hash_table(inode->i_sb, sf + i);
1330 +                       ext3_forget(handle, 0, inode, bh, sf + i);
1331 +               }
1332 +               ext3_free_blocks(handle, inode, sf, num);
1333 +
1334 +               /* collect extents usage stats */
1335 +               spin_lock(&EXT3_SB(inode->i_sb)->s_ext_lock);
1336 +               EXT3_SB(inode->i_sb)->s_ext_extents++;
1337 +               EXT3_SB(inode->i_sb)->s_ext_blocks += num;
1338 +               spin_unlock(&EXT3_SB(inode->i_sb)->s_ext_lock);
1339 +
1340 +               /* reduce extent */
1341 +               if ((err = ext3_ext_get_access(handle, inode, path)))
1342 +                       return err;
1343 +               path->p_ext->e_num -= num;
1344 +               if (path->p_ext->e_num == 0)
1345 +                       path->p_hdr->e_num--;
1346 +               if ((err = ext3_ext_dirty(handle, inode, path)))
1347 +                       return err;
1348 +
1349 +               path->p_ext--;
1350 +       }
1351 +       
1352 +       /* if this leaf is free, then we should
1353 +        * remove it from index block above */
1354 +       if (path->p_hdr->e_num == 0 && depth > 0) 
1355 +               err = ext3_ext_remove_index(handle, inode, path);
1356 +
1357 +       return err;
1358 +}
1359 +
1360 +static void ext3_ext_collect_stats(struct inode *inode)
1361 +{
1362 +       int depth;
1363 +       
1364 +       /* skip inodes with old good bitmap */
1365 +       if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL))
1366 +               return;
1367 +       
1368 +       /* collect on full truncate only */
1369 +       if (inode->i_size)
1370 +               return;
1371 +
1372 +       depth = EXT3_I(inode)->i_depth;
1373 +       if (depth < EXT3_SB(inode->i_sb)->s_ext_mindepth)
1374 +                EXT3_SB(inode->i_sb)->s_ext_mindepth = depth;
1375 +       if (depth > EXT3_SB(inode->i_sb)->s_ext_maxdepth)
1376 +                EXT3_SB(inode->i_sb)->s_ext_maxdepth = depth;
1377 +       EXT3_SB(inode->i_sb)->s_ext_sum += depth;
1378 +       EXT3_SB(inode->i_sb)->s_ext_count++;
1379 +       
1380 +}
1381 +
1382 +void ext3_ext_truncate(struct inode * inode)
1383 +{
1384 +       struct address_space *mapping = inode->i_mapping;
1385 +       struct ext3_ext_path *path;
1386 +       struct page * page;
1387 +       handle_t *handle;
1388 +       int i, depth, err = 0;
1389 +
1390 +       ext3_ext_collect_stats(inode);
1391 +
1392 +       /*
1393 +        * We have to lock the EOF page here, because lock_page() nests
1394 +        * outside journal_start().
1395 +        */
1396 +       if ((inode->i_size & (inode->i_sb->s_blocksize - 1)) == 0) {
1397 +               /* Block boundary? Nothing to do */
1398 +               page = NULL;
1399 +       } else {
1400 +               page = grab_cache_page(mapping,
1401 +                               inode->i_size >> PAGE_CACHE_SHIFT);
1402 +               if (!page)
1403 +                       return;
1404 +       }
1405 +
1406 +       /*
1407 +        * the first extent we're gonna free will probably be the last in its block
1408 +        */
1409 +       i = ext3_ext_calc_credits(inode, NULL, 0);
1410 +       handle = ext3_journal_start(inode, i);
1411 +       if (IS_ERR(handle)) {
1412 +               if (page) {
1413 +                       clear_highpage(page);
1414 +                       flush_dcache_page(page);
1415 +                       unlock_page(page);
1416 +                       page_cache_release(page);
1417 +               }
1418 +               return;
1419 +       }
1420 +
1421 +       if (page)
1422 +               ext3_block_truncate_page(handle, mapping, inode->i_size, page,
1423 +                                               inode->i_sb->s_blocksize);
1424 +
1425 +       down(&EXT3_I(inode)->i_ext_sem);
1426 +
1427 +       /* 
1428 +        * TODO: optimization is possible here
1429 +        * we probably don't need the scanning at all,
1430 +        * because page truncation is enough
1431 +        */
1432 +       if (ext3_orphan_add(handle, inode))
1433 +               goto out_stop;
1434 +
1435 +       /* we have to know where to truncate from in the crash case */
1436 +       EXT3_I(inode)->i_disksize = inode->i_size;
1437 +       ext3_mark_inode_dirty(handle, inode);
1438 +
1439 +       /*
1440 +        * we start scanning from the right side, freeing all the blocks
1441 +        * after i_size and walking down into the tree
1442 +        */
1443 +       i = 0;
1444 +       depth = EXT3_I(inode)->i_depth;
1445 +       path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL);
1446 +       if (path == NULL) {     /* kmalloc() returns NULL on failure, not an ERR_PTR */
1447 +               ext3_error(inode->i_sb, "ext3_ext_truncate",
1448 +                               "Can't allocate path array");
1449 +               goto out_stop;
1450 +       }
1451 +       memset(path, 0, sizeof(struct ext3_ext_path) * (depth + 1));
1452 +
1453 +       path[i].p_hdr = (struct ext3_extent_header *) EXT3_I(inode)->i_data;
1454 +       while (i >= 0 && err == 0) {
1455 +               if (i == depth) {
1456 +                       /* this is leaf block */
1457 +                       err = ext3_ext_truncate_leaf(handle, inode,
1458 +                                                       path + i, i);
1459 +                       /* the root level has p_bh == NULL; brelse() handles this */
1460 +                       brelse(path[i].p_bh);
1461 +                       i--;
1462 +                       continue;
1463 +               }
1464 +               
1465 +               /* this is index block */
1466 +               if (!path[i].p_hdr) {
1467 +                       path[i].p_hdr =
1468 +                               (struct ext3_extent_header *) path[i].p_bh->b_data;
1469 +                       ext_debug(inode, "initialize header\n");
1470 +               }
1471 +
1472 +               EXT_ASSERT(path[i].p_hdr->e_num <= path[i].p_hdr->e_max);
1473 +               
1474 +               if (!path[i].p_idx) {
1475 +                       /* this level hasn't been touched yet */
1476 +                       path[i].p_idx = EXT_LAST_INDEX(path[i].p_hdr);
1477 +                       path[i].p_block = path[i].p_hdr->e_num + 1;
1478 +                       ext_debug(inode, "init index ptr: hdr 0x%p, num %d\n",
1479 +                                       path[i].p_hdr, path[i].p_hdr->e_num);
1480 +               } else {
1481 +                       /* we've already been here, look at the next index */
1482 +                       path[i].p_idx--;
1483 +               }
1484 +
1485 +               ext_debug(inode, "level %d - index, first 0x%p, cur 0x%p\n",
1486 +                               i, EXT_FIRST_INDEX(path[i].p_hdr),
1487 +                               path[i].p_idx);
1488 +               if (ext3_ext_more_to_truncate(inode, path + i)) {
1489 +                       /* go to the next level */
1490 +                       ext_debug(inode, "move to level %d (block %d)\n", i+1,
1491 +                                       path[i].p_idx->e_leaf);
1492 +                       memset(path + i + 1, 0, sizeof(*path));
1493 +                       path[i+1].p_bh = sb_bread(inode->i_sb,
1494 +                                                       path[i].p_idx->e_leaf);
1495 +                       if (!path[i+1].p_bh) {
1496 +                               /* should we reset i_size? */
1497 +                               err = -EIO;
1498 +                               break;
1499 +                       }
1500 +                       i++;
1501 +               } else {
1502 +                       /* we've finished processing this index, go up */
1503 +                       if (path[i].p_hdr->e_num == 0 && i > 0) {
1504 +                               /* index is empty, remove it
1505 +                                * the handle must already have been prepared
1506 +                                * by truncate_leaf()
1507 +                                */
1508 +                               err = ext3_ext_remove_index(handle, inode,
1509 +                                                               path + i);
1510 +                       }
1511 +                       /* the root level has p_bh == NULL; brelse() handles this */
1512 +                       brelse(path[i].p_bh);
1513 +                       i--;
1514 +                       ext_debug(inode, "return to level %d\n", i);
1515 +               }
1516 +       }
1517 +
1518 +       /* TODO: flexible tree reduction should be here */
1519 +       if (path->p_hdr->e_num == 0) {
1520 +               /*
1521 +                * truncating to zero freed the whole tree,
1522 +                * so we need to correct i_depth
1523 +                */
1524 +               EXT3_I(inode)->i_depth = 0;
1525 +               path->p_hdr->e_max = 0;
1526 +               ext3_mark_inode_dirty(handle, inode);
1527 +       }
1528 +
1529 +       kfree(path);
1530 +
1531 +       /* In a multi-transaction truncate, we only make the final
1532 +        * transaction synchronous */
1533 +       if (IS_SYNC(inode))
1534 +               handle->h_sync = 1;
1535 +
1536 +out_stop:
1537 +       /*
1538 +        * If this was a simple ftruncate(), and the file will remain alive
1539 +        * then we need to clear up the orphan record which we created above.
1540 +        * However, if this was a real unlink then we were called by
1541 +        * ext3_delete_inode(), and we allow that function to clean up the
1542 +        * orphan info for us.
1543 +        */
1544 +       if (inode->i_nlink)
1545 +               ext3_orphan_del(handle, inode);
1546 +
1547 +       up(&EXT3_I(inode)->i_ext_sem);
1548 +       ext3_journal_stop(handle, inode);
1549 +}
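
The loop above is easier to follow in isolation. Below is a minimal user-space
sketch of the same control flow, with journaling, buffer I/O, index removal and
the i_size boundary checks stripped out; the cursor struct and the DEPTH and
FANOUT values are invented for the example and stand in for the real path
array, tree depth and per-block entry count:

/* walk the tree from the rightmost entry, depth first: truncate a leaf,
 * back up one level, step left, descend again, until the root is exhausted */
#include <stdio.h>

#define DEPTH  2                /* example tree depth */
#define FANOUT 3                /* example entries per index block */

struct cursor {
        int idx;                /* next entry at this level, -1 = untouched */
};

static void truncate_leaf(void)
{
        printf("truncate one leaf block\n");
}

int main(void)
{
        struct cursor path[DEPTH + 1] = { { -1 }, { -1 }, { -1 } };
        int i = 0;

        while (i >= 0) {
                if (i == DEPTH) {          /* leaf level: free it, go up */
                        truncate_leaf();
                        i--;
                        continue;
                }
                if (path[i].idx < 0)       /* first visit: start at the right */
                        path[i].idx = FANOUT - 1;
                else                       /* been here before: step left */
                        path[i].idx--;

                if (path[i].idx >= 0) {    /* more entries below: descend */
                        path[i + 1].idx = -1;
                        i++;
                } else {                   /* level exhausted: go up */
                        i--;
                }
        }
        return 0;
}

Run as-is it prints one line per leaf (FANOUT to the power DEPTH of them),
which is the order in which ext3_ext_truncate() visits and frees leaf blocks.
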
1550 +
1551 +/*
1552 + * this routine calculates the max number of blocks we could modify
1553 + * in order to allocate a new block for an inode
1554 + */
1555 +int ext3_ext_writepage_trans_blocks(struct inode *inode, int num)
1556 +{
1557 +       struct ext3_inode_info *ei = EXT3_I(inode);
1558 +       int depth = ei->i_depth + 1;
1559 +       int needed;
1560 +       
1561 +       /*
1562 +        * the worst case we expect is creation of a
1563 +        * new root (growth in depth) plus an index split;
1564 +        * for the split we have to consider depth + 1 because
1565 +        * the preceding growth could have increased it
1566 +        */
1567 +
1568 +       /* 
1569 +        * growing in depth:
1570 +        * block allocation + new root + old root
1571 +        */
1572 +       needed = EXT3_ALLOC_NEEDED + 2;
1573 +
1574 +       /* index split. we may need to:
1575 +        *   allocate intermediate indexes and a new leaf
1576 +        *   change two blocks at each level except the root
1577 +        *   modify the root block (the inode)
1578 +        */
1579 +       needed += (depth * EXT3_ALLOC_NEEDED) + (2 * depth) + 1;
1580 +
1581 +       /* the caller wants to allocate num blocks */
1582 +       needed *= num;
1583 +       
1584 +#ifdef CONFIG_QUOTA
1585 +       /* 
1586 +        * FIXME: the real calculation should go here;
1587 +        * it depends on the block map format of the quota file
1588 +        */
1589 +       needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS;
1590 +#endif
1591 +
1592 +       return needed;
1593 +}
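
For a concrete feel of the estimate above, here is a standalone arithmetic
check; i_depth = 1 and num = 1 are example values only, and the quota term is
left out:

/* reproduce the credit estimate above for one example inode */
#include <stdio.h>

#define EXT3_ALLOC_NEEDED 2     /* block bitmap + group descriptor */

int main(void)
{
        int i_depth = 1, num = 1;
        int depth = i_depth + 1;                /* growth may add one level */
        int needed = EXT3_ALLOC_NEEDED + 2;     /* grow in depth: 2 + 2 = 4 */

        /* index split: allocations per level, two blocks per level, root */
        needed += depth * EXT3_ALLOC_NEEDED + 2 * depth + 1;   /* + 9 = 13 */
        needed *= num;                          /* one block requested      */

        printf("estimated journal credits: %d\n", needed);
        return 0;
}

So a depth-1 extent tree costs 13 credits per requested block before the quota
adjustment, and the estimate scales linearly with num.
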
1594 +
1595 +/*
1596 + * called at mount time
1597 + */
1598 +void ext3_ext_init(struct super_block *sb)
1599 +{
1600 +       /*
1601 +        * possible initialization would go here
1602 +        */
1603 +
1604 +       if (test_opt(sb, EXTENTS))
1605 +               printk("EXT3-fs: file extents enabled\n");
1606 +       spin_lock_init(&EXT3_SB(sb)->s_ext_lock);
1607 +}
1608 +
1609 +/*
1610 + * called at umount time
1611 + */
1612 +void ext3_ext_release(struct super_block *sb)
1613 +{
1614 +       struct ext3_sb_info *sbi = EXT3_SB(sb);
1615 +
1616 +       /* show collected stats */
1617 +       if (sbi->s_ext_count && sbi->s_ext_extents)
1618 +               printk("EXT3-fs: min depth - %d, max depth - %d, "
1619 +                               "ave. depth - %d, ave. blocks/extent - %d\n",
1620 +                               sbi->s_ext_mindepth,
1621 +                               sbi->s_ext_maxdepth,
1622 +                               sbi->s_ext_sum / sbi->s_ext_count,
1623 +                               sbi->s_ext_blocks / sbi->s_ext_extents);
1624 +}
1625 +
1626 --- linux-2.4.18-chaos/fs/ext3/ialloc.c~ext3-extents-2.4.18-chaos       2003-09-19 22:07:14.000000000 +0400
1627 +++ linux-2.4.18-chaos-alexey/fs/ext3/ialloc.c  2003-09-20 00:18:43.000000000 +0400
1628 @@ -573,6 +573,10 @@ repeat:
1629         ei->i_prealloc_count = 0;
1630  #endif
1631         ei->i_block_group = i;
1632 +       if (test_opt(sb, EXTENTS))
1633 +               EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL;
1634 +       ei->i_depth = 0;
1635 +       sema_init(&ei->i_ext_sem, 1);
1636  
1637         if (ei->i_flags & EXT3_SYNC_FL)
1638                 inode->i_flags |= S_SYNC;
1639 --- linux-2.4.18-chaos/fs/ext3/inode.c~ext3-extents-2.4.18-chaos        2003-09-19 22:07:14.000000000 +0400
1640 +++ linux-2.4.18-chaos-alexey/fs/ext3/inode.c   2003-09-22 15:40:30.000000000 +0400
1641 @@ -842,6 +842,15 @@ changed:
1642         goto reread;
1643  }
1644  
1645 +static inline int
1646 +ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block,
1647 +               struct buffer_head *bh, int create, int extend_disksize)
1648 +{
1649 +       if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
1650 +               return ext3_ext_get_block(handle, inode, block, bh, create, extend_disksize);
1651 +       return ext3_get_block_handle(handle, inode, block, bh, create, extend_disksize);
1652 +}
1653 +
1654  /*
1655   * The BKL is not held on entry here.
1656   */
1657 @@ -855,7 +864,7 @@ static int ext3_get_block(struct inode *
1658                 handle = ext3_journal_current_handle();
1659                 J_ASSERT(handle != 0);
1660         }
1661 -       ret = ext3_get_block_handle(handle, inode, iblock,
1662 +       ret = ext3_get_block_wrap(handle, inode, iblock,
1663                                 bh_result, create, 1);
1664         return ret;
1665  }
1666 @@ -882,7 +891,7 @@ ext3_direct_io_get_block(struct inode *i
1667                 }
1668         }
1669         if (ret == 0)
1670 -               ret = ext3_get_block_handle(handle, inode, iblock,
1671 +               ret = ext3_get_block_wrap(handle, inode, iblock,
1672                                         bh_result, create, 0);
1673         if (ret == 0)
1674                 bh_result->b_size = (1 << inode->i_blkbits);
1675 @@ -904,7 +913,7 @@ struct buffer_head *ext3_getblk(handle_t
1676         dummy.b_state = 0;
1677         dummy.b_blocknr = -1000;
1678         buffer_trace_init(&dummy.b_history);
1679 -       *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1);
1680 +       *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1);
1681         if (!*errp && buffer_mapped(&dummy)) {
1682                 struct buffer_head *bh;
1683                 bh = sb_getblk(inode->i_sb, dummy.b_blocknr);
1684 @@ -1520,7 +1529,7 @@ ext3_block_truncate_page_prepare(struct 
1685   * This is required during truncate. We need to physically zero the tail end
1686   * of that block so it doesn't yield old data if the file is later grown.
1687   */
1688 -static int ext3_block_truncate_page(handle_t *handle,
1689 +int ext3_block_truncate_page(handle_t *handle,
1690                                     struct address_space *mapping, loff_t from,
1691                                     struct page *page, unsigned blocksize)
1692  {
1693 @@ -1998,6 +2007,9 @@ void ext3_truncate(struct inode * inode)
1694  
1695         ext3_discard_prealloc(inode);
1696  
1697 +       if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
1698 +               return ext3_ext_truncate(inode);
1699 +
1700         blocksize = inode->i_sb->s_blocksize;
1701         last_block = (inode->i_size + blocksize-1)
1702                                         >> EXT3_BLOCK_SIZE_BITS(inode->i_sb);
1703 @@ -2436,6 +2448,8 @@ void ext3_read_inode(struct inode * inod
1704         ei->i_prealloc_count = 0;
1705  #endif
1706         ei->i_block_group = iloc.block_group;
1707 +       ei->i_depth = raw_inode->osd2.linux2.l_i_depth;
1708 +       sema_init(&ei->i_ext_sem, 1);
1709  
1710         /*
1711          * NOTE! The in-memory inode i_data array is in little-endian order
1712 @@ -2556,6 +2570,7 @@ static int ext3_do_update_inode(handle_t
1713                 raw_inode->i_fsize = 0;
1714         }
1715  #endif
1716 +       raw_inode->osd2.linux2.l_i_depth = ei->i_depth;
1717         raw_inode->i_file_acl = cpu_to_le32(ei->i_file_acl);
1718         if (!S_ISREG(inode->i_mode)) {
1719                 raw_inode->i_dir_acl = cpu_to_le32(ei->i_dir_acl);
1720 @@ -2759,6 +2774,9 @@ int ext3_writepage_trans_blocks(struct i
1721         int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3;
1722         int ret;
1723         
1724 +       if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)
1725 +               return ext3_ext_writepage_trans_blocks(inode, bpp);
1726 +
1727         if (ext3_should_journal_data(inode))
1728                 ret = 3 * (bpp + indirects) + 2;
1729         else
1730 @@ -3082,7 +3100,7 @@ int ext3_prep_san_write(struct inode *in
1731  
1732         /* alloc blocks one by one */
1733         for (i = 0; i < nblocks; i++) {
1734 -               ret = ext3_get_block_handle(handle, inode, blocks[i],
1735 +               ret = ext3_get_block_wrap(handle, inode, blocks[i],
1736                                                 &bh_tmp, 1, 1);
1737                 if (ret)
1738                         break;
1739 @@ -3143,7 +3161,7 @@ int ext3_map_inode_page(struct inode *in
1740                  if (blocks[i] != 0)
1741                          continue;
1742  
1743 -                rc = ext3_get_block_handle(handle, inode, iblock, &bh, 1, 1);
1744 +                rc = ext3_get_block_wrap(handle, inode, iblock, &bh, 1, 1);
1745                  if (rc) {
1746                          printk(KERN_INFO "ext3_map_inode_page: error %d "
1747                                 "allocating block %ld\n", rc, iblock);
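
With the hunks above, every block-mapping entry point in inode.c funnels
through ext3_get_block_wrap(), so callers no longer care whether an inode is
extent-mapped or uses the classic indirect blocks. A hypothetical in-file
helper, shown only to illustrate the calling convention (the name
ext3_map_one_block and the hard-coded create/extend flags are not part of the
patch):

/* illustration only: map a single logical block, letting the wrapper
 * pick the extent path or the indirect path from EXT3_EXTENTS_FL */
static int ext3_map_one_block(handle_t *handle, struct inode *inode,
                              long block, struct buffer_head *bh)
{
        memset(bh, 0, sizeof(*bh));     /* start from a clean buffer_head */
        return ext3_get_block_wrap(handle, inode, block, bh,
                                   1 /* create */, 1 /* extend disksize */);
}
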
1748 --- linux-2.4.18-chaos/fs/ext3/Makefile~ext3-extents-2.4.18-chaos       2003-09-19 22:07:14.000000000 +0400
1749 +++ linux-2.4.18-chaos-alexey/fs/ext3/Makefile  2003-09-20 00:18:43.000000000 +0400
1750 @@ -12,7 +12,8 @@ O_TARGET := ext3.o
1751  export-objs := ext3-exports.o
1752  
1753  obj-y    := balloc.o iopen.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \
1754 -               ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o
1755 +               ioctl.o namei.o super.o symlink.o xattr.o ext3-exports.o \
1756 +               extents.o
1757  obj-m    := $(O_TARGET)
1758  
1759  include $(TOPDIR)/Rules.make
1760 --- linux-2.4.18-chaos/fs/ext3/super.c~ext3-extents-2.4.18-chaos        2003-09-19 22:07:15.000000000 +0400
1761 +++ linux-2.4.18-chaos-alexey/fs/ext3/super.c   2003-09-20 00:18:43.000000000 +0400
1762 @@ -619,6 +619,7 @@ void ext3_put_super (struct super_block 
1763         kdev_t j_dev = sbi->s_journal->j_dev;
1764         int i;
1765  
1766 +       ext3_ext_release(sb);
1767         ext3_stop_delete_thread(sbi);
1768         ext3_xattr_put_super(sb);
1769         journal_destroy(sbi->s_journal);
1770 @@ -741,6 +742,12 @@ static int parse_options (char * options
1771                 else
1772  #endif
1773  
1774 +               if (!strcmp (this_char, "extents"))
1775 +                       set_opt (sbi->s_mount_opt, EXTENTS);
1776 +               else
1777 +               if (!strcmp (this_char, "extdebug"))
1778 +                       set_opt (sbi->s_mount_opt, EXTDEBUG);
1779 +               else
1780                 if (!strcmp (this_char, "bsddf"))
1781                         clear_opt (*mount_options, MINIX_DF);
1782                 else if (!strcmp (this_char, "nouid32")) {
1783 @@ -1468,6 +1475,7 @@ struct super_block * ext3_read_super (st
1784                 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
1785                 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
1786                 "writeback");
1787 +       ext3_ext_init(sb);
1788  
1789         return sb;
1790  
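
With the parse_options and ext3_read_super hooks above, extent support is
opt-in per mount: nothing changes unless the filesystem is mounted with the
new option, and extdebug additionally enables the ext_debug() tracing in
extents.c. Illustrative usage: mount -t ext3 -o extents,extdebug <device>
<mountpoint> (device and mount point are placeholders). Only inodes created
while the option is active get EXT3_EXTENTS_FL (see the ialloc.c hunk above);
existing inodes keep their indirect block maps.
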
1791 --- linux-2.4.18-chaos/include/linux/ext3_fs.h~ext3-extents-2.4.18-chaos        2003-09-19 22:07:14.000000000 +0400
1792 +++ linux-2.4.18-chaos-alexey/include/linux/ext3_fs.h   2003-09-20 00:18:43.000000000 +0400
1793 @@ -183,6 +183,7 @@ struct ext3_group_desc
1794  #define EXT3_IMAGIC_FL                 0x00002000 /* AFS directory */
1795  #define EXT3_JOURNAL_DATA_FL           0x00004000 /* file data should be journaled */
1796  #define EXT3_RESERVED_FL               0x80000000 /* reserved for ext3 lib */
1797 +#define EXT3_EXTENTS_FL                        0x00080000 /* Inode uses extents */
1798  
1799  #define EXT3_FL_USER_VISIBLE           0x00005FFF /* User visible flags */
1800  #define EXT3_FL_USER_MODIFIABLE                0x000000FF /* User modifiable flags */
1801 @@ -243,7 +244,7 @@ struct ext3_inode {
1802                 struct {
1803                         __u8    l_i_frag;       /* Fragment number */
1804                         __u8    l_i_fsize;      /* Fragment size */
1805 -                       __u16   i_pad1;
1806 +                       __u16   l_i_depth;
1807                         __u16   l_i_uid_high;   /* these 2 fields    */
1808                         __u16   l_i_gid_high;   /* were reserved2[0] */
1809                         __u32   l_i_reserved2;
1810 @@ -324,6 +325,8 @@ struct ext3_inode {
1811  #define EXT3_MOUNT_IOPEN               0x8000  /* Allow access via iopen */
1812  #define EXT3_MOUNT_IOPEN_NOPRIV                0x10000 /* Make iopen world-readable */
1813  #define EXT3_MOUNT_ASYNCDEL            0x20000 /* Delayed deletion */
1814 +#define EXT3_MOUNT_EXTENTS             0x40000 /* Extents support */
1815 +#define EXT3_MOUNT_EXTDEBUG            0x80000 /* Extents debug */
1816  
1817  /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */
1818  #ifndef _LINUX_EXT2_FS_H
1819 @@ -663,6 +666,12 @@ extern void ext3_discard_prealloc (struc
1820  extern void ext3_dirty_inode(struct inode *);
1821  extern int ext3_change_inode_journal_flag(struct inode *, int);
1822  extern void ext3_truncate (struct inode *);
1823 +extern int ext3_block_truncate_page(handle_t *handle,
1824 +                                   struct address_space *mapping, loff_t from,
1825 +                                   struct page *page, unsigned blocksize);
1826 +extern int ext3_forget(handle_t *handle, int is_metadata,
1827 +                      struct inode *inode, struct buffer_head *bh,
1828 +                      int blocknr);
1829  #ifdef EXT3_DELETE_THREAD
1830  extern void ext3_truncate_thread(struct inode *inode);
1831  #endif
1832 @@ -722,6 +731,13 @@ extern struct inode_operations ext3_dir_
1833  /* symlink.c */
1834  extern struct inode_operations ext3_fast_symlink_inode_operations;
1835  
1836 +/* extents.c */
1837 +extern int ext3_ext_writepage_trans_blocks(struct inode *, int);
1838 +extern int ext3_ext_get_block(handle_t *, struct inode *, long,
1839 +                               struct buffer_head *, int, int);
1840 +extern void ext3_ext_truncate(struct inode *);
1841 +extern void ext3_ext_init(struct super_block *);
1842 +extern void ext3_ext_release(struct super_block *);
1843  
1844  #endif /* __KERNEL__ */
1845  
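
One property worth noting from the masks in this hunk: the new flag value sits
outside both EXT3_FL_USER_VISIBLE (0x00005FFF) and EXT3_FL_USER_MODIFIABLE
(0x000000FF), so, to the extent the flags ioctls honor these masks, it is
neither reported nor settable from user space; within this patch it is only
set by the kernel at inode-creation time. A standalone check, with the values
copied from the hunk above:

/* confirm the extents flag is masked out of the user-visible and
 * user-modifiable flag sets defined above */
#include <stdio.h>

#define EXT3_EXTENTS_FL          0x00080000
#define EXT3_FL_USER_VISIBLE     0x00005FFF
#define EXT3_FL_USER_MODIFIABLE  0x000000FF

int main(void)
{
        printf("user visible:    %s\n",
               (EXT3_EXTENTS_FL & EXT3_FL_USER_VISIBLE) ? "yes" : "no");
        printf("user modifiable: %s\n",
               (EXT3_EXTENTS_FL & EXT3_FL_USER_MODIFIABLE) ? "yes" : "no");
        return 0;       /* prints "no" for both */
}
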
1846 --- linux-2.4.18-chaos/include/linux/ext3_fs_i.h~ext3-extents-2.4.18-chaos      2001-11-22 22:46:19.000000000 +0300
1847 +++ linux-2.4.18-chaos-alexey/include/linux/ext3_fs_i.h 2003-09-20 00:18:43.000000000 +0400
1848 @@ -73,6 +73,10 @@ struct ext3_inode_info {
1849          * by other means, so we have truncate_sem.
1850          */
1851         struct rw_semaphore truncate_sem;
1852 +
1853 +       /* extents-related data */
1854 +       struct semaphore i_ext_sem;
1855 +       __u16 i_depth;
1856  };
1857  
1858  #endif /* _LINUX_EXT3_FS_I */
1859 --- linux-2.4.18-chaos/include/linux/ext3_fs_sb.h~ext3-extents-2.4.18-chaos     2003-09-19 22:07:13.000000000 +0400
1860 +++ linux-2.4.18-chaos-alexey/include/linux/ext3_fs_sb.h        2003-09-20 00:18:43.000000000 +0400
1861 @@ -84,6 +84,16 @@ struct ext3_sb_info {
1862         wait_queue_head_t s_delete_thread_queue;
1863         wait_queue_head_t s_delete_waiter_queue;
1864  #endif
1865 +
1866 +       /* extents */
1867 +       int s_ext_debug;
1868 +       int s_ext_mindepth;
1869 +       int s_ext_maxdepth;
1870 +       int s_ext_sum;
1871 +       int s_ext_count;
1872 +       spinlock_t s_ext_lock;
1873 +       int s_ext_extents;
1874 +       int s_ext_blocks;
1875  };
1876  
1877  #endif /* _LINUX_EXT3_FS_SB */
1878
1879 _