From a8899581aa47d6aa12069b79513a6abd8d242885 Mon Sep 17 00:00:00 2001 From: alex Date: Sat, 13 Aug 2005 13:40:42 +0000 Subject: [PATCH] b=7049 b=7085 - port old fixes from b1_4 to HEAD (extents related) - unify extents & fsfilt codebase with fsfilt --- .../kernel_patches/series/ldiskfs-2.6-fc3.series | 6 +- .../patches/ext3-extents-2.6.10-fc3.patch | 2405 ++++++++++---------- .../patches/ext3-extents-in-ea-2.6.10-fc3.patch | 21 +- .../patches/ext3-mballoc2-2.6.10-fc3.patch | 25 +- lustre/kernel_patches/series/2.6-fc3.series | 1 - .../kernel_patches/series/ldiskfs-2.6-fc3.series | 6 +- lustre/lvfs/fsfilt_ext3.c | 162 +- 7 files changed, 1367 insertions(+), 1259 deletions(-) diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-fc3.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-fc3.series index 3e96555..b4608a9 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-fc3.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-fc3.series @@ -9,9 +9,9 @@ ext3-ea-in-inode-2.6-fc3.patch export-ext3-2.6.10-fc3.patch ext3-include-fixes-2.6-suse.patch ext3-extents-2.6.10-fc3.patch -ext3-extents-in-ea-2.6.10-fc3.patch -ext3-extents-in-ea-ioctl-2.6.10-fc3.patch -ext3-extents-in-ea-exports-symbol-2.6.7.patch +#ext3-extents-in-ea-2.6.10-fc3.patch +#ext3-extents-in-ea-ioctl-2.6.10-fc3.patch +#ext3-extents-in-ea-exports-symbol-2.6.7.patch ext3-mds-num-2.6.10-fc3.patch ext3-fid-2.6.7.patch ext3-raw-lookup-2.6.10.patch diff --git a/lustre/kernel_patches/patches/ext3-extents-2.6.10-fc3.patch b/lustre/kernel_patches/patches/ext3-extents-2.6.10-fc3.patch index 90064a2..eceafb7 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.6.10-fc3.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.6.10-fc3.patch @@ -1,94 +1,11 @@ %patch -Index: linux-2.6.10/include/linux/ext3_fs.h +Index: linux-2.6.5-sles9/fs/ext3/extents.c =================================================================== ---- linux-2.6.10.orig/include/linux/ext3_fs.h 2005-04-05 12:26:19.494124024 +0800 -+++ 
linux-2.6.10/include/linux/ext3_fs.h 2005-04-05 12:26:25.474214912 +0800 -@@ -186,6 +186,7 @@ - #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ - #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ - #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ -+#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */ - - #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ - #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ -@@ -238,7 +239,9 @@ - #endif - #define EXT3_IOC_GETRSVSZ _IOR('f', 5, long) - #define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) -- -+#define EXT3_IOC_GET_EXTENTS _IOR('f', 10, long) -+#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 11, long) -+#define EXT3_IOC_GET_TREE_STATS _IOR('f', 12, long) - /* - * Structure of an inode on the disk - */ -@@ -361,6 +364,8 @@ - #define EXT3_MOUNT_PDIROPS 0x800000/* Parallel dir operations */ - #define EXT3_MOUNT_IOPEN 0x40000 /* Allow access via iopen */ - #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ -+#define EXT3_MOUNT_EXTENTS 0x100000 /* Extents support */ -+#define EXT3_MOUNT_EXTDEBUG 0x200000 /* Extents debug */ - - /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ - #ifndef clear_opt -@@ -549,11 +554,13 @@ - #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ - #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ - #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010 -+#define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ - - #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR - #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ - EXT3_FEATURE_INCOMPAT_RECOVER| \ -- EXT3_FEATURE_INCOMPAT_META_BG) -+ EXT3_FEATURE_INCOMPAT_META_BG| \ -+ EXT3_FEATURE_INCOMPAT_EXTENTS) - #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ - EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ - 
EXT3_FEATURE_RO_COMPAT_BTREE_DIR) -@@ -759,6 +766,7 @@ - - - /* inode.c */ -+extern int ext3_block_truncate_page(handle_t *, struct page *, struct address_space *, loff_t); - extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); - extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); - extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -@@ -839,6 +847,14 @@ - extern struct inode_operations ext3_symlink_inode_operations; - extern struct inode_operations ext3_fast_symlink_inode_operations; - -+/* extents.c */ -+extern int ext3_ext_writepage_trans_blocks(struct inode *, int); -+extern int ext3_ext_get_block(handle_t *, struct inode *, long, -+ struct buffer_head *, int, int); -+extern void ext3_ext_truncate(struct inode *, struct page *); -+extern void ext3_ext_init(struct super_block *); -+extern void ext3_ext_release(struct super_block *); -+extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *); - - #endif /* __KERNEL__ */ - -Index: linux-2.6.10/include/linux/ext3_fs_i.h -=================================================================== ---- linux-2.6.10.orig/include/linux/ext3_fs_i.h 2005-04-05 12:26:19.377141808 +0800 -+++ linux-2.6.10/include/linux/ext3_fs_i.h 2005-04-05 12:26:25.473215064 +0800 -@@ -134,6 +134,8 @@ - struct dynlock i_htree_lock; - struct semaphore i_append_sem; - struct semaphore i_rename_sem; -+ -+ __u32 i_cached_extent[3]; - }; - - #endif /* _LINUX_EXT3_FS_I */ -Index: linux-2.6.10/include/linux/ext3_extents.h -=================================================================== ---- linux-2.6.10.orig/include/linux/ext3_extents.h 2005-04-05 19:01:49.158500672 +0800 -+++ linux-2.6.10/include/linux/ext3_extents.h 2005-04-05 12:26:25.476214608 +0800 -@@ -0,0 +1,238 @@ +--- linux-2.6.5-sles9.orig/fs/ext3/extents.c 2005-02-17 22:07:57.023609040 +0300 ++++ linux-2.6.5-sles9/fs/ext3/extents.c 2005-02-23 01:02:37.396435640 +0300 +@@ 
-0,0 +1,2349 @@ +/* -+ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com ++ * Copyright(c) 2003, 2004, 2005, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas + * + * This program is free software; you can redistribute it and/or modify @@ -105,788 +22,406 @@ Index: linux-2.6.10/include/linux/ext3_extents.h + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ + -+#ifndef _LINUX_EXT3_EXTENTS -+#define _LINUX_EXT3_EXTENTS -+ -+/* -+ * with AGRESSIVE_TEST defined capacity of index/leaf blocks -+ * become very little, so index split, in-depth growing and -+ * other hard changes happens much more often -+ * this is for debug purposes only -+ */ -+#define AGRESSIVE_TEST_ -+ +/* -+ * if CHECK_BINSEARCH defined, then results of binary search -+ * will be checked by linear search ++ * Extents support for EXT3 ++ * ++ * TODO: ++ * - ext3_ext_walk_space() sould not use ext3_ext_find_extent() ++ * - ext3_ext_calc_credits() could take 'mergable' into account ++ * - ext3*_error() should be used in some situations ++ * - find_goal() [to be tested and improved] ++ * - smart tree reduction ++ * - arch-independence ++ * common on-disk format for big/little-endian arch + */ -+#define CHECK_BINSEARCH_ + -+/* -+ * if EXT_DEBUG is defined you can use 'extdebug' mount option -+ * to get lots of info what's going on -+ */ -+#define EXT_DEBUG -+#ifdef EXT_DEBUG -+#define ext_debug(tree,fmt,a...) \ -+do { \ -+ if (test_opt((tree)->inode->i_sb, EXTDEBUG)) \ -+ printk(fmt, ##a); \ -+} while (0); -+#else -+#define ext_debug(tree,fmt,a...) 
-+#endif ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++#include + -+/* -+ * if EXT_STATS is defined then stats numbers are collected -+ * these number will be displayed at umount time -+ */ -+#define EXT_STATS_ + ++static inline int ext3_ext_check_header(struct ext3_extent_header *eh) ++{ ++ if (eh->eh_magic != EXT3_EXT_MAGIC) { ++ printk(KERN_ERR "EXT3-fs: invalid magic = 0x%x\n", ++ (unsigned)eh->eh_magic); ++ return -EIO; ++ } ++ if (eh->eh_max == 0) { ++ printk(KERN_ERR "EXT3-fs: invalid eh_max = %u\n", ++ (unsigned)eh->eh_max); ++ return -EIO; ++ } ++ if (eh->eh_entries > eh->eh_max) { ++ printk(KERN_ERR "EXT3-fs: invalid eh_entries = %u\n", ++ (unsigned)eh->eh_entries); ++ return -EIO; ++ } ++ return 0; ++} + -+#define EXT3_ALLOC_NEEDED 3 /* block bitmap + group desc. + sb */ ++static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) ++{ ++ int err; + -+/* -+ * ext3_inode has i_block array (total 60 bytes) -+ * first 4 bytes are used to store: -+ * - tree depth (0 mean there is no tree yet. 
all extents in the inode) -+ * - number of alive extents in the inode -+ */ ++ if (handle->h_buffer_credits > needed) ++ return handle; ++ if (!ext3_journal_extend(handle, needed)) ++ return handle; ++ err = ext3_journal_restart(handle, needed); ++ ++ return handle; ++} + -+/* -+ * this is extent on-disk structure -+ * it's used at the bottom of the tree -+ */ -+struct ext3_extent { -+ __u32 ee_block; /* first logical block extent covers */ -+ __u16 ee_len; /* number of blocks covered by extent */ -+ __u16 ee_start_hi; /* high 16 bits of physical block */ -+ __u32 ee_start; /* low 32 bigs of physical block */ -+}; ++static int inline ++ext3_ext_get_access_for_root(handle_t *h, struct ext3_extents_tree *tree) ++{ ++ if (tree->ops->get_write_access) ++ return tree->ops->get_write_access(h,tree->buffer); ++ else ++ return 0; ++} + -+/* -+ * this is index on-disk structure -+ * it's used at all the levels, but the bottom -+ */ -+struct ext3_extent_idx { -+ __u32 ei_block; /* index covers logical blocks from 'block' */ -+ __u32 ei_leaf; /* pointer to the physical block of the next * -+ * level. leaf or next index could bet here */ -+ __u16 ei_leaf_hi; /* high 16 bits of physical block */ -+ __u16 ei_unused; -+}; ++static int inline ++ext3_ext_mark_root_dirty(handle_t *h, struct ext3_extents_tree *tree) ++{ ++ if (tree->ops->mark_buffer_dirty) ++ return tree->ops->mark_buffer_dirty(h,tree->buffer); ++ else ++ return 0; ++} + +/* -+ * each block (leaves and indexes), even inode-stored has header ++ * could return: ++ * - EROFS ++ * - ENOMEM + */ -+struct ext3_extent_header { -+ __u16 eh_magic; /* probably will support different formats */ -+ __u16 eh_entries; /* number of valid entries */ -+ __u16 eh_max; /* capacity of store in entries */ -+ __u16 eh_depth; /* has tree real underlaying blocks? 
*/ -+ __u32 eh_generation; /* generation of the tree */ -+}; ++static int ext3_ext_get_access(handle_t *handle, ++ struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++ int err; + -+#define EXT3_EXT_MAGIC 0xf30a ++ if (path->p_bh) { ++ /* path points to block */ ++ err = ext3_journal_get_write_access(handle, path->p_bh); ++ } else { ++ /* path points to leaf/index in inode body */ ++ err = ext3_ext_get_access_for_root(handle, tree); ++ } ++ return err; ++} + +/* -+ * array of ext3_ext_path contains path to some extent -+ * creation/lookup routines use it for traversal/splitting/etc -+ * truncate uses it to simulate recursive walking ++ * could return: ++ * - EROFS ++ * - ENOMEM ++ * - EIO + */ -+struct ext3_ext_path { -+ __u32 p_block; -+ __u16 p_depth; -+ struct ext3_extent *p_ext; -+ struct ext3_extent_idx *p_idx; -+ struct ext3_extent_header *p_hdr; -+ struct buffer_head *p_bh; -+}; ++static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++ int err; ++ if (path->p_bh) { ++ /* path points to block */ ++ err =ext3_journal_dirty_metadata(handle, path->p_bh); ++ } else { ++ /* path points to leaf/index in inode body */ ++ err = ext3_ext_mark_root_dirty(handle, tree); ++ } ++ return err; ++} + -+/* -+ * structure for external API -+ */ ++static int inline ++ext3_ext_new_block(handle_t *handle, struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, struct ext3_extent *ex, ++ int *err) ++{ ++ int goal, depth, newblock; ++ struct inode *inode; + -+/* -+ * ext3_extents_tree is used to pass initial information -+ * to top-level extents API -+ */ -+struct ext3_extents_helpers; -+struct ext3_extents_tree { -+ struct inode *inode; /* inode which tree belongs to */ -+ void *root; /* ptr to data top of tree resides at */ -+ void *buffer; /* will be passed as arg to ^^ routines */ -+ int buffer_len; -+ void *private; -+ struct ext3_extent *cex;/* last found extent */ -+ struct ext3_extents_helpers 
*ops; -+}; ++ EXT_ASSERT(tree); ++ if (tree->ops->new_block) ++ return tree->ops->new_block(handle, tree, path, ex, err); + -+struct ext3_extents_helpers { -+ int (*get_write_access)(handle_t *h, void *buffer); -+ int (*mark_buffer_dirty)(handle_t *h, void *buffer); -+ int (*mergable)(struct ext3_extent *ex1, struct ext3_extent *ex2); -+ int (*remove_extent_credits)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*remove_extent)(struct ext3_extents_tree *, -+ struct ext3_extent *, unsigned long, -+ unsigned long); -+ int (*new_block)(handle_t *, struct ext3_extents_tree *, -+ struct ext3_ext_path *, struct ext3_extent *, -+ int *); -+}; ++ inode = tree->inode; ++ depth = EXT_DEPTH(tree); ++ if (path && depth > 0) { ++ goal = path[depth-1].p_block; ++ } else { ++ struct ext3_inode_info *ei = EXT3_I(inode); ++ unsigned long bg_start; ++ unsigned long colour; + -+/* -+ * to be called by ext3_ext_walk_space() -+ * negative retcode - error -+ * positive retcode - signal for ext3_ext_walk_space(), see below -+ * callback must return valid extent (passed or newly created) -+ */ -+typedef int (*ext_prepare_callback)(struct ext3_extents_tree *, -+ struct ext3_ext_path *, -+ struct ext3_extent *, int); ++ bg_start = (ei->i_block_group * ++ EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + ++ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); ++ colour = (current->pid % 16) * ++ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); ++ goal = bg_start + colour; ++ } + -+#define EXT_CONTINUE 0 -+#define EXT_BREAK 1 -+#define EXT_REPEAT 2 ++ newblock = ext3_new_block(handle, inode, goal, err); ++ return newblock; ++} + ++static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) ++{ ++ struct ext3_extent_header *neh; ++ neh = EXT_ROOT_HDR(tree); ++ neh->eh_generation++; ++} + -+#define EXT_MAX_BLOCK 0xffffffff -+#define EXT_CACHE_MARK 0xffff ++static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) ++{ ++ int size; + 
++ size = (tree->inode->i_sb->s_blocksize - ++ sizeof(struct ext3_extent_header)) / ++ sizeof(struct ext3_extent); ++#ifdef AGRESSIVE_TEST ++ size = 6; ++#endif ++ return size; ++} + -+#define EXT_FIRST_EXTENT(__hdr__) \ -+ ((struct ext3_extent *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_FIRST_INDEX(__hdr__) \ -+ ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \ -+ sizeof(struct ext3_extent_header))) -+#define EXT_HAS_FREE_INDEX(__path__) \ -+ ((__path__)->p_hdr->eh_entries < (__path__)->p_hdr->eh_max) -+#define EXT_LAST_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_LAST_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_entries - 1) -+#define EXT_MAX_EXTENT(__hdr__) \ -+ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) -+#define EXT_MAX_INDEX(__hdr__) \ -+ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_max - 1) ++static inline int ext3_ext_space_block_idx(struct ext3_extents_tree *tree) ++{ ++ int size; + -+#define EXT_ROOT_HDR(tree) \ -+ ((struct ext3_extent_header *) (tree)->root) -+#define EXT_BLOCK_HDR(bh) \ -+ ((struct ext3_extent_header *) (bh)->b_data) -+#define EXT_DEPTH(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_depth) -+#define EXT_GENERATION(_t_) \ -+ (((struct ext3_extent_header *)((_t_)->root))->eh_generation) ++ size = (tree->inode->i_sb->s_blocksize - ++ sizeof(struct ext3_extent_header)) / ++ sizeof(struct ext3_extent_idx); ++#ifdef AGRESSIVE_TEST ++ size = 5; ++#endif ++ return size; ++} + ++static inline int ext3_ext_space_root(struct ext3_extents_tree *tree) ++{ ++ int size; + -+#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); ++ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / ++ sizeof(struct ext3_extent); ++#ifdef AGRESSIVE_TEST ++ size = 3; ++#endif ++ return size; ++} + ++static inline int ext3_ext_space_root_idx(struct ext3_extents_tree *tree) ++{ ++ int size; + -+/* -+ * this structure is used to gather 
extents from the tree via ioctl -+ */ -+struct ext3_extent_buf { -+ unsigned long start; -+ int buflen; -+ void *buffer; -+ void *cur; -+ int err; -+}; ++ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) / ++ sizeof(struct ext3_extent_idx); ++#ifdef AGRESSIVE_TEST ++ size = 4; ++#endif ++ return size; ++} + -+/* -+ * this structure is used to collect stats info about the tree -+ */ -+struct ext3_extent_tree_stats { -+ int depth; -+ int extents_num; -+ int leaf_num; -+}; ++static void ext3_ext_show_path(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) ++{ ++#ifdef EXT_DEBUG ++ int k, l = path->p_depth; + -+extern int ext3_extent_tree_init(handle_t *, struct ext3_extents_tree *); -+extern int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *, struct ext3_ext_path *); -+extern int ext3_ext_insert_extent(handle_t *, struct ext3_extents_tree *, struct ext3_ext_path *, struct ext3_extent *); -+extern int ext3_ext_walk_space(struct ext3_extents_tree *, unsigned long, unsigned long, ext_prepare_callback); -+extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long); -+extern struct ext3_ext_path * ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *); -+extern void ext3_init_tree_desc(struct ext3_extents_tree *, struct inode *); -+extern int ext3_ext_calc_blockmap_metadata(struct inode *, int); ++ ext_debug(tree, "path:"); ++ for (k = 0; k <= l; k++, path++) { ++ if (path->p_idx) { ++ ext_debug(tree, " %d->%d", path->p_idx->ei_block, ++ path->p_idx->ei_leaf); ++ } else if (path->p_ext) { ++ ext_debug(tree, " %d:%d:%d", ++ path->p_ext->ee_block, ++ path->p_ext->ee_len, ++ path->p_ext->ee_start); ++ } else ++ ext_debug(tree, " []"); ++ } ++ ext_debug(tree, "\n"); ++#endif ++} + -+static inline void -+ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) ++static void ext3_ext_show_leaf(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path) +{ -+ if (tree->cex) -+ 
tree->cex->ee_len = 0; -+} ++#ifdef EXT_DEBUG ++ int depth = EXT_DEPTH(tree); ++ struct ext3_extent_header *eh; ++ struct ext3_extent *ex; ++ int i; + ++ if (!path) ++ return; + -+#endif /* _LINUX_EXT3_EXTENTS */ ++ eh = path[depth].p_hdr; ++ ex = EXT_FIRST_EXTENT(eh); + -Index: linux-2.6.10/fs/ext3/inode.c -=================================================================== ---- linux-2.6.10.orig/fs/ext3/inode.c 2005-04-05 12:26:19.367143328 +0800 -+++ linux-2.6.10/fs/ext3/inode.c 2005-04-05 12:26:25.462216736 +0800 -@@ -796,6 +796,17 @@ - goto reread; - } - -+static inline int -+ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, -+ struct buffer_head *bh, int create, int extend_disksize) ++ for (i = 0; i < eh->eh_entries; i++, ex++) { ++ ext_debug(tree, "%d:%d:%d ", ++ ex->ee_block, ex->ee_len, ex->ee_start); ++ } ++ ext_debug(tree, "\n"); ++#endif ++} ++ ++static void ext3_ext_drop_refs(struct ext3_ext_path *path) +{ -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_get_block(handle, inode, block, bh, create, -+ extend_disksize); -+ return ext3_get_block_handle(handle, inode, block, bh, create, -+ extend_disksize); ++ int depth = path->p_depth; ++ int i; ++ ++ for (i = 0; i <= depth; i++, path++) { ++ if (path->p_bh) { ++ brelse(path->p_bh); ++ path->p_bh = NULL; ++ } ++ } +} + - static int ext3_get_block(struct inode *inode, sector_t iblock, - struct buffer_head *bh_result, int create) - { -@@ -806,8 +817,8 @@ - handle = ext3_journal_current_handle(); - J_ASSERT(handle != 0); - } -- ret = ext3_get_block_handle(handle, inode, iblock, -- bh_result, create, 1); -+ ret = ext3_get_block_wrap(handle, inode, iblock, -+ bh_result, create, 1); - return ret; - } - -@@ -851,8 +862,8 @@ - - get_block: - if (ret == 0) -- ret = ext3_get_block_handle(handle, inode, iblock, -- bh_result, create, 0); -+ ret = ext3_get_block_wrap(handle, inode, iblock, -+ bh_result, create, 0); - bh_result->b_size = (1 << inode->i_blkbits); - return ret; - } 
-@@ -871,7 +882,7 @@ - dummy.b_state = 0; - dummy.b_blocknr = -1000; - buffer_trace_init(&dummy.b_history); -- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1); -+ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1); - if (!*errp && buffer_mapped(&dummy)) { - struct buffer_head *bh; - bh = sb_getblk(inode->i_sb, dummy.b_blocknr); -@@ -1591,7 +1602,7 @@ - * This required during truncate. We need to physically zero the tail end - * of that block so it doesn't yield old data if the file is later grown. - */ --static int ext3_block_truncate_page(handle_t *handle, struct page *page, -+int ext3_block_truncate_page(handle_t *handle, struct page *page, - struct address_space *mapping, loff_t from) - { - unsigned long index = from >> PAGE_CACHE_SHIFT; -@@ -2089,6 +2100,9 @@ - return; - } - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_truncate(inode, page); ++/* ++ * binary search for closest index by given block ++ */ ++static inline void ++ext3_ext_binsearch_idx(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, int block) ++{ ++ struct ext3_extent_header *eh = path->p_hdr; ++ struct ext3_extent_idx *ix; ++ int l = 0, k, r; + - handle = start_transaction(inode); - if (IS_ERR(handle)) { - if (page) { -@@ -2817,6 +2831,9 @@ - int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 
5 : 3; - int ret; - -+ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) -+ return ext3_ext_writepage_trans_blocks(inode, bpp); -+ - if (ext3_should_journal_data(inode)) - ret = 3 * (bpp + indirects) + 2; - else -Index: linux-2.6.10/fs/ext3/ioctl.c -=================================================================== ---- linux-2.6.10.orig/fs/ext3/ioctl.c 2005-04-05 12:25:13.631136720 +0800 -+++ linux-2.6.10/fs/ext3/ioctl.c 2005-04-05 12:26:25.471215368 +0800 -@@ -245,6 +245,10 @@ - return err; - } - -+ case EXT3_IOC_GET_EXTENTS: -+ case EXT3_IOC_GET_TREE_STATS: -+ case EXT3_IOC_GET_TREE_DEPTH: -+ return ext3_ext_ioctl(inode, filp, cmd, arg); - - default: - return -ENOTTY; -Index: linux-2.6.10/fs/ext3/super.c -=================================================================== ---- linux-2.6.10.orig/fs/ext3/super.c 2005-04-05 12:26:19.438132536 +0800 -+++ linux-2.6.10/fs/ext3/super.c 2005-04-05 12:26:25.471215368 +0800 -@@ -394,6 +394,7 @@ - struct ext3_super_block *es = sbi->s_es; - int i; - -+ ext3_ext_release(sb); - ext3_xattr_put_super(sb); - journal_destroy(sbi->s_journal); - if (!(sb->s_flags & MS_RDONLY)) { -@@ -463,6 +464,9 @@ - dynlock_init(&ei->i_htree_lock); - sema_init(&ei->i_rename_sem, 1); - sema_init(&ei->i_append_sem, 1); -+ ei->i_cached_extent[0] = 0; -+ ei->i_cached_extent[1] = 0; -+ ei->i_cached_extent[2] = 0; - return &ei->vfs_inode; - } - -@@ -595,6 +599,7 @@ - Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, - Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_pdirops, - Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, -+ Opt_extents, Opt_extdebug, - Opt_ignore, Opt_barrier, Opt_err, Opt_resize, - }; - -@@ -647,6 +652,8 @@ - {Opt_iopen, "iopen"}, - {Opt_noiopen, "noiopen"}, - {Opt_iopen_nopriv, "iopen_nopriv"}, -+ {Opt_extents, "extents"}, -+ {Opt_extdebug, "extdebug"}, - {Opt_err, NULL}, - {Opt_resize, "resize"}, - }; -@@ -950,6 +957,12 @@ - match_int(&args[0], &option); - *n_blocks_count = option; - break; -+ case Opt_extents: -+ set_opt 
(sbi->s_mount_opt, EXTENTS); -+ break; -+ case Opt_extdebug: -+ set_opt (sbi->s_mount_opt, EXTDEBUG); -+ break; - default: - printk (KERN_ERR - "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1635,6 +1648,8 @@ - percpu_counter_mod(&sbi->s_dirs_counter, - ext3_count_dirs(sb)); - -+ ext3_ext_init(sb); -+ - return 0; - - cantfind_ext3: -Index: linux-2.6.10/fs/ext3/extents.c -=================================================================== ---- linux-2.6.10.orig/fs/ext3/extents.c 2005-04-05 19:01:49.158500672 +0800 -+++ linux-2.6.10/fs/ext3/extents.c 2005-04-05 12:26:25.468215824 +0800 -@@ -0,0 +1,2306 @@ -+/* -+ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com -+ * Written by Alex Tomas -+ * -+ * This program is free software; you can redistribute it and/or modify -+ * it under the terms of the GNU General Public License version 2 as -+ * published by the Free Software Foundation. -+ * -+ * This program is distributed in the hope that it will be useful, -+ * but WITHOUT ANY WARRANTY; without even the implied warranty of -+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -+ * GNU General Public License for more details. 
-+ * -+ * You should have received a copy of the GNU General Public Licens -+ * along with this program; if not, write to the Free Software -+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- -+ */ -+ -+/* -+ * Extents support for EXT3 -+ * -+ * TODO: -+ * - ext3_ext_walk_space() sould not use ext3_ext_find_extent() -+ * - ext3_ext_calc_credits() could take 'mergable' into account -+ * - ext3*_error() should be used in some situations -+ * - find_goal() [to be tested and improved] -+ * - smart tree reduction -+ * - arch-independence -+ * common on-disk format for big/little-endian arch -+ */ ++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); ++ EXT_ASSERT(eh->eh_entries <= eh->eh_max); ++ EXT_ASSERT(eh->eh_entries > 0); + -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include -+#include ++ ext_debug(tree, "binsearch for %d(idx): ", block); + -+static handle_t *ext3_ext_journal_restart(handle_t *handle, int needed) -+{ -+ int err; ++ path->p_idx = ix = EXT_FIRST_INDEX(eh); + -+ if (handle->h_buffer_credits > needed) -+ return handle; -+ if (!ext3_journal_extend(handle, needed)) -+ return handle; -+ err = ext3_journal_restart(handle, needed); -+ -+ return handle; -+} ++ r = k = eh->eh_entries; ++ while (k > 1) { ++ k = (r - l) / 2; ++ if (block < ix[l + k].ei_block) ++ r -= k; ++ else ++ l += k; ++ ext_debug(tree, "%d:%d:%d ", k, l, r); ++ } + -+static int inline -+ext3_ext_get_access_for_root(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->get_write_access) -+ return tree->ops->get_write_access(h,tree->buffer); -+ else -+ return 0; -+} ++ ix += l; ++ path->p_idx = ix; ++ ext_debug(tree," -> %d->%d ",path->p_idx->ei_block,path->p_idx->ei_leaf); + -+static int inline -+ext3_ext_mark_root_dirty(handle_t *h, struct ext3_extents_tree *tree) -+{ -+ if (tree->ops->mark_buffer_dirty) -+ return tree->ops->mark_buffer_dirty(h,tree->buffer); -+ else -+ return 0; -+} ++ 
while (l++ < r) { ++ if (block < ix->ei_block) ++ break; ++ path->p_idx = ix++; ++ } ++ ext_debug(tree, " -> %d->%d\n", path->p_idx->ei_block, ++ path->p_idx->ei_leaf); + -+/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ */ -+static int ext3_ext_get_access(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+ int err; ++#ifdef CHECK_BINSEARCH ++ { ++ struct ext3_extent_idx *chix; + -+ if (path->p_bh) { -+ /* path points to block */ -+ err = ext3_journal_get_write_access(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_get_access_for_root(handle, tree); ++ chix = ix = EXT_FIRST_INDEX(eh); ++ for (k = 0; k < eh->eh_entries; k++, ix++) { ++ if (k != 0 && ix->ei_block <= ix[-1].ei_block) { ++ printk("k=%d, ix=0x%p, first=0x%p\n", k, ++ ix, EXT_FIRST_INDEX(eh)); ++ printk("%u <= %u\n", ++ ix->ei_block,ix[-1].ei_block); ++ } ++ EXT_ASSERT(k == 0 || ix->ei_block > ix[-1].ei_block); ++ if (block < ix->ei_block) ++ break; ++ chix = ix; ++ } ++ EXT_ASSERT(chix == path->p_idx); + } -+ return err; ++#endif +} + +/* -+ * could return: -+ * - EROFS -+ * - ENOMEM -+ * - EIO ++ * binary search for closest extent by given block + */ -+static int ext3_ext_dirty(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) ++static inline void ++ext3_ext_binsearch(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, int block) +{ -+ int err; -+ if (path->p_bh) { -+ /* path points to block */ -+ err =ext3_journal_dirty_metadata(handle, path->p_bh); -+ } else { -+ /* path points to leaf/index in inode body */ -+ err = ext3_ext_mark_root_dirty(handle, tree); -+ } -+ return err; -+} ++ struct ext3_extent_header *eh = path->p_hdr; ++ struct ext3_extent *ex; ++ int l = 0, k, r; + -+static int inline -+ext3_ext_new_block(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, struct ext3_extent *ex, -+ int *err) -+{ -+ int goal, depth, newblock; 
-+ struct inode *inode; ++ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); ++ EXT_ASSERT(eh->eh_entries <= eh->eh_max); + -+ EXT_ASSERT(tree); -+ if (tree->ops->new_block) -+ return tree->ops->new_block(handle, tree, path, ex, err); ++ if (eh->eh_entries == 0) { ++ /* ++ * this leaf is empty yet: ++ * we get such a leaf in split/add case ++ */ ++ return; ++ } ++ ++ ext_debug(tree, "binsearch for %d: ", block); + -+ inode = tree->inode; -+ depth = EXT_DEPTH(tree); -+ if (path && depth > 0) { -+ goal = path[depth-1].p_block; -+ } else { -+ struct ext3_inode_info *ei = EXT3_I(inode); -+ unsigned long bg_start; -+ unsigned long colour; ++ path->p_ext = ex = EXT_FIRST_EXTENT(eh); + -+ bg_start = (ei->i_block_group * -+ EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + -+ le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); -+ colour = (current->pid % 16) * -+ (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); -+ goal = bg_start + colour; ++ r = k = eh->eh_entries; ++ while (k > 1) { ++ k = (r - l) / 2; ++ if (block < ex[l + k].ee_block) ++ r -= k; ++ else ++ l += k; ++ ext_debug(tree, "%d:%d:%d ", k, l, r); + } + -+ newblock = ext3_new_block(handle, inode, goal, err); -+ return newblock; -+} ++ ex += l; ++ path->p_ext = ex; ++ ext_debug(tree, " -> %d:%d:%d ", path->p_ext->ee_block, ++ path->p_ext->ee_start, path->p_ext->ee_len); + -+static inline void ext3_ext_tree_changed(struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *neh; -+ neh = EXT_ROOT_HDR(tree); -+ neh->eh_generation++; -+} ++ while (l++ < r) { ++ if (block < ex->ee_block) ++ break; ++ path->p_ext = ex++; ++ } ++ ext_debug(tree, " -> %d:%d:%d\n", path->p_ext->ee_block, ++ path->p_ext->ee_start, path->p_ext->ee_len); + -+static inline int ext3_ext_space_block(struct ext3_extents_tree *tree) -+{ -+ int size; ++#ifdef CHECK_BINSEARCH ++ { ++ struct ext3_extent *chex; + -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) -+ / sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size 
= 6; ++ chex = ex = EXT_FIRST_EXTENT(eh); ++ for (k = 0; k < eh->eh_entries; k++, ex++) { ++ EXT_ASSERT(k == 0 || ex->ee_block > ex[-1].ee_block); ++ if (block < ex->ee_block) ++ break; ++ chex = ex; ++ } ++ EXT_ASSERT(chex == path->p_ext); ++ } +#endif -+ return size; +} + -+static inline int ext3_ext_space_block_idx(struct ext3_extents_tree *tree) ++int ext3_extent_tree_init(handle_t *handle, struct ext3_extents_tree *tree) +{ -+ int size; -+ -+ size = (tree->inode->i_sb->s_blocksize - -+ sizeof(struct ext3_extent_header)) -+ / sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 5; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - sizeof(struct ext3_extent_header)) -+ / sizeof(struct ext3_extent); -+#ifdef AGRESSIVE_TEST -+ size = 3; -+#endif -+ return size; -+} -+ -+static inline int ext3_ext_space_root_idx(struct ext3_extents_tree *tree) -+{ -+ int size; -+ -+ size = (tree->buffer_len - -+ sizeof(struct ext3_extent_header)) -+ / sizeof(struct ext3_extent_idx); -+#ifdef AGRESSIVE_TEST -+ size = 4; -+#endif -+ return size; -+} -+ -+static void ext3_ext_show_path(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int k, l = path->p_depth; -+ -+ ext_debug(tree, "path:"); -+ for (k = 0; k <= l; k++, path++) { -+ if (path->p_idx) { -+ ext_debug(tree, " %d->%d", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ } else if (path->p_ext) { -+ ext_debug(tree, " %d:%d:%d", -+ path->p_ext->ee_block, -+ path->p_ext->ee_len, -+ path->p_ext->ee_start); -+ } else -+ ext_debug(tree, " []"); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_show_leaf(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) -+{ -+#ifdef EXT_DEBUG -+ int depth = EXT_DEPTH(tree); -+ struct ext3_extent_header *eh; -+ struct ext3_extent *ex; -+ int i; -+ -+ if (!path) -+ return; -+ -+ eh = path[depth].p_hdr; -+ ex = 
EXT_FIRST_EXTENT(eh); -+ -+ for (i = 0; i < eh->eh_entries; i++, ex++) { -+ ext_debug(tree, "%d:%d:%d ", -+ ex->ee_block, ex->ee_len, ex->ee_start); -+ } -+ ext_debug(tree, "\n"); -+#endif -+} -+ -+static void ext3_ext_drop_refs(struct ext3_ext_path *path) -+{ -+ int depth = path->p_depth; -+ int i; -+ -+ for (i = 0; i <= depth; i++, path++) -+ if (path->p_bh) { -+ brelse(path->p_bh); -+ path->p_bh = NULL; -+ } -+} -+ -+/* -+ * binary search for closest index by given block -+ */ -+static inline void -+ext3_ext_binsearch_idx(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent_idx *ix; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ EXT_ASSERT(eh->eh_entries > 0); -+ -+ ext_debug(tree, "binsearch for %d(idx): ", block); -+ -+ path->p_idx = ix = EXT_FIRST_INDEX(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ix[l + k].ei_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ix += l; -+ path->p_idx = ix; -+ ext_debug(tree, " -> %d->%d ", path->p_idx->ei_block, path->p_idx->ei_leaf); -+ -+ while (l++ < r) { -+ if (block < ix->ei_block) -+ break; -+ path->p_idx = ix++; -+ } -+ ext_debug(tree, " -> %d->%d\n", path->p_idx->ei_block, -+ path->p_idx->ei_leaf); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent_idx *chix; -+ -+ chix = ix = EXT_FIRST_INDEX(eh); -+ for (k = 0; k < eh->eh_entries; k++, ix++) { -+ if (k != 0 && ix->ei_block <= ix[-1].ei_block) { -+ printk("k=%d, ix=0x%p, first=0x%p\n", k, -+ ix, EXT_FIRST_INDEX(eh)); -+ printk("%u <= %u\n", -+ ix->ei_block,ix[-1].ei_block); -+ } -+ EXT_ASSERT(k == 0 || ix->ei_block > ix[-1].ei_block); -+ if (block < ix->ei_block) -+ break; -+ chix = ix; -+ } -+ EXT_ASSERT(chix == path->p_idx); -+ } -+#endif -+ -+} -+ -+/* -+ * binary search for closest extent by given block -+ */ 
-+static inline void -+ext3_ext_binsearch(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, int block) -+{ -+ struct ext3_extent_header *eh = path->p_hdr; -+ struct ext3_extent *ex; -+ int l = 0, k, r; -+ -+ EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(eh->eh_entries <= eh->eh_max); -+ -+ if (eh->eh_entries == 0) { -+ /* -+ * this leaf is empty yet: -+ * we get such a leaf in split/add case -+ */ -+ return; -+ } -+ -+ ext_debug(tree, "binsearch for %d: ", block); -+ -+ path->p_ext = ex = EXT_FIRST_EXTENT(eh); -+ -+ r = k = eh->eh_entries; -+ while (k > 1) { -+ k = (r - l) / 2; -+ if (block < ex[l + k].ee_block) -+ r -= k; -+ else -+ l += k; -+ ext_debug(tree, "%d:%d:%d ", k, l, r); -+ } -+ -+ ex += l; -+ path->p_ext = ex; -+ ext_debug(tree, " -> %d:%d:%d ", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+ while (l++ < r) { -+ if (block < ex->ee_block) -+ break; -+ path->p_ext = ex++; -+ } -+ ext_debug(tree, " -> %d:%d:%d\n", path->p_ext->ee_block, -+ path->p_ext->ee_start, path->p_ext->ee_len); -+ -+#ifdef CHECK_BINSEARCH -+ { -+ struct ext3_extent *chex; -+ -+ chex = ex = EXT_FIRST_EXTENT(eh); -+ for (k = 0; k < eh->eh_entries; k++, ex++) { -+ EXT_ASSERT(k == 0 || ex->ee_block > ex[-1].ee_block); -+ if (block < ex->ee_block) -+ break; -+ chex = ex; -+ } -+ EXT_ASSERT(chex == path->p_ext); -+ } -+#endif -+ -+} -+ -+int ext3_extent_tree_init(handle_t *handle, struct ext3_extents_tree *tree) -+{ -+ struct ext3_extent_header *eh; ++ struct ext3_extent_header *eh; + + BUG_ON(tree->buffer_len == 0); + ext3_ext_get_access_for_root(handle, tree); @@ -902,7 +437,7 @@ Index: linux-2.6.10/fs/ext3/extents.c + +struct ext3_ext_path * +ext3_ext_find_extent(struct ext3_extents_tree *tree, int block, -+ struct ext3_ext_path *path) ++ struct ext3_ext_path *path) +{ + struct ext3_extent_header *eh; + struct buffer_head *bh; @@ -914,15 +449,17 @@ Index: linux-2.6.10/fs/ext3/extents.c + + eh = EXT_ROOT_HDR(tree); + EXT_ASSERT(eh); 
++ if (ext3_ext_check_header(eh)) ++ goto err; ++ + i = depth = EXT_DEPTH(tree); + EXT_ASSERT(eh->eh_max); + EXT_ASSERT(eh->eh_magic == EXT3_EXT_MAGIC); -+ EXT_ASSERT(i == 0 || eh->eh_entries > 0); + + /* account possible depth increase */ + if (!path) { + path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 2), -+ GFP_NOFS); ++ GFP_NOFS); + if (!path) + return ERR_PTR(-ENOMEM); + } @@ -932,29 +469,34 @@ Index: linux-2.6.10/fs/ext3/extents.c + /* walk through the tree */ + while (i) { + ext_debug(tree, "depth %d: num %d, max %d\n", -+ ppos, eh->eh_entries, eh->eh_max); ++ ppos, eh->eh_entries, eh->eh_max); + ext3_ext_binsearch_idx(tree, path + ppos, block); + path[ppos].p_block = path[ppos].p_idx->ei_leaf; + path[ppos].p_depth = i; + path[ppos].p_ext = NULL; + + bh = sb_bread(tree->inode->i_sb, path[ppos].p_block); -+ if (!bh) { -+ ext3_ext_drop_refs(path); -+ kfree(path); -+ return ERR_PTR(-EIO); -+ } ++ if (!bh) ++ goto err; ++ + eh = EXT_BLOCK_HDR(bh); + ppos++; + EXT_ASSERT(ppos <= depth); + path[ppos].p_bh = bh; + path[ppos].p_hdr = eh; + i--; ++ ++ if (ext3_ext_check_header(eh)) ++ goto err; + } + + path[ppos].p_depth = i; + path[ppos].p_hdr = eh; + path[ppos].p_ext = NULL; ++ path[ppos].p_idx = NULL; ++ ++ if (ext3_ext_check_header(eh)) ++ goto err; + + /* find extent */ + ext3_ext_binsearch(tree, path + ppos, block); @@ -962,6 +504,12 @@ Index: linux-2.6.10/fs/ext3/extents.c + ext3_ext_show_path(tree, path); + + return path; ++ ++err: ++ printk(KERN_ERR "EXT3-fs: header is corrupted!\n"); ++ ext3_ext_drop_refs(path); ++ kfree(path); ++ return ERR_PTR(-EIO); +} + +/* @@ -969,9 +517,9 @@ Index: linux-2.6.10/fs/ext3/extents.c + * it check where to insert: before curp or after curp + */ +static int ext3_ext_insert_index(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *curp, -+ int logical, int ptr) ++ struct ext3_extents_tree *tree, ++ struct ext3_ext_path *curp, ++ int logical, int ptr) +{ + struct ext3_extent_idx *ix; + int len, 
err; @@ -987,9 +535,9 @@ Index: linux-2.6.10/fs/ext3/extents.c + len = (len - 1) * sizeof(struct ext3_extent_idx); + len = len < 0 ? 0 : len; + ext_debug(tree, "insert new index %d after: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ (curp->p_idx + 1), (curp->p_idx + 2)); ++ "move %d from 0x%p to 0x%p\n", ++ logical, ptr, len, ++ (curp->p_idx + 1), (curp->p_idx + 2)); + memmove(curp->p_idx + 2, curp->p_idx + 1, len); + } + ix = curp->p_idx + 1; @@ -998,9 +546,9 @@ Index: linux-2.6.10/fs/ext3/extents.c + len = len * sizeof(struct ext3_extent_idx); + len = len < 0 ? 0 : len; + ext_debug(tree, "insert new index %d before: %d. " -+ "move %d from 0x%p to 0x%p\n", -+ logical, ptr, len, -+ curp->p_idx, (curp->p_idx + 1)); ++ "move %d from 0x%p to 0x%p\n", ++ logical, ptr, len, ++ curp->p_idx, (curp->p_idx + 1)); + memmove(curp->p_idx + 1, curp->p_idx, len); + ix = curp->p_idx; + } @@ -1028,8 +576,8 @@ Index: linux-2.6.10/fs/ext3/extents.c + * - initialize subtree + */ +static int ext3_ext_split(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext, int at) ++ struct ext3_ext_path *path, ++ struct ext3_extent *newext, int at) +{ + struct buffer_head *bh = NULL; + int depth = EXT_DEPTH(tree); @@ -1050,13 +598,13 @@ Index: linux-2.6.10/fs/ext3/extents.c + if (path[depth].p_ext != EXT_MAX_EXTENT(path[depth].p_hdr)) { + border = path[depth].p_ext[1].ee_block; + ext_debug(tree, "leaf will be splitted." -+ " next leaf starts at %d\n", -+ (int)border); ++ " next leaf starts at %d\n", ++ (int)border); + } else { + border = newext->ee_block; + ext_debug(tree, "leaf will be added." 
-+ " next leaf starts at %d\n", -+ (int)border); ++ " next leaf starts at %d\n", ++ (int)border); + } + + /* @@ -1114,12 +662,11 @@ Index: linux-2.6.10/fs/ext3/extents.c + while (path[depth].p_ext <= + EXT_MAX_EXTENT(path[depth].p_hdr)) { + ext_debug(tree, "move %d:%d:%d in new leaf %lu\n", -+ path[depth].p_ext->ee_block, -+ path[depth].p_ext->ee_start, -+ path[depth].p_ext->ee_len, -+ newblock); -+ memmove(ex++, path[depth].p_ext++, -+ sizeof(struct ext3_extent)); ++ path[depth].p_ext->ee_block, ++ path[depth].p_ext->ee_start, ++ path[depth].p_ext->ee_len, ++ newblock); ++ memmove(ex++, path[depth].p_ext++, sizeof(struct ext3_extent)); + neh->eh_entries++; + m++; + } @@ -1172,21 +719,21 @@ Index: linux-2.6.10/fs/ext3/extents.c + fidx->ei_leaf = oldblock; + + ext_debug(tree, "int.index at %d (block %lu): %lu -> %lu\n", -+ i, newblock, border, oldblock); ++ i, newblock, border, oldblock); + /* copy indexes */ + m = 0; + path[i].p_idx++; + + ext_debug(tree, "cur 0x%p, last 0x%p\n", path[i].p_idx, -+ EXT_MAX_INDEX(path[i].p_hdr)); ++ EXT_MAX_INDEX(path[i].p_hdr)); + EXT_ASSERT(EXT_MAX_INDEX(path[i].p_hdr) == -+ EXT_LAST_INDEX(path[i].p_hdr)); ++ EXT_LAST_INDEX(path[i].p_hdr)); + while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) { + ext_debug(tree, "%d: move %d:%d in new index %lu\n", -+ i, path[i].p_idx->ei_block, -+ path[i].p_idx->ei_leaf, newblock); ++ i, path[i].p_idx->ei_block, ++ path[i].p_idx->ei_leaf, newblock); + memmove(++fidx, path[i].p_idx++, -+ sizeof(struct ext3_extent_idx)); ++ sizeof(struct ext3_extent_idx)); + neh->eh_entries++; + EXT_ASSERT(neh->eh_entries <= neh->eh_max); + m++; @@ -1216,7 +763,7 @@ Index: linux-2.6.10/fs/ext3/extents.c + /* insert new index */ + if (!err) + err = ext3_ext_insert_index(handle, tree, path + at, -+ border, newblock); ++ border, newblock); + +cleanup: + if (bh) { @@ -1246,9 +793,9 @@ Index: linux-2.6.10/fs/ext3/extents.c + * just created block + */ +static int ext3_ext_grow_indepth(handle_t *handle, -+ struct 
ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) ++ struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *newext) +{ + struct ext3_ext_path *curp = path; + struct ext3_extent_header *neh; @@ -1280,7 +827,7 @@ Index: linux-2.6.10/fs/ext3/extents.c + /* set size of new block */ + neh = EXT_BLOCK_HDR(bh); + /* old root could have indexes or leaves -+ * so calculate e_max right way */ ++ * so calculate eh_max right way */ + if (EXT_DEPTH(tree)) + neh->eh_max = ext3_ext_space_block_idx(tree); + else @@ -1307,7 +854,7 @@ Index: linux-2.6.10/fs/ext3/extents.c + neh = EXT_ROOT_HDR(tree); + fidx = EXT_FIRST_INDEX(neh); + ext_debug(tree, "new root: num %d(%d), lblock %d, ptr %d\n", -+ neh->eh_entries, neh->eh_max, fidx->ei_block, fidx->ei_leaf); ++ neh->eh_entries, neh->eh_max, fidx->ei_block, fidx->ei_leaf); + + neh->eh_depth = path->p_depth + 1; + err = ext3_ext_dirty(handle, tree, curp); @@ -1322,9 +869,9 @@ Index: linux-2.6.10/fs/ext3/extents.c + * then it requests in-depth growing + */ +static int ext3_ext_create_new_leaf(handle_t *handle, -+ struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) ++ struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_extent *newext) +{ + struct ext3_ext_path *curp; + int depth, i, err = 0; @@ -1400,12 +947,12 @@ Index: linux-2.6.10/fs/ext3/extents.c + if (depth == path->p_depth) { + /* leaf */ + if (path[depth].p_ext != -+ EXT_LAST_EXTENT(path[depth].p_hdr)) ++ EXT_LAST_EXTENT(path[depth].p_hdr)) + return path[depth].p_ext[1].ee_block; + } else { + /* index */ + if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) ++ EXT_LAST_INDEX(path[depth].p_hdr)) + return path[depth].p_idx[1].ei_block; + } + depth--; @@ -1418,7 +965,7 @@ Index: linux-2.6.10/fs/ext3/extents.c + * returns first allocated block from next leaf or EXT_MAX_BLOCK + */ +static unsigned ext3_ext_next_leaf_block(struct 
ext3_extents_tree *tree, -+ struct ext3_ext_path *path) ++ struct ext3_ext_path *path) +{ + int depth; + @@ -1434,7 +981,7 @@ Index: linux-2.6.10/fs/ext3/extents.c + + while (depth >= 0) { + if (path[depth].p_idx != -+ EXT_LAST_INDEX(path[depth].p_hdr)) ++ EXT_LAST_INDEX(path[depth].p_hdr)) + return path[depth].p_idx[1].ei_block; + depth--; + } @@ -1448,7 +995,7 @@ Index: linux-2.6.10/fs/ext3/extents.c + * TODO: do we need to correct tree in all cases? + */ +int ext3_ext_correct_indexes(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) ++ struct ext3_ext_path *path) +{ + struct ext3_extent_header *eh; + int depth = EXT_DEPTH(tree); @@ -1498,8 +1045,8 @@ Index: linux-2.6.10/fs/ext3/extents.c + +static int inline +ext3_can_extents_be_merged(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex1, -+ struct ext3_extent *ex2) ++ struct ext3_extent *ex1, ++ struct ext3_extent *ex2) +{ + if (ex1->ee_block + ex1->ee_len != ex2->ee_block) + return 0; @@ -1521,8 +1068,8 @@ Index: linux-2.6.10/fs/ext3/extents.c + * creating new leaf in no-space case + */ +int ext3_ext_insert_extent(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newext) ++ struct ext3_ext_path *path, ++ struct ext3_extent *newext) +{ + struct ext3_extent_header * eh; + struct ext3_extent *ex, *fex; @@ -1531,7 +1078,6 @@ Index: linux-2.6.10/fs/ext3/extents.c + int depth, len, err, next; + + EXT_ASSERT(newext->ee_len > 0); -+ EXT_ASSERT(newext->ee_len < EXT_CACHE_MARK); + depth = EXT_DEPTH(tree); + ex = path[depth].p_ext; + EXT_ASSERT(path[depth].p_hdr); @@ -1539,8 +1085,8 @@ Index: linux-2.6.10/fs/ext3/extents.c + /* try to insert block into found extent and return */ + if (ex && ext3_can_extents_be_merged(tree, ex, newext)) { + ext_debug(tree, "append %d block to %d:%d (from %d)\n", -+ newext->ee_len, ex->ee_block, ex->ee_len, -+ ex->ee_start); ++ newext->ee_len, ex->ee_block, ex->ee_len, ++ ex->ee_start); + if ((err = 
ext3_ext_get_access(handle, tree, path + depth))) + return err; + ex->ee_len += newext->ee_len; @@ -1568,12 +1114,12 @@ Index: linux-2.6.10/fs/ext3/extents.c + eh = npath[depth].p_hdr; + if (eh->eh_entries < eh->eh_max) { + ext_debug(tree, "next leaf isnt full(%d)\n", -+ eh->eh_entries); ++ eh->eh_entries); + path = npath; + goto repeat; + } + ext_debug(tree, "next leaf hasno free space(%d,%d)\n", -+ eh->eh_entries, eh->eh_max); ++ eh->eh_entries, eh->eh_max); + } + + /* @@ -1595,8 +1141,8 @@ Index: linux-2.6.10/fs/ext3/extents.c + if (!nearex) { + /* there is no extent in this leaf, create first one */ + ext_debug(tree, "first extent in the leaf: %d:%d:%d\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len); ++ newext->ee_block, newext->ee_start, ++ newext->ee_len); + path[depth].p_ext = EXT_FIRST_EXTENT(eh); + } else if (newext->ee_block > nearex->ee_block) { + EXT_ASSERT(newext->ee_block != nearex->ee_block); @@ -1605,10 +1151,10 @@ Index: linux-2.6.10/fs/ext3/extents.c + len = (len - 1) * sizeof(struct ext3_extent); + len = len < 0 ? 0 : len; + ext_debug(tree, "insert %d:%d:%d after: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, -+ newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); ++ "move %d from 0x%p to 0x%p\n", ++ newext->ee_block, newext->ee_start, ++ newext->ee_len, ++ nearex, len, nearex + 1, nearex + 2); + memmove(nearex + 2, nearex + 1, len); + } + path[depth].p_ext = nearex + 1; @@ -1617,9 +1163,9 @@ Index: linux-2.6.10/fs/ext3/extents.c + len = (EXT_MAX_EXTENT(eh) - nearex) * sizeof(struct ext3_extent); + len = len < 0 ? 
0 : len; + ext_debug(tree, "insert %d:%d:%d before: nearest 0x%p, " -+ "move %d from 0x%p to 0x%p\n", -+ newext->ee_block, newext->ee_start, newext->ee_len, -+ nearex, len, nearex + 1, nearex + 2); ++ "move %d from 0x%p to 0x%p\n", ++ newext->ee_block, newext->ee_start, newext->ee_len, ++ nearex, len, nearex + 1, nearex + 2); + memmove(nearex + 1, nearex, len); + path[depth].p_ext = nearex; + } @@ -1640,8 +1186,8 @@ Index: linux-2.6.10/fs/ext3/extents.c + /* merge with next extent! */ + nearex->ee_len += nearex[1].ee_len; + if (nearex + 1 < EXT_LAST_EXTENT(eh)) { -+ len = (EXT_LAST_EXTENT(eh) - nearex - 1) -+ * sizeof(struct ext3_extent); ++ len = (EXT_LAST_EXTENT(eh) - nearex - 1) * ++ sizeof(struct ext3_extent); + memmove(nearex + 1, nearex + 2, len); + } + eh->eh_entries--; @@ -1671,7 +1217,8 @@ Index: linux-2.6.10/fs/ext3/extents.c + unsigned long num, ext_prepare_callback func) +{ + struct ext3_ext_path *path = NULL; -+ struct ext3_extent *ex, cbex; ++ struct ext3_ext_cache cbex; ++ struct ext3_extent *ex; + unsigned long next, start = 0, end = 0; + unsigned long last = block + num; + int depth, exists, err = 0; @@ -1730,14 +1277,20 @@ Index: linux-2.6.10/fs/ext3/extents.c + EXT_ASSERT(end > start); + + if (!exists) { -+ cbex.ee_block = start; -+ cbex.ee_len = end - start; -+ cbex.ee_start = 0; -+ } else -+ cbex = *ex; ++ cbex.ec_block = start; ++ cbex.ec_len = end - start; ++ cbex.ec_start = 0; ++ cbex.ec_type = EXT3_EXT_CACHE_GAP; ++ } else { ++ cbex.ec_block = ex->ee_block; ++ cbex.ec_len = ex->ee_len; ++ cbex.ec_start = ex->ee_start; ++ cbex.ec_type = EXT3_EXT_CACHE_EXTENT; ++ } + ++ EXT_ASSERT(cbex.ec_len > 0); + EXT_ASSERT(path[depth].p_hdr); -+ err = func(tree, path, &cbex, exists); ++ err = func(tree, path, &cbex); + ext3_ext_drop_refs(path); + + if (err < 0) @@ -1755,7 +1308,7 @@ Index: linux-2.6.10/fs/ext3/extents.c + path = NULL; + } + -+ block = cbex.ee_block + cbex.ee_len; ++ block = cbex.ec_block + cbex.ec_len; + } + + if (path) { @@ -1767,14 
+1320,15 @@ Index: linux-2.6.10/fs/ext3/extents.c +} + +static inline void -+ext3_ext_put_in_cache(struct ext3_extents_tree *tree, struct ext3_extent *ex) ++ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block, ++ __u32 len, __u32 start, int type) +{ ++ EXT_ASSERT(len > 0); + if (tree->cex) { -+ EXT_ASSERT(ex); -+ EXT_ASSERT(ex->ee_len); -+ tree->cex->ee_block = ex->ee_block; -+ tree->cex->ee_start = ex->ee_start; -+ tree->cex->ee_len = ex->ee_len; ++ tree->cex->ec_type = type; ++ tree->cex->ec_block = block; ++ tree->cex->ec_len = len; ++ tree->cex->ec_start = start; + } +} + @@ -1784,11 +1338,12 @@ Index: linux-2.6.10/fs/ext3/extents.c + */ +static inline void +ext3_ext_put_gap_in_cache(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ unsigned long block) ++ struct ext3_ext_path *path, ++ unsigned long block) +{ + int depth = EXT_DEPTH(tree); -+ struct ext3_extent *ex, gex; ++ unsigned long lblock, len; ++ struct ext3_extent *ex; + + if (!tree->cex) + return; @@ -1796,63 +1351,64 @@ Index: linux-2.6.10/fs/ext3/extents.c + ex = path[depth].p_ext; + if (ex == NULL) { + /* there is no extent yet, so gap is [0;-] */ -+ gex.ee_block = 0; -+ gex.ee_len = EXT_CACHE_MARK; ++ lblock = 0; ++ len = EXT_MAX_BLOCK; + ext_debug(tree, "cache gap(whole file):"); + } else if (block < ex->ee_block) { -+ gex.ee_block = block; -+ gex.ee_len = ex->ee_block - block; ++ lblock = block; ++ len = ex->ee_block - block; + ext_debug(tree, "cache gap(before): %lu [%lu:%lu]", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len); ++ (unsigned long) block, ++ (unsigned long) ex->ee_block, ++ (unsigned long) ex->ee_len); + } else if (block >= ex->ee_block + ex->ee_len) { -+ gex.ee_block = ex->ee_block + ex->ee_len; -+ gex.ee_len = ext3_ext_next_allocated_block(path); ++ lblock = ex->ee_block + ex->ee_len; ++ len = ext3_ext_next_allocated_block(path); + ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", -+ (unsigned long) 
ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) block); -+ EXT_ASSERT(gex.ee_len > gex.ee_block); -+ gex.ee_len = gex.ee_len - gex.ee_block; ++ (unsigned long) ex->ee_block, ++ (unsigned long) ex->ee_len, ++ (unsigned long) block); ++ EXT_ASSERT(len > lblock); ++ len = len - lblock; + } else { ++ lblock = len = 0; + BUG(); + } + -+ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) gex.ee_block, -+ (unsigned long) gex.ee_len); -+ gex.ee_start = EXT_CACHE_MARK; -+ ext3_ext_put_in_cache(tree, &gex); ++ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len); ++ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP); +} + +static inline int +ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, -+ struct ext3_extent *ex) ++ struct ext3_extent *ex) +{ -+ struct ext3_extent *cex = tree->cex; ++ struct ext3_ext_cache *cex = tree->cex; + + /* is there cache storage at all? */ + if (!cex) -+ return 0; ++ return EXT3_EXT_CACHE_NO; + + /* has cache valid data? 
*/ -+ if (cex->ee_len == 0) -+ return 0; -+ -+ if (block >= cex->ee_block && block < cex->ee_block + cex->ee_len) { -+ ex->ee_block = cex->ee_block; -+ ex->ee_start = cex->ee_start; -+ ex->ee_len = cex->ee_len; ++ if (cex->ec_type == EXT3_EXT_CACHE_NO) ++ return EXT3_EXT_CACHE_NO; ++ ++ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP || ++ cex->ec_type == EXT3_EXT_CACHE_EXTENT); ++ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { ++ ex->ee_block = cex->ec_block; ++ ex->ee_start = cex->ec_start; ++ ex->ee_len = cex->ec_len; + ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", -+ (unsigned long) block, -+ (unsigned long) ex->ee_block, -+ (unsigned long) ex->ee_len, -+ (unsigned long) ex->ee_start); -+ return 1; ++ (unsigned long) block, ++ (unsigned long) ex->ee_block, ++ (unsigned long) ex->ee_len, ++ (unsigned long) ex->ee_start); ++ return cex->ec_type; + } + + /* not in cache */ -+ return 0; ++ return EXT3_EXT_CACHE_NO; +} + +/* @@ -1861,7 +1417,7 @@ Index: linux-2.6.10/fs/ext3/extents.c + * last index in the block only + */ +int ext3_ext_rm_idx(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) ++ struct ext3_ext_path *path) +{ + struct buffer_head *bh; + int err; @@ -1875,7 +1431,7 @@ Index: linux-2.6.10/fs/ext3/extents.c + if ((err = ext3_ext_dirty(handle, tree, path))) + return err; + ext_debug(tree, "index is empty, remove it, free block %d\n", -+ path->p_idx->ei_leaf); ++ path->p_idx->ei_leaf); + bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); + ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); + ext3_free_blocks(handle, tree->inode, path->p_idx->ei_leaf, 1); @@ -1883,7 +1439,7 @@ Index: linux-2.6.10/fs/ext3/extents.c +} + +int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path) ++ struct ext3_ext_path *path) +{ + int depth = EXT_DEPTH(tree); + int needed; @@ -1920,8 +1476,8 @@ Index: linux-2.6.10/fs/ext3/extents.c + +static int 
+ext3_ext_split_for_rm(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) ++ struct ext3_ext_path *path, unsigned long start, ++ unsigned long end) +{ + struct ext3_extent *ex, tex; + struct ext3_ext_path *npath; @@ -1955,7 +1511,7 @@ Index: linux-2.6.10/fs/ext3/extents.c + /* FIXME: some callback to free underlying resource + * and correct ee_start? */ + ext_debug(tree, "split extent: head %u:%u, tail %u:%u\n", -+ ex->ee_block, ex->ee_len, tex.ee_block, tex.ee_len); ++ ex->ee_block, ex->ee_len, tex.ee_block, tex.ee_len); + + npath = ext3_ext_find_extent(tree, ex->ee_block, NULL); + if (IS_ERR(npath)) @@ -1969,13 +1525,12 @@ Index: linux-2.6.10/fs/ext3/extents.c + kfree(npath); + + return err; -+ +} + +static int +ext3_ext_rm_leaf(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, unsigned long start, -+ unsigned long end) ++ struct ext3_ext_path *path, unsigned long start, ++ unsigned long end) +{ + struct ext3_extent *ex, *fu = NULL, *lu, *le; + int err = 0, correct_index = 0; @@ -2008,8 +1563,7 @@ Index: linux-2.6.10/fs/ext3/extents.c + } + + lu = ex; -+ while (ex >= EXT_FIRST_EXTENT(eh) && -+ ex->ee_block + ex->ee_len > start) { ++ while (ex >= EXT_FIRST_EXTENT(eh) && ex->ee_block + ex->ee_len > start) { + ext_debug(tree, "remove ext %u:%u\n", ex->ee_block, ex->ee_len); + path[depth].p_ext = ex; + @@ -2036,7 +1590,7 @@ Index: linux-2.6.10/fs/ext3/extents.c + block = ex->ee_block; + num = 0; + EXT_ASSERT(a == ex->ee_block && -+ b == ex->ee_block + ex->ee_len - 1); ++ b == ex->ee_block + ex->ee_len - 1); + } + + if (ex == EXT_FIRST_EXTENT(eh)) @@ -2078,7 +1632,7 @@ Index: linux-2.6.10/fs/ext3/extents.c + goto out; + + ext_debug(tree, "new extent: %u:%u:%u\n", -+ ex->ee_block, ex->ee_len, ex->ee_start); ++ ex->ee_block, ex->ee_len, ex->ee_start); + ex--; + } + @@ -2142,7 +1696,7 @@ Index: linux-2.6.10/fs/ext3/extents.c +} + +int ext3_ext_remove_space(struct 
ext3_extents_tree *tree, -+ unsigned long start, unsigned long end) ++ unsigned long start, unsigned long end) +{ + struct inode *inode = tree->inode; + struct super_block *sb = inode->i_sb; @@ -2166,8 +1720,7 @@ Index: linux-2.6.10/fs/ext3/extents.c + */ + path = kmalloc(sizeof(struct ext3_ext_path) * (depth + 1), GFP_KERNEL); + if (IS_ERR(path)) { -+ ext3_error(sb, "ext3_ext_remove_space", -+ "Can't allocate path array"); ++ ext3_error(sb, __FUNCTION__, "Can't allocate path array"); + ext3_journal_stop(handle); + return -ENOMEM; + } @@ -2199,19 +1752,19 @@ Index: linux-2.6.10/fs/ext3/extents.c + ext3_ext_last_covered(path[i].p_hdr, end); + path[i].p_block = path[i].p_hdr->eh_entries + 1; + ext_debug(tree, "init index ptr: hdr 0x%p, num %d\n", -+ path[i].p_hdr, path[i].p_hdr->eh_entries); ++ path[i].p_hdr, path[i].p_hdr->eh_entries); + } else { + /* we've already was here, see at next index */ + path[i].p_idx--; + } + + ext_debug(tree, "level %d - index, first 0x%p, cur 0x%p\n", -+ i, EXT_FIRST_INDEX(path[i].p_hdr), -+ path[i].p_idx); ++ i, EXT_FIRST_INDEX(path[i].p_hdr), ++ path[i].p_idx); + if (ext3_ext_more_to_rm(path + i)) { + /* go to the next level */ + ext_debug(tree, "move to level %d (block %d)\n", -+ i + 1, path[i].p_idx->ei_leaf); ++ i + 1, path[i].p_idx->ei_leaf); + memset(path + i + 1, 0, sizeof(*path)); + path[i+1].p_bh = sb_bread(sb, path[i].p_idx->ei_leaf); + if (!path[i+1].p_bh) { @@ -2334,7 +1887,7 @@ Index: linux-2.6.10/fs/ext3/extents.c +} + +static int ext3_ext_mergable(struct ext3_extent *ex1, -+ struct ext3_extent *ex2) ++ struct ext3_extent *ex2) +{ + /* FIXME: support for large fs */ + if (ex1->ee_start + ex1->ee_len == ex2->ee_start) @@ -2344,8 +1897,8 @@ Index: linux-2.6.10/fs/ext3/extents.c + +static int +ext3_remove_blocks_credits(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) ++ struct ext3_extent *ex, ++ unsigned long from, unsigned long to) +{ + int needed; + @@ -2360,8 +1913,8 @@ 
Index: linux-2.6.10/fs/ext3/extents.c + +static int +ext3_remove_blocks(struct ext3_extents_tree *tree, -+ struct ext3_extent *ex, -+ unsigned long from, unsigned long to) ++ struct ext3_extent *ex, ++ unsigned long from, unsigned long to) +{ + int needed = ext3_remove_blocks_credits(tree, ex, from, to); + handle_t *handle = ext3_journal_start(tree->inode, needed); @@ -2376,7 +1929,7 @@ Index: linux-2.6.10/fs/ext3/extents.c + num = ex->ee_block + ex->ee_len - from; + start = ex->ee_start + ex->ee_len - num; + ext_debug(tree, "free last %lu blocks starting %lu\n", -+ num, start); ++ num, start); + for (i = 0; i < num; i++) { + bh = sb_find_get_block(tree->inode->i_sb, start + i); + ext3_forget(handle, 0, tree->inode, bh, start + i); @@ -2384,17 +1937,17 @@ Index: linux-2.6.10/fs/ext3/extents.c + ext3_free_blocks(handle, tree->inode, start, num); + } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { + printk("strange request: removal %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); ++ from, to, ex->ee_block, ex->ee_len); + } else { + printk("strange request: removal(2) %lu-%lu from %u:%u\n", -+ from, to, ex->ee_block, ex->ee_len); ++ from, to, ex->ee_block, ex->ee_len); + } + ext3_journal_stop(handle); + return 0; +} + +static int ext3_ext_find_goal(struct inode *inode, -+ struct ext3_ext_path *path, unsigned long block) ++ struct ext3_ext_path *path, unsigned long block) +{ + struct ext3_inode_info *ei = EXT3_I(inode); + unsigned long bg_start; @@ -2424,8 +1977,8 @@ Index: linux-2.6.10/fs/ext3/extents.c +} + +static int ext3_new_block_cb(handle_t *handle, struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *ex, int *err) ++ struct ext3_ext_path *path, ++ struct ext3_extent *ex, int *err) +{ + struct inode *inode = tree->inode; + int newblock, goal; @@ -2462,19 +2015,19 @@ Index: linux-2.6.10/fs/ext3/extents.c +}; + +void ext3_init_tree_desc(struct ext3_extents_tree *tree, -+ struct inode *inode) ++ 
struct inode *inode) +{ + tree->inode = inode; + tree->root = (void *) EXT3_I(inode)->i_data; + tree->buffer = (void *) inode; + tree->buffer_len = sizeof(EXT3_I(inode)->i_data); -+ tree->cex = (struct ext3_extent *) &EXT3_I(inode)->i_cached_extent; ++ tree->cex = &EXT3_I(inode)->i_cached_extent; + tree->ops = &ext3_blockmap_helpers; +} + +int ext3_ext_get_block(handle_t *handle, struct inode *inode, -+ long iblock, struct buffer_head *bh_result, -+ int create, int extend_disksize) ++ long iblock, struct buffer_head *bh_result, ++ int create, int extend_disksize) +{ + struct ext3_ext_path *path = NULL; + struct ext3_extent newex; @@ -2482,26 +2035,27 @@ Index: linux-2.6.10/fs/ext3/extents.c + int goal, newblock, err = 0, depth; + struct ext3_extents_tree tree; + -+ clear_buffer_new(bh_result); ++ __clear_bit(BH_New, &bh_result->b_state); + ext3_init_tree_desc(&tree, inode); + ext_debug(&tree, "block %d requested for inode %u\n", -+ (int) iblock, (unsigned) inode->i_ino); ++ (int) iblock, (unsigned) inode->i_ino); + down(&EXT3_I(inode)->truncate_sem); + + /* check in cache */ -+ if (ext3_ext_in_cache(&tree, iblock, &newex)) { -+ if (newex.ee_start == EXT_CACHE_MARK) { -+ /* this is cached gap */ ++ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) { ++ if (goal == EXT3_EXT_CACHE_GAP) { + if (!create) { + /* block isn't allocated yet and + * user don't want to allocate it */ + goto out2; + } + /* we should allocate requested block */ -+ } else if (newex.ee_start) { ++ } else if (goal == EXT3_EXT_CACHE_EXTENT) { + /* block is already allocated */ + newblock = iblock - newex.ee_block + newex.ee_start; + goto out; ++ } else { ++ EXT_ASSERT(0); + } + } + @@ -2527,9 +2081,11 @@ Index: linux-2.6.10/fs/ext3/extents.c + if (iblock >= ex->ee_block && iblock < ex->ee_block + ex->ee_len) { + newblock = iblock - ex->ee_block + ex->ee_start; + ext_debug(&tree, "%d fit into %d:%d -> %d\n", -+ (int) iblock, ex->ee_block, ex->ee_len, -+ newblock); -+ 
ext3_ext_put_in_cache(&tree, ex); ++ (int) iblock, ex->ee_block, ex->ee_len, ++ newblock); ++ ext3_ext_put_in_cache(&tree, ex->ee_block, ++ ex->ee_len, ex->ee_start, ++ EXT3_EXT_CACHE_EXTENT); + goto out; + } + } @@ -2550,7 +2106,7 @@ Index: linux-2.6.10/fs/ext3/extents.c + if (!newblock) + goto out2; + ext_debug(&tree, "allocate new block: goal %d, found %d\n", -+ goal, newblock); ++ goal, newblock); + + /* try to insert new extent into found leaf and return */ + newex.ee_block = iblock; @@ -2565,12 +2121,15 @@ Index: linux-2.6.10/fs/ext3/extents.c + + /* previous routine could use block we allocated */ + newblock = newex.ee_start; -+ set_buffer_new(bh_result); ++ __set_bit(BH_New, &bh_result->b_state); + -+ ext3_ext_put_in_cache(&tree, &newex); ++ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, ++ newex.ee_start, EXT3_EXT_CACHE_EXTENT); +out: + ext3_ext_show_leaf(&tree, path); -+ map_bh(bh_result, inode->i_sb, newblock); ++ __set_bit(BH_Mapped, &bh_result->b_state); ++ bh_result->b_bdev = inode->i_sb->s_bdev; ++ bh_result->b_blocknr = newblock; +out2: + if (path) { + ext3_ext_drop_refs(path); @@ -2625,222 +2184,752 @@ Index: linux-2.6.10/fs/ext3/extents.c + EXT3_I(inode)->i_disksize = inode->i_size; + ext3_mark_inode_dirty(handle, inode); + -+ last_block = (inode->i_size + sb->s_blocksize - 1) -+ >> EXT3_BLOCK_SIZE_BITS(sb); -+ err = ext3_ext_remove_space(&tree, last_block, EXT_MAX_BLOCK); -+ -+ /* In a multi-transaction truncate, we only make the final -+ * transaction synchronous */ -+ if (IS_SYNC(inode)) -+ handle->h_sync = 1; ++ last_block = (inode->i_size + sb->s_blocksize - 1) >> ++ EXT3_BLOCK_SIZE_BITS(sb); ++ err = ext3_ext_remove_space(&tree, last_block, EXT_MAX_BLOCK); ++ ++ /* In a multi-transaction truncate, we only make the final ++ * transaction synchronous */ ++ if (IS_SYNC(inode)) ++ handle->h_sync = 1; ++ ++out_stop: ++ /* ++ * If this was a simple ftruncate(), and the file will remain alive ++ * then we need to clear up the orphan 
record which we created above. ++ * However, if this was a real unlink then we were called by ++ * ext3_delete_inode(), and we allow that function to clean up the ++ * orphan info for us. ++ */ ++ if (inode->i_nlink) ++ ext3_orphan_del(handle, inode); ++ ++ up(&EXT3_I(inode)->truncate_sem); ++ ext3_journal_stop(handle); ++} ++ ++/* ++ * this routine calculate max number of blocks we could modify ++ * in order to allocate new block for an inode ++ */ ++int ext3_ext_writepage_trans_blocks(struct inode *inode, int num) ++{ ++ struct ext3_extents_tree tree; ++ int needed; ++ ++ ext3_init_tree_desc(&tree, inode); ++ ++ needed = ext3_ext_calc_credits_for_insert(&tree, NULL); ++ ++ /* caller want to allocate num blocks */ ++ needed *= num; ++ ++#ifdef CONFIG_QUOTA ++ /* ++ * FIXME: real calculation should be here ++ * it depends on blockmap format of qouta file ++ */ ++ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; ++#endif ++ ++ return needed; ++} ++ ++void ext3_extents_initialize_blockmap(handle_t *handle, struct inode *inode) ++{ ++ struct ext3_extents_tree tree; ++ ++ ext3_init_tree_desc(&tree, inode); ++ ext3_extent_tree_init(handle, &tree); ++} ++ ++int ext3_ext_calc_blockmap_metadata(struct inode *inode, int blocks) ++{ ++ struct ext3_extents_tree tree; ++ ++ ext3_init_tree_desc(&tree, inode); ++ return ext3_ext_calc_metadata_amount(&tree, blocks); ++} ++ ++static int ++ext3_ext_store_extent_cb(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_ext_cache *newex) ++{ ++ struct ext3_extent_buf *buf = (struct ext3_extent_buf *) tree->private; ++ ++ if (newex->ec_type != EXT3_EXT_CACHE_EXTENT) ++ return EXT_CONTINUE; ++ ++ if (buf->err < 0) ++ return EXT_BREAK; ++ if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen) ++ return EXT_BREAK; ++ ++ if (!copy_to_user(buf->cur, newex, sizeof(*newex))) { ++ buf->err++; ++ buf->cur += sizeof(*newex); ++ } else { ++ buf->err = -EFAULT; ++ return EXT_BREAK; ++ } ++ return EXT_CONTINUE; ++} ++ 
++static int ++ext3_ext_collect_stats_cb(struct ext3_extents_tree *tree, ++ struct ext3_ext_path *path, ++ struct ext3_ext_cache *ex) ++{ ++ struct ext3_extent_tree_stats *buf = ++ (struct ext3_extent_tree_stats *) tree->private; ++ int depth; ++ ++ if (ex->ec_type != EXT3_EXT_CACHE_EXTENT) ++ return EXT_CONTINUE; ++ ++ depth = EXT_DEPTH(tree); ++ buf->extents_num++; ++ if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr)) ++ buf->leaf_num++; ++ return EXT_CONTINUE; ++} ++ ++int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, ++ unsigned long arg) ++{ ++ int err = 0; ++ ++ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)) ++ return -EINVAL; ++ ++ if (cmd == EXT3_IOC_GET_EXTENTS) { ++ struct ext3_extent_buf buf; ++ struct ext3_extents_tree tree; ++ ++ if (copy_from_user(&buf, (void *) arg, sizeof(buf))) ++ return -EFAULT; ++ ++ ext3_init_tree_desc(&tree, inode); ++ buf.cur = buf.buffer; ++ buf.err = 0; ++ tree.private = &buf; ++ down(&EXT3_I(inode)->truncate_sem); ++ err = ext3_ext_walk_space(&tree, buf.start, EXT_MAX_BLOCK, ++ ext3_ext_store_extent_cb); ++ up(&EXT3_I(inode)->truncate_sem); ++ if (err == 0) ++ err = buf.err; ++ } else if (cmd == EXT3_IOC_GET_TREE_STATS) { ++ struct ext3_extent_tree_stats buf; ++ struct ext3_extents_tree tree; ++ ++ ext3_init_tree_desc(&tree, inode); ++ down(&EXT3_I(inode)->truncate_sem); ++ buf.depth = EXT_DEPTH(&tree); ++ buf.extents_num = 0; ++ buf.leaf_num = 0; ++ tree.private = &buf; ++ err = ext3_ext_walk_space(&tree, 0, EXT_MAX_BLOCK, ++ ext3_ext_collect_stats_cb); ++ up(&EXT3_I(inode)->truncate_sem); ++ if (!err) ++ err = copy_to_user((void *) arg, &buf, sizeof(buf)); ++ } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) { ++ struct ext3_extents_tree tree; ++ ext3_init_tree_desc(&tree, inode); ++ down(&EXT3_I(inode)->truncate_sem); ++ err = EXT_DEPTH(&tree); ++ up(&EXT3_I(inode)->truncate_sem); ++ } ++ ++ return err; ++} ++ ++EXPORT_SYMBOL(ext3_init_tree_desc); ++EXPORT_SYMBOL(ext3_mark_inode_dirty); 
++EXPORT_SYMBOL(ext3_ext_invalidate_cache); ++EXPORT_SYMBOL(ext3_ext_insert_extent); ++EXPORT_SYMBOL(ext3_ext_walk_space); ++EXPORT_SYMBOL(ext3_ext_find_goal); ++EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert); +Index: linux-2.6.5-sles9/fs/ext3/ialloc.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/ialloc.c 2005-02-23 01:01:52.366281264 +0300 ++++ linux-2.6.5-sles9/fs/ext3/ialloc.c 2005-02-23 01:02:37.398435336 +0300 +@@ -566,7 +566,7 @@ repeat: + ei->i_dir_start_lookup = 0; + ei->i_disksize = 0; + +- ei->i_flags = EXT3_I(dir)->i_flags & ~EXT3_INDEX_FL; ++ ei->i_flags = EXT3_I(dir)->i_flags & ~(EXT3_INDEX_FL|EXT3_EXTENTS_FL); + if (S_ISLNK(mode)) + ei->i_flags &= ~(EXT3_IMMUTABLE_FL|EXT3_APPEND_FL); + /* dirsync only applies to directories */ +@@ -647,6 +647,18 @@ + DQUOT_FREE_INODE(inode); + goto fail2; + } ++ if (test_opt(sb, EXTENTS) && S_ISREG(inode->i_mode)) { ++ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; ++ ext3_extents_initialize_blockmap(handle, inode); ++ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS)) { ++ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); ++ if (err) goto fail; ++ EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS); ++ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "call ext3_journal_dirty_metadata"); ++ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); ++ } ++ } ++ + err = ext3_mark_inode_dirty(handle, inode); + if (err) { + ext3_std_error(sb, err); +Index: linux-2.6.5-sles9/fs/ext3/inode.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/inode.c 2005-02-23 01:01:52.373280200 +0300 ++++ linux-2.6.5-sles9/fs/ext3/inode.c 2005-02-23 01:02:37.404434424 +0300 +@@ -796,6 +796,17 @@ + goto reread; + } + ++static inline int ++ext3_get_block_wrap(handle_t *handle, struct inode *inode, long block, ++ struct buffer_head *bh, int create, int extend_disksize) ++{ ++ if (EXT3_I(inode)->i_flags & 
EXT3_EXTENTS_FL) ++ return ext3_ext_get_block(handle, inode, block, bh, create, ++ extend_disksize); ++ return ext3_get_block_handle(handle, inode, block, bh, create, ++ extend_disksize); ++} ++ + static int ext3_get_block(struct inode *inode, sector_t iblock, + struct buffer_head *bh_result, int create) + { +@@ -806,8 +817,8 @@ + handle = ext3_journal_current_handle(); + J_ASSERT(handle != 0); + } +- ret = ext3_get_block_handle(handle, inode, iblock, +- bh_result, create, 1); ++ ret = ext3_get_block_wrap(handle, inode, iblock, ++ bh_result, create, 1); + return ret; + } + +@@ -833,8 +844,8 @@ + + get_block: + if (ret == 0) +- ret = ext3_get_block_handle(handle, inode, iblock, +- bh_result, create, 0); ++ ret = ext3_get_block_wrap(handle, inode, iblock, ++ bh_result, create, 0); + bh_result->b_size = (1 << inode->i_blkbits); + return ret; + } +@@ -855,7 +866,7 @@ + dummy.b_state = 0; + dummy.b_blocknr = -1000; + buffer_trace_init(&dummy.b_history); +- *errp = ext3_get_block_handle(handle, inode, block, &dummy, create, 1); ++ *errp = ext3_get_block_wrap(handle, inode, block, &dummy, create, 1); + if (!*errp && buffer_mapped(&dummy)) { + struct buffer_head *bh; + bh = sb_getblk(inode->i_sb, dummy.b_blocknr); +@@ -1587,7 +1598,7 @@ + * This required during truncate. We need to physically zero the tail end + * of that block so it doesn't yield old data if the file is later grown. + */ +-static int ext3_block_truncate_page(handle_t *handle, struct page *page, ++int ext3_block_truncate_page(handle_t *handle, struct page *page, + struct address_space *mapping, loff_t from) + { + unsigned long index = from >> PAGE_CACHE_SHIFT; +@@ -2083,6 +2094,9 @@ + return; + } + ++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) ++ return ext3_ext_truncate(inode, page); ++ + handle = start_transaction(inode); + if (IS_ERR(handle)) { + if (page) { +@@ -2789,6 +2803,9 @@ + int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 
5 : 3; + int ret; + ++ if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) ++ return ext3_ext_writepage_trans_blocks(inode, bpp); ++ + if (ext3_should_journal_data(inode)) + ret = 3 * (bpp + indirects) + 2; + else +Index: linux-2.6.5-sles9/fs/ext3/Makefile +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/Makefile 2005-02-23 01:01:46.501172896 +0300 ++++ linux-2.6.5-sles9/fs/ext3/Makefile 2005-02-23 01:02:37.405434272 +0300 +@@ -5,7 +5,8 @@ + obj-$(CONFIG_EXT3_FS) += ext3.o + + ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ +- ioctl.o namei.o super.o symlink.o hash.o resize.o iopen.o ++ ioctl.o namei.o super.o symlink.o hash.o resize.o iopen.o \ ++ extents.o + + ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o + ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o +Index: linux-2.6.5-sles9/fs/ext3/super.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/super.c 2005-02-23 01:02:34.072940888 +0300 ++++ linux-2.6.5-sles9/fs/ext3/super.c 2005-02-23 01:47:15.291333736 +0300 +@@ -389,6 +389,7 @@ + struct ext3_super_block *es = sbi->s_es; + int i; + ++ ext3_ext_release(sb); + ext3_xattr_put_super(sb); + journal_destroy(sbi->s_journal); + if (!(sb->s_flags & MS_RDONLY)) { +@@ -447,7 +448,8 @@ + dynlock_init(&ei->i_htree_lock); + sema_init(&ei->i_rename_sem, 1); + sema_init(&ei->i_append_sem, 1); ++ memset(&ei->i_cached_extent, 0, sizeof(ei->i_cached_extent)); + return &ei->vfs_inode; + } + + static void ext3_destroy_inode(struct inode *inode) +@@ -537,6 +540,7 @@ + Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_pdirops, + Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, + Opt_ignore, Opt_barrier, Opt_err, Opt_resize, ++ Opt_extents, Opt_extdebug, + }; + + static match_table_t tokens = { +@@ -582,6 +585,8 @@ + {Opt_iopen, "iopen"}, + {Opt_noiopen, "noiopen"}, + {Opt_iopen_nopriv, "iopen_nopriv"}, ++ {Opt_extents, "extents"}, ++ {Opt_extdebug, "extdebug"}, 
+ {Opt_err, NULL}, + {Opt_resize, "resize"}, + }; +@@ -797,6 +802,12 @@ + break; + case Opt_ignore: + break; ++ case Opt_extents: ++ set_opt (sbi->s_mount_opt, EXTENTS); ++ break; ++ case Opt_extdebug: ++ set_opt (sbi->s_mount_opt, EXTDEBUG); ++ break; + default: + printk (KERN_ERR + "EXT3-fs: Unrecognized mount option \"%s\" " +@@ -1449,6 +1460,8 @@ + percpu_counter_mod(&sbi->s_dirs_counter, + ext3_count_dirs(sb)); + ++ ext3_ext_init(sb); ++ + return 0; + + failed_mount3: +Index: linux-2.6.5-sles9/fs/ext3/ioctl.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/ioctl.c 2005-02-23 01:01:42.887722224 +0300 ++++ linux-2.6.5-sles9/fs/ext3/ioctl.c 2005-02-23 01:02:37.412433208 +0300 +@@ -124,6 +124,10 @@ + err = ext3_change_inode_journal_flag(inode, jflag); + return err; + } ++ case EXT3_IOC_GET_EXTENTS: ++ case EXT3_IOC_GET_TREE_STATS: ++ case EXT3_IOC_GET_TREE_DEPTH: ++ return ext3_ext_ioctl(inode, filp, cmd, arg); + case EXT3_IOC_GETVERSION: + case EXT3_IOC_GETVERSION_OLD: + return put_user(inode->i_generation, (int *) arg); +Index: linux-2.6.5-sles9/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2005-02-23 01:02:35.823674736 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2005-02-23 01:02:37.414432904 +0300 +@@ -186,8 +186,9 @@ + #define EXT3_NOTAIL_FL 0x00008000 /* don't merge file tail */ + #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ + #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ ++#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */ + #define EXT3_RESERVED_FL 0x80000000 /* reserved for ext3 lib */ + +-#define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ ++#define EXT3_FL_USER_VISIBLE 0x000BDFFF /* User visible flags */ + #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ + +@@ -211,6 +212,9 @@ + #endif + #define 
EXT3_IOC_GETRSVSZ _IOR('f', 5, long) + #define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) ++#define EXT3_IOC_GET_EXTENTS _IOR('f', 7, long) ++#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 8, long) ++#define EXT3_IOC_GET_TREE_STATS _IOR('f', 9, long) + + /* + * Structure of an inode on the disk +@@ -333,6 +337,8 @@ + #define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */ + #define EXT3_MOUNT_IOPEN 0x40000 /* Allow access via iopen */ + #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ ++#define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */ ++#define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */ + + /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ + #ifndef clear_opt +@@ -503,11 +509,13 @@ + #define EXT3_FEATURE_INCOMPAT_RECOVER 0x0004 /* Needs recovery */ + #define EXT3_FEATURE_INCOMPAT_JOURNAL_DEV 0x0008 /* Journal device */ + #define EXT3_FEATURE_INCOMPAT_META_BG 0x0010 ++#define EXT3_FEATURE_INCOMPAT_EXTENTS 0x0040 /* extents support */ + + #define EXT3_FEATURE_COMPAT_SUPP EXT2_FEATURE_COMPAT_EXT_ATTR + #define EXT3_FEATURE_INCOMPAT_SUPP (EXT3_FEATURE_INCOMPAT_FILETYPE| \ + EXT3_FEATURE_INCOMPAT_RECOVER| \ +- EXT3_FEATURE_INCOMPAT_META_BG) ++ EXT3_FEATURE_INCOMPAT_META_BG| \ ++ EXT3_FEATURE_INCOMPAT_EXTENTS) + #define EXT3_FEATURE_RO_COMPAT_SUPP (EXT3_FEATURE_RO_COMPAT_SPARSE_SUPER| \ + EXT3_FEATURE_RO_COMPAT_LARGE_FILE| \ + EXT3_FEATURE_RO_COMPAT_BTREE_DIR) +@@ -729,6 +735,7 @@ + + + /* inode.c */ ++extern int ext3_block_truncate_page(handle_t *, struct page *, struct address_space *, loff_t); + extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); + extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); + extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); +@@ -802,6 +809,16 @@ + extern struct inode_operations ext3_symlink_inode_operations; + extern struct inode_operations ext3_fast_symlink_inode_operations; + ++/* 
extents.c */ ++extern int ext3_ext_writepage_trans_blocks(struct inode *, int); ++extern int ext3_ext_get_block(handle_t *, struct inode *, long, ++ struct buffer_head *, int, int); ++extern void ext3_ext_truncate(struct inode *, struct page *); ++extern void ext3_ext_init(struct super_block *); ++extern void ext3_ext_release(struct super_block *); ++extern void ext3_extents_initialize_blockmap(handle_t *, struct inode *); ++extern int ext3_ext_ioctl(struct inode *inode, struct file *filp, ++ unsigned int cmd, unsigned long arg); + + #endif /* __KERNEL__ */ + +Index: linux-2.6.5-sles9/include/linux/ext3_extents.h +=================================================================== +--- linux-2.6.5-sles9.orig/include/linux/ext3_extents.h 2005-02-17 22:07:57.023609040 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_extents.h 2005-02-23 01:02:37.416432600 +0300 +@@ -0,0 +1,264 @@ ++/* ++ * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com ++ * Written by Alex Tomas ++ * ++ * This program is free software; you can redistribute it and/or modify ++ * it under the terms of the GNU General Public License version 2 as ++ * published by the Free Software Foundation. ++ * ++ * This program is distributed in the hope that it will be useful, ++ * but WITHOUT ANY WARRANTY; without even the implied warranty of ++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ * GNU General Public License for more details. 
++ * ++ * You should have received a copy of the GNU General Public Licens ++ * along with this program; if not, write to the Free Software ++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- ++ */ ++ ++#ifndef _LINUX_EXT3_EXTENTS ++#define _LINUX_EXT3_EXTENTS ++ ++/* ++ * with AGRESSIVE_TEST defined capacity of index/leaf blocks ++ * become very little, so index split, in-depth growing and ++ * other hard changes happens much more often ++ * this is for debug purposes only ++ */ ++#define AGRESSIVE_TEST_ ++ ++/* ++ * if CHECK_BINSEARCH defined, then results of binary search ++ * will be checked by linear search ++ */ ++#define CHECK_BINSEARCH_ ++ ++/* ++ * if EXT_DEBUG is defined you can use 'extdebug' mount option ++ * to get lots of info what's going on ++ */ ++#define EXT_DEBUG_ ++#ifdef EXT_DEBUG ++#define ext_debug(tree,fmt,a...) \ ++do { \ ++ if (test_opt((tree)->inode->i_sb, EXTDEBUG)) \ ++ printk(fmt, ##a); \ ++} while (0); ++#else ++#define ext_debug(tree,fmt,a...) ++#endif ++ ++/* ++ * if EXT_STATS is defined then stats numbers are collected ++ * these number will be displayed at umount time ++ */ ++#define EXT_STATS_ ++ ++ ++#define EXT3_ALLOC_NEEDED 3 /* block bitmap + group desc. + sb */ ++ ++/* ++ * ext3_inode has i_block array (total 60 bytes) ++ * first 4 bytes are used to store: ++ * - tree depth (0 mean there is no tree yet. 
all extents in the inode) ++ * - number of alive extents in the inode ++ */ ++ ++/* ++ * this is extent on-disk structure ++ * it's used at the bottom of the tree ++ */ ++struct ext3_extent { ++ __u32 ee_block; /* first logical block extent covers */ ++ __u16 ee_len; /* number of blocks covered by extent */ ++ __u16 ee_start_hi; /* high 16 bits of physical block */ ++ __u32 ee_start; /* low 32 bigs of physical block */ ++}; ++ ++/* ++ * this is index on-disk structure ++ * it's used at all the levels, but the bottom ++ */ ++struct ext3_extent_idx { ++ __u32 ei_block; /* index covers logical blocks from 'block' */ ++ __u32 ei_leaf; /* pointer to the physical block of the next * ++ * level. leaf or next index could bet here */ ++ __u16 ei_leaf_hi; /* high 16 bits of physical block */ ++ __u16 ei_unused; ++}; ++ ++/* ++ * each block (leaves and indexes), even inode-stored has header ++ */ ++struct ext3_extent_header { ++ __u16 eh_magic; /* probably will support different formats */ ++ __u16 eh_entries; /* number of valid entries */ ++ __u16 eh_max; /* capacity of store in entries */ ++ __u16 eh_depth; /* has tree real underlaying blocks? */ ++ __u32 eh_generation; /* generation of the tree */ ++}; ++ ++#define EXT3_EXT_MAGIC 0xf30a ++ ++/* ++ * array of ext3_ext_path contains path to some extent ++ * creation/lookup routines use it for traversal/splitting/etc ++ * truncate uses it to simulate recursive walking ++ */ ++struct ext3_ext_path { ++ __u32 p_block; ++ __u16 p_depth; ++ struct ext3_extent *p_ext; ++ struct ext3_extent_idx *p_idx; ++ struct ext3_extent_header *p_hdr; ++ struct buffer_head *p_bh; ++}; ++ ++/* ++ * structure for external API ++ */ + -+out_stop: -+ /* -+ * If this was a simple ftruncate(), and the file will remain alive -+ * then we need to clear up the orphan record which we created above. -+ * However, if this was a real unlink then we were called by -+ * ext3_delete_inode(), and we allow that function to clean up the -+ * orphan info for us. 
-+ */ -+ if (inode->i_nlink) -+ ext3_orphan_del(handle, inode); ++/* ++ * storage for cached extent ++ */ ++struct ext3_ext_cache { ++ __u32 ec_start; ++ __u32 ec_block; ++ __u32 ec_len; ++ __u32 ec_type; ++}; + -+ up(&EXT3_I(inode)->truncate_sem); -+ ext3_journal_stop(handle); -+} ++#define EXT3_EXT_CACHE_NO 0 ++#define EXT3_EXT_CACHE_GAP 1 ++#define EXT3_EXT_CACHE_EXTENT 2 + +/* -+ * this routine calculate max number of blocks we could modify -+ * in order to allocate new block for an inode ++ * ext3_extents_tree is used to pass initial information ++ * to top-level extents API + */ -+int ext3_ext_writepage_trans_blocks(struct inode *inode, int num) -+{ -+ struct ext3_extents_tree tree; -+ int needed; -+ -+ ext3_init_tree_desc(&tree, inode); -+ -+ needed = ext3_ext_calc_credits_for_insert(&tree, NULL); ++struct ext3_extents_helpers; ++struct ext3_extents_tree { ++ struct inode *inode; /* inode which tree belongs to */ ++ void *root; /* ptr to data top of tree resides at */ ++ void *buffer; /* will be passed as arg to ^^ routines */ ++ int buffer_len; ++ void *private; ++ struct ext3_ext_cache *cex;/* last found extent */ ++ struct ext3_extents_helpers *ops; ++}; + -+ /* caller want to allocate num blocks */ -+ needed *= num; -+ -+#ifdef CONFIG_QUOTA -+ /* -+ * FIXME: real calculation should be here -+ * it depends on blockmap format of qouta file -+ */ -+ needed += 2 * EXT3_SINGLEDATA_TRANS_BLOCKS; -+#endif ++struct ext3_extents_helpers { ++ int (*get_write_access)(handle_t *h, void *buffer); ++ int (*mark_buffer_dirty)(handle_t *h, void *buffer); ++ int (*mergable)(struct ext3_extent *ex1, struct ext3_extent *ex2); ++ int (*remove_extent_credits)(struct ext3_extents_tree *, ++ struct ext3_extent *, unsigned long, ++ unsigned long); ++ int (*remove_extent)(struct ext3_extents_tree *, ++ struct ext3_extent *, unsigned long, ++ unsigned long); ++ int (*new_block)(handle_t *, struct ext3_extents_tree *, ++ struct ext3_ext_path *, struct ext3_extent *, ++ int *); 
++}; + -+ return needed; -+} ++/* ++ * to be called by ext3_ext_walk_space() ++ * negative retcode - error ++ * positive retcode - signal for ext3_ext_walk_space(), see below ++ * callback must return valid extent (passed or newly created) ++ */ ++typedef int (*ext_prepare_callback)(struct ext3_extents_tree *, ++ struct ext3_ext_path *, ++ struct ext3_ext_cache *); + -+void ext3_extents_initialize_blockmap(handle_t *handle, struct inode *inode) -+{ -+ struct ext3_extents_tree tree; ++#define EXT_CONTINUE 0 ++#define EXT_BREAK 1 ++#define EXT_REPEAT 2 + -+ ext3_init_tree_desc(&tree, inode); -+ ext3_extent_tree_init(handle, &tree); -+} + -+int ext3_ext_calc_blockmap_metadata(struct inode *inode, int blocks) -+{ -+ struct ext3_extents_tree tree; ++#define EXT_MAX_BLOCK 0xffffffff + -+ ext3_init_tree_desc(&tree, inode); -+ return ext3_ext_calc_metadata_amount(&tree, blocks); -+} -+ -+static int -+ext3_ext_store_extent_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *newex, int exist) -+{ -+ struct ext3_extent_buf *buf = (struct ext3_extent_buf *) tree->private; + -+ if (!exist) -+ return EXT_CONTINUE; -+ if (buf->err < 0) -+ return EXT_BREAK; -+ if (buf->cur - buf->buffer + sizeof(*newex) > buf->buflen) -+ return EXT_BREAK; ++#define EXT_FIRST_EXTENT(__hdr__) \ ++ ((struct ext3_extent *) (((char *) (__hdr__)) + \ ++ sizeof(struct ext3_extent_header))) ++#define EXT_FIRST_INDEX(__hdr__) \ ++ ((struct ext3_extent_idx *) (((char *) (__hdr__)) + \ ++ sizeof(struct ext3_extent_header))) ++#define EXT_HAS_FREE_INDEX(__path__) \ ++ ((__path__)->p_hdr->eh_entries < (__path__)->p_hdr->eh_max) ++#define EXT_LAST_EXTENT(__hdr__) \ ++ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_entries - 1) ++#define EXT_LAST_INDEX(__hdr__) \ ++ (EXT_FIRST_INDEX((__hdr__)) + (__hdr__)->eh_entries - 1) ++#define EXT_MAX_EXTENT(__hdr__) \ ++ (EXT_FIRST_EXTENT((__hdr__)) + (__hdr__)->eh_max - 1) ++#define EXT_MAX_INDEX(__hdr__) \ ++ (EXT_FIRST_INDEX((__hdr__)) 
+ (__hdr__)->eh_max - 1) + -+ if (!copy_to_user(buf->cur, newex, sizeof(*newex))) { -+ buf->err++; -+ buf->cur += sizeof(*newex); -+ } else { -+ buf->err = -EFAULT; -+ return EXT_BREAK; -+ } -+ return EXT_CONTINUE; -+} ++#define EXT_ROOT_HDR(tree) \ ++ ((struct ext3_extent_header *) (tree)->root) ++#define EXT_BLOCK_HDR(bh) \ ++ ((struct ext3_extent_header *) (bh)->b_data) ++#define EXT_DEPTH(_t_) \ ++ (((struct ext3_extent_header *)((_t_)->root))->eh_depth) ++#define EXT_GENERATION(_t_) \ ++ (((struct ext3_extent_header *)((_t_)->root))->eh_generation) + -+static int -+ext3_ext_collect_stats_cb(struct ext3_extents_tree *tree, -+ struct ext3_ext_path *path, -+ struct ext3_extent *ex, int exist) -+{ -+ struct ext3_extent_tree_stats *buf = -+ (struct ext3_extent_tree_stats *) tree->private; -+ int depth; + -+ if (!exist) -+ return EXT_CONTINUE; ++#define EXT_ASSERT(__x__) if (!(__x__)) BUG(); + -+ depth = EXT_DEPTH(tree); -+ buf->extents_num++; -+ if (path[depth].p_ext == EXT_FIRST_EXTENT(path[depth].p_hdr)) -+ buf->leaf_num++; -+ return EXT_CONTINUE; ++#define EXT_CHECK_PATH(tree,path) \ ++{ \ ++ int depth = EXT_DEPTH(tree); \ ++ BUG_ON((unsigned long) (path) < __PAGE_OFFSET); \ ++ BUG_ON((unsigned long) (path)[depth].p_idx < \ ++ __PAGE_OFFSET && (path)[depth].p_idx != NULL); \ ++ BUG_ON((unsigned long) (path)[depth].p_ext < \ ++ __PAGE_OFFSET && (path)[depth].p_ext != NULL); \ ++ BUG_ON((unsigned long) (path)[depth].p_hdr < __PAGE_OFFSET); \ ++ BUG_ON((unsigned long) (path)[depth].p_bh < __PAGE_OFFSET \ ++ && depth != 0); \ ++ BUG_ON((path)[0].p_depth != depth); \ +} + -+int ext3_ext_ioctl(struct inode *inode, struct file *filp, unsigned int cmd, -+ unsigned long arg) -+{ -+ int err = 0; -+ -+ if (!(EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL)) -+ return -EINVAL; -+ -+ if (cmd == EXT3_IOC_GET_EXTENTS) { -+ struct ext3_extent_buf buf; -+ struct ext3_extents_tree tree; + -+ if (copy_from_user(&buf, (void *) arg, sizeof(buf))) -+ return -EFAULT; ++/* ++ * this structure 
is used to gather extents from the tree via ioctl ++ */ ++struct ext3_extent_buf { ++ unsigned long start; ++ int buflen; ++ void *buffer; ++ void *cur; ++ int err; ++}; + -+ ext3_init_tree_desc(&tree, inode); -+ buf.cur = buf.buffer; -+ buf.err = 0; -+ tree.private = &buf; -+ down(&EXT3_I(inode)->truncate_sem); -+ err = ext3_ext_walk_space(&tree, buf.start, EXT_MAX_BLOCK, -+ ext3_ext_store_extent_cb); -+ up(&EXT3_I(inode)->truncate_sem); -+ if (err == 0) -+ err = buf.err; -+ } else if (cmd == EXT3_IOC_GET_TREE_STATS) { -+ struct ext3_extent_tree_stats buf; -+ struct ext3_extents_tree tree; ++/* ++ * this structure is used to collect stats info about the tree ++ */ ++struct ext3_extent_tree_stats { ++ int depth; ++ int extents_num; ++ int leaf_num; ++}; + -+ ext3_init_tree_desc(&tree, inode); -+ down(&EXT3_I(inode)->truncate_sem); -+ buf.depth = EXT_DEPTH(&tree); -+ buf.extents_num = 0; -+ buf.leaf_num = 0; -+ tree.private = &buf; -+ err = ext3_ext_walk_space(&tree, 0, EXT_MAX_BLOCK, -+ ext3_ext_collect_stats_cb); -+ up(&EXT3_I(inode)->truncate_sem); -+ if (!err) -+ err = copy_to_user((void *) arg, &buf, sizeof(buf)); -+ } else if (cmd == EXT3_IOC_GET_TREE_DEPTH) { -+ struct ext3_extents_tree tree; -+ ext3_init_tree_desc(&tree, inode); -+ down(&EXT3_I(inode)->truncate_sem); -+ err = EXT_DEPTH(&tree); -+ up(&EXT3_I(inode)->truncate_sem); -+ } ++extern void ext3_init_tree_desc(struct ext3_extents_tree *, struct inode *); ++extern int ext3_extent_tree_init(handle_t *, struct ext3_extents_tree *); ++extern int ext3_ext_calc_credits_for_insert(struct ext3_extents_tree *, struct ext3_ext_path *); ++extern int ext3_ext_insert_extent(handle_t *, struct ext3_extents_tree *, struct ext3_ext_path *, struct ext3_extent *); ++extern int ext3_ext_walk_space(struct ext3_extents_tree *, unsigned long, unsigned long, ext_prepare_callback); ++extern int ext3_ext_remove_space(struct ext3_extents_tree *, unsigned long, unsigned long); ++extern struct ext3_ext_path * 
ext3_ext_find_extent(struct ext3_extents_tree *, int, struct ext3_ext_path *); ++extern int ext3_ext_calc_blockmap_metadata(struct inode *, int); + -+ return err; ++static inline void ++ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) ++{ ++ if (tree->cex) ++ tree->cex->ec_type = EXT3_EXT_CACHE_NO; +} + -+EXPORT_SYMBOL(ext3_init_tree_desc); -+EXPORT_SYMBOL(ext3_mark_inode_dirty); -+EXPORT_SYMBOL(ext3_ext_invalidate_cache); -+EXPORT_SYMBOL(ext3_ext_insert_extent); -+EXPORT_SYMBOL(ext3_ext_walk_space); -+EXPORT_SYMBOL(ext3_ext_find_goal); -+EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert); + -Index: linux-2.6.10/fs/ext3/ialloc.c -=================================================================== ---- linux-2.6.10.orig/fs/ext3/ialloc.c 2005-04-05 12:26:19.368143176 +0800 -+++ linux-2.6.10/fs/ext3/ialloc.c 2005-04-05 12:26:25.464216432 +0800 -@@ -644,6 +644,17 @@ - DQUOT_FREE_INODE(inode); - goto fail2; - } -+ if (test_opt(sb, EXTENTS)) { -+ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; -+ ext3_extents_initialize_blockmap(handle, inode); -+ if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS)) { -+ err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); -+ if (err) goto fail; -+ EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS); -+ BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "call ext3_journal_dirty_metadata"); -+ err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh); -+ } -+ } - err = ext3_mark_inode_dirty(handle, inode); - if (err) { - ext3_std_error(sb, err); -Index: linux-2.6.10/fs/ext3/Makefile ++#endif /* _LINUX_EXT3_EXTENTS */ +Index: linux-2.6.5-sles9/include/linux/ext3_fs_i.h =================================================================== ---- linux-2.6.10.orig/fs/ext3/Makefile 2005-04-05 12:26:06.897039072 +0800 -+++ linux-2.6.10/fs/ext3/Makefile 2005-04-05 12:27:00.597875304 +0800 -@@ -5,8 +5,8 @@ - obj-$(CONFIG_EXT3_FS) += ext3.o +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_i.h 2005-02-23 01:01:52.425272296 +0300 
++++ linux-2.6.5-sles9/include/linux/ext3_fs_i.h 2005-02-23 01:45:55.611446920 +0300 +@@ -19,6 +19,7 @@ + #include + #include + #include ++#include - ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ -- ioctl.o namei.o super.o symlink.o hash.o resize.o iopen.o -- -+ ioctl.o namei.o super.o symlink.o hash.o resize.o iopen.o \ -+ extents.o - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o - ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o - ext3-$(CONFIG_EXT3_FS_SECURITY) += xattr_security.o + struct ext3_reserve_window { + __u32 _rsv_start; /* First byte reserved */ +@@ -128,6 +129,8 @@ + struct dynlock i_htree_lock; + struct semaphore i_append_sem; + struct semaphore i_rename_sem; ++ ++ struct ext3_ext_cache i_cached_extent; + }; + + #endif /* _LINUX_EXT3_FS_I */ %diffstat - fs/ext3/Makefile | 4 - fs/ext3/extents.c | 2306 +++++++++++++++++++++++++++++++++++++++++++ - fs/ext3/ialloc.c | 11 + fs/ext3/Makefile | 2 + fs/ext3/extents.c | 2356 +++++++++++++++++++++++++++++++++++++++++++ + fs/ext3/ialloc.c | 4 fs/ext3/inode.c | 29 fs/ext3/ioctl.c | 4 fs/ext3/super.c | 15 - include/linux/ext3_extents.h | 238 ++++ - include/linux/ext3_fs.h | 20 - include/linux/ext3_fs_i.h | 2 - 9 files changed, 2619 insertions(+), 10 deletions(-) + include/linux/ext3_extents.h | 265 ++++ + include/linux/ext3_fs.h | 17 + include/linux/ext3_fs_i.h | 3 + 9 files changed, 2687 insertions(+), 8 deletions(-) diff --git a/lustre/kernel_patches/patches/ext3-extents-in-ea-2.6.10-fc3.patch b/lustre/kernel_patches/patches/ext3-extents-in-ea-2.6.10-fc3.patch index a400fb3..070b2f4 100755 --- a/lustre/kernel_patches/patches/ext3-extents-in-ea-2.6.10-fc3.patch +++ b/lustre/kernel_patches/patches/ext3-extents-in-ea-2.6.10-fc3.patch @@ -44,7 +44,7 @@ Index: linux-2.6.10/fs/ext3/extents-in-ea.c +#include +#include +#include -+#include ++#include +#include +#include + @@ -128,7 +128,7 @@ Index: linux-2.6.10/fs/ext3/extents-in-ea.c + err = ext3_extent_tree_init(handle, &tree); 
+ ext3_release_tree_in_ea_desc(&tree); +out: -+ ext3_journal_stop(handle, inode); ++ ext3_journal_stop(handle); + kfree(root); + return err; +} @@ -151,24 +151,24 @@ Index: linux-2.6.10/fs/ext3/extents-in-ea.c + up(&EXT3_I(inode)->truncate_sem); + handle = ext3_journal_start(tree->inode, needed + 10); + if (IS_ERR(handle)) { -+ down_write(&EXT3_I(inode)->truncate_sem); ++ down(&EXT3_I(inode)->truncate_sem); + return PTR_ERR(handle); + } + + if (tgen != EXT_GENERATION(tree)) { + /* the tree has changed. so path can be invalid at moment */ -+ ext3_journal_stop(handle, inode); -+ down_write(&EXT3_I(inode)->truncate_sem); ++ ext3_journal_stop(handle); ++ down(&EXT3_I(inode)->truncate_sem); + return EXT_REPEAT; + } + -+ down_write(&EXT3_I(inode)->truncate_sem); ++ down(&EXT3_I(inode)->truncate_sem); + + /* insert new extent */ + newex->ee_start = 0; + err = ext3_ext_insert_extent(handle, tree, path, newex); + if (!err) -+ ext3_journal_stop(handle, tree->inode); ++ ext3_journal_stop(handle); + + return err; +} @@ -182,11 +182,11 @@ Index: linux-2.6.10/fs/ext3/extents-in-ea.c + + err = ext3_init_tree_in_ea_desc(&tree, inode, name_index, eaname); + if (err == 0) { -+ down_write(&EXT3_I(inode)->truncate_sem); ++ down(&EXT3_I(inode)->truncate_sem); + err = ext3_ext_walk_space(&tree, from, num, + ext3_ext_in_ea_new_extent); + ext3_release_tree_in_ea_desc(&tree); -+ up_write(&EXT3_I(inode)->truncate_sem); ++ up(&EXT3_I(inode)->truncate_sem); + } + return err; +} @@ -351,10 +351,11 @@ Index: linux-2.6.10/fs/ext3/Makefile =================================================================== --- linux-2.6.10.orig/fs/ext3/Makefile 2005-04-05 12:27:00.597875304 +0800 +++ linux-2.6.10/fs/ext3/Makefile 2005-04-05 12:28:26.989741744 +0800 -@@ -7,6 +7,6 @@ +@@ -7,7 +7,7 @@ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o iopen.o \ extents.o + -ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o 
+ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o extents-in-ea.o ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o diff --git a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.10-fc3.patch b/lustre/kernel_patches/patches/ext3-mballoc2-2.6.10-fc3.patch index 6d6dc0a..4ef9b82 100644 --- a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.10-fc3.patch +++ b/lustre/kernel_patches/patches/ext3-mballoc2-2.6.10-fc3.patch @@ -1873,24 +1873,24 @@ Index: linux-2.6.10/fs/ext3/super.c int i; + ext3_mb_release(sb); - ext3_ext_release(sb); + ext3_ext_release(sb); ext3_xattr_put_super(sb); journal_destroy(sbi->s_journal); @@ -592,7 +593,7 @@ - Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_pdirops, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, + Opt_ignore, Opt_barrier, Opt_err, Opt_resize, - Opt_extents, Opt_extdebug, + Opt_extents, Opt_extdebug, Opt_mballoc, Opt_mbfactor, - Opt_ignore, Opt_barrier, Opt_err, Opt_resize, }; + static match_table_t tokens = { @@ -646,6 +647,8 @@ {Opt_iopen_nopriv, "iopen_nopriv"}, - {Opt_extents, "extents"}, - {Opt_extdebug, "extdebug"}, + {Opt_extents, "extents"}, + {Opt_extdebug, "extdebug"}, + {Opt_mballoc, "mballoc"}, -+ {Opt_mballoc, "mbfactor=%u"}, ++ {Opt_mbfactor, "mbfactor=%u"}, {Opt_err, NULL}, {Opt_resize, "resize"}, }; @@ -1915,9 +1915,9 @@ Index: linux-2.6.10/fs/ext3/super.c percpu_counter_mod(&sbi->s_dirs_counter, ext3_count_dirs(sb)); - ext3_ext_init(sb); + ext3_ext_init(sb); + ext3_mb_init(sb, needs_recovery); - + return 0; cantfind_ext3: @@ -1925,13 +1925,14 @@ Index: linux-2.6.10/fs/ext3/Makefile =================================================================== --- linux-2.6.10.orig/fs/ext3/Makefile 2005-02-25 17:27:00.228757768 +0200 +++ linux-2.6.10/fs/ext3/Makefile 2005-02-25 17:28:41.863306968 +0200 -@@ -5,7 +5,7 @@ +@@ -5,8 +5,8 @@ ext3-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o \ ioctl.o namei.o super.o symlink.o hash.o resize.o iopen.o \ - 
extents.o + extents.o mballoc.o - ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o extents-in-ea.o + + ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o ext3-$(CONFIG_EXT3_FS_SECURITY) += xattr_security.o Index: linux-2.6.10/fs/ext3/balloc.c @@ -2131,8 +2132,8 @@ Index: linux-2.6.10/include/linux/ext3_fs.h */ @@ -365,6 +373,7 @@ #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ - #define EXT3_MOUNT_EXTENTS 0x100000 /* Extents support */ - #define EXT3_MOUNT_EXTDEBUG 0x200000 /* Extents debug */ + #define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */ + #define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */ +#define EXT3_MOUNT_MBALLOC 0x400000/* Buddy allocation support */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ diff --git a/lustre/kernel_patches/series/2.6-fc3.series b/lustre/kernel_patches/series/2.6-fc3.series index eca1d49..420ca96 100644 --- a/lustre/kernel_patches/series/2.6-fc3.series +++ b/lustre/kernel_patches/series/2.6-fc3.series @@ -31,4 +31,3 @@ export-show_task-2.6-vanilla.patch nid-2.6-fc3.patch highmem-split-2.6.10-fc3.patch pag-basic-2.6.10-fc3.patch - diff --git a/lustre/kernel_patches/series/ldiskfs-2.6-fc3.series b/lustre/kernel_patches/series/ldiskfs-2.6-fc3.series index 3e96555..b4608a9 100644 --- a/lustre/kernel_patches/series/ldiskfs-2.6-fc3.series +++ b/lustre/kernel_patches/series/ldiskfs-2.6-fc3.series @@ -9,9 +9,9 @@ ext3-ea-in-inode-2.6-fc3.patch export-ext3-2.6.10-fc3.patch ext3-include-fixes-2.6-suse.patch ext3-extents-2.6.10-fc3.patch -ext3-extents-in-ea-2.6.10-fc3.patch -ext3-extents-in-ea-ioctl-2.6.10-fc3.patch -ext3-extents-in-ea-exports-symbol-2.6.7.patch +#ext3-extents-in-ea-2.6.10-fc3.patch +#ext3-extents-in-ea-ioctl-2.6.10-fc3.patch +#ext3-extents-in-ea-exports-symbol-2.6.7.patch ext3-mds-num-2.6.10-fc3.patch ext3-fid-2.6.7.patch ext3-raw-lookup-2.6.10.patch diff --git 
a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index 8e40cde..6d23c7f 100644 --- a/lustre/lvfs/fsfilt_ext3.c +++ b/lustre/lvfs/fsfilt_ext3.c @@ -369,14 +369,17 @@ static int fsfilt_ext3_commit_async(struct inode *inode, void *h, static int fsfilt_ext3_commit_wait(struct inode *inode, void *h) { + journal_t *journal = EXT3_JOURNAL(inode); tid_t tid = (tid_t)(long)h; CDEBUG(D_INODE, "commit wait: %lu\n", (unsigned long) tid); - if (is_journal_aborted(EXT3_JOURNAL(inode))) + if (unlikely(is_journal_aborted(journal))) return -EIO; log_wait_commit(EXT3_JOURNAL(inode), tid); + if (unlikely(is_journal_aborted(journal))) + return -EIO; return 0; } @@ -724,6 +727,14 @@ static int fsfilt_ext3_sync(struct super_block *sb) return ext3_force_commit(sb); } +#if defined(EXT3_MULTIBLOCK_ALLOCATOR) && (!defined(EXT3_EXT_CACHE_NO) || defined(EXT_CACHE_MARK)) +#warning "kernel code has old extents/mballoc patch, disabling" +#undef EXT3_MULTIBLOCK_ALLOCATOR +#endif +#ifndef EXT3_EXTENTS_FL +#define EXT3_EXTENTS_FL 0x00080000 /* Inode uses extents */ +#endif + #ifdef EXT3_MULTIBLOCK_ALLOCATOR #if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) #define ext3_up_truncate_sem(inode) up_write(&EXT3_I(inode)->truncate_sem); @@ -752,18 +763,19 @@ struct bpointers { int init_num; int create; }; + static int ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path, - unsigned long block, int *aflags) + unsigned long block, int *aflags) { struct ext3_inode_info *ei = EXT3_I(inode); unsigned long bg_start; unsigned long colour; int depth; - + if (path) { struct ext3_extent *ex; depth = path->p_depth; - + /* try to predict block placement */ if ((ex = path[depth].p_ext)) { #if 0 @@ -772,57 +784,56 @@ static int ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path, * request will fit into. This can fragment data * block allocation and prevents our lovely 1M I/Os * from reaching the disk intact. 
*/ - if (ex->ee_block + ex->ee_len == block) *aflags |= 1; #endif return ex->ee_start + (block - ex->ee_block); } - + /* it looks index is empty * try to find starting from index itself */ if (path[depth].p_bh) return path[depth].p_bh->b_blocknr; } - + /* OK. use inode's group */ bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); colour = (current->pid % 16) * - (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); + (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); return bg_start + colour + block; } static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, struct ext3_ext_path *path, - struct ext3_extent *newex, int exist) + struct ext3_ext_cache *cex) { struct inode *inode = tree->inode; struct bpointers *bp = tree->private; + struct ext3_extent nex; int count, err, goal; unsigned long pblock; - unsigned long tgen; loff_t new_i_size; handle_t *handle; int i, aflags = 0; - + i = EXT_DEPTH(tree); EXT_ASSERT(i == path->p_depth); EXT_ASSERT(path[i].p_hdr); - - if (exist) { + + if (cex->ec_type == EXT3_EXT_CACHE_EXTENT) { err = EXT_CONTINUE; goto map; } - + if (bp->create == 0) { i = 0; - if (newex->ee_block < bp->start) - i = bp->start - newex->ee_block; - if (i >= newex->ee_len) + if (cex->ec_block < bp->start) + i = bp->start - cex->ec_block; + if (i >= cex->ec_len) CERROR("nothing to do?! 
i = %d, e_num = %u\n", - i, newex->ee_len); - for (; i < newex->ee_len && bp->num; i++) { + i, cex->ec_len); + for (; i < cex->ec_len && bp->num; i++) { *(bp->created) = 0; bp->created++; *(bp->blocks) = 0; @@ -830,78 +841,92 @@ static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, bp->num--; bp->start++; } - + return EXT_CONTINUE; } - tgen = EXT_GENERATION(tree); + count = ext3_ext_calc_credits_for_insert(tree, path); - ext3_up_truncate_sem(inode); + lock_24kernel(); - handle = journal_start(EXT3_JOURNAL(inode), count + EXT3_ALLOC_NEEDED + 1); + handle = journal_start(EXT3_JOURNAL(inode), count+EXT3_ALLOC_NEEDED+1); unlock_24kernel(); - if (IS_ERR(handle)) { - ext3_down_truncate_sem(inode); + if (IS_ERR(handle)) return PTR_ERR(handle); - } - - if (tgen != EXT_GENERATION(tree)) { - /* the tree has changed. so path can be invalid at moment */ - lock_24kernel(); - journal_stop(handle); - unlock_24kernel(); - ext3_down_truncate_sem(inode); - return EXT_REPEAT; - } - ext3_down_truncate_sem(inode); - count = newex->ee_len; - goal = ext3_ext_find_goal(inode, path, newex->ee_block, &aflags); + + count = cex->ec_len; + goal = ext3_ext_find_goal(inode, path, cex->ec_block, &aflags); aflags |= 2; /* block have been already reserved */ + lock_24kernel(); pblock = ext3_mb_new_blocks(handle, inode, goal, &count, aflags, &err); + unlock_24kernel(); if (!pblock) goto out; - EXT_ASSERT(count <= newex->ee_len); - + EXT_ASSERT(count <= cex->ec_len); + /* insert new extent */ - newex->ee_start = pblock; - newex->ee_len = count; - err = ext3_ext_insert_extent(handle, tree, path, newex); + nex.ee_block = cex->ec_block; + nex.ee_start = pblock; + nex.ee_len = count; + err = ext3_ext_insert_extent(handle, tree, path, &nex); if (err) goto out; - + + /* + * Putting len of the actual extent we just inserted, + * we are asking ext3_ext_walk_space() to continue + * scanning after that block + */ + cex->ec_len = nex.ee_len; + cex->ec_start = nex.ee_start; + BUG_ON(nex.ee_len == 0);
+ BUG_ON(nex.ee_block != cex->ec_block); + /* correct on-disk inode size */ - if (newex->ee_len > 0) { - new_i_size = (loff_t) newex->ee_block + newex->ee_len; + if (nex.ee_len > 0) { + new_i_size = (loff_t) nex.ee_block + nex.ee_len; new_i_size = new_i_size << inode->i_blkbits; if (new_i_size > EXT3_I(inode)->i_disksize) { EXT3_I(inode)->i_disksize = new_i_size; err = ext3_mark_inode_dirty(handle, inode); } } + out: lock_24kernel(); journal_stop(handle); unlock_24kernel(); map: if (err >= 0) { + struct block_device *bdev = inode->i_sb->s_bdev; + /* map blocks */ if (bp->num == 0) { CERROR("hmm. why do we find this extent?\n"); CERROR("initial space: %lu:%u\n", bp->start, bp->init_num); CERROR("current extent: %u/%u/%u %d\n", - newex->ee_block, newex->ee_len, - newex->ee_start, exist); + cex->ec_block, cex->ec_len, + cex->ec_start, cex->ec_type); } i = 0; - if (newex->ee_block < bp->start) - i = bp->start - newex->ee_block; - if (i >= newex->ee_len) + if (cex->ec_block < bp->start) + i = bp->start - cex->ec_block; + if (i >= cex->ec_len) CERROR("nothing to do?! i = %d, e_num = %u\n", - i, newex->ee_len); - for (; i < newex->ee_len && bp->num; i++) { - *(bp->created) = (exist == 0 ? 1 : 0); + i, cex->ec_len); + for (; i < cex->ec_len && bp->num; i++) { + *(bp->blocks) = cex->ec_start + i; + if (cex->ec_type == EXT3_EXT_CACHE_EXTENT) { + *(bp->created) = 0; + } else { + *(bp->created) = 1; + /* unmap any possible underlying metadata from + * the block device mapping. bug 6998. + * This only compiles on 2.6, but there are + * no users of mballoc on 2.4. 
*/ + unmap_underlying_metadata(bdev, *(bp->blocks)); + } bp->created++; - *(bp->blocks) = newex->ee_start + i; bp->blocks++; bp->num--; bp->start++; @@ -909,18 +934,18 @@ map: } return err; } - + int fsfilt_map_nblocks(struct inode *inode, unsigned long block, unsigned long num, unsigned long *blocks, int *created, int create) { struct ext3_extents_tree tree; struct bpointers bp; - int err, i; - + int err; + CDEBUG(D_OTHER, "blocks %lu-%lu requested for inode %u\n", block, block + num, (unsigned) inode->i_ino); - + ext3_init_tree_desc(&tree, inode); tree.private = &bp; bp.blocks = blocks; @@ -928,21 +953,10 @@ int fsfilt_map_nblocks(struct inode *inode, unsigned long block, bp.start = block; bp.init_num = bp.num = num; bp.create = create; - - ext3_down_truncate_sem(inode); + err = ext3_ext_walk_space(&tree, block, num, ext3_ext_new_extent_cb); ext3_ext_invalidate_cache(&tree); - ext3_up_truncate_sem(inode); - /* unmap underlying pages/buffers from blockdevice mapping */ - if (create) { - struct block_device *bdev = inode->i_sb->s_bdev; - for (i = 0; i < num; i++) { - if (created[i] == 0) - continue; - unmap_underlying_metadata(bdev, blocks[i]); - } - } return err; } @@ -1108,7 +1122,7 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize, loff_t new_size = inode->i_size; journal_t *journal; handle_t *handle; - int err = 0, block_count = 0, blocksize, size, boffs; + int err, block_count = 0, blocksize, size, boffs; /* Determine how many transaction credits are needed */ blocksize = 1 << inode->i_blkbits; @@ -1338,6 +1352,7 @@ static int fsfilt_ext3_get_op_len(int op, struct fsfilt_objinfo *fso, int logs) } +#if 0 #define EXTENTS_EA "write_extents" #define EXTENTS_EA_SIZE 64 @@ -1398,6 +1413,7 @@ static int fsfilt_ext3_get_write_extents_num(struct inode *inode, int *size) EXTENTS_EA, size); return rc; } +#endif static struct fsfilt_operations fsfilt_ext3_ops = { .fs_type = "ext3", @@ -1427,11 +1443,13 @@ static struct fsfilt_operations 
fsfilt_ext3_ops = { .fs_get_op_len = fsfilt_ext3_get_op_len, .fs_add_dir_entry = fsfilt_ext3_add_dir_entry, .fs_del_dir_entry = fsfilt_ext3_del_dir_entry, +#if 0 .fs_init_extents_ea = fsfilt_ext3_init_extents_ea, .fs_insert_extents_ea = fsfilt_ext3_insert_extents_ea, .fs_remove_extents_ea = fsfilt_ext3_remove_extents_ea, .fs_get_inode_write_extents = fsfilt_ext3_get_inode_write_extents, .fs_get_write_extents_num = fsfilt_ext3_get_write_extents_num, +#endif }; static int __init fsfilt_ext3_init(void) -- 1.8.3.1