From 05de0aaad91659c37b56f48410ea9f09041a1e13 Mon Sep 17 00:00:00 2001 From: phil Date: Tue, 16 Nov 2004 05:12:19 +0000 Subject: [PATCH] land b1_4_bug5025 on b1_4 (b1_4_bug5025 should no longer be used) --- .../patches/export_symbols-ext3-2.6-suse.patch | 6 +- .../patches/ext3-extents-2.6.5.patch | 244 ++++++----- .../patches/ext3-mballoc2-2.6-suse.patch | 267 ++++++------ .../patches/ext3-rename-reserve-2.6-suse.patch | 263 ++++++++++++ .../kernel_patches/patches/iopen-2.6-suse.patch | 52 +-- .../kernel_patches/series/ldiskfs-2.6-suse.series | 8 +- lustre/ChangeLog | 6 +- lustre/configure.in | 2 +- lustre/include/linux/lustre_fsfilt.h | 37 +- .../kernel-2.4.21-rhel-2.4-ia64-smp.config | 20 +- .../kernel-2.4.21-rhel-2.4-ia64.config | 20 +- .../patches/export_symbols-ext3-2.6-suse.patch | 6 +- .../patches/ext3-delete_thread-2.4.20-hp.patch | 44 +- .../patches/ext3-extents-2.4.20-rh.patch | 156 ++++--- .../patches/ext3-extents-2.4.21-chaos.patch | 182 ++++---- .../patches/ext3-extents-2.4.21-suse2.patch | 160 ++++--- .../patches/ext3-extents-2.4.24.patch | 158 ++++--- .../patches/ext3-extents-2.6.5.patch | 244 ++++++----- .../patches/ext3-extents-asyncdel-2.4.20-rh.patch | 3 +- .../ext3-extents-asyncdel-2.4.21-chaos.patch | 3 +- .../patches/ext3-extents-asyncdel-2.4.24.patch | 3 +- .../patches/ext3-mballoc2-2.6-suse.patch | 267 ++++++------ .../patches/ext3-mballoc2-2.6.7.patch | 107 +++-- .../patches/ext3-rename-reserve-2.6-suse.patch | 263 ++++++++++++ lustre/kernel_patches/patches/iopen-2.6-suse.patch | 52 +-- lustre/kernel_patches/patches/revert-76chaos.patch | 289 +++++++++++++ lustre/kernel_patches/series/2.6-vanilla.series | 15 + lustre/kernel_patches/series/chaos-2.4.21 | 19 +- .../kernel_patches/series/ldiskfs-2.6-suse.series | 8 +- .../series/ldiskfs-2.6-vanilla.series | 11 + lustre/kernel_patches/series/rhel-2.4.21 | 6 +- lustre/lvfs/fsfilt_ext3.c | 382 ++++++++++++++++- lustre/lvfs/lvfs_linux.c | 9 +- lustre/mds/handler.c | 16 +- lustre/obdfilter/filter.c 
| 48 +-- lustre/obdfilter/filter_internal.h | 15 +- lustre/obdfilter/filter_io.c | 409 +++++++++--------- lustre/obdfilter/filter_io_24.c | 263 ++++++++---- lustre/obdfilter/filter_io_26.c | 476 +++++++++++++++------ lustre/portals/build.m4 | 7 - lustre/ptlrpc/service.c | 14 + lustre/scripts/lustre-kernel-2.4.spec.in | 2 +- lustre/scripts/lustre.spec.in | 2 +- lustre/utils/lconf | 88 +++- lustre/utils/lmc | 33 +- 45 files changed, 3285 insertions(+), 1400 deletions(-) create mode 100644 ldiskfs/kernel_patches/patches/ext3-rename-reserve-2.6-suse.patch create mode 100644 lustre/kernel_patches/patches/ext3-rename-reserve-2.6-suse.patch create mode 100644 lustre/kernel_patches/patches/revert-76chaos.patch create mode 100644 lustre/kernel_patches/series/2.6-vanilla.series create mode 100644 lustre/kernel_patches/series/ldiskfs-2.6-vanilla.series diff --git a/ldiskfs/kernel_patches/patches/export_symbols-ext3-2.6-suse.patch b/ldiskfs/kernel_patches/patches/export_symbols-ext3-2.6-suse.patch index 74962dd..294a9cd 100644 --- a/ldiskfs/kernel_patches/patches/export_symbols-ext3-2.6-suse.patch +++ b/ldiskfs/kernel_patches/patches/export_symbols-ext3-2.6-suse.patch @@ -1,7 +1,7 @@ -Index: linux-stage/include/linux/ext3_fs_sb.h +Index: linux-2.6.5-sles9/include/linux/ext3_fs_sb.h =================================================================== ---- linux-stage.orig/include/linux/ext3_fs_sb.h 2004-11-03 14:16:26.059485670 -0500 -+++ linux-stage/include/linux/ext3_fs_sb.h 2004-11-03 14:20:30.761493072 -0500 +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_sb.h 2004-11-03 08:36:51.000000000 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs_sb.h 2004-11-09 02:20:51.598024096 +0300 @@ -19,9 +19,12 @@ #ifdef __KERNEL__ #include diff --git a/ldiskfs/kernel_patches/patches/ext3-extents-2.6.5.patch b/ldiskfs/kernel_patches/patches/ext3-extents-2.6.5.patch index 8a41b1c..cad7b54 100644 --- a/ldiskfs/kernel_patches/patches/ext3-extents-2.6.5.patch +++ 
b/ldiskfs/kernel_patches/patches/ext3-extents-2.6.5.patch @@ -1,9 +1,9 @@ %patch -Index: linux-2.6.7/fs/ext3/extents.c +Index: linux-2.6.5-sles9/fs/ext3/extents.c =================================================================== ---- linux-2.6.7.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.6.7/fs/ext3/extents.c 2004-08-19 08:53:49.000000000 +0400 -@@ -0,0 +1,2306 @@ +--- linux-2.6.5-sles9.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.6.5-sles9/fs/ext3/extents.c 2004-11-09 02:25:56.143726112 +0300 +@@ -0,0 +1,2313 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -150,7 +150,7 @@ Index: linux-2.6.7/fs/ext3/extents.c + goal = bg_start + colour; + } + -+ newblock = ext3_new_block(handle, inode, goal, 0, 0, err); ++ newblock = ext3_new_block(handle, inode, goal, err); + return newblock; +} + @@ -1283,14 +1283,15 @@ Index: linux-2.6.7/fs/ext3/extents.c +} + +static inline void -+ext3_ext_put_in_cache(struct ext3_extents_tree *tree, struct ext3_extent *ex) ++ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block, ++ __u32 len, __u32 start, int type) +{ ++ EXT_ASSERT(len > 0); + if (tree->cex) { -+ EXT_ASSERT(ex); -+ EXT_ASSERT(ex->ee_len); -+ tree->cex->ee_block = ex->ee_block; -+ tree->cex->ee_start = ex->ee_start; -+ tree->cex->ee_len = ex->ee_len; ++ tree->cex->ec_type = type; ++ tree->cex->ec_block = block; ++ tree->cex->ec_len = len; ++ tree->cex->ec_start = start; + } +} + @@ -1304,7 +1305,8 @@ Index: linux-2.6.7/fs/ext3/extents.c + unsigned long block) +{ + int depth = EXT_DEPTH(tree); -+ struct ext3_extent *ex, gex; ++ unsigned long lblock, len; ++ struct ext3_extent *ex; + + if (!tree->cex) + return; @@ -1312,63 +1314,64 @@ Index: linux-2.6.7/fs/ext3/extents.c + ex = path[depth].p_ext; + if (ex == NULL) { + /* there is no extent yet, so gap is [0;-] */ -+ gex.ee_block = 0; -+ gex.ee_len = EXT_CACHE_MARK; ++ lblock = 0; ++ len = EXT_MAX_BLOCK; 
+ ext_debug(tree, "cache gap(whole file):"); + } else if (block < ex->ee_block) { -+ gex.ee_block = block; -+ gex.ee_len = ex->ee_block - block; ++ lblock = block; ++ len = ex->ee_block - block; + ext_debug(tree, "cache gap(before): %lu [%lu:%lu]", + (unsigned long) block, + (unsigned long) ex->ee_block, + (unsigned long) ex->ee_len); + } else if (block >= ex->ee_block + ex->ee_len) { -+ gex.ee_block = ex->ee_block + ex->ee_len; -+ gex.ee_len = ext3_ext_next_allocated_block(path); ++ lblock = ex->ee_block + ex->ee_len; ++ len = ext3_ext_next_allocated_block(path); + ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", + (unsigned long) ex->ee_block, + (unsigned long) ex->ee_len, + (unsigned long) block); -+ EXT_ASSERT(gex.ee_len > gex.ee_block); -+ gex.ee_len = gex.ee_len - gex.ee_block; ++ EXT_ASSERT(len > lblock); ++ len = len - lblock; + } else { ++ lblock = len = 0; + BUG(); + } + -+ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) gex.ee_block, -+ (unsigned long) gex.ee_len); -+ gex.ee_start = EXT_CACHE_MARK; -+ ext3_ext_put_in_cache(tree, &gex); ++ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len); ++ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP); +} + +static inline int +ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, + struct ext3_extent *ex) +{ -+ struct ext3_extent *cex = tree->cex; ++ struct ext3_ext_cache *cex = tree->cex; + + /* is there cache storage at all? */ + if (!cex) -+ return 0; ++ return EXT3_EXT_CACHE_NO; + + /* has cache valid data? 
*/ -+ if (cex->ee_len == 0) -+ return 0; -+ -+ if (block >= cex->ee_block && block < cex->ee_block + cex->ee_len) { -+ ex->ee_block = cex->ee_block; -+ ex->ee_start = cex->ee_start; -+ ex->ee_len = cex->ee_len; ++ if (cex->ec_type == EXT3_EXT_CACHE_NO) ++ return EXT3_EXT_CACHE_NO; ++ ++ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP || ++ cex->ec_type == EXT3_EXT_CACHE_EXTENT); ++ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { ++ ex->ee_block = cex->ec_block; ++ ex->ee_start = cex->ec_start; ++ ex->ee_len = cex->ec_len; + ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", + (unsigned long) block, + (unsigned long) ex->ee_block, + (unsigned long) ex->ee_len, + (unsigned long) ex->ee_start); -+ return 1; ++ return cex->ec_type; + } + + /* not in cache */ -+ return 0; ++ return EXT3_EXT_CACHE_NO; +} + +/* @@ -1958,7 +1961,7 @@ Index: linux-2.6.7/fs/ext3/extents.c + ex->ee_len = 1; + /* allocate new block for the extent */ + goal = ext3_ext_find_goal(inode, path, ex->ee_block); -+ ex->ee_start = ext3_new_block(handle, inode, goal, 0, 0, err); ++ ex->ee_start = ext3_new_block(handle, inode, goal, err); + if (ex->ee_start == 0) { + /* error occured: restore old extent */ + ex->ee_start = newblock; @@ -1984,7 +1987,7 @@ Index: linux-2.6.7/fs/ext3/extents.c + tree->root = (void *) EXT3_I(inode)->i_data; + tree->buffer = (void *) inode; + tree->buffer_len = sizeof(EXT3_I(inode)->i_data); -+ tree->cex = (struct ext3_extent *) &EXT3_I(inode)->i_cached_extent; ++ tree->cex = (struct ext3_ext_cache *) &EXT3_I(inode)->i_cached_extent; + tree->ops = &ext3_blockmap_helpers; +} + @@ -2005,19 +2008,20 @@ Index: linux-2.6.7/fs/ext3/extents.c + down(&EXT3_I(inode)->truncate_sem); + + /* check in cache */ -+ if (ext3_ext_in_cache(&tree, iblock, &newex)) { -+ if (newex.ee_start == EXT_CACHE_MARK) { -+ /* this is cached gap */ ++ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) { ++ if (goal == EXT3_EXT_CACHE_GAP) { + if (!create) { + /* block isn't allocated yet 
and + * user don't want to allocate it */ + goto out2; + } + /* we should allocate requested block */ -+ } else if (newex.ee_start) { ++ } else if (goal == EXT3_EXT_CACHE_EXTENT) { + /* block is already allocated */ + newblock = iblock - newex.ee_block + newex.ee_start; + goto out; ++ } else { ++ EXT_ASSERT(0); + } + } + @@ -2045,7 +2049,9 @@ Index: linux-2.6.7/fs/ext3/extents.c + ext_debug(&tree, "%d fit into %d:%d -> %d\n", + (int) iblock, ex->ee_block, ex->ee_len, + newblock); -+ ext3_ext_put_in_cache(&tree, ex); ++ ext3_ext_put_in_cache(&tree, ex->ee_block, ++ ex->ee_len, ex->ee_start, ++ EXT3_EXT_CACHE_EXTENT); + goto out; + } + } @@ -2062,7 +2068,7 @@ Index: linux-2.6.7/fs/ext3/extents.c + + /* allocate new block */ + goal = ext3_ext_find_goal(inode, path, iblock); -+ newblock = ext3_new_block(handle, inode, goal, 0, 0, &err); ++ newblock = ext3_new_block(handle, inode, goal, &err); + if (!newblock) + goto out2; + ext_debug(&tree, "allocate new block: goal %d, found %d\n", @@ -2083,7 +2089,8 @@ Index: linux-2.6.7/fs/ext3/extents.c + newblock = newex.ee_start; + set_buffer_new(bh_result); + -+ ext3_ext_put_in_cache(&tree, &newex); ++ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, ++ newex.ee_start, EXT3_EXT_CACHE_EXTENT); +out: + ext3_ext_show_leaf(&tree, path); + map_bh(bh_result, inode->i_sb, newblock); @@ -2310,11 +2317,11 @@ Index: linux-2.6.7/fs/ext3/extents.c +EXPORT_SYMBOL(ext3_ext_find_goal); +EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert); + -Index: linux-2.6.7/fs/ext3/ialloc.c +Index: linux-2.6.5-sles9/fs/ext3/ialloc.c =================================================================== ---- linux-2.6.7.orig/fs/ext3/ialloc.c 2004-08-19 08:51:04.000000000 +0400 -+++ linux-2.6.7/fs/ext3/ialloc.c 2004-08-19 08:53:49.000000000 +0400 -@@ -646,6 +646,10 @@ +--- linux-2.6.5-sles9.orig/fs/ext3/ialloc.c 2004-11-09 02:22:55.763148128 +0300 ++++ linux-2.6.5-sles9/fs/ext3/ialloc.c 2004-11-09 02:23:21.587222272 +0300 +@@ -647,6 +647,10 @@ 
DQUOT_FREE_INODE(inode); goto fail2; } @@ -2325,11 +2332,11 @@ Index: linux-2.6.7/fs/ext3/ialloc.c err = ext3_mark_inode_dirty(handle, inode); if (err) { ext3_std_error(sb, err); -Index: linux-2.6.7/fs/ext3/inode.c +Index: linux-2.6.5-sles9/fs/ext3/inode.c =================================================================== ---- linux-2.6.7.orig/fs/ext3/inode.c 2004-08-19 08:51:04.000000000 +0400 -+++ linux-2.6.7/fs/ext3/inode.c 2004-08-19 08:53:49.000000000 +0400 -@@ -857,6 +857,17 @@ +--- linux-2.6.5-sles9.orig/fs/ext3/inode.c 2004-11-09 02:22:55.767147520 +0300 ++++ linux-2.6.5-sles9/fs/ext3/inode.c 2004-11-09 02:23:21.592221512 +0300 +@@ -796,6 +796,17 @@ goto reread; } @@ -2347,7 +2354,7 @@ Index: linux-2.6.7/fs/ext3/inode.c static int ext3_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { -@@ -867,8 +878,8 @@ +@@ -806,8 +817,8 @@ handle = ext3_journal_current_handle(); J_ASSERT(handle != 0); } @@ -2358,7 +2365,7 @@ Index: linux-2.6.7/fs/ext3/inode.c return ret; } -@@ -894,8 +905,8 @@ +@@ -833,8 +844,8 @@ } } if (ret == 0) @@ -2369,7 +2376,7 @@ Index: linux-2.6.7/fs/ext3/inode.c if (ret == 0) bh_result->b_size = (1 << inode->i_blkbits); return ret; -@@ -916,7 +927,7 @@ +@@ -855,7 +866,7 @@ dummy.b_state = 0; dummy.b_blocknr = -1000; buffer_trace_init(&dummy.b_history); @@ -2378,7 +2385,7 @@ Index: linux-2.6.7/fs/ext3/inode.c if (!*errp && buffer_mapped(&dummy)) { struct buffer_head *bh; bh = sb_getblk(inode->i_sb, dummy.b_blocknr); -@@ -1669,7 +1680,7 @@ +@@ -1587,7 +1598,7 @@ * This required during truncate. We need to physically zero the tail end * of that block so it doesn't yield old data if the file is later grown. 
*/ @@ -2387,7 +2394,7 @@ Index: linux-2.6.7/fs/ext3/inode.c struct address_space *mapping, loff_t from) { unsigned long index = from >> PAGE_CACHE_SHIFT; -@@ -2165,6 +2176,9 @@ +@@ -2083,6 +2094,9 @@ return; } @@ -2397,7 +2404,7 @@ Index: linux-2.6.7/fs/ext3/inode.c handle = start_transaction(inode); if (IS_ERR(handle)) { if (page) { -@@ -2888,6 +2902,9 @@ +@@ -2789,6 +2803,9 @@ int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; int ret; @@ -2407,10 +2414,10 @@ Index: linux-2.6.7/fs/ext3/inode.c if (ext3_should_journal_data(inode)) ret = 3 * (bpp + indirects) + 2; else -Index: linux-2.6.7/fs/ext3/Makefile +Index: linux-2.6.5-sles9/fs/ext3/Makefile =================================================================== ---- linux-2.6.7.orig/fs/ext3/Makefile 2004-08-19 08:52:14.000000000 +0400 -+++ linux-2.6.7/fs/ext3/Makefile 2004-08-19 08:53:49.000000000 +0400 +--- linux-2.6.5-sles9.orig/fs/ext3/Makefile 2004-11-09 02:18:27.604914376 +0300 ++++ linux-2.6.5-sles9/fs/ext3/Makefile 2004-11-09 02:23:21.593221360 +0300 @@ -5,7 +5,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o @@ -2420,11 +2427,11 @@ Index: linux-2.6.7/fs/ext3/Makefile ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o -Index: linux-2.6.7/fs/ext3/super.c +Index: linux-2.6.5-sles9/fs/ext3/super.c =================================================================== ---- linux-2.6.7.orig/fs/ext3/super.c 2004-08-19 08:51:04.000000000 +0400 -+++ linux-2.6.7/fs/ext3/super.c 2004-08-19 08:53:49.000000000 +0400 -@@ -392,6 +392,7 @@ +--- linux-2.6.5-sles9.orig/fs/ext3/super.c 2004-11-09 02:22:56.450043704 +0300 ++++ linux-2.6.5-sles9/fs/ext3/super.c 2004-11-09 02:23:21.597220752 +0300 +@@ -389,6 +389,7 @@ struct ext3_super_block *es = sbi->s_es; int i; @@ -2432,17 +2439,18 @@ Index: linux-2.6.7/fs/ext3/super.c ext3_xattr_put_super(sb); journal_destroy(sbi->s_journal); if (!(sb->s_flags & MS_RDONLY)) { -@@ -455,6 +456,9 @@ - ei->i_default_acl = EXT3_ACL_NOT_CACHED; +@@ 
-447,6 +448,10 @@ #endif + ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; ei->vfs_inode.i_version = 1; + ei->i_cached_extent[0] = 0; + ei->i_cached_extent[1] = 0; + ei->i_cached_extent[2] = 0; ++ ei->i_cached_extent[3] = 0; return &ei->vfs_inode; } -@@ -590,7 +594,7 @@ +@@ -537,7 +542,7 @@ Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, @@ -2451,8 +2459,8 @@ Index: linux-2.6.7/fs/ext3/super.c }; static match_table_t tokens = { -@@ -638,6 +642,8 @@ - {Opt_iopen, "iopen"}, +@@ -582,6 +587,8 @@ + {Opt_iopen, "iopen"}, {Opt_noiopen, "noiopen"}, {Opt_iopen_nopriv, "iopen_nopriv"}, + {Opt_extents, "extents"}, @@ -2460,7 +2468,7 @@ Index: linux-2.6.7/fs/ext3/super.c {Opt_err, NULL} }; -@@ -917,6 +923,12 @@ +@@ -797,6 +804,12 @@ break; case Opt_ignore: break; @@ -2473,7 +2481,7 @@ Index: linux-2.6.7/fs/ext3/super.c default: printk (KERN_ERR "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1589,6 +1601,8 @@ +@@ -1449,6 +1462,8 @@ percpu_counter_mod(&sbi->s_dirs_counter, ext3_count_dirs(sb)); @@ -2482,25 +2490,25 @@ Index: linux-2.6.7/fs/ext3/super.c return 0; failed_mount3: -Index: linux-2.6.7/fs/ext3/ioctl.c +Index: linux-2.6.5-sles9/fs/ext3/ioctl.c =================================================================== ---- linux-2.6.7.orig/fs/ext3/ioctl.c 2004-08-19 08:51:03.000000000 +0400 -+++ linux-2.6.7/fs/ext3/ioctl.c 2004-08-19 08:53:49.000000000 +0400 -@@ -176,6 +176,10 @@ - return ret; - } - #endif +--- linux-2.6.5-sles9.orig/fs/ext3/ioctl.c 2004-11-09 02:15:44.610693264 +0300 ++++ linux-2.6.5-sles9/fs/ext3/ioctl.c 2004-11-09 02:23:52.991448104 +0300 +@@ -124,6 +124,10 @@ + err = ext3_change_inode_journal_flag(inode, jflag); + return err; + } + case EXT3_IOC_GET_EXTENTS: + case EXT3_IOC_GET_TREE_STATS: + case EXT3_IOC_GET_TREE_DEPTH: + return ext3_ext_ioctl(inode, filp, cmd, arg); - default: - return -ENOTTY; - } 
-Index: linux-2.6.7/include/linux/ext3_fs.h + case EXT3_IOC_GETVERSION: + case EXT3_IOC_GETVERSION_OLD: + return put_user(inode->i_generation, (int *) arg); +Index: linux-2.6.5-sles9/include/linux/ext3_fs.h =================================================================== ---- linux-2.6.7.orig/include/linux/ext3_fs.h 2004-08-19 08:51:04.000000000 +0400 -+++ linux-2.6.7/include/linux/ext3_fs.h 2004-08-19 08:53:49.000000000 +0400 +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:22:58.767691368 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:25:17.238640584 +0300 @@ -186,6 +186,7 @@ #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ @@ -2509,18 +2517,18 @@ Index: linux-2.6.7/include/linux/ext3_fs.h #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ -@@ -209,6 +210,9 @@ - #ifdef CONFIG_JBD_DEBUG - #define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) +@@ -211,6 +212,9 @@ #endif -+#define EXT3_IOC_GET_EXTENTS _IOR('f', 5, long) -+#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 6, long) -+#define EXT3_IOC_GET_TREE_STATS _IOR('f', 7, long) + #define EXT3_IOC_GETRSVSZ _IOR('f', 5, long) + #define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) ++#define EXT3_IOC_GET_EXTENTS _IOR('f', 7, long) ++#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 8, long) ++#define EXT3_IOC_GET_TREE_STATS _IOR('f', 9, long) /* * Structure of an inode on the disk -@@ -329,6 +333,8 @@ - #define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */ +@@ -333,6 +337,8 @@ + #define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */ #define EXT3_MOUNT_IOPEN 0x40000 /* Allow access via iopen */ #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ +#define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */ @@ -2528,7 +2536,7 @@ Index: linux-2.6.7/include/linux/ext3_fs.h /* 
Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef clear_opt -@@ -724,6 +730,7 @@ +@@ -729,6 +735,7 @@ /* inode.c */ @@ -2536,7 +2544,7 @@ Index: linux-2.6.7/include/linux/ext3_fs.h extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -@@ -796,6 +803,14 @@ +@@ -802,6 +809,14 @@ extern struct inode_operations ext3_symlink_inode_operations; extern struct inode_operations ext3_fast_symlink_inode_operations; @@ -2551,11 +2559,11 @@ Index: linux-2.6.7/include/linux/ext3_fs.h #endif /* __KERNEL__ */ -Index: linux-2.6.7/include/linux/ext3_extents.h +Index: linux-2.6.5-sles9/include/linux/ext3_extents.h =================================================================== ---- linux-2.6.7.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.6.7/include/linux/ext3_extents.h 2004-08-19 08:53:49.000000000 +0400 -@@ -0,0 +1,238 @@ +--- linux-2.6.5-sles9.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_extents.h 2004-11-09 02:23:21.606219384 +0300 +@@ -0,0 +1,252 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -2595,7 +2603,7 @@ Index: linux-2.6.7/include/linux/ext3_extents.h + * if EXT_DEBUG is defined you can use 'extdebug' mount option + * to get lots of info what's going on + */ -+#define EXT_DEBUG ++#define EXT_DEBUG_ +#ifdef EXT_DEBUG +#define ext_debug(tree,fmt,a...) 
\ +do { \ @@ -2677,6 +2685,20 @@ Index: linux-2.6.7/include/linux/ext3_extents.h + */ + +/* ++ * storage for cached extent ++ */ ++struct ext3_ext_cache { ++ __u32 ec_start; ++ __u32 ec_block; ++ __u32 ec_len; ++ __u32 ec_type; ++}; ++ ++#define EXT3_EXT_CACHE_NO 0 ++#define EXT3_EXT_CACHE_GAP 1 ++#define EXT3_EXT_CACHE_EXTENT 2 ++ ++/* + * ext3_extents_tree is used to pass initial information + * to top-level extents API + */ @@ -2687,7 +2709,7 @@ Index: linux-2.6.7/include/linux/ext3_extents.h + void *buffer; /* will be passed as arg to ^^ routines */ + int buffer_len; + void *private; -+ struct ext3_extent *cex;/* last found extent */ ++ struct ext3_ext_cache *cex;/* last found extent */ + struct ext3_extents_helpers *ops; +}; + @@ -2788,35 +2810,35 @@ Index: linux-2.6.7/include/linux/ext3_extents.h +ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) +{ + if (tree->cex) -+ tree->cex->ee_len = 0; ++ tree->cex->ec_type = EXT3_EXT_CACHE_NO; +} + + +#endif /* _LINUX_EXT3_EXTENTS */ + -Index: linux-2.6.7/include/linux/ext3_fs_i.h +Index: linux-2.6.5-sles9/include/linux/ext3_fs_i.h =================================================================== ---- linux-2.6.7.orig/include/linux/ext3_fs_i.h 2004-08-19 08:51:04.000000000 +0400 -+++ linux-2.6.7/include/linux/ext3_fs_i.h 2004-08-19 08:53:49.000000000 +0400 -@@ -111,6 +111,8 @@ +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_i.h 2004-11-09 02:22:55.780145544 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs_i.h 2004-11-09 02:23:21.606219384 +0300 +@@ -128,6 +128,8 @@ */ struct semaphore truncate_sem; struct inode vfs_inode; + -+ __u32 i_cached_extent[3]; ++ __u32 i_cached_extent[4]; }; #endif /* _LINUX_EXT3_FS_I */ %diffstat fs/ext3/Makefile | 2 - fs/ext3/extents.c | 2306 +++++++++++++++++++++++++++++++++++++++++++ + fs/ext3/extents.c | 2313 +++++++++++++++++++++++++++++++++++++++++++ fs/ext3/ialloc.c | 4 fs/ext3/inode.c | 29 fs/ext3/ioctl.c | 4 - fs/ext3/super.c | 16 - include/linux/ext3_extents.h | 238 
++++ + fs/ext3/super.c | 17 + include/linux/ext3_extents.h | 252 ++++ include/linux/ext3_fs.h | 15 include/linux/ext3_fs_i.h | 2 - 9 files changed, 2608 insertions(+), 8 deletions(-) + 9 files changed, 2630 insertions(+), 8 deletions(-) diff --git a/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch b/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch index 7c3d8bd..2408cc7 100644 --- a/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch +++ b/ldiskfs/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch @@ -1,8 +1,8 @@ -Index: linux-stage/fs/ext3/mballoc.c +Index: linux-2.6.5-sles9/fs/ext3/mballoc.c =================================================================== ---- linux-stage.orig/fs/ext3/mballoc.c 2003-01-30 05:24:37.000000000 -0500 -+++ linux-stage/fs/ext3/mballoc.c 2004-10-13 17:06:53.000000000 -0400 -@@ -0,0 +1,1397 @@ +--- linux-2.6.5-sles9.orig/fs/ext3/mballoc.c 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.6.5-sles9/fs/ext3/mballoc.c 2004-11-09 02:34:25.181340632 +0300 +@@ -0,0 +1,1428 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -118,12 +118,43 @@ Index: linux-stage/fs/ext3/mballoc.c +int ext3_create (struct inode *, struct dentry *, int, struct nameidata *); +struct buffer_head * read_block_bitmap(struct super_block *, unsigned int); +void ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, unsigned long); -+int ext3_new_block_old(handle_t *, struct inode *, unsigned long, u32 *, u32 *, int *); ++int ext3_new_block_old(handle_t *, struct inode *, unsigned long, int *); +int ext3_mb_reserve_blocks(struct super_block *, int); +void ext3_mb_release_blocks(struct super_block *, int); +void ext3_mb_poll_new_transaction(struct super_block *, handle_t *); +void ext3_mb_free_committed_blocks(struct super_block *); + ++#define mb_correct_addr_and_bit(bit,addr) \ ++{ \ ++ if ((unsigned) addr & 1) { \ ++ bit += 8; \ ++ addr--; \ ++ } \ ++ if ((unsigned) addr & 
2) { \ ++ bit += 16; \ ++ addr--; \ ++ addr--; \ ++ } \ ++} ++ ++static inline int mb_test_bit(int bit, void *addr) ++{ ++ mb_correct_addr_and_bit(bit,addr); ++ return test_bit(bit, addr); ++} ++ ++static inline void mb_set_bit(int bit, void *addr) ++{ ++ mb_correct_addr_and_bit(bit,addr); ++ set_bit(bit, addr); ++} ++ ++static inline void mb_clear_bit(int bit, void *addr) ++{ ++ mb_correct_addr_and_bit(bit,addr); ++ clear_bit(bit, addr); ++} ++ +static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max) +{ + int i = 1; @@ -232,22 +263,22 @@ Index: linux-stage/fs/ext3/mballoc.c + count = 0; + for (i = 0; i < max; i++) { + -+ if (!test_bit(i, buddy)) { ++ if (!mb_test_bit(i, buddy)) { + /* only single bit in buddy2 may be 1 */ -+ if (test_bit(i << 1, buddy2)) -+ J_ASSERT(!test_bit((i<<1)+1, buddy2)); -+ else if (test_bit((i << 1) + 1, buddy2)) -+ J_ASSERT(!test_bit(i << 1, buddy2)); ++ if (mb_test_bit(i << 1, buddy2)) ++ J_ASSERT(!mb_test_bit((i<<1)+1, buddy2)); ++ else if (mb_test_bit((i << 1) + 1, buddy2)) ++ J_ASSERT(!mb_test_bit(i << 1, buddy2)); + continue; + } + + /* both bits in buddy2 must be 0 */ -+ J_ASSERT(!test_bit(i << 1, buddy2)); -+ J_ASSERT(!test_bit((i << 1) + 1, buddy2)); ++ J_ASSERT(!mb_test_bit(i << 1, buddy2)); ++ J_ASSERT(!mb_test_bit((i << 1) + 1, buddy2)); + + for (j = 0; j < (1 << order); j++) { + k = (i * (1 << order)) + j; -+ J_ASSERT(test_bit(k, e3b->bd_bitmap)); ++ J_ASSERT(mb_test_bit(k, e3b->bd_bitmap)); + } + count++; + } @@ -257,14 +288,14 @@ Index: linux-stage/fs/ext3/mballoc.c + + buddy = mb_find_buddy(e3b, 0, &max); + for (i = 0; i < max; i++) { -+ if (test_bit(i, buddy)) ++ if (mb_test_bit(i, buddy)) + continue; + /* check used bits only */ + for (j = 0; j < e3b->bd_blkbits + 1; j++) { + buddy2 = mb_find_buddy(e3b, j, &max2); + k = i >> j; + J_ASSERT(k < max2); -+ J_ASSERT(!test_bit(k, buddy2)); ++ J_ASSERT(!mb_test_bit(k, buddy2)); + } + } +} @@ -295,7 +326,7 @@ Index: linux-stage/fs/ext3/mballoc.c + bb = 
e3b->bd_buddy; + while (order <= e3b->bd_blkbits + 1) { + block = block >> 1; -+ if (test_bit(block, bb)) { ++ if (mb_test_bit(block, bb)) { + /* this block is part of buddy of order 'order' */ + return order; + } @@ -318,7 +349,7 @@ Index: linux-stage/fs/ext3/mballoc.c + cur += 32; + continue; + } -+ clear_bit(cur, bm); ++ mb_clear_bit(cur, bm); + cur++; + } +} @@ -336,7 +367,7 @@ Index: linux-stage/fs/ext3/mballoc.c + cur += 32; + continue; + } -+ set_bit(cur, bm); ++ mb_set_bit(cur, bm); + cur++; + } +} @@ -351,8 +382,8 @@ Index: linux-stage/fs/ext3/mballoc.c + block = first++; + order = 0; + -+ J_ASSERT(!test_bit(block, e3b->bd_bitmap)); -+ set_bit(block, e3b->bd_bitmap); ++ J_ASSERT(!mb_test_bit(block, e3b->bd_bitmap)); ++ mb_set_bit(block, e3b->bd_bitmap); + e3b->bd_bd->bb_counters[order]++; + + /* start of the buddy */ @@ -360,8 +391,8 @@ Index: linux-stage/fs/ext3/mballoc.c + + do { + block &= ~1UL; -+ if (!test_bit(block, buddy) || -+ !test_bit(block + 1, buddy)) ++ if (!mb_test_bit(block, buddy) || ++ !mb_test_bit(block + 1, buddy)) + break; + + /* both the buddies are free, try to coalesce them */ @@ -373,8 +404,8 @@ Index: linux-stage/fs/ext3/mballoc.c + if (order > 0) { + /* for special purposes, we don't clear + * free bits in bitmap */ -+ clear_bit(block, buddy); -+ clear_bit(block + 1, buddy); ++ mb_clear_bit(block, buddy); ++ mb_clear_bit(block + 1, buddy); + } + e3b->bd_bd->bb_counters[order]--; + e3b->bd_bd->bb_counters[order]--; @@ -383,7 +414,7 @@ Index: linux-stage/fs/ext3/mballoc.c + order++; + e3b->bd_bd->bb_counters[order]++; + -+ set_bit(block, buddy2); ++ mb_set_bit(block, buddy2); + buddy = buddy2; + } while (1); + } @@ -448,7 +479,7 @@ Index: linux-stage/fs/ext3/mballoc.c + buddy = mb_find_buddy(e3b, order, &max); + J_ASSERT(buddy); + J_ASSERT(block < max); -+ if (!test_bit(block, buddy)) ++ if (!mb_test_bit(block, buddy)) + goto nofree; + + if (order == 0) { @@ -471,7 +502,7 @@ Index: linux-stage/fs/ext3/mballoc.c + break; + + next = 
(block + 1) * (1 << order); -+ if (!test_bit(next, e3b->bd_bitmap)) ++ if (!mb_test_bit(next, e3b->bd_bitmap)) + break; + + ord = mb_find_order_for_block(e3b, next); @@ -509,7 +540,7 @@ Index: linux-stage/fs/ext3/mballoc.c + mlen = 1 << ord; + buddy = mb_find_buddy(e3b, ord, &max); + J_ASSERT((start >> ord) < max); -+ clear_bit(start >> ord, buddy); ++ mb_clear_bit(start >> ord, buddy); + e3b->bd_bd->bb_counters[ord]--; + start -= mlen; + len -= mlen; @@ -521,14 +552,14 @@ Index: linux-stage/fs/ext3/mballoc.c + /* we have to split large buddy */ + J_ASSERT(ord > 0); + buddy = mb_find_buddy(e3b, ord, &max); -+ clear_bit(start >> ord, buddy); ++ mb_clear_bit(start >> ord, buddy); + e3b->bd_bd->bb_counters[ord]--; + + ord--; + cur = (start >> ord) & ~1U; + buddy = mb_find_buddy(e3b, ord, &max); -+ set_bit(cur, buddy); -+ set_bit(cur + 1, buddy); ++ mb_set_bit(cur, buddy); ++ mb_set_bit(cur + 1, buddy); + e3b->bd_bd->bb_counters[ord]++; + e3b->bd_bd->bb_counters[ord]++; + } @@ -556,7 +587,7 @@ Index: linux-stage/fs/ext3/mballoc.c + mlen = 1 << ord; + buddy = mb_find_buddy(e3b, ord, &max); + J_ASSERT((start >> ord) < max); -+ clear_bit(start >> ord, buddy); ++ mb_clear_bit(start >> ord, buddy); + e3b->bd_bd->bb_counters[ord]--; + start += mlen; + len -= mlen; @@ -567,14 +598,14 @@ Index: linux-stage/fs/ext3/mballoc.c + /* we have to split large buddy */ + J_ASSERT(ord > 0); + buddy = mb_find_buddy(e3b, ord, &max); -+ clear_bit(start >> ord, buddy); ++ mb_clear_bit(start >> ord, buddy); + e3b->bd_bd->bb_counters[ord]--; + + ord--; + cur = (start >> ord) & ~1U; + buddy = mb_find_buddy(e3b, ord, &max); -+ set_bit(cur, buddy); -+ set_bit(cur + 1, buddy); ++ mb_set_bit(cur, buddy); ++ mb_set_bit(cur + 1, buddy); + e3b->bd_bd->bb_counters[ord]++; + e3b->bd_bd->bb_counters[ord]++; + } @@ -615,7 +646,7 @@ Index: linux-stage/fs/ext3/mballoc.c + if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) { + /* someone asks for space at this specified block + * probably he wants to 
merge it into existing extent */ -+ if (test_bit(ac->ac_g_start, e3b->bd_bitmap)) { ++ if (mb_test_bit(ac->ac_g_start, e3b->bd_bitmap)) { + /* good. at least one block is free */ + max = mb_find_extent(e3b, 0, ac->ac_g_start, + ac->ac_g_len, &curex); @@ -720,7 +751,7 @@ Index: linux-stage/fs/ext3/mballoc.c + ext3_mballoc_warning++; + } + *len = 1; -+ err = ext3_new_block_old(handle, inode, goal, NULL,NULL, errp); ++ err = ext3_new_block_old(handle, inode, goal, errp); + return err; + } + @@ -857,7 +888,7 @@ Index: linux-stage/fs/ext3/mballoc.c + "block = %u", block); +#if 0 + for (i = 0; i < ac.ac_b_len; i++) -+ J_ASSERT(!test_bit(ac.ac_b_start + i, bitmap_bh->b_data)); ++ J_ASSERT(!mb_test_bit(ac.ac_b_start + i, bitmap_bh->b_data)); +#endif + mb_set_bits(bitmap_bh->b_data, ac.ac_b_start, ac.ac_b_len); + @@ -930,7 +961,7 @@ Index: linux-stage/fs/ext3/mballoc.c + + /* loop over the blocks, nad create buddies for free ones */ + for (i = 0; i < sb->s_blocksize * 8; i++) { -+ if (!test_bit(i, (void *) bh->b_data)) { ++ if (!mb_test_bit(i, (void *) bh->b_data)) { + mb_free_blocks(&e3b, i, 1); + count++; + } @@ -949,7 +980,7 @@ Index: linux-stage/fs/ext3/mballoc.c + +#define MB_CREDITS \ + (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + \ -+ 2 * EXT3_QUOTA_INIT_BLOCKS) ++ + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS) + +int ext3_mb_init_backend(struct super_block *sb) +{ @@ -1375,12 +1406,12 @@ Index: linux-stage/fs/ext3/mballoc.c +} + +int ext3_new_block(handle_t *handle, struct inode *inode, -+ unsigned long goal, u32 *pc, u32 *pb, int *errp) ++ unsigned long goal, int *errp) +{ + int ret, len; + + if (!test_opt(inode->i_sb, MBALLOC)) { -+ ret = ext3_new_block_old(handle, inode, goal, pc, pb, errp); ++ ret = ext3_new_block_old(handle, inode, goal, errp); + goto out; + } + len = 1; @@ -1400,10 +1431,10 @@ Index: linux-stage/fs/ext3/mballoc.c + return; +} + -Index: linux-stage/fs/ext3/super.c +Index: linux-2.6.5-sles9/fs/ext3/super.c 
=================================================================== ---- linux-stage.orig/fs/ext3/super.c 2004-10-13 17:06:53.000000000 -0400 -+++ linux-stage/fs/ext3/super.c 2004-10-13 17:06:53.000000000 -0400 +--- linux-2.6.5-sles9.orig/fs/ext3/super.c 2004-11-09 02:23:21.597220752 +0300 ++++ linux-2.6.5-sles9/fs/ext3/super.c 2004-11-09 02:26:12.572228600 +0300 @@ -389,6 +389,7 @@ struct ext3_super_block *es = sbi->s_es; int i; @@ -1412,16 +1443,16 @@ Index: linux-stage/fs/ext3/super.c ext3_ext_release(sb); ext3_xattr_put_super(sb); journal_destroy(sbi->s_journal); -@@ -543,7 +544,7 @@ +@@ -542,7 +543,7 @@ Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, - Opt_err, Opt_extents, Opt_extdebug -+ Opt_err, Opt_extents, Opt_extdebug, Opt_mballoc ++ Opt_err, Opt_extents, Opt_extdebug, Opt_mballoc, }; static match_table_t tokens = { -@@ -588,6 +589,7 @@ +@@ -589,6 +590,7 @@ {Opt_iopen_nopriv, "iopen_nopriv"}, {Opt_extents, "extents"}, {Opt_extdebug, "extdebug"}, @@ -1429,7 +1460,7 @@ Index: linux-stage/fs/ext3/super.c {Opt_err, NULL} }; -@@ -803,6 +805,9 @@ +@@ -810,6 +812,9 @@ case Opt_extdebug: set_opt (sbi->s_mount_opt, EXTDEBUG); break; @@ -1439,7 +1470,7 @@ Index: linux-stage/fs/ext3/super.c default: printk (KERN_ERR "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1444,7 +1449,8 @@ +@@ -1463,7 +1468,8 @@ ext3_count_dirs(sb)); ext3_ext_init(sb); @@ -1449,10 +1480,10 @@ Index: linux-stage/fs/ext3/super.c return 0; failed_mount3: -Index: linux-stage/fs/ext3/Makefile +Index: linux-2.6.5-sles9/fs/ext3/Makefile =================================================================== ---- linux-stage.orig/fs/ext3/Makefile 2004-10-13 17:06:53.000000000 -0400 -+++ linux-stage/fs/ext3/Makefile 2004-10-13 17:06:53.000000000 -0400 +--- linux-2.6.5-sles9.orig/fs/ext3/Makefile 2004-11-09 02:23:21.593221360 +0300 ++++ 
linux-2.6.5-sles9/fs/ext3/Makefile 2004-11-09 02:26:12.572228600 +0300 @@ -5,7 +5,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o @@ -1462,10 +1493,10 @@ Index: linux-stage/fs/ext3/Makefile ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o -Index: linux-stage/fs/ext3/balloc.c +Index: linux-2.6.5-sles9/fs/ext3/balloc.c =================================================================== ---- linux-stage.orig/fs/ext3/balloc.c 2004-10-13 17:06:52.000000000 -0400 -+++ linux-stage/fs/ext3/balloc.c 2004-10-13 17:06:53.000000000 -0400 +--- linux-2.6.5-sles9.orig/fs/ext3/balloc.c 2004-11-03 08:36:51.000000000 +0300 ++++ linux-2.6.5-sles9/fs/ext3/balloc.c 2004-11-09 02:26:53.078070776 +0300 @@ -78,7 +78,7 @@ * * Return buffer_head on success or NULL in case of failure. @@ -1475,32 +1506,28 @@ Index: linux-stage/fs/ext3/balloc.c read_block_bitmap(struct super_block *sb, unsigned int block_group) { struct ext3_group_desc * desc; -@@ -98,8 +98,8 @@ +@@ -274,7 +274,7 @@ } /* Free given blocks, update quota and i_blocks field */ --void ext3_free_blocks (handle_t *handle, struct inode * inode, -- unsigned long block, unsigned long count) -+void ext3_free_blocks_old (handle_t *handle, struct inode * inode, -+ unsigned long block, unsigned long count) +-void ext3_free_blocks(handle_t *handle, struct inode *inode, ++void ext3_free_blocks_old(handle_t *handle, struct inode *inode, + unsigned long block, unsigned long count) { struct buffer_head *bitmap_bh = NULL; - struct buffer_head *gd_bh; -@@ -528,8 +528,8 @@ +@@ -1142,7 +1142,7 @@ + * bitmap, and then for any free bit if that fails. * This function also updates quota and i_blocks field. 
*/ - int --ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal, -- u32 *prealloc_count, u32 *prealloc_block, int *errp) -+ext3_new_block_old(handle_t *handle, struct inode *inode, unsigned long goal, -+ u32 *prealloc_count, u32 *prealloc_block, int *errp) +-int ext3_new_block(handle_t *handle, struct inode *inode, ++int ext3_new_block_old(handle_t *handle, struct inode *inode, + unsigned long goal, int *errp) { - struct buffer_head *bitmap_bh = NULL; /* bh */ - struct buffer_head *gdp_bh; /* bh2 */ -Index: linux-stage/fs/ext3/namei.c + struct buffer_head *bitmap_bh = NULL; +Index: linux-2.6.5-sles9/fs/ext3/namei.c =================================================================== ---- linux-stage.orig/fs/ext3/namei.c 2004-10-13 17:06:52.000000000 -0400 -+++ linux-stage/fs/ext3/namei.c 2004-10-13 17:06:53.000000000 -0400 +--- linux-2.6.5-sles9.orig/fs/ext3/namei.c 2004-11-09 02:18:27.616912552 +0300 ++++ linux-2.6.5-sles9/fs/ext3/namei.c 2004-11-09 02:26:12.580227384 +0300 @@ -1640,7 +1640,7 @@ * If the create succeeds, we fill in the inode information * with d_instantiate(). 
@@ -1510,20 +1537,11 @@ Index: linux-stage/fs/ext3/namei.c struct nameidata *nd) { handle_t *handle; -Index: linux-stage/fs/ext3/inode.c +Index: linux-2.6.5-sles9/fs/ext3/inode.c =================================================================== ---- linux-stage.orig/fs/ext3/inode.c 2004-10-13 17:06:53.000000000 -0400 -+++ linux-stage/fs/ext3/inode.c 2004-10-13 17:06:53.000000000 -0400 -@@ -256,7 +256,7 @@ - ei->i_prealloc_count = 0; - ei->i_prealloc_block = 0; - /* Writer: end */ -- ext3_free_blocks (inode, block, total); -+ ext3_free_blocks (inode, block, total, 1); - } - #endif - } -@@ -635,7 +635,7 @@ +--- linux-2.6.5-sles9.orig/fs/ext3/inode.c 2004-11-09 02:23:21.592221512 +0300 ++++ linux-2.6.5-sles9/fs/ext3/inode.c 2004-11-09 02:26:12.587226320 +0300 +@@ -572,7 +572,7 @@ ext3_journal_forget(handle, branch[i].bh); } for (i = 0; i < keys; i++) @@ -1532,7 +1550,7 @@ Index: linux-stage/fs/ext3/inode.c return err; } -@@ -736,7 +736,7 @@ +@@ -673,7 +673,7 @@ if (err == -EAGAIN) for (i = 0; i < num; i++) ext3_free_blocks(handle, inode, @@ -1541,7 +1559,7 @@ Index: linux-stage/fs/ext3/inode.c return err; } -@@ -1924,7 +1924,7 @@ +@@ -1829,7 +1829,7 @@ } } @@ -1550,7 +1568,7 @@ Index: linux-stage/fs/ext3/inode.c } /** -@@ -2095,7 +2095,7 @@ +@@ -2000,7 +2000,7 @@ ext3_journal_test_restart(handle, inode); } @@ -1559,10 +1577,10 @@ Index: linux-stage/fs/ext3/inode.c if (parent_bh) { /* -Index: linux-stage/fs/ext3/extents.c +Index: linux-2.6.5-sles9/fs/ext3/extents.c =================================================================== ---- linux-stage.orig/fs/ext3/extents.c 2004-10-13 17:06:53.000000000 -0400 -+++ linux-stage/fs/ext3/extents.c 2004-10-13 17:06:53.000000000 -0400 +--- linux-2.6.5-sles9.orig/fs/ext3/extents.c 2004-11-09 02:25:56.143726112 +0300 ++++ linux-2.6.5-sles9/fs/ext3/extents.c 2004-11-09 02:26:12.591225712 +0300 @@ -740,7 +740,7 @@ for (i = 0; i < depth; i++) { if (!ablocks[i]) @@ -1572,7 +1590,7 @@ Index: linux-stage/fs/ext3/extents.c } } 
kfree(ablocks); -@@ -1388,7 +1388,7 @@ +@@ -1391,7 +1391,7 @@ path->p_idx->ei_leaf); bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); @@ -1581,7 +1599,7 @@ Index: linux-stage/fs/ext3/extents.c return err; } -@@ -1876,10 +1876,12 @@ +@@ -1879,10 +1879,12 @@ int needed = ext3_remove_blocks_credits(tree, ex, from, to); handle_t *handle = ext3_journal_start(tree->inode, needed); struct buffer_head *bh; @@ -1595,7 +1613,7 @@ Index: linux-stage/fs/ext3/extents.c if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { /* tail removal */ unsigned long num, start; -@@ -1891,7 +1893,7 @@ +@@ -1894,7 +1896,7 @@ bh = sb_find_get_block(tree->inode->i_sb, start + i); ext3_forget(handle, 0, tree->inode, bh, start + i); } @@ -1604,10 +1622,10 @@ Index: linux-stage/fs/ext3/extents.c } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { printk("strange request: removal %lu-%lu from %u:%u\n", from, to, ex->ee_block, ex->ee_len); -Index: linux-stage/fs/ext3/xattr.c +Index: linux-2.6.5-sles9/fs/ext3/xattr.c =================================================================== ---- linux-stage.orig/fs/ext3/xattr.c 2004-10-13 17:06:53.000000000 -0400 -+++ linux-stage/fs/ext3/xattr.c 2004-10-13 17:06:53.000000000 -0400 +--- linux-2.6.5-sles9.orig/fs/ext3/xattr.c 2004-11-09 02:22:55.777146000 +0300 ++++ linux-2.6.5-sles9/fs/ext3/xattr.c 2004-11-09 02:26:12.593225408 +0300 @@ -1366,7 +1366,7 @@ new_bh = sb_getblk(sb, block); if (!new_bh) { @@ -1635,10 +1653,10 @@ Index: linux-stage/fs/ext3/xattr.c get_bh(bh); ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl); } else { -Index: linux-stage/include/linux/ext3_fs.h +Index: linux-2.6.5-sles9/include/linux/ext3_fs.h =================================================================== ---- linux-stage.orig/include/linux/ext3_fs.h 2004-10-13 17:06:53.000000000 -0400 -+++ linux-stage/include/linux/ext3_fs.h 2004-10-13 
17:06:53.000000000 -0400 +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:25:17.238640584 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:26:12.596224952 +0300 @@ -57,6 +57,8 @@ #define ext3_debug(f, a...) do {} while (0) #endif @@ -1648,7 +1666,7 @@ Index: linux-stage/include/linux/ext3_fs.h /* * Special inodes numbers */ -@@ -336,6 +338,7 @@ +@@ -339,6 +341,7 @@ #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ #define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */ #define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */ @@ -1656,26 +1674,27 @@ Index: linux-stage/include/linux/ext3_fs.h /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef clear_opt -@@ -696,7 +699,7 @@ - extern int ext3_new_block (handle_t *, struct inode *, unsigned long, - __u32 *, __u32 *, int *); +@@ -698,7 +701,7 @@ + extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); + extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, - unsigned long); + unsigned long, int); extern unsigned long ext3_count_free_blocks (struct super_block *); extern void ext3_check_blocks_bitmap (struct super_block *); extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, -Index: linux-stage/include/linux/ext3_fs_sb.h +Index: linux-2.6.5-sles9/include/linux/ext3_fs_sb.h =================================================================== ---- linux-stage.orig/include/linux/ext3_fs_sb.h 2004-10-13 17:06:52.000000000 -0400 -+++ linux-stage/include/linux/ext3_fs_sb.h 2004-10-13 17:06:53.000000000 -0400 -@@ -23,9 +23,29 @@ +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_sb.h 2004-11-09 02:20:51.598024096 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs_sb.h 2004-11-09 02:28:18.753046200 +0300 +@@ -23,10 +23,30 @@ #define EXT_INCLUDE #include #include +#include #endif #endif + #include 
+#define EXT3_BB_MAX_BLOCKS 30 +struct ext3_free_metadata { @@ -1699,7 +1718,7 @@ Index: linux-stage/include/linux/ext3_fs_sb.h /* * third extended-fs super-block data in memory */ -@@ -72,6 +92,17 @@ +@@ -78,6 +98,17 @@ struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ #endif @@ -1717,31 +1736,3 @@ Index: linux-stage/include/linux/ext3_fs_sb.h }; #endif /* _LINUX_EXT3_FS_SB */ -Index: linux-stage/include/linux/ext3_jbd.h -=================================================================== ---- linux-stage.orig/include/linux/ext3_jbd.h 2004-10-13 17:06:52.000000000 -0400 -+++ linux-stage/include/linux/ext3_jbd.h 2004-10-13 19:12:30.000000000 -0400 -@@ -72,6 +72,23 @@ - - #define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8 - -+#ifdef CONFIG_QUOTA -+/* Maximal numbers of writes for quota operation (insert/delete/update) -+ * (over all formats) - info block, 4 pointer blocks, data block */ -+#define DQUOT_MAX_WRITES 6 -+ -+/* Amount of blocks needed for quota update - we know that the structure was -+ * allocated so we need to update only inode+data */ -+#define EXT3_QUOTA_TRANS_BLOCKS 2 -+/* Amount of blocks needed for quota insert/delete - we do some block writes -+ * but inode, sb and group updates are done only once */ -+#define EXT3_QUOTA_INIT_BLOCKS (DQUOT_MAX_WRITES*\ -+ (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3) -+#else -+#define EXT3_QUOTA_TRANS_BLOCKS 0 -+#define EXT3_QUOTA_INIT_BLOCKS 0 -+#endif -+ - int - ext3_mark_iloc_dirty(handle_t *handle, - struct inode *inode, diff --git a/ldiskfs/kernel_patches/patches/ext3-rename-reserve-2.6-suse.patch b/ldiskfs/kernel_patches/patches/ext3-rename-reserve-2.6-suse.patch new file mode 100644 index 0000000..f323584 --- /dev/null +++ b/ldiskfs/kernel_patches/patches/ext3-rename-reserve-2.6-suse.patch @@ -0,0 +1,263 @@ +Index: linux-2.6.5-sles9/include/linux/ext3_fs.h 
+=================================================================== +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:29:14.878513832 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:32:14.151260232 +0300 +@@ -709,7 +709,7 @@ + unsigned int block_group, + struct buffer_head ** bh); + extern int ext3_should_retry_alloc(struct super_block *sb, int *retries); +-extern void rsv_window_add(struct super_block *sb, struct reserve_window_node *rsv); ++extern void rsv_window_add(struct super_block *sb, struct ext3_reserve_window_node *rsv); + + /* dir.c */ + extern int ext3_check_dir_entry(const char *, struct inode *, +Index: linux-2.6.5-sles9/include/linux/ext3_fs_sb.h +=================================================================== +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_sb.h 2004-11-09 02:28:18.753046200 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs_sb.h 2004-11-09 02:32:27.996155488 +0300 +@@ -86,7 +86,7 @@ + /* root of the per fs reservation window tree */ + spinlock_t s_rsv_window_lock; + struct rb_root s_rsv_window_root; +- struct reserve_window_node s_rsv_window_head; ++ struct ext3_reserve_window_node s_rsv_window_head; + + /* Journaling */ + struct inode * s_journal_inode; +Index: linux-2.6.5-sles9/include/linux/ext3_fs_i.h +=================================================================== +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_i.h 2004-11-09 02:23:21.606219384 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs_i.h 2004-11-09 02:32:08.752081032 +0300 +@@ -20,17 +20,17 @@ + #include + #include + +-struct reserve_window { ++struct ext3_reserve_window { + __u32 _rsv_start; /* First byte reserved */ + __u32 _rsv_end; /* Last byte reserved or 0 */ + }; + +-struct reserve_window_node { ++struct ext3_reserve_window_node { + struct rb_node rsv_node; + atomic_t rsv_goal_size; + atomic_t rsv_alloc_hit; + seqlock_t rsv_seqlock; +- struct reserve_window rsv_window; ++ struct ext3_reserve_window rsv_window; + 
}; + + #define rsv_start rsv_window._rsv_start +@@ -76,7 +76,7 @@ + */ + __u32 i_next_alloc_goal; + /* block reservation window */ +- struct reserve_window_node i_rsv_window; ++ struct ext3_reserve_window_node i_rsv_window; + + __u32 i_dir_start_lookup; + #ifdef CONFIG_EXT3_FS_XATTR +Index: linux-2.6.5-sles9/fs/ext3/balloc.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/balloc.c 2004-11-09 02:26:53.078070776 +0300 ++++ linux-2.6.5-sles9/fs/ext3/balloc.c 2004-11-09 02:32:43.108858008 +0300 +@@ -115,7 +115,7 @@ + const char *fn) + { + struct rb_node *n; +- struct reserve_window_node *rsv, *prev; ++ struct ext3_reserve_window_node *rsv, *prev; + int bad; + + restart: +@@ -125,7 +125,7 @@ + + printk("Block Allocation Reservation Windows Map (%s):\n", fn); + while (n) { +- rsv = list_entry(n, struct reserve_window_node, rsv_node); ++ rsv = list_entry(n, struct ext3_reserve_window_node, rsv_node); + if (verbose) + printk("reservation window 0x%p " + "start: %d, end: %d\n", +@@ -161,7 +161,7 @@ + #endif + + static int +-goal_in_my_reservation(struct reserve_window *rsv, int goal, ++goal_in_my_reservation(struct ext3_reserve_window *rsv, int goal, + unsigned int group, struct super_block * sb) + { + unsigned long group_first_block, group_last_block; +@@ -184,18 +184,18 @@ + * if the goal is not in any window. + * Returns NULL if there are no windows or if all windows start after the goal. 
+ */ +-static struct reserve_window_node *search_reserve_window(struct rb_root *root, ++static struct ext3_reserve_window_node *search_ext3_reserve_window(struct rb_root *root, + unsigned long goal) + { + struct rb_node *n = root->rb_node; +- struct reserve_window_node *rsv; ++ struct ext3_reserve_window_node *rsv; + + if (!n) + return NULL; + + while (n) + { +- rsv = rb_entry(n, struct reserve_window_node, rsv_node); ++ rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); + + if (goal < rsv->rsv_start) + n = n->rb_left; +@@ -212,13 +212,13 @@ + */ + if (rsv->rsv_start > goal) { + n = rb_prev(&rsv->rsv_node); +- rsv = rb_entry(n, struct reserve_window_node, rsv_node); ++ rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); + } + return rsv; + } + + void rsv_window_add(struct super_block *sb, +- struct reserve_window_node *rsv) ++ struct ext3_reserve_window_node *rsv) + { + struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root; + struct rb_node *node = &rsv->rsv_node; +@@ -226,12 +226,12 @@ + + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; +- struct reserve_window_node *this; ++ struct ext3_reserve_window_node *this; + + while (*p) + { + parent = *p; +- this = rb_entry(parent, struct reserve_window_node, rsv_node); ++ this = rb_entry(parent, struct ext3_reserve_window_node, rsv_node); + + if (start < this->rsv_start) + p = &(*p)->rb_left; +@@ -246,7 +246,7 @@ + } + + static void rsv_window_remove(struct super_block *sb, +- struct reserve_window_node *rsv) ++ struct ext3_reserve_window_node *rsv) + { + rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; + rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; +@@ -254,7 +254,7 @@ + rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root); + } + +-static inline int rsv_is_empty(struct reserve_window *rsv) ++static inline int rsv_is_empty(struct ext3_reserve_window *rsv) + { + /* a valid reservation end block could not be 0 */ + return (rsv->_rsv_end == 
EXT3_RESERVE_WINDOW_NOT_ALLOCATED); +@@ -263,7 +263,7 @@ + void ext3_discard_reservation(struct inode *inode) + { + struct ext3_inode_info *ei = EXT3_I(inode); +- struct reserve_window_node *rsv = &ei->i_rsv_window; ++ struct ext3_reserve_window_node *rsv = &ei->i_rsv_window; + spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock; + + if (!rsv_is_empty(&rsv->rsv_window)) { +@@ -600,7 +600,7 @@ + */ + static int + ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group, +- struct buffer_head *bitmap_bh, int goal, struct reserve_window *my_rsv) ++ struct buffer_head *bitmap_bh, int goal, struct ext3_reserve_window *my_rsv) + { + int group_first_block, start, end; + +@@ -700,13 +700,13 @@ + * on succeed, it returns the reservation window to be appended to. + * failed, return NULL. + */ +-static struct reserve_window_node *find_next_reservable_window( +- struct reserve_window_node *search_head, ++static struct ext3_reserve_window_node *find_next_reservable_window( ++ struct ext3_reserve_window_node *search_head, + unsigned long size, int *start_block, + int last_block) + { + struct rb_node *next; +- struct reserve_window_node *rsv, *prev; ++ struct ext3_reserve_window_node *rsv, *prev; + int cur; + + /* TODO: make the start of the reservation window byte-aligned */ +@@ -734,7 +734,7 @@ + + prev = rsv; + next = rb_next(&rsv->rsv_node); +- rsv = list_entry(next, struct reserve_window_node, rsv_node); ++ rsv = list_entry(next, struct ext3_reserve_window_node, rsv_node); + + /* + * Reached the last reservation, we can just append to the +@@ -801,15 +801,15 @@ + * @group: the group we are trying to allocate in + * @bitmap_bh: the block group block bitmap + */ +-static int alloc_new_reservation(struct reserve_window_node *my_rsv, ++static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, + int goal, struct super_block *sb, + unsigned int group, struct buffer_head *bitmap_bh) + { +- struct reserve_window_node *search_head; ++ 
struct ext3_reserve_window_node *search_head; + int group_first_block, group_end_block, start_block; + int first_free_block; + int reservable_space_start; +- struct reserve_window_node *prev_rsv; ++ struct ext3_reserve_window_node *prev_rsv; + struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root; + unsigned long size; + +@@ -859,7 +859,7 @@ + /* + * shift the search start to the window near the goal block + */ +- search_head = search_reserve_window(fs_rsv_root, start_block); ++ search_head = search_ext3_reserve_window(fs_rsv_root, start_block); + + /* + * find_next_reservable_window() simply finds a reservable window +@@ -968,7 +968,7 @@ + static int + ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, + unsigned int group, struct buffer_head *bitmap_bh, +- int goal, struct reserve_window_node * my_rsv, ++ int goal, struct ext3_reserve_window_node * my_rsv, + int *errp) + { + spinlock_t *rsv_lock; +@@ -1027,7 +1027,7 @@ + * then we could go to allocate from the reservation window directly. 
+ */ + while (1) { +- struct reserve_window rsv_copy; ++ struct ext3_reserve_window rsv_copy; + unsigned int seq; + + do { +@@ -1159,8 +1159,8 @@ + struct ext3_group_desc *gdp; + struct ext3_super_block *es; + struct ext3_sb_info *sbi; +- struct reserve_window_node *my_rsv = NULL; +- struct reserve_window_node *rsv = &EXT3_I(inode)->i_rsv_window; ++ struct ext3_reserve_window_node *my_rsv = NULL; ++ struct ext3_reserve_window_node *rsv = &EXT3_I(inode)->i_rsv_window; + unsigned short windowsz = 0; + #ifdef EXT3FS_DEBUG + static int goal_hits, goal_attempts; diff --git a/ldiskfs/kernel_patches/patches/iopen-2.6-suse.patch b/ldiskfs/kernel_patches/patches/iopen-2.6-suse.patch index 0b58346..4e4bbaa 100644 --- a/ldiskfs/kernel_patches/patches/iopen-2.6-suse.patch +++ b/ldiskfs/kernel_patches/patches/iopen-2.6-suse.patch @@ -6,10 +6,10 @@ include/linux/ext3_fs.h | 2 7 files changed, 304 insertions(+), 1 deletion(-) -Index: linux-stage/fs/ext3/Makefile +Index: linux-2.6.5-sles9/fs/ext3/Makefile =================================================================== ---- linux-stage.orig/fs/ext3/Makefile 2004-11-03 14:41:24.747805262 -0500 -+++ linux-stage/fs/ext3/Makefile 2004-11-03 14:41:25.123696274 -0500 +--- linux-2.6.5-sles9.orig/fs/ext3/Makefile 2004-04-04 07:36:18.000000000 +0400 ++++ linux-2.6.5-sles9/fs/ext3/Makefile 2004-11-09 02:18:27.604914376 +0300 @@ -4,7 +4,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o @@ -19,10 +19,10 @@ Index: linux-stage/fs/ext3/Makefile ioctl.o namei.o super.o symlink.o hash.o ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o -Index: linux-stage/fs/ext3/inode.c +Index: linux-2.6.5-sles9/fs/ext3/inode.c =================================================================== ---- linux-stage.orig/fs/ext3/inode.c 2004-11-03 14:41:25.040720333 -0500 -+++ linux-stage/fs/ext3/inode.c 2004-11-03 14:46:08.458515670 -0500 +--- linux-2.6.5-sles9.orig/fs/ext3/inode.c 2004-11-09 02:15:44.739673656 +0300 ++++ linux-2.6.5-sles9/fs/ext3/inode.c 
2004-11-09 02:18:27.608913768 +0300 @@ -37,6 +37,7 @@ #include #include @@ -31,20 +31,20 @@ Index: linux-stage/fs/ext3/inode.c #include "acl.h" /* -@@ -2401,6 +2402,9 @@ - ei->i_default_acl = EXT3_ACL_NOT_CACHED; +@@ -2402,6 +2403,9 @@ #endif ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; -+ -+ if (ext3_iopen_get_inode(inode)) -+ return; ++ if (ext3_iopen_get_inode(inode)) ++ return; ++ if (ext3_get_inode_loc(inode, &iloc, 0)) goto bad_inode; -Index: linux-stage/fs/ext3/iopen.c + bh = iloc.bh; +Index: linux-2.6.5-sles9/fs/ext3/iopen.c =================================================================== ---- linux-stage.orig/fs/ext3/iopen.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-stage/fs/ext3/iopen.c 2004-11-03 14:41:25.125695694 -0500 +--- linux-2.6.5-sles9.orig/fs/ext3/iopen.c 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.6.5-sles9/fs/ext3/iopen.c 2004-11-09 02:18:27.611913312 +0300 @@ -0,0 +1,272 @@ +/* + * linux/fs/ext3/iopen.c @@ -318,10 +318,10 @@ Index: linux-stage/fs/ext3/iopen.c + + return 1; +} -Index: linux-stage/fs/ext3/iopen.h +Index: linux-2.6.5-sles9/fs/ext3/iopen.h =================================================================== ---- linux-stage.orig/fs/ext3/iopen.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-stage/fs/ext3/iopen.h 2004-11-03 14:41:25.126695404 -0500 +--- linux-2.6.5-sles9.orig/fs/ext3/iopen.h 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.6.5-sles9/fs/ext3/iopen.h 2004-11-09 02:18:27.613913008 +0300 @@ -0,0 +1,15 @@ +/* + * iopen.h @@ -338,10 +338,10 @@ Index: linux-stage/fs/ext3/iopen.h +extern int ext3_iopen_get_inode(struct inode *inode); +extern struct dentry *iopen_connect_dentry(struct dentry *dentry, + struct inode *inode, int rehash); -Index: linux-stage/fs/ext3/namei.c +Index: linux-2.6.5-sles9/fs/ext3/namei.c =================================================================== ---- linux-stage.orig/fs/ext3/namei.c 2004-11-03 14:41:24.957744391 -0500 -+++ linux-stage/fs/ext3/namei.c 2004-11-03 
14:41:25.127695114 -0500 +--- linux-2.6.5-sles9.orig/fs/ext3/namei.c 2004-11-09 02:15:44.614692656 +0300 ++++ linux-2.6.5-sles9/fs/ext3/namei.c 2004-11-09 02:18:27.616912552 +0300 @@ -37,6 +37,7 @@ #include #include @@ -418,10 +418,10 @@ Index: linux-stage/fs/ext3/namei.c ext3_journal_stop(handle); if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) goto retry; -Index: linux-stage/fs/ext3/super.c +Index: linux-2.6.5-sles9/fs/ext3/super.c =================================================================== ---- linux-stage.orig/fs/ext3/super.c 2004-11-03 14:41:25.043719463 -0500 -+++ linux-stage/fs/ext3/super.c 2004-11-03 14:41:25.129694535 -0500 +--- linux-2.6.5-sles9.orig/fs/ext3/super.c 2004-11-09 02:15:44.743673048 +0300 ++++ linux-2.6.5-sles9/fs/ext3/super.c 2004-11-09 02:18:27.620911944 +0300 @@ -534,7 +534,7 @@ Opt_reservation, Opt_noreservation, Opt_noload, Opt_commit, Opt_journal_update, Opt_journal_inum, @@ -460,10 +460,10 @@ Index: linux-stage/fs/ext3/super.c case Opt_ignore: break; default: -Index: linux-stage/include/linux/ext3_fs.h +Index: linux-2.6.5-sles9/include/linux/ext3_fs.h =================================================================== ---- linux-stage.orig/include/linux/ext3_fs.h 2004-11-03 14:41:24.958744101 -0500 -+++ linux-stage/include/linux/ext3_fs.h 2004-11-03 14:41:25.129694535 -0500 +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:15:44.616692352 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:18:27.622911640 +0300 @@ -329,6 +329,8 @@ #define EXT3_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ #define EXT3_MOUNT_RESERVATION 0x10000 /* Preallocation */ diff --git a/ldiskfs/kernel_patches/series/ldiskfs-2.6-suse.series b/ldiskfs/kernel_patches/series/ldiskfs-2.6-suse.series index 02155b7..fd05c25 100644 --- a/ldiskfs/kernel_patches/series/ldiskfs-2.6-suse.series +++ b/ldiskfs/kernel_patches/series/ldiskfs-2.6-suse.series @@ -1,12 +1,12 @@ ext3-wantedi-2.6-suse.patch 
ext3-san-jdike-2.6-suse.patch -iopen-2.6-suse.patch +iopen-2.6-suse.patch export_symbols-ext3-2.6-suse.patch ext3-map_inode_page-2.6-suse.patch -ext3-init-generation-2.6-suse.patch ext3-ea-in-inode-2.6-suse.patch export-ext3-2.6-suse.patch ext3-include-fixes-2.6-suse.patch -ext3-extents-2.6.5.patch -ext3-mballoc2-2.6-suse.patch +ext3-extents-2.6.5.patch +ext3-mballoc2-2.6-suse.patch ext3-nlinks-2.6.7.patch +ext3-rename-reserve-2.6-suse.patch diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 027fab1..ba6959e 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -1,14 +1,18 @@ tbd Cluster File Systems, Inc. * version 1.3.4 * bug fixes - - flock/lockf fixes + - flock/lockf fixes (but it's still disabled, pending - don't use EXT3 constants in llite code (5094) - return async write errors to application if possible (2248) + - return last_committed value from OST to avoid OOM (4966) * miscellania - reorganization of lov code - single portals codebase - Infiniband NAL - add pid to ldlm debugging output (4922) + - return last_committed value from OST to avoid OOM (4966) + - add extents/mballoc support (5025) + - direct I/O reads in the obdfilter (4048) tbd Cluster File Systems, Inc. 
* version 1.2.8 diff --git a/lustre/configure.in b/lustre/configure.in index ffe07b0..479bf65 100644 --- a/lustre/configure.in +++ b/lustre/configure.in @@ -5,7 +5,7 @@ AC_INIT AC_CANONICAL_SYSTEM -AM_INIT_AUTOMAKE(lustre, 1.3.4) +AM_INIT_AUTOMAKE(lustre, 1.4.0rc1) # AM_MAINTAINER_MODE # Four main targets: lustre kernel modules, utilities, tests, and liblustre diff --git a/lustre/include/linux/lustre_fsfilt.h b/lustre/include/linux/lustre_fsfilt.h index e39729e..1557133 100644 --- a/lustre/include/linux/lustre_fsfilt.h +++ b/lustre/include/linux/lustre_fsfilt.h @@ -58,6 +58,12 @@ struct fsfilt_operations { int (* fs_set_md)(struct inode *inode, void *handle, void *md, int size); int (* fs_get_md)(struct inode *inode, void *md, int size); + /* this method is needed to make the I/O operation depend on the fsfilt type. */ +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) + int (* fs_send_bio)(int rw, struct inode *inode, struct bio *bio); +#else + int (* fs_send_bio)(int rw, struct inode *inode,struct kiobuf *bio); +#endif ssize_t (* fs_readpage)(struct file *file, char *buf, size_t count, loff_t *offset); int (* fs_add_journal_cb)(struct obd_device *obd, __u64 last_rcvd, @@ -65,9 +71,10 @@ struct fsfilt_operations { void *cb_data); int (* fs_statfs)(struct super_block *sb, struct obd_statfs *osfs); int (* fs_sync)(struct super_block *sb); - int (* fs_map_inode_page)(struct inode *inode, struct page *page, - unsigned long *blocks, int *created, - int create); + int (* fs_map_inode_pages)(struct inode *inode, struct page **page, + int pages, unsigned long *blocks, + int *created, int create, + struct semaphore *sem); int (* fs_prep_san_write)(struct inode *inode, long *blocks, int nblocks, loff_t newsize); int (* fs_write_record)(struct file *, void *, int size, loff_t *, @@ -229,6 +236,17 @@ static inline int fsfilt_get_md(struct obd_device *obd, struct inode *inode, return obd->obd_fsops->fs_get_md(inode, md, size); } +static inline int fsfilt_send_bio(int rw, struct
obd_device *obd, + struct inode *inode, void *bio) +{ + LASSERTF(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ, "%x\n", rw); + + if (rw == OBD_BRW_READ) + return obd->obd_fsops->fs_send_bio(READ, inode, bio); + else + return obd->obd_fsops->fs_send_bio(WRITE, inode, bio); +} + static inline ssize_t fsfilt_readpage(struct obd_device *obd, struct file *file, char *buf, size_t count, loff_t *offset) @@ -267,13 +285,14 @@ static inline int fsfilt_sync(struct obd_device *obd, struct super_block *sb) return obd->obd_fsops->fs_sync(sb); } -static inline int fsfilt_map_inode_page(struct obd_device *obd, - struct inode *inode, struct page *page, - unsigned long *blocks, int *created, - int create) +static inline int fsfilt_map_inode_pages(struct obd_device *obd, + struct inode *inode, + struct page **page, int pages, + unsigned long *blocks, int *created, + int create, struct semaphore *sem) { - return obd->obd_fsops->fs_map_inode_page(inode, page, blocks, created, - create); + return obd->obd_fsops->fs_map_inode_pages(inode, page, pages, blocks, + created, create, sem); } static inline int fs_prep_san_write(struct obd_device *obd, diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64-smp.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64-smp.config index a8afabf..894ec7f 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64-smp.config +++ b/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64-smp.config @@ -716,6 +716,17 @@ CONFIG_YELLOWFIN=m CONFIG_R8169=m CONFIG_SK98LIN=m CONFIG_TIGON3=m + +# +# Quadrics QsNet device support +# +CONFIG_QSNET=m +CONFIG_ELAN3=m +CONFIG_ELAN4=m +CONFIG_EP=m +CONFIG_EIP=m +CONFIG_RMS=m +CONFIG_JTAG=m # CONFIG_FDDI is not set # CONFIG_NETCONSOLE is not set # CONFIG_HIPPI is not set @@ -1110,12 +1121,11 @@ CONFIG_FB_VOODOO1=m # CONFIG_FB_TRIDENT is not set # CONFIG_FB_VIRTUAL is not set # CONFIG_FBCON_ADVANCED is not set -CONFIG_FBCON_CFB8=y -CONFIG_FBCON_CFB16=y 
-CONFIG_FBCON_CFB24=y -CONFIG_FBCON_CFB32=y +CONFIG_FBCON_CFB8=m +CONFIG_FBCON_CFB16=m +CONFIG_FBCON_CFB24=m +CONFIG_FBCON_CFB32=m CONFIG_FBCON_VGA_PLANES=m -CONFIG_FBCON_HGA=m # CONFIG_FBCON_FONTWIDTH8_ONLY is not set # CONFIG_FBCON_FONTS is not set CONFIG_FONT_8x8=y diff --git a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64.config b/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64.config index a8afabf..894ec7f 100644 --- a/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64.config +++ b/lustre/kernel_patches/kernel_configs/kernel-2.4.21-rhel-2.4-ia64.config @@ -716,6 +716,17 @@ CONFIG_YELLOWFIN=m CONFIG_R8169=m CONFIG_SK98LIN=m CONFIG_TIGON3=m + +# +# Quadrics QsNet device support +# +CONFIG_QSNET=m +CONFIG_ELAN3=m +CONFIG_ELAN4=m +CONFIG_EP=m +CONFIG_EIP=m +CONFIG_RMS=m +CONFIG_JTAG=m # CONFIG_FDDI is not set # CONFIG_NETCONSOLE is not set # CONFIG_HIPPI is not set @@ -1110,12 +1121,11 @@ CONFIG_FB_VOODOO1=m # CONFIG_FB_TRIDENT is not set # CONFIG_FB_VIRTUAL is not set # CONFIG_FBCON_ADVANCED is not set -CONFIG_FBCON_CFB8=y -CONFIG_FBCON_CFB16=y -CONFIG_FBCON_CFB24=y -CONFIG_FBCON_CFB32=y +CONFIG_FBCON_CFB8=m +CONFIG_FBCON_CFB16=m +CONFIG_FBCON_CFB24=m +CONFIG_FBCON_CFB32=m CONFIG_FBCON_VGA_PLANES=m -CONFIG_FBCON_HGA=m # CONFIG_FBCON_FONTWIDTH8_ONLY is not set # CONFIG_FBCON_FONTS is not set CONFIG_FONT_8x8=y diff --git a/lustre/kernel_patches/patches/export_symbols-ext3-2.6-suse.patch b/lustre/kernel_patches/patches/export_symbols-ext3-2.6-suse.patch index 74962dd..294a9cd 100644 --- a/lustre/kernel_patches/patches/export_symbols-ext3-2.6-suse.patch +++ b/lustre/kernel_patches/patches/export_symbols-ext3-2.6-suse.patch @@ -1,7 +1,7 @@ -Index: linux-stage/include/linux/ext3_fs_sb.h +Index: linux-2.6.5-sles9/include/linux/ext3_fs_sb.h =================================================================== ---- linux-stage.orig/include/linux/ext3_fs_sb.h 2004-11-03 14:16:26.059485670 -0500 -+++ 
linux-stage/include/linux/ext3_fs_sb.h 2004-11-03 14:20:30.761493072 -0500 +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_sb.h 2004-11-03 08:36:51.000000000 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs_sb.h 2004-11-09 02:20:51.598024096 +0300 @@ -19,9 +19,12 @@ #ifdef __KERNEL__ #include diff --git a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20-hp.patch b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20-hp.patch index d722b68..619b845 100644 --- a/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20-hp.patch +++ b/lustre/kernel_patches/patches/ext3-delete_thread-2.4.20-hp.patch @@ -5,10 +5,10 @@ include/linux/ext3_fs_sb.h | 10 + 5 files changed, 365 insertions(+) -Index: linux/fs/ext3/super.c +Index: linux-2.4.20-hp_pnnl39/fs/ext3/super.c =================================================================== ---- linux.orig/fs/ext3/super.c Mon Feb 2 20:57:35 2004 -+++ linux/fs/ext3/super.c Mon Feb 2 20:58:05 2004 +--- linux-2.4.20-hp_pnnl39.orig/fs/ext3/super.c 2004-11-02 22:16:42.000000000 +0300 ++++ linux-2.4.20-hp_pnnl39/fs/ext3/super.c 2004-11-02 23:47:18.513035128 +0300 @@ -400,6 +400,221 @@ } } @@ -231,15 +231,17 @@ Index: linux/fs/ext3/super.c void ext3_put_super (struct super_block * sb) { struct ext3_sb_info *sbi = EXT3_SB(sb); -@@ -407,6 +622,7 @@ +@@ -407,6 +622,9 @@ kdev_t j_dev = sbi->s_journal->j_dev; int i; ++#ifdef EXT3_DELETE_THREAD + J_ASSERT(sbi->s_delete_inodes == 0); ++#endif ext3_xattr_put_super(sb); journal_destroy(sbi->s_journal); if (!(sb->s_flags & MS_RDONLY)) { -@@ -455,7 +671,11 @@ +@@ -455,7 +673,11 @@ write_inode: ext3_write_inode, /* BKL not held. Don't need */ dirty_inode: ext3_dirty_inode, /* BKL not held. We take it */ put_inode: ext3_put_inode, /* BKL not held. 
Don't need */ @@ -252,7 +254,7 @@ Index: linux/fs/ext3/super.c put_super: ext3_put_super, /* BKL held */ write_super: ext3_write_super, /* BKL held */ sync_fs: ext3_sync_fs, -@@ -524,6 +744,13 @@ +@@ -524,6 +746,13 @@ clear_opt (*mount_options, XATTR_USER); else #endif @@ -266,7 +268,7 @@ Index: linux/fs/ext3/super.c if (!strcmp (this_char, "bsddf")) clear_opt (*mount_options, MINIX_DF); else if (!strcmp (this_char, "nouid32")) { -@@ -1223,6 +1450,7 @@ +@@ -1223,6 +1452,7 @@ } ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY); @@ -274,7 +276,7 @@ Index: linux/fs/ext3/super.c /* * akpm: core read_super() calls in here with the superblock locked. * That deadlocks, because orphan cleanup needs to lock the superblock -@@ -1614,7 +1842,12 @@ +@@ -1614,7 +1844,12 @@ static int ext3_sync_fs(struct super_block *sb) { tid_t target; @@ -288,7 +290,7 @@ Index: linux/fs/ext3/super.c sb->s_dirt = 0; target = log_start_commit(EXT3_SB(sb)->s_journal, NULL); log_wait_commit(EXT3_SB(sb)->s_journal, target); -@@ -1678,6 +1911,9 @@ +@@ -1678,6 +1913,9 @@ if (!parse_options(data, &tmp, sbi, &tmp, 1)) return -EINVAL; @@ -298,10 +300,10 @@ Index: linux/fs/ext3/super.c if (sbi->s_mount_opt & EXT3_MOUNT_ABORT) ext3_abort(sb, __FUNCTION__, "Abort forced by user"); -Index: linux/fs/ext3/inode.c +Index: linux-2.4.20-hp_pnnl39/fs/ext3/inode.c =================================================================== ---- linux.orig/fs/ext3/inode.c Mon Feb 2 20:57:35 2004 -+++ linux/fs/ext3/inode.c Mon Feb 2 20:58:05 2004 +--- linux-2.4.20-hp_pnnl39.orig/fs/ext3/inode.c 2004-11-02 22:16:41.000000000 +0300 ++++ linux-2.4.20-hp_pnnl39/fs/ext3/inode.c 2004-11-02 22:16:42.000000000 +0300 @@ -2500,6 +2500,118 @@ return err; } @@ -421,10 +423,10 @@ Index: linux/fs/ext3/inode.c /* * On success, We end up with an outstanding reference count against * iloc->bh. This _must_ be cleaned up later. 
-Index: linux/fs/ext3/file.c +Index: linux-2.4.20-hp_pnnl39/fs/ext3/file.c =================================================================== ---- linux.orig/fs/ext3/file.c Mon Feb 2 20:57:34 2004 -+++ linux/fs/ext3/file.c Mon Feb 2 20:58:05 2004 +--- linux-2.4.20-hp_pnnl39.orig/fs/ext3/file.c 2004-11-02 22:16:41.000000000 +0300 ++++ linux-2.4.20-hp_pnnl39/fs/ext3/file.c 2004-11-02 22:16:42.132490592 +0300 @@ -125,7 +125,11 @@ }; @@ -437,10 +439,10 @@ Index: linux/fs/ext3/file.c setattr: ext3_setattr, /* BKL held */ setxattr: ext3_setxattr, /* BKL held */ getxattr: ext3_getxattr, /* BKL held */ -Index: linux/include/linux/ext3_fs.h +Index: linux-2.4.20-hp_pnnl39/include/linux/ext3_fs.h =================================================================== ---- linux.orig/include/linux/ext3_fs.h Mon Feb 2 20:57:35 2004 -+++ linux/include/linux/ext3_fs.h Mon Feb 2 20:58:05 2004 +--- linux-2.4.20-hp_pnnl39.orig/include/linux/ext3_fs.h 2004-11-02 22:16:41.000000000 +0300 ++++ linux-2.4.20-hp_pnnl39/include/linux/ext3_fs.h 2004-11-02 22:16:42.000000000 +0300 @@ -193,6 +193,7 @@ */ #define EXT3_STATE_JDATA 0x00000001 /* journaled data exists */ @@ -467,10 +469,10 @@ Index: linux/include/linux/ext3_fs.h /* ioctl.c */ extern int ext3_ioctl (struct inode *, struct file *, unsigned int, -Index: linux/include/linux/ext3_fs_sb.h +Index: linux-2.4.20-hp_pnnl39/include/linux/ext3_fs_sb.h =================================================================== ---- linux.orig/include/linux/ext3_fs_sb.h Mon Feb 2 20:57:35 2004 -+++ linux/include/linux/ext3_fs_sb.h Mon Feb 2 20:58:05 2004 +--- linux-2.4.20-hp_pnnl39.orig/include/linux/ext3_fs_sb.h 2004-11-02 22:16:42.000000000 +0300 ++++ linux-2.4.20-hp_pnnl39/include/linux/ext3_fs_sb.h 2004-11-02 23:43:11.521583536 +0300 @@ -29,6 +29,8 @@ #define EXT3_MAX_GROUP_LOADED 8 diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.20-rh.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.20-rh.patch index 5a05370..562d38a 100644 --- 
a/lustre/kernel_patches/patches/ext3-extents-2.4.20-rh.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.4.20-rh.patch @@ -1,8 +1,8 @@ Index: linux-2.4.20-rh-20.9/fs/ext3/extents.c =================================================================== --- linux-2.4.20-rh-20.9.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.20-rh-20.9/fs/ext3/extents.c 2004-09-12 20:01:45.000000000 +0400 -@@ -0,0 +1,2262 @@ ++++ linux-2.4.20-rh-20.9/fs/ext3/extents.c 2004-11-03 00:31:41.927134640 +0300 +@@ -0,0 +1,2269 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -1283,14 +1283,15 @@ Index: linux-2.4.20-rh-20.9/fs/ext3/extents.c +} + +static inline void -+ext3_ext_put_in_cache(struct ext3_extents_tree *tree, struct ext3_extent *ex) ++ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block, ++ __u32 len, __u32 start, int type) +{ ++ EXT_ASSERT(len > 0); + if (tree->cex) { -+ EXT_ASSERT(ex); -+ EXT_ASSERT(ex->ee_len); -+ tree->cex->ee_block = ex->ee_block; -+ tree->cex->ee_start = ex->ee_start; -+ tree->cex->ee_len = ex->ee_len; ++ tree->cex->ec_type = type; ++ tree->cex->ec_block = block; ++ tree->cex->ec_len = len; ++ tree->cex->ec_start = start; + } +} + @@ -1304,7 +1305,8 @@ Index: linux-2.4.20-rh-20.9/fs/ext3/extents.c + unsigned long block) +{ + int depth = EXT_DEPTH(tree); -+ struct ext3_extent *ex, gex; ++ unsigned long lblock, len; ++ struct ext3_extent *ex; + + if (!tree->cex) + return; @@ -1312,63 +1314,64 @@ Index: linux-2.4.20-rh-20.9/fs/ext3/extents.c + ex = path[depth].p_ext; + if (ex == NULL) { + /* there is no extent yet, so gap is [0;-] */ -+ gex.ee_block = 0; -+ gex.ee_len = EXT_CACHE_MARK; ++ lblock = 0; ++ len = EXT_MAX_BLOCK; + ext_debug(tree, "cache gap(whole file):"); + } else if (block < ex->ee_block) { -+ gex.ee_block = block; -+ gex.ee_len = ex->ee_block - block; ++ lblock = block; ++ len = ex->ee_block - block; + ext_debug(tree, "cache gap(before): %lu 
[%lu:%lu]", + (unsigned long) block, + (unsigned long) ex->ee_block, + (unsigned long) ex->ee_len); + } else if (block >= ex->ee_block + ex->ee_len) { -+ gex.ee_block = ex->ee_block + ex->ee_len; -+ gex.ee_len = ext3_ext_next_allocated_block(path); ++ lblock = ex->ee_block + ex->ee_len; ++ len = ext3_ext_next_allocated_block(path); + ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", + (unsigned long) ex->ee_block, + (unsigned long) ex->ee_len, + (unsigned long) block); -+ EXT_ASSERT(gex.ee_len > gex.ee_block); -+ gex.ee_len = gex.ee_len - gex.ee_block; ++ EXT_ASSERT(len > lblock); ++ len = len - lblock; + } else { ++ lblock = len = 0; + BUG(); + } + -+ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) gex.ee_block, -+ (unsigned long) gex.ee_len); -+ gex.ee_start = EXT_CACHE_MARK; -+ ext3_ext_put_in_cache(tree, &gex); ++ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len); ++ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP); +} + +static inline int +ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, + struct ext3_extent *ex) +{ -+ struct ext3_extent *cex = tree->cex; ++ struct ext3_ext_cache *cex = tree->cex; + + /* is there cache storage at all? */ + if (!cex) -+ return 0; ++ return EXT3_EXT_CACHE_NO; + + /* has cache valid data? 
*/ -+ if (cex->ee_len == 0) -+ return 0; -+ -+ if (block >= cex->ee_block && block < cex->ee_block + cex->ee_len) { -+ ex->ee_block = cex->ee_block; -+ ex->ee_start = cex->ee_start; -+ ex->ee_len = cex->ee_len; ++ if (cex->ec_type == EXT3_EXT_CACHE_NO) ++ return EXT3_EXT_CACHE_NO; ++ ++ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP || ++ cex->ec_type == EXT3_EXT_CACHE_EXTENT); ++ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { ++ ex->ee_block = cex->ec_block; ++ ex->ee_start = cex->ec_start; ++ ex->ee_len = cex->ec_len; + ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", + (unsigned long) block, + (unsigned long) ex->ee_block, + (unsigned long) ex->ee_len, + (unsigned long) ex->ee_start); -+ return 1; ++ return cex->ec_type; + } + + /* not in cache */ -+ return 0; ++ return EXT3_EXT_CACHE_NO; +} + +/* @@ -1954,7 +1957,7 @@ Index: linux-2.4.20-rh-20.9/fs/ext3/extents.c + tree->root = (void *) EXT3_I(inode)->i_data; + tree->buffer = (void *) inode; + tree->buffer_len = sizeof(EXT3_I(inode)->i_data); -+ tree->cex = (struct ext3_extent *) &EXT3_I(inode)->i_cached_extent; ++ tree->cex = (struct ext3_ext_cache *) &EXT3_I(inode)->i_cached_extent; + tree->ops = &ext3_blockmap_helpers; +} + @@ -1974,19 +1977,20 @@ Index: linux-2.4.20-rh-20.9/fs/ext3/extents.c + down_write(&EXT3_I(inode)->truncate_sem); + + /* check in cache */ -+ if (ext3_ext_in_cache(&tree, iblock, &newex)) { -+ if (newex.ee_start == EXT_CACHE_MARK) { -+ /* this is cached gap */ ++ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) { ++ if (goal == EXT3_EXT_CACHE_GAP) { + if (!create) { + /* block isn't allocated yet and + * user don't want to allocate it */ + goto out2; + } + /* we should allocate requested block */ -+ } else if (newex.ee_start) { ++ } else if (goal == EXT3_EXT_CACHE_EXTENT) { + /* block is already allocated */ + newblock = iblock - newex.ee_block + newex.ee_start; + goto out; ++ } else { ++ EXT_ASSERT(0); + } + } + @@ -2014,7 +2018,9 @@ Index: 
linux-2.4.20-rh-20.9/fs/ext3/extents.c + ext_debug(&tree, "%d fit into %d:%d -> %d\n", + (int) iblock, ex->ee_block, ex->ee_len, + newblock); -+ ext3_ext_put_in_cache(&tree, ex); ++ ext3_ext_put_in_cache(&tree, ex->ee_block, ++ ex->ee_len, ex->ee_start, ++ EXT3_EXT_CACHE_EXTENT); + goto out; + } + } @@ -2052,7 +2058,8 @@ Index: linux-2.4.20-rh-20.9/fs/ext3/extents.c + newblock = newex.ee_start; + set_bit(BH_New, &bh_result->b_state); + -+ ext3_ext_put_in_cache(&tree, &newex); ++ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, ++ newex.ee_start, EXT3_EXT_CACHE_EXTENT); +out: + ext3_ext_show_leaf(&tree, path); + set_bit(BH_Mapped, &bh_result->b_state); @@ -2267,9 +2274,9 @@ Index: linux-2.4.20-rh-20.9/fs/ext3/extents.c + Index: linux-2.4.20-rh-20.9/fs/ext3/ialloc.c =================================================================== ---- linux-2.4.20-rh-20.9.orig/fs/ext3/ialloc.c 2004-09-12 20:00:59.000000000 +0400 -+++ linux-2.4.20-rh-20.9/fs/ext3/ialloc.c 2004-09-12 20:02:04.000000000 +0400 -@@ -593,10 +593,21 @@ +--- linux-2.4.20-rh-20.9.orig/fs/ext3/ialloc.c 2004-11-02 21:01:47.000000000 +0300 ++++ linux-2.4.20-rh-20.9/fs/ext3/ialloc.c 2004-11-02 21:03:00.000000000 +0300 +@@ -593,10 +593,22 @@ iloc.bh = NULL; goto fail; } @@ -2278,6 +2285,7 @@ Index: linux-2.4.20-rh-20.9/fs/ext3/ialloc.c + if (test_opt(sb, EXTENTS)) { + EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; ++ memset(&inode->u.ext3_i.i_cached_extent, 0, sizeof(__u32) * 4); + ext3_extents_initialize_blockmap(handle, inode); + if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS)) { + err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); @@ -2295,8 +2303,8 @@ Index: linux-2.4.20-rh-20.9/fs/ext3/ialloc.c if(DQUOT_ALLOC_INODE(inode)) { Index: linux-2.4.20-rh-20.9/fs/ext3/inode.c =================================================================== ---- linux-2.4.20-rh-20.9.orig/fs/ext3/inode.c 2004-09-12 20:01:10.000000000 +0400 -+++ linux-2.4.20-rh-20.9/fs/ext3/inode.c 2004-09-12 
20:01:45.000000000 +0400 +--- linux-2.4.20-rh-20.9.orig/fs/ext3/inode.c 2004-11-02 21:01:55.000000000 +0300 ++++ linux-2.4.20-rh-20.9/fs/ext3/inode.c 2004-11-02 21:03:00.000000000 +0300 @@ -852,6 +852,16 @@ goto reread; } @@ -2360,7 +2368,15 @@ Index: linux-2.4.20-rh-20.9/fs/ext3/inode.c handle = start_transaction(inode); if (IS_ERR(handle)) return; /* AKPM: return what? */ -@@ -2758,6 +2771,9 @@ +@@ -2426,6 +2439,7 @@ + for (block = 0; block < EXT3_N_BLOCKS; block++) + inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block]; + INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); ++ memset(&inode->u.ext3_i.i_cached_extent, 0, sizeof(__u32) * 4); + + if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) + inode->u.ext3_i.i_extra_isize = +@@ -2758,6 +2772,9 @@ int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; int ret; @@ -2370,7 +2386,7 @@ Index: linux-2.4.20-rh-20.9/fs/ext3/inode.c if (ext3_should_journal_data(inode)) ret = 3 * (bpp + indirects) + 2; else -@@ -3082,7 +3098,7 @@ +@@ -3082,7 +3099,7 @@ /* alloc blocks one by one */ for (i = 0; i < nblocks; i++) { @@ -2379,7 +2395,7 @@ Index: linux-2.4.20-rh-20.9/fs/ext3/inode.c &bh_tmp, 1, 1); if (ret) break; -@@ -3158,7 +3174,7 @@ +@@ -3158,7 +3175,7 @@ if (blocks[i] != 0) continue; @@ -2390,8 +2406,8 @@ Index: linux-2.4.20-rh-20.9/fs/ext3/inode.c "allocating block %ld\n", rc, iblock); Index: linux-2.4.20-rh-20.9/fs/ext3/Makefile =================================================================== ---- linux-2.4.20-rh-20.9.orig/fs/ext3/Makefile 2004-09-12 20:01:01.000000000 +0400 -+++ linux-2.4.20-rh-20.9/fs/ext3/Makefile 2004-09-12 20:01:45.000000000 +0400 +--- linux-2.4.20-rh-20.9.orig/fs/ext3/Makefile 2004-11-02 21:01:49.000000000 +0300 ++++ linux-2.4.20-rh-20.9/fs/ext3/Makefile 2004-11-02 21:01:58.000000000 +0300 @@ -13,7 +13,9 @@ obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ @@ -2405,8 +2421,8 @@ Index: linux-2.4.20-rh-20.9/fs/ext3/Makefile export-objs += xattr.o Index: 
linux-2.4.20-rh-20.9/fs/ext3/super.c =================================================================== ---- linux-2.4.20-rh-20.9.orig/fs/ext3/super.c 2004-09-12 20:00:59.000000000 +0400 -+++ linux-2.4.20-rh-20.9/fs/ext3/super.c 2004-09-12 20:01:45.000000000 +0400 +--- linux-2.4.20-rh-20.9.orig/fs/ext3/super.c 2004-11-02 21:01:47.000000000 +0300 ++++ linux-2.4.20-rh-20.9/fs/ext3/super.c 2004-11-02 21:01:58.000000000 +0300 @@ -623,6 +623,7 @@ int i; @@ -2437,8 +2453,8 @@ Index: linux-2.4.20-rh-20.9/fs/ext3/super.c Index: linux-2.4.20-rh-20.9/fs/ext3/ioctl.c =================================================================== ---- linux-2.4.20-rh-20.9.orig/fs/ext3/ioctl.c 2004-09-12 20:00:32.000000000 +0400 -+++ linux-2.4.20-rh-20.9/fs/ext3/ioctl.c 2004-09-12 20:01:45.000000000 +0400 +--- linux-2.4.20-rh-20.9.orig/fs/ext3/ioctl.c 2004-11-02 21:01:31.000000000 +0300 ++++ linux-2.4.20-rh-20.9/fs/ext3/ioctl.c 2004-11-02 21:01:58.000000000 +0300 @@ -189,6 +189,10 @@ return ret; } @@ -2452,8 +2468,8 @@ Index: linux-2.4.20-rh-20.9/fs/ext3/ioctl.c } Index: linux-2.4.20-rh-20.9/include/linux/ext3_fs.h =================================================================== ---- linux-2.4.20-rh-20.9.orig/include/linux/ext3_fs.h 2004-09-12 20:00:59.000000000 +0400 -+++ linux-2.4.20-rh-20.9/include/linux/ext3_fs.h 2004-09-12 20:02:04.000000000 +0400 +--- linux-2.4.20-rh-20.9.orig/include/linux/ext3_fs.h 2004-11-02 21:01:47.000000000 +0300 ++++ linux-2.4.20-rh-20.9/include/linux/ext3_fs.h 2004-11-02 21:01:58.000000000 +0300 @@ -184,6 +184,7 @@ #define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */ #define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ @@ -2521,8 +2537,8 @@ Index: linux-2.4.20-rh-20.9/include/linux/ext3_fs.h Index: linux-2.4.20-rh-20.9/include/linux/ext3_extents.h =================================================================== --- linux-2.4.20-rh-20.9.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300 -+++ 
linux-2.4.20-rh-20.9/include/linux/ext3_extents.h 2004-09-12 20:01:45.000000000 +0400 -@@ -0,0 +1,237 @@ ++++ linux-2.4.20-rh-20.9/include/linux/ext3_extents.h 2004-11-02 21:03:00.000000000 +0300 +@@ -0,0 +1,251 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -2562,7 +2578,7 @@ Index: linux-2.4.20-rh-20.9/include/linux/ext3_extents.h + * if EXT_DEBUG is defined you can use 'extdebug' mount option + * to get lots of info what's going on + */ -+#define EXT_DEBUG ++#define EXT_DEBUG_ +#ifdef EXT_DEBUG +#define ext_debug(tree,fmt,a...) \ +do { \ @@ -2644,6 +2660,20 @@ Index: linux-2.4.20-rh-20.9/include/linux/ext3_extents.h + */ + +/* ++ * storage for cached extent ++ */ ++struct ext3_ext_cache { ++ __u32 ec_start; ++ __u32 ec_block; ++ __u32 ec_len; ++ __u32 ec_type; ++}; ++ ++#define EXT3_EXT_CACHE_NO 0 ++#define EXT3_EXT_CACHE_GAP 1 ++#define EXT3_EXT_CACHE_EXTENT 2 ++ ++/* + * ext3_extents_tree is used to pass initial information + * to top-level extents API + */ @@ -2654,7 +2684,7 @@ Index: linux-2.4.20-rh-20.9/include/linux/ext3_extents.h + void *buffer; /* will be passed as arg to ^^ routines */ + int buffer_len; + void *private; -+ struct ext3_extent *cex;/* last found extent */ ++ struct ext3_ext_cache *cex;/* last found extent */ + struct ext3_extents_helpers *ops; +}; + @@ -2754,7 +2784,7 @@ Index: linux-2.4.20-rh-20.9/include/linux/ext3_extents.h +ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) +{ + if (tree->cex) -+ tree->cex->ee_len = 0; ++ tree->cex->ec_type = EXT3_EXT_CACHE_NO; +} + + @@ -2762,14 +2792,14 @@ Index: linux-2.4.20-rh-20.9/include/linux/ext3_extents.h + Index: linux-2.4.20-rh-20.9/include/linux/ext3_fs_i.h =================================================================== ---- linux-2.4.20-rh-20.9.orig/include/linux/ext3_fs_i.h 2004-09-12 20:00:59.000000000 +0400 -+++ linux-2.4.20-rh-20.9/include/linux/ext3_fs_i.h 2004-09-12 20:01:45.000000000 +0400 +--- 
linux-2.4.20-rh-20.9.orig/include/linux/ext3_fs_i.h 2004-11-02 21:01:47.000000000 +0300 ++++ linux-2.4.20-rh-20.9/include/linux/ext3_fs_i.h 2004-11-02 21:06:02.000000000 +0300 @@ -82,6 +82,8 @@ struct dynlock i_htree_lock; struct semaphore i_append_sem; struct semaphore i_rename_sem; + -+ __u32 i_cached_extent[3]; ++ __u32 i_cached_extent[4]; }; #endif /* _LINUX_EXT3_FS_I */ diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.21-chaos.patch index ea3d149..1ad8f36 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.4.21-chaos.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.4.21-chaos.patch @@ -1,8 +1,8 @@ -Index: linux-2.4.21-15.EL/fs/ext3/extents.c +Index: linux-2.4.21-20.EL/fs/ext3/extents.c =================================================================== ---- linux-2.4.21-15.EL.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.21-15.EL/fs/ext3/extents.c 2004-09-12 20:22:35.000000000 +0400 -@@ -0,0 +1,2271 @@ +--- linux-2.4.21-20.EL.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.4.21-20.EL/fs/ext3/extents.c 2004-11-03 00:35:30.644364336 +0300 +@@ -0,0 +1,2278 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -1283,14 +1283,15 @@ Index: linux-2.4.21-15.EL/fs/ext3/extents.c +} + +static inline void -+ext3_ext_put_in_cache(struct ext3_extents_tree *tree, struct ext3_extent *ex) ++ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block, ++ __u32 len, __u32 start, int type) +{ ++ EXT_ASSERT(len > 0); + if (tree->cex) { -+ EXT_ASSERT(ex); -+ EXT_ASSERT(ex->ee_len); -+ tree->cex->ee_block = ex->ee_block; -+ tree->cex->ee_start = ex->ee_start; -+ tree->cex->ee_len = ex->ee_len; ++ tree->cex->ec_type = type; ++ tree->cex->ec_block = block; ++ tree->cex->ec_len = len; ++ tree->cex->ec_start = start; + } +} + @@ -1304,7 +1305,8 @@ Index: linux-2.4.21-15.EL/fs/ext3/extents.c 
+ unsigned long block) +{ + int depth = EXT_DEPTH(tree); -+ struct ext3_extent *ex, gex; ++ unsigned long lblock, len; ++ struct ext3_extent *ex; + + if (!tree->cex) + return; @@ -1312,63 +1314,64 @@ Index: linux-2.4.21-15.EL/fs/ext3/extents.c + ex = path[depth].p_ext; + if (ex == NULL) { + /* there is no extent yet, so gap is [0;-] */ -+ gex.ee_block = 0; -+ gex.ee_len = EXT_CACHE_MARK; ++ lblock = 0; ++ len = EXT_MAX_BLOCK; + ext_debug(tree, "cache gap(whole file):"); + } else if (block < ex->ee_block) { -+ gex.ee_block = block; -+ gex.ee_len = ex->ee_block - block; ++ lblock = block; ++ len = ex->ee_block - block; + ext_debug(tree, "cache gap(before): %lu [%lu:%lu]", + (unsigned long) block, + (unsigned long) ex->ee_block, + (unsigned long) ex->ee_len); + } else if (block >= ex->ee_block + ex->ee_len) { -+ gex.ee_block = ex->ee_block + ex->ee_len; -+ gex.ee_len = ext3_ext_next_allocated_block(path); ++ lblock = ex->ee_block + ex->ee_len; ++ len = ext3_ext_next_allocated_block(path); + ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", + (unsigned long) ex->ee_block, + (unsigned long) ex->ee_len, + (unsigned long) block); -+ EXT_ASSERT(gex.ee_len > gex.ee_block); -+ gex.ee_len = gex.ee_len - gex.ee_block; ++ EXT_ASSERT(len > lblock); ++ len = len - lblock; + } else { ++ lblock = len = 0; + BUG(); + } + -+ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) gex.ee_block, -+ (unsigned long) gex.ee_len); -+ gex.ee_start = EXT_CACHE_MARK; -+ ext3_ext_put_in_cache(tree, &gex); ++ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len); ++ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP); +} + +static inline int +ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, + struct ext3_extent *ex) +{ -+ struct ext3_extent *cex = tree->cex; ++ struct ext3_ext_cache *cex = tree->cex; + + /* is there cache storage at all? */ + if (!cex) -+ return 0; ++ return EXT3_EXT_CACHE_NO; + + /* has cache valid data? 
*/ -+ if (cex->ee_len == 0) -+ return 0; -+ -+ if (block >= cex->ee_block && block < cex->ee_block + cex->ee_len) { -+ ex->ee_block = cex->ee_block; -+ ex->ee_start = cex->ee_start; -+ ex->ee_len = cex->ee_len; ++ if (cex->ec_type == EXT3_EXT_CACHE_NO) ++ return EXT3_EXT_CACHE_NO; ++ ++ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP || ++ cex->ec_type == EXT3_EXT_CACHE_EXTENT); ++ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { ++ ex->ee_block = cex->ec_block; ++ ex->ee_start = cex->ec_start; ++ ex->ee_len = cex->ec_len; + ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", + (unsigned long) block, + (unsigned long) ex->ee_block, + (unsigned long) ex->ee_len, + (unsigned long) ex->ee_start); -+ return 1; ++ return cex->ec_type; + } + + /* not in cache */ -+ return 0; ++ return EXT3_EXT_CACHE_NO; +} + +/* @@ -1954,7 +1957,7 @@ Index: linux-2.4.21-15.EL/fs/ext3/extents.c + tree->root = (void *) EXT3_I(inode)->i_data; + tree->buffer = (void *) inode; + tree->buffer_len = sizeof(EXT3_I(inode)->i_data); -+ tree->cex = (struct ext3_extent *) &EXT3_I(inode)->i_cached_extent; ++ tree->cex = (struct ext3_ext_cache *) &EXT3_I(inode)->i_cached_extent; + tree->ops = &ext3_blockmap_helpers; +} + @@ -1974,19 +1977,20 @@ Index: linux-2.4.21-15.EL/fs/ext3/extents.c + down_write(&EXT3_I(inode)->truncate_sem); + + /* check in cache */ -+ if (ext3_ext_in_cache(&tree, iblock, &newex)) { -+ if (newex.ee_start == EXT_CACHE_MARK) { -+ /* this is cached gap */ ++ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) { ++ if (goal == EXT3_EXT_CACHE_GAP) { + if (!create) { + /* block isn't allocated yet and + * user don't want to allocate it */ + goto out2; + } + /* we should allocate requested block */ -+ } else if (newex.ee_start) { ++ } else if (goal == EXT3_EXT_CACHE_EXTENT) { + /* block is already allocated */ + newblock = iblock - newex.ee_block + newex.ee_start; + goto out; ++ } else { ++ EXT_ASSERT(0); + } + } + @@ -2014,7 +2018,9 @@ Index: 
linux-2.4.21-15.EL/fs/ext3/extents.c + ext_debug(&tree, "%d fit into %d:%d -> %d\n", + (int) iblock, ex->ee_block, ex->ee_len, + newblock); -+ ext3_ext_put_in_cache(&tree, ex); ++ ext3_ext_put_in_cache(&tree, ex->ee_block, ++ ex->ee_len, ex->ee_start, ++ EXT3_EXT_CACHE_EXTENT); + goto out; + } + } @@ -2052,7 +2058,8 @@ Index: linux-2.4.21-15.EL/fs/ext3/extents.c + newblock = newex.ee_start; + set_bit(BH_New, &bh_result->b_state); + -+ ext3_ext_put_in_cache(&tree, &newex); ++ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, ++ newex.ee_start, EXT3_EXT_CACHE_EXTENT); +out: + ext3_ext_show_leaf(&tree, path); + set_bit(BH_Mapped, &bh_result->b_state); @@ -2274,11 +2281,11 @@ Index: linux-2.4.21-15.EL/fs/ext3/extents.c +EXPORT_SYMBOL(ext3_ext_find_goal); +EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert); + -Index: linux-2.4.21-15.EL/fs/ext3/ialloc.c +Index: linux-2.4.21-20.EL/fs/ext3/ialloc.c =================================================================== ---- linux-2.4.21-15.EL.orig/fs/ext3/ialloc.c 2004-09-12 20:22:15.000000000 +0400 -+++ linux-2.4.21-15.EL/fs/ext3/ialloc.c 2004-09-12 20:24:20.000000000 +0400 -@@ -596,9 +596,21 @@ +--- linux-2.4.21-20.EL.orig/fs/ext3/ialloc.c 2004-11-02 20:43:27.000000000 +0300 ++++ linux-2.4.21-20.EL/fs/ext3/ialloc.c 2004-11-02 20:53:34.000000000 +0300 +@@ -596,9 +596,22 @@ iloc.bh = NULL; goto fail; } @@ -2286,8 +2293,9 @@ Index: linux-2.4.21-15.EL/fs/ext3/ialloc.c - if (err) goto fail; + if (test_opt(sb, EXTENTS)) { -+ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; -+ ext3_extents_initialize_blockmap(handle, inode); ++ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; ++ memset(&inode->u.ext3_i.i_cached_extent, 0, sizeof(__u32) * 4); ++ ext3_extents_initialize_blockmap(handle, inode); + if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS)) { + err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); + if (err) goto fail; @@ -2302,10 +2310,10 @@ Index: linux-2.4.21-15.EL/fs/ext3/ialloc.c #ifdef 
CONFIG_EXT3_FS_XATTR -Index: linux-2.4.21-15.EL/fs/ext3/inode.c +Index: linux-2.4.21-20.EL/fs/ext3/inode.c =================================================================== ---- linux-2.4.21-15.EL.orig/fs/ext3/inode.c 2004-09-12 20:22:29.000000000 +0400 -+++ linux-2.4.21-15.EL/fs/ext3/inode.c 2004-09-12 20:22:35.000000000 +0400 +--- linux-2.4.21-20.EL.orig/fs/ext3/inode.c 2004-11-02 20:43:31.000000000 +0300 ++++ linux-2.4.21-20.EL/fs/ext3/inode.c 2004-11-02 20:53:34.000000000 +0300 @@ -859,6 +859,16 @@ goto reread; } @@ -2360,7 +2368,15 @@ Index: linux-2.4.21-15.EL/fs/ext3/inode.c handle = start_transaction(inode); if (IS_ERR(handle)) -@@ -2842,6 +2855,9 @@ +@@ -2502,6 +2515,7 @@ + for (block = 0; block < EXT3_N_BLOCKS; block++) + ei->i_data[block] = iloc.raw_inode->i_block[block]; + INIT_LIST_HEAD(&ei->i_orphan); ++ memset(&inode->u.ext3_i.i_cached_extent, 0, sizeof(__u32) * 4); + + if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) + EXT3_I(inode)->i_extra_isize = +@@ -2842,6 +2856,9 @@ int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 
5 : 3; int ret; @@ -2370,7 +2386,7 @@ Index: linux-2.4.21-15.EL/fs/ext3/inode.c if (ext3_should_journal_data(inode)) ret = 3 * (bpp + indirects) + 2; else -@@ -3166,7 +3182,7 @@ +@@ -3166,7 +3183,7 @@ /* alloc blocks one by one */ for (i = 0; i < nblocks; i++) { @@ -2379,7 +2395,7 @@ Index: linux-2.4.21-15.EL/fs/ext3/inode.c &bh_tmp, 1, 1); if (ret) break; -@@ -3242,7 +3258,7 @@ +@@ -3242,7 +3259,7 @@ if (blocks[i] != 0) continue; @@ -2388,10 +2404,10 @@ Index: linux-2.4.21-15.EL/fs/ext3/inode.c if (rc) { printk(KERN_INFO "ext3_map_inode_page: error %d " "allocating block %ld\n", rc, iblock); -Index: linux-2.4.21-15.EL/fs/ext3/Makefile +Index: linux-2.4.21-20.EL/fs/ext3/Makefile =================================================================== ---- linux-2.4.21-15.EL.orig/fs/ext3/Makefile 2004-09-12 20:22:01.000000000 +0400 -+++ linux-2.4.21-15.EL/fs/ext3/Makefile 2004-09-12 20:22:35.000000000 +0400 +--- linux-2.4.21-20.EL.orig/fs/ext3/Makefile 2004-11-02 20:43:19.000000000 +0300 ++++ linux-2.4.21-20.EL/fs/ext3/Makefile 2004-11-02 20:53:34.000000000 +0300 @@ -9,10 +9,11 @@ O_TARGET := ext3.o @@ -2406,10 +2422,10 @@ Index: linux-2.4.21-15.EL/fs/ext3/Makefile obj-m := $(O_TARGET) export-objs += xattr.o -Index: linux-2.4.21-15.EL/fs/ext3/super.c +Index: linux-2.4.21-20.EL/fs/ext3/super.c =================================================================== ---- linux-2.4.21-15.EL.orig/fs/ext3/super.c 2004-09-12 20:22:15.000000000 +0400 -+++ linux-2.4.21-15.EL/fs/ext3/super.c 2004-09-12 20:22:35.000000000 +0400 +--- linux-2.4.21-20.EL.orig/fs/ext3/super.c 2004-11-02 20:43:27.000000000 +0300 ++++ linux-2.4.21-20.EL/fs/ext3/super.c 2004-11-02 20:53:34.000000000 +0300 @@ -648,6 +648,7 @@ int i; @@ -2438,10 +2454,10 @@ Index: linux-2.4.21-15.EL/fs/ext3/super.c return sb; failed_mount3: -Index: linux-2.4.21-15.EL/fs/ext3/ioctl.c +Index: linux-2.4.21-20.EL/fs/ext3/ioctl.c =================================================================== ---- 
linux-2.4.21-15.EL.orig/fs/ext3/ioctl.c 2004-09-12 20:21:53.000000000 +0400 -+++ linux-2.4.21-15.EL/fs/ext3/ioctl.c 2004-09-12 20:22:35.000000000 +0400 +--- linux-2.4.21-20.EL.orig/fs/ext3/ioctl.c 2004-11-02 20:43:14.000000000 +0300 ++++ linux-2.4.21-20.EL/fs/ext3/ioctl.c 2004-11-02 20:53:34.000000000 +0300 @@ -173,6 +173,10 @@ return ret; } @@ -2453,10 +2469,10 @@ Index: linux-2.4.21-15.EL/fs/ext3/ioctl.c default: return -ENOTTY; } -Index: linux-2.4.21-15.EL/include/linux/ext3_fs.h +Index: linux-2.4.21-20.EL/include/linux/ext3_fs.h =================================================================== ---- linux-2.4.21-15.EL.orig/include/linux/ext3_fs.h 2004-09-12 20:22:15.000000000 +0400 -+++ linux-2.4.21-15.EL/include/linux/ext3_fs.h 2004-09-12 20:23:38.000000000 +0400 +--- linux-2.4.21-20.EL.orig/include/linux/ext3_fs.h 2004-11-02 20:43:27.000000000 +0300 ++++ linux-2.4.21-20.EL/include/linux/ext3_fs.h 2004-11-02 20:53:34.000000000 +0300 @@ -188,6 +188,7 @@ #define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */ #define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ @@ -2522,11 +2538,11 @@ Index: linux-2.4.21-15.EL/include/linux/ext3_fs.h #endif /* __KERNEL__ */ -Index: linux-2.4.21-15.EL/include/linux/ext3_extents.h +Index: linux-2.4.21-20.EL/include/linux/ext3_extents.h =================================================================== ---- linux-2.4.21-15.EL.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.21-15.EL/include/linux/ext3_extents.h 2004-09-12 20:22:35.000000000 +0400 -@@ -0,0 +1,237 @@ +--- linux-2.4.21-20.EL.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.4.21-20.EL/include/linux/ext3_extents.h 2004-11-02 20:53:34.000000000 +0300 +@@ -0,0 +1,251 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -2566,7 +2582,7 @@ Index: linux-2.4.21-15.EL/include/linux/ext3_extents.h + * if EXT_DEBUG is defined you can 
use 'extdebug' mount option + * to get lots of info what's going on + */ -+#define EXT_DEBUG ++#define EXT_DEBUG_ +#ifdef EXT_DEBUG +#define ext_debug(tree,fmt,a...) \ +do { \ @@ -2648,6 +2664,20 @@ Index: linux-2.4.21-15.EL/include/linux/ext3_extents.h + */ + +/* ++ * storage for cached extent ++ */ ++struct ext3_ext_cache { ++ __u32 ec_start; ++ __u32 ec_block; ++ __u32 ec_len; ++ __u32 ec_type; ++}; ++ ++#define EXT3_EXT_CACHE_NO 0 ++#define EXT3_EXT_CACHE_GAP 1 ++#define EXT3_EXT_CACHE_EXTENT 2 ++ ++/* + * ext3_extents_tree is used to pass initial information + * to top-level extents API + */ @@ -2658,7 +2688,7 @@ Index: linux-2.4.21-15.EL/include/linux/ext3_extents.h + void *buffer; /* will be passed as arg to ^^ routines */ + int buffer_len; + void *private; -+ struct ext3_extent *cex;/* last found extent */ ++ struct ext3_ext_cache *cex;/* last found extent */ + struct ext3_extents_helpers *ops; +}; + @@ -2758,22 +2788,22 @@ Index: linux-2.4.21-15.EL/include/linux/ext3_extents.h +ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) +{ + if (tree->cex) -+ tree->cex->ee_len = 0; ++ tree->cex->ec_type = EXT3_EXT_CACHE_NO; +} + + +#endif /* _LINUX_EXT3_EXTENTS */ + -Index: linux-2.4.21-15.EL/include/linux/ext3_fs_i.h +Index: linux-2.4.21-20.EL/include/linux/ext3_fs_i.h =================================================================== ---- linux-2.4.21-15.EL.orig/include/linux/ext3_fs_i.h 2004-09-12 20:22:15.000000000 +0400 -+++ linux-2.4.21-15.EL/include/linux/ext3_fs_i.h 2004-09-12 20:22:35.000000000 +0400 +--- linux-2.4.21-20.EL.orig/include/linux/ext3_fs_i.h 2004-11-02 20:43:27.000000000 +0300 ++++ linux-2.4.21-20.EL/include/linux/ext3_fs_i.h 2004-11-02 20:58:19.000000000 +0300 @@ -90,6 +90,8 @@ * by other means, so we have truncate_sem. 
*/ struct rw_semaphore truncate_sem; + -+ __u32 i_cached_extent[3]; ++ __u32 i_cached_extent[4]; }; #endif /* _LINUX_EXT3_FS_I */ diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch index 20355a5..d598e4e 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.4.21-suse2.patch @@ -1,8 +1,8 @@ Index: linux-2.4.21-suse2/fs/ext3/extents.c =================================================================== --- linux-2.4.21-suse2.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.21-suse2/fs/ext3/extents.c 2004-09-12 19:52:27.000000000 +0400 -@@ -0,0 +1,2262 @@ ++++ linux-2.4.21-suse2/fs/ext3/extents.c 2004-11-03 00:34:45.404241880 +0300 +@@ -0,0 +1,2269 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -1283,14 +1283,15 @@ Index: linux-2.4.21-suse2/fs/ext3/extents.c +} + +static inline void -+ext3_ext_put_in_cache(struct ext3_extents_tree *tree, struct ext3_extent *ex) ++ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block, ++ __u32 len, __u32 start, int type) +{ ++ EXT_ASSERT(len > 0); + if (tree->cex) { -+ EXT_ASSERT(ex); -+ EXT_ASSERT(ex->ee_len); -+ tree->cex->ee_block = ex->ee_block; -+ tree->cex->ee_start = ex->ee_start; -+ tree->cex->ee_len = ex->ee_len; ++ tree->cex->ec_type = type; ++ tree->cex->ec_block = block; ++ tree->cex->ec_len = len; ++ tree->cex->ec_start = start; + } +} + @@ -1304,7 +1305,8 @@ Index: linux-2.4.21-suse2/fs/ext3/extents.c + unsigned long block) +{ + int depth = EXT_DEPTH(tree); -+ struct ext3_extent *ex, gex; ++ unsigned long lblock, len; ++ struct ext3_extent *ex; + + if (!tree->cex) + return; @@ -1312,63 +1314,64 @@ Index: linux-2.4.21-suse2/fs/ext3/extents.c + ex = path[depth].p_ext; + if (ex == NULL) { + /* there is no extent yet, so gap is [0;-] */ -+ gex.ee_block = 0; -+ gex.ee_len = 
EXT_CACHE_MARK; ++ lblock = 0; ++ len = EXT_MAX_BLOCK; + ext_debug(tree, "cache gap(whole file):"); + } else if (block < ex->ee_block) { -+ gex.ee_block = block; -+ gex.ee_len = ex->ee_block - block; ++ lblock = block; ++ len = ex->ee_block - block; + ext_debug(tree, "cache gap(before): %lu [%lu:%lu]", + (unsigned long) block, + (unsigned long) ex->ee_block, + (unsigned long) ex->ee_len); + } else if (block >= ex->ee_block + ex->ee_len) { -+ gex.ee_block = ex->ee_block + ex->ee_len; -+ gex.ee_len = ext3_ext_next_allocated_block(path); ++ lblock = ex->ee_block + ex->ee_len; ++ len = ext3_ext_next_allocated_block(path); + ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", + (unsigned long) ex->ee_block, + (unsigned long) ex->ee_len, + (unsigned long) block); -+ EXT_ASSERT(gex.ee_len > gex.ee_block); -+ gex.ee_len = gex.ee_len - gex.ee_block; ++ EXT_ASSERT(len > lblock); ++ len = len - lblock; + } else { ++ lblock = len = 0; + BUG(); + } + -+ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) gex.ee_block, -+ (unsigned long) gex.ee_len); -+ gex.ee_start = EXT_CACHE_MARK; -+ ext3_ext_put_in_cache(tree, &gex); ++ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len); ++ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP); +} + +static inline int +ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, + struct ext3_extent *ex) +{ -+ struct ext3_extent *cex = tree->cex; ++ struct ext3_ext_cache *cex = tree->cex; + + /* is there cache storage at all? */ + if (!cex) -+ return 0; ++ return EXT3_EXT_CACHE_NO; + + /* has cache valid data? 
*/ -+ if (cex->ee_len == 0) -+ return 0; -+ -+ if (block >= cex->ee_block && block < cex->ee_block + cex->ee_len) { -+ ex->ee_block = cex->ee_block; -+ ex->ee_start = cex->ee_start; -+ ex->ee_len = cex->ee_len; ++ if (cex->ec_type == EXT3_EXT_CACHE_NO) ++ return EXT3_EXT_CACHE_NO; ++ ++ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP || ++ cex->ec_type == EXT3_EXT_CACHE_EXTENT); ++ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { ++ ex->ee_block = cex->ec_block; ++ ex->ee_start = cex->ec_start; ++ ex->ee_len = cex->ec_len; + ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", + (unsigned long) block, + (unsigned long) ex->ee_block, + (unsigned long) ex->ee_len, + (unsigned long) ex->ee_start); -+ return 1; ++ return cex->ec_type; + } + + /* not in cache */ -+ return 0; ++ return EXT3_EXT_CACHE_NO; +} + +/* @@ -1954,7 +1957,7 @@ Index: linux-2.4.21-suse2/fs/ext3/extents.c + tree->root = (void *) EXT3_I(inode)->i_data; + tree->buffer = (void *) inode; + tree->buffer_len = sizeof(EXT3_I(inode)->i_data); -+ tree->cex = (struct ext3_extent *) &EXT3_I(inode)->i_cached_extent; ++ tree->cex = (struct ext3_ext_cache *) &EXT3_I(inode)->i_cached_extent; + tree->ops = &ext3_blockmap_helpers; +} + @@ -1974,19 +1977,20 @@ Index: linux-2.4.21-suse2/fs/ext3/extents.c + down_write(&EXT3_I(inode)->truncate_sem); + + /* check in cache */ -+ if (ext3_ext_in_cache(&tree, iblock, &newex)) { -+ if (newex.ee_start == EXT_CACHE_MARK) { -+ /* this is cached gap */ ++ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) { ++ if (goal == EXT3_EXT_CACHE_GAP) { + if (!create) { + /* block isn't allocated yet and + * user don't want to allocate it */ + goto out2; + } + /* we should allocate requested block */ -+ } else if (newex.ee_start) { ++ } else if (goal == EXT3_EXT_CACHE_EXTENT) { + /* block is already allocated */ + newblock = iblock - newex.ee_block + newex.ee_start; + goto out; ++ } else { ++ EXT_ASSERT(0); + } + } + @@ -2014,7 +2018,9 @@ Index: 
linux-2.4.21-suse2/fs/ext3/extents.c + ext_debug(&tree, "%d fit into %d:%d -> %d\n", + (int) iblock, ex->ee_block, ex->ee_len, + newblock); -+ ext3_ext_put_in_cache(&tree, ex); ++ ext3_ext_put_in_cache(&tree, ex->ee_block, ++ ex->ee_len, ex->ee_start, ++ EXT3_EXT_CACHE_EXTENT); + goto out; + } + } @@ -2052,7 +2058,8 @@ Index: linux-2.4.21-suse2/fs/ext3/extents.c + newblock = newex.ee_start; + set_bit(BH_New, &bh_result->b_state); + -+ ext3_ext_put_in_cache(&tree, &newex); ++ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, ++ newex.ee_start, EXT3_EXT_CACHE_EXTENT); +out: + ext3_ext_show_leaf(&tree, path); + set_bit(BH_Mapped, &bh_result->b_state); @@ -2267,9 +2274,9 @@ Index: linux-2.4.21-suse2/fs/ext3/extents.c + Index: linux-2.4.21-suse2/fs/ext3/ialloc.c =================================================================== ---- linux-2.4.21-suse2.orig/fs/ext3/ialloc.c 2004-09-12 19:52:26.000000000 +0400 -+++ linux-2.4.21-suse2/fs/ext3/ialloc.c 2004-09-12 19:53:34.000000000 +0400 -@@ -592,10 +592,21 @@ +--- linux-2.4.21-suse2.orig/fs/ext3/ialloc.c 2004-11-02 20:31:37.000000000 +0300 ++++ linux-2.4.21-suse2/fs/ext3/ialloc.c 2004-11-02 20:34:00.000000000 +0300 +@@ -592,10 +592,22 @@ iloc.bh = NULL; goto fail; } @@ -2278,8 +2285,9 @@ Index: linux-2.4.21-suse2/fs/ext3/ialloc.c - + if (test_opt(sb, EXTENTS)) { -+ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; -+ ext3_extents_initialize_blockmap(handle, inode); ++ EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; ++ memset(&inode->u.ext3_i.i_cached_extent, 0, sizeof(__u32) * 4); ++ ext3_extents_initialize_blockmap(handle, inode); + if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS)) { + err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); + if (err) goto fail; @@ -2296,8 +2304,8 @@ Index: linux-2.4.21-suse2/fs/ext3/ialloc.c init_rwsem(&inode->u.ext3_i.xattr_sem); Index: linux-2.4.21-suse2/fs/ext3/inode.c =================================================================== ---- 
linux-2.4.21-suse2.orig/fs/ext3/inode.c 2004-09-12 19:52:27.000000000 +0400 -+++ linux-2.4.21-suse2/fs/ext3/inode.c 2004-09-12 19:52:27.000000000 +0400 +--- linux-2.4.21-suse2.orig/fs/ext3/inode.c 2004-11-02 20:31:38.000000000 +0300 ++++ linux-2.4.21-suse2/fs/ext3/inode.c 2004-11-02 20:34:00.000000000 +0300 @@ -853,6 +853,16 @@ goto reread; } @@ -2361,7 +2369,15 @@ Index: linux-2.4.21-suse2/fs/ext3/inode.c handle = start_transaction(inode); if (IS_ERR(handle)) return; /* AKPM: return what? */ -@@ -2664,6 +2677,9 @@ +@@ -2324,6 +2337,7 @@ + for (block = 0; block < EXT3_N_BLOCKS; block++) + inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block]; + INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); ++ memset(&inode->u.ext3_i.i_cached_extent, 0, sizeof(__u32) * 4); + + brelse (iloc.bh); + +@@ -2664,6 +2678,9 @@ int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; int ret; @@ -2371,7 +2387,7 @@ Index: linux-2.4.21-suse2/fs/ext3/inode.c if (ext3_should_journal_data(inode)) ret = 3 * (bpp + indirects) + 2; else -@@ -3100,7 +3116,7 @@ +@@ -3100,7 +3117,7 @@ /* alloc blocks one by one */ for (i = 0; i < nblocks; i++) { @@ -2380,7 +2396,7 @@ Index: linux-2.4.21-suse2/fs/ext3/inode.c &bh_tmp, 1, 1); if (ret) break; -@@ -3176,7 +3192,7 @@ +@@ -3176,7 +3193,7 @@ if (blocks[i] != 0) continue; @@ -2391,8 +2407,8 @@ Index: linux-2.4.21-suse2/fs/ext3/inode.c "allocating block %ld\n", rc, iblock); Index: linux-2.4.21-suse2/fs/ext3/Makefile =================================================================== ---- linux-2.4.21-suse2.orig/fs/ext3/Makefile 2004-09-12 19:52:25.000000000 +0400 -+++ linux-2.4.21-suse2/fs/ext3/Makefile 2004-09-12 19:52:27.000000000 +0400 +--- linux-2.4.21-suse2.orig/fs/ext3/Makefile 2004-11-02 20:31:33.000000000 +0300 ++++ linux-2.4.21-suse2/fs/ext3/Makefile 2004-11-02 20:31:39.000000000 +0300 @@ -12,7 +12,10 @@ export-objs := ext3-exports.o @@ -2407,8 +2423,8 @@ Index: linux-2.4.21-suse2/fs/ext3/Makefile export-objs += xattr.o Index: 
linux-2.4.21-suse2/fs/ext3/super.c =================================================================== ---- linux-2.4.21-suse2.orig/fs/ext3/super.c 2004-09-12 19:52:26.000000000 +0400 -+++ linux-2.4.21-suse2/fs/ext3/super.c 2004-09-12 19:52:27.000000000 +0400 +--- linux-2.4.21-suse2.orig/fs/ext3/super.c 2004-11-02 20:31:37.000000000 +0300 ++++ linux-2.4.21-suse2/fs/ext3/super.c 2004-11-02 20:31:39.000000000 +0300 @@ -624,6 +624,7 @@ int i; @@ -2439,8 +2455,8 @@ Index: linux-2.4.21-suse2/fs/ext3/super.c failed_mount3: Index: linux-2.4.21-suse2/fs/ext3/ioctl.c =================================================================== ---- linux-2.4.21-suse2.orig/fs/ext3/ioctl.c 2004-09-12 19:52:25.000000000 +0400 -+++ linux-2.4.21-suse2/fs/ext3/ioctl.c 2004-09-12 19:52:27.000000000 +0400 +--- linux-2.4.21-suse2.orig/fs/ext3/ioctl.c 2004-11-02 20:31:32.000000000 +0300 ++++ linux-2.4.21-suse2/fs/ext3/ioctl.c 2004-11-02 20:31:39.000000000 +0300 @@ -174,6 +174,10 @@ return ret; } @@ -2454,8 +2470,8 @@ Index: linux-2.4.21-suse2/fs/ext3/ioctl.c } Index: linux-2.4.21-suse2/include/linux/ext3_fs.h =================================================================== ---- linux-2.4.21-suse2.orig/include/linux/ext3_fs.h 2004-09-12 19:52:26.000000000 +0400 -+++ linux-2.4.21-suse2/include/linux/ext3_fs.h 2004-09-12 19:52:50.000000000 +0400 +--- linux-2.4.21-suse2.orig/include/linux/ext3_fs.h 2004-11-02 20:31:37.000000000 +0300 ++++ linux-2.4.21-suse2/include/linux/ext3_fs.h 2004-11-02 20:31:39.000000000 +0300 @@ -184,6 +184,7 @@ #define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */ #define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ @@ -2523,8 +2539,8 @@ Index: linux-2.4.21-suse2/include/linux/ext3_fs.h Index: linux-2.4.21-suse2/include/linux/ext3_extents.h =================================================================== --- linux-2.4.21-suse2.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300 -+++ 
linux-2.4.21-suse2/include/linux/ext3_extents.h 2004-09-12 19:52:27.000000000 +0400 -@@ -0,0 +1,237 @@ ++++ linux-2.4.21-suse2/include/linux/ext3_extents.h 2004-11-02 20:34:00.000000000 +0300 +@@ -0,0 +1,251 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -2564,7 +2580,7 @@ Index: linux-2.4.21-suse2/include/linux/ext3_extents.h + * if EXT_DEBUG is defined you can use 'extdebug' mount option + * to get lots of info what's going on + */ -+#define EXT_DEBUG ++#define EXT_DEBUG_ +#ifdef EXT_DEBUG +#define ext_debug(tree,fmt,a...) \ +do { \ @@ -2646,6 +2662,20 @@ Index: linux-2.4.21-suse2/include/linux/ext3_extents.h + */ + +/* ++ * storage for cached extent ++ */ ++struct ext3_ext_cache { ++ __u32 ec_start; ++ __u32 ec_block; ++ __u32 ec_len; ++ __u32 ec_type; ++}; ++ ++#define EXT3_EXT_CACHE_NO 0 ++#define EXT3_EXT_CACHE_GAP 1 ++#define EXT3_EXT_CACHE_EXTENT 2 ++ ++/* + * ext3_extents_tree is used to pass initial information + * to top-level extents API + */ @@ -2656,7 +2686,7 @@ Index: linux-2.4.21-suse2/include/linux/ext3_extents.h + void *buffer; /* will be passed as arg to ^^ routines */ + int buffer_len; + void *private; -+ struct ext3_extent *cex;/* last found extent */ ++ struct ext3_ext_cache *cex;/* last found extent */ + struct ext3_extents_helpers *ops; +}; + @@ -2756,7 +2786,7 @@ Index: linux-2.4.21-suse2/include/linux/ext3_extents.h +ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) +{ + if (tree->cex) -+ tree->cex->ee_len = 0; ++ tree->cex->ec_type = EXT3_EXT_CACHE_NO; +} + + @@ -2764,14 +2794,14 @@ Index: linux-2.4.21-suse2/include/linux/ext3_extents.h + Index: linux-2.4.21-suse2/include/linux/ext3_fs_i.h =================================================================== ---- linux-2.4.21-suse2.orig/include/linux/ext3_fs_i.h 2004-09-12 19:52:27.000000000 +0400 -+++ linux-2.4.21-suse2/include/linux/ext3_fs_i.h 2004-09-12 19:52:27.000000000 +0400 +--- 
linux-2.4.21-suse2.orig/include/linux/ext3_fs_i.h 2004-11-02 20:31:37.000000000 +0300 ++++ linux-2.4.21-suse2/include/linux/ext3_fs_i.h 2004-11-02 20:45:16.000000000 +0300 @@ -90,6 +90,8 @@ * by other means, so we have truncate_sem. */ struct rw_semaphore truncate_sem; + -+ __u32 i_cached_extent[3]; ++ __u32 i_cached_extent[4]; }; #endif /* _LINUX_EXT3_FS_I */ diff --git a/lustre/kernel_patches/patches/ext3-extents-2.4.24.patch b/lustre/kernel_patches/patches/ext3-extents-2.4.24.patch index 0159dd9..617b7e1 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.4.24.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.4.24.patch @@ -1,8 +1,8 @@ Index: linux-2.4.24/fs/ext3/extents.c =================================================================== --- linux-2.4.24.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.24/fs/ext3/extents.c 2004-09-12 19:57:09.000000000 +0400 -@@ -0,0 +1,2262 @@ ++++ linux-2.4.24/fs/ext3/extents.c 2004-11-03 00:36:44.894076664 +0300 +@@ -0,0 +1,2269 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -1283,14 +1283,15 @@ Index: linux-2.4.24/fs/ext3/extents.c +} + +static inline void -+ext3_ext_put_in_cache(struct ext3_extents_tree *tree, struct ext3_extent *ex) ++ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block, ++ __u32 len, __u32 start, int type) +{ ++ EXT_ASSERT(len > 0); + if (tree->cex) { -+ EXT_ASSERT(ex); -+ EXT_ASSERT(ex->ee_len); -+ tree->cex->ee_block = ex->ee_block; -+ tree->cex->ee_start = ex->ee_start; -+ tree->cex->ee_len = ex->ee_len; ++ tree->cex->ec_type = type; ++ tree->cex->ec_block = block; ++ tree->cex->ec_len = len; ++ tree->cex->ec_start = start; + } +} + @@ -1304,7 +1305,8 @@ Index: linux-2.4.24/fs/ext3/extents.c + unsigned long block) +{ + int depth = EXT_DEPTH(tree); -+ struct ext3_extent *ex, gex; ++ unsigned long lblock, len; ++ struct ext3_extent *ex; + + if (!tree->cex) + return; @@ -1312,63 +1314,64 @@ 
Index: linux-2.4.24/fs/ext3/extents.c + ex = path[depth].p_ext; + if (ex == NULL) { + /* there is no extent yet, so gap is [0;-] */ -+ gex.ee_block = 0; -+ gex.ee_len = EXT_CACHE_MARK; ++ lblock = 0; ++ len = EXT_MAX_BLOCK; + ext_debug(tree, "cache gap(whole file):"); + } else if (block < ex->ee_block) { -+ gex.ee_block = block; -+ gex.ee_len = ex->ee_block - block; ++ lblock = block; ++ len = ex->ee_block - block; + ext_debug(tree, "cache gap(before): %lu [%lu:%lu]", + (unsigned long) block, + (unsigned long) ex->ee_block, + (unsigned long) ex->ee_len); + } else if (block >= ex->ee_block + ex->ee_len) { -+ gex.ee_block = ex->ee_block + ex->ee_len; -+ gex.ee_len = ext3_ext_next_allocated_block(path); ++ lblock = ex->ee_block + ex->ee_len; ++ len = ext3_ext_next_allocated_block(path); + ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", + (unsigned long) ex->ee_block, + (unsigned long) ex->ee_len, + (unsigned long) block); -+ EXT_ASSERT(gex.ee_len > gex.ee_block); -+ gex.ee_len = gex.ee_len - gex.ee_block; ++ EXT_ASSERT(len > lblock); ++ len = len - lblock; + } else { ++ lblock = len = 0; + BUG(); + } + -+ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) gex.ee_block, -+ (unsigned long) gex.ee_len); -+ gex.ee_start = EXT_CACHE_MARK; -+ ext3_ext_put_in_cache(tree, &gex); ++ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len); ++ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP); +} + +static inline int +ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, + struct ext3_extent *ex) +{ -+ struct ext3_extent *cex = tree->cex; ++ struct ext3_ext_cache *cex = tree->cex; + + /* is there cache storage at all? */ + if (!cex) -+ return 0; ++ return EXT3_EXT_CACHE_NO; + + /* has cache valid data? 
*/ -+ if (cex->ee_len == 0) -+ return 0; -+ -+ if (block >= cex->ee_block && block < cex->ee_block + cex->ee_len) { -+ ex->ee_block = cex->ee_block; -+ ex->ee_start = cex->ee_start; -+ ex->ee_len = cex->ee_len; ++ if (cex->ec_type == EXT3_EXT_CACHE_NO) ++ return EXT3_EXT_CACHE_NO; ++ ++ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP || ++ cex->ec_type == EXT3_EXT_CACHE_EXTENT); ++ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { ++ ex->ee_block = cex->ec_block; ++ ex->ee_start = cex->ec_start; ++ ex->ee_len = cex->ec_len; + ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", + (unsigned long) block, + (unsigned long) ex->ee_block, + (unsigned long) ex->ee_len, + (unsigned long) ex->ee_start); -+ return 1; ++ return cex->ec_type; + } + + /* not in cache */ -+ return 0; ++ return EXT3_EXT_CACHE_NO; +} + +/* @@ -1954,7 +1957,7 @@ Index: linux-2.4.24/fs/ext3/extents.c + tree->root = (void *) EXT3_I(inode)->i_data; + tree->buffer = (void *) inode; + tree->buffer_len = sizeof(EXT3_I(inode)->i_data); -+ tree->cex = (struct ext3_extent *) &EXT3_I(inode)->i_cached_extent; ++ tree->cex = (struct ext3_ext_cache *) &EXT3_I(inode)->i_cached_extent; + tree->ops = &ext3_blockmap_helpers; +} + @@ -1974,19 +1977,20 @@ Index: linux-2.4.24/fs/ext3/extents.c + down_write(&EXT3_I(inode)->truncate_sem); + + /* check in cache */ -+ if (ext3_ext_in_cache(&tree, iblock, &newex)) { -+ if (newex.ee_start == EXT_CACHE_MARK) { -+ /* this is cached gap */ ++ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) { ++ if (goal == EXT3_EXT_CACHE_GAP) { + if (!create) { + /* block isn't allocated yet and + * user don't want to allocate it */ + goto out2; + } + /* we should allocate requested block */ -+ } else if (newex.ee_start) { ++ } else if (goal == EXT3_EXT_CACHE_EXTENT) { + /* block is already allocated */ + newblock = iblock - newex.ee_block + newex.ee_start; + goto out; ++ } else { ++ EXT_ASSERT(0); + } + } + @@ -2014,7 +2018,9 @@ Index: linux-2.4.24/fs/ext3/extents.c + 
ext_debug(&tree, "%d fit into %d:%d -> %d\n", + (int) iblock, ex->ee_block, ex->ee_len, + newblock); -+ ext3_ext_put_in_cache(&tree, ex); ++ ext3_ext_put_in_cache(&tree, ex->ee_block, ++ ex->ee_len, ex->ee_start, ++ EXT3_EXT_CACHE_EXTENT); + goto out; + } + } @@ -2052,7 +2058,8 @@ Index: linux-2.4.24/fs/ext3/extents.c + newblock = newex.ee_start; + set_bit(BH_New, &bh_result->b_state); + -+ ext3_ext_put_in_cache(&tree, &newex); ++ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, ++ newex.ee_start, EXT3_EXT_CACHE_EXTENT); +out: + ext3_ext_show_leaf(&tree, path); + set_bit(BH_Mapped, &bh_result->b_state); @@ -2267,9 +2274,9 @@ Index: linux-2.4.24/fs/ext3/extents.c + Index: linux-2.4.24/fs/ext3/ialloc.c =================================================================== ---- linux-2.4.24.orig/fs/ext3/ialloc.c 2004-09-12 19:57:08.000000000 +0400 -+++ linux-2.4.24/fs/ext3/ialloc.c 2004-09-12 19:57:45.000000000 +0400 -@@ -592,10 +592,21 @@ +--- linux-2.4.24.orig/fs/ext3/ialloc.c 2004-11-02 20:28:32.000000000 +0300 ++++ linux-2.4.24/fs/ext3/ialloc.c 2004-11-02 20:32:17.000000000 +0300 +@@ -592,10 +592,22 @@ iloc.bh = NULL; goto fail; } @@ -2278,6 +2285,7 @@ Index: linux-2.4.24/fs/ext3/ialloc.c + if (test_opt(sb, EXTENTS)) { + EXT3_I(inode)->i_flags |= EXT3_EXTENTS_FL; ++ memset(&inode->u.ext3_i.i_cached_extent, 0, sizeof(__u32) * 4); + ext3_extents_initialize_blockmap(handle, inode); + if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_EXTENTS)) { + err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh); @@ -2295,8 +2303,8 @@ Index: linux-2.4.24/fs/ext3/ialloc.c if(DQUOT_ALLOC_INODE(inode)) { Index: linux-2.4.24/fs/ext3/inode.c =================================================================== ---- linux-2.4.24.orig/fs/ext3/inode.c 2004-09-12 19:57:09.000000000 +0400 -+++ linux-2.4.24/fs/ext3/inode.c 2004-09-12 19:57:09.000000000 +0400 +--- linux-2.4.24.orig/fs/ext3/inode.c 2004-11-02 20:28:33.000000000 +0300 ++++ linux-2.4.24/fs/ext3/inode.c 
2004-11-02 20:32:17.000000000 +0300 @@ -848,6 +848,15 @@ goto reread; } @@ -2350,7 +2358,15 @@ Index: linux-2.4.24/fs/ext3/inode.c handle = start_transaction(inode); if (IS_ERR(handle)) return; /* AKPM: return what? */ -@@ -2537,6 +2549,9 @@ +@@ -2225,6 +2237,7 @@ + for (block = 0; block < EXT3_N_BLOCKS; block++) + inode->u.ext3_i.i_data[block] = iloc.raw_inode->i_block[block]; + INIT_LIST_HEAD(&inode->u.ext3_i.i_orphan); ++ memset(&inode->u.ext3_i.i_cached_extent, 0, sizeof(__u32) * 4); + + if (EXT3_INODE_SIZE(inode->i_sb) > EXT3_GOOD_OLD_INODE_SIZE) + inode->u.ext3_i.i_extra_isize = +@@ -2537,6 +2550,9 @@ int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; int ret; @@ -2360,7 +2376,7 @@ Index: linux-2.4.24/fs/ext3/inode.c if (ext3_should_journal_data(inode)) ret = 3 * (bpp + indirects) + 2; else -@@ -2973,7 +2988,7 @@ +@@ -2973,7 +2989,7 @@ /* alloc blocks one by one */ for (i = 0; i < nblocks; i++) { @@ -2369,7 +2385,7 @@ Index: linux-2.4.24/fs/ext3/inode.c &bh_tmp, 1); if (ret) break; -@@ -3049,7 +3064,7 @@ +@@ -3049,7 +3065,7 @@ if (blocks[i] != 0) continue; @@ -2380,8 +2396,8 @@ Index: linux-2.4.24/fs/ext3/inode.c "allocating block %ld\n", rc, iblock); Index: linux-2.4.24/fs/ext3/Makefile =================================================================== ---- linux-2.4.24.orig/fs/ext3/Makefile 2004-09-12 19:57:08.000000000 +0400 -+++ linux-2.4.24/fs/ext3/Makefile 2004-09-12 19:57:09.000000000 +0400 +--- linux-2.4.24.orig/fs/ext3/Makefile 2004-11-02 20:28:32.000000000 +0300 ++++ linux-2.4.24/fs/ext3/Makefile 2004-11-02 20:32:17.000000000 +0300 @@ -13,7 +13,9 @@ obj-y := balloc.o bitmap.o dir.o file.o fsync.o ialloc.o inode.o iopen.o \ @@ -2395,11 +2411,11 @@ Index: linux-2.4.24/fs/ext3/Makefile export-objs += xattr.o Index: linux-2.4.24/fs/ext3/super.c =================================================================== ---- linux-2.4.24.orig/fs/ext3/super.c 2004-09-12 19:57:08.000000000 +0400 -+++ linux-2.4.24/fs/ext3/super.c 2004-09-12 19:57:09.000000000 +0400 
+--- linux-2.4.24.orig/fs/ext3/super.c 2004-11-02 20:28:32.000000000 +0300 ++++ linux-2.4.24/fs/ext3/super.c 2004-11-02 20:32:17.000000000 +0300 @@ -532,6 +532,7 @@ #ifdef EXT3_DELETE_THREAD - J_ASSERT(sbi->s_delete_inodes == 0); + J_ASSERT(sbi->s_delete_inodes == 0); #endif + ext3_ext_release(sb); ext3_xattr_put_super(sb); @@ -2427,8 +2443,8 @@ Index: linux-2.4.24/fs/ext3/super.c failed_mount3: Index: linux-2.4.24/fs/ext3/ioctl.c =================================================================== ---- linux-2.4.24.orig/fs/ext3/ioctl.c 2004-09-12 19:57:07.000000000 +0400 -+++ linux-2.4.24/fs/ext3/ioctl.c 2004-09-12 19:57:09.000000000 +0400 +--- linux-2.4.24.orig/fs/ext3/ioctl.c 2004-11-02 20:28:29.000000000 +0300 ++++ linux-2.4.24/fs/ext3/ioctl.c 2004-11-02 20:32:17.000000000 +0300 @@ -174,6 +174,10 @@ return ret; } @@ -2442,8 +2458,8 @@ Index: linux-2.4.24/fs/ext3/ioctl.c } Index: linux-2.4.24/include/linux/ext3_fs.h =================================================================== ---- linux-2.4.24.orig/include/linux/ext3_fs.h 2004-09-12 19:57:08.000000000 +0400 -+++ linux-2.4.24/include/linux/ext3_fs.h 2004-09-12 19:57:45.000000000 +0400 +--- linux-2.4.24.orig/include/linux/ext3_fs.h 2004-11-02 20:28:32.000000000 +0300 ++++ linux-2.4.24/include/linux/ext3_fs.h 2004-11-02 20:32:17.000000000 +0300 @@ -184,6 +184,7 @@ #define EXT3_IMAGIC_FL 0x00002000 /* AFS directory */ #define EXT3_JOURNAL_DATA_FL 0x00004000 /* file data should be journaled */ @@ -2511,8 +2527,8 @@ Index: linux-2.4.24/include/linux/ext3_fs.h Index: linux-2.4.24/include/linux/ext3_extents.h =================================================================== --- linux-2.4.24.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.4.24/include/linux/ext3_extents.h 2004-09-12 19:57:09.000000000 +0400 -@@ -0,0 +1,237 @@ ++++ linux-2.4.24/include/linux/ext3_extents.h 2004-11-02 20:32:17.000000000 +0300 +@@ -0,0 +1,251 @@ +/* + * Copyright (c) 2003, Cluster File Systems, 
Inc, info@clusterfs.com + * Written by Alex Tomas @@ -2552,7 +2568,7 @@ Index: linux-2.4.24/include/linux/ext3_extents.h + * if EXT_DEBUG is defined you can use 'extdebug' mount option + * to get lots of info what's going on + */ -+#define EXT_DEBUG ++#define EXT_DEBUG_ +#ifdef EXT_DEBUG +#define ext_debug(tree,fmt,a...) \ +do { \ @@ -2634,6 +2650,20 @@ Index: linux-2.4.24/include/linux/ext3_extents.h + */ + +/* ++ * storage for cached extent ++ */ ++struct ext3_ext_cache { ++ __u32 ec_start; ++ __u32 ec_block; ++ __u32 ec_len; ++ __u32 ec_type; ++}; ++ ++#define EXT3_EXT_CACHE_NO 0 ++#define EXT3_EXT_CACHE_GAP 1 ++#define EXT3_EXT_CACHE_EXTENT 2 ++ ++/* + * ext3_extents_tree is used to pass initial information + * to top-level extents API + */ @@ -2644,7 +2674,7 @@ Index: linux-2.4.24/include/linux/ext3_extents.h + void *buffer; /* will be passed as arg to ^^ routines */ + int buffer_len; + void *private; -+ struct ext3_extent *cex;/* last found extent */ ++ struct ext3_ext_cache *cex;/* last found extent */ + struct ext3_extents_helpers *ops; +}; + @@ -2744,7 +2774,7 @@ Index: linux-2.4.24/include/linux/ext3_extents.h +ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) +{ + if (tree->cex) -+ tree->cex->ee_len = 0; ++ tree->cex->ec_type = EXT3_EXT_CACHE_NO; +} + + @@ -2752,14 +2782,14 @@ Index: linux-2.4.24/include/linux/ext3_extents.h + Index: linux-2.4.24/include/linux/ext3_fs_i.h =================================================================== ---- linux-2.4.24.orig/include/linux/ext3_fs_i.h 2004-09-12 19:57:08.000000000 +0400 -+++ linux-2.4.24/include/linux/ext3_fs_i.h 2004-09-12 19:57:09.000000000 +0400 +--- linux-2.4.24.orig/include/linux/ext3_fs_i.h 2004-11-02 20:28:32.000000000 +0300 ++++ linux-2.4.24/include/linux/ext3_fs_i.h 2004-11-02 20:43:45.000000000 +0300 @@ -76,6 +76,8 @@ * by other means, so we have truncate_sem. 
*/ struct rw_semaphore truncate_sem; + -+ __u32 i_cached_extent[3]; ++ __u32 i_cached_extent[4]; }; #endif /* _LINUX_EXT3_FS_I */ diff --git a/lustre/kernel_patches/patches/ext3-extents-2.6.5.patch b/lustre/kernel_patches/patches/ext3-extents-2.6.5.patch index 8a41b1c..cad7b54 100644 --- a/lustre/kernel_patches/patches/ext3-extents-2.6.5.patch +++ b/lustre/kernel_patches/patches/ext3-extents-2.6.5.patch @@ -1,9 +1,9 @@ %patch -Index: linux-2.6.7/fs/ext3/extents.c +Index: linux-2.6.5-sles9/fs/ext3/extents.c =================================================================== ---- linux-2.6.7.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.6.7/fs/ext3/extents.c 2004-08-19 08:53:49.000000000 +0400 -@@ -0,0 +1,2306 @@ +--- linux-2.6.5-sles9.orig/fs/ext3/extents.c 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.6.5-sles9/fs/ext3/extents.c 2004-11-09 02:25:56.143726112 +0300 +@@ -0,0 +1,2313 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -150,7 +150,7 @@ Index: linux-2.6.7/fs/ext3/extents.c + goal = bg_start + colour; + } + -+ newblock = ext3_new_block(handle, inode, goal, 0, 0, err); ++ newblock = ext3_new_block(handle, inode, goal, err); + return newblock; +} + @@ -1283,14 +1283,15 @@ Index: linux-2.6.7/fs/ext3/extents.c +} + +static inline void -+ext3_ext_put_in_cache(struct ext3_extents_tree *tree, struct ext3_extent *ex) ++ext3_ext_put_in_cache(struct ext3_extents_tree *tree, __u32 block, ++ __u32 len, __u32 start, int type) +{ ++ EXT_ASSERT(len > 0); + if (tree->cex) { -+ EXT_ASSERT(ex); -+ EXT_ASSERT(ex->ee_len); -+ tree->cex->ee_block = ex->ee_block; -+ tree->cex->ee_start = ex->ee_start; -+ tree->cex->ee_len = ex->ee_len; ++ tree->cex->ec_type = type; ++ tree->cex->ec_block = block; ++ tree->cex->ec_len = len; ++ tree->cex->ec_start = start; + } +} + @@ -1304,7 +1305,8 @@ Index: linux-2.6.7/fs/ext3/extents.c + unsigned long block) +{ + int depth = EXT_DEPTH(tree); -+ struct 
ext3_extent *ex, gex; ++ unsigned long lblock, len; ++ struct ext3_extent *ex; + + if (!tree->cex) + return; @@ -1312,63 +1314,64 @@ Index: linux-2.6.7/fs/ext3/extents.c + ex = path[depth].p_ext; + if (ex == NULL) { + /* there is no extent yet, so gap is [0;-] */ -+ gex.ee_block = 0; -+ gex.ee_len = EXT_CACHE_MARK; ++ lblock = 0; ++ len = EXT_MAX_BLOCK; + ext_debug(tree, "cache gap(whole file):"); + } else if (block < ex->ee_block) { -+ gex.ee_block = block; -+ gex.ee_len = ex->ee_block - block; ++ lblock = block; ++ len = ex->ee_block - block; + ext_debug(tree, "cache gap(before): %lu [%lu:%lu]", + (unsigned long) block, + (unsigned long) ex->ee_block, + (unsigned long) ex->ee_len); + } else if (block >= ex->ee_block + ex->ee_len) { -+ gex.ee_block = ex->ee_block + ex->ee_len; -+ gex.ee_len = ext3_ext_next_allocated_block(path); ++ lblock = ex->ee_block + ex->ee_len; ++ len = ext3_ext_next_allocated_block(path); + ext_debug(tree, "cache gap(after): [%lu:%lu] %lu", + (unsigned long) ex->ee_block, + (unsigned long) ex->ee_len, + (unsigned long) block); -+ EXT_ASSERT(gex.ee_len > gex.ee_block); -+ gex.ee_len = gex.ee_len - gex.ee_block; ++ EXT_ASSERT(len > lblock); ++ len = len - lblock; + } else { ++ lblock = len = 0; + BUG(); + } + -+ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) gex.ee_block, -+ (unsigned long) gex.ee_len); -+ gex.ee_start = EXT_CACHE_MARK; -+ ext3_ext_put_in_cache(tree, &gex); ++ ext_debug(tree, " -> %lu:%lu\n", (unsigned long) lblock, len); ++ ext3_ext_put_in_cache(tree, lblock, len, 0, EXT3_EXT_CACHE_GAP); +} + +static inline int +ext3_ext_in_cache(struct ext3_extents_tree *tree, unsigned long block, + struct ext3_extent *ex) +{ -+ struct ext3_extent *cex = tree->cex; ++ struct ext3_ext_cache *cex = tree->cex; + + /* is there cache storage at all? */ + if (!cex) -+ return 0; ++ return EXT3_EXT_CACHE_NO; + + /* has cache valid data? 
*/ -+ if (cex->ee_len == 0) -+ return 0; -+ -+ if (block >= cex->ee_block && block < cex->ee_block + cex->ee_len) { -+ ex->ee_block = cex->ee_block; -+ ex->ee_start = cex->ee_start; -+ ex->ee_len = cex->ee_len; ++ if (cex->ec_type == EXT3_EXT_CACHE_NO) ++ return EXT3_EXT_CACHE_NO; ++ ++ EXT_ASSERT(cex->ec_type == EXT3_EXT_CACHE_GAP || ++ cex->ec_type == EXT3_EXT_CACHE_EXTENT); ++ if (block >= cex->ec_block && block < cex->ec_block + cex->ec_len) { ++ ex->ee_block = cex->ec_block; ++ ex->ee_start = cex->ec_start; ++ ex->ee_len = cex->ec_len; + ext_debug(tree, "%lu cached by %lu:%lu:%lu\n", + (unsigned long) block, + (unsigned long) ex->ee_block, + (unsigned long) ex->ee_len, + (unsigned long) ex->ee_start); -+ return 1; ++ return cex->ec_type; + } + + /* not in cache */ -+ return 0; ++ return EXT3_EXT_CACHE_NO; +} + +/* @@ -1958,7 +1961,7 @@ Index: linux-2.6.7/fs/ext3/extents.c + ex->ee_len = 1; + /* allocate new block for the extent */ + goal = ext3_ext_find_goal(inode, path, ex->ee_block); -+ ex->ee_start = ext3_new_block(handle, inode, goal, 0, 0, err); ++ ex->ee_start = ext3_new_block(handle, inode, goal, err); + if (ex->ee_start == 0) { + /* error occured: restore old extent */ + ex->ee_start = newblock; @@ -1984,7 +1987,7 @@ Index: linux-2.6.7/fs/ext3/extents.c + tree->root = (void *) EXT3_I(inode)->i_data; + tree->buffer = (void *) inode; + tree->buffer_len = sizeof(EXT3_I(inode)->i_data); -+ tree->cex = (struct ext3_extent *) &EXT3_I(inode)->i_cached_extent; ++ tree->cex = (struct ext3_ext_cache *) &EXT3_I(inode)->i_cached_extent; + tree->ops = &ext3_blockmap_helpers; +} + @@ -2005,19 +2008,20 @@ Index: linux-2.6.7/fs/ext3/extents.c + down(&EXT3_I(inode)->truncate_sem); + + /* check in cache */ -+ if (ext3_ext_in_cache(&tree, iblock, &newex)) { -+ if (newex.ee_start == EXT_CACHE_MARK) { -+ /* this is cached gap */ ++ if ((goal = ext3_ext_in_cache(&tree, iblock, &newex))) { ++ if (goal == EXT3_EXT_CACHE_GAP) { + if (!create) { + /* block isn't allocated yet 
and + * user don't want to allocate it */ + goto out2; + } + /* we should allocate requested block */ -+ } else if (newex.ee_start) { ++ } else if (goal == EXT3_EXT_CACHE_EXTENT) { + /* block is already allocated */ + newblock = iblock - newex.ee_block + newex.ee_start; + goto out; ++ } else { ++ EXT_ASSERT(0); + } + } + @@ -2045,7 +2049,9 @@ Index: linux-2.6.7/fs/ext3/extents.c + ext_debug(&tree, "%d fit into %d:%d -> %d\n", + (int) iblock, ex->ee_block, ex->ee_len, + newblock); -+ ext3_ext_put_in_cache(&tree, ex); ++ ext3_ext_put_in_cache(&tree, ex->ee_block, ++ ex->ee_len, ex->ee_start, ++ EXT3_EXT_CACHE_EXTENT); + goto out; + } + } @@ -2062,7 +2068,7 @@ Index: linux-2.6.7/fs/ext3/extents.c + + /* allocate new block */ + goal = ext3_ext_find_goal(inode, path, iblock); -+ newblock = ext3_new_block(handle, inode, goal, 0, 0, &err); ++ newblock = ext3_new_block(handle, inode, goal, &err); + if (!newblock) + goto out2; + ext_debug(&tree, "allocate new block: goal %d, found %d\n", @@ -2083,7 +2089,8 @@ Index: linux-2.6.7/fs/ext3/extents.c + newblock = newex.ee_start; + set_buffer_new(bh_result); + -+ ext3_ext_put_in_cache(&tree, &newex); ++ ext3_ext_put_in_cache(&tree, newex.ee_block, newex.ee_len, ++ newex.ee_start, EXT3_EXT_CACHE_EXTENT); +out: + ext3_ext_show_leaf(&tree, path); + map_bh(bh_result, inode->i_sb, newblock); @@ -2310,11 +2317,11 @@ Index: linux-2.6.7/fs/ext3/extents.c +EXPORT_SYMBOL(ext3_ext_find_goal); +EXPORT_SYMBOL(ext3_ext_calc_credits_for_insert); + -Index: linux-2.6.7/fs/ext3/ialloc.c +Index: linux-2.6.5-sles9/fs/ext3/ialloc.c =================================================================== ---- linux-2.6.7.orig/fs/ext3/ialloc.c 2004-08-19 08:51:04.000000000 +0400 -+++ linux-2.6.7/fs/ext3/ialloc.c 2004-08-19 08:53:49.000000000 +0400 -@@ -646,6 +646,10 @@ +--- linux-2.6.5-sles9.orig/fs/ext3/ialloc.c 2004-11-09 02:22:55.763148128 +0300 ++++ linux-2.6.5-sles9/fs/ext3/ialloc.c 2004-11-09 02:23:21.587222272 +0300 +@@ -647,6 +647,10 @@ 
DQUOT_FREE_INODE(inode); goto fail2; } @@ -2325,11 +2332,11 @@ Index: linux-2.6.7/fs/ext3/ialloc.c err = ext3_mark_inode_dirty(handle, inode); if (err) { ext3_std_error(sb, err); -Index: linux-2.6.7/fs/ext3/inode.c +Index: linux-2.6.5-sles9/fs/ext3/inode.c =================================================================== ---- linux-2.6.7.orig/fs/ext3/inode.c 2004-08-19 08:51:04.000000000 +0400 -+++ linux-2.6.7/fs/ext3/inode.c 2004-08-19 08:53:49.000000000 +0400 -@@ -857,6 +857,17 @@ +--- linux-2.6.5-sles9.orig/fs/ext3/inode.c 2004-11-09 02:22:55.767147520 +0300 ++++ linux-2.6.5-sles9/fs/ext3/inode.c 2004-11-09 02:23:21.592221512 +0300 +@@ -796,6 +796,17 @@ goto reread; } @@ -2347,7 +2354,7 @@ Index: linux-2.6.7/fs/ext3/inode.c static int ext3_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create) { -@@ -867,8 +878,8 @@ +@@ -806,8 +817,8 @@ handle = ext3_journal_current_handle(); J_ASSERT(handle != 0); } @@ -2358,7 +2365,7 @@ Index: linux-2.6.7/fs/ext3/inode.c return ret; } -@@ -894,8 +905,8 @@ +@@ -833,8 +844,8 @@ } } if (ret == 0) @@ -2369,7 +2376,7 @@ Index: linux-2.6.7/fs/ext3/inode.c if (ret == 0) bh_result->b_size = (1 << inode->i_blkbits); return ret; -@@ -916,7 +927,7 @@ +@@ -855,7 +866,7 @@ dummy.b_state = 0; dummy.b_blocknr = -1000; buffer_trace_init(&dummy.b_history); @@ -2378,7 +2385,7 @@ Index: linux-2.6.7/fs/ext3/inode.c if (!*errp && buffer_mapped(&dummy)) { struct buffer_head *bh; bh = sb_getblk(inode->i_sb, dummy.b_blocknr); -@@ -1669,7 +1680,7 @@ +@@ -1587,7 +1598,7 @@ * This required during truncate. We need to physically zero the tail end * of that block so it doesn't yield old data if the file is later grown. 
*/ @@ -2387,7 +2394,7 @@ Index: linux-2.6.7/fs/ext3/inode.c struct address_space *mapping, loff_t from) { unsigned long index = from >> PAGE_CACHE_SHIFT; -@@ -2165,6 +2176,9 @@ +@@ -2083,6 +2094,9 @@ return; } @@ -2397,7 +2404,7 @@ Index: linux-2.6.7/fs/ext3/inode.c handle = start_transaction(inode); if (IS_ERR(handle)) { if (page) { -@@ -2888,6 +2902,9 @@ +@@ -2789,6 +2803,9 @@ int indirects = (EXT3_NDIR_BLOCKS % bpp) ? 5 : 3; int ret; @@ -2407,10 +2414,10 @@ Index: linux-2.6.7/fs/ext3/inode.c if (ext3_should_journal_data(inode)) ret = 3 * (bpp + indirects) + 2; else -Index: linux-2.6.7/fs/ext3/Makefile +Index: linux-2.6.5-sles9/fs/ext3/Makefile =================================================================== ---- linux-2.6.7.orig/fs/ext3/Makefile 2004-08-19 08:52:14.000000000 +0400 -+++ linux-2.6.7/fs/ext3/Makefile 2004-08-19 08:53:49.000000000 +0400 +--- linux-2.6.5-sles9.orig/fs/ext3/Makefile 2004-11-09 02:18:27.604914376 +0300 ++++ linux-2.6.5-sles9/fs/ext3/Makefile 2004-11-09 02:23:21.593221360 +0300 @@ -5,7 +5,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o @@ -2420,11 +2427,11 @@ Index: linux-2.6.7/fs/ext3/Makefile ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o -Index: linux-2.6.7/fs/ext3/super.c +Index: linux-2.6.5-sles9/fs/ext3/super.c =================================================================== ---- linux-2.6.7.orig/fs/ext3/super.c 2004-08-19 08:51:04.000000000 +0400 -+++ linux-2.6.7/fs/ext3/super.c 2004-08-19 08:53:49.000000000 +0400 -@@ -392,6 +392,7 @@ +--- linux-2.6.5-sles9.orig/fs/ext3/super.c 2004-11-09 02:22:56.450043704 +0300 ++++ linux-2.6.5-sles9/fs/ext3/super.c 2004-11-09 02:23:21.597220752 +0300 +@@ -389,6 +389,7 @@ struct ext3_super_block *es = sbi->s_es; int i; @@ -2432,17 +2439,18 @@ Index: linux-2.6.7/fs/ext3/super.c ext3_xattr_put_super(sb); journal_destroy(sbi->s_journal); if (!(sb->s_flags & MS_RDONLY)) { -@@ -455,6 +456,9 @@ - ei->i_default_acl = EXT3_ACL_NOT_CACHED; +@@ 
-447,6 +448,10 @@ #endif + ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; ei->vfs_inode.i_version = 1; + ei->i_cached_extent[0] = 0; + ei->i_cached_extent[1] = 0; + ei->i_cached_extent[2] = 0; ++ ei->i_cached_extent[3] = 0; return &ei->vfs_inode; } -@@ -590,7 +594,7 @@ +@@ -537,7 +542,7 @@ Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, @@ -2451,8 +2459,8 @@ Index: linux-2.6.7/fs/ext3/super.c }; static match_table_t tokens = { -@@ -638,6 +642,8 @@ - {Opt_iopen, "iopen"}, +@@ -582,6 +587,8 @@ + {Opt_iopen, "iopen"}, {Opt_noiopen, "noiopen"}, {Opt_iopen_nopriv, "iopen_nopriv"}, + {Opt_extents, "extents"}, @@ -2460,7 +2468,7 @@ Index: linux-2.6.7/fs/ext3/super.c {Opt_err, NULL} }; -@@ -917,6 +923,12 @@ +@@ -797,6 +804,12 @@ break; case Opt_ignore: break; @@ -2473,7 +2481,7 @@ Index: linux-2.6.7/fs/ext3/super.c default: printk (KERN_ERR "EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1589,6 +1601,8 @@ +@@ -1449,6 +1462,8 @@ percpu_counter_mod(&sbi->s_dirs_counter, ext3_count_dirs(sb)); @@ -2482,25 +2490,25 @@ Index: linux-2.6.7/fs/ext3/super.c return 0; failed_mount3: -Index: linux-2.6.7/fs/ext3/ioctl.c +Index: linux-2.6.5-sles9/fs/ext3/ioctl.c =================================================================== ---- linux-2.6.7.orig/fs/ext3/ioctl.c 2004-08-19 08:51:03.000000000 +0400 -+++ linux-2.6.7/fs/ext3/ioctl.c 2004-08-19 08:53:49.000000000 +0400 -@@ -176,6 +176,10 @@ - return ret; - } - #endif +--- linux-2.6.5-sles9.orig/fs/ext3/ioctl.c 2004-11-09 02:15:44.610693264 +0300 ++++ linux-2.6.5-sles9/fs/ext3/ioctl.c 2004-11-09 02:23:52.991448104 +0300 +@@ -124,6 +124,10 @@ + err = ext3_change_inode_journal_flag(inode, jflag); + return err; + } + case EXT3_IOC_GET_EXTENTS: + case EXT3_IOC_GET_TREE_STATS: + case EXT3_IOC_GET_TREE_DEPTH: + return ext3_ext_ioctl(inode, filp, cmd, arg); - default: - return -ENOTTY; - } 
-Index: linux-2.6.7/include/linux/ext3_fs.h + case EXT3_IOC_GETVERSION: + case EXT3_IOC_GETVERSION_OLD: + return put_user(inode->i_generation, (int *) arg); +Index: linux-2.6.5-sles9/include/linux/ext3_fs.h =================================================================== ---- linux-2.6.7.orig/include/linux/ext3_fs.h 2004-08-19 08:51:04.000000000 +0400 -+++ linux-2.6.7/include/linux/ext3_fs.h 2004-08-19 08:53:49.000000000 +0400 +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:22:58.767691368 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:25:17.238640584 +0300 @@ -186,6 +186,7 @@ #define EXT3_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ #define EXT3_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ @@ -2509,18 +2517,18 @@ Index: linux-2.6.7/include/linux/ext3_fs.h #define EXT3_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ #define EXT3_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ -@@ -209,6 +210,9 @@ - #ifdef CONFIG_JBD_DEBUG - #define EXT3_IOC_WAIT_FOR_READONLY _IOR('f', 99, long) +@@ -211,6 +212,9 @@ #endif -+#define EXT3_IOC_GET_EXTENTS _IOR('f', 5, long) -+#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 6, long) -+#define EXT3_IOC_GET_TREE_STATS _IOR('f', 7, long) + #define EXT3_IOC_GETRSVSZ _IOR('f', 5, long) + #define EXT3_IOC_SETRSVSZ _IOW('f', 6, long) ++#define EXT3_IOC_GET_EXTENTS _IOR('f', 7, long) ++#define EXT3_IOC_GET_TREE_DEPTH _IOR('f', 8, long) ++#define EXT3_IOC_GET_TREE_STATS _IOR('f', 9, long) /* * Structure of an inode on the disk -@@ -329,6 +333,8 @@ - #define EXT3_MOUNT_POSIX_ACL 0x8000 /* POSIX Access Control Lists */ +@@ -333,6 +337,8 @@ + #define EXT3_MOUNT_BARRIER 0x20000 /* Use block barriers */ #define EXT3_MOUNT_IOPEN 0x40000 /* Allow access via iopen */ #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ +#define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */ @@ -2528,7 +2536,7 @@ Index: linux-2.6.7/include/linux/ext3_fs.h /* 
Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef clear_opt -@@ -724,6 +730,7 @@ +@@ -729,6 +735,7 @@ /* inode.c */ @@ -2536,7 +2544,7 @@ Index: linux-2.6.7/include/linux/ext3_fs.h extern int ext3_forget(handle_t *, int, struct inode *, struct buffer_head *, int); extern struct buffer_head * ext3_getblk (handle_t *, struct inode *, long, int, int *); extern struct buffer_head * ext3_bread (handle_t *, struct inode *, int, int, int *); -@@ -796,6 +803,14 @@ +@@ -802,6 +809,14 @@ extern struct inode_operations ext3_symlink_inode_operations; extern struct inode_operations ext3_fast_symlink_inode_operations; @@ -2551,11 +2559,11 @@ Index: linux-2.6.7/include/linux/ext3_fs.h #endif /* __KERNEL__ */ -Index: linux-2.6.7/include/linux/ext3_extents.h +Index: linux-2.6.5-sles9/include/linux/ext3_extents.h =================================================================== ---- linux-2.6.7.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.6.7/include/linux/ext3_extents.h 2004-08-19 08:53:49.000000000 +0400 -@@ -0,0 +1,238 @@ +--- linux-2.6.5-sles9.orig/include/linux/ext3_extents.h 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_extents.h 2004-11-09 02:23:21.606219384 +0300 +@@ -0,0 +1,252 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -2595,7 +2603,7 @@ Index: linux-2.6.7/include/linux/ext3_extents.h + * if EXT_DEBUG is defined you can use 'extdebug' mount option + * to get lots of info what's going on + */ -+#define EXT_DEBUG ++#define EXT_DEBUG_ +#ifdef EXT_DEBUG +#define ext_debug(tree,fmt,a...) 
\ +do { \ @@ -2677,6 +2685,20 @@ Index: linux-2.6.7/include/linux/ext3_extents.h + */ + +/* ++ * storage for cached extent ++ */ ++struct ext3_ext_cache { ++ __u32 ec_start; ++ __u32 ec_block; ++ __u32 ec_len; ++ __u32 ec_type; ++}; ++ ++#define EXT3_EXT_CACHE_NO 0 ++#define EXT3_EXT_CACHE_GAP 1 ++#define EXT3_EXT_CACHE_EXTENT 2 ++ ++/* + * ext3_extents_tree is used to pass initial information + * to top-level extents API + */ @@ -2687,7 +2709,7 @@ Index: linux-2.6.7/include/linux/ext3_extents.h + void *buffer; /* will be passed as arg to ^^ routines */ + int buffer_len; + void *private; -+ struct ext3_extent *cex;/* last found extent */ ++ struct ext3_ext_cache *cex;/* last found extent */ + struct ext3_extents_helpers *ops; +}; + @@ -2788,35 +2810,35 @@ Index: linux-2.6.7/include/linux/ext3_extents.h +ext3_ext_invalidate_cache(struct ext3_extents_tree *tree) +{ + if (tree->cex) -+ tree->cex->ee_len = 0; ++ tree->cex->ec_type = EXT3_EXT_CACHE_NO; +} + + +#endif /* _LINUX_EXT3_EXTENTS */ + -Index: linux-2.6.7/include/linux/ext3_fs_i.h +Index: linux-2.6.5-sles9/include/linux/ext3_fs_i.h =================================================================== ---- linux-2.6.7.orig/include/linux/ext3_fs_i.h 2004-08-19 08:51:04.000000000 +0400 -+++ linux-2.6.7/include/linux/ext3_fs_i.h 2004-08-19 08:53:49.000000000 +0400 -@@ -111,6 +111,8 @@ +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_i.h 2004-11-09 02:22:55.780145544 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs_i.h 2004-11-09 02:23:21.606219384 +0300 +@@ -128,6 +128,8 @@ */ struct semaphore truncate_sem; struct inode vfs_inode; + -+ __u32 i_cached_extent[3]; ++ __u32 i_cached_extent[4]; }; #endif /* _LINUX_EXT3_FS_I */ %diffstat fs/ext3/Makefile | 2 - fs/ext3/extents.c | 2306 +++++++++++++++++++++++++++++++++++++++++++ + fs/ext3/extents.c | 2313 +++++++++++++++++++++++++++++++++++++++++++ fs/ext3/ialloc.c | 4 fs/ext3/inode.c | 29 fs/ext3/ioctl.c | 4 - fs/ext3/super.c | 16 - include/linux/ext3_extents.h | 238 
++++ + fs/ext3/super.c | 17 + include/linux/ext3_extents.h | 252 ++++ include/linux/ext3_fs.h | 15 include/linux/ext3_fs_i.h | 2 - 9 files changed, 2608 insertions(+), 8 deletions(-) + 9 files changed, 2630 insertions(+), 8 deletions(-) diff --git a/lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.20-rh.patch b/lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.20-rh.patch index e37dacd..20eac0aa 100644 --- a/lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.20-rh.patch +++ b/lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.20-rh.patch @@ -13,7 +13,7 @@ Index: linux-2.4.20-30.9/fs/ext3/inode.c nei->i_disksize = oei->i_disksize; nei->i_state |= EXT3_STATE_DELETE; -@@ -2447,6 +2451,12 @@ +@@ -2447,6 +2451,13 @@ brelse (iloc.bh); @@ -21,6 +21,7 @@ Index: linux-2.4.20-30.9/fs/ext3/inode.c + inode->u.ext3_i.i_cached_extent[0] = 0; + inode->u.ext3_i.i_cached_extent[1] = 0; + inode->u.ext3_i.i_cached_extent[2] = 0; ++ inode->u.ext3_i.i_cached_extent[3] = 0; + } + if (S_ISREG(inode->i_mode)) { diff --git a/lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.21-chaos.patch b/lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.21-chaos.patch index bf086e1..d425fe6 100644 --- a/lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.21-chaos.patch +++ b/lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.21-chaos.patch @@ -13,7 +13,7 @@ Index: 57chaos/fs/ext3/inode.c nei->i_disksize = oei->i_disksize; nei->i_state |= EXT3_STATE_DELETE; -@@ -2522,6 +2526,12 @@ void ext3_read_inode(struct inode * inod +@@ -2522,6 +2526,13 @@ void ext3_read_inode(struct inode * inod else EXT3_I(inode)->i_extra_isize = 0; @@ -21,6 +21,7 @@ Index: 57chaos/fs/ext3/inode.c + inode->u.ext3_i.i_cached_extent[0] = 0; + inode->u.ext3_i.i_cached_extent[1] = 0; + inode->u.ext3_i.i_cached_extent[2] = 0; ++ inode->u.ext3_i.i_cached_extent[3] = 0; + } + if (S_ISREG(inode->i_mode)) { diff --git a/lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.24.patch 
b/lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.24.patch index 04d418f..44432fc 100644 --- a/lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.24.patch +++ b/lustre/kernel_patches/patches/ext3-extents-asyncdel-2.4.24.patch @@ -2,7 +2,7 @@ Index: linux-2.4.24/fs/ext3/inode.c =================================================================== --- linux-2.4.24.orig/fs/ext3/inode.c 2004-05-18 12:34:48.000000000 -0700 +++ linux-2.4.24/fs/ext3/inode.c 2004-05-18 12:47:50.000000000 -0700 -@@ -2244,6 +2244,12 @@ +@@ -2244,6 +2244,13 @@ else inode->u.ext3_i.i_extra_isize = 0; @@ -10,6 +10,7 @@ Index: linux-2.4.24/fs/ext3/inode.c + inode->u.ext3_i.i_cached_extent[0] = 0; + inode->u.ext3_i.i_cached_extent[1] = 0; + inode->u.ext3_i.i_cached_extent[2] = 0; ++ inode->u.ext3_i.i_cached_extent[3] = 0; + } + if (S_ISREG(inode->i_mode)) { diff --git a/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch b/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch index 7c3d8bd..2408cc7 100644 --- a/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch +++ b/lustre/kernel_patches/patches/ext3-mballoc2-2.6-suse.patch @@ -1,8 +1,8 @@ -Index: linux-stage/fs/ext3/mballoc.c +Index: linux-2.6.5-sles9/fs/ext3/mballoc.c =================================================================== ---- linux-stage.orig/fs/ext3/mballoc.c 2003-01-30 05:24:37.000000000 -0500 -+++ linux-stage/fs/ext3/mballoc.c 2004-10-13 17:06:53.000000000 -0400 -@@ -0,0 +1,1397 @@ +--- linux-2.6.5-sles9.orig/fs/ext3/mballoc.c 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.6.5-sles9/fs/ext3/mballoc.c 2004-11-09 02:34:25.181340632 +0300 +@@ -0,0 +1,1428 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -118,12 +118,43 @@ Index: linux-stage/fs/ext3/mballoc.c +int ext3_create (struct inode *, struct dentry *, int, struct nameidata *); +struct buffer_head * read_block_bitmap(struct super_block *, unsigned int); +void 
ext3_free_blocks_old(handle_t *, struct inode *, unsigned long, unsigned long); -+int ext3_new_block_old(handle_t *, struct inode *, unsigned long, u32 *, u32 *, int *); ++int ext3_new_block_old(handle_t *, struct inode *, unsigned long, int *); +int ext3_mb_reserve_blocks(struct super_block *, int); +void ext3_mb_release_blocks(struct super_block *, int); +void ext3_mb_poll_new_transaction(struct super_block *, handle_t *); +void ext3_mb_free_committed_blocks(struct super_block *); + ++#define mb_correct_addr_and_bit(bit,addr) \ ++{ \ ++ if ((unsigned) addr & 1) { \ ++ bit += 8; \ ++ addr--; \ ++ } \ ++ if ((unsigned) addr & 2) { \ ++ bit += 16; \ ++ addr--; \ ++ addr--; \ ++ } \ ++} ++ ++static inline int mb_test_bit(int bit, void *addr) ++{ ++ mb_correct_addr_and_bit(bit,addr); ++ return test_bit(bit, addr); ++} ++ ++static inline void mb_set_bit(int bit, void *addr) ++{ ++ mb_correct_addr_and_bit(bit,addr); ++ set_bit(bit, addr); ++} ++ ++static inline void mb_clear_bit(int bit, void *addr) ++{ ++ mb_correct_addr_and_bit(bit,addr); ++ clear_bit(bit, addr); ++} ++ +static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max) +{ + int i = 1; @@ -232,22 +263,22 @@ Index: linux-stage/fs/ext3/mballoc.c + count = 0; + for (i = 0; i < max; i++) { + -+ if (!test_bit(i, buddy)) { ++ if (!mb_test_bit(i, buddy)) { + /* only single bit in buddy2 may be 1 */ -+ if (test_bit(i << 1, buddy2)) -+ J_ASSERT(!test_bit((i<<1)+1, buddy2)); -+ else if (test_bit((i << 1) + 1, buddy2)) -+ J_ASSERT(!test_bit(i << 1, buddy2)); ++ if (mb_test_bit(i << 1, buddy2)) ++ J_ASSERT(!mb_test_bit((i<<1)+1, buddy2)); ++ else if (mb_test_bit((i << 1) + 1, buddy2)) ++ J_ASSERT(!mb_test_bit(i << 1, buddy2)); + continue; + } + + /* both bits in buddy2 must be 0 */ -+ J_ASSERT(!test_bit(i << 1, buddy2)); -+ J_ASSERT(!test_bit((i << 1) + 1, buddy2)); ++ J_ASSERT(!mb_test_bit(i << 1, buddy2)); ++ J_ASSERT(!mb_test_bit((i << 1) + 1, buddy2)); + + for (j = 0; j < (1 << order); j++) { + k 
= (i * (1 << order)) + j; -+ J_ASSERT(test_bit(k, e3b->bd_bitmap)); ++ J_ASSERT(mb_test_bit(k, e3b->bd_bitmap)); + } + count++; + } @@ -257,14 +288,14 @@ Index: linux-stage/fs/ext3/mballoc.c + + buddy = mb_find_buddy(e3b, 0, &max); + for (i = 0; i < max; i++) { -+ if (test_bit(i, buddy)) ++ if (mb_test_bit(i, buddy)) + continue; + /* check used bits only */ + for (j = 0; j < e3b->bd_blkbits + 1; j++) { + buddy2 = mb_find_buddy(e3b, j, &max2); + k = i >> j; + J_ASSERT(k < max2); -+ J_ASSERT(!test_bit(k, buddy2)); ++ J_ASSERT(!mb_test_bit(k, buddy2)); + } + } +} @@ -295,7 +326,7 @@ Index: linux-stage/fs/ext3/mballoc.c + bb = e3b->bd_buddy; + while (order <= e3b->bd_blkbits + 1) { + block = block >> 1; -+ if (test_bit(block, bb)) { ++ if (mb_test_bit(block, bb)) { + /* this block is part of buddy of order 'order' */ + return order; + } @@ -318,7 +349,7 @@ Index: linux-stage/fs/ext3/mballoc.c + cur += 32; + continue; + } -+ clear_bit(cur, bm); ++ mb_clear_bit(cur, bm); + cur++; + } +} @@ -336,7 +367,7 @@ Index: linux-stage/fs/ext3/mballoc.c + cur += 32; + continue; + } -+ set_bit(cur, bm); ++ mb_set_bit(cur, bm); + cur++; + } +} @@ -351,8 +382,8 @@ Index: linux-stage/fs/ext3/mballoc.c + block = first++; + order = 0; + -+ J_ASSERT(!test_bit(block, e3b->bd_bitmap)); -+ set_bit(block, e3b->bd_bitmap); ++ J_ASSERT(!mb_test_bit(block, e3b->bd_bitmap)); ++ mb_set_bit(block, e3b->bd_bitmap); + e3b->bd_bd->bb_counters[order]++; + + /* start of the buddy */ @@ -360,8 +391,8 @@ Index: linux-stage/fs/ext3/mballoc.c + + do { + block &= ~1UL; -+ if (!test_bit(block, buddy) || -+ !test_bit(block + 1, buddy)) ++ if (!mb_test_bit(block, buddy) || ++ !mb_test_bit(block + 1, buddy)) + break; + + /* both the buddies are free, try to coalesce them */ @@ -373,8 +404,8 @@ Index: linux-stage/fs/ext3/mballoc.c + if (order > 0) { + /* for special purposes, we don't clear + * free bits in bitmap */ -+ clear_bit(block, buddy); -+ clear_bit(block + 1, buddy); ++ mb_clear_bit(block, buddy); ++ 
mb_clear_bit(block + 1, buddy); + } + e3b->bd_bd->bb_counters[order]--; + e3b->bd_bd->bb_counters[order]--; @@ -383,7 +414,7 @@ Index: linux-stage/fs/ext3/mballoc.c + order++; + e3b->bd_bd->bb_counters[order]++; + -+ set_bit(block, buddy2); ++ mb_set_bit(block, buddy2); + buddy = buddy2; + } while (1); + } @@ -448,7 +479,7 @@ Index: linux-stage/fs/ext3/mballoc.c + buddy = mb_find_buddy(e3b, order, &max); + J_ASSERT(buddy); + J_ASSERT(block < max); -+ if (!test_bit(block, buddy)) ++ if (!mb_test_bit(block, buddy)) + goto nofree; + + if (order == 0) { @@ -471,7 +502,7 @@ Index: linux-stage/fs/ext3/mballoc.c + break; + + next = (block + 1) * (1 << order); -+ if (!test_bit(next, e3b->bd_bitmap)) ++ if (!mb_test_bit(next, e3b->bd_bitmap)) + break; + + ord = mb_find_order_for_block(e3b, next); @@ -509,7 +540,7 @@ Index: linux-stage/fs/ext3/mballoc.c + mlen = 1 << ord; + buddy = mb_find_buddy(e3b, ord, &max); + J_ASSERT((start >> ord) < max); -+ clear_bit(start >> ord, buddy); ++ mb_clear_bit(start >> ord, buddy); + e3b->bd_bd->bb_counters[ord]--; + start -= mlen; + len -= mlen; @@ -521,14 +552,14 @@ Index: linux-stage/fs/ext3/mballoc.c + /* we have to split large buddy */ + J_ASSERT(ord > 0); + buddy = mb_find_buddy(e3b, ord, &max); -+ clear_bit(start >> ord, buddy); ++ mb_clear_bit(start >> ord, buddy); + e3b->bd_bd->bb_counters[ord]--; + + ord--; + cur = (start >> ord) & ~1U; + buddy = mb_find_buddy(e3b, ord, &max); -+ set_bit(cur, buddy); -+ set_bit(cur + 1, buddy); ++ mb_set_bit(cur, buddy); ++ mb_set_bit(cur + 1, buddy); + e3b->bd_bd->bb_counters[ord]++; + e3b->bd_bd->bb_counters[ord]++; + } @@ -556,7 +587,7 @@ Index: linux-stage/fs/ext3/mballoc.c + mlen = 1 << ord; + buddy = mb_find_buddy(e3b, ord, &max); + J_ASSERT((start >> ord) < max); -+ clear_bit(start >> ord, buddy); ++ mb_clear_bit(start >> ord, buddy); + e3b->bd_bd->bb_counters[ord]--; + start += mlen; + len -= mlen; @@ -567,14 +598,14 @@ Index: linux-stage/fs/ext3/mballoc.c + /* we have to split large 
buddy */ + J_ASSERT(ord > 0); + buddy = mb_find_buddy(e3b, ord, &max); -+ clear_bit(start >> ord, buddy); ++ mb_clear_bit(start >> ord, buddy); + e3b->bd_bd->bb_counters[ord]--; + + ord--; + cur = (start >> ord) & ~1U; + buddy = mb_find_buddy(e3b, ord, &max); -+ set_bit(cur, buddy); -+ set_bit(cur + 1, buddy); ++ mb_set_bit(cur, buddy); ++ mb_set_bit(cur + 1, buddy); + e3b->bd_bd->bb_counters[ord]++; + e3b->bd_bd->bb_counters[ord]++; + } @@ -615,7 +646,7 @@ Index: linux-stage/fs/ext3/mballoc.c + if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) { + /* someone asks for space at this specified block + * probably he wants to merge it into existing extent */ -+ if (test_bit(ac->ac_g_start, e3b->bd_bitmap)) { ++ if (mb_test_bit(ac->ac_g_start, e3b->bd_bitmap)) { + /* good. at least one block is free */ + max = mb_find_extent(e3b, 0, ac->ac_g_start, + ac->ac_g_len, &curex); @@ -720,7 +751,7 @@ Index: linux-stage/fs/ext3/mballoc.c + ext3_mballoc_warning++; + } + *len = 1; -+ err = ext3_new_block_old(handle, inode, goal, NULL,NULL, errp); ++ err = ext3_new_block_old(handle, inode, goal, errp); + return err; + } + @@ -857,7 +888,7 @@ Index: linux-stage/fs/ext3/mballoc.c + "block = %u", block); +#if 0 + for (i = 0; i < ac.ac_b_len; i++) -+ J_ASSERT(!test_bit(ac.ac_b_start + i, bitmap_bh->b_data)); ++ J_ASSERT(!mb_test_bit(ac.ac_b_start + i, bitmap_bh->b_data)); +#endif + mb_set_bits(bitmap_bh->b_data, ac.ac_b_start, ac.ac_b_len); + @@ -930,7 +961,7 @@ Index: linux-stage/fs/ext3/mballoc.c + + /* loop over the blocks, nad create buddies for free ones */ + for (i = 0; i < sb->s_blocksize * 8; i++) { -+ if (!test_bit(i, (void *) bh->b_data)) { ++ if (!mb_test_bit(i, (void *) bh->b_data)) { + mb_free_blocks(&e3b, i, 1); + count++; + } @@ -949,7 +980,7 @@ Index: linux-stage/fs/ext3/mballoc.c + +#define MB_CREDITS \ + (EXT3_DATA_TRANS_BLOCKS + 3 + EXT3_INDEX_EXTRA_TRANS_BLOCKS + \ -+ 2 * EXT3_QUOTA_INIT_BLOCKS) ++ + 2 * EXT3_SINGLEDATA_TRANS_BLOCKS) + +int 
ext3_mb_init_backend(struct super_block *sb) +{ @@ -1375,12 +1406,12 @@ Index: linux-stage/fs/ext3/mballoc.c +} + +int ext3_new_block(handle_t *handle, struct inode *inode, -+ unsigned long goal, u32 *pc, u32 *pb, int *errp) ++ unsigned long goal, int *errp) +{ + int ret, len; + + if (!test_opt(inode->i_sb, MBALLOC)) { -+ ret = ext3_new_block_old(handle, inode, goal, pc, pb, errp); ++ ret = ext3_new_block_old(handle, inode, goal, errp); + goto out; + } + len = 1; @@ -1400,10 +1431,10 @@ Index: linux-stage/fs/ext3/mballoc.c + return; +} + -Index: linux-stage/fs/ext3/super.c +Index: linux-2.6.5-sles9/fs/ext3/super.c =================================================================== ---- linux-stage.orig/fs/ext3/super.c 2004-10-13 17:06:53.000000000 -0400 -+++ linux-stage/fs/ext3/super.c 2004-10-13 17:06:53.000000000 -0400 +--- linux-2.6.5-sles9.orig/fs/ext3/super.c 2004-11-09 02:23:21.597220752 +0300 ++++ linux-2.6.5-sles9/fs/ext3/super.c 2004-11-09 02:26:12.572228600 +0300 @@ -389,6 +389,7 @@ struct ext3_super_block *es = sbi->s_es; int i; @@ -1412,16 +1443,16 @@ Index: linux-stage/fs/ext3/super.c ext3_ext_release(sb); ext3_xattr_put_super(sb); journal_destroy(sbi->s_journal); -@@ -543,7 +544,7 @@ +@@ -542,7 +543,7 @@ Opt_commit, Opt_journal_update, Opt_journal_inum, Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, Opt_ignore, Opt_barrier, Opt_iopen, Opt_noiopen, Opt_iopen_nopriv, - Opt_err, Opt_extents, Opt_extdebug -+ Opt_err, Opt_extents, Opt_extdebug, Opt_mballoc ++ Opt_err, Opt_extents, Opt_extdebug, Opt_mballoc, }; static match_table_t tokens = { -@@ -588,6 +589,7 @@ +@@ -589,6 +590,7 @@ {Opt_iopen_nopriv, "iopen_nopriv"}, {Opt_extents, "extents"}, {Opt_extdebug, "extdebug"}, @@ -1429,7 +1460,7 @@ Index: linux-stage/fs/ext3/super.c {Opt_err, NULL} }; -@@ -803,6 +805,9 @@ +@@ -810,6 +812,9 @@ case Opt_extdebug: set_opt (sbi->s_mount_opt, EXTDEBUG); break; @@ -1439,7 +1470,7 @@ Index: linux-stage/fs/ext3/super.c default: printk (KERN_ERR 
"EXT3-fs: Unrecognized mount option \"%s\" " -@@ -1444,7 +1449,8 @@ +@@ -1463,7 +1468,8 @@ ext3_count_dirs(sb)); ext3_ext_init(sb); @@ -1449,10 +1480,10 @@ Index: linux-stage/fs/ext3/super.c return 0; failed_mount3: -Index: linux-stage/fs/ext3/Makefile +Index: linux-2.6.5-sles9/fs/ext3/Makefile =================================================================== ---- linux-stage.orig/fs/ext3/Makefile 2004-10-13 17:06:53.000000000 -0400 -+++ linux-stage/fs/ext3/Makefile 2004-10-13 17:06:53.000000000 -0400 +--- linux-2.6.5-sles9.orig/fs/ext3/Makefile 2004-11-09 02:23:21.593221360 +0300 ++++ linux-2.6.5-sles9/fs/ext3/Makefile 2004-11-09 02:26:12.572228600 +0300 @@ -5,7 +5,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o @@ -1462,10 +1493,10 @@ Index: linux-stage/fs/ext3/Makefile ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o ext3-$(CONFIG_EXT3_FS_POSIX_ACL) += acl.o -Index: linux-stage/fs/ext3/balloc.c +Index: linux-2.6.5-sles9/fs/ext3/balloc.c =================================================================== ---- linux-stage.orig/fs/ext3/balloc.c 2004-10-13 17:06:52.000000000 -0400 -+++ linux-stage/fs/ext3/balloc.c 2004-10-13 17:06:53.000000000 -0400 +--- linux-2.6.5-sles9.orig/fs/ext3/balloc.c 2004-11-03 08:36:51.000000000 +0300 ++++ linux-2.6.5-sles9/fs/ext3/balloc.c 2004-11-09 02:26:53.078070776 +0300 @@ -78,7 +78,7 @@ * * Return buffer_head on success or NULL in case of failure. 
@@ -1475,32 +1506,28 @@ Index: linux-stage/fs/ext3/balloc.c read_block_bitmap(struct super_block *sb, unsigned int block_group) { struct ext3_group_desc * desc; -@@ -98,8 +98,8 @@ +@@ -274,7 +274,7 @@ } /* Free given blocks, update quota and i_blocks field */ --void ext3_free_blocks (handle_t *handle, struct inode * inode, -- unsigned long block, unsigned long count) -+void ext3_free_blocks_old (handle_t *handle, struct inode * inode, -+ unsigned long block, unsigned long count) +-void ext3_free_blocks(handle_t *handle, struct inode *inode, ++void ext3_free_blocks_old(handle_t *handle, struct inode *inode, + unsigned long block, unsigned long count) { struct buffer_head *bitmap_bh = NULL; - struct buffer_head *gd_bh; -@@ -528,8 +528,8 @@ +@@ -1142,7 +1142,7 @@ + * bitmap, and then for any free bit if that fails. * This function also updates quota and i_blocks field. */ - int --ext3_new_block(handle_t *handle, struct inode *inode, unsigned long goal, -- u32 *prealloc_count, u32 *prealloc_block, int *errp) -+ext3_new_block_old(handle_t *handle, struct inode *inode, unsigned long goal, -+ u32 *prealloc_count, u32 *prealloc_block, int *errp) +-int ext3_new_block(handle_t *handle, struct inode *inode, ++int ext3_new_block_old(handle_t *handle, struct inode *inode, + unsigned long goal, int *errp) { - struct buffer_head *bitmap_bh = NULL; /* bh */ - struct buffer_head *gdp_bh; /* bh2 */ -Index: linux-stage/fs/ext3/namei.c + struct buffer_head *bitmap_bh = NULL; +Index: linux-2.6.5-sles9/fs/ext3/namei.c =================================================================== ---- linux-stage.orig/fs/ext3/namei.c 2004-10-13 17:06:52.000000000 -0400 -+++ linux-stage/fs/ext3/namei.c 2004-10-13 17:06:53.000000000 -0400 +--- linux-2.6.5-sles9.orig/fs/ext3/namei.c 2004-11-09 02:18:27.616912552 +0300 ++++ linux-2.6.5-sles9/fs/ext3/namei.c 2004-11-09 02:26:12.580227384 +0300 @@ -1640,7 +1640,7 @@ * If the create succeeds, we fill in the inode information * with d_instantiate(). 
@@ -1510,20 +1537,11 @@ Index: linux-stage/fs/ext3/namei.c struct nameidata *nd) { handle_t *handle; -Index: linux-stage/fs/ext3/inode.c +Index: linux-2.6.5-sles9/fs/ext3/inode.c =================================================================== ---- linux-stage.orig/fs/ext3/inode.c 2004-10-13 17:06:53.000000000 -0400 -+++ linux-stage/fs/ext3/inode.c 2004-10-13 17:06:53.000000000 -0400 -@@ -256,7 +256,7 @@ - ei->i_prealloc_count = 0; - ei->i_prealloc_block = 0; - /* Writer: end */ -- ext3_free_blocks (inode, block, total); -+ ext3_free_blocks (inode, block, total, 1); - } - #endif - } -@@ -635,7 +635,7 @@ +--- linux-2.6.5-sles9.orig/fs/ext3/inode.c 2004-11-09 02:23:21.592221512 +0300 ++++ linux-2.6.5-sles9/fs/ext3/inode.c 2004-11-09 02:26:12.587226320 +0300 +@@ -572,7 +572,7 @@ ext3_journal_forget(handle, branch[i].bh); } for (i = 0; i < keys; i++) @@ -1532,7 +1550,7 @@ Index: linux-stage/fs/ext3/inode.c return err; } -@@ -736,7 +736,7 @@ +@@ -673,7 +673,7 @@ if (err == -EAGAIN) for (i = 0; i < num; i++) ext3_free_blocks(handle, inode, @@ -1541,7 +1559,7 @@ Index: linux-stage/fs/ext3/inode.c return err; } -@@ -1924,7 +1924,7 @@ +@@ -1829,7 +1829,7 @@ } } @@ -1550,7 +1568,7 @@ Index: linux-stage/fs/ext3/inode.c } /** -@@ -2095,7 +2095,7 @@ +@@ -2000,7 +2000,7 @@ ext3_journal_test_restart(handle, inode); } @@ -1559,10 +1577,10 @@ Index: linux-stage/fs/ext3/inode.c if (parent_bh) { /* -Index: linux-stage/fs/ext3/extents.c +Index: linux-2.6.5-sles9/fs/ext3/extents.c =================================================================== ---- linux-stage.orig/fs/ext3/extents.c 2004-10-13 17:06:53.000000000 -0400 -+++ linux-stage/fs/ext3/extents.c 2004-10-13 17:06:53.000000000 -0400 +--- linux-2.6.5-sles9.orig/fs/ext3/extents.c 2004-11-09 02:25:56.143726112 +0300 ++++ linux-2.6.5-sles9/fs/ext3/extents.c 2004-11-09 02:26:12.591225712 +0300 @@ -740,7 +740,7 @@ for (i = 0; i < depth; i++) { if (!ablocks[i]) @@ -1572,7 +1590,7 @@ Index: linux-stage/fs/ext3/extents.c } } 
kfree(ablocks); -@@ -1388,7 +1388,7 @@ +@@ -1391,7 +1391,7 @@ path->p_idx->ei_leaf); bh = sb_find_get_block(tree->inode->i_sb, path->p_idx->ei_leaf); ext3_forget(handle, 1, tree->inode, bh, path->p_idx->ei_leaf); @@ -1581,7 +1599,7 @@ Index: linux-stage/fs/ext3/extents.c return err; } -@@ -1876,10 +1876,12 @@ +@@ -1879,10 +1879,12 @@ int needed = ext3_remove_blocks_credits(tree, ex, from, to); handle_t *handle = ext3_journal_start(tree->inode, needed); struct buffer_head *bh; @@ -1595,7 +1613,7 @@ Index: linux-stage/fs/ext3/extents.c if (from >= ex->ee_block && to == ex->ee_block + ex->ee_len - 1) { /* tail removal */ unsigned long num, start; -@@ -1891,7 +1893,7 @@ +@@ -1894,7 +1896,7 @@ bh = sb_find_get_block(tree->inode->i_sb, start + i); ext3_forget(handle, 0, tree->inode, bh, start + i); } @@ -1604,10 +1622,10 @@ Index: linux-stage/fs/ext3/extents.c } else if (from == ex->ee_block && to <= ex->ee_block + ex->ee_len - 1) { printk("strange request: removal %lu-%lu from %u:%u\n", from, to, ex->ee_block, ex->ee_len); -Index: linux-stage/fs/ext3/xattr.c +Index: linux-2.6.5-sles9/fs/ext3/xattr.c =================================================================== ---- linux-stage.orig/fs/ext3/xattr.c 2004-10-13 17:06:53.000000000 -0400 -+++ linux-stage/fs/ext3/xattr.c 2004-10-13 17:06:53.000000000 -0400 +--- linux-2.6.5-sles9.orig/fs/ext3/xattr.c 2004-11-09 02:22:55.777146000 +0300 ++++ linux-2.6.5-sles9/fs/ext3/xattr.c 2004-11-09 02:26:12.593225408 +0300 @@ -1366,7 +1366,7 @@ new_bh = sb_getblk(sb, block); if (!new_bh) { @@ -1635,10 +1653,10 @@ Index: linux-stage/fs/ext3/xattr.c get_bh(bh); ext3_forget(handle, 1, inode, bh, EXT3_I(inode)->i_file_acl); } else { -Index: linux-stage/include/linux/ext3_fs.h +Index: linux-2.6.5-sles9/include/linux/ext3_fs.h =================================================================== ---- linux-stage.orig/include/linux/ext3_fs.h 2004-10-13 17:06:53.000000000 -0400 -+++ linux-stage/include/linux/ext3_fs.h 2004-10-13 
17:06:53.000000000 -0400 +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:25:17.238640584 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:26:12.596224952 +0300 @@ -57,6 +57,8 @@ #define ext3_debug(f, a...) do {} while (0) #endif @@ -1648,7 +1666,7 @@ Index: linux-stage/include/linux/ext3_fs.h /* * Special inodes numbers */ -@@ -336,6 +338,7 @@ +@@ -339,6 +341,7 @@ #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ #define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */ #define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */ @@ -1656,26 +1674,27 @@ Index: linux-stage/include/linux/ext3_fs.h /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef clear_opt -@@ -696,7 +699,7 @@ - extern int ext3_new_block (handle_t *, struct inode *, unsigned long, - __u32 *, __u32 *, int *); +@@ -698,7 +701,7 @@ + extern unsigned long ext3_bg_num_gdb(struct super_block *sb, int group); + extern int ext3_new_block (handle_t *, struct inode *, unsigned long, int *); extern void ext3_free_blocks (handle_t *, struct inode *, unsigned long, - unsigned long); + unsigned long, int); extern unsigned long ext3_count_free_blocks (struct super_block *); extern void ext3_check_blocks_bitmap (struct super_block *); extern struct ext3_group_desc * ext3_get_group_desc(struct super_block * sb, -Index: linux-stage/include/linux/ext3_fs_sb.h +Index: linux-2.6.5-sles9/include/linux/ext3_fs_sb.h =================================================================== ---- linux-stage.orig/include/linux/ext3_fs_sb.h 2004-10-13 17:06:52.000000000 -0400 -+++ linux-stage/include/linux/ext3_fs_sb.h 2004-10-13 17:06:53.000000000 -0400 -@@ -23,9 +23,29 @@ +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_sb.h 2004-11-09 02:20:51.598024096 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs_sb.h 2004-11-09 02:28:18.753046200 +0300 +@@ -23,10 +23,30 @@ #define EXT_INCLUDE #include #include +#include #endif #endif + #include 
+#define EXT3_BB_MAX_BLOCKS 30 +struct ext3_free_metadata { @@ -1699,7 +1718,7 @@ Index: linux-stage/include/linux/ext3_fs_sb.h /* * third extended-fs super-block data in memory */ -@@ -72,6 +92,17 @@ +@@ -78,6 +98,17 @@ struct timer_list turn_ro_timer; /* For turning read-only (crash simulation) */ wait_queue_head_t ro_wait_queue; /* For people waiting for the fs to go read-only */ #endif @@ -1717,31 +1736,3 @@ Index: linux-stage/include/linux/ext3_fs_sb.h }; #endif /* _LINUX_EXT3_FS_SB */ -Index: linux-stage/include/linux/ext3_jbd.h -=================================================================== ---- linux-stage.orig/include/linux/ext3_jbd.h 2004-10-13 17:06:52.000000000 -0400 -+++ linux-stage/include/linux/ext3_jbd.h 2004-10-13 19:12:30.000000000 -0400 -@@ -72,6 +72,23 @@ - - #define EXT3_INDEX_EXTRA_TRANS_BLOCKS 8 - -+#ifdef CONFIG_QUOTA -+/* Maximal numbers of writes for quota operation (insert/delete/update) -+ * (over all formats) - info block, 4 pointer blocks, data block */ -+#define DQUOT_MAX_WRITES 6 -+ -+/* Amount of blocks needed for quota update - we know that the structure was -+ * allocated so we need to update only inode+data */ -+#define EXT3_QUOTA_TRANS_BLOCKS 2 -+/* Amount of blocks needed for quota insert/delete - we do some block writes -+ * but inode, sb and group updates are done only once */ -+#define EXT3_QUOTA_INIT_BLOCKS (DQUOT_MAX_WRITES*\ -+ (EXT3_SINGLEDATA_TRANS_BLOCKS-3)+3) -+#else -+#define EXT3_QUOTA_TRANS_BLOCKS 0 -+#define EXT3_QUOTA_INIT_BLOCKS 0 -+#endif -+ - int - ext3_mark_iloc_dirty(handle_t *handle, - struct inode *inode, diff --git a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.7.patch b/lustre/kernel_patches/patches/ext3-mballoc2-2.6.7.patch index 6c40cd9..9d782c4 100644 --- a/lustre/kernel_patches/patches/ext3-mballoc2-2.6.7.patch +++ b/lustre/kernel_patches/patches/ext3-mballoc2-2.6.7.patch @@ -1,8 +1,8 @@ Index: linux-2.6.7/fs/ext3/mballoc.c =================================================================== 
--- linux-2.6.7.orig/fs/ext3/mballoc.c 2003-01-30 13:24:37.000000000 +0300 -+++ linux-2.6.7/fs/ext3/mballoc.c 2004-09-03 09:48:40.000000000 +0400 -@@ -0,0 +1,1397 @@ ++++ linux-2.6.7/fs/ext3/mballoc.c 2004-09-06 12:51:42.000000000 +0400 +@@ -0,0 +1,1428 @@ +/* + * Copyright (c) 2003, Cluster File Systems, Inc, info@clusterfs.com + * Written by Alex Tomas @@ -124,6 +124,37 @@ Index: linux-2.6.7/fs/ext3/mballoc.c +void ext3_mb_poll_new_transaction(struct super_block *, handle_t *); +void ext3_mb_free_committed_blocks(struct super_block *); + ++#define mb_correct_addr_and_bit(bit,addr) \ ++{ \ ++ if ((unsigned) addr & 1) { \ ++ bit += 8; \ ++ addr--; \ ++ } \ ++ if ((unsigned) addr & 2) { \ ++ bit += 16; \ ++ addr--; \ ++ addr--; \ ++ } \ ++} ++ ++static inline int mb_test_bit(int bit, void *addr) ++{ ++ mb_correct_addr_and_bit(bit,addr); ++ return test_bit(bit, addr); ++} ++ ++static inline void mb_set_bit(int bit, void *addr) ++{ ++ mb_correct_addr_and_bit(bit,addr); ++ set_bit(bit, addr); ++} ++ ++static inline void mb_clear_bit(int bit, void *addr) ++{ ++ mb_correct_addr_and_bit(bit,addr); ++ clear_bit(bit, addr); ++} ++ +static inline void *mb_find_buddy(struct ext3_buddy *e3b, int order, int *max) +{ + int i = 1; @@ -232,22 +263,22 @@ Index: linux-2.6.7/fs/ext3/mballoc.c + count = 0; + for (i = 0; i < max; i++) { + -+ if (!test_bit(i, buddy)) { ++ if (!mb_test_bit(i, buddy)) { + /* only single bit in buddy2 may be 1 */ -+ if (test_bit(i << 1, buddy2)) -+ J_ASSERT(!test_bit((i<<1)+1, buddy2)); -+ else if (test_bit((i << 1) + 1, buddy2)) -+ J_ASSERT(!test_bit(i << 1, buddy2)); ++ if (mb_test_bit(i << 1, buddy2)) ++ J_ASSERT(!mb_test_bit((i<<1)+1, buddy2)); ++ else if (mb_test_bit((i << 1) + 1, buddy2)) ++ J_ASSERT(!mb_test_bit(i << 1, buddy2)); + continue; + } + + /* both bits in buddy2 must be 0 */ -+ J_ASSERT(!test_bit(i << 1, buddy2)); -+ J_ASSERT(!test_bit((i << 1) + 1, buddy2)); ++ J_ASSERT(!mb_test_bit(i << 1, buddy2)); ++ J_ASSERT(!mb_test_bit((i << 1) + 1, 
buddy2)); + + for (j = 0; j < (1 << order); j++) { + k = (i * (1 << order)) + j; -+ J_ASSERT(test_bit(k, e3b->bd_bitmap)); ++ J_ASSERT(mb_test_bit(k, e3b->bd_bitmap)); + } + count++; + } @@ -257,14 +288,14 @@ Index: linux-2.6.7/fs/ext3/mballoc.c + + buddy = mb_find_buddy(e3b, 0, &max); + for (i = 0; i < max; i++) { -+ if (test_bit(i, buddy)) ++ if (mb_test_bit(i, buddy)) + continue; + /* check used bits only */ + for (j = 0; j < e3b->bd_blkbits + 1; j++) { + buddy2 = mb_find_buddy(e3b, j, &max2); + k = i >> j; + J_ASSERT(k < max2); -+ J_ASSERT(!test_bit(k, buddy2)); ++ J_ASSERT(!mb_test_bit(k, buddy2)); + } + } +} @@ -295,7 +326,7 @@ Index: linux-2.6.7/fs/ext3/mballoc.c + bb = e3b->bd_buddy; + while (order <= e3b->bd_blkbits + 1) { + block = block >> 1; -+ if (test_bit(block, bb)) { ++ if (mb_test_bit(block, bb)) { + /* this block is part of buddy of order 'order' */ + return order; + } @@ -318,7 +349,7 @@ Index: linux-2.6.7/fs/ext3/mballoc.c + cur += 32; + continue; + } -+ clear_bit(cur, bm); ++ mb_clear_bit(cur, bm); + cur++; + } +} @@ -336,7 +367,7 @@ Index: linux-2.6.7/fs/ext3/mballoc.c + cur += 32; + continue; + } -+ set_bit(cur, bm); ++ mb_set_bit(cur, bm); + cur++; + } +} @@ -351,8 +382,8 @@ Index: linux-2.6.7/fs/ext3/mballoc.c + block = first++; + order = 0; + -+ J_ASSERT(!test_bit(block, e3b->bd_bitmap)); -+ set_bit(block, e3b->bd_bitmap); ++ J_ASSERT(!mb_test_bit(block, e3b->bd_bitmap)); ++ mb_set_bit(block, e3b->bd_bitmap); + e3b->bd_bd->bb_counters[order]++; + + /* start of the buddy */ @@ -360,8 +391,8 @@ Index: linux-2.6.7/fs/ext3/mballoc.c + + do { + block &= ~1UL; -+ if (!test_bit(block, buddy) || -+ !test_bit(block + 1, buddy)) ++ if (!mb_test_bit(block, buddy) || ++ !mb_test_bit(block + 1, buddy)) + break; + + /* both the buddies are free, try to coalesce them */ @@ -373,8 +404,8 @@ Index: linux-2.6.7/fs/ext3/mballoc.c + if (order > 0) { + /* for special purposes, we don't clear + * free bits in bitmap */ -+ clear_bit(block, buddy); -+ 
clear_bit(block + 1, buddy); ++ mb_clear_bit(block, buddy); ++ mb_clear_bit(block + 1, buddy); + } + e3b->bd_bd->bb_counters[order]--; + e3b->bd_bd->bb_counters[order]--; @@ -383,7 +414,7 @@ Index: linux-2.6.7/fs/ext3/mballoc.c + order++; + e3b->bd_bd->bb_counters[order]++; + -+ set_bit(block, buddy2); ++ mb_set_bit(block, buddy2); + buddy = buddy2; + } while (1); + } @@ -448,7 +479,7 @@ Index: linux-2.6.7/fs/ext3/mballoc.c + buddy = mb_find_buddy(e3b, order, &max); + J_ASSERT(buddy); + J_ASSERT(block < max); -+ if (!test_bit(block, buddy)) ++ if (!mb_test_bit(block, buddy)) + goto nofree; + + if (order == 0) { @@ -471,7 +502,7 @@ Index: linux-2.6.7/fs/ext3/mballoc.c + break; + + next = (block + 1) * (1 << order); -+ if (!test_bit(next, e3b->bd_bitmap)) ++ if (!mb_test_bit(next, e3b->bd_bitmap)) + break; + + ord = mb_find_order_for_block(e3b, next); @@ -509,7 +540,7 @@ Index: linux-2.6.7/fs/ext3/mballoc.c + mlen = 1 << ord; + buddy = mb_find_buddy(e3b, ord, &max); + J_ASSERT((start >> ord) < max); -+ clear_bit(start >> ord, buddy); ++ mb_clear_bit(start >> ord, buddy); + e3b->bd_bd->bb_counters[ord]--; + start -= mlen; + len -= mlen; @@ -521,14 +552,14 @@ Index: linux-2.6.7/fs/ext3/mballoc.c + /* we have to split large buddy */ + J_ASSERT(ord > 0); + buddy = mb_find_buddy(e3b, ord, &max); -+ clear_bit(start >> ord, buddy); ++ mb_clear_bit(start >> ord, buddy); + e3b->bd_bd->bb_counters[ord]--; + + ord--; + cur = (start >> ord) & ~1U; + buddy = mb_find_buddy(e3b, ord, &max); -+ set_bit(cur, buddy); -+ set_bit(cur + 1, buddy); ++ mb_set_bit(cur, buddy); ++ mb_set_bit(cur + 1, buddy); + e3b->bd_bd->bb_counters[ord]++; + e3b->bd_bd->bb_counters[ord]++; + } @@ -556,7 +587,7 @@ Index: linux-2.6.7/fs/ext3/mballoc.c + mlen = 1 << ord; + buddy = mb_find_buddy(e3b, ord, &max); + J_ASSERT((start >> ord) < max); -+ clear_bit(start >> ord, buddy); ++ mb_clear_bit(start >> ord, buddy); + e3b->bd_bd->bb_counters[ord]--; + start += mlen; + len -= mlen; @@ -567,14 +598,14 @@ Index: 
linux-2.6.7/fs/ext3/mballoc.c + /* we have to split large buddy */ + J_ASSERT(ord > 0); + buddy = mb_find_buddy(e3b, ord, &max); -+ clear_bit(start >> ord, buddy); ++ mb_clear_bit(start >> ord, buddy); + e3b->bd_bd->bb_counters[ord]--; + + ord--; + cur = (start >> ord) & ~1U; + buddy = mb_find_buddy(e3b, ord, &max); -+ set_bit(cur, buddy); -+ set_bit(cur + 1, buddy); ++ mb_set_bit(cur, buddy); ++ mb_set_bit(cur + 1, buddy); + e3b->bd_bd->bb_counters[ord]++; + e3b->bd_bd->bb_counters[ord]++; + } @@ -615,7 +646,7 @@ Index: linux-2.6.7/fs/ext3/mballoc.c + if ((ac->ac_g_flags & 1) && ac->ac_g_group == group) { + /* someone asks for space at this specified block + * probably he wants to merge it into existing extent */ -+ if (test_bit(ac->ac_g_start, e3b->bd_bitmap)) { ++ if (mb_test_bit(ac->ac_g_start, e3b->bd_bitmap)) { + /* good. at least one block is free */ + max = mb_find_extent(e3b, 0, ac->ac_g_start, + ac->ac_g_len, &curex); @@ -857,7 +888,7 @@ Index: linux-2.6.7/fs/ext3/mballoc.c + "block = %u", block); +#if 0 + for (i = 0; i < ac.ac_b_len; i++) -+ J_ASSERT(!test_bit(ac.ac_b_start + i, bitmap_bh->b_data)); ++ J_ASSERT(!mb_test_bit(ac.ac_b_start + i, bitmap_bh->b_data)); +#endif + mb_set_bits(bitmap_bh->b_data, ac.ac_b_start, ac.ac_b_len); + @@ -930,7 +961,7 @@ Index: linux-2.6.7/fs/ext3/mballoc.c + + /* loop over the blocks, nad create buddies for free ones */ + for (i = 0; i < sb->s_blocksize * 8; i++) { -+ if (!test_bit(i, (void *) bh->b_data)) { ++ if (!mb_test_bit(i, (void *) bh->b_data)) { + mb_free_blocks(&e3b, i, 1); + count++; + } @@ -1650,9 +1681,9 @@ Index: linux-2.6.7/include/linux/ext3_fs.h */ @@ -335,6 +337,7 @@ #define EXT3_MOUNT_IOPEN_NOPRIV 0x80000 /* Make iopen world-readable */ - #define EXT3_MOUNT_EXTENTS 0x100000/* Extents support */ - #define EXT3_MOUNT_EXTDEBUG 0x200000/* Extents debug */ -+#define EXT3_MOUNT_MBALLOC 0x400000/* Buddy allocation support */ + #define EXT3_MOUNT_EXTENTS 0x10000 /* Extents support */ + #define 
EXT3_MOUNT_EXTDEBUG 0x20000 /* Extents debug */ ++#define EXT3_MOUNT_MBALLOC 0x100000/* Buddy allocation support */ /* Compatibility, for having both ext2_fs.h and ext3_fs.h included at once */ #ifndef clear_opt diff --git a/lustre/kernel_patches/patches/ext3-rename-reserve-2.6-suse.patch b/lustre/kernel_patches/patches/ext3-rename-reserve-2.6-suse.patch new file mode 100644 index 0000000..f323584 --- /dev/null +++ b/lustre/kernel_patches/patches/ext3-rename-reserve-2.6-suse.patch @@ -0,0 +1,263 @@ +Index: linux-2.6.5-sles9/include/linux/ext3_fs.h +=================================================================== +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:29:14.878513832 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:32:14.151260232 +0300 +@@ -709,7 +709,7 @@ + unsigned int block_group, + struct buffer_head ** bh); + extern int ext3_should_retry_alloc(struct super_block *sb, int *retries); +-extern void rsv_window_add(struct super_block *sb, struct reserve_window_node *rsv); ++extern void rsv_window_add(struct super_block *sb, struct ext3_reserve_window_node *rsv); + + /* dir.c */ + extern int ext3_check_dir_entry(const char *, struct inode *, +Index: linux-2.6.5-sles9/include/linux/ext3_fs_sb.h +=================================================================== +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_sb.h 2004-11-09 02:28:18.753046200 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs_sb.h 2004-11-09 02:32:27.996155488 +0300 +@@ -86,7 +86,7 @@ + /* root of the per fs reservation window tree */ + spinlock_t s_rsv_window_lock; + struct rb_root s_rsv_window_root; +- struct reserve_window_node s_rsv_window_head; ++ struct ext3_reserve_window_node s_rsv_window_head; + + /* Journaling */ + struct inode * s_journal_inode; +Index: linux-2.6.5-sles9/include/linux/ext3_fs_i.h +=================================================================== +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs_i.h 2004-11-09 
02:23:21.606219384 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs_i.h 2004-11-09 02:32:08.752081032 +0300 +@@ -20,17 +20,17 @@ + #include + #include + +-struct reserve_window { ++struct ext3_reserve_window { + __u32 _rsv_start; /* First byte reserved */ + __u32 _rsv_end; /* Last byte reserved or 0 */ + }; + +-struct reserve_window_node { ++struct ext3_reserve_window_node { + struct rb_node rsv_node; + atomic_t rsv_goal_size; + atomic_t rsv_alloc_hit; + seqlock_t rsv_seqlock; +- struct reserve_window rsv_window; ++ struct ext3_reserve_window rsv_window; + }; + + #define rsv_start rsv_window._rsv_start +@@ -76,7 +76,7 @@ + */ + __u32 i_next_alloc_goal; + /* block reservation window */ +- struct reserve_window_node i_rsv_window; ++ struct ext3_reserve_window_node i_rsv_window; + + __u32 i_dir_start_lookup; + #ifdef CONFIG_EXT3_FS_XATTR +Index: linux-2.6.5-sles9/fs/ext3/balloc.c +=================================================================== +--- linux-2.6.5-sles9.orig/fs/ext3/balloc.c 2004-11-09 02:26:53.078070776 +0300 ++++ linux-2.6.5-sles9/fs/ext3/balloc.c 2004-11-09 02:32:43.108858008 +0300 +@@ -115,7 +115,7 @@ + const char *fn) + { + struct rb_node *n; +- struct reserve_window_node *rsv, *prev; ++ struct ext3_reserve_window_node *rsv, *prev; + int bad; + + restart: +@@ -125,7 +125,7 @@ + + printk("Block Allocation Reservation Windows Map (%s):\n", fn); + while (n) { +- rsv = list_entry(n, struct reserve_window_node, rsv_node); ++ rsv = list_entry(n, struct ext3_reserve_window_node, rsv_node); + if (verbose) + printk("reservation window 0x%p " + "start: %d, end: %d\n", +@@ -161,7 +161,7 @@ + #endif + + static int +-goal_in_my_reservation(struct reserve_window *rsv, int goal, ++goal_in_my_reservation(struct ext3_reserve_window *rsv, int goal, + unsigned int group, struct super_block * sb) + { + unsigned long group_first_block, group_last_block; +@@ -184,18 +184,18 @@ + * if the goal is not in any window. 
+ * Returns NULL if there are no windows or if all windows start after the goal. + */ +-static struct reserve_window_node *search_reserve_window(struct rb_root *root, ++static struct ext3_reserve_window_node *search_ext3_reserve_window(struct rb_root *root, + unsigned long goal) + { + struct rb_node *n = root->rb_node; +- struct reserve_window_node *rsv; ++ struct ext3_reserve_window_node *rsv; + + if (!n) + return NULL; + + while (n) + { +- rsv = rb_entry(n, struct reserve_window_node, rsv_node); ++ rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); + + if (goal < rsv->rsv_start) + n = n->rb_left; +@@ -212,13 +212,13 @@ + */ + if (rsv->rsv_start > goal) { + n = rb_prev(&rsv->rsv_node); +- rsv = rb_entry(n, struct reserve_window_node, rsv_node); ++ rsv = rb_entry(n, struct ext3_reserve_window_node, rsv_node); + } + return rsv; + } + + void rsv_window_add(struct super_block *sb, +- struct reserve_window_node *rsv) ++ struct ext3_reserve_window_node *rsv) + { + struct rb_root *root = &EXT3_SB(sb)->s_rsv_window_root; + struct rb_node *node = &rsv->rsv_node; +@@ -226,12 +226,12 @@ + + struct rb_node ** p = &root->rb_node; + struct rb_node * parent = NULL; +- struct reserve_window_node *this; ++ struct ext3_reserve_window_node *this; + + while (*p) + { + parent = *p; +- this = rb_entry(parent, struct reserve_window_node, rsv_node); ++ this = rb_entry(parent, struct ext3_reserve_window_node, rsv_node); + + if (start < this->rsv_start) + p = &(*p)->rb_left; +@@ -246,7 +246,7 @@ + } + + static void rsv_window_remove(struct super_block *sb, +- struct reserve_window_node *rsv) ++ struct ext3_reserve_window_node *rsv) + { + rsv->rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; + rsv->rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; +@@ -254,7 +254,7 @@ + rb_erase(&rsv->rsv_node, &EXT3_SB(sb)->s_rsv_window_root); + } + +-static inline int rsv_is_empty(struct reserve_window *rsv) ++static inline int rsv_is_empty(struct ext3_reserve_window *rsv) + { + /* a valid reservation 
end block could not be 0 */ + return (rsv->_rsv_end == EXT3_RESERVE_WINDOW_NOT_ALLOCATED); +@@ -263,7 +263,7 @@ + void ext3_discard_reservation(struct inode *inode) + { + struct ext3_inode_info *ei = EXT3_I(inode); +- struct reserve_window_node *rsv = &ei->i_rsv_window; ++ struct ext3_reserve_window_node *rsv = &ei->i_rsv_window; + spinlock_t *rsv_lock = &EXT3_SB(inode->i_sb)->s_rsv_window_lock; + + if (!rsv_is_empty(&rsv->rsv_window)) { +@@ -600,7 +600,7 @@ + */ + static int + ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group, +- struct buffer_head *bitmap_bh, int goal, struct reserve_window *my_rsv) ++ struct buffer_head *bitmap_bh, int goal, struct ext3_reserve_window *my_rsv) + { + int group_first_block, start, end; + +@@ -700,13 +700,13 @@ + * on succeed, it returns the reservation window to be appended to. + * failed, return NULL. + */ +-static struct reserve_window_node *find_next_reservable_window( +- struct reserve_window_node *search_head, ++static struct ext3_reserve_window_node *find_next_reservable_window( ++ struct ext3_reserve_window_node *search_head, + unsigned long size, int *start_block, + int last_block) + { + struct rb_node *next; +- struct reserve_window_node *rsv, *prev; ++ struct ext3_reserve_window_node *rsv, *prev; + int cur; + + /* TODO: make the start of the reservation window byte-aligned */ +@@ -734,7 +734,7 @@ + + prev = rsv; + next = rb_next(&rsv->rsv_node); +- rsv = list_entry(next, struct reserve_window_node, rsv_node); ++ rsv = list_entry(next, struct ext3_reserve_window_node, rsv_node); + + /* + * Reached the last reservation, we can just append to the +@@ -801,15 +801,15 @@ + * @group: the group we are trying to allocate in + * @bitmap_bh: the block group block bitmap + */ +-static int alloc_new_reservation(struct reserve_window_node *my_rsv, ++static int alloc_new_reservation(struct ext3_reserve_window_node *my_rsv, + int goal, struct super_block *sb, + unsigned int group, struct buffer_head *bitmap_bh) + 
{ +- struct reserve_window_node *search_head; ++ struct ext3_reserve_window_node *search_head; + int group_first_block, group_end_block, start_block; + int first_free_block; + int reservable_space_start; +- struct reserve_window_node *prev_rsv; ++ struct ext3_reserve_window_node *prev_rsv; + struct rb_root *fs_rsv_root = &EXT3_SB(sb)->s_rsv_window_root; + unsigned long size; + +@@ -859,7 +859,7 @@ + /* + * shift the search start to the window near the goal block + */ +- search_head = search_reserve_window(fs_rsv_root, start_block); ++ search_head = search_ext3_reserve_window(fs_rsv_root, start_block); + + /* + * find_next_reservable_window() simply finds a reservable window +@@ -968,7 +968,7 @@ + static int + ext3_try_to_allocate_with_rsv(struct super_block *sb, handle_t *handle, + unsigned int group, struct buffer_head *bitmap_bh, +- int goal, struct reserve_window_node * my_rsv, ++ int goal, struct ext3_reserve_window_node * my_rsv, + int *errp) + { + spinlock_t *rsv_lock; +@@ -1027,7 +1027,7 @@ + * then we could go to allocate from the reservation window directly. 
+ */ + while (1) { +- struct reserve_window rsv_copy; ++ struct ext3_reserve_window rsv_copy; + unsigned int seq; + + do { +@@ -1159,8 +1159,8 @@ + struct ext3_group_desc *gdp; + struct ext3_super_block *es; + struct ext3_sb_info *sbi; +- struct reserve_window_node *my_rsv = NULL; +- struct reserve_window_node *rsv = &EXT3_I(inode)->i_rsv_window; ++ struct ext3_reserve_window_node *my_rsv = NULL; ++ struct ext3_reserve_window_node *rsv = &EXT3_I(inode)->i_rsv_window; + unsigned short windowsz = 0; + #ifdef EXT3FS_DEBUG + static int goal_hits, goal_attempts; diff --git a/lustre/kernel_patches/patches/iopen-2.6-suse.patch b/lustre/kernel_patches/patches/iopen-2.6-suse.patch index 0b58346..4e4bbaa 100644 --- a/lustre/kernel_patches/patches/iopen-2.6-suse.patch +++ b/lustre/kernel_patches/patches/iopen-2.6-suse.patch @@ -6,10 +6,10 @@ include/linux/ext3_fs.h | 2 7 files changed, 304 insertions(+), 1 deletion(-) -Index: linux-stage/fs/ext3/Makefile +Index: linux-2.6.5-sles9/fs/ext3/Makefile =================================================================== ---- linux-stage.orig/fs/ext3/Makefile 2004-11-03 14:41:24.747805262 -0500 -+++ linux-stage/fs/ext3/Makefile 2004-11-03 14:41:25.123696274 -0500 +--- linux-2.6.5-sles9.orig/fs/ext3/Makefile 2004-04-04 07:36:18.000000000 +0400 ++++ linux-2.6.5-sles9/fs/ext3/Makefile 2004-11-09 02:18:27.604914376 +0300 @@ -4,7 +4,7 @@ obj-$(CONFIG_EXT3_FS) += ext3.o @@ -19,10 +19,10 @@ Index: linux-stage/fs/ext3/Makefile ioctl.o namei.o super.o symlink.o hash.o ext3-$(CONFIG_EXT3_FS_XATTR) += xattr.o xattr_user.o xattr_trusted.o -Index: linux-stage/fs/ext3/inode.c +Index: linux-2.6.5-sles9/fs/ext3/inode.c =================================================================== ---- linux-stage.orig/fs/ext3/inode.c 2004-11-03 14:41:25.040720333 -0500 -+++ linux-stage/fs/ext3/inode.c 2004-11-03 14:46:08.458515670 -0500 +--- linux-2.6.5-sles9.orig/fs/ext3/inode.c 2004-11-09 02:15:44.739673656 +0300 ++++ linux-2.6.5-sles9/fs/ext3/inode.c 
2004-11-09 02:18:27.608913768 +0300 @@ -37,6 +37,7 @@ #include #include @@ -31,20 +31,20 @@ Index: linux-stage/fs/ext3/inode.c #include "acl.h" /* -@@ -2401,6 +2402,9 @@ - ei->i_default_acl = EXT3_ACL_NOT_CACHED; +@@ -2402,6 +2403,9 @@ #endif ei->i_rsv_window.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED; -+ -+ if (ext3_iopen_get_inode(inode)) -+ return; ++ if (ext3_iopen_get_inode(inode)) ++ return; ++ if (ext3_get_inode_loc(inode, &iloc, 0)) goto bad_inode; -Index: linux-stage/fs/ext3/iopen.c + bh = iloc.bh; +Index: linux-2.6.5-sles9/fs/ext3/iopen.c =================================================================== ---- linux-stage.orig/fs/ext3/iopen.c 1969-12-31 19:00:00.000000000 -0500 -+++ linux-stage/fs/ext3/iopen.c 2004-11-03 14:41:25.125695694 -0500 +--- linux-2.6.5-sles9.orig/fs/ext3/iopen.c 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.6.5-sles9/fs/ext3/iopen.c 2004-11-09 02:18:27.611913312 +0300 @@ -0,0 +1,272 @@ +/* + * linux/fs/ext3/iopen.c @@ -318,10 +318,10 @@ Index: linux-stage/fs/ext3/iopen.c + + return 1; +} -Index: linux-stage/fs/ext3/iopen.h +Index: linux-2.6.5-sles9/fs/ext3/iopen.h =================================================================== ---- linux-stage.orig/fs/ext3/iopen.h 1969-12-31 19:00:00.000000000 -0500 -+++ linux-stage/fs/ext3/iopen.h 2004-11-03 14:41:25.126695404 -0500 +--- linux-2.6.5-sles9.orig/fs/ext3/iopen.h 2003-01-30 13:24:37.000000000 +0300 ++++ linux-2.6.5-sles9/fs/ext3/iopen.h 2004-11-09 02:18:27.613913008 +0300 @@ -0,0 +1,15 @@ +/* + * iopen.h @@ -338,10 +338,10 @@ Index: linux-stage/fs/ext3/iopen.h +extern int ext3_iopen_get_inode(struct inode *inode); +extern struct dentry *iopen_connect_dentry(struct dentry *dentry, + struct inode *inode, int rehash); -Index: linux-stage/fs/ext3/namei.c +Index: linux-2.6.5-sles9/fs/ext3/namei.c =================================================================== ---- linux-stage.orig/fs/ext3/namei.c 2004-11-03 14:41:24.957744391 -0500 -+++ linux-stage/fs/ext3/namei.c 2004-11-03 
14:41:25.127695114 -0500 +--- linux-2.6.5-sles9.orig/fs/ext3/namei.c 2004-11-09 02:15:44.614692656 +0300 ++++ linux-2.6.5-sles9/fs/ext3/namei.c 2004-11-09 02:18:27.616912552 +0300 @@ -37,6 +37,7 @@ #include #include @@ -418,10 +418,10 @@ Index: linux-stage/fs/ext3/namei.c ext3_journal_stop(handle); if (err == -ENOSPC && ext3_should_retry_alloc(dir->i_sb, &retries)) goto retry; -Index: linux-stage/fs/ext3/super.c +Index: linux-2.6.5-sles9/fs/ext3/super.c =================================================================== ---- linux-stage.orig/fs/ext3/super.c 2004-11-03 14:41:25.043719463 -0500 -+++ linux-stage/fs/ext3/super.c 2004-11-03 14:41:25.129694535 -0500 +--- linux-2.6.5-sles9.orig/fs/ext3/super.c 2004-11-09 02:15:44.743673048 +0300 ++++ linux-2.6.5-sles9/fs/ext3/super.c 2004-11-09 02:18:27.620911944 +0300 @@ -534,7 +534,7 @@ Opt_reservation, Opt_noreservation, Opt_noload, Opt_commit, Opt_journal_update, Opt_journal_inum, @@ -460,10 +460,10 @@ Index: linux-stage/fs/ext3/super.c case Opt_ignore: break; default: -Index: linux-stage/include/linux/ext3_fs.h +Index: linux-2.6.5-sles9/include/linux/ext3_fs.h =================================================================== ---- linux-stage.orig/include/linux/ext3_fs.h 2004-11-03 14:41:24.958744101 -0500 -+++ linux-stage/include/linux/ext3_fs.h 2004-11-03 14:41:25.129694535 -0500 +--- linux-2.6.5-sles9.orig/include/linux/ext3_fs.h 2004-11-09 02:15:44.616692352 +0300 ++++ linux-2.6.5-sles9/include/linux/ext3_fs.h 2004-11-09 02:18:27.622911640 +0300 @@ -329,6 +329,8 @@ #define EXT3_MOUNT_POSIX_ACL 0x08000 /* POSIX Access Control Lists */ #define EXT3_MOUNT_RESERVATION 0x10000 /* Preallocation */ diff --git a/lustre/kernel_patches/patches/revert-76chaos.patch b/lustre/kernel_patches/patches/revert-76chaos.patch new file mode 100644 index 0000000..25f5440 --- /dev/null +++ b/lustre/kernel_patches/patches/revert-76chaos.patch @@ -0,0 +1,289 @@ +Index: linux/arch/i386/kernel/traps.c 
+=================================================================== +--- linux.orig/arch/i386/kernel/traps.c 2004-11-05 19:00:03.000000000 -0800 ++++ linux/arch/i386/kernel/traps.c 2004-11-05 19:00:06.000000000 -0800 +@@ -133,141 +133,48 @@ + + #endif + +-void scan_stack (unsigned long *stack) ++void show_trace(unsigned long * stack) + { ++#if !CONFIG_FRAME_POINTER + int i; ++#endif + unsigned long addr; +- /* static to not take up stackspace */ +- static char buffer[NR_CPUS][512], *bufp; ++ /* static to not take up stackspace; if we race here too bad */ ++ static char buffer[512]; + +- bufp = buffer[smp_processor_id()]; ++ if (!stack) ++ stack = (unsigned long*)&stack; + ++ printk("Call Trace: "); + /* + * If we have frame pointers then use them to get + * a 100% exact backtrace, up until the entry frame: + */ ++#if CONFIG_FRAME_POINTER ++#define DO(n) \ ++ addr = (int)__builtin_return_address(n); \ ++ if (!kernel_text_address(addr)) \ ++ goto out; \ ++ lookup_symbol(addr, buffer, 512); \ ++ printk("[<%08lx>] %s\n", addr, buffer); ++ ++ DO(0); DO(1); DO(2); DO(3); DO(4); DO(5); DO(7); DO(8); DO(9); ++ DO(10); DO(11); DO(12); DO(13); DO(14); DO(15); DO(17); DO(18); DO(19); ++out: ++#else + i = 1; + while (((long) stack & (THREAD_SIZE-1)) != 0) { + addr = *stack++; + if (kernel_text_address(addr)) { +- lookup_symbol(addr, bufp, 512); +- printk("[<%08lx>] %s (0x%p)\n", addr,bufp,stack-1); ++ lookup_symbol(addr, buffer, 512); ++ printk("[<%08lx>] %s (0x%p)\n", addr,buffer,stack-1); + i++; + } + } +-} +- +-#if CONFIG_FRAME_POINTER +-void show_stack_frame_params (int param_count, unsigned long params[]) +-{ +- int i; +- unsigned long *p, task_addr, stack_base; +- +- if (param_count <= 0) +- return; +- +- task_addr = (unsigned long) current; +- stack_base = task_addr + THREAD_SIZE - 1; +- +- printk(" ("); +- +- for (i = 0, p = params; +- ((param_count - i) > 1) && (p >= task_addr) && (p <= stack_base); +- i++, p++) { +- printk("0x%x, ", *p); +- +- if ((i % 4) == 3) +- 
printk("\n "); +- } +- +- if ((p >= task_addr) && (p <= stack_base)) +- printk("0x%x)\n", *p); +-} +- +-/* Display a stack trace for the currently executing task. The 'dummy' +- * parameter serves a purpose although its value is unused. We use the +- * address of 'dummy' as a reference point for finding the saved %ebp register +- * value on the stack. +- */ +-void frame_pointer_walk (void *dummy) +-{ +- int i; +- unsigned long addr, task_addr, *frame_ptr, *next_frame_ptr, *eip_ptr, +- eip, stack_base; +- /* static to not take up stackspace */ +- static char buffer[NR_CPUS][512], *bufp; +- +- bufp = buffer[smp_processor_id()]; +- task_addr = (unsigned long) current; +- stack_base = task_addr + THREAD_SIZE - 1; +- frame_ptr = (unsigned long *) (&dummy - 2); +- +- for (; ; ) { +- next_frame_ptr = (unsigned long *) (*frame_ptr); +- addr = (unsigned long) next_frame_ptr; +- +- /* Stop when we reach a frame pointer that points to a +- * location clearly outside our own kernel stack. +- */ +- if ((addr < task_addr) || (addr > stack_base)) +- break; +- +- eip_ptr = frame_ptr + 1; +- eip = *eip_ptr; +- +- if (kernel_text_address(eip)) { +- lookup_symbol(eip, bufp, 512); +- show_stack_frame_params(4, frame_ptr + 2); +- printk("[<%08lx>] %s (0x%x)\n", eip, bufp, +- eip_ptr); +- } +- +- frame_ptr = next_frame_ptr; +- } +-} +- +-typedef void (*stack_trace_fn_t) (unsigned long *stack); +- +-void show_trace(unsigned long * stack) +-{ +- static const stack_trace_fn_t trace_fn_vector[] = +- { scan_stack, frame_pointer_walk }; +- unsigned long addr, task_addr, stack_base; +- int task_is_current; +- +- if (!stack) +- stack = (unsigned long*)&stack; +- +- printk("Call Trace:\n"); +- addr = (unsigned long) stack; +- task_addr = (unsigned long) current; +- stack_base = task_addr + THREAD_SIZE - 1; +- task_is_current = (addr >= task_addr) && (addr <= stack_base); +- +- /* We may use frame pointers to do a stack trace only if the current +- * task is being traced. 
Tracing some other task in this manner +- * would require a saved %ebp register value. Perhaps in the future +- * I'll consider providing a means of obtaining this. +- */ +- trace_fn_vector[task_is_current](stack); +- +- printk("\n"); +-} +- +-#else /* CONFIG_FRAME_POINTER */ +- +-void show_trace(unsigned long * stack) +-{ +- if (!stack) +- stack = (unsigned long*)&stack; +- +- printk("Call Trace:\n"); +- scan_stack(stack); ++#endif + printk("\n"); + } + +-#endif /* CONFIG_FRAME_POINTER */ +- + void show_trace_task(struct task_struct *tsk) + { + unsigned long esp = tsk->thread.esp; +Index: linux/fs/namei.c +=================================================================== +--- linux.orig/fs/namei.c 2004-11-05 19:00:03.000000000 -0800 ++++ linux/fs/namei.c 2004-11-05 19:00:06.000000000 -0800 +@@ -1022,7 +1022,7 @@ + * The simplest case - just a plain lookup. + */ + if (!(flag & O_CREAT)) { +- error = path_lookup_it(pathname, lookup_flags(flag), nd); ++ error = path_lookup(pathname, lookup_flags(flag), nd); + if (error) + return error; + dentry = nd->dentry; +Index: linux/include/asm-i386/hw_irq.h +=================================================================== +--- linux.orig/include/asm-i386/hw_irq.h 2004-11-05 19:00:03.000000000 -0800 ++++ linux/include/asm-i386/hw_irq.h 2004-11-05 19:00:06.000000000 -0800 +@@ -158,9 +158,6 @@ + /* there is a second layer of macro just to get the symbolic + name for the vector evaluated. 
This change is for RTLinux */ + #define BUILD_SMP_INTERRUPT(x,v) XBUILD_SMP_INTERRUPT(x,v) +- +-#if CONFIG_X86_HIGH_ENTRY +- + #define XBUILD_SMP_INTERRUPT(x,v)\ + asmlinkage void x(void); \ + asmlinkage void call_##x(void); \ +@@ -173,26 +170,7 @@ + "movl $"SYMBOL_NAME_STR(smp_##x)", %ebp; call *%ebp\n\t" \ + "jmp ret_from_intr; .previous\n"); + +-#else +- +-#define XBUILD_SMP_INTERRUPT(x,v)\ +-asmlinkage void x(void); \ +-asmlinkage void call_##x(void); \ +-__asm__( \ +-".section .entry.text,\"ax\"\n"__ALIGN_STR"\n" \ +-SYMBOL_NAME_STR(x) ":\n\t" \ +- "pushl $"#v"-256\n\t" \ +- SAVE_ALL_SWITCH \ +- SYMBOL_NAME_STR(call_##x)":\n\t" \ +- "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \ +- "jmp ret_from_intr; .previous\n"); +- +-#endif +- + #define BUILD_SMP_TIMER_INTERRUPT(x,v) XBUILD_SMP_TIMER_INTERRUPT(x,v) +- +-#if CONFIG_X86_HIGH_ENTRY +- + #define XBUILD_SMP_TIMER_INTERRUPT(x,v) \ + asmlinkage void x(struct pt_regs * regs); \ + asmlinkage void call_##x(void); \ +@@ -208,27 +186,6 @@ + "addl $4,%esp\n\t" \ + "jmp ret_from_intr; .previous\n"); + +-#else +- +-#define XBUILD_SMP_TIMER_INTERRUPT(x,v) \ +-asmlinkage void x(struct pt_regs * regs); \ +-asmlinkage void call_##x(void); \ +-__asm__( \ +-".section .entry.text,\"ax\"\n"__ALIGN_STR"\n" \ +-SYMBOL_NAME_STR(x) ":\n\t" \ +- "pushl $"#v"-256\n\t" \ +- SAVE_ALL_SWITCH \ +- "movl %esp,%eax\n\t" \ +- "pushl %eax\n\t" \ +- SYMBOL_NAME_STR(call_##x)":\n\t" \ +- "call "SYMBOL_NAME_STR(smp_##x)"\n\t" \ +- "addl $4,%esp\n\t" \ +- "jmp ret_from_intr; .previous\n"); +- +-#endif +- +-#if CONFIG_X86_HIGH_ENTRY +- + #define BUILD_COMMON_IRQ() \ + asmlinkage void call_do_IRQ(void); \ + __asm__( \ +@@ -239,20 +196,6 @@ + "movl $"SYMBOL_NAME_STR(do_IRQ)", %ebp; call *%ebp\n\t" \ + "jmp ret_from_intr; .previous\n"); + +-#else +- +-#define BUILD_COMMON_IRQ() \ +-asmlinkage void call_do_IRQ(void); \ +-__asm__( \ +- ".section .entry.text,\"ax\"\n" __ALIGN_STR"\n" \ +- "common_interrupt:\n\t" \ +- SAVE_ALL_SWITCH \ +- 
SYMBOL_NAME_STR(call_do_IRQ)":\n\t" \ +- "call "SYMBOL_NAME_STR(do_IRQ)"\n\t" \ +- "jmp ret_from_intr; .previous\n"); +- +-#endif +- + /* + * subtle. orig_eax is used by the signal code to distinct between + * system calls and interrupted 'random user-space'. Thus we have +Index: linux/mm/highmem.c +=================================================================== +--- linux.orig/mm/highmem.c 2004-11-05 08:59:32.000000000 -0800 ++++ linux/mm/highmem.c 2004-11-05 19:00:06.000000000 -0800 +@@ -465,11 +465,7 @@ + /* + * FIXME: assuming PAGE_SIZE buffer_heads + */ +- +-/* +- * don't allow SUPERBH_MAX_USERS to go < 1 - mag +- */ +-#define SUPERBH_MAX_USERS max(POOL_SIZE * PAGE_SIZE / MAX_SUPERBH, 1) ++#define SUPERBH_MAX_USERS (POOL_SIZE * PAGE_SIZE / MAX_SUPERBH) + + static int superbh_users; + static DECLARE_WAIT_QUEUE_HEAD(superbh_wait); diff --git a/lustre/kernel_patches/series/2.6-vanilla.series b/lustre/kernel_patches/series/2.6-vanilla.series new file mode 100644 index 0000000..7fa0d34 --- /dev/null +++ b/lustre/kernel_patches/series/2.6-vanilla.series @@ -0,0 +1,15 @@ +uml-2.6.7-01-bb2.patch +lustre_version.patch +vfs_intent-2.6-vanilla.patch +vfs_nointent-2.6-vanilla.patch +vfs_races-2.6-vanilla.patch +ext3-wantedi-misc-2.6-suse.patch +nfs-cifs-intent-2.6-vanilla.patch +iopen-misc-2.6-suse.patch +export-truncate-2.6-suse.patch +export_symbols-2.6-suse.patch +dev_read_only-2.6-suse.patch +export-2.6-suse.patch +header-guards-2.6-suse.patch +lookup_bdev_init_intent.patch +ext3-super-ntohl.patch diff --git a/lustre/kernel_patches/series/chaos-2.4.21 b/lustre/kernel_patches/series/chaos-2.4.21 index f455f13..97190b5 100644 --- a/lustre/kernel_patches/series/chaos-2.4.21 +++ b/lustre/kernel_patches/series/chaos-2.4.21 @@ -1,12 +1,13 @@ +revert-76chaos.patch configurable-x86-stack-2.4.21-chaos.patch dev_read_only_2.4.21-chaos.patch -exports-2.4.21-chaos.patch +exports_2.4.19-suse.patch lustre_version.patch -vfs_intent-2.4.21-chaos.patch 
+vfs_intent-2.4.21-rhel.patch invalidate_show-2.4.20-rh.patch iod-rmap-exports-2.4.21-chaos.patch export-truncate.patch -ext3-htree-2.4.21-chaos.patch +ext3-htree-2.4.21-rhel.patch linux-2.4.21-xattr-0.8.54-chaos.patch ext3-ino_sb_macro-2.4.21-chaos.patch ext3-orphan_lock-2.4.22-rh.patch @@ -34,10 +35,14 @@ pagecache-lock-2.4.21-chaos.patch ext3-truncate-buffer-head.patch inode-max-readahead-2.4.24.patch dcache_refcount_debug.patch -ext3-extents-2.4.21-chaos.patch -ext3-extents-asyncdel-2.4.21-chaos.patch -ext3-mballoc-2.4.21-chaos.patch +ext3-extents-2.4.21-chaos.patch +ext3-extents-asyncdel-2.4.21-chaos.patch +ext3-mballoc-2.4.21-chaos.patch blkdev_tunables-2.4.21-chaos.patch -small_scatterlist-2.4.21-chaos.patch +small_scatterlist-2.4.21-rhel.patch ext3-nlinks-2.4.21-chaos.patch sd_iostats-2.4.21-chaos.patch +llnl-frame-pointer-walk-2.4.21-rhel.patch +llnl-frame-pointer-walk-fix-2.4.21-rhel.patch +export-show_task-2.4-rhel.patch +compile-fixes-2.4.21-rhel.patch diff --git a/lustre/kernel_patches/series/ldiskfs-2.6-suse.series b/lustre/kernel_patches/series/ldiskfs-2.6-suse.series index 02155b7..fd05c25 100644 --- a/lustre/kernel_patches/series/ldiskfs-2.6-suse.series +++ b/lustre/kernel_patches/series/ldiskfs-2.6-suse.series @@ -1,12 +1,12 @@ ext3-wantedi-2.6-suse.patch ext3-san-jdike-2.6-suse.patch -iopen-2.6-suse.patch +iopen-2.6-suse.patch export_symbols-ext3-2.6-suse.patch ext3-map_inode_page-2.6-suse.patch -ext3-init-generation-2.6-suse.patch ext3-ea-in-inode-2.6-suse.patch export-ext3-2.6-suse.patch ext3-include-fixes-2.6-suse.patch -ext3-extents-2.6.5.patch -ext3-mballoc2-2.6-suse.patch +ext3-extents-2.6.5.patch +ext3-mballoc2-2.6-suse.patch ext3-nlinks-2.6.7.patch +ext3-rename-reserve-2.6-suse.patch diff --git a/lustre/kernel_patches/series/ldiskfs-2.6-vanilla.series b/lustre/kernel_patches/series/ldiskfs-2.6-vanilla.series new file mode 100644 index 0000000..13cf85a --- /dev/null +++ b/lustre/kernel_patches/series/ldiskfs-2.6-vanilla.series @@ -0,0 
+1,11 @@ +ext3-wantedi-2.6-suse.patch +ext3-san-jdike-2.6-suse.patch +iopen-2.6-vanilla.patch +export_symbols-ext3-2.6-suse.patch +ext3-map_inode_page-2.6-suse.patch +ext3-ea-in-inode-2.6-suse.patch +export-ext3-2.6-suse.patch +ext3-include-fixes-2.6-suse.patch +ext3-extents-2.6.7.patch +ext3-mballoc2-2.6.7.patch +ext3-nlinks-2.6.7.patch diff --git a/lustre/kernel_patches/series/rhel-2.4.21 b/lustre/kernel_patches/series/rhel-2.4.21 index 4709bf9..cf623d5 100644 --- a/lustre/kernel_patches/series/rhel-2.4.21 +++ b/lustre/kernel_patches/series/rhel-2.4.21 @@ -34,9 +34,9 @@ pagecache-lock-2.4.21-chaos.patch ext3-truncate-buffer-head.patch inode-max-readahead-2.4.24.patch dcache_refcount_debug.patch -ext3-extents-2.4.21-chaos.patch -ext3-extents-asyncdel-2.4.21-chaos.patch -ext3-mballoc-2.4.21-chaos.patch +ext3-extents-2.4.21-chaos.patch +ext3-extents-asyncdel-2.4.21-chaos.patch +ext3-mballoc-2.4.21-chaos.patch blkdev_tunables-2.4.21-chaos.patch small_scatterlist-2.4.21-rhel.patch ext3-nlinks-2.4.21-chaos.patch diff --git a/lustre/lvfs/fsfilt_ext3.c b/lustre/lvfs/fsfilt_ext3.c index 466da2e..279c040 100644 --- a/lustre/lvfs/fsfilt_ext3.c +++ b/lustre/lvfs/fsfilt_ext3.c @@ -45,6 +45,21 @@ #include #include #include +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#include +#endif + +#ifdef EXT3_MULTIBLOCK_ALLOCATOR +#include +#endif + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7)) +# define lock_24kernel() lock_kernel() +# define unlock_24kernel() unlock_kernel() +#else +# define lock_24kernel() do {} while (0) +# define unlock_24kernel() do {} while (0) +#endif static kmem_cache_t *fcb_cache; static atomic_t fcb_cache_count = ATOMIC_INIT(0); @@ -142,9 +157,9 @@ static void *fsfilt_ext3_start(struct inode *inode, int op, void *desc_private, journal_start: LASSERTF(nblocks > 0, "can't start %d credit transaction\n", nblocks); - lock_kernel(); + lock_24kernel(); handle = journal_start(EXT3_JOURNAL(inode), nblocks); - unlock_kernel(); + unlock_24kernel(); if 
(!IS_ERR(handle)) LASSERT(current->journal_info == handle); @@ -281,9 +296,9 @@ static void *fsfilt_ext3_brw_start(int objcount, struct fsfilt_objinfo *fso, } LASSERTF(needed > 0, "can't start %d credit transaction\n", needed); - lock_kernel(); + lock_24kernel(); handle = journal_start(journal, needed); - unlock_kernel(); + unlock_24kernel(); if (IS_ERR(handle)) { CERROR("can't get handle for %d credits: rc = %ld\n", needed, PTR_ERR(handle)); @@ -304,9 +319,9 @@ static int fsfilt_ext3_commit(struct inode *inode, void *h, int force_sync) if (force_sync) handle->h_sync = 1; /* recovery likes this */ - lock_kernel(); + lock_24kernel(); rc = journal_stop(handle); - unlock_kernel(); + unlock_24kernel(); return rc; } @@ -579,6 +594,32 @@ static int fsfilt_ext3_get_md(struct inode *inode, void *lmm, int lmm_size) return rc; } +#if (LINUX_VERSION_CODE > KERNEL_VERSION(2,5,0)) +static int fsfilt_ext3_send_bio(int rw, struct inode *inode, struct bio *bio) +{ + submit_bio(rw, bio); + return 0; +} +#else +static int fsfilt_ext3_send_bio(int rw, struct inode *inode, struct kiobuf *bio) +{ + int rc, blocks_per_page; + + rc = brw_kiovec(rw, 1, &bio, inode->i_dev, + bio->blocks, 1 << inode->i_blkbits); + + blocks_per_page = PAGE_SIZE >> inode->i_blkbits; + + if (rc != (1 << inode->i_blkbits) * bio->nr_pages * blocks_per_page) { + CERROR("short write? 
expected %d, wrote %d\n", + (1 << inode->i_blkbits) * bio->nr_pages * + blocks_per_page, rc); + } + + return rc; +} +#endif + static ssize_t fsfilt_ext3_readpage(struct file *file, char *buf, size_t count, loff_t *off) { @@ -662,10 +703,10 @@ static int fsfilt_ext3_add_journal_cb(struct obd_device *obd, __u64 last_rcvd, fcb->cb_data = cb_data; CDEBUG(D_EXT2, "set callback for last_rcvd: "LPD64"\n", last_rcvd); - lock_kernel(); + lock_24kernel(); journal_callback_set(handle, fsfilt_ext3_cb_func, (struct journal_callback *)fcb); - unlock_kernel(); + unlock_24kernel(); return 0; } @@ -700,12 +741,320 @@ static int fsfilt_ext3_sync(struct super_block *sb) return ext3_force_commit(sb); } +#ifdef EXT3_MULTIBLOCK_ALLOCATOR +#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) +#define ext3_up_truncate_sem(inode) up_write(&EXT3_I(inode)->truncate_sem); +#define ext3_down_truncate_sem(inode) down_write(&EXT3_I(inode)->truncate_sem); +#else +#define ext3_up_truncate_sem(inode) up(&EXT3_I(inode)->truncate_sem); +#define ext3_down_truncate_sem(inode) down(&EXT3_I(inode)->truncate_sem); +#endif + +#include +#if EXT3_EXT_MAGIC == 0xf301 +#define ee_start e_start +#define ee_block e_block +#define ee_len e_num +#endif +#ifndef EXT3_BB_MAX_BLOCKS +#define ext3_mb_new_blocks(handle, inode, goal, count, aflags, err) \ + ext3_new_blocks(handle, inode, count, goal, err) +#endif + +struct bpointers { + unsigned long *blocks; + int *created; + unsigned long start; + int num; + int init_num; + int create; +}; + +static int ext3_ext_find_goal(struct inode *inode, struct ext3_ext_path *path, + unsigned long block, int *aflags) +{ + struct ext3_inode_info *ei = EXT3_I(inode); + unsigned long bg_start; + unsigned long colour; + int depth; + + if (path) { + struct ext3_extent *ex; + depth = path->p_depth; + + /* try to predict block placement */ + if ((ex = path[depth].p_ext)) { +#if 0 + /* This prefers to eat into a contiguous extent + * rather than find an extent that the whole + * request will 
fit into. This can fragment data + * block allocation and prevents our lovely 1M I/Os + * from reaching the disk intact. */ + if (ex->ee_block + ex->ee_len == block) + *aflags |= 1; +#endif + return ex->ee_start + (block - ex->ee_block); + } + + /* it looks index is empty + * try to find starting from index itself */ + if (path[depth].p_bh) + return path[depth].p_bh->b_blocknr; + } + + /* OK. use inode's group */ + bg_start = (ei->i_block_group * EXT3_BLOCKS_PER_GROUP(inode->i_sb)) + + le32_to_cpu(EXT3_SB(inode->i_sb)->s_es->s_first_data_block); + colour = (current->pid % 16) * + (EXT3_BLOCKS_PER_GROUP(inode->i_sb) / 16); + return bg_start + colour + block; +} + +static int ext3_ext_new_extent_cb(struct ext3_extents_tree *tree, + struct ext3_ext_path *path, + struct ext3_extent *newex, int exist) +{ + struct inode *inode = tree->inode; + struct bpointers *bp = tree->private; + int count, err, goal; + unsigned long pblock; + unsigned long tgen; + loff_t new_i_size; + handle_t *handle; + int i, aflags = 0; + + i = EXT_DEPTH(tree); + EXT_ASSERT(i == path->p_depth); + EXT_ASSERT(path[i].p_hdr); + + if (exist) { + err = EXT_CONTINUE; + goto map; + } + + if (bp->create == 0) { + i = 0; + if (newex->ee_block < bp->start) + i = bp->start - newex->ee_block; + if (i >= newex->ee_len) + CERROR("nothing to do?! i = %d, e_num = %u\n", + i, newex->ee_len); + for (; i < newex->ee_len && bp->num; i++) { + *(bp->created) = 0; + *(bp->created) = 0; + bp->created++; + *(bp->blocks) = 0; + bp->blocks++; + bp->num--; + bp->start++; + } + + return EXT_CONTINUE; + } + + tgen = EXT_GENERATION(tree); + count = ext3_ext_calc_credits_for_insert(tree, path); + ext3_up_truncate_sem(inode); + + lock_24kernel(); + handle = journal_start(EXT3_JOURNAL(inode), count + EXT3_ALLOC_NEEDED + 1); + unlock_24kernel(); + if (IS_ERR(handle)) { + ext3_down_truncate_sem(inode); + return PTR_ERR(handle); + } + + if (tgen != EXT_GENERATION(tree)) { + /* the tree has changed. 
so path can be invalid at moment */ + lock_24kernel(); + journal_stop(handle); + unlock_24kernel(); + ext3_down_truncate_sem(inode); + return EXT_REPEAT; + } + + ext3_down_truncate_sem(inode); + count = newex->ee_len; + goal = ext3_ext_find_goal(inode, path, newex->ee_block, &aflags); + aflags |= 2; /* block have been already reserved */ + pblock = ext3_mb_new_blocks(handle, inode, goal, &count, aflags, &err); + if (!pblock) + goto out; + EXT_ASSERT(count <= newex->ee_len); + + /* insert new extent */ + newex->ee_start = pblock; + newex->ee_len = count; + err = ext3_ext_insert_extent(handle, tree, path, newex); + if (err) + goto out; + + /* correct on-disk inode size */ + if (newex->ee_len > 0) { + new_i_size = (loff_t) newex->ee_block + newex->ee_len; + new_i_size = new_i_size << inode->i_blkbits; + if (new_i_size > EXT3_I(inode)->i_disksize) { + EXT3_I(inode)->i_disksize = new_i_size; + err = ext3_mark_inode_dirty(handle, inode); + } + } + +out: + lock_24kernel(); + journal_stop(handle); + unlock_24kernel(); +map: + if (err >= 0) { + /* map blocks */ + if (bp->num == 0) { + CERROR("hmm. why do we find this extent?\n"); + CERROR("initial space: %lu:%u\n", + bp->start, bp->init_num); + CERROR("current extent: %u/%u/%u %d\n", + newex->ee_block, newex->ee_len, + newex->ee_start, exist); + } + i = 0; + if (newex->ee_block < bp->start) + i = bp->start - newex->ee_block; + if (i >= newex->ee_len) + CERROR("nothing to do?! i = %d, e_num = %u\n", + i, newex->ee_len); + for (; i < newex->ee_len && bp->num; i++) { + *(bp->created) = (exist == 0 ? 
1 : 0); + bp->created++; + *(bp->blocks) = newex->ee_start + i; + bp->blocks++; + bp->num--; + bp->start++; + } + } + return err; +} + +int fsfilt_map_nblocks(struct inode *inode, unsigned long block, + unsigned long num, unsigned long *blocks, + int *created, int create) +{ + struct ext3_extents_tree tree; + struct bpointers bp; + int err; + + CDEBUG(D_OTHER, "blocks %lu-%lu requested for inode %u\n", + block, block + num, (unsigned) inode->i_ino); + + ext3_init_tree_desc(&tree, inode); + tree.private = &bp; + bp.blocks = blocks; + bp.created = created; + bp.start = block; + bp.init_num = bp.num = num; + bp.create = create; + + ext3_down_truncate_sem(inode); + err = ext3_ext_walk_space(&tree, block, num, ext3_ext_new_extent_cb); + ext3_ext_invalidate_cache(&tree); + ext3_up_truncate_sem(inode); + + return err; +} + +int fsfilt_ext3_map_ext_inode_pages(struct inode *inode, struct page **page, + int pages, unsigned long *blocks, + int *created, int create) +{ + int blocks_per_page = PAGE_SIZE >> inode->i_blkbits; + int rc = 0, i = 0; + struct page *fp = NULL; + int clen = 0; + + CDEBUG(D_OTHER, "inode %lu: map %d pages from %lu\n", + inode->i_ino, pages, (*page)->index); + + /* pages are sorted already. so, we just have to find + * contig. 
space and process them properly */ + while (i < pages) { + if (fp == NULL) { + /* start new extent */ + fp = *page++; + clen = 1; + i++; + continue; + } else if (fp->index + clen == (*page)->index) { + /* continue the extent */ + page++; + clen++; + i++; + continue; + } + + /* process found extent */ + rc = fsfilt_map_nblocks(inode, fp->index * blocks_per_page, + clen * blocks_per_page, blocks, + created, create); + if (rc) + GOTO(cleanup, rc); + + /* look for next extent */ + fp = NULL; + blocks += blocks_per_page * clen; + created += blocks_per_page * clen; + } + + if (fp) + rc = fsfilt_map_nblocks(inode, fp->index * blocks_per_page, + clen * blocks_per_page, blocks, + created, create); +cleanup: + return rc; +} +#endif + extern int ext3_map_inode_page(struct inode *inode, struct page *page, unsigned long *blocks, int *created, int create); -int fsfilt_ext3_map_inode_page(struct inode *inode, struct page *page, - unsigned long *blocks, int *created, int create) +int fsfilt_ext3_map_bm_inode_pages(struct inode *inode, struct page **page, + int pages, unsigned long *blocks, + int *created, int create) +{ + int blocks_per_page = PAGE_SIZE >> inode->i_blkbits; + unsigned long *b; + int rc = 0, i, *cr; + + for (i = 0, cr = created, b = blocks; i < pages; i++, page++) { + rc = ext3_map_inode_page(inode, *page, b, cr, create); + if (rc) { + CERROR("ino %lu, blk %lu cr %u create %d: rc %d\n", + inode->i_ino, *b, *cr, create, rc); + break; + } + + b += blocks_per_page; + cr += blocks_per_page; + } + return rc; +} + +int fsfilt_ext3_map_inode_pages(struct inode *inode, struct page **page, + int pages, unsigned long *blocks, + int *created, int create, + struct semaphore *optional_sem) { - return ext3_map_inode_page(inode, page, blocks, created, create); + int rc; +#ifdef EXT3_MULTIBLOCK_ALLOCATOR + if (EXT3_I(inode)->i_flags & EXT3_EXTENTS_FL) { + rc = fsfilt_ext3_map_ext_inode_pages(inode, page, pages, + blocks, created, create); + return rc; + } +#endif + if 
(optional_sem != NULL) + down(optional_sem); + rc = fsfilt_ext3_map_bm_inode_pages(inode, page, pages, blocks, + created, create); + if (optional_sem != NULL) + up(optional_sem); + + return rc; } extern int ext3_prep_san_write(struct inode *inode, long *blocks, @@ -780,10 +1129,10 @@ static int fsfilt_ext3_write_record(struct file *file, void *buf, int bufsize, block_count = (block_count + blocksize - 1) >> inode->i_blkbits; journal = EXT3_SB(inode->i_sb)->s_journal; - lock_kernel(); + lock_24kernel(); handle = journal_start(journal, block_count * EXT3_DATA_TRANS_BLOCKS + 2); - unlock_kernel(); + unlock_24kernel(); if (IS_ERR(handle)) { CERROR("can't start transaction\n"); return PTR_ERR(handle); @@ -841,9 +1190,9 @@ out: unlock_kernel(); } - lock_kernel(); + lock_24kernel(); journal_stop(handle); - unlock_kernel(); + unlock_24kernel(); if (err == 0) *offs = offset; @@ -911,11 +1260,12 @@ static struct fsfilt_operations fsfilt_ext3_ops = { .fs_add_journal_cb = fsfilt_ext3_add_journal_cb, .fs_statfs = fsfilt_ext3_statfs, .fs_sync = fsfilt_ext3_sync, - .fs_map_inode_page = fsfilt_ext3_map_inode_page, + .fs_map_inode_pages = fsfilt_ext3_map_inode_pages, .fs_prep_san_write = fsfilt_ext3_prep_san_write, .fs_write_record = fsfilt_ext3_write_record, .fs_read_record = fsfilt_ext3_read_record, .fs_setup = fsfilt_ext3_setup, + .fs_send_bio = fsfilt_ext3_send_bio, .fs_get_op_len = fsfilt_ext3_get_op_len, }; diff --git a/lustre/lvfs/lvfs_linux.c b/lustre/lvfs/lvfs_linux.c index ef28875..a7f4d7c 100644 --- a/lustre/lvfs/lvfs_linux.c +++ b/lustre/lvfs/lvfs_linux.c @@ -65,7 +65,7 @@ int obd_memmax; # define ASSERT_KERNEL_CTXT(msg) do {} while(0) #endif -#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)) +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4)) #define current_ngroups current->group_info->ngroups #define current_groups current->group_info->small_block #else @@ -121,6 +121,13 @@ void push_ctxt(struct obd_run_ctxt *save, struct obd_run_ctxt *new_ctx, 
current_groups[current_ngroups++] = uc->ouc_suppgid1; if (uc->ouc_suppgid2 != -1) current_groups[current_ngroups++] = uc->ouc_suppgid2; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) + if (uc->ouc_suppgid1 != -1 && uc->ouc_suppgid2 != -1 + && (uc->ouc_suppgid1 > uc->ouc_suppgid2)) { + current_groups[0] = uc->ouc_suppgid2; + current_groups[1] = uc->ouc_suppgid1; + } +#endif } current->fs->umask = 0; /* umask already applied on client */ set_fs(new_ctx->fs); diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index 9d5fd07..45782bc 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -1384,6 +1384,7 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) { struct lprocfs_static_vars lvars; struct lustre_cfg* lcfg = buf; + char *options = NULL; struct mds_obd *mds = &obd->u.mds; struct vfsmount *mnt; char ns_name[48]; @@ -1404,11 +1405,20 @@ static int mds_setup(struct obd_device *obd, obd_count len, void *buf) if (!page) RETURN(-ENOMEM); - memset((void *)page, 0, PAGE_SIZE); - sprintf((char *)page, "iopen_nopriv,errors=remount-ro"); + options = (char *)page; + memset(options, 0, PAGE_SIZE); + + /* here we use "iopen_nopriv" hardcoded, because it affects MDS utility + * and the rest of options are passed by mount options. Probably this + * should be moved to somewhere else like startup scripts or lconf. 
*/ + sprintf(options, "iopen_nopriv"); + + if (lcfg->lcfg_inllen4 > 0 && lcfg->lcfg_inlbuf4) + sprintf(options + strlen(options), ",%s", + lcfg->lcfg_inlbuf4); mnt = do_kern_mount(lcfg->lcfg_inlbuf2, 0, - lcfg->lcfg_inlbuf1, (void *)page); + lcfg->lcfg_inlbuf1, (void *)options); free_page(page); if (IS_ERR(mnt)) { rc = PTR_ERR(mnt); diff --git a/lustre/obdfilter/filter.c b/lustre/obdfilter/filter.c index 65f7f91..67df476 100644 --- a/lustre/obdfilter/filter.c +++ b/lustre/obdfilter/filter.c @@ -282,8 +282,7 @@ static int filter_free_server_data(struct filter_obd *filter) { OBD_FREE(filter->fo_fsd, sizeof(*filter->fo_fsd)); filter->fo_fsd = NULL; - OBD_FREE(filter->fo_last_rcvd_slots, - FILTER_LR_MAX_CLIENT_WORDS * sizeof(unsigned long)); + OBD_FREE(filter->fo_last_rcvd_slots, FILTER_LR_MAX_CLIENTS/8); filter->fo_last_rcvd_slots = NULL; return 0; } @@ -354,8 +353,7 @@ static int filter_init_server_data(struct obd_device *obd, struct file * filp) RETURN(-ENOMEM); filter->fo_fsd = fsd; - OBD_ALLOC(filter->fo_last_rcvd_slots, - FILTER_LR_MAX_CLIENT_WORDS * sizeof(unsigned long)); + OBD_ALLOC(filter->fo_last_rcvd_slots, FILTER_LR_MAX_CLIENTS/8); if (filter->fo_last_rcvd_slots == NULL) { OBD_FREE(fsd, sizeof(*fsd)); RETURN(-ENOMEM); @@ -1278,32 +1276,12 @@ static int filter_setup(struct obd_device *obd, obd_count len, void *buf) { struct lprocfs_static_vars lvars; struct lustre_cfg* lcfg = buf; - const char *str = NULL; - char *option = NULL; - int n = 0; int rc; if (!lcfg->lcfg_inlbuf1 || !lcfg->lcfg_inlbuf2) RETURN(-EINVAL); - if (!strcmp(lcfg->lcfg_inlbuf2, "ext3") || - !strcmp(lcfg->lcfg_inlbuf2, "ldiskfs")) { -#if (LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0)) - /* bug 1577: implement async-delete for 2.5 */ - str = "errors=remount-ro,asyncdel"; -#else - str = "errors=remount-ro"; -#endif - n = strlen(str) + 1; - OBD_ALLOC(option, n); - if (option == NULL) - RETURN(-ENOMEM); - strcpy(option, str); - } - - rc = filter_common_setup(obd, len, buf, option); - if (option) 
- OBD_FREE(option, n); + rc = filter_common_setup(obd, len, buf, lcfg->lcfg_inlbuf4); lprocfs_init_vars(filter, &lvars); if (rc == 0 && lprocfs_obd_setup(obd, lvars.obd_vars) == 0 && @@ -2535,15 +2513,28 @@ static int __init obdfilter_init(void) lprocfs_init_vars(filter, &lvars); + OBD_ALLOC(obdfilter_created_scratchpad, + OBDFILTER_CREATED_SCRATCHPAD_ENTRIES * + sizeof(*obdfilter_created_scratchpad)); + if (obdfilter_created_scratchpad == NULL) + return -ENOMEM; + rc = class_register_type(&filter_obd_ops, lvars.module_vars, OBD_FILTER_DEVICENAME); - if (rc) + if (rc) { + GOTO(out, rc); return rc; + } rc = class_register_type(&filter_sanobd_ops, lvars.module_vars, OBD_FILTER_SAN_DEVICENAME); - if (rc) + if (rc) { class_unregister_type(OBD_FILTER_DEVICENAME); +out: + OBD_FREE(obdfilter_created_scratchpad, + OBDFILTER_CREATED_SCRATCHPAD_ENTRIES * + sizeof(*obdfilter_created_scratchpad)); + } return rc; } @@ -2551,6 +2542,9 @@ static void __exit obdfilter_exit(void) { class_unregister_type(OBD_FILTER_SAN_DEVICENAME); class_unregister_type(OBD_FILTER_DEVICENAME); + OBD_FREE(obdfilter_created_scratchpad, + OBDFILTER_CREATED_SCRATCHPAD_ENTRIES * + sizeof(*obdfilter_created_scratchpad)); } MODULE_AUTHOR("Cluster File Systems, Inc. 
"); diff --git a/lustre/obdfilter/filter_internal.h b/lustre/obdfilter/filter_internal.h index d3d1a77..446625a 100644 --- a/lustre/obdfilter/filter_internal.h +++ b/lustre/obdfilter/filter_internal.h @@ -37,7 +37,6 @@ /* This limit is arbitrary, but for now we fit it in 1 page (32k clients) */ #define FILTER_LR_MAX_CLIENTS (PAGE_SIZE * 8) -#define FILTER_LR_MAX_CLIENT_WORDS (FILTER_LR_MAX_CLIENTS/sizeof(unsigned long)) #define FILTER_SUBDIR_COUNT 32 /* set to zero for no subdirs */ #define FILTER_GROUPS 3 /* must be at least 3; not dynamic yet */ @@ -96,6 +95,11 @@ enum { //#define FILTER_MAX_CACHE_SIZE (32 * 1024 * 1024) /* was OBD_OBJECT_EOF */ #define FILTER_MAX_CACHE_SIZE OBD_OBJECT_EOF +/* We have to pass a 'created' array to fsfilt_map_inode_pages() which we + * then ignore. So we pre-allocate one that everyone can use... */ +#define OBDFILTER_CREATED_SCRATCHPAD_ENTRIES 1024 +extern int *obdfilter_created_scratchpad; + /* filter.c */ void f_dput(struct dentry *); struct dentry *filter_fid2dentry(struct obd_device *, struct dentry *dir, @@ -128,6 +132,8 @@ int filter_brw(int cmd, struct obd_export *, struct obdo *, struct lov_stripe_md *, obd_count oa_bufs, struct brw_page *, struct obd_trans_info *); void flip_into_page_cache(struct inode *inode, struct page *new_page); +void filter_free_dio_pages(int objcount, struct obd_ioobj *obj, + int niocount, struct niobuf_local *res); /* filter_io_*.c */ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, @@ -139,6 +145,13 @@ long filter_grant(struct obd_export *exp, obd_size current_grant, obd_size want, obd_size fs_space_left); void filter_grant_commit(struct obd_export *exp, int niocount, struct niobuf_local *res); +int filter_alloc_iobuf(int rw, int num_pages, void **ret); +void filter_free_iobuf(void *iobuf); +int filter_iobuf_add_page(struct obd_device *obd, void *iobuf, + struct inode *inode, struct page *page); +int filter_direct_io(int rw, struct dentry *dchild, void *iobuf, + 
struct obd_export *exp, struct iattr *attr, + struct obd_trans_info *oti, void **wait_handle); /* filter_log.c */ struct ost_filterdata { diff --git a/lustre/obdfilter/filter_io.c b/lustre/obdfilter/filter_io.c index 7bddd43..9764996 100644 --- a/lustre/obdfilter/filter_io.c +++ b/lustre/obdfilter/filter_io.c @@ -35,68 +35,45 @@ #include #include "filter_internal.h" -static int filter_start_page_read(struct inode *inode, struct niobuf_local *lnb) +int *obdfilter_created_scratchpad; + +static int filter_alloc_dio_page(struct obd_device *obd, struct inode *inode, + struct niobuf_local *lnb) { - struct address_space *mapping = inode->i_mapping; struct page *page; - unsigned long index = lnb->offset >> PAGE_SHIFT; - int rc; - - page = grab_cache_page(mapping, index); /* locked page */ - if (page == NULL) - return lnb->rc = -ENOMEM; - - LASSERT(page->mapping == mapping); - - lnb->page = page; - - if (inode->i_size < lnb->offset + lnb->len - 1) - lnb->rc = inode->i_size - lnb->offset; - else - lnb->rc = lnb->len; - if (PageUptodate(page)) { - unlock_page(page); - return 0; + page = alloc_pages(GFP_HIGHUSER, 0); + if (page == NULL) { + CERROR("no memory for a temp page\n"); + lnb->rc = -ENOMEM; + RETURN(-ENOMEM); } - - rc = mapping->a_ops->readpage(NULL, page); - if (rc < 0) { - CERROR("page index %lu, rc = %d\n", index, rc); - lnb->page = NULL; - page_cache_release(page); - return lnb->rc = rc; +#if 0 + POISON_PAGE(page, 0xf1); + if (lnb->len != PAGE_SIZE) { + memset(kmap(page) + lnb->len, 0, PAGE_SIZE - lnb->len); + kunmap(page); } +#endif + page->index = lnb->offset >> PAGE_SHIFT; + lnb->page = page; - return 0; + RETURN(0); } -static int filter_finish_page_read(struct niobuf_local *lnb) +void filter_free_dio_pages(int objcount, struct obd_ioobj *obj, + int niocount, struct niobuf_local *res) { - if (lnb->page == NULL) - return 0; - - if (PageUptodate(lnb->page)) - return 0; + int i, j; - wait_on_page(lnb->page); - if (!PageUptodate(lnb->page)) { - CERROR("page index 
%lu/offset "LPX64" not uptodate\n", - lnb->page->index, lnb->offset); - GOTO(err_page, lnb->rc = -EIO); - } - if (PageError(lnb->page)) { - CERROR("page index %lu/offset "LPX64" has error\n", - lnb->page->index, lnb->offset); - GOTO(err_page, lnb->rc = -EIO); + for (i = 0; i < objcount; i++, obj++) { + for (j = 0 ; j < obj->ioo_bufcnt ; j++, res++) { + if (res->page != NULL) { + __free_page(res->page); + res->page = NULL; + } + } } - - return 0; - -err_page: - page_cache_release(lnb->page); - lnb->page = NULL; - return lnb->rc; } /* Grab the dirty and seen grant announcements from the incoming obdo. @@ -160,6 +137,13 @@ static void filter_grant_incoming(struct obd_export *exp, struct obdo *oa) obd->u.filter.fo_tot_granted -= oa->o_dropped; fed->fed_grant -= oa->o_dropped; fed->fed_dirty = oa->o_dirty; + if (fed->fed_dirty < 0 || fed->fed_grant < 0 || fed->fed_pending < 0) { + CERROR("%s: cli %s/%p dirty %ld pend %ld grant %ld\n", + obd->obd_name, exp->exp_client_uuid.uuid, exp, + fed->fed_dirty, fed->fed_pending, fed->fed_grant); + spin_unlock(&obd->obd_osfs_lock); + LBUG(); + } EXIT; } @@ -251,8 +235,11 @@ long filter_grant(struct obd_export *exp, obd_size current_grant, * has and what we think it has, don't grant very much and let the * client consume its grant first. Either it just has lots of RPCs * in flight, or it was evicted and its grants will soon be used up. 
*/ - if (current_grant < want && - current_grant < fed->fed_grant + FILTER_GRANT_CHUNK) { + if (want > 0x7fffffff) { + CERROR("%s: client %s/%p requesting > 2GB grant "LPU64"\n", + obd->obd_name, exp->exp_client_uuid.uuid, exp, want); + } else if (current_grant < want && + current_grant < fed->fed_grant + FILTER_GRANT_CHUNK) { grant = min((want >> blockbits) / 2, (fs_space_left >> blockbits) / 8); grant <<= blockbits; @@ -263,6 +250,14 @@ long filter_grant(struct obd_export *exp, obd_size current_grant, obd->u.filter.fo_tot_granted += grant; fed->fed_grant += grant; + if (fed->fed_grant < 0) { + CERROR("%s: cli %s/%p grant %ld want "LPU64 + "current"LPU64"\n", + obd->obd_name, exp->exp_client_uuid.uuid, + exp, fed->fed_grant, want,current_grant); + spin_unlock(&obd->obd_osfs_lock); + LBUG(); + } } } @@ -285,135 +280,114 @@ static int filter_preprw_read(int cmd, struct obd_export *exp, struct obdo *oa, { struct obd_device *obd = exp->exp_obd; struct obd_run_ctxt saved; - struct obd_ioobj *o; struct niobuf_remote *rnb; - struct niobuf_local *lnb = NULL; - struct fsfilt_objinfo *fso; - struct dentry *dentry; + struct niobuf_local *lnb; + struct dentry *dentry = NULL; struct inode *inode; - int rc = 0, i, j, tot_bytes = 0, cleanup_phase = 0; + void *iobuf = NULL; + int rc = 0, i, tot_bytes = 0; unsigned long now = jiffies; ENTRY; /* We are currently not supporting multi-obj BRW_READ RPCS at all. 
* When we do this function's dentry cleanup will need to be fixed */ - LASSERT(objcount == 1); - LASSERT(obj->ioo_bufcnt > 0); + LASSERTF(objcount == 1, "%d\n", objcount); + LASSERTF(obj->ioo_bufcnt > 0, "%d\n", obj->ioo_bufcnt); if (oa && oa->o_valid & OBD_MD_FLGRANT) { spin_lock(&obd->obd_osfs_lock); filter_grant_incoming(exp, oa); -#if 0 - /* Reads do not increase grants */ - oa->o_grant = filter_grant(exp, oa->o_grant, oa->o_undirty, - filter_grant_space_left(exp)); -#else oa->o_grant = 0; -#endif spin_unlock(&obd->obd_osfs_lock); } - OBD_ALLOC(fso, objcount * sizeof(*fso)); - if (fso == NULL) - RETURN(-ENOMEM); - memset(res, 0, niocount * sizeof(*res)); push_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); - for (i = 0, o = obj; i < objcount; i++, o++) { - LASSERT(o->ioo_bufcnt); - dentry = filter_oa2dentry(obd, oa); - if (IS_ERR(dentry)) - GOTO(cleanup, rc = PTR_ERR(dentry)); + rc = filter_alloc_iobuf(OBD_BRW_READ, obj->ioo_bufcnt, &iobuf); + if (rc) + GOTO(cleanup, rc); - if (dentry->d_inode == NULL) { - CERROR("trying to BRW to non-existent file "LPU64"\n", - o->ioo_id); - f_dput(dentry); - GOTO(cleanup, rc = -ENOENT); - } + dentry = filter_oa2dentry(obd, oa); + if (IS_ERR(dentry)) + GOTO(cleanup, rc = PTR_ERR(dentry)); - if (oa) - obdo_to_inode(dentry->d_inode, oa, OBD_MD_FLATIME); - fso[i].fso_dentry = dentry; - fso[i].fso_bufcnt = o->ioo_bufcnt; + if (dentry->d_inode == NULL) { + CERROR("trying to BRW to non-existent file "LPU64"\n", + obj->ioo_id); + GOTO(cleanup, rc = -ENOENT); } + inode = dentry->d_inode; + + obdo_to_inode(dentry->d_inode, oa, OBD_MD_FLATIME); fsfilt_check_slow(now, obd_timeout, "preprw_read setup"); - for (i = 0, o = obj, rnb = nb, lnb = res; i < objcount; i++, o++) { - dentry = fso[i].fso_dentry; - inode = dentry->d_inode; - - for (j = 0; j < o->ioo_bufcnt; j++, rnb++, lnb++) { - lnb->dentry = dentry; - lnb->offset = rnb->offset; - lnb->len = rnb->len; - lnb->flags = rnb->flags; - - if (inode->i_size <= rnb->offset) { - /* If there's no 
more data, abort early. - * lnb->page == NULL and lnb->rc == 0, so it's - * easy to detect later. */ - break; - } else { - rc = filter_start_page_read(inode, lnb); - } + for (i = 0, lnb = res, rnb = nb; i < obj->ioo_bufcnt; + i++, rnb++, lnb++) { + lnb->dentry = dentry; + lnb->offset = rnb->offset; + lnb->len = rnb->len; + lnb->flags = rnb->flags; - if (rc) { - CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR, - "page err %u@"LPU64" %u/%u %p: rc %d\n", - lnb->len, lnb->offset, j, o->ioo_bufcnt, - dentry, rc); - cleanup_phase = 1; - GOTO(cleanup, rc); - } + if (inode->i_size <= rnb->offset) + /* If there's no more data, abort early. + * lnb->page == NULL and lnb->rc == 0, so it's + * easy to detect later. */ + break; + else + rc = filter_alloc_dio_page(obd, inode, lnb); - tot_bytes += lnb->rc; - if (lnb->rc < lnb->len) { - /* short read, be sure to wait on it */ - lnb++; - break; - } + if (rc) { + CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR, + "page err %u@"LPU64" %u/%u %p: rc %d\n", + lnb->len, lnb->offset, i, obj->ioo_bufcnt, + dentry, rc); + GOTO(cleanup, rc); } + + if (inode->i_size < lnb->offset + lnb->len - 1) + lnb->rc = inode->i_size - lnb->offset; + else + lnb->rc = lnb->len; + + tot_bytes += lnb->rc; + + filter_iobuf_add_page(obd, iobuf, inode, lnb->page); } fsfilt_check_slow(now, obd_timeout, "start_page_read"); - lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_READ_BYTES, tot_bytes); - while (lnb-- > res) { - rc = filter_finish_page_read(lnb); - if (rc) { - CERROR("error page %u@"LPU64" %u %p: rc %d\n", lnb->len, - lnb->offset, (int)(lnb - res), lnb->dentry, rc); - cleanup_phase = 1; - GOTO(cleanup, rc); - } - } + rc = filter_direct_io(OBD_BRW_READ, dentry, iobuf, exp, + NULL, NULL, NULL); + if (rc) + GOTO(cleanup, rc); - fsfilt_check_slow(now, obd_timeout, "finish_page_read"); + lprocfs_counter_add(obd->obd_stats, LPROC_FILTER_READ_BYTES, tot_bytes); filter_tally_read(&exp->exp_obd->u.filter, res, niocount); EXIT; cleanup: - switch (cleanup_phase) { - case 1: - 
for (lnb = res; lnb < (res + niocount); lnb++) { - if (lnb->page) - page_cache_release(lnb->page); - } - if (res->dentry != NULL) - f_dput(res->dentry); + if (rc != 0) { + filter_free_dio_pages(objcount, obj, niocount, res); + + if (dentry != NULL) + f_dput(dentry); else CERROR("NULL dentry in cleanup -- tell CFS\n"); - case 0: - OBD_FREE(fso, objcount * sizeof(*fso)); - pop_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); } + + if (iobuf != NULL) + filter_free_iobuf(iobuf); + + pop_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); + if (rc) + CERROR("io error %d\n", rc); + return rc; } @@ -521,26 +495,14 @@ static int filter_grant_check(struct obd_export *exp, int objcount, exp->exp_obd->u.filter.fo_tot_dirty -= used; fed->fed_dirty -= used; - return rc; -} - -static int filter_start_page_write(struct inode *inode, - struct niobuf_local *lnb) -{ - struct page *page = alloc_pages(GFP_HIGHUSER, 0); - if (page == NULL) { - CERROR("no memory for a temp page\n"); - RETURN(lnb->rc = -ENOMEM); - } - POISON_PAGE(page, 0xf1); - if (lnb->len != PAGE_SIZE) { - memset(kmap(page) + lnb->len, 0, PAGE_SIZE - lnb->len); - kunmap(page); + if (fed->fed_dirty < 0 || fed->fed_grant < 0 || fed->fed_pending < 0) { + CERROR("%s: cli %s/%p dirty %ld pend %ld grant %ld\n", + exp->exp_obd->obd_name, exp->exp_client_uuid.uuid, exp, + fed->fed_dirty, fed->fed_pending, fed->fed_grant); + spin_unlock(&exp->exp_obd->obd_osfs_lock); + LBUG(); } - page->index = lnb->offset >> PAGE_SHIFT; - lnb->page = page; - - return 0; + return rc; } /* If we ever start to support multi-object BRW RPCs, we will need to get locks @@ -561,28 +523,34 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, { struct obd_run_ctxt saved; struct niobuf_remote *rnb; - struct niobuf_local *lnb; + struct niobuf_local *lnb = res; struct fsfilt_objinfo fso; - struct dentry *dentry; + struct dentry *dentry = NULL; + void *iobuf; obd_size left; unsigned long now = jiffies; - int rc = 0, i, tot_bytes = 0, 
cleanup_phase = 1; + int rc = 0, i, tot_bytes = 0, cleanup_phase = 0; ENTRY; LASSERT(objcount == 1); LASSERT(obj->ioo_bufcnt > 0); memset(res, 0, niocount * sizeof(*res)); + rc = filter_alloc_iobuf(OBD_BRW_READ, obj->ioo_bufcnt, &iobuf); + if (rc) + GOTO(cleanup, rc); + cleanup_phase = 1; + push_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); dentry = filter_fid2dentry(exp->exp_obd, NULL, obj->ioo_gr, obj->ioo_id); if (IS_ERR(dentry)) GOTO(cleanup, rc = PTR_ERR(dentry)); + cleanup_phase = 2; if (dentry->d_inode == NULL) { CERROR("trying to BRW to non-existent file "LPU64"\n", obj->ioo_id); - f_dput(dentry); GOTO(cleanup, rc = -ENOENT); } @@ -597,7 +565,7 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, obdo_to_inode(dentry->d_inode, oa, OBD_MD_FLATIME | OBD_MD_FLMTIME | OBD_MD_FLCTIME); } - cleanup_phase = 0; + cleanup_phase = 3; left = filter_grant_space_left(exp); @@ -615,10 +583,8 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, spin_unlock(&exp->exp_obd->obd_osfs_lock); - if (rc) { - f_dput(dentry); + if (rc) GOTO(cleanup, rc); - } for (i = 0, rnb = nb, lnb = res; i < obj->ioo_bufcnt; i++, lnb++, rnb++) { @@ -630,20 +596,55 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, lnb->len = rnb->len; lnb->flags = rnb->flags; - rc = filter_start_page_write(dentry->d_inode, lnb); + rc = filter_alloc_dio_page(exp->exp_obd, dentry->d_inode,lnb); if (rc) { CERROR("page err %u@"LPU64" %u/%u %p: rc %d\n", lnb->len, lnb->offset, i, obj->ioo_bufcnt, dentry, rc); - while (lnb-- > res) - __free_pages(lnb->page, 0); - f_dput(dentry); GOTO(cleanup, rc); } + cleanup_phase = 4; + + /* If the filter writes a partial page, then has the file + * extended, the client will read in the whole page. the + * filter has to be careful to zero the rest of the partial + * page on disk. 
we do it by hand for partial extending + * writes, send_bio() is responsible for zeroing pages when + * asked to read unmapped blocks -- brw_kiovec() does this. */ + if (lnb->len != PAGE_SIZE) { + __s64 maxidx; + + maxidx = ((dentry->d_inode->i_size + PAGE_SIZE - 1) >> + PAGE_SHIFT) - 1; + if (maxidx >= lnb->page->index) { + LL_CDEBUG_PAGE(D_PAGE, lnb->page, "write %u @ " + LPU64" flg %x before EOF %llu\n", + lnb->len, lnb->offset,lnb->flags, + dentry->d_inode->i_size); + filter_iobuf_add_page(exp->exp_obd, iobuf, + dentry->d_inode, + lnb->page); + } else { + long off; + char *p = kmap(lnb->page); + + off = lnb->offset & ~PAGE_MASK; + if (off) + memset(p, 0, off); + off = (lnb->offset + lnb->len) & ~PAGE_MASK; + if (off) + memset(p + off, 0, PAGE_SIZE - off); + kunmap(lnb->page); + } + } + if (lnb->rc == 0) tot_bytes += lnb->len; } + rc = filter_direct_io(OBD_BRW_READ, dentry, iobuf, exp, + NULL, NULL, NULL); + fsfilt_check_slow(now, obd_timeout, "start_page_write"); lprocfs_counter_add(exp->exp_obd->obd_stats, LPROC_FILTER_WRITE_BYTES, @@ -651,14 +652,26 @@ static int filter_preprw_write(int cmd, struct obd_export *exp, struct obdo *oa, EXIT; cleanup: switch(cleanup_phase) { + case 4: + if (rc) + filter_free_dio_pages(objcount, obj, niocount, res); + case 3: + pop_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); + filter_free_iobuf(iobuf); + case 2: + if (rc) + f_dput(dentry); + break; case 1: spin_lock(&exp->exp_obd->obd_osfs_lock); if (oa) filter_grant_incoming(exp, oa); spin_unlock(&exp->exp_obd->obd_osfs_lock); - default: ; + pop_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); + filter_free_iobuf(iobuf); + break; + default:; } - pop_ctxt(&saved, &exp->exp_obd->obd_ctxt, NULL); return rc; } @@ -679,33 +692,36 @@ int filter_preprw(int cmd, struct obd_export *exp, struct obdo *oa, return -EPROTO; } +void filter_release_read_page(struct filter_obd *filter, struct inode *inode, + struct page *page) +{ + int drop = 0; + + if (inode != NULL && + (inode->i_size > 
filter->fo_readcache_max_filesize)) + drop = 1; + + /* drop from cache like truncate_list_pages() */ + if (drop && !TryLockPage(page)) { + if (page->mapping) + ll_truncate_complete_page(page); + unlock_page(page); + } + page_cache_release(page); +} + static int filter_commitrw_read(struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_local *res, struct obd_trans_info *oti, int rc) { - struct obd_ioobj *o; - struct niobuf_local *lnb; - int i, j, drop = 0; + struct inode *inode = NULL; ENTRY; if (res->dentry != NULL) - drop = (res->dentry->d_inode->i_size > - exp->exp_obd->u.filter.fo_readcache_max_filesize); + inode = res->dentry->d_inode; - for (i = 0, o = obj, lnb = res; i < objcount; i++, o++) { - for (j = 0 ; j < o->ioo_bufcnt ; j++, lnb++) { - if (lnb->page == NULL) - continue; - /* drop from cache like truncate_list_pages() */ - if (drop && !TryLockPage(lnb->page)) { - if (lnb->page->mapping) - ll_truncate_complete_page(lnb->page); - unlock_page(lnb->page); - } - page_cache_release(lnb->page); - } - } + filter_free_dio_pages(objcount, obj, niocount, res); if (res->dentry != NULL) f_dput(res->dentry); @@ -826,9 +842,16 @@ int filter_brw(int cmd, struct obd_export *exp, struct obdo *oa, GOTO(out, ret); for (i = 0; i < oa_bufs; i++) { - void *virt = kmap(pga[i].pg); - obd_off off = pga[i].off & ~PAGE_MASK; - void *addr = kmap(lnb[i].page); + void *virt; + obd_off off; + void *addr; + + if (lnb[i].page == NULL) + break; + + off = pga[i].off & ~PAGE_MASK; + virt = kmap(pga[i].pg); + addr = kmap(lnb[i].page); /* 2 kmaps == vanishingly small deadlock opportunity */ diff --git a/lustre/obdfilter/filter_io_24.c b/lustre/obdfilter/filter_io_24.c index 12f2a6a..0bb2146 100644 --- a/lustre/obdfilter/filter_io_24.c +++ b/lustre/obdfilter/filter_io_24.c @@ -77,27 +77,81 @@ static void check_pending_bhs(unsigned long *blocks, int nr_pages, dev_t dev, #endif } -/* Must be called with i_sem taken; this will drop it */ 
-static int filter_direct_io(int rw, struct dentry *dchild, struct kiobuf *iobuf, - struct obd_export *exp, struct iattr *attr, - struct obd_trans_info *oti, void **wait_handle) +/* when brw_kiovec() is asked to read from block -1UL it just zeros + * the page. this gives us a chance to verify the write mappings + * as well */ +static int filter_cleanup_mappings(int rw, struct kiobuf *iobuf, + struct inode *inode) +{ + int i, blocks_per_page_bits = PAGE_SHIFT - inode->i_blkbits; + ENTRY; + + for (i = 0 ; i < iobuf->nr_pages << blocks_per_page_bits; i++) { + if (iobuf->blocks[i] > 0) + continue; + + if (rw == OBD_BRW_WRITE) + RETURN(-EINVAL); + + iobuf->blocks[i] = -1UL; + } + RETURN(0); +} + +#if 0 +static void dump_page(int rw, unsigned long block, struct page *page) +{ + char *blah = kmap(page); + CDEBUG(D_PAGE, "rw %d block %lu: %02x %02x %02x %02x\n", rw, block, + blah[0], blah[1], blah[2], blah[3]); + kunmap(page); +} +#endif + +static void filter_clear_page_cache(struct inode *inode, struct kiobuf *iobuf) +{ + struct page *page; + int i; + + for (i = 0; i < iobuf->nr_pages ; i++) { + page = find_lock_page(inode->i_mapping, + iobuf->maplist[i]->index); + if (page == NULL) + continue; + if (page->mapping != NULL) { + block_flushpage(page, 0); + truncate_complete_page(page); + } + unlock_page(page); + page_cache_release(page); + } +} + +/* Must be called with i_sem taken for writes; this will drop it */ +int filter_direct_io(int rw, struct dentry *dchild, void *buf, + struct obd_export *exp, struct iattr *attr, + struct obd_trans_info *oti, void **wait_handle) { struct obd_device *obd = exp->exp_obd; struct inode *inode = dchild->d_inode; - struct page *page; - unsigned long *b = iobuf->blocks; - int rc, i, create = (rw == OBD_BRW_WRITE), blocks_per_page; - int *cr, cleanup_phase = 0, *created = NULL; - int committed = 0; + struct kiobuf *iobuf = buf; + int rc, create = (rw == OBD_BRW_WRITE), committed = 0; + int blocks_per_page = PAGE_SIZE >> inode->i_blkbits, 
cleanup_phase = 0; + struct semaphore *sem = NULL; ENTRY; - blocks_per_page = PAGE_SIZE >> inode->i_blkbits; + LASSERTF(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ, "%x\n", rw); + + if (iobuf->nr_pages == 0) + GOTO(cleanup, rc = 0); + if (iobuf->nr_pages * blocks_per_page > KIO_MAX_SECTORS) GOTO(cleanup, rc = -EINVAL); - OBD_ALLOC(created, sizeof(*created) * iobuf->nr_pages*blocks_per_page); - if (created == NULL) - GOTO(cleanup, rc = -ENOMEM); + if (iobuf->nr_pages * blocks_per_page > + OBDFILTER_CREATED_SCRATCHPAD_ENTRIES) + GOTO(cleanup, rc = -EINVAL); + cleanup_phase = 1; rc = lock_kiovec(1, &iobuf, 1); @@ -105,44 +159,50 @@ static int filter_direct_io(int rw, struct dentry *dchild, struct kiobuf *iobuf, GOTO(cleanup, rc); cleanup_phase = 2; - down(&exp->exp_obd->u.filter.fo_alloc_lock); - for (i = 0, cr = created, b = iobuf->blocks; i < iobuf->nr_pages; i++){ - page = iobuf->maplist[i]; - - rc = fsfilt_map_inode_page(obd, inode, page, b, cr, create); - if (rc) { - CERROR("ino %lu, blk %lu cr %u create %d: rc %d\n", - inode->i_ino, *b, *cr, create, rc); - up(&exp->exp_obd->u.filter.fo_alloc_lock); - GOTO(cleanup, rc); - } - - b += blocks_per_page; - cr += blocks_per_page; + if (rw == OBD_BRW_WRITE) { + create = 1; + sem = &obd->u.filter.fo_alloc_lock; } - up(&exp->exp_obd->u.filter.fo_alloc_lock); - - filter_tally_write(&obd->u.filter, iobuf->maplist, iobuf->nr_pages, - iobuf->blocks, blocks_per_page); - - if (attr->ia_size > inode->i_size) - attr->ia_valid |= ATTR_SIZE; - rc = fsfilt_setattr(obd, dchild, oti->oti_handle, attr, 0); + rc = fsfilt_map_inode_pages(obd, inode, iobuf->maplist, + iobuf->nr_pages, iobuf->blocks, + obdfilter_created_scratchpad, create, sem); if (rc) GOTO(cleanup, rc); - up(&inode->i_sem); - cleanup_phase = 3; - - rc = filter_finish_transno(exp, oti, 0); + rc = filter_cleanup_mappings(rw, iobuf, inode); if (rc) GOTO(cleanup, rc); - rc = fsfilt_commit_async(obd, inode, oti->oti_handle, wait_handle); - oti->oti_handle = NULL; - committed = 
1; - if (rc) - GOTO(cleanup, rc); + if (rw == OBD_BRW_WRITE) { + filter_tally_write(&obd->u.filter, iobuf->maplist, + iobuf->nr_pages, iobuf->blocks, + blocks_per_page); + + if (attr->ia_size > inode->i_size) + attr->ia_valid |= ATTR_SIZE; + rc = fsfilt_setattr(obd, dchild, oti->oti_handle, attr, 0); + if (rc) + GOTO(cleanup, rc); + + up(&inode->i_sem); + cleanup_phase = 3; + + rc = filter_finish_transno(exp, oti, 0); + if (rc) + GOTO(cleanup, rc); + + rc = fsfilt_commit_async(obd,inode,oti->oti_handle,wait_handle); + committed = 1; + if (rc) + GOTO(cleanup, rc); + } + + /* these are our hacks to keep our directio/bh IO coherent with ext3's + * page cache use. Most notably ext3 reads file data into the page + * cache when it is zeroing the tail of partial-block truncates and + * leaves it there, sometimes generating io from it at later truncates. + * Someday very soon we'll be performing our brw_kiovec() IO to and + * from the page cache. */ check_pending_bhs(iobuf->blocks, iobuf->nr_pages, inode->i_dev, 1 << inode->i_blkbits); @@ -152,35 +212,26 @@ static int filter_direct_io(int rw, struct dentry *dchild, struct kiobuf *iobuf, rc = fsync_inode_data_buffers(inode); if (rc == 0) rc = filemap_fdatawait(inode->i_mapping); - if (rc < 0) { - /* We can race with truncate_complete_page() in the call to - * filter_clear_page_cache(). This is OK, because it also - * waits on IO completion already, but the truncate confuses - * the buffer_uptodate() in fsync_inode_data_buffers(). - * The only dirty pages in the page cache on an inode should - * be from partial page truncates. - * If there is a real IO error here we'll hit it below. 
*/ - CDEBUG(D_WARNING, "error flushing page cache: rc %d\n", rc); - //GOTO(cleanup, rc); - } + if (rc < 0) + GOTO(cleanup, rc); + + /* be careful to call this after fsync_inode_data_buffers has waited + * for IO to complete before we evict it from the cache */ + filter_clear_page_cache(inode, iobuf); + + rc = fsfilt_send_bio(rw, obd, inode, iobuf); - rc = brw_kiovec(WRITE, 1, &iobuf, inode->i_dev, iobuf->blocks, - 1 << inode->i_blkbits); CDEBUG(D_INFO, "tried to write %d pages, rc = %d\n", iobuf->nr_pages, rc); - if (rc != (1 << inode->i_blkbits) * iobuf->nr_pages * blocks_per_page) - CERROR("short write? expected %d, wrote %d\n", - (1 << inode->i_blkbits) * iobuf->nr_pages * - blocks_per_page, rc); + if (rc > 0) rc = 0; EXIT; cleanup: - if (!committed) { + if (!committed && (rw == OBD_BRW_WRITE)) { int err = fsfilt_commit_async(obd, inode, oti->oti_handle, wait_handle); - oti->oti_handle = NULL; if (err) CERROR("can't close transaction: %d\n", err); /* @@ -194,12 +245,9 @@ cleanup: case 2: unlock_kiovec(1, &iobuf); case 1: - OBD_FREE(created, sizeof(*created) * - iobuf->nr_pages*blocks_per_page); case 0: - if (cleanup_phase == 3) - break; - up(&inode->i_sem); + if (cleanup_phase != 3 && rw == OBD_BRW_WRITE) + up(&inode->i_sem); break; default: CERROR("corrupt cleanup_phase (%d)?\n", cleanup_phase); @@ -210,7 +258,7 @@ cleanup: } /* See if there are unallocated parts in given file region */ -static int filter_range_is_mapped(struct inode *inode, obd_size offset, int len) +int filter_range_is_mapped(struct inode *inode, obd_size offset, int len) { int (*fs_bmap)(struct address_space *, long) = inode->i_mapping->a_ops->bmap; @@ -223,7 +271,7 @@ static int filter_range_is_mapped(struct inode *inode, obd_size offset, int len) offset >>= inode->i_blkbits; len >>= inode->i_blkbits; - for (j = 0; j <= len; j++) + for (j = 0; j < len; j++) if (fs_bmap(inode->i_mapping, offset + j) == 0) return 0; @@ -245,6 +293,51 @@ static void clear_kiobuf(struct kiobuf *iobuf) 
iobuf->length = 0; } +int filter_alloc_iobuf(int rw, int num_pages, void **ret) +{ + int rc; + struct kiobuf *iobuf; + ENTRY; + + LASSERTF(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ, "%x\n", rw); + + rc = alloc_kiovec(1, &iobuf); + if (rc) + RETURN(rc); + + rc = expand_kiobuf(iobuf, num_pages); + if (rc) { + free_kiovec(1, &iobuf); + RETURN(rc); + } + +#ifdef HAVE_KIOBUF_DOVARY + iobuf->dovary = 0; /* this prevents corruption, not present in 2.4.20 */ +#endif + clear_kiobuf(iobuf); + *ret = iobuf; + RETURN(0); +} + +void filter_free_iobuf(void *buf) +{ + struct kiobuf *iobuf = buf; + + clear_kiobuf(iobuf); + free_kiovec(1, &iobuf); +} + +int filter_iobuf_add_page(struct obd_device *obd, void *buf, + struct inode *inode, struct page *page) +{ + struct kiobuf *iobuf = buf; + + iobuf->maplist[iobuf->nr_pages++] = page; + iobuf->length += PAGE_SIZE; + + return 0; +} + int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, struct obd_ioobj *obj, int niocount, struct niobuf_local *res, struct obd_trans_info *oti, @@ -255,13 +348,12 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, struct niobuf_local *lnb; struct fsfilt_objinfo fso; struct iattr iattr = { 0 }; - struct kiobuf *iobuf; + void *iobuf = NULL; struct inode *inode = NULL; int i, n, cleanup_phase = 0, err; unsigned long now = jiffies; /* DEBUGGING OST TIMEOUTS */ void *wait_handle; ENTRY; - LASSERT(oti != NULL); LASSERT(objcount == 1); LASSERT(current->journal_info == NULL); @@ -269,21 +361,11 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, if (rc != 0) GOTO(cleanup, rc); - rc = alloc_kiovec(1, &iobuf); + rc = filter_alloc_iobuf(OBD_BRW_WRITE, obj->ioo_bufcnt, &iobuf); if (rc) GOTO(cleanup, rc); cleanup_phase = 1; -#ifdef HAVE_KIOBUF_DOVARY - iobuf->dovary = 0; /* this prevents corruption, not present in 2.4.20 */ -#endif - rc = expand_kiobuf(iobuf, obj->ioo_bufcnt); - if (rc) - GOTO(cleanup, rc); - - 
clear_kiobuf(iobuf); - - cleanup_phase = 1; fso.fso_dentry = res->dentry; fso.fso_bufcnt = obj->ioo_bufcnt; inode = res->dentry->d_inode; @@ -299,9 +381,7 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, int objcount, if (lnb->rc) /* ENOSPC, network RPC error */ continue; - iobuf->maplist[n++] = lnb->page; - iobuf->length += PAGE_SIZE; - iobuf->nr_pages++; + filter_iobuf_add_page(obd, iobuf, inode, lnb->page); /* We expect these pages to be in offset order, but we'll * be forgiving */ @@ -351,16 +431,9 @@ cleanup: pop_ctxt(&saved, &obd->obd_ctxt, NULL); LASSERT(current->journal_info == NULL); case 1: - clear_kiobuf(iobuf); - free_kiovec(1, &iobuf); + filter_free_iobuf(iobuf); case 0: - for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) { - /* flip_.. gets a ref, while free_page only frees - * when it decrefs to 0 */ - if (rc == 0) - flip_into_page_cache(inode, lnb->page); - __free_page(lnb->page); - } + filter_free_dio_pages(objcount, obj, niocount, res); f_dput(res->dentry); } diff --git a/lustre/obdfilter/filter_io_26.c b/lustre/obdfilter/filter_io_26.c index a1fa90f..bb8b1c1 100644 --- a/lustre/obdfilter/filter_io_26.c +++ b/lustre/obdfilter/filter_io_26.c @@ -28,6 +28,7 @@ #include #include // XXX kill me soon #include +#include #define DEBUG_SUBSYSTEM S_FILTER @@ -40,12 +41,15 @@ /* 512byte block min */ #define MAX_BLOCKS_PER_PAGE (PAGE_SIZE / 512) struct dio_request { - atomic_t numreqs; /* number of reqs being processed */ - struct bio *bio_list; /* list of completed bios */ - wait_queue_head_t wait; - int created[MAX_BLOCKS_PER_PAGE]; - unsigned long blocks[MAX_BLOCKS_PER_PAGE]; - spinlock_t lock; + atomic_t dr_numreqs; /* number of reqs being processed */ + struct bio *dr_bios; /* list of completed bios */ + wait_queue_head_t dr_wait; + int dr_max_pages; + int dr_npages; + int dr_error; + struct page **dr_pages; + unsigned long *dr_blocks; + spinlock_t dr_lock; }; static int dio_complete_routine(struct bio *bio, unsigned int done, 
int error) @@ -53,19 +57,22 @@ static int dio_complete_routine(struct bio *bio, unsigned int done, int error) struct dio_request *dreq = bio->bi_private; unsigned long flags; - spin_lock_irqsave(&dreq->lock, flags); - bio->bi_private = dreq->bio_list; - dreq->bio_list = bio; - spin_unlock_irqrestore(&dreq->lock, flags); - if (atomic_dec_and_test(&dreq->numreqs)) - wake_up(&dreq->wait); + spin_lock_irqsave(&dreq->dr_lock, flags); + bio->bi_private = dreq->dr_bios; + dreq->dr_bios = bio; + if (dreq->dr_error == 0) + dreq->dr_error = error; + spin_unlock_irqrestore(&dreq->dr_lock, flags); + + if (atomic_dec_and_test(&dreq->dr_numreqs)) + wake_up(&dreq->dr_wait); return 0; } static int can_be_merged(struct bio *bio, sector_t sector) { - int size; + unsigned int size; if (!bio) return 0; @@ -74,6 +81,299 @@ static int can_be_merged(struct bio *bio, sector_t sector) return bio->bi_sector + size == sector ? 1 : 0; } +int filter_alloc_iobuf(int rw, int num_pages, void **ret) +{ + struct dio_request *dreq; + + LASSERTF(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ, "%x\n", rw); + + OBD_ALLOC(dreq, sizeof(*dreq)); + if (dreq == NULL) + goto failed_0; + + OBD_ALLOC(dreq->dr_pages, num_pages * sizeof(*dreq->dr_pages)); + if (dreq->dr_pages == NULL) + goto failed_1; + + OBD_ALLOC(dreq->dr_blocks, + MAX_BLOCKS_PER_PAGE * num_pages * sizeof(*dreq->dr_blocks)); + if (dreq->dr_blocks == NULL) + goto failed_2; + + dreq->dr_bios = NULL; + init_waitqueue_head(&dreq->dr_wait); + atomic_set(&dreq->dr_numreqs, 0); + spin_lock_init(&dreq->dr_lock); + dreq->dr_max_pages = num_pages; + dreq->dr_npages = 0; + + *ret = dreq; + RETURN(0); + + failed_2: + OBD_FREE(dreq->dr_pages, + num_pages * sizeof(*dreq->dr_pages)); + failed_1: + OBD_FREE(dreq, sizeof(*dreq)); + failed_0: + RETURN(-ENOMEM); +} + +void filter_free_iobuf(void *iobuf) +{ + struct dio_request *dreq = iobuf; + int num_pages = dreq->dr_max_pages; + + /* free all bios */ + while (dreq->dr_bios) { + struct bio *bio = dreq->dr_bios; + 
dreq->dr_bios = bio->bi_private; + bio_put(bio); + } + + OBD_FREE(dreq->dr_blocks, + MAX_BLOCKS_PER_PAGE * num_pages * sizeof(*dreq->dr_blocks)); + OBD_FREE(dreq->dr_pages, + num_pages * sizeof(*dreq->dr_pages)); + OBD_FREE(dreq, sizeof(*dreq)); +} + +int filter_iobuf_add_page(struct obd_device *obd, void *iobuf, + struct inode *inode, struct page *page) +{ + struct dio_request *dreq = iobuf; + + LASSERT (dreq->dr_npages < dreq->dr_max_pages); + dreq->dr_pages[dreq->dr_npages++] = page; + + return 0; +} + +int filter_do_bio(struct obd_device *obd, struct inode *inode, + struct dio_request *dreq, int rw) +{ + int blocks_per_page = PAGE_SIZE >> inode->i_blkbits; + struct page **pages = dreq->dr_pages; + int npages = dreq->dr_npages; + unsigned long *blocks = dreq->dr_blocks; + int total_blocks = npages * blocks_per_page; + int sector_bits = inode->i_sb->s_blocksize_bits - 9; + unsigned int blocksize = inode->i_sb->s_blocksize; + struct bio *bio = NULL; + struct page *page; + unsigned int page_offset; + sector_t sector; + int nblocks; + int block_idx; + int page_idx; + int i; + int rc = 0; + ENTRY; + + LASSERT(dreq->dr_npages == npages); + LASSERT(total_blocks <= OBDFILTER_CREATED_SCRATCHPAD_ENTRIES); + + for (page_idx = 0, block_idx = 0; + page_idx < npages; + page_idx++, block_idx += blocks_per_page) { + + page = pages[page_idx]; + LASSERT (block_idx + blocks_per_page <= total_blocks); + + for (i = 0, page_offset = 0; + i < blocks_per_page; + i += nblocks, page_offset += blocksize * nblocks) { + + nblocks = 1; + + if (blocks[block_idx + i] == 0) { /* hole */ + LASSERT(rw == OBD_BRW_READ); + memset(kmap(page) + page_offset, 0, blocksize); + kunmap(page); + continue; + } + + sector = blocks[block_idx + i] << sector_bits; + + /* Additional contiguous file blocks? 
*/ + while (i + nblocks < blocks_per_page && + (sector + nblocks*(blocksize>>9)) == + (blocks[block_idx + i + nblocks] << sector_bits)) + nblocks++; + + if (bio != NULL && + can_be_merged(bio, sector) && + bio_add_page(bio, page, + blocksize * nblocks, page_offset) != 0) + continue; /* added this frag OK */ + + if (bio != NULL) { + request_queue_t *q = bdev_get_queue(bio->bi_bdev); + + /* Dang! I have to fragment this I/O */ + CDEBUG(D_INODE, "bio++ sz %d vcnt %d(%d) " + "sectors %d(%d) psg %d(%d) hsg %d(%d)\n", + bio->bi_size, + bio->bi_vcnt, bio->bi_max_vecs, + bio->bi_size >> 9, q->max_sectors, + bio_phys_segments(q, bio), + q->max_phys_segments, + bio_hw_segments(q, bio), + q->max_hw_segments); + + atomic_inc(&dreq->dr_numreqs); + rc = fsfilt_send_bio(rw, obd, inode, bio); + if (rc < 0) { + CERROR("Can't send bio: %d\n", rc); + /* OK do dec; we do the waiting */ + atomic_dec(&dreq->dr_numreqs); + goto out; + } + rc = 0; + + bio = NULL; + } + + /* allocate new bio */ + bio = bio_alloc(GFP_NOIO, + (npages - page_idx) * blocks_per_page); + if (bio == NULL) { + CERROR ("Can't allocate bio\n"); + rc = -ENOMEM; + goto out; + } + + bio->bi_bdev = inode->i_sb->s_bdev; + bio->bi_sector = sector; + bio->bi_end_io = dio_complete_routine; + bio->bi_private = dreq; + + rc = bio_add_page(bio, page, + blocksize * nblocks, page_offset); + LASSERT (rc != 0); + } + } + + if (bio != NULL) { + atomic_inc(&dreq->dr_numreqs); + rc = fsfilt_send_bio(rw, obd, inode, bio); + if (rc >= 0) { + rc = 0; + } else { + CERROR("Can't send bio: %d\n", rc); + /* OK do dec; we do the waiting */ + atomic_dec(&dreq->dr_numreqs); + } + } + + out: + wait_event(dreq->dr_wait, atomic_read(&dreq->dr_numreqs) == 0); + + if (rc == 0) + rc = dreq->dr_error; + RETURN(rc); +} + +static void filter_clear_page_cache(struct inode *inode, struct bio *iobuf) +{ +#if 0 + struct page *page; + int i; + + for (i = 0; i < iobuf->nr_pages ; i++) { + page = find_lock_page(inode->i_mapping, + iobuf->maplist[i]->index); + 
if (page == NULL) + continue; + if (page->mapping != NULL) { + block_invalidatepage(page, 0); + truncate_complete_page(page); + } + unlock_page(page); + page_cache_release(page); + } +#endif +} + +/* Must be called with i_sem taken for writes; this will drop it */ +int filter_direct_io(int rw, struct dentry *dchild, void *iobuf, + struct obd_export *exp, struct iattr *attr, + struct obd_trans_info *oti, void **wait_handle) +{ + struct obd_device *obd = exp->exp_obd; + struct dio_request *dreq = iobuf; + struct inode *inode = dchild->d_inode; + int rc; + int rc2; + ENTRY; + + LASSERTF(rw == OBD_BRW_WRITE || rw == OBD_BRW_READ, "%x\n", rw); + LASSERTF(dreq->dr_npages <= dreq->dr_max_pages, "%d,%d\n", + dreq->dr_npages, dreq->dr_max_pages); + + /* XXX FIXME these assertions should be handled properly here or + * checked elsewhere */ + LASSERT(dreq->dr_npages <= OBDFILTER_CREATED_SCRATCHPAD_ENTRIES); + if (dreq->dr_npages == 0) + GOTO(out, rc=0); + + rc = fsfilt_map_inode_pages(obd, inode, + dreq->dr_pages, dreq->dr_npages, + dreq->dr_blocks, + obdfilter_created_scratchpad, + rw == OBD_BRW_WRITE, NULL); + + if (rw == OBD_BRW_WRITE) { + if (rc == 0) { +#if 0 + filter_tally_write(&obd->u.filter, + dreq->dr_pages, + dreq->dr_page_idx, + dreq->dr_blocks, + blocks_per_page); +#endif + if (attr->ia_size > inode->i_size) + attr->ia_valid |= ATTR_SIZE; + rc = fsfilt_setattr(obd, dchild, + oti->oti_handle, attr, 0); + } + + up(&inode->i_sem); + + rc2 = filter_finish_transno(exp, oti, 0); + if (rc2 != 0) + CERROR("can't close transaction: %d\n", rc2); + + if (rc == 0) + rc = rc2; + if (rc != 0) + RETURN(rc); + + } + + /* This is nearly osync_inode, without the waiting + rc = generic_osync_inode(inode, inode->i_mapping, + OSYNC_DATA|OSYNC_METADATA); */ + rc = filemap_fdatawrite(inode->i_mapping); + rc2 = sync_mapping_buffers(inode->i_mapping); + if (rc == 0) + rc = rc2; + rc2 = filemap_fdatawait(inode->i_mapping); + if (rc == 0) + rc = rc2; + + if (rc != 0) + RETURN(rc); + + /* be 
careful to call this after fsync_inode_data_buffers has waited + * for IO to complete before we evict it from the cache */ + filter_clear_page_cache(inode, iobuf); + + RETURN(filter_do_bio(obd, inode, dreq, rw)); +out: + RETURN(rc); +} + /* See if there are unallocated parts in given file region */ static int filter_range_is_mapped(struct inode *inode, obd_size offset, int len) { @@ -100,146 +400,90 @@ int filter_commitrw_write(struct obd_export *exp, struct obdo *oa, struct niobuf_local *res, struct obd_trans_info *oti, int rc) { - struct bio *bio = NULL; - int blocks_per_page, err; struct niobuf_local *lnb; + struct dio_request *dreq = NULL; struct obd_run_ctxt saved; struct fsfilt_objinfo fso; struct iattr iattr = { 0 }; struct inode *inode = NULL; unsigned long now = jiffies; - int i, k, cleanup_phase = 0; - - struct dio_request *dreq = NULL; + int i, err, cleanup_phase = 0; struct obd_device *obd = exp->exp_obd; - + int total_size = 0; ENTRY; + LASSERT(oti != NULL); LASSERT(objcount == 1); LASSERT(current->journal_info == NULL); if (rc != 0) GOTO(cleanup, rc); - - inode = res->dentry->d_inode; - blocks_per_page = PAGE_SIZE >> inode->i_blkbits; - LASSERT(blocks_per_page <= MAX_BLOCKS_PER_PAGE); - - OBD_ALLOC(dreq, sizeof(*dreq)); - - if (dreq == NULL) - RETURN(-ENOMEM); - - dreq->bio_list = NULL; - init_waitqueue_head(&dreq->wait); - atomic_set(&dreq->numreqs, 0); - spin_lock_init(&dreq->lock); - - cleanup_phase = 1; - fso.fso_dentry = res->dentry; - fso.fso_bufcnt = obj->ioo_bufcnt; - - push_ctxt(&saved, &obd->obd_ctxt, NULL); - cleanup_phase = 2; - - generic_osync_inode(inode, inode->i_mapping, OSYNC_DATA|OSYNC_METADATA); - - oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso, - niocount, res, oti); - if (IS_ERR(oti->oti_handle)) { - rc = PTR_ERR(oti->oti_handle); - CDEBUG(rc == -ENOSPC ? 
D_INODE : D_ERROR, - "error starting transaction: rc = %d\n", rc); - oti->oti_handle = NULL; + rc = filter_alloc_iobuf(OBD_BRW_WRITE, obj->ioo_bufcnt, (void **)&dreq); + if (rc) GOTO(cleanup, rc); - } + cleanup_phase = 1; - fsfilt_check_slow(now, obd_timeout, "brw_start"); + fso.fso_dentry = res->dentry; + fso.fso_bufcnt = obj->ioo_bufcnt; + inode = res->dentry->d_inode; - iattr_from_obdo(&iattr,oa,OBD_MD_FLATIME|OBD_MD_FLMTIME|OBD_MD_FLCTIME); for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) { loff_t this_size; - sector_t sector; - int offs; /* If overwriting an existing block, we don't need a grant */ if (!(lnb->flags & OBD_BRW_GRANTED) && lnb->rc == -ENOSPC && filter_range_is_mapped(inode, lnb->offset, lnb->len)) lnb->rc = 0; - if (lnb->rc) /* ENOSPC, network RPC error, etc. */ + if (lnb->rc) { /* ENOSPC, network RPC error, etc. */ + CDEBUG(D_INODE, "Skipping [%d] == %d\n", i, lnb->rc); continue; - - /* get block number for next page */ - rc = fsfilt_map_inode_page(obd, inode, lnb->page, dreq->blocks, - dreq->created, 1); - if (rc != 0) - GOTO(cleanup, rc); - - for (k = 0; k < blocks_per_page; k++) { - sector = dreq->blocks[k] *(inode->i_sb->s_blocksize>>9); - offs = k * inode->i_sb->s_blocksize; - - if (!bio || !can_be_merged(bio, sector) || - !bio_add_page(bio, lnb->page, PAGE_SIZE, offs)) { - if (bio) { - atomic_inc(&dreq->numreqs); - submit_bio(WRITE, bio); - bio = NULL; - } - /* allocate new bio */ - bio = bio_alloc(GFP_NOIO, obj->ioo_bufcnt); - bio->bi_bdev = inode->i_sb->s_bdev; - bio->bi_sector = sector; - bio->bi_end_io = dio_complete_routine; - bio->bi_private = dreq; - - if (!bio_add_page(bio, lnb->page, PAGE_SIZE, - offs)) - LBUG(); - } } - /* We expect these pages to be in offset order, but we'll + err = filter_iobuf_add_page(obd, dreq, inode, lnb->page); + LASSERT (err == 0); + + total_size += lnb->len; + + /* we expect these pages to be in offset order, but we'll * be forgiving */ this_size = lnb->offset + lnb->len; if (this_size > 
iattr.ia_size) iattr.ia_size = this_size; } - - if (bio) { - atomic_inc(&dreq->numreqs); - submit_bio(WRITE, bio); - } - - /* time to wait for I/O completion */ - wait_event(dreq->wait, atomic_read(&dreq->numreqs) == 0); - - /* free all bios */ - while (dreq->bio_list) { - bio = dreq->bio_list; - dreq->bio_list = bio->bi_private; - bio_put(bio); - } +#if 0 + /* I use this when I'm checking our lovely 1M I/Os reach the disk -eeb */ + if (total_size != (1<<20)) + CWARN("total size %d (%d pages)\n", + total_size, total_size/PAGE_SIZE); +#endif + push_ctxt(&saved, &obd->obd_ctxt, NULL); + cleanup_phase = 2; down(&inode->i_sem); - if (iattr.ia_size > inode->i_size) { - CDEBUG(D_INFO, "setting i_size to "LPU64"\n", - iattr.ia_size); - - iattr.ia_valid |= ATTR_SIZE; + oti->oti_handle = fsfilt_brw_start(obd, objcount, &fso, niocount, res, + oti); + if (IS_ERR(oti->oti_handle)) { + up(&inode->i_sem); + rc = PTR_ERR(oti->oti_handle); + CDEBUG(rc == -ENOSPC ? D_INODE : D_ERROR, + "error starting transaction: rc = %d\n", rc); + oti->oti_handle = NULL; + GOTO(cleanup, rc); } + /* have to call fsfilt_commit() from this point on */ - fsfilt_setattr(obd, res->dentry, oti->oti_handle, &iattr, 0); - up(&inode->i_sem); - - fsfilt_check_slow(now, obd_timeout, "direct_io"); + fsfilt_check_slow(now, obd_timeout, "brw_start"); + iattr_from_obdo(&iattr,oa,OBD_MD_FLATIME|OBD_MD_FLMTIME|OBD_MD_FLCTIME); + /* filter_direct_io drops i_sem */ + rc = filter_direct_io(OBD_BRW_WRITE, res->dentry, dreq, exp, &iattr, + oti, NULL); if (rc == 0) obdo_from_inode(oa, inode, FILTER_VALID_FLAGS); - rc = filter_finish_transno(exp, oti, rc); + fsfilt_check_slow(now, obd_timeout, "direct_io"); err = fsfilt_commit(obd, inode, oti->oti_handle, obd_sync_filter); if (err) @@ -258,15 +502,9 @@ cleanup: pop_ctxt(&saved, &obd->obd_ctxt, NULL); LASSERT(current->journal_info == NULL); case 1: - OBD_FREE(dreq, sizeof(*dreq)); + filter_free_iobuf(dreq); case 0: - for (i = 0, lnb = res; i < obj->ioo_bufcnt; i++, lnb++) 
{ - /* flip_.. gets a ref, while free_page only frees - * when it decrefs to 0 */ - if (rc == 0) - flip_into_page_cache(inode, lnb->page); - __free_page(lnb->page); - } + filter_free_dio_pages(objcount, obj, niocount, res); f_dput(res->dentry); } diff --git a/lustre/portals/build.m4 b/lustre/portals/build.m4 index f158396..e791fbf 100644 --- a/lustre/portals/build.m4 +++ b/lustre/portals/build.m4 @@ -61,13 +61,6 @@ case "$CC_VERSION" in "gcc version 2.96 20000731 (Mandrake Linux 8.1 2.96-0.62mdk)") bad_cc ;; - # unpatched 'gcc' on rh9. miscompiles a - # struct = (type) { .member = value, }; - # asignment in the iibnal where the struct is a mix - # of u64 and u32 bit-fields. - "gcc version 3.2.2 20030222 (Red Hat Linux 3.2.2-5)") - bad_cc - ;; *) AC_MSG_RESULT([no known problems]) ;; diff --git a/lustre/ptlrpc/service.c b/lustre/ptlrpc/service.c index 6f0c7ea..c760cf9 100644 --- a/lustre/ptlrpc/service.c +++ b/lustre/ptlrpc/service.c @@ -701,6 +701,9 @@ static int ptlrpc_main(void *arg) struct ptlrpc_thread *thread = data->thread; struct lc_watchdog *watchdog; unsigned long flags; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) + struct group_info *ginfo = NULL; +#endif ENTRY; lock_kernel(); @@ -717,6 +720,17 @@ static int ptlrpc_main(void *arg) THREAD_NAME(current->comm, sizeof(current->comm) - 1, "%s", data->name); unlock_kernel(); +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,4) + ginfo = groups_alloc(0); + if (!ginfo) { + thread->t_flags = SVC_RUNNING; + wake_up(&thread->t_ctl_waitq); + return (-ENOMEM); + } + set_current_groups(ginfo); + put_group_info(ginfo); +#endif + /* Record that the thread is running */ thread->t_flags = SVC_RUNNING; wake_up(&thread->t_ctl_waitq); diff --git a/lustre/scripts/lustre-kernel-2.4.spec.in b/lustre/scripts/lustre-kernel-2.4.spec.in index 2c5a921..f7f6a70 100644 --- a/lustre/scripts/lustre-kernel-2.4.spec.in +++ b/lustre/scripts/lustre-kernel-2.4.spec.in @@ -808,7 +808,7 @@ fi #/usr/share/doc/lustre/lustre.pdf 
#/usr/share/doc/lustre/COPYING -/usr/lib/lustre/examples +/usr/share/lustre/examples %files -n lustre-ldap %defattr(-, root, root) diff --git a/lustre/scripts/lustre.spec.in b/lustre/scripts/lustre.spec.in index bbde7a5..6215e7a 100644 --- a/lustre/scripts/lustre.spec.in +++ b/lustre/scripts/lustre.spec.in @@ -145,7 +145,7 @@ mkdir -p $RPM_BUILD_ROOT/var/lib/ldap/lustre %attr(-, root, root) /usr/bin/mcreate %attr(-, root, root) /usr/bin/munlink %attr(-, root, root) /usr/lib/lustre/python -%attr(-, root, root) /usr/lib/lustre/examples +%attr(-, root, root) /usr/share/lustre/examples %attr(-, root, root) /etc/init.d/lustre %attr(-, root, root) /etc/init.d/lustrefs diff --git a/lustre/utils/lconf b/lustre/utils/lconf index aa7b1aa..b1cef54 100755 --- a/lustre/utils/lconf +++ b/lustre/utils/lconf @@ -930,6 +930,15 @@ def if2addr(iface): ip = string.split(addr, ':')[1] return ip +def def_mount_options(fstype, target): + """returns default mount options for passed fstype and target (mds, ost)""" + if fstype == 'ext3' or fstype == 'ldiskfs': + mountfsoptions = "errors=remount-ro" + if target == 'ost' and sys_get_branch() == '2.4': + mountfsoptions = "%s,asyncdel" % (mountfsoptions) + return mountfsoptions + return "" + def sys_get_elan_position_file(): procfiles = ["/proc/elan/device0/position", "/proc/qsnet/elan4/device0/position", @@ -986,6 +995,21 @@ def sys_get_local_address(net_type, wildcard, cluster_id): return local +def sys_get_branch(): + """Returns kernel release""" + try: + fp = open('/proc/sys/kernel/osrelease') + lines = fp.readlines() + fp.close() + + for l in lines: + version = string.split(l) + a = string.split(version[0], '.') + return a[0] + '.' + a[1] + except IOError, e: + log(e) + return "" + def mod_loaded(modname): """Check if a module is already loaded. 
Look in /proc/modules for it.""" try: @@ -1431,6 +1455,7 @@ class MDSDEV(Module): self.fstype = self.db.get_val('fstype', '') self.nspath = self.db.get_val('nspath', '') self.mkfsoptions = self.db.get_val('mkfsoptions', '') + self.mountfsoptions = self.db.get_val('mountfsoptions', '') # overwrite the orignal MDSDEV name and uuid with the MDS name and uuid target_uuid = self.db.get_first_ref('target') mds = self.db.lookup(target_uuid) @@ -1515,8 +1540,26 @@ class MDSDEV(Module): if not is_prepared('MDT'): lctl.newdev("mdt", 'MDT', 'MDT_UUID', setup ="") try: + mountfsoptions = def_mount_options(self.fstype, 'mds') + + if config.mountfsoptions: + if mountfsoptions: + mountfsoptions = mountfsoptions + ',' + config.mountfsoptions + else: + mountfsoptions = config.mountfsoptions + if self.mountfsoptions: + mountfsoptions = mountfsoptions + ',' + self.mountfsoptions + else: + if self.mountfsoptions: + if mountfsoptions: + mountfsoptions = mountfsoptions + ',' + self.mountfsoptions + else: + mountfsoptions = self.mountfsoptions + + print 'MDS mount options: ' + mountfsoptions + lctl.newdev("mds", self.name, self.uuid, - setup ="%s %s %s" %(blkdev, self.fstype, self.name)) + setup ="%s %s %s %s" %(blkdev, self.fstype, self.name, mountfsoptions)) except CommandError, e: if e.rc == 2: panic("MDS is missing the config log. 
Need to run " + @@ -1649,6 +1692,7 @@ class OSD(Module): self.journal_size = self.db.get_val_int('journalsize', 0) self.inode_size = self.db.get_val_int('inodesize', 0) self.mkfsoptions = self.db.get_val('mkfsoptions', '') + self.mountfsoptions = self.db.get_val('mountfsoptions', '') self.fstype = self.db.get_val('fstype', '') self.nspath = self.db.get_val('nspath', '') target_uuid = self.db.get_first_ref('target') @@ -1703,9 +1747,28 @@ class OSD(Module): blkdev = block_dev(self.devpath, self.size, self.fstype, config.reformat, self.format, self.journal_size, self.inode_size, self.mkfsoptions) + + mountfsoptions = def_mount_options(self.fstype, 'ost') + + if config.mountfsoptions: + if mountfsoptions: + mountfsoptions = mountfsoptions + ',' + config.mountfsoptions + else: + mountfsoptions = config.mountfsoptions + if self.mountfsoptions: + mountfsoptions = mountfsoptions + ',' + self.mountfsoptions + else: + if self.mountfsoptions: + if mountfsoptions: + mountfsoptions = mountfsoptions + ',' + self.mountfsoptions + else: + mountfsoptions = self.mountfsoptions + + print 'OST mount options: ' + mountfsoptions + lctl.newdev(self.osdtype, self.name, self.uuid, - setup ="%s %s %s" %(blkdev, self.fstype, - self.failover_ost)) + setup ="%s %s %s %s" %(blkdev, self.fstype, + self.failover_ost, mountfsoptions)) if not is_prepared('OSS'): lctl.newdev("ost", 'OSS', 'OSS_UUID', setup ="") @@ -1970,6 +2033,7 @@ class Mountpoint(Module): def __init__(self,db): Module.__init__(self, 'MTPT', db) self.path = my_rstrip(self.db.get_val('path'), '/') + self.clientoptions = self.db.get_val('clientoptions', '') self.fs_uuid = self.db.get_first_ref('filesystem') fs = self.db.lookup(self.fs_uuid) self.mds_uuid = fs.get_first_ref('mds') @@ -2003,8 +2067,20 @@ class Mountpoint(Module): if config.record or config.lctl_dump: lctl.mount_option(local_node_name, self.vosc.get_name(), mdc_name) return - cmd = "mount -t lustre_lite -o osc=%s,mdc=%s %s %s" % \ - (self.vosc.get_name(), mdc_name, 
config.config, self.path) + + if config.clientoptions: + if self.clientoptions: + self.clientoptions = self.clientoptions + ',' + config.clientoptions + else: + self.clientoptions = config.clientoptions + if self.clientoptions: + self.clientoptions = ',' + self.clientoptions + # Linux kernel will deal with async and not pass it to ll_fill_super, + # so replace it with Lustre async + self.clientoptions = string.replace(self.clientoptions, "async", "lasync") + + cmd = "mount -t lustre_lite -o osc=%s,mdc=%s%s %s %s" % \ + (self.vosc.get_name(), mdc_name, self.clientoptions, config.config, self.path) run("mkdir", self.path) ret, val = run(cmd) if ret: @@ -2633,6 +2709,8 @@ lconf_options = [ ('nosetup', "Skip device setup/cleanup step."), ('reformat', "Reformat all devices (without question)"), ('mkfsoptions', "Additional options for the mk*fs command line", PARAM), + ('mountfsoptions', "Additional options for mount fs command line", PARAM), + ('clientoptions', "Additional options for Lustre", PARAM), ('dump', "Dump the kernel debug log to file before portals is unloaded", PARAM), ('write_conf', "Save all the client config information on mds."), diff --git a/lustre/utils/lmc b/lustre/utils/lmc index 33d6839..0d34ace2 100755 --- a/lustre/utils/lmc +++ b/lustre/utils/lmc @@ -91,6 +91,8 @@ Object creation command summary: --journal_size size --inode_size size --mdsuuid uuid + --mkfsoptions options + --mountfsoptions options --add lov --lov lov_name @@ -110,12 +112,15 @@ Object creation command summary: --inode_size size --osdtype obdecho|obdfilter --ostuuid uuid + --mkfsoptions options + --mountfsoptions options --add mtpt - Mountpoint --node node_name --path /mnt/point --mds mds_name --ost ost_name OR --lov lov_name + --clientoptions options --add route --node nodename @@ -185,6 +190,7 @@ lmc_options = [ ('inode_size', "Specify new inode size for underlying ext3 file system.", PARAM,"0"), ('fstype', "Optional argument to specify the filesystem type.", PARAM, "ext3"), 
('mkfsoptions', "Optional argument to mkfs.", PARAM, ""), + ('mountfsoptions', "Optional argument to mount fs.", PARAM, ""), ('ostuuid', "Optional argument to specify OST UUID", PARAM,""), ('mdsuuid', "Optional argument to specify MDS UUID", PARAM,""), ('nspath', "Local mount point of server namespace.", PARAM,""), @@ -194,6 +200,7 @@ lmc_options = [ ('echo_client', "", PARAM), ('path', "Specify the mountpoint for Lustre.", PARAM), ('filesystem', "Lustre filesystem name", PARAM,""), + ('clientoptions', "Specify the options for Lustre, such as async.", PARAM, ""), # lov ('lov', "Specify LOV name.", PARAM,""), @@ -365,7 +372,8 @@ class GenConfig: return ldlm def osd(self, name, uuid, fs, osdtype, devname, format, ost_uuid, - node_uuid, dev_size=0, journal_size=0, inode_size=0, nspath="", mkfsoptions=""): + node_uuid, dev_size=0, journal_size=0, inode_size=0, nspath="", + mkfsoptions="", mountfsoptions=""): osd = self.newService("osd", name, uuid) osd.setAttribute('osdtype', osdtype) osd.appendChild(self.ref("target", ost_uuid)) @@ -383,6 +391,8 @@ class GenConfig: self.addElement(osd, "inodesize", "%s" % (inode_size)) if mkfsoptions: self.addElement(osd, "mkfsoptions", mkfsoptions) + if mountfsoptions: + self.addElement(osd, "mountfsoptions", mountfsoptions) if nspath: self.addElement(osd, "nspath", nspath) return osd @@ -426,7 +436,7 @@ class GenConfig: def mdsdev(self, name, uuid, fs, devname, format, node_uuid, mds_uuid, dev_size=0, journal_size=0, inode_size=256, - nspath="", mkfsoptions=""): + nspath="", mkfsoptions="", mountfsoptions=""): mdd = self.newService("mdsdev", name, uuid) self.addElement(mdd, "fstype", fs) dev = self.addElement(mdd, "devpath", devname) @@ -441,6 +451,9 @@ class GenConfig: self.addElement(mdd, "nspath", nspath) if mkfsoptions: self.addElement(mdd, "mkfsoptions", mkfsoptions) + if mountfsoptions: + self.addElement(mdd, "mountfsoptions", mountfsoptions) + mdd.appendChild(self.ref("node", node_uuid)) mdd.appendChild(self.ref("target", 
mds_uuid)) return mdd @@ -452,10 +465,12 @@ class GenConfig: mgmt.appendChild(self.ref("active", mgmt_uuid)) return mgmt - def mountpoint(self, name, uuid, fs_uuid, path): + def mountpoint(self, name, uuid, fs_uuid, path, clientoptions): mtpt = self.newService("mountpoint", name, uuid) mtpt.appendChild(self.ref("filesystem", fs_uuid)) self.addElement(mtpt, "path", path) + if clientoptions: + self.addElement(mtpt, "clientoptions", clientoptions) return mtpt def filesystem(self, name, uuid, mds_uuid, obd_uuid, mgmt_uuid): @@ -712,6 +727,7 @@ def add_mds(gen, lustre, options): inode_size = get_option(options, 'inode_size') nspath = get_option(options, 'nspath') mkfsoptions = get_option(options, 'mkfsoptions') + mountfsoptions = get_option(options, 'mountfsoptions') node_uuid = name2uuid(lustre, node_name, 'node') @@ -723,7 +739,8 @@ def add_mds(gen, lustre, options): mdd = gen.mdsdev(mdd_name, mdd_uuid, fstype, devname, get_format_flag(options), node_uuid, mds_uuid, - size, journal_size, inode_size, nspath, mkfsoptions) + size, journal_size, inode_size, nspath, mkfsoptions, + mountfsoptions) lustre.appendChild(mdd) @@ -759,6 +776,7 @@ def add_ost(gen, lustre, options): journal_size = '' inode_size = '' mkfsoptions = '' + mountfsoptions = '' else: devname = get_option(options, 'dev') # can be unset for bluearcs size = get_option(options, 'size') @@ -766,6 +784,7 @@ def add_ost(gen, lustre, options): journal_size = get_option(options, 'journal_size') inode_size = get_option(options, 'inode_size') mkfsoptions = get_option(options, 'mkfsoptions') + mountfsoptions = get_option(options, 'mountfsoptions') nspath = get_option(options, 'nspath') @@ -801,7 +820,8 @@ def add_ost(gen, lustre, options): osd = gen.osd(osdname, osd_uuid, fstype, osdtype, devname, get_format_flag(options), ost_uuid, node_uuid, size, - journal_size, inode_size, nspath, mkfsoptions) + journal_size, inode_size, nspath, mkfsoptions, + mountfsoptions) node = findByName(lustre, node_name, "node") @@ -931,6 
+951,7 @@ def add_mtpt(gen, lustre, options): node_name = get_option(options, 'node') path = get_option(options, 'path') + clientoptions = get_option(options, "clientoptions") fs_name = get_option(options, 'filesystem') lov_name = get_option(options, 'lov') @@ -963,7 +984,7 @@ def add_mtpt(gen, lustre, options): error("MOUNTPOINT: ", name, " already exists.") uuid = new_uuid(name) - mtpt = gen.mountpoint(name, uuid, fs_uuid, path) + mtpt = gen.mountpoint(name, uuid, fs_uuid, path, clientoptions) node = findByName(lustre, node_name, "node") if not node: error('node:', node_name, "not found.") -- 1.8.3.1