From 09023e69c6a6289ac5255d1001798c490fc9b075 Mon Sep 17 00:00:00 2001 From: johann Date: Thu, 30 Aug 2007 15:32:57 +0000 Subject: [PATCH] Branch HEAD b=11802 i=adilger i=kalpak Severity : normal Bugzilla : 11802 Description: lustre support for RHEL5 Details : Add support for RHEL5. --- lustre/ChangeLog | 5 ++ lustre/autoconf/lustre-core.m4 | 1 + .../patches/jbd-16tb-overflow-fixes.patch | 43 +++++++++ .../patches/jbd-check-for-unmapped-buffer.patch | 91 +++++++++++++++++++ .../patches/vfs_races-2.6-rhel5.patch | 100 +++++++++++++++++++++ lustre/kernel_patches/series/2.6-rhel5.series | 11 +++ lustre/kernel_patches/series/2.6.18-vanilla.series | 2 + lustre/kernel_patches/targets/2.6-rhel5.target.in | 24 +++++ lustre/kernel_patches/which_patch | 1 + lustre/mds/handler.c | 8 +- 10 files changed, 282 insertions(+), 4 deletions(-) create mode 100644 lustre/kernel_patches/patches/jbd-16tb-overflow-fixes.patch create mode 100644 lustre/kernel_patches/patches/jbd-check-for-unmapped-buffer.patch create mode 100644 lustre/kernel_patches/patches/vfs_races-2.6-rhel5.patch create mode 100644 lustre/kernel_patches/series/2.6-rhel5.series create mode 100644 lustre/kernel_patches/targets/2.6-rhel5.target.in diff --git a/lustre/ChangeLog b/lustre/ChangeLog index c37f400..59e69a5 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -196,6 +196,11 @@ Description: testing performance impact of enabling checksumming Details : enable checksum by default, allow --disable-checksum configure option and "-o nochecksum" mount option +Severity : normal +Bugzilla : 11802 +Description: lustre support for RHEL5 +Details : Add support for RHEL5. + -------------------------------------------------------------------------------- 2007-08-10 Cluster File Systems, Inc. diff --git a/lustre/autoconf/lustre-core.m4 b/lustre/autoconf/lustre-core.m4 index 7cbe3bd..3fc77f2 100644 --- a/lustre/autoconf/lustre-core.m4 +++ b/lustre/autoconf/lustre-core.m4 @@ -1500,6 +1500,7 @@ lustre/include/lustre/Makefile lustre/kernel_patches/targets/2.6-suse.target lustre/kernel_patches/targets/2.6-vanilla.target lustre/kernel_patches/targets/2.6-rhel4.target +lustre/kernel_patches/targets/2.6-rhel5.target lustre/kernel_patches/targets/2.6-fc5.target lustre/kernel_patches/targets/2.6-patchless.target lustre/kernel_patches/targets/2.6-sles10.target diff --git a/lustre/kernel_patches/patches/jbd-16tb-overflow-fixes.patch b/lustre/kernel_patches/patches/jbd-16tb-overflow-fixes.patch new file mode 100644 index 0000000..beab322 --- /dev/null +++ b/lustre/kernel_patches/patches/jbd-16tb-overflow-fixes.patch @@ -0,0 +1,43 @@ +Date: Tue, 26 Sep 2006 11:00:28 -0500 +From: Eric Sandeen +Subject: Re: [PATCH RHEL5] 16T overflows in jbd code + +Signed-off-by: Eric Sandeen +Signed-off-by: Andrew Morton + +Index: linux-2.6.17-1.2654.el5/fs/jbd/journal.c +=================================================================== +--- linux-2.6.17-1.2654.el5.orig/fs/jbd/journal.c ++++ linux-2.6.17-1.2654.el5/fs/jbd/journal.c +@@ -271,7 +271,7 @@ static void journal_kill_thread(journal_ + int journal_write_metadata_buffer(transaction_t *transaction, + struct journal_head *jh_in, + struct journal_head **jh_out, +- int blocknr) ++ unsigned long blocknr) + { + int need_copy_out = 0; + int done_copy_out = 0; +@@ -696,7 +696,7 @@ fail: + * @bdev: Block device on which to create the journal + * @fs_dev: Device which hold journalled filesystem for this journal. + * @start: Block nr Start of journal. +- * @len: Lenght of the journal in blocks. ++ * @len: Length of the journal in blocks. + * @blocksize: blocksize of journalling device + * @returns: a newly created journal_t * + * +Index: linux-2.6.17-1.2654.el5/include/linux/jbd.h +=================================================================== +--- linux-2.6.17-1.2654.el5.orig/include/linux/jbd.h ++++ linux-2.6.17-1.2654.el5/include/linux/jbd.h +@@ -866,7 +866,7 @@ extern int + journal_write_metadata_buffer(transaction_t *transaction, + struct journal_head *jh_in, + struct journal_head **jh_out, +- int blocknr); ++ unsigned long blocknr); + + /* Transaction locking */ + extern void __wait_on_journal (journal_t *); + diff --git a/lustre/kernel_patches/patches/jbd-check-for-unmapped-buffer.patch b/lustre/kernel_patches/patches/jbd-check-for-unmapped-buffer.patch new file mode 100644 index 0000000..0127ef0 --- /dev/null +++ b/lustre/kernel_patches/patches/jbd-check-for-unmapped-buffer.patch @@ -0,0 +1,91 @@ +Date: Mon, 23 Oct 2006 15:40:48 -0500 +From: Eric Sandeen +Subject: [PATCH RHEL5] handle races w/ truncate in journal_dirty_data() + +This is for BZ 209647 : ext3/jbd panic + +This patch is now in -mm. + +When running several fsx's and other filesystem stress tests, we found +cases where an unmapped buffer was still being sent to submit_bh by the +ext3 dirty data journaling code. + +I saw this happen in two ways, both related to another thread doing a +truncate which would unmap the buffer in question. + +Either we would get into journal_dirty_data with a bh which was already +unmapped (although journal_dirty_data_fn had checked for this earlier, the +state was not locked at that point), or it would get unmapped in the middle +of journal_dirty_data when we dropped locks to call sync_dirty_buffer. + +By re-checking for mapped state after we've acquired the bh state lock, we +should avoid these races. If we find a buffer which is no longer mapped, +we essentially ignore it, because journal_unmap_buffer has already decided +that this buffer can go away. + +I've also added tracepoints in these two cases, and made a couple other +tracepoint changes that I found useful in debugging this. + +Signed-off-by: Eric Sandeen +Cc: +Signed-off-by: Andrew Morton +--- + + fs/jbd/transaction.c | 15 ++++++++++++++- + 1 file changed, 14 insertions(+), 1 deletion(-) + +Index: linux-2.6.18-1.2732.el5/fs/jbd/transaction.c +=================================================================== +--- linux-2.6.18-1.2732.el5.orig/fs/jbd/transaction.c ++++ linux-2.6.18-1.2732.el5/fs/jbd/transaction.c +@@ -967,6 +967,13 @@ int journal_dirty_data(handle_t *handle, + */ + jbd_lock_bh_state(bh); + spin_lock(&journal->j_list_lock); ++ ++ /* Now that we have bh_state locked, are we really still mapped? */ ++ if (!buffer_mapped(bh)) { ++ JBUFFER_TRACE(jh, "unmapped buffer, bailing out"); ++ goto no_journal; ++ } ++ + if (jh->b_transaction) { + JBUFFER_TRACE(jh, "has transaction"); + if (jh->b_transaction != handle->h_transaction) { +@@ -1028,6 +1035,11 @@ int journal_dirty_data(handle_t *handle, + sync_dirty_buffer(bh); + jbd_lock_bh_state(bh); + spin_lock(&journal->j_list_lock); ++ /* Since we dropped the lock... */ ++ if (!buffer_mapped(bh)) { ++ JBUFFER_TRACE(jh, "buffer got unmapped"); ++ goto no_journal; ++ } + /* The buffer may become locked again at any + time if it is redirtied */ + } +@@ -1823,6 +1835,7 @@ static int journal_unmap_buffer(journal_ + } + } + } else if (transaction == journal->j_committing_transaction) { ++ JBUFFER_TRACE(jh, "on committing transaction"); + if (jh->b_jlist == BJ_Locked) { + /* + * The buffer is on the committing transaction's locked +@@ -1837,7 +1850,6 @@ static int journal_unmap_buffer(journal_ + * can remove it's next_transaction pointer from the + * running transaction if that is set, but nothing + * else. */ +- JBUFFER_TRACE(jh, "on committing transaction"); + set_buffer_freed(bh); + if (jh->b_next_transaction) { + J_ASSERT(jh->b_next_transaction == +@@ -1857,6 +1869,7 @@ static int journal_unmap_buffer(journal_ + * i_size already for this truncate so recovery will not + * expose the disk blocks we are discarding here.) */ + J_ASSERT_JH(jh, transaction == journal->j_running_transaction); ++ JBUFFER_TRACE(jh, "on running transaction"); + may_free = __dispose_buffer(jh, transaction); + } + + diff --git a/lustre/kernel_patches/patches/vfs_races-2.6-rhel5.patch b/lustre/kernel_patches/patches/vfs_races-2.6-rhel5.patch new file mode 100644 index 0000000..eb659fd --- /dev/null +++ b/lustre/kernel_patches/patches/vfs_races-2.6-rhel5.patch @@ -0,0 +1,100 @@ +diff -urp linux-2.6.18.rawops/fs/dcache.c linux-2.6.18.races/fs/dcache.c +--- linux-2.6.18.rawops/fs/dcache.c 2007-02-08 19:00:31.000000000 +0200 ++++ linux-2.6.18.races/fs/dcache.c 2007-02-14 19:23:49.000000000 +0200 +@@ -230,6 +230,13 @@ int d_invalidate(struct dentry * dentry) + spin_unlock(&dcache_lock); + return 0; + } ++ ++ /* network invalidation by Lustre */ ++ if (dentry->d_flags & DCACHE_LUSTRE_INVALID) { ++ spin_unlock(&dcache_lock); ++ return 0; ++ } ++ + /* + * Check whether to do a partial shrink_dcache + * to get rid of unused child entries. +@@ -1400,13 +1407,21 @@ static void _d_rehash(struct dentry * en + * Adds a dentry to the hash according to its name. + */ + +-void d_rehash(struct dentry * entry) ++void d_rehash_cond(struct dentry * entry, int lock) + { +- spin_lock(&dcache_lock); ++ if (lock) ++ spin_lock(&dcache_lock); + spin_lock(&entry->d_lock); + _d_rehash(entry); + spin_unlock(&entry->d_lock); +- spin_unlock(&dcache_lock); ++ if (lock) ++ spin_unlock(&dcache_lock); ++} ++EXPORT_SYMBOL(d_rehash_cond); ++ ++void d_rehash(struct dentry * entry) ++{ ++ d_rehash_cond(entry, 1); + } + + #define do_switch(x,y) do { \ +@@ -1481,14 +1496,13 @@ static void switch_names(struct dentry * + * dcache entries should not be moved in this way. + */ + +-void d_move(struct dentry * dentry, struct dentry * target) ++void __d_move(struct dentry * dentry, struct dentry * target) + { + struct hlist_head *list; + + if (!dentry->d_inode) + printk(KERN_WARNING "VFS: moving negative dcache entry\n"); + +- spin_lock(&dcache_lock); + write_seqlock(&rename_lock); + /* + * XXXX: do we really need to take target->d_lock? +@@ -1549,6 +1563,14 @@ already_unhashed: + fsnotify_d_move(dentry); + spin_unlock(&dentry->d_lock); + write_sequnlock(&rename_lock); ++} ++ ++EXPORT_SYMBOL(__d_move); ++ ++void d_move(struct dentry *dentry, struct dentry *target) ++{ ++ spin_lock(&dcache_lock); ++ __d_move(dentry, target); + spin_unlock(&dcache_lock); + } + +diff -urp linux-2.6.18.rawops/include/linux/dcache.h linux-2.6.18.races/include/linux/dcache.h +--- linux-2.6.18.rawops/include/linux/dcache.h 2007-02-14 16:52:37.000000000 +0200 ++++ linux-2.6.18.races/include/linux/dcache.h 2007-02-14 19:21:14.000000000 +0200 +@@ -177,6 +177,7 @@ d_iput: no no no yes + + #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */ + #define DCACHE_UNHASHED 0x0010 ++#define DCACHE_LUSTRE_INVALID 0x0040 /* Lustre invalidated */ + + #define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched */ + +@@ -254,6 +255,7 @@ extern int have_submounts(struct dentry + * This adds the entry to the hash queues. + */ + extern void d_rehash(struct dentry *); ++extern void d_rehash_cond(struct dentry *, int lock); + + /** + * d_add - add dentry to hash queues +@@ -289,6 +291,7 @@ static inline struct dentry *d_add_uniqu + + /* used for rename() and baskets */ + extern void d_move(struct dentry *, struct dentry *); ++extern void __d_move(struct dentry *, struct dentry *); + + /* appendix may either be NULL or be used for transname suffixes */ + extern struct dentry * d_lookup(struct dentry *, struct qstr *); diff --git a/lustre/kernel_patches/series/2.6-rhel5.series b/lustre/kernel_patches/series/2.6-rhel5.series new file mode 100644 index 0000000..076164b --- /dev/null +++ b/lustre/kernel_patches/series/2.6-rhel5.series @@ -0,0 +1,11 @@ +lustre_version.patch +vfs_races-2.6-rhel5.patch +i_filter_data.patch +jbd-jcberr-2.6.18-vanilla.patch +export_symbols-2.6.18-vanilla.patch +dev_read_only-2.6.18-vanilla.patch +export-2.6.18-vanilla.patch +8kstack-2.6.12.patch +export-show_task-2.6.18-vanilla.patch +sd_iostats-2.6-rhel4.patch +export_symbol_numa-2.6-fc5.patch diff --git a/lustre/kernel_patches/series/2.6.18-vanilla.series b/lustre/kernel_patches/series/2.6.18-vanilla.series index 2f6665e..d1528a0 100644 --- a/lustre/kernel_patches/series/2.6.18-vanilla.series +++ b/lustre/kernel_patches/series/2.6.18-vanilla.series @@ -11,4 +11,6 @@ export-2.6.18-vanilla.patch export-show_task-2.6.18-vanilla.patch sd_iostats-2.6-rhel4.patch export_symbol_numa-2.6.18.patch +jbd-16tb-overflow-fixes.patch +jbd-check-for-unmapped-buffer.patch jbd-journal-chksum-2.6.18-vanilla.patch diff --git a/lustre/kernel_patches/targets/2.6-rhel5.target.in b/lustre/kernel_patches/targets/2.6-rhel5.target.in new file mode 100644 index 0000000..9a0cd52 --- /dev/null +++ b/lustre/kernel_patches/targets/2.6-rhel5.target.in @@ -0,0 +1,24 @@ +lnxmaj="2.6.18" +lnxrel="8.1.8.el5" + +KERNEL=linux-${lnxmaj}-${lnxrel}.tar.bz2 +SERIES=2.6-rhel5.series +VERSION=${lnxmaj} +EXTRA_VERSION=${lnxrel}_lustre.@VERSION@ +RHBUILD=1 +LINUX26=1 +LUSTRE_VERSION=@VERSION@ + +BASE_ARCHS="i686 x86_64 ia64" +BIGMEM_ARCHS="" +BOOT_ARCHS="" +JENSEN_ARCHS="" +SMP_ARCHS="i686 x86_64 ia64" +UP_ARCHS="" + +for cc in gcc ; do + if which $cc >/dev/null 2>/dev/null ; then + export CC=$cc + break + fi +done diff --git a/lustre/kernel_patches/which_patch b/lustre/kernel_patches/which_patch index ea78422..08ae12d 100644 --- a/lustre/kernel_patches/which_patch +++ b/lustre/kernel_patches/which_patch @@ -5,6 +5,7 @@ SUPPORTED KERNELS: 2.6-suse-newer SLES9: 2.6.5-7.286 extra patches for SLES9 after SP1 2.6-rhel4 RHEL4: 2.6.9-55.0.2.EL 2.6-sles10 SLES10: 2.6.16.46-0.14 +2.6-rhel5.series RHEL5: 2.6.18-8.1.8.el5 2.6.18-vanilla.series kernel.org: 2.6.18.8 CLIENT SUPPORT FOR UNPATCHED KERNELS: diff --git a/lustre/mds/handler.c b/lustre/mds/handler.c index cc51660..75e9f8f 100644 --- a/lustre/mds/handler.c +++ b/lustre/mds/handler.c @@ -1211,16 +1211,16 @@ static int mds_readpage(struct ptlrpc_request *req, int offset) GOTO(out_pop, rc = PTR_ERR(file)); /* body->size is actually the offset -eeb */ - if ((body->size & (de->d_inode->i_blksize - 1)) != 0) { + if ((body->size & (de->d_inode->i_sb->s_blocksize - 1)) != 0) { CERROR("offset "LPU64" not on a block boundary of %lu\n", - body->size, de->d_inode->i_blksize); + body->size, de->d_inode->i_sb->s_blocksize); GOTO(out_file, rc = -EFAULT); } /* body->nlink is actually the #bytes to read -eeb */ - if (body->nlink & (de->d_inode->i_blksize - 1)) { + if (body->nlink & (de->d_inode->i_sb->s_blocksize - 1)) { CERROR("size %u is not multiple of blocksize %lu\n", - body->nlink, de->d_inode->i_blksize); + body->nlink, de->d_inode->i_sb->s_blocksize); GOTO(out_file, rc = -EFAULT); } -- 1.8.3.1