Details : enable checksum by default, allow --disable-checksum
configure option and "-o nochecksum" mount option
+Severity : normal
+Bugzilla : 11802
+Description: Lustre support for RHEL5
+Details : Add support for the RHEL5 kernel (2.6.18-8.1.8.el5).
+
--------------------------------------------------------------------------------
2007-08-10 Cluster File Systems, Inc. <info@clusterfs.com>
lustre/kernel_patches/targets/2.6-suse.target
lustre/kernel_patches/targets/2.6-vanilla.target
lustre/kernel_patches/targets/2.6-rhel4.target
+lustre/kernel_patches/targets/2.6-rhel5.target
lustre/kernel_patches/targets/2.6-fc5.target
lustre/kernel_patches/targets/2.6-patchless.target
lustre/kernel_patches/targets/2.6-sles10.target
--- /dev/null
+Date: Tue, 26 Sep 2006 11:00:28 -0500
+From: Eric Sandeen <esandeen@redhat.com>
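+Subject: Re: [PATCH RHEL5] 16T overflows in jbd code
+
+journal_write_metadata_buffer() took its block number as a signed int;
+change the parameter to unsigned long in both the definition and the
+prototype so that large block numbers are not truncated or made negative.
+Also fix a "Lenght" typo in a comment in journal.c.
+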
+Signed-off-by: Eric Sandeen <esandeen@redhat.com>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+
+Index: linux-2.6.17-1.2654.el5/fs/jbd/journal.c
+===================================================================
+--- linux-2.6.17-1.2654.el5.orig/fs/jbd/journal.c
++++ linux-2.6.17-1.2654.el5/fs/jbd/journal.c
+@@ -271,7 +271,7 @@ static void journal_kill_thread(journal_
+ int journal_write_metadata_buffer(transaction_t *transaction,
+ struct journal_head *jh_in,
+ struct journal_head **jh_out,
+- int blocknr)
++ unsigned long blocknr)
+ {
+ int need_copy_out = 0;
+ int done_copy_out = 0;
+@@ -696,7 +696,7 @@ fail:
+ * @bdev: Block device on which to create the journal
+ * @fs_dev: Device which hold journalled filesystem for this journal.
+ * @start: Block nr Start of journal.
+- * @len: Lenght of the journal in blocks.
++ * @len: Length of the journal in blocks.
+ * @blocksize: blocksize of journalling device
+ * @returns: a newly created journal_t *
+ *
+Index: linux-2.6.17-1.2654.el5/include/linux/jbd.h
+===================================================================
+--- linux-2.6.17-1.2654.el5.orig/include/linux/jbd.h
++++ linux-2.6.17-1.2654.el5/include/linux/jbd.h
+@@ -866,7 +866,7 @@ extern int
+ journal_write_metadata_buffer(transaction_t *transaction,
+ struct journal_head *jh_in,
+ struct journal_head **jh_out,
+- int blocknr);
++ unsigned long blocknr);
+
+ /* Transaction locking */
+ extern void __wait_on_journal (journal_t *);
+
--- /dev/null
+Date: Mon, 23 Oct 2006 15:40:48 -0500
+From: Eric Sandeen <sandeen@redhat.com>
+Subject: [PATCH RHEL5] handle races w/ truncate in journal_dirty_data()
+
+This is for BZ 209647 <https://bugzilla.redhat.com/bugzilla/show_bug.cgi?id=209647>: ext3/jbd panic
+
+This patch is now in -mm.
+
+When running several fsx's and other filesystem stress tests, we found
+cases where an unmapped buffer was still being sent to submit_bh by the
+ext3 dirty data journaling code.
+
+I saw this happen in two ways, both related to another thread doing a
+truncate which would unmap the buffer in question.
+
+Either we would get into journal_dirty_data with a bh which was already
+unmapped (although journal_dirty_data_fn had checked for this earlier, the
+state was not locked at that point), or it would get unmapped in the middle
+of journal_dirty_data when we dropped locks to call sync_dirty_buffer.
+
+By re-checking for mapped state after we've acquired the bh state lock, we
+should avoid these races. If we find a buffer which is no longer mapped,
+we essentially ignore it, because journal_unmap_buffer has already decided
+that this buffer can go away.
+
+I've also added tracepoints in these two cases, and made a couple other
+tracepoint changes that I found useful in debugging this.
+
+Signed-off-by: Eric Sandeen <esandeen@redhat.com>
+Cc: <linux-ext4@vger.kernel.org>
+Signed-off-by: Andrew Morton <akpm@osdl.org>
+---
+
+ fs/jbd/transaction.c | 15 ++++++++++++++-
+ 1 file changed, 14 insertions(+), 1 deletion(-)
+
+Index: linux-2.6.18-1.2732.el5/fs/jbd/transaction.c
+===================================================================
+--- linux-2.6.18-1.2732.el5.orig/fs/jbd/transaction.c
++++ linux-2.6.18-1.2732.el5/fs/jbd/transaction.c
+@@ -967,6 +967,13 @@ int journal_dirty_data(handle_t *handle,
+ */
+ jbd_lock_bh_state(bh);
+ spin_lock(&journal->j_list_lock);
++
++ /* Now that we have bh_state locked, are we really still mapped? */
++ if (!buffer_mapped(bh)) {
++ JBUFFER_TRACE(jh, "unmapped buffer, bailing out");
++ goto no_journal;
++ }
++
+ if (jh->b_transaction) {
+ JBUFFER_TRACE(jh, "has transaction");
+ if (jh->b_transaction != handle->h_transaction) {
+@@ -1028,6 +1035,11 @@ int journal_dirty_data(handle_t *handle,
+ sync_dirty_buffer(bh);
+ jbd_lock_bh_state(bh);
+ spin_lock(&journal->j_list_lock);
++ /* Since we dropped the lock... */
++ if (!buffer_mapped(bh)) {
++ JBUFFER_TRACE(jh, "buffer got unmapped");
++ goto no_journal;
++ }
+ /* The buffer may become locked again at any
+ time if it is redirtied */
+ }
+@@ -1823,6 +1835,7 @@ static int journal_unmap_buffer(journal_
+ }
+ }
+ } else if (transaction == journal->j_committing_transaction) {
++ JBUFFER_TRACE(jh, "on committing transaction");
+ if (jh->b_jlist == BJ_Locked) {
+ /*
+ * The buffer is on the committing transaction's locked
+@@ -1837,7 +1850,6 @@ static int journal_unmap_buffer(journal_
+ * can remove it's next_transaction pointer from the
+ * running transaction if that is set, but nothing
+ * else. */
+- JBUFFER_TRACE(jh, "on committing transaction");
+ set_buffer_freed(bh);
+ if (jh->b_next_transaction) {
+ J_ASSERT(jh->b_next_transaction ==
+@@ -1857,6 +1869,7 @@ static int journal_unmap_buffer(journal_
+ * i_size already for this truncate so recovery will not
+ * expose the disk blocks we are discarding here.) */
+ J_ASSERT_JH(jh, transaction == journal->j_running_transaction);
++ JBUFFER_TRACE(jh, "on running transaction");
+ may_free = __dispose_buffer(jh, transaction);
+ }
+
+
--- /dev/null
+diff -urp linux-2.6.18.rawops/fs/dcache.c linux-2.6.18.races/fs/dcache.c
+--- linux-2.6.18.rawops/fs/dcache.c 2007-02-08 19:00:31.000000000 +0200
++++ linux-2.6.18.races/fs/dcache.c 2007-02-14 19:23:49.000000000 +0200
+@@ -230,6 +230,13 @@ int d_invalidate(struct dentry * dentry)
+ spin_unlock(&dcache_lock);
+ return 0;
+ }
++
++ /* network invalidation by Lustre */
++ if (dentry->d_flags & DCACHE_LUSTRE_INVALID) {
++ spin_unlock(&dcache_lock);
++ return 0;
++ }
++
+ /*
+ * Check whether to do a partial shrink_dcache
+ * to get rid of unused child entries.
+@@ -1400,13 +1407,21 @@ static void _d_rehash(struct dentry * en
+ * Adds a dentry to the hash according to its name.
+ */
+
+-void d_rehash(struct dentry * entry)
++void d_rehash_cond(struct dentry * entry, int lock)
+ {
+- spin_lock(&dcache_lock);
++ if (lock)
++ spin_lock(&dcache_lock);
+ spin_lock(&entry->d_lock);
+ _d_rehash(entry);
+ spin_unlock(&entry->d_lock);
+- spin_unlock(&dcache_lock);
++ if (lock)
++ spin_unlock(&dcache_lock);
++}
++EXPORT_SYMBOL(d_rehash_cond);
++
++void d_rehash(struct dentry * entry)
++{
++ d_rehash_cond(entry, 1);
+ }
+
+ #define do_switch(x,y) do { \
+@@ -1481,14 +1496,13 @@ static void switch_names(struct dentry *
+ * dcache entries should not be moved in this way.
+ */
+
+-void d_move(struct dentry * dentry, struct dentry * target)
++void __d_move(struct dentry * dentry, struct dentry * target)
+ {
+ struct hlist_head *list;
+
+ if (!dentry->d_inode)
+ printk(KERN_WARNING "VFS: moving negative dcache entry\n");
+
+- spin_lock(&dcache_lock);
+ write_seqlock(&rename_lock);
+ /*
+ * XXXX: do we really need to take target->d_lock?
+@@ -1549,6 +1563,14 @@ already_unhashed:
+ fsnotify_d_move(dentry);
+ spin_unlock(&dentry->d_lock);
+ write_sequnlock(&rename_lock);
++}
++
++EXPORT_SYMBOL(__d_move);
++
++void d_move(struct dentry *dentry, struct dentry *target)
++{
++ spin_lock(&dcache_lock);
++ __d_move(dentry, target);
+ spin_unlock(&dcache_lock);
+ }
+
+diff -urp linux-2.6.18.rawops/include/linux/dcache.h linux-2.6.18.races/include/linux/dcache.h
+--- linux-2.6.18.rawops/include/linux/dcache.h 2007-02-14 16:52:37.000000000 +0200
++++ linux-2.6.18.races/include/linux/dcache.h 2007-02-14 19:21:14.000000000 +0200
+@@ -177,6 +177,7 @@ d_iput: no no no yes
+
+ #define DCACHE_REFERENCED 0x0008 /* Recently used, don't discard. */
+ #define DCACHE_UNHASHED 0x0010
++#define DCACHE_LUSTRE_INVALID 0x0040 /* Lustre invalidated */
+
+ #define DCACHE_INOTIFY_PARENT_WATCHED 0x0020 /* Parent inode is watched */
+
+@@ -254,6 +255,7 @@ extern int have_submounts(struct dentry
+ * This adds the entry to the hash queues.
+ */
+ extern void d_rehash(struct dentry *);
++extern void d_rehash_cond(struct dentry *, int lock);
+
+ /**
+ * d_add - add dentry to hash queues
+@@ -289,6 +291,7 @@ static inline struct dentry *d_add_uniqu
+
+ /* used for rename() and baskets */
+ extern void d_move(struct dentry *, struct dentry *);
++extern void __d_move(struct dentry *, struct dentry *);
+
+ /* appendix may either be NULL or be used for transname suffixes */
+ extern struct dentry * d_lookup(struct dentry *, struct qstr *);
--- /dev/null
+lustre_version.patch
+vfs_races-2.6-rhel5.patch
+i_filter_data.patch
+jbd-jcberr-2.6.18-vanilla.patch
+export_symbols-2.6.18-vanilla.patch
+dev_read_only-2.6.18-vanilla.patch
+export-2.6.18-vanilla.patch
+8kstack-2.6.12.patch
+export-show_task-2.6.18-vanilla.patch
+sd_iostats-2.6-rhel4.patch
+export_symbol_numa-2.6-fc5.patch
export-show_task-2.6.18-vanilla.patch
sd_iostats-2.6-rhel4.patch
export_symbol_numa-2.6.18.patch
+jbd-16tb-overflow-fixes.patch
+jbd-check-for-unmapped-buffer.patch
jbd-journal-chksum-2.6.18-vanilla.patch
--- /dev/null
+lnxmaj="2.6.18"
+lnxrel="8.1.8.el5"
+
+KERNEL=linux-${lnxmaj}-${lnxrel}.tar.bz2
+SERIES=2.6-rhel5.series
+VERSION=${lnxmaj}
+EXTRA_VERSION=${lnxrel}_lustre.@VERSION@
+RHBUILD=1
+LINUX26=1
+LUSTRE_VERSION=@VERSION@
+
+BASE_ARCHS="i686 x86_64 ia64"
+BIGMEM_ARCHS=""
+BOOT_ARCHS=""
+JENSEN_ARCHS=""
+SMP_ARCHS="i686 x86_64 ia64"
+UP_ARCHS=""
+
+for cc in gcc ; do
+ if which $cc >/dev/null 2>/dev/null ; then
+ export CC=$cc
+ break
+ fi
+done
2.6-suse-newer SLES9: 2.6.5-7.286 extra patches for SLES9 after SP1
2.6-rhel4 RHEL4: 2.6.9-55.0.2.EL
2.6-sles10 SLES10: 2.6.16.46-0.14
+2.6-rhel5.series RHEL5: 2.6.18-8.1.8.el5
2.6.18-vanilla.series kernel.org: 2.6.18.8
CLIENT SUPPORT FOR UNPATCHED KERNELS:
GOTO(out_pop, rc = PTR_ERR(file));
/* body->size is actually the offset -eeb */
- if ((body->size & (de->d_inode->i_blksize - 1)) != 0) {
+ if ((body->size & (de->d_inode->i_sb->s_blocksize - 1)) != 0) {
CERROR("offset "LPU64" not on a block boundary of %lu\n",
- body->size, de->d_inode->i_blksize);
+ body->size, de->d_inode->i_sb->s_blocksize);
GOTO(out_file, rc = -EFAULT);
}
/* body->nlink is actually the #bytes to read -eeb */
- if (body->nlink & (de->d_inode->i_blksize - 1)) {
+ if (body->nlink & (de->d_inode->i_sb->s_blocksize - 1)) {
CERROR("size %u is not multiple of blocksize %lu\n",
- body->nlink, de->d_inode->i_blksize);
+ body->nlink, de->d_inode->i_sb->s_blocksize);
GOTO(out_file, rc = -EFAULT);
}