From 0f636d44004beb8ddec9e0c1ad15f2afbbbdcec9 Mon Sep 17 00:00:00 2001 From: girish Date: Fri, 1 May 2009 15:23:05 +0000 Subject: [PATCH] b=19058 b=19184 i=adilger,girish o=Bernd Schubert o=Brian Behlendorf ext3_remount() should return correct error code add locking for creation of jbd_4k slab cache. Only needed for RHEL5 kernel --- lustre/ChangeLog | 13 +++ .../patches/jbd-slab-race-2.6-rhel5.patch | 101 +++++++++++++++++++++ lustre/kernel_patches/series/2.6-rhel5.series | 1 + 3 files changed, 115 insertions(+) create mode 100644 lustre/kernel_patches/patches/jbd-slab-race-2.6-rhel5.patch diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 6053cad..7cb5045 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -182,6 +182,19 @@ Description: 1.6.5 mdsrate performance is slower than 1.4.11/12 (MDS is not cpu Details : create_count always drops to the min value (=32) because grow_count is being changed before the precreate RPC completes. +Severity : normal +Frequency : Only in RHEL5 when mounting multiple ext3 filesystems + simultaneously +Bugzilla : 19184 +Description: "kmem_cache_create: duplicate cache jbd_4k" error message +Details : add proper locking for creation of jbd_4k slab cache + +Severity : normal +Bugzilla : 19058 +Description: MMP check in ext3_remount() fails without displaying any error +Details : When multiple mount protection fails during remount, proper error + should be returned + ------------------------------------------------------------------------------- 2008-12-31 Sun Microsystems, Inc. * version 1.8.0 diff --git a/lustre/kernel_patches/patches/jbd-slab-race-2.6-rhel5.patch b/lustre/kernel_patches/patches/jbd-slab-race-2.6-rhel5.patch new file mode 100644 index 0000000..0a11f74 --- /dev/null +++ b/lustre/kernel_patches/patches/jbd-slab-race-2.6-rhel5.patch @@ -0,0 +1,101 @@ +kmem_cache_create: duplicate cache jbd_4k + +The jbd slab cache creation/deletion is racey. 
If multiple jbd based
filesystems are mounted concurrently, and there are no other jbd based
filesystems already mounted, then we can race creating the slab caches
since jbd_slab[] is not locked. This is not commonly observed because
typically /root is mounted early with a jbd based filesystem making the
race impossible. On our diskless systems /root does not use the jbd
but we do have attached storage which does, and which is mounted in
parallel. Basically our setup is similar to what may be found in a
NAS style appliance.
+
+This patch wraps all modifications to jbd_slab[] in the jbd_slab_lock
+to prevent the above race.
+
+LLNL Bug 291
+Signed-off-by: Brian Behlendorf
+
+Index: linux+rh+chaos/fs/jbd/journal.c
+===================================================================
+--- linux+rh+chaos.orig/fs/jbd/journal.c
++++ linux+rh+chaos/fs/jbd/journal.c
+@@ -1979,6 +1979,7 @@ void * __jbd_kmalloc (const char *where,
+ #define JBD_MAX_SLABS 5
+ #define JBD_SLAB_INDEX(size) (size >> 11)
+
++static DECLARE_RWSEM(jbd_slab_lock); /* protect jbd_slab[] */
+ static kmem_cache_t *jbd_slab[JBD_MAX_SLABS];
+ static const char *jbd_slab_names[JBD_MAX_SLABS] = {
+ "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
+@@ -1988,24 +1989,27 @@ static void journal_destroy_jbd_slabs(vo
+ {
+ int i;
+
++ down_write(&jbd_slab_lock);
+ for (i = 0; i < JBD_MAX_SLABS; i++) {
+ if (jbd_slab[i])
+ kmem_cache_destroy(jbd_slab[i]);
+ jbd_slab[i] = NULL;
+ }
++ up_write(&jbd_slab_lock);
+ }
+
+ static int journal_create_jbd_slab(size_t slab_size)
+ {
+- int i = JBD_SLAB_INDEX(slab_size);
++ int rc = 0, i = JBD_SLAB_INDEX(slab_size);
+
+ BUG_ON(i >= JBD_MAX_SLABS);
+
+ /*
+ * Check if we already have a slab created for this size
+ */
++ down_write(&jbd_slab_lock);
+ if (jbd_slab[i])
+- return 0;
++ goto out_lock;
+
+ /*
+ * Create a slab and force alignment to be same as slabsize -
+@@ -2016,27 +2020,36 @@ static int journal_create_jbd_slab(size_
+ slab_size, slab_size, 0, NULL, NULL); 
+ if (!jbd_slab[i]) { + printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); +- return -ENOMEM; ++ rc = -ENOMEM; + } +- return 0; ++out_lock: ++ up_write(&jbd_slab_lock); ++ return rc; + } + + void * jbd_slab_alloc(size_t size, gfp_t flags) + { ++ void *ptr; + int idx; + ++ down_read(&jbd_slab_lock); + idx = JBD_SLAB_INDEX(size); + BUG_ON(jbd_slab[idx] == NULL); +- return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); ++ ptr = kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); ++ up_read(&jbd_slab_lock); ++ ++ return ptr; + } + + void jbd_slab_free(void *ptr, size_t size) + { + int idx; + ++ down_read(&jbd_slab_lock); + idx = JBD_SLAB_INDEX(size); + BUG_ON(jbd_slab[idx] == NULL); + kmem_cache_free(jbd_slab[idx], ptr); ++ up_read(&jbd_slab_lock); + } + + /* + diff --git a/lustre/kernel_patches/series/2.6-rhel5.series b/lustre/kernel_patches/series/2.6-rhel5.series index f43347a..c07ab4f 100644 --- a/lustre/kernel_patches/series/2.6-rhel5.series +++ b/lustre/kernel_patches/series/2.6-rhel5.series @@ -22,3 +22,4 @@ jbd-journal-chksum-2.6.18-vanilla.patch quota-large-limits-rhel5.patch md-mmp-unplug-dev.patch prune-icache-use-trylock-rhel5.patch +jbd-slab-race-2.6-rhel5.patch -- 1.8.3.1