From 0f636d44004beb8ddec9e0c1ad15f2afbbbdcec9 Mon Sep 17 00:00:00 2001 From: girish Date: Fri, 1 May 2009 15:23:05 +0000 Subject: [PATCH] b=19058 b=19184 i=adilger,girish o=Bernd Schubert o=Brian Behlendorf ext3_remount() should return correct error code add locking for creation of jbd_4k slab cache. Only needed for RHEL5 kernel --- lustre/ChangeLog | 13 +++ .../patches/jbd-slab-race-2.6-rhel5.patch | 101 +++++++++++++++++++++ lustre/kernel_patches/series/2.6-rhel5.series | 1 + 3 files changed, 115 insertions(+) create mode 100644 lustre/kernel_patches/patches/jbd-slab-race-2.6-rhel5.patch diff --git a/lustre/ChangeLog b/lustre/ChangeLog index 6053cad..7cb5045 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -182,6 +182,19 @@ Description: 1.6.5 mdsrate performance is slower than 1.4.11/12 (MDS is not cpu Details : create_count always drops to the min value (=32) because grow_count is being changed before the precreate RPC completes. +Severity : normal +Frequency : Only in RHEL5 when mounting multiple ext3 filesystems + simultaneously +Bugzilla : 19184 +Description: "kmem_cache_create: duplicate cache jbd_4k" error message +Details : add proper locking for creation of jbd_4k slab cache + +Severity : normal +Bugzilla : 19058 +Description: MMP check in ext3_remount() fails without displaying any error +Details : When multiple mount protection fails during remount, proper error + should be returned + ------------------------------------------------------------------------------- 2008-12-31 Sun Microsystems, Inc. * version 1.8.0 diff --git a/lustre/kernel_patches/patches/jbd-slab-race-2.6-rhel5.patch b/lustre/kernel_patches/patches/jbd-slab-race-2.6-rhel5.patch new file mode 100644 index 0000000..0a11f74 --- /dev/null +++ b/lustre/kernel_patches/patches/jbd-slab-race-2.6-rhel5.patch @@ -0,0 +1,101 @@ +kmem_cache_create: duplicate cache jbd_4k + +The jbd slab cache creation/deletion is racey. 
If multiple jbd based
filesystems are mounted concurrently, and there are no other jbd based
filesystems already mounted, then we can race creating the slab caches
since jbd_slab[] is not locked. This is not commonly observed because
typically /root is mounted early with a jbd based filesystem making the
race impossible. On our diskless systems /root does not use the jbd
but we do have attached storage which does, and which is mounted in
parallel. Basically our setup is similar to what may be found in a
NAS style appliance.
+
+This patch wraps all modifications to jbd_slab[] in the jbd_slab_lock
+to prevent the above race.
+
+LLNL Bug 291
+Signed-off-by: Brian Behlendorf
+
+Index: linux+rh+chaos/fs/jbd/journal.c
+===================================================================
+--- linux+rh+chaos.orig/fs/jbd/journal.c
++++ linux+rh+chaos/fs/jbd/journal.c
+@@ -1979,6 +1979,7 @@ void * __jbd_kmalloc (const char *where,
+ #define JBD_MAX_SLABS 5
+ #define JBD_SLAB_INDEX(size) (size >> 11)
+
++static DECLARE_RWSEM(jbd_slab_lock); /* protect jbd_slab[] */
+ static kmem_cache_t *jbd_slab[JBD_MAX_SLABS];
+ static const char *jbd_slab_names[JBD_MAX_SLABS] = {
+ "jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
+@@ -1988,24 +1989,27 @@ static void journal_destroy_jbd_slabs(vo
+ {
+ int i;
+
++ down_write(&jbd_slab_lock);
+ for (i = 0; i < JBD_MAX_SLABS; i++) {
+ if (jbd_slab[i])
+ kmem_cache_destroy(jbd_slab[i]);
+ jbd_slab[i] = NULL;
+ }
++ up_write(&jbd_slab_lock);
+ }
+
+ static int journal_create_jbd_slab(size_t slab_size)
+ {
+- int i = JBD_SLAB_INDEX(slab_size);
++ int rc = 0, i = JBD_SLAB_INDEX(slab_size);
+
+ BUG_ON(i >= JBD_MAX_SLABS);
+
+ /*
+ * Check if we already have a slab created for this size
+ */
++ down_write(&jbd_slab_lock);
+ if (jbd_slab[i])
+- return 0;
++ goto out_lock;
+
+ /*
+ * Create a slab and force alignment to be same as slabsize -
+@@ -2016,27 +2020,36 @@ static int journal_create_jbd_slab(size_
+ slab_size, slab_size, 0, NULL, NULL); 
+ if (!jbd_slab[i]) { + printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n"); +- return -ENOMEM; ++ rc = -ENOMEM; + } +- return 0; ++out_lock: ++ up_write(&jbd_slab_lock); ++ return rc; + } + + void * jbd_slab_alloc(size_t size, gfp_t flags) + { ++ void *ptr; + int idx; + ++ down_read(&jbd_slab_lock); + idx = JBD_SLAB_INDEX(size); + BUG_ON(jbd_slab[idx] == NULL); +- return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); ++ ptr = kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); ++ up_read(&jbd_slab_lock); ++ ++ return ptr; + } + + void jbd_slab_free(void *ptr, size_t size) + { + int idx; + ++ down_read(&jbd_slab_lock); + idx = JBD_SLAB_INDEX(size); + BUG_ON(jbd_slab[idx] == NULL); + kmem_cache_free(jbd_slab[idx], ptr); ++ up_read(&jbd_slab_lock); + } + + /* + diff --git a/lustre/kernel_patches/series/2.6-rhel5.series b/lustre/kernel_patches/series/2.6-rhel5.series index f43347a..c07ab4f 100644 --- a/lustre/kernel_patches/series/2.6-rhel5.series +++ b/lustre/kernel_patches/series/2.6-rhel5.series @@ -22,3 +22,4 @@ jbd-journal-chksum-2.6.18-vanilla.patch quota-large-limits-rhel5.patch md-mmp-unplug-dev.patch prune-icache-use-trylock-rhel5.patch +jbd-slab-race-2.6-rhel5.patch -- 1.8.3.1