From 74a548647771d9b555a6cbcbaf8a5b61541182a9 Mon Sep 17 00:00:00 2001 From: girish Date: Sun, 3 May 2009 18:36:35 +0000 Subject: [PATCH] b=19184 i=adilger i=girish o=brian behlendorf add proper locking for creation of jbd_4k slab cache --- lustre/ChangeLog | 7 ++ .../patches/jbd-slab-race-2.6-rhel5.patch | 101 +++++++++++++++++++++ lustre/kernel_patches/series/2.6-rhel5.series | 1 + 3 files changed, 109 insertions(+) create mode 100644 lustre/kernel_patches/patches/jbd-slab-race-2.6-rhel5.patch diff --git a/lustre/ChangeLog b/lustre/ChangeLog index f5fb5a7..60c69df 100644 --- a/lustre/ChangeLog +++ b/lustre/ChangeLog @@ -2016,6 +2016,13 @@ Description: open file using fid Details : A file can be opened using just its fid, like /.lustre/fid/SEQ:OID:VER - this is needed for HSM and replication +Severity : normal +Frequency : Only in RHEL5 when mounting multiple ext3 filesystems + simultaneously +Bugzilla : 19184 +Description: "kmem_cache_create: duplicate cache jbd_4k" error message +Details : add proper locking for creation of jbd_4k slab cache + -------------------------------------------------------------------------------- 2007-08-10 Cluster File Systems, Inc. diff --git a/lustre/kernel_patches/patches/jbd-slab-race-2.6-rhel5.patch b/lustre/kernel_patches/patches/jbd-slab-race-2.6-rhel5.patch new file mode 100644 index 0000000..0a11f74 --- /dev/null +++ b/lustre/kernel_patches/patches/jbd-slab-race-2.6-rhel5.patch @@ -0,0 +1,101 @@ +kmem_cache_create: duplicate cache jbd_4k + +The jbd slab cache creation/deletion is racey. If multiple jbd based +filesystems are mounted concurrently, and there are no other jbd based +filesystems already mounted. Then we can race creating the slab caches +since jbd_slab[] is not locked. This is not commonly observed because +typically /root is mounted early with a jbd based filesystem making the +race impossible. 
On our diskless systems /root does not use the jbd
+but we do have attached storage which does, and which is mounted in
+parallel. Basically our setup is similar to what may be found in a
+NAS style appliance.
+
+This patch wraps all modifications to jbd_slab[] in the jbd_slab_lock
+to prevent the above race.
+
+LLNL Bug 291
+Signed-off-by: Brian Behlendorf
+
+Index: linux+rh+chaos/fs/jbd/journal.c
+===================================================================
+--- linux+rh+chaos.orig/fs/jbd/journal.c
++++ linux+rh+chaos/fs/jbd/journal.c
+@@ -1979,6 +1979,7 @@ void * __jbd_kmalloc (const char *where,
+ #define JBD_MAX_SLABS 5
+ #define JBD_SLAB_INDEX(size) (size >> 11)
+ 
++static DECLARE_RWSEM(jbd_slab_lock); /* protect jbd_slab[] */
+ static kmem_cache_t *jbd_slab[JBD_MAX_SLABS];
+ static const char *jbd_slab_names[JBD_MAX_SLABS] = {
+ 	"jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
+@@ -1988,24 +1989,27 @@ static void journal_destroy_jbd_slabs(vo
+ {
+ 	int i;
+ 
++	down_write(&jbd_slab_lock);
+ 	for (i = 0; i < JBD_MAX_SLABS; i++) {
+ 		if (jbd_slab[i])
+ 			kmem_cache_destroy(jbd_slab[i]);
+ 		jbd_slab[i] = NULL;
+ 	}
++	up_write(&jbd_slab_lock);
+ }
+ 
+ static int journal_create_jbd_slab(size_t slab_size)
+ {
+-	int i = JBD_SLAB_INDEX(slab_size);
++	int rc = 0, i = JBD_SLAB_INDEX(slab_size);
+ 
+ 	BUG_ON(i >= JBD_MAX_SLABS);
+ 
+ 	/*
+ 	 * Check if we already have a slab created for this size
+ 	 */
++	down_write(&jbd_slab_lock);
+ 	if (jbd_slab[i])
+-		return 0;
++		goto out_lock;
+ 
+ 	/*
+ 	 * Create a slab and force alignment to be same as slabsize -
+@@ -2016,27 +2020,36 @@ static int journal_create_jbd_slab(size_
+ 			slab_size, slab_size, 0, NULL, NULL);
+ 	if (!jbd_slab[i]) {
+ 		printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
+-		return -ENOMEM;
++		rc = -ENOMEM;
+ 	}
+-	return 0;
++out_lock:
++	up_write(&jbd_slab_lock);
++	return rc;
+ }
+ 
+ void * jbd_slab_alloc(size_t size, gfp_t flags)
+ {
++	void *ptr;
+ 	int idx;
+ 
++	down_read(&jbd_slab_lock);
+ 	idx = 
JBD_SLAB_INDEX(size); + BUG_ON(jbd_slab[idx] == NULL); +- return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); ++ ptr = kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL); ++ up_read(&jbd_slab_lock); ++ ++ return ptr; + } + + void jbd_slab_free(void *ptr, size_t size) + { + int idx; + ++ down_read(&jbd_slab_lock); + idx = JBD_SLAB_INDEX(size); + BUG_ON(jbd_slab[idx] == NULL); + kmem_cache_free(jbd_slab[idx], ptr); ++ up_read(&jbd_slab_lock); + } + + /* + diff --git a/lustre/kernel_patches/series/2.6-rhel5.series b/lustre/kernel_patches/series/2.6-rhel5.series index 7818b6b..906c06e 100644 --- a/lustre/kernel_patches/series/2.6-rhel5.series +++ b/lustre/kernel_patches/series/2.6-rhel5.series @@ -22,3 +22,4 @@ jbd-journal-chksum-2.6.18-vanilla.patch quota-large-limits-rhel5.patch raid5-mmp-unplug-dev.patch small-fixes-about-jbd.patch +jbd-slab-race-2.6-rhel5.patch -- 1.8.3.1