kmem_cache_create: duplicate cache jbd_4k

The jbd slab cache creation/deletion is racy. If multiple jbd-based
filesystems are mounted concurrently, and no other jbd-based filesystem
is already mounted, they can race to create the slab caches, because
nothing serializes access to jbd_slab[]. This is rarely observed
because /root is typically mounted early on a jbd-based filesystem,
which makes the race impossible. On our diskless systems /root does not
use jbd, but we do have attached storage which does, and which is
mounted in parallel. Our setup is similar to what may be found in a
NAS-style appliance.

This patch wraps all modifications to jbd_slab[] in jbd_slab_lock to
prevent the race described above.

LLNL Bug 291

Signed-off-by: Brian Behlendorf

Index: linux+rh+chaos/fs/jbd/journal.c
===================================================================
--- linux+rh+chaos.orig/fs/jbd/journal.c
+++ linux+rh+chaos/fs/jbd/journal.c
@@ -1979,6 +1979,7 @@ void * __jbd_kmalloc (const char *where,
 #define JBD_MAX_SLABS 5
 #define JBD_SLAB_INDEX(size)  (size >> 11)
 
+static DECLARE_RWSEM(jbd_slab_lock); /* protect jbd_slab[] */
 static kmem_cache_t *jbd_slab[JBD_MAX_SLABS];
 static const char *jbd_slab_names[JBD_MAX_SLABS] = {
 	"jbd_1k", "jbd_2k", "jbd_4k", NULL, "jbd_8k"
@@ -1988,24 +1989,27 @@ static void journal_destroy_jbd_slabs(vo
 {
 	int i;
 
+	down_write(&jbd_slab_lock);
 	for (i = 0; i < JBD_MAX_SLABS; i++) {
 		if (jbd_slab[i])
 			kmem_cache_destroy(jbd_slab[i]);
 		jbd_slab[i] = NULL;
 	}
+	up_write(&jbd_slab_lock);
 }
 
 static int journal_create_jbd_slab(size_t slab_size)
 {
-	int i = JBD_SLAB_INDEX(slab_size);
+	int rc = 0, i = JBD_SLAB_INDEX(slab_size);
 
 	BUG_ON(i >= JBD_MAX_SLABS);
 
 	/*
 	 * Check if we already have a slab created for this size
 	 */
+	down_write(&jbd_slab_lock);
 	if (jbd_slab[i])
-		return 0;
+		goto out_lock;
 
 	/*
 	 * Create a slab and force alignment to be same as slabsize -
@@ -2016,27 +2020,36 @@ static int journal_create_jbd_slab(size_
 			slab_size, slab_size, 0, NULL, NULL);
 	if (!jbd_slab[i]) {
 		printk(KERN_EMERG "JBD: no memory for jbd_slab cache\n");
-		return -ENOMEM;
+		rc = -ENOMEM;
 	}
-	return 0;
+out_lock:
+	up_write(&jbd_slab_lock);
+	return rc;
 }
 
 void * jbd_slab_alloc(size_t size, gfp_t flags)
 {
+	void *ptr;
 	int idx;
 
+	down_read(&jbd_slab_lock);
 	idx = JBD_SLAB_INDEX(size);
 	BUG_ON(jbd_slab[idx] == NULL);
-	return kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
+	ptr = kmem_cache_alloc(jbd_slab[idx], flags | __GFP_NOFAIL);
+	up_read(&jbd_slab_lock);
+
+	return ptr;
 }
 
 void jbd_slab_free(void *ptr, size_t size)
 {
 	int idx;
 
+	down_read(&jbd_slab_lock);
 	idx = JBD_SLAB_INDEX(size);
 	BUG_ON(jbd_slab[idx] == NULL);
 	kmem_cache_free(jbd_slab[idx], ptr);
+	up_read(&jbd_slab_lock);
 }
 
 /*
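
To illustrate the check-then-create race this rwsem closes, below is a
minimal userspace C sketch (hypothetical names throughout;
pthread_rwlock_t stands in for the kernel rw_semaphore, and malloc()
for kmem_cache_create()). Without the write lock around the check and
the create, two threads can both observe slab[i] == NULL and both
attempt the create, which is how two parallel mounts produced the
"duplicate cache jbd_4k" failure:

/*
 * Userspace analogue of the jbd_slab[] race fix. Not kernel code;
 * compile with: cc -o slabrace slabrace.c -lpthread
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_SLABS 5

static pthread_rwlock_t slab_lock = PTHREAD_RWLOCK_INITIALIZER;
static void *slab[MAX_SLABS];		/* stand-in for kmem_cache_t * */

static int create_slab(int i)
{
	int rc = 0;

	pthread_rwlock_wrlock(&slab_lock);	/* like down_write() */
	if (slab[i])				/* already created: benign */
		goto out_lock;
	slab[i] = malloc(64);			/* stand-in for kmem_cache_create() */
	if (!slab[i])
		rc = -1;
out_lock:
	pthread_rwlock_unlock(&slab_lock);	/* like up_write() */
	return rc;
}

static void *mount_thread(void *arg)
{
	(void)arg;
	/* Both "mounts" want the same slab index, like two 4k-block
	 * jbd filesystems mounted in parallel. */
	create_slab(2);
	return NULL;
}

int main(void)
{
	pthread_t t1, t2;

	pthread_create(&t1, NULL, mount_thread, NULL);
	pthread_create(&t2, NULL, mount_thread, NULL);
	pthread_join(t1, NULL);
	pthread_join(t2, NULL);
	printf("slab[2] created once: %p\n", slab[2]);
	return 0;
}

A read/write semaphore fits here because the hot paths, jbd_slab_alloc()
and jbd_slab_free(), only read jbd_slab[] and so can proceed
concurrently under the read lock, while the rare create/destroy paths
take the write lock exclusively.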