From fbe4b504f24870fd800b103293f1cfc7192448b4 Mon Sep 17 00:00:00 2001 From: Eric Mei Date: Tue, 5 Jan 2010 10:28:57 -0700 Subject: [PATCH] b=21519 fix race between ldlm_pools_thread and ldlm_space_free After ldlm_space_free drops the refcount of a namespace to 0, it may start to clean up data structures in the namespace, while ldlm_pools_thread can still pick up the namespace that is being freed. This patch instructs ldlm_pools_thread to skip any namespace that is being freed. i=oleg i=wangdi --- lustre/ldlm/ldlm_pool.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/lustre/ldlm/ldlm_pool.c b/lustre/ldlm/ldlm_pool.c index 9560f34..4acf356 100644 --- a/lustre/ldlm/ldlm_pool.c +++ b/lustre/ldlm/ldlm_pool.c @@ -1222,6 +1222,7 @@ void ldlm_pools_recalc(ldlm_side_t client) * Recalc at least ldlm_namespace_nr(client) namespaces. */ for (nr = atomic_read(ldlm_namespace_nr(client)); nr > 0; nr--) { + int skip; /* * Lock the list, get first @ns in the list, getref, move it * to the tail, unlock and call pool recalc. This way we avoid @@ -1235,15 +1236,30 @@ void ldlm_pools_recalc(ldlm_side_t client) break; } ns = ldlm_namespace_first_locked(client); - ldlm_namespace_get(ns); + + spin_lock(&ns->ns_hash_lock); + /* + * skip ns which is being freed, and we don't want to increase + * its refcount again, not even temporarily. bz21519. + */ + if (ns->ns_refcount == 0) { + skip = 1; + } else { + skip = 0; + ldlm_namespace_get_locked(ns); + } + spin_unlock(&ns->ns_hash_lock); + ldlm_namespace_move_locked(ns, client); mutex_up(ldlm_namespace_lock(client)); /* * After setup is done - recalc the pool. */ - ldlm_pool_recalc(&ns->ns_pool); - ldlm_namespace_put(ns, 1); + if (!skip) { + ldlm_pool_recalc(&ns->ns_pool); + ldlm_namespace_put(ns, 1); + } } } EXPORT_SYMBOL(ldlm_pools_recalc); -- 1.8.3.1