From 9051844cec34ab4f3427adc28bc1948706f5ffc2 Mon Sep 17 00:00:00 2001
From: Oleg Drokin
Date: Wed, 15 Apr 2020 21:40:12 +0000
Subject: [PATCH] Revert "LU-4801 ldlm: discard l_lock from struct ldlm_lock."

This brings the recurrence of LU-11568 back in force. I just did not
realize it due to some mail filters.

This reverts commit 0584eb73dbb5b4c710a8c7eb1553ed5dad0c18d8.

Change-Id: I9efe670ab9b9b9f2ea81582fe67feaac668e54d5
Reviewed-on: https://review.whamcloud.com/38238
Reviewed-by: Oleg Drokin
Tested-by: Oleg Drokin
---
 libcfs/include/libcfs/linux/linux-mem.h |  4 ---
 lustre/include/lustre_dlm.h             | 11 +++++---
 lustre/ldlm/l_lock.c                    | 24 ++++++++---------
 lustre/ldlm/ldlm_lock.c                 | 48 ++++++++++++++++++---------------
 lustre/ldlm/ldlm_lockd.c                | 29 +-------------------
 lustre/ldlm/ldlm_resource.c             | 11 ++++----
 6 files changed, 50 insertions(+), 77 deletions(-)

diff --git a/libcfs/include/libcfs/linux/linux-mem.h b/libcfs/include/libcfs/linux/linux-mem.h
index e8550f0..b822173 100644
--- a/libcfs/include/libcfs/linux/linux-mem.h
+++ b/libcfs/include/libcfs/linux/linux-mem.h
@@ -45,10 +45,6 @@
 # include
 #endif
 
-#ifndef SLAB_TYPESAFE_BY_RCU
-#define SLAB_TYPESAFE_BY_RCU SLAB_DESTROY_BY_RCU
-#endif
-
 /*
  * Shrinker
  */
diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h
index 876caf9..4f69be739 100644
--- a/lustre/include/lustre_dlm.h
+++ b/lustre/include/lustre_dlm.h
@@ -736,12 +736,15 @@ struct ldlm_lock {
 	 */
 	struct portals_handle	l_handle;
 	/**
+	 * Internal spinlock protects l_resource. We should hold this lock
+	 * first before taking res_lock.
+	 */
+	spinlock_t		l_lock;
+	/**
 	 * Pointer to actual resource this lock is in.
-	 * ldlm_lock_change_resource() can change this on the client.
-	 * When this is possible, rcu must be used to stablise
-	 * the resource while we lock and check it hasn't been changed.
+	 * ldlm_lock_change_resource() can change this.
 	 */
-	struct ldlm_resource __rcu *l_resource;
+	struct ldlm_resource	*l_resource;
 	/**
 	 * List item for client side LRU list.
 	 * Protected by ns_lock in struct ldlm_namespace.
diff --git a/lustre/ldlm/l_lock.c b/lustre/ldlm/l_lock.c
index 6d37af1..a4f7c85 100644
--- a/lustre/ldlm/l_lock.c
+++ b/lustre/ldlm/l_lock.c
@@ -41,24 +41,19 @@
  *
  * LDLM locking uses resource to serialize access to locks
  * but there is a case when we change resource of lock upon
- * enqueue reply. We rely on rcu_assign_pointer(lock->l_resource, new_res)
+ * enqueue reply. We rely on lock->l_resource = new_res
  * being an atomic operation.
  */
 struct ldlm_resource *lock_res_and_lock(struct ldlm_lock *lock)
 {
-	struct ldlm_resource *res;
+	/* on server-side resource of lock doesn't change */
+	if (!ldlm_is_ns_srv(lock))
+		spin_lock(&lock->l_lock);
 
-	rcu_read_lock();
-	while (1) {
-		res = rcu_dereference(lock->l_resource);
-		lock_res(res);
-		if (res == lock->l_resource) {
-			ldlm_set_res_locked(lock);
-			rcu_read_unlock();
-			return res;
-		}
-		unlock_res(res);
-	}
+	lock_res(lock->l_resource);
+
+	ldlm_set_res_locked(lock);
+	return lock->l_resource;
 }
 EXPORT_SYMBOL(lock_res_and_lock);
 
@@ -67,8 +62,11 @@ EXPORT_SYMBOL(lock_res_and_lock);
  */
 void unlock_res_and_lock(struct ldlm_lock *lock)
 {
+	/* on server-side resource of lock doesn't change */
 	ldlm_clear_res_locked(lock);
 
 	unlock_res(lock->l_resource);
+	if (!ldlm_is_ns_srv(lock))
+		spin_unlock(&lock->l_lock);
 }
 EXPORT_SYMBOL(unlock_res_and_lock);
diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c
index 7e424fe..1496808 100644
--- a/lustre/ldlm/ldlm_lock.c
+++ b/lustre/ldlm/ldlm_lock.c
@@ -469,7 +469,8 @@ static struct ldlm_lock *ldlm_lock_new(struct ldlm_resource *resource)
 	if (lock == NULL)
 		RETURN(NULL);
 
-	RCU_INIT_POINTER(lock->l_resource, resource);
+	spin_lock_init(&lock->l_lock);
+	lock->l_resource = resource;
 	lu_ref_add(&resource->lr_reference, "lock", lock);
 
 	refcount_set(&lock->l_handle.h_ref, 2);
@@ -486,24 +487,24 @@ static struct ldlm_lock *ldlm_lock_new(struct ldlm_resource *resource)
 	INIT_HLIST_NODE(&lock->l_exp_hash);
 	INIT_HLIST_NODE(&lock->l_exp_flock_hash);
 
-	lprocfs_counter_incr(ldlm_res_to_ns(resource)->ns_stats,
-			     LDLM_NSS_LOCKS);
+	lprocfs_counter_incr(ldlm_res_to_ns(resource)->ns_stats,
+			     LDLM_NSS_LOCKS);
 
 	INIT_HLIST_NODE(&lock->l_handle.h_link);
 	class_handle_hash(&lock->l_handle, lock_handle_owner);
-	lu_ref_init(&lock->l_reference);
-	lu_ref_add(&lock->l_reference, "hash", lock);
-	lock->l_callback_timeout = 0;
+	lu_ref_init(&lock->l_reference);
+	lu_ref_add(&lock->l_reference, "hash", lock);
+	lock->l_callback_timeout = 0;
 	lock->l_activity = 0;
 
 #if LUSTRE_TRACKS_LOCK_EXP_REFS
 	INIT_LIST_HEAD(&lock->l_exp_refs_link);
-	lock->l_exp_refs_nr = 0;
-	lock->l_exp_refs_target = NULL;
+	lock->l_exp_refs_nr = 0;
+	lock->l_exp_refs_target = NULL;
 #endif
 	INIT_LIST_HEAD(&lock->l_exp_list);
 
-	RETURN(lock);
+	RETURN(lock);
 }
 
 /**
@@ -543,13 +544,12 @@ int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock,
 	lu_ref_add(&newres->lr_reference, "lock", lock);
 
 	/*
-	 * To flip the lock from the old to the new resource, oldres
-	 * and newres have to be locked. Resource spin-locks are taken
-	 * in the memory address order to avoid dead-locks.
-	 * As this is the only circumstance where ->l_resource
-	 * can change, and this cannot race with itself, it is safe
-	 * to access lock->l_resource without being careful about locking.
+	 * To flip the lock from the old to the new resource, lock, oldres and
+	 * newres have to be locked. Resource spin-locks are nested within
+	 * lock->l_lock, and are taken in the memory address order to avoid
+	 * dead-locks.
 	 */
+	spin_lock(&lock->l_lock);
 	oldres = lock->l_resource;
 	if (oldres < newres) {
 		lock_res(oldres);
@@ -560,9 +560,9 @@ int ldlm_lock_change_resource(struct ldlm_namespace *ns, struct ldlm_lock *lock,
 	}
 	LASSERT(memcmp(new_resid, &oldres->lr_name,
 		       sizeof oldres->lr_name) != 0);
-	rcu_assign_pointer(lock->l_resource, newres);
+	lock->l_resource = newres;
 	unlock_res(oldres);
-	unlock_res(newres);
+	unlock_res_and_lock(lock);
 
 	/* ...and the flowers are still standing! */
 	lu_ref_del(&oldres->lr_reference, "lock", lock);
@@ -2760,11 +2760,15 @@ void _ldlm_lock_debug(struct ldlm_lock *lock,
 	struct va_format vaf;
 	char *nid = "local";
 
-	rcu_read_lock();
-	resource = rcu_dereference(lock->l_resource);
-	if (resource && !atomic_inc_not_zero(&resource->lr_refcount))
-		resource = NULL;
-	rcu_read_unlock();
+	/* on server-side resource of lock doesn't change */
+	if ((lock->l_flags & LDLM_FL_NS_SRV) != 0) {
+		if (lock->l_resource != NULL)
+			resource = ldlm_resource_getref(lock->l_resource);
+	} else if (spin_trylock(&lock->l_lock)) {
+		if (lock->l_resource != NULL)
+			resource = ldlm_resource_getref(lock->l_resource);
+		spin_unlock(&lock->l_lock);
+	}
 
 	va_start(args, fmt);
 	vaf.fmt = fmt;
diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c
index f7a320f..5f331cc 100644
--- a/lustre/ldlm/ldlm_lockd.c
+++ b/lustre/ldlm/ldlm_lockd.c
@@ -40,7 +40,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
 #include
@@ -3373,30 +3372,11 @@ static int ldlm_cleanup(void)
 	RETURN(0);
 }
 
-void ldlm_resource_init_once(void *p)
-{
-	/*
-	 * It is import to initialise the spinlock only once,
-	 * as ldlm_lock_change_resource() could try to lock
-	 * the resource *after* it has been freed and possibly
-	 * reused. SLAB_TYPESAFE_BY_RCU ensures the memory won't
-	 * be freed while the lock is being taken, but we need to
-	 * ensure that it doesn't get reinitialized either.
-	 */
-	struct ldlm_resource *res = p;
-
-	memset(res, 0, sizeof(*res));
-	mutex_init(&res->lr_lvb_mutex);
-	spin_lock_init(&res->lr_lock);
-}
-
 int ldlm_init(void)
 {
 	ldlm_resource_slab = kmem_cache_create("ldlm_resources",
 					       sizeof(struct ldlm_resource), 0,
-					       SLAB_TYPESAFE_BY_RCU |
-					       SLAB_HWCACHE_ALIGN,
-					       ldlm_resource_init_once);
+					       SLAB_HWCACHE_ALIGN, NULL);
 	if (ldlm_resource_slab == NULL)
 		return -ENOMEM;
 
@@ -3456,13 +3436,6 @@ void ldlm_exit(void)
 {
 	if (ldlm_refcount)
 		CERROR("ldlm_refcount is %d in ldlm_exit!\n", ldlm_refcount);
-	/* These two lines should not be needed, but appear to fix
-	 * a crash on RHEL7. The slab_cache sometimes gets freed before the
-	 * last slab is rcu_freed, and that can cause kmem_freepages()
-	 * to free too many pages and trip a BUG
-	 */
-	kmem_cache_shrink(ldlm_resource_slab);
-	synchronize_rcu();
 	kmem_cache_destroy(ldlm_resource_slab);
 	/*
 	 * ldlm_lock_put() use RCU to call ldlm_lock_free, so need call
diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c
index 6691acd..2976e4b 100644
--- a/lustre/ldlm/ldlm_resource.c
+++ b/lustre/ldlm/ldlm_resource.c
@@ -1367,7 +1367,7 @@ static struct ldlm_resource *ldlm_resource_new(enum ldlm_type ldlm_type)
 	struct ldlm_resource *res;
 	bool rc;
 
-	res = kmem_cache_alloc(ldlm_resource_slab, GFP_NOFS);
+	OBD_SLAB_ALLOC_PTR_GFP(res, ldlm_resource_slab, GFP_NOFS);
 	if (res == NULL)
 		return NULL;
 
@@ -1383,21 +1383,20 @@ static struct ldlm_resource *ldlm_resource_new(enum ldlm_type ldlm_type)
 		break;
 	}
 	if (!rc) {
-		kmem_cache_free(ldlm_resource_slab, res);
+		OBD_SLAB_FREE_PTR(res, ldlm_resource_slab);
 		return NULL;
 	}
 
 	INIT_LIST_HEAD(&res->lr_granted);
 	INIT_LIST_HEAD(&res->lr_waiting);
 
-	res->lr_lvb_data = NULL;
-	res->lr_lvb_inode = NULL;
-	res->lr_lvb_len = 0;
 	atomic_set(&res->lr_refcount, 1);
+	spin_lock_init(&res->lr_lock);
 	lu_ref_init(&res->lr_reference);
 
 	/* Since LVB init can be delayed now, there is no longer need to
 	 * immediatelly acquire mutex here. */
+	mutex_init(&res->lr_lvb_mutex);
 	res->lr_lvb_initialized = false;
 
 	return res;
@@ -1414,7 +1413,7 @@ static void ldlm_resource_free(struct ldlm_resource *res)
 		OBD_FREE_PTR(res->lr_ibits_queues);
 	}
 
-	kmem_cache_free(ldlm_resource_slab, res);
+	OBD_SLAB_FREE(res, ldlm_resource_slab, sizeof *res);
 }
 
 /**
-- 
1.8.3.1
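
Illustration (not part of the patch): the hunks above restore the pre-LU-4801 scheme, where
a per-lock spinlock (l_lock) is taken before the resource lock on the client side so that
l_resource cannot be switched underneath the caller. Below is a minimal user-space sketch of
that pairing; toy_lock, toy_res, is_server and the pthread mutexes are illustrative stand-ins
for the Lustre structures and kernel spinlocks, not the real API.

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

struct toy_res {
	pthread_mutex_t lr_lock;	/* stand-in for the resource spinlock */
	int lr_name;
};

struct toy_lock {
	pthread_mutex_t l_lock;		/* guards l_resource on the client */
	struct toy_res *l_resource;	/* may be switched on enqueue reply */
	bool is_server;			/* server side: l_resource never changes */
};

/* Take l_lock first (client only), then the resource lock; while l_lock is
 * held the l_resource pointer cannot be switched, so reading it is safe. */
static struct toy_res *lock_res_and_lock(struct toy_lock *lk)
{
	if (!lk->is_server)
		pthread_mutex_lock(&lk->l_lock);
	pthread_mutex_lock(&lk->l_resource->lr_lock);
	return lk->l_resource;
}

/* Drop the locks in the reverse order they were taken. */
static void unlock_res_and_lock(struct toy_lock *lk)
{
	pthread_mutex_unlock(&lk->l_resource->lr_lock);
	if (!lk->is_server)
		pthread_mutex_unlock(&lk->l_lock);
}

int main(void)
{
	static struct toy_res res = { PTHREAD_MUTEX_INITIALIZER, 42 };
	static struct toy_lock lk = { PTHREAD_MUTEX_INITIALIZER, &res, false };
	struct toy_res *r = lock_res_and_lock(&lk);

	printf("holding resource %d\n", r->lr_name);
	unlock_res_and_lock(&lk);
	return 0;
}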
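
Illustration (not part of the patch): ldlm_lock_change_resource() avoids an ABBA deadlock by
taking the two resource spinlocks in memory-address order, nested inside l_lock. A sketch of
that ordering rule under the same stand-in types as above (again, not the Lustre API); the
pointer comparison mirrors the address ordering used in the patch.

#include <pthread.h>
#include <stdio.h>

struct toy_res {
	pthread_mutex_t lr_lock;
	int lr_name;
};

struct toy_lock {
	pthread_mutex_t l_lock;
	struct toy_res *l_resource;
};

static void change_resource(struct toy_lock *lk, struct toy_res *newres)
{
	struct toy_res *oldres;

	/* l_lock pins l_resource while both resource locks are taken. */
	pthread_mutex_lock(&lk->l_lock);
	oldres = lk->l_resource;
	if (oldres < newres) {		/* lower address first, global order */
		pthread_mutex_lock(&oldres->lr_lock);
		pthread_mutex_lock(&newres->lr_lock);
	} else {
		pthread_mutex_lock(&newres->lr_lock);
		pthread_mutex_lock(&oldres->lr_lock);
	}
	lk->l_resource = newres;	/* the actual switch */
	pthread_mutex_unlock(&oldres->lr_lock);
	pthread_mutex_unlock(&newres->lr_lock);
	pthread_mutex_unlock(&lk->l_lock);
}

int main(void)
{
	static struct toy_res a = { PTHREAD_MUTEX_INITIALIZER, 1 };
	static struct toy_res b = { PTHREAD_MUTEX_INITIALIZER, 2 };
	static struct toy_lock lk = { PTHREAD_MUTEX_INITIALIZER, &a };

	change_resource(&lk, &b);
	printf("lock now on resource %d\n", lk.l_resource->lr_name);
	return 0;
}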
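
Illustration (not part of the patch): the _ldlm_lock_debug() hunk uses spin_trylock() so a
debug message can be emitted without blocking or risking a deadlock on l_lock; if the trylock
fails, the message is simply printed without a resource reference. A rough user-space sketch
of that best-effort pattern, with made-up names and a plain counter standing in for the
resource refcount:

#include <pthread.h>
#include <stdio.h>

struct toy_res {
	int lr_refcount;
	int lr_name;
};

struct toy_lock {
	pthread_mutex_t l_lock;
	struct toy_res *l_resource;
};

static void debug_print(struct toy_lock *lk)
{
	struct toy_res *res = NULL;

	/* Only peek at l_resource if l_lock can be taken without blocking. */
	if (pthread_mutex_trylock(&lk->l_lock) == 0) {
		res = lk->l_resource;
		if (res != NULL)
			res->lr_refcount++;	/* stand-in for getref */
		pthread_mutex_unlock(&lk->l_lock);
	}

	if (res != NULL)
		printf("lock on resource %d (refs %d)\n",
		       res->lr_name, res->lr_refcount);
	else
		printf("lock (resource not available)\n");
}

int main(void)
{
	static struct toy_res res = { 1, 7 };
	static struct toy_lock lk = { PTHREAD_MUTEX_INITIALIZER, &res };

	debug_print(&lk);
	return 0;
}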