From 08d2f5771f7df123c85655e8186ddf657259590e Mon Sep 17 00:00:00 2001
From: Johann Lombardi
Date: Tue, 11 Dec 2012 10:47:59 -0500
Subject: [PATCH] LU-1520 ldlm: Revert "improve ldlm_pools_shrink algorithm"

This reverts commit c861cc7e0b6f7e82fd55b9658dd29578f97b5607.
The patch should land on master first.

Change-Id: I9b3739defed6bf315646f8a107d3218414a14d25
Reviewed-on: http://review.whamcloud.com/4799
Reviewed-by: Johann Lombardi
Tested-by: Johann Lombardi
---
 lustre/include/lustre_dlm.h |   1 -
 lustre/ldlm/ldlm_internal.h |   5 --
 lustre/ldlm/ldlm_lock.c     |   2 -
 lustre/ldlm/ldlm_lockd.c    |   1 -
 lustre/ldlm/ldlm_pool.c     | 213 +++++++++++++++-----------------------------
 lustre/ldlm/ldlm_resource.c |   5 --
 6 files changed, 74 insertions(+), 153 deletions(-)

diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h
index 187b965..dd3d64a 100644
--- a/lustre/include/lustre_dlm.h
+++ b/lustre/include/lustre_dlm.h
@@ -396,7 +396,6 @@ struct ldlm_namespace {
         __u32            ns_refcount;     /* count of resources in the hash */
         struct list_head ns_root_list;    /* all root resources in ns */
         struct list_head ns_list_chain;   /* position in global NS list */
-        struct list_head ns_shrink_chain; /* list of shrinking ns-s */
         struct list_head ns_unused_list;  /* all root resources in ns */
         int              ns_nr_unused;
diff --git a/lustre/ldlm/ldlm_internal.h b/lustre/ldlm/ldlm_internal.h
index f7b087e..015d09a 100644
--- a/lustre/ldlm/ldlm_internal.h
+++ b/lustre/ldlm/ldlm_internal.h
@@ -42,11 +42,6 @@ extern struct semaphore ldlm_srv_namespace_lock;
 extern struct list_head ldlm_srv_namespace_list;
 extern struct semaphore ldlm_cli_namespace_lock;
 extern struct list_head ldlm_cli_namespace_list;
-extern atomic_t ldlm_cli_all_ns_unused;
-extern atomic_t ldlm_srv_all_pl_granted;
-#define LDLM_POOL_SHRINK_BATCH_SIZE 64
-#define LDLM_POOL_SHRINK_THREAD_LIMIT 32
-extern struct semaphore ldlm_pool_shrink_lock;

 static inline atomic_t *ldlm_namespace_nr(ldlm_side_t client)
 {
diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c
index c87c121..604167e 100644
--- a/lustre/ldlm/ldlm_lock.c
+++ b/lustre/ldlm/ldlm_lock.c
@@ -188,7 +188,6 @@ int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock)
                 lock->l_flags &= ~LDLM_FL_SKIPPED;
                 LASSERT(ns->ns_nr_unused > 0);
                 ns->ns_nr_unused--;
-                atomic_dec(&ldlm_cli_all_ns_unused);
                 rc = 1;
         }
         return rc;
@@ -215,7 +214,6 @@ void ldlm_lock_add_to_lru_nolock(struct ldlm_lock *lock)
         list_add_tail(&lock->l_lru, &ns->ns_unused_list);
         LASSERT(ns->ns_nr_unused >= 0);
         ns->ns_nr_unused++;
-        atomic_inc(&ldlm_cli_all_ns_unused);
 }

 void ldlm_lock_add_to_lru(struct ldlm_lock *lock)
diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c
index aab9cfc..8ae069d 100644
--- a/lustre/ldlm/ldlm_lockd.c
+++ b/lustre/ldlm/ldlm_lockd.c
@@ -2366,7 +2366,6 @@ int __init ldlm_init(void)
         init_mutex(&ldlm_ref_sem);
         init_mutex(ldlm_namespace_lock(LDLM_NAMESPACE_SERVER));
         init_mutex(ldlm_namespace_lock(LDLM_NAMESPACE_CLIENT));
-        sema_init(&ldlm_pool_shrink_lock, LDLM_POOL_SHRINK_THREAD_LIMIT);
         ldlm_resource_slab = cfs_mem_cache_create("ldlm_resources",
                                                   sizeof(struct ldlm_resource),
                                                   0, SLAB_HWCACHE_ALIGN);
diff --git a/lustre/ldlm/ldlm_pool.c b/lustre/ldlm/ldlm_pool.c
index d50c050..20678aa 100644
--- a/lustre/ldlm/ldlm_pool.c
+++ b/lustre/ldlm/ldlm_pool.c
@@ -915,7 +915,6 @@ void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock)
         ENTRY;

         atomic_inc(&pl->pl_granted);
-        atomic_inc(&ldlm_srv_all_pl_granted);
         atomic_inc(&pl->pl_grant_rate);
         atomic_inc(&pl->pl_grant_speed);

@@ -947,7 +946,6 @@ void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock)

         LASSERT(atomic_read(&pl->pl_granted) > 0);
         atomic_dec(&pl->pl_granted);
-        atomic_dec(&ldlm_srv_all_pl_granted);
         atomic_inc(&pl->pl_cancel_rate);
         atomic_dec(&pl->pl_grant_speed);

@@ -1054,32 +1052,6 @@ static struct shrinker *ldlm_pools_cli_shrinker;
 static struct completion ldlm_pools_comp;

 /*
- * Cancel locks from namespaces on \a ns_list. Number of locks to cancel of
- * each namespace is proportional to number of locks on the namespace. Return
- * number of remained cached locks on the ns_list
- */
-static int ldlm_ns_batch_shrink(ldlm_side_t client, unsigned int gfp_mask,
-                                struct list_head *ns_list,
-                                int total, int nr_to_shrink)
-{
-        int nr_locks, cached = 0;
-        struct ldlm_namespace *ns;
-
-        list_for_each_entry(ns, ns_list, ns_shrink_chain) {
-                struct ldlm_pool *ns_pool;
-
-                ns_pool = &ns->ns_pool;
-                nr_locks = ldlm_pool_granted(ns_pool);
-                ldlm_pool_shrink(ns_pool,
-                                 1 + nr_locks * nr_to_shrink / total,
-                                 gfp_mask);
-                cached += ldlm_pool_granted(ns_pool);
-        }
-
-        return cached;
-}
-
-/*
  * Cancel \a nr locks from all namespaces (if possible). Returns number of
  * cached locks after shrink is finished. All namespaces are asked to
  * cancel approximately equal amount of locks to keep balancing.
@@ -1087,10 +1059,8 @@ static int ldlm_ns_batch_shrink(ldlm_side_t client, unsigned int gfp_mask,
 static int ldlm_pools_shrink(ldlm_side_t client, int nr,
                              unsigned int gfp_mask)
 {
-	int total = 0, cached = 0, nr_ns;
-	struct ldlm_namespace *ns;
-	int i;
-	CFS_LIST_HEAD(ns_list);
+        int total = 0, cached = 0, nr_ns;
+        struct ldlm_namespace *ns;
         if (client == LDLM_NAMESPACE_CLIENT && nr != 0 &&
             !(gfp_mask & __GFP_FS))
                 return 0;
@@ -1103,74 +1073,61 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr, unsigned int gfp_mask)
         /*
          * Find out how many resources we may release.
          */
-	if (client == LDLM_NAMESPACE_CLIENT) {
-		total = atomic_read(&ldlm_cli_all_ns_unused);
-		total = total / 100 * sysctl_vfs_cache_pressure;
-	} else {
-		total = atomic_read(&ldlm_srv_all_pl_granted);
-	}
+        for (nr_ns = atomic_read(ldlm_namespace_nr(client));
+             nr_ns > 0; nr_ns--)
+        {
+                mutex_down(ldlm_namespace_lock(client));
+                if (list_empty(ldlm_namespace_list(client))) {
+                        mutex_up(ldlm_namespace_lock(client));
+                        return 0;
+                }
+                ns = ldlm_namespace_first_locked(client);
+                ldlm_namespace_get(ns);
+                ldlm_namespace_move_locked(ns, client);
+                mutex_up(ldlm_namespace_lock(client));
+                total += ldlm_pool_shrink(&ns->ns_pool, 0, gfp_mask);
+                ldlm_namespace_put(ns, 1);
+        }

         if (nr == 0 || total == 0)
                 return total;

-	down(&ldlm_pool_shrink_lock);
-	mutex_down(ldlm_namespace_lock(client));
-
-	/*
-	 * If list is empty, we can't return any @cached > 0,
-	 * that probably would cause needless shrinker call.
-	 */
-	if (list_empty(ldlm_namespace_list(client))) {
-		cached = 0;
-		goto out_unlock;
-	}
-
         /*
          * Shrink at least ldlm_namespace_nr(client) namespaces.
          */
-	for (i = 0, nr_ns = atomic_read(ldlm_namespace_nr(client));
-	     nr_ns > 0; nr_ns--) {
-		ns = ldlm_namespace_first_locked(client);
-		spin_lock(&ns->ns_hash_lock);
-		if (list_empty(&ns->ns_shrink_chain) && ns->ns_refcount > 0) {
-			ldlm_namespace_get_locked(ns);
-			list_add_tail(&ns->ns_shrink_chain, &ns_list);
-			i++;
-		}
-		spin_unlock(&ns->ns_hash_lock);
-		ldlm_namespace_move_locked(ns, client);
-		if ((i == LDLM_POOL_SHRINK_BATCH_SIZE || nr_ns == 1) &&
-		    !list_empty(&ns_list)) {
-			struct ldlm_namespace *tmp;
-
-			mutex_up(ldlm_namespace_lock(client));
-			cached += ldlm_ns_batch_shrink(client, gfp_mask,
-						       &ns_list, total, nr);
-			mutex_down(ldlm_namespace_lock(client));
-
-			list_for_each_entry_safe(ns, tmp, &ns_list,
-						 ns_shrink_chain) {
-				spin_lock(&ns->ns_hash_lock);
-				list_del_init(&ns->ns_shrink_chain);
-				ldlm_namespace_put_locked(ns, 0);
-				spin_unlock(&ns->ns_hash_lock);
-			}
-			i = 0;
-
-			if (list_empty(ldlm_namespace_list(client))) {
-				cached = 0;
-				goto out_unlock;
-			}
-		}
-	}
-
-out_unlock:
-	mutex_up(ldlm_namespace_lock(client));
-	up(&ldlm_pool_shrink_lock);
+        for (nr_ns = atomic_read(ldlm_namespace_nr(client));
+             nr_ns > 0; nr_ns--)
+        {
+                int cancel, nr_locks;
+
+                /*
+                 * Do not call shrink under ldlm_namespace_lock(client)
+                 */
+                mutex_down(ldlm_namespace_lock(client));
+                if (list_empty(ldlm_namespace_list(client))) {
+                        mutex_up(ldlm_namespace_lock(client));
+                        /*
+                         * If list is empty, we can't return any @cached > 0,
+                         * that probably would cause needless shrinker
+                         * call.
+                         */
+                        cached = 0;
+                        break;
+                }
+                ns = ldlm_namespace_first_locked(client);
+                ldlm_namespace_get(ns);
+                ldlm_namespace_move_locked(ns, client);
+                mutex_up(ldlm_namespace_lock(client));
+                nr_locks = ldlm_pool_granted(&ns->ns_pool);
+                cancel = 1 + nr_locks * nr / total;
+                ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask);
+                cached += ldlm_pool_granted(&ns->ns_pool);
+                ldlm_namespace_put(ns, 1);
+        }

         /* we only decrease the SLV in server pools shrinker, return -1 to
          * kernel to avoid needless loop. LU-1128 */
-	return (client == LDLM_NAMESPACE_SERVER) ? -1 : cached;
+        return (client == LDLM_NAMESPACE_SERVER) ? -1 : cached;
 }

 static int ldlm_pools_srv_shrink(SHRINKER_FIRST_ARG int nr_to_scan,
@@ -1190,8 +1147,6 @@ void ldlm_pools_recalc(ldlm_side_t client)
         __u32 nr_l = 0, nr_p = 0, l;
         struct ldlm_namespace *ns;
         int nr, equal = 0;
-	CFS_LIST_HEAD(ns_list);
-	int i;
         /*
          * No need to setup pool limit for client pools.
          */
@@ -1264,53 +1219,33 @@ void ldlm_pools_recalc(ldlm_side_t client)
                 mutex_up(ldlm_namespace_lock(client));
         }

-	/*
-	 * Recalc at least ldlm_namespace_nr(client) namespaces.
-	 */
-	mutex_down(ldlm_namespace_lock(client));
-	if (list_empty(ldlm_namespace_list(client))) {
-		mutex_up(ldlm_namespace_lock(client));
-		return;
-	}
-
-	for (i = 0, nr = atomic_read(ldlm_namespace_nr(client));
-	     nr > 0; nr--) {
-		ns = ldlm_namespace_first_locked(client);
-		spin_lock(&ns->ns_hash_lock);
-		if (list_empty(&ns->ns_shrink_chain) && ns->ns_refcount > 0) {
-			ldlm_namespace_get_locked(ns);
-			list_add_tail(&ns->ns_shrink_chain, &ns_list);
-			i++;
-		}
-		spin_unlock(&ns->ns_hash_lock);
-		ldlm_namespace_move_locked(ns, client);
-		if ((i == LDLM_POOL_SHRINK_BATCH_SIZE || nr == 1) &&
-		    !list_empty(&ns_list)) {
-			struct ldlm_namespace *tmp;
-
-			mutex_up(ldlm_namespace_lock(client));
-
-			/*
-			 * Recalc pools on the list
-			 */
-			list_for_each_entry_safe(ns, tmp, &ns_list,
-						 ns_shrink_chain) {
-				ldlm_pool_recalc(&ns->ns_pool);
-
-				spin_lock(&ns->ns_hash_lock);
-				list_del_init(&ns->ns_shrink_chain);
-				ldlm_namespace_put_locked(ns, 1);
-				spin_unlock(&ns->ns_hash_lock);
-			}
-			mutex_down(ldlm_namespace_lock(client));
-
-			i = 0;
-			if (list_empty(ldlm_namespace_list(client)))
-				break;
-		}
-	}
-
-	mutex_up(ldlm_namespace_lock(client));
-	return;
+        /*
+         * Recalc at least ldlm_namespace_nr(client) namespaces.
+         */
+        for (nr = atomic_read(ldlm_namespace_nr(client)); nr > 0; nr--) {
+                /*
+                 * Lock the list, get first @ns in the list, getref, move it
+                 * to the tail, unlock and call pool recalc. This way we avoid
+                 * calling recalc under @ns lock what is really good as we get
+                 * rid of potential deadlock on client nodes when canceling
+                 * locks synchronously.
+                 */
+                mutex_down(ldlm_namespace_lock(client));
+                if (list_empty(ldlm_namespace_list(client))) {
+                        mutex_up(ldlm_namespace_lock(client));
+                        break;
+                }
+                ns = ldlm_namespace_first_locked(client);
+                ldlm_namespace_get(ns);
+                ldlm_namespace_move_locked(ns, client);
+                mutex_up(ldlm_namespace_lock(client));
+
+                /*
+                 * After setup is done - recalc the pool.
+                 */
+                ldlm_pool_recalc(&ns->ns_pool);
+                ldlm_namespace_put(ns, 1);
+        }
 }
 EXPORT_SYMBOL(ldlm_pools_recalc);
diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c
index 5b0d8d6..62a285e 100644
--- a/lustre/ldlm/ldlm_resource.c
+++ b/lustre/ldlm/ldlm_resource.c
@@ -61,10 +61,6 @@ struct list_head ldlm_srv_namespace_list =
 struct semaphore ldlm_cli_namespace_lock;
 struct list_head ldlm_cli_namespace_list =
         CFS_LIST_HEAD_INIT(ldlm_cli_namespace_list);
-atomic_t ldlm_cli_all_ns_unused = ATOMIC_INIT(0);
-atomic_t ldlm_srv_all_pl_granted = ATOMIC_INIT(0);
-
-struct semaphore ldlm_pool_shrink_lock;

 cfs_proc_dir_entry_t *ldlm_type_proc_dir = NULL;
 cfs_proc_dir_entry_t *ldlm_ns_proc_dir = NULL;
@@ -356,7 +352,6 @@ ldlm_namespace_new(struct obd_device *obd, char *name,

         CFS_INIT_LIST_HEAD(&ns->ns_root_list);
         CFS_INIT_LIST_HEAD(&ns->ns_list_chain);
-        CFS_INIT_LIST_HEAD(&ns->ns_shrink_chain);
         ns->ns_refcount = 0;
         ns->ns_client = client;
         spin_lock_init(&ns->ns_hash_lock);
-- 
1.8.3.1
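The shrink policy this revert restores asks each namespace to cancel locks in
proportion to its share of the total granted count, via the formula in the
restored loop: cancel = 1 + nr_locks * nr / total. The leading "1 +" guarantees
every namespace makes at least minimal progress even when its share rounds down
to zero. The standalone C program below is only an illustrative sketch of that
arithmetic; the lock counts and names (granted[], total, nr) are made-up values
for demonstration, not part of the patch:

#include <stdio.h>

int main(void)
{
        /* Hypothetical per-namespace granted-lock counts. */
        int granted[] = { 900, 90, 10 };
        int total = 1000;       /* sum of granted[] over all namespaces */
        int nr = 100;           /* locks the shrinker asked us to cancel */
        int i;

        for (i = 0; i < 3; i++) {
                /* Same formula as in the restored shrink loop. */
                int cancel = 1 + granted[i] * nr / total;
                printf("ns%d: granted=%3d cancel=%d\n", i, granted[i], cancel);
        }
        return 0;
}

With these numbers the per-namespace quotas come out to 91, 10 and 2: roughly
the requested 100 cancellations, split by each namespace's share of the cache,
with one extra lock per namespace from the "1 +" floor.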
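Both restored loops, in ldlm_pools_shrink() and ldlm_pools_recalc(), follow the
traversal discipline spelled out in the restored comment: lock the namespace
list, take the first entry, grab a reference, rotate the entry to the tail,
drop the lock, and only then call into the pool, so that a shrink or recalc
that cancels locks synchronously never runs under the list lock. Below is a
minimal userspace sketch of that pattern; the pthread mutex stands in for the
kernel semaphore, and struct ns, nss[] and pool_recalc() are simplified
hypothetical stand-ins, not the real ldlm types:

#include <pthread.h>
#include <stdio.h>

/* Hypothetical stand-ins for the ldlm namespace list and pool. */
struct ns {
        const char *name;
        int         granted;    /* cached locks in this namespace */
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct ns nss[3] = { { "ns0", 900 }, { "ns1", 90 }, { "ns2", 10 } };
static int first;               /* index of the current list head */

/* Stand-in for ldlm_pool_recalc(): may block cancelling locks,
 * so it must never be called while list_lock is held. */
static void pool_recalc(struct ns *ns)
{
        printf("recalc %s (granted=%d)\n", ns->name, ns->granted);
}

int main(void)
{
        int nr;

        for (nr = 0; nr < 3; nr++) {
                struct ns *ns;

                /* Hold the list lock only long enough to pick the
                 * head and rotate it to the tail. */
                pthread_mutex_lock(&list_lock);
                ns = &nss[first];
                first = (first + 1) % 3;        /* "move to tail" */
                pthread_mutex_unlock(&list_lock);

                pool_recalc(ns);                /* called without the lock */
        }
        return 0;
}

Rotating each namespace to the tail before releasing the lock is what keeps
repeated shrinker invocations fair: every pass starts from whichever namespace
has waited longest since it was last shrunk or recalculated.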