Whamcloud - gitweb
LU-1520 ldlm: improve ldlm_pools_shrink algorithm
authorHongchao Zhang <hongchao.zhang@whamcloud.com>
Tue, 4 Sep 2012 15:27:09 +0000 (23:27 +0800)
committerJohann Lombardi <johann.lombardi@intel.com>
Tue, 11 Dec 2012 15:02:48 +0000 (10:02 -0500)
1, shrink namespaces by batches of 64 namespaces; the batch is
   implemented as a list
2, limit number of simultaneously shrinking threads to 32 threads
3, have ldlm_pools_recalc to operate with namespaces similar to
   ldlm_pools_shrink
4, use global counters of unused locks on clients and granted
   locks on servers to avoid iterating over namespaces

Change-Id: I5fb3f56748ae10961c50b4b06c300c9c7f5fca87
Signed-off-by: Vladimir Saveliev <valdimir.saveliev@oracle.com>
Signed-off-by: Hongchao Zhang <hongchao.zhang@whamcloud.com>
Reviewed-on: http://review.whamcloud.com/3270
Tested-by: Hudson
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Fan Yong <yong.fan@whamcloud.com>
Tested-by: Maloo <whamcloud.maloo@gmail.com>
Reviewed-by: Johann Lombardi <johann.lombardi@intel.com>
lustre/include/lustre_dlm.h
lustre/ldlm/ldlm_internal.h
lustre/ldlm/ldlm_lock.c
lustre/ldlm/ldlm_lockd.c
lustre/ldlm/ldlm_pool.c
lustre/ldlm/ldlm_resource.c

index dd3d64a..187b965 100644 (file)
@@ -396,6 +396,7 @@ struct ldlm_namespace {
         __u32                  ns_refcount; /* count of resources in the hash */
         struct list_head       ns_root_list; /* all root resources in ns */
         struct list_head       ns_list_chain; /* position in global NS list */
+        struct list_head       ns_shrink_chain; /* list of shrinking ns-s */
 
         struct list_head       ns_unused_list; /* all root resources in ns */
         int                    ns_nr_unused;
index 015d09a..f7b087e 100644 (file)
@@ -42,6 +42,11 @@ extern struct semaphore ldlm_srv_namespace_lock;
 extern struct list_head ldlm_srv_namespace_list;
 extern struct semaphore ldlm_cli_namespace_lock;
 extern struct list_head ldlm_cli_namespace_list;
+extern atomic_t ldlm_cli_all_ns_unused;
+extern atomic_t ldlm_srv_all_pl_granted;
+#define LDLM_POOL_SHRINK_BATCH_SIZE 64
+#define LDLM_POOL_SHRINK_THREAD_LIMIT 32
+extern struct semaphore ldlm_pool_shrink_lock;
 
 static inline atomic_t *ldlm_namespace_nr(ldlm_side_t client)
 {
index 604167e..c87c121 100644 (file)
@@ -188,6 +188,7 @@ int ldlm_lock_remove_from_lru_nolock(struct ldlm_lock *lock)
                         lock->l_flags &= ~LDLM_FL_SKIPPED;
                 LASSERT(ns->ns_nr_unused > 0);
                 ns->ns_nr_unused--;
+                atomic_dec(&ldlm_cli_all_ns_unused);
                 rc = 1;
         }
         return rc;
@@ -214,6 +215,7 @@ void ldlm_lock_add_to_lru_nolock(struct ldlm_lock *lock)
         list_add_tail(&lock->l_lru, &ns->ns_unused_list);
         LASSERT(ns->ns_nr_unused >= 0);
         ns->ns_nr_unused++;
+        atomic_inc(&ldlm_cli_all_ns_unused);
 }
 
 void ldlm_lock_add_to_lru(struct ldlm_lock *lock)
index 8ae069d..aab9cfc 100644 (file)
@@ -2366,6 +2366,7 @@ int __init ldlm_init(void)
         init_mutex(&ldlm_ref_sem);
         init_mutex(ldlm_namespace_lock(LDLM_NAMESPACE_SERVER));
         init_mutex(ldlm_namespace_lock(LDLM_NAMESPACE_CLIENT));
+        sema_init(&ldlm_pool_shrink_lock, LDLM_POOL_SHRINK_THREAD_LIMIT);
         ldlm_resource_slab = cfs_mem_cache_create("ldlm_resources",
                                                sizeof(struct ldlm_resource), 0,
                                                SLAB_HWCACHE_ALIGN);
index 20678aa..d50c050 100644 (file)
@@ -915,6 +915,7 @@ void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock)
         ENTRY;
 
         atomic_inc(&pl->pl_granted);
+        atomic_inc(&ldlm_srv_all_pl_granted);
         atomic_inc(&pl->pl_grant_rate);
         atomic_inc(&pl->pl_grant_speed);
 
@@ -946,6 +947,7 @@ void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock)
 
         LASSERT(atomic_read(&pl->pl_granted) > 0);
         atomic_dec(&pl->pl_granted);
+        atomic_dec(&ldlm_srv_all_pl_granted);
         atomic_inc(&pl->pl_cancel_rate);
         atomic_dec(&pl->pl_grant_speed);
 
@@ -1052,6 +1054,32 @@ static struct shrinker *ldlm_pools_cli_shrinker;
 static struct completion ldlm_pools_comp;
 
 /*
+ * Cancel locks from namespaces on \a ns_list. Number of locks to cancel of
+ * each namespace is proportional to number of locks on the namespace. Return
+ * number of remained cached locks on the ns_list
+ */
+static int ldlm_ns_batch_shrink(ldlm_side_t client, unsigned int gfp_mask,
+                               struct list_head *ns_list,
+                               int total, int nr_to_shrink)
+{
+       int nr_locks, cached = 0;
+       struct ldlm_namespace *ns;
+
+       list_for_each_entry(ns, ns_list, ns_shrink_chain) {
+               struct ldlm_pool *ns_pool;
+
+               ns_pool = &ns->ns_pool;
+               nr_locks = ldlm_pool_granted(ns_pool);
+               ldlm_pool_shrink(ns_pool,
+                                1 + nr_locks * nr_to_shrink / total,
+                                gfp_mask);
+               cached += ldlm_pool_granted(ns_pool);
+        }
+
+        return cached;
+}
+
+/*
  * Cancel \a nr locks from all namespaces (if possible). Returns number of
  * cached locks after shrink is finished. All namespaces are asked to
  * cancel approximately equal amount of locks to keep balancing.
@@ -1059,8 +1087,10 @@ static struct completion ldlm_pools_comp;
 static int ldlm_pools_shrink(ldlm_side_t client, int nr,
                              unsigned int gfp_mask)
 {
-        int total = 0, cached = 0, nr_ns;
-        struct ldlm_namespace *ns;
+       int total = 0, cached = 0, nr_ns;
+       struct ldlm_namespace *ns;
+       int i;
+       CFS_LIST_HEAD(ns_list);
 
         if (client == LDLM_NAMESPACE_CLIENT && nr != 0 &&
             !(gfp_mask & __GFP_FS))
@@ -1073,61 +1103,74 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
         /*
          * Find out how many resources we may release.
          */
-        for (nr_ns = atomic_read(ldlm_namespace_nr(client));
-             nr_ns > 0; nr_ns--)
-        {
-                mutex_down(ldlm_namespace_lock(client));
-                if (list_empty(ldlm_namespace_list(client))) {
-                        mutex_up(ldlm_namespace_lock(client));
-                        return 0;
-                }
-                ns = ldlm_namespace_first_locked(client);
-                ldlm_namespace_get(ns);
-                ldlm_namespace_move_locked(ns, client);
-                mutex_up(ldlm_namespace_lock(client));
-                total += ldlm_pool_shrink(&ns->ns_pool, 0, gfp_mask);
-                ldlm_namespace_put(ns, 1);
-        }
+       if (client == LDLM_NAMESPACE_CLIENT) {
+               total = atomic_read(&ldlm_cli_all_ns_unused);
+               total = total / 100 * sysctl_vfs_cache_pressure;
+       } else {
+               total = atomic_read(&ldlm_srv_all_pl_granted);
+       }
 
         if (nr == 0 || total == 0)
                 return total;
 
+       down(&ldlm_pool_shrink_lock);
+       mutex_down(ldlm_namespace_lock(client));
+
+       /*
+        * If list is empty, we can't return any @cached > 0,
+        * that probably would cause needless shrinker call.
+        */
+       if (list_empty(ldlm_namespace_list(client))) {
+               cached = 0;
+               goto out_unlock;
+       }
+
         /*
          * Shrink at least ldlm_namespace_nr(client) namespaces.
          */
-        for (nr_ns = atomic_read(ldlm_namespace_nr(client));
-             nr_ns > 0; nr_ns--)
-        {
-                int cancel, nr_locks;
-
-                /*
-                 * Do not call shrink under ldlm_namespace_lock(client)
-                 */
-                mutex_down(ldlm_namespace_lock(client));
-                if (list_empty(ldlm_namespace_list(client))) {
-                        mutex_up(ldlm_namespace_lock(client));
-                        /*
-                         * If list is empty, we can't return any @cached > 0,
-                         * that probably would cause needless shrinker
-                         * call.
-                         */
-                        cached = 0;
-                        break;
-                }
-                ns = ldlm_namespace_first_locked(client);
-                ldlm_namespace_get(ns);
-                ldlm_namespace_move_locked(ns, client);
-                mutex_up(ldlm_namespace_lock(client));
+       for (i = 0, nr_ns = atomic_read(ldlm_namespace_nr(client));
+            nr_ns > 0; nr_ns--) {
+               ns = ldlm_namespace_first_locked(client);
+               spin_lock(&ns->ns_hash_lock);
+               if (list_empty(&ns->ns_shrink_chain) && ns->ns_refcount > 0) {
+                       ldlm_namespace_get_locked(ns);
+                       list_add_tail(&ns->ns_shrink_chain, &ns_list);
+                       i++;
+               }
+               spin_unlock(&ns->ns_hash_lock);
+               ldlm_namespace_move_locked(ns, client);
+               if ((i == LDLM_POOL_SHRINK_BATCH_SIZE || nr_ns == 1) &&
+                   !list_empty(&ns_list)) {
+                       struct ldlm_namespace *tmp;
+
+                       mutex_up(ldlm_namespace_lock(client));
+                       cached += ldlm_ns_batch_shrink(client, gfp_mask,
+                                                      &ns_list, total, nr);
+                       mutex_down(ldlm_namespace_lock(client));
+
+                       list_for_each_entry_safe(ns, tmp, &ns_list,
+                                                ns_shrink_chain) {
+                               spin_lock(&ns->ns_hash_lock);
+                               list_del_init(&ns->ns_shrink_chain);
+                               ldlm_namespace_put_locked(ns, 0);
+                               spin_unlock(&ns->ns_hash_lock);
+                       }
+                       i = 0;
+
+                       if (list_empty(ldlm_namespace_list(client))) {
+                               cached = 0;
+                               goto out_unlock;
+                       }
+               }
+       }
+
+out_unlock:
+       mutex_up(ldlm_namespace_lock(client));
+       up(&ldlm_pool_shrink_lock);
 
-                nr_locks = ldlm_pool_granted(&ns->ns_pool);
-                cancel = 1 + nr_locks * nr / total;
-                ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask);
-                cached += ldlm_pool_granted(&ns->ns_pool);
-                ldlm_namespace_put(ns, 1);
-        }
         /* we only decrease the SLV in server pools shrinker, return -1 to
          * kernel to avoid needless loop. LU-1128 */
-        return (client == LDLM_NAMESPACE_SERVER) ? -1 : cached;
+       return (client == LDLM_NAMESPACE_SERVER) ? -1 : cached;
 }
 
 static int ldlm_pools_srv_shrink(SHRINKER_FIRST_ARG int nr_to_scan,
@@ -1147,6 +1190,8 @@ void ldlm_pools_recalc(ldlm_side_t client)
         __u32 nr_l = 0, nr_p = 0, l;
         struct ldlm_namespace *ns;
         int nr, equal = 0;
+        CFS_LIST_HEAD(ns_list);
+        int i;
 
         /*
          * No need to setup pool limit for client pools.
@@ -1219,33 +1264,53 @@ void ldlm_pools_recalc(ldlm_side_t client)
                 mutex_up(ldlm_namespace_lock(client));
         }
 
-        /*
-         * Recalc at least ldlm_namespace_nr(client) namespaces.
-         */
-        for (nr = atomic_read(ldlm_namespace_nr(client)); nr > 0; nr--) {
-                /*
-                 * Lock the list, get first @ns in the list, getref, move it
-                 * to the tail, unlock and call pool recalc. This way we avoid
-                 * calling recalc under @ns lock what is really good as we get
-                 * rid of potential deadlock on client nodes when canceling
-                 * locks synchronously.
-                 */
-                mutex_down(ldlm_namespace_lock(client));
-                if (list_empty(ldlm_namespace_list(client))) {
-                        mutex_up(ldlm_namespace_lock(client));
-                        break;
-                }
-                ns = ldlm_namespace_first_locked(client);
-                ldlm_namespace_get(ns);
-                ldlm_namespace_move_locked(ns, client);
-                mutex_up(ldlm_namespace_lock(client));
-
-                /*
-                 * After setup is done - recalc the pool.
-                 */
-                ldlm_pool_recalc(&ns->ns_pool);
-                ldlm_namespace_put(ns, 1);
-        }
+       /*
+        * Recalc at least ldlm_namespace_nr(client) namespaces.
+        */
+       mutex_down(ldlm_namespace_lock(client));
+       if (list_empty(ldlm_namespace_list(client))) {
+               mutex_up(ldlm_namespace_lock(client));
+               return;
+       }
+
+       for (i = 0, nr = atomic_read(ldlm_namespace_nr(client));
+            nr > 0; nr--) {
+               ns = ldlm_namespace_first_locked(client);
+               spin_lock(&ns->ns_hash_lock);
+               if (list_empty(&ns->ns_shrink_chain) && ns->ns_refcount > 0) {
+                       ldlm_namespace_get_locked(ns);
+                       list_add_tail(&ns->ns_shrink_chain, &ns_list);
+                       i++;
+               }
+               spin_unlock(&ns->ns_hash_lock);
+               ldlm_namespace_move_locked(ns, client);
+               if ((i == LDLM_POOL_SHRINK_BATCH_SIZE || nr == 1) &&
+                   !list_empty(&ns_list)) {
+                       struct ldlm_namespace *tmp;
+
+                       mutex_up(ldlm_namespace_lock(client));
+
+                       /*
+                        * Recalc pools on the list
+                        */
+                       list_for_each_entry_safe(ns, tmp, &ns_list,
+                                                ns_shrink_chain) {
+                               ldlm_pool_recalc(&ns->ns_pool);
+
+                               spin_lock(&ns->ns_hash_lock);
+                               list_del_init(&ns->ns_shrink_chain);
+                               ldlm_namespace_put_locked(ns, 1);
+                               spin_unlock(&ns->ns_hash_lock);
+                       }
+                       mutex_down(ldlm_namespace_lock(client));
+
+                       i = 0;
+                       if (list_empty(ldlm_namespace_list(client)))
+                               break;
+               }
+       }
+       mutex_up(ldlm_namespace_lock(client));
+       return;
 }
 EXPORT_SYMBOL(ldlm_pools_recalc);
 
index 62a285e..5b0d8d6 100644 (file)
@@ -61,6 +61,10 @@ struct list_head ldlm_srv_namespace_list =
 struct semaphore ldlm_cli_namespace_lock;
 struct list_head ldlm_cli_namespace_list =
         CFS_LIST_HEAD_INIT(ldlm_cli_namespace_list);
+atomic_t ldlm_cli_all_ns_unused = ATOMIC_INIT(0);
+atomic_t ldlm_srv_all_pl_granted = ATOMIC_INIT(0);
+
+struct semaphore ldlm_pool_shrink_lock;
 
 cfs_proc_dir_entry_t *ldlm_type_proc_dir = NULL;
 cfs_proc_dir_entry_t *ldlm_ns_proc_dir = NULL;
@@ -352,6 +356,7 @@ ldlm_namespace_new(struct obd_device *obd, char *name,
 
         CFS_INIT_LIST_HEAD(&ns->ns_root_list);
         CFS_INIT_LIST_HEAD(&ns->ns_list_chain);
+        CFS_INIT_LIST_HEAD(&ns->ns_shrink_chain);
         ns->ns_refcount = 0;
         ns->ns_client = client;
         spin_lock_init(&ns->ns_hash_lock);