LU-5415 ldlm: high load because of negative timeout

[fs/lustre-release.git] / lustre / ldlm / ldlm_pool.c
diff --git a/lustre/ldlm/ldlm_pool.c b/lustre/ldlm/ldlm_pool.c

index 6cf50f2..522f947 100644 (file)
--- a/lustre/ldlm/ldlm_pool.c
+++ b/lustre/ldlm/ldlm_pool.c
@@ -480,6 +480,7 @@ static void ldlm_cli_pool_pop_slv(struct ldlm_pool *pl)
  static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
  {
          time_t recalc_interval_sec;
+       int ret;
          ENTRY;
  
          recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time;
@@ -500,17 +501,13 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
           * Make sure that pool knows last SLV and Limit from obd.
           */
          ldlm_cli_pool_pop_slv(pl);
-
-        pl->pl_recalc_time = cfs_time_current_sec();
-        lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT,
-                            recalc_interval_sec);
         spin_unlock(&pl->pl_lock);
  
          /*
           * Do not cancel locks in case lru resize is disabled for this ns.
           */
          if (!ns_connect_lru_resize(ldlm_pl2ns(pl)))
-                RETURN(0);
+               GOTO(out, ret = 0);
  
          /*
           * In the time of canceling locks on client we do not need to maintain
@@ -518,8 +515,20 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
           * It may be called when SLV has changed much, this is why we do not
           * take into account pl->pl_recalc_time here.
           */
-       RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), 0, LCF_ASYNC,
-                              LDLM_CANCEL_LRUR));
+       ret = ldlm_cancel_lru(ldlm_pl2ns(pl), 0, LCF_ASYNC,
+                              LDLM_CANCEL_LRUR);
+
+out:
+       spin_lock(&pl->pl_lock);
+       /*
+        * LRU resizing may take longer than the recalc period, so update
+        * the recalc time after LRU resizing rather than before it.
+        */
+       pl->pl_recalc_time = cfs_time_current_sec();
+       lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT,
+                           recalc_interval_sec);
+       spin_unlock(&pl->pl_lock);
+       RETURN(ret);
  }
  
  /**
@@ -531,7 +540,7 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
                                  int nr, unsigned int gfp_mask)
  {
          struct ldlm_namespace *ns;
-        int canceled = 0, unused;
+       int unused;
  
          ns = ldlm_pl2ns(pl);
  
@@ -550,17 +559,14 @@ static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
         unused = ns->ns_nr_unused;
         spin_unlock(&ns->ns_lock);
  
-        if (nr) {
-               canceled = ldlm_cancel_lru(ns, nr, LCF_ASYNC,
-                                          LDLM_CANCEL_SHRINK);
-        }
  #ifdef __KERNEL__
-        /*
-         * Return the number of potentially reclaimable locks.
-         */
-        return ((unused - canceled) / 100) * sysctl_vfs_cache_pressure;
+       if (nr == 0)
+               return (unused / 100) * sysctl_vfs_cache_pressure;
+       else
+               return ldlm_cancel_lru(ns, nr, LCF_ASYNC, LDLM_CANCEL_SHRINK);
  #else
-        return unused - canceled;
+       return unused - (nr ? ldlm_cancel_lru(ns, nr, LCF_ASYNC,
+                                             LDLM_CANCEL_SHRINK) : 0);
  #endif
  }
  
@@ -611,6 +617,14 @@ int ldlm_pool_recalc(struct ldlm_pool *pl)
          }
         recalc_interval_sec = pl->pl_recalc_time - cfs_time_current_sec() +
                               pl->pl_recalc_period;
+       if (recalc_interval_sec <= 0) {
+               /* Prevent too frequent recalculation. */
+               CDEBUG(D_DLMTRACE, "Negative interval(%ld), "
+                      "too short period(%ld)",
+                      recalc_interval_sec,
+                      pl->pl_recalc_period);
+               recalc_interval_sec = 1;
+       }
  
          return recalc_interval_sec;
  }
@@ -1045,41 +1059,36 @@ static struct shrinker *ldlm_pools_cli_shrinker;
  static struct completion ldlm_pools_comp;
  
  /*
- * Cancel \a nr locks from all namespaces (if possible). Returns number of
- * cached locks after shrink is finished. All namespaces are asked to
- * cancel approximately equal amount of locks to keep balancing.
- */
-static int ldlm_pools_shrink(ldlm_side_t client, int nr,
-                             unsigned int gfp_mask)
+ * Count locks from all namespaces (if possible). Returns the number of
+ * cached locks.
+ */
+static unsigned long ldlm_pools_count(ldlm_side_t client, unsigned int gfp_mask)
  {
-       unsigned int total = 0, cached = 0;
-       int nr_ns;
-        struct ldlm_namespace *ns;
+       int total = 0, nr_ns;
+       struct ldlm_namespace *ns;
         struct ldlm_namespace *ns_old = NULL; /* loop detection */
-        void *cookie;
+       void *cookie;
  
-        if (client == LDLM_NAMESPACE_CLIENT && nr != 0 &&
-            !(gfp_mask & __GFP_FS))
-                return -1;
+       if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS))
+               return 0;
  
-        CDEBUG(D_DLMTRACE, "Request to shrink %d %s locks from all pools\n",
-               nr, client == LDLM_NAMESPACE_CLIENT ? "client" : "server");
+       CDEBUG(D_DLMTRACE, "Request to count %s locks from all pools\n",
+              client == LDLM_NAMESPACE_CLIENT ? "client" : "server");
  
-        cookie = cl_env_reenter();
+       cookie = cl_env_reenter();
  
-        /*
-         * Find out how many resources we may release.
-         */
+       /*
+        * Find out how many resources we may release.
+        */
         for (nr_ns = ldlm_namespace_nr_read(client);
-            nr_ns > 0; nr_ns--)
-        {
+            nr_ns > 0; nr_ns--) {
                 mutex_lock(ldlm_namespace_lock(client));
-                if (cfs_list_empty(ldlm_namespace_list(client))) {
+               if (list_empty(ldlm_namespace_list(client))) {
                         mutex_unlock(ldlm_namespace_lock(client));
-                        cl_env_reexit(cookie);
-                        return 0;
-                }
-                ns = ldlm_namespace_first_locked(client);
+                       cl_env_reexit(cookie);
+                       return 0;
+               }
+               ns = ldlm_namespace_first_locked(client);
  
                 if (ns == ns_old) {
                         mutex_unlock(ldlm_namespace_lock(client));
@@ -1095,57 +1104,117 @@ static int ldlm_pools_shrink(ldlm_side_t client, int nr,
                 if (ns_old == NULL)
                         ns_old = ns;
  
-                ldlm_namespace_get(ns);
-                ldlm_namespace_move_to_active_locked(ns, client);
+               ldlm_namespace_get(ns);
+               ldlm_namespace_move_to_active_locked(ns, client);
                 mutex_unlock(ldlm_namespace_lock(client));
-                total += ldlm_pool_shrink(&ns->ns_pool, 0, gfp_mask);
-                ldlm_namespace_put(ns);
-        }
+               total += ldlm_pool_shrink(&ns->ns_pool, 0, gfp_mask);
+               ldlm_namespace_put(ns);
+       }
  
-        if (nr == 0 || total == 0) {
-                cl_env_reexit(cookie);
-                return total;
-        }
+       cl_env_reexit(cookie);
+       return total;
+}
  
-        /*
-         * Shrink at least ldlm_namespace_nr(client) namespaces.
-         */
-       for (nr_ns = ldlm_namespace_nr_read(client) - nr_ns;
-            nr_ns > 0; nr_ns--)
-        {
-               __u64 cancel;
-               unsigned int nr_locks;
+static unsigned long ldlm_pools_scan(ldlm_side_t client, int nr,
+                                    unsigned int gfp_mask)
+{
+       unsigned long freed = 0;
+       int tmp, nr_ns;
+       struct ldlm_namespace *ns;
+       void *cookie;
  
-                /*
-                 * Do not call shrink under ldlm_namespace_lock(client)
-                 */
+       if (client == LDLM_NAMESPACE_CLIENT && !(gfp_mask & __GFP_FS))
+               return -1;
+
+       cookie = cl_env_reenter();
+
+       /*
+        * Shrink at least ldlm_namespace_nr_read(client) namespaces.
+        */
+       for (tmp = nr_ns = ldlm_namespace_nr_read(client);
+            tmp > 0; tmp--) {
+               int cancel, nr_locks;
+
+               /*
+                * Do not call shrink under ldlm_namespace_lock(client)
+               */
                 mutex_lock(ldlm_namespace_lock(client));
-                if (cfs_list_empty(ldlm_namespace_list(client))) {
+               if (list_empty(ldlm_namespace_list(client))) {
                         mutex_unlock(ldlm_namespace_lock(client));
-                        /*
-                         * If list is empty, we can't return any @cached > 0,
-                         * that probably would cause needless shrinker
-                         * call.
-                         */
-                        cached = 0;
-                        break;
-                }
-                ns = ldlm_namespace_first_locked(client);
-                ldlm_namespace_get(ns);
-                ldlm_namespace_move_to_active_locked(ns, client);
+                       break;
+               }
+               ns = ldlm_namespace_first_locked(client);
+               ldlm_namespace_get(ns);
+               ldlm_namespace_move_to_active_locked(ns, client);
                 mutex_unlock(ldlm_namespace_lock(client));
  
-                nr_locks = ldlm_pool_granted(&ns->ns_pool);
-               cancel = (__u64)nr_locks * nr;
-               do_div(cancel, total);
-               ldlm_pool_shrink(&ns->ns_pool, 1 + cancel, gfp_mask);
-                cached += ldlm_pool_granted(&ns->ns_pool);
-                ldlm_namespace_put(ns);
-        }
-        cl_env_reexit(cookie);
-        /* we only decrease the SLV in server pools shrinker, return -1 to
-         * kernel to avoid needless loop. LU-1128 */
-        return (client == LDLM_NAMESPACE_SERVER) ? -1 : cached;
+               nr_locks = ldlm_pool_granted(&ns->ns_pool);
+               /*
+                * We used to shrink proportionally, but with the new shrinker
+                * API we no longer know the total number of freeable locks.
+                */
+               cancel = 1 + min_t(int, nr_locks, nr / nr_ns);
+               freed += ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask);
+               ldlm_namespace_put(ns);
+       }
+       cl_env_reexit(cookie);
+       /*
+        * We only decrease the SLV in the server pools shrinker, so return
+        * SHRINK_STOP to the kernel to avoid a needless loop. LU-1128
+        */
+       return (client == LDLM_NAMESPACE_SERVER) ? SHRINK_STOP : freed;
+}
+
+#ifdef HAVE_SHRINKER_COUNT
+static unsigned long ldlm_pools_srv_count(struct shrinker *s,
+                                         struct shrink_control *sc)
+{
+       return ldlm_pools_count(LDLM_NAMESPACE_SERVER, sc->gfp_mask);
+}
+
+static unsigned long ldlm_pools_srv_scan(struct shrinker *s,
+                                        struct shrink_control *sc)
+{
+       return ldlm_pools_scan(LDLM_NAMESPACE_SERVER, sc->nr_to_scan,
+                              sc->gfp_mask);
+}
+
+static unsigned long ldlm_pools_cli_count(struct shrinker *s, struct shrink_control *sc)
+{
+       return ldlm_pools_count(LDLM_NAMESPACE_CLIENT, sc->gfp_mask);
+}
+
+static unsigned long ldlm_pools_cli_scan(struct shrinker *s,
+                                        struct shrink_control *sc)
+{
+       return ldlm_pools_scan(LDLM_NAMESPACE_CLIENT, sc->nr_to_scan,
+                              sc->gfp_mask);
+}
+
+#else
+/*
+ * Cancel \a nr locks from all namespaces (if possible). Returns number of
+ * cached locks after shrink is finished. All namespaces are asked to
+ * cancel approximately equal amount of locks to keep balancing.
+ */
+static int ldlm_pools_shrink(ldlm_side_t client, int nr,
+                            unsigned int gfp_mask)
+{
+       unsigned int total = 0;
+
+       if (client == LDLM_NAMESPACE_CLIENT && nr != 0 &&
+           !(gfp_mask & __GFP_FS))
+               return -1;
+
+       CDEBUG(D_DLMTRACE, "Request to shrink %d %s locks from all pools\n",
+              nr, client == LDLM_NAMESPACE_CLIENT ? "client" : "server");
+
+       total = ldlm_pools_count(client, gfp_mask);
+
+       if (nr == 0 || total == 0)
+               return total;
+
+       return ldlm_pools_scan(client, nr, gfp_mask);
  }
  
  static int ldlm_pools_srv_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
@@ -1162,6 +1231,8 @@ static int ldlm_pools_cli_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
                                   shrink_param(sc, gfp_mask));
  }
  
+#endif /* HAVE_SHRINKER_COUNT */
+
  int ldlm_pools_recalc(ldlm_side_t client)
  {
          __u32 nr_l = 0, nr_p = 0, l;
@@ -1418,16 +1489,18 @@ static void ldlm_pools_thread_stop(void)
  int ldlm_pools_init(void)
  {
         int rc;
+       DEF_SHRINKER_VAR(shsvar, ldlm_pools_srv_shrink,
+                        ldlm_pools_srv_count, ldlm_pools_srv_scan);
+       DEF_SHRINKER_VAR(shcvar, ldlm_pools_cli_shrink,
+                        ldlm_pools_cli_count, ldlm_pools_cli_scan);
         ENTRY;
  
         rc = ldlm_pools_thread_start();
         if (rc == 0) {
                 ldlm_pools_srv_shrinker =
-                       set_shrinker(DEFAULT_SEEKS,
-                                        ldlm_pools_srv_shrink);
+                       set_shrinker(DEFAULT_SEEKS, &shsvar);
                 ldlm_pools_cli_shrinker =
-                       set_shrinker(DEFAULT_SEEKS,
-                                        ldlm_pools_cli_shrink);
+                       set_shrinker(DEFAULT_SEEKS, &shcvar);
         }
         RETURN(rc);
  }