Whamcloud - gitweb
LU-6842 clio: add cl_page LRU shrinker 30/15630/12
author Bobi Jam <bobijam.xu@intel.com>
Fri, 17 Jul 2015 05:36:37 +0000 (13:36 +0800)
committer Oleg Drokin <oleg.drokin@intel.com>
Fri, 9 Oct 2015 20:36:46 +0000 (20:36 +0000)
Register cache shrinker to reclaim memory from cl_page LRU list.

Signed-off-by: Bobi Jam <bobijam.xu@intel.com>
Change-Id: Id22fd1f1f8554dc03ac7313a58abd8cd3472ece0
Reviewed-on: http://review.whamcloud.com/15630
Tested-by: Jenkins
Tested-by: Maloo <hpdd-maloo@intel.com>
Reviewed-by: Andreas Dilger <andreas.dilger@intel.com>
Reviewed-by: Jinshan Xiong <jinshan.xiong@intel.com>
Reviewed-by: Oleg Drokin <oleg.drokin@intel.com>
lustre/include/obd.h
lustre/ldlm/ldlm_lib.c
lustre/osc/osc_internal.h
lustre/osc/osc_page.c
lustre/osc/osc_request.c

index 5a9155a..7e23de3 100644 (file)
@@ -276,6 +276,8 @@ struct client_obd {
         * An unstable page is a page state that WRITE RPC has finished but
         * the transaction has NOT yet committed. */
        atomic_long_t            cl_unstable_count;
+       /** Link to osc_shrink_list */
+       struct list_head         cl_shrink_list;
 
        /* number of in flight destroy rpcs is limited to max_rpcs_in_flight */
        atomic_t                 cl_destroy_in_flight;
index 1d4d6b7..ae8c386 100644 (file)
@@ -381,6 +381,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg)
        INIT_LIST_HEAD(&cli->cl_lru_list);
        spin_lock_init(&cli->cl_lru_list_lock);
        atomic_long_set(&cli->cl_unstable_count, 0);
+       INIT_LIST_HEAD(&cli->cl_shrink_list);
 
        init_waitqueue_head(&cli->cl_destroy_waitq);
        atomic_set(&cli->cl_destroy_in_flight, 0);
index 636a315..64f80e1 100644 (file)
@@ -238,4 +238,14 @@ struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env,
                                       enum osc_dap_flags flags);
 void osc_pack_req_body(struct ptlrpc_request *req, struct obdo *oa);
 int osc_object_invalidate(const struct lu_env *env, struct osc_object *osc);
+
+/** list of OSC client_obd's, linked through client_obd::cl_shrink_list */
+extern struct list_head osc_shrink_list;
+/** spin lock to protect osc_shrink_list */
+extern spinlock_t osc_shrink_lock;
+extern unsigned long osc_cache_shrink_count(struct shrinker *sk,
+                                           struct shrink_control *sc);
+extern unsigned long osc_cache_shrink_scan(struct shrinker *sk,
+                                          struct shrink_control *sc);
+
 #endif /* OSC_INTERNAL_H */
index 12380c6..f3b3782 100644 (file)
@@ -1125,4 +1125,91 @@ bool osc_over_unstable_soft_limit(struct client_obd *cli)
                                    cli->cl_max_rpcs_in_flight;
 }
 
+/**
+ * Count the LRU pages cached by all OSCs linked on osc_shrink_list.
+ * \a sk and \a sc are unused; this function never returns SHRINK_STOP.
+ *
+ * \retval     sum of cl_lru_in_list scaled by sysctl_vfs_cache_pressure/100
+ */
+unsigned long osc_cache_shrink_count(struct shrinker *sk,
+                                    struct shrink_control *sc)
+{
+       struct client_obd *cli;
+       unsigned long cached = 0;
+       /* the list is only walked while osc_shrink_lock is held */
+       spin_lock(&osc_shrink_lock);
+       list_for_each_entry(cli, &osc_shrink_list, cl_shrink_list)
+               cached += atomic_long_read(&cli->cl_lru_in_list);
+       spin_unlock(&osc_shrink_lock);

+       return (cached  * sysctl_vfs_cache_pressure) / 100;
+}
+
+/**
+ * Scan and try to reclaim up to sc->nr_to_scan cached LRU pages, taking
+ * OSCs from the head of osc_shrink_list and moving each one to the tail.
+ *
+ * \retval     number of cached LRU pages reclaimed
+ * \retval     SHRINK_STOP if __GFP_FS is not set or no lu_env can be got
+ *
+ * Linux kernel will loop calling this shrinker scan routine with
+ * sc->nr_to_scan = SHRINK_BATCH(128 for now) until kernel got enough memory.
+ *
+ * If sc->nr_to_scan is 0 the VM is only querying the cache size: return 0
+ * and let osc_cache_shrink_count() report the number of LRU pages.
+ */
+unsigned long osc_cache_shrink_scan(struct shrinker *sk,
+                                   struct shrink_control *sc)
+{
+       struct client_obd *cli;
+       struct client_obd *stop_anchor = NULL;
+       struct cl_env_nest nest;
+       struct lu_env *env;
+       long shrank = 0;
+       int rc;

+       if (sc->nr_to_scan == 0)
+               return 0;

+       if (!(sc->gfp_mask & __GFP_FS))
+               return SHRINK_STOP;

+       env = cl_env_nested_get(&nest);
+       if (IS_ERR(env))
+               return SHRINK_STOP;

+       spin_lock(&osc_shrink_lock);
+       while (!list_empty(&osc_shrink_list)) {
+               cli = list_entry(osc_shrink_list.next, struct client_obd,
+                                cl_shrink_list);
+               /* stop once the first OSC visited is at the head again */
+               if (stop_anchor == NULL)
+                       stop_anchor = cli;
+               else if (cli == stop_anchor)
+                       break;
+               /* rotate to the tail, then shrink with the lock dropped */
+               list_move_tail(&cli->cl_shrink_list, &osc_shrink_list);
+               spin_unlock(&osc_shrink_lock);

+               /* per OSC, shrink at most cl_max_pages_per_rpc pages */
+               rc = osc_lru_shrink(env, cli, (sc->nr_to_scan - shrank) >
+                                   cli->cl_max_pages_per_rpc ?
+                                   cli->cl_max_pages_per_rpc :
+                                   sc->nr_to_scan - shrank, true);
+               if (rc > 0)
+                       shrank += rc;
+               /* target met; lock is not held here, skip the unlock */
+               if (shrank >= sc->nr_to_scan)
+                       goto out;
+               /* re-take the lock before looking at the list head again */
+               spin_lock(&osc_shrink_lock);
+       }
+       spin_unlock(&osc_shrink_lock);

+out:
+       cl_env_nested_put(&nest, env);

+       return shrank;
+}
+
 /** @} osc */
index b78eb23..f8b5b1d 100644 (file)
@@ -2695,6 +2695,11 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg)
 
        INIT_LIST_HEAD(&cli->cl_grant_shrink_list);
        ns_register_cancel(obd->obd_namespace, osc_cancel_weight);
+
+       spin_lock(&osc_shrink_lock);
+       list_add_tail(&cli->cl_shrink_list, &osc_shrink_list);
+       spin_unlock(&osc_shrink_lock);
+
        RETURN(0);
 
 out_ptlrpcd_work:
@@ -2751,6 +2756,10 @@ int osc_cleanup(struct obd_device *obd)
 
        ENTRY;
 
+       spin_lock(&osc_shrink_lock);
+       list_del(&cli->cl_shrink_list);
+       spin_unlock(&osc_shrink_lock);
+
        /* lru cleanup */
        if (cli->cl_cache != NULL) {
                LASSERT(atomic_read(&cli->cl_cache->ccc_users) > 0);
@@ -2762,7 +2771,7 @@ int osc_cleanup(struct obd_device *obd)
                cli->cl_cache = NULL;
        }
 
-        /* free memory of osc quota cache */
+       /* free memory of osc quota cache */
        osc_quota_cleanup(obd);
 
        rc = client_obd_cleanup(obd);
@@ -2805,6 +2814,27 @@ static struct obd_ops osc_obd_ops = {
         .o_quotactl             = osc_quotactl,
 };
 
+static struct shrinker *osc_cache_shrinker; /* set by osc_init() */
+struct list_head osc_shrink_list = LIST_HEAD_INIT(osc_shrink_list);
+DEFINE_SPINLOCK(osc_shrink_lock); /* protects osc_shrink_list */
+
+#ifndef HAVE_SHRINKER_COUNT /* combined scan + count shrinker callback */
+static int osc_cache_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
+{
+       struct shrink_control scv = {
+               .nr_to_scan = shrink_param(sc, nr_to_scan),
+               .gfp_mask   = shrink_param(sc, gfp_mask)
+       };
+#if !defined(HAVE_SHRINKER_WANT_SHRINK_PTR) && !defined(HAVE_SHRINK_CONTROL)
+       struct shrinker *shrinker = NULL;
+#endif
+       /* reclaim first, then report how much is still cached */
+       (void)osc_cache_shrink_scan(shrinker, &scv);

+       return osc_cache_shrink_count(shrinker, &scv);
+}
+#endif /* !HAVE_SHRINKER_COUNT */
+
 static int __init osc_init(void)
 {
        bool enable_proc = true;
@@ -2812,15 +2842,16 @@ static int __init osc_init(void)
        unsigned int reqpool_size;
        unsigned int reqsize;
        int rc;
-
+       DEF_SHRINKER_VAR(osc_shvar, osc_cache_shrink,
+                        osc_cache_shrink_count, osc_cache_shrink_scan);
        ENTRY;
 
-        /* print an address of _any_ initialized kernel symbol from this
-         * module, to allow debugging with gdb that doesn't support data
-         * symbols from modules.*/
-        CDEBUG(D_INFO, "Lustre OSC module (%p).\n", &osc_caches);
+       /* print an address of _any_ initialized kernel symbol from this
+        * module, to allow debugging with gdb that doesn't support data
+        * symbols from modules.*/
+       CDEBUG(D_INFO, "Lustre OSC module (%p).\n", &osc_caches);
 
-        rc = lu_kmem_init(osc_caches);
+       rc = lu_kmem_init(osc_caches);
        if (rc)
                RETURN(rc);
 
@@ -2833,6 +2864,8 @@ static int __init osc_init(void)
        if (rc)
                GOTO(out_kmem, rc);
 
+       osc_cache_shrinker = set_shrinker(DEFAULT_SEEKS, &osc_shvar);
+
        /* This is obviously too much memory, only prevent overflow here */
        if (osc_reqpool_mem_max >= 1 << 12 || osc_reqpool_mem_max == 0)
                GOTO(out_type, rc = -EINVAL);
@@ -2868,6 +2901,7 @@ out:
 
 static void /*__exit*/ osc_exit(void)
 {
+       remove_shrinker(osc_cache_shrinker);
        class_unregister_type(LUSTRE_OSC_NAME);
        lu_kmem_fini(osc_caches);
        ptlrpc_free_rq_pool(osc_rq_pool);