From 888a3141e72a25bef8daf822325b4295e5a0d5e8 Mon Sep 17 00:00:00 2001 From: Bobi Jam Date: Fri, 17 Jul 2015 13:36:37 +0800 Subject: [PATCH] LU-6842 clio: add cl_page LRU shrinker Register cache shrinker to reclaim memory from cl_page LRU list. Signed-off-by: Bobi Jam Change-Id: Id22fd1f1f8554dc03ac7313a58abd8cd3472ece0 Reviewed-on: http://review.whamcloud.com/15630 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: Andreas Dilger Reviewed-by: Jinshan Xiong Reviewed-by: Oleg Drokin --- lustre/include/obd.h | 2 ++ lustre/ldlm/ldlm_lib.c | 1 + lustre/osc/osc_internal.h | 10 ++++++ lustre/osc/osc_page.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++ lustre/osc/osc_request.c | 48 ++++++++++++++++++++++---- 5 files changed, 141 insertions(+), 7 deletions(-) diff --git a/lustre/include/obd.h b/lustre/include/obd.h index 5a9155a..7e23de3 100644 --- a/lustre/include/obd.h +++ b/lustre/include/obd.h @@ -276,6 +276,8 @@ struct client_obd { * An unstable page is a page state that WRITE RPC has finished but * the transaction has NOT yet committed. 
*/ atomic_long_t cl_unstable_count; + /** Link to osc_shrink_list */ + struct list_head cl_shrink_list; /* number of in flight destroy rpcs is limited to max_rpcs_in_flight */ atomic_t cl_destroy_in_flight; diff --git a/lustre/ldlm/ldlm_lib.c b/lustre/ldlm/ldlm_lib.c index 1d4d6b7..ae8c386 100644 --- a/lustre/ldlm/ldlm_lib.c +++ b/lustre/ldlm/ldlm_lib.c @@ -381,6 +381,7 @@ int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) INIT_LIST_HEAD(&cli->cl_lru_list); spin_lock_init(&cli->cl_lru_list_lock); atomic_long_set(&cli->cl_unstable_count, 0); + INIT_LIST_HEAD(&cli->cl_shrink_list); init_waitqueue_head(&cli->cl_destroy_waitq); atomic_set(&cli->cl_destroy_in_flight, 0); diff --git a/lustre/osc/osc_internal.h b/lustre/osc/osc_internal.h index 636a315..64f80e1 100644 --- a/lustre/osc/osc_internal.h +++ b/lustre/osc/osc_internal.h @@ -238,4 +238,14 @@ struct ldlm_lock *osc_dlmlock_at_pgoff(const struct lu_env *env, enum osc_dap_flags flags); void osc_pack_req_body(struct ptlrpc_request *req, struct obdo *oa); int osc_object_invalidate(const struct lu_env *env, struct osc_object *osc); + +/** osc shrink list to link all osc client obd */ +extern struct list_head osc_shrink_list; +/** spin lock to protect osc_shrink_list */ +extern spinlock_t osc_shrink_lock; +extern unsigned long osc_cache_shrink_count(struct shrinker *sk, + struct shrink_control *sc); +extern unsigned long osc_cache_shrink_scan(struct shrinker *sk, + struct shrink_control *sc); + #endif /* OSC_INTERNAL_H */ diff --git a/lustre/osc/osc_page.c b/lustre/osc/osc_page.c index 12380c6..f3b3782 100644 --- a/lustre/osc/osc_page.c +++ b/lustre/osc/osc_page.c @@ -1125,4 +1125,91 @@ bool osc_over_unstable_soft_limit(struct client_obd *cli) cli->cl_max_rpcs_in_flight; } +/** + * Return how many LRU pages in the cache of all OSC devices + * + * \retval # of cached LRU pages times reclamation tendency + * \retval 0 if there is currently no cached LRU page + */ +unsigned long 
osc_cache_shrink_count(struct shrinker *sk, + struct shrink_control *sc) +{ + struct client_obd *cli; + unsigned long cached = 0; + + spin_lock(&osc_shrink_lock); + list_for_each_entry(cli, &osc_shrink_list, cl_shrink_list) + cached += atomic_long_read(&cli->cl_lru_in_list); + spin_unlock(&osc_shrink_lock); + + return (cached * sysctl_vfs_cache_pressure) / 100; +} + +/** + * Scan and try to reclaim sc->nr_to_scan cached LRU pages + * + * \retval number of cached LRU pages reclaimed + * \retval SHRINK_STOP if it cannot do any scanning in this time + * + * Linux kernel will loop calling this shrinker scan routine with + * sc->nr_to_scan = SHRINK_BATCH(128 for now) until kernel got enough memory. + * + * If sc->nr_to_scan is 0, the VM is querying the cache size, we don't need + * to scan and try to reclaim LRU pages, just return 0 and + * osc_cache_shrink_count() will report the LRU page number. + */ +unsigned long osc_cache_shrink_scan(struct shrinker *sk, + struct shrink_control *sc) +{ + struct client_obd *cli; + struct client_obd *stop_anchor = NULL; + struct cl_env_nest nest; + struct lu_env *env; + long shrank = 0; + int rc; + + if (sc->nr_to_scan == 0) + return 0; + + if (!(sc->gfp_mask & __GFP_FS)) + return SHRINK_STOP; + + env = cl_env_nested_get(&nest); + if (IS_ERR(env)) + return SHRINK_STOP; + + spin_lock(&osc_shrink_lock); + while (!list_empty(&osc_shrink_list)) { + cli = list_entry(osc_shrink_list.next, struct client_obd, + cl_shrink_list); + + if (stop_anchor == NULL) + stop_anchor = cli; + else if (cli == stop_anchor) + break; + + list_move_tail(&cli->cl_shrink_list, &osc_shrink_list); + spin_unlock(&osc_shrink_lock); + + /* shrink no more than max_pages_per_rpc for an OSC */ + rc = osc_lru_shrink(env, cli, (sc->nr_to_scan - shrank) > + cli->cl_max_pages_per_rpc ? 
+ cli->cl_max_pages_per_rpc : + sc->nr_to_scan - shrank, true); + if (rc > 0) + shrank += rc; + + if (shrank >= sc->nr_to_scan) + goto out; + + spin_lock(&osc_shrink_lock); + } + spin_unlock(&osc_shrink_lock); + +out: + cl_env_nested_put(&nest, env); + + return shrank; +} + /** @} osc */ diff --git a/lustre/osc/osc_request.c b/lustre/osc/osc_request.c index b78eb23..f8b5b1d 100644 --- a/lustre/osc/osc_request.c +++ b/lustre/osc/osc_request.c @@ -2695,6 +2695,11 @@ int osc_setup(struct obd_device *obd, struct lustre_cfg *lcfg) INIT_LIST_HEAD(&cli->cl_grant_shrink_list); ns_register_cancel(obd->obd_namespace, osc_cancel_weight); + + spin_lock(&osc_shrink_lock); + list_add_tail(&cli->cl_shrink_list, &osc_shrink_list); + spin_unlock(&osc_shrink_lock); + RETURN(0); out_ptlrpcd_work: @@ -2751,6 +2756,10 @@ int osc_cleanup(struct obd_device *obd) ENTRY; + spin_lock(&osc_shrink_lock); + list_del(&cli->cl_shrink_list); + spin_unlock(&osc_shrink_lock); + /* lru cleanup */ if (cli->cl_cache != NULL) { LASSERT(atomic_read(&cli->cl_cache->ccc_users) > 0); @@ -2762,7 +2771,7 @@ int osc_cleanup(struct obd_device *obd) cli->cl_cache = NULL; } - /* free memory of osc quota cache */ + /* free memory of osc quota cache */ osc_quota_cleanup(obd); rc = client_obd_cleanup(obd); @@ -2805,6 +2814,27 @@ static struct obd_ops osc_obd_ops = { .o_quotactl = osc_quotactl, }; +static struct shrinker *osc_cache_shrinker; +struct list_head osc_shrink_list = LIST_HEAD_INIT(osc_shrink_list); +DEFINE_SPINLOCK(osc_shrink_lock); + +#ifndef HAVE_SHRINKER_COUNT +static int osc_cache_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) +{ + struct shrink_control scv = { + .nr_to_scan = shrink_param(sc, nr_to_scan), + .gfp_mask = shrink_param(sc, gfp_mask) + }; +#if !defined(HAVE_SHRINKER_WANT_SHRINK_PTR) && !defined(HAVE_SHRINK_CONTROL) + struct shrinker *shrinker = NULL; +#endif + + (void)osc_cache_shrink_scan(shrinker, &scv); + + return osc_cache_shrink_count(shrinker, &scv); +} +#endif + static int __init 
osc_init(void) { bool enable_proc = true; @@ -2812,15 +2842,16 @@ static int __init osc_init(void) unsigned int reqpool_size; unsigned int reqsize; int rc; - + DEF_SHRINKER_VAR(osc_shvar, osc_cache_shrink, + osc_cache_shrink_count, osc_cache_shrink_scan); ENTRY; - /* print an address of _any_ initialized kernel symbol from this - * module, to allow debugging with gdb that doesn't support data - * symbols from modules.*/ - CDEBUG(D_INFO, "Lustre OSC module (%p).\n", &osc_caches); + /* print an address of _any_ initialized kernel symbol from this + * module, to allow debugging with gdb that doesn't support data + * symbols from modules.*/ + CDEBUG(D_INFO, "Lustre OSC module (%p).\n", &osc_caches); - rc = lu_kmem_init(osc_caches); + rc = lu_kmem_init(osc_caches); if (rc) RETURN(rc); @@ -2833,6 +2864,8 @@ static int __init osc_init(void) if (rc) GOTO(out_kmem, rc); + osc_cache_shrinker = set_shrinker(DEFAULT_SEEKS, &osc_shvar); + /* This is obviously too much memory, only prevent overflow here */ if (osc_reqpool_mem_max >= 1 << 12 || osc_reqpool_mem_max == 0) GOTO(out_type, rc = -EINVAL); @@ -2868,6 +2901,7 @@ out: static void /*__exit*/ osc_exit(void) { + remove_shrinker(osc_cache_shrinker); class_unregister_type(LUSTRE_OSC_NAME); lu_kmem_fini(osc_caches); ptlrpc_free_rq_pool(osc_rq_pool); -- 1.8.3.1