From 3bf0b0589a11924356a2c529750e5d70515fc1a9 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 31 May 2018 12:44:57 -0400 Subject: [PATCH] LU-4423 ldlm: use delayed_work for ldlm_pools_recalc ldlm currently has a kthread which wakes up every so often and calls ldlm_pools_recalc(). The thread is started and stopped, but no other external interactions happen. This can trivially be replaced by a delayed_work if we have ldlm_pools_recalc() reschedule the work rather than just report when to do that. Change-Id: I85f8bc79ef86d1c7a6cbe159e6970445eb7f8389 Signed-off-by: NeilBrown Signed-off-by: Dmitry Eremin Reviewed-on: https://review.whamcloud.com/31705 Tested-by: Jenkins Tested-by: Maloo Reviewed-by: James Simmons Reviewed-by: Andreas Dilger Reviewed-by: Oleg Drokin --- lustre/include/lustre_dlm.h | 1 - lustre/ldlm/ldlm_pool.c | 313 +++++++++++++++----------------------------- 2 files changed, 104 insertions(+), 210 deletions(-) diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h index 5015648..90ffb3e 100644 --- a/lustre/include/lustre_dlm.h +++ b/lustre/include/lustre_dlm.h @@ -1701,7 +1701,6 @@ void unlock_res_and_lock(struct ldlm_lock *lock); * There are not used outside of ldlm. * @{ */ -time64_t ldlm_pools_recalc(enum ldlm_side client); int ldlm_pools_init(void); void ldlm_pools_fini(void); diff --git a/lustre/ldlm/ldlm_pool.c b/lustre/ldlm/ldlm_pool.c index 7e49fba..c44f375 100644 --- a/lustre/ldlm/ldlm_pool.c +++ b/lustre/ldlm/ldlm_pool.c @@ -93,7 +93,7 @@ #define DEBUG_SUBSYSTEM S_LDLM -#include +#include #include #include #include @@ -1073,10 +1073,8 @@ __u32 ldlm_pool_get_lvf(struct ldlm_pool *pl) return atomic_read(&pl->pl_lock_volume_factor); } -static struct ptlrpc_thread *ldlm_pools_thread; static struct shrinker *ldlm_pools_srv_shrinker; static struct shrinker *ldlm_pools_cli_shrinker; -static struct completion ldlm_pools_comp; /* * count locks from all namespaces (if possible). 
Returns number of @@ -1244,108 +1242,35 @@ static int ldlm_pools_cli_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask)) #endif /* HAVE_SHRINKER_COUNT */ -time64_t ldlm_pools_recalc(enum ldlm_side client) +static time64_t ldlm_pools_recalc_delay(enum ldlm_side side) { - unsigned long nr_l = 0, nr_p = 0, l; struct ldlm_namespace *ns; struct ldlm_namespace *ns_old = NULL; - int nr, equal = 0; /* seconds of sleep if no active namespaces */ - time64_t time = client ? LDLM_POOL_CLI_DEF_RECALC_PERIOD : - LDLM_POOL_SRV_DEF_RECALC_PERIOD; - - /* - * No need to setup pool limit for client pools. - */ - if (client == LDLM_NAMESPACE_SERVER) { - /* - * Check all modest namespaces first. - */ - mutex_lock(ldlm_namespace_lock(client)); - list_for_each_entry(ns, ldlm_namespace_list(client), - ns_list_chain) - { - if (ns->ns_appetite != LDLM_NAMESPACE_MODEST) - continue; - - l = ldlm_pool_granted(&ns->ns_pool); - if (l == 0) - l = 1; - - /* - * Set the modest pools limit equal to their avg granted - * locks + ~6%. - */ - l += dru(l, LDLM_POOLS_MODEST_MARGIN_SHIFT, 0); - ldlm_pool_setup(&ns->ns_pool, l); - nr_l += l; - nr_p++; - } - - /* - * Make sure that modest namespaces did not eat more that 2/3 - * of limit. - */ - if (nr_l >= 2 * (LDLM_POOL_HOST_L / 3)) { - CWARN("\"Modest\" pools eat out 2/3 of server locks " - "limit (%lu of %lu). This means that you have too " - "many clients for this amount of server RAM. " - "Upgrade server!\n", nr_l, LDLM_POOL_HOST_L); - equal = 1; - } - + time64_t delay = side == LDLM_NAMESPACE_SERVER ? + LDLM_POOL_SRV_DEF_RECALC_PERIOD : + LDLM_POOL_CLI_DEF_RECALC_PERIOD; + int nr; + + /* Recalc at least ldlm_namespace_nr(side) namespaces. */ + for (nr = ldlm_namespace_nr_read(side); nr > 0; nr--) { + int skip; /* - * The rest is given to greedy namespaces. + * Lock the list, get first @ns in the list, getref, move it + * to the tail, unlock and call pool recalc. 
This way we avoid + * calling recalc under @ns lock, which is really good as we + * get rid of potential deadlock on side nodes when canceling + * locks synchronously. */ - list_for_each_entry(ns, ldlm_namespace_list(client), - ns_list_chain) - { - if (!equal && ns->ns_appetite != LDLM_NAMESPACE_GREEDY) - continue; - - if (equal) { - /* - * In the case 2/3 locks are eaten out by - * modest pools, we re-setup equal limit - * for _all_ pools. - */ - l = LDLM_POOL_HOST_L / - ldlm_namespace_nr_read(client); - } else { - /* - * All the rest of greedy pools will have - * all locks in equal parts. - */ - l = (LDLM_POOL_HOST_L - nr_l) / - (ldlm_namespace_nr_read(client) - - nr_p); - } - ldlm_pool_setup(&ns->ns_pool, l); - } - mutex_unlock(ldlm_namespace_lock(client)); - } - - /* - * Recalc at least ldlm_namespace_nr(client) namespaces. - */ - for (nr = ldlm_namespace_nr_read(client); nr > 0; nr--) { - int skip; - /* - * Lock the list, get first @ns in the list, getref, move it - * to the tail, unlock and call pool recalc. This way we avoid - * calling recalc under @ns lock what is really good as we get - * rid of potential deadlock on client nodes when canceling - * locks synchronously. - */ - mutex_lock(ldlm_namespace_lock(client)); - if (list_empty(ldlm_namespace_list(client))) { - mutex_unlock(ldlm_namespace_lock(client)); + mutex_lock(ldlm_namespace_lock(side)); + if (list_empty(ldlm_namespace_list(side))) { + mutex_unlock(ldlm_namespace_lock(side)); break; } - ns = ldlm_namespace_first_locked(client); + ns = ldlm_namespace_first_locked(side); if (ns_old == ns) { /* Full pass complete */ - mutex_unlock(ldlm_namespace_lock(client)); + mutex_unlock(ldlm_namespace_lock(side)); break; } @@ -1360,8 +1285,8 @@ time64_t ldlm_pools_recalc(enum ldlm_side client) * there). 
*/ if (ldlm_ns_empty(ns)) { - ldlm_namespace_move_to_inactive_locked(ns, client); - mutex_unlock(ldlm_namespace_lock(client)); + ldlm_namespace_move_to_inactive_locked(ns, side); + mutex_unlock(ldlm_namespace_lock(side)); continue; } @@ -1381,144 +1306,118 @@ time64_t ldlm_pools_recalc(enum ldlm_side client) } spin_unlock(&ns->ns_lock); - ldlm_namespace_move_to_active_locked(ns, client); - mutex_unlock(ldlm_namespace_lock(client)); + ldlm_namespace_move_to_active_locked(ns, side); + mutex_unlock(ldlm_namespace_lock(side)); /* * After setup is done - recalc the pool. */ if (!skip) { - time64_t ttime = ldlm_pool_recalc(&ns->ns_pool); - - if (ttime < time) - time = ttime; - + delay = min(delay, ldlm_pool_recalc(&ns->ns_pool)); ldlm_namespace_put(ns); } - } - - /* Wake up the blocking threads from time to time. */ - ldlm_bl_thread_wakeup(); + } - return time; + return delay; } -static int ldlm_pools_thread_main(void *arg) -{ - struct ptlrpc_thread *thread = (struct ptlrpc_thread *)arg; - time64_t s_time, c_time; - - ENTRY; - thread_set_flags(thread, SVC_RUNNING); - wake_up(&thread->t_ctl_waitq); +static void ldlm_pools_recalc_task(struct work_struct *ws); +static DECLARE_DELAYED_WORK(ldlm_pools_recalc_work, ldlm_pools_recalc_task); - CDEBUG(D_DLMTRACE, "%s: pool thread starting, process %d\n", - "ldlm_poold", current_pid()); +static void ldlm_pools_recalc_task(struct work_struct *ws) +{ + /* seconds of sleep if no active namespaces */ + time64_t delay; +#ifdef HAVE_SERVER_SUPPORT + struct ldlm_namespace *ns; + unsigned long nr_l = 0, nr_p = 0, l; + int equal = 0; - while (1) { - struct l_wait_info lwi; + /* Check all modest namespaces first. */ + mutex_lock(ldlm_namespace_lock(LDLM_NAMESPACE_SERVER)); + list_for_each_entry(ns, ldlm_namespace_list(LDLM_NAMESPACE_SERVER), + ns_list_chain) { + if (ns->ns_appetite != LDLM_NAMESPACE_MODEST) + continue; - /* - * Recal all pools on this tick. 
- */ - s_time = ldlm_pools_recalc(LDLM_NAMESPACE_SERVER); - c_time = ldlm_pools_recalc(LDLM_NAMESPACE_CLIENT); + l = ldlm_pool_granted(&ns->ns_pool); + if (l == 0) + l = 1; /* - * Wait until the next check time, or until we're - * stopped. + * Set the modest pools limit equal to their avg granted + * locks + ~6%. */ - lwi = LWI_TIMEOUT(cfs_time_seconds(min(s_time, c_time)), - NULL, NULL); - l_wait_event(thread->t_ctl_waitq, - thread_is_stopping(thread) || - thread_is_event(thread), - &lwi); - - if (thread_test_and_clear_flags(thread, SVC_STOPPING)) - break; - else - thread_test_and_clear_flags(thread, SVC_EVENT); - } - - thread_set_flags(thread, SVC_STOPPED); - wake_up(&thread->t_ctl_waitq); - - CDEBUG(D_DLMTRACE, "%s: pool thread exiting, process %d\n", - "ldlm_poold", current_pid()); - - complete_and_exit(&ldlm_pools_comp, 0); -} - -static int ldlm_pools_thread_start(void) -{ - struct l_wait_info lwi = { 0 }; - struct task_struct *task; - ENTRY; - - if (ldlm_pools_thread != NULL) - RETURN(-EALREADY); - - OBD_ALLOC_PTR(ldlm_pools_thread); - if (ldlm_pools_thread == NULL) - RETURN(-ENOMEM); - - init_completion(&ldlm_pools_comp); - init_waitqueue_head(&ldlm_pools_thread->t_ctl_waitq); + l += dru(l, LDLM_POOLS_MODEST_MARGIN_SHIFT, 0); + ldlm_pool_setup(&ns->ns_pool, l); + nr_l += l; + nr_p++; + } - task = kthread_run(ldlm_pools_thread_main, ldlm_pools_thread, - "ldlm_poold"); - if (IS_ERR(task)) { - CERROR("Can't start pool thread, error %ld\n", PTR_ERR(task)); - OBD_FREE(ldlm_pools_thread, sizeof(*ldlm_pools_thread)); - ldlm_pools_thread = NULL; - RETURN(PTR_ERR(task)); + /* + * Make sure than modest namespaces did not eat more that 2/3 + * of limit. + */ + if (nr_l >= 2 * (LDLM_POOL_HOST_L / 3)) { + CWARN("'Modest' pools eat out 2/3 of server locks " + "limit (%lu of %lu). This means that you have too " + "many clients for this amount of server RAM. 
" + "Upgrade server!\n", nr_l, LDLM_POOL_HOST_L); + equal = 1; } - l_wait_event(ldlm_pools_thread->t_ctl_waitq, - thread_is_running(ldlm_pools_thread), &lwi); - RETURN(0); -} -static void ldlm_pools_thread_stop(void) -{ - ENTRY; + /* The rest is given to greedy namespaces. */ + list_for_each_entry(ns, ldlm_namespace_list(LDLM_NAMESPACE_SERVER), + ns_list_chain) { + if (!equal && ns->ns_appetite != LDLM_NAMESPACE_GREEDY) + continue; - if (ldlm_pools_thread == NULL) { - EXIT; - return; + if (equal) { + /* + * In the case 2/3 locks are eaten out by + * modest pools, we re-setup equal limit + * for _all_ pools. + */ + l = LDLM_POOL_HOST_L / + ldlm_namespace_nr_read(LDLM_NAMESPACE_SERVER); + } else { + /* + * All the rest of greedy pools will have + * all locks in equal parts. + */ + l = (LDLM_POOL_HOST_L - nr_l) / + (ldlm_namespace_nr_read(LDLM_NAMESPACE_SERVER) - + nr_p); + } + ldlm_pool_setup(&ns->ns_pool, l); } + mutex_unlock(ldlm_namespace_lock(LDLM_NAMESPACE_SERVER)); - thread_set_flags(ldlm_pools_thread, SVC_STOPPING); - wake_up(&ldlm_pools_thread->t_ctl_waitq); + delay = min(ldlm_pools_recalc_delay(LDLM_NAMESPACE_SERVER), + ldlm_pools_recalc_delay(LDLM_NAMESPACE_CLIENT)); +#else /* !HAVE_SERVER_SUPPORT */ + delay = ldlm_pools_recalc_delay(LDLM_NAMESPACE_CLIENT); +#endif /* HAVE_SERVER_SUPPORT */ - /* - * Make sure that pools thread is finished before freeing @thread. - * This fixes possible race and oops due to accessing freed memory - * in pools thread. - */ - wait_for_completion(&ldlm_pools_comp); - OBD_FREE_PTR(ldlm_pools_thread); - ldlm_pools_thread = NULL; - EXIT; + /* Wake up the blocking threads from time to time. 
*/ + ldlm_bl_thread_wakeup(); + + schedule_delayed_work(&ldlm_pools_recalc_work, cfs_time_seconds(delay)); } int ldlm_pools_init(void) { - int rc; DEF_SHRINKER_VAR(shsvar, ldlm_pools_srv_shrink, ldlm_pools_srv_count, ldlm_pools_srv_scan); DEF_SHRINKER_VAR(shcvar, ldlm_pools_cli_shrink, ldlm_pools_cli_count, ldlm_pools_cli_scan); - ENTRY; - rc = ldlm_pools_thread_start(); - if (rc == 0) { - ldlm_pools_srv_shrinker = - set_shrinker(DEFAULT_SEEKS, &shsvar); - ldlm_pools_cli_shrinker = - set_shrinker(DEFAULT_SEEKS, &shcvar); - } - RETURN(rc); + schedule_delayed_work(&ldlm_pools_recalc_work, + LDLM_POOL_CLI_DEF_RECALC_PERIOD); + ldlm_pools_srv_shrinker = set_shrinker(DEFAULT_SEEKS, &shsvar); + ldlm_pools_cli_shrinker = set_shrinker(DEFAULT_SEEKS, &shcvar); + + return 0; } void ldlm_pools_fini(void) @@ -1531,7 +1430,7 @@ void ldlm_pools_fini(void) remove_shrinker(ldlm_pools_cli_shrinker); ldlm_pools_cli_shrinker = NULL; } - ldlm_pools_thread_stop(); + cancel_delayed_work_sync(&ldlm_pools_recalc_work); } #else /* !HAVE_LRU_RESIZE_SUPPORT */ @@ -1540,7 +1439,7 @@ int ldlm_pool_setup(struct ldlm_pool *pl, int limit) return 0; } -time64_t ldlm_pool_recalc(struct ldlm_pool *pl) +int ldlm_pool_recalc(struct ldlm_pool *pl) { return 0; } @@ -1617,8 +1516,4 @@ void ldlm_pools_fini(void) return; } -time64_t ldlm_pools_recalc(enum ldlm_side client) -{ - return 0; -} #endif /* HAVE_LRU_RESIZE_SUPPORT */ -- 1.8.3.1