From dd43ff345254f255d40c68488dcd913bcee93514 Mon Sep 17 00:00:00 2001
From: Vitaly Fertman
Date: Tue, 4 Aug 2020 20:45:12 +0300
Subject: [PATCH] LU-11518 ldlm: pool recalc forceful call

Allow pool recalc to be called forcefully, regardless of the last
recalc time. On lock decref, call pool recalc forcefully instead of
an LRU cancel so that the fresh SLV obtained from the server is
taken into account.

Call LRU recalc from after_reply if a significant SLV change occurs,
and add a sysfs attribute to control what a 'significant SLV change'
is.

Signed-off-by: Vitaly Fertman
Change-Id: Iffeb8d73effdfc494f412422f285921aa4eb9811
HPE-bug-id: LUS-8678
Reviewed-on: https://es-gerrit.dev.cray.com/157134
Reviewed-by: Andriy Skulysh
Tested-by: Jenkins Build User
Reviewed-by: Alexey Lyashkov
Reviewed-on: https://review.whamcloud.com/39564
Reviewed-by: Andreas Dilger
Tested-by: jenkins
Tested-by: Maloo
Reviewed-by: Gu Zheng
Reviewed-by: Oleg Drokin
---
 lustre/include/lustre_dlm.h | 29 ++++++++++++++++++++++++++---
 lustre/ldlm/ldlm_internal.h | 12 +-----------
 lustre/ldlm/ldlm_lock.c     |  2 +-
 lustre/ldlm/ldlm_lockd.c    | 14 +++++++++++++-
 lustre/ldlm/ldlm_pool.c     | 25 +++++++++++++------------
 lustre/ldlm/ldlm_request.c  | 35 +++++++++++++++++++++++++++++------
 lustre/ldlm/ldlm_resource.c | 31 +++++++++++++++++++++++++++++++
 7 files changed, 114 insertions(+), 34 deletions(-)
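Note, kept below the '---' cut so it is not part of the commit message: the
heart of this patch is the new "force" argument threaded through the pool
recalc path. The standalone sketch below illustrates the gating pattern only;
struct pool and pool_recalc() are simplified stand-ins, not the Lustre types
or functions.

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

struct pool {
	time_t recalc_time;	/* last recalc timestamp */
	time_t recalc_period;	/* minimum seconds between recalcs */
};

/* Returns true if a recalc actually ran. */
static bool pool_recalc(struct pool *pl, bool force)
{
	time_t now = time(NULL);

	/* Periodic callers are rate-limited; forced callers bypass the
	 * recalc interval, as ldlm_pool_recalc(pl, true) now does. */
	if (!force && now - pl->recalc_time < pl->recalc_period)
		return false;

	pl->recalc_time = now;
	/* ... recompute SLV and limit here ... */
	return true;
}

int main(void)
{
	struct pool pl = { .recalc_time = time(NULL), .recalc_period = 10 };

	printf("periodic: %d\n", pool_recalc(&pl, false)); /* 0: too soon */
	printf("forced:   %d\n", pool_recalc(&pl, true));  /* 1: bypasses */
	return 0;
}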
diff --git a/lustre/include/lustre_dlm.h b/lustre/include/lustre_dlm.h
index 5cc31e9..65e91fe 100644
--- a/lustre/include/lustre_dlm.h
+++ b/lustre/include/lustre_dlm.h
@@ -69,6 +69,7 @@ extern struct kset *ldlm_svc_kset;
 #define LDLM_DIRTY_AGE_LIMIT (10)
 #define LDLM_DEFAULT_PARALLEL_AST_LIMIT 1024
 #define LDLM_DEFAULT_LRU_SHRINK_BATCH (16)
+#define LDLM_DEFAULT_SLV_RECALC_PCT (10)
 
 /**
  * LDLM non-error return states
@@ -197,6 +198,17 @@ static inline int lockmode_compat(enum ldlm_mode exist_mode,
  *
  */
 
+/* LRU cancel flags, used to modify the cancellation behavior of aged locks. */
+enum ldlm_lru_flags {
+	LDLM_LRU_FLAG_NO_WAIT	= 0x1, /* Cancel locks w/o blocking (neither
+					* sending nor waiting for any RPCs) */
+	LDLM_LRU_FLAG_CLEANUP	= 0x2, /* Used when clearing lru, tells
+					* prepare_lru_list to set discard flag
+					* on PR extent locks so we don't waste
+					* time saving pages that will be
+					* discarded momentarily */
+};
+
 struct ldlm_pool;
 struct ldlm_lock;
 struct ldlm_resource;
@@ -212,7 +224,7 @@ struct ldlm_namespace;
  */
 struct ldlm_pool_ops {
 	/** Recalculate pool \a pl usage */
-	int (*po_recalc)(struct ldlm_pool *pl);
+	int (*po_recalc)(struct ldlm_pool *pl, bool force);
 	/** Cancel at least \a nr locks from pool \a pl */
 	int (*po_shrink)(struct ldlm_pool *pl, int nr, gfp_t gfp_mask);
 	int (*po_setup)(struct ldlm_pool *pl, int limit);
@@ -451,6 +463,11 @@ struct ldlm_namespace {
 	 */
 	unsigned int		ns_cancel_batch;
 
+	/**
+	 * How much the SLV must decrease, in percent, to trigger an urgent LRU cancel.
+	 */
+	unsigned int		ns_recalc_pct;
+
 	/** Maximum allowed age (last used time) for locks in the LRU. Set in
 	 * seconds from userspace, but stored in ns to avoid repeat conversions.
 	 */
@@ -545,7 +562,13 @@ struct ldlm_namespace {
 	 * Flag to indicate namespace is being freed. Used to determine if
 	 * recalculation of LDLM pool statistics should be skipped.
 	 */
-	unsigned		ns_stopping:1;
+	unsigned		ns_stopping:1,
+
+	/**
+	 * Flag to indicate that an LRU recalc on RPC reply is in progress.
+	 * Used to limit the recalc to a single thread.
+	 */
+				ns_rpc_recalc:1;
 
 	/**
 	 * Which bucket should we start with the lock reclaim.
@@ -1811,7 +1834,7 @@ int ldlm_pool_init(struct ldlm_pool *pl, struct ldlm_namespace *ns,
 int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, gfp_t gfp_mask);
 void ldlm_pool_fini(struct ldlm_pool *pl);
 int ldlm_pool_setup(struct ldlm_pool *pl, int limit);
-time64_t ldlm_pool_recalc(struct ldlm_pool *pl);
+time64_t ldlm_pool_recalc(struct ldlm_pool *pl, bool force);
 __u32 ldlm_pool_get_lvf(struct ldlm_pool *pl);
 __u64 ldlm_pool_get_slv(struct ldlm_pool *pl);
 __u64 ldlm_pool_get_clv(struct ldlm_pool *pl);
diff --git a/lustre/ldlm/ldlm_internal.h b/lustre/ldlm/ldlm_internal.h
index 44b944c..58b2db6 100644
--- a/lustre/ldlm/ldlm_internal.h
+++ b/lustre/ldlm/ldlm_internal.h
@@ -94,17 +94,6 @@ void ldlm_namespace_move_to_inactive_locked(struct ldlm_namespace *,
 struct ldlm_namespace *ldlm_namespace_first_locked(enum ldlm_side);
 
 /* ldlm_request.c */
-/* Cancel lru flag, it indicates we cancel aged locks. */
-enum ldlm_lru_flags {
-	LDLM_LRU_FLAG_NO_WAIT	= 0x1, /* Cancel locks w/o blocking (neither
-					* sending nor waiting for any RPCs) */
-	LDLM_LRU_FLAG_CLEANUP	= 0x2, /* Used when clearing lru, tells
-					* prepare_lru_list to set discard flag
-					* on PR extent locks so we don't waste
-					* time saving pages that will be
-					* discarded momentarily */
-};
-
 int ldlm_cancel_lru(struct ldlm_namespace *ns, int min,
		    enum ldlm_cancel_flags cancel_flags,
		    enum ldlm_lru_flags lru_flags);
@@ -185,6 +174,7 @@ int ldlm_bl_to_thread_list(struct ldlm_namespace *ns,
			   struct ldlm_lock_desc *ld,
			   struct list_head *cancels, int count,
			   enum ldlm_cancel_flags cancel_flags);
+int ldlm_bl_to_thread_ns(struct ldlm_namespace *ns);
 int ldlm_bl_thread_wakeup(void);
 
 void ldlm_handle_bl_callback(struct ldlm_namespace *ns,
diff --git a/lustre/ldlm/ldlm_lock.c b/lustre/ldlm/ldlm_lock.c
index ded6c7c..9b4937b 100644
--- a/lustre/ldlm/ldlm_lock.c
+++ b/lustre/ldlm/ldlm_lock.c
@@ -905,7 +905,7 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, enum ldlm_mode mode)
		if (ldlm_is_fail_loc(lock))
			OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
 
-		ldlm_cancel_lru(ns, 0, LCF_ASYNC, 0);
+		ldlm_pool_recalc(&ns->ns_pool, true);
	} else {
		LDLM_DEBUG(lock, "do not add lock into lru list");
		unlock_res_and_lock(lock);
diff --git a/lustre/ldlm/ldlm_lockd.c b/lustre/ldlm/ldlm_lockd.c
index 9b16cb6..ca0729c 100644
--- a/lustre/ldlm/ldlm_lockd.c
+++ b/lustre/ldlm/ldlm_lockd.c
@@ -2214,6 +2214,11 @@ int ldlm_bl_to_thread_list(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld,
	return ldlm_bl_to_thread(ns, ld, NULL, cancels, count, cancel_flags);
 }
 
+int ldlm_bl_to_thread_ns(struct ldlm_namespace *ns)
+{
+	return ldlm_bl_to_thread(ns, NULL, NULL, NULL, 0, LCF_ASYNC);
+}
+
 int ldlm_bl_thread_wakeup(void)
 {
	wake_up(&ldlm_state->ldlm_bl_pool->blp_waitq);
@@ -2832,10 +2837,17 @@ static int ldlm_bl_thread_blwi(struct ldlm_bl_pool *blp,
					       LCF_BL_AST);
		ldlm_cli_cancel_list(&blwi->blwi_head, count, NULL,
				     blwi->blwi_flags);
-	} else {
+	} else if (blwi->blwi_lock) {
		ldlm_handle_bl_callback(blwi->blwi_ns, &blwi->blwi_ld,
					blwi->blwi_lock);
+	} else {
+		ldlm_pool_recalc(&blwi->blwi_ns->ns_pool, true);
+		spin_lock(&blwi->blwi_ns->ns_lock);
+		blwi->blwi_ns->ns_rpc_recalc = 0;
+		spin_unlock(&blwi->blwi_ns->ns_lock);
+		ldlm_namespace_put(blwi->blwi_ns);
	}
+
	if (blwi->blwi_mem_pressure)
		memalloc_noreclaim_restore(mpflags);
diff --git a/lustre/ldlm/ldlm_pool.c b/lustre/ldlm/ldlm_pool.c
index 8eb29c4..bdba328 100644
--- a/lustre/ldlm/ldlm_pool.c
+++ b/lustre/ldlm/ldlm_pool.c
@@ -332,19 +332,19 @@ static void ldlm_srv_pool_push_slv(struct ldlm_pool *pl)
  *
  * \pre ->pl_lock is not locked.
  */
-static int ldlm_srv_pool_recalc(struct ldlm_pool *pl)
+static int ldlm_srv_pool_recalc(struct ldlm_pool *pl, bool force)
 {
	timeout_t recalc_interval_sec;
 
	ENTRY;
 
	recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
-	if (recalc_interval_sec < pl->pl_recalc_period)
+	if (!force && recalc_interval_sec < pl->pl_recalc_period)
		RETURN(0);
 
	spin_lock(&pl->pl_lock);
	recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
-	if (recalc_interval_sec < pl->pl_recalc_period) {
+	if (!force && recalc_interval_sec < pl->pl_recalc_period) {
		spin_unlock(&pl->pl_lock);
		RETURN(0);
	}
@@ -471,7 +471,7 @@ static void ldlm_cli_pool_pop_slv(struct ldlm_pool *pl)
 /**
  * Recalculates client size pool \a pl according to current SLV and Limit.
  */
-static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
+static int ldlm_cli_pool_recalc(struct ldlm_pool *pl, bool force)
 {
	timeout_t recalc_interval_sec;
	int ret;
@@ -479,7 +479,7 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
	ENTRY;
 
	recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
-	if (recalc_interval_sec < pl->pl_recalc_period)
+	if (!force && recalc_interval_sec < pl->pl_recalc_period)
		RETURN(0);
 
	spin_lock(&pl->pl_lock);
@@ -487,7 +487,7 @@ static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
	/*
	 * Check if we need to recalc lists now.
	 */
	recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
-	if (recalc_interval_sec < pl->pl_recalc_period) {
+	if (!force && recalc_interval_sec < pl->pl_recalc_period) {
		spin_unlock(&pl->pl_lock);
		RETURN(0);
	}
@@ -571,7 +571,7 @@ static struct ldlm_pool_ops ldlm_cli_pool_ops = {
  *
  * \retval time in seconds for the next recalc of this pool
  */
-time64_t ldlm_pool_recalc(struct ldlm_pool *pl)
+time64_t ldlm_pool_recalc(struct ldlm_pool *pl, bool force)
 {
	timeout_t recalc_interval_sec;
	int count;
@@ -597,7 +597,7 @@ time64_t ldlm_pool_recalc(struct ldlm_pool *pl)
	}
 
	if (pl->pl_ops->po_recalc != NULL) {
-		count = pl->pl_ops->po_recalc(pl);
+		count = pl->pl_ops->po_recalc(pl, force);
		lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT,
				    count);
	}
@@ -991,7 +991,7 @@ void ldlm_pool_add(struct ldlm_pool *pl, struct ldlm_lock *lock)
	 * with too long call paths.
	 */
	if (ns_is_server(ldlm_pl2ns(pl)))
-		ldlm_pool_recalc(pl);
+		ldlm_pool_recalc(pl, false);
 }
 
 /**
@@ -1016,7 +1016,7 @@ void ldlm_pool_del(struct ldlm_pool *pl, struct ldlm_lock *lock)
	lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_CANCEL_STAT);
 
	if (ns_is_server(ldlm_pl2ns(pl)))
-		ldlm_pool_recalc(pl);
+		ldlm_pool_recalc(pl, false);
 }
 
 /**
@@ -1333,7 +1333,8 @@ static time64_t ldlm_pools_recalc_delay(enum ldlm_side side)
	 * After setup is done - recalc the pool.
	 */
	if (!skip) {
-		delay = min(delay, ldlm_pool_recalc(&ns->ns_pool));
+		delay = min(delay,
+			    ldlm_pool_recalc(&ns->ns_pool, false));
		ldlm_namespace_put(ns);
	}
 }
@@ -1472,7 +1473,7 @@ int ldlm_pool_setup(struct ldlm_pool *pl, int limit)
	return 0;
 }
 
-time64_t ldlm_pool_recalc(struct ldlm_pool *pl)
+time64_t ldlm_pool_recalc(struct ldlm_pool *pl, bool force)
 {
	return 0;
 }
diff --git a/lustre/ldlm/ldlm_request.c b/lustre/ldlm/ldlm_request.c
index c322f15..b53a858 100644
--- a/lustre/ldlm/ldlm_request.c
+++ b/lustre/ldlm/ldlm_request.c
@@ -1418,8 +1418,9 @@ static inline struct ldlm_pool *ldlm_imp2pl(struct obd_import *imp)
  */
 int ldlm_cli_update_pool(struct ptlrpc_request *req)
 {
+	struct ldlm_namespace *ns;
	struct obd_device *obd;
-	__u64 new_slv;
+	__u64 new_slv, ratio;
	__u32 new_limit;
 
	ENTRY;
@@ -1457,17 +1458,39 @@ int ldlm_cli_update_pool(struct ptlrpc_request *req)
	read_unlock(&obd->obd_pool_lock);
 
	/*
-	 * Set new SLV and limit in OBD fields to make them accessible
-	 * to the pool thread. We do not access obd_namespace and pool
-	 * directly here as there is no reliable way to make sure that
-	 * they are still alive at cleanup time. Evil races are possible
-	 * which may cause Oops at that time.
+	 * The OBD device keeps the new pool attributes until they are
+	 * handled by the pool.
	 */
	write_lock(&obd->obd_pool_lock);
	obd->obd_pool_slv = new_slv;
	obd->obd_pool_limit = new_limit;
	write_unlock(&obd->obd_pool_lock);
 
+	/*
+	 * Check if an urgent pool recalc is needed; an SLV drop of at least
+	 * ns_recalc_pct percent is significant. LRU-resize-enabled case only.
+	 */
+	ns = obd->obd_namespace;
+	if (!ns_connect_lru_resize(ns) ||
+	    ldlm_pool_get_slv(&ns->ns_pool) < new_slv)
+		RETURN(0);
+
+	ratio = 100 * new_slv / ldlm_pool_get_slv(&ns->ns_pool);
+	if (100 - ratio >= ns->ns_recalc_pct &&
+	    !ns->ns_stopping && !ns->ns_rpc_recalc) {
+		bool recalc = false;
+
+		spin_lock(&ns->ns_lock);
+		if (!ns->ns_stopping && !ns->ns_rpc_recalc) {
+			ldlm_namespace_get(ns);
+			recalc = true;
+			ns->ns_rpc_recalc = 1;
+		}
+		spin_unlock(&ns->ns_lock);
+		if (recalc)
+			ldlm_bl_to_thread_ns(ns);
+	}
+
	RETURN(0);
 }
diff --git a/lustre/ldlm/ldlm_resource.c b/lustre/ldlm/ldlm_resource.c
index e262e7f..a18daa8 100644
--- a/lustre/ldlm/ldlm_resource.c
+++ b/lustre/ldlm/ldlm_resource.c
@@ -398,6 +398,35 @@ static ssize_t lru_cancel_batch_store(struct kobject *kobj,
 }
 LUSTRE_RW_ATTR(lru_cancel_batch);
 
+static ssize_t ns_recalc_pct_show(struct kobject *kobj,
+				  struct attribute *attr, char *buf)
+{
+	struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace,
+						 ns_kobj);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", ns->ns_recalc_pct);
+}
+
+static ssize_t ns_recalc_pct_store(struct kobject *kobj,
+				   struct attribute *attr,
+				   const char *buffer, size_t count)
+{
+	struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace,
+						 ns_kobj);
+	unsigned long tmp;
+
+	if (kstrtoul(buffer, 10, &tmp))
+		return -EINVAL;
+
+	if (tmp > 100)
+		return -ERANGE;
+
+	ns->ns_recalc_pct = (unsigned int)tmp;
+
+	return count;
+}
+LUSTRE_RW_ATTR(ns_recalc_pct);
+
 static ssize_t lru_max_age_show(struct kobject *kobj,
				struct attribute *attr, char *buf)
 {
@@ -649,6 +678,7 @@ static struct attribute *ldlm_ns_attrs[] = {
	&lustre_attr_resource_count.attr,
	&lustre_attr_lock_count.attr,
	&lustre_attr_lock_unused_count.attr,
+	&lustre_attr_ns_recalc_pct.attr,
	&lustre_attr_lru_size.attr,
	&lustre_attr_lru_cancel_batch.attr,
	&lustre_attr_lru_max_age.attr,
@@ -932,6 +962,7 @@ struct ldlm_namespace *ldlm_namespace_new(struct obd_device *obd, char *name,
	ns->ns_nr_unused = 0;
	ns->ns_max_unused = LDLM_DEFAULT_LRU_SIZE;
	ns->ns_cancel_batch = LDLM_DEFAULT_LRU_SHRINK_BATCH;
+	ns->ns_recalc_pct = LDLM_DEFAULT_SLV_RECALC_PCT;
	ns->ns_max_age = ktime_set(LDLM_DEFAULT_MAX_ALIVE, 0);
	ns->ns_ctime_age_limit = LDLM_CTIME_AGE_LIMIT;
	ns->ns_dirty_age_limit = ktime_set(LDLM_DIRTY_AGE_LIMIT, 0);
-- 
1.8.3.1
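A note on the new tunable: the 'significant SLV change' test added to
ldlm_cli_update_pool() is plain integer-percentage arithmetic against the
per-namespace ns_recalc_pct threshold (default LDLM_DEFAULT_SLV_RECALC_PCT,
i.e. 10, writable at runtime through the new ns_recalc_pct sysfs attribute,
valid range 0-100). The standalone sketch below mirrors that math;
slv_drop_is_significant() is a hypothetical helper, not a Lustre function.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Does new_slv represent a drop of at least recalc_pct percent from
 * old_slv? Mirrors ratio = 100 * new_slv / old_slv in the patch. */
static bool slv_drop_is_significant(uint64_t old_slv, uint64_t new_slv,
				    unsigned int recalc_pct)
{
	uint64_t ratio;

	if (old_slv == 0 || new_slv >= old_slv)
		return false;	/* SLV grew or is unchanged: no urgency */

	ratio = 100 * new_slv / old_slv;	/* percent of old SLV kept */
	return 100 - ratio >= recalc_pct;
}

int main(void)
{
	/* With the default 10%: 1000 -> 899 keeps 89%, an 11% drop. */
	printf("%d\n", slv_drop_is_significant(1000, 899, 10)); /* 1 */
	printf("%d\n", slv_drop_is_significant(1000, 950, 10)); /* 0 */
	return 0;
}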