#define LDLM_DIRTY_AGE_LIMIT (10)
#define LDLM_DEFAULT_PARALLEL_AST_LIMIT 1024
#define LDLM_DEFAULT_LRU_SHRINK_BATCH (16)
+#define LDLM_DEFAULT_SLV_RECALC_PCT (10)
/**
* LDLM non-error return states
*
*/
+/* LRU cancellation flags: select how aged locks are cancelled. */
+enum ldlm_lru_flags {
+ /* Bit-flag values; callers pass them via the lru_flags argument of
+ * ldlm_cancel_lru() and may combine them. */
+ LDLM_LRU_FLAG_NO_WAIT = 0x1, /* Cancel locks w/o blocking (neither
+ * sending nor waiting for any RPCs) */
+ LDLM_LRU_FLAG_CLEANUP = 0x2, /* Used when clearing lru, tells
+ * prepare_lru_list to set discard flag
+ * on PR extent locks so we don't waste
+ * time saving pages that will be
+ * discarded momentarily */
+};
+
struct ldlm_pool;
struct ldlm_lock;
struct ldlm_resource;
*/
struct ldlm_pool_ops {
/** Recalculate pool \a pl usage */
- int (*po_recalc)(struct ldlm_pool *pl);
+ int (*po_recalc)(struct ldlm_pool *pl, bool force);
/** Cancel at least \a nr locks from pool \a pl */
int (*po_shrink)(struct ldlm_pool *pl, int nr, gfp_t gfp_mask);
int (*po_setup)(struct ldlm_pool *pl, int limit);
*/
unsigned int ns_cancel_batch;
+ /**
+ * Percentage drop in SLV that triggers an urgent LRU cancel.
+ */
+ unsigned int ns_recalc_pct;
+
/** Maximum allowed age (last used time) for locks in the LRU. Set in
* seconds from userspace, but stored in ns to avoid repeat conversions.
*/
* Flag to indicate namespace is being freed. Used to determine if
* recalculation of LDLM pool statistics should be skipped.
*/
- unsigned ns_stopping:1;
+ unsigned ns_stopping:1,
+
+ /**
+ * Flag to indicate the LRU recalc on RPC reply is in progress.
+ * Used to limit the process by 1 thread only.
+ */
+ ns_rpc_recalc:1;
/**
* Which bucket should we start with the lock reclaim.
int ldlm_pool_shrink(struct ldlm_pool *pl, int nr, gfp_t gfp_mask);
void ldlm_pool_fini(struct ldlm_pool *pl);
int ldlm_pool_setup(struct ldlm_pool *pl, int limit);
-time64_t ldlm_pool_recalc(struct ldlm_pool *pl);
+time64_t ldlm_pool_recalc(struct ldlm_pool *pl, bool force);
__u32 ldlm_pool_get_lvf(struct ldlm_pool *pl);
__u64 ldlm_pool_get_slv(struct ldlm_pool *pl);
__u64 ldlm_pool_get_clv(struct ldlm_pool *pl);
struct ldlm_namespace *ldlm_namespace_first_locked(enum ldlm_side);
/* ldlm_request.c */
-/* Cancel lru flag, it indicates we cancel aged locks. */
-enum ldlm_lru_flags {
- LDLM_LRU_FLAG_NO_WAIT = 0x1, /* Cancel locks w/o blocking (neither
- * sending nor waiting for any RPCs) */
- LDLM_LRU_FLAG_CLEANUP = 0x2, /* Used when clearing lru, tells
- * prepare_lru_list to set discard flag
- * on PR extent locks so we don't waste
- * time saving pages that will be
- * discarded momentarily */
-};
-
int ldlm_cancel_lru(struct ldlm_namespace *ns, int min,
enum ldlm_cancel_flags cancel_flags,
enum ldlm_lru_flags lru_flags);
struct ldlm_lock_desc *ld,
struct list_head *cancels, int count,
enum ldlm_cancel_flags cancel_flags);
+int ldlm_bl_to_thread_ns(struct ldlm_namespace *ns);
int ldlm_bl_thread_wakeup(void);
void ldlm_handle_bl_callback(struct ldlm_namespace *ns,
if (ldlm_is_fail_loc(lock))
OBD_RACE(OBD_FAIL_LDLM_CP_BL_RACE);
- ldlm_cancel_lru(ns, 0, LCF_ASYNC, 0);
+ ldlm_pool_recalc(&ns->ns_pool, true);
} else {
LDLM_DEBUG(lock, "do not add lock into lru list");
unlock_res_and_lock(lock);
return ldlm_bl_to_thread(ns, ld, NULL, cancels, count, cancel_flags);
}
+/**
+ * Queue an empty (no lock descriptor, no cancel list) asynchronous work item
+ * for namespace \a ns on the blocking thread.  The work handler treats an
+ * item with no lock attached as a request to recalc the namespace pool in
+ * thread context.
+ *
+ * \retval 0 on success, negative errno from ldlm_bl_to_thread() on failure.
+ */
+int ldlm_bl_to_thread_ns(struct ldlm_namespace *ns)
+{
+ return ldlm_bl_to_thread(ns, NULL, NULL, NULL, 0, LCF_ASYNC);
+}
+
int ldlm_bl_thread_wakeup(void)
{
wake_up(&ldlm_state->ldlm_bl_pool->blp_waitq);
LCF_BL_AST);
ldlm_cli_cancel_list(&blwi->blwi_head, count, NULL,
blwi->blwi_flags);
- } else {
+ } else if (blwi->blwi_lock) {
ldlm_handle_bl_callback(blwi->blwi_ns, &blwi->blwi_ld,
blwi->blwi_lock);
+ } else {
+ ldlm_pool_recalc(&blwi->blwi_ns->ns_pool, true);
+ spin_lock(&blwi->blwi_ns->ns_lock);
+ blwi->blwi_ns->ns_rpc_recalc = 0;
+ spin_unlock(&blwi->blwi_ns->ns_lock);
+ ldlm_namespace_put(blwi->blwi_ns);
}
+
if (blwi->blwi_mem_pressure)
memalloc_noreclaim_restore(mpflags);
*
* \pre ->pl_lock is not locked.
*/
-static int ldlm_srv_pool_recalc(struct ldlm_pool *pl)
+static int ldlm_srv_pool_recalc(struct ldlm_pool *pl, bool force)
{
timeout_t recalc_interval_sec;
ENTRY;
recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
- if (recalc_interval_sec < pl->pl_recalc_period)
+ if (!force && recalc_interval_sec < pl->pl_recalc_period)
RETURN(0);
spin_lock(&pl->pl_lock);
recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
- if (recalc_interval_sec < pl->pl_recalc_period) {
+ if (!force && recalc_interval_sec < pl->pl_recalc_period) {
spin_unlock(&pl->pl_lock);
RETURN(0);
}
/**
* Recalculates client size pool \a pl according to current SLV and Limit.
*/
-static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
+static int ldlm_cli_pool_recalc(struct ldlm_pool *pl, bool force)
{
timeout_t recalc_interval_sec;
int ret;
ENTRY;
recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
- if (recalc_interval_sec < pl->pl_recalc_period)
+ if (!force && recalc_interval_sec < pl->pl_recalc_period)
RETURN(0);
spin_lock(&pl->pl_lock);
* Check if we need to recalc lists now.
*/
recalc_interval_sec = ktime_get_seconds() - pl->pl_recalc_time;
- if (recalc_interval_sec < pl->pl_recalc_period) {
+ if (!force && recalc_interval_sec < pl->pl_recalc_period) {
spin_unlock(&pl->pl_lock);
RETURN(0);
}
*
* \retval time in seconds for the next recalc of this pool
*/
-time64_t ldlm_pool_recalc(struct ldlm_pool *pl)
+time64_t ldlm_pool_recalc(struct ldlm_pool *pl, bool force)
{
timeout_t recalc_interval_sec;
int count;
}
if (pl->pl_ops->po_recalc != NULL) {
- count = pl->pl_ops->po_recalc(pl);
+ count = pl->pl_ops->po_recalc(pl, force);
lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT,
count);
}
* with too long call paths.
*/
if (ns_is_server(ldlm_pl2ns(pl)))
- ldlm_pool_recalc(pl);
+ ldlm_pool_recalc(pl, false);
}
/**
lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_CANCEL_STAT);
if (ns_is_server(ldlm_pl2ns(pl)))
- ldlm_pool_recalc(pl);
+ ldlm_pool_recalc(pl, false);
}
/**
* After setup is done - recalc the pool.
*/
if (!skip) {
- delay = min(delay, ldlm_pool_recalc(&ns->ns_pool));
+ delay = min(delay,
+ ldlm_pool_recalc(&ns->ns_pool, false));
ldlm_namespace_put(ns);
}
}
return 0;
}
+/* NOTE(review): stub variant — presumably compiled when pool support is
+ * disabled (the enclosing #if is outside this hunk; confirm).  Returning 0
+ * means "no delay until next recalc needed". */
-time64_t ldlm_pool_recalc(struct ldlm_pool *pl)
+time64_t ldlm_pool_recalc(struct ldlm_pool *pl, bool force)
{
return 0;
}
*/
int ldlm_cli_update_pool(struct ptlrpc_request *req)
{
+ struct ldlm_namespace *ns;
struct obd_device *obd;
- __u64 new_slv;
+ __u64 new_slv, ratio;
__u32 new_limit;
ENTRY;
read_unlock(&obd->obd_pool_lock);
/*
- * Set new SLV and limit in OBD fields to make them accessible
- * to the pool thread. We do not access obd_namespace and pool
- * directly here as there is no reliable way to make sure that
- * they are still alive at cleanup time. Evil races are possible
- * which may cause Oops at that time.
+ * OBD device keeps the new pool attributes before they are handled by
+ * the pool.
*/
write_lock(&obd->obd_pool_lock);
obd->obd_pool_slv = new_slv;
obd->obd_pool_limit = new_limit;
write_unlock(&obd->obd_pool_lock);
+ /*
+ * Check whether an urgent pool recalc is needed: trigger one when the
+ * SLV has dropped by at least ns_recalc_pct (10% by default).  This
+ * applies only when LRU resize is enabled.
+ */
+ ns = obd->obd_namespace;
+ if (!ns_connect_lru_resize(ns) ||
+ ldlm_pool_get_slv(&ns->ns_pool) < new_slv)
+ RETURN(0);
+
+ ratio = 100 * new_slv / ldlm_pool_get_slv(&ns->ns_pool);
+ if (100 - ratio >= ns->ns_recalc_pct &&
+ !ns->ns_stopping && !ns->ns_rpc_recalc) {
+ bool recalc = false;
+
+ spin_lock(&ns->ns_lock);
+ if (!ns->ns_stopping && !ns->ns_rpc_recalc) {
+ ldlm_namespace_get(ns);
+ recalc = true;
+ ns->ns_rpc_recalc = 1;
+ }
+ spin_unlock(&ns->ns_lock);
+ if (recalc)
+ ldlm_bl_to_thread_ns(ns);
+ }
+
RETURN(0);
}
}
LUSTRE_RW_ATTR(lru_cancel_batch);
+/**
+ * Show the namespace SLV-recalc percentage threshold via sysfs.
+ *
+ * \retval number of bytes written into \a buf.
+ */
+static ssize_t ns_recalc_pct_show(struct kobject *kobj,
+				  struct attribute *attr, char *buf)
+{
+	struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace,
+						 ns_kobj);
+
+	/*
+	 * Fix: the original used snprintf(buf, sizeof(buf) - 1, ...), but
+	 * buf is a char * parameter, so sizeof(buf) is the pointer size
+	 * (4/8 bytes), not the buffer size — output would be truncated to a
+	 * handful of characters.  A sysfs show() buffer is PAGE_SIZE bytes.
+	 */
+	return snprintf(buf, PAGE_SIZE, "%u\n", ns->ns_recalc_pct);
+}
+
+/**
+ * Set the namespace SLV-recalc percentage threshold via sysfs.
+ *
+ * \retval \a count on success,
+ * \retval -EINVAL if \a buffer is not a decimal number,
+ * \retval -ERANGE if the value exceeds 100.
+ */
+static ssize_t ns_recalc_pct_store(struct kobject *kobj,
+				   struct attribute *attr,
+				   const char *buffer, size_t count)
+{
+	struct ldlm_namespace *ns = container_of(kobj, struct ldlm_namespace,
+						 ns_kobj);
+	unsigned long pct;
+
+	/* Accept a decimal percentage; anything unparsable is rejected. */
+	if (kstrtoul(buffer, 10, &pct) != 0)
+		return -EINVAL;
+
+	/* A drop of more than 100% of the SLV is meaningless. */
+	if (pct > 100)
+		return -ERANGE;
+
+	ns->ns_recalc_pct = (unsigned int)pct;
+	return count;
+}
+LUSTRE_RW_ATTR(ns_recalc_pct);
+
static ssize_t lru_max_age_show(struct kobject *kobj, struct attribute *attr,
char *buf)
{
&lustre_attr_resource_count.attr,
&lustre_attr_lock_count.attr,
&lustre_attr_lock_unused_count.attr,
+ &lustre_attr_ns_recalc_pct.attr,
&lustre_attr_lru_size.attr,
&lustre_attr_lru_cancel_batch.attr,
&lustre_attr_lru_max_age.attr,
ns->ns_nr_unused = 0;
ns->ns_max_unused = LDLM_DEFAULT_LRU_SIZE;
ns->ns_cancel_batch = LDLM_DEFAULT_LRU_SHRINK_BATCH;
+ ns->ns_recalc_pct = LDLM_DEFAULT_SLV_RECALC_PCT;
ns->ns_max_age = ktime_set(LDLM_DEFAULT_MAX_ALIVE, 0);
ns->ns_ctime_age_limit = LDLM_CTIME_AGE_LIMIT;
ns->ns_dirty_age_limit = ktime_set(LDLM_DIRTY_AGE_LIMIT, 0);