* GPL HEADER END
*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved
+ * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
* Use is subject to license terms.
+ *
+ * Copyright (c) 2011 Whamcloud, Inc.
+ *
*/
/*
* This file is part of Lustre, http://www.lustre.org/
* This controls the speed of reaching LDLM_POOL_MAX_GSP
* with increasing thread period.
*/
-#define LDLM_POOL_GSP_STEP (4)
+#define LDLM_POOL_GSP_STEP_SHIFT (2)
/*
* LDLM_POOL_GSP% of all locks is default GP.
*/
#define LDLM_POOL_MAX_AGE (36000)
+/*
+ * The granularity of SLV calculation.
+ */
+#define LDLM_POOL_SLV_SHIFT (10)
+
#ifdef __KERNEL__
extern cfs_proc_dir_entry_t *ldlm_ns_proc_dir;
#endif
-#define avg(src, add) \
- ((src) = ((src) + (add)) / 2)
-
-static inline __u64 dru(__u64 val, __u32 div)
+static inline __u64 dru(__u64 val, __u32 shift, int round_up)
{
- __u64 ret = val + (div - 1);
- do_div(ret, div);
- return ret;
+ return (val + (round_up ? (1 << shift) - 1 : 0)) >> shift;
}
static inline __u64 ldlm_pool_slv_max(__u32 L)
* Allow to have all locks for 1 client for 10 hrs.
* Formula is the following: limit * 10h / 1 client.
*/
- __u64 lim = L * LDLM_POOL_MAX_AGE / 1;
+ __u64 lim = (__u64)L * LDLM_POOL_MAX_AGE / 1;
return lim;
}
* Calculates suggested grant_step in % of available locks for passed
* \a period. This is later used in grant_plan calculations.
*/
-static inline int ldlm_pool_t2gsp(int t)
+static inline int ldlm_pool_t2gsp(unsigned int t)
{
/*
* This yields 1% grant step for anything below LDLM_POOL_GSP_STEP
* plan is reached.
*/
return LDLM_POOL_MAX_GSP -
- (LDLM_POOL_MAX_GSP - LDLM_POOL_MIN_GSP) /
- (1 << (t / LDLM_POOL_GSP_STEP));
+ ((LDLM_POOL_MAX_GSP - LDLM_POOL_MIN_GSP) >>
+ (t >> LDLM_POOL_GSP_STEP_SHIFT));
}
/**
*
* \pre ->pl_lock is locked.
*/
-static inline void ldlm_pool_recalc_grant_plan(struct ldlm_pool *pl)
+static void ldlm_pool_recalc_grant_plan(struct ldlm_pool *pl)
{
int granted, grant_step, limit;
grant_step = ldlm_pool_t2gsp(pl->pl_recalc_period);
grant_step = ((limit - granted) * grant_step) / 100;
pl->pl_grant_plan = granted + grant_step;
+ limit = (limit * 5) >> 2;
+ if (pl->pl_grant_plan > limit)
+ pl->pl_grant_plan = limit;
}
/**
*
* \pre ->pl_lock is locked.
*/
-static inline void ldlm_pool_recalc_slv(struct ldlm_pool *pl)
+static void ldlm_pool_recalc_slv(struct ldlm_pool *pl)
{
- int grant_usage, granted, grant_plan;
- __u64 slv, slv_factor;
+ int granted;
+ int grant_plan;
+ int round_up;
+ __u64 slv;
+ __u64 slv_factor;
+ __u64 grant_usage;
__u32 limit;
slv = pl->pl_server_lock_volume;
grant_plan = pl->pl_grant_plan;
limit = ldlm_pool_get_limit(pl);
granted = cfs_atomic_read(&pl->pl_granted);
+ round_up = granted < limit;
- grant_usage = limit - (granted - grant_plan);
- if (grant_usage <= 0)
- grant_usage = 1;
+ grant_usage = max_t(int, limit - (granted - grant_plan), 1);
/*
* Find out SLV change factor which is the ratio of grant usage
* SLV. And the opposite, the more grant plan is over-consumed
* (load time) the faster drops SLV.
*/
- slv_factor = (grant_usage * 100) / limit;
- if (2 * abs(granted - limit) > limit) {
- slv_factor *= slv_factor;
- slv_factor = dru(slv_factor, 100);
- }
+ slv_factor = (grant_usage << LDLM_POOL_SLV_SHIFT);
+ do_div(slv_factor, limit);
slv = slv * slv_factor;
- slv = dru(slv, 100);
+ slv = dru(slv, LDLM_POOL_SLV_SHIFT, round_up);
if (slv > ldlm_pool_slv_max(limit)) {
slv = ldlm_pool_slv_max(limit);
*
* \pre ->pl_lock is locked.
*/
-static inline void ldlm_pool_recalc_stats(struct ldlm_pool *pl)
+static void ldlm_pool_recalc_stats(struct ldlm_pool *pl)
{
int grant_plan = pl->pl_grant_plan;
__u64 slv = pl->pl_server_lock_volume;
time_t recalc_interval_sec;
ENTRY;
- cfs_spin_lock(&pl->pl_lock);
recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time;
- if (recalc_interval_sec >= pl->pl_recalc_period) {
- /*
- * Recalc SLV after last period. This should be done
- * _before_ recalculating new grant plan.
- */
- ldlm_pool_recalc_slv(pl);
+ if (recalc_interval_sec < pl->pl_recalc_period)
+ RETURN(0);
- /*
- * Make sure that pool informed obd of last SLV changes.
- */
- ldlm_srv_pool_push_slv(pl);
+ cfs_spin_lock(&pl->pl_lock);
+ recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time;
+ if (recalc_interval_sec < pl->pl_recalc_period) {
+ cfs_spin_unlock(&pl->pl_lock);
+ RETURN(0);
+ }
+ /*
+ * Recalc SLV after last period. This should be done
+ * _before_ recalculating new grant plan.
+ */
+ ldlm_pool_recalc_slv(pl);
- /*
- * Update grant_plan for new period.
- */
- ldlm_pool_recalc_grant_plan(pl);
+ /*
+ * Make sure that the pool has informed obd of the last SLV changes.
+ */
+ ldlm_srv_pool_push_slv(pl);
- pl->pl_recalc_time = cfs_time_current_sec();
- lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT,
- recalc_interval_sec);
- }
+ /*
+ * Update grant_plan for the new period.
+ */
+ ldlm_pool_recalc_grant_plan(pl);
+ pl->pl_recalc_time = cfs_time_current_sec();
+ lprocfs_counter_add(pl->pl_stats, LDLM_POOL_TIMING_STAT,
+ recalc_interval_sec);
cfs_spin_unlock(&pl->pl_lock);
RETURN(0);
}
static int ldlm_srv_pool_setup(struct ldlm_pool *pl, int limit)
{
struct obd_device *obd;
- ENTRY;
obd = ldlm_pl2ns(pl)->ns_obd;
LASSERT(obd != NULL && obd != LP_POISON);
cfs_write_unlock(&obd->obd_pool_lock);
ldlm_pool_set_limit(pl, limit);
- RETURN(0);
+ return 0;
}
/**
time_t recalc_interval_sec;
ENTRY;
+ recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time;
+ if (recalc_interval_sec < pl->pl_recalc_period)
+ RETURN(0);
+
cfs_spin_lock(&pl->pl_lock);
/*
* Check if we need to recalc lists now.
*/
ldlm_cli_pool_pop_slv(pl);
- cfs_spin_lock(&ns->ns_unused_lock);
+ cfs_spin_lock(&ns->ns_lock);
unused = ns->ns_nr_unused;
- cfs_spin_unlock(&ns->ns_unused_lock);
+ cfs_spin_unlock(&ns->ns_lock);
if (nr) {
- canceled = ldlm_cancel_lru(ns, nr, LDLM_SYNC,
+ canceled = ldlm_cancel_lru(ns, nr, LDLM_ASYNC,
LDLM_CANCEL_SHRINK);
}
#ifdef __KERNEL__
time_t recalc_interval_sec;
int count;
+ recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time;
+ if (recalc_interval_sec <= 0)
+ goto recalc;
+
cfs_spin_lock(&pl->pl_lock);
recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time;
if (recalc_interval_sec > 0) {
*/
cfs_atomic_set(&pl->pl_grant_rate, 0);
cfs_atomic_set(&pl->pl_cancel_rate, 0);
- cfs_atomic_set(&pl->pl_grant_speed, 0);
}
cfs_spin_unlock(&pl->pl_lock);
+ recalc:
if (pl->pl_ops->po_recalc != NULL) {
count = pl->pl_ops->po_recalc(pl);
lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT,
*/
int ldlm_pool_setup(struct ldlm_pool *pl, int limit)
{
- ENTRY;
if (pl->pl_ops->po_setup != NULL)
- RETURN(pl->pl_ops->po_setup(pl, limit));
- RETURN(0);
+ return(pl->pl_ops->po_setup(pl, limit));
+ return 0;
}
EXPORT_SYMBOL(ldlm_pool_setup);
grant_plan = pl->pl_grant_plan;
granted = cfs_atomic_read(&pl->pl_granted);
grant_rate = cfs_atomic_read(&pl->pl_grant_rate);
- lvf = cfs_atomic_read(&pl->pl_lock_volume_factor);
- grant_speed = cfs_atomic_read(&pl->pl_grant_speed);
cancel_rate = cfs_atomic_read(&pl->pl_cancel_rate);
+ grant_speed = grant_rate - cancel_rate;
+ lvf = cfs_atomic_read(&pl->pl_lock_volume_factor);
grant_step = ldlm_pool_t2gsp(pl->pl_recalc_period);
cfs_spin_unlock(&pl->pl_lock);
return nr;
}
+static int lprocfs_rd_grant_speed(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ struct ldlm_pool *pl = data;
+ int grant_speed;
+
+ cfs_spin_lock(&pl->pl_lock);
+ /* serialize with ldlm_pool_recalc */
+ grant_speed = cfs_atomic_read(&pl->pl_grant_rate) -
+ cfs_atomic_read(&pl->pl_cancel_rate);
+ cfs_spin_unlock(&pl->pl_lock);
+ return lprocfs_rd_uint(page, start, off, count, eof, &grant_speed);
+}
+
LDLM_POOL_PROC_READER(grant_plan, int);
LDLM_POOL_PROC_READER(recalc_period, int);
LDLM_POOL_PROC_WRITER(recalc_period, int);
if (!var_name)
RETURN(-ENOMEM);
- parent_ns_proc = lprocfs_srch(ldlm_ns_proc_dir, ns->ns_name);
+ parent_ns_proc = lprocfs_srch(ldlm_ns_proc_dir,
+ ldlm_ns_name(ns));
if (parent_ns_proc == NULL) {
CERROR("%s: proc entry is not initialized\n",
- ns->ns_name);
+ ldlm_ns_name(ns));
GOTO(out_free_name, rc = -EINVAL);
}
pl->pl_proc_dir = lprocfs_register("pool", parent_ns_proc,
lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0);
snprintf(var_name, MAX_STRING_SIZE, "grant_speed");
- pool_vars[0].data = &pl->pl_grant_speed;
- pool_vars[0].read_fptr = lprocfs_rd_atomic;
+ pool_vars[0].data = pl;
+ pool_vars[0].read_fptr = lprocfs_rd_grant_speed;
lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0);
snprintf(var_name, MAX_STRING_SIZE, "cancel_rate");
cfs_atomic_set(&pl->pl_grant_rate, 0);
cfs_atomic_set(&pl->pl_cancel_rate, 0);
- cfs_atomic_set(&pl->pl_grant_speed, 0);
pl->pl_grant_plan = LDLM_POOL_GP(LDLM_POOL_HOST_L);
snprintf(pl->pl_name, sizeof(pl->pl_name), "ldlm-pool-%s-%d",
- ns->ns_name, idx);
+ ldlm_ns_name(ns), idx);
if (client == LDLM_NAMESPACE_SERVER) {
pl->pl_ops = &ldlm_srv_pool_ops;
pl->pl_server_lock_volume = ldlm_pool_slv_max(LDLM_POOL_HOST_L);
} else {
ldlm_pool_set_limit(pl, 1);
- pl->pl_server_lock_volume = 1;
+ pl->pl_server_lock_volume = 0;
pl->pl_ops = &ldlm_cli_pool_ops;
pl->pl_recalc_period = LDLM_POOL_CLI_DEF_RECALC_PERIOD;
}
*/
if (lock->l_resource->lr_type == LDLM_FLOCK)
return;
- ENTRY;
- LDLM_DEBUG(lock, "add lock to pool");
cfs_atomic_inc(&pl->pl_granted);
cfs_atomic_inc(&pl->pl_grant_rate);
- cfs_atomic_inc(&pl->pl_grant_speed);
-
lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_GRANT_STAT);
/*
* Do not do pool recalc for client side as all locks which
*/
if (ns_is_server(ldlm_pl2ns(pl)))
ldlm_pool_recalc(pl);
- EXIT;
}
EXPORT_SYMBOL(ldlm_pool_add);
*/
if (lock->l_resource->lr_type == LDLM_FLOCK)
return;
- ENTRY;
- LDLM_DEBUG(lock, "del lock from pool");
LASSERT(cfs_atomic_read(&pl->pl_granted) > 0);
cfs_atomic_dec(&pl->pl_granted);
cfs_atomic_inc(&pl->pl_cancel_rate);
- cfs_atomic_dec(&pl->pl_grant_speed);
lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_CANCEL_STAT);
if (ns_is_server(ldlm_pl2ns(pl)))
ldlm_pool_recalc(pl);
- EXIT;
}
EXPORT_SYMBOL(ldlm_pool_del);
struct ldlm_namespace *ns;
void *cookie;
- if (nr != 0 && !(gfp_mask & __GFP_FS))
+ if (client == LDLM_NAMESPACE_CLIENT && nr != 0 &&
+ !(gfp_mask & __GFP_FS))
return -1;
CDEBUG(D_DLMTRACE, "Request to shrink %d %s locks from all pools\n",
ldlm_namespace_move_locked(ns, client);
cfs_mutex_up(ldlm_namespace_lock(client));
total += ldlm_pool_shrink(&ns->ns_pool, 0, gfp_mask);
- ldlm_namespace_put(ns, 1);
+ ldlm_namespace_put(ns);
}
if (nr == 0 || total == 0) {
cancel = 1 + nr_locks * nr / total;
ldlm_pool_shrink(&ns->ns_pool, cancel, gfp_mask);
cached += ldlm_pool_granted(&ns->ns_pool);
- ldlm_namespace_put(ns, 1);
+ ldlm_namespace_put(ns);
}
cl_env_reexit(cookie);
return cached;
}
-static int ldlm_pools_srv_shrink(int nr, unsigned int gfp_mask)
+static int ldlm_pools_srv_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
{
- return ldlm_pools_shrink(LDLM_NAMESPACE_SERVER, nr, gfp_mask);
+ return ldlm_pools_shrink(LDLM_NAMESPACE_SERVER,
+ shrink_param(sc, nr_to_scan),
+ shrink_param(sc, gfp_mask));
}
-static int ldlm_pools_cli_shrink(int nr, unsigned int gfp_mask)
+static int ldlm_pools_cli_shrink(SHRINKER_ARGS(sc, nr_to_scan, gfp_mask))
{
- return ldlm_pools_shrink(LDLM_NAMESPACE_CLIENT, nr, gfp_mask);
+ return ldlm_pools_shrink(LDLM_NAMESPACE_CLIENT,
+ shrink_param(sc, nr_to_scan),
+ shrink_param(sc, gfp_mask));
}
void ldlm_pools_recalc(ldlm_side_t client)
/*
* Set the modest pools limit equal to their avg granted
- * locks + 5%.
+ * locks + ~6%.
*/
- l += dru(l * LDLM_POOLS_MODEST_MARGIN, 100);
+ l += dru(l, LDLM_POOLS_MODEST_MARGIN_SHIFT, 0);
ldlm_pool_setup(&ns->ns_pool, l);
nr_l += l;
nr_p++;
}
ns = ldlm_namespace_first_locked(client);
- cfs_spin_lock(&ns->ns_hash_lock);
+ cfs_spin_lock(&ns->ns_lock);
/*
* skip ns which is being freed, and we don't want to increase
- * its refcount again, not even temporarily. bz21519.
+ * its refcount again, not even temporarily. bz21519 & LU-499.
*/
- if (ns->ns_refcount == 0) {
+ if (ns->ns_stopping) {
skip = 1;
} else {
skip = 0;
- ldlm_namespace_get_locked(ns);
+ ldlm_namespace_get(ns);
}
- cfs_spin_unlock(&ns->ns_hash_lock);
+ cfs_spin_unlock(&ns->ns_lock);
ldlm_namespace_move_locked(ns, client);
cfs_mutex_up(ldlm_namespace_lock(client));
*/
if (!skip) {
ldlm_pool_recalc(&ns->ns_pool);
- ldlm_namespace_put(ns, 1);
+ ldlm_namespace_put(ns);
}
}
}
* CLONE_VM and CLONE_FILES just avoid a needless copy, because we
* just drop the VM and FILES in cfs_daemonize() right away.
*/
- rc = cfs_kernel_thread(ldlm_pools_thread_main, ldlm_pools_thread,
- CLONE_VM | CLONE_FILES);
+ rc = cfs_create_thread(ldlm_pools_thread_main, ldlm_pools_thread,
+ CFS_DAEMON_FLAGS);
if (rc < 0) {
CERROR("Can't start pool thread, error %d\n",
rc);