struct ldlm_resource;
struct ldlm_namespace;
-typedef int (*ldlm_pool_recalc_t)(struct ldlm_pool *pl);
-
-typedef int (*ldlm_pool_shrink_t)(struct ldlm_pool *pl,
- int nr, unsigned int gfp_mask);
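+/* Pool operations. ->po_recalc recalculates pool state, ->po_shrink is
+ * called under memory pressure and ->po_setup re-initializes the pool
+ * limit (implemented for server side pools only). */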
+struct ldlm_pool_ops {
+ int (*po_recalc)(struct ldlm_pool *pl);
+ int (*po_shrink)(struct ldlm_pool *pl, int nr,
+ unsigned int gfp_mask);
+ int (*po_setup)(struct ldlm_pool *pl, int limit);
+};
enum {
LDLM_POOL_CTL_RECALC = 1 << 0, /* Pool recalc is enabled */
#define LDLM_POOLS_MODEST_MARGIN (5)
/* A change to SLV in % after which we want to wake up pools thread asap. */
-#define LDLM_POOLS_FAST_SLV_CHANGE (5)
+#define LDLM_POOLS_FAST_SLV_CHANGE (50)
struct ldlm_pool {
/* Common pool fields */
- cfs_proc_dir_entry_t *pl_proc_dir; /* Pool proc directory. */
- char pl_name[100]; /* Pool name, should be long
- * enough to contain complex
- * proc entry name. */
- spinlock_t pl_lock; /* Lock for protecting slv/clv
- * updates. */
- atomic_t pl_limit; /* Number of allowed locks in
- * in pool, both, client and
- * server side. */
- atomic_t pl_granted; /* Number of granted locks. */
- atomic_t pl_grant_rate; /* Grant rate per T. */
- atomic_t pl_cancel_rate; /* Cancel rate per T. */
- atomic_t pl_grant_speed; /* Grant speed (GR - CR) per T. */
- __u64 pl_server_lock_volume; /* Server lock volume. Protected
- * by pl_lock. */
- cfs_time_t pl_update_time; /* Time when last slv from server
- * was obtained. */
- ldlm_pool_recalc_t pl_recalc; /* Recalc callback func pointer. */
- ldlm_pool_shrink_t pl_shrink; /* Shrink callback func pointer. */
- int pl_control; /* Pool features mask */
+ cfs_proc_dir_entry_t *pl_proc_dir; /* Pool proc directory. */
+ char pl_name[100]; /* Pool name, should be long
+ * enough to contain complex
+ * proc entry name. */
+ spinlock_t pl_lock; /* Lock for protecting slv/clv
+ * updates. */
+ atomic_t pl_limit; /* Number of allowed locks
+ * in pool, both client and
+ * server side. */
+ atomic_t pl_granted; /* Number of granted locks. */
+ atomic_t pl_grant_rate; /* Grant rate per T. */
+ atomic_t pl_cancel_rate; /* Cancel rate per T. */
+ atomic_t pl_grant_speed; /* Grant speed (GR-CR) per T. */
+ __u64 pl_server_lock_volume; /* Server lock volume.
+ * Protected by pl_lock */
+ atomic_t pl_lock_volume_factor; /* Lock volume factor. */
+
+ time_t pl_recalc_time; /* Time when last slv from
+ * server was obtained. */
+ struct ldlm_pool_ops *pl_ops; /* Recalc, shrink and setup ops. */
+
+ int pl_control; /* Pool features mask */
- /* Server side pool fields */
- atomic_t pl_grant_plan; /* Planned number of granted
- * locks for next T. */
- atomic_t pl_grant_step; /* Grant plan step for next T. */
+ atomic_t pl_grant_plan; /* Planned number of granted
+ * locks for next T. */
+ atomic_t pl_grant_step; /* Grant plan step for next
+ * T. */
- /* Client side pool related fields */
- atomic_t pl_lock_volume_factor; /* Lock volume factor. */
- struct lprocfs_stats *pl_stats; /* Pool statistics. */
+ struct lprocfs_stats *pl_stats; /* Pool statistics. */
};
static inline int pool_recalc_enabled(struct ldlm_pool *pl)
LDLM_NAMESPACE_MODEST = 1 << 1
} ldlm_appetite_t;
+/* Default value for ->ns_shrink_thumb. If a lock is not an extent lock, its
+ * cost is one page. Here we have 256 pages, which is 1M on i386. Thus, by
+ * default, all extent locks covering more than 1M are kept in the lru and
+ * the others (including ibits locks) are canceled on a memory pressure
+ * event. */
+#define LDLM_LOCK_SHRINK_THUMB 256
+
struct ldlm_namespace {
char *ns_name;
ldlm_side_t ns_client; /* is this a client-side lock tree? */
unsigned int ns_max_unused;
unsigned int ns_max_age;
+
+ /* Lower limit on number of pages in a lock to keep it in cache */
+ unsigned int ns_shrink_thumb;
cfs_time_t ns_next_dump; /* next debug dump, jiffies */
atomic_t ns_locks;
int ldlm_pool_shrink(struct ldlm_pool *pl, int nr,
unsigned int gfp_mask);
void ldlm_pool_fini(struct ldlm_pool *pl);
-int ldlm_pool_setup(struct ldlm_pool *pl, __u32 limit);
+int ldlm_pool_setup(struct ldlm_pool *pl, int limit);
int ldlm_pool_recalc(struct ldlm_pool *pl);
__u64 ldlm_pool_get_slv(struct ldlm_pool *pl);
__u32 ldlm_pool_get_limit(struct ldlm_pool *pl);
} ldlm_sync_t;
/* Cancel lru flag, it indicates we cancel aged locks. */
-#define LDLM_CANCEL_AGED 0x00000001
+enum {
+ LDLM_CANCEL_AGED = 1 << 0, /* Cancel aged locks (non lru resize). */
+ LDLM_CANCEL_PASSED = 1 << 1, /* Cancel passed number of locks. */
+ LDLM_CANCEL_SHRINK = 1 << 2, /* Cancel locks from shrinker. */
+ LDLM_CANCEL_LRUR = 1 << 3 /* Cancel locks from lru resize. */
+};
-int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, ldlm_sync_t sync);
+int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, ldlm_sync_t sync,
+ int flags);
int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels,
int count, int max, int flags);
struct ldlm_pool *pl;
ENTRY;
- if (req->rq_export == NULL) {
+ if (!req->rq_export || !exp_connect_lru_resize(req->rq_export)) {
lustre_msg_set_slv(req->rq_repmsg, 0);
lustre_msg_set_limit(req->rq_repmsg, 0);
RETURN(0);
}
- if (!exp_connect_lru_resize(req->rq_export))
- RETURN(0);
-
pl = ldlm_exp2pl(req->rq_export);
spin_lock(&pl->pl_lock);
+ LASSERT(ldlm_pool_get_slv(pl) != 0 && ldlm_pool_get_limit(pl) != 0);
lustre_msg_set_slv(req->rq_repmsg, ldlm_pool_get_slv(pl));
lustre_msg_set_limit(req->rq_repmsg, ldlm_pool_get_limit(pl));
spin_unlock(&pl->pl_lock);
DEBUG_REQ(D_NET, req, "sending reply");
}
- target_pack_pool_reply(req);
return (ptlrpc_send_reply(req, 1));
}
* enqueue. */
if (!exp_connect_cancelset(lock->l_conn_export) &&
!ns_connect_lru_resize(ns))
- ldlm_cancel_lru(ns, 0, LDLM_ASYNC);
+ ldlm_cancel_lru(ns, 0, LDLM_ASYNC, 0);
} else {
unlock_res_and_lock(lock);
}
}
enum {
- LDLM_POOL_GRANTED_STAT = 0,
+ LDLM_POOL_FIRST_STAT = 0,
+ LDLM_POOL_GRANTED_STAT = LDLM_POOL_FIRST_STAT,
+ LDLM_POOL_GRANT_STAT,
+ LDLM_POOL_CANCEL_STAT,
LDLM_POOL_GRANT_RATE_STAT,
LDLM_POOL_CANCEL_RATE_STAT,
LDLM_POOL_GRANT_PLAN_STAT,
LDLM_POOL_SLV_STAT,
+ LDLM_POOL_SHRINK_REQTD_STAT,
+ LDLM_POOL_SHRINK_FREED_STAT,
+ LDLM_POOL_RECALC_STAT,
LDLM_POOL_LAST_STAT
};
ENTRY;
spin_lock(&pl->pl_lock);
- recalc_interval_sec = cfs_duration_sec(cfs_time_current() -
- pl->pl_update_time);
+ recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time;
if (recalc_interval_sec > 0) {
/* Update statistics */
ldlm_pool_recalc_stats(pl);
/* Update grant_plan for new period. */
ldlm_pool_recalc_grant_plan(pl);
- pl->pl_update_time = cfs_time_current();
/* Zero out all rates and speed for the last period. */
atomic_set(&pl->pl_grant_rate, 0);
atomic_set(&pl->pl_cancel_rate, 0);
atomic_set(&pl->pl_grant_speed, 0);
+ pl->pl_recalc_time = cfs_time_current_sec();
}
spin_unlock(&pl->pl_lock);
RETURN(0);
static int ldlm_srv_pool_shrink(struct ldlm_pool *pl,
int nr, unsigned int gfp_mask)
{
- __u32 granted, limit;
- __u64 slv_delta;
+ __u32 limit;
ENTRY;
- /* Client already canceled locks but server is already in shrinker and
- * can't cancel anything. Let's catch this race. */
- if ((granted = atomic_read(&pl->pl_granted)) == 0)
+ /* VM is asking how many entries may be potentially freed. */
+ if (nr == 0)
+ RETURN(atomic_read(&pl->pl_granted));
+
+ /* The client has already canceled locks but the server is still in
+ * the shrinker and can't cancel anything. Let's catch this race. */
+ if (atomic_read(&pl->pl_granted) == 0)
RETURN(0);
spin_lock(&pl->pl_lock);
- /* Simple proportion but it gives impression on how much should be
- * SLV changed for request @nr of locks to be canceled.*/
- slv_delta = nr * ldlm_pool_get_slv(pl);
- limit = ldlm_pool_get_limit(pl);
- do_div(slv_delta, granted);
-
- /* As SLV has some dependence on historical data, that is new value
- * is based on old one, this decreasing will make clients get some
- * locks back to the server and after some time it will stabilize.*/
- if (slv_delta < ldlm_pool_get_slv(pl))
- ldlm_pool_set_slv(pl, ldlm_pool_get_slv(pl) - slv_delta);
- else
+ /* We want the shrinker to either cause cancelation of @nr locks by
+ * clients or grant approximately @nr fewer locks over the next
+ * intervals.
+ *
+ * This is why we decrease SLV by @nr. The effect lasts only one
+ * recalc interval (1s these days), which should be enough to pass
+ * the decreased SLV to all clients. On the next recalc interval the
+ * pool will either increase SLV if the lock load is not high, keep
+ * it at the same level, or decrease it again; thus the shrinker's
+ * decreased SLV affects the following recalc intervals and lowers
+ * the locking load. */
+ if (nr < ldlm_pool_get_slv(pl)) {
+ ldlm_pool_set_slv(pl, ldlm_pool_get_slv(pl) - nr);
+ } else {
+ limit = ldlm_pool_get_limit(pl);
ldlm_pool_set_slv(pl, ldlm_pool_slv_min(limit));
+ }
spin_unlock(&pl->pl_lock);
/* We did not really free any memory here so far, it only will be
RETURN(0);
}
+static int ldlm_srv_pool_setup(struct ldlm_pool *pl, int limit)
+{
+ ENTRY;
+ ldlm_pool_set_limit(pl, limit);
+ RETURN(0);
+}
+
static int ldlm_cli_pool_recalc(struct ldlm_pool *pl)
{
time_t recalc_interval_sec;
spin_lock(&pl->pl_lock);
- recalc_interval_sec = cfs_duration_sec(cfs_time_current() -
- pl->pl_update_time);
+ recalc_interval_sec = cfs_time_current_sec() - pl->pl_recalc_time;
if (recalc_interval_sec > 0) {
/* Update statistics only every T */
ldlm_pool_recalc_stats(pl);
atomic_set(&pl->pl_grant_rate, 0);
atomic_set(&pl->pl_cancel_rate, 0);
atomic_set(&pl->pl_grant_speed, 0);
+ pl->pl_recalc_time = cfs_time_current_sec();
}
spin_unlock(&pl->pl_lock);
- /* Recalc client pool is done without taking into account pl_update_time
- * as this may be called voluntary in the case of emergency. Client
- * recalc does not calculate anything, we do not risk to have skew
- * of some pool param. */
- ldlm_cancel_lru(ldlm_pl2ns(pl), 0, LDLM_ASYNC);
- RETURN(0);
+ /* Do not cancel locks if lru resize is disabled for this ns */
+ if (!ns_connect_lru_resize(ldlm_pl2ns(pl)))
+ RETURN(0);
+
+ /* When canceling locks on the client we do not need to maintain
+ * sharp timing, we only want to cancel locks asap according to the
+ * new SLV. This may be called when SLV has changed a lot, which is
+ * why we do not take pl->pl_recalc_time into account here. */
+ RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), 0, LDLM_ASYNC,
+ LDLM_CANCEL_LRUR));
}
static int ldlm_cli_pool_shrink(struct ldlm_pool *pl,
int nr, unsigned int gfp_mask)
{
ENTRY;
- RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), nr, LDLM_SYNC));
+
+ /* Do not cancel locks if lru resize is disabled for this ns */
+ if (!ns_connect_lru_resize(ldlm_pl2ns(pl)))
+ RETURN(0);
+
+ /* Find out how many locks may be released according to shrink
+ * policy. */
+ if (nr == 0)
+ RETURN(ldlm_cancel_lru_local(ldlm_pl2ns(pl), NULL, 0,
+ 0, LDLM_CANCEL_SHRINK));
+
+ /* Cancel @nr locks according to the shrink policy */
+ RETURN(ldlm_cancel_lru(ldlm_pl2ns(pl), nr, LDLM_SYNC,
+ LDLM_CANCEL_SHRINK));
}
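+/* Server and client pool operation tables. Note that client pools have
+ * no ->po_setup as their limit is driven by the SLV and limit received
+ * from the server. */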
+struct ldlm_pool_ops ldlm_srv_pool_ops = {
+ .po_recalc = ldlm_srv_pool_recalc,
+ .po_shrink = ldlm_srv_pool_shrink,
+ .po_setup = ldlm_srv_pool_setup
+};
+
+struct ldlm_pool_ops ldlm_cli_pool_ops = {
+ .po_recalc = ldlm_cli_pool_recalc,
+ .po_shrink = ldlm_cli_pool_shrink
+};
+
int ldlm_pool_recalc(struct ldlm_pool *pl)
{
- if (pl->pl_recalc != NULL && pool_recalc_enabled(pl))
- return pl->pl_recalc(pl);
+ int count;
+
+ if (pl->pl_ops->po_recalc != NULL && pool_recalc_enabled(pl)) {
+ count = pl->pl_ops->po_recalc(pl);
+ lprocfs_counter_add(pl->pl_stats, LDLM_POOL_RECALC_STAT,
+ count);
+ return count;
+ }
return 0;
}
EXPORT_SYMBOL(ldlm_pool_recalc);
int ldlm_pool_shrink(struct ldlm_pool *pl, int nr,
unsigned int gfp_mask)
{
- if (pl->pl_shrink != NULL && pool_shrink_enabled(pl)) {
- CDEBUG(D_DLMTRACE, "%s: request to shrink %d locks\n",
- pl->pl_name, nr);
- return pl->pl_shrink(pl, nr, gfp_mask);
+ int cancel = 0;
+
+ if (pl->pl_ops->po_shrink != NULL && pool_shrink_enabled(pl)) {
+ cancel = pl->pl_ops->po_shrink(pl, nr, gfp_mask);
+ if (nr > 0) {
+ lprocfs_counter_add(pl->pl_stats,
+ LDLM_POOL_SHRINK_REQTD_STAT,
+ nr);
+ lprocfs_counter_add(pl->pl_stats,
+ LDLM_POOL_SHRINK_FREED_STAT,
+ cancel);
+ CDEBUG(D_DLMTRACE, "%s: request to shrink %d locks, "
+ "shrunk %d\n", pl->pl_name, nr, cancel);
+ }
}
- return 0;
+ return cancel;
}
EXPORT_SYMBOL(ldlm_pool_shrink);
/* The purpose of this function is to re-setup limit and maximal allowed
* slv according to the passed limit. */
-int ldlm_pool_setup(struct ldlm_pool *pl, __u32 limit)
+int ldlm_pool_setup(struct ldlm_pool *pl, int limit)
{
ENTRY;
- if (ns_is_server(ldlm_pl2ns(pl)))
- ldlm_pool_set_limit(pl, limit);
+ if (pl->pl_ops->po_setup != NULL)
+ RETURN(pl->pl_ops->po_setup(pl, limit));
RETURN(0);
}
EXPORT_SYMBOL(ldlm_pool_setup);
pl->pl_name);
nr += snprintf(page + nr, count - nr, " SLV: "LPU64"\n", slv);
- if (ns_is_client(ldlm_pl2ns(pl))) {
- nr += snprintf(page + nr, count - nr, " LVF: %d\n",
- atomic_read(&pl->pl_lock_volume_factor));
- }
+ nr += snprintf(page + nr, count - nr, " LVF: %d\n",
+ atomic_read(&pl->pl_lock_volume_factor));
+
nr += snprintf(page + nr, count - nr, " GSP: %d%%\n",
grant_step);
nr += snprintf(page + nr, count - nr, " GP: %d\n",
pool_vars[0].write_fptr = lprocfs_wr_atomic;
lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0);
- if (ns_is_client(ns)) {
- snprintf(var_name, MAX_STRING_SIZE, "lock_volume_factor");
- pool_vars[0].data = &pl->pl_lock_volume_factor;
- pool_vars[0].read_fptr = lprocfs_rd_uint;
- pool_vars[0].write_fptr = lprocfs_wr_uint;
- lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0);
- }
+ snprintf(var_name, MAX_STRING_SIZE, "lock_volume_factor");
+ pool_vars[0].data = &pl->pl_lock_volume_factor;
+ pool_vars[0].read_fptr = lprocfs_rd_uint;
+ pool_vars[0].write_fptr = lprocfs_wr_uint;
+ lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0);
snprintf(var_name, MAX_STRING_SIZE, "state");
pool_vars[0].data = pl;
lprocfs_add_vars(pl->pl_proc_dir, pool_vars, 0);
pl->pl_stats = lprocfs_alloc_stats(LDLM_POOL_LAST_STAT -
- LDLM_POOL_GRANTED_STAT, 0);
+ LDLM_POOL_FIRST_STAT, 0);
if (!pl->pl_stats)
GOTO(out_free_name, rc = -ENOMEM);
lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANTED_STAT,
LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
"granted", "locks");
+ lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_STAT, 0,
+ "grant", "locks");
+ lprocfs_counter_init(pl->pl_stats, LDLM_POOL_CANCEL_STAT, 0,
+ "cancel", "locks");
lprocfs_counter_init(pl->pl_stats, LDLM_POOL_GRANT_RATE_STAT,
LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
"grant_rate", "locks/s");
lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SLV_STAT,
LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
"slv", "slv");
+ lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SHRINK_REQTD_STAT,
+ LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
+ "shrink_request", "locks");
+ lprocfs_counter_init(pl->pl_stats, LDLM_POOL_SHRINK_FREED_STAT,
+ LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
+ "shrink_freed", "locks");
+ lprocfs_counter_init(pl->pl_stats, LDLM_POOL_RECALC_STAT,
+ LPROCFS_CNTR_AVGMINMAX | LPROCFS_CNTR_STDDEV,
+ "recalc_freed", "locks");
lprocfs_register_stats(pl->pl_proc_dir, "stats", pl->pl_stats);
EXIT;
spin_lock_init(&pl->pl_lock);
atomic_set(&pl->pl_granted, 0);
- pl->pl_update_time = cfs_time_current();
+ pl->pl_recalc_time = cfs_time_current_sec();
atomic_set(&pl->pl_lock_volume_factor, 1);
atomic_set(&pl->pl_grant_rate, 0);
ns->ns_name, idx);
if (client == LDLM_NAMESPACE_SERVER) {
- pl->pl_recalc = ldlm_srv_pool_recalc;
- pl->pl_shrink = ldlm_srv_pool_shrink;
+ pl->pl_ops = &ldlm_srv_pool_ops;
ldlm_pool_set_limit(pl, LDLM_POOL_HOST_L);
ldlm_pool_set_slv(pl, ldlm_pool_slv_max(LDLM_POOL_HOST_L));
} else {
ldlm_pool_set_slv(pl, 1);
ldlm_pool_set_limit(pl, 1);
- pl->pl_recalc = ldlm_cli_pool_recalc;
- pl->pl_shrink = ldlm_cli_pool_shrink;
+ pl->pl_ops = &ldlm_cli_pool_ops;
}
rc = ldlm_pool_proc_init(pl);
{
ENTRY;
ldlm_pool_proc_fini(pl);
- pl->pl_recalc = NULL;
- pl->pl_shrink = NULL;
+ pl->pl_ops = NULL;
EXIT;
}
EXPORT_SYMBOL(ldlm_pool_fini);
atomic_inc(&pl->pl_grant_rate);
atomic_inc(&pl->pl_grant_speed);
- /* No need to recalc client pools here as this is already done
- * on enqueue/cancel and locks to cancel already packed to the
- * rpc. */
+ lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_GRANT_STAT);
+
+ /* Do not do pool recalc for the client side as all locks which
+ * may potentially be canceled have already been packed into
+ * the enqueue/cancel rpc. Also we do not want to run out of stack
+ * with too long call paths. */
if (ns_is_server(ldlm_pl2ns(pl)))
ldlm_pool_recalc(pl);
EXIT;
atomic_inc(&pl->pl_cancel_rate);
atomic_dec(&pl->pl_grant_speed);
- /* Same as in ldlm_pool_add() */
+ lprocfs_counter_incr(pl->pl_stats, LDLM_POOL_CANCEL_STAT);
+
if (ns_is_server(ldlm_pl2ns(pl)))
ldlm_pool_recalc(pl);
EXIT;
nr, client == LDLM_NAMESPACE_CLIENT ? "client" : "server");
/* Find out how many resources we may release. */
- mutex_down(ldlm_namespace_lock(client));
- list_for_each_entry(ns, ldlm_namespace_list(client), ns_list_chain)
- total += ldlm_pool_granted(&ns->ns_pool);
- mutex_up(ldlm_namespace_lock(client));
-
+ for (nr_ns = atomic_read(ldlm_namespace_nr(client));
+ nr_ns > 0; nr_ns--)
+ {
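+ /* Pick the first namespace, pin it, re-position it in the list
+ * via ldlm_namespace_move() and shrink its pool with the
+ * namespace lock dropped. */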
+ mutex_down(ldlm_namespace_lock(client));
+ if (list_empty(ldlm_namespace_list(client))) {
+ mutex_up(ldlm_namespace_lock(client));
+ return 0;
+ }
+ ns = ldlm_namespace_first(client);
+ ldlm_namespace_get(ns);
+ ldlm_namespace_move(ns, client);
+ mutex_up(ldlm_namespace_lock(client));
+ total += ldlm_pool_shrink(&ns->ns_pool, 0, gfp_mask);
+ ldlm_namespace_put(ns, 1);
+ }
+
if (nr == 0 || total == 0)
return total;
{
__u32 nr_l = 0, nr_p = 0, l;
struct ldlm_namespace *ns;
- int rc, nr, equal = 0;
+ int nr, equal = 0;
- /* Check all modest namespaces. */
- mutex_down(ldlm_namespace_lock(client));
- list_for_each_entry(ns, ldlm_namespace_list(client), ns_list_chain) {
- if (ns->ns_appetite != LDLM_NAMESPACE_MODEST)
- continue;
+ /* No need to setup pool limit for client pools. */
+ if (client == LDLM_NAMESPACE_SERVER) {
+ /* Check all modest namespaces first. */
+ mutex_down(ldlm_namespace_lock(client));
+ list_for_each_entry(ns, ldlm_namespace_list(client),
+ ns_list_chain)
+ {
+ if (ns->ns_appetite != LDLM_NAMESPACE_MODEST)
+ continue;
- if (client == LDLM_NAMESPACE_SERVER) {
l = ldlm_pool_granted(&ns->ns_pool);
if (l == 0)
l = 1;
nr_l += l;
nr_p++;
}
- }
- /* Make sure that modest namespaces did not eat more that 2/3 of limit */
- if (nr_l >= 2 * (LDLM_POOL_HOST_L / 3)) {
- CWARN("Modest pools eat out 2/3 of locks limit. %d of %lu. "
- "Upgrade server!\n", nr_l, LDLM_POOL_HOST_L);
- equal = 1;
- }
+ /* Make sure that modest namespaces did not eat more than 2/3
+ * of the limit */
+ if (nr_l >= 2 * (LDLM_POOL_HOST_L / 3)) {
+ CWARN("\"Modest\" pools eat out 2/3 of server locks "
+ "limit (%d of %lu). This means that you have too "
+ "many clients for this amount of server RAM. "
+ "Upgrade server!\n", nr_l, LDLM_POOL_HOST_L);
+ equal = 1;
+ }
- /* The rest is given to greedy namespaces. */
- list_for_each_entry(ns, ldlm_namespace_list(client), ns_list_chain) {
- if (!equal && ns->ns_appetite != LDLM_NAMESPACE_GREEDY)
- continue;
+ /* The rest is given to greedy namespaces. */
+ list_for_each_entry(ns, ldlm_namespace_list(client),
+ ns_list_chain)
+ {
+ if (!equal && ns->ns_appetite != LDLM_NAMESPACE_GREEDY)
+ continue;
- if (client == LDLM_NAMESPACE_SERVER) {
if (equal) {
/* In the case 2/3 locks are eaten out by
* modest pools, we re-setup equal limit
}
ldlm_pool_setup(&ns->ns_pool, l);
}
+ mutex_up(ldlm_namespace_lock(client));
}
- mutex_up(ldlm_namespace_lock(client));
/* Recalc at least ldlm_namespace_nr(client) namespaces. */
for (nr = atomic_read(ldlm_namespace_nr(client)); nr > 0; nr--) {
mutex_up(ldlm_namespace_lock(client));
/* After setup is done - recalc the pool. */
- rc = ldlm_pool_recalc(&ns->ns_pool);
- if (rc)
- CERROR("%s: pool recalculation error "
- "%d\n", ns->ns_pool.pl_name, rc);
-
+ ldlm_pool_recalc(&ns->ns_pool);
ldlm_namespace_put(ns, 1);
}
}
/* Estimate the amount of available space in the request. */
int avail = ldlm_req_handles_avail(exp, size, bufcount,
LDLM_ENQUEUE_CANCEL_OFF);
+ int flags, cancel;
+
LASSERT(avail >= count);
+ flags = ns_connect_lru_resize(ns) ?
+ LDLM_CANCEL_LRUR : LDLM_CANCEL_AGED;
+ cancel = ns_connect_lru_resize(ns) ? 0 : 1;
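+ /* With lru resize pass count 0 so that the SLV-based policy
+ * decides which locks to cancel; otherwise cancel a single
+ * unused lock from the lru. */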
+
/* Cancel lru locks here _only_ if the server supports
* EARLY_CANCEL. Otherwise we have to send extra CANCEL
* rpc right on enqueue, what will make it slower, vs.
* asynchronous rpc in blocking thread. */
- count += ldlm_cancel_lru_local(ns, cancels,
- ns_connect_lru_resize(ns) ? 0 : 1,
- avail - count, LDLM_CANCEL_AGED);
+ count += ldlm_cancel_lru_local(ns, cancels, cancel,
+ avail - count, flags);
size[DLM_LOCKREQ_OFF] =
ldlm_request_bufsize(count, LDLM_ENQUEUE);
}
pl = ldlm_imp2pl(req->rq_import);
spin_lock(&pl->pl_lock);
-#ifdef __KERNEL__
+
+ /* Check if we need to wake up the pools thread for a fast SLV
+ * change. This is only done when the thread period is noticeably
+ * long, like 10s or more. */
+#if defined(__KERNEL__) && (LDLM_POOLS_THREAD_PERIOD >= 10)
{
- __u64 old_slv, fast_slv_change;
+ __u64 old_slv, new_slv, fast_change;
old_slv = ldlm_pool_get_slv(pl);
- fast_slv_change = old_slv * LDLM_POOLS_FAST_SLV_CHANGE;
- do_div(fast_slv_change, 100);
-#endif
- pl->pl_update_time = cfs_time_current();
- ldlm_pool_set_slv(pl, lustre_msg_get_slv(req->rq_repmsg));
- ldlm_pool_set_limit(pl, lustre_msg_get_limit(req->rq_repmsg));
-#ifdef __KERNEL__
+ new_slv = lustre_msg_get_slv(req->rq_repmsg);
+ fast_change = old_slv * LDLM_POOLS_FAST_SLV_CHANGE;
+ do_div(fast_change, 100);
+
/* Wake up pools thread only if SLV has changed more than
- * 5% since last update. In this case we want to react asap.
+ * 50% since last update. In this case we want to react asap.
* Otherwise it is no sense to wake up pools as they are
- * re-calculated every 1s anyways. */
- if (old_slv > ldlm_pool_get_slv(pl) &&
- old_slv - ldlm_pool_get_slv(pl) > fast_slv_change)
+ * re-calculated every LDLM_POOLS_THREAD_PERIOD anyways. */
+ if (old_slv > new_slv && old_slv - new_slv > fast_change)
ldlm_pools_wakeup();
}
#endif
+ /* In some cases the RPC may contain SLV and limit zeroed out. This
+ * is the case when the server does not support the lru resize
+ * feature. This is also possible in some recovery cases when server
+ * side reqs have no ref to the obd export and thus access to the
+ * server side namespace is not possible. */
+ if (lustre_msg_get_slv(req->rq_repmsg) != 0 &&
+ lustre_msg_get_limit(req->rq_repmsg) != 0) {
+ ldlm_pool_set_slv(pl, lustre_msg_get_slv(req->rq_repmsg));
+ ldlm_pool_set_limit(pl, lustre_msg_get_limit(req->rq_repmsg));
+ } else {
+ DEBUG_REQ(D_HA, req, "zero SLV or Limit found "
+ "(SLV: "LPU64", Limit: %u)",
+ lustre_msg_get_slv(req->rq_repmsg),
+ lustre_msg_get_limit(req->rq_repmsg));
+ }
spin_unlock(&pl->pl_lock);
RETURN(0);
if (rc == LDLM_FL_BL_AST) {
rc = ldlm_cli_cancel_req(lock->l_conn_export, &cancels, 1, 0);
} else if (rc == LDLM_FL_CANCELING) {
+ struct ldlm_namespace *ns = lock->l_resource->lr_namespace;
int avail = ldlm_cancel_handles_avail(lock->l_conn_export);
- int count = 1;
+ int flags, cancel;
LASSERT(avail > 0);
- count += ldlm_cancel_lru_local(lock->l_resource->lr_namespace,
- &cancels, 0, avail - 1,
- LDLM_CANCEL_AGED);
- ldlm_cli_cancel_list(&cancels, count, NULL, 0, 0);
+
+ flags = ns_connect_lru_resize(ns) ?
+ LDLM_CANCEL_LRUR : LDLM_CANCEL_AGED;
+ cancel = ns_connect_lru_resize(ns) ? 0 : 1;
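+ /* Pack lru locks to be canceled together with this one, using
+ * the lru resize policy when the server supports it and the
+ * aged policy otherwise. */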
+
+ cancel += ldlm_cancel_lru_local(ns, &cancels, 0,
+ avail - cancel, flags);
+ ldlm_cli_cancel_list(&cancels, cancel, NULL, 0, 0);
}
if (rc != LDLM_FL_CANCELING)
LDLM_LOCK_PUT(lock);
RETURN(count);
}
+/* Return 1 if @lock should be canceled according to shrinker policy.
+ * Return zero otherwise. */
+static int ldlm_cancel_shrink_policy(struct ldlm_namespace *ns,
+ struct ldlm_lock *lock,
+ int unused, int added,
+ int asked)
+{
+ int lock_cost;
+ __u64 page_nr;
+
+ if (lock->l_resource->lr_type == LDLM_EXTENT) {
+ struct ldlm_extent *l_extent;
+
+ /* For all extent locks cost is 1 + number of pages in
+ * their extent. */
+ l_extent = &lock->l_policy_data.l_extent;
+ page_nr = (l_extent->end - l_extent->start);
+ do_div(page_nr, CFS_PAGE_SIZE);
+
+#ifdef __KERNEL__
+ /* XXX: In fact this is an evil hack, we can't access the inode
+ * here. To do it right we need some way to get the number of
+ * pages covered by the lock. This should be fixed later when
+ * 10718 is landed. */
+ if (lock->l_ast_data != NULL) {
+ struct inode *inode = lock->l_ast_data;
+ if (page_nr > inode->i_mapping->nrpages)
+ page_nr = inode->i_mapping->nrpages;
+ }
+#endif
+ lock_cost = 1 + page_nr;
+ } else {
+ /* For all locks which are not extent ones cost is 1 */
+ lock_cost = 1;
+ }
+
+ /* Keep all expensive locks in the lru for the memory pressure
+ * cancel policy. They may anyway be canceled by the lru resize
+ * policy if their CLV is not small enough. */
+ return (lock_cost <= ns->ns_shrink_thumb);
+}
+
+/* Return 1 if @lock should be canceled according to lru resize policy.
+ * Return zero otherwise. */
+static int ldlm_cancel_lrur_policy(struct ldlm_namespace *ns,
+ struct ldlm_lock *lock,
+ int unused, int added,
+ int asked)
+{
+ cfs_time_t cur = cfs_time_current();
+ struct ldlm_pool *pl = &ns->ns_pool;
+ __u64 slv, lvf, lv;
+ cfs_time_t la;
+
+ spin_lock(&pl->pl_lock);
+ slv = ldlm_pool_get_slv(pl);
+ lvf = atomic_read(&pl->pl_lock_volume_factor);
+ spin_unlock(&pl->pl_lock);
+
+ la = cfs_duration_sec(cfs_time_sub(cur,
+ lock->l_last_used));
+
+ /* Stop when SLV has not yet come from the server or the lock
+ * volume (lvf * lock age * unused) is still smaller than SLV. */
+ lv = lvf * la * unused;
+ return (slv > 1 && lv >= slv);
+}
+
+/* Return 1 if @lock should be canceled according to passed policy.
+ * Return zero otherwise. */
+static int ldlm_cancel_passed_policy(struct ldlm_namespace *ns,
+ struct ldlm_lock *lock,
+ int unused, int added,
+ int asked)
+{
+ /* Do nothing here, we allow canceling all locks which are passed
+ * here from the upper layer logic. The number of locks to be
+ * canceled will be limited by @count and @max in
+ * ldlm_cancel_lru_local(). */
+ return 1;
+}
+
+/* Return 1 if @lock should be canceled according to aged policy.
+ * Return zero otherwise. */
+static int ldlm_cancel_aged_policy(struct ldlm_namespace *ns,
+ struct ldlm_lock *lock,
+ int unused, int added,
+ int asked)
+{
+ /* Cancel locks until the asked limit is reached; past that point
+ * cancel only aged locks. */
+ return !((added >= asked) &&
+ cfs_time_before_64(cfs_time_current(),
+ cfs_time_add(lock->l_last_used,
+ ns->ns_max_age)));
+}
+
+typedef int (*ldlm_cancel_lru_policy_t)(struct ldlm_namespace *,
+ struct ldlm_lock *, int,
+ int, int);
+
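+/* Pick the cancelation policy function according to connect flags and
+ * passed cancel @flags. */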
+static ldlm_cancel_lru_policy_t
+ldlm_cancel_lru_policy(struct ldlm_namespace *ns, int flags)
+{
+ if (ns_connect_lru_resize(ns)) {
+ if (flags & LDLM_CANCEL_SHRINK)
+ return ldlm_cancel_shrink_policy;
+ else if (flags & LDLM_CANCEL_LRUR)
+ return ldlm_cancel_lrur_policy;
+ else if (flags & LDLM_CANCEL_PASSED)
+ return ldlm_cancel_passed_policy;
+ } else {
+ if (flags & LDLM_CANCEL_AGED)
+ return ldlm_cancel_aged_policy;
+ }
+ return NULL;
+}
+
/* - Free space in lru for @count new locks,
* redundant unused locks are canceled locally;
* - also cancel locally unused aged locks;
* There are the following use cases: ldlm_cancel_resource_local(),
* ldlm_cancel_lru_local() and ldlm_cli_cancel(), which check&set this
* flag properly. As any attempt to cancel a lock rely on this flag,
- * l_bl_ast list is accessed later without any special locking. */
+ * l_bl_ast list is accessed later without any special locking.
+ *
+ * Calling policies for enabled lru resize:
+ * ----------------------------------------
+ * flags & LDLM_CANCEL_LRUR - use lru resize policy (SLV from server) to
+ * cancel not more than @count locks;
+ *
+ * flags & LDLM_CANCEL_PASSED - cancel @count old locks (located at the
+ * beginning of the lru list);
+ *
+ * flags & LDLM_CANCEL_SHRINK - cancel not more than @count locks according to
+ * the memory pressure policy function.
+ */
int ldlm_cancel_lru_local(struct ldlm_namespace *ns, struct list_head *cancels,
int count, int max, int flags)
{
- cfs_time_t cur = cfs_time_current();
- int added = 0, unused;
- struct ldlm_lock *lock;
- __u64 slv, lvf, lv;
+ ldlm_cancel_lru_policy_t cancel_lru_policy_func;
+ int added = 0, unused, cancel;
+ struct ldlm_lock *lock, *next;
ENTRY;
spin_lock(&ns->ns_unused_lock);
if (!ns_connect_lru_resize(ns))
count += unused - ns->ns_max_unused;
- while (!list_empty(&ns->ns_unused_list)) {
- struct ldlm_pool *pl = &ns->ns_pool;
-
- LASSERT(unused >= 0);
+ cancel_lru_policy_func = ldlm_cancel_lru_policy(ns, flags);
+
+ list_for_each_entry_safe(lock, next, &ns->ns_unused_list, l_lru) {
+ /* Make sure that we skip locks that are already being canceled. */
+ if ((lock->l_flags & LDLM_FL_CANCELING) ||
+ (lock->l_flags & LDLM_FL_BL_AST))
+ continue;
- if (max && added >= max)
+ /* For any flags, stop scanning if @max or passed @count is
+ * reached. */
+ if ((max && added >= max) || (count && added >= count))
break;
- list_for_each_entry(lock, &ns->ns_unused_list, l_lru) {
- /* somebody is already doing CANCEL or there is a
- * blocking request will send cancel. */
- if (!(lock->l_flags & LDLM_FL_CANCELING) &&
- !(lock->l_flags & LDLM_FL_BL_AST))
+ /* Pass the lock through the policy filter and see if it
+ * should stay in lru. */
+ if (cancel_lru_policy_func != NULL) {
+ cancel = cancel_lru_policy_func(ns, lock, unused,
+ added, count);
+
+ /* For the shrink policy take the next lock, as we need to check
+ * the whole list. For other policies stop scanning. */
+ if ((flags & LDLM_CANCEL_SHRINK) && !cancel)
+ continue;
+ else if (!cancel)
break;
}
- if (&lock->l_lru == &ns->ns_unused_list)
- break;
- if (ns_connect_lru_resize(ns)) {
- cfs_time_t la;
-
- /* Take into account SLV only if cpount == 0. */
- if (count == 0) {
- /* Calculate lv for every lock. */
- spin_lock(&pl->pl_lock);
- slv = ldlm_pool_get_slv(pl);
- lvf = atomic_read(&pl->pl_lock_volume_factor);
- spin_unlock(&pl->pl_lock);
-
- la = cfs_duration_sec(cfs_time_sub(cur,
- lock->l_last_used));
- if (la == 0)
- la = 1;
-
- /* Stop when slv is not yet come from server
- * or lv is smaller than it is. */
- lv = lvf * la * unused;
- if (slv == 1 || lv < slv)
- break;
- } else {
- if (added >= count)
- break;
+ if (cancels != NULL) {
+ LDLM_LOCK_GET(lock); /* dropped by bl thread */
+ spin_unlock(&ns->ns_unused_lock);
+
+ lock_res_and_lock(lock);
+ /* Check flags again under the lock. */
+ if ((lock->l_flags & LDLM_FL_CANCELING) ||
+ (lock->l_flags & LDLM_FL_BL_AST) ||
+ (ldlm_lock_remove_from_lru(lock) == 0)) {
+ /* Another thread is removing the lock from the lru,
+ * somebody is already doing CANCEL, or there is
+ * a blocking request which will send the cancel
+ * by itself. */
+ unlock_res_and_lock(lock);
+ LDLM_LOCK_PUT(lock);
+ spin_lock(&ns->ns_unused_lock);
+ continue;
}
- } else {
- if ((added >= count) &&
- (!(flags & LDLM_CANCEL_AGED) ||
- cfs_time_before_64(cur, ns->ns_max_age +
- lock->l_last_used)))
- break;
- }
-
- LDLM_LOCK_GET(lock); /* dropped by bl thread */
- spin_unlock(&ns->ns_unused_lock);
-
- lock_res_and_lock(lock);
- /* Check flags again under the lock. */
- if ((lock->l_flags & LDLM_FL_CANCELING) ||
- (lock->l_flags & LDLM_FL_BL_AST) ||
- (ldlm_lock_remove_from_lru(lock) == 0)) {
- /* other thread is removing lock from lru or
- * somebody is already doing CANCEL or
- * there is a blocking request which will send
- * cancel by itseft. */
+ LASSERT(!lock->l_readers && !lock->l_writers);
+
+ /* If we have chosen to cancel this lock voluntarily, we
+ * had better send a cancel notification to the server so
+ * that it frees the appropriate state. This might lead to
+ * a race where, while we are doing the cancel here, the
+ * server is also silently canceling this lock. */
+ lock->l_flags &= ~LDLM_FL_CANCEL_ON_BLOCK;
+
+ /* Setting the CBPENDING flag is a little misleading, but
+ * prevents an important race; namely, once CBPENDING is
+ * set, the lock can accumulate no more readers/writers.
+ * Since readers and writers are already zero here,
+ * ldlm_lock_decref() won't see this flag and call
+ * l_blocking_ast */
+ lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING;
+
+ /* We can't re-add to l_lru as it confuses the refcounting
+ * in ldlm_lock_remove_from_lru() if an AST arrives after
+ * we drop ns_lock below. We use l_bl_ast and can't use
+ * l_pending_chain as it is used on both server and client,
+ * even though bug 5666 says it is used only on the server. */
+ LASSERT(list_empty(&lock->l_bl_ast));
+ list_add(&lock->l_bl_ast, cancels);
unlock_res_and_lock(lock);
- LDLM_LOCK_PUT(lock);
spin_lock(&ns->ns_unused_lock);
- continue;
}
- LASSERT(!lock->l_readers && !lock->l_writers);
-
- /* If we have chosen to canecl this lock voluntarily, we better
- send cancel notification to server, so that it frees
- appropriate state. This might lead to a race where while
- we are doing cancel here, server is also silently
- cancelling this lock. */
- lock->l_flags &= ~LDLM_FL_CANCEL_ON_BLOCK;
-
- /* Setting the CBPENDING flag is a little misleading, but
- * prevents an important race; namely, once CBPENDING is set,
- * the lock can accumulate no more readers/writers. Since
- * readers and writers are already zero here, ldlm_lock_decref
- * won't see this flag and call l_blocking_ast */
- lock->l_flags |= LDLM_FL_CBPENDING | LDLM_FL_CANCELING;
- /* We can't re-add to l_lru as it confuses the refcounting in
- * ldlm_lock_remove_from_lru() if an AST arrives after we drop
- * ns_lock below. We use l_bl_ast and can't use l_pending_chain
- * as it is used both on server and client nevertheles bug 5666
- * says it is used only on server. --umka */
-
- LASSERT(list_empty(&lock->l_bl_ast));
- list_add(&lock->l_bl_ast, cancels);
- unlock_res_and_lock(lock);
- spin_lock(&ns->ns_unused_lock);
added++;
unused--;
}
spin_unlock(&ns->ns_unused_lock);
+
+ if (cancels == NULL)
+ RETURN(added);
RETURN(ldlm_cancel_list(cancels, added));
}
* in a thread and this function will return after the thread has been
* asked to call the callback. when called with LDLM_SYNC the blocking
* callback will be performed in this function. */
-int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, ldlm_sync_t sync)
+int ldlm_cancel_lru(struct ldlm_namespace *ns, int nr, ldlm_sync_t sync,
+ int flags)
{
CFS_LIST_HEAD(cancels);
int count, rc;
#ifndef __KERNEL__
sync = LDLM_SYNC; /* force to be sync in user space */
#endif
- count = ldlm_cancel_lru_local(ns, &cancels, nr, 0, 0);
+ count = ldlm_cancel_lru_local(ns, &cancels, nr, 0, flags);
if (sync == LDLM_ASYNC) {
rc = ldlm_bl_to_thread_list(ns, NULL, &cancels, count);
if (rc == 0)
int canceled, unused = ns->ns_nr_unused;
/* Try to cancel all @ns_nr_unused locks. */
- canceled = ldlm_cancel_lru(ns, unused, LDLM_SYNC);
+ canceled = ldlm_cancel_lru(ns, unused, LDLM_SYNC,
+ LDLM_CANCEL_PASSED);
if (canceled < unused) {
CERROR("not all requested locks are canceled, "
"requested: %d, canceled: %d\n", unused,
} else {
tmp = ns->ns_max_unused;
ns->ns_max_unused = 0;
- ldlm_cancel_lru(ns, 0, LDLM_SYNC);
+ ldlm_cancel_lru(ns, 0, LDLM_SYNC, LDLM_CANCEL_PASSED);
ns->ns_max_unused = tmp;
}
return count;
CDEBUG(D_DLMTRACE, "changing namespace %s unused locks from %u to %u\n",
ns->ns_name, ns->ns_nr_unused, (unsigned int)tmp);
- ldlm_cancel_lru(ns, (unsigned int)tmp, LDLM_ASYNC);
+ ldlm_cancel_lru(ns, (unsigned int)tmp, LDLM_ASYNC, LDLM_CANCEL_PASSED);
if (!lru_resize) {
CDEBUG(D_DLMTRACE, "disable lru_resize for namespace %s\n",
CDEBUG(D_DLMTRACE, "changing namespace %s max_unused from %u to %u\n",
ns->ns_name, ns->ns_max_unused, (unsigned int)tmp);
ns->ns_max_unused = (unsigned int)tmp;
- ldlm_cancel_lru(ns, 0, LDLM_ASYNC);
+ ldlm_cancel_lru(ns, 0, LDLM_ASYNC, LDLM_CANCEL_PASSED);
/* Make sure that originally lru resize was supported before
* turning it on here. */
lock_vars[0].write_fptr = lprocfs_wr_lru_size;
lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
+ snprintf(lock_name, MAX_STRING_SIZE, "%s/shrink_thumb",
+ ns->ns_name);
+ lock_vars[0].data = &ns->ns_shrink_thumb;
+ lock_vars[0].read_fptr = lprocfs_rd_uint;
+ lock_vars[0].write_fptr = lprocfs_wr_uint;
+ lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
+
snprintf(lock_name, MAX_STRING_SIZE, "%s/lru_max_age",
ns->ns_name);
lock_vars[0].data = &ns->ns_max_age;
lock_vars[0].read_fptr = lprocfs_rd_uint;
lock_vars[0].write_fptr = lprocfs_wr_uint;
lprocfs_add_vars(ldlm_ns_proc_dir, lock_vars, 0);
-
}
}
#undef MAX_STRING_SIZE
if (!ns->ns_hash)
GOTO(out_ns, NULL);
+ ns->ns_shrink_thumb = LDLM_LOCK_SHRINK_THUMB;
ns->ns_appetite = apt;
namelen = strlen(name);
OBD_ALLOC(ns->ns_name, namelen + 1);
lustre_msg_set_opc(req->rq_repmsg,
req->rq_reqmsg ? lustre_msg_get_opc(req->rq_reqmsg) : 0);
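+ /* Pack the current SLV and limit into the reply for clients
+ * supporting lru resize. */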
+ if (req->rq_export && req->rq_export->exp_obd)
+ target_pack_pool_reply(req);
+
if (req->rq_export == NULL || req->rq_export->exp_connection == NULL)
conn = ptlrpc_get_connection(req->rq_peer, req->rq_self, NULL);
else
}
run_test 121 "read cancel race ========="
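+# Cancel lru locks in all namespaces matching $1 (defaults to "mdc").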
+cmd_cancel_lru_locks() {
+ NS=$1
+ test "x$NS" = "x" && NS="mdc"
+ for d in `find $LPROC/ldlm/namespaces | grep $NS`; do
+ if test -f $d/lru_size; then
+ cancel_lru_locks $d
+ fi
+ done
+}
+
test_124a() {
[ -z "`grep lru_resize $LPROC/mdc/*/connect_flags`" ] && \
skip "no lru resize on server" && return 0
- cancel_lru_locks mdc
+ cmd_cancel_lru_locks "mdc"
lru_resize_enable
- NSDIR=`find $LPROC/ldlm/namespaces | grep mdc | head -1`
# we want to test main pool functionality, that is cancel based on SLV
# this is why shrinkers are disabled
NR=2000
mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir"
- LRU_SIZE=`cat $NSDIR/lru_size`
-
# use touch to produce $NR new locks
log "create $NR files at $DIR/$tdir"
for ((i=0;i<$NR;i++)); do touch $DIR/$tdir/f$i; done
+
+ NSDIR=""
+ LRU_SIZE=0
+ for d in `find $LPROC/ldlm/namespaces | grep mdc-`; do
+ if test -f $d/lru_size; then
+ LRU_SIZE=`cat $d/lru_size`
+ if test $LRU_SIZE -gt 0; then
+ log "using $d namespace"
+ NSDIR=$d
+ break
+ fi
+ fi
+ done
- LRU_SIZE_B=`cat $NSDIR/lru_size`
- if test $LRU_SIZE -ge $LRU_SIZE_B; then
+ if test -z "$NSDIR"; then
skip "No cached locks created!"
- cat $NSDIR/pool/state
return 0
fi
- LRU_SIZE_B=$((LRU_SIZE_B-LRU_SIZE))
- log "created $LRU_SIZE_B lock(s)"
+
+ if test $LRU_SIZE -lt 100; then
+ skip "Not enough cached locks created!"
+ return 0
+ fi
+ log "created $LRU_SIZE lock(s)"
# we want to sleep 30s to not make test too long
SLEEP=30
# Use $LRU_SIZE_B here to take into account real number of locks created
# in the case of CMD, LRU_SIZE_B != $NR in most of cases
LVF=$(($MAX_HRS * 60 * 60 * $LIMIT / $SLEEP))
+ LRU_SIZE_B=$LRU_SIZE
log "make client drop locks $LVF times faster so that ${SLEEP}s is enough to cancel $LRU_SIZE_B lock(s)"
OLD_LVF=`cat $NSDIR/pool/lock_volume_factor`
echo "$LVF" > $NSDIR/pool/lock_volume_factor
}
run_test 124a "lru resize ======================================="
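+# Set lru_size in all namespaces matching $1. A positive size disables
+# lru resize for the namespace, zero flushes the lru and enables it.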
+set_lru_size() {
+ NS=$1
+ SIZE=$2
+ test "x$NS" = "x" && NS="mdc"
+ test "x$SIZE" = "x" && SIZE="0"
+ test $SIZE -lt 0 && SIZE="0"
+ test $SIZE -gt 0 && ACTION="disabled" || ACTION="enabled"
+ for d in `find $LPROC/ldlm/namespaces | grep $NS`; do
+ if test -f $d/lru_size; then
+ log "$(basename $d):"
+ log " lru resize $ACTION"
+ log " lru_size=$SIZE"
+ echo $SIZE > $d/lru_size
+ fi
+ done
+}
+
+get_lru_size() {
+ NS=$1
+ test "x$NS" = "x" && NS="mdc"
+ for d in `find $LPROC/ldlm/namespaces | grep $NS`; do
+ if test -f $d/lru_size; then
+ log "$(basename $d):"
+ log " lru_size=$(cat $d/lru_size)"
+ fi
+ done
+}
+
test_124b() {
[ -z "`grep lru_resize $LPROC/mdc/*/connect_flags`" ] && \
skip "no lru resize on server" && return 0
- cleanup -f || error "failed to unmount"
- MOUNTOPT="$MOUNTOPT,nolruresize"
- setup
- NR=2000
- mkdir -p $DIR/$tdir || error "failed to create $DIR/$tdir"
+ NSDIR=`find $LPROC/ldlm/namespaces | grep mdc | head -1`
+ LIMIT=`cat $NSDIR/pool/limit`
+
+ NR_CPU=$(awk '/processor/' /proc/cpuinfo | wc -l)
+ # 100 locks here is default value for non-shrinkable lru as well
+ # as the order to switch to static lru managing policy
+ # define LDLM_DEFAULT_LRU_SIZE (100 * num_online_cpus())
+ LDLM_DEFAULT_LRU_SIZE=$((100 * NR_CPU))
+
+ NR=$((LIMIT-(LIMIT/3)))
+ log "starting lru resize disable cycle"
+ set_lru_size "mdc-" $LDLM_DEFAULT_LRU_SIZE
- createmany -o $DIR/$tdir/f $NR
- log "doing ls -la $DIR/$tdir 3 times (lru resize disabled)"
+ mkdir -p $DIR/$tdir/disable_lru_resize ||
+ error "failed to create $DIR/$tdir/disable_lru_resize"
+
+ createmany -o $DIR/$tdir/disable_lru_resize/f $NR
+ log "doing ls -la $DIR/$tdir/disable_lru_resize 3 times"
stime=`date +%s`
- ls -la $DIR/$tdir > /dev/null
- ls -la $DIR/$tdir > /dev/null
- ls -la $DIR/$tdir > /dev/null
+ ls -la $DIR/$tdir/disable_lru_resize > /dev/null
+ ls -la $DIR/$tdir/disable_lru_resize > /dev/null
+ ls -la $DIR/$tdir/disable_lru_resize > /dev/null
etime=`date +%s`
nolruresize_delta=$((etime-stime))
log "ls -la time: $nolruresize_delta seconds"
+ get_lru_size "mdc-"
+
+ log "starting lru resize enable cycle"
+ mkdir -p $DIR/$tdir/enable_lru_resize ||
+ error "failed to create $DIR/$tdir/enable_lru_resize"
- cleanup -f || error "failed to unmount"
- MOUNTOPT=`echo $MOUNTOPT | sed "s/nolruresize/lruresize/"`
- setup
+ # 0 locks means here flush lru and switch to lru resize policy
+ set_lru_size "mdc-" 0
- createmany -o $DIR/$tdir/f $NR
- log "doing ls -la $DIR/$tdir 3 times (lru resize enabled)"
+ createmany -o $DIR/$tdir/enable_lru_resize/f $NR
+ log "doing ls -la $DIR/$tdir/enable_lru_resize 3 times"
stime=`date +%s`
- ls -la $DIR/$tdir > /dev/null
- ls -la $DIR/$tdir > /dev/null
- ls -la $DIR/$tdir > /dev/null
+ ls -la $DIR/$tdir/enable_lru_resize > /dev/null
+ ls -la $DIR/$tdir/enable_lru_resize > /dev/null
+ ls -la $DIR/$tdir/enable_lru_resize > /dev/null
etime=`date +%s`
lruresize_delta=$((etime-stime))
log "ls -la time: $lruresize_delta seconds"
+ get_lru_size "mdc-"
if test $lruresize_delta -gt $nolruresize_delta; then
log "ls -la is $((lruresize_delta - $nolruresize_delta))s slower with lru resize enabled"
else
log "lru resize performs the same with no lru resize"
fi
-
- unlinkmany $DIR/$tdir/f $NR
}
run_test 124b "lru resize (performance test) ======================="