* GPL HEADER END
*/
/*
- * Copyright (c) 2014, 2016, Intel Corporation.
+ * Copyright (c) 2014, 2017, Intel Corporation.
*/
/*
* This file is part of Lustre, http://www.lustre.org/
#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/random.h>
#include <lnet/lib-lnet.h>
-#include <lnet/lnetctl.h>
+#include <uapi/linux/lnet/lnetctl.h>
#define LNET_MSG_MASK (LNET_PUT_BIT | LNET_ACK_BIT | \
LNET_GET_BIT | LNET_REPLY_BIT)
/**
* seconds to drop the next message, it's exclusive with dr_drop_at
*/
- cfs_time_t dr_drop_time;
+ time64_t dr_drop_time;
/** baseline to caculate dr_drop_time */
- cfs_time_t dr_time_base;
+ time64_t dr_time_base;
/** statistic of dropped messages */
struct lnet_fault_stat dr_stat;
};
static bool
lnet_fault_attr_match(struct lnet_fault_attr *attr, lnet_nid_t src,
- lnet_nid_t dst, unsigned int type, unsigned int portal)
+ lnet_nid_t local_nid, lnet_nid_t dst,
+ unsigned int type, unsigned int portal)
{
if (!lnet_fault_nid_match(attr->fa_src, src) ||
- !lnet_fault_nid_match(attr->fa_dst, dst))
+ !lnet_fault_nid_match(attr->fa_dst, dst) ||
+ !lnet_fault_nid_match(attr->fa_local_nid, local_nid))
return false;
- if (!(attr->fa_msg_mask & (1 << type)))
+ if (!(attr->fa_msg_mask & BIT(type)))
return false;
/* NB: ACK and REPLY have no portal, but they should have been
rule->dr_attr = *attr;
if (attr->u.drop.da_interval != 0) {
- rule->dr_time_base = cfs_time_shift(attr->u.drop.da_interval);
- rule->dr_drop_time = cfs_time_shift(cfs_rand() %
- attr->u.drop.da_interval);
+ rule->dr_time_base = ktime_get_seconds() + attr->u.drop.da_interval;
+ rule->dr_drop_time = ktime_get_seconds() +
+ prandom_u32_max(attr->u.drop.da_interval);
} else {
- rule->dr_drop_at = cfs_rand() % attr->u.drop.da_rate;
+ rule->dr_drop_at = prandom_u32_max(attr->u.drop.da_rate);
}
lnet_net_lock(LNET_LOCK_EX);
{
struct lnet_drop_rule *rule;
struct lnet_drop_rule *tmp;
- struct list_head zombies;
- int n = 0;
+ LIST_HEAD(zombies);
+ int n = 0;
ENTRY;
- INIT_LIST_HEAD(&zombies);
-
lnet_net_lock(LNET_LOCK_EX);
list_for_each_entry_safe(rule, tmp, &the_lnet.ln_drop_rules, dr_link) {
if (rule->dr_attr.fa_src != src && src != 0)
memset(&rule->dr_stat, 0, sizeof(rule->dr_stat));
if (attr->u.drop.da_rate != 0) {
- rule->dr_drop_at = cfs_rand() % attr->u.drop.da_rate;
+ rule->dr_drop_at = prandom_u32_max(attr->u.drop.da_rate);
} else {
- rule->dr_drop_time = cfs_time_shift(cfs_rand() %
- attr->u.drop.da_interval);
- rule->dr_time_base = cfs_time_shift(attr->u.drop.
- da_interval);
+ rule->dr_drop_time = ktime_get_seconds() +
+ prandom_u32_max(attr->u.drop.da_interval);
+ rule->dr_time_base = ktime_get_seconds() + attr->u.drop.da_interval;
}
spin_unlock(&rule->dr_lock);
}
EXIT;
}
+/**
+ * Select a health status error to report for a message that matched a
+ * fault rule configured with a health error mask.
+ *
+ * \param hstatus out: the chosen health status; never left at the OK value
+ * \param mask    bitmask of health errors enabled by the rule, or
+ *                HSTATUS_RANDOM to accept any randomly chosen status
+ */
+static void
+lnet_fault_match_health(enum lnet_msg_hstatus *hstatus, __u32 mask)
+{
+ int choice;
+ int delta;
+ int best_delta;
+ int i;
+
+ /* assign a random failure */
+ choice = prandom_u32_max(LNET_MSG_STATUS_END - LNET_MSG_STATUS_OK);
+ /* never report status 0 (success); bump to the first failure value */
+ if (choice == 0)
+ choice++;
+
+ /* any failure is acceptable: use the random pick as-is */
+ if (mask == HSTATUS_RANDOM) {
+ *hstatus = choice;
+ return;
+ }
+
+ /* the random pick happens to be enabled in the mask: use it */
+ if (mask & BIT(choice)) {
+ *hstatus = choice;
+ return;
+ }
+
+ /*
+ * NOTE(review): choice is drawn from the LNET_MSG_STATUS_* range but
+ * is rounded against HSTATUS_* bits below -- confirm the two enums
+ * are meant to coincide.
+ */
+ /* round to the closest ON bit */
+ i = HSTATUS_END;
+ best_delta = HSTATUS_END;
+ /*
+ * Scan enabled bits from the highest status downward, snapping
+ * choice to the nearest enabled one.
+ * NOTE(review): choice is overwritten inside the loop, so later
+ * deltas are measured from the snapped value rather than the
+ * original random pick -- verify this drift is intended.
+ */
+ while (i > 0) {
+ if (mask & BIT(i)) {
+ delta = choice - i;
+ if (delta < 0)
+ delta *= -1;
+ if (delta < best_delta) {
+ best_delta = delta;
+ choice = i;
+ }
+ }
+ i--;
+ }
+
+ *hstatus = choice;
+}
+
/**
* check source/destination NID, portal, message type and drop rate,
* decide whether should drop this message or not
*/
static bool
drop_rule_match(struct lnet_drop_rule *rule, lnet_nid_t src,
- lnet_nid_t dst, unsigned int type, unsigned int portal)
+ lnet_nid_t local_nid, lnet_nid_t dst,
+ unsigned int type, unsigned int portal,
+ enum lnet_msg_hstatus *hstatus)
{
struct lnet_fault_attr *attr = &rule->dr_attr;
bool drop;
- if (!lnet_fault_attr_match(attr, src, dst, type, portal))
+ if (!lnet_fault_attr_match(attr, src, local_nid, dst, type, portal))
+ return false;
+
+ if (attr->u.drop.da_drop_all) {
+ CDEBUG(D_NET, "set to drop all messages\n");
+ drop = true;
+ goto drop_matched;
+ }
+
+ /*
+ * if we're trying to match a health status error but it hasn't
+ * been set in the rule, then don't match
+ */
+ if ((hstatus && !attr->u.drop.da_health_error_mask) ||
+ (!hstatus && attr->u.drop.da_health_error_mask))
return false;
/* match this rule, check drop rate now */
spin_lock(&rule->dr_lock);
- if (rule->dr_drop_time != 0) { /* time based drop */
- cfs_time_t now = cfs_time_current();
+ if (attr->u.drop.da_random) {
+ int value = prandom_u32_max(attr->u.drop.da_interval);
+ if (value >= (attr->u.drop.da_interval / 2))
+ drop = true;
+ else
+ drop = false;
+ } else if (rule->dr_drop_time != 0) { /* time based drop */
+ time64_t now = ktime_get_seconds();
rule->dr_stat.fs_count++;
- drop = cfs_time_aftereq(now, rule->dr_drop_time);
+ drop = now >= rule->dr_drop_time;
if (drop) {
- if (cfs_time_after(now, rule->dr_time_base))
+ if (now > rule->dr_time_base)
rule->dr_time_base = now;
rule->dr_drop_time = rule->dr_time_base +
- cfs_time_seconds(cfs_rand() %
- attr->u.drop.da_interval);
- rule->dr_time_base += cfs_time_seconds(attr->u.drop.
- da_interval);
-
- CDEBUG(D_NET, "Drop Rule %s->%s: next drop : "
- CFS_TIME_T"\n",
- libcfs_nid2str(attr->fa_src),
- libcfs_nid2str(attr->fa_dst),
- rule->dr_drop_time);
+ prandom_u32_max(attr->u.drop.da_interval);
+ rule->dr_time_base += attr->u.drop.da_interval;
+
+ CDEBUG(D_NET, "Drop Rule %s->%s: next drop : %lld\n",
+ libcfs_nid2str(attr->fa_src),
+ libcfs_nid2str(attr->fa_dst),
+ rule->dr_drop_time);
}
} else { /* rate based drop */
count = rule->dr_stat.fs_count;
if (do_div(count, attr->u.drop.da_rate) == 0) {
rule->dr_drop_at = rule->dr_stat.fs_count +
- cfs_rand() % attr->u.drop.da_rate;
+ prandom_u32_max(attr->u.drop.da_rate);
CDEBUG(D_NET, "Drop Rule %s->%s: next drop: %lu\n",
libcfs_nid2str(attr->fa_src),
libcfs_nid2str(attr->fa_dst), rule->dr_drop_at);
}
}
+drop_matched:
+
if (drop) { /* drop this message, update counters */
+ if (hstatus)
+ lnet_fault_match_health(hstatus,
+ attr->u.drop.da_health_error_mask);
lnet_fault_stat_inc(&rule->dr_stat, type);
rule->dr_stat.u.drop.ds_dropped++;
}
* Check if message from \a src to \a dst can match any existed drop rule
*/
bool
-lnet_drop_rule_match(lnet_hdr_t *hdr)
+lnet_drop_rule_match(struct lnet_hdr *hdr,
+ lnet_nid_t local_nid,
+ enum lnet_msg_hstatus *hstatus)
{
- struct lnet_drop_rule *rule;
- lnet_nid_t src = le64_to_cpu(hdr->src_nid);
- lnet_nid_t dst = le64_to_cpu(hdr->dest_nid);
- unsigned int typ = le32_to_cpu(hdr->type);
- unsigned int ptl = -1;
- bool drop = false;
- int cpt;
+ lnet_nid_t src = le64_to_cpu(hdr->src_nid);
+ lnet_nid_t dst = le64_to_cpu(hdr->dest_nid);
+ unsigned int typ = le32_to_cpu(hdr->type);
+ struct lnet_drop_rule *rule;
+ unsigned int ptl = -1;
+ bool drop = false;
+ int cpt;
/* NB: if Portal is specified, then only PUT and GET will be
* filtered by drop rule */
+ /*
+ * Walk the installed drop rules under the net lock; the first rule
+ * that matches (and whose rate/time window fires) decides the drop.
+ * When hstatus is non-NULL a matching rule may also fill it with an
+ * injected health error.
+ */
cpt = lnet_net_lock_current();
list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) {
- drop = drop_rule_match(rule, src, dst, typ, ptl);
+ drop = drop_rule_match(rule, src, local_nid, dst, typ, ptl,
+ hstatus);
if (drop)
break;
}
-
lnet_net_unlock(cpt);
+
return drop;
}
/**
* seconds to delay the next message, it's exclusive with dl_delay_at
*/
- cfs_time_t dl_delay_time;
+ time64_t dl_delay_time;
/** baseline to caculate dl_delay_time */
- cfs_time_t dl_time_base;
- /** jiffies to send the next delayed message */
- unsigned long dl_msg_send;
+ time64_t dl_time_base;
+ /** seconds until we send the next delayed message */
+ time64_t dl_msg_send;
/** delayed message list */
struct list_head dl_msg_list;
/** statistic of delayed messages */
static struct delay_daemon_data delay_dd;
-static cfs_time_t
-round_timeout(cfs_time_t timeout)
-{
- return cfs_time_seconds((unsigned int)
- cfs_duration_sec(cfs_time_sub(timeout, 0)) + 1);
-}
-
static void
delay_rule_decref(struct lnet_delay_rule *rule)
{
struct lnet_fault_attr *attr = &rule->dl_attr;
bool delay;
- if (!lnet_fault_attr_match(attr, src, dst, type, portal))
+ if (!lnet_fault_attr_match(attr, src, LNET_NID_ANY,
+ dst, type, portal))
return false;
/* match this rule, check delay rate now */
spin_lock(&rule->dl_lock);
if (rule->dl_delay_time != 0) { /* time based delay */
- cfs_time_t now = cfs_time_current();
+ time64_t now = ktime_get_seconds();
rule->dl_stat.fs_count++;
- delay = cfs_time_aftereq(now, rule->dl_delay_time);
+ delay = now >= rule->dl_delay_time;
if (delay) {
- if (cfs_time_after(now, rule->dl_time_base))
+ if (now > rule->dl_time_base)
rule->dl_time_base = now;
rule->dl_delay_time = rule->dl_time_base +
- cfs_time_seconds(cfs_rand() %
- attr->u.delay.la_interval);
- rule->dl_time_base += cfs_time_seconds(attr->u.delay.
- la_interval);
-
- CDEBUG(D_NET, "Delay Rule %s->%s: next delay : "
- CFS_TIME_T"\n",
- libcfs_nid2str(attr->fa_src),
- libcfs_nid2str(attr->fa_dst),
- rule->dl_delay_time);
+ prandom_u32_max(attr->u.delay.la_interval);
+ rule->dl_time_base += attr->u.delay.la_interval;
+
+ CDEBUG(D_NET, "Delay Rule %s->%s: next delay : %lld\n",
+ libcfs_nid2str(attr->fa_src),
+ libcfs_nid2str(attr->fa_dst),
+ rule->dl_delay_time);
}
} else { /* rate based delay */
count = rule->dl_stat.fs_count;
if (do_div(count, attr->u.delay.la_rate) == 0) {
rule->dl_delay_at = rule->dl_stat.fs_count +
- cfs_rand() % attr->u.delay.la_rate;
+ prandom_u32_max(attr->u.delay.la_rate);
CDEBUG(D_NET, "Delay Rule %s->%s: next delay: %lu\n",
libcfs_nid2str(attr->fa_src),
libcfs_nid2str(attr->fa_dst), rule->dl_delay_at);
rule->dl_stat.u.delay.ls_delayed++;
list_add_tail(&msg->msg_list, &rule->dl_msg_list);
- msg->msg_delay_send = round_timeout(
- cfs_time_shift(attr->u.delay.la_latency));
+ msg->msg_delay_send = ktime_get_seconds() + attr->u.delay.la_latency;
if (rule->dl_msg_send == -1) {
rule->dl_msg_send = msg->msg_delay_send;
- mod_timer(&rule->dl_timer, rule->dl_msg_send);
+ mod_timer(&rule->dl_timer,
+ jiffies + cfs_time_seconds(rule->dl_msg_send));
}
spin_unlock(&rule->dl_lock);
* will be delayed if there is a match.
*/
bool
-lnet_delay_rule_match_locked(lnet_hdr_t *hdr, struct lnet_msg *msg)
+lnet_delay_rule_match_locked(struct lnet_hdr *hdr, struct lnet_msg *msg)
{
struct lnet_delay_rule *rule;
lnet_nid_t src = le64_to_cpu(hdr->src_nid);
{
struct lnet_msg *msg;
struct lnet_msg *tmp;
- unsigned long now = cfs_time_current();
+ time64_t now = ktime_get_seconds();
- if (!all && rule->dl_msg_send > now)
+ if (!all && cfs_time_seconds(rule->dl_msg_send) > now)
return;
spin_lock(&rule->dl_lock);
msg = list_entry(rule->dl_msg_list.next,
struct lnet_msg, msg_list);
rule->dl_msg_send = msg->msg_delay_send;
- mod_timer(&rule->dl_timer, rule->dl_msg_send);
+ mod_timer(&rule->dl_timer,
+ jiffies + cfs_time_seconds(rule->dl_msg_send));
}
spin_unlock(&rule->dl_lock);
}
case LNET_CREDIT_OK:
lnet_ni_recv(ni, msg->msg_private, msg, 0,
0, msg->msg_len, msg->msg_len);
+ /* fallthrough */
case LNET_CREDIT_WAIT:
continue;
default: /* failures */
}
}
- lnet_drop_message(ni, cpt, msg->msg_private, msg->msg_len);
- lnet_finalize(ni, msg, rc);
+ lnet_drop_message(ni, cpt, msg->msg_private, msg->msg_len,
+ msg->msg_type);
+ lnet_finalize(msg, rc);
}
}
void
lnet_delay_rule_check(void)
{
- struct lnet_delay_rule *rule;
- struct list_head msgs;
+ struct lnet_delay_rule *rule;
+ LIST_HEAD(msgs);
- INIT_LIST_HEAD(&msgs);
while (1) {
if (list_empty(&delay_dd.dd_sched_rules))
break;
}
static void
-delay_timer_cb(unsigned long arg)
+delay_timer_cb(cfs_timer_cb_arg_t data)
{
- struct lnet_delay_rule *rule = (struct lnet_delay_rule *)arg;
+ struct lnet_delay_rule *rule = cfs_from_timer(rule, data, dl_timer);
spin_lock_bh(&delay_dd.dd_lock);
if (list_empty(&rule->dl_sched_link) && delay_dd.dd_running) {
wait_event(delay_dd.dd_ctl_waitq, delay_dd.dd_running);
}
- init_timer(&rule->dl_timer);
- rule->dl_timer.function = delay_timer_cb;
- rule->dl_timer.data = (unsigned long)rule;
+ cfs_timer_setup(&rule->dl_timer, delay_timer_cb,
+ (unsigned long)rule, 0);
spin_lock_init(&rule->dl_lock);
INIT_LIST_HEAD(&rule->dl_msg_list);
rule->dl_attr = *attr;
if (attr->u.delay.la_interval != 0) {
- rule->dl_time_base = cfs_time_shift(attr->u.delay.la_interval);
- rule->dl_delay_time = cfs_time_shift(cfs_rand() %
- attr->u.delay.la_interval);
+ rule->dl_time_base = ktime_get_seconds() +
+ attr->u.delay.la_interval;
+ rule->dl_delay_time = ktime_get_seconds() +
+ prandom_u32_max(attr->u.delay.la_interval);
} else {
- rule->dl_delay_at = cfs_rand() % attr->u.delay.la_rate;
+ rule->dl_delay_at = prandom_u32_max(attr->u.delay.la_rate);
}
rule->dl_msg_send = -1;
lnet_delay_rule_del(lnet_nid_t src, lnet_nid_t dst, bool shutdown)
{
struct lnet_delay_rule *rule;
- struct lnet_delay_rule *tmp;
- struct list_head rule_list;
- struct list_head msg_list;
- int n = 0;
- bool cleanup;
+ struct lnet_delay_rule *tmp;
+ LIST_HEAD(rule_list);
+ LIST_HEAD(msg_list);
+ int n = 0;
+ bool cleanup;
ENTRY;
- INIT_LIST_HEAD(&rule_list);
- INIT_LIST_HEAD(&msg_list);
-
if (shutdown)
src = dst = 0;
memset(&rule->dl_stat, 0, sizeof(rule->dl_stat));
if (attr->u.delay.la_rate != 0) {
- rule->dl_delay_at = cfs_rand() % attr->u.delay.la_rate;
+ rule->dl_delay_at = prandom_u32_max(attr->u.delay.la_rate);
} else {
- rule->dl_delay_time = cfs_time_shift(cfs_rand() %
- attr->u.delay.la_interval);
- rule->dl_time_base = cfs_time_shift(attr->u.delay.
- la_interval);
+ rule->dl_delay_time = ktime_get_seconds() +
+ prandom_u32_max(attr->u.delay.la_interval);
+ rule->dl_time_base = ktime_get_seconds() +
+ attr->u.delay.la_interval;
}
spin_unlock(&rule->dl_lock);
}
int
lnet_fault_init(void)
{
- CLASSERT(LNET_PUT_BIT == 1 << LNET_MSG_PUT);
- CLASSERT(LNET_ACK_BIT == 1 << LNET_MSG_ACK);
- CLASSERT(LNET_GET_BIT == 1 << LNET_MSG_GET);
- CLASSERT(LNET_REPLY_BIT == 1 << LNET_MSG_REPLY);
+ BUILD_BUG_ON(LNET_PUT_BIT != BIT(LNET_MSG_PUT));
+ BUILD_BUG_ON(LNET_ACK_BIT != BIT(LNET_MSG_ACK));
+ BUILD_BUG_ON(LNET_GET_BIT != BIT(LNET_MSG_GET));
+ BUILD_BUG_ON(LNET_REPLY_BIT != BIT(LNET_MSG_REPLY));
mutex_init(&delay_dd.dd_mutex);
spin_lock_init(&delay_dd.dd_lock);