*/
/*
* This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
*
* lnet/lnet/net_fault.c
*
#define DEBUG_SUBSYSTEM S_LNET
+#include <linux/random.h>
#include <lnet/lib-lnet.h>
#include <uapi/linux/lnet/lnetctl.h>
};
static bool
-lnet_fault_nid_match(lnet_nid_t nid, lnet_nid_t msg_nid)
+lnet_fault_nid_match(lnet_nid_t nid, struct lnet_nid *msg_nid)
{
- if (nid == msg_nid || nid == LNET_NID_ANY)
+ if (nid == LNET_NID_ANY)
+ return true;
+ if (!msg_nid)
+ return false;
+ if (lnet_nid_to_nid4(msg_nid) == nid)
return true;
- if (LNET_NIDNET(nid) != LNET_NIDNET(msg_nid))
+ if (LNET_NIDNET(nid) != LNET_NID_NET(msg_nid))
return false;
/* 255.255.255.255@net is wildcard for all addresses in a network */
}
static bool
-lnet_fault_attr_match(struct lnet_fault_attr *attr, lnet_nid_t src,
- lnet_nid_t dst, unsigned int type, unsigned int portal)
+lnet_fault_attr_match(struct lnet_fault_attr *attr,
+ struct lnet_nid *src,
+ struct lnet_nid *local_nid,
+ struct lnet_nid *dst,
+ unsigned int type, unsigned int portal)
{
if (!lnet_fault_nid_match(attr->fa_src, src) ||
- !lnet_fault_nid_match(attr->fa_dst, dst))
+ !lnet_fault_nid_match(attr->fa_dst, dst) ||
+ !lnet_fault_nid_match(attr->fa_local_nid, local_nid))
return false;
- if (!(attr->fa_msg_mask & (1 << type)))
+ if (!(attr->fa_msg_mask & BIT(type)))
return false;
/* NB: ACK and REPLY have no portal, but they should have been
if (attr->u.drop.da_interval != 0) {
rule->dr_time_base = ktime_get_seconds() + attr->u.drop.da_interval;
rule->dr_drop_time = ktime_get_seconds() +
- cfs_rand() % attr->u.drop.da_interval;
+ prandom_u32_max(attr->u.drop.da_interval);
} else {
- rule->dr_drop_at = cfs_rand() % attr->u.drop.da_rate;
+ rule->dr_drop_at = prandom_u32_max(attr->u.drop.da_rate);
}
lnet_net_lock(LNET_LOCK_EX);
{
struct lnet_drop_rule *rule;
struct lnet_drop_rule *tmp;
- struct list_head zombies;
- int n = 0;
+ LIST_HEAD(zombies);
+ int n = 0;
ENTRY;
- INIT_LIST_HEAD(&zombies);
-
lnet_net_lock(LNET_LOCK_EX);
list_for_each_entry_safe(rule, tmp, &the_lnet.ln_drop_rules, dr_link) {
if (rule->dr_attr.fa_src != src && src != 0)
memset(&rule->dr_stat, 0, sizeof(rule->dr_stat));
if (attr->u.drop.da_rate != 0) {
- rule->dr_drop_at = cfs_rand() % attr->u.drop.da_rate;
+ rule->dr_drop_at = prandom_u32_max(attr->u.drop.da_rate);
} else {
rule->dr_drop_time = ktime_get_seconds() +
- cfs_rand() % attr->u.drop.da_interval;
+ prandom_u32_max(attr->u.drop.da_interval);
rule->dr_time_base = ktime_get_seconds() + attr->u.drop.da_interval;
}
spin_unlock(&rule->dr_lock);
EXIT;
}
+/**
+ * Pick a health-status failure to report for a dropped message.
+ *
+ * \param hstatus out: receives the chosen health status
+ * \param mask    bitmask of allowed health-status errors, or
+ *                HSTATUS_RANDOM to allow any failure status
+ *
+ * A status is drawn at random; if the mask does not permit it, the
+ * nearest permitted status (by bit position) is used instead.
+ *
+ * NOTE(review): 'choice' is drawn from the LNET_MSG_STATUS_* range but
+ * tested against HSTATUS_* bit positions below — assumes the two enums
+ * are aligned; confirm against their definitions.
+ */
+static void
+lnet_fault_match_health(enum lnet_msg_hstatus *hstatus, __u32 mask)
+{
+	int choice;
+	int delta;
+	int best_delta;
+	int i;
+
+	/* assign a random failure */
+	choice = prandom_u32_max(LNET_MSG_STATUS_END - LNET_MSG_STATUS_OK);
+	/* never pick offset 0 (the "OK"/success status) as a failure */
+	if (choice == 0)
+		choice++;
+
+	/* any failure is acceptable: return the random pick as-is */
+	if (mask == HSTATUS_RANDOM) {
+		*hstatus = choice;
+		return;
+	}
+
+	/* random pick happens to be permitted by the mask */
+	if (mask & BIT(choice)) {
+		*hstatus = choice;
+		return;
+	}
+
+	/* round to the closest ON bit */
+	/* scan all bit positions from HSTATUS_END down to 1, keeping the
+	 * permitted status whose position is closest to 'choice'; on a
+	 * distance tie the higher-numbered status wins, because the scan
+	 * descends and only a strictly smaller delta replaces the best. */
+	i = HSTATUS_END;
+	best_delta = HSTATUS_END;
+	while (i > 0) {
+		if (mask & BIT(i)) {
+			delta = choice - i;
+			/* absolute distance between candidate and pick */
+			if (delta < 0)
+				delta *= -1;
+			if (delta < best_delta) {
+				best_delta = delta;
+				choice = i;
+			}
+		}
+		i--;
+	}
+
+	*hstatus = choice;
+}
+
/**
* check source/destination NID, portal, message type and drop rate,
* decide whether should drop this message or not
*/
static bool
-drop_rule_match(struct lnet_drop_rule *rule, lnet_nid_t src,
- lnet_nid_t dst, unsigned int type, unsigned int portal)
+drop_rule_match(struct lnet_drop_rule *rule,
+ struct lnet_nid *src,
+ struct lnet_nid *local_nid,
+ struct lnet_nid *dst,
+ unsigned int type, unsigned int portal,
+ enum lnet_msg_hstatus *hstatus)
{
struct lnet_fault_attr *attr = &rule->dr_attr;
bool drop;
- if (!lnet_fault_attr_match(attr, src, dst, type, portal))
+ if (!lnet_fault_attr_match(attr, src, local_nid, dst, type, portal))
+ return false;
+
+ if (attr->u.drop.da_drop_all) {
+ CDEBUG(D_NET, "set to drop all messages\n");
+ drop = true;
+ goto drop_matched;
+ }
+
+ /*
+ * if we're trying to match a health status error but it hasn't
+ * been set in the rule, then don't match
+ */
+ if ((hstatus && !attr->u.drop.da_health_error_mask) ||
+ (!hstatus && attr->u.drop.da_health_error_mask))
return false;
/* match this rule, check drop rate now */
spin_lock(&rule->dr_lock);
- if (rule->dr_drop_time != 0) { /* time based drop */
+ if (attr->u.drop.da_random) {
+ int value = prandom_u32_max(attr->u.drop.da_interval);
+ if (value >= (attr->u.drop.da_interval / 2))
+ drop = true;
+ else
+ drop = false;
+ } else if (rule->dr_drop_time != 0) { /* time based drop */
time64_t now = ktime_get_seconds();
rule->dr_stat.fs_count++;
rule->dr_time_base = now;
rule->dr_drop_time = rule->dr_time_base +
- cfs_rand() % attr->u.drop.da_interval;
+ prandom_u32_max(attr->u.drop.da_interval);
rule->dr_time_base += attr->u.drop.da_interval;
CDEBUG(D_NET, "Drop Rule %s->%s: next drop : %lld\n",
count = rule->dr_stat.fs_count;
if (do_div(count, attr->u.drop.da_rate) == 0) {
rule->dr_drop_at = rule->dr_stat.fs_count +
- cfs_rand() % attr->u.drop.da_rate;
+ prandom_u32_max(attr->u.drop.da_rate);
CDEBUG(D_NET, "Drop Rule %s->%s: next drop: %lu\n",
libcfs_nid2str(attr->fa_src),
libcfs_nid2str(attr->fa_dst), rule->dr_drop_at);
}
}
+drop_matched:
+
if (drop) { /* drop this message, update counters */
+ if (hstatus)
+ lnet_fault_match_health(hstatus,
+ attr->u.drop.da_health_error_mask);
lnet_fault_stat_inc(&rule->dr_stat, type);
rule->dr_stat.u.drop.ds_dropped++;
}
* Check if message from \a src to \a dst can match any existed drop rule
*/
bool
-lnet_drop_rule_match(struct lnet_hdr *hdr)
+lnet_drop_rule_match(struct lnet_hdr *hdr,
+ struct lnet_nid *local_nid,
+ enum lnet_msg_hstatus *hstatus)
{
- struct lnet_drop_rule *rule;
- lnet_nid_t src = le64_to_cpu(hdr->src_nid);
- lnet_nid_t dst = le64_to_cpu(hdr->dest_nid);
- unsigned int typ = le32_to_cpu(hdr->type);
- unsigned int ptl = -1;
- bool drop = false;
- int cpt;
+ unsigned int typ = hdr->type;
+ struct lnet_drop_rule *rule;
+ unsigned int ptl = -1;
+ bool drop = false;
+ int cpt;
/* NB: if Portal is specified, then only PUT and GET will be
* filtered by drop rule */
cpt = lnet_net_lock_current();
list_for_each_entry(rule, &the_lnet.ln_drop_rules, dr_link) {
- drop = drop_rule_match(rule, src, dst, typ, ptl);
+ drop = drop_rule_match(rule, &hdr->src_nid, local_nid,
+ &hdr->dest_nid, typ, ptl,
+ hstatus);
if (drop)
break;
}
-
lnet_net_unlock(cpt);
+
return drop;
}
time64_t dl_delay_time;
	/** baseline to calculate dl_delay_time */
time64_t dl_time_base;
- /** jiffies to send the next delayed message */
- unsigned long dl_msg_send;
+ /** seconds until we send the next delayed message */
+ time64_t dl_msg_send;
/** delayed message list */
struct list_head dl_msg_list;
/** statistic of delayed messages */
* decide whether should delay this message or not
*/
static bool
-delay_rule_match(struct lnet_delay_rule *rule, lnet_nid_t src,
- lnet_nid_t dst, unsigned int type, unsigned int portal,
- struct lnet_msg *msg)
+delay_rule_match(struct lnet_delay_rule *rule, struct lnet_nid *src,
+ struct lnet_nid *dst, unsigned int type, unsigned int portal,
+ struct lnet_msg *msg)
{
- struct lnet_fault_attr *attr = &rule->dl_attr;
- bool delay;
+ struct lnet_fault_attr *attr = &rule->dl_attr;
+ bool delay;
+ time64_t now = ktime_get_seconds();
- if (!lnet_fault_attr_match(attr, src, dst, type, portal))
+ if (!lnet_fault_attr_match(attr, src, NULL,
+ dst, type, portal))
return false;
/* match this rule, check delay rate now */
spin_lock(&rule->dl_lock);
if (rule->dl_delay_time != 0) { /* time based delay */
- time64_t now = ktime_get_seconds();
-
rule->dl_stat.fs_count++;
delay = now >= rule->dl_delay_time;
if (delay) {
rule->dl_time_base = now;
rule->dl_delay_time = rule->dl_time_base +
- cfs_rand() % attr->u.delay.la_interval;
+ prandom_u32_max(attr->u.delay.la_interval);
rule->dl_time_base += attr->u.delay.la_interval;
CDEBUG(D_NET, "Delay Rule %s->%s: next delay : %lld\n",
count = rule->dl_stat.fs_count;
if (do_div(count, attr->u.delay.la_rate) == 0) {
rule->dl_delay_at = rule->dl_stat.fs_count +
- cfs_rand() % attr->u.delay.la_rate;
+ prandom_u32_max(attr->u.delay.la_rate);
CDEBUG(D_NET, "Delay Rule %s->%s: next delay: %lu\n",
libcfs_nid2str(attr->fa_src),
libcfs_nid2str(attr->fa_dst), rule->dl_delay_at);
rule->dl_stat.u.delay.ls_delayed++;
list_add_tail(&msg->msg_list, &rule->dl_msg_list);
- msg->msg_delay_send = ktime_get_seconds() + attr->u.delay.la_latency;
+ msg->msg_delay_send = now + attr->u.delay.la_latency;
if (rule->dl_msg_send == -1) {
rule->dl_msg_send = msg->msg_delay_send;
- mod_timer(&rule->dl_timer, rule->dl_msg_send);
+ mod_timer(&rule->dl_timer,
+ jiffies + cfs_time_seconds(attr->u.delay.la_latency));
}
spin_unlock(&rule->dl_lock);
lnet_delay_rule_match_locked(struct lnet_hdr *hdr, struct lnet_msg *msg)
{
struct lnet_delay_rule *rule;
- lnet_nid_t src = le64_to_cpu(hdr->src_nid);
- lnet_nid_t dst = le64_to_cpu(hdr->dest_nid);
- unsigned int typ = le32_to_cpu(hdr->type);
+ unsigned int typ = hdr->type;
unsigned int ptl = -1;
/* NB: called with hold of lnet_net_lock */
ptl = le32_to_cpu(hdr->msg.get.ptl_index);
list_for_each_entry(rule, &the_lnet.ln_delay_rules, dl_link) {
- if (delay_rule_match(rule, src, dst, typ, ptl, msg))
+ if (delay_rule_match(rule, &hdr->src_nid, &hdr->dest_nid,
+ typ, ptl, msg))
return true;
}
struct lnet_msg *tmp;
time64_t now = ktime_get_seconds();
- if (!all && cfs_time_seconds(rule->dl_msg_send) > now)
+ if (!all && rule->dl_msg_send > now)
return;
spin_lock(&rule->dl_lock);
} else if (!list_empty(msg_list)) {
/* dequeued some timedout messages, update timer for the
* next delayed message on rule */
- msg = list_entry(rule->dl_msg_list.next,
- struct lnet_msg, msg_list);
+ msg = list_first_entry(&rule->dl_msg_list,
+ struct lnet_msg, msg_list);
rule->dl_msg_send = msg->msg_delay_send;
- mod_timer(&rule->dl_timer, rule->dl_msg_send);
+ mod_timer(&rule->dl_timer,
+ jiffies +
+ cfs_time_seconds(msg->msg_delay_send - now));
}
spin_unlock(&rule->dl_lock);
}
{
struct lnet_msg *msg;
- while (!list_empty(msg_list)) {
+ while ((msg = list_first_entry_or_null(msg_list, struct lnet_msg,
+ msg_list)) != NULL) {
struct lnet_ni *ni;
int cpt;
int rc;
- msg = list_entry(msg_list->next, struct lnet_msg, msg_list);
+ if (msg->msg_sending) {
+ /* Delayed send */
+ list_del_init(&msg->msg_list);
+ ni = msg->msg_txni;
+ CDEBUG(D_NET, "TRACE: msg %p %s -> %s : %s\n", msg,
+ libcfs_nidstr(&ni->ni_nid),
+ libcfs_nidstr(&msg->msg_txpeer->lpni_nid),
+ lnet_msgtyp2str(msg->msg_type));
+ lnet_ni_send(ni, msg);
+ continue;
+ }
+
+ /* Delayed receive */
LASSERT(msg->msg_rxpeer != NULL);
LASSERT(msg->msg_rxni != NULL);
case LNET_CREDIT_OK:
lnet_ni_recv(ni, msg->msg_private, msg, 0,
0, msg->msg_len, msg->msg_len);
+ fallthrough;
case LNET_CREDIT_WAIT:
continue;
default: /* failures */
void
lnet_delay_rule_check(void)
{
- struct lnet_delay_rule *rule;
- struct list_head msgs;
+ struct lnet_delay_rule *rule;
+ LIST_HEAD(msgs);
- INIT_LIST_HEAD(&msgs);
while (1) {
if (list_empty(&delay_dd.dd_sched_rules))
break;
break;
}
- rule = list_entry(delay_dd.dd_sched_rules.next,
- struct lnet_delay_rule, dl_sched_link);
+ rule = list_first_entry(&delay_dd.dd_sched_rules,
+ struct lnet_delay_rule, dl_sched_link);
list_del_init(&rule->dl_sched_link);
spin_unlock_bh(&delay_dd.dd_lock);
rule->dl_time_base = ktime_get_seconds() +
attr->u.delay.la_interval;
rule->dl_delay_time = ktime_get_seconds() +
- cfs_rand() % attr->u.delay.la_interval;
+ prandom_u32_max(attr->u.delay.la_interval);
} else {
- rule->dl_delay_at = cfs_rand() % attr->u.delay.la_rate;
+ rule->dl_delay_at = prandom_u32_max(attr->u.delay.la_rate);
}
rule->dl_msg_send = -1;
lnet_delay_rule_del(lnet_nid_t src, lnet_nid_t dst, bool shutdown)
{
struct lnet_delay_rule *rule;
- struct lnet_delay_rule *tmp;
- struct list_head rule_list;
- struct list_head msg_list;
- int n = 0;
- bool cleanup;
+ struct lnet_delay_rule *tmp;
+ LIST_HEAD(rule_list);
+ LIST_HEAD(msg_list);
+ int n = 0;
+ bool cleanup;
ENTRY;
- INIT_LIST_HEAD(&rule_list);
- INIT_LIST_HEAD(&msg_list);
-
if (shutdown)
src = dst = 0;
memset(&rule->dl_stat, 0, sizeof(rule->dl_stat));
if (attr->u.delay.la_rate != 0) {
- rule->dl_delay_at = cfs_rand() % attr->u.delay.la_rate;
+ rule->dl_delay_at = prandom_u32_max(attr->u.delay.la_rate);
} else {
rule->dl_delay_time = ktime_get_seconds() +
- cfs_rand() % attr->u.delay.la_interval;
+ prandom_u32_max(attr->u.delay.la_interval);
rule->dl_time_base = ktime_get_seconds() +
attr->u.delay.la_interval;
}
int
lnet_fault_init(void)
{
- CLASSERT(LNET_PUT_BIT == 1 << LNET_MSG_PUT);
- CLASSERT(LNET_ACK_BIT == 1 << LNET_MSG_ACK);
- CLASSERT(LNET_GET_BIT == 1 << LNET_MSG_GET);
- CLASSERT(LNET_REPLY_BIT == 1 << LNET_MSG_REPLY);
+ BUILD_BUG_ON(LNET_PUT_BIT != BIT(LNET_MSG_PUT));
+ BUILD_BUG_ON(LNET_ACK_BIT != BIT(LNET_MSG_ACK));
+ BUILD_BUG_ON(LNET_GET_BIT != BIT(LNET_MSG_GET));
+ BUILD_BUG_ON(LNET_REPLY_BIT != BIT(LNET_MSG_REPLY));
mutex_init(&delay_dd.dd_mutex);
spin_lock_init(&delay_dd.dd_lock);