int (*lnd_nl_set)(int cmd, struct nlattr *attr, int type, void *data);
const struct ln_key_list *lnd_keys;
+
+ /* get LND timeout */
+ int (*lnd_get_timeout)(void);
};
struct lnet_tx_queue {
__u32 lnd_auth_key;
char lnd_traffic_class_str[LNET_MAX_STR_LEN];
__u32 lnd_traffic_class;
+ __u32 lnd_timeout;
};
struct lnet_ioctl_config_socklnd_tunables {
.lnd_eager_recv = kgnilnd_eager_recv,
.lnd_nl_get = kgnilnd_nl_get,
.lnd_nl_set = kgnilnd_nl_set,
+ .lnd_get_timeout = kgnilnd_timeout,
};
kgn_data_t kgnilnd_data;
static inline int kgnilnd_timeout(void)
{
- return *kgnilnd_tunables.kgn_timeout ?
- *kgnilnd_tunables.kgn_timeout :
- lnet_get_lnd_timeout();
+ /* Use the gnilnd kgn_timeout tunable when it is non-zero, otherwise
+  * defer to lnet_get_lnd_timeout(). "a ?: b" is the GNU C shorthand
+  * for "a ? a : b".
+  */
+ return *kgnilnd_tunables.kgn_timeout ?: lnet_get_lnd_timeout();
}
/* Macro wrapper for _kgnilnd_schedule_conn. This will store the function
.lkp_value = "traffic_class",
.lkp_data_type = NLA_STRING,
},
+ [LNET_NET_KFILND_TUNABLES_ATTR_TIMEOUT] = {
+ .lkp_value = "timeout",
+ .lkp_data_type = NLA_S32,
+ },
},
};
tunables->lnd_tun_u.lnd_kfi.lnd_auth_key);
nla_put_string(msg, LNET_NET_KFILND_TUNABLES_ATTR_TRAFFIC_CLASS,
tunables->lnd_tun_u.lnd_kfi.lnd_traffic_class_str);
+ nla_put_s32(msg, LNET_NET_KFILND_TUNABLES_ATTR_TIMEOUT,
+ kfilnd_timeout());
return 0;
}
static int kfilnd_startup(struct lnet_ni *ni);
static const struct lnet_lnd the_kfilnd = {
- .lnd_type = KFILND,
- .lnd_startup = kfilnd_startup,
- .lnd_shutdown = kfilnd_shutdown,
- .lnd_send = kfilnd_send,
- .lnd_recv = kfilnd_recv,
- .lnd_nl_get = kfilnd_nl_get,
- .lnd_nl_set = kfilnd_nl_set,
- .lnd_keys = &kfilnd_tunables_keys,
+ .lnd_type = KFILND,
+ .lnd_startup = kfilnd_startup,
+ .lnd_shutdown = kfilnd_shutdown,
+ .lnd_send = kfilnd_send,
+ .lnd_recv = kfilnd_recv,
+ .lnd_nl_get = kfilnd_nl_get,
+ .lnd_nl_set = kfilnd_nl_set,
+ .lnd_keys = &kfilnd_tunables_keys,
+ .lnd_get_timeout = kfilnd_timeout,
};
static int kfilnd_startup(struct lnet_ni *ni)
#define KFILND_MY_PROCID 49152
+/* default kfilnd timeout in seconds */
+#define KFILND_TIMEOUT_DEFAULT 125
+
/* 256 Rx contexts max */
#define KFILND_FAB_RX_CTX_BITS 8
LNET_NET_KFILND_TUNABLES_ATTR_PROV_MINOR,
LNET_NET_KFILND_TUNABLES_ATTR_AUTH_KEY,
LNET_NET_KFILND_TUNABLES_ATTR_TRAFFIC_CLASS,
+ LNET_NET_KFILND_TUNABLES_ATTR_TIMEOUT,
__LNET_NET_KFILND_TUNABLES_ATTR_MAX_PLUS_ONE,
};
extern struct workqueue_struct *kfilnd_wq;
+extern int kfi_timeout;
extern unsigned int cksum;
extern unsigned int tx_scale_factor;
extern unsigned int rx_cq_scale_factor;
return atomic_read(&kp->kp_remove_peer) > 0;
}
+/* Effective kfilnd timeout: the kfi_timeout module parameter (seconds) when
+ * it is non-zero, otherwise whatever lnet_get_lnd_timeout() returns.
+ */
+static inline int kfilnd_timeout(void)
+{
+ return kfi_timeout ?: lnet_get_lnd_timeout();
+}
+
/* Values for kp_hello_state. Valid transitions:
* NONE -> INIT
* INIT -> NONE (only when fail to allocate kfilnd_tn for hello req)
return true;
else if (proactive_handshake &&
ktime_before(kp->kp_last_alive +
- lnet_get_lnd_timeout() * 2,
+ kfilnd_timeout() * 2,
ktime_get_seconds()))
return true;
} else if (hello_state == KP_HELLO_SENDING &&
- ktime_before(kp->kp_hello_ts + lnet_get_lnd_timeout(),
+ ktime_before(kp->kp_hello_ts + kfilnd_timeout(),
ktime_get_seconds())) {
/* Sent hello but never received reply */
CDEBUG(D_NET,
module_param(cksum, uint, 0444);
MODULE_PARM_DESC(cksum, "Enable checksums for non-zero messages (not RDMA)");
+int kfi_timeout = KFILND_TIMEOUT_DEFAULT;
+module_param(kfi_timeout, int, 0644);
+MODULE_PARM_DESC(kfi_timeout, "KFI LND timeout (seconds)");
+
/* Scale factor for TX context queue depth. The factor is applied to the number
* of credits to determine queue depth.
*/
return -EINVAL;
}
+ kfilnd_tunables->lnd_timeout = kfilnd_timeout();
+
return 0;
}
/**
* kfilnd_peer_purge_old_peer() - Delete the specified peer from the cache
- * if we haven't heard from it within 5x LND timeouts.
+ * if we haven't heard from it within KP_PURGE_LIMIT seconds.
* @kp: The peer to be checked or purged
*/
static void kfilnd_peer_purge_old_peer(struct kfilnd_peer *kp)
{
if (ktime_after(ktime_get_seconds(),
- kp->kp_last_alive + (lnet_get_lnd_timeout() * 5))) {
+ kp->kp_last_alive + KP_PURGE_LIMIT)) {
CDEBUG(D_NET,
"Haven't heard from %s(%p):0x%llx in %lld seconds\n",
libcfs_nid2str(kp->kp_nid), kp, kp->kp_addr,
#include "kfilnd.h"
+/* Time limit we can go without hearing from a peer before
+ * removing it from the cache. Default: 5x LND timeouts.
+ */
+#define KP_PURGE_LIMIT (kfilnd_timeout() * 5)
+
void kfilnd_peer_put(struct kfilnd_peer *kp);
struct kfilnd_peer *kfilnd_peer_get(struct kfilnd_dev *dev, lnet_nid_t nid);
void kfilnd_peer_alive(struct kfilnd_peer *kp);
tn->tn_response_rx = ep->end_context_id;
tn->tn_state = TN_STATE_IDLE;
tn->hstatus = LNET_MSG_STATUS_OK;
- tn->deadline = ktime_get_seconds() + lnet_get_lnd_timeout();
+ tn->deadline = ktime_get_seconds() + kfilnd_timeout();
tn->tn_replay_deadline = ktime_sub(tn->deadline,
- (lnet_get_lnd_timeout() / 2));
+ (kfilnd_timeout() / 2));
tn->is_initiator = is_initiator;
INIT_WORK(&tn->timeout_work, kfilnd_tn_timeout_work);
.lnd_nl_get = kiblnd_nl_get,
.lnd_nl_set = kiblnd_nl_set,
.lnd_keys = &kiblnd_tunables_keys,
+ .lnd_get_timeout = kiblnd_timeout,
};
static void ko2inlnd_assert_wire_constants(void)
#define IBLND_CREDITS_DEFAULT 8 /* default # of peer_ni credits */
#define IBLND_CREDITS_MAX ((typeof(((struct kib_msg *) 0)->ibm_credits)) - 1) /* Max # of peer_ni credits */
+#define IBLND_TIMEOUT_DEFAULT 50 /* Default o2iblnd timeout in seconds */
+
#ifdef HAVE_OFED_RDMA_CREATE_ID_5ARG
# define kiblnd_rdma_create_id(ns, cb, dev, ps, qpt) \
rdma_create_id((ns) ? (ns) : &init_net, cb, dev, ps, qpt)
static inline int kiblnd_timeout(void)
{
- return *kiblnd_tunables.kib_timeout ? *kiblnd_tunables.kib_timeout :
- lnet_get_lnd_timeout();
+ /* A zero kib_timeout tunable defers to lnet_get_lnd_timeout() */
+ return *kiblnd_tunables.kib_timeout ?: lnet_get_lnd_timeout();
}
/* lnd_connreq_timeout = lnd_timeout / 4 */
module_param(cksum, int, 0644);
MODULE_PARM_DESC(cksum, "set non-zero to enable message (not RDMA) checksums");
-static int timeout;
+static int timeout = IBLND_TIMEOUT_DEFAULT;
module_param(timeout, int, 0644);
MODULE_PARM_DESC(timeout, "timeout (seconds)");
.lnd_nl_get = ksocknal_nl_get,
.lnd_nl_set = ksocknal_nl_set,
.lnd_keys = &ksocknal_tunables_keys,
+ .lnd_get_timeout = ksocknal_timeout,
};
static int __init ksocklnd_init(void)
*/
#define SOCKNAL_SHUTDOWN_BIAS (INT_MIN+1)
+/* default ksocklnd timeout in seconds */
+#define SOCKNAL_TIMEOUT_DEFAULT 50
+
/** connd timeout */
#define SOCKNAL_CONND_TIMEOUT 120
/** reserved thread for accepting & creating new connd */
#define CURRENT_LND_VERSION 1
-static int sock_timeout;
+static int sock_timeout = SOCKNAL_TIMEOUT_DEFAULT;
module_param(sock_timeout, int, 0644);
MODULE_PARM_DESC(sock_timeout, "dead socket timeout (seconds)");
{
size_t min_size = 0;
int i;
+ const struct lnet_lnd *net_lnd;
if (!ni || !cfg_ni || !tun || !nid_is_nid4(&ni->ni_nid))
return;
LNET_STATS_TYPE_DROP);
}
+ /* Update the tunables timeout value from the dynamic timeout API */
+ net_lnd = ni->ni_net->net_lnd;
+
+ switch (net_lnd->lnd_type) {
+ case SOCKLND:
+ ni->ni_lnd_tunables.lnd_tun_u.lnd_sock.lnd_timeout =
+ net_lnd->lnd_get_timeout();
+ break;
+ case O2IBLND:
+ ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib.lnd_timeout =
+ net_lnd->lnd_get_timeout();
+ break;
+ case KFILND:
+ ni->ni_lnd_tunables.lnd_tun_u.lnd_kfi.lnd_timeout =
+ net_lnd->lnd_get_timeout();
+ break;
+ case GNILND:
+ ni->ni_lnd_tunables.lnd_tun_u.lnd_gni.lnd_timeout =
+ net_lnd->lnd_get_timeout();
+ }
+
/*
* tun->lt_tun will always be present, but in order to be
* backwards compatible, we need to deal with the cases when
}
}
+/* get_msg_deadline
+ *	Returns the absolute deadline (ktime_t) for a message committed now.
+ *
+ *	If the LND behind msg_ni implements lnd_get_timeout() and it returns
+ *	a positive LND timeout (LNDT), the LNet transaction timeout (LTT) is
+ *	derived from it and the global retry count (LRC):
+ *		LTT = LNDT * (LRC + 1) + 1
+ *	Otherwise (NULL msg_ni, no lnd_get_timeout(), or LNDT <= 0) the
+ *	global lnet_transaction_timeout is used.
+ *
+ *	msg_ni - NI the message is being sent or received on; may be NULL
+ */
+static ktime_t get_msg_deadline(struct lnet_ni *msg_ni)
+{
+	unsigned int msg_timeout = lnet_transaction_timeout;
+
+	if (msg_ni && msg_ni->ni_net->net_lnd->lnd_get_timeout) {
+		int lnd_timeout = msg_ni->ni_net->net_lnd->lnd_get_timeout();
+
+		if (lnd_timeout > 0)
+			msg_timeout = lnd_timeout * (lnet_retry_count + 1) + 1;
+	}
+
+	/* Widen before scaling to nanoseconds: NSEC_PER_SEC is a long
+	 * constant, so on 32-bit kernels the product would otherwise wrap
+	 * for any timeout of more than a few seconds.
+	 */
+	return ktime_add_ns(ktime_get(), (u64)msg_timeout * NSEC_PER_SEC);
+}
+
void
lnet_msg_commit(struct lnet_msg *msg, int cpt)
{
struct lnet_msg_container *container = the_lnet.ln_msg_containers[cpt];
struct lnet_counters_common *common;
- s64 timeout_ns;
-
- /* set the message deadline */
- timeout_ns = lnet_transaction_timeout * NSEC_PER_SEC;
- msg->msg_deadline = ktime_add_ns(ktime_get(), timeout_ns);
- /* routed message can be committed for both receiving and sending */
+ /* A routed message can be committed for both receiving and sending */
LASSERT(!msg->msg_tx_committed);
if (msg->msg_sending) {
LASSERT(!msg->msg_receiving);
+
+ /* Set the message deadline using msg send NI */
+ msg->msg_deadline = get_msg_deadline(msg->msg_txni);
msg->msg_tx_cpt = cpt;
msg->msg_tx_committed = 1;
if (msg->msg_rx_committed) { /* routed message REPLY */
}
} else {
LASSERT(!msg->msg_sending);
+
+ /* Set the message deadline using msg recv NI */
+ msg->msg_deadline = get_msg_deadline(msg->msg_rxni);
msg->msg_rx_cpt = cpt;
msg->msg_rx_committed = 1;
}
}
static struct netstrfns libcfs_netstrfns[] = {
- { .nf_type = LOLND,
- .nf_name = "lo",
- .nf_modname = "klolnd",
- .nf_addr2str = libcfs_decnum_addr2str,
- .nf_str2addr = libcfs_lo_str2addr,
- .nf_parse_addrlist = libcfs_num_parse,
- .nf_print_addrlist = libcfs_num_addr_range_print,
- .nf_match_addr = libcfs_num_match
+ {
+ .nf_type = LOLND,
+ .nf_name = "lo",
+ .nf_modname = "klolnd",
+ .nf_addr2str = libcfs_decnum_addr2str,
+ .nf_str2addr = libcfs_lo_str2addr,
+ .nf_parse_addrlist = libcfs_num_parse,
+ .nf_print_addrlist = libcfs_num_addr_range_print,
+ .nf_match_addr = libcfs_num_match
},
{ .nf_type = SOCKLND,
.nf_name = "tcp",
.nf_match_addr = cfs_ip_addr_match,
.nf_match_netmask = libcfs_ip_in_netmask
},
- { .nf_type = GNILND,
- .nf_name = "gni",
- .nf_modname = "kgnilnd",
- .nf_addr2str = libcfs_decnum_addr2str,
- .nf_str2addr = libcfs_num_str2addr,
- .nf_parse_addrlist = libcfs_num_parse,
- .nf_print_addrlist = libcfs_num_addr_range_print,
- .nf_match_addr = libcfs_num_match
+ {
+ .nf_type = GNILND,
+ .nf_name = "gni",
+ .nf_modname = "kgnilnd",
+ .nf_addr2str = libcfs_decnum_addr2str,
+ .nf_str2addr = libcfs_num_str2addr,
+ .nf_parse_addrlist = libcfs_num_parse,
+ .nf_print_addrlist = libcfs_num_addr_range_print,
+ .nf_match_addr = libcfs_num_match
},
- { .nf_type = GNIIPLND,
- .nf_name = "gip",
- .nf_modname = "kgnilnd",
- .nf_addr2str = libcfs_ip_addr2str,
- .nf_str2addr = libcfs_ip_str2addr,
- .nf_parse_addrlist = cfs_ip_addr_parse,
- .nf_print_addrlist = libcfs_ip_addr_range_print,
- .nf_match_addr = cfs_ip_addr_match
+ {
+ .nf_type = GNIIPLND,
+ .nf_name = "gip",
+ .nf_modname = "kgnilnd",
+ .nf_addr2str = libcfs_ip_addr2str,
+ .nf_str2addr = libcfs_ip_str2addr,
+ .nf_parse_addrlist = cfs_ip_addr_parse,
+ .nf_print_addrlist = libcfs_ip_addr_range_print,
+ .nf_match_addr = cfs_ip_addr_match
},
- { .nf_type = PTL4LND,
- .nf_name = "ptlf",
- .nf_modname = "kptl4lnd",
- .nf_addr2str = libcfs_decnum_addr2str,
- .nf_str2addr = libcfs_num_str2addr,
- .nf_parse_addrlist = libcfs_num_parse,
- .nf_print_addrlist = libcfs_num_addr_range_print,
- .nf_match_addr = libcfs_num_match
+ {
+ .nf_type = PTL4LND,
+ .nf_name = "ptlf",
+ .nf_modname = "kptl4lnd",
+ .nf_addr2str = libcfs_decnum_addr2str,
+ .nf_str2addr = libcfs_num_str2addr,
+ .nf_parse_addrlist = libcfs_num_parse,
+ .nf_print_addrlist = libcfs_num_addr_range_print,
+ .nf_match_addr = libcfs_num_match
},
{
.nf_type = KFILND,
return rc;
}
+/* Write @timeout to the timeout module parameter of the LND that backs
+ * @net, and record the outcome in @err_rc.
+ *
+ * timeout - value written (as a decimal string) to the sysfs parameter
+ * net     - LNet network number; the LND type is taken from LNET_NETTYP(net)
+ * seq_no  - sequence number passed through to cYAML_build_error()
+ * err_rc  - [OUT] cYAML tree describing the result
+ *
+ * Returns LUSTRE_CFG_RC_NO_ERR on success, -EINVAL when the LND type has
+ * no timeout parameter, or the write_sysfs_file() return code on failure.
+ */
+int lustre_lnet_config_lnd_timeout(int timeout, __u32 net, int seq_no,
+				   struct cYAML **err_rc)
+{
+	int rc = LUSTRE_CFG_RC_NO_ERR;
+	char err_str[LNET_MAX_STR_LEN] = "";
+	char val[INT_STRING_LEN];
+	__u32 lnd = LNET_NETTYP(net);
+
+	snprintf(val, sizeof(val), "%d", timeout);
+
+	switch (lnd) {
+	case SOCKLND:
+		rc = write_sysfs_file(socklnd_modparam_path, "sock_timeout",
+				      val, 1, strlen(val) + 1);
+		break;
+	case O2IBLND:
+		rc = write_sysfs_file(o2iblnd_modparam_path, "timeout", val, 1,
+				      strlen(val) + 1);
+		break;
+	case KFILND:
+		rc = write_sysfs_file(kfilnd_modparam_path, "kfi_timeout", val,
+				      1, strlen(val) + 1);
+		break;
+	case GNILND:
+		rc = write_sysfs_file(gnilnd_modparam_path, "timeout", val, 1,
+				      strlen(val) + 1);
+		break;
+	default:
+		snprintf(err_str, sizeof(err_str),
+			 "\"Net %s does not accept a LND timeout\"",
+			 libcfs_lnd2str(lnd));
+		rc = -EINVAL;
+		/* Skip the sysfs check below so this more specific
+		 * message is the one that gets reported.
+		 */
+		goto out;
+	}
+
+	/* Check return code from writing sysfs file */
+	if (rc)
+		snprintf(err_str, sizeof(err_str),
+			 "\"Failed to set LND timeout for net %s\"",
+			 libcfs_lnd2str(lnd));
+
+out:
+	cYAML_build_error(rc, seq_no, "set", "lnd_timeout", err_str, err_rc);
+	return rc;
+}
+
int lustre_lnet_config_transaction_to(int timeout, int seq_no, struct cYAML **err_rc)
{
int rc = LUSTRE_CFG_RC_NO_ERR;
char val[LNET_MAX_STR_LEN];
int service_port = -1, l_errno = 0;
- rc = read_sysfs_file(o2ib_modparam_path, "service", val,
+ rc = read_sysfs_file(o2iblnd_modparam_path, "service", val,
1, sizeof(val));
if (rc) {
l_errno = errno;
#define INT_STRING_LEN 23
#define LNET_DEFAULT_INDENT 6
+/* LNet module parameter path */
#define modparam_path "/sys/module/lnet/parameters/"
-#define o2ib_modparam_path "/sys/module/ko2iblnd/parameters/"
+
+/* LND module parameter paths */
+#define o2iblnd_modparam_path "/sys/module/ko2iblnd/parameters/"
+#define socklnd_modparam_path "/sys/module/ksocklnd/parameters/"
+#define kfilnd_modparam_path "/sys/module/kkfilnd/parameters/"
+#define gnilnd_modparam_path "/sys/module/kgnilnd/parameters/"
+
#define gni_nid_path "/proc/cray_xt/"
enum lnetctl_cmd {
int lustre_lnet_show_rtr_sensitivity(int seq_no, struct cYAML **show_rc,
struct cYAML **err_rc);
+/* lustre_lnet_config_lnd_timeout
+ * sets the LND timeout which defines how long the LND should take to complete
+ * a network transaction, by writing the timeout value to the sysfs file
+ * (usually under /sys/module/<lnd>/parameters/).
+ *
+ * timeout - timeout value to configure, in seconds
+ * net_type - LNet network number (e.g. from libcfs_str2net()); its LND
+ *            type, LNET_NETTYP(net_type), selects the module to configure
+ * seq_no - sequence number of the request
+ * err_rc - [OUT] struct cYAML tree describing the error. Freed by
+ * caller
+ */
+int lustre_lnet_config_lnd_timeout(int timeout, __u32 net_type, int seq_no,
+ struct cYAML **err_rc);
+
/*
* lustre_lnet_config_transaction_to
* sets the timeout after which a message expires or a timeout event is
lnd_cfg->lnd_traffic_class) == NULL)
return LUSTRE_CFG_RC_OUT_OF_MEM;
+ if (cYAML_create_number(lndparams, "timeout",
+ lnd_cfg->lnd_timeout) == NULL)
+ return LUSTRE_CFG_RC_OUT_OF_MEM;
+
return LUSTRE_CFG_RC_NO_ERR;
}
#endif
"\t--verbose: display detailed output per network."
" Optional argument of '2' outputs more stats\n"},
{"set", jt_set_ni_value, 0, "set local NI specific parameter\n"
- "\t--nid: NI NID to set the\n"
+ "\t--nid: NI NID to set the value on\n"
+ "\t--net: network to set the value on (e.g. tcp, o2ib, kfi)\n"
+ "\t--lnd-timeout: set LND timeout (seconds) for LND used by --net argument\n"
"\t--health: specify health value to set\n"
"\t--conns-per-peer: number of connections per peer\n"
"\t--all: set all NIs value to the one specified\n"},
return rc;
}
-static int set_value_helper(int argc, char **argv, int cmd,
- int (*cb)(int, bool, char*, int, int, struct cYAML**))
-{
- char *nidstr = NULL;
- long int healthv = -1;
- bool all = false;
- long int state = -1;
- long int cpp = -1;
- int rc, opt;
- struct cYAML *err_rc = NULL;
- const char *const short_options = "t:n:m:s:a";
- static const struct option long_options[] = {
- { .name = "nid", .has_arg = required_argument, .val = 'n' },
- { .name = "health", .has_arg = required_argument, .val = 't' },
- { .name = "conns-per-peer", .has_arg = required_argument, .val = 'm' },
- { .name = "state", .has_arg = required_argument, .val = 's' },
- { .name = "all", .has_arg = no_argument, .val = 'a' },
- { .name = NULL }
- };
-
- while ((opt = getopt_long(argc, argv, short_options,
- long_options, NULL)) != -1) {
- switch (opt) {
- case 'n':
- nidstr = optarg;
- break;
- case 't':
- if (parse_long(optarg, &healthv) != 0)
- healthv = -1;
- break;
- case 's':
- if (cmd != LNET_CMD_PEERS ||
- parse_long(optarg, &state) != 0)
- state = -1;
- break;
- case 'm':
- if (cmd != LNET_CMD_NETS ||
- parse_long(optarg, &cpp) != 0)
- cpp = -1;
- break;
-
- case 'a':
- all = true;
- break;
- default:
- return 0;
- }
- }
-
- rc = cb(healthv, all, nidstr, cmd == LNET_CMD_PEERS ? state : cpp, -1,
- &err_rc);
- if (rc != LUSTRE_CFG_RC_NO_ERR)
- cYAML_print_tree2file(stderr, err_rc);
-
- cYAML_free_tree(err_rc);
-
- return rc;
-}
-
-int yaml_lnet_config_ni_healthv(int healthv, bool all, char *nidstr, int cpp,
- int seq_no, struct cYAML **err_rc)
+static int yaml_lnet_config_ni_value(int healthv, bool all, char *nidstr,
+ int cpp, int lnd_timeout, __u32 net,
+ int seq_no, struct cYAML **err_rc)
{
struct lnet_ioctl_config_lnd_tunables tunables;
struct lnet_dlc_network_descr nw_descr;
int rc = 0;
/* For NI you can't have both setting all NIDs and a requested NID */
- if (!all && !nidstr)
+ if (all && nidstr)
return -EINVAL;
+ if (lnd_timeout > -1)
+ return lustre_lnet_config_lnd_timeout(lnd_timeout, net,
+ -1, err_rc);
+
if (cpp == -1 && healthv == -1)
return 0;
return rc;
}
-static int jt_set_ni_value(int argc, char **argv)
-{
- int rc = check_cmd(net_cmds, "net", "set", 0, argc, argv);
-
- if (rc < 0)
- return rc;
-
- return set_value_helper(argc, argv, LNET_CMD_NETS,
- yaml_lnet_config_ni_healthv);
-}
-
static int yaml_lnet_peer_display(yaml_parser_t *reply, bool list_only)
{
yaml_emitter_t debug;
return rc == 1 ? 0 : rc;
}
-int yaml_lnet_config_peer_ni_healthv(int healthv, bool all, char *lpni_nid,
- int state, int seq_no, struct cYAML **err_rc)
+static int yaml_lnet_config_peer_ni_healthv(int healthv, bool all,
+ char *lpni_nid, int state,
+ int seq_no, struct cYAML **err_rc)
{
int rc;
return rc;
}
+/* set_value_helper
+ *	Shared option parser behind "lnetctl net set" and "lnetctl peer set".
+ *
+ *	Parses --all, --net, --lnd-timeout, --conns-per-peer, --nid, --state
+ *	and --health, then dispatches on @cmd: LNET_CMD_PEERS goes to
+ *	yaml_lnet_config_peer_ni_healthv(), anything else goes to
+ *	yaml_lnet_config_ni_value(). --conns-per-peer is only honoured for
+ *	LNET_CMD_NETS and --state only for LNET_CMD_PEERS.
+ *
+ *	Returns LUSTRE_CFG_RC_NO_ERR on success, LUSTRE_CFG_RC_BAD_PARAM for
+ *	bad options, or the return code of the configuration call. On
+ *	failure an error tree is printed to stderr.
+ */
+static int set_value_helper(int argc, char **argv, int cmd)
+{
+	char *nidstr = NULL;
+	long healthv = -1;
+	__u32 net = 0;
+	int lnd_timeout = -1;
+	long lnd_timeout_arg = -1;
+	bool all = false;
+	long state = -1;
+	long cpp = -1;
+	int seq_no = -1;
+	int rc, opt;
+	struct cYAML *err_rc = NULL;
+	char err_str[LNET_MAX_STR_LEN] = "";
+	static const struct option long_options[] = {
+	{ .val = 'a', .name = "all",	.has_arg = no_argument },
+	{ .val = 'i', .name = "net",	.has_arg = required_argument },
+	{ .val = 'l', .name = "lnd-timeout",
+					.has_arg = required_argument },
+	{ .val = 'm', .name = "conns-per-peer",
+					.has_arg = required_argument },
+	{ .val = 'n', .name = "nid",	.has_arg = required_argument },
+	{ .val = 's', .name = "state",	.has_arg = required_argument },
+	{ .val = 't', .name = "health",	.has_arg = required_argument },
+	{ .name = NULL }
+	};
+
+	while ((opt = getopt_long(argc, argv, "ai:l:m:n:s:t:",
+				  long_options, NULL)) != -1) {
+		switch (opt) {
+		case 'a':
+			all = true;
+			break;
+		case 'i':
+			net = libcfs_str2net(optarg);
+			if (net == LNET_NET_ANY) {
+				rc = LUSTRE_CFG_RC_BAD_PARAM;
+				snprintf(err_str, sizeof(err_str),
+					 "\"Invalid network type: %s\"",
+					 optarg);
+				goto out;
+			}
+			break;
+		case 'l':
+			/* Parse with the same helper as the other numeric
+			 * options. atoi() would map non-numeric input to 0,
+			 * and 0 is a valid request ("use the global
+			 * default"), so typos must be rejected here. The
+			 * round-trip through int rejects values that do not
+			 * fit the int the configuration call takes.
+			 */
+			if (parse_long(optarg, &lnd_timeout_arg) != 0 ||
+			    lnd_timeout_arg < 0 ||
+			    lnd_timeout_arg != (int)lnd_timeout_arg) {
+				rc = LUSTRE_CFG_RC_BAD_PARAM;
+				snprintf(err_str, sizeof(err_str),
+					 "\"Invalid LND timeout value '%s', must be >= 0\"",
+					 optarg);
+				goto out;
+			}
+			lnd_timeout = (int)lnd_timeout_arg;
+			break;
+		case 'm':
+			if (cmd != LNET_CMD_NETS ||
+			    parse_long(optarg, &cpp) != 0)
+				cpp = -1;
+			break;
+		case 'n':
+			nidstr = optarg;
+			break;
+		case 's':
+			if (cmd != LNET_CMD_PEERS ||
+			    parse_long(optarg, &state) != 0)
+				state = -1;
+			break;
+		case 't':
+			if (parse_long(optarg, &healthv) != 0)
+				healthv = -1;
+			break;
+		case '?':
+			rc = LUSTRE_CFG_RC_BAD_PARAM;
+			snprintf(err_str, sizeof(err_str),
+				 "\"Invalid option or missing argument\"");
+			goto out;
+		default:
+			return 0;
+		}
+	}
+
+	/* The LND whose timeout is changed is derived from --net */
+	if (lnd_timeout >= 0 && net == 0) {
+		rc = LUSTRE_CFG_RC_BAD_PARAM;
+		snprintf(err_str, sizeof(err_str),
+			 "\"Specified --lnd-timeout without --net option\"");
+		goto out;
+	}
+
+	if (cmd == LNET_CMD_PEERS)
+		rc = yaml_lnet_config_peer_ni_healthv(healthv, all, nidstr,
+						      state, seq_no, &err_rc);
+	else
+		rc = yaml_lnet_config_ni_value(healthv, all, nidstr, cpp,
+					       lnd_timeout, net, seq_no,
+					       &err_rc);
+
+out:
+	if (rc != LUSTRE_CFG_RC_NO_ERR) {
+		/* Build a record here only if the callee did not already
+		 * return one; otherwise its detailed message would be
+		 * followed by a second entry with an empty description.
+		 */
+		if (!err_rc)
+			cYAML_build_error(rc, -1, "net", "set", err_str,
+					  &err_rc);
+		cYAML_print_tree2file(stderr, err_rc);
+	}
+
+	cYAML_free_tree(err_rc);
+
+	return rc;
+}
+
+/* "lnetctl net set" entry point: validate the command line with check_cmd()
+ * and, unless that reports a negative code, hand the arguments to
+ * set_value_helper() in LNET_CMD_NETS mode.
+ */
+static int jt_set_ni_value(int argc, char **argv)
+{
+	int rc;
+
+	rc = check_cmd(net_cmds, "net", "set", 0, argc, argv);
+	if (rc >= 0)
+		rc = set_value_helper(argc, argv, LNET_CMD_NETS);
+
+	return rc;
+}
+
static int jt_set_peer_ni_value(int argc, char **argv)
{
int rc = check_cmd(peer_cmds, "peer", "set", 0, argc, argv);
if (rc < 0)
return rc;
- return set_value_helper(argc, argv, LNET_CMD_PEERS,
- yaml_lnet_config_peer_ni_healthv);
+ /* LNET_CMD_PEERS makes set_value_helper() take the peer NI path */
+ return set_value_helper(argc, argv, LNET_CMD_PEERS);
}
static int yaml_debug_recovery(enum lnet_health_type type)
done < $LNET_PARAMS_FILE
}
+function set_ltt_node() {
+ # Achieves a desired LNet Transaction Timeout (LTT) value for a node by
+ # setting the LND timeout (LNDT) value for the network being used for
+ # tests.
+ # usage: set_ltt_node <node> <ltt> <nettype>
+ # If the node's lnetctl has no --lnd-timeout option, only the global
+ # transaction_timeout is changed.
+ local node=$1
+ local ltt=$2
+ local nettype=$3
+
+ if do_node $node $LNETCTL net set -h | grep -q -- "--lnd-timeout:"; then
+ # lnetctl supports setting the LNDT parameter.
+ local retry_count=$(do_node $node $LNETCTL global show |
+ awk '/retry_count/{print $NF}')
+
+ # Determine the LNDT value that achieves LTT. This is taken from
+ # the formula, using the LNet retry count (LRC):
+ # LTT = LNDT(LRC + 1) + 1
+ # LNDT = (LTT - 1)/(LRC + 1)
+ # The division is integer division, so the result is rounded down.
+ local lnd_timeout=$(( (ltt - 1) / (retry_count + 1) ))
+
+ do_node $node $LNETCTL net set \
+ --net ${nettype} --lnd-timeout $lnd_timeout ||
+ error "Failed to set LND timeout on ${nettype} net"
+ fi
+ # Also set the default global LTT
+ do_node $node $LNETCTL set transaction_timeout $ltt ||
+ error "Failed to set transaction_timeout on $node"
+}
+
function lnet_health_pre() {
save_lnet_params
# Lower transaction timeout to speed up test execution
- $LNETCTL set transaction_timeout 10 ||
- error "Failed to set transaction_timeout $?"
+ set_ltt_node $HOSTNAME 10 $NETTYPE ||
+ error "Failed to set transaction timeout $?"
RETRY_PARAM=$($LNETCTL global show | awk '/retry_count/{print $NF}')
RSND_PRE=$($LNETCTL stats show | awk '/resend_count/{print $NF}')
}
run_test 232 "Test setting ToS value"
+# usage: check_parameter <name> <value>
+# Counts the lines of "lnetctl net show -v" that read "<name>: <value>" and
+# returns 0 when that count equals the number of entries in INTERFACES,
+# 1 otherwise. The show output is also copied to stderr via tee.
+check_parameter() {
+ local para=$1
+ local value=$2
+
+ echo "check parameter ${para} value ${value}"
+
+ return $(( $(do_lnetctl net show -v | \
+ tee /dev/stderr | \
+ grep -c "^ \+${para}: ${value}$") != ${#INTERFACES[@]} ))
+}
+
+# Verify that "lnetctl net set --lnd-timeout" is reflected in the "timeout"
+# tunable shown by "lnetctl net show -v", and that a value of zero makes the
+# tunable report the global lnd_timeout instead.
+test_241() {
+	reinit_dlc || return $?
+
+	do_lnetctl net add --net ${NETTYPE} --if ${INTERFACES[0]} ||
+		error "Failed to add net"
+
+	do_lnetctl net set -h | grep -q -- "--lnd-timeout:" ||
+		skip "lnetctl net set does not support --lnd-timeout option"
+
+	# Capture existing timeout value, we'll restore to this later
+	local old_lnd_to=$($LNETCTL net show --net ${NETTYPE} -v |
+			   awk '/^\s+timeout:/{print $NF}')
+
+	# Without a starting value neither the expected value nor the
+	# restore step below would be meaningful.
+	[[ -n $old_lnd_to ]] ||
+		error "Cannot determine LND timeout for ${NETTYPE} net"
+
+	local expected_lnd_to=$(( old_lnd_to + 1 ))
+
+	# Set new timeout and check it shows up in tunables
+	do_lnetctl net set --net ${NETTYPE} --lnd-timeout ${expected_lnd_to} ||
+		error "Failed to set LND timeout on ${NETTYPE} net"
+
+	check_parameter "timeout" ${expected_lnd_to} ||
+		error "Expected LND timeout $expected_lnd_to"
+
+	# Check if setting LND timeout to zero ends up defaulting to global
+	# lnd_timeout value
+	local global_lnd_to=$($LNETCTL global show -v |
+			      awk '/lnd_timeout:/{print $NF}')
+
+	do_lnetctl net set --net ${NETTYPE} --lnd-timeout 0 ||
+		error "Failed to set LND timeout on ${NETTYPE} net to zero"
+
+	check_parameter "timeout" ${global_lnd_to} ||
+		error "Expected LND timeout $global_lnd_to"
+
+	# Restore tunable timeout to old value
+	do_lnetctl net set --net ${NETTYPE} --lnd-timeout ${old_lnd_to} ||
+		error "Failed to restore LND timeout on ${NETTYPE} net"
+}
+run_test 241 "Check setting LND timeout value via lnetctl updates tunables"
+
### Test that linux route is added for each ni
test_250() {
local skip_param
do_expired_message_drop_test() {
local rnid lnid old_tto
- old_tto=$($LNETCTL global show |
- awk '/transaction_timeout:/{print $NF}')
+ local old_retry=$($LNETCTL global show |
+ awk '/^\s+retry_count:/{print $NF}')
- [[ -z $old_tto ]] &&
- error "Cannot determine LNet transaction timeout"
+ # Capture default, global LNet transaction timeout (LTT). If there's an
+ # LND timeout (LNDT) set for $NETTYPE, the true LTT = LNDT(LRC + 1) + 1.
+ local old_ltt=$($LNETCTL global show |
+ awk '/^\s+transaction_timeout:/{print $NF}')
+ local old_lnd_to=$($LNETCTL net show --net $NETTYPE --verbose |
+ awk '/^\s+timeout:/{print $NF}')
+ [[ -z "$old_lnd_to" ]] ||
+ old_ltt=$(( old_lnd_to * (old_retry + 1) + 1 ))
- local tto=10
+ do_lnetctl set retry_count 0 || error "Failed to set retry count to 0"
+ $LNETCTL global show
- do_lnetctl set transaction_timeout "${tto}" ||
- error "Failed to set transaction_timeout"
+ [[ -z $old_ltt ]] &&
+ error "Cannot determine LNet transaction timeout"
- # We want to consume all peer credits for at least transaction_timeout
- # seconds
- local delay
+ # Set new LNet transaction timeout (LTT)
+ local ltt=10
+ set_ltt_node $HOSTNAME $ltt $NETTYPE ||
+ error "Failed to set transaction timeout"
- delay=$((tto + 1))
+ # We want to consume all peer credits for at least LTT seconds
+ local delay=$((ltt + 1))
for lnid in "${LNIDS[@]}"; do
for rnid in "${RNIDS[@]}"; do
[[ $dropped -ne 1 ]] &&
error "Expect 1 dropped GET but found $dropped"
- do_lnetctl set transaction_timeout "${old_tto}"
+ # Restore retry_count and transaction timeout values in the order they
+ # were changed.
+ do_lnetctl set retry_count $old_retry
+ set_ltt_node $HOSTNAME ${old_ltt} $NETTYPE
return 0
}
skip "Need local peer credits >= router's peer credits"
fi
- local old_tto=$(do_node $router $LNETCTL global show |
- awk '/transaction_timeout:/{print $NF}')
-
- [[ -n $old_tto ]] ||
- error "Cannot determine LNet transaction timeout"
-
- local tto=10
-
- do_node $router $LNETCTL set transaction_timeout $tto ||
- error "Failed to set transaction_timeout"
-
local old_retry=$(do_node $router $LNETCTL global show |
awk '/retry_count:/{print $NF}')
[[ -n $old_retry ]] ||
error "Cannot determine LNet retry count"
+ # Capture default, global LNet transaction timeout (LTT). If there's an
+ # LND timeout (LNDT) set for the router's REMOTE_NET, the true
+ # LTT = LNDT(LRC + 1) + 1.
+ local old_ltt=$(do_node $router $LNETCTL global show |
+ awk '/transaction_timeout:/{print $NF}')
+ local old_lnd_to=$(do_node $router $LNETCTL net show --net $REMOTE_NET \
+ --verbose | awk '/^\s+timeout:/{print $NF}')
+ [[ -n $old_lnd_to ]] && old_ltt=$(( old_lnd_to * (old_retry + 1) + 1 ))
+
+ # Set router's retry_count to zero to shorten/simplify message timeout.
+
do_node $router $LNETCTL set retry_count 0 ||
+ error "Failed to set retry_count"
+
+ local ltt=10
+
+ set_ltt_node $router $ltt $REMOTE_NET ||
error "Failed to set transaction_timeout"
#define CFS_FAIL_DELAY_MSG_FORWARD 0xe002
# We want to consume all peer credits for at least transaction_timeout
# seconds
- local delay=$((tto + 1))
+ local delay=$((ltt + 1))
local rnid lnid cmd
local args="-l $delay -r 1 -m GET"
((rcsum == 0)) || error "Detected ping failures"
- do_node $router $LNETCTL set transaction_timeout ${old_tto}
+ # Restore old retry_count and REMOTE_NET LTT values
do_node $router $LNETCTL set retry_count ${old_retry}
+ set_ltt_node $router ${old_ltt} $REMOTE_NET
# Router should not drop any of the messages that have exceeded their
# deadline
}
run_test 305 "Resolve hostname before lnetctl ping"
-check_parameter() {
- local para=$1
- local value=$2
-
- echo "check parameter ${para} value ${value}"
-
- return $(( $(do_lnetctl net show -v | \
- tee /dev/stderr | \
- grep -c "^ \+${para}: ${value}$") != ${#INTERFACES[@]} ))
-}
-
static_config() {
local module=$1
local setting=$2