X-Git-Url: https://git.whamcloud.com/?p=fs%2Flustre-release.git;a=blobdiff_plain;f=lnet%2Fklnds%2Fsocklnd%2Fsocklnd_modparams.c;h=22042806961e29d0b7d7ad78dcabdde10b3812d2;hp=aab22f05a2d1c8ee2a92d00a39706c4a18f7bd1f;hb=HEAD;hpb=d96a9248708d4da02728c9976a9a90ba29bd2bc0 diff --git a/lnet/klnds/socklnd/socklnd_modparams.c b/lnet/klnds/socklnd/socklnd_modparams.c index aab22f0..6cb4eab 100644 --- a/lnet/klnds/socklnd/socklnd_modparams.c +++ b/lnet/klnds/socklnd/socklnd_modparams.c @@ -21,15 +21,26 @@ #include "socklnd.h" -static int sock_timeout = 50; +#include +#if defined(__x86_64__) || defined(__i386__) +#include +#endif +#ifdef HAVE_ETHTOOL_LINK_SETTINGS +#include +#include +#endif + +#define CURRENT_LND_VERSION 1 + +static int sock_timeout; module_param(sock_timeout, int, 0644); MODULE_PARM_DESC(sock_timeout, "dead socket timeout (seconds)"); -static int credits = 256; +static int credits = DEFAULT_CREDITS; module_param(credits, int, 0444); MODULE_PARM_DESC(credits, "# concurrent sends"); -static int peer_credits = 8; +static int peer_credits = DEFAULT_PEER_CREDITS; module_param(peer_credits, int, 0444); MODULE_PARM_DESC(peer_credits, "# concurrent sends to 1 peer"); @@ -37,7 +48,7 @@ static int peer_buffer_credits; module_param(peer_buffer_credits, int, 0444); MODULE_PARM_DESC(peer_buffer_credits, "# per-peer router buffer credits"); -static int peer_timeout = 180; +static int peer_timeout = DEFAULT_PEER_TIMEOUT; module_param(peer_timeout, int, 0444); MODULE_PARM_DESC(peer_timeout, "Seconds without aliveness news to declare peer dead (<=0 to disable)"); @@ -117,11 +128,9 @@ static int inject_csum_error = 0; module_param(inject_csum_error, int, 0644); MODULE_PARM_DESC(inject_csum_error, "set non-zero to inject a checksum error"); -#ifdef CPU_AFFINITY static int enable_irq_affinity = 0; module_param(enable_irq_affinity, int, 0644); MODULE_PARM_DESC(enable_irq_affinity, "enable IRQ affinity"); -#endif static int nonblk_zcack = 1; module_param(nonblk_zcack, int, 0644); @@ -139,6 +148,15 @@ static unsigned int zc_recv_min_nfrags = 16; module_param(zc_recv_min_nfrags, int, 0644); MODULE_PARM_DESC(zc_recv_min_nfrags, "minimum # of fragments to enable ZC recv"); +static unsigned int conns_per_peer = DEFAULT_CONNS_PER_PEER; +module_param(conns_per_peer, uint, 0644); +MODULE_PARM_DESC(conns_per_peer, "number of connections per peer"); + +/* By default skip_mr_route_setup is 0 (do not skip) */ +static unsigned int skip_mr_route_setup; +module_param(skip_mr_route_setup, uint, 0444); +MODULE_PARM_DESC(skip_mr_route_setup, "skip automatic setup of linux routes for MR"); + #ifdef SOCKNAL_BACKOFF static int backoff_init = 3; module_param(backoff_init, int, 0644); @@ -155,71 +173,249 @@ module_param(protocol, int, 0644); MODULE_PARM_DESC(protocol, "protocol version"); #endif -ksock_tunables_t ksocknal_tunables; +static int tos = -1; +static int param_set_tos(const char *val, cfs_kernel_param_arg_t *kp); +#ifdef HAVE_KERNEL_PARAM_OPS +static const struct kernel_param_ops param_ops_tos = { + .set = param_set_tos, + .get = param_get_int, +}; + +#define param_check_tos(name, p) \ + __param_check(name, p, int) +module_param(tos, tos, 0444); +#else +module_param_call(tos, param_set_tos, param_get_int, &tos, 0444); +#endif +MODULE_PARM_DESC(tos, "Set the type of service (=-1 to disable)"); + +static inline bool is_native_host(void) +{ +#ifdef HAVE_HYPERVISOR_IS_TYPE + return hypervisor_is_type(X86_HYPER_NATIVE); +#elif defined(__x86_64__) || defined(__i386__) + return x86_hyper == NULL; +#else + return true; +#endif +} + +struct ksock_tunables ksocknal_tunables; +struct lnet_ioctl_config_socklnd_tunables ksock_default_tunables; + +static int param_set_tos(const char *val, cfs_kernel_param_arg_t *kp) +{ + int rc, t; + + if (!val) + return -EINVAL; + + rc = kstrtoint(val, 0, &t); + if (rc) + return rc; + + if (t < -1 || t > 0xff) + return -ERANGE; + + *((int *)kp->arg) = t; + + return 0; +} + +#ifdef HAVE_ETHTOOL_LINK_SETTINGS +static int ksocklnd_ni_get_eth_intf_speed(struct lnet_ni *ni) +{ + struct net_device *dev; + int intf_idx = -1; + int ret = -1; + + DECLARE_CONST_IN_IFADDR(ifa); + + /* check if ni has interface assigned */ + if (!ni->ni_net_ns || !ni->ni_interface) + return 0; + + rtnl_lock(); + for_each_netdev(ni->ni_net_ns, dev) { + int flags = dev_get_flags(dev); + struct in_device *in_dev; + + if (flags & IFF_LOOPBACK) /* skip the loopback IF */ + continue; + + if (!(flags & IFF_UP)) + continue; + + in_dev = __in_dev_get_rtnl(dev); + if (!in_dev) + continue; + + in_dev_for_each_ifa_rtnl(ifa, in_dev) { + if (strcmp(ifa->ifa_label, ni->ni_interface) == 0) + intf_idx = dev->ifindex; + } + endfor_ifa(in_dev); + + if (intf_idx >= 0) + break; + } + if (intf_idx >= 0) { + struct ethtool_link_ksettings cmd; + int ethtool_ret; + + /* Some devices may not be providing link settings */ + ethtool_ret = __ethtool_get_link_ksettings(dev, &cmd); + if (!ethtool_ret) + ret = cmd.base.speed; + else + ret = ethtool_ret; + } + rtnl_unlock(); + + return ret; +} + +static int ksocklnd_speed2cpp(int speed) +{ + /* Use the minimum of 1Gbps to avoid calling ilog2 with 0 */ + if (speed < 1000) + speed = 1000; + + /* Pick heuristically optimal conns_per_peer value + * for the specified ethernet interface speed (Mbps) + */ + return ilog2(speed/1000) / 2 + 1; +} +#endif + +static int ksocklnd_lookup_conns_per_peer(struct lnet_ni *ni) +{ + int cpp = 1; +#ifdef HAVE_ETHTOOL_LINK_SETTINGS + int speed = ksocklnd_ni_get_eth_intf_speed(ni); + + if (ni->ni_interface) + CDEBUG(D_NET, "intf %s speed %d\n", ni->ni_interface, speed); + + if (speed > 0) + cpp = ksocklnd_speed2cpp(speed); +#endif + return cpp; +} int ksocknal_tunables_init(void) { + ksock_default_tunables.lnd_version = CURRENT_LND_VERSION; + ksock_default_tunables.lnd_conns_per_peer = conns_per_peer; + ksock_default_tunables.lnd_tos = tos; - /* initialize ksocknal_tunables structure */ - ksocknal_tunables.ksnd_timeout = &sock_timeout; + /* initialize ksocknal_tunables structure */ + ksocknal_tunables.ksnd_timeout = &sock_timeout; ksocknal_tunables.ksnd_nscheds = &nscheds; - ksocknal_tunables.ksnd_nconnds = &nconnds; - ksocknal_tunables.ksnd_nconnds_max = &nconnds_max; - ksocknal_tunables.ksnd_min_reconnectms = &min_reconnectms; - ksocknal_tunables.ksnd_max_reconnectms = &max_reconnectms; - ksocknal_tunables.ksnd_eager_ack = &eager_ack; - ksocknal_tunables.ksnd_typed_conns = &typed_conns; - ksocknal_tunables.ksnd_min_bulk = &min_bulk; - ksocknal_tunables.ksnd_tx_buffer_size = &tx_buffer_size; - ksocknal_tunables.ksnd_rx_buffer_size = &rx_buffer_size; - ksocknal_tunables.ksnd_nagle = &nagle; - ksocknal_tunables.ksnd_round_robin = &round_robin; - ksocknal_tunables.ksnd_keepalive = &keepalive; - ksocknal_tunables.ksnd_keepalive_idle = &keepalive_idle; - ksocknal_tunables.ksnd_keepalive_count = &keepalive_count; - ksocknal_tunables.ksnd_keepalive_intvl = &keepalive_intvl; - ksocknal_tunables.ksnd_credits = &credits; - ksocknal_tunables.ksnd_peertxcredits = &peer_credits; - ksocknal_tunables.ksnd_peerrtrcredits = &peer_buffer_credits; - ksocknal_tunables.ksnd_peertimeout = &peer_timeout; - ksocknal_tunables.ksnd_enable_csum = &enable_csum; - ksocknal_tunables.ksnd_inject_csum_error = &inject_csum_error; - ksocknal_tunables.ksnd_nonblk_zcack = &nonblk_zcack; - ksocknal_tunables.ksnd_zc_min_payload = &zc_min_payload; - ksocknal_tunables.ksnd_zc_recv = &zc_recv; - ksocknal_tunables.ksnd_zc_recv_min_nfrags = &zc_recv_min_nfrags; - -#ifdef CPU_AFFINITY + ksocknal_tunables.ksnd_nconnds = &nconnds; + ksocknal_tunables.ksnd_nconnds_max = &nconnds_max; + ksocknal_tunables.ksnd_min_reconnectms = &min_reconnectms; + ksocknal_tunables.ksnd_max_reconnectms = &max_reconnectms; + ksocknal_tunables.ksnd_eager_ack = &eager_ack; + ksocknal_tunables.ksnd_typed_conns = &typed_conns; + ksocknal_tunables.ksnd_min_bulk = &min_bulk; + ksocknal_tunables.ksnd_tx_buffer_size = &tx_buffer_size; + ksocknal_tunables.ksnd_rx_buffer_size = &rx_buffer_size; + ksocknal_tunables.ksnd_nagle = &nagle; + ksocknal_tunables.ksnd_round_robin = &round_robin; + ksocknal_tunables.ksnd_keepalive = &keepalive; + ksocknal_tunables.ksnd_keepalive_idle = &keepalive_idle; + ksocknal_tunables.ksnd_keepalive_count = &keepalive_count; + ksocknal_tunables.ksnd_keepalive_intvl = &keepalive_intvl; + ksocknal_tunables.ksnd_credits = &credits; + ksocknal_tunables.ksnd_peertxcredits = &peer_credits; + ksocknal_tunables.ksnd_peerrtrcredits = &peer_buffer_credits; + ksocknal_tunables.ksnd_peertimeout = &peer_timeout; + ksocknal_tunables.ksnd_enable_csum = &enable_csum; + ksocknal_tunables.ksnd_inject_csum_error = &inject_csum_error; + ksocknal_tunables.ksnd_nonblk_zcack = &nonblk_zcack; + ksocknal_tunables.ksnd_zc_min_payload = &zc_min_payload; + ksocknal_tunables.ksnd_zc_recv = &zc_recv; + ksocknal_tunables.ksnd_zc_recv_min_nfrags = &zc_recv_min_nfrags; + if (conns_per_peer > ((1 << SOCKNAL_CONN_COUNT_MAX_BITS)-1)) { + CWARN("socklnd conns_per_peer is capped at %u.\n", + (1 << SOCKNAL_CONN_COUNT_MAX_BITS)-1); + } + ksocknal_tunables.ksnd_conns_per_peer = &conns_per_peer; + if (enable_irq_affinity) { CWARN("irq_affinity is removed from socklnd because modern " "computer always has fast CPUs and more cores than " "# NICs, although you still can set irq_affinity by " "another way, please check manual for details.\n"); } - ksocknal_tunables.ksnd_irq_affinity = &enable_irq_affinity; -#endif + ksocknal_tunables.ksnd_irq_affinity = &enable_irq_affinity; #ifdef SOCKNAL_BACKOFF - ksocknal_tunables.ksnd_backoff_init = &backoff_init; - ksocknal_tunables.ksnd_backoff_max = &backoff_max; + ksocknal_tunables.ksnd_backoff_init = &backoff_init; + ksocknal_tunables.ksnd_backoff_max = &backoff_max; #endif #if SOCKNAL_VERSION_DEBUG - ksocknal_tunables.ksnd_protocol = &protocol; + ksocknal_tunables.ksnd_protocol = &protocol; #endif -#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM - ksocknal_tunables.ksnd_sysctl = NULL; -#endif + if (*ksocknal_tunables.ksnd_zc_min_payload < (2 << 10)) + *ksocknal_tunables.ksnd_zc_min_payload = (2 << 10); - if (*ksocknal_tunables.ksnd_zc_min_payload < (2 << 10)) - *ksocknal_tunables.ksnd_zc_min_payload = (2 << 10); + /* When on a hypervisor set the minimum zero copy size + * above the maximum payload size + */ + if (!is_native_host()) + *ksocknal_tunables.ksnd_zc_min_payload = (16 << 20) + 1; - /* initialize platform-sepcific tunables */ - return ksocknal_lib_tunables_init(); -}; + return 0; +} -void ksocknal_tunables_fini(void) +void ksocknal_tunables_setup(struct lnet_ni *ni) { - ksocknal_lib_tunables_fini(); + struct lnet_ioctl_config_socklnd_tunables *tunables; + struct lnet_ioctl_config_lnd_cmn_tunables *net_tunables; + + /* If no tunables specified, setup default tunables */ + if (!ni->ni_lnd_tunables_set) + memcpy(&ni->ni_lnd_tunables.lnd_tun_u.lnd_sock, + &ksock_default_tunables, sizeof(*tunables)); + + tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_sock; + + /* Current API version */ + tunables->lnd_version = CURRENT_LND_VERSION; + + net_tunables = &ni->ni_net->net_tunables; + + if (net_tunables->lct_peer_timeout == -1) + net_tunables->lct_peer_timeout = + *ksocknal_tunables.ksnd_peertimeout; + + if (net_tunables->lct_max_tx_credits == -1) + net_tunables->lct_max_tx_credits = + *ksocknal_tunables.ksnd_credits; + + if (net_tunables->lct_peer_tx_credits == -1) + net_tunables->lct_peer_tx_credits = + *ksocknal_tunables.ksnd_peertxcredits; + + if (net_tunables->lct_peer_tx_credits > + net_tunables->lct_max_tx_credits) + net_tunables->lct_peer_tx_credits = + net_tunables->lct_max_tx_credits; + + if (net_tunables->lct_peer_rtr_credits == -1) + net_tunables->lct_peer_rtr_credits = + *ksocknal_tunables.ksnd_peerrtrcredits; + + if (!tunables->lnd_conns_per_peer) + tunables->lnd_conns_per_peer = + ksocklnd_lookup_conns_per_peer(ni); + + if (tunables->lnd_tos < 0) + tunables->lnd_tos = tos; + + tunables->lnd_timeout = ksocknal_timeout(); }