Whamcloud - gitweb
LU-19098 hsm: don't print progname twice with lhsmtool
[fs/lustre-release.git] / lnet / klnds / socklnd / socklnd_modparams.c
index 2204280..2c95e30 100644 (file)
@@ -1,22 +1,13 @@
-/*
- * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
  *
  * Copyright (c) 2011, 2012, Intel Corporation.
+ */
+
+/* This file is part of Lustre, http://www.lustre.org/
  *
- *   Author: Eric Barton <eric@bartonsoftware.com>
- *
- *   Portals is free software; you can redistribute it and/or
- *   modify it under the terms of version 2 of the GNU General Public
- *   License as published by the Free Software Foundation.
- *
- *   Portals is distributed in the hope that it will be useful,
- *   but WITHOUT ANY WARRANTY; without even the implied warranty of
- *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- *   GNU General Public License for more details.
- *
- *   You should have received a copy of the GNU General Public License
- *   along with Portals; if not, write to the Free Software
- *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+ * Author: Eric Barton <eric@bartonsoftware.com>
  */
 
 #include "socklnd.h"
 #if defined(__x86_64__) || defined(__i386__)
 #include <asm/hypervisor.h>
 #endif
+#ifdef HAVE_ETHTOOL_LINK_SETTINGS
+#include <linux/inetdevice.h>
+#include <linux/ethtool.h>
+#include <net/addrconf.h>
+#endif
+
+#define CURRENT_LND_VERSION 1
 
-static int sock_timeout;
+static int sock_timeout = SOCKNAL_TIMEOUT_DEFAULT;
 module_param(sock_timeout, int, 0644);
 MODULE_PARM_DESC(sock_timeout, "dead socket timeout (seconds)");
 
@@ -142,6 +140,15 @@ static unsigned int zc_recv_min_nfrags = 16;
 module_param(zc_recv_min_nfrags, int, 0644);
 MODULE_PARM_DESC(zc_recv_min_nfrags, "minimum # of fragments to enable ZC recv");
 
+/* Module-wide default for the number of connections per peer.
+ * ksocknal_tunables_init() copies it into ksock_default_tunables; when a
+ * NI's lnd_conns_per_peer is still 0 in ksocknal_tunables_setup(), a value
+ * derived from the interface link speed is used instead.
+ */
+static unsigned int conns_per_peer = DEFAULT_CONNS_PER_PEER;
+module_param(conns_per_peer, uint, 0644);
+MODULE_PARM_DESC(conns_per_peer, "number of connections per peer");
+
+/* When non-zero, skip the automatic setup of linux routes for MR.
+ * Left uninitialised, so it defaults to 0 (do not skip).  Registered with
+ * mode 0444; the code that consumes the value is not in this file section.
+ */
+static unsigned int skip_mr_route_setup;
+module_param(skip_mr_route_setup, uint, 0444);
+MODULE_PARM_DESC(skip_mr_route_setup, "skip automatic setup of linux routes for MR");
+
 #ifdef SOCKNAL_BACKOFF
 static int backoff_init = 3;
 module_param(backoff_init, int, 0644);
@@ -158,6 +165,22 @@ module_param(protocol, int, 0644);
 MODULE_PARM_DESC(protocol, "protocol version");
 #endif
 
+/* Default type-of-service value; -1 means disabled.
+ * The parameter is registered with a custom setter, param_set_tos(), so
+ * that values outside -1..0xff are rejected when the parameter is parsed.
+ * Reads go through the stock param_get_int().
+ */
+static int tos = -1;
+static int param_set_tos(const char *val, cfs_kernel_param_arg_t *kp);
+#ifdef HAVE_KERNEL_PARAM_OPS
+/* Define a "tos" parameter type backed by param_ops_tos */
+static const struct kernel_param_ops param_ops_tos = {
+       .set = param_set_tos,
+       .get = param_get_int,
+};
+
+/* module_param(tos, tos, ...) needs param_check_tos; storage is an int */
+#define param_check_tos(name, p) \
+       __param_check(name, p, int)
+module_param(tos, tos, 0444);
+#else
+/* Without kernel_param_ops, pass the set/get callbacks directly */
+module_param_call(tos, param_set_tos, param_get_int, &tos, 0444);
+#endif
+MODULE_PARM_DESC(tos, "Set the type of service (=-1 to disable)");
+
 static inline bool is_native_host(void)
 {
 #ifdef HAVE_HYPERVISOR_IS_TYPE
@@ -170,9 +193,143 @@ static inline bool is_native_host(void)
 }
 
 struct ksock_tunables ksocknal_tunables;
+struct lnet_ioctl_config_socklnd_tunables ksock_default_tunables;
+
+/* Setter for the "tos" module parameter.
+ *
+ * Parses @val with kstrtoint() (base auto-detected) and stores the result
+ * in the int that kp->arg points to.
+ *
+ * Returns 0 on success, -EINVAL if @val is NULL, the kstrtoint() error if
+ * parsing fails, or -ERANGE if the value is outside -1..0xff.
+ */
+static int param_set_tos(const char *val, cfs_kernel_param_arg_t *kp)
+{
+       int parsed;
+       int err;
+
+       if (!val)
+               return -EINVAL;
+
+       err = kstrtoint(val, 0, &parsed);
+       if (err)
+               return err;
+
+       /* -1 disables TOS; anything else must fit in one byte */
+       if (parsed >= -1 && parsed <= 0xff) {
+               *((int *)kp->arg) = parsed;
+               return 0;
+       }
+
+       return -ERANGE;
+}
+
+#ifdef HAVE_ETHTOOL_LINK_SETTINGS
+/* Look up the link speed of the network device backing @ni.
+ *
+ * Walks the devices of ni->ni_net_ns under rtnl_lock(), skipping loopback
+ * and down interfaces.  ni->ni_interface is matched against the IPv4
+ * address labels of each device; for a device without an IPv4 in_device
+ * (and only when CONFIG_IPV6 is enabled) it is matched against the device
+ * name, provided the device has a non-temporary IPv6 address.
+ *
+ * Returns 0 if @ni has no namespace or interface name, -1 if no matching
+ * device was found, cmd.base.speed if the link settings could be read, or
+ * the error returned by __ethtool_get_link_ksettings() otherwise.
+ */
+static int ksocklnd_ni_get_eth_intf_speed(struct lnet_ni *ni)
+{
+       struct net_device *dev;
+       int intf_idx = -1;
+       int ret = -1;
+
+       DECLARE_CONST_IN_IFADDR(ifa);
+
+       /* check if ni has interface assigned */
+       if (!ni->ni_net_ns || !ni->ni_interface)
+               return 0;
+
+       rtnl_lock();
+       for_each_netdev(ni->ni_net_ns, dev) {
+               int flags = dev_get_flags(dev);
+               struct in_device *in_dev;
+
+               if (flags & IFF_LOOPBACK) /* skip the loopback IF */
+                       continue;
+
+               if (!(flags & IFF_UP))
+                       continue;
+
+               in_dev = __in_dev_get_rtnl(dev);
+               if (in_dev) {
+                       /* IPv4: compare against each address label; the
+                        * scan does not stop at the first match
+                        */
+                       in_dev_for_each_ifa_rtnl(ifa, in_dev) {
+                               if (strcmp(ifa->ifa_label, ni->ni_interface) == 0)
+                                       intf_idx = dev->ifindex;
+                       }
+                       endfor_ifa(in_dev);
+               } else {
+#if IS_ENABLED(CONFIG_IPV6)
+                       struct inet6_dev *in6_dev = __in6_dev_get(dev);
+
+                       if (in6_dev) {
+                               const struct inet6_ifaddr *ifa6;
+
+                               /* NOTE(review): only rtnl_lock() is visibly
+                                * held around this _rcu traversal - confirm
+                                * that is sufficient for addr_list.
+                                */
+                               list_for_each_entry_rcu(ifa6,
+                                                       &in6_dev->addr_list,
+                                                       if_list) {
+                                       if (ifa6->flags & IFA_F_TEMPORARY)
+                                               continue;
+
+                                       /* As different IPv6 addresses don't
+                                        * have unique labels, it is safest
+                                        * just to use the first and ignore
+                                        * the rest.
+                                        */
+                                       if (strcmp(dev->name,
+                                                  ni->ni_interface) == 0) {
+                                               intf_idx = dev->ifindex;
+                                               break;
+                                       }
+                               }
+                       } else {
+#endif
+                               /* no IPv4 or IPv6 state on this device */
+                               continue;
+#if IS_ENABLED(CONFIG_IPV6)
+                       }
+#endif
+               }
+
+               /* stop at the first device that matched */
+               if (intf_idx >= 0)
+                       break;
+       }
+       /* intf_idx >= 0 implies the loop was left via break, so dev still
+        * points at the matched device
+        */
+       if (intf_idx >= 0) {
+               struct ethtool_link_ksettings cmd;
+               int ethtool_ret;
+
+               /* Some devices may not be providing link settings */
+               ethtool_ret = __ethtool_get_link_ksettings(dev, &cmd);
+               if (!ethtool_ret)
+                       ret = cmd.base.speed;
+               else
+                       ret = ethtool_ret;
+       }
+       rtnl_unlock();
+
+       return ret;
+}
+
+/* Pick a heuristically optimal conns_per_peer value for the specified
+ * ethernet interface speed (Mbps).
+ *
+ * Speeds below 1Gbps are treated as 1Gbps so that ilog2() is never called
+ * with 0.  The result is 1 at 1Gbps and grows by one for every fourfold
+ * increase in speed.
+ */
+static int ksocklnd_speed2cpp(int speed)
+{
+       int gbps = speed / 1000;
+
+       if (gbps < 1)
+               gbps = 1;
+
+       return 1 + ilog2(gbps) / 2;
+}
+#endif
+
+/* Choose a conns_per_peer value for @ni.
+ *
+ * With HAVE_ETHTOOL_LINK_SETTINGS the value is derived from the link speed
+ * reported for the NI's interface; if no positive speed is available, or
+ * without ethtool link settings support, the result is 1.
+ */
+static int ksocklnd_lookup_conns_per_peer(struct lnet_ni *ni)
+{
+#ifdef HAVE_ETHTOOL_LINK_SETTINGS
+       int link_speed;
+
+       link_speed = ksocklnd_ni_get_eth_intf_speed(ni);
+       if (ni->ni_interface)
+               CDEBUG(D_NET, "intf %s speed %d\n", ni->ni_interface,
+                      link_speed);
+
+       /* zero or negative means unknown speed or lookup failure */
+       if (link_speed <= 0)
+               return 1;
+
+       return ksocklnd_speed2cpp(link_speed);
+#else
+       return 1;
+#endif
+}
 
 int ksocknal_tunables_init(void)
 {
+       ksock_default_tunables.lnd_version = CURRENT_LND_VERSION;
+       ksock_default_tunables.lnd_conns_per_peer = conns_per_peer;
+       ksock_default_tunables.lnd_tos = tos;
+
        /* initialize ksocknal_tunables structure */
        ksocknal_tunables.ksnd_timeout            = &sock_timeout;
        ksocknal_tunables.ksnd_nscheds            = &nscheds;
@@ -201,6 +358,11 @@ int ksocknal_tunables_init(void)
        ksocknal_tunables.ksnd_zc_min_payload     = &zc_min_payload;
        ksocknal_tunables.ksnd_zc_recv            = &zc_recv;
        ksocknal_tunables.ksnd_zc_recv_min_nfrags = &zc_recv_min_nfrags;
+       if (conns_per_peer > ((1 << SOCKNAL_CONN_COUNT_MAX_BITS)-1)) {
+               CWARN("socklnd conns_per_peer is capped at %u.\n",
+                     (1 << SOCKNAL_CONN_COUNT_MAX_BITS)-1);
+       }
+       ksocknal_tunables.ksnd_conns_per_peer     = &conns_per_peer;
 
        if (enable_irq_affinity) {
                CWARN("irq_affinity is removed from socklnd because modern "
@@ -229,4 +391,52 @@ int ksocknal_tunables_init(void)
                *ksocknal_tunables.ksnd_zc_min_payload = (16 << 20) + 1;
 
        return 0;
-};
+}
+
+/* Finalise the tunables of @ni.
+ *
+ * If no LND tunables were supplied for the NI, the socklnd tunables start
+ * from ksock_default_tunables.  Common net tunables still set to -1 are
+ * filled from the module parameters, peer tx credits are limited to the
+ * max tx credits, and unset socklnd values (conns_per_peer == 0, tos < 0)
+ * get their defaults.  lnd_version and lnd_timeout are always overwritten.
+ */
+void ksocknal_tunables_setup(struct lnet_ni *ni)
+{
+       struct lnet_ioctl_config_socklnd_tunables *sock_tun;
+       struct lnet_ioctl_config_lnd_cmn_tunables *cmn;
+       struct ksock_tunables *mod = &ksocknal_tunables;
+
+       sock_tun = &ni->ni_lnd_tunables.lnd_tun_u.lnd_sock;
+       cmn = &ni->ni_net->net_tunables;
+
+       /* Nothing configured for this NI: start from the module defaults */
+       if (!ni->ni_lnd_tunables_set)
+               memcpy(sock_tun, &ksock_default_tunables,
+                      sizeof(ksock_default_tunables));
+
+       /* Always report the API version implemented here */
+       sock_tun->lnd_version = CURRENT_LND_VERSION;
+
+       /* -1 marks a common tunable the caller left unset */
+       if (cmn->lct_peer_timeout == -1)
+               cmn->lct_peer_timeout = *mod->ksnd_peertimeout;
+
+       if (cmn->lct_max_tx_credits == -1)
+               cmn->lct_max_tx_credits = *mod->ksnd_credits;
+
+       if (cmn->lct_peer_tx_credits == -1)
+               cmn->lct_peer_tx_credits = *mod->ksnd_peertxcredits;
+
+       /* A single peer may not hold more credits than the NI has */
+       if (cmn->lct_max_tx_credits < cmn->lct_peer_tx_credits)
+               cmn->lct_peer_tx_credits = cmn->lct_max_tx_credits;
+
+       if (cmn->lct_peer_rtr_credits == -1)
+               cmn->lct_peer_rtr_credits = *mod->ksnd_peerrtrcredits;
+
+       /* 0 requests a value based on the interface link speed */
+       if (sock_tun->lnd_conns_per_peer == 0)
+               sock_tun->lnd_conns_per_peer =
+                       ksocklnd_lookup_conns_per_peer(ni);
+
+       if (sock_tun->lnd_tos < 0)
+               sock_tun->lnd_tos = tos;
+
+       sock_tun->lnd_timeout = ksocknal_timeout();
+}