LU-15186 o2iblnd: Default map_on_demand to 1

[fs/lustre-release.git] / lnet / klnds / o2iblnd / o2iblnd_modparams.c
diff --git a/lnet/klnds/o2iblnd/o2iblnd_modparams.c b/lnet/klnds/o2iblnd/o2iblnd_modparams.c

index 1b96ff3..95e7200 100644 (file)
--- a/lnet/klnds/o2iblnd/o2iblnd_modparams.c
+++ b/lnet/klnds/o2iblnd/o2iblnd_modparams.c
@@ -15,21 +15,18 @@
   *
   * You should have received a copy of the GNU General Public License
   * version 2 along with this program; If not, see
- * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
- *
- * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
- * CA 95054 USA or visit www.sun.com if you need additional information or
- * have any questions.
+ * http://www.gnu.org/licenses/gpl-2.0.html
   *
   * GPL HEADER END
   */
  /*
   * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
   * Use is subject to license terms.
+ *
+ * Copyright (c) 2012, 2017, Intel Corporation.
   */
  /*
   * This file is part of Lustre, http://www.lustre.org/
- * Lustre is a trademark of Sun Microsystems, Inc.
   *
   * lnet/klnds/o2iblnd/o2iblnd_modparams.c
   *
@@ -38,85 +35,132 @@
  
  #include "o2iblnd.h"
  
+#define CURRENT_LND_VERSION 1
+
  static int service = 987;
-CFS_MODULE_PARM(service, "i", int, 0444,
-                "service number (within RDMA_PS_TCP)");
+module_param(service, int, 0444);
+MODULE_PARM_DESC(service, "service number (within RDMA_PS_TCP)");
  
  static int cksum = 0;
-CFS_MODULE_PARM(cksum, "i", int, 0644,
-                "set non-zero to enable message (not RDMA) checksums");
+module_param(cksum, int, 0644);
+MODULE_PARM_DESC(cksum, "set non-zero to enable message (not RDMA) checksums");
+
+static int timeout;
+module_param(timeout, int, 0644);
+MODULE_PARM_DESC(timeout, "timeout (seconds)");
  
-static int timeout = 50;
-CFS_MODULE_PARM(timeout, "i", int, 0644,
-                "timeout (seconds)");
+/* Number of threads in each scheduler pool (pools are percpt).
+ * If set to zero, a reasonable value is estimated based on CPUs. */
+static int nscheds;
+module_param(nscheds, int, 0444);
+MODULE_PARM_DESC(nscheds, "number of threads in each scheduler pool");
  
-static int ntx = 256;
-CFS_MODULE_PARM(ntx, "i", int, 0444,
-                "# of message descriptors");
+static unsigned int conns_per_peer = 1;
+module_param(conns_per_peer, uint, 0444);
+MODULE_PARM_DESC(conns_per_peer, "number of connections per peer");
  
-static int credits = 64;
-CFS_MODULE_PARM(credits, "i", int, 0444,
-                "# concurrent sends");
+/* NB: this value is shared by all CPTs, it can grow at runtime */
+static int ntx = 512;
+module_param(ntx, int, 0444);
+MODULE_PARM_DESC(ntx, "# of message descriptors allocated for each pool");
  
-static int peer_credits = 8;
-CFS_MODULE_PARM(peer_credits, "i", int, 0444,
-                "# concurrent sends to 1 peer");
+/* NB: this value is shared by all CPTs */
+static int credits = DEFAULT_CREDITS;
+module_param(credits, int, 0444);
+MODULE_PARM_DESC(credits, "# concurrent sends");
+
+static int peer_credits = DEFAULT_PEER_CREDITS;
+module_param(peer_credits, int, 0444);
+MODULE_PARM_DESC(peer_credits, "# concurrent sends to 1 peer");
  
  static int peer_credits_hiw = 0;
-CFS_MODULE_PARM(peer_credits_hiw, "i", int, 0444,
-                "when eagerly to return credits");
+module_param(peer_credits_hiw, int, 0444);
+MODULE_PARM_DESC(peer_credits_hiw, "when eagerly to return credits");
  
  static int peer_buffer_credits = 0;
-CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444,
-                "# per-peer router buffer credits");
+module_param(peer_buffer_credits, int, 0444);
+MODULE_PARM_DESC(peer_buffer_credits, "# per-peer router buffer credits");
  
-static int peer_timeout = 180;
-CFS_MODULE_PARM(peer_timeout, "i", int, 0444,
-                "Seconds without aliveness news to declare peer dead (<=0 to disable)");
+static int peer_timeout = DEFAULT_PEER_TIMEOUT;
+module_param(peer_timeout, int, 0444);
+MODULE_PARM_DESC(peer_timeout, "Seconds without aliveness news to declare peer dead (<=0 to disable)");
  
  static char *ipif_name = "ib0";
-CFS_MODULE_PARM(ipif_name, "s", charp, 0444,
-                "IPoIB interface name");
+module_param(ipif_name, charp, 0444);
+MODULE_PARM_DESC(ipif_name, "IPoIB interface name");
  
  static int retry_count = 5;
-CFS_MODULE_PARM(retry_count, "i", int, 0644,
-                "Retransmissions when no ACK received");
+module_param(retry_count, int, 0644);
+MODULE_PARM_DESC(retry_count, "Number of times to retry connection operations");
  
  static int rnr_retry_count = 6;
-CFS_MODULE_PARM(rnr_retry_count, "i", int, 0644,
-                "RNR retransmissions");
+module_param(rnr_retry_count, int, 0644);
+MODULE_PARM_DESC(rnr_retry_count, "RNR retransmissions");
  
  static int keepalive = 100;
-CFS_MODULE_PARM(keepalive, "i", int, 0644,
-                "Idle time in seconds before sending a keepalive");
+module_param(keepalive, int, 0644);
+MODULE_PARM_DESC(keepalive, "Idle time in seconds before sending a keepalive");
+
+static int ib_mtu;
+module_param(ib_mtu, int, 0444);
+MODULE_PARM_DESC(ib_mtu, "IB MTU 256/512/1024/2048/4096");
  
-static int ib_mtu = 0;
-CFS_MODULE_PARM(ib_mtu, "i", int, 0444,
-                "IB MTU 256/512/1024/2048/4096");
+static int concurrent_sends;
+module_param(concurrent_sends, int, 0444);
+MODULE_PARM_DESC(concurrent_sends, "send work-queue sizing");
  
-static int concurrent_sends = 0;
-CFS_MODULE_PARM(concurrent_sends, "i", int, 0444,
-                "send work-queue sizing");
+static int use_fastreg_gaps;
+module_param(use_fastreg_gaps, int, 0444);
+MODULE_PARM_DESC(use_fastreg_gaps, "Enable discontiguous fastreg fragment support. Expect performance drop");
  
-static int map_on_demand = 0;
-CFS_MODULE_PARM(map_on_demand, "i", int, 0444,
-                "map on demand");
+/*
+ * map_on_demand is a flag used to determine if we can use FMR or FastReg.
+ * This is applicable for kernels which support global memory regions. For
+ * later kernels this flag is always enabled, since we will always either
+ * use FMR or FastReg
+ * For kernels which support global memory regions map_on_demand defaults
+ * to 1; setting it to 0 means global memory regions are used exclusively.
+ * If it is set to a value other than 0, then we will behave as follows:
+ *  1. Always default the number of fragments to IBLND_MAX_RDMA_FRAGS
+ *  2. Create FMR/FastReg pools
+ *  3. Negotiate the supported number of fragments per connection
+ *  4. Attempt to transmit using global memory regions only if
+ *     map-on-demand is not turned on, otherwise use FMR or FastReg
+ *  5. In case of transmitting tx with GAPS over FMR we will need to
+ *     transmit it with multiple fragments. Look at the comments in
+ *     kiblnd_fmr_map_tx() for an explanation of the behavior.
+ *
+ * For later kernels we default map_on_demand to 1 and do not allow
+ * it to be set to 0, since there is no longer support for global memory
+ * regions. Behavior:
+ *  1. Default the number of fragments to IBLND_MAX_RDMA_FRAGS
+ *  2. Create FMR/FastReg pools
+ *  3. Negotiate the supported number of fragments per connection
+ *  4. Look at the comments in kiblnd_fmr_map_tx() for an explanation of
+ *     the behavior when transmitting with GAPS versus contiguous.
+ */
+#ifdef HAVE_IB_GET_DMA_MR
+#define MOD_STR "map on demand"
+#else
+#define MOD_STR "map on demand (obsolete)"
+#endif
+static int map_on_demand = 1;
+module_param(map_on_demand, int, 0444);
+MODULE_PARM_DESC(map_on_demand, MOD_STR);
  
+/* NB: this value is shared by all CPTs, it can grow at runtime */
  static int fmr_pool_size = 512;
-CFS_MODULE_PARM(fmr_pool_size, "i", int, 0444,
-                "size of the fmr pool (>= ntx / 4)");
+module_param(fmr_pool_size, int, 0444);
+MODULE_PARM_DESC(fmr_pool_size, "size of fmr pool on each CPT (>= ntx / 4)");
  
+/* NB: this value is shared by all CPTs, it can grow at runtime */
  static int fmr_flush_trigger = 384;
-CFS_MODULE_PARM(fmr_flush_trigger, "i", int, 0444,
-                "# dirty FMRs that triggers pool flush");
+module_param(fmr_flush_trigger, int, 0444);
+MODULE_PARM_DESC(fmr_flush_trigger, "# dirty FMRs that triggers pool flush");
  
  static int fmr_cache = 1;
-CFS_MODULE_PARM(fmr_cache, "i", int, 0444,
-                "non-zero to enable FMR caching");
-
-static int pmr_pool_size = 512;
-CFS_MODULE_PARM(pmr_pool_size, "i", int, 0444,
-                "size of the MR cache pmr pool");
+module_param(fmr_cache, int, 0444);
+MODULE_PARM_DESC(fmr_cache, "non-zero to enable FMR caching");
  
  /*
   * 0: disable failover
@@ -124,382 +168,165 @@ CFS_MODULE_PARM(pmr_pool_size, "i", int, 0444,
   * 2: force to failover (for debug)
   */
  static int dev_failover = 0;
-CFS_MODULE_PARM(dev_failover, "i", int, 0444,
-               "HCA failover for bonding (0 off, 1 on, other values reserved)");
-
+module_param(dev_failover, int, 0444);
+MODULE_PARM_DESC(dev_failover, "HCA failover for bonding (0 off, 1 on, other values reserved)");
  
-static int require_privileged_port = 0;
-CFS_MODULE_PARM(require_privileged_port, "i", int, 0644,
-                "require privileged port when accepting connection");
+static int require_privileged_port;
+module_param(require_privileged_port, int, 0644);
+MODULE_PARM_DESC(require_privileged_port, "require privileged port when accepting connection");
  
  static int use_privileged_port = 1;
-CFS_MODULE_PARM(use_privileged_port, "i", int, 0644,
-                "use privileged port when initiating connection");
+module_param(use_privileged_port, int, 0644);
+MODULE_PARM_DESC(use_privileged_port, "use privileged port when initiating connection");
+
+static unsigned int wrq_sge = 2;
+module_param(wrq_sge, uint, 0444);
+MODULE_PARM_DESC(wrq_sge, "# scatter/gather element per work request");
  
-kib_tunables_t kiblnd_tunables = {
+struct kib_tunables kiblnd_tunables = {
          .kib_dev_failover           = &dev_failover,
          .kib_service                = &service,
          .kib_cksum                  = &cksum,
          .kib_timeout                = &timeout,
          .kib_keepalive              = &keepalive,
-        .kib_ntx                    = &ntx,
-        .kib_credits                = &credits,
-        .kib_peertxcredits          = &peer_credits,
-        .kib_peercredits_hiw        = &peer_credits_hiw,
-        .kib_peerrtrcredits         = &peer_buffer_credits,
-        .kib_peertimeout            = &peer_timeout,
          .kib_default_ipif           = &ipif_name,
          .kib_retry_count            = &retry_count,
          .kib_rnr_retry_count        = &rnr_retry_count,
-        .kib_concurrent_sends       = &concurrent_sends,
          .kib_ib_mtu                 = &ib_mtu,
-        .kib_map_on_demand          = &map_on_demand,
-        .kib_fmr_pool_size          = &fmr_pool_size,
-        .kib_fmr_flush_trigger      = &fmr_flush_trigger,
-        .kib_fmr_cache              = &fmr_cache,
-        .kib_pmr_pool_size          = &pmr_pool_size,
          .kib_require_priv_port      = &require_privileged_port,
-        .kib_use_priv_port          = &use_privileged_port
+       .kib_use_priv_port          = &use_privileged_port,
+       .kib_nscheds                = &nscheds,
+       .kib_wrq_sge                = &wrq_sge,
+       .kib_use_fastreg_gaps       = &use_fastreg_gaps,
  };
  
-#if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
-
-static char ipif_basename_space[32];
-
-#ifndef HAVE_SYSCTL_UNNUMBERED
-
-enum {
-        O2IBLND_SERVICE  = 1,
-        O2IBLND_CKSUM,
-        O2IBLND_TIMEOUT,
-        O2IBLND_NTX,
-        O2IBLND_CREDITS,
-        O2IBLND_PEER_TXCREDITS,
-        O2IBLND_PEER_CREDITS_HIW,
-        O2IBLND_PEER_RTRCREDITS,
-        O2IBLND_PEER_TIMEOUT,
-        O2IBLND_IPIF_BASENAME,
-        O2IBLND_RETRY_COUNT,
-        O2IBLND_RNR_RETRY_COUNT,
-        O2IBLND_KEEPALIVE,
-        O2IBLND_CONCURRENT_SENDS,
-        O2IBLND_IB_MTU,
-        O2IBLND_MAP_ON_DEMAND,
-        O2IBLND_FMR_POOL_SIZE,
-        O2IBLND_FMR_FLUSH_TRIGGER,
-        O2IBLND_FMR_CACHE,
-        O2IBLND_PMR_POOL_SIZE,
-        O2IBLND_DEV_FAILOVER
-};
-#else
-
-#define O2IBLND_SERVICE          CTL_UNNUMBERED
-#define O2IBLND_CKSUM            CTL_UNNUMBERED
-#define O2IBLND_TIMEOUT          CTL_UNNUMBERED
-#define O2IBLND_NTX              CTL_UNNUMBERED
-#define O2IBLND_CREDITS          CTL_UNNUMBERED
-#define O2IBLND_PEER_TXCREDITS   CTL_UNNUMBERED
-#define O2IBLND_PEER_CREDITS_HIW CTL_UNNUMBERED
-#define O2IBLND_PEER_RTRCREDITS  CTL_UNNUMBERED
-#define O2IBLND_PEER_TIMEOUT     CTL_UNNUMBERED
-#define O2IBLND_IPIF_BASENAME    CTL_UNNUMBERED
-#define O2IBLND_RETRY_COUNT      CTL_UNNUMBERED
-#define O2IBLND_RNR_RETRY_COUNT  CTL_UNNUMBERED
-#define O2IBLND_KEEPALIVE        CTL_UNNUMBERED
-#define O2IBLND_CONCURRENT_SENDS CTL_UNNUMBERED
-#define O2IBLND_IB_MTU           CTL_UNNUMBERED
-#define O2IBLND_MAP_ON_DEMAND    CTL_UNNUMBERED
-#define O2IBLND_FMR_POOL_SIZE    CTL_UNNUMBERED
-#define O2IBLND_FMR_FLUSH_TRIGGER CTL_UNNUMBERED
-#define O2IBLND_FMR_CACHE        CTL_UNNUMBERED
-#define O2IBLND_PMR_POOL_SIZE    CTL_UNNUMBERED
-#define O2IBLND_DEV_FAILOVER     CTL_UNNUMBERED
-
-#endif
-
-static cfs_sysctl_table_t kiblnd_ctl_table[] = {
-        {
-                .ctl_name = O2IBLND_SERVICE,
-                .procname = "service",
-                .data     = &service,
-                .maxlen   = sizeof(int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name = O2IBLND_CKSUM,
-                .procname = "cksum",
-                .data     = &cksum,
-                .maxlen   = sizeof(int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name = O2IBLND_TIMEOUT,
-                .procname = "timeout",
-                .data     = &timeout,
-                .maxlen   = sizeof(int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name = O2IBLND_NTX,
-                .procname = "ntx",
-                .data     = &ntx,
-                .maxlen   = sizeof(int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name = O2IBLND_CREDITS,
-                .procname = "credits",
-                .data     = &credits,
-                .maxlen   = sizeof(int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name = O2IBLND_PEER_TXCREDITS,
-                .procname = "peer_credits",
-                .data     = &peer_credits,
-                .maxlen   = sizeof(int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name = O2IBLND_PEER_CREDITS_HIW,
-                .procname = "peer_credits_hiw",
-                .data     = &peer_credits_hiw,
-                .maxlen   = sizeof(int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name = O2IBLND_PEER_RTRCREDITS,
-                .procname = "peer_buffer_credits",
-                .data     = &peer_buffer_credits,
-                .maxlen   = sizeof(int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name = O2IBLND_PEER_TIMEOUT,
-                .procname = "peer_timeout",
-                .data     = &peer_timeout,
-                .maxlen   = sizeof(int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name = O2IBLND_IPIF_BASENAME,
-                .procname = "ipif_name",
-                .data     = ipif_basename_space,
-                .maxlen   = sizeof(ipif_basename_space),
-                .mode     = 0444,
-                .proc_handler = &proc_dostring
-        },
-        {
-                .ctl_name = O2IBLND_RETRY_COUNT,
-                .procname = "retry_count",
-                .data     = &retry_count,
-                .maxlen   = sizeof(int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name = O2IBLND_RNR_RETRY_COUNT,
-                .procname = "rnr_retry_count",
-                .data     = &rnr_retry_count,
-                .maxlen   = sizeof(int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name = O2IBLND_KEEPALIVE,
-                .procname = "keepalive",
-                .data     = &keepalive,
-                .maxlen   = sizeof(int),
-                .mode     = 0644,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name = O2IBLND_CONCURRENT_SENDS,
-                .procname = "concurrent_sends",
-                .data     = &concurrent_sends,
-                .maxlen   = sizeof(int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name = O2IBLND_IB_MTU,
-                .procname = "ib_mtu",
-                .data     = &ib_mtu,
-                .maxlen   = sizeof(int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name = O2IBLND_MAP_ON_DEMAND,
-                .procname = "map_on_demand",
-                .data     = &map_on_demand,
-                .maxlen   = sizeof(int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        },
-
-        {
-                .ctl_name = O2IBLND_FMR_POOL_SIZE,
-                .procname = "fmr_pool_size",
-                .data     = &fmr_pool_size,
-                .maxlen   = sizeof(int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name = O2IBLND_FMR_FLUSH_TRIGGER,
-                .procname = "fmr_flush_trigger",
-                .data     = &fmr_flush_trigger,
-                .maxlen   = sizeof(int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name = O2IBLND_FMR_CACHE,
-                .procname = "fmr_cache",
-                .data     = &fmr_cache,
-                .maxlen   = sizeof(int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name = O2IBLND_PMR_POOL_SIZE,
-                .procname = "pmr_pool_size",
-                .data     = &pmr_pool_size,
-                .maxlen   = sizeof(int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {
-                .ctl_name = O2IBLND_DEV_FAILOVER,
-                .procname = "dev_failover",
-                .data     = &dev_failover,
-                .maxlen   = sizeof(int),
-                .mode     = 0444,
-                .proc_handler = &proc_dointvec
-        },
-        {0}
-};
-
-static cfs_sysctl_table_t kiblnd_top_ctl_table[] = {
-        {
-                .ctl_name = CTL_O2IBLND,
-                .procname = "o2iblnd",
-                .data     = NULL,
-                .maxlen   = 0,
-                .mode     = 0555,
-                .child    = kiblnd_ctl_table
-        },
-        {0}
-};
-
-void
-kiblnd_initstrtunable(char *space, char *str, int size)
-{
-        strncpy(space, str, size);
-        space[size-1] = 0;
-}
-
-void
-kiblnd_sysctl_init (void)
-{
-        kiblnd_initstrtunable(ipif_basename_space, ipif_name,
-                              sizeof(ipif_basename_space));
+static struct lnet_ioctl_config_o2iblnd_tunables default_tunables;
  
-        kiblnd_tunables.kib_sysctl =
-                cfs_register_sysctl_table(kiblnd_top_ctl_table, 0);
-
-        if (kiblnd_tunables.kib_sysctl == NULL)
-                CWARN("Can't setup /proc tunables\n");
-}
-
-void
-kiblnd_sysctl_fini (void)
-{
-        if (kiblnd_tunables.kib_sysctl != NULL)
-                cfs_unregister_sysctl_table(kiblnd_tunables.kib_sysctl);
-}
-
-#else
-
-void
-kiblnd_sysctl_init (void)
-{
-}
-
-void
-kiblnd_sysctl_fini (void)
+/* # messages/RDMAs in-flight */
+int
+kiblnd_msg_queue_size(int version, struct lnet_ni *ni)
  {
+       if (version == IBLND_MSG_VERSION_1)
+               return IBLND_MSG_QUEUE_SIZE_V1;
+       else if (ni)
+               return ni->ni_net->net_tunables.lct_peer_tx_credits;
+       else
+               return peer_credits;
  }
  
-#endif
-
  int
-kiblnd_tunables_init (void)
+kiblnd_tunables_setup(struct lnet_ni *ni)
  {
-        if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) {
-                CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
-                       *kiblnd_tunables.kib_ib_mtu);
-                return -EINVAL;
-        }
-
-        if (*kiblnd_tunables.kib_peertxcredits < IBLND_CREDITS_DEFAULT)
-                *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_DEFAULT;
-
-        if (*kiblnd_tunables.kib_peertxcredits > IBLND_CREDITS_MAX)
-                *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_MAX;
-
-        if (*kiblnd_tunables.kib_peertxcredits > *kiblnd_tunables.kib_credits)
-                *kiblnd_tunables.kib_peertxcredits = *kiblnd_tunables.kib_credits;
-
-        if (*kiblnd_tunables.kib_peercredits_hiw < *kiblnd_tunables.kib_peertxcredits / 2)
-                *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits / 2;
-
-        if (*kiblnd_tunables.kib_peercredits_hiw >= *kiblnd_tunables.kib_peertxcredits)
-                *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits - 1;
-
-        if (*kiblnd_tunables.kib_map_on_demand < 0 ||
-            *kiblnd_tunables.kib_map_on_demand > IBLND_MAX_RDMA_FRAGS)
-                *kiblnd_tunables.kib_map_on_demand = 0; /* disable map-on-demand */
-
-        if (*kiblnd_tunables.kib_map_on_demand == 1)
-                *kiblnd_tunables.kib_map_on_demand = 2; /* don't make sense to create map if only one fragment */
-
-        if (*kiblnd_tunables.kib_concurrent_sends == 0) {
-                if (*kiblnd_tunables.kib_map_on_demand > 0 &&
-                    *kiblnd_tunables.kib_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8)
-                        *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits) * 2;
-                else
-                        *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits);
-        }
-
-        if (*kiblnd_tunables.kib_concurrent_sends > *kiblnd_tunables.kib_peertxcredits * 2)
-                *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits * 2;
-
-        if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits / 2)
-                *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits / 2;
-
-        if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits) {
-                CWARN("Concurrent sends %d is lower than message queue size: %d, "
-                      "performance may drop slightly.\n",
-                      *kiblnd_tunables.kib_concurrent_sends, *kiblnd_tunables.kib_peertxcredits);
-        }
+       struct lnet_ioctl_config_o2iblnd_tunables *tunables;
+       struct lnet_ioctl_config_lnd_cmn_tunables *net_tunables;
+
+       /*
+        * If no tunables were specified, set the tunables up from the
+        * module-parameter defaults (default_tunables).
+        */
+       if (!ni->ni_lnd_tunables_set)
+               memcpy(&ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib,
+                      &default_tunables, sizeof(*tunables));
+
+       tunables = &ni->ni_lnd_tunables.lnd_tun_u.lnd_o2ib;
+
+       /* Current API version */
+       tunables->lnd_version = CURRENT_LND_VERSION;
+
+       if (*kiblnd_tunables.kib_ib_mtu &&
+           ib_mtu_enum_to_int(ib_mtu_int_to_enum(*kiblnd_tunables.kib_ib_mtu)) !=
+           *kiblnd_tunables.kib_ib_mtu) {
+               CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
+                      *kiblnd_tunables.kib_ib_mtu);
+               return -EINVAL;
+       }
+
+       net_tunables = &ni->ni_net->net_tunables;
+
+       if (net_tunables->lct_peer_timeout == -1)
+               net_tunables->lct_peer_timeout = peer_timeout;
+
+       if (net_tunables->lct_max_tx_credits == -1)
+               net_tunables->lct_max_tx_credits = credits;
+
+       if (net_tunables->lct_peer_tx_credits == -1)
+               net_tunables->lct_peer_tx_credits = peer_credits;
+
+       if (net_tunables->lct_peer_rtr_credits == -1)
+               net_tunables->lct_peer_rtr_credits = peer_buffer_credits;
+
+       if (net_tunables->lct_peer_tx_credits < IBLND_CREDITS_DEFAULT)
+               net_tunables->lct_peer_tx_credits = IBLND_CREDITS_DEFAULT;
+
+       if (net_tunables->lct_peer_tx_credits > IBLND_CREDITS_MAX)
+               net_tunables->lct_peer_tx_credits = IBLND_CREDITS_MAX;
+
+       if (net_tunables->lct_peer_tx_credits >
+           net_tunables->lct_max_tx_credits)
+               net_tunables->lct_peer_tx_credits =
+                       net_tunables->lct_max_tx_credits;
+
+#ifndef HAVE_IB_GET_DMA_MR
+       /*
+        * For kernels which do not support global memory regions, always
+        * enable map_on_demand
+        */
+       if (tunables->lnd_map_on_demand == 0)
+               tunables->lnd_map_on_demand = 1;
+#endif
  
-        kiblnd_sysctl_init();
-        return 0;
+       if (!tunables->lnd_peercredits_hiw)
+               tunables->lnd_peercredits_hiw = peer_credits_hiw;
+
+       if (tunables->lnd_peercredits_hiw < net_tunables->lct_peer_tx_credits / 2)
+               tunables->lnd_peercredits_hiw = net_tunables->lct_peer_tx_credits / 2;
+
+       if (tunables->lnd_peercredits_hiw >= net_tunables->lct_peer_tx_credits)
+               tunables->lnd_peercredits_hiw = net_tunables->lct_peer_tx_credits - 1;
+
+       if (tunables->lnd_concurrent_sends == 0)
+                       tunables->lnd_concurrent_sends = net_tunables->lct_peer_tx_credits;
+
+       if (tunables->lnd_concurrent_sends > net_tunables->lct_peer_tx_credits * 2)
+               tunables->lnd_concurrent_sends = net_tunables->lct_peer_tx_credits * 2;
+
+       if (tunables->lnd_concurrent_sends < net_tunables->lct_peer_tx_credits / 2)
+               tunables->lnd_concurrent_sends = net_tunables->lct_peer_tx_credits / 2;
+
+       if (tunables->lnd_concurrent_sends < net_tunables->lct_peer_tx_credits) {
+               CWARN("Concurrent sends %d is lower than message "
+                     "queue size: %d, performance may drop slightly.\n",
+                     tunables->lnd_concurrent_sends,
+                     net_tunables->lct_peer_tx_credits);
+       }
+
+       if (!tunables->lnd_fmr_pool_size)
+               tunables->lnd_fmr_pool_size = fmr_pool_size;
+       if (!tunables->lnd_fmr_flush_trigger)
+               tunables->lnd_fmr_flush_trigger = fmr_flush_trigger;
+       if (!tunables->lnd_fmr_cache)
+               tunables->lnd_fmr_cache = fmr_cache;
+       if (!tunables->lnd_ntx)
+               tunables->lnd_ntx = ntx;
+       if (!tunables->lnd_conns_per_peer) {
+               tunables->lnd_conns_per_peer = (conns_per_peer) ?
+                       conns_per_peer : 1;
+       }
+
+       return 0;
  }
  
-void
-kiblnd_tunables_fini (void)
+int
+kiblnd_tunables_init(void)
  {
-        kiblnd_sysctl_fini();
+       default_tunables.lnd_version = CURRENT_LND_VERSION;
+       default_tunables.lnd_peercredits_hiw = peer_credits_hiw;
+       default_tunables.lnd_map_on_demand = map_on_demand;
+       default_tunables.lnd_concurrent_sends = concurrent_sends;
+       default_tunables.lnd_fmr_pool_size = fmr_pool_size;
+       default_tunables.lnd_fmr_flush_trigger = fmr_flush_trigger;
+       default_tunables.lnd_fmr_cache = fmr_cache;
+       default_tunables.lnd_ntx = ntx;
+       default_tunables.lnd_conns_per_peer = conns_per_peer;
+       return 0;
  }