lnet/klnds/gnilnd/gnilnd_modparams.c

   1 /*
   2  * Copyright (C) 2004 Cluster File Systems, Inc.
   3  *
   4  * Copyright (C) 2009-2012 Cray, Inc.
   5  *
   6  *   Derived from work by: Eric Barton <eric@bartonsoftware.com>
   7  *   Author: Nic Henke <nic@cray.com>
   8  *
   9  *   This file is part of Lustre, http://www.lustre.org.
  10  *
  11  *   Lustre is free software; you can redistribute it and/or
  12  *   modify it under the terms of version 2 of the GNU General Public
  13  *   License as published by the Free Software Foundation.
  14  *
  15  *   Lustre is distributed in the hope that it will be useful,
  16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
  17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  18  *   GNU General Public License for more details.
  19  *
  20  *   You should have received a copy of the GNU General Public License
  21  *   along with Lustre; if not, write to the Free Software
  22  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  23  *
  24  */
  25
  26 #include "gnilnd.h"
  27
  28 static int credits = GNILND_DEFAULT_CREDITS;
  29 module_param(credits, int, 0444);
  30 MODULE_PARM_DESC(credits, "# concurrent sends");
  31
  32 static int eager_credits = 256 * 1024;
  33 module_param(eager_credits, int, 0644);
  34 MODULE_PARM_DESC(eager_credits, "# eager buffers");
  35
  36 static int peer_credits = 16;
  37 module_param(peer_credits, int, 0444);
  38 MODULE_PARM_DESC(peer_credits, "# LNet peer credits");
  39
  40 /* NB - we'll not actually limit sends to this, we just size the mailbox buffer
  41  * such that at most we'll have concurrent_sends * max_immediate messages
  42  * in the mailbox */
  43 static int concurrent_sends = 0;
  44 module_param(concurrent_sends, int, 0444);
  45 MODULE_PARM_DESC(concurrent_sends, "# concurrent HW sends to 1 peer");
  46
  47 /* default for 2k nodes @ 16 peer credits */
  48 static int fma_cq_size = 32768;
  49 module_param(fma_cq_size, int, 0444);
  50 MODULE_PARM_DESC(fma_cq_size, "size of the completion queue");
  51
  52 static int timeout = GNILND_BASE_TIMEOUT;
  53 /* can't change @ runtime because LNet gets NI data at startup from
  54  * this value */
  55 module_param(timeout, int, 0444);
  56 MODULE_PARM_DESC(timeout, "communications timeout (seconds)");
  57
  58 /* time to wait between datagram timeout and sending of next dgram */
  59 static int min_reconnect_interval = GNILND_MIN_RECONNECT_TO;
  60 module_param(min_reconnect_interval, int, 0644);
  61 MODULE_PARM_DESC(min_reconnect_interval, "minimum connection retry interval (seconds)");
  62
  63 /* if this goes longer than timeout, we'll timeout the TX before
  64  * the dgram */
  65 static int max_reconnect_interval = GNILND_MAX_RECONNECT_TO;
  66 module_param(max_reconnect_interval, int, 0644);
  67 MODULE_PARM_DESC(max_reconnect_interval, "maximum connection retry interval (seconds)");
  68
  69 static int max_immediate = 2048;
  70 module_param(max_immediate, int, 0444);
  71 MODULE_PARM_DESC(max_immediate, "immediate/RDMA breakpoint");
  72
  73 static int checksum = GNILND_CHECKSUM_DEFAULT;
  74 module_param(checksum, int, 0644);
  75 MODULE_PARM_DESC(checksum, "0: None, 1: headers, 2: short msg, 3: all traffic");
  76
  77 static int checksum_dump = 0;
  78 module_param(checksum_dump, int, 0644);
  79 MODULE_PARM_DESC(checksum_dump, "0: None, 1: dump log on failure, 2: payload data to D_INFO log");
  80
  81 static int bte_put_dlvr_mode = GNILND_RDMA_DLVR_OPTION;
  82 module_param(bte_put_dlvr_mode, int, 0644);
  83 MODULE_PARM_DESC(bte_put_dlvr_mode, "Modify BTE Put Routing Option");
  84
  85 static int bte_get_dlvr_mode = GNILND_RDMA_DLVR_OPTION;
  86 module_param(bte_get_dlvr_mode, int, 0644);
  87 MODULE_PARM_DESC(bte_get_dlvr_mode, "Modify BTE Get Routing Option");
  88
  89 static int bte_relaxed_ordering = 1;
  90 module_param(bte_relaxed_ordering, int, 0644);
  91 MODULE_PARM_DESC(bte_relaxed_ordering, "enable relaxed ordering (PASSPW) for BTE (RDMA) transfers");
  92
  93 #ifdef CONFIG_MK1OM
  94 static int ptag = GNI_PTAG_LND_KNC;
  95 #else
  96 static int ptag = GNI_PTAG_LND;
  97 #endif
  98 module_param(ptag, int, 0444);
  99 MODULE_PARM_DESC(ptag, "ptag for Gemini CDM");
 100
 101 static int pkey = GNI_JOB_CREATE_COOKIE(GNI_PKEY_LND, 0);
 102 module_param(pkey, int, 0444);
 103 MODULE_PARM_DESC(pkey, "pkey for CDM");
 104
 105 static int max_retransmits = 128;
 106 module_param(max_retransmits, int, 0444);
 107 MODULE_PARM_DESC(max_retransmits,
 108                  "max retransmits for FMA before entering delay queue");
 109
 110 static int nwildcard = 4;
 111 module_param(nwildcard, int, 0444);
 112 MODULE_PARM_DESC(nwildcard, "# wildcard datagrams to post per net (interface)");
 113
 114 static int nice = -20;
 115 module_param(nice, int, 0444);
 116 MODULE_PARM_DESC(nice, "nice value for kgnilnd threads, default -20");
 117
 118 static int rdmaq_intervals = 4;
 119 module_param(rdmaq_intervals, int, 0644);
 120 MODULE_PARM_DESC(rdmaq_intervals, "# intervals per second for rdmaq throttling, default 4, 0 to disable");
 121
 122 static int loops = 100;
 123 module_param(loops, int, 0644);
 124 MODULE_PARM_DESC(loops, "# of loops before scheduler is friendly, default 100");
 125
 126 static int hash_size = 503;
 127 module_param(hash_size, int, 0444);
 128 MODULE_PARM_DESC(hash_size, "prime number for peer/conn hash sizing, default 503");
 129
 130 static int peer_health = 0;
 131 module_param(peer_health, int, 0444);
 132 MODULE_PARM_DESC(peer_health, "Disable peer timeout for LNet peer health, default off, > 0 to enable");
 133
 134 static int peer_timeout = -1;
 135 module_param(peer_timeout, int, 0444);
 136 MODULE_PARM_DESC(peer_timeout, "Peer timeout used for peer_health, default based on gnilnd timeout, > -1 to manually set");
 137
 138 static int vmap_cksum = 0;
 139 module_param(vmap_cksum, int, 0644);
 140 MODULE_PARM_DESC(vmap_cksum, "use vmap for all kiov checksumming, default off");
 141
 142 static int mbox_per_block = GNILND_FMABLK;
 143 module_param(mbox_per_block, int, 0644);
 144 MODULE_PARM_DESC(mbox_per_block, "mailboxes per block");
 145
 146 static int nphys_mbox = 0;
 147 module_param(nphys_mbox, int, 0444);
 148 MODULE_PARM_DESC(nphys_mbox, "# mbox to preallocate from physical memory, default 0");
 149
 150 static int mbox_credits = GNILND_MBOX_CREDITS;
 151 module_param(mbox_credits, int, 0644);
 152 MODULE_PARM_DESC(mbox_credits, "number of credits per mailbox");
 153
 154 static int sched_threads = GNILND_SCHED_THREADS;
 155 module_param(sched_threads, int, 0444);
 156 MODULE_PARM_DESC(sched_threads, "number of threads for moving data");
 157
 158 static int net_hash_size = 11;
 159 module_param(net_hash_size, int, 0444);
 160 MODULE_PARM_DESC(net_hash_size, "prime number for net hash sizing, default 11");
 161
 162 static int hardware_timeout = GNILND_HARDWARE_TIMEOUT;
 163 module_param(hardware_timeout, int, 0444);
 164 MODULE_PARM_DESC(hardware_timeout, "maximum time for traffic to get from one node to another");
 165
 166 static int mdd_timeout = GNILND_MDD_TIMEOUT;
 167 module_param(mdd_timeout, int, 0644);
 168 MODULE_PARM_DESC(mdd_timeout, "maximum time (in minutes) for mdd to be held");
 169
 170 static int sched_timeout = GNILND_SCHED_TIMEOUT;
 171 module_param(sched_timeout, int, 0644);
 172 MODULE_PARM_DESC(sched_timeout, "scheduler aliveness in seconds max time");
 173
 174 static int sched_nice = GNILND_SCHED_NICE;
 175 module_param(sched_nice, int, 0444);
 176 MODULE_PARM_DESC(sched_nice, "scheduler's nice setting, default compute 0 service -20");
 177
 178 static int reverse_rdma = GNILND_REVERSE_RDMA;
 179 module_param(reverse_rdma, int, 0644);
 180 MODULE_PARM_DESC(reverse_rdma, "Normal 0: Reverse GET: 1 Reverse Put: 2 Reverse Both: 3");
 181
 182 static int dgram_timeout = GNILND_DGRAM_TIMEOUT;
 183 module_param(dgram_timeout, int, 0644);
 184 MODULE_PARM_DESC(dgram_timeout, "dgram thread aliveness seconds max time");
 185
 186 static int efault_lbug = 0;
 187 module_param(efault_lbug, int, 0644);
 188 MODULE_PARM_DESC(efault_lbug, "If a compute receives an EFAULT in a message should it LBUG. 0 off 1 on");
 189
 190 static int fast_reconn = GNILND_FAST_RECONNECT;
 191 module_param(fast_reconn, int, 0644);
 192 MODULE_PARM_DESC(fast_reconn, "fast reconnect on connection timeout");
 193
 194 static int max_conn_purg = GNILND_PURGATORY_MAX;
 195 module_param(max_conn_purg, int, 0644);
 196 MODULE_PARM_DESC(max_conn_purg, "Max number of connections per peer in purgatory");
 197
 198 static int thread_affinity = 0;
 199 module_param(thread_affinity, int, 0444);
 200 MODULE_PARM_DESC(thread_affinity, "scheduler thread affinity default 0 (disabled)");
 201
 202 static int thread_safe = GNILND_TS_ENABLE;
 203 module_param(thread_safe, int, 0444);
 204 MODULE_PARM_DESC(thread_safe, "Use kgni thread safe API if available");
 205
 206 static int reg_fail_timeout = GNILND_REGFAILTO_DISABLE;
 207 module_param(reg_fail_timeout, int, 0644);
 208 MODULE_PARM_DESC(reg_fail_timeout, "fmablk registration timeout LBUG");
 209
 210 static int to_reconn_disable;
 211 module_param(to_reconn_disable, int, 0644);
 212 MODULE_PARM_DESC(to_reconn_disable,
 213                   "Timed out connection waits for peer before reconnecting");
 214
 215 static int vzalloc_no_retry = GNILND_VZALLOC_RETRY;
 216 module_param(vzalloc_no_retry, int, 0644);
 217 MODULE_PARM_DESC(vzalloc_no_retry,
 218                  "Should we pass the no_retry flag to vmalloc 1: no_retry 0: normal");
 219
 220 kgn_tunables_t kgnilnd_tunables = {
 221         .kgn_min_reconnect_interval = &min_reconnect_interval,
 222         .kgn_max_reconnect_interval = &max_reconnect_interval,
 223         .kgn_credits                = &credits,
 224         .kgn_peer_credits           = &peer_credits,
 225         .kgn_concurrent_sends       = &concurrent_sends,
 226         .kgn_fma_cq_size            = &fma_cq_size,
 227         .kgn_timeout                = &timeout,
 228         .kgn_max_immediate          = &max_immediate,
 229         .kgn_checksum               = &checksum,
 230         .kgn_checksum_dump          = &checksum_dump,
 231         .kgn_bte_put_dlvr_mode      = &bte_put_dlvr_mode,
 232         .kgn_bte_get_dlvr_mode      = &bte_get_dlvr_mode,
 233         .kgn_bte_relaxed_ordering   = &bte_relaxed_ordering,
 234         .kgn_ptag                   = &ptag,
 235         .kgn_pkey                   = &pkey,
 236         .kgn_max_retransmits        = &max_retransmits,
 237         .kgn_nwildcard              = &nwildcard,
 238         .kgn_nice                   = &nice,
 239         .kgn_rdmaq_intervals        = &rdmaq_intervals,
 240         .kgn_loops                  = &loops,
 241         .kgn_peer_hash_size         = &hash_size,
 242         .kgn_peer_health            = &peer_health,
 243         .kgn_peer_timeout           = &peer_timeout,
 244         .kgn_vmap_cksum             = &vmap_cksum,
 245         .kgn_mbox_per_block         = &mbox_per_block,
 246         .kgn_nphys_mbox             = &nphys_mbox,
 247         .kgn_mbox_credits           = &mbox_credits,
 248         .kgn_sched_threads          = &sched_threads,
 249         .kgn_net_hash_size          = &net_hash_size,
 250         .kgn_hardware_timeout       = &hardware_timeout,
 251         .kgn_mdd_timeout            = &mdd_timeout,
 252         .kgn_sched_timeout          = &sched_timeout,
 253         .kgn_sched_nice             = &sched_nice,
 254         .kgn_reverse_rdma           = &reverse_rdma,
 255         .kgn_dgram_timeout          = &dgram_timeout,
 256         .kgn_eager_credits          = &eager_credits,
 257         .kgn_fast_reconn            = &fast_reconn,
 258         .kgn_efault_lbug            = &efault_lbug,
 259         .kgn_thread_affinity        = &thread_affinity,
 260         .kgn_thread_safe            = &thread_safe,
 261         .kgn_reg_fail_timeout       = &reg_fail_timeout,
 262         .kgn_to_reconn_disable      = &to_reconn_disable,
 263         .kgn_max_purgatory          = &max_conn_purg,
 264         .kgn_vzalloc_noretry        = &vzalloc_no_retry
 265 };
 266
 267 int
 268 kgnilnd_tunables_init(void)
 269 {
 270         int rc = 0;
 271
 272         switch (*kgnilnd_tunables.kgn_checksum) {
 273         default:
 274                 CERROR("Invalid checksum module parameter: %d\n",
 275                        *kgnilnd_tunables.kgn_checksum);
 276                 rc = -EINVAL;
 277                 GOTO(out, rc);
 278         case GNILND_CHECKSUM_OFF:
 279                 /* no checksumming */
 280                 break;
 281         case GNILND_CHECKSUM_SMSG_HEADER:
 282                 LCONSOLE_INFO("SMSG header only checksumming enabled\n");
 283                 break;
 284         case GNILND_CHECKSUM_SMSG:
 285                 LCONSOLE_INFO("SMSG checksumming enabled\n");
 286                 break;
 287         case GNILND_CHECKSUM_SMSG_BTE:
 288                 LCONSOLE_INFO("SMSG + BTE checksumming enabled\n");
 289                 break;
 290         }
 291
 292         if (*kgnilnd_tunables.kgn_max_immediate > GNILND_MAX_IMMEDIATE) {
 293                 LCONSOLE_ERROR("kgnilnd module parameter 'max_immediate' too large %d > %d\n",
 294                 *kgnilnd_tunables.kgn_max_immediate, GNILND_MAX_IMMEDIATE);
 295                 rc = -EINVAL;
 296                 GOTO(out, rc);
 297         }
 298
 299         if (*kgnilnd_tunables.kgn_mbox_per_block < 1) {
 300                 *kgnilnd_tunables.kgn_mbox_per_block = 1;
 301         }
 302
 303         if (*kgnilnd_tunables.kgn_concurrent_sends == 0) {
 304                 *kgnilnd_tunables.kgn_concurrent_sends = *kgnilnd_tunables.kgn_peer_credits;
 305         } else if (*kgnilnd_tunables.kgn_concurrent_sends > *kgnilnd_tunables.kgn_peer_credits) {
 306                 LCONSOLE_ERROR("kgnilnd parameter 'concurrent_sends' too large: %d > %d (peer_credits)\n",
 307                                *kgnilnd_tunables.kgn_concurrent_sends, *kgnilnd_tunables.kgn_peer_credits);
 308                 rc = -EINVAL;
 309         }
 310 out:
 311         return rc;
 312 }