Whamcloud - gitweb
LU-2989 build: some make targets are broken
[fs/lustre-release.git] / lnet / klnds / gnilnd / gnilnd_modparams.c
1 /*
2  * Copyright (C) 2004 Cluster File Systems, Inc.
3  *
4  * Copyright (C) 2009-2012 Cray, Inc.
5  *
6  *   Derived from work by: Eric Barton <eric@bartonsoftware.com>
7  *   Author: Nic Henke <nic@cray.com>
8  *
9  *   This file is part of Lustre, http://www.lustre.org.
10  *
11  *   Lustre is free software; you can redistribute it and/or
12  *   modify it under the terms of version 2 of the GNU General Public
13  *   License as published by the Free Software Foundation.
14  *
15  *   Lustre is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  *   GNU General Public License for more details.
19  *
20  *   You should have received a copy of the GNU General Public License
21  *   along with Lustre; if not, write to the Free Software
22  *   Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23  *
24  */
25
26 #include "gnilnd.h"
27
28 static int credits = 256;
29 CFS_MODULE_PARM(credits, "i", int, 0444,
30                 "# concurrent sends");
31
32 static int peer_credits = 16;
33 CFS_MODULE_PARM(peer_credits, "i", int, 0444,
34                 "# LNet peer credits");
35
36 /* NB - we'll not actually limit sends to this, we just size the mailbox buffer
37  * such that at most we'll have concurrent_sends * max_immediate messages
38  * in the mailbox */
39 static int concurrent_sends = 0;
40 CFS_MODULE_PARM(concurrent_sends, "i", int, 0444,
41                 "# concurrent HW sends to 1 peer");
42
43 /* default for 2k nodes @ 16 peer credits */
44 static int fma_cq_size = 32768;
45 CFS_MODULE_PARM(fma_cq_size, "i", int, 0444,
46                 "size of the completion queue");
47
48 static int timeout = GNILND_BASE_TIMEOUT;
49 /* can't change @ runtime because LNet gets NI data at startup from
50  * this value */
51 CFS_MODULE_PARM(timeout, "i", int, 0444,
52                 "communications timeout (seconds)");
53
54 /* time to wait between datagram timeout and sending of next dgram */
55 static int min_reconnect_interval = GNILND_MIN_RECONNECT_TO;
56 CFS_MODULE_PARM(min_reconnect_interval, "i", int, 0644,
57                 "minimum connection retry interval (seconds)");
58
59 /* if this goes longer than timeout, we'll timeout the TX before
60  * the dgram */
61 static int max_reconnect_interval = GNILND_MAX_RECONNECT_TO;
62 CFS_MODULE_PARM(max_reconnect_interval, "i", int, 0644,
63                 "maximum connection retry interval (seconds)");
64
65 static int max_immediate = (2<<10);
66 CFS_MODULE_PARM(max_immediate, "i", int, 0644,
67                 "immediate/RDMA breakpoint");
68
69 static int checksum = GNILND_CHECKSUM_DEFAULT;
70 CFS_MODULE_PARM(checksum, "i", int, 0644,
71                 "0: None, 1: headers, 2: short msg, 3: all traffic");
72
73 static int checksum_dump = 0;
74 CFS_MODULE_PARM(checksum_dump, "i", int, 0644,
75                 "0: None, 1: dump log on failure, 2: payload data to D_INFO log");
76
77 static int bte_dlvr_mode = GNILND_RDMA_DLVR_OPTION;
78 CFS_MODULE_PARM(bte_dlvr_mode, "i", int, 0644,
79                 "enable hashing for BTE (RDMA) transfers");
80
81 static int bte_relaxed_ordering = 1;
82 CFS_MODULE_PARM(bte_relaxed_ordering, "i", int, 0644,
83                 "enable relaxed ordering (PASSPW) for BTE (RDMA) transfers");
84
85 static int ptag = GNI_PTAG_LND;
86 CFS_MODULE_PARM(ptag, "i", int, 0444,
87                 "ptag for Gemini CDM");
88
89 static int max_retransmits = 1024;
90 CFS_MODULE_PARM(max_retransmits, "i", int, 0444,
91                 "max retransmits for FMA");
92
93 static int nwildcard = 4;
94 CFS_MODULE_PARM(nwildcard, "i", int, 0444,
95                 "# wildcard datagrams to post per net (interface)");
96
97 static int nice = -20;
98 CFS_MODULE_PARM(nice, "i", int, 0444,
99                 "nice value for kgnilnd threads, default -20");
100
101 static int rdmaq_intervals = 4;
102 CFS_MODULE_PARM(rdmaq_intervals, "i", int, 0644,
103                 "# intervals per second for rdmaq throttling, default 4, 0 to disable");
104
105 static int loops = 100;
106 CFS_MODULE_PARM(loops, "i", int, 0644,
107                 "# of loops before scheduler is friendly, default 100");
108
109 static int hash_size = 503;
110 CFS_MODULE_PARM(hash_size, "i", int, 0444,
111                 "prime number for peer/conn hash sizing, default 503");
112
113 static int peer_health = 0;
114 CFS_MODULE_PARM(peer_health, "i", int, 0444,
115                 "Disable peer timeout for LNet peer health, default off, > 0 to enable");
116
117 static int peer_timeout = -1;
118 CFS_MODULE_PARM(peer_timeout, "i", int, 0444,
119                 "Peer timeout used for peer_health, default based on gnilnd timeout, > -1 to manually set");
120
121 static int vmap_cksum = 0;
122 CFS_MODULE_PARM(vmap_cksum, "i", int, 0644,
123                 "use vmap for all kiov checksumming, default off");
124
125 static int mbox_per_block = GNILND_FMABLK;
126 CFS_MODULE_PARM(mbox_per_block, "i", int, 0644,
127                 "mailboxes per block");
128
129 static int nphys_mbox = 0;
130 CFS_MODULE_PARM(nphys_mbox, "i", int, 0444,
131                 "# mbox to preallocate from physical memory, default 0");
132
133 static int mbox_credits = GNILND_MBOX_CREDITS;
134 CFS_MODULE_PARM(mbox_credits, "i", int, 0644,
135                 "number of credits per mailbox");
136
137 static int sched_threads = GNILND_SCHED_THREADS;
138 CFS_MODULE_PARM(sched_threads, "i", int, 0444,
139                 "number of threads for moving data");
140
141 static int net_hash_size = 11;
142 CFS_MODULE_PARM(net_hash_size, "i", int, 0444,
143                 "prime number for net hash sizing, default 11");
144
145 static int hardware_timeout = GNILND_HARDWARE_TIMEOUT;
146 CFS_MODULE_PARM(hardware_timeout, "i", int, 0444,
147                 "maximum time for traffic to get from one node to another");
148
149 static int mdd_timeout = GNILND_MDD_TIMEOUT;
150 CFS_MODULE_PARM(mdd_timeout, "i", int, 0644,
151                 "maximum time (in minutes) for mdd to be held");
152
153 static int sched_timeout = GNILND_SCHED_TIMEOUT;
154 CFS_MODULE_PARM(sched_timeout, "i", int, 0644,
155                 "scheduler aliveness in seconds max time");
156
157 static int sched_nice = GNILND_SCHED_NICE;
158 CFS_MODULE_PARM(sched_nice, "i", int, 0444,
159                 "scheduler's nice setting, default compute 0 service -20");
160
161 static int reverse_rdma = GNILND_REVERSE_RDMA;
162 CFS_MODULE_PARM(reverse_rdma, "i", int, 0644,
163                 "Normal 0: Reverse GET: 1 Reverse Put: 2 Reverse Both: 3");
164
165 static int dgram_timeout = GNILND_DGRAM_TIMEOUT;
166 CFS_MODULE_PARM(dgram_timeout, "i", int, 0644,
167                 "dgram thread aliveness seconds max time");
168
169 kgn_tunables_t kgnilnd_tunables = {
170         .kgn_min_reconnect_interval = &min_reconnect_interval,
171         .kgn_max_reconnect_interval = &max_reconnect_interval,
172         .kgn_credits                = &credits,
173         .kgn_peer_credits           = &peer_credits,
174         .kgn_concurrent_sends       = &concurrent_sends,
175         .kgn_fma_cq_size            = &fma_cq_size,
176         .kgn_timeout                = &timeout,
177         .kgn_max_immediate          = &max_immediate,
178         .kgn_checksum               = &checksum,
179         .kgn_checksum_dump          = &checksum_dump,
180         .kgn_bte_dlvr_mode          = &bte_dlvr_mode,
181         .kgn_bte_relaxed_ordering   = &bte_relaxed_ordering,
182         .kgn_ptag                   = &ptag,
183         .kgn_max_retransmits        = &max_retransmits,
184         .kgn_nwildcard              = &nwildcard,
185         .kgn_nice                   = &nice,
186         .kgn_rdmaq_intervals        = &rdmaq_intervals,
187         .kgn_loops                  = &loops,
188         .kgn_peer_hash_size         = &hash_size,
189         .kgn_peer_health            = &peer_health,
190         .kgn_peer_timeout           = &peer_timeout,
191         .kgn_vmap_cksum             = &vmap_cksum,
192         .kgn_mbox_per_block         = &mbox_per_block,
193         .kgn_nphys_mbox             = &nphys_mbox,
194         .kgn_mbox_credits           = &mbox_credits,
195         .kgn_sched_threads          = &sched_threads,
196         .kgn_net_hash_size          = &net_hash_size,
197         .kgn_hardware_timeout       = &hardware_timeout,
198         .kgn_mdd_timeout            = &mdd_timeout,
199         .kgn_sched_timeout          = &sched_timeout,
200         .kgn_sched_nice             = &sched_nice,
201         .kgn_reverse_rdma           = &reverse_rdma,
202         .kgn_dgram_timeout          = &dgram_timeout
203 };
204
#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
/*
 * One sysctl entry exposing an int module parameter through proc_dointvec.
 * The proc name is the parameter's own identifier, stringified.
 *
 * Every entry in the table has the same layout, so it is generated here
 * rather than spelled out by hand: the hand-written form had lost several
 * commas and one opening brace, which broke the build whenever this branch
 * was compiled.
 */
#define KGN_CTL_INT(id, var, perm)				\
	{							\
		INIT_CTL_NAME(id)				\
		.procname     = #var,				\
		.data         = &var,				\
		.maxlen       = sizeof(int),			\
		.mode         = perm,				\
		.proc_handler = &proc_dointvec			\
	}

/*
 * Entries under the "gnilnd" sysctl directory, terminated by a zeroed entry.
 * The numeric ids are the historical ones and are not contiguous.
 */
static cfs_sysctl_table_t kgnilnd_ctl_table[] = {
	KGN_CTL_INT(2,  min_reconnect_interval, 0644),
	KGN_CTL_INT(3,  max_reconnect_interval, 0644),
	KGN_CTL_INT(5,  credits,                0444),
	KGN_CTL_INT(6,  peer_credits,           0444),
	KGN_CTL_INT(7,  fma_cq_size,            0444),
	KGN_CTL_INT(8,  timeout,                0444),
	/* NOTE(review): the module parameter is declared 0644 but this entry
	 * has always been 0444; kept as is - confirm which one is intended */
	KGN_CTL_INT(9,  max_immediate,          0444),
	KGN_CTL_INT(10, checksum,               0644),
	KGN_CTL_INT(11, bte_dlvr_mode,          0644),
	KGN_CTL_INT(13, ptag,                   0444),
	KGN_CTL_INT(14, nwildcard,              0444),
	KGN_CTL_INT(15, bte_relaxed_ordering,   0644),
	KGN_CTL_INT(16, checksum_dump,          0644),
	KGN_CTL_INT(17, nice,                   0444),
	KGN_CTL_INT(18, rdmaq_intervals,        0644),
	KGN_CTL_INT(19, loops,                  0644),
	KGN_CTL_INT(20, hash_size,              0444),
	KGN_CTL_INT(21, peer_health,            0444),
	KGN_CTL_INT(22, vmap_cksum,             0644),
	KGN_CTL_INT(23, mbox_per_block,         0644),
	KGN_CTL_INT(24, mbox_credits,           0644),
	KGN_CTL_INT(25, sched_threads,          0444),
	KGN_CTL_INT(26, net_hash_size,          0444),
	KGN_CTL_INT(27, hardware_timeout,       0444),
	KGN_CTL_INT(28, mdd_timeout,            0644),
	KGN_CTL_INT(29, max_retransmits,        0444),
	KGN_CTL_INT(30, concurrent_sends,       0444),
	KGN_CTL_INT(31, nphys_mbox,             0444),
	KGN_CTL_INT(32, sched_timeout,          0644),
	KGN_CTL_INT(33, sched_nice,             0444),
	KGN_CTL_INT(34, reverse_rdma,           0644),
	KGN_CTL_INT(35, dgram_timeout,          0644),
	KGN_CTL_INT(36, peer_timeout,           0444),
	{0}
};

#undef KGN_CTL_INT

/*
 * Parent table: a single "gnilnd" directory whose children are the entries
 * above.  This is the table handed to cfs_register_sysctl_table().
 */
static cfs_sysctl_table_t kgnilnd_top_ctl_table[] = {
	{
		INIT_CTL_NAME(202)
		.procname = "gnilnd",
		.data     = NULL,
		.maxlen   = 0,
		.mode     = 0555,
		.child    = kgnilnd_ctl_table
	},
	{       INIT_CTL_NAME(0)   }
};
#endif
485
486 int
487 kgnilnd_tunables_init()
488 {
489         int rc = 0;
490
491 #if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
492         kgnilnd_tunables.kgn_sysctl =
493                 cfs_register_sysctl_table(kgnilnd_top_ctl_table, 0);
494
495         if (kgnilnd_tunables.kgn_sysctl == NULL)
496                 CWARN("Can't setup /proc tunables\n");
497 #endif
498         switch (*kgnilnd_tunables.kgn_checksum) {
499         default:
500                 CERROR("Invalid checksum module parameter: %d\n",
501                        *kgnilnd_tunables.kgn_checksum);
502                 rc = -EINVAL;
503                 GOTO(out, rc);
504         case GNILND_CHECKSUM_OFF:
505                 /* no checksumming */
506                 break;
507         case GNILND_CHECKSUM_SMSG_HEADER:
508                 LCONSOLE_INFO("SMSG header only checksumming enabled\n");
509                 break;
510         case GNILND_CHECKSUM_SMSG:
511                 LCONSOLE_INFO("SMSG checksumming enabled\n");
512                 break;
513         case GNILND_CHECKSUM_SMSG_BTE:
514                 LCONSOLE_INFO("SMSG + BTE checksumming enabled\n");
515                 break;
516         }
517
518         if (*kgnilnd_tunables.kgn_max_immediate > GNILND_MAX_IMMEDIATE) {
519                 LCONSOLE_ERROR("kgnilnd module parameter 'max_immediate' too large %d > %d\n",
520                 *kgnilnd_tunables.kgn_max_immediate, GNILND_MAX_IMMEDIATE);
521                 rc = -EINVAL;
522                 GOTO(out, rc);
523         }
524
525         if (*kgnilnd_tunables.kgn_mbox_per_block < 1) {
526                 *kgnilnd_tunables.kgn_mbox_per_block = 1;
527         }
528
529         if (*kgnilnd_tunables.kgn_concurrent_sends == 0) {
530                 *kgnilnd_tunables.kgn_concurrent_sends = *kgnilnd_tunables.kgn_peer_credits;
531         } else if (*kgnilnd_tunables.kgn_concurrent_sends > *kgnilnd_tunables.kgn_peer_credits) {
532                 LCONSOLE_ERROR("kgnilnd parameter 'concurrent_sends' too large: %d > %d (peer_credits)\n",
533                                *kgnilnd_tunables.kgn_concurrent_sends, *kgnilnd_tunables.kgn_peer_credits);
534                 rc = -EINVAL;
535         }
536 out:
537         return rc;
538 }
539
/*
 * Counterpart of kgnilnd_tunables_init(): unregister the sysctl table if
 * one is registered.  The handle is cleared afterwards so that a repeated
 * call does nothing instead of unregistering the same table twice.
 * Compiles to an empty function when the sysctl interface is not built.
 */
void
kgnilnd_tunables_fini(void)
{
#if CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM
	if (kgnilnd_tunables.kgn_sysctl != NULL) {
		cfs_unregister_sysctl_table(kgnilnd_tunables.kgn_sysctl);
		kgnilnd_tunables.kgn_sysctl = NULL;
	}
#endif
}