4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
31 * This file is part of Lustre, http://www.lustre.org/
32 * Lustre is a trademark of Sun Microsystems, Inc.
34 * lnet/klnds/o2iblnd/o2iblnd_modparams.c
36 * Author: Eric Barton <eric@bartonsoftware.com>
/*
 * Basic module parameters.
 * NOTE(review): the fourth CFS_MODULE_PARM argument looks like a file
 * permission mode (0444 vs 0644) - confirm against the macro definition.
 */
/* Service number (within RDMA_PS_TCP, per the description); defaults to 987. */
41 static int service = 987;
42 CFS_MODULE_PARM(service, "i", int, 0444,
43 "service number (within RDMA_PS_TCP)");
/*
 * Non-zero enables checksums on messages (not on RDMA).
 * NOTE(review): the definition of `cksum` is not visible in this excerpt.
 */
46 CFS_MODULE_PARM(cksum, "i", int, 0644,
47 "set non-zero to enable message (not RDMA) checksums");
/*
 * Timeout tunable; defaults to 50.
 * NOTE(review): the description argument (and therefore the unit) is not
 * visible in this excerpt.
 */
49 static int timeout = 50;
50 CFS_MODULE_PARM(timeout, "i", int, 0644,
/*
 * Number of message descriptors.
 * NOTE(review): the definition of `ntx` is not visible in this excerpt.
 */
54 CFS_MODULE_PARM(ntx, "i", int, 0444,
55 "# of message descriptors");
/* Number of concurrent sends; defaults to 64. */
57 static int credits = 64;
58 CFS_MODULE_PARM(credits, "i", int, 0444,
59 "# concurrent sends");
/*
 * Number of concurrent sends to a single peer; defaults to 8.
 * kiblnd_tunables_init() clamps it to the range
 * [IBLND_CREDITS_DEFAULT, IBLND_CREDITS_MAX] and to at most `credits`.
 */
61 static int peer_credits = 8;
62 CFS_MODULE_PARM(peer_credits, "i", int, 0444,
63 "# concurrent sends to 1 peer");
/*
 * High-water mark for eagerly returning credits; defaults to 0.
 * kiblnd_tunables_init() raises it to at least peer_credits / 2 and caps it
 * at peer_credits - 1.
 */
65 static int peer_credits_hiw = 0;
66 CFS_MODULE_PARM(peer_credits_hiw, "i", int, 0444,
67 "when eagerly to return credits");
/* Per-peer router buffer credits; defaults to 0. */
69 static int peer_buffer_credits = 0;
70 CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444,
71 "# per-peer router buffer credits");
/*
 * Seconds without aliveness news before a peer is declared dead; defaults
 * to 180.  Per the description, a value <= 0 disables the check.
 */
73 static int peer_timeout = 180;
74 CFS_MODULE_PARM(peer_timeout, "i", int, 0444,
75 "Seconds without aliveness news to declare peer dead (<=0 to disable)");
/*
 * IPoIB interface name; defaults to "ib0".  kiblnd_sysctl_init() copies it
 * into ipif_basename_space, the buffer behind the "ipif_name" sysctl entry.
 */
77 static char *ipif_name = "ib0";
78 CFS_MODULE_PARM(ipif_name, "s", charp, 0444,
79 "IPoIB interface name");
/* Retransmissions when no ACK is received; defaults to 5. */
81 static int retry_count = 5;
82 CFS_MODULE_PARM(retry_count, "i", int, 0644,
83 "Retransmissions when no ACK received");
/* RNR retransmissions; defaults to 6. */
85 static int rnr_retry_count = 6;
86 CFS_MODULE_PARM(rnr_retry_count, "i", int, 0644,
87 "RNR retransmissions");
/* Idle time in seconds before a keepalive is sent; defaults to 100. */
89 static int keepalive = 100;
90 CFS_MODULE_PARM(keepalive, "i", int, 0644,
91 "Idle time in seconds before sending a keepalive");
/*
 * IB MTU; defaults to 0.  kiblnd_tunables_init() logs an error when
 * kiblnd_translate_mtu() returns a negative value for it.
 */
93 static int ib_mtu = 0;
94 CFS_MODULE_PARM(ib_mtu, "i", int, 0444,
95 "IB MTU 256/512/1024/2048/4096");
/*
 * Send work-queue sizing; defaults to 0, in which case
 * kiblnd_tunables_init() derives the value from the per-peer credits.
 */
97 static int concurrent_sends = 0;
98 CFS_MODULE_PARM(concurrent_sends, "i", int, 0444,
99 "send work-queue sizing");
/*
 * Map-on-demand setting; defaults to 0.  kiblnd_tunables_init() resets
 * values outside [0, IBLND_MAX_RDMA_FRAGS] to 0 and turns 1 into 2.
 * NOTE(review): the description argument is not visible in this excerpt.
 */
101 static int map_on_demand = 0;
102 CFS_MODULE_PARM(map_on_demand, "i", int, 0444,
/* Size of the FMR pool (>= ntx / 4 per the description); defaults to 512. */
105 static int fmr_pool_size = 512;
106 CFS_MODULE_PARM(fmr_pool_size, "i", int, 0444,
107 "size of the fmr pool (>= ntx / 4)");
/* Number of dirty FMRs that triggers a pool flush; defaults to 384. */
109 static int fmr_flush_trigger = 384;
110 CFS_MODULE_PARM(fmr_flush_trigger, "i", int, 0444,
111 "# dirty FMRs that triggers pool flush");
/* Non-zero enables FMR caching; defaults to 1. */
113 static int fmr_cache = 1;
114 CFS_MODULE_PARM(fmr_cache, "i", int, 0444,
115 "non-zero to enable FMR caching");
/* Size of the MR cache PMR pool; defaults to 512. */
117 static int pmr_pool_size = 512;
118 CFS_MODULE_PARM(pmr_pool_size, "i", int, 0444,
119 "size of the MR cache pmr pool");
122 * 0: disable failover
123 * 1: enable failover if necessary
124 * 2: force failover (for debugging)
/*
 * HCA failover for bonding; defaults to 0 (off).  The description documents
 * 0 as off and 1 as on, and calls other values reserved.
 */
126 static int dev_failover = 0;
127 CFS_MODULE_PARM(dev_failover, "i", int, 0444,
128 "HCA failover for bonding (0 off, 1 on, other values reserved)");
/* Require a privileged port when accepting a connection; defaults to 0. */
131 static int require_privileged_port = 0;
132 CFS_MODULE_PARM(require_privileged_port, "i", int, 0644,
133 "require privileged port when accepting connection");
/* Use a privileged port when initiating a connection; defaults to 1. */
135 static int use_privileged_port = 1;
136 CFS_MODULE_PARM(use_privileged_port, "i", int, 0644,
137 "use privileged port when initiating connection");
/*
 * Tunables structure.  Each member shown here is initialised with the
 * address of the matching module-parameter variable defined in this file;
 * kiblnd_tunables_init() reads and adjusts the values through these
 * pointers.
 * NOTE(review): original lines 142 and 145 and the closing of the
 * initializer are not visible in this excerpt.
 */
139 kib_tunables_t kiblnd_tunables = {
140 .kib_dev_failover = &dev_failover,
141 .kib_service = &service,
143 .kib_timeout = &timeout,
144 .kib_keepalive = &keepalive,
146 .kib_credits = &credits,
147 .kib_peertxcredits = &peer_credits,
148 .kib_peercredits_hiw = &peer_credits_hiw,
149 .kib_peerrtrcredits = &peer_buffer_credits,
150 .kib_peertimeout = &peer_timeout,
/* Note: this member takes the address of the char * itself. */
151 .kib_default_ipif = &ipif_name,
152 .kib_retry_count = &retry_count,
153 .kib_rnr_retry_count = &rnr_retry_count,
154 .kib_concurrent_sends = &concurrent_sends,
155 .kib_ib_mtu = &ib_mtu,
156 .kib_map_on_demand = &map_on_demand,
157 .kib_fmr_pool_size = &fmr_pool_size,
158 .kib_fmr_flush_trigger = &fmr_flush_trigger,
159 .kib_fmr_cache = &fmr_cache,
160 .kib_pmr_pool_size = &pmr_pool_size,
161 .kib_require_priv_port = &require_privileged_port,
162 .kib_use_priv_port = &use_privileged_port
/*
 * The code guarded by this condition is built only when CONFIG_SYSCTL is
 * defined and CFS_SYSFS_MODULE_PARM evaluates to zero.
 */
165 #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
/*
 * Backing buffer for the "ipif_name" sysctl entry; kiblnd_sysctl_init()
 * fills it from the ipif_name module parameter.
 */
167 static char ipif_basename_space[32];
/*
 * Identifiers used as .ctl_name in kiblnd_ctl_table.
 * When HAVE_SYSCTL_UNNUMBERED is not defined, the names below appear to be
 * enumerators.
 * NOTE(review): the enum's opening line, several enumerators and its
 * closing line are not visible in this excerpt.
 */
169 #ifndef HAVE_SYSCTL_UNNUMBERED
177 O2IBLND_PEER_TXCREDITS,
178 O2IBLND_PEER_CREDITS_HIW,
179 O2IBLND_PEER_RTRCREDITS,
180 O2IBLND_PEER_TIMEOUT,
181 O2IBLND_IPIF_BASENAME,
183 O2IBLND_RNR_RETRY_COUNT,
185 O2IBLND_CONCURRENT_SENDS,
187 O2IBLND_MAP_ON_DEMAND,
188 O2IBLND_FMR_POOL_SIZE,
189 O2IBLND_FMR_FLUSH_TRIGGER,
191 O2IBLND_PMR_POOL_SIZE,
/*
 * Here every identifier is defined as CTL_UNNUMBERED.
 * NOTE(review): presumably this is the alternative branch of the
 * conditional above; the #else / #endif lines are not visible - confirm.
 */
196 #define O2IBLND_SERVICE CTL_UNNUMBERED
197 #define O2IBLND_CKSUM CTL_UNNUMBERED
198 #define O2IBLND_TIMEOUT CTL_UNNUMBERED
199 #define O2IBLND_NTX CTL_UNNUMBERED
200 #define O2IBLND_CREDITS CTL_UNNUMBERED
201 #define O2IBLND_PEER_TXCREDITS CTL_UNNUMBERED
202 #define O2IBLND_PEER_CREDITS_HIW CTL_UNNUMBERED
203 #define O2IBLND_PEER_RTRCREDITS CTL_UNNUMBERED
204 #define O2IBLND_PEER_TIMEOUT CTL_UNNUMBERED
205 #define O2IBLND_IPIF_BASENAME CTL_UNNUMBERED
206 #define O2IBLND_RETRY_COUNT CTL_UNNUMBERED
207 #define O2IBLND_RNR_RETRY_COUNT CTL_UNNUMBERED
208 #define O2IBLND_KEEPALIVE CTL_UNNUMBERED
209 #define O2IBLND_CONCURRENT_SENDS CTL_UNNUMBERED
210 #define O2IBLND_IB_MTU CTL_UNNUMBERED
211 #define O2IBLND_MAP_ON_DEMAND CTL_UNNUMBERED
212 #define O2IBLND_FMR_POOL_SIZE CTL_UNNUMBERED
213 #define O2IBLND_FMR_FLUSH_TRIGGER CTL_UNNUMBERED
214 #define O2IBLND_FMR_CACHE CTL_UNNUMBERED
215 #define O2IBLND_PMR_POOL_SIZE CTL_UNNUMBERED
216 #define O2IBLND_DEV_FAILOVER CTL_UNNUMBERED
/*
 * Sysctl entries for the o2iblnd tunables; referenced as .child of
 * kiblnd_top_ctl_table.
 *
 * Every integer entry uses maxlen sizeof(int) with the proc_dointvec
 * handler and, where the .data line is visible, points at the
 * module-parameter variable of the same name.  The one string entry,
 * "ipif_name", is backed by ipif_basename_space and uses proc_dostring.
 *
 * NOTE(review): the braces delimiting the entries, all .mode lines, and
 * some .procname / .data lines are not visible in this excerpt.
 */
220 static cfs_sysctl_table_t kiblnd_ctl_table[] = {
222 .ctl_name = O2IBLND_SERVICE,
223 .procname = "service",
225 .maxlen = sizeof(int),
227 .proc_handler = &proc_dointvec
/* cksum entry: .procname and .data are not visible here. */
230 .ctl_name = O2IBLND_CKSUM,
233 .maxlen = sizeof(int),
235 .proc_handler = &proc_dointvec
238 .ctl_name = O2IBLND_TIMEOUT,
239 .procname = "timeout",
241 .maxlen = sizeof(int),
243 .proc_handler = &proc_dointvec
/* ntx entry: .procname and .data are not visible here. */
246 .ctl_name = O2IBLND_NTX,
249 .maxlen = sizeof(int),
251 .proc_handler = &proc_dointvec
254 .ctl_name = O2IBLND_CREDITS,
255 .procname = "credits",
257 .maxlen = sizeof(int),
259 .proc_handler = &proc_dointvec
262 .ctl_name = O2IBLND_PEER_TXCREDITS,
263 .procname = "peer_credits",
264 .data = &peer_credits,
265 .maxlen = sizeof(int),
267 .proc_handler = &proc_dointvec
270 .ctl_name = O2IBLND_PEER_CREDITS_HIW,
271 .procname = "peer_credits_hiw",
272 .data = &peer_credits_hiw,
273 .maxlen = sizeof(int),
275 .proc_handler = &proc_dointvec
278 .ctl_name = O2IBLND_PEER_RTRCREDITS,
279 .procname = "peer_buffer_credits",
280 .data = &peer_buffer_credits,
281 .maxlen = sizeof(int),
283 .proc_handler = &proc_dointvec
286 .ctl_name = O2IBLND_PEER_TIMEOUT,
287 .procname = "peer_timeout",
288 .data = &peer_timeout,
289 .maxlen = sizeof(int),
291 .proc_handler = &proc_dointvec
/*
 * String entry: exposes the copy held in ipif_basename_space rather than
 * the ipif_name pointer itself.
 */
294 .ctl_name = O2IBLND_IPIF_BASENAME,
295 .procname = "ipif_name",
296 .data = ipif_basename_space,
297 .maxlen = sizeof(ipif_basename_space),
299 .proc_handler = &proc_dostring
302 .ctl_name = O2IBLND_RETRY_COUNT,
303 .procname = "retry_count",
304 .data = &retry_count,
305 .maxlen = sizeof(int),
307 .proc_handler = &proc_dointvec
310 .ctl_name = O2IBLND_RNR_RETRY_COUNT,
311 .procname = "rnr_retry_count",
312 .data = &rnr_retry_count,
313 .maxlen = sizeof(int),
315 .proc_handler = &proc_dointvec
318 .ctl_name = O2IBLND_KEEPALIVE,
319 .procname = "keepalive",
321 .maxlen = sizeof(int),
323 .proc_handler = &proc_dointvec
326 .ctl_name = O2IBLND_CONCURRENT_SENDS,
327 .procname = "concurrent_sends",
328 .data = &concurrent_sends,
329 .maxlen = sizeof(int),
331 .proc_handler = &proc_dointvec
334 .ctl_name = O2IBLND_IB_MTU,
335 .procname = "ib_mtu",
337 .maxlen = sizeof(int),
339 .proc_handler = &proc_dointvec
342 .ctl_name = O2IBLND_MAP_ON_DEMAND,
343 .procname = "map_on_demand",
344 .data = &map_on_demand,
345 .maxlen = sizeof(int),
347 .proc_handler = &proc_dointvec
351 .ctl_name = O2IBLND_FMR_POOL_SIZE,
352 .procname = "fmr_pool_size",
353 .data = &fmr_pool_size,
354 .maxlen = sizeof(int),
356 .proc_handler = &proc_dointvec
359 .ctl_name = O2IBLND_FMR_FLUSH_TRIGGER,
360 .procname = "fmr_flush_trigger",
361 .data = &fmr_flush_trigger,
362 .maxlen = sizeof(int),
364 .proc_handler = &proc_dointvec
367 .ctl_name = O2IBLND_FMR_CACHE,
368 .procname = "fmr_cache",
370 .maxlen = sizeof(int),
372 .proc_handler = &proc_dointvec
375 .ctl_name = O2IBLND_PMR_POOL_SIZE,
376 .procname = "pmr_pool_size",
377 .data = &pmr_pool_size,
378 .maxlen = sizeof(int),
380 .proc_handler = &proc_dointvec
383 .ctl_name = O2IBLND_DEV_FAILOVER,
384 .procname = "dev_failover",
385 .data = &dev_failover,
386 .maxlen = sizeof(int),
388 .proc_handler = &proc_dointvec
/*
 * Top-level sysctl table: a single "o2iblnd" entry whose children are the
 * entries of kiblnd_ctl_table.  This is the table that kiblnd_sysctl_init()
 * passes to cfs_register_sysctl_table().
 * NOTE(review): the entry braces and original lines 397-399 are not visible
 * in this excerpt.
 */
393 static cfs_sysctl_table_t kiblnd_top_ctl_table[] = {
395 .ctl_name = CTL_O2IBLND,
396 .procname = "o2iblnd",
400 .child = kiblnd_ctl_table
/*
 * Copy the string tunable @str into the fixed-size buffer @space.
 *
 * @space: destination buffer of @size bytes
 * @str:   NUL-terminated source string; NULL yields an empty string
 * @size:  capacity of @space in bytes, including the terminator
 *
 * The result is always NUL-terminated: a source longer than size - 1
 * characters is truncated.  strncpy() zero-fills whatever remains of the
 * buffer.  Nothing is written when @space is NULL or @size is not positive.
 *
 * NOTE(review): the return type is not visible in the reviewed excerpt;
 * void matches the only visible call site, which ignores the result.
 */
void
kiblnd_initstrtunable(char *space, char *str, int size)
{
        if (space == NULL || size <= 0)
                return;

        /*
         * Copy at most size - 1 bytes so the last byte stays free for the
         * terminator; strncpy() on its own leaves a full buffer
         * unterminated.
         */
        strncpy(space, str != NULL ? str : "", size - 1);
        space[size - 1] = '\0';
}
/*
 * Set up the sysctl interface: seed the string buffer behind the
 * "ipif_name" entry, then register kiblnd_top_ctl_table and keep the
 * returned handle in kiblnd_tunables.kib_sysctl.
 * NOTE(review): the return type and braces are not visible in this excerpt.
 */
413 kiblnd_sysctl_init (void)
/* Copy the ipif_name module parameter into the sysctl backing buffer. */
415 kiblnd_initstrtunable(ipif_basename_space, ipif_name,
416 sizeof(ipif_basename_space));
418 kiblnd_tunables.kib_sysctl =
419 cfs_register_sysctl_table(kiblnd_top_ctl_table, 0);
/* A NULL handle is reported with a warning. */
421 if (kiblnd_tunables.kib_sysctl == NULL)
422 CWARN("Can't setup /proc tunables\n");
/*
 * Unregister the sysctl table, but only if kiblnd_sysctl_init() obtained a
 * non-NULL handle.
 * NOTE(review): the return type and braces are not visible in this excerpt.
 */
426 kiblnd_sysctl_fini (void)
428 if (kiblnd_tunables.kib_sysctl != NULL)
429 cfs_unregister_sysctl_table(kiblnd_tunables.kib_sysctl);
/*
 * Second definition of kiblnd_sysctl_init().
 * NOTE(review): presumably the fallback used when the sysctl code above is
 * compiled out; its body and the surrounding preprocessor lines are not
 * visible in this excerpt - confirm.
 */
435 kiblnd_sysctl_init (void)
/*
 * Second definition of kiblnd_sysctl_fini().
 * NOTE(review): presumably the fallback used when the sysctl code above is
 * compiled out; its body and the surrounding preprocessor lines are not
 * visible in this excerpt - confirm.
 */
440 kiblnd_sysctl_fini (void)
/*
 * Check and normalise the tunables through the pointers held in
 * kiblnd_tunables, then call kiblnd_sysctl_init().
 * NOTE(review): the return type, braces and any return statements are not
 * visible in this excerpt.
 */
447 kiblnd_tunables_init (void)
/* Log an error when kiblnd_translate_mtu() returns a negative value. */
449 if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) {
450 CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
451 *kiblnd_tunables.kib_ib_mtu);
/* Clamp per-peer send credits to [IBLND_CREDITS_DEFAULT, IBLND_CREDITS_MAX]... */
455 if (*kiblnd_tunables.kib_peertxcredits < IBLND_CREDITS_DEFAULT)
456 *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_DEFAULT;
458 if (*kiblnd_tunables.kib_peertxcredits > IBLND_CREDITS_MAX)
459 *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_MAX;
/* ...and never above the overall credit count. */
461 if (*kiblnd_tunables.kib_peertxcredits > *kiblnd_tunables.kib_credits)
462 *kiblnd_tunables.kib_peertxcredits = *kiblnd_tunables.kib_credits;
/* Keep the credit high-water mark at least half of the peer credits... */
464 if (*kiblnd_tunables.kib_peercredits_hiw < *kiblnd_tunables.kib_peertxcredits / 2)
465 *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits / 2;
/* ...and strictly below the peer credits. */
467 if (*kiblnd_tunables.kib_peercredits_hiw >= *kiblnd_tunables.kib_peertxcredits)
468 *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits - 1;
/* An out-of-range map_on_demand value switches the feature off. */
470 if (*kiblnd_tunables.kib_map_on_demand < 0 ||
471 *kiblnd_tunables.kib_map_on_demand > IBLND_MAX_RDMA_FRAGS)
472 *kiblnd_tunables.kib_map_on_demand = 0; /* disable map-on-demand */
474 if (*kiblnd_tunables.kib_map_on_demand == 1)
475 *kiblnd_tunables.kib_map_on_demand = 2; /* creating a map makes no sense for a single fragment */
/*
 * concurrent_sends left at 0: derive it from the peer credits - twice the
 * peer credits when map_on_demand lies in (0, IBLND_MAX_RDMA_FRAGS / 8].
 * NOTE(review): original line 481 between the two assignments is not
 * visible; presumably an `else`, so the second assignment is the fallback
 * rather than an unconditional override - confirm.
 */
477 if (*kiblnd_tunables.kib_concurrent_sends == 0) {
478 if (*kiblnd_tunables.kib_map_on_demand > 0 &&
479 *kiblnd_tunables.kib_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8)
480 *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits) * 2;
482 *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits);
/* Clamp concurrent_sends to [peer credits / 2, peer credits * 2]. */
485 if (*kiblnd_tunables.kib_concurrent_sends > *kiblnd_tunables.kib_peertxcredits * 2)
486 *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits * 2;
488 if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits / 2)
489 *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits / 2;
/* Fewer concurrent sends than peer credits is allowed, but warned about. */
491 if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits) {
492 CWARN("Concurrent sends %d is lower than message queue size: %d, "
493 "performance may drop slightly.\n",
494 *kiblnd_tunables.kib_concurrent_sends, *kiblnd_tunables.kib_peertxcredits);
/* Publish the (now normalised) tunables via sysctl. */
497 kiblnd_sysctl_init();
/*
 * Counterpart of kiblnd_tunables_init(): calls kiblnd_sysctl_fini().
 * NOTE(review): the return type and braces are not visible in this excerpt.
 */
502 kiblnd_tunables_fini (void)
504 kiblnd_sysctl_fini();