1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lnet/klnds/o2iblnd/o2iblnd_modparams.c
38 * Author: Eric Barton <eric@bartonsoftware.com>
/*
 * o2iblnd module parameters.  Each tunable visible here is a file-scope
 * static holding its default value, paired with a CFS_MODULE_PARM() entry
 * that carries the parameter name, a type code ("i" or "s"), the C type,
 * an octal mode value (0444 or 0644 in this list) and a description string.
 * NOTE(review): in this excerpt the variable definitions for "cksum" and
 * "ntx", and the description strings for "timeout" and "map_on_demand",
 * are not visible -- confirm them against the complete file.
 */
43 static int service = 987;
44 CFS_MODULE_PARM(service, "i", int, 0444,
45 "service number (within RDMA_PS_TCP)");
48 CFS_MODULE_PARM(cksum, "i", int, 0644,
49 "set non-zero to enable message (not RDMA) checksums");
51 static int timeout = 50;
52 CFS_MODULE_PARM(timeout, "i", int, 0644,
56 CFS_MODULE_PARM(ntx, "i", int, 0444,
57 "# of message descriptors");
59 static int credits = 64;
60 CFS_MODULE_PARM(credits, "i", int, 0444,
61 "# concurrent sends");
/* peer_credits and peer_credits_hiw are range-adjusted in kiblnd_tunables_init(). */
63 static int peer_credits = 8;
64 CFS_MODULE_PARM(peer_credits, "i", int, 0444,
65 "# concurrent sends to 1 peer");
67 static int peer_credits_hiw = 0;
68 CFS_MODULE_PARM(peer_credits_hiw, "i", int, 0444,
69 "when eagerly to return credits");
71 static int peer_buffer_credits = 0;
72 CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444,
73 "# per-peer router buffer credits");
75 static int peer_timeout = 180;
76 CFS_MODULE_PARM(peer_timeout, "i", int, 0444,
77 "Seconds without aliveness news to declare peer dead (<=0 to disable)");
/* The only string-typed parameter; copied into ipif_basename_space by kiblnd_sysctl_init(). */
79 static char *ipif_name = "ib0";
80 CFS_MODULE_PARM(ipif_name, "s", charp, 0444,
81 "IPoIB interface name");
83 static int retry_count = 5;
84 CFS_MODULE_PARM(retry_count, "i", int, 0644,
85 "Retransmissions when no ACK received");
87 static int rnr_retry_count = 6;
88 CFS_MODULE_PARM(rnr_retry_count, "i", int, 0644,
89 "RNR retransmissions");
91 static int keepalive = 100;
92 CFS_MODULE_PARM(keepalive, "i", int, 0644,
93 "Idle time in seconds before sending a keepalive");
/* ib_mtu is validated through kiblnd_translate_mtu() in kiblnd_tunables_init(). */
95 static int ib_mtu = 0;
96 CFS_MODULE_PARM(ib_mtu, "i", int, 0444,
97 "IB MTU 256/512/1024/2048/4096");
/* A concurrent_sends value of 0 makes kiblnd_tunables_init() pick a default. */
99 static int concurrent_sends = 0;
100 CFS_MODULE_PARM(concurrent_sends, "i", int, 0444,
101 "send work-queue sizing");
103 static int map_on_demand = 0;
104 CFS_MODULE_PARM(map_on_demand, "i", int, 0444,
107 static int fmr_pool_size = 512;
108 CFS_MODULE_PARM(fmr_pool_size, "i", int, 0444,
109 "size of the fmr pool (>= ntx / 4)");
111 static int fmr_flush_trigger = 384;
112 CFS_MODULE_PARM(fmr_flush_trigger, "i", int, 0444,
113 "# dirty FMRs that triggers pool flush");
115 static int fmr_cache = 1;
116 CFS_MODULE_PARM(fmr_cache, "i", int, 0444,
117 "non-zero to enable FMR caching");
119 static int pmr_pool_size = 512;
120 CFS_MODULE_PARM(pmr_pool_size, "i", int, 0444,
121 "size of the MR cache pmr pool");
/*
 * NOTE(review): the value list below documents 2 as "force to failover",
 * while the dev_failover description string says values other than 0 and 1
 * are reserved -- confirm which of the two is current.
 */
124 * 0: disable failover
125 * 1: enable failover if necessary
126 * 2: force to failover (for debug)
128 static int dev_failover = 0;
129 CFS_MODULE_PARM(dev_failover, "i", int, 0444,
130 "HCA failover for bonding (0 off, 1 on, other values reserved)");
133 static int require_privileged_port = 0;
134 CFS_MODULE_PARM(require_privileged_port, "i", int, 0644,
135 "require privileged port when accepting connection");
137 static int use_privileged_port = 1;
138 CFS_MODULE_PARM(use_privileged_port, "i", int, 0644,
139 "use privileged port when initiating connection");
/*
 * Tunables table.  Every field visible here is initialised with the address
 * of the matching module-parameter variable defined in this file;
 * kiblnd_tunables_init() reads and adjusts the values through these
 * pointers.
 * NOTE(review): any initialisers for the cksum and ntx parameters, and the
 * end of the initialiser list, are not visible in this excerpt.
 */
141 kib_tunables_t kiblnd_tunables = {
142 .kib_dev_failover = &dev_failover,
143 .kib_service = &service,
145 .kib_timeout = &timeout,
146 .kib_keepalive = &keepalive,
148 .kib_credits = &credits,
149 .kib_peertxcredits = &peer_credits,
150 .kib_peercredits_hiw = &peer_credits_hiw,
151 .kib_peerrtrcredits = &peer_buffer_credits,
152 .kib_peertimeout = &peer_timeout,
/* Address of the char pointer itself, not of the string it points to. */
153 .kib_default_ipif = &ipif_name,
154 .kib_retry_count = &retry_count,
155 .kib_rnr_retry_count = &rnr_retry_count,
156 .kib_concurrent_sends = &concurrent_sends,
157 .kib_ib_mtu = &ib_mtu,
158 .kib_map_on_demand = &map_on_demand,
159 .kib_fmr_pool_size = &fmr_pool_size,
160 .kib_fmr_flush_trigger = &fmr_flush_trigger,
161 .kib_fmr_cache = &fmr_cache,
162 .kib_pmr_pool_size = &pmr_pool_size,
163 .kib_require_priv_port = &require_privileged_port,
164 .kib_use_priv_port = &use_privileged_port
/*
 * The sysctl section that starts here is compiled only when CONFIG_SYSCTL
 * is defined and CFS_SYSFS_MODULE_PARM evaluates to zero.
 */
167 #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
/*
 * 32-byte buffer used as the .data of the "ipif_name" sysctl entry; it is
 * filled from ipif_name in kiblnd_sysctl_init().
 */
169 static char ipif_basename_space[32];
/*
 * ctl_name identifiers used by kiblnd_ctl_table.  When
 * HAVE_SYSCTL_UNNUMBERED is not defined they are individual enumerators.
 * NOTE(review): the start of the enumeration and several enumerators are
 * not visible in this excerpt.
 */
171 #ifndef HAVE_SYSCTL_UNNUMBERED
179 O2IBLND_PEER_TXCREDITS,
180 O2IBLND_PEER_CREDITS_HIW,
181 O2IBLND_PEER_RTRCREDITS,
182 O2IBLND_PEER_TIMEOUT,
183 O2IBLND_IPIF_BASENAME,
185 O2IBLND_RNR_RETRY_COUNT,
187 O2IBLND_CONCURRENT_SENDS,
189 O2IBLND_MAP_ON_DEMAND,
190 O2IBLND_FMR_POOL_SIZE,
191 O2IBLND_FMR_FLUSH_TRIGGER,
193 O2IBLND_PMR_POOL_SIZE,
/*
 * Alternative definitions: every identifier maps to CTL_UNNUMBERED.
 * NOTE(review): presumably this is the #else branch of the #ifndef above;
 * the #else/#endif lines are not visible here -- confirm.
 */
198 #define O2IBLND_SERVICE CTL_UNNUMBERED
199 #define O2IBLND_CKSUM CTL_UNNUMBERED
200 #define O2IBLND_TIMEOUT CTL_UNNUMBERED
201 #define O2IBLND_NTX CTL_UNNUMBERED
202 #define O2IBLND_CREDITS CTL_UNNUMBERED
203 #define O2IBLND_PEER_TXCREDITS CTL_UNNUMBERED
204 #define O2IBLND_PEER_CREDITS_HIW CTL_UNNUMBERED
205 #define O2IBLND_PEER_RTRCREDITS CTL_UNNUMBERED
206 #define O2IBLND_PEER_TIMEOUT CTL_UNNUMBERED
207 #define O2IBLND_IPIF_BASENAME CTL_UNNUMBERED
208 #define O2IBLND_RETRY_COUNT CTL_UNNUMBERED
209 #define O2IBLND_RNR_RETRY_COUNT CTL_UNNUMBERED
210 #define O2IBLND_KEEPALIVE CTL_UNNUMBERED
211 #define O2IBLND_CONCURRENT_SENDS CTL_UNNUMBERED
212 #define O2IBLND_IB_MTU CTL_UNNUMBERED
213 #define O2IBLND_MAP_ON_DEMAND CTL_UNNUMBERED
214 #define O2IBLND_FMR_POOL_SIZE CTL_UNNUMBERED
215 #define O2IBLND_FMR_FLUSH_TRIGGER CTL_UNNUMBERED
216 #define O2IBLND_FMR_CACHE CTL_UNNUMBERED
217 #define O2IBLND_PMR_POOL_SIZE CTL_UNNUMBERED
218 #define O2IBLND_DEV_FAILOVER CTL_UNNUMBERED
/*
 * sysctl entries for the tunables, one per O2IBLND_* identifier.  Integer
 * entries use maxlen sizeof(int) with proc_dointvec and, where the .data
 * line is visible, point directly at the module-parameter variable.  The
 * "ipif_name" entry is the exception: it uses proc_dostring on the
 * ipif_basename_space buffer.
 * NOTE(review): the per-entry braces, the .mode fields, some .data and
 * .procname lines and the table terminator are not visible in this
 * excerpt -- confirm against the complete file.
 */
222 static cfs_sysctl_table_t kiblnd_ctl_table[] = {
224 .ctl_name = O2IBLND_SERVICE,
225 .procname = "service",
227 .maxlen = sizeof(int),
229 .proc_handler = &proc_dointvec
232 .ctl_name = O2IBLND_CKSUM,
235 .maxlen = sizeof(int),
237 .proc_handler = &proc_dointvec
240 .ctl_name = O2IBLND_TIMEOUT,
241 .procname = "timeout",
243 .maxlen = sizeof(int),
245 .proc_handler = &proc_dointvec
248 .ctl_name = O2IBLND_NTX,
251 .maxlen = sizeof(int),
253 .proc_handler = &proc_dointvec
256 .ctl_name = O2IBLND_CREDITS,
257 .procname = "credits",
259 .maxlen = sizeof(int),
261 .proc_handler = &proc_dointvec
264 .ctl_name = O2IBLND_PEER_TXCREDITS,
265 .procname = "peer_credits",
266 .data = &peer_credits,
267 .maxlen = sizeof(int),
269 .proc_handler = &proc_dointvec
272 .ctl_name = O2IBLND_PEER_CREDITS_HIW,
273 .procname = "peer_credits_hiw",
274 .data = &peer_credits_hiw,
275 .maxlen = sizeof(int),
277 .proc_handler = &proc_dointvec
280 .ctl_name = O2IBLND_PEER_RTRCREDITS,
281 .procname = "peer_buffer_credits",
282 .data = &peer_buffer_credits,
283 .maxlen = sizeof(int),
285 .proc_handler = &proc_dointvec
288 .ctl_name = O2IBLND_PEER_TIMEOUT,
289 .procname = "peer_timeout",
290 .data = &peer_timeout,
291 .maxlen = sizeof(int),
293 .proc_handler = &proc_dointvec
/* String entry: backed by the fixed-size copy, not by the ipif_name pointer. */
296 .ctl_name = O2IBLND_IPIF_BASENAME,
297 .procname = "ipif_name",
298 .data = ipif_basename_space,
299 .maxlen = sizeof(ipif_basename_space),
301 .proc_handler = &proc_dostring
304 .ctl_name = O2IBLND_RETRY_COUNT,
305 .procname = "retry_count",
306 .data = &retry_count,
307 .maxlen = sizeof(int),
309 .proc_handler = &proc_dointvec
312 .ctl_name = O2IBLND_RNR_RETRY_COUNT,
313 .procname = "rnr_retry_count",
314 .data = &rnr_retry_count,
315 .maxlen = sizeof(int),
317 .proc_handler = &proc_dointvec
320 .ctl_name = O2IBLND_KEEPALIVE,
321 .procname = "keepalive",
323 .maxlen = sizeof(int),
325 .proc_handler = &proc_dointvec
328 .ctl_name = O2IBLND_CONCURRENT_SENDS,
329 .procname = "concurrent_sends",
330 .data = &concurrent_sends,
331 .maxlen = sizeof(int),
333 .proc_handler = &proc_dointvec
336 .ctl_name = O2IBLND_IB_MTU,
337 .procname = "ib_mtu",
339 .maxlen = sizeof(int),
341 .proc_handler = &proc_dointvec
344 .ctl_name = O2IBLND_MAP_ON_DEMAND,
345 .procname = "map_on_demand",
346 .data = &map_on_demand,
347 .maxlen = sizeof(int),
349 .proc_handler = &proc_dointvec
353 .ctl_name = O2IBLND_FMR_POOL_SIZE,
354 .procname = "fmr_pool_size",
355 .data = &fmr_pool_size,
356 .maxlen = sizeof(int),
358 .proc_handler = &proc_dointvec
361 .ctl_name = O2IBLND_FMR_FLUSH_TRIGGER,
362 .procname = "fmr_flush_trigger",
363 .data = &fmr_flush_trigger,
364 .maxlen = sizeof(int),
366 .proc_handler = &proc_dointvec
369 .ctl_name = O2IBLND_FMR_CACHE,
370 .procname = "fmr_cache",
372 .maxlen = sizeof(int),
374 .proc_handler = &proc_dointvec
377 .ctl_name = O2IBLND_PMR_POOL_SIZE,
378 .procname = "pmr_pool_size",
379 .data = &pmr_pool_size,
380 .maxlen = sizeof(int),
382 .proc_handler = &proc_dointvec
385 .ctl_name = O2IBLND_DEV_FAILOVER,
386 .procname = "dev_failover",
387 .data = &dev_failover,
388 .maxlen = sizeof(int),
390 .proc_handler = &proc_dointvec
/*
 * Top-level sysctl table: a single "o2iblnd" entry whose child is
 * kiblnd_ctl_table.  This is the table that kiblnd_sysctl_init() registers.
 * NOTE(review): the remaining fields of the entry and the table terminator
 * are not visible in this excerpt.
 */
395 static cfs_sysctl_table_t kiblnd_top_ctl_table[] = {
397 .ctl_name = CTL_O2IBLND,
398 .procname = "o2iblnd",
402 .child = kiblnd_ctl_table
/*
 * Copy the string tunable `str` into the fixed buffer `space`, writing at
 * most `size` bytes.
 *
 * strncpy() leaves the destination without a terminating NUL when `str`
 * holds `size` or more characters.
 * NOTE(review): any explicit termination of `space` after the copy is not
 * visible in this excerpt -- confirm it exists in the complete file.
 */
408 kiblnd_initstrtunable(char *space, char *str, int size)
410 strncpy(space, str, size);
/*
 * Set up the sysctl interface for the tunables.
 *
 * Copies ipif_name into ipif_basename_space (the buffer exported by the
 * "ipif_name" sysctl entry), registers kiblnd_top_ctl_table and stores the
 * returned handle in kiblnd_tunables.kib_sysctl.  A NULL handle is only
 * reported with CWARN; no other failure handling is visible here.
 */
415 kiblnd_sysctl_init (void)
417 kiblnd_initstrtunable(ipif_basename_space, ipif_name,
418 sizeof(ipif_basename_space));
420 kiblnd_tunables.kib_sysctl =
421 cfs_register_sysctl_table(kiblnd_top_ctl_table, 0);
423 if (kiblnd_tunables.kib_sysctl == NULL)
424 CWARN("Can't setup /proc tunables\n");
/*
 * Tear down the sysctl interface: unregister the table handle saved in
 * kiblnd_tunables.kib_sysctl.  The NULL check skips the call when
 * registration in kiblnd_sysctl_init() did not produce a handle.
 */
428 kiblnd_sysctl_fini (void)
430 if (kiblnd_tunables.kib_sysctl != NULL)
431 cfs_unregister_sysctl_table(kiblnd_tunables.kib_sysctl);
/*
 * Second definition of kiblnd_sysctl_init().
 * NOTE(review): presumably the fallback used when the sysctl section above
 * is compiled out; the preprocessor branch and the function body are not
 * visible in this excerpt -- confirm.
 */
437 kiblnd_sysctl_init (void)
/*
 * Second definition of kiblnd_sysctl_fini().
 * NOTE(review): presumably the fallback used when the sysctl section above
 * is compiled out; the preprocessor branch and the function body are not
 * visible in this excerpt -- confirm.
 */
442 kiblnd_sysctl_fini (void)
/*
 * Validate and normalise the tunables through the kiblnd_tunables pointers,
 * then register the sysctl entries via kiblnd_sysctl_init().
 *
 * The adjustments are order-dependent: peer_credits is settled first, and
 * the peer_credits_hiw and concurrent_sends limits are derived from it.
 * NOTE(review): the return type, the return statements, the braces and at
 * least one `else` are not visible in this excerpt -- confirm the exact
 * control flow against the complete file.
 */
449 kiblnd_tunables_init (void)
/*
 * Report an ib_mtu for which kiblnd_translate_mtu() returns a negative value.
 * NOTE(review): the statement following the CERROR (presumably an error
 * return) is not visible here.
 */
451 if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) {
452 CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
453 *kiblnd_tunables.kib_ib_mtu);
/* Clamp peer_credits into [IBLND_CREDITS_DEFAULT, IBLND_CREDITS_MAX]. */
457 if (*kiblnd_tunables.kib_peertxcredits < IBLND_CREDITS_DEFAULT)
458 *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_DEFAULT;
460 if (*kiblnd_tunables.kib_peertxcredits > IBLND_CREDITS_MAX)
461 *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_MAX;
/*
 * ...and never above the global credits value.  This cap is applied last,
 * so it can leave peer_credits below IBLND_CREDITS_DEFAULT when credits is
 * smaller than that.
 */
463 if (*kiblnd_tunables.kib_peertxcredits > *kiblnd_tunables.kib_credits)
464 *kiblnd_tunables.kib_peertxcredits = *kiblnd_tunables.kib_credits;
/* Raise peer_credits_hiw to at least peer_credits / 2 ... */
466 if (*kiblnd_tunables.kib_peercredits_hiw < *kiblnd_tunables.kib_peertxcredits / 2)
467 *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits / 2;
/* ... and keep it strictly below peer_credits. */
469 if (*kiblnd_tunables.kib_peercredits_hiw >= *kiblnd_tunables.kib_peertxcredits)
470 *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits - 1;
/* A map_on_demand value outside [0, IBLND_MAX_RDMA_FRAGS] is reset to 0. */
472 if (*kiblnd_tunables.kib_map_on_demand < 0 ||
473 *kiblnd_tunables.kib_map_on_demand > IBLND_MAX_RDMA_FRAGS)
474 *kiblnd_tunables.kib_map_on_demand = 0; /* disable map-on-demand */
/* A value of 1 is bumped to 2. */
476 if (*kiblnd_tunables.kib_map_on_demand == 1)
477 *kiblnd_tunables.kib_map_on_demand = 2; /* don't make sense to create map if only one fragment */
/*
 * concurrent_sends == 0 means "pick a default": twice peer_credits when
 * map_on_demand is in (0, IBLND_MAX_RDMA_FRAGS / 8], peer_credits otherwise.
 * NOTE(review): the `else` joining the two assignments is not visible in
 * this excerpt -- presumably the second assignment is the else branch.
 */
479 if (*kiblnd_tunables.kib_concurrent_sends == 0) {
480 if (*kiblnd_tunables.kib_map_on_demand > 0 &&
481 *kiblnd_tunables.kib_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8)
482 *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits) * 2;
484 *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits);
/* Clamp concurrent_sends into [peer_credits / 2, peer_credits * 2]. */
487 if (*kiblnd_tunables.kib_concurrent_sends > *kiblnd_tunables.kib_peertxcredits * 2)
488 *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits * 2;
490 if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits / 2)
491 *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits / 2;
/* A value below peer_credits is kept, but a warning is logged. */
493 if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits) {
494 CWARN("Concurrent sends %d is lower than message queue size: %d, "
495 "performance may drop slightly.\n",
496 *kiblnd_tunables.kib_concurrent_sends, *kiblnd_tunables.kib_peertxcredits);
/* Register the sysctl entries once the values are final. */
499 kiblnd_sysctl_init();
/*
 * Counterpart of kiblnd_tunables_init(): the only visible action is the
 * call to kiblnd_sysctl_fini(), which undoes the sysctl registration.
 */
504 kiblnd_tunables_fini (void)
506 kiblnd_sysctl_fini();