1 /* -*- mode: c; c-basic-offset: 8; indent-tabs-mode: nil; -*-
2 * vim:expandtab:shiftwidth=8:tabstop=8:
6 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License version 2 only,
10 * as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License version 2 for more details (a copy is included
16 * in the LICENSE file that accompanied this code).
18 * You should have received a copy of the GNU General Public License
19 * version 2 along with this program; If not, see
20 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
22 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
23 * CA 95054 USA or visit www.sun.com if you need additional information or
29 * Copyright 2008 Sun Microsystems, Inc. All rights reserved
30 * Use is subject to license terms.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lnet/klnds/o2iblnd/o2iblnd_modparams.c
38 * Author: Eric Barton <eric@bartonsoftware.com>
/*
 * Module tunables.  Each parameter is a file-scope variable holding its
 * default value, registered through CFS_MODULE_PARM together with a type
 * tag, an access-mode value (0444 or 0644) and a description string.
 *
 * NOTE(review): cksum and ntx are registered below, but no definition of
 * either variable is visible next to its CFS_MODULE_PARM call; likewise the
 * timeout and map_on_demand registrations show no description argument.
 * Confirm those lines against the complete file.
 */
43 static int service = 987;
44 CFS_MODULE_PARM(service, "i", int, 0444,
45 "service number (within RDMA_PS_TCP)");
48 CFS_MODULE_PARM(cksum, "i", int, 0644,
49 "set non-zero to enable message (not RDMA) checksums");
51 static int timeout = 50;
52 CFS_MODULE_PARM(timeout, "i", int, 0644,
56 CFS_MODULE_PARM(ntx, "i", int, 0444,
57 "# of message descriptors");
/* kiblnd_tunables_init() rejects a configuration with credits > ntx. */
59 static int credits = 64;
60 CFS_MODULE_PARM(credits, "i", int, 0444,
61 "# concurrent sends");
/*
 * kiblnd_tunables_init() clamps peer_credits to the range
 * [IBLND_CREDITS_DEFAULT, IBLND_CREDITS_MAX] and then to at most credits.
 */
63 static int peer_credits = 8;
64 CFS_MODULE_PARM(peer_credits, "i", int, 0444,
65 "# concurrent sends to 1 peer");
/*
 * Starts at 0; kiblnd_tunables_init() forces it into the range
 * [peer_credits / 2, peer_credits - 1].
 */
67 static int peer_credits_hiw = 0;
68 CFS_MODULE_PARM(peer_credits_hiw, "i", int, 0444,
69 "when eagerly to return credits");
71 static int peer_buffer_credits = 0;
72 CFS_MODULE_PARM(peer_buffer_credits, "i", int, 0444,
73 "# per-peer router buffer credits");
75 static int peer_timeout = 0;
76 CFS_MODULE_PARM(peer_timeout, "i", int, 0444,
77 "Seconds without aliveness news to declare peer dead (<=0 to disable)");
/*
 * String tunable.  kiblnd_sysctl_init() copies this value into
 * ipif_basename_space, the buffer the "ipif_name" sysctl entry exposes.
 */
79 static char *ipif_name = "ib0";
80 CFS_MODULE_PARM(ipif_name, "s", charp, 0444,
81 "IPoIB interface name");
83 static int retry_count = 5;
84 CFS_MODULE_PARM(retry_count, "i", int, 0644,
85 "Retransmissions when no ACK received");
87 static int rnr_retry_count = 6;
88 CFS_MODULE_PARM(rnr_retry_count, "i", int, 0644,
89 "RNR retransmissions");
91 static int keepalive = 100;
92 CFS_MODULE_PARM(keepalive, "i", int, 0644,
93 "Idle time in seconds before sending a keepalive");
/* Checked by kiblnd_tunables_init() through kiblnd_translate_mtu(). */
95 static int ib_mtu = 0;
96 CFS_MODULE_PARM(ib_mtu, "i", int, 0444,
97 "IB MTU 256/512/1024/2048/4096");
/* 0 asks kiblnd_tunables_init() to derive the value from peer_credits. */
99 static int concurrent_sends = 0;
100 CFS_MODULE_PARM(concurrent_sends, "i", int, 0444,
101 "send work-queue sizing");
/*
 * kiblnd_tunables_init() resets values outside [0, IBLND_MAX_RDMA_FRAGS]
 * to 0 and bumps a value of 1 up to 2.
 */
103 static int map_on_demand = 0;
104 CFS_MODULE_PARM(map_on_demand, "i", int, 0444,
107 static int fmr_pool_size = 512;
108 CFS_MODULE_PARM(fmr_pool_size, "i", int, 0444,
109 "size of the fmr pool (>= ntx)");
111 static int fmr_flush_trigger = 384;
112 CFS_MODULE_PARM(fmr_flush_trigger, "i", int, 0444,
113 "# dirty FMRs that triggers pool flush");
115 static int fmr_cache = 1;
116 CFS_MODULE_PARM(fmr_cache, "i", int, 0444,
117 "non-zero to enable FMR caching");
119 static int pmr_pool_size = 512;
120 CFS_MODULE_PARM(pmr_pool_size, "i", int, 0444,
121 "size of the MR cache pmr pool");
/*
 * Tunables table.  Every field stores the address of the matching
 * file-scope parameter variable, so readers dereference the field
 * (e.g. *kiblnd_tunables.kib_credits) to obtain the current value.
 *
 * NOTE(review): kiblnd_tunables_init() dereferences kib_ntx, but no
 * initializer for that field is visible in this listing - confirm it is
 * set in the complete file.
 */
123 kib_tunables_t kiblnd_tunables = {
124 .kib_service = &service,
126 .kib_timeout = &timeout,
127 .kib_keepalive = &keepalive,
129 .kib_credits = &credits,
130 .kib_peertxcredits = &peer_credits,
131 .kib_peercredits_hiw = &peer_credits_hiw,
132 .kib_peerrtrcredits = &peer_buffer_credits,
133 .kib_peertimeout = &peer_timeout,
/* Address of the char * itself, not of the string it points to. */
134 .kib_default_ipif = &ipif_name,
135 .kib_retry_count = &retry_count,
136 .kib_rnr_retry_count = &rnr_retry_count,
137 .kib_concurrent_sends = &concurrent_sends,
138 .kib_ib_mtu = &ib_mtu,
139 .kib_map_on_demand = &map_on_demand,
140 .kib_fmr_pool_size = &fmr_pool_size,
141 .kib_fmr_flush_trigger = &fmr_flush_trigger,
142 .kib_fmr_cache = &fmr_cache,
143 .kib_pmr_pool_size = &pmr_pool_size,
/*
 * sysctl support is compiled only when CONFIG_SYSCTL is defined and
 * CFS_SYSFS_MODULE_PARM evaluates to zero.
 */
146 #if defined(CONFIG_SYSCTL) && !CFS_SYSFS_MODULE_PARM
/* Fixed 32-byte buffer that backs the "ipif_name" sysctl entry. */
148 static char ipif_basename_space[32];
/*
 * Identifiers used as .ctl_name in kiblnd_ctl_table.  Without
 * HAVE_SYSCTL_UNNUMBERED they are enumerators; the #define list further
 * down maps every identifier to CTL_UNNUMBERED instead.
 * NOTE(review): the enum header and the #else/#endif separating the two
 * variants are not visible in this listing - presumably the #define list
 * is the #else branch; confirm against the complete file.
 */
150 #ifndef HAVE_SYSCTL_UNNUMBERED
158 O2IBLND_PEER_TXCREDITS,
159 O2IBLND_PEER_CREDITS_HIW,
160 O2IBLND_PEER_RTRCREDITS,
161 O2IBLND_PEER_TIMEOUT,
162 O2IBLND_IPIF_BASENAME,
164 O2IBLND_RNR_RETRY_COUNT,
166 O2IBLND_CONCURRENT_SENDS,
168 O2IBLND_MAP_ON_DEMAND,
169 O2IBLND_FMR_POOL_SIZE,
170 O2IBLND_FMR_FLUSH_TRIGGER,
172 O2IBLND_PMR_POOL_SIZE
/* Unnumbered variant: all identifiers share the value CTL_UNNUMBERED. */
176 #define O2IBLND_SERVICE CTL_UNNUMBERED
177 #define O2IBLND_CKSUM CTL_UNNUMBERED
178 #define O2IBLND_TIMEOUT CTL_UNNUMBERED
179 #define O2IBLND_NTX CTL_UNNUMBERED
180 #define O2IBLND_CREDITS CTL_UNNUMBERED
181 #define O2IBLND_PEER_TXCREDITS CTL_UNNUMBERED
182 #define O2IBLND_PEER_CREDITS_HIW CTL_UNNUMBERED
183 #define O2IBLND_PEER_RTRCREDITS CTL_UNNUMBERED
184 #define O2IBLND_PEER_TIMEOUT CTL_UNNUMBERED
185 #define O2IBLND_IPIF_BASENAME CTL_UNNUMBERED
186 #define O2IBLND_RETRY_COUNT CTL_UNNUMBERED
187 #define O2IBLND_RNR_RETRY_COUNT CTL_UNNUMBERED
188 #define O2IBLND_KEEPALIVE CTL_UNNUMBERED
189 #define O2IBLND_CONCURRENT_SENDS CTL_UNNUMBERED
190 #define O2IBLND_IB_MTU CTL_UNNUMBERED
191 #define O2IBLND_MAP_ON_DEMAND CTL_UNNUMBERED
192 #define O2IBLND_FMR_POOL_SIZE CTL_UNNUMBERED
193 #define O2IBLND_FMR_FLUSH_TRIGGER CTL_UNNUMBERED
194 #define O2IBLND_FMR_CACHE CTL_UNNUMBERED
195 #define O2IBLND_PMR_POOL_SIZE CTL_UNNUMBERED
/*
 * sysctl entries, one per tunable.  Integer tunables use proc_dointvec
 * with .maxlen = sizeof(int) and, where .data is shown, point directly at
 * the parameter variable.  The single string tunable (ipif_name) uses
 * proc_dostring over the ipif_basename_space buffer.
 *
 * NOTE(review): entry delimiters, the .mode fields and a few .procname /
 * .data fields are not visible in this listing; confirm them against the
 * complete file before editing individual entries.
 */
199 static cfs_sysctl_table_t kiblnd_ctl_table[] = {
201 .ctl_name = O2IBLND_SERVICE,
202 .procname = "service",
204 .maxlen = sizeof(int),
206 .proc_handler = &proc_dointvec
209 .ctl_name = O2IBLND_CKSUM,
212 .maxlen = sizeof(int),
214 .proc_handler = &proc_dointvec
217 .ctl_name = O2IBLND_TIMEOUT,
218 .procname = "timeout",
220 .maxlen = sizeof(int),
222 .proc_handler = &proc_dointvec
225 .ctl_name = O2IBLND_NTX,
228 .maxlen = sizeof(int),
230 .proc_handler = &proc_dointvec
233 .ctl_name = O2IBLND_CREDITS,
234 .procname = "credits",
236 .maxlen = sizeof(int),
238 .proc_handler = &proc_dointvec
241 .ctl_name = O2IBLND_PEER_TXCREDITS,
242 .procname = "peer_credits",
243 .data = &peer_credits,
244 .maxlen = sizeof(int),
246 .proc_handler = &proc_dointvec
249 .ctl_name = O2IBLND_PEER_CREDITS_HIW,
250 .procname = "peer_credits_hiw",
251 .data = &peer_credits_hiw,
252 .maxlen = sizeof(int),
254 .proc_handler = &proc_dointvec
257 .ctl_name = O2IBLND_PEER_RTRCREDITS,
258 .procname = "peer_buffer_credits",
259 .data = &peer_buffer_credits,
260 .maxlen = sizeof(int),
262 .proc_handler = &proc_dointvec
265 .ctl_name = O2IBLND_PEER_TIMEOUT,
266 .procname = "peer_timeout",
267 .data = &peer_timeout,
268 .maxlen = sizeof(int),
270 .proc_handler = &proc_dointvec
/*
 * String entry: exposes the ipif_basename_space copy (filled in by
 * kiblnd_sysctl_init()), not the ipif_name pointer itself.
 */
273 .ctl_name = O2IBLND_IPIF_BASENAME,
274 .procname = "ipif_name",
275 .data = ipif_basename_space,
276 .maxlen = sizeof(ipif_basename_space),
278 .proc_handler = &proc_dostring
281 .ctl_name = O2IBLND_RETRY_COUNT,
282 .procname = "retry_count",
283 .data = &retry_count,
284 .maxlen = sizeof(int),
286 .proc_handler = &proc_dointvec
289 .ctl_name = O2IBLND_RNR_RETRY_COUNT,
290 .procname = "rnr_retry_count",
291 .data = &rnr_retry_count,
292 .maxlen = sizeof(int),
294 .proc_handler = &proc_dointvec
297 .ctl_name = O2IBLND_KEEPALIVE,
298 .procname = "keepalive",
300 .maxlen = sizeof(int),
302 .proc_handler = &proc_dointvec
305 .ctl_name = O2IBLND_CONCURRENT_SENDS,
306 .procname = "concurrent_sends",
307 .data = &concurrent_sends,
308 .maxlen = sizeof(int),
310 .proc_handler = &proc_dointvec
313 .ctl_name = O2IBLND_IB_MTU,
314 .procname = "ib_mtu",
316 .maxlen = sizeof(int),
318 .proc_handler = &proc_dointvec
321 .ctl_name = O2IBLND_MAP_ON_DEMAND,
322 .procname = "map_on_demand",
323 .data = &map_on_demand,
324 .maxlen = sizeof(int),
326 .proc_handler = &proc_dointvec
330 .ctl_name = O2IBLND_FMR_POOL_SIZE,
331 .procname = "fmr_pool_size",
332 .data = &fmr_pool_size,
333 .maxlen = sizeof(int),
335 .proc_handler = &proc_dointvec
338 .ctl_name = O2IBLND_FMR_FLUSH_TRIGGER,
339 .procname = "fmr_flush_trigger",
340 .data = &fmr_flush_trigger,
341 .maxlen = sizeof(int),
343 .proc_handler = &proc_dointvec
346 .ctl_name = O2IBLND_FMR_CACHE,
347 .procname = "fmr_cache",
349 .maxlen = sizeof(int),
351 .proc_handler = &proc_dointvec
354 .ctl_name = O2IBLND_PMR_POOL_SIZE,
355 .procname = "pmr_pool_size",
356 .data = &pmr_pool_size,
357 .maxlen = sizeof(int),
359 .proc_handler = &proc_dointvec
/*
 * Top-level sysctl table: a single entry named "o2iblnd" (id CTL_O2IBLND)
 * whose .child is kiblnd_ctl_table.  This is the table handed to
 * cfs_register_sysctl_table() in kiblnd_sysctl_init().
 */
364 static cfs_sysctl_table_t kiblnd_top_ctl_table[] = {
366 .ctl_name = CTL_O2IBLND,
367 .procname = "o2iblnd",
371 .child = kiblnd_ctl_table
/*
 * Copy the string tunable str into the fixed buffer space.
 *
 * space: destination buffer
 * str:   source string
 * size:  number of bytes strncpy() may write into space
 *
 * NOTE(review): strncpy() leaves space without a terminating NUL when str
 * holds size or more characters.  No explicit termination is visible in
 * this listing - confirm the complete function terminates the buffer.
 */
377 kiblnd_initstrtunable(char *space, char *str, int size)
379 strncpy(space, str, size);
/*
 * Register the o2iblnd sysctl tables.
 *
 * First seeds ipif_basename_space from the ipif_name parameter, then
 * registers kiblnd_top_ctl_table and keeps the returned handle in
 * kiblnd_tunables.kib_sysctl.  A NULL handle only produces a warning.
 */
384 kiblnd_sysctl_init (void)
386 kiblnd_initstrtunable(ipif_basename_space, ipif_name,
387 sizeof(ipif_basename_space));
389 kiblnd_tunables.kib_sysctl =
390 cfs_register_sysctl_table(kiblnd_top_ctl_table, 0);
392 if (kiblnd_tunables.kib_sysctl == NULL)
393 CWARN("Can't setup /proc tunables\n");
/*
 * Unregister the sysctl tables.  Does nothing when registration failed,
 * i.e. when kiblnd_tunables.kib_sysctl is NULL.
 */
397 kiblnd_sysctl_fini (void)
399 if (kiblnd_tunables.kib_sysctl != NULL)
400 cfs_unregister_sysctl_table(kiblnd_tunables.kib_sysctl);
/*
 * Second definitions of kiblnd_sysctl_init() / kiblnd_sysctl_fini().
 * NOTE(review): presumably the variants compiled when the
 * CONFIG_SYSCTL && !CFS_SYSFS_MODULE_PARM condition is false; their
 * bodies are not visible in this listing - confirm in the complete file.
 */
406 kiblnd_sysctl_init (void)
411 kiblnd_sysctl_fini (void)
/*
 * Validate and normalise the module tunables, then set up sysctl.
 *
 * The adjustments below are order-dependent: later clamps use values that
 * earlier steps may already have changed (peertxcredits in particular).
 *
 * NOTE(review): the statements that follow each CERROR (presumably an
 * error return) and the function's final return are not visible in this
 * listing - confirm against the complete file.
 */
418 kiblnd_tunables_init (void)
/* credits may not exceed ntx; this is reported as an error. */
420 if (*kiblnd_tunables.kib_credits > *kiblnd_tunables.kib_ntx) {
421 CERROR("Can't set credits(%d) > ntx(%d)\n",
422 *kiblnd_tunables.kib_credits,
423 *kiblnd_tunables.kib_ntx);
/* A negative result from kiblnd_translate_mtu() marks ib_mtu invalid. */
427 if (kiblnd_translate_mtu(*kiblnd_tunables.kib_ib_mtu) < 0) {
428 CERROR("Invalid ib_mtu %d, expected 256/512/1024/2048/4096\n",
429 *kiblnd_tunables.kib_ib_mtu);
/*
 * Clamp peertxcredits to [IBLND_CREDITS_DEFAULT, IBLND_CREDITS_MAX],
 * then cap it at the global credits value.
 */
433 if (*kiblnd_tunables.kib_peertxcredits < IBLND_CREDITS_DEFAULT)
434 *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_DEFAULT;
436 if (*kiblnd_tunables.kib_peertxcredits > IBLND_CREDITS_MAX)
437 *kiblnd_tunables.kib_peertxcredits = IBLND_CREDITS_MAX;
439 if (*kiblnd_tunables.kib_peertxcredits > *kiblnd_tunables.kib_credits)
440 *kiblnd_tunables.kib_peertxcredits = *kiblnd_tunables.kib_credits;
/* Keep peercredits_hiw within [peertxcredits / 2, peertxcredits - 1]. */
442 if (*kiblnd_tunables.kib_peercredits_hiw < *kiblnd_tunables.kib_peertxcredits / 2)
443 *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits / 2;
445 if (*kiblnd_tunables.kib_peercredits_hiw >= *kiblnd_tunables.kib_peertxcredits)
446 *kiblnd_tunables.kib_peercredits_hiw = *kiblnd_tunables.kib_peertxcredits - 1;
/* map_on_demand outside [0, IBLND_MAX_RDMA_FRAGS] is reset to 0. */
448 if (*kiblnd_tunables.kib_map_on_demand < 0 ||
449 *kiblnd_tunables.kib_map_on_demand > IBLND_MAX_RDMA_FRAGS)
450 *kiblnd_tunables.kib_map_on_demand = 0; /* disable map-on-demand */
452 if (*kiblnd_tunables.kib_map_on_demand == 1)
453 *kiblnd_tunables.kib_map_on_demand = 2; /* don't make sense to create map if only one fragment */
/*
 * concurrent_sends == 0 requests a derived default: peertxcredits * 2
 * when map_on_demand lies in (0, IBLND_MAX_RDMA_FRAGS / 8].
 * NOTE(review): the plain peertxcredits assignment that follows is
 * presumably the else branch; the else keyword is not visible here.
 */
455 if (*kiblnd_tunables.kib_concurrent_sends == 0) {
456 if (*kiblnd_tunables.kib_map_on_demand > 0 &&
457 *kiblnd_tunables.kib_map_on_demand <= IBLND_MAX_RDMA_FRAGS / 8)
458 *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits) * 2;
460 *kiblnd_tunables.kib_concurrent_sends = (*kiblnd_tunables.kib_peertxcredits);
/* Clamp concurrent_sends to [peertxcredits / 2, peertxcredits * 2]. */
463 if (*kiblnd_tunables.kib_concurrent_sends > *kiblnd_tunables.kib_peertxcredits * 2)
464 *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits * 2;
466 if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits / 2)
467 *kiblnd_tunables.kib_concurrent_sends = *kiblnd_tunables.kib_peertxcredits / 2;
/* A value below peertxcredits is kept but triggers a warning. */
469 if (*kiblnd_tunables.kib_concurrent_sends < *kiblnd_tunables.kib_peertxcredits) {
470 CWARN("Concurrent sends %d is lower than message queue size: %d, "
471 "performance may drop slightly.\n",
472 *kiblnd_tunables.kib_concurrent_sends, *kiblnd_tunables.kib_peertxcredits);
/* Register sysctl entries only after the values have been normalised. */
475 kiblnd_sysctl_init();
/*
 * Tear down what kiblnd_tunables_init() set up: calls
 * kiblnd_sysctl_fini() to release the sysctl registration.
 */
480 kiblnd_tunables_fini (void)
482 kiblnd_sysctl_fini();